Conversions: Support single unit input, case-insensitive fallback check (#4261)

* Allow the "celcius" misspelling of celsius, updated tests accordingly

* Store regex captures in variables

* Allow single unit inputs through guard

* Pass through single unit input to frontend

* Explicitly match guard regex, better check for valid result

* Cleanup whitespace and indentation

* Lowercase symbols that are case-insensitive

* Fix and simplify guard regex

* Better handling of named captures, ensure left unit is not only match

* Case sensitive match first, fall back to lowercase match on symbol

* fix indentation

* remove redundant m on regex match

* Removed default if duplicate.
master
Zaahir Moolla 2017-06-15 16:51:24 +02:00 committed by PJ Hampton
parent 07bbf47634
commit 1c867979c1
4 changed files with 159 additions and 60 deletions

View File

@ -41,7 +41,16 @@ my $question_prefix = qr/(?<prefix>conver(?:t|sion)|what (?:is|are|does)|how (?:
# guards and matches regex
my $factor_re = join('|', ('a', 'an', number_style_regex()));
my $guard = qr/^(?<question>$question_prefix)\s?(?<left_num>$factor_re*)\s?(?<left_unit>$keys)\s((=\s?\?)|(equals|is)\s(how many )?)?(?<connecting_word>in|(?:convert(?:ed)?)?\s?to|vs|convert|per|=|into|(?:equals)? how many|(?:equal|make) a?|are in a|(?:is what in)|(?:in to)|from)?\s?(?<right_num>$factor_re*)\s?(?:of\s)?(?<right_unit>$keys)\s?(?:conver(?:sion|ter)|calculator)?[\?]?$/i;
my $guard = qr/^
(?<question>$question_prefix)\s?
(?<left_num>$factor_re*)\s?(?<left_unit>$keys)
(?:\s
(?<connecting_word>in|(?:convert(?:ed)?)?\s?to|vs|convert|per|=(?:[\s\?]+)?|into|(?:equals|is)?\show\smany|(?:equals?|make)\sa?|are\sin\sa|(?:is\swhat\sin)|(?:in to)|from)?\s?
(?<right_num>$factor_re*)\s?(?:of\s)?(?<right_unit>$keys)\s?
(?:conver(?:sion|ter)|calculator)?[\?]?
)?
$
/ix;
# for 'most' results, like 213.800 degrees fahrenheit, decimal places
# for small, but not scientific notation, significant figures
@ -104,9 +113,18 @@ handle query => sub {
# guard the query from spurious matches
return unless $_ =~ /$guard/;
my @matches = ($+{'left_unit'}, $+{'right_unit'});
return if ("" ne $+{'left_num'} && "" ne $+{'right_num'});
my $factor = $+{'left_num'};
my $left_unit = $+{'left_unit'};
my $left_num = $+{'left_num'};
my $right_unit = $+{'right_unit'} // "";
my $right_num = $+{'right_num'} // "";
my $question = $+{'question'} // "";
my $connecting_word = $+{'connecting_word'} // "";
my $factor = $left_num;
my @matches = ($left_unit, $right_unit);
# ignore conversion when both units have a number, or when a lone left unit has neither a number nor a right unit
return if ($left_num && $right_num) || $left_unit && !($left_num || $right_unit);
# Compare factors of both units to ensure proper order when ambiguous
# also, check the <connecting_word> of regex for possible user intentions
@ -115,48 +133,53 @@ handle query => sub {
# gets factors for comparison
foreach my $type (@types) {
if( lc $+{'left_unit'} eq lc $type->{'unit'} || $type->{'symbols'} && grep {$_ eq $+{'left_unit'} } @{$type->{'symbols'}}) {
if( lc $left_unit eq lc $type->{'unit'} || $type->{'symbols'} && grep {$_ eq $left_unit } @{$type->{'symbols'}}) {
push(@factor1, $type->{'factor'});
}
my @aliases1 = @{$type->{'aliases'}};
foreach my $alias1 (@aliases1) {
if(lc $+{'left_unit'} eq lc $alias1) {
if(lc $left_unit eq lc $alias1) {
push(@factor1, $type->{'factor'});
}
}
if(lc $+{'right_unit'} eq lc $type->{'unit'} || $type->{'symbols'} && grep {$_ eq $+{'right_unit'} } @{$type->{'symbols'}}) {
if(lc $right_unit eq lc $type->{'unit'} || $type->{'symbols'} && grep {$_ eq $right_unit } @{$type->{'symbols'}}) {
push(@factor2, $type->{'factor'});
}
my @aliases2 = @{$type->{'aliases'}};
foreach my $alias2 (@aliases2) {
if(lc $+{'right_unit'} eq lc $alias2) {
if(lc $right_unit eq lc $alias2) {
push(@factor2, $type->{'factor'});
}
}
}
# handle case when there is no "to" unit
# e.g. "36 meters"
if ($left_unit && $left_num && !($right_unit || $right_num)) {
$factor = $left_num;
}
# if the query is in the format <unit> in <num> <unit> we need to flip
# also if it's like "how many cm in metre"; the "1" is implicitly metre so also flip
# But if the second unit is plural, assume we want the implicit one on the first
# It's always ambiguous when they are both countless and plural, so shouldn't be too bad.
if (
"" ne $+{'right_num'}
|| ( "" eq $+{'left_num'}
&& "" eq $+{'right_num'}
&& $+{'question'} !~ qr/convert/i
&& $+{'connecting_word'} !~ qr/to/i ))
elsif (
"" ne $right_num
|| ( "" eq $left_num
&& "" eq $right_num
&& $question !~ qr/convert/i
&& $connecting_word !~ qr/to/i ))
{
$factor = $+{'right_num'};
@matches = ($matches[1], $matches[0]);
$factor = $right_num;
@matches = reverse @matches;
}
$factor = 1 if ($factor =~ qr/^(a[n]?)?$/i);
my $styler = number_style_for($factor);
return unless $styler;
return unless $styler->for_computation($factor) < $maximum_input;
my $result = convert({
@ -165,7 +188,7 @@ handle query => sub {
'to_unit' => $matches[1],
});
return unless defined $result->{'result'};
return unless defined $result->{'from_unit'} && defined $result->{'type'};
my $computable_factor = $styler->for_computation($factor);
if (magnitude_order($computable_factor) > 2*$accuracy + 1) {
@ -174,27 +197,29 @@ handle query => sub {
$factor = $styler->for_display($factor);
return "", structured_answer => {
data => {
raw_input => $styler->for_computation($factor),
left_unit => $result->{'from_unit'},
right_unit => $result->{'to_unit'},
physical_quantity => $result->{'type'}
},
templates => {
group => 'base',
options => {
content => 'DDH.conversions.content'
}
}
};
data => {
raw_input => $styler->for_computation($factor),
left_unit => $result->{'from_unit'},
right_unit => $result->{'to_unit'},
physical_quantity => $result->{'type'}
},
templates => {
group => 'base',
options => {
content => 'DDH.conversions.content'
}
}
};
};
sub get_matches {
my @input_matches = @_;
my @output_matches = ();
foreach my $match (@input_matches) {
foreach my $type (@types) {
if (($type->{'symbols'} && grep { $_ eq $match } @{$type->{'symbols'}})
|| ($type->{'symbols'} && grep { $_ eq lc $match } @{$type->{'symbols'}})
|| lc $match eq lc $type->{'unit'}
|| grep { $_ eq lc $match } @{$type->{'aliases'}} ) {
push(@output_matches,{
@ -211,18 +236,18 @@ sub get_matches {
sub convert {
my ($conversion) = @_;
my @matches = get_matches($conversion->{'from_unit'}, $conversion->{'to_unit'});
return if scalar(@matches) != 2;
my @inputs = ($conversion->{'from_unit'});
push @inputs, $conversion->{'to_unit'} if defined $conversion->{'to_unit'};
my @matches = get_matches(@inputs);
return if scalar(@matches) < 1;
return if $conversion->{'factor'} < 0 && !($matches[0]->{'can_be_negative'});
# matches must be of the same type (e.g., can't convert mass to length):
return if ($matches[0]->{'type'} ne $matches[1]->{'type'});
return if (scalar(@matches) > 1 && $matches[0]->{'type'} ne $matches[1]->{'type'});
return {
"result" => "",
"from_unit" => $matches[0]->{'unit'},
"to_unit" => $matches[1]->{'unit'},
"to_unit" => $matches[1]->{'unit'} // "",
"type" => $matches[0]->{'type'}
};
}

View File

@ -568,11 +568,19 @@ DDH.conversions = DDH.conversions || {};
// Defaults to length if no base is supplied
var startBase = ops.data.physical_quantity || 'length';
var leftUnit = ops.data.left_unit || Units[startBase].defaults[0];
var rightUnit = ops.data.right_unit || Units[startBase].defaults[1];
var rawInput = ops.data.raw_input || '1';
var unitsSpecified = false;
// default units
var leftUnit = ops.data.left_unit || Units[startBase].defaults[0];
var rightUnit = ops.data.right_unit || Units[startBase].defaults[1];
// If the right unit is the same as the left unit, fall back to the first default unit.
// This conditional fires when a query such as 1 gram is entered
if(rightUnit === leftUnit) {
rightUnit = Units[startBase].defaults[0];
}
return {
// anytime this is triggered, we default to a high signal
signal: "high",

View File

@ -957,7 +957,7 @@ aliases:
- joules
type: energy
unit: joule
symbols: [J]
symbols: [j]
---
aliases:
- kilojs
@ -969,7 +969,7 @@ aliases:
- kilo-joules
type: energy
unit: kilojoule
symbols: [J]
symbols: [kj]
---
aliases:
- watt hour
@ -979,7 +979,7 @@ aliases:
- Wh
type: energy
unit: Wh
symbols: [Wh]
symbols: [wh]
---
aliases:
- ergon
@ -1007,7 +1007,7 @@ aliases:
- eV
type: energy
unit: electronvolt
symbols: [eV]
symbols: [ev]
---
aliases:
- decielectron volt
@ -1177,7 +1177,7 @@ aliases:
- british thermal units
type: energy
unit: BTU
symbols: [BTU]
symbols: [btu]
---
aliases:
- calorie
@ -1202,49 +1202,49 @@ aliases:
- watts
type: power
unit: watt
symbols: [W]
symbols: [w]
---
aliases:
- kilowatt
- kilowatts
type: power
unit: kilowatt
symbols: [kW]
symbols: [kw]
---
aliases:
- megawatt
- megawatts
type: power
unit: megawatt
symbols: [MW]
symbols: [mw]
---
aliases:
- gigawatt
- gigawatts
type: power
unit: gigawatt
symbols: [GW]
symbols: [gw]
---
aliases:
- terawatt
- terawatts
type: power
unit: terawatt
symbols: [TW]
symbols: [tw]
---
aliases:
- petawatt
- petawatts
type: power
unit: petawatt
symbols: [PW]
symbols: [pw]
---
aliases:
- exawatt
- exawatts
type: power
unit: exawatt
symbols: [EW]
symbols: [ew]
---
aliases:
- metric horsepowers

View File

@ -58,7 +58,7 @@ sub make_answer_with_base(%){
ddg_goodie_test(
['DDG::Goodie::Conversions'],
# Example queries
'convert 5 oz to grams' => test_zci(
'',
@ -202,6 +202,23 @@ ddg_goodie_test(
physical_quantity => 'temperature'
})
),
## with typos
'convert 5 kelvin to farenheit' => test_zci(
'', structured_answer => make_answer({
raw_input => '5',
from_unit => 'kelvin',
to_unit => 'fahrenheit',
physical_quantity => 'temperature'
})
),
'convert 5 f to celcius' => test_zci(
'', structured_answer => make_answer({
raw_input => '5',
from_unit => 'fahrenheit',
to_unit => 'celsius',
physical_quantity => 'temperature'
})
),
'convert 50 centigrade to fahrenheit' => test_zci(
'', structured_answer => make_answer({
raw_input => '50',
@ -243,8 +260,6 @@ ddg_goodie_test(
physical_quantity => 'temperature'
})
),
# Implicit conversion requests
# MASS
'3 kilogramme to pound' => test_zci(
@ -1357,7 +1372,7 @@ ddg_goodie_test(
physical_quantity => 'temperature'
})
),
#Question format:
'How to convert meters to inches' => test_zci(
'', structured_answer => make_answer({
@ -1375,7 +1390,7 @@ ddg_goodie_test(
physical_quantity => 'length'
})
),
# Representation (scientific notation)
'30000 km to m' => test_zci(
'', structured_answer => make_answer({
@ -1442,7 +1457,7 @@ ddg_goodie_test(
physical_quantity => 'mass'
})
),
# Flexible queries
'190 lb = ?kg' => test_zci(
'', structured_answer => make_answer({
@ -1516,7 +1531,7 @@ ddg_goodie_test(
physical_quantity => 'area'
})
),
# NATURAL LANGUAGE QUERIES
'unit converter' => test_zci(
'',
@ -1530,7 +1545,7 @@ ddg_goodie_test(
'',
structured_answer => make_answer_lang()
),
# INTENTIONALLY UNTRIGGERED
'155 liters to millilitres' => test_zci(
'', structured_answer => make_answer({
@ -1604,6 +1619,23 @@ ddg_goodie_test(
physical_quantity => 'power'
})
),
'1000 w to kw' => test_zci(
'', structured_answer => make_answer({
raw_input => '1000',
from_unit => 'watt',
to_unit => 'kilowatt',
physical_quantity => 'power'
})
),
'1000 W to kW' => test_zci(
'', structured_answer => make_answer({
raw_input => '1000',
from_unit => 'watt',
to_unit => 'kilowatt',
physical_quantity => 'power'
})
),
'fortnight to days' => test_zci(
'', structured_answer => make_answer({
raw_input => '1',
@ -1877,6 +1909,14 @@ ddg_goodie_test(
physical_quantity => 'energy'
})
),
'6 kj equals how many joule' => test_zci(
'', structured_answer => make_answer({
raw_input => '6',
from_unit => 'kilojoule',
to_unit => 'joule',
physical_quantity => 'energy'
})
),
'88 l = ml' => test_zci(
'', structured_answer => make_answer({
raw_input => '88',
@ -2079,7 +2119,33 @@ ddg_goodie_test(
})
),
# natural language queries
# No right (to) unit specified. The frontend fills in a default right unit
'1 gram' => test_zci(
'', structured_answer => make_answer({
raw_input => '1',
from_unit => 'gram',
to_unit => '',
physical_quantity => 'mass'
})
),
'1 megahertz' => test_zci(
'', structured_answer => make_answer({
raw_input => '1',
from_unit => 'megahertz',
to_unit => '',
physical_quantity => 'frequency'
})
),
'1888 s' => test_zci(
'', structured_answer => make_answer({
raw_input => '1888',
from_unit => 'second',
to_unit => '',
physical_quantity => 'duration'
})
),
# natural language queries
'unit converter' => test_zci(
'',
structured_answer => make_answer_lang()