Conversions: Case sensitive Units/Symbol handling (#4021)

* NumbersRole: Despite the role being case insensitive, the number_style_regex isn't! Adding failing test

* NumbersRole: Fixing indentation

* Conversions: Adding tests around implicit case insensitivity in current implementation

* Conversions: Moved all case insensitive behaviour into the handle query so it's explicit rather than relying on query_lc to do the work for us

* Conversions: Adding symbol definitions for case sensitive handling

* Conversions: Fixing markup

* Conversions: UTF8 with BOM on ratios and adding symbol as a trigger

* Conversions: Symbol is now used (case sensitive) to match for factors

* Conversions: The test 'BTU to KwH' should fail (KwH isn't a unit); however, it was causing a division by zero because get_matches wasn't finding a match and that condition wasn't handled correctly

* Conversions: Typo! KwH shouldn't trigger

* Conversions: Tests involving symbols now need to be correct according to SI

* Conversions: Adding tests around "oz", which should be interpreted as fl oz when the answer should unambiguously be a volume

* Conversions: Will handle ambiguity around oz for more units

* Conversions: Uncommenting tests around digital storage units and fixing them so they're actually correct
master
Rob Emery 2017-04-05 23:39:19 +01:00 committed by Zaahir Moolla
parent 691062a3b0
commit b6e1bd858e
3 changed files with 310 additions and 179 deletions

View File

@ -25,6 +25,7 @@ foreach my $type (@types) {
push(@units, $type->{'unit'});
push(@units, $type->{'plural'}) unless lc $type->{'unit'} eq lc $type->{'plural'};
push(@units, @{$type->{'aliases'}});
push(@units, $type->{'symbol'}) if $type->{'symbol'};
$unit_to_plural{lc $type->{'unit'}} = $type->{'plural'};
$plural_to_unit{lc $type->{'plural'}} = $type->{'unit'};
}
@ -36,10 +37,11 @@ triggers any => @triggers;
# match longest possible key (some keys are sub-keys of other keys):
my $keys = join '|', map { quotemeta $_ } reverse sort { length($a) <=> length($b) } @units;
my $question_prefix = qr/(?<prefix>convert|what (?:is|are|does)|how (?:much|many|long) (?:is|are)?|(?:number of)|(?:how to convert))?/;
my $question_prefix = qr/(?<prefix>convert|what (?:is|are|does)|how (?:much|many|long) (?:is|are)?|(?:number of)|(?:how to convert))?/i;
# guards and matches regex
my $factor_re = join('|', ('a', 'an', number_style_regex()));
my $guard = qr/^(?<question>$question_prefix)\s?(?<left_num>$factor_re*)\s?(?<left_unit>$keys)\s(?<connecting_word>in|to|into|(?:in to)|from)?\s?(?<right_num>$factor_re*)\s?(?:of\s)?(?<right_unit>$keys)[\?]?$/i;
# for 'most' results, like 213.800 degrees fahrenheit, decimal places
@ -57,26 +59,26 @@ sub magnitude_order {
}
my $maximum_input = 10**100;
handle query_lc => sub {
handle query => sub {
# hack around issues with feet and inches for now
$_ =~ s/"/inches/;
$_ =~ s/'/feet/;
if($_ =~ /(\d+)\s*(?:feet|foot)\s*(\d+)(?:\s*inch(?:es)?)?/){
if($_ =~ /(\d+)\s*(?:feet|foot)\s*(\d+)(?:\s*inch(?:es)?)?/i){
my $feetHack = $1 + $2/12;
$_ =~ s/(\d+)\s*(?:feet|foot)\s*(\d+)(?:\s*inch(?:es)?)?/$feetHack feet/;
$_ =~ s/(\d+)\s*(?:feet|foot)\s*(\d+)(?:\s*inch(?:es)?)?/$feetHack feet/i;
}
# hack support for "degrees" prefix on temperatures
$_ =~ s/ degree[s]? (centigrade|celsius|fahrenheit|rankine)/ $1/;
$_ =~ s/ degree[s]? (centigrade|celsius|fahrenheit|rankine)/ $1/i;
# hack - convert "oz"/"ounces" to "fl oz" if the query also mentions a volume unit (ml, cups, litres, gallons, pints)
s/(oz|ounces)/fl oz/ if(/(ml|cup[s]?)/ && not /fl oz/);
s/(oz|ounces)/fl oz/i if(/(ml|cup[s]?|litre|liter|gallon|pint)/i && not /fl oz/i);
# guard the query from spurious matches
return unless $_ =~ /$guard/;
my @matches = ($+{'left_unit'}, $+{'right_unit'});
return if ("" ne $+{'left_num'} && "" ne $+{'right_num'});
my $factor = $+{'left_num'};
@ -85,27 +87,27 @@ handle query_lc => sub {
# also, check the <connecting_word> of regex for possible user intentions
my @factor1 = (); # conversion factors, not left_num or right_num values
my @factor2 = ();
# gets factors for comparison
foreach my $type (@types) {
if($+{'left_unit'} eq $type->{'unit'}) {
if( lc $+{'left_unit'} eq lc $type->{'unit'} || $type->{'symbol'} && $+{'left_unit'} eq $type->{'symbol'}) {
push(@factor1, $type->{'factor'});
}
my @aliases1 = @{$type->{'aliases'}};
foreach my $alias1 (@aliases1) {
if($+{'left_unit'} eq $alias1) {
if(lc $+{'left_unit'} eq lc $alias1) {
push(@factor1, $type->{'factor'});
}
}
if($+{'right_unit'} eq $type->{'unit'}) {
if(lc $+{'right_unit'} eq lc $type->{'unit'} || $type->{'symbol'} && $+{'right_unit'} eq $type->{'symbol'}) {
push(@factor2, $type->{'factor'});
}
my @aliases2 = @{$type->{'aliases'}};
foreach my $alias2 (@aliases2) {
if($+{'right_unit'} eq $alias2) {
if(lc $+{'right_unit'} eq lc $alias2) {
push(@factor2, $type->{'factor'});
}
}
@ -126,13 +128,13 @@ handle query_lc => sub {
$factor = $+{'right_num'};
@matches = ($matches[1], $matches[0]);
}
$factor = 1 if ($factor =~ qr/^(a[n]?)?$/);
$factor = 1 if ($factor =~ qr/^(a[n]?)?$/i);
my $styler = number_style_for($factor);
return unless $styler;
return unless $styler->for_computation($factor) < $maximum_input;
my $result = convert({
'factor' => $styler->for_computation($factor),
'from_unit' => $matches[0],
@ -153,7 +155,7 @@ handle query_lc => sub {
'factor' => $styler->for_computation($factor),
'from_unit' => $matches[0],
'to_unit' => $matches[1],
});
}) or return;
# We only display it in exponent form if it's above a certain number.
# We also want to display numbers from 0 to 1 in exponent form.
@ -235,7 +237,10 @@ sub get_matches {
my @output_matches = ();
foreach my $match (@input_matches) {
foreach my $type (@types) {
if (lc $match eq $type->{'unit'} || lc $match eq lc $type->{'plural'} || grep { $_ eq lc $match } @{$type->{'aliases'}}) {
if ($type->{'symbol'} && $match eq $type->{'symbol'}
|| lc $match eq lc $type->{'unit'}
|| lc $match eq lc $type->{'plural'}
|| grep { $_ eq lc $match } @{$type->{'aliases'}} ) {
push(@output_matches,{
type => $type->{'type'},
factor => $type->{'factor'},
@ -245,14 +250,13 @@ sub get_matches {
}
}
}
return if scalar(@output_matches) != 2;
return @output_matches;
}
sub convert {
my ($conversion) = @_;
my @matches = get_matches($conversion->{'from_unit'}, $conversion->{'to_unit'});
return if scalar(@matches) != 2;
return if $conversion->{'factor'} < 0 && !($matches[0]->{'can_be_negative'});
# matches must be of the same type (e.g., can't convert mass to length):

246
share/goodie/conversions/ratios.yml Normal file → Executable file

File diff suppressed because it is too large Load Diff

View File

@ -45,6 +45,18 @@ ddg_goodie_test(
physical_quantity => 'mass'
})
),
'CONVERT 5 oz TO grams' => test_zci(
'5 ounces = 141.747 grams',
structured_answer => make_answer({
raw_input => '5',
from_unit => 'ounces',
raw_answer => '141.747',
to_unit => 'grams',
markup_input => '5',
styled_output => '141.747',
physical_quantity => 'mass'
})
),
'5 ounces to g' => test_zci(
'5 ounces = 141.747 grams',
structured_answer => make_answer({
@ -202,42 +214,6 @@ ddg_goodie_test(
physical_quantity => 'temperature'
})
),
'65 degrees c to f' => test_zci(
'65 degrees celsius = 149 degrees fahrenheit',
structured_answer => make_answer({
markup_input => '65',
raw_input => '65',
from_unit => 'degrees celsius',
styled_output => '149',
raw_answer => '149',
to_unit => 'degrees fahrenheit',
physical_quantity => 'temperature'
})
),
'65 degrees c to degrees f' => test_zci(
'65 degrees celsius = 149 degrees fahrenheit',
structured_answer => make_answer({
markup_input => '65',
raw_input => '65',
from_unit => 'degrees celsius',
styled_output => '149',
raw_answer => '149',
to_unit => 'degrees fahrenheit',
physical_quantity => 'temperature'
})
),
'65 c to degrees f' => test_zci(
'65 degrees celsius = 149 degrees fahrenheit',
structured_answer => make_answer({
markup_input => '65',
raw_input => '65',
from_unit => 'degrees celsius',
styled_output => '149',
raw_answer => '149',
to_unit => 'degrees fahrenheit',
physical_quantity => 'temperature'
})
),
'light year to mm' => test_zci(
'1 light year = 9.46073 * 10^18 millimeters',
structured_answer => make_answer({
@ -250,7 +226,7 @@ ddg_goodie_test(
physical_quantity => 'length'
})
),
'BTU to KwH' => test_zci(
'BTU to kWh' => test_zci(
'1 british thermal unit = 0.000292917 kilowatt-hours',
structured_answer => make_answer({
markup_input => '1',
@ -311,7 +287,7 @@ ddg_goodie_test(
physical_quantity => 'temperature'
})
),
'convert 122 fahrenheit to degrees centigrade' => test_zci(
'convert 122 fahrenheit to degrees centigrade' => test_zci(
'122 degrees fahrenheit = 50 degrees celsius',
structured_answer => make_answer({
markup_input => '122',
@ -347,7 +323,7 @@ ddg_goodie_test(
physical_quantity => 'duration'
})
),
'convert 1 yb to yib' => test_zci(
'convert 1 YB to YiB' => test_zci(
'1 yottabyte = 0.827 yobibytes',
structured_answer => make_answer({
markup_input => '1',
@ -732,7 +708,7 @@ ddg_goodie_test(
physical_quantity => 'pressure'
})
),
'1 atm to kpa' => test_zci(
'1 atm to kPa' => test_zci(
'1 atmosphere = 101.325 kilopascals',
structured_answer => make_answer({
markup_input => '1',
@ -804,7 +780,7 @@ ddg_goodie_test(
physical_quantity => 'energy'
})
),
'90 ps in watts' => test_zci(
'90 PS in watts' => test_zci(
'90 metric horsepower = 66,194.888 watts',
structured_answer => make_answer({
markup_input => '90',
@ -960,7 +936,7 @@ ddg_goodie_test(
physical_quantity => 'digital'
})
),
'0.013 mb in bits' => test_zci(
'0.013 MB in bits' => test_zci(
'0.013 megabytes = 104,000 bits',
structured_answer => make_answer({
markup_input => '0.013',
@ -972,7 +948,7 @@ ddg_goodie_test(
physical_quantity => 'digital'
})
),
'0,013 mb in bits' => test_zci(
'0,013 MB in bits' => test_zci(
'0,013 megabytes = 104.000 bits',
structured_answer => make_answer({
markup_input => '0,013',
@ -984,7 +960,7 @@ ddg_goodie_test(
physical_quantity => 'digital'
})
),
'1 exabyte to pib' => test_zci(
'1 exabyte to PiB' => test_zci(
'1 exabyte = 888.178 pebibytes',
structured_answer => make_answer({
markup_input => '1',
@ -1052,7 +1028,7 @@ ddg_goodie_test(
from_unit => 'degrees celsius',
styled_output => '53.600',
raw_answer => '53.600',
to_unit => 'degrees fahrenheit',
to_unit => 'degrees fahrenheit',
physical_quantity => 'temperature'
})
),
@ -1064,7 +1040,7 @@ ddg_goodie_test(
from_unit => 'degree fahrenheit',
styled_output => '-17.222',
raw_answer => '-17.222',
to_unit => 'degrees celsius',
to_unit => 'degrees celsius',
physical_quantity => 'temperature'
})
),
@ -1088,7 +1064,7 @@ ddg_goodie_test(
from_unit => 'degrees fahrenheit',
styled_output => '112.222',
raw_answer => '112.222',
to_unit => 'degrees celsius',
to_unit => 'degrees celsius',
physical_quantity => 'temperature'
})
),
@ -1188,6 +1164,30 @@ ddg_goodie_test(
physical_quantity => 'length'
})
),
'FEET IN AN INCHES' => test_zci(
'1 inch = 0.0833 feet',
structured_answer => make_answer({
markup_input => '1',
raw_input => '1',
from_unit => 'inch',
styled_output => '0.0833',
raw_answer => '0.0833',
to_unit => 'feet',
physical_quantity => 'length'
})
),
'feet in AN inch' => test_zci(
'1 inch = 0.0833 feet',
structured_answer => make_answer({
markup_input => '1',
raw_input => '1',
from_unit => 'inch',
styled_output => '0.0833',
raw_answer => '0.0833',
to_unit => 'feet',
physical_quantity => 'length'
})
),
'ml in gallons' => test_zci(
'1 us gallon = 3,785.412 millilitres',,
structured_answer => make_answer({
@ -1308,6 +1308,42 @@ ddg_goodie_test(
physical_quantity => 'volume'
})
),
'convert 2 liters to oz' => test_zci (
'2 litres = 67.628 us fluid ounces',
structured_answer => make_answer({
markup_input => '2',
raw_input => '2',
from_unit => 'litres',
styled_output => '67.628',
raw_answer => '67.628',
to_unit => 'us fluid ounces',
physical_quantity => 'volume'
})
),
'convert 2 pints to oz' => test_zci (
'2 imperial pints = 38.430 us fluid ounces',
structured_answer => make_answer({
markup_input => '2',
raw_input => '2',
from_unit => 'imperial pints',
styled_output => '38.430',
raw_answer => '38.430',
to_unit => 'us fluid ounces',
physical_quantity => 'volume'
})
),
'convert 8 oz to gallons' => test_zci (
'8 us fluid ounces = 0.0625 us gallons',
structured_answer => make_answer({
markup_input => '8',
raw_input => '8',
from_unit => 'us fluid ounces',
styled_output => '0.0625',
raw_answer => '0.0625',
to_unit => 'us gallons',
physical_quantity => 'volume'
})
),
'4 cups in quarts' => test_zci(
'4 us cups = 1 quart',
structured_answer => make_answer({
@ -1320,6 +1356,18 @@ ddg_goodie_test(
physical_quantity => 'volume'
})
),
'4 CUPS IN QUARTS' => test_zci(
'4 us cups = 1 quart',
structured_answer => make_answer({
markup_input => '4',
raw_input => '4',
from_unit => 'us cups',
styled_output => '1',
raw_answer => '1',
to_unit => 'quart',
physical_quantity => 'volume'
})
),
'how many ounces in a cup' => test_zci(
'1 us cup = 8 us fluid ounces',
structured_answer => make_answer({
@ -1332,6 +1380,18 @@ ddg_goodie_test(
physical_quantity => 'volume'
})
),
'HOW MANY OUNCES IN A CUP' => test_zci(
'1 us cup = 8 us fluid ounces',
structured_answer => make_answer({
markup_input => '1',
raw_input => '1',
from_unit => 'us cup',
styled_output => '8',
raw_answer => '8',
to_unit => 'us fluid ounces',
physical_quantity => 'volume'
})
),
# Unusual number formats
'3e60 degrees in revolutions' => test_zci(
'3 * 10^60 degrees = 8.33333 * 10^57 revolutions',
@ -2778,8 +2838,8 @@ ddg_goodie_test(
physical_quantity => 'length'
})
),
# Representation (scientific notation)
'30000 km to m' => test_zci(
# Representation (scientific notation)
'30000 km to m' => test_zci(
'30,000 kilometers = 3 * 10^7 meters',
structured_answer => make_answer({
markup_input => '30,000',
@ -2790,11 +2850,11 @@ ddg_goodie_test(
to_unit => 'meters',
physical_quantity => 'length'
})
),
),
'3000000000000000 km to m' => test_zci(
'3 * 10^15 kilometers = 3 * 10^18 meters',
structured_answer => make_answer({
structured_answer => make_answer({
markup_input => '3 * 10<sup>15</sup>',
raw_input => '3*10^15',
from_unit => 'kilometers',
@ -2806,7 +2866,7 @@ ddg_goodie_test(
),
'3000 km to m' => test_zci(
'3,000 kilometers = 3,000,000 meters',
structured_answer => make_answer({
structured_answer => make_answer({
markup_input => '3,000',
raw_input => '3000',
from_unit => 'kilometers',
@ -2818,7 +2878,7 @@ ddg_goodie_test(
),
'300000000000 km to m' => test_zci(
'3 * 10^11 kilometers = 3 * 10^14 meters',
structured_answer => make_answer({
structured_answer => make_answer({
markup_input => '3 * 10<sup>11</sup>',
raw_input => '3*10^11',
from_unit => 'kilometers',
@ -2830,7 +2890,7 @@ ddg_goodie_test(
),
'4e-15 km to mm' => test_zci(
'4 * 10^-15 kilometers = 4 * 10^-9 millimeters',
structured_answer => make_answer({
structured_answer => make_answer({
markup_input => '4 * 10<sup>-15</sup>',
raw_input => '4*10^-15',
from_unit => 'kilometers',
@ -2842,7 +2902,7 @@ ddg_goodie_test(
),
'how many bytes in a mebibyte?' => test_zci(
'1 mebibyte = 1,048,576 bytes',
structured_answer => make_answer({
structured_answer => make_answer({
markup_input => '1',
raw_input => '1',
from_unit => 'mebibyte',
@ -2854,7 +2914,7 @@ ddg_goodie_test(
),
'how many megabytes in a gigabyte?' => test_zci(
'1 gigabyte = 1,000 megabytes',
structured_answer => make_answer({
structured_answer => make_answer({
markup_input => '1',
raw_input => '1',
from_unit => 'gigabyte',
@ -2866,7 +2926,7 @@ ddg_goodie_test(
),
'1 gigabyte in megabytes' => test_zci(
'1 gigabyte = 1,000 megabytes',
structured_answer => make_answer({
structured_answer => make_answer({
markup_input => '1',
raw_input => '1',
from_unit => 'gigabyte',
@ -3032,8 +3092,33 @@ ddg_goodie_test(
physical_quantity => 'volume'
})
),
'10MB in Mb' => test_zci(
'10 megabytes = 80 megabits',
structured_answer => make_answer({
markup_input => '10',
raw_input => '10',
from_unit => 'megabytes',
styled_output => '80',
raw_answer => '80',
to_unit => 'megabits',
physical_quantity => 'digital'
})
),
'1kb in B' => test_zci(
'1 kilobit = 125 bytes',
structured_answer => make_answer({
markup_input => '1',
raw_input => '1',
from_unit => 'kilobit',
styled_output => '125',
raw_answer => '125',
to_unit => 'bytes',
physical_quantity => 'digital'
})
),
# Intentionally untriggered
'BTU to KwH' => undef,
'5 inches in 5 meters' => undef,
'convert 1 cm to 2 mm' => undef,
'inching towards the goal' => undef,