Merge pull request #569 from duckduckgo/mintsoft/conversion_calcs
Normalizing Number Handling.master
commit
8231b25d23
|
@ -4,8 +4,9 @@ package DDG::Goodie::Calculator;
|
|||
use feature 'state';
|
||||
|
||||
use DDG::Goodie;
|
||||
with 'DDG::GoodieRole::NumberStyler';
|
||||
|
||||
use List::Util qw( all first max );
|
||||
use List::Util qw( max );
|
||||
use Math::Trig;
|
||||
|
||||
zci is_cached => 1;
|
||||
|
@ -44,36 +45,8 @@ triggers query_nowhitespace => qr<
|
|||
$
|
||||
>xi;
|
||||
|
||||
# This is probably YAGNI territory, but since I have to reference it in two places
|
||||
# and there are a multitude of other notation systems (although some break the
|
||||
# 'thousands' assumption) I am going to pretend that I do need it.
|
||||
# If it could fit more than one the first in order gets preference.
|
||||
my @known_styles = ({
|
||||
id => 'perl',
|
||||
decimal => '.',
|
||||
thousands => ',',
|
||||
},
|
||||
{
|
||||
id => 'euro',
|
||||
decimal => ',',
|
||||
thousands => '.',
|
||||
},
|
||||
);
|
||||
|
||||
my $perl_style = first { $_->{id} eq 'perl' } @known_styles;
|
||||
foreach my $style (@known_styles) {
|
||||
$style->{fit_check} = _well_formed_for_style_func($style);
|
||||
$style->{precision} = _precision_for_style_func($style);
|
||||
$style->{make_safe} = _prepare_for_computation_func($style, $perl_style);
|
||||
$style->{make_pretty} = _display_style_func($style, $perl_style);
|
||||
}
|
||||
|
||||
# This is not as good an idea as I might think.
|
||||
# Luckily it will someday be able to be tokenized so this won't apply.
|
||||
my $all_seps = join('', map { $_->{decimal} . $_->{thousands} } @known_styles);
|
||||
|
||||
my $numbery = qr/[\d$all_seps]+/;
|
||||
my $funcy = qr/[[a-z]+\(|log[_]?\d{1,3}\(|\^|\/|\*/; # Stuff that looks like functions.
|
||||
my $number_re = number_style_regex();
|
||||
my $funcy = qr/[[a-z]+\(|log[_]?\d{1,3}\(|\^|\*|\//; # Stuff that looks like functions.
|
||||
|
||||
my %named_operations = (
|
||||
'\^' => '**',
|
||||
|
@ -82,19 +55,19 @@ my %named_operations = (
|
|||
'minus' => '-',
|
||||
'plus' => '+',
|
||||
'divided\sby' => '/',
|
||||
'ln' => 'log', # perl log() is natural log.
|
||||
'ln' => 'log', # perl log() is natural log.
|
||||
'squared' => '**2',
|
||||
);
|
||||
|
||||
my %named_constants = (
|
||||
dozen => 12,
|
||||
e => 2.71828182845904523536028747135266249, # This should be computed.
|
||||
pi => pi, # pi constant from Math::Trig
|
||||
e => 2.71828182845904523536028747135266249, # This should be computed.
|
||||
pi => pi, # pi constant from Math::Trig
|
||||
gross => 144,
|
||||
score => 20,
|
||||
);
|
||||
|
||||
my $ored_constants = join('|', keys %named_constants); # For later substitutions
|
||||
my $ored_constants = join('|', keys %named_constants); # For later substitutions
|
||||
|
||||
my $ip4_octet = qr/([01]?\d\d?|2[0-4]\d|25[0-5])/; # Each octet should look like a number between 0 and 255.
|
||||
my $ip4_regex = qr/(?:$ip4_octet\.){3}$ip4_octet/; # There should be 4 of them separated by 3 dots.
|
||||
|
@ -133,13 +106,13 @@ handle query_nowhitespace => sub {
|
|||
$tmp_expr =~ s#\b$name\b# $constant #ig;
|
||||
}
|
||||
|
||||
my @numbers = grep { $_ =~ /^$numbery$/ } (split /\s+/, $tmp_expr);
|
||||
my $style = display_style(@numbers);
|
||||
my @numbers = grep { $_ =~ /^$number_re$/ } (split /\s+/, $tmp_expr);
|
||||
my $style = number_style_for(@numbers);
|
||||
return unless $style;
|
||||
|
||||
$tmp_expr = $style->{make_safe}->($tmp_expr);
|
||||
$tmp_expr = $style->for_computation($tmp_expr);
|
||||
# Using functions makes us want answers with more precision than our inputs indicate.
|
||||
my $precision = ($query =~ $funcy) ? undef : max(map { $style->{precision}->($_) } @numbers);
|
||||
my $precision = ($query =~ $funcy) ? undef : max(map { $style->precision_of($_) } @numbers);
|
||||
|
||||
eval {
|
||||
# e.g. sin(100000)/100000 completely makes this go haywire.
|
||||
|
@ -158,15 +131,11 @@ handle query_nowhitespace => sub {
|
|||
# 0-9 check for http://yegg.duckduckgo.com/?q=%243.43%20%2434.45&format=json
|
||||
return unless (defined $precision || ($tmp_result =~ /^(?:\-|)[0-9\.]+$/));
|
||||
|
||||
# Ok, this might be overkill on flexibility.
|
||||
$tmp_result = sprintf('%0' . $perl_style->{decimal} . $precision . 'f', $tmp_result) if ($precision);
|
||||
$tmp_result = sprintf('%0.' . $precision . 'f', $tmp_result) if ($precision);
|
||||
# Dollars.
|
||||
$tmp_result = '$' . $tmp_result if ($query =~ /^\$/);
|
||||
|
||||
# Add proper separators.
|
||||
$tmp_result = $style->{make_pretty}->($tmp_result);
|
||||
|
||||
my $results = prepare_for_display($query, $tmp_result);
|
||||
my $results = prepare_for_display($query, $tmp_result, $style);
|
||||
|
||||
return if $results->{text} =~ /^\s/;
|
||||
return $results->{text},
|
||||
|
@ -178,26 +147,27 @@ handle query_nowhitespace => sub {
|
|||
};
|
||||
|
||||
sub prepare_for_display {
|
||||
my ($query, $result) = @_;
|
||||
my ($query, $result, $style) = @_;
|
||||
|
||||
# Equals varies by output type.
|
||||
$query =~ s/\=$//;
|
||||
# Show them how 'E' was interpreted.
|
||||
$query =~ s/((?:\d+?|\s))E(-?\d+)/\($1 * 10^$2\)/;
|
||||
# Show them how 'E' was interpreted. This should use the number styler, too.
|
||||
$query =~ s/((?:\d+?|\s))E(-?\d+)/\($1 * 10^$2\)/i;
|
||||
|
||||
return {
|
||||
text => format_text($query, $result),
|
||||
html => format_html($query, $result),
|
||||
text => format_text($query, $result, $style),
|
||||
html => format_html($query, $result, $style),
|
||||
};
|
||||
}
|
||||
|
||||
# Format query for HTML
|
||||
sub format_html {
|
||||
my ($query, $result) = @_;
|
||||
my ($query, $result, $style) = @_;
|
||||
|
||||
state $css = '<style type="text/css">' . share("style.css")->slurp . '</style>';
|
||||
|
||||
$query = _add_html_exponents($query);
|
||||
$query = $style->with_html($query);
|
||||
$result = $style->with_html($result);
|
||||
|
||||
return
|
||||
$css
|
||||
|
@ -208,66 +178,11 @@ sub format_html {
|
|||
. "</a></div>";
|
||||
}
|
||||
|
||||
sub _add_html_exponents {
|
||||
|
||||
my $string = shift;
|
||||
|
||||
return $string if ($string !~ /\^/ or $string =~ /^\^|\^$/); # Give back the same thing if we won't deal with it properly.
|
||||
|
||||
my @chars = split //, $string;
|
||||
my ($start_tag, $end_tag) = ('<sup>', '</sup>');
|
||||
my ($newly_up, $in_exp_number, $in_exp_parens, %power_parens);
|
||||
my ($parens_count, $number_up) = (0, 0);
|
||||
|
||||
# because of associativity and power-to-power, we need to scan nearly the whole thing
|
||||
for my $index (1 .. $#chars - 1) {
|
||||
my $this_char = $chars[$index];
|
||||
if ($this_char =~ $numbery) {
|
||||
if ($newly_up) {
|
||||
$in_exp_number = 1;
|
||||
$newly_up = 0;
|
||||
}
|
||||
} elsif ($this_char eq '(') {
|
||||
$parens_count += 1;
|
||||
$in_exp_number = 0;
|
||||
if ($newly_up) {
|
||||
$in_exp_parens += 1;
|
||||
$power_parens{$parens_count} = 1;
|
||||
$newly_up = 0;
|
||||
}
|
||||
} elsif ($this_char eq '^') {
|
||||
$chars[$index - 1] =~ s/$end_tag$//; # Added too soon!
|
||||
$number_up += 1;
|
||||
$newly_up = 1;
|
||||
$chars[$index] = $start_tag; # Replace ^ with the tag.
|
||||
} elsif ($in_exp_number) {
|
||||
$in_exp_number = 0;
|
||||
$number_up -= 1;
|
||||
$chars[$index] = $end_tag . $chars[$index];
|
||||
} elsif ($number_up && !$in_exp_parens) {
|
||||
# Must have ended another term or more
|
||||
$chars[$index] = ($end_tag x ($number_up - 1)) . $chars[$index];
|
||||
$number_up = 0;
|
||||
} elsif ($this_char eq ')') {
|
||||
# We just closed a set of parens, see if it closes one of our things
|
||||
if ($in_exp_parens && $power_parens{$parens_count}) {
|
||||
$chars[$index] .= $end_tag;
|
||||
delete $power_parens{$parens_count};
|
||||
$in_exp_parens -= 1;
|
||||
}
|
||||
$parens_count -= 1;
|
||||
}
|
||||
}
|
||||
$chars[-1] .= $end_tag x $number_up if ($number_up);
|
||||
|
||||
return join('', @chars);
|
||||
}
|
||||
|
||||
# Format query for text
|
||||
sub format_text {
|
||||
my ($query, $result) = @_;
|
||||
my ($query, $result, $style) = @_;
|
||||
|
||||
return spacing($query) . ' = ' . $result;
|
||||
return spacing($query) . ' = ' . $style->for_display($result);
|
||||
}
|
||||
|
||||
#separates symbols with a space
|
||||
|
@ -278,92 +193,9 @@ sub spacing {
|
|||
$text =~ s/(\s*(?<!<)(?:[\+\-\^xX\*\/\%]|times|plus|minus|dividedby)+\s*)/ $1 /ig;
|
||||
$text =~ s/\s*dividedby\s*/ divided by /ig;
|
||||
$text =~ s/(\d+?)((?:dozen|pi|gross|squared|score))/$1 $2/ig;
|
||||
$text =~ s/(\d+?)e/$1 e/g; # E == *10^n
|
||||
$text =~ s/([\(\)\$])/ $1 /g if ($space_for_parse);
|
||||
|
||||
return $text;
|
||||
}
|
||||
|
||||
# Takes an array of numbers and returns which style to use for parse and display
|
||||
# If there are conflicting answers among the array, will return undef.
|
||||
sub display_style {
|
||||
my @numbers = @_;
|
||||
|
||||
my $style; # By default, assume we don't understand the numbers.
|
||||
|
||||
STYLE:
|
||||
foreach my $test_style (@known_styles) {
|
||||
if (all { $test_style->{fit_check}->($_) } @numbers) {
|
||||
# All of our numbers fit this style. Since we have them in preference order
|
||||
# we can pick it and move on.
|
||||
$style = $test_style;
|
||||
last STYLE;
|
||||
}
|
||||
}
|
||||
return $style;
|
||||
}
|
||||
|
||||
# Returns a function which evaluates whether a number fits a certain style.
|
||||
sub _well_formed_for_style_func {
|
||||
my $style = shift;
|
||||
my ($decimal, $thousands) = ($style->{decimal}, $style->{thousands});
|
||||
|
||||
return sub {
|
||||
my $number = shift;
|
||||
return (
|
||||
$number =~ /^(\d|\Q$thousands\E|\Q$decimal\E)+$/
|
||||
# Only contains things we understand.
|
||||
&& ($number !~ /\Q$thousands\E/
|
||||
|| ($number !~ /\Q$thousands\E\d{1,2}\b/ && $number !~ /\Q$thousands\E\d{4,}/ && $number !~ /^0\Q$thousands\E/))
|
||||
# You can leave out thousands breaks, but the ones you put in must be in the right place
|
||||
# which does not include following an initial 0.
|
||||
# Note that this does not confirm that they put all the 'required' ones in.
|
||||
&& ($number !~ /\Q$decimal\E/ || $number !~ /\Q$decimal\E(?:.*)?(?:\Q$decimal\E|\Q$thousands\E)/)
|
||||
# You can omit the decimal but you cannot have another decimal or thousands after:
|
||||
) ? 1 : 0;
|
||||
};
|
||||
}
|
||||
|
||||
# Returns function which given a number in a certain style, makes it nice for human eyes.
|
||||
sub _display_style_func {
|
||||
my ($style, $perl_style) = @_;
|
||||
my ($decimal, $thousands, $perl_dec) = (@{$style}{qw(decimal thousands)}, $perl_style->{decimal}); # Unpacked for easier regex-building
|
||||
|
||||
return sub {
|
||||
my $text = shift;
|
||||
$text = reverse $text;
|
||||
$text =~ s/\Q$perl_dec\E/$decimal/g;
|
||||
$text =~ s/(\d\d\d)(?=\d)(?!\d*\Q$decimal\E)/$1$thousands/g;
|
||||
|
||||
return scalar reverse $text;
|
||||
};
|
||||
}
|
||||
|
||||
# Returns function which given a number in a certain style, makes it safe for perl eval.
|
||||
sub _prepare_for_computation_func {
|
||||
my ($style, $perl_style) = @_;
|
||||
my ($decimal, $thousands, $perl_dec) = (@{$style}{qw(decimal thousands)}, $perl_style->{decimal});
|
||||
|
||||
return sub {
|
||||
my $number_text = shift;
|
||||
|
||||
$number_text =~ s/\Q$thousands\E//g; # Remove thousands seps, since they are just visual.
|
||||
$number_text =~ s/\Q$decimal\E/$perl_dec/g; # Make sure decimal mark is something perl knows how to use.
|
||||
|
||||
return $number_text;
|
||||
};
|
||||
}
|
||||
|
||||
# Returns function which given a number, determines the number of places after the decimal.
|
||||
sub _precision_for_style_func {
|
||||
my ($style) = @_;
|
||||
my $decimal = $style->{decimal};
|
||||
|
||||
return sub {
|
||||
my $number_text = shift;
|
||||
|
||||
return ($number_text =~ /\Q$decimal\E(\d+)/) ? length($1) : 0;
|
||||
};
|
||||
}
|
||||
|
||||
1;
|
||||
|
|
|
@ -2,13 +2,12 @@ package DDG::Goodie::Conversions;
|
|||
# ABSTRACT: convert between various units of measurement
|
||||
|
||||
use DDG::Goodie;
|
||||
with 'DDG::GoodieRole::NumberStyler';
|
||||
|
||||
use HTML::Entities;
|
||||
use Math::Round qw/nearest/;
|
||||
use Scalar::Util qw/looks_like_number/;
|
||||
use bignum;
|
||||
use Convert::Pluggable;
|
||||
# ^^ mass, length, time, pressure, energy, power, angle, force, temperature, digital
|
||||
|
||||
name 'Conversions';
|
||||
description 'convert between various units of measurement';
|
||||
|
@ -40,8 +39,10 @@ triggers end => @units;
|
|||
# match longest possible key (some keys are sub-keys of other keys):
|
||||
my $keys = join '|', reverse sort { length($a) <=> length($b) } @units;
|
||||
my $question_prefix = qr/(convert|what (is|are|does)|how (much|many|long) (is|are))?\s?/;
|
||||
|
||||
# guards and matches regex
|
||||
my $guard = qr/^$question_prefix[0-9\.]*\s?($keys)\s?(in|to|into|from)\s?[0-9\.]*\s?($keys)+$/;
|
||||
my $number_re = number_style_regex();
|
||||
my $guard = qr/^$question_prefix$number_re*\s?($keys)\s?(in|to|into|from)\s?$number_re*\s?($keys)$/;
|
||||
my $match_regex = qr/(?:[0-9]|\b)($keys)\b/;
|
||||
|
||||
# exceptions for pluralized forms:
|
||||
|
@ -66,9 +67,9 @@ sub append_css {
|
|||
}
|
||||
|
||||
sub wrap_html {
|
||||
my ($factor, $result) = @_;
|
||||
my $from = encode_entities($factor) . " <span class='text--secondary'>" . encode_entities($result->{'from_unit'}) . "</span>";
|
||||
my $to = encode_entities($result->{'result'}) . " <span class='text--secondary'>" . encode_entities($result->{'to_unit'}) . "</span>";
|
||||
my ($factor, $result, $styler) = @_;
|
||||
my $from = $styler->with_html($factor) . " <span class='text--secondary'>" . encode_entities($result->{'from_unit'}) . "</span>";
|
||||
my $to = $styler->with_html($result->{'result'}) . " <span class='text--secondary'>" . encode_entities($result->{'to_unit'}) . "</span>";
|
||||
return append_css("<div class='zci--conversions text--primary'>$from = $to</div>");
|
||||
}
|
||||
|
||||
|
@ -77,8 +78,8 @@ handle query_lc => sub {
|
|||
$_ =~ s/"/inches/;
|
||||
$_ =~ s/'/feet/;
|
||||
|
||||
# hack support for "degrees" prefix on temperatures
|
||||
$_ =~ s/ degrees (celsius|fahrenheit)/ $1/;
|
||||
# hack support for "degrees" prefix on temperatures
|
||||
$_ =~ s/ degrees (celsius|fahrenheit)/ $1/;
|
||||
|
||||
# guard the query from spurious matches
|
||||
return unless $_ =~ /$guard/;
|
||||
|
@ -92,26 +93,32 @@ handle query_lc => sub {
|
|||
return unless scalar @matches == 2; # conversion requires two triggers
|
||||
|
||||
# normalize the whitespace, "25cm" should work for example
|
||||
$_ =~ s/([0-9])([a-zA-Z])/$1 $2/;
|
||||
$_ =~ s/($number_re)($keys)/$1 $2/g;
|
||||
|
||||
# fix precision and rounding:
|
||||
my $precision = 3;
|
||||
my $nearest = '1';
|
||||
for my $i (1 .. $precision - 1) {
|
||||
$nearest = '0' . $nearest;
|
||||
}
|
||||
$nearest = '.' . $nearest;
|
||||
my $nearest = '.' . ('0' x ($precision-1)) . '1';
|
||||
|
||||
# get factor:
|
||||
# get factor and return if multiple numbers are specified
|
||||
my @args = split(/\s+/, $_);
|
||||
my $factor = 1;
|
||||
my $factor = "";
|
||||
foreach my $arg (@args) {
|
||||
if (looks_like_number($arg)) {
|
||||
$factor = $arg unless $factor != 1; # drop n > 1 #s
|
||||
if ($arg =~ /^$number_re$/) {
|
||||
return if $factor;
|
||||
$factor = $arg;
|
||||
}
|
||||
}
|
||||
|
||||
my $result = $c->convert( { 'factor' => $factor, 'from_unit' => $matches[0], 'to_unit' => $matches[1], 'precision' => $precision, } );
|
||||
$factor = 1 if "" eq $factor;
|
||||
|
||||
my $styler = number_style_for($factor);
|
||||
return unless $styler;
|
||||
|
||||
my $result = $c->convert( {
|
||||
'factor' => $styler->for_computation($factor),
|
||||
'from_unit' => $matches[0],
|
||||
'to_unit' => $matches[1],
|
||||
'precision' => $precision,
|
||||
} );
|
||||
|
||||
return if !$result->{'result'};
|
||||
|
||||
|
@ -119,45 +126,49 @@ handle query_lc => sub {
|
|||
|
||||
# if $result = 1.00000 .. 000n, where n <> 0 then $result != 1 and throws off pluralization, so:
|
||||
$result->{'result'} = nearest($nearest, $result->{'result'});
|
||||
|
||||
|
||||
if ($result->{'result'} == 0 || length($result->{'result'}) > 2*$precision + 1) {
|
||||
if ($result->{'result'} == 0) {
|
||||
# rounding error
|
||||
$result = $c->convert( { 'factor' => $factor, 'from_unit' => $matches[0], 'to_unit' => $matches[1], 'precision' => $precision, } );
|
||||
$result = $c->convert( {
|
||||
'factor' => $styler->for_computation($factor),
|
||||
'from_unit' => $matches[0],
|
||||
'to_unit' => $matches[1],
|
||||
'precision' => $precision,
|
||||
} );
|
||||
}
|
||||
|
||||
# We only display it in exponent form if it's above a certain number.
|
||||
# We also want to display numbers from 0 to 1 in exponent form.
|
||||
if($result->{'result'} > 1000000 || $result->{'result'} < 1) {
|
||||
# We only display it in exponent form if it's above a certain number.
|
||||
# We also want to display numbers from 0 to 1 in exponent form.
|
||||
if($result->{'result'} > 1_000_000 || $result->{'result'} < 1) {
|
||||
$f_result = (sprintf "%.${precision}g", $result->{'result'});
|
||||
} else {
|
||||
$f_result = (sprintf "%.${precision}f", $result->{'result'});
|
||||
}
|
||||
}
|
||||
|
||||
# handle pluralisation of units
|
||||
# however temperature is never plural and does require "degrees" to be prepended
|
||||
if ($result->{'type_1'} ne 'temperature') {
|
||||
if ($factor != 1) {
|
||||
$result->{'from_unit'} = (exists $plural_exceptions{$result->{'from_unit'}}) ? $plural_exceptions{$result->{'from_unit'}} : $result->{'from_unit'} . 's';
|
||||
}
|
||||
# handle pluralisation of units
|
||||
# however temperature is never plural and does require "degrees" to be prepended
|
||||
if ($result->{'type_1'} ne 'temperature') {
|
||||
if ($factor != 1) {
|
||||
$result->{'from_unit'} = (exists $plural_exceptions{$result->{'from_unit'}}) ? $plural_exceptions{$result->{'from_unit'}} : $result->{'from_unit'} . 's';
|
||||
}
|
||||
|
||||
if ($result->{'result'} != 1) {
|
||||
$result->{'to_unit'} = (exists $plural_exceptions{$result->{'to_unit'}}) ? $plural_exceptions{$result->{'to_unit'}} : $result->{'to_unit'} . 's';
|
||||
}
|
||||
}
|
||||
else {
|
||||
$result->{'from_unit'} = "degrees $result->{'from_unit'}" if ($result->{'from_unit'} ne "kelvin");
|
||||
$result->{'to_unit'} = "degrees $result->{'to_unit'}" if ($result->{'to_unit'} ne "kelvin");
|
||||
}
|
||||
if ($result->{'result'} != 1) {
|
||||
$result->{'to_unit'} = (exists $plural_exceptions{$result->{'to_unit'}}) ? $plural_exceptions{$result->{'to_unit'}} : $result->{'to_unit'} . 's';
|
||||
}
|
||||
}
|
||||
else {
|
||||
$result->{'from_unit'} = "degrees $result->{'from_unit'}" if ($result->{'from_unit'} ne "kelvin");
|
||||
$result->{'to_unit'} = "degrees $result->{'to_unit'}" if ($result->{'to_unit'} ne "kelvin");
|
||||
}
|
||||
|
||||
$result->{'result'} = defined($f_result) ? $f_result : sprintf("%.${precision}f", $result->{'result'});
|
||||
$result->{'result'} =~ s/\.0{$precision}$//;
|
||||
$result->{'result'} = $styler->for_display($result->{'result'});
|
||||
|
||||
my $output = "$factor $result->{'from_unit'} = $result->{'result'} $result->{'to_unit'}";
|
||||
return $output, html => wrap_html($factor, $result);
|
||||
my $output = $styler->for_display($factor)." $result->{'from_unit'} = $result->{'result'} $result->{'to_unit'}";
|
||||
return $output, html => wrap_html($factor, $result, $styler);
|
||||
};
|
||||
|
||||
|
||||
|
||||
1;
|
||||
1;
|
|
@ -0,0 +1,156 @@
|
|||
package DDG::GoodieRole::NumberStyle;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Moo;
|
||||
|
||||
has [qw(id decimal thousands)] => (
|
||||
is => 'ro',
|
||||
);
|
||||
|
||||
has exponential => (
|
||||
is => 'ro',
|
||||
default => sub { 'e' },
|
||||
);
|
||||
|
||||
has number_regex => (
|
||||
is => 'lazy',
|
||||
);
|
||||
|
||||
sub _build_number_regex {
|
||||
my $self = shift;
|
||||
my ($decimal, $thousands, $exponential) = ($self->decimal, $self->thousands, $self->exponential);
|
||||
|
||||
return qr/-?[\d\Q$decimal\E\Q$thousands\E]+(?:\Q$exponential\E-?\d+)?/;
|
||||
}
|
||||
|
||||
sub understands {
|
||||
my ($self, $number) = @_;
|
||||
my ($decimal, $thousands) = ($self->decimal, $self->thousands);
|
||||
|
||||
# How do we know if a number is reasonable for this style?
|
||||
# This assumes the exponentials are not included to give better answers.
|
||||
return (
|
||||
# The number must contain only things we understand: numerals and separators for this style.
|
||||
$number =~ /^-?(|\d|\Q$thousands\E|\Q$decimal\E)+$/
|
||||
&& (
|
||||
# The number is not required to contain thousands separators
|
||||
$number !~ /\Q$thousands\E/
|
||||
|| (
|
||||
# But if the number does contain thousands separators, they must delimit exactly 3 numerals.
|
||||
$number !~ /\Q$thousands\E\d{1,2}\b/
|
||||
&& $number !~ /\Q$thousands\E\d{4,}/
|
||||
# And cannot follow a leading zero
|
||||
&& $number !~ /^0\Q$thousands\E/
|
||||
))
|
||||
&& (
|
||||
# The number is not required to include decimal separators
|
||||
$number !~ /\Q$decimal\E/
|
||||
# But if one is included, it cannot be followed by another separator, whether decimal or thousands.
|
||||
|| $number !~ /\Q$decimal\E(?:.*)?(?:\Q$decimal\E|\Q$thousands\E)/
|
||||
)) ? 1 : 0;
|
||||
}
|
||||
|
||||
sub precision_of {
|
||||
my ($self, $number_text) = @_;
|
||||
my $decimal = $self->decimal;
|
||||
|
||||
return ($number_text =~ /\Q$decimal\E(\d+)/) ? length($1) : 0;
|
||||
}
|
||||
|
||||
sub for_computation {
|
||||
my ($self, $number_text) = @_;
|
||||
my ($decimal, $thousands, $exponential) = ($self->decimal, $self->thousands, $self->exponential);
|
||||
|
||||
$number_text =~ s/\Q$thousands\E//g; # Remove thousands seps, since they are just visual.
|
||||
$number_text =~ s/\Q$decimal\E/./g; # Make sure decimal mark is something perl knows how to use.
|
||||
if ($number_text =~ s/^([\d$decimal$thousands]+)\Q$exponential\E(-?[\d$decimal$thousands]+)$/$1e$2/ig) {
|
||||
# Convert to perl style exponentials and then make into human-style floats.
|
||||
$number_text = sprintf('%f', $number_text);
|
||||
}
|
||||
|
||||
return $number_text;
|
||||
}
|
||||
|
||||
sub for_display {
|
||||
my ($self, $number_text) = @_;
|
||||
my ($decimal, $thousands, $exponential) = ($self->decimal, $self->thousands, $self->exponential);
|
||||
|
||||
if ($number_text =~ /(.*)\Q$exponential\E([+-]?\d+)/i) {
|
||||
$number_text = $self->for_display($1) . ' * 10^' . $self->for_display(int $2);
|
||||
} else {
|
||||
$number_text = reverse $number_text;
|
||||
$number_text =~ s/\./$decimal/g; # Perl decimal mark to whatever we need.
|
||||
$number_text =~ s/(\d{3})(?=\d)(?!\d*\Q$decimal\E)/$1$thousands/g;
|
||||
$number_text = reverse $number_text;
|
||||
}
|
||||
|
||||
return $number_text;
|
||||
}
|
||||
|
||||
# The display version with HTML added:
|
||||
# - superscripted exponents
|
||||
sub with_html {
|
||||
my ($self, $number_text) = @_;
|
||||
|
||||
return $self->_add_html_exponents($self->for_display($number_text));
|
||||
}
|
||||
|
||||
sub _add_html_exponents {
|
||||
|
||||
my ($self, $string) = @_;
|
||||
|
||||
return $string if ($string !~ /\^/ or $string =~ /^\^|\^$/); # Give back the same thing if we won't deal with it properly.
|
||||
|
||||
my @chars = split //, $string;
|
||||
my $number_re = $self->number_regex;
|
||||
my ($start_tag, $end_tag) = ('<sup>', '</sup>');
|
||||
my ($newly_up, $in_exp_number, $in_exp_parens, %power_parens);
|
||||
my ($parens_count, $number_up) = (0, 0);
|
||||
|
||||
# because of associativity and power-to-power, we need to scan nearly the whole thing
|
||||
for my $index (1 .. $#chars - 1) {
|
||||
my $this_char = $chars[$index];
|
||||
if ($this_char =~ $number_re or ($newly_up && $this_char eq '-')) {
|
||||
if ($newly_up) {
|
||||
$in_exp_number = 1;
|
||||
$newly_up = 0;
|
||||
}
|
||||
} elsif ($this_char eq '(') {
|
||||
$parens_count += 1;
|
||||
$in_exp_number = 0;
|
||||
if ($newly_up) {
|
||||
$in_exp_parens += 1;
|
||||
$power_parens{$parens_count} = 1;
|
||||
$newly_up = 0;
|
||||
}
|
||||
} elsif ($this_char eq '^') {
|
||||
$chars[$index - 1] =~ s/$end_tag$//; # Added too soon!
|
||||
$number_up += 1;
|
||||
$newly_up = 1;
|
||||
$chars[$index] = $start_tag; # Replace ^ with the tag.
|
||||
} elsif ($in_exp_number) {
|
||||
$in_exp_number = 0;
|
||||
$number_up -= 1;
|
||||
$chars[$index] = $end_tag . $chars[$index];
|
||||
} elsif ($number_up && !$in_exp_parens) {
|
||||
# Must have ended another term or more
|
||||
$chars[$index] = ($end_tag x ($number_up - 1)) . $chars[$index];
|
||||
$number_up = 0;
|
||||
} elsif ($this_char eq ')') {
|
||||
# We just closed a set of parens, see if it closes one of our things
|
||||
if ($in_exp_parens && $power_parens{$parens_count}) {
|
||||
$chars[$index] .= $end_tag;
|
||||
delete $power_parens{$parens_count};
|
||||
$in_exp_parens -= 1;
|
||||
}
|
||||
$parens_count -= 1;
|
||||
}
|
||||
}
|
||||
$chars[-1] .= $end_tag x $number_up if ($number_up);
|
||||
|
||||
return join('', @chars);
|
||||
}
|
||||
|
||||
1;
|
|
@ -0,0 +1,53 @@
|
|||
package DDG::GoodieRole::NumberStyler;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Moo::Role;
|
||||
use DDG::GoodieRole::NumberStyle;
|
||||
|
||||
use List::Util qw( all first );
|
||||
|
||||
# If it could fit more than one the first in order gets preference.
|
||||
my @known_styles = (
|
||||
DDG::GoodieRole::NumberStyle->new({
|
||||
id => 'perl',
|
||||
decimal => '.',
|
||||
thousands => ',',
|
||||
}
|
||||
),
|
||||
DDG::GoodieRole::NumberStyle->new({
|
||||
id => 'euro',
|
||||
decimal => ',',
|
||||
thousands => '.',
|
||||
}
|
||||
),
|
||||
);
|
||||
|
||||
sub number_style_regex {
|
||||
my $return_regex = join '|', map { $_->number_regex } @known_styles;
|
||||
return qr/$return_regex/;
|
||||
}
|
||||
|
||||
# Takes an array of numbers and returns which style to use for parse and display
|
||||
# If there are conflicting answers among the array, will return undef.
|
||||
sub number_style_for {
|
||||
my @numbers = @_;
|
||||
|
||||
my $style; # By default, assume we don't understand the numbers.
|
||||
|
||||
STYLE:
|
||||
foreach my $test_style (@known_styles) {
|
||||
my $exponential = lc $test_style->exponential; # Allow for arbitrary casing.
|
||||
if (all { $test_style->understands($_) } map { split /$exponential/, lc $_ } @numbers) {
|
||||
# All of our numbers fit this style. Since we have them in preference order
|
||||
# we can pick it and move on.
|
||||
$style = $test_style;
|
||||
last STYLE;
|
||||
}
|
||||
}
|
||||
|
||||
return $style;
|
||||
}
|
||||
|
||||
1;
|
|
@ -0,0 +1,47 @@
|
|||
#!/usr/bin/env perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use Test::More;
|
||||
|
||||
subtest 'NumberStyler' => sub {
|
||||
|
||||
{ package RoleTester; use Moo; with 'DDG::GoodieRole::NumberStyler'; 1; }
|
||||
|
||||
new_ok('RoleTester', [], 'Applied to an object');
|
||||
isa_ok(RoleTester::number_style_regex(), 'Regexp', 'number_style_regex()');
|
||||
|
||||
my $style_picker = \&RoleTester::number_style_for;
|
||||
|
||||
my @valid_test_cases = (
|
||||
[['0,013'] => 'euro'],
|
||||
[['4,431', '4.321'] => 'perl'],
|
||||
[['4,431', '4,32'] => 'euro'],
|
||||
[['4534,345.0', '1'] => 'perl'], # Unenforced commas.
|
||||
[['4,431', '4,32', '5,42'] => 'euro'],
|
||||
[['4,431', '4.32', '5.42'] => 'perl'],
|
||||
);
|
||||
|
||||
foreach my $tc (@valid_test_cases) {
|
||||
my @numbers = @{$tc->[0]};
|
||||
my $expected_style_id = $tc->[1];
|
||||
is($style_picker->(@numbers)->id, $expected_style_id, '"' . join(' ', @numbers) . '" yields a style of ' . $expected_style_id);
|
||||
}
|
||||
|
||||
my @invalid_test_cases = (
|
||||
[['5234534.34.54', '1'] => 'has a mal-formed number'],
|
||||
[['4,431', '4,32', '4.32'] => 'is confusingly ambiguous'],
|
||||
[['4,431', '4.32.10', '5.42'] => 'is hard to figure'],
|
||||
[['4,431', '4,32,100', '5.42'] => 'has a mal-formed number'],
|
||||
[['4,431', '4,32,100', '5,42'] => 'is too crazy to work out'],
|
||||
);
|
||||
|
||||
foreach my $tc (@invalid_test_cases) {
|
||||
my @numbers = @{$tc->[0]};
|
||||
my $why_not = $tc->[1];
|
||||
is($style_picker->(@numbers), undef, '"' . join(' ', @numbers) . '" fails because it ' . $why_not);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
done_testing;
|
|
@ -9,25 +9,6 @@ use DDG::Goodie::Calculator; # For function subtests.
|
|||
zci answer_type => 'calc';
|
||||
zci is_cached => 1;
|
||||
|
||||
subtest 'display format selection' => sub {
|
||||
my $ds_name = 'DDG::Goodie::Calculator::display_style';
|
||||
my $ds = \&$ds_name;
|
||||
|
||||
is($ds->('0,013')->{id}, 'euro', '0,013 is euro');
|
||||
is($ds->('4,431', '4.321')->{id}, 'perl', '4,431 and 4.321 is perl');
|
||||
is($ds->('4,431', '4.32')->{id}, 'perl', '4,431 and 4.32 is perl');
|
||||
is($ds->('4,431', '4,32')->{id}, 'euro', '4,431 and 4,32 is euro');
|
||||
is($ds->('4534,345.0', '1',)->{id}, 'perl', '4534,345.0 should have another comma, not enforced; call it perl.');
|
||||
is($ds->('4,431', '4,32', '5,42')->{id}, 'euro', '4,431 and 4,32 and 5,42 is nicely euro-style');
|
||||
is($ds->('4,431', '4.32', '5.42')->{id}, 'perl', '4,431 and 4.32 and 5.42 is nicely perl-style');
|
||||
|
||||
is($ds->('5234534.34.54', '1',), undef, '5234534.34.54 and 1 has a mal-formed number, so we cannot proceed');
|
||||
is($ds->('4,431', '4,32', '4.32'), undef, '4,431 and 4,32 and 4.32 is confusingly ambig; no style');
|
||||
is($ds->('4,431', '4.32.10', '5.42'), undef, '4,431 and 4.32.10 is hard to figure; no style');
|
||||
is($ds->('4,431', '4,32,100', '5.42'), undef, '4,431 and 4,32,100 and 5.42 has a mal-formed number, so no go.');
|
||||
is($ds->('4,431', '4,32,100', '5,42'), undef, '4,431 and 4,32,100 and 5,42 is too crazy to work out; no style');
|
||||
};
|
||||
|
||||
ddg_goodie_test(
|
||||
[qw( DDG::Goodie::Calculator )],
|
||||
'what is 2-2' => test_zci(
|
||||
|
@ -380,6 +361,16 @@ ddg_goodie_test(
|
|||
heading => 'Calculator',
|
||||
html => qr/./,
|
||||
),
|
||||
'4E5 +1 ' => test_zci(
|
||||
'(4 * 10 ^ 5) + 1 = 400,001',
|
||||
heading => 'Calculator',
|
||||
html => qr/./,
|
||||
),
|
||||
'4e5 +1 ' => test_zci(
|
||||
'(4 * 10 ^ 5) + 1 = 400,001',
|
||||
heading => 'Calculator',
|
||||
html => qr/./,
|
||||
),
|
||||
'123.123.123.123/255.255.255.255' => undef,
|
||||
'83.166.167.160/27' => undef,
|
||||
'9 + 0 x 07' => undef,
|
||||
|
|
|
@ -16,36 +16,39 @@ ddg_goodie_test(
|
|||
'convert 0.111 stone to pound' => test_zci('0.111 stone = 1.554 pounds', html => qr/.*/),
|
||||
'3 kilogramme to pound' => test_zci('3 kilograms = 6.614 pounds', html => qr/.*/),
|
||||
'1.3 tonnes to ton' => test_zci('1.3 metric tons = 1.433 tons', html => qr/.*/),
|
||||
'2 tons to kg' => test_zci('2 tons = 1814.372 kilograms', html => qr/.*/),
|
||||
'2 tons to kg' => test_zci('2 tons = 1,814.372 kilograms', html => qr/.*/),
|
||||
'1 ton to kilos' => test_zci('1 ton = 907.186 kilograms', html => qr/.*/),
|
||||
'3.9 oz in g' => test_zci('3.9 ounces = 110.563 grams', html => qr/.*/),
|
||||
'2 miles to km' => test_zci('2 miles = 3.219 kilometers', html => qr/.*/),
|
||||
'convert 5 feet to in' => test_zci('5 feet = 60 inches', html => qr/.*/),
|
||||
'0.5 nautical mile to klick' => test_zci('0.5 nautical miles = 0.926 kilometers', html => qr/.*/),
|
||||
'500 miles in metres' => test_zci('500 miles = 804672.249 meters', html => qr/.*/),
|
||||
'500 miles in metres' => test_zci('500 miles = 804,672.249 meters', html => qr/.*/),
|
||||
'25 cm in inches' => test_zci('25 centimeters = 9.843 inches', html => qr/.*/),
|
||||
'1760 yards to miles' => test_zci('1760 yards = 1 mile', html => qr/.*/),
|
||||
'3520yards to miles' => test_zci('3520 yards = 2 miles', html => qr/.*/),
|
||||
'1760 yards to miles' => test_zci('1,760 yards = 1 mile', html => qr/.*/),
|
||||
'3520yards to miles' => test_zci('3,520 yards = 2 miles', html => qr/.*/),
|
||||
'3.5e-2 miles to inches' => test_zci('3.5 * 10^-2 miles = 2,217.602 inches', html => qr/.*/),
|
||||
'convert 1stone to lbs' => test_zci('1 stone = 14 pounds', html => qr/.*/),
|
||||
'30cm in in' => test_zci('30 centimeters = 11.811 inches', html => qr/.*/),
|
||||
'36 months to years' => test_zci('36 months = 3 years', html => qr/.*/),
|
||||
'43200 seconds in hours' => test_zci('43200 seconds = 12 hours', html => qr/.*/),
|
||||
'43200 seconds in hours' => test_zci('43,200 seconds = 12 hours', html => qr/.*/),
|
||||
'4 hours to minutes' => test_zci('4 hours = 240 minutes', html => qr/.*/),
|
||||
'convert 5 kelvin to fahrenheit' => test_zci('5 kelvin = -450.670 degrees fahrenheit', html => qr/.*/),
|
||||
'1 bar to pascal' => test_zci('1 bar = 100000 pascals', html => qr/.*/),
|
||||
'1 bar to pascal' => test_zci('1 bar = 100,000 pascals', html => qr/.*/),
|
||||
'1 kilopascal to psi' => test_zci('1 kilopascal = 0.145 pounds per square inch', html => qr/.*/),
|
||||
'1 atm to kpa' => test_zci('1 atmosphere = 101.325 kilopascals', html => qr/.*/),
|
||||
'5yrds to km' => test_zci('5 yards = 0.005 kilometers', html => qr/.*/),
|
||||
'12" to cm' => test_zci('12 inches = 30.480 centimeters', html => qr/.*/),
|
||||
'convert 25 inches into feet' => test_zci('25 inches = 2.083 feet', html => qr/.*/),
|
||||
'42 kilowatt hours in joules' => test_zci('42 kilowatt-hours = 1.51e+08 joules', html => qr/.*/),
|
||||
'2500kcal in tons of tnt' => test_zci('2500 large calories = 0.003 tons of TNT', html => qr/.*/),
|
||||
'90 ps in watts' => test_zci('90 metric horsepower = 66194.888 watts', html => qr/.*/),
|
||||
'1 gigawatt in horsepower' => test_zci('1 gigawatt = 1.34e+06 horsepower', html => qr/.*/),
|
||||
'42 kilowatt hours in joules' => test_zci('42 kilowatt-hours = 1.51 * 10^8 joules', html => qr/.*/),
|
||||
'2500kcal in tons of tnt' => test_zci('2,500 large calories = 0.003 tons of TNT', html => qr/.*/),
|
||||
'90 ps in watts' => test_zci('90 metric horsepower = 66,194.888 watts', html => qr/.*/),
|
||||
'1 gigawatt in horsepower' => test_zci('1 gigawatt = 1.34 * 10^6 horsepower', html => qr/.*/),
|
||||
'180 degrees in radians' => test_zci('180 degrees = 3.142 radians', html => qr/.*/),
|
||||
'270 degrees in quadrants' => test_zci('270 degrees = 3 quadrants', html => qr/.*/),
|
||||
'180 degrees in grads' => test_zci('180 degrees = 200 gradians', html => qr/.*/),
|
||||
'45 newtons to pounds force' => test_zci('45 newtons = 10.116 pounds force', html => qr/.*/),
|
||||
'4E5 newtons to pounds force' => test_zci('4 * 10^5 newtons = 89,923.577 pounds force', html => qr/.*/),
|
||||
'4,1E5 newtons to pounds force' => test_zci('4,1 * 10^5 newtons = 92.171,667 pounds force', html => qr/.*/),
|
||||
'8 poundal to newtons' => test_zci('8 poundals = 1.106 newtons', html => qr/.*/),
|
||||
'convert 5 f to celsius' => test_zci('5 degrees fahrenheit = -15 degrees celsius', html => qr/.*/),
|
||||
'6^2 oz to grams' => undef,
|
||||
|
@ -59,28 +62,33 @@ ddg_goodie_test(
|
|||
'use a ton of stones' => undef,
|
||||
'shoot onself in the foot' => undef,
|
||||
'foot in both camps' => undef,
|
||||
'10 mg to tons' => test_zci('10 milligrams = 1.1e-08 tons', html => qr/.*/),
|
||||
'10000 minutes in microseconds' => test_zci('10000 minutes = 6e+11 microseconds', html => qr/.*/),
|
||||
'10 mg to tons' => test_zci('10 milligrams = 1.1 * 10^-8 tons', html => qr/.*/),
|
||||
'10000 minutes in microseconds' => test_zci('10,000 minutes = 6 * 10^11 microseconds', html => qr/.*/),
|
||||
'convert 5 bytes to bit' => test_zci('5 bytes = 40 bits', html => qr/.*/),
|
||||
'5 GB to megabyte' => test_zci('5 gigabytes = 5000 megabytes', html => qr/.*/),
|
||||
'0.013 mb in bits' => test_zci('0.013 megabytes = 104000 bits', html => qr/.*/),
|
||||
'5 GB to megabyte' => test_zci('5 gigabytes = 5,000 megabytes', html => qr/.*/),
|
||||
'5,0 GB to megabyte' => test_zci('5,0 gigabytes = 5.000 megabytes', html => qr/.*/),
|
||||
'0.013 mb in bits' => test_zci('0.013 megabytes = 104,000 bits', html => qr/.*/),
|
||||
'0,013 mb in bits' => test_zci('0,013 megabytes = 104.000 bits', html => qr/.*/),
|
||||
'1 exabyte to pib' => test_zci('1 exabyte = 888.178 pebibytes', html => qr/.*/),
|
||||
'convert 1 yb to yib' => test_zci('1 yottabyte = 0.827 yobibytes', html => qr/.*/),
|
||||
'16 years in months' => test_zci('16 years = 192 months', html => qr/.*/),
|
||||
'1 year in months' => test_zci('1 year = 12 months', html => qr/.*/),
|
||||
'360 degrees in revolutions' => test_zci('360 degrees = 1 revolution', html => qr/.*/),
|
||||
'convert km to cm' => test_zci('1 kilometer = 100000 centimeters', html => qr/.*/),
|
||||
'3e60 degrees in revolutions' => test_zci('3 * 10^60 degrees = 8.33 * 10^57 revolutions', html => qr/.*/),
|
||||
'convert km to cm' => test_zci('1 kilometer = 100,000 centimeters', html => qr/.*/),
|
||||
'convert 10ms to seconds' => test_zci('10 milliseconds = 0.010 seconds', html => qr/.*/),
|
||||
'what is 1 inch in cm' => test_zci('1 inch = 2.540 centimeters', html => qr/.*/),
|
||||
'what are 10 yards in metres' => test_zci('10 yards = 9.144 meters', html => qr/.*/),
|
||||
'how long is 42 days in mins' => test_zci('42 days = 60480 minutes', html => qr/.*/),
|
||||
'how long is 42 days in mins' => test_zci('42 days = 60,480 minutes', html => qr/.*/),
|
||||
'how much is 40 kelvin in celsius' => test_zci('40 kelvin = -233.150 degrees celsius', html => qr/.*/),
|
||||
'12 degrees Celsius to Fahrenheit' => test_zci('12 degrees celsius = 53.600 degrees fahrenheit', html => qr/.*/),
|
||||
'1 degrees Fahrenheit to celsius' => test_zci('1 degrees fahrenheit = -17.222 degrees celsius', html => qr/.*/),
|
||||
'0 c in k' => test_zci('0 degrees celsius = 273.150 kelvin', html => qr/.*/),
|
||||
'234 f to c' => test_zci('234 degrees fahrenheit = 112.222 degrees celsius', html => qr/.*/),
|
||||
'234 f to k' => test_zci('234 degrees fahrenheit = 385.372 kelvin', html => qr/.*/),
|
||||
'7 milligrams to micrograms' => test_zci('7 milligrams = 7000 micrograms', html => qr /.*/)
|
||||
'metres from 20 yards' => test_zci('20 meters = 21.872 yards', html => qr/.*/),
|
||||
'7 milligrams to micrograms' => test_zci('7 milligrams = 7,000 micrograms', html => qr /.*/),
|
||||
'convert 1 cm to 2 mm' => undef,
|
||||
);
|
||||
|
||||
done_testing;
|
||||
|
|
Loading…
Reference in New Issue