Added unicode char lookup ("unicode æ")

master
Cosimo Streppone 2012-05-05 10:00:23 +02:00 committed by Cosimo Streppone
parent 8fc7f4030b
commit ccaf602190
2 changed files with 35 additions and 6 deletions

View File

@ -8,14 +8,17 @@ use Encode qw/encode_utf8/;
# Query-classification patterns and the numeric tags for each kind of input.
use constant {
    # "U+XXXX": 4 to 6 hex digits, case-insensitive, optional surrounding
    # whitespace. The digits are available as $+{codepoint}.
    CODEPOINT_RE => qr/^ \s* U \+ (?<codepoint> [a-f0-9]{4,6}) \s* $/xi,

    # A character name made of letters and whitespace only,
    # e.g. "LATIN SMALL LETTER X". Available as $+{name}.
    NAME_RE => qr/^ (?<name> [A-Z][A-Z\s]+) $/xi,

    # Exactly one character, optionally padded with whitespace; available
    # as $+{char}. The pattern must be closed with "/" to pair with the
    # opening "qr/", otherwise it runs on into the following lines.
    CHAR_RE => qr/^ \s* (?<char> .) \s* $/x,

    # "unicode <something>": everything after the keyword is captured in $1.
    UNICODE_RE => qr/^ unicode \s+ (.+) $/xi,

    # Input-type tags: assigned in input_type() and compared in
    # unicode_lookup().
    CODEPOINT => 1,
    NAME => 2,
    CHAR => 3,
};
triggers query_raw => CODEPOINT_RE;
# Also allows open-ended queries like: "LATIN SMALL LETTER X"
triggers query_raw => qr{^unicode \s+ (.+) $}xi;
triggers query_raw => UNICODE_RE;
zci is_cached => 1;
@ -24,12 +27,13 @@ zci answer_type => "unicode_conversion";
handle sub {
my $term = $_[0];
if ($term =~ m{^unicode \s+ (.+) $}x) {
# Search term starts with "unicode "
if ($term =~ UNICODE_RE) {
return unicode_lookup($1);
}
return codepoint_description($term);
};
}
sub codepoint_description {
my $term = $_[0];
@ -99,6 +103,10 @@ sub input_type ($) {
$input = $+{name};
$type = NAME;
}
elsif ($input =~ CHAR_RE) {
$input = $+{char};
$type = CHAR;
}
return ($input, $type);
}
@ -132,6 +140,10 @@ sub unicode_lookup {
my $cp = char_to_codepoint($char);
$result = codepoint_description($cp);
}
elsif ($type == CHAR) {
my $cp = char_to_codepoint($term);
$result = codepoint_description($cp);
}
return $result;
}

View File

@ -8,11 +8,28 @@ use DDG::Test::Goodie;
zci answer_type => 'unicode_conversion';
zci is_cached => 1;
# Raw query, "U+XXXX"
ddg_goodie_test(
[qw(
DDG::Goodie::Unicode
)],
[qw(DDG::Goodie::Unicode)],
'U+263A' => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
);
# Same should work with the "unicode" start trigger too
ddg_goodie_test(
[qw(DDG::Goodie::Unicode)],
'unicode U+263B' => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
);
# Lookup by name, e.g. "unicode White Smiling Face".
# U+263A is WHITE SMILING FACE; BLACK SMILING FACE is the next code point, U+263B.
ddg_goodie_test(
[qw(DDG::Goodie::Unicode)],
"unicode White Smiling Face" => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
);
# Lookup by character, "unicode à"
ddg_goodie_test(
[qw(DDG::Goodie::Unicode)],
"unicode \x{263B}" => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
);
done_testing;