Added unicode char lookup ("unicode æ")
parent
8fc7f4030b
commit
ccaf602190
|
@ -8,14 +8,17 @@ use Encode qw/encode_utf8/;
|
|||
# Patterns and type tags used to classify a Unicode lookup query.
use constant {
    # "U+XXXX": 4 to 6 hex digits, case-insensitive, optional surrounding
    # whitespace; the digits are captured as $+{codepoint}.
    CODEPOINT_RE => qr/^ \s* U \+ (?<codepoint> [a-f0-9]{4,6}) \s* $/xi,

    # A name made of letters and whitespace, starting with a letter
    # (case-insensitive); captured as $+{name}.
    NAME_RE => qr/^ (?<name> [A-Z][A-Z\s]+) $/xi,

    # Exactly one character with optional surrounding whitespace; captured
    # as $+{char}. The closing delimiter must be "/" to match the opening
    # "qr/" -- a "}" here leaves the regex literal unterminated.
    CHAR_RE => qr/^ \s* (?<char> .) \s* $/x,

    # "unicode <rest>" (case-insensitive); <rest> is captured as $1.
    UNICODE_RE => qr/^ unicode \s+ (.+) $/xi,

    # Numeric tags identifying which kind of input was recognised.
    CODEPOINT => 1,
    NAME      => 2,
    CHAR      => 3,
};
|
||||
|
||||
triggers query_raw => CODEPOINT_RE;
|
||||
|
||||
# Also allows open-ended queries like: "LATIN SMALL LETTER X"
|
||||
triggers query_raw => qr{^unicode \s+ (.+) $}xi;
|
||||
triggers query_raw => UNICODE_RE;
|
||||
|
||||
zci is_cached => 1;
|
||||
|
||||
|
@ -24,12 +27,13 @@ zci answer_type => "unicode_conversion";
|
|||
handle sub {
|
||||
my $term = $_[0];
|
||||
|
||||
if ($term =~ m{^unicode \s+ (.+) $}x) {
|
||||
# Search term starts with "unicode "
|
||||
if ($term =~ UNICODE_RE) {
|
||||
return unicode_lookup($1);
|
||||
}
|
||||
|
||||
return codepoint_description($term);
|
||||
};
|
||||
}
|
||||
|
||||
sub codepoint_description {
|
||||
my $term = $_[0];
|
||||
|
@ -99,6 +103,10 @@ sub input_type ($) {
|
|||
$input = $+{name};
|
||||
$type = NAME;
|
||||
}
|
||||
elsif ($input =~ CHAR_RE) {
|
||||
$input = $+{char};
|
||||
$type = CHAR;
|
||||
}
|
||||
|
||||
return ($input, $type);
|
||||
}
|
||||
|
@ -132,6 +140,10 @@ sub unicode_lookup {
|
|||
my $cp = char_to_codepoint($char);
|
||||
$result = codepoint_description($cp);
|
||||
}
|
||||
elsif ($type == CHAR) {
|
||||
my $cp = char_to_codepoint($term);
|
||||
$result = codepoint_description($cp);
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
|
23
t/Unicode.t
23
t/Unicode.t
|
@ -8,11 +8,28 @@ use DDG::Test::Goodie;
|
|||
zci answer_type => 'unicode_conversion';
|
||||
zci is_cached => 1;
|
||||
|
||||
# Raw query, "U+XXXX"
|
||||
ddg_goodie_test(
|
||||
[qw(
|
||||
DDG::Goodie::Unicode
|
||||
)],
|
||||
[qw(DDG::Goodie::Unicode)],
|
||||
'U+263A' => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: ☺, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
|
||||
);
|
||||
|
||||
# Same should work with the "unicode" start trigger too
|
||||
ddg_goodie_test(
|
||||
[qw(DDG::Goodie::Unicode)],
|
||||
'unicode U+263B' => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: ☻, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
|
||||
);
|
||||
|
||||
# Lookup by name, e.g. "unicode LATIN SMALL LETTER A WITH CIRCUMFLEX"
|
||||
# U+263A is WHITE SMILING FACE (U+263B is the BLACK one), so a lookup by the
# name "White Smiling Face" must report the WHITE name, matching the
# expectation used for the raw "U+263A" query.
# NOTE(review): the "HTML:" field shows a literal character in this view;
# confirm whether the source uses a numeric entity that the viewer decoded.
ddg_goodie_test(
    [qw(DDG::Goodie::Unicode)],
    "unicode White Smiling Face" => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: ☺, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
);
|
||||
|
||||
# Lookup by character, "unicode à"
|
||||
ddg_goodie_test(
|
||||
[qw(DDG::Goodie::Unicode)],
|
||||
"unicode \x{263B}" => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: ☻, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
|
||||
);
|
||||
|
||||
done_testing;
|
||||
|
|
Loading…
Reference in New Issue