Added unicode char lookup ("unicode æ")
parent
8fc7f4030b
commit
ccaf602190
|
@ -8,14 +8,17 @@ use Encode qw/encode_utf8/;
|
||||||
use constant {
|
use constant {
|
||||||
CODEPOINT_RE => qr/^ \s* U \+ (?<codepoint> [a-f0-9]{4,6}) \s* $/xi,
|
CODEPOINT_RE => qr/^ \s* U \+ (?<codepoint> [a-f0-9]{4,6}) \s* $/xi,
|
||||||
NAME_RE => qr/^ (?<name> [A-Z][A-Z\s]+) $/xi,
|
NAME_RE => qr/^ (?<name> [A-Z][A-Z\s]+) $/xi,
|
||||||
|
CHAR_RE => qr/^ \s* (?<char> .) \s* $/x,
|
||||||
|
UNICODE_RE => qr/^ unicode \s+ (.+) $/xi,
|
||||||
CODEPOINT => 1,
|
CODEPOINT => 1,
|
||||||
NAME => 2,
|
NAME => 2,
|
||||||
|
CHAR => 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
triggers query_raw => CODEPOINT_RE;
|
triggers query_raw => CODEPOINT_RE;
|
||||||
|
|
||||||
# Also allows open-ended queries like: "LATIN SMALL LETTER X"
|
# Also allows open-ended queries like: "LATIN SMALL LETTER X"
|
||||||
triggers query_raw => qr{^unicode \s+ (.+) $}xi;
|
triggers query_raw => UNICODE_RE;
|
||||||
|
|
||||||
zci is_cached => 1;
|
zci is_cached => 1;
|
||||||
|
|
||||||
|
@ -24,12 +27,13 @@ zci answer_type => "unicode_conversion";
|
||||||
handle sub {
|
handle sub {
|
||||||
my $term = $_[0];
|
my $term = $_[0];
|
||||||
|
|
||||||
if ($term =~ m{^unicode \s+ (.+) $}x) {
|
# Search term starts with "unicode "
|
||||||
|
if ($term =~ UNICODE_RE) {
|
||||||
return unicode_lookup($1);
|
return unicode_lookup($1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return codepoint_description($term);
|
return codepoint_description($term);
|
||||||
};
|
};
|
||||||
|
|
||||||
sub codepoint_description {
|
sub codepoint_description {
|
||||||
my $term = $_[0];
|
my $term = $_[0];
|
||||||
|
@ -99,6 +103,10 @@ sub input_type ($) {
|
||||||
$input = $+{name};
|
$input = $+{name};
|
||||||
$type = NAME;
|
$type = NAME;
|
||||||
}
|
}
|
||||||
|
elsif ($input =~ CHAR_RE) {
|
||||||
|
$input = $+{char};
|
||||||
|
$type = CHAR;
|
||||||
|
}
|
||||||
|
|
||||||
return ($input, $type);
|
return ($input, $type);
|
||||||
}
|
}
|
||||||
|
@ -132,6 +140,10 @@ sub unicode_lookup {
|
||||||
my $cp = char_to_codepoint($char);
|
my $cp = char_to_codepoint($char);
|
||||||
$result = codepoint_description($cp);
|
$result = codepoint_description($cp);
|
||||||
}
|
}
|
||||||
|
elsif ($type == CHAR) {
|
||||||
|
my $cp = char_to_codepoint($term);
|
||||||
|
$result = codepoint_description($cp);
|
||||||
|
}
|
||||||
|
|
||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
23
t/Unicode.t
23
t/Unicode.t
|
@ -8,11 +8,28 @@ use DDG::Test::Goodie;
|
||||||
zci answer_type => 'unicode_conversion';
|
zci answer_type => 'unicode_conversion';
|
||||||
zci is_cached => 1;
|
zci is_cached => 1;
|
||||||
|
|
||||||
|
# Raw query, "U+XXXX"
|
||||||
ddg_goodie_test(
|
ddg_goodie_test(
|
||||||
[qw(
|
[qw(DDG::Goodie::Unicode)],
|
||||||
DDG::Goodie::Unicode
|
|
||||||
)],
|
|
||||||
'U+263A' => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
|
'U+263A' => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
# Same should work with the "unicode" start trigger too
|
||||||
|
ddg_goodie_test(
|
||||||
|
[qw(DDG::Goodie::Unicode)],
|
||||||
|
'unicode U+263B' => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
|
||||||
|
);
|
||||||
|
|
||||||
|
# Lookup by name, "unicode LATIN SMALL LETTER A WITH CIRCUMFLEX"
|
||||||
|
ddg_goodie_test(
|
||||||
|
[qw(DDG::Goodie::Unicode)],
|
||||||
|
"unicode White Smiling Face" => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
|
||||||
|
);
|
||||||
|
|
||||||
|
# Lookup by character, "unicode à"
|
||||||
|
ddg_goodie_test(
|
||||||
|
[qw(DDG::Goodie::Unicode)],
|
||||||
|
"unicode \x{263B}" => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
|
||||||
|
);
|
||||||
|
|
||||||
done_testing;
|
done_testing;
|
||||||
|
|
Loading…
Reference in New Issue