Merge pull request #1883 from tagawa/tagawa/unicode

Unicode: Added UTF triggers
master
Rob Emery 2015-12-12 20:11:02 +00:00
commit 7a28bc577d
4 changed files with 27 additions and 5 deletions

View File

@ -10,7 +10,7 @@ use Encode qw/encode_utf8/;
attribution github => ['cosimo', 'Cosimo Streppone'];
primary_example_queries 'U+590c';
secondary_example_queries 'unicode white smiling face';
secondary_example_queries 'unicode white smiling face', 'utf-8 smile';
description 'get information about a unicode character';
code_url 'https://github.com/duckduckgo/zeroclickinfo-goodies/blob/master/lib/DDG/Goodie/Unicode.pm';
name 'Unicode';
@ -22,7 +22,7 @@ use constant {
CODEPOINT_RE => qr/^ \s* (?:U \+|\\(?:u|x{(?=.*}))) (?<codepoint> [a-f0-9]{4,6})}? \s* $/xi,
NAME_RE => qr/^ (?<name> [A-Z][A-Z\s]+) $/xi,
CHAR_RE => qr/^ \s* (?<char> .) \s* $/x,
UNICODE_RE => qr/^ unicode \s+ (.+) $/xi,
UNICODE_RE => qr/^ (?:unicode|utf-(?:8|16|32)) \s+ (.+) $/xi,
CODEPOINT => 1,
NAME => 2,
CHAR => 3,

View File

@ -5,7 +5,7 @@ use strict;
use DDG::Goodie;
use URI::Escape::XS;
triggers startend => "unicode", "emoji";
triggers startend => "unicode", "emoji", "utf-8", "utf-16", "utf-32";
zci is_cached => 1;

View File

@ -20,9 +20,18 @@ ddg_goodie_test(
# Same should work with the "unicode" start trigger too
'unicode U+263B' => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
# Lookup by name, "unicode LATIN SMALL LETTER A WITH CIRCUMFLEX")
# Lookup by name, "unicode White Smiling Face"
"unicode White Smiling Face" => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
# Lookup by name, "utf-8 bullet"
"utf-8 bullet" => test_zci("\x{2022} U+2022 BULLET, decimal: 8226, HTML: &#8226;, UTF-8: 0xE2 0x80 0xA2, block: General Punctuation"),
# Lookup by name, "utf-16 smile"
"utf-16 smile" => test_zci("\x{2323} U+2323 SMILE, decimal: 8995, HTML: &#8995;, UTF-8: 0xE2 0x8C 0xA3, block: Miscellaneous Technical"),
# Lookup by name, "utf-32 custard"
"utf-32 custard" => test_zci("\x{1F36E} U+1F36E CUSTARD, decimal: 127854, HTML: &#127854;, UTF-8: 0xF0 0x9F 0x8D 0xAE, block: Miscellaneous Symbols And Pictographs"),
# Lookup by character, "unicode \x{263B}" (BLACK SMILING FACE)
"unicode \x{263B}" => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
@ -31,7 +40,8 @@ ddg_goodie_test(
'\x{2764}' => test_zci("\x{2764} U+2764 HEAVY BLACK HEART, decimal: 10084, HTML: &#10084;, UTF-8: 0xE2 0x9D 0xA4, block: Dingbats"),
'unicode unknown' => undef
'unicode unknown' => undef,
'utf-15 bullet' => undef
);
done_testing;

View File

@ -39,6 +39,18 @@ ROTATED HEAVY BLACK HEART BULLET: \x{2765} (U+2765)",
'unicode 2665' =>
test_zci("BLACK HEART SUIT: \x{2665} (U+2665)",
html => "BLACK HEART SUIT: \x{2665} (U+2665)"),
# ------
'utf-8 2666' =>
test_zci("BLACK DIAMOND SUIT: \x{2666} (U+2666)",
html => "BLACK DIAMOND SUIT: \x{2666} (U+2666)"),
# ------
'utf-16 black diamond suit' =>
test_zci("BLACK DIAMOND SUIT: \x{2666} (U+2666)",
html => "BLACK DIAMOND SUIT: \x{2666} (U+2666)"),
# ------
'utf-32 black diamond suit' =>
test_zci("BLACK DIAMOND SUIT: \x{2666} (U+2666)",
html => "BLACK DIAMOND SUIT: \x{2666} (U+2666)"),
# -- emoji : lower bound
'unicode cyclone' =>
test_zci(