From bbb545c4ffa1ac7024555e962d4c37c84023a1c9 Mon Sep 17 00:00:00 2001 From: tagawa Date: Fri, 11 Dec 2015 09:13:42 +0000 Subject: [PATCH] Unicode: Added UTF triggers --- lib/DDG/Goodie/Unicode.pm | 4 ++-- lib/DDG/Goodie/UnicodeFuzzySearch.pm | 2 +- t/Unicode.t | 14 ++++++++++++-- t/UnicodeFuzzySearch.t | 12 ++++++++++++ 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/lib/DDG/Goodie/Unicode.pm b/lib/DDG/Goodie/Unicode.pm index 6e21198dc..2a2e35b2e 100644 --- a/lib/DDG/Goodie/Unicode.pm +++ b/lib/DDG/Goodie/Unicode.pm @@ -10,7 +10,7 @@ use Encode qw/encode_utf8/; attribution github => ['cosimo', 'Cosimo Streppone']; primary_example_queries 'U+590c'; -secondary_example_queries 'unicode white smiling face'; +secondary_example_queries 'unicode white smiling face', 'utf-8 smile'; description 'get information about a unicode character'; code_url 'https://github.com/duckduckgo/zeroclickinfo-goodies/blob/master/lib/DDG/Goodie/Unicode.pm'; name 'Unicode'; @@ -22,7 +22,7 @@ use constant { CODEPOINT_RE => qr/^ \s* (?:U \+|\\(?:u|x{(?=.*}))) (?<codepoint> [a-f0-9]{4,6})}? \s* $/xi, NAME_RE => qr/^ (?<name> [A-Z][A-Z\s]+) $/xi, CHAR_RE => qr/^ \s* (?<char> .) 
\s* $/x, - UNICODE_RE => qr/^ unicode \s+ (.+) $/xi, + UNICODE_RE => qr/^ (?:unicode|utf-(?:8|16|32)) \s+ (.+) $/xi, CODEPOINT => 1, NAME => 2, CHAR => 3, diff --git a/lib/DDG/Goodie/UnicodeFuzzySearch.pm b/lib/DDG/Goodie/UnicodeFuzzySearch.pm index 273562163..260e553f4 100644 --- a/lib/DDG/Goodie/UnicodeFuzzySearch.pm +++ b/lib/DDG/Goodie/UnicodeFuzzySearch.pm @@ -5,7 +5,7 @@ use strict; use DDG::Goodie; use URI::Escape::XS; -triggers startend => "unicode", "emoji"; +triggers startend => "unicode", "emoji", "utf-8", "utf-16", "utf-32"; zci is_cached => 1; diff --git a/t/Unicode.t b/t/Unicode.t index bab277f8a..37e111d86 100644 --- a/t/Unicode.t +++ b/t/Unicode.t @@ -20,9 +20,18 @@ ddg_goodie_test( # Same should work with the "unicode" start trigger too 'unicode U+263B' => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"), - # Lookup by name, "unicode LATIN SMALL LETTER A WITH CIRCUMFLEX") + # Lookup by name, "unicode LATIN SMALL LETTER A WITH CIRCUMFLEX" "unicode White Smiling Face" => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"), + # Lookup by name, "utf-8 bullet" + "utf-8 bullet" => test_zci("\x{2022} U+2022 BULLET, decimal: 8226, HTML: &#8226;, UTF-8: 0xE2 0x80 0xA2, block: General Punctuation"), + + # Lookup by name, "utf-16 smile" + "utf-16 smile" => test_zci("\x{2323} U+2323 SMILE, decimal: 8995, HTML: &#8995;, UTF-8: 0xE2 0x8C 0xA3, block: Miscellaneous Technical"), + + # Lookup by name, "utf-32 custard" + "utf-32 custard" => test_zci("\x{1F36E} U+1F36E CUSTARD, decimal: 127854, HTML: &#127854;, UTF-8: 0xF0 0x9F 0x8D 0xAE, block: Miscellaneous Symbols And Pictographs"), + # Lookup by character, "unicode à" "unicode \x{263B}" => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"), @@ -31,7 +40,8 @@ ddg_goodie_test( '\x{2764}' => test_zci("\x{2764} U+2764 
HEAVY BLACK HEART, decimal: 10084, HTML: &#10084;, UTF-8: 0xE2 0x9D 0xA4, block: Dingbats"), - 'unicode unknown' => undef + 'unicode unknown' => undef, + 'utf-15 bullet' => undef ); done_testing; diff --git a/t/UnicodeFuzzySearch.t b/t/UnicodeFuzzySearch.t index b58d8c1b8..f778ec541 100644 --- a/t/UnicodeFuzzySearch.t +++ b/t/UnicodeFuzzySearch.t @@ -39,6 +39,18 @@ ROTATED HEAVY BLACK HEART BULLET: \x{2765} (U+2765)", 'unicode 2665' => test_zci("BLACK HEART SUIT: \x{2665} (U+2665)", html => "BLACK HEART SUIT: \x{2665} (U+2665)"), + # ------ + 'utf-8 2666' => + test_zci("BLACK DIAMOND SUIT: \x{2666} (U+2666)", + html => "BLACK DIAMOND SUIT: \x{2666} (U+2666)"), + # ------ + 'utf-16 black diamond suit' => + test_zci("BLACK DIAMOND SUIT: \x{2666} (U+2666)", + html => "BLACK DIAMOND SUIT: \x{2666} (U+2666)"), + # ------ + 'utf-32 black diamond suit' => + test_zci("BLACK DIAMOND SUIT: \x{2666} (U+2666)", + html => "BLACK DIAMOND SUIT: \x{2666} (U+2666)"), # -- emoji : lower bound 'unicode cyclone' => test_zci(