Added unicode char lookup ("unicode æ")

2012-05-05 10:00:23 +02:00 · 2012-05-05 10:00:23 +02:00 · ccaf602190
parent 8fc7f4030b
commit ccaf602190
2 changed files with 35 additions and 6 deletions
--- a/lib/DDG/Goodie/Unicode.pm
+++ b/lib/DDG/Goodie/Unicode.pm
@@ -8,14 +8,17 @@ use Encode qw/encode_utf8/;
 use constant {
    CODEPOINT_RE => qr/^ \s* U \+ (?<codepoint> [a-f0-9]{4,6}) \s* $/xi,
    NAME_RE      => qr/^ (?<name> [A-Z][A-Z\s]+) $/xi,
+    CHAR_RE      => qr/^ \s* (?<char> .) \s* $/x,
+    UNICODE_RE   => qr/^ unicode \s+ (.+) $/xi,
    CODEPOINT    => 1,
    NAME         => 2,
+    CHAR         => 3,
 };

 triggers query_raw => CODEPOINT_RE;

 # Also allows open-ended queries like: "LATIN SMALL LETTER X"
-triggers query_raw => qr{^unicode \s+ (.+) $}xi;
+triggers query_raw => UNICODE_RE;

 zci is_cached => 1;

@@ -24,12 +27,13 @@ zci answer_type => "unicode_conversion";
 handle sub {
    my $term = $_[0];

-    if ($term =~ m{^unicode \s+ (.+) $}x) {
+    # Search term starts with "unicode "
+    if ($term =~ UNICODE_RE) {
        return unicode_lookup($1);
    }

    return codepoint_description($term);
-};
+};

 sub codepoint_description {
    my $term = $_[0];
@@ -99,6 +103,10 @@ sub input_type ($) {
        $input = $+{name};
        $type = NAME;
    }
+    elsif ($input =~ CHAR_RE) {
+        $input = $+{char};
+        $type = CHAR;
+    }

    return ($input, $type);
 }
@@ -132,6 +140,10 @@ sub unicode_lookup {
        my $cp = char_to_codepoint($char);
        $result = codepoint_description($cp);
    }
+    elsif ($type == CHAR) {
+        my $cp = char_to_codepoint($term);
+        $result = codepoint_description($cp);
+    }

    return $result;
 }
--- a/t/Unicode.t
+++ b/t/Unicode.t
@@ -8,11 +8,28 @@ use DDG::Test::Goodie;
 zci answer_type => 'unicode_conversion';
 zci is_cached => 1;

+# Raw query, "U+XXXX"
 ddg_goodie_test(
-        [qw(
-                DDG::Goodie::Unicode
-        )],
+        [qw(DDG::Goodie::Unicode)],
        'U+263A' => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
 );

+# Same should work with the "unicode" start trigger too
+ddg_goodie_test(
+        [qw(DDG::Goodie::Unicode)],
+        'unicode U+263B' => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
+);
+
+# Lookup by name, "unicode LATIN SMALL LETTER A WITH CIRCUMFLEX"
+ddg_goodie_test(
+        [qw(DDG::Goodie::Unicode)],
+        "unicode White Smiling Face" => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
+);
+
+# Lookup by character, "unicode à"
+ddg_goodie_test(
+        [qw(DDG::Goodie::Unicode)],
+        "unicode \x{263B}" => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
+);
+
 done_testing;