Merge branch 'pr/174'

master
Dylan Lloyd 2013-05-13 09:07:07 -04:00
commit ec280b5ebe
4 changed files with 24528 additions and 4 deletions

View File

@ -46,6 +46,7 @@ handle sub {
sub codepoint_description {
my $term = $_[0];
return unless $term;
if ($term !~ m{([a-f0-9]+)}i) {
return;
@ -61,15 +62,15 @@ sub codepoint_description {
my $s = $i{script};
$s =~ tr/_/ /;
if ($s ne 'Common' && $s ne 'Inherited' && $s ne 'Unknown'
&& $i{name} !~ /$s/i) {
&& $i{name} !~ /$s/i) {
$extra{script} = $i{script};
}
}
$extra{decimal} = $c;
$extra{HTML} = substr($i{category},0,1) eq 'C' ? "No visual representation" : "&#$c;";
$extra{'UTF-8'} = join ' ',
map { sprintf '0x%02X', ord $_ }
split //, encode_utf8(chr($c));
map { sprintf '0x%02X', ord $_ }
split //, encode_utf8(chr($c));
if ($i{decomposition}) {
($extra{decomposition} = $i{decomposition}) =~ s/\b(?<!<)([0-9a-fA-F]{4,6})\b(?!>)/U+$1/g;
@ -118,7 +119,7 @@ sub input_type ($) {
}
return ($input, $type);
}
}
sub name_to_char {
my $name = $_[0];

View File

@ -0,0 +1,60 @@
package DDG::Goodie::UnicodeFuzzySearch;
# ABSTRACT: returns unicode symbols matching the input

use strict;
use warnings;
use DDG::Goodie;

# Fire when the query starts or ends with the word "unicode"; the rest of
# the query is what the handler searches for.
triggers startend => "unicode";

zci is_cached => 1;

attribution
    github  => "konr",
    twitter => "konr",
    web     => "http://konr.mobi";

primary_example_queries 'unicode black heart';
secondary_example_queries "unicode 2665";
name 'Reverse Unicode Search';
description 'returns unicode symbols matching the input';
# Link to this module's own source file (the path follows the package name).
code_url 'https://github.com/duckduckgo/zeroclickinfo-goodies/blob/master/lib/DDG/Goodie/UnicodeFuzzySearch.pm';
category 'computing_info';
topics 'programming';

# UnicodeData.txt is a semicolon-separated file.
# Uploaded file version: 6.3.0, obtained from
# ftp://ftp.unicode.org/Public/6.3.0/ucd/
#
# The whole file is read once, when the module is loaded, and kept as one
# array element per record so the handler can grep over it.
my @lines = split /\n/, share("UnicodeData.txt")->slurp;
# Characters that must not appear verbatim in the HTML answer, mapped to
# their entity form.
my %html_entity_for = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    '"' => '&quot;',
    "'" => '&#39;',
);

# Return a copy of $text that is safe to embed in HTML element content.
sub _escape_html {
    my ($text) = @_;
    $text =~ s/([&<>"'])/$html_entity_for{$1}/g;
    return $text;
}

# Query handler: $_ holds the query with the "unicode" trigger removed.
#
# Returns nothing when the remainder is empty/false or when the number of
# matching records is 0 or >= 50. Otherwise returns the plain-text answer
# (one "NAME: SYMBOL (U+CODE)" line per match, newline-separated) followed
# by an `html => ...` pair carrying the same lines as HTML (a <ul> list when
# there is more than one match).
handle remainder => sub {
    return unless $_;

    # The records are upper-case, so upper-case the query. quotemeta makes
    # the user's text match literally: without it an unbalanced "(" or "["
    # makes the regex die, and characters such as "." act as wildcards.
    my $needle = quotemeta uc $_;

    # 1st column = number ; 2nd column = name. See
    # http://www.unicode.org/draft/ucd/UnicodeData.html

    # AS FUZZY AS POSSIBLE BUT NOT MORE - It's either (a) number (no ';'
    # before) or (b) part of the name or, when there are way too many
    # matches for the result to be helpful, (c) isolated words.
    my @matches = grep { /^[^;]*;?[^;]*$needle/ } @lines;
    @matches = grep { /\b$needle\b/ } @lines if @matches >= 50;
    return unless @matches > 0 && @matches < 50;

    # Only the first two fields are needed; the limit keeps split from
    # cutting up the rest of the record.
    my @results = map {
        my ($code, $name) = split /;/, $_, 3;
        sprintf '%s: %s (U+%s)', $name, chr(hex $code), $code;
    } @matches;

    # The plain-text answer is left untouched; only the HTML form is
    # escaped, because symbols such as U+003C '<' or U+0026 '&' (and names
    # like "<control>") would otherwise be read as markup.
    my @html_items = map { _escape_html($_) } @results;
    my $html = @html_items > 1
        ? '<ul>' . join('', map { "<li>$_</li>" } @html_items) . '</ul>'
        : $html_items[0];

    return join("\n", @results), html => $html;
};
1;

File diff suppressed because it is too large Load Diff

34
t/UnicodeFuzzySearch.t Normal file
View File

@ -0,0 +1,34 @@
#!/usr/bin/env perl
# Tests for DDG::Goodie::UnicodeFuzzySearch. The queries below put the
# "unicode" trigger both at the start and at the end, and cover answers with
# a single match as well as answers with several matches.

use strict;
use warnings;

use Test::More;
use DDG::Test::Goodie;

zci is_cached => 1;
zci answer_type => "unicodefuzzysearch";

# Single-match answers: the expected HTML is the same string as the text.
my $white_heart = "WHITE HEART SUIT: \x{2661} (U+2661)";
my $tibetan_om  = "TIBETAN SYLLABLE OM: \x{0f00} (U+0F00)";

# Multi-match answers: the text has one match per line, the HTML wraps each
# match in <li> inside a single <ul>.
my $snowman_text = "SNOWMAN: \x{2603} (U+2603)
SNOWMAN WITHOUT SNOW: \x{26c4} (U+26C4)
BLACK SNOWMAN: \x{26c7} (U+26C7)";
my $snowman_html = "<ul><li>SNOWMAN: \x{2603} (U+2603)</li><li>SNOWMAN WITHOUT SNOW: \x{26c4} (U+26C4)</li><li>BLACK SNOWMAN: \x{26c7} (U+26C7)</li></ul>";

my $sharp_s_text = "LATIN SMALL LETTER SHARP S: \x{00df} (U+00DF)
LATIN CAPITAL LETTER SHARP S: \x{1e9e} (U+1E9E)
MUSIC SHARP SIGN: \x{266f} (U+266F)";
my $sharp_s_html = "<ul><li>LATIN SMALL LETTER SHARP S: \x{00df} (U+00DF)</li><li>LATIN CAPITAL LETTER SHARP S: \x{1e9e} (U+1E9E)</li><li>MUSIC SHARP SIGN: \x{266f} (U+266F)</li></ul>";

ddg_goodie_test(
    ['DDG::Goodie::UnicodeFuzzySearch'],

    # Lookup by name and by code point.
    "unicode white heart" => test_zci($white_heart, html => $white_heart),
    "0f00 unicode"        => test_zci($tibetan_om,  html => $tibetan_om),

    # Several characters share the searched word.
    "unicode snowman" => test_zci($snowman_text, html => $snowman_html),

    # Multi-word search with the trigger at the end.
    "sharp s unicode" => test_zci($sharp_s_text, html => $sharp_s_html),
);

done_testing;