zeroclickinfo-goodies/lib/DDG/Goodie/UnicodeFuzzySearch.pm

package DDG::Goodie::UnicodeFuzzySearch;
# ABSTRACT: returns unicode symbols matching the input

use DDG::Goodie;

# Fire when the query starts or ends with the word "unicode"; the rest of
# the query is passed to the handler as the remainder.
triggers startend => "unicode";

# Answers depend only on the query text, so they may be cached.
zci is_cached => 1;

attribution
    github => "konr",
    twitter => "konr",
    web => "http://konr.mobi";
primary_example_queries 'unicode black heart';
secondary_example_queries "unicode 2665";

name 'Reverse Unicode Search';
description 'returns unicode symbols matching the input';
# Points at this module's own file (the goodie was renamed from
# UnicodeReverse to UnicodeFuzzySearch).
code_url 'https://github.com/duckduckgo/zeroclickinfo-goodies/blob/master/lib/DDG/Goodie/UnicodeFuzzySearch.pm';
category 'computing_info';
topics 'programming';

# Load the Unicode character database once, when the module is loaded,
# rather than on every query. UnicodeData.txt is a semicolon-separated
# file, one record per line. Uploaded file version: 6.3.0, obtained from
# ftp://ftp.unicode.org/Public/6.3.0/ucd/
my @lines = do {
    # Slurp in scalar context to get the file content as a single string.
    my $unicode_data = share("UnicodeData.txt")->slurp;
    split /\n/, $unicode_data;
};

# Query handler: looks the remainder of the query up in UnicodeData.txt.
#
# Input:   $_ holds the query text with the "unicode" trigger removed.
# Returns: nothing when the query is empty or when the number of matching
#          records is zero or too large to be useful; otherwise a plain-text
#          answer (one "NAME: SYMBOL (U+CODE)" line per match) followed by
#          html => the same results as HTML (a <ul> when there are several).
handle remainder => sub {
    return unless $_;

    # The query is untrusted user input. Quote it so that it is matched
    # literally: an unescaped "(" or "[" would otherwise abort the handler
    # with a regex compilation error, and arbitrary patterns could be
    # injected. It is upper-cased because it is matched against the raw
    # UnicodeData.txt records, whose character names are upper-case.
    my $pattern = quotemeta uc $_;

    # Exclusive upper bound on the number of matches worth displaying.
    my $max_matches = 50;

    # 1st column = number ; 2nd column = name. See
    # http://www.unicode.org/draft/ucd/UnicodeData.html
    #
    # AS FUZZY AS POSSIBLE BUT NOT MORE - It's either (a) number (no ';'
    # before) or (b) part of the name or, when there are way too many
    # matches for the result to be helpful, (c) isolated words.
    my $loose_re = qr/^[^;]*;?[^;]*$pattern/;
    my @matches  = grep { /$loose_re/ } @lines;
    if (@matches >= $max_matches) {
        my $word_re = qr/\b$pattern\b/;
        @matches = grep { /$word_re/ } @lines;
    }

    return unless @matches && @matches < $max_matches;

    # Turn each matching record into "NAME: SYMBOL (U+CODE)".
    my @results = map {
        my ($code, $name) = split /;/;
        sprintf('%s: %s (U+%s)', $name, chr(hex $code), $code);
    } @matches;

    # The symbol itself may be an HTML metacharacter (e.g. U+003C "<") and
    # some names contain angle brackets (e.g. "<control>"), so the HTML
    # variant must be escaped. The plain-text answer is left untouched.
    my %html_entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    my @html_results = map {
        (my $escaped = $_) =~ s/([&<>"])/$html_entity_for{$1}/g;
        $escaped;
    } @results;

    my $html = @html_results > 1
        ? '<ul>' . join('', map { "<li>$_</li>" } @html_results) . '</ul>'
        : $html_results[0];

    return join("\n", @results), html => $html;

};

1;
renamed goodie, upgraded the code and made a test file 2013-05-06 21:26:26 -07:00			`package DDG::Goodie::UnicodeFuzzySearch;`
My first plugin that helps finding out unicode symbols is ready to go! 2013-05-02 15:55:21 -07:00			`# ABSTRACT: returns unicode symbols matching the input`

			`use DDG::Goodie;`

			`triggers startend => "unicode";`

			`zci is_cached => 1;`

			`attribution`
			`github => "konr",`
			`twitter => "konr",`
			`web => "http://konr.mobi";`
			`primary_example_queries 'unicode black heart';`
			`secondary_example_queries "unicode 2665";`

			`name 'Reverse Unicode Search';`
			`description 'returns unicode symbols matching the input';`
			`code_url 'https://github.com/duckduckgo/zeroclickinfo-goodies/blob/master/lib/DDG/Goodie/UnicodeReverse.pm';`
My first plugin that helps finding out unicode symbols is ready to go! 2013-05-02 15:57:34 -07:00			`category 'computing_info';`
			`topics 'programming';`
My first plugin that helps finding out unicode symbols is ready to go! 2013-05-02 15:55:21 -07:00
slurp on init, not handle 2013-05-12 13:28:11 -07:00			`# UnicodeData.txt is a semicolon-separated file.`
			`# Uploaded file version: 6.3.0, obtained from`
			`# ftp://ftp.unicode.org/Public/6.3.0/ucd/`
			`my @lines = split /\n/, share("UnicodeData.txt")->slurp;`

My first plugin that helps finding out unicode symbols is ready to go! 2013-05-02 15:55:21 -07:00			`handle remainder => sub {`
			`return unless $_;`
15->50 matches, at most, for the results to be valid. Sigma and Delta, eg, are now findable. 2013-05-02 17:06:11 -07:00			`my $pattern = uc join('.*', $_);`
renamed goodie, upgraded the code and made a test file 2013-05-06 21:26:26 -07:00
			`# 1st column = number ; 2nd column = name. See`
			`# http://www.unicode.org/draft/ucd/UnicodeData.html`
			`my @matches;`

			`# AS FUZZY AS POSSIBLE BUT NOT MORE - It's either (a) number (no ';'`
			`# before) or (b) part of the name or, when there are way too many`
			`# matches for the result to be helpful, (c) isolated words.`
			`@matches = grep { /^[^;];?[^;]$pattern/ } @lines;`
			`@matches = grep { /\b$pattern\b/ } @lines if (scalar @matches >= 50);`
My first plugin that helps finding out unicode symbols is ready to go! 2013-05-02 15:55:21 -07:00
15->50 matches, at most, for the results to be valid. Sigma and Delta, eg, are now findable. 2013-05-02 17:06:11 -07:00			`return unless (scalar @matches > 0 && scalar @matches < 50);`
My first plugin that helps finding out unicode symbols is ready to go! 2013-05-02 15:55:21 -07:00
			`@matches = map {`
			`(my $code, my $name) = split /;/;`
			`{symbol => chr hex $code,`
			`code => $code,`
			`name => $name};`
			`} @matches;`

			`my @results = map {sprintf('%s: %s (U+%s)', @{$_}{qw/name symbol code/})} @matches;`

show multiple results in list format 2013-05-12 13:41:11 -07:00			`my $html = scalar @results > 1 ?`
			`'<ul>' . join('', map {"<li>$_</li>"} @results) . '</ul>' : $results[0];`

			`return join("\n", @results), html => $html;`
My first plugin that helps finding out unicode symbols is ready to go! 2013-05-02 15:55:21 -07:00
			`};`

			`1;`