Anagram: allow for multiple word inputs.
- Cache by default; only turn off for "scrambled" results. - Simplify `calc_freq` slightly. - Remove "length" argument, as it doesn't make sense with anagrams. - Cache letter frequency info as dictionary words are examined. - Allow for multi-word queries to match single word anagrams. This still does not allow for multi-word anagrams. Although it would be nice to do so, it adds significant computational complexity.master
parent
3eb11ec20e
commit
791a4005ba
|
@ -1,5 +1,5 @@
|
|||
package DDG::Goodie::Anagram;
|
||||
# ABSTRACT: Returns an anagram based on the word and length of word supplied
|
||||
# ABSTRACT: Returns an anagram based on the supplied query.
|
||||
|
||||
use DDG::Goodie;
|
||||
use List::Util qw( shuffle );
|
||||
|
@ -13,7 +13,7 @@ foreach my $kw (@keywords) {
|
|||
foreach my $con (@connectors) {
|
||||
push @triggers, join(' ', $kw, $con); # anagram for, anagrams of, etc.
|
||||
foreach my $com (@commands) {
|
||||
push @triggers, join(' ', $com, $kw, $con); # find anagram of, show anagrams for, etc
|
||||
push @triggers, join(' ', $com, $kw, $con); # find anagram of, show anagrams for, etc.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -21,125 +21,96 @@ foreach my $kw (@keywords) {
|
|||
triggers start => @triggers;
|
||||
|
||||
zci answer_type => "anagram";
|
||||
zci is_cached => 0;
|
||||
zci is_cached => 1;
|
||||
|
||||
primary_example_queries "anagram of filter";
|
||||
secondary_example_queries "show anagrams for filter";
|
||||
secondary_example_queries "find anagram for partial men";
|
||||
description "find the anagram(s) of your query";
|
||||
name "Anagram";
|
||||
code_url "https://github.com/duckduckgo/zeroclickinfo-goodies/blob/master/lib/DDG/Goodie/Anagram.pm";
|
||||
category "transformations";
|
||||
topics "words_and_games";
|
||||
|
||||
attribution github => ["https://github.com/loganom", 'loganom'],
|
||||
attribution github => ["https://github.com/loganom", 'loganom'],
|
||||
github => ["https://github.com/beardlybread", "beardlybread"],
|
||||
github => ['https://github.com/gdrooid', 'gdrooid'],
|
||||
email => ['gdrooid@openmailbox.org', 'gdrooid'];
|
||||
github => ['https://github.com/gdrooid', 'gdrooid'],
|
||||
email => ['gdrooid@openmailbox.org', 'gdrooid'];
|
||||
|
||||
# Wrap the response in html
|
||||
sub html_output {
|
||||
my ($str, $list) = @_;
|
||||
return "<div class='zci--anagrams'>"
|
||||
."<span class='text--secondary'>$str</span><br/>"
|
||||
."<span class='text--primary'>$list</span>"
|
||||
."</div>";
|
||||
return "<div class='zci--anagrams'>" . "<span class='text--secondary'>$str</span><br/>" . "<span class='text--primary'>$list</span>" . "</div>";
|
||||
}
|
||||
|
||||
# Calculate the frequency of the characters in a string
|
||||
sub calc_freq {
|
||||
my ($str, $ref) = @_;
|
||||
for (split //, $str) {
|
||||
if ($ref->{$_}) {
|
||||
$ref->{$_} += 1;
|
||||
} else {
|
||||
$ref->{$_} = 1;
|
||||
}
|
||||
my ($str) = @_;
|
||||
my %freqs;
|
||||
|
||||
for (split //, lc $str) {
|
||||
$freqs{$_} += 1;
|
||||
}
|
||||
|
||||
return \%freqs;
|
||||
}
|
||||
|
||||
my @words = map { chomp; $_; } share('words')->slurp;
|
||||
my %words = map { chomp; ($_ => undef); } share('words')->slurp; # This will cache letter frequencies as they get used.
|
||||
|
||||
handle remainder => sub {
|
||||
|
||||
my $word = $_;
|
||||
$word =~ s/^"(.*)"$/$1/;
|
||||
|
||||
return unless $word; # Need a word.
|
||||
|
||||
# Do some normalization to allow for multi-word matches.
|
||||
my $match_word = lc $word;
|
||||
$match_word =~ s/[^a-z]//g;
|
||||
return unless $match_word; # Still need a word!
|
||||
|
||||
my $len = length $match_word;
|
||||
|
||||
if ($match_word eq 'voldemort') {
|
||||
return 'Tom Riddle', html => html_output("Anagrams of \"$word\"", 'Tom Riddle');
|
||||
}
|
||||
|
||||
my $query_freq = calc_freq($match_word); # Calculate the letter-freq of the query
|
||||
my @output;
|
||||
|
||||
s/^of\s+(.+)/$1/i;
|
||||
s/^"(.*)"$/$1/;
|
||||
my $len;
|
||||
my $word;
|
||||
my $full_word = 1;
|
||||
my $multiple_words = 0;
|
||||
foreach (keys %words) {
|
||||
if (/^[$match_word]{$len}$/i) {
|
||||
my $w = lc;
|
||||
next if $w eq $match_word; # Skip word if it's the same as the query
|
||||
|
||||
# If the query is of type "word length", where 'length' is not required
|
||||
if (/^([a-zA-Z]+)\s*([0-9]+)?\s*$/) {
|
||||
$word = $1;
|
||||
$word =~ s/\s+$//;
|
||||
$len = length $word;
|
||||
# If looking for anagrams shorter than the word
|
||||
if ($2 && $2 < $len) {
|
||||
$len = $2;
|
||||
$full_word = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
$word = $_;
|
||||
$multiple_words = 1;
|
||||
}
|
||||
my $f = $words{$w} // calc_freq($w); # Use cached word letter-freq or calculate new
|
||||
$words{$w} //= $f; # Cache word letter-freq if we didn't have it.
|
||||
|
||||
# Return if there is no word
|
||||
return unless $word;
|
||||
|
||||
if (lc $word eq 'voldemort' and $full_word) {
|
||||
return 'Tom Riddle', html => html_output ("Anagrams of \"$word\"", 'Tom Riddle');
|
||||
}
|
||||
|
||||
# Calculate the frequency of the characters of the query
|
||||
my %query_freq;
|
||||
calc_freq lc $word, \%query_freq;
|
||||
|
||||
unless ($multiple_words) {
|
||||
foreach (@words) {
|
||||
if (/^[$word]{$len}$/i) {
|
||||
my $w = lc;
|
||||
# Skip word if it's the same as the one in the query
|
||||
next if $w eq lc $word;
|
||||
|
||||
# Calculate the frequency of the characters of a word from the list
|
||||
my %f;
|
||||
calc_freq lc, \%f;
|
||||
|
||||
my $is_anagram = 1;
|
||||
for (keys %f) {
|
||||
if ($f{$_} > $query_freq{$_}) {
|
||||
# The frequency of the characters in a word must be equal or
|
||||
# less (for shorter anagrams) than that of the same
|
||||
# character in the query
|
||||
$is_anagram = 0;
|
||||
last;
|
||||
}
|
||||
my $is_anagram = 1;
|
||||
for (keys %$f) {
|
||||
if ($f->{$_} != $query_freq->{$_}) {
|
||||
# The letter-freq in a dictionary word must equal that of the query
|
||||
$is_anagram = 0;
|
||||
last;
|
||||
}
|
||||
push (@output, $_) if $is_anagram;
|
||||
}
|
||||
push(@output, $_) if $is_anagram;
|
||||
}
|
||||
}
|
||||
|
||||
# If the query is multiple words long or there are no anagrams for its
|
||||
# single word, the query will be scrambled, as there is no other possible
|
||||
# response
|
||||
unless (@output) {
|
||||
# Scramble when no anagram can be found.
|
||||
if (!@output) {
|
||||
my $w;
|
||||
do {
|
||||
my @chars = shuffle split (//, $word);
|
||||
$w = join '', @chars;
|
||||
} while ($w eq $word);
|
||||
return $word, html => html_output('Sorry, we found no anagrams for "'.html_enc($word).'". We scrambled it for you:', $w);
|
||||
$w = join '', shuffle split(//, $word);
|
||||
} while ($w eq $match_word);
|
||||
# Do not cache the scrambled versions since the shuffle is random.
|
||||
return $word,
|
||||
html => html_output('Sorry, we found no anagrams for "' . html_enc($word) . '". We scrambled it for you:', $w),
|
||||
is_cached => 0;
|
||||
}
|
||||
|
||||
my $response = join ', ', @output;
|
||||
my $output_str = 'Anagrams of "'.html_enc($word).'"';
|
||||
unless ($full_word) {
|
||||
$output_str .= " of length $len";
|
||||
}
|
||||
my $response = join ', ', sort { $a cmp $b } @output;
|
||||
my $output_str = 'Anagrams of "' . html_enc($word) . '"';
|
||||
|
||||
return $response, html => html_output($output_str, $response);
|
||||
};
|
||||
|
||||
|
|
50
t/Anagram.t
50
t/Anagram.t
|
@ -6,30 +6,46 @@ use Test::More;
|
|||
use DDG::Test::Goodie;
|
||||
|
||||
zci answer_type => 'anagram';
|
||||
zci is_cached => 0;
|
||||
zci is_cached => 1;
|
||||
|
||||
ddg_goodie_test(
|
||||
[qw(DDG::Goodie::Anagram)],
|
||||
'Anagrams for filter' => test_zci('trifle', html => qr/Anagrams of "filter"/),
|
||||
'anagrams of events' => test_zci('Steven', html => qr/Anagrams of "events"/),
|
||||
'anagram of algorithm' => test_zci('logarithm', html => qr/Anagrams of "algorithm"/),
|
||||
'anagrams of favorite' => test_zci('favorite', html => qr/we found no anagrams for "favorite".*scrambled it for you:/),
|
||||
'anagrams for times' => test_zci('emits, items, mites, smite', html => qr/Anagrams of "times"/),
|
||||
'anagram for "Mixing it up"' => test_zci('Mixing it up', html => qr/we found no anagrams for "Mixing it up".*scrambled it for you:/),
|
||||
'show anagram of algorithm' => test_zci('logarithm', html => qr/Anagrams of "algorithm"/),
|
||||
'show anagrams of stop' => test_zci('Post, opts, post, pots, spot, tops', html => qr/Anagrams of "stop"/),
|
||||
'find anagram for stop' => test_zci('Post, opts, post, pots, spot, tops', html => qr/Anagrams of "stop"/),
|
||||
'anagrams of lost' => test_zci('lots, slot', html => qr/Anagrams of "lost"/),
|
||||
'anagram for lost' => test_zci('lots, slot', html => qr/Anagrams of "lost"/),
|
||||
'anagram of lost' => test_zci('lots, slot', html => qr/Anagrams of "lost"/),
|
||||
'anagram of filter' => test_zci('trifle', html => qr/Anagrams of "filter"/),
|
||||
'anagram of Filter' => test_zci('trifle', html => qr/Anagrams of "Filter"/),
|
||||
'anagrams of slot' => test_zci(
|
||||
# Sucessful anagram tests.
|
||||
'Anagrams for filter' => test_zci('trifle', html => qr/Anagrams of "filter"/),
|
||||
'anagrams of events' => test_zci('Steven', html => qr/Anagrams of "events"/),
|
||||
'anagram of algorithm' => test_zci('logarithm', html => qr/Anagrams of "algorithm"/),
|
||||
'show anagram of algorithm' => test_zci('logarithm', html => qr/Anagrams of "algorithm"/),
|
||||
'anagrams for times' => test_zci('emits, items, mites, smite', html => qr/Anagrams of "times"/),
|
||||
'show anagrams of stop' => test_zci('Post, opts, post, pots, spot, tops', html => qr/Anagrams of "stop"/),
|
||||
'find anagram for stop' => test_zci('Post, opts, post, pots, spot, tops', html => qr/Anagrams of "stop"/),
|
||||
'anagrams of lost' => test_zci('lots, slot', html => qr/Anagrams of "lost"/),
|
||||
'anagram for lost' => test_zci('lots, slot', html => qr/Anagrams of "lost"/),
|
||||
'anagram of lost' => test_zci('lots, slot', html => qr/Anagrams of "lost"/),
|
||||
'anagram of filter' => test_zci('trifle', html => qr/Anagrams of "filter"/),
|
||||
'anagram of Filter' => test_zci('trifle', html => qr/Anagrams of "Filter"/),
|
||||
'anagram of "partial men"' => test_zci('Parliament, parliament', html => qr/Anagrams of "partial men"/),
|
||||
'find anagram for partial men' => test_zci('Parliament, parliament', html => qr/Anagrams of "partial men"/),
|
||||
# Uncached scrambles tests.
|
||||
'anagrams of favorite' => test_zci(
|
||||
'favorite',
|
||||
html => qr/we found no anagrams for "favorite".*scrambled it for you:/,
|
||||
is_cached => 0
|
||||
),
|
||||
'anagram for "Mixing it up"' => test_zci(
|
||||
'Mixing it up',
|
||||
html => qr/we found no anagrams for "Mixing it up".*scrambled it for you:/,
|
||||
is_cached => 0
|
||||
),
|
||||
# Full HTML test.
|
||||
'anagrams of slot' => test_zci(
|
||||
'lost, lots',
|
||||
html =>
|
||||
"<div class='zci--anagrams'><span class='text--secondary'>Anagrams of \"slot\"</span><br/><span class='text--primary'>lost, lots</span></div>",
|
||||
),
|
||||
'anagram of' => undef,
|
||||
# No result tests.
|
||||
'anagram of' => undef,
|
||||
'anagrams for ""' => undef,
|
||||
'anagrams for "867-5309"' => undef,
|
||||
);
|
||||
|
||||
done_testing;
|
||||
|
|
Loading…
Reference in New Issue