79 lines
3.0 KiB
Perl
Executable File
79 lines
3.0 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
|
|
use strict;
|
|
use warnings;
|
|
use utf8;
|
|
use Test::More;
|
|
use Test::Deep;
|
|
use DDG::Test::Goodie;
|
|
|
|
# Shared ZCI settings for this test script.
# NOTE(review): presumably these are defaults that DDG::Test::Goodie applies to
# every test_zci() expectation below -- confirm against that module.
zci answer_type => 'html_entity';
zci is_cached   => 1;
|
|
|
|
# Build the structured-answer hash for one expected result.
#
# Arguments:
#   $title    - value stored under data => title
#   $subtitle - value stored under data => subtitle
#
# Returns a hash reference with a 'data' hash (title, subtitle) and a
# 'templates' hash whose 'group' is always 'text'.
sub build_structured_answer {
    my ($title, $subtitle) = @_;

    return {
        data => {
            title    => $title,
            subtitle => $subtitle,
        },
        templates => {
            group => 'text',
        },
    };
}
|
|
|
|
# Build the expectation for one query.
#
# Arguments:
#   $text     - expected plain-text answer, passed first to test_zci()
#   $title    - title for the structured answer
#   $subtitle - subtitle for the structured answer
#
# Returns whatever test_zci() returns for that text plus a structured_answer
# produced by build_structured_answer($title, $subtitle).
sub build_test {
    my ($text, $title, $subtitle) = @_;

    my $structured = build_structured_answer($title, $subtitle);

    return test_zci($text, structured_answer => $structured);
}
|
|
|
|
# Run DDG::Goodie::HTMLEntitiesDecode against a list of query => expectation
# pairs.  An expectation from build_test() gives the expected answer text,
# title and subtitle; undef marks the queries the comments below describe as
# failing.
#
# NOTE(review): this copy appears to have been captured after HTML rendering,
# so entity references in the queries, subtitles and comments look like they
# were decoded to literal characters (for example the two identical
# 'html decode ¢' cases, whose comments differ only in a trailing ";", and the
# empty "" in the control-character comment).  Verify each query and subtitle
# against the upstream test before relying on these cases.
ddg_goodie_test(
    [qw(DDG::Goodie::HTMLEntitiesDecode)],

    # Simple decimal test
    'html decode !' => build_test("Decoded HTML Entity: !", "!", "HTML Entity Decode: !"),
    # Simple text test
    'html entity &' => build_test("Decoded HTML Entity: &", "&","HTML Entity Decode: &"),
    # Another simple text test
    'decode html for >' => build_test("Decoded HTML Entity: >", ">","HTML Entity Decode: >"),
    # Simple hex test
    '! htmlentity' => build_test("Decoded HTML Entity: !", "!","HTML Entity Decode: !"),

    # No "&" and ";" in decimal input
    '#36 html decode' => build_test('Decoded HTML Entity: $', '$',"HTML Entity Decode: #36"),
    # Variety in hex queries
    # (the subtitle is single-quoted because it contains a literal double quote)
    '" decodehtml' => build_test('Decoded HTML Entity: "', '"','HTML Entity Decode: "'),
    # More variety in hex queries
    'htmlentity for #x3c' => build_test("Decoded HTML Entity: <", "<","HTML Entity Decode: #x3c"),

    # "¢" succeeds
    'html decode ¢' => build_test("Decoded HTML Entity: ¢", '¢',"HTML Entity Decode: ¢"),
    # "¢" also succeeds (missing back ";" is OK)
    'html decode ¢' => build_test("Decoded HTML Entity: ¢", '¢',"HTML Entity Decode: ¢"),
    # "cent" fails during the regex match because of the missing front "&" (stricter for text to eliminate false positive encoding hits)
    'html decode cent' => undef,
    # "cent;" fails during the regex match for the same reasons as above
    'html decode cent;' => undef,

    # "" has no visual representation
    'html entity of ' => build_test("Decoded HTML Entity: Unicode control character (no visual representation)", "Unicode control character (no visual representation)","HTML Entity Decode: "),

    # Querying for "&bunnyrabbit;" should fail
    'html decode &bunnyrabbit;' => undef,
    # Trying to decode "&" should fail (this is an encoding job)
    'html decode &' => undef,
    # Trying to decode apostrophe should fail (decode_entities() unsuccessful)
    'html decode apostrophe' => undef,

    # natural querying
    'What is the decoded html entity for π?' => build_test("Decoded HTML Entity: π", "π","HTML Entity Decode: π"),

    # natural querying
    'what is decoded html entity for #960 ?' => build_test("Decoded HTML Entity: π", "π","HTML Entity Decode: #960"),
    # no "html" in query
    'the decoded entity for ō is?' => build_test("Decoded HTML Entity: ō", "ō","HTML Entity Decode: ō"),
);
|
|
|
|
# Tell Test::More that the script reached its end, so no fixed test count is needed.
done_testing;
|