2014-06-23 22:12:47 -07:00
#!/usr/bin/env perl
use strict ;
use warnings ;
use Test::More ;
use DDG::Test::Goodie ;
# Test-wide settings passed to zci: the answer type is 'html_entity' and
# is_cached is 1. (zci is presumably exported by DDG::Test::Goodie -- confirm.)
# NOTE(review): "= >" is not a Perl operator; it looks like the fat comma "=>"
# split by whatever produced this copy of the file -- confirm against the original.
zci answer_type = > 'html_entity' ;
# NOTE(review): the bare timestamp line below looks like blame/history residue
# rather than Perl source -- confirm and drop it.
2014-09-27 06:42:57 -07:00
zci is_cached = > 1 ;
2014-06-23 22:12:47 -07:00
# Test table for DDG::Goodie::HTMLEntitiesDecode.
#
# ddg_goodie_test receives the list of goodie packages under test followed by
# pairs of query string and expectation. An expectation is either a test_zci(...)
# call (expected text answer, as a string or regex, plus an "html" regex) or
# undef. Going by the comments on the undef cases, undef marks a query that is
# expected to produce no answer -- presumably enforced by DDG::Test::Goodie;
# confirm there.
#
# NOTE(review): this copy of the file appears damaged:
#   * bare timestamp lines are interleaved with the code (blame/history residue?);
#   * the fat comma shows up as "= >";
#   * HTML entities in queries, regexes and comments look as if they were decoded
#     to their literal characters (e.g. the two cent queries below are now
#     identical even though their comments say they differ by a trailing ";").
# Confirm every literal against the original file before relying on these tests.
ddg_goodie_test (
[ qw( DDG::Goodie::HTMLEntitiesDecode ) ] ,
2014-10-28 11:36:15 -07:00
# Simple decimal test: expects "!" with decimal 33 / hex 0021.
# (Presumably the query carried a decimal entity for "!" -- TODO confirm.)
2014-07-01 13:01:09 -07:00
'html decode !' = > test_zci ( "Decoded HTML Entity: !, Decimal: 33, Hexadecimal: 0021" , html = > qr/!/ ) ,
2014-10-28 11:36:15 -07:00
# Simple text (named entity) test: expects "&" with decimal 38 / hex 0026.
# NOTE(review): as shown, this query ends in a bare "&", the same input that a
# later case ('html decode &') expects to fail -- the entity was probably lost here.
2014-07-01 13:01:09 -07:00
'html entity &' = > test_zci ( "Decoded HTML Entity: &, Decimal: 38, Hexadecimal: 0026" , html = > qr/&/ ) ,
2014-06-23 22:12:47 -07:00
# Another simple text test: expects ">" with decimal 62 / hex 003e.
2014-07-01 13:01:09 -07:00
'decode html for >' = > test_zci ( "Decoded HTML Entity: >, Decimal: 62, Hexadecimal: 003e" , html = > qr/>/ ) ,
2014-10-28 11:36:15 -07:00
# Simple hex test, with the trigger word after the entity.
2014-07-01 13:01:09 -07:00
'! htmlentity' = > test_zci ( "Decoded HTML Entity: !, Decimal: 33, Hexadecimal: 0021" , html = > qr/!/ ) ,
2014-06-23 22:12:47 -07:00
# Decimal input without the leading "&" and trailing ";".
# The "\$" in the expected string stops Perl from interpolating a variable.
# NOTE(review): qr/$/ as written is only an end-of-string anchor, so it matches
# any html output; the pattern probably lost an entity or a backslash -- confirm.
2014-07-01 13:01:09 -07:00
'#36 html decode' = > test_zci ( "Decoded HTML Entity: \$, Decimal: 36, Hexadecimal: 0024" , html = > qr/$/ ) ,
2014-06-23 22:12:47 -07:00
# Variety in hex queries: expects the double quote, decimal 34 / hex 0022.
2014-07-01 13:01:09 -07:00
'" decodehtml' = > test_zci ( "Decoded HTML Entity: \", Decimal: 34, Hexadecimal: 0022" , html = > qr/"/ ) ,
2014-06-23 22:12:47 -07:00
# More variety in hex queries: hex input without the leading "&" and trailing ";".
2014-07-01 13:01:09 -07:00
'htmlentity for #x3c' = > test_zci ( "Decoded HTML Entity: <, Decimal: 60, Hexadecimal: 003c" , html = > qr/</ ) ,
2014-06-23 22:12:47 -07:00
# The cent entity succeeds (decimal 162 / hex 00a2).
# (Presumably written with both "&" and ";" in the original query -- TODO confirm.)
2014-07-01 13:01:09 -07:00
'html decode ¢' = > test_zci ( qr/ Decimal: 162, Hexadecimal: 00a2/ , html = > qr/¢/ ) ,
2014-06-23 22:12:47 -07:00
# The cent entity also succeeds when the trailing ";" is missing.
# NOTE(review): this query is now byte-identical to the previous one, so the
# missing-";" variant is not visible in this copy -- confirm the original literal.
2014-07-01 13:01:09 -07:00
'html decode ¢' = > test_zci ( qr/ Decimal: 162, Hexadecimal: 00a2/ , html = > qr/¢/ ) ,
2014-06-23 22:12:47 -07:00
# "cent" fails during the regex match because the leading "&" is missing
# (text entities are matched more strictly to avoid false positives on encoding queries).
'html decode cent' = > undef ,
# "cent;" fails during the regex match for the same reason as above.
2014-10-28 11:36:15 -07:00
'html decode cent;' = > undef ,
2014-06-23 22:12:47 -07:00
# Code point 20 (hex 0014) has no visual representation, so the answer
# describes it as a Unicode control character instead of printing it.
# (The entity itself appears to be missing from the query below -- TODO confirm.)
2014-07-01 13:01:09 -07:00
'html entity of ' = > test_zci ( "Decoded HTML Entity: Unicode control character (no visual representation), Decimal: 20, Hexadecimal: 0014" , html = > qr/Unicode control character/ ) ,
2014-10-28 11:36:15 -07:00
2014-06-23 22:12:47 -07:00
# Querying for the unknown entity "&bunnyrabbit;" should fail.
'html decode &bunnyrabbit;' = > undef ,
# Trying to decode a bare "&" should fail (this is an encoding job).
'html decode &' = > undef ,
# Trying to decode the word "apostrophe" should fail (decode_entities() unsuccessful).
'html decode apostrophe' = > undef ,
2014-06-25 21:28:20 -07:00
# Natural-language query for pi (decimal 960 / hex 03c0).
2014-07-01 13:01:09 -07:00
'What is the decoded html entity for π?' = > test_zci ( qr/ Decimal: 960, Hexadecimal: 03c0/ , html = > qr/π/ ) ,
2014-06-25 21:28:20 -07:00
# Natural-language query using the decimal form without "&" and ";".
2014-07-01 13:01:09 -07:00
'what is decoded html entity for #960 ?' = > test_zci ( qr/ Decimal: 960, Hexadecimal: 03c0/ , html = > qr/π/ ) ,
2014-06-26 15:06:23 -07:00
# No "html" in the query; "decoded entity" alone is expected to trigger.
2014-07-01 13:01:09 -07:00
'the decoded entity for ō is?' = > test_zci ( qr/ Decimal: 333, Hexadecimal: 014d/ , html = > qr/ō/ ) ,
2014-06-23 22:12:47 -07:00
) ;
# No fixed plan is declared in the visible code; done_testing closes the test run.
done_testing ;