66 lines
4.0 KiB
Perl
66 lines
4.0 KiB
Perl
package DDG::Goodie::HTMLEntitiesDecode;
|
|
# ABSTRACT: Decode HTML Entities.
|
|
# HTML Entity Encoding has been moved to a separate module
|
|
|
|
use DDG::Goodie;
|
|
use HTML::Entities 'decode_entities';
|
|
use Unicode::UCD 'charinfo';
|
|
use warnings;
|
|
use strict;
|
|
|
|
# Instant-answer metadata, declared through the DDG::Goodie DSL.
zci answer_type => 'html_entity';
zci is_cached   => 1;

# Any of these words appearing in the query hands the rest of it to the handler.
triggers any => 'html', 'entity', 'htmldecode', 'decodehtml', 'htmlentity';

# Every example must contain something the handler recognises as an entity:
# decimal (&#33; or #36), hexadecimal (&#x21) or named (&amp).  A bare "!",
# "&" or "$" is rejected by the handler and yields no answer.
primary_example_queries 'html decode &#33;', 'html decode &amp';
secondary_example_queries 'html entity &#x21', '#36 decode html', 'what is the decoded html entity of &#36;';

description 'Decode HTML entities';
name 'HTMLEntitiesDecode';
code_url 'https://github.com/duckduckgo/zeroclickinfo-goodies/blob/master/lib/DDG/Goodie/HTMLEntitiesDecode.pm';
category 'computing_tools';
topics 'programming';

# Deliberately a flat list rather than a hash: the 'twitter' key appears twice.
attribution twitter => 'crazedpsyc',
            cpan    => 'CRZEDPSYC',
            twitter => ['https://twitter.com/nshanmugham', 'Nishanth Shanmugham'],
            web     => ['http://nishanths.github.io', 'Nishanth Shanmugham'],
            github  => ['https://github.com/nishanths', 'Nishanth Shanmugham'];
|
|
|
|
# Decode a single HTML entity found in the query.
#
# The query text is read from $_.  Filler words ("what is the", "decode",
# "entity", ...) and a trailing question mark are stripped; whatever is left
# must be exactly one entity in one of these forms:
#   decimal      &#39;   (leading "&" and trailing ";" both optional)
#   hexadecimal  &#x27;  (leading "&" and trailing ";" both optional)
#   named        &cent;  (leading "&" required, trailing ";" optional)
#
# Returns nothing when the query is not a decodable entity.  Otherwise returns
# a plain-text answer followed by an `html => ...` pair holding the markup.
handle remainder => sub {
    my $query = $_;    # work on a copy instead of rewriting $_ in place

    $query =~ s/^\s+|\s+$//g;                     # trim
    $query =~ s/(\bwhat\s*is\s*(the)?)//ig;       # drop "what is" / "what is the"
    $query =~ s/\b(the|for|of|is|entity|decode|decoded|code|character)\b//ig;    # drop filler words
    $query =~ s/^\s+|\s+$//g;                     # trim whitespace exposed by the removals above
    $query =~ s/\s*\?$//g;                        # drop the "?" of question-like queries

    # The whole remaining query has to be one entity.
    return unless $query =~ m{
        \A (
            &? \# [0-9]+ ;?                # decimal
          | &     [a-zA-Z]+ ;?             # named
          | &? \# [xX] [0-9A-Fa-f]+ ;?     # hexadecimal
        ) \z
    }x;
    my $entity = $1;

    # Normalise to the canonical "&...;" spelling expected by the decoder.
    $entity =~ s/^&?/&/;    # prepend '&' when missing
    $entity =~ s/;?$/;/;    # append ';' when missing

    # decode_entities() leaves text it cannot decode unchanged, so an unchanged
    # result means the entity is not real (e.g. "&enchantedbunny;").
    my $decoded = decode_entities($entity);
    return if lc $entity eq lc $decoded;

    my $decimal = ord($decoded);
    my $hex     = sprintf("%04x", $decimal);

    # charinfo() returns undef for code points it does not know about.
    my $info = charinfo($decimal);
    return unless defined $info;

    # Characters with no visual representation get a description and a link
    # instead of the raw character.
    if ($info->{name} eq '<control>') {
        $decoded = "Unicode control character (no visual representation)";
        $entity  = "<a href='https://en.wikipedia.org/wiki/Unicode_control_characters'>Unicode control character</a> (no visual representation)";
    }
    elsif (substr($info->{category}, 0, 1) eq 'C') {
        $decoded = "Special character (no visual representation)";
        $entity  = "<a href='https://en.wikipedia.org/wiki/Special_characters'>Special character</a> (no visual representation)";
    }

    # $entity is emitted as markup on purpose so the browser renders the
    # character; it is either one of the fixed strings above or text that
    # passed the strict entity pattern.
    return "Decoded HTML Entity: $decoded, Decimal: $decimal, Hexadecimal: $hex",
        html => qq(<div class="zci--htmlentitiesdecode"><div class="large"><span class="text--secondary">Decoded HTML Entity: </span><span class="text--primary">$entity</span></div><div class="small"><span class="text--secondary">Decimal: <span class="text--primary">$decimal</span>, Hexadecimal: <span class="text--primary">$hex</span></span></div></div>);
};
|
|
|
|
1;
|