From 38ec92a4a8ab3515d729d858747253cb9a920592 Mon Sep 17 00:00:00 2001 From: nishanths Date: Tue, 24 Jun 2014 20:19:37 -0700 Subject: [PATCH] cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - accented_chars changes for the future - substitutions in the Encode Goodie are now after the ‘else’ block - regex change to one liners - tests --- lib/DDG/Goodie/HTMLEntitiesDecode.pm | 4 ++-- lib/DDG/Goodie/HTMLEntitiesEncode.pm | 27 ++++++++++------------ share/goodie/htmlentities_encode/style.css | 6 ----- t/HTMLEntitiesEncode.t | 2 +- t/URLEncode.t | 3 +++ 5 files changed, 18 insertions(+), 24 deletions(-) diff --git a/lib/DDG/Goodie/HTMLEntitiesDecode.pm b/lib/DDG/Goodie/HTMLEntitiesDecode.pm index bfe907e90..e7605a698 100644 --- a/lib/DDG/Goodie/HTMLEntitiesDecode.pm +++ b/lib/DDG/Goodie/HTMLEntitiesDecode.pm @@ -31,9 +31,9 @@ sub append_css { }; handle remainder => sub { - $_ =~ s/^\s*//g; # remove front whitespace + $_ =~ s/^\s+|\s+$//g; # remove front and back whitespace $_ =~ s/^(for|of)\s+//g; # remove filler words at the start - $_ =~ s/\s*$//g; # remove back whitespace + $_ =~ s/^\s+|\s+$//g; # remove front and back whitespace that existed in between that may show up after removing the filler words return unless ((/^(&?#(?:[0-9]+(?!_))+;?)$/) || (/^(&(?:[a-zA-Z]+(?!_))+;?)$/) || (/^(&?#[xX](?:[0-9A-Fa-f]+(?!_))+;?)$/)); # decimal (') || text with no underscores (¢) || hex (') # "&" optional for all # ";" optional except in text type diff --git a/lib/DDG/Goodie/HTMLEntitiesEncode.pm b/lib/DDG/Goodie/HTMLEntitiesEncode.pm index 76b14130f..3b3ce98e4 100644 --- a/lib/DDG/Goodie/HTMLEntitiesEncode.pm +++ b/lib/DDG/Goodie/HTMLEntitiesEncode.pm @@ -256,38 +256,35 @@ attribution web => ["http://nishanths.github.io", "Nishanth Shanmugham" handle remainder => sub { # General query cleanup - $_ =~ s/^\s*//g; # remove front whitespace - $_ =~ s/\s*$//g; # remove back whitespace + $_ =~ s/^\s+|\s+$//g; # remove front and back whitespace $_ =~ s/^(for|of)\s+//g; # remove filler words at the start (note: this will remove 'for' in "for euro sign", but not 'for' in "formula sign") $_ =~ s/(symbol|sign)//g; # remove 'symbol' and 'sign' - $_ =~ s/^\s*//g; # remove front whitespace again that may show up after removing the words above - $_ =~ s/\s*$//g; # remove back whitespace again that may show up after removing the words above + $_ =~ s/^\s+|\s+$//g; # remove front and back whitespace that existed in between that may show up after removing the words above # Hash-specific query cleanup for better hits my $hashes_query = $_; $hashes_query =~ s/\-/ /g; # change '-' to ' ' - $hashes_query =~ s/"//g; # remove double quote - $hashes_query =~ s/'//g; # remove single quote + $hashes_query =~ s/"|'//g; # remove double and single quotes # Hashes lookup if ($hashes_query) { my $key; my $value; # Query is for accented character - if ($hashes_query =~ /^(a|A|e|E|i|I|o|O|u|U)\s*(grave|acute)$/) { - $hashes_query =~ s/\s*//g; # remove in-between spaces - $key = $hashes_query; # capitalization matters for accented characters lookup + if ($hashes_query =~ /^([a-zA-Z])\s*(grave|acute)$/i) { + $hashes_query = $1 . lc $2; # $1's capitalization matters for accented characters lookup, lc $2 allows for more freedom in queries + $key = $hashes_query; $value = $accented_chars{$key}; # Not an accented character -- lookup the $codes hash instead } else { $key = lc $hashes_query; $value = $codes{$key}; - # Try again after substitutions if there is no hit - unless (defined $value) { - $key =~ s/brackets/bracket/g; - $key =~ s/quotes/quote/g; - $value = $codes{$key}; - } + } + # Try again after substitutions if there is no hit + unless (defined $value) { + $key =~ s/brackets/bracket/g; + $key =~ s/quotes/quote/g; + $value = $codes{$key}; } # Make final answer if (defined $value) { diff --git a/share/goodie/htmlentities_encode/style.css b/share/goodie/htmlentities_encode/style.css index 74dd49a11..3d992e2f3 100644 --- a/share/goodie/htmlentities_encode/style.css +++ b/share/goodie/htmlentities_encode/style.css @@ -1,12 +1,6 @@ .zci--answer .zci--htmlentitiesencode { padding-top: 0.25em; padding-bottom: 0.25em; -} -.zci--answer .zci--htmlentitiesencode .line-single { - font-weight: 300; - font-size: 1.5em; -} -.zci--answer .zci--htmlentitiesencode .line-multiple { font-weight: 300; font-size: 1.5em; } diff --git a/t/HTMLEntitiesEncode.t b/t/HTMLEntitiesEncode.t index b117fdb2f..8243693e2 100644 --- a/t/HTMLEntitiesEncode.t +++ b/t/HTMLEntitiesEncode.t @@ -23,7 +23,7 @@ ddg_goodie_test( 'html entity A-acute' => test_zci("Encoded HTML Entity: Á",html => qr/Aacute/), # Variety in querying accented chars #2 - 'html entity for E grave' => test_zci("Encoded HTML Entity: È", html => qr/Egrave/), + 'html entity for E Grave' => test_zci("Encoded HTML Entity: È", html => qr/Egrave/), # Query is a single typed-in character to encode 'html escape &' => test_zci("Encoded HTML Entity: &", html => qr/amp/), diff --git a/t/URLEncode.t b/t/URLEncode.t index 6479d1671..485077e27 100644 --- a/t/URLEncode.t +++ b/t/URLEncode.t @@ -35,6 +35,9 @@ ddg_goodie_test( 'www.heroku.com/{rawwr!@#$%^&*()+=__} escapeurl' => test_zci("Percent-encoded URL: www.heroku.com%2F%7Brawwr!%40%23%24%25%5E%26*()%2B%3D__%7D", html => qr/www.heroku.com%2F\%7Brawwr\!%40%23%24%25%5E%26\*\(\)%2B%3D__\%7D/), + + 'hello there escapeurl' => test_zci("Percent-encoded URL: hello%20there", + html => qr/hello%20there/), ); done_testing;