diff --git a/lib/DDG/Goodie/Regexp.pm b/lib/DDG/Goodie/Regexp.pm index f98e61de8..0e5841b31 100644 --- a/lib/DDG/Goodie/Regexp.pm +++ b/lib/DDG/Goodie/Regexp.pm @@ -2,6 +2,7 @@ package DDG::Goodie::Regexp; # ABSTRACT: Parse a regexp. use strict; +use warnings; use DDG::Goodie; use Safe; @@ -9,26 +10,88 @@ use Safe; zci answer_type => "regexp"; zci is_cached => 1; -triggers query_lc => qr/^regex[p]? [\/\\](.+?)[\/\\] (.+)$/i; +triggers start => 'regex', 'match', 'regexp'; +triggers any => '=~'; -handle query => sub { - my $regexp = $1; - my $str = $2; +sub compile_re { + my ($re, $modifiers, $compiler) = @_; + $compiler->($re, $modifiers); +} - my $compiler = Safe->new->reval(q{ sub { qr/$_[0]/ } }); +# Using $& causes a performance penalty, apparently. +sub get_full_match { + return substr(shift, $-[0], $+[0] - $-[0]); +} - sub compile_re { - my ( $re, $compiler ) = @_; - $compiler->($re); +# Ensures that the correct numbered matches are being produced. +sub real_number_matches { + my ($one, @numbered) = @_; + # If the first match isn't defined then neither are the others! + return defined $one ? 
@numbered : (); +} + +sub get_match_record { + my ($regexp, $str, $modifiers) = @_; + my $compiler = Safe->new->reval(q { sub { qr/(?$_[1])$_[0]/ } }) or return; + BEGIN { + $SIG{'__WARN__'} = sub { + warn $_[0] if $_[0] !~ /Use of uninitialized value in regexp compilation/i; + } } - my @results = (); - eval { - @results = $str =~ compile_re($regexp, $compiler); + my @numbered = $str =~ compile_re($regexp, $modifiers, $compiler) or return; + @numbered = real_number_matches($1, @numbered); + my $matches = {}; + $matches->{'Full Match'} = get_full_match($str); + foreach my $match (keys %+) { + $matches->{"Named Capture <$match>"} = $+{$match}; }; + my $i = 1; + foreach my $match (@numbered) { + $matches->{"Subpattern Match $i"} = $match; + $i++; + }; + return $matches; +} - return join( ' | ', @results ), heading => 'Regexp Result' if @results; - return; +my $regex_re = qr/\/(?<regex>.+)\/(?<modifiers>i)?/; + +sub extract_regex_text { + my $query = shift; + $query =~ /^(?<text>.+) =~ $regex_re$/; + ($+{regex} && $+{text}) || ($query =~ /^(?:match\s*regexp?|regexp?)\s*$regex_re\s+(?<text>.+)$/); + return unless defined $+{regex} && defined $+{text}; + my $modifiers = $+{modifiers} // ''; + return ($+{regex}, $+{text}, $modifiers); +} + +sub get_match_keys { return sort (keys %{$_[0]}) } + +handle query => sub { + my $query = $_; + my ($regexp, $str, $modifiers) = extract_regex_text($query) or return; + my $matches = get_match_record($regexp, $str, $modifiers) or return; + my @key_order = get_match_keys($matches); + return unless $matches->{'Full Match'} ne ''; + + return $matches, + structured_answer => { + id => 'regexp', + name => 'Answer', + data => { + title => "Regular Expression Match", + subtitle => "Match regular expression /$regexp/$modifiers on $str", + record_data => $matches, + record_keys => \@key_order, + }, + templates => { + group => 'list', + options => { + content => 'record', + }, + moreAt => 0, + }, + }; }; 1; diff --git a/share/goodie/regexp/regexp.css 
b/share/goodie/regexp/regexp.css new file mode 100644 index 000000000..d3a1f7e6d --- /dev/null +++ b/share/goodie/regexp/regexp.css @@ -0,0 +1,4 @@ +.zci--regexp .record .record__cell--key { + width: 20em; + text-transform: none; +} diff --git a/t/Regexp.t b/t/Regexp.t index f251f9031..32758def6 100644 --- a/t/Regexp.t +++ b/t/Regexp.t @@ -8,24 +8,79 @@ use DDG::Test::Goodie; zci answer_type => 'regexp'; zci is_cached => 1; -ddg_goodie_test( - [qw( DDG::Goodie::Regexp )], - 'regexp /(hello\s)/ hello probably' => test_zci( - "hello ", - heading => 'Regexp Result', - ), - 'regexp /(dd)/ ddg' => test_zci( - "dd", - heading => 'Regexp Result', - ), - 'regex /(poss)/ many possibilities' => test_zci( - "poss", - heading => 'Regexp Result', - ), - 'regexp /(.*)/ ddg' => test_zci( - 'ddg', - heading => 'Regexp Result' - ), +sub build_structured_answer { + my ($result, $expression, $text) = @_; + return $result, + structured_answer => { + id => 'regexp', + name => 'Answer', + data => { + title => 'Regular Expression Match', + subtitle => "Match regular expression $expression on $text", + record_data => $result, + record_keys => \@{[sort (keys %$result)]}, + }, + templates => { + group => 'list', + options => { + content => 'record', + }, + moreAt => 0, + }, + }; +} + +sub build_test { test_zci(build_structured_answer(@_)) } + +ddg_goodie_test([qw( DDG::Goodie::Regexp )], + 'regexp /(?Harry|Larry) is awesome/ Harry is awesome' => build_test({ + 'Full Match' => 'Harry is awesome', + 'Named Capture ' => 'Harry', + 'Subpattern Match 1' => 'Harry', + }, '/(?Harry|Larry) is awesome/', 'Harry is awesome'), + 'regex /(he|she) walked away/ he walked away' => build_test({ + 'Full Match' => 'he walked away', + 'Subpattern Match 1' => 'he', + }, '/(he|she) walked away/', 'he walked away'), + 'match regex /How are (?:we|you) (doing|today)\?/ How are you today?' 
=> build_test({ + 'Full Match' => 'How are you today?', + 'Subpattern Match 1' => 'today', + }, '/How are (?:we|you) (doing|today)\?/', 'How are you today?'), + 'abc =~ /[abc]+/' => build_test({ + 'Full Match' => 'abc', + }, '/[abc]+/', 'abc'), + 'DDG::Goodie::Regexp =~ /^DDG::Goodie::(?\w+)$/' => build_test({ + 'Full Match' => 'DDG::Goodie::Regexp', + 'Named Capture ' => 'Regexp', + 'Subpattern Match 1' => 'Regexp', + }, '/^DDG::Goodie::(?\w+)$/', 'DDG::Goodie::Regexp'), + 'regexp /foo/ foo' => build_test({ + 'Full Match' => 'foo', + }, '/foo/', 'foo'), + # Modifiers + 'Foo =~ /(foo)/i' => build_test({ + 'Full Match' => 'Foo', + 'Subpattern Match 1' => 'Foo', + }, '/(foo)/i', 'Foo'), + 'regexp /hello/i HELLO' => build_test({ + 'Full Match' => 'HELLO', + }, '/hello/i', 'HELLO'), + # Primary example query + 'regexp /(.*)/ ddg' => build_test({ + 'Full Match' => 'ddg', + 'Subpattern Match 1' => 'ddg', + }, '/(.*)/', 'ddg'), + # Does not match. + 'regexp /foo/ bar' => undef, + 'match /^foo$/ foo bar' => undef, + # Should not trigger. + 'What is regex?' => undef, + 'regex cheatsheet' => undef, + 'regex' => undef, + '/foo/ =~ foo' => undef, + 'regex foo /foo/' => undef, + 'BaR =~ /bar/x' => undef, + 'regexp /(?h)/ h' => undef, ); done_testing;