zeroclickinfo-goodies/lib/DDG/Goodie/RegexCheatSheet.pm

275 lines
9.8 KiB
Perl

package DDG::Goodie::RegexCheatSheet;
# ABSTRACT: Provide a cheatsheet for common Regular Expression syntax
use strict;
use warnings;
use DDG::Goodie;
# Answer-type identifier attached to results produced by this Goodie.
zci answer_type => "regex_cheat";
# Results are flagged as cached (is_cached => 1).
zci is_cached => 1;
# Phrases registered as "start" triggers. The handler at the bottom of this
# file is declared with `handle remainder`, so it works on whatever part of the
# query is left once the trigger phrase is removed (e.g. a single regex symbol).
# NOTE(review): both the bare words ('regex', 'regexp') and longer phrases that
# begin with them are listed; how the framework picks between overlapping
# triggers is not visible in this file.
triggers start =>
'regex cheatsheet',
'regex cheat sheet',
'regex help',
'regexp cheatsheet',
'regexp cheat sheet',
'regexp help',
'regex symbols',
'regex symbol',
'regexp symbols',
'regexp symbol',
'regex chars',
'regex char',
'regexp chars',
'regexp char',
'regex characters',
'regex character',
'regexp characters',
'regexp character',
'regex',
'regexp',
'regular expressions',
'regular expression',
'regex guide',
'regexp guide',
'regular expression guide',
'regexp reference',
'regex reference',
'regular expression reference';
# The two short keywords are additionally registered as "end" triggers.
triggers end => "regex", "regexp";
# Author credit and example queries / category metadata for this Goodie.
attribution github => ['https://github.com/mintsoft', 'mintsoft'];
primary_example_queries 'regex';
secondary_example_queries 'regexp $';
category 'computing_tools';
# Layout of the full cheat sheet. Each inner array is one output column and
# lists, top to bottom, the category titles rendered in that column. Every
# title here is used as a key into %categories by the handler.
my @category_column = (
['Anchors', 'Character Classes', 'POSIX Classes', 'Pattern Modifiers', 'Escape Sequences'],
['Quantifiers', 'Groups and Ranges', 'Assertions', 'Special Characters', 'String Replacement']
);
# Maps each table title to the ordered list of symbols shown in that table.
# Every symbol listed here is looked up in %syntax_map for its description,
# so the two structures must be kept in sync when symbols are added or removed.
my %categories = (
'Anchors' => [
'^', '\A', '$', '\Z', '\b', '\B', '\<', '\>'
],
'Character Classes' => [
'\c', '\s', '\S', '\d', '\D', '\w', '\W', '\x', '\O'
],
'POSIX Classes' => [
'[:upper:]', '[:lower:]', '[:alpha:]', '[:alnum:]', '[:digit:]',
'[:xdigit:]', '[:punct:]', '[:blank:]', '[:space:]', '[:cntrl:]',
'[:graph:]', '[:print:]', '[:word:]'
],
'Assertions' => [
'?=', '?!', '?<=', '?!= or ?<!', '?>', '?()', '?()|', '?#'
],
'Quantifiers' => [
'*', '+', '?', '{3}', '{3,}', '{2,5}'
],
'Escape Sequences' => [
'\\', '\Q', '\E'
],
'Special Characters' => [
'\n', '\r', '\t', '\v', '\f', '\ooo', '\xhh'
],
'Groups and Ranges' => [
'.', '(a|b)', '(...)', '(?:...)', '[abc]', '[^abc]', '[a-q]', '[A-Z]', '[0-9]'
],
'Pattern Modifiers' => [
'//g', '//i', '//m', '//s', '//x', '//e', '//U'
],
# q{...} is used for $' because the symbol itself contains a single quote.
'String Replacement' => [
'$n', '$2', '$1', '$`', q{$'}, '$+', '$&'
],
);
# Maps each regex symbol (exactly as spelled in %categories) to the short
# description shown next to it. The handler also consults this map directly
# when the query remainder is a single symbol. Keys are single-quoted so that
# backslashes and '$' are stored literally rather than interpolated.
my %syntax_map = (
# Groups and Ranges
'.' => 'Any character except newline (\n)',
'(a|b)' => 'a or b',
'(...)' => 'Group',
'(?:...)' => 'Passive (non-capturing) group',
'[abc]' => 'Single character (a or b or c)',
'[^abc]' => 'Single character (not a or b or c)',
'[a-q]' => 'Single character range (a or b ... or q)',
'[A-Z]' => 'Single character range (A or B ... or Z)',
'[0-9]' => 'Single digit from 0 to 9',
# Anchors
'^' => "Start of string or line",
'\A' => "Start of string",
'$' => "End of string or line",
'\Z' => "End of string",
'\b' => 'Word boundary',
'\B' => 'Not word boundary',
'\<' => 'Start of word',
'\>' => 'End of word',
# Character Classes
'\c' => 'Control character',
'\s' => 'Whitespace',
'\S' => 'Not Whitespace',
'\d' => 'Digit',
'\D' => 'Not digit',
'\w' => 'Word',
'\W' => 'Not Word',
'\x' => 'Hexadecimal digit',
'\O' => 'Octal Digit',
# POSIX Classes
'[:upper:]' => 'Uppercase letters [A-Z]',
'[:lower:]' => 'Lowercase letters [a-z]',
'[:alpha:]' => 'All letters [A-Za-z]',
'[:alnum:]' => 'Digits and letters [A-Za-z0-9]',
'[:digit:]' => 'Digits [0-9]',
# NOTE(review): the description lists lowercase a-f only; POSIX xdigit is
# commonly documented as [0-9A-Fa-f] -- confirm before changing the text.
'[:xdigit:]' => 'Hexadecimal digits [0-9a-f]',
# Disabled variant of the entry below that spelled out every punctuation character.
# '[:punct:]' => 'Punctuation [\]\[!"#$%&'."'".'()*+,./:;<=>?@\^_`{|}~-]',
'[:punct:]' => 'Punctuation',
'[:blank:]' => 'Space and tab [ \t]',
'[:space:]' => 'Blank characters [ \t\r\n\v\f]',
'[:cntrl:]' => 'Control characters [\x00-\x1F\x7F]',
'[:graph:]' => 'Printed characters [\x21-\x7E]',
'[:print:]' => 'Printed characters and spaces [\x20-\x7E]',
'[:word:]' => 'Digits, letters and underscore [A-Za-z0-9_]',
# Assertions
'?=' => 'Lookahead assertion',
'?!' => 'Negative lookahead',
'?<=' => 'Lookbehind assertion',
'?!= or ?<!' => 'Negative lookbehind',
'?>' => 'Once-only Subexpression',
'?()' => 'Condition [if then]',
'?()|' => 'Condition [if then else]',
'?#' => 'Comment',
# Quantifiers
'*' => '0 or more',
'+' => '1 or more',
'?' => '0 or 1 (optional)',
'{3}' => 'Exactly 3',
'{3,}' => '3 or more',
'{2,5}' => '2, 3, 4 or 5',
# Escape Sequences ('\\' inside single quotes is one literal backslash)
'\\' => 'Escape following character',
'\Q' => 'Begin literal sequence',
'\E' => 'End literal sequence',
# Special Characters
'\n' => 'New line',
'\r' => 'Carriage return',
'\t' => 'Tab',
'\v' => 'Vertical tab',
'\f' => 'Form feed',
'\ooo' => 'Octal character ooo',
'\xhh' => 'Hex character hh',
# Pattern Modifiers
'//g' => 'Global Match (all occurrences)',
'//i' => 'Case-insensitive',
'//m' => 'Multiple line',
'//s' => 'Treat string as single line',
'//x' => 'Allow comments and whitespace',
'//e' => 'Evaluate replacement',
'//U' => 'Ungreedy pattern',
# String Replacement
'$n' => 'n-th non-passive group',
'$2' => '"xyz" in /^(abc(xyz))$/',
'$1' => '"xyz" in /^(?:abc)(xyz)$/',
'$`' => 'Before matched string',
q{$'} => 'After matched string',
'$+' => 'Last matched string',
'$&' => 'Entire matched string',
);
# Decide whether two range endpoints form a usable character range.
#   $low, $high - the endpoint strings taken from a "[x-y]" query.
# Both endpoints must contain a character of the same kind (digit, lowercase
# letter or uppercase letter). When they do, the result is the string
# comparison "$high gt $low", i.e. true only for a strictly ascending range.
# When the endpoints share no kind, nothing (empty list / undef) is returned.
sub are_valid_char_classes($$) {
    my ($low, $high) = @_;

    for my $kind (qr/[0-9]/, qr/[a-z]/, qr/[A-Z]/) {
        next unless $low =~ $kind && $high =~ $kind;
        return $high gt $low;
    }

    return;
}
# Distance between two characters measured in code points.
#   $from, $to - strings; only the first character of each is considered (ord).
# Returns ord($to) - ord($from), which is negative when $to sorts before $from.
sub difference_between($$) {
    my ($from, $to) = @_;
    my ($from_code, $to_code) = map { ord($_) } ($from, $to);
    return $to_code - $from_code;
}
# Render one cell of a cheat-sheet table row.
#   $text    - cell content; it is HTML-encoded before being emitted.
#   $is_code - true to wrap the content in <code> (used for the symbol column).
# Returns the "<td>...</td>" markup as a string.
sub add_table_data {
    my ($text, $is_code) = @_;
    my $encoded = html_enc($text);
    return "<td><code>$encoded</code></td>" if $is_code;
    return "<td>$encoded</td>";
}

# Query handler. The remainder of the query (after the trigger) arrives in $_.
#   - Empty remainder: return the complete cheat sheet (plain text + HTML
#     tables laid out in the columns given by @category_column).
#   - "[x-y]": describe a simple single-character range.
#   - "{n}", "{n,}", "{n,m}": describe the quantifier with the user's numbers.
#   - Anything else: look the remainder up in %syntax_map; return nothing
#     when it is not a known symbol.
# Every successful path returns the list: answer => ..., html => ..., heading => ...
handle remainder => sub {
    my $query   = $_;
    my $heading = 'Regex Cheat Sheet';

    # The user asked about one specific pattern.
    if (length $query) {
        my $code = '<code>' . html_enc($query) . '</code>';

        # Simple ranges only: [a-e], [1-2], ...
        if ($query =~ /^\[([a-zA-Z0-9])\-([a-zA-Z0-9])\]$/) {
            # Copy the captures straight away so later calls cannot disturb them.
            my ($first, $last) = ($1, $2);
            return unless are_valid_char_classes($first, $last);

            # Short ranges are listed in full; longer ones are abbreviated
            # as "a or b ... or z".
            my @members      = ($first .. $last);
            my $range_string = difference_between($first, $last) < 3
                ? join(' or ', @members)
                : join(' or ', @members[0, 1]) . " ... or $last";

            return answer  => "$query - Single character range ($range_string)",
                   html    => "$code - Single character range ($range_string)",
                   heading => $heading;
        }
        # {n}: e.g. {5} reads "Exactly 5 occurrences".
        elsif ($query =~ /^\{([0-9]+)\}$/) {
            my $count = $1;
            # The HTML must show the number, not the whole "{n}" token again.
            return answer  => "$query - Exactly $count occurrences",
                   html    => "$code - Exactly $count occurrences",
                   heading => $heading;
        }
        # {n,} and {n,m}: e.g. {4,} reads "4 or more".
        elsif ($query =~ /^\{([0-9]+),([0-9]+)?\}$/) {
            my ($min, $max) = ($1, $2);

            # Test definedness, not truth: an explicit upper bound of 0 is
            # still an upper bound and must go through the ordering check.
            if (defined $max) {
                return unless $min < $max;
                return answer  => "$query - Between $min and $max occurrences",
                       html    => "$code - Between $min and $max occurrences",
                       heading => $heading;
            }

            return answer  => "$query - $min or more",
                   html    => "$code - $min or more occurrences",
                   heading => $heading;
        }

        # Fall back to the static list of known symbols.
        my $description = $syntax_map{$query};
        return unless $description;

        return answer  => "$query - $description",
               html    => "$code - " . html_enc($description),
               heading => $heading;
    }

    # No specific pattern requested: build the complete cheat sheet, one
    # HTML fragment per column, in the order given by @category_column.
    my $text_output  = '';
    my @html_columns;

    for my $column (0 .. $#category_column) {
        for my $category (@{ $category_column[$column] }) {
            my $new_table = "<table class='regex-table'><b>$category</b>";
            $text_output .= "$category\n";

            for my $symbol (@{ $categories{$category} }) {
                my $description = $syntax_map{$symbol};
                $new_table .= '<tr>'
                    . add_table_data($symbol, 1)
                    . add_table_data($description, 0)
                    . "</tr>\n";
                $text_output .= "\t$symbol - $description\n";
            }

            $text_output .= "\n";
            $new_table   .= "</table>\n";
            $html_columns[$column] .= $new_table;
        }
    }

    my $html_output = "<div class='regex-container'><div class='regex-column'>"
        . join("</div><div class='regex-column'>", @html_columns)
        . "</div></div>";

    return answer => $text_output, html => $html_output, heading => $heading;
};
1;