Unicode: Make '+' character in unicode codepoint query optional. (#4101)

* Made + in unicode codepoint queries optional

* Added test for optional + in codepoint query
master
Emily Tackett 2017-04-19 21:53:25 -05:00 committed by Zaahir Moolla
parent 99f0ea0f09
commit b9e7ad1888
2 changed files with 3 additions and 1 deletions

View File

@@ -9,7 +9,7 @@ use Unicode::Char (); # For name -> codepoint lookup
use Encode qw/encode_utf8/;
use constant {
CODEPOINT_RE => qr/^ \s* (?:U \+|\\(?:u|x{(?=.*}))) (?<codepoint> [a-f0-9]{4,6})}? \s* $/xi,
CODEPOINT_RE => qr/^ \s* (?:U \+?|\\(?:u|x{(?=.*}))) (?<codepoint> [a-f0-9]{4,6})}? \s* $/xi,
NAME_RE => qr/^ (?<name> [A-Z][A-Z\s]+) $/xi,
CHAR_RE => qr/^ \s* (?<char> .) \s* $/x,
UNICODE_RE => qr/^ (?:unicode|emoji|utf-(?:8|16|32)) \s+ (.+) $/xi,

View File

@@ -17,9 +17,11 @@ ddg_goodie_test(
# Raw query, "U+XXXX"
'U+263A' => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
'\u263A' => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
'u263A' => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),
# Same should work with the "unicode" start trigger too
'unicode U+263B' => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
'unicode u263B' => test_zci("\x{263B} U+263B BLACK SMILING FACE, decimal: 9787, HTML: &#9787;, UTF-8: 0xE2 0x98 0xBB, block: Miscellaneous Symbols"),
# Lookup by name, "unicode LATIN SMALL LETTER A WITH CIRCUMFLEX"
"unicode White Smiling Face" => test_zci("\x{263A} U+263A WHITE SMILING FACE, decimal: 9786, HTML: &#9786;, UTF-8: 0xE2 0x98 0xBA, block: Miscellaneous Symbols"),