wpull_hooks: validate regexps with re before passing to re2

This commit is contained in:
Ivan Kozik 2018-10-09 16:23:11 +00:00
parent e6b1709ef3
commit 9e8ad88703
2 changed files with 9 additions and 3 deletions

View File

@ -1 +1 @@
__version__ = '2.1.4'
__version__ = '2.1.5'

View File

@ -25,9 +25,15 @@ working_dir = os.environ["GRAB_SITE_WORKING_DIR"]
def cf(fname):
    """Return the path of ``fname`` joined onto the module-level ``working_dir``."""
    path = os.path.join(working_dir, fname)
    return path
def re2_compile(regexp):
    """Compile ``regexp`` with re2 after checking it with the stdlib ``re``.

    The pattern is compiled with ``re`` first purely as a validation step:
    re2 may be more prone to segfaulting on bad regexps, so an invalid
    pattern should fail in ``re.compile`` before re2 ever sees it.  The
    result of the ``re`` compilation is discarded.

    Returns whatever ``re2.compile`` returns for ``regexp``.
    """
    # Validation only; the compiled ``re`` object is intentionally unused.
    re.compile(regexp)
    compiled = re2.compile(regexp)
    return compiled
def compile_combined_regexp(patterns):
    """Compile all of ``patterns`` into one alternation regexp.

    Each pattern is wrapped in its own group and the groups are joined with
    ``|``, so the combined regexp matches wherever any single pattern would.
    Compilation goes through ``re2_compile`` so the combined pattern is
    validated the same way as the individual ones.

    NOTE: when ``patterns`` is empty the joined regexp is the empty string.

    Returns the compiled object produced by ``re2_compile``.
    """
    regexp = "|".join(f"({pattern})" for pattern in patterns)
    # Always use the validating wrapper; calling re2.compile directly here
    # would skip the validation step.
    return re2_compile(regexp)
def include_ignore_line(line):
    """Tell whether ``line`` should be kept as an ignore pattern.

    A line is kept when it is non-empty and does not start with ``#``.
    For a falsy ``line`` the value itself is returned (not ``False``),
    matching ``line and ...`` short-circuit semantics.
    """
    if not line:
        # Hand back the falsy input unchanged rather than coercing to bool.
        return line
    return not line.startswith("#")
@ -318,7 +324,7 @@ class GrabSitePlugin(WpullPlugin):
for ig in sorted(ignores):
self.print_to_terminal(f"\t{ig}")
self.compiled_ignores = [(ig, re2.compile(ig)) for ig in ignores]
self.compiled_ignores = [(ig, re2_compile(ig)) for ig in ignores]
self.combined_ignore_regexp = compile_combined_regexp(ignores)
def ignore_pattern_to_regexp_strings(self, pattern):