Add --no-global-igset for starting a crawl without the "global" ignore set
parent
0e67d79ae7
commit
4994331eea
|
@@ -71,6 +71,9 @@ def patch_dns_inet_is_multicast():
|
|||
@click.option('--ignore-sets', default="", metavar='LIST',
|
||||
help='Alias for --igsets.')
|
||||
|
||||
@click.option('--no-global-igset', is_flag=True,
|
||||
help='Do not add the "global" ignore set.')
|
||||
|
||||
@click.option('--import-ignores', default=None, metavar='FILE',
|
||||
help='Copy this file to DIR/ignores before the crawl begins.')
|
||||
|
||||
|
@@ -169,10 +172,10 @@ def patch_dns_inet_is_multicast():
|
|||
@click.argument('start_url', nargs=-1, required=False)
|
||||
|
||||
def main(concurrency, concurrent, delay, recursive, offsite_links, igsets,
|
||||
ignore_sets, import_ignores, igon, debug, video, level, page_requisites_level,
|
||||
max_content_length, sitemaps, dupespotter, warc_max_size, ua, input_file,
|
||||
wpull_args, start_url, id, dir, finished_warc_dir, permanent_error_status_codes,
|
||||
which_wpull_args_partial, which_wpull_command):
|
||||
ignore_sets, no_global_igset, import_ignores, igon, debug, video, level,
|
||||
page_requisites_level, max_content_length, sitemaps, dupespotter, warc_max_size,
|
||||
ua, input_file, wpull_args, start_url, id, dir, finished_warc_dir,
|
||||
permanent_error_status_codes, which_wpull_args_partial, which_wpull_command):
|
||||
"""
|
||||
Runs a crawl on one or more URLs. For additional help, see
|
||||
|
||||
|
@@ -326,7 +329,7 @@ which_wpull_args_partial, which_wpull_command):
|
|||
f.write(str(max_content_length))
|
||||
|
||||
with open("{}/igsets".format(working_dir), "w") as f:
|
||||
f.write("global,{}".format(igsets))
|
||||
f.write("{}{}".format("" if no_global_igset else "global,", igsets))
|
||||
|
||||
if video:
|
||||
with open("{}/video".format(working_dir), "w") as f:
|
||||
|
|
Loading…
Reference in New Issue