Implement --igon / --igoff
This commit is contained in:
parent
76ba117d34
commit
ee4dbe162e
@ -131,6 +131,9 @@ Options can come before or after the URL.
|
||||
Content-Length larger than N. (default: -1, don't skip anything). Can be changed during
|
||||
the crawl by editing the `DIR/max_content_length` file.
|
||||
|
||||
* `--igon`: Print all URLs being ignored to the terminal and dashboard. Can be
|
||||
changed during the crawl by `touch`ing (to stop printing) or `rm`ing (to resume printing) the `DIR/igoff` file.
|
||||
|
||||
* `--level=N`: recurse `N` levels instead of `inf` levels.
|
||||
|
||||
* `--page-requisites-level=N`: recurse page requisites `N` levels instead of `5` levels.
|
||||
|
@ -46,6 +46,11 @@ def print_version(ctx, param, value):
|
||||
@click.option('--ignore-sets', default="", metavar='LIST',
|
||||
help='Alias for --igsets.')
|
||||
|
||||
@click.option('--igon/--igoff', default=False,
|
||||
help=
|
||||
'--igon (default: false) to print all URLs being ignored to the terminal '
|
||||
'and dashboard.')
|
||||
|
||||
@click.option('--max-content-length', default=-1, metavar='N',
|
||||
help=
|
||||
"Skip the download of any response that claims a Content-Length "
|
||||
@ -68,7 +73,7 @@ def print_version(ctx, param, value):
|
||||
@click.argument('start_url')
|
||||
|
||||
def main(concurrency, concurrent, delay, recursive, offsite_links, igsets,
|
||||
ignore_sets, level, page_requisites_level, max_content_length, sitemaps,
|
||||
ignore_sets, igon, level, page_requisites_level, max_content_length, sitemaps,
|
||||
start_url):
|
||||
span_hosts_allow = "page-requisites,linked-pages"
|
||||
if not offsite_links:
|
||||
@ -106,6 +111,7 @@ start_url):
|
||||
with open("{}/igsets".format(working_dir), "w") as f:
|
||||
f.write("global,{}".format(igsets))
|
||||
|
||||
if not igon:
|
||||
with open("{}/igoff".format(working_dir), "w") as f:
|
||||
pass
|
||||
|
||||
|
@ -330,6 +330,8 @@ def update_igoff():
|
||||
job_data["suppress_ignore_reports"] = igoff
|
||||
return igoff
|
||||
|
||||
update_igoff()
|
||||
|
||||
|
||||
def maybe_log_ignore(url, pattern):
|
||||
if not update_igoff():
|
||||
|
Loading…
x
Reference in New Issue
Block a user