Implement --igon / --igoff
This commit is contained in:
parent
76ba117d34
commit
ee4dbe162e
@ -131,6 +131,9 @@ Options can come before or after the URL.
|
|||||||
Content-Length larger than N. (default: -1, don't skip anything). Can be changed during
|
Content-Length larger than N. (default: -1, don't skip anything). Can be changed during
|
||||||
the crawl by editing the `DIR/max_content_length` file.
|
the crawl by editing the `DIR/max_content_length` file.
|
||||||
|
|
||||||
|
* `--igon`: Print all URLs being ignored to the terminal and dashboard. Can be
|
||||||
|
changed during the crawl by `touch`ing (to stop printing) or `rm`ing (to start printing) the `DIR/igoff` file.
|
||||||
|
|
||||||
* `--level=N`: recurse `N` levels instead of `inf` levels.
|
* `--level=N`: recurse `N` levels instead of `inf` levels.
|
||||||
|
|
||||||
* `--page-requisites-level=N`: recurse page requisites `N` levels instead of `5` levels.
|
* `--page-requisites-level=N`: recurse page requisites `N` levels instead of `5` levels.
|
||||||
|
@ -46,6 +46,11 @@ def print_version(ctx, param, value):
|
|||||||
@click.option('--ignore-sets', default="", metavar='LIST',
|
@click.option('--ignore-sets', default="", metavar='LIST',
|
||||||
help='Alias for --igsets.')
|
help='Alias for --igsets.')
|
||||||
|
|
||||||
|
@click.option('--igon/--igoff', default=False,
|
||||||
|
help=
|
||||||
|
'--igon (default: false) to print all URLs being ignored to the terminal '
|
||||||
|
'and dashboard.')
|
||||||
|
|
||||||
@click.option('--max-content-length', default=-1, metavar='N',
|
@click.option('--max-content-length', default=-1, metavar='N',
|
||||||
help=
|
help=
|
||||||
"Skip the download of any response that claims a Content-Length "
|
"Skip the download of any response that claims a Content-Length "
|
||||||
@ -68,7 +73,7 @@ def print_version(ctx, param, value):
|
|||||||
@click.argument('start_url')
|
@click.argument('start_url')
|
||||||
|
|
||||||
def main(concurrency, concurrent, delay, recursive, offsite_links, igsets,
|
def main(concurrency, concurrent, delay, recursive, offsite_links, igsets,
|
||||||
ignore_sets, level, page_requisites_level, max_content_length, sitemaps,
|
ignore_sets, igon, level, page_requisites_level, max_content_length, sitemaps,
|
||||||
start_url):
|
start_url):
|
||||||
span_hosts_allow = "page-requisites,linked-pages"
|
span_hosts_allow = "page-requisites,linked-pages"
|
||||||
if not offsite_links:
|
if not offsite_links:
|
||||||
@ -106,8 +111,9 @@ start_url):
|
|||||||
with open("{}/igsets".format(working_dir), "w") as f:
|
with open("{}/igsets".format(working_dir), "w") as f:
|
||||||
f.write("global,{}".format(igsets))
|
f.write("global,{}".format(igsets))
|
||||||
|
|
||||||
with open("{}/igoff".format(working_dir), "w") as f:
|
if not igon:
|
||||||
pass
|
with open("{}/igoff".format(working_dir), "w") as f:
|
||||||
|
pass
|
||||||
|
|
||||||
with open("{}/ignores".format(working_dir), "w") as f:
|
with open("{}/ignores".format(working_dir), "w") as f:
|
||||||
pass
|
pass
|
||||||
|
@ -330,6 +330,8 @@ def update_igoff():
|
|||||||
job_data["suppress_ignore_reports"] = igoff
|
job_data["suppress_ignore_reports"] = igoff
|
||||||
return igoff
|
return igoff
|
||||||
|
|
||||||
|
update_igoff()
|
||||||
|
|
||||||
|
|
||||||
def maybe_log_ignore(url, pattern):
|
def maybe_log_ignore(url, pattern):
|
||||||
if not update_igoff():
|
if not update_igoff():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user