add '--download-archive' cmdline option

… as well as a config file equivalent
2018-02-01 22:00:44 +01:00 · 2018-02-01 22:00:44 +01:00 · c0dd922c13
commit c0dd922c13
parent 8c3b713362
3 changed files with 22 additions and 0 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -344,6 +344,20 @@ Description User-Agent header value to be used for HTTP requests.
 =========== =====


+extractor.*.archive
+-------------------
+=========== =====
+Type        ``string``
+Default     ``null``
+Description File to store IDs of downloaded files in. Downloads of files
+            already recorded in this archive file will be skipped_.
+
+            The resulting archive file is not a plain text file but an SQLite3
+            database, as lookup operations are significantly faster when the
+            number of stored IDs gets reasonably large.
+=========== =====
+
+
 Extractor-specific Options
 ==========================

@@ -779,6 +793,7 @@ How To      - login and visit Tumblr's Applications_ section
 .. |datetime.max| replace:: ``datetime.max``
 .. |strptime| replace:: strftime() and strptime() Behavior

+.. _skipped: `extractor.*.skip`_
 .. _`date-min and date-max`: `extractor.reddit.date-min & .date-max`_
 .. _date-format: extractor.reddit.date-format_

--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -16,6 +16,7 @@
    },
    "extractor":
    {
+        "archive": null,
        "skip": true,
        "sleep": 0,

--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -206,6 +206,12 @@ def build_parser():
    )

    selection = parser.add_argument_group("Selection Options")
+    selection.add_argument(
+        "--download-archive",
+        metavar="FILE", dest="archive", action=ConfigAction,
+        help=("Record all downloaded files in the archive file and "
+              "skip downloading any file already in it.")
+    )
    selection.add_argument(
        "--range",
        metavar="RANGE", dest="image_range",