From baccf8a9588fd6b2a4149c76c7501bf7c9029e3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Fri, 8 Jun 2018 17:39:02 +0200
Subject: [PATCH] improve postprocessor handling

- add pathfmt argument for __init__()
- add finalization step
- add option to keep or delete zipped files
---
 gallery_dl/job.py                    | 77 +++++++++++++++-------------
 gallery_dl/postprocessor/classify.py |  6 +--
 gallery_dl/postprocessor/common.py   |  6 ++-
 gallery_dl/postprocessor/exec.py     |  2 +-
 gallery_dl/postprocessor/zip.py      | 26 +++++++---
 gallery_dl/util.py                   |  5 ++
 6 files changed, 75 insertions(+), 47 deletions(-)

diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index f3f8ca74..219e7302 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -93,6 +93,7 @@ class Job():
                        "https://github.com/mikf/gallery-dl/issues ."),
                       exc.__class__.__name__, exc)
             log.debug("Traceback", exc_info=True)
+        self.handle_finalize()
 
     def dispatch(self, msg):
         """Call the appropriate message handler"""
@@ -137,6 +138,9 @@ class Job():
     def handle_queue(self, url, keywords):
         """Handle Message.Queue"""
 
+    def handle_finalize(self):
+        """Handle job finalization"""
+
     def update_kwdict(self, kwdict):
         """Update 'kwdict' with additional metadata"""
         kwdict["category"] = self.extractor.category
@@ -207,12 +211,40 @@ class DownloadJob(Job):
 
     def handle_directory(self, keywords):
         """Set and create the target directory for downloads"""
-        if not self.pathfmt:
-            self.pathfmt = util.PathFormat(self.extractor)
-            self.sleep = self.extractor.config("sleep")
-            self._init_archive(self.extractor.config("archive"))
-            self._init_postprocessors(self.extractor.config("postprocessor"))
+        if self.pathfmt:
+            self.pathfmt.set_directory(keywords)
+            return
+
+        # delayed initialization
+        self.pathfmt = util.PathFormat(self.extractor)
         self.pathfmt.set_directory(keywords)
+        self.sleep = self.extractor.config("sleep")
+
+        archive = self.extractor.config("archive")
+        if archive:
+            path = util.expand_path(archive)
+            self.archive = util.DownloadArchive(path, self.extractor)
+
+        postprocessors = self.extractor.config("postprocessors")
+        if postprocessors:
+            self.postprocessors = []
+            for pp_dict in postprocessors:
+                if "name" not in pp_dict:
+                    postprocessor.log.warning("no 'name' specified")
+                    continue
+                name = pp_dict["name"]
+                pp_cls = postprocessor.find(name)
+                if not pp_cls:
+                    postprocessor.log.warning("'%s' not found", name)
+                    continue
+                try:
+                    pp_obj = pp_cls(self.pathfmt, pp_dict)
+                except Exception as exc:
+                    postprocessor.log.error(
+                        "%s: initialization failed: %s %s",
+                        name, exc.__class__.__name__, exc)
+                else:
+                    self.postprocessors.append(pp_obj)
 
     def handle_queue(self, url, keywords):
         try:
@@ -220,6 +252,11 @@ class DownloadJob(Job):
         except exception.NoExtractorError:
             self._write_unsupported(url)
 
+    def handle_finalize(self):
+        if self.postprocessors:
+            for pp in self.postprocessors:
+                pp.finalize()
+
     def get_downloader(self, url):
         """Return, and possibly construct, a downloader suitable for 'url'"""
         pos = url.find(":")
@@ -233,36 +270,6 @@ class DownloadJob(Job):
             self.downloaders[scheme] = instance
         return instance
 
-    def _init_archive(self, archive):
-        if archive:
-            path = util.expand_path(archive)
-            self.archive = util.DownloadArchive(path, self.extractor)
-
-    def _init_postprocessors(self, postprocessors):
-        if not postprocessors:
-            return
-
-        self.postprocessors = []
-        for pp_dict in postprocessors:
-            if "name" not in pp_dict:
-                postprocessor.log.warning("no 'name' specified")
-                continue
-
-            name = pp_dict["name"]
-            pp_cls = postprocessor.find(name)
-            if not pp_cls:
-                postprocessor.log.warning("'%s' not found", name)
-                continue
-
-            try:
-                pp_obj = pp_cls(pp_dict)
-            except Exception as exc:
-                postprocessor.log.error(
-                    "%s: initialization failed: %s %s",
-                    name, exc.__class__.__name__, exc)
-            else:
-                self.postprocessors.append(pp_obj)
-
 
 class KeywordJob(Job):
     """Print available keywords"""
diff --git a/gallery_dl/postprocessor/classify.py b/gallery_dl/postprocessor/classify.py
index a43b2b98..3af73f07 100644
--- a/gallery_dl/postprocessor/classify.py
+++ b/gallery_dl/postprocessor/classify.py
@@ -14,7 +14,7 @@ import os
 
 class ClassifyPP(PostProcessor):
 
-    DEFAULT_MAP = {
+    DEFAULT_MAPPING = {
         "Music"   : ("mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"),
         "Video"   : ("flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv",
                      "webm", "vob", "wmv"),
@@ -22,9 +22,9 @@ class ClassifyPP(PostProcessor):
         "Archives": ("zip", "rar", "7z", "tar", "gz", "bz2"),
     }
 
-    def __init__(self, options):
+    def __init__(self, pathfmt, options):
         PostProcessor.__init__(self)
-        mapping = options.get("mapping", self.DEFAULT_MAP)
+        mapping = options.get("mapping", self.DEFAULT_MAPPING)
 
         self.mapping = {
             ext: directory
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index 2873703e..af1c1ef5 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -12,7 +12,11 @@ from . import log
 
 
 class PostProcessor():
+    """Base class for postprocessors"""
     log = log
 
     def run(self, pathfmt):
-        raise NotImplementedError()
+        """Execute the postprocessor for a file"""
+
+    def finalize(self):
+        """Cleanup"""
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index a5a9f7bd..7d190402 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -14,7 +14,7 @@ import subprocess
 
 class ExecPP(PostProcessor):
 
-    def __init__(self, options):
+    def __init__(self, pathfmt, options):
         PostProcessor.__init__(self)
         self.args = options["command"]
         if options.get("async", False):
diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py
index cf7bbf1b..fc5636f3 100644
--- a/gallery_dl/postprocessor/zip.py
+++ b/gallery_dl/postprocessor/zip.py
@@ -6,7 +6,7 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Add files to ZIP archives"""
+"""Store files in ZIP archives"""
 
 from .common import PostProcessor
 import zipfile
@@ -21,19 +21,31 @@ class ZipPP(PostProcessor):
         "lzma": zipfile.ZIP_LZMA,
     }
 
-    def __init__(self, options):
+    def __init__(self, pathfmt, options):
         PostProcessor.__init__(self)
+        self.delete = not options.get("keep-files", False)
         self.ext = "." + options.get("extension", "zip")
-
         algorithm = options.get("compression", "store")
         if algorithm not in self.COMPRESSION_ALGORITHMS:
+            self.log.warning(
+                "unknown compression algorithm '%s'; falling back to 'store'",
+                algorithm)
             algorithm = "store"
-        self.compression = self.COMPRESSION_ALGORITHMS[algorithm]
+
+        path = pathfmt.realdirectory + self.ext
+        self.zfile = zipfile.ZipFile(
+            path, "a", self.COMPRESSION_ALGORITHMS[algorithm], True)
 
     def run(self, pathfmt):
-        archive = pathfmt.realdirectory + self.ext
-        with zipfile.ZipFile(archive, "a", self.compression, True) as zfile:
-            zfile.write(pathfmt.temppath, pathfmt.filename)
+        # 'NameToInfo' is not officially documented, but it's available
+        # for all supported Python versions and using it directly is a lot
+        # better than calling getinfo()
+        if pathfmt.filename not in self.zfile.NameToInfo:
+            self.zfile.write(pathfmt.temppath, pathfmt.filename)
+            pathfmt.delete = self.delete
+
+    def finalize(self):
+        self.zfile.close()
 
 
 __postprocessor__ = ZipPP
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 131b8730..8ed3e225 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -359,6 +359,7 @@ class PathFormat():
             "directory", extractor.directory_fmt)
         self.formatter = Formatter(extractor.config("keywords-default"))
 
+        self.delete = False
         self.has_extension = False
         self.keywords = {}
         self.filename = ""
@@ -472,6 +473,10 @@ class PathFormat():
 
     def finalize(self):
         """Move tempfile to its target location"""
+        if self.delete:
+            os.unlink(self.temppath)
+            return
+
         if self.temppath == self.realpath:
             return
 