implement 'parent-metadata' option (#1364)

Experimental: this option might not work as expected.
This commit is contained in:
Mike Fährmann 2021-03-11 01:10:34 +01:00
parent 4be27ff0fe
commit df94182e11
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 23 additions and 7 deletions

View File

@ -133,6 +133,16 @@ Description
for any spawned child extractors.
extractor.*.parent-metadata
---------------------------
Type
``bool``
Default
``false``
Description
Overwrite any metadata provided by a child extractor with the metadata of its parent extractor.
extractor.*.path-restrict
-------------------------
Type

View File

@ -42,7 +42,14 @@ class Job():
self.status = 0
self.pred_url = self._prepare_predicates("image", True)
self.pred_queue = self._prepare_predicates("chapter", False)
self.kwdict = {}
# user-supplied metadata
kwdict = self.extractor.config("keywords")
if kwdict:
self.kwdict.update(kwdict)
# data from parent job
if parent:
pextr = parent.extractor
@ -57,9 +64,6 @@ class Job():
# reuse connection adapters
extr.session.adapters = pextr.session.adapters
# user-supplied metadata
self.userkwds = self.extractor.config("keywords")
def run(self):
"""Execute or run the job"""
sleep = self.extractor.config("sleep-extractor")
@ -137,8 +141,8 @@ class Job():
extr = self.extractor
kwdict["category"] = extr.category
kwdict["subcategory"] = extr.subcategory
if self.userkwds:
kwdict.update(self.userkwds)
if self.kwdict:
kwdict.update(self.kwdict)
def _prepare_predicates(self, target, skip=True):
predicates = []
@ -183,7 +187,7 @@ class Job():
class DownloadJob(Job):
"""Download images into appropriate directory/filename locations"""
def __init__(self, url, parent=None):
def __init__(self, url, parent=None, kwdict=None):
Job.__init__(self, url, parent)
self.log = self.get_logger("download")
self.blacklist = None
@ -198,6 +202,8 @@ class DownloadJob(Job):
pfmt = parent.pathfmt
if pfmt and parent.extractor.config("parent-directory"):
self.extractor._parentdir = pfmt.directory
if kwdict and parent.extractor.config("parent-metadata"):
self.kwdict.update(kwdict)
else:
self.visited = set()
@ -291,7 +297,7 @@ class DownloadJob(Job):
extr = None
if extr:
self.status |= self.__class__(extr, self).run()
self.status |= self.__class__(extr, self, kwdict).run()
else:
self._write_unsupported(url)