initial support for multiple URLs per image

This commit is contained in:
Mike Fährmann 2018-01-17 22:08:19 +01:00
parent 6174a5c4ef
commit 9d69401391
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 41 additions and 8 deletions

View File

@ -12,4 +12,7 @@ class Message():
# Numeric identifiers used as the first element of a message tuple
Version = 1
Directory = 2
Url = 3
# Headers = 4
# Cookies = 5
Queue = 6
# Urllist messages carry a list of URLs plus one keyword dict
Urllist = 7

View File

@ -106,6 +106,12 @@ class Job():
if self.pred_queue(url, kwds):
self.handle_queue(url, kwds)
elif msg[0] == Message.Urllist:
_, urls, kwds = msg
if self.pred_url(urls[0], kwds):
self.update_kwdict(kwds)
self.handle_urllist(urls, kwds)
elif msg[0] == Message.Version:
if msg[1] != 1:
raise "unsupported message-version ({}, {})".format(
@ -116,6 +122,10 @@ class Job():
def handle_url(self, url, keywords):
    """Handle Message.Url

    Nothing is done here; the job classes further down in this
    file provide their own versions of this method.
    """
def handle_urllist(self, urls, keywords):
    """Handle Message.Urllist

    Only the first entry of 'urls' is used: it is passed on to
    handle_url() together with 'keywords'.
    """
    first_url = urls[0]
    self.handle_url(first_url, keywords)
def handle_directory(self, keywords):
"""Handle Message.Directory"""
@ -144,14 +154,15 @@ class DownloadJob(Job):
def handle_url(self, url, keywords):
    """Download the resource specified in 'url'

    'keywords' is passed to _prepare_download(), which updates the
    path formatter, reports already existing files as skipped and
    applies the configured sleep interval.  The download is started
    only if that helper returns a true value, so the checks and the
    download itself happen exactly once per call.
    """
    if self._prepare_download(keywords):
        self.get_downloader(url).download(url, self.pathfmt)
def handle_urllist(self, urls, keywords):
    """Download one resource, trying the entries of 'urls' in order

    Stops after the first URL whose download() call returns a true
    value.  Nothing is attempted if _prepare_download() returns a
    false value for 'keywords'.
    """
    if not self._prepare_download(keywords):
        return
    for candidate in urls:
        downloader = self.get_downloader(candidate)
        if downloader.download(candidate, self.pathfmt):
            break
def handle_directory(self, keywords):
"""Set and create the target directory for downloads"""
@ -179,6 +190,15 @@ class DownloadJob(Job):
self.downloaders[scheme] = instance
return instance
def _prepare_download(self, keywords):
    """Set up the path formatter and tell whether to download

    Returns False (after reporting the path as skipped) if the
    target already exists, True otherwise.  The configured sleep
    interval is applied only in the latter case.
    """
    pathfmt = self.pathfmt
    pathfmt.set_keywords(keywords)
    if not pathfmt.exists():
        # wait only when a download is actually going to follow
        if self.sleep:
            time.sleep(self.sleep)
        return True
    self.out.skip(pathfmt.path)
    return False
class KeywordJob(Job):
"""Print available keywords"""
@ -246,6 +266,13 @@ class UrlJob(Job):
def handle_url(url, _):
    """Print 'url'; the second argument is ignored"""
    print(url)
@staticmethod
def handle_urllist(urls, _):
    """Print all entries of 'urls', one per line

    The first URL is printed as is; every following one is
    prefixed with '| '.  The second argument is ignored.
    """
    for position, url in enumerate(urls):
        print("| " if position else "", url, sep="")
def handle_queue(self, url, _):
try:
UrlJob(url, self, self.depth + 1).run()
@ -361,6 +388,9 @@ class DataJob(Job):
def handle_url(self, url, keywords):
    """Record a Message.Url entry holding 'url' and a copy of 'keywords'"""
    entry = (Message.Url, url, keywords.copy())
    self.data.append(entry)
def handle_urllist(self, urls, keywords):
    """Record a Message.Urllist entry with copies of 'urls' and 'keywords'"""
    entry = (Message.Urllist, list(urls), keywords.copy())
    self.data.append(entry)
def handle_directory(self, keywords):
self.data.append((Message.Directory, keywords.copy()))