2015-04-10 21:45:41 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2018-01-17 15:49:46 +01:00
|
|
|
# Copyright 2014-2018 Mike Fährmann
|
2015-04-10 21:45:41 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
|
|
|
"""Common classes and constants used by downloader modules."""
|
|
|
|
|
2014-10-12 21:56:44 +02:00
|
|
|
import os
|
2017-10-24 12:53:03 +02:00
|
|
|
import time
|
2017-10-24 23:33:44 +02:00
|
|
|
import logging
|
2017-11-10 21:35:53 +01:00
|
|
|
from .. import config, util, exception
|
2017-11-06 21:52:42 +01:00
|
|
|
from requests.exceptions import RequestException
|
2014-10-12 21:56:44 +02:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2017-10-24 12:53:03 +02:00
|
|
|
class DownloaderBase():
    """Base class for downloaders

    Implements the generic retry / resume / finalize loop in
    'download_impl()'.  Subclasses supply the transport-specific parts by
    overriding 'connect()', 'receive()', 'reset()' and 'get_extension()'.
    """
    # second element of the config path used by 'config()'
    scheme = ""
    # upper bound for the attempt counter in 'download_impl()'
    retries = 1

    def __init__(self, session, output):
        """Store 'session' and 'output' and read the part-file settings"""
        self.session = session
        self.out = output
        self.log = logging.getLogger("download")
        # True while a target file has been opened but not yet finalized
        self.downloading = False
        self.part = self.config("part", True)
        self.partdir = self.config("part-directory")

        if self.partdir:
            self.partdir = util.expand_path(self.partdir)
            os.makedirs(self.partdir, exist_ok=True)

    def config(self, key, default=None):
        """Interpolate config value for 'key'"""
        return config.interpolate(("downloader", self.scheme, key), default)

    def download(self, url, pathfmt):
        """Download the resource at 'url' to the location given by 'pathfmt'

        Returns the result of 'download_impl()'.  Any exception raised by it
        is re-raised after an empty line has been printed.  If a download
        was started but not finished and part-files are disabled, the
        incomplete file at 'pathfmt.realpath' is removed.
        """
        # Reset per call: a previously failed download leaves this flag set,
        # and without the reset a later call that fails before opening its
        # own file would delete that call's (possibly intact) target.
        self.downloading = False
        try:
            return self.download_impl(url, pathfmt)
        except Exception:
            print()
            raise
        finally:
            # remove file from incomplete downloads
            if self.downloading and not self.part:
                try:
                    os.remove(pathfmt.realpath)
                except (OSError, AttributeError):
                    # best effort: file or 'realpath' attribute is missing
                    pass

    def download_impl(self, url, pathfmt):
        """Actual implementation of the download process

        Returns True if the file was downloaded or skipped because it
        already exists, and False if connecting failed or all attempts
        were used up.
        """
        adj_ext = None
        tries = 0
        msg = ""

        if self.part:
            pathfmt.part_enable(self.partdir)

        while True:
            self.reset()
            if tries:
                # a previous attempt failed; report why before retrying
                self.log.warning("%s (%d/%d)", msg, tries, self.retries)
                if tries >= self.retries:
                    return False
                # wait one more second after every failed attempt
                time.sleep(tries)
            tries += 1

            # check for .part file
            filesize = pathfmt.part_size()

            # connect to (remote) source
            try:
                offset, size = self.connect(url, filesize)
            except exception.DownloadRetry as exc:
                msg = exc
                continue
            except exception.DownloadComplete:
                # nothing left to receive; go straight to finalization
                break
            except Exception as exc:
                self.log.warning(exc)
                return False

            # check response
            if not offset:
                mode = "w+b"
                if filesize:
                    self.log.info("Unable to resume partial download")
            else:
                mode = "r+b"
                self.log.info("Resuming download at byte %d", offset)

            # set missing filename extension
            if not pathfmt.has_extension:
                pathfmt.set_extension(self.get_extension())
                if pathfmt.exists():
                    self.out.skip(pathfmt.path)
                    return True

            self.out.start(pathfmt.path)
            self.downloading = True
            with pathfmt.open(mode) as file:
                if offset:
                    file.seek(offset)

                # download content
                try:
                    self.receive(file)
                except RequestException as exc:
                    msg = exc
                    continue

                # check filesize
                if size and file.tell() < size:
                    msg = "filesize mismatch ({} < {})".format(
                        file.tell(), size)
                    continue

                # check filename extension
                adj_ext = self._check_extension(file, pathfmt)

            break

        self.downloading = False
        if adj_ext:
            pathfmt.adjust_extension(adj_ext)
        if self.part:
            pathfmt.part_move()
        self.out.success(pathfmt.path, tries)
        return True

    def connect(self, url, offset):
        """Connect to 'url' while respecting 'offset' if possible

        Returns a 2-tuple containing the actual offset and expected filesize.
        If the returned offset-value is greater than zero, all received data
        will be appended to the existing .part file.
        Return '0' as second tuple-field to indicate an unknown filesize.

        This base implementation is an empty placeholder for subclasses.
        """

    def receive(self, file):
        """Write data to 'file'"""

    def reset(self):
        """Reset internal state / cleanup"""

    def get_extension(self):
        """Return a filename extension appropriate for the current request"""

    @staticmethod
    def _check_extension(file, pathfmt):
        """Check filename extension against fileheader

        Reads the first 8 bytes of 'file' and, if they do not match the
        signature registered for the current extension, returns the
        extension whose signature does match.  Returns None when the
        extension is unknown, the header is too short, the signature is
        correct, or no other signature matches.
        """
        extension = pathfmt.keywords["extension"]
        if extension in FILETYPE_CHECK:
            file.seek(0)
            header = file.read(8)
            if len(header) >= 8 and not FILETYPE_CHECK[extension](header):
                for ext, check in FILETYPE_CHECK.items():
                    if ext != extension and check(header):
                        return ext
        return None
|
|
|
|
|
|
|
|
|
|
|
|
# Maps a filename extension to a predicate that tests whether the leading
# bytes 'h' of a file start with that file type's signature.
FILETYPE_CHECK = {
    "jpg": lambda h: h[0:2] == b"\xff\xd8",
    "png": lambda h: h[0:8] == b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a",
    # Compare a slice instead of indexing a single byte: this cannot raise
    # on short input and accepts only the two defined GIF versions.
    "gif": lambda h: h[0:6] in (b"GIF87a", b"GIF89a"),
}
|