parent
80df2b3527
commit
22910f9562
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014-2016 Mike Fährmann
|
||||
# Copyright 2014-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -13,9 +13,6 @@ import os
|
||||
|
||||
class BasicDownloader():
|
||||
"""Base class for downloader modules"""
|
||||
|
||||
max_tries = 5
|
||||
|
||||
def __init__(self):
|
||||
self.downloading = False
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014-2016 Mike Fährmann
|
||||
# Copyright 2014-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -10,8 +10,10 @@
|
||||
|
||||
import time
|
||||
import requests
|
||||
import requests.exceptions as rexcepts
|
||||
import mimetypes
|
||||
from .common import BasicDownloader
|
||||
from .. import config
|
||||
|
||||
|
||||
class Downloader(BasicDownloader):
|
||||
@ -20,56 +22,70 @@ class Downloader(BasicDownloader):
|
||||
BasicDownloader.__init__(self)
|
||||
self.session = requests.session()
|
||||
self.out = output
|
||||
self.max_tries = config.get(("retries",), 5)
|
||||
self.timeout = config.get(("timeout",), None)
|
||||
|
||||
def download_impl(self, url, pathfmt):
|
||||
tries = 0
|
||||
msg = ""
|
||||
while True:
|
||||
tries += 1
|
||||
if tries > 1:
|
||||
self.out.error(pathfmt.path, msg, tries-1, self.max_tries)
|
||||
if tries > self.max_tries:
|
||||
return
|
||||
time.sleep(1)
|
||||
|
||||
# try to connect to remote source
|
||||
try:
|
||||
response = self.session.get(url, stream=True, verify=True)
|
||||
except requests.exceptions.ConnectionError as exptn:
|
||||
tries += 1
|
||||
self.out.error(pathfmt.path, exptn, tries, self.max_tries)
|
||||
time.sleep(1)
|
||||
if tries == self.max_tries:
|
||||
return tries
|
||||
response = self.session.get(
|
||||
url, stream=True, timeout=self.timeout
|
||||
)
|
||||
except (rexcepts.ConnectionError, rexcepts.Timeout) as exception:
|
||||
msg = exception
|
||||
continue
|
||||
except (rexcepts.RequestException, UnicodeError) as exception:
|
||||
msg = exception
|
||||
break
|
||||
|
||||
# reject error-status-codes
|
||||
if response.status_code != requests.codes.ok:
|
||||
tries += 1
|
||||
self.out.error(pathfmt.path, 'HTTP status "{} {}"'.format(
|
||||
response.status_code, response.reason),
|
||||
tries, self.max_tries
|
||||
if response.status_code != 200:
|
||||
msg = 'HTTP status "{} {}"'.format(
|
||||
response.status_code, response.reason
|
||||
)
|
||||
response.close()
|
||||
if response.status_code == 404:
|
||||
return self.max_tries
|
||||
time.sleep(1)
|
||||
if tries == self.max_tries:
|
||||
return tries
|
||||
break
|
||||
continue
|
||||
|
||||
if not pathfmt.has_extension:
|
||||
# set 'extension' keyword from Content-Type header
|
||||
mtype = response.headers.get("Content-Type", "image/jpeg")
|
||||
exts = mimetypes.guess_all_extensions(mtype, strict=False)
|
||||
exts.sort()
|
||||
pathfmt.set_extension(exts[-1][1:])
|
||||
if pathfmt.exists():
|
||||
self.out.skip(pathfmt.path)
|
||||
response.close()
|
||||
return
|
||||
|
||||
# everything ok -- proceed to download
|
||||
break
|
||||
self.out.start(pathfmt.path)
|
||||
self.downloading = True
|
||||
with pathfmt.open() as file:
|
||||
try:
|
||||
for data in response.iter_content(None):
|
||||
file.write(data)
|
||||
except rexcepts.RequestException as exception:
|
||||
msg = exception
|
||||
response.close()
|
||||
continue
|
||||
self.downloading = False
|
||||
self.out.success(pathfmt.path, tries)
|
||||
return
|
||||
|
||||
if not pathfmt.has_extension:
|
||||
# set 'extension' keyword from Content-Type header
|
||||
mtype = response.headers.get("Content-Type", "image/jpeg")
|
||||
extensions = mimetypes.guess_all_extensions(mtype, strict=False)
|
||||
extensions.sort()
|
||||
pathfmt.set_extension(extensions[-1][1:])
|
||||
if pathfmt.exists():
|
||||
self.out.skip(pathfmt.path)
|
||||
response.close()
|
||||
return
|
||||
|
||||
self.out.start(pathfmt.path)
|
||||
self.downloading = True
|
||||
with pathfmt.open() as file:
|
||||
for data in response.iter_content(16384):
|
||||
file.write(data)
|
||||
self.downloading = False
|
||||
self.out.success(pathfmt.path, tries)
|
||||
# output for unrecoverable errors
|
||||
self.out.error(pathfmt.path, msg, tries, 0)
|
||||
|
||||
def set_headers(self, headers):
|
||||
"""Set headers for http requests"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user