anime-downloader/anime_downloader/downloader/base_downloader.py

101 lines
3.5 KiB
Python
Raw Normal View History

2018-07-27 11:52:21 -07:00
import os
import time
import logging
import sys
from anime_downloader import util
from anime_downloader import session
2018-07-27 11:52:21 -07:00
import requests
logger = logging.getLogger(__name__)
2018-07-27 11:52:21 -07:00
class BaseDownloader:
    """Common driver for downloading ``source.stream_url`` to ``path``.

    Subclasses must implement :meth:`_download`; :meth:`pre_process` and
    :meth:`post_process` are optional hooks that do nothing by default.
    """

    # How many times the stream URL is requested while looking for a
    # response that reports a size (or declares chunked transfer).
    _SIZE_PROBE_ATTEMPTS = 5

    def __init__(self, source, path, force, range_size, callback=None):
        """Store the download parameters.

        Args:
            source: Object providing ``headers``, ``referer`` and
                ``stream_url`` attributes.
            path: Destination file path.
            force: When truthy, an existing file of matching size is not
                treated as already downloaded.
            range_size: Stored on the instance; not used by this class.
            callback: Called as ``callback(downloaded, total_size,
                start_time)`` from :meth:`report_chunk_downloaded`.
                Defaults to ``write_status``.
        """
        self.chunksize = 16384
        self._total_size = None

        self.source = source
        self.path = path

        # these should be included in a options dict, maybe
        self.force = force
        self.range_size = range_size

        if callback is None:
            callback = write_status
        self.callback = callback

    def _probe_total_size(self, headers):
        """Return the remote size in bytes, or 0 when it is not reported.

        The URL is requested up to ``_SIZE_PROBE_ATTEMPTS`` times; probing
        stops at the first response that carries a non-zero Content-Length
        or declares ``Transfer-Encoding: chunked``.
        """
        total_size = 0
        # I couldn't figure out how to retry based on headers with httpadapter.
        for _ in range(self._SIZE_PROBE_ATTEMPTS):
            # NOTE(review): verify=False disables TLS certificate checks;
            # kept as in the original because callers may depend on it.
            with requests.get(self.source.stream_url, headers=headers,
                              stream=True, verify=False) as response:
                # requests exposes response headers through a
                # case-insensitive mapping, so one lookup covers every
                # spelling of the header name.
                total_size = int(response.headers.get('Content-Length', 0))
                chunked = response.headers.get('Transfer-Encoding') == 'chunked'
            if total_size or chunked:
                break
        return total_size

    def check_if_exists(self):
        """Record the remote size and report whether the download can be skipped.

        Returns:
            True when a file already exists at ``self.path``, its size is
            within 10 bytes of a known remote size and ``self.force`` is
            not set; False otherwise.

        Exits the process with status 1 when the existing file's size
        differs from a known remote size.
        """
        # Added Referer header as kwik needed it.
        # NOTE: this writes into self.source.headers itself, not a copy.
        headers = self.source.headers
        if 'user-agent' not in headers:
            headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1"
        if self.source.referer:
            headers['referer'] = self.source.referer

        self._total_size = self._probe_total_size(headers)

        if os.path.exists(self.path):
            local_size = os.stat(self.path).st_size
            size_known = self._total_size != 0
            size_matches = size_known and abs(local_size - self._total_size) < 10

            if size_matches and not self.force:
                logger.warning('File already downloaded. Skipping download.')
                return True

            # NOTE: Unknown size assumes no mismatch and will redownload the file.
            if size_known and not size_matches:
                logger.error(
                    'Total size mismatch (%s and %s), the file already downloaded probably comes from a different source.',
                    self._total_size, local_size)
                sys.exit(1)

        logger.debug('Total size: %s', self._total_size)
        return False

    def download(self):
        """Run the full download: hooks, directory creation, skip check, transfer."""
        # TODO: Clean this up
        self.pre_process()
        logger.info(self.path)

        # TODO: Use pathlib. Break into functions
        # A path without a directory component has nothing to create;
        # splitting on '/' would have returned the file name itself.
        directory = os.path.dirname(self.path)
        if directory:
            util.make_dir(directory)

        if self.check_if_exists():
            return

        self.start_time = time.time()
        self.downloaded = 0
        self._download()
        self.post_process()

    def _download(self):
        """Perform the actual transfer; must be provided by subclasses."""
        raise NotImplementedError

    def pre_process(self):
        """Hook called at the start of :meth:`download`; no-op by default."""
        pass

    def post_process(self):
        """Hook called after :meth:`_download` returns; no-op by default."""
        pass

    def report_chunk_downloaded(self):
        """Add one ``chunksize`` to the running total and notify the callback."""
        self.downloaded += self.chunksize
        self.callback(self.downloaded, self._total_size, self.start_time)
2018-07-27 11:52:21 -07:00
def write_status(downloaded, total_size, start_time):
    """Write a one-line progress report to stdout, overwriting the current line.

    Args:
        downloaded: Number of bytes downloaded so far.
        total_size: Expected total number of bytes; ``None`` is reported
            as 0.00MB.
        start_time: ``time.time()`` value taken when the download began.
    """
    elapsed_time = time.time() - start_time
    if elapsed_time > 0:
        rate = '{0:.2f}'.format((downloaded / 1024) / elapsed_time)
    else:
        # No measurable time has passed, so a rate cannot be computed.
        # The placeholder is inserted as text: passing it through a float
        # format spec raises ValueError.
        rate = 'x'

    downloaded_mb = float(downloaded) / 1048576
    total_mb = float(total_size or 0) / 1048576

    status = 'Downloaded: {0:.2f}MB/{1:.2f}MB, Rate: {2}KB/s'.format(
        downloaded_mb, total_mb, rate)

    # The trailing spaces blank out leftovers from a longer previous status.
    sys.stdout.write("\r" + status + " " * 5 + "\r")
    sys.stdout.flush()