fix: fix the internal downloader

master
Vishnunarayan K I 2020-03-27 19:20:08 +05:30
parent fbd96ec2aa
commit 646121f87c
6 changed files with 65 additions and 42 deletions

View File

@ -2,6 +2,7 @@ import logging
import os
import click
import requests_cache
from anime_downloader import session, util
from anime_downloader.__version__ import __version__
@ -122,8 +123,9 @@ def command(ctx, anime_url, episode_range, url, player, skip_download, quality,
if chunk_size is not None:
chunk_size *= 1e6
chunk_size = int(chunk_size)
episode.download(force=force_download,
path=download_dir,
format=file_format,
range_size=chunk_size)
with requests_cache.disabled():
episode.download(force=force_download,
path=download_dir,
format=file_format,
range_size=chunk_size)
print()

View File

@ -13,6 +13,7 @@ DEFAULT_CONFIG = {
'skip_download': False,
'download_dir': '.',
'quality': '1080p',
'chunk_size': '10',
'fallback_qualities': ['720p', '480p', '360p'],
'force_download': False,
'file_format': '{anime_title}/{anime_title}_{ep_no}',

View File

@ -10,43 +10,47 @@ logger = logging.getLogger(__name__)
class BaseDownloader:
def __init__(self, options=None):
if options is None:
options = {}
self.options = options
# TODO: replace
self.referer = self.options.get('referer', '')
def __init__(self, source, path, force, range_size, callback=None):
    """Record the download target, behaviour switches and progress state.

    ``source`` is the object later read for ``stream_url`` and ``referer``;
    ``path`` is the output file location. ``force`` and ``range_size`` are
    stored unchanged for the download methods to consult. ``callback`` is
    invoked as ``callback(downloaded, total_size, start_time)`` after each
    reported chunk and falls back to ``write_status`` when omitted.
    """
    # What to fetch and where to put it.
    self.source = source
    self.path = path
    self.url = None

    # Behaviour switches; these could live in a single options dict.
    self.force = force
    self.range_size = range_size

    # Progress bookkeeping.
    self.chunksize = 16384
    self._total_size = None
    self.callback = write_status if callback is None else callback
def check_if_exists(self):
# Added Referer header as kwik needs it.
headers = {
'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0",
}
if self.source.referer:
headers['referer'] = self.source.referer
r = session.get_session().get(
self.url, headers={'referer': self.referer}, stream=True)
self.source.stream_url, headers=headers, stream=True)
self._total_size = int(r.headers['Content-length'])
logger.debug('total size: ' + str(self._total_size))
if os.path.exists(self.path):
if abs(os.stat(self.path).st_size - self._total_size) < 10 \
and not self.options['force']:
and not self.force:
logger.warning('File already downloaded. Skipping download.')
return
else:
os.remove(self.path)
def download(self, url, path, options=None):
def download(self):
# TODO: Clean this up
self.pre_process()
self.url = url
logger.info(path)
logger.info(self.path)
# TODO: Use pathlib. Break into functions
self.path = path
util.make_dir(path.rsplit('/', 1)[0])
if options is not None:
self.options = {**options, **self.options}
util.make_dir(self.path.rsplit('/', 1)[0])
self.check_if_exists()
@ -67,7 +71,7 @@ class BaseDownloader:
def report_chunk_downloaded(self):
self.downloaded += self.chunksize
write_status(self.downloaded, self._total_size, self.start_time)
self.callback(self.downloaded, self._total_size, self.start_time)
def write_status(downloaded, total_size, start_time):

View File

@ -1,34 +1,46 @@
import os
import copy
import logging
from anime_downloader.downloader.base_downloader import BaseDownloader
from anime_downloader import session
import requests
import requests_cache
session = session.get_session()
session = requests
logger = logging.getLogger(__name__)
class HTTPDownloader(BaseDownloader):
def _download(self):
if self.options['range_size'] is None:
logger.warning('Using internal downloader which might be slow. Use aria2 for full bandwidth.')
if self.range_size is None:
self._non_range_download()
else:
self._ranged_download()
def _ranged_download(self):
http_chunksize = self.options['range_size']
http_chunksize = self.range_size
range_start = 0
range_end = http_chunksize
url = self.source.stream_url
headers = {
'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0",
}
if self.source.referer:
headers['Referer'] = self.source.referer
# Make a new file, maybe not the best way
with open(self.path, 'w'):
pass
r = session.get(self.url, headers={
'referer': self.referer}, stream=True)
while self.downloaded < self.total_size:
r = session.get(self.url,
headers=set_range(
range_start, range_end, self.referer),
r = session.get(url, headers=headers, stream=True)
while self.downloaded < self._total_size:
r = session.get(url,
headers=set_range(range_start, range_end, headers),
stream=True)
if r.status_code == 206:
with open(self.path, 'ab') as f:
@ -41,12 +53,17 @@ class HTTPDownloader(BaseDownloader):
break
range_start = os.stat(self.path).st_size
range_end += http_chunksize
if range_end > self.total_size:
if range_end > self._total_size:
range_end = ''
def _non_range_download(self):
r = session.get(self.url, headers={
'referer': self.referer}, stream=True)
url = self.source.stream_url
headers = {
'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0",
}
if self.source.referer:
headers['Referer'] = self.source.referer
r = session.get(url, headers=headers, stream=True)
if r.status_code == 200:
with open(self.path, 'wb') as f:
@ -56,12 +73,10 @@ class HTTPDownloader(BaseDownloader):
self.report_chunk_downloaded()
def set_range(start=0, end='', referer=None):
headers = {
'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101"
"Firefox/56.0",
'referer': referer
}
def set_range(start=0, end='', headers=None):
    """Return a copy of ``headers`` with an HTTP ``Range`` header added.

    The header value is ``bytes=<start>-<end>``; the default empty ``end``
    produces an open-ended range. The mapping passed in is never modified,
    and ``None`` is treated as an empty set of headers.
    """
    ranged = {} if headers is None else copy.copy(headers)
    ranged['Range'] = 'bytes={}-{}'.format(start, end)
    return ranged

View File

@ -1,4 +1,5 @@
import logging
import os
import requests
import requests_cache
@ -10,7 +11,7 @@ import tempfile
logger = logging.getLogger(__name__)
cachefile = tempfile.mktemp()
cachefile = os.path.join(tempfile.gettempdir(), 'cache')
# requests_cache spells this keyword ``expire_after``. The previous
# ``expires_after`` was not recognised as the expiry setting, so cached
# responses were kept without the intended 300-second limit.
requests_cache.install_cache(cachefile, backend='sqlite', expire_after=300)
_session = requests_cache.CachedSession(cachefile)

View File

@ -347,7 +347,7 @@ class AnimeEpisode:
def get_data(self):
self._sources = self._get_sources()
logger.debug('Sources : '.format(self._sources))
logger.debug('Sources : {}'.format(self._sources))
# Abstract hook: the base implementation only raises, so concrete episode
# classes must override it. get_data() stores the returned value in
# self._sources.
def _get_sources(self):
raise NotImplementedError