Allow headers to be passed to the downloader and fix 4anime (#494)

* Update _4anime.py

* Update anime.py

* Update base_extractor.py

* Update http_downloader.py

* Update base_downloader.py

* Update util.py
master
Blatzar 2020-08-27 14:08:11 +02:00 committed by GitHub
parent a3cf51a1fe
commit 72f1f08c7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 41 additions and 13 deletions

View File

@ -28,9 +28,10 @@ class BaseDownloader:
def check_if_exists(self):
# Build the request headers used when probing the remote file.
# Added Referer Header as kwik needed it.
# NOTE(review): the three lines below look like the removed (pre-change)
# side of the diff rendered inline; in the committed file only the
# self.source.headers version should remain -- confirm against the repo.
headers = {
'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0",
}
# Start from the headers supplied by the extractor/source object so a
# site-chosen user agent stays consistent between extraction and download.
headers = self.source.headers
if 'user-agent' not in headers:
# NOTE(review): the trailing comma makes this value a 1-tuple, not a
# string -- almost certainly a bug; the literal should be assigned bare.
headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0",
if self.source.referer:
# Some hosts (e.g. kwik, per the comment above) reject requests
# without a matching referer.
headers['referer'] = self.source.referer

View File

@ -11,7 +11,6 @@ session = session.get_session()
session = requests
logger = logging.getLogger(__name__)
class HTTPDownloader(BaseDownloader):
# Fallback downloader using plain HTTP range requests.
def _download(self):
logger.warning('Using internal downloader which might be slow. Use aria2 for full bandwidth.')
@ -27,9 +26,10 @@ class HTTPDownloader(BaseDownloader):
range_end = http_chunksize
url = self.source.stream_url
# NOTE(review): the literal headers dict below appears to be the removed
# (pre-change) side of the diff; only the self.source.headers version
# should survive in the committed file -- confirm against the repository.
headers = {
'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0",
}
# Prefer the headers the extractor supplied; fill in a default user
# agent only when the site did not set one.
headers = self.source.headers
if 'user-agent' not in headers:
# NOTE(review): trailing comma makes the value a 1-tuple, not a
# string -- same apparent bug as in base_downloader.py.
headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0",
if self.source.referer:
# NOTE(review): key is 'Referer' here but 'referer' in
# base_downloader.py; requests handles header names
# case-insensitively, so both presumably work -- still worth unifying.
headers['Referer'] = self.source.referer

View File

@ -3,7 +3,7 @@ from anime_downloader.sites.exceptions import NotFoundError
class BaseExtractor:
# Base class for stream extractors: normalizes the URL and stores the
# requested quality plus the request headers subclasses should use.
# NOTE(review): the two __init__ signatures below are the old and new
# sides of the diff rendered together; only headers={} is current.
def __init__(self, url, quality=None, headers=None):
# NOTE(review): headers={} is a mutable default argument shared across
# every call that omits it; if callers mutate self.headers, state can
# leak between extractor instances. headers=None plus a fallback
# (as in the removed signature above) is the safer pattern.
def __init__(self, url, quality=None, headers={}):
if not url.startswith('http'):
url = 'https://' + url
self.url = url
@ -11,7 +11,7 @@ class BaseExtractor:
# TODO: Maybe quality should only be dealt with inside episode(?)
self.quality = quality
# Fall back to the generic desktop headers when the caller passed
# nothing (empty dict is falsy, so it also triggers the fallback).
if headers is not None:
if headers:
self.headers = headers
else:
self.headers = desktop_headers

View File

@ -1,6 +1,7 @@
import logging
from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult
from anime_downloader.sites import helpers
from anime_downloader.const import HEADERS
logger = logging.getLogger(__name__)
@ -30,6 +31,7 @@ class Anime4(Anime, sitename = '4anime'):
soup = helpers.soupify(helpers.get(self.url)).select('ul.episodes.range.active > li > a')
return [x['href'] for x in soup]
def _scrape_metadata(self):
soup = helpers.soupify(helpers.get(self.url).text)
self.title = soup.title.text
@ -37,7 +39,24 @@ class Anime4(Anime, sitename = '4anime'):
if 'year' in i.get('href',''):
self.meta['year'] = int(i.text) if i.text.isnumeric() else None
class Anime4Episode(AnimeEpisode, sitename='4anime'):
def _get_sources(self):
# NOTE(review): the first stream_url line below is the removed side of
# the diff; the headers-aware version underneath replaces it.
stream_url = helpers.soupify(helpers.get(self.url).text).find('div', class_='videojs-desktop').find('source')['src']
# Pick a user agent deterministically from HEADERS, indexed by a hash
# of the URL, so retries of the same episode reuse the same agent.
self.headers = {'user-agent':HEADERS[self.hash_url(self.url, len(HEADERS))]}
resp = helpers.get(self.url, headers=self.headers)
stream_url = helpers.soupify(resp).find('div', class_='videojs-desktop').find('source')['src']
# 'no_extractor' presumably signals the URL is directly downloadable
# without a dedicated extractor -- verify against get_extractor usage.
return [('no_extractor', stream_url)]
"""
Let's say the user generates link A with user agent X.
Upon retry of command it'd normally use Link A (cached), but with user agent Y
which would error because the user agent isn't consistent.
This 'hashes' the url to generate a 'random' header which is consistent throughout multiple commands.
"""
def hash_url(self, url, length):
    """Deterministically map *url* to an index in ``range(length)``.

    The character code points of the URL are summed and reduced modulo
    *length*, so the same URL always yields the same index across runs
    and processes (unlike the salted built-in ``hash``).
    """
    return sum(ord(ch) for ch in url) % length

View File

@ -267,6 +267,8 @@ class AnimeEpisode:
Episode number/title of the episode
pretty_title: string
Pretty title of episode in format <animename>-<ep_no>
headers: dict
Headers the downloader should use, used to bypass downloading restrictions.
"""
QUALITIES = []
title = ''
@ -274,13 +276,13 @@ class AnimeEpisode:
subclasses = {}
def __init__(self, url, parent: Anime = None, ep_no=None):
self.ep_no = ep_no
self.url = url
# NOTE(review): parent defaults to None, yet parent.quality is read
# unconditionally -- calling this without a parent raises AttributeError.
self.quality = parent.quality
self.QUALITIES = parent.QUALITIES
self._parent = parent
self._sources = None
# Headers the downloader should use; empty by default, site subclasses
# may populate it (e.g. 4anime sets a site-specific user agent).
self.headers = {}
self.pretty_title = '{}-{}'.format(self._parent.title, self.ep_no)
logger.debug("Extracting stream info of id: {}".format(self.url))
@ -340,7 +342,7 @@ class AnimeEpisode:
except IndexError:
raise NotFoundError("No episode sources found.")
ext = get_extractor(sitename)(url, quality=self.quality)
ext = get_extractor(sitename)(url, quality=self.quality, headers=self.headers)
self._sources[index] = ext
return ext

View File

@ -273,13 +273,19 @@ def format_command(cmd, episode, file_format, speed_limit, path):
'{idm}' : 'idman.exe /n /d {stream_url} /p {download_dir} /f {file_format}.mp4'
}
# Allows for passing the user agent with self.headers in the site.
# Some sites block downloads when a different user agent is used than
# the one that generated the link.
if episode.headers.get('user-agent'):
useragent = episode.headers['user-agent']
else:
# Default desktop user agent for sites that set no specific one.
useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36'
rep_dict = {
'stream_url': episode.source().stream_url if not episode.url.startswith('magnet:?xt=urn:btih:') else episode.url,
'file_format': file_format,
'download_dir': os.path.abspath(path),
'referer': episode.source().referer,
# NOTE(review): the two 'useragent' entries below are the old and new
# sides of the diff rendered together; in a dict literal the later
# (quoted f-string) entry wins. Only it should exist in the committed file.
'useragent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36',
'useragent': f'"{useragent}"',
'speed_limit': speed_limit
}