Allowing for headers to be passed to downloader and fixed 4anime (#494)
* Update _4anime.py * Update anime.py * Update base_extractor.py * Update http_downloader.py * Update base_downloader.py * Update util.py
master
parent
a3cf51a1fe
commit
72f1f08c7e
|
@ -28,9 +28,10 @@ class BaseDownloader:
|
|||
|
||||
def check_if_exists(self):
|
||||
# Added Referer header as kwik needs it.
|
||||
headers = {
|
||||
'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0",
|
||||
}
|
||||
# Build the request headers from the ones attached to the source.
# Work on a copy: the defaults added below must not be written back into
# the dict held by self.source.
# NOTE(review): that dict may be shared between several sources - confirm.
headers = dict(self.source.headers)
if 'user-agent' not in headers:
    # No trailing comma here: the value has to be a plain string. With a
    # trailing comma the right-hand side becomes a 1-tuple.
    headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0"

# Only send a referer when the source provides one.
if self.source.referer:
    headers['referer'] = self.source.referer
|
||||
|
||||
|
|
|
@ -11,7 +11,6 @@ session = session.get_session()
|
|||
session = requests
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HTTPDownloader(BaseDownloader):
|
||||
def _download(self):
|
||||
logger.warning('Using internal downloader which might be slow. Use aria2 for full bandwidth.')
|
||||
|
@ -27,9 +26,10 @@ class HTTPDownloader(BaseDownloader):
|
|||
range_end = http_chunksize
|
||||
|
||||
url = self.source.stream_url
|
||||
headers = {
|
||||
'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0",
|
||||
}
|
||||
# Build the request headers from the ones attached to the source.
# Work on a copy: the defaults added below must not be written back into
# the dict held by self.source.
# NOTE(review): that dict may be shared between several sources - confirm.
headers = dict(self.source.headers)
if 'user-agent' not in headers:
    # No trailing comma here: the value has to be a plain string. With a
    # trailing comma the right-hand side becomes a 1-tuple.
    headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0"

# Only send a referer when the source provides one.
if self.source.referer:
    headers['Referer'] = self.source.referer
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ from anime_downloader.sites.exceptions import NotFoundError
|
|||
|
||||
|
||||
class BaseExtractor:
|
||||
def __init__(self, url, quality=None, headers=None):
|
||||
def __init__(self, url, quality=None, headers={}):
|
||||
if not url.startswith('http'):
|
||||
url = 'https://' + url
|
||||
self.url = url
|
||||
|
@ -11,7 +11,7 @@ class BaseExtractor:
|
|||
# TODO: Maybe quality should only be dealt with inside the episode(?)
|
||||
self.quality = quality
|
||||
|
||||
if headers is not None:
|
||||
if headers:
|
||||
self.headers = headers
|
||||
else:
|
||||
self.headers = desktop_headers
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import logging
|
||||
from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult
|
||||
from anime_downloader.sites import helpers
|
||||
from anime_downloader.const import HEADERS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -30,6 +31,7 @@ class Anime4(Anime, sitename = '4anime'):
|
|||
soup = helpers.soupify(helpers.get(self.url)).select('ul.episodes.range.active > li > a')
|
||||
return [x['href'] for x in soup]
|
||||
|
||||
|
||||
def _scrape_metadata(self):
|
||||
soup = helpers.soupify(helpers.get(self.url).text)
|
||||
self.title = soup.title.text
|
||||
|
@ -37,7 +39,24 @@ class Anime4(Anime, sitename = '4anime'):
|
|||
if 'year' in i.get('href',''):
|
||||
self.meta['year'] = int(i.text) if i.text.isnumeric() else None
|
||||
|
||||
|
||||
class Anime4Episode(AnimeEpisode, sitename='4anime'):
|
||||
def _get_sources(self):
    """Return the episode's stream URL as a single ``no_extractor`` source.

    The user agent is selected from ``HEADERS`` by ``hash_url`` and stored
    on ``self.headers``; the episode page is then requested with exactly
    those headers.

    Returns:
        A one-element list ``[('no_extractor', stream_url)]`` where
        ``stream_url`` is the ``src`` of the ``<source>`` tag inside the
        ``div.videojs-desktop`` element of the page.
    """
    # Choose the user agent before any request is made, so the page is
    # fetched exactly once and always with the stored headers.
    self.headers = {'user-agent': HEADERS[self.hash_url(self.url, len(HEADERS))]}
    resp = helpers.get(self.url, headers=self.headers)

    player = helpers.soupify(resp).find('div', class_='videojs-desktop')
    stream_url = player.find('source')['src']
    return [('no_extractor', stream_url)]
|
||||
|
||||
|
||||
def hash_url(self, url, length):
    """Map ``url`` to a stable index below ``length``.

    Let's say the user generates link A with user agent X. Upon retry of
    the command it would normally use link A (cached), but with user
    agent Y, which would error because the user agent isn't consistent.

    This 'hashes' the url to pick a 'random' header which stays the same
    across multiple commands.

    Args:
        url: String to hash.
        length: Number of available choices; the result is reduced
            modulo this value.

    Returns:
        The sum of the code points of ``url``, modulo ``length``.

    Raises:
        ZeroDivisionError: If ``length`` is 0.
    """
    return sum(ord(char) for char in url) % length
|
||||
|
|
|
@ -267,6 +267,8 @@ class AnimeEpisode:
|
|||
Episode number/title of the episode
|
||||
pretty_title: string
|
||||
Pretty title of episode in format <animename>-<ep_no>
|
||||
headers: dict
|
||||
Headers the downloader should use, used to bypass downloading restrictions.
|
||||
"""
|
||||
QUALITIES = []
|
||||
title = ''
|
||||
|
@ -274,13 +276,13 @@ class AnimeEpisode:
|
|||
subclasses = {}
|
||||
|
||||
def __init__(self, url, parent: Anime = None, ep_no=None):
|
||||
|
||||
self.ep_no = ep_no
|
||||
self.url = url
|
||||
self.quality = parent.quality
|
||||
self.QUALITIES = parent.QUALITIES
|
||||
self._parent = parent
|
||||
self._sources = None
|
||||
self.headers = {}
|
||||
self.pretty_title = '{}-{}'.format(self._parent.title, self.ep_no)
|
||||
|
||||
logger.debug("Extracting stream info of id: {}".format(self.url))
|
||||
|
@ -340,7 +342,7 @@ class AnimeEpisode:
|
|||
except IndexError:
|
||||
raise NotFoundError("No episode sources found.")
|
||||
|
||||
ext = get_extractor(sitename)(url, quality=self.quality)
|
||||
ext = get_extractor(sitename)(url, quality=self.quality, headers=self.headers)
|
||||
self._sources[index] = ext
|
||||
|
||||
return ext
|
||||
|
|
|
@ -273,13 +273,19 @@ def format_command(cmd, episode, file_format, speed_limit, path):
|
|||
'{idm}' : 'idman.exe /n /d {stream_url} /p {download_dir} /f {file_format}.mp4'
|
||||
}
|
||||
|
||||
# Allows for passing the user agent with self.headers in the site.
|
||||
# Some sites block downloads using a different user agent.
|
||||
if episode.headers.get('user-agent'):
|
||||
useragent = episode.headers['user-agent']
|
||||
else:
|
||||
useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36'
|
||||
|
||||
rep_dict = {
|
||||
'stream_url': episode.source().stream_url if not episode.url.startswith('magnet:?xt=urn:btih:') else episode.url,
|
||||
'file_format': file_format,
|
||||
'download_dir': os.path.abspath(path),
|
||||
'referer': episode.source().referer,
|
||||
'useragent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36',
|
||||
'useragent': f'"{useragent}"',
|
||||
'speed_limit': speed_limit
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue