anime-downloader/anime_downloader/sites/anime.py

490 lines
14 KiB
Python
Raw Normal View History

2018-10-09 08:04:27 -07:00
"""
anime.py contains the base classes required for other anime classes.
"""
import os
2018-05-27 10:01:49 -07:00
import logging
import copy
import importlib
2018-05-27 13:57:14 -07:00
from anime_downloader.sites.exceptions import AnimeDLError, NotFoundError
2018-06-27 12:52:31 -07:00
from anime_downloader import util
2019-06-13 09:36:26 -07:00
from anime_downloader.config import Config
from anime_downloader.extractors import get_extractor
2018-07-27 11:52:21 -07:00
from anime_downloader.downloader import get_downloader
logger = logging.getLogger(__name__)
2019-02-23 06:57:37 -08:00
class Anime:
    """
    Base class for all anime classes.

    Parameters
    ----------
    url: string
        URL of the anime.
    quality: One of ['360p', '480p', '540p', '720p', '1080p']
        Quality of episodes
    fallback_qualities: list
        The order of fallback.

    Attributes
    ----------
    sitename: str
        name of the site
    title: str
        Title of the anime
    meta: dict
        metadata about the anime. [Can be empty]
    QUALITIES: list
        Possible qualities for the site
    """
    sitename = ''
    title = ''
    meta = dict()
    # Registry of all site subclasses, populated by __init_subclass__.
    subclasses = {}
    # None means "no language preference"; True/False force subbed/dubbed.
    subbed = None
    QUALITIES = ['360p', '480p', '540p', '720p', '1080p']

    @classmethod
    def search(cls, query):
        """
        Search searches for the anime using the query given.

        Parameters
        ----------
        query: str
            query is the query keyword to be searched.

        Returns
        -------
        list
            List of :py:class:`~anime_downloader.sites.anime.SearchResult`
        """
        return

    def __init__(self, url=None, quality='720p',
                 fallback_qualities=None,
                 _skip_online_data=False,
                 subbed=None):
        self.url = url
        self.subbed = subbed

        # Default assigned inside the body to avoid a shared mutable
        # default argument.
        if fallback_qualities is None:
            fallback_qualities = ['720p', '480p', '360p']
        # Keep only the fallbacks this site actually supports.
        self._fallback_qualities = [
            q for q in fallback_qualities if q in self.QUALITIES]

        if quality in self.QUALITIES:
            self.quality = quality
        else:
            raise AnimeDLError(
                'Quality {0} not found in {1}'.format(quality, self.QUALITIES))

        if not _skip_online_data:
            logger.info('Extracting episode info from page')
            self._episode_urls = self.get_data()
            self._len = len(self._episode_urls)

    @classmethod
    def verify_url(cls, url):
        """
        Return True if `url` appears to belong to this site.

        The check is a simple substring match of the site's name in the URL.
        """
        return cls.sitename in url

    @property
    def config(self):
        """Site-specific configuration section from the global Config."""
        return Config['siteconfig'][self.sitename]

    def __init_subclass__(cls, sitename, **kwargs):
        # Auto-register every subclass under its site name so factory()
        # can look it up later.
        super().__init_subclass__(**kwargs)
        cls.subclasses[sitename] = cls

    @classmethod
    def factory(cls, sitename: str):
        """
        factory returns the appropriate subclass for the given site name.

        Parameters
        ----------
        sitename: str
            sitename is the name of the site

        Returns
        -------
        subclass of :py:class:`Anime`
            Sub class of :py:class:`Anime`
        """
        return cls.subclasses[sitename]

    @classmethod
    def new_anime(cls, sitename: str):
        """
        new_anime is a factory which returns the anime class corresponding to
        `sitename`, importing the site module on demand.

        Returns
        -------
        subclass of Anime

        Raises
        ------
        ImportError
            If the imported module contains no subclass of `cls`.
        """
        module = importlib.import_module(
            'anime_downloader.sites.{}'.format(sitename)
        )
        # BUG FIX: dir() yields attribute *names* (strings); the previous
        # code passed those strings straight to issubclass(), which raises
        # TypeError. Resolve each name to the actual object first and only
        # consider real classes (excluding the base class itself).
        for name in dir(module):
            obj = getattr(module, name)
            if isinstance(obj, type) and issubclass(obj, cls) and obj is not cls:
                return obj
        raise ImportError("Cannot find subclass of {}".format(cls))

    def get_data(self):
        """
        get_data is called inside the :code:`__init__` of
        :py:class:`~anime_downloader.sites.anime.BaseAnime`. It is used to get
        the necessary data about the anime and it's episodes.

        This function calls
        :py:class:`~anime_downloader.sites.anime.BaseAnime._scrape_episodes`
        and
        :py:class:`~anime_downloader.sites.anime.BaseAnime._scrape_metadata`

        TODO: Refactor this so that classes which need not be soupified don't
        have to overload this function.

        Returns
        -------
        list
            A list of tuples of episodes containing episode name and
            episode url.
            Ex::
                [('1', 'https://9anime.is/.../...', ...)]
        """
        self._episode_urls = []
        # Metadata scraping is best-effort; a failure should not prevent
        # episode listing, so the exception is only logged.
        try:
            self._scrape_metadata()
        except Exception as e:
            logger.debug('Metadata scraping error: {}'.format(e))

        self._episode_urls = self._scrape_episodes()
        self._len = len(self._episode_urls)

        logger.debug('EPISODE IDS: length: {}, ids: {}'.format(
            self._len, self._episode_urls))

        # Normalize a plain list of urls into (episode_no, url) tuples.
        # BUG FIX: guard against an empty result before inspecting the first
        # element (previously this raised IndexError when nothing was found).
        if self._episode_urls and not isinstance(self._episode_urls[0], tuple):
            self._episode_urls = [(no + 1, id) for no, id in
                                  enumerate(self._episode_urls)]

        return self._episode_urls

    def __getitem__(self, index):
        episode_class = AnimeEpisode.subclasses[self.sitename]
        if isinstance(index, int):
            try:
                ep_id = self._episode_urls[index]
            except IndexError as e:
                raise RuntimeError("No episode found with index") from e
            return episode_class(ep_id[1], parent=self,
                                 ep_no=ep_id[0])
        elif isinstance(index, slice):
            # Slicing returns a copy of the anime restricted to the
            # selected episodes.
            anime = copy.deepcopy(self)
            try:
                anime._episode_urls = anime._episode_urls[index]
            except IndexError as e:
                raise RuntimeError("No episode found with index") from e
            return anime
        return None

    def __iter__(self):
        episode_class = AnimeEpisode.subclasses[self.sitename]
        for ep_id in self._episode_urls:
            yield episode_class(ep_id[1], parent=self, ep_no=ep_id[0])

    def __repr__(self):
        return '''
Site: {name}
Anime: {title}
Episode count: {length}
'''.format(name=self.sitename, title=self.title, length=len(self))

    def __len__(self):
        return self._len

    def __str__(self):
        return self.title

    def _scrape_episodes(self):
        """
        _scrape_episodes has to be overridden by subclasses to scrape the
        episode urls from the web page.

        Returns
        -------
        :code:`list` of :code:`str`
            A list of episode urls.
        """
        # BUG FIX: get_data() calls this (correctly spelled) name, but the
        # base class previously only defined the misspelled
        # ``_scarpe_episodes`` stub. Delegate to the legacy hook so older
        # subclasses that still override the misspelled name keep working.
        return self._scarpe_episodes()

    def _scarpe_episodes(self):
        """
        Deprecated, misspelled alias of :py:meth:`_scrape_episodes`; kept
        only for backward compatibility with subclasses overriding this name.
        """
        return

    def _scrape_metadata(self):
        """
        _scrape_metadata is function which has to be overridden by the base
        classes to scrape the metadata of anime from the web page.
        """
        return
2018-05-30 03:56:27 -07:00
2019-02-23 06:57:37 -08:00
class AnimeEpisode:
    """
    Base class for all Episode classes.

    Parameters
    ----------
    url: string
        URL of the episode.
    quality: One of ['360p', '480p', '540p', '720p', '1080p']
        Quality of episode
    fallback_qualities: list
        The order of fallback.

    Attributes
    ----------
    sitename: str
        name of the site
    title: str
        Title of the anime
    meta: dict
        metadata about the anime. [Can be empty]
    ep_no: string
        Episode number/title of the episode
    pretty_title: string
        Pretty title of episode in format <animename>-<ep_no>
    headers: dict
        Headers the downloader should use, used to bypass downloading restrictions.
    """
    QUALITIES = []
    title = ''
    stream_url = ''
    # Registry of all episode subclasses, populated by __init_subclass__.
    subclasses = {}

    def __init__(self, url, parent: Anime = None, ep_no=None):
        self.ep_no = ep_no
        self.url = url
        # Inherit quality settings from the owning Anime object.
        self.quality = parent.quality
        self.QUALITIES = parent.QUALITIES
        self._parent = parent
        self._sources = None
        self.headers = {}
        self.pretty_title = '{}-{}'.format(self._parent.title, self.ep_no)

        logger.debug("Extracting stream info of id: {}".format(self.url))

        def try_data():
            self.get_data()
            # Just to verify the source is acquired
            self.source().stream_url

        try:
            try_data()
        except NotFoundError:
            # Issue #28: requested quality missing — walk the fallback list.
            qualities = copy.copy(self._parent._fallback_qualities)
            try:
                qualities.remove(self.quality)
            except ValueError:
                pass
            for quality in qualities:
                logger.warning('Quality {} not found. Trying {}.'.format(
                    self.quality, quality))
                self.quality = quality
                try:
                    try_data()
                    return
                except NotFoundError:
                    pass
            logger.warning(f'Skipping episode: {self.ep_no}')

    def __init_subclass__(cls, sitename: str, **kwargs):
        super().__init_subclass__(**kwargs)
        cls.subclasses[sitename] = cls
        cls.sitename = sitename

    @classmethod
    def factory(cls, sitename: str):
        """Return the episode subclass registered for `sitename`."""
        return cls.subclasses[sitename]

    @property
    def config(self):
        """Site-specific configuration section from the global Config."""
        return Config['siteconfig'][self.sitename]

    def source(self, index=0):
        """
        Get the source for episode

        Returns
        -------
        `anime_downloader.extractors.base_extractor.BaseExtractor`
            Extractor depending on the source.
        """
        if not self._sources:
            self.get_data()
        try:
            sitename, url = self._sources[index]
        except TypeError:
            # Entry was already replaced by an extractor instance below;
            # return it directly.
            return self._sources[index]
        except IndexError:
            raise NotFoundError("No episode sources found.")

        ext = get_extractor(sitename)(
            url, quality=self.quality, headers=self.headers)
        # Cache the built extractor so repeated calls don't re-extract.
        self._sources[index] = ext
        return ext

    def get_data(self):
        """Populate self._sources via the subclass-provided _get_sources."""
        self._sources = self._get_sources()
        logger.debug('Sources : {}'.format(self._sources))

    def _get_sources(self):
        # Must be overridden by each site's episode class.
        raise NotImplementedError

    def sort_sources(self, data):
        """
        Formatted data should look something like this
        [
            {'extractor': 'mp4upload', 'url': 'https://twist.moe/mp4upload/...', 'server': 'mp4upload', 'version': 'subbed'},
            {'extractor': 'vidstream', 'url': 'https://twist.moe/vidstream/...', 'server': 'vidstream', 'version': 'dubbed'},
            {'extractor': 'no_extractor', 'url': 'https://twist.moe/anime/...', 'server': 'default', 'version': 'subbed'}
        ]
        extractor = the extractor the link should be passed to
        url = url to be passed to the extractor
        server = the server name used in config
        version = subbed/dubbed

        The config should consist of a list with servers in preferred order and a preferred language, eg
        "servers":["vidstream","default","mp4upload"],
        "version":"subbed"

        Using the example above, this function will return: [('no_extractor', 'https://twist.moe/anime/...')]
        as it prioritizes preferred language over preferred server
        """
        # An explicit subbed/dubbed choice on the parent anime overrides the
        # configured default.
        if self._parent and self._parent.subbed is not None:
            version = "subbed" if self._parent.subbed else "dubbed"
        else:
            version = self.config.get('version', 'subbed')
        servers = self.config.get('servers', [''])

        logger.debug('Data : {}'.format(data))

        # Sorts the dicts by preferred server in config.
        # BUG FIX: unknown servers previously keyed on len(data), which could
        # rank them *ahead* of a known low-priority server whenever
        # len(data) < len(servers); len(servers) always sorts them last.
        sorted_by_server = sorted(data, key=lambda x: servers.index(
            x['server']) if x['server'] in servers else len(servers))

        # Sorts the above by preferred language
        # resulting in a list with the dicts sorted by language and server
        # with language being prioritized over server
        sorted_by_lang = list(
            sorted(sorted_by_server, key=lambda x: x['version'] == version, reverse=True))

        logger.debug('Sorted sources : {}'.format(sorted_by_lang))

        return '' if not sorted_by_lang else [(sorted_by_lang[0]['extractor'], sorted_by_lang[0]['url'])]

    def download(self, force=False, path=None,
                 format='{anime_title}_{ep_no}', range_size=None):
        """
        Downloads episode. This might be removed in a future release.

        Parameters
        ----------
        force: bool
            Whether to force download or not.
        path: string
            Path to the directory/file where the file should be downloaded to.
        format: string
            The format of the filename if not provided.
        """
        # TODO: Remove this shit
        logger.info('Downloading {}'.format(self.pretty_title))
        # BUG FIX: file_name was previously only assigned when `format` was
        # truthy, causing UnboundLocalError below for format=None/''; fall
        # back to the default pattern instead.
        if not format:
            format = '{anime_title}_{ep_no}'
        file_name = util.format_filename(format, self) + '.mp4'

        if path is None:
            path = './' + file_name
        if path.endswith('.mp4'):
            path = path
        else:
            path = os.path.join(path, file_name)

        Downloader = get_downloader('pySmartDL')
        downloader = Downloader(self.source(),
                                path, force, range_size=range_size)

        downloader.download()
2018-05-28 12:36:40 -07:00
2019-02-23 06:57:37 -08:00
2018-05-28 12:36:40 -07:00
class SearchResult:
    """
    SearchResult class holds the search result of a search done by an Anime
    class

    Parameters
    ----------
    title: str
        Title of the anime.
    url: str
        URL of the anime
    poster: str
        URL for the poster of the anime.
    meta: dict
        Additional metadata regarding the anime.

    Attributes
    ----------
    title: str
        Title of the anime.
    url: str
        URL of the anime
    poster: str
        URL for the poster of the anime.
    meta: dict
        Additional metadata regarding the anime.
    meta_info: dict
        Metadata regarding the anime. Not shown in the results, used to match with MAL
    """

    def __init__(self, title, url, poster='', meta='', meta_info=None):
        self.title = title
        self.url = url
        self.poster = poster
        self.meta = meta
        # BUG FIX: the previous default (meta_info={}) was a mutable default
        # argument shared across every instance; use a None sentinel and
        # build a fresh dict per instance.
        self.meta_info = {} if meta_info is None else meta_info

    def __repr__(self):
        return '<SearchResult Title: {} URL: {}>'.format(self.title, self.url)

    def __str__(self):
        return self.title

    @property
    def pretty_metadata(self):
        """
        pretty_metadata is the prettified version of metadata
        """
        if self.meta:
            return ' | '.join(val for _, val in self.meta.items())
        return ''