anime-downloader/anime_downloader/sites/anime.py

"""
anime.py contains the base classes required for other anime classes.
"""
import requests
from bs4 import BeautifulSoup
import os
import logging
import copy

from anime_downloader.sites.exceptions import AnimeDLError, NotFoundError
from anime_downloader import util
from anime_downloader.extractors import get_extractor
from anime_downloader.downloader import get_downloader


class Anime:
    """
    Base class for all anime classes.

    Parameters
    ----------
    url: string
        URL of the anime.
    quality: One of ['360p', '480p', '720p', '1080p']
        Quality of episodes
    fallback_qualities: list
        The order of fallback.

    Attributes
    ----------
    sitename: string
        name of the site
    title: string
        Title of the anime
    meta: dict
        metadata about the anime. [Can be empty]
    QUALITIES: list
        Possible qualities for the site
    """

    sitename = ''
    title = ''
    meta = dict()
    subclasses = {}

    QUALITIES = None
    _episodeClass = object

    @classmethod
    def search(cls, query):
        """
        Search the site for anime matching the given query.

        Parameters
        ----------
        query: string
            The search term.
        """
        return

    def __init__(self, url=None, quality='720p',
                 fallback_qualities=['720p', '480p', '360p'],
                 _skip_online_data=False):
        self.url = url
        self._fallback_qualities = fallback_qualities

        if quality in self.QUALITIES:
            self.quality = quality
        else:
            raise AnimeDLError(
                'Quality {0} not found in {1}'.format(quality, self.QUALITIES))

        if not _skip_online_data:
            logging.info('Extracting episode info from page')
            self.get_data()

    @classmethod
    def verify_url(cls, url):
        if cls.sitename in url:
            return True
        return False

    def __init_subclass__(cls, sitename, **kwargs):
        super().__init_subclass__(**kwargs)
        cls.subclasses[sitename] = cls

    @classmethod
    def factory(cls, sitename: str):
        return cls.subclasses[sitename]
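
    # Registration sketch (illustrative, not from the upstream file): concrete
    # site classes pass sitename as a class keyword so factory() can look them
    # up later. 'somesite' is a made-up name.
    #
    #     class SomeSiteAnime(Anime, sitename='somesite'):
    #         QUALITIES = ['360p', '480p', '720p']
    #
    #         def _scrape_episodes(self):
    #             return ['https://somesite.example/episode/1']
    #
    #     assert Anime.factory('somesite') is SomeSiteAnime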

    def get_data(self):
        self._episode_urls = []
        try:
            self._scrape_metadata()
        except Exception as e:
            logging.debug('Metadata scraping error: {}'.format(e))

        self._episode_urls = self._scrape_episodes()
        self._len = len(self._episode_urls)

        logging.debug('EPISODE IDS: length: {}, ids: {}'.format(
            self._len, self._episode_urls))

        self._episode_urls = [(no+1, id) for no, id in
                              enumerate(self._episode_urls)]

        return self._episode_urls

    def __getitem__(self, index):
        episode_class = AnimeEpisode.subclasses[self.sitename]
        if isinstance(index, int):
            ep_id = self._episode_urls[index]
            return episode_class(ep_id[1], self.quality, parent=self,
                                 ep_no=ep_id[0])
        elif isinstance(index, slice):
            anime = copy.deepcopy(self)
            anime._episode_urls = anime._episode_urls[index]
            return anime

    def __repr__(self):
        return '''
Site: {name}
Anime: {title}
Episode count: {length}
'''.format(name=self.sitename, title=self.title, length=len(self))

    def __len__(self):
        return self._len

    def __str__(self):
        return self.title

    def _scrape_episodes(self):
        # Overridden by site subclasses to return the list of episode URLs.
        return

    def _scrape_metadata(self):
        # Overridden by site subclasses to fill in title/meta.
        return
2019-02-23 06:57:37 -08:00
class AnimeEpisode:
QUALITIES = None
title = ''
stream_url = ''
2019-02-23 06:57:37 -08:00
subclasses = {}
2018-06-30 11:56:16 -07:00

    def __init__(self, url, quality='720p', parent=None,
                 ep_no=None):
        if quality not in self.QUALITIES:
            raise AnimeDLError('Incorrect quality: "{}"'.format(quality))

        self.ep_no = ep_no
        self.url = url
        self.quality = quality
        self._parent = parent
        self._sources = None
        self.pretty_title = '{}-{}'.format(self._parent.title, self.ep_no)

        logging.debug("Extracting stream info of id: {}".format(self.url))

        # TODO: New flag: online_data=False
        try:
            self.get_data()
            # Just to verify the source is acquired
            self.source().stream_url
        except NotFoundError:
            # Issue #28
            qualities = copy.copy(self._parent._fallback_qualities)
            try:
                qualities.remove(self.quality)
            except ValueError:
                pass
            for quality in qualities:
                logging.warning('Quality {} not found. Trying {}.'.format(
                    self.quality, quality))
                self.quality = quality
                try:
                    self.get_data()
                    self.source().stream_url
                    # parent.quality = self.quality
                    break
                except NotFoundError:
                    # Issue #28
                    # qualities.remove(self.quality)
                    pass

    def __init_subclass__(cls, sitename: str, **kwargs):
        super().__init_subclass__(**kwargs)
        cls.subclasses[sitename] = cls
        cls.sitename = sitename

    @classmethod
    def factory(cls, sitename: str):
        return cls.subclasses[sitename]

    def source(self, index=0):
        if not self._sources:
            self.get_data()
        try:
            sitename, url = self._sources[index]
        except TypeError:
            # Already converted to an extractor instance; return it as-is.
            return self._sources[index]

        extractor = get_extractor(sitename)
        ext = extractor(url, quality=self.quality)
        self._sources[index] = ext
        return ext

    def get_data(self):
        self._sources = self._get_sources()
        logging.debug('Sources : {}'.format(self._sources))

    def _get_sources(self):
        raise NotImplementedError
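
    # Sketch of the contract source() expects (illustrative, not from the
    # upstream file): a subclass's _get_sources typically returns a list of
    # (extractor_sitename, url) tuples, which source() lazily turns into
    # extractor objects. The names below are made up.
    #
    #     def _get_sources(self):
    #         return [('someextractor', 'https://video.example/embed/1')]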

    def download(self, force=False, path=None,
                 format='{anime_title}_{ep_no}', range_size=None):
        # TODO: Remove this shit
        logging.info('Downloading {}'.format(self.pretty_title))
        if format:
            file_name = util.format_filename(format, self) + '.mp4'

        if path is None:
            path = './' + file_name
        if path.endswith('.mp4'):
            path = path
        else:
            path = os.path.join(path, file_name)

        Downloader = get_downloader('http')
        downloader = Downloader(self.source(),
                                path, force, range_size=range_size)

        downloader.download()
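
    # Usage sketch for download() (illustrative, not from the upstream file;
    # the directory name is made up):
    #
    #     episode = anime[0]
    #     episode.download(path='downloads/')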


class SearchResult:
    def __init__(self, title, url, poster):
        self.title = title
        self.url = url
        self.poster = poster
        self.meta = ''

    def __repr__(self):
        return '<SearchResult Title: {} URL: {}>'.format(self.title, self.url)

    def __str__(self):
        return self.title
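

# Sketch of how a site's search() might use SearchResult (illustrative, not
# from the upstream file; the site name and URLs are made up):
#
#     @classmethod
#     def search(cls, query):
#         return [SearchResult(title='Some Anime',
#                              url='https://somesite.example/anime/1',
#                              poster='https://somesite.example/poster.jpg')]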