From a583aa03639f87e5d18dabf928145ca2a6aab548 Mon Sep 17 00:00:00 2001 From: Vishnunarayan K I Date: Sat, 23 Feb 2019 20:27:37 +0530 Subject: [PATCH] rewrite: site handling rewrite --- anime_downloader/sites/anime.py | 62 ++++++++++++---------- anime_downloader/sites/animepahe.py | 7 ++- anime_downloader/sites/baseanimecf.py | 6 +-- anime_downloader/sites/gogoanime.py | 43 +++++++-------- anime_downloader/sites/helpers/__init__.py | 1 + anime_downloader/sites/helpers/request.py | 61 +++++++++++++++++++++ anime_downloader/sites/kissanime.py | 30 ++++------- anime_downloader/sites/kisscartoon.py | 24 ++++----- anime_downloader/sites/masterani.py | 8 ++- anime_downloader/sites/nineanime.py | 17 +++--- anime_downloader/sites/twistmoe.py | 8 ++- 11 files changed, 156 insertions(+), 111 deletions(-) create mode 100644 anime_downloader/sites/helpers/__init__.py create mode 100644 anime_downloader/sites/helpers/request.py diff --git a/anime_downloader/sites/anime.py b/anime_downloader/sites/anime.py index 48c4e13..39c532b 100644 --- a/anime_downloader/sites/anime.py +++ b/anime_downloader/sites/anime.py @@ -1,22 +1,23 @@ from bs4 import BeautifulSoup -import time import os import logging -import sys import copy from anime_downloader import session from anime_downloader.sites.exceptions import AnimeDLError, NotFoundError from anime_downloader import util +from anime_downloader.sites import helpers from anime_downloader.const import desktop_headers from anime_downloader.extractors import get_extractor from anime_downloader.downloader import get_downloader -class BaseAnime: + +class Anime: sitename = '' title = '' meta = dict() + subclasses = {} QUALITIES = None _episodeClass = object @@ -47,17 +48,22 @@ class BaseAnime: return True return False + def __init_subclass__(cls, sitename, **kwargs): + super().__init_subclass__(**kwargs) + cls.subclasses[sitename] = cls + + @classmethod + def factory(cls, sitename: str): + return cls.subclasses[sitename] + def get_data(self): 
self._episode_urls = [] - r = session.get_session().get(self.url, headers=desktop_headers) - soup = BeautifulSoup(r.text, 'html.parser') - try: - self._scrape_metadata(soup) + self._scrape_metadata() except Exception as e: logging.debug('Metadata scraping error: {}'.format(e)) - self._episode_urls = self._scarpe_episodes(soup) + self._episode_urls = self._scrape_episodes() self._len = len(self._episode_urls) logging.debug('EPISODE IDS: length: {}, ids: {}'.format( @@ -68,13 +74,11 @@ class BaseAnime: return self._episode_urls - def __len__(self): - return self._len - def __getitem__(self, index): + episode_class = AnimeEpisode.subclasses[self.sitename] if isinstance(index, int): ep_id = self._episode_urls[index] - return self._episodeClass(ep_id[1], self.quality, parent=self, + return episode_class(ep_id[1], self.quality, parent=self, ep_no=ep_id[0]) elif isinstance(index, slice): anime = copy.deepcopy(self) @@ -88,20 +92,24 @@ Anime: {title} Episode count: {length} '''.format(name=self.sitename, title=self.title, length=len(self)) + def __len__(self): + return self._len + def __str__(self): return self.title - def _scarpe_episodes(self, soup): + def _scrape_episodes(self): return - def _scrape_metadata(self, soup): + def _scrape_metadata(self): return -class BaseEpisode: +class AnimeEpisode: QUALITIES = None title = '' stream_url = '' + subclasses = {} def __init__(self, url, quality='720p', parent=None, ep_no=None): @@ -143,6 +151,15 @@ class BaseEpisode: # qualities.remove(self.quality) pass + def __init_subclass__(cls, sitename: str, **kwargs): + super().__init_subclass__(**kwargs) + cls.subclasses[sitename] = cls + cls.sitename = sitename + + @classmethod + def factory(cls, sitename: str): + return cls.subclasses[sitename] + def source(self, index=0): if not self._sources: self.get_data() @@ -167,6 +184,7 @@ class BaseEpisode: def download(self, force=False, path=None, format='{anime_title}_{ep_no}', range_size=None): + # TODO: Remove this shit 
logging.info('Downloading {}'.format(self.pretty_title)) if format: file_name = util.format_filename(format, self)+'.mp4' @@ -184,6 +202,7 @@ class BaseEpisode: downloader.download() + class SearchResult: def __init__(self, title, url, poster): self.title = title @@ -196,16 +215,3 @@ class SearchResult: def __str__(self): return self.title - - -def write_status(downloaded, total_size, start_time): - elapsed_time = time.time()-start_time - rate = (downloaded/1024)/elapsed_time if elapsed_time else 'x' - downloaded = float(downloaded)/1048576 - total_size = float(total_size)/1048576 - - status = 'Downloaded: {0:.2f}MB/{1:.2f}MB, Rate: {2:.2f}KB/s'.format( - downloaded, total_size, rate) - - sys.stdout.write("\r" + status + " "*5 + "\r") - sys.stdout.flush() diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index b9c9545..4b183e5 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -2,8 +2,7 @@ import cfscrape import logging import re -from anime_downloader.sites.anime import BaseEpisode, SearchResult -from anime_downloader.sites.baseanimecf import BaseAnimeCF +from anime_downloader.sites.anime import AnimeEpisode, SearchResult, Anime from anime_downloader.sites.exceptions import NotFoundError from anime_downloader import util from anime_downloader.session import get_session @@ -11,7 +10,7 @@ from anime_downloader.session import get_session scraper = get_session(cfscrape.create_scraper()) -class AnimePaheEpisode(BaseEpisode): +class AnimePaheEpisode(AnimeEpisode): QUALITIES = ['360p', '480p', '720p', '1080p'] def _get_source(self, episode_id, server): @@ -50,7 +49,7 @@ class AnimePaheEpisode(BaseEpisode): return sources raise NotFoundError -class AnimePahe(BaseAnimeCF): +class AnimePahe(Anime): sitename = 'animepahe' api_url = 'https://animepahe.com/api' base_anime_url = 'https://animepahe.com/anime/' diff --git a/anime_downloader/sites/baseanimecf.py b/anime_downloader/sites/baseanimecf.py 
index ae8254b..0499cde 100644 --- a/anime_downloader/sites/baseanimecf.py +++ b/anime_downloader/sites/baseanimecf.py @@ -2,14 +2,14 @@ import cfscrape from bs4 import BeautifulSoup import logging -from anime_downloader.sites.anime import BaseAnime +from anime_downloader.sites.anime import Anime from anime_downloader.const import get_random_header from anime_downloader.session import get_session scraper = get_session(cfscrape.create_scraper()) -class BaseAnimeCF(BaseAnime): +class BaseAnimeCF(Anime): def get_data(self): headers = get_random_header() if hasattr(self, '_referer'): @@ -20,7 +20,7 @@ class BaseAnimeCF(BaseAnime): self._scrape_metadata(soup) - self._episode_urls = self._scarpe_episodes(soup) + self._episode_urls = self._scrape_episodes(soup) self._len = len(self._episode_urls) logging.debug('EPISODE IDS: length: {}, ids: {}'.format( diff --git a/anime_downloader/sites/gogoanime.py b/anime_downloader/sites/gogoanime.py index 6e6d4c3..cf115ae 100644 --- a/anime_downloader/sites/gogoanime.py +++ b/anime_downloader/sites/gogoanime.py @@ -1,19 +1,15 @@ import logging -from bs4 import BeautifulSoup -from anime_downloader import session -from anime_downloader.sites.anime import BaseAnime, BaseEpisode, SearchResult -from anime_downloader import util - -session = session.get_session() +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult +from anime_downloader.sites import helpers -class GogoanimeEpisode(BaseEpisode): +class GogoanimeEpisode(AnimeEpisode, sitename='gogoanime'): QUALITIES = ['360p', '480p', '720p'] _base_url = 'https://www2.gogoanime.se' def _get_sources(self): - soup = BeautifulSoup(session.get(self.url).text, 'html.parser') + soup = helpers.soupfiy(helpers.get(self.url)) extractors_url = [] for element in soup.select('.anime_muti_link > ul > li'): @@ -30,27 +26,24 @@ class GogoanimeEpisode(BaseEpisode): return extractors_url -class GogoAnime(BaseAnime): - sitename = 'gogoanime' +class GogoAnime(Anime, 
sitename='gogoanime'): QUALITIES = ['360p', '480p', '720p'] _episode_list_url = 'https://www2.gogoanime.se//load-list-episode' - _episodeClass = GogoanimeEpisode _search_api_url = 'https://api.watchanime.cc/site/loadAjaxSearch' @classmethod def search(cls, query): - resp = util.get_json( - cls._search_api_url, - params={ - 'keyword': query, - 'id': -1, - 'link_web': 'https://www1.gogoanime.sh/' - } - ) + params = { + 'keyword': query, + 'id': -1, + 'link_web': 'https://www1.gogoanime.sh/' + } + soup = helpers.soupfiy(helpers.get( + cls._search_api_url, params=params + ).json()['content']) search_results = [] - soup = BeautifulSoup(resp['content'], 'html.parser') for element in soup('a', class_='ss-title'): search_result = SearchResult( title=element.text, @@ -61,7 +54,8 @@ class GogoAnime(BaseAnime): search_results.append(search_result) return search_results - def _scarpe_episodes(self, soup): + def _scrape_episodes(self): + soup = helpers.soupfiy(helpers.get(self.url)) anime_id = soup.select_one('input#movie_id').attrs['value'] params = { 'default_ep': 0, @@ -70,8 +64,8 @@ class GogoAnime(BaseAnime): 'id': anime_id, } - res = session.get(self._episode_list_url, params=params) - soup = BeautifulSoup(res.text, 'html.parser') + soup = helpers.soupfiy(helpers.get(self._episode_list_url, + params=params)) epurls = list( reversed(['https://www2.gogoanime.se'+a.get('href').strip() @@ -80,7 +74,8 @@ class GogoAnime(BaseAnime): return epurls - def _scrape_metadata(self, soup): + def _scrape_metadata(self): + soup = helpers.soupfiy(helpers.get(self.url)) meta = soup.select_one('.anime_info_body_bg') self.title = meta.find('h1').text self.poster = meta.find('img').get('src') diff --git a/anime_downloader/sites/helpers/__init__.py b/anime_downloader/sites/helpers/__init__.py new file mode 100644 index 0000000..ce72652 --- /dev/null +++ b/anime_downloader/sites/helpers/__init__.py @@ -0,0 +1 @@ +from anime_downloader.sites.helpers.request import * diff --git 
a/anime_downloader/sites/helpers/request.py b/anime_downloader/sites/helpers/request.py new file mode 100644 index 0000000..8a44fb4 --- /dev/null +++ b/anime_downloader/sites/helpers/request.py @@ -0,0 +1,61 @@ +# TODO: Check without node installed +# cfscrape is a necessary dependency +import cfscrape +import logging +from bs4 import BeautifulSoup + +from anime_downloader import session + +__all__ = [ + 'get', + 'post', + 'soupfiy', +] + +logger = logging.getLogger(__name__) + +req_session = session.get_session() +cf_session = session.get_session(cfscrape.create_scraper()) + + +def get(url: str, + cf: bool = True, + **kwargs): + ''' + get performs a get request + ''' + # TODO: Add headers + sess = cf_session if cf else req_session + res = sess.get(url, **kwargs) + # TODO: check status codes + return res + + +def post(url: str, + cf: bool = True, + **kwargs): + ''' + post performs a post request + ''' + # TODO: Add headers + sess = cf_session if cf else req_session + res = sess.post(url, **kwargs) + # TODO: check status codes + return res + + +def soupfiy(res): + # TODO: res datatype + """soupfiy returns a BeautifulSoup of the response's body + + Parameters + ---------- + res : + res is `request.response` + + Returns + ------- + BeautifulSoup.Soup + """ + soup = BeautifulSoup(res.text, 'html.parser') + return soup diff --git a/anime_downloader/sites/kissanime.py b/anime_downloader/sites/kissanime.py index fe61eec..7c0da07 100644 --- a/anime_downloader/sites/kissanime.py +++ b/anime_downloader/sites/kissanime.py @@ -1,19 +1,13 @@ -import cfscrape -from bs4 import BeautifulSoup import re import logging -from anime_downloader.sites.anime import BaseEpisode, SearchResult -from anime_downloader.sites.baseanimecf import BaseAnimeCF +from anime_downloader.sites.anime import AnimeEpisode, SearchResult, Anime +from anime_downloader.sites import helpers from anime_downloader.sites.exceptions import NotFoundError from anime_downloader.const import get_random_header -from
anime_downloader.session import get_session -scraper = get_session(cfscrape.create_scraper(delay=10)) - - -class KissanimeEpisode(BaseEpisode): +class KissanimeEpisode(AnimeEpisode, sitename='kissanime'): QUALITIES = ['360p', '480p', '720p', '1080p'] _base_url = 'http://kissanime.ru' VERIFY_HUMAN = True @@ -22,7 +16,7 @@ class KissanimeEpisode(BaseEpisode): episode_url = self.url+'&s=rapidvideo' logging.debug('Calling url: {}'.format(episode_url)) - ret = scraper.get(episode_url) + ret = helpers.get(episode_url, cf=True) data = self._scrape_episode(ret) return data @@ -34,26 +28,22 @@ class KissanimeEpisode(BaseEpisode): return [('rapidvideo', rapid_url)] -class KissAnime(BaseAnimeCF): - sitename = 'kissanime' +class KissAnime(Anime, sitename='kissanime'): _referer = 'http://kissanime.ru/' QUALITIES = ['360p', '480p', '720p', '1080p'] - _episodeClass = KissanimeEpisode @classmethod def search(cls, query): headers = get_random_header() headers['referer'] = 'http://kissanime.ru/' - res = scraper.post( + soup = helpers.soupfiy(helpers.post( 'http://kissanime.ru/Search/Anime', data={ 'type': 'Anime', 'keyword': query, }, headers=headers, - ) - - soup = BeautifulSoup(res.text, 'html.parser') + )) # If only one anime found, kissanime redirects to anime page. 
# We don't want that @@ -79,7 +69,8 @@ class KissAnime(BaseAnimeCF): return ret - def _scarpe_episodes(self, soup): + def _scrape_episodes(self): + soup = helpers.soupfiy(helpers.get(self.url, cf=True)) ret = soup.find('table', {'class': 'listing'}).find_all('a') ret = ['http://kissanime.ru'+str(a['href']) for a in ret] logging.debug('Unfiltered episodes : {}'.format(ret)) @@ -97,6 +88,7 @@ class KissAnime(BaseAnimeCF): ret = ret[::-1] return ret - def _scrape_metadata(self, soup): + def _scrape_metadata(self): + soup = helpers.soupfiy(helpers.get(self.url, cf=True)) info_div = soup.find('div', {'class': 'barContent'}) self.title = info_div.find('a', {'class': 'bigChar'}).text diff --git a/anime_downloader/sites/kisscartoon.py b/anime_downloader/sites/kisscartoon.py index b841533..ac8a205 100644 --- a/anime_downloader/sites/kisscartoon.py +++ b/anime_downloader/sites/kisscartoon.py @@ -1,18 +1,14 @@ -from anime_downloader import session from anime_downloader.sites.kissanime import KissAnime -from anime_downloader.sites.anime import BaseEpisode, SearchResult +from anime_downloader.sites.anime import AnimeEpisode, SearchResult +from anime_downloader.sites import helpers from anime_downloader.sites.exceptions import NotFoundError from anime_downloader.const import desktop_headers, get_random_header from bs4 import BeautifulSoup -import cfscrape import logging -scraper = session.get_session(cfscrape.create_scraper()) -session = session.get_session() - -class KisscartoonEpisode(BaseEpisode): +class KisscartoonEpisode(AnimeEpisode, sitename='kisscartoon'): _base_url = '' VERIFY_HUMAN = False _episode_list_url = 'https://kisscartoon.ac/ajax/anime/load_episodes' @@ -25,12 +21,12 @@ class KisscartoonEpisode(BaseEpisode): } headers = desktop_headers headers['referer'] = self.url - res = session.get(self._episode_list_url, params=params, headers=headers) + res = helpers.get(self._episode_list_url, params=params, headers=headers) url = res.json()['value'] headers = 
desktop_headers headers['referer'] = self.url - res = session.get('https:' + url, headers=headers) + res = helpers.get('https:' + url, headers=headers) return [( 'no_extractor', @@ -38,15 +34,12 @@ class KisscartoonEpisode(BaseEpisode): )] -class KissCartoon(KissAnime): - sitename = 'kisscartoon' - _episodeClass = KisscartoonEpisode - +class KissCartoon(KissAnime, sitename='kisscartoon'): @classmethod def search(cls, query): headers = get_random_header() headers['referer'] = 'http://kisscartoon.ac/' - res = scraper.get( + res = helpers.get( 'http://kisscartoon.ac/Search/', params={ 's': query, @@ -68,7 +61,8 @@ class KissCartoon(KissAnime): return ret - def _scarpe_episodes(self, soup): + def _scrape_episodes(self): + soup = helpers.soupfiy(helpers.get(self.url)) ret = soup.find('div', {'class': 'listing'}).find_all('a') ret = [str(a['href']) for a in ret] diff --git a/anime_downloader/sites/masterani.py b/anime_downloader/sites/masterani.py index 3e62b06..bb93212 100644 --- a/anime_downloader/sites/masterani.py +++ b/anime_downloader/sites/masterani.py @@ -6,14 +6,14 @@ import requests from bs4 import BeautifulSoup from anime_downloader import util -from anime_downloader.sites.anime import BaseAnime, BaseEpisode, SearchResult +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.const import desktop_headers from anime_downloader.session import get_session scraper = get_session(cfscrape.create_scraper()) -class MasteraniEpisode(BaseEpisode): +class MasteraniEpisode(AnimeEpisode, sitename='masterani'): QUALITIES = ['360p', '480p', '720p', '1080p'] def _get_sources(self): @@ -51,11 +51,9 @@ class MasteraniEpisode(BaseEpisode): return ret -class Masterani(BaseAnime): - sitename = 'masterani' +class Masterani(Anime, sitename='masterani'): QUALITIES = ['360p', '480p', '720p', '1080p'] _api_url = 'https://www.masterani.me/api/anime/{}/detailed' - _episodeClass = MasteraniEpisode @classmethod def search(cls, query): diff --git 
a/anime_downloader/sites/nineanime.py b/anime_downloader/sites/nineanime.py index dbc3e14..5f7adf5 100644 --- a/anime_downloader/sites/nineanime.py +++ b/anime_downloader/sites/nineanime.py @@ -1,6 +1,7 @@ from anime_downloader import session -from anime_downloader.sites.anime import BaseAnime, BaseEpisode, SearchResult +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites.exceptions import NotFoundError, AnimeDLError +from anime_downloader.sites import helpers from anime_downloader import util from anime_downloader.const import desktop_headers @@ -13,7 +14,7 @@ __all__ = ['NineAnimeEpisode', 'NineAnime'] session = session.get_session() -class NineAnimeEpisode(BaseEpisode): +class NineAnimeEpisode(AnimeEpisode, sitename='9anime'): QUALITIES = ['360p', '480p', '720p', '1080p'] _base_url = r'https://9anime.to/ajax/episode/info' ts = 0 @@ -54,14 +55,12 @@ class NineAnimeEpisode(BaseEpisode): ] -class NineAnime(BaseAnime): - sitename = '9anime' +class NineAnime(Anime, sitename='9anime'): QUALITIES = ['360p', '480p', '720p', '1080p'] - _episodeClass = NineAnimeEpisode @classmethod def search(cls, query): - r = session.get('https://www4.9anime.to/search?', params={'keyword': query}, headers=desktop_headers) + r = helpers.get('https://www4.9anime.to/search?', params={'keyword': query}, headers=desktop_headers) logging.debug(r.url) @@ -90,7 +89,8 @@ class NineAnime(BaseAnime): return ret - def _scarpe_episodes(self, soup): + def _scrape_episodes(self): + soup = helpers.soupfiy(helpers.get(self.url)) ts = soup.find('html')['data-ts'] self._episodeClass.ts = ts logging.debug('data-ts: {}'.format(ts)) @@ -123,7 +123,8 @@ class NineAnime(BaseAnime): return episode_ids - def _scrape_metadata(self, soup): + def _scrape_metadata(self): + soup = helpers.soupfiy(helpers.get(self.url)) self.title = str(soup.find('div', {'class': 'widget info'}).find( 'h2', {'class': 'title'}).text) diff --git a/anime_downloader/sites/twistmoe.py 
b/anime_downloader/sites/twistmoe.py index 32165a2..aa3be0c 100644 --- a/anime_downloader/sites/twistmoe.py +++ b/anime_downloader/sites/twistmoe.py @@ -6,7 +6,7 @@ from bs4 import BeautifulSoup import warnings from anime_downloader import session -from anime_downloader.sites.anime import BaseAnime, BaseEpisode, SearchResult +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult # Don't warn if not using fuzzywuzzy[speedup] @@ -19,17 +19,15 @@ KEY = b"k8B$B@0L8D$tDYHGmRg98sQ7!%GOEGOX27T" session = session.get_session() -class TwistMoeEpisode(BaseEpisode): +class TwistMoeEpisode(AnimeEpisode, sitename='twist.moe'): QUALITIES = ['360p', '480p', '720p', '1080p'] def _get_sources(self): return [('no_extractor', self.url)] -class TwistMoe(BaseAnime): - sitename = 'twist.moe' +class TwistMoe(Anime, sitename='twist.moe'): QUALITIES = ['360p', '480p', '720p', '1080p'] - _episodeClass = TwistMoeEpisode _api_url = "https://twist.moe/api/anime/{}/sources" @classmethod