rewrite: site handling rewrite

master
Vishnunarayan K I 2019-02-23 20:27:37 +05:30
parent f37278fd1f
commit a583aa0363
11 changed files with 156 additions and 111 deletions

View File

@ -1,22 +1,23 @@
from bs4 import BeautifulSoup
import time
import os
import logging
import sys
import copy
from anime_downloader import session
from anime_downloader.sites.exceptions import AnimeDLError, NotFoundError
from anime_downloader import util
from anime_downloader.sites import helpers
from anime_downloader.const import desktop_headers
from anime_downloader.extractors import get_extractor
from anime_downloader.downloader import get_downloader
class BaseAnime:
class Anime:
sitename = ''
title = ''
meta = dict()
subclasses = {}
QUALITIES = None
_episodeClass = object
@ -47,17 +48,22 @@ class BaseAnime:
return True
return False
def __init_subclass__(cls, sitename, **kwargs):
super().__init_subclass__(**kwargs)
cls.subclasses[sitename] = cls
@classmethod
def factory(cls, sitename: str):
return cls.subclasses[sitename]
def get_data(self):
self._episode_urls = []
r = session.get_session().get(self.url, headers=desktop_headers)
soup = BeautifulSoup(r.text, 'html.parser')
try:
self._scrape_metadata(soup)
self._scrape_metadata()
except Exception as e:
logging.debug('Metadata scraping error: {}'.format(e))
self._episode_urls = self._scarpe_episodes(soup)
self._episode_urls = self._scrape_episodes()
self._len = len(self._episode_urls)
logging.debug('EPISODE IDS: length: {}, ids: {}'.format(
@ -68,13 +74,11 @@ class BaseAnime:
return self._episode_urls
def __len__(self):
return self._len
def __getitem__(self, index):
episode_class = AnimeEpisode.subclasses[self.sitename]
if isinstance(index, int):
ep_id = self._episode_urls[index]
return self._episodeClass(ep_id[1], self.quality, parent=self,
return episode_class(ep_id[1], self.quality, parent=self,
ep_no=ep_id[0])
elif isinstance(index, slice):
anime = copy.deepcopy(self)
@ -88,20 +92,24 @@ Anime: {title}
Episode count: {length}
'''.format(name=self.sitename, title=self.title, length=len(self))
def __len__(self):
return self._len
def __str__(self):
return self.title
def _scarpe_episodes(self, soup):
def _scrape_episodes(self):
return
def _scrape_metadata(self, soup):
def _scrape_metadata(self):
return
class BaseEpisode:
class AnimeEpisode:
QUALITIES = None
title = ''
stream_url = ''
subclasses = {}
def __init__(self, url, quality='720p', parent=None,
ep_no=None):
@ -143,6 +151,15 @@ class BaseEpisode:
# qualities.remove(self.quality)
pass
def __init_subclass__(cls, sitename: str, **kwargs):
super().__init_subclass__(**kwargs)
cls.subclasses[sitename] = cls
cls.sitename = sitename
@classmethod
def factory(cls, sitename: str):
return cls.subclasses[sitename]
def source(self, index=0):
if not self._sources:
self.get_data()
@ -167,6 +184,7 @@ class BaseEpisode:
def download(self, force=False, path=None,
format='{anime_title}_{ep_no}', range_size=None):
# TODO: Remove this shit
logging.info('Downloading {}'.format(self.pretty_title))
if format:
file_name = util.format_filename(format, self)+'.mp4'
@ -184,6 +202,7 @@ class BaseEpisode:
downloader.download()
class SearchResult:
def __init__(self, title, url, poster):
self.title = title
@ -196,16 +215,3 @@ class SearchResult:
def __str__(self):
return self.title
def write_status(downloaded, total_size, start_time):
    """Write a one-line download progress status to stdout.

    Parameters
    ----------
    downloaded : int
        Bytes downloaded so far.
    total_size : int
        Total size of the download in bytes.
    start_time : float
        Epoch timestamp (``time.time()``) taken when the download started.
    """
    elapsed_time = time.time() - start_time
    # BUGFIX: the old code set `rate` to the string 'x' when elapsed_time
    # was falsy, which crashed the '{2:.2f}' format below. Use 0.0 instead.
    rate = (downloaded / 1024) / elapsed_time if elapsed_time > 0 else 0.0
    downloaded = float(downloaded) / 1048576
    total_size = float(total_size) / 1048576
    status = 'Downloaded: {0:.2f}MB/{1:.2f}MB, Rate: {2:.2f}KB/s'.format(
        downloaded, total_size, rate)
    # Trailing spaces clear leftovers of a previous, longer status line.
    sys.stdout.write("\r" + status + " " * 5 + "\r")
    sys.stdout.flush()

View File

@ -2,8 +2,7 @@ import cfscrape
import logging
import re
from anime_downloader.sites.anime import BaseEpisode, SearchResult
from anime_downloader.sites.baseanimecf import BaseAnimeCF
from anime_downloader.sites.anime import AnimeEpisode, SearchResult, Anime
from anime_downloader.sites.exceptions import NotFoundError
from anime_downloader import util
from anime_downloader.session import get_session
@ -11,7 +10,7 @@ from anime_downloader.session import get_session
scraper = get_session(cfscrape.create_scraper())
class AnimePaheEpisode(BaseEpisode):
class AnimePaheEpisode(AnimeEpisode):
QUALITIES = ['360p', '480p', '720p', '1080p']
def _get_source(self, episode_id, server):
@ -50,7 +49,7 @@ class AnimePaheEpisode(BaseEpisode):
return sources
raise NotFoundError
class AnimePahe(BaseAnimeCF):
class AnimePahe(Anime):
sitename = 'animepahe'
api_url = 'https://animepahe.com/api'
base_anime_url = 'https://animepahe.com/anime/'

View File

@ -2,14 +2,14 @@ import cfscrape
from bs4 import BeautifulSoup
import logging
from anime_downloader.sites.anime import BaseAnime
from anime_downloader.sites.anime import Anime
from anime_downloader.const import get_random_header
from anime_downloader.session import get_session
scraper = get_session(cfscrape.create_scraper())
class BaseAnimeCF(BaseAnime):
class BaseAnimeCF(Anime):
def get_data(self):
headers = get_random_header()
if hasattr(self, '_referer'):
@ -20,7 +20,7 @@ class BaseAnimeCF(BaseAnime):
self._scrape_metadata(soup)
self._episode_urls = self._scarpe_episodes(soup)
self._episode_urls = self._scrape_episodes(soup)
self._len = len(self._episode_urls)
logging.debug('EPISODE IDS: length: {}, ids: {}'.format(

View File

@ -1,19 +1,15 @@
import logging
from bs4 import BeautifulSoup
from anime_downloader import session
from anime_downloader.sites.anime import BaseAnime, BaseEpisode, SearchResult
from anime_downloader import util
session = session.get_session()
from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult
from anime_downloader.sites import helpers
class GogoanimeEpisode(BaseEpisode):
class GogoanimeEpisode(AnimeEpisode, sitename='gogoanime'):
QUALITIES = ['360p', '480p', '720p']
_base_url = 'https://www2.gogoanime.se'
def _get_sources(self):
soup = BeautifulSoup(session.get(self.url).text, 'html.parser')
soup = helpers.soupfiy(helpers.get(self.url))
extractors_url = []
for element in soup.select('.anime_muti_link > ul > li'):
@ -30,27 +26,24 @@ class GogoanimeEpisode(BaseEpisode):
return extractors_url
class GogoAnime(BaseAnime):
sitename = 'gogoanime'
class GogoAnime(Anime, sitename='gogoanime'):
QUALITIES = ['360p', '480p', '720p']
_episode_list_url = 'https://www2.gogoanime.se//load-list-episode'
_episodeClass = GogoanimeEpisode
_search_api_url = 'https://api.watchanime.cc/site/loadAjaxSearch'
@classmethod
def search(cls, query):
resp = util.get_json(
cls._search_api_url,
params={
'keyword': query,
'id': -1,
'link_web': 'https://www1.gogoanime.sh/'
}
)
params = {
'keyword': query,
'id': -1,
'link_web': 'https://www1.gogoanime.sh/'
}
soup = helpers.soupfiy(helpers.get(
cls._search_api_url, params=params
).json()['content'])
search_results = []
soup = BeautifulSoup(resp['content'], 'html.parser')
for element in soup('a', class_='ss-title'):
search_result = SearchResult(
title=element.text,
@ -61,7 +54,8 @@ class GogoAnime(BaseAnime):
search_results.append(search_result)
return search_results
def _scarpe_episodes(self, soup):
def _scrape_episodes(self):
soup = helpers.soupfiy(helpers.get(self.url))
anime_id = soup.select_one('input#movie_id').attrs['value']
params = {
'default_ep': 0,
@ -70,8 +64,8 @@ class GogoAnime(BaseAnime):
'id': anime_id,
}
res = session.get(self._episode_list_url, params=params)
soup = BeautifulSoup(res.text, 'html.parser')
soup = helpers.soupfiy(helpers.get(self._episode_list_url,
params=params))
epurls = list(
reversed(['https://www2.gogoanime.se'+a.get('href').strip()
@ -80,7 +74,8 @@ class GogoAnime(BaseAnime):
return epurls
def _scrape_metadata(self, soup):
def _scrape_metadata(self):
soup = helpers.soupfiy(helpers.get(self.url))
meta = soup.select_one('.anime_info_body_bg')
self.title = meta.find('h1').text
self.poster = meta.find('img').get('src')

View File

@ -0,0 +1 @@
from anime_downloader.sites.helpers.request import *

View File

@ -0,0 +1,61 @@
# TODO: Check without node installed
# cfscrape is a necessary dependency
import cfscrape
import logging
from bs4 import BeautifulSoup
from anime_downloader import session
__all__ = [
'get',
'post',
'soupfiy',
]
logger = logging.getLogger(__name__)
req_session = session.get_session()
cf_session = session.get_session(cfscrape.create_scraper())
def get(url: str,
        cf: bool = True,
        **kwargs):
    '''
    Perform an HTTP GET request for *url* and return the response.

    When `cf` is True the cloudflare-capable session is used,
    otherwise the plain requests session.
    '''
    # TODO: Add headers
    # TODO: check status codes
    if cf:
        sess = cf_session
    else:
        sess = req_session
    return sess.get(url, **kwargs)
def post(url: str,
         cf: bool = True,
         **kwargs):
    '''
    Perform an HTTP POST request for *url* and return the response.

    When `cf` is True the cloudflare-capable session is used,
    otherwise the plain requests session.
    '''
    # NOTE: the original docstring said "get performs a get request" —
    # a copy-paste error from get(); this helper issues a POST.
    # TODO: Add headers
    sess = cf_session if cf else req_session
    res = sess.post(url, **kwargs)
    # TODO: check status codes
    return res
def soupfiy(res):
    # TODO: res datatype
    """Parse a response body into a BeautifulSoup document.

    Parameters
    ----------
    res :
        A `requests.Response`-like object exposing a ``.text`` attribute.

    Returns
    -------
    BeautifulSoup
        The parsed document (html.parser backend).
    """
    return BeautifulSoup(res.text, 'html.parser')

View File

@ -1,19 +1,13 @@
import cfscrape
from bs4 import BeautifulSoup
import re
import logging
from anime_downloader.sites.anime import BaseEpisode, SearchResult
from anime_downloader.sites.baseanimecf import BaseAnimeCF
from anime_downloader.sites.anime import AnimeEpisode, SearchResult, Anime
from anime_downloader.sites import helpers
from anime_downloader.sites.exceptions import NotFoundError
from anime_downloader.const import get_random_header
from anime_downloader.session import get_session
scraper = get_session(cfscrape.create_scraper(delay=10))
class KissanimeEpisode(BaseEpisode):
class KissanimeEpisode(AnimeEpisode, sitename='kissanime'):
QUALITIES = ['360p', '480p', '720p', '1080p']
_base_url = 'http://kissanime.ru'
VERIFY_HUMAN = True
@ -22,7 +16,7 @@ class KissanimeEpisode(BaseEpisode):
episode_url = self.url+'&s=rapidvideo'
logging.debug('Calling url: {}'.format(episode_url))
ret = scraper.get(episode_url)
ret = helpers.get(episode_url, cf=True)
data = self._scrape_episode(ret)
return data
@ -34,26 +28,22 @@ class KissanimeEpisode(BaseEpisode):
return [('rapidvideo', rapid_url)]
class KissAnime(BaseAnimeCF):
sitename = 'kissanime'
class KissAnime(Anime, sitename='kissanime'):
_referer = 'http://kissanime.ru/'
QUALITIES = ['360p', '480p', '720p', '1080p']
_episodeClass = KissanimeEpisode
@classmethod
def search(cls, query):
headers = get_random_header()
headers['referer'] = 'http://kissanime.ru/'
res = scraper.post(
soup = helpers.soupfiy(helpers.post(
'http://kissanime.ru/Search/Anime',
data={
'type': 'Anime',
'keyword': query,
},
headers=headers,
)
soup = BeautifulSoup(res.text, 'html.parser')
))
# If only one anime found, kissanime redirects to anime page.
# We don't want that
@ -79,7 +69,8 @@ class KissAnime(BaseAnimeCF):
return ret
def _scarpe_episodes(self, soup):
def _scrape_episodes(self):
soup = helpers.soupfiy(helpers.get(self.url, cf=True))
ret = soup.find('table', {'class': 'listing'}).find_all('a')
ret = ['http://kissanime.ru'+str(a['href']) for a in ret]
logging.debug('Unfiltered episodes : {}'.format(ret))
@ -97,6 +88,7 @@ class KissAnime(BaseAnimeCF):
ret = ret[::-1]
return ret
def _scrape_metadata(self, soup):
def _scrape_metadata(self):
soup = helpers.soupfiy(helpers.get(self.url, cf=True))
info_div = soup.find('div', {'class': 'barContent'})
self.title = info_div.find('a', {'class': 'bigChar'}).text

View File

@ -1,18 +1,14 @@
from anime_downloader import session
from anime_downloader.sites.kissanime import KissAnime
from anime_downloader.sites.anime import BaseEpisode, SearchResult
from anime_downloader.sites.anime import AnimeEpisode, SearchResult
from anime_downloader.sites import helpers
from anime_downloader.sites.exceptions import NotFoundError
from anime_downloader.const import desktop_headers, get_random_header
from bs4 import BeautifulSoup
import cfscrape
import logging
scraper = session.get_session(cfscrape.create_scraper())
session = session.get_session()
class KisscartoonEpisode(BaseEpisode):
class KisscartoonEpisode(AnimeEpisode, sitename='kisscartoon'):
_base_url = ''
VERIFY_HUMAN = False
_episode_list_url = 'https://kisscartoon.ac/ajax/anime/load_episodes'
@ -25,12 +21,12 @@ class KisscartoonEpisode(BaseEpisode):
}
headers = desktop_headers
headers['referer'] = self.url
res = session.get(self._episode_list_url, params=params, headers=headers)
res = helpers.get(self._episode_list_url, params=params, headers=headers)
url = res.json()['value']
headers = desktop_headers
headers['referer'] = self.url
res = session.get('https:' + url, headers=headers)
res = helpers.get('https:' + url, headers=headers)
return [(
'no_extractor',
@ -38,15 +34,12 @@ class KisscartoonEpisode(BaseEpisode):
)]
class KissCartoon(KissAnime):
sitename = 'kisscartoon'
_episodeClass = KisscartoonEpisode
class KissCartoon(KissAnime, sitename='kisscartoon'):
@classmethod
def search(cls, query):
headers = get_random_header()
headers['referer'] = 'http://kisscartoon.ac/'
res = scraper.get(
res = helpers.get(
'http://kisscartoon.ac/Search/',
params={
's': query,
@ -68,7 +61,8 @@ class KissCartoon(KissAnime):
return ret
def _scarpe_episodes(self, soup):
def _scrape_episodes(self):
soup = helpers.soupfiy(helpers.get(self.url))
ret = soup.find('div', {'class': 'listing'}).find_all('a')
ret = [str(a['href']) for a in ret]

View File

@ -6,14 +6,14 @@ import requests
from bs4 import BeautifulSoup
from anime_downloader import util
from anime_downloader.sites.anime import BaseAnime, BaseEpisode, SearchResult
from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult
from anime_downloader.const import desktop_headers
from anime_downloader.session import get_session
scraper = get_session(cfscrape.create_scraper())
class MasteraniEpisode(BaseEpisode):
class MasteraniEpisode(AnimeEpisode, sitename='masterani'):
QUALITIES = ['360p', '480p', '720p', '1080p']
def _get_sources(self):
@ -51,11 +51,9 @@ class MasteraniEpisode(BaseEpisode):
return ret
class Masterani(BaseAnime):
sitename = 'masterani'
class Masterani(Anime, sitename='masterani'):
QUALITIES = ['360p', '480p', '720p', '1080p']
_api_url = 'https://www.masterani.me/api/anime/{}/detailed'
_episodeClass = MasteraniEpisode
@classmethod
def search(cls, query):

View File

@ -1,6 +1,7 @@
from anime_downloader import session
from anime_downloader.sites.anime import BaseAnime, BaseEpisode, SearchResult
from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult
from anime_downloader.sites.exceptions import NotFoundError, AnimeDLError
from anime_downloader.sites import helpers
from anime_downloader import util
from anime_downloader.const import desktop_headers
@ -13,7 +14,7 @@ __all__ = ['NineAnimeEpisode', 'NineAnime']
session = session.get_session()
class NineAnimeEpisode(BaseEpisode):
class NineAnimeEpisode(AnimeEpisode, sitename='9anime'):
QUALITIES = ['360p', '480p', '720p', '1080p']
_base_url = r'https://9anime.to/ajax/episode/info'
ts = 0
@ -54,14 +55,12 @@ class NineAnimeEpisode(BaseEpisode):
]
class NineAnime(BaseAnime):
sitename = '9anime'
class NineAnime(Anime, sitename='9anime'):
QUALITIES = ['360p', '480p', '720p', '1080p']
_episodeClass = NineAnimeEpisode
@classmethod
def search(cls, query):
r = session.get('https://www4.9anime.to/search?', params={'keyword': query}, headers=desktop_headers)
r = helpers.get('https://www4.9anime.to/search?', params={'keyword': query}, headers=desktop_headers)
logging.debug(r.url)
@ -90,7 +89,8 @@ class NineAnime(BaseAnime):
return ret
def _scarpe_episodes(self, soup):
def _scrape_episodes(self):
soup = helpers.soupfiy(helpers.get(self.url))
ts = soup.find('html')['data-ts']
self._episodeClass.ts = ts
logging.debug('data-ts: {}'.format(ts))
@ -123,7 +123,8 @@ class NineAnime(BaseAnime):
return episode_ids
def _scrape_metadata(self, soup):
def _scrape_metadata(self):
soup = helpers.soupfiy(helpers.get(self.url))
self.title = str(soup.find('div', {'class': 'widget info'}).find(
'h2', {'class': 'title'}).text)

View File

@ -6,7 +6,7 @@ from bs4 import BeautifulSoup
import warnings
from anime_downloader import session
from anime_downloader.sites.anime import BaseAnime, BaseEpisode, SearchResult
from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult
# Don't warn if not using fuzzywuzzy[speedup]
@ -19,17 +19,15 @@ KEY = b"k8B$B@0L8D$tDYHGmRg98sQ7!%GOEGOX27T"
session = session.get_session()
class TwistMoeEpisode(BaseEpisode):
class TwistMoeEpisode(AnimeEpisode, sitename='twist.moe'):
QUALITIES = ['360p', '480p', '720p', '1080p']
def _get_sources(self):
return [('no_extractor', self.url)]
class TwistMoe(BaseAnime):
sitename = 'twist.moe'
class TwistMoe(Anime, sitename='twist.moe'):
QUALITIES = ['360p', '480p', '720p', '1080p']
_episodeClass = TwistMoeEpisode
_api_url = "https://twist.moe/api/anime/{}/sources"
@classmethod