Fixes AnimePahe and Kwik (#316)

* Rely on AnimePahe for episode naming
* Remove use of enumerate
* Add useful debug info for mp4upload
* Fix minor regex mishap for mp4upload
* Better title naming for mp4upload
* Minor tweaks complete
* MP4Upload regex minor improvement
* Make collection of sources look better
* Revert back to using enumerate for episode numbering
* Add utility function to parse episode range
* Replace episode range collection with the utility function
* Add grammar option to cli.py
* Make grammar more consistent
* Implement grammar parser and add it as a util function
* Add search to Gogoanime
* Enable getting episode sources for Gogoanime
* Minor refactor of grammar parser
* Use new episode parser by default and add Gogoanime to provider choices
* Fix minor oversight to identify None type passed to episode parser
* Remove explicit checks for None type in episode string parsers
* Enable retries for request session
* Make cfscrape capable of retrying
* Make provider list more readable in cli.py
* Handle failure to find stream URL better in MP4Upload extractor
* Revert changes to match master
* Update Gogoanime domain
* Fix failure to skip already-downloaded files
* Fix potential bug
* Enable ranged download to resume a stopped download
* Avoid constantly opening and closing the file in the downloader
* Make init the same as the main fork's
* Change files to match main
* Add new line
* Modify init
* Add animefreak
* Add useful comment for animefreak
* Add animefreak to README.md
* Use json method in helpers.get
* Update title test for animefreak
* Prioritise mp4upload as source and fix mp4upload source URL
* Better title handling and more explicit errors
* More informative mp4upload exception
* Add changes for new AnimePahe API usage and fix title handling
* Fix Kwik
* Minor fix for kwikmaster
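
Several items above concern the new episode-range grammar (the parse-episode-range utility and the grammar parser added to util). A minimal sketch of the idea, assuming a grammar of comma-separated items where each item is a single episode or a start:end span; the name parse_episode_grammar and the exact syntax here are illustrative, not the committed implementation:

    def parse_episode_grammar(grammar, max_episode):
        # Hypothetical sketch: yields episode numbers for strings like '1:3,7,10:'
        for item in grammar.split(','):
            if ':' in item:
                start, _, end = item.partition(':')
                # An empty bound defaults to the first or last episode
                start = int(start) if start else 1
                end = int(end) if end else max_episode
                yield from range(start, end + 1)
            else:
                yield int(item)

    # list(parse_episode_grammar('1:3,7', 12)) -> [1, 2, 3, 7]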
parent d50f3f919f
commit eebcf7a38b
anime_downloader/extractors/kwik.py

@@ -1,7 +1,9 @@
import logging
import re

from anime_downloader.extractors.base_extractor import BaseExtractor
from anime_downloader.sites import helpers
from anime_downloader import util

logger = logging.getLogger(__name__)

@@ -14,28 +16,20 @@ class Kwik(BaseExtractor):
    '''

    def _get_data(self):

        # Need a JavaScript deobfuscation api/python, so someone smarter
        # than me can work on that. For now I will add the pattern I observed.

        # Alternatively you can pattern match on `src` for the stream_url part.
        source_parts_re = re.compile(r'action=\"([^"]+)\".*value=\"([^"]+)\".*Click Here to Download',
                                     re.DOTALL)

        # Kwik servers don't have direct link access; you need to be referred
        # from somewhere, I will just use the url itself.
        # from somewhere, I will just use the url itself. We then
        # have to rebuild the url. Hopefully kwik doesn't block this too.
        eval_re = re.compile(r';(eval.*\))')
        stream_parts_re = re.compile(r'https:\/\/(.*?)\..*\/(\d+)\/(.*)\/.*token=(.*)&expires=([^\']+)')
        title_re = re.compile(r'title>(.*)<')

        download_url = self.url.replace('kwik.cx/e/', 'kwik.cx/f/')
        kwik_text = helpers.get(self.url, referer=self.url).text
        obsfucated_js = eval_re.search(kwik_text).group(1)
        deobsfucated_js = util.deobfuscate_packed_js(obsfucated_js)

        kwik_text = helpers.get(download_url, referer=download_url).text
        post_url, token = source_parts_re.search(kwik_text).group(1, 2)

        stream_url = helpers.post(post_url,
                                  referer=download_url,
                                  data={'_token': token},
                                  allow_redirects=False).headers['Location']

        title = stream_url.rsplit('/', 1)[-1].rsplit('.', 1)[0]
        title = title_re.search(kwik_text).group(1)
        cdn, digits, file, token, expires = stream_parts_re.search(deobsfucated_js).group(1, 2, 3, 4, 5)
        stream_url = f'https://{cdn}.nextstream.org/get/{token}/{expires}/mp4/{digits}/{file}/{title}'

        logger.debug('Stream URL: %s' % stream_url)
        return {
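
The Kwik fix above leans on util.deobfuscate_packed_js to unpack the eval-wrapped payload before stream_parts_re runs against it. As a rough sketch of a Dean Edwards P.A.C.K.E.R.-style unpacker, assuming the usual eval(function(p,a,c,k,e,d){...}) wrapper and a radix of at most 36; the committed util function may differ:

    import re

    def unpack_packed_js(packed):
        # Hypothetical sketch; extracts the packer arguments:
        # payload, radix, word count, and the '|'-separated word table.
        payload, radix, _count, words = re.search(
            r"}\('(.*)',\s*(\d+),\s*(\d+),\s*'([^']*)'\.split\('\|'\)",
            packed, re.DOTALL).groups()
        radix = int(radix)
        words = words.split('|')

        def lookup(match):
            token = match.group(0)
            try:
                index = int(token, radix)  # tokens index the word table in base `radix`
            except ValueError:
                return token
            if index < len(words) and words[index]:
                return words[index]
            return token

        # Replace every token in the payload with its dictionary word
        return re.sub(r'\b\w+\b', lookup, payload)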

anime_downloader/sites/animepahe.py

@@ -4,7 +4,6 @@ import re
from anime_downloader.sites.anime import AnimeEpisode, SearchResult, Anime
from anime_downloader.sites.exceptions import NotFoundError
from anime_downloader.sites import helpers
from anime_downloader import util

logger = logging.getLogger(__name__)

@@ -12,35 +11,40 @@ logger = logging.getLogger(__name__)
class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'):
    QUALITIES = ['360p', '480p', '720p', '1080p']

    def _get_source(self, episode_id, server):
    def _get_source(self, episode_id, server, session_id):
        # We will extract the episode data through the animepahe api,
        # which returns the available qualities and the episode sources.
        params = {
            'id': episode_id,
            'm': 'embed',
            'p': server
            'p': server,
            'session': session_id
        }

        episode = helpers.get('https://animepahe.com/api', params=params).json()
        sources = episode['data'][episode_id]
        episode_data = helpers.get('https://animepahe.com/api', params=params).json()
        episode_data = episode_data['data']
        sources = {}

        for info in episode_data:
            quality = list(episode_data[info].keys())[0]
            sources[f'{quality}p'] = episode_data[info][quality]['url']

        if self.quality in sources:
            return (server, sources[self.quality]['url'])
            return (server, sources[self.quality])
        return

    def _get_sources(self):
        supported_servers = ['kwik','mp4upload','rapidvideo']
        episode_id = self.url.rsplit('/', 1)[-1]

        sourcetext = helpers.get(self.url, cf=True).text
        supported_servers = ['kwik', 'mp4upload', 'rapidvideo']
        source_text = helpers.get(self.url, cf=True).text
        sources = []
        serverlist = re.findall(r'data-provider="([^"]+)', sourcetext)
        for server in serverlist:

        server_list = re.findall(r'data-provider="([^"]+)', source_text)
        episode_id, session_id = re.search(r'getEmbeds\((\d+), "([^"]+)', source_text).groups()

        for server in server_list:
            if server not in supported_servers:
                continue
            source = self._get_source(episode_id, server)
            source = self._get_source(episode_id, server, session_id)
            if source:
                sources.append(source)
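
For context on the _get_source rewrite above: the new embed endpoint keys its data by source id, with one quality entry each, and the loop flattens that into a quality-to-URL map. A small illustration of the transformation, using a fabricated payload (the real response carries more fields):

    episode_data = {
        '100001': {'720': {'url': 'https://kwik.cx/e/abc123'}},   # fabricated
        '100002': {'1080': {'url': 'https://kwik.cx/e/def456'}},  # fabricated
    }

    sources = {}
    for info in episode_data:
        # Each entry holds a single quality key; the f-string adds the 'p' suffix
        quality = list(episode_data[info].keys())[0]
        sources[f'{quality}p'] = episode_data[info][quality]['url']

    # sources == {'720p': 'https://kwik.cx/e/abc123', '1080p': 'https://kwik.cx/e/def456'}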

@@ -48,6 +52,7 @@ class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'):
        return sources
        raise NotFoundError


class AnimePahe(Anime, sitename='animepahe'):
    sitename = 'animepahe'
    api_url = 'https://animepahe.com/api'
@@ -63,11 +68,7 @@ class AnimePahe(Anime, sitename='animepahe'):
            'q': query
        }

        search_results = helpers.get(
            cls.api_url,
            params=params,
        ).json()

        search_results = helpers.get(cls.api_url, params=params).json()
        results = []

        for search_result in search_results['data']:
@@ -83,15 +84,8 @@ class AnimePahe(Anime, sitename='animepahe'):
        return results

    def get_data(self):
        # Extract anime id from page, using this shoddy approach as
        # I have neglected my regular expression skills to the point of
        # disappointment
        resp = helpers.get(self.url, cf=True).text
        first_search = '$.getJSON(\'/api?m=release&id='
        last_search = '&l=\' + limit + \'&sort=\' + sort + \'&page=\' + page'

        anime_id = (resp[resp.find(first_search)+len(first_search):
                         resp.find(last_search)])
        anime_id = re.search(r'&id=(\d+)', resp).groups()[0]

        self.params = {
            'm': 'release',
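
The get_data change above swaps the string-slicing approach for a single regex over the page body. A quick worked example with a fabricated page fragment:

    import re

    # Fabricated fragment of the AnimePahe release-page script
    resp = "$.getJSON('/api?m=release&id=3456&l=' + limit)"

    anime_id = re.search(r'&id=(\d+)', resp).groups()[0]
    # anime_id == '3456'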
@@ -103,7 +97,6 @@ class AnimePahe(Anime, sitename='animepahe'):
        resp = helpers.get(self.api_url, params=self.params).json()

        self._scrape_metadata(resp['data'])

        self._episode_urls = self._scrape_episodes(resp)
        self._len = len(self._episode_urls)

@@ -117,9 +110,7 @@ class AnimePahe(Anime, sitename='animepahe'):
        # from the length of the episodes list to get correct episode
        # numbers
        for no, anime_ep in enumerate(ani_json, len(episodes)):
            episodes.append(
                (no+1, self.url + '/' + str(anime_ep['id']),)
            )
            episodes.append((no + 1, f'{self.url}/{anime_ep["id"]}',))

        return episodes

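
Starting enumerate at len(episodes) keeps the numbering continuous when _collect_episodes is called once per API page. For example (URLs fabricated):

    episodes = [(1, 'https://animepahe.com/anime/x/101'),
                (2, 'https://animepahe.com/anime/x/102')]   # collected from page 1
    ani_json = [{'id': 103}, {'id': 104}]                   # fabricated page-2 data

    for no, anime_ep in enumerate(ani_json, len(episodes)):
        episodes.append((no + 1, f'https://animepahe.com/anime/x/{anime_ep["id"]}',))

    # episodes now runs from (1, ...) through (4, ...)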
@@ -127,10 +118,7 @@ class AnimePahe(Anime, sitename='animepahe'):
        episodes = self._collect_episodes(ani_json['data'])

        if not episodes:
            raise NotFoundError(
                'No episodes found in url "{}"'.format(self.url),
                self.url
            )
            raise NotFoundError(f'No episodes found for {self.url}')
        else:
            # Check if other pages exist since animepahe only loads
            # first page and make subsequent calls to the api for every
@@ -147,4 +135,4 @@ class AnimePahe(Anime, sitename='animepahe'):
        return episodes

    def _scrape_metadata(self, data):
        self.title = data[0]['anime_title']
        self.title = data[0]['title']