Fixes AnimePahe and Kwik (#316)

* Rely on AnimePahe for episode naming

* Remove use of enumerate

* Add useful debug info for mp4upload

* Fix minor regex mishap for mp4upload

* Better title naming for mp4upload

* Minor tweaks complete

* MP4Upload regex minor improvement

* Make collection of sources look better

* Revert back to using enumerate for episode numbering

* Added utility function to parse episode range

* Replace episode range collecting with utility function to parse episode range

* Add grammar option to cli.py

* Make grammar more consistent

* Implement grammar parser and add as util function

* Added search to gogoanime

* Enable getting episode sources for Gogoanime

* Minor refactor for grammar parser

* Use new episode parser by default and add gogoanime to provider choices

* Fix minor oversight to identify None type passed to episode parser

* Remove explicit checks for None type in episode string parsers

* Enable retries for request session

* Make cfscrape capable of retrying

* Make provider list more readable in cli.py

* Handle failure to find stream URL better in MP4Upload extractor

* Revert changes to match master

* Update gogoanime domain

* Fix failure to skip already downloaded files

* Fix potential bug

* Enable ranged download to resume stopped download

* Avoid constantly opening and closing file in downloader

* Make init the same as main forks

* Changed files to match main

* Add new line

* Modify init

* Added animefreak

* Add useful comment for animefreak

* Added animefreak to README.md

* Use json method in helpers.get

* Update title test for animefreak

* Prioritise mp4upload as source and fix mp4upload source url

* Better title handling and more explicit errors

* More informative mp4upload exception

* Adds changes for new animepahe API usage and fixes title handling

* Fixes kwik

* Minor fix for kwik
Gomile 2020-04-14 20:34:47 +02:00 committed by GitHub
parent d50f3f919f
commit eebcf7a38b
2 changed files with 38 additions and 56 deletions

View File

@@ -1,7 +1,9 @@
 import logging
 import re

 from anime_downloader.extractors.base_extractor import BaseExtractor
 from anime_downloader.sites import helpers
+from anime_downloader import util

 logger = logging.getLogger(__name__)
@@ -14,28 +16,20 @@ class Kwik(BaseExtractor):
     '''
     def _get_data(self):
-        # Need a javascript deobsufication api/python, so someone smarter
-        # than me can work on that for now I will add the pattern I observed
-        # alternatively you can pattern match on `src` for stream_url part
-        source_parts_re = re.compile(r'action=\"([^"]+)\".*value=\"([^"]+)\".*Click Here to Download',
-                                     re.DOTALL)
-
         # Kwik servers don't have direct link access you need to be referred
-        # from somewhere, I will just use the url itself.
+        # from somewhere, I will just use the url itself. We then
+        # have to rebuild the url. Hopefully kwik doesn't block this too
+        eval_re = re.compile(r';(eval.*\))')
+        stream_parts_re = re.compile(r'https:\/\/(.*?)\..*\/(\d+)\/(.*)\/.*token=(.*)&expires=([^\']+)')
+        title_re = re.compile(r'title>(.*)<')

-        download_url = self.url.replace('kwik.cx/e/', 'kwik.cx/f/')
-
-        kwik_text = helpers.get(download_url, referer=download_url).text
-        post_url, token = source_parts_re.search(kwik_text).group(1, 2)
-
-        stream_url = helpers.post(post_url,
-                                  referer=download_url,
-                                  data={'_token': token},
-                                  allow_redirects=False).headers['Location']
-
-        title = stream_url.rsplit('/', 1)[-1].rsplit('.', 1)[0]
+        kwik_text = helpers.get(self.url, referer=self.url).text
+        obsfucated_js = eval_re.search(kwik_text).group(1)
+        deobsfucated_js = util.deobfuscate_packed_js(obsfucated_js)
+
+        title = title_re.search(kwik_text).group(1)
+        cdn, digits, file, token, expires = stream_parts_re.search(deobsfucated_js).group(1, 2, 3, 4, 5)
+        stream_url = f'https://{cdn}.nextstream.org/get/{token}/{expires}/mp4/{digits}/{file}/{title}'

         logger.debug('Stream URL: %s' % stream_url)

         return {
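Read end to end, the new extractor stops POSTing Kwik's download form. It fetches the embed page (with the page itself as referer), pulls the packed eval(...) JavaScript out of it, deobfuscates that, and reassembles the direct stream URL from the CDN host, token and expiry recovered from the unpacked source. Below is a minimal standalone sketch of that flow using the same regexes as the diff; it assumes plain requests can stand in for the project's helpers, and it stubs out util.deobfuscate_packed_js, whose body is not part of this diff:

    import re
    import requests

    eval_re = re.compile(r';(eval.*\))')
    stream_parts_re = re.compile(
        r'https:\/\/(.*?)\..*\/(\d+)\/(.*)\/.*token=(.*)&expires=([^\']+)')
    title_re = re.compile(r'title>(.*)<')

    def deobfuscate_packed_js(packed_js):
        # Stand-in for anime_downloader.util.deobfuscate_packed_js, whose
        # implementation isn't shown in this diff; any eval-packer
        # (p,a,c,k,e,d) unpacker fits here.
        raise NotImplementedError

    def kwik_stream_url(url):
        # Kwik rejects requests that arrive without a referer; the embed
        # URL itself is an acceptable one.
        text = requests.get(url, headers={'Referer': url}).text
        packed_js = eval_re.search(text).group(1)
        unpacked_js = deobfuscate_packed_js(packed_js)
        title = title_re.search(text).group(1)
        cdn, digits, file, token, expires = stream_parts_re.search(unpacked_js).groups()
        # Rebuild the direct link from the pieces hidden in the packed JS.
        return f'https://{cdn}.nextstream.org/get/{token}/{expires}/mp4/{digits}/{file}/{title}'

The referer header is the load-bearing part: Kwik only serves the page to referred requests, so the request simply refers to itself.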

View File

@@ -4,7 +4,6 @@ import re

 from anime_downloader.sites.anime import AnimeEpisode, SearchResult, Anime
 from anime_downloader.sites.exceptions import NotFoundError
 from anime_downloader.sites import helpers
-from anime_downloader import util

 logger = logging.getLogger(__name__)
@@ -12,35 +11,40 @@ logger = logging.getLogger(__name__)
 class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'):
     QUALITIES = ['360p', '480p', '720p', '1080p']

-    def _get_source(self, episode_id, server):
+    def _get_source(self, episode_id, server, session_id):
+        # We will extract the episodes data through the animepahe api
+        # which returns the available qualities and the episode sources.
         params = {
             'id': episode_id,
             'm': 'embed',
-            'p': server
+            'p': server,
+            'session': session_id
         }
-        episode = helpers.get('https://animepahe.com/api', params=params).json()
-        sources = episode['data'][episode_id]
+        episode_data = helpers.get('https://animepahe.com/api', params=params).json()
+        episode_data = episode_data['data']
+        sources = {}
+        for info in episode_data:
+            quality = list(episode_data[info].keys())[0]
+            sources[f'{quality}p'] = episode_data[info][quality]['url']

         if self.quality in sources:
-            return (server, sources[self.quality]['url'])
+            return (server, sources[self.quality])
         return

     def _get_sources(self):
-        supported_servers = ['kwik','mp4upload','rapidvideo']
-        episode_id = self.url.rsplit('/', 1)[-1]
-        sourcetext = helpers.get(self.url, cf=True).text
+        supported_servers = ['kwik', 'mp4upload', 'rapidvideo']
+        source_text = helpers.get(self.url, cf=True).text
         sources = []
-        serverlist = re.findall(r'data-provider="([^"]+)', sourcetext)
-        for server in serverlist:
+        server_list = re.findall(r'data-provider="([^"]+)', source_text)
+        episode_id, session_id = re.search(r'getEmbeds\((\d+), "([^"]+)', source_text).groups()
+        for server in server_list:
             if server not in supported_servers:
                 continue
-            source = self._get_source(episode_id, server)
+            source = self._get_source(episode_id, server, session_id)
             if source:
                 sources.append(source)
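The rewritten _get_source also changes what the API is expected to return: 'data' is now treated as a mapping whose values each hold a single quality keyed by its name. AnimePahe does not document this, so the payload below is an assumption reverse-engineered from the parsing loop, and the keys under 'data' are purely illustrative:

    # Assumed shape of the m=embed response, inferred from the loop above.
    response = {
        'data': {
            'fansub_a': {'720': {'url': 'https://kwik.cx/e/aaa'}},
            'fansub_b': {'1080': {'url': 'https://kwik.cx/e/bbb'}},
        }
    }

    sources = {}
    for info in response['data']:
        quality = list(response['data'][info].keys())[0]    # e.g. '720'
        sources[f'{quality}p'] = response['data'][info][quality]['url']

    print(sources)   # {'720p': 'https://kwik.cx/e/aaa', '1080p': 'https://kwik.cx/e/bbb'}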
@@ -48,6 +52,7 @@ class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'):
            return sources
        raise NotFoundError

+
 class AnimePahe(Anime, sitename='animepahe'):
     sitename = 'animepahe'
     api_url = 'https://animepahe.com/api'
@@ -63,11 +68,7 @@ class AnimePahe(Anime, sitename='animepahe'):
             'q': query
         }
-        search_results = helpers.get(
-            cls.api_url,
-            params=params,
-        ).json()
+        search_results = helpers.get(cls.api_url, params=params).json()

         results = []

         for search_result in search_results['data']:
@@ -83,15 +84,8 @@ class AnimePahe(Anime, sitename='animepahe'):
         return results

     def get_data(self):
-        # Extract anime id from page, using this shoddy approach as
-        # I have neglected my regular expression skills to the point of
-        # disappointment
         resp = helpers.get(self.url, cf=True).text
-        first_search = '$.getJSON(\'/api?m=release&id='
-        last_search = '&l=\' + limit + \'&sort=\' + sort + \'&page=\' + page'
-        anime_id = (resp[resp.find(first_search)+len(first_search):
-                         resp.find(last_search)])
+        anime_id = re.search(r'&id=(\d+)', resp).groups()[0]

         self.params = {
             'm': 'release',
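The one-line regex replaces the removed find/slice dance but extracts the same thing: the numeric id embedded in the page's inline $.getJSON('/api?m=release&id=...') call. A toy check against an abridged, illustrative snippet of that page:

    import re

    snippet = "$.getJSON('/api?m=release&id=3333&l=' + limit + '&sort=' + sort + '&page=' + page)"
    anime_id = re.search(r'&id=(\d+)', snippet).groups()[0]
    print(anime_id)   # '3333'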
@@ -103,7 +97,6 @@ class AnimePahe(Anime, sitename='animepahe'):
         resp = helpers.get(self.api_url, params=self.params).json()

         self._scrape_metadata(resp['data'])
-
         self._episode_urls = self._scrape_episodes(resp)
         self._len = len(self._episode_urls)
@@ -117,9 +110,7 @@ class AnimePahe(Anime, sitename='animepahe'):
         # from the length of the episodes list to get correct episode
         # numbers
         for no, anime_ep in enumerate(ani_json, len(episodes)):
-            episodes.append(
-                (no+1, self.url + '/' + str(anime_ep['id']),)
-            )
+            episodes.append((no + 1, f'{self.url}/{anime_ep["id"]}',))

         return episodes
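The untouched enumerate(ani_json, len(episodes)) call is what keeps episode numbers continuous across API pages: each page's enumeration starts at the count of episodes already collected, and the f-string merely tidies how the per-episode URL is built. A toy example with illustrative URLs:

    # Page 1 already collected; page 2 should continue at episode 3.
    episodes = [(1, 'base/101'), (2, 'base/102')]
    page_two = [{'id': 103}, {'id': 104}]

    for no, anime_ep in enumerate(page_two, len(episodes)):
        episodes.append((no + 1, f"base/{anime_ep['id']}"))

    print(episodes[-2:])   # [(3, 'base/103'), (4, 'base/104')]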
@@ -127,10 +118,7 @@ class AnimePahe(Anime, sitename='animepahe'):
         episodes = self._collect_episodes(ani_json['data'])

         if not episodes:
-            raise NotFoundError(
-                'No episodes found in url "{}"'.format(self.url),
-                self.url
-            )
+            raise NotFoundError(f'No episodes found for {self.url}')
         else:
             # Check if other pages exist since animepahe only loads
             # first page and make subsequent calls to the api for every
@@ -147,4 +135,4 @@ class AnimePahe(Anime, sitename='animepahe'):
         return episodes

     def _scrape_metadata(self, data):
-        self.title = data[0]['anime_title']
+        self.title = data[0]['title']