From f3aae408cab0e547ba21f5365fe367de2876cce6 Mon Sep 17 00:00:00 2001 From: nate-moo <40650681+nate-moo@users.noreply.github.com> Date: Sun, 16 May 2021 01:00:12 -0400 Subject: [PATCH 1/8] Kwik Fixes --- anime_downloader/extractors/kwik.py | 130 ++++++++++++++++++++-------- anime_downloader/sites/animepahe.py | 2 +- anime_downloader/sites/init.py | 1 + 3 files changed, 98 insertions(+), 35 deletions(-) diff --git a/anime_downloader/extractors/kwik.py b/anime_downloader/extractors/kwik.py index 37bdaf0..0e157e1 100644 --- a/anime_downloader/extractors/kwik.py +++ b/anime_downloader/extractors/kwik.py @@ -1,10 +1,15 @@ import logging +from platform import node import re +import subprocess import requests +import tempfile from anime_downloader.extractors.base_extractor import BaseExtractor +from anime_downloader.sites.helpers.request import temp_dir from anime_downloader.sites import helpers from anime_downloader import util +from anime_downloader.util import eval_in_node from subprocess import CalledProcessError logger = logging.getLogger(__name__) @@ -18,55 +23,112 @@ class Kwik(BaseExtractor): ''' def _get_data(self): + ld = logger.debug # Kwik servers don't have direct link access you need to be referred # from somewhere, I will just use the url itself. We then # have to rebuild the url. Hopefully kwik doesn't block this too # Necessary - self.url = self.url.replace(".cx/e/", ".cx/f/") - self.headers.update({"referer": self.url}) + #ld(self.url) + #self.url = self.url.replace(".cx/e/", ".cx/f/") + #self.headers.update({"referer": self.url}) - cookies = util.get_hcaptcha_cookies(self.url) + headers = {"Referer": "https://kwik.cx/"} - if not cookies: - resp = util.bypass_hcaptcha(self.url) - else: - resp = requests.get(self.url, cookies=cookies) + + + res = requests.get(self.url, headers=headers) - title_re = re.compile(r'title>(.*)<') + #ld(res.text) - kwik_text = resp.text - deobfuscated = None + evalText = helpers.soupify(res.text) - loops = 0 - while not deobfuscated and loops < 6: - try: - deobfuscated = helpers.soupify(util.deobfuscate_packed_js(re.search(r'<(script).*(var\s+_.*escape.*?)(?s)', kwik_text).group(2))) - except (AttributeError, CalledProcessError) as e: - if type(e) == AttributeError: - resp = util.bypass_hcaptcha(self.url) - kwik_text = resp.text + scripts = evalText.select("script") - if type(e) == CalledProcessError: - resp = requests.get(self.url, cookies=cookies) - finally: - cookies = resp.cookies - title = title_re.search(kwik_text).group(1) - loops += 1 + for i in scripts: + rexd = re.compile("", "") + break - post_url = deobfuscated.form["action"] - token = deobfuscated.input["value"] + tf = tempfile.mktemp(dir=temp_dir) - resp = helpers.post(post_url, headers=self.headers, params={"_token": token}, cookies=cookies, allow_redirects=False) - stream_url = resp.headers["Location"] + with open(tf, 'w', encoding="utf-8") as f: + f.write(rexd) + + #print(tf) - logger.debug('Stream URL: %s' % stream_url) + #ld(nodeRes) + + nodeRes = str(subprocess.getoutput(f"node {tf}")) + + ld(nodeRes) + + stream_url = re.search(r"source='([^;]*)';", nodeRes).group().replace("source='", "").replace("';", "") + #reg = re.compile("[\s\S]*") + + ld(stream_url) + + #kwik_text = resp.text + + #title_re = re.compile(r'title>(.*)<') + #title = title_re.search(kwik_text).group(1) return { 'stream_url': stream_url, - 'meta': { - 'title': title, - 'thumbnail': '' - }, - 'referer': None +# 'meta': { +# 'title': title, +# 'thumbnail': '' +# }, + 'referer': "https://kwik.cx/" } + + + + + 
#cookies = util.get_hcaptcha_cookies(self.url) + + #if not cookies: + # resp = util.bypass_hcaptcha(self.url) + #else: + # resp = requests.get(self.url, cookies=cookies) + + + + # + #deobfuscated = None + + #loops = 0 + #while not deobfuscated and loops < 6: + # try: + # deobfuscated = helpers.soupify(util.deobfuscate_packed_js(re.search(r'<(script).*(var\s+_.*escape.*?)(?s)', kwik_text).group(2))) + # except (AttributeError, CalledProcessError) as e: + # if type(e) == AttributeError: + # resp = util.bypass_hcaptcha(self.url) + # kwik_text = resp.text + + # if type(e) == CalledProcessError: + # resp = requests.get(self.url, cookies=cookies) + # finally: + # cookies = resp.cookies + # + # loops += 1 + + #post_url = deobfuscated.form["action"] + #token = deobfuscated.input["value"] + + #resp = helpers.post(post_url, headers=self.headers, params={"_token": token}, cookies=cookies, allow_redirects=False) + #stream_url = resp.headers["Location"] + + #logger.debug('Stream URL: %s' % stream_url) + + #return { + # 'stream_url': stream_url, + # 'meta': { + # 'title': title, + # 'thumbnail': '' + # }, + # 'referer': None + #} diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index 97ddb6b..9f09cb0 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -74,7 +74,7 @@ class AnimePahe(Anime, sitename='animepahe'): for search_result in search_results['data']: search_result_info = SearchResult( title=search_result['title'], - url=cls.base_anime_url + search_result['slug'], + url=cls.base_anime_url + search_result['session'], poster=search_result['poster'] ) diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index 054d83b..0e8d2c8 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -18,6 +18,7 @@ ALL_ANIME_SITES = [ ('animetake','animetake','AnimeTake'), ('animeonline','animeonline360','AnimeOnline'), ('animeout', 'animeout', 'AnimeOut'), + ('animepahe', 'animepahe', 'AnimePahe'), ('animerush', 'animerush', 'AnimeRush'), ('animesimple', 'animesimple', 'AnimeSimple'), ('animesuge', 'animesuge', 'AnimeSuge'), From fd7599e8629beff0c304e04aad58b39e865b08b7 Mon Sep 17 00:00:00 2001 From: nate-moo <40650681+nate-moo@users.noreply.github.com> Date: Sun, 16 May 2021 01:01:30 -0400 Subject: [PATCH 2/8] autopep8 --- anime_downloader/extractors/kwik.py | 44 +++++++++++++---------------- anime_downloader/sites/animepahe.py | 6 ++-- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/anime_downloader/extractors/kwik.py b/anime_downloader/extractors/kwik.py index 0e157e1..54c2180 100644 --- a/anime_downloader/extractors/kwik.py +++ b/anime_downloader/extractors/kwik.py @@ -29,17 +29,15 @@ class Kwik(BaseExtractor): # have to rebuild the url. 
Hopefully kwik doesn't block this too # Necessary - #ld(self.url) + # ld(self.url) #self.url = self.url.replace(".cx/e/", ".cx/f/") #self.headers.update({"referer": self.url}) headers = {"Referer": "https://kwik.cx/"} - - res = requests.get(self.url, headers=headers) - #ld(res.text) + # ld(res.text) evalText = helpers.soupify(res.text) @@ -57,18 +55,19 @@ class Kwik(BaseExtractor): with open(tf, 'w', encoding="utf-8") as f: f.write(rexd) - - #print(tf) - #ld(nodeRes) - + # print(tf) + + # ld(nodeRes) + nodeRes = str(subprocess.getoutput(f"node {tf}")) ld(nodeRes) - stream_url = re.search(r"source='([^;]*)';", nodeRes).group().replace("source='", "").replace("';", "") + stream_url = re.search( + r"source='([^;]*)';", nodeRes).group().replace("source='", "").replace("';", "") #reg = re.compile("[\s\S]*") - + ld(stream_url) #kwik_text = resp.text @@ -78,30 +77,25 @@ class Kwik(BaseExtractor): return { 'stream_url': stream_url, -# 'meta': { -# 'title': title, -# 'thumbnail': '' -# }, + # 'meta': { + # 'title': title, + # 'thumbnail': '' + # }, 'referer': "https://kwik.cx/" } - - - #cookies = util.get_hcaptcha_cookies(self.url) - #if not cookies: + # if not cookies: # resp = util.bypass_hcaptcha(self.url) - #else: + # else: # resp = requests.get(self.url, cookies=cookies) - - # #deobfuscated = None #loops = 0 - #while not deobfuscated and loops < 6: + # while not deobfuscated and loops < 6: # try: # deobfuscated = helpers.soupify(util.deobfuscate_packed_js(re.search(r'<(script).*(var\s+_.*escape.*?)(?s)', kwik_text).group(2))) # except (AttributeError, CalledProcessError) as e: @@ -113,7 +107,7 @@ class Kwik(BaseExtractor): # resp = requests.get(self.url, cookies=cookies) # finally: # cookies = resp.cookies - # + # # loops += 1 #post_url = deobfuscated.form["action"] @@ -124,11 +118,11 @@ class Kwik(BaseExtractor): #logger.debug('Stream URL: %s' % stream_url) - #return { + # return { # 'stream_url': stream_url, # 'meta': { # 'title': title, # 'thumbnail': '' # }, # 'referer': None - #} + # } diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index 9f09cb0..8db5992 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -21,7 +21,8 @@ class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): 'session': session_id } - episode_data = helpers.get('https://animepahe.com/api', params=params).json() + episode_data = helpers.get( + 'https://animepahe.com/api', params=params).json() episode_data = episode_data['data'] sources = {} @@ -39,7 +40,8 @@ class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): sources = [] server_list = re.findall(r'data-provider="([^"]+)', source_text) - episode_id, session_id = re.search("getUrls\((\d+?), \"(.*)?\"", source_text).groups() + episode_id, session_id = re.search( + "getUrls\((\d+?), \"(.*)?\"", source_text).groups() for server in server_list: if server not in supported_servers: From 52d768e6c2365091e66d4d008313aac5301b44f8 Mon Sep 17 00:00:00 2001 From: nate-moo <40650681+nate-moo@users.noreply.github.com> Date: Sun, 16 May 2021 01:04:01 -0400 Subject: [PATCH 3/8] removing commented out code --- anime_downloader/extractors/kwik.py | 63 ----------------------------- 1 file changed, 63 deletions(-) diff --git a/anime_downloader/extractors/kwik.py b/anime_downloader/extractors/kwik.py index 54c2180..3cb93f9 100644 --- a/anime_downloader/extractors/kwik.py +++ b/anime_downloader/extractors/kwik.py @@ -29,16 +29,11 @@ class Kwik(BaseExtractor): # have to rebuild the url. 
Hopefully kwik doesn't block this too # Necessary - # ld(self.url) - #self.url = self.url.replace(".cx/e/", ".cx/f/") - #self.headers.update({"referer": self.url}) headers = {"Referer": "https://kwik.cx/"} res = requests.get(self.url, headers=headers) - # ld(res.text) - evalText = helpers.soupify(res.text) scripts = evalText.select("script") @@ -55,74 +50,16 @@ class Kwik(BaseExtractor): with open(tf, 'w', encoding="utf-8") as f: f.write(rexd) - - # print(tf) - - # ld(nodeRes) - nodeRes = str(subprocess.getoutput(f"node {tf}")) ld(nodeRes) stream_url = re.search( r"source='([^;]*)';", nodeRes).group().replace("source='", "").replace("';", "") - #reg = re.compile("[\s\S]*") ld(stream_url) - #kwik_text = resp.text - - #title_re = re.compile(r'title>(.*)<') - #title = title_re.search(kwik_text).group(1) - return { 'stream_url': stream_url, - # 'meta': { - # 'title': title, - # 'thumbnail': '' - # }, 'referer': "https://kwik.cx/" } - - #cookies = util.get_hcaptcha_cookies(self.url) - - # if not cookies: - # resp = util.bypass_hcaptcha(self.url) - # else: - # resp = requests.get(self.url, cookies=cookies) - - # - #deobfuscated = None - - #loops = 0 - # while not deobfuscated and loops < 6: - # try: - # deobfuscated = helpers.soupify(util.deobfuscate_packed_js(re.search(r'<(script).*(var\s+_.*escape.*?)(?s)', kwik_text).group(2))) - # except (AttributeError, CalledProcessError) as e: - # if type(e) == AttributeError: - # resp = util.bypass_hcaptcha(self.url) - # kwik_text = resp.text - - # if type(e) == CalledProcessError: - # resp = requests.get(self.url, cookies=cookies) - # finally: - # cookies = resp.cookies - # - # loops += 1 - - #post_url = deobfuscated.form["action"] - #token = deobfuscated.input["value"] - - #resp = helpers.post(post_url, headers=self.headers, params={"_token": token}, cookies=cookies, allow_redirects=False) - #stream_url = resp.headers["Location"] - - #logger.debug('Stream URL: %s' % stream_url) - - # return { - # 'stream_url': stream_url, - # 'meta': { - # 'title': title, - # 'thumbnail': '' - # }, - # 'referer': None - # } From 4710e0fddf68cb8450d55bd0b5dfdf7952759233 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sun, 16 May 2021 20:14:34 +0300 Subject: [PATCH 4/8] optimized animepahe I completely changed the way the episodes are scraped. But as a downside only the kwik server is used. --- anime_downloader/sites/animepahe.py | 179 ++++++++++++---------------- 1 file changed, 74 insertions(+), 105 deletions(-) diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index 8db5992..ea73981 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -8,59 +8,9 @@ from anime_downloader.sites import helpers logger = logging.getLogger(__name__) -class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): - QUALITIES = ['360p', '480p', '720p', '1080p'] - - def _get_source(self, episode_id, server, session_id): - # We will extract the episodes data through the animepahe api - # which returns the available qualities and the episode sources. 
- params = { - 'id': episode_id, - 'm': 'embed', - 'p': server, - 'session': session_id - } - - episode_data = helpers.get( - 'https://animepahe.com/api', params=params).json() - episode_data = episode_data['data'] - sources = {} - - for info in range(len(episode_data)): - quality = list(episode_data[info].keys())[0] - sources[f'{quality}p'] = episode_data[info][quality]['kwik'] - - if self.quality in sources: - return (server, sources[self.quality]) - return - - def _get_sources(self): - supported_servers = ['kwik', 'mp4upload', 'rapidvideo'] - source_text = helpers.get(self.url, cf=True).text - sources = [] - - server_list = re.findall(r'data-provider="([^"]+)', source_text) - episode_id, session_id = re.search( - "getUrls\((\d+?), \"(.*)?\"", source_text).groups() - - for server in server_list: - if server not in supported_servers: - continue - source = self._get_source(episode_id, server, session_id) - if source: - sources.append(source) - - if sources: - return sources - raise NotFoundError - - class AnimePahe(Anime, sitename='animepahe'): sitename = 'animepahe' api_url = 'https://animepahe.com/api' - base_anime_url = 'https://animepahe.com/anime/' - QUALITIES = ['360p', '480p', '720p', '1080p'] - _episodeClass = AnimePaheEpisode @classmethod def search(cls, query): @@ -71,68 +21,87 @@ class AnimePahe(Anime, sitename='animepahe'): } search_results = helpers.get(cls.api_url, params=params).json() - results = [] + if search_results['total'] == []: + return [] - for search_result in search_results['data']: - search_result_info = SearchResult( - title=search_result['title'], - url=cls.base_anime_url + search_result['session'], - poster=search_result['poster'] + return [ + SearchResult( + title=result['title'] + " (" + result['type'] + ")", + url="https://animepahe.com/anime/" + result['session'] + "/" + str(result['id']), # noqa + poster=result['poster'] ) + for result in search_results['data'] + ] - logger.debug(search_result_info) - results.append(search_result_info) + def _scrape_episodes(self): + attr = self.url.split('/') + session = attr[-2] + id_ = attr[-1] + page = 1 + headers = {'referer': 'https://animepahe.com/'} - return results + apiUri = self.api_url + '?m=release&id=' + id_ + '&sort=episode_asc&page=' + jsonResponse = helpers.get(apiUri + str(page), headers=headers).json() + lastPage = jsonResponse['last_page'] + perPage = jsonResponse['per_page'] + total = jsonResponse['total'] + ep = 1 + episodes = [] - def get_data(self): - page = helpers.get(self.url, cf=True).text - anime_id = re.search(r'&id=(\d+)', page).group(1) - - self.params = { - 'm': 'release', - 'id': anime_id, - 'sort': 'episode_asc', - 'page': 1 - } - - json_resp = helpers.get(self.api_url, params=self.params).json() - self._scrape_metadata(page) - self._episode_urls = self._scrape_episodes(json_resp) - self._len = len(self._episode_urls) - return self._episode_urls - - def _collect_episodes(self, ani_json, episodes=[]): - # Avoid changing original list - episodes = episodes[:] - - # If episodes is not an empty list we ensure that we start off - # from the length of the episodes list to get correct episode - # numbers - for no, anime_ep in enumerate(ani_json, len(episodes)): - episodes.append((no + 1, f'{self.url}/{anime_ep["id"]}',)) - - return episodes - - def _scrape_episodes(self, ani_json): - episodes = self._collect_episodes(ani_json['data']) - - if not episodes: - raise NotFoundError(f'No episodes found for {self.url}') + if (lastPage == 1 and perPage > total): + for epi in jsonResponse['data']: + 
episodes.append( + f'{self.api_url}?m=links&id={epi["anime_id"]}&session={epi["session"]}&p=kwik!!TRUE!!') else: - # Check if other pages exist since animepahe only loads - # first page and make subsequent calls to the api for every - # page - start_page = ani_json['current_page'] + 1 - end_page = ani_json['last_page'] + 1 - - for i in range(start_page, end_page): - self.params['page'] = i - resp = helpers.get(self.api_url, params=self.params).json() - - episodes = self._collect_episodes(resp['data'], episodes) - + stop = False + for page in range(lastPage): + if stop: + break + for i in range(perPage): + if ep <= total: + episodes.append( + f'{self.api_url}?m=release&id={id_}&sort=episode_asc&page={page+1}&ep={ep}!!FALSE!!') + ep += 1 + else: + stop = True + break return episodes def _scrape_metadata(self, data): self.title = re.search(r'
<h1>
([^<]+)', data).group(1) + + +class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): + def _get_sources(self): + if '!!TRUE!!' in self.url: + self.url = self.url.replace('!!TRUE!!', '') + else: + headers = {'referer': 'https://animepahe.com/'} + regex = r"\&ep\=(\d+)\!\!FALSE\!\!" + episodeNum = int(re.findall(regex, self.url)[0]) + self.url = re.sub(regex, '', self.url) + jsonResponse = helpers.get(self.url, headers=headers).json() + + ep = None + for episode in jsonResponse['data']: + if int(episode['episode']) == episodeNum: + ep = episode + if ep: + self.url = 'https://animepahe.com/api?m=links&id=' + str(ep['anime_id']) + '&session=' + ep['session'] + '&p=kwik' # noqa + else: + raise NotFoundError + + episode_data = helpers.get(self.url, cf=True).json() + + episode_data = episode_data['data'] + sources = {} + + for info in range(len(episode_data)): + quality = list(episode_data[info].keys())[0] + + sources[('720' if quality == '800' else quality) + 'p'] = episode_data[info][quality]['kwik'] + + return [ + ('kwik', sources[x]) + for x in sources + ] From 6a746ea7386e50c90d0a6272d36cbbe910a4aed0 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sun, 16 May 2021 20:28:49 +0300 Subject: [PATCH 5/8] added sort sources and fixed metadata --- anime_downloader/sites/animepahe.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index ea73981..fce5721 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -27,7 +27,7 @@ class AnimePahe(Anime, sitename='animepahe'): return [ SearchResult( title=result['title'] + " (" + result['type'] + ")", - url="https://animepahe.com/anime/" + result['session'] + "/" + str(result['id']), # noqa + url="https://animepahe.com/anime/TITLE!" + result['title'] + " (" + result['type'] + ")" + '!TITLE/' + result['session'] + "/" + str(result['id']), # noqa poster=result['poster'] ) for result in search_results['data'] @@ -67,8 +67,8 @@ class AnimePahe(Anime, sitename='animepahe'): break return episodes - def _scrape_metadata(self, data): - self.title = re.search(r'
<h1>
([^<]+)', data).group(1) + def _scrape_metadata(self): + self.title = re.findall(r"TITLE!(.*?)!TITLE", self.url)[0] class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): @@ -94,14 +94,15 @@ class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): episode_data = helpers.get(self.url, cf=True).json() episode_data = episode_data['data'] - sources = {} + sources_list = [] for info in range(len(episode_data)): quality = list(episode_data[info].keys())[0] + sources_list.append({ + 'extractor': 'kwik', + 'url': episode_data[info][quality]['kwik'], + 'server': 'kwik', + 'version': 'subbed' + }) - sources[('720' if quality == '800' else quality) + 'p'] = episode_data[info][quality]['kwik'] - - return [ - ('kwik', sources[x]) - for x in sources - ] + return self.sort_sources(sources_list) From d7326e12bf3447a30e3e86c7afe75bd29b8c50de Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 20 Aug 2021 18:57:40 +0300 Subject: [PATCH 6/8] Update config.py --- anime_downloader/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/anime_downloader/config.py b/anime_downloader/config.py index 71a6f73..6e5ee71 100644 --- a/anime_downloader/config.py +++ b/anime_downloader/config.py @@ -73,6 +73,9 @@ DEFAULT_CONFIG = { 'anistream.xyz': { 'version': 'subbed', }, + 'animepahe': { + 'version': 'subbed', + }, 'animeflv': { 'version': 'subbed', 'servers': [ From 69de7f3e1bb1d8a0d8aef08b279f829b5168190d Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 20 Aug 2021 18:58:34 +0300 Subject: [PATCH 7/8] Update animepahe.py --- anime_downloader/sites/animepahe.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index fce5721..0ef1476 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -91,18 +91,17 @@ class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): else: raise NotFoundError - episode_data = helpers.get(self.url, cf=True).json() + episode_data = helpers.get(self.url).json() - episode_data = episode_data['data'] - sources_list = [] + data = episode_data['data'] + qualities = [x + 'p' for f in data for x in f] - for info in range(len(episode_data)): - quality = list(episode_data[info].keys())[0] - sources_list.append({ - 'extractor': 'kwik', - 'url': episode_data[info][quality]['kwik'], - 'server': 'kwik', - 'version': 'subbed' - }) + sources_list = [ + f[x]['kwik_adfly'] for f in data for x in f + ] - return self.sort_sources(sources_list) + for i, quality in enumerate(qualities): + if self.quality == quality: + return [("kwik", sources_list[i])] + + return [("kwik", x) for x in sources_list] From ff38e125ca3731b9619da5b1471b0019bed585fe Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 20 Aug 2021 18:58:51 +0300 Subject: [PATCH 8/8] Update kwik.py --- anime_downloader/extractors/kwik.py | 159 +++++++++++++++++++--------- 1 file changed, 108 insertions(+), 51 deletions(-) diff --git a/anime_downloader/extractors/kwik.py b/anime_downloader/extractors/kwik.py index 3cb93f9..dab6ca4 100644 --- a/anime_downloader/extractors/kwik.py +++ b/anime_downloader/extractors/kwik.py @@ -1,65 +1,122 @@ -import logging -from platform import node -import re -import subprocess +from base64 import b64decode import requests -import tempfile +import logging +import re from anime_downloader.extractors.base_extractor 
import BaseExtractor -from anime_downloader.sites.helpers.request import temp_dir from anime_downloader.sites import helpers -from anime_downloader import util -from anime_downloader.util import eval_in_node from subprocess import CalledProcessError +from anime_downloader import util logger = logging.getLogger(__name__) class Kwik(BaseExtractor): - '''Extracts video url from kwik pages, Kwik has some `security` - which allows to access kwik pages when only referred by something - and the kwik video stream when referred through the corresponding - kwik video page. - ''' + YTSM = re.compile(r"ysmm = '([^']+)") + + KWIK_PARAMS_RE = re.compile(r'\("(\w+)",\d+,"(\w+)",(\d+),(\d+),\d+\)') + KWIK_D_URL = re.compile(r'action="([^"]+)"') + KWIK_D_TOKEN = re.compile(r'value="([^"]+)"') + + CHARACTER_MAP = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/" + + def get_string(self, content: str, s1: int, s2: int) -> str: + slice_2 = self.CHARACTER_MAP[0:s2] + + acc = 0 + for n, i in enumerate(content[::-1]): + acc += int(i if i.isdigit() else 0) * s1**n + + k = '' + while acc > 0: + k = slice_2[int(acc % s2)] + k + acc = (acc - (acc % s2)) / s2 + + return k or '0' + + def decrypt(self, full_string: str, key: str, v1: int, v2: int) -> str: + v1, v2 = int(v1), int(v2) + r, i = "", 0 + + while i < len(full_string): + s = "" + while (full_string[i] != key[v2]): + s += full_string[i] + i += 1 + j = 0 + while j < len(key): + s = s.replace(key[j], str(j)) + j += 1 + r += chr(int(self.get_string(s, v2, 10)) - v1) + i += 1 + return r + + def decode_adfly(self, coded_key: str) -> str: + r, j = '', '' + for n, l in enumerate(coded_key): + if not n % 2: + r += l + else: + j = l + j + + encoded_uri = list(r + j) + numbers = ((i, n) for i, n in enumerate(encoded_uri) if str.isdigit(n)) + for first, second in zip(numbers, numbers): + xor = int(first[1]) ^ int(second[1]) + if xor < 10: + encoded_uri[first[0]] = str(xor) + + return b64decode(("".join(encoded_uri)).encode("utf-8") + )[16:-16].decode('utf-8', errors='ignore') + + def bypass_adfly(self, adfly_url): + session = requests.session() + + response_code = 302 + while response_code != 200: + adfly_content = session.get( + session.get( + adfly_url, + allow_redirects=False).headers.get('location'), + allow_redirects=False) + response_code = adfly_content.status_code + return self.decode_adfly(self.YTSM.search(adfly_content.text).group(1)) + + def get_stream_url_from_kwik(self, adfly_url): + session = requests.session() + + f_content = requests.get( + self.bypass_adfly(adfly_url), + headers={ + 'referer': 'https://kwik.cx/' + } + ) + decrypted = self.decrypt( + * + self.KWIK_PARAMS_RE.search( + f_content.text + ).group( + 1, 2, + 3, 4 + ) + ) + + code = 419 + while code != 302: + content = session.post( + self.KWIK_D_URL.search(decrypted).group(1), + allow_redirects=False, + data={ + '_token': self.KWIK_D_TOKEN.search(decrypted).group(1)}, + headers={ + 'referer': str(f_content.url), + 'cookie': f_content.headers.get('set-cookie')}) + code = content.status_code + + return content.headers.get('location') def _get_data(self): - ld = logger.debug - # Kwik servers don't have direct link access you need to be referred - # from somewhere, I will just use the url itself. We then - # have to rebuild the url. 
Hopefully kwik doesn't block this too - - # Necessary - - headers = {"Referer": "https://kwik.cx/"} - - res = requests.get(self.url, headers=headers) - - evalText = helpers.soupify(res.text) - - scripts = evalText.select("script") - - for i in scripts: - rexd = re.compile("", "") - break - - tf = tempfile.mktemp(dir=temp_dir) - - with open(tf, 'w', encoding="utf-8") as f: - f.write(rexd) - nodeRes = str(subprocess.getoutput(f"node {tf}")) - - ld(nodeRes) - - stream_url = re.search( - r"source='([^;]*)';", nodeRes).group().replace("source='", "").replace("';", "") - - ld(stream_url) - return { - 'stream_url': stream_url, - 'referer': "https://kwik.cx/" + 'stream_url': self.get_stream_url_from_kwik(self.url), + 'referer': None }
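
For reference, PATCH 8/8 replaces the node.js evaluation with a pure-Python decoder: Kwik.decrypt() splits the packed payload on the key character at index v2, replaces every key character in each chunk with its index in the key, converts that digit string from base v2 to base 10 via get_string(), and subtracts the offset v1 to recover one character of the page markup per chunk. Below is a minimal standalone sketch of that base-conversion step; the sample chunk values and offset are made up for illustration and are not taken from a real kwik page.

# Standalone sketch of the base-conversion helper behind Kwik.decrypt() in
# PATCH 8/8; the sample inputs below are illustrative only.
CHARACTER_MAP = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/"

def get_string(content: str, s1: int, s2: int) -> str:
    # Read `content` as a base-s1 number (non-digit characters count as 0),
    # then re-encode that value in base s2 using the first s2 map characters.
    alphabet = CHARACTER_MAP[:s2]
    acc = 0
    for n, ch in enumerate(reversed(content)):
        acc += int(ch if ch.isdigit() else 0) * s1 ** n
    out = ""
    while acc > 0:
        out = alphabet[acc % s2] + out
        acc //= s2
    return out or "0"

if __name__ == "__main__":
    print(get_string("10", 6, 10))                 # "10" read in base 6 is 6 -> "6"
    # decrypt() turns each chunk into chr(int(get_string(chunk, v2, 10)) - v1).
    # With a made-up offset v1 = 5, a chunk worth 77 decodes to chr(72) == "H".
    print(chr(int(get_string("115", 8, 10)) - 5))  # -> "H"

The decrypted markup is then scraped with KWIK_D_URL and KWIK_D_TOKEN, and the extractor posts that form token and follows the resulting redirect to obtain the final stream URL.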