From e3e7a82a25a126129023669caebe364a6ccf2796 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Tue, 1 Sep 2020 23:55:51 +0300 Subject: [PATCH 001/130] added a cache system for selescrape --- anime_downloader/sites/helpers/selescrape.py | 86 ++++++++++++++++---- 1 file changed, 72 insertions(+), 14 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 2d90f74..62486c7 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -15,6 +15,7 @@ import logging import click import time import json + serverLogger.setLevel(logging.ERROR) logger = logging.getLogger(__name__) @@ -73,6 +74,49 @@ def add_url_params(url, params): return url if not params else url + '?' + urlencode(params) + +def cache_request(url, request_type, response, cookies, user_agent): + timestamp = { + 'year': time.localtime().tm_year, + 'month': time.localtime().tm_mon, + 'day': time.localtime().tm_mday, + 'hour': time.localtime().tm_hour, + 'minute': time.localtime().tm_min + } + + tmp_cache = {} + tmp_cache[url] = { + 'data': response, + 'time': timestamp, + 'type': request_type, + 'cookies': cookies, + 'user_agent': user_agent + } + + with open(os.path.join(get_data_dir(), 'cached_requests.json'), 'w') as f: + json.dump(tmp_cache, f, indent=4) + +def check_cache(url): + file = os.path.join(get_data_dir(), 'cached_requests.json') + if os.path.isfile(file): + with open(file, 'r') as f: + data = json.loads(f.read()) + try: + cached_request = data[url] + except KeyError: + return None + timestamp = cached_request['time'] + if (timestamp['year'] == time.localtime().tm_year and + timestamp['month'] == time.localtime().tm_mon and + timestamp['day'] == time.localtime().tm_mday and + time.localtime().tm_hour - timestamp['hour'] <= 1): + return cached_request + else: + return None + else: + return None + + def driver_select(): # ''' it configures what each browser should do @@ -177,21 +221,35 @@ def cloudflare_wait(driver): def request(request_type, url, **kwargs): #Headers not yet supported , headers={} params = kwargs.get('params', {}) - new_url = add_url_params(url, params) - driver = driver_select() - status = status_select(driver, new_url, 'hide') - try: - cloudflare_wait(driver) - user_agent = driver.execute_script("return navigator.userAgent;") #dirty, but allows for all sorts of things above - cookies = driver.get_cookies() - text = driver.page_source - driver.close() + url = add_url_params(url, params) + if bool(check_cache(url)): + cached_data = check_cache(url) + text = cached_data['data'] + user_agent = cached_data['user_agent'] + request_type = cached_data['type'] + cookies = cached_data['cookies'] return SeleResponse(url, request_type, text, cookies, user_agent) - except: - driver.save_screenshot(f"{get_data_dir()}/screenshot.png"); - driver.close() - logger.error(f'There was a problem getting the page: {new_url}. 
\ - See the screenshot for more info:\n{get_data_dir()}/screenshot.png') + + else: + + driver = driver_select() + status = status_select(driver, url, 'hide') + + try: + cloudflare_wait(driver) + user_agent = driver.execute_script("return navigator.userAgent;") #dirty, but allows for all sorts of things above + cookies = driver.get_cookies() + text = driver.page_source + driver.close() + cache_request(url, request_type, text, cookies, user_agent) + return SeleResponse(url, request_type, text, cookies, user_agent) + + except: + driver.save_screenshot(f"{get_data_dir()}/screenshot.png"); + driver.close() + logger.error(f'There was a problem getting the page: {url}. \ + See the screenshot for more info:\t{get_data_dir()}/screenshot.png') + class SeleResponse: From 7a6aa3b494fca7ff70ec8665d53871067c6b5527 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 00:05:29 +0300 Subject: [PATCH 002/130] Update selescrape.py --- anime_downloader/sites/helpers/selescrape.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 62486c7..1166dc0 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -112,6 +112,9 @@ def check_cache(url): time.localtime().tm_hour - timestamp['hour'] <= 1): return cached_request else: + old_cache = cached_request.pop(url, None) + with open(file, 'w') as f: + json.dump(cached_request, f, indent=4) return None else: return None From 2da7b111913960769cbf35531774daf343fc89c9 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 00:39:58 +0300 Subject: [PATCH 003/130] changed to Unix Timestamps. --- anime_downloader/sites/helpers/selescrape.py | 23 ++++++++------------ 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 1166dc0..2649615 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -76,18 +76,16 @@ def add_url_params(url, params): def cache_request(url, request_type, response, cookies, user_agent): - timestamp = { - 'year': time.localtime().tm_year, - 'month': time.localtime().tm_mon, - 'day': time.localtime().tm_mday, - 'hour': time.localtime().tm_hour, - 'minute': time.localtime().tm_min - } + """ + This function saves the response from a Selenium request in a json. + It uses timestamps so that the rest of the code + can know if its an old cache or a new one. 
+ """ tmp_cache = {} tmp_cache[url] = { 'data': response, - 'time': timestamp, + 'expiry': time.time(), 'type': request_type, 'cookies': cookies, 'user_agent': user_agent @@ -105,14 +103,11 @@ def check_cache(url): cached_request = data[url] except KeyError: return None - timestamp = cached_request['time'] - if (timestamp['year'] == time.localtime().tm_year and - timestamp['month'] == time.localtime().tm_mon and - timestamp['day'] == time.localtime().tm_mday and - time.localtime().tm_hour - timestamp['hour'] <= 1): + timestamp = cached_request['expiry'] + if (time.time() - timestamp <= 3600): return cached_request else: - old_cache = cached_request.pop(url, None) + print(cached_request.pop(url, None)) with open(file, 'w') as f: json.dump(cached_request, f, indent=4) return None From 5ffb63555b081b65e000af1a8713e26db0c39a21 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 00:50:59 +0300 Subject: [PATCH 004/130] Update selescrape.py --- anime_downloader/sites/helpers/selescrape.py | 1 + 1 file changed, 1 insertion(+) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 2649615..1ab6860 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -247,6 +247,7 @@ def request(request_type, url, **kwargs): #Headers not yet supported , headers={ driver.close() logger.error(f'There was a problem getting the page: {url}. \ See the screenshot for more info:\t{get_data_dir()}/screenshot.png') + exit() From 49cee92c9849079f7ad8ee762bd8fdf5cba3cff2 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 13:00:33 +0300 Subject: [PATCH 005/130] bug fix Now properly handles the saving of the cached requests without overwriting the old ones. --- anime_downloader/sites/helpers/selescrape.py | 22 ++++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 1ab6860..2fbdce2 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -81,8 +81,12 @@ def cache_request(url, request_type, response, cookies, user_agent): It uses timestamps so that the rest of the code can know if its an old cache or a new one. 
""" - - tmp_cache = {} + file = os.path.join(get_data_dir(), 'cached_requests.json') + if os.path.isfile(file): + with open(file, 'r') as f: + tmp_cache = json.loads(f.read()) + else: + tmp_cache = {} tmp_cache[url] = { 'data': response, 'expiry': time.time(), @@ -91,7 +95,7 @@ def cache_request(url, request_type, response, cookies, user_agent): 'user_agent': user_agent } - with open(os.path.join(get_data_dir(), 'cached_requests.json'), 'w') as f: + with open(file, 'w') as f: json.dump(tmp_cache, f, indent=4) def check_cache(url): @@ -100,22 +104,22 @@ def check_cache(url): with open(file, 'r') as f: data = json.loads(f.read()) try: - cached_request = data[url] + data[url] except KeyError: return None - timestamp = cached_request['expiry'] + timestamp = data[url]['expiry'] if (time.time() - timestamp <= 3600): - return cached_request + return data[url] else: - print(cached_request.pop(url, None)) + data.pop(url, None) with open(file, 'w') as f: - json.dump(cached_request, f, indent=4) + json.dump(data, f, indent=4) return None else: return None -def driver_select(): # +def driver_select(): ''' it configures what each browser should do and gives the driver variable that is used From 169fdde334ad5d31cd55da82079e64c2490d653b Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 15:28:04 +0300 Subject: [PATCH 006/130] moved the cache to the TEMP folder --- anime_downloader/sites/helpers/selescrape.py | 46 +++++--------------- 1 file changed, 11 insertions(+), 35 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 2fbdce2..9130c5d 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -10,6 +10,7 @@ from bs4 import BeautifulSoup from logging import exception from sys import platform import requests +import tempfile import os import logging import click @@ -70,18 +71,13 @@ def get_driver_binary(): return binary_path -def add_url_params(url, params): - return url if not params else url + '?' + urlencode(params) - - - def cache_request(url, request_type, response, cookies, user_agent): """ This function saves the response from a Selenium request in a json. It uses timestamps so that the rest of the code can know if its an old cache or a new one. """ - file = os.path.join(get_data_dir(), 'cached_requests.json') + file = os.path.join(tempfile.gettempdir(), 'selenium_cached_requests.json') if os.path.isfile(file): with open(file, 'r') as f: tmp_cache = json.loads(f.read()) @@ -90,7 +86,7 @@ def cache_request(url, request_type, response, cookies, user_agent): tmp_cache[url] = { 'data': response, 'expiry': time.time(), - 'type': request_type, + 'method': request_type, 'cookies': cookies, 'user_agent': user_agent } @@ -99,7 +95,7 @@ def cache_request(url, request_type, response, cookies, user_agent): json.dump(tmp_cache, f, indent=4) def check_cache(url): - file = os.path.join(get_data_dir(), 'cached_requests.json') + file = os.path.join(tempfile.gettempdir(), 'selenium_cached_requests.json') if os.path.isfile(file): with open(file, 'r') as f: data = json.loads(f.read()) @@ -172,28 +168,6 @@ def driver_select(): return driver -def status_select(driver, url, status='hide'): - ''' - For now it doesnt do what its name suggests, - I have planned to add a status reporter of the http response code. - This part of the code is not removed because it is part of its core. - Treat it like it isnt here. 
- ''' - try: - if status == 'hide': - driver.get(url) - elif status == 'show': - r = requests.head(url) - if r.status_code == 503: - raise RuntimeError("This website's sevice is unavailable or has cloudflare on.") - driver.get(url) - return r.status_code - else: - driver.get(url) - except requests.ConnectionError: - raise RuntimeError("Failed to establish a connection using the requests library.") - - def cloudflare_wait(driver): ''' It waits until cloudflare has gone away before doing any further actions. @@ -213,8 +187,9 @@ def cloudflare_wait(driver): time.sleep(0.25) delta = time.time() - start if delta >= abort_after: - logger.error(f'Timeout:\nCouldnt bypass cloudflare. \ - See the screenshot for more info:\n{get_data_dir()}/screenshot.png') + logger.error(f'Timeout:\tCouldnt bypass cloudflare. \ + See the screenshot for more info:\t{get_data_dir()}/screenshot.png') + break title = driver.title if not title == "Just a moment...": break @@ -223,19 +198,20 @@ def cloudflare_wait(driver): def request(request_type, url, **kwargs): #Headers not yet supported , headers={} params = kwargs.get('params', {}) - url = add_url_params(url, params) + url = url if not params else url + '?' + urlencode(params) + if bool(check_cache(url)): cached_data = check_cache(url) text = cached_data['data'] user_agent = cached_data['user_agent'] - request_type = cached_data['type'] + request_type = cached_data['method'] cookies = cached_data['cookies'] return SeleResponse(url, request_type, text, cookies, user_agent) else: driver = driver_select() - status = status_select(driver, url, 'hide') + driver.get(url) try: cloudflare_wait(driver) From 05e1f4d484cd14b0bdc26c4d265a470f6a7342a6 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 16:28:40 +0300 Subject: [PATCH 007/130] Made the caching use ``SeleResponse.__dict__`` --- anime_downloader/sites/helpers/selescrape.py | 48 ++++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 9130c5d..8d8ba41 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -34,7 +34,6 @@ def open_config(): from anime_downloader.config import Config return Config - data = open_config() @@ -71,30 +70,38 @@ def get_driver_binary(): return binary_path -def cache_request(url, request_type, response, cookies, user_agent): +def cache_request(**kwargs): """ This function saves the response from a Selenium request in a json. - It uses timestamps so that the rest of the code - can know if its an old cache or a new one. + It uses timestamps so that the rest of the code can know if the cache has expired or not. 
""" + file = os.path.join(tempfile.gettempdir(), 'selenium_cached_requests.json') if os.path.isfile(file): with open(file, 'r') as f: tmp_cache = json.loads(f.read()) else: tmp_cache = {} - tmp_cache[url] = { - 'data': response, + tmp_cache[kwargs.get('url')] = { + 'data': kwargs.get('text'), 'expiry': time.time(), - 'method': request_type, - 'cookies': cookies, - 'user_agent': user_agent + 'method': kwargs.get('method'), + 'cookies': kwargs.get('cookies'), + 'user_agent': kwargs.get('user_agent') } with open(file, 'w') as f: json.dump(tmp_cache, f, indent=4) def check_cache(url): + """ + This function checks if the cache file exists, + if it exists then it will read the file + And it will verify if the cache is less than or equal to 1 hour ago + If it is it will return it as it is. + If it isn't it will delete the expired cache from the file and return None + If the file doesn't exist at all it will return None + """ file = os.path.join(tempfile.gettempdir(), 'selenium_cached_requests.json') if os.path.isfile(file): with open(file, 'r') as f: @@ -127,16 +134,16 @@ def driver_select(): driver_binary = get_driver_binary() binary = None if not driver_binary else driver_binary if browser == 'firefox': - fireFoxOptions = webdriver.FirefoxOptions() - fireFoxOptions.headless = True - fireFoxOptions.add_argument('--log fatal') + fireFox_Options = webdriver.FirefoxOptions() + fireFox_Options.headless = True + fireFox_Options.add_argument('--log fatal') if binary == None: - driver = webdriver.Firefox(options=fireFoxOptions, service_log_path=os.path.devnull) + driver = webdriver.Firefox(options=fireFox_Options, service_log_path=os.path.devnull) else: try: - driver = webdriver.Firefox(options=fireFoxOptions, service_log_path=os.path.devnull) + driver = webdriver.Firefox(options=fireFox_Options, service_log_path=os.path.devnull) except: - driver = webdriver.Firefox(executable_path=binary, options=fireFoxOptions, service_log_path=os.path.devnull) + driver = webdriver.Firefox(executable_path=binary, options=fireFox_Options, service_log_path=os.path.devnull) elif browser == 'chrome': from selenium.webdriver.chrome.options import Options chrome_options = Options() @@ -174,12 +181,12 @@ def cloudflare_wait(driver): The way it works is by getting the title of the page and as long as it is "Just a moment..." it will keep waiting. This part of the code won't make the code execute slower - if the target website has not a Cloudflare redirection. + if the target website has no Cloudflare redirection. At most it will sleep 1 second as a precaution. - Also, i have made it time out after 30 seconds, useful if the target website is not responsive + Also, i have made it time out after 50 seconds, useful if the target website is not responsive and to stop it from running infinitely. ''' - abort_after = 30 + abort_after = 50 start = time.time() title = driver.title # title = "Just a moment..." 
@@ -219,8 +226,9 @@ def request(request_type, url, **kwargs): #Headers not yet supported , headers={ cookies = driver.get_cookies() text = driver.page_source driver.close() - cache_request(url, request_type, text, cookies, user_agent) - return SeleResponse(url, request_type, text, cookies, user_agent) + seleResponse = SeleResponse(url, request_type, text, cookies, user_agent) + cache_request(**seleResponse.__dict__) + return seleResponse except: driver.save_screenshot(f"{get_data_dir()}/screenshot.png"); From 1ae4199d72709139621cae0e08cba406cb29cf35 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 16:33:05 +0300 Subject: [PATCH 008/130] added some comma --- anime_downloader/sites/helpers/selescrape.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 8d8ba41..547285d 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -98,8 +98,8 @@ def check_cache(url): This function checks if the cache file exists, if it exists then it will read the file And it will verify if the cache is less than or equal to 1 hour ago - If it is it will return it as it is. - If it isn't it will delete the expired cache from the file and return None + If it is, it will return it as it is. + If it isn't, it will delete the expired cache from the file and return None If the file doesn't exist at all it will return None """ file = os.path.join(tempfile.gettempdir(), 'selenium_cached_requests.json') From 4123b1c7038deccafface58019e5d512dcfdeac0 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 17:08:59 +0300 Subject: [PATCH 009/130] fixed bug: Firefox not getting random user agent from anime dl --- anime_downloader/sites/helpers/selescrape.py | 40 ++++++++++---------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 547285d..315ee86 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -70,7 +70,7 @@ def get_driver_binary(): return binary_path -def cache_request(**kwargs): +def cache_request(sele_response): """ This function saves the response from a Selenium request in a json. It uses timestamps so that the rest of the code can know if the cache has expired or not. 
@@ -79,15 +79,16 @@ def cache_request(**kwargs): file = os.path.join(tempfile.gettempdir(), 'selenium_cached_requests.json') if os.path.isfile(file): with open(file, 'r') as f: - tmp_cache = json.loads(f.read()) + tmp_cache = json.load(f) else: tmp_cache = {} - tmp_cache[kwargs.get('url')] = { - 'data': kwargs.get('text'), + data = sele_response.__dict__ + tmp_cache[data['url']] = { + 'data': data['text'], 'expiry': time.time(), - 'method': kwargs.get('method'), - 'cookies': kwargs.get('cookies'), - 'user_agent': kwargs.get('user_agent') + 'method': data['method'], + 'cookies': data['cookies'], + 'user_agent': data['user_agent'] } with open(file, 'w') as f: @@ -105,11 +106,9 @@ def check_cache(url): file = os.path.join(tempfile.gettempdir(), 'selenium_cached_requests.json') if os.path.isfile(file): with open(file, 'r') as f: - data = json.loads(f.read()) - try: - data[url] - except KeyError: - return None + data = json.load(f) + if url not in data: + return timestamp = data[url]['expiry'] if (time.time() - timestamp <= 3600): return data[url] @@ -117,9 +116,9 @@ def check_cache(url): data.pop(url, None) with open(file, 'w') as f: json.dump(data, f, indent=4) - return None + return else: - return None + return def driver_select(): @@ -135,15 +134,18 @@ def driver_select(): binary = None if not driver_binary else driver_binary if browser == 'firefox': fireFox_Options = webdriver.FirefoxOptions() + fireFox_Profile = webdriver.FirefoxProfile() + fireFox_Profile.set_preference("general.useragent.override", get_random_header()['user-agent']) fireFox_Options.headless = True fireFox_Options.add_argument('--log fatal') if binary == None: - driver = webdriver.Firefox(options=fireFox_Options, service_log_path=os.path.devnull) + driver = webdriver.Firefox(fireFox_Profile, options=fireFox_Options, service_log_path=os.path.devnull) else: try: - driver = webdriver.Firefox(options=fireFox_Options, service_log_path=os.path.devnull) + driver = webdriver.Firefox(fireFox_Profile, options=fireFox_Options, service_log_path=os.path.devnull) except: - driver = webdriver.Firefox(executable_path=binary, options=fireFox_Options, service_log_path=os.path.devnull) + driver = webdriver.Firefox(fireFox_Profile, executable_path=binary, options=fireFox_Options, service_log_path=os.path.devnull) + elif browser == 'chrome': from selenium.webdriver.chrome.options import Options chrome_options = Options() @@ -155,7 +157,7 @@ def driver_select(): chrome_options.add_argument(f"--user-data-dir={profile_path}") chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--window-size=1920,1080") - chrome_options.add_argument(f'user-agent={get_random_header()}') + chrome_options.add_argument(f"user-agent={get_random_header()['user-agent']}") if binary == None: if executable == None: driver = webdriver.Chrome(options=chrome_options) @@ -227,7 +229,7 @@ def request(request_type, url, **kwargs): #Headers not yet supported , headers={ text = driver.page_source driver.close() seleResponse = SeleResponse(url, request_type, text, cookies, user_agent) - cache_request(**seleResponse.__dict__) + cache_request(seleResponse) return seleResponse except: From 3999698bb1b8915de75ade92f2b1e07e6fd262da Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 17:27:21 +0300 Subject: [PATCH 010/130] Update selescrape.py --- anime_downloader/sites/helpers/selescrape.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py 
b/anime_downloader/sites/helpers/selescrape.py index 315ee86..396e6ef 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -20,7 +20,6 @@ import json serverLogger.setLevel(logging.ERROR) logger = logging.getLogger(__name__) - def get_data_dir(): ''' Gets the folder directory selescrape will store data, @@ -116,9 +115,6 @@ def check_cache(url): data.pop(url, None) with open(file, 'w') as f: json.dump(data, f, indent=4) - return - else: - return def driver_select(): From 91870487a930b087f14d9072367cd02ed9c2fa22 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 18:26:10 +0300 Subject: [PATCH 011/130] Update selescrape.py --- anime_downloader/sites/helpers/selescrape.py | 27 +++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 396e6ef..6571954 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -129,11 +129,13 @@ def driver_select(): driver_binary = get_driver_binary() binary = None if not driver_binary else driver_binary if browser == 'firefox': + fireFox_Options = webdriver.FirefoxOptions() - fireFox_Profile = webdriver.FirefoxProfile() - fireFox_Profile.set_preference("general.useragent.override", get_random_header()['user-agent']) fireFox_Options.headless = True fireFox_Options.add_argument('--log fatal') + fireFox_Profile = webdriver.FirefoxProfile() + fireFox_Profile.set_preference("general.useragent.override", get_random_header()['user-agent']) + if binary == None: driver = webdriver.Firefox(fireFox_Profile, options=fireFox_Options, service_log_path=os.path.devnull) else: @@ -145,15 +147,14 @@ def driver_select(): elif browser == 'chrome': from selenium.webdriver.chrome.options import Options chrome_options = Options() - chrome_options.add_argument("--headless") - chrome_options.add_argument("--disable-gpu") + ops = ["--headless", "--disable-gpu", '--log-level=OFF', f"--user-data-dir={profile_path}", + "--no-sandbox", "--window-size=1920,1080", f"user-agent={get_random_header()['user-agent']}"] + for option in ops: + chrome_options.add_argument(option) + profile_path = os.path.join(data_dir, 'Selenium_chromium') log_path = os.path.join(data_dir, 'chromedriver.log') - chrome_options.add_argument('--log-level=OFF') - chrome_options.add_argument(f"--user-data-dir={profile_path}") - chrome_options.add_argument("--no-sandbox") - chrome_options.add_argument("--window-size=1920,1080") - chrome_options.add_argument(f"user-agent={get_random_header()['user-agent']}") + if binary == None: if executable == None: driver = webdriver.Chrome(options=chrome_options) @@ -214,16 +215,18 @@ def request(request_type, url, **kwargs): #Headers not yet supported , headers={ return SeleResponse(url, request_type, text, cookies, user_agent) else: - + driver = driver_select() driver.get(url) try: + cloudflare_wait(driver) user_agent = driver.execute_script("return navigator.userAgent;") #dirty, but allows for all sorts of things above cookies = driver.get_cookies() text = driver.page_source driver.close() + seleResponse = SeleResponse(url, request_type, text, cookies, user_agent) cache_request(seleResponse) return seleResponse @@ -231,8 +234,8 @@ def request(request_type, url, **kwargs): #Headers not yet supported , headers={ except: driver.save_screenshot(f"{get_data_dir()}/screenshot.png"); driver.close() - 
logger.error(f'There was a problem getting the page: {url}. \ - See the screenshot for more info:\t{get_data_dir()}/screenshot.png') + logger.error(f'There was a problem getting the page: {url}.' + + '\nSee the screenshot for more info:\t{get_data_dir()}/screenshot.png') exit() From 8cb3a8dfb0e2e9f1942bdf5ffa80079fea1dcb5b Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 2 Sep 2020 18:31:25 +0300 Subject: [PATCH 012/130] removed unneeded imports --- anime_downloader/sites/helpers/selescrape.py | 71 ++++++++++---------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 6571954..3679377 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -1,15 +1,8 @@ -from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.remote.remote_connection import LOGGER as serverLogger -from selenium.webdriver.support.ui import WebDriverWait from anime_downloader.const import get_random_header -from selenium.webdriver.common.by import By from urllib.parse import urlencode -from urllib.parse import urlsplit from selenium import webdriver -from bs4 import BeautifulSoup -from logging import exception from sys import platform -import requests import tempfile import os import logging @@ -20,9 +13,10 @@ import json serverLogger.setLevel(logging.ERROR) logger = logging.getLogger(__name__) + def get_data_dir(): ''' - Gets the folder directory selescrape will store data, + Gets the folder directory selescrape will store data, such as cookies or browser extensions and logs. ''' APP_NAME = 'anime downloader' @@ -33,6 +27,7 @@ def open_config(): from anime_downloader.config import Config return Config + data = open_config() @@ -40,20 +35,23 @@ def get_browser_config(): ''' Decides what browser selescrape will use. ''' - os_browser = { #maps os to a browser - 'linux':'firefox', - 'darwin':'chrome', - 'win32':'chrome' + os_browser = { # maps os to a browser + 'linux': 'firefox', + 'darwin': 'chrome', + 'win32': 'chrome' } for a in os_browser: if platform.startswith(a): - browser = os_browser[a] + browser = os_browser[a] else: browser = 'chrome' + value = data['dl']['selescrape_browser'] value = value.lower() if value else value + if value in ['chrome', 'firefox']: browser = value + return browser @@ -88,11 +86,12 @@ def cache_request(sele_response): 'method': data['method'], 'cookies': data['cookies'], 'user_agent': data['user_agent'] - } + } with open(file, 'w') as f: json.dump(tmp_cache, f, indent=4) + def check_cache(url): """ This function checks if the cache file exists, @@ -119,8 +118,8 @@ def check_cache(url): def driver_select(): ''' - it configures what each browser should do - and gives the driver variable that is used + it configures what each browser should do + and gives the driver variable that is used to perform any actions below this function. 
''' browser = get_browser_config() @@ -135,28 +134,29 @@ def driver_select(): fireFox_Options.add_argument('--log fatal') fireFox_Profile = webdriver.FirefoxProfile() fireFox_Profile.set_preference("general.useragent.override", get_random_header()['user-agent']) - - if binary == None: + + if not binary: driver = webdriver.Firefox(fireFox_Profile, options=fireFox_Options, service_log_path=os.path.devnull) else: try: driver = webdriver.Firefox(fireFox_Profile, options=fireFox_Options, service_log_path=os.path.devnull) except: - driver = webdriver.Firefox(fireFox_Profile, executable_path=binary, options=fireFox_Options, service_log_path=os.path.devnull) + driver = webdriver.Firefox(fireFox_Profile, executable_path=binary, options=fireFox_Options, + service_log_path=os.path.devnull) elif browser == 'chrome': from selenium.webdriver.chrome.options import Options chrome_options = Options() - ops = ["--headless", "--disable-gpu", '--log-level=OFF', f"--user-data-dir={profile_path}", - "--no-sandbox", "--window-size=1920,1080", f"user-agent={get_random_header()['user-agent']}"] + ops = ["--headless", "--disable-gpu", '--log-level=OFF', f"--user-data-dir={profile_path}", + "--no-sandbox", "--window-size=1920,1080", f"user-agent={get_random_header()['user-agent']}"] for option in ops: chrome_options.add_argument(option) profile_path = os.path.join(data_dir, 'Selenium_chromium') log_path = os.path.join(data_dir, 'chromedriver.log') - if binary == None: - if executable == None: + if not binary: + if not executable: driver = webdriver.Chrome(options=chrome_options) else: from selenium.webdriver.common.desired_capabilities import DesiredCapabilities @@ -164,25 +164,26 @@ def driver_select(): cap['binary_location'] = executable driver = webdriver.Chrome(desired_capabilities=cap, options=chrome_options) else: - if executable == None: + if not executable: driver = webdriver.Chrome(options=chrome_options) else: from selenium.webdriver.common.desired_capabilities import DesiredCapabilities cap = DesiredCapabilities.CHROME cap['binary_location'] = executable - driver = webdriver.Chrome(executable_path=binary, desired_capabilities=cap, options=chrome_options, service_log_path=os.path.devnull) + driver = webdriver.Chrome(executable_path=binary, desired_capabilities=cap, options=chrome_options, + service_log_path=os.path.devnull) return driver def cloudflare_wait(driver): ''' It waits until cloudflare has gone away before doing any further actions. - The way it works is by getting the title of the page + The way it works is by getting the title of the page and as long as it is "Just a moment..." it will keep waiting. - This part of the code won't make the code execute slower + This part of the code won't make the code execute slower if the target website has no Cloudflare redirection. - At most it will sleep 1 second as a precaution. - Also, i have made it time out after 50 seconds, useful if the target website is not responsive + At most it will sleep 1 second as a precaution. + Also, i have made it time out after 50 seconds, useful if the target website is not responsive and to stop it from running infinitely. ''' abort_after = 50 @@ -199,10 +200,10 @@ def cloudflare_wait(driver): title = driver.title if not title == "Just a moment...": break - time.sleep(1) # This is necessary to make sure everything has loaded fine. + time.sleep(1) # This is necessary to make sure everything has loaded fine. 
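The inlined params handling in ``request()`` below simply appends an urlencoded query string to the URL. A self-contained example with made-up values:

    from urllib.parse import urlencode

    url = 'https://example.com/search'
    params = {'q': 'naruto'}
    url if not params else url + '?' + urlencode(params)
    # 'https://example.com/search?q=naruto'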
-def request(request_type, url, **kwargs): #Headers not yet supported , headers={} +def request(request_type, url, **kwargs): # Headers not yet supported , headers={} params = kwargs.get('params', {}) url = url if not params else url + '?' + urlencode(params) @@ -222,11 +223,11 @@ def request(request_type, url, **kwargs): #Headers not yet supported , headers={ try: cloudflare_wait(driver) - user_agent = driver.execute_script("return navigator.userAgent;") #dirty, but allows for all sorts of things above + user_agent = driver.execute_script("return navigator.userAgent;") cookies = driver.get_cookies() text = driver.page_source driver.close() - + seleResponse = SeleResponse(url, request_type, text, cookies, user_agent) cache_request(seleResponse) return seleResponse @@ -235,11 +236,10 @@ def request(request_type, url, **kwargs): #Headers not yet supported , headers={ driver.save_screenshot(f"{get_data_dir()}/screenshot.png"); driver.close() logger.error(f'There was a problem getting the page: {url}.' + - '\nSee the screenshot for more info:\t{get_data_dir()}/screenshot.png') + '\nSee the screenshot for more info:\t{get_data_dir()}/screenshot.png') exit() - class SeleResponse: """ Class for the selenium response. @@ -257,6 +257,7 @@ class SeleResponse: user_agent: string User agent used on the webpage """ + def __init__(self, url, method, text, cookies, user_agent): self.url = url self.method = method From 924d84499c73dbd459f872a002b8a792aafaf52b Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sun, 18 Oct 2020 18:51:09 +0300 Subject: [PATCH 013/130] Update selescrape.py --- anime_downloader/sites/helpers/selescrape.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 3679377..2b6dc13 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -145,6 +145,8 @@ def driver_select(): service_log_path=os.path.devnull) elif browser == 'chrome': + profile_path = os.path.join(data_dir, 'Selenium_chromium') + log_path = os.path.join(data_dir, 'chromedriver.log') from selenium.webdriver.chrome.options import Options chrome_options = Options() ops = ["--headless", "--disable-gpu", '--log-level=OFF', f"--user-data-dir={profile_path}", @@ -152,9 +154,6 @@ def driver_select(): for option in ops: chrome_options.add_argument(option) - profile_path = os.path.join(data_dir, 'Selenium_chromium') - log_path = os.path.join(data_dir, 'chromedriver.log') - if not binary: if not executable: driver = webdriver.Chrome(options=chrome_options) From c067bcad78233a0466d89c16161c1f60fd0b7937 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sat, 24 Oct 2020 22:39:35 +0300 Subject: [PATCH 014/130] fixed a bug --- anime_downloader/sites/helpers/selescrape.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 2b6dc13..88ba4d0 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -195,11 +195,12 @@ def cloudflare_wait(driver): if delta >= abort_after: logger.error(f'Timeout:\tCouldnt bypass cloudflare. 
\ See the screenshot for more info:\t{get_data_dir()}/screenshot.png') - break + return 1 title = driver.title if not title == "Just a moment...": break - time.sleep(1) # This is necessary to make sure everything has loaded fine. + time.sleep(2) # This is necessary to make sure everything has loaded fine. + return 0 def request(request_type, url, **kwargs): # Headers not yet supported , headers={} @@ -221,11 +222,15 @@ def request(request_type, url, **kwargs): # Headers not yet supported , headers try: - cloudflare_wait(driver) + exit_code = cloudflare_wait(driver) user_agent = driver.execute_script("return navigator.userAgent;") cookies = driver.get_cookies() text = driver.page_source driver.close() + if exit_code == 0: + pass + else: + return SeleResponse(url, request_type, None, cookies, user_agent) seleResponse = SeleResponse(url, request_type, text, cookies, user_agent) cache_request(seleResponse) From 0108b852e6d5a05ed3e65ebce283cb25c87b5085 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 29 Oct 2020 23:42:44 +0200 Subject: [PATCH 015/130] Update selescrape.py --- anime_downloader/sites/helpers/selescrape.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 88ba4d0..2faad32 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -206,9 +206,9 @@ def cloudflare_wait(driver): def request(request_type, url, **kwargs): # Headers not yet supported , headers={} params = kwargs.get('params', {}) url = url if not params else url + '?' + urlencode(params) - - if bool(check_cache(url)): - cached_data = check_cache(url) + check_caches = check_cache(url) + if bool(check_caches): + cached_data = check_caches text = cached_data['data'] user_agent = cached_data['user_agent'] request_type = cached_data['method'] From 2a77e6934d0802144bb75e11be9c4377d074f31f Mon Sep 17 00:00:00 2001 From: czoins Date: Wed, 25 Nov 2020 21:01:30 +0100 Subject: [PATCH 016/130] Improve test command functionality --- anime_downloader/commands/test.py | 176 ++++++++++++++++++++++++------ anime_downloader/util.py | 8 ++ 2 files changed, 152 insertions(+), 32 deletions(-) diff --git a/anime_downloader/commands/test.py b/anime_downloader/commands/test.py index afb106d..fca0d92 100644 --- a/anime_downloader/commands/test.py +++ b/anime_downloader/commands/test.py @@ -3,11 +3,15 @@ import sys import threading import os import click +from fuzzywuzzy import fuzz from anime_downloader.sites import get_anime_class, ALL_ANIME_SITES from anime_downloader import util from anime_downloader.__version__ import __version__ +import requests +logging.getLogger(requests.packages.urllib3.__package__).setLevel(logging.ERROR) #disable Retry warnings + logger = logging.getLogger(__name__) echo = click.echo @@ -15,54 +19,162 @@ sitenames = [v[1] for v in ALL_ANIME_SITES] class SiteThread(threading.Thread): - def __init__(self, site, *args, **kwargs): - self.site = site + def __init__(self, provider, anime, verify, v_tries, *args, **kwargs): + self.provider = provider + self.anime = anime + self.verify = verify + self.v_tries = v_tries + self.search_result = None self.exception = None super().__init__(*args, **kwargs) def run(self): try: - ani = get_anime_class(self.site) + ani = get_anime_class(self.provider) + self.search_result = ani.search(self.anime) + if self.search_result: + if self.verify: + ratios = 
[[fuzz.token_set_ratio(self.anime.lower(), sr.title.lower()), sr] for sr in self.search_result] + ratios = sorted(ratios, key=lambda x: x[0], reverse=True) + + end = len(ratios) + for r in range(self.v_tries): + if r == end: break + try: + anime_choice = ratios[r][1] + anime_url = ani(anime_choice.url) + stream_url = anime_url[0].source().stream_url + self.exception = None + break + except Exception as e: + self.exception = e + + self.search_result = util.format_search_results(self.search_result) - # this should be more dynamic - sr = ani.search('naruto')[0] - - anime = ani(sr.url) - - stream_url = anime[0].source().stream_url except Exception as e: self.exception = e - @click.command() -@click.argument('test_query', default='naruto') -def command(test_query): - """Test all sites to see which ones are working and which ones aren't. Test naruto as a default.""" +@click.argument('anime', default='naruto') +@click.option( + '-f', '--full-search', is_flag=True, + help='Don\'t ask to stop searching on anime match.') +@click.option( + '-p', '--providers', + help='Limit search to specific provider(s) separated by a comma.' +) +@click.option( + '-e', '--exclude', + help='Provider(s) to exclude separated by a comma.' +) +@click.option( + '-s', '--selenium', is_flag=True, + help='Enable providers using selenium.' +) +@click.option( + '-v', '--verify', is_flag=True, + help='Verify extraction of stream url in case of anime match.' +) +@click.option( + '-n', '--v-tries', type=int, default=1, + help='Number of tries to extract stream url. (default: 1)' +) +@click.option( + '-z', '--no-fuzzy', is_flag=True, + help='Disable fuzzy search to include possible inaccurate results.' +) +@click.option( + '-d', '--no-results', is_flag=True, + help='Disable echoing the search results at the end of testing.' +) +@click.option( + '-t', '--timeout', type=int, default=10, + help='How long to wait for a site to respond. (default: 10s)' +) + +def command(anime, full_search, providers, exclude, selenium, verify, v_tries, no_fuzzy, no_results, timeout): + """Test all sites to see which ones are working and which ones aren't. Test naruto as a default. Return results for each provider.""" + util.print_info(__version__) logger = logging.getLogger("anime_downloader") logger.setLevel(logging.ERROR) - threads = [] + if providers: + providers = [p.strip() for p in providers.split(",")] + for p in providers: + if not p in sitenames: + raise click.BadParameter(f"{p}. Choose from {', '.join(sitenames)}") + else: + providers = sitenames + if not selenium: + providers.remove("kisscartoon") - for site in sitenames: - t = SiteThread(site, daemon=True) + if exclude: + exclude = [e.strip() for e in exclude.split(",")] + for e in exclude: + if not e in sitenames: + raise click.BadParameter(f"{e}. 
Choose from {', '.join(sitenames)}") + else: + if e in providers: + providers.remove(e) + + if os.name == 'nt': + p, f = '', '' # Emojis don't work in cmd + else: + p, f = '✅ ', '❌ ' + + if verify: + timeout = timeout + (3 * (v_tries - 1)) + + threads = [] + matches = [] + + for provider in providers: + t = SiteThread(provider, anime, verify, v_tries, daemon=True) t.start() threads.append(t) - for thread in threads: - if os.name == 'nt': - p, f = 'Works: ', "Doesn't work: " # Emojis doesn't work in cmd - else: - p, f = '✅ ', '❌ ' - thread.join(timeout=10) - if not thread.is_alive(): - if not thread.exception: - # echo(click.style('Works ', fg='green') + site) - echo(click.style(p, fg='green') + thread.site) + for i, thread in enumerate(threads): + try: + click.echo(f"[{i+1} of {len(threads)}] Searching ", nl=False) + click.secho(f"{thread.provider}", nl=False, fg="cyan") + click.echo(f"... (CTRL-C to stop) : ", nl=False) + thread.join(timeout=timeout) + if not thread.is_alive(): + if not thread.exception: + if thread.search_result: + if not no_fuzzy: + ratio = fuzz.token_set_ratio(anime.lower(), thread.search_result.lower()) + else: + ratio = 100 + if ratio > 50: + matches.append([thread.provider, thread.search_result, ratio]) + click.secho(p + "Works, anime found.", fg="green") + if not full_search: + click.echo(f"\n- - -{thread.provider}- - -\n\n{thread.search_result}") + confirm = click.confirm(f"Found anime in {thread.provider}. Keep seaching? (use -f / --full-search to disable this prompt)", default=True) + if not confirm: + break + else: + click.secho(p + "Works, anime not found.", fg="yellow") + else: + click.secho(p + "Works, anime not found.", fg="yellow") + else: + logging.debug('Error occurred during testing.') + logging.debug(thread.exception) + if thread.search_result: + click.secho(f + "Not working: anime found, extraction failed.", fg="red") + else: + click.secho(f + "Not working.", fg="red") else: - logging.debug('Error occurred during testing') - logging.debug(thread.exception) - echo(click.style(f, fg='red') + thread.site) - else: - logging.debug('timeout during testing') - echo(click.style(f, fg='red') + thread.site) + logging.debug('Timeout during testing.') + click.secho(f + "Not working: Timeout. Use -t to specify longer waiting period.", fg="red") + + except KeyboardInterrupt: + skip = click.confirm(f"\nSkip {thread.provider} and continue searching? 
(Press enter for Yes)", default=True) + if not skip: + break + + if not no_results: + click.echo("\n" + util.format_matches(matches)) + \ No newline at end of file diff --git a/anime_downloader/util.py b/anime_downloader/util.py index 9e5f4ed..94bc21d 100644 --- a/anime_downloader/util.py +++ b/anime_downloader/util.py @@ -77,6 +77,14 @@ def format_search_results(search_results): table = '\n'.join(table.split('\n')[::-1]) return table +def format_matches(matches): + if matches: + table = [[[p], [sr]] for p, sr, r in sorted(matches, key = lambda x: x[2], reverse=True)] + table = [a for b in table for a in b] + else: + table = [["None"]] + table = tabulate(table, ['RESULTS'], tablefmt='grid', colalign=("center",)) + return table def search(query, provider, val=None, season_info=None, ratio=50): # Will use animeinfo sync if season_info is provided From 8199967c5c2840d9932b2e05881ba0ac001e4bde Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Sun, 7 Feb 2021 17:54:31 +0000 Subject: [PATCH 017/130] Add -s/--sub and -d/--dub flags --- anime_downloader/commands/dl.py | 21 +++++++++++++++++++-- anime_downloader/sites/anime.py | 19 ++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/anime_downloader/commands/dl.py b/anime_downloader/commands/dl.py index e705066..cbb79d5 100644 --- a/anime_downloader/commands/dl.py +++ b/anime_downloader/commands/dl.py @@ -79,12 +79,23 @@ sitenames = [v[1] for v in ALL_ANIME_SITES] help="Set the speed limit (in KB/s or MB/s) for downloading when using aria2c", metavar='K/M' ) +@click.option( + "--sub", "-s", type=bool, is_flag=True, + help="If flag is set, it downloads the subbed version of an anime if the provider supports it. Must not be used with the --dub/-d flag") +@click.option( + "--dub", "-d", type=bool, is_flag=True, + help="If flag is set, it downloads the dubbed version of anime if the provider supports it. Must not be used with the --sub/-s flag") @click.pass_context def command(ctx, anime_url, episode_range, url, player, skip_download, quality, force_download, download_dir, file_format, provider, - external_downloader, chunk_size, disable_ssl, fallback_qualities, choice, skip_fillers, speed_limit): + external_downloader, chunk_size, disable_ssl, fallback_qualities, choice, skip_fillers, speed_limit, sub, dub): """ Download the anime using the url or search for it. 
""" + + if sub and dub: + raise click.UsageError( + "--dub/-d and --sub/-s flags cannot be used together") + query = anime_url[:] util.print_info(__version__) @@ -98,8 +109,14 @@ def command(ctx, anime_url, episode_range, url, player, skip_download, quality, anime_url, _ = util.search(anime_url, provider, choice) cls = get_anime_class(anime_url) + subbed = None + + if sub or dub: + subbed = subbed is not None + anime = cls(anime_url, quality=quality, - fallback_qualities=fallback_qualities) + fallback_qualities=fallback_qualities, + subbed=subbed) logger.info('Found anime: {}'.format(anime.title)) animes = util.parse_ep_str(anime, episode_range) diff --git a/anime_downloader/sites/anime.py b/anime_downloader/sites/anime.py index 5006924..34cdbbf 100644 --- a/anime_downloader/sites/anime.py +++ b/anime_downloader/sites/anime.py @@ -43,6 +43,7 @@ class Anime: title = '' meta = dict() subclasses = {} + subbed = None QUALITIES = ['360p', '480p', '720p', '1080p'] @classmethod @@ -64,8 +65,10 @@ class Anime: def __init__(self, url=None, quality='720p', fallback_qualities=None, - _skip_online_data=False): + _skip_online_data=False, + subbed=None): self.url = url + self.subbed = subbed if fallback_qualities is None: fallback_qualities = ['720p', '480p', '360p'] @@ -342,7 +345,8 @@ class AnimeEpisode: except IndexError: raise NotFoundError("No episode sources found.") - ext = get_extractor(sitename)(url, quality=self.quality, headers=self.headers) + ext = get_extractor(sitename)( + url, quality=self.quality, headers=self.headers) self._sources[index] = ext return ext @@ -377,19 +381,24 @@ class AnimeEpisode: Using the example above, this function will return: [('no_extractor', 'https://twist.moe/anime/...')] as it prioritizes preferred language over preferred server """ + if self._parent and self._parent.subbed is not None: + version = "subbed" if self._parent.subbed else "dubbed" + else: + version = self.config.get('version', 'subbed') - version = self.config.get('version', 'subbed') # TODO add a flag for this servers = self.config.get('servers', ['']) logger.debug('Data : {}'.format(data)) # Sorts the dicts by preferred server in config - sorted_by_server = sorted(data, key=lambda x: servers.index(x['server']) if x['server'] in servers else len(data)) + sorted_by_server = sorted(data, key=lambda x: servers.index( + x['server']) if x['server'] in servers else len(data)) # Sorts the above by preferred language # resulting in a list with the dicts sorted by language and server # with language being prioritized over server - sorted_by_lang = list(sorted(sorted_by_server, key=lambda x: x['version'] == version, reverse=True)) + sorted_by_lang = list( + sorted(sorted_by_server, key=lambda x: x['version'] == version, reverse=True)) logger.debug('Sorted sources : {}'.format(sorted_by_lang)) return '' if not sorted_by_lang else [(sorted_by_lang[0]['extractor'], sorted_by_lang[0]['url'])] From f95ffecdaa11b51c4e5c3d7ab38ab0b4cf6a1ada Mon Sep 17 00:00:00 2001 From: czoins Date: Tue, 16 Feb 2021 23:55:37 +0100 Subject: [PATCH 018/130] Disable results table and enable full search by default --- anime_downloader/commands/test.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/anime_downloader/commands/test.py b/anime_downloader/commands/test.py index fca0d92..3d3dbbf 100644 --- a/anime_downloader/commands/test.py +++ b/anime_downloader/commands/test.py @@ -57,8 +57,8 @@ class SiteThread(threading.Thread): @click.command() @click.argument('anime', default='naruto') 
@click.option( - '-f', '--full-search', is_flag=True, - help='Don\'t ask to stop searching on anime match.') + '-f', '--prompt-found', is_flag=True, + help='Ask to stop searching on anime match.') @click.option( '-p', '--providers', help='Limit search to specific provider(s) separated by a comma.' @@ -84,15 +84,15 @@ class SiteThread(threading.Thread): help='Disable fuzzy search to include possible inaccurate results.' ) @click.option( - '-d', '--no-results', is_flag=True, - help='Disable echoing the search results at the end of testing.' + '-r', '--print-results', is_flag=True, + help='Enable echoing the search results at the end of testing.' ) @click.option( '-t', '--timeout', type=int, default=10, help='How long to wait for a site to respond. (default: 10s)' ) -def command(anime, full_search, providers, exclude, selenium, verify, v_tries, no_fuzzy, no_results, timeout): +def command(anime, prompt_found, providers, exclude, selenium, verify, v_tries, no_fuzzy, print_results, timeout): """Test all sites to see which ones are working and which ones aren't. Test naruto as a default. Return results for each provider.""" util.print_info(__version__) @@ -150,9 +150,10 @@ def command(anime, full_search, providers, exclude, selenium, verify, v_tries, n if ratio > 50: matches.append([thread.provider, thread.search_result, ratio]) click.secho(p + "Works, anime found.", fg="green") - if not full_search: - click.echo(f"\n- - -{thread.provider}- - -\n\n{thread.search_result}") - confirm = click.confirm(f"Found anime in {thread.provider}. Keep seaching? (use -f / --full-search to disable this prompt)", default=True) + if prompt_found: + if print_results: + click.echo(f"\n- - -{thread.provider}- - -\n\n{thread.search_result}") + confirm = click.confirm(f"Found anime in {thread.provider}. Keep seaching?", default=True) if not confirm: break else: @@ -175,6 +176,8 @@ def command(anime, full_search, providers, exclude, selenium, verify, v_tries, n if not skip: break - if not no_results: + if print_results: click.echo("\n" + util.format_matches(matches)) + else: + click.echo("\n" + "Test finished.") \ No newline at end of file From 2a4c03705915c1881394fcaf8a9b803d881eea0a Mon Sep 17 00:00:00 2001 From: czoins Date: Thu, 18 Feb 2021 08:02:02 +0100 Subject: [PATCH 019/130] Remove selenium related code --- anime_downloader/commands/test.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/anime_downloader/commands/test.py b/anime_downloader/commands/test.py index 3d3dbbf..06e1775 100644 --- a/anime_downloader/commands/test.py +++ b/anime_downloader/commands/test.py @@ -67,10 +67,6 @@ class SiteThread(threading.Thread): '-e', '--exclude', help='Provider(s) to exclude separated by a comma.' ) -@click.option( - '-s', '--selenium', is_flag=True, - help='Enable providers using selenium.' -) @click.option( '-v', '--verify', is_flag=True, help='Verify extraction of stream url in case of anime match.' @@ -92,7 +88,7 @@ class SiteThread(threading.Thread): help='How long to wait for a site to respond. (default: 10s)' ) -def command(anime, prompt_found, providers, exclude, selenium, verify, v_tries, no_fuzzy, print_results, timeout): +def command(anime, prompt_found, providers, exclude, verify, v_tries, no_fuzzy, print_results, timeout): """Test all sites to see which ones are working and which ones aren't. Test naruto as a default. 
Return results for each provider.""" util.print_info(__version__) @@ -106,8 +102,6 @@ def command(anime, prompt_found, providers, exclude, selenium, verify, v_tries, raise click.BadParameter(f"{p}. Choose from {', '.join(sitenames)}") else: providers = sitenames - if not selenium: - providers.remove("kisscartoon") if exclude: exclude = [e.strip() for e in exclude.split(",")] From b5c104f6a5aad89a05949e7a8da3120632a30512 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 25 Feb 2021 16:16:57 +0200 Subject: [PATCH 020/130] fixed streamtape (#637) --- anime_downloader/extractors/streamtape.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/anime_downloader/extractors/streamtape.py b/anime_downloader/extractors/streamtape.py index 7a419b1..ba5424b 100644 --- a/anime_downloader/extractors/streamtape.py +++ b/anime_downloader/extractors/streamtape.py @@ -7,9 +7,12 @@ import re class StreamTape(BaseExtractor): def _get_data(self): resp = helpers.get(self.url, cache=False).text - url = "https:" + \ - re.search( - "document\.getElementById\([\"']videolink[\"']\);.*?innerHTML.*?=.*?[\"'](.*?)[\"']", resp).group(1) + groups = re.search( + r"document\.getElementById\(.*?\)\.innerHTML = [\"'](.*?)[\"'] \+ [\"'](.*?)[\"']", + resp + ) + url = "https:" + groups[1] + groups[2] + return { 'stream_url': url, From 99991bce477106a3a4ee5a24fc93e1fcadc1d355 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Tue, 9 Mar 2021 19:35:03 +0200 Subject: [PATCH 021/130] properly embedded the image --- docs/usage/installation.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/usage/installation.rst b/docs/usage/installation.rst index a58b0fa..6d25f46 100644 --- a/docs/usage/installation.rst +++ b/docs/usage/installation.rst @@ -139,7 +139,8 @@ The following steps install Anime-Downloader; - Delete the highlighted line as to match the image below; -:image: https://i.imgur.com/0fRiNP6.png +.. image:: https://i.imgur.com/0fRiNP6.png + :width: 250 - Press ctrl+o then enter then press ctrl+X. 
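Taken together, the selescrape patches above settle on a single cached-request flow. The sketch below is a condensed summary that reuses the helper names from ``selescrape.py`` (``check_cache``, ``driver_select``, ``cloudflare_wait``, ``cache_request``, ``SeleResponse``); it is illustrative rather than a verbatim copy of the final code:

    def request(request_type, url, **kwargs):
        cached = check_cache(url)             # cache entries are valid for one hour
        if cached:
            return SeleResponse(url, cached['method'], cached['data'],
                                cached['cookies'], cached['user_agent'])
        driver = driver_select()              # Chrome or Firefox, per config
        driver.get(url)
        cloudflare_wait(driver)               # poll the title until "Just a moment..." clears
        response = SeleResponse(url, request_type, driver.page_source,
                                driver.get_cookies(),
                                driver.execute_script("return navigator.userAgent;"))
        driver.close()
        cache_request(response)               # persist the entry to the JSON cache in tempdir
        return response

Expired entries are dropped on lookup, and keeping the cache file in the OS temp directory means stale data does not pile up inside the anime downloader data dir.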
From 3c1b852e102aa97d8995c92a28f0b265a43947e2 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 11 Mar 2021 11:18:05 +0200 Subject: [PATCH 022/130] added a poster image to the search results --- anime_downloader/sites/twistmoe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/anime_downloader/sites/twistmoe.py b/anime_downloader/sites/twistmoe.py index 6e65ace..8e53205 100644 --- a/anime_downloader/sites/twistmoe.py +++ b/anime_downloader/sites/twistmoe.py @@ -55,6 +55,7 @@ class TwistMoe(Anime, sitename='twist.moe'): animes.append(SearchResult( title=anime['title'], url='https://twist.moe/a/' + anime['slug']['slug'] + '/', + poster=f"https://media.kitsu.io/anime/poster_images/{anime['hb_id']}/large.jpg" )) animes = [ani[0] for ani in process.extract(query, animes)] return animes From 240ea6cf07415deae9e1d9ab53cec3a17caa35e9 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 11 Mar 2021 13:08:06 +0200 Subject: [PATCH 023/130] fix --- anime_downloader/sites/_4anime.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/anime_downloader/sites/_4anime.py b/anime_downloader/sites/_4anime.py index 81afb47..7236254 100644 --- a/anime_downloader/sites/_4anime.py +++ b/anime_downloader/sites/_4anime.py @@ -1,5 +1,6 @@ import logging import re +from anime_downloader.util import eval_in_node from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites import helpers from anime_downloader.const import HEADERS @@ -49,12 +50,10 @@ class Anime4Episode(AnimeEpisode, sitename='4anime'): 'user-agent': HEADERS[self.hash_url(self.url, len(HEADERS))]} resp = helpers.get(self.url, headers=self.headers) + text = eval_in_node(re.search(r"(eval\(function\(p,a,c,k,e,d\).*source.*\))", resp.text).group(1).replace('eval', 'console.log')) # E.g. document.write( ' Download' ); - stream_url = helpers.soupify( - re.search("({text}"))).group(1) - # Otherwise we end up with "url" and barring that, url\ - stream_url = re.search('"(.*?)\\\\"', stream_url).group(1) return [('no_extractor', stream_url)] """ From a738afd94d7f06041f7ed2dd11a384bdcff43371 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 11 Mar 2021 15:33:02 +0200 Subject: [PATCH 024/130] Create unpacker.py --- anime_downloader/sites/helpers/unpacker.py | 168 +++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 anime_downloader/sites/helpers/unpacker.py diff --git a/anime_downloader/sites/helpers/unpacker.py b/anime_downloader/sites/helpers/unpacker.py new file mode 100644 index 0000000..98d5c72 --- /dev/null +++ b/anime_downloader/sites/helpers/unpacker.py @@ -0,0 +1,168 @@ +# +# Unpacker for Dean Edward's p.a.c.k.e.r, a part of javascript beautifier +# by Einar Lielmanis +# +# written by Stefano Sanfilippo +# +# usage: +# +# if detect(some_string): +# unpacked = unpack(some_string) +# + +"""Unpacker for Dean Edward's p.a.c.k.e.r""" + +import re +import string +import sys + + +class UnpackingError(Exception): + """Badly packed source or general error. Argument is a + meaningful description.""" + + pass + + +PRIORITY = 1 + + +def detect(source): + global beginstr + global endstr + beginstr = "" + endstr = "" + begin_offset = -1 + """Detects whether `source` is P.A.C.K.E.R. 
coded.""" + mystr = re.search( + "eval[ ]*\([ ]*function[ ]*\([ ]*p[ ]*,[ ]*a[ ]*,[ ]*c[" + " ]*,[ ]*k[ ]*,[ ]*e[ ]*,[ ]*", + source, + ) + if mystr: + begin_offset = mystr.start() + beginstr = source[:begin_offset] + if begin_offset != -1: + """ Find endstr""" + source_end = source[begin_offset:] + if source_end.split("')))", 1)[0] == source_end: + try: + endstr = source_end.split("}))", 1)[1] + except IndexError: + endstr = "" + else: + endstr = source_end.split("')))", 1)[1] + return mystr is not None + + +def unpack(source): + """Unpacks P.A.C.K.E.R. packed js code.""" + payload, symtab, radix, count = _filterargs(source) + + if count != len(symtab): + raise UnpackingError("Malformed p.a.c.k.e.r. symtab.") + + try: + unbase = Unbaser(radix) + except TypeError: + raise UnpackingError("Unknown p.a.c.k.e.r. encoding.") + + def lookup(match): + """Look up symbols in the synthetic symtab.""" + word = match.group(0) + return symtab[unbase(word)] or word + + payload = payload.replace("\\\\", "\\").replace("\\'", "'") + if sys.version_info.major == 2: + source = re.sub(r"\b\w+\b", lookup, payload) + else: + source = re.sub(r"\b\w+\b", lookup, payload, flags=re.ASCII) + return _replacestrings(source) + + +def _filterargs(source): + """Juice from a source file the four args needed by decoder.""" + juicers = [ + (r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\), *(\d+), *(.*)\)\)"), + (r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\)"), + ] + for juicer in juicers: + args = re.search(juicer, source, re.DOTALL) + if args: + a = args.groups() + if a[1] == "[]": + a = list(a) + a[1] = 62 + a = tuple(a) + try: + return a[0], a[3].split("|"), int(a[1]), int(a[2]) + except ValueError: + raise UnpackingError("Corrupted p.a.c.k.e.r. data.") + + # could not find a satisfying regex + raise UnpackingError( + "Could not make sense of p.a.c.k.e.r data (unexpected code structure)" + ) + + +def _replacestrings(source): + global beginstr + global endstr + """Strip string lookup table (list) and replace values in source.""" + match = re.search(r'var *(_\w+)\=\["(.*?)"\];', source, re.DOTALL) + + if match: + varname, strings = match.groups() + startpoint = len(match.group(0)) + lookup = strings.split('","') + variable = "%s[%%d]" % varname + for index, value in enumerate(lookup): + source = source.replace(variable % index, '"%s"' % value) + return source[startpoint:] + return beginstr + source + endstr + + +class Unbaser(object): + """Functor for a given base. 
Will efficiently convert + strings to natural numbers.""" + + ALPHABET = { + 62: "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", + 95: ( + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" + ), + } + + def __init__(self, base): + self.base = base + + # fill elements 37...61, if necessary + if 36 < base < 62: + if not hasattr(self.ALPHABET, self.ALPHABET[62][:base]): + self.ALPHABET[base] = self.ALPHABET[62][:base] + # attrs = self.ALPHABET + # print ', '.join("%s: %s" % item for item in attrs.items()) + # If base can be handled by int() builtin, let it do it for us + if 2 <= base <= 36: + self.unbase = lambda string: int(string, base) + else: + # Build conversion dictionary cache + try: + self.dictionary = dict( + (cipher, index) for index, cipher in enumerate(self.ALPHABET[base]) + ) + except KeyError: + raise TypeError("Unsupported base encoding.") + + self.unbase = self._dictunbaser + + def __call__(self, string): + return self.unbase(string) + + def _dictunbaser(self, string): + """Decodes a value to an integer.""" + ret = 0 + for index, cipher in enumerate(string[::-1]): + ret += (self.base ** index) * self.dictionary[cipher] + return ret From 3349209af5ca2e5088dd5d8a93ad6e4a4a2f760d Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 11 Mar 2021 15:34:03 +0200 Subject: [PATCH 025/130] Update unpacker.py --- anime_downloader/sites/helpers/unpacker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/anime_downloader/sites/helpers/unpacker.py b/anime_downloader/sites/helpers/unpacker.py index 98d5c72..d710c51 100644 --- a/anime_downloader/sites/helpers/unpacker.py +++ b/anime_downloader/sites/helpers/unpacker.py @@ -1,4 +1,6 @@ # +# taken from https://github.com/beautify-web/js-beautify/blob/main/python/jsbeautifier/unpackers/packer.py +# # Unpacker for Dean Edward's p.a.c.k.e.r, a part of javascript beautifier # by Einar Lielmanis # From cb4c21875bd001736c03afb8dd314cf4e9f3d7db Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 11 Mar 2021 15:34:20 +0200 Subject: [PATCH 026/130] Update __init__.py --- anime_downloader/sites/helpers/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/anime_downloader/sites/helpers/__init__.py b/anime_downloader/sites/helpers/__init__.py index b3f7a99..401b212 100644 --- a/anime_downloader/sites/helpers/__init__.py +++ b/anime_downloader/sites/helpers/__init__.py @@ -1,2 +1,3 @@ from anime_downloader.sites.helpers.request import * from anime_downloader.sites.helpers.util import not_working +from anime_downloader.sites.helpers.unpacker import detect, unpack From b217a8d3868c6bbda0c791a0aad87a86c0723893 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 11 Mar 2021 15:34:39 +0200 Subject: [PATCH 027/130] Update __init__.py --- anime_downloader/sites/helpers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/helpers/__init__.py b/anime_downloader/sites/helpers/__init__.py index 401b212..2ed8a9c 100644 --- a/anime_downloader/sites/helpers/__init__.py +++ b/anime_downloader/sites/helpers/__init__.py @@ -1,3 +1,3 @@ from anime_downloader.sites.helpers.request import * from anime_downloader.sites.helpers.util import not_working -from anime_downloader.sites.helpers.unpacker import detect, unpack +from anime_downloader.sites.helpers.unpacker import detect_packed_js, unpack_packed_js From 
cbaf97e837b0a4dbec1a66b7a6abe307ea4c4041 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 11 Mar 2021 15:35:37 +0200 Subject: [PATCH 028/130] Update unpacker.py --- anime_downloader/sites/helpers/unpacker.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/anime_downloader/sites/helpers/unpacker.py b/anime_downloader/sites/helpers/unpacker.py index d710c51..8bed2b6 100644 --- a/anime_downloader/sites/helpers/unpacker.py +++ b/anime_downloader/sites/helpers/unpacker.py @@ -8,7 +8,8 @@ # # usage: # -# if detect(some_string): +# if +(some_string): # unpacked = unpack(some_string) # @@ -29,7 +30,7 @@ class UnpackingError(Exception): PRIORITY = 1 -def detect(source): +def detect_packed_js(source): global beginstr global endstr beginstr = "" @@ -57,7 +58,7 @@ def detect(source): return mystr is not None -def unpack(source): +def unpack_packed_js(source): """Unpacks P.A.C.K.E.R. packed js code.""" payload, symtab, radix, count = _filterargs(source) From 20a57117ea32c8d1827e01187efddcc8801fe8da Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 11 Mar 2021 15:37:15 +0200 Subject: [PATCH 029/130] Update _4anime.py --- anime_downloader/sites/_4anime.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/anime_downloader/sites/_4anime.py b/anime_downloader/sites/_4anime.py index 7236254..7353767 100644 --- a/anime_downloader/sites/_4anime.py +++ b/anime_downloader/sites/_4anime.py @@ -1,6 +1,5 @@ import logging import re -from anime_downloader.util import eval_in_node from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites import helpers from anime_downloader.const import HEADERS @@ -50,7 +49,11 @@ class Anime4Episode(AnimeEpisode, sitename='4anime'): 'user-agent': HEADERS[self.hash_url(self.url, len(HEADERS))]} resp = helpers.get(self.url, headers=self.headers) - text = eval_in_node(re.search(r"(eval\(function\(p,a,c,k,e,d\).*source.*\))", resp.text).group(1).replace('eval', 'console.log')) + text = re.search(r"(eval\(function\(p,a,c,k,e,d\).*source.*\))", resp.text).group(1) + if helpers.detect_packed_js(text): + text = helpers.unpack_packed_js(text) + else: + return [] # E.g. 
document.write( ' Download' ); stream_url = re.search(r"{text}"))).group(1) From c75e4719946cd059e11e71bd88c8ac5fd97b046d Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Thu, 11 Mar 2021 15:38:54 +0200 Subject: [PATCH 030/130] Update unpacker.py --- anime_downloader/sites/helpers/unpacker.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/anime_downloader/sites/helpers/unpacker.py b/anime_downloader/sites/helpers/unpacker.py index 8bed2b6..86b3318 100644 --- a/anime_downloader/sites/helpers/unpacker.py +++ b/anime_downloader/sites/helpers/unpacker.py @@ -8,9 +8,8 @@ # # usage: # -# if -(some_string): -# unpacked = unpack(some_string) +# if detect_packed_js(some_string): +# unpacked = unpack_packed_js(some_string) # """Unpacker for Dean Edward's p.a.c.k.e.r""" From f273f36cf5450c678a51b32917942cc64d57f1e0 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 15 Mar 2021 00:40:37 +0200 Subject: [PATCH 031/130] Update unpacker.py --- anime_downloader/sites/helpers/unpacker.py | 174 +-------------------- 1 file changed, 7 insertions(+), 167 deletions(-) diff --git a/anime_downloader/sites/helpers/unpacker.py b/anime_downloader/sites/helpers/unpacker.py index 86b3318..179f39b 100644 --- a/anime_downloader/sites/helpers/unpacker.py +++ b/anime_downloader/sites/helpers/unpacker.py @@ -1,170 +1,10 @@ -# -# taken from https://github.com/beautify-web/js-beautify/blob/main/python/jsbeautifier/unpackers/packer.py -# -# Unpacker for Dean Edward's p.a.c.k.e.r, a part of javascript beautifier -# by Einar Lielmanis -# -# written by Stefano Sanfilippo -# -# usage: -# -# if detect_packed_js(some_string): -# unpacked = unpack_packed_js(some_string) -# +from jsbeautifier.unpackers import javascriptobfuscator, myobfuscate, packer -"""Unpacker for Dean Edward's p.a.c.k.e.r""" - -import re -import string -import sys +UNPACKERS = [javascriptobfuscator, myobfuscate, packer] -class UnpackingError(Exception): - """Badly packed source or general error. Argument is a - meaningful description.""" - - pass - - -PRIORITY = 1 - - -def detect_packed_js(source): - global beginstr - global endstr - beginstr = "" - endstr = "" - begin_offset = -1 - """Detects whether `source` is P.A.C.K.E.R. coded.""" - mystr = re.search( - "eval[ ]*\([ ]*function[ ]*\([ ]*p[ ]*,[ ]*a[ ]*,[ ]*c[" - " ]*,[ ]*k[ ]*,[ ]*e[ ]*,[ ]*", - source, - ) - if mystr: - begin_offset = mystr.start() - beginstr = source[:begin_offset] - if begin_offset != -1: - """ Find endstr""" - source_end = source[begin_offset:] - if source_end.split("')))", 1)[0] == source_end: - try: - endstr = source_end.split("}))", 1)[1] - except IndexError: - endstr = "" - else: - endstr = source_end.split("')))", 1)[1] - return mystr is not None - - -def unpack_packed_js(source): - """Unpacks P.A.C.K.E.R. packed js code.""" - payload, symtab, radix, count = _filterargs(source) - - if count != len(symtab): - raise UnpackingError("Malformed p.a.c.k.e.r. symtab.") - - try: - unbase = Unbaser(radix) - except TypeError: - raise UnpackingError("Unknown p.a.c.k.e.r. 
encoding.") - - def lookup(match): - """Look up symbols in the synthetic symtab.""" - word = match.group(0) - return symtab[unbase(word)] or word - - payload = payload.replace("\\\\", "\\").replace("\\'", "'") - if sys.version_info.major == 2: - source = re.sub(r"\b\w+\b", lookup, payload) - else: - source = re.sub(r"\b\w+\b", lookup, payload, flags=re.ASCII) - return _replacestrings(source) - - -def _filterargs(source): - """Juice from a source file the four args needed by decoder.""" - juicers = [ - (r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\), *(\d+), *(.*)\)\)"), - (r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\)"), - ] - for juicer in juicers: - args = re.search(juicer, source, re.DOTALL) - if args: - a = args.groups() - if a[1] == "[]": - a = list(a) - a[1] = 62 - a = tuple(a) - try: - return a[0], a[3].split("|"), int(a[1]), int(a[2]) - except ValueError: - raise UnpackingError("Corrupted p.a.c.k.e.r. data.") - - # could not find a satisfying regex - raise UnpackingError( - "Could not make sense of p.a.c.k.e.r data (unexpected code structure)" - ) - - -def _replacestrings(source): - global beginstr - global endstr - """Strip string lookup table (list) and replace values in source.""" - match = re.search(r'var *(_\w+)\=\["(.*?)"\];', source, re.DOTALL) - - if match: - varname, strings = match.groups() - startpoint = len(match.group(0)) - lookup = strings.split('","') - variable = "%s[%%d]" % varname - for index, value in enumerate(lookup): - source = source.replace(variable % index, '"%s"' % value) - return source[startpoint:] - return beginstr + source + endstr - - -class Unbaser(object): - """Functor for a given base. Will efficiently convert - strings to natural numbers.""" - - ALPHABET = { - 62: "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", - 95: ( - " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" - ), - } - - def __init__(self, base): - self.base = base - - # fill elements 37...61, if necessary - if 36 < base < 62: - if not hasattr(self.ALPHABET, self.ALPHABET[62][:base]): - self.ALPHABET[base] = self.ALPHABET[62][:base] - # attrs = self.ALPHABET - # print ', '.join("%s: %s" % item for item in attrs.items()) - # If base can be handled by int() builtin, let it do it for us - if 2 <= base <= 36: - self.unbase = lambda string: int(string, base) - else: - # Build conversion dictionary cache - try: - self.dictionary = dict( - (cipher, index) for index, cipher in enumerate(self.ALPHABET[base]) - ) - except KeyError: - raise TypeError("Unsupported base encoding.") - - self.unbase = self._dictunbaser - - def __call__(self, string): - return self.unbase(string) - - def _dictunbaser(self, string): - """Decodes a value to an integer.""" - ret = 0 - for index, cipher in enumerate(string[::-1]): - ret += (self.base ** index) * self.dictionary[cipher] - return ret +def unpack(js): + for unpacker in UNPACKERS: + if unpacker.detect(js): + return unpacker.unpack(js) + return js From c77647c82e3345223ed8b37f03631fd4bd35f410 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 15 Mar 2021 00:41:13 +0200 Subject: [PATCH 032/130] Update __init__.py --- anime_downloader/sites/helpers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/helpers/__init__.py b/anime_downloader/sites/helpers/__init__.py index 2ed8a9c..ca93725 100644 --- a/anime_downloader/sites/helpers/__init__.py +++ 
b/anime_downloader/sites/helpers/__init__.py @@ -1,3 +1,3 @@ from anime_downloader.sites.helpers.request import * from anime_downloader.sites.helpers.util import not_working -from anime_downloader.sites.helpers.unpacker import detect_packed_js, unpack_packed_js +from anime_downloader.sites.helpers.unpacker import deobfuscate_packed_js From 51e3047bf4779bad7a8bd9b93b0fa68e4d022f05 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 15 Mar 2021 00:41:27 +0200 Subject: [PATCH 033/130] Update unpacker.py --- anime_downloader/sites/helpers/unpacker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/helpers/unpacker.py b/anime_downloader/sites/helpers/unpacker.py index 179f39b..13f614c 100644 --- a/anime_downloader/sites/helpers/unpacker.py +++ b/anime_downloader/sites/helpers/unpacker.py @@ -3,7 +3,7 @@ from jsbeautifier.unpackers import javascriptobfuscator, myobfuscate, packer UNPACKERS = [javascriptobfuscator, myobfuscate, packer] -def unpack(js): +def deobfuscate_packed_js(js): for unpacker in UNPACKERS: if unpacker.detect(js): return unpacker.unpack(js) From fbb57c94a24de553a7c39a5df252c86cf56e0100 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 15 Mar 2021 00:42:08 +0200 Subject: [PATCH 034/130] Update setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index a087e7c..6cd1dc1 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ setup( 'requests-cache>=0.4.13', 'tabulate>=0.8.3', 'pycryptodome>=3.8.2', + 'jsbeautifier' ], extras_require={ 'selescrape': ['selenium'], From ccf44444f4ca3c8599129287d8993288b9eb1349 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 15 Mar 2021 00:43:15 +0200 Subject: [PATCH 035/130] Update _4anime.py --- anime_downloader/sites/_4anime.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/anime_downloader/sites/_4anime.py b/anime_downloader/sites/_4anime.py index 7353767..2fa1aea 100644 --- a/anime_downloader/sites/_4anime.py +++ b/anime_downloader/sites/_4anime.py @@ -50,10 +50,8 @@ class Anime4Episode(AnimeEpisode, sitename='4anime'): resp = helpers.get(self.url, headers=self.headers) text = re.search(r"(eval\(function\(p,a,c,k,e,d\).*source.*\))", resp.text).group(1) - if helpers.detect_packed_js(text): - text = helpers.unpack_packed_js(text) - else: - return [] + text = helpers.deobfuscate_packed_js(text) + # E.g. document.write( ' Download' ); stream_url = re.search(r"{text}"))).group(1) From 0ce02738bec717ccaac77e8b218eb14aa2f16b06 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 15 Mar 2021 00:53:48 +0200 Subject: [PATCH 036/130] Update _4anime.py --- anime_downloader/sites/_4anime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/_4anime.py b/anime_downloader/sites/_4anime.py index 2fa1aea..4d96cf2 100644 --- a/anime_downloader/sites/_4anime.py +++ b/anime_downloader/sites/_4anime.py @@ -53,7 +53,7 @@ class Anime4Episode(AnimeEpisode, sitename='4anime'): text = helpers.deobfuscate_packed_js(text) # E.g. 
document.write( ' Download' ); - stream_url = re.search(r"{text}"))).group(1) + stream_url = re.search(r"src=\\\\\"(.*)\\\\\" type", str(helpers.soupify(f""))).group(1) return [('no_extractor', stream_url)] From 403f492803a85c3cef26620e0e8960c13b4e6e72 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 15 Mar 2021 00:55:01 +0200 Subject: [PATCH 037/130] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6cd1dc1..42a98d1 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ setup( 'requests-cache>=0.4.13', 'tabulate>=0.8.3', 'pycryptodome>=3.8.2', - 'jsbeautifier' + 'jsbeautifier==1.11.0' ], extras_require={ 'selescrape': ['selenium'], From 5ef30a8e180a891736f54b6cda511a1a0a2dfdd5 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Sun, 21 Mar 2021 19:04:09 +0000 Subject: [PATCH 038/130] Animefreak: Do not use cache when getting sources for anime episodes --- anime_downloader/sites/animefreak.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/anime_downloader/sites/animefreak.py b/anime_downloader/sites/animefreak.py index 939b23d..caca5e7 100644 --- a/anime_downloader/sites/animefreak.py +++ b/anime_downloader/sites/animefreak.py @@ -32,7 +32,8 @@ class AnimeFreak(Anime, sitename='animefreak'): episodes = [a.get('href') for a in episode_links][::-1] # Get links ending with episode-.*, e.g. episode-74 - episode_numbers = [int(re.search("episode-(\d+)", x.split("/")[-1]).group(1)) for x in episodes if re.search("episode-\d+", x.split("/")[-1])] + episode_numbers = [int(re.search("episode-(\d+)", x.split("/")[-1]).group(1)) + for x in episodes if re.search("episode-\d+", x.split("/")[-1])] # Ensure that the number of episode numbers which have been extracted match the number of episodes if len(episodes) == len(episode_numbers) and len(episode_numbers) == len(set(episode_numbers)): @@ -47,7 +48,7 @@ class AnimeFreak(Anime, sitename='animefreak'): class AnimeFreakEpisode(AnimeEpisode, sitename='animefreak'): def _get_sources(self): - page = helpers.get(self.url).text + page = helpers.get(self.url, cache=False).text source_re = re.compile(r'loadVideo.+file: "([^"]+)', re.DOTALL) match = source_re.findall(page) From 2c7c37bc12e08cb1d29691907b1b7cc21470f41c Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Sun, 21 Mar 2021 20:20:31 +0000 Subject: [PATCH 039/130] Update ryuanime --- anime_downloader/config.py | 7 ++++- anime_downloader/extractors/vidstream.py | 15 ++++++++-- anime_downloader/sites/ryuanime.py | 35 ++++++++++++++---------- 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/anime_downloader/config.py b/anime_downloader/config.py index 0b60707..141bfd6 100644 --- a/anime_downloader/config.py +++ b/anime_downloader/config.py @@ -117,7 +117,12 @@ DEFAULT_CONFIG = { }, 'ryuanime': { 'version': 'subbed', - 'server': 'trollvid', + 'servers': [ + 'vidstream', + 'mp4upload', + 'xstreamcdn', + 'trollvid' + ] }, 'animekisa': { 'server': 'gcloud', diff --git a/anime_downloader/extractors/vidstream.py b/anime_downloader/extractors/vidstream.py index 1faa216..aa79701 100644 --- a/anime_downloader/extractors/vidstream.py +++ b/anime_downloader/extractors/vidstream.py @@ -28,7 +28,8 @@ class VidStream(BaseExtractor): } url = self.url.replace('https:////', 'https://') - url = url.replace('https://gogo-stream.com/download', 
'https://gogo-stream.com/server.php') + url = url.replace('https://gogo-stream.com/download', + 'https://gogo-stream.com/server.php') soup = helpers.soupify(helpers.get(url)) linkserver = soup.select('li.linkserver') logger.debug('Linkserver: {}'.format(linkserver)) @@ -64,7 +65,11 @@ class VidStream(BaseExtractor): # # # Used to create a download url. - soup_id = soup.select('input#id')[0]['value'] + try: + soup_id = soup.select('input#id')[0]['value'] + except IndexError: + return self._get_link_new(soup) + soup_title = soup.select('input#title')[0]['value'] soup_typesub = soup.select('input#typesub')[0].get('value', 'SUB') @@ -103,6 +108,11 @@ class VidStream(BaseExtractor): return {'stream_url': ''} + def _get_link_new(self, soup): + link_buttons = soup.select('div.mirror_link')[ + 0].select('div.dowload > a[href]') + return {'stream_url': link_buttons[0].get('href')} + class Extractor: """dummy class to prevent changing self""" @@ -110,4 +120,3 @@ class Extractor: def __init__(self, dictionary): for k, v in dictionary.items(): setattr(self, k, v) - diff --git a/anime_downloader/sites/ryuanime.py b/anime_downloader/sites/ryuanime.py index c3089a0..59fdded 100644 --- a/anime_downloader/sites/ryuanime.py +++ b/anime_downloader/sites/ryuanime.py @@ -22,13 +22,16 @@ class RyuAnime(Anime, sitename='ryuanime'): @classmethod def search(cls, query): - soup = helpers.soupify(helpers.get("https://ryuanime.com/browse-anime", params={"search": query})) - result_data = soup.select("li.list-inline-item:has(p.anime-name):has(a.ani-link)") + soup = helpers.soupify(helpers.get( + "https://ryuanime.com/browse-anime", params={"search": query})) + result_data = soup.select( + "li.list-inline-item:has(p.anime-name):has(a.ani-link)") search_results = [ SearchResult( title=result.select("p.anime-name")[0].text, - url='https://ryuanime.com' + result.select("a.ani-link")[0].get("href") + url='https://ryuanime.com' + + result.select("a.ani-link")[0].get("href") ) for result in result_data ] @@ -36,7 +39,8 @@ class RyuAnime(Anime, sitename='ryuanime'): def _scrape_episodes(self): soup = helpers.soupify(helpers.get(self.url)) - episodes = ['https://ryuanime.com' + x.get("href") for x in soup.select("li.jt-di > a")] + episodes = ['https://ryuanime.com' + + x.get("href") for x in soup.select("li.jt-di > a")] if len(episodes) == 0: logger.warning("No episodes found") @@ -49,17 +53,16 @@ class RyuAnime(Anime, sitename='ryuanime'): class RyuAnimeEpisode(AnimeEpisode, sitename='ryuanime'): - def getLink(self, name, _id): - if name == "trollvid": - return "https://trollvid.net/embed/" + _id - elif name == "mp4upload": - return f"https://mp4upload.com/embed-{_id}.html" - elif name == "xstreamcdn": - return f"https://xstreamcdn.com/v/" + _id - def _get_sources(self): page = helpers.get(self.url).text + server_links = { + 'trollvid': 'https://trollvid.net/embed/{}', + 'mp4upload': 'https://mp4upload.com/embed-{}.html', + 'xstreamcdn': 'https://xstreamcdn.com/v/{}', + 'vidstreaming': 'https://vidstreaming.io/download?id={}' + } + # Example: """ [ @@ -69,16 +72,20 @@ class RyuAnimeEpisode(AnimeEpisode, sitename='ryuanime'): } ] """ - hosts = json.loads(re.search(r"let.*?episode.*?videos.*?(\[\{.*?\}\])", page).group(1)) + hosts = json.loads( + re.search(r"let.*?episode.*?videos.*?(\[\{.*?\}\])", page).group(1)) sources_list = [] for host in hosts: name = host.get("host") _id = host.get("id") - link = self.getLink(name, _id) + link = server_links[name].format(_id) if link: + if name == 'vidstreaming': + name = 'vidstream' + 
sources_list.append({ "extractor": name, "url": link, From eb08bea3c6a4f0a15717bf709180ed69f55b30c1 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Fri, 19 Feb 2021 00:47:41 +0000 Subject: [PATCH 040/130] Add quality selection to animeout --- anime_downloader/sites/animeout.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/anime_downloader/sites/animeout.py b/anime_downloader/sites/animeout.py index 4d3209a..d8aad28 100644 --- a/anime_downloader/sites/animeout.py +++ b/anime_downloader/sites/animeout.py @@ -13,7 +13,8 @@ class AnimeOut(Anime, sitename='animeout'): @classmethod def search(cls, query): - search_results = helpers.soupify(helpers.get(cls.url, params={'s': query})).select('h3.post-title > a') + search_results = helpers.soupify(helpers.get( + cls.url, params={'s': query})).select('h3.post-title > a') # Removes the unneded metadata from the title # Used by MAL matcher clean_title_regex = r'\(.*?\)' @@ -31,7 +32,19 @@ class AnimeOut(Anime, sitename='animeout'): # Only uses the direct download links for consistency. soup = helpers.soupify(helpers.get(self.url)) elements = soup.select('article.post a') - return [i.get('href') for i in elements if 'Direct Download' in i.text] + episodes = [i.get('href') + for i in elements if 'Direct Download' in i.text] + + filters = [self.quality, "1080p", "720p"] + quality_filtered = [] + + for i in range(3): + if not quality_filtered: + quality_filtered = [x for x in episodes if filters[i] in x] + else: + break + + return episodes if not quality_filtered else quality_filtered def _scrape_metadata(self): soup = helpers.soupify(helpers.get(self.url)) From c097942aa162492b20f4e0345d6d5498c2a5b092 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Sun, 21 Mar 2021 20:28:59 +0000 Subject: [PATCH 041/130] Update for loop condtion --- anime_downloader/sites/animeout.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/anime_downloader/sites/animeout.py b/anime_downloader/sites/animeout.py index d8aad28..04825e9 100644 --- a/anime_downloader/sites/animeout.py +++ b/anime_downloader/sites/animeout.py @@ -38,9 +38,9 @@ class AnimeOut(Anime, sitename='animeout'): filters = [self.quality, "1080p", "720p"] quality_filtered = [] - for i in range(3): + for _filter in filters: if not quality_filtered: - quality_filtered = [x for x in episodes if filters[i] in x] + quality_filtered = [x for x in episodes if _filter in x] else: break From 5acab0b641e28876e6742c01e43195a667247718 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Sun, 21 Mar 2021 21:49:59 +0000 Subject: [PATCH 042/130] Remove fastani: Site is gone --- README.md | 1 - anime_downloader/sites/fastani.py | 80 ------------------------------- anime_downloader/sites/init.py | 1 - 3 files changed, 82 deletions(-) delete mode 100644 anime_downloader/sites/fastani.py diff --git a/README.md b/README.md index 8227d14..50aae9b 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,6 @@ Yeah. Me too! That's why this tool exists. 
- Dbanimes - EraiRaws - EgyAnime - usually m3u8 (good for streaming, not so much for downloading) -- FastAni - GenoAnime - GurminderBoparai (AnimeChameleon) - itsaturday diff --git a/anime_downloader/sites/fastani.py b/anime_downloader/sites/fastani.py deleted file mode 100644 index dcb013a..0000000 --- a/anime_downloader/sites/fastani.py +++ /dev/null @@ -1,80 +0,0 @@ -from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult -from anime_downloader.sites import helpers -import re -import logging - -logger = logging.getLogger(__name__) - - -class FastAni(Anime, sitename="fastani"): - - sitename = 'fastani' - - @classmethod - def getToken(cls): - resp = helpers.get("https://fastani.net") - site_text = resp.text - cookies = resp.cookies - - # Path to js file, e.g /static/js/main.f450dd1c.chunk.js - which contains the token - js_location = "https://fastani.net" + re.search(r"src=\"(\/static\/js\/main.*?)\"", site_text).group(1) - js = helpers.get(js_location).text - - # Get authorization token, e.g: {authorization:"Bearer h8X2exbErErNSxRnr6sSXAE2ycUSyrbU"} - key, token = re.search("method:\"GET\".*?\"(.*?)\".*?\"(.*?)\"", js).group(1,2) - - return ({key: token}, cookies) - - @classmethod - def search(cls, query): - headers, cookies = cls.getToken() - results = helpers.get(f"https://fastani.net/api/data?page=1&search={query}&tags=&years=", headers=headers, cookies=cookies).json() - - return [ - SearchResult( - title=x.get('title').get("english"), - # Need to know selected anime and original query for _scrape_episodes - url=f"https://fastani.net/{selected}/{query}" - ) - for selected, x in zip(range(len(results["animeData"]["cards"])), results["animeData"]["cards"]) - ] - - def _scrape_episodes(self): - headers, cookies = self.getToken() - split = self.url.split("/") - query, selected = split[-1], int(split[-2]) - anime = helpers.get(f"https://fastani.net/api/data?page=1&search={query}&tags=&years=", headers=headers, cookies=cookies).json() - - cdnData = anime["animeData"]["cards"][selected]["cdnData"] - - # Get all episodes from all seasons of the anime - # JSON Example: - """ - { - 'seasons': [{ - 'episodes': [{ - 'file': 'https://private.fastani.net/Naruto/Season 1/Naruto S01E001.mp4', - 'directory': 'https://private.fastani.net/Naruto/Season 1', - 'timestamp': '2020-09-11T16:22:48.744Z', - 'thumb': 'https://private.fastani.net/Naruto/Season 1/thumbs/20_thumbnail_001.jpg', - 'title': 'Enter: Naruto Uzumaki!' - } - ... 
- ] - } - """ - episodes = [j["file"] for i in [x["episodes"] for x in cdnData["seasons"]] for j in i] - - return episodes - - def _scrape_metadata(self): - headers, cookies = self.getToken() - split = self.url.split("/") - query, selected = split[-1], int(split[-2]) - anime = helpers.get(f"https://fastani.net/api/data?page=1&search={query}&tags=&years=", headers=headers, cookies=cookies).json() - self.title = anime["animeData"]["cards"][selected]["title"]["english"] - - -class FastAniEpisode(AnimeEpisode, sitename='fastani'): - def _get_sources(self): - return [("no_extractor", self.url)] diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index 1030e95..6577051 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -26,7 +26,6 @@ ALL_ANIME_SITES = [ ('dbanimes', 'dbanimes', 'DBAnimes'), ('erairaws', 'erai-raws', 'EraiRaws'), ('egyanime', 'egyanime', 'EgyAnime'), - ('fastani', 'fastani', 'FastAni'), ('genoanime', 'genoanime', 'GenoAnime'), ('itsaturday', 'itsaturday', 'Itsaturday'), ('justdubs', 'justdubs', 'JustDubs'), From 44f2002aaa43724a6ae24763f03edb7043e88ee8 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Thu, 25 Mar 2021 14:00:39 +0000 Subject: [PATCH 043/130] Fix trollvid --- anime_downloader/extractors/trollvid.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/anime_downloader/extractors/trollvid.py b/anime_downloader/extractors/trollvid.py index 6e4f2e3..4eb4144 100644 --- a/anime_downloader/extractors/trollvid.py +++ b/anime_downloader/extractors/trollvid.py @@ -26,16 +26,17 @@ class Trollvid(BaseExtractor): elif token: token = token.group(1) - trollvid_id = self.url.split('/')[-1] # something like: 084df78d215a + # something like: 084df78d215a + trollvid_id = self.url.split('/')[-1] post = helpers.post(f'https://mp4.sh/v/{trollvid_id}', data={'token': token}, referer=self.url, ).json() # {'success':True} on success. - if post.get('success') and post.get('data'): + if post.get('success') and post.get('file'): return { - 'stream_url': post['data'] + 'stream_url': post['file'] } # In case neither methods work. From ac070b312e578de550880fd64a09b15138853c48 Mon Sep 17 00:00:00 2001 From: nate-moo <40650681+nate-moo@users.noreply.github.com> Date: Fri, 26 Mar 2021 11:48:35 -0400 Subject: [PATCH 044/130] remove `c` from aria2c (#656) --- docs/usage/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage/installation.rst b/docs/usage/installation.rst index 6d25f46..0f7df86 100644 --- a/docs/usage/installation.rst +++ b/docs/usage/installation.rst @@ -86,7 +86,7 @@ This does not require a rooted device to work. 
- Install Aria2c via the following command if using Termux; :: - pkg install aria2c + pkg install aria2 - Install Python via the following command if using Termux; :: From 750966b5f4d3a34ceeb4847afc9179569289655e Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Fri, 26 Mar 2021 21:25:23 +0200 Subject: [PATCH 045/130] Update voiranime.py --- anime_downloader/sites/voiranime.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/anime_downloader/sites/voiranime.py b/anime_downloader/sites/voiranime.py index 4f3bf35..5a431d8 100644 --- a/anime_downloader/sites/voiranime.py +++ b/anime_downloader/sites/voiranime.py @@ -13,7 +13,7 @@ class VoirAnime(Anime, sitename='voiranime'): @classmethod def search(cls, query): - search_results = helpers.soupify(helpers.get(cls.url, params={'s': query})).select('div.item-head > h3 > a') + search_results = helpers.soupify(helpers.get(cls.url, params={'s': query})).select('.post-title > h3 > a') search_results = [ SearchResult( title=i.text, @@ -23,21 +23,27 @@ class VoirAnime(Anime, sitename='voiranime'): return search_results def _scrape_episodes(self): - soup = helpers.soupify(helpers.get(self.url)) - next_page = soup.select('a.ct-btn')[0].get('href') - soup = helpers.soupify(helpers.get(next_page)) - episodes = soup.select('ul.video-series-list > li > a.btn-default') - return [i.get('href') for i in episodes] + html = helpers.get(self.url).text + episodes = list(re.findall(r"
  • \n", html)) + return episodes[::-1] def _scrape_metadata(self): soup = helpers.soupify(helpers.get(self.url)) - self.title = soup.select('div.container > h1')[0].text + self.title = soup.select_one('.post-title > h1').text class VoirAnimeEpisode(AnimeEpisode, sitename='voiranime'): def _get_sources(self): + base_url = 'https://voiranime.com/' soup = helpers.soupify(helpers.get(self.url)) + servers = [ + base_url + x['data-redirect'] + for x in soup.select('.host-select > option') + ] """These could probably be condensed down to one, but would look too spooky""" + + # code below doesnt work anymore, since voiranime introduced captcha + multilinks_regex = r'var\s*multilinks\s*=\s*\[\[{(.*?)}]];' mutilinks_iframe_regex = r"iframe\s*src=\\(\"|')([^(\"|')]*)" multilinks = re.search(multilinks_regex, str(soup)).group(1) From e105c197b760ac92423ca29e5764c4cce45dd979 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Fri, 26 Mar 2021 21:25:50 +0200 Subject: [PATCH 046/130] removed voiranim --- anime_downloader/sites/init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index 6577051..585d372 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -41,7 +41,7 @@ ALL_ANIME_SITES = [ ('twistmoe', 'twist.moe', 'TwistMoe'), ('tenshimoe', 'tenshi.moe', 'TenshiMoe'), ('vidstream', 'vidstream', 'VidStream'), - ('voiranime', 'voiranime', 'VoirAnime'), + # ('voiranime', 'voiranime', 'VoirAnime'), ('vostfree', 'vostfree', 'VostFree'), ] From b65e2a3ad91bb0eaa98167ce757c7cfe4bb3ed66 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Fri, 26 Mar 2021 21:31:16 +0200 Subject: [PATCH 047/130] changed the dependency to be optional --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 42a98d1..0aef6c6 100644 --- a/setup.py +++ b/setup.py @@ -30,11 +30,11 @@ setup( 'cfscrape>=2.0.5', 'requests-cache>=0.4.13', 'tabulate>=0.8.3', - 'pycryptodome>=3.8.2', - 'jsbeautifier==1.11.0' + 'pycryptodome>=3.8.2' ], extras_require={ 'selescrape': ['selenium'], + 'unpacker': ['jsbeautifier==1.11.0'], 'gui': ['PyQt5>=5.15.1', 'selenium'], 'dev': [ 'pytest', From 3f649db0cb451de62d96c2c05ccefbb7346cc883 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Fri, 26 Mar 2021 21:33:33 +0200 Subject: [PATCH 048/130] Update unpacker.py --- anime_downloader/sites/helpers/unpacker.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/anime_downloader/sites/helpers/unpacker.py b/anime_downloader/sites/helpers/unpacker.py index 13f614c..4b4a29c 100644 --- a/anime_downloader/sites/helpers/unpacker.py +++ b/anime_downloader/sites/helpers/unpacker.py @@ -1,10 +1,11 @@ -from jsbeautifier.unpackers import javascriptobfuscator, myobfuscate, packer - -UNPACKERS = [javascriptobfuscator, myobfuscate, packer] - - -def deobfuscate_packed_js(js): - for unpacker in UNPACKERS: - if unpacker.detect(js): - return unpacker.unpack(js) - return js +try: + from jsbeautifier.unpackers import javascriptobfuscator, myobfuscate, packer + UNPACKERS = [javascriptobfuscator, myobfuscate, packer] + def deobfuscate_packed_js(js): + for unpacker in UNPACKERS: + if unpacker.detect(js): + return unpacker.unpack(js) + return js +except ImportError: + def deobfuscate_packed_js(js): + return js From 7ab0bdb9ba3ccce12d86b89832dc5742a3ef4e52 Mon Sep 17 
00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Fri, 26 Mar 2021 19:43:32 +0000 Subject: [PATCH 049/130] Add unpacker to README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8227d14..9105e80 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Yeah. Me too! That's why this tool exists. ## Supported Sites **Details about the sites can be found in [FAQ](https://github.com/vn-ki/anime-downloader/wiki/FAQ)** -- 4Anime +- 4Anime - requires jsbeautifier - AnimeBinge - Animedaisuki - Animeflix @@ -107,6 +107,7 @@ If you have trouble installing, see extended installation instructions [here](ht - You might have to use pip3 depending on your system - To install this project with gui and all its dependencies, add `#egg=anime-downloader[gui]` to the pip command you are using to install it. Example: `pip install -U git+https://github.com/anime-dl/anime-downloader#egg=anime_downloader[gui]` - To install this project with selescrape (if you are using GUI, ignore this line), do the same as above - but with `#egg=anime-downloader[selescrape]` +- To install this project with jsbeautifier run `pip install -U git+https://github.com/anime-dl/anime-downloader#egg=anime-downloader[unpacker] ## Usage From 6f96f02d43be6ce9cb41229666df3e190b64be8c Mon Sep 17 00:00:00 2001 From: Red <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Fri, 26 Mar 2021 19:46:22 +0000 Subject: [PATCH 050/130] Add missing droptick --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0157f47..2d75a9c 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ If you have trouble installing, see extended installation instructions [here](ht - You might have to use pip3 depending on your system - To install this project with gui and all its dependencies, add `#egg=anime-downloader[gui]` to the pip command you are using to install it. Example: `pip install -U git+https://github.com/anime-dl/anime-downloader#egg=anime_downloader[gui]` - To install this project with selescrape (if you are using GUI, ignore this line), do the same as above - but with `#egg=anime-downloader[selescrape]` -- To install this project with jsbeautifier run `pip install -U git+https://github.com/anime-dl/anime-downloader#egg=anime-downloader[unpacker] +- To install this project with jsbeautifier run `pip install -U git+https://github.com/anime-dl/anime-downloader#egg=anime-downloader[unpacker]` ## Usage From 376cbc6f139a226503c4965c209e4572fd7b87ab Mon Sep 17 00:00:00 2001 From: Red <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Fri, 26 Mar 2021 19:48:39 +0000 Subject: [PATCH 051/130] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2d75a9c..3c60cc7 100644 --- a/README.md +++ b/README.md @@ -104,9 +104,9 @@ If you have trouble installing, see extended installation instructions [here](ht **Note**: - For Cloudflare scraping either [cfscrape](https://github.com/Anorov/cloudflare-scrape) or [selenium](https://www.selenium.dev/) is used. [Cfscrape](https://github.com/Anorov/cloudflare-scrape) depends on [`node-js`](https://nodejs.org/en/) and [selenium](https://www.selenium.dev/) utilizes an automated invisible instance of a browser (chrome/firefox). 
So, if you want to use Cloudflare enabled sites, make sure you have [node-js](https://nodejs.org/en/) and a [webdriver](https://www.selenium.dev/selenium/docs/api/py/index.html#drivers) installed. - You might have to use pip3 depending on your system -- To install this project with gui and all its dependencies, add `#egg=anime-downloader[gui]` to the pip command you are using to install it. Example: `pip install -U git+https://github.com/anime-dl/anime-downloader#egg=anime_downloader[gui]` +- To install this project with gui and all its dependencies, add `#egg=anime-downloader[gui]` to the pip command you are using to install it. Example: `pip install --force-reinstall -U git+https://github.com/anime-dl/anime-downloader#egg=anime_downloader[gui]` - To install this project with selescrape (if you are using GUI, ignore this line), do the same as above - but with `#egg=anime-downloader[selescrape]` -- To install this project with jsbeautifier run `pip install -U git+https://github.com/anime-dl/anime-downloader#egg=anime-downloader[unpacker]` +- To install this project with jsbeautifier run `pip install --force-reinstall -U git+https://github.com/anime-dl/anime-downloader#egg=anime-downloader[unpacker]` ## Usage From a0e5434bfcc1281f7883f844487ac3fce523d5e6 Mon Sep 17 00:00:00 2001 From: Iggy <44432163+IguanasInPyjamas@users.noreply.github.com> Date: Thu, 1 Apr 2021 10:07:35 +0100 Subject: [PATCH 052/130] Update __version__.py --- anime_downloader/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/__version__.py b/anime_downloader/__version__.py index 79b3919..5f25180 100644 --- a/anime_downloader/__version__.py +++ b/anime_downloader/__version__.py @@ -1 +1 @@ -__version__ = '5.0.7' +__version__ = '5.0.9' From 045efcf34f5fe991e941cee6907c53bd688764d2 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Thu, 1 Apr 2021 23:13:12 +0100 Subject: [PATCH 053/130] Add Animtime --- README.md | 1 + anime_downloader/extractors/init.py | 6 ++ anime_downloader/extractors/wasabisys.py | 11 ++++ anime_downloader/sites/animtime.py | 73 ++++++++++++++++++++++++ anime_downloader/sites/init.py | 1 + 5 files changed, 92 insertions(+) create mode 100644 anime_downloader/extractors/wasabisys.py create mode 100644 anime_downloader/sites/animtime.py diff --git a/README.md b/README.md index 3c60cc7..17c01d9 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,7 @@ Yeah. Me too! That's why this tool exists. 
**Details about the sites can be found in [FAQ](https://github.com/vn-ki/anime-downloader/wiki/FAQ)** - 4Anime - requires jsbeautifier +- AnimTime - AnimeBinge - Animedaisuki - Animeflix diff --git a/anime_downloader/extractors/init.py b/anime_downloader/extractors/init.py index a529519..3625c6d 100644 --- a/anime_downloader/extractors/init.py +++ b/anime_downloader/extractors/init.py @@ -168,6 +168,12 @@ ALL_EXTRACTORS = [ 'modulename': 'streamium', 'regex': 'streamium', 'class': 'Streamium' + }, + { + 'sitename': 'wasabisys', + 'modulename': 'wasabisys', + 'regex': 'wasabisys', + 'class': 'Wasabisys' } ] diff --git a/anime_downloader/extractors/wasabisys.py b/anime_downloader/extractors/wasabisys.py new file mode 100644 index 0000000..d92b538 --- /dev/null +++ b/anime_downloader/extractors/wasabisys.py @@ -0,0 +1,11 @@ +from anime_downloader.extractors.base_extractor import BaseExtractor +from anime_downloader.sites import helpers + + +class Wasabisys(BaseExtractor): + def _get_data(self): + + return { + 'stream_url': self.url, + 'referer': 'https://animtime.com/' + } diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py new file mode 100644 index 0000000..b5814ef --- /dev/null +++ b/anime_downloader/sites/animtime.py @@ -0,0 +1,73 @@ + +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult +from anime_downloader.sites import helpers +from difflib import get_close_matches + +import re +import logging +logger = logging.getLogger(__name__) + +class AnimTime(Anime, sitename='animtime'): + sitename='animtime' + + @classmethod + def get_title_dict(cls, script): + script_text = helpers.get(script).text + title_function = re.search("tm=.*?}", script_text).group() + titles_regexed = re.findall("t\[t\.(.*?)=(\d+)", title_function) + titles = dict([(' '.join(re.sub( r"([A-Z])", r" \1", x[0]).split()), x[1]) for x in titles_regexed]) + + return titles + + @classmethod + def get_script_link(cls): + soup = helpers.soupify(helpers.get('https://animtime.com')) + script = 'https://animtime.com/' + soup.select('script[src*=main]')[0].get('src') + + return script + + @classmethod + def search(cls, query): + titles = cls.get_title_dict(cls.get_script_link()) + matches = get_close_matches(query, titles, cutoff=0.2) + + search_results = [ + SearchResult( + title=match, + url='https://animtime.com/title/{}'.format(titles.get(match)) + ) + for match in matches + ] + + logger.info(search_results) + return search_results + + def _scrape_episodes(self): + link = self.get_script_link() + titles = dict((y, x) for x, y in self.get_title_dict(link).items()) + current_title = titles.get(self.url.split('/')[-1]) + + script_text = helpers.get(link).text + ep_count = int(re.search("zd\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) + + episodes = [] + for i in range(ep_count): + episodes.append(self.url + f'/episode/{i + 1}') + + return episodes + + def _scrape_metadata(self): + titles = dict((y, x) for (x, y) in self.get_title_dict(self.get_script_link()).items()) + self.title = titles.get(self.url.split('/')[-1]) + +class AnimTimeEpisode(AnimeEpisode, sitename='animtime'): + def _get_sources(self): + titles = dict((y, x) for x, y in AnimTime.get_title_dict(AnimTime.get_script_link()).items()) + current_title = titles.get(self.url.split('/')[-3]) + current_ep = "{0:03}".format(int(self.url.split('/')[-1])) + + script_text = helpers.get(AnimTime.get_script_link()).text + regexed_link = re.search('tm\.' 
+ current_title.replace(" ", "") + '\]=function\(.*?return.*?(https.*?)"}', script_text).group(1) + link = regexed_link.replace('"+t+"', current_ep) + + return [('wasabisys', link)] diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index 585d372..054d83b 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -4,6 +4,7 @@ ALL_ANIME_SITES = [ # ('filename', 'sitename', 'classname') ('_4anime', '4anime', 'Anime4'), ('anitube', 'anitube', 'AniTube'), + ('animtime', 'animtime', 'AnimTime'), ('anime8', 'anime8', 'Anime8'), ('animebinge', 'animebinge', 'AnimeBinge'), ('animechameleon', 'gurminder', 'AnimeChameleon'), From c794ef14c232f1cfc4b57cbcfa0b9d234e2319c5 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Thu, 1 Apr 2021 23:16:19 +0100 Subject: [PATCH 054/130] autopep8 --- anime_downloader/sites/animtime.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py index b5814ef..79a350a 100644 --- a/anime_downloader/sites/animtime.py +++ b/anime_downloader/sites/animtime.py @@ -7,22 +7,25 @@ import re import logging logger = logging.getLogger(__name__) + class AnimTime(Anime, sitename='animtime'): - sitename='animtime' + sitename = 'animtime' @classmethod def get_title_dict(cls, script): script_text = helpers.get(script).text title_function = re.search("tm=.*?}", script_text).group() titles_regexed = re.findall("t\[t\.(.*?)=(\d+)", title_function) - titles = dict([(' '.join(re.sub( r"([A-Z])", r" \1", x[0]).split()), x[1]) for x in titles_regexed]) + titles = dict([(' '.join(re.sub(r"([A-Z])", r" \1", x[0]).split()), x[1]) + for x in titles_regexed]) return titles @classmethod def get_script_link(cls): soup = helpers.soupify(helpers.get('https://animtime.com')) - script = 'https://animtime.com/' + soup.select('script[src*=main]')[0].get('src') + script = 'https://animtime.com/' + \ + soup.select('script[src*=main]')[0].get('src') return script @@ -35,9 +38,9 @@ class AnimTime(Anime, sitename='animtime'): SearchResult( title=match, url='https://animtime.com/title/{}'.format(titles.get(match)) - ) - for match in matches - ] + ) + for match in matches + ] logger.info(search_results) return search_results @@ -45,10 +48,11 @@ class AnimTime(Anime, sitename='animtime'): def _scrape_episodes(self): link = self.get_script_link() titles = dict((y, x) for x, y in self.get_title_dict(link).items()) - current_title = titles.get(self.url.split('/')[-1]) + current_title = titles.get(self.url.split('/')[-1]) script_text = helpers.get(link).text - ep_count = int(re.search("zd\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) + ep_count = int(re.search( + "zd\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) episodes = [] for i in range(ep_count): @@ -57,17 +61,21 @@ class AnimTime(Anime, sitename='animtime'): return episodes def _scrape_metadata(self): - titles = dict((y, x) for (x, y) in self.get_title_dict(self.get_script_link()).items()) + titles = dict((y, x) for (x, y) in self.get_title_dict( + self.get_script_link()).items()) self.title = titles.get(self.url.split('/')[-1]) + class AnimTimeEpisode(AnimeEpisode, sitename='animtime'): def _get_sources(self): - titles = dict((y, x) for x, y in AnimTime.get_title_dict(AnimTime.get_script_link()).items()) + titles = dict((y, x) for x, y in AnimTime.get_title_dict( + 
AnimTime.get_script_link()).items()) current_title = titles.get(self.url.split('/')[-3]) current_ep = "{0:03}".format(int(self.url.split('/')[-1])) script_text = helpers.get(AnimTime.get_script_link()).text - regexed_link = re.search('tm\.' + current_title.replace(" ", "") + '\]=function\(.*?return.*?(https.*?)"}', script_text).group(1) + regexed_link = re.search('tm\.' + current_title.replace(" ", "") + + '\]=function\(.*?return.*?(https.*?)"}', script_text).group(1) link = regexed_link.replace('"+t+"', current_ep) return [('wasabisys', link)] From a9b1bf98cfe171bb7afd93744108120ca09b5b91 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Thu, 1 Apr 2021 23:17:32 +0100 Subject: [PATCH 055/130] Remove logging --- anime_downloader/sites/animtime.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py index 79a350a..af46f24 100644 --- a/anime_downloader/sites/animtime.py +++ b/anime_downloader/sites/animtime.py @@ -4,8 +4,6 @@ from anime_downloader.sites import helpers from difflib import get_close_matches import re -import logging -logger = logging.getLogger(__name__) class AnimTime(Anime, sitename='animtime'): @@ -42,7 +40,6 @@ class AnimTime(Anime, sitename='animtime'): for match in matches ] - logger.info(search_results) return search_results def _scrape_episodes(self): From 941a20889b43b6d334791735b3ca44be48c7285a Mon Sep 17 00:00:00 2001 From: Gizmofire Date: Fri, 2 Apr 2021 16:50:16 -0700 Subject: [PATCH 056/130] 4anime source change. MIght need some more testing, but it is working. --- anime_downloader/sites/_4anime.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/anime_downloader/sites/_4anime.py b/anime_downloader/sites/_4anime.py index 4d96cf2..dfcf89f 100644 --- a/anime_downloader/sites/_4anime.py +++ b/anime_downloader/sites/_4anime.py @@ -49,12 +49,7 @@ class Anime4Episode(AnimeEpisode, sitename='4anime'): 'user-agent': HEADERS[self.hash_url(self.url, len(HEADERS))]} resp = helpers.get(self.url, headers=self.headers) - text = re.search(r"(eval\(function\(p,a,c,k,e,d\).*source.*\))", resp.text).group(1) - text = helpers.deobfuscate_packed_js(text) - - # E.g. 
document.write( ' Download' ); - stream_url = re.search(r"src=\\\\\"(.*)\\\\\" type", str(helpers.soupify(f""))).group(1) - + stream_url = soupify(resp).source['src'] return [('no_extractor', stream_url)] """ From b45e5f0c55ecf8c833a4cd59dc5335c9e8b08b7b Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:49:55 +0100 Subject: [PATCH 057/130] Change soupify to helpers.soupify --- anime_downloader/sites/_4anime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/_4anime.py b/anime_downloader/sites/_4anime.py index dfcf89f..adae4b5 100644 --- a/anime_downloader/sites/_4anime.py +++ b/anime_downloader/sites/_4anime.py @@ -49,7 +49,7 @@ class Anime4Episode(AnimeEpisode, sitename='4anime'): 'user-agent': HEADERS[self.hash_url(self.url, len(HEADERS))]} resp = helpers.get(self.url, headers=self.headers) - stream_url = soupify(resp).source['src'] + stream_url = helpers.soupify(resp).source['src'] return [('no_extractor', stream_url)] """ From 247618f8215ab7d3777ca183b86ba46dd9045bd8 Mon Sep 17 00:00:00 2001 From: Red <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Sat, 3 Apr 2021 19:27:16 +0100 Subject: [PATCH 058/130] Removed "requires jsbeautifier" from 4anime --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3c60cc7..69059b7 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Yeah. Me too! That's why this tool exists. ## Supported Sites **Details about the sites can be found in [FAQ](https://github.com/vn-ki/anime-downloader/wiki/FAQ)** -- 4Anime - requires jsbeautifier +- 4Anime - AnimeBinge - Animedaisuki - Animeflix From 35f982f3f2a749dbcfd4ed4ae8b2475dfb4fcb20 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sun, 4 Apr 2021 16:16:04 +0300 Subject: [PATCH 059/130] fixed variable name mistake for ep range --- anime_downloader/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/util.py b/anime_downloader/util.py index de6e364..4db6dad 100644 --- a/anime_downloader/util.py +++ b/anime_downloader/util.py @@ -211,7 +211,7 @@ def parse_ep_str(anime, grammar): ep = sorted(anime._episode_urls)[-1] else: ep = [x for x in anime._episode_urls if x[0] - == int(grammar)][0] + == int(episode_grammar)][0] ep_cls = AnimeEpisode.subclasses[anime.sitename] From b1413fb58576c9e76b249418018bf12bee54de8f Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sun, 4 Apr 2021 16:20:42 +0300 Subject: [PATCH 060/130] Update util.py --- anime_downloader/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/util.py b/anime_downloader/util.py index 4db6dad..6f40ba1 100644 --- a/anime_downloader/util.py +++ b/anime_downloader/util.py @@ -207,7 +207,7 @@ def parse_ep_str(anime, grammar): else: from anime_downloader.sites.anime import AnimeEpisode - if grammar == '0': + if episode_grammar == '0': ep = sorted(anime._episode_urls)[-1] else: ep = [x for x in anime._episode_urls if x[0] From 8fa2bf2d48a6be2492804cc40f2e0f078a31fc1d Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 3 May 2021 18:41:16 +0300 Subject: [PATCH 061/130] added uget --- anime_downloader/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/anime_downloader/util.py b/anime_downloader/util.py index 6f40ba1..a4604fa 100644 --- a/anime_downloader/util.py +++ 
b/anime_downloader/util.py @@ -305,7 +305,8 @@ def format_command(cmd, episode, file_format, speed_limit, path): '--check-certificate=false --user-agent={useragent} --max-overall-download-limit={speed_limit} ' '--console-log-level={log_level}', '{idm}': 'idman.exe /n /d {stream_url} /p {download_dir} /f {file_format}.mp4', - '{wget}': 'wget {stream_url} --referer={referer} --user-agent={useragent} -O {download_dir}/{file_format}.mp4 -c' + '{wget}': 'wget {stream_url} --referer={referer} --user-agent={useragent} -O {download_dir}/{file_format}.mp4 -c', + '{uget}': 'uget --http-referer={referer} --http-user-agent={useragent} --folder={download_dir} --filename={file_format}.mp4 {stream_url}' } # Allows for passing the user agent with self.headers in the site. From f9e2e8ce2c5ed1da0ce4f55b13785a55c64674bb Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 3 May 2021 20:16:18 +0300 Subject: [PATCH 062/130] improved the search results --- anime_downloader/sites/animtime.py | 108 ++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 32 deletions(-) diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py index af46f24..5908aed 100644 --- a/anime_downloader/sites/animtime.py +++ b/anime_downloader/sites/animtime.py @@ -6,36 +6,79 @@ from difflib import get_close_matches import re +def format_title_case(text): + """ + Will format text to title case and in will have roman numbers in capital case + only I is supported so only up to III, any number bigger than that will keep its original capitalization case + """ + words = text.split() + new_text = [] + + for word in words: + if word.lower().replace('i', '') == '': + new_text += ['I' * len(word)] + continue + + elif word.lower() == 'dub': + new_text += ['(Dub)'] + continue + + new_text += [word.title()] + + return ' '.join(new_text) + + +def get_title_dict(script): + """ + Returns a tuple with two dictionaries + the 1st one has the anime slugs with their pretty title + and the 2nd one has the anime slugs with their ids + """ + script_text = helpers.get(script).text + title_function = re.search("tm=.*?}", script_text).group() + titles_dict = { + x[0]: format_title_case(x[1].replace('-', ' ')) + for x in re.findall(r"qd\[tm\.(.*?)\]=.*?\".*?/animtime/(.*?)/", script_text) + } + id_dict = { + x[0]: x[1] + for x in re.findall(r"t\[t\.(.*?)=(\d+)", title_function) + } + + for title in id_dict: + """ + For any anime that are not matched in the pretty titles dictionary (titles_dict) + + for example Bleach (with the id of 1 is not in titles_dict) + """ + if title not in titles_dict: + titles_dict[title] = ' '.join( + re.sub(r"([A-Z])", r" \1", title).split()) + + return titles_dict, id_dict + + +def get_script_link(): + soup = helpers.soupify(helpers.get('https://animtime.com')) + script = 'https://animtime.com/' + \ + soup.select('script[src*=main]')[0].get('src') + + return script + + class AnimTime(Anime, sitename='animtime'): sitename = 'animtime' - @classmethod - def get_title_dict(cls, script): - script_text = helpers.get(script).text - title_function = re.search("tm=.*?}", script_text).group() - titles_regexed = re.findall("t\[t\.(.*?)=(\d+)", title_function) - titles = dict([(' '.join(re.sub(r"([A-Z])", r" \1", x[0]).split()), x[1]) - for x in titles_regexed]) - - return titles - - @classmethod - def get_script_link(cls): - soup = helpers.soupify(helpers.get('https://animtime.com')) - script = 'https://animtime.com/' + \ - soup.select('script[src*=main]')[0].get('src') - 
- return script - @classmethod def search(cls, query): - titles = cls.get_title_dict(cls.get_script_link()) - matches = get_close_matches(query, titles, cutoff=0.2) + titles = get_title_dict(get_script_link()) + matches = get_close_matches(query, titles[0], cutoff=0.2) search_results = [ SearchResult( - title=match, - url='https://animtime.com/title/{}'.format(titles.get(match)) + title=titles[0].get(match), + url='https://animtime.com/title/{}'.format( + titles[1].get(match)) ) for match in matches ] @@ -43,13 +86,13 @@ class AnimTime(Anime, sitename='animtime'): return search_results def _scrape_episodes(self): - link = self.get_script_link() - titles = dict((y, x) for x, y in self.get_title_dict(link).items()) + link = get_script_link() + titles = dict((y, x) for x, y in get_title_dict(link)[1].items()) current_title = titles.get(self.url.split('/')[-1]) script_text = helpers.get(link).text ep_count = int(re.search( - "zd\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) + r"\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) episodes = [] for i in range(ep_count): @@ -58,19 +101,20 @@ class AnimTime(Anime, sitename='animtime'): return episodes def _scrape_metadata(self): - titles = dict((y, x) for (x, y) in self.get_title_dict( - self.get_script_link()).items()) - self.title = titles.get(self.url.split('/')[-1]) + titles = get_title_dict(get_script_link())[1] + self.title = next(x for x, y in titles.items() + if int(y) == int(self.url.split('/')[-1])) class AnimTimeEpisode(AnimeEpisode, sitename='animtime'): def _get_sources(self): - titles = dict((y, x) for x, y in AnimTime.get_title_dict( - AnimTime.get_script_link()).items()) - current_title = titles.get(self.url.split('/')[-3]) + titles = get_title_dict(get_script_link())[1] + + current_title = next(x for x, y in titles.items() + if int(y) == int(self.url.split('/')[-3])) current_ep = "{0:03}".format(int(self.url.split('/')[-1])) - script_text = helpers.get(AnimTime.get_script_link()).text + script_text = helpers.get(get_script_link()).text regexed_link = re.search('tm\.' 
+ current_title.replace(" ", "") + '\]=function\(.*?return.*?(https.*?)"}', script_text).group(1) link = regexed_link.replace('"+t+"', current_ep) From 0e106d66e8c8c7331dd7aeb8ac56b6609c8822f0 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 3 May 2021 20:17:26 +0300 Subject: [PATCH 063/130] Update animtime.py --- anime_downloader/sites/animtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py index 5908aed..cdaa7b7 100644 --- a/anime_downloader/sites/animtime.py +++ b/anime_downloader/sites/animtime.py @@ -8,7 +8,7 @@ import re def format_title_case(text): """ - Will format text to title case and in will have roman numbers in capital case + Will format text to title case and it will have roman numbers in capital case only I is supported so only up to III, any number bigger than that will keep its original capitalization case """ words = text.split() From cb566e9ec5cb2f732db57e9845ed07cff69c8cd9 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sat, 8 May 2021 21:22:29 +0300 Subject: [PATCH 064/130] fixed animesuge --- anime_downloader/sites/animesuge.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/anime_downloader/sites/animesuge.py b/anime_downloader/sites/animesuge.py index 22ebe81..c438b1a 100644 --- a/anime_downloader/sites/animesuge.py +++ b/anime_downloader/sites/animesuge.py @@ -5,12 +5,14 @@ from anime_downloader.sites import helpers import re import json + class AnimeSuge(Anime, sitename="animesuge"): sitename = "animesuge" @classmethod def search(cls, query): - soup = helpers.soupify(helpers.get("https://animesuge.io/ajax/anime/search", params={"keyword": query}).json()['html']) + soup = helpers.soupify(helpers.get( + "https://animesuge.io/ajax/anime/search", params={"keyword": query}).json()['html']) search_results = [ SearchResult( @@ -27,8 +29,9 @@ class AnimeSuge(Anime, sitename="animesuge"): _id = re.search(r".*-(.*)", self.url).group(1) soup = helpers.soupify(helpers.get(ep_url, params={'id': _id})) - - return ['https://animesuge.io' + x.get('href') for x in soup.select('a:not(.more)')] + eps = ['https://animesuge.io' + re.search(r"(/anime.*?/ep-\d+)", x.get( + 'href')).group(1).replace('\\', '') for x in soup.select('a:not(.more)')] + return eps def _scrape_metadata(self): self.title = helpers.soupify(helpers.get(self.url)).find("h1").text @@ -37,13 +40,17 @@ class AnimeSuge(Anime, sitename="animesuge"): class AnimeSugeEpisode(NineAnimeEpisode, sitename='animesuge'): def _get_sources(self): # Get id and ep no. from url, e.g: https://animesuge.io/anime/naruto-xx8z/ep-190 -> xx8z, 190 - _id, ep_no = re.search(r".*\/anime\/.*-(.*?)\/.*-(\d+)$", self.url).group(1, 2) + _id, ep_no = re.search( + r".*\/anime\/.*-(.*?)\/.*-(\d+)$", self.url).group(1, 2) # Get sources json from html, e.g: """ 190""" + # data_sources = json.loads( data_sources = json.loads(helpers.soupify(helpers.get("https://animesuge.io/ajax/anime/servers", - params={"id": _id, "episode": ep_no})).select(f"a[data-base='{ep_no}']")[0].get("data-sources")) + params={"id": _id, "episode": ep_no}).json()['html']).select(f"a[data-base='{ep_no}']")[0].get("data-sources")) + + # # Only includes supported # Unsupported ones {'28': 'openstream'} @@ -60,14 +67,18 @@ class AnimeSugeEpisode(NineAnimeEpisode, sitename='animesuge'): params={"id": _id}).json()['url'] break # Makes it more consistent. 
- except HTTPError: + except requests.HTTPError: time.sleep(5) continue server = id_source_map[key] + link = self.decodeString(link) + + if 'mp4upload.com/embed' in link: + link = re.search(r"(https://.*?\.html)", link).group(1) sources_list.append({ 'extractor': server, - 'url': self.decodeString(link), + 'url': link, 'server': server, # This may not be true, can't see the info on page. 'version': 'subbed' From cbac6bc5938f87e9371b0a88365b75eae1269239 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sat, 8 May 2021 23:33:02 +0300 Subject: [PATCH 065/130] fix linux --- anime_downloader/util.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/anime_downloader/util.py b/anime_downloader/util.py index a4604fa..71342c2 100644 --- a/anime_downloader/util.py +++ b/anime_downloader/util.py @@ -306,7 +306,7 @@ def format_command(cmd, episode, file_format, speed_limit, path): '--console-log-level={log_level}', '{idm}': 'idman.exe /n /d {stream_url} /p {download_dir} /f {file_format}.mp4', '{wget}': 'wget {stream_url} --referer={referer} --user-agent={useragent} -O {download_dir}/{file_format}.mp4 -c', - '{uget}': 'uget --http-referer={referer} --http-user-agent={useragent} --folder={download_dir} --filename={file_format}.mp4 {stream_url}' + '{uget}': '/CMD/ --http-referer={referer} --http-user-agent={useragent} --folder={download_dir} --filename={file_format}.mp4 {stream_url}' } # Allows for passing the user agent with self.headers in the site. @@ -343,6 +343,9 @@ def format_command(cmd, episode, file_format, speed_limit, path): if cmd == "{idm}": rep_dict['file_format'] = rep_dict['file_format'].replace('/', '\\') + if cmd == '{uget}': + cmd_dict['{uget}'] = cmd_dict['{uget}'].replace('/CMD/', 'uget-gtk' if check_in_path('uget-gtk') else 'uget') + if cmd in cmd_dict: cmd = cmd_dict[cmd] From a7f53d23a278424ab58cd52e53ae3f5ff76b6684 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 10 May 2021 14:42:41 +0300 Subject: [PATCH 066/130] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 579273a..d50b5a7 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,7 @@ Yeah. Me too! That's why this tool exists. - animeout - Animerush - Animesimple +- Animesuge - requires Node.js - Animevibe - AnimeTake - AniTube From b92d91dd7164c5a3d4110c35dfb59e3740ff52bd Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 10 May 2021 14:44:13 +0300 Subject: [PATCH 067/130] Changed: Animesuge -> AnimeSuge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d50b5a7..ef45059 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Yeah. Me too! That's why this tool exists. 
- animeout - Animerush - Animesimple -- Animesuge - requires Node.js +- AnimeSuge - requires Node.js - Animevibe - AnimeTake - AniTube From 1d7d85f97e19e7af5da7a45d9f8c764aaf4e16ef Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Mon, 10 May 2021 21:19:45 +0100 Subject: [PATCH 068/130] Add 540p as a viable quality --- anime_downloader/commands/dl.py | 2 +- anime_downloader/sites/anime.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/anime_downloader/commands/dl.py b/anime_downloader/commands/dl.py index cbb79d5..fd15e4a 100644 --- a/anime_downloader/commands/dl.py +++ b/anime_downloader/commands/dl.py @@ -33,7 +33,7 @@ sitenames = [v[1] for v in ALL_ANIME_SITES] '--download-dir', metavar='PATH', help="Specify the directory to download to") @click.option( - '--quality', '-q', type=click.Choice(['360p', '480p', '720p', '1080p']), + '--quality', '-q', type=click.Choice(['360p', '480p', '540p', '720p', '1080p']), help='Specify the quality of episode. Default-720p') @click.option( '--fallback-qualities', '-fq', cls=util.ClickListOption, diff --git a/anime_downloader/sites/anime.py b/anime_downloader/sites/anime.py index 34cdbbf..faa604c 100644 --- a/anime_downloader/sites/anime.py +++ b/anime_downloader/sites/anime.py @@ -23,7 +23,7 @@ class Anime: ---------- url: string URL of the anime. - quality: One of ['360p', '480p', '720p', '1080p'] + quality: One of ['360p', '480p', '540p', '720p', '1080p'] Quality of episodes fallback_qualities: list The order of fallback. @@ -44,7 +44,7 @@ class Anime: meta = dict() subclasses = {} subbed = None - QUALITIES = ['360p', '480p', '720p', '1080p'] + QUALITIES = ['360p', '480p', '540p', '720p', '1080p'] @classmethod def search(cls, query): @@ -253,7 +253,7 @@ class AnimeEpisode: ---------- url: string URL of the episode. - quality: One of ['360p', '480p', '720p', '1080p'] + quality: One of ['360p', '480p', '540p', '720p', '1080p'] Quality of episode fallback_qualities: list The order of fallback. From f3aae408cab0e547ba21f5365fe367de2876cce6 Mon Sep 17 00:00:00 2001 From: nate-moo <40650681+nate-moo@users.noreply.github.com> Date: Sun, 16 May 2021 01:00:12 -0400 Subject: [PATCH 069/130] Kwik Fixes --- anime_downloader/extractors/kwik.py | 130 ++++++++++++++++++++-------- anime_downloader/sites/animepahe.py | 2 +- anime_downloader/sites/init.py | 1 + 3 files changed, 98 insertions(+), 35 deletions(-) diff --git a/anime_downloader/extractors/kwik.py b/anime_downloader/extractors/kwik.py index 37bdaf0..0e157e1 100644 --- a/anime_downloader/extractors/kwik.py +++ b/anime_downloader/extractors/kwik.py @@ -1,10 +1,15 @@ import logging +from platform import node import re +import subprocess import requests +import tempfile from anime_downloader.extractors.base_extractor import BaseExtractor +from anime_downloader.sites.helpers.request import temp_dir from anime_downloader.sites import helpers from anime_downloader import util +from anime_downloader.util import eval_in_node from subprocess import CalledProcessError logger = logging.getLogger(__name__) @@ -18,55 +23,112 @@ class Kwik(BaseExtractor): ''' def _get_data(self): + ld = logger.debug # Kwik servers don't have direct link access you need to be referred # from somewhere, I will just use the url itself. We then # have to rebuild the url. 
Hopefully kwik doesn't block this too # Necessary - self.url = self.url.replace(".cx/e/", ".cx/f/") - self.headers.update({"referer": self.url}) + #ld(self.url) + #self.url = self.url.replace(".cx/e/", ".cx/f/") + #self.headers.update({"referer": self.url}) - cookies = util.get_hcaptcha_cookies(self.url) + headers = {"Referer": "https://kwik.cx/"} - if not cookies: - resp = util.bypass_hcaptcha(self.url) - else: - resp = requests.get(self.url, cookies=cookies) + + + res = requests.get(self.url, headers=headers) - title_re = re.compile(r'title>(.*)<') + #ld(res.text) - kwik_text = resp.text - deobfuscated = None + evalText = helpers.soupify(res.text) - loops = 0 - while not deobfuscated and loops < 6: - try: - deobfuscated = helpers.soupify(util.deobfuscate_packed_js(re.search(r'<(script).*(var\s+_.*escape.*?)(?s)', kwik_text).group(2))) - except (AttributeError, CalledProcessError) as e: - if type(e) == AttributeError: - resp = util.bypass_hcaptcha(self.url) - kwik_text = resp.text + scripts = evalText.select("script") - if type(e) == CalledProcessError: - resp = requests.get(self.url, cookies=cookies) - finally: - cookies = resp.cookies - title = title_re.search(kwik_text).group(1) - loops += 1 + for i in scripts: + rexd = re.compile("", "") + break - post_url = deobfuscated.form["action"] - token = deobfuscated.input["value"] + tf = tempfile.mktemp(dir=temp_dir) - resp = helpers.post(post_url, headers=self.headers, params={"_token": token}, cookies=cookies, allow_redirects=False) - stream_url = resp.headers["Location"] + with open(tf, 'w', encoding="utf-8") as f: + f.write(rexd) + + #print(tf) - logger.debug('Stream URL: %s' % stream_url) + #ld(nodeRes) + + nodeRes = str(subprocess.getoutput(f"node {tf}")) + + ld(nodeRes) + + stream_url = re.search(r"source='([^;]*)';", nodeRes).group().replace("source='", "").replace("';", "") + #reg = re.compile("[\s\S]*") + + ld(stream_url) + + #kwik_text = resp.text + + #title_re = re.compile(r'title>(.*)<') + #title = title_re.search(kwik_text).group(1) return { 'stream_url': stream_url, - 'meta': { - 'title': title, - 'thumbnail': '' - }, - 'referer': None +# 'meta': { +# 'title': title, +# 'thumbnail': '' +# }, + 'referer': "https://kwik.cx/" } + + + + + #cookies = util.get_hcaptcha_cookies(self.url) + + #if not cookies: + # resp = util.bypass_hcaptcha(self.url) + #else: + # resp = requests.get(self.url, cookies=cookies) + + + + # + #deobfuscated = None + + #loops = 0 + #while not deobfuscated and loops < 6: + # try: + # deobfuscated = helpers.soupify(util.deobfuscate_packed_js(re.search(r'<(script).*(var\s+_.*escape.*?)(?s)', kwik_text).group(2))) + # except (AttributeError, CalledProcessError) as e: + # if type(e) == AttributeError: + # resp = util.bypass_hcaptcha(self.url) + # kwik_text = resp.text + + # if type(e) == CalledProcessError: + # resp = requests.get(self.url, cookies=cookies) + # finally: + # cookies = resp.cookies + # + # loops += 1 + + #post_url = deobfuscated.form["action"] + #token = deobfuscated.input["value"] + + #resp = helpers.post(post_url, headers=self.headers, params={"_token": token}, cookies=cookies, allow_redirects=False) + #stream_url = resp.headers["Location"] + + #logger.debug('Stream URL: %s' % stream_url) + + #return { + # 'stream_url': stream_url, + # 'meta': { + # 'title': title, + # 'thumbnail': '' + # }, + # 'referer': None + #} diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index 97ddb6b..9f09cb0 100644 --- a/anime_downloader/sites/animepahe.py +++ 
b/anime_downloader/sites/animepahe.py @@ -74,7 +74,7 @@ class AnimePahe(Anime, sitename='animepahe'): for search_result in search_results['data']: search_result_info = SearchResult( title=search_result['title'], - url=cls.base_anime_url + search_result['slug'], + url=cls.base_anime_url + search_result['session'], poster=search_result['poster'] ) diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index 054d83b..0e8d2c8 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -18,6 +18,7 @@ ALL_ANIME_SITES = [ ('animetake','animetake','AnimeTake'), ('animeonline','animeonline360','AnimeOnline'), ('animeout', 'animeout', 'AnimeOut'), + ('animepahe', 'animepahe', 'AnimePahe'), ('animerush', 'animerush', 'AnimeRush'), ('animesimple', 'animesimple', 'AnimeSimple'), ('animesuge', 'animesuge', 'AnimeSuge'), From fd7599e8629beff0c304e04aad58b39e865b08b7 Mon Sep 17 00:00:00 2001 From: nate-moo <40650681+nate-moo@users.noreply.github.com> Date: Sun, 16 May 2021 01:01:30 -0400 Subject: [PATCH 070/130] autopep8 --- anime_downloader/extractors/kwik.py | 44 +++++++++++++---------------- anime_downloader/sites/animepahe.py | 6 ++-- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/anime_downloader/extractors/kwik.py b/anime_downloader/extractors/kwik.py index 0e157e1..54c2180 100644 --- a/anime_downloader/extractors/kwik.py +++ b/anime_downloader/extractors/kwik.py @@ -29,17 +29,15 @@ class Kwik(BaseExtractor): # have to rebuild the url. Hopefully kwik doesn't block this too # Necessary - #ld(self.url) + # ld(self.url) #self.url = self.url.replace(".cx/e/", ".cx/f/") #self.headers.update({"referer": self.url}) headers = {"Referer": "https://kwik.cx/"} - - res = requests.get(self.url, headers=headers) - #ld(res.text) + # ld(res.text) evalText = helpers.soupify(res.text) @@ -57,18 +55,19 @@ class Kwik(BaseExtractor): with open(tf, 'w', encoding="utf-8") as f: f.write(rexd) - - #print(tf) - #ld(nodeRes) - + # print(tf) + + # ld(nodeRes) + nodeRes = str(subprocess.getoutput(f"node {tf}")) ld(nodeRes) - stream_url = re.search(r"source='([^;]*)';", nodeRes).group().replace("source='", "").replace("';", "") + stream_url = re.search( + r"source='([^;]*)';", nodeRes).group().replace("source='", "").replace("';", "") #reg = re.compile("[\s\S]*") - + ld(stream_url) #kwik_text = resp.text @@ -78,30 +77,25 @@ class Kwik(BaseExtractor): return { 'stream_url': stream_url, -# 'meta': { -# 'title': title, -# 'thumbnail': '' -# }, + # 'meta': { + # 'title': title, + # 'thumbnail': '' + # }, 'referer': "https://kwik.cx/" } - - - #cookies = util.get_hcaptcha_cookies(self.url) - #if not cookies: + # if not cookies: # resp = util.bypass_hcaptcha(self.url) - #else: + # else: # resp = requests.get(self.url, cookies=cookies) - - # #deobfuscated = None #loops = 0 - #while not deobfuscated and loops < 6: + # while not deobfuscated and loops < 6: # try: # deobfuscated = helpers.soupify(util.deobfuscate_packed_js(re.search(r'<(script).*(var\s+_.*escape.*?)(?s)', kwik_text).group(2))) # except (AttributeError, CalledProcessError) as e: @@ -113,7 +107,7 @@ class Kwik(BaseExtractor): # resp = requests.get(self.url, cookies=cookies) # finally: # cookies = resp.cookies - # + # # loops += 1 #post_url = deobfuscated.form["action"] @@ -124,11 +118,11 @@ class Kwik(BaseExtractor): #logger.debug('Stream URL: %s' % stream_url) - #return { + # return { # 'stream_url': stream_url, # 'meta': { # 'title': title, # 'thumbnail': '' # }, # 'referer': None - #} + # } diff --git 
a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index 9f09cb0..8db5992 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -21,7 +21,8 @@ class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): 'session': session_id } - episode_data = helpers.get('https://animepahe.com/api', params=params).json() + episode_data = helpers.get( + 'https://animepahe.com/api', params=params).json() episode_data = episode_data['data'] sources = {} @@ -39,7 +40,8 @@ class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): sources = [] server_list = re.findall(r'data-provider="([^"]+)', source_text) - episode_id, session_id = re.search("getUrls\((\d+?), \"(.*)?\"", source_text).groups() + episode_id, session_id = re.search( + "getUrls\((\d+?), \"(.*)?\"", source_text).groups() for server in server_list: if server not in supported_servers: From 52d768e6c2365091e66d4d008313aac5301b44f8 Mon Sep 17 00:00:00 2001 From: nate-moo <40650681+nate-moo@users.noreply.github.com> Date: Sun, 16 May 2021 01:04:01 -0400 Subject: [PATCH 071/130] removing commented out code --- anime_downloader/extractors/kwik.py | 63 ----------------------------- 1 file changed, 63 deletions(-) diff --git a/anime_downloader/extractors/kwik.py b/anime_downloader/extractors/kwik.py index 54c2180..3cb93f9 100644 --- a/anime_downloader/extractors/kwik.py +++ b/anime_downloader/extractors/kwik.py @@ -29,16 +29,11 @@ class Kwik(BaseExtractor): # have to rebuild the url. Hopefully kwik doesn't block this too # Necessary - # ld(self.url) - #self.url = self.url.replace(".cx/e/", ".cx/f/") - #self.headers.update({"referer": self.url}) headers = {"Referer": "https://kwik.cx/"} res = requests.get(self.url, headers=headers) - # ld(res.text) - evalText = helpers.soupify(res.text) scripts = evalText.select("script") @@ -55,74 +50,16 @@ class Kwik(BaseExtractor): with open(tf, 'w', encoding="utf-8") as f: f.write(rexd) - - # print(tf) - - # ld(nodeRes) - nodeRes = str(subprocess.getoutput(f"node {tf}")) ld(nodeRes) stream_url = re.search( r"source='([^;]*)';", nodeRes).group().replace("source='", "").replace("';", "") - #reg = re.compile("[\s\S]*") ld(stream_url) - #kwik_text = resp.text - - #title_re = re.compile(r'title>(.*)<') - #title = title_re.search(kwik_text).group(1) - return { 'stream_url': stream_url, - # 'meta': { - # 'title': title, - # 'thumbnail': '' - # }, 'referer': "https://kwik.cx/" } - - #cookies = util.get_hcaptcha_cookies(self.url) - - # if not cookies: - # resp = util.bypass_hcaptcha(self.url) - # else: - # resp = requests.get(self.url, cookies=cookies) - - # - #deobfuscated = None - - #loops = 0 - # while not deobfuscated and loops < 6: - # try: - # deobfuscated = helpers.soupify(util.deobfuscate_packed_js(re.search(r'<(script).*(var\s+_.*escape.*?)(?s)', kwik_text).group(2))) - # except (AttributeError, CalledProcessError) as e: - # if type(e) == AttributeError: - # resp = util.bypass_hcaptcha(self.url) - # kwik_text = resp.text - - # if type(e) == CalledProcessError: - # resp = requests.get(self.url, cookies=cookies) - # finally: - # cookies = resp.cookies - # - # loops += 1 - - #post_url = deobfuscated.form["action"] - #token = deobfuscated.input["value"] - - #resp = helpers.post(post_url, headers=self.headers, params={"_token": token}, cookies=cookies, allow_redirects=False) - #stream_url = resp.headers["Location"] - - #logger.debug('Stream URL: %s' % stream_url) - - # return { - # 'stream_url': stream_url, - # 'meta': { - # 'title': title, - # 'thumbnail': 
'' - # }, - # 'referer': None - # } From 3ec4e54765f65a554ff112fcc37e833d6f3caab9 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sun, 16 May 2021 18:10:16 +0300 Subject: [PATCH 072/130] fix animesimple --- anime_downloader/sites/animesimple.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/anime_downloader/sites/animesimple.py b/anime_downloader/sites/animesimple.py index 9939fc4..eb32063 100644 --- a/anime_downloader/sites/animesimple.py +++ b/anime_downloader/sites/animesimple.py @@ -20,7 +20,7 @@ class AnimeSimple(Anime, sitename='animesimple'): return [ SearchResult( title=i.get('title') if i.get('title') else i.select('img')[0].get('alt'), - url=i.get('href')) + url=("https:" if i.get('href')[0] == '/' else "") + i.get('href')) for i in search_results ] @@ -34,7 +34,7 @@ class AnimeSimple(Anime, sitename='animesimple'): 'top': 10000, # max 10 000 episodes 'bottom': 0, })) - return [i.get('href') for i in elements] + return [("https:" if i.get('href')[0] == '/' else "") + i.get('href') for i in elements] def _scrape_metadata(self): self.title = helpers.soupify(helpers.get(self.url)).select('li.breadcrumb-item.active')[0].text From 4710e0fddf68cb8450d55bd0b5dfdf7952759233 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sun, 16 May 2021 20:14:34 +0300 Subject: [PATCH 073/130] optimized animepahe I completely changed the way the episodes are scraped. But as a downside only the kwik server is used. --- anime_downloader/sites/animepahe.py | 179 ++++++++++++---------------- 1 file changed, 74 insertions(+), 105 deletions(-) diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index 8db5992..ea73981 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -8,59 +8,9 @@ from anime_downloader.sites import helpers logger = logging.getLogger(__name__) -class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): - QUALITIES = ['360p', '480p', '720p', '1080p'] - - def _get_source(self, episode_id, server, session_id): - # We will extract the episodes data through the animepahe api - # which returns the available qualities and the episode sources. 
- params = { - 'id': episode_id, - 'm': 'embed', - 'p': server, - 'session': session_id - } - - episode_data = helpers.get( - 'https://animepahe.com/api', params=params).json() - episode_data = episode_data['data'] - sources = {} - - for info in range(len(episode_data)): - quality = list(episode_data[info].keys())[0] - sources[f'{quality}p'] = episode_data[info][quality]['kwik'] - - if self.quality in sources: - return (server, sources[self.quality]) - return - - def _get_sources(self): - supported_servers = ['kwik', 'mp4upload', 'rapidvideo'] - source_text = helpers.get(self.url, cf=True).text - sources = [] - - server_list = re.findall(r'data-provider="([^"]+)', source_text) - episode_id, session_id = re.search( - "getUrls\((\d+?), \"(.*)?\"", source_text).groups() - - for server in server_list: - if server not in supported_servers: - continue - source = self._get_source(episode_id, server, session_id) - if source: - sources.append(source) - - if sources: - return sources - raise NotFoundError - - class AnimePahe(Anime, sitename='animepahe'): sitename = 'animepahe' api_url = 'https://animepahe.com/api' - base_anime_url = 'https://animepahe.com/anime/' - QUALITIES = ['360p', '480p', '720p', '1080p'] - _episodeClass = AnimePaheEpisode @classmethod def search(cls, query): @@ -71,68 +21,87 @@ class AnimePahe(Anime, sitename='animepahe'): } search_results = helpers.get(cls.api_url, params=params).json() - results = [] + if search_results['total'] == []: + return [] - for search_result in search_results['data']: - search_result_info = SearchResult( - title=search_result['title'], - url=cls.base_anime_url + search_result['session'], - poster=search_result['poster'] + return [ + SearchResult( + title=result['title'] + " (" + result['type'] + ")", + url="https://animepahe.com/anime/" + result['session'] + "/" + str(result['id']), # noqa + poster=result['poster'] ) + for result in search_results['data'] + ] - logger.debug(search_result_info) - results.append(search_result_info) + def _scrape_episodes(self): + attr = self.url.split('/') + session = attr[-2] + id_ = attr[-1] + page = 1 + headers = {'referer': 'https://animepahe.com/'} - return results + apiUri = self.api_url + '?m=release&id=' + id_ + '&sort=episode_asc&page=' + jsonResponse = helpers.get(apiUri + str(page), headers=headers).json() + lastPage = jsonResponse['last_page'] + perPage = jsonResponse['per_page'] + total = jsonResponse['total'] + ep = 1 + episodes = [] - def get_data(self): - page = helpers.get(self.url, cf=True).text - anime_id = re.search(r'&id=(\d+)', page).group(1) - - self.params = { - 'm': 'release', - 'id': anime_id, - 'sort': 'episode_asc', - 'page': 1 - } - - json_resp = helpers.get(self.api_url, params=self.params).json() - self._scrape_metadata(page) - self._episode_urls = self._scrape_episodes(json_resp) - self._len = len(self._episode_urls) - return self._episode_urls - - def _collect_episodes(self, ani_json, episodes=[]): - # Avoid changing original list - episodes = episodes[:] - - # If episodes is not an empty list we ensure that we start off - # from the length of the episodes list to get correct episode - # numbers - for no, anime_ep in enumerate(ani_json, len(episodes)): - episodes.append((no + 1, f'{self.url}/{anime_ep["id"]}',)) - - return episodes - - def _scrape_episodes(self, ani_json): - episodes = self._collect_episodes(ani_json['data']) - - if not episodes: - raise NotFoundError(f'No episodes found for {self.url}') + if (lastPage == 1 and perPage > total): + for epi in jsonResponse['data']: + 
episodes.append( + f'{self.api_url}?m=links&id={epi["anime_id"]}&session={epi["session"]}&p=kwik!!TRUE!!') else: - # Check if other pages exist since animepahe only loads - # first page and make subsequent calls to the api for every - # page - start_page = ani_json['current_page'] + 1 - end_page = ani_json['last_page'] + 1 - - for i in range(start_page, end_page): - self.params['page'] = i - resp = helpers.get(self.api_url, params=self.params).json() - - episodes = self._collect_episodes(resp['data'], episodes) - + stop = False + for page in range(lastPage): + if stop: + break + for i in range(perPage): + if ep <= total: + episodes.append( + f'{self.api_url}?m=release&id={id_}&sort=episode_asc&page={page+1}&ep={ep}!!FALSE!!') + ep += 1 + else: + stop = True + break return episodes def _scrape_metadata(self, data): self.title = re.search(r'
<h1>
    ([^<]+)', data).group(1) + + +class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): + def _get_sources(self): + if '!!TRUE!!' in self.url: + self.url = self.url.replace('!!TRUE!!', '') + else: + headers = {'referer': 'https://animepahe.com/'} + regex = r"\&ep\=(\d+)\!\!FALSE\!\!" + episodeNum = int(re.findall(regex, self.url)[0]) + self.url = re.sub(regex, '', self.url) + jsonResponse = helpers.get(self.url, headers=headers).json() + + ep = None + for episode in jsonResponse['data']: + if int(episode['episode']) == episodeNum: + ep = episode + if ep: + self.url = 'https://animepahe.com/api?m=links&id=' + str(ep['anime_id']) + '&session=' + ep['session'] + '&p=kwik' # noqa + else: + raise NotFoundError + + episode_data = helpers.get(self.url, cf=True).json() + + episode_data = episode_data['data'] + sources = {} + + for info in range(len(episode_data)): + quality = list(episode_data[info].keys())[0] + + sources[('720' if quality == '800' else quality) + 'p'] = episode_data[info][quality]['kwik'] + + return [ + ('kwik', sources[x]) + for x in sources + ] From 6a746ea7386e50c90d0a6272d36cbbe910a4aed0 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Sun, 16 May 2021 20:28:49 +0300 Subject: [PATCH 074/130] added sort sources and fixed metadata --- anime_downloader/sites/animepahe.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index ea73981..fce5721 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -27,7 +27,7 @@ class AnimePahe(Anime, sitename='animepahe'): return [ SearchResult( title=result['title'] + " (" + result['type'] + ")", - url="https://animepahe.com/anime/" + result['session'] + "/" + str(result['id']), # noqa + url="https://animepahe.com/anime/TITLE!" + result['title'] + " (" + result['type'] + ")" + '!TITLE/' + result['session'] + "/" + str(result['id']), # noqa poster=result['poster'] ) for result in search_results['data'] @@ -67,8 +67,8 @@ class AnimePahe(Anime, sitename='animepahe'): break return episodes - def _scrape_metadata(self, data): - self.title = re.search(r'
<h1>
    ([^<]+)', data).group(1) + def _scrape_metadata(self): + self.title = re.findall(r"TITLE!(.*?)!TITLE", self.url)[0] class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): @@ -94,14 +94,15 @@ class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): episode_data = helpers.get(self.url, cf=True).json() episode_data = episode_data['data'] - sources = {} + sources_list = [] for info in range(len(episode_data)): quality = list(episode_data[info].keys())[0] + sources_list.append({ + 'extractor': 'kwik', + 'url': episode_data[info][quality]['kwik'], + 'server': 'kwik', + 'version': 'subbed' + }) - sources[('720' if quality == '800' else quality) + 'p'] = episode_data[info][quality]['kwik'] - - return [ - ('kwik', sources[x]) - for x in sources - ] + return self.sort_sources(sources_list) From 36aa35ea2d81b97022180cccf037f26aef980cd7 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Wed, 19 May 2021 22:53:12 +0300 Subject: [PATCH 075/130] fixed genoanime --- anime_downloader/sites/genoanime.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/anime_downloader/sites/genoanime.py b/anime_downloader/sites/genoanime.py index c7763b8..595f95f 100644 --- a/anime_downloader/sites/genoanime.py +++ b/anime_downloader/sites/genoanime.py @@ -38,4 +38,11 @@ class GenoAnimeEpisode(AnimeEpisode, sitename='genoanime'): def _get_sources(self): soup = helpers.soupify(helpers.get(self.url)) soup = helpers.soupify(helpers.get(soup.iframe.get("src"))) - return [("no_extractor", soup.source.get("src"))] + id_ = re.findall(r"data: {id: [\"'](.*?)[\"']}", str(soup))[0] + + response = helpers.post('https://genoanime.com/player/genovids.php', data={"id": id_}).json() # noqa + + return [ + ("no_extractor", x['src']) + for x in response['url'] + ] From 03c0329aa914ab7f1bd9397a6504a7f372b95b5b Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Wed, 19 May 2021 21:14:20 +0100 Subject: [PATCH 076/130] Genoanime: import re --- anime_downloader/sites/genoanime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/genoanime.py b/anime_downloader/sites/genoanime.py index 595f95f..d4ede49 100644 --- a/anime_downloader/sites/genoanime.py +++ b/anime_downloader/sites/genoanime.py @@ -1,7 +1,7 @@ from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites import helpers - +import re class GenoAnime(Anime, sitename="genoanime"): sitename = "genoanime" From 57a59567d5eec2bbff13712f9e6441ef5aa407c2 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sun, 23 May 2021 13:42:08 +0300 Subject: [PATCH 077/130] Update init.py --- anime_downloader/sites/init.py | 1 + 1 file changed, 1 insertion(+) diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index 054d83b..0f22317 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -44,6 +44,7 @@ ALL_ANIME_SITES = [ ('vidstream', 'vidstream', 'VidStream'), # ('voiranime', 'voiranime', 'VoirAnime'), ('vostfree', 'vostfree', 'VostFree'), + ('wcostream', 'wcostream', 'WcoStream'), ] From 7c8974fd5dd4a14e84e83f7ce8061a81903cde5c Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sun, 23 May 2021 13:42:39 +0300 Subject: [PATCH 078/130] Create wcostream.py --- anime_downloader/sites/wcostream.py | 68 +++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) 
create mode 100644 anime_downloader/sites/wcostream.py diff --git a/anime_downloader/sites/wcostream.py b/anime_downloader/sites/wcostream.py new file mode 100644 index 0000000..d596b93 --- /dev/null +++ b/anime_downloader/sites/wcostream.py @@ -0,0 +1,68 @@ +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult +from anime_downloader.extractors import get_extractor +from anime_downloader.sites import helpers +import re + + +class WcoStream(Anime, sitename='wcostream'): + + sitename = 'wcostream' + + @classmethod + def search(cls, query): + soup = helpers.soupify(helpers.get( + 'https://wcostream.cc/search', + params={'keyword': query} + )) + results = soup.select('.film_list-wrap > .flw-item') + + return [ + SearchResult( + title=x.find('img')['alt'], + url=x.find('a')['href'], + meta={'year': x.select_one('.fd-infor > .fdi-item').text.strip()}, + meta_info={ + 'version_key_dubbed': '(Dub)' + } + ) + for x in results + ] + + def _scrape_episodes(self): + soup = helpers.soupify(helpers.get(self.url)) + episodes = soup.select_one('#content-episodes').select('ul.nav > li.nav-item') # noqa + return [ + x.find('a')['href'] + for x in episodes + if 'javascript' not in str(x) + ] + + def _scrape_metadata(self): + soup = helpers.soupify(helpers.get(self.url)) + self.title = soup.select_one( + 'meta[property="og:title"]' + )['content'].split('Episode')[0].strip() + + +class WcoStreamEpisode(AnimeEpisode, sitename='wcostream'): + def _get_sources(self): + soup = helpers.soupify(helpers.get(self.url)) + servers = soup.select("#servers-list > ul > li") + servers = [ + { + "name": server.find('span').text.strip(), + "link": server.find('a')['data-embed'] + } + for server in servers + ] + sources = [] + + for server in servers: + ext = get_extractor('wcostream')( + server['link'], + quality=self.quality, + headers={} + ) + sources.extend([('no_extractor', x['stream_url']) for x in ext._get_data()]) # noqa + + return sources From 2add87ff62db26725cb1f9225e9a0e9e09a65231 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sun, 23 May 2021 13:43:15 +0300 Subject: [PATCH 079/130] Update init.py --- anime_downloader/extractors/init.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/anime_downloader/extractors/init.py b/anime_downloader/extractors/init.py index 3625c6d..f4ecbc3 100644 --- a/anime_downloader/extractors/init.py +++ b/anime_downloader/extractors/init.py @@ -67,6 +67,12 @@ ALL_EXTRACTORS = [ 'regex': 'yourupload', 'class': 'Yourupload' }, + { + 'sitename': 'wcostream', + 'modulename': 'wcostream', + 'regex': 'wcostream', + 'class': 'WcoStream' + }, { 'sitename': 'vidstream', 'modulename': 'vidstream', From 44dd797b7864be151b81de7ab27a4881a222df11 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sun, 23 May 2021 13:43:33 +0300 Subject: [PATCH 080/130] Create wcostream.py --- anime_downloader/extractors/wcostream.py | 37 ++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 anime_downloader/extractors/wcostream.py diff --git a/anime_downloader/extractors/wcostream.py b/anime_downloader/extractors/wcostream.py new file mode 100644 index 0000000..0b65fee --- /dev/null +++ b/anime_downloader/extractors/wcostream.py @@ -0,0 +1,37 @@ +from anime_downloader.extractors.base_extractor import BaseExtractor +from anime_downloader.sites import helpers +import re + + +class WcoStream(BaseExtractor): + def _get_data(self): + try: + if 
self.url.startswith('https://vidstream.pro/e'): + base_url = 'https://vidstream.pro' + elif self.url.startswith('https://mcloud.to/e/'): + base_url = 'https://mcloud.to' + else: + return [] + + html = helpers.get(self.url, referer='https://wcostream.cc/') + id_ = re.findall(r"/e/(.*?)\?domain", self.url)[0] + skey = re.findall(r"skey\s=\s['\"](.*?)['\"];", html.text)[0] + + apiLink = f"{base_url}/info/{id_}?domain=wcostream.cc&skey={skey}" + referer = f"{base_url}/e/{id_}?domain=wcostream.cc" + + response = helpers.get(apiLink, referer=referer).json() + + if response['success'] is True: + sources = [ + { + 'stream_url': x['file'] + } + for x in response['media']['sources'] + ] + return sources + else: + return [] + + except Exception: + return {"stream_url": ''} From d88e6fef720fec446d48f9fc87c3250773779367 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sun, 23 May 2021 15:09:39 +0300 Subject: [PATCH 081/130] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ef45059..4e0d2b5 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,7 @@ Yeah. Me too! That's why this tool exists. - Vidstream - Voiranime - Vostfree +- Wcostream Sites that require Selenium **DO NOT** and **WILL NOT** work on mobile operating systems From 4205ea44173ee2c480195d5c1abc7ec705be714d Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sun, 23 May 2021 15:10:22 +0300 Subject: [PATCH 082/130] Update config.py --- anime_downloader/config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/anime_downloader/config.py b/anime_downloader/config.py index 141bfd6..71a6f73 100644 --- a/anime_downloader/config.py +++ b/anime_downloader/config.py @@ -133,6 +133,10 @@ DEFAULT_CONFIG = { 'servers': ['vidstream', 'gcloud', 'yourupload', 'hydrax'], 'version': 'subbed', }, + 'wcostream': { + 'servers': ['vidstreampro', 'mcloud'], + 'version': 'subbed', + }, 'animeflix': { 'server': 'AUEngine', 'fallback_servers': ['FastStream'], From 0dbdfb86dc919788b6bece15a003182f02116a0c Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sun, 23 May 2021 15:20:08 +0300 Subject: [PATCH 083/130] Update wcostream.py --- anime_downloader/sites/wcostream.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/anime_downloader/sites/wcostream.py b/anime_downloader/sites/wcostream.py index d596b93..8c7b65b 100644 --- a/anime_downloader/sites/wcostream.py +++ b/anime_downloader/sites/wcostream.py @@ -1,6 +1,8 @@ from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.extractors import get_extractor +from anime_downloader.config import Config from anime_downloader.sites import helpers + import re @@ -46,6 +48,7 @@ class WcoStream(Anime, sitename='wcostream'): class WcoStreamEpisode(AnimeEpisode, sitename='wcostream'): def _get_sources(self): + config = Config._read_config()['siteconfig']['wcostream'] soup = helpers.soupify(helpers.get(self.url)) servers = soup.select("#servers-list > ul > li") servers = [ @@ -55,6 +58,8 @@ class WcoStreamEpisode(AnimeEpisode, sitename='wcostream'): } for server in servers ] + + servers = sorted(servers, key=lambda x: x['name'].lower() in config['servers'][0].lower())[::-1] # noqa sources = [] for server in servers: From 7fb7020fb91fa6429050b71f57b51bf96c1148a6 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sun, 23 May 2021 16:33:48 +0300 Subject: 
[PATCH 084/130] Update wcostream.py --- anime_downloader/sites/wcostream.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/anime_downloader/sites/wcostream.py b/anime_downloader/sites/wcostream.py index 8c7b65b..44ead83 100644 --- a/anime_downloader/sites/wcostream.py +++ b/anime_downloader/sites/wcostream.py @@ -1,6 +1,5 @@ from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.extractors import get_extractor -from anime_downloader.config import Config from anime_downloader.sites import helpers import re @@ -48,7 +47,6 @@ class WcoStream(Anime, sitename='wcostream'): class WcoStreamEpisode(AnimeEpisode, sitename='wcostream'): def _get_sources(self): - config = Config._read_config()['siteconfig']['wcostream'] soup = helpers.soupify(helpers.get(self.url)) servers = soup.select("#servers-list > ul > li") servers = [ @@ -59,7 +57,7 @@ class WcoStreamEpisode(AnimeEpisode, sitename='wcostream'): for server in servers ] - servers = sorted(servers, key=lambda x: x['name'].lower() in config['servers'][0].lower())[::-1] # noqa + servers = sorted(servers, key=lambda x: x['name'].lower() in self.config['servers'][0].lower())[::-1] # noqa sources = [] for server in servers: From bb6815f872917ad843d89bda62b00b6c837f6bf9 Mon Sep 17 00:00:00 2001 From: Zero Date: Sun, 23 May 2021 16:50:55 -0400 Subject: [PATCH 085/130] Update user-agents --- anime_downloader/const.py | 250 +++++++++--------- anime_downloader/downloader/SmartDL.py | 2 +- .../downloader/base_downloader.py | 2 +- .../downloader/http_downloader.py | 4 +- anime_downloader/sites/erairaws.py | 2 +- anime_downloader/sites/putlockers.py | 2 +- anime_downloader/sites/twistmoe.py | 2 +- anime_downloader/util.py | 2 +- 8 files changed, 133 insertions(+), 133 deletions(-) diff --git a/anime_downloader/const.py b/anime_downloader/const.py index 980e6b3..440b479 100644 --- a/anime_downloader/const.py +++ b/anime_downloader/const.py @@ -1,14 +1,14 @@ import random mobile_headers = { - 'user-agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) \ - AppleWebKit/604.1.38 (KHTML, like Gecko) \ - Version/11.0 Mobile/15A402 Safari/604.1" + 'user-agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 14_5_1 like Mac OS X) \ + AppleWebKit/605.1.15 (KHTML, like Gecko) \ + Version/14.0 Mobile/15E148 Safari/604.1" } desktop_headers = { - 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101 \ -Firefox/56.0" + 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) \ + Gecko/20100101 Firefox/88.0.1" } @@ -16,123 +16,123 @@ def get_random_header(): return {'user-agent': random.choice(HEADERS)} -HEADERS = ['Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 
Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36', - 'Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.2 (KHTML, like Gecko) Chrome/22.0.1216.0 Safari/537.2', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1', - 'Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5', - 'Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/536.5 (KHTML like Gecko) Chrome/19.0.1084.56 Safari/1EA69', - 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.22 (KHTML, like 
Gecko) Chrome/19.0.1047.0 Safari/535.22', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21', - 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1041.0 Safari/535.21', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/18.6.872.0 Safari/535.2 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0', - 'Mozilla/5.0 (Macintosh; AMD Mac OS X 10_8_2) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/18.6.872', - 'Mozilla/5.0 (X11; CrOS i686 1660.57.0) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.46 Safari/535.19', - 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.11 Safari/535.19', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.04 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/10.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; 
Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.04 Chromium/17.0.963.56 Chrome/17.0.963.56 Safari/535.11', - 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.8 (KHTML, like Gecko) Chrome/17.0.940.0 Safari/535.8', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7ad-imcjapan-syosyaman-xkgi3lqg03!wgz', - 'Mozilla/5.0 (X11; CrOS i686 1193.158.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7', - 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7', - 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7xs5D9rRDFpg2g', - 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.6 (KHTML, like Gecko) Chrome/16.0.897.0 Safari/535.6', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.54 Safari/535.2', - 'Mozilla/5.0 (X11; FreeBSD i386) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2', - 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.2 (KHTML, like Gecko) Ubuntu/11.10 Chromium/15.0.874.120 Chrome/15.0.874.120 Safari/535.2', - 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.872.0 Safari/535.2', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Ubuntu/11.04 Chromium/15.0.871.0 Chrome/15.0.871.0 Safari/535.2', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.864.0 Safari/535.2', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2', - 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.3 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.3 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like 
Gecko) Chrome/4.0.219.0 Safari/532.1', - 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1', - 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1', - 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.212.1 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.4 Safari/532.0', - 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0', - 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0', ] +HEADERS = ['Mozilla/5.0 (Windows NT 6.1) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/605.1.15', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_3_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.211 Safari/605.1.15', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.210 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.209 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.208 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.207 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.206 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 5.1) WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.206 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 10.0) WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.201 Safari/605.1.15', + 'Mozilla/5.0 (Macintosh; Intel Mac OS 
X 10_10_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.199 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.195 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.198 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.197 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.194 Safari/605.1.15', + 'Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.192 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/605.1.14 (KHTML, like Gecko) Chrome/90.0.4430.209 Safari/605.1.14', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/605.1.14 (KHTML, like Gecko) Chrome/90.0.4430.209 Safari/605.1.14', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/605.1.14 (KHTML, like Gecko) Chrome/90.0.4430.209 Safari/605.1.14', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/605.1.13 (KHTML, like Gecko) Chrome/90.0.4430.208 Safari/605.1.13', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/605.1.12 (KHTML, like Gecko) Chrome/90.0.4429.205 Safari/605.1.12', + 'Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/605.1.11 (KHTML, like Gecko) Chrome/90.0.4429.203 Safari/605.1.11', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AAppleWebKit/605.1.10 (KHTML, like Gecko) Chrome/90.0.4429.201 Safari/605.1.10', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/605.0.9 (KHTML, like Gecko) Chrome/90.0.4428.105 Safari/605.0.9', + 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/605.1.12 (KHTML, like Gecko) Chrome/90.0.4428.196 Safari/605.1.12', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.0.8 (KHTML, like Gecko) Chrome/90.0.4428.97 Safari/605.0.8', + 'Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/605.0.7 (KHTML, like Gecko) Chrome/90.0.4428.92 Safari/2BC75', + 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/605.0.4 (KHTML, like Gecko) Chrome/90.0.4428.89 Safari/605.0.4', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/605.0.2 (KHTML, like Gecko) Chrome/90.0.4427.85 Safari/605.0.2', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/605.0.1 (KHTML, like Gecko) Chrome/90.0.4427.83 Safari/605.0.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/604.2.9 (KHTML, like Gecko) Chrome/90.0.4427.76 Safari/604.2.9', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/604.2.8 (KHTML, like Gecko) Chrome/90.0.4426.74 Safari/604.2.8', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.7 (KHTML, like Gecko) Chrome/90.0.4425.75 Safari/604.2.7', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/604.2.7 (KHTML, like Gecko) Chrome/90.0.4425.75 Safari/604.2.7', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.7 (KHTML, like Gecko) Chrome/90.0.4425.75 Safari/604.2.7', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/604.2.7 (KHTML, like Gecko) Chrome/90.0.4425.75 Safari/604.2.7', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/604.2.7 (KHTML, like Gecko) Chrome/90.0.4425.74 Safari/604.2.7', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4424.65 Safari/604.2.5', + 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4424.64 Safari/604.2.5', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4424.62 Safari/604.2.5', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/604.2.5 (KHTML, like Gecko) 
Chrome/90.0.4423.55 Safari/604.2.5', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4423.53 Safari/604.2.5', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4423.52 Safari/604.2.5', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4423.50 Safari/604.2.5', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4422.94 Safari/604.2.5 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0', + 'Mozilla/5.0 (Macintosh; AMD Mac OS X 10_8_2) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4422.91 Safari/604.2.5', + 'Mozilla/5.0 (X11; CrOS i686 1660.57.0) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.89 Safari/604.2.3', + 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.88 Safari/604.2.3', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.87 Safari/604.2.3', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.87 Safari/604.2.3', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.86 Safari/604.2.3', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.85 Safari/604.2.3', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.81 Safari/604.2.3', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.104 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.104 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.102 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.102 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.101 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.100 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.99 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.99 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.95 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4420.78 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4420.77 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4420.77 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4420.76 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Ubuntu/20.10 Chromium/90.0.4420.72 Chrome/90.0.4420.72 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Ubuntu/20.04 Chromium/90.0.4420.70 Chrome/90.0.4420.70 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Ubuntu/19.10 Chromium/90.0.4420.69 Chrome/90.0.4420.69 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.2.1 (KHTML, like 
Gecko) Ubuntu/19.10 Chromium/90.0.4420.67 Chrome/90.0.4420.67 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.96 Safari/604.2.1', + 'Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.95 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.95 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.92 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.92 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.90 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Ubuntu/20.04 Chrome/90.0.4419.86 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4418.83 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4418.83 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4418.82 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4418.81 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.78 Safari/604.2.0', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.78 Safari/604.2.0.7ad-imcjapan-syosyaman-xkgi4lqg18!wgz', + 'Mozilla/5.0 (X11; CrOS i686 1193.158.0) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.75 Safari/604.2.0', + 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.74 Safari/604.2.0', + 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.73 Safari/604.2.0', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.70 Safari/604.2.0.2xs8D9rRDFpg8g', + 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.67 Safari/604.2.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.66 Safari/604.2.0', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.66 Safari/604.2.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.107 Safari/604.1', + 'Mozilla/5.0 (X11; FreeBSD i386) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.105 Safari/604.1', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.1 (KHTML, like Gecko) Ubuntu/20.10 Chromium/90.0.4417.104 Chrome/90.0.4417.104 Safari/604.1', + 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.104 Safari/604.1', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.103 Safari/604.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.1 (KHTML, like Gecko) Ubuntu/20.04 Chromium/90.0.4417.103 Chrome/90.0.4417.103 Safari/604.1', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.103 Safari/604.1', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.101 Safari/604.1', + 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.99 Safari/604.0', + 'Mozilla/5.0 
(Windows; U; Windows NT 5.1; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.99 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.98 Safari/604.0', + 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.95 Safari/604.0', + 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.92 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.90 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.85 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.102 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.100 Safari/604.0', + 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.99 Safari/604.0', + 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.96 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.96 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.95 Safari/604.0', + 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.95 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.90 Safari/603.9', + 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.90 Safari/603.9', + 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.89 Safari/603.9', + 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.88 Safari/603.9', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.88 Safari/603.9', + 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.85 Safari/603.9', + 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.82 Safari/603.9', + 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.79 Safari/603.9', + 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.77 Safari/603.9', ] diff --git a/anime_downloader/downloader/SmartDL.py b/anime_downloader/downloader/SmartDL.py index bdc9936..03f54c3 100644 --- a/anime_downloader/downloader/SmartDL.py +++ b/anime_downloader/downloader/SmartDL.py @@ -12,7 +12,7 @@ class pySmartDL(BaseDownloader): headers = self.source.headers if 'user-agent' not in headers: - headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0" + headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1" # This allows backwards compatible while also working with # PySmartDl as it only passes user agent if spelled "User-Agent" diff --git a/anime_downloader/downloader/base_downloader.py b/anime_downloader/downloader/base_downloader.py index 59f9429..1ce91a3 100644 --- a/anime_downloader/downloader/base_downloader.py +++ 
b/anime_downloader/downloader/base_downloader.py @@ -30,7 +30,7 @@ class BaseDownloader: # Added Referer Header as kwik needd it. headers = self.source.headers if 'user-agent' not in headers: - headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0" + headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1" if self.source.referer: headers['referer'] = self.source.referer diff --git a/anime_downloader/downloader/http_downloader.py b/anime_downloader/downloader/http_downloader.py index 5c8f0ea..4affe49 100644 --- a/anime_downloader/downloader/http_downloader.py +++ b/anime_downloader/downloader/http_downloader.py @@ -29,7 +29,7 @@ class HTTPDownloader(BaseDownloader): url = self.source.stream_url headers = self.source.headers if 'user-agent' not in headers: - headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0" + headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1" if self.source.referer: headers['Referer'] = self.source.referer @@ -60,7 +60,7 @@ class HTTPDownloader(BaseDownloader): def _non_range_download(self): url = self.source.stream_url headers = { - 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0" + 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1" } if self.source.referer: headers['Referer'] = self.source.referer diff --git a/anime_downloader/sites/erairaws.py b/anime_downloader/sites/erairaws.py index 1aafcd5..bff13b3 100644 --- a/anime_downloader/sites/erairaws.py +++ b/anime_downloader/sites/erairaws.py @@ -197,7 +197,7 @@ class EraiRawsEpisode(AnimeEpisode, sitename='erai-raws'): headers = { 'cache-control': 'max-age=0', 'upgrade-insecure-requests': '1', - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101 Firefox/56.0', + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'sec-fetch-site': 'same-origin', 'sec-fetch-mode': 'navigate', diff --git a/anime_downloader/sites/putlockers.py b/anime_downloader/sites/putlockers.py index 088c2da..31908d7 100644 --- a/anime_downloader/sites/putlockers.py +++ b/anime_downloader/sites/putlockers.py @@ -46,7 +46,7 @@ class PutLockers(Anime, sitename="putlockers"): class PutLockersEpisode(AnimeEpisode, sitename="putlockers"): def _get_sources(self): self.headers = { - "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101 Firefox/56.0"} + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1"} text = helpers.get(self.url).text sources_list = [] diff --git a/anime_downloader/sites/twistmoe.py b/anime_downloader/sites/twistmoe.py index 6e65ace..1d52453 100644 --- a/anime_downloader/sites/twistmoe.py +++ b/anime_downloader/sites/twistmoe.py @@ -37,7 +37,7 @@ class TwistMoe(Anime, sitename='twist.moe'): @classmethod def search(self, query): headers = { - 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.46 Safari/537.36', + 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/605.1.15', 'x-access-token': '0df14814b9e590a1f26d3071a4ed7974' } # soup = helpers.soupify(helpers.get('https://twist.moe/', 
allow_redirects=True, headers=headers)) diff --git a/anime_downloader/util.py b/anime_downloader/util.py index 72d1827..d60e932 100644 --- a/anime_downloader/util.py +++ b/anime_downloader/util.py @@ -322,7 +322,7 @@ def format_command(cmd, episode, file_format, speed_limit, path): if episode.headers.get('user-agent'): useragent = episode.headers['user-agent'] else: - useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36' + useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/605.1.15' stream_url = episode.source().stream_url if not episode.url.startswith( 'magnet:?xt=urn:btih:') else episode.url From 08c51d4d692aaad296deaad594a671cc4671e244 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Mon, 24 May 2021 01:19:14 +0300 Subject: [PATCH 086/130] small fix --- anime_downloader/sites/wcostream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/wcostream.py b/anime_downloader/sites/wcostream.py index 44ead83..978ee93 100644 --- a/anime_downloader/sites/wcostream.py +++ b/anime_downloader/sites/wcostream.py @@ -35,7 +35,7 @@ class WcoStream(Anime, sitename='wcostream'): return [ x.find('a')['href'] for x in episodes - if 'javascript' not in str(x) + if 'https://wcostream.cc/watch' in x.find('a')['href'] ] def _scrape_metadata(self): From 0e270c08a56b06be4c36b0358c494315732f71eb Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Mon, 24 May 2021 20:01:08 +0300 Subject: [PATCH 087/130] Update init.py --- anime_downloader/extractors/init.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/anime_downloader/extractors/init.py b/anime_downloader/extractors/init.py index f4ecbc3..45409ba 100644 --- a/anime_downloader/extractors/init.py +++ b/anime_downloader/extractors/init.py @@ -1,4 +1,6 @@ from importlib import import_module +import re + ALL_EXTRACTORS = [ { @@ -186,7 +188,7 @@ ALL_EXTRACTORS = [ def get_extractor(name): for extractor in ALL_EXTRACTORS: - if extractor['regex'] in name.lower(): + if re.match(extractor['regex'], name.lower()): module = import_module( 'anime_downloader.extractors.{}'.format( extractor['modulename']) From 1f9a7dd35fbfa78962df301d5563c8a989656f12 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Tue, 25 May 2021 22:13:14 +0300 Subject: [PATCH 088/130] reworked some logic and also made the code more readable --- anime_downloader/sites/helpers/selescrape.py | 182 ++++++++++++------- 1 file changed, 113 insertions(+), 69 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 9ad32c7..89f4c80 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -4,14 +4,25 @@ from urllib.parse import urlencode from selenium import webdriver from sys import platform import tempfile -import os import logging import click import time import json +import os + + +def open_config(): + from anime_downloader.config import Config + return Config + serverLogger.setLevel(logging.ERROR) logger = logging.getLogger(__name__) +TEMP_FOLDER = os.path.join(tempfile.gettempdir(), 'AnimeDL-SeleniumCache') +data = open_config() + +if not os.path.isdir(TEMP_FOLDER): + os.makedirs(TEMP_FOLDER) def get_data_dir(): @@ -23,14 +34,6 @@ def get_data_dir(): return 
os.path.join(click.get_app_dir(APP_NAME), 'data') -def open_config(): - from anime_downloader.config import Config - return Config - - -data = open_config() - - def get_browser_config(): ''' Decides what browser selescrape will use. @@ -63,24 +66,31 @@ def get_browser_executable(): def get_driver_binary(): value = data['dl']['selescrape_driver_binary_path'] - binary_path = value.lower() if value else value - return binary_path + if value: + return value + + return None def cache_request(sele_response): """ This function saves the response from a Selenium request in a json. - It uses timestamps so that the rest of the code can know if the cache has expired or not. + It uses timestamps to can know if the cache has expired or not. """ - file = os.path.join(tempfile.gettempdir(), 'selenium_cached_requests.json') + file = os.path.join(TEMP_FOLDER, 'selenium_cached_requests.json') + if os.path.isfile(file): with open(file, 'r') as f: tmp_cache = json.load(f) else: tmp_cache = {} + data = sele_response.__dict__ - tmp_cache[data['url']] = { + url = data['url'] + url = (url[:-1] if url and url[-1] == '/' else url) + + tmp_cache[url] = { 'data': data['text'], 'expiry': time.time(), 'method': data['method'], @@ -96,80 +106,111 @@ def check_cache(url): """ This function checks if the cache file exists, if it exists then it will read the file - And it will verify if the cache is less than or equal to 1 hour ago + And it will verify if the cache is less than or equal to 30 mins ago If it is, it will return it as it is. If it isn't, it will delete the expired cache from the file and return None If the file doesn't exist at all it will return None """ - file = os.path.join(tempfile.gettempdir(), 'selenium_cached_requests.json') + file = os.path.join(TEMP_FOLDER, 'selenium_cached_requests.json') if os.path.isfile(file): + with open(file, 'r') as f: data = json.load(f) - if url not in data: + + # Yes, this is ugly, + # but its the best way that I found to find the cache + # when the url is not exactly the same (a slash at the end or not) + clean_url = (url[:-1] if url and url[-1] == '/' else url) + found = False + + for link in data: + if link == clean_url: + url = link + found = True + + if not found: return + timestamp = data[url]['expiry'] - if (time.time() - timestamp <= 3600): + + if (time.time() - timestamp <= 1800): return data[url] else: data.pop(url, None) + with open(file, 'w') as f: json.dump(data, f, indent=4) def driver_select(): ''' - it configures what each browser should do - and gives the driver variable that is used - to perform any actions below this function. + This configures what each browser should do + and returns the corresponding driver. 
''' browser = get_browser_config() data_dir = get_data_dir() executable = get_browser_executable() - driver_binary = get_driver_binary() - binary = None if not driver_binary else driver_binary + binary = get_driver_binary() + if browser == 'firefox': fireFox_Options = webdriver.FirefoxOptions() - fireFox_Options.headless = True - fireFox_Options.add_argument('--log fatal') - fireFox_Profile = webdriver.FirefoxProfile() - fireFox_Profile.set_preference("general.useragent.override", get_random_header()['user-agent']) + ops = [ + "--width=1920", "--height=1080", + "headless", "--log fatal" + ] - if not binary: - driver = webdriver.Firefox(fireFox_Profile, options=fireFox_Options, service_log_path=os.path.devnull) - else: - try: - driver = webdriver.Firefox(fireFox_Profile, options=fireFox_Options, service_log_path=os.path.devnull) - except: - driver = webdriver.Firefox(fireFox_Profile, executable_path=binary, options=fireFox_Options, - service_log_path=os.path.devnull) + for option in ops: + fireFox_Options.add_argument(option) + + fireFox_Profile = webdriver.FirefoxProfile() + fireFox_Profile.set_preference( + "general.useragent.override", get_random_header()['user-agent'] + ) + + driver = webdriver.Firefox( + # sets user-agent + firefox_profile=fireFox_Profile, + # sets various firefox settings + options=fireFox_Options, + # by default it will be None, if a chromedriver location is in the config then it will use that + executable_path=(binary if binary else "geckodriver"), + # an attempt at stopping selenium from printing a pile of garbage to the console. + service_log_path=os.path.devnull + ) elif browser == 'chrome': - profile_path = os.path.join(data_dir, 'Selenium_chromium') - log_path = os.path.join(data_dir, 'chromedriver.log') from selenium.webdriver.chrome.options import Options + + profile_path = os.path.join(data_dir, 'Selenium_chromium') chrome_options = Options() - ops = ["--headless", "--disable-gpu", '--log-level=OFF', f"--user-data-dir={profile_path}", - "--no-sandbox", "--window-size=1920,1080", f"user-agent={get_random_header()['user-agent']}"] + + ops = [ + "--headless", "--disable-gpu", '--log-level=OFF', + f"--user-data-dir={profile_path}", "--no-sandbox", + "--window-size=1920,1080", f"user-agent={get_random_header()['user-agent']}" # noqa + ] + for option in ops: chrome_options.add_argument(option) + cap = None + if not binary: - if not executable: - driver = webdriver.Chrome(options=chrome_options) - else: - from selenium.webdriver.common.desired_capabilities import DesiredCapabilities - cap = DesiredCapabilities.CHROME - cap['binary_location'] = executable - driver = webdriver.Chrome(desired_capabilities=cap, options=chrome_options) - else: - if not executable: - driver = webdriver.Chrome(options=chrome_options) - else: - from selenium.webdriver.common.desired_capabilities import DesiredCapabilities - cap = DesiredCapabilities.CHROME - cap['binary_location'] = executable - driver = webdriver.Chrome(executable_path=binary, desired_capabilities=cap, options=chrome_options, - service_log_path=os.path.devnull) + from selenium.webdriver.common.desired_capabilities import DesiredCapabilities + + cap = DesiredCapabilities.CHROME + cap['binary_location'] = executable + + driver = webdriver.Chrome( + # sets user-agent, and various chrome settings + options=chrome_options, + # by default it will be None, if a chromedriver location is in the config then it will use that + executable_path=binary, + # by default it will be None, if a binary location is in the config then it will 
use that + desired_capabilities=cap, + # an attempt at stopping selenium from printing a pile of garbage to the console. + service_log_path=os.path.devnull + ) return driver @@ -184,19 +225,19 @@ def cloudflare_wait(driver): Also, i have made it time out after 50 seconds, useful if the target website is not responsive and to stop it from running infinitely. ''' - abort_after = 50 + abort_after = 50 # seconds start = time.time() title = driver.title # title = "Just a moment..." - while title == "Just a moment...": - time.sleep(0.25) + while "Just a moment" in title: + time.sleep(0.35) delta = time.time() - start if delta >= abort_after: logger.error(f'Timeout:\tCouldnt bypass cloudflare. \ See the screenshot for more info:\t{get_data_dir()}/screenshot.png') return 1 title = driver.title - if not title == "Just a moment...": + if not "Just a moment" in title: break time.sleep(2) # This is necessary to make sure everything has loaded fine. return 0 @@ -204,10 +245,11 @@ def cloudflare_wait(driver): def request(request_type, url, **kwargs): # Headers not yet supported , headers={} params = kwargs.get('params', {}) + url = url if not params else url + '?' + urlencode(params) - check_caches = check_cache(url) - if bool(check_caches): - cached_data = check_caches + cached_data = check_cache(url) + + if cached_data: text = cached_data['data'] user_agent = cached_data['user_agent'] request_type = cached_data['method'] @@ -215,28 +257,30 @@ def request(request_type, url, **kwargs): # Headers not yet supported , headers return SeleResponse(url, request_type, text, cookies, user_agent) else: - driver = driver_select() driver.get(url) try: - exit_code = cloudflare_wait(driver) user_agent = driver.execute_script("return navigator.userAgent;") cookies = driver.get_cookies() text = driver.page_source driver.close() - if exit_code == 0: - pass - else: + + if exit_code != 0: return SeleResponse(url, request_type, None, cookies, user_agent) - seleResponse = SeleResponse(url, request_type, text, cookies, user_agent) + seleResponse = SeleResponse( + url, request_type, + text, cookies, + user_agent + ) + cache_request(seleResponse) return seleResponse except: - driver.save_screenshot(f"{get_data_dir()}/screenshot.png"); + driver.save_screenshot(f"{get_data_dir()}/screenshot.png") driver.close() logger.error(f'There was a problem getting the page: {url}.' 
+ '\nSee the screenshot for more info:\t{get_data_dir()}/screenshot.png') From 6d8b52af5f9f3c3f6612ffee7be3e27c63ee00ab Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Tue, 25 May 2021 22:18:19 +0300 Subject: [PATCH 089/130] fix firefox headless --- anime_downloader/sites/helpers/selescrape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 89f4c80..3677963 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -156,7 +156,7 @@ def driver_select(): fireFox_Options = webdriver.FirefoxOptions() ops = [ "--width=1920", "--height=1080", - "headless", "--log fatal" + "-headless", "--log fatal" ] for option in ops: From 7bcc0707486e28d92758476f0999656a651efde1 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Tue, 25 May 2021 22:37:32 +0300 Subject: [PATCH 090/130] Update selescrape.py --- anime_downloader/sites/helpers/selescrape.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 3677963..acb4556 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -61,7 +61,8 @@ def get_browser_config(): def get_browser_executable(): value = data['dl']['selescrape_browser_executable_path'] executable_value = value.lower() if value else value - return executable_value + if executable_value: + return executable_value def get_driver_binary(): @@ -69,8 +70,6 @@ def get_driver_binary(): if value: return value - return None - def cache_request(sele_response): """ @@ -172,7 +171,9 @@ def driver_select(): firefox_profile=fireFox_Profile, # sets various firefox settings options=fireFox_Options, - # by default it will be None, if a chromedriver location is in the config then it will use that + # by default it will be None, if a binary location is in the config then it will use that + firefox_binary=None if not executable else executable, + # by default it will be "geckodriver", if a geckodriver location is in the config then it will use that executable_path=(binary if binary else "geckodriver"), # an attempt at stopping selenium from printing a pile of garbage to the console. service_log_path=os.path.devnull @@ -195,7 +196,7 @@ def driver_select(): cap = None - if not binary: + if executable: from selenium.webdriver.common.desired_capabilities import DesiredCapabilities cap = DesiredCapabilities.CHROME @@ -204,8 +205,8 @@ def driver_select(): driver = webdriver.Chrome( # sets user-agent, and various chrome settings options=chrome_options, - # by default it will be None, if a chromedriver location is in the config then it will use that - executable_path=binary, + # by default it will be "chromedriver", if a chromedriver location is in the config then it will use that + executable_path=(binary if binary else "chromedriver"), # by default it will be None, if a binary location is in the config then it will use that desired_capabilities=cap, # an attempt at stopping selenium from printing a pile of garbage to the console. 
From 220f097333ea58dc7655ace933be787f0bc5f694 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Tue, 25 May 2021 22:46:12 +0300 Subject: [PATCH 091/130] added cache flag --- anime_downloader/sites/helpers/selescrape.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index acb4556..ec4891d 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -16,6 +16,7 @@ def open_config(): return Config +cache = False serverLogger.setLevel(logging.ERROR) logger = logging.getLogger(__name__) TEMP_FOLDER = os.path.join(tempfile.gettempdir(), 'AnimeDL-SeleniumCache') data = open_config() @@ -76,6 +77,8 @@ def cache_request(sele_response): """ This function saves the response from a Selenium request in a json. It uses timestamps to know if the cache has expired or not. """ + if not cache: + return file = os.path.join(TEMP_FOLDER, 'selenium_cached_requests.json') @@ -105,11 +108,13 @@ def check_cache(url): """ This function checks if the cache file exists, if it exists then it will read the file - And it will verify if the cache is less than or equal to 30 mins ago + And it will verify if the cache is less than or equal to 30 mins old If it is, it will return it as it is. If it isn't, it will delete the expired cache from the file and return None If the file doesn't exist at all it will return None """ + if not cache: + return file = os.path.join(TEMP_FOLDER, 'selenium_cached_requests.json') if os.path.isfile(file): From 46d7db8fac119163b7338ba0638622cf66026d1e Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Tue, 25 May 2021 22:46:32 +0300 Subject: [PATCH 092/130] Update request.py --- anime_downloader/sites/helpers/request.py | 1 + 1 file changed, 1 insertion(+) diff --git a/anime_downloader/sites/helpers/request.py b/anime_downloader/sites/helpers/request.py index 924ea6b..c193f1c 100644 --- a/anime_downloader/sites/helpers/request.py +++ b/anime_downloader/sites/helpers/request.py @@ -57,6 +57,7 @@ def setup(func): from selenium import webdriver from anime_downloader.sites.helpers import selescrape sess = selescrape + sess.cache = cache except ImportError: sess = cf_session logger.warning("This provider may not work correctly because it requires selenium to work.\nIf you want to install it then run: 'pip install selenium' .") From 4e184b3f6c8cc82396e74db7adcf66bf5f70ae5d Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Tue, 25 May 2021 22:47:40 +0300 Subject: [PATCH 093/130] added sel to docstring --- anime_downloader/sites/helpers/request.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/anime_downloader/sites/helpers/request.py b/anime_downloader/sites/helpers/request.py index c193f1c..2881573 100644 --- a/anime_downloader/sites/helpers/request.py +++ b/anime_downloader/sites/helpers/request.py @@ -46,6 +46,8 @@ def setup(func): cf : bool cf if True performs the request through cfscrape. For cloudflare protected sites. + sel : bool + sel if True performs the request through selescrape (selenium). referer : str a url sent as referer in request headers ''' @@ -108,6 +110,8 @@ def get(url: str, cf : bool cf if True performs the request through cfscrape. For cloudflare protected sites. + sel : bool + sel if True performs the request through selescrape (selenium).
referer : str a url sent as referer in request headers ''' From 52bda62aed90138fc8adf3eb9e969667d2af5528 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Mon, 21 Jun 2021 20:43:29 +0300 Subject: [PATCH 094/130] added file not found error --- anime_downloader/extractors/yourupload.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/anime_downloader/extractors/yourupload.py b/anime_downloader/extractors/yourupload.py index 1451f3e..7e0c430 100644 --- a/anime_downloader/extractors/yourupload.py +++ b/anime_downloader/extractors/yourupload.py @@ -3,6 +3,7 @@ import re from anime_downloader.extractors.base_extractor import BaseExtractor from anime_downloader.sites import helpers +from requests.exceptions import HTTPError logger = logging.getLogger(__name__) @@ -10,7 +11,13 @@ logger = logging.getLogger(__name__) class Yourupload(BaseExtractor): def _get_data(self): regex = r"file: '([^']*)" - file = re.search(regex, helpers.get(self.url).text).group(1) + try: + response = helpers.get(self.url) + except HTTPError: + logger.error('File not found.') + raise + + file = re.search(regex, response.text).group(1) return { 'stream_url': file, 'referer': self.url From c9f1a6ef842767f15bc9424d4f19aad16b557b8c Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Mon, 21 Jun 2021 20:44:50 +0300 Subject: [PATCH 095/130] Update animerush.py --- anime_downloader/sites/animerush.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/anime_downloader/sites/animerush.py b/anime_downloader/sites/animerush.py index f0d1f35..f4a880e 100644 --- a/anime_downloader/sites/animerush.py +++ b/anime_downloader/sites/animerush.py @@ -1,7 +1,9 @@ from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites import helpers -from anime_downloader.extractors import get_extractor +from anime_downloader.extractors.init import ALL_EXTRACTORS + import logging +import re logger = logging.getLogger(__name__) @@ -41,12 +43,20 @@ class AnimeRushEpisode(AnimeEpisode, sitename='animerush'): sources_list = [] # Sources [0] is the url [1] is the name of the source # eg: [['https://mp4upload.com/embed-r07potgdvbkr-650x370.html', 'Mp4upload Video']] + domain_regex = r"\/\/(?:\w{3,6}\.)?(.*?)\." 
for i in sources: - # Not exactly ideal setup for more extractors - # If more advanced sources needs to get added look at watchmovie or darkanime - server = 'yourupload' if 'yourupload' in i[0] else 'mp4upload' + found = False + domain = re.findall(domain_regex, i[0])[0] + + for extractor in ALL_EXTRACTORS: + if re.match(extractor['regex'], domain.lower()): + found = True + + if not found: + continue + sources_list.append({ - 'extractor': server, + 'extractor': i[0], 'url': i[0], 'server': i[1], 'version': 'subbed' From 6e409f9545183fad87bfb6984c62a540fb4f9d33 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Mon, 21 Jun 2021 20:47:22 +0300 Subject: [PATCH 096/130] Update animerush.py --- anime_downloader/sites/animerush.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/animerush.py b/anime_downloader/sites/animerush.py index f4a880e..e107d9a 100644 --- a/anime_downloader/sites/animerush.py +++ b/anime_downloader/sites/animerush.py @@ -56,7 +56,7 @@ class AnimeRushEpisode(AnimeEpisode, sitename='animerush'): continue sources_list.append({ - 'extractor': i[0], + 'extractor': domain, 'url': i[0], 'server': i[1], 'version': 'subbed' From 38ebb40ca35d7045ba02bbdeef288abf97843669 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Mon, 21 Jun 2021 20:50:54 +0300 Subject: [PATCH 097/130] Update animerush.py --- anime_downloader/sites/animerush.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/anime_downloader/sites/animerush.py b/anime_downloader/sites/animerush.py index e107d9a..eccad31 100644 --- a/anime_downloader/sites/animerush.py +++ b/anime_downloader/sites/animerush.py @@ -46,10 +46,10 @@ class AnimeRushEpisode(AnimeEpisode, sitename='animerush'): domain_regex = r"\/\/(?:\w{3,6}\.)?(.*?)\." 
for i in sources: found = False - domain = re.findall(domain_regex, i[0])[0] + domain = re.findall(domain_regex, i[0])[0].lower() for extractor in ALL_EXTRACTORS: - if re.match(extractor['regex'], domain.lower()): + if re.match(extractor['regex'], domain): found = True if not found: From a30b5afef3d9fd60010393885d74c6d220aff757 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Mon, 21 Jun 2021 20:53:02 +0300 Subject: [PATCH 098/130] Update yourupload.py --- anime_downloader/extractors/yourupload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/extractors/yourupload.py b/anime_downloader/extractors/yourupload.py index 7e0c430..4429b7a 100644 --- a/anime_downloader/extractors/yourupload.py +++ b/anime_downloader/extractors/yourupload.py @@ -15,7 +15,7 @@ class Yourupload(BaseExtractor): response = helpers.get(self.url) except HTTPError: logger.error('File not found.') - raise + return {'stream_url': ''} file = re.search(regex, response.text).group(1) return { From af572fdcccd26a8787163404f7c840327b9fec36 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Thu, 24 Jun 2021 05:13:08 +0300 Subject: [PATCH 099/130] Update twistmoe.py --- anime_downloader/sites/twistmoe.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/anime_downloader/sites/twistmoe.py b/anime_downloader/sites/twistmoe.py index 8e53205..6a21dae 100644 --- a/anime_downloader/sites/twistmoe.py +++ b/anime_downloader/sites/twistmoe.py @@ -82,6 +82,28 @@ class TwistMoe(Anime, sitename='twist.moe'): return self._episode_urls + def _scrape_metadata(self): + slug = self.url.split('a/')[-1][:-1] + api_url = "https://api.twist.moe/api/anime/" + slug + res = helpers.get( + api_url, + headers={ + 'x-access-token': '0df14814b9e590a1f26d3071a4ed7974' + } + ).json() + if 'hb_id' in res: + kitsu_api_url = "https://kitsu.io/api/edge/anime/" + str(res['hb_id']) + kitsu_data = helpers.get(kitsu_api_url).json() + attributes = kitsu_data['data']['attributes'] + + self.meta['title'] = attributes['canonicalTitle'] + self.meta['year'] = attributes['startDate'].split('-')[0] + self.meta['airing_status'] = attributes['status'] + self.meta['poster'] = attributes['posterImage']['original'] + self.meta['cover'] = attributes['coverImage']['original'] + self.meta['total_eps'] = attributes['episodeCount'] + self.meta['desc'] = attributes['description'] + # From stackoverflow https://stackoverflow.com/questions/36762098/how-to-decrypt-password-from-javascript-cryptojs-aes-encryptpassword-passphras def pad(data): length = BLOCK_SIZE - (len(data) % BLOCK_SIZE) From 6114dde0ac93bbb568f7d184cc614968fc8fbc28 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Thu, 24 Jun 2021 14:21:27 +0300 Subject: [PATCH 100/130] Update _4anime.py --- anime_downloader/sites/_4anime.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/anime_downloader/sites/_4anime.py b/anime_downloader/sites/_4anime.py index 81afb47..46b01a0 100644 --- a/anime_downloader/sites/_4anime.py +++ b/anime_downloader/sites/_4anime.py @@ -19,12 +19,13 @@ class Anime4(Anime, sitename='4anime'): "options": "qtranslate_lang=0&set_intitle=None&customset%5B%5D=anime" } soup = helpers.soupify(helpers.post( - "https://4anime.to/wp-admin/admin-ajax.php", data=data)).select('div.info > a') + "https://4anime.to/wp-admin/admin-ajax.php", data=data)).select('.item') search_results = [ 
SearchResult( - title=i.text, - url=i['href'] + title=i.select_one('.info > a').text, + url=i.select_one('.info > a').get('href', ''), + poster="https://4anime.to" + i.find('img').get('src', '') ) for i in soup ] @@ -41,6 +42,19 @@ class Anime4(Anime, sitename='4anime'): for i in soup.select('.detail > a'): if 'year' in i.get('href', ''): self.meta['year'] = int(i.text) if i.text.isnumeric() else None + elif 'status' in i.get('href', ''): + self.meta['airing_status'] = i.text.strip() + + desc_soup = soup.select_one("#description-mob") + if "READ MORE" in str(desc_soup): + desc = desc_soup.select('#fullcontent p') + self.meta['description'] = "\n".join([x.text for x in desc]) + else: + self.meta['description'] = desc_soup.select_one('p:nth-child(2)').text + + self.meta['poster'] = "https://4anime.to" + soup.select_one("#details > div.cover > img").get('src', '') + self.meta['total_eps'] = len(soup.select('ul.episodes.range.active > li > a')) + self.meta['cover'] = "https://4anime.to/static/Dr1FzAv.jpg" class Anime4Episode(AnimeEpisode, sitename='4anime'): From c6e94a27912e49a7a9e9f06e6721620fc0971496 Mon Sep 17 00:00:00 2001 From: scft null Date: Thu, 24 Jun 2021 12:38:37 +0000 Subject: [PATCH 101/130] Added AnimeStar to sites --- README.md | 1 + anime_downloader/sites/anime.py | 2 +- anime_downloader/sites/animestar.py | 63 +++++++++++++++++++++++++++++ anime_downloader/sites/init.py | 1 + 4 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 anime_downloader/sites/animestar.py diff --git a/README.md b/README.md index 4e0d2b5..e81b7f4 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,7 @@ Yeah. Me too! That's why this tool exists. - Animerush - Animesimple - AnimeSuge - requires Node.js +- AnimeStar - Animevibe - AnimeTake - AniTube diff --git a/anime_downloader/sites/anime.py b/anime_downloader/sites/anime.py index faa604c..48578ca 100644 --- a/anime_downloader/sites/anime.py +++ b/anime_downloader/sites/anime.py @@ -143,7 +143,7 @@ class Anime: the necessary data about the anime and it's episodes. 
This function calls - :py:class:`~anime_downloader.sites.anime.BaseAnime._scarpe_episodes` + :py:class:`~anime_downloader.sites.anime.BaseAnime._scrape_episodes` and :py:class:`~anime_downloader.sites.anime.BaseAnime._scrape_metadata` diff --git a/anime_downloader/sites/animestar.py b/anime_downloader/sites/animestar.py new file mode 100644 index 0000000..7467b64 --- /dev/null +++ b/anime_downloader/sites/animestar.py @@ -0,0 +1,63 @@ +import re +from urllib.parse import urlparse +from datetime import datetime +from requests import Request + +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult +from anime_downloader.sites import helpers +from anime_downloader.const import get_random_header + +_headers = get_random_header() | { 'X-Requested-By': 'animestar-web'} + + +class AnimeStar(Anime, sitename='animestar'): + sitename = 'animestar' + # Neither 720p nor 1080p are guaranteed, but they could happen + QUALITIES = ['360p', '480p', '540p', '720p', '1080p'] + _real_getter = 'https://api.animestar.app/api/drama?id=' + + @classmethod + def search(cls, query): + return [ + SearchResult( + title=i['name'], + url='https://animestar.app/show-details/deadbeef/'+i['_id'], + poster=i['image'], + meta={'genre': i['genre']}, + meta_info={ + 'title_cleaned': re.sub(r'\(.*?\)', '', i['name']).strip() + }) + for i in helpers.get('https://api.animestar.app/api/drama/search', + params={'q': query}, + headers=_headers).json() + ] + + + def _scrape_episodes(self): + return [ + Request('GET', 'https://api.animestar.app/api/utility/get-stream-links', + params={'url': i['videoUrl'], 'server': 1} + ).prepare().url + for i in sorted(helpers.get(self._real_getter+urlparse(self.url).path.split('/')[-1], + headers=_headers).json()['episodes'], + key=lambda i: i['number']) + ] + + def _scrape_metadata(self): + resp = helpers.get(self._real_getter+urlparse(self.url).path.split('/')[-1], + headers=_headers).json() + self.title = resp['name'] + self.subbed = resp['audioType'] == 'SUB' + self.meta['names_alt'] = resp['altNames'] + self.meta['year'] = resp['releaseYear'] + self.meta['status'] = resp['tvStatus'] + self.meta['genre'] = resp['genre'] + self.meta['type'] = resp['type'] + self.meta['story'] = resp['synopsis'] + self.meta['views'] = resp['views'] + self.meta['ctime'] = datetime.fromtimestamp(resp['createdAt']/1000).strftime('%Y-%m-%d %H:%M') + self.meta['mtime'] = datetime.fromtimestamp(resp['modifiedAt']/1000).strftime('%Y-%m-%d %H:%M') + +class AnimeStarEpisode(AnimeEpisode, sitename='animestar'): + def _get_sources(self): + return [('no_extractor', helpers.get(self.url, headers=_headers).json()['url'])] diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index 0f22317..019ac4c 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -20,6 +20,7 @@ ALL_ANIME_SITES = [ ('animeout', 'animeout', 'AnimeOut'), ('animerush', 'animerush', 'AnimeRush'), ('animesimple', 'animesimple', 'AnimeSimple'), + ('animestar', 'animestar', 'AnimeStar'), ('animesuge', 'animesuge', 'AnimeSuge'), ('animevibe', 'animevibe', 'AnimeVibe'), ('animixplay', 'animixplay', 'AniMixPlay'), From 9c92ad425d09f573bdfbdd64d886c30c6d5fa631 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Thu, 24 Jun 2021 15:52:39 +0300 Subject: [PATCH 102/130] fixed tenshi.moe and added metadata --- anime_downloader/sites/tenshimoe.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git 
a/anime_downloader/sites/tenshimoe.py b/anime_downloader/sites/tenshimoe.py index 7644b5e..8e03f47 100644 --- a/anime_downloader/sites/tenshimoe.py +++ b/anime_downloader/sites/tenshimoe.py @@ -1,5 +1,6 @@ from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites import helpers +import re class TenshiMoe(Anime, sitename='tenshi.moe'): @@ -23,19 +24,36 @@ class TenshiMoe(Anime, sitename='tenshi.moe'): def _scrape_episodes(self): soup = helpers.soupify(helpers.get(self.url)) eps = soup.select( - 'li[class^=episode] > a' + 'li[class*="episode"] > a' ) eps = [x['href'] for x in eps] return eps def _scrape_metadata(self): soup = helpers.soupify(helpers.get(self.url).text) - self.title = soup.title.text.split('—')[0].strip() + self.title = soup.select_one('span.value > span[title="English"]').parent.text.strip() + self.meta['year'] = int(re.findall(r"(\d{4})", soup.select_one('li.release-date .value').text)[0]) + self.meta['airing_status'] = soup.select_one('li.status > .value').text.strip() + self.meta['total_eps'] = int(soup.select_one('.entry-episodes > h2 > span').text.strip()) + self.meta['desc'] = soup.select_one('.entry-description > .card-body').text.strip() + self.meta['poster'] = soup.select_one('img.cover-image').get('src', '') + self.meta['cover'] = '' class TenshiMoeEpisode(AnimeEpisode, sitename='tenshi.moe'): + QUALITIES = ['360p', '480p', '720p', '1080p'] + def _get_sources(self): soup = helpers.soupify(helpers.get(self.url)) - # Might break with something other than mp4! - link = soup.find_all('source', type="video/mp4")[-1]['src'] - return [('no_extractor', link)] + soup = soup.select_one('.embed-responsive > iframe') + + mp4moe = helpers.soupify(helpers.get(soup.get('src'), referer=self.url)) + mp4moe = mp4moe.select_one('video#player') + qualities_ = [x.get("title") for x in mp4moe.select('source')] + sources = [ + ('no_extractor', x.get('src')) + for x in mp4moe.select('source') + ] + + if self.quality in qualities_: + return [sources[qualities_.index(self.quality)]] From dea2bd843b3c9ddb0e68609d6a903f69959b72f2 Mon Sep 17 00:00:00 2001 From: scft null Date: Thu, 24 Jun 2021 12:55:18 +0000 Subject: [PATCH 103/130] README.md order quickfix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e81b7f4..208300e 100644 --- a/README.md +++ b/README.md @@ -67,8 +67,8 @@ Yeah. Me too! That's why this tool exists. 
- animeout - Animerush - Animesimple -- AnimeSuge - requires Node.js - AnimeStar +- AnimeSuge - requires Node.js - Animevibe - AnimeTake - AniTube From e84cc01c44ab8f8fabae97cb9cb5fe6c571e5711 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Thu, 24 Jun 2021 17:29:13 +0300 Subject: [PATCH 104/130] Update tenshimoe.py --- anime_downloader/sites/tenshimoe.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/anime_downloader/sites/tenshimoe.py b/anime_downloader/sites/tenshimoe.py index 8e03f47..8c71b59 100644 --- a/anime_downloader/sites/tenshimoe.py +++ b/anime_downloader/sites/tenshimoe.py @@ -10,13 +10,21 @@ class TenshiMoe(Anime, sitename='tenshi.moe'): @classmethod def search(cls, query): soup = helpers.soupify( - helpers.get('https://tenshi.moe/anime', params={'q': query})) - results = soup.select('ul.loop.anime-loop.list > li > a') + helpers.get( + 'https://tenshi.moe/anime', + params={'q': query}, + cookies={'loop-view': 'thumb'}, + cache=False + ) + ) + + results = soup.select('ul.thumb > li > a') return [ SearchResult( title=x['title'], url=x['href'], + poster=x.find('img')['src'] ) for x in results ] From 9c9221ca866cc07279cf64b77fc3cafa81487bb3 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Wed, 30 Jun 2021 18:26:41 +0300 Subject: [PATCH 105/130] fix #697 Now it shouldn't include not existing episodes in the list. --- anime_downloader/sites/animerush.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/animerush.py b/anime_downloader/sites/animerush.py index eccad31..6b22300 100644 --- a/anime_downloader/sites/animerush.py +++ b/anime_downloader/sites/animerush.py @@ -25,7 +25,7 @@ class AnimeRush(Anime, sitename='animerush'): def _scrape_episodes(self): soup = helpers.soupify(helpers.get(self.url)).select('div.episode_list > a') - return ['https:' + i.get('href') for i in soup[::-1]] + return ['https:' + i.get('href') for i in soup[::-1] if "Coming soon" not in str(i)] def _scrape_metadata(self): soup = helpers.soupify(helpers.get(self.url)) From 3b19807efaccd0a714a94b78e4c852d81cad9d01 Mon Sep 17 00:00:00 2001 From: PrismaticYT <45874270+RPMYT@users.noreply.github.com> Date: Sat, 10 Jul 2021 11:17:04 +1200 Subject: [PATCH 106/130] Fix 'CachedResponse has no len()' error --- anime_downloader/sites/helpers/request.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/anime_downloader/sites/helpers/request.py b/anime_downloader/sites/helpers/request.py index 924ea6b..e8fb8a2 100644 --- a/anime_downloader/sites/helpers/request.py +++ b/anime_downloader/sites/helpers/request.py @@ -146,9 +146,7 @@ def soupify(res): ------- BeautifulSoup.Soup """ - if isinstance(res, requests.Response): - res = res.text - soup = BeautifulSoup(res, 'html.parser') + soup = BeautifulSoup(res.text, 'html.parser') return soup From 3a124381d346d3b01d5931a8be1762f6cc2e4023 Mon Sep 17 00:00:00 2001 From: PrismaticYT <45874270+RPMYT@users.noreply.github.com> Date: Sat, 10 Jul 2021 11:57:36 +1200 Subject: [PATCH 107/130] Fix resulting TypeErrors. 
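The two request.py fixes in patches 106 and 107 land on a `soupify` that accepts either a plain HTML string or a response object. A minimal sketch of that combined end state, assuming only that response objects (such as `requests.Response`, and presumably the `CachedResponse` named in patch 106) expose the HTML via a `.text` attribute:

```python
from bs4 import BeautifulSoup

def soupify(res):
    # Strings are parsed directly; anything else is assumed to be a
    # response-like object carrying the HTML in its `.text` attribute.
    html = res if isinstance(res, str) else res.text
    return BeautifulSoup(html, 'html.parser')
```

Both `soupify('<p>hi</p>')` and `soupify(helpers.get(url))` should then yield equivalent soups, which is what the TypeError fix below restores.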
--- anime_downloader/sites/helpers/request.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/anime_downloader/sites/helpers/request.py b/anime_downloader/sites/helpers/request.py index e8fb8a2..6bec3f2 100644 --- a/anime_downloader/sites/helpers/request.py +++ b/anime_downloader/sites/helpers/request.py @@ -146,7 +146,10 @@ def soupify(res): ------- BeautifulSoup.Soup """ - soup = BeautifulSoup(res.text, 'html.parser') + if isinstance(res, str): + soup = BeautifulSoup(res, 'html.parser') + else: + soup = BeautifulSoup(res.text, 'html.parser') return soup From a99a73bedf052554215541b1485406f49905b8b2 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Wed, 21 Jul 2021 13:38:57 +0300 Subject: [PATCH 108/130] upped the patch number --- anime_downloader/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/__version__.py b/anime_downloader/__version__.py index 5f25180..5a5c818 100644 --- a/anime_downloader/__version__.py +++ b/anime_downloader/__version__.py @@ -1 +1 @@ -__version__ = '5.0.9' +__version__ = '5.0.10' From ebfad498db78bd1f346cded6e99c4032132a2df5 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Thu, 22 Jul 2021 01:06:05 +0300 Subject: [PATCH 109/130] Update tenshimoe.py --- anime_downloader/sites/tenshimoe.py | 63 +++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/anime_downloader/sites/tenshimoe.py b/anime_downloader/sites/tenshimoe.py index 7644b5e..c7f006d 100644 --- a/anime_downloader/sites/tenshimoe.py +++ b/anime_downloader/sites/tenshimoe.py @@ -2,6 +2,18 @@ from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites import helpers +def parse_search_page(soup): + results = soup.select('ul.thumb > li > a') + return [ + SearchResult( + title=x['title'], + url=x['href'], + poster=x.find('img')['src'] + ) + for x in results + ] + + class TenshiMoe(Anime, sitename='tenshi.moe'): sitename = 'tenshi.moe' @@ -9,21 +21,34 @@ class TenshiMoe(Anime, sitename='tenshi.moe'): @classmethod def search(cls, query): soup = helpers.soupify( - helpers.get('https://tenshi.moe/anime', params={'q': query})) - results = soup.select('ul.loop.anime-loop.list > li > a') - - return [ - SearchResult( - title=x['title'], - url=x['href'], + helpers.get( + 'https://tenshi.moe/anime', + params={'q': query}, + cookies={'loop-view': 'thumb'} ) - for x in results - ] + ) + + results = parse_search_page(soup) + + while soup.select_one(".pagination"): + link = soup.select_one('a.page-link[rel="next"]') + if link: + soup = helpers.soupify( + helpers.get( + link['href'], + cookies={'loop-view': 'thumb'} + ) + ) + results.extend(parse_search_page(soup)) + else: + break + + return results def _scrape_episodes(self): soup = helpers.soupify(helpers.get(self.url)) eps = soup.select( - 'li[class^=episode] > a' + 'li[class*="episode"] > a' ) eps = [x['href'] for x in eps] return eps @@ -34,8 +59,20 @@ class TenshiMoe(Anime, sitename='tenshi.moe'): class TenshiMoeEpisode(AnimeEpisode, sitename='tenshi.moe'): + QUALITIES = ['360p', '480p', '720p', '1080p'] + def _get_sources(self): soup = helpers.soupify(helpers.get(self.url)) - # Might break with something other than mp4! 
- link = soup.find_all('source', type="video/mp4")[-1]['src'] - return [('no_extractor', link)] + soup = soup.select_one('.embed-responsive > iframe') + + mp4moe = helpers.soupify(helpers.get(soup.get('src'), referer=self.url)) + mp4moe = mp4moe.select_one('video#player') + qualities_ = [x.get("title") for x in mp4moe.select('source')] + sources = [ + ('no_extractor', x.get('src')) + for x in mp4moe.select('source') + ] + + if self.quality in qualities_: + return [sources[qualities_.index(self.quality)]] + From 54e8d72f5c397dbded4e82d7bf6b2a4be13d9afc Mon Sep 17 00:00:00 2001 From: Gourob Dev <64770452+hoshiya4522@users.noreply.github.com> Date: Thu, 22 Jul 2021 04:18:38 +0000 Subject: [PATCH 110/130] Fix typo The code says that the provider was animepahe --- docs/usage/dl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage/dl.rst b/docs/usage/dl.rst index 7fb84e8..ef052bf 100644 --- a/docs/usage/dl.rst +++ b/docs/usage/dl.rst @@ -16,7 +16,7 @@ Search and download anime dl 'code geass' -To search on kissanime, +To search on animepahe, .. code:: bash From a77520aba2f3770d2190094583d7b05793d872d9 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Thu, 22 Jul 2021 21:12:43 +0300 Subject: [PATCH 111/130] Update __version__.py --- anime_downloader/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/__version__.py b/anime_downloader/__version__.py index 5a5c818..093a174 100644 --- a/anime_downloader/__version__.py +++ b/anime_downloader/__version__.py @@ -1 +1 @@ -__version__ = '5.0.10' +__version__ = '5.0.11' From fc8bb2c10584a90173f6789cd9c514d8b21d958b Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Tue, 3 Aug 2021 23:27:09 +0300 Subject: [PATCH 112/130] Update shiro.py --- anime_downloader/sites/shiro.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/anime_downloader/sites/shiro.py b/anime_downloader/sites/shiro.py index c9bce16..022f348 100644 --- a/anime_downloader/sites/shiro.py +++ b/anime_downloader/sites/shiro.py @@ -13,6 +13,8 @@ def get_token(): token = re.search(r'token\:\"(.*?)\"', script)[1] return token +def get_api_url(): + return "https://tapi.shiro.is" class Shiro(Anime, sitename='shiro'): sitename = 'shiro' @@ -20,18 +22,20 @@ class Shiro(Anime, sitename='shiro'): @classmethod def search(cls, query): cls.token = get_token() + cls.api_url = get_api_url() + params = { 'search': query, 'token': cls.token } - results = helpers.get('https://ani.api-web.site/advanced', params=params).json()['data'] # noqa + results = helpers.get(f'{cls.api_url}/advanced', params=params).json()['data'] # noqa if 'nav' in results: results = results['nav']['currentPage']['items'] search_results = [ SearchResult( title=i['name'], url='https://shiro.is/anime/' + i['slug'], - poster='https://ani-cdn.api-web.site/' + i['image'], + poster=f'{cls.api_url}/' + i['image'], meta={'year': i['year']}, meta_info={ 'version_key_dubbed': '(Sub)' if i['language'] == 'subbed' else '(Dub)' # noqa @@ -46,17 +50,19 @@ class Shiro(Anime, sitename='shiro'): def _scrape_episodes(self): self.token = get_token() + self.api_url = get_api_url() + slug = self.url.split('/')[-1] if 'episode' in slug: - api_link = 'https://ani.api-web.site/anime-episode/slug/' + slug + api_link = f'{self.api_url}/anime-episode/slug/' + slug r = helpers.get(api_link, params={'token': self.token}).json() slug = r['data']['anime_slug'] - 
api_link = 'https://ani.api-web.site/anime/slug/' + slug + api_link = f'{self.api_url}/anime/slug/' + slug r = helpers.get(api_link, params={'token': self.token}).json() if r['status'] == 'Found': episodes = r['data']['episodes'] episodes = [ - 'https://ani.googledrive.stream/vidstreaming/vid-ad/' + x['videos'][0]['video_id'] # noqa + "https://cherry.subsplea.se/" + x['videos'][0]['video_id'] # noqa for x in episodes ] return episodes @@ -65,18 +71,21 @@ class Shiro(Anime, sitename='shiro'): def _scrape_metadata(self): self.token = get_token() + self.api_url = get_api_url() + + slug = self.url.split('/')[-1] if 'episode' in slug: - api_link = 'https://ani.api-web.site/anime-episode/slug/' + slug + api_link = f'{self.api_url}/anime-episode/slug/' + slug r = helpers.get(api_link, params={'token': self.token}).json() slug = r['data']['anime_slug'] - api_link = 'https://ani.api-web.site/anime/slug/' + slug + api_link = f'{self.api_url}/anime/slug/' + slug r = helpers.get(api_link, params={'token': self.token}).json() self.title = r['data']['name'] class ShiroEpisode(AnimeEpisode, sitename='shiro'): def _get_sources(self): - r = helpers.get(self.url).text + r = helpers.get(self.url, referer="https://shiro.is/").text link = re.search(r'\"file\"\:\"(.*?)\"', r)[1] return [('no_extractor', link)] From b2538ef2c79e751079ce13d51380a8d93d2fb5cf Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Wed, 4 Aug 2021 00:00:51 +0300 Subject: [PATCH 113/130] Update shiro.py --- anime_downloader/sites/shiro.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/shiro.py b/anime_downloader/sites/shiro.py index 022f348..8e0fb38 100644 --- a/anime_downloader/sites/shiro.py +++ b/anime_downloader/sites/shiro.py @@ -87,5 +87,5 @@ class Shiro(Anime, sitename='shiro'): class ShiroEpisode(AnimeEpisode, sitename='shiro'): def _get_sources(self): r = helpers.get(self.url, referer="https://shiro.is/").text - link = re.search(r'\"file\"\:\"(.*?)\"', r)[1] + link = re.search(r'source\s+src=\"(.*?)\"', r)[1] return [('no_extractor', link)] From 2f2eac108b90efda64a27e95bfabb643bf5ef0d3 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 6 Aug 2021 03:04:11 +0300 Subject: [PATCH 114/130] feat: remove 4anime --- anime_downloader/sites/_4anime.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/anime_downloader/sites/_4anime.py b/anime_downloader/sites/_4anime.py index adae4b5..68fee49 100644 --- a/anime_downloader/sites/_4anime.py +++ b/anime_downloader/sites/_4anime.py @@ -3,10 +3,12 @@ import re from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites import helpers from anime_downloader.const import HEADERS +from anime_downloader.sites.helpers.util import not_working logger = logging.getLogger(__name__) +@not_working("4anime has been shut down") class Anime4(Anime, sitename='4anime'): sitename = '4anime' From 27e21aaa30832c690568015ee9cff73a0a2777bc Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 6 Aug 2021 03:05:08 +0300 Subject: [PATCH 115/130] Update init.py --- anime_downloader/sites/init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index 019ac4c..e8966a3 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -2,7 +2,7 @@ from importlib import import_module ALL_ANIME_SITES = [ # 
('filename', 'sitename', 'classname') - ('_4anime', '4anime', 'Anime4'), + # ('_4anime', '4anime', 'Anime4'), ('anitube', 'anitube', 'AniTube'), ('animtime', 'animtime', 'AnimTime'), ('anime8', 'anime8', 'Anime8'), From be769cf7837e829ecfdc0597162fcd98e7781f78 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 6 Aug 2021 23:44:54 +0300 Subject: [PATCH 116/130] upped the version number --- anime_downloader/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/__version__.py b/anime_downloader/__version__.py index 093a174..20c4d2e 100644 --- a/anime_downloader/__version__.py +++ b/anime_downloader/__version__.py @@ -1 +1 @@ -__version__ = '5.0.11' +__version__ = '5.0.12' From b54b8417f14e3cd7f19f39df41015ea7249d02f4 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 6 Aug 2021 23:55:12 +0300 Subject: [PATCH 117/130] Update cli.py --- anime_downloader/cli.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/anime_downloader/cli.py b/anime_downloader/cli.py index 3aeefe2..e11ff6b 100644 --- a/anime_downloader/cli.py +++ b/anime_downloader/cli.py @@ -11,6 +11,29 @@ from anime_downloader import util echo = click.echo +def check_for_update(): + from pkg_resources import parse_version + import requests + import re + + version_file = "https://raw.githubusercontent.com/anime-dl/anime-downloader/master/anime_downloader/__version__.py" + regex = r"__version__\s+=\s+[\"'](\d+\.\d+\.\d+)[\"']" + r = requests.get(version_file) + + if not r.ok: + return + + current_ver = parse_version(__version__) + remote_ver = parse_version(re.match(regex, r.text).group(1)) + + if remote_ver > current_ver: + print( + "New version (on GitHub) is available: {} -> {}\n".format( + current_ver, remote_ver + ) + ) + + class CLIClass(click.MultiCommand): def list_commands(self, ctx): @@ -49,6 +72,11 @@ def cli(log_level): def main(): + try: + check_for_update() + except Exception: + pass + try: cli() except Exception as e: From c591a47b4093576e802551f559851640cd0c05fb Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 6 Aug 2021 23:57:17 +0300 Subject: [PATCH 118/130] Update __version__.py --- anime_downloader/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/__version__.py b/anime_downloader/__version__.py index 20c4d2e..219f78b 100644 --- a/anime_downloader/__version__.py +++ b/anime_downloader/__version__.py @@ -1 +1 @@ -__version__ = '5.0.12' +__version__ = '5.0.13' From fd683a4a2fab6803ef50b239b10fa5defd9db7fb Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sat, 7 Aug 2021 00:00:44 +0300 Subject: [PATCH 119/130] Update cli.py --- anime_downloader/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/cli.py b/anime_downloader/cli.py index e11ff6b..c89de27 100644 --- a/anime_downloader/cli.py +++ b/anime_downloader/cli.py @@ -17,7 +17,7 @@ def check_for_update(): import re version_file = "https://raw.githubusercontent.com/anime-dl/anime-downloader/master/anime_downloader/__version__.py" - regex = r"__version__\s+=\s+[\"'](\d+\.\d+\.\d+)[\"']" + regex = r"__version__\s*=\s*[\"'](\d+\.\d+\.\d+)[\"']" r = requests.get(version_file) if not r.ok: From 7b6d9e71ab72686a6f86073f299d3b2d688ff505 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Thu, 19 
Aug 2021 01:07:48 +0300 Subject: [PATCH 120/130] improved the regex in animtime --- anime_downloader/sites/animtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py index cdaa7b7..c2df997 100644 --- a/anime_downloader/sites/animtime.py +++ b/anime_downloader/sites/animtime.py @@ -38,7 +38,7 @@ def get_title_dict(script): title_function = re.search("tm=.*?}", script_text).group() titles_dict = { x[0]: format_title_case(x[1].replace('-', ' ')) - for x in re.findall(r"qd\[tm\.(.*?)\]=.*?\".*?/animtime/(.*?)/", script_text) + for x in re.findall(r"\[tm\.([a-zA-Z0-9]+?)\]=function\(\w\)\{return\"[a-zA-Z0-9\.\:/-]+?\/animtime\/([a-zA-Z-]+?)\/", script_text) } id_dict = { x[0]: x[1] From d7326e12bf3447a30e3e86c7afe75bd29b8c50de Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 20 Aug 2021 18:57:40 +0300 Subject: [PATCH 121/130] Update config.py --- anime_downloader/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/anime_downloader/config.py b/anime_downloader/config.py index 71a6f73..6e5ee71 100644 --- a/anime_downloader/config.py +++ b/anime_downloader/config.py @@ -73,6 +73,9 @@ DEFAULT_CONFIG = { 'anistream.xyz': { 'version': 'subbed', }, + 'animepahe': { + 'version': 'subbed', + }, 'animeflv': { 'version': 'subbed', 'servers': [ From 69de7f3e1bb1d8a0d8aef08b279f829b5168190d Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 20 Aug 2021 18:58:34 +0300 Subject: [PATCH 122/130] Update animepahe.py --- anime_downloader/sites/animepahe.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index fce5721..0ef1476 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -91,18 +91,17 @@ class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): else: raise NotFoundError - episode_data = helpers.get(self.url, cf=True).json() + episode_data = helpers.get(self.url).json() - episode_data = episode_data['data'] - sources_list = [] + data = episode_data['data'] + qualities = [x + 'p' for f in data for x in f] - for info in range(len(episode_data)): - quality = list(episode_data[info].keys())[0] - sources_list.append({ - 'extractor': 'kwik', - 'url': episode_data[info][quality]['kwik'], - 'server': 'kwik', - 'version': 'subbed' - }) + sources_list = [ + f[x]['kwik_adfly'] for f in data for x in f + ] - return self.sort_sources(sources_list) + for i, quality in enumerate(qualities): + if self.quality == quality: + return [("kwik", sources_list[i])] + + return [("kwik", x) for x in sources_list] From ff38e125ca3731b9619da5b1471b0019bed585fe Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 20 Aug 2021 18:58:51 +0300 Subject: [PATCH 123/130] Update kwik.py --- anime_downloader/extractors/kwik.py | 159 +++++++++++++++++++--------- 1 file changed, 108 insertions(+), 51 deletions(-) diff --git a/anime_downloader/extractors/kwik.py b/anime_downloader/extractors/kwik.py index 3cb93f9..dab6ca4 100644 --- a/anime_downloader/extractors/kwik.py +++ b/anime_downloader/extractors/kwik.py @@ -1,65 +1,122 @@ -import logging -from platform import node -import re -import subprocess +from base64 import b64decode import requests -import tempfile +import logging +import re from anime_downloader.extractors.base_extractor import 
BaseExtractor -from anime_downloader.sites.helpers.request import temp_dir from anime_downloader.sites import helpers -from anime_downloader import util -from anime_downloader.util import eval_in_node from subprocess import CalledProcessError +from anime_downloader import util logger = logging.getLogger(__name__) class Kwik(BaseExtractor): - '''Extracts video url from kwik pages, Kwik has some `security` - which allows to access kwik pages when only referred by something - and the kwik video stream when referred through the corresponding - kwik video page. - ''' + YTSM = re.compile(r"ysmm = '([^']+)") + + KWIK_PARAMS_RE = re.compile(r'\("(\w+)",\d+,"(\w+)",(\d+),(\d+),\d+\)') + KWIK_D_URL = re.compile(r'action="([^"]+)"') + KWIK_D_TOKEN = re.compile(r'value="([^"]+)"') + + CHARACTER_MAP = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/" + + def get_string(self, content: str, s1: int, s2: int) -> str: + slice_2 = self.CHARACTER_MAP[0:s2] + + acc = 0 + for n, i in enumerate(content[::-1]): + acc += int(i if i.isdigit() else 0) * s1**n + + k = '' + while acc > 0: + k = slice_2[int(acc % s2)] + k + acc = (acc - (acc % s2)) / s2 + + return k or '0' + + def decrypt(self, full_string: str, key: str, v1: int, v2: int) -> str: + v1, v2 = int(v1), int(v2) + r, i = "", 0 + + while i < len(full_string): + s = "" + while (full_string[i] != key[v2]): + s += full_string[i] + i += 1 + j = 0 + while j < len(key): + s = s.replace(key[j], str(j)) + j += 1 + r += chr(int(self.get_string(s, v2, 10)) - v1) + i += 1 + return r + + def decode_adfly(self, coded_key: str) -> str: + r, j = '', '' + for n, l in enumerate(coded_key): + if not n % 2: + r += l + else: + j = l + j + + encoded_uri = list(r + j) + numbers = ((i, n) for i, n in enumerate(encoded_uri) if str.isdigit(n)) + for first, second in zip(numbers, numbers): + xor = int(first[1]) ^ int(second[1]) + if xor < 10: + encoded_uri[first[0]] = str(xor) + + return b64decode(("".join(encoded_uri)).encode("utf-8") + )[16:-16].decode('utf-8', errors='ignore') + + def bypass_adfly(self, adfly_url): + session = requests.session() + + response_code = 302 + while response_code != 200: + adfly_content = session.get( + session.get( + adfly_url, + allow_redirects=False).headers.get('location'), + allow_redirects=False) + response_code = adfly_content.status_code + return self.decode_adfly(self.YTSM.search(adfly_content.text).group(1)) + + def get_stream_url_from_kwik(self, adfly_url): + session = requests.session() + + f_content = requests.get( + self.bypass_adfly(adfly_url), + headers={ + 'referer': 'https://kwik.cx/' + } + ) + decrypted = self.decrypt( + * + self.KWIK_PARAMS_RE.search( + f_content.text + ).group( + 1, 2, + 3, 4 + ) + ) + + code = 419 + while code != 302: + content = session.post( + self.KWIK_D_URL.search(decrypted).group(1), + allow_redirects=False, + data={ + '_token': self.KWIK_D_TOKEN.search(decrypted).group(1)}, + headers={ + 'referer': str(f_content.url), + 'cookie': f_content.headers.get('set-cookie')}) + code = content.status_code + + return content.headers.get('location') def _get_data(self): - ld = logger.debug - # Kwik servers don't have direct link access you need to be referred - # from somewhere, I will just use the url itself. We then - # have to rebuild the url. 
Hopefully kwik doesn't block this too - - # Necessary - - headers = {"Referer": "https://kwik.cx/"} - - res = requests.get(self.url, headers=headers) - - evalText = helpers.soupify(res.text) - - scripts = evalText.select("script") - - for i in scripts: - rexd = re.compile("", "") - break - - tf = tempfile.mktemp(dir=temp_dir) - - with open(tf, 'w', encoding="utf-8") as f: - f.write(rexd) - nodeRes = str(subprocess.getoutput(f"node {tf}")) - - ld(nodeRes) - - stream_url = re.search( - r"source='([^;]*)';", nodeRes).group().replace("source='", "").replace("';", "") - - ld(stream_url) - return { - 'stream_url': stream_url, - 'referer': "https://kwik.cx/" + 'stream_url': self.get_stream_url_from_kwik(self.url), + 'referer': None } From a509e4a8054665180bb2f172b3c0883a5a7da0ce Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 20 Aug 2021 23:29:34 +0300 Subject: [PATCH 124/130] bumped up the minor version number --- anime_downloader/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/__version__.py b/anime_downloader/__version__.py index 219f78b..ebd8476 100644 --- a/anime_downloader/__version__.py +++ b/anime_downloader/__version__.py @@ -1 +1 @@ -__version__ = '5.0.13' +__version__ = '5.0.14' From b28dffe87fad061f4d4ad0f16592c53eded2f3f9 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Fri, 20 Aug 2021 23:36:15 +0300 Subject: [PATCH 125/130] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 208300e..7ced95e 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ Yeah. Me too! That's why this tool exists. **Details about the sites can be found in [FAQ](https://github.com/vn-ki/anime-downloader/wiki/FAQ)** -- 4Anime +- AnimePahe - AnimTime - AnimeBinge - Animedaisuki From 1470574ff32765e78e3143f5810975d579e37f0f Mon Sep 17 00:00:00 2001 From: Sreekaran Date: Sun, 3 Oct 2021 00:26:02 +0530 Subject: [PATCH 126/130] chore: update docs: link people to anime-dl/anime-downloader (#718) --- README.md | 10 +++++----- anime_downloader/cli.py | 2 +- docs/index.rst | 2 +- docs/usage/installation.rst | 12 ++++++------ setup.py | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 7ced95e..1831b6b 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,11 @@ A simple yet powerful tool for downloading anime.

    - - + + - - + + @@ -52,7 +52,7 @@ Yeah. Me too! That's why this tool exists. * Instructions for Mobile Operating Systems can be found in the [Installation Documentation Page](https://anime-downlader.readthedocs.io/en/latest/usage/installation.html) ## Supported Sites -**Details about the sites can be found in [FAQ](https://github.com/vn-ki/anime-downloader/wiki/FAQ)** +**Details about the sites can be found in [FAQ](https://github.com/anime-dl/anime-downloader/wiki/FAQ)** - AnimePahe diff --git a/anime_downloader/cli.py b/anime_downloader/cli.py index c89de27..007b467 100644 --- a/anime_downloader/cli.py +++ b/anime_downloader/cli.py @@ -68,7 +68,7 @@ def cli(log_level): """ util.setup_logger(log_level) # if not util.check_in_path('aria2c'): - # raise logger.ERROR("Aria2 is not in path. Please follow installation instructions: https://github.com/vn-ki/anime-downloader/wiki/Installation") + # raise logger.ERROR("Aria2 is not in path. Please follow installation instructions: https://github.com/anime-dl/anime-downloader/wiki/Installation") def main(): diff --git a/docs/index.rst b/docs/index.rst index 066167c..0e08a5d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,7 +16,7 @@ Features - Search and download. - Save yourselves from those malicious ads. - Download using external downloader ([aria2](https://aria2.github.io/) recommended). -- Configurable using `config.json`. See [doc](https://github.com/vn-ki/anime-downloader/wiki/Config). +- Configurable using `config.json`. See [doc](https://github.com/anime-dl/anime-downloader/wiki/Config). Supported Sites --------------- diff --git a/docs/usage/installation.rst b/docs/usage/installation.rst index 0f7df86..7c0b37e 100644 --- a/docs/usage/installation.rst +++ b/docs/usage/installation.rst @@ -19,14 +19,14 @@ Add the following to a file named install.bat and then run it as Administrator; @"%SystemRoot%\System32\WindowsPowerShell\v1.0\powershell.exe" -NoProfile -InputFormat None -ExecutionPolicy Bypass -Command " [System.Net.ServicePointManager]::SecurityProtocol = 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))" && SET "PATH=%PATH%;%ALLUSERSPROFILE%\chocolatey\bin" choco install -y git mpv python3 aria2 nodejs - refreshenv && pip3 install -U git+https://github.com/vn-ki/anime-downloader.git && echo Testing providers, the install is done && anime test + refreshenv && pip3 install -U git+https://github.com/anime-dl/anime-downloader.git && echo Testing providers, the install is done && anime test Windows via ``choco`` ~~~~~~~~~~~~~~~~~~~~~ Contributed by @CodaTheOtaku -**NOTE** Ensure the Command Prompt (cmd) is being ran as Administrator. +**NOTE:** Ensure the Command Prompt (cmd) is being ran as Administrator. - Install `Chocolatey`_ Package manager. @@ -35,7 +35,7 @@ Windows via ``choco`` choco install -y git mpv python3 aria2 nodejs - Once these are installed; :: - pip3 install -U git+https://github.com/vn-ki/anime-downloader.git + pip3 install -U git+https://github.com/anime-dl/anime-downloader.git - Then, the commands to view a show would be; :: @@ -65,7 +65,7 @@ all the following ``pip`` with ``pip3``. - To install the bleeding-edge version of Anime-Downloader use this alternative command;: - pip3 install -U git+https://github.com/vn-ki/anime-downloader.git + pip3 install -U git+https://github.com/anime-dl/anime-downloader.git - Enjoy. @@ -98,7 +98,7 @@ This does not require a rooted device to work. 
- Install Anime-Downloader via the following command after python and git are installed; :: - pip3 install -U git+https://github.com/vn-ki/anime-downloader.git + pip3 install -U git+https://github.com/anime-dl/anime-downloader.git - The usage commands should now match the commands used on PC. @@ -123,7 +123,7 @@ The following steps install Anime-Downloader; - Firstly, clone the repository via this command; :: - git clone https://github.com/vn-ki/anime-downloader.git + git clone https://github.com/anime-dl/anime-downloader.git - Next, change your directory into the cloned repo. To do so, use the following case-sensitive command; :: diff --git a/setup.py b/setup.py index 0aef6c6..24782f6 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ setup( author_email='vishnunarayan6105@gmail.com', description='Download your favourite anime', packages=find_packages(), - url='https://github.com/vn-ki/anime-downloader', + url='https://github.com/anime-dl/anime-downloader', keywords=['anime', 'downloader', '9anime', 'download', 'kissanime'], install_requires=[ 'pySmartDL>=1.3.4', From 0ad98589efc63e9da3055f7066aa70273358891c Mon Sep 17 00:00:00 2001 From: Sreekaran Date: Sun, 3 Oct 2021 00:33:54 +0530 Subject: [PATCH 127/130] docs: add CONTRIBUTING.md (#719) --- CONTRIBUTING.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..14612f1 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,37 @@ +# Contributing to Support + +Thank you for taking the time to contribute. Please read the [CODE of CONDUCT](CODE_OF_CONDUCT.md). +As a contributor, here are the guidelines we would like you to follow: + +- [Commit Message Guidelines](#commit) + +--- + +## Commit Message Guidelines 😎 + +In order to make git commit messages **easier to read** and faster to reason about, we follow some guidelines on most commits to keep the **format predictable**. Check [Conventional Commits specification](https://conventionalcommits.org) for more information about our guidelines. 
+ +**Examples**: + +``` +docs(changelog): update changelog to beta.5 +docs: add API documentation to the bot +test(server): add cache tests to the movie resource +fix(web): add validation to phone input field +fix(web): remove avatar image from being required in form +fix(release): need to depend on latest rxjs and zone.js +``` + +### Type + +Must be one of the following: + +- **build**: Changes that affect the build system or external dependencies (example scopes: gulp, broccoli, npm) +- **ci**: Changes to our CI configuration files and scripts (example scopes: Circle, BrowserStack, SauceLabs) +- **docs**: Documentation only changes +- **feat**: A new feature +- **fix**: A bug fix +- **perf**: A code change that improves performance +- **refactor**: A code change that neither fixes a bug nor adds a feature +- **style**: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc) +- **test**: Adding missing tests or correcting existing tests \ No newline at end of file From 4d5457c73f6dba0bceeb3631f4d6f2815bc303cc Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sat, 2 Oct 2021 22:58:35 +0300 Subject: [PATCH 128/130] changed the hash link for "Commit Message Guidelines" --- CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 14612f1..9f78633 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,11 +3,11 @@ Thank you for taking the time to contribute. Please read the [CODE of CONDUCT](CODE_OF_CONDUCT.md). As a contributor, here are the guidelines we would like you to follow: -- [Commit Message Guidelines](#commit) +- [Commit Message Guidelines](#commit-message-guidelines-) --- -## Commit Message Guidelines 😎 +## Commit Message Guidelines 😎 In order to make git commit messages **easier to read** and faster to reason about, we follow some guidelines on most commits to keep the **format predictable**. Check [Conventional Commits specification](https://conventionalcommits.org) for more information about our guidelines. @@ -34,4 +34,4 @@ Must be one of the following: - **perf**: A code change that improves performance - **refactor**: A code change that neither fixes a bug nor adds a feature - **style**: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc) -- **test**: Adding missing tests or correcting existing tests \ No newline at end of file +- **test**: Adding missing tests or correcting existing tests From f76b16135baebdbd8e013e8669a1fbddb570cbcd Mon Sep 17 00:00:00 2001 From: Sreekaran Date: Sun, 3 Oct 2021 12:08:53 +0530 Subject: [PATCH 129/130] docs: edit CONTRIBUTING -- remove link to CoC and introduce a more vague commit message guideline --- CONTRIBUTING.md | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 14612f1..6ddf7ba 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Contributing to Support -Thank you for taking the time to contribute. Please read the [CODE of CONDUCT](CODE_OF_CONDUCT.md). +Thank you for taking the time to contribute. 
As a contributor, here are the guidelines we would like you to follow: - [Commit Message Guidelines](#commit) @@ -9,29 +9,4 @@ As a contributor, here are the guidelines we would like you to follow: ## Commit Message Guidelines 😎 -In order to make git commit messages **easier to read** and faster to reason about, we follow some guidelines on most commits to keep the **format predictable**. Check [Conventional Commits specification](https://conventionalcommits.org) for more information about our guidelines. - -**Examples**: - -``` -docs(changelog): update changelog to beta.5 -docs: add API documentation to the bot -test(server): add cache tests to the movie resource -fix(web): add validation to phone input field -fix(web): remove avatar image from being required in form -fix(release): need to depend on latest rxjs and zone.js -``` - -### Type - -Must be one of the following: - -- **build**: Changes that affect the build system or external dependencies (example scopes: gulp, broccoli, npm) -- **ci**: Changes to our CI configuration files and scripts (example scopes: Circle, BrowserStack, SauceLabs) -- **docs**: Documentation only changes -- **feat**: A new feature -- **fix**: A bug fix -- **perf**: A code change that improves performance -- **refactor**: A code change that neither fixes a bug nor adds a feature -- **style**: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc) -- **test**: Adding missing tests or correcting existing tests \ No newline at end of file +Nothing much honestly, just briefly describe the changes you made and you're good to go. \ No newline at end of file From e397f00934afb5e476dc1361517234bdc8ea6695 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixWasTaken@users.noreply.github.com> Date: Sat, 16 Oct 2021 13:39:55 +0300 Subject: [PATCH 130/130] disabled animepahe --- anime_downloader/sites/init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index a0d0d3b..27a4ae9 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -18,7 +18,7 @@ ALL_ANIME_SITES = [ ('animetake','animetake','AnimeTake'), ('animeonline','animeonline360','AnimeOnline'), ('animeout', 'animeout', 'AnimeOut'), - ('animepahe', 'animepahe', 'AnimePahe'), + # ('animepahe', 'animepahe', 'AnimePahe'), ('animerush', 'animerush', 'AnimeRush'), ('animesimple', 'animesimple', 'AnimeSimple'), ('animestar', 'animestar', 'AnimeStar'),
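Patches 101, 115, and 130 all touch the same `ALL_ANIME_SITES` registry in `anime_downloader/sites/init.py`: each provider is a `('filename', 'sitename', 'classname')` tuple, and a site is enabled or disabled simply by adding or commenting out its line, without deleting any code. The loader that consumes these tuples is not shown in this series; given the file's `from importlib import import_module`, a plausible minimal sketch of how such a registry is typically resolved (the `get_anime_class` helper and the module-path construction are illustrative assumptions, not the project's actual lookup code):

```python
from importlib import import_module

ALL_ANIME_SITES = [
    # ('filename', 'sitename', 'classname')
    ('animeout', 'animeout', 'AnimeOut'),
    ('animestar', 'animestar', 'AnimeStar'),
    # ('animepahe', 'animepahe', 'AnimePahe'),  # disabled, as in patch 130
]

def get_anime_class(name_or_url):
    # Illustrative lookup: import the provider module lazily and return
    # the class whose sitename appears in the query.
    for filename, sitename, classname in ALL_ANIME_SITES:
        if sitename in name_or_url:
            module = import_module(f'anime_downloader.sites.{filename}')
            return getattr(module, classname)
    raise ValueError(f'no provider matches {name_or_url!r}')
```

Keeping providers as data in this way is what lets patch 115 retire 4anime and patch 130 retire animepahe with a commented-out line rather than a file deletion.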