diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..f737479 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,10 @@ +# Contributing to Support + +Thank you for taking the time to contribute. +As a contributor, here are the guidelines we would like you to follow: + +--- + +## Commit Message Guidelines 😎 + +Nothing much honestly, just briefly describe the changes you made and you're good to go. diff --git a/README.md b/README.md index 8227d14..1831b6b 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,11 @@ A simple yet powerful tool for downloading anime.

- - + + - - + + @@ -52,9 +52,11 @@ Yeah. Me too! That's why this tool exists. * Instructions for Mobile Operating Systems can be found in the [Installation Documentation Page](https://anime-downlader.readthedocs.io/en/latest/usage/installation.html) ## Supported Sites -**Details about the sites can be found in [FAQ](https://github.com/vn-ki/anime-downloader/wiki/FAQ)** +**Details about the sites can be found in [FAQ](https://github.com/anime-dl/anime-downloader/wiki/FAQ)** -- 4Anime + +- AnimePahe +- AnimTime - AnimeBinge - Animedaisuki - Animeflix @@ -65,6 +67,8 @@ Yeah. Me too! That's why this tool exists. - animeout - Animerush - Animesimple +- AnimeStar +- AnimeSuge - requires Node.js - Animevibe - AnimeTake - AniTube @@ -74,7 +78,6 @@ Yeah. Me too! That's why this tool exists. - Dbanimes - EraiRaws - EgyAnime - usually m3u8 (good for streaming, not so much for downloading) -- FastAni - GenoAnime - GurminderBoparai (AnimeChameleon) - itsaturday @@ -91,6 +94,7 @@ Yeah. Me too! That's why this tool exists. - Vidstream - Voiranime - Vostfree +- Wcostream Sites that require Selenium **DO NOT** and **WILL NOT** work on mobile operating systems @@ -105,8 +109,9 @@ If you have trouble installing, see extended installation instructions [here](ht **Note**: - For Cloudflare scraping either [cfscrape](https://github.com/Anorov/cloudflare-scrape) or [selenium](https://www.selenium.dev/) is used. [Cfscrape](https://github.com/Anorov/cloudflare-scrape) depends on [`node-js`](https://nodejs.org/en/) and [selenium](https://www.selenium.dev/) utilizes an automated invisible instance of a browser (chrome/firefox). So, if you want to use Cloudflare enabled sites, make sure you have [node-js](https://nodejs.org/en/) and a [webdriver](https://www.selenium.dev/selenium/docs/api/py/index.html#drivers) installed. - You might have to use pip3 depending on your system -- To install this project with gui and all its dependencies, add `#egg=anime-downloader[gui]` to the pip command you are using to install it. Example: `pip install -U git+https://github.com/anime-dl/anime-downloader#egg=anime_downloader[gui]` +- To install this project with gui and all its dependencies, add `#egg=anime-downloader[gui]` to the pip command you are using to install it. 
Example: `pip install --force-reinstall -U git+https://github.com/anime-dl/anime-downloader#egg=anime_downloader[gui]` - To install this project with selescrape (if you are using GUI, ignore this line), do the same as above - but with `#egg=anime-downloader[selescrape]` +- To install this project with jsbeautifier run `pip install --force-reinstall -U git+https://github.com/anime-dl/anime-downloader#egg=anime-downloader[unpacker]` ## Usage diff --git a/anime_downloader/__version__.py b/anime_downloader/__version__.py index 79b3919..ebd8476 100644 --- a/anime_downloader/__version__.py +++ b/anime_downloader/__version__.py @@ -1 +1 @@ -__version__ = '5.0.7' +__version__ = '5.0.14' diff --git a/anime_downloader/cli.py b/anime_downloader/cli.py index 3aeefe2..007b467 100644 --- a/anime_downloader/cli.py +++ b/anime_downloader/cli.py @@ -11,6 +11,29 @@ from anime_downloader import util echo = click.echo +def check_for_update(): + from pkg_resources import parse_version + import requests + import re + + version_file = "https://raw.githubusercontent.com/anime-dl/anime-downloader/master/anime_downloader/__version__.py" + regex = r"__version__\s*=\s*[\"'](\d+\.\d+\.\d+)[\"']" + r = requests.get(version_file) + + if not r.ok: + return + + current_ver = parse_version(__version__) + remote_ver = parse_version(re.match(regex, r.text).group(1)) + + if remote_ver > current_ver: + print( + "New version (on GitHub) is available: {} -> {}\n".format( + current_ver, remote_ver + ) + ) + + class CLIClass(click.MultiCommand): def list_commands(self, ctx): @@ -45,10 +68,15 @@ def cli(log_level): """ util.setup_logger(log_level) # if not util.check_in_path('aria2c'): - # raise logger.ERROR("Aria2 is not in path. Please follow installation instructions: https://github.com/vn-ki/anime-downloader/wiki/Installation") + # raise logger.ERROR("Aria2 is not in path. Please follow installation instructions: https://github.com/anime-dl/anime-downloader/wiki/Installation") def main(): + try: + check_for_update() + except Exception: + pass + try: cli() except Exception as e: diff --git a/anime_downloader/commands/dl.py b/anime_downloader/commands/dl.py index 529dc4f..4717ab7 100644 --- a/anime_downloader/commands/dl.py +++ b/anime_downloader/commands/dl.py @@ -35,7 +35,7 @@ sitenames = [v[1] for v in ALL_ANIME_SITES] '--download-dir', metavar='PATH', help="Specify the directory to download to") @click.option( - '--quality', '-q', type=click.Choice(['360p', '480p', '720p', '1080p']), + '--quality', '-q', type=click.Choice(['360p', '480p', '540p', '720p', '1080p']), help='Specify the quality of episode. Default-720p') @click.option( '--fallback-qualities', '-fq', cls=util.ClickListOption, @@ -81,10 +81,16 @@ sitenames = [v[1] for v in ALL_ANIME_SITES] help="Set the speed limit (in KB/s or MB/s) for downloading when using aria2c", metavar='K/M' ) +@click.option( + "--sub", "-s", type=bool, is_flag=True, + help="If flag is set, it downloads the subbed version of an anime if the provider supports it. Must not be used with the --dub/-d flag") +@click.option( + "--dub", "-d", type=bool, is_flag=True, + help="If flag is set, it downloads the dubbed version of anime if the provider supports it. 
Must not be used with the --sub/-s flag") @click.pass_context def command(ctx, anime_url, episode_range, url, player, skip_download, quality, force_download, download_dir, file_format, provider, - external_downloader, chunk_size, disable_ssl, fallback_qualities, choice, skip_fillers, speed_limit): + external_downloader, chunk_size, disable_ssl, fallback_qualities, choice, skip_fillers, speed_limit, sub, dub): """ Download the anime using the url or search for it. """ @@ -95,6 +101,10 @@ def command(ctx, anime_url, episode_range, url, player, skip_download, quality, raise UsageError( "Invalid value for '--episode' / '-e': {} is not a valid range".format(episode_range)) + if sub and dub: + raise click.UsageError( + "--dub/-d and --sub/-s flags cannot be used together") + query = anime_url[:] util.print_info(__version__) @@ -108,8 +118,14 @@ def command(ctx, anime_url, episode_range, url, player, skip_download, quality, anime_url, _ = util.search(anime_url, provider, choice) cls = get_anime_class(anime_url) + subbed = None + + if sub or dub: + subbed = subbed is not None + anime = cls(anime_url, quality=quality, - fallback_qualities=fallback_qualities) + fallback_qualities=fallback_qualities, + subbed=subbed) logger.info('Found anime: {}'.format(anime.title)) animes = util.parse_ep_str(anime, episode_range) diff --git a/anime_downloader/commands/test.py b/anime_downloader/commands/test.py index afb106d..06e1775 100644 --- a/anime_downloader/commands/test.py +++ b/anime_downloader/commands/test.py @@ -3,11 +3,15 @@ import sys import threading import os import click +from fuzzywuzzy import fuzz from anime_downloader.sites import get_anime_class, ALL_ANIME_SITES from anime_downloader import util from anime_downloader.__version__ import __version__ +import requests +logging.getLogger(requests.packages.urllib3.__package__).setLevel(logging.ERROR) #disable Retry warnings + logger = logging.getLogger(__name__) echo = click.echo @@ -15,54 +19,159 @@ sitenames = [v[1] for v in ALL_ANIME_SITES] class SiteThread(threading.Thread): - def __init__(self, site, *args, **kwargs): - self.site = site + def __init__(self, provider, anime, verify, v_tries, *args, **kwargs): + self.provider = provider + self.anime = anime + self.verify = verify + self.v_tries = v_tries + self.search_result = None self.exception = None super().__init__(*args, **kwargs) def run(self): try: - ani = get_anime_class(self.site) + ani = get_anime_class(self.provider) + self.search_result = ani.search(self.anime) + if self.search_result: + if self.verify: + ratios = [[fuzz.token_set_ratio(self.anime.lower(), sr.title.lower()), sr] for sr in self.search_result] + ratios = sorted(ratios, key=lambda x: x[0], reverse=True) + + end = len(ratios) + for r in range(self.v_tries): + if r == end: break + try: + anime_choice = ratios[r][1] + anime_url = ani(anime_choice.url) + stream_url = anime_url[0].source().stream_url + self.exception = None + break + except Exception as e: + self.exception = e + + self.search_result = util.format_search_results(self.search_result) - # this should be more dynamic - sr = ani.search('naruto')[0] - - anime = ani(sr.url) - - stream_url = anime[0].source().stream_url except Exception as e: self.exception = e - @click.command() -@click.argument('test_query', default='naruto') -def command(test_query): - """Test all sites to see which ones are working and which ones aren't. 
Test naruto as a default.""" +@click.argument('anime', default='naruto') +@click.option( + '-f', '--prompt-found', is_flag=True, + help='Ask to stop searching on anime match.') +@click.option( + '-p', '--providers', + help='Limit search to specific provider(s) separated by a comma.' +) +@click.option( + '-e', '--exclude', + help='Provider(s) to exclude separated by a comma.' +) +@click.option( + '-v', '--verify', is_flag=True, + help='Verify extraction of stream url in case of anime match.' +) +@click.option( + '-n', '--v-tries', type=int, default=1, + help='Number of tries to extract stream url. (default: 1)' +) +@click.option( + '-z', '--no-fuzzy', is_flag=True, + help='Disable fuzzy search to include possible inaccurate results.' +) +@click.option( + '-r', '--print-results', is_flag=True, + help='Enable echoing the search results at the end of testing.' +) +@click.option( + '-t', '--timeout', type=int, default=10, + help='How long to wait for a site to respond. (default: 10s)' +) + +def command(anime, prompt_found, providers, exclude, verify, v_tries, no_fuzzy, print_results, timeout): + """Test all sites to see which ones are working and which ones aren't. Test naruto as a default. Return results for each provider.""" + util.print_info(__version__) logger = logging.getLogger("anime_downloader") logger.setLevel(logging.ERROR) - threads = [] + if providers: + providers = [p.strip() for p in providers.split(",")] + for p in providers: + if not p in sitenames: + raise click.BadParameter(f"{p}. Choose from {', '.join(sitenames)}") + else: + providers = sitenames - for site in sitenames: - t = SiteThread(site, daemon=True) + if exclude: + exclude = [e.strip() for e in exclude.split(",")] + for e in exclude: + if not e in sitenames: + raise click.BadParameter(f"{e}. Choose from {', '.join(sitenames)}") + else: + if e in providers: + providers.remove(e) + + if os.name == 'nt': + p, f = '', '' # Emojis don't work in cmd + else: + p, f = '✅ ', '❌ ' + + if verify: + timeout = timeout + (3 * (v_tries - 1)) + + threads = [] + matches = [] + + for provider in providers: + t = SiteThread(provider, anime, verify, v_tries, daemon=True) t.start() threads.append(t) - for thread in threads: - if os.name == 'nt': - p, f = 'Works: ', "Doesn't work: " # Emojis doesn't work in cmd - else: - p, f = '✅ ', '❌ ' - thread.join(timeout=10) - if not thread.is_alive(): - if not thread.exception: - # echo(click.style('Works ', fg='green') + site) - echo(click.style(p, fg='green') + thread.site) + for i, thread in enumerate(threads): + try: + click.echo(f"[{i+1} of {len(threads)}] Searching ", nl=False) + click.secho(f"{thread.provider}", nl=False, fg="cyan") + click.echo(f"... (CTRL-C to stop) : ", nl=False) + thread.join(timeout=timeout) + if not thread.is_alive(): + if not thread.exception: + if thread.search_result: + if not no_fuzzy: + ratio = fuzz.token_set_ratio(anime.lower(), thread.search_result.lower()) + else: + ratio = 100 + if ratio > 50: + matches.append([thread.provider, thread.search_result, ratio]) + click.secho(p + "Works, anime found.", fg="green") + if prompt_found: + if print_results: + click.echo(f"\n- - -{thread.provider}- - -\n\n{thread.search_result}") + confirm = click.confirm(f"Found anime in {thread.provider}. 
Keep seaching?", default=True) + if not confirm: + break + else: + click.secho(p + "Works, anime not found.", fg="yellow") + else: + click.secho(p + "Works, anime not found.", fg="yellow") + else: + logging.debug('Error occurred during testing.') + logging.debug(thread.exception) + if thread.search_result: + click.secho(f + "Not working: anime found, extraction failed.", fg="red") + else: + click.secho(f + "Not working.", fg="red") else: - logging.debug('Error occurred during testing') - logging.debug(thread.exception) - echo(click.style(f, fg='red') + thread.site) - else: - logging.debug('timeout during testing') - echo(click.style(f, fg='red') + thread.site) + logging.debug('Timeout during testing.') + click.secho(f + "Not working: Timeout. Use -t to specify longer waiting period.", fg="red") + + except KeyboardInterrupt: + skip = click.confirm(f"\nSkip {thread.provider} and continue searching? (Press enter for Yes)", default=True) + if not skip: + break + + if print_results: + click.echo("\n" + util.format_matches(matches)) + else: + click.echo("\n" + "Test finished.") + \ No newline at end of file diff --git a/anime_downloader/config.py b/anime_downloader/config.py index 0b60707..6e5ee71 100644 --- a/anime_downloader/config.py +++ b/anime_downloader/config.py @@ -73,6 +73,9 @@ DEFAULT_CONFIG = { 'anistream.xyz': { 'version': 'subbed', }, + 'animepahe': { + 'version': 'subbed', + }, 'animeflv': { 'version': 'subbed', 'servers': [ @@ -117,7 +120,12 @@ DEFAULT_CONFIG = { }, 'ryuanime': { 'version': 'subbed', - 'server': 'trollvid', + 'servers': [ + 'vidstream', + 'mp4upload', + 'xstreamcdn', + 'trollvid' + ] }, 'animekisa': { 'server': 'gcloud', @@ -128,6 +136,10 @@ DEFAULT_CONFIG = { 'servers': ['vidstream', 'gcloud', 'yourupload', 'hydrax'], 'version': 'subbed', }, + 'wcostream': { + 'servers': ['vidstreampro', 'mcloud'], + 'version': 'subbed', + }, 'animeflix': { 'server': 'AUEngine', 'fallback_servers': ['FastStream'], diff --git a/anime_downloader/const.py b/anime_downloader/const.py index 980e6b3..440b479 100644 --- a/anime_downloader/const.py +++ b/anime_downloader/const.py @@ -1,14 +1,14 @@ import random mobile_headers = { - 'user-agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) \ - AppleWebKit/604.1.38 (KHTML, like Gecko) \ - Version/11.0 Mobile/15A402 Safari/604.1" + 'user-agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 14_5_1 like Mac OS X) \ + AppleWebKit/605.1.15 (KHTML, like Gecko) \ + Version/14.0 Mobile/15E148 Safari/604.1" } desktop_headers = { - 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101 \ -Firefox/56.0" + 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) \ + Gecko/20100101 Firefox/88.0.1" } @@ -16,123 +16,123 @@ def get_random_header(): return {'user-agent': random.choice(HEADERS)} -HEADERS = ['Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.3; WOW64) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36', - 'Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.2 (KHTML, like Gecko) Chrome/22.0.1216.0 Safari/537.2', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1', - 'Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5', - 'Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/536.5 (KHTML like Gecko) Chrome/19.0.1084.56 Safari/1EA69', - 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 
10_7_2) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/19.0.1047.0 Safari/535.22', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21', - 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1041.0 Safari/535.21', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/18.6.872.0 Safari/535.2 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0', - 'Mozilla/5.0 (Macintosh; AMD Mac OS X 10_8_2) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/18.6.872', - 'Mozilla/5.0 (X11; CrOS i686 1660.57.0) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.46 Safari/535.19', - 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.11 Safari/535.19', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.04 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/10.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11', - 
'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.04 Chromium/17.0.963.56 Chrome/17.0.963.56 Safari/535.11', - 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.8 (KHTML, like Gecko) Chrome/17.0.940.0 Safari/535.8', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7ad-imcjapan-syosyaman-xkgi3lqg03!wgz', - 'Mozilla/5.0 (X11; CrOS i686 1193.158.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7', - 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7', - 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7xs5D9rRDFpg2g', - 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.6 (KHTML, like Gecko) Chrome/16.0.897.0 Safari/535.6', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.54 Safari/535.2', - 'Mozilla/5.0 (X11; FreeBSD i386) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2', - 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.2 (KHTML, like Gecko) Ubuntu/11.10 Chromium/15.0.874.120 Chrome/15.0.874.120 Safari/535.2', - 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.872.0 Safari/535.2', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Ubuntu/11.04 Chromium/15.0.871.0 Chrome/15.0.871.0 Safari/535.2', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.864.0 Safari/535.2', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2', - 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.3 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; 
en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.3 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.0 Safari/532.1', - 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1', - 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1', - 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1', - 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.212.1 Safari/532.1', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.4 Safari/532.0', - 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0', - 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0', - 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0', ] +HEADERS = ['Mozilla/5.0 (Windows NT 6.1) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/605.1.15', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_3_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.211 Safari/605.1.15', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.210 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.209 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.208 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.207 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.206 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 5.1) WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.206 Safari/605.1.15', + 
'Mozilla/5.0 (Windows NT 10.0) WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.201 Safari/605.1.15', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.199 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.195 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.198 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.197 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.194 Safari/605.1.15', + 'Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.192 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/605.1.14 (KHTML, like Gecko) Chrome/90.0.4430.209 Safari/605.1.14', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/605.1.14 (KHTML, like Gecko) Chrome/90.0.4430.209 Safari/605.1.14', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/605.1.14 (KHTML, like Gecko) Chrome/90.0.4430.209 Safari/605.1.14', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/605.1.13 (KHTML, like Gecko) Chrome/90.0.4430.208 Safari/605.1.13', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/605.1.12 (KHTML, like Gecko) Chrome/90.0.4429.205 Safari/605.1.12', + 'Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/605.1.11 (KHTML, like Gecko) Chrome/90.0.4429.203 Safari/605.1.11', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AAppleWebKit/605.1.10 (KHTML, like Gecko) Chrome/90.0.4429.201 Safari/605.1.10', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/605.0.9 (KHTML, like Gecko) Chrome/90.0.4428.105 Safari/605.0.9', + 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/605.1.12 (KHTML, like Gecko) Chrome/90.0.4428.196 Safari/605.1.12', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.0.8 (KHTML, like Gecko) Chrome/90.0.4428.97 Safari/605.0.8', + 'Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/605.0.7 (KHTML, like Gecko) Chrome/90.0.4428.92 Safari/2BC75', + 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/605.0.4 (KHTML, like Gecko) Chrome/90.0.4428.89 Safari/605.0.4', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/605.0.2 (KHTML, like Gecko) Chrome/90.0.4427.85 Safari/605.0.2', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/605.0.1 (KHTML, like Gecko) Chrome/90.0.4427.83 Safari/605.0.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/604.2.9 (KHTML, like Gecko) Chrome/90.0.4427.76 Safari/604.2.9', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/604.2.8 (KHTML, like Gecko) Chrome/90.0.4426.74 Safari/604.2.8', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.7 (KHTML, like Gecko) Chrome/90.0.4425.75 Safari/604.2.7', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/604.2.7 (KHTML, like Gecko) Chrome/90.0.4425.75 Safari/604.2.7', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.7 (KHTML, like Gecko) Chrome/90.0.4425.75 Safari/604.2.7', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/604.2.7 (KHTML, like Gecko) Chrome/90.0.4425.75 Safari/604.2.7', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/604.2.7 (KHTML, like Gecko) Chrome/90.0.4425.74 Safari/604.2.7', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4424.65 Safari/604.2.5', + 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4424.64 Safari/604.2.5', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) 
AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4424.62 Safari/604.2.5', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4423.55 Safari/604.2.5', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4423.53 Safari/604.2.5', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4423.52 Safari/604.2.5', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4423.50 Safari/604.2.5', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4422.94 Safari/604.2.5 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0', + 'Mozilla/5.0 (Macintosh; AMD Mac OS X 10_8_2) AppleWebKit/604.2.5 (KHTML, like Gecko) Chrome/90.0.4422.91 Safari/604.2.5', + 'Mozilla/5.0 (X11; CrOS i686 1660.57.0) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.89 Safari/604.2.3', + 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.88 Safari/604.2.3', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.87 Safari/604.2.3', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.87 Safari/604.2.3', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.86 Safari/604.2.3', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.85 Safari/604.2.3', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.2.3 (KHTML, like Gecko) Chrome/90.0.4422.81 Safari/604.2.3', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.104 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.104 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.102 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.102 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.101 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.100 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.99 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.99 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4421.95 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4420.78 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4420.77 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4420.77 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4420.76 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Ubuntu/20.10 Chromium/90.0.4420.72 Chrome/90.0.4420.72 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Ubuntu/20.04 Chromium/90.0.4420.70 Chrome/90.0.4420.70 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux x86_64) 
AppleWebKit/604.2.1 (KHTML, like Gecko) Ubuntu/19.10 Chromium/90.0.4420.69 Chrome/90.0.4420.69 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.2.1 (KHTML, like Gecko) Ubuntu/19.10 Chromium/90.0.4420.67 Chrome/90.0.4420.67 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.96 Safari/604.2.1', + 'Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.95 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.95 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.92 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.92 Safari/604.2.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4419.90 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Ubuntu/20.04 Chrome/90.0.4419.86 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4418.83 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4418.83 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4418.82 Safari/604.2.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.2.1 (KHTML, like Gecko) Chrome/90.0.4418.81 Safari/604.2.1', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.78 Safari/604.2.0', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.78 Safari/604.2.0.7ad-imcjapan-syosyaman-xkgi4lqg18!wgz', + 'Mozilla/5.0 (X11; CrOS i686 1193.158.0) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.75 Safari/604.2.0', + 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.74 Safari/604.2.0', + 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.73 Safari/604.2.0', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.70 Safari/604.2.0.2xs8D9rRDFpg8g', + 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.67 Safari/604.2.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.66 Safari/604.2.0', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/604.2.0 (KHTML, like Gecko) Chrome/90.0.4418.66 Safari/604.2.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.107 Safari/604.1', + 'Mozilla/5.0 (X11; FreeBSD i386) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.105 Safari/604.1', + 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/604.1 (KHTML, like Gecko) Ubuntu/20.10 Chromium/90.0.4417.104 Chrome/90.0.4417.104 Safari/604.1', + 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.104 Safari/604.1', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.103 Safari/604.1', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/604.1 (KHTML, like Gecko) Ubuntu/20.04 Chromium/90.0.4417.103 Chrome/90.0.4417.103 Safari/604.1', + 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/604.1 (KHTML, like Gecko) Chrome/90.0.4417.103 Safari/604.1', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/604.1 (KHTML, like 
Gecko) Chrome/90.0.4417.101 Safari/604.1', + 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.99 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.99 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.98 Safari/604.0', + 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.95 Safari/604.0', + 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.92 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.90 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4417.85 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.102 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.100 Safari/604.0', + 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.99 Safari/604.0', + 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.96 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.96 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.95 Safari/604.0', + 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/604.0 (KHTML, like Gecko) Chrome/90.0.4416.95 Safari/604.0', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.90 Safari/603.9', + 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.90 Safari/603.9', + 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.89 Safari/603.9', + 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.88 Safari/603.9', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.88 Safari/603.9', + 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.85 Safari/603.9', + 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.82 Safari/603.9', + 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.79 Safari/603.9', + 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/603.9 (KHTML, like Gecko) Chrome/90.0.4416.77 Safari/603.9', ] diff --git a/anime_downloader/downloader/SmartDL.py b/anime_downloader/downloader/SmartDL.py index bdc9936..03f54c3 100644 --- a/anime_downloader/downloader/SmartDL.py +++ b/anime_downloader/downloader/SmartDL.py @@ -12,7 +12,7 @@ class pySmartDL(BaseDownloader): headers = self.source.headers if 'user-agent' not in headers: - headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0" + headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1" # This allows backwards compatible while also working with # PySmartDl as it only passes user agent if spelled "User-Agent" diff --git 
a/anime_downloader/downloader/base_downloader.py b/anime_downloader/downloader/base_downloader.py index 59f9429..1ce91a3 100644 --- a/anime_downloader/downloader/base_downloader.py +++ b/anime_downloader/downloader/base_downloader.py @@ -30,7 +30,7 @@ class BaseDownloader: # Added Referer Header as kwik needd it. headers = self.source.headers if 'user-agent' not in headers: - headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0" + headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1" if self.source.referer: headers['referer'] = self.source.referer diff --git a/anime_downloader/downloader/http_downloader.py b/anime_downloader/downloader/http_downloader.py index 5c8f0ea..4affe49 100644 --- a/anime_downloader/downloader/http_downloader.py +++ b/anime_downloader/downloader/http_downloader.py @@ -29,7 +29,7 @@ class HTTPDownloader(BaseDownloader): url = self.source.stream_url headers = self.source.headers if 'user-agent' not in headers: - headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0" + headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1" if self.source.referer: headers['Referer'] = self.source.referer @@ -60,7 +60,7 @@ class HTTPDownloader(BaseDownloader): def _non_range_download(self): url = self.source.stream_url headers = { - 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0" + 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1" } if self.source.referer: headers['Referer'] = self.source.referer diff --git a/anime_downloader/extractors/init.py b/anime_downloader/extractors/init.py index a529519..45409ba 100644 --- a/anime_downloader/extractors/init.py +++ b/anime_downloader/extractors/init.py @@ -1,4 +1,6 @@ from importlib import import_module +import re + ALL_EXTRACTORS = [ { @@ -67,6 +69,12 @@ ALL_EXTRACTORS = [ 'regex': 'yourupload', 'class': 'Yourupload' }, + { + 'sitename': 'wcostream', + 'modulename': 'wcostream', + 'regex': 'wcostream', + 'class': 'WcoStream' + }, { 'sitename': 'vidstream', 'modulename': 'vidstream', @@ -168,13 +176,19 @@ ALL_EXTRACTORS = [ 'modulename': 'streamium', 'regex': 'streamium', 'class': 'Streamium' + }, + { + 'sitename': 'wasabisys', + 'modulename': 'wasabisys', + 'regex': 'wasabisys', + 'class': 'Wasabisys' } ] def get_extractor(name): for extractor in ALL_EXTRACTORS: - if extractor['regex'] in name.lower(): + if re.match(extractor['regex'], name.lower()): module = import_module( 'anime_downloader.extractors.{}'.format( extractor['modulename']) diff --git a/anime_downloader/extractors/kwik.py b/anime_downloader/extractors/kwik.py index 37bdaf0..dab6ca4 100644 --- a/anime_downloader/extractors/kwik.py +++ b/anime_downloader/extractors/kwik.py @@ -1,72 +1,122 @@ +from base64 import b64decode +import requests import logging import re -import requests from anime_downloader.extractors.base_extractor import BaseExtractor from anime_downloader.sites import helpers -from anime_downloader import util from subprocess import CalledProcessError +from anime_downloader import util logger = logging.getLogger(__name__) class Kwik(BaseExtractor): - '''Extracts video url from kwik pages, Kwik has some `security` - which allows to access kwik pages when only referred by something - and the kwik video stream when referred through the corresponding - kwik video page. 
- ''' + YTSM = re.compile(r"ysmm = '([^']+)") + + KWIK_PARAMS_RE = re.compile(r'\("(\w+)",\d+,"(\w+)",(\d+),(\d+),\d+\)') + KWIK_D_URL = re.compile(r'action="([^"]+)"') + KWIK_D_TOKEN = re.compile(r'value="([^"]+)"') + + CHARACTER_MAP = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/" + + def get_string(self, content: str, s1: int, s2: int) -> str: + slice_2 = self.CHARACTER_MAP[0:s2] + + acc = 0 + for n, i in enumerate(content[::-1]): + acc += int(i if i.isdigit() else 0) * s1**n + + k = '' + while acc > 0: + k = slice_2[int(acc % s2)] + k + acc = (acc - (acc % s2)) / s2 + + return k or '0' + + def decrypt(self, full_string: str, key: str, v1: int, v2: int) -> str: + v1, v2 = int(v1), int(v2) + r, i = "", 0 + + while i < len(full_string): + s = "" + while (full_string[i] != key[v2]): + s += full_string[i] + i += 1 + j = 0 + while j < len(key): + s = s.replace(key[j], str(j)) + j += 1 + r += chr(int(self.get_string(s, v2, 10)) - v1) + i += 1 + return r + + def decode_adfly(self, coded_key: str) -> str: + r, j = '', '' + for n, l in enumerate(coded_key): + if not n % 2: + r += l + else: + j = l + j + + encoded_uri = list(r + j) + numbers = ((i, n) for i, n in enumerate(encoded_uri) if str.isdigit(n)) + for first, second in zip(numbers, numbers): + xor = int(first[1]) ^ int(second[1]) + if xor < 10: + encoded_uri[first[0]] = str(xor) + + return b64decode(("".join(encoded_uri)).encode("utf-8") + )[16:-16].decode('utf-8', errors='ignore') + + def bypass_adfly(self, adfly_url): + session = requests.session() + + response_code = 302 + while response_code != 200: + adfly_content = session.get( + session.get( + adfly_url, + allow_redirects=False).headers.get('location'), + allow_redirects=False) + response_code = adfly_content.status_code + return self.decode_adfly(self.YTSM.search(adfly_content.text).group(1)) + + def get_stream_url_from_kwik(self, adfly_url): + session = requests.session() + + f_content = requests.get( + self.bypass_adfly(adfly_url), + headers={ + 'referer': 'https://kwik.cx/' + } + ) + decrypted = self.decrypt( + * + self.KWIK_PARAMS_RE.search( + f_content.text + ).group( + 1, 2, + 3, 4 + ) + ) + + code = 419 + while code != 302: + content = session.post( + self.KWIK_D_URL.search(decrypted).group(1), + allow_redirects=False, + data={ + '_token': self.KWIK_D_TOKEN.search(decrypted).group(1)}, + headers={ + 'referer': str(f_content.url), + 'cookie': f_content.headers.get('set-cookie')}) + code = content.status_code + + return content.headers.get('location') def _get_data(self): - # Kwik servers don't have direct link access you need to be referred - # from somewhere, I will just use the url itself. We then - # have to rebuild the url. 
Hopefully kwik doesn't block this too - - # Necessary - self.url = self.url.replace(".cx/e/", ".cx/f/") - self.headers.update({"referer": self.url}) - - cookies = util.get_hcaptcha_cookies(self.url) - - if not cookies: - resp = util.bypass_hcaptcha(self.url) - else: - resp = requests.get(self.url, cookies=cookies) - - title_re = re.compile(r'title>(.*)<') - - kwik_text = resp.text - deobfuscated = None - - loops = 0 - while not deobfuscated and loops < 6: - try: - deobfuscated = helpers.soupify(util.deobfuscate_packed_js(re.search(r'<(script).*(var\s+_.*escape.*?)(?s)', kwik_text).group(2))) - except (AttributeError, CalledProcessError) as e: - if type(e) == AttributeError: - resp = util.bypass_hcaptcha(self.url) - kwik_text = resp.text - - if type(e) == CalledProcessError: - resp = requests.get(self.url, cookies=cookies) - finally: - cookies = resp.cookies - title = title_re.search(kwik_text).group(1) - loops += 1 - - post_url = deobfuscated.form["action"] - token = deobfuscated.input["value"] - - resp = helpers.post(post_url, headers=self.headers, params={"_token": token}, cookies=cookies, allow_redirects=False) - stream_url = resp.headers["Location"] - - logger.debug('Stream URL: %s' % stream_url) - return { - 'stream_url': stream_url, - 'meta': { - 'title': title, - 'thumbnail': '' - }, + 'stream_url': self.get_stream_url_from_kwik(self.url), 'referer': None } diff --git a/anime_downloader/extractors/streamtape.py b/anime_downloader/extractors/streamtape.py index 7a419b1..ba5424b 100644 --- a/anime_downloader/extractors/streamtape.py +++ b/anime_downloader/extractors/streamtape.py @@ -7,9 +7,12 @@ import re class StreamTape(BaseExtractor): def _get_data(self): resp = helpers.get(self.url, cache=False).text - url = "https:" + \ - re.search( - "document\.getElementById\([\"']videolink[\"']\);.*?innerHTML.*?=.*?[\"'](.*?)[\"']", resp).group(1) + groups = re.search( + r"document\.getElementById\(.*?\)\.innerHTML = [\"'](.*?)[\"'] \+ [\"'](.*?)[\"']", + resp + ) + url = "https:" + groups[1] + groups[2] + return { 'stream_url': url, diff --git a/anime_downloader/extractors/trollvid.py b/anime_downloader/extractors/trollvid.py index 6e4f2e3..4eb4144 100644 --- a/anime_downloader/extractors/trollvid.py +++ b/anime_downloader/extractors/trollvid.py @@ -26,16 +26,17 @@ class Trollvid(BaseExtractor): elif token: token = token.group(1) - trollvid_id = self.url.split('/')[-1] # something like: 084df78d215a + # something like: 084df78d215a + trollvid_id = self.url.split('/')[-1] post = helpers.post(f'https://mp4.sh/v/{trollvid_id}', data={'token': token}, referer=self.url, ).json() # {'success':True} on success. - if post.get('success') and post.get('data'): + if post.get('success') and post.get('file'): return { - 'stream_url': post['data'] + 'stream_url': post['file'] } # In case neither methods work. 
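The rewritten Kwik extractor above replaces the old hCaptcha/JS-deobfuscation flow with a small arithmetic decoder: `KWIK_PARAMS_RE` pulls an obfuscated string, a key alphabet and two integers out of the player page, `decrypt` splits the string on the delimiter `key[v2]`, and `get_string` re-reads each chunk as a base-`v2` numeral so that `chr(value - v1)` yields one plaintext character. Below is a minimal standalone sketch of that base-conversion step only, with made-up inputs (the real parameters come from the regex match on the page); it is not the extractor itself.

```python
# Standalone sketch of the base-conversion helper used by the new Kwik
# extractor (get_string in the diff above). Toy values only; in the
# extractor the inputs come from the KWIK_PARAMS_RE match.
CHARACTER_MAP = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/"

def get_string(content: str, s1: int, s2: int) -> str:
    """Read `content` as a base-s1 numeral and re-encode it in base s2."""
    acc = 0
    for n, digit in enumerate(reversed(content)):
        acc += int(digit if digit.isdigit() else 0) * s1 ** n

    out = ""
    while acc > 0:
        out = CHARACTER_MAP[acc % s2] + out
        acc //= s2
    return out or "0"

# "73" read as base 8 is 59 in decimal; decrypt() then applies
# chr(59 - v1) to recover a single plaintext character.
assert get_string("73", 8, 10) == "59"
```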
diff --git a/anime_downloader/extractors/vidstream.py b/anime_downloader/extractors/vidstream.py index 1faa216..aa79701 100644 --- a/anime_downloader/extractors/vidstream.py +++ b/anime_downloader/extractors/vidstream.py @@ -28,7 +28,8 @@ class VidStream(BaseExtractor): } url = self.url.replace('https:////', 'https://') - url = url.replace('https://gogo-stream.com/download', 'https://gogo-stream.com/server.php') + url = url.replace('https://gogo-stream.com/download', + 'https://gogo-stream.com/server.php') soup = helpers.soupify(helpers.get(url)) linkserver = soup.select('li.linkserver') logger.debug('Linkserver: {}'.format(linkserver)) @@ -64,7 +65,11 @@ class VidStream(BaseExtractor): # # # Used to create a download url. - soup_id = soup.select('input#id')[0]['value'] + try: + soup_id = soup.select('input#id')[0]['value'] + except IndexError: + return self._get_link_new(soup) + soup_title = soup.select('input#title')[0]['value'] soup_typesub = soup.select('input#typesub')[0].get('value', 'SUB') @@ -103,6 +108,11 @@ class VidStream(BaseExtractor): return {'stream_url': ''} + def _get_link_new(self, soup): + link_buttons = soup.select('div.mirror_link')[ + 0].select('div.dowload > a[href]') + return {'stream_url': link_buttons[0].get('href')} + class Extractor: """dummy class to prevent changing self""" @@ -110,4 +120,3 @@ class Extractor: def __init__(self, dictionary): for k, v in dictionary.items(): setattr(self, k, v) - diff --git a/anime_downloader/extractors/wasabisys.py b/anime_downloader/extractors/wasabisys.py new file mode 100644 index 0000000..d92b538 --- /dev/null +++ b/anime_downloader/extractors/wasabisys.py @@ -0,0 +1,11 @@ +from anime_downloader.extractors.base_extractor import BaseExtractor +from anime_downloader.sites import helpers + + +class Wasabisys(BaseExtractor): + def _get_data(self): + + return { + 'stream_url': self.url, + 'referer': 'https://animtime.com/' + } diff --git a/anime_downloader/extractors/wcostream.py b/anime_downloader/extractors/wcostream.py new file mode 100644 index 0000000..0b65fee --- /dev/null +++ b/anime_downloader/extractors/wcostream.py @@ -0,0 +1,37 @@ +from anime_downloader.extractors.base_extractor import BaseExtractor +from anime_downloader.sites import helpers +import re + + +class WcoStream(BaseExtractor): + def _get_data(self): + try: + if self.url.startswith('https://vidstream.pro/e'): + base_url = 'https://vidstream.pro' + elif self.url.startswith('https://mcloud.to/e/'): + base_url = 'https://mcloud.to' + else: + return [] + + html = helpers.get(self.url, referer='https://wcostream.cc/') + id_ = re.findall(r"/e/(.*?)\?domain", self.url)[0] + skey = re.findall(r"skey\s=\s['\"](.*?)['\"];", html.text)[0] + + apiLink = f"{base_url}/info/{id_}?domain=wcostream.cc&skey={skey}" + referer = f"{base_url}/e/{id_}?domain=wcostream.cc" + + response = helpers.get(apiLink, referer=referer).json() + + if response['success'] is True: + sources = [ + { + 'stream_url': x['file'] + } + for x in response['media']['sources'] + ] + return sources + else: + return [] + + except Exception: + return {"stream_url": ''} diff --git a/anime_downloader/extractors/yourupload.py b/anime_downloader/extractors/yourupload.py index 1451f3e..4429b7a 100644 --- a/anime_downloader/extractors/yourupload.py +++ b/anime_downloader/extractors/yourupload.py @@ -3,6 +3,7 @@ import re from anime_downloader.extractors.base_extractor import BaseExtractor from anime_downloader.sites import helpers +from requests.exceptions import HTTPError logger = logging.getLogger(__name__) 
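Earlier in this patch, `get_extractor` in `extractors/init.py` switches from a plain substring test to `re.match`. The practical difference is that `re.match` treats each entry's `regex` field as a regular expression and only succeeds when it matches at the start of the name, whereas `x in y` matches anywhere in the string. A small illustration follows; the sample strings are invented for the example and are not taken from the project.

```python
import re

# Hypothetical lookup name and pattern, for illustration only.
pattern = "vidstream"
name = "embed.provider-vidstream.example"

old_style = pattern in name                      # True: substring match anywhere
new_style = bool(re.match(pattern, name))        # False: must match at the start

assert old_style is True and new_style is False
# Still matches when the name actually starts with the pattern.
assert re.match(pattern, "vidstream.example") is not None
```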
@@ -10,7 +11,13 @@ logger = logging.getLogger(__name__) class Yourupload(BaseExtractor): def _get_data(self): regex = r"file: '([^']*)" - file = re.search(regex, helpers.get(self.url).text).group(1) + try: + response = helpers.get(self.url) + except HTTPError: + logger.error('File not found.') + return {'stream_url': ''} + + file = re.search(regex, response.text).group(1) return { 'stream_url': file, 'referer': self.url diff --git a/anime_downloader/sites/_4anime.py b/anime_downloader/sites/_4anime.py index 81afb47..bdf3752 100644 --- a/anime_downloader/sites/_4anime.py +++ b/anime_downloader/sites/_4anime.py @@ -3,10 +3,12 @@ import re from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites import helpers from anime_downloader.const import HEADERS +from anime_downloader.sites.helpers.util import not_working logger = logging.getLogger(__name__) +@not_working("4anime has been shut down") class Anime4(Anime, sitename='4anime'): sitename = '4anime' @@ -19,12 +21,13 @@ class Anime4(Anime, sitename='4anime'): "options": "qtranslate_lang=0&set_intitle=None&customset%5B%5D=anime" } soup = helpers.soupify(helpers.post( - "https://4anime.to/wp-admin/admin-ajax.php", data=data)).select('div.info > a') + "https://4anime.to/wp-admin/admin-ajax.php", data=data)).select('.item') search_results = [ SearchResult( - title=i.text, - url=i['href'] + title=i.select_one('.info > a').text, + url=i.select_one('.info > a').get('href', ''), + poster="https://4anime.to" + i.find('img').get('src', '') ) for i in soup ] @@ -41,6 +44,19 @@ class Anime4(Anime, sitename='4anime'): for i in soup.select('.detail > a'): if 'year' in i.get('href', ''): self.meta['year'] = int(i.text) if i.text.isnumeric() else None + elif 'status' in i.get('href', ''): + self.meta['airing_status'] = i.text.strip() + + desc_soup = soup.select_one("#description-mob") + if "READ MORE" in str(desc_soup): + desc = desc_soup.select('#fullcontent p') + self.meta['description'] = "\n".join([x.text for x in desc]) + else: + self.meta['description'] = desc_soup.select_one('p:nth-child(2)').text + + self.meta['poster'] = "https://4anime.to" + soup.select_one("#details > div.cover > img").get('src', '') + self.meta['total_eps'] = len(soup.select('ul.episodes.range.active > li > a')) + self.meta['cover'] = "https://4anime.to/static/Dr1FzAv.jpg" class Anime4Episode(AnimeEpisode, sitename='4anime'): @@ -49,12 +65,7 @@ class Anime4Episode(AnimeEpisode, sitename='4anime'): 'user-agent': HEADERS[self.hash_url(self.url, len(HEADERS))]} resp = helpers.get(self.url, headers=self.headers) - # E.g. document.write( ' Download' ); - stream_url = helpers.soupify( - re.search("( a') + search_results = helpers.soupify(helpers.get( + cls.url, params={'s': query})).select('h3.post-title > a') # Removes the unneded metadata from the title # Used by MAL matcher clean_title_regex = r'\(.*?\)' @@ -31,7 +32,19 @@ class AnimeOut(Anime, sitename='animeout'): # Only uses the direct download links for consistency. 
soup = helpers.soupify(helpers.get(self.url)) elements = soup.select('article.post a') - return [i.get('href') for i in elements if 'Direct Download' in i.text] + episodes = [i.get('href') + for i in elements if 'Direct Download' in i.text] + + filters = [self.quality, "1080p", "720p"] + quality_filtered = [] + + for _filter in filters: + if not quality_filtered: + quality_filtered = [x for x in episodes if _filter in x] + else: + break + + return episodes if not quality_filtered else quality_filtered def _scrape_metadata(self): soup = helpers.soupify(helpers.get(self.url)) diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index 97ddb6b..0ef1476 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -8,57 +8,9 @@ from anime_downloader.sites import helpers logger = logging.getLogger(__name__) -class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): - QUALITIES = ['360p', '480p', '720p', '1080p'] - - def _get_source(self, episode_id, server, session_id): - # We will extract the episodes data through the animepahe api - # which returns the available qualities and the episode sources. - params = { - 'id': episode_id, - 'm': 'embed', - 'p': server, - 'session': session_id - } - - episode_data = helpers.get('https://animepahe.com/api', params=params).json() - episode_data = episode_data['data'] - sources = {} - - for info in range(len(episode_data)): - quality = list(episode_data[info].keys())[0] - sources[f'{quality}p'] = episode_data[info][quality]['kwik'] - - if self.quality in sources: - return (server, sources[self.quality]) - return - - def _get_sources(self): - supported_servers = ['kwik', 'mp4upload', 'rapidvideo'] - source_text = helpers.get(self.url, cf=True).text - sources = [] - - server_list = re.findall(r'data-provider="([^"]+)', source_text) - episode_id, session_id = re.search("getUrls\((\d+?), \"(.*)?\"", source_text).groups() - - for server in server_list: - if server not in supported_servers: - continue - source = self._get_source(episode_id, server, session_id) - if source: - sources.append(source) - - if sources: - return sources - raise NotFoundError - - class AnimePahe(Anime, sitename='animepahe'): sitename = 'animepahe' api_url = 'https://animepahe.com/api' - base_anime_url = 'https://animepahe.com/anime/' - QUALITIES = ['360p', '480p', '720p', '1080p'] - _episodeClass = AnimePaheEpisode @classmethod def search(cls, query): @@ -69,68 +21,87 @@ class AnimePahe(Anime, sitename='animepahe'): } search_results = helpers.get(cls.api_url, params=params).json() - results = [] + if search_results['total'] == []: + return [] - for search_result in search_results['data']: - search_result_info = SearchResult( - title=search_result['title'], - url=cls.base_anime_url + search_result['slug'], - poster=search_result['poster'] + return [ + SearchResult( + title=result['title'] + " (" + result['type'] + ")", + url="https://animepahe.com/anime/TITLE!" 
+ result['title'] + " (" + result['type'] + ")" + '!TITLE/' + result['session'] + "/" + str(result['id']), # noqa + poster=result['poster'] ) + for result in search_results['data'] + ] - logger.debug(search_result_info) - results.append(search_result_info) + def _scrape_episodes(self): + attr = self.url.split('/') + session = attr[-2] + id_ = attr[-1] + page = 1 + headers = {'referer': 'https://animepahe.com/'} - return results + apiUri = self.api_url + '?m=release&id=' + id_ + '&sort=episode_asc&page=' + jsonResponse = helpers.get(apiUri + str(page), headers=headers).json() + lastPage = jsonResponse['last_page'] + perPage = jsonResponse['per_page'] + total = jsonResponse['total'] + ep = 1 + episodes = [] - def get_data(self): - page = helpers.get(self.url, cf=True).text - anime_id = re.search(r'&id=(\d+)', page).group(1) - - self.params = { - 'm': 'release', - 'id': anime_id, - 'sort': 'episode_asc', - 'page': 1 - } - - json_resp = helpers.get(self.api_url, params=self.params).json() - self._scrape_metadata(page) - self._episode_urls = self._scrape_episodes(json_resp) - self._len = len(self._episode_urls) - return self._episode_urls - - def _collect_episodes(self, ani_json, episodes=[]): - # Avoid changing original list - episodes = episodes[:] - - # If episodes is not an empty list we ensure that we start off - # from the length of the episodes list to get correct episode - # numbers - for no, anime_ep in enumerate(ani_json, len(episodes)): - episodes.append((no + 1, f'{self.url}/{anime_ep["id"]}',)) - - return episodes - - def _scrape_episodes(self, ani_json): - episodes = self._collect_episodes(ani_json['data']) - - if not episodes: - raise NotFoundError(f'No episodes found for {self.url}') + if (lastPage == 1 and perPage > total): + for epi in jsonResponse['data']: + episodes.append( + f'{self.api_url}?m=links&id={epi["anime_id"]}&session={epi["session"]}&p=kwik!!TRUE!!') else: - # Check if other pages exist since animepahe only loads - # first page and make subsequent calls to the api for every - # page - start_page = ani_json['current_page'] + 1 - end_page = ani_json['last_page'] + 1 - - for i in range(start_page, end_page): - self.params['page'] = i - resp = helpers.get(self.api_url, params=self.params).json() - - episodes = self._collect_episodes(resp['data'], episodes) - + stop = False + for page in range(lastPage): + if stop: + break + for i in range(perPage): + if ep <= total: + episodes.append( + f'{self.api_url}?m=release&id={id_}&sort=episode_asc&page={page+1}&ep={ep}!!FALSE!!') + ep += 1 + else: + stop = True + break return episodes - def _scrape_metadata(self, data): - self.title = re.search(r'
([^<]+)', data).group(1) + def _scrape_metadata(self): + self.title = re.findall(r"TITLE!(.*?)!TITLE", self.url)[0] + + +class AnimePaheEpisode(AnimeEpisode, sitename='animepahe'): + def _get_sources(self): + if '!!TRUE!!' in self.url: + self.url = self.url.replace('!!TRUE!!', '') + else: + headers = {'referer': 'https://animepahe.com/'} + regex = r"\&ep\=(\d+)\!\!FALSE\!\!" + episodeNum = int(re.findall(regex, self.url)[0]) + self.url = re.sub(regex, '', self.url) + jsonResponse = helpers.get(self.url, headers=headers).json() + + ep = None + for episode in jsonResponse['data']: + if int(episode['episode']) == episodeNum: + ep = episode + if ep: + self.url = 'https://animepahe.com/api?m=links&id=' + str(ep['anime_id']) + '&session=' + ep['session'] + '&p=kwik' # noqa + else: + raise NotFoundError + + episode_data = helpers.get(self.url).json() + + data = episode_data['data'] + qualities = [x + 'p' for f in data for x in f] + + sources_list = [ + f[x]['kwik_adfly'] for f in data for x in f + ] + + for i, quality in enumerate(qualities): + if self.quality == quality: + return [("kwik", sources_list[i])] + + return [("kwik", x) for x in sources_list] diff --git a/anime_downloader/sites/animerush.py b/anime_downloader/sites/animerush.py index f0d1f35..6b22300 100644 --- a/anime_downloader/sites/animerush.py +++ b/anime_downloader/sites/animerush.py @@ -1,7 +1,9 @@ from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites import helpers -from anime_downloader.extractors import get_extractor +from anime_downloader.extractors.init import ALL_EXTRACTORS + import logging +import re logger = logging.getLogger(__name__) @@ -23,7 +25,7 @@ class AnimeRush(Anime, sitename='animerush'): def _scrape_episodes(self): soup = helpers.soupify(helpers.get(self.url)).select('div.episode_list > a') - return ['https:' + i.get('href') for i in soup[::-1]] + return ['https:' + i.get('href') for i in soup[::-1] if "Coming soon" not in str(i)] def _scrape_metadata(self): soup = helpers.soupify(helpers.get(self.url)) @@ -41,12 +43,20 @@ class AnimeRushEpisode(AnimeEpisode, sitename='animerush'): sources_list = [] # Sources [0] is the url [1] is the name of the source # eg: [['https://mp4upload.com/embed-r07potgdvbkr-650x370.html', 'Mp4upload Video']] + domain_regex = r"\/\/(?:\w{3,6}\.)?(.*?)\." 
for i in sources: - # Not exactly ideal setup for more extractors - # If more advanced sources needs to get added look at watchmovie or darkanime - server = 'yourupload' if 'yourupload' in i[0] else 'mp4upload' + found = False + domain = re.findall(domain_regex, i[0])[0].lower() + + for extractor in ALL_EXTRACTORS: + if re.match(extractor['regex'], domain): + found = True + + if not found: + continue + sources_list.append({ - 'extractor': server, + 'extractor': domain, 'url': i[0], 'server': i[1], 'version': 'subbed' diff --git a/anime_downloader/sites/animesimple.py b/anime_downloader/sites/animesimple.py index 9939fc4..eb32063 100644 --- a/anime_downloader/sites/animesimple.py +++ b/anime_downloader/sites/animesimple.py @@ -20,7 +20,7 @@ class AnimeSimple(Anime, sitename='animesimple'): return [ SearchResult( title=i.get('title') if i.get('title') else i.select('img')[0].get('alt'), - url=i.get('href')) + url=("https:" if i.get('href')[0] == '/' else "") + i.get('href')) for i in search_results ] @@ -34,7 +34,7 @@ class AnimeSimple(Anime, sitename='animesimple'): 'top': 10000, # max 10 000 episodes 'bottom': 0, })) - return [i.get('href') for i in elements] + return [("https:" if i.get('href')[0] == '/' else "") + i.get('href') for i in elements] def _scrape_metadata(self): self.title = helpers.soupify(helpers.get(self.url)).select('li.breadcrumb-item.active')[0].text diff --git a/anime_downloader/sites/animestar.py b/anime_downloader/sites/animestar.py new file mode 100644 index 0000000..7467b64 --- /dev/null +++ b/anime_downloader/sites/animestar.py @@ -0,0 +1,63 @@ +import re +from urllib.parse import urlparse +from datetime import datetime +from requests import Request + +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult +from anime_downloader.sites import helpers +from anime_downloader.const import get_random_header + +_headers = get_random_header() | { 'X-Requested-By': 'animestar-web'} + + +class AnimeStar(Anime, sitename='animestar'): + sitename = 'animestar' + # Neither 720p nor 1080p are guaranteed, but they could happen + QUALITIES = ['360p', '480p', '540p', '720p', '1080p'] + _real_getter = 'https://api.animestar.app/api/drama?id=' + + @classmethod + def search(cls, query): + return [ + SearchResult( + title=i['name'], + url='https://animestar.app/show-details/deadbeef/'+i['_id'], + poster=i['image'], + meta={'genre': i['genre']}, + meta_info={ + 'title_cleaned': re.sub(r'\(.*?\)', '', i['name']).strip() + }) + for i in helpers.get('https://api.animestar.app/api/drama/search', + params={'q': query}, + headers=_headers).json() + ] + + + def _scrape_episodes(self): + return [ + Request('GET', 'https://api.animestar.app/api/utility/get-stream-links', + params={'url': i['videoUrl'], 'server': 1} + ).prepare().url + for i in sorted(helpers.get(self._real_getter+urlparse(self.url).path.split('/')[-1], + headers=_headers).json()['episodes'], + key=lambda i: i['number']) + ] + + def _scrape_metadata(self): + resp = helpers.get(self._real_getter+urlparse(self.url).path.split('/')[-1], + headers=_headers).json() + self.title = resp['name'] + self.subbed = resp['audioType'] == 'SUB' + self.meta['names_alt'] = resp['altNames'] + self.meta['year'] = resp['releaseYear'] + self.meta['status'] = resp['tvStatus'] + self.meta['genre'] = resp['genre'] + self.meta['type'] = resp['type'] + self.meta['story'] = resp['synopsis'] + self.meta['views'] = resp['views'] + self.meta['ctime'] = datetime.fromtimestamp(resp['createdAt']/1000).strftime('%Y-%m-%d %H:%M') + 
self.meta['mtime'] = datetime.fromtimestamp(resp['modifiedAt']/1000).strftime('%Y-%m-%d %H:%M') + +class AnimeStarEpisode(AnimeEpisode, sitename='animestar'): + def _get_sources(self): + return [('no_extractor', helpers.get(self.url, headers=_headers).json()['url'])] diff --git a/anime_downloader/sites/animesuge.py b/anime_downloader/sites/animesuge.py index 22ebe81..c438b1a 100644 --- a/anime_downloader/sites/animesuge.py +++ b/anime_downloader/sites/animesuge.py @@ -5,12 +5,14 @@ from anime_downloader.sites import helpers import re import json + class AnimeSuge(Anime, sitename="animesuge"): sitename = "animesuge" @classmethod def search(cls, query): - soup = helpers.soupify(helpers.get("https://animesuge.io/ajax/anime/search", params={"keyword": query}).json()['html']) + soup = helpers.soupify(helpers.get( + "https://animesuge.io/ajax/anime/search", params={"keyword": query}).json()['html']) search_results = [ SearchResult( @@ -27,8 +29,9 @@ class AnimeSuge(Anime, sitename="animesuge"): _id = re.search(r".*-(.*)", self.url).group(1) soup = helpers.soupify(helpers.get(ep_url, params={'id': _id})) - - return ['https://animesuge.io' + x.get('href') for x in soup.select('a:not(.more)')] + eps = ['https://animesuge.io' + re.search(r"(/anime.*?/ep-\d+)", x.get( + 'href')).group(1).replace('\\', '') for x in soup.select('a:not(.more)')] + return eps def _scrape_metadata(self): self.title = helpers.soupify(helpers.get(self.url)).find("h1").text @@ -37,13 +40,17 @@ class AnimeSuge(Anime, sitename="animesuge"): class AnimeSugeEpisode(NineAnimeEpisode, sitename='animesuge'): def _get_sources(self): # Get id and ep no. from url, e.g: https://animesuge.io/anime/naruto-xx8z/ep-190 -> xx8z, 190 - _id, ep_no = re.search(r".*\/anime\/.*-(.*?)\/.*-(\d+)$", self.url).group(1, 2) + _id, ep_no = re.search( + r".*\/anime\/.*-(.*?)\/.*-(\d+)$", self.url).group(1, 2) # Get sources json from html, e.g: """ 190""" + # data_sources = json.loads( data_sources = json.loads(helpers.soupify(helpers.get("https://animesuge.io/ajax/anime/servers", - params={"id": _id, "episode": ep_no})).select(f"a[data-base='{ep_no}']")[0].get("data-sources")) + params={"id": _id, "episode": ep_no}).json()['html']).select(f"a[data-base='{ep_no}']")[0].get("data-sources")) + + # # Only includes supported # Unsupported ones {'28': 'openstream'} @@ -60,14 +67,18 @@ class AnimeSugeEpisode(NineAnimeEpisode, sitename='animesuge'): params={"id": _id}).json()['url'] break # Makes it more consistent. - except HTTPError: + except requests.HTTPError: time.sleep(5) continue server = id_source_map[key] + link = self.decodeString(link) + + if 'mp4upload.com/embed' in link: + link = re.search(r"(https://.*?\.html)", link).group(1) sources_list.append({ 'extractor': server, - 'url': self.decodeString(link), + 'url': link, 'server': server, # This may not be true, can't see the info on page. 
'version': 'subbed' diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py new file mode 100644 index 0000000..c2df997 --- /dev/null +++ b/anime_downloader/sites/animtime.py @@ -0,0 +1,122 @@ + +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult +from anime_downloader.sites import helpers +from difflib import get_close_matches + +import re + + +def format_title_case(text): + """ + Will format text to title case and it will have roman numbers in capital case + only I is supported so only up to III, any number bigger than that will keep its original capitalization case + """ + words = text.split() + new_text = [] + + for word in words: + if word.lower().replace('i', '') == '': + new_text += ['I' * len(word)] + continue + + elif word.lower() == 'dub': + new_text += ['(Dub)'] + continue + + new_text += [word.title()] + + return ' '.join(new_text) + + +def get_title_dict(script): + """ + Returns a tuple with two dictionaries + the 1st one has the anime slugs with their pretty title + and the 2nd one has the anime slugs with their ids + """ + script_text = helpers.get(script).text + title_function = re.search("tm=.*?}", script_text).group() + titles_dict = { + x[0]: format_title_case(x[1].replace('-', ' ')) + for x in re.findall(r"\[tm\.([a-zA-Z0-9]+?)\]=function\(\w\)\{return\"[a-zA-Z0-9\.\:/-]+?\/animtime\/([a-zA-Z-]+?)\/", script_text) + } + id_dict = { + x[0]: x[1] + for x in re.findall(r"t\[t\.(.*?)=(\d+)", title_function) + } + + for title in id_dict: + """ + For any anime that are not matched in the pretty titles dictionary (titles_dict) + + for example Bleach (with the id of 1 is not in titles_dict) + """ + if title not in titles_dict: + titles_dict[title] = ' '.join( + re.sub(r"([A-Z])", r" \1", title).split()) + + return titles_dict, id_dict + + +def get_script_link(): + soup = helpers.soupify(helpers.get('https://animtime.com')) + script = 'https://animtime.com/' + \ + soup.select('script[src*=main]')[0].get('src') + + return script + + +class AnimTime(Anime, sitename='animtime'): + sitename = 'animtime' + + @classmethod + def search(cls, query): + titles = get_title_dict(get_script_link()) + matches = get_close_matches(query, titles[0], cutoff=0.2) + + search_results = [ + SearchResult( + title=titles[0].get(match), + url='https://animtime.com/title/{}'.format( + titles[1].get(match)) + ) + for match in matches + ] + + return search_results + + def _scrape_episodes(self): + link = get_script_link() + titles = dict((y, x) for x, y in get_title_dict(link)[1].items()) + current_title = titles.get(self.url.split('/')[-1]) + + script_text = helpers.get(link).text + ep_count = int(re.search( + r"\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) + + episodes = [] + for i in range(ep_count): + episodes.append(self.url + f'/episode/{i + 1}') + + return episodes + + def _scrape_metadata(self): + titles = get_title_dict(get_script_link())[1] + self.title = next(x for x, y in titles.items() + if int(y) == int(self.url.split('/')[-1])) + + +class AnimTimeEpisode(AnimeEpisode, sitename='animtime'): + def _get_sources(self): + titles = get_title_dict(get_script_link())[1] + + current_title = next(x for x, y in titles.items() + if int(y) == int(self.url.split('/')[-3])) + current_ep = "{0:03}".format(int(self.url.split('/')[-1])) + + script_text = helpers.get(get_script_link()).text + regexed_link = re.search('tm\.' 
+ current_title.replace(" ", "") + + '\]=function\(.*?return.*?(https.*?)"}', script_text).group(1) + link = regexed_link.replace('"+t+"', current_ep) + + return [('wasabisys', link)] diff --git a/anime_downloader/sites/erairaws.py b/anime_downloader/sites/erairaws.py index 1aafcd5..bff13b3 100644 --- a/anime_downloader/sites/erairaws.py +++ b/anime_downloader/sites/erairaws.py @@ -197,7 +197,7 @@ class EraiRawsEpisode(AnimeEpisode, sitename='erai-raws'): headers = { 'cache-control': 'max-age=0', 'upgrade-insecure-requests': '1', - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101 Firefox/56.0', + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'sec-fetch-site': 'same-origin', 'sec-fetch-mode': 'navigate', diff --git a/anime_downloader/sites/fastani.py b/anime_downloader/sites/fastani.py deleted file mode 100644 index dcb013a..0000000 --- a/anime_downloader/sites/fastani.py +++ /dev/null @@ -1,80 +0,0 @@ -from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult -from anime_downloader.sites import helpers -import re -import logging - -logger = logging.getLogger(__name__) - - -class FastAni(Anime, sitename="fastani"): - - sitename = 'fastani' - - @classmethod - def getToken(cls): - resp = helpers.get("https://fastani.net") - site_text = resp.text - cookies = resp.cookies - - # Path to js file, e.g /static/js/main.f450dd1c.chunk.js - which contains the token - js_location = "https://fastani.net" + re.search(r"src=\"(\/static\/js\/main.*?)\"", site_text).group(1) - js = helpers.get(js_location).text - - # Get authorization token, e.g: {authorization:"Bearer h8X2exbErErNSxRnr6sSXAE2ycUSyrbU"} - key, token = re.search("method:\"GET\".*?\"(.*?)\".*?\"(.*?)\"", js).group(1,2) - - return ({key: token}, cookies) - - @classmethod - def search(cls, query): - headers, cookies = cls.getToken() - results = helpers.get(f"https://fastani.net/api/data?page=1&search={query}&tags=&years=", headers=headers, cookies=cookies).json() - - return [ - SearchResult( - title=x.get('title').get("english"), - # Need to know selected anime and original query for _scrape_episodes - url=f"https://fastani.net/{selected}/{query}" - ) - for selected, x in zip(range(len(results["animeData"]["cards"])), results["animeData"]["cards"]) - ] - - def _scrape_episodes(self): - headers, cookies = self.getToken() - split = self.url.split("/") - query, selected = split[-1], int(split[-2]) - anime = helpers.get(f"https://fastani.net/api/data?page=1&search={query}&tags=&years=", headers=headers, cookies=cookies).json() - - cdnData = anime["animeData"]["cards"][selected]["cdnData"] - - # Get all episodes from all seasons of the anime - # JSON Example: - """ - { - 'seasons': [{ - 'episodes': [{ - 'file': 'https://private.fastani.net/Naruto/Season 1/Naruto S01E001.mp4', - 'directory': 'https://private.fastani.net/Naruto/Season 1', - 'timestamp': '2020-09-11T16:22:48.744Z', - 'thumb': 'https://private.fastani.net/Naruto/Season 1/thumbs/20_thumbnail_001.jpg', - 'title': 'Enter: Naruto Uzumaki!' - } - ... 
- ] - } - """ - episodes = [j["file"] for i in [x["episodes"] for x in cdnData["seasons"]] for j in i] - - return episodes - - def _scrape_metadata(self): - headers, cookies = self.getToken() - split = self.url.split("/") - query, selected = split[-1], int(split[-2]) - anime = helpers.get(f"https://fastani.net/api/data?page=1&search={query}&tags=&years=", headers=headers, cookies=cookies).json() - self.title = anime["animeData"]["cards"][selected]["title"]["english"] - - -class FastAniEpisode(AnimeEpisode, sitename='fastani'): - def _get_sources(self): - return [("no_extractor", self.url)] diff --git a/anime_downloader/sites/genoanime.py b/anime_downloader/sites/genoanime.py index c7763b8..d4ede49 100644 --- a/anime_downloader/sites/genoanime.py +++ b/anime_downloader/sites/genoanime.py @@ -1,7 +1,7 @@ from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult from anime_downloader.sites import helpers - +import re class GenoAnime(Anime, sitename="genoanime"): sitename = "genoanime" @@ -38,4 +38,11 @@ class GenoAnimeEpisode(AnimeEpisode, sitename='genoanime'): def _get_sources(self): soup = helpers.soupify(helpers.get(self.url)) soup = helpers.soupify(helpers.get(soup.iframe.get("src"))) - return [("no_extractor", soup.source.get("src"))] + id_ = re.findall(r"data: {id: [\"'](.*?)[\"']}", str(soup))[0] + + response = helpers.post('https://genoanime.com/player/genovids.php', data={"id": id_}).json() # noqa + + return [ + ("no_extractor", x['src']) + for x in response['url'] + ] diff --git a/anime_downloader/sites/helpers/__init__.py b/anime_downloader/sites/helpers/__init__.py index b3f7a99..ca93725 100644 --- a/anime_downloader/sites/helpers/__init__.py +++ b/anime_downloader/sites/helpers/__init__.py @@ -1,2 +1,3 @@ from anime_downloader.sites.helpers.request import * from anime_downloader.sites.helpers.util import not_working +from anime_downloader.sites.helpers.unpacker import deobfuscate_packed_js diff --git a/anime_downloader/sites/helpers/request.py b/anime_downloader/sites/helpers/request.py index 924ea6b..c1a00b5 100644 --- a/anime_downloader/sites/helpers/request.py +++ b/anime_downloader/sites/helpers/request.py @@ -46,6 +46,8 @@ def setup(func): cf : bool cf if True performs the request through cfscrape. For cloudflare protected sites. + sel : bool + sel if True perfroms the request through selescrape (selenium). referer : str a url sent as referer in request headers ''' @@ -57,6 +59,7 @@ def setup(func): from selenium import webdriver from anime_downloader.sites.helpers import selescrape sess = selescrape + sess.cache = cache except ImportError: sess = cf_session logger.warning("This provider may not work correctly because it requires selenium to work.\nIf you want to install it then run: 'pip install selenium' .") @@ -107,6 +110,8 @@ def get(url: str, cf : bool cf if True performs the request through cfscrape. For cloudflare protected sites. + sel : bool + sel if True perfroms the request through selescrape (selenium). 
referer : str a url sent as referer in request headers ''' @@ -146,9 +151,10 @@ def soupify(res): ------- BeautifulSoup.Soup """ - if isinstance(res, requests.Response): - res = res.text - soup = BeautifulSoup(res, 'html.parser') + if isinstance(res, str): + soup = BeautifulSoup(res, 'html.parser') + else: + soup = BeautifulSoup(res.text, 'html.parser') return soup diff --git a/anime_downloader/sites/helpers/selescrape.py b/anime_downloader/sites/helpers/selescrape.py index 0a5e6ec..ec4891d 100644 --- a/anime_downloader/sites/helpers/selescrape.py +++ b/anime_downloader/sites/helpers/selescrape.py @@ -1,31 +1,14 @@ -from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.remote.remote_connection import LOGGER as serverLogger -from selenium.webdriver.support.ui import WebDriverWait from anime_downloader.const import get_random_header -from selenium.webdriver.common.by import By from urllib.parse import urlencode -from urllib.parse import urlsplit from selenium import webdriver -from bs4 import BeautifulSoup -from logging import exception from sys import platform -import requests -import os +import tempfile import logging import click import time import json -serverLogger.setLevel(logging.ERROR) -logger = logging.getLogger(__name__) - - -def get_data_dir(): - ''' - Gets the folder directory selescrape will store data, - such as cookies or browser extensions and logs. - ''' - APP_NAME = 'anime downloader' - return os.path.join(click.get_app_dir(APP_NAME), 'data') +import os def open_config(): @@ -33,8 +16,24 @@ def open_config(): return Config +cache = False +serverLogger.setLevel(logging.ERROR) +logger = logging.getLogger(__name__) +TEMP_FOLDER = os.path.join(tempfile.gettempdir(), 'AnimeDL-SeleniumCache') data = open_config() +if not os.path.isdir(TEMP_FOLDER): + os.makedirs(TEMP_FOLDER) + + +def get_data_dir(): + ''' + Gets the folder directory selescrape will store data, + such as cookies or browser extensions and logs. + ''' + APP_NAME = 'anime downloader' + return os.path.join(click.get_app_dir(APP_NAME), 'data') + def get_browser_config(): ''' @@ -50,148 +49,248 @@ def get_browser_config(): browser = os_browser[a] else: browser = 'chrome' + value = data['dl']['selescrape_browser'] value = value.lower() if value else value + if value in ['chrome', 'firefox']: browser = value + return browser def get_browser_executable(): value = data['dl']['selescrape_browser_executable_path'] executable_value = value.lower() if value else value - return executable_value + if executable_value: + return executable_value def get_driver_binary(): value = data['dl']['selescrape_driver_binary_path'] - binary_path = value.lower() if value else value - return binary_path + if value: + return value -def add_url_params(url, params): - return url if not params else url + '?' + urlencode(params) +def cache_request(sele_response): + """ + This function saves the response from a Selenium request in a json. + It uses timestamps to can know if the cache has expired or not. 
+ """ + if not cache: + return + + file = os.path.join(TEMP_FOLDER, 'selenium_cached_requests.json') + + if os.path.isfile(file): + with open(file, 'r') as f: + tmp_cache = json.load(f) + else: + tmp_cache = {} + + data = sele_response.__dict__ + url = data['url'] + url = (url[:-1] if url and url[-1] == '/' else url) + + tmp_cache[url] = { + 'data': data['text'], + 'expiry': time.time(), + 'method': data['method'], + 'cookies': data['cookies'], + 'user_agent': data['user_agent'] + } + + with open(file, 'w') as f: + json.dump(tmp_cache, f, indent=4) + + +def check_cache(url): + """ + This function checks if the cache file exists, + if it exists then it will read the file + And it will verify if the cache is less than or equal to 30 mins old + If it is, it will return it as it is. + If it isn't, it will delete the expired cache from the file and return None + If the file doesn't exist at all it will return None + """ + if not cache: + return + file = os.path.join(TEMP_FOLDER, 'selenium_cached_requests.json') + if os.path.isfile(file): + + with open(file, 'r') as f: + data = json.load(f) + + # Yes, this is ugly, + # but its the best way that I found to find the cache + # when the url is not exactly the same (a slash at the end or not) + clean_url = (url[:-1] if url and url[-1] == '/' else url) + found = False + + for link in data: + if link == clean_url: + url = link + found = True + + if not found: + return + + timestamp = data[url]['expiry'] + + if (time.time() - timestamp <= 1800): + return data[url] + else: + data.pop(url, None) + + with open(file, 'w') as f: + json.dump(data, f, indent=4) def driver_select(): ''' - it configures what each browser should do - and gives the driver variable that is used - to perform any actions below this function. + This configures what each browser should do + and returns the corresponding driver. ''' browser = get_browser_config() data_dir = get_data_dir() executable = get_browser_executable() - driver_binary = get_driver_binary() - binary = None if not driver_binary else driver_binary + binary = get_driver_binary() + if browser == 'firefox': - fireFoxOptions = webdriver.FirefoxOptions() - fireFoxOptions.headless = True - fireFoxOptions.add_argument('--log fatal') - if binary == None: - driver = webdriver.Firefox(options=fireFoxOptions, service_log_path=os.path.devnull) - else: - try: - driver = webdriver.Firefox(options=fireFoxOptions, service_log_path=os.path.devnull) - except: - driver = webdriver.Firefox(executable_path=binary, options=fireFoxOptions, service_log_path=os.path.devnull) + fireFox_Options = webdriver.FirefoxOptions() + ops = [ + "--width=1920", "--height=1080", + "-headless", "--log fatal" + ] + + for option in ops: + fireFox_Options.add_argument(option) + + fireFox_Profile = webdriver.FirefoxProfile() + fireFox_Profile.set_preference( + "general.useragent.override", get_random_header()['user-agent'] + ) + + driver = webdriver.Firefox( + # sets user-agent + firefox_profile=fireFox_Profile, + # sets various firefox settings + options=fireFox_Options, + # by default it will be None, if a binary location is in the config then it will use that + firefox_binary=None if not executable else executable, + # by default it will be "geckodriver", if a geckodriver location is in the config then it will use that + executable_path=(binary if binary else "geckodriver"), + # an attempt at stopping selenium from printing a pile of garbage to the console. 
+ service_log_path=os.path.devnull + ) + elif browser == 'chrome': from selenium.webdriver.chrome.options import Options - chrome_options = Options() - chrome_options.add_argument("--headless") - chrome_options.add_argument("--disable-gpu") + profile_path = os.path.join(data_dir, 'Selenium_chromium') - log_path = os.path.join(data_dir, 'chromedriver.log') - chrome_options.add_argument('--log-level=OFF') - chrome_options.add_argument(f"--user-data-dir={profile_path}") - chrome_options.add_argument("--no-sandbox") - chrome_options.add_argument("--window-size=1920,1080") - chrome_options.add_argument(f'user-agent={get_random_header()}') - if binary == None: - if executable == None: - driver = webdriver.Chrome(options=chrome_options) - else: - from selenium.webdriver.common.desired_capabilities import DesiredCapabilities - cap = DesiredCapabilities.CHROME - cap['binary_location'] = executable - driver = webdriver.Chrome(desired_capabilities=cap, options=chrome_options) - else: - if executable == None: - driver = webdriver.Chrome(options=chrome_options) - else: - from selenium.webdriver.common.desired_capabilities import DesiredCapabilities - cap = DesiredCapabilities.CHROME - cap['binary_location'] = executable - driver = webdriver.Chrome(executable_path=binary, desired_capabilities=cap, options=chrome_options, service_log_path=os.path.devnull) + chrome_options = Options() + + ops = [ + "--headless", "--disable-gpu", '--log-level=OFF', + f"--user-data-dir={profile_path}", "--no-sandbox", + "--window-size=1920,1080", f"user-agent={get_random_header()['user-agent']}" # noqa + ] + + for option in ops: + chrome_options.add_argument(option) + + cap = None + + if executable: + from selenium.webdriver.common.desired_capabilities import DesiredCapabilities + + cap = DesiredCapabilities.CHROME + cap['binary_location'] = executable + + driver = webdriver.Chrome( + # sets user-agent, and various chrome settings + options=chrome_options, + # by default it will be "chromedriver", if a chromedriver location is in the config then it will use that + executable_path=(binary if binary else "chromedriver"), + # by default it will be None, if a binary location is in the config then it will use that + desired_capabilities=cap, + # an attempt at stopping selenium from printing a pile of garbage to the console. + service_log_path=os.path.devnull + ) return driver -def status_select(driver, url, status='hide'): - ''' - For now it doesnt do what its name suggests, - I have planned to add a status reporter of the http response code. - This part of the code is not removed because it is part of its core. - Treat it like it isnt here. - ''' - try: - if status == 'hide': - driver.get(url) - elif status == 'show': - r = requests.head(url) - if r.status_code == 503: - raise RuntimeError("This website's sevice is unavailable or has cloudflare on.") - driver.get(url) - return r.status_code - else: - driver.get(url) - except requests.ConnectionError: - raise RuntimeError("Failed to establish a connection using the requests library.") - - def cloudflare_wait(driver): ''' It waits until cloudflare has gone away before doing any further actions. - The way it works is by getting the title of the page + The way it works is by getting the title of the page and as long as it is "Just a moment..." it will keep waiting. - This part of the code won't make the code execute slower - if the target website has not a Cloudflare redirection. - At most it will sleep 1 second as a precaution. 
- Also, i have made it time out after 30 seconds, useful if the target website is not responsive + This part of the code won't make the code execute slower + if the target website has no Cloudflare redirection. + At most it will sleep 1 second as a precaution. + Also, i have made it time out after 50 seconds, useful if the target website is not responsive and to stop it from running infinitely. ''' - abort_after = 30 + abort_after = 50 # seconds start = time.time() title = driver.title # title = "Just a moment..." - while title == "Just a moment...": - time.sleep(0.25) + while "Just a moment" in title: + time.sleep(0.35) delta = time.time() - start if delta >= abort_after: - logger.error(f'Timeout:\nCouldnt bypass cloudflare. \ - See the screenshot for more info:\n{get_data_dir()}/screenshot.png') + logger.error(f'Timeout:\tCouldnt bypass cloudflare. \ + See the screenshot for more info:\t{get_data_dir()}/screenshot.png') + return 1 title = driver.title - if not title == "Just a moment...": + if not "Just a moment" in title: break - time.sleep(1) # This is necessary to make sure everything has loaded fine. + time.sleep(2) # This is necessary to make sure everything has loaded fine. + return 0 def request(request_type, url, **kwargs): # Headers not yet supported , headers={} params = kwargs.get('params', {}) - new_url = add_url_params(url, params) - driver = driver_select() - status = status_select(driver, new_url, 'hide') - try: - cloudflare_wait(driver) - user_agent = driver.execute_script("return navigator.userAgent;") # dirty, but allows for all sorts of things above - cookies = driver.get_cookies() - text = driver.page_source - driver.close() + + url = url if not params else url + '?' + urlencode(params) + cached_data = check_cache(url) + + if cached_data: + text = cached_data['data'] + user_agent = cached_data['user_agent'] + request_type = cached_data['method'] + cookies = cached_data['cookies'] return SeleResponse(url, request_type, text, cookies, user_agent) - except: - driver.save_screenshot(f"{get_data_dir()}/screenshot.png") - driver.close() - logger.error(f'There was a problem getting the page: {new_url}. \ - See the screenshot for more info:\n{get_data_dir()}/screenshot.png') + + else: + driver = driver_select() + driver.get(url) + + try: + exit_code = cloudflare_wait(driver) + user_agent = driver.execute_script("return navigator.userAgent;") + cookies = driver.get_cookies() + text = driver.page_source + driver.close() + + if exit_code != 0: + return SeleResponse(url, request_type, None, cookies, user_agent) + + seleResponse = SeleResponse( + url, request_type, + text, cookies, + user_agent + ) + + cache_request(seleResponse) + return seleResponse + + except: + driver.save_screenshot(f"{get_data_dir()}/screenshot.png") + driver.close() + logger.error(f'There was a problem getting the page: {url}.' 
+ + '\nSee the screenshot for more info:\t{get_data_dir()}/screenshot.png') + return class SeleResponse: @@ -224,5 +323,5 @@ class SeleResponse: return self.text def __repr__(self): - return ''.format( + return ''.format( self.url, self.method, self.text, self.cookies, self.user_agent) diff --git a/anime_downloader/sites/helpers/unpacker.py b/anime_downloader/sites/helpers/unpacker.py new file mode 100644 index 0000000..4b4a29c --- /dev/null +++ b/anime_downloader/sites/helpers/unpacker.py @@ -0,0 +1,11 @@ +try: + from jsbeautifier.unpackers import javascriptobfuscator, myobfuscate, packer + UNPACKERS = [javascriptobfuscator, myobfuscate, packer] + def deobfuscate_packed_js(js): + for unpacker in UNPACKERS: + if unpacker.detect(js): + return unpacker.unpack(js) + return js +except ImportError: + def deobfuscate_packed_js(js): + return js diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index 1030e95..27a4ae9 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -2,8 +2,9 @@ from importlib import import_module ALL_ANIME_SITES = [ # ('filename', 'sitename', 'classname') - ('_4anime', '4anime', 'Anime4'), + # ('_4anime', '4anime', 'Anime4'), ('anitube', 'anitube', 'AniTube'), + ('animtime', 'animtime', 'AnimTime'), ('anime8', 'anime8', 'Anime8'), ('animebinge', 'animebinge', 'AnimeBinge'), ('animechameleon', 'gurminder', 'AnimeChameleon'), @@ -17,8 +18,10 @@ ALL_ANIME_SITES = [ ('animetake','animetake','AnimeTake'), ('animeonline','animeonline360','AnimeOnline'), ('animeout', 'animeout', 'AnimeOut'), + # ('animepahe', 'animepahe', 'AnimePahe'), ('animerush', 'animerush', 'AnimeRush'), ('animesimple', 'animesimple', 'AnimeSimple'), + ('animestar', 'animestar', 'AnimeStar'), ('animesuge', 'animesuge', 'AnimeSuge'), ('animevibe', 'animevibe', 'AnimeVibe'), ('animixplay', 'animixplay', 'AniMixPlay'), @@ -26,7 +29,6 @@ ALL_ANIME_SITES = [ ('dbanimes', 'dbanimes', 'DBAnimes'), ('erairaws', 'erai-raws', 'EraiRaws'), ('egyanime', 'egyanime', 'EgyAnime'), - ('fastani', 'fastani', 'FastAni'), ('genoanime', 'genoanime', 'GenoAnime'), ('itsaturday', 'itsaturday', 'Itsaturday'), ('justdubs', 'justdubs', 'JustDubs'), @@ -42,8 +44,9 @@ ALL_ANIME_SITES = [ ('twistmoe', 'twist.moe', 'TwistMoe'), ('tenshimoe', 'tenshi.moe', 'TenshiMoe'), ('vidstream', 'vidstream', 'VidStream'), - ('voiranime', 'voiranime', 'VoirAnime'), + # ('voiranime', 'voiranime', 'VoirAnime'), ('vostfree', 'vostfree', 'VostFree'), + ('wcostream', 'wcostream', 'WcoStream'), ] diff --git a/anime_downloader/sites/putlockers.py b/anime_downloader/sites/putlockers.py index 088c2da..31908d7 100644 --- a/anime_downloader/sites/putlockers.py +++ b/anime_downloader/sites/putlockers.py @@ -46,7 +46,7 @@ class PutLockers(Anime, sitename="putlockers"): class PutLockersEpisode(AnimeEpisode, sitename="putlockers"): def _get_sources(self): self.headers = { - "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101 Firefox/56.0"} + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0.1) Gecko/20100101 Firefox/88.0.1"} text = helpers.get(self.url).text sources_list = [] diff --git a/anime_downloader/sites/ryuanime.py b/anime_downloader/sites/ryuanime.py index c3089a0..59fdded 100644 --- a/anime_downloader/sites/ryuanime.py +++ b/anime_downloader/sites/ryuanime.py @@ -22,13 +22,16 @@ class RyuAnime(Anime, sitename='ryuanime'): @classmethod def search(cls, query): - soup = helpers.soupify(helpers.get("https://ryuanime.com/browse-anime", params={"search": query})) - 
result_data = soup.select("li.list-inline-item:has(p.anime-name):has(a.ani-link)") + soup = helpers.soupify(helpers.get( + "https://ryuanime.com/browse-anime", params={"search": query})) + result_data = soup.select( + "li.list-inline-item:has(p.anime-name):has(a.ani-link)") search_results = [ SearchResult( title=result.select("p.anime-name")[0].text, - url='https://ryuanime.com' + result.select("a.ani-link")[0].get("href") + url='https://ryuanime.com' + + result.select("a.ani-link")[0].get("href") ) for result in result_data ] @@ -36,7 +39,8 @@ class RyuAnime(Anime, sitename='ryuanime'): def _scrape_episodes(self): soup = helpers.soupify(helpers.get(self.url)) - episodes = ['https://ryuanime.com' + x.get("href") for x in soup.select("li.jt-di > a")] + episodes = ['https://ryuanime.com' + + x.get("href") for x in soup.select("li.jt-di > a")] if len(episodes) == 0: logger.warning("No episodes found") @@ -49,17 +53,16 @@ class RyuAnime(Anime, sitename='ryuanime'): class RyuAnimeEpisode(AnimeEpisode, sitename='ryuanime'): - def getLink(self, name, _id): - if name == "trollvid": - return "https://trollvid.net/embed/" + _id - elif name == "mp4upload": - return f"https://mp4upload.com/embed-{_id}.html" - elif name == "xstreamcdn": - return f"https://xstreamcdn.com/v/" + _id - def _get_sources(self): page = helpers.get(self.url).text + server_links = { + 'trollvid': 'https://trollvid.net/embed/{}', + 'mp4upload': 'https://mp4upload.com/embed-{}.html', + 'xstreamcdn': 'https://xstreamcdn.com/v/{}', + 'vidstreaming': 'https://vidstreaming.io/download?id={}' + } + # Example: """ [ @@ -69,16 +72,20 @@ class RyuAnimeEpisode(AnimeEpisode, sitename='ryuanime'): } ] """ - hosts = json.loads(re.search(r"let.*?episode.*?videos.*?(\[\{.*?\}\])", page).group(1)) + hosts = json.loads( + re.search(r"let.*?episode.*?videos.*?(\[\{.*?\}\])", page).group(1)) sources_list = [] for host in hosts: name = host.get("host") _id = host.get("id") - link = self.getLink(name, _id) + link = server_links[name].format(_id) if link: + if name == 'vidstreaming': + name = 'vidstream' + sources_list.append({ "extractor": name, "url": link, diff --git a/anime_downloader/sites/shiro.py b/anime_downloader/sites/shiro.py index c9bce16..8e0fb38 100644 --- a/anime_downloader/sites/shiro.py +++ b/anime_downloader/sites/shiro.py @@ -13,6 +13,8 @@ def get_token(): token = re.search(r'token\:\"(.*?)\"', script)[1] return token +def get_api_url(): + return "https://tapi.shiro.is" class Shiro(Anime, sitename='shiro'): sitename = 'shiro' @@ -20,18 +22,20 @@ class Shiro(Anime, sitename='shiro'): @classmethod def search(cls, query): cls.token = get_token() + cls.api_url = get_api_url() + params = { 'search': query, 'token': cls.token } - results = helpers.get('https://ani.api-web.site/advanced', params=params).json()['data'] # noqa + results = helpers.get(f'{cls.api_url}/advanced', params=params).json()['data'] # noqa if 'nav' in results: results = results['nav']['currentPage']['items'] search_results = [ SearchResult( title=i['name'], url='https://shiro.is/anime/' + i['slug'], - poster='https://ani-cdn.api-web.site/' + i['image'], + poster=f'{cls.api_url}/' + i['image'], meta={'year': i['year']}, meta_info={ 'version_key_dubbed': '(Sub)' if i['language'] == 'subbed' else '(Dub)' # noqa @@ -46,17 +50,19 @@ class Shiro(Anime, sitename='shiro'): def _scrape_episodes(self): self.token = get_token() + self.api_url = get_api_url() + slug = self.url.split('/')[-1] if 'episode' in slug: - api_link = 'https://ani.api-web.site/anime-episode/slug/' + slug 
+ api_link = f'{self.api_url}/anime-episode/slug/' + slug r = helpers.get(api_link, params={'token': self.token}).json() slug = r['data']['anime_slug'] - api_link = 'https://ani.api-web.site/anime/slug/' + slug + api_link = f'{self.api_url}/anime/slug/' + slug r = helpers.get(api_link, params={'token': self.token}).json() if r['status'] == 'Found': episodes = r['data']['episodes'] episodes = [ - 'https://ani.googledrive.stream/vidstreaming/vid-ad/' + x['videos'][0]['video_id'] # noqa + "https://cherry.subsplea.se/" + x['videos'][0]['video_id'] # noqa for x in episodes ] return episodes @@ -65,18 +71,21 @@ class Shiro(Anime, sitename='shiro'): def _scrape_metadata(self): self.token = get_token() + self.api_url = get_api_url() + + slug = self.url.split('/')[-1] if 'episode' in slug: - api_link = 'https://ani.api-web.site/anime-episode/slug/' + slug + api_link = f'{self.api_url}/anime-episode/slug/' + slug r = helpers.get(api_link, params={'token': self.token}).json() slug = r['data']['anime_slug'] - api_link = 'https://ani.api-web.site/anime/slug/' + slug + api_link = f'{self.api_url}/anime/slug/' + slug r = helpers.get(api_link, params={'token': self.token}).json() self.title = r['data']['name'] class ShiroEpisode(AnimeEpisode, sitename='shiro'): def _get_sources(self): - r = helpers.get(self.url).text - link = re.search(r'\"file\"\:\"(.*?)\"', r)[1] + r = helpers.get(self.url, referer="https://shiro.is/").text + link = re.search(r'source\s+src=\"(.*?)\"', r)[1] return [('no_extractor', link)] diff --git a/anime_downloader/sites/tenshimoe.py b/anime_downloader/sites/tenshimoe.py index 7644b5e..bf19ab7 100644 --- a/anime_downloader/sites/tenshimoe.py +++ b/anime_downloader/sites/tenshimoe.py @@ -1,41 +1,84 @@ -from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult -from anime_downloader.sites import helpers - - -class TenshiMoe(Anime, sitename='tenshi.moe'): - - sitename = 'tenshi.moe' - - @classmethod - def search(cls, query): - soup = helpers.soupify( - helpers.get('https://tenshi.moe/anime', params={'q': query})) - results = soup.select('ul.loop.anime-loop.list > li > a') - - return [ - SearchResult( - title=x['title'], - url=x['href'], - ) - for x in results - ] - - def _scrape_episodes(self): - soup = helpers.soupify(helpers.get(self.url)) - eps = soup.select( - 'li[class^=episode] > a' - ) - eps = [x['href'] for x in eps] - return eps - - def _scrape_metadata(self): - soup = helpers.soupify(helpers.get(self.url).text) - self.title = soup.title.text.split('—')[0].strip() - - -class TenshiMoeEpisode(AnimeEpisode, sitename='tenshi.moe'): - def _get_sources(self): - soup = helpers.soupify(helpers.get(self.url)) - # Might break with something other than mp4! 
- link = soup.find_all('source', type="video/mp4")[-1]['src'] - return [('no_extractor', link)] +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult +from anime_downloader.sites import helpers +import re + + +def parse_search_page(soup): + results = soup.select('ul.thumb > li > a') + return [ + SearchResult( + title=x['title'], + url=x['href'], + poster=x.find('img')['src'] + ) + for x in results + ] + + +class TenshiMoe(Anime, sitename='tenshi.moe'): + + sitename = 'tenshi.moe' + + @classmethod + def search(cls, query): + soup = helpers.soupify( + helpers.get( + 'https://tenshi.moe/anime', + params={'q': query}, + cookies={'loop-view': 'thumb'} + ) + ) + + results = parse_search_page(soup) + + while soup.select_one(".pagination"): + link = soup.select_one('a.page-link[rel="next"]') + if link: + soup = helpers.soupify( + helpers.get( + link['href'], + cookies={'loop-view': 'thumb'} + ) + ) + results.extend(parse_search_page(soup)) + else: + break + + return results + + def _scrape_episodes(self): + soup = helpers.soupify(helpers.get(self.url)) + eps = soup.select( + 'li[class*="episode"] > a' + ) + eps = [x['href'] for x in eps] + return eps + + def _scrape_metadata(self): + soup = helpers.soupify(helpers.get(self.url).text) + self.title = soup.select_one('span.value > span[title="English"]').parent.text.strip() + self.meta['year'] = int(re.findall(r"(\d{4})", soup.select_one('li.release-date .value').text)[0]) + self.meta['airing_status'] = soup.select_one('li.status > .value').text.strip() + self.meta['total_eps'] = int(soup.select_one('.entry-episodes > h2 > span').text.strip()) + self.meta['desc'] = soup.select_one('.entry-description > .card-body').text.strip() + self.meta['poster'] = soup.select_one('img.cover-image').get('src', '') + self.meta['cover'] = '' + + +class TenshiMoeEpisode(AnimeEpisode, sitename='tenshi.moe'): + QUALITIES = ['360p', '480p', '720p', '1080p'] + + def _get_sources(self): + soup = helpers.soupify(helpers.get(self.url)) + soup = soup.select_one('.embed-responsive > iframe') + + mp4moe = helpers.soupify(helpers.get(soup.get('src'), referer=self.url)) + mp4moe = mp4moe.select_one('video#player') + qualities_ = [x.get("title") for x in mp4moe.select('source')] + sources = [ + ('no_extractor', x.get('src')) + for x in mp4moe.select('source') + ] + + if self.quality in qualities_: + return [sources[qualities_.index(self.quality)]] diff --git a/anime_downloader/sites/twistmoe.py b/anime_downloader/sites/twistmoe.py index 6e65ace..66ef534 100644 --- a/anime_downloader/sites/twistmoe.py +++ b/anime_downloader/sites/twistmoe.py @@ -37,7 +37,7 @@ class TwistMoe(Anime, sitename='twist.moe'): @classmethod def search(self, query): headers = { - 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.46 Safari/537.36', + 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/605.1.15', 'x-access-token': '0df14814b9e590a1f26d3071a4ed7974' } # soup = helpers.soupify(helpers.get('https://twist.moe/', allow_redirects=True, headers=headers)) @@ -55,6 +55,7 @@ class TwistMoe(Anime, sitename='twist.moe'): animes.append(SearchResult( title=anime['title'], url='https://twist.moe/a/' + anime['slug']['slug'] + '/', + poster=f"https://media.kitsu.io/anime/poster_images/{anime['hb_id']}/large.jpg" )) animes = [ani[0] for ani in process.extract(query, animes)] return animes @@ -81,6 +82,28 @@ class TwistMoe(Anime, sitename='twist.moe'): return 
self._episode_urls + def _scrape_metadata(self): + slug = self.url.split('a/')[-1][:-1] + api_url = "https://api.twist.moe/api/anime/" + slug + res = helpers.get( + api_url, + headers={ + 'x-access-token': '0df14814b9e590a1f26d3071a4ed7974' + } + ).json() + if 'hb_id' in res: + kitsu_api_url = "https://kitsu.io/api/edge/anime/" + str(res['hb_id']) + kitsu_data = helpers.get(kitsu_api_url).json() + attributes = kitsu_data['data']['attributes'] + + self.meta['title'] = attributes['canonicalTitle'] + self.meta['year'] = attributes['startDate'].split('-')[0] + self.meta['airing_status'] = attributes['status'] + self.meta['poster'] = attributes['posterImage']['original'] + self.meta['cover'] = attributes['coverImage']['original'] + self.meta['total_eps'] = attributes['episodeCount'] + self.meta['desc'] = attributes['description'] + # From stackoverflow https://stackoverflow.com/questions/36762098/how-to-decrypt-password-from-javascript-cryptojs-aes-encryptpassword-passphras def pad(data): length = BLOCK_SIZE - (len(data) % BLOCK_SIZE) diff --git a/anime_downloader/sites/voiranime.py b/anime_downloader/sites/voiranime.py index 4f3bf35..5a431d8 100644 --- a/anime_downloader/sites/voiranime.py +++ b/anime_downloader/sites/voiranime.py @@ -13,7 +13,7 @@ class VoirAnime(Anime, sitename='voiranime'): @classmethod def search(cls, query): - search_results = helpers.soupify(helpers.get(cls.url, params={'s': query})).select('div.item-head > h3 > a') + search_results = helpers.soupify(helpers.get(cls.url, params={'s': query})).select('.post-title > h3 > a') search_results = [ SearchResult( title=i.text, @@ -23,21 +23,27 @@ class VoirAnime(Anime, sitename='voiranime'): return search_results def _scrape_episodes(self): - soup = helpers.soupify(helpers.get(self.url)) - next_page = soup.select('a.ct-btn')[0].get('href') - soup = helpers.soupify(helpers.get(next_page)) - episodes = soup.select('ul.video-series-list > li > a.btn-default') - return [i.get('href') for i in episodes] + html = helpers.get(self.url).text + episodes = list(re.findall(r"
  • \n", html)) + return episodes[::-1] def _scrape_metadata(self): soup = helpers.soupify(helpers.get(self.url)) - self.title = soup.select('div.container > h1')[0].text + self.title = soup.select_one('.post-title > h1').text class VoirAnimeEpisode(AnimeEpisode, sitename='voiranime'): def _get_sources(self): + base_url = 'https://voiranime.com/' soup = helpers.soupify(helpers.get(self.url)) + servers = [ + base_url + x['data-redirect'] + for x in soup.select('.host-select > option') + ] """These could probably be condensed down to one, but would look too spooky""" + + # code below doesnt work anymore, since voiranime introduced captcha + multilinks_regex = r'var\s*multilinks\s*=\s*\[\[{(.*?)}]];' mutilinks_iframe_regex = r"iframe\s*src=\\(\"|')([^(\"|')]*)" multilinks = re.search(multilinks_regex, str(soup)).group(1) diff --git a/anime_downloader/sites/wcostream.py b/anime_downloader/sites/wcostream.py new file mode 100644 index 0000000..978ee93 --- /dev/null +++ b/anime_downloader/sites/wcostream.py @@ -0,0 +1,71 @@ +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult +from anime_downloader.extractors import get_extractor +from anime_downloader.sites import helpers + +import re + + +class WcoStream(Anime, sitename='wcostream'): + + sitename = 'wcostream' + + @classmethod + def search(cls, query): + soup = helpers.soupify(helpers.get( + 'https://wcostream.cc/search', + params={'keyword': query} + )) + results = soup.select('.film_list-wrap > .flw-item') + + return [ + SearchResult( + title=x.find('img')['alt'], + url=x.find('a')['href'], + meta={'year': x.select_one('.fd-infor > .fdi-item').text.strip()}, + meta_info={ + 'version_key_dubbed': '(Dub)' + } + ) + for x in results + ] + + def _scrape_episodes(self): + soup = helpers.soupify(helpers.get(self.url)) + episodes = soup.select_one('#content-episodes').select('ul.nav > li.nav-item') # noqa + return [ + x.find('a')['href'] + for x in episodes + if 'https://wcostream.cc/watch' in x.find('a')['href'] + ] + + def _scrape_metadata(self): + soup = helpers.soupify(helpers.get(self.url)) + self.title = soup.select_one( + 'meta[property="og:title"]' + )['content'].split('Episode')[0].strip() + + +class WcoStreamEpisode(AnimeEpisode, sitename='wcostream'): + def _get_sources(self): + soup = helpers.soupify(helpers.get(self.url)) + servers = soup.select("#servers-list > ul > li") + servers = [ + { + "name": server.find('span').text.strip(), + "link": server.find('a')['data-embed'] + } + for server in servers + ] + + servers = sorted(servers, key=lambda x: x['name'].lower() in self.config['servers'][0].lower())[::-1] # noqa + sources = [] + + for server in servers: + ext = get_extractor('wcostream')( + server['link'], + quality=self.quality, + headers={} + ) + sources.extend([('no_extractor', x['stream_url']) for x in ext._get_data()]) # noqa + + return sources diff --git a/anime_downloader/util.py b/anime_downloader/util.py index de6e364..d60e932 100644 --- a/anime_downloader/util.py +++ b/anime_downloader/util.py @@ -77,6 +77,14 @@ def format_search_results(search_results): table = '\n'.join(table.split('\n')[::-1]) return table +def format_matches(matches): + if matches: + table = [[[p], [sr]] for p, sr, r in sorted(matches, key = lambda x: x[2], reverse=True)] + table = [a for b in table for a in b] + else: + table = [["None"]] + table = tabulate(table, ['RESULTS'], tablefmt='grid', colalign=("center",)) + return table def search(query, provider, val=None, season_info=None, ratio=50): # Will use animeinfo sync if 
diff --git a/anime_downloader/util.py b/anime_downloader/util.py
index de6e364..d60e932 100644
--- a/anime_downloader/util.py
+++ b/anime_downloader/util.py
@@ -77,6 +77,14 @@ def format_search_results(search_results):
     table = '\n'.join(table.split('\n')[::-1])
     return table
 
+def format_matches(matches):
+    if matches:
+        table = [[[p], [sr]] for p, sr, r in sorted(matches, key = lambda x: x[2], reverse=True)]
+        table = [a for b in table for a in b]
+    else:
+        table = [["None"]]
+    table = tabulate(table, ['RESULTS'], tablefmt='grid', colalign=("center",))
+    return table
 
 
 def search(query, provider, val=None, season_info=None, ratio=50):  # Will use animeinfo sync if season_info is provided
@@ -207,11 +215,11 @@
     else:
         from anime_downloader.sites.anime import AnimeEpisode
 
-    if grammar == '0':
+    if episode_grammar == '0':
         ep = sorted(anime._episode_urls)[-1]
     else:
         ep = [x for x in anime._episode_urls if x[0]
-              == int(grammar)][0]
+              == int(episode_grammar)][0]
 
     ep_cls = AnimeEpisode.subclasses[anime.sitename]
 
@@ -305,7 +313,8 @@ def format_command(cmd, episode, file_format, speed_limit, path):
         '--check-certificate=false --user-agent={useragent} --max-overall-download-limit={speed_limit} '
         '--console-log-level={log_level}',
         '{idm}': 'idman.exe /n /d {stream_url} /p {download_dir} /f {file_format}.mp4',
-        '{wget}': 'wget {stream_url} --referer={referer} --user-agent={useragent} -O {download_dir}/{file_format}.mp4 -c'
+        '{wget}': 'wget {stream_url} --referer={referer} --user-agent={useragent} -O {download_dir}/{file_format}.mp4 -c',
+        '{uget}': '/CMD/ --http-referer={referer} --http-user-agent={useragent} --folder={download_dir} --filename={file_format}.mp4 {stream_url}'
     }
 
     # Allows for passing the user agent with self.headers in the site.
@@ -313,7 +322,7 @@ def format_command(cmd, episode, file_format, speed_limit, path):
     if episode.headers.get('user-agent'):
         useragent = episode.headers['user-agent']
     else:
-        useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36'
+        useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/605.1.15'
 
     stream_url = episode.source().stream_url if not episode.url.startswith(
         'magnet:?xt=urn:btih:') else episode.url
@@ -342,6 +351,9 @@ def format_command(cmd, episode, file_format, speed_limit, path):
     if cmd == "{idm}":
         rep_dict['file_format'] = rep_dict['file_format'].replace('/', '\\')
 
+    if cmd == '{uget}':
+        cmd_dict['{uget}'] = cmd_dict['{uget}'].replace('/CMD/', 'uget-gtk' if check_in_path('uget-gtk') else 'uget')
+
     if cmd in cmd_dict:
         cmd = cmd_dict[cmd]
 
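The new `{uget}` entry keeps a `/CMD/` placeholder in the command template and swaps it for whichever uGet binary is available on PATH. A small sketch of that resolution is below; `shutil.which` stands in for the project's `check_in_path` helper, the placeholder values are purely illustrative, and `str.format` is used here for brevity where the real code substitutes placeholders through its own replacement dict:

```python
import shutil

# Same template string as the '{uget}' entry added to cmd_dict above.
cmd_dict = {
    '{uget}': '/CMD/ --http-referer={referer} --http-user-agent={useragent} '
              '--folder={download_dir} --filename={file_format}.mp4 {stream_url}',
}

# Prefer the GTK binary when it is on PATH, otherwise fall back to plain 'uget'.
binary = 'uget-gtk' if shutil.which('uget-gtk') else 'uget'

command = cmd_dict['{uget}'].replace('/CMD/', binary).format(
    referer='https://example.com/',
    useragent='Mozilla/5.0',
    download_dir='/tmp/anime',
    file_format='Example Anime_ep_1',
    stream_url='https://example.com/stream.m3u8',
)
print(command)
```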
diff --git a/docs/index.rst b/docs/index.rst
index 066167c..0e08a5d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -16,7 +16,7 @@ Features
 - Search and download.
 - Save yourselves from those malicious ads.
 - Download using external downloader ([aria2](https://aria2.github.io/) recommended).
-- Configurable using `config.json`. See [doc](https://github.com/vn-ki/anime-downloader/wiki/Config).
+- Configurable using `config.json`. See [doc](https://github.com/anime-dl/anime-downloader/wiki/Config).
 
 Supported Sites
 ---------------
diff --git a/docs/usage/dl.rst b/docs/usage/dl.rst
index 7fb84e8..ef052bf 100644
--- a/docs/usage/dl.rst
+++ b/docs/usage/dl.rst
@@ -16,7 +16,7 @@ Search and download
 
     anime dl 'code geass'
 
-To search on kissanime,
+To search on animepahe,
 
 .. code:: bash
diff --git a/docs/usage/installation.rst b/docs/usage/installation.rst
index a58b0fa..7c0b37e 100644
--- a/docs/usage/installation.rst
+++ b/docs/usage/installation.rst
@@ -19,14 +19,14 @@ Add the following to a file named install.bat and then run it as Administrator;
 
     @"%SystemRoot%\System32\WindowsPowerShell\v1.0\powershell.exe" -NoProfile -InputFormat None -ExecutionPolicy Bypass -Command " [System.Net.ServicePointManager]::SecurityProtocol = 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))" && SET "PATH=%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
     choco install -y git mpv python3 aria2 nodejs
-    refreshenv && pip3 install -U git+https://github.com/vn-ki/anime-downloader.git && echo Testing providers, the install is done && anime test
+    refreshenv && pip3 install -U git+https://github.com/anime-dl/anime-downloader.git && echo Testing providers, the install is done && anime test
 
 Windows via ``choco``
 ~~~~~~~~~~~~~~~~~~~~~
 
 Contributed by @CodaTheOtaku
 
-**NOTE** Ensure the Command Prompt (cmd) is being ran as Administrator.
+**NOTE:** Ensure the Command Prompt (cmd) is being run as Administrator.
 
 - Install `Chocolatey`_ Package manager.
 
@@ -35,7 +35,7 @@ Windows via ``choco``
 
     choco install -y git mpv python3 aria2 nodejs
 
 - Once these are installed; ::
 
-    pip3 install -U git+https://github.com/vn-ki/anime-downloader.git
+    pip3 install -U git+https://github.com/anime-dl/anime-downloader.git
 
 - Then, the commands to view a show would be; ::
@@ -65,7 +65,7 @@ all the following ``pip`` with ``pip3``.
 
 - To install the bleeding-edge version of Anime-Downloader use this alternative command;:
 
-    pip3 install -U git+https://github.com/vn-ki/anime-downloader.git
+    pip3 install -U git+https://github.com/anime-dl/anime-downloader.git
 
 - Enjoy.
 
@@ -86,7 +86,7 @@ This does not require a rooted device to work.
 
 - Install Aria2c via the following command if using Termux; ::
 
-    pkg install aria2c
+    pkg install aria2
 
 - Install Python via the following command if using Termux; ::
 
@@ -98,7 +98,7 @@ This does not require a rooted device to work.
 
 - Install Anime-Downloader via the following command after python and git are installed; ::
 
-    pip3 install -U git+https://github.com/vn-ki/anime-downloader.git
+    pip3 install -U git+https://github.com/anime-dl/anime-downloader.git
 
 - The usage commands should now match the commands used on PC.
 
@@ -123,7 +123,7 @@ The following steps install Anime-Downloader;
 
 - Firstly, clone the repository via this command; ::
 
-    git clone https://github.com/vn-ki/anime-downloader.git
+    git clone https://github.com/anime-dl/anime-downloader.git
 
 - Next, change your directory into the cloned repo. To do so, use the following case-sensitive command; ::
 
@@ -139,7 +139,8 @@ The following steps install Anime-Downloader;
 
 - Delete the highlighted line as to match the image below;
 
-:image: https://i.imgur.com/0fRiNP6.png
+.. image:: https://i.imgur.com/0fRiNP6.png
+   :width: 250
 
 - Press ctrl+o then enter then press ctrl+X.
diff --git a/setup.py b/setup.py
index a087e7c..24782f6 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ setup(
     author_email='vishnunarayan6105@gmail.com',
     description='Download your favourite anime',
     packages=find_packages(),
-    url='https://github.com/vn-ki/anime-downloader',
+    url='https://github.com/anime-dl/anime-downloader',
     keywords=['anime', 'downloader', '9anime', 'download', 'kissanime'],
     install_requires=[
         'pySmartDL>=1.3.4',
@@ -30,10 +30,11 @@ setup(
         'cfscrape>=2.0.5',
         'requests-cache>=0.4.13',
         'tabulate>=0.8.3',
-        'pycryptodome>=3.8.2',
+        'pycryptodome>=3.8.2'
     ],
     extras_require={
         'selescrape': ['selenium'],
+        'unpacker': ['jsbeautifier==1.11.0'],
         'gui': ['PyQt5>=5.15.1', 'selenium'],
         'dev': [
             'pytest',
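The new `unpacker` extra only declares jsbeautifier as an optional dependency; nothing forces it to be present at runtime. A typical way consuming code could guard such an extra is sketched below; the function name and error message are made up for illustration, and the project itself may wire jsbeautifier in differently:

```python
# Illustrative only: guarding an optional extra with a helpful error message.
def beautify_packed_js(packed_js: str) -> str:
    try:
        import jsbeautifier
    except ImportError as e:
        raise ImportError(
            "jsbeautifier is not installed; install the 'unpacker' extra to use it"
        ) from e
    # jsbeautifier.beautify reformats packed/minified JavaScript into readable source.
    return jsbeautifier.beautify(packed_js)
```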