From 045efcf34f5fe991e941cee6907c53bd688764d2 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Thu, 1 Apr 2021 23:13:12 +0100 Subject: [PATCH 1/5] Add Animtime --- README.md | 1 + anime_downloader/extractors/init.py | 6 ++ anime_downloader/extractors/wasabisys.py | 11 ++++ anime_downloader/sites/animtime.py | 73 ++++++++++++++++++++++++ anime_downloader/sites/init.py | 1 + 5 files changed, 92 insertions(+) create mode 100644 anime_downloader/extractors/wasabisys.py create mode 100644 anime_downloader/sites/animtime.py diff --git a/README.md b/README.md index 3c60cc7..17c01d9 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,7 @@ Yeah. Me too! That's why this tool exists. **Details about the sites can be found in [FAQ](https://github.com/vn-ki/anime-downloader/wiki/FAQ)** - 4Anime - requires jsbeautifier +- AnimTime - AnimeBinge - Animedaisuki - Animeflix diff --git a/anime_downloader/extractors/init.py b/anime_downloader/extractors/init.py index a529519..3625c6d 100644 --- a/anime_downloader/extractors/init.py +++ b/anime_downloader/extractors/init.py @@ -168,6 +168,12 @@ ALL_EXTRACTORS = [ 'modulename': 'streamium', 'regex': 'streamium', 'class': 'Streamium' + }, + { + 'sitename': 'wasabisys', + 'modulename': 'wasabisys', + 'regex': 'wasabisys', + 'class': 'Wasabisys' } ] diff --git a/anime_downloader/extractors/wasabisys.py b/anime_downloader/extractors/wasabisys.py new file mode 100644 index 0000000..d92b538 --- /dev/null +++ b/anime_downloader/extractors/wasabisys.py @@ -0,0 +1,11 @@ +from anime_downloader.extractors.base_extractor import BaseExtractor +from anime_downloader.sites import helpers + + +class Wasabisys(BaseExtractor): + def _get_data(self): + + return { + 'stream_url': self.url, + 'referer': 'https://animtime.com/' + } diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py new file mode 100644 index 0000000..b5814ef --- /dev/null +++ b/anime_downloader/sites/animtime.py @@ -0,0 +1,73 @@ + +from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult +from anime_downloader.sites import helpers +from difflib import get_close_matches + +import re +import logging +logger = logging.getLogger(__name__) + +class AnimTime(Anime, sitename='animtime'): + sitename='animtime' + + @classmethod + def get_title_dict(cls, script): + script_text = helpers.get(script).text + title_function = re.search("tm=.*?}", script_text).group() + titles_regexed = re.findall("t\[t\.(.*?)=(\d+)", title_function) + titles = dict([(' '.join(re.sub( r"([A-Z])", r" \1", x[0]).split()), x[1]) for x in titles_regexed]) + + return titles + + @classmethod + def get_script_link(cls): + soup = helpers.soupify(helpers.get('https://animtime.com')) + script = 'https://animtime.com/' + soup.select('script[src*=main]')[0].get('src') + + return script + + @classmethod + def search(cls, query): + titles = cls.get_title_dict(cls.get_script_link()) + matches = get_close_matches(query, titles, cutoff=0.2) + + search_results = [ + SearchResult( + title=match, + url='https://animtime.com/title/{}'.format(titles.get(match)) + ) + for match in matches + ] + + logger.info(search_results) + return search_results + + def _scrape_episodes(self): + link = self.get_script_link() + titles = dict((y, x) for x, y in self.get_title_dict(link).items()) + current_title = titles.get(self.url.split('/')[-1]) + + script_text = helpers.get(link).text + ep_count = int(re.search("zd\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) + + episodes = [] + for i in range(ep_count): + episodes.append(self.url + f'/episode/{i + 1}') + + return episodes + + def _scrape_metadata(self): + titles = dict((y, x) for (x, y) in self.get_title_dict(self.get_script_link()).items()) + self.title = titles.get(self.url.split('/')[-1]) + +class AnimTimeEpisode(AnimeEpisode, sitename='animtime'): + def _get_sources(self): + titles = dict((y, x) for x, y in AnimTime.get_title_dict(AnimTime.get_script_link()).items()) + current_title = titles.get(self.url.split('/')[-3]) + current_ep = "{0:03}".format(int(self.url.split('/')[-1])) + + script_text = helpers.get(AnimTime.get_script_link()).text + regexed_link = re.search('tm\.' + current_title.replace(" ", "") + '\]=function\(.*?return.*?(https.*?)"}', script_text).group(1) + link = regexed_link.replace('"+t+"', current_ep) + + return [('wasabisys', link)] diff --git a/anime_downloader/sites/init.py b/anime_downloader/sites/init.py index 585d372..054d83b 100644 --- a/anime_downloader/sites/init.py +++ b/anime_downloader/sites/init.py @@ -4,6 +4,7 @@ ALL_ANIME_SITES = [ # ('filename', 'sitename', 'classname') ('_4anime', '4anime', 'Anime4'), ('anitube', 'anitube', 'AniTube'), + ('animtime', 'animtime', 'AnimTime'), ('anime8', 'anime8', 'Anime8'), ('animebinge', 'animebinge', 'AnimeBinge'), ('animechameleon', 'gurminder', 'AnimeChameleon'), From c794ef14c232f1cfc4b57cbcfa0b9d234e2319c5 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Thu, 1 Apr 2021 23:16:19 +0100 Subject: [PATCH 2/5] autopep8 --- anime_downloader/sites/animtime.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py index b5814ef..79a350a 100644 --- a/anime_downloader/sites/animtime.py +++ b/anime_downloader/sites/animtime.py @@ -7,22 +7,25 @@ import re import logging logger = logging.getLogger(__name__) + class AnimTime(Anime, sitename='animtime'): - sitename='animtime' + sitename = 'animtime' @classmethod def get_title_dict(cls, script): script_text = helpers.get(script).text title_function = re.search("tm=.*?}", script_text).group() titles_regexed = re.findall("t\[t\.(.*?)=(\d+)", title_function) - titles = dict([(' '.join(re.sub( r"([A-Z])", r" \1", x[0]).split()), x[1]) for x in titles_regexed]) + titles = dict([(' '.join(re.sub(r"([A-Z])", r" \1", x[0]).split()), x[1]) + for x in titles_regexed]) return titles @classmethod def get_script_link(cls): soup = helpers.soupify(helpers.get('https://animtime.com')) - script = 'https://animtime.com/' + soup.select('script[src*=main]')[0].get('src') + script = 'https://animtime.com/' + \ + soup.select('script[src*=main]')[0].get('src') return script @@ -35,9 +38,9 @@ class AnimTime(Anime, sitename='animtime'): SearchResult( title=match, url='https://animtime.com/title/{}'.format(titles.get(match)) - ) - for match in matches - ] + ) + for match in matches + ] logger.info(search_results) return search_results @@ -45,10 +48,11 @@ class AnimTime(Anime, sitename='animtime'): def _scrape_episodes(self): link = self.get_script_link() titles = dict((y, x) for x, y in self.get_title_dict(link).items()) - current_title = titles.get(self.url.split('/')[-1]) + current_title = titles.get(self.url.split('/')[-1]) script_text = helpers.get(link).text - ep_count = int(re.search("zd\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) + ep_count = int(re.search( + "zd\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) episodes = [] for i in range(ep_count): @@ -57,17 +61,21 @@ class AnimTime(Anime, sitename='animtime'): return episodes def _scrape_metadata(self): - titles = dict((y, x) for (x, y) in self.get_title_dict(self.get_script_link()).items()) + titles = dict((y, x) for (x, y) in self.get_title_dict( + self.get_script_link()).items()) self.title = titles.get(self.url.split('/')[-1]) + class AnimTimeEpisode(AnimeEpisode, sitename='animtime'): def _get_sources(self): - titles = dict((y, x) for x, y in AnimTime.get_title_dict(AnimTime.get_script_link()).items()) + titles = dict((y, x) for x, y in AnimTime.get_title_dict( + AnimTime.get_script_link()).items()) current_title = titles.get(self.url.split('/')[-3]) current_ep = "{0:03}".format(int(self.url.split('/')[-1])) script_text = helpers.get(AnimTime.get_script_link()).text - regexed_link = re.search('tm\.' + current_title.replace(" ", "") + '\]=function\(.*?return.*?(https.*?)"}', script_text).group(1) + regexed_link = re.search('tm\.' + current_title.replace(" ", "") + + '\]=function\(.*?return.*?(https.*?)"}', script_text).group(1) link = regexed_link.replace('"+t+"', current_ep) return [('wasabisys', link)] From a9b1bf98cfe171bb7afd93744108120ca09b5b91 Mon Sep 17 00:00:00 2001 From: AbdullahM0hamed <25087116+AbdullahM0hamed@users.noreply.github.com> Date: Thu, 1 Apr 2021 23:17:32 +0100 Subject: [PATCH 3/5] Remove logging --- anime_downloader/sites/animtime.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py index 79a350a..af46f24 100644 --- a/anime_downloader/sites/animtime.py +++ b/anime_downloader/sites/animtime.py @@ -4,8 +4,6 @@ from anime_downloader.sites import helpers from difflib import get_close_matches import re -import logging -logger = logging.getLogger(__name__) class AnimTime(Anime, sitename='animtime'): @@ -42,7 +40,6 @@ class AnimTime(Anime, sitename='animtime'): for match in matches ] - logger.info(search_results) return search_results def _scrape_episodes(self): From f9e2e8ce2c5ed1da0ce4f55b13785a55c64674bb Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 3 May 2021 20:16:18 +0300 Subject: [PATCH 4/5] improved the search results --- anime_downloader/sites/animtime.py | 108 ++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 32 deletions(-) diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py index af46f24..5908aed 100644 --- a/anime_downloader/sites/animtime.py +++ b/anime_downloader/sites/animtime.py @@ -6,36 +6,79 @@ from difflib import get_close_matches import re +def format_title_case(text): + """ + Will format text to title case and in will have roman numbers in capital case + only I is supported so only up to III, any number bigger than that will keep its original capitalization case + """ + words = text.split() + new_text = [] + + for word in words: + if word.lower().replace('i', '') == '': + new_text += ['I' * len(word)] + continue + + elif word.lower() == 'dub': + new_text += ['(Dub)'] + continue + + new_text += [word.title()] + + return ' '.join(new_text) + + +def get_title_dict(script): + """ + Returns a tuple with two dictionaries + the 1st one has the anime slugs with their pretty title + and the 2nd one has the anime slugs with their ids + """ + script_text = helpers.get(script).text + title_function = re.search("tm=.*?}", script_text).group() + titles_dict = { + x[0]: format_title_case(x[1].replace('-', ' ')) + for x in re.findall(r"qd\[tm\.(.*?)\]=.*?\".*?/animtime/(.*?)/", script_text) + } + id_dict = { + x[0]: x[1] + for x in re.findall(r"t\[t\.(.*?)=(\d+)", title_function) + } + + for title in id_dict: + """ + For any anime that are not matched in the pretty titles dictionary (titles_dict) + + for example Bleach (with the id of 1 is not in titles_dict) + """ + if title not in titles_dict: + titles_dict[title] = ' '.join( + re.sub(r"([A-Z])", r" \1", title).split()) + + return titles_dict, id_dict + + +def get_script_link(): + soup = helpers.soupify(helpers.get('https://animtime.com')) + script = 'https://animtime.com/' + \ + soup.select('script[src*=main]')[0].get('src') + + return script + + class AnimTime(Anime, sitename='animtime'): sitename = 'animtime' - @classmethod - def get_title_dict(cls, script): - script_text = helpers.get(script).text - title_function = re.search("tm=.*?}", script_text).group() - titles_regexed = re.findall("t\[t\.(.*?)=(\d+)", title_function) - titles = dict([(' '.join(re.sub(r"([A-Z])", r" \1", x[0]).split()), x[1]) - for x in titles_regexed]) - - return titles - - @classmethod - def get_script_link(cls): - soup = helpers.soupify(helpers.get('https://animtime.com')) - script = 'https://animtime.com/' + \ - soup.select('script[src*=main]')[0].get('src') - - return script - @classmethod def search(cls, query): - titles = cls.get_title_dict(cls.get_script_link()) - matches = get_close_matches(query, titles, cutoff=0.2) + titles = get_title_dict(get_script_link()) + matches = get_close_matches(query, titles[0], cutoff=0.2) search_results = [ SearchResult( - title=match, - url='https://animtime.com/title/{}'.format(titles.get(match)) + title=titles[0].get(match), + url='https://animtime.com/title/{}'.format( + titles[1].get(match)) ) for match in matches ] @@ -43,13 +86,13 @@ class AnimTime(Anime, sitename='animtime'): return search_results def _scrape_episodes(self): - link = self.get_script_link() - titles = dict((y, x) for x, y in self.get_title_dict(link).items()) + link = get_script_link() + titles = dict((y, x) for x, y in get_title_dict(link)[1].items()) current_title = titles.get(self.url.split('/')[-1]) script_text = helpers.get(link).text ep_count = int(re.search( - "zd\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) + r"\[tm\.{}\]=(\d+)".format(current_title.replace(' ', '')), script_text).group(1)) episodes = [] for i in range(ep_count): @@ -58,19 +101,20 @@ class AnimTime(Anime, sitename='animtime'): return episodes def _scrape_metadata(self): - titles = dict((y, x) for (x, y) in self.get_title_dict( - self.get_script_link()).items()) - self.title = titles.get(self.url.split('/')[-1]) + titles = get_title_dict(get_script_link())[1] + self.title = next(x for x, y in titles.items() + if int(y) == int(self.url.split('/')[-1])) class AnimTimeEpisode(AnimeEpisode, sitename='animtime'): def _get_sources(self): - titles = dict((y, x) for x, y in AnimTime.get_title_dict( - AnimTime.get_script_link()).items()) - current_title = titles.get(self.url.split('/')[-3]) + titles = get_title_dict(get_script_link())[1] + + current_title = next(x for x, y in titles.items() + if int(y) == int(self.url.split('/')[-3])) current_ep = "{0:03}".format(int(self.url.split('/')[-1])) - script_text = helpers.get(AnimTime.get_script_link()).text + script_text = helpers.get(get_script_link()).text regexed_link = re.search('tm\.' + current_title.replace(" ", "") + '\]=function\(.*?return.*?(https.*?)"}', script_text).group(1) link = regexed_link.replace('"+t+"', current_ep) From 0e106d66e8c8c7331dd7aeb8ac56b6609c8822f0 Mon Sep 17 00:00:00 2001 From: Arjix <53124886+ArjixGamer@users.noreply.github.com> Date: Mon, 3 May 2021 20:17:26 +0300 Subject: [PATCH 5/5] Update animtime.py --- anime_downloader/sites/animtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anime_downloader/sites/animtime.py b/anime_downloader/sites/animtime.py index 5908aed..cdaa7b7 100644 --- a/anime_downloader/sites/animtime.py +++ b/anime_downloader/sites/animtime.py @@ -8,7 +8,7 @@ import re def format_title_case(text): """ - Will format text to title case and in will have roman numbers in capital case + Will format text to title case and it will have roman numbers in capital case only I is supported so only up to III, any number bigger than that will keep its original capitalization case """ words = text.split()