From 5d1af225a7319def43d28c55b8475f94bdad3d6a Mon Sep 17 00:00:00 2001 From: Gomile <39145128+ngomile@users.noreply.github.com> Date: Wed, 18 Mar 2020 13:36:27 +0200 Subject: [PATCH] =?UTF-8?q?Prioritise=20mp4upload=20as=20source=20for=20an?= =?UTF-8?q?istream=20and=20fix=20issues=20with=E2=80=A6=20(#285)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Rely on AnimePahe for episode naming * Remove use of enumerate * Add useful debug info for mp4upload * Fix minor regex mishap for mp4upload * Better title naming for mp4upload * Minor tweaks complete * MP4Upload regex minor improvement * Make collection of sources look better * Revert back to using enumerate for episode numbering * Added utility function to parse episode range * Replace episode range collecting with utility function to parse episode range * Add grammar option to cli.py * Make grammar more consistent * Implement grammar parser and add as util function * Added search to gogoanime * Enable getting episode sources for Gogoanime * Minor refactor for grammar parser * Use new episode parser by default and add gogoanime to provider choices * Fix minor oversight to identify None type passed to episode parser * Remove explicit checks for None type in episode string parsers * Enable retries for request session * Make cfscrape capable of retrying * Make provider list more readable in cli.py * Handle failure to find stream URL better in MP4Upload extractor * Revert changes to match master * Update gogoanime domain * Fix failure to skip already downloaded files * Fix potential bug * Enable ranged download to resume stopped download * Avoid constantly opening and closing file in downloader * Make init the same as main forks * Changed files to match main * Add new line * Modify init * Added animefreak * Add useful comment for animefreak * Added animefreak to README.md * Use json method in helpers.get * Update title test for animefreak * Prioritise mp4upload as source and fix mp4upload source url * Better title handling and more explicit errors * More informative mp4upload exception --- anime_downloader/extractors/mp4upload.py | 23 +++++++++++++---------- anime_downloader/sites/anistream.py | 4 ++-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/anime_downloader/extractors/mp4upload.py b/anime_downloader/extractors/mp4upload.py index 52fdf46..bc2d7c9 100644 --- a/anime_downloader/extractors/mp4upload.py +++ b/anime_downloader/extractors/mp4upload.py @@ -18,22 +18,25 @@ class MP4Upload(BaseExtractor): # Extract the important bits from the embed page, with thanks to the # code I saw from github user py7hon in his/her mp4upload-direct # program as inspiration for this. Only with regex. - source_parts_re = re.compile( - r'.*?false\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?', - re.DOTALL) + source_parts_re = re.compile(r'.*?false\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?', re.DOTALL) + not_download_page_re = re.compile(r'type="submit" name="method_free"', re.DOTALL) + title_re = re.compile(r'h2>Download File (.*?)\.mp4<\/h2>', re.DOTALL) mp4u_embed = helpers.get(self.url).text - domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups() + source_parts = source_parts_re.match(mp4u_embed) + if not source_parts: + raise Exception(f"Failed to find source parts to build URL {self.url}") - logger.debug('Domain: %s, Video ID: %s, Protocol: %s' % - (domain, video_id, protocol)) + domain, video_id, protocol = source_parts.groups() + + logger.debug('Domain: %s, Video ID: %s, Protocol: %s' % (domain, video_id, protocol)) url = self.url.replace('embed-', '') # Return to non-embed page to collect title - mp4u_page = helpers.soupify(helpers.get(url).text) - - title = mp4u_page.find('span', {'class': 'dfilename'}).text - title = title[:title.rfind('_')][:title.rfind('.')].replace(' ', '_') + mp4u_page = helpers.get(url, referer=self.url).text + title = title_re.search(mp4u_page) + # The N/A here will probably come to haunt me some day + title = title.groups()[0] if title else 'N/A' logger.debug('Title is %s' % title) diff --git a/anime_downloader/sites/anistream.py b/anime_downloader/sites/anistream.py index f7c97c9..2625560 100644 --- a/anime_downloader/sites/anistream.py +++ b/anime_downloader/sites/anistream.py @@ -63,5 +63,5 @@ class AnistreamEpisode(AnimeEpisode, sitename='anistream.xyz'): if v['host'] == 'trollvid': sources.append(('trollvid', 'https://trollvid.net/embed/' + v['id'])) if v['host'] == 'mp4upload': - sources.append(('mp4upload', 'https://www.mp4upload.com/embed/{v[id]}.html')) - return sources + sources.append(('mp4upload', f'https://www.mp4upload.com/embed-{v["id"]}.html')) + return sorted(sources)