Prioritise mp4upload as source for anistream and fix issues with… (#285)

* Rely on AnimePahe for episode naming
* Remove use of enumerate
* Add useful debug info for mp4upload
* Fix minor regex mishap for mp4upload
* Better title naming for mp4upload
* Minor tweaks complete
* MP4Upload regex minor improvement
* Make collection of sources look better
* Revert back to using enumerate for episode numbering
* Added utility function to parse episode range
* Replace episode range collecting with utility function to parse episode range
* Add grammar option to cli.py
* Make grammar more consistent
* Implement grammar parser and add as util function
* Added search to gogoanime
* Enable getting episode sources for Gogoanime
* Minor refactor for grammar parser
* Use new episode parser by default and add gogoanime to provider choices
* Fix minor oversight to identify None type passed to episode parser
* Remove explicit checks for None type in episode string parsers
* Enable retries for request session
* Make cfscrape capable of retrying
* Make provider list more readable in cli.py
* Handle failure to find stream URL better in MP4Upload extractor
* Revert changes to match master
* Update gogoanime domain
* Fix failure to skip already downloaded files
* Fix potential bug
* Enable ranged download to resume stopped download
* Avoid constantly opening and closing file in downloader
* Make init the same as main forks
* Changed files to match main
* Add new line
* Modify init
* Added animefreak
* Add useful comment for animefreak
* Added animefreak to README.md
* Use json method in helpers.get
* Update title test for animefreak
* Prioritise mp4upload as source and fix mp4upload source url
* Better title handling and more explicit errors
* More informative mp4upload exception
2020-03-18 13:36:27 +02:00 · 2020-03-18 13:36:27 +02:00 · 5d1af225a7
parent 0c97817d00
commit 5d1af225a7
2 changed files with 15 additions and 12 deletions
--- a/anime_downloader/extractors/mp4upload.py
+++ b/anime_downloader/extractors/mp4upload.py
@@ -18,22 +18,25 @@ class MP4Upload(BaseExtractor):
        # Extract the important bits from the embed page, with thanks to the
        # code I saw from github user py7hon in his/her mp4upload-direct
        # program as inspiration for this. Only with regex.
-        source_parts_re = re.compile(
-                                r'.*?false\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?',
-                                re.DOTALL)
+        source_parts_re = re.compile(r'.*?false\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?', re.DOTALL)
+        not_download_page_re = re.compile(r'type="submit" name="method_free"', re.DOTALL)
+        title_re = re.compile(r'h2>Download File (.*?)\.mp4<\/h2>', re.DOTALL)

        mp4u_embed = helpers.get(self.url).text
-        domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups()
+        source_parts = source_parts_re.match(mp4u_embed)
+        if not source_parts:
+            raise Exception(f"Failed to find source parts to build URL {self.url}")

-        logger.debug('Domain: %s, Video ID: %s, Protocol: %s' %
-                      (domain, video_id, protocol))
+        domain, video_id, protocol = source_parts.groups()
+
+        logger.debug('Domain: %s, Video ID: %s, Protocol: %s' % (domain, video_id, protocol))

        url = self.url.replace('embed-', '')
        # Return to non-embed page to collect title
-        mp4u_page = helpers.soupify(helpers.get(url).text)
-
-        title = mp4u_page.find('span', {'class': 'dfilename'}).text
-        title = title[:title.rfind('_')][:title.rfind('.')].replace(' ', '_')
+        mp4u_page = helpers.get(url, referer=self.url).text
+        title = title_re.search(mp4u_page)
+        # The N/A here will probably come to haunt me some day
+        title = title.groups()[0] if title else 'N/A'

        logger.debug('Title is %s' % title)

--- a/anime_downloader/sites/anistream.py
+++ b/anime_downloader/sites/anistream.py
@@ -63,5 +63,5 @@ class AnistreamEpisode(AnimeEpisode, sitename='anistream.xyz'):
            if v['host'] == 'trollvid':
                sources.append(('trollvid', 'https://trollvid.net/embed/' + v['id']))
            if v['host'] == 'mp4upload':
-                sources.append(('mp4upload', 'https://www.mp4upload.com/embed/{v[id]}.html'))
-        return sources
+                sources.append(('mp4upload', f'https://www.mp4upload.com/embed-{v["id"]}.html'))
+        return sorted(sources)