From 5d1af225a7319def43d28c55b8475f94bdad3d6a Mon Sep 17 00:00:00 2001
From: Gomile <39145128+ngomile@users.noreply.github.com>
Date: Wed, 18 Mar 2020 13:36:27 +0200
Subject: [PATCH] =?UTF-8?q?Prioritise=20mp4upload=20as=20source=20for=20an?=
 =?UTF-8?q?istream=20and=20fix=20issues=20with=E2=80=A6=20(#285)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Rely on AnimePahe for episode naming

* Remove use of enumerate

* Add useful debug info for mp4upload

* Fix minor regex mishap for mp4upload

* Better title naming for mp4upload

* Minor tweaks complete

* MP4Upload regex minor improvement

* Make collection of sources look better

* Revert back to using enumerate for episode numbering

* Added utility function to parse episode range

* Replace episode range collecting with utility function to parse episode range

* Add grammar option to cli.py

* Make grammar more consistent

* Implement grammar parser and add as util function

* Added search to gogoanime

* Enable getting episode sources for Gogoanime

* Minor refactor for grammar parser

* Use new episode parser by default and add gogoanime to provider choices

* Fix minor oversight to identify None type passed to episode parser

* Remove explicit checks for None type in episode string parsers

* Enable retries for request session

* Make cfscrape capable of retrying

* Make provider list more readable in cli.py

* Handle failure to find stream URL better in MP4Upload extractor

* Revert changes to match master

* Update gogoanime domain

* Fix failure to skip already downloaded files

* Fix potential bug

* Enable ranged download to resume stopped download

* Avoid constantly opening and closing file in downloader

* Make init the same as the main fork's

* Changed files to match main

* Add new line

* Modify init

* Added animefreak

* Add useful comment for animefreak

* Added animefreak to README.md

* Use json method in helpers.get

* Update title test for animefreak

* Prioritise mp4upload as source and fix mp4upload source url

* Better title handling and more explicit errors

* More informative mp4upload exception
---
 anime_downloader/extractors/mp4upload.py | 23 +++++++++++++----------
 anime_downloader/sites/anistream.py      |  4 ++--
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/anime_downloader/extractors/mp4upload.py b/anime_downloader/extractors/mp4upload.py
index 52fdf46..bc2d7c9 100644
--- a/anime_downloader/extractors/mp4upload.py
+++ b/anime_downloader/extractors/mp4upload.py
@@ -18,22 +18,25 @@ class MP4Upload(BaseExtractor):
         # Extract the important bits from the embed page, with thanks to the
         # code I saw from github user py7hon in his/her mp4upload-direct
         # program as inspiration for this. Only with regex.
-        source_parts_re = re.compile(
-                                r'.*?false\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?',
-                                re.DOTALL)
+        source_parts_re = re.compile(r'.*?false\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?', re.DOTALL)
+        not_download_page_re = re.compile(r'type="submit" name="method_free"', re.DOTALL)
+        title_re = re.compile(r'h2>Download File (.*?)\.mp4<\/h2>', re.DOTALL)
 
         mp4u_embed = helpers.get(self.url).text
-        domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups()
+        source_parts = source_parts_re.match(mp4u_embed)
+        if not source_parts:
+            raise Exception(f"Failed to find source parts to build URL {self.url}")
 
-        logger.debug('Domain: %s, Video ID: %s, Protocol: %s' %
-                      (domain, video_id, protocol))
+        domain, video_id, protocol = source_parts.groups()
+
+        logger.debug('Domain: %s, Video ID: %s, Protocol: %s' % (domain, video_id, protocol))
 
         url = self.url.replace('embed-', '')
         # Return to non-embed page to collect title
-        mp4u_page = helpers.soupify(helpers.get(url).text)
-
-        title = mp4u_page.find('span', {'class': 'dfilename'}).text
-        title = title[:title.rfind('_')][:title.rfind('.')].replace(' ', '_')
+        mp4u_page = helpers.get(url, referer=self.url).text
+        title = title_re.search(mp4u_page)
+        # The N/A here will probably come to haunt me some day
+        title = title.groups()[0] if title else 'N/A'
 
         logger.debug('Title is %s' % title)
 
diff --git a/anime_downloader/sites/anistream.py b/anime_downloader/sites/anistream.py
index f7c97c9..2625560 100644
--- a/anime_downloader/sites/anistream.py
+++ b/anime_downloader/sites/anistream.py
@@ -63,5 +63,5 @@ class AnistreamEpisode(AnimeEpisode, sitename='anistream.xyz'):
             if v['host'] == 'trollvid':
                 sources.append(('trollvid', 'https://trollvid.net/embed/' + v['id']))
             if v['host'] == 'mp4upload':
-                sources.append(('mp4upload', 'https://www.mp4upload.com/embed/{v[id]}.html'))
-        return sources
+                sources.append(('mp4upload', f'https://www.mp4upload.com/embed-{v["id"]}.html'))
+        return sorted(sources)