Fixes MP4Upload issues and minor improvements (#72)

Some minor improvements and fixes have been made. The MP4Upload extractor is guaranteed to return the correct URL unless the mp4upload site changes. An SSL error can still occur when the HTTPDownloader class attempts to download from that URL, though I believe that is an issue with HTTPDownloader itself, as this extractor is currently resolving the URL correctly. So far I've mostly noticed the SSL error when trying to download through masterani, as the URLs extracted from it seem to require that; it works just fine with animepahe. To test, run the following in Python to see the link resolve correctly: `MP4Upload('https://mp4upload.com/embed-dz2jeya02ace.html').stream_url` (this is an embed link from masterani).
2018-08-31 10:17:03 +02:00 · 2018-08-31 10:17:03 +02:00 · 0049ea6b20
parent 20fa9db24d
commit 0049ea6b20
2 changed files with 15 additions and 7 deletions
--- a/anime_downloader/extractors/mp4upload.py
+++ b/anime_downloader/extractors/mp4upload.py
@ -1,3 +1,4 @@
+import logging
 import re
 import requests
 from bs4 import BeautifulSoup
@ -15,23 +16,30 @@ class MP4Upload(BaseExtractor):
        # code I saw from github user py7hon in his/her mp4upload-direct
        # program as inspiration for this. Only with regex.
        source_parts_re = re.compile(
-                                r'.*?(www\d).*?\|video\|(.*?)\|(\d+)\|.*?',
+                                r'.*?(www\d+).*?\|video\|(.*?)\|(\d+)\|.*?',
                                re.DOTALL)

        mp4u_embed = requests.get(self.url).text
        domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups()

+        logging.debug('Domain: %s, Video ID: %s, Protocol: %s' %
+                      (domain, video_id, protocol))
+
        url = self.url.replace('embed-', '')
        # Return to non-embed page to collect title
        mp4u_page = BeautifulSoup(requests.get(url).text, 'html.parser')

        title = mp4u_page.find('span', {'class': 'dfilename'}).text
-        title = title[:title.rfind('_')]
+        title = title[:title.rfind('_')][:title.rfind('.')].replace(' ', '_')
+
+        logging.debug('Title is %s' % title)

        # Create the stream url
        stream_url = 'https://{}.mp4upload.com:{}/d/{}/{}.mp4'
        stream_url = stream_url.format(domain, protocol, video_id, title)

+        logging.debug('Stream URL: %s' % stream_url)
+
        return {
            'stream_url': stream_url,
            'meta': {
--- a/anime_downloader/sites/animepahe.py
+++ b/anime_downloader/sites/animepahe.py
@ -97,12 +97,12 @@ class AnimePahe(BaseAnimeCF):
        # Avoid changing original list
        episodes = episodes[:]

-        # If episodes is not an empty list we ensure that we start off
-        # from the length of the episodes list to get correct episode
-        # numbers
-        for no, anime_ep in enumerate(ani_json, len(episodes)):
+        for anime_ep in ani_json:
+            epi_no = anime_ep['episode']
+
            episodes.append(
-                (no+1, self.url + '/' + str(anime_ep['id']),)
+                (epi_no if not epi_no.startswith('0') else epi_no[1:],
+                 self.url + '/' + str(anime_ep['id']),)
            )

        return episodes