From 0049ea6b208d1b848b6a522daebed89ad87bb3e7 Mon Sep 17 00:00:00 2001 From: Gomile <39145128+ngomile@users.noreply.github.com> Date: Fri, 31 Aug 2018 10:17:03 +0200 Subject: [PATCH] Fixes MP4Upload issues and minor improvements (#72) Some minor improvements and fixes have been made. The MP4Upload extractor is guaranteed to return the correct URL unless changes happen to the mp4upload site. An SSL error can occur when an attempt is made to download from the URL in the HTTPDownloader class, though I believe that is an issue involving said class, as this extractor is correctly getting the URL for now. So far I've mostly noticed the SSL error problem when trying to download through masterani, as the URLs that are extracted from it seem to require that. It works just fine with animepahe, though. To test, you can just run this command in Python to see the link resolve correctly: `MP4Upload('https://mp4upload.com/embed-dz2jeya02ace.html').stream_url` which is for an embed link from masterani. --- anime_downloader/extractors/mp4upload.py | 12 ++++++++++-- anime_downloader/sites/animepahe.py | 10 +++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/anime_downloader/extractors/mp4upload.py b/anime_downloader/extractors/mp4upload.py index 70af9c9..9633757 100644 --- a/anime_downloader/extractors/mp4upload.py +++ b/anime_downloader/extractors/mp4upload.py @@ -1,3 +1,4 @@ +import logging import re import requests from bs4 import BeautifulSoup @@ -15,23 +16,30 @@ class MP4Upload(BaseExtractor): # code I saw from github user py7hon in his/her mp4upload-direct # program as inspiration for this. Only with regex. 
source_parts_re = re.compile( - r'.*?(www\d).*?\|video\|(.*?)\|(\d+)\|.*?', + r'.*?(www\d+).*?\|video\|(.*?)\|(\d+)\|.*?', re.DOTALL) mp4u_embed = requests.get(self.url).text domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups() + logging.debug('Domain: %s, Video ID: %s, Protocol: %s' % + (domain, video_id, protocol)) + url = self.url.replace('embed-', '') # Return to non-embed page to collect title mp4u_page = BeautifulSoup(requests.get(url).text, 'html.parser') title = mp4u_page.find('span', {'class': 'dfilename'}).text - title = title[:title.rfind('_')] + title = title[:title.rfind('_')][:title.rfind('.')].replace(' ', '_') + + logging.debug('Title is %s' % title) # Create the stream url stream_url = 'https://{}.mp4upload.com:{}/d/{}/{}.mp4' stream_url = stream_url.format(domain, protocol, video_id, title) + logging.debug('Stream URL: %s' % stream_url) + return { 'stream_url': stream_url, 'meta': { diff --git a/anime_downloader/sites/animepahe.py b/anime_downloader/sites/animepahe.py index bb6f78e..fff7180 100644 --- a/anime_downloader/sites/animepahe.py +++ b/anime_downloader/sites/animepahe.py @@ -97,12 +97,12 @@ class AnimePahe(BaseAnimeCF): # Avoid changing original list episodes = episodes[:] - # If episodes is not an empty list we ensure that we start off - # from the length of the episodes list to get correct episode - # numbers - for no, anime_ep in enumerate(ani_json, len(episodes)): + for anime_ep in ani_json: + epi_no = anime_ep['episode'] + episodes.append( - (no+1, self.url + '/' + str(anime_ep['id']),) + (epi_no if not epi_no.startswith('0') else epi_no[1:], + self.url + '/' + str(anime_ep['id']),) ) return episodes