Fixes MP4Upload issues and minor improvements (#72)

This commit makes some minor improvements and fixes. The MP4Upload extractor is now guaranteed to return the correct URL unless the mp4upload site itself changes. An SSL error can still occur when the HTTPDownloader class attempts to download from that URL, but I believe that is an issue in HTTPDownloader itself, since this extractor currently resolves the URL correctly.

So far I have mostly noticed the SSL error when trying to download through masterani, as the URLs extracted from it seem to require it. Downloads through animepahe work just fine. To test that links resolve correctly, you can run this expression in Python: `MP4Upload('https://mp4upload.com/embed-dz2jeya02ace.html').stream_url`, which uses an embed link taken from masterani.
master
Gomile 2018-08-31 10:17:03 +02:00 committed by Vishnunarayan K I
parent 20fa9db24d
commit 0049ea6b20
2 changed files with 15 additions and 7 deletions

View File

@ -1,3 +1,4 @@
import logging
import re
import requests
from bs4 import BeautifulSoup
@ -15,23 +16,30 @@ class MP4Upload(BaseExtractor):
# code I saw from github user py7hon in his/her mp4upload-direct
# program as inspiration for this. Only with regex.
source_parts_re = re.compile(
r'.*?(www\d).*?\|video\|(.*?)\|(\d+)\|.*?',
r'.*?(www\d+).*?\|video\|(.*?)\|(\d+)\|.*?',
re.DOTALL)
mp4u_embed = requests.get(self.url).text
domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups()
logging.debug('Domain: %s, Video ID: %s, Protocol: %s' %
(domain, video_id, protocol))
url = self.url.replace('embed-', '')
# Return to non-embed page to collect title
mp4u_page = BeautifulSoup(requests.get(url).text, 'html.parser')
title = mp4u_page.find('span', {'class': 'dfilename'}).text
title = title[:title.rfind('_')]
title = title[:title.rfind('_')][:title.rfind('.')].replace(' ', '_')
logging.debug('Title is %s' % title)
# Create the stream url
stream_url = 'https://{}.mp4upload.com:{}/d/{}/{}.mp4'
stream_url = stream_url.format(domain, protocol, video_id, title)
logging.debug('Stream URL: %s' % stream_url)
return {
'stream_url': stream_url,
'meta': {

View File

@ -97,12 +97,12 @@ class AnimePahe(BaseAnimeCF):
# Avoid changing original list
episodes = episodes[:]
# If episodes is not an empty list we ensure that we start off
# from the length of the episodes list to get correct episode
# numbers
for no, anime_ep in enumerate(ani_json, len(episodes)):
for anime_ep in ani_json:
epi_no = anime_ep['episode']
episodes.append(
(no+1, self.url + '/' + str(anime_ep['id']),)
(epi_no if not epi_no.startswith('0') else epi_no[1:],
self.url + '/' + str(anime_ep['id']),)
)
return episodes