2018-08-31 01:17:03 -07:00
|
|
|
import logging
|
2018-08-27 13:13:52 -07:00
|
|
|
import re
|
2018-10-04 06:00:29 -07:00
|
|
|
|
2018-08-19 13:03:07 -07:00
|
|
|
from anime_downloader.extractors.base_extractor import BaseExtractor
|
2019-03-22 06:47:00 -07:00
|
|
|
from anime_downloader.sites import helpers
|
2018-08-19 13:03:07 -07:00
|
|
|
|
2019-03-22 06:47:00 -07:00
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
2018-10-16 02:59:53 -07:00
|
|
|
|
2018-08-19 13:03:07 -07:00
|
|
|
|
|
|
|
class MP4Upload(BaseExtractor):
    '''Extracts the video url from mp4upload embed pages.

    The embed page supplies the parts needed to build the stream url. A
    second request is made back to the non-embed mp4upload page to extract
    the title of the video, albeit imperfectly as mp4upload doesn't place
    the full title on the main page of whichever video you are dealing with.
    '''

    @staticmethod
    def _clean_title(raw_title):
        '''Trim the file name shown on the page down to a bare title.

        The text is cut at whichever comes first of its last underscore and
        its last dot, and any remaining spaces become underscores.

        ``str.rfind`` returns -1 when the separator is absent; using that
        value directly as a slice bound would silently drop trailing
        characters, so separators that are not found are ignored.
        '''
        cut_points = [
            index
            for index in (raw_title.rfind('_'), raw_title.rfind('.'))
            if index != -1
        ]
        if cut_points:
            raw_title = raw_title[:min(cut_points)]
        return raw_title.replace(' ', '_')

    def _get_data(self):
        '''Build the stream url and metadata for ``self.url``.

        Returns:
            A dict with the key ``'stream_url'`` and a ``'meta'`` dict that
            holds ``'title'`` and ``'thumbnail'`` (always an empty string).

        Raises:
            AttributeError: if the embed page does not match the expected
                pattern (``match`` returns ``None``). Presumably also when
                the title element is missing from the non-embed page --
                that depends on what ``helpers.soupify`` returns.
        '''
        # Extract the important bits from the embed page, with thanks to the
        # code I saw from github user py7hon in his/her mp4upload-direct
        # program as inspiration for this. Only with regex.
        source_parts_re = re.compile(
            r'.*?100\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?',
            re.DOTALL)

        mp4u_embed = helpers.get(self.url).text
        # The third (numeric) group is named "protocol" here but ends up in
        # the port position of the stream url below.
        domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups()

        logger.debug('Domain: %s, Video ID: %s, Protocol: %s',
                     domain, video_id, protocol)

        # Return to non-embed page to collect title
        url = self.url.replace('embed-', '')
        mp4u_page = helpers.soupify(helpers.get(url).text)

        title = mp4u_page.find('span', {'class': 'dfilename'}).text
        title = self._clean_title(title)

        logger.debug('Title is %s', title)

        # Create the stream url
        stream_url = 'https://{}.mp4upload.com:{}/d/{}/{}.mp4'
        stream_url = stream_url.format(domain, protocol, video_id, title)

        logger.debug('Stream URL: %s', stream_url)

        return {
            'stream_url': stream_url,
            'meta': {
                'title': title,
                'thumbnail': ''
            }
        }
|