anime-downloader/anime_downloader/extractors/mp4upload.py

53 lines
1.8 KiB
Python

import logging
import re
from anime_downloader.extractors.base_extractor import BaseExtractor
from anime_downloader.sites import helpers
logger = logging.getLogger(__name__)
class MP4Upload(BaseExtractor):
'''Extracts video url from mp4upload embed pages, performs a request
back to the non-embed mp4upload page to extract the title of the video
albeit imperfectly as mp4upload doesn't place full title on the main
page of whichever video you are dealing with.
'''
def _get_data(self):
# Extract the important bits from the embed page, with thanks to the
# code I saw from github user py7hon in his/her mp4upload-direct
# program as inspiration for this. Only with regex.
source_parts_re = re.compile(
r'.*?100\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?',
re.DOTALL)
mp4u_embed = helpers.get(self.url).text
domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups()
logger.debug('Domain: %s, Video ID: %s, Protocol: %s' %
(domain, video_id, protocol))
url = self.url.replace('embed-', '')
# Return to non-embed page to collect title
mp4u_page = helpers.soupify(helpers.get(url).text)
title = mp4u_page.find('span', {'class': 'dfilename'}).text
title = title[:title.rfind('_')][:title.rfind('.')].replace(' ', '_')
logger.debug('Title is %s' % title)
# Create the stream url
stream_url = 'https://{}.mp4upload.com:{}/d/{}/{}.mp4'
stream_url = stream_url.format(domain, protocol, video_id, title)
logger.debug('Stream URL: %s' % stream_url)
return {
'stream_url': stream_url,
'meta': {
'title': title,
'thumbnail': ''
}
}