diff --git a/anime_downloader/extractors/mp4upload.py b/anime_downloader/extractors/mp4upload.py index 52fdf46..bc2d7c9 100644 --- a/anime_downloader/extractors/mp4upload.py +++ b/anime_downloader/extractors/mp4upload.py @@ -18,22 +18,25 @@ class MP4Upload(BaseExtractor): # Extract the important bits from the embed page, with thanks to the # code I saw from github user py7hon in his/her mp4upload-direct # program as inspiration for this. Only with regex. - source_parts_re = re.compile( - r'.*?false\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?', - re.DOTALL) + source_parts_re = re.compile(r'.*?false\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?', re.DOTALL) + not_download_page_re = re.compile(r'type="submit" name="method_free"', re.DOTALL) + title_re = re.compile(r'h2>Download File (.*?)\.mp4<\/h2>', re.DOTALL) mp4u_embed = helpers.get(self.url).text - domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups() + source_parts = source_parts_re.match(mp4u_embed) + if not source_parts: + raise Exception(f"Failed to find source parts to build URL {self.url}") - logger.debug('Domain: %s, Video ID: %s, Protocol: %s' % - (domain, video_id, protocol)) + domain, video_id, protocol = source_parts.groups() + + logger.debug('Domain: %s, Video ID: %s, Protocol: %s' % (domain, video_id, protocol)) url = self.url.replace('embed-', '') # Return to non-embed page to collect title - mp4u_page = helpers.soupify(helpers.get(url).text) - - title = mp4u_page.find('span', {'class': 'dfilename'}).text - title = title[:title.rfind('_')][:title.rfind('.')].replace(' ', '_') + mp4u_page = helpers.get(url, referer=self.url).text + title = title_re.search(mp4u_page) + # The N/A here will probably come to haunt me some day + title = title.groups()[0] if title else 'N/A' logger.debug('Title is %s' % title) diff --git a/anime_downloader/sites/anistream.py b/anime_downloader/sites/anistream.py index f7c97c9..2625560 100644 --- a/anime_downloader/sites/anistream.py +++ b/anime_downloader/sites/anistream.py @@ -63,5 +63,5 @@ class AnistreamEpisode(AnimeEpisode, sitename='anistream.xyz'): if v['host'] == 'trollvid': sources.append(('trollvid', 'https://trollvid.net/embed/' + v['id'])) if v['host'] == 'mp4upload': - sources.append(('mp4upload', 'https://www.mp4upload.com/embed/{v[id]}.html')) - return sources + sources.append(('mp4upload', f'https://www.mp4upload.com/embed-{v["id"]}.html')) + return sorted(sources)