Prioritise mp4upload as source for anistream and fix issues with… (#285)

* Rely on AnimePahe for episode naming

* Remove use of enumerate

* Add useful debug info for mp4upload

* Fix minor regex mishap for mp4upload

* Better title naming for mp4upload

* Minor tweaks complete

* MP4Upload regex minor improvement

* Make collection of sources look better

* Revert back to using enumerate for episode numbering

* Added utility function to parse episode range

* Replace episode range collecting with utility function to parse episode range

* Add grammar option to cli.py

* Make grammar more consistent

* Implement grammar parser and add as util function

* Added search to gogoanime

* Enable getting episode sources for Gogoanime

* Minor refactor for grammar parser

* Use new episode parser by default and add gogoanime to provider choices

* Fix minor oversight to identify None type passed to episode parser

* Remove explicit checks for None type in episode string parsers

* Enable retries for request session

* Make cfscrape capable of retrying

* Make provider list more readable in cli.py

* Handle failure to find stream URL better in MP4Upload extractor

* Revert changes to match master

* Update gogoanime domain

* Fix failure to skip already downloaded files

* Fix potential bug

* Enable ranged download to resume stopped download

* Avoid constantly opening and closing file in downloader

* Make init the same as main forks

* Changed files to match main

* Add new line

* Modify init

* Added animefreak

* Add useful comment for animefreak

* Added animefreak to README.md

* Use json method in helpers.get

* Update title test for animefreak

* Prioritise mp4upload as source and fix mp4upload source url

* Better title handling and more explicit errors

* More informative mp4upload exception
master
Gomile 2020-03-18 13:36:27 +02:00 committed by GitHub
parent 0c97817d00
commit 5d1af225a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 12 deletions

View File

@@ -18,22 +18,25 @@ class MP4Upload(BaseExtractor):
# Extract the important bits from the embed page, with thanks to the
# code I saw from github user py7hon in his/her mp4upload-direct
# program as inspiration for this. Only with regex.
source_parts_re = re.compile(
r'.*?false\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?',
re.DOTALL)
source_parts_re = re.compile(r'.*?false\|(.*?)\|.*?\|video\|(.*?)\|(\d+)\|.*?', re.DOTALL)
not_download_page_re = re.compile(r'type="submit" name="method_free"', re.DOTALL)
title_re = re.compile(r'h2>Download File (.*?)\.mp4<\/h2>', re.DOTALL)
mp4u_embed = helpers.get(self.url).text
domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups()
source_parts = source_parts_re.match(mp4u_embed)
if not source_parts:
raise Exception(f"Failed to find source parts to build URL {self.url}")
logger.debug('Domain: %s, Video ID: %s, Protocol: %s' %
(domain, video_id, protocol))
domain, video_id, protocol = source_parts.groups()
logger.debug('Domain: %s, Video ID: %s, Protocol: %s' % (domain, video_id, protocol))
url = self.url.replace('embed-', '')
# Return to non-embed page to collect title
mp4u_page = helpers.soupify(helpers.get(url).text)
title = mp4u_page.find('span', {'class': 'dfilename'}).text
title = title[:title.rfind('_')][:title.rfind('.')].replace(' ', '_')
mp4u_page = helpers.get(url, referer=self.url).text
title = title_re.search(mp4u_page)
# The N/A here will probably come to haunt me some day
title = title.groups()[0] if title else 'N/A'
logger.debug('Title is %s' % title)

View File

@@ -63,5 +63,5 @@ class AnistreamEpisode(AnimeEpisode, sitename='anistream.xyz'):
if v['host'] == 'trollvid':
sources.append(('trollvid', 'https://trollvid.net/embed/' + v['id']))
if v['host'] == 'mp4upload':
sources.append(('mp4upload', 'https://www.mp4upload.com/embed/{v[id]}.html'))
return sources
sources.append(('mp4upload', f'https://www.mp4upload.com/embed-{v["id"]}.html'))
return sorted(sources)