added animepahe

created mp4upload extractor but not implemented
master
ngomile 2018-08-19 22:03:07 +02:00
parent c01f4637c3
commit 2e7833da09
18 changed files with 469 additions and 243 deletions

View File

@ -1 +1 @@
__version__ = '3.1.1'
__version__ = '3.4.2'

View File

@ -46,6 +46,9 @@ def cli():
@click.option(
'--quality', '-q', type=click.Choice(['360p', '480p', '720p', '1080p']),
help='Specify the quality of episode. Default-720p')
@click.option(
'--fallback-qualities', '-fq', cls=util.ClickListOption,
help='Specifiy the order of fallback qualities as a list.')
@click.option(
'--force-download', '-f', is_flag=True,
help='Force downloads even if file exists')
@ -61,7 +64,7 @@ def cli():
@click.option(
'--provider',
help='The anime provider (website) for search.',
type=click.Choice(['9anime', 'kissanime'])
type=click.Choice(['9anime', 'kissanime', 'twist.moe', 'animepahe'])
)
@click.option(
'--external-downloader', '-xd',
@ -69,10 +72,16 @@ def cli():
'Use "{aria2}" to use aria2 as downloader. See github wiki.',
metavar='DOWNLOAD COMMAND'
)
@click.option(
'--chunk-size',
help='Chunk size for downloading in chunks(in MB). Use this if you '
'experience throttling.',
type=int
)
@click.pass_context
def dl(ctx, anime_url, episode_range, url, player, skip_download, quality,
force_download, log_level, download_dir, file_format, provider,
external_downloader):
external_downloader, chunk_size, fallback_qualities):
""" Download the anime using the url or search for it.
"""
@ -86,7 +95,8 @@ def dl(ctx, anime_url, episode_range, url, player, skip_download, quality,
cls = get_anime_class(anime_url)
try:
anime = cls(anime_url, quality=quality)
anime = cls(anime_url, quality=quality,
fallback_qualities=fallback_qualities)
except Exception as e:
if log_level != 'DEBUG':
echo(click.style(str(e), fg='red'))
@ -127,10 +137,13 @@ def dl(ctx, anime_url, episode_range, url, player, skip_download, quality,
util.external_download(external_downloader, episode,
file_format, path=download_dir)
continue
if chunk_size is not None:
chunk_size *= 1e6
chunk_size = int(chunk_size)
episode.download(force=force_download,
path=download_dir,
format=file_format)
format=file_format,
range_size=chunk_size)
print()
@ -158,8 +171,9 @@ def dl(ctx, anime_url, episode_range, url, player, skip_download, quality,
@click.option(
'--provider',
help='The anime provider (website) for search.',
type=click.Choice(['9anime', 'kissanime'])
type=click.Choice(['9anime', 'kissanime', 'twist.moe'])
)
@click.option(
'--log-level', '-ll', 'log_level',
type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR']),

View File

@ -12,6 +12,7 @@ DEFAULT_CONFIG = {
'skip_download': False,
'download_dir': '.',
'quality': '720p',
'fallback_qualities': ['720p', '480p', '360p'],
'force_download': False,
'log_level': 'INFO',
'file_format': '{anime_title}/{anime_title}_{ep_no}',

View File

@ -0,0 +1,5 @@
from anime_downloader.downloader.http_downloader import HTTPDownloader
def get_downloader(downloader):
return HTTPDownloader

View File

@ -0,0 +1,66 @@
import os
import requests
import time
import logging
import sys
from anime_downloader import util
class BaseDownloader:
def __init__(self, source, path, force, range_size=None):
logging.info(path)
self.url = source.stream_url
self.referer = source.referer
self.path = path
self.range_size = range_size
util.make_dir(path.rsplit('/', 1)[0])
self.chunksize = 16384
r = requests.get(self.url, stream=True)
self.total_size = int(r.headers['Content-length'])
if os.path.exists(path):
if abs(os.stat(path).st_size - self.total_size)<10 and not force:
logging.warning('File already downloaded. Skipping download.')
return
else:
os.remove(path)
def download(self):
self.pre_process()
self.start_time = time.time()
self.downloaded = 0
self._download()
self.post_process()
def _download(self):
raise NotImplementedError
def pre_process(self):
pass
def post_process(self):
pass
def report_chunk_downloaded(self):
self.downloaded += self.chunksize
write_status(self.downloaded, self.total_size, self.start_time)
def write_status(downloaded, total_size, start_time):
elapsed_time = time.time()-start_time
rate = (downloaded/1024)/elapsed_time if elapsed_time else 'x'
downloaded = float(downloaded)/1048576
total_size = float(total_size)/1048576
status = 'Downloaded: {0:.2f}MB/{1:.2f}MB, Rate: {2:.2f}KB/s'.format(
downloaded, total_size, rate)
sys.stdout.write("\r" + status + " "*5 + "\r")
sys.stdout.flush()

View File

@ -0,0 +1,61 @@
import requests
import os
from anime_downloader.downloader.base_downloader import BaseDownloader
class HTTPDownloader(BaseDownloader):
def _download(self):
if self.range_size is None:
self._non_range_download()
else:
self._ranged_download()
def _ranged_download(self):
http_chunksize = self.range_size
range_start = 0
range_end = http_chunksize
# Make a new file, maybe not the best way
with open(self.path, 'w'):
pass
r = requests.get(self.url, stream=True)
while self.downloaded < self.total_size:
r = requests.get(self.url,
headers=set_range(range_start, range_end),
stream=True)
if r.status_code == 206:
with open(self.path, 'ab') as f:
for chunk in r.iter_content(chunk_size=self.chunksize):
if chunk:
f.write(chunk)
self.report_chunk_downloaded()
if range_end == '':
break
range_start = os.stat(self.path).st_size
range_end += http_chunksize
if range_end > self.total_size:
range_end = ''
def _non_range_download(self):
r = requests.get(self.url, stream=True)
if r.status_code == 200:
with open(self.path, 'wb') as f:
for chunk in r.iter_content(chunk_size=self.chunksize):
if chunk:
f.write(chunk)
self.report_chunk_downloaded()
def set_range(start=0, end=''):
headers = {
'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101"
"Firefox/56.0"
}
headers['Range'] = 'bytes={}-{}'.format(start, end)
return headers

View File

@ -19,6 +19,12 @@ ALL_EXTRACTORS = [
'regex': 'stream.moe',
'class': 'StreamMoe',
},
{
'sitename': 'mp4upload',
'modulename': 'mp4upload',
'regex': 'mp4upload',
'class': 'MP4Upload'
}
]

View File

@ -0,0 +1,6 @@
from anime_downloader.extractors.base_extractor import BaseExtractor
class MP4Upload(BaseExtractor):
'''
'''

View File

@ -10,7 +10,9 @@ class RapidVideo(BaseExtractor):
def _get_data(self):
url = self.url + '&q=' + self.quality
logging.debug('Calling Rapid url: {}'.format(url))
r = requests.get(url, headers=self.headers)
headers = self.headers
headers['referer'] = url
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.text, 'html.parser')
# TODO: Make these a different function. Can be reused in other classes

View File

@ -11,7 +11,7 @@ from anime_downloader.sites.exceptions import AnimeDLError, NotFoundError
from anime_downloader import util
from anime_downloader.const import desktop_headers
from anime_downloader.extractors import get_extractor
from anime_downloader.downloader import get_downloader
class BaseAnime:
sitename = ''
@ -25,8 +25,11 @@ class BaseAnime:
def search(cls, query):
return
def __init__(self, url=None, quality='720p', _skip_online_data=False):
def __init__(self, url=None, quality='720p',
fallback_qualities=['720p', '480p', '360p'],
_skip_online_data=False):
self.url = url
self._fallback_qualities = fallback_qualities
if quality in self.QUALITIES:
self.quality = quality
@ -121,8 +124,11 @@ class BaseEpisode:
self.source().stream_url
except NotFoundError:
# Issue #28
qualities = copy.copy(self.QUALITIES)
qualities.remove(self.quality)
qualities = copy.copy(self._parent._fallback_qualities)
try:
qualities.remove(self.quality)
except ValueError:
pass
for quality in qualities:
logging.warning('Quality {} not found. Trying {}.'.format(
self.quality, quality))
@ -134,7 +140,7 @@ class BaseEpisode:
break
except NotFoundError:
# Issue #28
qualities.remove(self.quality)
# qualities.remove(self.quality)
pass
def source(self, index=0):
@ -159,7 +165,7 @@ class BaseEpisode:
raise NotImplementedError
def download(self, force=False, path=None,
format='{anime_title}_{ep_no}'):
format='{anime_title}_{ep_no}', range_size=None):
logging.info('Downloading {}'.format(self.pretty_title))
if format:
file_name = util.format_filename(format, self)+'.mp4'
@ -171,32 +177,11 @@ class BaseEpisode:
else:
path = os.path.join(path, file_name)
logging.info(path)
r = requests.get(self.source().stream_url, stream=True)
util.make_dir(path.rsplit('/', 1)[0])
total_size = int(r.headers['Content-length'])
downloaded, chunksize = 0, 16384
start_time = time.time()
if os.path.exists(path):
if os.stat(path).st_size == total_size and not force:
logging.warning('File already downloaded. Skipping download.')
return
else:
os.remove(path)
if r.status_code == 200:
with open(path, 'wb') as f:
for chunk in r.iter_content(chunk_size=chunksize):
if chunk:
f.write(chunk)
downloaded += chunksize
write_status((downloaded), (total_size),
start_time)
Downloader = get_downloader('http')
downloader = Downloader(self.source(),
path, force, range_size=range_size)
downloader.download()
class SearchResult:
def __init__(self, title, url, poster):
@ -208,6 +193,9 @@ class SearchResult:
def __repr__(self):
return '<SearchResult Title: {} URL: {}>'.format(self.title, self.url)
def __str__(self):
return self.title
def write_status(downloaded, total_size, start_time):
elapsed_time = time.time()-start_time

View File

@ -0,0 +1,134 @@
import cfscrape
import logging
from anime_downloader.sites.anime import BaseEpisode, SearchResult
from anime_downloader.sites.baseanimecf import BaseAnimeCF
from anime_downloader.sites.exceptions import NotFoundError
from anime_downloader import util
scraper = cfscrape.create_scraper()
class AnimePaheEpisode(BaseEpisode):
QUALITIES = ['360p', '480p', '720p', '1080p']
def _get_sources(self):
episode_id = self.url.rsplit('/', 1)[-1]
# We will extract the episodes data through the animepahe api
# which returns the available qualities and the episode sources.
# We rely on mp4upload for animepahe as it is the most used provider.
params = {
'id': episode_id,
'm': 'embed',
'p': 'mp4upload'
}
episode = util.get_json('https://animepahe.com/api', params=params)
sources = episode['data'][episode_id]
if self.quality in sources:
return [('mp4upload', sources[self.quality]['url'])]
raise NotFoundError
class AnimePahe(BaseAnimeCF):
sitename = 'animepahe'
api_url = 'https://animepahe.com/api'
base_anime_url = 'https://animepahe.com/anime/'
QUALITIES = ['360p', '480p', '720p', '1080p']
_episodeClass = AnimePaheEpisode
@classmethod
def search(cls, query):
params = {
'l': 8,
'm': 'search',
'q': query
}
search_results = util.get_json(
cls.api_url,
params=params,
)
results = []
for search_result in search_results['data']:
search_result_info = SearchResult(
title=search_result['title'],
url=cls.base_anime_url + search_result['slug'],
poster=search_result['image']
)
logging.debug(search_result_info)
results.append(search_result_info)
return results
def get_data(self):
# Extract anime id from page, using this shoddy approach as
# I have neglected my regular expression skills to the point of
# disappointment
resp = scraper.get(self.url).text
first_search = '$.getJSON(\'/api?m=release&id='
last_search = '&l=\' + limit + \'&sort=\' + sort + \'&page=\' + page'
anime_id = (resp[resp.find(first_search)+len(first_search):
resp.find(last_search)])
self.params = {
'm': 'release',
'id': anime_id,
'sort': 'episode_asc',
'page': 1
}
resp = util.get_json(self.api_url, params=self.params)
self._scrape_metadata(resp['data'])
self._episode_urls = self._scrape_episodes(resp)
self._len = len(self._episode_urls)
return self._episode_urls
def _collect_episodes(self, ani_json, episodes=[]):
# Avoid changing original list
episodes = episodes[:]
# If episodes is not an empty list we ensure that we start off
# from the length of the episodes list to get correct episode
# numbers
for no, anime_ep in enumerate(ani_json, len(episodes)):
episodes.append(
(no+1, self.url + '/' + str(anime_ep['id']),)
)
return episodes
def _scrape_episodes(self, ani_json):
episodes = self._collect_episodes(ani_json['data'])
if not episodes:
raise NotFoundError(
'No episodes found in url "{}"'.format(self.url),
self.url
)
else:
# Check if other pages exist since animepahe only loads
# first page and make subsequent calls to the api for every
# page
start_page = ani_json['current_page'] + 1
end_page = ani_json['last_page'] + 1
for i in range(start_page, end_page):
self.params['page'] = i
resp = util.get(self.api_url, params=self.params)
episodes = self._collect_episodes(resp['data'], episodes)
return episodes
def _scrape_metadata(self, data):
self.title = data[0]['anime_title']

View File

@ -8,6 +8,8 @@ ALL_ANIME_SITES = [
('kissanime', 'kissanime', 'KissAnime'),
('kisscartoon', 'kisscartoon', 'KissCartoon'),
('masterani', 'masterani', 'Masterani'),
('twistmoe', 'twist.moe', 'TwistMoe'),
('animepahe', 'animepahe', 'AnimePahe')
]

View File

@ -94,17 +94,22 @@ class NineAnime(BaseAnime):
self._episodeClass.ts = ts
logging.debug('data-ts: {}'.format(ts))
episodes = soup.find_all('ul', ['episodes'])
# TODO: !HACK!
# The below code should be refractored whenever I'm not lazy.
# This was done as a fix to 9anime's switch to lazy loading of
# episodes. I'm busy and lazy now, so I'm writing bad code.
# Gomen'nasai
api_url = "https://www8.9anime.is/ajax/film/servers/{}"
api_url = api_url.format(self.url.rsplit('watch/', 1)[1].rsplit('.', 1)[1].split('/')[0])
soup = BeautifulSoup(requests.get(api_url).json()['html'], 'html.parser')
episodes = soup.find('div', {'class': 'server', 'data-name': 33})
episodes = episodes.find_all('li')
if episodes == []:
err = 'No episodes found in url "{}"'.format(self.url)
args = [self.url]
raise NotFoundError(err, *args)
servers = soup.find_all('span', {'class': 'tab'})[:-3]
episodes = episodes[:int(len(episodes)/len(servers))]
episode_ids = []
for x in episodes:
@ -160,11 +165,11 @@ def a_old(t, e):
def generate_(data, DD=None):
if DD is None:
DD = "ab031348"
DD = "0a9de5a4"
param_ = s(DD)
for key, value in data.items():
if DD == "ab031348":
if DD == "0a9de5a4":
trans = a(DD + key, str(value))
else:
trans = a_old(DD + key, str(value))

View File

@ -0,0 +1,101 @@
from Crypto import Random
from Crypto.Cipher import AES
import base64
from hashlib import md5
import requests
from bs4 import BeautifulSoup
import warnings
from anime_downloader.sites.anime import BaseAnime, BaseEpisode, SearchResult
# Don't warn if not using fuzzywuzzy[speedup]
with warnings.catch_warnings():
warnings.simplefilter('ignore')
from fuzzywuzzy import process
BLOCK_SIZE = 16
KEY = b"k8B$B@0L8D$tDYHGmRg98sQ7!%GOEGOX27T"
class TwistMoeEpisode(BaseEpisode):
QUALITIES = ['360p', '480p', '720p', '1080p']
def _get_sources(self):
return [('no_extractor', self.url)]
class TwistMoe(BaseAnime):
sitename = 'twist.moe'
QUALITIES = ['360p', '480p', '720p', '1080p']
_episodeClass = TwistMoeEpisode
_api_url = "https://twist.moe/api/anime/{}/sources"
@classmethod
def search(self, query):
r = requests.get('https://twist.moe')
soup = BeautifulSoup(r.text, 'html.parser')
all_anime = soup.select_one('nav.series').select('li')
animes = []
for anime in all_anime:
animes.append(SearchResult(
title=anime.find('span').contents[0].strip(),
url='https://twist.moe' + anime.find('a')['href'],
poster='',
))
animes = [ani[0] for ani in process.extract(query, animes)]
return animes
def get_data(self):
anime_name = self.url.split('/a/')[-1].split('/')[0]
url = self._api_url.format(anime_name)
episodes = requests.get(
url,
headers={
'x-access-token': '1rj2vRtegS8Y60B3w3qNZm5T2Q0TN2NR'
}
)
episodes = episodes.json()
self.title = anime_name
episode_urls = ['https://eu1.twist.moe' +
decrypt(episode['source'].encode('utf-8'), KEY).decode('utf-8')
for episode in episodes]
self._episode_urls = [(i+1, episode_url) for i, episode_url in enumerate(episode_urls)]
self._len = len(self._episode_urls)
return self._episode_urls
# From stackoverflow https://stackoverflow.com/questions/36762098/how-to-decrypt-password-from-javascript-cryptojs-aes-encryptpassword-passphras
def pad(data):
length = BLOCK_SIZE - (len(data) % BLOCK_SIZE)
return data + (chr(length)*length).encode()
def unpad(data):
return data[:-(data[-1] if type(data[-1]) == int else ord(data[-1]))]
def bytes_to_key(data, salt, output=48):
# extended from https://gist.github.com/gsakkis/4546068
assert len(salt) == 8, len(salt)
data += salt
key = md5(data).digest()
final_key = key
while len(final_key) < output:
key = md5(key + data).digest()
final_key += key
return final_key[:output]
def decrypt(encrypted, passphrase):
encrypted = base64.b64decode(encrypted)
assert encrypted[0:8] == b"Salted__"
salt = encrypted[8:16]
key_iv = bytes_to_key(passphrase, salt, 32+16)
key = key_iv[:32]
iv = key_iv[32:]
aes = AES.new(key, AES.MODE_CBC, iv)
return unpad(aes.decrypt(encrypted[16:]))

View File

@ -9,6 +9,7 @@ import re
import os
import errno
import time
import ast
from anime_downloader.sites import get_anime_class
from anime_downloader.const import desktop_headers
@ -179,3 +180,14 @@ def make_dir(path):
except OSError as e:
if e.errno != errno.EEXIST:
raise
class ClickListOption(click.Option):
def type_cast_value(self, ctx, value):
try:
if isinstance(value, list):
return value
return ast.literal_eval(value)
except:
raise click.BadParameter(value)

View File

@ -2,7 +2,7 @@ from anime_downloader import get_anime_class
from anime_downloader.sites.nineanime import NineAnime
import pytest
import os
@pytest.fixture
def anime(anime_url):
@ -21,20 +21,21 @@ def test_title(anime):
assert anime.title.lower() in ['kochinpa!', 'kochin pa!']
# This fail on remote ci servers. So disabling for now
# def test_episode(anime):
# episode1 = anime[0]
# assert episode1.stream_url.endswith('.mp4')
@pytest.mark.skipif(bool(os.environ.get('CI')), reason="Test fails on ci")
def test_episode(anime):
episode1 = anime[0]
assert episode1.stream_url.endswith('.mp4')
# def test_download(anime, tmpdir):
# eps = (anime[0], anime[6], anime[-1])
# for ep in eps:
# ep.download(path=str(tmpdir))
@pytest.mark.skipif(bool(os.environ.get('CI')), reason="Test fails on ci")
def test_download(anime, tmpdir):
eps = (anime[0], anime[6], anime[-1])
for ep in eps:
ep.download(path=str(tmpdir))
def test_search():
results = NineAnime.search('dragon ball super')
assert len(results) == 30
assert results[0].title.lower() == 'dragon ball super'
assert results[0].title.lower() in ['dragon ball super', 'dragon ball super movie']

View File

@ -1,187 +0,0 @@
from anime_downloader import cli
from click.testing import CliRunner
def assert_lines(lines, test_string):
for line in lines:
if line and not line.startswith('INFO') and not line.startswith('DEBUG'):
assert test_string in line
def test_streamurl():
runner = CliRunner()
result = runner.invoke(
cli.dl,
[
'https://www4.9anime.is/watch/the-seven-deadly-sins-signs-of-holy-war.lxqm/39px7y',
'--url',
'-q',
'720p',
'-ll',
'DEBUG'
]
)
assert result.exit_code == 0
lines = [r.strip() for r in result.output.split('\n')]
assert_lines(lines, '.mp4')
def test_download(tmpdir):
runner = CliRunner()
result = runner.invoke(
cli.dl,
[
'https://www4.9anime.is/watch/kochinpa.p6l6/j6ooy2',
'--download-dir',
str(tmpdir),
'-q',
'720p',
'--log-level',
'DEBUG',
'--file-format',
'{anime_title}_{ep_no}',
]
)
assert result.exit_code == 0
length = len(tmpdir.listdir())
assert length == 12
def test_range():
runner = CliRunner()
result = runner.invoke(
cli.dl,
[
'https://www4.9anime.is/watch/naruto.xx8z/r9k04y',
'--url',
'-e',
'50:55',
'-q',
'360p',
'-ll',
'DEBUG'
]
)
assert result.exit_code == 0
lines = [r.strip() for r in result.output.split('\n')]
assert_lines(lines, '.mp4')
assert len(lines[:-1]) == 5
def test_search():
runner = CliRunner()
result = runner.invoke(
cli.dl,
[
'dragon ball super',
'--url',
'-e',
'50:55',
'-q',
'720p',
'-ll',
'DEBUG'
],
input='1\n'
)
# Currently only checking for exit codes
assert result.exit_code == 0
result2 = runner.invoke(
cli.dl,
[
'dragon ball super',
'--url',
'-e',
'50:55',
'-q',
'720p',
'-ll',
'DEBUG'
],
input='77\n'
)
assert result2.exit_code == 1
def test_watch_new():
runner = CliRunner()
result = runner.invoke(
cli.watch,
[
'dragon ball super',
'--new',
],
input='1\n'
)
# Currently only checking for exit codes
assert result.exit_code == 0
def test_watch_list():
runner = CliRunner()
result = runner.invoke(
cli.watch,
[
'dragon ball super',
'--list',
],
)
# Currently only checking for exit codes
assert result.exit_code == 0
assert 'dragon ball super' in result.output.lower()
def test_watch_remove():
runner = CliRunner()
result = runner.invoke(
cli.watch,
[
'dragon ball super',
'--remove',
],
input='y\n',
)
# Currently only checking for exit codes
assert result.exit_code == 0
def test_watch_download(tmpdir):
runner = CliRunner()
# First add the anime
runner.invoke(
cli.watch,
[
'https://www5.9anime.is/watch/kochinpa.p6l6',
'--new',
],
)
# Now test the download
with runner.isolated_filesystem():
result = runner.invoke(
cli.watch,
[
'--list',
],
input='1\ndownload 6:7\n'
)
assert result.exit_code == 0

9
tests/test_util.py Normal file
View File

@ -0,0 +1,9 @@
import pytest
from anime_downloader import util
def test_split_anime():
anime_list = list(range(20))
assert len(util.split_anime(anime_list, '1:10')) == 9