anime-downloader/anime_downloader/sites/anime.py

"""
anime.py contains the base classes required for other anime classes.
"""
import requests
from bs4 import BeautifulSoup
import os
import logging
import copy

from anime_downloader.sites.exceptions import AnimeDLError, NotFoundError
from anime_downloader import util
from anime_downloader.extractors import get_extractor
from anime_downloader.downloader import get_downloader


class Anime:
    """
    Base class for all anime classes.

    Parameters
    ----------
    url: string
        URL of the anime.
    quality: One of ['360p', '480p', '720p', '1080p']
        Quality of episodes
    fallback_qualities: list
        The order of fallback.

    Attributes
    ----------
    sitename: string
        name of the site
    title: string
        Title of the anime
    meta: dict
        metadata about the anime. [Can be empty]
    QUALITIES: list
        Possible qualities for the site
    """

    sitename = ''
    title = ''
    meta = dict()
    subclasses = {}

    QUALITIES = None
    _episodeClass = object

    @classmethod
    def search(cls, query):
        """
        Search the site for anime matching the given query.

        Parameters
        ----------
        query: string
            The search term.
        """
        return

    def __init__(self, url=None, quality='720p',
                 fallback_qualities=['720p', '480p', '360p'],
                 _skip_online_data=False):
        self.url = url
        self._fallback_qualities = fallback_qualities

        if quality in self.QUALITIES:
            self.quality = quality
        else:
            raise AnimeDLError(
                'Quality {0} not found in {1}'.format(quality, self.QUALITIES))

        if not _skip_online_data:
            logging.info('Extracting episode info from page')
            self.get_data()

    @classmethod
    def verify_url(cls, url):
        if cls.sitename in url:
            return True
        return False

    def __init_subclass__(cls, sitename, **kwargs):
        super().__init_subclass__(**kwargs)
        cls.subclasses[sitename] = cls

    @classmethod
    def factory(cls, sitename: str):
        return cls.subclasses[sitename]
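
    # Registration sketch (illustrative, not from the upstream file): concrete
    # site classes pass sitename as a class keyword so factory() can look them
    # up later. 'somesite' is a made-up name.
    #
    #     class SomeSiteAnime(Anime, sitename='somesite'):
    #         QUALITIES = ['360p', '480p', '720p']
    #
    #         def _scrape_episodes(self):
    #             return ['https://somesite.example/episode/1']
    #
    #     assert Anime.factory('somesite') is SomeSiteAnime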

    def get_data(self):
        self._episode_urls = []
        try:
            self._scrape_metadata()
        except Exception as e:
            logging.debug('Metadata scraping error: {}'.format(e))

        self._episode_urls = self._scrape_episodes()
        self._len = len(self._episode_urls)

        logging.debug('EPISODE IDS: length: {}, ids: {}'.format(
            self._len, self._episode_urls))

        self._episode_urls = [(no+1, id) for no, id in
                              enumerate(self._episode_urls)]

        return self._episode_urls

    def __getitem__(self, index):
        episode_class = AnimeEpisode.subclasses[self.sitename]
        if isinstance(index, int):
            ep_id = self._episode_urls[index]
            return episode_class(ep_id[1], self.quality, parent=self,
                                 ep_no=ep_id[0])
        elif isinstance(index, slice):
            anime = copy.deepcopy(self)
            anime._episode_urls = anime._episode_urls[index]
            return anime

    def __repr__(self):
        return '''
Site: {name}
Anime: {title}
Episode count: {length}
'''.format(name=self.sitename, title=self.title, length=len(self))

    def __len__(self):
        return self._len

    def __str__(self):
        return self.title

    def _scrape_episodes(self):
        # Overridden by site subclasses to return the list of episode URLs.
        return

    def _scrape_metadata(self):
        # Overridden by site subclasses to fill in title/meta.
        return
2019-02-23 06:57:37 -08:00
class AnimeEpisode:
QUALITIES = None
title = ''
stream_url = ''
2019-02-23 06:57:37 -08:00
subclasses = {}
2018-06-30 11:56:16 -07:00

    def __init__(self, url, quality='720p', parent=None,
                 ep_no=None):
        if quality not in self.QUALITIES:
            raise AnimeDLError('Incorrect quality: "{}"'.format(quality))

        self.ep_no = ep_no
        self.url = url
        self.quality = quality
        self._parent = parent
        self._sources = None
        self.pretty_title = '{}-{}'.format(self._parent.title, self.ep_no)

        logging.debug("Extracting stream info of id: {}".format(self.url))

        # TODO: New flag: online_data=False
        try:
            self.get_data()
            # Just to verify the source is acquired
            self.source().stream_url
        except NotFoundError:
            # Issue #28
            qualities = copy.copy(self._parent._fallback_qualities)
            try:
                qualities.remove(self.quality)
            except ValueError:
                pass
            for quality in qualities:
                logging.warning('Quality {} not found. Trying {}.'.format(
                    self.quality, quality))
                self.quality = quality
                try:
                    self.get_data()
                    self.source().stream_url
                    # parent.quality = self.quality
                    break
                except NotFoundError:
                    # Issue #28
                    # qualities.remove(self.quality)
                    pass

    def __init_subclass__(cls, sitename: str, **kwargs):
        super().__init_subclass__(**kwargs)
        cls.subclasses[sitename] = cls
        cls.sitename = sitename

    @classmethod
    def factory(cls, sitename: str):
        return cls.subclasses[sitename]

    def source(self, index=0):
        if not self._sources:
            self.get_data()
        try:
            sitename, url = self._sources[index]
        except TypeError:
            # Already converted to an extractor instance; return it as-is.
            return self._sources[index]

        extractor = get_extractor(sitename)
        ext = extractor(url, quality=self.quality)
        self._sources[index] = ext
        return ext

    def get_data(self):
        self._sources = self._get_sources()
        logging.debug('Sources : {}'.format(self._sources))

    def _get_sources(self):
        raise NotImplementedError
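
    # Sketch of the contract source() expects (illustrative, not from the
    # upstream file): a subclass's _get_sources typically returns a list of
    # (extractor_sitename, url) tuples, which source() lazily turns into
    # extractor objects. The names below are made up.
    #
    #     def _get_sources(self):
    #         return [('someextractor', 'https://video.example/embed/1')]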

    def download(self, force=False, path=None,
                 format='{anime_title}_{ep_no}', range_size=None):
        # TODO: Remove this shit
        logging.info('Downloading {}'.format(self.pretty_title))
        if format:
            file_name = util.format_filename(format, self) + '.mp4'

        if path is None:
            path = './' + file_name
        if path.endswith('.mp4'):
            path = path
        else:
            path = os.path.join(path, file_name)

        Downloader = get_downloader('http')
        downloader = Downloader(self.source(),
                                path, force, range_size=range_size)

        downloader.download()
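
    # Usage sketch for download() (illustrative, not from the upstream file;
    # the directory name is made up):
    #
    #     episode = anime[0]
    #     episode.download(path='downloads/')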


class SearchResult:
    def __init__(self, title, url, poster):
        self.title = title
        self.url = url
        self.poster = poster
        self.meta = ''

    def __repr__(self):
        return '<SearchResult Title: {} URL: {}>'.format(self.title, self.url)

    def __str__(self):
        return self.title
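

# Sketch of how a site's search() might use SearchResult (illustrative, not
# from the upstream file; the site name and URLs are made up):
#
#     @classmethod
#     def search(cls, query):
#         return [SearchResult(title='Some Anime',
#                              url='https://somesite.example/anime/1',
#                              poster='https://somesite.example/poster.jpg')]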