anime-downloader/anime_downloader/sites/anime.py

"""
anime.py contains the base classes required for other anime classes.
"""
import requests
from bs4 import BeautifulSoup

import os
import logging
import copy

from anime_downloader.sites.exceptions import AnimeDLError, NotFoundError
from anime_downloader import util
from anime_downloader.extractors import get_extractor
from anime_downloader.downloader import get_downloader


class Anime:
    """
    Base class for all anime classes.

    Parameters
    ----------
    url: string
        URL of the anime.
    quality: One of ['360p', '480p', '720p', '1080p']
        Quality of episodes
    fallback_qualities: list
        The order of fallback.

    Attributes
    ----------
    sitename: str
        name of the site
    title: str
        Title of the anime
    meta: dict
        metadata about the anime. [Can be empty]
    QUALITIES: list
        Possible qualities for the site
    """
    sitename = ''
    title = ''
    meta = dict()
    subclasses = {}

    QUALITIES = None
    _episodeClass = object

    @classmethod
    def search(cls, query):
        """
        Search searches for the anime using the query given.

        Parameters
        ----------
        query: str
            query is the query keyword to be searched.

        Returns
        -------
        list
            List of :py:class:`~anime_downloader.sites.anime.SearchResult`
        """
        return

    def __init__(self, url=None, quality='720p',
                 fallback_qualities=['720p', '480p', '360p'],
                 _skip_online_data=False):
        self.url = url
        self._fallback_qualities = fallback_qualities

        if quality in self.QUALITIES:
            self.quality = quality
        else:
            raise AnimeDLError(
                'Quality {0} not found in {1}'.format(quality, self.QUALITIES))

        if not _skip_online_data:
            logging.info('Extracting episode info from page')
            self._episode_urls = self.get_data()
            self._len = len(self._episode_urls)

    @classmethod
    def verify_url(self, url):
        if self.sitename in url:
            return True
        return False

    def __init_subclass__(cls, sitename, **kwargs):
        super().__init_subclass__(**kwargs)
        cls.subclasses[sitename] = cls

    @classmethod
    def factory(cls, sitename: str):
        return cls.subclasses[sitename]

    def get_data(self):
        """
        get_data is called inside the :code:`__init__` of
        :py:class:`~anime_downloader.sites.anime.BaseAnime`. It is used to get
        the necessary data about the anime and it's episodes.

        This function calls
        :py:class:`~anime_downloader.sites.anime.BaseAnime._scarpe_episodes`
        and
        :py:class:`~anime_downloader.sites.anime.BaseAnime._scrape_metadata`

        TODO: Refactor this so that classes which need not be soupified don't
        have to overload this function.

        Returns
        -------
        list
            A list of tuples of episodes containing episode name and
            episode url.
            Ex::
                [('1', 'https://9anime.is/.../...', ...)]

        """
        self._episode_urls = []
        try:
            self._scrape_metadata()
        except Exception as e:
            logging.debug('Metadata scraping error: {}'.format(e))

        self._episode_urls = self._scrape_episodes()
        self._len = len(self._episode_urls)

        logging.debug('EPISODE IDS: length: {}, ids: {}'.format(
            self._len, self._episode_urls))

        self._episode_urls = [(no+1, id) for no, id in
                              enumerate(self._episode_urls)]

        return self._episode_urls

    def __getitem__(self, index):
        episode_class = AnimeEpisode.subclasses[self.sitename]
        if isinstance(index, int):
            ep_id = self._episode_urls[index]
            return episode_class(ep_id[1], self.quality, parent=self,
                                      ep_no=ep_id[0])
        elif isinstance(index, slice):
            anime = copy.deepcopy(self)
            anime._episode_urls = anime._episode_urls[index]
            return anime

    def __repr__(self):
        return '''
Site: {name}
Anime: {title}
Episode count: {length}
'''.format(name=self.sitename, title=self.title, length=len(self))

    def __len__(self):
        return self._len

    def __str__(self):
        return self.title

    def _scarpe_episodes(self, soup):
        """
        _scarpe_episodes is function which has to be overridden by the base
        classes to scrape the episode urls from the web page.

        Parameters
        ----------
        soup: `bs4.BeautifulSoup`
            soup is the html of the anime url after passing through
            BeautifulSoup.

        Returns
        -------
        :code:`list` of :code:`str`
            A list of episode urls.
        """
        return

    def _scrape_metadata(self, soup):
        """
        _scrape_metadata is function which has to be overridden by the base
        classes to scrape the metadata of anime from the web page.

        Parameters
        ----------
        soup: :py:class:`bs4.BeautifulSoup`
            soup is the html of the anime url after passing through
            BeautifulSoup.
        """
        return


class AnimeEpisode:
    QUALITIES = None
    title = ''
    stream_url = ''
    subclasses = {}

    def __init__(self, url, quality='720p', parent=None,
                 ep_no=None):
        if quality not in self.QUALITIES:
            raise AnimeDLError('Incorrect quality: "{}"'.format(quality))

        self.ep_no = ep_no
        self.url = url
        self.quality = quality
        self._parent = parent
        self._sources = None
        self.pretty_title = '{}-{}'.format(self._parent.title, self.ep_no)

        logging.debug("Extracting stream info of id: {}".format(self.url))

        # TODO: New flag: online_data=False
        try:
            self.get_data()
            # Just to verify the source is acquired
            self.source().stream_url
        except NotFoundError:
            # Issue #28
            qualities = copy.copy(self._parent._fallback_qualities)
            try:
                qualities.remove(self.quality)
            except ValueError:
                pass
            for quality in qualities:
                logging.warning('Quality {} not found. Trying {}.'.format(
                    self.quality, quality))
                self.quality = quality
                try:
                    self.get_data()
                    self.source().stream_url
                    # parent.quality = self.quality
                    break
                except NotFoundError:
                    # Issue #28
                    # qualities.remove(self.quality)
                    pass

    def __init_subclass__(cls, sitename: str, **kwargs):
        super().__init_subclass__(**kwargs)
        cls.subclasses[sitename] = cls
        cls.sitename = sitename

    @classmethod
    def factory(cls, sitename: str):
        return cls.subclasses[sitename]

    def source(self, index=0):
        if not self._sources:
            self.get_data()

        try:
            sitename, url = self._sources[index]
        except TypeError:
            return self._sources[index]

        extractor = get_extractor(sitename)
        ext = extractor(url, quality=self.quality)
        self._sources[index] = ext

        return ext

    def get_data(self):
        self._sources = self._get_sources()
        logging.debug('Sources : '.format(self._sources))

    def _get_sources(self):
        raise NotImplementedError

    def download(self, force=False, path=None,
                 format='{anime_title}_{ep_no}', range_size=None):
        # TODO: Remove this shit
        logging.info('Downloading {}'.format(self.pretty_title))
        if format:
            file_name = util.format_filename(format, self)+'.mp4'

        if path is None:
            path = './' + file_name
        if path.endswith('.mp4'):
            path = path
        else:
            path = os.path.join(path, file_name)

        Downloader = get_downloader('http')
        downloader = Downloader(self.source(),
                                path, force, range_size=range_size)

        downloader.download()


class SearchResult:
    """
    SearchResult class holds the search result of a search done by an Anime
    class

    Parameters
    ----------
    title: str
        Title of the anime.
    url: str
        URL of the anime
    poster: str
        URL for the poster of the anime.
    meta: dict
        Additional metadata regarding the anime.

    Attributes
    ----------
    title: str
        Title of the anime.
    url: str
        URL of the anime
    poster: str
        URL for the poster of the anime.
    meta: dict
        Additional metadata regarding the anime.
    """

    def __init__(self, title, url, poster, meta=''):
        self.title = title
        self.url = url
        self.poster = poster
        self.meta = meta

    def __repr__(self):
        return '<SearchResult Title: {} URL: {}>'.format(self.title, self.url)

    def __str__(self):
        return self.title