2019-02-23 06:57:37 -08:00
# TODO: Check without node installed
# cfscrape is a necessery dependency
import cfscrape
import logging
from bs4 import BeautifulSoup
2019-03-13 06:44:17 -07:00
import tempfile
import os
import requests
2019-02-23 06:57:37 -08:00
from anime_downloader import session
2019-02-23 10:45:49 -08:00
from anime_downloader . const import get_random_header
2019-02-23 06:57:37 -08:00
# Public request API re-exported by this helper module.
__all__ = [
    'get',
    'post',
    'soupify',
]

logger = logging.getLogger(__name__)

# Shared cfscrape session, used when a request is made with cf=True
# (Cloudflare-protected sites).
cf_session = cfscrape.create_scraper()

# Baseline headers for every request; get_random_header() picks a random
# User-Agent once at import time.
default_headers = get_random_header()

# Directory where response bodies are dumped by _log_response_body when
# debug logging is enabled.
temp_dir = tempfile.mkdtemp(prefix='animedl')
logger.debug(f"HTML file temp_dir: {temp_dir}")
2019-02-23 06:57:37 -08:00
2019-03-13 06:01:07 -07:00
def setup ( func ) :
2019-03-13 06:44:17 -07:00
"""
setup is a decorator which takes a function
and converts it into a request method
"""
2019-03-13 06:01:07 -07:00
def setup_func ( url : str ,
2020-04-03 02:06:37 -07:00
cf : bool = False ,
2020-06-07 08:31:12 -07:00
sel : bool = False ,
2019-03-13 06:01:07 -07:00
referer : str = None ,
2020-10-03 02:05:52 -07:00
cache : bool = True ,
2019-03-13 06:01:07 -07:00
headers = None ,
* * kwargs ) :
2019-03-22 07:27:26 -07:00
'''
{ 0 } performs a { 0 } request
Parameters
- - - - - - - - - -
url : str
url is the url of the request to be performed
cf : bool
cf if True performs the request through cfscrape .
For cloudflare protected sites .
2021-05-25 12:47:40 -07:00
sel : bool
sel if True perfroms the request through selescrape ( selenium ) .
2019-03-22 07:27:26 -07:00
referer : str
a url sent as referer in request headers
'''
2020-06-07 08:31:12 -07:00
selescrape = None
if cf :
sess = cf_session
elif sel :
try :
from selenium import webdriver
from anime_downloader . sites . helpers import selescrape
sess = selescrape
2021-05-25 12:46:32 -07:00
sess . cache = cache
2020-06-07 08:31:12 -07:00
except ImportError :
sess = cf_session
logger . warning ( " This provider may not work correctly because it requires selenium to work. \n If you want to install it then run: ' pip install selenium ' . " )
2020-09-20 04:08:12 -07:00
else :
2020-10-03 02:05:52 -07:00
sess = session . get_session ( cache = cache )
2020-06-07 08:31:12 -07:00
2019-03-13 06:01:07 -07:00
if headers :
default_headers . update ( headers )
if referer :
2019-07-12 04:55:21 -07:00
default_headers [ ' referer ' ] = referer
2019-03-13 06:44:17 -07:00
2019-03-13 06:01:07 -07:00
logger . debug ( ' ----- ' )
logger . debug ( ' {} {} ' . format ( func . __name__ . upper ( ) , url ) )
logger . debug ( kwargs )
logger . debug ( default_headers )
logger . debug ( ' ----- ' )
2019-03-13 06:44:17 -07:00
2019-03-13 06:01:07 -07:00
res = sess . request ( func . __name__ . upper ( ) ,
url ,
headers = default_headers ,
* * kwargs )
2020-06-07 08:31:12 -07:00
2020-09-20 04:08:12 -07:00
if sess != selescrape : # TODO fix this for selescrape too
2020-06-07 08:31:12 -07:00
res . raise_for_status ( )
logger . debug ( res . url )
# logger.debug(res.text)
if logger . getEffectiveLevel ( ) == logging . DEBUG :
_log_response_body ( res )
2019-03-13 06:01:07 -07:00
return res
2020-06-07 08:31:12 -07:00
2019-03-22 07:27:26 -07:00
setup_func . __doc__ = setup_func . __doc__ . format ( func . __name__ )
2019-03-13 06:01:07 -07:00
return setup_func
@setup
def get(url: str,
        cf: bool = False,
        referer: str = None,
        headers=None,
        **kwargs):
    '''
    get performs a get request

    Parameters
    ----------
    url: str
        url is the url of the request to be performed
    cf: bool
        cf if True performs the request through cfscrape.
        For cloudflare protected sites.
    sel: bool
        sel if True performs the request through selescrape (selenium).
    referer: str
        a url sent as referer in request headers
    '''
2019-03-22 06:47:00 -07:00
@setup
def post(url: str,
         cf: bool = False,
         referer: str = None,
         headers=None,
         **kwargs):
    '''
    post performs a post request

    Parameters
    ----------
    url: str
        url is the url of the request to be performed
    cf: bool
        cf if True performs the request through cfscrape.
        For cloudflare protected sites.
    sel: bool
        sel if True performs the request through selescrape (selenium).
    referer: str
        a url sent as referer in request headers
    '''
2019-02-23 10:45:49 -08:00
def soupify(res):
    """Parse a response (or raw HTML string) into a BeautifulSoup tree.

    Parameters
    ----------
    res : str or requests.Response
        Raw HTML markup, or a response object whose ``.text`` attribute
        holds the markup.

    Returns
    -------
    bs4.BeautifulSoup
        The parsed document, built with the stdlib ``html.parser`` backend.
    """
    markup = res if isinstance(res, str) else res.text
    return BeautifulSoup(markup, 'html.parser')
2019-03-13 06:44:17 -07:00
def _log_response_body ( res ) :
import json
2019-07-17 10:12:42 -07:00
import pathlib
2019-03-13 06:44:17 -07:00
file = tempfile . mktemp ( dir = temp_dir )
2019-03-22 06:47:00 -07:00
logger . debug ( file )
2020-03-18 03:55:49 -07:00
with open ( file , ' w ' , encoding = " utf-8 " ) as f :
2019-03-13 06:44:17 -07:00
f . write ( res . text )
data_file = temp_dir + ' /data.json '
if not os . path . exists ( data_file ) :
with open ( data_file , ' w ' ) as f :
json . dump ( [ ] , f )
data = None
with open ( data_file , ' r ' ) as f :
data = json . load ( f )
data . append ( {
' method ' : res . request . method ,
' url ' : res . url ,
2019-07-17 10:12:42 -07:00
' file ' : pathlib . Path ( file ) . name ,
2019-03-13 06:44:17 -07:00
} )
with open ( data_file , ' w ' ) as f :
json . dump ( data , f )