2019-02-23 06:57:37 -08:00
# TODO: Check without node installed
# cfscrape is a necessery dependency
import cfscrape
import logging
from bs4 import BeautifulSoup
2019-03-13 06:44:17 -07:00
import tempfile
import os
import requests
2019-02-23 06:57:37 -08:00
from anime_downloader import session
2019-02-23 10:45:49 -08:00
from anime_downloader . const import get_random_header
2019-02-23 06:57:37 -08:00
# Public request API re-exported by this helper module.
__all__ = [
    'get',
    'post',
    'soupify',
]

logger = logging.getLogger(__name__)

# Shared cfscrape session, used when a request is made with cf=True
# (Cloudflare-protected sites).
cf_session = cfscrape.create_scraper()

# Baseline headers for every request; get_random_header() picks a random
# User-Agent once at import time.
default_headers = get_random_header()

# Directory where response bodies are dumped by _log_response_body when
# debug logging is enabled.
temp_dir = tempfile.mkdtemp(prefix='animedl')
logger.debug(f"HTML file temp_dir: {temp_dir}")
2019-02-23 06:57:37 -08:00
2019-03-13 06:01:07 -07:00
def setup ( func ) :
2019-03-13 06:44:17 -07:00
"""
setup is a decorator which takes a function
and converts it into a request method
"""
2019-03-13 06:01:07 -07:00
def setup_func ( url : str ,
2020-04-03 02:06:37 -07:00
cf : bool = False ,
2020-06-07 08:31:12 -07:00
sel : bool = False ,
2019-03-13 06:01:07 -07:00
referer : str = None ,
2020-10-03 02:05:52 -07:00
cache : bool = True ,
2019-03-13 06:01:07 -07:00
headers = None ,
* * kwargs ) :
2019-03-22 07:27:26 -07:00
'''
{ 0 } performs a { 0 } request
Parameters
- - - - - - - - - -
url : str
url is the url of the request to be performed
cf : bool
cf if True performs the request through cfscrape .
For cloudflare protected sites .
2021-05-25 12:47:40 -07:00
sel : bool
sel if True perfroms the request through selescrape ( selenium ) .
2019-03-22 07:27:26 -07:00
referer : str
a url sent as referer in request headers
'''
2020-06-07 08:31:12 -07:00
selescrape = None
if cf :
sess = cf_session
elif sel :
try :
from selenium import webdriver
from anime_downloader . sites . helpers import selescrape
sess = selescrape
2021-05-25 12:46:32 -07:00
sess . cache = cache
2020-06-07 08:31:12 -07:00
except ImportError :
sess = cf_session
logger . warning ( " This provider may not work correctly because it requires selenium to work. \n If you want to install it then run: ' pip install selenium ' . " )
2020-09-20 04:08:12 -07:00
else :
2020-10-03 02:05:52 -07:00
sess = session . get_session ( cache = cache )
2020-06-07 08:31:12 -07:00
2019-03-13 06:01:07 -07:00
if headers :
default_headers . update ( headers )
if referer :
2019-07-12 04:55:21 -07:00
default_headers [ ' referer ' ] = referer
2019-03-13 06:44:17 -07:00
2019-03-13 06:01:07 -07:00
logger . debug ( ' ----- ' )
logger . debug ( ' {} {} ' . format ( func . __name__ . upper ( ) , url ) )
logger . debug ( kwargs )
logger . debug ( default_headers )
logger . debug ( ' ----- ' )
2019-03-13 06:44:17 -07:00
2019-03-13 06:01:07 -07:00
res = sess . request ( func . __name__ . upper ( ) ,
url ,
headers = default_headers ,
* * kwargs )
2020-06-07 08:31:12 -07:00
2020-09-20 04:08:12 -07:00
if sess != selescrape : # TODO fix this for selescrape too
2020-06-07 08:31:12 -07:00
res . raise_for_status ( )
logger . debug ( res . url )
# logger.debug(res.text)
if logger . getEffectiveLevel ( ) == logging . DEBUG :
_log_response_body ( res )
2019-03-13 06:01:07 -07:00
return res
2020-06-07 08:31:12 -07:00
2019-03-22 07:27:26 -07:00
setup_func . __doc__ = setup_func . __doc__ . format ( func . __name__ )
2019-03-13 06:01:07 -07:00
return setup_func
@setup
def get(url: str,
        cf: bool = False,
        referer: str = None,
        headers=None,
        **kwargs):
    '''
    get performs a get request

    Parameters
    ----------
    url: str
        url is the url of the request to be performed
    cf: bool
        cf if True performs the request through cfscrape.
        For cloudflare protected sites.
    sel: bool
        sel if True performs the request through selescrape (selenium).
    referer: str
        a url sent as referer in request headers
    '''
2019-03-22 06:47:00 -07:00
@setup
def post(url: str,
         cf: bool = False,
         referer: str = None,
         headers=None,
         **kwargs):
    '''
    post performs a post request

    Parameters
    ----------
    url: str
        url is the url of the request to be performed
    cf: bool
        cf if True performs the request through cfscrape.
        For cloudflare protected sites.
    sel: bool
        sel if True performs the request through selescrape (selenium).
    referer: str
        a url sent as referer in request headers
    '''
2019-02-23 10:45:49 -08:00
def soupify(res):
    """Parse a response (or raw HTML string) into a BeautifulSoup tree.

    Parameters
    ----------
    res : str or requests.Response
        Raw HTML markup, or a response object whose ``.text`` attribute
        holds the markup.

    Returns
    -------
    bs4.BeautifulSoup
        The parsed document, built with the stdlib ``html.parser`` backend.
    """
    markup = res if isinstance(res, str) else res.text
    return BeautifulSoup(markup, 'html.parser')
2019-03-13 06:44:17 -07:00
def _log_response_body ( res ) :
import json
2019-07-17 10:12:42 -07:00
import pathlib
2019-03-13 06:44:17 -07:00
file = tempfile . mktemp ( dir = temp_dir )
2019-03-22 06:47:00 -07:00
logger . debug ( file )
2020-03-18 03:55:49 -07:00
with open ( file , ' w ' , encoding = " utf-8 " ) as f :
2019-03-13 06:44:17 -07:00
f . write ( res . text )
data_file = temp_dir + ' /data.json '
if not os . path . exists ( data_file ) :
with open ( data_file , ' w ' ) as f :
json . dump ( [ ] , f )
data = None
with open ( data_file , ' r ' ) as f :
data = json . load ( f )
data . append ( {
' method ' : res . request . method ,
' url ' : res . url ,
2019-07-17 10:12:42 -07:00
' file ' : pathlib . Path ( file ) . name ,
2019-03-13 06:44:17 -07:00
} )
with open ( data_file , ' w ' ) as f :
json . dump ( data , f )