gallery-dl/gallery_dl/cloudflare.py

128 lines
3.7 KiB
Python
Raw Normal View History

2015-11-07 02:30:08 +01:00
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
2015-11-07 02:30:08 +01:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
2015-11-07 13:06:23 +01:00
"""Methods to access sites behind Cloudflare protection"""
import re
2015-11-07 02:30:08 +01:00
import time
import operator
2015-11-07 13:06:23 +01:00
import urllib.parse
2015-11-07 02:30:08 +01:00
from . import text
from .cache import cache
2015-11-07 02:30:08 +01:00
2017-01-30 19:40:15 +01:00
2018-01-11 18:49:19 +01:00
def request_func(self, *args, **kwargs):
    """Send a GET request and solve a Cloudflare challenge if one is served

    Cookies cached for 'self.root' are loaded into the session before the
    request is sent. A response with status code 503 is treated as a
    Cloudflare challenge: the cached cookies are invalidated, the challenge
    is solved, and the session's cookies are written back to the cache.

    All positional and keyword arguments are forwarded to
    'self.session.get()'. Returns the final response object.
    """
    cached = _cookiecache(self.root)
    if cached:
        self.session.cookies.update(cached)

    response = self.session.get(*args, **kwargs)
    if response.status_code != 503:
        return response

    # the cached cookies did not get us past Cloudflare: drop them,
    # solve the challenge and remember the resulting session cookies
    _cookiecache.invalidate(self.root)
    self.log.info("Solving Cloudflare challenge")
    response = solve_challenge(self.session, response)
    _cookiecache(self.root, self.session.cookies)
    return response
2017-01-30 19:40:15 +01:00
def solve_challenge(session, response):
    """Answer the challenge contained in 'response'

    Extracts the hidden 'jschl_vc' and 'pass' form values from the
    challenge page, computes 'jschl_answer', and submits all three as
    query parameters to '/cdn-cgi/l/chk_jschl' on the same host.

    Returns the response to that submission.
    """
    session.headers["Referer"] = response.url
    page = response.text

    # hidden form fields that have to be sent back unchanged
    form_fields = (
        ('jschl_vc', 'name="jschl_vc" value="', '"'),
        ('pass'    , 'name="pass" value="', '"'),
    )
    params = text.extract_all(page, form_fields)[0]
    params["jschl_answer"] = solve_jschl(response.url, page)

    # NOTE(review): fixed delay before submitting the answer; presumably
    # the challenge rejects answers that arrive too early -- confirm
    time.sleep(4)

    answer_url = text.urljoin(response.url, "/cdn-cgi/l/chk_jschl")
    return session.get(answer_url, params=params)
2015-11-07 02:30:08 +01:00
2017-01-30 19:40:15 +01:00
2015-11-07 13:06:23 +01:00
def solve_jschl(url, page):
    """Compute the 'jschl_answer' value for a challenge page

    'url' is the URL the challenge was served from (its hostname length
    is part of the answer) and 'page' is the challenge page's HTML.

    Returns the answer as a number, or as a string if the challenge
    script formats its result with '.toFixed()'.
    """
    # find the challenge variable and its initial expression,
    # e.g. '...f, wqnVscP={"DERKbJk":+(...' --> wqnVscP.DERKbJk
    fields = (
        ('var' , ',f, ', '='),
        ('key' , '"', '"'),
        ('expr', ':', '}'),
    )
    data, pos = text.extract_all(page, fields)
    variable = "{}.{}".format(data["var"], data["key"])
    op_index = len(variable)

    # start out with the value of the initial expression
    solution = evaluate_expression(data["expr"])

    # iterate over all remaining statements
    # and fold their values into 'solution'
    script = text.extract(
        page, "'challenge-form');", "f.submit();", pos)[0]
    statements = script.split(";")

    for statement in statements[1:]:

        if statement.startswith(variable):
            # '<variable><op>=<expression>':
            # pick the arithmetic function for the operator (+, -, *),
            # evaluate the expression after '<op>=',
            # and apply it to the current solution
            apply_op = operator_functions[statement[op_index]]
            operand = evaluate_expression(statement[op_index + 2:])
            solution = apply_op(solution, operand)

        elif statement.startswith("a.value"):
            # add length of the hostname, i.e. add 11 for 'example.org'
            hostname = urllib.parse.urlsplit(url).netloc
            solution += len(hostname)

        if ".toFixed(" in statement:
            # trim the solution to 10 decimal places
            # and strip trailing zeros
            solution = "{:.10f}".format(solution).rstrip("0")

    return solution
2015-11-07 02:30:08 +01:00
2017-01-30 19:40:15 +01:00
def evaluate_expression(expr, split_re=re.compile(r"\(+([^)]*)\)")):
    """Return the numeric value of a Javascript challenge expression

    An expression containing '/' is evaluated as the quotient of the
    parts before and after the first '/'. Otherwise every parenthesized
    group found by 'split_re' yields one number (the sum of the values of
    its '[]'-separated fragments); these numbers are concatenated as
    text and the result is converted to an integer.
    """
    numerator, slash, denominator = expr.partition("/")
    if slash:
        # fraction: evaluate both sides separately and divide
        return (evaluate_expression(numerator) /
                evaluate_expression(denominator))

    # one number per parenthesized group, joined as a string of digits
    digits = [
        str(sum(
            expression_values[fragment]
            for fragment in group.split("[]")
        ))
        for group in split_re.findall(expr)
    ]
    return int("".join(digits))
2015-11-07 02:30:08 +01:00
2017-01-30 19:40:15 +01:00
2015-11-07 02:30:08 +01:00
# Operator characters that can follow the challenge variable in a
# statement, mapped to the function 'solve_jschl' uses to combine the
# current solution with the value of that statement.
operator_functions = dict((
    ("+", operator.add),
    ("-", operator.sub),
    ("*", operator.mul),
))
# Values of the fragments that remain after 'evaluate_expression' splits
# a parenthesized group on "[]", e.g. '!+[]' leaves '!+' and ''.
# Fragments worth 0 come first, then the ones worth 1.
expression_values = dict.fromkeys(("", "+"), 0)
expression_values.update(dict.fromkeys(("!+", "+!!"), 1))
2017-01-30 19:40:15 +01:00
@cache(maxage=365 * 24 * 60 * 60, keyarg=0)
def _cookiecache(key, item=None):
    """Cache slot for the cookies belonging to 'key'

    The function body only hands back 'item'; storing and looking up
    values is left to the 'cache' decorator.
    NOTE(review): assumes 'cache' remembers the return value per 'key'
    (argument 0) for 'maxage' seconds, i.e. one year -- confirm against
    the 'cache' module.
    """
    return item