fix/improve Cloudflare bypass code (#728, #757)

- support changing values for 'k'
- use XML parser to get request parameters
  (some input fields are now embedded in an HTML comment)
This commit is contained in:
Mike Fährmann 2020-05-15 22:56:33 +02:00
parent 39cd389679
commit d17e9628b3
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -8,11 +8,11 @@
"""Methods to access sites behind Cloudflare protection"""
import re
import time
import operator
import collections
import urllib.parse
from xml.etree import ElementTree
from . import text
from .cache import memcache
@ -41,12 +41,16 @@ def solve_challenge(session, response, kwargs):
url = root + text.unescape(text.extract(page, 'action="', '"')[0])
headers["Referer"] = response.url
for inpt in text.extract_iter(page, "<input ", ">"):
name = text.extract(inpt, 'name="', '"')[0]
form = text.extract(page, 'id="challenge-form"', '</form>')[0]
for element in ElementTree.fromstring(
"<f>" + form + "</f>").findall("input"):
name = element.attrib.get("name")
if not name:
continue
if name == "jschl_answer":
value = solve_js_challenge(page, parsed.netloc)
else:
value = text.unescape(text.extract(inpt, 'value="', '"')[0])
value = element.attrib.get("value")
params[name] = value
time.sleep(4)
@ -84,6 +88,8 @@ def solve_js_challenge(page, netloc):
variable = "{}.{}".format(data["var"], data["key"])
vlength = len(variable)
k = text.extract(page, "k = '", "'")[0]
# evaluate the initial expression
solution = evaluate_expression(data["expr"], page, netloc)
@ -97,7 +103,7 @@ def solve_js_challenge(page, netloc):
# select arithmetic function based on operator (+/-/*)
func = OPERATORS[expr[vlength]]
# evaluate the rest of the expression
value = evaluate_expression(expr[vlength+2:], page, netloc)
value = evaluate_expression(expr[vlength+2:], page, netloc, k)
# combine expression value with our current solution
solution = func(solution, value)
@ -110,17 +116,18 @@ def solve_js_challenge(page, netloc):
solution = "{:.10f}".format(solution)
return solution
elif expr.startswith("k+="):
k += str(evaluate_expression(expr[3:], page, netloc))
def evaluate_expression(expr, page, netloc, *,
split_re=re.compile(r"[(+]+([^)]*)\)")):
def evaluate_expression(expr, page, netloc, k=""):
"""Evaluate a single Javascript expression for the challenge"""
if expr.startswith("function(p)"):
# get HTML element with ID k and evaluate the expression inside
# 'eval(eval("document.getElementById(k).innerHTML"))'
k, pos = text.extract(page, "k = '", "'")
e, pos = text.extract(page, 'id="'+k+'"', '<')
return evaluate_expression(e.partition(">")[2], page, netloc)
expr = text.extract(page, 'id="'+k+'"', '<')[0]
return evaluate_expression(expr.partition(">")[2], page, netloc)
if "/" in expr:
# split the expression in numerator and denominator subexpressions,