prevent unhandled exception on Cloudflare challenges (#868)

The relatively new v2 challenges aren't supported (*), but retrying
often enough may yield a v1 challenge, which can be solved.

(*) and probably never will be. They are far too complicated to solve
without a real browser.
This commit is contained in:
Mike Fährmann 2020-07-08 23:22:33 +02:00
parent 6e2af9a8d8
commit dbf841ebd1
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -32,8 +32,28 @@ def solve_challenge(session, response, kwargs):
"""Solve Cloudflare challenge and get cfclearance cookie"""
parsed = urllib.parse.urlsplit(response.url)
root = parsed.scheme + "://" + parsed.netloc
page = response.text
cf_kwargs = {}
headers = cf_kwargs["headers"] = collections.OrderedDict()
params = cf_kwargs["data"] = collections.OrderedDict()
headers["Referer"] = response.url
form = text.extract(page, 'id="challenge-form"', '</form>')[0]
for element in ElementTree.fromstring(
"<f>" + form + "</f>").findall("input"):
name = element.attrib.get("name")
if not name:
continue
if name == "jschl_answer":
try:
value = solve_js_challenge(page, parsed.netloc)
except Exception:
return response, None, None
else:
value = element.attrib.get("value")
params[name] = value
try:
params = {"ray": text.extract(page, '?ray=', '"')[0]}
@ -45,25 +65,8 @@ def solve_challenge(session, response, kwargs):
except Exception:
pass
cf_kwargs = {}
headers = cf_kwargs["headers"] = collections.OrderedDict()
params = cf_kwargs["data"] = collections.OrderedDict()
url = root + text.unescape(text.extract(page, 'action="', '"')[0])
headers["Referer"] = response.url
form = text.extract(page, 'id="challenge-form"', '</form>')[0]
for element in ElementTree.fromstring(
"<f>" + form + "</f>").findall("input"):
name = element.attrib.get("name")
if not name:
continue
if name == "jschl_answer":
value = solve_js_challenge(page, parsed.netloc)
else:
value = element.attrib.get("value")
params[name] = value
time.sleep(4)
url = root + text.unescape(text.extract(page, 'action="', '"')[0])
cf_response = session.request("POST", url, **cf_kwargs)
if cf_response.history: