2015-11-07 02:30:08 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2017-04-08 11:02:32 +02:00
|
|
|
# Copyright 2015-2017 Mike Fährmann
|
2015-11-07 02:30:08 +01:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2015-11-07 13:06:23 +01:00
|
|
|
"""Methods to access sites behind Cloudflare protection"""
|
|
|
|
|
2015-11-07 02:30:08 +01:00
|
|
|
import time
|
|
|
|
import operator
|
2015-11-07 13:06:23 +01:00
|
|
|
import urllib.parse
|
2015-11-07 02:30:08 +01:00
|
|
|
from . import text
|
2016-03-07 16:34:42 +01:00
|
|
|
from .cache import cache
|
2015-11-07 02:30:08 +01:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2016-12-16 13:28:36 +01:00
|
|
|
def request_func(self, *args, **kwargs):
    """Send a GET request, solving a Cloudflare challenge if necessary

    Expects 'self' to provide the attributes 'root', 'session' and 'log'.
    All positional and keyword arguments are forwarded unchanged to
    'self.session.get()', so options like 'params', 'headers' or
    'timeout' can be passed through.

    Cookies cached for 'self.root' are installed on the session before
    the request is sent. A response with any status code other than 200
    is treated as a challenge page: the cached cookies are invalidated,
    the challenge gets solved and the session's cookies are cached again.

    Returns the response object of the last request that was made.
    """
    cookies = _cookiecache(self.root)
    if cookies:
        # reuse cookies from an earlier, successfully solved challenge
        self.session.cookies = cookies

    response = self.session.get(*args, **kwargs)
    if response.status_code != 200:
        # cached cookies (if any) did not get us through; drop them
        _cookiecache.invalidate(self.root)
        self.log.debug(response.text)
        self.log.info("Solving Cloudflare challenge")
        response = solve_challenge(self.session, response)
        # remember the session's cookies for later requests
        _cookiecache(self.root, self.session.cookies)
    return response
|
2016-11-20 18:05:49 +01:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2016-12-16 13:28:36 +01:00
|
|
|
def solve_challenge(session, response):
    """Answer the challenge in 'response' and return the server's reply

    Sets the session's 'Referer' header to the URL of the challenge page,
    extracts the 'jschl_vc' and 'pass' form values from the page, adds
    the computed 'jschl_answer' and submits everything with a GET request
    to '/cdn-cgi/l/chk_jschl' on the same host.
    """
    challenge_url = response.url
    session.headers["Referer"] = challenge_url
    page = response.text

    form_fields = (
        ('jschl_vc', 'name="jschl_vc" value="', '"'),
        ('pass'    , 'name="pass" value="', '"'),
    )
    extracted = text.extract_all(page, form_fields)
    params = extracted[0]
    params["jschl_answer"] = solve_jschl(challenge_url, page)

    # NOTE(review): presumably the answer must not be submitted
    # immediately after receiving the challenge page -- confirm
    time.sleep(4)

    submit_url = urllib.parse.urljoin(challenge_url, "/cdn-cgi/l/chk_jschl")
    return session.get(submit_url, params=params)
|
2015-11-07 02:30:08 +01:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2015-11-07 13:06:23 +01:00
|
|
|
def solve_jschl(url, page):
    """Compute the 'jschl_answer' value for a challenge page

    'url' is the address of the challenge page and 'page' its content.
    Returns the computed answer, or None if the challenge script
    contains no statement starting with 'a.value'.
    """
    rules = (
        ('var' , ',f, ', '='),
        ('key' , '"', '"'),
        ('expr', ':', '}'),
    )
    data, pos = text.extract_all(page, rules)

    # starting value, taken from the initial assignment
    solution = evaluate_expression(data["expr"])

    # name under which the script refers to the intermediate result
    variable = "{}.{}".format(data["var"], data["key"])
    op_index = len(variable)

    script = text.extract(
        page, "'challenge-form');", "f.submit();", pos)[0]
    statements = script.split(";")

    # the first ';'-separated piece is skipped
    for statement in statements[1:]:
        if statement.startswith(variable):
            # The character right after the variable name selects the
            # operator; the operand expression starts two characters
            # after the name (presumably the statement is '<op>=...').
            apply = operator_functions[statement[op_index]]
            operand = evaluate_expression(statement[op_index+2:])
            solution = apply(solution, operand)
            continue
        if statement.startswith("a.value"):
            # final step: add the length of the URL's network location
            return solution + len(urllib.parse.urlsplit(url).netloc)
    return None
|
2015-11-07 02:30:08 +01:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2015-11-07 13:06:23 +01:00
|
|
|
def evaluate_expression(expr):
    """Evaluate a Javascript expression for the challenge

    Every parenthesized group that is nested inside another pair of
    parentheses contributes one number; if there is no such group, the
    whole expression is treated as a single group. The number of a group
    is the sum of the 'expression_values' of the pieces obtained by
    splitting it on '[]'. The numbers of all groups are concatenated as
    decimal strings and the result is returned as an integer.

    Raises KeyError for pieces missing from 'expression_values' and
    IndexError for a ')' without a matching '('.
    """
    open_positions = []
    groups = []
    for pos, char in enumerate(expr):
        if char == "(":
            # remember where the content of this group starts
            open_positions.append(pos + 1)
        elif char == ")":
            start = open_positions.pop()
            # outermost parentheses do not form a group of their own
            if open_positions:
                groups.append(expr[start:pos])

    if not groups:
        groups = (expr,)

    digits = [
        str(sum(expression_values[part] for part in group.split("[]")))
        for group in groups
    ]
    return int("".join(digits))
|
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2015-11-07 02:30:08 +01:00
|
|
|
# Functions implementing the arithmetic operators,
# looked up by their operator character
operator_functions = {"+": operator.add, "-": operator.sub, "*": operator.mul}


# Numeric value of each piece that results from splitting
# an expression on '[]' (see evaluate_expression())
expression_values = {"": 0, "+": 0, "!+": 1, "+!!": 1}
|
2016-12-16 13:28:36 +01:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2016-12-16 13:28:36 +01:00
|
|
|
@cache(maxage=365*24*60*60, keyarg=0)
def _cookiecache(key, item=None):
    """Cache slot for cookies, identified by 'key'

    The function itself only hands back 'item'. Storing and looking up
    values is presumably done by the 'cache' decorator, using the first
    argument as cache key (keyarg=0) and a maximum age of 365*24*60*60
    (one year, assuming the unit is seconds) -- verify against the
    decorator's implementation.
    """
    return item
|