cache cf_clearance cookies

This commit is contained in:
Mike Fährmann 2019-03-14 16:14:29 +01:00
parent 34ea0d6a10
commit f612284d24
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 22 additions and 2 deletions

View File

@@ -13,6 +13,7 @@ import time
import operator import operator
import urllib.parse import urllib.parse
from . import text from . import text
from .cache import memcache
def is_challenge(response): def is_challenge(response):
@@ -47,7 +48,14 @@ def solve_challenge(session, response, kwargs):
location = cf_response.headers["Location"] location = cf_response.headers["Location"]
if location[0] == "/": if location[0] == "/":
location = root + location location = root + location
return location
for cookie in cf_response.cookies:
if cookie.name == "cf_clearance":
return location, cookie.domain, {
cookie.name: cookie.value,
"__cfduid" : response.cookies.get("__cfduid", ""),
}
return location, "", {}
def solve_js_challenge(page, netloc): def solve_js_challenge(page, netloc):
@@ -126,3 +134,8 @@ VALUES = {
"!+": 1, "!+": 1,
"+!!": 1, "+!!": 1,
} }
@memcache(keyarg=0)
def cookies(category):
    """Return the cached Cloudflare cookies for 'category'.

    The function body only supplies the value used when nothing has been
    stored for a category yet (None).  Actual entries are expected to be
    written by callers via 'cookies.update(category, (domain, cookies))'
    and read back as that same '(domain, cookies)' tuple.
    """
    # NOTE(review): 'keyarg=0' presumably selects 'category' as the cache
    # key -- confirm against the 'memcache' decorator in '.cache'.
    return None

View File

@@ -90,7 +90,9 @@ class Extractor():
return response return response
if cloudflare.is_challenge(response): if cloudflare.is_challenge(response):
self.log.info("Solving Cloudflare challenge") self.log.info("Solving Cloudflare challenge")
url = cloudflare.solve_challenge(session, response, kwargs) url, domain, cookies = cloudflare.solve_challenge(
session, response, kwargs)
cloudflare.cookies.update(self.category, (domain, cookies))
continue continue
msg = "{}: {} for url: {}".format(code, response.reason, url) msg = "{}: {} for url: {}".format(code, response.reason, url)
@@ -159,6 +161,11 @@ class Extractor():
else: else:
self.session.cookies.update(cookiejar) self.session.cookies.update(cookiejar)
cookies = cloudflare.cookies(self.category)
if cookies:
domain, cookies = cookies
self._update_cookies_dict(cookies, domain)
def _update_cookies(self, cookies, *, domain=""): def _update_cookies(self, cookies, *, domain=""):
"""Update the session's cookiejar with 'cookies'""" """Update the session's cookiejar with 'cookies'"""
if isinstance(cookies, dict): if isinstance(cookies, dict):