Headless challenge solving

master
Ryan 2020-05-26 15:01:33 -07:00
parent fd974bea8f
commit 972d5335fa
3 changed files with 149 additions and 88 deletions

163
index.js
View File

@ -5,8 +5,9 @@ const sandbox = require('./lib/sandbox');
const decodeEmails = require('./lib/email-decode.js');
const { getDefaultHeaders, caseless } = require('./lib/headers');
const brotli = require('./lib/brotli');
const crypto = require('crypto');
const { getChromeRuntimeMock } = require('./lib/chromeRuntime');
const { deprecate } = require('util');
const puppeteer = require('puppeteer');
const {
RequestError,
@ -42,7 +43,8 @@ function defaults (params) {
gzip: true,
agentOptions: {
// Removes a few problematic TLSv1.0 ciphers to avoid CAPTCHA
ciphers: crypto.constants.defaultCipherList + ':!ECDHE+SHA:!AES128-SHA'
sigalgs: 'ECDSA+SHA256'
// ciphers: crypto.constants.defaultCipherList + ':!ECDHE+SHA:!AES128-SHA'
}
};
@ -284,7 +286,7 @@ function validateResponse (options, response, body) {
return false;
}
function onChallenge (options, response, body) {
async function onChallenge (options, response, body) {
const callback = options.callback;
const uri = response.request.uri;
// The query string to send back to Cloudflare
@ -301,100 +303,85 @@ function onChallenge (options, response, body) {
return callback(error);
}
let timeout = parseInt(options.cloudflareTimeout);
let match;
const browser = await puppeteer.launch({ headless: true });
const page = await browser.newPage();
match = body.match(/name="(.+?)" value="(.+?)"/);
if (match) {
const hiddenInputName = match[1];
payload[hiddenInputName] = match[2];
}
match = body.match(/name="jschl_vc" value="(\w+)"/);
if (!match) {
cause = 'challengeId (jschl_vc) extraction failed';
return callback(new ParserError(cause, options, response));
}
payload.jschl_vc = match[1];
match = body.match(/name="pass" value="(.+?)"/);
if (!match) {
cause = 'Attribute (pass) value extraction failed';
return callback(new ParserError(cause, options, response));
}
payload.pass = match[1];
match = body.match(/getElementById\('cf-content'\)[\s\S]+?setTimeout.+?\r?\n([\s\S]+?a\.value\s*=.+?)\r?\n(?:[^{<>]*},\s*(\d{4,}))?/);
if (!match) {
cause = 'setTimeout callback extraction failed';
return callback(new ParserError(cause, options, response));
}
if (isNaN(timeout)) {
if (match[2] !== undefined) {
timeout = parseInt(match[2]);
if (timeout > options.cloudflareMaxTimeout) {
if (debugging) {
console.warn('Cloudflare\'s timeout is excessive: ' + (timeout / 1000) + 's');
await page.evaluateOnNewDocument(
args => {
if (args && args.fns) {
for (const fn of Object.keys(args.fns)) {
eval(`var ${fn} = ${args.fns[fn]}`) // eslint-disable-line
}
timeout = options.cloudflareMaxTimeout;
}
} else {
cause = 'Failed to parse challenge timeout';
return callback(new ParserError(cause, options, response));
window.chrome = getChromeRuntimeMock(window);
},
{
fns: {
getChromeRuntimeMock: `${getChromeRuntimeMock.toString()}`
}
}
}
);
// Append a.value so it's always returned from the vm
response.challenge = match[1] + '; a.value';
const ua = response.request.headers[Object.keys(response.request.headers).find(key => key.toLowerCase() === 'user-agent')];
await page.setUserAgent(ua || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36');
try {
const ctx = new sandbox.Context({ hostname: uri.hostname, body });
payload.jschl_answer = sandbox.eval(response.challenge, ctx);
} catch (error) {
error.message = 'Challenge evaluation failed: ' + error.message;
return callback(new ParserError(error, options, response));
}
await page.setRequestInterception(true);
page.on('request', async request => {
if (request.url() === uri.href) {
options.challengesToSolve -= 1;
if (isNaN(payload.jschl_answer)) {
cause = 'Challenge answer is not a number';
return callback(new ParserError(cause, options, response));
}
if (options.challengesToSolve === 0) {
cause = 'Cloudflare challenge loop';
error = new CloudflareError(cause, options, response);
error.errorType = 4;
// Prevent reusing the headers object to simplify unit testing.
options.headers = Object.assign({}, options.headers);
// Use the original uri as the referer and to construct the answer uri.
options.headers.Referer = uri.href;
// Check is form to be submitted via GET or POST
match = body.match(/id="challenge-form" action="(.+?)" method="(.+?)"/);
if (match && match[2] && match[2] === 'POST') {
options.uri = uri.protocol + '//' + uri.host + match[1];
// Pass the payload using body form
options.form = payload;
options.method = 'POST';
} else {
// Whatever is there, fallback to GET
options.uri = uri.protocol + '//' + uri.host + '/cdn-cgi/l/chk_jschl';
// Pass the payload using query string
options.qs = payload;
}
// Decrement the number of challenges to solve.
options.challengesToSolve -= 1;
// baseUrl can't be used in conjunction with an absolute uri
if (options.baseUrl !== undefined) {
options.baseUrl = undefined;
}
// Change required by Cloudflate in Jan-Feb 2020
options.uri = options.uri.replace(/&amp;/g, '&');
browser.close();
// Make request with answer after delay.
timeout -= Date.now() - response.responseStartTime;
setTimeout(performRequest, timeout, options, false);
return callback(error);
}
request.respond({
status: 503,
body
});
} else if (request.isNavigationRequest()) {
// Prevent reusing the headers object to simplify unit testing.
options.headers = Object.assign({}, options.headers);
// Use the original uri as the referer and to construct the answer uri.
options.headers.Referer = uri.href;
// Check is form to be submitted via GET or POST
options.uri = request.url();
const method = request.method();
if (method === 'POST') {
const parts = request.postData().split('&').map(p => p.split('='));
for (const part of parts) {
payload[part[0]] = decodeURIComponent(part[1]);
};
// Pass the payload using body form
options.form = payload;
options.method = 'POST';
} else {
// Whatever is there, fallback to GET
options.uri = uri.protocol + '//' + uri.host + '/cdn-cgi/l/chk_jschl';
// Pass the payload using query string
options.qs = { ...response.request.qs, ...payload };
}
if (options.baseUrl !== undefined) {
options.baseUrl = undefined;
}
performRequest(options, false);
browser.close();
} else {
request.continue();
}
});
await page.goto(uri.href);
}
// Parses the reCAPTCHA form and hands control over to the user

73
lib/chromeRuntime.js Normal file
View File

@ -0,0 +1,73 @@
const getChromeRuntimeMock = window => {
const installer = { install () {} };
return {
app: {
isInstalled: false,
InstallState: {
DISABLED: 'disabled',
INSTALLED: 'installed',
NOT_INSTALLED: 'not_installed'
},
RunningState: {
CANNOT_RUN: 'cannot_run',
READY_TO_RUN: 'ready_to_run',
RUNNING: 'running'
}
},
csi () {},
loadTimes () {},
webstore: {
onInstallStageChanged: {},
onDownloadProgress: {},
install (url, onSuccess, onFailure) {
installer.install(url, onSuccess, onFailure);
}
},
runtime: {
OnInstalledReason: {
CHROME_UPDATE: 'chrome_update',
INSTALL: 'install',
SHARED_MODULE_UPDATE: 'shared_module_update',
UPDATE: 'update'
},
OnRestartRequiredReason: {
APP_UPDATE: 'app_update',
OS_UPDATE: 'os_update',
PERIODIC: 'periodic'
},
PlatformArch: {
ARM: 'arm',
MIPS: 'mips',
MIPS64: 'mips64',
X86_32: 'x86-32',
X86_64: 'x86-64'
},
PlatformNaclArch: {
ARM: 'arm',
MIPS: 'mips',
MIPS64: 'mips64',
X86_32: 'x86-32',
X86_64: 'x86-64'
},
PlatformOs: {
ANDROID: 'android',
CROS: 'cros',
LINUX: 'linux',
MAC: 'mac',
OPENBSD: 'openbsd',
WIN: 'win'
},
RequestUpdateCheckStatus: {
NO_UPDATE: 'no_update',
THROTTLED: 'throttled',
UPDATE_AVAILABLE: 'update_available'
},
connect: function() {}.bind(function() {}), // eslint-disable-line
sendMessage: function() {}.bind(function() {}) // eslint-disable-line
}
};
};
module.exports = {
getChromeRuntimeMock
};

View File

@ -41,6 +41,7 @@
"license": "MIT",
"homepage": "https://github.com/codemanki/cloudscraper",
"dependencies": {
"puppeteer": "^3.1.0",
"request-promise": "^4.2.4"
},
"devDependencies": {