This commit is contained in:
zhaozg 2014-09-03 17:18:29 +08:00
commit 39f219e4b0
9 changed files with 404 additions and 82 deletions

View File

@ -0,0 +1,266 @@
-----------------------------------------------------------------------------
-- A Browser Class for easy Web Automation with Lua-cURL
-- Author: Kai Uwe Jesussek
-- RCS ID: $Id: browser.lua,v 0.1 2011/03/11 23:55:20 kai Exp $
-----------------------------------------------------------------------------
local cURL = require("lcurl.cURL")
local string = require("string")
local table = require("table")
local base = _G
USERAGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)" --windows xp internet explorer 6.0
--- Joins a base URL and a second URL (absolute or relative) into one URL.
-- Only URL shapes understood by url_split (http/https) are handled, and
-- dot segments ("./", "../") are NOT normalised.
-- @param _base absolute base URL; nil or "" makes the function return _url
-- @param _url  URL to resolve: absolute, protocol-relative ("//host/p"),
--              host-relative ("/p") or document-relative ("p")
-- @return the joined URL as a string
function url_join(_base, _url)
  assert(type(_url) == "string")
  if _base == nil or _base == "" then
    return _url
  end
  assert(type(_base) == "string")
  local base = url_split(_base)
  local url = url_split(_url)
  local protocol = base.protocol
  local host = base.host
  local path = ""
  local port = ""
  if url.protocol ~= nil then
    -- _url is absolute: every component comes from it (a successful
    -- url_split always yields strings for all four parts)
    protocol = url.protocol
    host = url.host
    port = url.port
    path = url.path
  else
    -- everything below needs pieces of the base, so fail with a readable
    -- message instead of a nil-concatenation error further down
    assert(protocol ~= nil, "url_join: base must be an absolute http(s) URL")
    if _url:sub(1, 2) == "//" then
      -- protocol-relative: keep the base protocol, take host and path
      host, port, path = _url:match("^//([^;/%?]+)(:?%d*)(/?.*)")
      assert(host ~= nil, "url_join: malformed protocol-relative URL")
    elseif _url:sub(1, 1) == "/" then
      -- host-relative: replace the whole path
      port = base.port
      path = _url
    else
      -- document-relative: resolve against the directory of the base path.
      -- The query/fragment is dropped first so that a "/" inside it is not
      -- mistaken for a path separator.
      local base_path = base.path:match("^[^?#]*")
      local dir = base_path:match("^(.*)/[^/]*")
      port = base.port
      -- a base without any path ("http://host") resolves against the root
      path = (dir or "") .. "/" .. _url
    end
  end
  return protocol .. "://" .. host .. port .. path
end
--- Splits an http/https URL into its components.
-- The bracketed IPv6 literal form is tried first, then the generic
-- host-name / IPv4 form.
-- @param _url URL string
-- @return table with string fields protocol, host, port (":NNN" or "")
--         and path (everything after host/port); the table is empty when
--         _url is not an http(s) URL
function url_split(_url)
  local ipv6_form = "^(https?)://(%[[0-9a-fA-F:]+%])(:?%d*)(.*)$"
  local name_form = "^(https?)://([^:/]+)(:?%d*)(.*)$"

  local protocol, host, port, path = _url:match(ipv6_form)
  if host == nil then
    protocol, host, port, path = _url:match(name_form)
  end

  return { protocol = protocol, host = host, port = port, path = path }
end
-----------------------------------------------------------------------------
-- Percent-encodes a string for use in a URL.
-- Every byte except ASCII letters, digits and underscore becomes "%xx"
-- (lower-case hexadecimal).
-- Input
--   s: binary string to be encoded
-- Returns
--   the escaped string (exactly one value)
-- taken from Lua Socket and added underscore to ignore (MIT-License)
-----------------------------------------------------------------------------
function escape(s)
  -- the outer parentheses drop gsub's second result (the substitution
  -- count) so it cannot leak into a caller's argument list
  return (string.gsub(s, "([^A-Za-z0-9_])", function(c)
    return string.format("%%%02x", string.byte(c))
  end))
end
-----------------------------------------------------------------------------
-- Decodes a percent-encoded string back into its binary form.
-- Each "%xx" sequence (two hexadecimal digits) is replaced by the byte it
-- denotes; everything else, including "+", is left unchanged.
-- Input
--   s: escaped string to be decoded
-- Returns
--   the decoded binary string (exactly one value)
-- taken from Lua Socket
-----------------------------------------------------------------------------
function unescape(s)
  -- the outer parentheses drop gsub's second result (the substitution count)
  return (string.gsub(s, "%%(%x%x)", function(hex)
    return string.char(tonumber(hex, 16))
  end))
end
--- Form-encodes a key -> value table
-- (application/x-www-form-urlencoded style, using escape()).
-- A value may be a single scalar or an array of scalars; an array yields
-- one "key=value" pair per element. Keys and values are both
-- percent-encoded. The order of different keys follows pairs() and is
-- therefore unspecified.
-- @param _arguments table of fields, or nil
-- @return the encoded string; "" for nil or an empty table
function tblencode (_arguments)
  if _arguments == nil or next(_arguments) == nil then -- nothing to encode
    return ""
  end
  -- collect the pairs and join once instead of growing a string in a loop
  local pieces = {}
  for key, vals in pairs(_arguments) do
    if type(vals) ~= "table" then
      vals = {vals}
    end
    local name = escape(tostring(key))
    for _, val in ipairs(vals) do
      -- tostring() lets numbers and booleans be sent as well
      pieces[#pieces + 1] = name .. "=" .. escape(tostring(val))
    end
  end
  return table.concat(pieces, "&")
end
--- Builds a write callback that collects received chunks.
-- Each chunk handed to the callback is appended to t; the callback then
-- reports the chunk length (followed by nil) back to its caller.
-- taken from luasocket page (MIT-License)
-- @param t array table that receives the chunks
-- @return the callback function
local function build_w_cb(t)
  local function collect(chunk)
    table.insert(t, chunk)
    return #chunk, nil
  end
  return collect
end
--- Builds a header callback that parses response header lines into t.
-- A "Name: value" line is stored as t.headers[Name] = value. A status line
-- ("HTTP/x.y CODE message") sets t.code (number) and t.codemessage.
-- CR/LF characters are stripped from stored values. Lines matching neither
-- form are ignored.
-- taken from luasocket page (MIT-License)
-- @param t result table; t.headers must already be a table
-- @return the callback; it returns the line length followed by nil
local function h_build_w_cb(t)
  return function(s)
    -- all captures are locals so that parsing never writes to globals
    local name, value = s:match("(.-): (.+)")
    if name and value then
      -- parentheses keep only gsub's string result
      t.headers[name] = (value:gsub("[\n\r]", ""))
    else
      local code, codemessage = string.match(s, "^HTTP/.* (%d+) (.+)$")
      if code and codemessage then
        t.code = tonumber(code)
        t.codemessage = (codemessage:gsub("[\n\r]", ""))
      end
    end
    return #s, nil
  end
end
--- The browser class: an easy to use interface on top of the cURL binding.
browser = {}

--- Creates a new browser object.
-- @param _share optional share handle to reuse; when nil a fresh one is
--               created with the "COOKIE" and "DNS" share options set
-- @return the new object (fields url and share), whose missing fields are
--         looked up in browser
function browser:new(_share)
  local share = _share
  if share == nil then
    share = cURL.share_init()
    share:setopt_share("COOKIE")
    share:setopt_share("DNS")
  end
  return setmetatable({ url = nil, share = share }, { __index = browser })
end
--- Remembers the proxy settings that prepare() applies to later requests.
-- The new settings are also printed to standard output.
-- @param _proxy     proxy value later passed to setopt_proxy
-- @param _proxytype proxy type later passed to setopt_proxytype
function browser:setProxy(_proxy, _proxytype)
  self.proxy, self.proxytype = _proxy, _proxytype
  print("setting proxy", self.proxy, self.proxytype)
end
--- Creates and configures an easy handle for the current self.url.
-- Applies the share handle, the user agent, the proxy (only when both
-- proxy and proxytype are set) and the CA bundle path (when set).
-- @param post_data  nil for a request without a body; a string is sent as
--                   the POST body; any other value is handed to req:post()
-- @param urlencoded when true, a table post_data is first turned into a
--                   string with tblencode()
-- @return the configured request handle
function browser:prepare(post_data, urlencoded)
  local req = cURL.easy_init()
  req:setopt_share(self.share)
  req:setopt_url(self.url)
  req:setopt_useragent(USERAGENT)

  local has_proxy = self.proxy ~= nil and self.proxytype ~= nil
  if has_proxy then
    req:setopt_proxy(self.proxy)
    req:setopt_proxytype(self.proxytype)
  end

  if self.caInfoPath ~= nil then
    req:setopt_cainfo(self.caInfoPath)
  end

  -- no body: nothing more to configure
  if post_data == nil then
    return req
  end

  local body = post_data
  if urlencoded and type(body) == "table" then
    body = tblencode(body)
  end

  -- anything that is not a plain string goes through req:post()
  if type(body) ~= "string" then
    req:post(body)
    return req
  end

  req:setopt_post(1)
  req:setopt_postfields(body)
  req:setopt_postfieldsize(#body)
  return req
end
--- Sets the current URL, resolving url against the previous one.
-- When the previous URL is a bare "scheme://host" without any path, a
-- trailing "/" is appended to it first so that relative URLs join cleanly.
-- @param url absolute or relative URL
function browser:setUrl(url)
  local current = self.url
  if current and current:match("^(https?://[^/]+)$") then
    current = current .. "/"
    self.url = current
  end
  self.url = url_join(current or "", url)
end
--- Opens a web page; only the first parameter is required.
-- @param url        URL to open, absolute or relative to the current URL
-- @param post_data  optional POST body (string or table), see prepare()
-- @param redirect   follow 301/302 responses carrying a Location header;
--                   defaults to true, pass false to disable
-- @param urlencoded url-encode a table post_data; defaults to true
-- @return table with headers (name -> value), body (string) and, once a
--         status line was parsed, code and codemessage
function browser:open(url, post_data, redirect, urlencoded)
  -- "x or true" would make an explicit false impossible, so test for nil
  if redirect == nil then
    redirect = true
  end
  if urlencoded == nil then
    urlencoded = true
  end
  local ret = { headers = {} }
  local response_body = {}
  -- setUrl is called exactly once: joining the same relative URL a second
  -- time would resolve it against itself and corrupt self.url
  self:setUrl(url)
  local req = self:prepare(post_data, urlencoded)
  req:perform({headerfunction=h_build_w_cb(ret), writefunction=build_w_cb(response_body)})
  ret.body = table.concat(response_body)
  local location = ret.headers.Location or ret.headers.location
  if redirect and location and (ret.code == 301 or ret.code == 302) then
    -- the redirect target is fetched without a body; setUrl inside the
    -- nested call resolves a relative Location against self.url
    return self:open(location, nil, redirect, urlencoded)
  end
  return ret
end
--- Downloads a URL into a file; only the first two parameters are required.
-- The response body is written to filename (created or truncated, binary
-- mode). Redirects are not followed.
-- @param url       URL to fetch, absolute or relative to the current URL
-- @param filename  path of the file to write
-- @param post_data optional POST body, passed to prepare() without
--                  url-encoding
-- @return table with headers (and code / codemessage once a status line was
--         parsed), or nil plus an error message when the file cannot be
--         opened
function browser:save(url, filename, post_data)
  -- open for writing before touching any state; a plain io.open(filename)
  -- would open the file read-only
  local file, err = io.open(filename, "wb")
  if not file then
    return nil, err
  end
  local ret = { headers = {} }
  self:setUrl(url)
  local req = self:prepare(post_data, false)
  -- pcall guarantees the file is closed even when the transfer raises
  local ok, perform_err = pcall(req.perform, req, {
    headerfunction = h_build_w_cb(ret),
    writefunction = function(str)
      file:write(str)
      -- report the consumed length, like build_w_cb does
      return #str
    end,
  })
  file:close()
  if not ok then
    error(perform_err, 0)
  end
  return ret
end
--- Remembers a CA certificate path for later requests.
-- prepare() passes the stored value to setopt_cainfo when it is not nil.
-- @param path value to store in self.caInfoPath
function browser:setCaInfo(path)
self.caInfoPath = path
end
--[[ usage examples:
-- b = browser:new()
-- resp = b:open("http://www.html-kit.com/tools/cookietester/")
-- print(resp.body)
-- for name, value in pairs(resp.headers) do print(name, value) end
--]]

View File

@ -279,7 +279,7 @@ static int lcurl_easy_set_POSTFIELDS(lua_State *L){
#undef LCURL_LST_OPT
#undef LCURL_LNG_OPT
static int lcurl_hpost_read_callback(char *buffer, size_t size, size_t nitems, void *arg);
static size_t lcurl_hpost_read_callback(char *buffer, size_t size, size_t nitems, void *arg);
static int lcurl_easy_set_HTTPPOST(lua_State *L){
lcurl_easy_t *p = lcurl_geteasy(L);
@ -393,10 +393,19 @@ static int lcurl_easy_unset_HTTPPOST(lua_State *L){
return lcurl_fail_ex(L, p->err_mode, LCURL_ERROR_EASY, code);
}
lcurl_storage_remove_i(L, p->storage, CURLOPT_HTTPPOST);
//! @fixme unset readdata/readfunction for
// curl_easy_setopt(p->curl, CURLOPT_READFUNCTION, 0);
lcurl_storage_get_i(L, p->storage, CURLOPT_HTTPPOST);
if(!lua_isnil(L, -1)){
lcurl_hpost_t *form = lcurl_gethpost_at(L, -1);
if(form->stream){
/* with stream we do not set CURLOPT_READDATA but
we also unset it to be sure that there no way to
call default curl reader with our READDATA
*/
curl_easy_setopt(p->curl, CURLOPT_READFUNCTION, 0);
curl_easy_setopt(p->curl, CURLOPT_READDATA, 0);
}
lcurl_storage_remove_i(L, p->storage, CURLOPT_HTTPPOST);
}
lua_settop(L, 1);
return 1;
@ -592,7 +601,7 @@ static int lcurl_easy_set_callback(lua_State *L,
return 1;
}
static int lcurl_write_callback_(lua_State*L,
static size_t lcurl_write_callback_(lua_State*L,
lcurl_easy_t *p, lcurl_callback_t *c,
char *ptr, size_t size, size_t nmemb
){
@ -625,7 +634,7 @@ static int lcurl_write_callback_(lua_State*L,
//{ Writer
static int lcurl_write_callback(char *ptr, size_t size, size_t nmemb, void *arg){
static size_t lcurl_write_callback(char *ptr, size_t size, size_t nmemb, void *arg){
lcurl_easy_t *p = arg;
return lcurl_write_callback_(p->L, p, &p->wr, ptr, size, nmemb);
}
@ -642,7 +651,7 @@ static int lcurl_easy_set_WRITEFUNCTION(lua_State *L){
//{ Reader
static int lcurl_read_callback(lua_State *L,
static size_t lcurl_read_callback(lua_State *L,
lcurl_callback_t *rd, lcurl_read_buffer_t *rbuffer,
char *buffer, size_t size, size_t nitems
){
@ -699,12 +708,12 @@ static int lcurl_read_callback(lua_State *L,
return data_size;
}
static int lcurl_easy_read_callback(char *buffer, size_t size, size_t nitems, void *arg){
static size_t lcurl_easy_read_callback(char *buffer, size_t size, size_t nitems, void *arg){
lcurl_easy_t *p = arg;
return lcurl_read_callback(p->L, &p->rd, &p->rbuffer, buffer, size, nitems);
}
static int lcurl_hpost_read_callback(char *buffer, size_t size, size_t nitems, void *arg){
static size_t lcurl_hpost_read_callback(char *buffer, size_t size, size_t nitems, void *arg){
lcurl_hpost_stream_t *p = arg;
return lcurl_read_callback(p->L, &p->rd, &p->rbuffer, buffer, size, nitems);
}
@ -721,7 +730,7 @@ static int lcurl_easy_set_READFUNCTION(lua_State *L){
//{ Header
static int lcurl_header_callback(char *ptr, size_t size, size_t nmemb, void *arg){
static size_t lcurl_header_callback(char *ptr, size_t size, size_t nmemb, void *arg){
lcurl_easy_t *p = arg;
return lcurl_write_callback_(p->L, p, &p->hd, ptr, size, nmemb);
}
@ -791,9 +800,8 @@ static int lcurl_easy_set_PROGRESSFUNCTION(lua_State *L){
#if LCURL_CURL_VER_GE(7,32,0)
if(p->pr.cb_ref != LUA_NOREF){
CURLcode code;
code = curl_easy_setopt(p->curl, CURLOPT_XFERINFOFUNCTION, lcurl_xferinfo_callback);
code = curl_easy_setopt(p->curl, CURLOPT_XFERINFODATA, p);
curl_easy_setopt(p->curl, CURLOPT_XFERINFOFUNCTION, lcurl_xferinfo_callback);
curl_easy_setopt(p->curl, CURLOPT_XFERINFODATA, p);
}
#endif
@ -835,7 +843,7 @@ static int lcurl_easy_setopt(lua_State *L){
return lcurl_fail_ex(L, p->err_mode, LCURL_ERROR_EASY, LCURL_E_UNKNOWN_OPTION);
}
static int lcurl_easy_unsetsetopt(lua_State *L){
static int lcurl_easy_unsetopt(lua_State *L){
lcurl_easy_t *p = lcurl_geteasy(L);
long opt;
@ -905,6 +913,7 @@ static const struct luaL_Reg lcurl_easy_methods[] = {
{ "reset", lcurl_easy_reset },
{ "setopt", lcurl_easy_setopt },
{ "getinfo", lcurl_easy_getinfo },
{ "unsetopt", lcurl_easy_unsetopt },
{ "escape", lcurl_easy_escape },
{ "unescape", lcurl_easy_unescape },
{ "perform", lcurl_easy_perform },

View File

@ -6,7 +6,60 @@
#define LCURL_HTTPPOST_NAME LCURL_PREFIX" HTTPPost"
static const char *LCURL_HTTPPOST = LCURL_HTTPPOST_NAME;
//{
//{ stream
/* Allocates a new stream record with all Lua references unset and appends
 * it to the end of p's stream list.
 * Returns the new record, or NULL when malloc fails (the list is then left
 * unchanged). */
static lcurl_hpost_stream_t *lcurl_hpost_stream_add(lua_State *L, lcurl_hpost_t *p){
  lcurl_hpost_stream_t **tail = &p->stream;
  lcurl_hpost_stream_t *node = malloc(sizeof(lcurl_hpost_stream_t));
  if(!node) return NULL;

  node->L = L;
  node->rbuffer.ref = LUA_NOREF;
  node->rd.ud_ref = LUA_NOREF;
  node->rd.cb_ref = LUA_NOREF;
  node->next = NULL;

  /* walk to the terminating NULL link and hook the new node in there */
  while(*tail) tail = &(*tail)->next;
  *tail = node;

  return node;
}
/* Releases one stream record: drops its three registry references (read
 * buffer, reader callback, reader context) and frees the memory.
 * A NULL ptr is a no-op. The record is not unlinked from any list. */
static void lcurl_hpost_stream_free(lua_State *L, lcurl_hpost_stream_t *ptr){
  if(!ptr) return;

  luaL_unref(L, LCURL_LUA_REGISTRY, ptr->rbuffer.ref);
  luaL_unref(L, LCURL_LUA_REGISTRY, ptr->rd.cb_ref);
  luaL_unref(L, LCURL_LUA_REGISTRY, ptr->rd.ud_ref);
  free(ptr);
}
/* Removes and frees the last record of p's stream list.
 * Does nothing when the list is empty. */
static void lcurl_hpost_stream_free_last(lua_State *L, lcurl_hpost_t *p){
  lcurl_hpost_stream_t *ptr = p->stream;
  if(!ptr) return;

  if(!ptr->next){
    /* single element: free it and leave the list empty */
    lcurl_hpost_stream_free(L, ptr);
    p->stream = NULL;
    /* must stop here: ptr is freed, so falling through to the loop below
       would read ptr->next->next from released memory */
    return;
  }

  /* stop on the record just before the last one */
  while(ptr->next->next) ptr = ptr->next;
  lcurl_hpost_stream_free(L, ptr->next);
  ptr->next = NULL;
}
/* Frees every record of p's stream list and leaves the list empty. */
static void lcurl_hpost_stream_free_all(lua_State *L, lcurl_hpost_t *p){
  lcurl_hpost_stream_t *node, *following;

  for(node = p->stream; node; node = following){
    /* remember the successor before the current record is released */
    following = node->next;
    lcurl_hpost_stream_free(L, node);
  }

  p->stream = 0;
}
//}
//{ HTTPPost
int lcurl_hpost_create(lua_State *L, int error_mode){
lcurl_hpost_t *p = lutil_newudatap(L, lcurl_hpost_t, LCURL_HTTPPOST);
@ -108,7 +161,7 @@ static int lcurl_hpost_add_file(lua_State *L){
const char *path = luaL_checkstring(L, 3);
const char *type = 0, *fname = 0;
struct curl_slist *list = NULL;
struct curl_forms forms[3];
struct curl_forms forms[4];
CURLFORMcode code;
int i = 0;
@ -132,8 +185,9 @@ static int lcurl_hpost_add_file(lua_State *L){
}
}
if(type){ forms[i].option = CURLFORM_CONTENTTYPE; forms[i++].value = type; }
if(list){ forms[i].option = CURLFORM_CONTENTHEADER; forms[i++].value = (char*)list; }
if(fname){ forms[i].option = CURLFORM_FILENAME; forms[i++].value = fname; }
if(type) { forms[i].option = CURLFORM_CONTENTTYPE; forms[i++].value = type; }
if(list) { forms[i].option = CURLFORM_CONTENTHEADER; forms[i++].value = (char*)list; }
forms[i].option = CURLFORM_END;
code = curl_formadd(&p->post, &p->last,
@ -154,55 +208,6 @@ static int lcurl_hpost_add_file(lua_State *L){
return 1;
}
static lcurl_hpost_stream_t *lcurl_hpost_stream_add(lua_State *L, lcurl_hpost_t *p){
lcurl_hpost_stream_t *ptr = p->stream;
lcurl_hpost_stream_t *stream = malloc(sizeof(lcurl_hpost_stream_t));
if(!stream) return NULL;
stream->L = L;
stream->rbuffer.ref = LUA_NOREF;
stream->rd.cb_ref = stream->rd.ud_ref = LUA_NOREF;
stream->next = NULL;
if(!p->stream) p->stream = stream;
else{
while(ptr->next) ptr = ptr->next;
ptr->next = stream;
}
return stream;
}
static void lcurl_hpost_stream_free(lua_State *L, lcurl_hpost_stream_t *ptr){
if(ptr){
luaL_unref(L, LCURL_LUA_REGISTRY, ptr->rbuffer.ref);
luaL_unref(L, LCURL_LUA_REGISTRY, ptr->rd.cb_ref);
luaL_unref(L, LCURL_LUA_REGISTRY, ptr->rd.ud_ref);
free(ptr);
}
}
static void lcurl_hpost_stream_free_last(lua_State *L, lcurl_hpost_t *p){
lcurl_hpost_stream_t *ptr = p->stream;
if(!ptr) return;
if(!ptr->next){
lcurl_hpost_stream_free(L, ptr);
p->stream = 0;
}
while(ptr->next->next) ptr = ptr->next;
lcurl_hpost_stream_free(L, ptr->next);
ptr->next = NULL;
}
static void lcurl_hpost_stream_free_all(lua_State *L, lcurl_hpost_t *p){
lcurl_hpost_stream_t *ptr = p->stream;
while(ptr){
lcurl_hpost_stream_t *next = ptr->next;
lcurl_hpost_stream_free(L, ptr);
ptr = next;
}
p->stream = 0;
}
static int lcurl_hpost_add_stream(lua_State *L){
// add_stream(name, [filename, [type,]] [headers,] size, reader [,context])
lcurl_hpost_t *p = lcurl_gethpost(L);
@ -472,12 +477,6 @@ static int lcurl_hpost_get(lua_State *L){
return 1;
}
static int lcurl_hpost_storage(lua_State *L){
lcurl_hpost_t *p = lcurl_gethpost(L);
lua_rawgeti(L, LCURL_LUA_REGISTRY, p->storage);
return 1;
}
static int lcurl_hpost_free(lua_State *L){
lcurl_hpost_t *p = lcurl_gethpost(L);
if(p->post){
@ -504,7 +503,6 @@ static const struct luaL_Reg lcurl_hpost_methods[] = {
{"add_files", lcurl_hpost_add_files },
{"storage", lcurl_hpost_storage },
{"get", lcurl_hpost_get },
{"free", lcurl_hpost_free },
{"__gc", lcurl_hpost_free },

View File

@ -15,8 +15,6 @@
#include "lcutils.h"
#include "lchttppost.h"
static const char *LCURL_ERROR_TAG = "LCURL_ERROR_TAG";
#define LCURL_MULTI_NAME LCURL_PREFIX" Multi"
static const char *LCURL_MULTI = LCURL_MULTI_NAME;

View File

@ -4,8 +4,6 @@
#include "lcutils.h"
#include "lchttppost.h"
static const char *LCURL_ERROR_TAG = "LCURL_ERROR_TAG";
#define LCURL_SHARE_NAME LCURL_PREFIX" Share"
static const char *LCURL_SHARE = LCURL_SHARE_NAME;

View File

@ -53,11 +53,21 @@ void lcurl_storage_remove_i(lua_State *L, int storage, int i){
lua_rawgeti(L, -1, LCURL_STORAGE_KV);
if(lua_istable(L, -1)){
lua_pushnil(L);
lua_rawseti(L, -3, i);
lua_rawseti(L, -2, i);
}
lua_pop(L, 2);
}
/* Pushes exactly one value onto the Lua stack: entry i of the
 * LCURL_STORAGE_KV sub-table of the storage table referenced by `storage`
 * in the registry. When that sub-table slot does not hold a table, the
 * slot's own value is left on the stack instead. */
void lcurl_storage_get_i(lua_State *L, int storage, int i){
  lua_rawgeti(L, LCURL_LUA_REGISTRY, storage); /* stack: storage     */
  lua_rawgeti(L, -1, LCURL_STORAGE_KV);        /* stack: storage, kv */
  lua_remove(L, -2);                           /* stack: kv          */

  if(!lua_istable(L, -1)) return;

  lua_rawgeti(L, -1, i);                       /* stack: kv, value   */
  lua_remove(L, -2);                           /* stack: value       */
}
struct curl_slist* lcurl_storage_remove_slist(lua_State *L, int storage, int idx){
struct curl_slist* list;
assert(idx != LUA_NOREF);
@ -141,7 +151,7 @@ int lcurl_set_callback(lua_State *L, lcurl_callback_t *c, int i, const char *met
i = lua_absindex(L, i);
luaL_argcheck(L, !lua_isnoneornil(L, i), i, "no function present");
luaL_argcheck(L, (top < (i + 1)), i + 2, "no arguments expected");
luaL_argcheck(L, (top < (i + 2)), i + 2, "no arguments expected");
// if(top > (i + 1)) lua_settop(L, i + 1); // this for force ignore other arguments

View File

@ -33,6 +33,8 @@ void lcurl_storage_preserve_iv(lua_State *L, int storage, int i, int v);
void lcurl_storage_remove_i(lua_State *L, int storage, int i);
void lcurl_storage_get_i(lua_State *L, int storage, int i);
int lcurl_storage_free(lua_State *L, int storage);
struct curl_slist* lcurl_util_array_to_slist(lua_State *L, int t);

View File

@ -268,7 +268,7 @@ Share.__index = function(self, k)
end
function Share:new()
local h, err = curl.easy()
local h, err = curl.share()
if not h then return nil, err end
local o = setmetatable({

View File

@ -52,6 +52,14 @@ function test_write_to_file_abort()
end
-- Checks that the write callback of an easy handle can be replaced
-- repeatedly, using three different argument forms, and that every call
-- returns the handle itself.
function test_reset_write_callback()
-- NOTE(review): f and c are not declared local here; presumably they are
-- released by this test case's teardown, which is not visible in this hunk
f = assert(io.open(fname, "w+b"))
c = assert(curl.easy{url = url})
-- form 1: a file object
assert_equal(c, c:setopt_writefunction(f))
-- form 2: a function plus a context value
assert_equal(c, c:setopt_writefunction(f.write, f))
-- form 3: a plain function
assert_equal(c, c:setopt_writefunction(print))
end
end
local _ENV = TEST_CASE'escape' do
@ -99,8 +107,41 @@ function test()
assert(not pfrom.value)
end
-- Checks the lifetime of a form attached to an easy handle: it must survive
-- garbage collection while attached, and become collectable once
-- unsetopt_httppost() has been called.
function test_unset()
local pfrom, e
do
-- `form` is local to this block, so afterwards the test itself holds no
-- strong reference to it (pfrom comes from weak_ptr, a helper defined
-- outside this hunk)
local form = curl.form()
e = curl.easy{httppost = form}
pfrom = weak_ptr(form)
end
gc_collect()
-- still reachable after a collection while the handle `e` has it set
assert(pfrom.value)
e:unsetopt_httppost()
gc_collect()
-- after unsetting the option the form must have been collected
assert(not pfrom.value)
end
-- Checks the lifetime of a form attached to an easy handle: it must survive
-- garbage collection while attached, and become collectable once the
-- handle has been reset().
function test_reset()
local pfrom, e
do
-- `form` is local to this block, so afterwards the test itself holds no
-- strong reference to it (pfrom comes from weak_ptr, a helper defined
-- outside this hunk)
local form = curl.form()
e = curl.easy{httppost = form}
pfrom = weak_ptr(form)
end
gc_collect()
-- still reachable after a collection while the handle `e` has it set
assert(pfrom.value)
e:reset()
gc_collect()
-- after the reset the form must have been collected
assert(not pfrom.value)
end
end
if not HAS_RUNNER then lunit.run() end