diff --git a/src/url.lua b/src/url.lua index 110ea94..0a3a80a 100644 --- a/src/url.lua +++ b/src/url.lua @@ -76,6 +76,34 @@ function _M.unescape(s) end)) end +----------------------------------------------------------------------------- +-- Removes '..' and '.' components appropriately from a path. +-- Input +-- path +-- Returns +-- dot-normalized path +local function remove_dot_components(path) + local marker = string.char(1) + repeat + local was = path + path = path:gsub('//', '/'..marker..'/', 1) + until path == was + repeat + local was = path + path = path:gsub('/%./', '/', 1) + until path == was + repeat + local was = path + path = path:gsub('[^/]+/%.%./([^/]+)', '%1', 1) + until path == was + path = path:gsub('[^/]+/%.%./*$', '') + path = path:gsub('/%.%.$', '/') + path = path:gsub('/%.$', '/') + path = path:gsub('^/%.%./', '/') + path = path:gsub(marker, '') + return path +end + ----------------------------------------------------------------------------- -- Builds a path from a base path and a relative path -- Input @@ -85,23 +113,12 @@ end -- corresponding absolute path ----------------------------------------------------------------------------- local function absolute_path(base_path, relative_path) - if string.sub(relative_path, 1, 1) == "/" then return relative_path end - local path = string.gsub(base_path, "[^/]*$", "") - path = path .. relative_path - path = string.gsub(path, "([^/]*%./)", function (s) - if s ~= "./" then return s else return "" end - end) - path = string.gsub(path, "/%.$", "/") - local reduced - while reduced ~= path do - reduced = path - path = string.gsub(reduced, "([^/]*/%.%./)", function (s) - if s ~= "../../" then return "" else return s end - end) - end - path = string.gsub(reduced, "([^/]*/%.%.)$", function (s) - if s ~= "../.." then return "" else return s end - end) + if string.sub(relative_path, 1, 1) == "/" then + return remove_dot_components(relative_path) end + base_path = base_path:gsub("[^/]*$", "") + if not base_path:find'/$' then base_path = base_path .. '/' end + local path = base_path .. relative_path + path = remove_dot_components(path) return path end @@ -227,10 +244,14 @@ function _M.absolute(base_url, relative_url) else base_parsed = _M.parse(base_url) end + local result local relative_parsed = _M.parse(relative_url) - if not base_parsed then return relative_url - elseif not relative_parsed then return base_url - elseif relative_parsed.scheme then return relative_url + if not base_parsed then + result = relative_url + elseif not relative_parsed then + result = base_url + elseif relative_parsed.scheme then + result = relative_url else relative_parsed.scheme = base_parsed.scheme if not relative_parsed.authority then @@ -248,8 +269,9 @@ function _M.absolute(base_url, relative_url) relative_parsed.path) end end - return _M.build(relative_parsed) + result = _M.build(relative_parsed) end + return remove_dot_components(result) end ----------------------------------------------------------------------------- diff --git a/test/urltest.lua b/test/urltest.lua index 1090a7e..ae8ba75 100644 --- a/test/urltest.lua +++ b/test/urltest.lua @@ -61,7 +61,7 @@ end local check_absolute_url = function(base, relative, absolute) local res = socket.url.absolute(base, relative) if res ~= absolute then - io.write("absolute: In test for '", relative, "' expected '", + io.write("absolute: In test for base='", base, "', rel='", relative, "' expected '", absolute, "' but got '", res, "'\n") os.exit() end @@ -627,25 +627,37 @@ check_absolute_url("http://a/b/c/d;p?q#f", "/g", "http://a/g") check_absolute_url("http://a/b/c/d;p?q#f", "//g", "http://g") check_absolute_url("http://a/b/c/d;p?q#f", "?y", "http://a/b/c/d;p?y") check_absolute_url("http://a/b/c/d;p?q#f", "g?y", "http://a/b/c/g?y") -check_absolute_url("http://a/b/c/d;p?q#f", "g?y/./x", "http://a/b/c/g?y/./x") +check_absolute_url("http://a/b/c/d;p?q#f", "g?y/./x", "http://a/b/c/g?y/x") check_absolute_url("http://a/b/c/d;p?q#f", "#s", "http://a/b/c/d;p?q#s") check_absolute_url("http://a/b/c/d;p?q#f", "g#s", "http://a/b/c/g#s") -check_absolute_url("http://a/b/c/d;p?q#f", "g#s/./x", "http://a/b/c/g#s/./x") +check_absolute_url("http://a/b/c/d;p?q#f", "g#s/./x", "http://a/b/c/g#s/x") check_absolute_url("http://a/b/c/d;p?q#f", "g?y#s", "http://a/b/c/g?y#s") check_absolute_url("http://a/b/c/d;p?q#f", ";x", "http://a/b/c/d;x") check_absolute_url("http://a/b/c/d;p?q#f", "g;x", "http://a/b/c/g;x") check_absolute_url("http://a/b/c/d;p?q#f", "g;x?y#s", "http://a/b/c/g;x?y#s") check_absolute_url("http://a/b/c/d;p?q#f", ".", "http://a/b/c/") check_absolute_url("http://a/b/c/d;p?q#f", "./", "http://a/b/c/") +check_absolute_url("http://a/b/c/d;p?q#f", "./g", "http://a/b/c/g") +check_absolute_url("http://a/b/c/d;p?q#f", "./g/", "http://a/b/c/g/") +check_absolute_url("http://a/b/c/d;p?q#f", "././g", "http://a/b/c/g") +check_absolute_url("http://a/b/c/d;p?q#f", "././g/", "http://a/b/c/g/") +check_absolute_url("http://a/b/c/d;p?q#f", "g/.", "http://a/b/c/g/") +check_absolute_url("http://a/b/c/d;p?q#f", "g/./", "http://a/b/c/g/") +check_absolute_url("http://a/b/c/d;p?q#f", "g/./.", "http://a/b/c/g/") +check_absolute_url("http://a/b/c/d;p?q#f", "g/././", "http://a/b/c/g/") +check_absolute_url("http://a/b/c/d;p?q#f", "./.", "http://a/b/c/") +check_absolute_url("http://a/b/c/d;p?q#f", "././.", "http://a/b/c/") +check_absolute_url("http://a/b/c/d;p?q#f", "././g/./.", "http://a/b/c/g/") check_absolute_url("http://a/b/c/d;p?q#f", "..", "http://a/b/") check_absolute_url("http://a/b/c/d;p?q#f", "../", "http://a/b/") check_absolute_url("http://a/b/c/d;p?q#f", "../g", "http://a/b/g") check_absolute_url("http://a/b/c/d;p?q#f", "../..", "http://a/") check_absolute_url("http://a/b/c/d;p?q#f", "../../", "http://a/") check_absolute_url("http://a/b/c/d;p?q#f", "../../g", "http://a/g") +check_absolute_url("http://a/b/c/d;p?q#f", "../../../g", "http://a/g") check_absolute_url("http://a/b/c/d;p?q#f", "", "http://a/b/c/d;p?q#f") -check_absolute_url("http://a/b/c/d;p?q#f", "/./g", "http://a/./g") -check_absolute_url("http://a/b/c/d;p?q#f", "/../g", "http://a/../g") +check_absolute_url("http://a/b/c/d;p?q#f", "/./g", "http://a/g") +check_absolute_url("http://a/b/c/d;p?q#f", "/../g", "http://a/g") check_absolute_url("http://a/b/c/d;p?q#f", "g.", "http://a/b/c/g.") check_absolute_url("http://a/b/c/d;p?q#f", ".g", "http://a/b/c/.g") check_absolute_url("http://a/b/c/d;p?q#f", "g..", "http://a/b/c/g..") @@ -655,6 +667,17 @@ check_absolute_url("http://a/b/c/d;p?q#f", "./g/.", "http://a/b/c/g/") check_absolute_url("http://a/b/c/d;p?q#f", "g/./h", "http://a/b/c/g/h") check_absolute_url("http://a/b/c/d;p?q#f", "g/../h", "http://a/b/c/h") +check_absolute_url("http://a/b/c/d:p?q#f/", "../g/", "http://a/b/g/") +check_absolute_url("http://a/b/c/d:p?q#f/", "../g", "http://a/b/g") +check_absolute_url("http://a/b/c/d:p?q#f/", "../.g/", "http://a/b/.g/") +check_absolute_url("http://a/b/c/d:p?q#f/", "../.g", "http://a/b/.g") +check_absolute_url("http://a/b/c/d:p?q#f/", "../.g.h/", "http://a/b/.g.h/") +check_absolute_url("http://a/b/c/d:p?q#f/", "../.g.h", "http://a/b/.g.h") + +check_absolute_url("http://a/b/c/d:p?q#f/", "g.h/", "http://a/b/c/g.h/") +check_absolute_url("http://a/b/c/d:p?q#f/", "../g.h/", "http://a/b/g.h/") +check_absolute_url("http://a/", "../g.h/", "http://a/g.h/") + -- extra tests check_absolute_url("//a/b/c/d;p?q#f", "d/e/f", "//a/b/c/d/e/f") check_absolute_url("/a/b/c/d;p?q#f", "d/e/f", "/a/b/c/d/e/f") @@ -662,6 +685,17 @@ check_absolute_url("a/b/c/d", "d/e/f", "a/b/c/d/e/f") check_absolute_url("a/b/c/d/../", "d/e/f", "a/b/c/d/e/f") check_absolute_url("http://velox.telemar.com.br", "/dashboard/index.html", "http://velox.telemar.com.br/dashboard/index.html") +check_absolute_url("http://example.com/", "../.badhost.com/", "http://example.com/.badhost.com/") +check_absolute_url("http://example.com/", "...badhost.com/", "http://example.com/...badhost.com/") +check_absolute_url("http://example.com/a/b/c/d/", "../q", "http://example.com/a/b/c/q") +check_absolute_url("http://example.com/a/b/c/d/", "../../q", "http://example.com/a/b/q") +check_absolute_url("http://example.com/a/b/c/d/", "../../../q", "http://example.com/a/q") +check_absolute_url("http://example.com", ".badhost.com", "http://example.com/.badhost.com") +check_absolute_url("http://example.com/a/b/c/d/", "..//../../../q", "http://example.com/a/q") +check_absolute_url("http://example.com/a/b/c/d/", "..//a/../../../../q", "http://example.com/a/q") +check_absolute_url("http://example.com/a/b/c/d/", "..//a/..//../../../q", "http://example.com/a/b/q") +check_absolute_url("http://example.com/a/b/c/d/", "..//a/..///../../../../q", "http://example.com/a/b/q") +check_absolute_url("http://example.com/a/b/c/d/", "../x/a/../y/z/../../../../q", "http://example.com/a/b/q") print("testing path parsing and composition") check_parse_path("/eu/tu/ele", { "eu", "tu", "ele"; is_absolute = 1 })