181 lines
5.3 KiB
Lua
181 lines
5.3 KiB
Lua
-- lzr_csv: RFC-4180-compliant Lua CSV Parser
|
|
|
|
-- NOTE: The "rules" mentioned in this file refer
|
|
-- to the rules mentioned in RFC 4180 under
|
|
-- section 2.
|
|
|
|
|
|
-- Rule 4: The value separator is comma
local SEP = ","

lzr_csv = {}

-- Parses CSV text into a list of records.
--
-- Parameters:
-- * text: The CSV text to parse (string). Records must be separated
--         by CRLF; the CRLF after the last record is optional (rule 2).
--
-- Returns:
-- * On success: a list of records. Each record is a list of values
--   (strings). All records have the same number of values.
-- * On failure: nil, followed by an error message (string).
--
-- Parsing fails if:
-- * text is not a string
-- * a CR is not directly followed by a LF, or a LF has no preceding CR
--   (outside of quoted values)
-- * a quoted value is never closed
-- * a quote appears in the middle of an unquoted value
-- * two consecutive records have a different number of values
--
-- Note: Characters that follow the closing quote of a quoted value
-- are tolerated and appended to that value.
function lzr_csv.parse_csv(text)
	if type(text) ~= "string" then
		return nil, "Not a string"
	end

	-- List of all records so far
	-- (a record is a list of values)
	local records = {}
	-- The record we are currently working on and adding values to
	local currentRecord = {}
	-- Keep track of position in string
	local pos = 1
	-- Final position in string
	local lastPos = string.len(text)

	-- Characters of the value we are parsing right now. They are
	-- collected in a list and joined once the value is complete,
	-- which avoids repeated string concatenation.
	local valueChars = {}
	-- If true, the current value was opened with a quote. Needed to
	-- tell an empty quoted value ("") apart from "no value at all".
	local valueWasQuoted = false

	-- Status variables to change parsing state:

	-- If true, the previous character was a CR, so the next
	-- character must be a LF (required for CRLF check)
	local awaitingLF = false
	-- If true, we are currently parsing a quoted value,
	-- i.e. a value enclosed in quotes. As long this is true,
	-- the parsing rules are a bit different.
	local inQuotedValue = false
	-- If true, the previous character was a quote.
	-- Only used while inQuotedValue is true
	local prevWasQuote = false

	-- Completes the current value and adds it to the current record
	local function finishValue()
		currentRecord[#currentRecord + 1] = table.concat(valueChars)
		valueChars = {}
		valueWasQuoted = false
	end

	-- Completes the current record and adds it to the list of records.
	-- Returns an error message if the new record does not have the
	-- same number of values as the record before it, nil otherwise.
	local function finishRecord()
		finishValue()
		records[#records + 1] = currentRecord
		currentRecord = {}
		local recLen = #records
		if recLen >= 2 and #records[recLen] ~= #records[recLen-1] then
			-- NOTE(review): the reported number is the index of the
			-- *preceding* record; kept as-is, confirm this is intended.
			return "Number of entries per record is not equal! (line "..(recLen-1)..")"
		end
		return nil
	end

	-- Begin of the main parsing loop
	while true do
		-- We reached the end of the string
		if pos > lastPos then
			if awaitingLF then
				return nil, "Line Feed character expected but reached end of string (pos="..pos..")"
			end
			-- A quoted value is only complete if the last character
			-- we saw was its closing quote
			if inQuotedValue and not prevWasQuote then
				return nil, "Closing quote expected but reached end of string (pos="..pos..")"
			end
			-- According to rule 2, a CRLF may or may not be at the end
			-- of the last record. If there was no CRLF, there is still
			-- an unfinished record that we have to flush.
			-- (If there *was* a CRLF, nothing has been started since,
			-- in which case no additional record should be added.)
			if #currentRecord > 0 or #valueChars > 0 or valueWasQuoted then
				local err = finishRecord()
				if err then
					return nil, err
				end
			end
			break
		end

		-- Get next character
		local c = string.sub(text, pos, pos)

		-- Rule 5: Values may be enclosed by quotes
		if inQuotedValue then
			-- This marks the "inside" of a quoted value
			if c == '"' then
				if prevWasQuote then
					-- Rule 7: Two quotes in a row are an escaped quote
					valueChars[#valueChars + 1] = c
					prevWasQuote = false
				else
					-- Either the closing quote or the first half of
					-- an escaped quote; the next character decides
					prevWasQuote = true
				end
			elseif prevWasQuote then
				-- This marks the *end* of a quoted value,
				-- special parsing will be deactivated.
				inQuotedValue = false
				prevWasQuote = false
				-- Step back so the current character is parsed again
				-- with the normal rules in the next iteration
				pos = pos - 1
			else
				valueChars[#valueChars + 1] = c
			end

		-- Rule 1: Records must be separated by CRLF,
		-- so after a CR, nothing but a LF is acceptable
		elseif awaitingLF and c ~= "\010" then
			if c == "\013" then
				return nil, "Carriage Return found but Line Feed expected (pos="..pos..")"
			end
			return nil, "Line Feed character expected but other character found (pos="..pos..")"

		-- Rule 5: Values may be enclosed by quotes
		elseif c == '"' then
			-- A quote may only appear at the very start of a value
			if #valueChars > 0 then
				return nil, "Quote found inside an unquoted value (pos="..pos..")"
			end
			-- This marks the *beginning* of a quoted value,
			-- activate special parsing mode in next iteration.
			valueWasQuoted = true
			prevWasQuote = false
			inQuotedValue = true

		elseif c == "\013" then -- CR (Carriage Return)
			awaitingLF = true

		elseif c == "\010" then -- LF (Line Feed)
			if not awaitingLF then
				return nil, "Line Feed found but there was no preceding Carriage Return (pos="..pos..")"
			end
			local err = finishRecord()
			if err then
				return nil, err
			end
			awaitingLF = false

		-- Rule 4: Values are separated by comma
		elseif c == SEP then
			finishValue()

		-- Parse a single normal character
		else
			valueChars[#valueChars + 1] = c
		end

		pos = pos + 1
	end

	return records
end
|
|
|
|
-- Escapes the quote characters in the given string
-- by doubling each of them (rule 7).
-- Returns exactly one value: the escaped string.
local function escape_quotes(str)
	-- string.gsub returns the result string *and* the number of
	-- replacements. The parentheses drop the count so it cannot leak
	-- into a caller's argument list or table constructor.
	return (string.gsub(str, '"', '""'))
end
|
|
|
|
-- Serializes a list of rows into CSV text.
-- rows is a list of rows, each row being a list of values.
-- Returns the CSV text as a string; rows are joined by CRLF and
-- no CRLF is added after the last row.
function lzr_csv.write_csv(rows)
	local lines = {}
	for row_index = 1, #rows do
		-- Work on a copy so the caller's row is left untouched
		local fields = table.copy(rows[row_index])
		for field_index = 1, #fields do
			local field = fields[field_index]
			-- Rule 5: A field containing a line break, quotation mark
			-- or comma needs to be enclosed by quotation marks.
			-- This must be decided before the quotes get escaped.
			local needs_quoting = string.find(field, '[",\013\010]') ~= nil

			-- Rule 7: Quote characters within a value must be escaped
			if string.find(field, '"') then
				field = escape_quotes(field)
			end

			-- Note: Every value *could* be enclosed by quotation marks,
			-- but we only do it when required to keep the output readable.
			if needs_quoting then
				field = '"' .. field .. '"'
			end
			fields[field_index] = field
		end
		-- Rule 4: Separate values by comma
		lines[#lines + 1] = table.concat(fields, SEP)
	end
	-- Rule 1: Separate records by CRLF
	return table.concat(lines, "\013\010")
end
|
|
|
|
|
|
-- Load and execute test.lua from this mod's directory
local modpath = minetest.get_modpath("lzr_csv")
dofile(modpath .. "/test.lua")
|