-- (File listing metadata, not part of the program: 181 lines, 5.3 KiB, Lua)

-- lzr_csv: RFC-4180-compliant Lua CSV Parser
-- NOTE: The "rules" mentioned in this file refer
-- to the rules mentioned in RFC 4180 under
-- section 2.
-- Rule 4: The value separator is a comma.
-- Used by parse_csv to split values and by write_csv to join them.
local SEP = ","
-- Table that the functions parse_csv and write_csv are attached to.
-- It is assigned without `local`, so it is a global variable.
lzr_csv = {}
-- Parses CSV text into a list of records.
--
-- Parameters:
--   text: string containing the CSV data
--
-- Returns:
--   On success: a list of records, where each record is a list of
--   string values. Empty text yields an empty list.
--   On failure: nil, followed by an error message string.
--
-- Parsing rules enforced here:
-- * Values are separated by SEP, records by CRLF. A bare CR or a bare LF
--   outside of a quoted value is an error.
-- * The CRLF after the last record is optional (rule 2).
-- * All records must contain the same number of values.
-- * A value may be enclosed in quotes; inside quotes, a doubled quote
--   stands for one literal quote and SEP/CR/LF are taken literally.
--   A quoted value that is still open at the end of the text is an error.
-- * NOTE: Unquoted text in front of an opening quote is discarded and
--   unquoted text following a closing quote is appended to the value.
function lzr_csv.parse_csv(text)
	if type(text) ~= "string" then
		return nil, "Not a string"
	end

	-- List of all records so far
	-- (a record is a list of values)
	local records = {}
	-- The record we are currently working on and adding values to
	local currentRecord = {}
	-- Keep track of position in string
	local pos = 1
	-- Final position in string
	local lastPos = string.len(text)
	-- The current value we are parsing right now.
	-- When finished, it must be added to currentRecord.
	local currentValue = ""

	-- Status variables to change parsing state:

	-- If true, the previous character was a CR, so the next
	-- character must be a LF (required for CRLF check)
	local awaitingLF = false
	-- If true, we are currently parsing a quoted value,
	-- i.e. a value enclosed in quotes.
	local inQuotedValue = false
	-- If true, the previous character was a quote.
	-- Only used while inQuotedValue is true
	local prevWasQuote = false
	-- If true, the current value had an opening quote. Needed so that
	-- a final record consisting only of `""` is not mistaken for
	-- "nothing left after the last CRLF".
	local valueWasQuoted = false

	-- Adds the current value to the current record, adds that record to
	-- the list of records and starts a fresh record.
	-- Returns true on success, or false plus an error message if the new
	-- record has a different number of values than the previous one.
	local function finishRecord()
		table.insert(currentRecord, currentValue)
		table.insert(records, currentRecord)
		local recLen = #records
		if recLen >= 2 and #records[recLen] ~= #records[recLen-1] then
			return false, "Number of entries per record is not equal! (line "..(recLen-1)..")"
		end
		-- currentRecord is replaced by a new table, so the table stored
		-- in records is never touched again and needs no copy.
		currentRecord = {}
		currentValue = ""
		valueWasQuoted = false
		return true
	end

	-- Begin of the main parsing loop
	while pos <= lastPos do
		-- Get next character
		local c = string.sub(text, pos, pos)

		-- Rule 5: Values may be enclosed by quotes
		if inQuotedValue then
			-- This marks the "inside" of a quoted value
			if c == '"' then
				if prevWasQuote then
					-- Rule 7: Two quotes in a row are one literal quote
					currentValue = currentValue .. c
					prevWasQuote = false
				else
					-- Either the closing quote or the first half of an
					-- escaped quote; the next character decides.
					prevWasQuote = true
				end
			elseif prevWasQuote then
				-- This marks the *end* of a quoted value,
				-- special parsing will be deactivated.
				inQuotedValue = false
				prevWasQuote = false
				-- Step back so this character is read again
				-- with the normal (unquoted) rules.
				pos = pos - 1
			else
				currentValue = currentValue .. c
			end

		-- Rule 1: After a CR, only a LF is acceptable. Checking this
		-- before every other branch also rejects a comma or quote here.
		elseif awaitingLF and c ~= "\010" then
			if c == "\013" then
				return nil, "Carriage Return found but Line Feed expected (pos="..pos..")"
			end
			return nil, "Line Feed character expected but other character found (pos="..pos..")"

		-- Rule 5: Values may be enclosed by quotes
		elseif c == '"' then
			-- This marks the *beginning* of a quoted value,
			-- activate special parsing mode in next iteration.
			currentValue = ""
			prevWasQuote = false
			inQuotedValue = true
			valueWasQuoted = true

		-- Rule 1: Records must be separated by CRLF
		elseif c == "\013" then -- CR (Carriage Return)
			awaitingLF = true

		-- Rule 1: Records must be separated by CRLF
		elseif c == "\010" then -- LF (Line Feed)
			if not awaitingLF then
				return nil, "Line Feed found but there was no preceding Carriage Return (pos="..pos..")"
			end
			local ok, err = finishRecord()
			if not ok then
				return nil, err
			end
			awaitingLF = false

		-- Rule 4: Values are separated by comma
		elseif c == SEP then
			table.insert(currentRecord, currentValue)
			currentValue = ""
			valueWasQuoted = false

		-- Parse a single normal character
		else
			currentValue = currentValue .. c
		end

		pos = pos + 1
	end

	-- We reached the end of the string
	if awaitingLF then
		return nil, "Line Feed character expected but reached end of string (pos="..pos..")"
	end
	-- prevWasQuote being true here means the last character was the
	-- closing quote, which is fine. Otherwise the value was never closed.
	if inQuotedValue and not prevWasQuote then
		return nil, "Quoted value is not closed but reached end of string (pos="..pos..")"
	end

	-- According to rule 2, a CRLF may or may not be at the end
	-- of the last record. If there was no CRLF, flush the final
	-- value and record. (If there *was* a CRLF, nothing has been
	-- collected since, so no additional record is added.)
	if #currentRecord > 0 or currentValue ~= "" or valueWasQuoted then
		local ok, err = finishRecord()
		if not ok then
			return nil, err
		end
	end

	return records
end
-- Escapes the quote characters in the given string by doubling
-- each of them (rule 7).
-- Returns exactly one value, the escaped string. string.gsub also
-- returns the number of replacements; the parentheses drop it so it
-- cannot leak into a caller's argument list or table constructor.
local escape_quotes = function(str)
	return (string.gsub(str, '"', '""'))
end
-- Converts a list of rows into CSV text.
--
-- Parameters:
--   rows: list of rows, where each row is a list of values.
--         Values are converted with tostring() before writing.
--         The given tables are not modified.
--
-- Returns:
--   The CSV text as a string. Values are joined by SEP and rows are
--   joined by CRLF; no CRLF is written after the last row.
function lzr_csv.write_csv(rows)
	local output_rows = {}
	for r=1, #rows do
		local row = rows[r]
		-- Build the output values in a new table so the caller's
		-- row is left untouched.
		local values = {}
		for v=1, #row do
			local value = tostring(row[v])

			-- Rule 5: Fields containing linebreak, quotation mark or
			-- separator need to be enclosed by quotation marks.
			-- SEP is searched as plain text so it always matches
			-- whatever is used for joining below.
			local needs_quotes = string.find(value, '["\013\010]') ~= nil
				or string.find(value, SEP, 1, true) ~= nil
				-- A row whose only value is the empty string would
				-- otherwise become an empty line, which cannot be told
				-- apart from "no row" at the end of the text.
				or (value == "" and #row == 1)

			-- Note: We *can* enclose every value with quotation marks,
			-- but only insert them when needed to keep it more
			-- readable.
			if needs_quotes then
				-- Rule 7: Quote characters within a value must be escaped.
				-- Assigning to a local keeps only the first return value.
				local escaped = escape_quotes(value)
				value = '"' .. escaped .. '"'
			end

			values[v] = value
		end
		-- Rule 4: Separate values by comma
		output_rows[r] = table.concat(values, SEP)
	end
	-- Rule 1: Separate records by CRLF
	return table.concat(output_rows, "\013\010")
end
-- Run test.lua from the directory that the engine reports for "lzr_csv".
local modpath = minetest.get_modpath("lzr_csv")
dofile(modpath .. "/test.lua")