2024-09-03 20:44:56 +02:00

160 lines
4.6 KiB
Lua

-- lzr_csv: RFC-4180-compliant Lua CSV Parser
-- NOTE: The "rules" mentioned in this file refer
-- to the rules mentioned in RFC 4180 under
-- section 2.
-- Rule 4: The value separator is comma
local SEP = ","

-- Global table that holds the public functions of this module
lzr_csv = {}

--- Parses CSV text into a list of records.
-- The "rules" mentioned in the comments refer to RFC 4180, section 2.
-- Records must be separated by CRLF; a CRLF after the last record is
-- optional. Values may be enclosed in double quotes, in which case they
-- may contain commas, CR, LF and (doubled) quote characters.
-- @param text CSV text to parse (string)
-- @return On success: list of records, where each record is a list of
--         string values. An empty string yields an empty list.
--         On failure: nil, followed by an error message (string).
function lzr_csv.parse_csv(text)
	if type(text) ~= "string" then
		return nil, "Not a string"
	end

	-- List of all records so far
	-- (a record is a list of values)
	local records = {}
	-- The record we are currently working on and adding values to
	local currentRecord = {}
	-- Keep track of position in string
	local pos = 1
	-- Final position in string
	local lastPos = string.len(text)
	-- The current value we are parsing right now.
	-- When finished, it must be added to currentRecord.
	local currentValue = ""
	-- If true, the current value was written in quotes. Needed to tell an
	-- explicit empty value ("") apart from "no value at all" at the end
	-- of the string.
	local valueWasQuoted = false

	-- Status variables to change parsing state:

	-- If true, the previous character was a CR, so the next character
	-- must be a LF (required for CRLF check)
	local awaitingLF = false
	-- If true, we are currently parsing a quoted value,
	-- i.e. a value enclosed in quotes. As long this is true,
	-- the parsing rules are a bit different.
	local inQuotedValue = false
	-- If true, the previous character was a quote.
	-- Only used while inQuotedValue is true
	local prevWasQuote = false

	-- Begin of the main parsing loop
	while true do
		-- We reached the end of the string
		if pos > lastPos then
			if awaitingLF then
				return nil, "Line Feed character expected but reached end of string (pos="..pos..")"
			end
			-- A quoted value is only complete if the last character
			-- we saw was its closing quote
			if inQuotedValue and not prevWasQuote then
				return nil, "Closing quote expected but reached end of string (pos="..pos..")"
			end
			-- According to rule 2, a CRLF may or may not be at the end
			-- of the last record.
			if #currentRecord > 0 or currentValue ~= "" or valueWasQuoted then
				-- There was no CRLF. So flush the final value
				-- to the list of records and call it a day.
				table.insert(currentRecord, currentValue)
				table.insert(records, currentRecord)
			end
			-- (If there *was* a CRLF, then nothing has been read since,
			-- in which case no additional record should be added.)
			break
		end

		-- Get next character
		local c = string.sub(text, pos, pos)

		if inQuotedValue then
			-- Rule 5: Values may be enclosed by quotes.
			-- This marks the "inside" of a quoted value
			if c == '"' then
				if prevWasQuote then
					-- Rule 7: Two quotes in a row stand for one literal quote
					currentValue = currentValue .. c
					prevWasQuote = false
				else
					-- Either the closing quote or the first half of an
					-- escaped quote; the next character decides.
					prevWasQuote = true
				end
			elseif prevWasQuote then
				-- The previous quote was the *end* of the quoted value,
				-- special parsing will be deactivated.
				inQuotedValue = false
				prevWasQuote = false
				-- Step back so the current character is parsed again
				-- with the normal rules in the next iteration.
				pos = pos - 1
			else
				currentValue = currentValue .. c
			end
		elseif c == "\010" then -- LF (Line Feed)
			-- Rule 1: Records must be separated by CRLF
			if not awaitingLF then
				return nil, "Line Feed found but there was no preceding Carriage Return (pos="..pos..")"
			end
			-- The record is complete. currentRecord is replaced by a
			-- fresh table, so it can be stored without copying it.
			table.insert(currentRecord, currentValue)
			table.insert(records, currentRecord)
			currentRecord = {}
			currentValue = ""
			valueWasQuoted = false
			awaitingLF = false
		elseif awaitingLF then
			-- A CR must be followed by LF, no matter which other
			-- character (separator, quote, ...) comes instead.
			if c == "\013" then
				return nil, "Carriage Return found but Line Feed expected (pos="..pos..")"
			end
			return nil, "Line Feed character expected but other character found (pos="..pos..")"
		elseif c == '"' then
			-- Rule 5: Values may be enclosed by quotes.
			-- A quote is only allowed at the very start of a value;
			-- anything else would discard or garble what was read so far.
			if currentValue ~= "" then
				return nil, "Quote found inside unquoted value (pos="..pos..")"
			end
			-- This marks the *beginning* of a quoted value,
			-- activate special parsing mode in next iteration.
			prevWasQuote = false
			inQuotedValue = true
			valueWasQuoted = true
		elseif c == "\013" then -- CR (Carriage Return)
			-- Rule 1: Records must be separated by CRLF
			awaitingLF = true
		elseif c == SEP then
			-- Rule 4: Values are separated by comma
			table.insert(currentRecord, currentValue)
			currentValue = ""
			valueWasQuoted = false
		else
			-- Parse a single normal character
			currentValue = currentValue .. c
		end

		pos = pos + 1
	end

	return records
end
--- Escapes the quote characters in the given string.
-- Every double quote is replaced by two double quotes (rule 7).
-- @param str String to escape
-- @return The escaped string (exactly one return value)
local function escape_quotes(str)
	-- string.gsub returns the result AND the number of replacements.
	-- The extra parentheses drop the count so that it cannot leak into
	-- the caller's argument list or table constructor.
	return (string.gsub(str, '"', '""'))
end
--- Serializes a list of records into CSV text.
-- @param rows List of records; each record is a list of values
-- @return CSV text (string). Records are joined by CRLF; there is no
--         CRLF after the last record.
function lzr_csv.write_csv(rows)
	local lines = {}
	for row_index = 1, #rows do
		-- Work on a copy so that the caller's record is left untouched.
		-- NOTE(review): table.copy is not defined in this file;
		-- presumably it is provided by the Minetest engine.
		local fields = table.copy(rows[row_index])
		for field_index = 1, #fields do
			-- Rule 7: Quote characters within a value must be escaped
			local escaped = escape_quotes(fields[field_index])
			-- Rule 5: Enclose each value with quotes.
			-- ALL values are enclosed, although this is not strictly
			-- necessary; it keeps the code simpler.
			fields[field_index] = '"' .. escaped .. '"'
		end
		-- Rule 4: Separate values by comma
		lines[#lines + 1] = table.concat(fields, SEP)
	end
	-- Rule 1: Separate records by CRLF
	return table.concat(lines, "\013\010")
end
-- Execute test.lua from the directory of the lzr_csv mod
local modpath = minetest.get_modpath("lzr_csv")
dofile(modpath .. "/test.lua")