-- lzr_csv: RFC-4180-compliant Lua CSV Parser

-- NOTE: The "rules" mentioned in this file refer
-- to the rules mentioned in RFC 4180 under
-- section 2.

-- Rule 4: The value separator is comma
local SEP = ","

-- Other characters with a special meaning for the parser
local CR = "\013" -- Carriage Return
local LF = "\010" -- Line Feed
local QUOTE = '"'

lzr_csv = {}

-- Writes a parser error message to the log.
local function log_error(message)
	minetest.log("error", message)
end

-- Parses CSV text into a list of records.
--
-- Parameters:
-- * text: The CSV text to parse (string)
--
-- Returns a list of records, where each record is a list of
-- values (strings). On a syntax error, an error message is
-- logged and nil is returned.
--
-- Details:
-- * Records must be separated by CRLF. A CR that is not directly
--   followed by a LF and a LF without a preceding CR are errors
--   (outside of quoted values).
-- * A CRLF after the last record is optional (rule 2).
-- * An empty line yields a record with a single empty value.
-- * A quoted value that is not closed before the end of the text
--   is an error.
-- * Leniency: Characters following the closing quote of a quoted
--   value are appended to that value.
-- * Leniency: A quote in the middle of an unquoted value starts
--   a quoted value and discards the text collected so far for
--   that value.
function lzr_csv.parse_csv(text)
	-- List of all records so far
	-- (a record is a list of values)
	local records = {}

	-- The record we are currently working on and adding values to
	local currentRecord = {}

	-- Characters of the value we are parsing right now. They are
	-- collected in a list and joined once the value is complete,
	-- which avoids re-creating a growing string for every character.
	local valueChars = {}

	-- Status variables to change parsing state:

	-- If true, at least one character of the current record has
	-- been read. Needed to tell "text ended right after a CRLF"
	-- (no pending record) apart from "text ended in the middle of
	-- a record" (pending record must be flushed), even when that
	-- record consists of a single value.
	local recordStarted = false

	-- If true, expects a LF character in the character check
	-- (required for CRLF check)
	local awaitingLF = false

	-- If true, we are currently parsing a quoted value,
	-- i.e. a value enclosed in quotes. As long this is true,
	-- the parsing rules are a bit different.
	local inQuotedValue = false

	-- If true, the previous character was a quote.
	-- Only used while inQuotedValue is true
	local prevWasQuote = false

	-- Completes the current value and adds it to the current record
	local function finishValue()
		currentRecord[#currentRecord + 1] = table.concat(valueChars)
		valueChars = {}
	end

	-- Completes the current record and adds it to the list of records
	local function finishRecord()
		finishValue()
		records[#records + 1] = currentRecord
		currentRecord = {}
		recordStarted = false
	end

	-- Keep track of position in string
	local pos = 1
	-- Final position in string
	local lastPos = string.len(text)

	-- Begin of the main parsing loop
	while pos <= lastPos do
		-- Get next character
		local c = string.sub(text, pos, pos)

		-- Rule 5: Values may be enclosed by quotes
		if inQuotedValue then
			-- This marks the "inside" of a quoted value
			if c == QUOTE then
				if prevWasQuote then
					-- Rule 7: Two quotes in a row stand for one literal quote
					valueChars[#valueChars + 1] = c
					prevWasQuote = false
				else
					-- Either the closing quote or the first half of an
					-- escaped quote; the next character will tell.
					prevWasQuote = true
				end
				pos = pos + 1
			elseif prevWasQuote then
				-- This marks the *end* of a quoted value,
				-- special parsing will be deactivated.
				-- pos is NOT advanced so that this character is
				-- parsed again with the normal rules.
				inQuotedValue = false
				prevWasQuote = false
			else
				-- Rule 6: Everything else (including CR, LF and
				-- the separator) is taken literally
				valueChars[#valueChars + 1] = c
				pos = pos + 1
			end

		else
			-- Rule 1: Records must be separated by CRLF,
			-- so after a CR only a LF is acceptable
			if awaitingLF and c ~= LF then
				if c == CR then
					log_error("[lzr_csv] Carriage Return found but Line Feed expected (pos="..pos..")")
				else
					log_error("[lzr_csv] Line Feed character expected but other character found (pos="..pos..")")
				end
				return nil
			end

			-- Rule 5: Values may be enclosed by quotes
			if c == QUOTE then
				-- This marks the *beginning* of a quoted value,
				-- activate special parsing mode in next iteration.
				valueChars = {}
				prevWasQuote = false
				inQuotedValue = true
				recordStarted = true

			-- Rule 1: Records must be separated by CRLF
			elseif c == CR then
				awaitingLF = true

			-- Rule 1: Records must be separated by CRLF
			elseif c == LF then
				if not awaitingLF then
					log_error("[lzr_csv] Line Feed found but there was no preceding Carriage Return (pos="..pos..")")
					return nil
				end
				finishRecord()
				awaitingLF = false

			-- Rule 4: Values are separated by comma
			elseif c == SEP then
				finishValue()
				recordStarted = true

			-- Parse a single normal character
			else
				valueChars[#valueChars + 1] = c
				recordStarted = true
			end

			pos = pos + 1
		end
	end

	-- We reached the end of the string

	if inQuotedValue and not prevWasQuote then
		-- The closing quote is missing
		log_error("[lzr_csv] End of text reached inside a quoted value (pos="..pos..")")
		return nil
	end
	if awaitingLF then
		-- The text ends with a CR that is not followed by a LF
		log_error("[lzr_csv] Line Feed character expected but end of text reached (pos="..pos..")")
		return nil
	end

	-- According to rule 2, a CRLF may or may not be at the end
	-- of the last record.
	if recordStarted then
		-- There was no CRLF. So flush the final value
		-- to the list of records and call it a day.
		finishRecord()
	end
	-- (If there *was* a CRLF, then no record has been started
	-- in which case no additional record should be added.)

	return records
end

-- Escapes the quote characters in the given string
local escape_quotes = function(str)
	-- The parentheses drop the second return value of
	-- string.gsub (the number of replacements)
	return (string.gsub(str, '"', '""'))
end

-- Converts a list of records into CSV text.
--
-- Parameters:
-- * rows: List of records, where each record is a list of values
--
-- Returns the CSV text (string). Records are separated by CRLF;
-- no CRLF is added after the last record. The rows table is not
-- modified.
function lzr_csv.write_csv(rows)
	local output_rows = {}
	for r=1, #rows do
		local row = rows[r]
		-- The quoted values are collected in a new list
		-- so the caller's row stays untouched
		local values = {}
		for v=1, #row do
			-- Rule 5: Enclose each value with quotes.
			-- We enclose ALL values although this is
			-- not strictly necessary, but keeps the code simpler.
			-- Rule 7: Quote characters within a value must be escaped
			values[v] = '"' .. escape_quotes(row[v]) .. '"'
		end
		-- Rule 4: Separate values by comma
		output_rows[r] = table.concat(values, SEP)
	end
	-- Rule 1: Separate records by CRLF
	return table.concat(output_rows, "\013\010")
end