-- lzr_csv: RFC-4180-compliant Lua CSV Parser

-- NOTE: The "rules" mentioned in this file refer
-- to the rules mentioned in RFC 4180 under
-- section 2.

-- Rule 4: The value separator is comma
local SEP = ","

-- Characters with a special meaning for the parser
local CR = "\013"
local LF = "\010"
local QUOTE = '"'

lzr_csv = {}

-- Appends the finished `record` to `records` and checks that it
-- has the same number of values as the record before it.
-- Returns an error message on a mismatch, nil otherwise.
local function push_record(records, record)
	records[#records + 1] = record
	local count = #records
	if count >= 2 and #records[count] ~= #records[count - 1] then
		return "Number of entries per record is not equal! (line "..(count-1)..")"
	end
	return nil
end

-- Parses the CSV string `text`.
--
-- Returns a list of records on success; each record is a list of
-- string values. Records must be separated by CRLF, the CRLF after
-- the final record is optional, and all records must have the same
-- number of values.
--
-- On failure, returns nil plus an error message. Failures are:
-- * `text` is not a string
-- * a CR that is not immediately followed by LF
-- * a LF without a preceding CR
-- * a quote character in the middle of an unquoted value
-- * a quoted value that is never closed
-- * records with differing numbers of values
function lzr_csv.parse_csv(text)
	if type(text) ~= "string" then
		return nil, "Not a string"
	end

	-- List of all records so far
	-- (a record is a list of values)
	local records = {}
	-- The record we are currently working on and adding values to
	local currentRecord = {}

	-- Keep track of position in string
	local pos = 1
	-- Final position in string
	local lastPos = string.len(text)

	-- The current value we are parsing right now.
	-- When finished, it must be added to currentRecord.
	local currentValue = ""

	-- Status variables to change parsing state:

	-- If true, the previous character was a CR, so the only
	-- character allowed next is LF (required for CRLF check)
	local awaitingLF = false
	-- If true, we are currently parsing a quoted value,
	-- i.e. a value enclosed in quotes. As long as this is true,
	-- the parsing rules are a bit different.
	local inQuotedValue = false
	-- If true, the previous character was a quote.
	-- Only used while inQuotedValue is true
	local prevWasQuote = false

	-- Main parsing loop: look at one character per iteration
	while pos <= lastPos do
		local c = string.sub(text, pos, pos)

		if inQuotedValue then
			-- Rule 5: This marks the "inside" of a quoted value
			if c == QUOTE then
				if prevWasQuote then
					-- Rule 7: Two quotes in a row are one literal quote
					currentValue = currentValue .. c
					prevWasQuote = false
				else
					-- Either the closing quote or the first half of
					-- an escaped quote; the next character decides.
					prevWasQuote = true
				end
			elseif prevWasQuote then
				-- The previous quote was the *end* of the quoted value.
				-- Leave the special parsing mode and step back so that
				-- this character is parsed again by the normal rules.
				inQuotedValue = false
				prevWasQuote = false
				pos = pos - 1
			else
				currentValue = currentValue .. c
			end

		elseif awaitingLF and c ~= LF then
			-- Rule 1: A CR must be followed by LF, no matter whether
			-- the next character is a separator, a quote or anything else.
			if c == CR then
				return nil, "Carriage Return found but Line Feed expected (pos="..pos..")"
			end
			return nil, "Line Feed character expected but other character found (pos="..pos..")"

		elseif c == QUOTE then
			-- Rule 5: This marks the *beginning* of a quoted value.
			-- A quote is only allowed at the start of a value; anything
			-- collected so far would otherwise be thrown away silently.
			if currentValue ~= "" then
				return nil, "Quote found inside an unquoted value (pos="..pos..")"
			end
			prevWasQuote = false
			inQuotedValue = true

		elseif c == CR then
			-- Rule 1: Records must be separated by CRLF
			awaitingLF = true

		elseif c == LF then
			-- Rule 1: Records must be separated by CRLF
			if not awaitingLF then
				return nil, "Line Feed found but there was no preceding Carriage Return (pos="..pos..")"
			end
			currentRecord[#currentRecord + 1] = currentValue
			local err = push_record(records, currentRecord)
			if err then
				return nil, err
			end
			-- Start the next record with a fresh table
			currentRecord = {}
			currentValue = ""
			awaitingLF = false

		elseif c == SEP then
			-- Rule 4: Values are separated by comma
			currentRecord[#currentRecord + 1] = currentValue
			currentValue = ""

		else
			-- Parse a single normal character
			currentValue = currentValue .. c
		end

		pos = pos + 1
	end

	-- We reached the end of the string
	if awaitingLF then
		return nil, "Line Feed character expected but reached end of string (pos="..pos..")"
	end
	if inQuotedValue and not prevWasQuote then
		-- The opening quote was never matched by a closing one
		return nil, "Closing quote expected but reached end of string (pos="..pos..")"
	end

	-- According to rule 2, a CRLF may or may not be at the end
	-- of the last record. If there was none, flush the final value
	-- and record. `inQuotedValue` is still true here when the string
	-- ended right after a closing quote, which covers a final record
	-- that consists of a single empty quoted value.
	-- (If there *was* a CRLF, nothing is pending and no
	-- additional record is added.)
	if #currentRecord > 0 or currentValue ~= "" or inQuotedValue then
		currentRecord[#currentRecord + 1] = currentValue
		local err = push_record(records, currentRecord)
		if err then
			return nil, err
		end
	end

	return records
end

-- Escapes the quote characters in the given string.
-- The parentheses drop the replacement count returned by string.gsub.
local function escape_quotes(str)
	return (string.gsub(str, '"', '""'))
end

-- Converts `rows` (a list of records, each a list of values) into
-- a CSV string. `rows` is not modified. Values are converted with
-- tostring. Records are joined by CRLF; no CRLF is added after the
-- final record.
function lzr_csv.write_csv(rows)
	local output_rows = {}
	for r = 1, #rows do
		local row = rows[r]
		local values = {}
		for v = 1, #row do
			local value = tostring(row[v])
			-- Rule 5: Fields containing linebreak, quotation mark or comma
			-- need to be enclosed by quotation marks.
			-- Note: We *can* enclose every value with quotation marks,
			-- but only insert them when needed to keep it more
			-- readable.
			if string.find(value, '[",\013\010]') ~= nil then
				-- Rule 7: Quote characters within a value must be escaped
				value = '"' .. escape_quotes(value) .. '"'
			end
			values[v] = value
		end
		-- Rule 4: Separate values by comma
		output_rows[r] = table.concat(values, SEP)
	end
	-- Rule 1: Separate records by CRLF
	return table.concat(output_rows, "\013\010")
end

-- Load the test file of this mod. This needs the Minetest API,
-- so it is skipped when the file is loaded outside of Minetest.
if minetest and minetest.get_modpath then
	dofile(minetest.get_modpath("lzr_csv").."/test.lua")
end