diff --git a/CMakeLists.txt b/CMakeLists.txt
index 01958f160..4383b086e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -446,6 +446,7 @@ set(ZIG_CPP_SOURCES
 
 set(ZIG_STD_FILES
     "array_list.zig"
+    "ascii.zig"
     "atomic.zig"
     "atomic/int.zig"
     "atomic/queue.zig"
diff --git a/std/ascii.zig b/std/ascii.zig
new file mode 100644
index 000000000..5a35fa246
--- /dev/null
+++ b/std/ascii.zig
@@ -0,0 +1,232 @@
+// ASCII character classification and case conversion, backed by a 256-entry lookup table.
+// Does NOT look at the locale the way C89's toupper(3), isspace() et cetera do.
+// Also does not handle Unicode character classes; bytes 128..255 are in no class.
+// I could have taken only a u7 to make the ASCII-only intent clear, but it would be
+// slower (and the 128 extra table bytes are not much to pay).
+// It is my opinion that encodings other than UTF-8 should not be supported.
+//
+// https://upload.wikimedia.org/wikipedia/commons/thumb/c/cf/USASCII_code_chart.png/1200px-USASCII_code_chart.png
+
+const tIndex = enum(u3) { // Bit position of each character class inside a combinedTable entry.
+    Alpha,
+    Hex,
+    Space,
+    Digit,
+    Lower,
+    Upper,
+    // Ctrl: not stored in the table; isCtrl computes c < 0x20 or c == 127 (DEL).
+    // Print: not stored in the table; isPrint computes Graph or c == ' '. NOT '\t' et cetera.
+    Punct,
+    Graph,
+    // ASCII: not stored in the table; isASCII computes c < 128.
+    // Blank: not stored in the table; isBlank computes c == ' ' or c == '\x09'.
+};
+
+const combinedTable = init: { // One byte per character value; bit N is set when the character is in the class whose tIndex value is N.
+    comptime var table: [256]u8 = undefined;
+
+    const std = @import("std");
+    const mem = std.mem;
+
+    const alpha = []u1{ // 'A'..'Z' and 'a'..'z'
+        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15  (low nibble; each row below is one high nibble)
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+    };
+    const lower = []u1{ // 'a'..'z'
+        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+    };
+    const upper = []u1{ // 'A'..'Z'
+        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    };
+    const digit = []u1{ // '0'..'9'
+        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    };
+    const hex = []u1{ // '0'..'9', 'A'..'F' and 'a'..'f'
+        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+
+        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    };
+    const space = []u1{ // bytes 9..13 and ' ' (0x20)
+        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    };
+    const punct = []u1{ // 0x21..0x2f, 0x3a..0x40, 0x5b..0x60 and 0x7b..0x7e
+        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+
+        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
+    };
+    const graph = []u1{ // 0x21..0x7e: everything between ' ' and DEL, exclusive
+        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+    };
+
+    comptime var i = 0;
+    inline while (i < 128) : (i += 1) { // Pack the eight per-class bits for byte i into one table entry.
+        table[i] =
+            u8(alpha[i]) << @enumToInt(tIndex.Alpha) |
+            u8(hex[i]) << @enumToInt(tIndex.Hex) |
+            u8(space[i]) << @enumToInt(tIndex.Space) |
+            u8(digit[i]) << @enumToInt(tIndex.Digit) |
+            u8(lower[i]) << @enumToInt(tIndex.Lower) |
+            u8(upper[i]) << @enumToInt(tIndex.Upper) |
+            u8(punct[i]) << @enumToInt(tIndex.Punct) |
+            u8(graph[i]) << @enumToInt(tIndex.Graph);
+    }
+    mem.set(u8, table[128..256], 0); // Bytes 128..255 get no class bits at all.
+    break :init table;
+};
+
+fn inTable(c: u8, t: tIndex) bool { // True when the bit for class t is set in c's table entry.
+    return (combinedTable[c] & (u8(1) << @enumToInt(t))) != 0;
+}
+
+pub fn isAlNum(c: u8) bool { // True when c is in the Alpha or the Digit class (one lookup, combined mask).
+    return (combinedTable[c] & ((u8(1) << @enumToInt(tIndex.Alpha)) |
+        u8(1) << @enumToInt(tIndex.Digit))) != 0;
+}
+
+pub fn isAlpha(c: u8) bool { // True for 'A'..'Z' and 'a'..'z'.
+    return inTable(c, tIndex.Alpha);
+}
+
+pub fn isCtrl(c: u8) bool { // True for bytes below 0x20 and for 127; computed directly, not via the table.
+    return c < 0x20 or c == 127; //DEL
+}
+
+pub fn isCntrl(c: u8) bool { // Alias: forwards to isCtrl.
+    return isCtrl(c);
+}
+
+pub fn isDigit(c: u8) bool { // True for '0'..'9'.
+    return inTable(c, tIndex.Digit);
+}
+
+pub fn isGraph(c: u8) bool { // True for 0x21..0x7e (the space character is excluded).
+    return inTable(c, tIndex.Graph);
+}
+
+pub fn isLower(c: u8) bool { // True for 'a'..'z'.
+    return inTable(c, tIndex.Lower);
+}
+
+pub fn isPrint(c: u8) bool { // True for the Graph class plus ' ', i.e. 0x20..0x7e.
+    return inTable(c, tIndex.Graph) or c == ' ';
+}
+
+pub fn isPunct(c: u8) bool { // True for the Punct class: 0x21..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7e.
+    return inTable(c, tIndex.Punct);
+}
+
+pub fn isSpace(c: u8) bool { // True for ' ' and for bytes 9..13.
+    return inTable(c, tIndex.Space);
+}
+
+pub fn isUpper(c: u8) bool { // True for 'A'..'Z'.
+    return inTable(c, tIndex.Upper);
+}
+
+pub fn isXDigit(c: u8) bool { // True for '0'..'9', 'A'..'F' and 'a'..'f'.
+    return inTable(c, tIndex.Hex);
+}
+
+pub fn isASCII(c: u8) bool { // True when c is below 128.
+    return c < 128;
+}
+
+pub fn isBlank(c: u8) bool { // True only for ' ' and '\x09'.
+    return (c == ' ') or (c == '\x09');
+}
+
+pub fn toUpper(c: u8) u8 { // Maps 'a'..'z' to 'A'..'Z'; every other byte is returned unchanged.
+    if (isLower(c)) {
+        return c - 0x20; // 'a' is 0x61 and 'A' is 0x41: the two cases differ by 0x20.
+    } else {
+        return c;
+    }
+}
+
+pub fn toLower(c: u8) u8 { // Maps 'A'..'Z' to 'a'..'z'; every other byte is returned unchanged.
+    if (isUpper(c)) {
+        return c + 0x20; // Cannot overflow: only reached for 0x41..0x5a.
+    } else {
+        return c;
+    }
+}
+
+test "ascii character classes" {
+    const std = @import("std");
+    const testing = std.testing;
+
+    testing.expect('C' == toUpper('c'));
+    testing.expect(':' == toUpper(':')); // Non-letters pass through unchanged.
+    testing.expect('\xab' == toUpper('\xab')); // So do bytes above 127.
+    testing.expect('c' == toLower('C'));
+    testing.expect(isAlpha('c'));
+    testing.expect(!isAlpha('5'));
+    testing.expect(isSpace(' '));
+}
diff --git a/std/std.zig b/std/std.zig
index a0c679582..c668d2a10 100644
--- a/std/std.zig
+++ b/std/std.zig
@@ -42,6 +42,7 @@ pub const pdb = @import("pdb.zig");
 pub const rand = @import("rand.zig");
 pub const rb = @import("rb.zig");
 pub const sort = @import("sort.zig");
+pub const ascii = @import("ascii.zig");
 pub const testing = @import("testing.zig");
 pub const unicode = @import("unicode.zig");
 pub const valgrind = @import("valgrind.zig");