285 lines
9.3 KiB
Zig
285 lines
9.3 KiB
Zig
// Does NOT look at the locale the way C89's toupper(3), isspace() et cetera do.
|
|
// I could have taken only a u7 to make this clear, but it would be slower
|
|
// It is my opinion that encodings other than UTF-8 should not be supported.
|
|
//
|
|
// (and the 128 extra table bytes needed to accept a full u8 are not much to pay).
|
|
// Also does not handle Unicode character classes.
|
|
//
|
|
// https://upload.wikimedia.org/wikipedia/commons/thumb/c/cf/USASCII_code_chart.png/1200px-USASCII_code_chart.png
|
|
|
|
const std = @import("std");
|
|
|
|
/// Bit positions of the character classes packed into each `combinedTable`
/// entry. The eight tags use all eight bits of a `u8` entry.
const tIndex = enum(u3) {
    Alpha = 0,
    Hex = 1,
    Space = 2,
    Digit = 3,
    Lower = 4,
    Upper = 5,
    Punct = 6,
    Graph = 7,

    // Classes that have no bit of their own:
    //   Ctrl:    < 0x20 || == DEL
    //   Print:   = Graph || == ' '. NOT '\t' et cetera
    //   ASCII:   | ~0b01111111
    //   isBlank: == ' ' || == '\x09'
};
|
|
|
|
/// Lookup table indexed by byte value. Bit `@enumToInt(tag)` of an entry is set
/// when that byte belongs to the class named by the `tIndex` tag. Entries for
/// bytes 128...255 are zero, so non-ASCII bytes belong to no class.
const combinedTable = init: {
    // One single-bit mask per character class.
    const alpha = @as(u8, 1) << @enumToInt(tIndex.Alpha);
    const hex = @as(u8, 1) << @enumToInt(tIndex.Hex);
    const space = @as(u8, 1) << @enumToInt(tIndex.Space);
    const digit = @as(u8, 1) << @enumToInt(tIndex.Digit);
    const lower = @as(u8, 1) << @enumToInt(tIndex.Lower);
    const upper = @as(u8, 1) << @enumToInt(tIndex.Upper);
    const punct = @as(u8, 1) << @enumToInt(tIndex.Punct);
    const graph = @as(u8, 1) << @enumToInt(tIndex.Graph);

    comptime var table: [256]u8 = undefined;

    comptime var c: usize = 0;
    inline while (c < 256) : (c += 1) {
        table[c] = switch (c) {
            // '\t', '\n', vertical tab, form feed, '\r' and the space character.
            '\t'...'\r', ' ' => space,
            '0'...'9' => digit | hex | graph,
            'A'...'F' => alpha | upper | hex | graph,
            'G'...'Z' => alpha | upper | graph,
            'a'...'f' => alpha | lower | hex | graph,
            'g'...'z' => alpha | lower | graph,
            // Every remaining visible character is punctuation.
            '!'...'/', ':'...'@', '['...'`', '{'...'~' => punct | graph,
            // Remaining control characters, DEL, and every byte >= 128.
            else => 0,
        };
    }
    break :init table;
};
|
|
|
|
/// Returns whether the bit for class `t` is set in the table entry for byte `c`.
fn inTable(c: u8, t: tIndex) bool {
    const mask = @as(u8, 1) << @enumToInt(t);
    const entry = combinedTable[c];
    return (entry & mask) != 0;
}
|
|
|
|
/// Returns whether `c` has the Alpha or the Digit bit set in the table,
/// i.e. is an ASCII letter or decimal digit.
pub fn isAlNum(c: u8) bool {
    const alpha_bit = @as(u8, 1) << @enumToInt(tIndex.Alpha);
    const digit_bit = @as(u8, 1) << @enumToInt(tIndex.Digit);
    // A single lookup tests both classes at once.
    return (combinedTable[c] & (alpha_bit | digit_bit)) != 0;
}
|
|
|
|
/// Returns whether `c` is an ASCII letter ('A'...'Z' or 'a'...'z').
pub fn isAlpha(c: u8) bool {
    return inTable(c, .Alpha);
}
|
|
|
|
/// Returns whether `c` is an ASCII control character: anything below 0x20,
/// or DEL (0x7f).
pub fn isCntrl(c: u8) bool {
    return switch (c) {
        0x00...0x1f, 0x7f => true,
        else => false,
    };
}
|
|
|
|
/// Returns whether `c` is an ASCII decimal digit ('0'...'9').
pub fn isDigit(c: u8) bool {
    return inTable(c, .Digit);
}
|
|
|
|
/// Returns whether `c` has the Graph bit set in the table, i.e. is a visible
/// ASCII character ('!'...'~'). The space character is not included.
pub fn isGraph(c: u8) bool {
    return inTable(c, .Graph);
}
|
|
|
|
/// Returns whether `c` is an ASCII lowercase letter ('a'...'z').
pub fn isLower(c: u8) bool {
    return inTable(c, .Lower);
}
|
|
|
|
/// Returns whether `c` is printable: the space character or anything in the
/// Graph class. Other whitespace such as '\t' is not printable.
pub fn isPrint(c: u8) bool {
    if (c == ' ') return true;
    return inTable(c, tIndex.Graph);
}
|
|
|
|
/// Returns whether `c` has the Punct bit set in the table, i.e. is a visible
/// ASCII character that is neither a letter nor a digit.
pub fn isPunct(c: u8) bool {
    return inTable(c, .Punct);
}
|
|
|
|
/// Returns whether `c` is ASCII whitespace: ' ' or one of the bytes
/// 0x09...0x0d ('\t', '\n', vertical tab, form feed, '\r').
pub fn isSpace(c: u8) bool {
    return inTable(c, .Space);
}
|
|
|
|
/// Returns whether `c` is an ASCII uppercase letter ('A'...'Z').
pub fn isUpper(c: u8) bool {
    return inTable(c, .Upper);
}
|
|
|
|
/// Returns whether `c` is an ASCII hexadecimal digit
/// ('0'...'9', 'A'...'F' or 'a'...'f').
pub fn isXDigit(c: u8) bool {
    return inTable(c, .Hex);
}
|
|
|
|
/// Returns whether `c` lies in the 7-bit ASCII range (0...127).
pub fn isASCII(c: u8) bool {
    // ASCII bytes are exactly those with the top bit clear.
    return (c & 0x80) == 0;
}
|
|
|
|
/// Returns whether `c` is a blank: the space character or a horizontal tab.
pub fn isBlank(c: u8) bool {
    return switch (c) {
        ' ', '\t' => true,
        else => false,
    };
}
|
|
|
|
/// Returns the uppercase form of `c` when `c` is an ASCII lowercase letter;
/// every other byte is returned unchanged.
pub fn toUpper(c: u8) u8 {
    // Upper- and lowercase ASCII letters differ only in this bit.
    const case_bit: u8 = 0b00100000;
    return if (isLower(c)) c & ~case_bit else c;
}
|
|
|
|
/// Returns the lowercase form of `c` when `c` is an ASCII uppercase letter;
/// every other byte is returned unchanged.
pub fn toLower(c: u8) u8 {
    // Upper- and lowercase ASCII letters differ only in this bit.
    const case_bit: u8 = 0b00100000;
    return if (isUpper(c)) c | case_bit else c;
}
|
|
|
|
test "ascii character classes" {
    const expect = std.testing.expect;

    // Case conversion only affects ASCII letters.
    expect(toUpper('c') == 'C');
    expect(toUpper(':') == ':');
    expect(toUpper('\xab') == '\xab');
    expect(toLower('C') == 'c');

    // Table-backed predicates.
    expect(isAlpha('c'));
    expect(!isAlpha('5'));
    expect(isSpace(' '));
}
|
|
|
|
/// Allocates a new slice of the same length as `ascii_string` and fills it
/// with the result of `toLower` applied to each byte.
/// The slice comes from `allocator` and is returned to the caller, who is
/// responsible for freeing it. Any allocation error is propagated.
pub fn allocLowerString(allocator: *std.mem.Allocator, ascii_string: []const u8) ![]u8 {
    const lowered = try allocator.alloc(u8, ascii_string.len);
    for (ascii_string) |byte, idx| {
        lowered[idx] = toLower(byte);
    }
    return lowered;
}
|
|
|
|
test "allocLowerString" {
    var buf: [100]u8 = undefined;
    // Keep the allocator state in a named variable. Taking `&...allocator`
    // directly from the value returned by `init` would hand out a pointer
    // into an unnamed temporary.
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const allocator = &fba.allocator;

    // Non-letters and the multi-byte UTF-8 sequence must pass through unchanged.
    const result = try allocLowerString(allocator, "aBcDeFgHiJkLmNOPqrst0234+💩!");
    std.testing.expect(std.mem.eql(u8, "abcdefghijklmnopqrst0234+💩!", result));
}
|
|
|
|
/// Returns whether `a` and `b` have the same length and are equal byte for
/// byte after each byte is passed through `toLower`.
pub fn eqlIgnoreCase(a: []const u8, b: []const u8) bool {
    if (a.len != b.len) return false;

    var idx: usize = 0;
    while (idx < a.len) : (idx += 1) {
        if (toLower(a[idx]) != toLower(b[idx])) return false;
    }
    return true;
}
|
|
|
|
test "eqlIgnoreCase" {
    const expect = std.testing.expect;

    // Same text in different case, including a multi-byte UTF-8 sequence.
    expect(eqlIgnoreCase("HEl💩Lo!", "hel💩lo!"));
    // Different lengths.
    expect(!eqlIgnoreCase("hElLo!", "hello! "));
    // Same length, different letters.
    expect(!eqlIgnoreCase("hElLo!", "helro!"));
}
|
|
|
|
/// Searches `container` for `substr`, comparing with `eqlIgnoreCase`, and
/// considers only match positions at or after `start_index`.
/// Returns the index of the first match, or `null` when there is none
/// (including when `substr` is longer than `container`).
pub fn indexOfIgnoreCasePos(container: []const u8, start_index: usize, substr: []const u8) ?usize {
    // TODO boyer-moore algorithm
    if (container.len < substr.len) return null;

    // Last position at which `substr` still fits inside `container`.
    const last_start = container.len - substr.len;

    var pos: usize = start_index;
    while (pos <= last_start) : (pos += 1) {
        const window = container[pos .. pos + substr.len];
        if (eqlIgnoreCase(window, substr)) return pos;
    }
    return null;
}
|
|
|
|
/// Searches all of `container` for `substr`, ignoring ASCII case.
/// Returns the index of the first match, or `null` when there is none.
pub fn indexOfIgnoreCase(container: []const u8, substr: []const u8) ?usize {
    const search_from: usize = 0;
    return indexOfIgnoreCasePos(container, search_from, substr);
}
|
|
|
|
test "indexOfIgnoreCase" {
    const expect = std.testing.expect;

    // Match at the very end of the container.
    expect(indexOfIgnoreCase("one Two Three Four", "foUr").? == 14);
    // No match anywhere.
    expect(indexOfIgnoreCase("one two three FouR", "gOur") == null);
    // Needle as long as the container.
    expect(indexOfIgnoreCase("foO", "Foo").? == 0);
    // Needle longer than the container.
    expect(indexOfIgnoreCase("foo", "fool") == null);

    // The first of several matches is reported.
    expect(indexOfIgnoreCase("FOO foo", "fOo").? == 0);
}
|