zig/lib/std/ascii.zig

285 lines
9.3 KiB
Zig

// Does NOT look at the locale the way C89's toupper(3), isspace() et cetera does.
// I could have taken only a u7 to make this clear, but it would be slower
// It is my opinion that encodings other than UTF-8 should not be supported.
//
// (and 128 bytes is not much to pay).
// Also does not handle Unicode character classes.
//
// https://upload.wikimedia.org/wikipedia/commons/thumb/c/cf/USASCII_code_chart.png/1200px-USASCII_code_chart.png
const std = @import("std");
const tIndex = enum(u3) {
Alpha,
Hex,
Space,
Digit,
Lower,
Upper,
// Ctrl, < 0x20 || == DEL
// Print, = Graph || == ' '. NOT '\t' et cetera
Punct,
Graph,
//ASCII, | ~0b01111111
//isBlank, == ' ' || == '\x09'
};
const combinedTable = init: {
comptime var table: [256]u8 = undefined;
const mem = std.mem;
const alpha = [_]u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
const lower = [_]u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
const upper = [_]u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const digit = [_]u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const hex = [_]u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const space = [_]u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const punct = [_]u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
};
const graph = [_]u1{
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
};
comptime var i = 0;
inline while (i < 128) : (i += 1) {
table[i] =
@as(u8, alpha[i]) << @enumToInt(tIndex.Alpha) |
@as(u8, hex[i]) << @enumToInt(tIndex.Hex) |
@as(u8, space[i]) << @enumToInt(tIndex.Space) |
@as(u8, digit[i]) << @enumToInt(tIndex.Digit) |
@as(u8, lower[i]) << @enumToInt(tIndex.Lower) |
@as(u8, upper[i]) << @enumToInt(tIndex.Upper) |
@as(u8, punct[i]) << @enumToInt(tIndex.Punct) |
@as(u8, graph[i]) << @enumToInt(tIndex.Graph);
}
mem.set(u8, table[128..256], 0);
break :init table;
};
fn inTable(c: u8, t: tIndex) bool {
return (combinedTable[c] & (@as(u8, 1) << @enumToInt(t))) != 0;
}
pub fn isAlNum(c: u8) bool {
return (combinedTable[c] & ((@as(u8, 1) << @enumToInt(tIndex.Alpha)) |
@as(u8, 1) << @enumToInt(tIndex.Digit))) != 0;
}
pub fn isAlpha(c: u8) bool {
return inTable(c, tIndex.Alpha);
}
pub fn isCntrl(c: u8) bool {
return c < 0x20 or c == 127; //DEL
}
pub fn isDigit(c: u8) bool {
return inTable(c, tIndex.Digit);
}
pub fn isGraph(c: u8) bool {
return inTable(c, tIndex.Graph);
}
pub fn isLower(c: u8) bool {
return inTable(c, tIndex.Lower);
}
pub fn isPrint(c: u8) bool {
return inTable(c, tIndex.Graph) or c == ' ';
}
pub fn isPunct(c: u8) bool {
return inTable(c, tIndex.Punct);
}
pub fn isSpace(c: u8) bool {
return inTable(c, tIndex.Space);
}
pub fn isUpper(c: u8) bool {
return inTable(c, tIndex.Upper);
}
pub fn isXDigit(c: u8) bool {
return inTable(c, tIndex.Hex);
}
pub fn isASCII(c: u8) bool {
return c < 128;
}
pub fn isBlank(c: u8) bool {
return (c == ' ') or (c == '\x09');
}
pub fn toUpper(c: u8) u8 {
if (isLower(c)) {
return c & 0b11011111;
} else {
return c;
}
}
pub fn toLower(c: u8) u8 {
if (isUpper(c)) {
return c | 0b00100000;
} else {
return c;
}
}
test "ascii character classes" {
const testing = std.testing;
testing.expect('C' == toUpper('c'));
testing.expect(':' == toUpper(':'));
testing.expect('\xab' == toUpper('\xab'));
testing.expect('c' == toLower('C'));
testing.expect(isAlpha('c'));
testing.expect(!isAlpha('5'));
testing.expect(isSpace(' '));
}
pub fn allocLowerString(allocator: *std.mem.Allocator, ascii_string: []const u8) ![]u8 {
const result = try allocator.alloc(u8, ascii_string.len);
for (result) |*c, i| {
c.* = toLower(ascii_string[i]);
}
return result;
}
test "allocLowerString" {
var buf: [100]u8 = undefined;
const allocator = &std.heap.FixedBufferAllocator.init(&buf).allocator;
const result = try allocLowerString(allocator, "aBcDeFgHiJkLmNOPqrst0234+💩!");
std.testing.expect(std.mem.eql(u8, "abcdefghijklmnopqrst0234+💩!", result));
}
pub fn eqlIgnoreCase(a: []const u8, b: []const u8) bool {
if (a.len != b.len) return false;
for (a) |a_c, i| {
if (toLower(a_c) != toLower(b[i])) return false;
}
return true;
}
test "eqlIgnoreCase" {
std.testing.expect(eqlIgnoreCase("HEl💩Lo!", "hel💩lo!"));
std.testing.expect(!eqlIgnoreCase("hElLo!", "hello! "));
std.testing.expect(!eqlIgnoreCase("hElLo!", "helro!"));
}
/// Finds `substr` in `container`, starting at `start_index`.
/// TODO boyer-moore algorithm
pub fn indexOfIgnoreCasePos(container: []const u8, start_index: usize, substr: []const u8) ?usize {
if (substr.len > container.len) return null;
var i: usize = start_index;
const end = container.len - substr.len;
while (i <= end) : (i += 1) {
if (eqlIgnoreCase(container[i .. i + substr.len], substr)) return i;
}
return null;
}
/// Finds `substr` in `container`, starting at `start_index`.
pub fn indexOfIgnoreCase(container: []const u8, substr: []const u8) ?usize {
return indexOfIgnoreCasePos(container, 0, substr);
}
test "indexOfIgnoreCase" {
std.testing.expect(indexOfIgnoreCase("one Two Three Four", "foUr").? == 14);
std.testing.expect(indexOfIgnoreCase("one two three FouR", "gOur") == null);
std.testing.expect(indexOfIgnoreCase("foO", "Foo").? == 0);
std.testing.expect(indexOfIgnoreCase("foo", "fool") == null);
std.testing.expect(indexOfIgnoreCase("FOO foo", "fOo").? == 0);
}