Merge pull request #5452 from squeek502/comptime-string-map

Add std.ComptimeStringMap based on the tokenizer optimization in #5442
master
Andrew Kelley 2020-05-28 23:06:40 -04:00 committed by GitHub
commit 1e0de896b8
5 changed files with 324 additions and 178 deletions
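
For a quick sense of the new API, here is a minimal usage sketch assembled from the diffs below; the Month enum and the test are illustrative and not part of this PR:

const std = @import("std");

const Month = enum { jan, feb, mar };

// ComptimeStringMap returns a type whose get/has decls look keys up at runtime.
// Internally, keys are bucketed by length at comptime so a lookup only compares
// the input against keys of equal length.
const months = std.ComptimeStringMap(Month, .{
    .{ "jan", .jan },
    .{ "feb", .feb },
    .{ "mar", .mar },
});

test "illustrative ComptimeStringMap usage" {
    std.testing.expectEqual(Month.feb, months.get("feb").?);
    std.testing.expect(months.has("mar"));
    std.testing.expect(months.get("april") == null);
}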


@@ -277,83 +277,79 @@ pub const Token = struct {
};
// TODO extensions
pub const keywords = [_]Keyword{
Keyword.init("auto", .Keyword_auto),
Keyword.init("break", .Keyword_break),
Keyword.init("case", .Keyword_case),
Keyword.init("char", .Keyword_char),
Keyword.init("const", .Keyword_const),
Keyword.init("continue", .Keyword_continue),
Keyword.init("default", .Keyword_default),
Keyword.init("do", .Keyword_do),
Keyword.init("double", .Keyword_double),
Keyword.init("else", .Keyword_else),
Keyword.init("enum", .Keyword_enum),
Keyword.init("extern", .Keyword_extern),
Keyword.init("float", .Keyword_float),
Keyword.init("for", .Keyword_for),
Keyword.init("goto", .Keyword_goto),
Keyword.init("if", .Keyword_if),
Keyword.init("int", .Keyword_int),
Keyword.init("long", .Keyword_long),
Keyword.init("register", .Keyword_register),
Keyword.init("return", .Keyword_return),
Keyword.init("short", .Keyword_short),
Keyword.init("signed", .Keyword_signed),
Keyword.init("sizeof", .Keyword_sizeof),
Keyword.init("static", .Keyword_static),
Keyword.init("struct", .Keyword_struct),
Keyword.init("switch", .Keyword_switch),
Keyword.init("typedef", .Keyword_typedef),
Keyword.init("union", .Keyword_union),
Keyword.init("unsigned", .Keyword_unsigned),
Keyword.init("void", .Keyword_void),
Keyword.init("volatile", .Keyword_volatile),
Keyword.init("while", .Keyword_while),
pub const keywords = std.ComptimeStringMap(Id, .{
.{"auto", .Keyword_auto},
.{"break", .Keyword_break},
.{"case", .Keyword_case},
.{"char", .Keyword_char},
.{"const", .Keyword_const},
.{"continue", .Keyword_continue},
.{"default", .Keyword_default},
.{"do", .Keyword_do},
.{"double", .Keyword_double},
.{"else", .Keyword_else},
.{"enum", .Keyword_enum},
.{"extern", .Keyword_extern},
.{"float", .Keyword_float},
.{"for", .Keyword_for},
.{"goto", .Keyword_goto},
.{"if", .Keyword_if},
.{"int", .Keyword_int},
.{"long", .Keyword_long},
.{"register", .Keyword_register},
.{"return", .Keyword_return},
.{"short", .Keyword_short},
.{"signed", .Keyword_signed},
.{"sizeof", .Keyword_sizeof},
.{"static", .Keyword_static},
.{"struct", .Keyword_struct},
.{"switch", .Keyword_switch},
.{"typedef", .Keyword_typedef},
.{"union", .Keyword_union},
.{"unsigned", .Keyword_unsigned},
.{"void", .Keyword_void},
.{"volatile", .Keyword_volatile},
.{"while", .Keyword_while},
// ISO C99
Keyword.init("_Bool", .Keyword_bool),
Keyword.init("_Complex", .Keyword_complex),
Keyword.init("_Imaginary", .Keyword_imaginary),
Keyword.init("inline", .Keyword_inline),
Keyword.init("restrict", .Keyword_restrict),
.{"_Bool", .Keyword_bool},
.{"_Complex", .Keyword_complex},
.{"_Imaginary", .Keyword_imaginary},
.{"inline", .Keyword_inline},
.{"restrict", .Keyword_restrict},
// ISO C11
Keyword.init("_Alignas", .Keyword_alignas),
Keyword.init("_Alignof", .Keyword_alignof),
Keyword.init("_Atomic", .Keyword_atomic),
Keyword.init("_Generic", .Keyword_generic),
Keyword.init("_Noreturn", .Keyword_noreturn),
Keyword.init("_Static_assert", .Keyword_static_assert),
Keyword.init("_Thread_local", .Keyword_thread_local),
.{"_Alignas", .Keyword_alignas},
.{"_Alignof", .Keyword_alignof},
.{"_Atomic", .Keyword_atomic},
.{"_Generic", .Keyword_generic},
.{"_Noreturn", .Keyword_noreturn},
.{"_Static_assert", .Keyword_static_assert},
.{"_Thread_local", .Keyword_thread_local},
// Preprocessor directives
Keyword.init("include", .Keyword_include),
Keyword.init("define", .Keyword_define),
Keyword.init("ifdef", .Keyword_ifdef),
Keyword.init("ifndef", .Keyword_ifndef),
Keyword.init("error", .Keyword_error),
Keyword.init("pragma", .Keyword_pragma),
};
.{"include", .Keyword_include},
.{"define", .Keyword_define},
.{"ifdef", .Keyword_ifdef},
.{"ifndef", .Keyword_ifndef},
.{"error", .Keyword_error},
.{"pragma", .Keyword_pragma},
});
// TODO perfect hash at comptime
// TODO do this in the preprocessor
pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id {
var hash = std.hash_map.hashString(bytes);
for (keywords) |kw| {
if (kw.hash == hash and mem.eql(u8, kw.bytes, bytes)) {
switch (kw.id) {
.Keyword_include,
.Keyword_define,
.Keyword_ifdef,
.Keyword_ifndef,
.Keyword_error,
.Keyword_pragma,
=> if (!pp_directive) return null,
else => {},
}
return kw.id;
if (keywords.get(bytes)) |id| {
switch (id) {
.Keyword_include,
.Keyword_define,
.Keyword_ifdef,
.Keyword_ifndef,
.Keyword_error,
.Keyword_pragma,
=> if (!pp_directive) return null,
else => {},
}
return id;
}
return null;
}
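
For illustration, given the keyword map and the pp_directive check above, getKeyword behaves as follows (these calls are illustrative, not part of the diff):

// getKeyword("while", false)   -> .Keyword_while
// getKeyword("include", false) -> null  (preprocessor directives are rejected outside a directive)
// getKeyword("include", true)  -> .Keyword_include
// getKeyword("foobar", false)  -> null  (not in the map)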


@@ -0,0 +1,177 @@
const std = @import("std.zig");
const mem = std.mem;
/// Comptime string map optimized for small sets of disparate string keys.
/// Works by separating the keys by length at comptime and only checking strings of
/// equal length at runtime.
///
/// `kvs` expects a list literal containing list literals or an array/slice of structs
/// where `.@"0"` is the `[]const u8` key and `.@"1"` is the associated value of type `V`.
/// TODO: https://github.com/ziglang/zig/issues/4335
pub fn ComptimeStringMap(comptime V: type, comptime kvs: var) type {
const precomputed = comptime blk: {
@setEvalBranchQuota(2000);
const KV = struct {
key: []const u8,
value: V,
};
var sorted_kvs: [kvs.len]KV = undefined;
const lenAsc = (struct {
fn lenAsc(a: KV, b: KV) bool {
return a.key.len < b.key.len;
}
}).lenAsc;
for (kvs) |kv, i| {
if (V != void) {
sorted_kvs[i] = .{.key = kv.@"0", .value = kv.@"1"};
} else {
sorted_kvs[i] = .{.key = kv.@"0", .value = {}};
}
}
std.sort.sort(KV, &sorted_kvs, lenAsc);
const min_len = sorted_kvs[0].key.len;
const max_len = sorted_kvs[sorted_kvs.len - 1].key.len;
var len_indexes: [max_len + 1]usize = undefined;
var len: usize = 0;
var i: usize = 0;
while (len <= max_len) : (len += 1) {
// find the first key whose length == len
while (len > sorted_kvs[i].key.len) {
i += 1;
}
len_indexes[len] = i;
}
break :blk .{
.min_len = min_len,
.max_len = max_len,
.sorted_kvs = sorted_kvs,
.len_indexes = len_indexes,
};
};
return struct {
pub fn has(str: []const u8) bool {
return get(str) != null;
}
pub fn get(str: []const u8) ?V {
if (str.len < precomputed.min_len or str.len > precomputed.max_len)
return null;
var i = precomputed.len_indexes[str.len];
while (true) {
const kv = precomputed.sorted_kvs[i];
if (kv.key.len != str.len)
return null;
if (mem.eql(u8, kv.key, str))
return kv.value;
i += 1;
if (i >= precomputed.sorted_kvs.len)
return null;
}
}
};
}
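
To make the precomputed layout concrete, here is a short walk-through with an illustrative key set (V = void; these keys are not used in this PR):

// kvs = .{ .{"do"}, .{"if"}, .{"auto"}, .{"break"} }
// sorted_kvs (ascending key length): "do", "if", "auto", "break"
// min_len = 2, max_len = 5
// len_indexes = { 0, 0, 0, 2, 2, 3 }  (index of the first sorted key of each length)
// get("case"): len 4 -> start at sorted_kvs[2] ("auto"), no match;
//              sorted_kvs[3] ("break") has len 5 != 4 -> return null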
const TestEnum = enum {
A,
B,
C,
D,
E,
};
test "ComptimeStringMap list literal of list literals" {
const map = ComptimeStringMap(TestEnum, .{
.{"these", .D},
.{"have", .A},
.{"nothing", .B},
.{"incommon", .C},
.{"samelen", .E},
});
testMap(map);
}
test "ComptimeStringMap array of structs" {
const KV = struct {
@"0": []const u8,
@"1": TestEnum,
};
const map = ComptimeStringMap(TestEnum, [_]KV{
.{.@"0" = "these", .@"1" = .D},
.{.@"0" = "have", .@"1" = .A},
.{.@"0" = "nothing", .@"1" = .B},
.{.@"0" = "incommon", .@"1" = .C},
.{.@"0" = "samelen", .@"1" = .E},
});
testMap(map);
}
test "ComptimeStringMap slice of structs" {
const KV = struct {
@"0": []const u8,
@"1": TestEnum,
};
const slice: []const KV = &[_]KV{
.{.@"0" = "these", .@"1" = .D},
.{.@"0" = "have", .@"1" = .A},
.{.@"0" = "nothing", .@"1" = .B},
.{.@"0" = "incommon", .@"1" = .C},
.{.@"0" = "samelen", .@"1" = .E},
};
const map = ComptimeStringMap(TestEnum, slice);
testMap(map);
}
fn testMap(comptime map: var) void {
std.testing.expectEqual(TestEnum.A, map.get("have").?);
std.testing.expectEqual(TestEnum.B, map.get("nothing").?);
std.testing.expect(null == map.get("missing"));
std.testing.expectEqual(TestEnum.D, map.get("these").?);
std.testing.expectEqual(TestEnum.E, map.get("samelen").?);
std.testing.expect(!map.has("missing"));
std.testing.expect(map.has("these"));
}
test "ComptimeStringMap void value type, slice of structs" {
const KV = struct {
@"0": []const u8,
};
const slice: []const KV = &[_]KV{
.{.@"0" = "these"},
.{.@"0" = "have"},
.{.@"0" = "nothing"},
.{.@"0" = "incommon"},
.{.@"0" = "samelen"},
};
const map = ComptimeStringMap(void, slice);
testSet(map);
}
test "ComptimeStringMap void value type, list literal of list literals" {
const map = ComptimeStringMap(void, .{
.{"these"},
.{"have"},
.{"nothing"},
.{"incommon"},
.{"samelen"},
});
testSet(map);
}
fn testSet(comptime map: var) void {
std.testing.expectEqual({}, map.get("have").?);
std.testing.expectEqual({}, map.get("nothing").?);
std.testing.expect(null == map.get("missing"));
std.testing.expectEqual({}, map.get("these").?);
std.testing.expectEqual({}, map.get("samelen").?);
std.testing.expect(!map.has("missing"));
std.testing.expect(map.has("these"));
}


@@ -53,12 +53,37 @@ test "std.meta.tagName" {
}
pub fn stringToEnum(comptime T: type, str: []const u8) ?T {
inline for (@typeInfo(T).Enum.fields) |enumField| {
if (mem.eql(u8, str, enumField.name)) {
return @field(T, enumField.name);
// Using ComptimeStringMap here is more performant, but it will start to take too
// long to compile if the enum is large enough, due to the current limits of comptime
// performance when doing things like constructing lookup maps at comptime.
// TODO The '100' here is arbitrary and should be increased when possible:
// - https://github.com/ziglang/zig/issues/4055
// - https://github.com/ziglang/zig/issues/3863
if (@typeInfo(T).Enum.fields.len <= 100) {
const kvs = comptime build_kvs: {
// In order to generate an array of structs that play nice with anonymous
// list literals, we need to give them "0" and "1" field names.
// TODO https://github.com/ziglang/zig/issues/4335
const EnumKV = struct {
@"0": []const u8,
@"1": T,
};
var kvs_array: [@typeInfo(T).Enum.fields.len]EnumKV = undefined;
inline for (@typeInfo(T).Enum.fields) |enumField, i| {
kvs_array[i] = .{ .@"0" = enumField.name, .@"1" = @field(T, enumField.name) };
}
break :build_kvs kvs_array[0..];
};
const map = std.ComptimeStringMap(T, kvs);
return map.get(str);
} else {
inline for (@typeInfo(T).Enum.fields) |enumField| {
if (mem.eql(u8, str, enumField.name)) {
return @field(T, enumField.name);
}
}
return null;
}
return null;
}
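
A brief usage sketch of stringToEnum as rewritten above; the Color enum and the test are illustrative, not part of this diff:

const Color = enum { red, green, blue };

test "illustrative stringToEnum usage" {
    std.testing.expect(stringToEnum(Color, "green").? == .green);
    std.testing.expect(stringToEnum(Color, "purple") == null);
}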
test "std.meta.stringToEnum" {


@@ -8,6 +8,7 @@ pub const BloomFilter = @import("bloom_filter.zig").BloomFilter;
pub const BufMap = @import("buf_map.zig").BufMap;
pub const BufSet = @import("buf_set.zig").BufSet;
pub const ChildProcess = @import("child_process.zig").ChildProcess;
pub const ComptimeStringMap = @import("comptime_string_map.zig").ComptimeStringMap;
pub const DynLib = @import("dynamic_library.zig").DynLib;
pub const HashMap = @import("hash_map.zig").HashMap;
pub const Mutex = @import("mutex.zig").Mutex;


@@ -10,115 +10,62 @@ pub const Token = struct {
end: usize,
};
pub const Keyword = struct {
bytes: []const u8,
id: Id,
fn init(bytes: []const u8, id: Id) Keyword {
return .{
.bytes = bytes,
.id = id,
};
}
};
pub const keywords = [_]Keyword{
Keyword.init("align", .Keyword_align),
Keyword.init("allowzero", .Keyword_allowzero),
Keyword.init("and", .Keyword_and),
Keyword.init("anyframe", .Keyword_anyframe),
Keyword.init("asm", .Keyword_asm),
Keyword.init("async", .Keyword_async),
Keyword.init("await", .Keyword_await),
Keyword.init("break", .Keyword_break),
Keyword.init("callconv", .Keyword_callconv),
Keyword.init("catch", .Keyword_catch),
Keyword.init("comptime", .Keyword_comptime),
Keyword.init("const", .Keyword_const),
Keyword.init("continue", .Keyword_continue),
Keyword.init("defer", .Keyword_defer),
Keyword.init("else", .Keyword_else),
Keyword.init("enum", .Keyword_enum),
Keyword.init("errdefer", .Keyword_errdefer),
Keyword.init("error", .Keyword_error),
Keyword.init("export", .Keyword_export),
Keyword.init("extern", .Keyword_extern),
Keyword.init("false", .Keyword_false),
Keyword.init("fn", .Keyword_fn),
Keyword.init("for", .Keyword_for),
Keyword.init("if", .Keyword_if),
Keyword.init("inline", .Keyword_inline),
Keyword.init("noalias", .Keyword_noalias),
Keyword.init("noasync", .Keyword_nosuspend), // TODO: remove this
Keyword.init("noinline", .Keyword_noinline),
Keyword.init("nosuspend", .Keyword_nosuspend),
Keyword.init("null", .Keyword_null),
Keyword.init("or", .Keyword_or),
Keyword.init("orelse", .Keyword_orelse),
Keyword.init("packed", .Keyword_packed),
Keyword.init("pub", .Keyword_pub),
Keyword.init("resume", .Keyword_resume),
Keyword.init("return", .Keyword_return),
Keyword.init("linksection", .Keyword_linksection),
Keyword.init("struct", .Keyword_struct),
Keyword.init("suspend", .Keyword_suspend),
Keyword.init("switch", .Keyword_switch),
Keyword.init("test", .Keyword_test),
Keyword.init("threadlocal", .Keyword_threadlocal),
Keyword.init("true", .Keyword_true),
Keyword.init("try", .Keyword_try),
Keyword.init("undefined", .Keyword_undefined),
Keyword.init("union", .Keyword_union),
Keyword.init("unreachable", .Keyword_unreachable),
Keyword.init("usingnamespace", .Keyword_usingnamespace),
Keyword.init("var", .Keyword_var),
Keyword.init("volatile", .Keyword_volatile),
Keyword.init("while", .Keyword_while),
};
pub const keywords = std.ComptimeStringMap(Id, .{
.{"align", .Keyword_align},
.{"allowzero", .Keyword_allowzero},
.{"and", .Keyword_and},
.{"anyframe", .Keyword_anyframe},
.{"asm", .Keyword_asm},
.{"async", .Keyword_async},
.{"await", .Keyword_await},
.{"break", .Keyword_break},
.{"callconv", .Keyword_callconv},
.{"catch", .Keyword_catch},
.{"comptime", .Keyword_comptime},
.{"const", .Keyword_const},
.{"continue", .Keyword_continue},
.{"defer", .Keyword_defer},
.{"else", .Keyword_else},
.{"enum", .Keyword_enum},
.{"errdefer", .Keyword_errdefer},
.{"error", .Keyword_error},
.{"export", .Keyword_export},
.{"extern", .Keyword_extern},
.{"false", .Keyword_false},
.{"fn", .Keyword_fn},
.{"for", .Keyword_for},
.{"if", .Keyword_if},
.{"inline", .Keyword_inline},
.{"noalias", .Keyword_noalias},
.{"noasync", .Keyword_nosuspend}, // TODO: remove this
.{"noinline", .Keyword_noinline},
.{"nosuspend", .Keyword_nosuspend},
.{"null", .Keyword_null},
.{"or", .Keyword_or},
.{"orelse", .Keyword_orelse},
.{"packed", .Keyword_packed},
.{"pub", .Keyword_pub},
.{"resume", .Keyword_resume},
.{"return", .Keyword_return},
.{"linksection", .Keyword_linksection},
.{"struct", .Keyword_struct},
.{"suspend", .Keyword_suspend},
.{"switch", .Keyword_switch},
.{"test", .Keyword_test},
.{"threadlocal", .Keyword_threadlocal},
.{"true", .Keyword_true},
.{"try", .Keyword_try},
.{"undefined", .Keyword_undefined},
.{"union", .Keyword_union},
.{"unreachable", .Keyword_unreachable},
.{"usingnamespace", .Keyword_usingnamespace},
.{"var", .Keyword_var},
.{"volatile", .Keyword_volatile},
.{"while", .Keyword_while},
});
pub fn getKeyword(bytes: []const u8) ?Id {
const precomputed = comptime blk: {
@setEvalBranchQuota(2000);
var sorted_keywords = keywords;
const lenAsc = (struct {
fn lenAsc(a: Keyword, b: Keyword) bool {
return a.bytes.len < b.bytes.len;
}
}).lenAsc;
std.sort.sort(Keyword, &sorted_keywords, lenAsc);
const min_len = sorted_keywords[0].bytes.len;
const max_len = sorted_keywords[sorted_keywords.len - 1].bytes.len;
var len_indexes: [max_len + 1]usize = undefined;
var len: usize = 0;
var kw_i: usize = 0;
while (len <= max_len) : (len += 1) {
// find the first keyword len == len
while (len > sorted_keywords[kw_i].bytes.len) {
kw_i += 1;
}
len_indexes[len] = kw_i;
}
break :blk .{
.min_len = min_len,
.max_len = max_len,
.sorted_keywords = sorted_keywords,
.len_indexes = len_indexes,
};
};
if (bytes.len < precomputed.min_len or bytes.len > precomputed.max_len)
return null;
var i = precomputed.len_indexes[bytes.len];
while (true) {
const kw = precomputed.sorted_keywords[i];
if (kw.bytes.len != bytes.len)
return null;
if (mem.eql(u8, kw.bytes, bytes))
return kw.id;
i += 1;
if (i >= precomputed.sorted_keywords.len)
return null;
}
return keywords.get(bytes);
}
pub const Id = enum {