From 0f7de58b642539ad6a71368940f43f59a41e71b2 Mon Sep 17 00:00:00 2001 From: kristopher tate Date: Mon, 26 Nov 2018 01:09:12 +0900 Subject: [PATCH 1/2] std.mem: add new separate method and rework SplitIterator; --- std/mem.zig | 104 +++++++++++++++++++++++++++++++++++++++++++----- std/os/path.zig | 14 ++++--- 2 files changed, 103 insertions(+), 15 deletions(-) diff --git a/std/mem.zig b/std/mem.zig index 005d88791..adb8b0595 100644 --- a/std/mem.zig +++ b/std/mem.zig @@ -607,11 +607,15 @@ pub fn eql_slice_u8(a: []const u8, b: []const u8) bool { /// any of the bytes in `split_bytes`. /// split(" abc def ghi ", " ") /// Will return slices for "abc", "def", "ghi", null, in that order. +/// If `split_bytes` does not exist in buffer, +/// the iterator will return `buffer`, null, in that order. pub fn split(buffer: []const u8, split_bytes: []const u8) SplitIterator { return SplitIterator{ .index = 0, .buffer = buffer, .split_bytes = split_bytes, + .glob = true, + .spun = false, }; } @@ -621,6 +625,76 @@ test "mem.split" { assert(eql(u8, it.next().?, "def")); assert(eql(u8, it.next().?, "ghi")); assert(it.next() == null); + + it = split("..\\bob", "\\"); + assert(eql(u8, it.next().?, "..")); + assert(eql(u8, "..", "..\\bob"[0..it.index])); + assert(eql(u8, it.next().?, "bob")); + assert(it.next() == null); + + it = split("//a/b", "/"); + assert(eql(u8, it.next().?, "a")); + assert(eql(u8, it.next().?, "b")); + assert(eql(u8, "//a/b", "//a/b"[0..it.index])); + assert(it.next() == null); + + it = split("|", "|"); + assert(it.next() == null); + + it = split("", "|"); + assert(eql(u8, it.next().?, "")); + assert(it.next() == null); + + it = split("hello", ""); + assert(eql(u8, it.next().?, "hello")); + assert(it.next() == null); + + it = split("hello", " "); + assert(eql(u8, it.next().?, "hello")); + assert(it.next() == null); +} + +/// Returns an iterator that iterates over the slices of `buffer` that +/// seperates by bytes in `delimiter`. 
+/// separate("abc|def||ghi", "|") +/// Will return slices for "abc", "def", "", "ghi", null, in that order. +/// If `delimiter` does not exist in buffer, +/// the iterator will return `buffer`, null, in that order. +pub fn separate(buffer: []const u8, delimiter: []const u8) SplitIterator { + return SplitIterator{ + .index = 0, + .buffer = buffer, + .split_bytes = delimiter, + .glob = false, + .spun = false, + }; +} + +test "mem.separate" { + var it = separate("abc|def||ghi", "|"); + assert(eql(u8, it.next().?, "abc")); + assert(eql(u8, it.next().?, "def")); + assert(eql(u8, it.next().?, "")); + assert(eql(u8, it.next().?, "ghi")); + assert(it.next() == null); + + it = separate("", "|"); + assert(eql(u8, it.next().?, "")); + assert(it.next() == null); + + it = separate("|", "|"); + assert(eql(u8, it.next().?, "")); + assert(eql(u8, it.next().?, "")); + assert(it.next() == null); + + it = separate("hello", ""); + assert(eql(u8, it.next().?, "hello")); + assert(it.next() == null); + + it = separate("hello", " "); + assert(eql(u8, it.next().?, "hello")); + assert(it.next() == null); + } pub fn startsWith(comptime T: type, haystack: []const T, needle: []const T) bool { @@ -645,20 +719,32 @@ pub const SplitIterator = struct { buffer: []const u8, split_bytes: []const u8, index: usize, + glob: bool, + spun: bool, + /// Returns the next split segment as a slice, or null once there are no more segments. pub fn next(self: *SplitIterator) ?[]const u8 { - // move to beginning of token - while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {} - const start = self.index; - if (start == self.buffer.len) { - return null; + if (self.spun) { + if (self.index + 1 > self.buffer.len) return null; + self.index += 1; } - // move to end of token - while (self.index < self.buffer.len and !self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {} - const end = self.index; + self.spun = true; - return self.buffer[start..end]; + if (self.glob) { + 
while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {} + } + + var cursor = self.index; + while (cursor < self.buffer.len and !self.isSplitByte(self.buffer[cursor])) : (cursor += 1) {} + + defer self.index = cursor; + + if (cursor == self.buffer.len) { + return if (self.glob and self.index == cursor and self.index > 0) null else self.buffer[self.index..]; + } + + return self.buffer[self.index..cursor]; } /// Returns a slice of the remaining bytes. Does not affect iterator state. diff --git a/std/os/path.zig b/std/os/path.zig index 0d636353a..2938c9ab9 100644 --- a/std/os/path.zig +++ b/std/os/path.zig @@ -967,12 +967,14 @@ pub fn relativeWindows(allocator: *Allocator, from: []const u8, to: []const u8) // shave off the trailing slash result_index -= 1; - var rest_it = mem.split(to_rest, "/\\"); - while (rest_it.next()) |to_component| { - result[result_index] = '\\'; - result_index += 1; - mem.copy(u8, result[result_index..], to_component); - result_index += to_component.len; + if (to_rest.len > 0) { + var rest_it = mem.split(to_rest, "/\\"); + while (rest_it.next()) |to_component| { + result[result_index] = '\\'; + result_index += 1; + mem.copy(u8, result[result_index..], to_component); + result_index += to_component.len; + } } return result[0..result_index]; From ff1b2889f3819610aee17ddbecdf716eb573ca2f Mon Sep 17 00:00:00 2001 From: kristopher tate Date: Fri, 30 Nov 2018 02:17:15 +0900 Subject: [PATCH 2/2] std.mem: split: test for multiple separator bytes; --- std/mem.zig | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/std/mem.zig b/std/mem.zig index adb8b0595..e27ad1a01 100644 --- a/std/mem.zig +++ b/std/mem.zig @@ -654,6 +654,16 @@ test "mem.split" { assert(it.next() == null); } +test "mem.split (multibyte)" { + var it = split("a|b,c/d e", " /,|"); + assert(eql(u8, it.next().?, "a")); + assert(eql(u8, it.next().?, "b")); + assert(eql(u8, it.next().?, "c")); + assert(eql(u8, it.next().?, 
"d")); + assert(eql(u8, it.next().?, "e")); + assert(it.next() == null); +} + /// Returns an iterator that iterates over the slices of `buffer` that /// seperates by bytes in `delimiter`. /// separate("abc|def||ghi", "|") @@ -694,7 +704,16 @@ test "mem.separate" { it = separate("hello", " "); assert(eql(u8, it.next().?, "hello")); assert(it.next() == null); +} +test "mem.separate (multibyte)" { + var it = separate("a|b,c/d e", " /,|"); + assert(eql(u8, it.next().?, "a")); + assert(eql(u8, it.next().?, "b")); + assert(eql(u8, it.next().?, "c")); + assert(eql(u8, it.next().?, "d")); + assert(eql(u8, it.next().?, "e")); + assert(it.next() == null); } pub fn startsWith(comptime T: type, haystack: []const T, needle: []const T) bool {