diff --git a/std/mem.zig b/std/mem.zig index a403d8045..bec3816d8 100644 --- a/std/mem.zig +++ b/std/mem.zig @@ -692,11 +692,15 @@ pub fn eql_slice_u8(a: []const u8, b: []const u8) bool { /// any of the bytes in `split_bytes`. /// split(" abc def ghi ", " ") /// Will return slices for "abc", "def", "ghi", null, in that order. +/// If `split_bytes` does not exist in buffer, +/// the iterator will return `buffer`, null, in that order. pub fn split(buffer: []const u8, split_bytes: []const u8) SplitIterator { return SplitIterator{ .index = 0, .buffer = buffer, .split_bytes = split_bytes, + .glob = true, + .spun = false, }; } @@ -706,6 +710,95 @@ test "mem.split" { assert(eql(u8, it.next().?, "def")); assert(eql(u8, it.next().?, "ghi")); assert(it.next() == null); + + it = split("..\\bob", "\\"); + assert(eql(u8, it.next().?, "..")); + assert(eql(u8, "..", "..\\bob"[0..it.index])); + assert(eql(u8, it.next().?, "bob")); + assert(it.next() == null); + + it = split("//a/b", "/"); + assert(eql(u8, it.next().?, "a")); + assert(eql(u8, it.next().?, "b")); + assert(eql(u8, "//a/b", "//a/b"[0..it.index])); + assert(it.next() == null); + + it = split("|", "|"); + assert(it.next() == null); + + it = split("", "|"); + assert(eql(u8, it.next().?, "")); + assert(it.next() == null); + + it = split("hello", ""); + assert(eql(u8, it.next().?, "hello")); + assert(it.next() == null); + + it = split("hello", " "); + assert(eql(u8, it.next().?, "hello")); + assert(it.next() == null); +} + +test "mem.split (multibyte)" { + var it = split("a|b,c/d e", " /,|"); + assert(eql(u8, it.next().?, "a")); + assert(eql(u8, it.next().?, "b")); + assert(eql(u8, it.next().?, "c")); + assert(eql(u8, it.next().?, "d")); + assert(eql(u8, it.next().?, "e")); + assert(it.next() == null); +} + +/// Returns an iterator that iterates over the slices of `buffer` that +/// are separated by bytes in `delimiter`. 
+/// separate("abc|def||ghi", "|") +/// Will return slices for "abc", "def", "", "ghi", null, in that order. +/// If `delimiter` does not exist in buffer, +/// the iterator will return `buffer`, null, in that order. +pub fn separate(buffer: []const u8, delimiter: []const u8) SplitIterator { + return SplitIterator{ + .index = 0, + .buffer = buffer, + .split_bytes = delimiter, + .glob = false, + .spun = false, + }; +} + +test "mem.separate" { + var it = separate("abc|def||ghi", "|"); + assert(eql(u8, it.next().?, "abc")); + assert(eql(u8, it.next().?, "def")); + assert(eql(u8, it.next().?, "")); + assert(eql(u8, it.next().?, "ghi")); + assert(it.next() == null); + + it = separate("", "|"); + assert(eql(u8, it.next().?, "")); + assert(it.next() == null); + + it = separate("|", "|"); + assert(eql(u8, it.next().?, "")); + assert(eql(u8, it.next().?, "")); + assert(it.next() == null); + + it = separate("hello", ""); + assert(eql(u8, it.next().?, "hello")); + assert(it.next() == null); + + it = separate("hello", " "); + assert(eql(u8, it.next().?, "hello")); + assert(it.next() == null); +} + +test "mem.separate (multibyte)" { + var it = separate("a|b,c/d e", " /,|"); + assert(eql(u8, it.next().?, "a")); + assert(eql(u8, it.next().?, "b")); + assert(eql(u8, it.next().?, "c")); + assert(eql(u8, it.next().?, "d")); + assert(eql(u8, it.next().?, "e")); + assert(it.next() == null); } pub fn startsWith(comptime T: type, haystack: []const T, needle: []const T) bool { @@ -730,20 +823,32 @@ pub const SplitIterator = struct { buffer: []const u8, split_bytes: []const u8, index: usize, + glob: bool, + spun: bool, + /// Returns the next split segment as a slice, or null when no segments remain. pub fn next(self: *SplitIterator) ?[]const u8 { - // move to beginning of token - while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {} - const start = self.index; - if (start == self.buffer.len) { - return null; + if (self.spun) { + if (self.index 
+ 1 > self.buffer.len) return null; + self.index += 1; } - // move to end of token - while (self.index < self.buffer.len and !self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {} - const end = self.index; + self.spun = true; - return self.buffer[start..end]; + if (self.glob) { + while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {} + } + + var cursor = self.index; + while (cursor < self.buffer.len and !self.isSplitByte(self.buffer[cursor])) : (cursor += 1) {} + + defer self.index = cursor; + + if (cursor == self.buffer.len) { + return if (self.glob and self.index == cursor and self.index > 0) null else self.buffer[self.index..]; + } + + return self.buffer[self.index..cursor]; } /// Returns a slice of the remaining bytes. Does not affect iterator state. diff --git a/std/os/path.zig b/std/os/path.zig index 31f949f0d..4d3d3d6a8 100644 --- a/std/os/path.zig +++ b/std/os/path.zig @@ -967,12 +967,14 @@ pub fn relativeWindows(allocator: *Allocator, from: []const u8, to: []const u8) // shave off the trailing slash result_index -= 1; - var rest_it = mem.split(to_rest, "/\\"); - while (rest_it.next()) |to_component| { - result[result_index] = '\\'; - result_index += 1; - mem.copy(u8, result[result_index..], to_component); - result_index += to_component.len; + if (to_rest.len > 0) { + var rest_it = mem.split(to_rest, "/\\"); + while (rest_it.next()) |to_component| { + result[result_index] = '\\'; + result_index += 1; + mem.copy(u8, result[result_index..], to_component); + result_index += to_component.len; + } } return result[0..result_index];