std: Introduce std.unicode.utf8ValidCodepoint

master
LemonBoy 2020-09-21 15:50:43 +02:00
parent 6c4efab106
commit 44533f10fe
1 changed file with 25 additions and 0 deletions

View File

@ -153,6 +153,15 @@ pub fn utf8Decode4(bytes: []const u8) Utf8Decode4Error!u21 {
return value;
}
/// Reports whether `value` is a codepoint that UTF-8 is able to encode.
/// Returns false for the surrogate range (0xD800 through 0xDFFF) and for
/// every value greater than 0x10FFFF; returns true for everything else.
pub fn utf8ValidCodepoint(value: u21) bool {
    // Surrogates range
    if (value >= 0xD800 and value <= 0xDFFF) return false;
    // A u21 can hold values up to 0x1FFFFF; anything above the maximum
    // codepoint value 0x10FFFF is rejected.
    return value <= 0x10FFFF;
}
/// Returns the length of a supplied UTF-8 string literal in terms of unicode
/// codepoints.
/// Asserts that the data is valid UTF-8.
@ -785,3 +794,19 @@ test "utf8 count codepoints" {
try testUtf8CountCodepoints();
comptime testUtf8CountCodepoints() catch unreachable;
}
/// Checks `utf8ValidCodepoint` against a set of accepted and rejected values.
/// Returns the error of the first failing expectation so that the caller can
/// propagate it (at runtime) or turn it into a hard failure (at comptime).
fn testUtf8ValidCodepoint() !void {
    // Each expectation is propagated with `try`; discarding the result would
    // let a failing check go unnoticed.

    // Accepted: one-, two- and three-byte characters, the first value after
    // the surrogate range, and the maximum codepoint.
    try testing.expect(utf8ValidCodepoint('e'));
    try testing.expect(utf8ValidCodepoint('ë'));
    try testing.expect(utf8ValidCodepoint('は'));
    try testing.expect(utf8ValidCodepoint(0xe000));
    try testing.expect(utf8ValidCodepoint(0x10ffff));

    // Rejected: both ends of the surrogate range and the first value past
    // the maximum codepoint.
    try testing.expect(!utf8ValidCodepoint(0xd800));
    try testing.expect(!utf8ValidCodepoint(0xdfff));
    try testing.expect(!utf8ValidCodepoint(0x110000));
}
test "utf8 valid codepoint" {
    // Runtime pass: any failed expectation is returned from the test.
    try testUtf8ValidCodepoint();

    // Compile-time pass over the same checks; an error here is treated as
    // unreachable.
    comptime {
        testUtf8ValidCodepoint() catch unreachable;
    }
}