Merge branch 'master' into llvm6
commit
1d77f8db28
|
@ -605,6 +605,7 @@ install(FILES "${CMAKE_SOURCE_DIR}/std/os/windows/index.zig" DESTINATION "${ZIG_
|
|||
install(FILES "${CMAKE_SOURCE_DIR}/std/os/windows/util.zig" DESTINATION "${ZIG_STD_DEST}/os/windows")
|
||||
install(FILES "${CMAKE_SOURCE_DIR}/std/rand.zig" DESTINATION "${ZIG_STD_DEST}")
|
||||
install(FILES "${CMAKE_SOURCE_DIR}/std/sort.zig" DESTINATION "${ZIG_STD_DEST}")
|
||||
install(FILES "${CMAKE_SOURCE_DIR}/std/unicode.zig" DESTINATION "${ZIG_STD_DEST}")
|
||||
install(FILES "${CMAKE_SOURCE_DIR}/std/special/bootstrap.zig" DESTINATION "${ZIG_STD_DEST}/special")
|
||||
install(FILES "${CMAKE_SOURCE_DIR}/std/special/bootstrap_lib.zig" DESTINATION "${ZIG_STD_DEST}/special")
|
||||
install(FILES "${CMAKE_SOURCE_DIR}/std/special/build_file_template.zig" DESTINATION "${ZIG_STD_DEST}/special")
|
||||
|
|
|
@ -276,6 +276,7 @@ pub fn installStdLib(b: &Builder) {
|
|||
"os/windows/util.zig",
|
||||
"rand.zig",
|
||||
"sort.zig",
|
||||
"unicode.zig",
|
||||
"special/bootstrap.zig",
|
||||
"special/bootstrap_lib.zig",
|
||||
"special/build_file_template.zig",
|
||||
|
|
|
@ -298,7 +298,7 @@ pub fn main() -> %void {
|
|||
<li>Ascii control characters, except for U+000a (LF): U+0000 - U+0009, U+000b - U+001f, U+007f. (Note that Windows line endings (CRLF) are not allowed, and hard tabs are not allowed.)</li>
|
||||
<li>Non-Ascii Unicode line endings: U+0085 (NEL), U+2028 (LS), U+2029 (PS).</li>
|
||||
</ul>
|
||||
<p>The codepoint U+000a (LF) (which is encoded as the single-byte value 0x0a) is the line terminator character. This character always terminates a line of zig source code. A non-empty zig source must end with the line terminator character.</p>
|
||||
<p>The codepoint U+000a (LF) (which is encoded as the single-byte value 0x0a) is the line terminator character. This character always terminates a line of zig source code (except possibly the last line of the file).</p>
|
||||
<p>For some discussion on the rationale behind these design decisions, see <a href="https://github.com/zig-lang/zig/issues/663">issue #663</a></p>
|
||||
<h2 id="values">Values</h2>
|
||||
<pre><code class="zig">const warn = @import("std").debug.warn;
|
||||
|
|
|
@ -213,11 +213,14 @@ pub const Module = struct {
|
|||
};
|
||||
%defer self.allocator.free(root_src_real_path);
|
||||
|
||||
const source_code = io.readFileAlloc(root_src_real_path, self.allocator) %% |err| {
|
||||
const source_code = io.readFileAllocExtra(root_src_real_path, self.allocator, 3) %% |err| {
|
||||
%return printError("unable to open '{}': {}", root_src_real_path, err);
|
||||
return err;
|
||||
};
|
||||
%defer self.allocator.free(source_code);
|
||||
source_code[source_code.len - 3] = '\n';
|
||||
source_code[source_code.len - 2] = '\n';
|
||||
source_code[source_code.len - 1] = '\n';
|
||||
|
||||
warn("====input:====\n");
|
||||
|
||||
|
|
|
@ -1086,7 +1086,13 @@ pub const Parser = struct {
|
|||
var fixed_buffer_mem: [100 * 1024]u8 = undefined;
|
||||
|
||||
fn testParse(source: []const u8, allocator: &mem.Allocator) -> %[]u8 {
|
||||
var tokenizer = Tokenizer.init(source);
|
||||
var padded_source: [0x100]u8 = undefined;
|
||||
std.mem.copy(u8, padded_source[0..source.len], source);
|
||||
padded_source[source.len + 0] = '\n';
|
||||
padded_source[source.len + 1] = '\n';
|
||||
padded_source[source.len + 2] = '\n';
|
||||
|
||||
var tokenizer = Tokenizer.init(padded_source[0..source.len + 3]);
|
||||
var parser = Parser.init(&tokenizer, allocator, "(memory buffer)");
|
||||
defer parser.deinit();
|
||||
|
||||
|
|
|
@ -70,7 +70,6 @@ pub const Token = struct {
|
|||
Identifier,
|
||||
StringLiteral: StrLitKind,
|
||||
Eof,
|
||||
NoEolAtEof,
|
||||
Builtin,
|
||||
Bang,
|
||||
Equal,
|
||||
|
@ -140,7 +139,6 @@ pub const Token = struct {
|
|||
pub const Tokenizer = struct {
|
||||
buffer: []const u8,
|
||||
index: usize,
|
||||
actual_file_end: usize,
|
||||
pending_invalid_token: ?Token,
|
||||
|
||||
pub const Location = struct {
|
||||
|
@ -179,17 +177,15 @@ pub const Tokenizer = struct {
|
|||
std.debug.warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
|
||||
}
|
||||
|
||||
/// buffer must end with "\n\n\n". This is so that attempting to decode
|
||||
/// the 3 trailing bytes of a 4-byte utf8 sequence is never a buffer overflow.
|
||||
pub fn init(buffer: []const u8) -> Tokenizer {
|
||||
var source_len = buffer.len;
|
||||
while (source_len > 0) : (source_len -= 1) {
|
||||
if (buffer[source_len - 1] == '\n') break;
|
||||
// last line is incomplete, so skip it, and give an error when we get there.
|
||||
}
|
||||
|
||||
std.debug.assert(buffer[buffer.len - 1] == '\n');
|
||||
std.debug.assert(buffer[buffer.len - 2] == '\n');
|
||||
std.debug.assert(buffer[buffer.len - 3] == '\n');
|
||||
return Tokenizer {
|
||||
.buffer = buffer[0..source_len],
|
||||
.buffer = buffer,
|
||||
.index = 0,
|
||||
.actual_file_end = buffer.len,
|
||||
.pending_invalid_token = null,
|
||||
};
|
||||
}
|
||||
|
@ -512,17 +508,14 @@ pub const Tokenizer = struct {
|
|||
}
|
||||
}
|
||||
result.end = self.index;
|
||||
|
||||
if (result.id == Token.Id.Eof) {
|
||||
if (self.pending_invalid_token) |token| {
|
||||
self.pending_invalid_token = null;
|
||||
return token;
|
||||
}
|
||||
if (self.actual_file_end != self.buffer.len) {
|
||||
// instead of an Eof, give an error token
|
||||
result.id = Token.Id.NoEolAtEof;
|
||||
result.end = self.actual_file_end;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -553,161 +546,96 @@ pub const Tokenizer = struct {
|
|||
return 0;
|
||||
} else {
|
||||
// check utf8-encoded character.
|
||||
// remember that the last byte in the buffer is guaranteed to be '\n',
|
||||
// which means we really don't need to do bounds checks here,
|
||||
// as long as we check one byte at a time for being a continuation byte.
|
||||
var value: u32 = undefined;
|
||||
var length: u3 = undefined;
|
||||
if (c0 & 0b11100000 == 0b11000000) {value = c0 & 0b00011111; length = 2;}
|
||||
else if (c0 & 0b11110000 == 0b11100000) {value = c0 & 0b00001111; length = 3;}
|
||||
else if (c0 & 0b11111000 == 0b11110000) {value = c0 & 0b00000111; length = 4;}
|
||||
else return 1; // unexpected continuation or too many leading 1's
|
||||
|
||||
const c1 = self.buffer[self.index + 1];
|
||||
if (c1 & 0b11000000 != 0b10000000) return 1; // expected continuation
|
||||
value <<= 6;
|
||||
value |= c1 & 0b00111111;
|
||||
if (length == 2) {
|
||||
if (value < 0x80) return length; // overlong
|
||||
if (value == 0x85) return length; // U+0085 (NEL)
|
||||
self.index += length - 1;
|
||||
return 0;
|
||||
const length = std.unicode.utf8ByteSequenceLength(c0) %% return 1;
|
||||
// the last 3 bytes in the buffer are guaranteed to be '\n',
|
||||
// which means we don't need to do any bounds checking here.
|
||||
const bytes = self.buffer[self.index..self.index + length];
|
||||
switch (length) {
|
||||
2 => {
|
||||
const value = std.unicode.utf8Decode2(bytes) %% return length;
|
||||
if (value == 0x85) return length; // U+0085 (NEL)
|
||||
},
|
||||
3 => {
|
||||
const value = std.unicode.utf8Decode3(bytes) %% return length;
|
||||
if (value == 0x2028) return length; // U+2028 (LS)
|
||||
if (value == 0x2029) return length; // U+2029 (PS)
|
||||
},
|
||||
4 => {
|
||||
_ = std.unicode.utf8Decode4(bytes) %% return length;
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
const c2 = self.buffer[self.index + 2];
|
||||
if (c2 & 0b11000000 != 0b10000000) return 2; // expected continuation
|
||||
value <<= 6;
|
||||
value |= c2 & 0b00111111;
|
||||
if (length == 3) {
|
||||
if (value < 0x800) return length; // overlong
|
||||
if (value == 0x2028) return length; // U+2028 (LS)
|
||||
if (value == 0x2029) return length; // U+2029 (PS)
|
||||
if (0xd800 <= value and value <= 0xdfff) return length; // surrogate halves not allowed in utf8
|
||||
self.index += length - 1;
|
||||
return 0;
|
||||
}
|
||||
const c3 = self.buffer[self.index + 3];
|
||||
if (c3 & 0b11000000 != 0b10000000) return 3; // expected continuation
|
||||
value <<= 6;
|
||||
value |= c3 & 0b00111111;
|
||||
if (length == 4) {
|
||||
if (value < 0x10000) return length; // overlong
|
||||
if (value > 0x10FFFF) return length; // out of bounds
|
||||
self.index += length - 1;
|
||||
return 0;
|
||||
}
|
||||
unreachable;
|
||||
self.index += length - 1;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
test "tokenizer - source must end with eol" {
|
||||
testTokenizeWithEol("", []Token.Id {
|
||||
}, true);
|
||||
testTokenizeWithEol("no newline", []Token.Id {
|
||||
}, false);
|
||||
testTokenizeWithEol("test\n", []Token.Id {
|
||||
test "tokenizer" {
|
||||
testTokenize("test", []Token.Id {
|
||||
Token.Id.Keyword_test,
|
||||
}, true);
|
||||
testTokenizeWithEol("test\nno newline", []Token.Id {
|
||||
Token.Id.Keyword_test,
|
||||
}, false);
|
||||
});
|
||||
}
|
||||
|
||||
test "tokenizer - invalid token characters" {
|
||||
testTokenize("#\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("`\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("#", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("`", []Token.Id{Token.Id.Invalid});
|
||||
}
|
||||
|
||||
test "tokenizer - invalid literal/comment characters" {
|
||||
testTokenize("\"\x00\"\n", []Token.Id {
|
||||
testTokenize("\"\x00\"", []Token.Id {
|
||||
Token.Id { .StringLiteral = Token.StrLitKind.Normal },
|
||||
Token.Id.Invalid,
|
||||
});
|
||||
testTokenize("//\x00\n", []Token.Id {
|
||||
testTokenize("//\x00", []Token.Id {
|
||||
Token.Id.Invalid,
|
||||
});
|
||||
testTokenize("//\x1f\n", []Token.Id {
|
||||
testTokenize("//\x1f", []Token.Id {
|
||||
Token.Id.Invalid,
|
||||
});
|
||||
testTokenize("//\x7f\n", []Token.Id {
|
||||
testTokenize("//\x7f", []Token.Id {
|
||||
Token.Id.Invalid,
|
||||
});
|
||||
}
|
||||
|
||||
test "tokenizer - valid unicode" {
|
||||
testTokenize("//\xc2\x80\n", []Token.Id{});
|
||||
testTokenize("//\xdf\xbf\n", []Token.Id{});
|
||||
testTokenize("//\xe0\xa0\x80\n", []Token.Id{});
|
||||
testTokenize("//\xe1\x80\x80\n", []Token.Id{});
|
||||
testTokenize("//\xef\xbf\xbf\n", []Token.Id{});
|
||||
testTokenize("//\xf0\x90\x80\x80\n", []Token.Id{});
|
||||
testTokenize("//\xf1\x80\x80\x80\n", []Token.Id{});
|
||||
testTokenize("//\xf3\xbf\xbf\xbf\n", []Token.Id{});
|
||||
testTokenize("//\xf4\x8f\xbf\xbf\n", []Token.Id{});
|
||||
test "tokenizer - utf8" {
|
||||
testTokenize("//\xc2\x80", []Token.Id{});
|
||||
testTokenize("//\xf4\x8f\xbf\xbf", []Token.Id{});
|
||||
}
|
||||
|
||||
test "tokenizer - invalid unicode continuation bytes" {
|
||||
// unexpected continuation
|
||||
testTokenize("//\x80\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xbf\n", []Token.Id{Token.Id.Invalid});
|
||||
// too many leading 1's
|
||||
testTokenize("//\xf8\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xff\n", []Token.Id{Token.Id.Invalid});
|
||||
// expected continuation for 2 byte sequences
|
||||
testTokenize("//\xc2\x00\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xc2\xc0\n", []Token.Id{Token.Id.Invalid});
|
||||
// expected continuation for 3 byte sequences
|
||||
testTokenize("//\xe0\x00\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe0\xc0\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe0\xa0\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe0\xa0\x00\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe0\xa0\xc0\n", []Token.Id{Token.Id.Invalid});
|
||||
// expected continuation for 4 byte sequences
|
||||
testTokenize("//\xf0\x00\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf0\xc0\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf0\x90\x00\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf0\x90\xc0\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf0\x90\x80\x00\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf0\x90\x80\xc0\n", []Token.Id{Token.Id.Invalid});
|
||||
test "tokenizer - invalid utf8" {
|
||||
testTokenize("//\x80", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xbf", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf8", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xff", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xc2\xc0", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe0", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf0", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf0\x90\x80\xc0", []Token.Id{Token.Id.Invalid});
|
||||
}
|
||||
|
||||
test "tokenizer - overlong utf8 codepoint" {
|
||||
testTokenize("//\xc0\x80\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xc1\xbf\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe0\x80\x80\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe0\x9f\xbf\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf0\x80\x80\x80\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf0\x8f\xbf\xbf\n", []Token.Id{Token.Id.Invalid});
|
||||
}
|
||||
|
||||
test "tokenizer - misc invalid utf8" {
|
||||
// codepoint out of bounds
|
||||
testTokenize("//\xf4\x90\x80\x80\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xf7\xbf\xbf\xbf\n", []Token.Id{Token.Id.Invalid});
|
||||
test "tokenizer - illegal unicode codepoints" {
|
||||
// unicode newline characters: U+0085, U+2028, U+2029
|
||||
testTokenize("//\xc2\x84\n", []Token.Id{});
|
||||
testTokenize("//\xc2\x85\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xc2\x86\n", []Token.Id{});
|
||||
testTokenize("//\xe2\x80\xa7\n", []Token.Id{});
|
||||
testTokenize("//\xe2\x80\xa8\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe2\x80\xa9\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe2\x80\xaa\n", []Token.Id{});
|
||||
// surrogate halves
|
||||
testTokenize("//\xed\x9f\x80\n", []Token.Id{});
|
||||
testTokenize("//\xed\xa0\x80\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xed\xbf\xbf\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xee\x80\x80\n", []Token.Id{});
|
||||
// surrogate halves are invalid, even in surrogate pairs
|
||||
testTokenize("//\xed\xa0\xad\xed\xb2\xa9\n", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xc2\x84", []Token.Id{});
|
||||
testTokenize("//\xc2\x85", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xc2\x86", []Token.Id{});
|
||||
testTokenize("//\xe2\x80\xa7", []Token.Id{});
|
||||
testTokenize("//\xe2\x80\xa8", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe2\x80\xa9", []Token.Id{Token.Id.Invalid});
|
||||
testTokenize("//\xe2\x80\xaa", []Token.Id{});
|
||||
}
|
||||
|
||||
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) {
|
||||
testTokenizeWithEol(source, expected_tokens, true);
|
||||
}
|
||||
fn testTokenizeWithEol(source: []const u8, expected_tokens: []const Token.Id, expected_eol_at_eof: bool) {
|
||||
var tokenizer = Tokenizer.init(source);
|
||||
// (test authors, just make this bigger if you need it)
|
||||
var padded_source: [0x100]u8 = undefined;
|
||||
std.mem.copy(u8, padded_source[0..source.len], source);
|
||||
padded_source[source.len + 0] = '\n';
|
||||
padded_source[source.len + 1] = '\n';
|
||||
padded_source[source.len + 2] = '\n';
|
||||
|
||||
var tokenizer = Tokenizer.init(padded_source[0..source.len + 3]);
|
||||
for (expected_tokens) |expected_token_id| {
|
||||
const token = tokenizer.next();
|
||||
std.debug.assert(@TagType(Token.Id)(token.id) == @TagType(Token.Id)(expected_token_id));
|
||||
|
@ -718,5 +646,5 @@ fn testTokenizeWithEol(source: []const u8, expected_tokens: []const Token.Id, ex
|
|||
else => {},
|
||||
}
|
||||
}
|
||||
std.debug.assert(tokenizer.next().id == if (expected_eol_at_eof) Token.Id.Eof else Token.Id.NoEolAtEof);
|
||||
std.debug.assert(tokenizer.next().id == Token.Id.Eof);
|
||||
}
|
||||
|
|
1428
src/ir.cpp
1428
src/ir.cpp
File diff suppressed because it is too large
Load Diff
|
@ -14,6 +14,8 @@ const State = enum { // TODO put inside format function and make sure the name a
|
|||
CloseBrace,
|
||||
Integer,
|
||||
IntegerWidth,
|
||||
Float,
|
||||
FloatWidth,
|
||||
Character,
|
||||
Buf,
|
||||
BufWidth,
|
||||
|
@ -37,7 +39,6 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void,
|
|||
switch (state) {
|
||||
State.Start => switch (c) {
|
||||
'{' => {
|
||||
// TODO if you make this an if statement with `and` then it breaks
|
||||
if (start_index < i) {
|
||||
%return output(context, fmt[start_index..i]);
|
||||
}
|
||||
|
@ -85,6 +86,8 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void,
|
|||
},
|
||||
's' => {
|
||||
state = State.Buf;
|
||||
},'.' => {
|
||||
state = State.Float;
|
||||
},
|
||||
else => @compileError("Unknown format character: " ++ []u8{c}),
|
||||
},
|
||||
|
@ -129,6 +132,30 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void,
|
|||
'0' ... '9' => {},
|
||||
else => @compileError("Unexpected character in format string: " ++ []u8{c}),
|
||||
},
|
||||
State.Float => switch (c) {
|
||||
'}' => {
|
||||
%return formatFloatDecimal(args[next_arg], 0, context, output);
|
||||
next_arg += 1;
|
||||
state = State.Start;
|
||||
start_index = i + 1;
|
||||
},
|
||||
'0' ... '9' => {
|
||||
width_start = i;
|
||||
state = State.FloatWidth;
|
||||
},
|
||||
else => @compileError("Unexpected character in format string: " ++ []u8{c}),
|
||||
},
|
||||
State.FloatWidth => switch (c) {
|
||||
'}' => {
|
||||
width = comptime %%parseUnsigned(usize, fmt[width_start..i], 10);
|
||||
%return formatFloatDecimal(args[next_arg], width, context, output);
|
||||
next_arg += 1;
|
||||
state = State.Start;
|
||||
start_index = i + 1;
|
||||
},
|
||||
'0' ... '9' => {},
|
||||
else => @compileError("Unexpected character in format string: " ++ []u8{c}),
|
||||
},
|
||||
State.BufWidth => switch (c) {
|
||||
'}' => {
|
||||
width = comptime %%parseUnsigned(usize, fmt[width_start..i], 10);
|
||||
|
@ -267,6 +294,47 @@ pub fn formatFloat(value: var, context: var, output: fn(@typeOf(context), []cons
|
|||
}
|
||||
}
|
||||
|
||||
/// Writes `value` in plain decimal notation (no exponent) through `output`.
/// `precision` is the maximum number of digits printed after the decimal
/// point; 0 means "print every digit that is available". Digits beyond the
/// precision are truncated, not rounded.
/// NaN is written as "NaN", infinities as "Infinity" / "-Infinity" and zero
/// as "0.0"; a set sign bit always produces a leading "-".
/// Any error returned by `output` is propagated to the caller.
pub fn formatFloatDecimal(value: var, precision: usize, context: var, output: fn(@typeOf(context), []const u8)->%void) -> %void {
    var x = f64(value);

    // Errol doesn't handle these special cases.
    if (math.isNan(x)) {
        return output(context, "NaN");
    }
    if (math.signbit(x)) {
        %return output(context, "-");
        x = -x;
    }
    if (math.isPositiveInf(x)) {
        return output(context, "Infinity");
    }
    if (x == 0.0) {
        return output(context, "0.0");
    }

    var buffer: [32]u8 = undefined;
    const float_decimal = errol3(x, buffer[0..]);
    const digits = float_decimal.digits;
    // NOTE(review): the existing tests (1.1234 prints as "1.1") imply that the
    // value equals 0.DIGITS * 10^exp -- confirm against errol3.
    const dec_exp = float_decimal.exp;

    // An f32 was widened to f64 above, so only its first 7 significant digits
    // are treated as meaningful when printing the fraction.
    const num_valid_digits = if (@typeOf(value) == f32)
        math.min(usize(7), digits.len)
    else
        digits.len;

    // Integer part. errol3 drops trailing zeros, so the exponent may ask for
    // more integer digits than were produced (e.g. 1000.0); pad with "0"
    // instead of slicing past the end of `digits`.
    var int_digits: usize = 0;
    if (dec_exp > 0) {
        const int_len = usize(dec_exp);
        int_digits = math.min(int_len, digits.len);
        %return output(context, digits[0..int_digits]);
        var pad = int_digits;
        while (pad < int_len) : (pad += 1) {
            %return output(context, "0");
        }
    } else {
        // Magnitude below 1: the integer part is a single zero.
        %return output(context, "0");
    }

    %return output(context, ".");

    // Fractional part: for a negative exponent, -dec_exp zeros sit between the
    // decimal point and the first significant digit.
    const leading_zeros = if (dec_exp < 0) usize(-dec_exp) else usize(0);
    const frac_digits = if (num_valid_digits > int_digits)
        digits[int_digits..num_valid_digits]
    else
        digits[0..0];

    const available = leading_zeros + frac_digits.len;
    if (available == 0) {
        // Nothing after the point; always print at least one fractional digit.
        return output(context, "0");
    }

    const wanted = if (precision != 0) math.min(precision, available) else available;
    const zeros_out = math.min(leading_zeros, wanted);
    var i: usize = 0;
    while (i < zeros_out) : (i += 1) {
        %return output(context, "0");
    }
    return output(context, frac_digits[0 .. wanted - zeros_out]);
}
|
||||
|
||||
|
||||
pub fn formatInt(value: var, base: u8, uppercase: bool, width: usize,
|
||||
context: var, output: fn(@typeOf(context), []const u8)->%void) -> %void
|
||||
{
|
||||
|
@ -540,6 +608,39 @@ test "fmt.format" {
|
|||
const result = %%bufPrint(buf1[0..], "f64: {}\n", -math.inf_f64);
|
||||
assert(mem.eql(u8, result, "f64: -Infinity\n"));
|
||||
}
|
||||
{
|
||||
var buf1: [32]u8 = undefined;
|
||||
const value: f32 = 1.1234;
|
||||
const result = %%bufPrint(buf1[0..], "f32: {.1}\n", value);
|
||||
assert(mem.eql(u8, result, "f32: 1.1\n"));
|
||||
}
|
||||
{
|
||||
var buf1: [32]u8 = undefined;
|
||||
const value: f32 = 1234.567;
|
||||
const result = %%bufPrint(buf1[0..], "f32: {.2}\n", value);
|
||||
assert(mem.eql(u8, result, "f32: 1234.56\n"));
|
||||
}
|
||||
{
|
||||
var buf1: [32]u8 = undefined;
|
||||
const value: f32 = -11.1234;
|
||||
const result = %%bufPrint(buf1[0..], "f32: {.4}\n", value);
|
||||
// -11.1234 is converted to f64 -11.12339... internally (errol3() function takes f64).
|
||||
// -11.12339... is truncated to -11.1233
|
||||
assert(mem.eql(u8, result, "f32: -11.1233\n"));
|
||||
}
|
||||
{
|
||||
var buf1: [32]u8 = undefined;
|
||||
const value: f32 = 91.12345;
|
||||
const result = %%bufPrint(buf1[0..], "f32: {.}\n", value);
|
||||
assert(mem.eql(u8, result, "f32: 91.12345\n"));
|
||||
}
|
||||
{
|
||||
var buf1: [32]u8 = undefined;
|
||||
const value: f64 = 91.12345678901235;
|
||||
const result = %%bufPrint(buf1[0..], "f64: {.10}\n", value);
|
||||
assert(mem.eql(u8, result, "f64: 91.1234567890\n"));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ pub const net = @import("net.zig");
|
|||
pub const os = @import("os/index.zig");
|
||||
pub const rand = @import("rand.zig");
|
||||
pub const sort = @import("sort.zig");
|
||||
pub const unicode = @import("unicode.zig");
|
||||
|
||||
test "std" {
|
||||
// run tests from these
|
||||
|
@ -53,4 +54,5 @@ test "std" {
|
|||
_ = @import("os/index.zig");
|
||||
_ = @import("rand.zig");
|
||||
_ = @import("sort.zig");
|
||||
_ = @import("unicode.zig");
|
||||
}
|
||||
|
|
|
@ -500,11 +500,16 @@ pub fn writeFile(path: []const u8, data: []const u8, allocator: ?&mem.Allocator)
|
|||
|
||||
/// On success, caller owns returned buffer.
/// Delegates to readFileAllocExtra with extra_len = 0, i.e. no extra bytes
/// are reserved after the file contents.
|
||||
pub fn readFileAlloc(path: []const u8, allocator: &mem.Allocator) -> %[]u8 {
|
||||
return readFileAllocExtra(path, allocator, 0);
|
||||
}
|
||||
/// On success, caller owns returned buffer.
|
||||
/// Allocates extra_len extra bytes at the end of the file buffer, which are uninitialized.
|
||||
pub fn readFileAllocExtra(path: []const u8, allocator: &mem.Allocator, extra_len: usize) -> %[]u8 {
|
||||
var file = %return File.openRead(path, allocator);
|
||||
defer file.close();
|
||||
|
||||
const size = %return file.getEndPos();
|
||||
const buf = %return allocator.alloc(u8, size);
|
||||
const buf = %return allocator.alloc(u8, size + extra_len);
|
||||
%defer allocator.free(buf);
|
||||
|
||||
var adapter = FileInStream.init(&file);
|
||||
|
|
|
@ -39,7 +39,7 @@ fn acos32(x: f32) -> f32 {
|
|||
if (hx >> 31 != 0) {
|
||||
return 2.0 * pio2_hi + 0x1.0p-120;
|
||||
} else {
|
||||
return 0;
|
||||
return 0.0;
|
||||
}
|
||||
} else {
|
||||
return math.nan(f32);
|
||||
|
|
|
@ -0,0 +1,169 @@
|
|||
const std = @import("./index.zig");
|
||||
|
||||
error Utf8InvalidStartByte;
|
||||
|
||||
/// Inspects the leading byte of a UTF-8 encoded codepoint and reports how
/// many bytes (1 through 4) the whole sequence occupies.
/// A byte that does not have the form of a UTF-8 start byte yields
/// error.Utf8InvalidStartByte.
pub fn utf8ByteSequenceLength(first_byte: u8) -> %u3 {
    // 0xxxxxxx: single byte.
    if ((first_byte & 0x80) == 0x00) {
        return u3(1);
    }
    // 110xxxxx: start of a two-byte sequence.
    if ((first_byte & 0xe0) == 0xc0) {
        return u3(2);
    }
    // 1110xxxx: start of a three-byte sequence.
    if ((first_byte & 0xf0) == 0xe0) {
        return u3(3);
    }
    // 11110xxx: start of a four-byte sequence.
    if ((first_byte & 0xf8) == 0xf0) {
        return u3(4);
    }
    // Anything else (10xxxxxx or 11111xxx) cannot start a sequence.
    return error.Utf8InvalidStartByte;
}
|
||||
|
||||
error Utf8OverlongEncoding;
|
||||
error Utf8ExpectedContinuation;
|
||||
error Utf8EncodesSurrogateHalf;
|
||||
error Utf8CodepointTooLarge;
|
||||
|
||||
/// Decodes the single UTF-8 codepoint stored in `bytes`.
/// bytes.len must be equal to %%utf8ByteSequenceLength(bytes[0]); any
/// length outside 1..4 is treated as unreachable.
/// Callers that know the length at comptime can call utf8Decode2,
/// utf8Decode3 or utf8Decode4 directly instead of this function.
pub fn utf8Decode(bytes: []const u8) -> %u32 {
    const len = bytes.len;
    // A one-byte sequence is its own codepoint value.
    if (len == 1) return u32(bytes[0]);
    if (len == 2) return utf8Decode2(bytes);
    if (len == 3) return utf8Decode3(bytes);
    if (len == 4) return utf8Decode4(bytes);
    unreachable;
}
|
||||
/// Decodes a two-byte UTF-8 sequence.
/// Asserts that bytes.len == 2 and that bytes[0] has the 110xxxxx form.
/// Returns error.Utf8ExpectedContinuation when bytes[1] is not a
/// continuation byte and error.Utf8OverlongEncoding when the decoded value
/// is below 0x80.
pub fn utf8Decode2(bytes: []const u8) -> %u32 {
    std.debug.assert(bytes.len == 2);
    std.debug.assert((bytes[0] & 0xe0) == 0xc0);

    // The second byte must look like 10xxxxxx.
    if ((bytes[1] & 0xc0) != 0x80) return error.Utf8ExpectedContinuation;

    // 5 payload bits from the lead byte, 6 from the continuation byte.
    const codepoint = (u32(bytes[0] & 0x1f) << 6) | u32(bytes[1] & 0x3f);

    if (codepoint < 0x80) return error.Utf8OverlongEncoding;

    return codepoint;
}
|
||||
/// Decodes a three-byte UTF-8 sequence.
/// Asserts that bytes.len == 3 and that bytes[0] has the 1110xxxx form.
/// Returns error.Utf8ExpectedContinuation when bytes[1] or bytes[2] is not a
/// continuation byte, error.Utf8OverlongEncoding when the decoded value is
/// below 0x800, and error.Utf8EncodesSurrogateHalf for 0xd800 through 0xdfff.
pub fn utf8Decode3(bytes: []const u8) -> %u32 {
    std.debug.assert(bytes.len == 3);
    std.debug.assert((bytes[0] & 0xf0) == 0xe0);

    // Both trailing bytes must look like 10xxxxxx; they are checked in order.
    if ((bytes[1] & 0xc0) != 0x80) return error.Utf8ExpectedContinuation;
    if ((bytes[2] & 0xc0) != 0x80) return error.Utf8ExpectedContinuation;

    // 4 payload bits from the lead byte, 6 from each continuation byte.
    const codepoint = (u32(bytes[0] & 0x0f) << 12) |
        (u32(bytes[1] & 0x3f) << 6) |
        u32(bytes[2] & 0x3f);

    if (codepoint < 0x800) return error.Utf8OverlongEncoding;
    if (codepoint >= 0xd800 and codepoint <= 0xdfff) return error.Utf8EncodesSurrogateHalf;

    return codepoint;
}
|
||||
/// Decodes a four-byte UTF-8 sequence.
/// Asserts that bytes.len == 4 and that bytes[0] has the 11110xxx form.
/// Returns error.Utf8ExpectedContinuation when any of bytes[1..4] is not a
/// continuation byte, error.Utf8OverlongEncoding when the decoded value is
/// below 0x10000, and error.Utf8CodepointTooLarge when it exceeds 0x10FFFF.
pub fn utf8Decode4(bytes: []const u8) -> %u32 {
    std.debug.assert(bytes.len == 4);
    std.debug.assert((bytes[0] & 0xf8) == 0xf0);

    // All three trailing bytes must look like 10xxxxxx; they are checked in order.
    if ((bytes[1] & 0xc0) != 0x80) return error.Utf8ExpectedContinuation;
    if ((bytes[2] & 0xc0) != 0x80) return error.Utf8ExpectedContinuation;
    if ((bytes[3] & 0xc0) != 0x80) return error.Utf8ExpectedContinuation;

    // 3 payload bits from the lead byte, 6 from each continuation byte.
    const codepoint = (u32(bytes[0] & 0x07) << 18) |
        (u32(bytes[1] & 0x3f) << 12) |
        (u32(bytes[2] & 0x3f) << 6) |
        u32(bytes[3] & 0x3f);

    if (codepoint < 0x10000) return error.Utf8OverlongEncoding;
    if (codepoint > 0x10FFFF) return error.Utf8CodepointTooLarge;

    return codepoint;
}
|
||||
|
||||
error UnexpectedEof;
|
||||
// Each case decodes one well-formed sequence through testValid and checks the
// resulting codepoint. The inputs include the smallest and largest values
// accepted for the 1-, 2-, 3- and 4-byte forms.
test "valid utf8" {
|
||||
testValid("\x00", 0x0);
|
||||
testValid("\x20", 0x20);
|
||||
testValid("\x7f", 0x7f);
|
||||
testValid("\xc2\x80", 0x80);
|
||||
testValid("\xdf\xbf", 0x7ff);
|
||||
testValid("\xe0\xa0\x80", 0x800);
|
||||
testValid("\xe1\x80\x80", 0x1000);
|
||||
testValid("\xef\xbf\xbf", 0xffff);
|
||||
testValid("\xf0\x90\x80\x80", 0x10000);
|
||||
testValid("\xf1\x80\x80\x80", 0x40000);
|
||||
testValid("\xf3\xbf\xbf\xbf", 0xfffff);
|
||||
testValid("\xf4\x8f\xbf\xbf", 0x10ffff);
|
||||
}
|
||||
|
||||
// Malformed start bytes and missing or invalid continuation bytes.
// Inputs shorter than the length announced by their start byte are rejected
// by testDecode itself with error.UnexpectedEof, before any decoder runs;
// only full-length inputs reach the Utf8ExpectedContinuation checks.
test "invalid utf8 continuation bytes" {
|
||||
// unexpected continuation
|
||||
testError("\x80", error.Utf8InvalidStartByte);
|
||||
testError("\xbf", error.Utf8InvalidStartByte);
|
||||
// too many leading 1's
|
||||
testError("\xf8", error.Utf8InvalidStartByte);
|
||||
testError("\xff", error.Utf8InvalidStartByte);
|
||||
// expected continuation for 2 byte sequences
|
||||
testError("\xc2", error.UnexpectedEof);
|
||||
testError("\xc2\x00", error.Utf8ExpectedContinuation);
|
||||
testError("\xc2\xc0", error.Utf8ExpectedContinuation);
|
||||
// expected continuation for 3 byte sequences
|
||||
testError("\xe0", error.UnexpectedEof);
|
||||
testError("\xe0\x00", error.UnexpectedEof);
|
||||
testError("\xe0\xc0", error.UnexpectedEof);
|
||||
testError("\xe0\xa0", error.UnexpectedEof);
|
||||
testError("\xe0\xa0\x00", error.Utf8ExpectedContinuation);
|
||||
testError("\xe0\xa0\xc0", error.Utf8ExpectedContinuation);
|
||||
// expected continuation for 4 byte sequences
|
||||
testError("\xf0", error.UnexpectedEof);
|
||||
testError("\xf0\x00", error.UnexpectedEof);
|
||||
testError("\xf0\xc0", error.UnexpectedEof);
|
||||
testError("\xf0\x90\x00", error.UnexpectedEof);
|
||||
testError("\xf0\x90\xc0", error.UnexpectedEof);
|
||||
testError("\xf0\x90\x80\x00", error.Utf8ExpectedContinuation);
|
||||
testError("\xf0\x90\x80\xc0", error.Utf8ExpectedContinuation);
|
||||
}
|
||||
|
||||
// Sequences whose decoded value is below the minimum for their length
// (0x80 for 2 bytes, 0x800 for 3 bytes, 0x10000 for 4 bytes) must be
// rejected with error.Utf8OverlongEncoding.
test "overlong utf8 codepoint" {
|
||||
testError("\xc0\x80", error.Utf8OverlongEncoding);
|
||||
testError("\xc1\xbf", error.Utf8OverlongEncoding);
|
||||
testError("\xe0\x80\x80", error.Utf8OverlongEncoding);
|
||||
testError("\xe0\x9f\xbf", error.Utf8OverlongEncoding);
|
||||
testError("\xf0\x80\x80\x80", error.Utf8OverlongEncoding);
|
||||
testError("\xf0\x8f\xbf\xbf", error.Utf8OverlongEncoding);
|
||||
}
|
||||
|
||||
// Well-formed byte patterns that still decode to a forbidden value:
// codepoints above 0x10FFFF and the surrogate range 0xd800 through 0xdfff.
// The testValid cases pin the values immediately outside the surrogate range.
test "misc invalid utf8" {
|
||||
// codepoint out of bounds
|
||||
testError("\xf4\x90\x80\x80", error.Utf8CodepointTooLarge);
|
||||
testError("\xf7\xbf\xbf\xbf", error.Utf8CodepointTooLarge);
|
||||
// surrogate halves
|
||||
testValid("\xed\x9f\xbf", 0xd7ff);
|
||||
testError("\xed\xa0\x80", error.Utf8EncodesSurrogateHalf);
|
||||
testError("\xed\xbf\xbf", error.Utf8EncodesSurrogateHalf);
|
||||
testValid("\xee\x80\x80", 0xe000);
|
||||
}
|
||||
|
||||
// Test helper: decoding `bytes` must fail, and it must fail with exactly
// `expected_err`. A successful decode is treated as unreachable.
fn testError(bytes: []const u8, expected_err: error) {
    _ = testDecode(bytes) %% |err| {
        std.debug.assert(err == expected_err);
        return;
    };
    // Decoding succeeded although an error was expected.
    unreachable;
}
|
||||
|
||||
// Test helper: decoding `bytes` must succeed and yield `expected_codepoint`.
fn testValid(bytes: []const u8, expected_codepoint: u32) {
    const decoded = %%testDecode(bytes);
    std.debug.assert(decoded == expected_codepoint);
}
|
||||
|
||||
// Test helper: decodes `bytes` as exactly one UTF-8 sequence.
// Propagates the error from utf8ByteSequenceLength for a bad start byte,
// returns error.UnexpectedEof when fewer bytes are supplied than the start
// byte announces, and asserts that no surplus bytes were supplied.
fn testDecode(bytes: []const u8) -> %u32 {
    const announced_len = %return utf8ByteSequenceLength(bytes[0]);
    if (announced_len > bytes.len) {
        return error.UnexpectedEof;
    }
    std.debug.assert(announced_len == bytes.len);
    return utf8Decode(bytes);
}
|
|
@ -230,20 +230,21 @@ fn foo(args: ...) {
|
|||
|
||||
|
||||
test "peer type resolution: error and [N]T" {
|
||||
assert(mem.eql(u8, %%testPeerErrorAndArray(0), "OK"));
|
||||
comptime assert(mem.eql(u8, %%testPeerErrorAndArray(0), "OK"));
|
||||
// TODO: implicit %T to %U where T can implicitly cast to U
|
||||
//assert(mem.eql(u8, %%testPeerErrorAndArray(0), "OK"));
|
||||
//comptime assert(mem.eql(u8, %%testPeerErrorAndArray(0), "OK"));
|
||||
|
||||
assert(mem.eql(u8, %%testPeerErrorAndArray2(1), "OKK"));
|
||||
comptime assert(mem.eql(u8, %%testPeerErrorAndArray2(1), "OKK"));
|
||||
}
|
||||
|
||||
error BadValue;
|
||||
fn testPeerErrorAndArray(x: u8) -> %[]const u8 {
|
||||
return switch (x) {
|
||||
0x00 => "OK",
|
||||
else => error.BadValue,
|
||||
};
|
||||
}
|
||||
//fn testPeerErrorAndArray(x: u8) -> %[]const u8 {
|
||||
// return switch (x) {
|
||||
// 0x00 => "OK",
|
||||
// else => error.BadValue,
|
||||
// };
|
||||
//}
|
||||
fn testPeerErrorAndArray2(x: u8) -> %[]const u8 {
|
||||
return switch (x) {
|
||||
0x00 => "OK",
|
||||
|
|
|
@ -560,3 +560,14 @@ fn hereIsAnOpaqueType(ptr: &OpaqueA) -> &OpaqueA {
|
|||
var a = ptr;
|
||||
return a;
|
||||
}
|
||||
|
||||
// Runs the helper once as an ordinary call and once under `comptime`.
// NOTE(review): presumably a compiler regression test -- confirm before
// changing the shape of either call.
test "comptime if inside runtime while which unconditionally breaks" {
|
||||
testComptimeIfInsideRuntimeWhileWhichUnconditionallyBreaks(true);
|
||||
comptime testComptimeIfInsideRuntimeWhileWhichUnconditionallyBreaks(true);
|
||||
}
|
||||
// Loops on `cond`, but the body ends in an unconditional `break`, so it runs
// at most once. The empty `if (false) { }` has a literal condition.
// NOTE(review): the exact statement shape appears to be what is under test;
// do not simplify.
fn testComptimeIfInsideRuntimeWhileWhichUnconditionallyBreaks(cond: bool) {
|
||||
while (cond) {
|
||||
if (false) { }
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue