Merge pull request #4741 from momumi/master

allow `_` separators in number literals (stage 1)
Andrew Kelley 2020-03-23 00:54:54 -04:00 committed by GitHub
commit 13d04f9963
10 changed files with 903 additions and 160 deletions

View File

@ -885,6 +885,12 @@ const hex_int = 0xff;
const another_hex_int = 0xFF;
const octal_int = 0o755;
const binary_int = 0b11110000;
// underscores may be placed between two digits as a visual separator
const one_billion = 1_000_000_000;
const binary_mask = 0b1_1111_1111;
const permissions = 0o7_5_5;
const big_address = 0xFF80_0000_0000_0000;
{#code_end#}
{#header_close#}
{#header_open|Runtime Integer Values#}
@ -947,6 +953,11 @@ const yet_another = 123.0e+77;
const hex_floating_point = 0x103.70p-5;
const another_hex_float = 0x103.70;
const yet_another_hex_float = 0x103.70P-5;
// underscores may be placed between two digits as a visual separator
const lightspeed = 299_792_458.000_000;
const nanosecond = 0.000_000_001;
const more_hex = 0x1234_5678.9ABC_CDEFp-10;
{#code_end#}
<p>
There is no syntax for NaN, infinity, or negative infinity. For these special values,

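Editor's note: the langref hunks above document the rule that an underscore may only sit between two digits. As a quick standalone check that the separators are purely visual, the following sketch compares separated and unseparated spellings of the same values (it simply mirrors the langref examples and the behavior tests added later in this commit, and needs a compiler with this change):

const std = @import("std");

test "underscore separators are purely visual" {
    // each pair spells the same value with and without separators
    std.testing.expect(1_000_000_000 == 1000000000);
    std.testing.expect(0b1_1111_1111 == 0b111111111);
    std.testing.expect(0o7_5_5 == 0o755);
    std.testing.expect(0xFF80_0000_0000_0000 == 0xFF80000000000000);
    std.testing.expect(299_792_458.000_000 == 299792458.0);
    std.testing.expect(0.000_000_001 == 0.000000001);
}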
View File

@ -373,6 +373,7 @@ pub const Int = struct {
const d = switch (ch) {
'0'...'9' => ch - '0',
'a'...'f' => (ch - 'a') + 0xa,
'A'...'F' => (ch - 'A') + 0xa,
else => return error.InvalidCharForDigit,
};
@ -393,8 +394,9 @@ pub const Int = struct {
/// Set self from the string representation `value`.
///
/// value must contain only digits <= `base`. Base prefixes are not allowed (e.g. 0x43 should
/// simply be 43).
/// `value` must contain only digits <= `base` and is case insensitive. Base prefixes are
/// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are
/// ignored and can be used as digit separators.
///
/// Returns an error if memory could not be allocated or `value` has invalid digits for the
/// requested base.
@ -415,6 +417,9 @@ pub const Int = struct {
try self.set(0);
for (value[i..]) |ch| {
if (ch == '_') {
continue;
}
const d = try charToDigit(ch, base);
const ap_d = Int.initFixed(([_]Limb{d})[0..]);
@ -1582,6 +1587,22 @@ test "big.int string negative" {
testing.expect((try a.to(i32)) == -1023);
}
test "big.int string set number with underscores" {
var a = try Int.init(testing.allocator);
defer a.deinit();
try a.setString(10, "__1_2_0_3_1_7_2_4_1_2_0_____9_1__2__4_7_8_1_2_4_1_2_9_0_8_4_7_1_2_4___");
testing.expect((try a.to(u128)) == 120317241209124781241290847124);
}
test "big.int string set case insensitive number" {
var a = try Int.init(testing.allocator);
defer a.deinit();
try a.setString(16, "aB_cD_eF");
testing.expect((try a.to(u32)) == 0xabcdef);
}
test "big.int string set bad char error" {
var a = try Int.init(testing.allocator);
defer a.deinit();

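Editor's note: for callers outside this file, the new setString behavior is reachable through the public big-integer API. A minimal usage sketch (assuming the std.math.big.Int path of this release; note that base prefixes such as 0x are still not allowed in the input string):

const std = @import("std");
const Int = std.math.big.Int;

test "setString ignores underscores (usage sketch)" {
    var a = try Int.init(std.testing.allocator);
    defer a.deinit();
    // pass "ff_ff", not "0xff_ff": prefixes are rejected, underscores are skipped
    try a.setString(16, "ff_ff");
    std.testing.expect((try a.to(u32)) == 0xffff);
}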
View File

@ -69,23 +69,23 @@ test "floatundisf" {
test__floatundisf(0, 0.0);
test__floatundisf(1, 1.0);
test__floatundisf(2, 2.0);
test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62F);
test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62F);
test__floatundisf(0x8000008000000000, 0x1p+63F);
test__floatundisf(0x8000010000000000, 0x1.000002p+63F);
test__floatundisf(0x8000000000000000, 0x1p+63F);
test__floatundisf(0x8000000000000001, 0x1p+63F);
test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64F);
test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64F);
test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50F);
test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50F);
test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50F);
test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50F);
test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50F);
test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50F);
test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50F);
test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50F);
test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50F);
test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50F);
test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50F);
test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62);
test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62);
test__floatundisf(0x8000008000000000, 0x1p+63);
test__floatundisf(0x8000010000000000, 0x1.000002p+63);
test__floatundisf(0x8000000000000000, 0x1p+63);
test__floatundisf(0x8000000000000001, 0x1p+63);
test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64);
test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64);
test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50);
test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50);
}

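Editor's note: the only change in this file is dropping the trailing F suffix from these hex float literals; under the reworked tokenizers in this commit, a letter appended to the exponent is an invalid character (see the "invalid exponent in float literal - 2" compile error test added later in the commit). A hedged one-token sketch of the new stdlib behavior, assuming std.zig.Tokenizer's public init/next:

const std = @import("std");

test "suffix letter after a float literal is rejected (sketch)" {
    var tokenizer = std.zig.Tokenizer.init("0x1.0p50F");
    // the literal itself becomes Invalid; the trailing F becomes a separate Identifier
    std.testing.expect(tokenizer.next().id == .Invalid);
    std.testing.expect(tokenizer.next().id == .Identifier);
}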
View File

@ -2815,6 +2815,75 @@ test "zig fmt: extern without container keyword returns error" {
);
}
test "zig fmt: integer literals with underscore separators" {
try testTransform(
\\const
\\ x =
\\ 1_234_567
\\ +(0b0_1-0o7_0+0xff_FF ) + 0_0;
,
\\const x = 1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 0_0;
\\
);
}
test "zig fmt: hex literals with underscore separators" {
try testTransform(
\\pub fn orMask(a: [ 1_000 ]u64, b: [ 1_000] u64) [1_000]u64 {
\\ var c: [1_000]u64 = [1]u64{ 0xFFFF_FFFF_FFFF_FFFF}**1_000;
\\ for (c [ 0_0 .. ]) |_, i| {
\\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
\\ }
\\ return c;
\\}
\\
\\
,
\\pub fn orMask(a: [1_000]u64, b: [1_000]u64) [1_000]u64 {
\\ var c: [1_000]u64 = [1]u64{0xFFFF_FFFF_FFFF_FFFF} ** 1_000;
\\ for (c[0_0..]) |_, i| {
\\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
\\ }
\\ return c;
\\}
\\
);
}
test "zig fmt: decimal float literals with underscore separators" {
try testTransform(
\\pub fn main() void {
\\ const a:f64=(10.0e-0+(10.e+0))+10_00.00_00e-2+00_00.00_10e+4;
\\ const b:f64=010.0--0_10.+0_1_0.0_0+1e2;
\\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
\\}
,
\\pub fn main() void {
\\ const a: f64 = (10.0e-0 + (10.e+0)) + 10_00.00_00e-2 + 00_00.00_10e+4;
\\ const b: f64 = 010.0 - -0_10. + 0_1_0.0_0 + 1e2;
\\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
\\}
\\
);
}
test "zig fmt: hexadeciaml float literals with underscore separators" {
try testTransform(
\\pub fn main() void {
\\ const a: f64 = (0x10.0p-0+(0x10.p+0))+0x10_00.00_00p-8+0x00_00.00_10p+16;
\\ const b: f64 = 0x0010.0--0x00_10.+0x10.00+0x1p4;
\\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
\\}
,
\\pub fn main() void {
\\ const a: f64 = (0x10.0p-0 + (0x10.p+0)) + 0x10_00.00_00p-8 + 0x00_00.00_10p+16;
\\ const b: f64 = 0x0010.0 - -0x00_10. + 0x10.00 + 0x1p4;
\\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
\\}
\\
);
}
const std = @import("std");
const mem = std.mem;
const warn = std.debug.warn;

View File

@ -387,17 +387,23 @@ pub const Tokenizer = struct {
DocComment,
ContainerDocComment,
Zero,
IntegerLiteral,
IntegerLiteralWithRadix,
IntegerLiteralWithRadixHex,
NumberDot,
IntegerLiteralDec,
IntegerLiteralDecNoUnderscore,
IntegerLiteralBin,
IntegerLiteralBinNoUnderscore,
IntegerLiteralOct,
IntegerLiteralOctNoUnderscore,
IntegerLiteralHex,
IntegerLiteralHexNoUnderscore,
NumberDotDec,
NumberDotHex,
FloatFraction,
FloatFractionDec,
FloatFractionDecNoUnderscore,
FloatFractionHex,
FloatFractionHexNoUnderscore,
FloatExponentUnsigned,
FloatExponentUnsignedHex,
FloatExponentNumber,
FloatExponentNumberHex,
FloatExponentNumberNoUnderscore,
Ampersand,
Caret,
Percent,
@ -412,6 +418,10 @@ pub const Tokenizer = struct {
SawAtSign,
};
fn isIdentifierChar(char: u8) bool {
return std.ascii.isAlNum(char) or char == '_';
}
pub fn next(self: *Tokenizer) Token {
if (self.pending_invalid_token) |token| {
self.pending_invalid_token = null;
@ -550,7 +560,7 @@ pub const Tokenizer = struct {
result.id = Token.Id.IntegerLiteral;
},
'1'...'9' => {
state = State.IntegerLiteral;
state = State.IntegerLiteralDec;
result.id = Token.Id.IntegerLiteral;
},
else => {
@ -1048,55 +1058,145 @@ pub const Tokenizer = struct {
else => self.checkLiteralCharacter(),
},
State.Zero => switch (c) {
'b', 'o' => {
state = State.IntegerLiteralWithRadix;
'b' => {
state = State.IntegerLiteralBinNoUnderscore;
},
'o' => {
state = State.IntegerLiteralOctNoUnderscore;
},
'x' => {
state = State.IntegerLiteralWithRadixHex;
state = State.IntegerLiteralHexNoUnderscore;
},
'0'...'9', '_', '.', 'e', 'E' => {
// reinterpret as a decimal number
self.index -= 1;
state = State.IntegerLiteralDec;
},
else => {
// reinterpret as a normal number
self.index -= 1;
state = State.IntegerLiteral;
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.IntegerLiteral => switch (c) {
'.' => {
state = State.NumberDot;
State.IntegerLiteralBinNoUnderscore => switch (c) {
'0'...'1' => {
state = State.IntegerLiteralBin;
},
'p', 'P', 'e', 'E' => {
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.IntegerLiteralBin => switch (c) {
'_' => {
state = State.IntegerLiteralBinNoUnderscore;
},
'0'...'1' => {},
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.IntegerLiteralOctNoUnderscore => switch (c) {
'0'...'7' => {
state = State.IntegerLiteralOct;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.IntegerLiteralOct => switch (c) {
'_' => {
state = State.IntegerLiteralOctNoUnderscore;
},
'0'...'7' => {},
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.IntegerLiteralDecNoUnderscore => switch (c) {
'0'...'9' => {
state = State.IntegerLiteralDec;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.IntegerLiteralDec => switch (c) {
'_' => {
state = State.IntegerLiteralDecNoUnderscore;
},
'.' => {
state = State.NumberDotDec;
result.id = Token.Id.FloatLiteral;
},
'e', 'E' => {
state = State.FloatExponentUnsigned;
result.id = Token.Id.FloatLiteral;
},
'0'...'9' => {},
else => break,
},
State.IntegerLiteralWithRadix => switch (c) {
'.' => {
state = State.NumberDot;
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
'0'...'9' => {},
else => break,
},
State.IntegerLiteralWithRadixHex => switch (c) {
State.IntegerLiteralHexNoUnderscore => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
state = State.IntegerLiteralHex;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.IntegerLiteralHex => switch (c) {
'_' => {
state = State.IntegerLiteralHexNoUnderscore;
},
'.' => {
state = State.NumberDotHex;
result.id = Token.Id.FloatLiteral;
},
'p', 'P' => {
state = State.FloatExponentUnsignedHex;
state = State.FloatExponentUnsigned;
result.id = Token.Id.FloatLiteral;
},
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => break,
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.NumberDot => switch (c) {
State.NumberDotDec => switch (c) {
'.' => {
self.index -= 1;
state = State.Start;
break;
},
else => {
self.index -= 1;
'e', 'E' => {
state = State.FloatExponentUnsigned;
},
'0'...'9' => {
result.id = Token.Id.FloatLiteral;
state = State.FloatFraction;
state = State.FloatFractionDec;
},
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.NumberDotHex => switch (c) {
@ -1105,65 +1205,112 @@ pub const Tokenizer = struct {
state = State.Start;
break;
},
else => {
self.index -= 1;
'p', 'P' => {
state = State.FloatExponentUnsigned;
},
'0'...'9', 'a'...'f', 'A'...'F' => {
result.id = Token.Id.FloatLiteral;
state = State.FloatFractionHex;
},
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.FloatFraction => switch (c) {
State.FloatFractionDecNoUnderscore => switch (c) {
'0'...'9' => {
state = State.FloatFractionDec;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.FloatFractionDec => switch (c) {
'_' => {
state = State.FloatFractionDecNoUnderscore;
},
'e', 'E' => {
state = State.FloatExponentUnsigned;
},
'0'...'9' => {},
else => break,
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.FloatFractionHexNoUnderscore => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
state = State.FloatFractionHex;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.FloatFractionHex => switch (c) {
'_' => {
state = State.FloatFractionHexNoUnderscore;
},
'p', 'P' => {
state = State.FloatExponentUnsignedHex;
state = State.FloatExponentUnsigned;
},
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => break,
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.FloatExponentUnsigned => switch (c) {
'+', '-' => {
state = State.FloatExponentNumber;
state = State.FloatExponentNumberNoUnderscore;
},
else => {
// reinterpret as a normal exponent number
self.index -= 1;
state = State.FloatExponentNumber;
state = State.FloatExponentNumberNoUnderscore;
},
},
State.FloatExponentUnsignedHex => switch (c) {
'+', '-' => {
state = State.FloatExponentNumberHex;
State.FloatExponentNumberNoUnderscore => switch (c) {
'0'...'9' => {
state = State.FloatExponentNumber;
},
else => {
// reinterpret as a normal exponent number
self.index -= 1;
state = State.FloatExponentNumberHex;
result.id = Token.Id.Invalid;
break;
},
},
State.FloatExponentNumber => switch (c) {
'_' => {
state = State.FloatExponentNumberNoUnderscore;
},
'0'...'9' => {},
else => break,
},
State.FloatExponentNumberHex => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => break,
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
}
} else if (self.index == self.buffer.len) {
switch (state) {
State.Start,
State.IntegerLiteral,
State.IntegerLiteralWithRadix,
State.IntegerLiteralWithRadixHex,
State.FloatFraction,
State.IntegerLiteralDec,
State.IntegerLiteralBin,
State.IntegerLiteralOct,
State.IntegerLiteralHex,
State.NumberDotDec,
State.NumberDotHex,
State.FloatFractionDec,
State.FloatFractionHex,
State.FloatExponentNumber,
State.FloatExponentNumberHex,
State.StringLiteral, // find this error later
State.MultilineStringLiteralLine,
State.Builtin,
@ -1184,10 +1331,14 @@ pub const Tokenizer = struct {
result.id = Token.Id.ContainerDocComment;
},
State.NumberDot,
State.NumberDotHex,
State.IntegerLiteralDecNoUnderscore,
State.IntegerLiteralBinNoUnderscore,
State.IntegerLiteralOctNoUnderscore,
State.IntegerLiteralHexNoUnderscore,
State.FloatFractionDecNoUnderscore,
State.FloatFractionHexNoUnderscore,
State.FloatExponentNumberNoUnderscore,
State.FloatExponentUnsigned,
State.FloatExponentUnsignedHex,
State.SawAtSign,
State.Backslash,
State.CharLiteral,
@ -1585,6 +1736,236 @@ test "correctly parse pointer assignment" {
});
}
test "tokenizer - number literals decimal" {
testTokenize("0", &[_]Token.Id{.IntegerLiteral});
testTokenize("1", &[_]Token.Id{.IntegerLiteral});
testTokenize("2", &[_]Token.Id{.IntegerLiteral});
testTokenize("3", &[_]Token.Id{.IntegerLiteral});
testTokenize("4", &[_]Token.Id{.IntegerLiteral});
testTokenize("5", &[_]Token.Id{.IntegerLiteral});
testTokenize("6", &[_]Token.Id{.IntegerLiteral});
testTokenize("7", &[_]Token.Id{.IntegerLiteral});
testTokenize("8", &[_]Token.Id{.IntegerLiteral});
testTokenize("9", &[_]Token.Id{.IntegerLiteral});
testTokenize("0a", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("9b", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1z", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1z_1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("9z3", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0_0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0001", &[_]Token.Id{.IntegerLiteral});
testTokenize("01234567890", &[_]Token.Id{.IntegerLiteral});
testTokenize("012_345_6789_0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0_1_2_3_4_5_6_7_8_9_0", &[_]Token.Id{.IntegerLiteral});
testTokenize("00_", &[_]Token.Id{.Invalid});
testTokenize("0_0_", &[_]Token.Id{.Invalid});
testTokenize("0__0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0_0f", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0_0_f", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0_0_f_00", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1_,", &[_]Token.Id{ .Invalid, .Comma });
testTokenize("1.", &[_]Token.Id{.FloatLiteral});
testTokenize("0.0", &[_]Token.Id{.FloatLiteral});
testTokenize("1.0", &[_]Token.Id{.FloatLiteral});
testTokenize("10.0", &[_]Token.Id{.FloatLiteral});
testTokenize("0e0", &[_]Token.Id{.FloatLiteral});
testTokenize("1e0", &[_]Token.Id{.FloatLiteral});
testTokenize("1e100", &[_]Token.Id{.FloatLiteral});
testTokenize("1.e100", &[_]Token.Id{.FloatLiteral});
testTokenize("1.0e100", &[_]Token.Id{.FloatLiteral});
testTokenize("1.0e+100", &[_]Token.Id{.FloatLiteral});
testTokenize("1.0e-100", &[_]Token.Id{.FloatLiteral});
testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{.FloatLiteral});
testTokenize("1.+", &[_]Token.Id{ .FloatLiteral, .Plus });
testTokenize("1e", &[_]Token.Id{.Invalid});
testTokenize("1.0e1f0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0p100", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Identifier, .Minus, .IntegerLiteral });
testTokenize("1.0p1f0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0_,", &[_]Token.Id{ .Invalid, .Comma });
testTokenize("1_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
testTokenize("1._", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.a", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.z", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1._0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1._+", &[_]Token.Id{ .Invalid, .Identifier, .Plus });
testTokenize("1._e", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0e", &[_]Token.Id{.Invalid});
testTokenize("1.0e,", &[_]Token.Id{ .Invalid, .Comma });
testTokenize("1.0e_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0e+_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0e-_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0e0_+", &[_]Token.Id{ .Invalid, .Plus });
}
test "tokenizer - number literals binary" {
testTokenize("0b0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b1", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b2", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b3", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b4", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b5", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b6", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b7", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0ba", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0bb", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0bc", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0bd", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0be", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0bf", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0bz", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b0000_0000", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b1111_1111", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b10_10_10_10", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b0_1_0_1_0_1_0_1", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b1.", &[_]Token.Id{ .IntegerLiteral, .Period });
testTokenize("0b1.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
testTokenize("0B0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b_0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b1_", &[_]Token.Id{.Invalid});
testTokenize("0b0__1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b0_1_", &[_]Token.Id{.Invalid});
testTokenize("0b1e", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b1p", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b1e0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b1p0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b1_,", &[_]Token.Id{ .Invalid, .Comma });
}
test "tokenizer - number literals octal" {
testTokenize("0o0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o1", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o2", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o3", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o4", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o5", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o6", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o7", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0o9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0oa", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0ob", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0oc", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0od", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0oe", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0of", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0oz", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o01234567", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o0123_4567", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o01_23_45_67", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o0_1_2_3_4_5_6_7", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o7.", &[_]Token.Id{ .IntegerLiteral, .Period });
testTokenize("0o7.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
testTokenize("0O0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o_0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o1_", &[_]Token.Id{.Invalid});
testTokenize("0o0__1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o0_1_", &[_]Token.Id{.Invalid});
testTokenize("0o1e", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o1p", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o1e0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o1p0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
}
test "tokenizer - number literals hexadeciaml" {
testTokenize("0x0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x1", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x2", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x3", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x4", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x5", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x6", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x7", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x8", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x9", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xa", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xb", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xc", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xd", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xe", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xf", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xA", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xB", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xC", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xD", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xE", &[_]Token.Id{.IntegerLiteral});
testTokenize("0xF", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x0z", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0xz", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0123456789ABCDEF", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{.IntegerLiteral});
testTokenize("0X0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x_1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x1_", &[_]Token.Id{.Invalid});
testTokenize("0x0__1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0_1_", &[_]Token.Id{.Invalid});
testTokenize("0x_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
testTokenize("0x1.", &[_]Token.Id{.FloatLiteral});
testTokenize("0x1.0", &[_]Token.Id{.FloatLiteral});
testTokenize("0xF.", &[_]Token.Id{.FloatLiteral});
testTokenize("0xF.0", &[_]Token.Id{.FloatLiteral});
testTokenize("0xF.F", &[_]Token.Id{.FloatLiteral});
testTokenize("0xF.Fp0", &[_]Token.Id{.FloatLiteral});
testTokenize("0xF.FP0", &[_]Token.Id{.FloatLiteral});
testTokenize("0x1p0", &[_]Token.Id{.FloatLiteral});
testTokenize("0xfp0", &[_]Token.Id{.FloatLiteral});
testTokenize("0x1.+0xF.", &[_]Token.Id{ .FloatLiteral, .Plus, .FloatLiteral });
testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{.FloatLiteral});
testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{.FloatLiteral});
testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{.FloatLiteral});
testTokenize("0x0p0", &[_]Token.Id{.FloatLiteral});
testTokenize("0x0.0p0", &[_]Token.Id{.FloatLiteral});
testTokenize("0xff.ffp10", &[_]Token.Id{.FloatLiteral});
testTokenize("0xff.ffP10", &[_]Token.Id{.FloatLiteral});
testTokenize("0xff.p10", &[_]Token.Id{.FloatLiteral});
testTokenize("0xffp10", &[_]Token.Id{.FloatLiteral});
testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{.FloatLiteral});
testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{.FloatLiteral});
testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{.FloatLiteral});
testTokenize("0x1e", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x1e0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0x1p", &[_]Token.Id{.Invalid});
testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0.p", &[_]Token.Id{.Invalid});
testTokenize("0x0.z", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0._", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
testTokenize("0x0_.0.0", &[_]Token.Id{ .Invalid, .Period, .FloatLiteral });
testTokenize("0x0._0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0.0_", &[_]Token.Id{.Invalid});
testTokenize("0x0_p0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid, .Period, .Identifier });
testTokenize("0x0._p0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0x0.0p0_", &[_]Token.Id{ .Invalid, .Eof });
}
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
var tokenizer = Tokenizer.init(source);
for (expected_tokens) |expected_token_id| {

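Editor's note: the pattern used throughout the tokenizer above is a pair of states per digit run. Seeing '_' in IntegerLiteralDec (or any of its siblings) moves to the matching *NoUnderscore state, which only accepts another digit, so a literal cannot begin or end with an underscore and cannot contain two in a row. A minimal standalone sketch of the same two-state idea for a decimal digit string (hasValidUnderscores is a hypothetical helper, not part of the commit):

const std = @import("std");

// Returns true when every '_' sits between two decimal digits.
fn hasValidUnderscores(literal: []const u8) bool {
    if (literal.len == 0) return false;
    var need_digit = true; // also covers the first character
    for (literal) |c| {
        if (need_digit) {
            if (!std.ascii.isDigit(c)) return false;
            need_digit = false;
        } else if (c == '_') {
            need_digit = true; // an underscore must be followed by a digit
        } else if (!std.ascii.isDigit(c)) {
            return false;
        }
    }
    return !need_digit; // a trailing underscore is rejected
}

test "two-state underscore sketch" {
    std.testing.expect(hasValidUnderscores("1_000_000"));
    std.testing.expect(!hasValidUnderscores("1__0"));
    std.testing.expect(!hasValidUnderscores("1_"));
    std.testing.expect(!hasValidUnderscores("_1"));
}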
View File

@ -1311,13 +1311,16 @@ pub const Builder = struct {
var base: u8 = undefined;
var rest: []const u8 = undefined;
if (int_token.len >= 3 and int_token[0] == '0') {
base = switch (int_token[1]) {
'b' => 2,
'o' => 8,
'x' => 16,
else => unreachable,
};
rest = int_token[2..];
switch (int_token[1]) {
'b' => base = 2,
'o' => base = 8,
'x' => base = 16,
else => {
base = 10;
rest = int_token;
},
}
} else {
base = 10;
rest = int_token;

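Editor's note: with underscores allowed, a token such as 0_600 starts with '0' and is at least three characters long yet carries no radix prefix, so the previous else => unreachable no longer holds; the builder now falls back to base 10 and keeps the whole token. A standalone restatement of that logic (detectBase and BaseAndRest are illustrative names, not part of the commit):

const std = @import("std");

const BaseAndRest = struct { base: u8, rest: []const u8 };

// Mirrors the radix detection above: prefixed tokens drop the prefix,
// anything else (including "0_600") is treated as decimal.
fn detectBase(int_token: []const u8) BaseAndRest {
    if (int_token.len >= 3 and int_token[0] == '0') {
        switch (int_token[1]) {
            'b' => return BaseAndRest{ .base = 2, .rest = int_token[2..] },
            'o' => return BaseAndRest{ .base = 8, .rest = int_token[2..] },
            'x' => return BaseAndRest{ .base = 16, .rest = int_token[2..] },
            else => {},
        }
    }
    return BaseAndRest{ .base = 10, .rest = int_token };
}

test "radix fallback sketch" {
    std.testing.expect(detectBase("0x2a").base == 16);
    std.testing.expect(detectBase("0_600").base == 10);
    std.testing.expect(std.mem.eql(u8, detectBase("0_600").rest, "0_600"));
}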
View File

@ -165,22 +165,36 @@ static long long scanexp(struct MuslFILE *f, int pok)
int x;
long long y;
int neg = 0;
c = shgetc(f);
if (c=='+' || c=='-') {
neg = (c=='-');
c = shgetc(f);
if (c-'0'>=10U && pok) shunget(f);
}
if (c-'0'>=10U) {
if (c-'0'>=10U && c!='_') {
shunget(f);
return LLONG_MIN;
}
for (x=0; c-'0'<10U && x<INT_MAX/10; c = shgetc(f))
x = 10*x + c-'0';
for (y=x; c-'0'<10U && y<LLONG_MAX/100; c = shgetc(f))
y = 10*y + c-'0';
for (; c-'0'<10U; c = shgetc(f));
for (x=0; ; c = shgetc(f)) {
if (c=='_') {
continue;
} else if (c-'0'<10U && x<INT_MAX/10) {
x = 10*x + c-'0';
} else {
break;
}
}
for (y=x; ; c = shgetc(f)) {
if (c=='_') {
continue;
} else if (c-'0'<10U && y<LLONG_MAX/100) {
y = 10*y + c-'0';
} else {
break;
}
}
for (; c-'0'<10U || c=='_'; c = shgetc(f));
shunget(f);
return neg ? -y : y;
}
@ -450,16 +464,36 @@ static float128_t decfloat(struct MuslFILE *f, int c, int bits, int emin, int si
j=0;
k=0;
/* Don't let leading zeros consume buffer space */
for (; c=='0'; c = shgetc(f)) gotdig=1;
/* Don't let leading zeros/underscores consume buffer space */
for (; ; c = shgetc(f)) {
if (c=='_') {
continue;
} else if (c=='0') {
gotdig=1;
} else {
break;
}
}
if (c=='.') {
gotrad = 1;
for (c = shgetc(f); c=='0'; c = shgetc(f)) gotdig=1, lrp--;
for (c = shgetc(f); ; c = shgetc(f)) {
if (c == '_') {
continue;
} else if (c=='0') {
gotdig=1;
lrp--;
} else {
break;
}
}
}
x[0] = 0;
for (; c-'0'<10U || c=='.'; c = shgetc(f)) {
if (c == '.') {
for (; c-'0'<10U || c=='.' || c=='_'; c = shgetc(f)) {
if (c == '_') {
continue;
} else if (c == '.') {
if (gotrad) break;
gotrad = 1;
lrp = dc;
@ -773,18 +807,29 @@ static float128_t hexfloat(struct MuslFILE *f, int bits, int emin, int sign, int
c = shgetc(f);
/* Skip leading zeros */
for (; c=='0'; c = shgetc(f)) gotdig = 1;
/* Skip leading zeros/underscores */
for (; c=='0' || c=='_'; c = shgetc(f)) gotdig = 1;
if (c=='.') {
gotrad = 1;
c = shgetc(f);
/* Count zeros after the radix point before significand */
for (rp=0; c=='0'; c = shgetc(f), rp--) gotdig = 1;
for (rp=0; ; c = shgetc(f)) {
if (c == '_') {
continue;
} else if (c == '0') {
gotdig = 1;
rp--;
} else {
break;
}
}
}
for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; c = shgetc(f)) {
if (c=='.') {
for (; c-'0'<10U || (c|32)-'a'<6U || c=='.' || c=='_'; c = shgetc(f)) {
if (c=='_') {
continue;
} else if (c=='.') {
if (gotrad) break;
rp = dc;
gotrad = 1;

View File

@ -177,10 +177,13 @@ enum TokenizeState {
TokenizeStateSymbol,
TokenizeStateZero, // "0", which might lead to "0x"
TokenizeStateNumber, // "123", "0x123"
TokenizeStateNumberNoUnderscore, // "12_", "0x12_" next char must be digit
TokenizeStateNumberDot,
TokenizeStateFloatFraction, // "123.456", "0x123.456"
TokenizeStateFloatFractionNoUnderscore, // "123.45_", "0x123.45_"
TokenizeStateFloatExponentUnsigned, // "123.456e", "123e", "0x123p"
TokenizeStateFloatExponentNumber, // "123.456e-", "123.456e5", "123.456e5e-5"
TokenizeStateFloatExponentNumber, // "123.456e7", "123.456e+7", "123.456e-7"
TokenizeStateFloatExponentNumberNoUnderscore, // "123.456e7_", "123.456e+7_", "123.456e-7_"
TokenizeStateString,
TokenizeStateStringEscape,
TokenizeStateStringEscapeUnicodeStart,
@ -233,14 +236,10 @@ struct Tokenize {
Token *cur_tok;
Tokenization *out;
uint32_t radix;
int32_t exp_add_amt;
bool is_exp_negative;
bool is_trailing_underscore;
size_t char_code_index;
bool unicode;
uint32_t char_code;
int exponent_in_bin_or_dec;
BigInt specified_exponent;
BigInt significand;
size_t remaining_code_units;
};
@ -426,20 +425,16 @@ void tokenize(Buf *buf, Tokenization *out) {
case '0':
t.state = TokenizeStateZero;
begin_token(&t, TokenIdIntLiteral);
t.is_trailing_underscore = false;
t.radix = 10;
t.exp_add_amt = 1;
t.exponent_in_bin_or_dec = 0;
bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, 0);
bigint_init_unsigned(&t.specified_exponent, 0);
break;
case DIGIT_NON_ZERO:
t.state = TokenizeStateNumber;
begin_token(&t, TokenIdIntLiteral);
t.is_trailing_underscore = false;
t.radix = 10;
t.exp_add_amt = 1;
t.exponent_in_bin_or_dec = 0;
bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, get_digit_value(c));
bigint_init_unsigned(&t.specified_exponent, 0);
break;
case '"':
begin_token(&t, TokenIdStringLiteral);
@ -1189,17 +1184,15 @@ void tokenize(Buf *buf, Tokenization *out) {
switch (c) {
case 'b':
t.radix = 2;
t.state = TokenizeStateNumber;
t.state = TokenizeStateNumberNoUnderscore;
break;
case 'o':
t.radix = 8;
t.exp_add_amt = 3;
t.state = TokenizeStateNumber;
t.state = TokenizeStateNumberNoUnderscore;
break;
case 'x':
t.radix = 16;
t.exp_add_amt = 4;
t.state = TokenizeStateNumber;
t.state = TokenizeStateNumberNoUnderscore;
break;
default:
// reinterpret as normal number
@ -1208,9 +1201,27 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
break;
case TokenizeStateNumberNoUnderscore:
if (c == '_') {
invalid_char_error(&t, c);
break;
} else if (get_digit_value(c) < t.radix) {
t.is_trailing_underscore = false;
t.state = TokenizeStateNumber;
}
// fall through
case TokenizeStateNumber:
{
if (c == '_') {
t.is_trailing_underscore = true;
t.state = TokenizeStateNumberNoUnderscore;
break;
}
if (c == '.') {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
if (t.radix != 16 && t.radix != 10) {
invalid_char_error(&t, c);
}
@ -1218,17 +1229,26 @@ void tokenize(Buf *buf, Tokenization *out) {
break;
}
if (is_exponent_signifier(c, t.radix)) {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
if (t.radix != 16 && t.radix != 10) {
invalid_char_error(&t, c);
}
t.state = TokenizeStateFloatExponentUnsigned;
t.radix = 10; // exponent is always base 10
assert(t.cur_tok->id == TokenIdIntLiteral);
bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint);
set_token_id(&t, t.cur_tok, TokenIdFloatLiteral);
break;
}
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
if (is_symbol_char(c)) {
invalid_char_error(&t, c);
}
@ -1259,20 +1279,41 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
t.pos -= 1;
t.state = TokenizeStateFloatFraction;
t.state = TokenizeStateFloatFractionNoUnderscore;
assert(t.cur_tok->id == TokenIdIntLiteral);
bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint);
set_token_id(&t, t.cur_tok, TokenIdFloatLiteral);
continue;
}
case TokenizeStateFloatFractionNoUnderscore:
if (c == '_') {
invalid_char_error(&t, c);
} else if (get_digit_value(c) < t.radix) {
t.is_trailing_underscore = false;
t.state = TokenizeStateFloatFraction;
}
// fall through
case TokenizeStateFloatFraction:
{
if (c == '_') {
t.is_trailing_underscore = true;
t.state = TokenizeStateFloatFractionNoUnderscore;
break;
}
if (is_exponent_signifier(c, t.radix)) {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
t.state = TokenizeStateFloatExponentUnsigned;
t.radix = 10; // exponent is always base 10
break;
}
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
if (is_symbol_char(c)) {
invalid_char_error(&t, c);
}
@ -1282,46 +1323,47 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart;
continue;
}
t.exponent_in_bin_or_dec -= t.exp_add_amt;
if (t.radix == 10) {
// For now we use strtod to parse decimal floats, so we just have to get to the
// end of the token.
break;
}
BigInt digit_value_bi;
bigint_init_unsigned(&digit_value_bi, digit_value);
BigInt radix_bi;
bigint_init_unsigned(&radix_bi, t.radix);
BigInt multiplied;
bigint_mul(&multiplied, &t.significand, &radix_bi);
bigint_add(&t.significand, &multiplied, &digit_value_bi);
break;
// we use parse_f128 to generate the float literal, so just
// need to get to the end of the token
}
break;
case TokenizeStateFloatExponentUnsigned:
switch (c) {
case '+':
t.is_exp_negative = false;
t.state = TokenizeStateFloatExponentNumber;
t.state = TokenizeStateFloatExponentNumberNoUnderscore;
break;
case '-':
t.is_exp_negative = true;
t.state = TokenizeStateFloatExponentNumber;
t.state = TokenizeStateFloatExponentNumberNoUnderscore;
break;
default:
// reinterpret as normal exponent number
t.pos -= 1;
t.is_exp_negative = false;
t.state = TokenizeStateFloatExponentNumber;
t.state = TokenizeStateFloatExponentNumberNoUnderscore;
continue;
}
break;
case TokenizeStateFloatExponentNumberNoUnderscore:
if (c == '_') {
invalid_char_error(&t, c);
} else if (get_digit_value(c) < t.radix) {
t.is_trailing_underscore = false;
t.state = TokenizeStateFloatExponentNumber;
}
// fall through
case TokenizeStateFloatExponentNumber:
{
if (c == '_') {
t.is_trailing_underscore = true;
t.state = TokenizeStateFloatExponentNumberNoUnderscore;
break;
}
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
if (is_symbol_char(c)) {
invalid_char_error(&t, c);
}
@ -1331,21 +1373,9 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart;
continue;
}
if (t.radix == 10) {
// For now we use strtod to parse decimal floats, so we just have to get to the
// end of the token.
break;
}
BigInt digit_value_bi;
bigint_init_unsigned(&digit_value_bi, digit_value);
BigInt radix_bi;
bigint_init_unsigned(&radix_bi, 10);
BigInt multiplied;
bigint_mul(&multiplied, &t.specified_exponent, &radix_bi);
bigint_add(&t.specified_exponent, &multiplied, &digit_value_bi);
// we use parse_f128 to generate the float literal, so just
// need to get to the end of the token
}
break;
case TokenizeStateSawDash:
@ -1399,6 +1429,9 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateStart:
case TokenizeStateError:
break;
case TokenizeStateNumberNoUnderscore:
case TokenizeStateFloatFractionNoUnderscore:
case TokenizeStateFloatExponentNumberNoUnderscore:
case TokenizeStateNumberDot:
tokenize_error(&t, "unterminated number literal");
break;

View File

@ -395,11 +395,163 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
\\ var bad_float :f32 = 0.0;
\\ bad_float = bad_float + .20;
\\ std.debug.assert(bad_float < 1.0);
\\})
\\}
, &[_][]const u8{
"tmp.zig:5:29: error: invalid token: '.'",
});
cases.add("invalid exponent in float literal - 1",
\\fn main() void {
\\ var bad: f128 = 0x1.0p1ab1;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: 'a'",
});
cases.add("invalid exponent in float literal - 2",
\\fn main() void {
\\ var bad: f128 = 0x1.0p50F;
\\}
, &[_][]const u8{
"tmp.zig:2:29: error: invalid character: 'F'",
});
cases.add("invalid underscore placement in float literal - 1",
\\fn main() void {
\\ var bad: f128 = 0._0;
\\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 2",
\\fn main() void {
\\ var bad: f128 = 0_.0;
\\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '.'",
});
cases.add("invalid underscore placement in float literal - 3",
\\fn main() void {
\\ var bad: f128 = 0.0_;
\\}
, &[_][]const u8{
"tmp.zig:2:25: error: invalid character: ';'",
});
cases.add("invalid underscore placement in float literal - 4",
\\fn main() void {
\\ var bad: f128 = 1.0e_1;
\\}
, &[_][]const u8{
"tmp.zig:2:25: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 5",
\\fn main() void {
\\ var bad: f128 = 1.0e+_1;
\\}
, &[_][]const u8{
"tmp.zig:2:26: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 6",
\\fn main() void {
\\ var bad: f128 = 1.0e-_1;
\\}
, &[_][]const u8{
"tmp.zig:2:26: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 7",
\\fn main() void {
\\ var bad: f128 = 1.0e-1_;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: ';'",
});
cases.add("invalid underscore placement in float literal - 9",
\\fn main() void {
\\ var bad: f128 = 1__0.0e-1;
\\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 10",
\\fn main() void {
\\ var bad: f128 = 1.0__0e-1;
\\}
, &[_][]const u8{
"tmp.zig:2:25: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 11",
\\fn main() void {
\\ var bad: f128 = 1.0e-1__0;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 12",
\\fn main() void {
\\ var bad: f128 = 0_x0.0;
\\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: 'x'",
});
cases.add("invalid underscore placement in float literal - 13",
\\fn main() void {
\\ var bad: f128 = 0x_0.0;
\\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 14",
\\fn main() void {
\\ var bad: f128 = 0x0.0_p1;
\\}
, &[_][]const u8{
"tmp.zig:2:27: error: invalid character: 'p'",
});
cases.add("invalid underscore placement in int literal - 1",
\\fn main() void {
\\ var bad: u128 = 0010_;
\\}
, &[_][]const u8{
"tmp.zig:2:26: error: invalid character: ';'",
});
cases.add("invalid underscore placement in int literal - 2",
\\fn main() void {
\\ var bad: u128 = 0b0010_;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: ';'",
});
cases.add("invalid underscore placement in int literal - 3",
\\fn main() void {
\\ var bad: u128 = 0o0010_;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: ';'",
});
cases.add("invalid underscore placement in int literal - 4",
\\fn main() void {
\\ var bad: u128 = 0x0010_;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: ';'",
});
cases.add("var args without c calling conv",
\\fn foo(args: ...) void {}
\\comptime {

View File

@ -411,6 +411,34 @@ test "quad hex float literal parsing accurate" {
comptime S.doTheTest();
}
test "underscore separator parsing" {
expect(0_0_0_0 == 0);
expect(1_234_567 == 1234567);
expect(001_234_567 == 1234567);
expect(0_0_1_2_3_4_5_6_7 == 1234567);
expect(0b0_0_0_0 == 0);
expect(0b1010_1010 == 0b10101010);
expect(0b0000_1010_1010 == 0b10101010);
expect(0b1_0_1_0_1_0_1_0 == 0b10101010);
expect(0o0_0_0_0 == 0);
expect(0o1010_1010 == 0o10101010);
expect(0o0000_1010_1010 == 0o10101010);
expect(0o1_0_1_0_1_0_1_0 == 0o10101010);
expect(0x0_0_0_0 == 0);
expect(0x1010_1010 == 0x10101010);
expect(0x0000_1010_1010 == 0x10101010);
expect(0x1_0_1_0_1_0_1_0 == 0x10101010);
expect(123_456.789_000e1_0 == 123456.789000e10);
expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10);
expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10);
expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10);
}
test "hex float literal within range" {
const a = 0x1.0p16383;
const b = 0x0.1p16387;