Merge pull request #4741 from momumi/master
allow `_` separators in number literals (stage 1) (branch: master)
commit
13d04f9963
|
@ -885,6 +885,12 @@ const hex_int = 0xff;
|
|||
const another_hex_int = 0xFF;
|
||||
const octal_int = 0o755;
|
||||
const binary_int = 0b11110000;
|
||||
|
||||
// underscores may be placed between two digits as a visual separator
|
||||
const one_billion = 1_000_000_000;
|
||||
const binary_mask = 0b1_1111_1111;
|
||||
const permissions = 0o7_5_5;
|
||||
const big_address = 0xFF80_0000_0000_0000;
|
||||
{#code_end#}
|
||||
{#header_close#}
|
||||
{#header_open|Runtime Integer Values#}
|
||||
|
@ -947,6 +953,11 @@ const yet_another = 123.0e+77;
|
|||
const hex_floating_point = 0x103.70p-5;
|
||||
const another_hex_float = 0x103.70;
|
||||
const yet_another_hex_float = 0x103.70P-5;
|
||||
|
||||
// underscores may be placed between two digits as a visual separator
|
||||
const lightspeed = 299_792_458.000_000;
|
||||
const nanosecond = 0.000_000_001;
|
||||
const more_hex = 0x1234_5678.9ABC_CDEFp-10;
|
||||
{#code_end#}
|
||||
<p>
|
||||
There is no syntax for NaN, infinity, or negative infinity. For these special values,
|
||||
|
|
|
@ -373,6 +373,7 @@ pub const Int = struct {
|
|||
const d = switch (ch) {
|
||||
'0'...'9' => ch - '0',
|
||||
'a'...'f' => (ch - 'a') + 0xa,
|
||||
'A'...'F' => (ch - 'A') + 0xa,
|
||||
else => return error.InvalidCharForDigit,
|
||||
};
|
||||
|
||||
|
@ -393,8 +394,9 @@ pub const Int = struct {
|
|||
|
||||
/// Set self from the string representation `value`.
|
||||
///
|
||||
/// value must contain only digits <= `base`. Base prefixes are not allowed (e.g. 0x43 should
|
||||
/// simply be 43).
|
||||
/// `value` must contain only digits < `base` and is case insensitive. Base prefixes are
|
||||
/// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are
|
||||
/// ignored and can be used as digit separators.
|
||||
///
|
||||
/// Returns an error if memory could not be allocated or `value` has invalid digits for the
|
||||
/// requested base.
|
||||
|
@ -415,6 +417,9 @@ pub const Int = struct {
|
|||
try self.set(0);
|
||||
|
||||
for (value[i..]) |ch| {
|
||||
if (ch == '_') {
|
||||
continue;
|
||||
}
|
||||
const d = try charToDigit(ch, base);
|
||||
|
||||
const ap_d = Int.initFixed(([_]Limb{d})[0..]);
|
||||
|
@ -1582,6 +1587,22 @@ test "big.int string negative" {
|
|||
testing.expect((try a.to(i32)) == -1023);
|
||||
}
|
||||
|
||||
test "big.int string set number with underscores" {
|
||||
var a = try Int.init(testing.allocator);
|
||||
defer a.deinit();
|
||||
|
||||
try a.setString(10, "__1_2_0_3_1_7_2_4_1_2_0_____9_1__2__4_7_8_1_2_4_1_2_9_0_8_4_7_1_2_4___");
|
||||
testing.expect((try a.to(u128)) == 120317241209124781241290847124);
|
||||
}
|
||||
|
||||
test "big.int string set case insensitive number" {
|
||||
var a = try Int.init(testing.allocator);
|
||||
defer a.deinit();
|
||||
|
||||
try a.setString(16, "aB_cD_eF");
|
||||
testing.expect((try a.to(u32)) == 0xabcdef);
|
||||
}
|
||||
|
||||
test "big.int string set bad char error" {
|
||||
var a = try Int.init(testing.allocator);
|
||||
defer a.deinit();
|
||||
|
|
|
@ -69,23 +69,23 @@ test "floatundisf" {
|
|||
test__floatundisf(0, 0.0);
|
||||
test__floatundisf(1, 1.0);
|
||||
test__floatundisf(2, 2.0);
|
||||
test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62F);
|
||||
test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62F);
|
||||
test__floatundisf(0x8000008000000000, 0x1p+63F);
|
||||
test__floatundisf(0x8000010000000000, 0x1.000002p+63F);
|
||||
test__floatundisf(0x8000000000000000, 0x1p+63F);
|
||||
test__floatundisf(0x8000000000000001, 0x1p+63F);
|
||||
test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64F);
|
||||
test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64F);
|
||||
test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50F);
|
||||
test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50F);
|
||||
test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50F);
|
||||
test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50F);
|
||||
test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50F);
|
||||
test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50F);
|
||||
test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50F);
|
||||
test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50F);
|
||||
test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50F);
|
||||
test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50F);
|
||||
test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50F);
|
||||
test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62);
|
||||
test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62);
|
||||
test__floatundisf(0x8000008000000000, 0x1p+63);
|
||||
test__floatundisf(0x8000010000000000, 0x1.000002p+63);
|
||||
test__floatundisf(0x8000000000000000, 0x1p+63);
|
||||
test__floatundisf(0x8000000000000001, 0x1p+63);
|
||||
test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64);
|
||||
test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64);
|
||||
test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50);
|
||||
test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50);
|
||||
test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50);
|
||||
test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50);
|
||||
test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50);
|
||||
test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50);
|
||||
test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50);
|
||||
test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50);
|
||||
test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50);
|
||||
test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50);
|
||||
test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50);
|
||||
}
|
||||
|
|
|
@ -2815,6 +2815,75 @@ test "zig fmt: extern without container keyword returns error" {
|
|||
);
|
||||
}
|
||||
|
||||
test "zig fmt: integer literals with underscore separators" {
|
||||
try testTransform(
|
||||
\\const
|
||||
\\ x =
|
||||
\\ 1_234_567
|
||||
\\ +(0b0_1-0o7_0+0xff_FF ) + 0_0;
|
||||
,
|
||||
\\const x = 1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 0_0;
|
||||
\\
|
||||
);
|
||||
}
|
||||
|
||||
test "zig fmt: hex literals with underscore separators" {
|
||||
try testTransform(
|
||||
\\pub fn orMask(a: [ 1_000 ]u64, b: [ 1_000] u64) [1_000]u64 {
|
||||
\\ var c: [1_000]u64 = [1]u64{ 0xFFFF_FFFF_FFFF_FFFF}**1_000;
|
||||
\\ for (c [ 0_0 .. ]) |_, i| {
|
||||
\\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
|
||||
\\ }
|
||||
\\ return c;
|
||||
\\}
|
||||
\\
|
||||
\\
|
||||
,
|
||||
\\pub fn orMask(a: [1_000]u64, b: [1_000]u64) [1_000]u64 {
|
||||
\\ var c: [1_000]u64 = [1]u64{0xFFFF_FFFF_FFFF_FFFF} ** 1_000;
|
||||
\\ for (c[0_0..]) |_, i| {
|
||||
\\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
|
||||
\\ }
|
||||
\\ return c;
|
||||
\\}
|
||||
\\
|
||||
);
|
||||
}
|
||||
|
||||
test "zig fmt: decimal float literals with underscore separators" {
|
||||
try testTransform(
|
||||
\\pub fn main() void {
|
||||
\\ const a:f64=(10.0e-0+(10.e+0))+10_00.00_00e-2+00_00.00_10e+4;
|
||||
\\ const b:f64=010.0--0_10.+0_1_0.0_0+1e2;
|
||||
\\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
|
||||
\\}
|
||||
,
|
||||
\\pub fn main() void {
|
||||
\\ const a: f64 = (10.0e-0 + (10.e+0)) + 10_00.00_00e-2 + 00_00.00_10e+4;
|
||||
\\ const b: f64 = 010.0 - -0_10. + 0_1_0.0_0 + 1e2;
|
||||
\\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
|
||||
\\}
|
||||
\\
|
||||
);
|
||||
}
|
||||
|
||||
test "zig fmt: hexadecimal float literals with underscore separators" {
|
||||
try testTransform(
|
||||
\\pub fn main() void {
|
||||
\\ const a: f64 = (0x10.0p-0+(0x10.p+0))+0x10_00.00_00p-8+0x00_00.00_10p+16;
|
||||
\\ const b: f64 = 0x0010.0--0x00_10.+0x10.00+0x1p4;
|
||||
\\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
|
||||
\\}
|
||||
,
|
||||
\\pub fn main() void {
|
||||
\\ const a: f64 = (0x10.0p-0 + (0x10.p+0)) + 0x10_00.00_00p-8 + 0x00_00.00_10p+16;
|
||||
\\ const b: f64 = 0x0010.0 - -0x00_10. + 0x10.00 + 0x1p4;
|
||||
\\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
|
||||
\\}
|
||||
\\
|
||||
);
|
||||
}
|
||||
|
||||
const std = @import("std");
|
||||
const mem = std.mem;
|
||||
const warn = std.debug.warn;
|
||||
|
|
|
@ -387,17 +387,23 @@ pub const Tokenizer = struct {
|
|||
DocComment,
|
||||
ContainerDocComment,
|
||||
Zero,
|
||||
IntegerLiteral,
|
||||
IntegerLiteralWithRadix,
|
||||
IntegerLiteralWithRadixHex,
|
||||
NumberDot,
|
||||
IntegerLiteralDec,
|
||||
IntegerLiteralDecNoUnderscore,
|
||||
IntegerLiteralBin,
|
||||
IntegerLiteralBinNoUnderscore,
|
||||
IntegerLiteralOct,
|
||||
IntegerLiteralOctNoUnderscore,
|
||||
IntegerLiteralHex,
|
||||
IntegerLiteralHexNoUnderscore,
|
||||
NumberDotDec,
|
||||
NumberDotHex,
|
||||
FloatFraction,
|
||||
FloatFractionDec,
|
||||
FloatFractionDecNoUnderscore,
|
||||
FloatFractionHex,
|
||||
FloatFractionHexNoUnderscore,
|
||||
FloatExponentUnsigned,
|
||||
FloatExponentUnsignedHex,
|
||||
FloatExponentNumber,
|
||||
FloatExponentNumberHex,
|
||||
FloatExponentNumberNoUnderscore,
|
||||
Ampersand,
|
||||
Caret,
|
||||
Percent,
|
||||
|
@ -412,6 +418,10 @@ pub const Tokenizer = struct {
|
|||
SawAtSign,
|
||||
};
|
||||
|
||||
fn isIdentifierChar(char: u8) bool {
|
||||
return std.ascii.isAlNum(char) or char == '_';
|
||||
}
|
||||
|
||||
pub fn next(self: *Tokenizer) Token {
|
||||
if (self.pending_invalid_token) |token| {
|
||||
self.pending_invalid_token = null;
|
||||
|
@ -550,7 +560,7 @@ pub const Tokenizer = struct {
|
|||
result.id = Token.Id.IntegerLiteral;
|
||||
},
|
||||
'1'...'9' => {
|
||||
state = State.IntegerLiteral;
|
||||
state = State.IntegerLiteralDec;
|
||||
result.id = Token.Id.IntegerLiteral;
|
||||
},
|
||||
else => {
|
||||
|
@ -1048,55 +1058,145 @@ pub const Tokenizer = struct {
|
|||
else => self.checkLiteralCharacter(),
|
||||
},
|
||||
State.Zero => switch (c) {
|
||||
'b', 'o' => {
|
||||
state = State.IntegerLiteralWithRadix;
|
||||
'b' => {
|
||||
state = State.IntegerLiteralBinNoUnderscore;
|
||||
},
|
||||
'o' => {
|
||||
state = State.IntegerLiteralOctNoUnderscore;
|
||||
},
|
||||
'x' => {
|
||||
state = State.IntegerLiteralWithRadixHex;
|
||||
state = State.IntegerLiteralHexNoUnderscore;
|
||||
},
|
||||
'0'...'9', '_', '.', 'e', 'E' => {
|
||||
// reinterpret as a decimal number
|
||||
self.index -= 1;
|
||||
state = State.IntegerLiteralDec;
|
||||
},
|
||||
else => {
|
||||
// reinterpret as a normal number
|
||||
self.index -= 1;
|
||||
state = State.IntegerLiteral;
|
||||
if (isIdentifierChar(c)) {
|
||||
result.id = Token.Id.Invalid;
|
||||
}
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.IntegerLiteral => switch (c) {
|
||||
'.' => {
|
||||
state = State.NumberDot;
|
||||
State.IntegerLiteralBinNoUnderscore => switch (c) {
|
||||
'0'...'1' => {
|
||||
state = State.IntegerLiteralBin;
|
||||
},
|
||||
'p', 'P', 'e', 'E' => {
|
||||
else => {
|
||||
result.id = Token.Id.Invalid;
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.IntegerLiteralBin => switch (c) {
|
||||
'_' => {
|
||||
state = State.IntegerLiteralBinNoUnderscore;
|
||||
},
|
||||
'0'...'1' => {},
|
||||
else => {
|
||||
if (isIdentifierChar(c)) {
|
||||
result.id = Token.Id.Invalid;
|
||||
}
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.IntegerLiteralOctNoUnderscore => switch (c) {
|
||||
'0'...'7' => {
|
||||
state = State.IntegerLiteralOct;
|
||||
},
|
||||
else => {
|
||||
result.id = Token.Id.Invalid;
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.IntegerLiteralOct => switch (c) {
|
||||
'_' => {
|
||||
state = State.IntegerLiteralOctNoUnderscore;
|
||||
},
|
||||
'0'...'7' => {},
|
||||
else => {
|
||||
if (isIdentifierChar(c)) {
|
||||
result.id = Token.Id.Invalid;
|
||||
}
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.IntegerLiteralDecNoUnderscore => switch (c) {
|
||||
'0'...'9' => {
|
||||
state = State.IntegerLiteralDec;
|
||||
},
|
||||
else => {
|
||||
result.id = Token.Id.Invalid;
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.IntegerLiteralDec => switch (c) {
|
||||
'_' => {
|
||||
state = State.IntegerLiteralDecNoUnderscore;
|
||||
},
|
||||
'.' => {
|
||||
state = State.NumberDotDec;
|
||||
result.id = Token.Id.FloatLiteral;
|
||||
},
|
||||
'e', 'E' => {
|
||||
state = State.FloatExponentUnsigned;
|
||||
result.id = Token.Id.FloatLiteral;
|
||||
},
|
||||
'0'...'9' => {},
|
||||
else => break,
|
||||
},
|
||||
State.IntegerLiteralWithRadix => switch (c) {
|
||||
'.' => {
|
||||
state = State.NumberDot;
|
||||
else => {
|
||||
if (isIdentifierChar(c)) {
|
||||
result.id = Token.Id.Invalid;
|
||||
}
|
||||
break;
|
||||
},
|
||||
'0'...'9' => {},
|
||||
else => break,
|
||||
},
|
||||
State.IntegerLiteralWithRadixHex => switch (c) {
|
||||
State.IntegerLiteralHexNoUnderscore => switch (c) {
|
||||
'0'...'9', 'a'...'f', 'A'...'F' => {
|
||||
state = State.IntegerLiteralHex;
|
||||
},
|
||||
else => {
|
||||
result.id = Token.Id.Invalid;
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.IntegerLiteralHex => switch (c) {
|
||||
'_' => {
|
||||
state = State.IntegerLiteralHexNoUnderscore;
|
||||
},
|
||||
'.' => {
|
||||
state = State.NumberDotHex;
|
||||
result.id = Token.Id.FloatLiteral;
|
||||
},
|
||||
'p', 'P' => {
|
||||
state = State.FloatExponentUnsignedHex;
|
||||
state = State.FloatExponentUnsigned;
|
||||
result.id = Token.Id.FloatLiteral;
|
||||
},
|
||||
'0'...'9', 'a'...'f', 'A'...'F' => {},
|
||||
else => break,
|
||||
else => {
|
||||
if (isIdentifierChar(c)) {
|
||||
result.id = Token.Id.Invalid;
|
||||
}
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.NumberDot => switch (c) {
|
||||
State.NumberDotDec => switch (c) {
|
||||
'.' => {
|
||||
self.index -= 1;
|
||||
state = State.Start;
|
||||
break;
|
||||
},
|
||||
else => {
|
||||
self.index -= 1;
|
||||
'e', 'E' => {
|
||||
state = State.FloatExponentUnsigned;
|
||||
},
|
||||
'0'...'9' => {
|
||||
result.id = Token.Id.FloatLiteral;
|
||||
state = State.FloatFraction;
|
||||
state = State.FloatFractionDec;
|
||||
},
|
||||
else => {
|
||||
if (isIdentifierChar(c)) {
|
||||
result.id = Token.Id.Invalid;
|
||||
}
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.NumberDotHex => switch (c) {
|
||||
|
@ -1105,65 +1205,112 @@ pub const Tokenizer = struct {
|
|||
state = State.Start;
|
||||
break;
|
||||
},
|
||||
else => {
|
||||
self.index -= 1;
|
||||
'p', 'P' => {
|
||||
state = State.FloatExponentUnsigned;
|
||||
},
|
||||
'0'...'9', 'a'...'f', 'A'...'F' => {
|
||||
result.id = Token.Id.FloatLiteral;
|
||||
state = State.FloatFractionHex;
|
||||
},
|
||||
else => {
|
||||
if (isIdentifierChar(c)) {
|
||||
result.id = Token.Id.Invalid;
|
||||
}
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.FloatFraction => switch (c) {
|
||||
State.FloatFractionDecNoUnderscore => switch (c) {
|
||||
'0'...'9' => {
|
||||
state = State.FloatFractionDec;
|
||||
},
|
||||
else => {
|
||||
result.id = Token.Id.Invalid;
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.FloatFractionDec => switch (c) {
|
||||
'_' => {
|
||||
state = State.FloatFractionDecNoUnderscore;
|
||||
},
|
||||
'e', 'E' => {
|
||||
state = State.FloatExponentUnsigned;
|
||||
},
|
||||
'0'...'9' => {},
|
||||
else => break,
|
||||
else => {
|
||||
if (isIdentifierChar(c)) {
|
||||
result.id = Token.Id.Invalid;
|
||||
}
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.FloatFractionHexNoUnderscore => switch (c) {
|
||||
'0'...'9', 'a'...'f', 'A'...'F' => {
|
||||
state = State.FloatFractionHex;
|
||||
},
|
||||
else => {
|
||||
result.id = Token.Id.Invalid;
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.FloatFractionHex => switch (c) {
|
||||
'_' => {
|
||||
state = State.FloatFractionHexNoUnderscore;
|
||||
},
|
||||
'p', 'P' => {
|
||||
state = State.FloatExponentUnsignedHex;
|
||||
state = State.FloatExponentUnsigned;
|
||||
},
|
||||
'0'...'9', 'a'...'f', 'A'...'F' => {},
|
||||
else => break,
|
||||
else => {
|
||||
if (isIdentifierChar(c)) {
|
||||
result.id = Token.Id.Invalid;
|
||||
}
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.FloatExponentUnsigned => switch (c) {
|
||||
'+', '-' => {
|
||||
state = State.FloatExponentNumber;
|
||||
state = State.FloatExponentNumberNoUnderscore;
|
||||
},
|
||||
else => {
|
||||
// reinterpret as a normal exponent number
|
||||
self.index -= 1;
|
||||
state = State.FloatExponentNumber;
|
||||
state = State.FloatExponentNumberNoUnderscore;
|
||||
},
|
||||
},
|
||||
State.FloatExponentUnsignedHex => switch (c) {
|
||||
'+', '-' => {
|
||||
state = State.FloatExponentNumberHex;
|
||||
State.FloatExponentNumberNoUnderscore => switch (c) {
|
||||
'0'...'9' => {
|
||||
state = State.FloatExponentNumber;
|
||||
},
|
||||
else => {
|
||||
// reinterpret as a normal exponent number
|
||||
self.index -= 1;
|
||||
state = State.FloatExponentNumberHex;
|
||||
result.id = Token.Id.Invalid;
|
||||
break;
|
||||
},
|
||||
},
|
||||
State.FloatExponentNumber => switch (c) {
|
||||
'_' => {
|
||||
state = State.FloatExponentNumberNoUnderscore;
|
||||
},
|
||||
'0'...'9' => {},
|
||||
else => break,
|
||||
},
|
||||
State.FloatExponentNumberHex => switch (c) {
|
||||
'0'...'9', 'a'...'f', 'A'...'F' => {},
|
||||
else => break,
|
||||
else => {
|
||||
if (isIdentifierChar(c)) {
|
||||
result.id = Token.Id.Invalid;
|
||||
}
|
||||
break;
|
||||
},
|
||||
},
|
||||
}
|
||||
} else if (self.index == self.buffer.len) {
|
||||
switch (state) {
|
||||
State.Start,
|
||||
State.IntegerLiteral,
|
||||
State.IntegerLiteralWithRadix,
|
||||
State.IntegerLiteralWithRadixHex,
|
||||
State.FloatFraction,
|
||||
State.IntegerLiteralDec,
|
||||
State.IntegerLiteralBin,
|
||||
State.IntegerLiteralOct,
|
||||
State.IntegerLiteralHex,
|
||||
State.NumberDotDec,
|
||||
State.NumberDotHex,
|
||||
State.FloatFractionDec,
|
||||
State.FloatFractionHex,
|
||||
State.FloatExponentNumber,
|
||||
State.FloatExponentNumberHex,
|
||||
State.StringLiteral, // find this error later
|
||||
State.MultilineStringLiteralLine,
|
||||
State.Builtin,
|
||||
|
@ -1184,10 +1331,14 @@ pub const Tokenizer = struct {
|
|||
result.id = Token.Id.ContainerDocComment;
|
||||
},
|
||||
|
||||
State.NumberDot,
|
||||
State.NumberDotHex,
|
||||
State.IntegerLiteralDecNoUnderscore,
|
||||
State.IntegerLiteralBinNoUnderscore,
|
||||
State.IntegerLiteralOctNoUnderscore,
|
||||
State.IntegerLiteralHexNoUnderscore,
|
||||
State.FloatFractionDecNoUnderscore,
|
||||
State.FloatFractionHexNoUnderscore,
|
||||
State.FloatExponentNumberNoUnderscore,
|
||||
State.FloatExponentUnsigned,
|
||||
State.FloatExponentUnsignedHex,
|
||||
State.SawAtSign,
|
||||
State.Backslash,
|
||||
State.CharLiteral,
|
||||
|
@ -1585,6 +1736,236 @@ test "correctly parse pointer assignment" {
|
|||
});
|
||||
}
|
||||
|
||||
test "tokenizer - number literals decimal" {
|
||||
testTokenize("0", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("1", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("2", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("3", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("4", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("5", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("6", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("7", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("8", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("9", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0a", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("9b", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1z", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1z_1", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("9z3", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
|
||||
testTokenize("0_0", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0001", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("01234567890", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("012_345_6789_0", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0_1_2_3_4_5_6_7_8_9_0", &[_]Token.Id{.IntegerLiteral});
|
||||
|
||||
testTokenize("00_", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0_0_", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0__0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0_0f", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0_0_f", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0_0_f_00", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1_,", &[_]Token.Id{ .Invalid, .Comma });
|
||||
|
||||
testTokenize("1.", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0.0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("1.0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("10.0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0e0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("1e0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("1e100", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("1.e100", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("1.0e100", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("1.0e+100", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("1.0e-100", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("1.+", &[_]Token.Id{ .FloatLiteral, .Plus });
|
||||
|
||||
testTokenize("1e", &[_]Token.Id{.Invalid});
|
||||
testTokenize("1.0e1f0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1.0p100", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Identifier, .Minus, .IntegerLiteral });
|
||||
testTokenize("1.0p1f0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1.0_,", &[_]Token.Id{ .Invalid, .Comma });
|
||||
testTokenize("1_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
|
||||
testTokenize("1._", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1.a", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1.z", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1._0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1._+", &[_]Token.Id{ .Invalid, .Identifier, .Plus });
|
||||
testTokenize("1._e", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1.0e", &[_]Token.Id{.Invalid});
|
||||
testTokenize("1.0e,", &[_]Token.Id{ .Invalid, .Comma });
|
||||
testTokenize("1.0e_", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1.0e+_", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1.0e-_", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("1.0e0_+", &[_]Token.Id{ .Invalid, .Plus });
|
||||
}
|
||||
|
||||
test "tokenizer - number literals binary" {
|
||||
testTokenize("0b0", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0b1", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0b2", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
||||
testTokenize("0b3", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
||||
testTokenize("0b4", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
||||
testTokenize("0b5", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
||||
testTokenize("0b6", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
||||
testTokenize("0b7", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
||||
testTokenize("0b8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
||||
testTokenize("0b9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
||||
testTokenize("0ba", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0bb", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0bc", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0bd", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0be", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0bf", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0bz", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
|
||||
testTokenize("0b0000_0000", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0b1111_1111", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0b10_10_10_10", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0b0_1_0_1_0_1_0_1", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0b1.", &[_]Token.Id{ .IntegerLiteral, .Period });
|
||||
testTokenize("0b1.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
|
||||
|
||||
testTokenize("0B0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0b_", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0b_0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0b1_", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0b0__1", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0b0_1_", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0b1e", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0b1p", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0b1e0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0b1p0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0b1_,", &[_]Token.Id{ .Invalid, .Comma });
|
||||
}
|
||||
|
||||
test "tokenizer - number literals octal" {
|
||||
testTokenize("0o0", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o1", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o2", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o3", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o4", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o5", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o6", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o7", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
||||
testTokenize("0o9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
||||
testTokenize("0oa", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0ob", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0oc", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0od", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0oe", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0of", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0oz", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
|
||||
testTokenize("0o01234567", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o0123_4567", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o01_23_45_67", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o0_1_2_3_4_5_6_7", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0o7.", &[_]Token.Id{ .IntegerLiteral, .Period });
|
||||
testTokenize("0o7.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
|
||||
|
||||
testTokenize("0O0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0o_", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0o_0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0o1_", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0o0__1", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0o0_1_", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0o1e", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0o1p", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0o1e0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0o1p0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0o_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
|
||||
}
|
||||
|
||||
test "tokenizer - number literals hexadecimal" {
|
||||
testTokenize("0x0", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x1", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x2", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x3", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x4", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x5", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x6", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x7", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x8", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x9", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xa", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xb", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xc", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xd", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xe", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xf", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xA", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xB", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xC", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xD", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xE", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0xF", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x0z", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0xz", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
|
||||
testTokenize("0x0123456789ABCDEF", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{.IntegerLiteral});
|
||||
|
||||
testTokenize("0X0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x_", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x_1", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x1_", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0x0__1", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0_1_", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0x_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
|
||||
|
||||
testTokenize("0x1.", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0x1.0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xF.", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xF.0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xF.F", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xF.Fp0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xF.FP0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0x1p0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xfp0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0x1.+0xF.", &[_]Token.Id{ .FloatLiteral, .Plus, .FloatLiteral });
|
||||
|
||||
testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0x0p0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0x0.0p0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xff.ffp10", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xff.ffP10", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xff.p10", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xffp10", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{.FloatLiteral});
|
||||
testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{.FloatLiteral});
|
||||
|
||||
testTokenize("0x1e", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x1e0", &[_]Token.Id{.IntegerLiteral});
|
||||
testTokenize("0x1p", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0.p", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0x0.z", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0._", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
|
||||
testTokenize("0x0_.0.0", &[_]Token.Id{ .Invalid, .Period, .FloatLiteral });
|
||||
testTokenize("0x0._0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0.0_", &[_]Token.Id{.Invalid});
|
||||
testTokenize("0x0_p0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid, .Period, .Identifier });
|
||||
testTokenize("0x0._p0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid, .Identifier });
|
||||
testTokenize("0x0.0p0_", &[_]Token.Id{ .Invalid, .Eof });
|
||||
}
|
||||
|
||||
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
|
||||
var tokenizer = Tokenizer.init(source);
|
||||
for (expected_tokens) |expected_token_id| {
|
||||
|
|
|
@ -1311,13 +1311,16 @@ pub const Builder = struct {
|
|||
var base: u8 = undefined;
|
||||
var rest: []const u8 = undefined;
|
||||
if (int_token.len >= 3 and int_token[0] == '0') {
|
||||
base = switch (int_token[1]) {
|
||||
'b' => 2,
|
||||
'o' => 8,
|
||||
'x' => 16,
|
||||
else => unreachable,
|
||||
};
|
||||
rest = int_token[2..];
|
||||
switch (int_token[1]) {
|
||||
'b' => base = 2,
|
||||
'o' => base = 8,
|
||||
'x' => base = 16,
|
||||
else => {
|
||||
base = 10;
|
||||
rest = int_token;
|
||||
},
|
||||
}
|
||||
} else {
|
||||
base = 10;
|
||||
rest = int_token;
|
||||
|
|
|
@ -165,22 +165,36 @@ static long long scanexp(struct MuslFILE *f, int pok)
|
|||
int x;
|
||||
long long y;
|
||||
int neg = 0;
|
||||
|
||||
|
||||
c = shgetc(f);
|
||||
if (c=='+' || c=='-') {
|
||||
neg = (c=='-');
|
||||
c = shgetc(f);
|
||||
if (c-'0'>=10U && pok) shunget(f);
|
||||
}
|
||||
if (c-'0'>=10U) {
|
||||
if (c-'0'>=10U && c!='_') {
|
||||
shunget(f);
|
||||
return LLONG_MIN;
|
||||
}
|
||||
for (x=0; c-'0'<10U && x<INT_MAX/10; c = shgetc(f))
|
||||
x = 10*x + c-'0';
|
||||
for (y=x; c-'0'<10U && y<LLONG_MAX/100; c = shgetc(f))
|
||||
y = 10*y + c-'0';
|
||||
for (; c-'0'<10U; c = shgetc(f));
|
||||
for (x=0; ; c = shgetc(f)) {
|
||||
if (c=='_') {
|
||||
continue;
|
||||
} else if (c-'0'<10U && x<INT_MAX/10) {
|
||||
x = 10*x + c-'0';
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (y=x; ; c = shgetc(f)) {
|
||||
if (c=='_') {
|
||||
continue;
|
||||
} else if (c-'0'<10U && y<LLONG_MAX/100) {
|
||||
y = 10*y + c-'0';
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (; c-'0'<10U || c=='_'; c = shgetc(f));
|
||||
shunget(f);
|
||||
return neg ? -y : y;
|
||||
}
|
||||
|
@ -450,16 +464,36 @@ static float128_t decfloat(struct MuslFILE *f, int c, int bits, int emin, int si
|
|||
j=0;
|
||||
k=0;
|
||||
|
||||
/* Don't let leading zeros consume buffer space */
|
||||
for (; c=='0'; c = shgetc(f)) gotdig=1;
|
||||
/* Don't let leading zeros/underscores consume buffer space */
|
||||
for (; ; c = shgetc(f)) {
|
||||
if (c=='_') {
|
||||
continue;
|
||||
} else if (c=='0') {
|
||||
gotdig=1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (c=='.') {
|
||||
gotrad = 1;
|
||||
for (c = shgetc(f); c=='0'; c = shgetc(f)) gotdig=1, lrp--;
|
||||
for (c = shgetc(f); ; c = shgetc(f)) {
|
||||
if (c == '_') {
|
||||
continue;
|
||||
} else if (c=='0') {
|
||||
gotdig=1;
|
||||
lrp--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x[0] = 0;
|
||||
for (; c-'0'<10U || c=='.'; c = shgetc(f)) {
|
||||
if (c == '.') {
|
||||
for (; c-'0'<10U || c=='.' || c=='_'; c = shgetc(f)) {
|
||||
if (c == '_') {
|
||||
continue;
|
||||
} else if (c == '.') {
|
||||
if (gotrad) break;
|
||||
gotrad = 1;
|
||||
lrp = dc;
|
||||
|
@ -773,18 +807,29 @@ static float128_t hexfloat(struct MuslFILE *f, int bits, int emin, int sign, int
|
|||
|
||||
c = shgetc(f);
|
||||
|
||||
/* Skip leading zeros */
|
||||
for (; c=='0'; c = shgetc(f)) gotdig = 1;
|
||||
/* Skip leading zeros/underscores */
|
||||
for (; c=='0' || c=='_'; c = shgetc(f)) gotdig = 1;
|
||||
|
||||
if (c=='.') {
|
||||
gotrad = 1;
|
||||
c = shgetc(f);
|
||||
/* Count zeros after the radix point before significand */
|
||||
for (rp=0; c=='0'; c = shgetc(f), rp--) gotdig = 1;
|
||||
for (rp=0; ; c = shgetc(f)) {
|
||||
if (c == '_') {
|
||||
continue;
|
||||
} else if (c == '0') {
|
||||
gotdig = 1;
|
||||
rp--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; c = shgetc(f)) {
|
||||
if (c=='.') {
|
||||
for (; c-'0'<10U || (c|32)-'a'<6U || c=='.' || c=='_'; c = shgetc(f)) {
|
||||
if (c=='_') {
|
||||
continue;
|
||||
} else if (c=='.') {
|
||||
if (gotrad) break;
|
||||
rp = dc;
|
||||
gotrad = 1;
|
||||
|
|
|
@ -177,10 +177,13 @@ enum TokenizeState {
|
|||
TokenizeStateSymbol,
|
||||
TokenizeStateZero, // "0", which might lead to "0x"
|
||||
TokenizeStateNumber, // "123", "0x123"
|
||||
TokenizeStateNumberNoUnderscore, // "12_", "0x12_" next char must be digit
|
||||
TokenizeStateNumberDot,
|
||||
TokenizeStateFloatFraction, // "123.456", "0x123.456"
|
||||
TokenizeStateFloatFractionNoUnderscore, // "123.45_", "0x123.45_"
|
||||
TokenizeStateFloatExponentUnsigned, // "123.456e", "123e", "0x123p"
|
||||
TokenizeStateFloatExponentNumber, // "123.456e-", "123.456e5", "123.456e5e-5"
|
||||
TokenizeStateFloatExponentNumber, // "123.456e7", "123.456e+7", "123.456e-7"
|
||||
TokenizeStateFloatExponentNumberNoUnderscore, // "123.456e7_", "123.456e+7_", "123.456e-7_"
|
||||
TokenizeStateString,
|
||||
TokenizeStateStringEscape,
|
||||
TokenizeStateStringEscapeUnicodeStart,
|
||||
|
@ -233,14 +236,10 @@ struct Tokenize {
|
|||
Token *cur_tok;
|
||||
Tokenization *out;
|
||||
uint32_t radix;
|
||||
int32_t exp_add_amt;
|
||||
bool is_exp_negative;
|
||||
bool is_trailing_underscore;
|
||||
size_t char_code_index;
|
||||
bool unicode;
|
||||
uint32_t char_code;
|
||||
int exponent_in_bin_or_dec;
|
||||
BigInt specified_exponent;
|
||||
BigInt significand;
|
||||
size_t remaining_code_units;
|
||||
};
|
||||
|
||||
|
@ -426,20 +425,16 @@ void tokenize(Buf *buf, Tokenization *out) {
|
|||
case '0':
|
||||
t.state = TokenizeStateZero;
|
||||
begin_token(&t, TokenIdIntLiteral);
|
||||
t.is_trailing_underscore = false;
|
||||
t.radix = 10;
|
||||
t.exp_add_amt = 1;
|
||||
t.exponent_in_bin_or_dec = 0;
|
||||
bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, 0);
|
||||
bigint_init_unsigned(&t.specified_exponent, 0);
|
||||
break;
|
||||
case DIGIT_NON_ZERO:
|
||||
t.state = TokenizeStateNumber;
|
||||
begin_token(&t, TokenIdIntLiteral);
|
||||
t.is_trailing_underscore = false;
|
||||
t.radix = 10;
|
||||
t.exp_add_amt = 1;
|
||||
t.exponent_in_bin_or_dec = 0;
|
||||
bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, get_digit_value(c));
|
||||
bigint_init_unsigned(&t.specified_exponent, 0);
|
||||
break;
|
||||
case '"':
|
||||
begin_token(&t, TokenIdStringLiteral);
|
||||
|
@ -1189,17 +1184,15 @@ void tokenize(Buf *buf, Tokenization *out) {
|
|||
switch (c) {
|
||||
case 'b':
|
||||
t.radix = 2;
|
||||
t.state = TokenizeStateNumber;
|
||||
t.state = TokenizeStateNumberNoUnderscore;
|
||||
break;
|
||||
case 'o':
|
||||
t.radix = 8;
|
||||
t.exp_add_amt = 3;
|
||||
t.state = TokenizeStateNumber;
|
||||
t.state = TokenizeStateNumberNoUnderscore;
|
||||
break;
|
||||
case 'x':
|
||||
t.radix = 16;
|
||||
t.exp_add_amt = 4;
|
||||
t.state = TokenizeStateNumber;
|
||||
t.state = TokenizeStateNumberNoUnderscore;
|
||||
break;
|
||||
default:
|
||||
// reinterpret as normal number
|
||||
|
@ -1208,9 +1201,27 @@ void tokenize(Buf *buf, Tokenization *out) {
|
|||
continue;
|
||||
}
|
||||
break;
|
||||
case TokenizeStateNumberNoUnderscore:
|
||||
if (c == '_') {
|
||||
invalid_char_error(&t, c);
|
||||
break;
|
||||
} else if (get_digit_value(c) < t.radix) {
|
||||
t.is_trailing_underscore = false;
|
||||
t.state = TokenizeStateNumber;
|
||||
}
|
||||
// fall through
|
||||
case TokenizeStateNumber:
|
||||
{
|
||||
if (c == '_') {
|
||||
t.is_trailing_underscore = true;
|
||||
t.state = TokenizeStateNumberNoUnderscore;
|
||||
break;
|
||||
}
|
||||
if (c == '.') {
|
||||
if (t.is_trailing_underscore) {
|
||||
invalid_char_error(&t, c);
|
||||
break;
|
||||
}
|
||||
if (t.radix != 16 && t.radix != 10) {
|
||||
invalid_char_error(&t, c);
|
||||
}
|
||||
|
@ -1218,17 +1229,26 @@ void tokenize(Buf *buf, Tokenization *out) {
|
|||
break;
|
||||
}
|
||||
if (is_exponent_signifier(c, t.radix)) {
|
||||
if (t.is_trailing_underscore) {
|
||||
invalid_char_error(&t, c);
|
||||
break;
|
||||
}
|
||||
if (t.radix != 16 && t.radix != 10) {
|
||||
invalid_char_error(&t, c);
|
||||
}
|
||||
t.state = TokenizeStateFloatExponentUnsigned;
|
||||
t.radix = 10; // exponent is always base 10
|
||||
assert(t.cur_tok->id == TokenIdIntLiteral);
|
||||
bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint);
|
||||
set_token_id(&t, t.cur_tok, TokenIdFloatLiteral);
|
||||
break;
|
||||
}
|
||||
uint32_t digit_value = get_digit_value(c);
|
||||
if (digit_value >= t.radix) {
|
||||
if (t.is_trailing_underscore) {
|
||||
invalid_char_error(&t, c);
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_symbol_char(c)) {
|
||||
invalid_char_error(&t, c);
|
||||
}
|
||||
|
@ -1259,20 +1279,41 @@ void tokenize(Buf *buf, Tokenization *out) {
|
|||
continue;
|
||||
}
|
||||
t.pos -= 1;
|
||||
t.state = TokenizeStateFloatFraction;
|
||||
t.state = TokenizeStateFloatFractionNoUnderscore;
|
||||
assert(t.cur_tok->id == TokenIdIntLiteral);
|
||||
bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint);
|
||||
set_token_id(&t, t.cur_tok, TokenIdFloatLiteral);
|
||||
continue;
|
||||
}
|
||||
case TokenizeStateFloatFractionNoUnderscore:
|
||||
if (c == '_') {
|
||||
invalid_char_error(&t, c);
|
||||
} else if (get_digit_value(c) < t.radix) {
|
||||
t.is_trailing_underscore = false;
|
||||
t.state = TokenizeStateFloatFraction;
|
||||
}
|
||||
// fall through
|
||||
case TokenizeStateFloatFraction:
|
||||
{
|
||||
if (c == '_') {
|
||||
t.is_trailing_underscore = true;
|
||||
t.state = TokenizeStateFloatFractionNoUnderscore;
|
||||
break;
|
||||
}
|
||||
if (is_exponent_signifier(c, t.radix)) {
|
||||
if (t.is_trailing_underscore) {
|
||||
invalid_char_error(&t, c);
|
||||
break;
|
||||
}
|
||||
t.state = TokenizeStateFloatExponentUnsigned;
|
||||
t.radix = 10; // exponent is always base 10
|
||||
break;
|
||||
}
|
||||
uint32_t digit_value = get_digit_value(c);
|
||||
if (digit_value >= t.radix) {
|
||||
if (t.is_trailing_underscore) {
|
||||
invalid_char_error(&t, c);
|
||||
break;
|
||||
}
|
||||
if (is_symbol_char(c)) {
|
||||
invalid_char_error(&t, c);
|
||||
}
|
||||
|
@ -1282,46 +1323,47 @@ void tokenize(Buf *buf, Tokenization *out) {
|
|||
t.state = TokenizeStateStart;
|
||||
continue;
|
||||
}
|
||||
t.exponent_in_bin_or_dec -= t.exp_add_amt;
|
||||
if (t.radix == 10) {
|
||||
// For now we use strtod to parse decimal floats, so we just have to get to the
|
||||
// end of the token.
|
||||
break;
|
||||
}
|
||||
BigInt digit_value_bi;
|
||||
bigint_init_unsigned(&digit_value_bi, digit_value);
|
||||
|
||||
BigInt radix_bi;
|
||||
bigint_init_unsigned(&radix_bi, t.radix);
|
||||
|
||||
BigInt multiplied;
|
||||
bigint_mul(&multiplied, &t.significand, &radix_bi);
|
||||
|
||||
bigint_add(&t.significand, &multiplied, &digit_value_bi);
|
||||
break;
|
||||
// we use parse_f128 to generate the float literal, so just
|
||||
// need to get to the end of the token
|
||||
}
|
||||
break;
|
||||
case TokenizeStateFloatExponentUnsigned:
|
||||
switch (c) {
|
||||
case '+':
|
||||
t.is_exp_negative = false;
|
||||
t.state = TokenizeStateFloatExponentNumber;
|
||||
t.state = TokenizeStateFloatExponentNumberNoUnderscore;
|
||||
break;
|
||||
case '-':
|
||||
t.is_exp_negative = true;
|
||||
t.state = TokenizeStateFloatExponentNumber;
|
||||
t.state = TokenizeStateFloatExponentNumberNoUnderscore;
|
||||
break;
|
||||
default:
|
||||
// reinterpret as normal exponent number
|
||||
t.pos -= 1;
|
||||
t.is_exp_negative = false;
|
||||
t.state = TokenizeStateFloatExponentNumber;
|
||||
t.state = TokenizeStateFloatExponentNumberNoUnderscore;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case TokenizeStateFloatExponentNumberNoUnderscore:
|
||||
if (c == '_') {
|
||||
invalid_char_error(&t, c);
|
||||
} else if (get_digit_value(c) < t.radix) {
|
||||
t.is_trailing_underscore = false;
|
||||
t.state = TokenizeStateFloatExponentNumber;
|
||||
}
|
||||
// fall through
|
||||
case TokenizeStateFloatExponentNumber:
|
||||
{
|
||||
if (c == '_') {
|
||||
t.is_trailing_underscore = true;
|
||||
t.state = TokenizeStateFloatExponentNumberNoUnderscore;
|
||||
break;
|
||||
}
|
||||
uint32_t digit_value = get_digit_value(c);
|
||||
if (digit_value >= t.radix) {
|
||||
if (t.is_trailing_underscore) {
|
||||
invalid_char_error(&t, c);
|
||||
break;
|
||||
}
|
||||
if (is_symbol_char(c)) {
|
||||
invalid_char_error(&t, c);
|
||||
}
|
||||
|
@ -1331,21 +1373,9 @@ void tokenize(Buf *buf, Tokenization *out) {
|
|||
t.state = TokenizeStateStart;
|
||||
continue;
|
||||
}
|
||||
if (t.radix == 10) {
|
||||
// For now we use strtod to parse decimal floats, so we just have to get to the
|
||||
// end of the token.
|
||||
break;
|
||||
}
|
||||
BigInt digit_value_bi;
|
||||
bigint_init_unsigned(&digit_value_bi, digit_value);
|
||||
|
||||
BigInt radix_bi;
|
||||
bigint_init_unsigned(&radix_bi, 10);
|
||||
|
||||
BigInt multiplied;
|
||||
bigint_mul(&multiplied, &t.specified_exponent, &radix_bi);
|
||||
|
||||
bigint_add(&t.specified_exponent, &multiplied, &digit_value_bi);
|
||||
// we use parse_f128 to generate the float literal, so just
|
||||
// need to get to the end of the token
|
||||
}
|
||||
break;
|
||||
case TokenizeStateSawDash:
|
||||
|
@ -1399,6 +1429,9 @@ void tokenize(Buf *buf, Tokenization *out) {
|
|||
case TokenizeStateStart:
|
||||
case TokenizeStateError:
|
||||
break;
|
||||
case TokenizeStateNumberNoUnderscore:
|
||||
case TokenizeStateFloatFractionNoUnderscore:
|
||||
case TokenizeStateFloatExponentNumberNoUnderscore:
|
||||
case TokenizeStateNumberDot:
|
||||
tokenize_error(&t, "unterminated number literal");
|
||||
break;
|
||||
|
|
|
@ -395,11 +395,163 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
|
|||
\\ var bad_float :f32 = 0.0;
|
||||
\\ bad_float = bad_float + .20;
|
||||
\\ std.debug.assert(bad_float < 1.0);
|
||||
\\})
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:5:29: error: invalid token: '.'",
|
||||
});
|
||||
|
||||
cases.add("invalid exponent in float literal - 1",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 0x1.0p1ab1;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:28: error: invalid character: 'a'",
|
||||
});
|
||||
|
||||
cases.add("invalid exponent in float literal - 2",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 0x1.0p50F;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:29: error: invalid character: 'F'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 1",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 0._0;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:23: error: invalid character: '_'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 2",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 0_.0;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:23: error: invalid character: '.'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 3",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 0.0_;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:25: error: invalid character: ';'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 4",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 1.0e_1;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:25: error: invalid character: '_'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 5",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 1.0e+_1;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:26: error: invalid character: '_'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 6",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 1.0e-_1;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:26: error: invalid character: '_'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 7",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 1.0e-1_;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:28: error: invalid character: ';'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 9",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 1__0.0e-1;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:23: error: invalid character: '_'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 10",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 1.0__0e-1;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:25: error: invalid character: '_'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 11",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 1.0e-1__0;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:28: error: invalid character: '_'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 12",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 0_x0.0;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:23: error: invalid character: 'x'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 13",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 0x_0.0;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:23: error: invalid character: '_'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in float literal - 14",
|
||||
\\fn main() void {
|
||||
\\ var bad: f128 = 0x0.0_p1;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:27: error: invalid character: 'p'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in int literal - 1",
|
||||
\\fn main() void {
|
||||
\\ var bad: u128 = 0010_;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:26: error: invalid character: ';'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in int literal - 2",
|
||||
\\fn main() void {
|
||||
\\ var bad: u128 = 0b0010_;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:28: error: invalid character: ';'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in int literal - 3",
|
||||
\\fn main() void {
|
||||
\\ var bad: u128 = 0o0010_;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:28: error: invalid character: ';'",
|
||||
});
|
||||
|
||||
cases.add("invalid underscore placement in int literal - 4",
|
||||
\\fn main() void {
|
||||
\\ var bad: u128 = 0x0010_;
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
"tmp.zig:2:28: error: invalid character: ';'",
|
||||
});
|
||||
|
||||
cases.add("var args without c calling conv",
|
||||
\\fn foo(args: ...) void {}
|
||||
\\comptime {
|
||||
|
|
|
@ -411,6 +411,34 @@ test "quad hex float literal parsing accurate" {
|
|||
comptime S.doTheTest();
|
||||
}
|
||||
|
||||
test "underscore separator parsing" {
|
||||
expect(0_0_0_0 == 0);
|
||||
expect(1_234_567 == 1234567);
|
||||
expect(001_234_567 == 1234567);
|
||||
expect(0_0_1_2_3_4_5_6_7 == 1234567);
|
||||
|
||||
expect(0b0_0_0_0 == 0);
|
||||
expect(0b1010_1010 == 0b10101010);
|
||||
expect(0b0000_1010_1010 == 0b10101010);
|
||||
expect(0b1_0_1_0_1_0_1_0 == 0b10101010);
|
||||
|
||||
expect(0o0_0_0_0 == 0);
|
||||
expect(0o1010_1010 == 0o10101010);
|
||||
expect(0o0000_1010_1010 == 0o10101010);
|
||||
expect(0o1_0_1_0_1_0_1_0 == 0o10101010);
|
||||
|
||||
expect(0x0_0_0_0 == 0);
|
||||
expect(0x1010_1010 == 0x10101010);
|
||||
expect(0x0000_1010_1010 == 0x10101010);
|
||||
expect(0x1_0_1_0_1_0_1_0 == 0x10101010);
|
||||
|
||||
expect(123_456.789_000e1_0 == 123456.789000e10);
|
||||
expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10);
|
||||
|
||||
expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10);
|
||||
expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10);
|
||||
}
|
||||
|
||||
test "hex float literal within range" {
|
||||
const a = 0x1.0p16383;
|
||||
const b = 0x0.1p16387;
|
||||
|
|
Loading…
Reference in New Issue