diff --git a/.builds/freebsd.yml b/.builds/freebsd.yml index 9b936657f..37e6a6568 100644 --- a/.builds/freebsd.yml +++ b/.builds/freebsd.yml @@ -1,7 +1,7 @@ image: freebsd/latest secrets: - - 6c60aaee-92e7-4e7d-812c-114817689b4d - - dd0bd962-7664-4d3e-b0f3-41c9ee96b8b8 + - 51bfddf5-86a6-4e01-8576-358c72a4a0a4 + - 5cfede76-914e-4071-893e-e5e2e6ae3cea sources: - https://github.com/ziglang/zig tasks: diff --git a/README.md b/README.md index 39ef64497..021a6b6d1 100644 --- a/README.md +++ b/README.md @@ -68,3 +68,21 @@ make install ##### Windows See https://github.com/ziglang/zig/wiki/Building-Zig-on-Windows + +## License + +The ultimate goal of the Zig project is to serve users. As a first-order +effect, this means users of the compiler, helping programmers to write better +code. Even more important, however, are the end users. + +Zig is intended to be used to help end users accomplish their goals. For +example, it would be inappropriate and offensive to use Zig to implement +[dark patterns](https://en.wikipedia.org/wiki/Dark_pattern) and it would be +shameful to utilize Zig to exploit people instead of benefit them. + +However, such problems are best solved with social norms, not with software +licenses. Any attempt to complicate the software license of Zig would risk +compromising the value Zig provides to users. + +Therefore, Zig is available under the MIT (Expat) License, and comes with a +humble request: use it to make software better serve the needs of end users. 
diff --git a/build.zig b/build.zig index 8ac2d4f8b..a6a2d8737 100644 --- a/build.zig +++ b/build.zig @@ -123,7 +123,14 @@ pub fn build(b: *Builder) !void { .source_dir = "lib", .install_dir = .Lib, .install_subdir = "zig", - .exclude_extensions = &[_][]const u8{ "test.zig", "README.md" }, + .exclude_extensions = &[_][]const u8{ + "test.zig", + "README.md", + ".z.0", + ".z.9", + ".gz", + "rfc1951.txt", + }, }); const test_filter = b.option([]const u8, "test-filter", "Skip tests that do not match filter"); diff --git a/ci/azure/windows_msvc_script.bat b/ci/azure/windows_msvc_script.bat index 5c3593fb9..1568b1b15 100644 --- a/ci/azure/windows_msvc_script.bat +++ b/ci/azure/windows_msvc_script.bat @@ -24,7 +24,7 @@ cd %ZIGBUILDDIR% cmake.exe .. -Thost=x64 -G"Visual Studio 16 2019" -A x64 "-DCMAKE_INSTALL_PREFIX=%ZIGINSTALLDIR%" "-DCMAKE_PREFIX_PATH=%ZIGPREFIXPATH%" -DCMAKE_BUILD_TYPE=Release || exit /b msbuild /maxcpucount /p:Configuration=Release INSTALL.vcxproj || exit /b -"%ZIGINSTALLDIR%\bin\zig.exe" build test -Dskip-compile-errors || exit /b +"%ZIGINSTALLDIR%\bin\zig.exe" build test -Dskip-non-native -Dskip-compile-errors || exit /b set "PATH=%CD:~0,2%\msys64\usr\bin;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem" SET "MSYSTEM=MINGW64" diff --git a/ci/srht/freebsd_script b/ci/srht/freebsd_script index 97b869c9f..31aea6c3d 100755 --- a/ci/srht/freebsd_script +++ b/ci/srht/freebsd_script @@ -28,11 +28,8 @@ make $JOBS install release/bin/zig build test-fmt release/bin/zig build test-behavior - -# This test is disabled because it triggers "out of memory" on the sr.ht CI service. 
-# See https://github.com/ziglang/zig/issues/3210 -# release/bin/zig build test-std - +# TODO get these tests passing on freebsd and re-enable +#release/bin/zig build test-std release/bin/zig build test-compiler-rt release/bin/zig build test-compare-output release/bin/zig build test-standalone @@ -44,7 +41,8 @@ release/bin/zig build test-translate-c release/bin/zig build test-run-translated-c # TODO disabled until we are shipping self-hosted #release/bin/zig build test-gen-h -release/bin/zig build test-compile-errors +# TODO disabled to save time and hit that 45 minute limit +#release/bin/zig build test-compile-errors release/bin/zig build docs if [ -f ~/.s3cfg ]; then diff --git a/ci/srht/on_master_success b/ci/srht/on_master_success index c5a01f3bf..691c18a05 100755 --- a/ci/srht/on_master_success +++ b/ci/srht/on_master_success @@ -23,7 +23,7 @@ packages: - jq - xz secrets: - - 6c60aaee-92e7-4e7d-812c-114817689b4d + - 51bfddf5-86a6-4e01-8576-358c72a4a0a4 sources: - https://github.com/ziglang/zig tasks: @@ -36,4 +36,4 @@ jq <$YML_FILE -sR '{ -H Authorization:"token $OAUTH_TOKEN" \ -H Content-Type:application/json \ -X POST \ - -d @- "https://builds.sr.ht/api/jobs" + -d @- "https://builds.hut.lavatech.top/api/jobs" diff --git a/doc/langref.html.in b/doc/langref.html.in index 10bc81e6d..dce23a43e 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -9728,7 +9728,7 @@ const c = @cImport({
  • Does not support Zig-only pointer attributes such as alignment. Use normal {#link|Pointers#} please!
  • -

    When a C pointer is pointing to a single struct (not an array), deference the C pointer to +

    When a C pointer is pointing to a single struct (not an array), dereference the C pointer to access the struct's fields or member data. That syntax looks like this:

    {#syntax#}ptr_to_struct.*.struct_member{#endsyntax#}

    diff --git a/lib/std/build.zig b/lib/std/build.zig index 1673737be..69f44bad3 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -258,9 +258,14 @@ pub const Builder = struct { })); } - pub fn addSharedLibrary(self: *Builder, name: []const u8, root_src: ?[]const u8, ver: Version) *LibExeObjStep { + pub fn addSharedLibrary( + self: *Builder, + name: []const u8, + root_src: ?[]const u8, + kind: LibExeObjStep.SharedLibKind, + ) *LibExeObjStep { const root_src_param = if (root_src) |p| @as(FileSource, .{ .path = p }) else null; - return LibExeObjStep.createSharedLibrary(self, name, root_src_param, ver); + return LibExeObjStep.createSharedLibrary(self, name, root_src_param, kind); } pub fn addStaticLibrary(self: *Builder, name: []const u8, root_src: ?[]const u8) *LibExeObjStep { @@ -338,11 +343,13 @@ pub const Builder = struct { return TranslateCStep.create(self, source); } - pub fn version(self: *const Builder, major: u32, minor: u32, patch: u32) Version { - return Version{ - .major = major, - .minor = minor, - .patch = patch, + pub fn version(self: *const Builder, major: u32, minor: u32, patch: u32) LibExeObjStep.SharedLibKind { + return .{ + .versioned = .{ + .major = major, + .minor = minor, + .patch = patch, + }, }; } @@ -1048,6 +1055,7 @@ pub const Builder = struct { .Bin => self.exe_dir, .Lib => self.lib_dir, .Header => self.h_dir, + .Custom => |path| fs.path.join(self.allocator, &[_][]const u8{ self.install_path, path }) catch unreachable, }; return fs.path.resolve( self.allocator, @@ -1166,7 +1174,7 @@ pub const LibExeObjStep = struct { version_script: ?[]const u8 = null, out_filename: []const u8, is_dynamic: bool, - version: Version, + version: ?Version, build_mode: builtin.Mode, kind: Kind, major_only_filename: []const u8, @@ -1180,6 +1188,7 @@ pub const LibExeObjStep = struct { emit_llvm_ir: bool = false, emit_asm: bool = false, emit_bin: bool = true, + emit_docs: bool = false, emit_h: bool = false, bundle_compiler_rt: bool, disable_stack_probing: 
bool, @@ -1212,6 +1221,8 @@ pub const LibExeObjStep = struct { is_linking_libc: bool = false, vcpkg_bin_path: ?[]const u8 = null, + /// This may be set in order to override the default install directory + override_dest_dir: ?InstallDir, installed_path: ?[]const u8, install_step: ?*InstallArtifactStep, @@ -1268,33 +1279,41 @@ pub const LibExeObjStep = struct { Test, }; - pub fn createSharedLibrary(builder: *Builder, name: []const u8, root_src: ?FileSource, ver: Version) *LibExeObjStep { + const SharedLibKind = union(enum) { + versioned: Version, + unversioned: void, + }; + + pub fn createSharedLibrary(builder: *Builder, name: []const u8, root_src: ?FileSource, kind: SharedLibKind) *LibExeObjStep { const self = builder.allocator.create(LibExeObjStep) catch unreachable; - self.* = initExtraArgs(builder, name, root_src, Kind.Lib, true, ver); + self.* = initExtraArgs(builder, name, root_src, Kind.Lib, true, switch (kind) { + .versioned => |ver| ver, + .unversioned => null, + }); return self; } pub fn createStaticLibrary(builder: *Builder, name: []const u8, root_src: ?FileSource) *LibExeObjStep { const self = builder.allocator.create(LibExeObjStep) catch unreachable; - self.* = initExtraArgs(builder, name, root_src, Kind.Lib, false, builder.version(0, 0, 0)); + self.* = initExtraArgs(builder, name, root_src, Kind.Lib, false, null); return self; } pub fn createObject(builder: *Builder, name: []const u8, root_src: ?FileSource) *LibExeObjStep { const self = builder.allocator.create(LibExeObjStep) catch unreachable; - self.* = initExtraArgs(builder, name, root_src, Kind.Obj, false, builder.version(0, 0, 0)); + self.* = initExtraArgs(builder, name, root_src, Kind.Obj, false, null); return self; } pub fn createExecutable(builder: *Builder, name: []const u8, root_src: ?FileSource, is_dynamic: bool) *LibExeObjStep { const self = builder.allocator.create(LibExeObjStep) catch unreachable; - self.* = initExtraArgs(builder, name, root_src, Kind.Exe, is_dynamic, builder.version(0, 0, 
0)); + self.* = initExtraArgs(builder, name, root_src, Kind.Exe, is_dynamic, null); return self; } pub fn createTest(builder: *Builder, name: []const u8, root_src: FileSource) *LibExeObjStep { const self = builder.allocator.create(LibExeObjStep) catch unreachable; - self.* = initExtraArgs(builder, name, root_src, Kind.Test, false, builder.version(0, 0, 0)); + self.* = initExtraArgs(builder, name, root_src, Kind.Test, false, null); return self; } @@ -1304,7 +1323,7 @@ pub const LibExeObjStep = struct { root_src: ?FileSource, kind: Kind, is_dynamic: bool, - ver: Version, + ver: ?Version, ) LibExeObjStep { if (mem.indexOf(u8, name, "/") != null or mem.indexOf(u8, name, "\\") != null) { panic("invalid name: '{}'. It looks like a file path, but it is supposed to be the library or application name.", .{name}); @@ -1348,6 +1367,7 @@ pub const LibExeObjStep = struct { .rdynamic = false, .output_dir = null, .single_threaded = false, + .override_dest_dir = null, .installed_path = null, .install_step = null, }; @@ -1375,17 +1395,17 @@ pub const LibExeObjStep = struct { self.target.staticLibSuffix(), }); self.out_lib_filename = self.out_filename; - } else { + } else if (self.version) |version| { if (self.target.isDarwin()) { self.out_filename = self.builder.fmt("lib{}.{d}.{d}.{d}.dylib", .{ self.name, - self.version.major, - self.version.minor, - self.version.patch, + version.major, + version.minor, + version.patch, }); self.major_only_filename = self.builder.fmt("lib{}.{d}.dylib", .{ self.name, - self.version.major, + version.major, }); self.name_only_filename = self.builder.fmt("lib{}.dylib", .{self.name}); self.out_lib_filename = self.out_filename; @@ -1395,14 +1415,25 @@ pub const LibExeObjStep = struct { } else { self.out_filename = self.builder.fmt("lib{}.so.{d}.{d}.{d}", .{ self.name, - self.version.major, - self.version.minor, - self.version.patch, + version.major, + version.minor, + version.patch, }); - self.major_only_filename = self.builder.fmt("lib{}.so.{d}", .{ 
self.name, self.version.major }); + self.major_only_filename = self.builder.fmt("lib{}.so.{d}", .{ self.name, version.major }); self.name_only_filename = self.builder.fmt("lib{}.so", .{self.name}); self.out_lib_filename = self.out_filename; } + } else { + if (self.target.isDarwin()) { + self.out_filename = self.builder.fmt("lib{}.dylib", .{self.name}); + self.out_lib_filename = self.out_filename; + } else if (self.target.isWindows()) { + self.out_filename = self.builder.fmt("{}.dll", .{self.name}); + self.out_lib_filename = self.builder.fmt("{}.lib", .{self.name}); + } else { + self.out_filename = self.builder.fmt("lib{}.so", .{self.name}); + self.out_lib_filename = self.out_filename; + } } }, } @@ -2003,6 +2034,7 @@ pub const LibExeObjStep = struct { if (self.emit_llvm_ir) try zig_args.append("-femit-llvm-ir"); if (self.emit_asm) try zig_args.append("-femit-asm"); if (!self.emit_bin) try zig_args.append("-fno-emit-bin"); + if (self.emit_docs) try zig_args.append("-femit-docs"); if (self.emit_h) try zig_args.append("-femit-h"); if (self.strip) { @@ -2037,14 +2069,16 @@ pub const LibExeObjStep = struct { zig_args.append(self.name) catch unreachable; if (self.kind == Kind.Lib and self.is_dynamic) { - zig_args.append("--ver-major") catch unreachable; - zig_args.append(builder.fmt("{}", .{self.version.major})) catch unreachable; + if (self.version) |version| { + zig_args.append("--ver-major") catch unreachable; + zig_args.append(builder.fmt("{}", .{version.major})) catch unreachable; - zig_args.append("--ver-minor") catch unreachable; - zig_args.append(builder.fmt("{}", .{self.version.minor})) catch unreachable; + zig_args.append("--ver-minor") catch unreachable; + zig_args.append(builder.fmt("{}", .{version.minor})) catch unreachable; - zig_args.append("--ver-patch") catch unreachable; - zig_args.append(builder.fmt("{}", .{self.version.patch})) catch unreachable; + zig_args.append("--ver-patch") catch unreachable; + zig_args.append(builder.fmt("{}", .{version.patch})) 
catch unreachable; + } } if (self.is_dynamic) { try zig_args.append("-dynamic"); @@ -2285,7 +2319,7 @@ pub const LibExeObjStep = struct { } } - if (self.kind == Kind.Lib and self.is_dynamic and self.target.wantSharedLibSymLinks()) { + if (self.kind == Kind.Lib and self.is_dynamic and self.version != null and self.target.wantSharedLibSymLinks()) { try doAtomicSymLinks(builder.allocator, self.getOutputPath(), self.major_only_filename, self.name_only_filename); } } @@ -2309,17 +2343,17 @@ pub const InstallArtifactStep = struct { .builder = builder, .step = Step.init(.InstallArtifact, builder.fmt("install {}", .{artifact.step.name}), builder.allocator, make), .artifact = artifact, - .dest_dir = switch (artifact.kind) { + .dest_dir = artifact.override_dest_dir orelse switch (artifact.kind) { .Obj => unreachable, .Test => unreachable, - .Exe => .Bin, - .Lib => .Lib, + .Exe => InstallDir{ .Bin = {} }, + .Lib => InstallDir{ .Lib = {} }, }, .pdb_dir = if (artifact.producesPdbFile()) blk: { if (artifact.kind == .Exe) { - break :blk InstallDir.Bin; + break :blk InstallDir{ .Bin = {} }; } else { - break :blk InstallDir.Lib; + break :blk InstallDir{ .Lib = {} }; } } else null, .h_dir = if (artifact.kind == .Lib and artifact.emit_h) .Header else null, @@ -2329,8 +2363,10 @@ pub const InstallArtifactStep = struct { builder.pushInstalledFile(self.dest_dir, artifact.out_filename); if (self.artifact.isDynamicLibrary()) { - builder.pushInstalledFile(.Lib, artifact.major_only_filename); - builder.pushInstalledFile(.Lib, artifact.name_only_filename); + if (self.artifact.version != null) { + builder.pushInstalledFile(.Lib, artifact.major_only_filename); + builder.pushInstalledFile(.Lib, artifact.name_only_filename); + } if (self.artifact.target.isWindows()) { builder.pushInstalledFile(.Lib, artifact.out_lib_filename); } @@ -2350,7 +2386,7 @@ pub const InstallArtifactStep = struct { const full_dest_path = builder.getInstallPath(self.dest_dir, self.artifact.out_filename); try 
builder.updateFile(self.artifact.getOutputPath(), full_dest_path); - if (self.artifact.isDynamicLibrary() and self.artifact.target.wantSharedLibSymLinks()) { + if (self.artifact.isDynamicLibrary() and self.artifact.version != null and self.artifact.target.wantSharedLibSymLinks()) { try doAtomicSymLinks(builder.allocator, full_dest_path, self.artifact.major_only_filename, self.artifact.name_only_filename); } if (self.pdb_dir) |pdb_dir| { @@ -2615,11 +2651,13 @@ const VcpkgRootStatus = enum { pub const VcpkgLinkage = std.builtin.LinkMode; -pub const InstallDir = enum { - Prefix, - Lib, - Bin, - Header, +pub const InstallDir = union(enum) { + Prefix: void, + Lib: void, + Bin: void, + Header: void, + /// A path relative to the prefix + Custom: []const u8, }; pub const InstalledFile = struct { diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index 911a0eb15..52b8f641c 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -317,7 +317,6 @@ pub const TypeInfo = union(enum) { /// therefore must be kept in sync with the compiler implementation. 
pub const UnionField = struct { name: []const u8, - enum_field: ?EnumField, field_type: type, }; diff --git a/lib/std/c.zig b/lib/std/c.zig index 1b3f403ab..aa50fff90 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -132,8 +132,6 @@ pub usingnamespace switch (builtin.os.tag) { }, }; -pub extern "c" fn setreuid(ruid: c_uint, euid: c_uint) c_int; -pub extern "c" fn setregid(rgid: c_uint, egid: c_uint) c_int; pub extern "c" fn rmdir(path: [*:0]const u8) c_int; pub extern "c" fn getenv(name: [*:0]const u8) ?[*:0]u8; pub extern "c" fn sysctl(name: [*]const c_int, namelen: c_uint, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) c_int; @@ -237,8 +235,15 @@ pub usingnamespace switch (builtin.os.tag) { pub extern "c" fn kill(pid: pid_t, sig: c_int) c_int; pub extern "c" fn getdirentries(fd: fd_t, buf_ptr: [*]u8, nbytes: usize, basep: *i64) isize; -pub extern "c" fn setgid(ruid: c_uint, euid: c_uint) c_int; -pub extern "c" fn setuid(uid: c_uint) c_int; + +pub extern "c" fn setuid(uid: uid_t) c_int; +pub extern "c" fn setgid(gid: gid_t) c_int; +pub extern "c" fn seteuid(euid: uid_t) c_int; +pub extern "c" fn setegid(egid: gid_t) c_int; +pub extern "c" fn setreuid(ruid: uid_t, euid: uid_t) c_int; +pub extern "c" fn setregid(rgid: gid_t, egid: gid_t) c_int; +pub extern "c" fn setresuid(ruid: uid_t, euid: uid_t, suid: uid_t) c_int; +pub extern "c" fn setresgid(rgid: gid_t, egid: gid_t, sgid: gid_t) c_int; pub extern "c" fn aligned_alloc(alignment: usize, size: usize) ?*c_void; pub extern "c" fn malloc(usize) ?*c_void; @@ -335,3 +340,5 @@ pub extern "c" fn sync() void; pub extern "c" fn syncfs(fd: c_int) c_int; pub extern "c" fn fsync(fd: c_int) c_int; pub extern "c" fn fdatasync(fd: c_int) c_int; + +pub extern "c" fn prctl(option: c_int, ...) 
c_int; diff --git a/lib/std/compress.zig b/lib/std/compress.zig new file mode 100644 index 000000000..95f496021 --- /dev/null +++ b/lib/std/compress.zig @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. +const std = @import("std.zig"); + +pub const deflate = @import("compress/deflate.zig"); +pub const gzip = @import("compress/gzip.zig"); +pub const zlib = @import("compress/zlib.zig"); + +test "" { + _ = gzip; + _ = zlib; +} diff --git a/lib/std/compress/deflate.zig b/lib/std/compress/deflate.zig new file mode 100644 index 000000000..9fe96cacb --- /dev/null +++ b/lib/std/compress/deflate.zig @@ -0,0 +1,635 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. +// +// Decompressor for DEFLATE data streams (RFC1951) +// +// Heavily inspired by the simple decompressor puff.c by Mark Adler + +const std = @import("std"); +const io = std.io; +const math = std.math; +const mem = std.mem; + +const assert = std.debug.assert; + +const MAXBITS = 15; +const MAXLCODES = 286; +const MAXDCODES = 30; +const MAXCODES = MAXLCODES + MAXDCODES; +const FIXLCODES = 288; + +// The maximum length of a Huffman code's prefix we can decode using the fast +// path. The factor 9 is inherited from Zlib, tweaking the value showed little +// or no changes in the profiler output. 
+const PREFIX_LUT_BITS = 9; + +const Huffman = struct { + // Number of codes for each possible length + count: [MAXBITS + 1]u16, + // Mapping between codes and symbols + symbol: [MAXCODES]u16, + + // The decoding process uses a trick explained by Mark Adler in [1]. + // We basically precompute for a fixed number of codes (0 <= x <= 2^N-1) + // the symbol and the effective code length we'd get if the decoder was run + // on the given N-bit sequence. + // A code with length 0 means the sequence is not a valid prefix for this + // canonical Huffman code and we have to decode it using a slower method. + // + // [1] https://github.com/madler/zlib/blob/v1.2.11/doc/algorithm.txt#L58 + prefix_lut: [1 << PREFIX_LUT_BITS]u16, + prefix_lut_len: [1 << PREFIX_LUT_BITS]u16, + // The following info refer to the codes of length PREFIX_LUT_BITS+1 and are + // used to bootstrap the bit-by-bit reading method if the fast-path fails. + last_code: u16, + last_index: u16, + + fn construct(self: *Huffman, code_length: []const u16) !void { + for (self.count) |*val| { + val.* = 0; + } + + for (code_length) |len| { + self.count[len] += 1; + } + + // All zero. + if (self.count[0] == code_length.len) + return; + + var left: isize = 1; + for (self.count[1..]) |val| { + // Each added bit doubles the amount of codes. + left *= 2; + // Make sure the number of codes with this length isn't too high. + left -= @as(isize, @bitCast(i16, val)); + if (left < 0) + return error.InvalidTree; + } + + // Compute the offset of the first symbol represented by a code of a + // given length in the symbol table, together with the first canonical + // Huffman code for that length. 
+ var offset: [MAXBITS + 1]u16 = undefined; + var codes: [MAXBITS + 1]u16 = undefined; + { + offset[1] = 0; + codes[1] = 0; + var len: usize = 1; + while (len < MAXBITS) : (len += 1) { + offset[len + 1] = offset[len] + self.count[len]; + codes[len + 1] = (codes[len] + self.count[len]) << 1; + } + } + + self.prefix_lut_len = mem.zeroes(@TypeOf(self.prefix_lut_len)); + + for (code_length) |len, symbol| { + if (len != 0) { + // Fill the symbol table. + // The symbols are assigned sequentially for each length. + self.symbol[offset[len]] = @truncate(u16, symbol); + // Track the last assigned offset + offset[len] += 1; + } + + if (len == 0 or len > PREFIX_LUT_BITS) + continue; + + // Given a Huffman code of length N we have to massage it so + // that it becomes an index in the lookup table. + // The bit order is reversed as the fast path reads the bit + // sequence MSB to LSB using an &, the order is flipped wrt the + // one obtained by reading bit-by-bit. + // The codes are prefix-free, if the prefix matches we can + // safely ignore the trail bits. We do so by replicating the + // symbol info for each combination of the trailing bits. 
+ const bits_to_fill = @intCast(u5, PREFIX_LUT_BITS - len); + const rev_code = bitReverse(codes[len], len); + // Track the last used code, but only for lengths < PREFIX_LUT_BITS + codes[len] += 1; + + var j: usize = 0; + while (j < @as(usize, 1) << bits_to_fill) : (j += 1) { + const index = rev_code | (j << @intCast(u5, len)); + assert(self.prefix_lut_len[index] == 0); + self.prefix_lut[index] = @truncate(u16, symbol); + self.prefix_lut_len[index] = @truncate(u16, len); + } + } + + self.last_code = codes[PREFIX_LUT_BITS + 1]; + self.last_index = offset[PREFIX_LUT_BITS + 1] - self.count[PREFIX_LUT_BITS + 1]; + } +}; + +// Reverse bit-by-bit a N-bit value +fn bitReverse(x: usize, N: usize) usize { + var tmp: usize = 0; + var i: usize = 0; + while (i < N) : (i += 1) { + tmp |= ((x >> @intCast(u5, i)) & 1) << @intCast(u5, N - i - 1); + } + return tmp; +} + +pub fn InflateStream(comptime ReaderType: type) type { + return struct { + const Self = @This(); + + pub const Error = ReaderType.Error || error{ + EndOfStream, + BadCounts, + InvalidBlockType, + InvalidDistance, + InvalidFixedCode, + InvalidLength, + InvalidStoredSize, + InvalidSymbol, + InvalidTree, + MissingEOBCode, + NoLastLength, + OutOfCodes, + }; + pub const Reader = io.Reader(*Self, Error, read); + + inner_reader: ReaderType, + + // True if the decoder met the end of the compressed stream, no further + // data can be decompressed + seen_eos: bool, + + state: union(enum) { + // Parse a compressed block header and set up the internal state for + // decompressing its contents. + DecodeBlockHeader: void, + // Decode all the symbols in a compressed block. + DecodeBlockData: void, + // Copy N bytes of uncompressed data from the underlying stream into + // the window. + Copy: usize, + // Copy 1 byte into the window. + CopyLit: u8, + // Copy L bytes from the window itself, starting from D bytes + // behind. 
+ CopyFrom: struct { distance: u16, length: u16 }, + }, + + // Sliding window for the LZ77 algorithm + window: struct { + const WSelf = @This(); + + // invariant: buffer length is always a power of 2 + buf: []u8, + // invariant: ri <= wi + wi: usize = 0, // Write index + ri: usize = 0, // Read index + el: usize = 0, // Number of readable elements + + fn readable(self: *WSelf) usize { + return self.el; + } + + fn writable(self: *WSelf) usize { + return self.buf.len - self.el; + } + + // Insert a single byte into the window. + // Returns 1 if there's enough space for the new byte and 0 + // otherwise. + fn append(self: *WSelf, value: u8) usize { + if (self.writable() < 1) return 0; + self.appendUnsafe(value); + return 1; + } + + // Insert a single byte into the window. + // Assumes there's enough space. + inline fn appendUnsafe(self: *WSelf, value: u8) void { + self.buf[self.wi] = value; + self.wi = (self.wi + 1) & (self.buf.len - 1); + self.el += 1; + } + + // Fill dest[] with data from the window, starting from the read + // position. This updates the read pointer. + // Returns the number of read bytes or 0 if there's nothing to read + // yet. + fn read(self: *WSelf, dest: []u8) usize { + const N = math.min(dest.len, self.readable()); + + if (N == 0) return 0; + + if (self.ri + N < self.buf.len) { + // The data doesn't wrap around + mem.copy(u8, dest, self.buf[self.ri .. self.ri + N]); + } else { + // The data wraps around the buffer, split the copy + std.mem.copy(u8, dest, self.buf[self.ri..]); + // How much data we've copied from `ri` to the end + const r = self.buf.len - self.ri; + std.mem.copy(u8, dest[r..], self.buf[0 .. N - r]); + } + + self.ri = (self.ri + N) & (self.buf.len - 1); + self.el -= N; + + return N; + } + + // Copy `length` bytes starting from `distance` bytes behind the + // write pointer. + // Be careful as the length may be greater than the distance, that's + // how the compressor encodes run-length encoded sequences. 
+ fn copyFrom(self: *WSelf, distance: usize, length: usize) usize { + const N = math.min(length, self.writable()); + + if (N == 0) return 0; + + // TODO: Profile and, if needed, replace with smarter juggling + // of the window memory for the non-overlapping case. + var i: usize = 0; + while (i < N) : (i += 1) { + const index = (self.wi -% distance) & (self.buf.len - 1); + self.appendUnsafe(self.buf[index]); + } + + return N; + } + }, + + // Compressor-local Huffman tables used to decompress blocks with + // dynamic codes. + huffman_tables: [2]Huffman = undefined, + + // Huffman tables used for decoding length/distance pairs. + hdist: *Huffman, + hlen: *Huffman, + + // Temporary buffer for the bitstream, only bits 0..`bits_left` are + // considered valid. + bits: u32, + bits_left: usize, + + fn peekBits(self: *Self, bits: usize) !u32 { + while (self.bits_left < bits) { + const byte = try self.inner_reader.readByte(); + self.bits |= @as(u32, byte) << @intCast(u5, self.bits_left); + self.bits_left += 8; + } + return self.bits & ((@as(u32, 1) << @intCast(u5, bits)) - 1); + } + fn readBits(self: *Self, bits: usize) !u32 { + const val = try self.peekBits(bits); // propagate a failed peek first: discarding with fewer than `bits` buffered would underflow `bits_left` + self.discardBits(bits); + return val; + } + fn discardBits(self: *Self, bits: usize) void { + self.bits >>= @intCast(u5, bits); + self.bits_left -= bits; + } + + fn stored(self: *Self) !void { + // Discard the remaining bits, the length field is always + // byte-aligned (and so is the data) + self.discardBits(self.bits_left); + + const length = try self.inner_reader.readIntLittle(u16); + const length_cpl = try self.inner_reader.readIntLittle(u16); + + if (length != ~length_cpl) + return error.InvalidStoredSize; + + self.state = .{ .Copy = length }; + } + + fn fixed(self: *Self) !void { + comptime var lencode: Huffman = undefined; + comptime var distcode: Huffman = undefined; + + // The Huffman codes are specified in the RFC1951, section 3.2.6 + comptime { + @setEvalBranchQuota(100000); + + const len_lengths = // + 
[_]u16{8} ** 144 ++ + [_]u16{9} ** 112 ++ + [_]u16{7} ** 24 ++ + [_]u16{8} ** 8; + assert(len_lengths.len == FIXLCODES); + try lencode.construct(len_lengths[0..]); + + const dist_lengths = [_]u16{5} ** MAXDCODES; + try distcode.construct(dist_lengths[0..]); + } + + self.hlen = &lencode; + self.hdist = &distcode; + self.state = .DecodeBlockData; + } + + fn dynamic(self: *Self) !void { + // Number of length codes + const nlen = (try self.readBits(5)) + 257; + // Number of distance codes + const ndist = (try self.readBits(5)) + 1; + // Number of code length codes + const ncode = (try self.readBits(4)) + 4; + + if (nlen > MAXLCODES or ndist > MAXDCODES) + return error.BadCounts; + + // Permutation of code length codes + const ORDER = [19]u16{ + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, + 12, 3, 13, 2, 14, 1, 15, + }; + + // Build the Huffman table to decode the code length codes + var lencode: Huffman = undefined; + { + var lengths = std.mem.zeroes([19]u16); + + // Read the code lengths, missing ones are left as zero + for (ORDER[0..ncode]) |val| { + lengths[val] = @intCast(u16, try self.readBits(3)); + } + + try lencode.construct(lengths[0..]); + } + + // Read the length/literal and distance code length tables. 
+ // Zero the table by default so we can avoid explicitly writing out + // zeros for codes 17 and 18 + var lengths = std.mem.zeroes([MAXCODES]u16); + + var i: usize = 0; + while (i < nlen + ndist) { + const symbol = try self.decode(&lencode); + + switch (symbol) { + 0...15 => { + lengths[i] = symbol; + i += 1; + }, + 16 => { + // repeat last length 3..6 times + if (i == 0) return error.NoLastLength; + + const last_length = lengths[i - 1]; + const repeat = 3 + (try self.readBits(2)); + const last_index = i + repeat; + while (i < last_index) : (i += 1) { + lengths[i] = last_length; + } + }, + 17 => { + // repeat zero 3..10 times + i += 3 + (try self.readBits(3)); + }, + 18 => { + // repeat zero 11..138 times + i += 11 + (try self.readBits(7)); + }, + else => return error.InvalidSymbol, + } + } + + if (i > nlen + ndist) + return error.InvalidLength; + + // Check if the end of block code is present + if (lengths[256] == 0) + return error.MissingEOBCode; + + try self.huffman_tables[0].construct(lengths[0..nlen]); + try self.huffman_tables[1].construct(lengths[nlen .. 
nlen + ndist]); + + self.hlen = &self.huffman_tables[0]; + self.hdist = &self.huffman_tables[1]; + self.state = .DecodeBlockData; + } + + fn codes(self: *Self, lencode: *Huffman, distcode: *Huffman) !bool { + // Size base for length codes 257..285 + const LENS = [29]u16{ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, + }; + // Extra bits for length codes 257..285 + const LEXT = [29]u16{ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, + }; + // Offset base for distance codes 0..29 + const DISTS = [30]u16{ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, + }; + // Extra bits for distance codes 0..29 + const DEXT = [30]u16{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + }; + + while (true) { + const symbol = try self.decode(lencode); + + switch (symbol) { + 0...255 => { + // Literal value + const c = @truncate(u8, symbol); + if (self.window.append(c) == 0) { + self.state = .{ .CopyLit = c }; + return false; + } + }, + 256 => { + // End of block symbol + return true; + }, + 257...285 => { + // Length/distance pair + const length_symbol = symbol - 257; + const length = LENS[length_symbol] + + @intCast(u16, try self.readBits(LEXT[length_symbol])); + + const distance_symbol = try self.decode(distcode); + const distance = DISTS[distance_symbol] + + @intCast(u16, try self.readBits(DEXT[distance_symbol])); + + if (distance > self.window.buf.len) + return error.InvalidDistance; + + const written = self.window.copyFrom(distance, length); + if (written != length) { + self.state = .{ + .CopyFrom = .{ + .distance = distance, + .length = length - @truncate(u16, written), + }, + }; + return false; + } + }, + else => return error.InvalidFixedCode, + } + } + } + + fn decode(self: *Self, h: *Huffman) !u16 { + // Fast 
path, read some bits and hope they're prefixes of some code + const prefix = try self.peekBits(PREFIX_LUT_BITS); + if (h.prefix_lut_len[prefix] != 0) { + self.discardBits(h.prefix_lut_len[prefix]); + return h.prefix_lut[prefix]; + } + + // The sequence we've read is not a prefix of any code of length <= + // PREFIX_LUT_BITS, keep decoding it using a slower method + self.discardBits(PREFIX_LUT_BITS); + + // Speed up the decoding by starting from the first code length + // that's not covered by the table + var len: usize = PREFIX_LUT_BITS + 1; + var first: usize = h.last_code; + var index: usize = h.last_index; + + // Reverse the prefix so that the LSB becomes the MSB and make space + // for the next bit + var code = bitReverse(prefix, PREFIX_LUT_BITS + 1); + + while (len <= MAXBITS) : (len += 1) { + code |= try self.readBits(1); + const count = h.count[len]; + if (code < first + count) + return h.symbol[index + (code - first)]; + index += count; + first += count; + first <<= 1; + code <<= 1; + } + + return error.OutOfCodes; + } + + fn step(self: *Self) !void { + while (true) { + switch (self.state) { + .DecodeBlockHeader => { + // The compressed stream is done + if (self.seen_eos) return; + + const last = @intCast(u1, try self.readBits(1)); + const kind = @intCast(u2, try self.readBits(2)); + + self.seen_eos = last != 0; + + // The next state depends on the block type + switch (kind) { + 0 => try self.stored(), + 1 => try self.fixed(), + 2 => try self.dynamic(), + 3 => return error.InvalidBlockType, + } + }, + .DecodeBlockData => { + if (!try self.codes(self.hlen, self.hdist)) { + return; + } + + self.state = .DecodeBlockHeader; + }, + .Copy => |*length| { + const N = math.min(self.window.writable(), length.*); + + // TODO: This loop can be more efficient. On the other + // hand uncompressed blocks are not that common so... 
+ var i: usize = 0; + while (i < N) : (i += 1) { + var tmp: [1]u8 = undefined; + if ((try self.inner_reader.read(&tmp)) != 1) { + // Unexpected end of stream, keep this error + // consistent with the use of readBitsNoEof + return error.EndOfStream; + } + self.window.appendUnsafe(tmp[0]); + } + + if (N != length.*) { + length.* -= N; + return; + } + + self.state = .DecodeBlockHeader; + }, + .CopyLit => |c| { + if (self.window.append(c) == 0) { + return; + } + + self.state = .DecodeBlockData; + }, + .CopyFrom => |*info| { + const written = self.window.copyFrom(info.distance, info.length); + if (written != info.length) { + info.length -= @truncate(u16, written); + return; + } + + self.state = .DecodeBlockData; + }, + } + } + } + + fn init(source: ReaderType, window_slice: []u8) Self { + assert(math.isPowerOfTwo(window_slice.len)); + + return Self{ + .inner_reader = source, + .window = .{ .buf = window_slice }, + .seen_eos = false, + .state = .DecodeBlockHeader, + .hdist = undefined, + .hlen = undefined, + .bits = 0, + .bits_left = 0, + }; + } + + // Implements the io.Reader interface + pub fn read(self: *Self, buffer: []u8) Error!usize { + if (buffer.len == 0) + return 0; + + // Try reading as much as possible from the window + var read_amt: usize = self.window.read(buffer); + while (read_amt < buffer.len) { + // Run the state machine, we can detect the "effective" end of + // stream condition by checking if any progress was made. + // Why "effective"? Because even though `seen_eos` is true we + // may still have to finish processing other decoding steps. 
+ try self.step(); + // No progress was made + if (self.window.readable() == 0) + break; + + read_amt += self.window.read(buffer[read_amt..]); + } + + return read_amt; + } + + pub fn reader(self: *Self) Reader { + return .{ .context = self }; + } + }; +} + +pub fn inflateStream(reader: anytype, window_slice: []u8) InflateStream(@TypeOf(reader)) { + return InflateStream(@TypeOf(reader)).init(reader, window_slice); +} diff --git a/lib/std/compress/gzip.zig b/lib/std/compress/gzip.zig new file mode 100644 index 000000000..aad173139 --- /dev/null +++ b/lib/std/compress/gzip.zig @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. +// +// Decompressor for GZIP data streams (RFC1952) + +const std = @import("std"); +const io = std.io; +const fs = std.fs; +const testing = std.testing; +const mem = std.mem; +const deflate = std.compress.deflate; + +// Flags for the FLG field in the header +const FTEXT = 1 << 0; +const FHCRC = 1 << 1; +const FEXTRA = 1 << 2; +const FNAME = 1 << 3; +const FCOMMENT = 1 << 4; + +pub fn GzipStream(comptime ReaderType: type) type { + return struct { + const Self = @This(); + + pub const Error = ReaderType.Error || + deflate.InflateStream(ReaderType).Error || + error{ CorruptedData, WrongChecksum }; + pub const Reader = io.Reader(*Self, Error, read); + + allocator: *mem.Allocator, + inflater: deflate.InflateStream(ReaderType), + in_reader: ReaderType, + hasher: std.hash.Crc32, + window_slice: []u8, + read_amt: usize, + + info: struct { + filename: ?[]const u8, + comment: ?[]const u8, + modification_time: u32, + }, + + fn init(allocator: *mem.Allocator, source: ReaderType) !Self { + // gzip header format is specified in RFC1952 + const header = try source.readBytesNoEof(10); + + // Check the ID1/ID2 fields 
+ if (header[0] != 0x1f or header[1] != 0x8b) + return error.BadHeader; + + const CM = header[2]; + // The CM field must be 8 to indicate the use of DEFLATE + if (CM != 8) return error.InvalidCompression; + // Flags + const FLG = header[3]; + // Modification time, as a Unix timestamp. + // If zero there's no timestamp available. + const MTIME = mem.readIntLittle(u32, header[4..8]); + // Extra flags + const XFL = header[8]; + // Operating system where the compression took place + const OS = header[9]; + + if (FLG & FEXTRA != 0) { + // Skip the extra data, we could read and expose it to the user + // if somebody needs it. + const len = try source.readIntLittle(u16); + try source.skipBytes(len, .{}); + } + + var filename: ?[]const u8 = null; + if (FLG & FNAME != 0) { + filename = try source.readUntilDelimiterAlloc( + allocator, + 0, + std.math.maxInt(usize), + ); + } + errdefer if (filename) |p| allocator.free(p); + + var comment: ?[]const u8 = null; + if (FLG & FCOMMENT != 0) { + comment = try source.readUntilDelimiterAlloc( + allocator, + 0, + std.math.maxInt(usize), + ); + } + errdefer if (comment) |p| allocator.free(p); + + if (FLG & FHCRC != 0) { + // TODO: Evaluate and check the header checksum. The stdlib has + // no CRC16 yet :( + _ = try source.readIntLittle(u16); + } + + // The RFC doesn't say anything about the DEFLATE window size to be + // used, default to 32K. 
+ var window_slice = try allocator.alloc(u8, 32 * 1024); + + return Self{ + .allocator = allocator, + .inflater = deflate.inflateStream(source, window_slice), + .in_reader = source, + .hasher = std.hash.Crc32.init(), + .window_slice = window_slice, + .info = .{ + .filename = filename, + .comment = comment, + .modification_time = MTIME, + }, + .read_amt = 0, + }; + } + + pub fn deinit(self: *Self) void { + self.allocator.free(self.window_slice); + if (self.info.filename) |filename| + self.allocator.free(filename); + if (self.info.comment) |comment| + self.allocator.free(comment); + } + + // Implements the io.Reader interface + pub fn read(self: *Self, buffer: []u8) Error!usize { + if (buffer.len == 0) + return 0; + + // Read from the compressed stream and update the computed checksum + const r = try self.inflater.read(buffer); + if (r != 0) { + self.hasher.update(buffer[0..r]); + self.read_amt += r; + return r; + } + + // We've reached the end of stream, check if the checksum matches + const hash = try self.in_reader.readIntLittle(u32); + if (hash != self.hasher.final()) + return error.WrongChecksum; + + // The ISIZE field is the size of the uncompressed input modulo 2^32 + const input_size = try self.in_reader.readIntLittle(u32); + if (self.read_amt & 0xffffffff != input_size) + return error.CorruptedData; + + return 0; + } + + pub fn reader(self: *Self) Reader { + return .{ .context = self }; + } + }; +} + +pub fn gzipStream(allocator: *mem.Allocator, reader: anytype) !GzipStream(@TypeOf(reader)) { + return GzipStream(@TypeOf(reader)).init(allocator, reader); +} + +fn testReader(data: []const u8, comptime expected: []const u8) !void { + var in_stream = io.fixedBufferStream(data); + + var gzip_stream = try gzipStream(testing.allocator, in_stream.reader()); + defer gzip_stream.deinit(); + + // Read and decompress the whole file + const buf = try gzip_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize)); + defer testing.allocator.free(buf); + // 
Calculate its SHA256 hash and check it against the reference + var hash: [32]u8 = undefined; + std.crypto.hash.sha2.Sha256.hash(buf, hash[0..], .{}); + + assertEqual(expected, &hash); +} + +// Assert `expected` == `input` where `input` is a bytestring. +pub fn assertEqual(comptime expected: []const u8, input: []const u8) void { + var expected_bytes: [expected.len / 2]u8 = undefined; + for (expected_bytes) |*r, i| { + r.* = std.fmt.parseInt(u8, expected[2 * i .. 2 * i + 2], 16) catch unreachable; + } + + testing.expectEqualSlices(u8, &expected_bytes, input); +} + +// All the test cases are obtained by compressing the RFC1952 text +// +// https://tools.ietf.org/rfc/rfc1952.txt length=25037 bytes +// SHA256=164ef0897b4cbec63abf1b57f069f3599bd0fb7c72c2a4dee21bd7e03ec9af67 +test "compressed data" { + try testReader( + @embedFile("rfc1952.txt.gz"), + "164ef0897b4cbec63abf1b57f069f3599bd0fb7c72c2a4dee21bd7e03ec9af67", + ); +} + +test "sanity checks" { + // Truncated header + testing.expectError( + error.EndOfStream, + testReader(&[_]u8{ 0x1f, 0x8B }, ""), + ); + // Wrong CM + testing.expectError( + error.InvalidCompression, + testReader(&[_]u8{ + 0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, + }, ""), + ); + // Wrong checksum + testing.expectError( + error.WrongChecksum, + testReader(&[_]u8{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, + }, ""), + ); + // Truncated checksum + testing.expectError( + error.EndOfStream, + testReader(&[_]u8{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, + }, ""), + ); + // Wrong initial size + testing.expectError( + error.CorruptedData, + testReader(&[_]u8{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + }, ""), + ); + // Truncated initial size field + testing.expectError( + error.EndOfStream, + testReader(&[_]u8{ + 0x1f, 
0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, + }, ""), + ); +} diff --git a/lib/std/compress/rfc1951.txt b/lib/std/compress/rfc1951.txt new file mode 100644 index 000000000..403c8c722 --- /dev/null +++ b/lib/std/compress/rfc1951.txt @@ -0,0 +1,955 @@ + + + + + + +Network Working Group P. Deutsch +Request for Comments: 1951 Aladdin Enterprises +Category: Informational May 1996 + + + DEFLATE Compressed Data Format Specification version 1.3 + +Status of This Memo + + This memo provides information for the Internet community. This memo + does not specify an Internet standard of any kind. Distribution of + this memo is unlimited. + +IESG Note: + + The IESG takes no position on the validity of any Intellectual + Property Rights statements contained in this document. + +Notices + + Copyright (c) 1996 L. Peter Deutsch + + Permission is granted to copy and distribute this document for any + purpose and without charge, including translations into other + languages and incorporation into compilations, provided that the + copyright notice and this notice are preserved, and that any + substantive changes or deletions from the original are clearly + marked. + + A pointer to the latest version of this and related documentation in + HTML format can be found at the URL + . + +Abstract + + This specification defines a lossless compressed data format that + compresses data using a combination of the LZ77 algorithm and Huffman + coding, with efficiency comparable to the best currently available + general-purpose compression methods. The data can be produced or + consumed, even for an arbitrarily long sequentially presented input + data stream, using only an a priori bounded amount of intermediate + storage. The format can be implemented readily in a manner not + covered by patents. 
+ + + + + + + + +Deutsch Informational [Page 1] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +Table of Contents + + 1. Introduction ................................................... 2 + 1.1. Purpose ................................................... 2 + 1.2. Intended audience ......................................... 3 + 1.3. Scope ..................................................... 3 + 1.4. Compliance ................................................ 3 + 1.5. Definitions of terms and conventions used ................ 3 + 1.6. Changes from previous versions ............................ 4 + 2. Compressed representation overview ............................. 4 + 3. Detailed specification ......................................... 5 + 3.1. Overall conventions ....................................... 5 + 3.1.1. Packing into bytes .................................. 5 + 3.2. Compressed block format ................................... 6 + 3.2.1. Synopsis of prefix and Huffman coding ............... 6 + 3.2.2. Use of Huffman coding in the "deflate" format ....... 7 + 3.2.3. Details of block format ............................. 9 + 3.2.4. Non-compressed blocks (BTYPE=00) ................... 11 + 3.2.5. Compressed blocks (length and distance codes) ...... 11 + 3.2.6. Compression with fixed Huffman codes (BTYPE=01) .... 12 + 3.2.7. Compression with dynamic Huffman codes (BTYPE=10) .. 13 + 3.3. Compliance ............................................... 14 + 4. Compression algorithm details ................................. 14 + 5. References .................................................... 16 + 6. Security Considerations ....................................... 16 + 7. Source code ................................................... 16 + 8. Acknowledgements .............................................. 16 + 9. Author's Address .............................................. 17 + +1. Introduction + + 1.1. 
Purpose + + The purpose of this specification is to define a lossless + compressed data format that: + * Is independent of CPU type, operating system, file system, + and character set, and hence can be used for interchange; + * Can be produced or consumed, even for an arbitrarily long + sequentially presented input data stream, using only an a + priori bounded amount of intermediate storage, and hence + can be used in data communications or similar structures + such as Unix filters; + * Compresses data with efficiency comparable to the best + currently available general-purpose compression methods, + and in particular considerably better than the "compress" + program; + * Can be implemented readily in a manner not covered by + patents, and hence can be practiced freely; + + + +Deutsch Informational [Page 2] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + * Is compatible with the file format produced by the current + widely used gzip utility, in that conforming decompressors + will be able to read data produced by the existing gzip + compressor. + + The data format defined by this specification does not attempt to: + + * Allow random access to compressed data; + * Compress specialized data (e.g., raster graphics) as well + as the best currently available specialized algorithms. + + A simple counting argument shows that no lossless compression + algorithm can compress every possible input data set. For the + format defined here, the worst case expansion is 5 bytes per 32K- + byte block, i.e., a size increase of 0.015% for large data sets. + English text usually compresses by a factor of 2.5 to 3; + executable files usually compress somewhat less; graphical data + such as raster images may compress much more. + + 1.2. Intended audience + + This specification is intended for use by implementors of software + to compress data into "deflate" format and/or decompress data from + "deflate" format. 
+ + The text of the specification assumes a basic background in + programming at the level of bits and other primitive data + representations. Familiarity with the technique of Huffman coding + is helpful but not required. + + 1.3. Scope + + The specification specifies a method for representing a sequence + of bytes as a (usually shorter) sequence of bits, and a method for + packing the latter bit sequence into bytes. + + 1.4. Compliance + + Unless otherwise indicated below, a compliant decompressor must be + able to accept and decompress any data set that conforms to all + the specifications presented here; a compliant compressor must + produce data sets that conform to all the specifications presented + here. + + 1.5. Definitions of terms and conventions used + + Byte: 8 bits stored or transmitted as a unit (same as an octet). + For this specification, a byte is exactly 8 bits, even on machines + + + +Deutsch Informational [Page 3] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + which store a character on a number of bits different from eight. + See below, for the numbering of bits within a byte. + + String: a sequence of arbitrary bytes. + + 1.6. Changes from previous versions + + There have been no technical changes to the deflate format since + version 1.1 of this specification. In version 1.2, some + terminology was changed. Version 1.3 is a conversion of the + specification to RFC style. + +2. Compressed representation overview + + A compressed data set consists of a series of blocks, corresponding + to successive blocks of input data. The block sizes are arbitrary, + except that non-compressible blocks are limited to 65,535 bytes. + + Each block is compressed using a combination of the LZ77 algorithm + and Huffman coding. 
The Huffman trees for each block are independent + of those for previous or subsequent blocks; the LZ77 algorithm may + use a reference to a duplicated string occurring in a previous block, + up to 32K input bytes before. + + Each block consists of two parts: a pair of Huffman code trees that + describe the representation of the compressed data part, and a + compressed data part. (The Huffman trees themselves are compressed + using Huffman encoding.) The compressed data consists of a series of + elements of two types: literal bytes (of strings that have not been + detected as duplicated within the previous 32K input bytes), and + pointers to duplicated strings, where a pointer is represented as a + pair <length, backward distance>. The representation used in the + "deflate" format limits distances to 32K bytes and lengths to 258 + bytes, but does not limit the size of a block, except for + uncompressible blocks, which are limited as noted above. + + Each type of value (literals, distances, and lengths) in the + compressed data is represented using a Huffman code, using one code + tree for literals and lengths and a separate code tree for distances. + The code trees for each block appear in a compact form just before + the compressed data for that block. + + + + + + + + + + +Deutsch Informational [Page 4] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +3. Detailed specification + + 3.1. Overall conventions In the diagrams below, a box like this: + + +---+ + | | <-- the vertical bars might be missing + +---+ + + represents one byte; a box like this: + + +==============+ + | | + +==============+ + + represents a variable number of bytes. + + Bytes stored within a computer do not have a "bit order", since + they are always treated as a unit. 
However, a byte considered as + an integer between 0 and 255 does have a most- and least- + significant bit, and since we write numbers with the most- + significant digit on the left, we also write bytes with the most- + significant bit on the left. In the diagrams below, we number the + bits of a byte so that bit 0 is the least-significant bit, i.e., + the bits are numbered: + + +--------+ + |76543210| + +--------+ + + Within a computer, a number may occupy multiple bytes. All + multi-byte numbers in the format described here are stored with + the least-significant byte first (at the lower memory address). + For example, the decimal number 520 is stored as: + + 0 1 + +--------+--------+ + |00001000|00000010| + +--------+--------+ + ^ ^ + | | + | + more significant byte = 2 x 256 + + less significant byte = 8 + + 3.1.1. Packing into bytes + + This document does not address the issue of the order in which + bits of a byte are transmitted on a bit-sequential medium, + since the final data format described here is byte- rather than + + + +Deutsch Informational [Page 5] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + bit-oriented. However, we describe the compressed block format + in below, as a sequence of data elements of various bit + lengths, not a sequence of bytes. We must therefore specify + how to pack these data elements into bytes to form the final + compressed byte sequence: + + * Data elements are packed into bytes in order of + increasing bit number within the byte, i.e., starting + with the least-significant bit of the byte. + * Data elements other than Huffman codes are packed + starting with the least-significant bit of the data + element. + * Huffman codes are packed starting with the most- + significant bit of the code. 
+ + In other words, if one were to print out the compressed data as + a sequence of bytes, starting with the first byte at the + *right* margin and proceeding to the *left*, with the most- + significant bit of each byte on the left as usual, one would be + able to parse the result from right to left, with fixed-width + elements in the correct MSB-to-LSB order and Huffman codes in + bit-reversed order (i.e., with the first bit of the code in the + relative LSB position). + + 3.2. Compressed block format + + 3.2.1. Synopsis of prefix and Huffman coding + + Prefix coding represents symbols from an a priori known + alphabet by bit sequences (codes), one code for each symbol, in + a manner such that different symbols may be represented by bit + sequences of different lengths, but a parser can always parse + an encoded string unambiguously symbol-by-symbol. + + We define a prefix code in terms of a binary tree in which the + two edges descending from each non-leaf node are labeled 0 and + 1 and in which the leaf nodes correspond one-for-one with (are + labeled with) the symbols of the alphabet; then the code for a + symbol is the sequence of 0's and 1's on the edges leading from + the root to the leaf labeled with that symbol. For example: + + + + + + + + + + + +Deutsch Informational [Page 6] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + /\ Symbol Code + 0 1 ------ ---- + / \ A 00 + /\ B B 1 + 0 1 C 011 + / \ D 010 + A /\ + 0 1 + / \ + D C + + A parser can decode the next symbol from an encoded input + stream by walking down the tree from the root, at each step + choosing the edge corresponding to the next input bit. + + Given an alphabet with known symbol frequencies, the Huffman + algorithm allows the construction of an optimal prefix code + (one which represents strings with those symbol frequencies + using the fewest bits of any possible prefix codes for that + alphabet). Such a code is called a Huffman code. 
(See + reference [1] in Chapter 5, references for additional + information on Huffman codes.) + + Note that in the "deflate" format, the Huffman codes for the + various alphabets must not exceed certain maximum code lengths. + This constraint complicates the algorithm for computing code + lengths from symbol frequencies. Again, see Chapter 5, + references for details. + + 3.2.2. Use of Huffman coding in the "deflate" format + + The Huffman codes used for each alphabet in the "deflate" + format have two additional rules: + + * All codes of a given bit length have lexicographically + consecutive values, in the same order as the symbols + they represent; + + * Shorter codes lexicographically precede longer codes. + + + + + + + + + + + + +Deutsch Informational [Page 7] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + We could recode the example above to follow this rule as + follows, assuming that the order of the alphabet is ABCD: + + Symbol Code + ------ ---- + A 10 + B 0 + C 110 + D 111 + + I.e., 0 precedes 10 which precedes 11x, and 110 and 111 are + lexicographically consecutive. + + Given this rule, we can define the Huffman code for an alphabet + just by giving the bit lengths of the codes for each symbol of + the alphabet in order; this is sufficient to determine the + actual codes. In our example, the code is completely defined + by the sequence of bit lengths (2, 1, 3, 3). The following + algorithm generates the codes as integers, intended to be read + from most- to least-significant bit. The code lengths are + initially in tree[I].Len; the codes are produced in + tree[I].Code. + + 1) Count the number of codes for each code length. Let + bl_count[N] be the number of codes of length N, N >= 1. 
+ + 2) Find the numerical value of the smallest code for each + code length: + + code = 0; + bl_count[0] = 0; + for (bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = code; + } + + 3) Assign numerical values to all codes, using consecutive + values for all codes of the same length with the base + values determined at step 2. Codes that are never used + (which have a bit length of zero) must not be assigned a + value. + + for (n = 0; n <= max_code; n++) { + len = tree[n].Len; + if (len != 0) { + tree[n].Code = next_code[len]; + next_code[len]++; + } + + + +Deutsch Informational [Page 8] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + } + + Example: + + Consider the alphabet ABCDEFGH, with bit lengths (3, 3, 3, 3, + 3, 2, 4, 4). After step 1, we have: + + N bl_count[N] + - ----------- + 2 1 + 3 5 + 4 2 + + Step 2 computes the following next_code values: + + N next_code[N] + - ------------ + 1 0 + 2 0 + 3 2 + 4 14 + + Step 3 produces the following code values: + + Symbol Length Code + ------ ------ ---- + A 3 010 + B 3 011 + C 3 100 + D 3 101 + E 3 110 + F 2 00 + G 4 1110 + H 4 1111 + + 3.2.3. Details of block format + + Each block of compressed data begins with 3 header bits + containing the following data: + + first bit BFINAL + next 2 bits BTYPE + + Note that the header bits do not necessarily begin on a byte + boundary, since a block does not necessarily occupy an integral + number of bytes. + + + + + +Deutsch Informational [Page 9] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + BFINAL is set if and only if this is the last block of the data + set. 
+ + BTYPE specifies how the data are compressed, as follows: + + 00 - no compression + 01 - compressed with fixed Huffman codes + 10 - compressed with dynamic Huffman codes + 11 - reserved (error) + + The only difference between the two compressed cases is how the + Huffman codes for the literal/length and distance alphabets are + defined. + + In all cases, the decoding algorithm for the actual data is as + follows: + + do + read block header from input stream. + if stored with no compression + skip any remaining bits in current partially + processed byte + read LEN and NLEN (see next section) + copy LEN bytes of data to output + otherwise + if compressed with dynamic Huffman codes + read representation of code trees (see + subsection below) + loop (until end of block code recognized) + decode literal/length value from input stream + if value < 256 + copy value (literal byte) to output stream + otherwise + if value = end of block (256) + break from loop + otherwise (value = 257..285) + decode distance from input stream + + move backwards distance bytes in the output + stream, and copy length bytes from this + position to the output stream. + end loop + while not last block + + Note that a duplicated string reference may refer to a string + in a previous block; i.e., the backward distance may cross one + or more block boundaries. However a distance cannot refer past + the beginning of the output stream. (An application using a + + + +Deutsch Informational [Page 10] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + preset dictionary might discard part of the output stream; a + distance can refer to that part of the output stream anyway) + Note also that the referenced string may overlap the current + position; for example, if the last 2 bytes decoded have values + X and Y, a string reference with + <length = 5, distance = 2> adds X,Y,X,Y,X to the output stream. + + We now specify each compression method in turn. + + 3.2.4. 
Non-compressed blocks (BTYPE=00) + + Any bits of input up to the next byte boundary are ignored. + The rest of the block consists of the following information: + + 0 1 2 3 4... + +---+---+---+---+================================+ + | LEN | NLEN |... LEN bytes of literal data...| + +---+---+---+---+================================+ + + LEN is the number of data bytes in the block. NLEN is the + one's complement of LEN. + + 3.2.5. Compressed blocks (length and distance codes) + + As noted above, encoded data blocks in the "deflate" format + consist of sequences of symbols drawn from three conceptually + distinct alphabets: either literal bytes, from the alphabet of + byte values (0..255), or <length, backward distance> pairs, + where the length is drawn from (3..258) and the distance is + drawn from (1..32,768). In fact, the literal and length + alphabets are merged into a single alphabet (0..285), where + values 0..255 represent literal bytes, the value 256 indicates + end-of-block, and values 257..285 represent length codes + (possibly in conjunction with extra bits following the symbol + code) as follows: + + + + + + + + + + + + + + + + +Deutsch Informational [Page 11] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + Extra Extra Extra + Code Bits Length(s) Code Bits Lengths Code Bits Length(s) + ---- ---- ------ ---- ---- ------- ---- ---- ------- + 257 0 3 267 1 15,16 277 4 67-82 + 258 0 4 268 1 17,18 278 4 83-98 + 259 0 5 269 2 19-22 279 4 99-114 + 260 0 6 270 2 23-26 280 4 115-130 + 261 0 7 271 2 27-30 281 5 131-162 + 262 0 8 272 2 31-34 282 5 163-194 + 263 0 9 273 3 35-42 283 5 195-226 + 264 0 10 274 3 43-50 284 5 227-257 + 265 1 11,12 275 3 51-58 285 0 258 + 266 1 13,14 276 3 59-66 + + The extra bits should be interpreted as a machine integer + stored with the most-significant bit first, e.g., bits 1110 + represent the value 14. 
+ + Extra Extra Extra + Code Bits Dist Code Bits Dist Code Bits Distance + ---- ---- ---- ---- ---- ------ ---- ---- -------- + 0 0 1 10 4 33-48 20 9 1025-1536 + 1 0 2 11 4 49-64 21 9 1537-2048 + 2 0 3 12 5 65-96 22 10 2049-3072 + 3 0 4 13 5 97-128 23 10 3073-4096 + 4 1 5,6 14 6 129-192 24 11 4097-6144 + 5 1 7,8 15 6 193-256 25 11 6145-8192 + 6 2 9-12 16 7 257-384 26 12 8193-12288 + 7 2 13-16 17 7 385-512 27 12 12289-16384 + 8 3 17-24 18 8 513-768 28 13 16385-24576 + 9 3 25-32 19 8 769-1024 29 13 24577-32768 + + 3.2.6. Compression with fixed Huffman codes (BTYPE=01) + + The Huffman codes for the two alphabets are fixed, and are not + represented explicitly in the data. The Huffman code lengths + for the literal/length alphabet are: + + Lit Value Bits Codes + --------- ---- ----- + 0 - 143 8 00110000 through + 10111111 + 144 - 255 9 110010000 through + 111111111 + 256 - 279 7 0000000 through + 0010111 + 280 - 287 8 11000000 through + 11000111 + + + +Deutsch Informational [Page 12] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + The code lengths are sufficient to generate the actual codes, + as described above; we show the codes in the table for added + clarity. Literal/length values 286-287 will never actually + occur in the compressed data, but participate in the code + construction. + + Distance codes 0-31 are represented by (fixed-length) 5-bit + codes, with possible additional bits as shown in the table + shown in Paragraph 3.2.5, above. Note that distance codes 30- + 31 will never actually occur in the compressed data. + + 3.2.7. Compression with dynamic Huffman codes (BTYPE=10) + + The Huffman codes for the two alphabets appear in the block + immediately after the header bits and before the actual + compressed data, first the literal/length code and then the + distance code. Each code is defined by a sequence of code + lengths, as discussed in Paragraph 3.2.2, above. 
For even + greater compactness, the code length sequences themselves are + compressed using a Huffman code. The alphabet for code lengths + is as follows: + + 0 - 15: Represent code lengths of 0 - 15 + 16: Copy the previous code length 3 - 6 times. + The next 2 bits indicate repeat length + (0 = 3, ... , 3 = 6) + Example: Codes 8, 16 (+2 bits 11), + 16 (+2 bits 10) will expand to + 12 code lengths of 8 (1 + 6 + 5) + 17: Repeat a code length of 0 for 3 - 10 times. + (3 bits of length) + 18: Repeat a code length of 0 for 11 - 138 times + (7 bits of length) + + A code length of 0 indicates that the corresponding symbol in + the literal/length or distance alphabet will not occur in the + block, and should not participate in the Huffman code + construction algorithm given earlier. If only one distance + code is used, it is encoded using one bit, not zero bits; in + this case there is a single code length of one, with one unused + code. One distance code of zero bits means that there are no + distance codes used at all (the data is all literals). + + We can now define the format of the block: + + 5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286) + 5 Bits: HDIST, # of Distance codes - 1 (1 - 32) + 4 Bits: HCLEN, # of Code Length codes - 4 (4 - 19) + + + +Deutsch Informational [Page 13] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + (HCLEN + 4) x 3 bits: code lengths for the code length + alphabet given just above, in the order: 16, 17, 18, + 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + + These code lengths are interpreted as 3-bit integers + (0-7); as above, a code length of 0 means the + corresponding symbol (literal/length or distance code + length) is not used. 
+ + HLIT + 257 code lengths for the literal/length alphabet, + encoded using the code length Huffman code + + HDIST + 1 code lengths for the distance alphabet, + encoded using the code length Huffman code + + The actual compressed data of the block, + encoded using the literal/length and distance Huffman + codes + + The literal/length symbol 256 (end of data), + encoded using the literal/length Huffman code + + The code length repeat codes can cross from HLIT + 257 to the + HDIST + 1 code lengths. In other words, all code lengths form + a single sequence of HLIT + HDIST + 258 values. + + 3.3. Compliance + + A compressor may limit further the ranges of values specified in + the previous section and still be compliant; for example, it may + limit the range of backward pointers to some value smaller than + 32K. Similarly, a compressor may limit the size of blocks so that + a compressible block fits in memory. + + A compliant decompressor must accept the full range of possible + values defined in the previous section, and must accept blocks of + arbitrary size. + +4. Compression algorithm details + + While it is the intent of this document to define the "deflate" + compressed data format without reference to any particular + compression algorithm, the format is related to the compressed + formats produced by LZ77 (Lempel-Ziv 1977, see reference [2] below); + since many variations of LZ77 are patented, it is strongly + recommended that the implementor of a compressor follow the general + algorithm presented here, which is known not to be patented per se. + The material in this section is not part of the definition of the + + + +Deutsch Informational [Page 14] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + specification per se, and a compressor need not follow it in order to + be compliant. 
+ + The compressor terminates a block when it determines that starting a + new block with fresh trees would be useful, or when the block size + fills up the compressor's block buffer. + + The compressor uses a chained hash table to find duplicated strings, + using a hash function that operates on 3-byte sequences. At any + given point during compression, let XYZ be the next 3 input bytes to + be examined (not necessarily all different, of course). First, the + compressor examines the hash chain for XYZ. If the chain is empty, + the compressor simply writes out X as a literal byte and advances one + byte in the input. If the hash chain is not empty, indicating that + the sequence XYZ (or, if we are unlucky, some other 3 bytes with the + same hash function value) has occurred recently, the compressor + compares all strings on the XYZ hash chain with the actual input data + sequence starting at the current point, and selects the longest + match. + + The compressor searches the hash chains starting with the most recent + strings, to favor small distances and thus take advantage of the + Huffman encoding. The hash chains are singly linked. There are no + deletions from the hash chains; the algorithm simply discards matches + that are too old. To avoid a worst-case situation, very long hash + chains are arbitrarily truncated at a certain length, determined by a + run-time parameter. + + To improve overall compression, the compressor optionally defers the + selection of matches ("lazy matching"): after a match of length N has + been found, the compressor searches for a longer match starting at + the next input byte. If it finds a longer match, it truncates the + previous match to a length of one (thus producing a single literal + byte) and then emits the longer match. Otherwise, it emits the + original match, and, as described above, advances N bytes before + continuing. + + Run-time parameters also control this "lazy match" procedure. 
If + compression ratio is most important, the compressor attempts a + complete second search regardless of the length of the first match. + In the normal case, if the current match is "long enough", the + compressor reduces the search for a longer match, thus speeding up + the process. If speed is most important, the compressor inserts new + strings in the hash table only when no match was found, or when the + match is not "too long". This degrades the compression ratio but + saves time since there are both fewer insertions and fewer searches. + + + + + +Deutsch Informational [Page 15] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +5. References + + [1] Huffman, D. A., "A Method for the Construction of Minimum + Redundancy Codes", Proceedings of the Institute of Radio + Engineers, September 1952, Volume 40, Number 9, pp. 1098-1101. + + [2] Ziv J., Lempel A., "A Universal Algorithm for Sequential Data + Compression", IEEE Transactions on Information Theory, Vol. 23, + No. 3, pp. 337-343. + + [3] Gailly, J.-L., and Adler, M., ZLIB documentation and sources, + available in ftp://ftp.uu.net/pub/archiving/zip/doc/ + + [4] Gailly, J.-L., and Adler, M., GZIP documentation and sources, + available as gzip-*.tar in ftp://prep.ai.mit.edu/pub/gnu/ + + [5] Schwartz, E. S., and Kallick, B. "Generating a canonical prefix + encoding." Comm. ACM, 7,3 (Mar. 1964), pp. 166-169. + + [6] Hirschberg and Lelewer, "Efficient decoding of prefix codes," + Comm. ACM, 33,4, April 1990, pp. 449-459. + +6. Security Considerations + + Any data compression method involves the reduction of redundancy in + the data. Consequently, any corruption of the data is likely to have + severe effects and be difficult to correct. Uncompressed text, on + the other hand, will probably still be readable despite the presence + of some corrupted bytes. + + It is recommended that systems using this data format provide some + means of validating the integrity of the compressed data. 
See + reference [3], for example. + +7. Source code + + Source code for a C language implementation of a "deflate" compliant + compressor and decompressor is available within the zlib package at + ftp://ftp.uu.net/pub/archiving/zip/zlib/. + +8. Acknowledgements + + Trademarks cited in this document are the property of their + respective owners. + + Phil Katz designed the deflate format. Jean-Loup Gailly and Mark + Adler wrote the related software described in this specification. + Glenn Randers-Pehrson converted this document to RFC and HTML format. + + + +Deutsch Informational [Page 16] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +9. Author's Address + + L. Peter Deutsch + Aladdin Enterprises + 203 Santa Margarita Ave. + Menlo Park, CA 94025 + + Phone: (415) 322-0103 (AM only) + FAX: (415) 322-1734 + EMail: <ghost@aladdin.com> + + Questions about the technical content of this specification can be + sent by email to: + + Jean-Loup Gailly <gzip@prep.ai.mit.edu> and + Mark Adler <madler@alumni.caltech.edu> + + Editorial comments on this specification can be sent by email to: + + L.
Peter Deutsch <ghost@aladdin.com> and + Glenn Randers-Pehrson <randeg@alumni.rpi.edu> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Deutsch Informational [Page 17] + diff --git a/lib/std/compress/rfc1951.txt.fixed.z.9 b/lib/std/compress/rfc1951.txt.fixed.z.9 new file mode 100644 index 000000000..8ea590477 Binary files /dev/null and b/lib/std/compress/rfc1951.txt.fixed.z.9 differ diff --git a/lib/std/compress/rfc1951.txt.z.0 b/lib/std/compress/rfc1951.txt.z.0 new file mode 100644 index 000000000..3f50fb68f Binary files /dev/null and b/lib/std/compress/rfc1951.txt.z.0 differ diff --git a/lib/std/compress/rfc1951.txt.z.9 b/lib/std/compress/rfc1951.txt.z.9 new file mode 100644 index 000000000..84e7cbe5b Binary files /dev/null and b/lib/std/compress/rfc1951.txt.z.9 differ diff --git a/lib/std/compress/rfc1952.txt.gz b/lib/std/compress/rfc1952.txt.gz new file mode 100644 index 000000000..be43b90a7 Binary files /dev/null and b/lib/std/compress/rfc1952.txt.gz differ diff --git a/lib/std/compress/zlib.zig b/lib/std/compress/zlib.zig new file mode 100644 index 000000000..d4bac4a8a --- /dev/null +++ b/lib/std/compress/zlib.zig @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software.
+// +// Decompressor for ZLIB data streams (RFC1950) + +const std = @import("std"); +const io = std.io; +const fs = std.fs; +const testing = std.testing; +const mem = std.mem; +const deflate = std.compress.deflate; + +pub fn ZlibStream(comptime ReaderType: type) type { + return struct { + const Self = @This(); + + pub const Error = ReaderType.Error || + deflate.InflateStream(ReaderType).Error || + error{ WrongChecksum, Unsupported }; + pub const Reader = io.Reader(*Self, Error, read); + + allocator: *mem.Allocator, + inflater: deflate.InflateStream(ReaderType), + in_reader: ReaderType, + hasher: std.hash.Adler32, + window_slice: []u8, + + fn init(allocator: *mem.Allocator, source: ReaderType) !Self { + // Zlib header format is specified in RFC1950: byte 0 carries CM (low 4 bits) and CINFO (high 4 bits), byte 1 carries FCHECK (low 5 bits) and FDICT (bit 5) + const header = try source.readBytesNoEof(2); + + const CM = @truncate(u4, header[0]); + const CINFO = @truncate(u4, header[0] >> 4); + const FCHECK = @truncate(u5, header[1]); + const FDICT = @truncate(u1, header[1] >> 5); + + if ((@as(u16, header[0]) << 8 | header[1]) % 31 != 0) + return error.BadHeader; + + // The CM field must be 8 to indicate the use of DEFLATE + if (CM != 8) return error.InvalidCompression; + // CINFO is the base-2 logarithm of the window size, minus 8. + // Values above 7 are unspecified and therefore rejected.
+ if (CINFO > 7) return error.InvalidWindowSize; + const window_size: u16 = @as(u16, 1) << (CINFO + 8); + + // TODO: Support streams that set FDICT (preset dictionary, see RFC1950) + if (FDICT != 0) + return error.Unsupported; + + var window_slice = try allocator.alloc(u8, window_size); + + return Self{ + .allocator = allocator, + .inflater = deflate.inflateStream(source, window_slice), + .in_reader = source, + .hasher = std.hash.Adler32.init(), + .window_slice = window_slice, + }; + } + + pub fn deinit(self: *Self) void { + self.allocator.free(self.window_slice); + } + + // Implements the io.Reader interface + pub fn read(self: *Self, buffer: []u8) Error!usize { + if (buffer.len == 0) + return 0; + + // Pull decompressed bytes from the inflater and fold them into the running Adler-32 checksum + const r = try self.inflater.read(buffer); + if (r != 0) { + self.hasher.update(buffer[0..r]); + return r; + } + + // The inflater produced no bytes (end of stream): the next big-endian u32 of input must match the computed checksum + const hash = try self.in_reader.readIntBig(u32); + if (hash != self.hasher.final()) + return error.WrongChecksum; + + return 0; + } + + pub fn reader(self: *Self) Reader { + return .{ .context = self }; + } + }; +} + +pub fn zlibStream(allocator: *mem.Allocator, reader: anytype) !ZlibStream(@TypeOf(reader)) { + return ZlibStream(@TypeOf(reader)).init(allocator, reader); +} + +fn testReader(data: []const u8, comptime expected: []const u8) !void { + var in_stream = io.fixedBufferStream(data); + + var zlib_stream = try zlibStream(testing.allocator, in_stream.reader()); + defer zlib_stream.deinit(); + + // Read and decompress the whole file + const buf = try zlib_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize)); + defer testing.allocator.free(buf); + // Calculate its SHA256 hash and check it against the reference + var hash: [32]u8 = undefined; + std.crypto.hash.sha2.Sha256.hash(buf, hash[0..], .{}); + + assertEqual(expected, &hash); +} + +// Assert `expected` == `input` where `input` is a bytestring.
+pub fn assertEqual(comptime expected: []const u8, input: []const u8) void { + var expected_bytes: [expected.len / 2]u8 = undefined; + for (expected_bytes) |*r, i| { + r.* = std.fmt.parseInt(u8, expected[2 * i .. 2 * i + 2], 16) catch unreachable; + } + + testing.expectEqualSlices(u8, &expected_bytes, input); +} + +// All the test cases are obtained by compressing the RFC1951 text +// +// https://tools.ietf.org/rfc/rfc1951.txt length=36944 bytes +// SHA256=5ebf4b5b7fe1c3a0c0ab9aa3ac8c0f3853a7dc484905e76e03b0b0f301350009 +test "compressed data" { + // Compressed with compression level = 0 + try testReader( + @embedFile("rfc1951.txt.z.0"), + "5ebf4b5b7fe1c3a0c0ab9aa3ac8c0f3853a7dc484905e76e03b0b0f301350009", + ); + // Compressed with compression level = 9 + try testReader( + @embedFile("rfc1951.txt.z.9"), + "5ebf4b5b7fe1c3a0c0ab9aa3ac8c0f3853a7dc484905e76e03b0b0f301350009", + ); + // Compressed with compression level = 9 and fixed Huffman codes + try testReader( + @embedFile("rfc1951.txt.fixed.z.9"), + "5ebf4b5b7fe1c3a0c0ab9aa3ac8c0f3853a7dc484905e76e03b0b0f301350009", + ); +} + +test "sanity checks" { + // Truncated header + testing.expectError( + error.EndOfStream, + testReader(&[_]u8{0x78}, ""), + ); + // Failed FCHECK check + testing.expectError( + error.BadHeader, + testReader(&[_]u8{ 0x78, 0x9D }, ""), + ); + // Wrong CM + testing.expectError( + error.InvalidCompression, + testReader(&[_]u8{ 0x79, 0x94 }, ""), + ); + // Wrong CINFO + testing.expectError( + error.InvalidWindowSize, + testReader(&[_]u8{ 0x88, 0x98 }, ""), + ); + // Wrong checksum + testing.expectError( + error.WrongChecksum, + testReader(&[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""), + ); + // Truncated checksum + testing.expectError( + error.EndOfStream, + testReader(&[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""), + ); +} diff --git a/lib/std/crypto.zig b/lib/std/crypto.zig index 5de2f1389..3a1ae599a 100644 --- a/lib/std/crypto.zig +++ b/lib/std/crypto.zig @@ -35,6 +35,15 @@ pub
const onetimeauth = struct { pub const Poly1305 = @import("crypto/poly1305.zig").Poly1305; }; +/// A Key Derivation Function (KDF) is intended to turn a weak, human generated password into a +/// strong key, suitable for cryptographic uses. It does this by salting and stretching the +/// password. Salting injects non-secret random data, so that identical passwords will be converted +/// into unique keys. Stretching applies a deliberately slow hashing function to frustrate +/// brute-force guessing. +pub const kdf = struct { + pub const pbkdf2 = @import("crypto/pbkdf2.zig").pbkdf2; +}; + /// Core functions, that should rarely be used directly by applications. pub const core = struct { pub const aes = @import("crypto/aes.zig"); @@ -70,6 +79,20 @@ const std = @import("std.zig"); pub const randomBytes = std.os.getrandom; test "crypto" { + inline for (std.meta.declarations(@This())) |decl| { + switch (decl.data) { + .Type => |t| { + std.meta.refAllDecls(t); + }, + .Var => |v| { + _ = v; + }, + .Fn => |f| { + _ = f; + }, + } + } + _ = @import("crypto/aes.zig"); _ = @import("crypto/blake2.zig"); _ = @import("crypto/blake3.zig"); @@ -77,6 +100,7 @@ test "crypto" { _ = @import("crypto/gimli.zig"); _ = @import("crypto/hmac.zig"); _ = @import("crypto/md5.zig"); + _ = @import("crypto/pbkdf2.zig"); _ = @import("crypto/poly1305.zig"); _ = @import("crypto/sha1.zig"); _ = @import("crypto/sha2.zig"); diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig index 15ddaf7b3..e20b27220 100644 --- a/lib/std/crypto/benchmark.zig +++ b/lib/std/crypto/benchmark.zig @@ -5,8 +5,8 @@ // and substantial portions of the software. // zig run benchmark.zig --release-fast --override-lib-dir .. 
-const builtin = @import("builtin"); -const std = @import("std"); +const std = @import("../std.zig"); +const builtin = std.builtin; const mem = std.mem; const time = std.time; const Timer = time.Timer; diff --git a/lib/std/crypto/blake2.zig b/lib/std/crypto/blake2.zig index 1cb143811..1599be3b2 100644 --- a/lib/std/crypto/blake2.zig +++ b/lib/std/crypto/blake2.zig @@ -3,10 +3,10 @@ // This file is part of [zig](https://ziglang.org/), which is MIT licensed. // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. -const mem = @import("../mem.zig"); -const builtin = @import("builtin"); -const debug = @import("../debug.zig"); -const math = @import("../math.zig"); +const std = @import("../std.zig"); +const mem = std.mem; +const math = std.math; +const debug = std.debug; const htest = @import("test.zig"); const RoundParam = struct { diff --git a/lib/std/crypto/chacha20.zig b/lib/std/crypto/chacha20.zig index d2167db31..915f81b9f 100644 --- a/lib/std/crypto/chacha20.zig +++ b/lib/std/crypto/chacha20.zig @@ -7,10 +7,8 @@ const std = @import("../std.zig"); const mem = std.mem; -const endian = std.endian; const assert = std.debug.assert; const testing = std.testing; -const builtin = @import("builtin"); const maxInt = std.math.maxInt; const Poly1305 = std.crypto.onetimeauth.Poly1305; diff --git a/lib/std/crypto/md5.zig b/lib/std/crypto/md5.zig index 0d221fabf..7ba78b587 100644 --- a/lib/std/crypto/md5.zig +++ b/lib/std/crypto/md5.zig @@ -3,12 +3,10 @@ // This file is part of [zig](https://ziglang.org/), which is MIT licensed. // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. 
-const mem = @import("../mem.zig"); -const math = @import("../math.zig"); -const endian = @import("../endian.zig"); -const builtin = @import("builtin"); -const debug = @import("../debug.zig"); -const fmt = @import("../fmt.zig"); +const std = @import("../std.zig"); +const mem = std.mem; +const math = std.math; +const debug = std.debug; const RoundParam = struct { a: usize, diff --git a/lib/std/crypto/pbkdf2.zig b/lib/std/crypto/pbkdf2.zig new file mode 100644 index 000000000..85c8e0110 --- /dev/null +++ b/lib/std/crypto/pbkdf2.zig @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. + +const std = @import("std"); +const mem = std.mem; +const maxInt = std.math.maxInt; + +// RFC 2898 Section 5.2 +// +// FromSpec: +// +// PBKDF2 applies a pseudorandom function (see Appendix B.1 for an +// example) to derive keys. The length of the derived key is essentially +// unbounded. (However, the maximum effective search space for the +// derived key may be limited by the structure of the underlying +// pseudorandom function. See Appendix B.1 for further discussion.) +// PBKDF2 is recommended for new applications. +// +// PBKDF2 (P, S, c, dkLen) +// +// Options: PRF underlying pseudorandom function (hLen +// denotes the length in octets of the +// pseudorandom function output) +// +// Input: P password, an octet string +// S salt, an octet string +// c iteration count, a positive integer +// dkLen intended length in octets of the derived +// key, a positive integer, at most +// (2^32 - 1) * hLen +// +// Output: DK derived key, a dkLen-octet string + +// Based on Apple's CommonKeyDerivation, based originally on code by Damien Bergamini. 
+ +pub const Pbkdf2Error = error{ + /// At least one round is required + TooFewRounds, + + /// Maximum length of the derived key is `maxInt(u32) * Prf.mac_length` + DerivedKeyTooLong, +}; + +/// Apply PBKDF2 to generate a key from a password. +/// +/// PBKDF2 is defined in RFC 2898, and is a recommendation of NIST SP 800-132. +/// +/// derivedKey: Slice of appropriate size for generated key. Generally 16 or 32 bytes in length. +/// May be uninitialized. All bytes will be overwritten. +/// Maximum size is `maxInt(u32) * Prf.mac_length` +/// Returns `error.DerivedKeyTooLong` if the buffer is longer than the maximum size. +/// +/// password: Arbitrary sequence of bytes of any length, including empty. +/// +/// salt: Arbitrary sequence of bytes of any length, including empty. A common length is 8 bytes. +/// +/// rounds: Iteration count. Must be greater than 0. Common values range from 1,000 to 100,000. +/// Larger iteration counts improve security by increasing the time required to compute +/// the derivedKey. It is common to tune this parameter to achieve approximately 100ms. +/// +/// Prf: Pseudo-random function to use. A common choice is `std.crypto.auth.hmac.HmacSha256`. +pub fn pbkdf2(derivedKey: []u8, password: []const u8, salt: []const u8, rounds: u32, comptime Prf: type) Pbkdf2Error!void { + if (rounds < 1) return error.TooFewRounds; + + const dkLen = derivedKey.len; + const hLen = Prf.mac_length; + comptime std.debug.assert(hLen >= 1); + + // FromSpec: + // + // 1. If dkLen > maxInt(u32) * hLen, output "derived key too long" and + // stop. + // + if (comptime (maxInt(usize) > maxInt(u32) * hLen) and (dkLen > @as(usize, maxInt(u32) * hLen))) { + // If maxInt(usize) is less than `maxInt(u32) * hLen` then dkLen is always inbounds + return error.DerivedKeyTooLong; + } + + // FromSpec: + // + // 2.
Let l be the number of hLen-long blocks of bytes in the derived key, + // rounding up, and let r be the number of bytes in the last + // block + // + + // l will not overflow, proof: + // let `L(dkLen, hLen) = (dkLen + hLen - 1) / hLen` + // then `L^-1(l, hLen) = l*hLen - hLen + 1` + // 1) L^-1(maxInt(u32), hLen) <= maxInt(u32)*hLen + // 2) maxInt(u32)*hLen - hLen + 1 <= maxInt(u32)*hLen // subtract maxInt(u32)*hLen + 1 + // 3) -hLen <= -1 // multiply by -1 + // 4) hLen >= 1 + const r_ = dkLen % hLen; + const l = @intCast(u32, (dkLen / hLen) + @as(u1, if (r_ == 0) 0 else 1)); // original: (dkLen + hLen - 1) / hLen + const r = if (r_ == 0) hLen else r_; + + // FromSpec: + // + // 3. For each block of the derived key apply the function F defined + // below to the password P, the salt S, the iteration count c, and + // the block index to compute the block: + // + // T_1 = F (P, S, c, 1) , + // T_2 = F (P, S, c, 2) , + // ... + // T_l = F (P, S, c, l) , + // + // where the function F is defined as the exclusive-or sum of the + // first c iterates of the underlying pseudorandom function PRF + // applied to the password P and the concatenation of the salt S + // and the block index i: + // + // F (P, S, c, i) = U_1 \xor U_2 \xor ... \xor U_c + // + // where + // + // U_1 = PRF (P, S || INT (i)) , + // U_2 = PRF (P, U_1) , + // ... + // U_c = PRF (P, U_{c-1}) . + // + // Here, INT (i) is a four-octet encoding of the integer i, most + // significant octet first. + // + // 4. Concatenate the blocks and extract the first dkLen octets to + // produce a derived key DK: + // + // DK = T_1 || T_2 || ...
|| T_l<0..r-1> + var block: u32 = 0; // Spec limits to u32 + while (block < l) : (block += 1) { + var prevBlock: [hLen]u8 = undefined; + var newBlock: [hLen]u8 = undefined; + + // U_1 = PRF (P, S || INT (i)) + const blockIndex = mem.toBytes(mem.nativeToBig(u32, block + 1)); // Block index starts at 0001 + var ctx = Prf.init(password); + ctx.update(salt); + ctx.update(blockIndex[0..]); + ctx.final(prevBlock[0..]); + + // Choose portion of DK to write into (T_n) and initialize; the offset is computed in usize because block * hLen no longer fits in u32 once dkLen passes 4 GiB + const offset = @intCast(usize, block) * hLen; + const blockLen = if (block != l - 1) hLen else r; + const dkBlock: []u8 = derivedKey[offset..][0..blockLen]; + mem.copy(u8, dkBlock, prevBlock[0..dkBlock.len]); + + var i: u32 = 1; + while (i < rounds) : (i += 1) { + // U_c = PRF (P, U_{c-1}) + Prf.create(&newBlock, prevBlock[0..], password); + mem.copy(u8, prevBlock[0..], newBlock[0..]); + + // F (P, S, c, i) = U_1 \xor U_2 \xor ... \xor U_c + for (dkBlock) |_, j| { + dkBlock[j] ^= newBlock[j]; + } + } + } +} + +const htest = @import("test.zig"); +const HmacSha1 = std.crypto.auth.hmac.HmacSha1; + +// RFC 6070 PBKDF2 HMAC-SHA1 Test Vectors +test "RFC 6070 one iteration" { + const p = "password"; + const s = "salt"; + const c = 1; + const dkLen = 20; + + var derivedKey: [dkLen]u8 = undefined; + + try pbkdf2(&derivedKey, p, s, c, HmacSha1); + + const expected = "0c60c80f961f0e71f3a9b524af6012062fe037a6"; + + htest.assertEqual(expected, derivedKey[0..]); +} + +test "RFC 6070 two iterations" { + const p = "password"; + const s = "salt"; + const c = 2; + const dkLen = 20; + + var derivedKey: [dkLen]u8 = undefined; + + try pbkdf2(&derivedKey, p, s, c, HmacSha1); + + const expected = "ea6c014dc72d6f8ccd1ed92ace1d41f0d8de8957"; + + htest.assertEqual(expected, derivedKey[0..]); +} + +test "RFC 6070 4096 iterations" { + const p = "password"; + const s = "salt"; + const c = 4096; + const dkLen = 20; + + var derivedKey: [dkLen]u8 = undefined; + + try pbkdf2(&derivedKey, p, s, c, HmacSha1); + + const expected =
"4b007901b765489abead49d926f721d065a429c1"; + + htest.assertEqual(expected, derivedKey[0..]); +} + +test "RFC 6070 16,777,216 iterations" { + // These iteration tests are slow so we always skip them. Results have been verified. + if (true) { + return error.SkipZigTest; + } + + const p = "password"; + const s = "salt"; + const c = 16777216; + const dkLen = 20; + + var derivedKey = [_]u8{0} ** dkLen; + + try pbkdf2(&derivedKey, p, s, c, HmacSha1); + + const expected = "eefe3d61cd4da4e4e9945b3d6ba2158c2634e984"; + + htest.assertEqual(expected, derivedKey[0..]); +} + +test "RFC 6070 multi-block salt and password" { + const p = "passwordPASSWORDpassword"; + const s = "saltSALTsaltSALTsaltSALTsaltSALTsalt"; + const c = 4096; + const dkLen = 25; + + var derivedKey: [dkLen]u8 = undefined; + + try pbkdf2(&derivedKey, p, s, c, HmacSha1); + + const expected = "3d2eec4fe41c849b80c8d83662c0e44a8b291a964cf2f07038"; + + htest.assertEqual(expected, derivedKey[0..]); +} + +test "RFC 6070 embedded NUL" { + const p = "pass\x00word"; + const s = "sa\x00lt"; + const c = 4096; + const dkLen = 16; + + var derivedKey: [dkLen]u8 = undefined; + + try pbkdf2(&derivedKey, p, s, c, HmacSha1); + + const expected = "56fa6aa75548099dcc37d7f03425e0c3"; + + htest.assertEqual(expected, derivedKey[0..]); +} + +test "Very large dkLen" { + // This test allocates 8GB of memory and is expected to take several hours to run. 
+ if (true) { + return error.SkipZigTest; + } + const p = "password"; + const s = "salt"; + const c = 1; + const dkLen = 1 << 33; + + var derivedKey = try std.testing.allocator.alloc(u8, dkLen); + defer { + std.testing.allocator.free(derivedKey); + } + + try pbkdf2(derivedKey, p, s, c, HmacSha1); + // Just verify this doesn't crash with an overflow +} diff --git a/lib/std/crypto/poly1305.zig b/lib/std/crypto/poly1305.zig index c0b462c60..a95b9d7cb 100644 --- a/lib/std/crypto/poly1305.zig +++ b/lib/std/crypto/poly1305.zig @@ -3,7 +3,7 @@ // This file is part of [zig](https://ziglang.org/), which is MIT licensed. // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. -const std = @import("std"); +const std = @import("../std.zig"); const mem = std.mem; pub const Poly1305 = struct { diff --git a/lib/std/crypto/sha1.zig b/lib/std/crypto/sha1.zig index 03fd55b6a..f4b380e4f 100644 --- a/lib/std/crypto/sha1.zig +++ b/lib/std/crypto/sha1.zig @@ -3,11 +3,10 @@ // This file is part of [zig](https://ziglang.org/), which is MIT licensed. // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. -const mem = @import("../mem.zig"); -const math = @import("../math.zig"); -const endian = @import("../endian.zig"); -const debug = @import("../debug.zig"); -const builtin = @import("builtin"); +const std = @import("../std.zig"); +const mem = std.mem; +const math = std.math; +const debug = std.debug; const RoundParam = struct { a: usize, diff --git a/lib/std/crypto/sha2.zig b/lib/std/crypto/sha2.zig index af2c22fe1..3e388f2e4 100644 --- a/lib/std/crypto/sha2.zig +++ b/lib/std/crypto/sha2.zig @@ -3,11 +3,10 @@ // This file is part of [zig](https://ziglang.org/), which is MIT licensed. // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. 
-const mem = @import("../mem.zig"); -const math = @import("../math.zig"); -const endian = @import("../endian.zig"); -const debug = @import("../debug.zig"); -const builtin = @import("builtin"); +const std = @import("../std.zig"); +const mem = std.mem; +const math = std.math; +const debug = std.debug; const htest = @import("test.zig"); ///////////////////// diff --git a/lib/std/crypto/sha3.zig b/lib/std/crypto/sha3.zig index 3d6dad1be..991eb3862 100644 --- a/lib/std/crypto/sha3.zig +++ b/lib/std/crypto/sha3.zig @@ -3,11 +3,10 @@ // This file is part of [zig](https://ziglang.org/), which is MIT licensed. // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. -const mem = @import("../mem.zig"); -const math = @import("../math.zig"); -const endian = @import("../endian.zig"); -const debug = @import("../debug.zig"); -const builtin = @import("builtin"); +const std = @import("../std.zig"); +const mem = std.mem; +const math = std.math; +const debug = std.debug; const htest = @import("test.zig"); pub const Sha3_224 = Keccak(224, 0x06); diff --git a/lib/std/crypto/siphash.zig b/lib/std/crypto/siphash.zig index 26c892fdd..ae059b256 100644 --- a/lib/std/crypto/siphash.zig +++ b/lib/std/crypto/siphash.zig @@ -218,8 +218,9 @@ fn SipHash(comptime T: type, comptime c_rounds: usize, comptime d_rounds: usize) } /// Return an authentication tag for the current state + /// Assumes `out` is less than or equal to `mac_length`. pub fn final(self: *Self, out: []u8) void { - std.debug.assert(out.len >= mac_length); + std.debug.assert(out.len <= mac_length); mem.writeIntLittle(T, out[0..mac_length], self.state.final(self.buf[0..self.buf_len])); } diff --git a/lib/std/crypto/test.zig b/lib/std/crypto/test.zig index 2987706b1..1655be707 100644 --- a/lib/std/crypto/test.zig +++ b/lib/std/crypto/test.zig @@ -5,7 +5,6 @@ // and substantial portions of the software. 
const std = @import("../std.zig"); const testing = std.testing; -const mem = std.mem; const fmt = std.fmt; // Hash using the specified hasher `H` asserting `expected == H(input)`. diff --git a/lib/std/elf.zig b/lib/std/elf.zig index cd2b5fcd0..9f5672121 100644 --- a/lib/std/elf.zig +++ b/lib/std/elf.zig @@ -471,7 +471,7 @@ pub const SectionHeaderIterator = struct { if (self.elf_header.is_64) { var shdr: Elf64_Shdr = undefined; - const offset = self.elf_header.phoff + @sizeOf(@TypeOf(shdr)) * self.index; + const offset = self.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * self.index; try preadNoEof(self.file, mem.asBytes(&shdr), offset); // ELF endianness matches native endianness. diff --git a/lib/std/event/loop.zig b/lib/std/event/loop.zig index b34ad8c94..2600b337b 100644 --- a/lib/std/event/loop.zig +++ b/lib/std/event/loop.zig @@ -112,7 +112,8 @@ pub const Loop = struct { /// have the correct pointer value. /// https://github.com/ziglang/zig/issues/2761 and https://github.com/ziglang/zig/issues/2765 pub fn init(self: *Loop) !void { - if (builtin.single_threaded) { + if (builtin.single_threaded + or (@hasDecl(root, "event_loop_mode") and root.event_loop_mode == .single_threaded)) { return self.initSingleThreaded(); } else { return self.initMultiThreaded(); diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index a652bd8c2..56a1aba21 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -22,7 +22,7 @@ pub const Alignment = enum { pub const FormatOptions = struct { precision: ?usize = null, width: ?usize = null, - alignment: Alignment = .Left, + alignment: Alignment = .Right, fill: u8 = ' ', }; @@ -327,7 +327,7 @@ pub fn formatType( max_depth: usize, ) @TypeOf(writer).Error!void { if (comptime std.mem.eql(u8, fmt, "*")) { - try writer.writeAll(@typeName(@typeInfo(@TypeOf(value)).Pointer.child)); + try writer.writeAll(@typeName(std.meta.Child(@TypeOf(value)))); try writer.writeAll("@"); try formatInt(@ptrToInt(value), 16, false, FormatOptions{}, writer); return; @@ -399,7 
+399,7 @@ pub fn formatType( try writer.writeAll(@tagName(@as(UnionTagType, value))); try writer.writeAll(" = "); inline for (info.fields) |u_field| { - if (@enumToInt(@as(UnionTagType, value)) == u_field.enum_field.?.value) { + if (value == @field(UnionTagType, u_field.name)) { try formatType(@field(value, u_field.name), fmt, options, writer, max_depth - 1); } } @@ -631,26 +631,22 @@ pub fn formatBuf( writer: anytype, ) !void { const width = options.width orelse buf.len; - var padding = if (width > buf.len) (width - buf.len) else 0; - const pad_byte = [1]u8{options.fill}; + const padding = if (width > buf.len) (width - buf.len) else 0; + switch (options.alignment) { .Left => { try writer.writeAll(buf); - while (padding > 0) : (padding -= 1) { - try writer.writeAll(&pad_byte); - } + try writer.writeByteNTimes(options.fill, padding); }, .Center => { - const padl = padding / 2; - var i: usize = 0; - while (i < padl) : (i += 1) try writer.writeAll(&pad_byte); + const left_padding = padding / 2; + const right_padding = (padding + 1) / 2; + try writer.writeByteNTimes(options.fill, left_padding); try writer.writeAll(buf); - while (i < padding) : (i += 1) try writer.writeAll(&pad_byte); + try writer.writeByteNTimes(options.fill, right_padding); }, .Right => { - while (padding > 0) : (padding -= 1) { - try writer.writeAll(&pad_byte); - } + try writer.writeByteNTimes(options.fill, padding); try writer.writeAll(buf); }, } @@ -941,61 +937,27 @@ pub fn formatInt( options: FormatOptions, writer: anytype, ) !void { + assert(base >= 2); + const int_value = if (@TypeOf(value) == comptime_int) blk: { const Int = math.IntFittingRange(value, value); break :blk @as(Int, value); } else value; - if (@typeInfo(@TypeOf(int_value)).Int.is_signed) { - return formatIntSigned(int_value, base, uppercase, options, writer); - } else { - return formatIntUnsigned(int_value, base, uppercase, options, writer); - } -} + const value_info = @typeInfo(@TypeOf(int_value)).Int; -fn formatIntSigned( - 
value: anytype, - base: u8, - uppercase: bool, - options: FormatOptions, - writer: anytype, -) !void { - const new_options = FormatOptions{ - .width = if (options.width) |w| (if (w == 0) 0 else w - 1) else null, - .precision = options.precision, - .fill = options.fill, - }; - const bit_count = @typeInfo(@TypeOf(value)).Int.bits; - const Uint = std.meta.Int(false, bit_count); - if (value < 0) { - try writer.writeAll("-"); - const new_value = math.absCast(value); - return formatIntUnsigned(new_value, base, uppercase, new_options, writer); - } else if (options.width == null or options.width.? == 0) { - return formatIntUnsigned(@intCast(Uint, value), base, uppercase, options, writer); - } else { - try writer.writeAll("+"); - const new_value = @intCast(Uint, value); - return formatIntUnsigned(new_value, base, uppercase, new_options, writer); - } -} + // The type must have the same size as `base` or be wider in order for the + // division to work + const min_int_bits = comptime math.max(value_info.bits, 8); + const MinInt = std.meta.Int(false, min_int_bits); -fn formatIntUnsigned( - value: anytype, - base: u8, - uppercase: bool, - options: FormatOptions, - writer: anytype, -) !void { - assert(base >= 2); - const value_info = @typeInfo(@TypeOf(value)).Int; - var buf: [math.max(value_info.bits, 1)]u8 = undefined; - const min_int_bits = comptime math.max(value_info.bits, @typeInfo(@TypeOf(base)).Int.bits); - const MinInt = std.meta.Int(value_info.is_signed, min_int_bits); - var a: MinInt = value; + const abs_value = math.absCast(int_value); + // The worst case in terms of space needed is base 2, plus 1 for the sign + var buf: [1 + math.max(value_info.bits, 1)]u8 = undefined; + + var a: MinInt = abs_value; var index: usize = buf.len; - while (true) { const digit = a % base; index -= 1; @@ -1004,25 +966,21 @@ fn formatIntUnsigned( if (a == 0) break; } - const digits_buf = buf[index..]; - const width = options.width orelse 0; - const padding = if (width > digits_buf.len) 
(width - digits_buf.len) else 0; - - if (padding > index) { - const zero_byte: u8 = options.fill; - var leftover_padding = padding - index; - while (true) { - try writer.writeAll(@as(*const [1]u8, &zero_byte)[0..]); - leftover_padding -= 1; - if (leftover_padding == 0) break; + if (value_info.is_signed) { + if (value < 0) { + // Negative integer + index -= 1; + buf[index] = '-'; + } else if (options.width == null or options.width.? == 0) { + // Positive integer, omit the plus sign + } else { + // Positive integer + index -= 1; + buf[index] = '+'; } - mem.set(u8, buf[0..index], options.fill); - return writer.writeAll(&buf); - } else { - const padded_buf = buf[index - padding ..]; - mem.set(u8, padded_buf[0..padding], options.fill); - return writer.writeAll(padded_buf); } + + return formatBuf(buf[index..], options, writer); } pub fn formatIntBuf(out_buf: []u8, value: anytype, base: u8, uppercase: bool, options: FormatOptions) usize { @@ -1246,6 +1204,10 @@ test "optional" { const value: ?i32 = null; try testFmt("optional: null\n", "optional: {}\n", .{value}); } + { + const value = @intToPtr(?*i32, 0xf000d000); + try testFmt("optional: *i32@f000d000\n", "optional: {*}\n", .{value}); + } } test "error" { @@ -1283,7 +1245,17 @@ test "int.specifier" { test "int.padded" { try testFmt("u8: ' 1'", "u8: '{:4}'", .{@as(u8, 1)}); - try testFmt("u8: 'xxx1'", "u8: '{:x<4}'", .{@as(u8, 1)}); + try testFmt("u8: '1000'", "u8: '{:0<4}'", .{@as(u8, 1)}); + try testFmt("u8: '0001'", "u8: '{:0>4}'", .{@as(u8, 1)}); + try testFmt("u8: '0100'", "u8: '{:0^4}'", .{@as(u8, 1)}); + try testFmt("i8: '-1 '", "i8: '{:<4}'", .{@as(i8, -1)}); + try testFmt("i8: ' -1'", "i8: '{:>4}'", .{@as(i8, -1)}); + try testFmt("i8: ' -1 '", "i8: '{:^4}'", .{@as(i8, -1)}); + try testFmt("i16: '-1234'", "i16: '{:4}'", .{@as(i16, -1234)}); + try testFmt("i16: '+1234'", "i16: '{:4}'", .{@as(i16, 1234)}); + try testFmt("i16: '-12345'", "i16: '{:4}'", .{@as(i16, -12345)}); + try testFmt("i16: '+12345'", "i16: 
'{:4}'", .{@as(i16, 12345)}); + try testFmt("u16: '12345'", "u16: '{:4}'", .{@as(u16, 12345)}); } test "buffer" { @@ -1329,7 +1301,7 @@ test "slice" { try testFmt("slice: []const u8@deadbeef\n", "slice: {}\n", .{value}); } - try testFmt("buf: Test \n", "buf: {s:5}\n", .{"Test"}); + try testFmt("buf: Test\n", "buf: {s:5}\n", .{"Test"}); try testFmt("buf: Test\n Other text", "buf: {s}\n Other text", .{"Test"}); } @@ -1362,7 +1334,7 @@ test "cstr" { .{@ptrCast([*c]const u8, "Test C")}, ); try testFmt( - "cstr: Test C \n", + "cstr: Test C\n", "cstr: {s:10}\n", .{@ptrCast([*c]const u8, "Test C")}, ); @@ -1805,7 +1777,7 @@ test "vector" { try testFmt("{ true, false, true, false }", "{}", .{vbool}); try testFmt("{ -2, -1, 0, 1 }", "{}", .{vi64}); - try testFmt("{ - 2, - 1, + 0, + 1 }", "{d:5}", .{vi64}); + try testFmt("{ -2, -1, +0, +1 }", "{d:5}", .{vi64}); try testFmt("{ 1000, 2000, 3000, 4000 }", "{}", .{vu64}); try testFmt("{ 3e8, 7d0, bb8, fa0 }", "{x}", .{vu64}); try testFmt("{ 1kB, 2kB, 3kB, 4kB }", "{B}", .{vu64}); @@ -1818,15 +1790,16 @@ test "enum-literal" { test "padding" { try testFmt("Simple", "{}", .{"Simple"}); - try testFmt("true ", "{:10}", .{true}); + try testFmt(" true", "{:10}", .{true}); try testFmt(" true", "{:>10}", .{true}); try testFmt("======true", "{:=>10}", .{true}); try testFmt("true======", "{:=<10}", .{true}); try testFmt(" true ", "{:^10}", .{true}); try testFmt("===true===", "{:=^10}", .{true}); - try testFmt("Minimum width", "{:18} width", .{"Minimum"}); + try testFmt(" Minimum width", "{:18} width", .{"Minimum"}); try testFmt("==================Filled", "{:=>24}", .{"Filled"}); try testFmt(" Centered ", "{:^24}", .{"Centered"}); + try testFmt("-", "{:-^1}", .{""}); } test "decimal float padding" { diff --git a/lib/std/fs.zig b/lib/std/fs.zig index a217fb3e9..1890d7e13 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -21,10 +21,6 @@ pub const wasi = @import("fs/wasi.zig"); // TODO audit these APIs with respect to Dir and absolute paths 
-pub const rename = os.rename; -pub const renameZ = os.renameZ; -pub const renameC = @compileError("deprecated: renamed to renameZ"); -pub const renameW = os.renameW; pub const realpath = os.realpath; pub const realpathZ = os.realpathZ; pub const realpathC = @compileError("deprecated: renamed to realpathZ"); @@ -90,7 +86,7 @@ pub fn atomicSymLink(allocator: *Allocator, existing_path: []const u8, new_path: base64_encoder.encode(tmp_path[dirname.len + 1 ..], &rand_buf); if (cwd().symLink(existing_path, tmp_path, .{})) { - return rename(tmp_path, new_path); + return cwd().rename(tmp_path, new_path); } else |err| switch (err) { error.PathAlreadyExists => continue, else => return err, // TODO zig should know this set does not include PathAlreadyExists @@ -255,6 +251,45 @@ pub fn deleteDirAbsoluteW(dir_path: [*:0]const u16) !void { return os.rmdirW(dir_path); } +pub const renameC = @compileError("deprecated: use renameZ, dir.renameZ, or renameAbsoluteZ"); + +/// Same as `Dir.rename` except the paths are absolute. +pub fn renameAbsolute(old_path: []const u8, new_path: []const u8) !void { + assert(path.isAbsolute(old_path)); + assert(path.isAbsolute(new_path)); + return os.rename(old_path, new_path); +} + +/// Same as `renameAbsolute` except the path parameters are null-terminated. +pub fn renameAbsoluteZ(old_path: [*:0]const u8, new_path: [*:0]const u8) !void { + assert(path.isAbsoluteZ(old_path)); + assert(path.isAbsoluteZ(new_path)); + return os.renameZ(old_path, new_path); +} + +/// Same as `renameAbsolute` except the path parameters are WTF-16 and target OS is assumed Windows. 
+pub fn renameAbsoluteW(old_path: [*:0]const u16, new_path: [*:0]const u16) !void { + assert(path.isAbsoluteWindowsW(old_path)); + assert(path.isAbsoluteWindowsW(new_path)); + return os.renameW(old_path, new_path); +} + +/// Same as `Dir.rename`, except `new_sub_path` is relative to `new_dir` +pub fn rename(old_dir: Dir, old_sub_path: []const u8, new_dir: Dir, new_sub_path: []const u8) !void { + return os.renameat(old_dir.fd, old_sub_path, new_dir.fd, new_sub_path); +} + +/// Same as `rename` except the parameters are null-terminated. +pub fn renameZ(old_dir: Dir, old_sub_path_z: [*:0]const u8, new_dir: Dir, new_sub_path_z: [*:0]const u8) !void { + return os.renameatZ(old_dir.fd, old_sub_path_z, new_dir.fd, new_sub_path_z); +} + +/// Same as `rename` except the parameters are UTF16LE, NT prefixed. +/// This function is Windows-only. +pub fn renameW(old_dir: Dir, old_sub_path_w: []const u16, new_dir: Dir, new_sub_path_w: []const u16) !void { + return os.renameatW(old_dir.fd, old_sub_path_w, new_dir.fd, new_sub_path_w); +} + pub const Dir = struct { fd: os.fd_t, @@ -1338,6 +1373,27 @@ pub const Dir = struct { }; } + pub const RenameError = os.RenameError; + + /// Change the name or location of a file or directory. + /// If new_sub_path already exists, it will be replaced. + /// Renaming a file over an existing directory or a directory + /// over an existing file will fail with `error.IsDir` or `error.NotDir` + pub fn rename(self: Dir, old_sub_path: []const u8, new_sub_path: []const u8) RenameError!void { + return os.renameat(self.fd, old_sub_path, self.fd, new_sub_path); + } + + /// Same as `rename` except the parameters are null-terminated. + pub fn renameZ(self: Dir, old_sub_path_z: [*:0]const u8, new_sub_path_z: [*:0]const u8) RenameError!void { + return os.renameatZ(self.fd, old_sub_path_z, self.fd, new_sub_path_z); + } + + /// Same as `rename` except the parameters are UTF16LE, NT prefixed. + /// This function is Windows-only. 
+ pub fn renameW(self: Dir, old_sub_path_w: []const u16, new_sub_path_w: []const u16) RenameError!void { + return os.renameatW(self.fd, old_sub_path_w, self.fd, new_sub_path_w); + } + /// Creates a symbolic link named `sym_link_path` which contains the string `target_path`. /// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent /// one; the latter case is known as a dangling link. diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index ef1b501ec..73babf5fa 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -728,7 +728,7 @@ pub const File = struct { } var i: usize = 0; while (i < trailers.len) { - while (amt >= headers[i].iov_len) { + while (amt >= trailers[i].iov_len) { amt -= trailers[i].iov_len; i += 1; if (i >= trailers.len) return; @@ -740,14 +740,16 @@ pub const File = struct { } pub const Reader = io.Reader(File, ReadError, read); + /// Deprecated: use `Reader` pub const InStream = Reader; - pub fn reader(file: File) io.Reader(File, ReadError, read) { + pub fn reader(file: File) Reader { return .{ .context = file }; } + /// Deprecated: use `reader` - pub fn inStream(file: File) io.InStream(File, ReadError, read) { + pub fn inStream(file: File) Reader { return .{ .context = file }; } diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index a59bc4624..b3cc1fe56 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -274,6 +274,167 @@ test "file operations on directories" { dir.close(); } +test "Dir.rename files" { + var tmp_dir = tmpDir(.{}); + defer tmp_dir.cleanup(); + + testing.expectError(error.FileNotFound, tmp_dir.dir.rename("missing_file_name", "something_else")); + + // Renaming files + const test_file_name = "test_file"; + const renamed_test_file_name = "test_file_renamed"; + var file = try tmp_dir.dir.createFile(test_file_name, .{ .read = true }); + file.close(); + try tmp_dir.dir.rename(test_file_name, renamed_test_file_name); + + // Ensure the file was renamed + 
testing.expectError(error.FileNotFound, tmp_dir.dir.openFile(test_file_name, .{})); + file = try tmp_dir.dir.openFile(renamed_test_file_name, .{}); + file.close(); + + // Rename to self succeeds + try tmp_dir.dir.rename(renamed_test_file_name, renamed_test_file_name); + + // Rename to existing file succeeds + var existing_file = try tmp_dir.dir.createFile("existing_file", .{ .read = true }); + existing_file.close(); + try tmp_dir.dir.rename(renamed_test_file_name, "existing_file"); + + testing.expectError(error.FileNotFound, tmp_dir.dir.openFile(renamed_test_file_name, .{})); + file = try tmp_dir.dir.openFile("existing_file", .{}); + file.close(); +} + +test "Dir.rename directories" { + // TODO: Fix on Windows, see https://github.com/ziglang/zig/issues/6364 + if (builtin.os.tag == .windows) return error.SkipZigTest; + + var tmp_dir = tmpDir(.{}); + defer tmp_dir.cleanup(); + + // Renaming directories + try tmp_dir.dir.makeDir("test_dir"); + try tmp_dir.dir.rename("test_dir", "test_dir_renamed"); + + // Ensure the directory was renamed + testing.expectError(error.FileNotFound, tmp_dir.dir.openDir("test_dir", .{})); + var dir = try tmp_dir.dir.openDir("test_dir_renamed", .{}); + + // Put a file in the directory + var file = try dir.createFile("test_file", .{ .read = true }); + file.close(); + dir.close(); + + try tmp_dir.dir.rename("test_dir_renamed", "test_dir_renamed_again"); + + // Ensure the directory was renamed and the file still exists in it + testing.expectError(error.FileNotFound, tmp_dir.dir.openDir("test_dir_renamed", .{})); + dir = try tmp_dir.dir.openDir("test_dir_renamed_again", .{}); + file = try dir.openFile("test_file", .{}); + file.close(); + dir.close(); + + // Try to rename to a non-empty directory now + var target_dir = try tmp_dir.dir.makeOpenPath("non_empty_target_dir", .{}); + file = try target_dir.createFile("filler", .{ .read = true }); + file.close(); + + testing.expectError(error.PathAlreadyExists, 
tmp_dir.dir.rename("test_dir_renamed_again", "non_empty_target_dir")); + + // Ensure the directory was not renamed + dir = try tmp_dir.dir.openDir("test_dir_renamed_again", .{}); + file = try dir.openFile("test_file", .{}); + file.close(); + dir.close(); +} + +test "Dir.rename file <-> dir" { + // TODO: Fix on Windows, see https://github.com/ziglang/zig/issues/6364 + if (builtin.os.tag == .windows) return error.SkipZigTest; + + var tmp_dir = tmpDir(.{}); + defer tmp_dir.cleanup(); + + var file = try tmp_dir.dir.createFile("test_file", .{ .read = true }); + file.close(); + try tmp_dir.dir.makeDir("test_dir"); + testing.expectError(error.IsDir, tmp_dir.dir.rename("test_file", "test_dir")); + testing.expectError(error.NotDir, tmp_dir.dir.rename("test_dir", "test_file")); +} + +test "rename" { + var tmp_dir1 = tmpDir(.{}); + defer tmp_dir1.cleanup(); + + var tmp_dir2 = tmpDir(.{}); + defer tmp_dir2.cleanup(); + + // Renaming files + const test_file_name = "test_file"; + const renamed_test_file_name = "test_file_renamed"; + var file = try tmp_dir1.dir.createFile(test_file_name, .{ .read = true }); + file.close(); + try fs.rename(tmp_dir1.dir, test_file_name, tmp_dir2.dir, renamed_test_file_name); + + // ensure the file was renamed + testing.expectError(error.FileNotFound, tmp_dir1.dir.openFile(test_file_name, .{})); + file = try tmp_dir2.dir.openFile(renamed_test_file_name, .{}); + file.close(); +} + +test "renameAbsolute" { + if (builtin.os.tag == .wasi) return error.SkipZigTest; + + var tmp_dir = tmpDir(.{}); + defer tmp_dir.cleanup(); + + // Get base abs path + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const allocator = &arena.allocator; + + const base_path = blk: { + const relative_path = try fs.path.join(&arena.allocator, &[_][]const u8{ "zig-cache", "tmp", tmp_dir.sub_path[0..] 
}); + break :blk try fs.realpathAlloc(&arena.allocator, relative_path); + }; + + testing.expectError(error.FileNotFound, fs.renameAbsolute( + try fs.path.join(allocator, &[_][]const u8{ base_path, "missing_file_name" }), + try fs.path.join(allocator, &[_][]const u8{ base_path, "something_else" }), + )); + + // Renaming files + const test_file_name = "test_file"; + const renamed_test_file_name = "test_file_renamed"; + var file = try tmp_dir.dir.createFile(test_file_name, .{ .read = true }); + file.close(); + try fs.renameAbsolute( + try fs.path.join(allocator, &[_][]const u8{ base_path, test_file_name }), + try fs.path.join(allocator, &[_][]const u8{ base_path, renamed_test_file_name }), + ); + + // ensure the file was renamed + testing.expectError(error.FileNotFound, tmp_dir.dir.openFile(test_file_name, .{})); + file = try tmp_dir.dir.openFile(renamed_test_file_name, .{}); + const stat = try file.stat(); + testing.expect(stat.kind == .File); + file.close(); + + // Renaming directories + const test_dir_name = "test_dir"; + const renamed_test_dir_name = "test_dir_renamed"; + try tmp_dir.dir.makeDir(test_dir_name); + try fs.renameAbsolute( + try fs.path.join(allocator, &[_][]const u8{ base_path, test_dir_name }), + try fs.path.join(allocator, &[_][]const u8{ base_path, renamed_test_dir_name }), + ); + + // ensure the directory was renamed + testing.expectError(error.FileNotFound, tmp_dir.dir.openDir(test_dir_name, .{})); + var dir = try tmp_dir.dir.openDir(renamed_test_dir_name, .{}); + dir.close(); +} + test "openSelfExe" { if (builtin.os.tag == .wasi) return error.SkipZigTest; diff --git a/lib/std/hash/auto_hash.zig b/lib/std/hash/auto_hash.zig index 5877c77b5..2e707d545 100644 --- a/lib/std/hash/auto_hash.zig +++ b/lib/std/hash/auto_hash.zig @@ -139,9 +139,8 @@ pub fn hash(hasher: anytype, key: anytype, comptime strat: HashStrategy) void { const tag = meta.activeTag(key); const s = hash(hasher, tag, strat); inline for (info.fields) |field| { - const enum_field = 
field.enum_field.?; - if (enum_field.value == @enumToInt(tag)) { - hash(hasher, @field(key, enum_field.name), strat); + if (@field(tag_type, field.name) == tag) { + hash(hasher, @field(key, field.name), strat); // TODO use a labelled break when it does not crash the compiler. cf #2908 // break :blk; return; diff --git a/lib/std/hash/crc.zig b/lib/std/hash/crc.zig index 37695df8b..6290369fc 100644 --- a/lib/std/hash/crc.zig +++ b/lib/std/hash/crc.zig @@ -71,10 +71,7 @@ pub fn Crc32WithPoly(comptime poly: Polynomial) type { const p = input[i .. i + 8]; // Unrolling this way gives ~50Mb/s increase - self.crc ^= (@as(u32, p[0]) << 0); - self.crc ^= (@as(u32, p[1]) << 8); - self.crc ^= (@as(u32, p[2]) << 16); - self.crc ^= (@as(u32, p[3]) << 24); + self.crc ^= std.mem.readIntLittle(u32, p[0..4]); self.crc = lookup_tables[0][p[7]] ^ diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index 144a512ed..458706d71 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -113,7 +113,7 @@ pub fn HashMap( return self.unmanaged.clearAndFree(self.allocator); } - pub fn count(self: Self) usize { + pub fn count(self: Self) Size { return self.unmanaged.count(); } diff --git a/lib/std/heap.zig b/lib/std/heap.zig index 6db1be539..16de215cc 100644 --- a/lib/std/heap.zig +++ b/lib/std/heap.zig @@ -489,7 +489,7 @@ pub const HeapAllocator = switch (builtin.os.tag) { const full_len = os.windows.kernel32.HeapSize(heap_handle, 0, ptr); assert(full_len != std.math.maxInt(usize)); assert(full_len >= amt); - break :init mem.alignBackwardAnyAlign(full_len - (aligned_addr - root_addr), len_align); + break :init mem.alignBackwardAnyAlign(full_len - (aligned_addr - root_addr) - @sizeOf(usize), len_align); }; const buf = @intToPtr([*]u8, aligned_addr)[0..return_len]; getRecordPtr(buf).* = root_addr; diff --git a/lib/std/heap/arena_allocator.zig b/lib/std/heap/arena_allocator.zig index e4bce8087..0737cb2ef 100644 --- a/lib/std/heap/arena_allocator.zig +++ 
b/lib/std/heap/arena_allocator.zig @@ -26,7 +26,7 @@ pub const ArenaAllocator = struct { return .{ .allocator = Allocator{ .allocFn = alloc, - .resizeFn = Allocator.noResize, + .resizeFn = resize, }, .child_allocator = child_allocator, .state = self, @@ -84,4 +84,26 @@ pub const ArenaAllocator = struct { return result; } } + + fn resize(allocator: *Allocator, buf: []u8, buf_align: u29, new_len: usize, len_align: u29, ret_addr: usize) Allocator.Error!usize { + const self = @fieldParentPtr(ArenaAllocator, "allocator", allocator); + + const cur_node = self.state.buffer_list.first orelse return error.OutOfMemory; + const cur_buf = cur_node.data[@sizeOf(BufNode)..]; + if (@ptrToInt(cur_buf.ptr) + self.state.end_index != @ptrToInt(buf.ptr) + buf.len) { + if (new_len > buf.len) + return error.OutOfMemory; + return new_len; + } + + if (buf.len >= new_len) { + self.state.end_index -= buf.len - new_len; + return new_len; + } else if (cur_buf.len - self.state.end_index >= new_len - buf.len) { + self.state.end_index += new_len - buf.len; + return new_len; + } else { + return error.OutOfMemory; + } + } }; diff --git a/lib/std/io/serialization.zig b/lib/std/io/serialization.zig index 925c929ce..79a12989b 100644 --- a/lib/std/io/serialization.zig +++ b/lib/std/io/serialization.zig @@ -156,7 +156,7 @@ pub fn Deserializer(comptime endian: builtin.Endian, comptime packing: Packing, const tag = try self.deserializeInt(TagInt); inline for (info.fields) |field_info| { - if (field_info.enum_field.?.value == tag) { + if (@enumToInt(@field(TagType, field_info.name)) == tag) { const name = field_info.name; const FieldType = field_info.field_type; ptr.* = @unionInit(C, name, undefined); @@ -320,7 +320,7 @@ pub fn Serializer(comptime endian: builtin.Endian, comptime packing: Packing, co // value, but @field requires a comptime value. 
Our alternative // is to check each field for a match inline for (info.fields) |field_info| { - if (field_info.enum_field.?.value == @enumToInt(active_tag)) { + if (@field(TagType, field_info.name) == active_tag) { const name = field_info.name; const FieldType = field_info.field_type; try self.serialize(@field(value, name)); diff --git a/lib/std/json.zig b/lib/std/json.zig index 2f8a70d0e..cf479ab2c 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -1613,7 +1613,7 @@ pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void { .Union => |unionInfo| { if (unionInfo.tag_type) |UnionTagType| { inline for (unionInfo.fields) |u_field| { - if (@enumToInt(@as(UnionTagType, value)) == u_field.enum_field.?.value) { + if (value == @field(UnionTagType, u_field.name)) { parseFree(u_field.field_type, @field(value, u_field.name), options); break; } @@ -2458,7 +2458,7 @@ pub fn stringify( const info = @typeInfo(T).Union; if (info.tag_type) |UnionTagType| { inline for (info.fields) |u_field| { - if (@enumToInt(@as(UnionTagType, value)) == u_field.enum_field.?.value) { + if (value == @field(UnionTagType, u_field.name)) { return try stringify(@field(value, u_field.name), options, out_stream); } } diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 5217a7376..d3296ee17 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -647,6 +647,32 @@ pub const nlist_64 = extern struct { n_value: u64, }; +/// Format of a relocation entry of a Mach-O file. Modified from the 4.3BSD +/// format. The modifications from the original format were changing the value +/// of the r_symbolnum field for "local" (r_extern == 0) relocation entries. +/// This modification is required to support symbols in an arbitrary number of +/// sections not just the three sections (text, data and bss) in a 4.3BSD file. +/// Also the last 4 bits have had the r_type tag added to them. 
+pub const relocation_info = packed struct { + /// offset in the section to what is being relocated + r_address: i32, + + /// symbol index if r_extern == 1 or section ordinal if r_extern == 0 + r_symbolnum: u24, + + /// was relocated pc relative already + r_pcrel: u1, + + /// 0=byte, 1=word, 2=long, 3=quad + r_length: u2, + + /// does not include value of sym referenced + r_extern: u1, + + /// if not 0, machine specific relocation type + r_type: u4, +}; + /// After MacOS X 10.1 when a new load command is added that is required to be /// understood by the dynamic linker for the image to execute properly the /// LC_REQ_DYLD bit will be or'ed into the load command constant. If the dynamic @@ -1086,13 +1112,58 @@ pub const N_ECOML = 0xe8; /// second stab entry with length information pub const N_LENG = 0xfe; -/// If a segment contains any sections marked with S_ATTR_DEBUG then all -/// sections in that segment must have this attribute. No section other than -/// a section marked with this attribute may reference the contents of this -/// section. A section with this attribute may contain no symbols and must have -/// a section type S_REGULAR. The static linker will not copy section contents -/// from sections with this attribute into its output file. These sections -/// generally contain DWARF debugging info. +// For the two types of symbol pointers sections and the symbol stubs section +// they have indirect symbol table entries. For each of the entries in the +// section the indirect symbol table entries, in corresponding order in the +// indirect symbol table, start at the index stored in the reserved1 field +// of the section structure. Since the indirect symbol table entries +// correspond to the entries in the section the number of indirect symbol table +// entries is inferred from the size of the section divided by the size of the +// entries in the section. 
For symbol pointers sections the size of the entries +// in the section is 4 bytes and for symbol stubs sections the byte size of the +// stubs is stored in the reserved2 field of the section structure. + +/// section with only non-lazy symbol pointers +pub const S_NON_LAZY_SYMBOL_POINTERS = 0x6; + +/// section with only lazy symbol pointers +pub const S_LAZY_SYMBOL_POINTERS = 0x7; + +/// section with only symbol stubs, byte size of stub in the reserved2 field +pub const S_SYMBOL_STUBS = 0x8; + +/// section with only function pointers for initialization +pub const S_MOD_INIT_FUNC_POINTERS = 0x9; + +/// section with only function pointers for termination +pub const S_MOD_TERM_FUNC_POINTERS = 0xa; + +/// section contains symbols that are to be coalesced +pub const S_COALESCED = 0xb; + +/// zero fill on demand section (that can be larger than 4 gigabytes) +pub const S_GB_ZEROFILL = 0xc; + +/// section with only pairs of function pointers for interposing +pub const S_INTERPOSING = 0xd; + +/// section with only 16 byte literals +pub const S_16BYTE_LITERALS = 0xe; + +/// section contains DTrace Object Format +pub const S_DTRACE_DOF = 0xf; + +/// section with only lazy symbol pointers to lazy loaded dylibs +pub const S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10; + +// If a segment contains any sections marked with S_ATTR_DEBUG then all +// sections in that segment must have this attribute. No section other than +// a section marked with this attribute may reference the contents of this +// section. A section with this attribute may contain no symbols and must have +// a section type S_REGULAR. The static linker will not copy section contents +// from sections with this attribute into its output file. These sections +// generally contain DWARF debugging info. 
+ /// a debug section pub const S_ATTR_DEBUG = 0x02000000; @@ -1154,3 +1225,35 @@ pub const VM_PROT_WRITE: vm_prot_t = 0x2; /// VM execute permission pub const VM_PROT_EXECUTE: vm_prot_t = 0x4; + +pub const reloc_type_x86_64 = packed enum(u4) { + /// for absolute addresses + X86_64_RELOC_UNSIGNED = 0, + + /// for signed 32-bit displacement + X86_64_RELOC_SIGNED, + + /// a CALL/JMP instruction with 32-bit displacement + X86_64_RELOC_BRANCH, + + /// a MOVQ load of a GOT entry + X86_64_RELOC_GOT_LOAD, + + /// other GOT references + X86_64_RELOC_GOT, + + /// must be followed by a X86_64_RELOC_UNSIGNED + X86_64_RELOC_SUBTRACTOR, + + /// for signed 32-bit displacement with a -1 addend + X86_64_RELOC_SIGNED_1, + + /// for signed 32-bit displacement with a -2 addend + X86_64_RELOC_SIGNED_2, + + /// for signed 32-bit displacement with a -4 addend + X86_64_RELOC_SIGNED_4, + + /// for thread local variables + X86_64_RELOC_TLV, +}; diff --git a/lib/std/meta.zig b/lib/std/meta.zig index 73e066149..1507aa9de 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -465,10 +465,13 @@ pub fn TagPayloadType(comptime U: type, tag: @TagType(U)) type { testing.expect(trait.is(.Union)(U)); const info = @typeInfo(U).Union; + const tag_info = @typeInfo(@TagType(U)).Enum; inline for (info.fields) |field_info| { - if (field_info.enum_field.?.value == @enumToInt(tag)) return field_info.field_type; + if (comptime mem.eql(u8, field_info.name, @tagName(tag))) + return field_info.field_type; } + unreachable; } @@ -504,15 +507,14 @@ pub fn eql(a: anytype, b: @TypeOf(a)) bool { } }, .Union => |info| { - if (info.tag_type) |_| { + if (info.tag_type) |Tag| { const tag_a = activeTag(a); const tag_b = activeTag(b); if (tag_a != tag_b) return false; inline for (info.fields) |field_info| { - const enum_field = field_info.enum_field.?; - if (enum_field.value == @enumToInt(tag_a)) { - return eql(@field(a, enum_field.name), @field(b, enum_field.name)); + if (@field(Tag, field_info.name) == tag_a) { + 
return eql(@field(a, field_info.name), @field(b, field_info.name)); } } return false; @@ -715,7 +717,7 @@ pub fn cast(comptime DestType: type, target: anytype) DestType { }, .Optional => |opt| { if (@typeInfo(opt.child) == .Pointer) { - return @ptrCast(DestType, @alignCast(dest_ptr, target)); + return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target)); } }, else => {}, @@ -723,23 +725,24 @@ pub fn cast(comptime DestType: type, target: anytype) DestType { }, .Optional => |dest_opt| { if (@typeInfo(dest_opt.child) == .Pointer) { + const dest_ptr = @typeInfo(dest_opt.child).Pointer; switch (@typeInfo(TargetType)) { .Int, .ComptimeInt => { return @intToPtr(DestType, target); }, .Pointer => { - return @ptrCast(DestType, @alignCast(@alignOf(dest_opt.child.Child), target)); + return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target)); }, .Optional => |target_opt| { if (@typeInfo(target_opt.child) == .Pointer) { - return @ptrCast(DestType, @alignCast(@alignOf(dest_opt.child.Child), target)); + return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target)); } }, else => {}, } } }, - .Enum, .EnumLiteral => { + .Enum => { if (@typeInfo(TargetType) == .Int or @typeInfo(TargetType) == .ComptimeInt) { return @intToEnum(DestType, target); } @@ -747,15 +750,18 @@ pub fn cast(comptime DestType: type, target: anytype) DestType { .Int, .ComptimeInt => { switch (@typeInfo(TargetType)) { .Pointer => { - return @as(DestType, @ptrToInt(target)); + return @intCast(DestType, @ptrToInt(target)); }, .Optional => |opt| { if (@typeInfo(opt.child) == .Pointer) { - return @as(DestType, @ptrToInt(target)); + return @intCast(DestType, @ptrToInt(target)); } }, - .Enum, .EnumLiteral => { - return @as(DestType, @enumToInt(target)); + .Enum => { + return @intCast(DestType, @enumToInt(target)); + }, + .Int, .ComptimeInt => { + return @intCast(DestType, target); }, else => {}, } @@ -774,10 +780,49 @@ test "std.meta.cast" { var i = @as(i64, 10); - testing.expect(cast(?*c_void, 0) == 
@intToPtr(?*c_void, 0)); testing.expect(cast(*u8, 16) == @intToPtr(*u8, 16)); - testing.expect(cast(u64, @as(u32, 10)) == @as(u64, 10)); - testing.expect(cast(E, 1) == .One); - testing.expect(cast(u8, E.Two) == 2); testing.expect(cast(*u64, &i).* == @as(u64, 10)); + testing.expect(cast(*i64, @as(?*align(1) i64, &i)) == &i); + + testing.expect(cast(?*u8, 2) == @intToPtr(*u8, 2)); + testing.expect(cast(?*i64, @as(*align(1) i64, &i)) == &i); + testing.expect(cast(?*i64, @as(?*align(1) i64, &i)) == &i); + + testing.expect(cast(E, 1) == .One); + + testing.expectEqual(@as(u32, 4), cast(u32, @intToPtr(*u32, 4))); + testing.expectEqual(@as(u32, 4), cast(u32, @intToPtr(?*u32, 4))); + testing.expectEqual(@as(u32, 10), cast(u32, @as(u64, 10))); + testing.expectEqual(@as(u8, 2), cast(u8, E.Two)); +} + +/// Given a value returns its size as C's sizeof operator would. +/// This is for translate-c and is not intended for general use. +pub fn sizeof(target: anytype) usize { + switch (@typeInfo(@TypeOf(target))) { + .Type => return @sizeOf(target), + .Float, .Int, .Struct, .Union, .Enum => return @sizeOf(@TypeOf(target)), + .ComptimeFloat => return @sizeOf(f64), // TODO c_double #3999 + .ComptimeInt => { + // TODO to get the correct result we have to translate + // `1073741824 * 4` as `int(1073741824) *% int(4)` since + // sizeof(1073741824 * 4) != sizeof(4294967296). 
+ + // TODO test if target fits in int, long or long long + return @sizeOf(c_int); + }, + else => @compileError("TODO implement std.meta.sizeof for type " ++ @typeName(@TypeOf(target))), + } +} + +test "sizeof" { + const E = extern enum(c_int) { One, _ }; + const S = extern struct { a: u32 }; + + testing.expect(sizeof(u32) == 4); + testing.expect(sizeof(@as(u32, 2)) == 4); + testing.expect(sizeof(2) == @sizeOf(c_int)); + testing.expect(sizeof(E) == @sizeOf(c_int)); + testing.expect(sizeof(E.One) == @sizeOf(c_int)); + testing.expect(sizeof(S) == 4); } diff --git a/lib/std/os.zig b/lib/std/os.zig index 181bf4930..0b09b1f82 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -320,6 +320,7 @@ pub const ReadError = error{ /// Linux has a limit on how many bytes may be transferred in one `read` call, which is `0x7ffff000` /// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as /// well as stuffing the errno codes into the last `4096` values. This is noted on the `read` man page. +/// The limit on Darwin is `0x7fffffff`, trying to read more than that returns EINVAL. /// For POSIX the limit is `math.maxInt(isize)`. pub fn read(fd: fd_t, buf: []u8) ReadError!usize { if (builtin.os.tag == .windows) { @@ -353,6 +354,7 @@ pub fn read(fd: fd_t, buf: []u8) ReadError!usize { // Prevents EINVAL. const max_count = switch (std.Target.current.os.tag) { .linux => 0x7ffff000, + .macosx, .ios, .watchos, .tvos => math.maxInt(i32), else => math.maxInt(isize), }; const adjusted_len = math.min(max_count, buf.len); @@ -693,6 +695,7 @@ pub const WriteError = error{ /// Linux has a limit on how many bytes may be transferred in one `write` call, which is `0x7ffff000` /// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as /// well as stuffing the errno codes into the last `4096` values. This is noted on the `write` man page. 
+/// The limit on Darwin is `0x7fffffff`, trying to write more than that returns EINVAL. /// The corresponding POSIX limit is `math.maxInt(isize)`. pub fn write(fd: fd_t, bytes: []const u8) WriteError!usize { if (builtin.os.tag == .windows) { @@ -726,6 +729,7 @@ pub fn write(fd: fd_t, bytes: []const u8) WriteError!usize { const max_count = switch (std.Target.current.os.tag) { .linux => 0x7ffff000, + .macosx, .ios, .watchos, .tvos => math.maxInt(i32), else => math.maxInt(isize), }; const adjusted_len = math.min(max_count, bytes.len); @@ -851,6 +855,7 @@ pub const PWriteError = WriteError || error{Unseekable}; /// Linux has a limit on how many bytes may be transferred in one `pwrite` call, which is `0x7ffff000` /// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as /// well as stuffing the errno codes into the last `4096` values. This is noted on the `write` man page. +/// The limit on Darwin is `0x7fffffff`, trying to write more than that returns EINVAL. /// The corresponding POSIX limit is `math.maxInt(isize)`. pub fn pwrite(fd: fd_t, bytes: []const u8, offset: u64) PWriteError!usize { if (std.Target.current.os.tag == .windows) { @@ -888,6 +893,7 @@ pub fn pwrite(fd: fd_t, bytes: []const u8, offset: u64) PWriteError!usize { // Prevent EINVAL. const max_count = switch (std.Target.current.os.tag) { .linux => 0x7ffff000, + .macosx, .ios, .watchos, .tvos => math.maxInt(i32), else => math.maxInt(isize), }; const adjusted_len = math.min(max_count, bytes.len); @@ -1884,7 +1890,7 @@ pub fn unlinkatW(dirfd: fd_t, sub_path_w: []const u16, flags: u32) UnlinkatError return windows.DeleteFile(sub_path_w, .{ .dir = dirfd, .remove_dir = remove_dir }); } -const RenameError = error{ +pub const RenameError = error{ /// In WASI, this error may occur when the file descriptor does /// not hold the required rights to rename a resource by path relative to it.
AccessDenied, @@ -2101,6 +2107,7 @@ pub fn renameatW( .ACCESS_DENIED => return error.AccessDenied, .OBJECT_NAME_NOT_FOUND => return error.FileNotFound, .OBJECT_PATH_NOT_FOUND => return error.FileNotFound, + .NOT_SAME_DEVICE => return error.RenameAcrossMountPoints, else => return windows.unexpectedStatus(rc), } } @@ -2515,9 +2522,9 @@ pub fn readlinkatZ(dirfd: fd_t, file_path: [*:0]const u8, out_buffer: []u8) Read pub const SetEidError = error{ InvalidUserId, PermissionDenied, -}; +} || UnexpectedError; -pub const SetIdError = error{ResourceLimitReached} || SetEidError || UnexpectedError; +pub const SetIdError = error{ResourceLimitReached} || SetEidError; pub fn setuid(uid: uid_t) SetIdError!void { switch (errno(system.setuid(uid))) { @@ -3084,7 +3091,7 @@ pub fn connect(sockfd: socket_t, sock_addr: *const sockaddr, len: socklen_t) Con .WSAECONNREFUSED => return error.ConnectionRefused, .WSAETIMEDOUT => return error.ConnectionTimedOut, .WSAEHOSTUNREACH // TODO: should we return NetworkUnreachable in this case as well? - , .WSAENETUNREACH => return error.NetworkUnreachable, + , .WSAENETUNREACH => return error.NetworkUnreachable, .WSAEFAULT => unreachable, .WSAEINVAL => unreachable, .WSAEISCONN => unreachable, @@ -4711,6 +4718,7 @@ fn count_iovec_bytes(iovs: []const iovec_const) usize { /// Linux has a limit on how many bytes may be transferred in one `sendfile` call, which is `0x7ffff000` /// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as /// well as stuffing the errno codes into the last `4096` values. This is cited on the `sendfile` man page. +/// The limit on Darwin is `0x7fffffff`, trying to write more than that returns EINVAL. /// The corresponding POSIX limit on this is `math.maxInt(isize)`. 
pub fn sendfile( out_fd: fd_t, @@ -4733,6 +4741,7 @@ pub fn sendfile( }); const max_count = switch (std.Target.current.os.tag) { .linux => 0x7ffff000, + .macosx, .ios, .watchos, .tvos => math.maxInt(i32), else => math.maxInt(size_t), }; @@ -5418,3 +5427,42 @@ pub fn fdatasync(fd: fd_t) SyncError!void { else => |err| return std.os.unexpectedErrno(err), } } + +pub const PrctlError = error{ + /// Can only occur with PR_SET_SECCOMP/SECCOMP_MODE_FILTER or + /// PR_SET_MM/PR_SET_MM_EXE_FILE + AccessDenied, + /// Can only occur with PR_SET_MM/PR_SET_MM_EXE_FILE + InvalidFileDescriptor, + InvalidAddress, + /// Can only occur with PR_SET_SPECULATION_CTRL, PR_MPX_ENABLE_MANAGEMENT, + /// or PR_MPX_DISABLE_MANAGEMENT + UnsupportedFeature, + /// Can only occur with PR_SET_FP_MODE + OperationNotSupported, + PermissionDenied, +} || UnexpectedError; + +pub fn prctl(option: i32, args: anytype) PrctlError!u31 { + if (@typeInfo(@TypeOf(args)) != .Struct) + @compileError("Expected tuple or struct argument, found " ++ @typeName(@TypeOf(args))); + if (args.len > 4) + @compileError("prctl takes a maximum of 4 optional arguments"); + + var buf: [4]usize = undefined; + inline for (args) |arg, i| buf[i] = arg; + + const rc = system.prctl(option, buf[0], buf[1], buf[2], buf[3]); + switch (errno(rc)) { + 0 => return @intCast(u31, rc), + EACCES => return error.AccessDenied, + EBADF => return error.InvalidFileDescriptor, + EFAULT => return error.InvalidAddress, + EINVAL => unreachable, + ENODEV, ENXIO => return error.UnsupportedFeature, + EOPNOTSUPP => return error.OperationNotSupported, + EPERM, EBUSY => return error.PermissionDenied, + ERANGE => unreachable, + else => |err| return std.os.unexpectedErrno(err), + } +} diff --git a/lib/std/os/bits/linux.zig b/lib/std/os/bits/linux.zig index 6d85d0623..df31bc32f 100644 --- a/lib/std/os/bits/linux.zig +++ b/lib/std/os/bits/linux.zig @@ -20,10 +20,13 @@ pub usingnamespace switch (builtin.arch) { .arm => @import("linux/arm-eabi.zig"), .riscv64 =>
@import("linux/riscv64.zig"), .mips, .mipsel => @import("linux/mips.zig"), + .powerpc64, .powerpc64le => @import("linux/powerpc64.zig"), else => struct {}, }; pub usingnamespace @import("linux/netlink.zig"); +pub usingnamespace @import("linux/prctl.zig"); +pub usingnamespace @import("linux/securebits.zig"); const is_mips = builtin.arch.isMIPS(); @@ -1590,6 +1593,123 @@ pub const RR_A = 1; pub const RR_CNAME = 5; pub const RR_AAAA = 28; +/// Turn off Nagle's algorithm +pub const TCP_NODELAY = 1; +/// Limit MSS +pub const TCP_MAXSEG = 2; +/// Never send partially complete segments. +pub const TCP_CORK = 3; +/// Start keepalives after this period, in seconds +pub const TCP_KEEPIDLE = 4; +/// Interval between keepalives +pub const TCP_KEEPINTVL = 5; +/// Number of keepalives before death +pub const TCP_KEEPCNT = 6; +/// Number of SYN retransmits +pub const TCP_SYNCNT = 7; +/// Life time of orphaned FIN-WAIT-2 state +pub const TCP_LINGER2 = 8; +/// Wake up listener only when data arrive +pub const TCP_DEFER_ACCEPT = 9; +/// Bound advertised window +pub const TCP_WINDOW_CLAMP = 10; +/// Information about this connection. +pub const TCP_INFO = 11; +/// Block/reenable quick acks +pub const TCP_QUICKACK = 12; +/// Congestion control algorithm +pub const TCP_CONGESTION = 13; +/// TCP MD5 Signature (RFC2385) +pub const TCP_MD5SIG = 14; +/// Use linear timeouts for thin streams +pub const TCP_THIN_LINEAR_TIMEOUTS = 16; +/// Fast retrans.
after 1 dupack +pub const TCP_THIN_DUPACK = 17; +/// How long for loss retry before timeout +pub const TCP_USER_TIMEOUT = 18; +/// TCP sock is under repair right now +pub const TCP_REPAIR = 19; +pub const TCP_REPAIR_QUEUE = 20; +pub const TCP_QUEUE_SEQ = 21; +pub const TCP_REPAIR_OPTIONS = 22; +/// Enable FastOpen on listeners +pub const TCP_FASTOPEN = 23; +pub const TCP_TIMESTAMP = 24; +/// limit number of unsent bytes in write queue +pub const TCP_NOTSENT_LOWAT = 25; +/// Get Congestion Control (optional) info +pub const TCP_CC_INFO = 26; +/// Record SYN headers for new connections +pub const TCP_SAVE_SYN = 27; +/// Get SYN headers recorded for connection +pub const TCP_SAVED_SYN = 28; +/// Get/set window parameters +pub const TCP_REPAIR_WINDOW = 29; +/// Attempt FastOpen with connect +pub const TCP_FASTOPEN_CONNECT = 30; +/// Attach a ULP to a TCP connection +pub const TCP_ULP = 31; +/// TCP MD5 Signature with extensions +pub const TCP_MD5SIG_EXT = 32; +/// Set the key for Fast Open (cookie) +pub const TCP_FASTOPEN_KEY = 33; +/// Enable TFO without a TFO cookie +pub const TCP_FASTOPEN_NO_COOKIE = 34; +pub const TCP_ZEROCOPY_RECEIVE = 35; +/// Notify bytes available to read as a cmsg on read +pub const TCP_INQ = 36; +pub const TCP_CM_INQ = TCP_INQ; +/// delay outgoing packets by XX usec +pub const TCP_TX_DELAY = 37; + +pub const TCP_REPAIR_ON = 1; +pub const TCP_REPAIR_OFF = 0; +/// Turn off without window probes +pub const TCP_REPAIR_OFF_NO_WP = -1; + +pub const tcp_repair_opt = extern struct { + opt_code: u32, + opt_val: u32, +}; + +pub const tcp_repair_window = extern struct { + snd_wl1: u32, + snd_wnd: u32, + max_window: u32, + rcv_wnd: u32, + rcv_wup: u32, +}; + +pub const TcpRepairOption = extern enum { + TCP_NO_QUEUE, + TCP_RECV_QUEUE, + TCP_SEND_QUEUE, + TCP_QUEUES_NR, +}; + +/// why fastopen failed from client perspective +pub const tcp_fastopen_client_fail = extern enum { + /// catch-all + TFO_STATUS_UNSPEC, + /// if not in TFO_CLIENT_NO_COOKIE mode + 
TFO_COOKIE_UNAVAILABLE, + /// SYN-ACK did not ack SYN data + TFO_DATA_NOT_ACKED, + /// SYN-ACK did not ack SYN data after timeout + TFO_SYN_RETRANSMITTED, +}; + +/// for TCP_INFO socket option +pub const TCPI_OPT_TIMESTAMPS = 1; +pub const TCPI_OPT_SACK = 2; +pub const TCPI_OPT_WSCALE = 4; +/// ECN was negotiated at TCP session init +pub const TCPI_OPT_ECN = 8; +/// we received at least one packet with ECT +pub const TCPI_OPT_ECN_SEEN = 16; +/// SYN-ACK acked data in SYN sent or rcvd +pub const TCPI_OPT_SYN_DATA = 32; + pub const nfds_t = usize; pub const pollfd = extern struct { fd: fd_t, diff --git a/lib/std/os/bits/linux/powerpc64.zig b/lib/std/os/bits/linux/powerpc64.zig new file mode 100644 index 000000000..adc6c87c1 --- /dev/null +++ b/lib/std/os/bits/linux/powerpc64.zig @@ -0,0 +1,602 @@ +const std = @import("../../../std.zig"); +const linux = std.os.linux; +const socklen_t = linux.socklen_t; +const iovec = linux.iovec; +const iovec_const = linux.iovec_const; +const uid_t = linux.uid_t; +const gid_t = linux.gid_t; +const pid_t = linux.pid_t; +const stack_t = linux.stack_t; +const sigset_t = linux.sigset_t; +pub const SYS = extern enum(usize) { + restart_syscall = 0, + exit = 1, + fork = 2, + read = 3, + write = 4, + open = 5, + close = 6, + waitpid = 7, + creat = 8, + link = 9, + unlink = 10, + execve = 11, + chdir = 12, + time = 13, + mknod = 14, + chmod = 15, + lchown = 16, + @"break" = 17, + oldstat = 18, + lseek = 19, + getpid = 20, + mount = 21, + umount = 22, + setuid = 23, + getuid = 24, + stime = 25, + ptrace = 26, + alarm = 27, + oldfstat = 28, + pause = 29, + utime = 30, + stty = 31, + gtty = 32, + access = 33, + nice = 34, + ftime = 35, + sync = 36, + kill = 37, + rename = 38, + mkdir = 39, + rmdir = 40, + dup = 41, + pipe = 42, + times = 43, + prof = 44, + brk = 45, + setgid = 46, + getgid = 47, + signal = 48, + geteuid = 49, + getegid = 50, + acct = 51, + umount2 = 52, + lock = 53, + ioctl = 54, + fcntl = 55, + mpx = 56, + setpgid = 57, + ulimit
= 58, + oldolduname = 59, + umask = 60, + chroot = 61, + ustat = 62, + dup2 = 63, + getppid = 64, + getpgrp = 65, + setsid = 66, + sigaction = 67, + sgetmask = 68, + ssetmask = 69, + setreuid = 70, + setregid = 71, + sigsuspend = 72, + sigpending = 73, + sethostname = 74, + setrlimit = 75, + getrlimit = 76, + getrusage = 77, + gettimeofday = 78, + settimeofday = 79, + getgroups = 80, + setgroups = 81, + select = 82, + symlink = 83, + oldlstat = 84, + readlink = 85, + uselib = 86, + swapon = 87, + reboot = 88, + readdir = 89, + mmap = 90, + munmap = 91, + truncate = 92, + ftruncate = 93, + fchmod = 94, + fchown = 95, + getpriority = 96, + setpriority = 97, + profil = 98, + statfs = 99, + fstatfs = 100, + ioperm = 101, + socketcall = 102, + syslog = 103, + setitimer = 104, + getitimer = 105, + stat = 106, + lstat = 107, + fstat = 108, + olduname = 109, + iopl = 110, + vhangup = 111, + idle = 112, + vm86 = 113, + wait4 = 114, + swapoff = 115, + sysinfo = 116, + ipc = 117, + fsync = 118, + sigreturn = 119, + clone = 120, + setdomainname = 121, + uname = 122, + modify_ldt = 123, + adjtimex = 124, + mprotect = 125, + sigprocmask = 126, + create_module = 127, + init_module = 128, + delete_module = 129, + get_kernel_syms = 130, + quotactl = 131, + getpgid = 132, + fchdir = 133, + bdflush = 134, + sysfs = 135, + personality = 136, + afs_syscall = 137, + setfsuid = 138, + setfsgid = 139, + _llseek = 140, + getdents = 141, + _newselect = 142, + flock = 143, + msync = 144, + readv = 145, + writev = 146, + getsid = 147, + fdatasync = 148, + _sysctl = 149, + mlock = 150, + munlock = 151, + mlockall = 152, + munlockall = 153, + sched_setparam = 154, + sched_getparam = 155, + sched_setscheduler = 156, + sched_getscheduler = 157, + sched_yield = 158, + sched_get_priority_max = 159, + sched_get_priority_min = 160, + sched_rr_get_interval = 161, + nanosleep = 162, + mremap = 163, + setresuid = 164, + getresuid = 165, + query_module = 166, + poll = 167, + nfsservctl = 168, + setresgid 
= 169, + getresgid = 170, + prctl = 171, + rt_sigreturn = 172, + rt_sigaction = 173, + rt_sigprocmask = 174, + rt_sigpending = 175, + rt_sigtimedwait = 176, + rt_sigqueueinfo = 177, + rt_sigsuspend = 178, + pread64 = 179, + pwrite64 = 180, + chown = 181, + getcwd = 182, + capget = 183, + capset = 184, + sigaltstack = 185, + sendfile = 186, + getpmsg = 187, + putpmsg = 188, + vfork = 189, + ugetrlimit = 190, + readahead = 191, + pciconfig_read = 198, + pciconfig_write = 199, + pciconfig_iobase = 200, + multiplexer = 201, + getdents64 = 202, + pivot_root = 203, + madvise = 205, + mincore = 206, + gettid = 207, + tkill = 208, + setxattr = 209, + lsetxattr = 210, + fsetxattr = 211, + getxattr = 212, + lgetxattr = 213, + fgetxattr = 214, + listxattr = 215, + llistxattr = 216, + flistxattr = 217, + removexattr = 218, + lremovexattr = 219, + fremovexattr = 220, + futex = 221, + sched_setaffinity = 222, + sched_getaffinity = 223, + tuxcall = 225, + io_setup = 227, + io_destroy = 228, + io_getevents = 229, + io_submit = 230, + io_cancel = 231, + set_tid_address = 232, + fadvise64 = 233, + exit_group = 234, + lookup_dcookie = 235, + epoll_create = 236, + epoll_ctl = 237, + epoll_wait = 238, + remap_file_pages = 239, + timer_create = 240, + timer_settime = 241, + timer_gettime = 242, + timer_getoverrun = 243, + timer_delete = 244, + clock_settime = 245, + clock_gettime = 246, + clock_getres = 247, + clock_nanosleep = 248, + swapcontext = 249, + tgkill = 250, + utimes = 251, + statfs64 = 252, + fstatfs64 = 253, + rtas = 255, + sys_debug_setcontext = 256, + migrate_pages = 258, + mbind = 259, + get_mempolicy = 260, + set_mempolicy = 261, + mq_open = 262, + mq_unlink = 263, + mq_timedsend = 264, + mq_timedreceive = 265, + mq_notify = 266, + mq_getsetattr = 267, + kexec_load = 268, + add_key = 269, + request_key = 270, + keyctl = 271, + waitid = 272, + ioprio_set = 273, + ioprio_get = 274, + inotify_init = 275, + inotify_add_watch = 276, + inotify_rm_watch = 277, + spu_run = 278, 
+ spu_create = 279, + pselect6 = 280, + ppoll = 281, + unshare = 282, + splice = 283, + tee = 284, + vmsplice = 285, + openat = 286, + mkdirat = 287, + mknodat = 288, + fchownat = 289, + futimesat = 290, + newfstatat = 291, + unlinkat = 292, + renameat = 293, + linkat = 294, + symlinkat = 295, + readlinkat = 296, + fchmodat = 297, + faccessat = 298, + get_robust_list = 299, + set_robust_list = 300, + move_pages = 301, + getcpu = 302, + epoll_pwait = 303, + utimensat = 304, + signalfd = 305, + timerfd_create = 306, + eventfd = 307, + sync_file_range2 = 308, + fallocate = 309, + subpage_prot = 310, + timerfd_settime = 311, + timerfd_gettime = 312, + signalfd4 = 313, + eventfd2 = 314, + epoll_create1 = 315, + dup3 = 316, + pipe2 = 317, + inotify_init1 = 318, + perf_event_open = 319, + preadv = 320, + pwritev = 321, + rt_tgsigqueueinfo = 322, + fanotify_init = 323, + fanotify_mark = 324, + prlimit64 = 325, + socket = 326, + bind = 327, + connect = 328, + listen = 329, + accept = 330, + getsockname = 331, + getpeername = 332, + socketpair = 333, + send = 334, + sendto = 335, + recv = 336, + recvfrom = 337, + shutdown = 338, + setsockopt = 339, + getsockopt = 340, + sendmsg = 341, + recvmsg = 342, + recvmmsg = 343, + accept4 = 344, + name_to_handle_at = 345, + open_by_handle_at = 346, + clock_adjtime = 347, + syncfs = 348, + sendmmsg = 349, + setns = 350, + process_vm_readv = 351, + process_vm_writev = 352, + finit_module = 353, + kcmp = 354, + sched_setattr = 355, + sched_getattr = 356, + renameat2 = 357, + seccomp = 358, + getrandom = 359, + memfd_create = 360, + bpf = 361, + execveat = 362, + switch_endian = 363, + userfaultfd = 364, + membarrier = 365, + mlock2 = 378, + copy_file_range = 379, + preadv2 = 380, + pwritev2 = 381, + kexec_file_load = 382, + statx = 383, + pkey_alloc = 384, + pkey_free = 385, + pkey_mprotect = 386, + rseq = 387, + io_pgetevents = 388, + semtimedop = 392, + semget = 393, + semctl = 394, + shmget = 395, + shmctl = 396, + shmat = 397, + 
shmdt = 398, + msgget = 399, + msgsnd = 400, + msgrcv = 401, + msgctl = 402, + pidfd_send_signal = 424, + io_uring_setup = 425, + io_uring_enter = 426, + io_uring_register = 427, + open_tree = 428, + move_mount = 429, + fsopen = 430, + fsconfig = 431, + fsmount = 432, + fspick = 433, + pidfd_open = 434, + clone3 = 435, + openat2 = 437, + pidfd_getfd = 438, + + _, +}; + +pub const O_CREAT = 0o100; +pub const O_EXCL = 0o200; +pub const O_NOCTTY = 0o400; +pub const O_TRUNC = 0o1000; +pub const O_APPEND = 0o2000; +pub const O_NONBLOCK = 0o4000; +pub const O_DSYNC = 0o10000; +pub const O_SYNC = 0o4010000; +pub const O_RSYNC = 0o4010000; +pub const O_DIRECTORY = 0o40000; +pub const O_NOFOLLOW = 0o100000; +pub const O_CLOEXEC = 0o2000000; + +pub const O_ASYNC = 0o20000; +pub const O_DIRECT = 0o400000; +pub const O_LARGEFILE = 0o200000; +pub const O_NOATIME = 0o1000000; +pub const O_PATH = 0o10000000; +pub const O_TMPFILE = 0o20200000; +pub const O_NDELAY = O_NONBLOCK; + +pub const F_DUPFD = 0; +pub const F_GETFD = 1; +pub const F_SETFD = 2; +pub const F_GETFL = 3; +pub const F_SETFL = 4; + +pub const F_SETOWN = 8; +pub const F_GETOWN = 9; +pub const F_SETSIG = 10; +pub const F_GETSIG = 11; + +pub const F_GETLK = 5; +pub const F_SETLK = 6; +pub const F_SETLKW = 7; + +pub const F_RDLCK = 0; +pub const F_WRLCK = 1; +pub const F_UNLCK = 2; + +pub const LOCK_SH = 1; +pub const LOCK_EX = 2; +pub const LOCK_UN = 8; +pub const LOCK_NB = 4; + +pub const F_SETOWN_EX = 15; +pub const F_GETOWN_EX = 16; + +pub const F_GETOWNER_UIDS = 17; + +/// stack-like segment +pub const MAP_GROWSDOWN = 0x0100; + +/// ETXTBSY +pub const MAP_DENYWRITE = 0x0800; + +/// mark it as an executable +pub const MAP_EXECUTABLE = 0x1000; + +/// pages are locked +pub const MAP_LOCKED = 0x0080; + +/// don't check for reservations +pub const MAP_NORESERVE = 0x0040; + +pub const VDSO_CGT_SYM = "__kernel_clock_gettime"; +pub const VDSO_CGT_VER = "LINUX_2.6.15"; + +pub const Flock = extern struct { + l_type: i16, + 
l_whence: i16, + l_start: off_t, + l_len: off_t, + l_pid: pid_t, + __unused: [4]u8, +}; + +pub const msghdr = extern struct { + msg_name: ?*sockaddr, + msg_namelen: socklen_t, + msg_iov: [*]iovec, + msg_iovlen: usize, + msg_control: ?*c_void, + msg_controllen: usize, + msg_flags: i32, +}; + +pub const msghdr_const = extern struct { + msg_name: ?*const sockaddr, + msg_namelen: socklen_t, + msg_iov: [*]iovec_const, + msg_iovlen: usize, + msg_control: ?*c_void, + msg_controllen: usize, + msg_flags: i32, +}; + +pub const blksize_t = i64; +pub const nlink_t = u64; +pub const time_t = i64; +pub const mode_t = u32; +pub const off_t = i64; +pub const ino_t = u64; +pub const dev_t = u64; +pub const blkcnt_t = i64; + +/// Renamed to Stat to not conflict with the stat function. +/// atime, mtime, and ctime have functions to return `timespec`, +/// because although this is a POSIX API, the layout and names of +/// the structs are inconsistent across operating systems, and +/// in C, macros are used to hide the differences. Here we use +/// methods to accomplish this. +pub const Stat = extern struct { + dev: dev_t, + ino: ino_t, + nlink: nlink_t, + mode: mode_t, + uid: uid_t, + gid: gid_t, + rdev: dev_t, + size: off_t, + blksize: blksize_t, + blocks: blkcnt_t, + atim: timespec, + mtim: timespec, + ctim: timespec, + __unused: [3]u64, + + pub fn atime(self: Stat) timespec { + return self.atim; + } + + pub fn mtime(self: Stat) timespec { + return self.mtim; + } + + pub fn ctime(self: Stat) timespec { + return self.ctim; + } +}; + +pub const timespec = extern struct { + tv_sec: time_t, + tv_nsec: isize, +}; + +pub const timeval = extern struct { + tv_sec: isize, + tv_usec: isize, +}; + +pub const timezone = extern struct { + tz_minuteswest: i32, + tz_dsttime: i32, +}; + +pub const greg_t = u64; +pub const gregset_t = [48]greg_t; +pub const fpregset_t = [33]f64; + +/// The position of the vscr register depends on endianness. 
+/// In C, macros are used to change vscr_word's offset to +/// account for this. Here we'll just define vscr_word_le +/// and vscr_word_be. Code must take care to use the correct one. +pub const vrregset = extern struct { + vrregs: [32][4]u32 align(16), + vscr_word_le: u32, + _pad1: [2]u32, + vscr_word_be: u32, + vrsave: u32, + _pad2: [3]u32, +}; +pub const vrregset_t = vrregset; + +pub const mcontext_t = extern struct { + __unused: [4]u64, + signal: i32, + _pad0: i32, + handler: u64, + oldmask: u64, + regs: ?*c_void, + gp_regs: gregset_t, + fp_regs: fpregset_t, + v_regs: *vrregset_t, + vmx_reserve: [34 + 34 + 32 + 1]i64, +}; + +pub const ucontext_t = extern struct { + flags: u32, + link: *ucontext_t, + stack: stack_t, + sigmask: sigset_t, + mcontext: mcontext_t, +}; + +pub const Elf_Symndx = u32; diff --git a/lib/std/os/bits/linux/prctl.zig b/lib/std/os/bits/linux/prctl.zig new file mode 100644 index 000000000..7fa9969af --- /dev/null +++ b/lib/std/os/bits/linux/prctl.zig @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software.
+ +pub const PR_SET_PDEATHSIG = 1; +pub const PR_GET_PDEATHSIG = 2; + +pub const PR_GET_DUMPABLE = 3; +pub const PR_SET_DUMPABLE = 4; + +pub const PR_GET_UNALIGN = 5; +pub const PR_SET_UNALIGN = 6; +pub const PR_UNALIGN_NOPRINT = 1; +pub const PR_UNALIGN_SIGBUS = 2; + +pub const PR_GET_KEEPCAPS = 7; +pub const PR_SET_KEEPCAPS = 8; + +pub const PR_GET_FPEMU = 9; +pub const PR_SET_FPEMU = 10; +pub const PR_FPEMU_NOPRINT = 1; +pub const PR_FPEMU_SIGFPE = 2; + +pub const PR_GET_FPEXC = 11; +pub const PR_SET_FPEXC = 12; +pub const PR_FP_EXC_SW_ENABLE = 0x80; +pub const PR_FP_EXC_DIV = 0x010000; +pub const PR_FP_EXC_OVF = 0x020000; +pub const PR_FP_EXC_UND = 0x040000; +pub const PR_FP_EXC_RES = 0x080000; +pub const PR_FP_EXC_INV = 0x100000; +pub const PR_FP_EXC_DISABLED = 0; +pub const PR_FP_EXC_NONRECOV = 1; +pub const PR_FP_EXC_ASYNC = 2; +pub const PR_FP_EXC_PRECISE = 3; + +pub const PR_GET_TIMING = 13; +pub const PR_SET_TIMING = 14; +pub const PR_TIMING_STATISTICAL = 0; +pub const PR_TIMING_TIMESTAMP = 1; + +pub const PR_SET_NAME = 15; +pub const PR_GET_NAME = 16; + +pub const PR_GET_ENDIAN = 19; +pub const PR_SET_ENDIAN = 20; +pub const PR_ENDIAN_BIG = 0; +pub const PR_ENDIAN_LITTLE = 1; +pub const PR_ENDIAN_PPC_LITTLE = 2; + +pub const PR_GET_SECCOMP = 21; +pub const PR_SET_SECCOMP = 22; + +pub const PR_CAPBSET_READ = 23; +pub const PR_CAPBSET_DROP = 24; + +pub const PR_GET_TSC = 25; +pub const PR_SET_TSC = 26; +pub const PR_TSC_ENABLE = 1; +pub const PR_TSC_SIGSEGV = 2; + +pub const PR_GET_SECUREBITS = 27; +pub const PR_SET_SECUREBITS = 28; + +pub const PR_SET_TIMERSLACK = 29; +pub const PR_GET_TIMERSLACK = 30; + +pub const PR_TASK_PERF_EVENTS_DISABLE = 31; +pub const PR_TASK_PERF_EVENTS_ENABLE = 32; + +pub const PR_MCE_KILL = 33; +pub const PR_MCE_KILL_CLEAR = 0; +pub const PR_MCE_KILL_SET = 1; + +pub const PR_MCE_KILL_LATE = 0; +pub const PR_MCE_KILL_EARLY = 1; +pub const PR_MCE_KILL_DEFAULT = 2; + +pub const PR_MCE_KILL_GET = 34; + +pub const PR_SET_MM = 35; 
+pub const PR_SET_MM_START_CODE = 1; +pub const PR_SET_MM_END_CODE = 2; +pub const PR_SET_MM_START_DATA = 3; +pub const PR_SET_MM_END_DATA = 4; +pub const PR_SET_MM_START_STACK = 5; +pub const PR_SET_MM_START_BRK = 6; +pub const PR_SET_MM_BRK = 7; +pub const PR_SET_MM_ARG_START = 8; +pub const PR_SET_MM_ARG_END = 9; +pub const PR_SET_MM_ENV_START = 10; +pub const PR_SET_MM_ENV_END = 11; +pub const PR_SET_MM_AUXV = 12; +pub const PR_SET_MM_EXE_FILE = 13; +pub const PR_SET_MM_MAP = 14; +pub const PR_SET_MM_MAP_SIZE = 15; + +pub const prctl_mm_map = extern struct { + start_code: u64, + end_code: u64, + start_data: u64, + end_data: u64, + start_brk: u64, + brk: u64, + start_stack: u64, + arg_start: u64, + arg_end: u64, + env_start: u64, + env_end: u64, + auxv: *u64, + auxv_size: u32, + exe_fd: u32, +}; + +pub const PR_SET_PTRACER = 0x59616d61; +pub const PR_SET_PTRACER_ANY = std.math.maxInt(c_ulong); + +pub const PR_SET_CHILD_SUBREAPER = 36; +pub const PR_GET_CHILD_SUBREAPER = 37; + +pub const PR_SET_NO_NEW_PRIVS = 38; +pub const PR_GET_NO_NEW_PRIVS = 39; + +pub const PR_GET_TID_ADDRESS = 40; + +pub const PR_SET_THP_DISABLE = 41; +pub const PR_GET_THP_DISABLE = 42; + +pub const PR_MPX_ENABLE_MANAGEMENT = 43; +pub const PR_MPX_DISABLE_MANAGEMENT = 44; + +pub const PR_SET_FP_MODE = 45; +pub const PR_GET_FP_MODE = 46; +pub const PR_FP_MODE_FR = 1 << 0; +pub const PR_FP_MODE_FRE = 1 << 1; + +pub const PR_CAP_AMBIENT = 47; +pub const PR_CAP_AMBIENT_IS_SET = 1; +pub const PR_CAP_AMBIENT_RAISE = 2; +pub const PR_CAP_AMBIENT_LOWER = 3; +pub const PR_CAP_AMBIENT_CLEAR_ALL = 4; + +pub const PR_SVE_SET_VL = 50; +pub const PR_SVE_SET_VL_ONEXEC = 1 << 18; +pub const PR_SVE_GET_VL = 51; +pub const PR_SVE_VL_LEN_MASK = 0xffff; +pub const PR_SVE_VL_INHERIT = 1 << 17; + +pub const PR_GET_SPECULATION_CTRL = 52; +pub const PR_SET_SPECULATION_CTRL = 53; +pub const PR_SPEC_STORE_BYPASS = 0; +pub const PR_SPEC_NOT_AFFECTED = 0; +pub const PR_SPEC_PRCTL = 1 << 0; +pub const PR_SPEC_ENABLE = 
1 << 1; +pub const PR_SPEC_DISABLE = 1 << 2; +pub const PR_SPEC_FORCE_DISABLE = 1 << 3; diff --git a/lib/std/os/bits/linux/securebits.zig b/lib/std/os/bits/linux/securebits.zig new file mode 100644 index 000000000..0086a694d --- /dev/null +++ b/lib/std/os/bits/linux/securebits.zig @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. + +fn issecure_mask(comptime x: comptime_int) comptime_int { + return 1 << x; +} + +pub const SECUREBITS_DEFAULT = 0x00000000; + +pub const SECURE_NOROOT = 0; +pub const SECURE_NOROOT_LOCKED = 1; + +pub const SECBIT_NOROOT = issecure_mask(SECURE_NOROOT); +pub const SECBIT_NOROOT_LOCKED = issecure_mask(SECURE_NOROOT_LOCKED); + +pub const SECURE_NO_SETUID_FIXUP = 2; +pub const SECURE_NO_SETUID_FIXUP_LOCKED = 3; + +pub const SECBIT_NO_SETUID_FIXUP = issecure_mask(SECURE_NO_SETUID_FIXUP); +pub const SECBIT_NO_SETUID_FIXUP_LOCKED = issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED); + +pub const SECURE_KEEP_CAPS = 4; +pub const SECURE_KEEP_CAPS_LOCKED = 5; + +pub const SECBIT_KEEP_CAPS = issecure_mask(SECURE_KEEP_CAPS); +pub const SECBIT_KEEP_CAPS_LOCKED = issecure_mask(SECURE_KEEP_CAPS_LOCKED); + +pub const SECURE_NO_CAP_AMBIENT_RAISE = 6; +pub const SECURE_NO_CAP_AMBIENT_RAISE_LOCKED = 7; + +pub const SECBIT_NO_CAP_AMBIENT_RAISE = issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE); +pub const SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED = issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED); + +pub const SECURE_ALL_BITS = issecure_mask(SECURE_NOROOT) | + issecure_mask(SECURE_NO_SETUID_FIXUP) | + issecure_mask(SECURE_KEEP_CAPS) | + issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE); +pub const SECURE_ALL_LOCKS = SECURE_ALL_BITS << 1; diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 8f697fb96..50d1e4ae7 100644 --- 
a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -25,6 +25,7 @@ pub usingnamespace switch (builtin.arch) { .arm => @import("linux/arm-eabi.zig"), .riscv64 => @import("linux/riscv64.zig"), .mips, .mipsel => @import("linux/mips.zig"), + .powerpc64, .powerpc64le => @import("linux/powerpc64.zig"), else => struct {}, }; pub usingnamespace @import("bits.zig"); @@ -1258,6 +1259,10 @@ pub fn fdatasync(fd: fd_t) usize { return syscall1(.fdatasync, @bitCast(usize, @as(isize, fd))); } +pub fn prctl(option: i32, arg2: usize, arg3: usize, arg4: usize, arg5: usize) usize { + return syscall5(.prctl, @bitCast(usize, @as(isize, option)), arg2, arg3, arg4, arg5); +} + test "" { if (builtin.os.tag == .linux) { _ = @import("linux/test.zig"); diff --git a/lib/std/os/linux/bpf.zig b/lib/std/os/linux/bpf.zig index 928c157c4..44c938feb 100644 --- a/lib/std/os/linux/bpf.zig +++ b/lib/std/os/linux/bpf.zig @@ -3,9 +3,16 @@ // This file is part of [zig](https://ziglang.org/), which is MIT licensed. // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. 
-usingnamespace std.os; +usingnamespace std.os.linux; const std = @import("../../std.zig"); +const errno = getErrno; +const unexpectedErrno = std.os.unexpectedErrno; const expectEqual = std.testing.expectEqual; +const expectError = std.testing.expectError; +const expect = std.testing.expect; + +pub const btf = @import("bpf/btf.zig"); +pub const kern = @import("bpf/kern.zig"); // instruction classes pub const LD = 0x00; @@ -62,6 +69,7 @@ pub const MAXINSNS = 4096; // instruction classes /// jmp mode in word width pub const JMP32 = 0x06; + /// alu mode in double word width pub const ALU64 = 0x07; @@ -72,14 +80,17 @@ pub const XADD = 0xc0; // alu/jmp fields /// mov reg to reg pub const MOV = 0xb0; + /// sign extending arithmetic shift right */ pub const ARSH = 0xc0; // change endianness of a register /// flags for endianness conversion: pub const END = 0xd0; + /// convert to little-endian */ pub const TO_LE = 0x00; + /// convert to big-endian pub const TO_BE = 0x08; pub const FROM_LE = TO_LE; @@ -88,29 +99,39 @@ pub const FROM_BE = TO_BE; // jmp encodings /// jump != * pub const JNE = 0x50; + /// LT is unsigned, '<' pub const JLT = 0xa0; + /// LE is unsigned, '<=' * pub const JLE = 0xb0; + /// SGT is signed '>', GT in x86 pub const JSGT = 0x60; + /// SGE is signed '>=', GE in x86 pub const JSGE = 0x70; + /// SLT is signed, '<' pub const JSLT = 0xc0; + /// SLE is signed, '<=' pub const JSLE = 0xd0; + /// function call pub const CALL = 0x80; + /// function return pub const EXIT = 0x90; /// Flag for prog_attach command. If a sub-cgroup installs some bpf program, the /// program in this cgroup yields to sub-cgroup program. pub const F_ALLOW_OVERRIDE = 0x1; + /// Flag for prog_attach command. If a sub-cgroup installs some bpf program, /// that cgroup program gets run in addition to the program in this cgroup. pub const F_ALLOW_MULTI = 0x2; + /// Flag for prog_attach command. 
pub const F_REPLACE = 0x4; @@ -164,47 +185,61 @@ pub const PSEUDO_CALL = 1; /// flag for BPF_MAP_UPDATE_ELEM command. create new element or update existing pub const ANY = 0; + /// flag for BPF_MAP_UPDATE_ELEM command. create new element if it didn't exist pub const NOEXIST = 1; + /// flag for BPF_MAP_UPDATE_ELEM command. update existing element pub const EXIST = 2; + /// flag for BPF_MAP_UPDATE_ELEM command. spin_lock-ed map_lookup/map_update pub const F_LOCK = 4; /// flag for BPF_MAP_CREATE command */ pub const BPF_F_NO_PREALLOC = 0x1; + /// flag for BPF_MAP_CREATE command. Instead of having one common LRU list in /// the BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list which can /// scale and perform better. Note, the LRU nodes (including free nodes) cannot /// be moved across different LRU lists. pub const BPF_F_NO_COMMON_LRU = 0x2; + /// flag for BPF_MAP_CREATE command. Specify numa node during map creation pub const BPF_F_NUMA_NODE = 0x4; + /// flag for BPF_MAP_CREATE command. Flags for BPF object read access from /// syscall side pub const BPF_F_RDONLY = 0x8; + /// flag for BPF_MAP_CREATE command. Flags for BPF object write access from /// syscall side pub const BPF_F_WRONLY = 0x10; + /// flag for BPF_MAP_CREATE command. Flag for stack_map, store build_id+offset /// instead of pointer pub const BPF_F_STACK_BUILD_ID = 0x20; + /// flag for BPF_MAP_CREATE command. Zero-initialize hash function seed. This /// should only be used for testing. pub const BPF_F_ZERO_SEED = 0x40; + /// flag for BPF_MAP_CREATE command Flags for accessing BPF object from program /// side. pub const BPF_F_RDONLY_PROG = 0x80; + /// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program /// side. pub const BPF_F_WRONLY_PROG = 0x100; + /// flag for BPF_MAP_CREATE command. Clone map from listener for newly accepted /// socket pub const BPF_F_CLONE = 0x200; + /// flag for BPF_MAP_CREATE command. 
Enable memory-mapping BPF map pub const BPF_F_MMAPABLE = 0x400; -/// These values correspond to "syscalls" within the BPF program's environment +/// These values correspond to "syscalls" within the BPF program's environment, +/// each one is documented in std.os.linux.BPF.kern pub const Helper = enum(i32) { unspec, map_lookup_elem, @@ -325,9 +360,34 @@ pub const Helper = enum(i32) { tcp_send_ack, send_signal_thread, jiffies64, + read_branch_records, + get_ns_current_pid_tgid, + xdp_output, + get_netns_cookie, + get_current_ancestor_cgroup_id, + sk_assign, + ktime_get_boot_ns, + seq_printf, + seq_write, + sk_cgroup_id, + sk_ancestor_cgroup_id, + ringbuf_output, + ringbuf_reserve, + ringbuf_submit, + ringbuf_discard, + ringbuf_query, + csum_level, + skc_to_tcp6_sock, + skc_to_tcp_sock, + skc_to_tcp_timewait_sock, + skc_to_tcp_request_sock, + skc_to_udp6_sock, + get_task_stack, _, }; +// TODO: determine that this is the expected bit layout for both little and big +// endian systems /// a single BPF instruction pub const Insn = packed struct { code: u8, @@ -340,19 +400,30 @@ pub const Insn = packed struct { /// frame pub const Reg = packed enum(u4) { r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10 }; const Source = packed enum(u1) { reg, imm }; + + const Mode = packed enum(u8) { + imm = IMM, + abs = ABS, + ind = IND, + mem = MEM, + len = LEN, + msh = MSH, + }; + const AluOp = packed enum(u8) { add = ADD, sub = SUB, mul = MUL, div = DIV, - op_or = OR, - op_and = AND, + alu_or = OR, + alu_and = AND, lsh = LSH, rsh = RSH, neg = NEG, mod = MOD, xor = XOR, mov = MOV, + arsh = ARSH, }; pub const Size = packed enum(u8) { @@ -368,6 +439,13 @@ pub const Insn = packed struct { jgt = JGT, jge = JGE, jset = JSET, + jlt = JLT, + jle = JLE, + jne = JNE, + jsgt = JSGT, + jsge = JSGE, + jslt = JSLT, + jsle = JSLE, }; const ImmOrReg = union(Source) { @@ -419,22 +497,100 @@ pub const Insn = packed struct { return alu(64, .add, dst, src); } + pub fn sub(dst: Reg, src: anytype) Insn { + 
return alu(64, .sub, dst, src); + } + + pub fn mul(dst: Reg, src: anytype) Insn { + return alu(64, .mul, dst, src); + } + + pub fn div(dst: Reg, src: anytype) Insn { + return alu(64, .div, dst, src); + } + + pub fn alu_or(dst: Reg, src: anytype) Insn { + return alu(64, .alu_or, dst, src); + } + + pub fn alu_and(dst: Reg, src: anytype) Insn { + return alu(64, .alu_and, dst, src); + } + + pub fn lsh(dst: Reg, src: anytype) Insn { + return alu(64, .lsh, dst, src); + } + + pub fn rsh(dst: Reg, src: anytype) Insn { + return alu(64, .rsh, dst, src); + } + + pub fn neg(dst: Reg) Insn { + return alu(64, .neg, dst, 0); + } + + pub fn mod(dst: Reg, src: anytype) Insn { + return alu(64, .mod, dst, src); + } + + pub fn xor(dst: Reg, src: anytype) Insn { + return alu(64, .xor, dst, src); + } + + pub fn arsh(dst: Reg, src: anytype) Insn { + return alu(64, .arsh, dst, src); + } + fn jmp(op: JmpOp, dst: Reg, src: anytype, off: i16) Insn { return imm_reg(JMP | @enumToInt(op), dst, src, off); } + pub fn ja(off: i16) Insn { + return jmp(.ja, .r0, 0, off); + } + pub fn jeq(dst: Reg, src: anytype, off: i16) Insn { return jmp(.jeq, dst, src, off); } - pub fn stx_mem(size: Size, dst: Reg, src: Reg, off: i16) Insn { - return Insn{ - .code = STX | @enumToInt(size) | MEM, - .dst = @enumToInt(dst), - .src = @enumToInt(src), - .off = off, - .imm = 0, - }; + pub fn jgt(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jgt, dst, src, off); + } + + pub fn jge(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jge, dst, src, off); + } + + pub fn jlt(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jlt, dst, src, off); + } + + pub fn jle(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jle, dst, src, off); + } + + pub fn jset(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jset, dst, src, off); + } + + pub fn jne(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jne, dst, src, off); + } + + pub fn jsgt(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jsgt, 
dst, src, off); + } + + pub fn jsge(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jsge, dst, src, off); + } + + pub fn jslt(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jslt, dst, src, off); + } + + pub fn jsle(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jsle, dst, src, off); } pub fn xadd(dst: Reg, src: Reg) Insn { @@ -447,17 +603,34 @@ pub const Insn = packed struct { }; } - /// direct packet access, R0 = *(uint *)(skb->data + imm32) - pub fn ld_abs(size: Size, imm: i32) Insn { + fn ld(mode: Mode, size: Size, dst: Reg, src: Reg, imm: i32) Insn { return Insn{ - .code = LD | @enumToInt(size) | ABS, - .dst = 0, - .src = 0, + .code = @enumToInt(mode) | @enumToInt(size) | LD, + .dst = @enumToInt(dst), + .src = @enumToInt(src), .off = 0, .imm = imm, }; } + pub fn ld_abs(size: Size, dst: Reg, src: Reg, imm: i32) Insn { + return ld(.abs, size, dst, src, imm); + } + + pub fn ld_ind(size: Size, dst: Reg, src: Reg, imm: i32) Insn { + return ld(.ind, size, dst, src, imm); + } + + pub fn ldx(size: Size, dst: Reg, src: Reg, off: i16) Insn { + return Insn{ + .code = MEM | @enumToInt(size) | LDX, + .dst = @enumToInt(dst), + .src = @enumToInt(src), + .off = off, + .imm = 0, + }; + } + fn ld_imm_impl1(dst: Reg, src: Reg, imm: u64) Insn { return Insn{ .code = LD | DW | IMM, @@ -478,6 +651,14 @@ pub const Insn = packed struct { }; } + pub fn ld_dw1(dst: Reg, imm: u64) Insn { + return ld_imm_impl1(dst, .r0, imm); + } + + pub fn ld_dw2(imm: u64) Insn { + return ld_imm_impl2(imm); + } + pub fn ld_map_fd1(dst: Reg, map_fd: fd_t) Insn { return ld_imm_impl1(dst, @intToEnum(Reg, PSEUDO_MAP_FD), @intCast(u64, map_fd)); } @@ -486,6 +667,53 @@ pub const Insn = packed struct { return ld_imm_impl2(@intCast(u64, map_fd)); } + pub fn st(comptime size: Size, dst: Reg, off: i16, imm: i32) Insn { + if (size == .double_word) @compileError("TODO: need to determine how to correctly handle double words"); + return Insn{ + .code = MEM | @enumToInt(size) | ST, + .dst = 
@enumToInt(dst), + .src = 0, + .off = off, + .imm = imm, + }; + } + + pub fn stx(size: Size, dst: Reg, off: i16, src: Reg) Insn { + return Insn{ + .code = MEM | @enumToInt(size) | STX, + .dst = @enumToInt(dst), + .src = @enumToInt(src), + .off = off, + .imm = 0, + }; + } + + fn endian_swap(endian: std.builtin.Endian, comptime size: Size, dst: Reg) Insn { + return Insn{ + .code = switch (endian) { + .Big => 0xdc, + .Little => 0xd4, + }, + .dst = @enumToInt(dst), + .src = 0, + .off = 0, + .imm = switch (size) { + .byte => @compileError("can't swap a single byte"), + .half_word => 16, + .word => 32, + .double_word => 64, + }, + }; + } + + pub fn le(comptime size: Size, dst: Reg) Insn { + return endian_swap(.Little, size, dst); + } + + pub fn be(comptime size: Size, dst: Reg) Insn { + return endian_swap(.Big, size, dst); + } + pub fn call(helper: Helper) Insn { return Insn{ .code = JMP | CALL, @@ -508,95 +736,242 @@ pub const Insn = packed struct { } }; -fn expect_insn(insn: Insn, val: u64) void { - expectEqual(@bitCast(u64, insn), val); -} - test "insn bitsize" { expectEqual(@bitSizeOf(Insn), 64); } -// mov instructions -test "mov imm" { - expect_insn(Insn.mov(.r1, 1), 0x00000001000001b7); +fn expect_opcode(code: u8, insn: Insn) void { + expectEqual(code, insn.code); } -test "mov reg" { - expect_insn(Insn.mov(.r6, .r1), 0x00000000000016bf); -} +// The opcodes were grabbed from https://github.com/iovisor/bpf-docs/blob/master/eBPF.md +test "opcodes" { + // instructions that have a name that end with 1 or 2 are consecutive for + // loading 64-bit immediates (imm is only 32 bits wide) -// alu instructions -test "add imm" { - expect_insn(Insn.add(.r2, -4), 0xfffffffc00000207); -} + // alu instructions + expect_opcode(0x07, Insn.add(.r1, 0)); + expect_opcode(0x0f, Insn.add(.r1, .r2)); + expect_opcode(0x17, Insn.sub(.r1, 0)); + expect_opcode(0x1f, Insn.sub(.r1, .r2)); + expect_opcode(0x27, Insn.mul(.r1, 0)); + expect_opcode(0x2f, Insn.mul(.r1, .r2)); + expect_opcode(0x37, 
Insn.div(.r1, 0)); + expect_opcode(0x3f, Insn.div(.r1, .r2)); + expect_opcode(0x47, Insn.alu_or(.r1, 0)); + expect_opcode(0x4f, Insn.alu_or(.r1, .r2)); + expect_opcode(0x57, Insn.alu_and(.r1, 0)); + expect_opcode(0x5f, Insn.alu_and(.r1, .r2)); + expect_opcode(0x67, Insn.lsh(.r1, 0)); + expect_opcode(0x6f, Insn.lsh(.r1, .r2)); + expect_opcode(0x77, Insn.rsh(.r1, 0)); + expect_opcode(0x7f, Insn.rsh(.r1, .r2)); + expect_opcode(0x87, Insn.neg(.r1)); + expect_opcode(0x97, Insn.mod(.r1, 0)); + expect_opcode(0x9f, Insn.mod(.r1, .r2)); + expect_opcode(0xa7, Insn.xor(.r1, 0)); + expect_opcode(0xaf, Insn.xor(.r1, .r2)); + expect_opcode(0xb7, Insn.mov(.r1, 0)); + expect_opcode(0xbf, Insn.mov(.r1, .r2)); + expect_opcode(0xc7, Insn.arsh(.r1, 0)); + expect_opcode(0xcf, Insn.arsh(.r1, .r2)); -// ld instructions -test "ld_abs" { - expect_insn(Insn.ld_abs(.byte, 42), 0x0000002a00000030); -} + // atomic instructions: might be more of these not documented in the wild + expect_opcode(0xdb, Insn.xadd(.r1, .r2)); -test "ld_map_fd" { - expect_insn(Insn.ld_map_fd1(.r1, 42), 0x0000002a00001118); - expect_insn(Insn.ld_map_fd2(42), 0x0000000000000000); -} + // TODO: byteswap instructions + expect_opcode(0xd4, Insn.le(.half_word, .r1)); + expectEqual(@intCast(i32, 16), Insn.le(.half_word, .r1).imm); + expect_opcode(0xd4, Insn.le(.word, .r1)); + expectEqual(@intCast(i32, 32), Insn.le(.word, .r1).imm); + expect_opcode(0xd4, Insn.le(.double_word, .r1)); + expectEqual(@intCast(i32, 64), Insn.le(.double_word, .r1).imm); + expect_opcode(0xdc, Insn.be(.half_word, .r1)); + expectEqual(@intCast(i32, 16), Insn.be(.half_word, .r1).imm); + expect_opcode(0xdc, Insn.be(.word, .r1)); + expectEqual(@intCast(i32, 32), Insn.be(.word, .r1).imm); + expect_opcode(0xdc, Insn.be(.double_word, .r1)); + expectEqual(@intCast(i32, 64), Insn.be(.double_word, .r1).imm); -// st instructions -test "stx_mem" { - expect_insn(Insn.stx_mem(.word, .r10, .r0, -4), 0x00000000fffc0a63); -} + // memory instructions + 
expect_opcode(0x18, Insn.ld_dw1(.r1, 0)); + expect_opcode(0x00, Insn.ld_dw2(0)); -test "xadd" { - expect_insn(Insn.xadd(.r0, .r1), 0x00000000000010db); -} + // loading a map fd + expect_opcode(0x18, Insn.ld_map_fd1(.r1, 0)); + expectEqual(@intCast(u4, PSEUDO_MAP_FD), Insn.ld_map_fd1(.r1, 0).src); + expect_opcode(0x00, Insn.ld_map_fd2(0)); -// jmp instructions -test "jeq imm" { - expect_insn(Insn.jeq(.r0, 0, 2), 0x0000000000020015); -} + expect_opcode(0x38, Insn.ld_abs(.double_word, .r1, .r2, 0)); + expect_opcode(0x20, Insn.ld_abs(.word, .r1, .r2, 0)); + expect_opcode(0x28, Insn.ld_abs(.half_word, .r1, .r2, 0)); + expect_opcode(0x30, Insn.ld_abs(.byte, .r1, .r2, 0)); -// other instructions -test "call" { - expect_insn(Insn.call(.map_lookup_elem), 0x0000000100000085); -} + expect_opcode(0x58, Insn.ld_ind(.double_word, .r1, .r2, 0)); + expect_opcode(0x40, Insn.ld_ind(.word, .r1, .r2, 0)); + expect_opcode(0x48, Insn.ld_ind(.half_word, .r1, .r2, 0)); + expect_opcode(0x50, Insn.ld_ind(.byte, .r1, .r2, 0)); -test "exit" { - expect_insn(Insn.exit(), 0x0000000000000095); + expect_opcode(0x79, Insn.ldx(.double_word, .r1, .r2, 0)); + expect_opcode(0x61, Insn.ldx(.word, .r1, .r2, 0)); + expect_opcode(0x69, Insn.ldx(.half_word, .r1, .r2, 0)); + expect_opcode(0x71, Insn.ldx(.byte, .r1, .r2, 0)); + + expect_opcode(0x62, Insn.st(.word, .r1, 0, 0)); + expect_opcode(0x6a, Insn.st(.half_word, .r1, 0, 0)); + expect_opcode(0x72, Insn.st(.byte, .r1, 0, 0)); + + expect_opcode(0x63, Insn.stx(.word, .r1, 0, .r2)); + expect_opcode(0x6b, Insn.stx(.half_word, .r1, 0, .r2)); + expect_opcode(0x73, Insn.stx(.byte, .r1, 0, .r2)); + expect_opcode(0x7b, Insn.stx(.double_word, .r1, 0, .r2)); + + // branch instructions + expect_opcode(0x05, Insn.ja(0)); + expect_opcode(0x15, Insn.jeq(.r1, 0, 0)); + expect_opcode(0x1d, Insn.jeq(.r1, .r2, 0)); + expect_opcode(0x25, Insn.jgt(.r1, 0, 0)); + expect_opcode(0x2d, Insn.jgt(.r1, .r2, 0)); + expect_opcode(0x35, Insn.jge(.r1, 0, 0)); + expect_opcode(0x3d, 
Insn.jge(.r1, .r2, 0)); + expect_opcode(0xa5, Insn.jlt(.r1, 0, 0)); + expect_opcode(0xad, Insn.jlt(.r1, .r2, 0)); + expect_opcode(0xb5, Insn.jle(.r1, 0, 0)); + expect_opcode(0xbd, Insn.jle(.r1, .r2, 0)); + expect_opcode(0x45, Insn.jset(.r1, 0, 0)); + expect_opcode(0x4d, Insn.jset(.r1, .r2, 0)); + expect_opcode(0x55, Insn.jne(.r1, 0, 0)); + expect_opcode(0x5d, Insn.jne(.r1, .r2, 0)); + expect_opcode(0x65, Insn.jsgt(.r1, 0, 0)); + expect_opcode(0x6d, Insn.jsgt(.r1, .r2, 0)); + expect_opcode(0x75, Insn.jsge(.r1, 0, 0)); + expect_opcode(0x7d, Insn.jsge(.r1, .r2, 0)); + expect_opcode(0xc5, Insn.jslt(.r1, 0, 0)); + expect_opcode(0xcd, Insn.jslt(.r1, .r2, 0)); + expect_opcode(0xd5, Insn.jsle(.r1, 0, 0)); + expect_opcode(0xdd, Insn.jsle(.r1, .r2, 0)); + expect_opcode(0x85, Insn.call(.unspec)); + expect_opcode(0x95, Insn.exit()); } pub const Cmd = extern enum(usize) { + /// Create a map and return a file descriptor that refers to the map. The + /// close-on-exec file descriptor flag is automatically enabled for the new + /// file descriptor. + /// + /// uses MapCreateAttr map_create, + + /// Look up an element by key in a specified map and return its value. + /// + /// uses MapElemAttr map_lookup_elem, + + /// Create or update an element (key/value pair) in a specified map. + /// + /// uses MapElemAttr map_update_elem, + + /// Look up and delete an element by key in a specified map. + /// + /// uses MapElemAttr map_delete_elem, + + /// Look up an element by key in a specified map and return the key of the + /// next element. map_get_next_key, + + /// Verify and load an eBPF program, returning a new file descriptor + /// associated with the program. The close-on-exec file descriptor flag + /// is automatically enabled for the new file descriptor. 
+ /// + /// uses ProgLoadAttr prog_load, + + /// Pin a map or eBPF program to a path within the minimal BPF filesystem + /// + /// uses ObjAttr obj_pin, + + /// Get the file descriptor of a BPF object pinned to a certain path + /// + /// uses ObjAttr obj_get, + + /// uses ProgAttachAttr prog_attach, + + /// uses ProgAttachAttr prog_detach, + + /// uses TestRunAttr prog_test_run, + + /// uses GetIdAttr prog_get_next_id, + + /// uses GetIdAttr map_get_next_id, + + /// uses GetIdAttr prog_get_fd_by_id, + + /// uses GetIdAttr map_get_fd_by_id, + + /// uses InfoAttr obj_get_info_by_fd, + + /// uses QueryAttr prog_query, + + /// uses RawTracepointAttr raw_tracepoint_open, + + /// uses BtfLoadAttr btf_load, + + /// uses GetIdAttr btf_get_fd_by_id, + + /// uses TaskFdQueryAttr task_fd_query, + + /// uses MapElemAttr map_lookup_and_delete_elem, map_freeze, + + /// uses GetIdAttr btf_get_next_id, + + /// uses MapBatchAttr map_lookup_batch, + + /// uses MapBatchAttr map_lookup_and_delete_batch, + + /// uses MapBatchAttr map_update_batch, + + /// uses MapBatchAttr map_delete_batch, + + /// uses LinkCreateAttr link_create, + + /// uses LinkUpdateAttr link_update, + + /// uses GetIdAttr link_get_fd_by_id, + + /// uses GetIdAttr link_get_next_id, + + /// uses EnableStatsAttr enable_stats, + + /// uses IterCreateAttr iter_create, link_detach, _, @@ -630,42 +1005,138 @@ pub const MapType = extern enum(u32) { sk_storage, devmap_hash, struct_ops, + + /// An ordered and shared CPU version of perf_event_array. 
They have + /// similar semantics: + /// - variable length records + /// - no blocking: when full, reservation fails + /// - memory mappable for ease and speed + /// - epoll notifications for new data, but can busy poll + /// + /// Ringbufs give BPF programs two sets of APIs: + /// - ringbuf_output() allows copying data from one place to a ring + /// buffer, similar to bpf_perf_event_output() + /// - ringbuf_reserve()/ringbuf_submit()/ringbuf_discard() split the + /// process into two steps. First a fixed amount of space is reserved, + /// if that is successful then the program gets a pointer to a chunk of + /// memory and can be submitted with submit() or discarded with + /// discard() + /// + /// ringbuf_output() will incur an extra memory copy, but allows to submit + /// records of the length that's not known beforehand, and is an easy + /// replacement for perf_event_output(). + /// + /// ringbuf_reserve() avoids the extra memory copy but requires a known size + /// of memory beforehand.
+ /// + /// ringbuf_query() allows to query properties of the map, 4 are currently + /// supported: + /// - BPF_RB_AVAIL_DATA: amount of unconsumed data in ringbuf + /// - BPF_RB_RING_SIZE: returns size of ringbuf + /// - BPF_RB_CONS_POS/BPF_RB_PROD_POS returns current logical position + /// of consumer and producer respectively + /// + /// key size: 0 + /// value size: 0 + /// max entries: size of ringbuf, must be power of 2 ringbuf, + _, }; pub const ProgType = extern enum(u32) { unspec, + + /// context type: __sk_buff socket_filter, + + /// context type: bpf_user_pt_regs_t kprobe, + + /// context type: __sk_buff sched_cls, + + /// context type: __sk_buff sched_act, + + /// context type: u64 tracepoint, + + /// context type: xdp_md xdp, + + /// context type: bpf_perf_event_data perf_event, + + /// context type: __sk_buff cgroup_skb, + + /// context type: bpf_sock cgroup_sock, + + /// context type: __sk_buff lwt_in, + + /// context type: __sk_buff lwt_out, + + /// context type: __sk_buff lwt_xmit, + + /// context type: bpf_sock_ops sock_ops, + + /// context type: __sk_buff sk_skb, + + /// context type: bpf_cgroup_dev_ctx cgroup_device, + + /// context type: sk_msg_md sk_msg, + + /// context type: bpf_raw_tracepoint_args raw_tracepoint, + + /// context type: bpf_sock_addr cgroup_sock_addr, + + /// context type: __sk_buff lwt_seg6local, + + /// context type: u32 lirc_mode2, + + /// context type: sk_reuseport_md sk_reuseport, + + /// context type: __sk_buff flow_dissector, + + /// context type: bpf_sysctl cgroup_sysctl, + + /// context type: bpf_raw_tracepoint_args raw_tracepoint_writable, + + /// context type: bpf_sockopt cgroup_sockopt, + + /// context type: void * tracing, + + /// context type: void * struct_ops, + + /// context type: void * ext, + + /// context type: void * lsm, + + /// context type: bpf_sk_lookup sk_lookup, + _, }; pub const AttachType = extern enum(u32) { @@ -715,27 +1186,38 @@ const obj_name_len = 16; pub const MapCreateAttr = extern struct { 
/// one of MapType map_type: u32, + /// size of key in bytes key_size: u32, + /// size of value in bytes value_size: u32, + /// max number of entries in a map max_entries: u32, + /// .map_create related flags map_flags: u32, + /// fd pointing to the inner map inner_map_fd: fd_t, + /// numa node (effective only if MapCreateFlags.numa_node is set) numa_node: u32, map_name: [obj_name_len]u8, + /// ifindex of netdev to create on map_ifindex: u32, + /// fd pointing to a BTF type data btf_fd: fd_t, + /// BTF type_id of the key btf_key_type_id: u32, + /// BTF type_id of the value bpf_value_type_id: u32, + /// BTF type_id of a kernel struct stored as the map value btf_vmlinux_value_type_id: u32, }; @@ -755,10 +1237,12 @@ pub const MapElemAttr = extern struct { pub const MapBatchAttr = extern struct { /// start batch, NULL to start from beginning in_batch: u64, + /// output: next start batch out_batch: u64, keys: u64, values: u64, + /// input/output: /// input: # of key/value elements /// output: # of filled elements @@ -775,35 +1259,49 @@ pub const ProgLoadAttr = extern struct { insn_cnt: u32, insns: u64, license: u64, + /// verbosity level of verifier log_level: u32, + /// size of user buffer log_size: u32, + /// user supplied buffer log_buf: u64, + /// not used kern_version: u32, prog_flags: u32, prog_name: [obj_name_len]u8, - /// ifindex of netdev to prep for. For some prog types expected attach - /// type must be known at load time to verify attach type specific parts - /// of prog (context accesses, allowed helpers, etc). + + /// ifindex of netdev to prep for. prog_ifindex: u32, + + /// For some prog types expected attach type must be known at load time to + /// verify attach type specific parts of prog (context accesses, allowed + /// helpers, etc). 
expected_attach_type: u32, + /// fd pointing to BTF type data prog_btf_fd: fd_t, + /// userspace bpf_func_info size func_info_rec_size: u32, func_info: u64, + /// number of bpf_func_info records func_info_cnt: u32, + /// userspace bpf_line_info size line_info_rec_size: u32, line_info: u64, + /// number of bpf_line_info records line_info_cnt: u32, + /// in-kernel BTF type id to attach to attact_btf_id: u32, + /// 0 to attach to vmlinux attach_prog_id: u32, }; @@ -819,29 +1317,36 @@ pub const ObjAttr = extern struct { pub const ProgAttachAttr = extern struct { /// container object to attach to target_fd: fd_t, + /// eBPF program to attach attach_bpf_fd: fd_t, + attach_type: u32, attach_flags: u32, + // TODO: BPF_F_REPLACE flags /// previously attached eBPF program to replace if .replace is used replace_bpf_fd: fd_t, }; /// struct used by Cmd.prog_test_run command -pub const TestAttr = extern struct { +pub const TestRunAttr = extern struct { prog_fd: fd_t, retval: u32, + /// input: len of data_in data_size_in: u32, + /// input/output: len of data_out. returns ENOSPC if data_out is too small. data_size_out: u32, data_in: u64, data_out: u64, repeat: u32, duration: u32, + /// input: len of ctx_in ctx_size_in: u32, + /// input/output: len of ctx_out. returns ENOSPC if ctx_out is too small. 
ctx_size_out: u32, ctx_in: u64, @@ -894,26 +1399,35 @@ pub const BtfLoadAttr = extern struct { btf_log_level: u32, }; +/// struct used by Cmd.task_fd_query pub const TaskFdQueryAttr = extern struct { /// input: pid pid: pid_t, + /// input: fd fd: fd_t, + /// input: flags flags: u32, + /// input/output: buf len buf_len: u32, + /// input/output: /// tp_name for tracepoint /// symbol for kprobe /// filename for uprobe buf: u64, + /// output: prod_id prog_id: u32, + /// output: BPF_FD_TYPE fd_type: u32, + /// output: probe_offset probe_offset: u64, + /// output: probe_addr probe_addr: u64, }; @@ -922,9 +1436,11 @@ pub const TaskFdQueryAttr = extern struct { pub const LinkCreateAttr = extern struct { /// eBPF program to attach prog_fd: fd_t, + /// object to attach to target_fd: fd_t, attach_type: u32, + /// extra flags flags: u32, }; @@ -932,10 +1448,13 @@ pub const LinkCreateAttr = extern struct { /// struct used by Cmd.link_update command pub const LinkUpdateAttr = extern struct { link_fd: fd_t, + /// new program to update link with new_prog_fd: fd_t, + /// extra flags flags: u32, + /// expected link's program fd, it is specified only if BPF_F_REPLACE is /// set in flags old_prog_fd: fd_t, @@ -952,6 +1471,7 @@ pub const IterCreateAttr = extern struct { flags: u32, }; +/// Mega struct that is passed to the bpf() syscall pub const Attr = extern union { map_create: MapCreateAttr, map_elem: MapElemAttr, @@ -971,3 +1491,176 @@ pub const Attr = extern union { enable_stats: EnableStatsAttr, iter_create: IterCreateAttr, }; + +pub const Log = struct { + level: u32, + buf: []u8, +}; + +pub fn map_create(map_type: MapType, key_size: u32, value_size: u32, max_entries: u32) !fd_t { + var attr = Attr{ + .map_create = std.mem.zeroes(MapCreateAttr), + }; + + attr.map_create.map_type = @enumToInt(map_type); + attr.map_create.key_size = key_size; + attr.map_create.value_size = value_size; + attr.map_create.max_entries = max_entries; + + const rc = bpf(.map_create, &attr, 
@sizeOf(MapCreateAttr)); + return switch (errno(rc)) { + 0 => @intCast(fd_t, rc), + EINVAL => error.MapTypeOrAttrInvalid, + ENOMEM => error.SystemResources, + EPERM => error.AccessDenied, + else => |err| unexpectedErrno(err), + }; +} + +test "map_create" { + const map = try map_create(.hash, 4, 4, 32); + defer std.os.close(map); +} + +pub fn map_lookup_elem(fd: fd_t, key: []const u8, value: []u8) !void { + var attr = Attr{ + .map_elem = std.mem.zeroes(MapElemAttr), + }; + + attr.map_elem.map_fd = fd; + attr.map_elem.key = @ptrToInt(key.ptr); + attr.map_elem.result.value = @ptrToInt(value.ptr); + + const rc = bpf(.map_lookup_elem, &attr, @sizeOf(MapElemAttr)); + switch (errno(rc)) { + 0 => return, + EBADF => return error.BadFd, + EFAULT => unreachable, + EINVAL => return error.FieldInAttrNeedsZeroing, + ENOENT => return error.NotFound, + EPERM => return error.AccessDenied, + else => |err| return unexpectedErrno(err), + } +} + +pub fn map_update_elem(fd: fd_t, key: []const u8, value: []const u8, flags: u64) !void { + var attr = Attr{ + .map_elem = std.mem.zeroes(MapElemAttr), + }; + + attr.map_elem.map_fd = fd; + attr.map_elem.key = @ptrToInt(key.ptr); + attr.map_elem.result = .{ .value = @ptrToInt(value.ptr) }; + attr.map_elem.flags = flags; + + const rc = bpf(.map_update_elem, &attr, @sizeOf(MapElemAttr)); + switch (errno(rc)) { + 0 => return, + E2BIG => return error.ReachedMaxEntries, + EBADF => return error.BadFd, + EFAULT => unreachable, + EINVAL => return error.FieldInAttrNeedsZeroing, + ENOMEM => return error.SystemResources, + EPERM => return error.AccessDenied, + else => |err| return unexpectedErrno(err), + } +} + +pub fn map_delete_elem(fd: fd_t, key: []const u8) !void { + var attr = Attr{ + .map_elem = std.mem.zeroes(MapElemAttr), + }; + + attr.map_elem.map_fd = fd; + attr.map_elem.key = @ptrToInt(key.ptr); + + const rc = bpf(.map_delete_elem, &attr, @sizeOf(MapElemAttr)); + switch (errno(rc)) { + 0 => return, + EBADF => return error.BadFd, + EFAULT =>
unreachable, + EINVAL => return error.FieldInAttrNeedsZeroing, + ENOENT => return error.NotFound, + EPERM => return error.AccessDenied, + else => |err| return unexpectedErrno(err), + } +} + +test "map lookup, update, and delete" { + const key_size = 4; + const value_size = 4; + const map = try map_create(.hash, key_size, value_size, 1); + defer std.os.close(map); + + const key = std.mem.zeroes([key_size]u8); + var value = std.mem.zeroes([value_size]u8); + + // fails looking up value that doesn't exist + expectError(error.NotFound, map_lookup_elem(map, &key, &value)); + + // succeed at updating and looking up element + try map_update_elem(map, &key, &value, 0); + try map_lookup_elem(map, &key, &value); + + // fails inserting more than max entries + const second_key = [key_size]u8{ 0, 0, 0, 1 }; + expectError(error.ReachedMaxEntries, map_update_elem(map, &second_key, &value, 0)); + + // succeed at deleting an existing elem + try map_delete_elem(map, &key); + expectError(error.NotFound, map_lookup_elem(map, &key, &value)); + + // fail at deleting a non-existing elem + expectError(error.NotFound, map_delete_elem(map, &key)); +} + +pub fn prog_load( + prog_type: ProgType, + insns: []const Insn, + log: ?*Log, + license: []const u8, + kern_version: u32, +) !fd_t { + var attr = Attr{ + .prog_load = std.mem.zeroes(ProgLoadAttr), + }; + + attr.prog_load.prog_type = @enumToInt(prog_type); + attr.prog_load.insns = @ptrToInt(insns.ptr); + attr.prog_load.insn_cnt = @intCast(u32, insns.len); + attr.prog_load.license = @ptrToInt(license.ptr); + attr.prog_load.kern_version = kern_version; + + if (log) |l| { + attr.prog_load.log_buf = @ptrToInt(l.buf.ptr); + attr.prog_load.log_size = @intCast(u32, l.buf.len); + attr.prog_load.log_level = l.level; + } + + const rc = bpf(.prog_load, &attr, @sizeOf(ProgLoadAttr)); + return switch (errno(rc)) { + 0 => @intCast(fd_t, rc), + EACCES => error.UnsafeProgram, + EFAULT => unreachable, + EINVAL => error.InvalidProgram, + EPERM => 
error.AccessDenied, + else => |err| unexpectedErrno(err), + }; +} + +test "prog_load" { + // this should fail because it does not set r0 before exiting + const bad_prog = [_]Insn{ + Insn.exit(), + }; + + const good_prog = [_]Insn{ + Insn.mov(.r0, 0), + Insn.exit(), + }; + + const prog = try prog_load(.socket_filter, &good_prog, null, "MIT", 0); + defer std.os.close(prog); + + expectError(error.UnsafeProgram, prog_load(.socket_filter, &bad_prog, null, "MIT", 0)); +} diff --git a/lib/std/os/linux/bpf/btf.zig b/lib/std/os/linux/bpf/btf.zig new file mode 100644 index 000000000..5338994af --- /dev/null +++ b/lib/std/os/linux/bpf/btf.zig @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. +const magic = 0xeb9f; +const version = 1; + +pub const ext = @import("btf_ext.zig"); + +/// All offsets are in bytes relative to the end of this header +pub const Header = packed struct { + magic: u16, + version: u8, + flags: u8, + hdr_len: u32, + + /// offset of type section + type_off: u32, + + /// length of type section + type_len: u32, + + /// offset of string section + str_off: u32, + + /// length of string section + str_len: u32, +}; + +/// Max number of type identifiers +pub const max_type = 0xfffff; + +/// Max offset into string section +pub const max_name_offset = 0xffffff; + +/// Max number of struct/union/enum members or func args +pub const max_vlen = 0xffff; + +pub const Type = packed struct { + name_off: u32, + info: packed struct { + /// number of struct's members + vlen: u16, + + unused_1: u8, + kind: Kind, + unused_2: u3, + + /// used by Struct, Union, and Fwd + kind_flag: bool, + }, + + /// size is used by Int, Enum, Struct, Union, and DataSec, it tells the size + /// of the type it is describing + /// + /// type is used by Ptr,
Typedef, Volatile, Const, Restrict, Func, + /// FuncProto, and Var. It is a type_id referring to another type + size_type: union { size: u32, typ: u32 }, +}; + +/// For some kinds, Type is immediately followed by extra data +pub const Kind = enum(u4) { + unknown, + int, + ptr, + array, + structure, + kind_union, + enumeration, + fwd, + typedef, + kind_volatile, + constant, + restrict, + func, + funcProto, + variable, + dataSec, +}; + +/// Int kind is followed by this struct +pub const IntInfo = packed struct { + bits: u8, + unused: u8, + offset: u8, + encoding: enum(u4) { + signed = 1 << 0, + char = 1 << 1, + boolean = 1 << 2, + }, +}; + +test "IntInfo is 32 bits" { + std.testing.expectEqual(@bitSizeOf(IntInfo), 32); +} + +/// Enum kind is followed by this struct +pub const Enum = packed struct { + name_off: u32, + val: i32, +}; + +/// Array kind is followed by this struct +pub const Array = packed struct { + typ: u32, + index_type: u32, + nelems: u32, +}; + +/// Struct and Union kinds are followed by multiple Member structs. The exact +/// number is stored in vlen +pub const Member = packed struct { + name_off: u32, + typ: u32, + + /// if the kind_flag is set, offset contains both member bitfield size and + /// bit offset, the bitfield size is set for bitfield members.
If the type + /// info kind_flag is not set, the offset contains only bit offset + offset: packed struct { + bit: u24, + bitfield_size: u8, + }, +}; + +/// FuncProto is followed by multiple Params, the exact number is stored in vlen +pub const Param = packed struct { + name_off: u32, + typ: u32, +}; + +pub const VarLinkage = enum { + static, + global_allocated, + global_extern, +}; + +pub const FuncLinkage = enum { + static, + global, + external, +}; + +/// Var kind is followed by a single Var struct to describe additional +/// information related to the variable such as its linkage +pub const Var = packed struct { + linkage: u32, +}; + +/// Datasec kind is followed by multiple VarSecInfo to describe all Var kind +/// types it contains along with its in-section offset as well as size. +pub const VarSecInfo = packed struct { + typ: u32, + offset: u32, + size: u32, +}; diff --git a/lib/std/os/linux/bpf/btf_ext.zig b/lib/std/os/linux/bpf/btf_ext.zig new file mode 100644 index 000000000..ce412fdf4 --- /dev/null +++ b/lib/std/os/linux/bpf/btf_ext.zig @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software.
+pub const Header = packed struct { + magic: u16, + version: u8, + flags: u8, + hdr_len: u32, + + /// All offsets are in bytes relative to the end of this header + func_info_off: u32, + func_info_len: u32, + line_info_off: u32, + line_info_len: u32, +}; + +pub const InfoSec = packed struct { + sec_name_off: u32, + num_info: u32, + // TODO: communicate that there is data here + //data: [0]u8, +}; diff --git a/lib/std/os/linux/bpf/helpers.zig b/lib/std/os/linux/bpf/helpers.zig new file mode 100644 index 000000000..9228e1f1f --- /dev/null +++ b/lib/std/os/linux/bpf/helpers.zig @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. +const kern = @import("kern.zig"); + +// in BPF, all the helper calls +// TODO: when https://github.com/ziglang/zig/issues/1717 is here, make a nice +// function that uses the Helper enum +// +// Note, these function signatures were created from documentation found in +// '/usr/include/linux/bpf.h' +pub const map_lookup_elem = @intToPtr(fn (map: *const kern.MapDef, key: ?*const c_void) ?*c_void, 1); +pub const map_update_elem = @intToPtr(fn (map: *const kern.MapDef, key: ?*const c_void, value: ?*const c_void, flags: u64) c_long, 2); +pub const map_delete_elem = @intToPtr(fn (map: *const kern.MapDef, key: ?*const c_void) c_long, 3); +pub const probe_read = @intToPtr(fn (dst: ?*c_void, size: u32, unsafe_ptr: ?*const c_void) c_long, 4); +pub const ktime_get_ns = @intToPtr(fn () u64, 5); +pub const trace_printk = @intToPtr(fn (fmt: [*:0]const u8, fmt_size: u32, arg1: u64, arg2: u64, arg3: u64) c_long, 6); +pub const get_prandom_u32 = @intToPtr(fn () u32, 7); +pub const get_smp_processor_id = @intToPtr(fn () u32, 8); +pub const skb_store_bytes = @intToPtr(fn (skb: *kern.SkBuff, offset: u32, from: ?*const 
c_void, len: u32, flags: u64) c_long, 9); +pub const l3_csum_replace = @intToPtr(fn (skb: *kern.SkBuff, offset: u32, from: u64, to: u64, size: u64) c_long, 10); +pub const l4_csum_replace = @intToPtr(fn (skb: *kern.SkBuff, offset: u32, from: u64, to: u64, flags: u64) c_long, 11); +pub const tail_call = @intToPtr(fn (ctx: ?*c_void, prog_array_map: *const kern.MapDef, index: u32) c_long, 12); +pub const clone_redirect = @intToPtr(fn (skb: *kern.SkBuff, ifindex: u32, flags: u64) c_long, 13); +pub const get_current_pid_tgid = @intToPtr(fn () u64, 14); +pub const get_current_uid_gid = @intToPtr(fn () u64, 15); +pub const get_current_comm = @intToPtr(fn (buf: ?*c_void, size_of_buf: u32) c_long, 16); +pub const get_cgroup_classid = @intToPtr(fn (skb: *kern.SkBuff) u32, 17); +// Note vlan_proto is big endian +pub const skb_vlan_push = @intToPtr(fn (skb: *kern.SkBuff, vlan_proto: u16, vlan_tci: u16) c_long, 18); +pub const skb_vlan_pop = @intToPtr(fn (skb: *kern.SkBuff) c_long, 19); +pub const skb_get_tunnel_key = @intToPtr(fn (skb: *kern.SkBuff, key: *kern.TunnelKey, size: u32, flags: u64) c_long, 20); +pub const skb_set_tunnel_key = @intToPtr(fn (skb: *kern.SkBuff, key: *kern.TunnelKey, size: u32, flags: u64) c_long, 21); +pub const perf_event_read = @intToPtr(fn (map: *const kern.MapDef, flags: u64) u64, 22); +pub const redirect = @intToPtr(fn (ifindex: u32, flags: u64) c_long, 23); +pub const get_route_realm = @intToPtr(fn (skb: *kern.SkBuff) u32, 24); +pub const perf_event_output = @intToPtr(fn (ctx: ?*c_void, map: *const kern.MapDef, flags: u64, data: ?*c_void, size: u64) c_long, 25); +pub const skb_load_bytes = @intToPtr(fn (skb: ?*c_void, offset: u32, to: ?*c_void, len: u32) c_long, 26); +pub const get_stackid = @intToPtr(fn (ctx: ?*c_void, map: *const kern.MapDef, flags: u64) c_long, 27); +// from and to point to __be32 +pub const csum_diff = @intToPtr(fn (from: *u32, from_size: u32, to: *u32, to_size: u32, seed: u32) i64, 28); +pub const skb_get_tunnel_opt = 
@intToPtr(fn (skb: *kern.SkBuff, opt: ?*c_void, size: u32) c_long, 29); +pub const skb_set_tunnel_opt = @intToPtr(fn (skb: *kern.SkBuff, opt: ?*c_void, size: u32) c_long, 30); +// proto is __be16 +pub const skb_change_proto = @intToPtr(fn (skb: *kern.SkBuff, proto: u16, flags: u64) c_long, 31); +pub const skb_change_type = @intToPtr(fn (skb: *kern.SkBuff, skb_type: u32) c_long, 32); +pub const skb_under_cgroup = @intToPtr(fn (skb: *kern.SkBuff, map: ?*const c_void, index: u32) c_long, 33); +pub const get_hash_recalc = @intToPtr(fn (skb: *kern.SkBuff) u32, 34); +pub const get_current_task = @intToPtr(fn () u64, 35); +pub const probe_write_user = @intToPtr(fn (dst: ?*c_void, src: ?*const c_void, len: u32) c_long, 36); +pub const current_task_under_cgroup = @intToPtr(fn (map: *const kern.MapDef, index: u32) c_long, 37); +pub const skb_change_tail = @intToPtr(fn (skb: *kern.SkBuff, len: u32, flags: u64) c_long, 38); +pub const skb_pull_data = @intToPtr(fn (skb: *kern.SkBuff, len: u32) c_long, 39); +pub const csum_update = @intToPtr(fn (skb: *kern.SkBuff, csum: u32) i64, 40); +pub const set_hash_invalid = @intToPtr(fn (skb: *kern.SkBuff) void, 41); +pub const get_numa_node_id = @intToPtr(fn () c_long, 42); +pub const skb_change_head = @intToPtr(fn (skb: *kern.SkBuff, len: u32, flags: u64) c_long, 43); +pub const xdp_adjust_head = @intToPtr(fn (xdp_md: *kern.XdpMd, delta: c_int) c_long, 44); +pub const probe_read_str = @intToPtr(fn (dst: ?*c_void, size: u32, unsafe_ptr: ?*const c_void) c_long, 45); +pub const get_socket_cookie = @intToPtr(fn (ctx: ?*c_void) u64, 46); +pub const get_socket_uid = @intToPtr(fn (skb: *kern.SkBuff) u32, 47); +pub const set_hash = @intToPtr(fn (skb: *kern.SkBuff, hash: u32) c_long, 48); +pub const setsockopt = @intToPtr(fn (bpf_socket: *kern.SockOps, level: c_int, optname: c_int, optval: ?*c_void, optlen: c_int) c_long, 49); +pub const skb_adjust_room = @intToPtr(fn (skb: *kern.SkBuff, len_diff: i32, mode: u32, flags: u64) c_long, 50); +pub 
const redirect_map = @intToPtr(fn (map: *const kern.MapDef, key: u32, flags: u64) c_long, 51); +pub const sk_redirect_map = @intToPtr(fn (skb: *kern.SkBuff, map: *const kern.MapDef, key: u32, flags: u64) c_long, 52); +pub const sock_map_update = @intToPtr(fn (skops: *kern.SockOps, map: *const kern.MapDef, key: ?*c_void, flags: u64) c_long, 53); +pub const xdp_adjust_meta = @intToPtr(fn (xdp_md: *kern.XdpMd, delta: c_int) c_long, 54); +pub const perf_event_read_value = @intToPtr(fn (map: *const kern.MapDef, flags: u64, buf: *kern.PerfEventValue, buf_size: u32) c_long, 55); +pub const perf_prog_read_value = @intToPtr(fn (ctx: *kern.PerfEventData, buf: *kern.PerfEventValue, buf_size: u32) c_long, 56); +pub const getsockopt = @intToPtr(fn (bpf_socket: ?*c_void, level: c_int, optname: c_int, optval: ?*c_void, optlen: c_int) c_long, 57); +pub const override_return = @intToPtr(fn (regs: *PtRegs, rc: u64) c_long, 58); +pub const sock_ops_cb_flags_set = @intToPtr(fn (bpf_sock: *kern.SockOps, argval: c_int) c_long, 59); +pub const msg_redirect_map = @intToPtr(fn (msg: *kern.SkMsgMd, map: *const kern.MapDef, key: u32, flags: u64) c_long, 60); +pub const msg_apply_bytes = @intToPtr(fn (msg: *kern.SkMsgMd, bytes: u32) c_long, 61); +pub const msg_cork_bytes = @intToPtr(fn (msg: *kern.SkMsgMd, bytes: u32) c_long, 62); +pub const msg_pull_data = @intToPtr(fn (msg: *kern.SkMsgMd, start: u32, end: u32, flags: u64) c_long, 63); +pub const bind = @intToPtr(fn (ctx: *kern.BpfSockAddr, addr: *kern.SockAddr, addr_len: c_int) c_long, 64); +pub const xdp_adjust_tail = @intToPtr(fn (xdp_md: *kern.XdpMd, delta: c_int) c_long, 65); +pub const skb_get_xfrm_state = @intToPtr(fn (skb: *kern.SkBuff, index: u32, xfrm_state: *kern.XfrmState, size: u32, flags: u64) c_long, 66); +pub const get_stack = @intToPtr(fn (ctx: ?*c_void, buf: ?*c_void, size: u32, flags: u64) c_long, 67); +pub const skb_load_bytes_relative = @intToPtr(fn (skb: ?*const c_void, offset: u32, to: ?*c_void, len: u32, start_header: 
u32) c_long, 68); +pub const fib_lookup = @intToPtr(fn (ctx: ?*c_void, params: *kern.FibLookup, plen: c_int, flags: u32) c_long, 69); +pub const sock_hash_update = @intToPtr(fn (skops: *kern.SockOps, map: *const kern.MapDef, key: ?*c_void, flags: u64) c_long, 70); +pub const msg_redirect_hash = @intToPtr(fn (msg: *kern.SkMsgMd, map: *const kern.MapDef, key: ?*c_void, flags: u64) c_long, 71); +pub const sk_redirect_hash = @intToPtr(fn (skb: *kern.SkBuff, map: *const kern.MapDef, key: ?*c_void, flags: u64) c_long, 72); +pub const lwt_push_encap = @intToPtr(fn (skb: *kern.SkBuff, typ: u32, hdr: ?*c_void, len: u32) c_long, 73); +pub const lwt_seg6_store_bytes = @intToPtr(fn (skb: *kern.SkBuff, offset: u32, from: ?*const c_void, len: u32) c_long, 74); +pub const lwt_seg6_adjust_srh = @intToPtr(fn (skb: *kern.SkBuff, offset: u32, delta: i32) c_long, 75); +pub const lwt_seg6_action = @intToPtr(fn (skb: *kern.SkBuff, action: u32, param: ?*c_void, param_len: u32) c_long, 76); +pub const rc_repeat = @intToPtr(fn (ctx: ?*c_void) c_long, 77); +pub const rc_keydown = @intToPtr(fn (ctx: ?*c_void, protocol: u32, scancode: u64, toggle: u32) c_long, 78); +pub const skb_cgroup_id = @intToPtr(fn (skb: *kern.SkBuff) u64, 79); +pub const get_current_cgroup_id = @intToPtr(fn () u64, 80); +pub const get_local_storage = @intToPtr(fn (map: ?*c_void, flags: u64) ?*c_void, 81); +pub const sk_select_reuseport = @intToPtr(fn (reuse: *kern.SkReusePortMd, map: *const kern.MapDef, key: ?*c_void, flags: u64) c_long, 82); +pub const skb_ancestor_cgroup_id = @intToPtr(fn (skb: *kern.SkBuff, ancestor_level: c_int) u64, 83); +pub const sk_lookup_tcp = @intToPtr(fn (ctx: ?*c_void, tuple: *kern.SockTuple, tuple_size: u32, netns: u64, flags: u64) ?*kern.Sock, 84); +pub const sk_lookup_udp = @intToPtr(fn (ctx: ?*c_void, tuple: *kern.SockTuple, tuple_size: u32, netns: u64, flags: u64) ?*kern.Sock, 85); +pub const sk_release = @intToPtr(fn (sock: *kern.Sock) c_long, 86); +pub const map_push_elem = 
@intToPtr(fn (map: *const kern.MapDef, value: ?*const c_void, flags: u64) c_long, 87); +pub const map_pop_elem = @intToPtr(fn (map: *const kern.MapDef, value: ?*c_void) c_long, 88); +pub const map_peek_elem = @intToPtr(fn (map: *const kern.MapDef, value: ?*c_void) c_long, 89); +pub const msg_push_data = @intToPtr(fn (msg: *kern.SkMsgMd, start: u32, len: u32, flags: u64) c_long, 90); +pub const msg_pop_data = @intToPtr(fn (msg: *kern.SkMsgMd, start: u32, len: u32, flags: u64) c_long, 91); +pub const rc_pointer_rel = @intToPtr(fn (ctx: ?*c_void, rel_x: i32, rel_y: i32) c_long, 92); +pub const spin_lock = @intToPtr(fn (lock: *kern.SpinLock) c_long, 93); +pub const spin_unlock = @intToPtr(fn (lock: *kern.SpinLock) c_long, 94); +pub const sk_fullsock = @intToPtr(fn (sk: *kern.Sock) ?*SkFullSock, 95); +pub const tcp_sock = @intToPtr(fn (sk: *kern.Sock) ?*kern.TcpSock, 96); +pub const skb_ecn_set_ce = @intToPtr(fn (skb: *kern.SkBuff) c_long, 97); +pub const get_listener_sock = @intToPtr(fn (sk: *kern.Sock) ?*kern.Sock, 98); +pub const skc_lookup_tcp = @intToPtr(fn (ctx: ?*c_void, tuple: *kern.SockTuple, tuple_size: u32, netns: u64, flags: u64) ?*kern.Sock, 99); +pub const tcp_check_syncookie = @intToPtr(fn (sk: *kern.Sock, iph: ?*c_void, iph_len: u32, th: *TcpHdr, th_len: u32) c_long, 100); +pub const sysctl_get_name = @intToPtr(fn (ctx: *kern.SysCtl, buf: ?*u8, buf_len: c_ulong, flags: u64) c_long, 101); +pub const sysctl_get_current_value = @intToPtr(fn (ctx: *kern.SysCtl, buf: ?*u8, buf_len: c_ulong) c_long, 102); +pub const sysctl_get_new_value = @intToPtr(fn (ctx: *kern.SysCtl, buf: ?*u8, buf_len: c_ulong) c_long, 103); +pub const sysctl_set_new_value = @intToPtr(fn (ctx: *kern.SysCtl, buf: ?*const u8, buf_len: c_ulong) c_long, 104); +pub const strtol = @intToPtr(fn (buf: *const u8, buf_len: c_ulong, flags: u64, res: *c_long) c_long, 105); +pub const strtoul = @intToPtr(fn (buf: *const u8, buf_len: c_ulong, flags: u64, res: *c_ulong) c_long, 106); +pub const 
sk_storage_get = @intToPtr(fn (map: *const kern.MapDef, sk: *kern.Sock, value: ?*c_void, flags: u64) ?*c_void, 107); +pub const sk_storage_delete = @intToPtr(fn (map: *const kern.MapDef, sk: *kern.Sock) c_long, 108); +pub const send_signal = @intToPtr(fn (sig: u32) c_long, 109); +pub const tcp_gen_syncookie = @intToPtr(fn (sk: *kern.Sock, iph: ?*c_void, iph_len: u32, th: *TcpHdr, th_len: u32) i64, 110); +pub const skb_output = @intToPtr(fn (ctx: ?*c_void, map: *const kern.MapDef, flags: u64, data: ?*c_void, size: u64) c_long, 111); +pub const probe_read_user = @intToPtr(fn (dst: ?*c_void, size: u32, unsafe_ptr: ?*const c_void) c_long, 112); +pub const probe_read_kernel = @intToPtr(fn (dst: ?*c_void, size: u32, unsafe_ptr: ?*const c_void) c_long, 113); +pub const probe_read_user_str = @intToPtr(fn (dst: ?*c_void, size: u32, unsafe_ptr: ?*const c_void) c_long, 114); +pub const probe_read_kernel_str = @intToPtr(fn (dst: ?*c_void, size: u32, unsafe_ptr: ?*const c_void) c_long, 115); +pub const tcp_send_ack = @intToPtr(fn (tp: ?*c_void, rcv_nxt: u32) c_long, 116); +pub const send_signal_thread = @intToPtr(fn (sig: u32) c_long, 117); +pub const jiffies64 = @intToPtr(fn () u64, 118); +pub const read_branch_records = @intToPtr(fn (ctx: *kern.PerfEventData, buf: ?*c_void, size: u32, flags: u64) c_long, 119); +pub const get_ns_current_pid_tgid = @intToPtr(fn (dev: u64, ino: u64, nsdata: *kern.PidNsInfo, size: u32) c_long, 120); +pub const xdp_output = @intToPtr(fn (ctx: ?*c_void, map: *const kern.MapDef, flags: u64, data: ?*c_void, size: u64) c_long, 121); +pub const get_netns_cookie = @intToPtr(fn (ctx: ?*c_void) u64, 122); +pub const get_current_ancestor_cgroup_id = @intToPtr(fn (ancestor_level: c_int) u64, 123); +pub const sk_assign = @intToPtr(fn (skb: *kern.SkBuff, sk: *kern.Sock, flags: u64) c_long, 124); +pub const ktime_get_boot_ns = @intToPtr(fn () u64, 125); +pub const seq_printf = @intToPtr(fn (m: *kern.SeqFile, fmt: ?*const u8, fmt_size: u32, data: ?*const 
c_void, data_len: u32) c_long, 126); +pub const seq_write = @intToPtr(fn (m: *kern.SeqFile, data: ?*const u8, len: u32) c_long, 127); +pub const sk_cgroup_id = @intToPtr(fn (sk: *kern.BpfSock) u64, 128); +pub const sk_ancestor_cgroup_id = @intToPtr(fn (sk: *kern.BpfSock, ancestor_level: c_long) u64, 129); +pub const ringbuf_output = @intToPtr(fn (ringbuf: ?*c_void, data: ?*c_void, size: u64, flags: u64) ?*c_void, 130); +pub const ringbuf_reserve = @intToPtr(fn (ringbuf: ?*c_void, size: u64, flags: u64) ?*c_void, 131); +pub const ringbuf_submit = @intToPtr(fn (data: ?*c_void, flags: u64) void, 132); +pub const ringbuf_discard = @intToPtr(fn (data: ?*c_void, flags: u64) void, 133); +pub const ringbuf_query = @intToPtr(fn (ringbuf: ?*c_void, flags: u64) u64, 134); +pub const csum_level = @intToPtr(fn (skb: *kern.SkBuff, level: u64) c_long, 135); +pub const skc_to_tcp6_sock = @intToPtr(fn (sk: ?*c_void) ?*kern.Tcp6Sock, 136); +pub const skc_to_tcp_sock = @intToPtr(fn (sk: ?*c_void) ?*kern.TcpSock, 137); +pub const skc_to_tcp_timewait_sock = @intToPtr(fn (sk: ?*c_void) ?*kern.TcpTimewaitSock, 138); +pub const skc_to_tcp_request_sock = @intToPtr(fn (sk: ?*c_void) ?*kern.TcpRequestSock, 139); +pub const skc_to_udp6_sock = @intToPtr(fn (sk: ?*c_void) ?*kern.Udp6Sock, 140); +pub const get_task_stack = @intToPtr(fn (task: ?*c_void, buf: ?*c_void, size: u32, flags: u64) c_long, 141); diff --git a/lib/std/os/linux/bpf/kern.zig b/lib/std/os/linux/bpf/kern.zig new file mode 100644 index 000000000..3bd605301 --- /dev/null +++ b/lib/std/os/linux/bpf/kern.zig @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software.
+const std = @import("../../../std.zig"); + +const in_bpf_program = switch (std.builtin.arch) { + .bpfel, .bpfeb => true, + else => false, +}; + +pub const helpers = if (in_bpf_program) @import("helpers.zig") else struct {}; + +pub const BpfSock = @Type(.Opaque); +pub const BpfSockAddr = @Type(.Opaque); +pub const FibLookup = @Type(.Opaque); +pub const MapDef = @Type(.Opaque); +pub const PerfEventData = @Type(.Opaque); +pub const PerfEventValue = @Type(.Opaque); +pub const PidNsInfo = @Type(.Opaque); +pub const SeqFile = @Type(.Opaque); +pub const SkBuff = @Type(.Opaque); +pub const SkMsgMd = @Type(.Opaque); +pub const SkReusePortMd = @Type(.Opaque); +pub const Sock = @Type(.Opaque); +pub const SockAddr = @Type(.Opaque); +pub const SockOps = @Type(.Opaque); +pub const SockTuple = @Type(.Opaque); +pub const SpinLock = @Type(.Opaque); +pub const SysCtl = @Type(.Opaque); +pub const Tcp6Sock = @Type(.Opaque); +pub const TcpRequestSock = @Type(.Opaque); +pub const TcpSock = @Type(.Opaque); +pub const TcpTimewaitSock = @Type(.Opaque); +pub const TunnelKey = @Type(.Opaque); +pub const Udp6Sock = @Type(.Opaque); +pub const XdpMd = @Type(.Opaque); +pub const XfrmState = @Type(.Opaque); diff --git a/lib/std/os/linux/powerpc64.zig b/lib/std/os/linux/powerpc64.zig new file mode 100644 index 000000000..337a6aa30 --- /dev/null +++ b/lib/std/os/linux/powerpc64.zig @@ -0,0 +1,127 @@ +usingnamespace @import("../bits.zig"); + +pub fn syscall0(number: SYS) usize { + return asm volatile ( + \\ sc + \\ bns+ 1f + \\ neg 3, 3 + \\ 1: + : [ret] "={r3}" (-> usize) + : [number] "{r0}" (@enumToInt(number)) + : "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); +} + +pub fn syscall1(number: SYS, arg1: usize) usize { + return asm volatile ( + \\ sc + \\ bns+ 1f + \\ neg 3, 3 + \\ 1: + : [ret] "={r3}" (-> usize) + : [number] "{r0}" (@enumToInt(number)), + [arg1] "{r3}" (arg1) + : "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); +} + +pub 
fn syscall2(number: SYS, arg1: usize, arg2: usize) usize { + return asm volatile ( + \\ sc + \\ bns+ 1f + \\ neg 3, 3 + \\ 1: + : [ret] "={r3}" (-> usize) + : [number] "{r0}" (@enumToInt(number)), + [arg1] "{r3}" (arg1), + [arg2] "{r4}" (arg2) + : "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); +} + +pub fn syscall3(number: SYS, arg1: usize, arg2: usize, arg3: usize) usize { + return asm volatile ( + \\ sc + \\ bns+ 1f + \\ neg 3, 3 + \\ 1: + : [ret] "={r3}" (-> usize) + : [number] "{r0}" (@enumToInt(number)), + [arg1] "{r3}" (arg1), + [arg2] "{r4}" (arg2), + [arg3] "{r5}" (arg3) + : "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); +} + +pub fn syscall4(number: SYS, arg1: usize, arg2: usize, arg3: usize, arg4: usize) usize { + return asm volatile ( + \\ sc + \\ bns+ 1f + \\ neg 3, 3 + \\ 1: + : [ret] "={r3}" (-> usize) + : [number] "{r0}" (@enumToInt(number)), + [arg1] "{r3}" (arg1), + [arg2] "{r4}" (arg2), + [arg3] "{r5}" (arg3), + [arg4] "{r6}" (arg4) + : "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); +} + +pub fn syscall5(number: SYS, arg1: usize, arg2: usize, arg3: usize, arg4: usize, arg5: usize) usize { + return asm volatile ( + \\ sc + \\ bns+ 1f + \\ neg 3, 3 + \\ 1: + : [ret] "={r3}" (-> usize) + : [number] "{r0}" (@enumToInt(number)), + [arg1] "{r3}" (arg1), + [arg2] "{r4}" (arg2), + [arg3] "{r5}" (arg3), + [arg4] "{r6}" (arg4), + [arg5] "{r7}" (arg5) + : "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); +} + +pub fn syscall6( + number: SYS, + arg1: usize, + arg2: usize, + arg3: usize, + arg4: usize, + arg5: usize, + arg6: usize, +) usize { + return asm volatile ( + \\ sc + \\ bns+ 1f + \\ neg 3, 3 + \\ 1: + : [ret] "={r3}" (-> usize) + : [number] "{r0}" (@enumToInt(number)), + [arg1] "{r3}" (arg1), + [arg2] "{r4}" (arg2), + [arg3] "{r5}" (arg3), + [arg4] "{r6}" (arg4), + [arg5] "{r7}" (arg5), + [arg6] "{r8}" (arg6) + : "memory", "cr0", 
"r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); +} + +/// This matches the libc clone function. +pub extern fn clone(func: fn (arg: usize) callconv(.C) u8, stack: usize, flags: usize, arg: usize, ptid: *i32, tls: usize, ctid: *i32) usize; + +pub const restore = restore_rt; + +pub fn restore_rt() callconv(.Naked) void { + return asm volatile ("sc" + : + : [number] "{r0}" (@enumToInt(SYS.rt_sigreturn)) + : "memory", "cr0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); +} diff --git a/lib/std/os/linux/tls.zig b/lib/std/os/linux/tls.zig index b10dae14d..0330a4e41 100644 --- a/lib/std/os/linux/tls.zig +++ b/lib/std/os/linux/tls.zig @@ -53,7 +53,7 @@ const TLSVariant = enum { }; const tls_variant = switch (builtin.arch) { - .arm, .armeb, .aarch64, .aarch64_be, .riscv32, .riscv64, .mips, .mipsel => TLSVariant.VariantI, + .arm, .armeb, .aarch64, .aarch64_be, .riscv32, .riscv64, .mips, .mipsel, .powerpc, .powerpc64, .powerpc64le => TLSVariant.VariantI, .x86_64, .i386 => TLSVariant.VariantII, else => @compileError("undefined tls_variant for this architecture"), }; @@ -77,12 +77,12 @@ const tls_tp_points_past_tcb = switch (builtin.arch) { // make the generated code more efficient const tls_tp_offset = switch (builtin.arch) { - .mips, .mipsel => 0x7000, + .mips, .mipsel, .powerpc, .powerpc64, .powerpc64le => 0x7000, else => 0, }; const tls_dtv_offset = switch (builtin.arch) { - .mips, .mipsel => 0x8000, + .mips, .mipsel, .powerpc, .powerpc64, .powerpc64le => 0x8000, .riscv32, .riscv64 => 0x800, else => 0, }; @@ -165,6 +165,13 @@ pub fn setThreadPointer(addr: usize) void { const rc = std.os.linux.syscall1(.set_thread_area, addr); assert(rc == 0); }, + .powerpc, .powerpc64, .powerpc64le => { + asm volatile ( + \\ mr 13, %[addr] + : + : [addr] "r" (addr) + ); + }, else => @compileError("Unsupported architecture"), } } diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index bd9dc8b32..de0d0ea45 100644 --- a/lib/std/os/windows.zig +++ 
b/lib/std/os/windows.zig @@ -828,7 +828,7 @@ pub fn DeleteFile(sub_path_w: []const u16, options: DeleteFileOptions) DeleteFil } } -pub const MoveFileError = error{Unexpected}; +pub const MoveFileError = error{ FileNotFound, Unexpected }; pub fn MoveFileEx(old_path: []const u8, new_path: []const u8, flags: DWORD) MoveFileError!void { const old_path_w = try sliceToPrefixedFileW(old_path); @@ -839,6 +839,7 @@ pub fn MoveFileEx(old_path: []const u8, new_path: []const u8, flags: DWORD) Move pub fn MoveFileExW(old_path: [*:0]const u16, new_path: [*:0]const u16, flags: DWORD) MoveFileError!void { if (kernel32.MoveFileExW(old_path, new_path, flags) == 0) { switch (kernel32.GetLastError()) { + .FILE_NOT_FOUND => return error.FileNotFound, else => |err| return unexpectedError(err), } } diff --git a/lib/std/priority_queue.zig b/lib/std/priority_queue.zig index 1c0d230d4..b9be9b70b 100644 --- a/lib/std/priority_queue.zig +++ b/lib/std/priority_queue.zig @@ -195,7 +195,7 @@ pub fn PriorityQueue(comptime T: type) type { count: usize, pub fn next(it: *Iterator) ?T { - if (it.count > it.queue.len - 1) return null; + if (it.count >= it.queue.len) return null; const out = it.count; it.count += 1; return it.queue.items[out]; @@ -428,3 +428,12 @@ test "std.PriorityQueue: remove at index" { expectEqual(queue.remove(), 3); expectEqual(queue.removeOrNull(), null); } + +test "std.PriorityQueue: iterator while empty" { + var queue = PQ.init(testing.allocator, lessThan); + defer queue.deinit(); + + var it = queue.iterator(); + + expectEqual(it.next(), null); +} diff --git a/lib/std/process.zig b/lib/std/process.zig index 9cb571714..2813d8cba 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -593,8 +593,10 @@ pub fn getUserInfo(name: []const u8) !UserInfo { /// TODO this reads /etc/passwd. But sometimes the user/id mapping is in something else /// like NIS, AD, etc. See `man nss` or look at an strace for `id myuser`. 
pub fn posixGetUserInfo(name: []const u8) !UserInfo { - var reader = try io.Reader.open("/etc/passwd", null); - defer reader.close(); + const file = try std.fs.openFileAbsolute("/etc/passwd", .{}); + defer file.close(); + + const reader = file.reader(); const State = enum { Start, @@ -650,8 +652,8 @@ pub fn posixGetUserInfo(name: []const u8) !UserInfo { '0'...'9' => byte - '0', else => return error.CorruptPasswordFile, }; - if (@mulWithOverflow(u32, uid, 10, *uid)) return error.CorruptPasswordFile; - if (@addWithOverflow(u32, uid, digit, *uid)) return error.CorruptPasswordFile; + if (@mulWithOverflow(u32, uid, 10, &uid)) return error.CorruptPasswordFile; + if (@addWithOverflow(u32, uid, digit, &uid)) return error.CorruptPasswordFile; }, }, .ReadGroupId => switch (byte) { @@ -666,8 +668,8 @@ pub fn posixGetUserInfo(name: []const u8) !UserInfo { '0'...'9' => byte - '0', else => return error.CorruptPasswordFile, }; - if (@mulWithOverflow(u32, gid, 10, *gid)) return error.CorruptPasswordFile; - if (@addWithOverflow(u32, gid, digit, *gid)) return error.CorruptPasswordFile; + if (@mulWithOverflow(u32, gid, 10, &gid)) return error.CorruptPasswordFile; + if (@addWithOverflow(u32, gid, digit, &gid)) return error.CorruptPasswordFile; }, }, } diff --git a/lib/std/special/c.zig b/lib/std/special/c.zig index ce8d1c29c..53f7e1738 100644 --- a/lib/std/special/c.zig +++ b/lib/std/special/c.zig @@ -394,6 +394,61 @@ fn clone() callconv(.Naked) void { \\ syscall ); }, + + .powerpc64, .powerpc64le => { + asm volatile ( + \\ # store non-volatile regs r30, r31 on stack in order to put our + \\ # start func and its arg there + \\ stwu 30, -16(1) + \\ stw 31, 4(1) + \\ # save r3 (func) into r30, and r6(arg) into r31 + \\ mr 30, 3 + \\ mr 31, 6 + \\ # create initial stack frame for new thread + \\ clrrwi 4, 4, 4 + \\ li 0, 0 + \\ stwu 0, -16(4) + \\ #move c into first arg + \\ mr 3, 5 + \\ mr 5, 7 + \\ mr 6, 8 + \\ mr 7, 9 + \\ # move syscall number into r0 + \\ li 0, 120 + \\ sc + + \\ # 
check for syscall error + \\ bns+ 1f # jump to label 1 if no summary overflow. + \\ #else + \\ neg 3, 3 #negate the result (errno) + \\1: + \\ # compare sc result with 0 + \\ cmpwi cr7, 3, 0 + + \\ # if not 0, jump to end + \\ bne cr7, 2f + + \\ #else: we're the child + \\ #call funcptr: move arg (d) into r3 + \\ mr 3, 31 + \\ #move r30 (funcptr) into CTR reg + \\ mtctr 30 + \\ # call CTR reg + \\ bctrl + \\ # mov SYS_exit into r0 (the exit param is already in r3) + \\ li 0, 1 + \\ sc + + \\2: + \\ # restore stack + \\ lwz 30, 0(1) + \\ lwz 31, 4(1) + \\ addi 1, 1, 16 + + \\ blr + ); + }, + else => @compileError("Implement clone() for this arch."), } } diff --git a/lib/std/start.zig b/lib/std/start.zig index c65cd0898..aea31a153 100644 --- a/lib/std/start.zig +++ b/lib/std/start.zig @@ -121,6 +121,21 @@ fn _start() callconv(.Naked) noreturn { : [argc] "=r" (-> [*]usize) ); }, + .powerpc64le => { + // Before returning the stack pointer, we have to set up a backchain + // and a few other registers required by the ELFv2 ABI. + // TODO: Support powerpc64 (big endian) on ELFv2. 
+ starting_stack_ptr = asm ( + \\ mr 4, 1 + \\ subi 1, 1, 32 + \\ li 5, 0 + \\ std 5, 0(1) + \\ mr %[argc], 4 + : [argc] "=r" (-> [*]usize) + : + : "r4", "r5" + ); + }, else => @compileError("unsupported arch"), } // If LLVM inlines stack variables into _start, they will overwrite diff --git a/lib/std/std.zig b/lib/std/std.zig index 330f3c253..4236b2929 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -50,6 +50,7 @@ pub const builtin = @import("builtin.zig"); pub const c = @import("c.zig"); pub const cache_hash = @import("cache_hash.zig"); pub const coff = @import("coff.zig"); +pub const compress = @import("compress.zig"); pub const crypto = @import("crypto.zig"); pub const cstr = @import("cstr.zig"); pub const debug = @import("debug.zig"); diff --git a/src-self-hosted/libc_installation.zig b/src-self-hosted/libc_installation.zig index 65c6c8c16..fa2ef30cc 100644 --- a/src-self-hosted/libc_installation.zig +++ b/src-self-hosted/libc_installation.zig @@ -9,6 +9,8 @@ const is_darwin = Target.current.isDarwin(); const is_windows = Target.current.os.tag == .windows; const is_gnu = Target.current.isGnu(); +const log = std.log.scoped(.libc_installation); + usingnamespace @import("windows_sdk.zig"); /// See the render function implementation for documentation of the fields. 
@@ -37,7 +39,6 @@ pub const LibCInstallation = struct { pub fn parse( allocator: *Allocator, libc_file: []const u8, - stderr: anytype, ) !LibCInstallation { var self: LibCInstallation = .{}; @@ -62,7 +63,7 @@ pub const LibCInstallation = struct { if (line.len == 0 or line[0] == '#') continue; var line_it = std.mem.split(line, "="); const name = line_it.next() orelse { - try stderr.print("missing equal sign after field name\n", .{}); + log.err("missing equal sign after field name\n", .{}); return error.ParseError; }; const value = line_it.rest(); @@ -81,31 +82,31 @@ pub const LibCInstallation = struct { } inline for (fields) |field, i| { if (!found_keys[i].found) { - try stderr.print("missing field: {}\n", .{field.name}); + log.err("missing field: {}\n", .{field.name}); return error.ParseError; } } if (self.include_dir == null) { - try stderr.print("include_dir may not be empty\n", .{}); + log.err("include_dir may not be empty\n", .{}); return error.ParseError; } if (self.sys_include_dir == null) { - try stderr.print("sys_include_dir may not be empty\n", .{}); + log.err("sys_include_dir may not be empty\n", .{}); return error.ParseError; } if (self.crt_dir == null and !is_darwin) { - try stderr.print("crt_dir may not be empty for {}\n", .{@tagName(Target.current.os.tag)}); + log.err("crt_dir may not be empty for {}\n", .{@tagName(Target.current.os.tag)}); return error.ParseError; } if (self.msvc_lib_dir == null and is_windows and !is_gnu) { - try stderr.print("msvc_lib_dir may not be empty for {}-{}\n", .{ + log.err("msvc_lib_dir may not be empty for {}-{}\n", .{ @tagName(Target.current.os.tag), @tagName(Target.current.abi), }); return error.ParseError; } if (self.kernel32_lib_dir == null and is_windows and !is_gnu) { - try stderr.print("kernel32_lib_dir may not be empty for {}-{}\n", .{ + log.err("kernel32_lib_dir may not be empty for {}-{}\n", .{ @tagName(Target.current.os.tag), @tagName(Target.current.abi), }); diff --git a/src-self-hosted/link/MachO.zig 
b/src-self-hosted/link/MachO.zig index 27d0488f2..13932e514 100644 --- a/src-self-hosted/link/MachO.zig +++ b/src-self-hosted/link/MachO.zig @@ -32,6 +32,20 @@ const LoadCommand = union(enum) { .Dysymtab => |x| x.cmdsize, }; } + + pub fn write(self: LoadCommand, file: *fs.File, offset: u64) !void { + return switch (self) { + .Segment => |cmd| writeGeneric(cmd, file, offset), + .LinkeditData => |cmd| writeGeneric(cmd, file, offset), + .Symtab => |cmd| writeGeneric(cmd, file, offset), + .Dysymtab => |cmd| writeGeneric(cmd, file, offset), + }; + } + + fn writeGeneric(cmd: anytype, file: *fs.File, offset: u64) !void { + const slice = [1]@TypeOf(cmd){cmd}; + return file.pwriteAll(mem.sliceAsBytes(slice[0..1]), offset); + } }; base: File, @@ -258,8 +272,7 @@ pub fn flush(self: *MachO, module: *Module) !void { var last_cmd_offset: usize = @sizeOf(macho.mach_header_64); for (self.load_commands.items) |cmd| { - const cmd_to_write = [1]@TypeOf(cmd){cmd}; - try self.base.file.?.pwriteAll(mem.sliceAsBytes(cmd_to_write[0..1]), last_cmd_offset); + try cmd.write(&self.base.file.?, last_cmd_offset); last_cmd_offset += cmd.cmdsize(); } const off = @sizeOf(macho.mach_header_64) + @sizeOf(macho.segment_command_64); @@ -346,19 +359,18 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { .n_desc = 0, .n_value = addr, }; - self.offset_table.items[decl.link.macho.offset_table_index.?] = addr; + // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated. 
+ const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; + try self.updateDeclExports(module, decl, decl_exports); try self.writeSymbol(decl.link.macho.symbol_table_index.?); const text_section = self.sections.items[self.text_section_index.?]; const section_offset = symbol.n_value - text_section.addr; const file_offset = text_section.offset + section_offset; log.debug("file_offset 0x{x}\n", .{file_offset}); - try self.base.file.?.pwriteAll(code, file_offset); - // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated. - const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; - return self.updateDeclExports(module, decl, decl_exports); + try self.base.file.?.pwriteAll(code, file_offset); } pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void {} @@ -374,7 +386,7 @@ pub fn updateDeclExports( if (decl.link.macho.symbol_table_index == null) return; - var decl_sym = self.symbol_table.items[decl.link.macho.symbol_table_index.?]; + const decl_sym = &self.symbol_table.items[decl.link.macho.symbol_table_index.?]; // TODO implement if (exports.len == 0) return; @@ -488,9 +500,8 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const addr = blk: { if (self.last_text_block) |last| { const last_symbol = self.symbol_table.items[last.symbol_table_index.?]; - const ideal_capacity = last.size * alloc_num / alloc_den; - const ideal_capacity_end_addr = last_symbol.n_value + ideal_capacity; - const new_start_addr = mem.alignForwardGeneric(u64, ideal_capacity_end_addr, alignment); + const end_addr = last_symbol.n_value + last.size; + const new_start_addr = mem.alignForwardGeneric(u64, end_addr, alignment); block_placement = last; break :blk new_start_addr; } else { @@ -504,10 +515,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const text_capacity = 
self.allocatedSize(text_section.offset); const needed_size = (addr + new_block_size) - text_section.addr; log.debug("text capacity 0x{x}, needed size 0x{x}\n", .{ text_capacity, needed_size }); - - if (needed_size > text_capacity) { - // TODO handle growth - } + assert(needed_size <= text_capacity); // TODO handle growth self.last_text_block = text_block; text_section.size = needed_size; @@ -659,7 +667,7 @@ fn writeSymbol(self: *MachO, index: usize) !void { defer tracy.end(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - var sym = [1]macho.nlist_64{self.symbol_table.items[index]}; + const sym = [1]macho.nlist_64{self.symbol_table.items[index]}; const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; log.debug("writing symbol {} at 0x{x}\n", .{ sym[0], off }); try self.base.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off); diff --git a/src-self-hosted/stage2.zig b/src-self-hosted/stage2.zig index 45b8ad307..ac4d89bb2 100644 --- a/src-self-hosted/stage2.zig +++ b/src-self-hosted/stage2.zig @@ -598,12 +598,9 @@ const Stage2LibCInstallation = extern struct { // ABI warning export fn stage2_libc_parse(stage1_libc: *Stage2LibCInstallation, libc_file_z: [*:0]const u8) Error { - stderr_file = std.io.getStdErr(); - stderr = stderr_file.outStream(); const libc_file = mem.spanZ(libc_file_z); - var libc = LibCInstallation.parse(std.heap.c_allocator, libc_file, stderr) catch |err| switch (err) { + var libc = LibCInstallation.parse(std.heap.c_allocator, libc_file) catch |err| switch (err) { error.ParseError => return .SemanticAnalyzeFail, - error.DiskQuota => return .DiskQuota, error.FileTooBig => return .FileTooBig, error.InputOutput => return .FileSystem, error.NoSpaceLeft => return .NoSpaceLeft, @@ -612,7 +609,6 @@ export fn stage2_libc_parse(stage1_libc: *Stage2LibCInstallation, libc_file_z: [ error.SystemResources => return .SystemResources, error.OperationAborted => return .OperationAborted, error.WouldBlock => unreachable, - 
error.NotOpenForWriting => unreachable, error.NotOpenForReading => unreachable, error.Unexpected => return .Unexpected, error.IsDir => return .IsDir, diff --git a/src-self-hosted/translate_c.zig b/src-self-hosted/translate_c.zig index 68d1dabb0..a5619d56f 100644 --- a/src-self-hosted/translate_c.zig +++ b/src-self-hosted/translate_c.zig @@ -2032,7 +2032,7 @@ fn escapeChar(c: u8, char_buf: *[4]u8) []const u8 { // Handle the remaining escapes Zig doesn't support by turning them // into their respective hex representation else => if (std.ascii.isCntrl(c)) - std.fmt.bufPrint(char_buf, "\\x{x:0<2}", .{c}) catch unreachable + std.fmt.bufPrint(char_buf, "\\x{x:0>2}", .{c}) catch unreachable else std.fmt.bufPrint(char_buf, "{c}", .{c}) catch unreachable, }; @@ -5881,7 +5881,7 @@ fn parseCPrimaryExpr(c: *Context, m: *MacroCtx, scope: *Scope) ParseError!*ast.N }, .Identifier => { const mangled_name = scope.getAlias(slice); - return transCreateNodeIdentifier(c, mangled_name); + return transCreateNodeIdentifier(c, checkForBuiltinTypedef(mangled_name) orelse mangled_name); }, .LParen => { const inner_node = try parseCExpr(c, m, scope); @@ -5899,6 +5899,10 @@ fn parseCPrimaryExpr(c: *Context, m: *MacroCtx, scope: *Scope) ParseError!*ast.N saw_l_paren = true; _ = m.next(); }, + // (type)sizeof(x) + .Keyword_sizeof, + // (type)alignof(x) + .Keyword_alignof, // (type)identifier .Identifier => {}, // (type)integer @@ -6309,6 +6313,48 @@ fn parseCPrefixOpExpr(c: *Context, m: *MacroCtx, scope: *Scope) ParseError!*ast. node.rhs = try parseCPrefixOpExpr(c, m, scope); return &node.base; }, + .Keyword_sizeof => { + const inner = if (m.peek().? == .LParen) blk: { + _ = m.next(); + const inner = try parseCExpr(c, m, scope); + if (m.next().? 
!= .RParen) { + try m.fail(c, "unable to translate C expr: expected ')'", .{}); + return error.ParseError; + } + break :blk inner; + } else try parseCPrefixOpExpr(c, m, scope); + + // @import("std").meta.sizeof(inner) + const import_fn_call = try c.createBuiltinCall("@import", 1); + const std_node = try transCreateNodeStringLiteral(c, "\"std\""); + import_fn_call.params()[0] = std_node; + import_fn_call.rparen_token = try appendToken(c, .RParen, ")"); + const inner_field_access = try transCreateNodeFieldAccess(c, &import_fn_call.base, "meta"); + const outer_field_access = try transCreateNodeFieldAccess(c, inner_field_access, "sizeof"); + + const sizeof_call = try c.createCall(outer_field_access, 1); + sizeof_call.params()[0] = inner; + sizeof_call.rtoken = try appendToken(c, .RParen, ")"); + return &sizeof_call.base; + }, + .Keyword_alignof => { + // TODO this won't work if using <stdalign.h>'s + // #define alignof _Alignof + if (m.next().? != .LParen) { + try m.fail(c, "unable to translate C expr: expected '('", .{}); + return error.ParseError; + } + const inner = try parseCExpr(c, m, scope); + if (m.next().? 
!= .RParen) { + try m.fail(c, "unable to translate C expr: expected ')'", .{}); + return error.ParseError; + } + + const builtin_call = try c.createBuiltinCall("@alignOf", 1); + builtin_call.params()[0] = inner; + builtin_call.rparen_token = try appendToken(c, .RParen, ")"); + return &builtin_call.base; + }, else => { m.i -= 1; return try parseCSuffixOpExpr(c, m, scope); diff --git a/src/all_types.hpp b/src/all_types.hpp index 3fbcc8958..1fa04f2b7 100644 --- a/src/all_types.hpp +++ b/src/all_types.hpp @@ -2265,6 +2265,7 @@ struct CodeGen { Stage2LibCInstallation *libc; + bool is_versioned; size_t version_major; size_t version_minor; size_t version_patch; diff --git a/src/analyze.cpp b/src/analyze.cpp index b1d362f6e..3ba4fd792 100644 --- a/src/analyze.cpp +++ b/src/analyze.cpp @@ -1003,7 +1003,8 @@ bool want_first_arg_sret(CodeGen *g, FnTypeId *fn_type_id) { g->zig_target->arch == ZigLLVM_x86_64 || target_is_arm(g->zig_target) || target_is_riscv(g->zig_target) || - target_is_wasm(g->zig_target)) + target_is_wasm(g->zig_target) || + target_is_ppc(g->zig_target)) { X64CABIClass abi_class = type_c_abi_x86_64_class(g, fn_type_id->return_type); return abi_class == X64CABIClass_MEMORY || abi_class == X64CABIClass_MEMORY_nobyval; @@ -2372,7 +2373,10 @@ static Error resolve_union_alignment(CodeGen *g, ZigType *union_type) { if (field->gen_index == UINT32_MAX) continue; - AstNode *align_expr = field->decl_node->data.struct_field.align_expr; + AstNode *align_expr = nullptr; + if (union_type->data.unionation.decl_node->type == NodeTypeContainerDecl) { + align_expr = field->decl_node->data.struct_field.align_expr; + } if (align_expr != nullptr) { if (!analyze_const_align(g, &union_type->data.unionation.decls_scope->base, align_expr, &field->align)) @@ -2468,9 +2472,6 @@ static Error resolve_union_type(CodeGen *g, ZigType *union_type) { AstNode *decl_node = union_type->data.unionation.decl_node; - - assert(decl_node->type == NodeTypeContainerDecl); - uint32_t field_count = 
union_type->data.unionation.src_field_count; TypeUnionField *most_aligned_union_member = union_type->data.unionation.most_aligned_union_member; @@ -2603,16 +2604,16 @@ static Error resolve_enum_zero_bits(CodeGen *g, ZigType *enum_type) { if (decl_node->type == NodeTypeContainerDecl) { assert(!enum_type->data.enumeration.fields); field_count = (uint32_t)decl_node->data.container_decl.fields.length; - if (field_count == 0) { - add_node_error(g, decl_node, buf_sprintf("enums must have 1 or more fields")); - - enum_type->data.enumeration.src_field_count = field_count; - enum_type->data.enumeration.fields = nullptr; - enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; - return ErrorSemanticAnalyzeFail; - } } else { - field_count = enum_type->data.enumeration.src_field_count; + field_count = enum_type->data.enumeration.src_field_count + enum_type->data.enumeration.non_exhaustive; + } + + if (field_count == 0) { + add_node_error(g, decl_node, buf_sprintf("enums must have 1 or more fields")); + enum_type->data.enumeration.src_field_count = field_count; + enum_type->data.enumeration.fields = nullptr; + enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; + return ErrorSemanticAnalyzeFail; } Scope *scope = &enum_type->data.enumeration.decls_scope->base; @@ -3055,7 +3056,6 @@ static Error resolve_union_zero_bits(CodeGen *g, ZigType *union_type) { return ErrorNone; AstNode *decl_node = union_type->data.unionation.decl_node; - assert(decl_node->type == NodeTypeContainerDecl); if (union_type->data.unionation.resolve_loop_flag_zero_bits) { if (union_type->data.unionation.resolve_status != ResolveStatusInvalid) { @@ -3069,30 +3069,51 @@ static Error resolve_union_zero_bits(CodeGen *g, ZigType *union_type) { union_type->data.unionation.resolve_loop_flag_zero_bits = true; - assert(union_type->data.unionation.fields == nullptr); - uint32_t field_count = (uint32_t)decl_node->data.container_decl.fields.length; + uint32_t field_count; + if (decl_node->type 
== NodeTypeContainerDecl) { + assert(union_type->data.unionation.fields == nullptr); + field_count = (uint32_t)decl_node->data.container_decl.fields.length; + union_type->data.unionation.src_field_count = field_count; + union_type->data.unionation.fields = heap::c_allocator.allocate(field_count); + union_type->data.unionation.fields_by_name.init(field_count); + } else { + field_count = union_type->data.unionation.src_field_count; + assert(field_count == 0 || union_type->data.unionation.fields != nullptr); + } + if (field_count == 0) { add_node_error(g, decl_node, buf_sprintf("unions must have 1 or more fields")); union_type->data.unionation.src_field_count = field_count; union_type->data.unionation.resolve_status = ResolveStatusInvalid; return ErrorSemanticAnalyzeFail; } - union_type->data.unionation.src_field_count = field_count; - union_type->data.unionation.fields = heap::c_allocator.allocate(field_count); - union_type->data.unionation.fields_by_name.init(field_count); Scope *scope = &union_type->data.unionation.decls_scope->base; HashMap occupied_tag_values = {}; - AstNode *enum_type_node = decl_node->data.container_decl.init_arg_expr; - union_type->data.unionation.have_explicit_tag_type = decl_node->data.container_decl.auto_enum || - enum_type_node != nullptr; - bool auto_layout = (union_type->data.unionation.layout == ContainerLayoutAuto); - bool want_safety = (field_count >= 2) && (auto_layout || enum_type_node != nullptr) && !(g->build_mode == BuildModeFastRelease || g->build_mode == BuildModeSmallRelease); + bool is_auto_enum; // union(enum) or union(enum(expr)) + bool is_explicit_enum; // union(expr) + AstNode *enum_type_node; // expr in union(enum(expr)) or union(expr) + if (decl_node->type == NodeTypeContainerDecl) { + is_auto_enum = decl_node->data.container_decl.auto_enum; + is_explicit_enum = decl_node->data.container_decl.init_arg_expr != nullptr; + enum_type_node = decl_node->data.container_decl.init_arg_expr; + } else { + is_auto_enum = false; + 
is_explicit_enum = union_type->data.unionation.tag_type != nullptr; + enum_type_node = nullptr; + } + union_type->data.unionation.have_explicit_tag_type = is_auto_enum || is_explicit_enum; + + bool is_auto_layout = union_type->data.unionation.layout == ContainerLayoutAuto; + bool want_safety = (field_count >= 2) + && (is_auto_layout || is_explicit_enum) + && !(g->build_mode == BuildModeFastRelease || g->build_mode == BuildModeSmallRelease); ZigType *tag_type; - bool create_enum_type = decl_node->data.container_decl.auto_enum || (enum_type_node == nullptr && want_safety); + bool create_enum_type = is_auto_enum || (!is_explicit_enum && want_safety); bool *covered_enum_fields; + bool *is_zero_bits = heap::c_allocator.allocate(field_count); ZigLLVMDIEnumerator **di_enumerators; if (create_enum_type) { occupied_tag_values.init(field_count); @@ -3134,87 +3155,96 @@ static Error resolve_union_zero_bits(CodeGen *g, ZigType *union_type) { tag_type->data.enumeration.fields_by_name.init(field_count); tag_type->data.enumeration.decls_scope = union_type->data.unionation.decls_scope; } else if (enum_type_node != nullptr) { - ZigType *enum_type = analyze_type_expr(g, scope, enum_type_node); - if (type_is_invalid(enum_type)) { + tag_type = analyze_type_expr(g, scope, enum_type_node); + } else { + if (decl_node->type == NodeTypeContainerDecl) { + tag_type = nullptr; + } else { + tag_type = union_type->data.unionation.tag_type; + } + } + if (tag_type != nullptr) { + if (type_is_invalid(tag_type)) { union_type->data.unionation.resolve_status = ResolveStatusInvalid; return ErrorSemanticAnalyzeFail; } - if (enum_type->id != ZigTypeIdEnum) { + if (tag_type->id != ZigTypeIdEnum) { union_type->data.unionation.resolve_status = ResolveStatusInvalid; - add_node_error(g, enum_type_node, - buf_sprintf("expected enum tag type, found '%s'", buf_ptr(&enum_type->name))); + add_node_error(g, enum_type_node != nullptr ? 
enum_type_node : decl_node, + buf_sprintf("expected enum tag type, found '%s'", buf_ptr(&tag_type->name))); return ErrorSemanticAnalyzeFail; } - if ((err = type_resolve(g, enum_type, ResolveStatusAlignmentKnown))) { + if ((err = type_resolve(g, tag_type, ResolveStatusAlignmentKnown))) { assert(g->errors.length != 0); return err; } - tag_type = enum_type; - covered_enum_fields = heap::c_allocator.allocate(enum_type->data.enumeration.src_field_count); - } else { - tag_type = nullptr; + covered_enum_fields = heap::c_allocator.allocate(tag_type->data.enumeration.src_field_count); } union_type->data.unionation.tag_type = tag_type; - uint32_t gen_field_index = 0; for (uint32_t i = 0; i < field_count; i += 1) { - AstNode *field_node = decl_node->data.container_decl.fields.at(i); - Buf *field_name = field_node->data.struct_field.name; TypeUnionField *union_field = &union_type->data.unionation.fields[i]; - union_field->name = field_node->data.struct_field.name; - union_field->decl_node = field_node; - union_field->gen_index = UINT32_MAX; + if (decl_node->type == NodeTypeContainerDecl) { + AstNode *field_node = decl_node->data.container_decl.fields.at(i); + union_field->name = field_node->data.struct_field.name; + union_field->decl_node = field_node; + union_field->gen_index = UINT32_MAX; + is_zero_bits[i] = false; - auto field_entry = union_type->data.unionation.fields_by_name.put_unique(union_field->name, union_field); - if (field_entry != nullptr) { - ErrorMsg *msg = add_node_error(g, field_node, - buf_sprintf("duplicate union field: '%s'", buf_ptr(union_field->name))); - add_error_note(g, msg, field_entry->value->decl_node, buf_sprintf("other field here")); - union_type->data.unionation.resolve_status = ResolveStatusInvalid; - return ErrorSemanticAnalyzeFail; + auto field_entry = union_type->data.unionation.fields_by_name.put_unique(union_field->name, union_field); + if (field_entry != nullptr) { + ErrorMsg *msg = add_node_error(g, union_field->decl_node, + 
buf_sprintf("duplicate union field: '%s'", buf_ptr(union_field->name))); + add_error_note(g, msg, field_entry->value->decl_node, buf_sprintf("other field here")); + union_type->data.unionation.resolve_status = ResolveStatusInvalid; + return ErrorSemanticAnalyzeFail; + } + + if (field_node->data.struct_field.type == nullptr) { + if (is_auto_enum || is_explicit_enum) { + union_field->type_entry = g->builtin_types.entry_void; + is_zero_bits[i] = true; + } else { + add_node_error(g, field_node, buf_sprintf("union field missing type")); + union_type->data.unionation.resolve_status = ResolveStatusInvalid; + return ErrorSemanticAnalyzeFail; + } + } else { + ZigValue *field_type_val = analyze_const_value(g, scope, + field_node->data.struct_field.type, g->builtin_types.entry_type, nullptr, LazyOkNoUndef); + if (type_is_invalid(field_type_val->type)) { + union_type->data.unionation.resolve_status = ResolveStatusInvalid; + return ErrorSemanticAnalyzeFail; + } + assert(field_type_val->special != ConstValSpecialRuntime); + union_field->type_val = field_type_val; + } + + if (field_node->data.struct_field.value != nullptr && !is_auto_enum) { + ErrorMsg *msg = add_node_error(g, field_node->data.struct_field.value, + buf_create_from_str("untagged union field assignment")); + add_error_note(g, msg, decl_node, buf_create_from_str("consider 'union(enum)' here")); + } } - bool field_is_zero_bits; - if (field_node->data.struct_field.type == nullptr) { - if (decl_node->data.container_decl.auto_enum || - decl_node->data.container_decl.init_arg_expr != nullptr) - { - union_field->type_entry = g->builtin_types.entry_void; - field_is_zero_bits = true; - } else { - add_node_error(g, field_node, buf_sprintf("union field missing type")); - union_type->data.unionation.resolve_status = ResolveStatusInvalid; - return ErrorSemanticAnalyzeFail; - } - } else { - ZigValue *field_type_val = analyze_const_value(g, scope, - field_node->data.struct_field.type, g->builtin_types.entry_type, nullptr, 
LazyOkNoUndef); - if (type_is_invalid(field_type_val->type)) { - union_type->data.unionation.resolve_status = ResolveStatusInvalid; - return ErrorSemanticAnalyzeFail; - } - assert(field_type_val->special != ConstValSpecialRuntime); - union_field->type_val = field_type_val; - if (union_type->data.unionation.resolve_status == ResolveStatusInvalid) - return ErrorSemanticAnalyzeFail; - + if (union_field->type_val != nullptr) { bool field_is_opaque_type; - if ((err = type_val_resolve_is_opaque_type(g, field_type_val, &field_is_opaque_type))) { + if ((err = type_val_resolve_is_opaque_type(g, union_field->type_val, &field_is_opaque_type))) { union_type->data.unionation.resolve_status = ResolveStatusInvalid; return ErrorSemanticAnalyzeFail; } if (field_is_opaque_type) { - add_node_error(g, field_node, + add_node_error(g, union_field->decl_node, buf_create_from_str( "opaque types have unknown size and therefore cannot be directly embedded in unions")); union_type->data.unionation.resolve_status = ResolveStatusInvalid; return ErrorSemanticAnalyzeFail; } - switch (type_val_resolve_requires_comptime(g, field_type_val)) { + switch (type_val_resolve_requires_comptime(g, union_field->type_val)) { case ReqCompTimeInvalid: if (g->trace_err != nullptr) { - g->trace_err = add_error_note(g, g->trace_err, field_node, + g->trace_err = add_error_note(g, g->trace_err, union_field->decl_node, buf_create_from_str("while checking this field")); } union_type->data.unionation.resolve_status = ResolveStatusInvalid; @@ -3226,29 +3256,25 @@ static Error resolve_union_zero_bits(CodeGen *g, ZigType *union_type) { break; } - if ((err = type_val_resolve_zero_bits(g, field_type_val, union_type, nullptr, &field_is_zero_bits))) { + if ((err = type_val_resolve_zero_bits(g, union_field->type_val, union_type, nullptr, &is_zero_bits[i]))) { union_type->data.unionation.resolve_status = ResolveStatusInvalid; return ErrorSemanticAnalyzeFail; } } - if (field_node->data.struct_field.value != nullptr && 
!decl_node->data.container_decl.auto_enum) { - ErrorMsg *msg = add_node_error(g, field_node->data.struct_field.value, - buf_create_from_str("untagged union field assignment")); - add_error_note(g, msg, decl_node, buf_create_from_str("consider 'union(enum)' here")); - } - if (create_enum_type) { - di_enumerators[i] = ZigLLVMCreateDebugEnumerator(g->dbuilder, buf_ptr(field_name), i); + di_enumerators[i] = ZigLLVMCreateDebugEnumerator(g->dbuilder, buf_ptr(union_field->name), i); union_field->enum_field = &tag_type->data.enumeration.fields[i]; - union_field->enum_field->name = field_name; + union_field->enum_field->name = union_field->name; union_field->enum_field->decl_index = i; - union_field->enum_field->decl_node = field_node; + union_field->enum_field->decl_node = union_field->decl_node; auto prev_entry = tag_type->data.enumeration.fields_by_name.put_unique(union_field->enum_field->name, union_field->enum_field); assert(prev_entry == nullptr); // caught by union de-duplicator above - AstNode *tag_value = field_node->data.struct_field.value; + AstNode *tag_value = decl_node->type == NodeTypeContainerDecl + ? union_field->decl_node->data.struct_field.value : nullptr; + // In this first pass we resolve explicit tag values. // In a second pass we will fill in the unspecified ones. 
if (tag_value != nullptr) { @@ -3276,11 +3302,11 @@ static Error resolve_union_zero_bits(CodeGen *g, ZigType *union_type) { return ErrorSemanticAnalyzeFail; } } - } else if (enum_type_node != nullptr) { - union_field->enum_field = find_enum_type_field(tag_type, field_name); + } else if (tag_type != nullptr) { + union_field->enum_field = find_enum_type_field(tag_type, union_field->name); if (union_field->enum_field == nullptr) { - ErrorMsg *msg = add_node_error(g, field_node, - buf_sprintf("enum field not found: '%s'", buf_ptr(field_name))); + ErrorMsg *msg = add_node_error(g, union_field->decl_node, + buf_sprintf("enum field not found: '%s'", buf_ptr(union_field->name))); add_error_note(g, msg, tag_type->data.enumeration.decl_node, buf_sprintf("enum declared here")); union_type->data.unionation.resolve_status = ResolveStatusInvalid; @@ -3289,21 +3315,23 @@ static Error resolve_union_zero_bits(CodeGen *g, ZigType *union_type) { covered_enum_fields[union_field->enum_field->decl_index] = true; } else { union_field->enum_field = heap::c_allocator.create(); - union_field->enum_field->name = field_name; + union_field->enum_field->name = union_field->name; union_field->enum_field->decl_index = i; bigint_init_unsigned(&union_field->enum_field->value, i); } assert(union_field->enum_field != nullptr); - - if (field_is_zero_bits) - continue; - - union_field->gen_index = gen_field_index; - gen_field_index += 1; } - bool src_have_tag = decl_node->data.container_decl.auto_enum || - decl_node->data.container_decl.init_arg_expr != nullptr; + uint32_t gen_field_index = 0; + for (uint32_t i = 0; i < field_count; i += 1) { + TypeUnionField *union_field = &union_type->data.unionation.fields[i]; + if (!is_zero_bits[i]) { + union_field->gen_index = gen_field_index; + gen_field_index += 1; + } + } + + bool src_have_tag = is_auto_enum || is_explicit_enum; if (src_have_tag && union_type->data.unionation.layout != ContainerLayoutAuto) { const char *qual_str; @@ -3317,8 +3345,7 @@ static 
Error resolve_union_zero_bits(CodeGen *g, ZigType *union_type) { qual_str = "extern"; break; } - AstNode *source_node = (decl_node->data.container_decl.init_arg_expr != nullptr) ? - decl_node->data.container_decl.init_arg_expr : decl_node; + AstNode *source_node = enum_type_node != nullptr ? enum_type_node : decl_node; add_node_error(g, source_node, buf_sprintf("%s union does not support enum tag type", qual_str)); union_type->data.unionation.resolve_status = ResolveStatusInvalid; @@ -3326,43 +3353,47 @@ static Error resolve_union_zero_bits(CodeGen *g, ZigType *union_type) { } if (create_enum_type) { - // Now iterate again and populate the unspecified tag values - uint32_t next_maybe_unoccupied_index = 0; + if (decl_node->type == NodeTypeContainerDecl) { + // Now iterate again and populate the unspecified tag values + uint32_t next_maybe_unoccupied_index = 0; - for (uint32_t field_i = 0; field_i < field_count; field_i += 1) { - AstNode *field_node = decl_node->data.container_decl.fields.at(field_i); - TypeUnionField *union_field = &union_type->data.unionation.fields[field_i]; - AstNode *tag_value = field_node->data.struct_field.value; + for (uint32_t field_i = 0; field_i < field_count; field_i += 1) { + AstNode *field_node = decl_node->data.container_decl.fields.at(field_i); + TypeUnionField *union_field = &union_type->data.unionation.fields[field_i]; + AstNode *tag_value = field_node->data.struct_field.value; - if (tag_value == nullptr) { - if (occupied_tag_values.size() == 0) { - bigint_init_unsigned(&union_field->enum_field->value, next_maybe_unoccupied_index); - next_maybe_unoccupied_index += 1; - } else { - BigInt proposed_value; - for (;;) { - bigint_init_unsigned(&proposed_value, next_maybe_unoccupied_index); + if (tag_value == nullptr) { + if (occupied_tag_values.size() == 0) { + bigint_init_unsigned(&union_field->enum_field->value, next_maybe_unoccupied_index); next_maybe_unoccupied_index += 1; - auto entry = occupied_tag_values.put_unique(proposed_value, 
field_node); - if (entry != nullptr) { - continue; + } else { + BigInt proposed_value; + for (;;) { + bigint_init_unsigned(&proposed_value, next_maybe_unoccupied_index); + next_maybe_unoccupied_index += 1; + auto entry = occupied_tag_values.put_unique(proposed_value, field_node); + if (entry != nullptr) { + continue; + } + break; } - break; + bigint_init_bigint(&union_field->enum_field->value, &proposed_value); } - bigint_init_bigint(&union_field->enum_field->value, &proposed_value); } } } - } else if (enum_type_node != nullptr) { + } else if (tag_type != nullptr) { for (uint32_t i = 0; i < tag_type->data.enumeration.src_field_count; i += 1) { TypeEnumField *enum_field = &tag_type->data.enumeration.fields[i]; if (!covered_enum_fields[i]) { - AstNode *enum_decl_node = tag_type->data.enumeration.decl_node; - AstNode *field_node = enum_decl_node->data.container_decl.fields.at(i); ErrorMsg *msg = add_node_error(g, decl_node, buf_sprintf("enum field missing: '%s'", buf_ptr(enum_field->name))); - add_error_note(g, msg, field_node, - buf_sprintf("declared here")); + if (decl_node->type == NodeTypeContainerDecl) { + AstNode *enum_decl_node = tag_type->data.enumeration.decl_node; + AstNode *field_node = enum_decl_node->data.container_decl.fields.at(i); + add_error_note(g, msg, field_node, + buf_sprintf("declared here")); + } union_type->data.unionation.resolve_status = ResolveStatusInvalid; } } @@ -8350,7 +8381,7 @@ static void resolve_llvm_types_struct(CodeGen *g, ZigType *struct_type, ResolveS ZigLLVMDIFile *di_file; ZigLLVMDIScope *di_scope; unsigned line; - if (decl_node != nullptr && !struct_type->data.structure.created_by_at_type) { + if (decl_node != nullptr) { Scope *scope = &struct_type->data.structure.decls_scope->base; ZigType *import = get_scope_import(scope); di_file = import->data.structure.root_struct->di_file; @@ -8713,7 +8744,7 @@ static void resolve_llvm_types_union(CodeGen *g, ZigType *union_type, ResolveSta uint64_t store_size_in_bits = 
union_field->type_entry->size_in_bits; uint64_t abi_align_in_bits = 8*union_field->type_entry->abi_align; - AstNode *field_node = decl_node->data.container_decl.fields.at(i); + AstNode *field_node = union_field->decl_node; union_inner_di_types[union_field->gen_index] = ZigLLVMCreateDebugMemberType(g->dbuilder, ZigLLVMTypeToScope(union_type->llvm_di_type), buf_ptr(union_field->enum_field->name), import->data.structure.root_struct->di_file, (unsigned)(field_node->line + 1), diff --git a/src/codegen.cpp b/src/codegen.cpp index d895c26c9..2a216f69c 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -91,7 +91,8 @@ void codegen_set_test_name_prefix(CodeGen *g, Buf *prefix) { g->test_name_prefix = prefix; } -void codegen_set_lib_version(CodeGen *g, size_t major, size_t minor, size_t patch) { +void codegen_set_lib_version(CodeGen *g, bool is_versioned, size_t major, size_t minor, size_t patch) { + g->is_versioned = is_versioned; g->version_major = major; g->version_minor = minor; g->version_patch = patch; @@ -10824,6 +10825,7 @@ static Error check_cache(CodeGen *g, Buf *manifest_dir, Buf *digest) { cache_bool(ch, g->emit_bin); cache_bool(ch, g->emit_llvm_ir); cache_bool(ch, g->emit_asm); + cache_bool(ch, g->is_versioned); cache_usize(ch, g->version_major); cache_usize(ch, g->version_minor); cache_usize(ch, g->version_patch); @@ -10894,7 +10896,7 @@ static void resolve_out_paths(CodeGen *g) { buf_resize(out_basename, 0); buf_append_str(out_basename, target_lib_file_prefix(g->zig_target)); buf_append_buf(out_basename, g->root_out_name); - buf_append_str(out_basename, target_lib_file_ext(g->zig_target, !g->is_dynamic, + buf_append_str(out_basename, target_lib_file_ext(g->zig_target, !g->is_dynamic, g->is_versioned, g->version_major, g->version_minor, g->version_patch)); break; } diff --git a/src/codegen.hpp b/src/codegen.hpp index 191da9a04..3139071d5 100644 --- a/src/codegen.hpp +++ b/src/codegen.hpp @@ -38,7 +38,7 @@ void codegen_set_rdynamic(CodeGen *g, bool rdynamic); 
void codegen_set_linker_script(CodeGen *g, const char *linker_script); void codegen_set_test_filter(CodeGen *g, Buf *filter); void codegen_set_test_name_prefix(CodeGen *g, Buf *prefix); -void codegen_set_lib_version(CodeGen *g, size_t major, size_t minor, size_t patch); +void codegen_set_lib_version(CodeGen *g, bool is_versioned, size_t major, size_t minor, size_t patch); void codegen_add_time_event(CodeGen *g, const char *name); void codegen_print_timing_report(CodeGen *g, FILE *f); void codegen_link(CodeGen *g); diff --git a/src/glibc.cpp b/src/glibc.cpp index 2456cab44..62f5604ba 100644 --- a/src/glibc.cpp +++ b/src/glibc.cpp @@ -335,7 +335,7 @@ Error glibc_build_dummies_and_maps(CodeGen *g, const ZigGLibCAbi *glibc_abi, con bool is_ld = (strcmp(lib->name, "ld") == 0); CodeGen *child_gen = create_child_codegen(g, zig_file_path, OutTypeLib, nullptr, lib->name, progress_node); - codegen_set_lib_version(child_gen, lib->sover, 0, 0); + codegen_set_lib_version(child_gen, true, lib->sover, 0, 0); child_gen->is_dynamic = true; child_gen->is_dummy_so = true; child_gen->version_script_path = map_file_path; diff --git a/src/ir.cpp b/src/ir.cpp index 5fe9dfa0c..804dee418 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -64,6 +64,7 @@ enum ConstCastResultId { ConstCastResultIdPointerChild, ConstCastResultIdSliceChild, ConstCastResultIdOptionalChild, + ConstCastResultIdOptionalShape, ConstCastResultIdErrorUnionPayload, ConstCastResultIdErrorUnionErrorSet, ConstCastResultIdFnAlign, @@ -11947,8 +11948,22 @@ static ConstCastOnly types_match_const_cast_only(IrAnalyze *ira, ZigType *wanted } } - // maybe + // optional types if (wanted_type->id == ZigTypeIdOptional && actual_type->id == ZigTypeIdOptional) { + // Consider the case where the wanted type is ??[*]T and the actual one + // is ?[*]T, we cannot turn the former into the latter even though the + // child types are compatible (?[*]T and [*]T are both represented as a + // pointer). 
The extra level of indirection in ??[*]T means it's + // represented as a regular, fat, optional type and, as a consequence, + // has a different shape than the one of ?[*]T. + if ((wanted_ptr_type != nullptr) != (actual_ptr_type != nullptr)) { + // The use of type_mismatch is intentional + result.id = ConstCastResultIdOptionalShape; + result.data.type_mismatch = heap::c_allocator.allocate_nonzero(1); + result.data.type_mismatch->wanted_type = wanted_type; + result.data.type_mismatch->actual_type = actual_type; + return result; + } ConstCastOnly child = types_match_const_cast_only(ira, wanted_type->data.maybe.child_type, actual_type->data.maybe.child_type, source_node, wanted_is_mutable); if (child.id == ConstCastResultIdInvalid) @@ -14550,6 +14565,13 @@ static void report_recursive_error(IrAnalyze *ira, AstNode *source_node, ConstCa report_recursive_error(ira, source_node, &cast_result->data.optional->child, msg); break; } + case ConstCastResultIdOptionalShape: { + add_error_note(ira->codegen, parent_msg, source_node, + buf_sprintf("optional type child '%s' cannot cast into optional type '%s'", + buf_ptr(&cast_result->data.type_mismatch->actual_type->name), + buf_ptr(&cast_result->data.type_mismatch->wanted_type->name))); + break; + } case ConstCastResultIdErrorUnionErrorSet: { ErrorMsg *msg = add_error_note(ira->codegen, parent_msg, source_node, buf_sprintf("error set '%s' cannot cast into error set '%s'", @@ -25425,8 +25447,6 @@ static Error ir_make_type_info_value(IrAnalyze *ira, IrInst* source_instr, ZigTy init_const_slice(ira->codegen, fields[2], union_field_array, 0, union_field_count, false); - ZigType *type_info_enum_field_type = ir_type_info_get_type(ira, "EnumField", nullptr); - for (uint32_t union_field_index = 0; union_field_index < union_field_count; union_field_index++) { TypeUnionField *union_field = &type_entry->data.unionation.fields[union_field_index]; ZigValue *union_field_val = 
&union_field_array->data.x_array.data.s_none.elements[union_field_index]; @@ -25434,20 +25454,10 @@ static Error ir_make_type_info_value(IrAnalyze *ira, IrInst* source_instr, ZigTy union_field_val->special = ConstValSpecialStatic; union_field_val->type = type_info_union_field_type; - ZigValue **inner_fields = alloc_const_vals_ptrs(ira->codegen, 3); + ZigValue **inner_fields = alloc_const_vals_ptrs(ira->codegen, 2); inner_fields[1]->special = ConstValSpecialStatic; - inner_fields[1]->type = get_optional_type(ira->codegen, type_info_enum_field_type); - - if (fields[1]->data.x_optional == nullptr) { - inner_fields[1]->data.x_optional = nullptr; - } else { - inner_fields[1]->data.x_optional = ira->codegen->pass1_arena->create(); - make_enum_field_val(ira, inner_fields[1]->data.x_optional, union_field->enum_field, type_info_enum_field_type); - } - - inner_fields[2]->special = ConstValSpecialStatic; - inner_fields[2]->type = ira->codegen->builtin_types.entry_type; - inner_fields[2]->data.x_type = union_field->type_entry; + inner_fields[1]->type = ira->codegen->builtin_types.entry_type; + inner_fields[1]->data.x_type = union_field->type_entry; ZigValue *name = create_const_str_lit(ira->codegen, union_field->name)->data.x_ptr.data.ref.pointee; init_const_slice(ira->codegen, inner_fields[0], name, 0, buf_len(union_field->name), true); @@ -26103,7 +26113,8 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI entry->data.structure.layout = layout; entry->data.structure.special = is_tuple ? 
StructSpecialInferredTuple : StructSpecialNone; entry->data.structure.created_by_at_type = true; - entry->data.structure.decls_scope = create_decls_scope(ira->codegen, nullptr, nullptr, entry, entry, &entry->name); + entry->data.structure.decls_scope = create_decls_scope( + ira->codegen, source_instr->source_node, source_instr->scope, entry, get_scope_import(source_instr->scope), &entry->name); assert(fields_ptr->data.x_ptr.special == ConstPtrSpecialBaseArray); assert(fields_ptr->data.x_ptr.data.base_array.elem_index == 0); @@ -26227,13 +26238,89 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI return ira->codegen->invalid_inst_gen->value->type; field->value = *field_int_value; } - return entry; } - case ZigTypeIdUnion: - ir_add_error(ira, source_instr, buf_sprintf( - "TODO implement @Type for 'TypeInfo.%s': see https://github.com/ziglang/zig/issues/2907", type_id_name(tagTypeId))); - return ira->codegen->invalid_inst_gen->value->type; + case ZigTypeIdUnion: { + assert(payload->special == ConstValSpecialStatic); + assert(payload->type == ir_type_info_get_type(ira, "Union", nullptr)); + + ZigValue *layout_value = get_const_field(ira, source_instr->source_node, payload, "layout", 0); + if (layout_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + assert(layout_value->special == ConstValSpecialStatic); + assert(layout_value->type == ir_type_info_get_type(ira, "ContainerLayout", nullptr)); + ContainerLayout layout = (ContainerLayout)bigint_as_u32(&layout_value->data.x_enum_tag); + + ZigType *tag_type = get_const_field_meta_type_optional(ira, source_instr->source_node, payload, "tag_type", 1); + if (tag_type != nullptr && type_is_invalid(tag_type)) { + return ira->codegen->invalid_inst_gen->value->type; + } + if (tag_type != nullptr && tag_type->id != ZigTypeIdEnum) { + ir_add_error(ira, source_instr, buf_sprintf( + "expected enum type, found '%s'", type_id_name(tag_type->id))); + return 
ira->codegen->invalid_inst_gen->value->type; + } + + ZigValue *fields_value = get_const_field(ira, source_instr->source_node, payload, "fields", 2); + if (fields_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + + assert(fields_value->special == ConstValSpecialStatic); + assert(is_slice(fields_value->type)); + ZigValue *fields_ptr = fields_value->data.x_struct.fields[slice_ptr_index]; + ZigValue *fields_len_value = fields_value->data.x_struct.fields[slice_len_index]; + size_t fields_len = bigint_as_usize(&fields_len_value->data.x_bigint); + + ZigValue *decls_value = get_const_field(ira, source_instr->source_node, payload, "decls", 3); + if (decls_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + + assert(decls_value->special == ConstValSpecialStatic); + assert(is_slice(decls_value->type)); + ZigValue *decls_len_value = decls_value->data.x_struct.fields[slice_len_index]; + size_t decls_len = bigint_as_usize(&decls_len_value->data.x_bigint); + if (decls_len != 0) { + ir_add_error(ira, source_instr, buf_create_from_str("TypeInfo.Union.decls must be empty for @Type")); + return ira->codegen->invalid_inst_gen->value->type; + } + + ZigType *entry = new_type_table_entry(ZigTypeIdUnion); + buf_init_from_buf(&entry->name, + get_anon_type_name(ira->codegen, ira->old_irb.exec, "union", source_instr->scope, source_instr->source_node, &entry->name)); + entry->data.unionation.decl_node = source_instr->source_node; + entry->data.unionation.fields = heap::c_allocator.allocate(fields_len); + entry->data.unionation.fields_by_name.init(fields_len); + entry->data.unionation.decls_scope = create_decls_scope( + ira->codegen, source_instr->source_node, source_instr->scope, entry, get_scope_import(source_instr->scope), &entry->name); + entry->data.unionation.tag_type = tag_type; + entry->data.unionation.src_field_count = fields_len; + entry->data.unionation.layout = layout; + + assert(fields_ptr->data.x_ptr.special == 
ConstPtrSpecialBaseArray); + assert(fields_ptr->data.x_ptr.data.base_array.elem_index == 0); + ZigValue *fields_arr = fields_ptr->data.x_ptr.data.base_array.array_val; + assert(fields_arr->special == ConstValSpecialStatic); + assert(fields_arr->data.x_array.special == ConstArraySpecialNone); + for (size_t i = 0; i < fields_len; i++) { + ZigValue *field_value = &fields_arr->data.x_array.data.s_none.elements[i]; + assert(field_value->type == ir_type_info_get_type(ira, "UnionField", nullptr)); + TypeUnionField *field = &entry->data.unionation.fields[i]; + field->name = buf_alloc(); + if ((err = get_const_field_buf(ira, source_instr->source_node, field_value, "name", 0, field->name))) + return ira->codegen->invalid_inst_gen->value->type; + if (entry->data.unionation.fields_by_name.put_unique(field->name, field) != nullptr) { + ir_add_error(ira, source_instr, buf_sprintf("duplicate union field '%s'", buf_ptr(field->name))); + return ira->codegen->invalid_inst_gen->value->type; + } + field->decl_node = source_instr->source_node; + ZigValue *type_value = get_const_field(ira, source_instr->source_node, field_value, "field_type", 1); + if (type_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + field->type_val = type_value; + field->type_entry = type_value->data.x_type; + } + return entry; + } case ZigTypeIdFn: case ZigTypeIdBoundFn: ir_add_error(ira, source_instr, buf_sprintf( diff --git a/src/main.cpp b/src/main.cpp index e2f6a82a1..348321598 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -416,6 +416,7 @@ static int main0(int argc, char **argv) { const char *test_filter = nullptr; const char *test_name_prefix = nullptr; bool test_evented_io = false; + bool is_versioned = false; size_t ver_major = 0; size_t ver_minor = 0; size_t ver_patch = 0; @@ -870,6 +871,7 @@ static int main0(int argc, char **argv) { fprintf(stderr, "expected linker arg after '%s'\n", buf_ptr(arg)); return EXIT_FAILURE; } + is_versioned = true; ver_major = 
atoi(buf_ptr(linker_args.at(i))); } else if (buf_eql_str(arg, "--minor-image-version")) { i += 1; @@ -877,6 +879,7 @@ static int main0(int argc, char **argv) { fprintf(stderr, "expected linker arg after '%s'\n", buf_ptr(arg)); return EXIT_FAILURE; } + is_versioned = true; ver_minor = atoi(buf_ptr(linker_args.at(i))); } else if (buf_eql_str(arg, "--stack")) { i += 1; @@ -1228,10 +1231,13 @@ static int main0(int argc, char **argv) { } else if (strcmp(arg, "--test-name-prefix") == 0) { test_name_prefix = argv[i]; } else if (strcmp(arg, "--ver-major") == 0) { + is_versioned = true; ver_major = atoi(argv[i]); } else if (strcmp(arg, "--ver-minor") == 0) { + is_versioned = true; ver_minor = atoi(argv[i]); } else if (strcmp(arg, "--ver-patch") == 0) { + is_versioned = true; ver_patch = atoi(argv[i]); } else if (strcmp(arg, "--test-cmd") == 0) { test_exec_args.append(argv[i]); @@ -1590,7 +1596,7 @@ static int main0(int argc, char **argv) { g->emit_llvm_ir = emit_llvm_ir; codegen_set_out_name(g, buf_out_name); - codegen_set_lib_version(g, ver_major, ver_minor, ver_patch); + codegen_set_lib_version(g, is_versioned, ver_major, ver_minor, ver_patch); g->want_single_threaded = want_single_threaded; codegen_set_linker_script(g, linker_script); g->version_script_path = version_script; diff --git a/src/target.cpp b/src/target.cpp index 84080ba1f..dff134a01 100644 --- a/src/target.cpp +++ b/src/target.cpp @@ -779,7 +779,7 @@ const char *target_lib_file_prefix(const ZigTarget *target) { } } -const char *target_lib_file_ext(const ZigTarget *target, bool is_static, +const char *target_lib_file_ext(const ZigTarget *target, bool is_static, bool is_versioned, size_t version_major, size_t version_minor, size_t version_patch) { if (target_is_wasm(target)) { @@ -799,11 +799,19 @@ const char *target_lib_file_ext(const ZigTarget *target, bool is_static, if (is_static) { return ".a"; } else if (target_os_is_darwin(target->os)) { - return buf_ptr(buf_sprintf(".%" ZIG_PRI_usize ".%" ZIG_PRI_usize 
".%" ZIG_PRI_usize ".dylib", - version_major, version_minor, version_patch)); + if (is_versioned) { + return buf_ptr(buf_sprintf(".%" ZIG_PRI_usize ".%" ZIG_PRI_usize ".%" ZIG_PRI_usize ".dylib", + version_major, version_minor, version_patch)); + } else { + return ".dylib"; + } } else { - return buf_ptr(buf_sprintf(".so.%" ZIG_PRI_usize ".%" ZIG_PRI_usize ".%" ZIG_PRI_usize, - version_major, version_minor, version_patch)); + if (is_versioned) { + return buf_ptr(buf_sprintf(".so.%" ZIG_PRI_usize ".%" ZIG_PRI_usize ".%" ZIG_PRI_usize, + version_major, version_minor, version_patch)); + } else { + return ".so"; + } } } } @@ -853,6 +861,9 @@ const char *arch_stack_pointer_register_name(ZigLLVM_ArchType arch) { case ZigLLVM_riscv32: case ZigLLVM_riscv64: case ZigLLVM_mipsel: + case ZigLLVM_ppc: + case ZigLLVM_ppc64: + case ZigLLVM_ppc64le: return "sp"; case ZigLLVM_wasm32: @@ -879,7 +890,6 @@ const char *arch_stack_pointer_register_name(ZigLLVM_ArchType arch) { case ZigLLVM_msp430: case ZigLLVM_nvptx: case ZigLLVM_nvptx64: - case ZigLLVM_ppc64le: case ZigLLVM_r600: case ZigLLVM_renderscript32: case ZigLLVM_renderscript64: @@ -893,8 +903,6 @@ const char *arch_stack_pointer_register_name(ZigLLVM_ArchType arch) { case ZigLLVM_tce: case ZigLLVM_tcele: case ZigLLVM_xcore: - case ZigLLVM_ppc: - case ZigLLVM_ppc64: case ZigLLVM_ve: zig_panic("TODO populate this table with stack pointer register name for this CPU architecture"); } @@ -1325,6 +1333,11 @@ bool target_is_mips(const ZigTarget *target) { target->arch == ZigLLVM_mips64 || target->arch == ZigLLVM_mips64el; } +bool target_is_ppc(const ZigTarget *target) { + return target->arch == ZigLLVM_ppc || target->arch == ZigLLVM_ppc64 || + target->arch == ZigLLVM_ppc64le; +} + unsigned target_fn_align(const ZigTarget *target) { return 16; } diff --git a/src/target.hpp b/src/target.hpp index 898fa9020..5e44301ff 100644 --- a/src/target.hpp +++ b/src/target.hpp @@ -87,7 +87,7 @@ const char *target_asm_file_ext(const ZigTarget 
*target); const char *target_llvm_ir_file_ext(const ZigTarget *target); const char *target_exe_file_ext(const ZigTarget *target); const char *target_lib_file_prefix(const ZigTarget *target); -const char *target_lib_file_ext(const ZigTarget *target, bool is_static, +const char *target_lib_file_ext(const ZigTarget *target, bool is_static, bool is_versioned, size_t version_major, size_t version_minor, size_t version_patch); bool target_can_exec(const ZigTarget *host_target, const ZigTarget *guest_target); @@ -95,6 +95,7 @@ ZigLLVM_OSType get_llvm_os_type(Os os_type); bool target_is_arm(const ZigTarget *target); bool target_is_mips(const ZigTarget *target); +bool target_is_ppc(const ZigTarget *target); bool target_allows_addr_zero(const ZigTarget *target); bool target_has_valgrind_support(const ZigTarget *target); bool target_os_is_darwin(Os os); diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 31f2b57dc..f457c7460 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -10,6 +10,135 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { "tmp.zig:2:37: error: expected type '[:1]const u8', found '*const [2:2]u8'", }); + cases.add("@Type for union with opaque field", + \\const TypeInfo = @import("builtin").TypeInfo; + \\const Untagged = @Type(.{ + \\ .Union = .{ + \\ .layout = .Auto, + \\ .tag_type = null, + \\ .fields = &[_]TypeInfo.UnionField{ + \\ .{ .name = "foo", .field_type = @Type(.Opaque) }, + \\ }, + \\ .decls = &[_]TypeInfo.Declaration{}, + \\ }, + \\}); + \\export fn entry() void { + \\ _ = Untagged{}; + \\} + , &[_][]const u8{ + "tmp.zig:2:25: error: opaque types have unknown size and therefore cannot be directly embedded in unions", + "tmp.zig:13:17: note: referenced here", + }); + + cases.add("@Type for union with zero fields", + \\const TypeInfo = @import("builtin").TypeInfo; + \\const Untagged = @Type(.{ + \\ .Union = .{ + \\ .layout = .Auto, + \\ .tag_type = null, + \\ .fields = &[_]TypeInfo.UnionField{}, + \\ 
.decls = &[_]TypeInfo.Declaration{}, + \\ }, + \\}); + \\export fn entry() void { + \\ _ = Untagged{}; + \\} + , &[_][]const u8{ + "tmp.zig:2:25: error: unions must have 1 or more fields", + "tmp.zig:11:17: note: referenced here", + }); + + cases.add("@Type for exhaustive enum with zero fields", + \\const TypeInfo = @import("builtin").TypeInfo; + \\const Tag = @Type(.{ + \\ .Enum = .{ + \\ .layout = .Auto, + \\ .tag_type = u1, + \\ .fields = &[_]TypeInfo.EnumField{}, + \\ .decls = &[_]TypeInfo.Declaration{}, + \\ .is_exhaustive = true, + \\ }, + \\}); + \\export fn entry() void { + \\ _ = @intToEnum(Tag, 0); + \\} + , &[_][]const u8{ + "tmp.zig:2:20: error: enums must have 1 or more fields", + "tmp.zig:12:9: note: referenced here", + }); + + cases.add("@Type for tagged union with extra union field", + \\const TypeInfo = @import("builtin").TypeInfo; + \\const Tag = @Type(.{ + \\ .Enum = .{ + \\ .layout = .Auto, + \\ .tag_type = u1, + \\ .fields = &[_]TypeInfo.EnumField{ + \\ .{ .name = "signed", .value = 0 }, + \\ .{ .name = "unsigned", .value = 1 }, + \\ }, + \\ .decls = &[_]TypeInfo.Declaration{}, + \\ .is_exhaustive = true, + \\ }, + \\}); + \\const Tagged = @Type(.{ + \\ .Union = .{ + \\ .layout = .Auto, + \\ .tag_type = Tag, + \\ .fields = &[_]TypeInfo.UnionField{ + \\ .{ .name = "signed", .field_type = i32 }, + \\ .{ .name = "unsigned", .field_type = u32 }, + \\ .{ .name = "arst", .field_type = f32 }, + \\ }, + \\ .decls = &[_]TypeInfo.Declaration{}, + \\ }, + \\}); + \\export fn entry() void { + \\ var tagged = Tagged{ .signed = -1 }; + \\ tagged = .{ .unsigned = 1 }; + \\} + , &[_][]const u8{ + "tmp.zig:14:23: error: enum field not found: 'arst'", + "tmp.zig:2:20: note: enum declared here", + "tmp.zig:27:24: note: referenced here", + }); + + cases.add("@Type for tagged union with extra enum field", + \\const TypeInfo = @import("builtin").TypeInfo; + \\const Tag = @Type(.{ + \\ .Enum = .{ + \\ .layout = .Auto, + \\ .tag_type = u2, + \\ .fields = 
&[_]TypeInfo.EnumField{ + \\ .{ .name = "signed", .value = 0 }, + \\ .{ .name = "unsigned", .value = 1 }, + \\ .{ .name = "arst", .field_type = 2 }, + \\ }, + \\ .decls = &[_]TypeInfo.Declaration{}, + \\ .is_exhaustive = true, + \\ }, + \\}); + \\const Tagged = @Type(.{ + \\ .Union = .{ + \\ .layout = .Auto, + \\ .tag_type = Tag, + \\ .fields = &[_]TypeInfo.UnionField{ + \\ .{ .name = "signed", .field_type = i32 }, + \\ .{ .name = "unsigned", .field_type = u32 }, + \\ }, + \\ .decls = &[_]TypeInfo.Declaration{}, + \\ }, + \\}); + \\export fn entry() void { + \\ var tagged = Tagged{ .signed = -1 }; + \\ tagged = .{ .unsigned = 1 }; + \\} + , &[_][]const u8{ + "tmp.zig:9:32: error: no member named 'field_type' in struct 'std.builtin.EnumField'", + "tmp.zig:18:21: note: referenced here", + "tmp.zig:27:18: note: referenced here", + }); + cases.add("@Type with undefined", \\comptime { \\ _ = @Type(.{ .Array = .{ .len = 0, .child = u8, .sentinel = undefined } }); @@ -7419,7 +7548,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { }); cases.add( // fixed bug #2032 - "compile diagnostic string for top level decl type", + "compile diagnostic string for top level decl type", \\export fn entry() void { \\ var foo: u32 = @This(){}; \\} diff --git a/test/stack_traces.zig b/test/stack_traces.zig index 2ab022e6e..496be0513 100644 --- a/test/stack_traces.zig +++ b/test/stack_traces.zig @@ -282,10 +282,10 @@ pub fn addCases(cases: *tests.StackTracesContext) void { \\source.zig:10:8: [address] in main (test) \\ foo(); \\ ^ - \\start.zig:254:29: [address] in std.start.posixCallMainAndExit (test) + \\start.zig:269:29: [address] in std.start.posixCallMainAndExit (test) \\ return root.main(); \\ ^ - \\start.zig:128:5: [address] in std.start._start (test) + \\start.zig:143:5: [address] in std.start._start (test) \\ @call(.{ .modifier = .never_inline }, posixCallMainAndExit, .{}); \\ ^ \\ @@ -294,7 +294,7 @@ pub fn addCases(cases: *tests.StackTracesContext) void { switch 
(std.Target.current.cpu.arch) { .aarch64 => "", // TODO disabled; results in segfault else => - \\start.zig:128:5: [address] in std.start._start (test) + \\start.zig:143:5: [address] in std.start._start (test) \\ @call(.{ .modifier = .never_inline }, posixCallMainAndExit, .{}); \\ ^ \\ diff --git a/test/stage1/behavior/cast.zig b/test/stage1/behavior/cast.zig index 4b678cd2d..ce0d16d1a 100644 --- a/test/stage1/behavior/cast.zig +++ b/test/stage1/behavior/cast.zig @@ -849,3 +849,8 @@ test "comptime float casts" { expect(b == 2); expect(@TypeOf(b) == comptime_int); } + +test "cast from ?[*]T to ??[*]T" { + const a: ??[*]u8 = @as(?[*]u8, null); + expect(a != null and a.? == null); +} diff --git a/test/stage1/behavior/translate_c_macros.h b/test/stage1/behavior/translate_c_macros.h index abc6c1e3c..49806a524 100644 --- a/test/stage1/behavior/translate_c_macros.h +++ b/test/stage1/behavior/translate_c_macros.h @@ -6,4 +6,7 @@ typedef struct Color { unsigned char a; } Color; #define CLITERAL(type) (type) -#define LIGHTGRAY CLITERAL(Color){ 200, 200, 200, 255 } // Light Gray \ No newline at end of file +#define LIGHTGRAY CLITERAL(Color){ 200, 200, 200, 255 } // Light Gray + +#define MY_SIZEOF(x) ((int)sizeof(x)) +#define MY_SIZEOF2(x) ((int)sizeof x) diff --git a/test/stage1/behavior/translate_c_macros.zig b/test/stage1/behavior/translate_c_macros.zig index ea42016e9..2cfb2331f 100644 --- a/test/stage1/behavior/translate_c_macros.zig +++ b/test/stage1/behavior/translate_c_macros.zig @@ -1,12 +1,18 @@ const expect = @import("std").testing.expect; +const expectEqual = @import("std").testing.expectEqual; const h = @cImport(@cInclude("stage1/behavior/translate_c_macros.h")); test "initializer list expression" { - @import("std").testing.expectEqual(h.Color{ + expectEqual(h.Color{ .r = 200, .g = 200, .b = 200, .a = 255, }, h.LIGHTGRAY); } + +test "sizeof in macros" { + expectEqual(@as(c_int, @sizeOf(u32)), h.MY_SIZEOF(u32)); + expectEqual(@as(c_int, @sizeOf(u32)), 
h.MY_SIZEOF2(u32)); +} diff --git a/test/stage1/behavior/type.zig b/test/stage1/behavior/type.zig index 81bd741ec..38d23175d 100644 --- a/test/stage1/behavior/type.zig +++ b/test/stage1/behavior/type.zig @@ -313,3 +313,106 @@ test "Type.Enum" { testing.expectEqual(@as(u32, 5), @enumToInt(Bar.b)); testing.expectEqual(@as(u32, 6), @enumToInt(@intToEnum(Bar, 6))); } + +test "Type.Union" { + const Untagged = @Type(.{ + .Union = .{ + .layout = .Auto, + .tag_type = null, + .fields = &[_]TypeInfo.UnionField{ + .{ .name = "int", .field_type = i32 }, + .{ .name = "float", .field_type = f32 }, + }, + .decls = &[_]TypeInfo.Declaration{}, + }, + }); + var untagged = Untagged{ .int = 1 }; + untagged.float = 2.0; + untagged.int = 3; + testing.expectEqual(@as(i32, 3), untagged.int); + + const PackedUntagged = @Type(.{ + .Union = .{ + .layout = .Packed, + .tag_type = null, + .fields = &[_]TypeInfo.UnionField{ + .{ .name = "signed", .field_type = i32 }, + .{ .name = "unsigned", .field_type = u32 }, + }, + .decls = &[_]TypeInfo.Declaration{}, + }, + }); + var packed_untagged = PackedUntagged{ .signed = -1 }; + testing.expectEqual(@as(i32, -1), packed_untagged.signed); + testing.expectEqual(~@as(u32, 0), packed_untagged.unsigned); + + const Tag = @Type(.{ + .Enum = .{ + .layout = .Auto, + .tag_type = u1, + .fields = &[_]TypeInfo.EnumField{ + .{ .name = "signed", .value = 0 }, + .{ .name = "unsigned", .value = 1 }, + }, + .decls = &[_]TypeInfo.Declaration{}, + .is_exhaustive = true, + }, + }); + const Tagged = @Type(.{ + .Union = .{ + .layout = .Auto, + .tag_type = Tag, + .fields = &[_]TypeInfo.UnionField{ + .{ .name = "signed", .field_type = i32 }, + .{ .name = "unsigned", .field_type = u32 }, + }, + .decls = &[_]TypeInfo.Declaration{}, + }, + }); + var tagged = Tagged{ .signed = -1 }; + testing.expectEqual(Tag.signed, tagged); + tagged = .{ .unsigned = 1 }; + testing.expectEqual(Tag.unsigned, tagged); +} + +test "Type.Union from Type.Enum" { + const Tag = @Type(.{ + .Enum = .{ + 
.layout = .Auto, + .tag_type = u0, + .fields = &[_]TypeInfo.EnumField{ + .{ .name = "working_as_expected", .value = 0 }, + }, + .decls = &[_]TypeInfo.Declaration{}, + .is_exhaustive = true, + }, + }); + const T = @Type(.{ + .Union = .{ + .layout = .Auto, + .tag_type = Tag, + .fields = &[_]TypeInfo.UnionField{ + .{ .name = "working_as_expected", .field_type = u32 }, + }, + .decls = &[_]TypeInfo.Declaration{}, + }, + }); + _ = T; + _ = @typeInfo(T).Union; +} + +test "Type.Union from regular enum" { + const E = enum { working_as_expected = 0 }; + const T = @Type(.{ + .Union = .{ + .layout = .Auto, + .tag_type = E, + .fields = &[_]TypeInfo.UnionField{ + .{ .name = "working_as_expected", .field_type = u32 }, + }, + .decls = &[_]TypeInfo.Declaration{}, + }, + }); + _ = T; + _ = @typeInfo(T).Union; +} diff --git a/test/stage1/behavior/type_info.zig b/test/stage1/behavior/type_info.zig index 409993a74..9e066d5f1 100644 --- a/test/stage1/behavior/type_info.zig +++ b/test/stage1/behavior/type_info.zig @@ -198,8 +198,6 @@ fn testUnion() void { expect(typeinfo_info.Union.layout == .Auto); expect(typeinfo_info.Union.tag_type.? 
== TypeId); expect(typeinfo_info.Union.fields.len == 25); - expect(typeinfo_info.Union.fields[4].enum_field != null); - expect(typeinfo_info.Union.fields[4].enum_field.?.value == 4); expect(typeinfo_info.Union.fields[4].field_type == @TypeOf(@typeInfo(u8).Int)); expect(typeinfo_info.Union.decls.len == 21); @@ -213,7 +211,6 @@ fn testUnion() void { expect(notag_union_info.Union.tag_type == null); expect(notag_union_info.Union.layout == .Auto); expect(notag_union_info.Union.fields.len == 2); - expect(notag_union_info.Union.fields[0].enum_field == null); expect(notag_union_info.Union.fields[1].field_type == u32); const TestExternUnion = extern union { @@ -223,7 +220,6 @@ fn testUnion() void { const extern_union_info = @typeInfo(TestExternUnion); expect(extern_union_info.Union.layout == .Extern); expect(extern_union_info.Union.tag_type == null); - expect(extern_union_info.Union.fields[0].enum_field == null); expect(extern_union_info.Union.fields[0].field_type == *c_void); } diff --git a/test/translate_c.zig b/test/translate_c.zig index 41f1e7829..83fdda8d8 100644 --- a/test/translate_c.zig +++ b/test/translate_c.zig @@ -2761,12 +2761,15 @@ pub fn addCases(cases: *tests.TranslateCContext) void { cases.add("macro cast", \\#define FOO(bar) baz((void *)(baz)) \\#define BAR (void*) a + \\#define BAZ (uint32_t)(2) , &[_][]const u8{ \\pub inline fn FOO(bar: anytype) @TypeOf(baz((@import("std").meta.cast(?*c_void, baz)))) { \\ return baz((@import("std").meta.cast(?*c_void, baz))); \\} , \\pub const BAR = (@import("std").meta.cast(?*c_void, a)); + , + \\pub const BAZ = (@import("std").meta.cast(u32, 2)); }); cases.add("macro with cast to unsigned short, long, and long long",