// To get started, run this tool with no args and read the help message.
//
// The build systems of musl-libc and glibc require specifying a single target
// architecture. Meanwhile, Zig supports out-of-the-box cross compilation for
// every target. So the process to create libc headers that Zig ships is to use
// this tool.
// First, use the musl/glibc build systems to create installations of all the
// targets in the `glibc_targets`/`musl_targets` variables.
// Next, run this tool to create a new directory which puts .h files into
// subdirectories, with `generic` being files that apply to all architectures.
// You'll then have to manually update Zig source repo with these new files.

const std = @import("std");
const builtin = @import("builtin");
const Arch = builtin.Arch;
const Abi = builtin.Abi;
const Os = builtin.Os;
const assert = std.debug.assert;

/// One libc installation to scan. `name` is the directory looked up under each
/// search path; `arch` is the architecture the headers found there are filed
/// under.
const LibCTarget = struct {
    name: []const u8,
    arch: MultiArch,
};

/// Architecture key for a set of headers: either one of the payload-less tags
/// below, or exactly one `Arch` tag wrapped in `specific`.
const MultiArch = union(enum) {
    aarch64,
    arm,
    mips,
    mips64,
    powerpc64,
    specific: @TagType(Arch),

    /// Returns true when both values carry the same tag and, for `specific`,
    /// the same wrapped architecture tag.
    fn eql(a: MultiArch, b: MultiArch) bool {
        if (@enumToInt(a) != @enumToInt(b))
            return false;
        // Every tag other than `specific` has no payload left to compare.
        if (@TagType(MultiArch)(a) != .specific)
            return true;
        return a.specific == b.specific;
    }
};

// NOTE(review): these entries initialize `.zig_arch` / `.zig_abi`, which are
// not fields of `LibCTarget` as declared above. Nothing in this file reads the
// table (`main` panics with "TODO this regressed" for glibc), so it has to be
// ported to the `name`/`arch` layout before glibc support can be re-enabled.
const glibc_targets = [_]LibCTarget{
    LibCTarget{ .name = "aarch64_be-linux-gnu", .zig_arch = Arch.aarch64_be, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "aarch64-linux-gnu", .zig_arch = Arch.aarch64, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "aarch64-linux-gnu-disable-multi-arch", .zig_arch = Arch.aarch64, .zig_abi = null },
    LibCTarget{ .name = "alpha-linux-gnu", .zig_arch = null, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "armeb-linux-gnueabi", .zig_arch = Arch.armeb, .zig_abi = Abi.gnueabi },
    LibCTarget{ .name = "armeb-linux-gnueabi-be8", .zig_arch = Arch.armeb, .zig_abi = null },
    LibCTarget{ .name = "armeb-linux-gnueabihf", .zig_arch = Arch.armeb, .zig_abi = Abi.gnueabihf },
    LibCTarget{ .name = "armeb-linux-gnueabihf-be8", .zig_arch = Arch.armeb, .zig_abi = null },
    LibCTarget{ .name = "arm-linux-gnueabi", .zig_arch = Arch.arm, .zig_abi = Abi.gnueabi },
    LibCTarget{ .name = "arm-linux-gnueabihf", .zig_arch = Arch.arm, .zig_abi = Abi.gnueabihf },
    LibCTarget{ .name = "arm-linux-gnueabihf-v7a", .zig_arch = Arch.arm, .zig_abi = null },
    LibCTarget{ .name = "arm-linux-gnueabihf-v7a-disable-multi-arch", .zig_arch = null, .zig_abi = null },
    LibCTarget{ .name = "hppa-linux-gnu", .zig_arch = null, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "i486-linux-gnu", .zig_arch = null, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "i586-linux-gnu", .zig_arch = null, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "i686-gnu", .zig_arch = Arch.i386, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "i686-linux-gnu", .zig_arch = Arch.i386, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "i686-linux-gnu-disable-multi-arch", .zig_arch = null, .zig_abi = null },
    LibCTarget{ .name = "i686-linux-gnu-enable-obsolete", .zig_arch = Arch.i386, .zig_abi = null },
    LibCTarget{ .name = "i686-linux-gnu-static-pie", .zig_arch = Arch.i386, .zig_abi = null },
    LibCTarget{ .name = "ia64-linux-gnu", .zig_arch = null, .zig_abi = null },
    LibCTarget{ .name = "m68k-linux-gnu", .zig_arch = null, .zig_abi = null },
    LibCTarget{ .name = "m68k-linux-gnu-coldfire", .zig_arch = null, .zig_abi = null },
    LibCTarget{ .name = "m68k-linux-gnu-coldfire-soft", .zig_arch = null, .zig_abi = null },
    LibCTarget{ .name = "microblazeel-linux-gnu", .zig_arch = null, .zig_abi = null },
    LibCTarget{ .name = "microblaze-linux-gnu", .zig_arch = null, .zig_abi = null },
    LibCTarget{ .name = "mips64el-linux-gnu-n32", .zig_arch = Arch.mips64el, .zig_abi = Abi.gnuabin32 },
    LibCTarget{ .name = "mips64el-linux-gnu-n32-nan2008", .zig_arch = Arch.mips64el, .zig_abi = null },
    LibCTarget{ .name = "mips64el-linux-gnu-n32-nan2008-soft", .zig_arch = Arch.mips64el, .zig_abi = null },
    LibCTarget{ .name = "mips64el-linux-gnu-n32-soft", .zig_arch = Arch.mips64el, .zig_abi = null },
    LibCTarget{ .name = "mips64el-linux-gnu-n64", .zig_arch = Arch.mips64el, .zig_abi = Abi.gnuabi64 },
    LibCTarget{ .name = "mips64el-linux-gnu-n64-nan2008", .zig_arch = Arch.mips64el, .zig_abi = null },
    LibCTarget{ .name = "mips64el-linux-gnu-n64-nan2008-soft", .zig_arch = Arch.mips64el, .zig_abi = null },
    LibCTarget{ .name = "mips64el-linux-gnu-n64-soft", .zig_arch = Arch.mips64el, .zig_abi = null },
    LibCTarget{ .name = "mips64-linux-gnu-n32", .zig_arch = Arch.mips64, .zig_abi = Abi.gnuabin32 },
    LibCTarget{ .name = "mips64-linux-gnu-n32-nan2008", .zig_arch = Arch.mips64, .zig_abi = null },
    LibCTarget{ .name = "mips64-linux-gnu-n32-nan2008-soft", .zig_arch = Arch.mips64, .zig_abi = null },
    LibCTarget{ .name = "mips64-linux-gnu-n32-soft", .zig_arch = Arch.mips64, .zig_abi = null },
    LibCTarget{ .name = "mips64-linux-gnu-n64", .zig_arch = Arch.mips64, .zig_abi = Abi.gnuabi64 },
    LibCTarget{ .name = "mips64-linux-gnu-n64-nan2008", .zig_arch = Arch.mips64, .zig_abi = null },
    LibCTarget{ .name = "mips64-linux-gnu-n64-nan2008-soft", .zig_arch = Arch.mips64, .zig_abi = null },
    LibCTarget{ .name = "mips64-linux-gnu-n64-soft", .zig_arch = Arch.mips64, .zig_abi = null },
    LibCTarget{ .name = "mipsel-linux-gnu", .zig_arch = Arch.mipsel, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "mipsel-linux-gnu-nan2008", .zig_arch = Arch.mipsel, .zig_abi = null },
    LibCTarget{ .name = "mipsel-linux-gnu-nan2008-soft", .zig_arch = Arch.mipsel, .zig_abi = null },
    LibCTarget{ .name = "mipsel-linux-gnu-soft", .zig_arch = Arch.mipsel, .zig_abi = null },
    LibCTarget{ .name = "mips-linux-gnu", .zig_arch = Arch.mips, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "mips-linux-gnu-nan2008", .zig_arch = Arch.mips, .zig_abi = null },
    LibCTarget{ .name = "mips-linux-gnu-nan2008-soft", .zig_arch = Arch.mips, .zig_abi = null },
    LibCTarget{ .name = "mips-linux-gnu-soft", .zig_arch = Arch.mips, .zig_abi = null },
    LibCTarget{ .name = "powerpc64le-linux-gnu", .zig_arch = Arch.powerpc64le, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "powerpc64-linux-gnu", .zig_arch = Arch.powerpc64, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "powerpc-linux-gnu", .zig_arch = Arch.powerpc, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "powerpc-linux-gnu-power4", .zig_arch = Arch.powerpc, .zig_abi = null },
    LibCTarget{ .name = "powerpc-linux-gnu-soft", .zig_arch = Arch.powerpc, .zig_abi = null },
    LibCTarget{ .name = "powerpc-linux-gnuspe", .zig_arch = Arch.powerpc, .zig_abi = null },
    LibCTarget{ .name = "powerpc-linux-gnuspe-e500v1", .zig_arch = Arch.powerpc, .zig_abi = null },
    LibCTarget{ .name = "riscv64-linux-gnu-rv64imac-lp64", .zig_arch = Arch.riscv64, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "riscv64-linux-gnu-rv64imafdc-lp64", .zig_arch = Arch.riscv64, .zig_abi = null },
    LibCTarget{ .name = "riscv64-linux-gnu-rv64imafdc-lp64d", .zig_arch = Arch.riscv64, .zig_abi = null },
    LibCTarget{ .name = "s390-linux-gnu", .zig_arch = null, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "s390x-linux-gnu", .zig_arch = Arch.s390x, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "sh3eb-linux-gnu", .zig_arch = null, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "sh3-linux-gnu", .zig_arch = null, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "sh4eb-linux-gnu", .zig_arch = null, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "sh4eb-linux-gnu-soft", .zig_arch = null, .zig_abi = null },
    LibCTarget{ .name = "sh4-linux-gnu", .zig_arch = null, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "sh4-linux-gnu-soft", .zig_arch = null, .zig_abi = null },
    LibCTarget{ .name = "sparc64-linux-gnu", .zig_arch = Arch.sparc, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "sparc64-linux-gnu-disable-multi-arch", .zig_arch = Arch.sparc, .zig_abi = null },
    LibCTarget{ .name = "sparcv9-linux-gnu", .zig_arch = Arch.sparcv9, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "sparcv9-linux-gnu-disable-multi-arch", .zig_arch = Arch.sparcv9, .zig_abi = null },
    LibCTarget{ .name = "x86_64-linux-gnu", .zig_arch = Arch.x86_64, .zig_abi = Abi.gnu },
    LibCTarget{ .name = "x86_64-linux-gnu-disable-multi-arch", .zig_arch = Arch.x86_64, .zig_abi = null },
    LibCTarget{ .name = "x86_64-linux-gnu-enable-obsolete", .zig_arch = Arch.x86_64, .zig_abi = null },
    LibCTarget{ .name = "x86_64-linux-gnu-static-pie", .zig_arch = Arch.x86_64, .zig_abi = null },
    LibCTarget{ .name = "x86_64-linux-gnu-x32", .zig_arch = Arch.x86_64, .zig_abi = Abi.gnux32 },
    LibCTarget{ .name = "x86_64-linux-gnu-x32-static-pie", .zig_arch = Arch.x86_64, .zig_abi = null },
};

/// musl installations scanned when `--abi musl` is given.
const musl_targets = [_]LibCTarget{
    LibCTarget{ .name = "aarch64", .arch = MultiArch.aarch64 },
    LibCTarget{ .name = "arm", .arch = MultiArch.arm },
    LibCTarget{ .name = "i386", .arch = MultiArch{ .specific = .i386 } },
    LibCTarget{ .name = "mips", .arch = MultiArch.mips },
    LibCTarget{ .name = "mips64", .arch = MultiArch.mips64 },
    LibCTarget{ .name = "powerpc", .arch = MultiArch{ .specific = .powerpc } },
    LibCTarget{ .name = "powerpc64", .arch = MultiArch.powerpc64 },
    LibCTarget{ .name = "riscv64", .arch = MultiArch{ .specific = .riscv64 } },
    LibCTarget{ .name = "s390x", .arch = MultiArch{ .specific = .s390x } },
    LibCTarget{ .name = "x86_64", .arch = MultiArch{ .specific = .x86_64 } },
};

/// Key of the per-path table: which arch/os/abi combination a header was
/// found for.
const DestTarget = struct {
    arch: MultiArch,
    os: Os,
    abi: Abi,

    /// Mixes the arch tag, os and abi into a 32-bit hash with wrapping
    /// arithmetic. Only the tag of `arch` takes part, so two different
    /// `specific` architectures hash alike and are told apart by `eql`.
    fn hash(a: DestTarget) u32 {
        return @enumToInt(a.arch) +%
            (@enumToInt(a.os) *% u32(4202347608)) +%
            (@enumToInt(a.abi) *% u32(4082223418));
    }

    /// Field-wise equality; `arch` is compared with `MultiArch.eql`.
    fn eql(a: DestTarget, b: DestTarget) bool {
        return a.arch.eql(b.arch) and
            a.os == b.os and
            a.abi == b.abi;
    }
};

/// One distinct (relative path, trimmed file contents) pair.
const Contents = struct {
    /// File contents with leading/trailing whitespace trimmed.
    bytes: []const u8,
    /// Number of scanned targets whose file hashed to this entry.
    hit_count: usize,
    /// Digest of the relative path followed by `bytes`.
    hash: []const u8,
    /// Set once these contents have been written to the generic directory.
    is_generic: bool,

    /// Ordering used to sort candidates by ascending `hit_count`.
    fn hitCountLessThan(lhs: *const Contents, rhs: *const Contents) bool {
        return lhs.hit_count < rhs.hit_count;
    }
};

const HashToContents = std.AutoHashMap([]const u8, Contents);
const TargetToHash = std.HashMap(DestTarget, []const u8, DestTarget.hash, DestTarget.eql);
const PathTable = std.AutoHashMap([]const u8, *TargetToHash);

const LibCVendor = enum {
    musl,
    glibc,
};

/// Returns true if `needle` (compared by address) is already in `list`.
fn containsContents(list: []const *Contents, needle: *Contents) bool {
    for (list) |item| {
        if (item == needle) return true;
    }
    return false;
}

/// Scans every libc installation found under the search paths, hashes each
/// header, and writes the result into the output directory: contents shared
/// by more than one target go into `generic-<abi>`, everything else into a
/// per-target `<arch>-<os>-<abi>` directory.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.direct_allocator);
    const allocator = &arena.allocator;
    const args = try std.process.argsAlloc(allocator);
    var search_paths = std.ArrayList([]const u8).init(allocator);
    var opt_out_dir: ?[]const u8 = null;
    var opt_abi: ?[]const u8 = null;

    // Every option except --help consumes the argument that follows it.
    var arg_i: usize = 1;
    while (arg_i < args.len) : (arg_i += 1) {
        if (std.mem.eql(u8, args[arg_i], "--help"))
            usageAndExit(args[0]);
        if (arg_i + 1 >= args.len) {
            std.debug.warn("expected argument after '{}'\n", args[arg_i]);
            usageAndExit(args[0]);
        }

        if (std.mem.eql(u8, args[arg_i], "--search-path")) {
            try search_paths.append(args[arg_i + 1]);
        } else if (std.mem.eql(u8, args[arg_i], "--out")) {
            // Bad user input must be reported, not asserted: a failed assert
            // is undefined behavior in release-fast builds.
            if (opt_out_dir != null) {
                std.debug.warn("duplicate argument: {}\n", args[arg_i]);
                usageAndExit(args[0]);
            }
            opt_out_dir = args[arg_i + 1];
        } else if (std.mem.eql(u8, args[arg_i], "--abi")) {
            if (opt_abi != null) {
                std.debug.warn("duplicate argument: {}\n", args[arg_i]);
                usageAndExit(args[0]);
            }
            opt_abi = args[arg_i + 1];
        } else {
            std.debug.warn("unrecognized argument: {}\n", args[arg_i]);
            usageAndExit(args[0]);
        }

        // Skip the value that was just consumed.
        arg_i += 1;
    }

    const out_dir = opt_out_dir orelse usageAndExit(args[0]);
    const abi_name = opt_abi orelse usageAndExit(args[0]);
    const vendor = if (std.mem.eql(u8, abi_name, "musl"))
        LibCVendor.musl
    else if (std.mem.eql(u8, abi_name, "glibc"))
        LibCVendor.glibc
    else {
        std.debug.warn("unrecognized C ABI: {}\n", abi_name);
        usageAndExit(args[0]);
    };
    const generic_name = try std.fmt.allocPrint(allocator, "generic-{}", abi_name);

    // TODO compiler crashed when I wrote this the canonical way
    var libc_targets: []const LibCTarget = undefined;
    switch (vendor) {
        .musl => libc_targets = musl_targets,
        .glibc => @panic("TODO this regressed"), // glibc_targets,
    }

    var path_table = PathTable.init(allocator);
    var hash_to_contents = HashToContents.init(allocator);
    var max_bytes_saved: usize = 0;
    var total_bytes: usize = 0;

    var hasher = std.crypto.Sha256.init();

    for (libc_targets) |libc_target| {
        const dest_target = DestTarget{
            .arch = libc_target.arch,
            .abi = switch (vendor) {
                .musl => .musl,
                .glibc => @panic("TODO this regressed"),
            },
            .os = .linux,
        };

        // The first search path that holds this target wins; the `else`
        // branch runs only when no search path had it.
        search: for (search_paths.toSliceConst()) |search_path| {
            var sub_path: []const []const u8 = undefined;
            switch (vendor) {
                .musl => {
                    sub_path = [_][]const u8{ search_path, libc_target.name, "usr", "local", "musl", "include" };
                },
                .glibc => {
                    sub_path = [_][]const u8{ search_path, libc_target.name, "usr", "include" };
                },
            }
            const target_include_dir = try std.fs.path.join(allocator, sub_path);

            // Walk the include tree with an explicit stack of directories.
            var dir_stack = std.ArrayList([]const u8).init(allocator);
            try dir_stack.append(target_include_dir);

            while (dir_stack.popOrNull()) |full_dir_name| {
                var dir = std.fs.Dir.open(allocator, full_dir_name) catch |err| switch (err) {
                    error.FileNotFound => continue :search,
                    error.AccessDenied => continue :search,
                    else => return err,
                };
                defer dir.close();

                while (try dir.next()) |entry| {
                    const full_path = try std.fs.path.join(allocator, [_][]const u8{ full_dir_name, entry.name });
                    switch (entry.kind) {
                        .Directory => try dir_stack.append(full_path),
                        .File => {
                            const rel_path = try std.fs.path.relative(allocator, target_include_dir, full_path);
                            const raw_bytes = try std.io.readFileAlloc(allocator, full_path);
                            const trimmed = std.mem.trim(u8, raw_bytes, " \r\n\t");
                            total_bytes += raw_bytes.len;

                            // Hash path + contents so that equal bytes under
                            // different paths stay distinct entries.
                            const hash = try allocator.alloc(u8, 32);
                            hasher.reset();
                            hasher.update(rel_path);
                            hasher.update(trimmed);
                            hasher.final(hash);

                            const gop = try hash_to_contents.getOrPut(hash);
                            if (gop.found_existing) {
                                max_bytes_saved += raw_bytes.len;
                                gop.kv.value.hit_count += 1;
                                std.debug.warn(
                                    "duplicate: {} {} ({Bi:2})\n",
                                    libc_target.name,
                                    rel_path,
                                    raw_bytes.len,
                                );
                            } else {
                                gop.kv.value = Contents{
                                    .bytes = trimmed,
                                    .hit_count = 1,
                                    .hash = hash,
                                    .is_generic = false,
                                };
                            }

                            const path_gop = try path_table.getOrPut(rel_path);
                            const target_to_hash = if (path_gop.found_existing) path_gop.kv.value else blk: {
                                const ptr = try allocator.create(TargetToHash);
                                ptr.* = TargetToHash.init(allocator);
                                path_gop.kv.value = ptr;
                                break :blk ptr;
                            };
                            // A target must not contribute the same path twice.
                            assert((try target_to_hash.put(dest_target, hash)) == null);
                        },
                        else => std.debug.warn("warning: weird file: {}\n", full_path),
                    }
                }
            }
            break;
        } else {
            std.debug.warn("warning: libc target not found: {}\n", libc_target.name);
        }
    }

    std.debug.warn("summary: {Bi:2} could be reduced to {Bi:2}\n", total_bytes, total_bytes - max_bytes_saved);
    try std.fs.makePath(allocator, out_dir);

    var missed_opportunity_bytes: usize = 0;
    // iterate path_table. for each path, put all the hashes into a list. sort by hit_count.
    // the hash with the highest hit_count gets to be the "generic" one. everybody else
    // gets their header in a separate arch directory.
    var path_it = path_table.iterator();
    while (path_it.next()) |path_kv| {
        var contents_list = std.ArrayList(*Contents).init(allocator);
        {
            var hash_it = path_kv.value.iterator();
            while (hash_it.next()) |hash_kv| {
                const contents = &hash_to_contents.get(hash_kv.value).?.value;
                try contents_list.append(contents);
            }
        }
        std.sort.sort(*Contents, contents_list.toSlice(), Contents.hitCountLessThan);

        const best_contents = contents_list.popOrNull().?;
        if (best_contents.hit_count > 1) {
            // worth it to make it generic
            const full_path = try std.fs.path.join(allocator, [_][]const u8{ out_dir, generic_name, path_kv.key });
            try std.fs.makePath(allocator, std.fs.path.dirname(full_path).?);
            try std.io.writeFile(full_path, best_contents.bytes);
            best_contents.is_generic = true;

            // contents_list has one entry per target, so the same Contents
            // shows up hit_count times. Skip the generic one and report each
            // remaining contender exactly once.
            var reported = std.ArrayList(*Contents).init(allocator);
            while (contents_list.popOrNull()) |contender| {
                // Sorted by hit_count: nothing past this point is shared.
                if (contender.hit_count <= 1) break;
                if (contender == best_contents) continue;
                if (containsContents(reported.toSliceConst(), contender)) continue;
                try reported.append(contender);

                const this_missed_bytes = contender.hit_count * contender.bytes.len;
                missed_opportunity_bytes += this_missed_bytes;
                std.debug.warn("Missed opportunity ({Bi:2}): {}\n", this_missed_bytes, path_kv.key);
            }
        }

        // Everything that did not become generic is written per target.
        var hash_it = path_kv.value.iterator();
        while (hash_it.next()) |hash_kv| {
            const contents = &hash_to_contents.get(hash_kv.value).?.value;
            if (contents.is_generic) continue;

            const dest_target = hash_kv.key;
            const arch_name = switch (dest_target.arch) {
                .specific => |a| @tagName(a),
                else => @tagName(dest_target.arch),
            };
            const out_subpath = try std.fmt.allocPrint(
                allocator,
                "{}-{}-{}",
                arch_name,
                @tagName(dest_target.os),
                @tagName(dest_target.abi),
            );
            const full_path = try std.fs.path.join(allocator, [_][]const u8{ out_dir, out_subpath, path_kv.key });
            try std.fs.makePath(allocator, std.fs.path.dirname(full_path).?);
            try std.io.writeFile(full_path, contents.bytes);
        }
    }

    std.debug.warn("total missed opportunity: {Bi:2}\n", missed_opportunity_bytes);
}

/// Prints the command-line usage to stderr and exits with status 1.
fn usageAndExit(arg0: []const u8) noreturn {
    std.debug.warn("Usage: {} [--search-path <dir>] --out <dir> --abi <name>\n", arg0);
    std.debug.warn("--search-path can be used any number of times.\n");
    std.debug.warn(" subdirectories of search paths look like, e.g. x86_64-linux-gnu\n");
    std.debug.warn("--out is a dir that will be created, and populated with the results\n");
    std.debug.warn("--abi is either musl or glibc\n");
    std.process.exit(1);
}