copy_file_range linux syscall (#6010)

master
Maciej Walczak 2020-08-11 21:49:43 +02:00 committed by GitHub
parent 2b28cebf64
commit 6febe7e977
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 157 additions and 7 deletions

View File

@ -447,6 +447,14 @@ pub const Version = struct {
if (self.max.order(ver) == .lt) return false;
return true;
}
/// Compares the whole version range against `ver`.
/// Returns `true` when even `min` is not older than `ver`, `false` when even
/// `max` is older than `ver`, and `null` when `ver` falls inside the range so
/// that only a runtime check can decide.
pub fn isAtLeast(self: Range, ver: Version) ?bool {
    switch (self.min.order(ver)) {
        // The oldest version in the range already satisfies `ver`.
        .eq, .gt => return true,
        .lt => {},
    }
    // `min` is older than `ver`; the answer now depends on where `max` sits.
    return if (self.max.order(ver) == .lt) false else null;
}
};
pub fn order(lhs: Version, rhs: Version) std.math.Order {

View File

@ -91,6 +91,8 @@ pub extern "c" fn sendfile(
count: usize,
) isize;
/// Declaration of the C library's `copy_file_range`.
/// Both offsets are optional pointers to `i64`; the result is a raw `isize`.
pub extern "c" fn copy_file_range(
    fd_in: fd_t,
    off_in: ?*i64,
    fd_out: fd_t,
    off_out: ?*i64,
    len: usize,
    flags: c_uint,
) isize;
pub const pthread_attr_t = extern struct {
__size: [56]u8,
__align: c_long,

View File

@ -607,15 +607,10 @@ pub const File = struct {
}
}
/// Errors `copyRange` can return; identical to the error set of `os.copy_file_range`.
pub const CopyRangeError = os.CopyFileRangeError;

/// Copies up to `len` bytes from `in` starting at `in_offset` into `out`
/// starting at `out_offset`, delegating to `os.copy_file_range` with `flags` 0.
/// Returns the number of bytes copied, which can be less than `len`.
pub fn copyRange(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) CopyRangeError!usize {
    return os.copy_file_range(in.handle, in_offset, out.handle, out_offset, len, 0);
}
/// Returns the number of bytes copied. If the number read is smaller than `buffer.len`, it

View File

@ -328,6 +328,32 @@ test "sendfile" {
testing.expect(mem.eql(u8, written_buf[0..amt], "header1\nsecond header\nine1\nsecontrailer1\nsecond trailer\n"));
}
test "copyRangeAll" {
    const payload = "u6wj+JmdF3qHsFPE BUlH2g4gJCmEz0PP";
    const scratch_name = "os_test_tmp";

    // Work inside a dedicated sub-directory of a fresh temporary directory.
    var tmp = tmpDir(.{});
    defer tmp.cleanup();

    try tmp.dir.makePath(scratch_name);
    defer tmp.dir.deleteTree(scratch_name) catch {};

    var scratch = try tmp.dir.openDir(scratch_name, .{});
    defer scratch.close();

    // Source file holding the payload.
    var source = try scratch.createFile("file1.txt", .{ .read = true });
    defer source.close();
    try source.writeAll(payload);

    // Destination file, opened readable so the copy can be read back.
    var target = try scratch.createFile("file2.txt", .{ .read = true });
    defer target.close();

    _ = try source.copyRangeAll(0, target, 0, payload.len);

    // The destination must now contain exactly the payload.
    var readback: [100]u8 = undefined;
    const readback_len = try target.preadAll(&readback, 0);
    testing.expect(mem.eql(u8, readback[0..readback_len], payload));
}
test "fs.copyFile" {
const data = "u6wj+JmdF3qHsFPE BUlH2g4gJCmEz0PP";
const src_file = "tmp_test_copy_file.txt";

View File

@ -4926,6 +4926,85 @@ pub fn sendfile(
return total_written;
}
/// Errors returned by `copy_file_range`.
/// The named members are produced from errno values of the in-kernel copy;
/// `PReadError` and `PWriteError` are included because the function can fall
/// back to `pread` / `pwrite`, and `UnexpectedError` covers errno values that
/// are routed through `unexpectedErrno`.
pub const CopyFileRangeError = error{
/// Reported for `EFBIG`.
FileTooBig,
/// Reported for `EIO`.
InputOutput,
/// Reported for `EISDIR`.
IsDir,
/// Reported for `ENOMEM`.
OutOfMemory,
/// Reported for `ENOSPC`.
NoSpaceLeft,
/// Reported for `EOVERFLOW`.
Unseekable,
/// Reported for `EPERM`.
PermissionDenied,
/// Reported for `ETXTBSY`.
FileBusy,
} || PReadError || PWriteError || UnexpectedError;
/// Transfer data between file descriptors at specified offsets.
/// Returns the number of bytes written, which can be less than requested.
///
/// The `copy_file_range` call copies `len` bytes from one file descriptor to another. When possible,
/// this is done within the operating system kernel, which can provide better performance
/// characteristics than transferring data from kernel to user space and back, such as with
/// `pread` and `pwrite` calls.
///
/// `fd_in` must be a file descriptor opened for reading, and `fd_out` must be a file descriptor
/// opened for writing. They may be any kind of file descriptor; however, if `fd_in` is not a regular
/// file system file, it may cause this function to fall back to calling `pread` and `pwrite`, in which case
/// atomicity guarantees no longer apply.
///
/// If the in-kernel copy rejects a descriptor with `EBADF`, this function also falls back to
/// `pread` / `pwrite`, so any descriptor problem is reported by those calls.
///
/// If `fd_in` and `fd_out` are the same, source and target ranges must not overlap.
/// The file descriptor seek positions are ignored and not updated.
/// When `off_in` is past the end of the input file, it successfully reads 0 bytes.
///
/// `flags` has different meanings per operating system; refer to the respective man pages.
/// `flags` is only passed to the in-kernel copy; the `pread` / `pwrite` fallback does not use it.
///
/// These systems support in-kernel data copying:
/// * Linux 4.5 (cross-filesystem 5.3)
///
/// Other systems fall back to calling `pread` / `pwrite`.
///
/// Maximum offsets on Linux are `math.maxInt(i64)`.
pub fn copy_file_range(fd_in: fd_t, off_in: u64, fd_out: fd_t, off_out: u64, len: usize, flags: u32) CopyFileRangeError!usize {
    // Use the libc wrapper when the libc version check passes.
    const use_c = std.c.versionCheck(.{ .major = 2, .minor = 27, .patch = 0 }).ok;

    // TODO support for other systems than linux
    // `isAtLeast` yields `null` when only a runtime check can tell; in that case
    // the syscall is still attempted and ENOSYS is handled below.
    const try_syscall = comptime std.Target.current.os.isAtLeast(.linux, .{ .major = 4, .minor = 5 }) != false;

    if (use_c or try_syscall) {
        const sys = if (use_c) std.c else linux;

        // The underlying call takes pointers to signed 64-bit offsets.
        var off_in_copy = @bitCast(i64, off_in);
        var off_out_copy = @bitCast(i64, off_out);

        const rc = sys.copy_file_range(fd_in, &off_in_copy, fd_out, &off_out_copy, len, flags);

        // TODO avoid wasting a syscall every time if kernel is too old and returns ENOSYS https://github.com/ziglang/zig/issues/1018

        switch (sys.getErrno(rc)) {
            0 => return @intCast(usize, rc),
            EFBIG => return error.FileTooBig,
            EIO => return error.InputOutput,
            EISDIR => return error.IsDir,
            ENOMEM => return error.OutOfMemory,
            ENOSPC => return error.NoSpaceLeft,
            EOVERFLOW => return error.Unseekable,
            EPERM => return error.PermissionDenied,
            ETXTBSY => return error.FileBusy,
            // EBADF is reachable through ordinary caller mistakes (a descriptor opened
            // in the wrong mode, or an output descriptor opened for appending), so it
            // must not be `unreachable`. Use the fallback and let `pread` / `pwrite`
            // decide how to report it.
            EBADF => {},
            EINVAL => {}, // these may not be regular files, try fallback
            EXDEV => {}, // support for cross-filesystem copy added in Linux 5.3, use fallback
            ENOSYS => {}, // syscall added in Linux 4.5, use fallback
            else => |err| return unexpectedErrno(err),
        }
    }

    // Fallback: one bounded read followed by one write through a stack buffer.
    var buf: [8 * 4096]u8 = undefined;
    const adjusted_count = math.min(buf.len, len);
    const amt_read = try pread(fd_in, buf[0..adjusted_count], off_in);
    // TODO without @as the line below fails to compile for wasm32-wasi:
    // error: integer value 0 cannot be coerced to type 'os.PWriteError!usize'
    if (amt_read == 0) return @as(usize, 0);
    return pwrite(fd_out, buf[0..amt_read], off_out);
}
pub const PollError = error{
/// The kernel had no space to allocate file descriptor tables.
SystemResources,

View File

@ -1210,6 +1210,18 @@ pub fn signalfd4(fd: fd_t, mask: *const sigset_t, flags: i32) usize {
);
}
/// Issues the raw `copy_file_range` system call through `syscall6` and returns
/// its `usize` result as-is.
/// The file descriptors are widened to `isize` and bit-cast to `usize`; the
/// optional offset pointers are passed as their integer values.
pub fn copy_file_range(fd_in: fd_t, off_in: ?*i64, fd_out: fd_t, off_out: ?*i64, len: usize, flags: u32) usize {
    const in_fd_arg = @bitCast(usize, @as(isize, fd_in));
    const in_off_arg = @ptrToInt(off_in);
    const out_fd_arg = @bitCast(usize, @as(isize, fd_out));
    const out_off_arg = @ptrToInt(off_out);
    return syscall6(.copy_file_range, in_fd_arg, in_off_arg, out_fd_arg, out_off_arg, len, flags);
}
test "" {
if (builtin.os.tag == .linux) {
_ = @import("linux/test.zig");

View File

@ -100,6 +100,14 @@ pub const Target = struct {
/// Returns whether `ver` lies within `[min, max]`, comparing the enum integer values.
pub fn includesVersion(self: Range, ver: WindowsVersion) bool {
    const wanted = @enumToInt(ver);
    if (wanted < @enumToInt(self.min)) return false;
    return wanted <= @enumToInt(self.max);
}
/// Compares the whole version range against `ver` using the enum integer values.
/// Returns `true` when `min` is at or above `ver`, `false` when `max` is below
/// `ver`, and `null` when a runtime check is required.
pub fn isAtLeast(self: Range, ver: WindowsVersion) ?bool {
    const wanted = @enumToInt(ver);
    if (wanted <= @enumToInt(self.min)) return true;
    if (wanted > @enumToInt(self.max)) return false;
    return null;
}
};
/// This function is defined to serialize a Zig source code representation of this
@ -135,6 +143,12 @@ pub const Target = struct {
/// Forwards to `includesVersion` of the `range` field.
pub fn includesVersion(self: LinuxVersionRange, ver: Version) bool {
    const kernel_range = self.range;
    return kernel_range.includesVersion(ver);
}
/// Forwards to `isAtLeast` of the `range` field.
/// A `null` result means a runtime check is required.
pub fn isAtLeast(self: LinuxVersionRange, ver: Version) ?bool {
    const kernel_range = self.range;
    return kernel_range.isAtLeast(ver);
}
};
/// The version ranges here represent the minimum OS version to be supported
@ -158,6 +172,8 @@ pub const Target = struct {
///
/// Binaries built with a given maximum version will continue to function on newer operating system
/// versions. However, such a binary may not take full advantage of the newer operating system APIs.
///
/// See `Os.isAtLeast`.
pub const VersionRange = union {
none: void,
semver: Version.Range,
@ -273,6 +289,18 @@ pub const Target = struct {
};
}
/// Checks whether this OS is `tag` and guaranteed to be at least `version`.
/// Returns `false` when the OS tag differs from `tag`. Otherwise the answer
/// comes from the version range selected by `tag` (`linux`, `windows`, or
/// `semver` for every other tag); `null` means a runtime check is required.
pub fn isAtLeast(self: Os, comptime tag: Tag, version: anytype) ?bool {
    if (self.tag == tag) {
        // `tag` is comptime-known, so the switch picks the matching range field.
        switch (tag) {
            .linux => return self.version_range.linux.isAtLeast(version),
            .windows => return self.version_range.windows.isAtLeast(version),
            else => return self.version_range.semver.isAtLeast(version),
        }
    }
    return false;
}
pub fn requiresLibC(os: Os) bool {
return switch (os.tag) {
.freebsd,