zig/lib/std/os/bits/linux/bpf.zig
Andrew Kelley 4a69b11e74 add license header to all std lib files
add SPDX license identifier
copyright ownership is zig contributors
2020-08-20 16:07:04 -04:00

612 lines
16 KiB
Zig

// SPDX-License-Identifier: MIT
// Copyright (c) 2015-2020 Zig Contributors
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
// The MIT license requires this copyright notice to be included in all copies
// and substantial portions of the software.
usingnamespace std.os;
const std = @import("../../../std.zig");
// Opcode building blocks for eBPF instructions. Each constant below is a raw
// bit pattern; the section comments say which opcode field it belongs to.

// instruction classes
/// jmp mode in word width
pub const JMP32 = 0x06;
/// alu mode in double word width
pub const ALU64 = 0x07;
// ld/ldx fields
/// double word (64-bit)
pub const DW = 0x18;
/// exclusive add
pub const XADD = 0xc0;
// alu/jmp fields
/// mov reg to reg
pub const MOV = 0xb0;
/// sign extending arithmetic shift right
pub const ARSH = 0xc0;
// change endianness of a register
/// change endianness of a register; TO_LE / TO_BE below are the flags that
/// select the direction of the conversion
pub const END = 0xd0;
/// convert to little-endian
pub const TO_LE = 0x00;
/// convert to big-endian
pub const TO_BE = 0x08;
/// alias of TO_LE
pub const FROM_LE = TO_LE;
/// alias of TO_BE
pub const FROM_BE = TO_BE;
// jmp encodings
/// jump !=
pub const JNE = 0x50;
/// LT is unsigned, '<'
pub const JLT = 0xa0;
/// LE is unsigned, '<='
pub const JLE = 0xb0;
/// SGT is signed '>', GT in x86
pub const JSGT = 0x60;
/// SGE is signed '>=', GE in x86
pub const JSGE = 0x70;
/// SLT is signed, '<'
pub const JSLT = 0xc0;
/// SLE is signed, '<='
pub const JSLE = 0xd0;
/// function call
pub const CALL = 0x80;
/// function return
pub const EXIT = 0x90;
// Flags for the prog_attach command. Each one is a distinct bit (0x1, 0x2,
// 0x4).

/// Flag for prog_attach command. If a sub-cgroup installs some bpf program, the
/// program in this cgroup yields to sub-cgroup program.
pub const F_ALLOW_OVERRIDE = 0x1;
/// Flag for prog_attach command. If a sub-cgroup installs some bpf program,
/// that cgroup program gets run in addition to the program in this cgroup.
pub const F_ALLOW_MULTI = 0x2;
/// Flag for prog_attach command.
/// NOTE(review): presumably requests replacing a previously attached program
/// (see ProgAttachAttr.replace_bpf_fd) - confirm against the kernel header.
pub const F_REPLACE = 0x4;
// Flags for the BPF_PROG_LOAD command. Note that these values overlap
// numerically with the prog_attach flags above; they belong to a different
// command.

/// If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier
/// will perform strict alignment checking as if the kernel has been built with
/// CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, and NET_IP_ALIGN defined to 2.
pub const F_STRICT_ALIGNMENT = 0x1;
/// If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier will
/// allow any alignment whatsoever. On platforms with strict alignment
/// requirements for loads and stores (such as sparc and mips) the verifier
/// validates that all loads and stores provably follow this requirement. This
/// flag turns that checking and enforcement off.
///
/// It is mostly used for testing when we want to validate the context and
/// memory access aspects of the verifier, but because of an unaligned access
/// the alignment check would trigger before the one we are interested in.
pub const F_ANY_ALIGNMENT = 0x2;
/// BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose.
/// Verifier does sub-register def/use analysis and identifies instructions
/// whose def only matters for low 32-bit, high 32-bit is never referenced later
/// through implicit zero extension. Therefore verifier notifies JIT back-ends
/// that it is safe to ignore clearing high 32-bit for these instructions. This
/// saves some back-ends a lot of code-gen. However such optimization is not
/// necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends
/// hence haven't used verifier's analysis result. But, we really want to have a
/// way to be able to verify the correctness of the described optimization on
/// x86_64 on which testsuites are frequently exercised.
///
/// So, this flag is introduced. Once it is set, verifier will randomize high
/// 32-bit for those instructions which have been identified as safe to ignore
/// them. Then, if verifier is not doing correct analysis, such randomization
/// will regress tests to expose bugs.
pub const F_TEST_RND_HI32 = 0x4;
/// When BPF ldimm64's insn[0].src_reg != 0 then this can have two extensions:
///
///     insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
///     insn[0].imm:      map fd              map fd
///     insn[1].imm:      0                   offset into value
///     insn[0].off:      0                   0
///     insn[1].off:      0                   0
///     ldimm64 rewrite:  address of map      address of map[0]+offset
///     verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
pub const PSEUDO_MAP_FD = 1;
/// second ldimm64 extension; see the right-hand column of the table on
/// PSEUDO_MAP_FD
pub const PSEUDO_MAP_VALUE = 2;
/// when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
/// offset to another bpf function
pub const PSEUDO_CALL = 1;
// Flags for the BPF_MAP_UPDATE_ELEM command.

/// flag for BPF_MAP_UPDATE_ELEM command. create new element or update existing
pub const ANY = 0;
/// flag for BPF_MAP_UPDATE_ELEM command. create new element if it didn't exist
pub const NOEXIST = 1;
/// flag for BPF_MAP_UPDATE_ELEM command. update existing element
pub const EXIST = 2;
/// flag for BPF_MAP_UPDATE_ELEM command. spin_lock-ed map_lookup/map_update
pub const F_LOCK = 4;
// Flags for the BPF_MAP_CREATE command; each one is a distinct bit.
// NOTE(review): unlike every other constant in this file, these keep the
// kernel's `BPF_` prefix. The names are public, so they are left as they are.

/// flag for BPF_MAP_CREATE command
pub const BPF_F_NO_PREALLOC = 0x1;
/// flag for BPF_MAP_CREATE command. Instead of having one common LRU list in
/// the BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list which can
/// scale and perform better. Note, the LRU nodes (including free nodes) cannot
/// be moved across different LRU lists.
pub const BPF_F_NO_COMMON_LRU = 0x2;
/// flag for BPF_MAP_CREATE command. Specify numa node during map creation
pub const BPF_F_NUMA_NODE = 0x4;
/// flag for BPF_MAP_CREATE command. Flags for BPF object read access from
/// syscall side
pub const BPF_F_RDONLY = 0x8;
/// flag for BPF_MAP_CREATE command. Flags for BPF object write access from
/// syscall side
pub const BPF_F_WRONLY = 0x10;
/// flag for BPF_MAP_CREATE command. Flag for stack_map, store build_id+offset
/// instead of pointer
pub const BPF_F_STACK_BUILD_ID = 0x20;
/// flag for BPF_MAP_CREATE command. Zero-initialize hash function seed. This
/// should only be used for testing.
pub const BPF_F_ZERO_SEED = 0x40;
/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program
/// side.
pub const BPF_F_RDONLY_PROG = 0x80;
/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program
/// side.
pub const BPF_F_WRONLY_PROG = 0x100;
/// flag for BPF_MAP_CREATE command. Clone map from listener for newly accepted
/// socket
pub const BPF_F_CLONE = 0x200;
/// flag for BPF_MAP_CREATE command. Enable memory-mapping BPF map
pub const BPF_F_MMAPABLE = 0x400;
/// One BPF instruction: an 8-bit opcode, two 4-bit register numbers, a signed
/// 16-bit offset and a signed 32-bit immediate, packed in that order.
pub const Insn = packed struct {
    code: u8,
    dst: u4,
    src: u4,
    off: i16,
    imm: i32,

    /// r0 - r9 are general purpose 64-bit registers, r10 points to the stack
    /// frame
    pub const Reg = enum(u4) {
        r0,
        r1,
        r2,
        r3,
        r4,
        r5,
        r6,
        r7,
        r8,
        r9,
        r10,
    };

    // Opcode pieces used by the factory functions below; they are OR-ed
    // together to form `code`.
    const alu = 0x04;
    const jmp = 0x05;
    const mov = 0xb0;
    const k = 0;
    const exit_code = 0x90;

    // TODO: implement more factory functions for the other instructions

    /// Build the instruction that loads the immediate `imm` into register
    /// `dst`. The source register and offset fields are zeroed.
    pub fn load_imm(dst: Reg, imm: i32) Insn {
        const opcode = alu | mov | k;
        const dst_reg = @enumToInt(dst);
        return Insn{ .code = opcode, .dst = dst_reg, .src = 0, .off = 0, .imm = imm };
    }

    /// Build the instruction that exits the BPF program. Every field other
    /// than the opcode is zero.
    pub fn exit() Insn {
        const opcode = jmp | exit_code;
        return Insn{ .code = opcode, .dst = 0, .src = 0, .off = 0, .imm = 0 };
    }
};
/// Command selector for the `bpf` function in this file, which converts it to
/// its integer value and passes it as the first syscall argument. Values are
/// assigned implicitly from 0 in declaration order, so the order of the
/// members must not be changed.
pub const Cmd = extern enum(usize) {
map_create,
map_lookup_elem,
map_update_elem,
map_delete_elem,
map_get_next_key,
prog_load,
obj_pin,
obj_get,
prog_attach,
prog_detach,
prog_test_run,
prog_get_next_id,
map_get_next_id,
prog_get_fd_by_id,
map_get_fd_by_id,
obj_get_info_by_fd,
prog_query,
raw_tracepoint_open,
btf_load,
btf_get_fd_by_id,
task_fd_query,
map_lookup_and_delete_elem,
map_freeze,
btf_get_next_id,
map_lookup_batch,
map_lookup_and_delete_batch,
map_update_batch,
map_delete_batch,
link_create,
link_update,
link_get_fd_by_id,
link_get_next_id,
enable_stats,
iter_create,
link_detach,
// non-exhaustive: values not listed above are still representable
_,
};
/// Kinds of BPF map; MapCreateAttr.map_type is documented as holding one of
/// these. Values are assigned implicitly from 0 in declaration order, so the
/// order of the members must not be changed.
pub const MapType = extern enum(u32) {
unspec,
hash,
array,
prog_array,
perf_event_array,
percpu_hash,
percpu_array,
stack_trace,
cgroup_array,
lru_hash,
lru_percpu_hash,
lpm_trie,
array_of_maps,
hash_of_maps,
devmap,
sockmap,
cpumap,
xskmap,
sockhash,
cgroup_storage,
reuseport_sockarray,
percpu_cgroup_storage,
queue,
stack,
sk_storage,
devmap_hash,
struct_ops,
ringbuf,
// non-exhaustive: values not listed above are still representable
_,
};
/// Kinds of BPF program; ProgLoadAttr.prog_type is documented as holding one
/// of these. Values are assigned implicitly from 0 in declaration order, so
/// the order of the members must not be changed.
///
/// NOTE(review): unlike Cmd, MapType and AttachType this enum has no trailing
/// `_` member, i.e. it is exhaustive. Adding one would be more consistent but
/// could break callers that switch over it without an `else` prong.
pub const ProgType = extern enum(u32) {
unspec,
socket_filter,
kprobe,
sched_cls,
sched_act,
tracepoint,
xdp,
perf_event,
cgroup_skb,
cgroup_sock,
lwt_in,
lwt_out,
lwt_xmit,
sock_ops,
sk_skb,
cgroup_device,
sk_msg,
raw_tracepoint,
cgroup_sock_addr,
lwt_seg6local,
lirc_mode2,
sk_reuseport,
flow_dissector,
cgroup_sysctl,
raw_tracepoint_writable,
cgroup_sockopt,
tracing,
struct_ops,
ext,
lsm,
sk_lookup,
};
/// Attach points for BPF programs. Values are assigned implicitly from 0 in
/// declaration order, so the order of the members must not be changed.
/// NOTE(review): presumably the value space of the `attach_type` /
/// `expected_attach_type` u32 fields of the *Attr structs in this file -
/// confirm against the kernel header.
pub const AttachType = extern enum(u32) {
cgroup_inet_ingress,
cgroup_inet_egress,
cgroup_inet_sock_create,
cgroup_sock_ops,
sk_skb_stream_parser,
sk_skb_stream_verdict,
cgroup_device,
sk_msg_verdict,
cgroup_inet4_bind,
cgroup_inet6_bind,
cgroup_inet4_connect,
cgroup_inet6_connect,
cgroup_inet4_post_bind,
cgroup_inet6_post_bind,
cgroup_udp4_sendmsg,
cgroup_udp6_sendmsg,
lirc_mode2,
flow_dissector,
cgroup_sysctl,
cgroup_udp4_recvmsg,
cgroup_udp6_recvmsg,
cgroup_getsockopt,
cgroup_setsockopt,
trace_raw_tp,
trace_fentry,
trace_fexit,
modify_return,
lsm_mac,
trace_iter,
cgroup_inet4_getpeername,
cgroup_inet6_getpeername,
cgroup_inet4_getsockname,
cgroup_inet6_getsockname,
xdp_devmap,
cgroup_inet_sock_release,
xdp_cpumap,
sk_lookup,
xdp,
// non-exhaustive: values not listed above are still representable
_,
};
// Size in bytes of the fixed-length name buffers (MapCreateAttr.map_name and
// ProgLoadAttr.prog_name).
const obj_name_len = 16;
/// struct used by Cmd.map_create command
pub const MapCreateAttr = extern struct {
/// one of MapType
map_type: u32,
/// size of key in bytes
key_size: u32,
/// size of value in bytes
value_size: u32,
/// max number of entries in a map
max_entries: u32,
/// .map_create related flags
map_flags: u32,
/// fd pointing to the inner map
inner_map_fd: fd_t,
/// numa node (effective only if BPF_F_NUMA_NODE is set)
numa_node: u32,
/// name of the map, stored in a fixed buffer of obj_name_len bytes
map_name: [obj_name_len]u8,
/// ifindex of netdev to create on
map_ifindex: u32,
/// fd pointing to a BTF type data
btf_fd: fd_t,
/// BTF type_id of the key
btf_key_type_id: u32,
/// BTF type_id of the value
/// NOTE(review): the `bpf_` prefix looks like a typo for `btf_` (compare
/// btf_key_type_id); the name is kept because renaming a field would break
/// callers.
bpf_value_type_id: u32,
/// BTF type_id of a kernel struct stored as the map value
btf_vmlinux_value_type_id: u32,
};
/// struct used by Cmd.map_*_elem commands
pub const MapElemAttr = extern struct {
/// fd of the map to operate on
map_fd: fd_t,
/// NOTE(review): presumably the address of the key buffer, carried as a
/// 64-bit integer - confirm against the kernel header
key: u64,
/// `value` and `next_key` share the same 8 bytes; which one is meaningful
/// presumably depends on the command being issued
result: extern union {
value: u64,
next_key: u64,
},
flags: u64,
};
/// struct used by Cmd.map_*_batch commands
pub const MapBatchAttr = extern struct {
/// start batch, NULL to start from beginning
in_batch: u64,
/// output: next start batch
out_batch: u64,
/// NOTE(review): presumably the address of the keys buffer - confirm
keys: u64,
/// NOTE(review): presumably the address of the values buffer - confirm
values: u64,
/// input/output:
/// input: # of key/value elements
/// output: # of filled elements
count: u32,
/// fd of the map to operate on
map_fd: fd_t,
elem_flags: u64,
flags: u64,
};
/// struct used by Cmd.prog_load command
pub const ProgLoadAttr = extern struct {
/// one of ProgType
prog_type: u32,
/// NOTE(review): presumably the number of instructions at `insns` - confirm
insn_cnt: u32,
insns: u64,
license: u64,
/// verbosity level of verifier
log_level: u32,
/// size of user buffer
log_size: u32,
/// user supplied buffer
log_buf: u64,
/// not used
kern_version: u32,
prog_flags: u32,
/// name of the program, stored in a fixed buffer of obj_name_len bytes
prog_name: [obj_name_len]u8,
/// ifindex of netdev to prep for
prog_ifindex: u32,
/// For some prog types expected attach type must be known at load time to
/// verify attach type specific parts of prog (context accesses, allowed
/// helpers, etc).
expected_attach_type: u32,
/// fd pointing to BTF type data
prog_btf_fd: fd_t,
/// userspace bpf_func_info size
func_info_rec_size: u32,
func_info: u64,
/// number of bpf_func_info records
func_info_cnt: u32,
/// userspace bpf_line_info size
line_info_rec_size: u32,
line_info: u64,
/// number of bpf_line_info records
line_info_cnt: u32,
/// in-kernel BTF type id to attach to
/// NOTE(review): `attact` looks like a typo for `attach`; the name is kept
/// because renaming a field would break callers.
attact_btf_id: u32,
/// 0 to attach to vmlinux
/// NOTE(review): the kernel header appears to call this member
/// `attach_prog_fd` (a file descriptor, not an id) - confirm.
attach_prog_id: u32,
};
/// struct used by Cmd.obj_* commands
pub const ObjAttr = extern struct {
/// NOTE(review): presumably the address of a path string, carried as a
/// 64-bit integer - confirm against the kernel header
pathname: u64,
bpf_fd: fd_t,
file_flags: u32,
};
/// struct used by Cmd.prog_attach/detach commands
pub const ProgAttachAttr = extern struct {
/// container object to attach to
target_fd: fd_t,
/// eBPF program to attach
attach_bpf_fd: fd_t,
attach_type: u32,
/// NOTE(review): presumably a combination of F_ALLOW_OVERRIDE, F_ALLOW_MULTI
/// and F_REPLACE, which this file documents as prog_attach flags - confirm
attach_flags: u32,
// TODO: BPF_F_REPLACE flags
/// previously attached eBPF program to replace if F_REPLACE is used
replace_bpf_fd: fd_t,
};
/// struct used by Cmd.prog_test_run command
pub const TestAttr = extern struct {
/// fd of the program to run
prog_fd: fd_t,
retval: u32,
/// input: len of data_in
data_size_in: u32,
/// input/output: len of data_out. returns ENOSPC if data_out is too small.
data_size_out: u32,
data_in: u64,
data_out: u64,
repeat: u32,
duration: u32,
/// input: len of ctx_in
ctx_size_in: u32,
/// input/output: len of ctx_out. returns ENOSPC if ctx_out is too small.
ctx_size_out: u32,
ctx_in: u64,
ctx_out: u64,
};
/// struct used by Cmd.*_get_*_id commands
pub const GetIdAttr = extern struct {
/// All members are u32 and occupy the same 4 bytes; they differ only in
/// name.
id: extern union {
start_id: u32,
prog_id: u32,
map_id: u32,
btf_id: u32,
link_id: u32,
},
next_id: u32,
open_flags: u32,
};
/// struct used by Cmd.obj_get_info_by_fd command
pub const InfoAttr = extern struct {
/// fd of the object to query
bpf_fd: fd_t,
/// NOTE(review): presumably the size in bytes of the buffer at `info` -
/// confirm
info_len: u32,
info: u64,
};
/// struct used by Cmd.prog_query command
pub const QueryAttr = extern struct {
/// container object to query
target_fd: fd_t,
attach_type: u32,
query_flags: u32,
attach_flags: u32,
/// NOTE(review): presumably the address of an array receiving program ids,
/// with `prog_cnt` as its element count - confirm
prog_ids: u64,
prog_cnt: u32,
};
/// struct used by Cmd.raw_tracepoint_open command
pub const RawTracepointAttr = extern struct {
/// NOTE(review): presumably the address of the tracepoint name string,
/// carried as a 64-bit integer - confirm
name: u64,
prog_fd: fd_t,
};
/// struct used by Cmd.btf_load command
pub const BtfLoadAttr = extern struct {
/// NOTE(review): presumably the address of the BTF data, `btf_size` bytes
/// long - confirm
btf: u64,
/// NOTE(review): presumably the address of a log buffer, `btf_log_size`
/// bytes long - confirm
btf_log_buf: u64,
btf_size: u32,
btf_log_size: u32,
btf_log_level: u32,
};
/// struct used by Cmd.task_fd_query command
pub const TaskFdQueryAttr = extern struct {
/// input: pid
pid: pid_t,
/// input: fd
fd: fd_t,
/// input: flags
flags: u32,
/// input/output: buf len
buf_len: u32,
/// input/output:
/// tp_name for tracepoint
/// symbol for kprobe
/// filename for uprobe
buf: u64,
/// output: prog_id
prog_id: u32,
/// output: BPF_FD_TYPE
fd_type: u32,
/// output: probe_offset
probe_offset: u64,
/// output: probe_addr
probe_addr: u64,
};
/// struct used by Cmd.link_create command
pub const LinkCreateAttr = extern struct {
/// eBPF program to attach
prog_fd: fd_t,
/// object to attach to
target_fd: fd_t,
/// NOTE(review): presumably one of AttachType - confirm
attach_type: u32,
/// extra flags
flags: u32,
};
/// struct used by Cmd.link_update command
pub const LinkUpdateAttr = extern struct {
/// fd of the link to update
link_fd: fd_t,
/// new program to update link with
new_prog_fd: fd_t,
/// extra flags
flags: u32,
/// expected link's program fd, it is specified only if BPF_F_REPLACE is
/// set in flags (BPF_F_REPLACE is F_REPLACE in this file)
old_prog_fd: fd_t,
};
/// struct used by Cmd.enable_stats command
pub const EnableStatsAttr = extern struct {
/// NOTE(review): presumably selects which statistics to enable; the valid
/// values are not declared in this file - confirm against the kernel header
type: u32,
};
/// struct used by Cmd.iter_create command
pub const IterCreateAttr = extern struct {
/// NOTE(review): presumably the fd of the link to create the iterator from -
/// confirm
link_fd: fd_t,
flags: u32,
};
/// Argument block for the `bpf` function in this file. It is an untagged
/// (extern) union: all members overlap, and the caller fills in the one that
/// belongs to the command being issued. Each member's struct documents which
/// `Cmd` values it is used by.
pub const Attr = extern union {
    map_create: MapCreateAttr,
    map_elem: MapElemAttr,
    map_batch: MapBatchAttr,
    prog_load: ProgLoadAttr,
    obj: ObjAttr,
    prog_attach: ProgAttachAttr,
    // The struct for Cmd.prog_test_run is declared as `TestAttr`; the
    // previously referenced `TestRunAttr` does not exist in this file and
    // made this union fail to compile.
    test_run: TestAttr,
    get_id: GetIdAttr,
    info: InfoAttr,
    query: QueryAttr,
    raw_tracepoint: RawTracepointAttr,
    btf_load: BtfLoadAttr,
    task_fd_query: TaskFdQueryAttr,
    link_create: LinkCreateAttr,
    link_update: LinkUpdateAttr,
    enable_stats: EnableStatsAttr,
    iter_create: IterCreateAttr,
};
/// Issue the bpf syscall.
///
/// `cmd` is converted to its integer value and `attr` to its address; both are
/// forwarded together with `size` to `syscall3`. The value returned by
/// `syscall3` is passed back to the caller unchanged.
///
/// NOTE(review): `size` is presumably the number of bytes of `attr` the kernel
/// should look at (e.g. `@sizeOf` of the member in use) - confirm.
pub fn bpf(cmd: Cmd, attr: *Attr, size: u32) usize {
    const cmd_number = @enumToInt(cmd);
    const attr_address = @ptrToInt(attr);
    return syscall3(.bpf, cmd_number, attr_address, size);
}