ocaml/utils/binutils.ml

690 lines
20 KiB
OCaml

(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Nicolas Ojeda Bar, LexiFi *)
(* *)
(* Copyright 2020 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
let char_to_hex c =
Printf.sprintf "0x%02x" (Char.code c)
let int_to_hex n =
Printf.sprintf "0x%x" n
type error =
| Truncated_file
| Unrecognized of string
| Unsupported of string * int64
| Out_of_range of string
let error_to_string = function
| Truncated_file ->
"Truncated file"
| Unrecognized magic ->
Printf.sprintf "Unrecognized magic: %s"
(String.concat " "
(List.init (String.length magic)
(fun i -> char_to_hex magic.[i])))
| Unsupported (s, n) ->
Printf.sprintf "Unsupported: %s: 0x%Lx" s n
| Out_of_range s ->
Printf.sprintf "Out of range constant: %s" s
exception Error of error
let name_at ?max_len buf start =
if start < 0 || start > Bytes.length buf then
raise (Error (Out_of_range (int_to_hex start)));
let max_pos =
match max_len with
| None -> Bytes.length buf
| Some n -> min (Bytes.length buf) (start + n)
in
let rec loop pos =
if pos >= max_pos || Bytes.get buf pos = '\000'
then
Bytes.sub_string buf start (pos - start)
else
loop (succ pos)
in
loop start
let array_find_map f a =
let rec loop i =
if i >= Array.length a then None
else begin
match f a.(i) with
| None -> loop (succ i)
| Some _ as r -> r
end
in
loop 0
let array_find f a =
array_find_map (fun x -> if f x then Some x else None) a
let really_input_bytes ic len =
let buf = Bytes.create len in
really_input ic buf 0 len;
buf
let uint64_of_uint32 n =
Int64.(logand (of_int32 n) 0xffffffffL)
type endianness =
| LE
| BE
type bitness =
| B32
| B64
type decoder =
{
ic: in_channel;
endianness: endianness;
bitness: bitness;
}
let word_size = function
| {bitness = B64; _} -> 8
| {bitness = B32; _} -> 4
let get_uint16 {endianness; _} buf idx =
match endianness with
| LE -> Bytes.get_uint16_le buf idx
| BE -> Bytes.get_uint16_be buf idx
let get_uint32 {endianness; _} buf idx =
match endianness with
| LE -> Bytes.get_int32_le buf idx
| BE -> Bytes.get_int32_be buf idx
let get_uint s d buf idx =
let n = get_uint32 d buf idx in
match Int32.unsigned_to_int n with
| None -> raise (Error (Unsupported (s, Int64.of_int32 n)))
| Some n -> n
let get_uint64 {endianness; _} buf idx =
match endianness with
| LE -> Bytes.get_int64_le buf idx
| BE -> Bytes.get_int64_be buf idx
let get_word d buf idx =
match d.bitness with
| B64 -> get_uint64 d buf idx
| B32 -> uint64_of_uint32 (get_uint32 d buf idx)
let uint64_to_int s n =
match Int64.unsigned_to_int n with
| None -> raise (Error (Unsupported (s, n)))
| Some n -> n
let load_bytes d off len =
LargeFile.seek_in d.ic off;
really_input_bytes d.ic len
type t =
{
defines_symbol: string -> bool;
symbol_offset: string -> int64 option;
}
module ELF = struct
(* Reference: http://man7.org/linux/man-pages/man5/elf.5.html *)
let header_size d =
40 + 3 * word_size d
type header =
{
e_shoff: int64;
e_shentsize: int;
e_shnum: int;
e_shstrndx: int;
}
let read_header d =
let buf = load_bytes d 0L (header_size d) in
let word_size = word_size d in
let e_shnum = get_uint16 d buf (36 + 3 * word_size) in
let e_shentsize = get_uint16 d buf (34 + 3 * word_size) in
let e_shoff = get_word d buf (24 + 2 * word_size) in
let e_shstrndx = get_uint16 d buf (38 + 3 * word_size) in
{e_shnum; e_shentsize; e_shoff; e_shstrndx}
type sh_type =
| SHT_STRTAB
| SHT_DYNSYM
| SHT_OTHER
type section =
{
sh_name: int;
sh_type: sh_type;
sh_addr: int64;
sh_offset: int64;
sh_size: int;
sh_entsize: int;
sh_name_str: string;
}
let load_section_body d {sh_offset; sh_size; _} =
load_bytes d sh_offset sh_size
let read_sections d {e_shoff; e_shnum; e_shentsize; e_shstrndx; _} =
let buf = load_bytes d e_shoff (e_shnum * e_shentsize) in
let word_size = word_size d in
let mk i =
let base = i * e_shentsize in
let sh_name = get_uint "sh_name" d buf (base + 0) in
let sh_type =
match get_uint32 d buf (base + 4) with
| 3l -> SHT_STRTAB
| 11l -> SHT_DYNSYM
| _ -> SHT_OTHER
in
let sh_addr = get_word d buf (base + 8 + word_size) in
let sh_offset = get_word d buf (base + 8 + 2 * word_size) in
let sh_size =
uint64_to_int "sh_size"
(get_word d buf (base + 8 + 3 * word_size))
in
let sh_entsize =
uint64_to_int "sh_entsize"
(get_word d buf (base + 16 + 5 * word_size))
in
{sh_name; sh_type; sh_addr; sh_offset;
sh_size; sh_entsize; sh_name_str = ""}
in
let sections = Array.init e_shnum mk in
if e_shstrndx = 0 then
(* no string table *)
sections
else
let shstrtbl = load_section_body d sections.(e_shstrndx) in
let set_name sec =
let sh_name_str = name_at shstrtbl sec.sh_name in
{sec with sh_name_str}
in
Array.map set_name sections
let read_sections d h =
let {e_shoff; e_shentsize; e_shnum; e_shstrndx} = h in
if e_shoff = 0L then
[||]
else begin
let buf = lazy (load_bytes d e_shoff e_shentsize) in
let word_size = word_size d in
let e_shnum =
if e_shnum = 0 then
(* The real e_shnum is the sh_size of the initial section.*)
uint64_to_int "e_shnum"
(get_word d (Lazy.force buf) (8 + 3 * word_size))
else
e_shnum
in
let e_shstrndx =
if e_shstrndx = 0xffff then
(* The real e_shstrndx is the sh_link of the initial section. *)
get_uint "e_shstrndx" d (Lazy.force buf) (8 + 4 * word_size)
else
e_shstrndx
in
read_sections d {h with e_shnum; e_shstrndx}
end
type symbol =
{
st_name: string;
st_value: int64;
st_shndx: int;
}
let find_section sections type_ sectname =
let f {sh_type; sh_name_str; _} =
sh_type = type_ && sh_name_str = sectname
in
array_find f sections
let read_symbols d sections =
match find_section sections SHT_DYNSYM ".dynsym" with
| None -> [| |]
| Some {sh_entsize = 0; _} ->
raise (Error (Out_of_range "sh_entsize=0"))
| Some dynsym ->
begin match find_section sections SHT_STRTAB ".dynstr" with
| None -> [| |]
| Some dynstr ->
let strtbl = load_section_body d dynstr in
let buf = load_section_body d dynsym in
let word_size = word_size d in
let mk i =
let base = i * dynsym.sh_entsize in
let st_name = name_at strtbl (get_uint "st_name" d buf base) in
let st_value = get_word d buf (base + word_size (* ! *)) in
let st_shndx =
let off = match d.bitness with B64 -> 6 | B32 -> 14 in
get_uint16 d buf (base + off)
in
{st_name; st_value; st_shndx}
in
Array.init (dynsym.sh_size / dynsym.sh_entsize) mk
end
let find_symbol symbols symname =
let f = function
| {st_shndx = 0; _} -> false
| {st_name; _} -> st_name = symname
in
array_find f symbols
let symbol_offset sections symbols symname =
match find_symbol symbols symname with
| None ->
None
| Some {st_shndx; st_value; _} ->
(* st_value in executables and shared objects holds a virtual (absolute)
address. See https://refspecs.linuxfoundation.org/elf/elf.pdf, page
1-21, "Symbol Values". *)
Some Int64.(add sections.(st_shndx).sh_offset
(sub st_value sections.(st_shndx).sh_addr))
let defines_symbol symbols symname =
Option.is_some (find_symbol symbols symname)
let read ic =
seek_in ic 0;
let identification = really_input_bytes ic 16 in
let bitness =
match Bytes.get identification 4 with
| '\x01' -> B32
| '\x02' -> B64
| _ as c ->
raise (Error (Unsupported ("ELFCLASS", Int64.of_int (Char.code c))))
in
let endianness =
match Bytes.get identification 5 with
| '\x01' -> LE
| '\x02' -> BE
| _ as c ->
raise (Error (Unsupported ("ELFDATA", Int64.of_int (Char.code c))))
in
let d = {ic; bitness; endianness} in
let header = read_header d in
let sections = read_sections d header in
let symbols = read_symbols d sections in
let symbol_offset = symbol_offset sections symbols in
let defines_symbol = defines_symbol symbols in
{symbol_offset; defines_symbol}
end
module Mach_O = struct
(* Reference:
https://github.com/aidansteele/osx-abi-macho-file-format-reference *)
let size_int = 4
let header_size {bitness; _} =
(match bitness with B64 -> 6 | B32 -> 5) * 4 + 2 * size_int
type header =
{
ncmds: int;
sizeofcmds: int;
}
let read_header d =
let buf = load_bytes d 0L (header_size d) in
let ncmds = get_uint "ncmds" d buf (8 + 2 * size_int) in
let sizeofcmds = get_uint "sizeofcmds" d buf (12 + 2 * size_int) in
{ncmds; sizeofcmds}
type lc_symtab =
{
symoff: int32;
nsyms: int;
stroff: int32;
strsize: int;
}
type load_command =
| LC_SYMTAB of lc_symtab
| OTHER
let read_load_commands d {ncmds; sizeofcmds} =
let buf = load_bytes d (Int64.of_int (header_size d)) sizeofcmds in
let base = ref 0 in
let mk _ =
let cmd = get_uint32 d buf (!base + 0) in
let cmdsize = get_uint "cmdsize" d buf (!base + 4) in
let lc =
match cmd with
| 0x2l ->
let symoff = get_uint32 d buf (!base + 8) in
let nsyms = get_uint "nsyms" d buf (!base + 12) in
let stroff = get_uint32 d buf (!base + 16) in
let strsize = get_uint "strsize" d buf (!base + 20) in
LC_SYMTAB {symoff; nsyms; stroff; strsize}
| _ ->
OTHER
in
base := !base + cmdsize;
lc
in
Array.init ncmds mk
type symbol =
{
n_name: string;
n_type: int;
n_value: int64;
}
let size_nlist d =
8 + word_size d
let read_symbols d load_commands =
match
(* Can it happen there be more than one LC_SYMTAB? *)
array_find_map (function
| LC_SYMTAB symtab -> Some symtab
| _ -> None
) load_commands
with
| None -> [| |]
| Some {symoff; nsyms; stroff; strsize} ->
let strtbl = load_bytes d (uint64_of_uint32 stroff) strsize in
let buf =
load_bytes d (uint64_of_uint32 symoff) (nsyms * size_nlist d) in
let size_nlist = size_nlist d in
let mk i =
let base = i * size_nlist in
let n_name = name_at strtbl (get_uint "n_name" d buf (base + 0)) in
let n_type = Bytes.get_uint8 buf (base + 4) in
let n_value = get_word d buf (base + 8) in
{n_name; n_type; n_value}
in
Array.init nsyms mk
let fix symname =
"_" ^ symname
let find_symbol symbols symname =
let f {n_name; n_type; _} =
n_type land 0b1111 = 0b1111 (* N_EXT + N_SECT *) &&
n_name = symname
in
array_find f symbols
let symbol_offset symbols symname =
let symname = fix symname in
match find_symbol symbols symname with
| None -> None
| Some {n_value; _} -> Some n_value
let defines_symbol symbols symname =
let symname = fix symname in
Option.is_some (find_symbol symbols symname)
type magic =
| MH_MAGIC
| MH_CIGAM
| MH_MAGIC_64
| MH_CIGAM_64
let read ic =
seek_in ic 0;
let magic = really_input_bytes ic 4 in
let magic =
match Bytes.get_int32_ne magic 0 with
| 0xFEEDFACEl -> MH_MAGIC
| 0xCEFAEDFEl -> MH_CIGAM
| 0xFEEDFACFl -> MH_MAGIC_64
| 0xCFFAEDFEl -> MH_CIGAM_64
| _ -> (* should not happen *)
raise (Error (Unrecognized (Bytes.to_string magic)))
in
let bitness =
match magic with
| MH_MAGIC | MH_CIGAM -> B32
| MH_MAGIC_64 | MH_CIGAM_64 -> B64
in
let endianness =
match magic, Sys.big_endian with
| (MH_MAGIC | MH_MAGIC_64), false
| (MH_CIGAM | MH_CIGAM_64), true -> LE
| (MH_MAGIC | MH_MAGIC_64), true
| (MH_CIGAM | MH_CIGAM_64), false -> BE
in
let d = {ic; endianness; bitness} in
let header = read_header d in
let load_commands = read_load_commands d header in
let symbols = read_symbols d load_commands in
let symbol_offset = symbol_offset symbols in
let defines_symbol = defines_symbol symbols in
{symbol_offset; defines_symbol}
end
module FlexDLL = struct
(* Reference:
https://docs.microsoft.com/en-us/windows/win32/debug/pe-format *)
let header_size = 24
type header =
{
e_lfanew: int64;
number_of_sections: int;
size_of_optional_header: int;
characteristics: int;
}
let read_header e_lfanew d buf =
let number_of_sections = get_uint16 d buf 6 in
let size_of_optional_header = get_uint16 d buf 20 in
let characteristics = get_uint16 d buf 22 in
{e_lfanew; number_of_sections; size_of_optional_header; characteristics}
type optional_header_magic =
| PE32
| PE32PLUS
type optional_header =
{
magic: optional_header_magic;
image_base: int64;
}
let read_optional_header d {e_lfanew; size_of_optional_header; _} =
if size_of_optional_header = 0 then
raise (Error (Unrecognized "SizeOfOptionalHeader=0"));
let buf =
load_bytes d Int64.(add e_lfanew (of_int header_size))
size_of_optional_header
in
let magic =
match get_uint16 d buf 0 with
| 0x10b -> PE32
| 0x20b -> PE32PLUS
| n ->
raise (Error (Unsupported ("optional_header_magic", Int64.of_int n)))
in
let image_base =
match magic with
| PE32 -> uint64_of_uint32 (get_uint32 d buf 28)
| PE32PLUS -> get_uint64 d buf 24
in
{magic; image_base}
type section =
{
name: string;
virtual_size: int;
virtual_address: int64;
size_of_raw_data: int;
pointer_to_raw_data: int64;
}
let section_header_size = 40
let read_sections d
{e_lfanew; number_of_sections; size_of_optional_header; _} =
let buf =
load_bytes d
Int64.(add e_lfanew (of_int (header_size + size_of_optional_header)))
(number_of_sections * section_header_size)
in
let mk i =
let base = i * section_header_size in
let name = name_at ~max_len:8 buf (base + 0) in
let virtual_size = get_uint "virtual_size" d buf (base + 8) in
let virtual_address = uint64_of_uint32 (get_uint32 d buf (base + 12)) in
let size_of_raw_data = get_uint "size_of_raw_data" d buf (base + 16) in
let pointer_to_raw_data =
uint64_of_uint32 (get_uint32 d buf (base + 20)) in
{name; virtual_size; virtual_address;
size_of_raw_data; pointer_to_raw_data}
in
Array.init number_of_sections mk
type symbol =
{
name: string;
address: int64;
}
let load_section_body d {size_of_raw_data; pointer_to_raw_data; _} =
load_bytes d pointer_to_raw_data size_of_raw_data
let find_section sections sectname =
array_find (function ({name; _} : section) -> name = sectname) sections
(* We extract the list of exported symbols as encoded by flexlink, see
https://github.com/alainfrisch/flexdll/blob/bd636def70d941674275b2f4b6c13a34ba23f9c9/reloc.ml
#L500-L525 *)
let read_symbols d {image_base; _} sections =
match find_section sections ".exptbl" with
| None -> [| |]
| Some ({virtual_address; _} as exptbl) ->
let buf = load_section_body d exptbl in
let numexports =
uint64_to_int "numexports" (get_word d buf 0)
in
let word_size = word_size d in
let mk i =
let address = get_word d buf (word_size * (2 * i + 1)) in
let nameoff = get_word d buf (word_size * (2 * i + 2)) in
let name =
let off = Int64.(sub nameoff (add virtual_address image_base)) in
name_at buf (uint64_to_int "exptbl name offset" off)
in
{name; address}
in
Array.init numexports mk
let symbol_offset {image_base; _} sections symbols =
match find_section sections ".data" with
| None -> Fun.const None
| Some {virtual_address; pointer_to_raw_data; _} ->
fun symname ->
begin match
array_find (function {name; _} -> name = symname) symbols
with
| None -> None
| Some {address; _} ->
Some Int64.(add pointer_to_raw_data
(sub address (add virtual_address image_base)))
end
let defines_symbol symbols symname =
Array.exists (fun {name; _} -> name = symname) symbols
type machine_type =
| IMAGE_FILE_MACHINE_ARM
| IMAGE_FILE_MACHINE_ARM64
| IMAGE_FILE_MACHINE_AMD64
| IMAGE_FILE_MACHINE_I386
let read ic =
let e_lfanew =
seek_in ic 0x3c;
let buf = really_input_bytes ic 4 in
uint64_of_uint32 (Bytes.get_int32_le buf 0)
in
LargeFile.seek_in ic e_lfanew;
let buf = really_input_bytes ic header_size in
let magic = Bytes.sub_string buf 0 4 in
if magic <> "PE\000\000" then raise (Error (Unrecognized magic));
let machine =
match Bytes.get_uint16_le buf 4 with
| 0x1c0 -> IMAGE_FILE_MACHINE_ARM
| 0xaa64 -> IMAGE_FILE_MACHINE_ARM64
| 0x8664 -> IMAGE_FILE_MACHINE_AMD64
| 0x14c -> IMAGE_FILE_MACHINE_I386
| n -> raise (Error (Unsupported ("MACHINETYPE", Int64.of_int n)))
in
let bitness =
match machine with
| IMAGE_FILE_MACHINE_AMD64
| IMAGE_FILE_MACHINE_ARM64 -> B64
| IMAGE_FILE_MACHINE_I386
| IMAGE_FILE_MACHINE_ARM -> B32
in
let d = {ic; endianness = LE; bitness} in
let header = read_header e_lfanew d buf in
let opt_header = read_optional_header d header in
let sections = read_sections d header in
let symbols = read_symbols d opt_header sections in
let symbol_offset = symbol_offset opt_header sections symbols in
let defines_symbol = defines_symbol symbols in
{symbol_offset; defines_symbol}
end
let read ic =
seek_in ic 0;
let magic = really_input_string ic 4 in
match magic.[0], magic.[1], magic.[2], magic.[3] with
| '\x7F', 'E', 'L', 'F' ->
ELF.read ic
| '\xFE', '\xED', '\xFA', '\xCE'
| '\xCE', '\xFA', '\xED', '\xFE'
| '\xFE', '\xED', '\xFA', '\xCF'
| '\xCF', '\xFA', '\xED', '\xFE' ->
Mach_O.read ic
| 'M', 'Z', _, _ ->
FlexDLL.read ic
| _ ->
raise (Error (Unrecognized magic))
let with_open_in fn f =
let ic = open_in_bin fn in
Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () -> f ic)
let read filename =
match with_open_in filename read with
| t -> Ok t
| exception End_of_file ->
Result.Error Truncated_file
| exception Error err ->
Result.Error err
let defines_symbol {defines_symbol; _} symname =
defines_symbol symname
let symbol_offset {symbol_offset; _} symname =
symbol_offset symname