(***********************************************************************)
(*                                                                     *)
(*                                OCaml                                *)
(*                                                                     *)
(*  Fabrice Le Fessant, projet Gallium, INRIA Rocquencourt             *)
(*                                                                     *)
(*  Copyright 2014 Institut National de Recherche en Informatique et   *)
(*     en Automatique.  All rights reserved.  This file is distributed *)
(*     under the terms of the Q Public License version 1.0.            *)
(*                                                                     *)
(***********************************************************************)

(* Printer turning a list of X86_ast assembly lines into AT&T-syntax
   text.  The only entry point used at the end of this file is
   generate_asm; everything else is a helper for it. *)

open X86_ast
open X86_proc

let bprintf = Printf.bprintf

(* [print_reg b f r] appends a percent sign followed by [f r] to [b]. *)
let print_reg b f r =
  Buffer.add_char b '%';
  Buffer.add_string b (f r)

(* [opt_displ b displ] appends an optional signed displacement: nothing
   when [displ] is zero, an explicit plus sign for positive values, and
   the plain decimal rendering (which carries its own minus sign) for
   negative ones. *)
let opt_displ b displ =
  if displ = 0 then ()
  else if displ > 0 then bprintf b "+%d" displ
  else bprintf b "%d" displ

(* [arg_mem b addr] appends a memory operand in the form
   sym+displ(base,idx,scale).  The parenthesised part is only emitted
   when [scale] is non-zero. *)
let arg_mem b {arch; typ=_; idx; scale; base; sym; displ} =
  let string_of_register =
    match arch with
    | X86 -> string_of_reg32
    | X64 -> string_of_reg64
  in
  begin match sym with
  | None ->
      (* Without a symbol, a bare displacement is printed when it is
         non-zero, or when there is no index part at all. *)
      if displ <> 0 || scale = 0 then
        Buffer.add_string b (string_of_int displ)
  | Some s ->
      Buffer.add_string b s;
      opt_displ b displ
  end;
  if scale <> 0 then begin
    Buffer.add_char b '(';
    (* The separating comma is needed after a base register, and also
       when there is no base but a scale factor other than 1 follows. *)
    begin match base with
    | None -> if scale <> 1 then Buffer.add_char b ','
    | Some base ->
        print_reg b string_of_register base;
        Buffer.add_char b ','
    end;
    print_reg b string_of_register idx;
    if scale <> 1 then bprintf b ",%s" (string_of_int scale);
    Buffer.add_char b ')'
  end

(* [arg b a] appends the textual form of the instruction operand [a]:
   symbols and immediates get a dollar prefix, registers a percent
   prefix, and memory operands go through [arg_mem]. *)
let arg b = function
  | Sym x -> Buffer.add_char b '$'; Buffer.add_string b x
  | Imm x -> bprintf b "$%Ld" x
  | Reg8L x -> print_reg b string_of_reg8l x
  | Reg8H x -> print_reg b string_of_reg8h x
  | Reg16 x -> print_reg b string_of_reg16 x
  | Reg32 x -> print_reg b string_of_reg32 x
  | Reg64 x -> print_reg b string_of_reg64 x
  | Regf x -> print_reg b string_of_registerf x
  | Mem addr -> arg_mem b addr
  | Mem64_RIP (_, s, displ) -> bprintf b "%s%a(%%rip)" s opt_displ displ

(* [fits_in_int32 n] is true when [n] lies in the signed 32-bit range.
   Shared by the constant printer and by the MOV case of
   [print_instr]. *)
let fits_in_int32 n = n <= 0x7FFF_FFFFL && n >= -0x8000_0000L

(* [cst b c] appends the constant expression [c].  Top-level sums and
   differences are printed without surrounding parentheses; their
   operands are printed by [scst]. *)
let rec cst b = function
  | ConstLabel _ | Const _ | ConstThis as c -> scst b c
  | ConstAdd (c1, c2) -> bprintf b "%a + %a" scst c1 scst c2
  | ConstSub (c1, c2) -> bprintf b "%a - %a" scst c1 scst c2

(* [scst b c] appends [c] as a sub-expression: nested sums and
   differences are parenthesised.  Integers within the signed 32-bit
   range are printed in decimal, the others in hexadecimal. *)
and scst b = function
  | ConstThis -> Buffer.add_string b "."
  | ConstLabel l -> Buffer.add_string b l
  | Const n when fits_in_int32 n -> Buffer.add_string b (Int64.to_string n)
  | Const n -> bprintf b "0x%Lx" n
  | ConstAdd (c1, c2) -> bprintf b "(%a + %a)" scst c1 scst c2
  | ConstSub (c1, c2) -> bprintf b "(%a - %a)" scst c1 scst c2

(* [typeof a] is the data type of operand [a], used to choose an
   instruction suffix.  Immediates and symbols carry no type (NONE).
   Floating-point stack registers are not supported here and trigger
   an assertion failure. *)
let typeof = function
  | Mem {typ; _} | Mem64_RIP (typ, _, _) -> typ
  | Reg8L _ | Reg8H _ -> BYTE
  | Reg16 _ -> WORD
  | Reg32 _ -> DWORD
  | Reg64 _ -> QWORD
  | Imm _ | Sym _ -> NONE
  | Regf _ -> assert false

(* [suf a] is the mnemonic suffix derived from the type of operand [a].
   Fails with an assertion on OWORD, NEAR and PROC. *)
let suf arg =
  match typeof arg with
  | BYTE -> "b"
  | WORD -> "w"
  | DWORD | REAL8 -> "l"
  | QWORD -> "q"
  | REAL4 -> "s"
  | NONE -> ""
  | OWORD | NEAR | PROC -> assert false

(* Instruction emitters.  The digit is the number of operands; the _s
   variants append the suffix of the single (i1_s) or last (i2_s)
   operand to the mnemonic, and i2_ss appends the suffixes of both
   operands, first then second. *)
let i0 b s = bprintf b "\t%s" s
let i1 b s x = bprintf b "\t%s\t%a" s arg x
let i1_s b s x = bprintf b "\t%s%s\t%a" s (suf x) arg x
let i2 b s x y = bprintf b "\t%s\t%a, %a" s arg x arg y
let i2_s b s x y = bprintf b "\t%s%s\t%a, %a" s (suf y) arg x arg y
let i2_ss b s x y =
  bprintf b "\t%s%s%s\t%a, %a" s (suf x) (suf y) arg x arg y

(* [i1_call_jmp b s target] emits a call or jump.  Register and memory
   targets are indirect and get a star prefix, symbols are printed
   bare, and any other operand kind is an assertion failure. *)
let i1_call_jmp b s = function
  (* this is the encoding of jump labels: they must not get a star *)
  | Mem {arch=X86; idx=_; scale=0; base=None; sym=Some _; _} as x ->
      i1 b s x
  | Reg32 _ | Reg64 _ | Mem _ | Mem64_RIP _ as x ->
      bprintf b "\t%s\t*%a" s arg x
  | Sym x -> bprintf b "\t%s\t%s" s x
  | _ -> assert false

(* [print_instr b instr] appends the AT&T rendering of one instruction.
   The cases marked with a bug comment are explained in the comment
   that follows this function. *)
let print_instr b = function
  | ADD (arg1, arg2) -> i2_s b "add" arg1 arg2
  | ADDSD (arg1, arg2) -> i2 b "addsd" arg1 arg2
  | AND (arg1, arg2) -> i2_s b "and" arg1 arg2
  | ANDPD (arg1, arg2) -> i2 b "andpd" arg1 arg2
  | BSWAP arg -> i1 b "bswap" arg
  | CALL arg -> i1_call_jmp b "call" arg
  | CDQ -> i0 b "cltd"
  | CMOV (c, arg1, arg2) -> i2 b ("cmov" ^ string_of_condition c) arg1 arg2
  | CMP (arg1, arg2) -> i2_s b "cmp" arg1 arg2
  | COMISD (arg1, arg2) -> i2 b "comisd" arg1 arg2
  | CQO -> i0 b "cqto"
  | CVTSD2SI (arg1, arg2) -> i2 b "cvtsd2si" arg1 arg2
  | CVTSD2SS (arg1, arg2) -> i2 b "cvtsd2ss" arg1 arg2
  | CVTSI2SD (arg1, arg2) -> i2 b ("cvtsi2sd" ^ suf arg1) arg1 arg2
  | CVTSS2SD (arg1, arg2) -> i2 b "cvtss2sd" arg1 arg2
  | CVTTSD2SI (arg1, arg2) -> i2_s b "cvttsd2si" arg1 arg2
  | DEC arg -> i1_s b "dec" arg
  | DIVSD (arg1, arg2) -> i2 b "divsd" arg1 arg2
  | FABS -> i0 b "fabs"
  | FADD arg -> i1_s b "fadd" arg
  | FADDP (arg1, arg2) -> i2 b "faddp" arg1 arg2
  | FCHS -> i0 b "fchs"
  | FCOMP arg -> i1_s b "fcomp" arg
  | FCOMPP -> i0 b "fcompp"
  | FCOS -> i0 b "fcos"
  | FDIV arg -> i1_s b "fdiv" arg
  | FDIVP (Regf (ST 0), arg2) -> i2 b "fdivrp" (Regf (ST 0)) arg2 (* bug *)
  | FDIVP (arg1, arg2) -> i2 b "fdivp" arg1 arg2
  | FDIVR arg -> i1_s b "fdivr" arg
  | FDIVRP (Regf (ST 0), arg2) -> i2 b "fdivp" (Regf (ST 0)) arg2 (* bug *)
  | FDIVRP (arg1, arg2) -> i2 b "fdivrp" arg1 arg2
  | FILD arg -> i1_s b "fild" arg
  | FISTP arg -> i1_s b "fistp" arg
  | FLD (Mem {typ=REAL4; _} as arg) -> i1 b "flds" arg
  | FLD arg -> i1 b "fldl" arg
  | FLD1 -> i0 b "fld1"
  | FLDCW arg -> i1 b "fldcw" arg
  | FLDLG2 -> i0 b "fldlg2"
  | FLDLN2 -> i0 b "fldln2"
  | FLDZ -> i0 b "fldz"
  | FMUL arg -> i1_s b "fmul" arg
  | FMULP (arg1, arg2) -> i2 b "fmulp" arg1 arg2
  | FNSTCW arg -> i1 b "fnstcw" arg
  | FNSTSW arg -> i1 b "fnstsw" arg
  | FPATAN -> i0 b "fpatan"
  | FPTAN -> i0 b "fptan"
  | FSIN -> i0 b "fsin"
  | FSQRT -> i0 b "fsqrt"
  | FSTP (Mem {typ=REAL4; _} as arg) -> i1 b "fstps" arg
  | FSTP arg -> i1 b "fstpl" arg
  | FSUB arg -> i1_s b "fsub" arg
  | FSUBP (Regf (ST 0), arg2) -> i2 b "fsubrp" (Regf (ST 0)) arg2 (* bug *)
  | FSUBP (arg1, arg2) -> i2 b "fsubp" arg1 arg2
  | FSUBR arg -> i1_s b "fsubr" arg
  | FSUBRP (Regf (ST 0), arg2) -> i2 b "fsubp" (Regf (ST 0)) arg2 (* bug *)
  | FSUBRP (arg1, arg2) -> i2 b "fsubrp" arg1 arg2
  | FXCH arg -> i1 b "fxch" arg
  | FYL2X -> i0 b "fyl2x"
  | HLT -> i0 b "hlt"
  | IDIV arg -> i1_s b "idiv" arg
  | IMUL (arg, None) -> i1_s b "imul" arg
  | IMUL (arg1, Some arg2) -> i2_s b "imul" arg1 arg2
  | INC arg -> i1_s b "inc" arg
  | J (c, arg) -> i1_call_jmp b ("j" ^ string_of_condition c) arg
  | JMP arg -> i1_call_jmp b "jmp" arg
  | LEA (arg1, arg2) -> i2_s b "lea" arg1 arg2
  | LEAVE -> i0 b "leave"
  (* An immediate outside the signed 32-bit range moved into a 64-bit
     register is emitted as movabsq. *)
  | MOV ((Imm n as arg1), (Reg64 _ as arg2)) when not (fits_in_int32 n) ->
      i2 b "movabsq" arg1 arg2
  | MOV ((Sym _ as arg1), (Reg64 _ as arg2)) when windows ->
      i2 b "movabsq" arg1 arg2
  | MOV (arg1, arg2) -> i2_s b "mov" arg1 arg2
  | MOVAPD (arg1, arg2) -> i2 b "movapd" arg1 arg2
  | MOVLPD (arg1, arg2) -> i2 b "movlpd" arg1 arg2
  | MOVSD (arg1, arg2) -> i2 b "movsd" arg1 arg2
  | MOVSS (arg1, arg2) -> i2 b "movss" arg1 arg2
  | MOVSX (arg1, arg2) -> i2_ss b "movs" arg1 arg2
  | MOVSXD (arg1, arg2) -> i2 b "movslq" arg1 arg2
  | MOVZX (arg1, arg2) -> i2_ss b "movz" arg1 arg2
  | MULSD (arg1, arg2) -> i2 b "mulsd" arg1 arg2
  | NEG arg -> i1 b "neg" arg
  | NOP -> i0 b "nop"
  | OR (arg1, arg2) -> i2_s b "or" arg1 arg2
  | POP arg -> i1_s b "pop" arg
  | PUSH arg -> i1_s b "push" arg
  | RET -> i0 b "ret"
  | ROUNDSD (r, arg1, arg2) -> i2 b (string_of_rounding r) arg1 arg2
  | SAL (arg1, arg2) -> i2_s b "sal" arg1 arg2
  | SAR (arg1, arg2) -> i2_s b "sar" arg1 arg2
  | SET (c, arg) -> i1 b ("set" ^ string_of_condition c) arg
  | SHR (arg1, arg2) -> i2_s b "shr" arg1 arg2
  | SQRTSD (arg1, arg2) -> i2 b "sqrtsd" arg1 arg2
  | SUB (arg1, arg2) -> i2_s b "sub" arg1 arg2
  | SUBSD (arg1, arg2) -> i2 b "subsd" arg1 arg2
  | TEST (arg1, arg2) -> i2_s b "test" arg1 arg2
  | UCOMISD (arg1, arg2) -> i2 b "ucomisd" arg1 arg2
  | XCHG (arg1, arg2) -> i2 b "xchg" arg1 arg2
  | XOR (arg1, arg2) -> i2_s b "xor" arg1 arg2
  | XORPD (arg1, arg2) -> i2 b "xorpd" arg1 arg2

(* bug:
   https://sourceware.org/binutils/docs-2.22/as/i386_002dBugs.html#i386_002dBugs

   The AT&T syntax has a bug for fsub/fdiv/fsubr/fdivr instructions when
   the source register is %st and the destination is %st(i).  In those
   cases, AT&T uses fsub (resp. fsubr) in place of fsubr (resp. fsub),
   and idem for fdiv/fdivr.

   Concretely, AT&T syntax interpretation of:

      fsub  %st, %st(3)

   should normally be:

      %st(3) := %st(3) - %st

   but it should actually be interpreted as:

      %st(3) := %st - %st(3)

   which means the FSUBR instruction should be used.
*)

(* [print_line b line] appends one assembly line (instruction or
   directive) to [b], without a trailing newline.  Some directives
   depend on the target [system].  The masm-only constructors are
   assertion failures here. *)
let print_line b = function
  | Ins instr -> print_instr b instr

  | Align (_data, n) ->
      (* MacOSX assembler interprets the integer n as a 2^n alignment *)
      let n = if system = S_macosx then Misc.log2 n else n in
      bprintf b "\t.align\t%d" n
  | Byte n -> bprintf b "\t.byte\t%a" cst n
  | Bytes s ->
      if system = S_solaris then buf_bytes_directive b ".byte" s
      else bprintf b "\t.ascii\t\"%s\"" (string_of_string_literal s)
  | Comment s -> bprintf b "\t\t\t\t/* %s */" s
  | Global s -> bprintf b "\t.globl\t%s" s
  | Long n -> bprintf b "\t.long\t%a" cst n
  | NewLabel (s, _) -> bprintf b "%s:" s
  | Quad n -> bprintf b "\t.quad\t%a" cst n
  | Section ([".data" ], _, _) -> bprintf b "\t.data"
  | Section ([".text" ], _, _) -> bprintf b "\t.text"
  | Section (name, flags, args) ->
      bprintf b "\t.section %s" (String.concat "," name);
      begin match flags with
      | None -> ()
      | Some flags -> bprintf b ",%S" flags
      end;
      begin match args with
      | [] -> ()
      | _ -> bprintf b ",%s" (String.concat "," args)
      end
  | Space n ->
      if system = S_solaris then bprintf b "\t.zero\t%d" n
      else bprintf b "\t.space\t%d" n
  | Word n ->
      if system = S_solaris then bprintf b "\t.value\t%a" cst n
      else bprintf b "\t.word\t%a" cst n

  (* gas only *)
  | Cfi_adjust_cfa_offset n -> bprintf b "\t.cfi_adjust_cfa_offset %d" n
  | Cfi_endproc -> bprintf b "\t.cfi_endproc"
  | Cfi_startproc -> bprintf b "\t.cfi_startproc"
  | File (file_num, file_name) ->
      bprintf b "\t.file\t%d\t\"%s\""
        file_num (string_of_string_literal file_name)
  | Indirect_symbol s -> bprintf b "\t.indirect_symbol %s" s
  | Loc (file_num, line) -> bprintf b "\t.loc\t%d\t%d" file_num line
  | Private_extern s -> bprintf b "\t.private_extern %s" s
  | Set (arg1, arg2) -> bprintf b "\t.set %s, %a" arg1 cst arg2
  | Size (s, c) -> bprintf b "\t.size %s,%a" s cst c
  | Type (s, typ) -> bprintf b "\t.type %s,%s" s typ

  (* masm only *)
  | External _
  | Mode386
  | Model _ -> assert false

(* [generate_asm oc lines] writes every element of [lines] to the
   channel [oc], one per output line.  A single buffer is cleared and
   reused for each line. *)
let generate_asm oc lines =
  let b = Buffer.create 10000 in
  List.iter
    (fun i ->
       Buffer.clear b;
       print_line b i;
       Buffer.add_char b '\n';
       Buffer.output_buffer oc b)
    lines