#2 "asmcomp/i386/emit.mlp" (***********************************************************************) (* *) (* OCaml *) (* *) (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) (* *) (* Copyright 1996 Institut National de Recherche en Informatique et *) (* en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0. *) (* *) (***********************************************************************) (* Emission of Intel 386 assembly code *) open Misc open Cmm open Arch open Proc open Reg open Mach open Linearize open Emitaux open X86_ast open X86_proc open X86_dsl let _label s = D.label ~typ:DWORD s let mem_sym typ ?(ofs = 0) sym = mem32 typ ~scale:0 ?base:None ~sym ofs RAX (*ignored since scale=0*) (* CFI directives *) let cfi_startproc () = if Config.asm_cfi_supported then D.cfi_startproc () let cfi_endproc () = if Config.asm_cfi_supported then D.cfi_endproc () let cfi_adjust_cfa_offset n = if Config.asm_cfi_supported then D.cfi_adjust_cfa_offset n let emit_debug_info dbg = emit_debug_info_gen dbg D.file D.loc (* Tradeoff between code size and code speed *) let fastcode_flag = ref true let stack_offset = ref 0 (* Layout of the stack frame *) let frame_size () = (* includes return address *) let sz = !stack_offset + 4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + 4 in Misc.align sz stack_alignment let slot_offset loc cl = match loc with | Incoming n -> assert (n >= 0); frame_size() + n | Local n -> if cl = 0 then !stack_offset + n * 4 else !stack_offset + num_stack_slots.(0) * 4 + n * 8 | Outgoing n -> assert (n >= 0); n (* Record symbols used and defined - at the end generate extern for those used but not defined *) let symbols_defined = ref StringSet.empty let symbols_used = ref StringSet.empty let add_def_symbol s = symbols_defined := StringSet.add s !symbols_defined let add_used_symbol s = symbols_used := StringSet.add s !symbols_used let trap_frame_size = Misc.align 8 stack_alignment (* Prefixing of symbols with "_" *) let symbol_prefix = match system with | S_linux_elf -> "" | S_bsd_elf -> "" | S_solaris -> "" | S_beos -> "" | S_gnu -> "" | _ -> "_" (* win32 & others *) let emit_symbol s = string_of_symbol symbol_prefix s let immsym s = sym (emit_symbol s) let emit_call s = I.call (immsym s) (* Output a label *) let label_prefix = match system with | S_linux_elf -> ".L" | S_bsd_elf -> ".L" | S_solaris -> ".L" | S_beos -> ".L" | S_gnu -> ".L" | _ -> "L" let emit_label lbl = Printf.sprintf "%s%d" label_prefix lbl let emit_data_label lbl = Printf.sprintf "%sd%d" label_prefix lbl let label s = sym (emit_label s) let def_label s = D.label (emit_label s) let emit_Llabel fallthrough lbl = if not fallthrough && !fastcode_flag then D.align 16 ; def_label lbl (* Output a pseudo-register *) let int_reg_name = [| RAX; RBX; RCX; RDX; RSI; RDI; RBP |] let float_reg_name = [| TOS |] let register_name r = if r < 100 then Reg32 (int_reg_name.(r)) else Regf (float_reg_name.(r - 100)) let sym32 ?ofs s = mem_sym ?ofs DWORD (emit_symbol s) let reg = function | { loc = Reg r } -> register_name r | { loc = Stack(Incoming n | Outgoing n) } when n < 0 -> sym32 "caml_extra_params" ~ofs:(n + 64) | { loc = Stack s; typ = Float } as r -> let ofs = slot_offset s (register_class r) in mem32 REAL8 ofs RSP | { loc = Stack s } as r -> let ofs = slot_offset s (register_class r) in mem32 DWORD ofs RSP | { loc = Unknown } -> fatal_error "Emit_i386.reg" (* Output a reference to the lower 8 bits or lower 16 bits of a register *) let reg_low_8_name = Array.map (fun r -> Reg8L r) int_reg_name let reg_low_16_name = Array.map (fun r -> Reg16 r) int_reg_name let reg8 r = match r.loc with | Reg r when r < 4 -> reg_low_8_name.(r) | _ -> fatal_error "Emit_i386.reg8" let reg16 r = match r.loc with | Reg r when r < 7 -> reg_low_16_name.(r) | _ -> fatal_error "Emit_i386.reg16" let reg32 = function | { loc = Reg.Reg r } -> int_reg_name.(r) | _ -> assert false let arg32 i n = reg32 i.arg.(n) (* Output an addressing mode *) let addressing addr typ i n = match addr with | Ibased(s, ofs) -> add_used_symbol s; mem_sym typ (emit_symbol s) ~ofs | Iindexed d -> mem32 typ d (arg32 i n) | Iindexed2 d -> mem32 typ ~base:(arg32 i n) d (arg32 i (n+1)) | Iscaled(2, d) -> mem32 typ ~base:(arg32 i n) d (arg32 i n) | Iscaled(scale, d) -> mem32 typ ~scale d (arg32 i n) | Iindexed2scaled(scale, d) -> mem32 typ ~scale ~base:(arg32 i n) d (arg32 i (n+1)) (* Record live pointers at call points *) let record_frame_label live dbg = let lbl = new_label() in let live_offset = ref [] in Reg.Set.iter (function | {typ = Val; loc = Reg r} -> live_offset := ((r lsl 1) + 1) :: !live_offset | {typ = Val; loc = Stack s} as reg -> live_offset := slot_offset s (register_class reg) :: !live_offset | {typ = Addr} as r -> Misc.fatal_error ("bad GC root " ^ Reg.name r) | _ -> ()) live; frame_descriptors := { fd_lbl = lbl; fd_frame_size = frame_size(); fd_live_offset = !live_offset; fd_debuginfo = dbg } :: !frame_descriptors; lbl let record_frame live dbg = let lbl = record_frame_label live dbg in def_label lbl (* Record calls to the GC -- we've moved them out of the way *) type gc_call = { gc_lbl: label; (* Entry label *) gc_return_lbl: label; (* Where to branch after GC *) gc_frame: label } (* Label of frame descriptor *) let call_gc_sites = ref ([] : gc_call list) let emit_call_gc gc = def_label gc.gc_lbl; emit_call "caml_call_gc"; def_label gc.gc_frame; I.jmp (label gc.gc_return_lbl) (* Record calls to caml_ml_array_bound_error. In -g mode, we maintain one call to caml_ml_array_bound_error per bound check site. Without -g, we can share a single call. *) type bound_error_call = { bd_lbl: label; (* Entry label *) bd_frame: label } (* Label of frame descriptor *) let bound_error_sites = ref ([] : bound_error_call list) let bound_error_call = ref 0 let bound_error_label dbg = if !Clflags.debug then begin let lbl_bound_error = new_label() in let lbl_frame = record_frame_label Reg.Set.empty dbg in bound_error_sites := { bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites; lbl_bound_error end else begin if !bound_error_call = 0 then bound_error_call := new_label(); !bound_error_call end let emit_call_bound_error bd = def_label bd.bd_lbl; emit_call "caml_ml_array_bound_error"; def_label bd.bd_frame let emit_call_bound_errors () = List.iter emit_call_bound_error !bound_error_sites; if !bound_error_call > 0 then begin def_label !bound_error_call; emit_call "caml_ml_array_bound_error" end (* Names for instructions *) let instr_for_intop = function | Iadd -> I.add | Isub -> I.sub | Imul -> (fun arg1 arg2 -> I.imul arg1 (Some arg2)) | Iand -> I.and_ | Ior -> I.or_ | Ixor -> I.xor | Ilsl -> I.sal | Ilsr -> I.shr | Iasr -> I.sar | _ -> fatal_error "Emit_i386: instr_for_intop" let unary_instr_for_floatop = function | Inegf -> I.fchs () | Iabsf -> I.fabs () | _ -> fatal_error "Emit_i386: unary_instr_for_floatop" let instr_for_floatop = function | Iaddf -> I.fadd | Isubf -> I.fsub | Imulf -> I.fmul | Idivf -> I.fdiv | Ispecific Isubfrev -> I.fsubr | Ispecific Idivfrev -> I.fdivr | _ -> fatal_error "Emit_i386: instr_for_floatop" let instr_for_floatop_reversed = function | Iaddf -> I.fadd | Isubf -> I.fsubr | Imulf -> I.fmul | Idivf -> I.fdivr | Ispecific Isubfrev -> I.fsub | Ispecific Idivfrev -> I.fdiv | _ -> fatal_error "Emit_i386: instr_for_floatop_reversed" let instr_for_floatop_reversed_pop = function | Iaddf -> I.faddp | Isubf -> I.fsubrp | Imulf -> I.fmulp | Idivf -> I.fdivrp | Ispecific Isubfrev -> I.fsubp | Ispecific Idivfrev -> I.fdivp | _ -> fatal_error "Emit_i386: instr_for_floatop_reversed_pop" let instr_for_floatarithmem = function | Ifloatadd -> I.fadd | Ifloatsub -> I.fsub | Ifloatsubrev -> I.fsubr | Ifloatmul -> I.fmul | Ifloatdiv -> I.fdiv | Ifloatdivrev -> I.fdivr let cond = function | Isigned Ceq -> E | Isigned Cne -> NE | Isigned Cle -> LE | Isigned Cgt -> G | Isigned Clt -> L | Isigned Cge -> GE | Iunsigned Ceq -> E | Iunsigned Cne -> NE | Iunsigned Cle -> BE | Iunsigned Cgt -> A | Iunsigned Clt -> B | Iunsigned Cge -> AE (* Output an = 0 or <> 0 test. *) let output_test_zero arg = match arg.loc with | Reg.Reg _ -> I.test (reg arg) (reg arg) | _ -> I.cmp (int 0) (reg arg) (* Deallocate the stack frame before a return or tail call *) let output_epilogue f = let n = frame_size() - 4 in if n > 0 then begin I.add (int n) esp; cfi_adjust_cfa_offset (-n); f (); (* reset CFA back cause function body may continue *) cfi_adjust_cfa_offset n end else f () (* Determine if the given register is the top of the floating-point stack *) let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false (* Emit the code for a floating-point comparison *) let emit_float_test cmp neg arg lbl = let actual_cmp = match (is_tos arg.(0), is_tos arg.(1)) with | (true, true) -> (* both args on top of FP stack *) I.fcompp (); cmp | (true, false) -> (* first arg on top of FP stack *) I.fcomp (reg arg.(1)); cmp | (false, true) -> (* second arg on top of FP stack *) I.fcomp (reg arg.(0)); Cmm.swap_comparison cmp | (false, false) -> I.fld (reg arg.(0)); I.fcomp (reg arg.(1)); cmp in I.fnstsw ax; match actual_cmp with | Ceq -> if neg then begin I.and_ (int 68) ah; I.xor (int 64) ah; I.jne lbl end else begin I.and_ (int 69) ah; I.cmp (int 64) ah; I.je lbl end | Cne -> if neg then begin I.and_ (int 69) ah; I.cmp (int 64) ah; I.je lbl end else begin I.and_ (int 68) ah; I.xor (int 64) ah; I.jne lbl end | Cle -> I.and_ (int 69) ah; I.dec ah; I.cmp (int 64) ah; if neg then I.jae lbl else I.jb lbl | Cge -> I.and_ (int 5) ah; if neg then I.jne lbl else I.je lbl | Clt -> I.and_ (int 69) ah; I.cmp (int 1) ah; if neg then I.jne lbl else I.je lbl | Cgt -> I.and_ (int 69) ah; if neg then I.jne lbl else I.je lbl (* Emit a Ifloatspecial instruction *) let emit_floatspecial = function | "atan" -> I.fld1 (); I.fpatan () | "atan2" -> I.fpatan () | "cos" -> I.fcos () | "log" -> I.fldln2 (); I.fxch st1; I.fyl2x () | "log10" -> I.fldlg2 (); I.fxch st1; I.fyl2x () | "sin" -> I.fsin () | "sqrt" -> I.fsqrt () | "tan" -> I.fptan (); I.fstp st0 | _ -> assert false (* Floating-point constants *) let float_constants = ref ([] : (int64 * int) list) let add_float_constant cst = try List.assoc cst !float_constants with Not_found -> let lbl = new_label() in float_constants := (cst, lbl) :: !float_constants; lbl let emit_float64_split_directive x = let lo = Int64.logand x 0xFFFF_FFFFL and hi = Int64.shift_right_logical x 32 in D.long (Const (if Arch.big_endian then hi else lo)); D.long (Const (if Arch.big_endian then lo else hi)) let emit_float_constant cst lbl = _label (emit_label lbl); emit_float64_split_directive cst let emit_global_label s = let lbl = Compilenv.make_symbol (Some s) in add_def_symbol lbl; let lbl = emit_symbol lbl in D.global lbl; _label lbl (* Output the assembly code for an instruction *) (* Name of current function *) let function_name = ref "" (* Entry point for tail recursive calls *) let tailrec_entry_point = ref 0 (* Record references to external C functions (for MacOSX) *) let external_symbols_direct = ref StringSet.empty let external_symbols_indirect = ref StringSet.empty let emit_instr fallthrough i = emit_debug_info i.dbg; match i.desc with | Lend -> () | Lop(Imove | Ispill | Ireload) -> let src = i.arg.(0) and dst = i.res.(0) in if src.loc <> dst.loc then begin if src.typ = Float then if is_tos src then I.fstp (reg dst) else if is_tos dst then I.fld (reg src) else begin I.fld (reg src); I.fstp (reg dst) end else I.mov (reg src) (reg dst) end | Lop(Iconst_int n | Iconst_blockheader n) -> if n = 0n then begin match i.res.(0).loc with | Reg _ -> I.xor (reg i.res.(0)) (reg i.res.(0)) | _ -> I.mov (int 0) (reg i.res.(0)) end else I.mov (nat n) (reg i.res.(0)) | Lop(Iconst_float f) -> begin match f with | 0x0000_0000_0000_0000L -> (* +0.0 *) I.fldz () | 0x8000_0000_0000_0000L -> (* -0.0 *) I.fldz (); I.fchs () | 0x3FF0_0000_0000_0000L -> (* 1.0 *) I.fld1 () | 0xBFF0_0000_0000_0000L -> (* -1.0 *) I.fld1 (); I.fchs () | _ -> let lbl = add_float_constant f in I.fld (mem_sym REAL8 (emit_label lbl)) end | Lop(Iconst_symbol s) -> add_used_symbol s; I.mov (immsym s) (reg i.res.(0)) | Lop(Icall_ind) -> I.call (reg i.arg.(0)); record_frame i.live i.dbg | Lop(Icall_imm s) -> add_used_symbol s; emit_call s; record_frame i.live i.dbg | Lop(Itailcall_ind) -> output_epilogue begin fun () -> I.jmp (reg i.arg.(0)) end | Lop(Itailcall_imm s) -> if s = !function_name then I.jmp (label !tailrec_entry_point) else begin output_epilogue begin fun () -> add_used_symbol s; I.jmp (immsym s) end end | Lop(Iextcall(s, alloc)) -> add_used_symbol s; if alloc then begin if system <> S_macosx then I.mov (immsym s) eax else begin external_symbols_indirect := StringSet.add s !external_symbols_indirect; I.mov (mem_sym DWORD (Printf.sprintf "L%s$non_lazy_ptr" (emit_symbol s))) eax end; emit_call "caml_c_call"; record_frame i.live i.dbg end else begin if system <> S_macosx then emit_call s else begin external_symbols_direct := StringSet.add s !external_symbols_direct; I.call (sym (Printf.sprintf "L%s$stub" (emit_symbol s))) end end | Lop(Istackoffset n) -> if n < 0 then I.add (int (-n)) esp else I.sub (int n) esp; cfi_adjust_cfa_offset n; stack_offset := !stack_offset + n | Lop(Iload(chunk, addr)) -> let dest = i.res.(0) in begin match chunk with | Word_int | Word_val | Thirtytwo_signed | Thirtytwo_unsigned -> I.mov (addressing addr DWORD i 0) (reg dest) | Byte_unsigned -> I.movzx (addressing addr BYTE i 0) (reg dest) | Byte_signed -> I.movsx (addressing addr BYTE i 0) (reg dest) | Sixteen_unsigned -> I.movzx (addressing addr WORD i 0) (reg dest) | Sixteen_signed -> I.movsx (addressing addr WORD i 0) (reg dest) | Single -> I.fld (addressing addr REAL4 i 0) | Double | Double_u -> I.fld (addressing addr REAL8 i 0) end | Lop(Istore(chunk, addr, _)) -> begin match chunk with | Word_int | Word_val | Thirtytwo_signed | Thirtytwo_unsigned -> I.mov (reg i.arg.(0)) (addressing addr DWORD i 1) | Byte_unsigned | Byte_signed -> I.mov (reg8 i.arg.(0)) (addressing addr BYTE i 1) | Sixteen_unsigned | Sixteen_signed -> I.mov (reg16 i.arg.(0)) (addressing addr WORD i 1) | Single -> if is_tos i.arg.(0) then I.fstp (addressing addr REAL4 i 1) else begin I.fld (reg i.arg.(0)); I.fstp (addressing addr REAL4 i 1) end | Double | Double_u -> if is_tos i.arg.(0) then I.fstp (addressing addr REAL8 i 1) else begin I.fld (reg i.arg.(0)); I.fstp (addressing addr REAL8 i 1) end end | Lop(Ialloc n) -> if !fastcode_flag then begin let lbl_redo = new_label() in def_label lbl_redo; I.mov (sym32 "caml_young_ptr") eax; I.sub (int n) eax; I.mov eax (sym32 "caml_young_ptr"); I.cmp (sym32 "caml_young_limit") eax; let lbl_call_gc = new_label() in let lbl_frame = record_frame_label i.live Debuginfo.none in I.jb (label lbl_call_gc); I.lea (mem32 NONE 4 RAX) (reg i.res.(0)); call_gc_sites := { gc_lbl = lbl_call_gc; gc_return_lbl = lbl_redo; gc_frame = lbl_frame } :: !call_gc_sites end else begin begin match n with 8 -> emit_call "caml_alloc1" | 12 -> emit_call "caml_alloc2" | 16 -> emit_call "caml_alloc3" | _ -> I.mov (int n) eax; emit_call "caml_allocN" end; record_frame i.live Debuginfo.none; I.lea (mem32 NONE 4 RAX) (reg i.res.(0)) end | Lop(Iintop(Icomp cmp)) -> I.cmp (reg i.arg.(1)) (reg i.arg.(0)); I.set (cond cmp) al; I.movzx al (reg i.res.(0)); | Lop(Iintop_imm(Icomp cmp, n)) -> I.cmp (int n) (reg i.arg.(0)); I.set (cond cmp) al; I.movzx al (reg i.res.(0)) | Lop(Iintop Icheckbound) -> let lbl = bound_error_label i.dbg in I.cmp (reg i.arg.(1)) (reg i.arg.(0)); I.jbe (label lbl) | Lop(Iintop_imm(Icheckbound, n)) -> let lbl = bound_error_label i.dbg in I.cmp (int n) (reg i.arg.(0)); I.jbe (label lbl) | Lop(Iintop(Idiv | Imod)) -> I.cdq (); I.idiv (reg i.arg.(1)) | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) -> (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %ecx *) instr_for_intop op cl (reg i.res.(0)) | Lop(Iintop Imulh) -> I.imul (reg i.arg.(1)) None | Lop(Iintop op) -> (* We have i.arg.(0) = i.res.(0) *) instr_for_intop op (reg i.arg.(1)) (reg i.res.(0)) | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc -> I.lea (mem32 NONE n (reg32 i.arg.(0))) (reg i.res.(0)) | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) -> I.inc (reg i.res.(0)) | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) -> I.dec (reg i.res.(0)) | Lop(Iintop_imm(op, n)) -> (* We have i.arg.(0) = i.res.(0) *) instr_for_intop op (int n) (reg i.res.(0)) | Lop(Inegf | Iabsf as floatop) -> if not (is_tos i.arg.(0)) then I.fld (reg i.arg.(0)); unary_instr_for_floatop floatop | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev) as floatop) -> begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with (true, true) -> (* both operands on top of FP stack *) instr_for_floatop_reversed_pop floatop st0 st1 | (true, false) -> (* first operand on stack *) instr_for_floatop floatop (reg i.arg.(1)) | (false, true) -> (* second operand on stack *) instr_for_floatop_reversed floatop (reg i.arg.(0)) | (false, false) -> (* both operands in memory *) I.fld (reg i.arg.(0)); instr_for_floatop floatop (reg i.arg.(1)) end | Lop(Ifloatofint) -> begin match i.arg.(0).loc with | Stack _ -> I.fild (reg i.arg.(0)) | _ -> I.push (reg i.arg.(0)); I.fild (mem32 DWORD 0 RSP); I.add (int 4) esp end | Lop(Iintoffloat) -> if not (is_tos i.arg.(0)) then I.fld (reg i.arg.(0)); stack_offset := !stack_offset - 8; I.sub (int 8) esp; cfi_adjust_cfa_offset 8; I.fnstcw (mem32 NONE 4 RSP); I.mov (mem32 WORD 4 RSP) ax; I.mov (int 12) ah; I.mov ax (mem32 WORD 0 RSP); I.fldcw (mem32 NONE 0 RSP); begin match i.res.(0).loc with | Stack _ -> I.fistp (reg i.res.(0)) | _ -> I.fistp (mem32 DWORD 0 RSP); I.mov (mem32 DWORD 0 RSP) (reg i.res.(0)) end; I.fldcw (mem32 NONE 4 RSP); I.add (int 8) esp; cfi_adjust_cfa_offset (-8); stack_offset := !stack_offset + 8 | Lop(Ispecific(Ilea addr)) -> I.lea (addressing addr DWORD i 0) (reg i.res.(0)) | Lop(Ispecific(Istore_int(n, addr, _))) -> I.mov (nat n) (addressing addr DWORD i 0) | Lop(Ispecific(Istore_symbol(s, addr, _))) -> add_used_symbol s; I.mov (immsym s) (addressing addr DWORD i 0) | Lop(Ispecific(Ioffset_loc(n, addr))) -> I.add (int n) (addressing addr DWORD i 0) | Lop(Ispecific(Ipush)) -> (* Push arguments in reverse order *) for n = Array.length i.arg - 1 downto 0 do let r = i.arg.(n) in match r with {loc = Reg _; typ = Float} -> I.sub (int 8) esp; cfi_adjust_cfa_offset 8; I.fstp (mem32 REAL8 0 RSP); stack_offset := !stack_offset + 8 | {loc = Stack sl; typ = Float} -> let ofs = slot_offset sl 1 in (* Use x87 stack to move from stack to stack, instead of two 32-bit push instructions, which could kill performance on modern CPUs (see #6979). *) I.fld (mem32 REAL8 ofs RSP); I.sub (int 8) esp; cfi_adjust_cfa_offset 8; I.fstp (mem32 REAL8 0 RSP); stack_offset := !stack_offset + 8 | _ -> I.push (reg r); cfi_adjust_cfa_offset 4; stack_offset := !stack_offset + 4 done | Lop(Ispecific(Ipush_int n)) -> I.push (nat n); cfi_adjust_cfa_offset 4; stack_offset := !stack_offset + 4 | Lop(Ispecific(Ipush_symbol s)) -> add_used_symbol s; I.push (immsym s); cfi_adjust_cfa_offset 4; stack_offset := !stack_offset + 4 | Lop(Ispecific(Ipush_load addr)) -> I.push (addressing addr DWORD i 0); cfi_adjust_cfa_offset 4; stack_offset := !stack_offset + 4 | Lop(Ispecific(Ipush_load_float addr)) -> I.push (addressing (offset_addressing addr 4) DWORD i 0); I.push (addressing addr DWORD i 0); cfi_adjust_cfa_offset 8; stack_offset := !stack_offset + 8 | Lop(Ispecific(Ifloatarithmem(double, op, addr))) -> if not (is_tos i.arg.(0)) then I.fld (reg i.arg.(0)); instr_for_floatarithmem op (addressing addr (if double then REAL8 else REAL4) i 1) | Lop(Ispecific(Ifloatspecial s)) -> (* Push args on float stack if necessary *) for k = 0 to Array.length i.arg - 1 do if not (is_tos i.arg.(k)) then I.fld (reg i.arg.(k)) done; (* Fix-up for binary instrs whose args were swapped *) if Array.length i.arg = 2 && is_tos i.arg.(1) then I.fxch st1; emit_floatspecial s | Lreloadretaddr -> () | Lreturn -> output_epilogue begin fun () -> I.ret () end | Llabel lbl -> emit_Llabel fallthrough lbl | Lbranch lbl -> I.jmp (label lbl) | Lcondbranch(tst, lbl) -> let lbl = label lbl in begin match tst with | Itruetest -> output_test_zero i.arg.(0); I.jne lbl; | Ifalsetest -> output_test_zero i.arg.(0); I.je lbl | Iinttest cmp -> I.cmp (reg i.arg.(1)) (reg i.arg.(0)); I.j (cond cmp) lbl | Iinttest_imm((Isigned Ceq | Isigned Cne | Iunsigned Ceq | Iunsigned Cne) as cmp, 0) -> output_test_zero i.arg.(0); I.j (cond cmp) lbl | Iinttest_imm(cmp, n) -> I.cmp (int n) (reg i.arg.(0)); I.j (cond cmp) lbl | Ifloattest(cmp, neg) -> emit_float_test cmp neg i.arg lbl | Ioddtest -> I.test (int 1) (reg i.arg.(0)); I.jne lbl | Ieventest -> I.test (int 1) (reg i.arg.(0)); I.je lbl end | Lcondbranch3(lbl0, lbl1, lbl2) -> I.cmp (int 1) (reg i.arg.(0)); begin match lbl0 with None -> () | Some lbl -> I.jb (label lbl) end; begin match lbl1 with None -> () | Some lbl -> I.je (label lbl) end; begin match lbl2 with None -> () | Some lbl -> I.jg (label lbl) end | Lswitch jumptbl -> let lbl = new_label() in I.jmp (mem32 NONE 0 (reg32 i.arg.(0)) ~scale:4 ~sym:(emit_label lbl)); D.data (); _label (emit_label lbl); for i = 0 to Array.length jumptbl - 1 do D.long (ConstLabel (emit_label jumptbl.(i))) done; D.text () | Lsetuptrap lbl -> I.call (label lbl) | Lpushtrap -> if trap_frame_size > 8 then I.sub (int (trap_frame_size - 8)) esp; I.push (sym32 "caml_exception_pointer"); cfi_adjust_cfa_offset trap_frame_size; I.mov esp (sym32 "caml_exception_pointer"); stack_offset := !stack_offset + trap_frame_size | Lpoptrap -> I.pop (sym32 "caml_exception_pointer"); I.add (int (trap_frame_size - 4)) esp; cfi_adjust_cfa_offset (-trap_frame_size); stack_offset := !stack_offset - trap_frame_size | Lraise k -> begin match !Clflags.debug, k with | true, Lambda.Raise_regular -> emit_call "caml_raise_exn"; record_frame Reg.Set.empty i.dbg | true, Lambda.Raise_reraise -> emit_call "caml_reraise_exn"; record_frame Reg.Set.empty i.dbg | false, _ | true, Lambda.Raise_notrace -> I.mov (sym32 "caml_exception_pointer") esp; I.pop (sym32 "caml_exception_pointer"); if trap_frame_size > 8 then I.add (int (trap_frame_size - 8)) esp; I.ret () end let rec emit_all fallthrough i = match i.desc with | Lend -> () | _ -> emit_instr fallthrough i; emit_all (system = S_win32 || Linearize.has_fallthrough i.desc) i.next (* Emission of external symbol references (for MacOSX) *) let emit_external_symbol_direct s = _label (Printf.sprintf "L%s$stub" (emit_symbol s)); D.indirect_symbol (emit_symbol s); I.hlt (); I.hlt (); I.hlt (); I.hlt () ; I.hlt () let emit_external_symbol_indirect s = _label (Printf.sprintf "L%s$non_lazy_ptr" (emit_symbol s)); D.indirect_symbol (emit_symbol s); D.long (const 0) let emit_external_symbols () = D.section [ "__IMPORT"; "__pointers"] None ["non_lazy_symbol_pointers" ]; StringSet.iter emit_external_symbol_indirect !external_symbols_indirect; external_symbols_indirect := StringSet.empty; D.section [ "__IMPORT"; "__jump_table"] None [ "symbol_stubs"; "self_modifying_code+pure_instructions"; "5" ]; StringSet.iter emit_external_symbol_direct !external_symbols_direct; external_symbols_direct := StringSet.empty; if !Clflags.gprofile then begin _label "Lmcount$stub"; D.indirect_symbol "mcount"; I.hlt (); I.hlt (); I.hlt () ; I.hlt () ; I.hlt () end (* Emission of the profiling prelude *) let call_mcount mcount = I.push eax; I.mov esp ebp; I.push ecx; I.push edx; I.call (sym mcount); I.pop edx; I.pop ecx; I.pop eax let emit_profile () = match system with | S_linux_elf | S_gnu -> call_mcount "mcount" | S_bsd_elf -> call_mcount ".mcount" | S_macosx -> call_mcount "Lmcount$stub" | _ -> () (*unsupported yet*) (* Emission of a function declaration *) let fundecl fundecl = function_name := fundecl.fun_name; fastcode_flag := fundecl.fun_fast; tailrec_entry_point := new_label(); stack_offset := 0; call_gc_sites := []; bound_error_sites := []; bound_error_call := 0; D.text (); add_def_symbol fundecl.fun_name; D.align (if system = S_win32 then 4 else 16); if system = S_macosx && not !Clflags.output_c_object && is_generic_function fundecl.fun_name then (* PR#4690 *) D.private_extern (emit_symbol fundecl.fun_name) else D.global (emit_symbol fundecl.fun_name); D.label (emit_symbol fundecl.fun_name); emit_debug_info fundecl.fun_dbg; cfi_startproc (); if !Clflags.gprofile then emit_profile(); let n = frame_size() - 4 in if n > 0 then begin I.sub (int n) esp; cfi_adjust_cfa_offset n; end; def_label !tailrec_entry_point; emit_all true fundecl.fun_body; List.iter emit_call_gc !call_gc_sites; emit_call_bound_errors (); cfi_endproc (); begin match system with | S_linux_elf | S_bsd_elf | S_gnu -> D.type_ (emit_symbol fundecl.fun_name) "@function"; D.size (emit_symbol fundecl.fun_name) (ConstSub ( ConstThis, ConstLabel (emit_symbol fundecl.fun_name))) | _ -> () end (* Emission of data *) let emit_item = function | Cglobal_symbol s -> D.global (emit_symbol s) | Cdefine_symbol s -> add_def_symbol s; _label (emit_symbol s) | Cdefine_label lbl -> _label (emit_data_label lbl) | Cint8 n -> D.byte (const n) | Cint16 n -> D.word (const n) | Cint32 n -> D.long (const_nat n) | Cint n -> D.long (const_nat n) | Csingle f -> D.long (Const (Int64.of_int32 (Int32.bits_of_float f))) | Cdouble f -> emit_float64_split_directive (Int64.bits_of_float f) | Csymbol_address s -> add_used_symbol s; D.long (ConstLabel (emit_symbol s)) | Clabel_address lbl -> D.long (ConstLabel (emit_data_label lbl)) | Cstring s -> D.bytes s | Cskip n -> if n > 0 then D.space n | Calign n -> D.align n let data l = D.data (); List.iter emit_item l (* Beginning / end of an assembly file *) let begin_assembly() = X86_proc.reset_asm_code (); reset_debug_info(); (* PR#5603 *) float_constants := []; if system = S_win32 then begin D.mode386 (); D.model "FLAT"; D.extrn "_caml_young_ptr" DWORD; D.extrn "_caml_young_limit" DWORD; D.extrn "_caml_exception_pointer" DWORD; D.extrn "_caml_extra_params" DWORD; D.extrn "_caml_call_gc" PROC; D.extrn "_caml_c_call" PROC; D.extrn "_caml_allocN" PROC; D.extrn "_caml_alloc1" PROC; D.extrn "_caml_alloc2" PROC; D.extrn "_caml_alloc3" PROC; D.extrn "_caml_ml_array_bound_error" PROC; D.extrn "_caml_raise_exn" PROC; D.extrn "_caml_reraise_exn" PROC; end; D.data (); emit_global_label "data_begin"; D.text (); emit_global_label "code_begin"; if system = S_macosx then I.nop (); (* PR#4690 *) () let end_assembly() = if !float_constants <> [] then begin D.data (); List.iter (fun (cst,lbl) -> emit_float_constant cst lbl) !float_constants end; D.text (); if system = S_macosx then I.nop (); (* suppress "ld warning: atom sorting error" *) emit_global_label "code_end"; D.data (); emit_global_label "data_end"; D.long (const 0); emit_global_label "frametable"; emit_frames { efa_label = (fun l -> D.long (ConstLabel (emit_label l))); efa_16 = (fun n -> D.word (const n)); efa_32 = (fun n -> D.long (const_32 n)); efa_word = (fun n -> D.long (const n)); efa_align = D.align; efa_label_rel = (fun lbl ofs -> D.long (ConstAdd ( ConstSub(ConstLabel(emit_label lbl), ConstThis), const_32 ofs))); efa_def_label = (fun l -> _label (emit_label l)); efa_string = (fun s -> D.bytes (s ^ "\000")) }; if system = S_macosx then emit_external_symbols (); if system = S_linux_elf then (* Mark stack as non-executable, PR#4564 *) D.section [".note.GNU-stack"] (Some "") ["%progbits"]; if system = S_win32 then begin D.comment "External functions"; StringSet.iter (fun s -> if not (StringSet.mem s !symbols_defined) then D.extrn (emit_symbol s) PROC) !symbols_used; symbols_used := StringSet.empty; symbols_defined := StringSet.empty; end; let asm = if !Emitaux.create_asm_file then Some ( (if X86_proc.masm then X86_masm.generate_asm else X86_gas.generate_asm) !Emitaux.output_channel ) else None in X86_proc.generate_code asm