(***********************************************************************)
(*                                                                     *)
(*                         Caml Special Light                          *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 1995 Institut National de Recherche en Informatique et   *)
(*  Automatique.  Distributed only by permission.                      *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)

(* Emission of Intel 386 assembly code *)

open Misc
open Cmm
open Arch
open Proc
open Reg
open Mach
open Linearize
open Emitaux

(* Tradeoff between code size and code speed *)

let fastcode_flag = ref true

(* Number of bytes currently pushed on the stack below the frame of the
   function being emitted.  Every case that moves %esp must update it,
   because slot_offset adds it to the offset of every local slot. *)

let stack_offset = ref 0

(* Layout of the stack frame *)

let frame_size () =                     (* includes return address *)
  !stack_offset + 4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + 4

(* Offset from %esp of a stack location.  Class 0 slots are 4 bytes wide
   and come first; slots of the other class are 8 bytes wide and are laid
   out after all the class 0 slots. *)

let slot_offset loc class =
  match loc with
    Incoming n -> frame_size() + n
  | Local n ->
      if class = 0
      then !stack_offset + n * 4
      else !stack_offset + num_stack_slots.(0) * 4 + n * 8
  | Outgoing n -> n

(* Symbols are prefixed with _, except under Linux with ELF binaries *)

let symbol_prefix =
  match Config.system with
    "linux_elf" -> ""
  | _ -> "_"

let emit_symbol s =
  emit_string symbol_prefix; Emitaux.emit_symbol s

(* Output a label *)

let label_prefix =
  match Config.system with
    "linux_elf" -> ".L"
  | _ -> "L"

let emit_label lbl =
  emit_string label_prefix; emit_int lbl

(* Output a .align directive.  The numerical argument to .align
   is log2 of alignment size, except under ELF, where it is the
   alignment size... *)

let emit_align =
  match Config.system with
    "linux_elf" ->
      (fun n -> ` .align {emit_int n}\n`)
  | _ ->
      (fun n -> ` .align {emit_int(Misc.log2 n)}\n`)

(* Output a pseudo-register *)

let emit_reg r =
  match r.loc with
    Reg r ->
      emit_string (register_name r)
  | Stack s ->
      let ofs = slot_offset s (register_class r) in
      `{emit_int ofs}(%esp)`
  | Unknown ->
      fatal_error "Emit_i386.emit_reg"

(* Same, but after one push in the floating-point register set *)

let emit_shift r =
  match r.loc with
    Reg r ->
      emit_string (register_name(r + 1))
  | Stack s ->
      let ofs = slot_offset s (register_class r) in
      `{emit_int ofs}(%esp)`
  | Unknown ->
      fatal_error "Emit_i386.emit_shift"

(* Output a reference to the lower 8 bits or lower 16 bits of a register *)

let reg_low_byte_name = [| "%al"; "%bl"; "%cl"; "%dl" |]
let reg_low_half_name = [| "%ax"; "%bx"; "%cx"; "%dx"; "%si"; "%di"; "%bp" |]

(* Only the first four registers have a byte-sized name; anything else
   (including stack locations) is a fatal error. *)

let emit_reg8 r =
  match r.loc with
    Reg r when r < 4 -> emit_string (reg_low_byte_name.(r))
  | _ -> fatal_error "Emit_i386.emit_reg8"

(* Only the first seven registers have a 16-bit name; anything else
   (including stack locations) is a fatal error. *)

let emit_reg16 r =
  match r.loc with
    Reg r when r < 7 -> emit_string (reg_low_half_name.(r))
  | _ -> fatal_error "Emit_i386.emit_reg16"

(* Check if the given register overlaps (same location) with the given
   array of registers *)

let register_overlap reg arr =
  try
    for i = 0 to Array.length arr - 1 do
      if reg.loc = arr.(i).loc then raise Exit
    done;
    false
  with Exit ->
    true

(* Output an addressing mode.  The registers used by the mode are taken
   from array r starting at index n.  A zero displacement is omitted. *)

let emit_addressing addr r n =
  match addr with
    Ibased(s, d) ->
      `{emit_symbol s}`;
      if d <> 0 then ` + {emit_int d}`
  | Iindexed d ->
      if d <> 0 then emit_int d;
      `({emit_reg r.(n)})`
  | Iindexed2 d ->
      if d <> 0 then emit_int d;
      `({emit_reg r.(n)}, {emit_reg r.(n+1)})`
  | Iscaled(scale, d) ->
      if d <> 0 then emit_int d;
      `(, {emit_reg r.(n)}, {emit_int scale})`
  | Iindexed2scaled(scale, d) ->
      if d <> 0 then emit_int d;
      `({emit_reg r.(n)}, {emit_reg r.(n+1)}, {emit_int scale})`

(* Record live pointers at call points *)

type frame_descr =
  { fd_lbl: int;                        (* Return address *)
    fd_frame_size: int;                 (* Size of stack frame *)
    fd_live_offset: int list }          (* Offsets/regs of live addresses *)

let frame_descriptors = ref([] : frame_descr list)

(* Allocate a fresh label and register a frame descriptor for it.
   Live registers of type Addr are encoded as (-1 - register number),
   live stack slots of type Addr as their offset from %esp; all other
   live registers are ignored.  Returns the label without emitting it. *)

let record_frame_label live =
  let lbl = new_label() in
  let live_offset = ref [] in
  Reg.Set.iter
    (function
        {typ = Addr; loc = Reg r} ->
          live_offset := (-1 - r) :: !live_offset
      | {typ = Addr; loc = Stack s} as reg ->
          live_offset := slot_offset s (register_class reg) :: !live_offset
      | _ -> ())
    live;
  frame_descriptors :=
    { fd_lbl = lbl;
      fd_frame_size = frame_size();
      fd_live_offset = !live_offset } :: !frame_descriptors;
  lbl

(* Same, and emit the label definition at the current point.  No newline
   is output after the label. *)

let record_frame live =
  let lbl = record_frame_label live in `{emit_label lbl}:`

(* Output one frame descriptor: label, frame size, number of live
   entries, then the entries themselves, followed by 4-byte alignment. *)

let emit_frame fd =
  ` .long {emit_label fd.fd_lbl}\n`;
  ` .word {emit_int fd.fd_frame_size}\n`;
  ` .word {emit_int (List.length fd.fd_live_offset)}\n`;
  List.iter
    (fun n ->
      ` .word {emit_int n}\n`)
    fd.fd_live_offset;
  emit_align 4

(* Names for instructions *)

let instr_for_intop = function
    Iadd -> "addl"
  | Isub -> "subl"
  | Imul -> "imull"
  | Iand -> "andl"
  | Ior -> "orl"
  | Ixor -> "xorl"
  | Ilsl -> "sal"
  | Ilsr -> "shr"
  | Iasr -> "sar"
  | _ -> fatal_error "Emit_i386: instr_for_intop"

let name_for_cond_branch = function
    Isigned Ceq -> "e"     | Isigned Cne -> "ne"
  | Isigned Cle -> "le"    | Isigned Cgt -> "g"
  | Isigned Clt -> "l"     | Isigned Cge -> "ge"
  | Iunsigned Ceq -> "e"   | Iunsigned Cne -> "ne"
  | Iunsigned Cle -> "be"  | Iunsigned Cgt -> "a"
  | Iunsigned Clt -> "b"   | Iunsigned Cge -> "ae"

(* Output an = 0 or <> 0 test. *)

let output_test_zero arg =
  match arg.loc with
    Reg r -> ` testl {emit_reg arg}, {emit_reg arg}\n`
  | _     -> ` cmpl $0, {emit_reg arg}\n`

(* Deallocate the stack frame before a return or tail call *)

let output_epilogue () =
  match frame_size() - 4 with
    0 -> ()
  | 4 -> ` popl %esi\n`  (* Faster than add, and %esi is dead here *)
  | n -> ` addl ${emit_int n}, %esp\n`

(* Output the assembly code for an instruction *)

(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
(* Label of trap for out-of-range accesses *)
let range_check_trap = ref 0

(* Floating-point constants of the current function, as pairs
   (label, literal); they are output by fundecl after the code. *)
let float_constants = ref ([] : (int * string) list)

let emit_instr i =
  match i.desc with
    Lend -> ()
  | Lop(Imove | Ispill | Ireload) ->
      if i.arg.(0).loc <> i.res.(0).loc then begin
        match i.arg.(0).typ with
          Int | Addr ->
            ` movl {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
        | Float ->
            if i.arg.(0).loc = Reg 100 then
              ` fstl {emit_reg i.res.(0)}\n`
            else begin
              ` fldl {emit_reg i.arg.(0)}\n`;
              ` fstpl {emit_shift i.res.(0)}\n`
            end
      end
  | Lop(Iconst_int 0) ->
      begin match i.res.(0).loc with
        Reg n -> ` xorl {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
      | _     -> ` movl $0, {emit_reg i.res.(0)}\n`
      end
  | Lop(Iconst_int n) ->
      ` movl ${emit_int n}, {emit_reg i.res.(0)}\n`
  | Lop(Iconst_float f) ->
      if float_of_string f = 0.0 then
        ` fldz\n`
      else begin
        let lbl = new_label() in
        float_constants := (lbl, f) :: !float_constants;
        ` fldl {emit_label lbl}\n`
      end;
      ` fstpl {emit_shift i.res.(0)}\n`
  | Lop(Iconst_symbol s) ->
      ` movl ${emit_symbol s}, {emit_reg i.res.(0)}\n`
  | Lop(Icall_ind) ->
      ` call *{emit_reg i.arg.(0)}\n`;
      record_frame i.live
  | Lop(Icall_imm s) ->
      ` call {emit_symbol s}\n`;
      record_frame i.live
  | Lop(Itailcall_ind) ->
      output_epilogue();
      ` jmp *{emit_reg i.arg.(0)}\n`
  | Lop(Itailcall_imm s) ->
      (* A self tail call keeps the current frame and jumps back to the
         entry point placed after the frame allocation. *)
      if s = !function_name then
        ` jmp {emit_label !tailrec_entry_point}\n`
      else begin
        output_epilogue();
        ` jmp {emit_symbol s}\n`
      end
  | Lop(Iextcall(s, alloc)) ->
      if alloc then begin
        ` movl ${emit_symbol s}, %eax\n`;
        ` call {emit_symbol "caml_c_call"}\n`;
        record_frame i.live
      end else begin
        ` finit\n`;
        ` call {emit_symbol s}\n`
      end;
      if Array.length i.res > 0 & i.res.(0).typ = Float then
        ` fstpl {emit_shift i.res.(0)}\n`
  | Lop(Istackoffset n) ->
      if n >= 0
      then ` subl ${emit_int n}, %esp\n`
      else ` addl ${emit_int(-n)}, %esp\n`;
      stack_offset := !stack_offset + n
  | Lop(Iload(chunk, addr)) ->
      let dest = i.res.(0) in
      begin match dest.typ with
        Int | Addr ->
          begin match (chunk, dest.loc) with
            (Word, _) ->
              ` movl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
          | (Byte_unsigned, Reg r) when r < 4 &
                                        not (register_overlap dest i.arg) ->
              (* Clearing the destination first is only possible when it
                 is not also used to compute the address. *)
              ` xorl {emit_reg dest}, {emit_reg dest}\n`;
              ` movb {emit_addressing addr i.arg 0}, {emit_reg8 dest}\n`
          | (Byte_unsigned, _) ->
              ` movzbl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
          | (Byte_signed, _) ->
              ` movsbl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
          | (Sixteen_unsigned, Reg r) when not (register_overlap dest i.arg) ->
              ` xorl {emit_reg dest}, {emit_reg dest}\n`;
              ` movw {emit_addressing addr i.arg 0}, {emit_reg16 dest}\n`
          | (Sixteen_unsigned, _) ->
              ` movzwl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
          | (Sixteen_signed, _) ->
              ` movswl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
          end
      | Float ->
          ` fldl {emit_addressing addr i.arg 0}\n`;
          ` fstpl {emit_shift i.res.(0)}\n`
      end
  | Lop(Istore(Word, addr)) ->
      begin match i.arg.(0).typ with
        Int | Addr ->
          ` movl {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
      | Float ->
          if i.arg.(0).loc = Reg 100 then
            ` fstl {emit_addressing addr i.arg 1}\n`
          else begin
            ` fldl {emit_reg i.arg.(0)}\n`;
            ` fstpl {emit_addressing addr i.arg 1}\n`
          end
      end
  | Lop(Istore(chunk, addr)) ->
      (* i.arg.(0) is guaranteed to be in %edx, actually *)
      begin match chunk with
        Word -> fatal_error "Emit_i386: store word"
      | Byte_unsigned | Byte_signed ->
          ` movb {emit_reg8 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
      | Sixteen_unsigned | Sixteen_signed ->
          ` movw {emit_reg16 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
      end
  | Lop(Ialloc n) ->
      if !fastcode_flag then begin
        (* Inline allocation: the collector is called only when the
           comparison against young_start fails. *)
        ` movl {emit_symbol "young_ptr"}, %eax\n`;
        ` subl ${emit_int n}, %eax\n`;
        ` movl %eax, {emit_symbol "young_ptr"}\n`;
        ` cmpl {emit_symbol "young_start"}, %eax\n`;
        let lbl_cont = record_frame_label i.live in
        ` jae {emit_label lbl_cont}\n`;
        ` call {emit_symbol "caml_call_gc"}\n`;
        ` .word {emit_int n}\n`;
        `{emit_label lbl_cont}: leal 4(%eax), {emit_reg i.res.(0)}\n`
      end else begin
        begin match n with
          8  -> ` call {emit_symbol "caml_alloc1"}\n`
        | 12 -> ` call {emit_symbol "caml_alloc2"}\n`
        | 16 -> ` call {emit_symbol "caml_alloc3"}\n`
        | _  -> ` movl ${emit_int n}, %eax\n`;
                ` call {emit_symbol "caml_alloc"}\n`
        end;
        `{record_frame i.live} leal 4(%eax), {emit_reg i.res.(0)}\n`
      end
  | Lop(Iintop(Icomp cmp)) ->
      ` cmpl {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
      let b = name_for_cond_branch cmp in
      ` set{emit_string b} %al\n`;
      ` movzbl %al, {emit_reg i.res.(0)}\n`
  | Lop(Iintop_imm(Icomp cmp, n)) ->
      ` cmpl ${emit_int n}, {emit_reg i.arg.(0)}\n`;
      let b = name_for_cond_branch cmp in
      ` set{emit_string b} %al\n`;
      ` movzbl %al, {emit_reg i.res.(0)}\n`
  | Lop(Iintop Icheckbound) ->
      (* The trap label is allocated on first use; fundecl emits the
         trap code after the function body. *)
      if !range_check_trap = 0 then range_check_trap := new_label();
      ` cmpl {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
      ` jbe {emit_label !range_check_trap}\n`
  | Lop(Iintop_imm(Icheckbound, n)) ->
      if !range_check_trap = 0 then range_check_trap := new_label();
      ` cmpl ${emit_int n}, {emit_reg i.arg.(0)}\n`;
      ` jbe {emit_label !range_check_trap}\n`
  | Lop(Iintop(Idiv | Imod)) ->
      ` cltd\n`;
      ` idivl {emit_reg i.arg.(1)}\n`
  | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
      (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %ecx *)
      ` {emit_string(instr_for_intop op)} %cl, {emit_reg i.res.(0)}\n`
  | Lop(Iintop op) ->
      (* We have i.arg.(0) = i.res.(0) *)
      ` {emit_string(instr_for_intop op)} {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
  | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
      ` incl {emit_reg i.res.(0)}\n`
  | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
      ` decl {emit_reg i.res.(0)}\n`
  | Lop(Iintop_imm(op, n)) ->
      (* We have i.arg.(0) = i.res.(0) *)
      ` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n`
  | Lop(Iaddf | Isubf | Imulf | Idivf as floatop) ->
      let instr =
        match floatop with
          Iaddf -> "fadd"
        | Isubf -> "fsub"
        | Imulf -> "fmul"
        | Idivf -> "fdiv"
        | _ -> fatal_error "Emit_i386.emit_instr: floatop" in
      ` fldl {emit_reg i.arg.(0)}\n`;
      (* The second operand is named with emit_shift because the load
         above pushed one entry on the floating-point stack.  The memory
         form of the instruction carries the l suffix. *)
      begin match i.arg.(1).loc with
        Stack s ->
          ` {emit_string instr}l {emit_shift i.arg.(1)}\n`
      | _ ->
          ` {emit_string instr} {emit_shift i.arg.(1)}\n`
      end;
      ` fstpl {emit_shift i.res.(0)}\n`
  | Lop(Ifloatofint) ->
      begin match i.arg.(0).loc with
        Stack s ->
          ` fildl {emit_reg i.arg.(0)}\n`;
          ` fstpl {emit_shift i.res.(0)}\n`
      | _ ->
          (* The integer must go through memory: push it, convert from
             the top of the stack, then pop it. *)
          ` pushl {emit_reg i.arg.(0)}\n`;
          stack_offset := !stack_offset + 4;
          ` fildl (%esp)\n`;
          ` fstpl {emit_shift i.res.(0)}\n`;
          ` addl $4, %esp\n`;
          stack_offset := !stack_offset - 4
      end
  | Lop(Iintoffloat) ->
      (* Reserve 8 bytes of scratch space: 4(%esp) keeps the original
         FPU control word, (%esp) receives the modified one and later
         the converted integer.  While %esp is lowered, stack_offset
         must be 8 larger so that stack slots named by emit_reg and
         emit_shift are still addressed correctly. *)
      ` subl $8, %esp\n`;
      stack_offset := !stack_offset + 8;
      ` fnstcw 4(%esp)\n`;
      ` movl 4(%esp), %eax\n`;
      (* Overwrite the high byte of the control word so that the
         conversion rounds toward zero. *)
      ` movb $12, %ah\n`;
      ` movl %eax, (%esp)\n`;
      ` fldcw (%esp)\n`;
      ` fldl {emit_reg i.arg.(0)}\n`;
      begin match i.res.(0).loc with
        Stack s ->
          ` fistpl {emit_shift i.res.(0)}\n`
      | _ ->
          ` fistpl (%esp)\n`;
          ` movl (%esp), {emit_reg i.res.(0)}\n`
      end;
      (* Restore the control word saved above; otherwise the modified
         rounding and precision settings would remain in effect for all
         floating-point code executed afterwards. *)
      ` fldcw 4(%esp)\n`;
      ` addl $8, %esp\n`;
      stack_offset := !stack_offset - 8
  | Lop(Ispecific(Ilea addr)) ->
      ` lea {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
  | Lop(Ispecific(Istore_int(n, addr))) ->
      ` movl ${emit_int n}, {emit_addressing addr i.arg 0}\n`
  | Lop(Ispecific(Istore_symbol(s, addr))) ->
      ` movl ${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
  | Lop(Ispecific(Ioffset_loc(n, addr))) ->
      ` addl ${emit_int n}, {emit_addressing addr i.arg 0}\n`
  | Lreloadretaddr ->
      ()
  | Lreturn ->
      output_epilogue();
      ` ret\n`
  | Llabel lbl ->
      `{emit_label lbl}:\n`
  | Lbranch lbl ->
      ` jmp {emit_label lbl}\n`
  | Lcondbranch(tst, lbl) ->
      begin match tst with
        Itruetest ->
          output_test_zero i.arg.(0);
          ` jne {emit_label lbl}\n`
      | Ifalsetest ->
          output_test_zero i.arg.(0);
          ` je {emit_label lbl}\n`
      | Iinttest cmp ->
          ` cmpl {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
          let b = name_for_cond_branch cmp in
          ` j{emit_string b} {emit_label lbl}\n`
      | Iinttest_imm((Isigned Ceq | Isigned Cne |
                      Iunsigned Ceq | Iunsigned Cne) as cmp, 0) ->
          output_test_zero i.arg.(0);
          let b = name_for_cond_branch cmp in
          ` j{emit_string b} {emit_label lbl}\n`
      | Iinttest_imm(cmp, n) ->
          ` cmpl ${emit_int n}, {emit_reg i.arg.(0)}\n`;
          let b = name_for_cond_branch cmp in
          ` j{emit_string b} {emit_label lbl}\n`
      | Ifloattest cmp ->
          ` fldl {emit_reg i.arg.(0)}\n`;
          let comp_instr =
            match cmp with Ceq | Cne -> "fucom" | _ -> "fcom" in
          begin match i.arg.(1).loc with
            Stack s ->
              ` {emit_string comp_instr}pl {emit_shift i.arg.(1)}\n`
          | _ ->
              ` {emit_string comp_instr}p {emit_shift i.arg.(1)}\n`
          end;
          (* The comparison result is read from the FPU status word,
             whose high byte lands in %ah, and decoded with masks. *)
          ` fnstsw %ax\n`;
          begin match cmp with
            Ceq ->
              ` andb $69, %ah\n`;
              ` cmpb $64, %ah\n`;
              ` je {emit_label lbl}\n`
          | Cne ->
              ` andb $68, %ah\n`;
              ` xorb $64, %ah\n`;
              ` jne {emit_label lbl}\n`
          | Cle ->
              ` andb $69, %ah\n`;
              ` decb %ah\n`;
              ` cmpb $64, %ah\n`;
              ` jb {emit_label lbl}\n`
          | Cge ->
              ` andb $5, %ah\n`;
              ` je {emit_label lbl}\n`
          | Clt ->
              ` andb $69, %ah\n`;
              ` cmpb $1, %ah\n`;
              ` je {emit_label lbl}\n`
          | Cgt ->
              ` andb $69, %ah\n`;
              ` je {emit_label lbl}\n`
          end
      | Ioddtest ->
          ` testl $1, {emit_reg i.arg.(0)}\n`;
          ` jne {emit_label lbl}\n`
      | Ieventest ->
          ` testl $1, {emit_reg i.arg.(0)}\n`;
          ` je {emit_label lbl}\n`
      end
  | Lcondbranch3(lbl0, lbl1, lbl2) ->
      (* Three-way branch on one comparison with 1: below, equal,
         greater.  Absent targets fall through. *)
      ` cmpl $1, {emit_reg i.arg.(0)}\n`;
      begin match lbl0 with
        None -> ()
      | Some lbl -> ` jb {emit_label lbl}\n`
      end;
      begin match lbl1 with
        None -> ()
      | Some lbl -> ` je {emit_label lbl}\n`
      end;
      begin match lbl2 with
        None -> ()
      | Some lbl -> ` jg {emit_label lbl}\n`
      end
  | Lswitch jumptbl ->
      (* Indirect jump through a table of 4-byte label addresses that is
         emitted right after the jump. *)
      let lbl = new_label() in
      ` jmp *{emit_label lbl}(, {emit_reg i.arg.(0)}, 4)\n`;
      emit_align 4;
      `{emit_label lbl}:`;
      for i = 0 to Array.length jumptbl - 1 do
        ` .long {emit_label jumptbl.(i)}\n`
      done
  | Lsetuptrap lbl ->
      ` call {emit_label lbl}\n`
  | Lpushtrap ->
      (* Accounts for 8 bytes: the word pushed here plus the return
         address pushed by the call emitted for Lsetuptrap. *)
      ` pushl {emit_symbol "caml_exception_pointer"}\n`;
      ` movl %esp, {emit_symbol "caml_exception_pointer"}\n`;
      stack_offset := !stack_offset + 8
  | Lpoptrap ->
      ` popl {emit_symbol "caml_exception_pointer"}\n`;
      ` addl $4, %esp\n`;
      stack_offset := !stack_offset - 8
  | Lraise ->
      ` movl {emit_symbol "caml_exception_pointer"}, %esp\n`;
      ` popl {emit_symbol "caml_exception_pointer"}\n`;
      ` ret\n`

(* Emit every instruction of a list, up to the terminating Lend *)

let rec emit_all i =
  match i.desc with Lend -> () | _ -> emit_instr i; emit_all i.next

(* Emission of the floating-point constants *)

let emit_float_constant (lbl, cst) =
  ` .data\n`;
  `{emit_label lbl}: .double {emit_string cst}\n`

(* Emission of a function declaration *)

let fundecl fundecl =
  (* Reset the per-function state before emitting anything *)
  function_name := fundecl.fun_name;
  fastcode_flag := fundecl.fun_fast;
  tailrec_entry_point := new_label();
  stack_offset := 0;
  float_constants := [];
  range_check_trap := 0;
  ` .text\n`;
  emit_align 16; (* 16-byte alignment is recommended for the 486 *)
  ` .globl {emit_symbol fundecl.fun_name}\n`;
  `{emit_symbol fundecl.fun_name}:\n`;
  let n = frame_size() - 4 in
  if n > 0 then
    ` subl ${emit_int n}, %esp\n`;
  (* Self tail calls jump here, after the frame allocation *)
  `{emit_label !tailrec_entry_point}:`;
  emit_all fundecl.fun_body;
  (* The trap code is emitted only if a bound check requested it *)
  if !range_check_trap > 0 then
    `{emit_label !range_check_trap}: int $5\n`;
  List.iter emit_float_constant !float_constants

(* Emission of data *)

let emit_item = function
    Cdefine_symbol s ->
      ` .globl {emit_symbol s}\n`;
      `{emit_symbol s}:\n`
  | Cdefine_label lbl ->
      `{emit_label (10000 + lbl)}:\n`
  | Cint8 n ->
      ` .byte {emit_int n}\n`
  | Cint16 n ->
      ` .word {emit_int n}\n`
  | Cint n ->
      ` .long {emit_int n}\n`
  | Cintlit n ->
      ` .long {emit_string n}\n`
  | Cfloat f ->
      ` .double {emit_string f}\n`
  | Csymbol_address s ->
      ` .long {emit_symbol s}\n`
  | Clabel_address lbl ->
      ` .long {emit_label (10000 + lbl)}\n`
  | Cstring s ->
      (* Empty strings produce nothing; long strings are split into
         .ascii directives of at most 80 characters each. *)
      let l = String.length s in
      if l = 0 then ()
      else if l < 80 then
        ` .ascii {emit_string_literal s}\n`
      else begin
        let i = ref 0 in
        while !i < l do
          let n = min (l - !i) 80 in
          ` .ascii {emit_string_literal(String.sub s !i n)}\n`;
          i := !i + n
        done
      end
  | Cskip n ->
      if n > 0 then ` .space {emit_int n}\n`
  | Calign n ->
      emit_align n

let data l =
  ` .data\n`;
  List.iter emit_item l

(* Beginning / end of an assembly file *)

let begin_assembly() = ()

(* Output the table of all frame descriptors recorded so far, under the
   symbol formed from the unit name followed by _frametable, then clear
   the list of descriptors. *)

let end_assembly() =
  let lbl = Compilenv.current_unit_name() ^ "_frametable" in
  ` .data\n`;
  ` .globl {emit_symbol lbl}\n`;
  `{emit_symbol lbl}:\n`;
  ` .long {emit_int (List.length !frame_descriptors)}\n`;
  List.iter emit_frame !frame_descriptors;
  frame_descriptors := []