Amelioration des flottants x86: utiliser %st(0) comme registre quand c'est possible, evitant ainsi des couples fstp/fld; ajout option -ffast-math
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5404 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02master
parent
c517632cb2
commit
40efd97fe1
|
@ -14,7 +14,11 @@
|
|||
|
||||
(* Machine-specific command-line options *)
|
||||
|
||||
let command_line_options = []
|
||||
let fast_math = ref false
|
||||
|
||||
let command_line_options =
|
||||
[ "-ffast-math", Arg.Set fast_math,
|
||||
" Inline trigonometric and exponential functions" ]
|
||||
|
||||
(* Specific operations for the Intel 386 processor *)
|
||||
|
||||
|
@ -42,6 +46,8 @@ type specific_operation =
|
|||
| Ifloatarithmem of bool * float_operation * addressing_mode
|
||||
(* Float arith operation with memory *)
|
||||
(* bool: true=64 bits, false=32 *)
|
||||
| Ifloatspecial of string
|
||||
|
||||
and float_operation =
|
||||
Ifloatadd | Ifloatsub | Ifloatsubrev | Ifloatmul | Ifloatdiv | Ifloatdivrev
|
||||
|
||||
|
@ -132,3 +138,10 @@ let print_specific_operation printreg op ppf arg =
|
|||
let long = if double then "float64" else "float32" in
|
||||
fprintf ppf "%a %s %s[%a]" printreg arg.(0) (op_name op) long
|
||||
(print_addressing printreg addr) (Array.sub arg 1 (Array.length arg - 1))
|
||||
| Ifloatspecial name ->
|
||||
fprintf ppf "%s " name;
|
||||
for i = 0 to Array.length arg - 1 do
|
||||
if i > 0 then fprintf ppf ", ";
|
||||
printreg ppf arg.(i)
|
||||
done
|
||||
|
||||
|
|
|
@ -272,6 +272,99 @@ let output_epilogue () =
|
|||
let n = frame_size() - 4 in
|
||||
if n > 0 then ` addl ${emit_int n}, %esp\n`
|
||||
|
||||
(* Determine if the given register is the top of the floating-point stack *)
|
||||
|
||||
let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
|
||||
|
||||
(* Emit a "p" suffix if TOS is dead after the given instruction *)
|
||||
|
||||
let pop_suffix i =
|
||||
let r = i.arg.(0) in
|
||||
if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p"
|
||||
|
||||
(* Emit the code for a floating-point comparison *)
|
||||
|
||||
let emit_float_test cmp neg arg lbl =
|
||||
let actual_cmp =
|
||||
match (is_tos arg.(0), is_tos arg.(1)) with
|
||||
(true, true) ->
|
||||
(* both args on top of FP stack *)
|
||||
` fcompp\n`;
|
||||
cmp
|
||||
| (true, false) ->
|
||||
(* first arg on top of FP stack *)
|
||||
` fcompl {emit_reg arg.(1)}\n`;
|
||||
cmp
|
||||
| (false, true) ->
|
||||
(* second arg on top of FP stack *)
|
||||
` fcompl {emit_reg arg.(0)}\n`;
|
||||
Cmm.swap_comparison cmp
|
||||
| (false, false) ->
|
||||
` fldl {emit_reg arg.(0)}\n`;
|
||||
` fcompl {emit_reg arg.(1)}\n`;
|
||||
cmp
|
||||
in
|
||||
` fnstsw %ax\n`;
|
||||
begin match actual_cmp with
|
||||
Ceq ->
|
||||
if neg then begin
|
||||
` andb $68, %ah\n`;
|
||||
` xorb $64, %ah\n`;
|
||||
` jne `
|
||||
end else begin
|
||||
` andb $69, %ah\n`;
|
||||
` cmpb $64, %ah\n`;
|
||||
` je `
|
||||
end
|
||||
| Cne ->
|
||||
if neg then begin
|
||||
` andb $69, %ah\n`;
|
||||
` cmpb $64, %ah\n`;
|
||||
` je `
|
||||
end else begin
|
||||
` andb $68, %ah\n`;
|
||||
` xorb $64, %ah\n`;
|
||||
` jne `
|
||||
end
|
||||
| Cle ->
|
||||
` andb $69, %ah\n`;
|
||||
` decb %ah\n`;
|
||||
` cmpb $64, %ah\n`;
|
||||
if neg
|
||||
then ` jae `
|
||||
else ` jb `
|
||||
| Cge ->
|
||||
` andb $5, %ah\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
| Clt ->
|
||||
` andb $69, %ah\n`;
|
||||
` cmpb $1, %ah\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
| Cgt ->
|
||||
` andb $69, %ah\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
end;
|
||||
`{emit_label lbl}\n`
|
||||
|
||||
(* Emit a Ifloatspecial instruction *)
|
||||
|
||||
let emit_floatspecial = function
|
||||
"atan" -> ` fldl1; fpatan\n`
|
||||
| "atan2" -> ` fpatan\n`
|
||||
| "cos" -> ` fcos\n`
|
||||
| "log" -> ` fldln2; fxch; fyl2x\n`
|
||||
| "log10" -> ` fldlg2; fxch; fyl2x\n`
|
||||
| "sin" -> ` fsin\n`
|
||||
| "sqrt" -> ` fsqrt\n`
|
||||
| "tan" -> ` fptan; fstp %st(0)\n`
|
||||
| _ -> assert false
|
||||
|
||||
(* Output the assembly code for an instruction *)
|
||||
|
||||
(* Name of current function *)
|
||||
|
@ -280,12 +373,9 @@ let function_name = ref ""
|
|||
let tailrec_entry_point = ref 0
|
||||
(* Label of trap for out-of-range accesses *)
|
||||
let range_check_trap = ref 0
|
||||
|
||||
(* Record float literals to be emitted later *)
|
||||
let float_constants = ref ([] : (int * string) list)
|
||||
|
||||
let tos = phys_reg 100
|
||||
|
||||
|
||||
let emit_instr fallthrough i =
|
||||
match i.desc with
|
||||
Lend -> ()
|
||||
|
@ -293,8 +383,10 @@ let emit_instr fallthrough i =
|
|||
let src = i.arg.(0) and dst = i.res.(0) in
|
||||
if src.loc <> dst.loc then begin
|
||||
if src.typ = Float then
|
||||
if src = tos then
|
||||
` fstpl {emit_reg dst}\n`
|
||||
if is_tos src then
|
||||
` fst{pop_suffix i}l {emit_reg dst}\n`
|
||||
else if is_tos dst then
|
||||
` fldl {emit_reg src}\n`
|
||||
else begin
|
||||
` fldl {emit_reg src}\n`;
|
||||
` fstpl {emit_reg dst}\n`
|
||||
|
@ -354,6 +446,8 @@ let emit_instr fallthrough i =
|
|||
| Lop(Iload(chunk, addr)) ->
|
||||
let dest = i.res.(0) in
|
||||
begin match chunk with
|
||||
| Word | Thirtytwo_signed | Thirtytwo_unsigned ->
|
||||
` movl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
|
||||
| Byte_unsigned ->
|
||||
` movzbl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
|
||||
| Byte_signed ->
|
||||
|
@ -366,8 +460,6 @@ let emit_instr fallthrough i =
|
|||
` flds {emit_addressing addr i.arg 0}\n`
|
||||
| Double | Double_u ->
|
||||
` fldl {emit_addressing addr i.arg 0}\n`
|
||||
| _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) ->
|
||||
` movl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
|
||||
end
|
||||
| Lop(Istore(chunk, addr)) ->
|
||||
begin match chunk with
|
||||
|
@ -378,15 +470,15 @@ let emit_instr fallthrough i =
|
|||
| Sixteen_unsigned | Sixteen_signed ->
|
||||
` movw {emit_reg16 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
|
||||
| Single ->
|
||||
if i.arg.(0) = tos then
|
||||
` fstps {emit_addressing addr i.arg 1}\n`
|
||||
if is_tos i.arg.(0) then
|
||||
` fst{pop_suffix i}s {emit_addressing addr i.arg 1}\n`
|
||||
else begin
|
||||
` fldl {emit_reg i.arg.(0)}\n`;
|
||||
` fstps {emit_addressing addr i.arg 1}\n`
|
||||
end
|
||||
| Double | Double_u ->
|
||||
if i.arg.(0) = tos then
|
||||
` fstpl {emit_addressing addr i.arg 1}\n`
|
||||
if is_tos i.arg.(0) then
|
||||
` fst{pop_suffix i}l {emit_addressing addr i.arg 1}\n`
|
||||
else begin
|
||||
` fldl {emit_reg i.arg.(0)}\n`;
|
||||
` fstpl {emit_addressing addr i.arg 1}\n`
|
||||
|
@ -444,12 +536,12 @@ let emit_instr fallthrough i =
|
|||
| Lop(Iintop op) ->
|
||||
(* We have i.arg.(0) = i.res.(0) *)
|
||||
` {emit_string(instr_for_intop op)} {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
|
||||
| Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
|
||||
` leal {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
|
||||
| Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
|
||||
` incl {emit_reg i.res.(0)}\n`
|
||||
| Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
|
||||
` decl {emit_reg i.res.(0)}\n`
|
||||
| Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
|
||||
` leal {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
|
||||
| Lop(Iintop_imm(Idiv, n)) ->
|
||||
let l = Misc.log2 n in
|
||||
let lbl = new_label() in
|
||||
|
@ -470,21 +562,22 @@ let emit_instr fallthrough i =
|
|||
(* We have i.arg.(0) = i.res.(0) *)
|
||||
` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n`
|
||||
| Lop(Inegf | Iabsf as floatop) ->
|
||||
if i.arg.(0) <> tos then
|
||||
if not (is_tos i.arg.(0)) then
|
||||
` fldl {emit_reg i.arg.(0)}\n`;
|
||||
` {emit_string(instr_for_floatop floatop)}\n`
|
||||
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
|
||||
as floatop) ->
|
||||
if i.arg.(0) = tos && i.arg.(1) = tos then
|
||||
begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
|
||||
(true, true) ->
|
||||
(* both operands on top of FP stack *)
|
||||
` {emit_string(instr_for_floatop_pop floatop)} %st, %st(1)\n`
|
||||
else if i.arg.(0) = tos then
|
||||
| (true, false) ->
|
||||
(* first operand on stack *)
|
||||
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
|
||||
else if i.arg.(1) = tos then
|
||||
| (false, true) ->
|
||||
(* second operand on stack *)
|
||||
` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n`
|
||||
else begin
|
||||
| (false, false) ->
|
||||
(* both operands in memory *)
|
||||
` fldl {emit_reg i.arg.(0)}\n`;
|
||||
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
|
||||
|
@ -499,7 +592,7 @@ let emit_instr fallthrough i =
|
|||
` addl $4, %esp\n`
|
||||
end
|
||||
| Lop(Iintoffloat) ->
|
||||
if i.arg.(0) <> tos then
|
||||
if not (is_tos i.arg.(0)) then
|
||||
` fldl {emit_reg i.arg.(0)}\n`;
|
||||
stack_offset := !stack_offset - 8;
|
||||
` subl $8, %esp\n`;
|
||||
|
@ -510,9 +603,9 @@ let emit_instr fallthrough i =
|
|||
` fldcw (%esp)\n`;
|
||||
begin match i.res.(0).loc with
|
||||
Stack s ->
|
||||
` fistpl {emit_reg i.res.(0)}\n`
|
||||
` fist{pop_suffix i}l {emit_reg i.res.(0)}\n`
|
||||
| _ ->
|
||||
` fistpl (%esp)\n`;
|
||||
` fist{pop_suffix i}l (%esp)\n`;
|
||||
` movl (%esp), {emit_reg i.res.(0)}\n`
|
||||
end;
|
||||
` fldcw 4(%esp)\n`;
|
||||
|
@ -558,9 +651,18 @@ let emit_instr fallthrough i =
|
|||
` pushl {emit_addressing addr i.arg 0}\n`;
|
||||
stack_offset := !stack_offset + 8
|
||||
| Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
|
||||
if i.arg.(0) <> tos then
|
||||
if not (is_tos i.arg.(0)) then
|
||||
` fldl {emit_reg i.arg.(0)}\n`;
|
||||
` {emit_string(instr_for_floatarithmem double op)} {emit_addressing addr i.arg 1}\n`
|
||||
| Lop(Ispecific(Ifloatspecial s)) ->
|
||||
(* Push args on float stack if necessary *)
|
||||
for k = 0 to Array.length i.arg - 1 do
|
||||
if not (is_tos i.arg.(k)) then ` fldl {emit_reg i.arg.(k)}\n`
|
||||
done;
|
||||
(* Fix-up for binary instrs whose args were swapped *)
|
||||
if Array.length i.arg = 2 && is_tos i.arg.(1) then
|
||||
` fxch %st(1)\n`;
|
||||
emit_floatspecial s
|
||||
| Lreloadretaddr ->
|
||||
()
|
||||
| Lreturn ->
|
||||
|
@ -591,70 +693,8 @@ let emit_instr fallthrough i =
|
|||
` cmpl ${emit_int n}, {emit_reg i.arg.(0)}\n`;
|
||||
let b = name_for_cond_branch cmp in
|
||||
` j{emit_string b} {emit_label lbl}\n`
|
||||
| Ifloattest((Ceq | Cne as cmp), neg) ->
|
||||
if i.arg.(1) <> tos then
|
||||
` fldl {emit_reg i.arg.(1)}\n`;
|
||||
if i.arg.(0) <> tos then
|
||||
` fldl {emit_reg i.arg.(0)}\n`;
|
||||
` fucompp\n`;
|
||||
` fnstsw %ax\n`;
|
||||
let neg1 = if cmp = Ceq then neg else not neg in
|
||||
if neg1 then begin (* branch if different *)
|
||||
` andb $68, %ah\n`;
|
||||
` xorb $64, %ah\n`;
|
||||
` jne {emit_label lbl}\n`
|
||||
end else begin (* branch if equal *)
|
||||
` andb $69, %ah\n`;
|
||||
` cmpb $64, %ah\n`;
|
||||
` je {emit_label lbl}\n`
|
||||
end
|
||||
| Ifloattest(cmp, neg) ->
|
||||
let actual_cmp =
|
||||
if i.arg.(0) = tos && i.arg.(1) = tos then begin
|
||||
(* both args on top of FP stack *)
|
||||
` fcompp\n`;
|
||||
cmp
|
||||
end else if i.arg.(0) = tos then begin
|
||||
(* first arg on top of FP stack *)
|
||||
` fcompl {emit_reg i.arg.(1)}\n`;
|
||||
cmp
|
||||
end else if i.arg.(1) = tos then begin
|
||||
(* second arg on top of FP stack *)
|
||||
` fcompl {emit_reg i.arg.(0)}\n`;
|
||||
Cmm.swap_comparison cmp
|
||||
end else begin
|
||||
` fldl {emit_reg i.arg.(0)}\n`;
|
||||
` fcompl {emit_reg i.arg.(1)}\n`;
|
||||
cmp
|
||||
end in
|
||||
` fnstsw %ax\n`;
|
||||
begin match actual_cmp with
|
||||
Cle ->
|
||||
` andb $69, %ah\n`;
|
||||
` decb %ah\n`;
|
||||
` cmpb $64, %ah\n`;
|
||||
if neg
|
||||
then ` jae `
|
||||
else ` jb `
|
||||
| Cge ->
|
||||
` andb $5, %ah\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
| Clt ->
|
||||
` andb $69, %ah\n`;
|
||||
` cmpb $1, %ah\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
| Cgt ->
|
||||
` andb $69, %ah\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
| _ -> fatal_error "Emit_i386: floattest"
|
||||
end;
|
||||
`{emit_label lbl}\n`
|
||||
emit_float_test cmp neg i.arg lbl
|
||||
| Ioddtest ->
|
||||
` testl $1, {emit_reg i.arg.(0)}\n`;
|
||||
` jne {emit_label lbl}\n`
|
||||
|
|
|
@ -246,6 +246,99 @@ let output_epilogue () =
|
|||
let n = frame_size() - 4 in
|
||||
if n > 0 then ` add esp, {emit_int n}\n`
|
||||
|
||||
(* Determine if the given register is the top of the floating-point stack *)
|
||||
|
||||
let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
|
||||
|
||||
(* Emit a "p" suffix if TOS is dead after the given instruction *)
|
||||
|
||||
let pop_suffix i =
|
||||
let r = i.arg.(0) in
|
||||
if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p"
|
||||
|
||||
(* Emit the code for a floating-point comparison *)
|
||||
|
||||
let emit_float_test cmp neg arg lbl =
|
||||
let actual_cmp =
|
||||
match (is_tos arg.(0), is_tos arg.(1)) with
|
||||
(true, true) ->
|
||||
(* both args on top of FP stack *)
|
||||
` fcompp\n`;
|
||||
cmp
|
||||
| (true, false) ->
|
||||
(* first arg on top of FP stack *)
|
||||
` fcompl {emit_reg arg.(1)}\n`;
|
||||
cmp
|
||||
| (false, true) ->
|
||||
(* second arg on top of FP stack *)
|
||||
` fcompl {emit_reg arg.(0)}\n`;
|
||||
Cmm.swap_comparison cmp
|
||||
| (false, false) ->
|
||||
` fldl {emit_reg arg.(0)}\n`;
|
||||
` fcompl {emit_reg arg.(1)}\n`;
|
||||
cmp
|
||||
in
|
||||
` fnstsw %ax\n`;
|
||||
begin match actual_cmp with
|
||||
Ceq ->
|
||||
if neg then begin
|
||||
` and ah, 68\n`;
|
||||
` xor ah, 64\n`;
|
||||
` jne `
|
||||
end else begin
|
||||
` and ah, 69\n`;
|
||||
` cmp ah, 64\n`;
|
||||
` je `
|
||||
end
|
||||
| Cne ->
|
||||
if neg then begin
|
||||
` and ah, 69\n`;
|
||||
` cmp ah, 64\n`;
|
||||
` je `
|
||||
end else begin
|
||||
` and ah, 68\n`;
|
||||
` xor ah, 64\n`;
|
||||
` jne `
|
||||
end
|
||||
| Cle ->
|
||||
` and ah, 69\n`;
|
||||
` dec ah\n`;
|
||||
` cmp ah, 64\n`;
|
||||
if neg
|
||||
then ` jae `
|
||||
else ` jb `
|
||||
| Cge ->
|
||||
` and ah, 5\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
| Clt ->
|
||||
` and ah, 69\n`;
|
||||
` cmp ah, 1\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
| Cgt ->
|
||||
` and ah, 69\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
end;
|
||||
`{emit_label lbl}\n`
|
||||
|
||||
(* Emit a Ifloatspecial instruction *)
|
||||
|
||||
let emit_floatspecial = function
|
||||
"atan" -> ` fldl1\n\tfpatan\n`
|
||||
| "atan2" -> ` fpatan\n`
|
||||
| "cos" -> ` fcos\n`
|
||||
| "log" -> ` fldln2\n\tfxch\n\tfyl2x\n`
|
||||
| "log10" -> ` fldlg2\n\tfxch\n\tfyl2x\n`
|
||||
| "sin" -> ` fsin\n`
|
||||
| "sqrt" -> ` fsqrt\n`
|
||||
| "tan" -> ` fptan\n\tfstp st(0)\n`
|
||||
| _ -> assert false
|
||||
|
||||
(* Output the assembly code for an instruction *)
|
||||
|
||||
(* Name of current function *)
|
||||
|
@ -257,8 +350,6 @@ let range_check_trap = ref 0
|
|||
|
||||
let float_constants = ref ([] : (int * string) list)
|
||||
|
||||
let tos = phys_reg 100
|
||||
|
||||
let emit_instr i =
|
||||
match i.desc with
|
||||
Lend -> ()
|
||||
|
@ -266,8 +357,10 @@ let emit_instr i =
|
|||
let src = i.arg.(0) and dst = i.res.(0) in
|
||||
if src.loc <> dst.loc then begin
|
||||
if src.typ = Float then
|
||||
if src = tos then
|
||||
` fstp {emit_reg dst}\n`
|
||||
if is_tos src then
|
||||
` fst{pop_suffix i} {emit_reg dst}\n`
|
||||
else if is_tos dst then
|
||||
` fld {emit_reg dst}\n`
|
||||
else begin
|
||||
` fld {emit_reg src}\n`;
|
||||
` fstp {emit_reg dst}\n`
|
||||
|
@ -331,6 +424,8 @@ let emit_instr i =
|
|||
| Lop(Iload(chunk, addr)) ->
|
||||
let dest = i.res.(0) in
|
||||
begin match chunk with
|
||||
| Word | Thirtytwo_signed | Thirtytwo_unsigned ->
|
||||
` mov {emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n`
|
||||
| Byte_unsigned ->
|
||||
` movzx {emit_reg dest}, BYTE PTR {emit_addressing addr i.arg 0}\n`
|
||||
| Byte_signed ->
|
||||
|
@ -343,8 +438,6 @@ let emit_instr i =
|
|||
` fld REAL4 PTR {emit_addressing addr i.arg 0}\n`
|
||||
| Double | Double_u ->
|
||||
` fld REAL8 PTR {emit_addressing addr i.arg 0}\n`
|
||||
| _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) ->
|
||||
` mov {emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n`
|
||||
end
|
||||
| Lop(Istore(chunk, addr)) ->
|
||||
begin match chunk with
|
||||
|
@ -355,15 +448,15 @@ let emit_instr i =
|
|||
| Sixteen_unsigned | Sixteen_signed ->
|
||||
` mov WORD PTR {emit_addressing addr i.arg 1}, {emit_reg16 i.arg.(0)}\n`
|
||||
| Single ->
|
||||
if i.arg.(0) = tos then
|
||||
` fstp REAL4 PTR {emit_addressing addr i.arg 1}\n`
|
||||
if is_tos i.arg.(0) then
|
||||
` fst{pop_suffix i} REAL4 PTR {emit_addressing addr i.arg 1}\n`
|
||||
else begin
|
||||
` fld {emit_reg i.arg.(0)}\n`;
|
||||
` fstp REAL4 PTR {emit_addressing addr i.arg 1}\n`
|
||||
end
|
||||
| Double | Double_u ->
|
||||
if i.arg.(0) = tos then
|
||||
` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n`
|
||||
if is_tops i.arg.(0) then
|
||||
` fst{pop_suffix i} REAL8 PTR {emit_addressing addr i.arg 1}\n`
|
||||
else begin
|
||||
` fld {emit_reg i.arg.(0)}\n`;
|
||||
` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n`
|
||||
|
@ -421,12 +514,12 @@ let emit_instr i =
|
|||
| Lop(Iintop op) ->
|
||||
(* We have i.arg.(0) = i.res.(0) *)
|
||||
` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`
|
||||
| Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
|
||||
` lea {emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n`
|
||||
| Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
|
||||
` inc {emit_reg i.res.(0)}\n`
|
||||
| Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
|
||||
` dec {emit_reg i.res.(0)}\n`
|
||||
| Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
|
||||
` lea {emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n`
|
||||
| Lop(Iintop_imm(Idiv, n)) ->
|
||||
let l = Misc.log2 n in
|
||||
let lbl = new_label() in
|
||||
|
@ -447,21 +540,22 @@ let emit_instr i =
|
|||
(* We have i.arg.(0) = i.res.(0) *)
|
||||
` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_int n}\n`
|
||||
| Lop(Inegf | Iabsf as floatop) ->
|
||||
if i.arg.(0) <> tos then
|
||||
if not (is_tos i.arg.(0)) then
|
||||
` fld {emit_reg i.arg.(0)}\n`;
|
||||
` {emit_string(instr_for_floatop floatop)}\n`
|
||||
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
|
||||
as floatop) ->
|
||||
if i.arg.(0) = tos && i.arg.(1) = tos then
|
||||
begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
|
||||
(true, true) ->
|
||||
(* both operands on top of FP stack *)
|
||||
` {emit_string(instr_for_floatop_reversed floatop)}\n`
|
||||
else if i.arg.(0) = tos then
|
||||
` {emit_string(instr_for_floatop_pop floatop)}\n`
|
||||
| (true, false) ->
|
||||
(* first operand on stack *)
|
||||
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
|
||||
else if i.arg.(1) = tos then
|
||||
| (false, true) ->
|
||||
(* second operand on stack *)
|
||||
` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n`
|
||||
else begin
|
||||
| (false, false) ->
|
||||
(* both operands in memory *)
|
||||
` fld {emit_reg i.arg.(0)}\n`;
|
||||
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
|
||||
|
@ -476,7 +570,7 @@ let emit_instr i =
|
|||
` add esp, 4\n`
|
||||
end
|
||||
| Lop(Iintoffloat) ->
|
||||
if i.arg.(0) <> tos then
|
||||
if not (is_tos i.arg.(0)) then
|
||||
` fld {emit_reg i.arg.(0)}\n`;
|
||||
stack_offset := !stack_offset - 8;
|
||||
` sub esp, 8\n`;
|
||||
|
@ -487,9 +581,9 @@ let emit_instr i =
|
|||
` fldcw [esp]\n`;
|
||||
begin match i.res.(0).loc with
|
||||
Stack s ->
|
||||
` fistp {emit_reg i.res.(0)}\n`
|
||||
` fist{pop_suffix i} {emit_reg i.res.(0)}\n`
|
||||
| _ ->
|
||||
` fistp DWORD PTR [esp]\n`;
|
||||
` fist{pop_suffix i} DWORD PTR [esp]\n`;
|
||||
` mov {emit_reg i.res.(0)}, [esp]\n`
|
||||
end;
|
||||
` fldcw [esp+4]\n`;
|
||||
|
@ -537,10 +631,19 @@ let emit_instr i =
|
|||
` push DWORD PTR {emit_addressing addr i.arg 0}\n`;
|
||||
stack_offset := !stack_offset + 8
|
||||
| Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
|
||||
if i.arg.(0) <> tos then
|
||||
if not (is_tos i.arg.(0)) then
|
||||
` fld {emit_reg i.arg.(0)}\n`;
|
||||
let size = if double then "REAL8" else "REAL4" in
|
||||
` {emit_string(instr_for_floatarithmem op)} {emit_string size} PTR {emit_addressing addr i.arg 1}\n`
|
||||
| Lop(Ispecific(Ifloatspecial s)) ->
|
||||
(* Push args on float stack if necessary *)
|
||||
for k = 0 to Array.length i.arg - 1 do
|
||||
if not (is_tos i.arg.(k)) then ` fld {emit_reg i.arg.(k)}\n`
|
||||
done;
|
||||
(* Fix-up for binary instrs whose args were swapped *)
|
||||
if Array.length i.arg = 2 && is_tos i.arg.(1) then
|
||||
` fxch st(1)\n`;
|
||||
emit_floatspecial s
|
||||
| Lreloadretaddr ->
|
||||
()
|
||||
| Lreturn ->
|
||||
|
@ -571,70 +674,8 @@ let emit_instr i =
|
|||
` cmp {emit_reg i.arg.(0)}, {emit_int n}\n`;
|
||||
let b = name_for_cond_branch cmp in
|
||||
` j{emit_string b} {emit_label lbl}\n`
|
||||
| Ifloattest((Ceq | Cne as cmp), neg) ->
|
||||
if i.arg.(1) <> tos then
|
||||
` fld {emit_reg i.arg.(1)}\n`;
|
||||
if i.arg.(0) <> tos then
|
||||
` fld {emit_reg i.arg.(0)}\n`;
|
||||
` fucompp\n`;
|
||||
` fnstsw ax\n`;
|
||||
let neg1 = if cmp = Ceq then neg else not neg in
|
||||
if neg1 then begin (* branch if different *)
|
||||
` and ah, 68\n`;
|
||||
` xor ah, 64\n`;
|
||||
` jne {emit_label lbl}\n`
|
||||
end else begin (* branch if equal *)
|
||||
` and ah, 69\n`;
|
||||
` cmp ah, 64\n`;
|
||||
` je {emit_label lbl}\n`
|
||||
end
|
||||
| Ifloattest(cmp, neg) ->
|
||||
let actual_cmp =
|
||||
if i.arg.(0) = tos && i.arg.(1) = tos then begin
|
||||
(* both args on top of FP stack *)
|
||||
` fcompp\n`;
|
||||
cmp
|
||||
end else if i.arg.(0) = tos then begin
|
||||
(* first arg on top of FP stack *)
|
||||
` fcomp {emit_reg i.arg.(1)}\n`;
|
||||
cmp
|
||||
end else if i.arg.(1) = tos then begin
|
||||
(* second arg on top of FP stack *)
|
||||
` fcomp {emit_reg i.arg.(0)}\n`;
|
||||
Cmm.swap_comparison cmp
|
||||
end else begin
|
||||
` fld {emit_reg i.arg.(0)}\n`;
|
||||
` fcomp {emit_reg i.arg.(1)}\n`;
|
||||
cmp
|
||||
end in
|
||||
` fnstsw ax\n`;
|
||||
begin match actual_cmp with
|
||||
Cle ->
|
||||
` and ah, 69\n`;
|
||||
` dec ah\n`;
|
||||
` cmp ah, 64\n`;
|
||||
if neg
|
||||
then ` jae `
|
||||
else ` jb `
|
||||
| Cge ->
|
||||
` and ah, 5\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
| Clt ->
|
||||
` and ah, 69\n`;
|
||||
` cmp ah, 1\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
| Cgt ->
|
||||
` and ah, 69\n`;
|
||||
if neg
|
||||
then ` jne `
|
||||
else ` je `
|
||||
| _ -> fatal_error "Emit_i386: floattest"
|
||||
end;
|
||||
`{emit_label lbl}\n`
|
||||
emit_float_test cmp neg i.arg lbl
|
||||
| Ioddtest ->
|
||||
` test {emit_reg i.arg.(0)}, 1\n`;
|
||||
` jne {emit_label lbl}\n`
|
||||
|
|
|
@ -47,7 +47,7 @@ let register_class r =
|
|||
| Addr -> 0
|
||||
| Float -> 1
|
||||
|
||||
let num_available_registers = [| 7; 0 |]
|
||||
let num_available_registers = [| 7; 1 |]
|
||||
|
||||
let first_available_register = [| 0; 100 |]
|
||||
|
||||
|
|
|
@ -73,14 +73,28 @@ let rec select_addr exp =
|
|||
| arg ->
|
||||
(Alinear arg, 0)
|
||||
|
||||
(* C functions to be turned into Ifloatspecial instructions if -ffast-math *)
|
||||
|
||||
let inline_float_ops =
|
||||
["atan"; "atan2"; "cos"; "log"; "log10"; "sin"; "sqrt"; "tan"]
|
||||
|
||||
(* Estimate number of float temporaries needed to evaluate expression
|
||||
(Ershov's algorithm) *)
|
||||
|
||||
let rec float_needs = function
|
||||
Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
|
||||
Cop((Cnegf | Cabsf), [arg]) ->
|
||||
float_needs arg
|
||||
| Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
|
||||
let n1 = float_needs arg1 in
|
||||
let n2 = float_needs arg2 in
|
||||
if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2
|
||||
| Cop(Cextcall(fn, ty_res, alloc), args)
|
||||
when !fast_math && List.mem fn inline_float_ops ->
|
||||
begin match args with
|
||||
[arg] -> float_needs arg
|
||||
| [arg1; arg2] -> max (float_needs arg2 + 1) (float_needs arg1)
|
||||
| _ -> assert false
|
||||
end
|
||||
| _ ->
|
||||
1
|
||||
|
||||
|
@ -119,7 +133,7 @@ let pseudoregs_for_operation op arg res =
|
|||
the result is always left at the top of the floating-point stack *)
|
||||
| Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
|
||||
| Ifloatofint | Iload((Single | Double | Double_u), _)
|
||||
| Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _)) ->
|
||||
| Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _) | Ifloatspecial _) ->
|
||||
(arg, [| tos |], false) (* don't move it immediately *)
|
||||
(* For storing a byte, the argument must be in eax...edx.
|
||||
(But for a short, any reg will do!)
|
||||
|
@ -215,6 +229,11 @@ method select_operation op args =
|
|||
| _ ->
|
||||
super#select_operation op args
|
||||
end
|
||||
(* Recognize inlined floating point operations *)
|
||||
| Cextcall(fn, ty_res, false)
|
||||
when !fast_math && List.mem fn inline_float_ops ->
|
||||
(Ispecific(Ifloatspecial fn), args)
|
||||
(* Default *)
|
||||
| _ -> super#select_operation op args
|
||||
|
||||
(* Recognize float arithmetic with mem *)
|
||||
|
|
|
@ -76,6 +76,13 @@ let size_expr env exp =
|
|||
fatal_error "Selection.size_expr"
|
||||
in size Tbl.empty exp
|
||||
|
||||
(* These are C library functions that are known to be pure
|
||||
(no side effects at all) and worth not pre-computing. *)
|
||||
|
||||
let pure_external_functions =
|
||||
["acos"; "asin"; "atan"; "atan2"; "cos"; "exp"; "log";
|
||||
"log10"; "sin"; "sqrt"; "tan"]
|
||||
|
||||
(* Says if an expression is "simple". A "simple" expression has no
|
||||
side-effects and its execution can be delayed until its value
|
||||
is really needed. In the case of e.g. an [alloc] instruction,
|
||||
|
@ -97,9 +104,15 @@ let rec is_simple_expr = function
|
|||
| Cop(op, args) ->
|
||||
begin match op with
|
||||
(* The following may have side effects *)
|
||||
Capply _ | Cextcall(_, _, _) | Calloc | Cstore _ | Craise -> false
|
||||
| Capply _ | Calloc | Cstore _ | Craise -> false
|
||||
(* External C functions normally have side effects, unless known *)
|
||||
| Cextcall(fn, _, alloc) ->
|
||||
not alloc &&
|
||||
List.mem fn pure_external_functions &&
|
||||
List.for_all is_simple_expr args
|
||||
(* The remaining operations are simple if their args are *)
|
||||
| _ -> List.for_all is_simple_expr args
|
||||
| _ ->
|
||||
List.for_all is_simple_expr args
|
||||
end
|
||||
| _ -> false
|
||||
|
||||
|
|
Loading…
Reference in New Issue