Amelioration des flottants x86: utiliser %st(0) comme registre quand c'est possible, evitant ainsi des couples fstp/fld; ajout option -ffast-math

git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5404 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
master
Xavier Leroy 2003-02-25 15:50:13 +00:00
parent c517632cb2
commit 40efd97fe1
6 changed files with 303 additions and 177 deletions

View File

@ -14,7 +14,11 @@
(* Machine-specific command-line options *)
let command_line_options = []
let fast_math = ref false
let command_line_options =
[ "-ffast-math", Arg.Set fast_math,
" Inline trigonometric and exponential functions" ]
(* Specific operations for the Intel 386 processor *)
@ -42,6 +46,8 @@ type specific_operation =
| Ifloatarithmem of bool * float_operation * addressing_mode
(* Float arith operation with memory *)
(* bool: true=64 bits, false=32 *)
| Ifloatspecial of string
and float_operation =
Ifloatadd | Ifloatsub | Ifloatsubrev | Ifloatmul | Ifloatdiv | Ifloatdivrev
@ -132,3 +138,10 @@ let print_specific_operation printreg op ppf arg =
let long = if double then "float64" else "float32" in
fprintf ppf "%a %s %s[%a]" printreg arg.(0) (op_name op) long
(print_addressing printreg addr) (Array.sub arg 1 (Array.length arg - 1))
| Ifloatspecial name ->
fprintf ppf "%s " name;
for i = 0 to Array.length arg - 1 do
if i > 0 then fprintf ppf ", ";
printreg ppf arg.(i)
done

View File

@ -272,6 +272,99 @@ let output_epilogue () =
let n = frame_size() - 4 in
if n > 0 then ` addl ${emit_int n}, %esp\n`
(* Determine if the given register is the top of the floating-point stack *)
let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
(* Emit a "p" suffix if TOS is dead after the given instruction *)
let pop_suffix i =
let r = i.arg.(0) in
if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p"
(* Emit the code for a floating-point comparison *)
let emit_float_test cmp neg arg lbl =
let actual_cmp =
match (is_tos arg.(0), is_tos arg.(1)) with
(true, true) ->
(* both args on top of FP stack *)
` fcompp\n`;
cmp
| (true, false) ->
(* first arg on top of FP stack *)
` fcompl {emit_reg arg.(1)}\n`;
cmp
| (false, true) ->
(* second arg on top of FP stack *)
` fcompl {emit_reg arg.(0)}\n`;
Cmm.swap_comparison cmp
| (false, false) ->
` fldl {emit_reg arg.(0)}\n`;
` fcompl {emit_reg arg.(1)}\n`;
cmp
in
` fnstsw %ax\n`;
begin match actual_cmp with
Ceq ->
if neg then begin
` andb $68, %ah\n`;
` xorb $64, %ah\n`;
` jne `
end else begin
` andb $69, %ah\n`;
` cmpb $64, %ah\n`;
` je `
end
| Cne ->
if neg then begin
` andb $69, %ah\n`;
` cmpb $64, %ah\n`;
` je `
end else begin
` andb $68, %ah\n`;
` xorb $64, %ah\n`;
` jne `
end
| Cle ->
` andb $69, %ah\n`;
` decb %ah\n`;
` cmpb $64, %ah\n`;
if neg
then ` jae `
else ` jb `
| Cge ->
` andb $5, %ah\n`;
if neg
then ` jne `
else ` je `
| Clt ->
` andb $69, %ah\n`;
` cmpb $1, %ah\n`;
if neg
then ` jne `
else ` je `
| Cgt ->
` andb $69, %ah\n`;
if neg
then ` jne `
else ` je `
end;
`{emit_label lbl}\n`
(* Emit a Ifloatspecial instruction *)
let emit_floatspecial = function
"atan" -> ` fldl1; fpatan\n`
| "atan2" -> ` fpatan\n`
| "cos" -> ` fcos\n`
| "log" -> ` fldln2; fxch; fyl2x\n`
| "log10" -> ` fldlg2; fxch; fyl2x\n`
| "sin" -> ` fsin\n`
| "sqrt" -> ` fsqrt\n`
| "tan" -> ` fptan; fstp %st(0)\n`
| _ -> assert false
(* Output the assembly code for an instruction *)
(* Name of current function *)
@ -280,12 +373,9 @@ let function_name = ref ""
let tailrec_entry_point = ref 0
(* Label of trap for out-of-range accesses *)
let range_check_trap = ref 0
(* Record float literals to be emitted later *)
let float_constants = ref ([] : (int * string) list)
let tos = phys_reg 100
let emit_instr fallthrough i =
match i.desc with
Lend -> ()
@ -293,8 +383,10 @@ let emit_instr fallthrough i =
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc <> dst.loc then begin
if src.typ = Float then
if src = tos then
` fstpl {emit_reg dst}\n`
if is_tos src then
` fst{pop_suffix i}l {emit_reg dst}\n`
else if is_tos dst then
` fldl {emit_reg src}\n`
else begin
` fldl {emit_reg src}\n`;
` fstpl {emit_reg dst}\n`
@ -354,6 +446,8 @@ let emit_instr fallthrough i =
| Lop(Iload(chunk, addr)) ->
let dest = i.res.(0) in
begin match chunk with
| Word | Thirtytwo_signed | Thirtytwo_unsigned ->
` movl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
| Byte_unsigned ->
` movzbl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
| Byte_signed ->
@ -366,8 +460,6 @@ let emit_instr fallthrough i =
` flds {emit_addressing addr i.arg 0}\n`
| Double | Double_u ->
` fldl {emit_addressing addr i.arg 0}\n`
| _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) ->
` movl {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
end
| Lop(Istore(chunk, addr)) ->
begin match chunk with
@ -378,15 +470,15 @@ let emit_instr fallthrough i =
| Sixteen_unsigned | Sixteen_signed ->
` movw {emit_reg16 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
| Single ->
if i.arg.(0) = tos then
` fstps {emit_addressing addr i.arg 1}\n`
if is_tos i.arg.(0) then
` fst{pop_suffix i}s {emit_addressing addr i.arg 1}\n`
else begin
` fldl {emit_reg i.arg.(0)}\n`;
` fstps {emit_addressing addr i.arg 1}\n`
end
| Double | Double_u ->
if i.arg.(0) = tos then
` fstpl {emit_addressing addr i.arg 1}\n`
if is_tos i.arg.(0) then
` fst{pop_suffix i}l {emit_addressing addr i.arg 1}\n`
else begin
` fldl {emit_reg i.arg.(0)}\n`;
` fstpl {emit_addressing addr i.arg 1}\n`
@ -444,12 +536,12 @@ let emit_instr fallthrough i =
| Lop(Iintop op) ->
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
` leal {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
` incl {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
` decl {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
` leal {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Idiv, n)) ->
let l = Misc.log2 n in
let lbl = new_label() in
@ -470,21 +562,22 @@ let emit_instr fallthrough i =
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n`
| Lop(Inegf | Iabsf as floatop) ->
if i.arg.(0) <> tos then
if not (is_tos i.arg.(0)) then
` fldl {emit_reg i.arg.(0)}\n`;
` {emit_string(instr_for_floatop floatop)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
as floatop) ->
if i.arg.(0) = tos && i.arg.(1) = tos then
begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
(true, true) ->
(* both operands on top of FP stack *)
` {emit_string(instr_for_floatop_pop floatop)} %st, %st(1)\n`
else if i.arg.(0) = tos then
| (true, false) ->
(* first operand on stack *)
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
else if i.arg.(1) = tos then
| (false, true) ->
(* second operand on stack *)
` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n`
else begin
| (false, false) ->
(* both operands in memory *)
` fldl {emit_reg i.arg.(0)}\n`;
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
@ -499,7 +592,7 @@ let emit_instr fallthrough i =
` addl $4, %esp\n`
end
| Lop(Iintoffloat) ->
if i.arg.(0) <> tos then
if not (is_tos i.arg.(0)) then
` fldl {emit_reg i.arg.(0)}\n`;
stack_offset := !stack_offset - 8;
` subl $8, %esp\n`;
@ -510,9 +603,9 @@ let emit_instr fallthrough i =
` fldcw (%esp)\n`;
begin match i.res.(0).loc with
Stack s ->
` fistpl {emit_reg i.res.(0)}\n`
` fist{pop_suffix i}l {emit_reg i.res.(0)}\n`
| _ ->
` fistpl (%esp)\n`;
` fist{pop_suffix i}l (%esp)\n`;
` movl (%esp), {emit_reg i.res.(0)}\n`
end;
` fldcw 4(%esp)\n`;
@ -558,9 +651,18 @@ let emit_instr fallthrough i =
` pushl {emit_addressing addr i.arg 0}\n`;
stack_offset := !stack_offset + 8
| Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
if i.arg.(0) <> tos then
if not (is_tos i.arg.(0)) then
` fldl {emit_reg i.arg.(0)}\n`;
` {emit_string(instr_for_floatarithmem double op)} {emit_addressing addr i.arg 1}\n`
| Lop(Ispecific(Ifloatspecial s)) ->
(* Push args on float stack if necessary *)
for k = 0 to Array.length i.arg - 1 do
if not (is_tos i.arg.(k)) then ` fldl {emit_reg i.arg.(k)}\n`
done;
(* Fix-up for binary instrs whose args were swapped *)
if Array.length i.arg = 2 && is_tos i.arg.(1) then
` fxch %st(1)\n`;
emit_floatspecial s
| Lreloadretaddr ->
()
| Lreturn ->
@ -591,70 +693,8 @@ let emit_instr fallthrough i =
` cmpl ${emit_int n}, {emit_reg i.arg.(0)}\n`;
let b = name_for_cond_branch cmp in
` j{emit_string b} {emit_label lbl}\n`
| Ifloattest((Ceq | Cne as cmp), neg) ->
if i.arg.(1) <> tos then
` fldl {emit_reg i.arg.(1)}\n`;
if i.arg.(0) <> tos then
` fldl {emit_reg i.arg.(0)}\n`;
` fucompp\n`;
` fnstsw %ax\n`;
let neg1 = if cmp = Ceq then neg else not neg in
if neg1 then begin (* branch if different *)
` andb $68, %ah\n`;
` xorb $64, %ah\n`;
` jne {emit_label lbl}\n`
end else begin (* branch if equal *)
` andb $69, %ah\n`;
` cmpb $64, %ah\n`;
` je {emit_label lbl}\n`
end
| Ifloattest(cmp, neg) ->
let actual_cmp =
if i.arg.(0) = tos && i.arg.(1) = tos then begin
(* both args on top of FP stack *)
` fcompp\n`;
cmp
end else if i.arg.(0) = tos then begin
(* first arg on top of FP stack *)
` fcompl {emit_reg i.arg.(1)}\n`;
cmp
end else if i.arg.(1) = tos then begin
(* second arg on top of FP stack *)
` fcompl {emit_reg i.arg.(0)}\n`;
Cmm.swap_comparison cmp
end else begin
` fldl {emit_reg i.arg.(0)}\n`;
` fcompl {emit_reg i.arg.(1)}\n`;
cmp
end in
` fnstsw %ax\n`;
begin match actual_cmp with
Cle ->
` andb $69, %ah\n`;
` decb %ah\n`;
` cmpb $64, %ah\n`;
if neg
then ` jae `
else ` jb `
| Cge ->
` andb $5, %ah\n`;
if neg
then ` jne `
else ` je `
| Clt ->
` andb $69, %ah\n`;
` cmpb $1, %ah\n`;
if neg
then ` jne `
else ` je `
| Cgt ->
` andb $69, %ah\n`;
if neg
then ` jne `
else ` je `
| _ -> fatal_error "Emit_i386: floattest"
end;
`{emit_label lbl}\n`
emit_float_test cmp neg i.arg lbl
| Ioddtest ->
` testl $1, {emit_reg i.arg.(0)}\n`;
` jne {emit_label lbl}\n`

View File

@ -246,6 +246,99 @@ let output_epilogue () =
let n = frame_size() - 4 in
if n > 0 then ` add esp, {emit_int n}\n`
(* Determine if the given register is the top of the floating-point stack *)
let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
(* Emit a "p" suffix if TOS is dead after the given instruction *)
let pop_suffix i =
let r = i.arg.(0) in
if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p"
(* Emit the code for a floating-point comparison *)
let emit_float_test cmp neg arg lbl =
let actual_cmp =
match (is_tos arg.(0), is_tos arg.(1)) with
(true, true) ->
(* both args on top of FP stack *)
` fcompp\n`;
cmp
| (true, false) ->
(* first arg on top of FP stack *)
` fcompl {emit_reg arg.(1)}\n`;
cmp
| (false, true) ->
(* second arg on top of FP stack *)
` fcompl {emit_reg arg.(0)}\n`;
Cmm.swap_comparison cmp
| (false, false) ->
` fldl {emit_reg arg.(0)}\n`;
` fcompl {emit_reg arg.(1)}\n`;
cmp
in
` fnstsw %ax\n`;
begin match actual_cmp with
Ceq ->
if neg then begin
` and ah, 68\n`;
` xor ah, 64\n`;
` jne `
end else begin
` and ah, 69\n`;
` cmp ah, 64\n`;
` je `
end
| Cne ->
if neg then begin
` and ah, 69\n`;
` cmp ah, 64\n`;
` je `
end else begin
` and ah, 68\n`;
` xor ah, 64\n`;
` jne `
end
| Cle ->
` and ah, 69\n`;
` dec ah\n`;
` cmp ah, 64\n`;
if neg
then ` jae `
else ` jb `
| Cge ->
` and ah, 5\n`;
if neg
then ` jne `
else ` je `
| Clt ->
` and ah, 69\n`;
` cmp ah, 1\n`;
if neg
then ` jne `
else ` je `
| Cgt ->
` and ah, 69\n`;
if neg
then ` jne `
else ` je `
end;
`{emit_label lbl}\n`
(* Emit a Ifloatspecial instruction *)
let emit_floatspecial = function
"atan" -> ` fldl1\n\tfpatan\n`
| "atan2" -> ` fpatan\n`
| "cos" -> ` fcos\n`
| "log" -> ` fldln2\n\tfxch\n\tfyl2x\n`
| "log10" -> ` fldlg2\n\tfxch\n\tfyl2x\n`
| "sin" -> ` fsin\n`
| "sqrt" -> ` fsqrt\n`
| "tan" -> ` fptan\n\tfstp st(0)\n`
| _ -> assert false
(* Output the assembly code for an instruction *)
(* Name of current function *)
@ -257,8 +350,6 @@ let range_check_trap = ref 0
let float_constants = ref ([] : (int * string) list)
let tos = phys_reg 100
let emit_instr i =
match i.desc with
Lend -> ()
@ -266,8 +357,10 @@ let emit_instr i =
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc <> dst.loc then begin
if src.typ = Float then
if src = tos then
` fstp {emit_reg dst}\n`
if is_tos src then
` fst{pop_suffix i} {emit_reg dst}\n`
else if is_tos dst then
` fld {emit_reg dst}\n`
else begin
` fld {emit_reg src}\n`;
` fstp {emit_reg dst}\n`
@ -331,6 +424,8 @@ let emit_instr i =
| Lop(Iload(chunk, addr)) ->
let dest = i.res.(0) in
begin match chunk with
| Word | Thirtytwo_signed | Thirtytwo_unsigned ->
` mov {emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n`
| Byte_unsigned ->
` movzx {emit_reg dest}, BYTE PTR {emit_addressing addr i.arg 0}\n`
| Byte_signed ->
@ -343,8 +438,6 @@ let emit_instr i =
` fld REAL4 PTR {emit_addressing addr i.arg 0}\n`
| Double | Double_u ->
` fld REAL8 PTR {emit_addressing addr i.arg 0}\n`
| _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) ->
` mov {emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n`
end
| Lop(Istore(chunk, addr)) ->
begin match chunk with
@ -355,15 +448,15 @@ let emit_instr i =
| Sixteen_unsigned | Sixteen_signed ->
` mov WORD PTR {emit_addressing addr i.arg 1}, {emit_reg16 i.arg.(0)}\n`
| Single ->
if i.arg.(0) = tos then
` fstp REAL4 PTR {emit_addressing addr i.arg 1}\n`
if is_tos i.arg.(0) then
` fst{pop_suffix i} REAL4 PTR {emit_addressing addr i.arg 1}\n`
else begin
` fld {emit_reg i.arg.(0)}\n`;
` fstp REAL4 PTR {emit_addressing addr i.arg 1}\n`
end
| Double | Double_u ->
if i.arg.(0) = tos then
` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n`
if is_tops i.arg.(0) then
` fst{pop_suffix i} REAL8 PTR {emit_addressing addr i.arg 1}\n`
else begin
` fld {emit_reg i.arg.(0)}\n`;
` fstp REAL8 PTR {emit_addressing addr i.arg 1}\n`
@ -421,12 +514,12 @@ let emit_instr i =
| Lop(Iintop op) ->
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
` lea {emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n`
| Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
` inc {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
` dec {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
` lea {emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n`
| Lop(Iintop_imm(Idiv, n)) ->
let l = Misc.log2 n in
let lbl = new_label() in
@ -447,21 +540,22 @@ let emit_instr i =
(* We have i.arg.(0) = i.res.(0) *)
` {emit_string(instr_for_intop op)} {emit_reg i.res.(0)}, {emit_int n}\n`
| Lop(Inegf | Iabsf as floatop) ->
if i.arg.(0) <> tos then
if not (is_tos i.arg.(0)) then
` fld {emit_reg i.arg.(0)}\n`;
` {emit_string(instr_for_floatop floatop)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
as floatop) ->
if i.arg.(0) = tos && i.arg.(1) = tos then
begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
(true, true) ->
(* both operands on top of FP stack *)
` {emit_string(instr_for_floatop_reversed floatop)}\n`
else if i.arg.(0) = tos then
` {emit_string(instr_for_floatop_pop floatop)}\n`
| (true, false) ->
(* first operand on stack *)
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
else if i.arg.(1) = tos then
| (false, true) ->
(* second operand on stack *)
` {emit_string(instr_for_floatop_reversed floatop)} {emit_reg i.arg.(0)}\n`
else begin
| (false, false) ->
(* both operands in memory *)
` fld {emit_reg i.arg.(0)}\n`;
` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}\n`
@ -476,7 +570,7 @@ let emit_instr i =
` add esp, 4\n`
end
| Lop(Iintoffloat) ->
if i.arg.(0) <> tos then
if not (is_tos i.arg.(0)) then
` fld {emit_reg i.arg.(0)}\n`;
stack_offset := !stack_offset - 8;
` sub esp, 8\n`;
@ -487,9 +581,9 @@ let emit_instr i =
` fldcw [esp]\n`;
begin match i.res.(0).loc with
Stack s ->
` fistp {emit_reg i.res.(0)}\n`
` fist{pop_suffix i} {emit_reg i.res.(0)}\n`
| _ ->
` fistp DWORD PTR [esp]\n`;
` fist{pop_suffix i} DWORD PTR [esp]\n`;
` mov {emit_reg i.res.(0)}, [esp]\n`
end;
` fldcw [esp+4]\n`;
@ -537,10 +631,19 @@ let emit_instr i =
` push DWORD PTR {emit_addressing addr i.arg 0}\n`;
stack_offset := !stack_offset + 8
| Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
if i.arg.(0) <> tos then
if not (is_tos i.arg.(0)) then
` fld {emit_reg i.arg.(0)}\n`;
let size = if double then "REAL8" else "REAL4" in
` {emit_string(instr_for_floatarithmem op)} {emit_string size} PTR {emit_addressing addr i.arg 1}\n`
| Lop(Ispecific(Ifloatspecial s)) ->
(* Push args on float stack if necessary *)
for k = 0 to Array.length i.arg - 1 do
if not (is_tos i.arg.(k)) then ` fld {emit_reg i.arg.(k)}\n`
done;
(* Fix-up for binary instrs whose args were swapped *)
if Array.length i.arg = 2 && is_tos i.arg.(1) then
` fxch st(1)\n`;
emit_floatspecial s
| Lreloadretaddr ->
()
| Lreturn ->
@ -571,70 +674,8 @@ let emit_instr i =
` cmp {emit_reg i.arg.(0)}, {emit_int n}\n`;
let b = name_for_cond_branch cmp in
` j{emit_string b} {emit_label lbl}\n`
| Ifloattest((Ceq | Cne as cmp), neg) ->
if i.arg.(1) <> tos then
` fld {emit_reg i.arg.(1)}\n`;
if i.arg.(0) <> tos then
` fld {emit_reg i.arg.(0)}\n`;
` fucompp\n`;
` fnstsw ax\n`;
let neg1 = if cmp = Ceq then neg else not neg in
if neg1 then begin (* branch if different *)
` and ah, 68\n`;
` xor ah, 64\n`;
` jne {emit_label lbl}\n`
end else begin (* branch if equal *)
` and ah, 69\n`;
` cmp ah, 64\n`;
` je {emit_label lbl}\n`
end
| Ifloattest(cmp, neg) ->
let actual_cmp =
if i.arg.(0) = tos && i.arg.(1) = tos then begin
(* both args on top of FP stack *)
` fcompp\n`;
cmp
end else if i.arg.(0) = tos then begin
(* first arg on top of FP stack *)
` fcomp {emit_reg i.arg.(1)}\n`;
cmp
end else if i.arg.(1) = tos then begin
(* second arg on top of FP stack *)
` fcomp {emit_reg i.arg.(0)}\n`;
Cmm.swap_comparison cmp
end else begin
` fld {emit_reg i.arg.(0)}\n`;
` fcomp {emit_reg i.arg.(1)}\n`;
cmp
end in
` fnstsw ax\n`;
begin match actual_cmp with
Cle ->
` and ah, 69\n`;
` dec ah\n`;
` cmp ah, 64\n`;
if neg
then ` jae `
else ` jb `
| Cge ->
` and ah, 5\n`;
if neg
then ` jne `
else ` je `
| Clt ->
` and ah, 69\n`;
` cmp ah, 1\n`;
if neg
then ` jne `
else ` je `
| Cgt ->
` and ah, 69\n`;
if neg
then ` jne `
else ` je `
| _ -> fatal_error "Emit_i386: floattest"
end;
`{emit_label lbl}\n`
emit_float_test cmp neg i.arg lbl
| Ioddtest ->
` test {emit_reg i.arg.(0)}, 1\n`;
` jne {emit_label lbl}\n`

View File

@ -47,7 +47,7 @@ let register_class r =
| Addr -> 0
| Float -> 1
let num_available_registers = [| 7; 0 |]
let num_available_registers = [| 7; 1 |]
let first_available_register = [| 0; 100 |]

View File

@ -73,14 +73,28 @@ let rec select_addr exp =
| arg ->
(Alinear arg, 0)
(* C functions to be turned into Ifloatspecial instructions if -ffast-math *)
let inline_float_ops =
["atan"; "atan2"; "cos"; "log"; "log10"; "sin"; "sqrt"; "tan"]
(* Estimate number of float temporaries needed to evaluate expression
(Ershov's algorithm) *)
let rec float_needs = function
Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
Cop((Cnegf | Cabsf), [arg]) ->
float_needs arg
| Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
let n1 = float_needs arg1 in
let n2 = float_needs arg2 in
if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2
| Cop(Cextcall(fn, ty_res, alloc), args)
when !fast_math && List.mem fn inline_float_ops ->
begin match args with
[arg] -> float_needs arg
| [arg1; arg2] -> max (float_needs arg2 + 1) (float_needs arg1)
| _ -> assert false
end
| _ ->
1
@ -119,7 +133,7 @@ let pseudoregs_for_operation op arg res =
the result is always left at the top of the floating-point stack *)
| Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
| Ifloatofint | Iload((Single | Double | Double_u), _)
| Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _)) ->
| Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _) | Ifloatspecial _) ->
(arg, [| tos |], false) (* don't move it immediately *)
(* For storing a byte, the argument must be in eax...edx.
(But for a short, any reg will do!)
@ -215,6 +229,11 @@ method select_operation op args =
| _ ->
super#select_operation op args
end
(* Recognize inlined floating point operations *)
| Cextcall(fn, ty_res, false)
when !fast_math && List.mem fn inline_float_ops ->
(Ispecific(Ifloatspecial fn), args)
(* Default *)
| _ -> super#select_operation op args
(* Recognize float arithmetic with mem *)

View File

@ -76,6 +76,13 @@ let size_expr env exp =
fatal_error "Selection.size_expr"
in size Tbl.empty exp
(* These are C library functions that are known to be pure
(no side effects at all) and worth not pre-computing. *)
let pure_external_functions =
["acos"; "asin"; "atan"; "atan2"; "cos"; "exp"; "log";
"log10"; "sin"; "sqrt"; "tan"]
(* Says if an expression is "simple". A "simple" expression has no
side-effects and its execution can be delayed until its value
is really needed. In the case of e.g. an [alloc] instruction,
@ -97,9 +104,15 @@ let rec is_simple_expr = function
| Cop(op, args) ->
begin match op with
(* The following may have side effects *)
Capply _ | Cextcall(_, _, _) | Calloc | Cstore _ | Craise -> false
| Capply _ | Calloc | Cstore _ | Craise -> false
(* External C functions normally have side effects, unless known *)
| Cextcall(fn, _, alloc) ->
not alloc &&
List.mem fn pure_external_functions &&
List.for_all is_simple_expr args
(* The remaining operations are simple if their args are *)
| _ -> List.for_all is_simple_expr args
| _ ->
List.for_all is_simple_expr args
end
| _ -> false