Amelioration des flottants x86: utiliser %st(0) comme registre quand c'est possible, evitant ainsi des couples fstp/fld; ajout option -ffast-math

git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5404 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
2003-02-25 15:50:13 +00:00 · 2003-02-25 15:50:13 +00:00 · 40efd97fe1
parent c517632cb2
commit 40efd97fe1
6 changed files with 303 additions and 177 deletions
--- a/asmcomp/i386/arch.ml
+++ b/asmcomp/i386/arch.ml
@ -14,7 +14,11 @@

 (* Machine-specific command-line options *)

-let command_line_options = []
+let fast_math = ref false
+
+let command_line_options =
+  [ "-ffast-math", Arg.Set fast_math,
+      " Inline trigonometric and exponential functions" ]

 (* Specific operations for the Intel 386 processor *)

@ -42,6 +46,8 @@ type specific_operation =
  | Ifloatarithmem of bool * float_operation * addressing_mode
                                        (* Float arith operation with memory *)
                                        (* bool: true=64 bits, false=32 *)
+  | Ifloatspecial of string
+
 and float_operation =
    Ifloatadd | Ifloatsub | Ifloatsubrev | Ifloatmul | Ifloatdiv | Ifloatdivrev

@ -132,3 +138,10 @@ let print_specific_operation printreg op ppf arg =
      let long = if double then "float64" else "float32" in
      fprintf ppf "%a %s %s[%a]" printreg arg.(0) (op_name op) long
       (print_addressing printreg addr) (Array.sub arg 1 (Array.length arg - 1))
+  | Ifloatspecial name ->
+      fprintf ppf "%s " name;
+      for i = 0 to Array.length arg - 1 do
+        if i > 0 then fprintf ppf ", ";
+        printreg ppf arg.(i)
+      done
+      
--- a/asmcomp/i386/emit.mlp
+++ b/asmcomp/i386/emit.mlp
@ -272,6 +272,99 @@ let output_epilogue () =
  let n = frame_size() - 4 in
  if n > 0 then `	addl	${emit_int n}, %esp\n`

+(* Determine if the given register is the top of the floating-point stack *)
+
+let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
+
+(* Emit a "p" suffix if TOS is dead after the given instruction *)
+
+let pop_suffix i = 
+  let r = i.arg.(0) in
+  if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p"
+
+(* Emit the code for a floating-point comparison *)
+
+let emit_float_test cmp neg arg lbl =
+  let actual_cmp =
+    match (is_tos arg.(0), is_tos arg.(1)) with
+      (true, true) ->
+      (* both args on top of FP stack *)
+      `	fcompp\n`;
+      cmp
+    | (true, false) ->
+      (* first arg on top of FP stack *)
+      `	fcompl	{emit_reg arg.(1)}\n`;
+      cmp
+    | (false, true) ->
+      (* second arg on top of FP stack *)
+      `	fcompl	{emit_reg arg.(0)}\n`;
+      Cmm.swap_comparison cmp
+    | (false, false) ->
+      `	fldl	{emit_reg arg.(0)}\n`;
+      `	fcompl	{emit_reg arg.(1)}\n`;
+      cmp
+    in
+  `	fnstsw	%ax\n`;
+  begin match actual_cmp with
+    Ceq ->
+      if neg then begin
+      `	andb	$68, %ah\n`;
+      `	xorb	$64, %ah\n`;
+      `	jne	`
+      end else begin
+      `	andb	$69, %ah\n`;
+      `	cmpb	$64, %ah\n`;
+      `	je	`
+      end
+  | Cne ->
+      if neg then begin
+      `	andb	$69, %ah\n`;
+      `	cmpb	$64, %ah\n`;
+      `	je	`
+      end else begin
+      `	andb	$68, %ah\n`;
+      `	xorb	$64, %ah\n`;
+      `	jne	`
+      end
+  | Cle ->
+      `	andb	$69, %ah\n`;
+      `	decb	%ah\n`;
+      `	cmpb	$64, %ah\n`;
+      if neg
+      then `	jae	`
+      else `	jb	`
+  | Cge ->
+      `	andb	$5, %ah\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  | Clt ->
+      `	andb	$69, %ah\n`;
+      `	cmpb	$1, %ah\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  | Cgt ->
+      `	andb	$69, %ah\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  end;
+  `{emit_label lbl}\n`
+
+(* Emit a Ifloatspecial instruction *)
+
+let emit_floatspecial = function
+    "atan"  -> `	fldl1; fpatan\n`
+  | "atan2" -> `	fpatan\n`
+  | "cos"   -> `	fcos\n`
+  | "log"   -> `	fldln2; fxch; fyl2x\n`
+  | "log10" -> `	fldlg2; fxch; fyl2x\n`
+  | "sin"   -> `	fsin\n`
+  | "sqrt"  -> `	fsqrt\n`
+  | "tan"   -> `	fptan; fstp %st(0)\n`
+  | _ -> assert false
+
 (* Output the assembly code for an instruction *)

 (* Name of current function *)
@ -280,12 +373,9 @@ let function_name = ref ""
 let tailrec_entry_point = ref 0
 (* Label of trap for out-of-range accesses *)
 let range_check_trap = ref 0
-
+(* Record float literals to be emitted later *)
 let float_constants = ref ([] : (int * string) list)

-let tos = phys_reg 100
-
-
 let emit_instr fallthrough i =
    match i.desc with
      Lend -> ()
@ -293,8 +383,10 @@ let emit_instr fallthrough i =
        let src = i.arg.(0) and dst = i.res.(0) in
        if src.loc <> dst.loc then begin
          if src.typ = Float then
-            if src = tos then
-              `	fstpl	{emit_reg dst}\n`
+            if is_tos src then
+              `	fst{pop_suffix i}l	{emit_reg dst}\n`
+            else if is_tos dst then
+              `	fldl	{emit_reg src}\n`
            else begin
              `	fldl	{emit_reg src}\n`;
              `	fstpl	{emit_reg dst}\n`
@ -354,6 +446,8 @@ let emit_instr fallthrough i =
    | Lop(Iload(chunk, addr)) ->
        let dest = i.res.(0) in
        begin match chunk with
+          | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
+              `	movl	{emit_addressing addr i.arg 0}, {emit_reg dest}\n`
          | Byte_unsigned ->
              `	movzbl	{emit_addressing addr i.arg 0}, {emit_reg dest}\n`
          | Byte_signed ->
@ -366,8 +460,6 @@ let emit_instr fallthrough i =
            `	flds	{emit_addressing addr i.arg 0}\n`
          | Double | Double_u ->
            `	fldl	{emit_addressing addr i.arg 0}\n`
-          | _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) ->
-              `	movl	{emit_addressing addr i.arg 0}, {emit_reg dest}\n`
        end
    | Lop(Istore(chunk, addr)) ->
        begin match chunk with
@ -378,15 +470,15 @@ let emit_instr fallthrough i =
          | Sixteen_unsigned | Sixteen_signed ->
            `	movw	{emit_reg16 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
          | Single ->
-              if i.arg.(0) = tos then
-                `	fstps	{emit_addressing addr i.arg 1}\n`
+              if is_tos i.arg.(0) then
+                `	fst{pop_suffix i}s	{emit_addressing addr i.arg 1}\n`
              else begin
                `	fldl	{emit_reg i.arg.(0)}\n`;
                `	fstps	{emit_addressing addr i.arg 1}\n`
              end
          | Double | Double_u ->
-              if i.arg.(0) = tos then
-                `	fstpl	{emit_addressing addr i.arg 1}\n`
+              if is_tos i.arg.(0) then
+                `	fst{pop_suffix i}l	{emit_addressing addr i.arg 1}\n`
              else begin
                `	fldl	{emit_reg i.arg.(0)}\n`;
                `	fstpl	{emit_addressing addr i.arg 1}\n`
@ -444,12 +536,12 @@ let emit_instr fallthrough i =
    | Lop(Iintop op) ->
        (* We have i.arg.(0) = i.res.(0) *)
        `	{emit_string(instr_for_intop op)}	{emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
+    | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
+        `	leal	{emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
    | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
        `	incl	{emit_reg i.res.(0)}\n`
    | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
        `	decl	{emit_reg i.res.(0)}\n`
-    | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
-        `	leal	{emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
    | Lop(Iintop_imm(Idiv, n)) ->
        let l = Misc.log2 n in
        let lbl = new_label() in
@ -470,21 +562,22 @@ let emit_instr fallthrough i =
        (* We have i.arg.(0) = i.res.(0) *)
        `	{emit_string(instr_for_intop op)}	${emit_int n}, {emit_reg i.res.(0)}\n`
    | Lop(Inegf | Iabsf as floatop) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
          `	fldl	{emit_reg i.arg.(0)}\n`;
        `	{emit_string(instr_for_floatop floatop)}\n`
    | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
          as floatop) ->
-        if i.arg.(0) = tos && i.arg.(1) = tos then
+        begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
+          (true, true) ->
          (* both operands on top of FP stack *)
          `	{emit_string(instr_for_floatop_pop floatop)}	%st, %st(1)\n`
-        else if i.arg.(0) = tos then
+        | (true, false) ->
          (* first operand on stack *)
          `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
-        else if i.arg.(1) = tos then
+        | (false, true) ->
          (* second operand on stack *)
          `	{emit_string(instr_for_floatop_reversed floatop)}	{emit_reg i.arg.(0)}\n`
-        else begin
+        | (false, false) ->
          (* both operands in memory *)
          `	fldl	{emit_reg i.arg.(0)}\n`;
          `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
@ -499,7 +592,7 @@ let emit_instr fallthrough i =
            `	addl	$4, %esp\n`
        end
    | Lop(Iintoffloat) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
          `	fldl	{emit_reg i.arg.(0)}\n`;
        stack_offset := !stack_offset - 8;
        `	subl	$8, %esp\n`;
@ -510,9 +603,9 @@ let emit_instr fallthrough i =
        `	fldcw	(%esp)\n`;
        begin match i.res.(0).loc with
          Stack s ->
-            `	fistpl	{emit_reg i.res.(0)}\n`
+            `	fist{pop_suffix i}l	{emit_reg i.res.(0)}\n`
        | _ ->
-            `	fistpl	(%esp)\n`;
+            `	fist{pop_suffix i}l	(%esp)\n`;
            `	movl	(%esp), {emit_reg i.res.(0)}\n`
        end;
        `	fldcw	4(%esp)\n`;
@ -558,9 +651,18 @@ let emit_instr fallthrough i =
        `	pushl	{emit_addressing addr i.arg 0}\n`;
        stack_offset := !stack_offset + 8
    | Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
          `	fldl	{emit_reg i.arg.(0)}\n`;
        `	{emit_string(instr_for_floatarithmem double op)}	{emit_addressing addr i.arg 1}\n`
+    | Lop(Ispecific(Ifloatspecial s)) ->
+        (* Push args on float stack if necessary *)
+        for k = 0 to Array.length i.arg - 1 do
+          if not (is_tos i.arg.(k)) then `	fldl	{emit_reg i.arg.(k)}\n`
+        done;
+        (* Fix-up for binary instrs whose args were swapped *)
+        if Array.length i.arg = 2 && is_tos i.arg.(1) then
+          `	fxch	%st(1)\n`;
+        emit_floatspecial s
    | Lreloadretaddr ->
        ()
    | Lreturn ->
@ -591,70 +693,8 @@ let emit_instr fallthrough i =
            `	cmpl	${emit_int n}, {emit_reg i.arg.(0)}\n`;
            let b = name_for_cond_branch cmp in
            `	j{emit_string b}	{emit_label lbl}\n`
-        | Ifloattest((Ceq | Cne as cmp), neg) ->
-            if i.arg.(1) <> tos then
-              `	fldl	{emit_reg i.arg.(1)}\n`;
-            if i.arg.(0) <> tos then
-              `	fldl	{emit_reg i.arg.(0)}\n`;
-            `	fucompp\n`;
-            `	fnstsw	%ax\n`;
-            let neg1 = if cmp = Ceq then neg else not neg in
-            if neg1 then begin          (* branch if different *)
-              `	andb	$68, %ah\n`;
-              `	xorb	$64, %ah\n`;
-              `	jne	{emit_label lbl}\n`
-            end else begin              (* branch if equal *)
-              `	andb	$69, %ah\n`;
-              `	cmpb	$64, %ah\n`;
-              `	je	{emit_label lbl}\n`
-            end
        | Ifloattest(cmp, neg) ->
-            let actual_cmp =
-              if i.arg.(0) = tos && i.arg.(1) = tos then begin
-                (* both args on top of FP stack *)
-                `	fcompp\n`;
-                cmp
-              end else if i.arg.(0) = tos then begin
-                (* first arg on top of FP stack *)
-                `	fcompl	{emit_reg i.arg.(1)}\n`;
-                cmp
-              end else if i.arg.(1) = tos then begin
-                (* second arg on top of FP stack *)
-                `	fcompl	{emit_reg i.arg.(0)}\n`;
-                Cmm.swap_comparison cmp
-              end else begin
-                `	fldl	{emit_reg i.arg.(0)}\n`;
-                `	fcompl	{emit_reg i.arg.(1)}\n`;
-                cmp
-              end in
-            `	fnstsw	%ax\n`;
-            begin match actual_cmp with
-              Cle ->
-                `	andb	$69, %ah\n`;
-                `	decb	%ah\n`;
-                `	cmpb	$64, %ah\n`;
-                if neg
-                then `	jae	`
-                else `	jb	`
-            | Cge ->
-                `	andb	$5, %ah\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | Clt ->
-                `	andb	$69, %ah\n`;
-                `	cmpb	$1, %ah\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | Cgt ->
-                `	andb	$69, %ah\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | _ -> fatal_error "Emit_i386: floattest"
-            end;
-            `{emit_label lbl}\n`
+            emit_float_test cmp neg i.arg lbl
        | Ioddtest ->
            `	testl	$1, {emit_reg i.arg.(0)}\n`;
            `	jne	{emit_label lbl}\n`
--- a/asmcomp/i386/emit_nt.mlp
+++ b/asmcomp/i386/emit_nt.mlp
@ -246,6 +246,99 @@ let output_epilogue () =
  let n = frame_size() - 4 in
  if n > 0 then `	add    esp, {emit_int n}\n`

+(* Determine if the given register is the top of the floating-point stack *)
+
+let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
+
+(* Emit a "p" suffix if TOS is dead after the given instruction *)
+
+let pop_suffix i = 
+  let r = i.arg.(0) in
+  if not (is_tos r && Reg.Set.mem r i.live) then emit_string "p"
+
+(* Emit the code for a floating-point comparison *)
+
+let emit_float_test cmp neg arg lbl =
+  let actual_cmp =
+    match (is_tos arg.(0), is_tos arg.(1)) with
+      (true, true) ->
+      (* both args on top of FP stack *)
+      `	fcompp\n`;
+      cmp
+    | (true, false) ->
+      (* first arg on top of FP stack *)
+      `	fcompl	{emit_reg arg.(1)}\n`;
+      cmp
+    | (false, true) ->
+      (* second arg on top of FP stack *)
+      `	fcompl	{emit_reg arg.(0)}\n`;
+      Cmm.swap_comparison cmp
+    | (false, false) ->
+      `	fldl	{emit_reg arg.(0)}\n`;
+      `	fcompl	{emit_reg arg.(1)}\n`;
+      cmp
+    in
+  `	fnstsw	%ax\n`;
+  begin match actual_cmp with
+    Ceq ->
+      if neg then begin
+      `	and	ah, 68\n`;
+      `	xor	ah, 64\n`;
+      `	jne	`
+      end else begin
+      `	and	ah, 69\n`;
+      `	cmp	ah, 64\n`;
+      `	je	`
+      end
+  | Cne ->
+      if neg then begin
+      `	and	ah, 69\n`;
+      `	cmp	ah, 64\n`;
+      `	je	`
+      end else begin
+      `	and	ah, 68\n`;
+      `	xor	ah, 64\n`;
+      `	jne	`
+      end
+  | Cle ->
+      `	and	ah, 69\n`;
+      `	dec	ah\n`;
+      `	cmp	ah, 64\n`;
+      if neg
+      then `	jae	`
+      else `	jb	`
+  | Cge ->
+      `	and	ah, 5\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  | Clt ->
+      `	and	ah, 69\n`;
+      `	cmp	ah, 1\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  | Cgt ->
+      `	and	ah, 69\n`;
+      if neg
+      then `	jne	`
+      else `	je	`
+  end;
+  `{emit_label lbl}\n`
+
+(* Emit a Ifloatspecial instruction *)
+
+let emit_floatspecial = function
+    "atan"  -> `	fldl1\n\tfpatan\n`
+  | "atan2" -> `	fpatan\n`
+  | "cos"   -> `	fcos\n`
+  | "log"   -> `	fldln2\n\tfxch\n\tfyl2x\n`
+  | "log10" -> `	fldlg2\n\tfxch\n\tfyl2x\n`
+  | "sin"   -> `	fsin\n`
+  | "sqrt"  -> `	fsqrt\n`
+  | "tan"   -> `	fptan\n\tfstp st(0)\n`
+  | _ -> assert false
+
 (* Output the assembly code for an instruction *)

 (* Name of current function *)
@ -257,8 +350,6 @@ let range_check_trap = ref 0

 let float_constants = ref ([] : (int * string) list)

-let tos = phys_reg 100
-
 let emit_instr i =
    match i.desc with
      Lend -> ()
@ -266,8 +357,10 @@ let emit_instr i =
        let src = i.arg.(0) and dst = i.res.(0) in
        if src.loc <> dst.loc then begin
          if src.typ = Float then
-            if src = tos then
-              `	fstp	{emit_reg dst}\n`
+            if is_tos src then
+              `	fst{pop_suffix i}	{emit_reg dst}\n`
+            else if is_tos dst then
+              `	fld	{emit_reg dst}\n`
            else begin
              `	fld	{emit_reg src}\n`;
              `	fstp	{emit_reg dst}\n`
@ -331,6 +424,8 @@ let emit_instr i =
    | Lop(Iload(chunk, addr)) ->
        let dest = i.res.(0) in
        begin match chunk with
+          | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
+            `	mov	{emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n`
          | Byte_unsigned ->
              `	movzx	{emit_reg dest}, BYTE PTR {emit_addressing addr i.arg 0}\n`
          | Byte_signed ->
@ -343,8 +438,6 @@ let emit_instr i =
            `	fld	REAL4 PTR {emit_addressing addr i.arg 0}\n`
          | Double | Double_u ->
            `	fld	REAL8 PTR {emit_addressing addr i.arg 0}\n`
-          | _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) ->
-            `	mov	{emit_reg dest}, DWORD PTR {emit_addressing addr i.arg 0}\n`
        end
    | Lop(Istore(chunk, addr)) ->
        begin match chunk with
@ -355,15 +448,15 @@ let emit_instr i =
          | Sixteen_unsigned | Sixteen_signed ->
            `	mov	WORD PTR {emit_addressing addr i.arg 1}, {emit_reg16 i.arg.(0)}\n`
          | Single ->
-              if i.arg.(0) = tos then
-                `	fstp	REAL4 PTR {emit_addressing addr i.arg 1}\n`
+              if is_tos i.arg.(0) then
+                `	fst{pop_suffix i}	REAL4 PTR {emit_addressing addr i.arg 1}\n`
              else begin
                `	fld	{emit_reg i.arg.(0)}\n`;
                `	fstp	REAL4 PTR {emit_addressing addr i.arg 1}\n`
              end
          | Double | Double_u ->
-              if i.arg.(0) = tos then
-                `	fstp	REAL8 PTR {emit_addressing addr i.arg 1}\n`
+              if is_tops i.arg.(0) then
+                `	fst{pop_suffix i}	REAL8 PTR {emit_addressing addr i.arg 1}\n`
              else begin
                `	fld	{emit_reg i.arg.(0)}\n`;
                `	fstp	REAL8 PTR {emit_addressing addr i.arg 1}\n`
@ -421,12 +514,12 @@ let emit_instr i =
    | Lop(Iintop op) ->
        (* We have i.arg.(0) = i.res.(0) *)
        `	{emit_string(instr_for_intop op)}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`
+    | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
+        `	lea	{emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n`
    | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
        `	inc	{emit_reg i.res.(0)}\n`
    | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
        `	dec	{emit_reg i.res.(0)}\n`
-    | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
-        `	lea	{emit_reg i.res.(0)}, [{emit_reg i.arg.(0)}+{emit_int n}]\n`
    | Lop(Iintop_imm(Idiv, n)) ->
        let l = Misc.log2 n in
        let lbl = new_label() in
@ -447,21 +540,22 @@ let emit_instr i =
        (* We have i.arg.(0) = i.res.(0) *)
        `	{emit_string(instr_for_intop op)}	{emit_reg i.res.(0)}, {emit_int n}\n`
    | Lop(Inegf | Iabsf as floatop) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
          `	fld	{emit_reg i.arg.(0)}\n`;
        `	{emit_string(instr_for_floatop floatop)}\n`
    | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
          as floatop) ->
-        if i.arg.(0) = tos && i.arg.(1) = tos then
+        begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
+          (true, true) ->
          (* both operands on top of FP stack *)
-          `	{emit_string(instr_for_floatop_reversed floatop)}\n`
-        else if i.arg.(0) = tos then
+          `	{emit_string(instr_for_floatop_pop floatop)}\n`
+        | (true, false) ->
          (* first operand on stack *)
          `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
-        else if i.arg.(1) = tos then
+        | (false, true) ->
          (* second operand on stack *)
          `	{emit_string(instr_for_floatop_reversed floatop)}	{emit_reg i.arg.(0)}\n`
-        else begin
+        | (false, false) ->
          (* both operands in memory *)
          `	fld	{emit_reg i.arg.(0)}\n`;
          `	{emit_string(instr_for_floatop floatop)}	{emit_reg i.arg.(1)}\n`
@ -476,7 +570,7 @@ let emit_instr i =
            `	add	esp, 4\n`
        end
    | Lop(Iintoffloat) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
          `	fld	{emit_reg i.arg.(0)}\n`;
        stack_offset := !stack_offset - 8;
        `	sub	esp, 8\n`;
@ -487,9 +581,9 @@ let emit_instr i =
        `	fldcw	[esp]\n`;
        begin match i.res.(0).loc with
          Stack s ->
-            `	fistp	{emit_reg i.res.(0)}\n`
+            `	fist{pop_suffix i}	{emit_reg i.res.(0)}\n`
        | _ ->
-            `	fistp	DWORD PTR [esp]\n`;
+            `	fist{pop_suffix i}	DWORD PTR [esp]\n`;
            `	mov	{emit_reg i.res.(0)}, [esp]\n`
        end;
        `	fldcw	[esp+4]\n`;
@ -537,10 +631,19 @@ let emit_instr i =
        `	push	DWORD PTR {emit_addressing addr i.arg 0}\n`;
        stack_offset := !stack_offset + 8
    | Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
-        if i.arg.(0) <> tos then
+        if not (is_tos i.arg.(0)) then
          `	fld	{emit_reg i.arg.(0)}\n`;
        let size = if double then "REAL8" else "REAL4" in
        `	{emit_string(instr_for_floatarithmem op)}	{emit_string size} PTR {emit_addressing addr i.arg 1}\n`
+    | Lop(Ispecific(Ifloatspecial s)) ->
+        (* Push args on float stack if necessary *)
+        for k = 0 to Array.length i.arg - 1 do
+          if not (is_tos i.arg.(k)) then `	fld	{emit_reg i.arg.(k)}\n`
+        done;
+        (* Fix-up for binary instrs whose args were swapped *)
+        if Array.length i.arg = 2 && is_tos i.arg.(1) then
+          `	fxch	st(1)\n`;
+        emit_floatspecial s
    | Lreloadretaddr ->
        ()
    | Lreturn ->
@ -571,70 +674,8 @@ let emit_instr i =
            `	cmp	{emit_reg i.arg.(0)}, {emit_int n}\n`;
            let b = name_for_cond_branch cmp in
            `	j{emit_string b}	{emit_label lbl}\n`
-        | Ifloattest((Ceq | Cne as cmp), neg) ->
-            if i.arg.(1) <> tos then
-              `	fld	{emit_reg i.arg.(1)}\n`;
-            if i.arg.(0) <> tos then
-              `	fld	{emit_reg i.arg.(0)}\n`;
-            `	fucompp\n`;
-            `	fnstsw	ax\n`;
-            let neg1 = if cmp = Ceq then neg else not neg in
-            if neg1 then begin          (* branch if different *)
-              `	and	ah, 68\n`;
-              `	xor	ah, 64\n`;
-              `	jne	{emit_label lbl}\n`
-            end else begin              (* branch if equal *)
-              `	and	ah, 69\n`;
-              `	cmp	ah, 64\n`;
-              `	je	{emit_label lbl}\n`
-            end
        | Ifloattest(cmp, neg) ->
-            let actual_cmp =
-              if i.arg.(0) = tos && i.arg.(1) = tos then begin
-                (* both args on top of FP stack *)
-                `	fcompp\n`;
-                cmp
-              end else if i.arg.(0) = tos then begin
-                (* first arg on top of FP stack *)
-                `	fcomp	{emit_reg i.arg.(1)}\n`;
-                cmp
-              end else if i.arg.(1) = tos then begin
-                (* second arg on top of FP stack *)
-                `	fcomp	{emit_reg i.arg.(0)}\n`;
-                Cmm.swap_comparison cmp
-              end else begin
-                `	fld	{emit_reg i.arg.(0)}\n`;
-                `	fcomp	{emit_reg i.arg.(1)}\n`;
-                cmp
-              end in
-            `	fnstsw	ax\n`;
-            begin match actual_cmp with
-              Cle ->
-                `	and	ah, 69\n`;
-                `	dec	ah\n`;
-                `	cmp	ah, 64\n`;
-                if neg
-                then `	jae	`
-                else `	jb	`
-            | Cge ->
-                `	and	ah, 5\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | Clt ->
-                `	and	ah, 69\n`;
-                `	cmp	ah, 1\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | Cgt ->
-                `	and	ah, 69\n`;
-                if neg
-                then `	jne	`
-                else `	je	`
-            | _ -> fatal_error "Emit_i386: floattest"
-            end;
-            `{emit_label lbl}\n`
+            emit_float_test cmp neg i.arg lbl
        | Ioddtest ->
            `	test	{emit_reg i.arg.(0)}, 1\n`;
            `	jne	{emit_label lbl}\n`
--- a/asmcomp/i386/proc.ml
+++ b/asmcomp/i386/proc.ml
@ -47,7 +47,7 @@ let register_class r =
  | Addr -> 0
  | Float -> 1

-let num_available_registers = [| 7; 0 |]
+let num_available_registers = [| 7; 1 |]

 let first_available_register = [| 0; 100 |]

--- a/asmcomp/i386/selection.ml
+++ b/asmcomp/i386/selection.ml
@ -73,14 +73,28 @@ let rec select_addr exp =
  | arg ->
      (Alinear arg, 0)
    
+(* C functions to be turned into Ifloatspecial instructions if -ffast-math *)
+
+let inline_float_ops =
+  ["atan"; "atan2"; "cos"; "log"; "log10"; "sin"; "sqrt"; "tan"]
+
 (* Estimate number of float temporaries needed to evaluate expression
   (Ershov's algorithm) *)

 let rec float_needs = function
-    Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
+    Cop((Cnegf | Cabsf), [arg]) ->
+      float_needs arg
+  | Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
      let n1 = float_needs arg1 in
      let n2 = float_needs arg2 in
      if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2
+  | Cop(Cextcall(fn, ty_res, alloc), args)
+    when !fast_math && List.mem fn inline_float_ops ->
+      begin match args with
+        [arg] -> float_needs arg
+      | [arg1; arg2] -> max (float_needs arg2 + 1) (float_needs arg1)
+      | _ -> assert false
+      end
  | _ ->
      1

@ -119,7 +133,7 @@ let pseudoregs_for_operation op arg res =
     the result is always left at the top of the floating-point stack *)
  | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
  | Ifloatofint | Iload((Single | Double | Double_u), _)
-  | Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _)) ->
+  | Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _, _) | Ifloatspecial _) ->
      (arg, [| tos |], false)           (* don't move it immediately *)
  (* For storing a byte, the argument must be in eax...edx.
     (But for a short, any reg will do!)
@ -215,6 +229,11 @@ method select_operation op args =
      | _ ->
          super#select_operation op args
      end
+  (* Recognize inlined floating point operations *)
+  | Cextcall(fn, ty_res, false)
+    when !fast_math && List.mem fn inline_float_ops ->
+      (Ispecific(Ifloatspecial fn), args)
+  (* Default *)
  | _ -> super#select_operation op args

 (* Recognize float arithmetic with mem *)
--- a/asmcomp/selectgen.ml
+++ b/asmcomp/selectgen.ml
@ -76,6 +76,13 @@ let size_expr env exp =
        fatal_error "Selection.size_expr"
  in size Tbl.empty exp

+(* These are C library functions that are known to be pure
+   (no side effects at all) and worth not pre-computing. *)
+
+let pure_external_functions =
+  ["acos"; "asin"; "atan"; "atan2"; "cos"; "exp"; "log";
+   "log10"; "sin"; "sqrt"; "tan"]
+
 (* Says if an expression is "simple". A "simple" expression has no
   side-effects and its execution can be delayed until its value
   is really needed. In the case of e.g. an [alloc] instruction,
@ -97,9 +104,15 @@ let rec is_simple_expr = function
  | Cop(op, args) ->
      begin match op with
        (* The following may have side effects *)
-        Capply _ | Cextcall(_, _, _) | Calloc | Cstore _ | Craise -> false
+      | Capply _ | Calloc | Cstore _ | Craise -> false
+        (* External C functions normally have side effects, unless known *)
+      | Cextcall(fn, _, alloc) ->
+          not alloc &&
+          List.mem fn pure_external_functions &&
+          List.for_all is_simple_expr args
        (* The remaining operations are simple if their args are *)
-      | _ -> List.for_all is_simple_expr args
+      | _ ->
+          List.for_all is_simple_expr args
      end
  | _ -> false