Reecriture du flottant I386 pour utiliser vraiment la pile de registres flottants

git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@519 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
1995-12-10 09:31:57 +00:00 · 1995-12-10 09:31:57 +00:00 · 1f8a4f6da7
parent e5884f1b62
commit 1f8a4f6da7
7 changed files with 269 additions and 243 deletions
--- a/asmcomp/arch_i386.ml
+++ b/asmcomp/arch_i386.ml
@ -25,11 +25,10 @@ type specific_operation =
  | Istore_int of int * addressing_mode (* Store an integer constant *)
  | Istore_symbol of string * addressing_mode (* Store a symbol *)
  | Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
-  | Ifloatarith of float_oper * addressing_mode option *
-                   addressing_mode option * addressing_mode option 
-                                        (* Reg/mem float operations *)
-and float_oper =
-    Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv
+  | Ifloatarithmem of float_operation * addressing_mode (* float arith w/mem *)
+
+and float_operation =
+    Ifloatadd | Ifloatsub | Ifloatsubrev | Ifloatmul | Ifloatdiv | Ifloatdivrev

 (* Sizes, endianness *)

@ -82,9 +81,6 @@ let print_addressing printreg addr arg =
      print_string " * "; print_int scale;
      if n <> 0 then begin print_string " + "; print_int n end

-let print_sub_addressing printreg addr arg pos =
-  print_addressing printreg addr (Array.sub arg pos (Array.length arg - pos))
-
 let print_specific_operation printreg op arg =
  match op with
    Ilea addr -> print_addressing printreg addr arg
@ -97,29 +93,16 @@ let print_specific_operation printreg op arg =
  | Ioffset_loc(n, addr) ->
      print_string "["; print_addressing printreg addr arg;
      print_string "] +:= "; print_int n
-  | Ifloatarith(op, arg1, arg2, res) ->
-      let pos_arg2 =
-        match arg1 with None -> 1 | Some addr -> num_args_addressing addr in
-      let pos_res =
-        pos_arg2 +
-        (match arg2 with None -> 1 | Some addr -> num_args_addressing addr) in
-      begin match res with
-          None -> ()
-        | Some addr ->
-            print_string "["; print_sub_addressing printreg addr arg pos_res;
-            print_string "] := "
-      end;
-      let print_arg pos = function
-          None ->
-            printreg arg.(pos)
-        | Some addr ->
-            print_string "[";
-            print_sub_addressing printreg addr arg pos in
-      print_arg 0 arg1;
+  | Ifloatarithmem(op, addr) ->
+      printreg arg.(0);
      begin match op with
        Ifloatadd -> print_string " +f "
      | Ifloatsub -> print_string " -f "
+      | Ifloatsubrev -> print_string " -f(rev) "
      | Ifloatmul -> print_string " *f "
      | Ifloatdiv -> print_string " /f "
+      | Ifloatdivrev -> print_string " /f(rev) "
      end;
-      print_arg pos_arg2 arg2
+      print_string "[";
+      print_addressing printreg addr (Array.sub arg 1 (Array.length arg - 1));
+      print_string "]"
--- a/asmcomp/emit_i386.mlp
+++ b/asmcomp/emit_i386.mlp
@ -27,6 +27,7 @@ open Emitaux
 let fastcode_flag = ref true

 let stack_offset = ref 0
+let fp_offset = ref 0

 (* Layout of the stack frame *)

@ -73,28 +74,17 @@ let emit_align =
  
 (* Output a pseudo-register *)

-let emit_reg r =
-  match r.loc with
-    Reg r ->
+let emit_reg = function
+    { loc = Reg r; typ = Float } ->
+      emit_string (register_name(r + !fp_offset))
+  | { loc = Reg r } ->
      emit_string (register_name r)
-  | Stack s ->
+  | { loc = Stack s } as r ->
      let ofs = slot_offset s (register_class r) in
      `{emit_int ofs}(%esp)`
-  | Unknown ->
+  | { loc = Unknown } ->
      fatal_error "Emit_i386.emit_reg"

-(* Same, but after one push in the floating-point register set *)
-
-let emit_shift r =
-  match r.loc with
-    Reg r ->
-      emit_string (register_name(r + 1))
-  | Stack s ->
-      let ofs = slot_offset s (register_class r) in
-      `{emit_int ofs}(%esp)`
-  | Unknown ->
-      fatal_error "Emit_i386.emit_shift"
-
 (* Output a reference to the lower 8 bits or lower 16 bits of a register *)

 let reg_low_byte_name = [| "%al"; "%bl"; "%cl"; "%dl" |]
@ -153,6 +143,13 @@ let emit_addressing addr r n =
      if d <> 0 then emit_int d;
      `({emit_reg r.(n)}, {emit_reg r.(n+1)}, {emit_int scale})`

+(* Emit the operand of a floating-point operation *)
+
+let emit_float_operand r =
+  match r.loc with
+    Stack s -> `l	{emit_reg r}`
+  | _       -> `	{emit_reg r}` 
+
 (* Record live pointers at call points *)

 type frame_descr =
@ -206,6 +203,35 @@ let instr_for_intop = function
  | Iasr -> "sar"
  | _ -> fatal_error "Emit_i386: instr_for_intop"

+let instr_for_floatop = function
+    Iaddf -> "fadd"
+  | Isubf -> "fsub"
+  | Imulf -> "fmul"
+  | Idivf -> "fdiv"
+  | _ -> fatal_error "Emit_i386: instr_for_floatop"
+
+let instr_for_floatop_reversed = function
+    Iaddf -> "fadd"
+  | Isubf -> "fsubr"
+  | Imulf -> "fmul"
+  | Idivf -> "fdivr"
+  | _ -> fatal_error "Emit_i386: instr_for_floatop"
+
+let instr_for_floatop_pop = function
+    Iaddf -> "faddp"
+  | Isubf -> "fsubp"
+  | Imulf -> "fmulp"
+  | Idivf -> "fdivp"
+  | _ -> fatal_error "Emit_i386: instr_for_floatop"
+
+let instr_for_floatarithmem = function
+    Ifloatadd -> "faddl"
+  | Ifloatsub -> "fsubl"
+  | Ifloatsubrev -> "fsubrl"
+  | Ifloatmul -> "fmull"
+  | Ifloatdiv -> "fdivl"
+  | Ifloatdivrev -> "fdivrl"
+
 let name_for_cond_branch = function
    Isigned Ceq -> "e"     | Isigned Cne -> "ne"
  | Isigned Cle -> "le"     | Isigned Cgt -> "g"
@ -249,11 +275,17 @@ let emit_instr i =
            Int | Addr ->
              `	movl	{emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
          | Float ->
-              if i.arg.(0).loc = Reg 100 then
-                `	fstl	{emit_reg i.res.(0)}\n`
-              else begin
-                `	fldl	{emit_reg i.arg.(0)}\n`;
-                `	fstpl	{emit_shift i.res.(0)}\n`
+              begin match i.arg.(0).loc with
+                Reg 100 ->
+                  `	fstpl	{emit_reg i.res.(0)}\n`;
+                  decr fp_offset
+              | Reg 101 when !fp_offset = 0 ->
+                  `	fstl	{emit_reg i.res.(0)}\n`
+              | _ ->
+                  `	fldl	{emit_reg i.arg.(0)}\n`;
+                  incr fp_offset;
+                  `	fstpl	{emit_reg i.res.(0)}\n`;
+                  decr fp_offset
              end
        end
    | Lop(Iconst_int 0) ->
@ -271,7 +303,7 @@ let emit_instr i =
          float_constants := (lbl, f) :: !float_constants;
          `	fldl	{emit_label lbl}\n`
        end;
-        `	fstpl	{emit_shift i.res.(0)}\n`
+        incr fp_offset
    | Lop(Iconst_symbol s) ->
        `	movl	${emit_symbol s}, {emit_reg i.res.(0)}\n`
    | Lop(Icall_ind) ->
@ -295,13 +327,15 @@ let emit_instr i =
          `	movl	${emit_symbol s}, %eax\n`;
          `	call	{emit_symbol "caml_c_call"}\n`;
          record_frame i.live
-        end else if contains_floats i.arg or contains_floats i.res then begin
-          `	movl	${emit_symbol s}, %eax\n`;
-          `	call	{emit_symbol "caml_c_call_noalloc"}\n`
-        end else
-          `	call	{emit_symbol s}\n`;
-        if Array.length i.res > 0 & i.res.(0).typ = Float then
-          `	fstpl	{emit_shift i.res.(0)}\n`
+        end else begin
+          if contains_floats i.arg or contains_floats i.res then begin
+            `	ffree	%st(0)\n`;
+            `	ffree	%st(1)\n`;
+            `	ffree	%st(2)\n`;
+            `	ffree	%st(3)\n`
+          end;
+          `	call	{emit_symbol s}\n`
+        end
    | Lop(Istackoffset n) ->
        if n >= 0
        then `	subl	${emit_int n}, %esp\n`
@ -331,18 +365,22 @@ let emit_instr i =
            end
        | Float ->
            `	fldl	{emit_addressing addr i.arg 0}\n`;
-            `	fstpl	{emit_shift i.res.(0)}\n`
+            incr fp_offset
        end
    | Lop(Istore(Word, addr)) ->
        begin match i.arg.(0).typ with
          Int | Addr ->
            `	movl	{emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
        | Float ->
-            if i.arg.(0).loc = Reg 100 then
-              `	fstl	{emit_addressing addr i.arg 1}\n`
-            else begin
-              `	fldl	{emit_reg i.arg.(0)}\n`;
-              `	fstpl	{emit_addressing addr i.arg 1}\n`
+            begin match i.arg.(0).loc with
+              Reg 100 ->
+                `	fstpl	{emit_addressing addr i.arg 1}\n`;
+                decr fp_offset
+            | Reg 101 when !fp_offset = 0 ->
+                `	fstl	{emit_addressing addr i.arg 1}\n`
+            | _ ->
+                `	fldl	{emit_reg i.arg.(0)}\n`;
+                `	fstpl	{emit_addressing addr i.arg 1}\n`
            end
        end
    | Lop(Istore(chunk, addr)) ->
@ -410,20 +448,31 @@ let emit_instr i =
        (* We have i.arg.(0) = i.res.(0) *)
        `	{emit_string(instr_for_intop op)}	${emit_int n}, {emit_reg i.res.(0)}\n`
    | Lop(Iaddf | Isubf | Imulf | Idivf as floatop) ->
-        fatal_error "Emit_i386: floatop"
+        begin match (i.arg.(0).loc, i.arg.(1).loc) with
+          (Reg 100, Reg 100) ->         (* both operands on top of FP stack *)
+            `	{emit_string(instr_for_floatop_pop floatop)}	%st(0), %st(1)\n`;
+            decr fp_offset
+        | (Reg 100, _) ->               (* first operand on stack *)
+            `	{emit_string(instr_for_floatop floatop)}{emit_float_operand i.arg.(1)}\n`
+        | (_, Reg 100) ->               (* second operand on stack *)
+            `	{emit_string(instr_for_floatop_reversed floatop)}{emit_float_operand i.arg.(0)}\n`
+        | (_, _) ->                     (* both in regs or on stack *)
+            `	fldl	{emit_reg i.arg.(0)}\n`;
+            incr fp_offset;
+            `	{emit_string(instr_for_floatop floatop)}{emit_float_operand i.arg.(1)}\n`
+        end
    | Lop(Ifloatofint) ->
        begin match i.arg.(0).loc with
          Stack s ->
-            `	fildl	{emit_reg i.arg.(0)}\n`;
-            `	fstpl	{emit_shift i.res.(0)}\n`
+            `	fildl	{emit_reg i.arg.(0)}\n`
        | _ ->
            `	pushl	{emit_reg i.arg.(0)}\n`;
            stack_offset := !stack_offset + 4;
            `	fildl	(%esp)\n`;
-            `	fstpl	{emit_shift i.res.(0)}\n`;
            `	addl	$4, %esp\n`;
            stack_offset := !stack_offset - 4
-        end
+        end;
+        incr fp_offset
    | Lop(Iintoffloat) ->
        stack_offset := !stack_offset - 8;
        `	subl	$8, %esp\n`;
@ -435,7 +484,7 @@ let emit_instr i =
        `	fldl	{emit_reg i.arg.(0)}\n`;
        begin match i.res.(0).loc with
          Stack s ->
-            `	fistpl	{emit_shift i.res.(0)}\n`
+            `	fistpl	{emit_reg i.res.(0)}\n`
        | _ ->
            `	fistpl	(%esp)\n`;
            `	movl	(%esp), {emit_reg i.res.(0)}\n`
@ -451,42 +500,12 @@ let emit_instr i =
        `	movl	${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
    | Lop(Ispecific(Ioffset_loc(n, addr))) ->
        `	addl	${emit_int n}, {emit_addressing addr i.arg 0}\n`
-    | Lop(Ispecific(Ifloatarith(op, addr_arg1, addr_arg2, addr_res))) ->
-        let pos_arg2 =
-          match addr_arg1 with
-            None -> 1 | Some addr -> num_args_addressing addr in
-        let pos_res =
-          pos_arg2 +
-          (match addr_arg2 with
-            None -> 1 | Some addr -> num_args_addressing addr) in
-        let instr =
-          match op with
-              Ifloatadd -> "fadd"
-            | Ifloatsub -> "fsub"
-            | Ifloatmul -> "fmul"
-            | Ifloatdiv -> "fdiv" in
-        begin match addr_arg1 with
-          Some addr ->
-            `	fldl	{emit_addressing addr i.arg 0}\n`
-        | None ->
-            `	fldl	{emit_reg i.arg.(0)}\n`
+    | Lop(Ispecific(Ifloatarithmem(op, addr))) ->
+        if i.arg.(0).loc <> Reg 100 then begin
+          `	fldl	{emit_reg i.arg.(0)}\n`;
+          incr fp_offset
        end;
-        begin match addr_arg2 with
-          Some addr ->
-            `	{emit_string instr}l	{emit_addressing addr i.arg pos_arg2}\n`
-        | None ->
-            match i.arg.(1).loc with
-              Stack s ->
-              `	{emit_string instr}l	{emit_shift i.arg.(pos_arg2)}\n`
-            | _ ->
-              `	{emit_string instr}	{emit_shift i.arg.(pos_arg2)}\n`
-        end;
-        begin match addr_res with
-          Some addr ->
-            `	fstpl	{emit_addressing addr i.arg pos_res}\n`
-        | None ->
-            `	fstpl	{emit_shift i.res.(0)}\n`
-        end
+        `	{emit_string(instr_for_floatarithmem op)}	{emit_addressing addr i.arg 1}\n`
    | Lreloadretaddr ->
        ()
    | Lreturn ->
@ -518,19 +537,32 @@ let emit_instr i =
            let b = name_for_cond_branch cmp in
            `	j{emit_string b}	{emit_label lbl}\n`
        | Ifloattest(cmp, neg) ->
-            `	fldl	{emit_reg i.arg.(0)}\n`;
-            let comp_instr =
+            let instr =
              match cmp with
                Ceq | Cne -> "fucom"
-              | _ -> "fcom" in
-            begin match i.arg.(1).loc with
-              Stack s ->
-                `	{emit_string comp_instr}pl	{emit_shift i.arg.(1)}\n`
-            | _ ->
-                `	{emit_string comp_instr}p	{emit_shift i.arg.(1)}\n`
-            end;
+              | _         -> "fcom" in
+            let actual_cmp =
+              match (i.arg.(0).loc, i.arg.(1).loc) with
+                (Reg 100, Reg 100) ->     (* both args on top of FP stack *)
+                  `	{emit_string instr}pp\n`;
+                  fp_offset := !fp_offset - 2;
+                  cmp
+              | (Reg 100, _) ->           (* first arg on top of FP stack *)
+                  `	{emit_string instr}p{emit_float_operand i.arg.(1)}\n`;
+                  decr fp_offset;
+                  cmp
+              | (_, Reg 100) ->           (* second arg on top of FP stack *)
+                  `	{emit_string instr}p{emit_float_operand i.arg.(0)}\n`;
+                  decr fp_offset;
+                  Cmm.swap_comparison cmp
+              | (_, _) ->
+                  `	fldl	{emit_reg i.arg.(0)}\n`;
+                  incr fp_offset;
+                  `	{emit_string instr}p{emit_float_operand i.arg.(1)}\n`;
+                  decr fp_offset;
+                  cmp in
            `	fnstsw	%ax\n`;
-            begin match cmp with
+            begin match actual_cmp with
              Ceq ->
                if neg then begin
                  `	andb	$68, %ah\n`;
--- a/asmcomp/proc.mli
+++ b/asmcomp/proc.mli
@ -27,7 +27,7 @@ val select_store:
      Arch.addressing_mode -> Cmm.expression -> Mach.operation * Cmm.expression
 val pseudoregs_for_operation:
      Mach.operation -> Reg.t array -> Reg.t array ->
-        Reg.t array * Reg.t array
+        Reg.t array * Reg.t array * bool
 val is_immediate: int -> bool
 val word_addressed: bool

--- a/asmcomp/proc_i386.ml
+++ b/asmcomp/proc_i386.ml
@ -31,15 +31,19 @@ open Mach
    edi         5
    ebp         6

-    f0 - f4     100-104         function arguments and results
+    f0 - f3     101-104         function arguments and results
                                f0: C function results
-                                not preserved by C *)
+                                not preserved by C
+
+   The other 4 floating-point registers are treated as a stack.
+   We use the pseudo-register %tos (100) to represent the top of that stack. *)

 let int_reg_name =
  [| "%eax"; "%ebx"; "%ecx"; "%edx"; "%esi"; "%edi"; "%ebp" |]

 let float_reg_name =
-  [| "%st"; "%st(1)"; "%st(2)"; "%st(3)"; "%st(4)"; "%st(5)" |]
+  [| "%tos"; "%st(0)"; "%st(1)"; "%st(2)"; "%st(3)";
+     "%st(4)"; "%st(5)"; "%st(6)"; "%st(7)" |]

 let num_register_classes = 2

@ -49,9 +53,9 @@ let register_class r =
  | Addr -> 0
  | Float -> 1

-let num_available_registers = [| 7; 5 |]
+let num_available_registers = [| 7; 4 |]

-let first_available_register = [| 0; 100 |]
+let first_available_register = [| 0; 101 |]

 let register_name r =
  if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
@ -82,6 +86,11 @@ let phys_reg n =
 let stack_slot slot ty =
  Reg.at_location ty (Stack slot)

+let eax = phys_reg 0
+let ecx = phys_reg 2
+let edx = phys_reg 3
+let tos = phys_reg 100
+
 (* Exceptions raised to signal cases not handled here *)

 exception Use_default
@ -153,28 +162,29 @@ let select_addressing exp =
  | (Ascaledadd(e1, e2, scale), d) ->
      (Iindexed2scaled(scale, d), Ctuple[e1; e2])

-exception Use_default
+(* Make float operations associate to the right as much as possible. *)

-let select_floatop op = function
-    [Cop(Cload _, [loc1]); Cop(Cload _, [loc2])] ->
-      let (addr1, arg1) = select_addressing loc1 in
-      let (addr2, arg2) = select_addressing loc2 in
-      (Ispecific(Ifloatarith(op, Some addr1, Some addr2, None)), [arg1; arg2])
+let rec reassociate oldop newop = function
+    [Cop(op, [arg1; arg2]); arg3] when op = oldop ->
+      reassociate oldop newop [arg1; Cop(newop, [arg2; arg3])]
+  | args -> args
+
+(* Recognize float arithmetic with mem *)
+
+let select_floatarith regular_op mem_op mem_rev_op args =
+  match args with
+    [arg1; Cop(Cload _, [loc2])] ->
+      let (addr, arg2) = select_addressing loc2 in
+      (Ispecific(Ifloatarithmem(mem_op, addr)), [arg1; arg2])
  | [Cop(Cload _, [loc1]); arg2] ->
-      let (addr1, arg1) = select_addressing loc1 in
-      (Ispecific(Ifloatarith(op, Some addr1, None, None)), [arg1; arg2])
-  | [arg1; Cop(Cload _, [loc2])] ->
-      let (addr2, arg2) = select_addressing loc2 in
-      (Ispecific(Ifloatarith(op, None, Some addr2, None)), [arg1; arg2])
-  | arg12 ->
-      (Ispecific(Ifloatarith(op, None, None, None)), arg12)
+      let (addr, arg1) = select_addressing loc1 in
+      (Ispecific(Ifloatarithmem(mem_rev_op, addr)), [arg2; arg1])
+  | _ ->
+      (regular_op, args)

-let select_floatop_store op loc args =
-  let (addr, addr_arg) = select_addressing loc in
-  match select_floatop op args with
-    (Ispecific(Ifloatarith(op, opt1, opt2, None)), newargs) ->
-      (Ispecific(Ifloatarith(op, opt1, opt2, Some addr)), newargs @ [addr_arg])
-  | _ -> fatal_error "Proc_i386.select_floatop_store"
+(* Main instruction selection functions *)
+
+exception Use_default

 let select_oper op args =
  match op with
@ -189,11 +199,15 @@ let select_oper op args =
     which do not correspond to an addressing mode. *)
  | Cdivi -> (Iintop Idiv, args)
  | Cmodi -> (Iintop Imod, args)
-  (* Recognize the floating-point operations *)
-  | Caddf -> select_floatop Ifloatadd args
-  | Csubf -> select_floatop Ifloatsub args
-  | Cmulf -> select_floatop Ifloatmul args
-  | Cdivf -> select_floatop Ifloatdiv args
+  (* Recognize float arithmetic with memory.
+     In passing, change associativity of some float operations *)
+  | Caddf -> select_floatarith Iaddf Ifloatadd Ifloatadd
+                               (reassociate Caddf Caddf args)
+  | Csubf -> select_floatarith Isubf Ifloatsub Ifloatsubrev
+                               (reassociate Csubf Caddf args)
+  | Cmulf -> select_floatarith Imulf Ifloatmul Ifloatmul
+                               (reassociate Cmulf Cmulf args)
+  | Cdivf -> select_floatarith Idivf Ifloatdiv Ifloatdivrev args
  (* Recognize store instructions *)
  | Cstore ->
      begin match args with
@ -210,63 +224,53 @@ let select_oper op args =
        when loc = loc' ->
          let (addr, arg) = select_addressing loc in
          (Ispecific(Ioffset_loc(n, addr)), [arg])
-      | [loc; Cop(Caddf, args)] ->
-          select_floatop_store Ifloatadd loc args
-      | [loc; Cop(Csubf, args)] ->
-          select_floatop_store Ifloatsub loc args
-      | [loc; Cop(Cmulf, args)] ->
-          select_floatop_store Ifloatmul loc args
-      | [loc; Cop(Cdivf, args)] ->
-          select_floatop_store Ifloatdiv loc args
      | _ ->
          raise Use_default
      end
  | _ -> raise Use_default

-let select_store_floatop op addr args =
-  match select_floatop op args with
-    (Ispecific(Ifloatarith(op, opt1, opt2, None)), newargs) ->
-      (Ispecific(Ifloatarith(op, opt1, opt2, Some addr)), Ctuple newargs)
-  | _ -> fatal_error "Proc_i386.select_store_floatop"
-
 let select_store addr exp =
  match exp with
    Cconst_int n -> (Ispecific(Istore_int(n, addr)), Ctuple [])
  | Cconst_pointer n -> (Ispecific(Istore_int(n, addr)), Ctuple [])
  | Cconst_symbol s -> (Ispecific(Istore_symbol(s, addr)), Ctuple [])
-  | Cop(Caddf, args) -> select_store_floatop Ifloatadd addr args
-  | Cop(Csubf, args) -> select_store_floatop Ifloatsub addr args
-  | Cop(Cmulf, args) -> select_store_floatop Ifloatmul addr args
-  | Cop(Cdivf, args) -> select_store_floatop Ifloatdiv addr args
  | _ -> raise Use_default

 let pseudoregs_for_operation op arg res =
  match op with
-    (* Two-address binary operations *)
+  (* Two-address binary operations *)
    Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor) ->
-      ([|res.(0); arg.(1)|], res)
-    (* Two-address unary operations *)
+      ([|res.(0); arg.(1)|], res, false)
+  (* Two-address unary operations *)
  | Iintop_imm((Iadd|Isub|Imul|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _) ->
-      (res, res)
-    (* For shifts with variable shift count, second arg must be in ecx *)
+      (res, res, false)
+  (* For shifts with variable shift count, second arg must be in ecx *)
  | Iintop(Ilsl|Ilsr|Iasr) ->
-      ([|res.(0); phys_reg 2|], res)
-    (* For div and mod, first arg must be in eax, edx is clobbered,
-       and result is in eax or edx respectively.
-       Keep it simple, just force second argument in ecx. *)
+      ([|res.(0); ecx|], res, false)
+  (* For div and mod, first arg must be in eax, edx is clobbered,
+     and result is in eax or edx respectively.
+     Keep it simple, just force second argument in ecx. *)
  | Iintop(Idiv) ->
-      ([|phys_reg 0; phys_reg 2|], [|phys_reg 0|])
+      ([| eax; ecx |], [| eax |], true)
  | Iintop(Imod) ->
-      ([|phys_reg 0; phys_reg 2|], [|phys_reg 3|])
-    (* For storing a byte, the argument must be in eax...edx.
-       For storing a halfword, any reg is ok.
-       Keep it simple, just force it to be in edx in both cases. *)
+      ([| eax; ecx |], [| edx |], true)
+  (* For floating-point operations, the result is always left at the
+     top of the floating-point stack *)
+  | Iconst_float _ | Iaddf | Isubf | Imulf | Idivf | Ifloatofint |
+    Ispecific(Ifloatarithmem(_, _)) ->
+      (arg, [| tos |], false)           (* don't move it immediately *)
+  (* Same for a floating-point load *)
+  | Iload(Word, addr) when res.(0).typ = Float ->
+      (arg, [| tos |], false)
+  (* For storing a byte, the argument must be in eax...edx.
+     For storing a halfword, any reg is ok.
+     Keep it simple, just force it to be in edx in both cases. *)
  | Istore(Word, addr) -> raise Use_default
  | Istore(chunk, addr) ->
      let newarg = Array.copy arg in
-      newarg.(0) <- phys_reg 3;
-      (newarg, res)
-    (* Other instructions are more or less regular *)
+      newarg.(0) <- edx;
+      (newarg, res, false)
+  (* Other instructions are more or less regular *)
  | _ -> raise Use_default

 let is_immediate (n: int) = true
@ -307,17 +311,17 @@ let outgoing ofs = Outgoing ofs
 let not_supported ofs = fatal_error "Proc.loc_results: cannot call"

 let loc_arguments arg =
-  calling_conventions 0 5 100 103 outgoing arg
+  calling_conventions 0 5 101 104 outgoing arg
 let loc_parameters arg =
-  let (loc, ofs) = calling_conventions 0 5 100 103 incoming arg in loc
+  let (loc, ofs) = calling_conventions 0 5 101 104 incoming arg in loc
 let loc_results res =
-  let (loc, ofs) = calling_conventions 0 5 100 103 not_supported res in loc
+  let (loc, ofs) = calling_conventions 0 5 101 104 not_supported res in loc
 let loc_external_arguments arg =
-  calling_conventions 0 (-1) 100 99 outgoing arg
+  calling_conventions 0 (-1) 101 100 outgoing arg
 let loc_external_results res =
-  let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc
+  let (loc, ofs) = calling_conventions 0 0 101 101 not_supported res in loc

-let loc_exn_bucket = phys_reg 0         (* eax *)
+let loc_exn_bucket = eax

 (* Registers destroyed by operations *)

@ -327,25 +331,25 @@ let destroyed_at_c_call =               (* ebx, esi, edi, ebp preserved *)
 let destroyed_at_oper = function
    Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
  | Iop(Iextcall(_, false)) -> destroyed_at_c_call
-  | Iop(Iintop(Idiv | Imod)) -> [| phys_reg 0; phys_reg 3 |] (* eax, edx *)
-  | Iop(Ialloc _) -> [| phys_reg 0|] (* eax *)
-  | Iop(Iintop(Icomp _) | Iintop_imm(Icomp _, _)) -> [| phys_reg 0 |] (* eax *)
-  | Iop(Iintoffloat) -> [| phys_reg 0 |] (* eax *)
-  | Iifthenelse(Ifloattest(_, _), _, _) -> [| phys_reg 0 |] (* eax *)
+  | Iop(Iintop(Idiv | Imod)) -> [| eax; edx |]
+  | Iop(Ialloc _) -> [| eax |]
+  | Iop(Iintop(Icomp _) | Iintop_imm(Icomp _, _)) -> [| eax |]
+  | Iop(Iintoffloat) -> [| eax |]
+  | Iifthenelse(Ifloattest(_, _), _, _) -> [| eax |]
  | _ -> [||]

 let destroyed_at_raise = all_phys_regs

 (* Maximal register pressure *)

-let safe_register_pressure op = 5
+let safe_register_pressure op = 4

 let max_register_pressure = function
    Iextcall(_, _) -> [| 4; 0 |]
-  | Iintop(Idiv | Imod) -> [| 5; 5 |]
+  | Iintop(Idiv | Imod) -> [| 5; 4 |]
  | Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) |
-    Iintoffloat -> [| 6; 5 |]
-  | _ -> [|7; 5|]
+    Iintoffloat -> [| 6; 4 |]
+  | _ -> [|7; 4|]

 (* Reloading of instruction arguments, storing of instruction results *)

@ -369,16 +373,15 @@ let reload_operation makereg op arg res =
      if stackp arg.(0) & stackp arg.(1)
      then ([|arg.(0); makereg arg.(1)|], res)
      else (arg, res)
-  | Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat ->
+  | Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat |
+    Iaddf | Isubf | Imulf | Idivf ->
      (* The argument(s) can be either in register or on stack *)
      (arg, res)
-  | Ispecific(Ifloatarith(op, addr_arg1, addr_arg2, addr_res)) ->
-      (* This one is a pain. The float arguments and results can reside in
-         the stack, but the integer arguments must be in registers. *)
-      let newarg = Array.new (Array.length arg) Reg.dummy in
-      for i = 0 to Array.length arg - 1 do
-        newarg.(i) <- if arg.(i).typ = Float then arg.(i) else makereg arg.(i)
-      done;
+  | Ispecific(Ifloatarithmem(_, _)) ->
+      (* First arg can be either in register or on stack, but remaining
+         arguments must be in registers *)
+      let newarg = Array.new (Array.length arg) arg.(0) in
+      for i = 1 to Array.length arg - 1 do newarg.(i) <- makereg arg.(i) done;
      (newarg, res)
  | _ -> (* Other operations: all args and results in registers *)
      raise Use_default
--- a/asmcomp/reg.ml
+++ b/asmcomp/reg.ml
@ -64,7 +64,7 @@ let clone r =
  nr

 let at_location ty loc =
-  let r = { name = ""; stamp = !currstamp; typ = ty; loc = loc; spill = false;
+  let r = { name = "R"; stamp = !currstamp; typ = ty; loc = loc; spill = false;
            interf = []; prefer = []; degree = 0; spill_cost = 0;
            visited = false } in
  incr currstamp;
--- a/asmcomp/reload.ml
+++ b/asmcomp/reload.ml
@ -60,15 +60,6 @@ let rec reload i =
      (* Don't do anything, the arguments and results are already at
         the correct position (e.g. on stack for some arguments). *)
      instr_cons_live i.desc i.arg i.res i.live (reload i.next)
-  | Iop(Imove | Ireload | Ispill) ->
-      (* Do something if this is a stack-to-stack move *)
-      begin match i.arg.(0), i.res.(0) with
-        {loc = Stack s1}, {loc = Stack s2} when s1 <> s2 ->
-          let r = makereg i.arg.(0) in
-          insert_move i.arg.(0) r (insert_move r i.res.(0) (reload i.next))
-      | _ ->
-          instr_cons i.desc i.arg i.res (reload i.next)
-      end
  | Iop op ->
      (* Let the machine description tell us whether some arguments / results
         can reside on the stack *)
@ -77,8 +68,19 @@ let rec reload i =
          Proc.reload_operation makereg op i.arg i.res
        with Proc.Use_default ->
          (* By default, assume that arguments and results must reside
-             in hardware registers *)
-          (makeregs i.arg, makeregs i.res) in
+             in hardware registers. For moves, allow one arg or one
+             res to be stack-allocated, but do something for
+             stack-to-stack moves *)
+          match op with
+            Imove | Ireload | Ispill ->
+              begin match i.arg.(0), i.res.(0) with
+                {loc = Stack s1}, {loc = Stack s2} when s1 <> s2 ->
+                  ([| makereg i.arg.(0) |], i.res)
+              | _ ->
+                  (i.arg, i.res)
+              end
+          | _ ->
+              (makeregs i.arg, makeregs i.res) in
      insert_moves i.arg newarg
        (instr_cons_live i.desc newarg newres i.live
          (insert_moves newres i.res
--- a/asmcomp/selection.ml
+++ b/asmcomp/selection.ml
@ -294,6 +294,26 @@ let join_array rs =
    res
  end

+(* Add an Iop opcode.
+   Offer the processor description an opportunity to insert moves
+   before and after the operation, i.e. for two-address 
+   instructions, or instructions using dedicated registers. *)
+
+let insert_op op rs rd seq =
+  try
+    let (rsrc, rdst, move_res) = Proc.pseudoregs_for_operation op rs rd in
+    insert_moves rs rsrc seq;
+    insert (Iop op) rsrc rdst seq;
+    if move_res then begin
+      insert_moves rdst rd seq;
+      rd
+    end else
+      rdst
+  with Proc.Use_default ->
+    (* Assume no constraints on arg and res registers *)
+    insert (Iop op) rs rd seq;
+    rd
+
 (* Add the instructions for the given expression
   at the end of the given sequence *)

@ -301,20 +321,16 @@ let rec emit_expr env exp seq =
  match exp with
    Cconst_int n ->
      let r = Reg.newv typ_int in
-      insert (Iop(Iconst_int n)) [||] r seq;
-      r
+      insert_op (Iconst_int n) [||] r seq
  | Cconst_float n ->
      let r = Reg.newv typ_float in
-      insert (Iop(Iconst_float n)) [||] r seq;
-      r
+      insert_op (Iconst_float n) [||] r seq
  | Cconst_symbol n ->
      let r = Reg.newv typ_addr in
-      insert (Iop(Iconst_symbol n)) [||] r seq;
-      r
+      insert_op (Iconst_symbol n) [||] r seq
  | Cconst_pointer n ->
      let r = Reg.newv typ_addr in
-      insert (Iop(Iconst_int n)) [||] r seq;
-      r
+      insert_op (Iconst_int n) [||] r seq
  | Cvar v ->
      begin try
        Tbl.find v env
@ -394,12 +410,7 @@ let rec emit_expr env exp seq =
      | Iload(Word, addr) ->
          let r1 = emit_tuple env new_args seq in
          let rd = Reg.newv ty in
-          let a = ref addr in
-          for i = 0 to Array.length ty - 1 do
-            insert(Iop(Iload(Word, !a))) r1 [|rd.(i)|] seq;
-            a := Arch.offset_addressing !a (size_component ty.(i))
-          done;
-          rd
+          insert_op (Iload(Word, addr)) r1 rd seq
      | Istore(Word, addr) ->
          begin match new_args with
            [] -> fatal_error "Selection.Istore"
@ -419,19 +430,7 @@ let rec emit_expr env exp seq =
      | op ->
          let r1 = emit_tuple env new_args seq in
          let rd = Reg.newv ty in
-          begin try
-            (* Offer the processor description an opportunity to insert moves
-               before and after the operation, i.e. for two-address 
-               instructions, or instructions using dedicated registers. *)
-            let (rsrc, rdst) = Proc.pseudoregs_for_operation op r1 rd in
-            insert_moves r1 rsrc seq;
-            insert (Iop op) rsrc rdst seq;
-            insert_moves rdst rd seq
-          with Proc.Use_default ->
-            (* Assume no constraints on arg and res registers *)
-            insert (Iop op) r1 rd seq
-          end;
-          rd
+          insert_op op r1 rd seq
      end        
  | Csequence(e1, e2) ->
      emit_expr env e1 seq;
@ -530,7 +529,14 @@ and emit_parts_list env exp_list seq =
      (new_exp :: new_rem, fin_env)

 and emit_tuple env exp_list seq =
-  Array.concat(List.map (fun e -> emit_expr env e seq) exp_list)
+  let rec emit_list = function
+    [] -> []
+  | exp :: rem ->
+      (* Again, force right-to-left evaluation *)
+      let loc_rem = emit_list rem in
+      let loc_exp = emit_expr env exp seq in
+      loc_exp :: loc_rem in
+  Array.concat(emit_list exp_list)

 and emit_stores env data seq regs_addr addr =
  let a = ref addr in