ocaml/asmrun/ia64.S

531 lines
18 KiB
ArmAsm

/***********************************************************************/
/* */
/* Objective Caml */
/* */
/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
/* */
/* Copyright 1996 Institut National de Recherche en Informatique et */
/* en Automatique. All rights reserved. This file is distributed */
/* under the terms of the Q Public License version 1.0. */
/* */
/***********************************************************************/
/* $Id$ */
/* Asm part of the runtime system, Alpha processor */
#undef BROKEN_POSTINCREMENT
#define ADDRGLOBAL(reg,symb) \
add reg = @ltoff(symb), gp;; ld8 reg = [reg]
#define LOADGLOBAL(reg,symb) \
add r3 = @ltoff(symb), gp;; ld8 r3 = [r3];; ld8 reg = [r3]
#define STOREGLOBAL(reg,symb) \
add r3 = @ltoff(symb), gp;; ld8 r3 = [r3];; st8 [r3] = reg
#define ST8OFF(a,b,d) st8 [a] = b, d
#define LD8OFF(a,b,d) ld8 a = [b], d
#define STFDOFF(a,b,d) stfd [a] = b, d
#define LDFDOFF(a,b,d) ldfd a = [b], d
#define STFSPILLOFF(a,b,d) stf.spill [a] = b, d
#define LDFFILLOFF(a,b,d) ldf.fill a = [b], d
#define SAVE2(a,b) ST8OFF(r2, a, 16); ST8OFF(r3, b, 16)
#define SAVE4(a,b,c,d) SAVE2(a,b);; SAVE2(c,d)
#define SAVE8(a,b,c,d,e,f,g,h) SAVE4(a,b,c,d);; SAVE4(e,f,g,h)
#define LOAD2(a,b) LD8OFF(a, r2, 16); LD8OFF(b, r3, 16)
#define LOAD4(a,b,c,d) LOAD2(a,b);; LOAD2(c,d)
#define LOAD8(a,b,c,d,e,f,g,h) LOAD4(a,b,c,d);; LOAD4(e,f,g,h)
#define FSAVE2(a,b) STFDOFF(r2, a, 16); STFDOFF(r3, b, 16)
#define FSAVE4(a,b,c,d) FSAVE2(a,b);; FSAVE2(c,d)
#define FSAVE8(a,b,c,d,e,f,g,h) FSAVE4(a,b,c,d);; FSAVE4(e,f,g,h)
#define FLOAD2(a,b) LDFDOFF(a, r2, 16); LDFDOFF(b, r3, 16)
#define FLOAD4(a,b,c,d) FLOAD2(a,b);; FLOAD2(c,d)
#define FLOAD8(a,b,c,d,e,f,g,h) FLOAD4(a,b,c,d);; FLOAD4(e,f,g,h)
#define FSPILL2(a,b) STFSPILLOFF(r2, a, 32); STFSPILLOFF(r3, b, 32)
#define FSPILL4(a,b,c,d) FSPILL2(a,b);; FSPILL2(c,d)
#define FSPILL8(a,b,c,d,e,f,g,h) FSPILL4(a,b,c,d);; FSPILL4(e,f,g,h)
#define FFILL2(a,b) LDFFILLOFF(a, r2, 32); LDFFILLOFF(b, r3, 32)
#define FFILL4(a,b,c,d) FFILL2(a,b);; FFILL2(c,d)
#define FFILL8(a,b,c,d,e,f,g,h) FFILL4(a,b,c,d);; FFILL4(e,f,g,h)
/* Allocation */
.text
.global caml_allocN#
.proc caml_allocN#
.align 16
/* caml_allocN: all code generator registers preserved,
gp preserved, r2 = requested size */
caml_allocN:
sub r4 = r4, r2 ;;
cmp.ltu p0, p6 = r4, r5
(p6) br.ret.sptk b0 ;;
/* Stash return address at sp (in stack scratch area) */
mov r3 = b0 ;;
st8 [sp] = r3
/* Call GC */
br.call.sptk b0 = caml_call_gc# ;;
/* Return to caller */
ld8 r3 = [sp] ;;
mov b0 = r3 ;;
br.ret.sptk b0
.endp caml_allocN#
/* caml_call_gc: all code generator registers preserved,
gp preserved, r2 = requested size */
.global caml_call_gc#
.proc caml_call_gc#
.align 16
caml_call_gc:
/* Allocate stack frame */
add sp = -(16 + 16 + 80*8 + 42*8), sp ;;
/* Save requested size and GP on stack */
add r3 = 16, sp ;;
ST8OFF(r3, r2, 8) ;;
st8 [r3] = gp
/* Record lowest stack address, return address, GC regs */
mov r2 = b0 ;;
STOREGLOBAL(r2, caml_last_return_address#)
add r2 = (16 + 16 + 80*8 + 42*8), sp ;;
STOREGLOBAL(r2, caml_bottom_of_stack#)
add r2 = (16 + 16), sp ;;
STOREGLOBAL(r2, caml_gc_regs#)
/* Save all integer regs used by the code generator in the context */
.L100: add r3 = 8, r2 ;;
SAVE4(r8,r9,r10,r11) ;;
SAVE8(r16,r17,r18,r19,r20,r21,r22,r23) ;;
SAVE8(r24,r25,r26,r27,r28,r29,r30,r31) ;;
SAVE8(r32,r33,r34,r35,r36,r37,r38,r39) ;;
SAVE8(r40,r41,r42,r43,r44,r45,r46,r47) ;;
SAVE8(r48,r49,r50,r51,r52,r53,r54,r55) ;;
SAVE8(r56,r57,r58,r59,r60,r61,r62,r63) ;;
SAVE8(r64,r65,r66,r67,r68,r69,r70,r71) ;;
SAVE8(r72,r73,r74,r75,r76,r77,r78,r79) ;;
SAVE8(r80,r81,r82,r83,r84,r85,r86,r87) ;;
SAVE4(r88,r89,r90,r91) ;;
/* Save all floating-point registers not preserved by C */
FSAVE2(f6,f7) ;;
FSAVE8(f8,f9,f10,f11,f12,f13,f14,f15) ;;
FSAVE8(f32,f33,f34,f35,f36,f37,f38,f39) ;;
FSAVE8(f40,f41,f42,f43,f44,f45,f46,f47) ;;
FSAVE8(f48,f49,f50,f51,f52,f53,f54,f55) ;;
FSAVE8(f56,f57,f58,f59,f60,f61,f62,f63) ;;
/* Save current allocation pointer for debugging purposes */
STOREGLOBAL(r4, caml_young_ptr#)
/* Save trap pointer in case an exception is raised */
STOREGLOBAL(r6, caml_exception_pointer#)
/* Call the garbage collector */
br.call.sptk b0 = caml_garbage_collection# ;;
/* Restore gp */
add r3 = 24, sp ;;
ld8 gp = [r3]
/* Restore all integer regs from GC context */
add r2 = (16 + 16), sp ;;
add r3 = 8, r2 ;;
LOAD4(r8,r9,r10,r11) ;;
LOAD8(r16,r17,r18,r19,r20,r21,r22,r23) ;;
LOAD8(r24,r25,r26,r27,r28,r29,r30,r31) ;;
LOAD8(r32,r33,r34,r35,r36,r37,r38,r39) ;;
LOAD8(r40,r41,r42,r43,r44,r45,r46,r47) ;;
LOAD8(r48,r49,r50,r51,r52,r53,r54,r55) ;;
LOAD8(r56,r57,r58,r59,r60,r61,r62,r63) ;;
LOAD8(r64,r65,r66,r67,r68,r69,r70,r71) ;;
LOAD8(r72,r73,r74,r75,r76,r77,r78,r79) ;;
LOAD8(r80,r81,r82,r83,r84,r85,r86,r87) ;;
LOAD4(r88,r89,r90,r91) ;;
/* Restore all floating-point registers not preserved by C */
FLOAD2(f6,f7) ;;
FLOAD8(f8,f9,f10,f11,f12,f13,f14,f15) ;;
FLOAD8(f32,f33,f34,f35,f36,f37,f38,f39) ;;
FLOAD8(f40,f41,f42,f43,f44,f45,f46,f47) ;;
FLOAD8(f48,f49,f50,f51,f52,f53,f54,f55) ;;
FLOAD8(f56,f57,f58,f59,f60,f61,f62,f63) ;;
/* Reload new allocation pointer and allocation limit */
LOADGLOBAL(r4, caml_young_ptr#)
LOADGLOBAL(r5, caml_young_limit#)
/* Allocate space for the block */
add r3 = 16, sp ;;
ld8 r2 = [r3] ;;
sub r4 = r4, r2 ;;
cmp.ltu p6, p0 = r4, r5 /* enough space? */
(p6) br.cond.spnt .L100 ;; /* no: call GC again */
/* Reload return address and say that we are back into Caml code */
ADDRGLOBAL(r3, caml_last_return_address#) ;;
ld8 r2 = [r3]
st8 [r3] = r0 ;;
/* Return to caller */
mov b0 = r2
add sp = (16 + 16 + 80*8 + 42*8), sp ;;
br.ret.sptk b0
.endp caml_call_gc#
/* Call a C function from Caml */
/* Function to call is in r2 */
.global caml_c_call#
.proc caml_c_call#
.align 16
caml_c_call:
/* The Caml code that called us does not expect any
code-generator registers to be preserved */
/* Recover entry point from the function pointer in r2 */
LD8OFF(r3, r2, 8) ;;
mov b6 = r3
/* Preserve gp in r7 */
mov r7 = gp
/* Record lowest stack address and return address */
mov r14 = b0
STOREGLOBAL(sp, caml_bottom_of_stack#) ;;
STOREGLOBAL(r14, caml_last_return_address#)
/* Make the exception handler and alloc ptr available to the C code */
STOREGLOBAL(r4, caml_young_ptr#)
STOREGLOBAL(r6, caml_exception_pointer#)
/* Recover gp from the function pointer in r2 */
ld8 gp = [r2]
/* Call the function */
br.call.sptk b0 = b6 ;;
/* Restore gp */
mov gp = r7 ;;
/* Reload alloc ptr and alloc limit */
LOADGLOBAL(r4, caml_young_ptr#)
LOADGLOBAL(r5, caml_young_limit#)
/* Reload return address and say that we are back into Caml code */
ADDRGLOBAL(r3, caml_last_return_address#) ;;
ld8 r2 = [r3]
st8 [r3] = r0 ;;
/* Return to caller */
mov b0 = r2 ;;
br.ret.sptk b0
.endp caml_c_call#
/* Start the Caml program */
.global caml_start_program#
.proc caml_start_program#
.align 16
caml_start_program:
ADDRGLOBAL(r2, caml_program#) ;;
mov b6 = r2
/* Code shared with caml_callback* */
.L103:
/* Allocate 64 "out" registers (for the Caml code) and no locals */
alloc r3 = ar.pfs, 0, 0, 64, 0
add sp = -(56 * 8), sp ;;
/* Save all callee-save registers on stack */
add r2 = 16, sp ;;
ST8OFF(r2, r3, 8) /* 0 : ar.pfs */
mov r3 = b0 ;;
ST8OFF(r2, r3, 8) ;; /* 1 : return address */
ST8OFF(r2, gp, 8) /* 2 : gp */
mov r3 = pr ;;
ST8OFF(r2, r3, 8) /* 3 : predicates */
mov r3 = ar.fpsr ;;
ST8OFF(r2, r3, 8) /* 4 : ar.fpsr */
mov r3 = ar.unat ;;
ST8OFF(r2, r3, 8) /* 5 : ar.unat */
mov r3 = ar.lc ;;
ST8OFF(r2, r3, 8) /* 6 : ar.lc */
mov r3 = b1 ;;
ST8OFF(r2, r3, 8) /* 7 - 11 : b1 - b5 */
mov r3 = b2 ;;
ST8OFF(r2, r3, 8)
mov r3 = b3 ;;
ST8OFF(r2, r3, 8)
mov r3 = b4 ;;
ST8OFF(r2, r3, 8)
mov r3 = b5 ;;
ST8OFF(r2, r3, 8) ;;
add r3 = 8, r2 ;;
SAVE4(r4,r5,r6,r7) ;; /* 12 - 15 : r4 - r7 */
add r3 = 16, r2 ;; /* 16 - 55 : f2 - f5, f16 - f31 */
FSPILL4(f2,f3,f4,f5) ;;
FSPILL8(f16,f17,f18,f19,f20,f21,f22,f23) ;;
FSPILL8(f24,f25,f26,f27,f28,f29,f30,f31) ;;
/* Set up a callback link on the stack. In addition to
the normal callback link contents (saved values of
caml_bottom_of_stack, caml_last_return_address and
caml_gc_regs), we also save there caml_saved_bsp
and caml_saved_rnat */
add sp = -48, sp
LOADGLOBAL(r3, caml_bottom_of_stack#)
add r2 = 16, sp ;;
ST8OFF(r2, r3, 8)
LOADGLOBAL(r3, caml_last_return_address#) ;;
ST8OFF(r2, r3, 8)
LOADGLOBAL(r3, caml_gc_regs#) ;;
ST8OFF(r2, r3, 8)
LOADGLOBAL(r3, caml_saved_bsp#) ;;
ST8OFF(r2, r3, 8)
LOADGLOBAL(r3, caml_saved_rnat#) ;;
ST8OFF(r2, r3, 8)
/* Set up a trap frame to catch exceptions escaping the Caml code */
mov r6 = sp
add sp = -16, sp ;;
LOADGLOBAL(r3, caml_exception_pointer#)
add r2 = 16, sp ;;
ST8OFF(r2, r3, 8)
.L110: mov r3 = ip ;;
add r3 = .L101 - .L110, r3 ;;
ST8OFF(r2, r3, 8) ;;
/* Save ar.bsp, flush register window, and save ar.rnat */
mov r2 = ar.bsp ;;
STOREGLOBAL(r2, caml_saved_bsp#) ;;
mov r14 = ar.rsc ;;
and r2 = ~0x3, r14;; /* set rsc.mode = 0 */
mov ar.rsc = r2 ;; /* RSE is in enforced lazy mode */
flushrs ;; /* must be first instr in group */
mov r2 = ar.rnat ;;
STOREGLOBAL(r2, caml_saved_rnat#)
mov ar.rsc = r14 /* restore original RSE mode */
/* Reload allocation pointers */
LOADGLOBAL(r4, caml_young_ptr#)
LOADGLOBAL(r5, caml_young_limit#)
/* We are back into Caml code */
STOREGLOBAL(r0, caml_last_return_address#)
/* Call the Caml code */
br.call.sptk b0 = b6 ;;
.L102:
/* Pop the trap frame, restoring caml_exception_pointer */
add sp = 16, sp ;;
ld8 r2 = [sp] ;;
STOREGLOBAL(r2, caml_exception_pointer#)
.L104:
/* Pop the callback link, restoring the global variables */
add r14 = 16, sp ;;
LD8OFF(r2, r14, 8) ;;
STOREGLOBAL(r2, caml_bottom_of_stack#)
LD8OFF(r2, r14, 8) ;;
STOREGLOBAL(r2, caml_last_return_address#)
LD8OFF(r2, r14, 8) ;;
STOREGLOBAL(r2, caml_gc_regs#)
LD8OFF(r2, r14, 8) ;;
STOREGLOBAL(r2, caml_saved_bsp#)
LD8OFF(r2, r14, 8) ;;
STOREGLOBAL(r2, caml_saved_rnat#)
add sp = 48, sp
/* Update allocation pointer */
STOREGLOBAL(r4, caml_young_ptr#)
/* Restore all callee-save registers from stack */
add r2 = 16, sp ;;
LD8OFF(r3, r2, 8) ;; /* 0 : ar.pfs */
mov ar.pfs = r3
LD8OFF(r3, r2, 8) ;; /* 1 : return address */
mov b0 = r3
LD8OFF(gp, r2, 8) ;; /* 2 : gp */
LD8OFF(r3, r2, 8) ;; /* 3 : predicates */
mov pr = r3, -1
LD8OFF(r3, r2, 8) ;; /* 4 : ar.fpsr */
mov ar.fpsr = r3
LD8OFF(r3, r2, 8) ;; /* 5 : ar.unat */
mov ar.unat = r3
LD8OFF(r3, r2, 8) ;; /* 6 : ar.lc */
mov ar.lc = r3
LD8OFF(r3, r2, 8) ;; /* 7 - 11 : b1 - b5 */
mov b1 = r3
LD8OFF(r3, r2, 8) ;;
mov b2 = r3
LD8OFF(r3, r2, 8) ;;
mov b3 = r3
LD8OFF(r3, r2, 8) ;;
mov b4 = r3
LD8OFF(r3, r2, 8) ;;
mov b5 = r3
add r3 = 8, r2 ;;
LOAD4(r4,r5,r6,r7) ;; /* 12 - 15 : r4 - r7 */
add r3 = 16, r2 ;; /* 16 - 55 : f2 - f5, f16 - f31 */
FFILL4(f2,f3,f4,f5) ;;
FFILL8(f16,f17,f18,f19,f20,f21,f22,f23) ;;
FFILL8(f24,f25,f26,f27,f28,f29,f30,f31) ;;
/* Pop stack frame and return */
add sp = (56 * 8), sp
br.ret.sptk.many b0 ;;
/* The trap handler */
.L101:
/* Save exception pointer */
STOREGLOBAL(r6, caml_exception_pointer#)
/* Encode exception bucket as exception result */
or r8 = 2, r8
/* Return it */
br.sptk .L104 ;;
.endp caml_start_program#
/* Raise an exception from C */
.global caml_raise_exception#
.proc caml_raise_exception#
.align 16
caml_raise_exception:
/* Allocate 64 "out" registers (for the Caml code) and no locals */
/* Since we don't return, don't bother saving the PFS */
alloc r2 = ar.pfs, 0, 0, 64, 0
/* Move exn bucket where Caml expects it */
mov r8 = r32 ;;
/* Perform "context switch" as per the Software Conventions Guide,
chapter 10 */
flushrs ;; /* flush dirty registers to stack */
mov r14 = ar.rsc ;;
and r2 = ~0x3, r14;; /* set rsc.mode = 0 */
dep r2 = r0, r2, 16, 4 ;; /* clear rsc.loadrs */
mov ar.rsc = r2 ;; /* RSE is in enforced lazy mode */
invala ;; /* Invalidate ALAT */
LOADGLOBAL(r2, caml_saved_bsp#) ;;
mov ar.bspstore = r2 /* Restore ar.bspstore */
LOADGLOBAL(r2, caml_saved_rnat#) ;;
mov ar.rnat = r2 /* Restore ar.rnat */
mov ar.rsc = r14 ;; /* Restore original RSE mode */
/* Reload allocation pointers and exception pointer */
LOADGLOBAL(r4, caml_young_ptr#)
LOADGLOBAL(r5, caml_young_limit#)
LOADGLOBAL(r6, caml_exception_pointer#)
/* Say that we're back into Caml */
STOREGLOBAL(r0, caml_last_return_address#)
/* Raise the exception proper */
mov sp = r6
add r2 = 8, r6 ;;
ld8 r6 = [r6]
ld8 r2 = [r2] ;;
mov b6 = r2 ;;
/* Branch to handler. Must use a call so as to set up the
CFM and PFS correctly. */
br.call.sptk.many b0 = b6
.endp caml_raise_exception
/* Callbacks from C to Caml */
.global caml_callback_exn#
.proc caml_callback_exn#
.align 16
caml_callback_exn:
/* Initial shuffling of arguments */
ld8 r3 = [r32] /* code pointer */
mov r2 = r32
mov r32 = r33 ;; /* first arg */
mov r33 = r2 /* environment */
mov b6 = r3
br.sptk .L103 ;;
.endp caml_callback_exn#
.global caml_callback2_exn#
.proc caml_callback2_exn#
.align 16
caml_callback2_exn:
/* Initial shuffling of arguments */
ADDRGLOBAL(r3, caml_apply2) /* code pointer */
mov r2 = r32
mov r32 = r33 /* first arg */
mov r33 = r34 ;; /* second arg */
mov r34 = r2 /* environment */
mov b6 = r3
br.sptk .L103 ;;
.endp caml_callback2_exn#
.global caml_callback3_exn#
.proc caml_callback3_exn#
.align 16
caml_callback3_exn:
/* Initial shuffling of arguments */
ADDRGLOBAL(r3, caml_apply3) /* code pointer */
mov r2 = r32
mov r32 = r33 /* first arg */
mov r33 = r34 /* second arg */
mov r34 = r35 ;; /* third arg */
mov r35 = r2 /* environment */
mov b6 = r3
br.sptk .L103 ;;
.endp caml_callback3_exn#
/* Glue code to call [caml_array_bound_error] */
.global caml_ml_array_bound_error#
.proc caml_ml_array_bound_error#
.align 16
caml_ml_array_bound_error:
ADDRGLOBAL(r2, @fptr(caml_array_bound_error#))
br.sptk caml_c_call /* never returns */
.rodata
.global caml_system__frametable#
.type caml_system__frametable#, @object
.size caml_system__frametable#, 8
caml_system__frametable:
data8 1 /* one descriptor */
data8 .L102 /* return address into callback */
data2 -1 /* negative frame size => use callback link */
data2 0 /* no roots here */
.align 8
/* Global variables used by caml_raise_exception */
.common caml_saved_bsp#, 8, 8
.common caml_saved_rnat#, 8, 8