! ocaml/otherlibs/num/bignum/s/sparcKerN.s
!
! 644 lines
! 9.9 KiB
! (listing tag: ArmAsm -- the code below is SPARC assembly)

! Copyright Digital Equipment Corporation 1991
! Last modified_on Fri Mar 1 17:21:25 GMT+1:00 1991 by shand
!
! KerN for SPARC
! Mark Shand
!
! Implementation notes:
!
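! Initial implementations of SPARC offer very limited support for
! integer multiplication, so BnnMultiplyDigit is based on
! double-precision floating-point multiplies that compute
! a 16x32->48 bit result without round-off. Performance is
! not great, but is about twice as good as using the integer
! multiply primitives directly.
!
! NOTE: the BnnMultiplyDigit in this file contains no floating-point
! instructions; each 32x32 product is formed with 32 mulscc
! (integer multiply-step) instructions. The paragraph above appears
! to describe a different version of the routine.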
!
! BnnDivideDigit uses the unmodified assembly code produced
! by cc -O2 KerN.c
!
.seg "text" ! [internal]
! _BnnSetToZero: store zero into %o1 consecutive 32-bit words starting
! at the address in %o0.  Leaf routine; %o2 is used as scratch.
! The loop is unrolled by two: every pass stores one word in the loop
! body and one from the branch delay slot.  An odd count enters the
! loop at its second store; an even count backs the pointer up by one
! word first so that the first pass covers words 0 and 1.
.proc 16
.global _BnnSetToZero
_BnnSetToZero:
deccc %o1 ! %o1 = count - 1
bneg LBSZ3 ! count - 1 is negative (count was zero): nothing to do
andcc 1,%o1,%o2 ! (delay slot) Z set when count - 1 is even, i.e. count is odd
be LBSZ2 ! odd count: enter at the second store of the pair
nop
dec 4,%o0 ! even count: bias the pointer back one word
LBSZ1: ! [internal]
inc 8,%o0 ! advance two words
st %g0,[%o0-4] ! zero the first word of the pair
LBSZ2:
deccc 2,%o1 ! two fewer words remain
bpos LBSZ1 ! loop while the remaining count is non-negative
st %g0,[%o0] ! (delay slot, always executed) zero the second word
LBSZ3:
retl
nop ! [internal]
!
!
! _BnnAssign: copy %o2 32-bit words from the address in %o1 (source, nn)
! to the address in %o0 (destination, mm).  The regions may overlap:
! when the destination lies above the source the copy runs from the top
! word downwards, otherwise from the bottom word upwards.  Nothing is
! copied when the two addresses are equal or the count is zero.
! Leaf routine; %o3 is scratch.
!
! The direction test compares two addresses, so it uses the unsigned
! condition (bgu).  A signed test would pick the wrong direction for an
! overlapping pair of regions lying on either side of 0x80000000.
.proc 16
.global _BnnAssign
_BnnAssign:
	cmp	%o0,%o1
	bgu,a	LBAG2			! if(mm > nn) goto LBAG2 (unsigned)
	tst	%o2			! (delay slot, annulled if not taken) Z = (nl == 0)
	be	LBAGX			! flags still from cmp: mm == nn, nothing to do
	tst	%o2			! (delay slot) Z = (nl == 0)
	be	LBAGX			! if(nl == 0) return
	nop
LBAG1:					! ascending copy
	ld	[%o1],%o3
	inc	4,%o1
	st	%o3,[%o0]		! *mm = *nn++
	deccc	%o2
	bgt	LBAG1
	inc	4,%o0			! (delay slot) mm++
LBAGX:
	retl
	nop
LBAG2:					! descending copy
	be	LBAGX			! if(nl == 0) return
	sll	%o2,2,%o3		! (delay slot) %o3 = nl * 4 bytes
	add	%o1,%o3,%o1		! nn += nl
	add	%o0,%o3,%o0		! mm += nl
LBAG3:
	dec	4,%o1
	ld	[%o1],%o3		! %o3 = *--nn
	dec	4,%o0
	deccc	%o2
	bgt	LBAG3
	st	%o3,[%o0]		! (delay slot) *--mm = %o3
	retl
	nop
!
!
! _BnnSetDigit: store the 32-bit value in %o1 at the address in %o0.
.proc 16
.global _BnnSetDigit
_BnnSetDigit:
retl
st %o1,[%o0] ! (delay slot) *%o0 = %o1
!
!
! _BnnGetDigit: return in %o0 the 32-bit word stored at the address
! passed in %o0.
.proc 14
.global _BnnGetDigit
_BnnGetDigit:
retl
ld [%o0],%o0 ! (delay slot) %o0 = *%o0
!
!
! _BnnNumDigits: %o0 = address of the lowest word, %o1 = word count.
! Scans from the highest-addressed word downwards and returns in %o0
! the count with the all-zero words at the top removed.  The result is
! never less than 1 (a zero count, or all words zero, gives 1).
! Leaf routine; %o2-%o4 are scratch.
.proc 14
.global _BnnNumDigits
_BnnNumDigits:
tst %o1
sll %o1,2,%o3 ! %o3 = count * 4 bytes (sll leaves the flags alone)
be LBND2 ! count == 0: return 0 + 1
add %o0,%o3,%o4 ! (delay slot) %o4 = one past the last word
dec 4,%o4 ! %o4 = address of the last word
LBND1:
ld [%o4],%o2
tst %o2
bne LBND2 ! found a non-zero word
deccc %o1 ! (delay slot, always executed) count--; LBND2 adds the 1 back
bne,a LBND1 ! words remain: keep scanning
dec 4,%o4 ! (delay slot, annulled on fall-through) step down one word
LBND2:
retl
add 1,%o1,%o0 ! (delay slot) result = %o1 + 1
!
!
! _BnnNumLeadingZeroBitsInDigit: return in %o0 the number of zero bits
! above the most significant set bit of the 32-bit value d passed in
! %o0; returns 32 when d is zero.
! Binary search: at each step, if the bits of d selected by the mask
! are all zero, d is shifted left by 16/8/4/2 and that amount is or-ed
! into p.  p starts at 1 so that the last step can be a single xor with
! the top bit of the shifted d.  Leaf routine; %o1 and %o5 are scratch.
.proc 14
.global _BnnNumLeadingZeroBitsInDigit
_BnnNumLeadingZeroBitsInDigit:
addcc %o0,%g0,%o5 ! %o5 = d, flags set from d
be LBLZX ! if(!d) goto LBLZX
sethi %hi(0xffff0000),%o1 ! (delay slot) mask = 0xffff0000
mov 1,%o0 ! p = 1
andcc %o1,%o5,%g0 ! mask & d
bne LBLZ1 ! a bit is set in the top 16: no shift
sll %o1,8,%o1 ! (delay slot, always executed) mask = 0xff000000
sll %o5,16,%o5 ! d <<= 16
or 16,%o0,%o0 ! p |= 16
LBLZ1:
andcc %o1,%o5,%g0 ! mask & d
bne LBLZ2 ! a bit is set in the top 8: no shift
sll %o1,4,%o1 ! (delay slot, always executed) mask = 0xf0000000
sll %o5,8,%o5 ! d <<= 8
or 8,%o0,%o0 ! p |= 8
LBLZ2:
andcc %o1,%o5,%g0 ! mask & d
bne LBLZ3 ! a bit is set in the top 4: no shift
sll %o1,2,%o1 ! (delay slot, always executed) mask = 0xc0000000
sll %o5,4,%o5 ! d <<= 4
or 4,%o0,%o0 ! p |= 4
LBLZ3:
andcc %o1,%o5,%g0 ! mask & d
bne LBLZ4 ! a bit is set in the top 2: no shift
nop
sll %o5,2,%o5 ! d <<= 2
or 2,%o0,%o0 ! p |= 2
LBLZ4:
srl %o5,31,%o5 ! %o5 = (d & 0x80000000) != 0
retl
xor %o0,%o5,%o0 ! (delay slot) p is odd, so this yields p - 1 when the top bit is set
LBLZX:
retl
mov 32,%o0 ! (delay slot) d == 0: all 32 bits are zero
! _BnnDoesDigitFitInWord: returns 1 in %o0 unconditionally; the
! argument is not examined.
.proc 4
.global _BnnDoesDigitFitInWord
_BnnDoesDigitFitInWord:
retl
mov 1,%o0 ! (delay slot) result = 1
! _BnnIsDigitZero: return 1 in %o0 when the 32-bit value passed in %o0
! is zero, 0 otherwise.  Branch-free: subtracting d from zero borrows
! exactly when d is non-zero, and the result is 1 minus that borrow.
.proc 4
.global _BnnIsDigitZero
_BnnIsDigitZero:
	subcc	%g0,%o0,%g0		! 0 - d: carry (borrow) set iff d != 0
	retl
	subx	%g0,-1,%o0		! (delay slot) %o0 = 0 - (-1) - C = 1 - C
! _BnnIsDigitNormalized: return in %o0 the most significant bit (bit 31)
! of the value passed in %o0, i.e. 1 when the top bit is set, else 0.
.proc 4
.global _BnnIsDigitNormalized
_BnnIsDigitNormalized:
retl
srl %o0,31,%o0 ! (delay slot) %o0 = d >> 31
! _BnnIsDigitOdd: return in %o0 the least significant bit of the value
! passed in %o0, i.e. 1 when it is odd, else 0.
.proc 4
.global _BnnIsDigitOdd
_BnnIsDigitOdd:
retl
and %o0,1,%o0 ! (delay slot) %o0 = d & 1
! _BnnCompareDigits: unsigned three-way comparison of the 32-bit values
! in %o0 and %o1.  Returns in %o0: 1 when %o0 > %o1, -1 when %o0 < %o1,
! 0 when they are equal.
! Both branches are annulling, so a delay-slot mov only executes when
! its branch is taken; mov does not modify the condition codes, so the
! second branch still tests the result of the cmp.
.proc 4
.global _BnnCompareDigits
_BnnCompareDigits:
	cmp	%o0,%o1
	bgu,a	LBCD2			! unsigned greater
	mov	1,%o0			! (delay slot, taken only) result = 1
	bcs,a	LBCD2			! carry set = unsigned less
	mov	-1,%o0			! (delay slot, taken only) result = -1
	mov	0,%o0			! equal
LBCD2:
	retl
	nop
! _BnnComplement: replace each of the %o1 32-bit words starting at the
! address in %o0 by its bitwise complement.  Leaf routine; %o2 is
! scratch.  The counter is decremented before it is tested, so the loop
! runs while (count - 1) stays non-negative.
.proc 16
.global _BnnComplement
_BnnComplement:
	deccc	%o1			! %o1 = count - 1
	bneg	LBCMX			! negative: nothing to do
	nop
LBCM1:
	ld	[%o0],%o2
	not	%o2			! %o2 = ~%o2 (xnor with %g0)
	deccc	%o1			! one word fewer; st below leaves the flags alone
	st	%o2,[%o0]
	bpos	LBCM1
	inc	4,%o0			! (delay slot) next word
LBCMX:
	retl
	nop
! _BnnAndDigits: bitwise-and the 32-bit value in %o1 into the word at
! the address in %o0.  Leaf routine; %o2 is scratch.
.proc 16
.global _BnnAndDigits
_BnnAndDigits:
ld [%o0],%o2
and %o2,%o1,%o2
retl
st %o2,[%o0] ! (delay slot) *%o0 &= %o1
! _BnnOrDigits: bitwise-or the 32-bit value in %o1 into the word at the
! address in %o0.  Leaf routine; %o2 is scratch.
.proc 16
.global _BnnOrDigits
_BnnOrDigits:
ld [%o0],%o2
or %o2,%o1,%o2
retl
st %o2,[%o0] ! (delay slot) *%o0 |= %o1
! _BnnXorDigits: bitwise-xor the 32-bit value in %o1 into the word at
! the address in %o0.  Leaf routine; %o2 is scratch.
.proc 16
.global _BnnXorDigits
_BnnXorDigits:
ld [%o0],%o2
xor %o2,%o1,%o2
retl
st %o2,[%o0] ! (delay slot) *%o0 ^= %o1
! _BnnShiftLeft: shift the %o1-word number at the address in %o0 left
! by %o2 bits, in place, working from the lowest-addressed word upwards.
! The bits shifted out of each word are or-ed into the bottom of the
! next one; the bits shifted out of the last word are returned in %o0.
! A shift count of zero, or a word count that is not positive, leaves
! memory untouched and returns 0.
! Leaf routine; scratch: %o3 = 32 - nbits, %o4 = carry bits,
! %o5 = word just loaded, %g1 = word to store.
.proc 14
.global _BnnShiftLeft
_BnnShiftLeft:
	tst	%o2
	be	LBSLX			! nbits == 0: return 0
	mov	0,%o4			! (delay slot) carry = 0
	mov	32,%o3
	deccc	%o1			! %o1 = count - 1
	bneg	LBSLX			! no words to shift
	sub	%o3,%o2,%o3		! (delay slot) %o3 = 32 - nbits
LBSL1:
	ld	[%o0],%o5
	sll	%o5,%o2,%g1
	or	%g1,%o4,%g1		! bring in the bits carried from the word below
	srl	%o5,%o3,%o4		! carry = bits shifted out of this word
	st	%g1,[%o0]
	deccc	%o1
	bpos	LBSL1
	inc	4,%o0			! (delay slot) next word up
LBSLX:
	retl
	mov	%o4,%o0			! (delay slot) return the final carry
! _BnnShiftRight: shift the %o1-word number at the address in %o0 right
! by %o2 bits, in place, working from the highest-addressed word
! downwards.  The bits shifted out of each word are or-ed into the top
! of the next lower one; the bits shifted out of the lowest word are
! returned in %o0, left-aligned in the word.
! A shift count of zero, or a word count that is not positive, leaves
! memory untouched and returns 0.
! Leaf routine; scratch: %o3 = 32 - nbits, %o4 = carry bits,
! %o5 = word just loaded, %g1 = byte length, %g2 = word to store.
.proc 14
.global _BnnShiftRight
_BnnShiftRight:
	tst	%o2
	be	LBSRX			! nbits == 0: return 0
	mov	0,%o4			! (delay slot) carry = 0
	mov	32,%o3
	sll	%o1,2,%g1		! %g1 = count * 4 bytes
	add	%o0,%g1,%o0		! point one past the top word
	deccc	%o1			! %o1 = count - 1
	bneg	LBSRX			! no words to shift
	sub	%o3,%o2,%o3		! (delay slot) %o3 = 32 - nbits
LBSR1:
	dec	4,%o0			! step down to the next word
	ld	[%o0],%o5
	srl	%o5,%o2,%g2
	or	%g2,%o4,%g2		! bring in the bits carried from the word above
	sll	%o5,%o3,%o4		! carry = bits shifted out of this word
	deccc	%o1
	bpos	LBSR1
	st	%g2,[%o0]		! (delay slot) write the shifted word back
LBSRX:
	retl
	mov	%o4,%o0			! (delay slot) return the final carry
! _BnnAddCarry (mm, ml, car): %o0 = mm (word address), %o1 = ml (word
! count), %o2 = car (carry in).
! When car is non-zero, adds 1 to the ml-word number at mm, propagating
! the carry upwards, and returns in %o0 the carry out of the top word
! (1 if every word wrapped round, or if ml is zero; otherwise 0).
! When car is zero, nothing is modified and 0 is returned.
! Leaf routine; %o3 is scratch.  Also entered by branch from _BnnAdd.
.proc 14
.global _BnnAddCarry ! (mm, ml, car)
_BnnAddCarry:
tst %o2
be LBACX0 ! if(car == 0) return(0);
tst %o1 ! (delay slot) Z = (ml == 0)
be LBACX1 ! if(ml == 0) return(1);
nop
LBACL:
ld [%o0],%o3
inccc %o3 ! word + 1; carry set only when the word wraps to 0
bcc LBACX0 ! no carry out: finished, return(0)
st %o3,[%o0] ! (delay slot, always executed) store the incremented word
deccc %o1
bgt LBACL ! carry still pending and words remain
inc 4,%o0 ! (delay slot) next word
LBACX1:
retl
mov 1,%o0 ! (delay slot) carry out = 1
LBACX0:
retl
mov 0,%o0 ! (delay slot) carry out = 0
! _BnnAdd (mm ml nn nl car): %o0 = mm, %o1 = ml, %o2 = nn, %o3 = nl,
! %o4 = car.
! Adds the nl-word number at nn, plus the carry in, into the low nl
! words of the number at mm, then branches to _BnnAddCarry with
! (mm + nl, ml - nl, carry) to propagate the carry through the
! remaining words of mm; the return value is produced there.
! The carry is kept in %o4 between iterations because the loop-counter
! deccc overwrites the condition codes.  %o5 is scratch.
.proc 14
.global _BnnAdd ! (mm ml nn nl car)
_BnnAdd:
sub %o1,%o3,%o1 ! ml -= nl
tst %o3
be,a _BnnAddCarry ! if (nl == 0) %o2 = car; goto AddCarry
mov %o4,%o2 ! (delay slot, taken only) %o2 = car
LBAD1:
ld [%o2],%o5 ! o5 = *nn
addcc -1,%o4,%g0 ! set C = carin (-1 + 1 carries, -1 + 0 does not)
ld [%o0],%o4 ! o4 = *mm
inc 4,%o2 ! nn++
addxcc %o5,%o4,%o5 ! o5 = *mm + *nn + C, C = carout
addx %g0,%g0,%o4 ! o4 = carout
st %o5,[%o0]
deccc %o3
bne LBAD1
inc 4,%o0 ! (delay slot) mm++
b _BnnAddCarry ! propagate the carry through the rest of mm
mov %o4,%o2 ! (delay slot) %o2 = carry
! _BnnSubtractBorrow (mm, ml, car): %o0 = mm (word address), %o1 = ml
! (word count), %o2 = car.
! car follows the "1 = no borrow" convention.  When car is non-zero
! nothing is modified and 1 is returned.  When car is zero, 1 is
! subtracted from the ml-word number at mm, propagating the borrow
! upwards; returns 1 in %o0 as soon as a word absorbs the borrow, 0 if
! the borrow runs off the top (or ml is zero).
! Leaf routine; %o3 is scratch.  Also entered by branch from
! _BnnSubtract.
.proc 14
.global _BnnSubtractBorrow ! (mm, ml, car)
_BnnSubtractBorrow:
tst %o2
bne LSBBX1 ! if(car != 0) return(1);
tst %o1 ! (delay slot) Z = (ml == 0)
be LSBBX0 ! if(ml == 0) return(0);
nop
LSBBL:
ld [%o0],%o3
deccc %o3 ! word - 1; carry (borrow) set only when the word was 0
bcc LSBBX1 ! no borrow out: finished, return(1)
st %o3,[%o0] ! (delay slot, always executed) store the decremented word
deccc %o1
bgt LSBBL ! borrow still pending and words remain
inc 4,%o0 ! (delay slot) next word
LSBBX0:
retl
mov 0,%o0 ! (delay slot) result = 0: borrow out of the top word
LSBBX1:
retl
mov 1,%o0 ! (delay slot) result = 1: no borrow out
! _BnnSubtract (mm ml nn nl car): %o0 = mm, %o1 = ml, %o2 = nn,
! %o3 = nl, %o4 = car.
! Subtracts the nl-word number at nn from the low nl words of the
! number at mm, then branches to _BnnSubtractBorrow with
! (mm + nl, ml - nl, car) to propagate through the remaining words of
! mm; the return value is produced there.
! car follows the "1 = no borrow" convention: the borrow fed to subxcc
! is the inverse of car, and the new car is 1 minus the borrow out.
! car is kept in %o4 between iterations because the loop-counter deccc
! overwrites the condition codes.  %o5 is scratch.
.proc 14
.global _BnnSubtract ! (mm ml nn nl car)
_BnnSubtract:
sub %o1,%o3,%o1 ! ml -= nl
tst %o3
be,a _BnnSubtractBorrow ! if (nl == 0) %o2 = car; goto SubBorrow
mov %o4,%o2 ! (delay slot, taken only) %o2 = car
LSUB1:
ld [%o2],%o5 ! o5 = *nn
deccc %o4 ! car - 1: C (borrow in) = 1 when car == 0, 0 when car == 1
ld [%o0],%o4 ! o4 = *mm
inc 4,%o2 ! nn++
subxcc %o4,%o5,%o5 ! o5 = *mm - *nn - C, C = borrow out
mov 1,%o4
subx %o4,%g0,%o4 ! o4 = 1 - C = carout (1 = no borrow)
st %o5,[%o0]
deccc %o3
bne LSUB1
inc 4,%o0 ! (delay slot) mm++
b _BnnSubtractBorrow ! propagate through the rest of mm
mov %o4,%o2 ! (delay slot) %o2 = car
! _BnnMultiplyDigit: multiply-accumulate of a multi-word number by one
! 32-bit digit.
!   %o0 = address of the accumulator words     (pp, by analogy with
!   %o1 = accumulator word count                the argument comments on
!   %o2 = address of the multiplicand words     _BnnAdd -- names are
!   %o3 = multiplicand word count               this reviewer's; confirm
!   %o4 = digit d                               against the C prototype)
! For each of the %o3 multiplicand words the 64-bit product word * d is
! added, together with the running carry word, into the corresponding
! accumulator word.  The carry is then propagated through the remaining
! (%o1 - %o3) accumulator words.  Returns in %o0 whatever carry is left.
!
! Shortcuts: d == 0 returns 0 without touching memory; d == 1 branches
! straight to _BnnAdd with carry in = 0.
!
! The 32x32 product is built with 32 mulscc steps plus a final aligning
! step.  mulscc treats its rs2 operand (the multiplicand word) as
! signed, so when that word has its top bit set d is added to the high
! half of the product to obtain the unsigned result.
!
! The write to %y is issued first in the loop body so that three
! instructions (two loads and the addcc) separate it from the first
! mulscc: SPARC V8 allows a write to a state register to take effect up
! to three instructions late.
!
! Register use after save: %i5 = carry word, %l0 = multiplicand word,
! %l1 = remaining accumulator words, %l7 = accumulator word,
! %o1:%o0 = 64-bit product.
.proc 14
.global _BnnMultiplyDigit
_BnnMultiplyDigit:
!#PROLOGUE# 0
!#PROLOGUE# 1
	tst	%o4
	bne	LMDnonzero
	cmp	%o4,1			! (delay slot) flags for the d == 1 test below
	retl				! d == 0: nothing to add
	mov	0,%o0			! (delay slot) return 0
LMDnonzero:
	bne	LMD0			! d != 1: general case
	mov	0,%o5			! (delay slot) carry = 0; seen as %i5 after save
	b	_BnnAdd			! shortcut to BnnAdd
	mov	0,%o4			! (delay slot) carry in = 0
LMD0:
	save	%sp,-96,%sp
	tst	%i3
	be	L77007			! no multiplicand words
	sub	%i1,%i3,%l1		! (delay slot) %l1 = accumulator words beyond the multiplicand
LMD1:
	mov	%i4,%y			! Y = d (multiplier); must precede mulscc by 3 instructions
	ld	[%i0],%l7		! %l7 = accumulator word
	ld	[%i2],%l0		! %l0 = multiplicand word
	addcc	%g0,%g0,%o0		! initialize: partial product = 0, N = V = 0
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%l0,%o0; mulscc %o0,%l0,%o0;
	mulscc %o0,%g0,%o0		! align
	tst	%l0
	blt,a	LMDsignfix		! multiplicand word had its top bit set
	add	%o0,%i4,%o0		! (delay slot, taken only) high half += d
LMDsignfix:
	mov	%o0,%o1			! %o1 = high half of the product
	mov	%y,%o0			! %o0 = low half of the product
	addcc	%o0,%i5,%i1		! low half + carry word
	inc	4,%i2			! next multiplicand word
	addx	%o1,%g0,%i5		! carry word = high half + carry bit
	addcc	%l7,%i1,%l7		! accumulator word += low sum
	addx	%g0,%i5,%i5		! carry word += carry bit
	st	%l7,[%i0]
	deccc	%i3
	bgt	LMD1
	inc	4,%i0			! (delay slot) next accumulator word
L77007:
	tst	%i5
	be	LMDexit			! no carry left: return 0
	deccc	%l1			! (delay slot) one fewer accumulator word remains
LY3: ! [internal]
	blt	LMDexit			! ran out of accumulator words: return the carry
	inc	4,%i0			! (delay slot) step to the next word
	ld	[%i0-4],%i1
	addcc	%i1,%i5,%i1		! add the carry word in
	addxcc	%g0,%g0,%i5		! new carry = carry bit; Z set when it is 0
	st	%i1,[%i0-4]
	bne,a	LY3			! carry still pending
	deccc	%l1			! (delay slot, taken only)
LMDexit:
	ret
	restore	%g0,%i5,%o0		! return the remaining carry
! _BnnDivideDigit: divide a multi-word number by one 32-bit digit.
! The header of this file states that this routine is unmodified
! compiler output (cc -O2 KerN.c); the comments below were added by
! reading the generated code, and the variable names in them (qq, nn,
! nl, d, k, ch, cl, rh, rl, qa, ph, pl) are this reviewer's labels --
! confirm against KerN.c.
!
! Incoming arguments (after save): %i0 = qq (quotient address),
! %i1 = nn (dividend address), %i2 = nl (dividend word count),
! %i3 = d (divisor).  The value left in %i0 at the ret/restore is
! returned to the caller.
!
! Outline:
!   1. k = leading zero bits of d.  If k != 0, d and the dividend are
!      shifted left by k bits (the word just below qq is saved first).
!   2. Working from the top dividend word downwards, each quotient word
!      is produced as two 16-bit halves: an estimate is obtained with
!      .udiv by the high half of d, its product with d is formed with
!      two .umul calls, and the estimate is decremented while the
!      product exceeds the current remainder.
!   3. If k != 0 the dividend is shifted back right by k bits, with
!      three cases depending on where qq lies relative to nn.
!   4. The final remainder, shifted right by k, is returned.
!
! Register use in the main loop: %l3 = qq cursor, %i1 = nn cursor,
! %i2 = words left, %i0 = rl, %l1 = rh, %l2 = qa, %l0 = pl, %o2 = ph,
! %l4 = ch = d >> 16, %i4 = cl = d & 0xffff, %l5 = cl << 16,
! %i5 = 0xffff, %l6 = k, %l7 = saved word count.
.proc 14
.align 4
.global _BnnDivideDigit
.proc 016
_BnnDivideDigit:
!#PROLOGUE# 0
save %sp,-120,%sp
!#PROLOGUE# 1
mov %i0,%l3 ! %l3 = qq
call _BnnNumLeadingZeroBitsInDigit,0
mov %i3,%o0 ! (delay slot) argument = d
orcc %o0,%g0,%l6 ! %l6 = k, flags set from k
be L146 ! k == 0: no scaling needed
mov %i1,%o0 ! (delay slot) %o0 = nn
mov %i2,%l7 ! %l7 = nl as passed in
sll %i3,%l6,%i3 ! d <<= k
mov %l7,%o1 ! %o1 = nl
ld [%l3-4],%o3 ! %o3 = word just below qq
mov %l6,%o2 ! %o2 = k
call _BnnShiftLeft,0 ! BnnShiftLeft(nn, nl, k)
st %o3,[%fp-20] ! (delay slot) keep the word below qq in the frame
L146:
sll %i2,2,%o0
add %i1,%o0,%i1 ! nn += nl
add %i2,-1,%i2 ! nl--
sll %i2,2,%o0
add %l3,%o0,%l3 ! qq += nl
add %i1,-4,%i1
ld [%i1],%i0 ! rl = *--nn
cmp %i2,0
srl %i3,16,%l4 ! ch = d >> 16
sethi %hi(65535),%o0
or %o0,%lo(65535),%o0 ! %o0 = 0xffff
be L148 ! nl == 0: no quotient words to produce
and %i3,%o0,%i4 ! (delay slot) cl = d & 0xffff
sll %i4,16,%l5 ! %l5 = cl << 16
mov %o0,%i5 ! %i5 = 0xffff
L163: ! ---- one quotient word per pass ----
add %i2,-1,%i2 ! nl--
mov %i0,%l1 ! rh = rl
add %i1,-4,%i1
ld [%i1],%i0 ! rl = *--nn
mov %l1,%o0
call .udiv,0 ! qa = rh / ch
mov %l4,%o1
mov %o0,%l2 ! %l2 = qa
mov %i4,%o0
call .umul,0 ! pl = cl * qa
mov %l2,%o1
mov %o0,%l0 ! %l0 = pl
mov %l4,%o0
call .umul,0 ! ph = ch * qa
mov %l2,%o1
mov %o0,%o2 ! %o2 = ph
srl %l0,16,%o0
add %o2,%o0,%o2 ! ph += pl >> 16
b L149
sll %l0,16,%l0 ! (delay slot) pl <<= 16
L154: ! reached with ph == rh; flags hold pl vs rl
bleu L155 ! pl <= rl: product fits, subtract without borrow
add %l1,-1,%o0 ! (delay slot) %o0 = rh - 1
cmp %l5,%l0 ! (cl << 16) vs pl
L172: ! product too large: qa--, ph:pl -= ch:(cl << 16)
bleu L152 ! (cl << 16) <= pl: no borrow into ph
add %l2,-1,%l2 ! (delay slot) qa--
sub %l0,%l5,%l0 ! pl -= cl << 16
add %o2,-1,%o0
b L149
sub %o0,%l4,%o2 ! (delay slot) ph = ph - 1 - ch
L152:
sub %l0,%l5,%l0 ! pl -= cl << 16
sub %o2,%l4,%o2 ! ph -= ch
L149:
cmp %o2,%l1 ! ph vs rh
bgu L172 ! ph > rh: adjust again
cmp %l5,%l0 ! (delay slot) (cl << 16) vs pl, used at L172
cmp %o2,%l1
be L154 ! ph == rh: decide on the low halves
cmp %l0,%i0 ! (delay slot) pl vs rl
bleu L155 ! pl <= rl: subtract without borrow
add %l1,-1,%o0 ! (delay slot) %o0 = rh - 1
sub %i0,%l0,%i0 ! rl -= pl
b L156
sub %o0,%o2,%l1 ! (delay slot) rh = rh - 1 - ph
L155:
sub %i0,%l0,%i0 ! rl -= pl
sub %l1,%o2,%l1 ! rh -= ph
L156:
add %l3,-4,%l3 ! --qq
sll %l2,16,%o0
st %o0,[%l3] ! *qq = qa << 16 (high half of the quotient word)
sll %l1,16,%o0
srl %i0,16,%o1
or %o0,%o1,%o0 ! %o0 = (rh << 16) | (rl >> 16)
call .udiv,0 ! qa = that / ch
mov %l4,%o1
mov %o0,%l2 ! %l2 = qa
mov %i4,%o0
call .umul,0 ! pl = cl * qa
mov %l2,%o1
mov %o0,%l0 ! %l0 = pl
mov %l4,%o0
call .umul,0 ! ph = ch * qa
mov %l2,%o1
mov %o0,%o2 ! %o2 = ph
srl %l0,16,%o0
add %o2,%o0,%o2 ! ph += pl >> 16
and %l0,%i5,%o1 ! pl & 0xffff
and %o2,%i5,%o0 ! ph & 0xffff
sll %o0,16,%o0
or %o1,%o0,%l0 ! pl = (pl & 0xffff) | ((ph & 0xffff) << 16)
b L157
srl %o2,16,%o2 ! (delay slot) ph >>= 16
L162: ! reached with ph == rh; flags hold pl vs rl
bleu,a L173 ! pl <= rl: product fits
sub %i0,%l0,%i0 ! (delay slot, taken only) rl -= pl
cmp %i3,%l0 ! d vs pl
L174: ! product too large: qa--, ph:pl -= d
bleu L160 ! d <= pl: no borrow into ph
add %l2,-1,%l2 ! (delay slot) qa--
sub %l0,%i3,%l0 ! pl -= d
b L157
add %o2,-1,%o2 ! (delay slot) ph-- for the borrow
L160:
sub %l0,%i3,%l0 ! pl -= d
L157:
cmp %o2,%l1 ! ph vs rh
bgu L174 ! ph > rh: adjust again
cmp %i3,%l0 ! (delay slot) d vs pl, used at L174
cmp %o2,%l1
be L162 ! ph == rh: decide on the low halves
cmp %l0,%i0 ! (delay slot) pl vs rl
sub %i0,%l0,%i0 ! rl -= pl
L173:
ld [%l3],%o0
cmp %i2,0 ! any dividend words left?
or %l2,%o0,%o0 ! merge the low half of the quotient word
bne L163 ! yes: next quotient word
st %o0,[%l3] ! (delay slot) *qq |= qa
L148: ! ---- undo the scaling of the dividend ----
cmp %l6,0
be L164 ! k == 0: dividend was never shifted
cmp %l3,%i1 ! (delay slot) qq vs nn
bleu L175 ! qq <= nn
sll %l7,2,%o0 ! (delay slot) %o0 = saved word count * 4
add %i1,%o0,%o0 ! %o0 = nn + saved word count
cmp %l3,%o0
bgeu L165 ! qq at or beyond the end of the dividend: no overlap
sub %l3,%i1,%o0 ! (delay slot) byte distance qq - nn
sra %o0,2,%l7 ! %l7 = qq - nn in words
mov %i1,%o0
mov %l7,%o1
call _BnnShiftRight,0 ! BnnShiftRight(nn, qq - nn, k): only the words below qq
mov %l6,%o2
sll %l7,2,%o0
ld [%fp-20],%o3 ! reload the word saved before the left shift
add %o0,%i1,%o0
b L164
st %o3,[%o0-4] ! (delay slot) put it back just below qq
L165:
cmp %l3,%i1 ! qq vs nn
L175:
bne L167 ! qq != nn: shift the whole dividend back
mov %i1,%o0 ! (delay slot) %o0 = nn
sll %l7,2,%o0
add %o0,-4,%o0
add %i1,%o0,%o0 ! %o0 = address of the top dividend word
b L170
mov 1,%o1 ! (delay slot) qq == nn: shift that single word only
L167:
mov %l7,%o1 ! %o1 = saved word count
L170:
call _BnnShiftRight,0
mov %l6,%o2 ! (delay slot) %o2 = k
L164:
srl %i0,%l6,%i0 ! remainder >>= k
ret
restore
.seg "data" ! [internal]
! _copyright: identification string, emitted as 16-bit halfwords.
! Read with the high byte of each halfword first, the bytes are the
! ASCII text
!   "@(#)KerN.c: copyright Digital Equipment Corporation & INRIA 1988, 1989"
! followed by a newline (0x0a) and a terminating NUL.
! The label is not declared .global and nothing in this file refers
! to it.
_copyright:
.half 0x4028 ! "@("
.half 0x2329 ! "#)"
.half 0x4b65 ! "Ke"
.half 0x724e ! "rN"
.half 0x2e63 ! ".c"
.half 0x3a20 ! ": "
.half 0x636f ! "co"
.half 0x7079 ! "py"
.half 0x7269 ! "ri"
.half 0x6768 ! "gh"
.half 0x7420 ! "t "
.half 0x4469 ! "Di"
.half 0x6769 ! "gi"
.half 0x7461 ! "ta"
.half 0x6c20 ! "l "
.half 0x4571 ! "Eq"
.half 0x7569 ! "ui"
.half 0x706d ! "pm"
.half 0x656e ! "en"
.half 0x7420 ! "t "
.half 0x436f ! "Co"
.half 0x7270 ! "rp"
.half 0x6f72 ! "or"
.half 0x6174 ! "at"
.half 0x696f ! "io"
.half 0x6e20 ! "n "
.half 0x2620 ! "& "
.half 0x494e ! "IN"
.half 0x5249 ! "RI"
.half 0x4120 ! "A "
.half 0x3139 ! "19"
.half 0x3838 ! "88"
.half 0x2c20 ! ", "
.half 0x3139 ! "19"
.half 0x3839 ! "89"
.half 0xa00 ! newline, NUL