702 lines
19 KiB
Plaintext
702 lines
19 KiB
Plaintext
; Copyright Digital Equipment Corporation & INRIA 1988, 1989, 1990
|
|
;
|
|
; KerN for the VAX.
|
|
; [Bepaul, Shand]
|
|
; Last modified_on Mon Apr 2 21:03:05 GMT+2:00 1990 by shand
|
|
; modified_on Mon Nov 20 13:51:10 GMT+1:00 1989 by herve
|
|
; modified_on 17-OCT-1989 20:37:48 by Jim Lawton
|
|
;
|
|
; >>> IMPORTANT <<< DO NOT MODIFY THIS FILE -- IT IS GENERATED FROM vaxKerN.s
|
|
;
|
|
; >>> READ THIS <<<
|
|
;
|
|
; This file is automatically converted from unix to VAX/VMS assembler format.
|
|
; On VMS it is the callee's responsibility to save all modified registers
|
|
; other than r0 and r1. On Ultrix r0-r5 are considered saved by caller.
|
|
; Specify procedure entry masks that save ALL modified registers (including
|
|
; r0 and r1) and "&" them with "callee_save" which is a predefined constant
|
|
; that eliminates the saves which are unnecessary under whichever calling
|
|
; convention the file is being assembled for.
|
|
; callee_save is ANDed into every procedure entry mask in this file.
; ^C3 is the complement of 3, i.e. every mask bit except those of r0 and r1,
; so r0/r1 are never saved while all other listed registers are.
callee_save = ^C3
|
|
; WARNING: text after comment used in conversion to VMS format assembler
|
|
.psect vaxKerN_data,noexe,quad
|
|
; Identification string kept in the non-executable data psect.
; NOTE(review): the \12\0 escapes are inherited from the Unix source; confirm
; that the VMS assembler expands them instead of storing them literally.
_copyright: .ascii "@(#)vaxKerN.s: copyright Digital Equipment Corporation & INRIA 1988, 1989, 1990\12\0"
|
|
; WARNING: text after comment used in conversion to VMS format assembler
|
|
; Everything below is code: executable, shareable, position independent,
; not writable, quadword aligned (per the psect attributes).
.psect vaxKerN,exe,shr,pic,nowrt,quad
|
|
; void BnnSetToZero(nn, nl)
;   Stores zero into the nl consecutive longword digits starting at nn.
;   In:   4(ap) = nn, 8(ap) = nl
;   Uses: r0 = address of next digit, r1 = digits still to clear
        .globl  BnnSetToZero
        .align  3
BnnSetToZero:
        .word   ^X3&callee_save         ; mask: r0,r1 (filtered by callee_save)
        movl    8(ap),r1                ; r1 = nl
        movl    4(ap),r0                ; r0 = nn
        brb     STZ_test                ; check the count before the first store
STZ_clear:
        clrl    (r0)+                   ; *nn++ = 0
STZ_test:
        sobgeq  r1,STZ_clear            ; repeat while the count before decrement was non-zero
        ret
|
|
|
|
; void BnnAssign(mm, nn, nl)
;   Copies nl longword digits from nn to mm.  The copy direction is chosen
;   from the unsigned comparison of the two addresses so that overlapping
;   regions are handled: upward when mm is below nn, downward when mm is
;   above nn, and nothing at all when they are equal.
;   In:   4(ap) = mm, 8(ap) = nn, 12(ap) = nl
;   Uses: r0 = destination cursor, r1 = source cursor, r2 = digits left
        .globl  BnnAssign
        .align  3
BnnAssign:
        .word   ^X7&callee_save         ; mask: r0,r1,r2 (filtered by callee_save)
        movl    12(ap),r2               ; r2 = nl
        movl    8(ap),r1                ; r1 = nn
        movl    4(ap),r0                ; r0 = mm
        cmpl    r0,r1                   ; unsigned address comparison
        beql    ASG_done                ; mm == nn: source and destination coincide
        bgtru   ASG_down                ; mm above nn: copy from the top down
        brb     ASG_up_test             ; mm below nn: copy from the bottom up
ASG_up:
        movl    (r1)+,(r0)+             ; *mm++ = *nn++
ASG_up_test:
        sobgeq  r2,ASG_up               ; while (nl-- != 0)
        ret
ASG_down:
        moval   (r0)[r2],r0             ; mm = &mm[nl]
        moval   (r1)[r2],r1             ; nn = &nn[nl]
        brb     ASG_down_test
ASG_down_loop:
        movl    -(r1),-(r0)             ; *--mm = *--nn
ASG_down_test:
        sobgeq  r2,ASG_down_loop        ; while (nl-- != 0)
ASG_done:
        ret
|
|
|
|
; void BnnSetDigit(nn, d)
;   Writes the digit d into the longword that nn points at.
;   In: 4(ap) = nn, 8(ap) = d.  No registers are modified.
        .globl  BnnSetDigit
        .align  3
BnnSetDigit:
        .word   ^X0&callee_save         ; empty mask
        movl    8(ap),@4(ap)            ; *nn = d
        ret
|
|
|
|
; BigNumDigit BnnGetDigit(nn)
;   Returns in r0 the longword digit that nn points at.
;   In: 4(ap) = nn
        .globl  BnnGetDigit
        .align  3
BnnGetDigit:
        .word   ^X0&callee_save         ; empty mask
        movl    @4(ap),r0               ; r0 = *nn
        ret
|
|
|
|
; BigNumLength BnnNumDigits(nn, nl)
;   Scans nn[0..nl-1] downward from the top digit and returns in r0 the
;   index of the highest non-zero digit plus one.  Returns 1 when nl is 0
;   or when every digit is zero.
;   In:   4(ap) = nn, 8(ap) = nl
;   Uses: r0 = digits left to examine, r1 = cursor walking down from &nn[nl]
        .globl  BnnNumDigits
        .align  3
BnnNumDigits:
        .word   ^X2&callee_save         ; mask: r1 (filtered by callee_save)
        movl    8(ap),r0                ; r0 = nl
        moval   @4(ap)[r0],r1           ; r1 = &nn[nl]
        brb     NDG_test
NDG_scan:
        tstl    -(r1)                   ; examine the next lower digit
        bneq    NDG_found               ; non-zero: this is the top significant digit
NDG_test:
        sobgeq  r0,NDG_scan             ; while (nl-- != 0)
        movl    #1,r0                   ; nothing non-zero found
        ret                             ; return 1
NDG_found:
        incl    r0                      ; r0 held the digit's index; length is index + 1
        ret
|
|
|
|
; int BnnNumLeadingZeroBitsInDigit(d)
;   Returns in r0 how many zero bits sit above the highest set bit of d
;   (32 when d is zero).
;   In:   4(ap) = d
;   Uses: r0 = bit position being probed (31 downward), r1 = d
        .globl  BnnNumLeadingZeroBitsInDigit
        .align  3
BnnNumLeadingZeroBitsInDigit:
        .word   ^X2&callee_save         ; mask: r1 (filtered by callee_save)
        movl    #31,r0                  ; start at the top bit
        movl    4(ap),r1                ; r1 = d
NLZ_probe:
        bbs     r0,r1,NLZ_hit           ; bit r0 of d set: stop
        sobgeq  r0,NLZ_probe            ; try the next lower bit; falls through at -1
NLZ_hit:
        subl3   r0,#31,r0               ; r0 = 31 - position
        ret
|
|
|
|
; Boolean BnnDoesDigitFitInWord(d)
;   Always answers 1 in r0; the argument is not examined.
        .globl  BnnDoesDigitFitInWord
        .align  3
BnnDoesDigitFitInWord:
        .word   ^X0&callee_save         ; empty mask
        movl    #1,r0                   ; return 1 unconditionally
        ret
|
|
|
|
; Boolean BnnIsDigitZero(d)
;   Returns 1 in r0 when d is zero, otherwise 0.
;   In: 4(ap) = d
        .globl  BnnIsDigitZero
        .align  3
BnnIsDigitZero:
        .word   ^X2&callee_save         ; mask: r1 (filtered by callee_save)
        clrl    r0                      ; assume d is non-zero
        tstl    4(ap)                   ; examine d
        bneq    IDZ_done                ; non-zero: answer stays 0
        incl    r0                      ; zero: answer becomes 1
IDZ_done:
        ret
|
|
|
|
; Boolean BnnIsDigitNormalized(d)
;   Returns in r0 the most significant bit (bit 31) of d, i.e. 1 when the
;   top bit is set and 0 otherwise.
;   In: 4(ap) = d
        .globl  BnnIsDigitNormalized
        .align  3
BnnIsDigitNormalized:
        .word   ^X0&callee_save         ; empty mask
        extzv   #31,#1,4(ap),r0         ; r0 = one-bit field at bit 31 of d
        ret
|
|
|
|
; Boolean BnnIsDigitOdd(d)
;   Returns in r0 the least significant bit of d.
;   In: 4(ap) = d
        .globl  BnnIsDigitOdd
        .align  3
BnnIsDigitOdd:
        .word   ^X0&callee_save         ; empty mask
        bicl3   #-2,4(ap),r0            ; clear every bit except bit 0: r0 = d & 1
        ret
|
|
|
|
; int BnnCompareDigits(d1, d2)
;   Unsigned comparison of two digits.  Returns in r0:
;     1 when d1 is above d2, 0 when they are equal, -1 when d1 is below d2.
;   In: 4(ap) = d1, 8(ap) = d2
        .globl  BnnCompareDigits
        .align  3
BnnCompareDigits:
        .word   ^X0&callee_save         ; empty mask
        clrl    r0                      ; start from the "equal" answer
        cmpl    4(ap),8(ap)             ; compare d1 with d2
        beql    CMP_done                ; equal: return 0
        blssu   CMP_below               ; d1 below d2 (unsigned)
        incl    r0                      ; d1 above d2: return 1
        ret
CMP_below:
        decl    r0                      ; return -1
CMP_done:
        ret
|
|
|
|
; void BnnComplement(nn, nl)
;   Replaces each of the nl digits starting at nn by its one's complement.
;   In:   4(ap) = nn, 8(ap) = nl
;   Uses: r0 = cursor, r1 = digits left
        .globl  BnnComplement
        .align  3
BnnComplement:
        .word   ^X2&callee_save         ; mask: r1 (filtered by callee_save)
        movl    8(ap),r1                ; r1 = nl
        movl    4(ap),r0                ; r0 = nn
        brb     CPL_test
CPL_flip:
        mcoml   (r0),(r0)+              ; *nn = ~*nn, then step to the next digit
CPL_test:
        sobgeq  r1,CPL_flip             ; while (nl-- != 0)
        ret
|
|
|
|
; void BnnAndDigits(nn, d)
;   ANDs the digit d into the longword that nn points at.  The machine has
;   a bit-clear instruction rather than an AND, so the complement of d is
;   cleared out of *nn.
;   In:   4(ap) = nn, 8(ap) = d
;   Uses: r0 as scratch
        .globl  BnnAndDigits
        .align  3
BnnAndDigits:
        .word   ^X0&callee_save         ; empty mask
        mcoml   8(ap),r0                ; r0 = ~d
        bicl2   r0,@4(ap)               ; *nn &= ~r0, i.e. *nn &= d
        ret
|
|
|
|
; void BnnOrDigits(nn, d)
;   ORs the digit d into the longword that nn points at.
;   In: 4(ap) = nn, 8(ap) = d.  No registers are modified.
        .globl  BnnOrDigits
        .align  3
BnnOrDigits:
        .word   ^X0&callee_save         ; empty mask
        bisl2   8(ap),@4(ap)            ; *nn |= d
        ret
|
|
|
|
; void BnnXorDigits(nn, d)
;   Exclusive-ORs the digit d into the longword that nn points at.
;   In: 4(ap) = nn, 8(ap) = d.  No registers are modified.
        .globl  BnnXorDigits
        .align  3
BnnXorDigits:
        .word   ^X0&callee_save         ; empty mask
        xorl2   8(ap),@4(ap)            ; *nn ^= d
        ret
|
|
|
|
; BigNumDigit BnnShiftLeft(mm, ml, nbi)
;   Shifts the ml-digit number at mm left by nbi bits in place, working
;   from the least significant digit upward.  Returns in r0 the bits that
;   were pushed out of the top digit (0 when nbi or ml is 0).
;   In:   4(ap) = mm, 8(ap) = ml, 12(ap) = nbi
;   Uses: r0 = bits carried into the next digit, r1 = digits left,
;         r2 = cursor, r3 = nbi, r4 = 32 - nbi, r5 = original digit,
;         r6 = shifted digit
        .globl  BnnShiftLeft
        .align  3
BnnShiftLeft:
        .word   ^X7E&callee_save        ; mask: r1-r6 (filtered by callee_save)
        clrl    r0                      ; nothing carried in yet
        movl    12(ap),r3               ; r3 = nbi
        beql    SHL_done                ; a zero-bit shift changes nothing
        movl    8(ap),r1                ; r1 = ml
        movl    4(ap),r2                ; r2 = mm
        subl3   r3,#32,r4               ; r4 = 32 - nbi
        brb     SHL_test
SHL_digit:
        movl    (r2),r5                 ; keep the unshifted digit
        ashl    r3,r5,r6                ; r6 = digit shifted left by nbi
        bisl3   r0,r6,(r2)+             ; store it with the bits carried in; advance
        extzv   r4,r3,r5,r0             ; r0 = top nbi bits of the old digit
SHL_test:
        sobgeq  r1,SHL_digit            ; while (ml-- != 0)
SHL_done:
        ret                             ; r0 = bits shifted out
|
|
|
|
; BigNumDigit BnnShiftRight(mm, ml, nbi)
;   Shifts the ml-digit number at mm right by nbi bits in place, working
;   from the most significant digit downward.  Returns in r0 the bits that
;   fell off the bottom digit, left-aligned in the longword (0 when nbi or
;   ml is 0).
;   In:   4(ap) = mm, 8(ap) = ml, 12(ap) = nbi
;   Uses: r0 = bits carried into the next digit, r1 = digits left,
;         r2 = cursor, r3 = nbi, r4 = 32 - nbi, r5 = original digit,
;         r6 = shifted digit
        .globl  BnnShiftRight
        .align  3
BnnShiftRight:
        .word   ^X7E&callee_save        ; mask: r1-r6 (filtered by callee_save)
        clrl    r0                      ; nothing carried in yet
        movl    12(ap),r3               ; r3 = nbi
        beql    SHR_done                ; a zero-bit shift changes nothing
        movl    8(ap),r1                ; r1 = ml
        moval   @4(ap)[r1],r2           ; r2 = &mm[ml]
        subl3   r3,#32,r4               ; r4 = 32 - nbi
        brb     SHR_test
SHR_digit:
        movl    -(r2),r5                ; step down and keep the unshifted digit
        extzv   r3,r4,r5,r6             ; r6 = digit shifted right by nbi (zero filled)
        bisl3   r0,r6,(r2)              ; store it with the bits carried in
        ashl    r4,r5,r0                ; r0 = low nbi bits moved to the top
SHR_test:
        sobgeq  r1,SHR_digit            ; while (ml-- != 0)
SHR_done:
        ret                             ; r0 = bits shifted out
|
|
|
|
; BigNumCarry BnnAddCarry(nn, nl, car)
;   Adds the carry car into the nl-digit number at nn.
;   Returns in r0: 0 when car is 0 or when the carry was absorbed,
;                  1 when nl is 0 or the carry ran off the top digit.
;   In:   4(ap) = nn, 8(ap) = nl, 12(ap) = car
;   Uses: r0 = digits left, r1 = cursor
        .globl  BnnAddCarry
        .align  3
BnnAddCarry:
        .word   ^X2&callee_save         ; mask: r1 (filtered by callee_save)
        movl    12(ap),r0               ; r0 = car
        beql    ADC_ret                 ; no carry in: return 0 untouched
        movl    4(ap),r1                ; r1 = nn
        movl    8(ap),r0                ; r0 = nl
        beql    ADC_overflow            ; no digits: the carry goes straight out
ADC_next:
        incl    (r1)+                   ; (*nn++)++
        bcc     ADC_absorbed            ; digit did not wrap: finished
        sobgtr  r0,ADC_next             ; while (--nl != 0)
ADC_overflow:
        movl    #1,r0                   ; carry out
ADC_ret:
        ret
ADC_absorbed:
        clrl    r0                      ; no carry out
        ret
|
|
|
|
; BigNumCarry BnnAdd(mm, ml, nn, nl, car)
;   mm[0..ml-1] += nn[0..nl-1] + car; returns the carry out (0 or 1) in r0.
;   The first nl digits are added with carry, then any carry is rippled
;   through the remaining ml-nl digits of mm.
;   In:   4(ap) = mm, 8(ap) = ml, 12(ap) = nn, 16(ap) = nl, 20(ap) = car
;   Uses: r0 = mm cursor, r1 = nn cursor, r2 = ml - nl, r3 = nl,
;         r4 = scratch (only written to set the C bit)
;   LADDEntry is a second entry point: BnnMultiplyDigit in this file
;   branches here with the same argument layout.
        .globl  BnnAdd
        .align  3
BnnAdd:
        .word   ^X1E&callee_save        ; mask: r1-r4 (filtered by callee_save)
LADDEntry:
        movl    4(ap),r0                ; r0 = mm
        movl    12(ap),r1               ; r1 = nn
        movl    16(ap),r3               ; r3 = nl
        bneq    ADD_have_n              ; nl != 0: do the digit-wise addition
        subl3   r3,8(ap),r2             ; r2 = ml - nl
        tstl    20(ap)                  ; nl == 0: only the carry remains
        bneq    ADD_ripple_test         ; car set: ripple it through mm
        clrl    r0
        ret                             ; return 0
ADD_have_n:
        subl3   r3,8(ap),r2             ; r2 = ml - nl
        addl3   20(ap),#-1,r4           ; car + ^XFFFFFFFF: C is set exactly when car != 0
ADD_digit:
        adwc    (r1)+,(r0)+             ; *mm++ += *nn++ + C
        sobgtr  r3,ADD_digit            ; while (--nl != 0); leaves C alone
        bcs     ADD_ripple_test         ; carry left over: ripple it
ADD_no_carry:
        clrl    r0
        ret                             ; return 0
ADD_ripple:
        incl    (r0)+                   ; (*mm++)++
        bcc     ADD_no_carry            ; absorbed
ADD_ripple_test:
        sobgeq  r2,ADD_ripple           ; while (ml-- != 0)
        movl    #1,r0
        ret                             ; carry ran off the top: return 1
|
|
|
|
; BigNumCarry BnnSubtractBorrow(nn, nl, car)
;   car != 0 means "no borrow": the number is left alone and car is returned.
;   car == 0 means a borrow is pending: 1 is subtracted from nn[0..nl-1].
;   Returns in r0: 1 when the borrow was absorbed, 0 when it ran off the
;   top digit (or nl was 0).
;   In:   4(ap) = nn, 8(ap) = nl, 12(ap) = car
;   Uses: r0 = digits left, r1 = cursor
        .globl  BnnSubtractBorrow
        .align  3
BnnSubtractBorrow:
        .word   ^X2&callee_save         ; mask: r1 (filtered by callee_save)
        movl    12(ap),r0               ; r0 = car
        bneq    SBB_ret                 ; nothing to borrow: return car
        movl    4(ap),r1                ; r1 = nn
        movl    8(ap),r0                ; r0 = nl
        beql    SBB_ret                 ; no digits: return 0
SBB_next:
        decl    (r1)+                   ; (*nn++)--
        bcc     SBB_settled             ; digit did not wrap: borrow absorbed
        sobgtr  r0,SBB_next             ; while (--nl != 0)
SBB_ret:
        ret                             ; r0 is 0 here, or the non-zero car
SBB_settled:
        movl    #1,r0
        ret                             ; return 1
|
|
|
|
; BigNumCarry BnnSubtract(mm, ml, nn, nl, car)
;   mm[0..ml-1] -= nn[0..nl-1], with car == 0 meaning an extra 1 is
;   subtracted (incoming borrow) and car != 0 meaning none.
;   Returns in r0: 1 when no borrow leaves the top digit, 0 when one does.
;   In:   4(ap) = mm, 8(ap) = ml, 12(ap) = nn, 16(ap) = nl, 20(ap) = car
;   Uses: r0 = ml - nl, r1 = nn cursor, r2 = mm cursor, r3 = nl,
;         r4 = scratch (only written to set the C bit)
        .globl  BnnSubtract
        .align  3
BnnSubtract:
        .word   ^X1E&callee_save        ; mask: r1-r4 (filtered by callee_save)
        movl    4(ap),r2                ; r2 = mm
        movl    12(ap),r1               ; r1 = nn
        movl    16(ap),r3               ; r3 = nl
        bneq    SUB_have_n              ; nl != 0: do the digit-wise subtraction
        subl3   r3,8(ap),r0             ; r0 = ml - nl
        tstl    20(ap)                  ; nl == 0: only the borrow remains
        beql    SUB_borrow_test         ; car == 0: a borrow must be propagated
        movl    #1,r0
        ret                             ; return 1
SUB_have_n:
        subl3   r3,8(ap),r0             ; r0 = ml - nl
        tstl    20(ap)                  ; clears C and sets Z = (car == 0)
        bneq    SUB_digit               ; car != 0: start with C clear
        addl3   #1,#-1,r4               ; 1 + ^XFFFFFFFF wraps: sets C (borrow in)
SUB_digit:
        sbwc    (r1)+,(r2)+             ; *mm++ -= *nn++ + C
        sobgtr  r3,SUB_digit            ; while (--nl != 0); leaves C alone
        bcs     SUB_borrow_test         ; borrow left over: ripple it
SUB_no_borrow:
        movl    #1,r0
        ret                             ; return 1
SUB_borrow:
        decl    (r2)+                   ; (*mm++)--
        bcc     SUB_no_borrow           ; absorbed
SUB_borrow_test:
        sobgeq  r0,SUB_borrow           ; while (ml-- != 0)
        clrl    r0
        ret                             ; borrow ran off the top: return 0
|
|
|
|
; BigNumCarry BnnMultiplyDigit(pp, pl, mm, ml, d)
;   pp[0..pl-1] += mm[0..ml-1] * d; returns the carry out (0 or 1) in r0.
;   In:   4(ap) = pp, 8(ap) = pl, 12(ap) = mm, 16(ap) = ml, 20(ap) = d
;   Uses: r0 = pair-loop counter (ml/2), r1 = mm cursor, r2 = d,
;         r3 = pp cursor, r4:r5 and r6:r7 = alternating 64-bit products,
;         r8 = pl - ml (digits available for the final carry ripple)
;
;   Special cases: d == 0 returns 0 at once; d == 1 clears the fifth
;   argument in place and branches to LADDEntry, turning the call into
;   BnnAdd(pp, pl, mm, ml, 0).
;
;   emul is a signed multiply, so each product is corrected to the
;   unsigned value.  The branch after emul tests the sign of the product:
;     d below 2^31:    add d to the high word when the product is negative
;                      (i.e. the digit of mm has its top bit set);
;     d 2^31 or above: always add the digit of mm to the high word, and
;                      also add d when the product is positive (top bit of
;                      the digit set).
;
;   note1: (2^32-1)*(2^32-1) = 2^64-1 - 2*(2^32-1)
;          thus 64 bits accommodates a*b+c+d for all a,b,c,d below 2^32
;   note2: the inner loop is doubled to avoid unnecessary register moves;
;          an odd ml enters at the second half of the loop body.
        .globl  BnnMultiplyDigit
        .align  3
BnnMultiplyDigit:
        .word   ^X1FE&callee_save       ; mask: r1-r8 (filtered by callee_save)
        movl    20(ap),r2               ; r2 = d
        blss    MLD_neg                 ; top bit of d set: use the second loop
        bneq    MLD_nonzero
        clrl    r0                      ; d == 0: nothing to add
        ret
MLD_nonzero:
        cmpl    #1,r2
        bneq    MLD_pos                 ; d != 1: real multiplication
        clrl    20(ap)                  ; fifth argument becomes car = 0
        brw     LADDEntry               ; continue as BnnAdd(pp, pl, mm, ml, 0)

; ---- d below 2^31 ----
MLD_pos:
        movl    4(ap),r3                ; r3 = pp
        movl    12(ap),r1               ; r1 = mm
        movl    16(ap),r7               ; r7 = ml
        subl3   r7,8(ap),r8             ; r8 = pl - ml
        ashl    #-1,r7,r0               ; r0 = ml / 2 = number of digit pairs
        clrl    r5                      ; no high word carried in yet
        bitl    #1,r7
        bneq    MLD_pos_odd             ; odd ml: do the single digit first
        clrl    r7                      ; even ml: r7 becomes the carried-in high word
        brb     MLD_pos_test
MLD_pos_pair:
        emul    (r1)+,r2,#0,r4          ; r4:r5 = digit * d (signed)
        bgeq    MLD_pos_fix1            ; product not negative: no correction
        addl2   r2,r5                   ; unsigned correction: high word += d
MLD_pos_fix1:
        addl2   r7,r4                   ; low word += previous high word
        adwc    #0,r5                   ; carry into the high word
        addl2   r4,(r3)+                ; *pp++ += low word
        adwc    #0,r5                   ; carry into the high word
MLD_pos_odd:
        emul    (r1)+,r2,#0,r6          ; r6:r7 = next digit * d (signed)
        bgeq    MLD_pos_fix2
        addl2   r2,r7                   ; unsigned correction: high word += d
MLD_pos_fix2:
        addl2   r5,r6                   ; low word += previous high word
        adwc    #0,r7
        addl2   r6,(r3)+                ; *pp++ += low word
        adwc    #0,r7
MLD_pos_test:
        sobgeq  r0,MLD_pos_pair         ; one pass per remaining pair
        addl2   r7,(r3)+                ; add the last high word into pp[ml]
        bcs     MLD_tail_test           ; carry out of pp[ml]: ripple it
MLD_ret0:
        clrl    r0
        ret                             ; return 0

; ---- d 2^31 or above ----
MLD_neg:
        movl    4(ap),r3                ; r3 = pp
        movl    12(ap),r1               ; r1 = mm
        movl    16(ap),r7               ; r7 = ml
        subl3   r7,8(ap),r8             ; r8 = pl - ml
        ashl    #-1,r7,r0               ; r0 = ml / 2 = number of digit pairs
        clrl    r5                      ; no high word carried in yet
        bitl    #1,r7
        bneq    MLD_neg_odd             ; odd ml: do the single digit first
        clrl    r7                      ; even ml: r7 becomes the carried-in high word
        brb     MLD_neg_test
MLD_neg_pair:
        movl    (r1)+,r6                ; r6 = digit
        emul    r6,r2,#0,r4             ; r4:r5 = digit * d (signed)
        bleq    MLD_neg_fix1            ; product not positive: digit's top bit clear
        addl2   r2,r5                   ; digit's top bit set: high word += d
MLD_neg_fix1:
        addl2   r6,r5                   ; d's top bit set: high word += digit
        addl2   r7,r4                   ; low word += previous high word
        adwc    #0,r5
        addl2   r4,(r3)+                ; *pp++ += low word
        adwc    #0,r5
MLD_neg_odd:
        movl    (r1)+,r4                ; r4 = next digit
        emul    r4,r2,#0,r6             ; r6:r7 = digit * d (signed)
        bleq    MLD_neg_fix2
        addl2   r2,r7                   ; digit's top bit set: high word += d
MLD_neg_fix2:
        addl2   r4,r7                   ; d's top bit set: high word += digit
        addl2   r5,r6                   ; low word += previous high word
        adwc    #0,r7
        addl2   r6,(r3)+                ; *pp++ += low word
        adwc    #0,r7
MLD_neg_test:
        sobgeq  r0,MLD_neg_pair         ; one pass per remaining pair
        addl2   r7,(r3)+                ; add the last high word into pp[ml]
        bcs     MLD_tail_test           ; carry out of pp[ml]: ripple it
        clrl    r0
        ret                             ; return 0

; ---- ripple a carry through the remaining digits of pp ----
MLD_tail:
        incl    (r3)+                   ; (*pp++)++
        bcc     MLD_ret0                ; absorbed: return 0
MLD_tail_test:
        sobgtr  r8,MLD_tail             ; while (--(pl-ml) is still positive)
        movl    #1,r0
        ret                             ; carry ran off the top: return 1
|
|
|
|
; BigNumDigit BnnDivideDigit(qq, nn, nl, d)
;   Divides the nl-digit number nn by the single digit d, from the most
;   significant digit downward.  The nl-1 quotient digits are stored at
;   qq[0..nl-2]; the remainder is returned in r0.
;   The top digit nn[nl-1] is taken as the initial remainder and is never
;   divided itself; the source comments state the precondition N below
;   D * 2**32 for each step, i.e. the running remainder stays below d.
;   In:   4(ap) = qq, 8(ap) = nn, 12(ap) = nl, 16(ap) = d
;   Uses: r0 = nn cursor (moving down), r1 = qq cursor (moving down),
;         r2 = digits left, r3 = D = d, r4 = low dividend word / quotient,
;         r5 = high dividend word / remainder, r7 = D' = D div 2,
;         r6, r8, r9 = low bits of the dividend saved across the shift
;
;   ediv is a signed divide, so two loops are used:
;     D below 2**31 (Lndivc1): divide directly when the quotient is known
;       to fit in 31 bits, otherwise halve N first and rebuild the result.
;     D 2**31 or above (Lndiv3): divide N shifted right 3 bits by D', then
;       rebuild quotient and remainder one bit at a time.
        .globl  BnnDivideDigit
        .align  3
BnnDivideDigit:
        .word   ^X3FE&callee_save       ; mask: r1-r9 (filtered by callee_save)
        movl    12(ap),r2               ; r2 = nl
        movl    16(ap),r3               ; r3 = D
        moval   @8(ap)[r2],r0           ; r0 = &nn[nl]
        decl    r2                      ; nl--
        moval   @4(ap)[r2],r1           ; r1 = &qq[nl]
        movl    -(r0),r5                ; r5 = top digit = initial remainder
        extzv   #1,#31,r3,r7            ; r7 = D' = D div 2
        tstl    r3
        bgeq    Lndivc2                 ; D below 2**31: first loop
        brw     Lndiv5                  ; top bit of D set: second loop

; ---- D below 2**31, N below D * 2**32 ----
Lndivc1:
        movl    -(r0),r4                ; r4 = next lower digit; N = r5:r4
        cmpl    r5,r7
        blss    Lndivc11                ; remainder below D': quotient fits, divide directly
        extzv   #0,#1,r4,r6             ; r6 = n0, the low bit of N
        ashq    #-1,r4,r4               ; N' = N quo 2, below D * 2**31
        ediv    r3,r4,r4,r5             ; r4 = Q' = N' quo D, below 2**31
                                        ; r5 = R' = N' rem D, below D
        ashq    #1,r4,r4                ; r4 = 2 * Q', below 2**32
                                        ; r5 = 2 * R', below 2 * D
        addl2   r6,r5                   ; r5 = 2 * R' + n0, below 2 * D
        cmpl    r5,r3                   ; r5 below D: Q = r4, R = r5
        blssu   Lndivc12                ; otherwise one more D fits
        incl    r4                      ; Q = r4 + 1
        subl2   r3,r5                   ; R = r5 - D
        brb     Lndivc12
Lndivc11:
        ediv    r3,r4,r4,r5             ; Q = r4, R = r5
Lndivc12:
        movl    r4,-(r1)                ; store the quotient digit
Lndivc2:
        sobgeq  r2,Lndivc1              ; while (nl-- != 0)
        movl    r5,r0                   ; return the remainder
        ret

; ---- D 2**31 or above ----
Lndiv3:
        movl    -(r0),r4                ; r4 = next lower digit; N = r5:r4
        extzv   #0,#1,r4,r9             ; r9 = n0
        extzv   #1,#1,r4,r6             ; r6 = n1
        extzv   #2,#1,r4,r8             ; r8 = n2
        ashq    #-3,r4,r4               ; r5:r4 = N shifted right 3 bits
        bicl2   #^XE0000000,r5          ; ashq is arithmetic: clear the 3 filled-in top bits
        ediv    r7,r4,r4,r5             ; r4 = Q' = shifted N quo D'
                                        ; r5 = R' = shifted N rem D'
        ashl    #1,r5,r5                ; r5 = 2 * R'
        addl2   r8,r5                   ; r5 = 2 * R' + n2
        bbc     #0,r3,Lndiv4            ; low bit of D clear: D = 2 * D', no correction
        cmpl    r5,r4                   ; D odd: want r5 = 2 * R' + n2 - Q'
        blssu   Lndiv30                 ; the difference would be negative
        subl2   r4,r5                   ; difference is not negative: take it
        brb     Lndiv4
Lndiv30:
        subl2   r4,r5                   ; take the (wrapped) difference
        decl    r4                      ; Q' = Q' - 1
        addl2   r3,r5                   ; r5 = r5 + D
Lndiv4:
        ashl    #1,r4,r4                ; quotient doubled
        addl2   r5,r5                   ; remainder doubled; C = bit shifted out
        bisl2   r6,r5                   ; bring in n1 (bisl2 leaves C as set above)
        bcs     Lndiv40                 ; doubling overflowed: D certainly fits
        cmpl    r5,r3
        blssu   Lndiv42                 ; remainder below D: nothing to subtract
Lndiv40:
        incl    r4                      ; Q = r4 + 1
        subl2   r3,r5                   ; R = r5 - D
Lndiv42:
        ashl    #1,r4,r4                ; quotient doubled
        addl2   r5,r5                   ; remainder doubled; C = bit shifted out
        bisl2   r9,r5                   ; bring in n0 (bisl2 leaves C as set above)
        bcs     Lndiv43                 ; doubling overflowed: D certainly fits
        cmpl    r5,r3
        blssu   Lndiv44                 ; remainder below D: nothing to subtract
Lndiv43:
        incl    r4                      ; Q = r4 + 1
        subl2   r3,r5                   ; R = r5 - D
Lndiv44:
        movl    r4,-(r1)                ; store the quotient digit
Lndiv5:
        sobgeq  r2,Lndiv3               ; while (nl-- != 0)
        movl    r5,r0                   ; return the remainder
        ret
|
|
|
|
; BigNumCarry BnnMultiply (pp, pl, mm, ml, nn, nl)
|
|
; BigNum pp, nn, mm;
|
|
; BigNumLength pl, nl, ml;
|
|
|
|
; BnnMultiply(pp, pl, mm, ml, nn, nl)
;   pp[0..pl-1] += mm[0..ml-1] * nn[0..nl-1]; returns the carry out in r0.
;   In:   4(ap) = pp, 8(ap) = pl, 12(ap) = mm, 16(ap) = ml,
;         20(ap) = nn, 24(ap) = nl
;   Uses: r9 = rows (digits of nn) left, r10 = pp cursor, r11 = nn cursor,
;         r8 = high carry kept between rows; r0-r7 are loaded as the
;         arguments of the BMM_MultiplyDigit subroutine defined later in
;         this file.
;   nl == 0 returns 0.  When mm and nn are the same address and ml == nl,
;   the dedicated squaring code below is used.
;   NOTE: the instruction sequence is deliberately unchanged; several of
;   the byte-displacement branches here span close to their maximum range.
        .globl  BnnMultiply
        .align  3
BnnMultiply:
        .word   ^XFFE&callee_save       ; mask: r1-r11 (filtered by callee_save)
        movl    24(ap),r9               ; r9 = nl
        bneq    MUL_have_n
MUL_ret0:
        clrl    r0                      ; return 0
MUL_ret:
        ret
MUL_have_n:
        movl    20(ap),r11              ; r11 = nn
        cmpl    12(ap),r11              ; same operand twice?
        beql    MUL_square_entry        ; yes: try the squaring code
MUL_general:
        movl    4(ap),r10               ; r10 = pp
        clrl    r8                      ; c_hi = 0
MUL_row:
        movl    16(ap),r7               ; arg ml
        movl    12(ap),r1               ; arg mm
        moval   (r10)+,r3               ; arg pp = current row start; row start advances
        clrl    r5                      ; arg c_lo = 0
        movl    (r11)+,r2               ; arg digit = *nn++ (also sets N/Z for the subroutine)
        bsbw    BMM_MultiplyDigit
        sobgtr  r9,MUL_row              ; while (--nl != 0)
        movl    r8,r0                   ; r0 = final high carry
        beql    MUL_ret                 ; none: return 0
        movl    16(ap),r7               ; r7 = ml
        subl3   r7,8(ap),r1             ; r1 = pl - ml
        subl2   24(ap),r1               ; r1 = pl - ml - nl
        bleq    MUL_ret                 ; no digits above the product: return the carry
        moval   (r10)[r7],r10           ; r10 = first digit above the product
MUL_ripple:
        incl    (r10)+                  ; (*pp++)++
        bcc     MUL_ret0                ; absorbed: return 0
        sobgtr  r1,MUL_ripple
        ret                             ; ran off the top: r0 still holds the carry

; Special squaring code based on:
;   n[0..nl-1]*n[0..nl-1] = sum (i = 0..nl-1):
;       B^2i * (n[i]*n[i] + 2*n[i] * n[i+1..nl-1] * B)
; The 2*n[i] is tricky because it may overflow 32 bits, so let
;       L[i] = 2*n[i] % 2^32   and   H[i] = 2*n[i] / 2^32
; Then the sum becomes, for i = 0..nl-1:
;       B^2i * (n[i]*n[i] + (L[i]+H[i-1]) * n[i+1..nl-1] * B + H[i-1]*n[i])
; When i = nl-1 the slice n[nl..nl-1] has zero length, so that term
; vanishes.
; There are not enough registers to remember the top bit of n[i-1]
; conveniently, so it is encoded in the program counter by duplicating
; the loop: SQR_row_hi runs when n[i-1] was 2^31 or above, SQR_row_lo when
; it was below 2^31.
SQR_row_hi:
        movl    (r11)+,r0               ; d = *nn++
        movl    r11,r1                  ; arg mm = digits above d
        emul    r0,r0,#0,r4             ; r4:r5 = d*d (signed)
        addl2   r0,r4                   ; add the H[i-1]*n[i] term (H[i-1] is 1 here)
        adwc    #0,r5
        addl2   r4,(r10)+               ; *pp++ += low word
        adwc    #0,r5                   ; r5 += carry
        moval   (r10)+,r3               ; arg pp = pp; pp advances again
        movl    r9,r7                   ; arg ml = digits left above d
        addl3   r0,r0,r2                ; arg digit = 2*d, C = top bit of d
        bisl2   #1,r2                   ; add H[i-1]; bisl2 leaves C as set above
        bcc     SQR_top_clear           ; top bit of d clear: join the other loop
SQR_top_set:
        addl2   r0,r5                   ; compensate for the signed d*d:
        addl2   r0,r5                   ;   high word += 2*d
        tstl    r2                      ; set N/Z from the digit for the subroutine
        bsbb    BMM_MultiplyDigit       ; (pp=r3, mm=r1, ml=r7, d=r2, c_hi=r8, c_lo=r5)
        sobgeq  r9,SQR_row_hi           ; next row, remembering the top bit was set
        brb     SQR_finish

; Squaring entry: r9 = nl, r11 = nn (equal to mm)
MUL_square_entry:
        movl    16(ap),r8               ; r8 = ml
        bneq    SQR_have_m
        clrl    r0
        ret                             ; ml == 0: return 0
SQR_have_m:
        cmpl    r8,r9
        bneq    MUL_general             ; lengths differ: ordinary multiplication
        movl    4(ap),r10               ; r10 = pp
        clrl    r8                      ; c_hi = 0
        decl    r9                      ; r9 = nl - 1 = digits above the first one
SQR_row_lo:
        movl    (r11)+,r0               ; d = *nn++
        movl    r11,r1                  ; arg mm = digits above d
        emul    r0,r0,#0,r4             ; r4:r5 = d*d (signed)
        addl2   r4,(r10)+               ; *pp++ += low word
        adwc    #0,r5                   ; r5 += carry
        moval   (r10)+,r3               ; arg pp = pp; pp advances again
        movl    r9,r7                   ; arg ml = digits left above d
        addl3   r0,r0,r2                ; arg digit = 2*d, C = top bit of d
        bcs     SQR_top_set             ; top bit of d set: join the other loop
SQR_top_clear:
        tstl    r2                      ; set N/Z from the digit for the subroutine
        bsbb    BMM_MultiplyDigit       ; (pp=r3, mm=r1, ml=r7, d=r2, c_hi=r8, c_lo=r5)
        sobgeq  r9,SQR_row_lo           ; next row, remembering the top bit was clear

; All rows done: r10 = pp + 2*nl, r8 = high carry
SQR_finish:
        movl    r8,r0                   ; r0 = final high carry
        bneq    SQR_carry
SQR_ret:
        ret
SQR_carry:
        movl    16(ap),r7               ; r7 = ml (equal to nl here)
        subl3   r7,8(ap),r2             ; r2 = pl - ml
        subl2   r7,r2                   ; r2 = pl - ml - nl
        bleq    SQR_ret                 ; no digits above the product: return the carry
SQR_ripple:
        incl    (r10)+                  ; (*pp++)++
        bcc     SQR_ret0                ; absorbed
        sobgtr  r2,SQR_ripple
        ret                             ; ran off the top: r0 still holds the carry
SQR_ret0:
        clrl    r0
        ret                             ; return 0
|
|
|
|
; Subroutine BMM_MultiplyDigit (entered with bsbb/bsbw, returns with rsb)
;   Computes
;     c_hi*base^(ml+1) + pp[0..ml] = pp[0..ml] + mm[0..ml-1]*d
;                                    + c_hi*base^ml + c_lo
;   In:
;     r7 = ml          r1 = mm          r2 = digit d
;     r3 = pp          r8 = c_hi        r5 = c_lo
;     condition codes N and Z must reflect r2 (the entry branches on them)
;   Scratch: r0 = pair-loop counter (ml/2), r4:r5 and r6:r7 = products;
;            r1 and r3 are advanced past the digits processed.
;   Out:
;     r8 = c_hi
;
;   As emul is signed, each product is corrected to the unsigned value:
;   with d below 2^31, d is added to the high word when the product is
;   negative; with d 2^31 or above, the digit of mm is always added to the
;   high word and d is added as well when the product is positive.
;   The loop is doubled (two digits per pass); an odd ml enters at the
;   second half.

; d == 0 and c_lo == 0: only c_hi has to be added, at pp[ml].
MDS_carry_only:
        addl2   r8,(r3)[r7]             ; pp[ml] += c_hi
        clrl    r8                      ; clrl leaves C as set by the add
        adwc    #0,r8                   ; c_hi = carry out
        rsb

; d == 0: skip the multiplication unless c_lo still has to be added.
MDS_zero_digit:
        tstl    r5
        beql    MDS_no_low              ; c_lo == 0 as well
        tstl    r2                      ; c_lo pending: restore N/Z for d and
        brb     MDS_dispatch            ;   take the ordinary path
MDS_no_low:
        tstl    r8
        bneq    MDS_carry_only          ; c_hi pending
        rsb                             ; nothing to do at all

BMM_MultiplyDigit:
        beql    MDS_zero_digit          ; d == 0
MDS_dispatch:
        blss    MDS_neg                 ; top bit of d set

; ---- d below 2^31 ----
        ashl    #-1,r7,r0               ; r0 = ml / 2 = number of digit pairs
        bitl    #1,r7
        bneq    MDS_pos_odd             ; odd ml: single digit first, c_lo is in r5
        movl    r5,r7                   ; even ml: c_lo enters through r7
        brb     MDS_pos_test
MDS_pos_pair:
        emul    (r1)+,r2,#0,r4          ; r4:r5 = digit * d (signed)
        bgeq    MDS_pos_fix1            ; product not negative: no correction
        addl2   r2,r5                   ; high word += d
MDS_pos_fix1:
        addl2   r7,r4                   ; low word += previous high word
        adwc    #0,r5
        addl2   r4,(r3)+                ; *pp++ += low word
        adwc    #0,r5
MDS_pos_odd:
        emul    (r1)+,r2,#0,r6          ; r6:r7 = next digit * d (signed)
        bgeq    MDS_pos_fix2
        addl2   r2,r7                   ; high word += d
MDS_pos_fix2:
        addl2   r5,r6                   ; low word += previous high word
        adwc    #0,r7
        addl2   r6,(r3)+                ; *pp++ += low word
        adwc    #0,r7
MDS_pos_test:
        sobgeq  r0,MDS_pos_pair         ; one pass per remaining pair
        addl2   r8,r7                   ; last high word += c_hi
        clrl    r8                      ; clrl leaves C as set by the add
        adwc    #0,r8                   ; c_hi = carry of that addition
        addl2   r7,(r3)                 ; pp[ml] += last high word
        adwc    #0,r8                   ; c_hi += carry out of pp[ml]
        rsb

; ---- d 2^31 or above ----
MDS_neg:
        ashl    #-1,r7,r0               ; r0 = ml / 2 = number of digit pairs
        bitl    #1,r7
        bneq    MDS_neg_odd             ; odd ml: single digit first, c_lo is in r5
        movl    r5,r7                   ; even ml: c_lo enters through r7
        brb     MDS_neg_test
MDS_neg_pair:
        movl    (r1)+,r6                ; r6 = digit
        emul    r6,r2,#0,r4             ; r4:r5 = digit * d (signed)
        bleq    MDS_neg_fix1            ; product not positive: digit's top bit clear
        addl2   r2,r5                   ; digit's top bit set: high word += d
MDS_neg_fix1:
        addl2   r6,r5                   ; d's top bit set: high word += digit
        addl2   r7,r4                   ; low word += previous high word
        adwc    #0,r5
        addl2   r4,(r3)+                ; *pp++ += low word
        adwc    #0,r5
MDS_neg_odd:
        movl    (r1)+,r4                ; r4 = next digit
        emul    r4,r2,#0,r6             ; r6:r7 = digit * d (signed)
        bleq    MDS_neg_fix2
        addl2   r2,r7                   ; digit's top bit set: high word += d
MDS_neg_fix2:
        addl2   r4,r7                   ; d's top bit set: high word += digit
        addl2   r5,r6                   ; low word += previous high word
        adwc    #0,r7
        addl2   r6,(r3)+                ; *pp++ += low word
        adwc    #0,r7
MDS_neg_test:
        sobgeq  r0,MDS_neg_pair         ; one pass per remaining pair
        addl2   r8,r7                   ; last high word += c_hi
        clrl    r8                      ; clrl leaves C as set by the add
        adwc    #0,r8                   ; c_hi = carry of that addition
        addl2   r7,(r3)                 ; pp[ml] += last high word
        adwc    #0,r8                   ; c_hi += carry out of pp[ml]
        rsb
|
|
.end
|