! Copyright Digital Equipment Corporation 1991
! Last modified_on Fri Jan 25 23:11:58 GMT+1:00 1991 by shand
!
! KerN for SPARC
! Mark Shand
!
! Implementation notes:
!
! Initial implementations of SPARC offer very limited support for
! integer multiplication, so BnnMultiplyDigit is based on
! double precision floating point multiplies that compute
! a 16x32->48 bit result without round-off.  Performance is
! not great, but is about twice as good as using the integer
! multiply primitives directly.  (A short commented C illustration
! of the 2^52 alignment step this relies on appears at the end of
! this file.)
!
! BnnDivideDigit uses the unmodified assembly code produced
! by cc -O2 KerN.c
!
	.seg	"text"			! [internal]
	.proc	16
	.global	_BnnSetToZero
_BnnSetToZero:
	deccc	%o1
	bneg	LBSZ3			! is zero
	andcc	1,%o1,%o2
	be	LBSZ2			! is odd
	nop
	dec	4,%o0
LBSZ1:					! [internal]
	inc	8,%o0
	st	%g0,[%o0-4]
LBSZ2:
	deccc	2,%o1
	bpos	LBSZ1
	st	%g0,[%o0]
LBSZ3:
	retl
	nop
! [internal]
!
!
	.proc	16
	.global	_BnnAssign
_BnnAssign:
	cmp	%o0,%o1
	bgt,a	LBAG2			! if(mm >= nn) goto LBAG2
	tst	%o2
	be	LBAGX
	tst	%o2
	be	LBAGX			! if(nl==0) return
	nop
LBAG1:
	ld	[%o1],%o3
	inc	4,%o1
	st	%o3,[%o0]
	deccc	%o2
	bgt	LBAG1
	inc	4,%o0
LBAGX:
	retl
	nop
LBAG2:
	be	LBAGX			! if(nl==0) return
	sll	%o2,2,%o3		! nl <<= 2
	add	%o1,%o3,%o1		! nn += nl
	add	%o0,%o3,%o0		! mm += nl
LBAG3:
	dec	4,%o1
	ld	[%o1],%o3		! %o3 = *--nn
	dec	4,%o0
	deccc	%o2
	bgt	LBAG3
	st	%o3,[%o0]		! *--mm = %o3
	retl
	nop
!
!
	.proc	16
	.global	_BnnSetDigit
_BnnSetDigit:
	retl
	st	%o1,[%o0]
!
!
	.proc	14
	.global	_BnnGetDigit
_BnnGetDigit:
	retl
	ld	[%o0],%o0
!
!
	.proc	14
	.global	_BnnNumDigits
_BnnNumDigits:
	tst	%o1
	sll	%o1,2,%o3
	be	LBND2
	add	%o0,%o3,%o4
	dec	4,%o4
LBND1:
	ld	[%o4],%o2
	tst	%o2
	bne	LBND2
	deccc	%o1
	bne,a	LBND1
	dec	4,%o4
LBND2:
	retl
	add	1,%o1,%o0
!
!
	.proc	14
	.global	_BnnNumLeadingZeroBitsInDigit
_BnnNumLeadingZeroBitsInDigit:
	addcc	%o0,%g0,%o5		! %o5 = d
	be	LBLZX			! if(!d) goto LBLZX
	sethi	%hi(0xffff0000),%o1	! mask = 0xffff0000
	mov	1,%o0			! p = 1
	andcc	%o1,%o5,%g0		! mask & d
	bne	LBLZ1
	sll	%o1,8,%o1
	sll	%o5,16,%o5
	or	16,%o0,%o0
LBLZ1:
	andcc	%o1,%o5,%g0		! mask & d
	bne	LBLZ2
	sll	%o1,4,%o1
	sll	%o5,8,%o5
	or	8,%o0,%o0
LBLZ2:
	andcc	%o1,%o5,%g0		! mask & d
	bne	LBLZ3
	sll	%o1,2,%o1
	sll	%o5,4,%o5
	or	4,%o0,%o0
LBLZ3:
	andcc	%o1,%o5,%g0		! mask & d
	bne	LBLZ4
	nop
	sll	%o5,2,%o5
	or	2,%o0,%o0
LBLZ4:
	srl	%o5,31,%o5		! %o5 = (d & 0x80000000) != 0
	retl
	xor	%o0,%o5,%o0
LBLZX:
	retl
	mov	32,%o0
	.proc	4
	.global	_BnnDoesDigitFitInWord
_BnnDoesDigitFitInWord:
	retl
	mov	1,%o0
	.proc	4
	.global	_BnnIsDigitZero
_BnnIsDigitZero:
	tst	%o0
	bne,a	LBDZ0
	mov	0,%o1
	mov	1,%o1
LBDZ0:
	retl
	add	%g0,%o1,%o0
	.proc	4
	.global	_BnnIsDigitNormalized
_BnnIsDigitNormalized:
	retl
	srl	%o0,31,%o0
	.proc	4
	.global	_BnnIsDigitOdd
_BnnIsDigitOdd:
	retl
	and	%o0,1,%o0
	.proc	4
	.global	_BnnCompareDigits
_BnnCompareDigits:
	cmp	%o0,%o1
	bleu	LBCD1
	mov	-1,%o0
	retl
	mov	1,%o0
LBCD1:					! [internal]
	be,a	LBCD2
	mov	0,%o0
LBCD2:
	retl
	nop
! [internal]
	.proc	16
	.global	_BnnComplement
_BnnComplement:
	deccc	%o1
	bneg	LE129
	nop
LY11:					! [internal]
	ld	[%o0],%o2
	xor	%o2,-1,%o2
	st	%o2,[%o0]
	deccc	%o1
	bpos	LY11
	inc	4,%o0
LE129:
	retl
	nop
! [internal]
	.proc	16
	.global	_BnnAndDigits
_BnnAndDigits:
	ld	[%o0],%o2
	and	%o2,%o1,%o2
	retl
	st	%o2,[%o0]
	.proc	16
	.global	_BnnOrDigits
_BnnOrDigits:
	ld	[%o0],%o2
	or	%o2,%o1,%o2
	retl
	st	%o2,[%o0]
	.proc	16
	.global	_BnnXorDigits
_BnnXorDigits:
	ld	[%o0],%o2
	xor	%o2,%o1,%o2
	retl
	st	%o2,[%o0]
	.proc	14
	.global	_BnnShiftLeft
_BnnShiftLeft:
	tst	%o2
	be	L77105
	mov	0,%o4
	deccc	%o1
	mov	32,%o3
	bneg	L77105
	sub	%o3,%o2,%o3
LY12:					! [internal]
	ld	[%o0],%o5
	sll	%o5,%o2,%g1
	or	%g1,%o4,%g1
	st	%g1,[%o0]
	deccc	%o1
	srl	%o5,%o3,%o4
	bpos	LY12
	inc	4,%o0
L77105:
	retl
	add	%g0,%o4,%o0
	.proc	14
	.global	_BnnShiftRight
_BnnShiftRight:
	tst	%o2
	be	L77114
	mov	0,%o4
	sll	%o1,2,%g1
	deccc	%o1
	mov	32,%o3
	add	%o0,%g1,%o0
	bneg	L77114
	sub	%o3,%o2,%o3
LY13:					! [internal]
	dec	4,%o0
	ld	[%o0],%o5
	srl	%o5,%o2,%g2
	or	%g2,%o4,%g2
	deccc	%o1
	sll	%o5,%o3,%o4
	bpos	LY13
	st	%g2,[%o0]
L77114:
	retl
	add	%g0,%o4,%o0
	.proc	14
	.global	_BnnAddCarry		! (mm, ml, car)
_BnnAddCarry:
	tst	%o2
	be	LBACX0			! if(car == 0) return(0);
	tst	%o1
	be	LBACX1			! if(nl == 0) return(1);
	nop
LBACL:
	ld	[%o0],%o3
	inccc	%o3
	bcc	LBACX0
	st	%o3,[%o0]
	deccc	%o1
	bgt	LBACL
	inc	4,%o0
LBACX1:
	retl
	mov	1,%o0
LBACX0:
	retl
	mov	0,%o0
	.proc	14
	.global	_BnnAdd			! (mm ml nn nl car)
_BnnAdd:
	sub	%o1,%o3,%o1		! ml -= nl
	tst	%o3
	be,a	_BnnAddCarry		! if (nl == 0) %o2 = car; goto AddCarry
	mov	%o4,%o2
LBAD1:
	ld	[%o2],%o5		! o5 = *nn
	addcc	-1,%o4,%g0		! set C = carin
	ld	[%o0],%o4		! o4 = *mm
	inc	4,%o2
	addxcc	%o5,%o4,%o5		! o5 = *mm + *nn, C = carout
	addx	%g0,%g0,%o4		! o4 = carout
	st	%o5,[%o0]
	deccc	%o3
	bne	LBAD1
	inc	4,%o0
	b	_BnnAddCarry
	mov	%o4,%o2
	.proc	14
	.global	_BnnSubtractBorrow	! (mm, ml, car)
_BnnSubtractBorrow:
	tst	%o2
	bne	LSBBX1			! if(car == 1) return(1);
	tst	%o1
	be	LSBBX0			! if(nl == 0) return(0);
	nop
LSBBL:
	ld	[%o0],%o3
	deccc	%o3
	bcc	LSBBX1
	st	%o3,[%o0]
	deccc	%o1
	bgt	LSBBL
	inc	4,%o0
LSBBX0:
	retl
	mov	0,%o0
LSBBX1:
	retl
	mov	1,%o0
	.proc	14
	.global	_BnnSubtract		! (mm ml nn nl car)
_BnnSubtract:
	sub	%o1,%o3,%o1		! ml -= nl
	tst	%o3
	be,a	_BnnSubtractBorrow	! if (nl == 0) %o2 = car; goto SubBorrow
	mov	%o4,%o2
LSUB1:
	ld	[%o2],%o5		! o5 = *nn
	deccc	%o4			! set C = carin
	ld	[%o0],%o4		! o4 = *mm
	inc	4,%o2
	subxcc	%o4,%o5,%o5		! o5 = *mm - *nn, C = carout
	mov	1,%o4
	subx	%o4,%g0,%o4		! o4 = carout
	st	%o5,[%o0]
	deccc	%o3
	bne	LSUB1
	inc	4,%o0
	b	_BnnSubtractBorrow
	mov	%o4,%o2
	.proc	14
	.global	_BnnMultiplyDigit	! (pp pl mm ml d)
! Assembler version of BnnMultiplyDigit is derived from the
! following code.
!
! BigNumCarry
! BnnMultiplyDigit(pp, pl, mm, ml, d)
! register BigNum	pp, mm;
! int			pl, ml;
! BigNumDigit		d;
! {
!	register double fd, lowAlignR;
!	register BigNumDigit carry = 0;
!
!	fd = (double) d;
!	lowAlignR = (65536.0*65536.0*65536.0*16.0);
!
!	pl -= ml;
!
!	while (ml--)
!	{
!		BigNumDigit md, pd;
!		register BigNumDigit tmp0, tmp1;
!		register double fmh, fml;
!		double fmlxd, fmhxd;
!
!		md = *mm++;
!		pd = *pp;
!		fml = (double) (int) (md & 0xffff);
!		fmh = (double) (int) (md >> 16);
!		fmlxd = fd*fml + lowAlignR;
!		fmhxd = fd*fmh + lowAlignR;
!		pd += carry;
!		carry = (pd < carry);
!		tmp0 = ((unsigned long *)(&fmlxd))[1];
!		carry += (((unsigned long *)(&fmlxd))[0]) & 0xffff;
!		if ((pd += tmp0) < tmp0) carry++;
!		tmp0 = ((unsigned long *)(&fmhxd))[1];
!		tmp1 = tmp0 << 16;
!		if ((pd += tmp1) < tmp1) carry++;
!		carry += (tmp0 >> 16);
!		carry += (((unsigned long *)(&fmhxd))[0]) << 16;
!		/* assert carry:pd = d*md + *pp + carry(in) */
!		*pp++ = pd;
!	}
!
!	while (carry && pl--)
!	{
!		BigNumDigit pd;
!
!		pd = *pp;
!		carry = (pd += carry) < carry;
!		*pp++ = pd;
!	}
!	return carry;
! }
_BnnMultiplyDigit:
!#PROLOGUE# 0
!#PROLOGUE# 1
	save	%sp,-120,%sp		! establish stack frame
	st	%i4,[%sp+LP61+32]	! mem = d
	ld	[%sp+LP61+32],%f0	! f0 = d
	fitod	%f0,%f26		! f26 = (double) d
	mov	0,%i5			! carry = 0
	tst	%i4			! if (d >= 0)
	bge	LBMD1			! goto LBMD1
	sethi	%hi(L2pwr32),%o0
	ldd	[%o0+%lo(L2pwr32)],%f4	! f4 = 2^32
	faddd	%f26,%f4,%f26		! f26 += 2^32
LBMD1:
	sethi	%hi(L2pwr52),%o1
	ldd	[%o1+%lo(L2pwr52)],%f24	! f24 = 2^52
	tst	%i3			! ml?
	be	LBMDExit		! if (ml == 0)
	sub	%i1,%i3,%i1		! goto LBMDExit; pl -= ml
	st	%g0,[%sp+LP61+32]	! clr [%sp+LP61+32]
LBMDpxdLoop:
	ld	[%i2],%o4		! o4 = md = *mm
	sth	%o4,[%sp+LP61+34]	! o4 & 0xffff -> mem
	ld	[%sp+LP61+32],%f7	! f7 <- mem
	fitod	%f7,%f30		! fml = (double) (md & 0xffff)
	srl	%o4,16,%o4		! o4 = md >> 16
	st	%o4,[%sp+LP61+32]	! o4 -> mem
! fitod + 8 cycles.  f30 ready on SS1
	fmuld	%f26,%f30,%f12		! f12 = fd * fml
	ld	[%sp+LP61+32],%f9	! f9 <- mem
	fitod	%f9,%f28		! fmh = (double) (md >> 16)
	ld	[%i0],%l7		! pd = l7 = *pp
	inc	4,%i2			! mm++
	inc	4,%i0			! pp++
! fmuld + 10 fitod + 6.  f28 ready, mul/add unit available on SS1
	faddd	%f12,%f24,%f14		! f14 = f12 + 2^52
	fmuld	%f26,%f28,%f16		! f16 = fd * fmh
	addcc	%l7,%i5,%i4		! pd += carry{in}
! 1 cycle stall of faddd
	st	%f15,[%fp-4]		! fmlxd[low] = f15
! fmuld + 9.  f16 ready on SS1
	faddd	%f16,%f24,%f18		! f18 = f16 + 2^52
	st	%f14,[%fp-8]		! fmlxd[high] = f14
	ld	[%fp-4],%l7		! tmp0 = l7 = fmlxd[low]
	lduh	[%fp-6],%i5		! i5 = fmlxd[high] & 0xffff
	addx	%g0,%i5,%i5		! carry = (fmlxd[high] & 0xffff)+C
	addcc	%i4,%l7,%l7		! pd += tmp0
	st	%f18,[%fp-16]		! fmhxd[high] = f18
	ld	[%fp-16],%o4		! o4 = fmhxd[high]
	st	%f19,[%fp-12]		! fmhxd[low] = f19
	ld	[%fp-12],%o5		! o5 = fmhxd[low]
	sll	%o5,16,%l3		! l3 = fmhxd[low] << 16
	srl	%o5,16,%o5		! o5 = fmhxd[low] >> 16
	addx	%i5,%o5,%i5		! carry += (fmhxd[low] >> 16) + C
	addcc	%l7,%l3,%l7		! pd += fmhxd[low] << 16
	sll	%o4,16,%l3		! l3 = fmhxd[high] << 16
	addx	%i5,%l3,%i5		! carry += fmhxd[high] << 16 + C
	deccc	%i3			! ml--
	bne	LBMDpxdLoop		! if (ml > 0)
	st	%l7,[%i0-4]		! goto LBMDpxdLoop; pp[-1] = pd
	tst	%i5
	be	LBMDExit		! if (!carry)
	nop				! goto LBMDExit
LBMDacLoop:
	deccc	%i1
	blt	LBMDExit
	ld	[%i0],%i3
	addcc	%i3,%i5,%i3
	addxcc	%g0,%g0,%i5
	st	%i3,[%i0]
	bne	LBMDacLoop
	inc	4,%i0
LBMDExit:
	ret
	restore	%g0,%i5,%o0
LP61 = 64
	.seg	"data"			! [internal]
	.align	8
L2pwr32:
	.word	0x41f00000
	.word	0
	.align	8
L2pwr52:
	.word	0x43300000
	.word	0
	.seg	"text"
	.proc	14
	.global	_BnnDivideDigit
_BnnDivideDigit:
!#PROLOGUE# 0
!#PROLOGUE# 1
	save	%sp,-112,%sp
	call	_BnnNumLeadingZeroBitsInDigit,1
	mov	%i3,%o0
	mov	%o0,%o2
	tst	%o2
	be	L77225
	st	%o2,[%fp-8]
	ld	[%i0-4],%o4
	st	%i2,[%fp-16]
	st	%o4,[%fp-12]
	mov	%i2,%o1
	mov	%i1,%o0
	call	_BnnShiftLeft,3
	sll	%i3,%o2,%i3
L77225:
	sub	%i2,1,%l2
	sethi	%hi(0xffff),%o1		! [internal]
	or	%o1,%lo(0xffff),%o1	! [internal]
	sll	%i2,2,%l3
	add	%i1,%l3,%l3
	dec	4,%l3
	ld	[%l3],%i2
	and	%i3,%o1,%l1
	sll	%l2,2,%l4
	tst	%l2
	srl	%i3,16,%l6
	mov	%o1,%l0
	sll	%l1,16,%l7
	add	%i0,%l4,%l4
	be	L77249
	add	%l6,1,%l5
LY43:					! [internal]
	dec	4,%l3
	ld	[%l3],%i4
	mov	%i2,%i5
	mov	%i5,%o0
	call	.udiv,2
	mov	%l6,%o1
	mov	%o0,%i1
	mov	%l1,%o0
	call	.umul,2
	mov	%i1,%o1
	mov	%o0,%i2
	mov	%l6,%o0
	call	.umul,2
	mov	%i1,%o1
	srl	%i2,16,%i0
	add	%o0,%i0,%i0
	cmp	%i0,%i5
	dec	%l2
	bgu	L77232
	sll	%i2,16,%i2
	cmp	%i0,%i5
	bne	LY57
	cmp	%i2,%i4
LY54:					! [internal]
	bleu,a	LY57
	cmp	%i2,%i4
L77232:
	cmp	%l7,%i2
LY55:					! [internal]
	bleu	L77234
	dec	%i1
	sub	%i2,%l7,%i2
	b	L77228
	sub	%i0,%l5,%i0
LY56:					! [internal]
	ld	[%fp-4],%o3
	ld	[%fp+68],%i0
	ld	[%fp+80],%o1
	dec	4,%o0
	ld	[%o0],%o0
	sll	%o3,32,%o3
	call	.udiv,2
	or	%o3,%o0,%o0
	dec	4,%i0
	st	%o0,[%i0]
	ld	[%fp+76],%o0
	tst	%o0
	bne,a	LY56
	ld	[%fp+72],%o0
	b	L77259
	ld	[%fp-4],%i2
L77234:
	sub	%i0,%l6,%i0
	sub	%i2,%l7,%i2
L77228:
	cmp	%i0,%i5
	bgu,a	LY55
	cmp	%l7,%i2
	cmp	%i0,%i5
	be	LY54
	cmp	%i2,%i4
LY57:					! [internal]
	bleu	LY47
	sub	%i4,%i2,%i4
	inc	%i0
LY47:					! [internal]
	sub	%i5,%i0,%i5
	sll	%i5,16,%o0
	srl	%i4,16,%o7
	sll	%i1,16,%i1
	dec	4,%l4
	st	%i1,[%l4]
	mov	%l6,%o1
	or	%o0,%o7,%o0
	call	.udiv,2
	nop
	mov	%o0,%i1
	mov	%l1,%o0
	call	.umul,2
	mov	%i1,%o1
	mov	%o0,%i2
	mov	%l6,%o0
	call	.umul,2
	mov	%i1,%o1
	mov	%o0,%i0
	srl	%i2,16,%o0
	add	%i0,%o0,%i0
	and	%i0,%l0,%o2
	srl	%i0,16,%i0
	cmp	%i0,%i5
	sll	%o2,16,%o2
	and	%i2,%l0,%i2
	bgu	L77244
	or	%i2,%o2,%i2
	cmp	%i0,%i5
	bne,a	LY53
	ld	[%l4],%o1
	cmp	%i2,%i4
LY51:					! [internal]
	bleu,a	LY53
	ld	[%l4],%o1
L77244:
	cmp	%i3,%i2
LY52:					! [internal]
	bleu	L77246
	dec	%i1
	sub	%i2,%i3,%i2
	b	L77240
	dec	%i0
L77246:
	sub	%i2,%i3,%i2
L77240:
	cmp	%i0,%i5
	bgu,a	LY52
	cmp	%i3,%i2
	cmp	%i0,%i5
	be,a	LY51
	cmp	%i2,%i4
	ld	[%l4],%o1
LY53:					! [internal]
	tst	%l2
	or	%o1,%i1,%o1
	sub	%i4,%i2,%i2
	bne	LY43
	st	%o1,[%l4]
L77249:
	ld	[%fp-8],%o2
	tst	%o2
	be,a	LY50
	ld	[%fp-8],%o1
	cmp	%l4,%l3
	bleu,a	LY49
	cmp	%l4,%l3
	ld	[%fp-16],%o4
	sll	%o4,2,%o4
	add	%l3,%o4,%o4
	cmp	%l4,%o4
	bcc,a	LY49
	cmp	%l4,%l3
	sub	%l4,%l3,%i0
	sra	%i0,2,%i0
	mov	%i0,%o1
	call	_BnnShiftRight,3
	mov	%l3,%o0
	ld	[%fp-12],%o4
	dec	%i0
	sll	%i0,2,%i0
	b	L77258
	st	%o4,[%l3+%i0]
LY49:					! [internal]
	bne,a	LY48
	ld	[%fp-16],%o1
	ld	[%fp-16],%o0
	mov	1,%o1
	dec	%o0
	sll	%o0,2,%o0
	b	LY42
	add	%l3,%o0,%o0
LY48:					! [internal]
	mov	%l3,%o0
LY42:					! [internal]
	call	_BnnShiftRight,3
	ld	[%fp-8],%o2
L77258:
	ld	[%fp-8],%o1
LY50:					! [internal]
	srl	%i2,%o1,%i2
L77259:
	ret
	restore	%g0,%i2,%o0
	.seg	"data"			! [internal]
_copyright:
	.half	0x4028
	.half	0x2329
	.half	0x4b65
	.half	0x724e
	.half	0x2e63
	.half	0x3a20
	.half	0x636f
	.half	0x7079
	.half	0x7269
	.half	0x6768
	.half	0x7420
	.half	0x4469
	.half	0x6769
	.half	0x7461
	.half	0x6c20
	.half	0x4571
	.half	0x7569
	.half	0x706d
	.half	0x656e
	.half	0x7420
	.half	0x436f
	.half	0x7270
	.half	0x6f72
	.half	0x6174
	.half	0x696f
	.half	0x6e20
	.half	0x2620
	.half	0x494e
	.half	0x5249
	.half	0x4120
	.half	0x3139
	.half	0x3838
	.half	0x2c20
	.half	0x3139
	.half	0x3839
	.half	0xa00
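!
! Illustration of the 2^52 alignment step used by BnnMultiplyDigit above
! (a sketch for clarity, not code taken from KerN.c): for any integer
! 0 <= x < 2^52, the IEEE double x + 2^52 holds x exactly in its 52-bit
! mantissa, so on a big-endian 32-bit SPARC its two words are
!	word[0] = 0x43300000 | (x >> 32)	(sign/exponent + high bits)
!	word[1] = x & 0xffffffff		(low 32 bits)
! A minimal stand-alone C sketch of one 16x32->48 bit partial product;
! the helper name mul32x16 is hypothetical:
!
!	/* Multiply a 32-bit digit d by a 16-bit half m16.  Returns bits  */
!	/* 32..47 of the product and stores bits 0..31 in *lo.  Exact,    */
!	/* since d*m16 < 2^48 fits in a double mantissa without rounding. */
!	unsigned long
!	mul32x16(d, m16, lo)
!	unsigned long d, m16, *lo;
!	{
!		double aligned = (double) d * (double) m16
!				 + (65536.0*65536.0*65536.0*16.0); /* + 2^52 */
!		unsigned long *w = (unsigned long *) &aligned;
!
!		*lo = w[1];		/* low 32 bits of the 48-bit product */
!		return w[0] & 0xffff;	/* bits 32..47 of the product */
!	}
!
! BnnMultiplyDigit applies this computation twice per digit of mm (once
! for each 16-bit half of *mm, as in the commented C above) and folds the
! two partial products into *pp with the integer carry chain.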