810 lines
20 KiB
NASM
Executable File
810 lines
20 KiB
NASM
Executable File
.FILE "ROUTS.ASM"
|
|
*----------------------------------------------------------------------------
|
|
*RUNTIME SYSTEM ROUTINES
|
|
*
|
|
*COPYRIGHT (C) 1994 BY TV GAMES, INC.
|
|
*ALL RIGHTS RESERVED
|
|
*
|
|
*
|
|
*THIS FILE CONTAINS THE RUNTIME SOURCE CODE TO C ORIENTED OPERATIONS
|
|
*
|
|
*CONTAINED IN THIS FILE ARE THE FOLLOWING FUNCTIONS
|
|
*
|
|
* DIV_F divide floating
|
|
* DIV_I divide integer
|
|
* DIV_U30 divide unsigned
|
|
* INV_F30 inverse floating
|
|
* MOD_I30 modulus integer
|
|
* MOD_U30 modulus unsigned
|
|
* SQRT C callable sqrt()
|
|
*
|
|
*
|
|
|
|
.globl SQRT
|
|
.globl DIV_F,DIV_F30
|
|
.globl DIV_I,DIV_I30
|
|
.globl DIV_U30
|
|
.globl INV_F30
|
|
.globl MOD_I30
|
|
.globl MOD_U30
|
|
|
|
|
|
|
|
|
|
*----------------------------------------------------------------------------
|
|
*DIVF Floating point divide function
|
|
*
|
|
*PARAMETERS u in R0, v in R1
|
|
* R0 FL u
|
|
* R1 FL v
|
|
*RETURNS
|
|
* R0 FL R0/R1
|
|
*STATUS Set from result in R0
|
|
*OPERATION Result = (1/v) * u.
|
|
*
|
|
*CLOBBERS R0,R1,BK
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
* DIV_F - Floating point division
|
|
*
|
|
* Algorithm:
|
|
* Given v = a * 2**e
|
|
* x[0] = 1.0 * 2**(-e-1)
|
|
* for (i = 1; i <= 5; i++)
|
|
* x[i] = x[i-1] * (2.0 - v * x[i-1])
|
|
*
|
|
* The single-precision floating-point format is accurate to 6.9
|
|
* decimal places. The single-precision format is accurate to
|
|
* 2**-23 = 1.192E-7, so we would like to have that much accuracy
|
|
* in the final result.
|
|
*
|
|
* The algorithm's error at an iteration i (e[i]) is defined as
|
|
* e[i] = 1 - v * x[i]
|
|
* It can also be shown that e[i+1] = e[i] * e[i].
|
|
*
|
|
* Cycles: 40
|
|
*
|
|
DIV_F:
|
|
DIV_F30
|
|
POP BK ;Pop return address
|
|
PUSH R2 ;Save R2: integer part
|
|
PUSHF R2 ;Save R2: floating point part
|
|
|
|
PUSHF R1 ;SAVE THE SIGN
|
|
PUSHF R0 ;Save u (dividend)
|
|
; LDI R1,AR0 ;Save mantissa of v to remember sign
|
|
ABSF R1 ;The algorithm uses v = |v|.
|
|
|
|
|
|
;
|
|
; Extract the exponent of v.
|
|
;
|
|
PUSHF R1
|
|
POP R2
|
|
ASH -24,R2 ;The 8 LSBs of R2 contain the exponent of v.
|
|
;
|
|
;A few comments on boundary conditions. If e = -128, then v = 0. The
|
|
;following x[0] calculation yields R2 = --128 - 1 = 127 and the algorithm will
|
|
;overflow and saturate since x[0] is large. This seems reasonable. If e =
|
|
;127, the R2 = -127 - 1 = -128. Thus x[0] = 0 and this will cause the
|
|
;algorithm to yield zero. Since the mantissa of v is always between 1 and 2,
|
|
;this is also reasonable. As a result, boundary conditions are handled
|
|
;automatically in a reasonable fashion.
|
|
;
|
|
; x[0] formation given the exponent of v.
|
|
;
|
|
NEGI R2
|
|
SUBI 1,R2 ;Now we have -e-1, the exponent of x[0].
|
|
ASH 24,R2
|
|
PUSH R2
|
|
POPF R2 ;Now R2 = x[0] = 1.0 * 2**(-e-1).
|
|
|
|
;
|
|
;Now the iterations begin.
|
|
;
|
|
MPYF R2,R1,R0 ;R0 = v * x[0]
|
|
SUBRF 2.0,R0 ;R0 = 2.0 - v * x[0]
|
|
MPYF R0,R2 ;R2 = x[1] = x[0] * (2.0 - v * x[0])
|
|
|
|
MPYF R2,R1,R0 ;R0 = v * x[1]
|
|
SUBRF 2.0,R0 ;R0 = 2.0 - v * x[1]
|
|
MPYF R0,R2 ;R2 = x[2] = x[1] * (2.0 - v * x[1])
|
|
|
|
MPYF R2,R1,R0 ;R0 = v * x[2]
|
|
SUBRF 2.0,R0 ;R0 = 2.0 - v * x[2]
|
|
MPYF R0,R2 ;R2 = x[3] = x[2] * (2.0 - v * x[2])
|
|
|
|
MPYF R2,R1,R0 ;R0 = v * x[3]
|
|
SUBRF 2.0,R0 ;R0 = 2.0 - v * x[3]
|
|
MPYF R0,R2 ;R2 = x[4] = x[3] * (2.0 - v * x[3])
|
|
|
|
RND R2 ;This minimizes error in the LSBs.
|
|
|
|
|
|
;
|
|
;For the last iteration we use the formulation:
|
|
;x[5] = (x[4] * (1.0 - (v * x[4]))) + x[4]
|
|
;
|
|
MPYF R2,R1,R0 ;R0 = v * x[4] = 1.0..01.. => 1
|
|
SUBRF 1.0,R0 ;R0 = 1.0 - v * x[4] = 0.0..01... => 0
|
|
MPYF R2,R0 ;R0 = x[4] * (1.0 - v * x[4])
|
|
ADDF R0,R2,R1 ;R0 = x[5] = (x[4]*(1.0-(v*x[4])))+x[4]
|
|
|
|
;
|
|
;R1 contains 1/v. Multiply by u to get result.
|
|
;
|
|
RND R1 ;Round since this is follow by a MPYF.
|
|
POPF R0 ;Pop u
|
|
MPYF R1,R0 ;Result = u * (1/v)
|
|
|
|
;
|
|
;Branch (delayed) return. Use delay slots to negate the result if v < 0.
|
|
;
|
|
|
|
NEGF R0,R1 ;R1 = -(1/|v|)
|
|
POPF R2 ;CHECK ORIGINAL SIGN DUDES... (SETS SIGN FLAG)
|
|
|
|
BD BK ;Delayed branch to return
|
|
LDFN R1,R0 ;If v < 0, then R1 = -R1 (BASED ON POPF R2)
|
|
POPF R2 ;Restore R2: floating point part
|
|
POP R2 ;Restore R2: integer part
|
|
;---->B BK ;BRANCH OCCURS (RETURN)
|
|
*----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*----------------------------------------------------------------------------
|
|
*DIVI Integer divide routine (signed)
|
|
*
|
|
*
|
|
*PARAMETERS
|
|
* R0 Signed integer dividend in R0
|
|
* R1 Signed integer divisor in R1
|
|
*
|
|
*OUTPUT R0 / R1 into R0.
|
|
*STATUS Set from result in R0.
|
|
*Registers used R0, R1, AR0, AR1, RC, RS, RE
|
|
*
|
|
*Operation 1. Normalize divisor with dividend
|
|
* 2. Repeat SUBC
|
|
* 3. Quotient is in LSBs of result
|
|
*
|
|
*Cycles 31-62 (depends on amount of normalization)
|
|
*
|
|
*
|
|
.asg AR1,V ;divisor
|
|
.asg R1,TEMP ;float value of operands
|
|
.asg R1,COUNT ;repeat/shift count
|
|
.asg AR0,SIGN ;sign of quotient
|
|
.asg RC,EXP ;divisor exponent
|
|
|
|
DIV_I:
|
|
DIV_I30
|
|
|
|
;
|
|
;Determine sign of result. Get absolute value of operands.
|
|
;
|
|
XOR R0,R1,SIGN ;get the sign
|
|
ABSI R0 ;make dividend positive
|
|
BVD div_32 ;if still negative, escape
|
|
ABSI R1 ;make divisor positive
|
|
LDI R1,V ;save in V
|
|
CMPI R0,V ;divisor > dividend ?
|
|
BHID zero ; if so, return 0
|
|
|
|
;
|
|
;Normalize operands. Use difference in exponents as shift count
|
|
;for divisor, and as repeat count for SUBC.
|
|
;
|
|
FLOAT R1,TEMP ;normalize divisor
|
|
PUSHF TEMP ;push as float
|
|
POP EXP ;pop as int
|
|
|
|
FLOAT R0,TEMP ;normalize dividend
|
|
PUSHF TEMP ;push as float
|
|
POP COUNT ;pop as int
|
|
|
|
LSH -24,EXP ;divisor exponent
|
|
LSH -24,COUNT ;dividend exponent
|
|
SUBI EXP,COUNT ;get difference in exponents
|
|
LSH COUNT,V ;align divisor with dividend
|
|
;
|
|
;Do COUNT+1 subtract & shifts.
|
|
;
|
|
RPTS COUNT
|
|
SUBC V,R0
|
|
;
|
|
; Mask off the lower COUNT+1 bits of R0
|
|
;
|
|
SUBRI 31,COUNT ;shift count is (32 - (COUNT+1))
|
|
LSH COUNT,R0 ;shift left
|
|
NEGI COUNT
|
|
LSH COUNT,R0 ;shift right to get result
|
|
|
|
|
|
|
|
;Check sign and negate result if necessary.
|
|
return
|
|
POP RC ;return address
|
|
NEGI R0,TEMP ;negate result
|
|
BD RC ;delayed branch to return
|
|
CMPI 0,SIGN ;check sign
|
|
LDIN TEMP,R0 ;if set, use negative result
|
|
CMPI 0,R0 ;set status from result
|
|
;----> B RC ;BRANCH OCCURS (RETURN)
|
|
|
|
|
|
;The following code handles cases of a full 32-bit dividend. This occurs
|
|
;when R0 = abs(R0) = 080000000h. Handle this by calling the unsigned divide
|
|
;function, then negating the result if necessary.
|
|
div_32
|
|
PUSH SIGN ;remember sign
|
|
CALL DIV_U30 ;do divide
|
|
POP SIGN ;restore sign
|
|
B return ;return
|
|
|
|
;***Return zero.
|
|
zero
|
|
LDI 0,R0
|
|
RETS
|
|
*----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
*----------------------------------------------------------------------------
|
|
*DIVU Integer divide routine (unsigned)
|
|
*
|
|
*
|
|
*PARAMETERS Unsigned dividend in R0,
|
|
* unsigned divisor in R1.
|
|
*
|
|
* Output: R0 / R1 into R0.
|
|
* Status: Set from result in R0.
|
|
* Registers used: R0, R1, AR0, AR1, RC, RS, RE
|
|
* Operation: 1. Normalize divisor with dividend
|
|
* 2. Repeat SUBC
|
|
* 3. Quotient is in LSBs of result
|
|
*
|
|
* Cycles: 31-65
|
|
*
|
|
.asg AR1,V ;divisor
|
|
.asg R1, TEMP ;float value of operands
|
|
.asg R1, MSBQ ;MSQ of quotient
|
|
.asg AR0,QMASK ;mask for quotient
|
|
.asg AR0,COUNT ;repeat/shift count
|
|
.asg RC, EXP ;divisor exponent
|
|
|
|
DIV_U30:
|
|
|
|
CMPI R0,R1 ;divisor > dividend ?
|
|
BHI zerob ; if so, return 0
|
|
LDI R1,V ;move divisor to AR1
|
|
|
|
;
|
|
;If top bit of dividend is set, handle specially.
|
|
;
|
|
CMPI 0,R0 ;check top bit
|
|
BLTD div_32b ;get divisor exponent, then jump.
|
|
;
|
|
;Get divisor exponent by converting to float.
|
|
;
|
|
FLOAT V,TEMP ;normalize divisor
|
|
PUSHF TEMP ;push as float
|
|
POP EXP ;pop as int to get exponent
|
|
;
|
|
;31 or less bits in dividend. Get dividend exponent.
|
|
;
|
|
FLOAT R0,TEMP ;normalize dividend
|
|
PUSHF TEMP ;push as float
|
|
POP COUNT ;pop as int to get exponent
|
|
;
|
|
;Use difference in exponents as shift count to line up MSBs.
|
|
;
|
|
LSH -24,COUNT ;divisor exponent
|
|
LSH -24,EXP ;dividend exponent
|
|
SUBI EXP,COUNT ;difference
|
|
LSH COUNT,V ;shift divisor up
|
|
;
|
|
;Do COUNT+1 subtract & shifts.
|
|
;
|
|
RPTS COUNT
|
|
SUBC V,R0
|
|
;
|
|
; Mask off the lower COUNT+1 bits of U and return.
|
|
;
|
|
POP RC ;return address
|
|
SUBRI 31,COUNT ;shift count is (32 - (COUNT+1))
|
|
BD RC ;delayed branch to return
|
|
LSH COUNT,R0 ;shift left
|
|
NEGI COUNT
|
|
LSH COUNT,R0 ;shift right to get result
|
|
;----> B RC ;BRANCH OCCURS (RETURN)
|
|
|
|
;
|
|
;The following code handles cases of a full 32-bit dividend. Before
|
|
;SUBC can be used, the top bit must be cleared (otherwise SUBC can
|
|
;possibly shift a significant 1 out the top of the dividend). This
|
|
;is accomplished by first doing a normal subtraction, then proceeding
|
|
;with SUBCs.
|
|
;
|
|
div_32b
|
|
;
|
|
;If the top bit of the divisor is set too, the quotient is 1.
|
|
;Otherwise, shift the divisor up to line up the MSBs.
|
|
;
|
|
CMPI 0,V ;check divisor
|
|
BLTD one ;if top bit set, quotient is 1
|
|
LSH -24,EXP ;divisor exponent
|
|
SUBRI 31,EXP ;shift count
|
|
LSH EXP,V ;shift up to line up MSBs
|
|
|
|
;
|
|
;Now MSBs are aligned. Do first SUBC by hand, and save off the first
|
|
;quotient digit. Then, shift divisor right rather than shifting dividend
|
|
;left. This leaves a 0 in the top bit of the dividend.
|
|
;
|
|
LDI 1,QMASK ;initialize MSB of quotient
|
|
LSH EXP,QMASK ;create a mask for the MSBs
|
|
SUBI 1,QMASK ;mask is (2 << COUNT) - 1
|
|
|
|
SUBI V,R0,TEMP ;subtract
|
|
LDIHS TEMP,R0 ;if positive, replace dividend
|
|
LDIHS 1,MSBQ ; and set quotient to 1
|
|
LDILO 0,MSBQ ;if negative, set quotient to 0
|
|
LSH EXP,MSBQ ;shift MSB into position
|
|
|
|
LSH -1,V ;shift divisor down
|
|
SUBI 1,EXP ;first iteration is done
|
|
|
|
;
|
|
;Do EXP subtract & shifts.
|
|
;
|
|
RPTS EXP
|
|
SUBC V,R0
|
|
|
|
;
|
|
;MSB of the quotient is in MSBQ. LSBs are in the lower COUNT bits of
|
|
;R0.
|
|
;
|
|
POP RC ;return address
|
|
BD RC ;delayed branch to return
|
|
AND QMASK,R0 ;mask off LSBs
|
|
OR MSBQ,R0 ;MSB of quotient
|
|
NOP
|
|
;----> B RC ;BRANCH OCCURS (RETURN)
|
|
|
|
;
|
|
; Return one.
|
|
;
|
|
one LDI 1,R0
|
|
RETS
|
|
|
|
;
|
|
; Return zero.
|
|
;
|
|
zerob LDI 0,R0
|
|
RETS
|
|
*----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
*----------------------------------------------------------------------------
|
|
*INVF Floating point inverse function
|
|
*
|
|
*
|
|
*PARAMETERS v in R0
|
|
*RETURNS 1/v in R0
|
|
*Status Not set from result (!!!)
|
|
*Registers used R0, R1, BK
|
|
*
|
|
* Algorithm:
|
|
* Given v = a * 2**e
|
|
* x[0] = 1.0 * 2**(-e-1)
|
|
* for (i = 1;i <= 5;i++)
|
|
* x[i] = x[i-1] * (2.0 - v * x[i-1])
|
|
*
|
|
* The single-precision floating-point format is accurate to 6.9
|
|
* Given v = a * 2**e
|
|
* x[0] = 1.0 * 2**(-e-1)
|
|
* for (i = 1;i <= 5;i++)
|
|
* x[i] = x[i-1] * (2.0 - v * x[i-1])
|
|
*
|
|
* The single-precision floating-point format is accurate to 6.9
|
|
* decimal places. The single-precision format is accurate to
|
|
* 2**-23 = 1.192E-7, so we would like to have that much accuracy
|
|
* in the final result.
|
|
*
|
|
* The algorithm's error at an iteration i (e[i]) is defined as
|
|
* e[i] = 1 - v * x[i]
|
|
* It can also be shown that e[i+1] = e[i] * e[i].
|
|
* Cycles: 36
|
|
*
|
|
INV_F30:
|
|
POP BK ;Pop return address
|
|
|
|
PUSH R2 ;Save R2: integer part
|
|
PUSHF R2 ;Save R2: floating point part
|
|
|
|
PUSHF R0
|
|
ABSF R0 ;The algorithm uses v = |v|.
|
|
|
|
;
|
|
; Extract the exponent of v.
|
|
;
|
|
PUSHF R0
|
|
POP R1
|
|
ASH -24,R1 ;The 8 LSBs of R1 contain the exponent of v.
|
|
;
|
|
;A few comments on boundary conditions. If e = -128, then v = 0. The
|
|
;following x[0] calculation yields R1 = --128 - 1 = 127 and the algorithm will
|
|
;overflow and saturate since x[0] is large. This seems reasonable. If e =
|
|
;127, the R1 = -127 - 1 = -128. Thus x[0] = 0 and this will cause the
|
|
;algorithm to yield zero. Since the mantissa of v is always between 1 and 2,
|
|
;this is also reasonable. As a result, boundary conditions are handled
|
|
;automatically in a reasonable fashion.
|
|
;
|
|
; x[0] formation given the exponent of v.
|
|
;
|
|
NEGI R1
|
|
SUBI 1,R1 ;Now we have -e-1, the exponent of x[0].
|
|
ASH 24,R1
|
|
PUSH R1
|
|
POPF R1 ;Now R1 = x[0] = 1.0 * 2**(-e-1).
|
|
;
|
|
;Now the iterations begin.
|
|
;
|
|
MPYF R1,R0,R2 ;R2 = v * x[0]
|
|
SUBRF 2.0,R2 ;R2 = 2.0 - v * x[0]
|
|
MPYF R2,R1 ;R1 = x[1] = x[0] * (2.0 - v * x[0])
|
|
|
|
MPYF R1,R0,R2 ;R2 = v * x[1]
|
|
SUBRF 2.0,R2 ;R2 = 2.0 - v * x[1]
|
|
MPYF R2,R1 ;R1 = x[2] = x[1] * (2.0 - v * x[1])
|
|
|
|
MPYF R1,R0,R2 ;R2 = v * x[2]
|
|
SUBRF 2.0,R2 ;R2 = 2.0 - v * x[2]
|
|
MPYF R2,R1 ;R1 = x[3] = x[2] * (2.0 - v * x[2])
|
|
|
|
MPYF R1,R0,R2 ;R2 = v * x[3]
|
|
SUBRF 2.0,R2 ;R2 = 2.0 - v * x[3]
|
|
MPYF R2,R1 ;R1 = x[4] = x[3] * (2.0 - v * x[3])
|
|
|
|
RND R1 ;This minimizes error in the LSBs.
|
|
;
|
|
;For the last iteration we use the formulation:
|
|
;x[5] = (x[4] * (1.0 - (v * x[4]))) + x[4]
|
|
;
|
|
MPYF R1,R0,R2 ;R2 = v * x[4] = 1.0..01.. => 1
|
|
SUBRF 1.0,R2 ;R2 = 1.0 - v * x[4] = 0.0..01... => 0
|
|
MPYF R1,R2 ;R2 = x[4] * (1.0 - v * x[4])
|
|
ADDF R2,R1,R0 ;R0 = x[5] = (x[4]*(1.0-(v*x[4])))+x[4]
|
|
;
|
|
;Return (delayed). Use delay slots to negate the result if v < 0.
|
|
;
|
|
|
|
NEGF R0,R1 ;R1 = -(1/|v|)
|
|
POPF R2 ;CHECK ORIGINAL SIGN DUDES... (SETS SIGN FLAG)
|
|
|
|
BD BK ;Delayed branch to return
|
|
LDFN R1,R0 ;If v < 0, then R1 = -R1 (BASED ON POPF R2)
|
|
POPF R2 ;Restore R2: floating point part
|
|
POP R2 ;Restore R2: integer part
|
|
;---->B BK ;BRANCH OCCURS (RETURN)
|
|
*----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
*----------------------------------------------------------------------------
|
|
*MODI Integer modulo (signed)
|
|
*
|
|
*PARAMETERS Signed integer dividend in R0,
|
|
* Signed integer divisor in R1.
|
|
*Output R0 % R1 into R0.
|
|
*Status Set from result in R0.
|
|
*Registers used R0, R1, AR0, AR1, IR0, IR1
|
|
*
|
|
*Operation 1. Normalize divisor with dividend
|
|
* 2. Repeat SUBC
|
|
* 3. Remainder is in MSBs of result
|
|
*
|
|
* Cycles: 31-62 (depends on normalization)
|
|
*
|
|
.asg AR1,V ;divisor
|
|
.asg R1,TEMP ;float value of operands
|
|
.asg R1,COUNT ;repeat/shift count
|
|
.asg AR0,SIGN ;sign of result
|
|
.asg RC,EXP ;divisor exponent
|
|
|
|
MOD_I30:
|
|
|
|
;
|
|
;Determine sign of result. Get absolute value of operands.
|
|
;
|
|
LDI R0,SIGN ;sign of result same as dividend
|
|
ABSI R0 ;make dividend positive
|
|
BVD mod_32 ;if still negative, escape
|
|
ABSI R1 ;make divisor positive
|
|
LDI R1,V ;save in V
|
|
CMPI R0,V ;divisor > dividend ?
|
|
BHID returnc ; if so, return dividend
|
|
;
|
|
;Normalize operands. Use difference in exponents as shift count
|
|
;for divisor, and as repeat count for SUBC.
|
|
;
|
|
FLOAT R1,TEMP ;normalize divisor
|
|
PUSHF TEMP ;push as float
|
|
POP EXP ;pop as int
|
|
|
|
FLOAT R0,TEMP ;normalize dividend
|
|
PUSHF TEMP ;push as float
|
|
POP COUNT ;pop as int
|
|
|
|
LSH -24,EXP ;get divisor exponent
|
|
LSH -24,COUNT ;get dividend exponent
|
|
SUBI EXP,COUNT ;get difference in exponents
|
|
LSH COUNT,V ;align divisor with dividend
|
|
;
|
|
;Do COUNT+1 subtract & shifts.
|
|
;
|
|
RPTS COUNT
|
|
SUBC V,R0
|
|
;
|
|
; Remainder is in upper bits of R0
|
|
;
|
|
ADDI 1,COUNT ;shift count is -(COUNT+1)
|
|
NEGI COUNT
|
|
LSH COUNT,R0 ;shift right
|
|
;
|
|
; Check sign and negate result if necessary.
|
|
;
|
|
returnc
|
|
POP RC ;return address
|
|
NEGI R0,TEMP ;negate result
|
|
BD RC ;delayed branch to return
|
|
CMPI 0,SIGN ;check sign
|
|
LDIN TEMP,R0 ;if set, use negative result
|
|
CMPI 0,R0 ;set status on result
|
|
;----> B RC ;BRANCH OCCURS (RETURN)
|
|
;
|
|
;The following code handles cases of a full 32-bit dividend. This occurs
|
|
;when R0 = abs(R0) = 080000000h. Handle this by calling the unsigned mod
|
|
;function, then negating the result if necessary.
|
|
;
|
|
mod_32
|
|
PUSH SIGN ;remember sign
|
|
CALL MOD_U30 ;do divide
|
|
POP SIGN ;restore sign
|
|
B return ;return
|
|
*----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
*----------------------------------------------------------------------------
|
|
*MODU Integer modulo (unsigned)
|
|
*
|
|
*PARAMETERS Unsigned dividend in R0,
|
|
* unsigned divisor in R1.
|
|
*
|
|
*Output R0 % R1 into R0.
|
|
*Status Set from result in R0.
|
|
*Registers used R0, R1, AR0, AR1, RC, RS, RE
|
|
*Operation 1. Normalize divisor with dividend
|
|
* 2. Repeat SUBC
|
|
* 3. Remainder is in MSBs of result
|
|
*
|
|
*Cycles 31-60
|
|
*
|
|
.asg AR1,V ;divisor
|
|
.asg R1,TEMP ;float value of operands
|
|
.asg AR0,COUNT ;repeat/shift count
|
|
.asg RC,EXP ;divisor exponent
|
|
|
|
MOD_U30:
|
|
|
|
CMPI R0,R1 ;divisor > dividend ?
|
|
BHI zerob ; if so, return dividend
|
|
LDI R1,V ;load divisor
|
|
;
|
|
;If top bit of dividend is set, handle specially.
|
|
;
|
|
CMPI 0,R0 ;check top bit
|
|
BLTD mod_32c ;get divisor exponent, then jump.
|
|
;
|
|
;Get divisor exponent by converting to float.
|
|
;
|
|
FLOAT V,TEMP ;normalize divisor
|
|
PUSHF TEMP ;push as float
|
|
POP EXP ;pop as int to get exponent
|
|
;
|
|
;31 or less bits in dividend. Get dividend exponent.
|
|
;
|
|
FLOAT R0,TEMP ;normalize dividend
|
|
PUSHF TEMP ;push as float
|
|
POP COUNT ;pop as int to get exponent
|
|
;
|
|
;Use difference in exponents as shift count to line up MSBs.
|
|
;
|
|
LSH -24,EXP ;divisor exponent
|
|
LSH -24,COUNT ;dividend exponent
|
|
SUBI EXP,COUNT ;difference
|
|
LSH COUNT,V ;shift divisor up
|
|
;
|
|
;Do COUNT+1 subtract & shifts.
|
|
;
|
|
RPTS COUNT
|
|
SUBC V,R0
|
|
;
|
|
; Remainder is in upper 31-COUNT bits.
|
|
;
|
|
POP RC ;return address
|
|
BD RC ;delayed branch to return
|
|
ADDI 1,COUNT ;shift count is COUNT+1
|
|
NEGI COUNT ;negate for right shift
|
|
LSH COUNT,R0 ;shift to get result
|
|
;----> B RC ;BRANCH OCCURS (RETURN)
|
|
|
|
;
|
|
;The following code handles cases of a full 32-bit dividend. Before
|
|
;SUBC can be used, the top bit must be cleared (otherwise SUBC can
|
|
;possibly shift a significant 1 out the top of the dividend). This
|
|
;is accomplished by first doing a normal subtraction, then proceeding
|
|
;with SUBCs.
|
|
;
|
|
mod_32c
|
|
;
|
|
;If the top bit of the divisor is set too, the remainder is simply
|
|
;the difference between the dividend and divisor. Otherwise, shift
|
|
;the divisor up to line up the MSBs.
|
|
;
|
|
CMPI 0,V ;check divisor
|
|
BLTD onec ;if negative, remainder is diff
|
|
|
|
LSH -24,EXP ;divisor exponent
|
|
SUBRI 31,EXP ;shift count = 31 - exp
|
|
NEGI EXP,COUNT ;used later as shift count
|
|
LSH EXP,V ;shift up to line up MSBs
|
|
|
|
;
|
|
;Now MSBs are aligned. Do first SUBC by hand using a plain subtraction.
|
|
;Then, shift divisor right rather than shifting dividend left. This leaves
|
|
;a 0 in the top bit of the dividend.
|
|
;
|
|
SUBI V,R0,TEMP ;subtract
|
|
LDIHS TEMP,R0 ;if positive, replace dividend
|
|
SUBI 1,EXP ;first iteration is done
|
|
LSH -1,V ;shift divisor down
|
|
|
|
;
|
|
;Do EXP subtract & shifts.
|
|
;
|
|
RPTS EXP
|
|
SUBC V,R0
|
|
|
|
;
|
|
;Quotient is in EXP+1 LSBs;shift remainder (in MSBs) down.
|
|
;
|
|
LSH COUNT,R0 ;COUNT contains -(EXP+1)
|
|
RETS
|
|
|
|
;
|
|
; Return (dividend - divisor).
|
|
;
|
|
onec SUBI R1,R0
|
|
RETS
|
|
|
|
;
|
|
; Return dividend.
|
|
;
|
|
zeroc CMPI 0,R0 ;set status from result
|
|
RETS
|
|
*----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
*----------------------------------------------------------------------------
|
|
*double sqrt(double x)
|
|
*SQRT Square Root
|
|
*
|
|
*PARAMETERS
|
|
* R2 float x
|
|
*
|
|
*RETURNS
|
|
* R0 float square root of x
|
|
* If x <= 0 returns x
|
|
*
|
|
*CLOBBERS
|
|
* R0,R1,R2,R3
|
|
*
|
|
*The algorithm is from the TMS320C30 User's Guide, p. 11-30
|
|
*
|
|
*This has been modified from the C version such that _errno
|
|
*is not set (nor does it exist).
|
|
*
|
|
*
|
|
*CYCLES
|
|
* 52 (64 OUT OF CACHE (PUSHES))
|
|
*
|
|
|
|
|
|
SQRT:
|
|
LDF R2,R0
|
|
RETSLE ;return the value if <= 0
|
|
|
|
PUSH R1
|
|
PUSHF R1
|
|
PUSH R2
|
|
PUSHF R2
|
|
|
|
MPYF 2.0,R2 ;add a rounding bit in exponent
|
|
PUSHF R2 ;push x as float
|
|
POP R1 ;pop as int
|
|
ASH -25,R1 ;e = exponent(x) / 2
|
|
|
|
;
|
|
;determine initial estimate .25 * 2**(-e/2)
|
|
;
|
|
NEGI R1 ;negate exponent
|
|
ASH 24,R1 ;shift into place
|
|
PUSH R1 ;push as int
|
|
POPF R1 ;pop as float
|
|
MPYF 0.25,R2 ;remove rounding bit
|
|
|
|
;
|
|
;iterate 5 times
|
|
;
|
|
MPYF R1,R1,R0 ;R0 = x[0] * x[0]
|
|
MPYF R2,R0 ;R0 = (v/2) * x[0] * x[0]
|
|
SUBRF 1.5,R0 ;R0 = 1.5 - (v/2) * x[0] * x[0]
|
|
MPYF R0,R1 ;x[1] = x[0] * (1.5 - v/2 * x[0] * x[0])
|
|
|
|
;2
|
|
RND R1
|
|
MPYF R1,R1,R0 ;R0 = x[1] * x[1]
|
|
MPYF R2,R0 ;R0 = (v/2) * x[1] * x[1]
|
|
SUBRF 1.5,R0 ;R0 = 1.5 - (v/2) * x[1] * x[1]
|
|
MPYF R0,R1 ;x[2] = x[1] * (1.5 - v/2 * x[1] * x[1])
|
|
|
|
;3
|
|
RND R1
|
|
MPYF R1,R1,R0 ;R0 = x[2] * x[2]
|
|
MPYF R2,R0 ;R0 = (v/2) * x[2] * x[2]
|
|
SUBRF 1.5,R0 ;R0 = 1.5 - (v/2) * x[2] * x[2]
|
|
MPYF R0,R1 ;x[3] = x[2] * (1.5 - v/2 * x[2] * x[2])
|
|
|
|
;4
|
|
RND R1
|
|
MPYF R1,R1,R0 ;R0 = x[3] * x[3]
|
|
MPYF R2,R0 ;R0 = (v/2) * x[3] * x[3]
|
|
SUBRF 1.5,R0 ;R0 = 1.5 - (v/2) * x[3] * x[3]
|
|
MPYF R0,R1 ;x[4] = x[3] * (1.5 - v/2 * x[3] * x[3])
|
|
|
|
;5
|
|
RND R1
|
|
MPYF R1,R1,R0 ;R0 = x[4] * x[4]
|
|
MPYF R2,R0 ;R0 = (v/2) * x[4] * x[4]
|
|
SUBRF 1.5,R0 ;R0 = 1.5 - (v/2) * x[4] * x[4]
|
|
MPYF R0,R1 ;x[5] = x[4] * (1.5 - v/2 * x[4] * x[4])
|
|
|
|
RND R1
|
|
|
|
POPF R2
|
|
POP R2
|
|
|
|
MPYF R2,R1,R0 ;sqrt(x) = x * sqrt(1/x)
|
|
|
|
POPF R1
|
|
POP R1
|
|
RETS
|
|
*----------------------------------------------------------------------------
|
|
.END
|
|
|