ocaml/otherlibs/num/bng_amd64.c

196 lines
6.3 KiB
C

/***********************************************************************/
/* */
/* Objective Caml */
/* */
/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
/* */
/* Copyright 2003 Institut National de Recherche en Informatique et */
/* en Automatique. All rights reserved. This file is distributed */
/* under the terms of the GNU Library General Public License, with */
/* the special exception on linking described in file ../../LICENSE. */
/* */
/***********************************************************************/
/* $Id$ */
/* Code specific to the AMD x86_64 architecture. */
/* BngAdd2(res, carryout, arg1, arg2):
     res      = arg1 + arg2   (64-bit addition, "addq")
     carryout = carry flag produced by that addition (0 or 1).
   carryout is zeroed with "xorl" BEFORE the addition because xor itself
   modifies the flags; "setc" then writes only its low byte.
   Constraints: res shares its register with arg1 ("0"); carryout is
   early-clobber ("=&q") since it is written before arg2 is consumed, and
   uses class "q" because "setc" needs a byte-addressable register. */
#define BngAdd2(res,carryout,arg1,arg2) \
asm("xorl %1, %1 \n\t" \
"addq %3, %0 \n\t" \
"setc %b1" \
: "=r" (res), "=&q" (carryout) \
: "0" (arg1), "rm" (arg2))
/* BngSub2(res, carryout, arg1, arg2):
     res      = arg1 - arg2   (64-bit subtraction, "subq")
     carryout = carry (borrow) flag produced by that subtraction (0 or 1).
   carryout is zeroed with "xorl" BEFORE the subtraction because xor itself
   modifies the flags; "setc" then writes only its low byte.
   Constraints: res shares its register with arg1 ("0"); carryout is
   early-clobber ("=&q") since it is written before arg2 is consumed. */
#define BngSub2(res,carryout,arg1,arg2) \
asm("xorl %1, %1 \n\t" \
"subq %3, %0 \n\t" \
"setc %b1" \
: "=r" (res), "=&q" (carryout) \
: "0" (arg1), "rm" (arg2))
/* BngMult(resh, resl, arg1, arg2):
     resh:resl = arg1 * arg2   (unsigned widening multiply, "mulq").
   arg1 is passed in rax ("a"); the low half of the product comes back in
   rax (resl) and the high half in rdx (resh). */
#define BngMult(resh,resl,arg1,arg2) \
asm("mulq %3" \
: "=a" (resl), "=d" (resh) \
: "a" (arg1), "r" (arg2))
/* BngDiv(quo, rem, nh, nl, d):
     divides the double-width value nh:nl by d ("divq"), with nl passed in
     rax and nh in rdx; the quotient comes back in rax (quo) and the
     remainder in rdx (rem).
   NOTE(review): "divq" traps when d is zero or the quotient does not fit in
   one digit; callers presumably guarantee nh < d -- confirm at call sites. */
#define BngDiv(quo,rem,nh,nl,d) \
asm("divq %4" \
: "=a" (quo), "=d" (rem) \
: "a" (nl), "d" (nh), "r" (d))
/* Reimplementation in asm of some of the bng operations. */
/* {a,alen} := {a,alen} + {b,blen} + carry.  Returns the carry out.

   The low blen digits are added by the assembly loop, which chains the
   carry through "adcq"; the remaining (alen - blen) digits of a only
   receive carry propagation, done in C.

   The loop advances by 8 bytes per digit, i.e. it assumes 64-bit digits.
   Presumably requires alen >= blen and carry in {0, 1} -- confirm against
   the generic bng contract.

   The asm reads b[] and writes a[] through pointer operands that are not
   listed as memory operands, so it is marked volatile and declares a
   "memory" clobber: without these the compiler is free to cache or reorder
   accesses to a[] / b[] around the asm, or to drop it if its register
   outputs end up unused after inlining. */
static bngcarry bng_amd64_add
       (bng a/*[alen]*/, bngsize alen,
        bng b/*[blen]*/, bngsize blen,
        bngcarry carry)
{
  bngdigit tmp;
  alen -= blen;
  if (blen > 0) {
    asm volatile(
        "negb %b3 \n\t"           /* CF := (low byte of carry != 0) */
        "1: \n\t"
        "movq (%0), %4 \n\t"      /* tmp = *a */
        "adcq (%1), %4 \n\t"      /* tmp += *b + CF */
        "movq %4, (%0) \n\t"      /* *a = tmp */
        "leaq 8(%0), %0 \n\t"     /* a++  (lea leaves CF untouched) */
        "leaq 8(%1), %1 \n\t"     /* b++ */
        "decq %2 \n\t"            /* blen--  (dec leaves CF untouched) */
        "jnz 1b \n\t"
        "setc %b3"                /* carry := CF */
        : "=r" (a), "=r" (b), "=r" (blen), "=q" (carry), "=&r" (tmp)
        : "0" (a), "1" (b), "2" (blen), "3" (carry)
        : "memory", "cc");
  }
  /* Propagate the carry into the remaining high digits of a. */
  if (carry == 0 || alen == 0) return carry;
  do {
    if (++(*a) != 0) return 0;    /* no wrap-around: carry absorbed */
    a++;
  } while (--alen);
  return 1;
}
/* {a,alen} := {a,alen} - {b,blen} - carry.  Returns the borrow out.

   The low blen digits are subtracted by the assembly loop, which chains
   the borrow through "sbbq"; the remaining (alen - blen) digits of a only
   receive borrow propagation, done in C.

   The loop advances by 8 bytes per digit, i.e. it assumes 64-bit digits.
   Presumably requires alen >= blen and carry in {0, 1} -- confirm against
   the generic bng contract.

   The asm reads b[] and writes a[] through pointer operands that are not
   listed as memory operands, so it is marked volatile and declares a
   "memory" clobber: without these the compiler is free to cache or reorder
   accesses to a[] / b[] around the asm, or to drop it if its register
   outputs end up unused after inlining. */
static bngcarry bng_amd64_sub
       (bng a/*[alen]*/, bngsize alen,
        bng b/*[blen]*/, bngsize blen,
        bngcarry carry)
{
  bngdigit tmp;
  alen -= blen;
  if (blen > 0) {
    asm volatile(
        "negb %b3 \n\t"           /* CF := (low byte of carry != 0) */
        "1: \n\t"
        "movq (%0), %4 \n\t"      /* tmp = *a */
        "sbbq (%1), %4 \n\t"      /* tmp -= *b + CF */
        "movq %4, (%0) \n\t"      /* *a = tmp */
        "leaq 8(%0), %0 \n\t"     /* a++  (lea leaves CF untouched) */
        "leaq 8(%1), %1 \n\t"     /* b++ */
        "decq %2 \n\t"            /* blen--  (dec leaves CF untouched) */
        "jnz 1b \n\t"
        "setc %b3"                /* carry := CF (borrow) */
        : "=r" (a), "=r" (b), "=r" (blen), "=q" (carry), "=&r" (tmp)
        : "0" (a), "1" (b), "2" (blen), "3" (carry)
        : "memory", "cc");
  }
  /* Propagate the borrow into the remaining high digits of a. */
  if (carry == 0 || alen == 0) return carry;
  do {
    if ((*a)-- != 0) return 0;    /* digit was non-zero: borrow absorbed */
    a++;
  } while (--alen);
  return 1;
}
/* {a,alen} := {a,alen} + d * {b,blen}.  Returns the carry out.

   The assembly loop handles the low blen digits, keeping the running
   high word in "out".  If a has no further digits (alen == blen) that
   high word is returned as is; otherwise it is added into the next digit
   of a and the resulting one-bit carry is propagated through the rest of
   a in C.

   The loop advances by 8 bytes per digit, i.e. it assumes 64-bit digits.
   Presumably requires alen >= blen -- confirm against the generic bng
   contract.

   The asm reads b[] and writes a[] through pointer operands that are not
   listed as memory operands, so it is marked volatile and declares a
   "memory" clobber: without these the compiler is free to cache or reorder
   accesses to a[] / b[] around the asm, or to drop it if its register
   outputs end up unused after inlining. */
static bngdigit bng_amd64_mult_add_digit
       (bng a/*[alen]*/, bngsize alen,
        bng b/*[blen]*/, bngsize blen,
        bngdigit d)
{
  bngdigit out;
  bngcarry carry;
  alen -= blen;
  out = 0;
  if (blen > 0) {
    asm volatile(
        "1: \n\t"
        "movq (%1), %%rax \n\t"
        "mulq %7\n\t"             /* rdx:rax = d * next digit of b */
        "addq (%0), %%rax \n\t"   /* add next digit of a to rax */
        "adcq $0, %%rdx \n\t"     /* accumulate carry in rdx */
        "addq %3, %%rax \n\t"     /* add out to rax */
        "adcq $0, %%rdx \n\t"     /* accumulate carry in rdx */
        "movq %%rax, (%0) \n\t"   /* rax is next digit of result */
        "movq %%rdx, %3 \n\t"     /* rdx is next out */
        "leaq 8(%0), %0 \n\t"
        "leaq 8(%1), %1 \n\t"
        "decq %2 \n\t"
        "jnz 1b"
        : "=&r" (a), "=&r" (b), "=&r" (blen), "=&r" (out)
        : "0" (a), "1" (b), "2" (blen), "rm" (d), "3" (out)
        : "rax", "rdx", "memory", "cc");
  }
  if (alen == 0) return out;
  /* current digit of a += out */
  BngAdd2(*a, carry, *a, out);
  a++;
  alen--;
  /* Propagate carry */
  if (carry == 0 || alen == 0) return carry;
  do {
    if (++(*a) != 0) return 0;    /* no wrap-around: carry absorbed */
    a++;
  } while (--alen);
  return 1;
}
/* {a,alen} := {a,alen} - d * {b,blen}.  Returns the borrow out.

   The assembly loop handles the low blen digits, keeping the running
   high word (product high half plus borrows) in "out".  If a has no
   further digits (alen == blen) that word is returned as is; otherwise it
   is subtracted from the next digit of a and the resulting one-bit borrow
   is propagated through the rest of a in C.

   The loop advances by 8 bytes per digit, i.e. it assumes 64-bit digits.
   Presumably requires alen >= blen -- confirm against the generic bng
   contract.

   The asm reads b[] and writes a[] through pointer operands that are not
   listed as memory operands, so it is marked volatile and declares a
   "memory" clobber: without these the compiler is free to cache or reorder
   accesses to a[] / b[] around the asm, or to drop it if its register
   outputs end up unused after inlining. */
static bngdigit bng_amd64_mult_sub_digit
       (bng a/*[alen]*/, bngsize alen,
        bng b/*[blen]*/, bngsize blen,
        bngdigit d)
{
  bngdigit out, tmp;
  bngcarry carry;
  alen -= blen;
  out = 0;
  if (blen > 0) {
    asm volatile(
        "1: \n\t"
        "movq (%1), %%rax \n\t"
        "movq (%0), %4 \n\t"
        "mulq %8\n\t"             /* rdx:rax = d * next digit of b */
        "subq %%rax, %4 \n\t"     /* subtract rax from next digit of a */
        "adcq $0, %%rdx \n\t"     /* accumulate carry in rdx */
        "subq %3, %4 \n\t"        /* subtract out */
        "adcq $0, %%rdx \n\t"     /* accumulate carry in rdx */
        "movq %4, (%0) \n\t"      /* store next digit of result */
        "movq %%rdx, %3 \n\t"     /* rdx is next out */
        "leaq 8(%0), %0 \n\t"
        "leaq 8(%1), %1 \n\t"
        "decq %2 \n\t"
        "jnz 1b"
        : "=&r" (a), "=&r" (b), "=&rm" (blen), "=&r" (out), "=&r" (tmp)
        : "0" (a), "1" (b), "2" (blen), "rm" (d), "3" (out)
        : "rax", "rdx", "memory", "cc");
  }
  if (alen == 0) return out;
  /* current digit of a -= out */
  BngSub2(*a, carry, *a, out);
  a++;
  alen--;
  /* Propagate carry */
  if (carry == 0 || alen == 0) return carry;
  do {
    if ((*a)-- != 0) return 0;    /* digit was non-zero: borrow absorbed */
    a++;
  } while (--alen);
  return 1;
}
/* Point the four bng_ops entries that have an amd64 assembly
   implementation in this file at those implementations. */
static void bng_amd64_setup_ops(void)
{
  /* Multiply-by-digit and accumulate / subtract. */
  bng_ops.mult_sub_digit = bng_amd64_mult_sub_digit;
  bng_ops.mult_add_digit = bng_amd64_mult_add_digit;

  /* Multi-digit subtraction and addition. */
  bng_ops.sub = bng_amd64_sub;
  bng_ops.add = bng_amd64_add;
}
/* Architecture setup hook: expands to a call that installs the amd64
   routines above into bng_ops.  Presumably invoked by the generic bng
   initialization code -- confirm where BNG_SETUP_OPS is used. */
#define BNG_SETUP_OPS bng_amd64_setup_ops()