sse kernel support (sources)

jpathy 2013-05-22 23:47:05 +05:30
parent 213bf50893
commit 81b7451972
7 changed files with 141 additions and 16 deletions

View File

@ -2,7 +2,9 @@ typedef struct BIOS32si BIOS32si;
typedef struct BIOS32ci BIOS32ci;
typedef struct Conf Conf;
typedef struct Confmem Confmem;
typedef struct FPsave FPsave;
typedef union FPsave FPsave;
typedef struct FPssestate FPssestate;
typedef struct FPstate FPstate;
typedef struct ISAConf ISAConf;
typedef struct Label Label;
typedef struct Lock Lock;
@ -64,7 +66,7 @@ enum
FPillegal= 0x100,
};
struct FPsave
struct FPstate
{
ushort control;
ushort r1;
@ -81,6 +83,33 @@ struct FPsave
uchar regs[80]; /* floating point registers */
};
/*
 * Memory image used by the FXSAVE/FXRSTOR path (fpssesave0/fpsserestore0).
 * The member order and sizes mirror what those instructions read and
 * write, so members must not be reordered or resized.
 * NOTE(review): the members up to and including xregs add up to 512
 * bytes if ushort is 2 bytes, ulong is 4 bytes and the compiler inserts
 * no padding — confirm for this compiler.
 */
struct FPssestate /* SSE fp state */
{
ushort fcw; /* control */
ushort fsw; /* status */
ushort ftw; /* tag */
ushort fop; /* opcode */
ulong fpuip; /* pc */
ushort cs; /* pc segment */
ushort r1; /* reserved */
ulong fpudp; /* data pointer */
ushort ds; /* data pointer segment */
ushort r2;
ulong mxcsr; /* MXCSR register state */
ulong mxcsr_mask; /* MXCSR mask register */
uchar xregs[480]; /* extended registers */
/*
 * slack beyond the saved image: fpssesave/fpsserestore round the
 * buffer address up to FPalign and then use
 * sizeof(FPssestate) - FPalign bytes from there, so the aligned
 * image still fits inside this structure.
 */
uchar alignpad[FPalign];
};
/*
 * the FP regs must be stored here, not somewhere pointed to from here.
 * port code assumes this.
 *
 * both members are unnamed, so their fields are reached directly
 * through an FPsave (e.g. f->status for the x87 layout, f->fsw for
 * the sse layout).  which layout is meaningful depends on which save
 * routine the fpsave pointer currently selects.
 */
union FPsave {
FPstate;
FPssestate;
};
struct Confmem
{
ulong base;
@ -227,6 +256,7 @@ struct Mach
uvlong tscticks;
int pdballoc;
int pdbfree;
FPsave *fpsavalign;
vlong mtrrcap;
vlong mtrrdef;
@ -297,6 +327,7 @@ enum {
Clflush = 1<<19,
Acpif = 1<<22, /* therm control msr */
Mmx = 1<<23,
Fxsr = 1<<24, /* have SSE FXSAVE/FXRSTOR */
Sse = 1<<25, /* thus sfence instr. */
Sse2 = 1<<26, /* thus mfence & lfence instr.s */
Rdrnd = 1<<30, /* RDRAND support bit */

View File

@ -38,6 +38,11 @@ enum {
Qmax = 16,
};
enum {
	/* CR4 bit 9 (OSFXSR); cpuidentify turns it on when the cpu reports Fxsr */
	CR4Osfxsr	= 0x200,
};
enum { /* cpuid standard function codes */
Highstdfunc = 0, /* also returns vendor string */
Procsig,
@ -850,6 +855,15 @@ cpuidentify(void)
rdmsr(0x01, &mct);
}
if(m->cpuiddx & Fxsr){ /* have sse fp? */
fpsave = fpssesave;
fprestore = fpsserestore;
putcr4(getcr4() | CR4Osfxsr);
} else {
fpsave = fpx87save;
fprestore = fpx87restore;
}
cputype = t;
return t->family;
}

View File

@ -33,9 +33,15 @@ void fpclear(void);
void fpenv(FPsave*);
void fpinit(void);
void fpoff(void);
void fprestore(FPsave*);
void fpsave(FPsave*);
void (*fprestore)(FPsave*);
void (*fpsave)(FPsave*);
void fpsserestore(FPsave*);
void fpsserestore0(FPsave*);
void fpssesave(FPsave*);
void fpssesave0(FPsave*);
ulong fpstatus(void);
void fpx87restore(FPsave*);
void fpx87save(FPsave*);
ulong getcr0(void);
ulong getcr2(void);
ulong getcr3(void);

View File

@ -657,13 +657,13 @@ TEXT fpinit(SB), $0 /* enable and init */
WAIT
RET
TEXT fpsave(SB), $0 /* save state and disable */
/*
 * fpx87save(FPsave *p): write the x87 state to the area at p with
 * FSAVE, then run the FPOFF macro (defined elsewhere in this file)
 * to disable the FPU.
 */
TEXT fpx87save(SB), $0 /* save state and disable */
MOVL p+0(FP), AX /* AX = p */
FSAVE 0(AX) /* no WAIT */
FPOFF(l2)
RET
TEXT fprestore(SB), $0 /* enable and restore state */
TEXT fpx87restore(SB), $0 /* enable and restore state */
FPON
MOVL p+0(FP), AX
FRSTOR 0(AX)
@ -685,6 +685,19 @@ TEXT fpclear(SB), $0 /* clear pending exceptions */
FPOFF(l3)
RET
/*
 * fpssesave0(FPsave *p): write the FP/SSE state to the area at p with
 * FXSAVE, then run the FPOFF macro to disable the FPU.
 * p must already be FPalign (16-byte) aligned; fpssesave() rounds the
 * address before calling here.
 */
TEXT fpssesave0(SB), $0 /* save state and disable */
MOVL p+0(FP), AX /* AX = p */
FXSAVE 0(AX) /* no WAIT */
FPOFF(l4)
RET
/*
 * fpsserestore0(FPsave *p): run the FPON macro to enable the FPU, then
 * reload the FP/SSE state from the area at p with FXRSTOR.
 * p must already be FPalign (16-byte) aligned; fpsserestore() arranges
 * that before calling here.
 */
TEXT fpsserestore0(SB), $0 /* enable and restore state */
FPON
MOVL p+0(FP), AX /* AX = p */
FXRSTOR 0(AX)
WAIT
RET
/*
*/
TEXT splhi(SB), $0

View File

@ -479,14 +479,37 @@ static char* mathmsg[] =
"precision loss",
};
/*
 * pull the status word, fp pc and control word out of the current
 * process's saved FP state, using the x87 field names when fpsave is
 * fpx87save and the sse field names otherwise.
 * any of the three result pointers may be nil when the caller does not
 * want that value.
 */
static void
mathstate(ulong *stsp, ulong *pcp, ulong *ctlp)
{
	int x87;
	FPsave *fps;

	fps = &up->fpsave;
	x87 = fpsave == fpx87save;

	if(stsp != nil)
		*stsp = x87? fps->status: fps->fsw;
	if(pcp != nil)
		*pcp = x87? fps->pc: fps->fpuip;
	if(ctlp != nil)
		*ctlp = x87? fps->control: fps->fcw;
}
static void
mathnote(void)
{
int i;
ulong status;
ulong status, pc;
char *msg, note[ERRMAX];
status = up->fpsave.status;
mathstate(&status, &pc, nil);
/*
* Some attention should probably be paid here to the
@ -513,12 +536,50 @@ mathnote(void)
postnote(up, 1, note, NDebug);
}
/*
* sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
* so we shuffle the data up and down as needed or make copies.
*/
/*
 * save the FP/SSE state into fps.  fpssesave0 needs an FPalign-aligned
 * target, so the state is written at the next aligned address at or
 * above fps and, if that differs from fps, slid back down so the image
 * always starts at fps itself.
 */
void
fpssesave(FPsave *fps)
{
	FPsave *dst;

	dst = (FPsave*)ROUND((uintptr)fps, FPalign);
	fpssesave0(dst);
	if(dst == fps)
		return;		/* already aligned; nothing to move */
	memmove(fps, dst, sizeof(FPssestate) - FPalign);
}
/*
 * reload the FP/SSE state saved at fps.  fpsserestore0 needs an
 * FPalign-aligned source:
 *	- if fps is aligned, restore straight from it;
 *	- otherwise copy the image into this Mach's aligned scratch
 *	  buffer (allocated on first need) and restore from there;
 *	- if no scratch buffer could be had, slide the image up inside
 *	  fps to the aligned address, restore, and slide it back down.
 */
void
fpsserestore(FPsave *fps)
{
	int inplace;
	FPsave *src;

	src = (FPsave*)ROUND((uintptr)fps, FPalign);
	if(src == fps){
		fpsserestore0(fps);
		return;
	}

	if(m->fpsavalign == nil)
		m->fpsavalign = mallocalign(sizeof(FPssestate), FPalign, 0, 0);
	inplace = m->fpsavalign == nil;
	if(!inplace)
		src = m->fpsavalign;

	/* copy to the scratch buffer, or shuffle up within fps */
	memmove(src, fps, sizeof(FPssestate) - FPalign);
	fpsserestore0(src);

	/* without a separate copy, put the image back where it started */
	if(inplace)
		memmove(fps, src, sizeof(FPssestate) - FPalign);
}
/*
* math coprocessor error
*/
static void
matherror(Ureg *ur, void*)
{
ulong status, pc;
/*
* a write cycle to port 0xF0 clears the interrupt latch attached
* to the error# line from the 387
@ -532,9 +593,11 @@ matherror(Ureg *ur, void*)
fpenv(&up->fpsave);
mathnote();
if((ur->pc & 0xf0000000) == KZERO)
if((ur->pc & 0xf0000000) == KZERO){
mathstate(&status, &pc, nil);
panic("fp: status %ux fppc=0x%lux pc=0x%lux",
up->fpsave.status, up->fpsave.pc, ur->pc);
}
}
/*
@ -543,6 +606,8 @@ matherror(Ureg *ur, void*)
static void
mathemu(Ureg *ureg, void*)
{
ulong status, control;
if(up->fpstate & FPillegal){
/* someone did floating point in a note handler */
postnote(up, 1, "sys: floating point in note handler", NDebug);
@ -561,7 +626,8 @@ mathemu(Ureg *ureg, void*)
* More attention should probably be paid here to the
* exception masks and error summary.
*/
if((up->fpsave.status & ~up->fpsave.control) & 0x07F){
mathstate(&status, nil, &control);
if((status & ~control) & 0x07F){
mathnote();
break;
}

View File

@ -19,6 +19,7 @@
/* round s up to a multiple of sz; the mask arithmetic is only right when sz is a power of two */
#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1))
#define PGROUND(s) ROUND(s, BY2PG)
#define BLOCKALIGN 8
/* alignment, in bytes, of the buffers handed to fpssesave0/fpsserestore0 */
#define FPalign 16
/*
* In 32-bit mode, the MAXMACH limit is 32 without

View File

@ -434,12 +434,6 @@ uchar ymskb[] =
Ymr, Yrl, Zm_r_xm, 1,
0
};
/*
 * operand-pattern table laid out like the neighbouring y* tables:
 * rows of four bytes, ended by a single 0.
 * NOTE(review): presumably each row is source operand class,
 * destination operand class, Z encoding case, opcode-byte count —
 * confirm against the code that walks optab.
 */
uchar yxaes[] =
{
Yxm, Yxr, Zm_r_xm, 2,
Yxm, Yxr, Zm_r_i_xm, 2,
0
};
Optab optab[] =
/* as, ytab, andproto, opcode */