From 81b7451972780743bcd00c963a1c9cb686a3e4ad Mon Sep 17 00:00:00 2001
From: jpathy
Date: Wed, 22 May 2013 23:47:05 +0530
Subject: [PATCH] sse kernel support (sources)

Add FXSAVE/FXRSTOR based floating point save and restore to the pc
kernel.

fpsave and fprestore become function pointers.  cpuidentify points
them at the SSE routines and sets CR4 OSFXSR when CPUID reports Fxsr,
and at the renamed x87 routines (fpx87save, fpx87restore) otherwise.

FXSAVE/FXRSTOR need a 16-byte (FPalign) aligned buffer, so fpssesave
and fpsserestore round the buffer address up and shuffle or copy the
state as needed; FPssestate carries FPalign bytes of padding for this.

FPsave is now a union of the x87 (FPstate) and SSE (FPssestate)
layouts.  mathstate() reads status, fp pc and control from whichever
layout is in use, and matherror's panic reports the values mathstate
returns rather than the x87-layout fields.

Also drop the yxaes table from 8l's optab.c.
---
NOTE(review): mathnote now fetches pc through mathstate, but the line
that formats its note lies outside these hunks; confirm it prints pc
rather than up->fpsave.pc.

 sys/src/9/pc/dat.h     | 35 ++++++++++++++++++--
 sys/src/9/pc/devarch.c | 14 ++++++++
 sys/src/9/pc/fns.h     | 10 ++++--
 sys/src/9/pc/l.s       | 17 ++++++++--
 sys/src/9/pc/main.c    | 83 +++++++++++++++++++++++++++++++++++++++---
 sys/src/9/pc/mem.h     |  1 +
 sys/src/cmd/8l/optab.c |  6 ----
 7 files changed, 148 insertions(+), 18 deletions(-)

diff --git a/sys/src/9/pc/dat.h b/sys/src/9/pc/dat.h
index c55b35e35..030109433 100644
--- a/sys/src/9/pc/dat.h
+++ b/sys/src/9/pc/dat.h
@@ -2,7 +2,9 @@ typedef struct BIOS32si	BIOS32si;
 typedef struct BIOS32ci	BIOS32ci;
 typedef struct Conf	Conf;
 typedef struct Confmem	Confmem;
-typedef struct FPsave	FPsave;
+typedef union FPsave	FPsave;
+typedef struct FPssestate FPssestate;
+typedef struct FPstate	FPstate;
 typedef struct ISAConf	ISAConf;
 typedef struct Label	Label;
 typedef struct Lock	Lock;
@@ -64,7 +66,7 @@ enum
 	FPillegal=	0x100,
 };
 
-struct	FPsave
+struct	FPstate
 {
 	ushort	control;
 	ushort	r1;
@@ -81,6 +83,33 @@ struct	FPsave
 	uchar	regs[80];	/* floating point registers */
 };
 
+struct	FPssestate		/* SSE fp state */
+{
+	ushort	fcw;		/* control */
+	ushort	fsw;		/* status */
+	ushort	ftw;		/* tag */
+	ushort	fop;		/* opcode */
+	ulong	fpuip;		/* pc */
+	ushort	cs;		/* pc segment */
+	ushort	r1;		/* reserved */
+	ulong	fpudp;		/* data pointer */
+	ushort	ds;		/* data pointer segment */
+	ushort	r2;
+	ulong	mxcsr;		/* MXCSR register state */
+	ulong	mxcsr_mask;	/* MXCSR mask register */
+	uchar	xregs[480];	/* extended registers */
+	uchar	alignpad[FPalign];
+};
+
+/*
+ * the FP regs must be stored here, not somewhere pointed to from here.
+ * port code assumes this.
+ */
+union FPsave {
+	FPstate;
+	FPssestate;
+};
+
 struct Confmem
 {
 	ulong	base;
@@ -227,6 +256,7 @@ struct Mach
 	uvlong	tscticks;
 	int	pdballoc;
 	int	pdbfree;
+	FPsave	*fpsavalign;
 
 	vlong	mtrrcap;
 	vlong	mtrrdef;
@@ -297,6 +327,7 @@ enum {
 	Clflush	= 1<<19,
 	Acpif	= 1<<22,		/* therm control msr */
 	Mmx	= 1<<23,
+	Fxsr	= 1<<24,		/* have SSE FXSAVE/FXRSTOR */
 	Sse	= 1<<25,		/* thus sfence instr. */
 	Sse2	= 1<<26,		/* thus mfence & lfence instr.s */
 	Rdrnd	= 1<<30,		/* RDRAND support bit */
diff --git a/sys/src/9/pc/devarch.c b/sys/src/9/pc/devarch.c
index e877eab5f..be29eab3c 100644
--- a/sys/src/9/pc/devarch.c
+++ b/sys/src/9/pc/devarch.c
@@ -38,6 +38,11 @@ enum {
 	Qmax = 16,
 };
 
+
+enum {
+	CR4Osfxsr = 1 << 9,
+};
+
 enum {				/* cpuid standard function codes */
 	Highstdfunc = 0,	/* also returns vendor string */
 	Procsig,
@@ -850,6 +855,15 @@ cpuidentify(void)
 		rdmsr(0x01, &mct);
 	}
 
+	if(m->cpuiddx & Fxsr){			/* have sse fp? */
+		fpsave = fpssesave;
+		fprestore = fpsserestore;
+		putcr4(getcr4() | CR4Osfxsr);
+	} else {
+		fpsave = fpx87save;
+		fprestore = fpx87restore;
+	}
+
 	cputype = t;
 	return t->family;
 }
diff --git a/sys/src/9/pc/fns.h b/sys/src/9/pc/fns.h
index 010025f5a..a066c44ea 100644
--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -33,9 +33,15 @@ void	fpclear(void);
 void	fpenv(FPsave*);
 void	fpinit(void);
 void	fpoff(void);
-void	fprestore(FPsave*);
-void	fpsave(FPsave*);
+void	(*fprestore)(FPsave*);
+void	(*fpsave)(FPsave*);
+void	fpsserestore(FPsave*);
+void	fpsserestore0(FPsave*);
+void	fpssesave(FPsave*);
+void	fpssesave0(FPsave*);
 ulong	fpstatus(void);
+void	fpx87restore(FPsave*);
+void	fpx87save(FPsave*);
 ulong	getcr0(void);
 ulong	getcr2(void);
 ulong	getcr3(void);
diff --git a/sys/src/9/pc/l.s b/sys/src/9/pc/l.s
index 418570e88..4087b3f1a 100644
--- a/sys/src/9/pc/l.s
+++ b/sys/src/9/pc/l.s
@@ -657,13 +657,13 @@ TEXT fpinit(SB), $0				/* enable and init */
 	WAIT
 	RET
 
-TEXT fpsave(SB), $0				/* save state and disable */
+TEXT fpx87save(SB), $0				/* save state and disable */
 	MOVL	p+0(FP), AX
 	FSAVE	0(AX)				/* no WAIT */
 	FPOFF(l2)
 	RET
 
-TEXT fprestore(SB), $0				/* enable and restore state */
+TEXT fpx87restore(SB), $0			/* enable and restore state */
 	FPON
 	MOVL	p+0(FP), AX
 	FRSTOR	0(AX)
@@ -685,6 +685,19 @@ TEXT fpclear(SB), $0				/* clear pending exceptions */
 	FPOFF(l3)
 	RET
 
+TEXT fpssesave0(SB), $0				/* save state and disable */
+	MOVL	p+0(FP), AX
+	FXSAVE	0(AX)				/* no WAIT */
+	FPOFF(l4)
+	RET
+
+TEXT fpsserestore0(SB), $0			/* enable and restore state */
+	FPON
+	MOVL	p+0(FP), AX
+	FXRSTOR	0(AX)
+	WAIT
+	RET
+
 /*
  */
 TEXT splhi(SB), $0
diff --git a/sys/src/9/pc/main.c b/sys/src/9/pc/main.c
index c84eeb6a1..6310d3b99 100644
--- a/sys/src/9/pc/main.c
+++ b/sys/src/9/pc/main.c
@@ -479,14 +479,42 @@ static char* mathmsg[] =
 	"precision loss",
 };
 
+/*
+ * fetch the saved fp status, fp pc and control words from up->fpsave,
+ * reading whichever layout (x87 FSAVE or SSE FXSAVE) matches the
+ * installed fpsave routine.  any of the result pointers may be nil.
+ */
+static void
+mathstate(ulong *stsp, ulong *pcp, ulong *ctlp)
+{
+	ulong sts, fpc, ctl;
+	FPsave *f = &up->fpsave;
+
+	if(fpsave == fpx87save){
+		sts = f->status;
+		fpc = f->pc;
+		ctl = f->control;
+	} else {
+		sts = f->fsw;
+		fpc = f->fpuip;
+		ctl = f->fcw;
+	}
+	if(stsp)
+		*stsp = sts;
+	if(pcp)
+		*pcp = fpc;
+	if(ctlp)
+		*ctlp = ctl;
+}
+
 static void
 mathnote(void)
 {
 	int i;
-	ulong status;
+	ulong status, pc;
 	char *msg, note[ERRMAX];
 
-	status = up->fpsave.status;
+	mathstate(&status, &pc, nil);
 
 	/*
 	 * Some attention should probably be paid here to the
@@ -513,12 +541,50 @@ mathnote(void)
 	postnote(up, 1, note, NDebug);
 }
 
+/*
+ * sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
+ * so we shuffle the data up and down as needed or make copies.
+ */
+
+void
+fpssesave(FPsave *fps)
+{
+	FPsave *afps;
+
+	afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
+	fpssesave0(afps);
+	if (fps != afps)  /* not aligned? shuffle down from aligned buffer */
+		memmove(fps, afps, sizeof(FPssestate) - FPalign);
+}
+
+void
+fpsserestore(FPsave *fps)
+{
+	FPsave *afps;
+
+	afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
+	if (fps != afps) {
+		if (m->fpsavalign == nil)
+			m->fpsavalign = mallocalign(sizeof(FPssestate),
+				FPalign, 0, 0);
+		if (m->fpsavalign)
+			afps = m->fpsavalign;
+		/* copy or shuffle up to make aligned */
+		memmove(afps, fps, sizeof(FPssestate) - FPalign);
+	}
+	fpsserestore0(afps);
+	/* if we couldn't make a copy, shuffle regs back down */
+	if (fps != afps && afps != m->fpsavalign)
+		memmove(fps, afps, sizeof(FPssestate) - FPalign);
+}
+
 /*
  *  math coprocessor error
  */
 static void
 matherror(Ureg *ur, void*)
 {
+	ulong status, pc;
 	/*
 	 *  a write cycle to port 0xF0 clears the interrupt latch attached
 	 *  to the error# line from the 387
@@ -532,9 +598,11 @@ matherror(Ureg *ur, void*)
 	fpenv(&up->fpsave);
 	mathnote();
 
-	if((ur->pc & 0xf0000000) == KZERO)
-		panic("fp: status %ux fppc=0x%lux pc=0x%lux",
-			up->fpsave.status, up->fpsave.pc, ur->pc);
+	if((ur->pc & 0xf0000000) == KZERO){
+		mathstate(&status, &pc, nil);
+		panic("fp: status %lux fppc=0x%lux pc=0x%lux",
+			status, pc, ur->pc);
+	}
 }
 
 /*
@@ -543,6 +611,8 @@ matherror(Ureg *ur, void*)
 static void
 mathemu(Ureg *ureg, void*)
 {
+	ulong status, control;
+
 	if(up->fpstate & FPillegal){
 		/* someone did floating point in a note handler */
 		postnote(up, 1, "sys: floating point in note handler", NDebug);
@@ -561,7 +631,8 @@ mathemu(Ureg *ureg, void*)
 		 * More attention should probably be paid here to the
 		 * exception masks and error summary.
 		 */
-		if((up->fpsave.status & ~up->fpsave.control) & 0x07F){
+		mathstate(&status, nil, &control);
+		if((status & ~control) & 0x07F){
 			mathnote();
 			break;
 		}
diff --git a/sys/src/9/pc/mem.h b/sys/src/9/pc/mem.h
index c30c56da1..79ab270d6 100644
--- a/sys/src/9/pc/mem.h
+++ b/sys/src/9/pc/mem.h
@@ -19,6 +19,7 @@
 #define	ROUND(s, sz)	(((s)+((sz)-1))&~((sz)-1))
 #define	PGROUND(s)	ROUND(s, BY2PG)
 #define	BLOCKALIGN	8
+#define	FPalign		16
 
 /*
  * In 32-bit mode, the MAXMACH limit is 32 without
diff --git a/sys/src/cmd/8l/optab.c b/sys/src/cmd/8l/optab.c
index e6773a443..bde09048b 100644
--- a/sys/src/cmd/8l/optab.c
+++ b/sys/src/cmd/8l/optab.c
@@ -434,12 +434,6 @@ uchar	ymskb[] =
 	Ymr,	Yrl,	Zm_r_xm,	1,
 	0
 };
-uchar	yxaes[] =
-{
-	Yxm,	Yxr,	Zm_r_xm,	2,
-	Yxm,	Yxr,	Zm_r_i_xm,	2,
-	0
-};
 
 Optab optab[] =
 /*	as, ytab, andproto, opcode */