keep fpregs always in sse (FXSAVE) format, adapt libmach and acid files for new format

we now always use the new FXSAVE format in FPsave structure and fpregs
file, converting back and forth in fpx87save() and fpx87restore().

document that fprestore() is a destructive operation now.

change fp register definition in libmach and adapt fpr() acid function.

avoid unnecessary copy of fpstate and fpsave in sysrfork(). functions
including syscalls do not preserve the fp registers and copying fpstate
from the current process would mean we had to fpsave(&up->fpsave); first.
simply not doing it, new process starts in FPinit state.
front
cinap_lenrek 2013-05-26 22:41:40 +02:00
parent 7aea1204b9
commit 257c7e958e
7 changed files with 177 additions and 99 deletions

View File

@ -69,22 +69,14 @@ defn fpr()
print("F5\t", *F5, "\n");
print("F6\t", *F6, "\n");
print("F7\t", *F7, "\n");
print("control\t", *fmt(E0, 'x'), "\n");
print("status\t", *fmt(E1, 'x'), "\n");
print("tag\t", *fmt(E2, 'x'), "\n");
print("ip offset\t", *fmt(E3, 'X'), "\n");
print("cs selector\t", *fmt(E4, 'x'), "\n");
print("opcode\t", *fmt(E4>>8, 'x'), "\n");
print("data operand offset\t", *fmt(E5, 'x'), "\n");
print("operand selector\t", *fmt(E6, 'x'), "\n");
}
defn mmregs()
{
print("MM0\t", *MM0, " MM1\t", *MM1, "\n");
print("MM2\t", *MM2, " MM3\t", *MM3, "\n");
print("MM4\t", *MM4, " MM5\t", *MM5, "\n");
print("MM6\t", *MM6, " MM7\t", *MM7, "\n");
print("control\t", *FCW, "\n");
print("status\t", *FSW, "\n");
print("tag\t", *FTW, "\n");
print("ip\t", *FIP, "\n");
print("cs selector\t", *FCS, "\n");
print("opcode\t", *FOP, "\n");
print("data operand\t", *FDP, "\n");
print("operand selector\t", *FDS, "\n");
}
defn pstop(pid)

View File

@ -76,10 +76,10 @@ struct FPstate
ushort r3;
ulong pc;
ushort selector;
ushort r4;
ushort opcode;
ulong operand;
ushort oselector;
ushort r5;
ushort r4;
uchar regs[80]; /* floating point registers */
};
@ -91,10 +91,10 @@ struct FPssestate /* SSE fp state */
ushort fop; /* opcode */
ulong fpuip; /* pc */
ushort cs; /* pc segment */
ushort r1; /* reserved */
ushort rsrvd1; /* reserved */
ulong fpudp; /* data pointer */
ushort ds; /* data pointer segment */
ushort r2;
ushort rsrvd2;
ulong mxcsr; /* MXCSR register state */
ulong mxcsr_mask; /* MXCSR mask register */
uchar xregs[480]; /* extended registers */

View File

@ -41,7 +41,9 @@ void fpssesave(FPsave*);
void fpssesave0(FPsave*);
ulong fpstatus(void);
void fpx87restore(FPsave*);	/* C wrapper: converts the buffer in place, then calls fpx87restore0() */
void fpx87restore0(FPsave*);	/* assembly: enable the fpu and FRSTOR from the buffer */
void fpx87save(FPsave*);	/* C wrapper: calls fpx87save0(), then converts the buffer in place */
void fpx87save0(FPsave*);	/* assembly: FSAVE into the buffer and disable the fpu */
ulong getcr0(void);
ulong getcr2(void);
ulong getcr3(void);

View File

@ -657,13 +657,13 @@ TEXT fpinit(SB), $0 /* enable and init */
WAIT
RET
TEXT fpx87save(SB), $0 /* save state and disable */
TEXT fpx87save0(SB), $0 /* save state and disable */
MOVL p+0(FP), AX
FSAVE 0(AX) /* no WAIT */
FPOFF(l2)
RET
TEXT fpx87restore(SB), $0 /* enable and restore state */
TEXT fpx87restore0(SB), $0 /* enable and restore state */
FPON
MOVL p+0(FP), AX
FRSTOR 0(AX)

View File

@ -469,6 +469,121 @@ confinit(void)
}
}
/*
 * we keep the FPsave structure in sse format, emulating the FXSAVE /
 * FXRSTOR instructions for the legacy x87 fpu.
 *
 * Note that fpx87restore() and fpsserestore() do modify the FPsave
 * data structure for conversion / realignment shuffling. this means
 * that p->fpsave is only valid when p->fpstate == FPinactive.
 */

/*
 * fpx87save: let fpx87save0() store the x87 state at fps, then
 * rewrite that buffer in place so it uses the sse (FXSAVE style)
 * field names: fsw, ftw, fop, fpuip, cs, fpudp, ds and xregs.
 *
 * The conversion reads and writes the same buffer, so the order of
 * the statements below must not be changed.
 * NOTE(review): this presumes the x87 and sse views of FPsave share
 * storage (the "NOP" remark on fcw/control suggests they do) --
 * confirm against the FPsave declaration.
 */
void
fpx87save(FPsave *fps)
{
fpx87save0(fps);
/* NOP fps->fcw = fps->control; */
/* header fields, one at a time, x87 name -> sse name */
fps->fsw = fps->status;
/*
 * the tag word is copied unchanged, no conversion is done.
 * NOTE(review): a hardware FXSAVE stores an abridged tag byte;
 * confirm consumers of ftw expect the full x87 tag here.
 */
fps->ftw = fps->tag;
fps->fop = fps->opcode;
fps->fpuip = fps->pc;
fps->cs = fps->selector;
fps->fpudp = fps->operand;
fps->ds = fps->oselector;
/*
 * MOVA copies one register image (a ushort at +8 and two ulongs)
 * from s to d, highest part first.  The registers are spread from
 * a stride of 10 bytes in regs to a stride of 0x10 bytes in xregs,
 * last register first.
 * NOTE(review): the descending order presumably keeps not yet
 * copied source bytes from being overwritten -- confirm.
 */
#define MOVA(d,s) \
*((ushort*)(d+8)) = *((ushort*)(s+8)), \
*((ulong*)(d+4)) = *((ulong*)(s+4)), \
*((ulong*)(d)) = *((ulong*)(s))
MOVA(fps->xregs+0x70, fps->regs+70);
MOVA(fps->xregs+0x60, fps->regs+60);
MOVA(fps->xregs+0x50, fps->regs+50);
MOVA(fps->xregs+0x40, fps->regs+40);
MOVA(fps->xregs+0x30, fps->regs+30);
MOVA(fps->xregs+0x20, fps->regs+20);
MOVA(fps->xregs+0x10, fps->regs+10);
MOVA(fps->xregs+0x00, fps->regs+00);
#undef MOVA
/*
 * CLR6 zeroes the ulong and the ushort that follow each copied
 * register image, i.e. the rest of every 0x10 byte xregs slot.
 */
#define CLR6(d) \
*((ulong*)(d)) = 0, \
*((ushort*)(d+4)) = 0
CLR6(fps->xregs+0x70+10);
CLR6(fps->xregs+0x60+10);
CLR6(fps->xregs+0x50+10);
CLR6(fps->xregs+0x40+10);
CLR6(fps->xregs+0x30+10);
CLR6(fps->xregs+0x20+10);
CLR6(fps->xregs+0x10+10);
CLR6(fps->xregs+0x00+10);
#undef CLR6
/* fields the x87 save does not provide are cleared */
fps->rsrvd1 = fps->rsrvd2 = fps->mxcsr = fps->mxcsr_mask = 0;
}
/*
 * fpx87restore: inverse of fpx87save().  Rewrites the buffer at fps
 * in place from the sse field names (xregs, ds, fpudp, fop, cs,
 * fpuip, ftw, fsw) to the x87 ones and then hands it to
 * fpx87restore0() to load into the fpu.
 *
 * This is destructive: fps is written while being converted, so it
 * no longer holds the sse format when the function returns.
 *
 * The conversion reads and writes the same buffer, so the order of
 * the statements below must not be changed.
 * NOTE(review): this presumes the x87 and sse views of FPsave share
 * storage -- confirm against the FPsave declaration.
 */
void
fpx87restore(FPsave *fps)
{
/*
 * MOVA copies one register image (two ulongs and a ushort at +8)
 * from s to d, lowest part first.  The registers are packed from
 * a stride of 0x10 bytes in xregs to a stride of 10 bytes in regs,
 * first register first.
 */
#define MOVA(d,s) \
*((ulong*)(d)) = *((ulong*)(s)), \
*((ulong*)(d+4)) = *((ulong*)(s+4)), \
*((ushort*)(d+8)) = *((ushort*)(s+8))
MOVA(fps->regs+00, fps->xregs+0x00);
MOVA(fps->regs+10, fps->xregs+0x10);
MOVA(fps->regs+20, fps->xregs+0x20);
MOVA(fps->regs+30, fps->xregs+0x30);
MOVA(fps->regs+40, fps->xregs+0x40);
MOVA(fps->regs+50, fps->xregs+0x50);
MOVA(fps->regs+60, fps->xregs+0x60);
MOVA(fps->regs+70, fps->xregs+0x70);
#undef MOVA
/* header fields, in the reverse order of fpx87save() */
fps->oselector = fps->ds;
fps->operand = fps->fpudp;
/* only the low 11 bits of fop are kept as the opcode */
fps->opcode = (fps->fop & 0x7ff);
fps->selector = fps->cs;
fps->pc = fps->fpuip;
fps->tag = fps->ftw;
fps->status = fps->fsw;
/* NOP fps->control = fps->fcw; */
/* clear the reserved x87 fields */
fps->r1 = fps->r2 = fps->r3 = fps->r4 = 0;
fpx87restore0(fps);
}
/*
 * The sse save area has to sit on an FPalign (16-byte) boundary.
 * fps itself need not be aligned, so the state is saved at fps
 * rounded to FPalign and, if that is a different address, moved
 * down so that it starts at fps.
 */
void
fpssesave(FPsave *fps)
{
	uintptr addr, aligned;

	addr = (uintptr)fps;
	aligned = ROUND(addr, FPalign);
	fpssesave0((FPsave*)aligned);
	if(aligned == addr)
		return;		/* already aligned, nothing to move */
	memmove(fps, (FPsave*)aligned, sizeof(FPssestate) - FPalign);
}
/*
 * Load the sse fp state stored at fps.  The restore needs an
 * FPalign aligned buffer: when fps is not aligned the image is
 * first moved up to fps rounded to FPalign and restored from there.
 * The image is not moved back, so fps is modified by this call.
 */
void
fpsserestore(FPsave *fps)
{
	FPsave *aligned;

	aligned = (FPsave*)ROUND(((uintptr)fps), FPalign);
	if(aligned == fps){
		fpsserestore0(fps);
		return;
	}
	/* make an aligned copy inside the same buffer */
	memmove(aligned, fps, sizeof(FPssestate) - FPalign);
	fpsserestore0(aligned);
}
static char* mathmsg[] =
{
nil, /* handled below */
@ -510,61 +625,6 @@ mathnote(ulong status, ulong pc)
postnote(up, 1, note, NDebug);
}
/*
 * sse save and restore want their buffer on a 16-byte (FPalign)
 * boundary.  Save at the rounded address, then bring the data
 * down to fps when the two addresses differ.
 */
void
fpssesave(FPsave *fps)
{
	FPsave *buf = (FPsave*)ROUND(((uintptr)fps), FPalign);
	int moved = (buf != fps);

	fpssesave0(buf);
	if(moved)
		memmove(fps, buf, sizeof(FPssestate) - FPalign);
}
/*
 * Restore sse fp state from fps.  For an unaligned fps the image is
 * moved up to the FPalign rounded address for the restore and moved
 * back down afterwards, so fps holds the same image on return.
 */
void
fpsserestore(FPsave *fps)
{
	FPsave *buf = (FPsave*)ROUND(((uintptr)fps), FPalign);
	int unaligned = (buf != fps);

	if(unaligned)	/* shuffle up to the aligned address */
		memmove(buf, fps, sizeof(FPssestate) - FPalign);
	fpsserestore0(buf);
	if(unaligned)	/* and back down again */
		memmove(fps, buf, sizeof(FPssestate) - FPalign);
}
/*
 * Fetch status, fp pc and control word from the current process's
 * saved fp state, picking the field names that match the save
 * format in use (x87 when fpsave is fpx87save, sse otherwise).
 * Any of the three result pointers may be nil to skip that value.
 */
static void
mathstate(ulong *stsp, ulong *pcp, ulong *ctlp)
{
	FPsave *fp;
	ulong status, pc, control;
	int x87;

	fp = &up->fpsave;
	x87 = (fpsave == fpx87save);

	/* read everything first, then hand out what was asked for */
	status = x87 ? fp->status : fp->fsw;
	pc = x87 ? fp->pc : fp->fpuip;
	control = x87 ? fp->control : fp->fcw;

	if(stsp != nil)
		*stsp = status;
	if(pcp != nil)
		*pcp = pc;
	if(ctlp != nil)
		*ctlp = control;
}
/*
* math coprocessor error
*/
@ -591,7 +651,7 @@ matherror(Ureg*, void*)
static void
mathemu(Ureg *ureg, void*)
{
ulong status, control, pc;
ulong status, control;
if(up->fpstate & FPillegal){
/* someone did floating point in a note handler */
@ -611,9 +671,10 @@ mathemu(Ureg *ureg, void*)
* More attention should probably be paid here to the
* exception masks and error summary.
*/
mathstate(&status, &pc, &control);
status = up->fpsave.fsw;
control = up->fpsave.fcw;
if((status & ~control) & 0x07F){
mathnote(status, pc);
mathnote(status, up->fpsave.fpuip);
break;
}
fprestore(&up->fpsave);

View File

@ -84,7 +84,6 @@ sysrfork(ulong *arg)
p = newproc();
p->fpsave = up->fpsave;
p->scallnr = up->scallnr;
p->s = up->s;
p->nerrlab = 0;
@ -180,7 +179,6 @@ sysrfork(ulong *arg)
if((flag&RFNOTEG) == 0)
p->noteid = up->noteid;
p->fpstate = up->fpstate;
pid = p->pid;
memset(p->time, 0, sizeof(p->time));
p->time[TReal] = MACHP(0)->ticks;

View File

@ -14,9 +14,11 @@
#define AX REGOFF(ax)
#define REGSIZE sizeof(struct Ureg)
#define FP_CTLS(x) (REGSIZE+2*(x))
#define FP_CTL(x) (REGSIZE+4*(x))
#define FP_REG(x) (FP_CTL(7)+10*(x))
#define FPREGSIZE (7*4+8*10)
#define FP_REG(x) (FP_CTL(8)+16*(x))
#define XM_REG(x) (FP_CTL(8)+8*16+16*(x))
#define FPREGSIZE 512
Reglist i386reglist[] = {
{"DI", REGOFF(di), RINT, 'X'},
@ -38,21 +40,44 @@ Reglist i386reglist[] = {
{"SP", SP, RINT, 'X'},
{"SS", REGOFF(ss), RINT, 'X'},
{"E0", FP_CTL(0), RFLT, 'X'},
{"E1", FP_CTL(1), RFLT, 'X'},
{"E2", FP_CTL(2), RFLT, 'X'},
{"E3", FP_CTL(3), RFLT, 'X'},
{"E4", FP_CTL(4), RFLT, 'X'},
{"E5", FP_CTL(5), RFLT, 'X'},
{"E6", FP_CTL(6), RFLT, 'X'},
{"F0", FP_REG(0), RFLT, '3'},
{"F1", FP_REG(1), RFLT, '3'},
{"F2", FP_REG(2), RFLT, '3'},
{"F3", FP_REG(3), RFLT, '3'},
{"F4", FP_REG(4), RFLT, '3'},
{"F5", FP_REG(5), RFLT, '3'},
{"F6", FP_REG(6), RFLT, '3'},
{"F7", FP_REG(7), RFLT, '3'},
{"FCW", FP_CTLS(0), RFLT, 'x'},
{"FSW", FP_CTLS(1), RFLT, 'x'},
{"FTW", FP_CTLS(2), RFLT, 'b'},
{"FOP", FP_CTLS(3), RFLT, 'x'},
{"FIP", FP_CTL(2), RFLT, 'X'},
{"FCS", FP_CTLS(6), RFLT, 'x'},
{"FDP", FP_CTL(4), RFLT, 'X'},
{"FDS", FP_CTLS(10), RFLT, 'x'},
{"MXCSR", FP_CTL(6), RFLT, 'X'},
{"MXCSRMASK", FP_CTL(7), RFLT, 'X'},
{"M0", FP_REG(0), RFLT, 'F'}, /* assumes double */
{"M1", FP_REG(1), RFLT, 'F'},
{"M2", FP_REG(2), RFLT, 'F'},
{"M3", FP_REG(3), RFLT, 'F'},
{"M4", FP_REG(4), RFLT, 'F'},
{"M5", FP_REG(5), RFLT, 'F'},
{"M6", FP_REG(6), RFLT, 'F'},
{"M7", FP_REG(7), RFLT, 'F'},
{"X0", XM_REG(0), RFLT, 'F'}, /* assumes double */
{"X1", XM_REG(1), RFLT, 'F'},
{"X2", XM_REG(2), RFLT, 'F'},
{"X3", XM_REG(3), RFLT, 'F'},
{"X4", XM_REG(4), RFLT, 'F'},
{"X5", XM_REG(5), RFLT, 'F'},
{"X6", XM_REG(6), RFLT, 'F'},
{"X7", XM_REG(7), RFLT, 'F'},
{"F0", FP_REG(7), RFLT, '3'},
{"F1", FP_REG(6), RFLT, '3'},
{"F2", FP_REG(5), RFLT, '3'},
{"F3", FP_REG(4), RFLT, '3'},
{"F4", FP_REG(3), RFLT, '3'},
{"F5", FP_REG(2), RFLT, '3'},
{"F6", FP_REG(1), RFLT, '3'},
{"F7", FP_REG(0), RFLT, '3'},
{ 0 }
};