summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- sys/src/9/pc64/dat.h 29
-rw-r--r-- sys/src/9/pc64/fns.h 2
-rw-r--r-- sys/src/9/pc64/main.c 108
-rw-r--r-- sys/src/9/pc64/trap.c 50
4 files changed, 134 insertions, 55 deletions
diff --git a/sys/src/9/pc64/dat.h b/sys/src/9/pc64/dat.h
index 4dde0ec1b..8106cf1cb 100644
--- a/sys/src/9/pc64/dat.h
+++ b/sys/src/9/pc64/dat.h
@@ -65,12 +65,6 @@ struct FPsave
uchar ign[96]; /* reserved, ignored */
};
-struct PFPU
-{
- int fpstate;
- FPsave *fpsave;
-};
-
enum
{
/* this is a state */
@@ -78,8 +72,27 @@ enum
FPactive= 1,
FPinactive= 2,
- /* the following is a bit that can be or'd into the state */
- FPillegal= 0x100,
+ /*
+ * the following are bits that can be or'd into the state.
+ *
+ * this is biased so that FPinit, FPactive and FPinactive
+ * without any flags refer to user fp state in fpslot[0].
+ */
+ FPillegal= 1<<8, /* fp forbidden in note handler */
+ FPpush= 2<<8, /* trap on use and initialize new fpslot */
+ FPnouser= 4<<8, /* fpslot[0] is kernel regs */
+ FPkernel= 8<<8, /* fp use in kernel (user in fpslot[0] when !FPnouser) */
+
+ FPindexs= 16,
+ FPindex1= 1<<FPindexs,
+ FPindexm= 3<<FPindexs,
+};
+
+/*
+ * FPU bookkeeping reached through a Proc (up->fpstate, up->fpsave,
+ * up->fpslot elsewhere in this change).
+ */
+struct PFPU
+{
+ int fpstate; /* FPinit/FPactive/FPinactive | FP* flag bits | slot index<<FPindexs */
+ FPsave *fpsave; /* fpslot[fpstate>>FPindexs] */
+ /*
+ * one save area for every index the FPindexm field can encode
+ * (0..FPindexm>>FPindexs). the previous size expression,
+ * (FPindexm+1)>>FPindexs, evaluated to 3 and left index 3
+ * outside the array.
+ */
+ FPsave *fpslot[(FPindexm>>FPindexs)+1];
};
struct Confmem
diff --git a/sys/src/9/pc64/fns.h b/sys/src/9/pc64/fns.h
index 44613d2b2..f785a328a 100644
--- a/sys/src/9/pc64/fns.h
+++ b/sys/src/9/pc64/fns.h
@@ -41,6 +41,8 @@ void fpsserestore(FPsave*);
void fpssesave(FPsave*);
void fpx87restore(FPsave*);
void fpx87save(FPsave*);
+int fpusave(void);
+void fpurestore(int);
u64int getcr0(void);
u64int getcr2(void);
u64int getcr3(void);
diff --git a/sys/src/9/pc64/main.c b/sys/src/9/pc64/main.c
index 77a45f396..e56ad9197 100644
--- a/sys/src/9/pc64/main.c
+++ b/sys/src/9/pc64/main.c
@@ -473,13 +473,13 @@ mathnote(ulong status, uintptr pc)
* math coprocessor error
*/
static void
-matherror(Ureg*, void*)
+matherror(Ureg *, void*)
{
/*
* Save FPU state to check out the error.
*/
fpsave(up->fpsave);
- up->fpstate = FPinactive;
+ up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
mathnote(up->fpsave->fsw, up->fpsave->rip);
}
@@ -490,7 +490,7 @@ static void
simderror(Ureg *ureg, void*)
{
fpsave(up->fpsave);
- up->fpstate = FPinactive;
+ up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc);
}
@@ -519,18 +519,37 @@ static void
mathemu(Ureg *ureg, void*)
{
ulong status, control;
+ int index;
if(up->fpstate & FPillegal){
/* someone did floating point in a note handler */
postnote(up, 1, "sys: floating point in note handler", NDebug);
return;
}
- switch(up->fpstate){
+ switch(up->fpstate & ~(FPnouser|FPkernel|FPindexm)){
+ case FPactive | FPpush:
+ _clts();
+ fpsave(up->fpsave);
+ case FPinactive | FPpush:
+ up->fpstate += FPindex1;
+ case FPinit | FPpush:
case FPinit:
fpinit();
- while(up->fpsave == nil)
- up->fpsave = mallocalign(sizeof(FPsave), FPalign, 0, 0);
- up->fpstate = FPactive;
+ index = up->fpstate >> FPindexs;
+ if(index < 0 || index > FPindexm)
+ panic("fpslot index overflow: %d", index);
+ if(userureg(ureg)){
+ if(index != 0)
+ panic("fpslot index %d != 0 for user", index);
+ } else {
+ if(index == 0)
+ up->fpstate |= FPnouser;
+ up->fpstate |= FPkernel;
+ }
+ while(up->fpslot[index] == nil)
+ up->fpslot[index] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
+ up->fpsave = up->fpslot[index];
+ up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
break;
case FPinactive:
/*
@@ -547,7 +566,7 @@ mathemu(Ureg *ureg, void*)
break;
}
fprestore(up->fpsave);
- up->fpstate = FPactive;
+ up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
break;
case FPactive:
panic("math emu pid %ld %s pc %#p",
@@ -596,17 +615,21 @@ procfork(Proc *p)
/* save floating point state */
s = splhi();
switch(up->fpstate & ~FPillegal){
+ case FPactive | FPpush:
+ _clts();
case FPactive:
fpsave(up->fpsave);
- up->fpstate = FPinactive;
+ up->fpstate = FPinactive | (up->fpstate & FPpush);
+ case FPactive | FPkernel:
+ case FPinactive | FPkernel:
+ case FPinactive | FPpush:
case FPinactive:
- while(p->fpsave == nil)
- p->fpsave = mallocalign(sizeof(FPsave), FPalign, 0, 0);
- memmove(p->fpsave, up->fpsave, sizeof(FPsave));
+ while(p->fpslot[0] == nil)
+ p->fpslot[0] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
+ memmove(p->fpsave = p->fpslot[0], up->fpslot[0], sizeof(FPsave));
p->fpstate = FPinactive;
}
splx(s);
-
}
void
@@ -644,24 +667,26 @@ procsave(Proc *p)
p->kentry -= t;
p->pcycles += t;
- if(p->fpstate == FPactive){
+ switch(p->fpstate & ~(FPnouser|FPkernel|FPindexm)){
+ case FPactive | FPpush:
+ _clts();
+ case FPactive:
if(p->state == Moribund){
- _clts();
_fnclex();
_stts();
+ break;
}
- else{
- /*
- * Fpsave() stores without handling pending
- * unmasked exeptions. Postnote() can't be called
- * here as sleep() already has up->rlock, so
- * the handling of pending exceptions is delayed
- * until the process runs again and generates an
- * emulation fault to activate the FPU.
- */
- fpsave(p->fpsave);
- }
- p->fpstate = FPinactive;
+ /*
+ * Fpsave() stores without handling pending
+ * unmasked exceptions. Postnote() can't be called
+ * here as sleep() already has up->rlock, so
+ * the handling of pending exceptions is delayed
+ * until the process runs again and generates an
+ * emulation fault to activate the FPU.
+ */
+ fpsave(p->fpsave);
+ p->fpstate = FPinactive | (p->fpstate & (FPpush|FPnouser|FPkernel|FPindexm));
+ break;
}
/*
@@ -677,3 +702,32 @@ procsave(Proc *p)
*/
mmuflushtlb();
}
+
+/*
+ * Fpusave and fpurestore lazily save and restore FPU state across
+ * system calls and the pagefault handler so that we can take
+ * advantage of SSE instructions such as AES-NI in the kernel.
+ */
+/*
+ * Flag the current process (up) with FPpush, which the FP state enum
+ * describes as "trap on use and initialize new fpslot", and clear
+ * FPillegal. Returns the previous up->fpstate so the caller can hand
+ * it back to fpurestore().
+ */
+int
+fpusave(void)
+{
+ int ostate = up->fpstate;
+ /*
+ * FPillegal and FPpush are not masked off here, so only an FPactive
+ * state without those flags matches.
+ * NOTE(review): _stts() presumably sets CR0.TS so the next FP
+ * instruction faults into mathemu -- confirm against l.s.
+ */
+ if((up->fpstate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+ _stts();
+ up->fpstate = FPpush | (up->fpstate & ~FPillegal);
+ return ostate;
+}
+/*
+ * Counterpart of fpusave(): ostate is the value fpusave() returned.
+ * On return up->fpstate is ostate, except that the base state is
+ * forced to FPinactive when the slot index changed in between.
+ * The callers in trap.c in this change invoke it after splhi().
+ */
+void
+fpurestore(int ostate)
+{
+ /*
+ * current state is an unflagged FPactive.
+ * NOTE(review): _stts() presumably re-arms the FP trap -- confirm.
+ */
+ if((up->fpstate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+ _stts();
+ if((ostate & FPindexm) == (up->fpstate & FPindexm)){
+ /*
+ * slot index is the same as at fpusave() time; if the old state
+ * was an unflagged FPactive, call _clts().
+ * NOTE(review): presumably clears CR0.TS because the registers
+ * still hold the old contents -- confirm.
+ */
+ if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+ _clts();
+ } else {
+ /*
+ * slot index differs: point fpsave back at the old slot and
+ * record the old state as FPinactive, keeping its flag bits
+ * and index.
+ */
+ up->fpsave = up->fpslot[ostate>>FPindexs];
+ ostate = FPinactive | (ostate & (FPillegal|FPpush|FPnouser|FPkernel|FPindexm));
+ }
+ up->fpstate = ostate;
+}
diff --git a/sys/src/9/pc64/trap.c b/sys/src/9/pc64/trap.c
index ced545deb..387467aa5 100644
--- a/sys/src/9/pc64/trap.c
+++ b/sys/src/9/pc64/trap.c
@@ -649,11 +649,12 @@ unexpected(Ureg* ureg, void*)
}
extern void checkpages(void);
+
static void
faultamd64(Ureg* ureg, void*)
{
uintptr addr;
- int read, user, n, insyscall;
+ int read, user, n, insyscall, f;
char buf[ERRMAX];
addr = getcr2();
@@ -670,6 +671,14 @@ faultamd64(Ureg* ureg, void*)
insyscall = up->insyscall;
up->insyscall = 1;
+ f = fpusave();
+ if(!user && waserror()){
+ int s = splhi();
+ fpurestore(f);
+ up->insyscall = insyscall;
+ splx(s);
+ nexterror();
+ }
n = fault(addr, read);
if(n < 0){
if(!user){
@@ -681,6 +690,9 @@ faultamd64(Ureg* ureg, void*)
read ? "read" : "write", addr);
postnote(up, 1, buf, NDebug);
}
+ if(!user) poperror();
+ splhi();
+ fpurestore(f);
up->insyscall = insyscall;
}
@@ -698,7 +710,7 @@ syscall(Ureg* ureg)
char *e;
uintptr sp;
long long ret;
- int i, s;
+ int i, s, f;
ulong scallnr;
vlong startns, stopns;
@@ -715,11 +727,12 @@ syscall(Ureg* ureg)
sp = ureg->sp;
scallnr = ureg->bp; /* RARG */
up->scallnr = scallnr;
-
+ f = fpusave();
spllo();
+
+ ret = -1;
startns = 0;
up->nerrlab = 0;
- ret = -1;
if(!waserror()){
if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
validaddr(sp, sizeof(Sargs)+BY2WD, 0);
@@ -778,12 +791,13 @@ syscall(Ureg* ureg)
splx(s);
}
+ splhi();
+ fpurestore(f);
up->insyscall = 0;
up->psstate = 0;
if(scallnr == NOTED){
noted(ureg, *((ulong*)up->s.args));
-
/*
* normally, syscall() returns to forkret()
* not restoring general registers when going
@@ -796,10 +810,10 @@ syscall(Ureg* ureg)
}
if(scallnr!=RFORK && (up->procctl || up->nnote)){
- splhi();
notify(ureg);
((void**)&ureg)[-1] = (void*)noteret; /* loads RARG */
}
+
/* if we delayed sched because we held a lock, sched now */
if(up->delaysched)
sched();
@@ -813,7 +827,7 @@ syscall(Ureg* ureg)
int
notify(Ureg* ureg)
{
- int l, s;
+ int l;
uintptr sp;
Note *n;
@@ -821,14 +835,7 @@ notify(Ureg* ureg)
procctl();
if(up->nnote == 0)
return 0;
-
- if(up->fpstate == FPactive){
- fpsave(up->fpsave);
- up->fpstate = FPinactive;
- }
- up->fpstate |= FPillegal;
-
- s = spllo();
+ spllo();
qlock(&up->debug);
up->notepending = 0;
n = &up->note[0];
@@ -887,11 +894,14 @@ if(0) print("%s %lud: notify %#p %#p %#p %s\n",
up->nnote--;
memmove(&up->lastnote, &up->note[0], sizeof(Note));
memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
-
qunlock(&up->debug);
- splx(s);
+ splhi();
+ if(up->fpstate == FPactive){
+ fpsave(up->fpsave);
+ up->fpstate = FPinactive;
+ }
+ up->fpstate |= FPillegal;
return 1;
-
}
/*
@@ -903,6 +913,8 @@ noted(Ureg* ureg, ulong arg0)
Ureg *nureg;
uintptr oureg, sp;
+ up->fpstate &= ~FPillegal;
+ spllo();
qlock(&up->debug);
if(arg0!=NRSTR && !up->notified) {
qunlock(&up->debug);
@@ -913,8 +925,6 @@ noted(Ureg* ureg, ulong arg0)
nureg = up->ureg; /* pointer to user returned Ureg struct */
- up->fpstate &= ~FPillegal;
-
/* sanity clause */
oureg = (uintptr)nureg;
if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){