diff options
| author | cinap_lenrek <cinap_lenrek@felloff.net> | 2018-11-07 16:48:14 +0100 |
|---|---|---|
| committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2018-11-07 16:48:14 +0100 |
| commit | b0d226705cec8f36339adce5d95b4feda1deba02 (patch) | |
| tree | 3ab18cce5834465c336b4d9afaeaad9732a1517f | |
| parent | 17f0b2ce38f3c845a3bbbe0630185ca8dac684a4 (diff) | |
| download | plan9front-b0d226705cec8f36339adce5d95b4feda1deba02.tar.xz | |
bcm: speed up co-processor operations by avoiding i+d cache flush on each operation
coproc.c generated the instructions anew each time,
requiring an i+d cache flush for each operation.
instead, we can speed this up like this:
given that the coprocessor registers are per cpu, we can
assume that interrupts have already been disabled by
the caller to prevent a process switch to another cpu.
we cache the instructions generated in a static append
only buffer and maintain separate end pointers for each
cpu.
the cache flushes only need to be done when new
operations have been added to the buffer.
| -rw-r--r-- | sys/src/9/bcm/coproc.c | 164 | ||||
| -rw-r--r-- | sys/src/9/bcm/vfp3.c | 4 |
2 files changed, 167 insertions, 1 deletions
diff --git a/sys/src/9/bcm/coproc.c b/sys/src/9/bcm/coproc.c index 87efa65b4..55c680f08 100644 --- a/sys/src/9/bcm/coproc.c +++ b/sys/src/9/bcm/coproc.c @@ -1 +1,163 @@ -#include "../teg2/coproc.c" +/* + * arm co-processors + * mainly to cope with arm hard-wiring register numbers into instructions. + * + * CP15 (system control) is the one that gets used the most in practice. + * + * these routines must be callable from KZERO. + * + * on a multiprocessor, process switching to another cpu is assumed + * to be inhibited by the caller as these registers are local to the cpu. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" + +#include "arm.h" + +enum { + /* alternates: 0xe12fff1e BX (R14); last e is R14 */ + /* 0xe28ef000 B 0(R14); second e is R14 (ken) */ + Retinst = 0xe1a0f00e, /* MOV R14, R15 */ + + Opmask = MASK(3), + Regmask = MASK(4), +}; + +static void* +mkinstr(ulong wd) /* return the address of a cached two-word routine {wd, Retinst}, appending it to ib on first use */ +{ + static ulong ib[256], *ep[MAXMACH+1]; /* ib: append-only buffer of two-word entries; ep[m->machno]: end pointer recorded by each cpu; ep[MAXMACH]: end of all entries appended so far */ + static Lock lk; + ulong *ip, *ie; + + ie = ep[m->machno]; /* first search, without the lock, the entries below this cpu's own end pointer */ + for(ip = ib; ip < ie; ip += 2) + if(*ip == wd) + return ip; + + ilock(&lk); /* not found: continue from ip up to the shared end pointer, now holding the lock */ + ie = ep[MAXMACH]; + for(; ip < ie; ip += 2) + if(*ip == wd) + goto Found; + if(ip >= &ib[nelem(ib)]) + panic("mkinstr: out of instrucuction buffer"); + ip[0] = wd; + ip[1] = Retinst; + ep[MAXMACH] = ie = ip + 2; + cachedwbse(ip, 2*sizeof(*ip)); /* presumably writes the new pair back from the d-cache so it can be fetched as instructions; confirm against cachedwbse */ +Found: + iunlock(&lk); + cacheiinv(); /* presumably invalidates the i-cache; done before this cpu's end pointer is advanced to ie; confirm against cacheiinv */ + ep[m->machno] = ie; + return ip; +} + + +static void* +setupcpop(ulong opcode, int cp, int op1, int crn, int crm, + int op2) /* mask each field to its width and merge it into opcode: op1 at bit 21, crn at bit 16, cp at bit 8, op2 at bit 5, crm in the low bits */ +{ + op1 &= Opmask; + op2 &= Opmask; + crn &= Regmask; + crm &= Regmask; + cp &= Regmask; + return mkinstr(opcode | op1 << 21 | crn << 16 | cp << 8 | op2 << 5 | crm); +} + +ulong +cprd(int cp, int op1, int crn, int crm, int op2) +{ + /* + * MRC. return value will be in R0, which is convenient. + * Rt will be R0. 
+ */ + ulong (*fp)(void) = setupcpop(0xee100010, cp, op1, crn, crm, op2); + return fp(); +} + +void +cpwr(int cp, int op1, int crn, int crm, int op2, ulong val) +{ + /* MCR, Rt is R0 */ + void (*fp)(ulong) = setupcpop(0xee000010, cp, op1, crn, crm, op2); + fp(val); +} + +ulong +cprdsc(int op1, int crn, int crm, int op2) +{ + return cprd(CpSC, op1, crn, crm, op2); +} + +void +cpwrsc(int op1, int crn, int crm, int op2, ulong val) +{ + cpwr(CpSC, op1, crn, crm, op2, val); +} + +/* floating point */ + +/* fp coproc control */ +static void* +setupfpctlop(int opcode, int fpctlreg) +{ + fpctlreg &= Nfpctlregs - 1; + return mkinstr(opcode | fpctlreg << 16 | 0 << 12 | CpFP << 8); +} + +ulong +fprd(int fpreg) +{ + /* + * VMRS. return value will be in R0, which is convenient. + * Rt will be R0. + */ + ulong (*fp)(void) = setupfpctlop(0xeef00010, fpreg); + return fp(); +} + +void +fpwr(int fpreg, ulong val) +{ + /* + * fpu might be off and this VMSR might enable it + * VMSR, Rt is R0 + */ + void (*fp)(ulong) = setupfpctlop(0xeee00010, fpreg); + fp(val); +} + +/* fp register access; don't bother with single precision */ +static void* +setupfpop(int opcode, int fpreg) +{ + ulong wd = opcode | 0 << 16 | (fpreg & (16 - 1)) << 12; + if (fpreg >= 16) + wd |= 1 << 22; /* high bit of dfp reg # */ + return mkinstr(wd); +} + +ulong +fpsavereg(int fpreg, uvlong *fpp) +{ + /* + * VSTR. pointer will be in R0, which is convenient. + * Rt will be R0. + */ + ulong (*fp)(uvlong *) = setupfpop(0xed000000 | CpDFP << 8, fpreg); + return fp(fpp); +} + +void +fprestreg(int fpreg, uvlong val) +{ + /* VLDR, Rt is R0 */ + void (*fp)(uvlong *) = setupfpop(0xed100000 | CpDFP << 8, fpreg); + fp(&val); /* val is this function's by-value copy; the generated routine is handed its address */ +} diff --git a/sys/src/9/bcm/vfp3.c b/sys/src/9/bcm/vfp3.c index c62af6b7b..cefeda261 100644 --- a/sys/src/9/bcm/vfp3.c +++ b/sys/src/9/bcm/vfp3.c @@ -338,8 +338,12 @@ fpuprocfork(Proc *p) void fpusysprocsetup(Proc *p) { + int s; + + s = splhi(); /* NOTE(review): presumably so fpoff() runs with interrupts disabled, as the co-processor routines now assume of their callers; confirm */ p->fpstate = FPinit; fpoff(); + splx(s); } static void |
