From 56343cafcfb47a4ef3fff0d6a8e3220ecd93518b Mon Sep 17 00:00:00 2001 From: cinap_lenrek Date: Sat, 1 Feb 2014 10:25:10 +0100 Subject: add experimental pc64 kernel --- sys/src/9/pc64/apbootstrap.s | 169 +++++++ sys/src/9/pc64/dat.h | 360 ++++++++++++++ sys/src/9/pc64/fns.h | 189 ++++++++ sys/src/9/pc64/l.s | 1050 +++++++++++++++++++++++++++++++++++++++++ sys/src/9/pc64/main.c | 742 +++++++++++++++++++++++++++++ sys/src/9/pc64/mem.h | 164 +++++++ sys/src/9/pc64/memory.c | 720 ++++++++++++++++++++++++++++ sys/src/9/pc64/mkfile | 149 ++++++ sys/src/9/pc64/mmu.c | 505 ++++++++++++++++++++ sys/src/9/pc64/pc64 | 153 ++++++ sys/src/9/pc64/squidboy.c | 113 +++++ sys/src/9/pc64/trap.c | 1065 ++++++++++++++++++++++++++++++++++++++++++ sys/src/9/port/devcons.c | 8 +- sys/src/9/port/mkdevc | 2 +- sys/src/9/port/mkdevlist | 2 +- 15 files changed, 5385 insertions(+), 6 deletions(-) create mode 100644 sys/src/9/pc64/apbootstrap.s create mode 100644 sys/src/9/pc64/dat.h create mode 100644 sys/src/9/pc64/fns.h create mode 100644 sys/src/9/pc64/l.s create mode 100644 sys/src/9/pc64/main.c create mode 100644 sys/src/9/pc64/mem.h create mode 100644 sys/src/9/pc64/memory.c create mode 100644 sys/src/9/pc64/mkfile create mode 100644 sys/src/9/pc64/mmu.c create mode 100644 sys/src/9/pc64/pc64 create mode 100644 sys/src/9/pc64/squidboy.c create mode 100644 sys/src/9/pc64/trap.c diff --git a/sys/src/9/pc64/apbootstrap.s b/sys/src/9/pc64/apbootstrap.s new file mode 100644 index 000000000..460ae0738 --- /dev/null +++ b/sys/src/9/pc64/apbootstrap.s @@ -0,0 +1,169 @@ +/* + * Start an Application Processor. This must be placed on a 4KB boundary + * somewhere in the 1st MB of conventional memory (APBOOTSTRAP). However, + * due to some shortcuts below it's restricted further to within the 1st + * 64KB. The AP starts in real-mode, with + * CS selector set to the startup memory address/16; + * CS base set to startup memory address; + * CS limit set to 64KB; + * CPL and IP set to 0. 
+ */ +#include "mem.h" + +#define NOP BYTE $0x90 /* NOP */ + +#define pFARJMP32(s, o) BYTE $0xea; /* far jmp ptr32:16 */ \ + LONG $o; WORD $s +#define rFARJMP16(s, o) BYTE $0xea; /* far jump ptr16:16 */ \ + WORD $o; WORD $s; +#define rFARJMP32(s, o) BYTE $0x66; /* far jump ptr32:16 */ \ + pFARJMP32(s, o) + +#define rLGDT(gdtptr) BYTE $0x0f; /* LGDT */ \ + BYTE $0x01; BYTE $0x16; \ + WORD $gdtptr + +#define rMOVAX(i) BYTE $0xb8; /* i -> AX */ \ + WORD $i; + +#define DELAY BYTE $0xEB; /* JMP .+2 */ \ + BYTE $0x00 + +MODE $16 + +TEXT apbootstrap(SB), 1, $-4 + rFARJMP16(0, _apbootstrap-KZERO(SB)) + NOP; NOP; NOP; +TEXT _apvector(SB), 1, $-4 /* address APBOOTSTRAP+0x08 */ + QUAD $0 +TEXT _appml4(SB), 1, $-4 /* address APBOOTSTRAP+0x10 */ + QUAD $0 +TEXT _apapic(SB), 1, $-4 /* address APBOOTSTRAP+0x18 */ + QUAD $0 +TEXT _apmach(SB), 1, $-4 /* address APBOOTSTRAP+0x20 */ + QUAD $0 +TEXT _apbootstrap(SB), 1, $-4 + MOVW CS, AX + MOVW AX, DS /* initialise DS */ + + rLGDT(_gdtptr32p<>-KZERO(SB)) /* load a basic gdt */ + + MOVL CR0, AX + ORL $1, AX + MOVL AX, CR0 /* turn on protected mode */ + DELAY /* JMP .+2 */ + + rFARJMP16(SELECTOR(3, SELGDT, 0), _ap32-KZERO(SB)) + +/* + * Enable and activate Long Mode. From the manual: + * make sure Page Size Extentions are off, and Page Global + * Extensions and Physical Address Extensions are on in CR4; + * set Long Mode Enable in the Extended Feature Enable MSR; + * set Paging Enable in CR0; + * make an inter-segment jump to the Long Mode code. + * It's all in 32-bit mode until the jump is made. + */ +MODE $32 + +TEXT _ap32(SB), 1, $-4 + MOVW $SELECTOR(2, SELGDT, 0), AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + MOVL _appml4-KZERO(SB), AX /* physical address of PML4 */ + MOVL AX, CR3 /* load the mmu */ + DELAY + + MOVL CR4, AX + ANDL $~0x00000010, AX /* Page Size */ + ORL $0x000000A0, AX /* Page Global, Phys. 
Address */ + MOVL AX, CR4 + + MOVL $0xc0000080, CX /* Extended Feature Enable */ + RDMSR + ORL $0x00000100, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~0x6000000a, DX + ORL $0x80010000, DX /* Paging Enable, Write Protect */ + MOVL DX, CR0 + + pFARJMP32(SELECTOR(KESEG, SELGDT, 0), _ap64-KZERO(SB)) + +/* + * Long mode. Welcome to 2003. + * Jump out of the identity map space; + * load a proper long mode GDT; + * zap the identity map; + * initialise the stack and call the + * C startup code in m->splpc. + */ +MODE $64 + +TEXT _ap64(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ AX, AX + MOVW AX, DS /* not used in long mode */ + MOVW AX, ES /* not used in long mode */ + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS /* not used in long mode */ + + MOVW AX, LDTR + + MOVQ _apmach(SB), SP + + MOVQ AX, RUSER /* up = 0; */ + MOVQ SP, RMACH /* m = apmach */ + + ADDQ $MACHSIZE, SP + + PUSHQ AX /* clear flags */ + POPFQ + + MOVQ _apvector(SB), AX + MOVQ _apapic(SB), RARG + PUSHQ RARG + + CALL *AX + +_halt: + HLT + JMP _halt + +TEXT _gdt<>(SB), 1, $-4 + /* null descriptor */ + LONG $0 + LONG $0 + + /* (KESEG) 64 bit long mode exec segment */ + LONG $(0xFFFF) + LONG $(SEGL|SEGG|SEGP|(0xF<<16)|SEGPL(0)|SEGEXEC|SEGR) + + /* 32 bit data segment descriptor for 4 gigabytes (PL 0) */ + LONG $(0xFFFF) + LONG $(SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(0)|SEGDATA|SEGW) + + /* 32 bit exec segment descriptor for 4 gigabytes (PL 0) */ + LONG $(0xFFFF) + LONG $(SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(0)|SEGEXEC|SEGR) + + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) + LONG $_gdt<>-KZERO(SB) + +TEXT _gdtptr64p<>(SB), 1, $-4 + WORD $(4*8-1) + QUAD $_gdt<>-KZERO(SB) + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(4*8-1) + QUAD $_gdt<>(SB) diff --git a/sys/src/9/pc64/dat.h b/sys/src/9/pc64/dat.h new file mode 100644 index 000000000..713ec590e --- /dev/null +++ b/sys/src/9/pc64/dat.h @@ -0,0 +1,360 @@ +typedef struct BIOS32si BIOS32si; +typedef struct BIOS32ci BIOS32ci; +typedef struct Conf 
Conf; +typedef struct Confmem Confmem; +typedef union FPsave FPsave; +typedef struct Fxsave Fxsave; +typedef struct FPstate FPstate; +typedef struct ISAConf ISAConf; +typedef struct Label Label; +typedef struct Lock Lock; +typedef struct MMU MMU; +typedef struct Mach Mach; +typedef struct Notsave Notsave; +typedef struct PCArch PCArch; +typedef struct Pcidev Pcidev; +typedef struct PCMmap PCMmap; +typedef struct PCMslot PCMslot; +typedef struct Page Page; +typedef struct PMMU PMMU; +typedef struct Proc Proc; +typedef struct Segdesc Segdesc; +typedef vlong Tval; +typedef struct Ureg Ureg; +typedef struct Vctl Vctl; + +#pragma incomplete BIOS32si +#pragma incomplete Pcidev +#pragma incomplete Ureg + +#define MAXSYSARG 5 /* for mount(fd, afd, mpt, flag, arg) */ + +/* + * parameters for sysproc.c + */ +#define AOUT_MAGIC (S_MAGIC) + +struct Lock +{ + ulong key; + ulong sr; + uintptr pc; + Proc *p; + Mach *m; + ushort isilock; + long lockcycles; +}; + +struct Label +{ + uintptr sp; + uintptr pc; +}; + +/* + * FPsave.status + */ +enum +{ + /* this is a state */ + FPinit= 0, + FPactive= 1, + FPinactive= 2, + + /* the following is a bit that can be or'd into the state */ + FPillegal= 0x100, +}; + +/* + * the FP regs must be stored here, not somewhere pointed to from here. + * port code assumes this. 
+ */ +struct Fxsave { + u16int fcw; /* x87 control word */ + u16int fsw; /* x87 status word */ + u8int ftw; /* x87 tag word */ + u8int zero; /* 0 */ + u16int fop; /* last x87 opcode */ + u64int rip; /* last x87 instruction pointer */ + u64int rdp; /* last x87 data pointer */ + u32int mxcsr; /* MMX control and status */ + u32int mxcsrmask; /* supported MMX feature bits */ + uchar st[128]; /* shared 64-bit media and x87 regs */ + uchar xmm[256]; /* 128-bit media regs */ + uchar ign[96]; /* reserved, ignored */ +}; + +union FPsave { + uchar align[512+15]; + Fxsave; +}; + +struct Confmem +{ + uintptr base; + ulong npage; + uintptr kbase; + uintptr klimit; +}; + +struct Conf +{ + ulong nmach; /* processors */ + ulong nproc; /* processes */ + ulong monitor; /* has monitor? */ + Confmem mem[4]; /* physical memory */ + ulong npage; /* total physical pages of memory */ + ulong upages; /* user page pool */ + ulong nimage; /* number of page cache image headers */ + ulong nswap; /* number of swap pages */ + int nswppo; /* max # of pageouts per segment pass */ + ulong copymode; /* 0 is copy on write, 1 is copy on reference */ + ulong ialloc; /* max interrupt time allocation in bytes */ + ulong pipeqsize; /* size in bytes of pipe queues */ + int nuart; /* number of uart devices */ +}; + +struct Segdesc +{ + u32int d0; + u32int d1; +}; + +/* + * MMU structure for PDP, PD, PT pages. 
+ */ +struct MMU +{ + MMU *next; + uintptr *page; + int index; + int level; +}; + +/* + * MMU stuff in proc + */ +#define NCOLOR 1 +struct PMMU +{ + MMU *mmuhead; + MMU *mmutail; + int mmucount; +}; + +/* + * things saved in the Proc structure during a notify + */ +struct Notsave +{ + ulong svflags; + ulong svcs; + ulong svss; +}; + +#include "../port/portdat.h" + +typedef struct { + u32int _0_; + u32int rsp0[2]; + u32int rsp1[2]; + u32int rsp2[2]; + u32int _28_[2]; + u32int ist[14]; + u16int _92_[5]; + u16int iomap; +} Tss; + +struct Mach +{ + int machno; /* physical id of processor (KNOWN TO ASSEMBLY) */ + uintptr splpc; /* pc of last caller to splhi (KNOWN TO ASSEMBLY) */ + + Proc* proc; /* current process on this processor (KNOWN TO ASSEMBLY) */ + + u64int* pml4; /* pml4 base for this processor (va) */ + Tss* tss; /* tss for this processor */ + Segdesc *gdt; /* gdt for this processor */ + + u64int mmumap[4]; /* bitmap of pml4 entries for zapping */ + MMU* mmufree; /* freelist for MMU structures */ + int mmucount; /* number of MMU structures in freelist */ + int kmapindex; /* next KMAP page index for use */ + + ulong ticks; /* of the clock since boot time */ + Label sched; /* scheduler wakeup */ + Lock alarmlock; /* access to alarm list */ + void* alarm; /* alarms bound to this clock */ + int inclockintr; + + Proc* readied; /* for runproc */ + ulong schedticks; /* next forced context switch */ + + int tlbfault; + int tlbpurge; + int pfault; + int cs; + int syscall; + int load; + int intr; + int flushmmu; /* make current proc flush it's mmu state */ + int ilockdepth; + Perf perf; /* performance counters */ + + ulong spuriousintr; + int lastintr; + + int loopconst; + + int cpumhz; + uvlong cyclefreq; /* Frequency of user readable cycle counter */ + uvlong cpuhz; + int cpuidax; + int cpuidcx; + int cpuiddx; + char cpuidid[16]; + char* cpuidtype; + int havetsc; + int havepge; + uvlong tscticks; + int pdballoc; + int pdbfree; + + vlong mtrrcap; + vlong mtrrdef; + 
vlong mtrrfix[11]; + vlong mtrrvar[32]; /* 256 max. */ + + uintptr stack[1]; +}; + +/* + * KMap the structure + */ +typedef void KMap; +#define VA(k) ((void*)k) + +struct +{ + Lock; + int machs; /* bitmap of active CPUs */ + int exiting; /* shutdown */ + int ispanic; /* shutdown in response to a panic */ + int thunderbirdsarego; /* lets the added processors continue to schedinit */ +}active; + +/* + * routines for things outside the PC model, like power management + */ +struct PCArch +{ + char* id; + int (*ident)(void); /* this should be in the model */ + void (*reset)(void); /* this should be in the model */ + int (*serialpower)(int); /* 1 == on, 0 == off */ + int (*modempower)(int); /* 1 == on, 0 == off */ + + void (*intrinit)(void); + int (*intrenable)(Vctl*); + int (*intrvecno)(int); + int (*intrdisable)(int); + void (*introff)(void); + void (*intron)(void); + + void (*clockenable)(void); + uvlong (*fastclock)(uvlong*); + void (*timerset)(uvlong); +}; + +/* cpuid instruction result register bits */ +enum { + /* cx */ + Monitor = 1<<3, + + /* dx */ + Fpuonchip = 1<<0, + Vmex = 1<<1, /* virtual-mode extensions */ + Pse = 1<<3, /* page size extensions */ + Tsc = 1<<4, /* time-stamp counter */ + Cpumsr = 1<<5, /* model-specific registers, rdmsr/wrmsr */ + Pae = 1<<6, /* physical-addr extensions */ + Mce = 1<<7, /* machine-check exception */ + Cmpxchg8b = 1<<8, + Cpuapic = 1<<9, + Mtrr = 1<<12, /* memory-type range regs. */ + Pge = 1<<13, /* page global extension */ + Pse2 = 1<<17, /* more page size extensions */ + Clflush = 1<<19, + Acpif = 1<<22, /* therm control msr */ + Mmx = 1<<23, + Fxsr = 1<<24, /* have SSE FXSAVE/FXRSTOR */ + Sse = 1<<25, /* thus sfence instr. 
*/ + Sse2 = 1<<26, /* thus mfence & lfence instr.s */ + Rdrnd = 1<<30, /* RDRAND support bit */ +}; + +enum { /* MSRs */ + PerfEvtbase = 0xc0010000, /* Performance Event Select */ + PerfCtrbase = 0xc0010004, /* Performance Counters */ + + Efer = 0xc0000080, /* Extended Feature Enable */ + Star = 0xc0000081, /* Legacy Target IP and [CS]S */ + Lstar = 0xc0000082, /* Long Mode Target IP */ + Cstar = 0xc0000083, /* Compatibility Target IP */ + Sfmask = 0xc0000084, /* SYSCALL Flags Mask */ + FSbase = 0xc0000100, /* 64-bit FS Base Address */ + GSbase = 0xc0000101, /* 64-bit GS Base Address */ + KernelGSbase = 0xc0000102, /* SWAPGS instruction */ +}; + +/* + * a parsed plan9.ini line + */ +#define NISAOPT 8 + +struct ISAConf { + char *type; + ulong port; + int irq; + ulong dma; + ulong mem; + ulong size; + ulong freq; + + int nopt; + char *opt[NISAOPT]; +}; + +extern PCArch *arch; /* PC architecture */ + +Mach* machp[MAXMACH]; + +#define MACHP(n) (machp[n]) + +extern register Mach* m; /* R15 */ +extern register Proc* up; /* R14 */ + +/* + * hardware info about a device + */ +typedef struct { + ulong port; + int size; +} Devport; + +struct DevConf +{ + ulong intnum; /* interrupt number */ + char *type; /* card type, malloced */ + int nports; /* Number of ports */ + Devport *ports; /* The ports themselves */ +}; + +typedef struct BIOS32ci { /* BIOS32 Calling Interface */ + u32int eax; + u32int ebx; + u32int ecx; + u32int edx; + u32int esi; + u32int edi; +} BIOS32ci; diff --git a/sys/src/9/pc64/fns.h b/sys/src/9/pc64/fns.h new file mode 100644 index 000000000..5586d1aa5 --- /dev/null +++ b/sys/src/9/pc64/fns.h @@ -0,0 +1,189 @@ +#include "../port/portfns.h" + +void aamloop(int); +Dirtab* addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong)); +void archinit(void); +int bios32call(BIOS32ci*, u16int[3]); +int bios32ci(BIOS32si*, BIOS32ci*); +void bios32close(BIOS32si*); +BIOS32si* bios32open(char*); +void bootargs(void*); +uintptr 
cankaddr(uintptr); +int checksum(void *, int); +void clockintr(Ureg*, void*); +int (*cmpswap)(long*, long, long); +int cmpswap486(long*, long, long); +void (*coherence)(void); +void cpuid(int, ulong regs[]); +int cpuidentify(void); +void cpuidprint(void); +void (*cycles)(uvlong*); +void delay(int); +void* dmabva(int); +int dmacount(int); +int dmadone(int); +void dmaend(int); +int dmainit(int, int); +#define DMAWRITE 0 +#define DMAREAD 1 +#define DMALOOP 2 +long dmasetup(int, void*, long, int); +#define evenaddr(x) /* x86 doesn't care */ +void (*fprestore)(FPsave*); +void (*fpsave)(FPsave*); +void fpsserestore(FPsave*); +void fpssesave(FPsave*); +void fpx87restore(FPsave*); +void fpx87save(FPsave*); +u64int getcr0(void); +u64int getcr2(void); +u64int getcr3(void); +u64int getcr4(void); +char* getconf(char*); +void guesscpuhz(int); +void halt(void); +void mwait(void*); +int i8042auxcmd(int); +int i8042auxcmds(uchar*, int); +void i8042auxenable(void (*)(int, int)); +void i8042reset(void); +void i8250console(void); +void* i8250alloc(int, int, int); +void i8253enable(void); +void i8253init(void); +void i8253reset(void); +uvlong i8253read(uvlong*); +void i8253timerset(uvlong); +int i8259disable(int); +int i8259enable(Vctl*); +void i8259init(void); +int i8259isr(int); +void i8259on(void); +void i8259off(void); +int i8259vecno(int); +void idle(void); +void idlehands(void); +int inb(int); +void insb(int, void*, int); +ushort ins(int); +void inss(int, void*, int); +ulong inl(int); +void insl(int, void*, int); +int intrdisable(int, void (*)(Ureg *, void *), void*, int, char*); +void intrenable(int, void (*)(Ureg*, void*), void*, int, char*); +void introff(void); +void intron(void); +void invlpg(uintptr); +void iofree(int); +void ioinit(void); +int iounused(int, int); +int ioalloc(int, int, int, char*); +int ioreserve(int, int, int, char*); +int iprint(char*, ...); +int isaconfig(char*, int, ISAConf*); +void* kaddr(uintptr); +void kbdenable(void); +void kbdinit(void); +KMap* 
kmap(Page*); +void kunmap(KMap*); +#define kmapinval() +void lgdt(void*); +void lidt(void*); +void links(void); +void ltr(ulong); +void mach0init(void); +void mathinit(void); +void mb386(void); +void mb586(void); +void meminit(void); +void memorysummary(void); +void mfence(void); +#define mmuflushtlb() putcr3(getcr3()) +void mmuinit(void); +uintptr *mmuwalk(uintptr*, uintptr, int, int); +int mtrr(uvlong, uvlong, char *); +void mtrrclock(void); +int mtrrprint(char *, long); +uchar nvramread(int); +void nvramwrite(int, uchar); +void outb(int, int); +void outsb(int, void*, int); +void outs(int, ushort); +void outss(int, void*, int); +void outl(int, ulong); +void outsl(int, void*, int); +uintptr paddr(void*); +ulong pcibarsize(Pcidev*, int); +void pcibussize(Pcidev*, ulong*, ulong*); +int pcicfgr8(Pcidev*, int); +int pcicfgr16(Pcidev*, int); +int pcicfgr32(Pcidev*, int); +void pcicfgw8(Pcidev*, int, int); +void pcicfgw16(Pcidev*, int, int); +void pcicfgw32(Pcidev*, int, int); +void pciclrbme(Pcidev*); +void pciclrioe(Pcidev*); +void pciclrmwi(Pcidev*); +int pcigetpms(Pcidev*); +void pcihinv(Pcidev*); +uchar pciipin(Pcidev*, uchar); +Pcidev* pcimatch(Pcidev*, int, int); +Pcidev* pcimatchtbdf(int); +int pcicap(Pcidev*, int); +int pcihtcap(Pcidev*, int); +void pcireset(void); +int pciscan(int, Pcidev**); +void pcisetbme(Pcidev*); +void pcisetioe(Pcidev*); +void pcisetmwi(Pcidev*); +int pcisetpms(Pcidev*, int); +void pcmcisread(PCMslot*); +int pcmcistuple(int, int, int, void*, int); +PCMmap* pcmmap(int, ulong, int, int); +int pcmspecial(char*, ISAConf*); +int (*_pcmspecial)(char *, ISAConf *); +void pcmspecialclose(int); +void (*_pcmspecialclose)(int); +void pcmunmap(int, PCMmap*); +void pmap(uintptr *, uintptr, uintptr, int); +void procrestore(Proc*); +void procsave(Proc*); +void procsetup(Proc*); +void procfork(Proc*); +void putcr0(u64int); +void putcr3(u64int); +void putcr4(u64int); +void* rampage(void); +int rdmsr(int, vlong*); +void realmode(Ureg*); +void 
screeninit(void); +void (*screenputs)(char*, int); +void* sigsearch(char*); +void syncclock(void); +void syscallentry(void); +void touser(void*); +void trapenable(int, void (*)(Ureg*, void*), void*, char*); +void trapinit(void); +void trapinit0(void); +int tas(void*); +uvlong tscticks(uvlong*); +uintptr umbmalloc(uintptr, int, int); +void umbfree(uintptr, int); +uintptr umbrwmalloc(uintptr, int, int); +void umbrwfree(uintptr, int); +uintptr upaalloc(int, int); +void upafree(uintptr, int); +void upareserve(uintptr, int); +void vectortable(void); +void* vmap(uintptr, int); +int vmapsync(uintptr); +void vunmap(void*, int); +void wbinvd(void); +int wrmsr(int, vlong); +int xchgw(ushort*, int); +void rdrandbuf(void*, ulong); + +#define userureg(ur) (((ur)->cs & 3) == 3) +#define waserror() (up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1])) +#define KADDR(a) kaddr(a) +#define PADDR(a) paddr((void*)(a)) diff --git a/sys/src/9/pc64/l.s b/sys/src/9/pc64/l.s new file mode 100644 index 000000000..65aebb07b --- /dev/null +++ b/sys/src/9/pc64/l.s @@ -0,0 +1,1050 @@ +#include "mem.h" + +MODE $32 + +#define DELAY BYTE $0xEB; BYTE $0x00 /* JMP .+2 */ + +#define pFARJMP32(s, o) BYTE $0xea; /* far jump to ptr32:16 */\ + LONG $o; WORD $s + +/* + * Enter here in 32-bit protected mode. Welcome to 1982. + * Make sure the GDT is set as it should be: + * disable interrupts; + * load the GDT with the table in _gdt32p; + * load all the data segments + * load the code segment via a far jump. 
+ */ +TEXT _protected<>(SB), 1, $-4 + CLI + + MOVL $_gdtptr32p<>-KZERO(SB), AX + MOVL (AX), GDTR + + MOVL $SELECTOR(2, SELGDT, 0), AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + pFARJMP32(SELECTOR(3, SELGDT, 0), _warp64<>-KZERO(SB)) + +TEXT _gdt<>(SB), 1, $-4 + /* null descriptor */ + LONG $0 + LONG $0 + + /* (KESEG) 64 bit long mode exec segment */ + LONG $(0xFFFF) + LONG $(SEGL|SEGG|SEGP|(0xF<<16)|SEGPL(0)|SEGEXEC|SEGR) + + /* 32 bit data segment descriptor for 4 gigabytes (PL 0) */ + LONG $(0xFFFF) + LONG $(SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(0)|SEGDATA|SEGW) + + /* 32 bit exec segment descriptor for 4 gigabytes (PL 0) */ + LONG $(0xFFFF) + LONG $(SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(0)|SEGEXEC|SEGR) + + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) + LONG $_gdt<>-KZERO(SB) + +TEXT _gdtptr64p<>(SB), 1, $-4 + WORD $(4*8-1) + QUAD $_gdt<>-KZERO(SB) + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(4*8-1) + QUAD $_gdt<>(SB) + +/* + * Macros for accessing page table entries; change the + * C-style array-index macros into a page table byte offset + */ +#define PML4O(v) ((PTLX((v), 3))<<3) +#define PDPO(v) ((PTLX((v), 2))<<3) +#define PDO(v) ((PTLX((v), 1))<<3) +#define PTO(v) ((PTLX((v), 0))<<3) + +TEXT _warp64<>(SB), 1, $-4 + + /* clear mach and page tables */ + MOVL $((CPU0END-CPU0PML4)>>2), CX + MOVL $(CPU0PML4-KZERO), SI + MOVL SI, DI + XORL AX, AX + CLD + REP; STOSL + + MOVL SI, AX /* PML4 */ + MOVL AX, DX + ADDL $(PTSZ|PTEWRITE|PTEVALID), DX /* PDP at PML4 + PTSZ */ + MOVL DX, PML4O(0)(AX) /* PML4E for double-map */ + MOVL DX, PML4O(KZERO)(AX) /* PML4E for KZERO */ + + ADDL $PTSZ, AX /* PDP at PML4 + PTSZ */ + ADDL $PTSZ, DX /* PD at PML4 + 2*PTSZ */ + MOVL DX, PDPO(0)(AX) /* PDPE for double-map */ + MOVL DX, PDPO(KZERO)(AX) /* PDPE for KZERO */ + + ADDL $PTSZ, AX /* PD at PML4 + 2*PTSZ */ + MOVL $(PTESIZE|PTEGLOBAL|PTEWRITE|PTEVALID), DX + MOVL DX, PDO(0)(AX) /* PDE for double-map */ + + ADDL $PDO(KZERO), AX +memloop: + MOVL DX, 0(AX) + ADDL 
$PGLSZ(1), DX + ADDL $8, AX + CMPL DX, $INIMAP + JLT memloop + +/* + * Enable and activate Long Mode. From the manual: + * make sure Page Size Extentions are off, and Page Global + * Extensions and Physical Address Extensions are on in CR4; + * set Long Mode Enable in the Extended Feature Enable MSR; + * set Paging Enable in CR0; + * make an inter-segment jump to the Long Mode code. + * It's all in 32-bit mode until the jump is made. + */ +TEXT _lme<>(SB), 1, $-4 + MOVL SI, CR3 /* load the mmu */ + DELAY + + MOVL CR4, AX + ANDL $~0x00000010, AX /* Page Size */ + ORL $0x000000A0, AX /* Page Global, Phys. Address */ + MOVL AX, CR4 + + MOVL $0xc0000080, CX /* Extended Feature Enable */ + RDMSR + ORL $0x00000100, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~0x6000000a, DX + ORL $0x80010000, DX /* Paging Enable, Write Protect */ + MOVL DX, CR0 + + pFARJMP32(SELECTOR(KESEG, SELGDT, 0), _identity<>-KZERO(SB)) + +/* + * Long mode. Welcome to 2003. + * Jump out of the identity map space; + * load a proper long mode GDT. + */ +MODE $64 + +TEXT _identity<>(SB), 1, $-4 + MOVQ $_start64v<>(SB), AX + JMP* AX + +TEXT _start64v<>(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ AX, AX + MOVW AX, DS /* not used in long mode */ + MOVW AX, ES /* not used in long mode */ + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS /* not used in long mode */ + + MOVW AX, LDTR + + MOVQ $(CPU0MACH+MACHSIZE), SP + MOVQ $(CPU0MACH), RMACH + MOVQ AX, RUSER /* up = 0; */ + +_clearbss: + MOVQ $edata(SB), DI + MOVQ $end(SB), CX + SUBQ DI, CX /* end-edata bytes */ + SHRQ $2, CX /* end-edata doublewords */ + + CLD + REP; STOSL /* clear BSS */ + + PUSHQ AX /* clear flags */ + POPFQ + + CALL main(SB) + +/* + * The CPUID instruction is always supported on the amd64. + */ +TEXT cpuid(SB), $-4 + MOVL RARG, AX /* function in AX */ + CPUID + + MOVQ info+8(FP), BP + MOVL AX, 0(BP) + MOVL BX, 4(BP) + MOVL CX, 8(BP) + MOVL DX, 12(BP) + RET + +/* + * Port I/O. 
+ */ +TEXT inb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + XORL AX, AX + INB + RET + +TEXT insb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSB + RET + +TEXT ins(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + XORL AX, AX + INW + RET + +TEXT inss(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSW + RET + +TEXT inl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + INL + RET + +TEXT insl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSL + RET + +TEXT outb(SB), 1, $-1 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL byte+8(FP), AX + OUTB + RET + +TEXT outsb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSB + RET + +TEXT outs(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL short+8(FP), AX + OUTW + RET + +TEXT outss(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSW + RET + +TEXT outl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL long+8(FP), AX + OUTL + RET + +TEXT outsl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSL + RET + +TEXT getgdt(SB), 1, $-4 + MOVQ RARG, AX + MOVL GDTR, (AX) /* Note: 10 bytes returned */ + RET + +TEXT lgdt(SB), $0 /* GDTR - global descriptor table */ + MOVQ RARG, AX + MOVL (AX), GDTR + RET + +TEXT lidt(SB), $0 /* IDTR - interrupt descriptor table */ + MOVQ RARG, AX + MOVL (AX), IDTR + RET + +TEXT ltr(SB), 1, $-4 + MOVW RARG, AX + MOVW AX, TASK + RET + +/* + * Read/write various system registers. 
+ */ +TEXT getcr0(SB), 1, $-4 /* Processor Control */ + MOVQ CR0, AX + RET + +TEXT putcr0(SB), 1, $-4 + MOVQ RARG, CR0 + RET + +TEXT getcr2(SB), 1, $-4 /* #PF Linear Address */ + MOVQ CR2, AX + RET + +TEXT getcr3(SB), 1, $-4 /* PML4 Base */ + MOVQ CR3, AX + RET + +TEXT putcr3(SB), 1, $-4 + MOVQ RARG, CR3 + RET + +TEXT getcr4(SB), 1, $-4 /* Extensions */ + MOVQ CR4, AX + RET + +TEXT putcr4(SB), 1, $-4 + MOVQ RARG, CR4 + RET + +TEXT mb386(SB), 1, $-4 /* hack */ +TEXT mb586(SB), 1, $-4 + XORL AX, AX + CPUID + RET + +/* + * BIOS32. + */ +TEXT bios32call(SB), 1, $-1 + XORL AX, AX + INCL AX + RET + +/* + * Basic timing loop to determine CPU frequency. + */ +TEXT aamloop(SB), 1, $-4 + MOVL RARG, CX +_aamloop: + LOOP _aamloop + RET + +TEXT _cycles(SB), 1, $-4 /* time stamp counter */ + RDTSC + MOVL AX, 0(RARG) /* lo */ + MOVL DX, 4(RARG) /* hi */ + RET + +TEXT rdmsr(SB), 1, $-4 /* Model-Specific Register */ + MOVL RARG, CX + MOVQ $0, BP +TEXT _rdmsrinst(SB), $0 + RDMSR + MOVQ vlong+8(FP), CX /* &vlong */ + MOVL AX, 0(CX) /* lo */ + MOVL DX, 4(CX) /* hi */ + MOVQ BP, AX /* BP set to -1 if traped */ + RET + +TEXT wrmsr(SB), 1, $-4 + MOVL RARG, CX + MOVL lo+8(FP), AX + MOVL hi+12(FP), DX + MOVQ $0, BP +TEXT _wrmsrinst(SB), $0 + WRMSR + MOVQ BP, AX /* BP set to -1 if traped */ + RET + +TEXT invlpg(SB), 1, $-4 /* INVLPG va+0(FP) */ + MOVQ RARG, va+0(FP) + + INVLPG va+0(FP) + + RET + +TEXT wbinvd(SB), 1, $-4 + WBINVD + RET + +/* + * Serialisation. + */ +TEXT lfence(SB), 1, $-4 + LFENCE + RET + +TEXT mfence(SB), 1, $-4 + MFENCE + RET + +TEXT sfence(SB), 1, $-4 + SFENCE + RET + +/* + * Note: CLI and STI are not serialising instructions. + * Is that assumed anywhere? + */ +TEXT splhi(SB), 1, $-4 +_splhi: + PUSHFQ + POPQ AX + TESTQ $0x200, AX /* 0x200 - Interrupt Flag */ + JZ _alreadyhi /* use CMOVLEQ etc. here? 
*/ + + MOVQ (SP), BX + MOVQ BX, 8(RMACH) /* save PC in m->splpc */ + +_alreadyhi: + CLI + RET + +TEXT spllo(SB), 1, $-4 +_spllo: + PUSHFQ + POPQ AX + TESTQ $0x200, AX /* 0x200 - Interrupt Flag */ + JNZ _alreadylo /* use CMOVLEQ etc. here? */ + + MOVQ $0, 8(RMACH) /* clear m->splpc */ + +_alreadylo: + STI + RET + +TEXT splx(SB), 1, $-4 + TESTQ $0x200, RARG /* 0x200 - Interrupt Flag */ + JNZ _spllo + JMP _splhi + +TEXT spldone(SB), 1, $-4 + RET + +TEXT islo(SB), 1, $-4 + PUSHFQ + POPQ AX + ANDQ $0x200, AX /* 0x200 - Interrupt Flag */ + RET + +/* + * Synchronisation + */ +TEXT ainc8(SB), 1, $-4 + XORL AX, AX + INCL AX + LOCK; XADDB AX, (RARG) +/* BOTCH INCL AX */ + RET + +TEXT _xinc(SB), 1, $-4 /* int _inc(long*); */ + MOVL $1, AX + LOCK; XADDL AX, (RARG) + ADDL $1, AX /* overflow if -ve or 0 */ + JGT _return +_trap: + XORQ BX, BX + MOVQ (BX), BX /* over under sideways down */ +_return: + RET + +TEXT _xdec(SB), 1, $-4 /* int _dec(long*); */ + MOVL $-1, AX + LOCK; XADDL AX, (RARG) + SUBL $1, AX /* underflow if -ve */ + JLT _trap + RET + +TEXT tas(SB), 1, $-4 + MOVL $0xdeaddead, AX + XCHGL AX, (RARG) /* */ + RET + +TEXT fas64(SB), 1, $-4 + MOVQ p+8(FP), AX + LOCK; XCHGQ AX, (RARG) /* */ + RET + +TEXT cmpswap486(SB), 1, $-4 +TEXT cas(SB), 1, $-4 + MOVL exp+8(FP), AX + MOVL new+16(FP), BX + LOCK; CMPXCHGL BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? */ + JNZ _cas32r0 +_cas32r1: + RET +_cas32r0: + DECL AX + RET + +TEXT cas64(SB), 1, $-4 + MOVQ exp+8(FP), AX + MOVQ new+16(FP), BX + LOCK; CMPXCHGQ BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? 
*/ + JNZ _cas64r0 +_cas64r1: + RET +_cas64r0: + DECL AX + RET + +/* + * Label consists of a stack pointer and a programme counter + */ +TEXT gotolabel(SB), 1, $-4 + MOVQ 0(RARG), SP /* restore SP */ + MOVQ 8(RARG), AX /* put return PC on the stack */ + MOVQ AX, 0(SP) + MOVL $1, AX /* return 1 */ + RET + +TEXT setlabel(SB), 1, $-4 + MOVQ SP, 0(RARG) /* store SP */ + MOVQ 0(SP), BX /* store return PC */ + MOVQ BX, 8(RARG) + MOVL $0, AX /* return 0 */ + RET + +TEXT idle(SB), $0 +_idle: + STI + HLT + JMP _idle + +TEXT halt(SB), 1, $-4 + HLT + RET + +/* + * SIMD Floating Point. + * Note: for x87 instructions which have both a 'wait' and 'nowait' version, + * 8a only knows the 'wait' mnemonic but does NOT insertthe WAIT prefix byte + * (i.e. they act like their FNxxx variations) so WAIT instructions must be + * explicitly placed in the code if necessary. + */ +TEXT _clts(SB), 1, $-4 + CLTS + RET + +TEXT _fldcw(SB), 1, $-4 /* Load x87 FPU Control Word */ + MOVQ RARG, cw+0(FP) + FLDCW cw+0(FP) + RET + +TEXT _fnclex(SB), 1, $-4 + FCLEX + RET + +TEXT _fninit(SB), 1, $-4 + FINIT /* no WAIT */ + RET + +TEXT _fxrstor(SB), 1, $-4 + FXRSTOR64 (RARG) + RET + +TEXT _fxsave(SB), 1, $-4 + FXSAVE64 (RARG) + RET + +TEXT _fwait(SB), 1, $-4 + WAIT + RET + +TEXT _ldmxcsr(SB), 1, $-4 /* Load MXCSR */ + MOVQ RARG, mxcsr+0(FP) + LDMXCSR mxcsr+0(FP) + RET + +TEXT _stts(SB), 1, $-4 + MOVQ CR0, AX + ORQ $8, AX /* Ts */ + MOVQ AX, CR0 + RET + +TEXT mul64fract(SB), 1, $-4 + MOVQ a+8(FP), AX + MULQ b+16(FP) /* a*b */ + SHRQ $32, AX:DX + MOVQ AX, (RARG) + RET + +#define RDRANDAX BYTE $0x0f; BYTE $0xc7; BYTE $0xf0 +#define RDRAND64AX BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0xf0 + +TEXT rdrand32(SB), $-4 +loop32: + RDRANDAX + JCC loop32 + RET + +TEXT rdrand64(SB), $-4 +loop64: + RDRAND64AX + JCC loop64 + RET + +TEXT rdrandbuf(SB), $0 + MOVQ RARG, DX + + MOVLQZX cnt+8(FP), CX + SHRQ $3, CX +eights: + CMPL CX, $0 + JLE f1 + CALL rdrand64(SB) + MOVQ AX, 0(DX) + ADDQ $8, DX + SUBL $1, CX + JMP eights + 
+f1: + MOVLQZX cnt+8(FP), CX + ANDL $7, CX + SHRQ $2, CX +fours: + CMPL CX, $0 + JLE f2 + CALL rdrand32(SB) + MOVL AX, 0(DX) + ADDQ $4, DX + SUBL $1, CX + JMP fours + +f2: + MOVLQZX cnt+8(FP), CX + ANDL $3, CX +ones: + CMPL CX, $0 + JLE f3 + CALL rdrand32(SB) + MOVB AX, 0(DX) + ADDQ $1, DX + SUBL $1, CX + JMP ones + +f3: + RET + +/* + */ +TEXT touser(SB), 1, $-4 + CLI + SWAPGS + MOVQ $UDSEL, AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + + MOVQ $(UTZERO+0x28), CX /* ip */ + MOVQ $0x200, R11 /* flags */ + + MOVQ RARG, SP /* sp */ + + BYTE $0x48; SYSRET /* SYSRETQ */ + +/* + */ +TEXT syscallentry(SB), 1, $-4 + SWAPGS + BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* m->proc */ + MOVQ SP, R13 + MOVQ 16(RUSER), SP /* m->proc->kstack */ + ADDQ $KSTACK, SP + PUSHQ $UDSEL /* old stack segment */ + PUSHQ R13 /* old sp */ + PUSHQ R11 /* old flags */ + PUSHQ $UESEL /* old code segment */ + PUSHQ CX /* old ip */ + + SUBQ $(17*8), SP /* unsaved registers */ + PUSHQ RARG /* system call number */ + + MOVW $UDSEL, (15*8+0)(SP) + MOVW ES, (15*8+2)(SP) + MOVW FS, (15*8+4)(SP) + MOVW GS, (15*8+6)(SP) + + MOVQ SP, RARG + PUSHQ SP /* Ureg* */ + CALL syscall(SB) + +TEXT forkret(SB), 1, $-4 + MOVQ 8(SP), AX /* Ureg.ax */ + MOVQ (8+6*8)(SP), BP /* Ureg.bp */ + ADDQ $(16*8), SP /* registers + arguments */ + + CLI + SWAPGS + MOVW 0(SP), DS + MOVW 2(SP), ES + MOVW 4(SP), FS + MOVW 6(SP), GS + + MOVQ 24(SP), CX /* ip */ + MOVQ 40(SP), R11 /* flags */ + + MOVQ 48(SP), SP /* sp */ + + BYTE $0x48; SYSRET /* SYSRETQ */ + +/* + * Interrupt/exception handling. 
+ */ + +TEXT _strayintr(SB), 1, $-4 /* no error code pushed */ + PUSHQ AX /* save AX */ + MOVQ 8(SP), AX /* vectortable(SB) PC */ + JMP _intrcommon + +TEXT _strayintrx(SB), 1, $-4 /* error code pushed */ + XCHGQ AX, (SP) +_intrcommon: + MOVBQZX (AX), AX + XCHGQ AX, (SP) + + SUBQ $24, SP /* R1[45], [DEFG]S */ + CMPW 48(SP), $KESEL /* old CS */ + JEQ _intrnested + + MOVQ RUSER, 0(SP) + MOVQ RMACH, 8(SP) + MOVW DS, 16(SP) + MOVW ES, 18(SP) + MOVW FS, 20(SP) + MOVW GS, 22(SP) + + SWAPGS + BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* up */ + +_intrnested: + PUSHQ R13 + PUSHQ R12 + PUSHQ R11 + PUSHQ R10 + PUSHQ R9 + PUSHQ R8 + PUSHQ BP + PUSHQ DI + PUSHQ SI + PUSHQ DX + PUSHQ CX + PUSHQ BX + PUSHQ AX + + MOVQ SP, RARG + PUSHQ SP + CALL trap(SB) + + POPQ AX + + POPQ AX + POPQ BX + POPQ CX + POPQ DX + POPQ SI + POPQ DI + POPQ BP + POPQ R8 + POPQ R9 + POPQ R10 + POPQ R11 + POPQ R12 + POPQ R13 + + CMPQ 48(SP), $KESEL + JEQ _iretnested + + SWAPGS + MOVW 22(SP), GS + MOVW 20(SP), FS + MOVW 18(SP), ES + MOVW 16(SP), DS + MOVQ 8(SP), RMACH + MOVQ 0(SP), RUSER + +_iretnested: + ADDQ $40, SP + IRETQ + +TEXT vectortable(SB), $0 + CALL _strayintr(SB); BYTE $0x00 /* divide error */ + CALL _strayintr(SB); BYTE $0x01 /* debug exception */ + CALL _strayintr(SB); BYTE $0x02 /* NMI interrupt */ + CALL _strayintr(SB); BYTE $0x03 /* breakpoint */ + CALL _strayintr(SB); BYTE $0x04 /* overflow */ + CALL _strayintr(SB); BYTE $0x05 /* bound */ + CALL _strayintr(SB); BYTE $0x06 /* invalid opcode */ + CALL _strayintr(SB); BYTE $0x07 /* no coprocessor available */ + CALL _strayintrx(SB); BYTE $0x08 /* double fault */ + CALL _strayintr(SB); BYTE $0x09 /* coprocessor segment overflow */ + CALL _strayintrx(SB); BYTE $0x0A /* invalid TSS */ + CALL _strayintrx(SB); BYTE $0x0B /* segment not available */ + CALL _strayintrx(SB); BYTE $0x0C /* stack exception */ + CALL _strayintrx(SB); BYTE $0x0D /* general protection error */ + CALL _strayintrx(SB); BYTE $0x0E /* page 
fault */ + CALL _strayintr(SB); BYTE $0x0F /* */ + CALL _strayintr(SB); BYTE $0x10 /* coprocessor error */ + CALL _strayintrx(SB); BYTE $0x11 /* alignment check */ + CALL _strayintr(SB); BYTE $0x12 /* machine check */ + CALL _strayintr(SB); BYTE $0x13 + CALL _strayintr(SB); BYTE $0x14 + CALL _strayintr(SB); BYTE $0x15 + CALL _strayintr(SB); BYTE $0x16 + CALL _strayintr(SB); BYTE $0x17 + CALL _strayintr(SB); BYTE $0x18 + CALL _strayintr(SB); BYTE $0x19 + CALL _strayintr(SB); BYTE $0x1A + CALL _strayintr(SB); BYTE $0x1B + CALL _strayintr(SB); BYTE $0x1C + CALL _strayintr(SB); BYTE $0x1D + CALL _strayintr(SB); BYTE $0x1E + CALL _strayintr(SB); BYTE $0x1F + CALL _strayintr(SB); BYTE $0x20 /* VectorLAPIC */ + CALL _strayintr(SB); BYTE $0x21 + CALL _strayintr(SB); BYTE $0x22 + CALL _strayintr(SB); BYTE $0x23 + CALL _strayintr(SB); BYTE $0x24 + CALL _strayintr(SB); BYTE $0x25 + CALL _strayintr(SB); BYTE $0x26 + CALL _strayintr(SB); BYTE $0x27 + CALL _strayintr(SB); BYTE $0x28 + CALL _strayintr(SB); BYTE $0x29 + CALL _strayintr(SB); BYTE $0x2A + CALL _strayintr(SB); BYTE $0x2B + CALL _strayintr(SB); BYTE $0x2C + CALL _strayintr(SB); BYTE $0x2D + CALL _strayintr(SB); BYTE $0x2E + CALL _strayintr(SB); BYTE $0x2F + CALL _strayintr(SB); BYTE $0x30 + CALL _strayintr(SB); BYTE $0x31 + CALL _strayintr(SB); BYTE $0x32 + CALL _strayintr(SB); BYTE $0x33 + CALL _strayintr(SB); BYTE $0x34 + CALL _strayintr(SB); BYTE $0x35 + CALL _strayintr(SB); BYTE $0x36 + CALL _strayintr(SB); BYTE $0x37 + CALL _strayintr(SB); BYTE $0x38 + CALL _strayintr(SB); BYTE $0x39 + CALL _strayintr(SB); BYTE $0x3A + CALL _strayintr(SB); BYTE $0x3B + CALL _strayintr(SB); BYTE $0x3C + CALL _strayintr(SB); BYTE $0x3D + CALL _strayintr(SB); BYTE $0x3E + CALL _strayintr(SB); BYTE $0x3F + CALL _strayintr(SB); BYTE $0x40 /* was VectorSYSCALL */ + CALL _strayintr(SB); BYTE $0x41 + CALL _strayintr(SB); BYTE $0x42 + CALL _strayintr(SB); BYTE $0x43 + CALL _strayintr(SB); BYTE $0x44 + CALL _strayintr(SB); BYTE $0x45 + 
CALL _strayintr(SB); BYTE $0x46 + CALL _strayintr(SB); BYTE $0x47 + CALL _strayintr(SB); BYTE $0x48 + CALL _strayintr(SB); BYTE $0x49 + CALL _strayintr(SB); BYTE $0x4A + CALL _strayintr(SB); BYTE $0x4B + CALL _strayintr(SB); BYTE $0x4C + CALL _strayintr(SB); BYTE $0x4D + CALL _strayintr(SB); BYTE $0x4E + CALL _strayintr(SB); BYTE $0x4F + CALL _strayintr(SB); BYTE $0x50 + CALL _strayintr(SB); BYTE $0x51 + CALL _strayintr(SB); BYTE $0x52 + CALL _strayintr(SB); BYTE $0x53 + CALL _strayintr(SB); BYTE $0x54 + CALL _strayintr(SB); BYTE $0x55 + CALL _strayintr(SB); BYTE $0x56 + CALL _strayintr(SB); BYTE $0x57 + CALL _strayintr(SB); BYTE $0x58 + CALL _strayintr(SB); BYTE $0x59 + CALL _strayintr(SB); BYTE $0x5A + CALL _strayintr(SB); BYTE $0x5B + CALL _strayintr(SB); BYTE $0x5C + CALL _strayintr(SB); BYTE $0x5D + CALL _strayintr(SB); BYTE $0x5E + CALL _strayintr(SB); BYTE $0x5F + CALL _strayintr(SB); BYTE $0x60 + CALL _strayintr(SB); BYTE $0x61 + CALL _strayintr(SB); BYTE $0x62 + CALL _strayintr(SB); BYTE $0x63 + CALL _strayintr(SB); BYTE $0x64 + CALL _strayintr(SB); BYTE $0x65 + CALL _strayintr(SB); BYTE $0x66 + CALL _strayintr(SB); BYTE $0x67 + CALL _strayintr(SB); BYTE $0x68 + CALL _strayintr(SB); BYTE $0x69 + CALL _strayintr(SB); BYTE $0x6A + CALL _strayintr(SB); BYTE $0x6B + CALL _strayintr(SB); BYTE $0x6C + CALL _strayintr(SB); BYTE $0x6D + CALL _strayintr(SB); BYTE $0x6E + CALL _strayintr(SB); BYTE $0x6F + CALL _strayintr(SB); BYTE $0x70 + CALL _strayintr(SB); BYTE $0x71 + CALL _strayintr(SB); BYTE $0x72 + CALL _strayintr(SB); BYTE $0x73 + CALL _strayintr(SB); BYTE $0x74 + CALL _strayintr(SB); BYTE $0x75 + CALL _strayintr(SB); BYTE $0x76 + CALL _strayintr(SB); BYTE $0x77 + CALL _strayintr(SB); BYTE $0x78 + CALL _strayintr(SB); BYTE $0x79 + CALL _strayintr(SB); BYTE $0x7A + CALL _strayintr(SB); BYTE $0x7B + CALL _strayintr(SB); BYTE $0x7C + CALL _strayintr(SB); BYTE $0x7D + CALL _strayintr(SB); BYTE $0x7E + CALL _strayintr(SB); BYTE $0x7F + CALL _strayintr(SB); BYTE 
$0x80 /* Vector[A]PIC */ + CALL _strayintr(SB); BYTE $0x81 + CALL _strayintr(SB); BYTE $0x82 + CALL _strayintr(SB); BYTE $0x83 + CALL _strayintr(SB); BYTE $0x84 + CALL _strayintr(SB); BYTE $0x85 + CALL _strayintr(SB); BYTE $0x86 + CALL _strayintr(SB); BYTE $0x87 + CALL _strayintr(SB); BYTE $0x88 + CALL _strayintr(SB); BYTE $0x89 + CALL _strayintr(SB); BYTE $0x8A + CALL _strayintr(SB); BYTE $0x8B + CALL _strayintr(SB); BYTE $0x8C + CALL _strayintr(SB); BYTE $0x8D + CALL _strayintr(SB); BYTE $0x8E + CALL _strayintr(SB); BYTE $0x8F + CALL _strayintr(SB); BYTE $0x90 + CALL _strayintr(SB); BYTE $0x91 + CALL _strayintr(SB); BYTE $0x92 + CALL _strayintr(SB); BYTE $0x93 + CALL _strayintr(SB); BYTE $0x94 + CALL _strayintr(SB); BYTE $0x95 + CALL _strayintr(SB); BYTE $0x96 + CALL _strayintr(SB); BYTE $0x97 + CALL _strayintr(SB); BYTE $0x98 + CALL _strayintr(SB); BYTE $0x99 + CALL _strayintr(SB); BYTE $0x9A + CALL _strayintr(SB); BYTE $0x9B + CALL _strayintr(SB); BYTE $0x9C + CALL _strayintr(SB); BYTE $0x9D + CALL _strayintr(SB); BYTE $0x9E + CALL _strayintr(SB); BYTE $0x9F + CALL _strayintr(SB); BYTE $0xA0 + CALL _strayintr(SB); BYTE $0xA1 + CALL _strayintr(SB); BYTE $0xA2 + CALL _strayintr(SB); BYTE $0xA3 + CALL _strayintr(SB); BYTE $0xA4 + CALL _strayintr(SB); BYTE $0xA5 + CALL _strayintr(SB); BYTE $0xA6 + CALL _strayintr(SB); BYTE $0xA7 + CALL _strayintr(SB); BYTE $0xA8 + CALL _strayintr(SB); BYTE $0xA9 + CALL _strayintr(SB); BYTE $0xAA + CALL _strayintr(SB); BYTE $0xAB + CALL _strayintr(SB); BYTE $0xAC + CALL _strayintr(SB); BYTE $0xAD + CALL _strayintr(SB); BYTE $0xAE + CALL _strayintr(SB); BYTE $0xAF + CALL _strayintr(SB); BYTE $0xB0 + CALL _strayintr(SB); BYTE $0xB1 + CALL _strayintr(SB); BYTE $0xB2 + CALL _strayintr(SB); BYTE $0xB3 + CALL _strayintr(SB); BYTE $0xB4 + CALL _strayintr(SB); BYTE $0xB5 + CALL _strayintr(SB); BYTE $0xB6 + CALL _strayintr(SB); BYTE $0xB7 + CALL _strayintr(SB); BYTE $0xB8 + CALL _strayintr(SB); BYTE $0xB9 + CALL _strayintr(SB); BYTE $0xBA + 
CALL _strayintr(SB); BYTE $0xBB + CALL _strayintr(SB); BYTE $0xBC + CALL _strayintr(SB); BYTE $0xBD + CALL _strayintr(SB); BYTE $0xBE + CALL _strayintr(SB); BYTE $0xBF + CALL _strayintr(SB); BYTE $0xC0 + CALL _strayintr(SB); BYTE $0xC1 + CALL _strayintr(SB); BYTE $0xC2 + CALL _strayintr(SB); BYTE $0xC3 + CALL _strayintr(SB); BYTE $0xC4 + CALL _strayintr(SB); BYTE $0xC5 + CALL _strayintr(SB); BYTE $0xC6 + CALL _strayintr(SB); BYTE $0xC7 + CALL _strayintr(SB); BYTE $0xC8 + CALL _strayintr(SB); BYTE $0xC9 + CALL _strayintr(SB); BYTE $0xCA + CALL _strayintr(SB); BYTE $0xCB + CALL _strayintr(SB); BYTE $0xCC + CALL _strayintr(SB); BYTE $0xCD + CALL _strayintr(SB); BYTE $0xCE + CALL _strayintr(SB); BYTE $0xCF + CALL _strayintr(SB); BYTE $0xD0 + CALL _strayintr(SB); BYTE $0xD1 + CALL _strayintr(SB); BYTE $0xD2 + CALL _strayintr(SB); BYTE $0xD3 + CALL _strayintr(SB); BYTE $0xD4 + CALL _strayintr(SB); BYTE $0xD5 + CALL _strayintr(SB); BYTE $0xD6 + CALL _strayintr(SB); BYTE $0xD7 + CALL _strayintr(SB); BYTE $0xD8 + CALL _strayintr(SB); BYTE $0xD9 + CALL _strayintr(SB); BYTE $0xDA + CALL _strayintr(SB); BYTE $0xDB + CALL _strayintr(SB); BYTE $0xDC + CALL _strayintr(SB); BYTE $0xDD + CALL _strayintr(SB); BYTE $0xDE + CALL _strayintr(SB); BYTE $0xDF + CALL _strayintr(SB); BYTE $0xE0 + CALL _strayintr(SB); BYTE $0xE1 + CALL _strayintr(SB); BYTE $0xE2 + CALL _strayintr(SB); BYTE $0xE3 + CALL _strayintr(SB); BYTE $0xE4 + CALL _strayintr(SB); BYTE $0xE5 + CALL _strayintr(SB); BYTE $0xE6 + CALL _strayintr(SB); BYTE $0xE7 + CALL _strayintr(SB); BYTE $0xE8 + CALL _strayintr(SB); BYTE $0xE9 + CALL _strayintr(SB); BYTE $0xEA + CALL _strayintr(SB); BYTE $0xEB + CALL _strayintr(SB); BYTE $0xEC + CALL _strayintr(SB); BYTE $0xED + CALL _strayintr(SB); BYTE $0xEE + CALL _strayintr(SB); BYTE $0xEF + CALL _strayintr(SB); BYTE $0xF0 + CALL _strayintr(SB); BYTE $0xF1 + CALL _strayintr(SB); BYTE $0xF2 + CALL _strayintr(SB); BYTE $0xF3 + CALL _strayintr(SB); BYTE $0xF4 + CALL _strayintr(SB); BYTE 
$0xF5 + CALL _strayintr(SB); BYTE $0xF6 + CALL _strayintr(SB); BYTE $0xF7 + CALL _strayintr(SB); BYTE $0xF8 + CALL _strayintr(SB); BYTE $0xF9 + CALL _strayintr(SB); BYTE $0xFA + CALL _strayintr(SB); BYTE $0xFB + CALL _strayintr(SB); BYTE $0xFC + CALL _strayintr(SB); BYTE $0xFD + CALL _strayintr(SB); BYTE $0xFE + CALL _strayintr(SB); BYTE $0xFF diff --git a/sys/src/9/pc64/main.c b/sys/src/9/pc64/main.c new file mode 100644 index 000000000..8c07566c7 --- /dev/null +++ b/sys/src/9/pc64/main.c @@ -0,0 +1,742 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "tos.h" +#include "ureg.h" +#include "init.h" +#include "pool.h" + +/* + * Where configuration info is left for the loaded programme. + * This will turn into a structure as more is done by the boot loader + * (e.g. why parse the .ini file twice?). + * There are 3584 bytes available at CONFADDR. + */ +#define BOOTLINE ((char*)CONFADDR) +#define BOOTLINELEN 64 +#define BOOTARGS ((char*)(CONFADDR+BOOTLINELEN)) +#define BOOTARGSLEN (4096-0x200-BOOTLINELEN) +#define MAXCONF 64 + +Conf conf; +char *confname[MAXCONF]; +char *confval[MAXCONF]; +int nconf; +int delaylink; +uchar *sp; /* user stack of init proc */ + +extern void (*i8237alloc)(void); + +static void +options(void) +{ + long i, n; + char *cp, *line[MAXCONF], *p, *q; + + // multibootargs(); + + /* + * parse configuration args from dos file plan9.ini + */ + cp = BOOTARGS; /* where b.com leaves its config */ + cp[BOOTARGSLEN-1] = 0; + + /* + * Strip out '\r', change '\t' -> ' '. 
+ */ + p = cp; + for(q = cp; *q; q++){ + if(*q == '\r') + continue; + if(*q == '\t') + *q = ' '; + *p++ = *q; + } + *p = 0; + + n = getfields(cp, line, MAXCONF, 1, "\n"); + for(i = 0; i < n; i++){ + if(*line[i] == '#') + continue; + cp = strchr(line[i], '='); + if(cp == nil) + continue; + *cp++ = '\0'; + confname[nconf] = line[i]; + confval[nconf] = cp; + nconf++; + } +} + +char* +getconf(char *name) +{ + int i; + + for(i = 0; i < nconf; i++) + if(cistrcmp(confname[i], name) == 0) + return confval[i]; + return 0; +} + +void +confinit(void) +{ + char *p; + int i, userpcnt; + ulong kpages; + + if(p = getconf("*kernelpercent")) + userpcnt = 100 - strtol(p, 0, 0); + else + userpcnt = 0; + + conf.npage = 0; + for(i=0; i 2000) + conf.nproc = 2000; + conf.nimage = 200; + conf.nswap = conf.nproc*80; + conf.nswppo = 4096; + + if(cpuserver) { + if(userpcnt < 10) + userpcnt = 70; + kpages = conf.npage - (conf.npage*userpcnt)/100; + + /* + * Hack for the big boys. Only good while physmem < 4GB. + * Give the kernel fixed max + enough to allocate the + * page pool. + * This is an overestimate as conf.upages < conf.npages. + * The patch of nimage is a band-aid, scanning the whole + * page list in imagereclaim just takes too long. + */ + if(getconf("*imagemaxmb") == 0) + if(kpages > (64*MB + conf.npage*sizeof(Page))/BY2PG){ + kpages = (64*MB + conf.npage*sizeof(Page))/BY2PG; + conf.nimage = 2000; + kpages += (conf.nproc*KSTACK)/BY2PG; + } + } else { + if(userpcnt < 10) { + if(conf.npage*BY2PG < 16*MB) + userpcnt = 50; + else + userpcnt = 60; + } + kpages = conf.npage - (conf.npage*userpcnt)/100; + + /* + * Make sure terminals with low memory get at least + * 4MB on the first Image chunk allocation. + */ + if(conf.npage*BY2PG < 16*MB) + imagmem->minarena = 4*MB; + } + + /* + * can't go past the end of virtual memory. 
+ */ + if(kpages > ((uintptr)-KZERO)/BY2PG) + kpages = ((uintptr)-KZERO)/BY2PG; + + conf.upages = conf.npage - kpages; + conf.ialloc = (kpages/2)*BY2PG; + + /* + * Guess how much is taken by the large permanent + * datastructures. Mntcache and Mntrpc are not accounted for + * (probably ~300KB). + */ + kpages *= BY2PG; + kpages -= conf.upages*sizeof(Page) + + conf.nproc*sizeof(Proc) + + conf.nimage*sizeof(Image) + + conf.nswap + + conf.nswppo*sizeof(Page*); + mainmem->maxsize = kpages; + + /* + * the dynamic allocation will balance the load properly, + * hopefully. be careful with 32-bit overflow. + */ + imagmem->maxsize = kpages - (kpages/10); + if(p = getconf("*imagemaxmb")){ + imagmem->maxsize = strtol(p, nil, 0)*MB; + if(imagmem->maxsize > mainmem->maxsize) + imagmem->maxsize = mainmem->maxsize; + } +} + + +void +machinit(void) +{ + int machno; + Segdesc *gdt; + uintptr *pml4; + + machno = m->machno; + pml4 = m->pml4; + gdt = m->gdt; + memset(m, 0, sizeof(Mach)); + m->machno = machno; + m->pml4 = pml4; + m->gdt = gdt; + m->perf.period = 1; + + /* + * For polled uart output at boot, need + * a default delay constant. 100000 should + * be enough for a while. Cpuidentify will + * calculate the real value later. 
+ */ + m->loopconst = 100000; +} + +void +mach0init(void) +{ + conf.nmach = 1; + + MACHP(0) = (Mach*)CPU0MACH; + + m->machno = 0; + m->pml4 = (u64int*)CPU0PML4; + m->gdt = (Segdesc*)CPU0GDT; + + machinit(); + + active.machs = 1; + active.exiting = 0; +} + + +uchar * +pusharg(char *p) +{ + int n; + + n = strlen(p)+1; + sp -= n; + memmove(sp, p, n); + return sp; +} + +void +bootargs(void *base) +{ + int i, ac; + uchar *av[32]; + uchar **lsp; + char *cp = BOOTLINE; + char buf[64]; + + sp = (uchar*)base + BY2PG - sizeof(Tos); + + ac = 0; + av[ac++] = pusharg("boot"); + + /* when boot is changed to only use rc, this code can go away */ + cp[BOOTLINELEN-1] = 0; + buf[0] = 0; + if(strncmp(cp, "fd", 2) == 0){ + sprint(buf, "local!#f/fd%lddisk", strtol(cp+2, 0, 0)); + av[ac++] = pusharg(buf); + } else if(strncmp(cp, "sd", 2) == 0){ + sprint(buf, "local!#S/sd%c%c/fs", *(cp+2), *(cp+3)); + av[ac++] = pusharg(buf); + } else if(strncmp(cp, "ether", 5) == 0) + av[ac++] = pusharg("-n"); + + /* 8 byte word align stack */ + sp = (uchar*)((uintptr)sp & ~7); + + /* build argc, argv on stack */ + sp -= (ac+1)*sizeof(sp); + lsp = (uchar**)sp; + for(i = 0; i < ac; i++) + lsp[i] = av[i] + ((uintptr)(USTKTOP - BY2PG) - (uintptr)base); + lsp[i] = 0; + sp += (uintptr)(USTKTOP - BY2PG) - (uintptr)base; + sp -= BY2WD; +} + +void +init0(void) +{ + int i; + char buf[2*KNAMELEN]; + + up->nerrlab = 0; + + spllo(); + + /* + * These are o.k. because rootinit is null. + * Then early kproc's will have a root and dot. 
+ */ + up->slash = namec("#/", Atodir, 0, 0); + pathclose(up->slash->path); + up->slash->path = newpath("/"); + up->dot = cclone(up->slash); + + chandevinit(); + + if(!waserror()){ + snprint(buf, sizeof(buf), "%s %s", arch->id, conffile); + ksetenv("terminal", buf, 0); + ksetenv("cputype", "amd64", 0); + if(cpuserver) + ksetenv("service", "cpu", 0); + else + ksetenv("service", "terminal", 0); + for(i = 0; i < nconf; i++){ + if(confname[i][0] != '*') + ksetenv(confname[i], confval[i], 0); + ksetenv(confname[i], confval[i], 1); + } + poperror(); + } + kproc("alarm", alarmkproc, 0); + + touser(sp); +} + +void +userinit(void) +{ + void *v; + Proc *p; + Segment *s; + Page *pg; + + p = newproc(); + p->pgrp = newpgrp(); + p->egrp = smalloc(sizeof(Egrp)); + p->egrp->ref = 1; + p->fgrp = dupfgrp(nil); + p->rgrp = newrgrp(); + p->procmode = 0640; + + kstrdup(&eve, ""); + kstrdup(&p->text, "*init*"); + kstrdup(&p->user, eve); + + procsetup(p); + + /* + * Kernel Stack + * + * N.B. make sure there's enough space for syscall to check + * for valid args and + * 8 bytes for gotolabel's return PC + */ + p->sched.pc = (uintptr)init0; + p->sched.sp = (uintptr)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD); + + /* + * User Stack + */ + s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG); + p->seg[SSEG] = s; + pg = newpage(0, 0, USTKTOP-BY2PG); + v = kmap(pg); + memset(v, 0, BY2PG); + segpage(s, pg); + bootargs(v); + kunmap(v); + + /* + * Text + */ + s = newseg(SG_TEXT, UTZERO, 1); + s->flushme++; + p->seg[TSEG] = s; + pg = newpage(0, 0, UTZERO); + memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl)); + segpage(s, pg); + v = kmap(pg); + memset(v, 0, BY2PG); + memmove(v, initcode, sizeof initcode); + kunmap(v); + + ready(p); +} + +void +main() +{ + mach0init(); + options(); + ioinit(); + // i8250console(); + quotefmtinstall(); + screeninit(); + trapinit0(); + kbdinit(); + i8253init(); + cpuidentify(); + meminit(); + confinit(); + archinit(); + xinit(); + if(i8237alloc != nil) + 
i8237alloc(); + trapinit(); + printinit(); + cpuidprint(); + mmuinit(); + if(arch->intrinit) + arch->intrinit(); + timersinit(); + mathinit(); + kbdenable(); + if(arch->clockenable) + arch->clockenable(); + procinit0(); + initseg(); + if(delaylink){ + bootlinks(); + pcimatch(0, 0, 0); + }else + links(); + conf.monitor = 1; + chandevreset(); + pageinit(); + swapinit(); + userinit(); + active.thunderbirdsarego = 1; + schedinit(); +} + +void +exit(int) +{ + print("exit\n"); + splhi(); + for(;;); +} + +void +reboot(void*, void*, ulong) +{ + exit(0); +} + +void +idlehands(void) +{ + halt(); +} + +/* + * SIMD Floating Point. + * Assembler support to get at the individual instructions + * is in l.s. + * There are opportunities to be lazier about saving and + * restoring the state and allocating the storage needed. + */ +extern void _clts(void); +extern void _fldcw(u16int); +extern void _fnclex(void); +extern void _fninit(void); +extern void _fxrstor(Fxsave*); +extern void _fxsave(Fxsave*); +extern void _fwait(void); +extern void _ldmxcsr(u32int); +extern void _stts(void); + +/* + * not used, AMD64 mandated SSE + */ +void +fpx87save(FPsave*) +{ +} +void +fpx87restore(FPsave*) +{ +} + +void +fpssesave(FPsave *fps) +{ + Fxsave *fx = (Fxsave*)ROUND(((uintptr)fps), FPalign); + + _fxsave(fx); + _stts(); + if(fx != (Fxsave*)fps) + memmove((Fxsave*)fps, fx, sizeof(Fxsave)); +} +void +fpsserestore(FPsave *fps) +{ + Fxsave *fx = (Fxsave*)ROUND(((uintptr)fps), FPalign); + + if(fx != (Fxsave*)fps) + memmove(fx, (Fxsave*)fps, sizeof(Fxsave)); + _clts(); + _fxrstor(fx); +} + +static char* mathmsg[] = +{ + nil, /* handled below */ + "denormalized operand", + "division by zero", + "numeric overflow", + "numeric underflow", + "precision loss", +}; + +static void +mathnote(ulong status, uintptr pc) +{ + char *msg, note[ERRMAX]; + int i; + + /* + * Some attention should probably be paid here to the + * exception masks and error summary. 
+ */ + msg = "unknown exception"; + for(i = 1; i <= 5; i++){ + if(!((1<fpsave); + up->fpstate = FPinactive; + mathnote(up->fpsave.fsw, up->fpsave.rip); +} + +/* + * math coprocessor emulation fault + */ +static void +mathemu(Ureg *ureg, void*) +{ + ulong status, control; + + if(up->fpstate & FPillegal){ + /* someone did floating point in a note handler */ + postnote(up, 1, "sys: floating point in note handler", NDebug); + return; + } + switch(up->fpstate){ + case FPinit: + /* + * A process tries to use the FPU for the + * first time and generates a 'device not available' + * exception. + * Turn the FPU on and initialise it for use. + * Set the precision and mask the exceptions + * we don't care about from the generic Mach value. + */ + _clts(); + _fninit(); + _fwait(); + _fldcw(0x0232); + /* + * TODO: sse exceptions + * _ldmxcsr(m->mxcsr); + * + */ + up->fpstate = FPactive; + break; + case FPinactive: + /* + * Before restoring the state, check for any pending + * exceptions, there's no way to restore the state without + * generating an unmasked exception. + * More attention should probably be paid here to the + * exception masks and error summary. 
+ */ + status = up->fpsave.fsw; + control = up->fpsave.fcw; + if((status & ~control) & 0x07F){ + mathnote(status, up->fpsave.rip); + break; + } + fprestore(&up->fpsave); + up->fpstate = FPactive; + break; + case FPactive: + panic("math emu pid %ld %s pc %#p", + up->pid, up->text, ureg->pc); + break; + } +} + +/* + * math coprocessor segment overrun + */ +static void +mathover(Ureg*, void*) +{ + pexit("math overrun", 0); +} + +void +mathinit(void) +{ + trapenable(VectorCERR, matherror, 0, "matherror"); + if(X86FAMILY(m->cpuidax) == 3) + intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror"); + trapenable(VectorCNA, mathemu, 0, "mathemu"); + trapenable(VectorCSO, mathover, 0, "mathover"); +} + +void +procsetup(Proc *p) +{ + p->fpstate = FPinit; + _stts(); + cycles(&p->kentry); + p->pcycles = -p->kentry; +} + +void +procfork(Proc *p) +{ + int s; + + p->kentry = up->kentry; + p->pcycles = -p->kentry; + + /* save floating point state */ + s = splhi(); + switch(up->fpstate & ~FPillegal){ + case FPactive: + fpsave(&up->fpsave); + up->fpstate = FPinactive; + case FPinactive: + p->fpsave = up->fpsave; + p->fpstate = FPinactive; + } + splx(s); + +} + +void +procrestore(Proc *p) +{ + uvlong t; + + if(p->kp) + return; + + cycles(&t); + p->kentry += t; + p->pcycles -= t; +} + +void +procsave(Proc *p) +{ + uvlong t; + + cycles(&t); + p->kentry -= t; + p->pcycles += t; + + if(p->fpstate == FPactive){ + if(p->state == Moribund){ + _clts(); + _fnclex(); + _stts(); + } + else{ + /* + * Fpsave() stores without handling pending + * unmasked exeptions. Postnote() can't be called + * here as sleep() already has up->rlock, so + * the handling of pending exceptions is delayed + * until the process runs again and generates an + * emulation fault to activate the FPU. 
+ */ + fpsave(&p->fpsave); + } + p->fpstate = FPinactive; + } + + /* + * While this processor is in the scheduler, the process could run + * on another processor and exit, returning the page tables to + * the free list where they could be reallocated and overwritten. + * When this processor eventually has to get an entry from the + * trashed page tables it will crash. + * + * If there's only one processor, this can't happen. + * You might think it would be a win not to do this in that case, + * especially on VMware, but it turns out not to matter. + */ + mmuflushtlb(); +} + +int +isaconfig(char *class, int ctlrno, ISAConf *isa) +{ + char cc[32], *p; + int i; + + snprint(cc, sizeof cc, "%s%d", class, ctlrno); + p = getconf(cc); + if(p == nil) + return 0; + + isa->type = ""; + isa->nopt = tokenize(p, isa->opt, NISAOPT); + for(i = 0; i < isa->nopt; i++){ + p = isa->opt[i]; + if(cistrncmp(p, "type=", 5) == 0) + isa->type = p + 5; + else if(cistrncmp(p, "port=", 5) == 0) + isa->port = strtoul(p+5, &p, 0); + else if(cistrncmp(p, "irq=", 4) == 0) + isa->irq = strtoul(p+4, &p, 0); + else if(cistrncmp(p, "dma=", 4) == 0) + isa->dma = strtoul(p+4, &p, 0); + else if(cistrncmp(p, "mem=", 4) == 0) + isa->mem = strtoul(p+4, &p, 0); + else if(cistrncmp(p, "size=", 5) == 0) + isa->size = strtoul(p+5, &p, 0); + else if(cistrncmp(p, "freq=", 5) == 0) + isa->freq = strtoul(p+5, &p, 0); + } + return 1; +} diff --git a/sys/src/9/pc64/mem.h b/sys/src/9/pc64/mem.h new file mode 100644 index 000000000..4d9cfb1da --- /dev/null +++ b/sys/src/9/pc64/mem.h @@ -0,0 +1,164 @@ +/* + * Memory and machine-specific definitions. Used in C and assembler. 
+ */ +#define KiB 1024u /* Kibi 0x0000000000000400 */ +#define MiB 1048576u /* Mebi 0x0000000000100000 */ +#define GiB 1073741824u /* Gibi 000000000040000000 */ +#define TiB 1099511627776ull /* Tebi 0x0000010000000000 */ +#define PiB 1125899906842624ull /* Pebi 0x0004000000000000 */ +#define EiB 1152921504606846976ull /* Exbi 0x1000000000000000 */ + +#define MIN(a, b) ((a) < (b)? (a): (b)) +#define MAX(a, b) ((a) > (b)? (a): (b)) + +#define ALIGNED(p, a) (!(((uintptr)(p)) & ((a)-1))) + +/* + * Sizes + */ +#define BI2BY 8 /* bits per byte */ +#define BI2WD 32 /* bits per word */ +#define BY2WD 8 /* bytes per word */ +#define BY2V 8 /* bytes per double word */ +#define BY2PG (0x1000ull) /* bytes per page */ +#define WD2PG (BY2PG/BY2WD) /* words per page */ +#define BY2XPG (2*MiB) /* bytes per big page */ +#define PGSHIFT 12 /* log(BY2PG) */ +#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1)) +#define PGROUND(s) ROUND(s, BY2PG) +#define BLOCKALIGN 8 +#define FPalign 16 + +#define MAXMACH 32 /* max # cpus system can run */ + +#define KSTACK (16*KiB) /* Size of Proc kernel stack */ + +/* + * Time + */ +#define HZ (100) /* clock frequency */ +#define MS2HZ (100/HZ) /* millisec per clock tick */ +#define TK2SEC(t) ((t)/HZ) /* ticks to seconds */ + +/* + * Address spaces. User: + */ +#define UTZERO (0x0000000000200000ull) /* first address in user text */ +#define TSTKTOP (0x00007ffffffff000ull) +#define USTKSIZE (16*MiB) /* size of user stack */ +#define USTKTOP (TSTKTOP-USTKSIZE) /* end of new stack in sysexec */ + +/* + * Address spaces. Kernel, sorted by address. 
+ */ +#define KZERO (0xffffffff80000000ull) /* 2GB identity map of lower 2GB ram */ +#define KTZERO (KZERO+1*MiB+64*KiB) + +#define VMAP (0xffffffff00000000ull) /* 2GB identity map of upper 2GB ram */ +#define VMAPSIZE (2*GiB) + +#define KMAP (0xffffff7f00000000ull) +#define KMAPSIZE (512*GiB) + +/* + * Fundamental addresses - bottom 64kB saved for return to real mode + */ +#define CONFADDR (KZERO+0x1200ull) /* info passed from boot loader */ +#define APBOOTSTRAP (KZERO+0x3000ull) /* AP bootstrap code */ +#define IDTADDR (KZERO+0x10000ull) /* idt */ +#define REBOOTADDR (0x11000) /* reboot code - physical address */ +#define CPU0PML4 (KZERO+0x13000ull) +#define CPU0GDT (KZERO+0x17000ull) /* bootstrap processor GDT */ +#define CPU0MACH (KZERO+0x18000ull) /* Mach for bootstrap processor */ +#define CPU0END (CPU0MACH+MACHSIZE) + +#define MACHSIZE (2*KSTACK) +#define INIMAP (8*MiB) /* 4 pages; size of inital map in l.s */ + +/* + * known x86 segments (in GDT) and their selectors + */ +#define NULLSEG 0 /* null segment */ +#define KESEG 1 /* kernel executable */ +#define KDSEG 2 /* kernel data */ +#define UE32SEG 3 /* user executable 32bit */ +#define UDSEG 4 /* user data/stack */ +#define UESEG 5 /* user executable 64bit */ +#define TSSSEG 8 /* task segment (two descriptors) */ + +#define NGDT 10 /* number of GDT entries required */ + +#define SELGDT (0<<2) /* selector is in gdt */ +#define SELLDT (1<<2) /* selector is in ldt */ + +#define SELECTOR(i, t, p) (((i)<<3) | (t) | (p)) + +#define NULLSEL SELECTOR(NULLSEG, SELGDT, 0) +#define KESEL SELECTOR(KESEG, SELGDT, 0) +#define UE32SEL SELECTOR(UE32SEG, SELGDT, 3) +#define UDSEL SELECTOR(UDSEG, SELGDT, 3) +#define UESEL SELECTOR(UESEG, SELGDT, 3) +#define TSSSEL SELECTOR(TSSSEG, SELGDT, 0) + +/* + * fields in segment descriptors + */ +#define SEGDATA (0x10<<8) /* data/stack segment */ +#define SEGEXEC (0x18<<8) /* executable segment */ +#define SEGTSS (0x9<<8) /* TSS segment */ +#define SEGCG (0x0C<<8) /* call gate */ 
+#define SEGIG (0x0E<<8) /* interrupt gate */ +#define SEGTG (0x0F<<8) /* trap gate */ +#define SEGLDT (0x02<<8) /* local descriptor table */ +#define SEGTYPE (0x1F<<8) + +#define SEGP (1<<15) /* segment present */ +#define SEGPL(x) ((x)<<13) /* priority level */ +#define SEGB (1<<22) /* granularity 1==4k (for expand-down) */ +#define SEGD (1<<22) /* default 1==32bit (for code) */ +#define SEGE (1<<10) /* expand down */ +#define SEGW (1<<9) /* writable (for data/stack) */ +#define SEGR (1<<9) /* readable (for code) */ +#define SEGL (1<<21) /* 64 bit */ +#define SEGG (1<<23) /* granularity 1==4k (for other) */ + +/* + * virtual MMU + */ +#define PTEMAPMEM (1024*1024) +#define PTEPERTAB (PTEMAPMEM/BY2PG) +#define SEGMAPSIZE 1984 +#define SSEGMAPSIZE 16 +#define PPN(x) ((x)&~((uintptr)BY2PG-1)) + +/* + * physical MMU + */ +#define PTEVALID (1ull<<0) +#define PTEWT (1ull<<3) +#define PTEUNCACHED (1ull<<4) +#define PTEWRITE (1ull<<1) +#define PTERONLY (0ull<<1) +#define PTEKERNEL (0ull<<2) +#define PTEUSER (1ull<<2) +#define PTESIZE (1ull<<7) +#define PTEGLOBAL (1ull<<8) + +/* + * Hierarchical Page Tables. + * For example, traditional IA-32 paging structures have 2 levels, + * level 1 is the PD, and level 0 the PT pages; with IA-32e paging, + * level 3 is the PML4(!), level 2 the PDP, level 1 the PD, + * and level 0 the PT pages. The PTLX macro gives an index into the + * page-table page at level 'l' for the virtual address 'v'. + */ +#define PTSZ (4*KiB) /* page table page size */ +#define PTSHIFT 9 /* */ + +#define PTLX(v, l) (((v)>>(((l)*PTSHIFT)+PGSHIFT)) & ((1< */ +#define RUSER R14 /* up-> */ diff --git a/sys/src/9/pc64/memory.c b/sys/src/9/pc64/memory.c new file mode 100644 index 000000000..a9e9b9c28 --- /dev/null +++ b/sys/src/9/pc64/memory.c @@ -0,0 +1,720 @@ +/* + * Size memory and create the kernel page-tables on the fly while doing so. + * Called from main(), this code should only be run by the bootstrap processor. 
+ * + * MemMin is what the bootstrap code in l.s has already mapped; + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" + +#define MEMDEBUG 0 + +enum { + MemUPA = 0, /* unbacked physical address */ + MemRAM = 1, /* physical memory */ + MemUMB = 2, /* upper memory block (<16MB) */ + MemReserved = 3, + NMemType = 4, + + KB = 1024, + + MemMin = INIMAP, +}; + +typedef struct Map Map; +struct Map { + uintptr size; + uintptr addr; +}; + +typedef struct RMap RMap; +struct RMap { + char* name; + Map* map; + Map* mapend; + + Lock; +}; + +/* + * Memory allocation tracking. + */ +static Map mapupa[16]; +static RMap rmapupa = { + "unallocated unbacked physical memory", + mapupa, + &mapupa[nelem(mapupa)-1], +}; + +static Map mapram[16]; +static RMap rmapram = { + "physical memory", + mapram, + &mapram[nelem(mapram)-1], +}; + +static Map mapumb[64]; +static RMap rmapumb = { + "upper memory block", + mapumb, + &mapumb[nelem(mapumb)-1], +}; + +static Map mapumbrw[16]; +static RMap rmapumbrw = { + "UMB device memory", + mapumbrw, + &mapumbrw[nelem(mapumbrw)-1], +}; + +void +mapprint(RMap *rmap) +{ + Map *mp; + + print("%s\n", rmap->name); + for(mp = rmap->map; mp->size; mp++) + print("\t%#p %#p (%#p)\n", mp->addr, mp->addr+mp->size, mp->size); +} + + +void +memdebug(void) +{ + ulong maxpa, maxpa1, maxpa2; + + maxpa = (nvramread(0x18)<<8)|nvramread(0x17); + maxpa1 = (nvramread(0x31)<<8)|nvramread(0x30); + maxpa2 = (nvramread(0x16)<<8)|nvramread(0x15); + print("maxpa = %luX -> %luX, maxpa1 = %luX maxpa2 = %luX\n", + maxpa, MB+maxpa*KB, maxpa1, maxpa2); + + mapprint(&rmapram); + mapprint(&rmapumb); + mapprint(&rmapumbrw); + mapprint(&rmapupa); +} + +void +mapfree(RMap* rmap, uintptr addr, uintptr size) +{ + Map *mp; + uintptr t; + + if(size <= 0) + return; + + lock(rmap); + for(mp = rmap->map; mp->addr <= addr && mp->size; mp++) + ; + + if(mp > rmap->map && (mp-1)->addr+(mp-1)->size == addr){ + 
(mp-1)->size += size; + if(addr+size == mp->addr){ + (mp-1)->size += mp->size; + while(mp->size){ + mp++; + (mp-1)->addr = mp->addr; + (mp-1)->size = mp->size; + } + } + } + else{ + if(addr+size == mp->addr && mp->size){ + mp->addr -= size; + mp->size += size; + } + else do{ + if(mp >= rmap->mapend){ + print("mapfree: %s: losing %#p, %#p\n", + rmap->name, addr, size); + break; + } + t = mp->addr; + mp->addr = addr; + addr = t; + t = mp->size; + mp->size = size; + mp++; + }while(size = t); + } + unlock(rmap); +} + +uintptr +mapalloc(RMap* rmap, uintptr addr, int size, int align) +{ + Map *mp; + uintptr maddr, oaddr; + + lock(rmap); + for(mp = rmap->map; mp->size; mp++){ + maddr = mp->addr; + + if(addr){ + /* + * A specific address range has been given: + * if the current map entry is greater then + * the address is not in the map; + * if the current map entry does not overlap + * the beginning of the requested range then + * continue on to the next map entry; + * if the current map entry does not entirely + * contain the requested range then the range + * is not in the map. + */ + if(maddr > addr) + break; + if(mp->size < addr - maddr) /* maddr+mp->size < addr, but no overflow */ + continue; + if(addr - maddr > mp->size - size) /* addr+size > maddr+mp->size, but no overflow */ + break; + maddr = addr; + } + + if(align > 0) + maddr = ((maddr+align-1)/align)*align; + if(mp->addr+mp->size-maddr < size) + continue; + + oaddr = mp->addr; + mp->addr = maddr+size; + mp->size -= maddr-oaddr+size; + if(mp->size == 0){ + do{ + mp++; + (mp-1)->addr = mp->addr; + }while((mp-1)->size = mp->size); + } + + unlock(rmap); + if(oaddr != maddr) + mapfree(rmap, oaddr, maddr-oaddr); + + return maddr; + } + unlock(rmap); + + return 0; +} + +/* + * Allocate from the ram map directly to make page tables. + * Called by mmuwalk during e820scan. 
+ */ +void* +rampage(void) +{ + uintptr m; + + m = mapalloc(&rmapram, 0, BY2PG, BY2PG); + if(m == 0) + return nil; + return KADDR(m); +} + +static void +umbexclude(void) +{ + int size; + ulong addr; + char *op, *p, *rptr; + + if((p = getconf("umbexclude")) == nil) + return; + + while(p && *p != '\0' && *p != '\n'){ + op = p; + addr = strtoul(p, &rptr, 0); + if(rptr == nil || rptr == p || *rptr != '-'){ + print("umbexclude: invalid argument <%s>\n", op); + break; + } + p = rptr+1; + + size = strtoul(p, &rptr, 0) - addr + 1; + if(size <= 0){ + print("umbexclude: bad range <%s>\n", op); + break; + } + if(rptr != nil && *rptr == ',') + *rptr++ = '\0'; + p = rptr; + + mapalloc(&rmapumb, addr, size, 0); + } +} + +static void +umbscan(void) +{ + uchar *p; + + /* + * Scan the Upper Memory Blocks (0xA0000->0xF0000) for pieces + * which aren't used; they can be used later for devices which + * want to allocate some virtual address space. + * Check for two things: + * 1) device BIOS ROM. This should start with a two-byte header + * of 0x55 0xAA, followed by a byte giving the size of the ROM + * in 512-byte chunks. These ROM's must start on a 2KB boundary. + * 2) device memory. This is read-write. + * There are some assumptions: there's VGA memory at 0xA0000 and + * the VGA BIOS ROM is at 0xC0000. Also, if there's no ROM signature + * at 0xE0000 then the whole 64KB up to 0xF0000 is theoretically up + * for grabs; check anyway. + */ + p = KADDR(0xD0000); + while(p < (uchar*)KADDR(0xE0000)){ + /* + * Test for 0x55 0xAA before poking obtrusively, + * some machines (e.g. Thinkpad X20) seem to map + * something dynamic here (cardbus?) causing weird + * problems if it is changed. 
+ */ + if(p[0] == 0x55 && p[1] == 0xAA){ + p += p[2]*512; + continue; + } + + p[0] = 0xCC; + p[2*KB-1] = 0xCC; + if(p[0] != 0xCC || p[2*KB-1] != 0xCC){ + p[0] = 0x55; + p[1] = 0xAA; + p[2] = 4; + if(p[0] == 0x55 && p[1] == 0xAA){ + p += p[2]*512; + continue; + } + if(p[0] == 0xFF && p[1] == 0xFF) + mapfree(&rmapumb, PADDR(p), 2*KB); + } + else + mapfree(&rmapumbrw, PADDR(p), 2*KB); + p += 2*KB; + } + + p = KADDR(0xE0000); + if(p[0] != 0x55 || p[1] != 0xAA){ + p[0] = 0xCC; + p[64*KB-1] = 0xCC; + if(p[0] != 0xCC && p[64*KB-1] != 0xCC) + mapfree(&rmapumb, PADDR(p), 64*KB); + } + + umbexclude(); +} + +int +checksum(void *v, int n) +{ + uchar *p, s; + + s = 0; + p = v; + while(n-- > 0) + s += *p++; + return s; +} + +static void* +sigscan(uchar* addr, int len, char* signature) +{ + int sl; + uchar *e, *p; + + e = addr+len; + sl = strlen(signature); + for(p = addr; p+sl < e; p += 16) + if(memcmp(p, signature, sl) == 0) + return p; + return nil; +} + +void* +sigsearch(char* signature) +{ + uintptr p; + uchar *bda; + void *r; + + /* + * Search for the data structure: + * 1) within the first KiB of the Extended BIOS Data Area (EBDA), or + * 2) within the last KiB of system base memory if the EBDA segment + * is undefined, or + * 3) within the BIOS ROM address space between 0xf0000 and 0xfffff + * (but will actually check 0xe0000 to 0xfffff). 
+ */ + bda = KADDR(0x400); + if(memcmp(KADDR(0xfffd9), "EISA", 4) == 0){ + if((p = (bda[0x0f]<<8)|bda[0x0e]) != 0){ + if((r = sigscan(KADDR(p<<4), 1024, signature)) != nil) + return r; + } + } + + if((p = ((bda[0x14]<<8)|bda[0x13])*1024) != 0){ + if((r = sigscan(KADDR(p-1024), 1024, signature)) != nil) + return r; + } + /* hack for virtualbox: look in KiB below 0xa0000 */ + if((r = sigscan(KADDR(0xa0000-1024), 1024, signature)) != nil) + return r; + + return sigscan(KADDR(0xe0000), 0x20000, signature); +} + +static void +lowraminit(void) +{ + uintptr pa, x; + + /* + * Initialise the memory bank information for conventional memory + * (i.e. less than 640KB). The base is the first location after the + * bootstrap processor MMU information and the limit is obtained from + * the BIOS data area. + */ + x = PADDR(PGROUND((uintptr)end)); + pa = MemMin; + if(x > pa) + panic("kernel too big"); + mapfree(&rmapram, x, pa-x); + memset(KADDR(x), 0, pa-x); /* keep us honest */ +} + +typedef struct Emap Emap; +struct Emap +{ + int type; + uvlong base; + uvlong top; +}; +static Emap emap[128]; +int nemap; + +static int +emapcmp(const void *va, const void *vb) +{ + Emap *a, *b; + + a = (Emap*)va; + b = (Emap*)vb; + if(a->top < b->top) + return -1; + if(a->top > b->top) + return 1; + if(a->base < b->base) + return -1; + if(a->base > b->base) + return 1; + return 0; +} + +static void +map(uintptr base, uintptr len, int type) +{ + uintptr e, n, *pte, flags, maxkpa; + + /* + * Split any call crossing MemMin to make below simpler. + */ + if(base < MemMin && len > MemMin-base){ + n = MemMin - base; + map(base, n, type); + map(MemMin, len-n, type); + } + + /* + * Let lowraminit and umbscan hash out the low MemMin. + */ + if(base < MemMin) + return; + + /* + * Any non-memory below 16*MB is used as upper mem blocks. 
+ */ + if(type == MemUPA && base < 16*MB && len > 16*MB-base){ + map(base, 16*MB-base, MemUMB); + map(16*MB, len-(16*MB-base), MemUPA); + return; + } + + /* + * Memory below CPU0END is reserved for the kernel + * and already mapped. + */ + if(base < PADDR(CPU0END)){ + n = PADDR(CPU0END) - base; + if(len <= n) + return; + map(PADDR(CPU0END), len-n, type); + return; + } + + /* + * Memory between KTZERO and end is the kernel itself + * and is already mapped. + */ + if(base < PADDR(KTZERO) && len > PADDR(KTZERO)-base){ + map(base, PADDR(KTZERO)-base, type); + return; + } + if(PADDR(KTZERO) < base && base < PADDR(PGROUND((uintptr)end))){ + n = PADDR(PGROUND((uintptr)end)); + if(len <= n) + return; + map(PADDR(PGROUND((uintptr)end)), len-n, type); + return; + } + + /* + * Now we have a simple case. + */ + switch(type){ + case MemRAM: + mapfree(&rmapram, base, len); + flags = PTEWRITE|PTEVALID; + break; + case MemUMB: + mapfree(&rmapumb, base, len); + flags = PTEWRITE|PTEUNCACHED|PTEVALID; + break; + case MemUPA: + mapfree(&rmapupa, base, len); + flags = 0; + break; + default: + case MemReserved: + flags = 0; + break; + } + + /* + * bottom MemMin is already mapped - just twiddle flags. 
+	 * (not currently used - see above)
+	 */
+	if(base < MemMin){
+		e = base+len;
+		base &= ~((uintptr)PGLSZ(1)-1);
+		for(; base < e; base += PGLSZ(1)){
+			pte = mmuwalk(m->pml4, base+KZERO, 1, 0);
+			if(pte != 0 && *pte & PTEVALID)
+				*pte |= flags;
+		}
+		return;
+	}
+
+	if(flags){
+		maxkpa = -KZERO;
+		if(base >= maxkpa)
+			return;
+		if(len > maxkpa-base)
+			len = maxkpa - base;
+		pmap(m->pml4, base|flags, base+KZERO, len);
+	}
+}
+
+static int
+e820scan(void)
+{
+	uintptr base, len, last;
+	Emap *e;
+	char *s;
+	int i;
+
+	/* passed by bootloader */
+	if((s = getconf("*e820")) == nil)
+		if((s = getconf("e820")) == nil)
+			return -1;
+	nemap = 0;
+	while(nemap < nelem(emap)){
+		while(*s == ' ')
+			s++;
+		if(*s == 0)
+			break;
+		e = emap + nemap;
+		e->type = 1;
+		if(s[1] == ' '){	/* new format */
+			e->type = s[0] - '0';
+			s += 2;
+		}
+		e->base = strtoull(s, &s, 16);
+		if(*s != ' ')
+			break;
+		e->top = strtoull(s, &s, 16);
+		if(*s != ' ' && *s != 0)
+			break;
+		if(e->base < e->top)
+			nemap++;
+	}
+	if(nemap == 0)
+		return -1;
+	qsort(emap, nemap, sizeof emap[0], emapcmp);
+	last = 0;
+	for(i=0; i<nemap; i++){
+		e = &emap[i];
+		if(e->top <= last)
+			continue;
+		if(e->base < last)
+			base = last;
+		else
+			base = e->base;
+		len = e->top - base;
+		/*
+		 * If the map skips addresses, mark them available.
+		 */
+		if(last < base)
+			map(last, base-last, MemUPA);
+		map(base, len, (e->type == 1) ? MemRAM : MemReserved);
+		last = base + len;
+		if(last == 0)
+			break;
+	}
+	if(last != 0)
+		map(last, -last, MemUPA);
+	return 0;
+}
+
+void
+meminit(void)
+{
+	int i;
+	Map *mp;
+	Confmem *cm;
+	uintptr lost;
+
+	umbscan();
+	// lowraminit();
+	e820scan();
+
+	/*
+	 * Set the conf entries describing banks of allocatable memory.
+ */ + for(i=0; ibase = mp->addr; + cm->npage = mp->size/BY2PG; + } + + lost = 0; + for(; i init.h + +apbootstrap.h: apbootstrap.s + $AS apbootstrap.s + $LD -l -R1 -s -o apbootstrap.out -T$APBOOTSTRAP apbootstrap.$O + {echo 'uchar apbootstrap[]={' + dd -if apbootstrap.out -bs 1 -iseek 40 | + xd -1x | + sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo '};'} > $target + +sd53c8xx.i: sd53c8xx.n + aux/na $prereq > $target + +acid:V: + $CC -a -w -I. -. ../pc/i8253.c>acid + +%.clean:V: + rm -f $stem.c [9bz]$stem [9bz]$stem.gz boot$stem.* apbootstrap.h init.h $PCHEADERS diff --git a/sys/src/9/pc64/mmu.c b/sys/src/9/pc64/mmu.c new file mode 100644 index 000000000..43918c678 --- /dev/null +++ b/sys/src/9/pc64/mmu.c @@ -0,0 +1,505 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" + +/* + * Simple segment descriptors with no translation. + */ +#define EXECSEGM(p) { 0, SEGL|SEGP|SEGPL(p)|SEGEXEC } +#define DATASEGM(p) { 0, SEGB|SEGG|SEGP|SEGPL(p)|SEGDATA|SEGW } +#define EXEC32SEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR } +#define DATA32SEGM(p) { 0xFFFF, SEGB|SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW } + +Segdesc gdt[NGDT] = +{ +[NULLSEG] { 0, 0}, /* null descriptor */ +[KESEG] EXECSEGM(0), /* kernel code */ +[KDSEG] DATASEGM(0), /* kernel data */ +[UE32SEG] EXEC32SEGM(3), /* user code 32 bit*/ +[UDSEG] DATA32SEGM(3), /* user data/stack */ +[UESEG] EXECSEGM(3), /* user code */ +}; + +static int didmmuinit = 0; + +/* level */ +enum { + PML4E = 2, + PDPE = 1, + PDE = 0, + + MAPBITS = 8*sizeof(m->mmumap[0]), +}; + +static void +loadptr(u16int lim, uintptr off, void (*load)(void*)) +{ + u64int b[2], *o; + u16int *s; + + o = &b[1]; + s = ((u16int*)o)-1; + + *s = lim; + *o = off; + + (*load)(s); +} + +static void +taskswitch(uintptr stack) +{ + Tss *tss; + + tss = m->tss; + tss->rsp0[0] = (u32int)stack; + tss->rsp0[1] = stack >> 32; + tss->rsp1[0] = (u32int)stack; + tss->rsp1[1] = 
stack >> 32; + tss->rsp2[0] = (u32int)stack; + tss->rsp2[1] = stack >> 32; + mmuflushtlb(); +} + +void +mmuinit(void) +{ + uintptr x; + vlong v; + int i; + + didmmuinit = 1; + + /* zap double map done by l.s */ + m->pml4[0] = 0; + m->pml4[512] = 0; + + m->tss = mallocz(sizeof(Tss), 1); + if(m->tss == nil) + panic("mmuinit: no memory for Tss"); + m->tss->iomap = 0xDFFF; + for(i=0; i<14; i+=2){ + x = (uintptr)m + MACHSIZE; + m->tss->ist[i] = x; + m->tss->ist[i+1] = x>>32; + } + + /* + * We used to keep the GDT in the Mach structure, but it + * turns out that that slows down access to the rest of the + * page. Since the Mach structure is accessed quite often, + * it pays off anywhere from a factor of 1.25 to 2 on real + * hardware to separate them (the AMDs are more sensitive + * than Intels in this regard). Under VMware it pays off + * a factor of about 10 to 100. + */ + memmove(m->gdt, gdt, sizeof gdt); + + x = (uintptr)m->tss; + m->gdt[TSSSEG+0].d0 = (x<<16)|(sizeof(Tss)-1); + m->gdt[TSSSEG+0].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP; + m->gdt[TSSSEG+1].d0 = x>>32; + m->gdt[TSSSEG+1].d1 = 0; + + loadptr(sizeof(gdt)-1, (uintptr)m->gdt, lgdt); + loadptr(sizeof(Segdesc)*512-1, (uintptr)IDTADDR, lidt); + taskswitch((uintptr)m + MACHSIZE); + ltr(TSSSEL); + + wrmsr(0xc0000100, 0ull); /* 64 bit fsbase */ + wrmsr(0xc0000101, (uvlong)&machp[m->machno]); /* 64 bit gsbase */ + wrmsr(0xc0000102, 0ull); /* kernel gs base */ + + /* enable syscall extension */ + rdmsr(0xc0000080, &v); + v |= 1ull; + wrmsr(0xc0000080, v); + + /* IA32_STAR */ + wrmsr(0xc0000081, ((uvlong)UE32SEL << 48) | ((uvlong)KESEL << 32)); + + /* IA32_LSTAR */ + wrmsr(0xc0000082, (uvlong)syscallentry); + + /* SYSCALL flags mask */ + wrmsr(0xc0000084, 0x200); +} + +/* + * These could go back to being macros once the kernel is debugged, + * but the extra checking is nice to have. 
+ */ +void* +kaddr(uintptr pa) +{ + if(pa > (uintptr)-KZERO) + panic("kaddr: pa=%#p pc=%#p", pa, getcallerpc(&pa)); + return (void*)(pa+KZERO); +} + +uintptr +paddr(void *v) +{ + uintptr va; + + va = (uintptr)v; + if(va >= KZERO) + return va-KZERO; + if(va >= VMAP) + return va-VMAP; + panic("paddr: va=%#p pc=%#p", va, getcallerpc(&v)); + return 0; +} + +static MMU* +mmualloc(void) +{ + MMU *p; + int i, n; + + p = m->mmufree; + if(p == nil){ + n = 256; + p = malloc(n * sizeof(MMU)); + if(p == nil) + panic("mmualloc: out of memory for MMU"); + p->page = mallocalign(n * PTSZ, BY2PG, 0, 0); + if(p->page == nil) + panic("mmualloc: out of memory for MMU pages"); + for(i=1; immucount += n; + } + m->mmucount--; + m->mmufree = p->next; + p->next = nil; + return p; +} + +uintptr* +mmuwalk(uintptr* table, uintptr va, int level, int create) +{ + uintptr pte, *page; + int i, x; + MMU *p; + + x = PTLX(va, 3); + for(i = 2; i >= level; i--){ + pte = table[x]; + if(pte & PTEVALID){ + if(pte & PTESIZE) + return 0; + table = KADDR(PPN(pte)); + } else { + if(!create) + return 0; + pte = PTEWRITE|PTEVALID; + if(va < VMAP){ + if(va < TSTKTOP) + pte |= PTEUSER; + p = mmualloc(); + p->index = x; + p->level = i; + if(i == PML4E){ + /* PML4 entries linked to head */ + p->next = up->mmuhead; + if(p->next == nil) + up->mmutail = p; + up->mmuhead = p; + if(p->index <= PTLX(TSTKTOP, 3)) + m->mmumap[p->index/MAPBITS] |= 1ull<<(p->index%MAPBITS); + } else { + /* PDP and PD entries linked to tail */ + up->mmutail->next = p; + } + page = p->page; + } else if(didmmuinit) { + page = mallocalign(PTSZ, BY2PG, 0, 0); + } else + page = rampage(); + memset(page, 0, PTSZ); + table[x] = PADDR(page) | pte; + table = page; + } + x = PTLX(va, i); + } + return &table[x]; +} + +static int +ptecount(uintptr va, int level) +{ + return (1<= KZERO) + flags |= PTEGLOBAL; + while(size > 0){ + if(size >= PGLSZ(1) && (va % PGLSZ(1)) == 0) + flags |= PTESIZE; + l = (flags & PTESIZE) != 0; + z = PGLSZ(l); + pte = 
mmuwalk(pml4, va, l, 1);
+		if(pte == 0){
+			pte = mmuwalk(pml4, va, ++l, 0);
+			if(pte && (*pte & PTESIZE)){
+				flags |= PTESIZE;
+				z = va & PGLSZ(l)-1;
+				va -= z;
+				pa -= z;
+				size += z;
+				continue;
+			}
+			panic("pmap: pa=%#p va=%#p size=%d", pa, va, size);
+		}
+		ptee = pte + ptecount(va, l);
+		while(size > 0 && pte < ptee){
+			*pte++ = pa | flags;
+			pa += z;
+			va += z;
+			size -= z;
+		}
+	}
+}
+
+static void
+mmuzap(void)
+{
+	uintptr *pte;
+	u64int w;
+	int i, x;
+
+	pte = m->pml4;
+	pte[PTLX(KMAP, 3)] = 0;
+
+	/* common case */
+	pte[PTLX(UTZERO, 3)] = 0;
+	pte[PTLX(TSTKTOP, 3)] = 0;
+	m->mmumap[PTLX(UTZERO, 3)/MAPBITS] &= ~(1ull<<(PTLX(UTZERO, 3)%MAPBITS));
+	m->mmumap[PTLX(TSTKTOP, 3)/MAPBITS] &= ~(1ull<<(PTLX(TSTKTOP, 3)%MAPBITS));
+
+	for(i = 0; i < nelem(m->mmumap); pte += MAPBITS, i++){
+		w = m->mmumap[i];
+		if(w == 0)
+			continue;
+		x = 0;
+		do {
+			if(w & 1)
+				pte[x] = 0;
+			x++;
+			w >>= 1;
+		} while(w);
+		m->mmumap[i] = 0;
+	}
+}
+
+static void
+mmufree(Proc *proc)
+{
+	MMU *p;
+
+	p = proc->mmutail;
+	if(p != nil){
+		p->next = m->mmufree;
+		m->mmufree = proc->mmuhead;
+		proc->mmuhead = proc->mmutail = nil;
+		m->mmucount += proc->mmucount;
+		proc->mmucount = 0;
+	}
+}
+
+void
+flushmmu(void)
+{
+	int x;
+
+	x = splhi();
+	up->newtlb = 1;
+	mmuswitch(up);
+	splx(x);
+}
+
+void
+mmuswitch(Proc *proc)
+{
+	uintptr pte;
+	MMU *p;
+
+	mmuzap();
+	if(proc->newtlb){
+		mmufree(proc);
+		proc->newtlb = 0;
+	}
+	for(p = proc->mmuhead; p && p->level==PML4E; p = p->next){
+		pte = PADDR(p->page) | PTEWRITE|PTEVALID;
+		if(p->index <= PTLX(TSTKTOP, 3)){
+			m->mmumap[p->index/MAPBITS] |= 1ull<<(p->index%MAPBITS);
+			pte |= PTEUSER;
+		}
+		m->pml4[p->index] = pte;
+	}
+	taskswitch((uintptr)proc->kstack+KSTACK);
+}
+
+void
+mmurelease(Proc *proc)
+{
+	mmuzap();
+	mmufree(proc);
+	taskswitch((uintptr)m+MACHSIZE);
+}
+
+void
+putmmu(uintptr va, uintptr pa, Page *)
+{
+	uintptr *pte, old;
+	int x;
+
+	x = splhi();
+	pte = mmuwalk(m->pml4, va, 0, 1);
+	if(pte == 0){
+		panic("putmmu: 
bug: va=%#p pa=%#p", va, pa); + return; + } + old = *pte; + *pte = pa | PTEVALID|PTEUSER; + splx(x); + if(old & PTEVALID) + invlpg(va); +} + +void +checkmmu(uintptr va, uintptr pa) +{ + USED(va, pa); +} + +uintptr +cankaddr(uintptr pa) +{ + if(pa >= -KZERO) + return 0; + return -KZERO - pa; +} + +void +countpagerefs(ulong *ref, int print) +{ + USED(ref, print); +} + +KMap* +kmap(Page *page) +{ + uintptr *pte, pa, va; + int x; + + pa = page->pa; + if(cankaddr(pa) != 0) + return (KMap*)KADDR(pa); + + x = splhi(); + va = KMAP + ((uintptr)m->kmapindex << PGSHIFT); + pte = mmuwalk(m->pml4, va, 0, 1); + if(pte == 0 || *pte & PTEVALID) + panic("kmap: pa=%#p va=%#p", pa, va); + *pte = pa | PTEWRITE|PTEVALID; + m->kmapindex = (m->kmapindex + 1) % (1<kmapindex == 0) + mmuflushtlb(); + splx(x); + return (KMap*)va; +} + +void +kunmap(KMap *k) +{ + uintptr *pte, va; + int x; + + va = (uintptr)k; + if(va >= KZERO) + return; + + x = splhi(); + pte = mmuwalk(m->pml4, va, 0, 0); + if(pte == 0 || (*pte & PTEVALID) == 0) + panic("kunmap: va=%#p", va); + *pte = 0; + splx(x); +} + +/* + * Add a device mapping to the vmap range. + */ +void* +vmap(uintptr pa, int size) +{ + uintptr va; + int o; + + if(size <= 0 || pa & ~0xffffffffull) + panic("vmap: pa=%#p size=%d pc=%#p", pa, size, getcallerpc(&pa)); + if(cankaddr(pa) >= size) + va = pa+KZERO; + else + va = pa+VMAP; + /* + * might be asking for less than a page. + */ + o = pa & (BY2PG-1); + pa -= o; + va -= o; + size += o; + pmap(m->pml4, pa | PTEUNCACHED|PTEWRITE|PTEVALID, va, size); + return (void*)(va+o); +} + +void +vunmap(void *v, int) +{ + paddr(v); /* will panic on error */ +} + +/* + * vmapsync() is currently unused as the VMAP and KZERO PDPs + * are shared between processors. 
(see mpstartap) + */ +int +vmapsync(uintptr va) +{ + uintptr *pte1, *pte2; + int level; + + if(va < VMAP || m->machno == 0) + return 0; + + for(level=0; level<2; level++){ + pte1 = mmuwalk(MACHP(0)->pml4, va, level, 0); + if(pte1 && *pte1 & PTEVALID){ + pte2 = mmuwalk(m->pml4, va, level, 1); + if(pte2 == 0) + break; + if(pte1 != pte2) + *pte2 = *pte1; + return 1; + } + } + return 0; +} diff --git a/sys/src/9/pc64/pc64 b/sys/src/9/pc64/pc64 new file mode 100644 index 000000000..e94c27085 --- /dev/null +++ b/sys/src/9/pc64/pc64 @@ -0,0 +1,153 @@ +# pcf - pc terminal with local disk +dev + root + cons + arch + pnp pci + env + pipe + proc + mnt + srv + shr + dup + rtc + ssl + tls + cap + kprof + fs + + ether netif + ip arp chandial ip ipv6 ipaux iproute netlog ethermedium nullmedium pktmedium inferno + + draw screen vga vgax swcursor + mouse mouse + kbd + vga + + sd +# floppy dma +# aoe +# lpt + + audio dma +# pccard +# i82365 cis + uart + usb + +link +# segdesc +# devpccard +# devi82365 +# cputemp +# apm apmjump +# ether2000 ether8390 +# ether2114x pci +# ether589 etherelnk3 +# ether79c970 pci +# ether8003 ether8390 +# ether8139 pci +# ether8169 pci ethermii +# should be obsoleted by igbe +# ether82543gc pci +# ether82557 pci + ether82563 pci +# ether82598 pci +# ether83815 pci +# etherbcm pci +# etherdp83820 pci +# etherec2t ether8390 +# etherelnk3 pci +# etherga620 pci +# etherigbe pci ethermii +# ethervgbe pci ethermii +# ethervt6102 pci ethermii +# ethervt6105m pci ethermii +# ethersink +# ethersmc devi82365 cis +# etheryuk pci +# etherwavelan wavelan devi82365 cis pci + etheriwl pci wifi +# etherrt2860 pci wifi + ethermedium +# pcmciamodem + netdevmedium + loopbackmedium + usbuhci +# usbohci + usbehci usbehcipc + +# audiosb16 dma +# audioac97 audioac97mix + audiohda + +misc + archacpi mp apic squidboy + archmp mp apic squidboy + mtrr + +# sdaoe +# sdide pci sdscsi +# sd53c8xx pci sdscsi +# sdmylex pci sdscsi +# sdiahci pci sdscsi led +# sdodin pci sdscsi led +# 
sdvirtio pci sdscsi +# sdmmc pci pmmc +# sdloop + +# uarti8250 +# uartisa +# uartpci pci + +# vga3dfx +cur +# vgaark2000pv +cur +# vgabt485 =cur +# vgaclgd542x +cur +# vgaclgd546x +cur +# vgact65545 +cur +# vgacyber938x +cur +# vgaet4000 +cur +# vgageode +cur +# vgahiqvideo +cur +# vgai81x +cur +# vgamach64xx +cur +# vgamga2164w +cur +# vgamga4xx +cur +# vganeomagic +cur +# vganvidia +cur +# vgaradeon +cur +# vgargb524 =cur +# vgas3 +cur vgasavage +# vgat2r4 +cur +# vgatvp3020 =cur +# vgatvp3026 =cur + vgavesa +# vgavmware +cur + +ip + tcp + udp + rudp + ipifc + icmp + icmp6 + gre + ipmux + esp + il + +port + int cpuserver = 0; + +boot boot + tcp + local + +bootdir + boot$CONF.out boot + /$objtype/bin/paqfs + /$objtype/bin/auth/factotum + bootfs.paq diff --git a/sys/src/9/pc64/squidboy.c b/sys/src/9/pc64/squidboy.c new file mode 100644 index 000000000..b0d9f409f --- /dev/null +++ b/sys/src/9/pc64/squidboy.c @@ -0,0 +1,113 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" + +#include "mp.h" + +extern void checkmtrr(void); + +static void +squidboy(Apic* apic) +{ + machinit(); + mmuinit(); + cpuidentify(); + cpuidprint(); + checkmtrr(); + apic->online = 1; + coherence(); + + lapicinit(apic); + lapiconline(); + syncclock(); + timersinit(); + + lock(&active); + active.machs |= 1<machno; + unlock(&active); + + while(!active.thunderbirdsarego) + microdelay(100); + + schedinit(); +} + +void +mpstartap(Apic* apic) +{ + uintptr *apbootp, *pml4, *pdp0; + Segdesc *gdt; + Mach *mach; + uchar *p; + int i; + + /* + * Initialise the AP page-tables and Mach structure. + * Xspanalloc will panic if an allocation can't be made. 
+ */ + p = xspanalloc(2*PTSZ + BY2PG + MACHSIZE, BY2PG, 0); + pml4 = (uintptr*)p; + p += PTSZ; + pdp0 = (uintptr*)p; + p += PTSZ; + gdt = (Segdesc*)p; + p += BY2PG; + mach = (Mach*)p; + + memset(pml4, 0, PTSZ); + memset(pdp0, 0, PTSZ); + memset(gdt, 0, BY2PG); + memset(mach, 0, MACHSIZE); + + mach->machno = apic->machno; + mach->pml4 = pml4; + mach->gdt = gdt; /* filled by mmuinit */ + MACHP(mach->machno) = mach; + + /* + * map KZERO (note that we share the KZERO (and VMAP) + * PDP between processors. + */ + pml4[PTLX(KZERO, 3)] = MACHP(0)->pml4[PTLX(KZERO, 3)]; + + /* double map */ + pml4[0] = PADDR(pdp0) | PTEWRITE|PTEVALID; + pdp0[0] = *mmuwalk(pml4, KZERO, 2, 0); + + /* + * Tell the AP where its kernel vector and pdb are. + * The offsets are known in the AP bootstrap code. + */ + apbootp = (uintptr*)(APBOOTSTRAP+0x08); + apbootp[0] = (uintptr)squidboy; /* assembler jumps here eventually */ + apbootp[1] = (uintptr)PADDR(pml4); + apbootp[2] = (uintptr)apic; + apbootp[3] = (uintptr)mach; + + /* + * Universal Startup Algorithm. 
+ */ + p = KADDR(0x467); /* warm-reset vector */ + *p++ = PADDR(APBOOTSTRAP); + *p++ = PADDR(APBOOTSTRAP)>>8; + i = (PADDR(APBOOTSTRAP) & ~0xFFFF)/16; + /* code assumes i==0 */ + if(i != 0) + print("mp: bad APBOOTSTRAP\n"); + *p++ = i; + *p = i>>8; + coherence(); + + nvramwrite(0x0F, 0x0A); /* shutdown code: warm reset upon init ipi */ + lapicstartap(apic, PADDR(APBOOTSTRAP)); + for(i = 0; i < 1000; i++){ + if(apic->online) + break; + delay(10); + } + nvramwrite(0x0F, 0x00); +} diff --git a/sys/src/9/pc64/trap.c b/sys/src/9/pc64/trap.c new file mode 100644 index 000000000..e2dee959e --- /dev/null +++ b/sys/src/9/pc64/trap.c @@ -0,0 +1,1065 @@ +#include "u.h" +#include "tos.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" +#include + +static int trapinited; + +void noted(Ureg*, ulong); + +static void debugbpt(Ureg*, void*); +static void fault386(Ureg*, void*); +static void doublefault(Ureg*, void*); +static void unexpected(Ureg*, void*); +static void _dumpstack(Ureg*); + +static Lock vctllock; +static Vctl *vctl[256]; + +enum +{ + Ntimevec = 20 /* number of time buckets for each intr */ +}; +ulong intrtimes[256][Ntimevec]; + +void +intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name) +{ + int vno; + Vctl *v; + + if(f == nil){ + print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n", + irq, tbdf, name); + return; + } + + if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0)){ + print("intrenable: got unassigned irq %d, tbdf 0x%uX for %s\n", + irq, tbdf, name); + irq = -1; + } + + if((v = xalloc(sizeof(Vctl))) == nil) + panic("intrenable: out of memory"); + v->isintr = 1; + v->irq = irq; + v->tbdf = tbdf; + v->f = f; + v->a = a; + strncpy(v->name, name, KNAMELEN-1); + v->name[KNAMELEN-1] = 0; + + ilock(&vctllock); + vno = arch->intrenable(v); + if(vno == -1){ + iunlock(&vctllock); + print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n", 
+ irq, tbdf, v->name); + xfree(v); + return; + } + if(vctl[vno]){ + if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi) + panic("intrenable: handler: %s %s %#p %#p %#p %#p", + vctl[vno]->name, v->name, + vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi); + v->next = vctl[vno]; + } + vctl[vno] = v; + iunlock(&vctllock); +} + +int +intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name) +{ + Vctl **pv, *v; + int vno; + + /* + * For now, none of this will work with the APIC code, + * there is no mapping between irq and vector as the IRQ + * is pretty meaningless. + */ + if(arch->intrvecno == nil) + return -1; + vno = arch->intrvecno(irq); + ilock(&vctllock); + pv = &vctl[vno]; + while (*pv && + ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a || + strcmp((*pv)->name, name))) + pv = &((*pv)->next); + assert(*pv); + + v = *pv; + *pv = (*pv)->next; /* Link out the entry */ + + if(vctl[vno] == nil && arch->intrdisable != nil) + arch->intrdisable(irq); + iunlock(&vctllock); + xfree(v); + return 0; +} + +static long +irqallocread(Chan*, void *vbuf, long n, vlong offset) +{ + char *buf, *p, str[2*(11+1)+KNAMELEN+1+1]; + int m, vno; + long oldn; + Vctl *v; + + if(n < 0 || offset < 0) + error(Ebadarg); + + oldn = n; + buf = vbuf; + for(vno=0; vnonext){ + m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name); + if(m <= offset) /* if do not want this, skip entry */ + offset -= m; + else{ + /* skip offset bytes */ + m -= offset; + p = str+offset; + offset = 0; + + /* write at most max(n,m) bytes */ + if(m > n) + m = n; + memmove(buf, p, m); + n -= m; + buf += m; + + if(n == 0) + return oldn; + } + } + } + return oldn - n; +} + +void +trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name) +{ + Vctl *v; + + if(vno < 0 || vno >= VectorPIC) + panic("trapenable: vno %d", vno); + if((v = xalloc(sizeof(Vctl))) == nil) + panic("trapenable: out of memory"); + v->tbdf = BUSUNKNOWN; + v->f = f; + v->a 
= a; + strncpy(v->name, name, KNAMELEN-1); + v->name[KNAMELEN-1] = 0; + + ilock(&vctllock); + if(vctl[vno]) + v->next = vctl[vno]->next; + vctl[vno] = v; + iunlock(&vctllock); +} + +static void +nmienable(void) +{ + int x; + + /* + * Hack: should be locked with NVRAM access. + */ + outb(0x70, 0x80); /* NMI latch clear */ + outb(0x70, 0); + + x = inb(0x61) & 0x07; /* Enable NMI */ + outb(0x61, 0x08|x); + outb(0x61, x); +} + +void +trapinit0(void) +{ + u32int d1, v; + uintptr vaddr; + Segdesc *idt; + + idt = (Segdesc*)IDTADDR; + vaddr = (uintptr)vectortable; + for(v = 0; v < 256; v++){ + d1 = (vaddr & 0xFFFF0000)|SEGP; + switch(v){ + + case VectorBPT: + d1 |= SEGPL(3)|SEGIG; + break; + + case VectorSYSCALL: + d1 |= SEGPL(3)|SEGIG; + break; + + default: + d1 |= SEGPL(0)|SEGIG; + break; + } + + idt->d0 = (vaddr & 0xFFFF)|(KESEL<<16); + idt->d1 = d1; + idt++; + + idt->d0 = (vaddr >> 32); + idt->d1 = 0; + idt++; + + vaddr += 6; + } +} + +void +trapinit(void) +{ + /* + * Special traps. + * Syscall() is called directly without going through trap(). 
+ */ + trapenable(VectorBPT, debugbpt, 0, "debugpt"); + trapenable(VectorPF, fault386, 0, "fault386"); + trapenable(Vector2F, doublefault, 0, "doublefault"); + trapenable(Vector15, unexpected, 0, "unexpected"); + nmienable(); + addarchfile("irqalloc", 0444, irqallocread, nil); + trapinited = 1; +} + +static char* excname[32] = { + "divide error", + "debug exception", + "nonmaskable interrupt", + "breakpoint", + "overflow", + "bounds check", + "invalid opcode", + "coprocessor not available", + "double fault", + "coprocessor segment overrun", + "invalid TSS", + "segment not present", + "stack exception", + "general protection violation", + "page fault", + "15 (reserved)", + "coprocessor error", + "alignment check", + "machine check", + "19 (reserved)", + "20 (reserved)", + "21 (reserved)", + "22 (reserved)", + "23 (reserved)", + "24 (reserved)", + "25 (reserved)", + "26 (reserved)", + "27 (reserved)", + "28 (reserved)", + "29 (reserved)", + "30 (reserved)", + "31 (reserved)", +}; + +/* + * keep histogram of interrupt service times + */ +void +intrtime(Mach*, int vno) +{ + ulong diff; + ulong x; + + x = perfticks(); + diff = x - m->perf.intrts; + m->perf.intrts = x; + + m->perf.inintr += diff; + if(up == nil && m->perf.inidle > diff) + m->perf.inidle -= diff; + + diff /= m->cpumhz*100; /* quantum = 100µsec */ + if(diff >= Ntimevec) + diff = Ntimevec-1; + intrtimes[vno][diff]++; +} + +/* go to user space */ +void +kexit(Ureg*) +{ + uvlong t; + Tos *tos; + + /* precise time accounting, kernel exit */ + tos = (Tos*)((uintptr)USTKTOP-sizeof(Tos)); + cycles(&t); + tos->kcycles += t - up->kentry; + tos->pcycles = t + up->pcycles; + tos->pid = up->pid; +} + +void +display(char *s) +{ + char *d; + + d = (char*)KADDR(0xB8000); + while(*s){ + *d = *s++; + d += 2; + } +} + +void +trap(Ureg *ureg) +{ + int clockintr, i, vno, user; + char buf[ERRMAX]; + Vctl *ctl, *v; + Mach *mach; + + if(!trapinited){ + /* fault386 can give a better error message */ + if(ureg->type == VectorPF) + 
fault386(ureg, nil);
+		panic("trap %llud: not ready", ureg->type);
+	}
+
+	m->perf.intrts = perfticks();
+	user = userureg(ureg);
+	if(user){
+		up->dbgreg = ureg;
+		cycles(&up->kentry);
+	}
+
+	clockintr = 0;
+
+	vno = ureg->type;
+
+	if(ctl = vctl[vno]){
+		if(ctl->isintr){
+			m->intr++;
+			if(vno >= VectorPIC)
+				m->lastintr = ctl->irq;
+		}
+		if(ctl->isr)
+			ctl->isr(vno);
+		for(v = ctl; v != nil; v = v->next){
+			if(v->f)
+				v->f(ureg, v->a);
+		}
+		if(ctl->eoi)
+			ctl->eoi(vno);
+
+		if(ctl->isintr){
+			intrtime(m, vno);
+
+			if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
+				clockintr = 1;
+
+			if(up && !clockintr)
+				preempted();
+		}
+	}
+	else if(vno < nelem(excname) && user){
+		spllo();
+		sprint(buf, "sys: trap: %s", excname[vno]);
+		dumpregs(ureg);
+		postnote(up, 1, buf, NDebug);
+	}
+	else if(vno >= VectorPIC){
+		/*
+		 * An unknown interrupt.
+		 * Check for a default IRQ7. This can happen when
+		 * the IRQ input goes away before the acknowledge.
+		 * In this case, a 'default IRQ7' is generated, but
+		 * the corresponding bit in the ISR isn't set.
+		 * In fact, just ignore all such interrupts.
+		 */
+
+		/* call all interrupt routines, just in case */
+		for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
+			ctl = vctl[i];
+			if(ctl == nil)
+				continue;
+			if(!ctl->isintr)
+				continue;
+			for(v = ctl; v != nil; v = v->next){
+				if(v->f)
+					v->f(ureg, v->a);
+			}
+			/* should we do this? */
+			if(ctl->eoi)
+				ctl->eoi(i);
+		}
+
+		/* clear the interrupt */
+		i8259isr(vno);
+
+		if(0)print("cpu%d: spurious interrupt %d, last %d\n",
+			m->machno, vno, m->lastintr);
+		if(0)if(conf.nmach > 1){
+			for(i = 0; i < 32; i++){
+				if(!(active.machs & (1<<i)))
+					continue;
+				mach = MACHP(i);
+				if(m->machno == mach->machno)
+					continue;
+				print(" cpu%d: last %d",
+					mach->machno, mach->lastintr);
+			}
+			print("\n");
+		}
+		m->spuriousintr++;
+		if(user)
+			kexit(ureg);
+		return;
+	}
+	else{
+		if(vno == VectorNMI){
+			/*
+			 * Don't re-enable, it confuses the crash dumps.
+ nmienable(); + */ + iprint("cpu%d: PC %#p\n", m->machno, ureg->pc); + while(m->machno != 0) + ; + } + + if(!user){ + void (*pc)(void); + + extern void _rdmsrinst(void); + extern void _wrmsrinst(void); + + pc = (void*)ureg->pc; + if(pc == _rdmsrinst || pc == _wrmsrinst){ + if(vno == VectorGPF){ + ureg->bp = -1; + ureg->pc += 2; + return; + } + } + } + + dumpregs(ureg); + if(!user){ + ureg->sp = (uintptr)&ureg->sp; + _dumpstack(ureg); + } + if(vno < nelem(excname)) + panic("%s", excname[vno]); + panic("unknown trap/intr: %d", vno); + } + splhi(); + + /* delaysched set because we held a lock or because our quantum ended */ + if(up && up->delaysched && clockintr){ + sched(); + splhi(); + } + + if(user){ + if(up->procctl || up->nnote) + notify(ureg); + kexit(ureg); + } +} + +void +dumpregs(Ureg* ureg) +{ + if(up) + iprint("cpu%d: registers for %s %lud\n", + m->machno, up->text, up->pid); + else + iprint("cpu%d: registers for kernel\n", m->machno); + iprint("FLAGS=%#p TYPE=%#p ERROR=%#p PC=%#p SP=%#p", + ureg->flags, ureg->type, ureg->error, ureg->pc, ureg->sp); + iprint(" AX %#p BX %#p CX %#p DX %#p\n", + ureg->ax, ureg->bx, ureg->cx, ureg->dx); + iprint(" SI %#p DI %#p BP %#p\n", + ureg->si, ureg->di, ureg->bp); + iprint(" CS %4.4lluX DS %4.4uX ES %4.4uX FS %4.4uX GS %4.4uX\n", + ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF, + ureg->fs & 0xFFFF, ureg->gs & 0xFFFF); + + /* + * Processor control registers. + * If machine check exception, time stamp counter, page size extensions + * or enhanced virtual 8086 mode extensions are supported, there is a + * CR4. If there is a CR4 and machine check extensions, read the machine + * check address and machine check type registers if RDMSR supported. 
+ */
+	iprint(" CR0 %8.8llux CR2 %16.16llux CR3 %16.16llux",
+		getcr0(), getcr2(), getcr3());
+	if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
+		iprint(" CR4 %16.16llux", getcr4());
+		if((m->cpuiddx & (Mce|Cpumsr)) == (Mce|Cpumsr)){
+			vlong mca, mct;
+
+			rdmsr(0x00, &mca);
+			rdmsr(0x01, &mct);
+			iprint("\n MCA %8.8llux MCT %8.8llux", mca, mct);
+		}
+	}
+	iprint("\n ur %#p up %#p\n", ureg, up);
+}
+
+
+/*
+ * Fill in enough of Ureg to get a stack trace, and call a function.
+ * Used by debugging interface rdb.
+ */
+void
+callwithureg(void (*fn)(Ureg*))
+{
+	Ureg ureg;
+	ureg.pc = getcallerpc(&fn);
+	ureg.sp = (uintptr)&fn;
+	fn(&ureg);
+}
+
+static void
+_dumpstack(Ureg *ureg)
+{
+	uintptr l, v, i, estack;
+	extern ulong etext;
+	int x;
+	char *s;
+
+	if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
+		iprint("dumpstack disabled\n");
+		return;
+	}
+	iprint("dumpstack\n");
+
+	x = 0;
+	x += iprint("ktrace /kernel/path %#p %#p <<EOF\n", ureg->pc, ureg->sp);
+	i = 0;
+	if(up
+	&& (uintptr)&l >= (uintptr)up->kstack
+	&& (uintptr)&l <= (uintptr)up->kstack+KSTACK)
+		estack = (uintptr)up->kstack+KSTACK;
+	else if((uintptr)&l >= (uintptr)m->stack
+	&& (uintptr)&l <= (uintptr)m+MACHSIZE)
+		estack = (uintptr)m+MACHSIZE;
+	else
+		return;
+	x += iprint("estackx %p\n", estack);
+
+	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
+		v = *(uintptr*)l;
+		if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
+			/*
+			 * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
+			 * and CALL indirect through AX
+			 * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0),
+			 * but this is too clever and misses faulting address.
+ */ + x += iprint("%.8p=%.8p ", l, v); + i++; + } + if(i == 4){ + i = 0; + x += iprint("\n"); + } + } + if(i) + iprint("\n"); + iprint("EOF\n"); + + if(ureg->type != VectorNMI) + return; + + i = 0; + for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){ + iprint("%.8p ", *(uintptr*)l); + if(++i == 8){ + i = 0; + iprint("\n"); + } + } + if(i) + iprint("\n"); +} + +void +dumpstack(void) +{ + callwithureg(_dumpstack); +} + +static void +debugbpt(Ureg* ureg, void*) +{ + char buf[ERRMAX]; + + if(up == 0) + panic("kernel bpt"); + /* restore pc to instruction that caused the trap */ + ureg->pc--; + sprint(buf, "sys: breakpoint"); + postnote(up, 1, buf, NDebug); +} + +static void +doublefault(Ureg*, void*) +{ + panic("double fault"); +} + +static void +unexpected(Ureg* ureg, void*) +{ + print("unexpected trap %llud; ignoring\n", ureg->type); +} + +extern void checkpages(void); +static void +fault386(Ureg* ureg, void*) +{ + uintptr addr; + int read, user, n, insyscall; + char buf[ERRMAX]; + + addr = getcr2(); + read = !(ureg->error & 2); + user = userureg(ureg); + if(!user){ + if(vmapsync(addr)) + return; + if(addr >= USTKTOP) + panic("kernel fault: bad address pc=%#p addr=%#p", ureg->pc, addr); + if(up == nil) + panic("kernel fault: no user process pc=%#p addr=%#p", ureg->pc, addr); + } + if(up == nil) + panic("user fault: up=0 pc=%#p addr=%#p", ureg->pc, addr); + + insyscall = up->insyscall; + up->insyscall = 1; + n = fault(addr, read); + if(n < 0){ + dumpregs(ureg); + if(!user){ + panic("fault: %#p", addr); + } + checkpages(); + sprint(buf, "sys: trap: fault %s addr=%#p", + read ? "read" : "write", addr); + postnote(up, 1, buf, NDebug); + } + up->insyscall = insyscall; +} + +/* + * system calls + */ +#include "../port/systab.h" + +/* + * Syscall is called directly from assembler without going through trap(). 
+ */ +void +syscall(Ureg* ureg) +{ + char *e; + uintptr sp; + long long ret; + int i, s; + ulong scallnr; + vlong startns, stopns; + + if(!userureg(ureg)) + panic("syscall: cs 0x%4.4lluX", ureg->cs); + + cycles(&up->kentry); + + m->syscall++; + up->insyscall = 1; + up->pc = ureg->pc; + up->dbgreg = ureg; + + sp = ureg->sp; + scallnr = ureg->ax; + up->scallnr = scallnr; + + spllo(); + startns = 0; + up->nerrlab = 0; + ret = -1; + if(!waserror()){ + if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD)) + validaddr(sp, sizeof(Sargs)+BY2WD, 0); + + up->s = *((Sargs*)(sp+BY2WD)); + if(0){ + syscallfmt(scallnr, ureg->pc, (va_list)up->s.args); + print("syscall: %s\n", up->syscalltrace); + } + + if(up->procctl == Proc_tracesyscall){ + syscallfmt(scallnr, ureg->pc, (va_list)up->s.args); + s = splhi(); + up->procctl = Proc_stopme; + procctl(up); + splx(s); + startns = todget(nil); + } + if(scallnr >= nsyscall || systab[scallnr] == 0){ + pprint("bad sys call number %lud pc %#p\n", + scallnr, ureg->pc); + postnote(up, 1, "sys: bad sys call", NDebug); + error(Ebadarg); + } + up->psstate = sysctab[scallnr]; + ret = systab[scallnr]((va_list)up->s.args); + poperror(); + }else{ + /* failure: save the error buffer for errstr */ + e = up->syserrstr; + up->syserrstr = up->errstr; + up->errstr = e; + if(0 && up->pid == 1) + print("syscall %lud error %s\n", scallnr, up->syserrstr); + } + if(up->nerrlab){ + print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab); + for(i = 0; i < NERR; i++) + print("sp=%#p pc=%#p\n", + up->errlab[i].sp, up->errlab[i].pc); + panic("error stack"); + } + + /* + * Put return value in frame. On the x86 the syscall is + * just another trap and the return value from syscall is + * ignored. On other machines the return value is put into + * the results register by caller of syscall. 
+ */ + ureg->ax = ret; + + if(0){ + print("syscallret: %lud %s %s ret=%lld\n", + up->pid, up->text, sysctab[scallnr], ret); + } + + if(up->procctl == Proc_tracesyscall){ + stopns = todget(nil); + sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns); + s = splhi(); + up->procctl = Proc_stopme; + procctl(up); + splx(s); + } + + up->insyscall = 0; + up->psstate = 0; + + if(scallnr == NOTED) + noted(ureg, up->s.args[0]); + + if(scallnr!=RFORK && (up->procctl || up->nnote)){ + splhi(); + notify(ureg); + } + /* if we delayed sched because we held a lock, sched now */ + if(up->delaysched) + sched(); + kexit(ureg); +} + +/* + * Call user, if necessary, with note. + * Pass user the Ureg struct and the note on his stack. + */ +int +notify(Ureg* ureg) +{ + int l, s; + uintptr sp; + Note *n; + + if(up->procctl) + procctl(up); + if(up->nnote == 0) + return 0; + + if(up->fpstate == FPactive){ + fpsave(&up->fpsave); + up->fpstate = FPinactive; + } + up->fpstate |= FPillegal; + + s = spllo(); + qlock(&up->debug); + up->notepending = 0; + n = &up->note[0]; + if(strncmp(n->msg, "sys:", 4) == 0){ + l = strlen(n->msg); + if(l > ERRMAX-15) /* " pc=0x12345678\0" */ + l = ERRMAX-15; + sprint(n->msg+l, " pc=%#p", ureg->pc); + } + + if(n->flag!=NUser && (up->notified || up->notify==0)){ + qunlock(&up->debug); + if(n->flag == NDebug) + pprint("suicide: %s\n", n->msg); + pexit(n->msg, n->flag!=NDebug); + } + + if(up->notified){ + qunlock(&up->debug); + splhi(); + return 0; + } + + if(!up->notify){ + qunlock(&up->debug); + pexit(n->msg, n->flag!=NDebug); + } + sp = ureg->sp; + sp -= 256; /* debugging: preserve context causing problem */ + sp -= sizeof(Ureg); +if(0) print("%s %lud: notify %#p %#p %#p %s\n", + up->text, up->pid, ureg->pc, ureg->sp, sp, n->msg); + + if(!okaddr((uintptr)up->notify, 1, 0) + || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){ + qunlock(&up->debug); + pprint("suicide: bad address in notify\n"); + pexit("Suicide", 0); + } + + memmove((Ureg*)sp, 
ureg, sizeof(Ureg)); + *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */ + up->ureg = (void*)sp; + sp -= BY2WD+ERRMAX; + memmove((char*)sp, up->note[0].msg, ERRMAX); + sp -= 3*BY2WD; + *(uintptr*)(sp+2*BY2WD) = sp+3*BY2WD; /* arg 2 is string */ + *(uintptr*)(sp+1*BY2WD) = (uintptr)up->ureg; /* arg 1 is ureg* */ + *(uintptr*)(sp+0*BY2WD) = 0; /* arg 0 is pc */ + ureg->sp = sp; + ureg->pc = (uintptr)up->notify; + ureg->cs = UESEL; + ureg->ss = ureg->ds = ureg->es = UDSEL; + up->notified = 1; + up->nnote--; + memmove(&up->lastnote, &up->note[0], sizeof(Note)); + memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note)); + + qunlock(&up->debug); + splx(s); + return 1; + +} + +/* + * Return user to state before notify() + */ +void +noted(Ureg* ureg, ulong arg0) +{ + Ureg *nureg; + uintptr oureg, sp; + + qlock(&up->debug); + if(arg0!=NRSTR && !up->notified) { + qunlock(&up->debug); + pprint("call to noted() when not notified\n"); + pexit("Suicide", 0); + } + up->notified = 0; + + nureg = up->ureg; /* pointer to user returned Ureg struct */ + + up->fpstate &= ~FPillegal; + + /* sanity clause */ + oureg = (uintptr)nureg; + if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){ + qunlock(&up->debug); + pprint("bad ureg in noted or call to noted when not notified\n"); + pexit("Suicide", 0); + } + + /* don't let user change system flags */ + nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5); + nureg->cs |= 3; + nureg->ss |= 3; + + memmove(ureg, nureg, sizeof(Ureg)); + + switch(arg0){ + case NCONT: + case NRSTR: +if(0) print("%s %lud: noted %#p %#p\n", + up->text, up->pid, nureg->pc, nureg->sp); + if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->sp, BY2WD, 0)){ + qunlock(&up->debug); + pprint("suicide: trap in noted\n"); + pexit("Suicide", 0); + } + up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD)); + qunlock(&up->debug); + break; + + case NSAVE: + if(!okaddr(nureg->pc, BY2WD, 0) + || !okaddr(nureg->sp, BY2WD, 0)){ + qunlock(&up->debug); + pprint("suicide: 
trap in noted\n"); + pexit("Suicide", 0); + } + qunlock(&up->debug); + sp = oureg-4*BY2WD-ERRMAX; + splhi(); + ureg->sp = sp; + ((uintptr*)sp)[1] = oureg; /* arg 1 0(FP) is ureg* */ + ((uintptr*)sp)[0] = 0; /* arg 0 is pc */ + break; + + default: + up->lastnote.flag = NDebug; + /* fall through */ + + case NDFLT: + qunlock(&up->debug); + if(up->lastnote.flag == NDebug) + pprint("suicide: %s\n", up->lastnote.msg); + pexit(up->lastnote.msg, up->lastnote.flag!=NDebug); + } +} + +uintptr +execregs(uintptr entry, ulong ssize, ulong nargs) +{ + uintptr *sp; + Ureg *ureg; + + sp = (uintptr*)(USTKTOP - ssize); + *--sp = nargs; + ureg = up->dbgreg; + ureg->sp = (uintptr)sp; + ureg->pc = entry; + ureg->cs = UESEL; + ureg->ss = ureg->ds = ureg->es = UDSEL; + ureg->fs = ureg->gs = NULLSEL; + return (uintptr)USTKTOP-sizeof(Tos); /* address of kernel/user shared data */ +} + +/* + * return the userpc the last exception happened at + */ +uintptr +userpc(void) +{ + Ureg *ureg; + + ureg = (Ureg*)up->dbgreg; + return ureg->pc; +} + +/* This routine must save the values of registers the user is not permitted + * to write from devproc and then restore the saved values before returning. + */ +void +setregisters(Ureg* ureg, char* pureg, char* uva, int n) +{ + u64int flags; + + flags = ureg->flags; + memmove(pureg, uva, n); + ureg->cs = UESEL; + ureg->ss = ureg->ds = ureg->es = UDSEL; + if(ureg->fs != UDSEL) + ureg->fs = NULLSEL; + if(ureg->gs != UDSEL) + ureg->gs = 0; + ureg->flags = (ureg->flags & 0x00ff) | (flags & 0xff00); +} + +static void +linkproc(void) +{ + spllo(); + up->kpfun(up->kparg); + pexit("kproc dying", 0); +} + +void +kprocchild(Proc* p, void (*func)(void*), void* arg) +{ + /* + * gotolabel() needs a word on the stack in + * which to place the return PC used to jump + * to linkproc(). 
+ */ + p->sched.pc = (uintptr)linkproc; + p->sched.sp = (uintptr)p->kstack+KSTACK-BY2WD; + + p->kpfun = func; + p->kparg = arg; +} + +void +forkchild(Proc *p, Ureg *ureg) +{ + Ureg *cureg; + + /* + * Add 2*BY2WD to the stack to account for + * - the return PC + * - trap's argument (ur) + */ + p->sched.sp = (uintptr)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD); + p->sched.pc = (uintptr)forkret; + + cureg = (Ureg*)(p->sched.sp+2*BY2WD); + memmove(cureg, ureg, sizeof(Ureg)); + + cureg->ax = 0; + + /* Things from bottom of syscall which were never executed */ + p->psstate = 0; + p->insyscall = 0; +} + +/* Give enough context in the ureg to produce a kernel stack for + * a sleeping process + */ +void +setkernur(Ureg* ureg, Proc* p) +{ + ureg->pc = p->sched.pc; + ureg->sp = p->sched.sp+8; +} + +uintptr +dbgpc(Proc *p) +{ + Ureg *ureg; + + ureg = p->dbgreg; + if(ureg == 0) + return 0; + + return ureg->pc; +} diff --git a/sys/src/9/port/devcons.c b/sys/src/9/port/devcons.c index aacc64d3f..b3ee3d2c7 100644 --- a/sys/src/9/port/devcons.c +++ b/sys/src/9/port/devcons.c @@ -601,15 +601,15 @@ consread(Chan *c, void *buf, long n, vlong off) case Qswap: snprint(tmp, sizeof tmp, - "%lud memory\n" - "%d pagesize\n" + "%llud memory\n" + "%llud pagesize\n" "%lud kernel\n" "%lud/%lud user\n" "%lud/%lud swap\n" "%lud/%lud kernel malloc\n" "%lud/%lud kernel draw\n", - conf.npage*BY2PG, - BY2PG, + (uvlong)conf.npage*BY2PG, + (uvlong)BY2PG, conf.npage-conf.upages, palloc.user-palloc.freecount, palloc.user, conf.nswap-swapalloc.free, conf.nswap, diff --git a/sys/src/9/port/mkdevc b/sys/src/9/port/mkdevc index c4ba81ff9..642ff1060 100755 --- a/sys/src/9/port/mkdevc +++ b/sys/src/9/port/mkdevc @@ -109,7 +109,7 @@ END{ printf "\t%slink();\n", link[i]; printf "}\n\n"; - if(narch || objtype == "386" || objtype == "amd64"){ + if(narch || objtype ~ "(386|amd64)"){ for(i = 0; i < narch; i++) printf "extern PCArch %s;\n", arch[i]; printf "PCArch* knownarch[] = {\n"; diff --git 
a/sys/src/9/port/mkdevlist b/sys/src/9/port/mkdevlist index 6d745b318..0bdbf445a 100755 --- a/sys/src/9/port/mkdevlist +++ b/sys/src/9/port/mkdevlist @@ -40,7 +40,7 @@ END{ x = "" for(i in obj) x = x i "\n"; - if(objtype ~ "386" && obj["pci" "'.$O'"]) + if((objtype ~ "386" || objtype ~ "amd64") && obj["pci" "'.$O'"]) x = x "bios32'.$O' \n"; printf x; }' $* -- cgit v1.2.3