diff options
-rw-r--r-- | sys/src/9/pc/ethervirtio.c | 20 | ||||
-rw-r--r-- | sys/src/9/pc/ethervirtio10.c | 790 | ||||
-rw-r--r-- | sys/src/9/pc/pc | 2 | ||||
-rw-r--r-- | sys/src/9/pc/sdvirtio.c | 4 | ||||
-rw-r--r-- | sys/src/9/pc/sdvirtio10.c | 808 | ||||
-rw-r--r-- | sys/src/9/pc64/pc64 | 2 |
6 files changed, 1617 insertions, 9 deletions
diff --git a/sys/src/9/pc/ethervirtio.c b/sys/src/9/pc/ethervirtio.c index 871f6d884..db9dc2cba 100644 --- a/sys/src/9/pc/ethervirtio.c +++ b/sys/src/9/pc/ethervirtio.c @@ -1,3 +1,7 @@ +/* + * virtio ethernet driver implementing the legacy interface: + * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html + */ #include "u.h" #include "../port/lib.h" #include "mem.h" @@ -9,11 +13,6 @@ #include "../port/netif.h" #include "../port/etherif.h" -/* - * virtio ethernet driver - * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html - */ - typedef struct Vring Vring; typedef struct Vdesc Vdesc; typedef struct Vused Vused; @@ -555,13 +554,14 @@ pciprobe(int typ) h = t = nil; /* §4.1.2 PCI Device Discovery */ - for(p = nil; p = pcimatch(p, 0, 0);){ - if(p->vid != 0x1AF4) - continue; + for(p = nil; p = pcimatch(p, 0x1AF4, 0);){ /* the two possible DIDs for virtio-net */ if(p->did != 0x1000 && p->did != 0x1041) continue; - /* non-transitional devices will have a revision > 0 */ + /* + * non-transitional devices will have a revision > 0, + * these are handled by ethervirtio10 driver. + */ if(p->rid != 0) continue; /* first membar needs to be I/O */ @@ -588,6 +588,8 @@ pciprobe(int typ) /* §3.1.2 Legacy Device Initialization */ outb(c->port+Qstatus, 0); + while(inb(c->port+Qstatus) != 0) + delay(1); outb(c->port+Qstatus, Sacknowledge|Sdriver); /* negotiate feature bits */ diff --git a/sys/src/9/pc/ethervirtio10.c b/sys/src/9/pc/ethervirtio10.c new file mode 100644 index 000000000..03108cad9 --- /dev/null +++ b/sys/src/9/pc/ethervirtio10.c @@ -0,0 +1,790 @@ +/* + * virtio 1.0 ethernet driver + * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html + * + * In contrast to ethervirtio.c, this driver handles the non-legacy + * interface for virtio ethernet which uses mmio for all register accesses + * and requires an elaborate pci capability structure dance to get working. 
+ * + * It is kind of pointless as it is most likely slower than + * port i/o (harder to emulate on the pc platform). + * + * The reason why this driver is needed is that vultr set the + * disable-legacy=on option in the -device parameter for qemu + * on their hypervisor. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/pci.h" +#include "../port/error.h" +#include "../port/netif.h" +#include "../port/etherif.h" + +typedef struct Vconfig Vconfig; +typedef struct Vnetcfg Vnetcfg; + +typedef struct Vring Vring; +typedef struct Vdesc Vdesc; +typedef struct Vused Vused; +typedef struct Vheader Vheader; +typedef struct Vqueue Vqueue; + +typedef struct Ctlr Ctlr; + +enum { + /* §2.1 Device Status Field */ + Sacknowledge = 1, + Sdriver = 2, + Sdriverok = 4, + Sfeatureok = 8, + Sfailed = 128, + + /* flags in Qnetstatus */ + Nlinkup = (1<<0), + Nannounce = (1<<1), + + /* feat[0] bits */ + Fmac = 1<<5, + Fstatus = 1<<16, + Fctrlvq = 1<<17, + Fctrlrx = 1<<18, + + /* feat[1] bits */ + Fversion1 = 1<<(32-32), + + /* vring used flags */ + Unonotify = 1, + /* vring avail flags */ + Rnointerrupt = 1, + + /* descriptor flags */ + Dnext = 1, + Dwrite = 2, + Dindirect = 4, + + /* struct sizes */ + VringSize = 4, + VdescSize = 16, + VusedSize = 8, + VheaderSize = 12, + + Vrxq = 0, + Vtxq = 1, + Vctlq = 2, + + /* class/cmd for Vctlq */ + CtrlRx = 0x00, + CmdPromisc = 0x00, + CmdAllmulti = 0x01, + CtrlMac = 0x01, + CmdMacTableSet = 0x00, + CtrlVlan= 0x02, + CmdVlanAdd = 0x00, + CmdVlanDel = 0x01, +}; + +struct Vconfig { + u32int devfeatsel; + u32int devfeat; + u32int drvfeatsel; + u32int drvfeat; + + u16int msixcfg; + u16int nqueues; + + u8int status; + u8int cfggen; + u16int queuesel; + + u16int queuesize; + u16int queuemsixvect; + + u16int queueenable; + u16int queuenotifyoff; + + u64int queuedesc; + u64int queueavail; + u64int queueused; +}; + +struct Vnetcfg +{ + u16int mac0; + u16int mac1; + 
u16int mac2; + u16int status; + u16int maxqueuepairs; + u16int mtu; +}; + +struct Vring +{ + u16int flags; + u16int idx; +}; + +struct Vdesc +{ + u64int addr; + u32int len; + u16int flags; + u16int next; +}; + +struct Vused +{ + u32int id; + u32int len; +}; + +struct Vheader +{ + u8int flags; + u8int segtype; + u16int hlen; + u16int seglen; + u16int csumstart; + u16int csumend; +}; + +struct Vqueue +{ + Rendez; + + uint qsize; + uint qmask; + + Vdesc *desc; + + Vring *avail; + u16int *availent; + u16int *availevent; + + Vring *used; + Vused *usedent; + u16int *usedevent; + u16int lastused; + + uint nintr; + uint nnote; + + /* notify register */ + void *notify; +}; + +struct Ctlr { + Lock; + + QLock ctllock; + + int attached; + + /* registers */ + Vconfig *cfg; + Vnetcfg *dev; + u8int *isr; + u8int *notify; + u32int notifyoffmult; + + uvlong port; + Pcidev *pcidev; + Ctlr *next; + int active; + ulong feat[2]; + int nqueue; + + /* virtioether has 3 queues: rx, tx and ctl */ + Vqueue queue[3]; +}; + +static Ctlr *ctlrhead; + +static int +vhasroom(void *v) +{ + Vqueue *q = v; + return q->lastused != q->used->idx; +} + +static void +vqnotify(Ctlr *ctlr, int x) +{ + Vqueue *q; + + coherence(); + q = &ctlr->queue[x]; + if(q->used->flags & Unonotify) + return; + q->nnote++; + *((u16int*)q->notify) = x; +} + +static void +txproc(void *v) +{ + Vheader *header; + Block **blocks; + Ether *edev; + Ctlr *ctlr; + Vqueue *q; + Vused *u; + Block *b; + int i, j; + + edev = v; + ctlr = edev->ctlr; + q = &ctlr->queue[Vtxq]; + + header = smalloc(VheaderSize); + blocks = smalloc(sizeof(Block*) * (q->qsize/2)); + + for(i = 0; i < q->qsize/2; i++){ + j = i << 1; + q->desc[j].addr = PADDR(header); + q->desc[j].len = VheaderSize; + q->desc[j].next = j | 1; + q->desc[j].flags = Dnext; + + q->availent[i] = q->availent[i + q->qsize/2] = j; + + j |= 1; + q->desc[j].next = 0; + q->desc[j].flags = 0; + } + + q->avail->flags &= ~Rnointerrupt; + + while(waserror()) + ; + + while((b = 
qbread(edev->oq, 1000000)) != nil){ + for(;;){ + /* retire completed packets */ + while((i = q->lastused) != q->used->idx){ + u = &q->usedent[i & q->qmask]; + i = (u->id & q->qmask) >> 1; + if(blocks[i] == nil) + break; + freeb(blocks[i]); + blocks[i] = nil; + q->lastused++; + } + + /* have free slot? */ + i = q->avail->idx & (q->qmask >> 1); + if(blocks[i] == nil) + break; + + /* ring full, wait and retry */ + if(!vhasroom(q)) + sleep(q, vhasroom, q); + } + + /* slot is free, fill in descriptor */ + blocks[i] = b; + j = (i << 1) | 1; + q->desc[j].addr = PADDR(b->rp); + q->desc[j].len = BLEN(b); + coherence(); + q->avail->idx++; + vqnotify(ctlr, Vtxq); + } + + pexit("ether out queue closed", 1); +} + +static void +rxproc(void *v) +{ + Vheader *header; + Block **blocks; + Ether *edev; + Ctlr *ctlr; + Vqueue *q; + Vused *u; + Block *b; + int i, j; + + edev = v; + ctlr = edev->ctlr; + q = &ctlr->queue[Vrxq]; + + header = smalloc(VheaderSize); + blocks = smalloc(sizeof(Block*) * (q->qsize/2)); + + for(i = 0; i < q->qsize/2; i++){ + j = i << 1; + q->desc[j].addr = PADDR(header); + q->desc[j].len = VheaderSize; + q->desc[j].next = j | 1; + q->desc[j].flags = Dwrite|Dnext; + + q->availent[i] = q->availent[i + q->qsize/2] = j; + + j |= 1; + q->desc[j].next = 0; + q->desc[j].flags = Dwrite; + } + + q->avail->flags &= ~Rnointerrupt; + + while(waserror()) + ; + + for(;;){ + /* replenish receive ring */ + do { + i = q->avail->idx & (q->qmask >> 1); + if(blocks[i] != nil) + break; + if((b = iallocb(ETHERMAXTU)) == nil) + break; + blocks[i] = b; + j = (i << 1) | 1; + q->desc[j].addr = PADDR(b->rp); + q->desc[j].len = BALLOC(b); + coherence(); + q->avail->idx++; + } while(q->avail->idx != q->used->idx); + vqnotify(ctlr, Vrxq); + + /* wait for any packets to complete */ + if(!vhasroom(q)) + sleep(q, vhasroom, q); + + /* retire completed packets */ + while((i = q->lastused) != q->used->idx) { + u = &q->usedent[i & q->qmask]; + i = (u->id & q->qmask) >> 1; + if((b = blocks[i]) == 
nil) + break; + + blocks[i] = nil; + b->wp = b->rp + u->len - VheaderSize; + etheriq(edev, b); + q->lastused++; + } + } +} + +static int +vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata) +{ + uchar hdr[2], ack[1]; + Ctlr *ctlr; + Vqueue *q; + Vdesc *d; + int i; + + ctlr = edev->ctlr; + q = &ctlr->queue[Vctlq]; + if(q->qsize < 3) + return -1; + + qlock(&ctlr->ctllock); + while(waserror()) + ; + + ack[0] = 0x55; + hdr[0] = class; + hdr[1] = cmd; + + d = &q->desc[0]; + d->addr = PADDR(hdr); + d->len = sizeof(hdr); + d->next = 1; + d->flags = Dnext; + d++; + d->addr = PADDR(data); + d->len = ndata; + d->next = 2; + d->flags = Dnext; + d++; + d->addr = PADDR(ack); + d->len = sizeof(ack); + d->next = 0; + d->flags = Dwrite; + + i = q->avail->idx & q->qmask; + q->availent[i] = 0; + coherence(); + + q->avail->flags &= ~Rnointerrupt; + q->avail->idx++; + vqnotify(ctlr, Vctlq); + while(!vhasroom(q)) + sleep(q, vhasroom, q); + q->lastused = q->used->idx; + q->avail->flags |= Rnointerrupt; + + qunlock(&ctlr->ctllock); + poperror(); + + if(ack[0] != 0) + print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]); + + return ack[0]; +} + +static void +interrupt(Ureg*, void* arg) +{ + Ether *edev; + Ctlr *ctlr; + Vqueue *q; + int i; + + edev = arg; + ctlr = edev->ctlr; + if(*ctlr->isr & 1){ + for(i = 0; i < ctlr->nqueue; i++){ + q = &ctlr->queue[i]; + if(vhasroom(q)){ + q->nintr++; + wakeup(q); + } + } + } +} + +static void +attach(Ether* edev) +{ + char name[KNAMELEN]; + Ctlr* ctlr; + int i; + + ctlr = edev->ctlr; + ilock(ctlr); + if(ctlr->attached){ + iunlock(ctlr); + return; + } + ctlr->attached = 1; + + /* driver is ready */ + ctlr->cfg->status |= Sdriverok; + + /* enable the queues */ + for(i = 0; i < ctlr->nqueue; i++){ + ctlr->cfg->queuesel = i; + ctlr->cfg->queueenable = 1; + } + iunlock(ctlr); + + /* start kprocs */ + snprint(name, sizeof name, "#l%drx", edev->ctlrno); + kproc(name, rxproc, edev); + snprint(name, sizeof name, "#l%dtx", 
edev->ctlrno); + kproc(name, txproc, edev); +} + +static long +ifstat(Ether *edev, void *a, long n, ulong offset) +{ + int i, l; + char *p; + Ctlr *ctlr; + Vqueue *q; + + ctlr = edev->ctlr; + + p = smalloc(READSTR); + + l = snprint(p, READSTR, "devfeat %32.32lub %32.32lub\n", ctlr->feat[1], ctlr->feat[0]); + l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", ctlr->cfg->status); + + for(i = 0; i < ctlr->nqueue; i++){ + q = &ctlr->queue[i]; + l += snprint(p+l, READSTR-l, + "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n", + i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote); + } + + n = readstr(offset, a, n, p); + free(p); + + return n; +} + +static void +shutdown(Ether* edev) +{ + Ctlr *ctlr = edev->ctlr; + + coherence(); + ctlr->cfg->status = 0; + coherence(); + + pciclrbme(ctlr->pcidev); +} + +static void +promiscuous(void *arg, int on) +{ + Ether *edev = arg; + uchar b[1]; + + b[0] = on != 0; + vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b)); +} + +static void +multicast(void *arg, uchar*, int) +{ + Ether *edev = arg; + uchar b[1]; + + b[0] = edev->nmaddr > 0; + vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b)); +} + +static int +initqueue(Vqueue *q, int size) +{ + uchar *p; + + q->desc = mallocalign(VdescSize*size, 16, 0, 0); + if(q->desc == nil) + return -1; + p = mallocalign(VringSize + 2*size + 2, 2, 0, 0); + if(p == nil){ +FreeDesc: + free(q->desc); + q->desc = nil; + return -1; + } + q->avail = (void*)p; + p += VringSize; + q->availent = (void*)p; + p += sizeof(u16int)*size; + q->availevent = (void*)p; + p = mallocalign(VringSize + VusedSize*size + 2, 4, 0, 0); + if(p == nil){ + free(q->avail); + q->avail = nil; + goto FreeDesc; + } + q->used = (void*)p; + p += VringSize; + q->usedent = (void*)p; + p += VusedSize*size; + q->usedevent = (void*)p; + + q->qsize = size; + q->qmask = q->qsize - 1; + + q->lastused = q->avail->idx = q->used->idx = 0; + + q->avail->flags |= Rnointerrupt; + + return 0; +} + 
+static int +matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ) +{ + int bar; + + if(cap != 9 || pcicfgr8(p, off+3) != typ) + return 1; + + /* skip invalid or non memory bars */ + bar = pcicfgr8(p, off+4); + if(bar < 0 || bar >= nelem(p->mem) + || p->mem[bar].size == 0 + || (p->mem[bar].bar & 3) != 0) + return 1; + + return 0; +} + +static int +virtiocap(Pcidev *p, int typ) +{ + return pcienumcaps(p, matchvirtiocfgcap, typ); +} + +static void* +virtiomapregs(Pcidev *p, int cap, int size) +{ + int bar, len; + uvlong addr; + + if(cap < 0) + return nil; + bar = pcicfgr8(p, cap+4) % nelem(p->mem); + addr = pcicfgr32(p, cap+8); + len = pcicfgr32(p, cap+12); + if(size <= 0) + size = len; + else if(len < size) + return nil; + if(addr+len > p->mem[bar].size) + return nil; + addr += p->mem[bar].bar & ~0xFULL; + return vmap(addr, size); +} + +static Ctlr* +pciprobe(void) +{ + Ctlr *c, *h, *t; + Pcidev *p; + Vconfig *cfg; + int bar, cap, n, i; + + h = t = nil; + + /* §4.1.2 PCI Device Discovery */ + for(p = nil; p = pcimatch(p, 0x1AF4, 0x1041);){ + /* non-transitional devices will have a revision > 0 */ + if(p->rid == 0) + continue; + if((cap = virtiocap(p, 1)) < 0) + continue; + bar = pcicfgr8(p, cap+4) % nelem(p->mem); + cfg = virtiomapregs(p, cap, sizeof(Vconfig)); + if(cfg == nil) + continue; + if((c = mallocz(sizeof(Ctlr), 1)) == nil){ + print("ethervirtio: no memory for Ctlr\n"); + break; + } + c->cfg = cfg; + c->pcidev = p; + c->port = p->mem[bar].bar & ~0xFULL; + + pcienable(p); + c->dev = virtiomapregs(p, virtiocap(p, 4), sizeof(Vnetcfg)); + if(c->dev == nil) + goto Baddev; + c->isr = virtiomapregs(p, virtiocap(p, 3), 0); + if(c->isr == nil) + goto Baddev; + cap = virtiocap(p, 2); + c->notify = virtiomapregs(p, cap, 0); + if(c->notify == nil) + goto Baddev; + c->notifyoffmult = pcicfgr32(p, cap+16); + + /* device reset */ + coherence(); + cfg->status = 0; + while(cfg->status != 0) + delay(1); + cfg->status = Sacknowledge|Sdriver; + + /* negotiate feature bits */ 
+ cfg->devfeatsel = 1; + c->feat[1] = cfg->devfeat; + + cfg->devfeatsel = 0; + c->feat[0] = cfg->devfeat; + + cfg->drvfeatsel = 1; + cfg->drvfeat = c->feat[1] & Fversion1; + + cfg->drvfeatsel = 0; + cfg->drvfeat = c->feat[0] & (Fmac|Fctrlvq|Fctrlrx); + + for(i=0; i<nelem(c->queue); i++){ + cfg->queuesel = i; + n = cfg->queuesize; + if(n == 0 || (n & (n-1)) != 0){ + if(i < 2) + print("ethervirtio: queue %d has invalid size %d\n", i, n); + break; + } + if(initqueue(&c->queue[i], n) < 0) + break; + c->queue[i].notify = c->notify + c->notifyoffmult * cfg->queuenotifyoff; + coherence(); + cfg->queuedesc = PADDR(c->queue[i].desc); + cfg->queueavail = PADDR(c->queue[i].avail); + cfg->queueused = PADDR(c->queue[i].used); + } + if(i < 2){ + print("ethervirtio: no queues\n"); +Baddev: + pcidisable(p); + /* TODO, vunmap */ + free(c); + continue; + } + c->nqueue = i; + + if(h == nil) + h = c; + else + t->next = c; + t = c; + } + + return h; +} + + +static int +reset(Ether* edev) +{ + static uchar zeros[Eaddrlen]; + Ctlr *ctlr; + int i; + + if(ctlrhead == nil) + ctlrhead = pciprobe(); + + for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + if(edev->port == 0 || edev->port == ctlr->port){ + ctlr->active = 1; + break; + } + } + + if(ctlr == nil) + return -1; + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 1000; + edev->link = 1; + + if((ctlr->feat[0] & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){ + for(i = 0; i < Eaddrlen; i++) + edev->ea[i] = ((uchar*)ctlr->dev)[i]; + } else { + for(i = 0; i < Eaddrlen; i++) + ((uchar*)ctlr->dev)[i] = edev->ea[i]; + } + + edev->arg = edev; + + edev->attach = attach; + edev->shutdown = shutdown; + edev->ifstat = ifstat; + + if((ctlr->feat[0] & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){ + edev->multicast = multicast; + edev->promiscuous = promiscuous; + } + + pcisetbme(ctlr->pcidev); + intrenable(edev->irq, interrupt, 
edev, edev->tbdf, edev->name); + + return 0; +} + +void +ethervirtio10link(void) +{ + addethercard("virtio10", reset); +} diff --git a/sys/src/9/pc/pc b/sys/src/9/pc/pc index 4d242b2ed..9d980f903 100644 --- a/sys/src/9/pc/pc +++ b/sys/src/9/pc/pc @@ -80,6 +80,7 @@ link etherwpi pci wifi etherrt2860 pci wifi ethervirtio pci + ethervirtio10 pci ethermedium pcmciamodem netdevmedium @@ -108,6 +109,7 @@ misc sdiahci pci sdscsi led sdodin pci sdscsi led sdvirtio pci sdscsi + sdvirtio10 pci sdscsi sdmmc pci pmmc sdnvme pci sdloop diff --git a/sys/src/9/pc/sdvirtio.c b/sys/src/9/pc/sdvirtio.c index 4b4227667..c102fc030 100644 --- a/sys/src/9/pc/sdvirtio.c +++ b/sys/src/9/pc/sdvirtio.c @@ -1,3 +1,7 @@ +/* + * virtio disk driver implementing the legacy interface: + * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html + */ #include "u.h" #include "../port/lib.h" #include "mem.h" diff --git a/sys/src/9/pc/sdvirtio10.c b/sys/src/9/pc/sdvirtio10.c new file mode 100644 index 000000000..df25df87a --- /dev/null +++ b/sys/src/9/pc/sdvirtio10.c @@ -0,0 +1,808 @@ +/* + * virtio 1.0 disk driver + * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html + * + * In contrast to sdvirtio.c, this driver handles the non-legacy + * interface for virtio disk which uses mmio for all register accesses + * and requires an elaborate pci capability structure dance to get working. + * + * It is kind of pointless as it is most likely slower than + * port i/o (harder to emulate on the pc platform). + * + * The reason why this driver is needed is that vultr set the + * disable-legacy=on option in the -device parameter for qemu + * on their hypervisor. 
+ */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/pci.h" +#include "ureg.h" +#include "../port/error.h" + +#include "../port/sd.h" + +typedef struct Vscsidev Vscsidev; +typedef struct Vblkdev Vblkdev; + +typedef struct Vconfig Vconfig; +typedef struct Vring Vring; +typedef struct Vdesc Vdesc; +typedef struct Vused Vused; +typedef struct Vqueue Vqueue; +typedef struct Vdev Vdev; + + +/* device types */ +enum { + TypBlk = 2, + TypSCSI = 8, +}; + +/* status flags */ +enum { + Acknowledge = 1, + Driver = 2, + DriverOk = 4, + Failed = 0x80, +}; + +/* descriptor flags */ +enum { + Next = 1, + Write = 2, + Indirect = 4, +}; + +/* struct sizes */ +enum { + VringSize = 4, +}; + +enum { + CDBSIZE = 32, + SENSESIZE = 96, +}; + + +struct Vscsidev +{ + u32int num_queues; + u32int seg_max; + u32int max_sectors; + u32int cmd_per_lun; + u32int event_info_size; + u32int sense_size; + u32int cdb_size; + u16int max_channel; + u16int max_target; + u32int max_lun; +}; + +struct Vblkdev +{ + u64int capacity; +}; + +struct Vconfig { + u32int devfeatsel; + u32int devfeat; + u32int drvfeatsel; + u32int drvfeat; + + u16int msixcfg; + u16int nqueues; + + u8int status; + u8int cfggen; + u16int queuesel; + + u16int queuesize; + u16int queuemsixvect; + + u16int queueenable; + u16int queuenotifyoff; + + u64int queuedesc; + u64int queueavail; + u64int queueused; +}; + +struct Vring +{ + u16int flags; + u16int idx; +}; + +struct Vdesc +{ + u64int addr; + u32int len; + u16int flags; + u16int next; +}; + +struct Vused +{ + u32int id; + u32int len; +}; + +struct Vqueue +{ + Lock; + + Vdev *dev; + void *notify; + int idx; + + int size; + + int free; + int nfree; + + Vdesc *desc; + + Vring *avail; + u16int *availent; + u16int *availevent; + + Vring *used; + Vused *usedent; + u16int *usedevent; + u16int lastused; + + void *rock[]; +}; + +struct Vdev +{ + int typ; + + Pcidev *pci; + + uvlong port; + ulong feat[2]; + + 
int nqueue; + Vqueue *queue[16]; + + void *dev; /* device specific config (for scsi) */ + + /* registers */ + Vconfig *cfg; + u8int *isr; + u8int *notify; + u32int notifyoffmult; + + Vdev *next; +}; + +static Vqueue* +mkvqueue(int size) +{ + Vqueue *q; + uchar *p; + int i; + + q = malloc(sizeof(*q) + sizeof(void*)*size); + p = mallocalign( + PGROUND(sizeof(Vdesc)*size + + VringSize + + sizeof(u16int)*size + + sizeof(u16int)) + + PGROUND(VringSize + + sizeof(Vused)*size + + sizeof(u16int)), + BY2PG, 0, 0); + if(p == nil || q == nil){ + print("virtio: no memory for Vqueue\n"); + free(p); + free(q); + return nil; + } + + q->desc = (void*)p; + p += sizeof(Vdesc)*size; + q->avail = (void*)p; + p += VringSize; + q->availent = (void*)p; + p += sizeof(u16int)*size; + q->availevent = (void*)p; + p += sizeof(u16int); + + p = (uchar*)PGROUND((uintptr)p); + q->used = (void*)p; + p += VringSize; + q->usedent = (void*)p; + p += sizeof(Vused)*size; + q->usedevent = (void*)p; + + q->free = -1; + q->nfree = q->size = size; + for(i=0; i<size; i++){ + q->desc[i].next = q->free; + q->free = i; + } + + return q; +} + +static int +matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ) +{ + int bar; + + if(cap != 9 || pcicfgr8(p, off+3) != typ) + return 1; + + /* skip invalid or non memory bars */ + bar = pcicfgr8(p, off+4); + if(bar < 0 || bar >= nelem(p->mem) + || p->mem[bar].size == 0 + || (p->mem[bar].bar & 3) != 0) + return 1; + + return 0; +} + +static int +virtiocap(Pcidev *p, int typ) +{ + return pcienumcaps(p, matchvirtiocfgcap, typ); +} + +static void* +virtiomapregs(Pcidev *p, int cap, int size) +{ + int bar, len; + uvlong addr; + + if(cap < 0) + return nil; + bar = pcicfgr8(p, cap+4) % nelem(p->mem); + addr = pcicfgr32(p, cap+8); + len = pcicfgr32(p, cap+12); + if(size <= 0) + size = len; + else if(len < size) + return nil; + if(addr+len > p->mem[bar].size) + return nil; + addr += p->mem[bar].bar & ~0xFULL; + return vmap(addr, size); +} + +static Vdev* +viopnpdevs(int typ) 
+{ + Vdev *vd, *h, *t; + Vconfig *cfg; + Vqueue *q; + Pcidev *p; + int cap, bar; + int n, i; + + h = t = nil; + for(p = nil; p = pcimatch(p, 0x1AF4, 0x1040+typ);){ + if(p->rid == 0) + continue; + if((cap = virtiocap(p, 1)) < 0) + continue; + bar = pcicfgr8(p, cap+4) % nelem(p->mem); + cfg = virtiomapregs(p, cap, sizeof(Vconfig)); + if(cfg == nil) + continue; + if((vd = malloc(sizeof(*vd))) == nil){ + print("virtio: no memory for Vdev\n"); + break; + } + vd->port = p->mem[bar].bar & ~0xFULL; + vd->typ = typ; + vd->pci = p; + vd->cfg = cfg; + pcienable(p); + + vd->isr = virtiomapregs(p, virtiocap(p, 3), 0); + if(vd->isr == nil){ +Baddev: + pcidisable(p); + /* TODO: vunmap */ + free(vd); + continue; + } + cap = virtiocap(p, 2); + vd->notify = virtiomapregs(p, cap, 0); + if(vd->notify == nil) + goto Baddev; + vd->notifyoffmult = pcicfgr32(p, cap+16); + + /* reset */ + cfg->status = 0; + while(cfg->status != 0) + delay(1); + cfg->status = Acknowledge|Driver; + + /* negotiate feature bits */ + cfg->devfeatsel = 1; + vd->feat[1] = cfg->devfeat; + cfg->devfeatsel = 0; + vd->feat[0] = cfg->devfeat; + cfg->drvfeatsel = 1; + cfg->drvfeat = vd->feat[1] & 1; + cfg->drvfeatsel = 0; + cfg->drvfeat = 0; + + for(i=0; i<nelem(vd->queue); i++){ + cfg->queuesel = i; + n = cfg->queuesize; + if(n == 0 || (n & (n-1)) != 0) + break; + if((q = mkvqueue(n)) == nil) + break; + q->notify = vd->notify + vd->notifyoffmult * cfg->queuenotifyoff; + q->dev = vd; + q->idx = i; + vd->queue[i] = q; + coherence(); + cfg->queuedesc = PADDR(q->desc); + cfg->queueavail = PADDR(q->avail); + cfg->queueused = PADDR(q->used); + } + vd->nqueue = i; + + if(h == nil) + h = vd; + else + t->next = vd; + t = vd; + } + + return h; +} + +struct Rock { + int done; + Rendez *sleep; +}; + +static void +vqinterrupt(Vqueue *q) +{ + int id, free, m; + struct Rock *r; + Rendez *z; + + m = q->size-1; + + ilock(q); + while((q->lastused ^ q->used->idx) & m){ + id = q->usedent[q->lastused++ & m].id; + if(r = q->rock[id]){ + 
q->rock[id] = nil; + z = r->sleep; + r->done = 1; /* hands off */ + if(z != nil) + wakeup(z); + } + do { + free = id; + id = q->desc[free].next; + q->desc[free].next = q->free; + q->free = free; + q->nfree++; + } while(q->desc[free].flags & Next); + } + iunlock(q); +} + +static void +viointerrupt(Ureg *, void *arg) +{ + Vdev *vd = arg; + + if(vd->isr[0] & 1) + vqinterrupt(vd->queue[vd->typ == TypSCSI ? 2 : 0]); +} + +static int +viodone(void *arg) +{ + return ((struct Rock*)arg)->done; +} + +static void +vqio(Vqueue *q, int head) +{ + struct Rock rock; + + rock.done = 0; + rock.sleep = &up->sleep; + q->rock[head] = &rock; + q->availent[q->avail->idx & (q->size-1)] = head; + coherence(); + q->avail->idx++; + iunlock(q); + if((q->used->flags & 1) == 0) + *((u16int*)q->notify) = q->idx; + while(!rock.done){ + while(waserror()) + ; + tsleep(rock.sleep, viodone, &rock, 1000); + poperror(); + + if(!rock.done) + vqinterrupt(q); + } +} + +static int +vioblkreq(Vdev *vd, int typ, void *a, long count, long secsize, uvlong lba) +{ + int need, free, head; + Vqueue *q; + Vdesc *d; + + u8int status; + struct Vioblkreqhdr { + u32int typ; + u32int prio; + u64int lba; + } req; + + need = 2; + if(a != nil) + need = 3; + + status = -1; + req.typ = typ; + req.prio = 0; + req.lba = lba; + + q = vd->queue[0]; + ilock(q); + while(q->nfree < need){ + iunlock(q); + + if(!waserror()) + tsleep(&up->sleep, return0, 0, 500); + poperror(); + + ilock(q); + } + + head = free = q->free; + + d = &q->desc[free]; free = d->next; + d->addr = PADDR(&req); + d->len = sizeof(req); + d->flags = Next; + + if(a != nil){ + d = &q->desc[free]; free = d->next; + d->addr = PADDR(a); + d->len = secsize*count; + d->flags = typ ? 
Next : (Write|Next); + } + + d = &q->desc[free]; free = d->next; + d->addr = PADDR(&status); + d->len = sizeof(status); + d->flags = Write; + + q->free = free; + q->nfree -= need; + + /* queue io, unlock and wait for completion */ + vqio(q, head); + + return status; +} + +static int +vioscsireq(SDreq *r) +{ + u8int resp[4+4+2+2+SENSESIZE]; + u8int req[8+8+3+CDBSIZE]; + int free, head; + u32int len; + Vqueue *q; + Vdesc *d; + Vdev *vd; + SDunit *u; + Vscsidev *scsi; + + u = r->unit; + vd = u->dev->ctlr; + scsi = vd->dev; + + memset(resp, 0, sizeof(resp)); + memset(req, 0, sizeof(req)); + req[0] = 1; + req[1] = u->subno; + req[2] = r->lun>>8; + req[3] = r->lun&0xFF; + *(u64int*)(&req[8]) = (uintptr)r; + + memmove(&req[8+8+3], r->cmd, r->clen); + + q = vd->queue[2]; + ilock(q); + while(q->nfree < 3){ + iunlock(q); + + if(!waserror()) + tsleep(&up->sleep, return0, 0, 500); + poperror(); + + ilock(q); + } + + head = free = q->free; + + d = &q->desc[free]; free = d->next; + d->addr = PADDR(req); + d->len = 8+8+3+scsi->cdb_size; + d->flags = Next; + + if(r->write && r->dlen > 0){ + d = &q->desc[free]; free = d->next; + d->addr = PADDR(r->data); + d->len = r->dlen; + d->flags = Next; + } + + d = &q->desc[free]; free = d->next; + d->addr = PADDR(resp); + d->len = 4+4+2+2+scsi->sense_size; + d->flags = Write; + + if(!r->write && r->dlen > 0){ + d->flags |= Next; + + d = &q->desc[free]; free = d->next; + d->addr = PADDR(r->data); + d->len = r->dlen; + d->flags = Write; + } + + q->free = free; + q->nfree -= 2 + (r->dlen > 0); + + /* queue io, unlock and wait for completion */ + vqio(q, head); + + /* response+status */ + r->status = resp[10]; + if(resp[11] != 0) + r->status = SDcheck; + + /* sense_len */ + len = *((u32int*)&resp[0]); + if(len > 0){ + if(len > sizeof(r->sense)) + len = sizeof(r->sense); + memmove(r->sense, &resp[4+4+2+2], len); + r->flags |= SDvalidsense; + } + + /* data residue */ + len = *((u32int*)&resp[4]); + if(len > r->dlen) + r->rlen = 0; + else + r->rlen 
= r->dlen - len; + + return r->status; + +} + +static long +viobio(SDunit *u, int lun, int write, void *a, long count, uvlong lba) +{ + long ss, cc, max, ret; + Vdev *vd; + + vd = u->dev->ctlr; + if(vd->typ == TypSCSI) + return scsibio(u, lun, write, a, count, lba); + + max = 32; + ss = u->secsize; + ret = 0; + while(count > 0){ + if((cc = count) > max) + cc = max; + if(vioblkreq(vd, write != 0, (uchar*)a + ret, cc, ss, lba) != 0) + error(Eio); + ret += cc*ss; + count -= cc; + lba += cc; + } + return ret; +} + +static int +viorio(SDreq *r) +{ + int i, count, rw; + uvlong lba; + SDunit *u; + Vdev *vd; + + u = r->unit; + vd = u->dev->ctlr; + if(vd->typ == TypSCSI) + return vioscsireq(r); + if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){ + if(vioblkreq(vd, 4, nil, 0, 0, 0) != 0) + return sdsetsense(r, SDcheck, 3, 0xc, 2); + return sdsetsense(r, SDok, 0, 0, 0); + } + if((i = sdfakescsi(r)) != SDnostatus) + return r->status = i; + if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus) + return i; + r->rlen = viobio(u, r->lun, rw == SDwrite, r->data, count, lba); + return r->status = SDok; +} + +static int +vioonline(SDunit *u) +{ + Vdev *vd; + Vblkdev *blk; + uvlong cap; + + vd = u->dev->ctlr; + if(vd->typ == TypSCSI) + return scsionline(u); + + blk = vd->dev; + cap = blk->capacity; + if(u->sectors != cap){ + u->sectors = cap; + u->secsize = 512; + return 2; + } + return 1; +} + +static int +vioverify(SDunit *u) +{ + Vdev *vd; + + vd = u->dev->ctlr; + if(vd->typ == TypSCSI) + return scsiverify(u); + + return 1; +} + +SDifc sdvirtio10ifc; + +static int +vioenable(SDev *sd) +{ + char name[32]; + Vdev *vd; + int i; + + vd = sd->ctlr; + pcisetbme(vd->pci); + snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name); + intrenable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name); + coherence(); + + vd->cfg->status |= DriverOk; + for(i = 0; i < vd->nqueue; i++){ + vd->cfg->queuesel = i; + vd->cfg->queueenable = 1; + } + + return 1; +} + +static int +viodisable(SDev 
*sd) +{ + char name[32]; + Vdev *vd; + + vd = sd->ctlr; + snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name); + intrdisable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name); + pciclrbme(vd->pci); + return 1; +} + +static SDev* +viopnp(void) +{ + SDev *s, *h, *t; + Vdev *vd; + int id; + + h = t = nil; + + id = 'F'; + for(vd = viopnpdevs(TypBlk); vd; vd = vd->next){ + if(vd->nqueue == 0) + continue; + + if((vd->dev = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vblkdev))) == nil) + break; + if((s = malloc(sizeof(*s))) == nil) + break; + s->ctlr = vd; + s->idno = id++; + s->ifc = &sdvirtio10ifc; + s->nunit = 1; + if(h) + t->next = s; + else + h = s; + t = s; + } + + id = '0'; + for(vd = viopnpdevs(TypSCSI); vd; vd = vd->next){ + Vscsidev *scsi; + + if(vd->nqueue < 3) + continue; + + if((scsi = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vscsidev))) == nil) + break; + if(scsi->max_target == 0){ + vunmap(scsi, sizeof(Vscsidev)); + continue; + } + if((scsi->cdb_size > CDBSIZE) || (scsi->sense_size > SENSESIZE)){ + print("sdvirtio: cdb %ud or sense size %ud too big\n", + scsi->cdb_size, scsi->sense_size); + vunmap(scsi, sizeof(Vscsidev)); + continue; + } + vd->dev = scsi; + + if((s = malloc(sizeof(*s))) == nil) + break; + s->ctlr = vd; + s->idno = id++; + s->ifc = &sdvirtio10ifc; + s->nunit = scsi->max_target; + + if(h) + t->next = s; + else + h = s; + t = s; + } + return h; +} + +SDifc sdvirtio10ifc = { + "virtio10", /* name */ + + viopnp, /* pnp */ + nil, /* legacy */ + vioenable, /* enable */ + viodisable, /* disable */ + + vioverify, /* verify */ + vioonline, /* online */ + viorio, /* rio */ + nil, /* rctl */ + nil, /* wctl */ + + viobio, /* bio */ + nil, /* probe */ + nil, /* clear */ + nil, /* rtopctl */ + nil, /* wtopctl */ +}; diff --git a/sys/src/9/pc64/pc64 b/sys/src/9/pc64/pc64 index 3250e9a45..1c9fbd172 100644 --- a/sys/src/9/pc64/pc64 +++ b/sys/src/9/pc64/pc64 @@ -78,6 +78,7 @@ link etherwpi pci wifi etherrt2860 pci wifi 
ethervirtio pci + ethervirtio10 pci ethermedium # pcmciamodem netdevmedium @@ -105,6 +106,7 @@ misc sdiahci pci sdscsi led # sdodin pci sdscsi led sdvirtio pci sdscsi + sdvirtio10 pci sdscsi sdmmc pci pmmc sdnvme pci sdloop |