pci_virtio_net.c revision 244159
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD$ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD$"); 31221828Sgrehan 32221828Sgrehan#include <sys/param.h> 33221828Sgrehan#include <sys/linker_set.h> 34221828Sgrehan#include <sys/select.h> 35221828Sgrehan#include <sys/uio.h> 36221828Sgrehan#include <sys/ioctl.h> 37221828Sgrehan 38221828Sgrehan#include <errno.h> 39221828Sgrehan#include <fcntl.h> 40221828Sgrehan#include <stdio.h> 41221828Sgrehan#include <stdlib.h> 42221828Sgrehan#include <stdint.h> 43221828Sgrehan#include <string.h> 44221828Sgrehan#include <strings.h> 45221828Sgrehan#include <unistd.h> 46221828Sgrehan#include <assert.h> 47221828Sgrehan#include <md5.h> 48221828Sgrehan#include <pthread.h> 49221828Sgrehan 50221828Sgrehan#include "fbsdrun.h" 51221828Sgrehan#include "pci_emul.h" 52221828Sgrehan#include "mevent.h" 53221828Sgrehan#include "virtio.h" 54221828Sgrehan 55221828Sgrehan#define VTNET_RINGSZ 256 56221828Sgrehan 57221828Sgrehan#define VTNET_MAXSEGS 32 58221828Sgrehan 59221828Sgrehan/* 60221828Sgrehan * PCI config-space register offsets 61221828Sgrehan */ 62221828Sgrehan#define VTNET_R_CFG0 20 63221828Sgrehan#define VTNET_R_CFG1 21 64221828Sgrehan#define VTNET_R_CFG2 22 65221828Sgrehan#define VTNET_R_CFG3 23 66221828Sgrehan#define VTNET_R_CFG4 24 67221828Sgrehan#define VTNET_R_CFG5 25 68221828Sgrehan#define VTNET_R_CFG6 26 69221828Sgrehan#define VTNET_R_CFG7 27 70221828Sgrehan#define VTNET_R_MAX 27 71221828Sgrehan 72221828Sgrehan#define VTNET_REGSZ VTNET_R_MAX+1 73221828Sgrehan 74221828Sgrehan/* 75221828Sgrehan * Host capabilities 76221828Sgrehan */ 77221828Sgrehan#define VTNET_S_HOSTCAPS \ 78221828Sgrehan ( 0x00000020 | /* host supplies MAC */ \ 79221828Sgrehan 0x00008000 | /* host can merge Rx buffers */ \ 80221828Sgrehan 0x00010000 ) /* config status available */ 81221828Sgrehan 82221828Sgrehan/* 83221828Sgrehan * Queue definitions. 84221828Sgrehan */ 85221828Sgrehan#define VTNET_RXQ 0 86221828Sgrehan#define VTNET_TXQ 1 87221828Sgrehan#define VTNET_CTLQ 2 88221828Sgrehan 89221828Sgrehan#define VTNET_MAXQ 3 90221828Sgrehan 91221828Sgrehanstruct vring_hqueue { 92221828Sgrehan /* Internal state */ 93221828Sgrehan uint16_t hq_size; 94221828Sgrehan uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */ 95221828Sgrehan 96221828Sgrehan /* Host-context pointers to the queue */ 97221828Sgrehan struct virtio_desc *hq_dtable; 98221828Sgrehan uint16_t *hq_avail_flags; 99221828Sgrehan uint16_t *hq_avail_idx; /* monotonically increasing */ 100221828Sgrehan uint16_t *hq_avail_ring; 101221828Sgrehan 102221828Sgrehan uint16_t *hq_used_flags; 103221828Sgrehan uint16_t *hq_used_idx; /* monotonically increasing */ 104221828Sgrehan struct virtio_used *hq_used_ring; 105221828Sgrehan}; 106221828Sgrehan 107221828Sgrehan/* 108221828Sgrehan * Fixed network header size 109221828Sgrehan */ 110221828Sgrehanstruct virtio_net_rxhdr { 111221828Sgrehan uint8_t vrh_flags; 112221828Sgrehan uint8_t vrh_gso_type; 113221828Sgrehan uint16_t vrh_hdr_len; 114221828Sgrehan uint16_t vrh_gso_size; 115221828Sgrehan uint16_t vrh_csum_start; 116221828Sgrehan uint16_t vrh_csum_offset; 117221828Sgrehan uint16_t vrh_bufs; 118221828Sgrehan} __packed; 119221828Sgrehan 120221828Sgrehan/* 121221828Sgrehan * Debug printf 122221828Sgrehan */ 123221828Sgrehanstatic int pci_vtnet_debug; 124221828Sgrehan#define DPRINTF(params) if (pci_vtnet_debug) printf params 125221828Sgrehan#define WPRINTF(params) printf params 126221828Sgrehan 127221828Sgrehan/* 128221828Sgrehan * Per-device softc 129221828Sgrehan */ 130221828Sgrehanstruct pci_vtnet_softc { 131221828Sgrehan struct pci_devinst *vsc_pi; 132221828Sgrehan pthread_mutex_t vsc_mtx; 133221828Sgrehan struct mevent *vsc_mevp; 134221828Sgrehan 135221828Sgrehan int vsc_curq; 136221828Sgrehan int vsc_status; 137221828Sgrehan int vsc_isr; 138221828Sgrehan int vsc_tapfd; 139221828Sgrehan int vsc_rx_ready; 140221828Sgrehan int vsc_rxpend; 141221828Sgrehan 142221828Sgrehan uint32_t vsc_features; 143221828Sgrehan uint8_t vsc_macaddr[6]; 144221828Sgrehan 145221828Sgrehan uint64_t vsc_pfn[VTNET_MAXQ]; 146221828Sgrehan struct vring_hqueue vsc_hq[VTNET_MAXQ]; 147221828Sgrehan}; 148221828Sgrehan 149221828Sgrehan/* 150221828Sgrehan * Return the number of available descriptors in the vring taking care 151221828Sgrehan * of the 16-bit index wraparound. 152221828Sgrehan */ 153221828Sgrehanstatic int 154221828Sgrehanhq_num_avail(struct vring_hqueue *hq) 155221828Sgrehan{ 156221828Sgrehan int ndesc; 157221828Sgrehan 158221828Sgrehan if (*hq->hq_avail_idx >= hq->hq_cur_aidx) 159221828Sgrehan ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx; 160221828Sgrehan else 161221828Sgrehan ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1; 162221828Sgrehan 163221828Sgrehan assert(ndesc >= 0 && ndesc <= hq->hq_size); 164221828Sgrehan 165221828Sgrehan return (ndesc); 166221828Sgrehan} 167221828Sgrehan 168221828Sgrehanstatic uint16_t 169221828Sgrehanpci_vtnet_qsize(int qnum) 170221828Sgrehan{ 171221828Sgrehan /* XXX no ctl queue currently */ 172221828Sgrehan if (qnum == VTNET_CTLQ) { 173221828Sgrehan return (0); 174221828Sgrehan } 175221828Sgrehan 176221828Sgrehan /* XXX fixed currently. Maybe different for tx/rx/ctl */ 177221828Sgrehan return (VTNET_RINGSZ); 178221828Sgrehan} 179221828Sgrehan 180221828Sgrehanstatic void 181221828Sgrehanpci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value) 182221828Sgrehan{ 183221828Sgrehan if (value == 0) { 184221828Sgrehan DPRINTF(("vtnet: device reset requested !\n")); 185221828Sgrehan } 186221828Sgrehan 187221828Sgrehan sc->vsc_status = value; 188221828Sgrehan} 189221828Sgrehan 190221828Sgrehan/* 191221828Sgrehan * Called to send a buffer chain out to the tap device 192221828Sgrehan */ 193221828Sgrehanstatic void 194221828Sgrehanpci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, 195221828Sgrehan int len) 196221828Sgrehan{ 197221828Sgrehan char pad[60]; 198221828Sgrehan 199221828Sgrehan if (sc->vsc_tapfd == -1) 200221828Sgrehan return; 201221828Sgrehan 202221828Sgrehan /* 203221828Sgrehan * If the length is < 60, pad out to that and add the 204221828Sgrehan * extra zero'd segment to the iov. It is guaranteed that 205221828Sgrehan * there is always an extra iov available by the caller. 206221828Sgrehan */ 207221828Sgrehan if (len < 60) { 208221828Sgrehan memset(pad, 0, 60 - len); 209221828Sgrehan iov[iovcnt].iov_base = pad; 210221828Sgrehan iov[iovcnt].iov_len = 60 - len; 211221828Sgrehan iovcnt++; 212221828Sgrehan } 213221828Sgrehan (void) writev(sc->vsc_tapfd, iov, iovcnt); 214221828Sgrehan} 215221828Sgrehan 216221828Sgrehan/* 217221828Sgrehan * Called when there is read activity on the tap file descriptor. 218221828Sgrehan * Each buffer posted by the guest is assumed to be able to contain 219221828Sgrehan * an entire ethernet frame + rx header. 220221828Sgrehan * MP note: the dummybuf is only used for discarding frames, so there 221221828Sgrehan * is no need for it to be per-vtnet or locked. 222221828Sgrehan */ 223221828Sgrehanstatic uint8_t dummybuf[2048]; 224221828Sgrehan 225221828Sgrehanstatic void 226221828Sgrehanpci_vtnet_tap_rx(struct pci_vtnet_softc *sc) 227221828Sgrehan{ 228221828Sgrehan struct virtio_desc *vd; 229221828Sgrehan struct virtio_used *vu; 230221828Sgrehan struct vring_hqueue *hq; 231221828Sgrehan struct virtio_net_rxhdr *vrx; 232221828Sgrehan uint8_t *buf; 233221828Sgrehan int i; 234221828Sgrehan int len; 235221828Sgrehan int ndescs; 236221828Sgrehan int didx, uidx, aidx; /* descriptor, avail and used index */ 237221828Sgrehan 238221828Sgrehan /* 239221828Sgrehan * Should never be called without a valid tap fd 240221828Sgrehan */ 241221828Sgrehan assert(sc->vsc_tapfd != -1); 242221828Sgrehan 243221828Sgrehan /* 244221828Sgrehan * But, will be called when the rx ring hasn't yet 245221828Sgrehan * been set up. 246221828Sgrehan */ 247221828Sgrehan if (sc->vsc_rx_ready == 0) { 248221828Sgrehan /* 249221828Sgrehan * Drop the packet and try later. 250221828Sgrehan */ 251221828Sgrehan (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 252221828Sgrehan return; 253221828Sgrehan } 254221828Sgrehan 255221828Sgrehan /* 256221828Sgrehan * Calculate the number of available rx buffers 257221828Sgrehan */ 258221828Sgrehan hq = &sc->vsc_hq[VTNET_RXQ]; 259221828Sgrehan 260221828Sgrehan ndescs = hq_num_avail(hq); 261221828Sgrehan 262221828Sgrehan if (ndescs == 0) { 263221828Sgrehan /* 264221828Sgrehan * Need to wait for host notification to read 265221828Sgrehan */ 266221828Sgrehan if (sc->vsc_rxpend == 0) { 267221828Sgrehan WPRINTF(("vtnet: no rx descriptors !\n")); 268221828Sgrehan sc->vsc_rxpend = 1; 269221828Sgrehan } 270221828Sgrehan 271221828Sgrehan /* 272221828Sgrehan * Drop the packet and try later 273221828Sgrehan */ 274221828Sgrehan (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 275221828Sgrehan return; 276221828Sgrehan } 277221828Sgrehan 278221828Sgrehan aidx = hq->hq_cur_aidx; 279221828Sgrehan uidx = *hq->hq_used_idx; 280221828Sgrehan for (i = 0; i < ndescs; i++) { 281221828Sgrehan /* 282221828Sgrehan * 'aidx' indexes into the an array of descriptor indexes 283221828Sgrehan */ 284221828Sgrehan didx = hq->hq_avail_ring[aidx % hq->hq_size]; 285221828Sgrehan assert(didx >= 0 && didx < hq->hq_size); 286221828Sgrehan 287221828Sgrehan vd = &hq->hq_dtable[didx]; 288221828Sgrehan 289221828Sgrehan /* 290221828Sgrehan * Get a pointer to the rx header, and use the 291221828Sgrehan * data immediately following it for the packet buffer. 292221828Sgrehan */ 293221828Sgrehan vrx = (struct virtio_net_rxhdr *)paddr_guest2host(vd->vd_addr); 294221828Sgrehan buf = (uint8_t *)(vrx + 1); 295221828Sgrehan 296221828Sgrehan len = read(sc->vsc_tapfd, buf, 297221828Sgrehan vd->vd_len - sizeof(struct virtio_net_rxhdr)); 298221828Sgrehan 299221828Sgrehan if (len < 0 && errno == EWOULDBLOCK) { 300221828Sgrehan break; 301221828Sgrehan } 302221828Sgrehan 303221828Sgrehan /* 304221828Sgrehan * The only valid field in the rx packet header is the 305221828Sgrehan * number of buffers, which is always 1 without TSO 306221828Sgrehan * support. 307221828Sgrehan */ 308221828Sgrehan memset(vrx, 0, sizeof(struct virtio_net_rxhdr)); 309221828Sgrehan vrx->vrh_bufs = 1; 310221828Sgrehan 311221828Sgrehan /* 312221828Sgrehan * Write this descriptor into the used ring 313221828Sgrehan */ 314221828Sgrehan vu = &hq->hq_used_ring[uidx % hq->hq_size]; 315221828Sgrehan vu->vu_idx = didx; 316221828Sgrehan vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr); 317221828Sgrehan uidx++; 318221828Sgrehan aidx++; 319221828Sgrehan } 320221828Sgrehan 321221828Sgrehan /* 322221828Sgrehan * Update the used pointer, and signal an interrupt if allowed 323221828Sgrehan */ 324221828Sgrehan *hq->hq_used_idx = uidx; 325221828Sgrehan hq->hq_cur_aidx = aidx; 326221828Sgrehan 327221828Sgrehan if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) { 328221828Sgrehan sc->vsc_isr |= 1; 329221828Sgrehan pci_generate_msi(sc->vsc_pi, 0); 330221828Sgrehan } 331221828Sgrehan} 332221828Sgrehan 333221828Sgrehanstatic void 334221828Sgrehanpci_vtnet_tap_callback(int fd, enum ev_type type, void *param) 335221828Sgrehan{ 336221828Sgrehan struct pci_vtnet_softc *sc = param; 337221828Sgrehan 338221828Sgrehan pthread_mutex_lock(&sc->vsc_mtx); 339221828Sgrehan pci_vtnet_tap_rx(sc); 340221828Sgrehan pthread_mutex_unlock(&sc->vsc_mtx); 341221828Sgrehan 342221828Sgrehan} 343221828Sgrehan 344221828Sgrehanstatic void 345221828Sgrehanpci_vtnet_ping_rxq(struct pci_vtnet_softc *sc) 346221828Sgrehan{ 347221828Sgrehan /* 348221828Sgrehan * A qnotify means that the rx process can now begin 349221828Sgrehan */ 350221828Sgrehan if (sc->vsc_rx_ready == 0) { 351221828Sgrehan sc->vsc_rx_ready = 1; 352221828Sgrehan } 353221828Sgrehan 354221828Sgrehan /* 355221828Sgrehan * If the rx queue was empty, attempt to receive a 356221828Sgrehan * packet that was previously blocked due to no rx bufs 357221828Sgrehan * available 358221828Sgrehan */ 359221828Sgrehan if (sc->vsc_rxpend) { 360221828Sgrehan WPRINTF(("vtnet: rx resumed\n\r")); 361221828Sgrehan sc->vsc_rxpend = 0; 362221828Sgrehan pci_vtnet_tap_rx(sc); 363221828Sgrehan } 364221828Sgrehan} 365221828Sgrehan 366221828Sgrehanstatic void 367221828Sgrehanpci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq) 368221828Sgrehan{ 369221828Sgrehan struct iovec iov[VTNET_MAXSEGS + 1]; 370221828Sgrehan struct virtio_desc *vd; 371221828Sgrehan struct virtio_used *vu; 372221828Sgrehan int i; 373221828Sgrehan int plen; 374221828Sgrehan int tlen; 375221828Sgrehan int uidx, aidx, didx; 376221828Sgrehan 377221828Sgrehan uidx = *hq->hq_used_idx; 378221828Sgrehan aidx = hq->hq_cur_aidx; 379221828Sgrehan didx = hq->hq_avail_ring[aidx % hq->hq_size]; 380221828Sgrehan assert(didx >= 0 && didx < hq->hq_size); 381221828Sgrehan 382221828Sgrehan vd = &hq->hq_dtable[didx]; 383221828Sgrehan 384221828Sgrehan /* 385221828Sgrehan * Run through the chain of descriptors, ignoring the 386221828Sgrehan * first header descriptor. However, include the header 387221828Sgrehan * length in the total length that will be put into the 388221828Sgrehan * used queue. 389221828Sgrehan */ 390221828Sgrehan tlen = vd->vd_len; 391221828Sgrehan vd = &hq->hq_dtable[vd->vd_next]; 392221828Sgrehan 393221828Sgrehan for (i = 0, plen = 0; 394221828Sgrehan i < VTNET_MAXSEGS; 395221828Sgrehan i++, vd = &hq->hq_dtable[vd->vd_next]) { 396221828Sgrehan iov[i].iov_base = paddr_guest2host(vd->vd_addr); 397221828Sgrehan iov[i].iov_len = vd->vd_len; 398221828Sgrehan plen += vd->vd_len; 399221828Sgrehan tlen += vd->vd_len; 400221828Sgrehan 401221828Sgrehan if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0) 402221828Sgrehan break; 403221828Sgrehan } 404221828Sgrehan assert(i < VTNET_MAXSEGS); 405221828Sgrehan 406221828Sgrehan DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1)); 407221828Sgrehan pci_vtnet_tap_tx(sc, iov, i + 1, plen); 408221828Sgrehan 409221828Sgrehan /* 410221828Sgrehan * Return this chain back to the host 411221828Sgrehan */ 412221828Sgrehan vu = &hq->hq_used_ring[uidx % hq->hq_size]; 413221828Sgrehan vu->vu_idx = didx; 414221828Sgrehan vu->vu_tlen = tlen; 415221828Sgrehan hq->hq_cur_aidx = aidx + 1; 416221828Sgrehan *hq->hq_used_idx = uidx + 1; 417221828Sgrehan 418221828Sgrehan /* 419221828Sgrehan * Generate an interrupt if able 420221828Sgrehan */ 421221828Sgrehan if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) { 422221828Sgrehan sc->vsc_isr |= 1; 423221828Sgrehan pci_generate_msi(sc->vsc_pi, 0); 424221828Sgrehan } 425221828Sgrehan} 426221828Sgrehan 427221828Sgrehanstatic void 428221828Sgrehanpci_vtnet_ping_txq(struct pci_vtnet_softc *sc) 429221828Sgrehan{ 430221828Sgrehan struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ]; 431221828Sgrehan int i; 432221828Sgrehan int ndescs; 433221828Sgrehan 434221828Sgrehan /* 435221828Sgrehan * Calculate number of ring entries to process 436221828Sgrehan */ 437221828Sgrehan ndescs = hq_num_avail(hq); 438221828Sgrehan 439221828Sgrehan if (ndescs == 0) 440221828Sgrehan return; 441221828Sgrehan 442221828Sgrehan /* 443221828Sgrehan * Run through all the entries, placing them into iovecs and 444221828Sgrehan * sending when an end-of-packet is found 445221828Sgrehan */ 446221828Sgrehan for (i = 0; i < ndescs; i++) 447221828Sgrehan pci_vtnet_proctx(sc, hq); 448221828Sgrehan} 449221828Sgrehan 450221828Sgrehanstatic void 451221828Sgrehanpci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc) 452221828Sgrehan{ 453221828Sgrehan 454221828Sgrehan DPRINTF(("vtnet: control qnotify!\n\r")); 455221828Sgrehan} 456221828Sgrehan 457221828Sgrehanstatic void 458221828Sgrehanpci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn) 459221828Sgrehan{ 460221828Sgrehan struct vring_hqueue *hq; 461221828Sgrehan int qnum = sc->vsc_curq; 462221828Sgrehan 463221828Sgrehan assert(qnum < VTNET_MAXQ); 464221828Sgrehan 465221828Sgrehan sc->vsc_pfn[qnum] = pfn << VRING_PFN; 466221828Sgrehan 467221828Sgrehan /* 468221828Sgrehan * Set up host pointers to the various parts of the 469221828Sgrehan * queue 470221828Sgrehan */ 471221828Sgrehan hq = &sc->vsc_hq[qnum]; 472221828Sgrehan hq->hq_size = pci_vtnet_qsize(qnum); 473221828Sgrehan 474221828Sgrehan hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN); 475221828Sgrehan hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size); 476221828Sgrehan hq->hq_avail_idx = hq->hq_avail_flags + 1; 477221828Sgrehan hq->hq_avail_ring = hq->hq_avail_flags + 2; 478221828Sgrehan hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring, 479221828Sgrehan VRING_ALIGN); 480221828Sgrehan hq->hq_used_idx = hq->hq_used_flags + 1; 481221828Sgrehan hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2); 482221828Sgrehan 483221828Sgrehan /* 484221828Sgrehan * Initialize queue indexes 485221828Sgrehan */ 486221828Sgrehan hq->hq_cur_aidx = 0; 487221828Sgrehan} 488221828Sgrehan 489221828Sgrehanstatic int 490221828Sgrehanpci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 491221828Sgrehan{ 492221828Sgrehan MD5_CTX mdctx; 493221828Sgrehan unsigned char digest[16]; 494221828Sgrehan char nstr[80]; 495221828Sgrehan struct pci_vtnet_softc *sc; 496221828Sgrehan 497221828Sgrehan /* 498221828Sgrehan * Access to guest memory is required. Fail if 499221828Sgrehan * memory not mapped 500221828Sgrehan */ 501221828Sgrehan if (paddr_guest2host(0) == NULL) 502221828Sgrehan return (1); 503221828Sgrehan 504221828Sgrehan sc = malloc(sizeof(struct pci_vtnet_softc)); 505221828Sgrehan memset(sc, 0, sizeof(struct pci_vtnet_softc)); 506221828Sgrehan 507221828Sgrehan pi->pi_arg = sc; 508221828Sgrehan sc->vsc_pi = pi; 509221828Sgrehan 510221828Sgrehan pthread_mutex_init(&sc->vsc_mtx, NULL); 511221828Sgrehan 512221828Sgrehan /* 513221828Sgrehan * Attempt to open the tap device 514221828Sgrehan */ 515221828Sgrehan sc->vsc_tapfd = -1; 516221828Sgrehan if (opts != NULL) { 517221828Sgrehan char tbuf[80]; 518221828Sgrehan 519221828Sgrehan strcpy(tbuf, "/dev/"); 520242882Sneel strlcat(tbuf, opts, sizeof(tbuf)); 521221828Sgrehan 522221828Sgrehan sc->vsc_tapfd = open(tbuf, O_RDWR); 523221828Sgrehan if (sc->vsc_tapfd == -1) { 524221828Sgrehan WPRINTF(("open of tap device %s failed\n", tbuf)); 525221828Sgrehan } else { 526221828Sgrehan /* 527221828Sgrehan * Set non-blocking and register for read 528221828Sgrehan * notifications with the event loop 529221828Sgrehan */ 530221828Sgrehan int opt = 1; 531221828Sgrehan if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { 532221828Sgrehan WPRINTF(("tap device O_NONBLOCK failed\n")); 533221828Sgrehan close(sc->vsc_tapfd); 534221828Sgrehan sc->vsc_tapfd = -1; 535221828Sgrehan } 536221828Sgrehan 537221828Sgrehan sc->vsc_mevp = mevent_add(sc->vsc_tapfd, 538221828Sgrehan EVF_READ, 539221828Sgrehan pci_vtnet_tap_callback, 540221828Sgrehan sc); 541221828Sgrehan if (sc->vsc_mevp == NULL) { 542221828Sgrehan WPRINTF(("Could not register event\n")); 543221828Sgrehan close(sc->vsc_tapfd); 544221828Sgrehan sc->vsc_tapfd = -1; 545221828Sgrehan } 546221828Sgrehan } 547221828Sgrehan } 548221828Sgrehan 549221828Sgrehan /* 550221828Sgrehan * The MAC address is the standard NetApp OUI of 00-a0-98, 551244159Sgrehan * followed by an MD5 of the vm name. The slot/func number is 552244159Sgrehan * prepended to this for slots other than 1:0, so that 553244159Sgrehan * a bootloader can netboot from the equivalent of slot 1. 554221828Sgrehan */ 555244159Sgrehan if (pi->pi_slot == 1 && pi->pi_func == 0) { 556221828Sgrehan strncpy(nstr, vmname, sizeof(nstr)); 557221828Sgrehan } else { 558244159Sgrehan snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, 559244159Sgrehan pi->pi_func, vmname); 560221828Sgrehan } 561221828Sgrehan 562221828Sgrehan MD5Init(&mdctx); 563221828Sgrehan MD5Update(&mdctx, nstr, strlen(nstr)); 564221828Sgrehan MD5Final(digest, &mdctx); 565221828Sgrehan 566221828Sgrehan sc->vsc_macaddr[0] = 0x00; 567221828Sgrehan sc->vsc_macaddr[1] = 0xa0; 568221828Sgrehan sc->vsc_macaddr[2] = 0x98; 569221828Sgrehan sc->vsc_macaddr[3] = digest[0]; 570221828Sgrehan sc->vsc_macaddr[4] = digest[1]; 571221828Sgrehan sc->vsc_macaddr[5] = digest[2]; 572221828Sgrehan 573221828Sgrehan /* initialize config space */ 574221828Sgrehan pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 575221828Sgrehan pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 576221828Sgrehan pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 577221828Sgrehan pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); 578221828Sgrehan pci_emul_add_msicap(pi, 1); 579241744Sgrehan pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTNET_REGSZ); 580221828Sgrehan 581221828Sgrehan return (0); 582221828Sgrehan} 583221828Sgrehan 584221828Sgrehan/* 585221828Sgrehan * Function pointer array to handle queue notifications 586221828Sgrehan */ 587221828Sgrehanstatic void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = { 588221828Sgrehan pci_vtnet_ping_rxq, 589221828Sgrehan pci_vtnet_ping_txq, 590221828Sgrehan pci_vtnet_ping_ctlq 591221828Sgrehan}; 592221828Sgrehan 593221828Sgrehanstatic void 594241744Sgrehanpci_vtnet_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 595241744Sgrehan int baridx, uint64_t offset, int size, uint64_t value) 596221828Sgrehan{ 597221828Sgrehan struct pci_vtnet_softc *sc = pi->pi_arg; 598222830Sgrehan void *ptr; 599222830Sgrehan 600241744Sgrehan assert(baridx == 0); 601241744Sgrehan 602221828Sgrehan if (offset + size > VTNET_REGSZ) { 603241744Sgrehan DPRINTF(("vtnet_write: 2big, offset %ld size %d\n", 604221828Sgrehan offset, size)); 605221828Sgrehan return; 606221828Sgrehan } 607221828Sgrehan 608221828Sgrehan pthread_mutex_lock(&sc->vsc_mtx); 609221828Sgrehan 610221828Sgrehan switch (offset) { 611221828Sgrehan case VTCFG_R_GUESTCAP: 612221828Sgrehan assert(size == 4); 613221828Sgrehan sc->vsc_features = value & VTNET_S_HOSTCAPS; 614221828Sgrehan break; 615221828Sgrehan case VTCFG_R_PFN: 616221828Sgrehan assert(size == 4); 617221828Sgrehan pci_vtnet_ring_init(sc, value); 618221828Sgrehan break; 619221828Sgrehan case VTCFG_R_QSEL: 620221828Sgrehan assert(size == 2); 621221828Sgrehan assert(value < VTNET_MAXQ); 622221828Sgrehan sc->vsc_curq = value; 623221828Sgrehan break; 624221828Sgrehan case VTCFG_R_QNOTIFY: 625221828Sgrehan assert(size == 2); 626221828Sgrehan assert(value < VTNET_MAXQ); 627221828Sgrehan (*pci_vtnet_qnotify[value])(sc); 628221828Sgrehan break; 629221828Sgrehan case VTCFG_R_STATUS: 630221828Sgrehan assert(size == 1); 631221828Sgrehan pci_vtnet_update_status(sc, value); 632221828Sgrehan break; 633221828Sgrehan case VTNET_R_CFG0: 634221828Sgrehan case VTNET_R_CFG1: 635221828Sgrehan case VTNET_R_CFG2: 636221828Sgrehan case VTNET_R_CFG3: 637221828Sgrehan case VTNET_R_CFG4: 638221828Sgrehan case VTNET_R_CFG5: 639222830Sgrehan assert((size + offset) <= (VTNET_R_CFG5 + 1)); 640222830Sgrehan ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 641221828Sgrehan /* 642221828Sgrehan * The driver is allowed to change the MAC address 643221828Sgrehan */ 644221828Sgrehan sc->vsc_macaddr[offset - VTNET_R_CFG0] = value; 645222830Sgrehan if (size == 1) { 646222830Sgrehan *(uint8_t *) ptr = value; 647222830Sgrehan } else if (size == 2) { 648222830Sgrehan *(uint16_t *) ptr = value; 649222830Sgrehan } else { 650222830Sgrehan *(uint32_t *) ptr = value; 651222830Sgrehan } 652221828Sgrehan break; 653221828Sgrehan case VTCFG_R_HOSTCAP: 654221828Sgrehan case VTCFG_R_QNUM: 655221828Sgrehan case VTCFG_R_ISR: 656221828Sgrehan case VTNET_R_CFG6: 657221828Sgrehan case VTNET_R_CFG7: 658241744Sgrehan DPRINTF(("vtnet: write to readonly reg %ld\n\r", offset)); 659221828Sgrehan break; 660221828Sgrehan default: 661241744Sgrehan DPRINTF(("vtnet: unknown i/o write offset %ld\n\r", offset)); 662221828Sgrehan value = 0; 663221828Sgrehan break; 664221828Sgrehan } 665221828Sgrehan 666221828Sgrehan pthread_mutex_unlock(&sc->vsc_mtx); 667221828Sgrehan} 668221828Sgrehan 669241744Sgrehanuint64_t 670241744Sgrehanpci_vtnet_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 671241744Sgrehan int baridx, uint64_t offset, int size) 672221828Sgrehan{ 673221828Sgrehan struct pci_vtnet_softc *sc = pi->pi_arg; 674222830Sgrehan void *ptr; 675241744Sgrehan uint64_t value; 676221828Sgrehan 677241744Sgrehan assert(baridx == 0); 678241744Sgrehan 679221828Sgrehan if (offset + size > VTNET_REGSZ) { 680241744Sgrehan DPRINTF(("vtnet_read: 2big, offset %ld size %d\n", 681221828Sgrehan offset, size)); 682221828Sgrehan return (0); 683221828Sgrehan } 684221828Sgrehan 685221828Sgrehan pthread_mutex_lock(&sc->vsc_mtx); 686221828Sgrehan 687221828Sgrehan switch (offset) { 688221828Sgrehan case VTCFG_R_HOSTCAP: 689221828Sgrehan assert(size == 4); 690221828Sgrehan value = VTNET_S_HOSTCAPS; 691221828Sgrehan break; 692221828Sgrehan case VTCFG_R_GUESTCAP: 693221828Sgrehan assert(size == 4); 694221828Sgrehan value = sc->vsc_features; /* XXX never read ? */ 695221828Sgrehan break; 696221828Sgrehan case VTCFG_R_PFN: 697221828Sgrehan assert(size == 4); 698221828Sgrehan value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN; 699221828Sgrehan break; 700221828Sgrehan case VTCFG_R_QNUM: 701221828Sgrehan assert(size == 2); 702221828Sgrehan value = pci_vtnet_qsize(sc->vsc_curq); 703221828Sgrehan break; 704221828Sgrehan case VTCFG_R_QSEL: 705221828Sgrehan assert(size == 2); 706221828Sgrehan value = sc->vsc_curq; /* XXX never read ? */ 707221828Sgrehan break; 708221828Sgrehan case VTCFG_R_QNOTIFY: 709221828Sgrehan assert(size == 2); 710221828Sgrehan value = sc->vsc_curq; /* XXX never read ? */ 711221828Sgrehan break; 712221828Sgrehan case VTCFG_R_STATUS: 713221828Sgrehan assert(size == 1); 714221828Sgrehan value = sc->vsc_status; 715221828Sgrehan break; 716221828Sgrehan case VTCFG_R_ISR: 717221828Sgrehan assert(size == 1); 718221828Sgrehan value = sc->vsc_isr; 719221828Sgrehan sc->vsc_isr = 0; /* a read clears this flag */ 720221828Sgrehan break; 721221828Sgrehan case VTNET_R_CFG0: 722221828Sgrehan case VTNET_R_CFG1: 723221828Sgrehan case VTNET_R_CFG2: 724221828Sgrehan case VTNET_R_CFG3: 725221828Sgrehan case VTNET_R_CFG4: 726221828Sgrehan case VTNET_R_CFG5: 727222830Sgrehan assert((size + offset) <= (VTNET_R_CFG5 + 1)); 728222830Sgrehan ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 729222830Sgrehan if (size == 1) { 730222830Sgrehan value = *(uint8_t *) ptr; 731222830Sgrehan } else if (size == 2) { 732222830Sgrehan value = *(uint16_t *) ptr; 733222830Sgrehan } else { 734222830Sgrehan value = *(uint32_t *) ptr; 735222830Sgrehan } 736221828Sgrehan break; 737221828Sgrehan case VTNET_R_CFG6: 738222830Sgrehan assert(size != 4); 739222830Sgrehan value = 0x01; /* XXX link always up */ 740221828Sgrehan break; 741221828Sgrehan case VTNET_R_CFG7: 742221828Sgrehan assert(size == 1); 743222830Sgrehan value = 0; /* XXX link status in LSB */ 744221828Sgrehan break; 745221828Sgrehan default: 746241744Sgrehan DPRINTF(("vtnet: unknown i/o read offset %ld\n\r", offset)); 747221828Sgrehan value = 0; 748221828Sgrehan break; 749221828Sgrehan } 750221828Sgrehan 751221828Sgrehan pthread_mutex_unlock(&sc->vsc_mtx); 752221828Sgrehan 753221828Sgrehan return (value); 754221828Sgrehan} 755221828Sgrehan 756221828Sgrehanstruct pci_devemu pci_de_vnet = { 757241744Sgrehan .pe_emu = "virtio-net", 758241744Sgrehan .pe_init = pci_vtnet_init, 759241744Sgrehan .pe_barwrite = pci_vtnet_write, 760241744Sgrehan .pe_barread = pci_vtnet_read 761221828Sgrehan}; 762221828SgrehanPCI_EMUL_SET(pci_de_vnet); 763