/* pci_virtio_net.c revision 248368 */
1254939Sdes/*- 2254939Sdes * Copyright (c) 2011 NetApp, Inc. 3254939Sdes * All rights reserved. 4254939Sdes * 5254939Sdes * Redistribution and use in source and binary forms, with or without 6254939Sdes * modification, are permitted provided that the following conditions 7254939Sdes * are met: 8254939Sdes * 1. Redistributions of source code must retain the above copyright 9254939Sdes * notice, this list of conditions and the following disclaimer. 10255403Sdes * 2. Redistributions in binary form must reproduce the above copyright 11255403Sdes * notice, this list of conditions and the following disclaimer in the 12254939Sdes * documentation and/or other materials provided with the distribution. 13254939Sdes * 14254939Sdes * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15254939Sdes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16254939Sdes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17254939Sdes * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18255403Sdes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19255403Sdes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20254939Sdes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21254939Sdes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22254939Sdes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23254939Sdes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24254939Sdes * SUCH DAMAGE. 
25254939Sdes * 26254939Sdes * $FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 248368 2013-03-16 05:40:29Z neel $ 27254939Sdes */ 28254939Sdes 29254939Sdes#include <sys/cdefs.h> 30254939Sdes__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 248368 2013-03-16 05:40:29Z neel $"); 31254939Sdes 32254939Sdes#include <sys/param.h> 33254939Sdes#include <sys/linker_set.h> 34254939Sdes#include <sys/select.h> 35254939Sdes#include <sys/uio.h> 36254939Sdes#include <sys/ioctl.h> 37254939Sdes 38254939Sdes#include <errno.h> 39254939Sdes#include <fcntl.h> 40254939Sdes#include <stdio.h> 41254939Sdes#include <stdlib.h> 42254939Sdes#include <stdint.h> 43254939Sdes#include <string.h> 44254939Sdes#include <strings.h> 45254939Sdes#include <unistd.h> 46254939Sdes#include <assert.h> 47254939Sdes#include <md5.h> 48254939Sdes#include <pthread.h> 49254939Sdes 50254939Sdes#include "bhyverun.h" 51254939Sdes#include "pci_emul.h" 52254939Sdes#include "mevent.h" 53254939Sdes#include "virtio.h" 54254939Sdes 55254939Sdes#define VTNET_RINGSZ 256 56254939Sdes 57254939Sdes#define VTNET_MAXSEGS 32 58254939Sdes 59254939Sdes/* 60254939Sdes * PCI config-space register offsets 61254939Sdes */ 62254939Sdes#define VTNET_R_CFG0 24 63254939Sdes#define VTNET_R_CFG1 25 64254939Sdes#define VTNET_R_CFG2 26 65254939Sdes#define VTNET_R_CFG3 27 66254939Sdes#define VTNET_R_CFG4 28 67254939Sdes#define VTNET_R_CFG5 29 68254939Sdes#define VTNET_R_CFG6 30 69254939Sdes#define VTNET_R_CFG7 31 70254939Sdes#define VTNET_R_MAX 31 71254939Sdes 72254939Sdes#define VTNET_REGSZ VTNET_R_MAX+1 73254939Sdes 74254939Sdes/* 75254939Sdes * Host capabilities 76254939Sdes */ 77254939Sdes#define VTNET_S_HOSTCAPS \ 78254939Sdes ( 0x00000020 | /* host supplies MAC */ \ 79254939Sdes 0x00008000 | /* host can merge Rx buffers */ \ 80254939Sdes 0x00010000 ) /* config status available */ 81254939Sdes 82254939Sdes/* 83254939Sdes * Queue definitions. 
84254939Sdes */ 85254939Sdes#define VTNET_RXQ 0 86254939Sdes#define VTNET_TXQ 1 87254939Sdes#define VTNET_CTLQ 2 88254939Sdes 89254939Sdes#define VTNET_MAXQ 3 90254939Sdes 91254939Sdesstatic int use_msix = 1; 92254939Sdes 93254939Sdesstruct vring_hqueue { 94254939Sdes /* Internal state */ 95254939Sdes uint16_t hq_size; 96254939Sdes uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */ 97254939Sdes 98254939Sdes /* Host-context pointers to the queue */ 99254939Sdes struct virtio_desc *hq_dtable; 100254939Sdes uint16_t *hq_avail_flags; 101254939Sdes uint16_t *hq_avail_idx; /* monotonically increasing */ 102254939Sdes uint16_t *hq_avail_ring; 103254939Sdes 104254939Sdes uint16_t *hq_used_flags; 105254939Sdes uint16_t *hq_used_idx; /* monotonically increasing */ 106254939Sdes struct virtio_used *hq_used_ring; 107254939Sdes}; 108254939Sdes 109254939Sdes/* 110254939Sdes * Fixed network header size 111254939Sdes */ 112254939Sdesstruct virtio_net_rxhdr { 113254939Sdes uint8_t vrh_flags; 114254939Sdes uint8_t vrh_gso_type; 115254939Sdes uint16_t vrh_hdr_len; 116254939Sdes uint16_t vrh_gso_size; 117254939Sdes uint16_t vrh_csum_start; 118254939Sdes uint16_t vrh_csum_offset; 119254939Sdes uint16_t vrh_bufs; 120254939Sdes} __packed; 121254939Sdes 122254939Sdes/* 123254939Sdes * Debug printf 124254939Sdes */ 125254939Sdesstatic int pci_vtnet_debug; 126254939Sdes#define DPRINTF(params) if (pci_vtnet_debug) printf params 127254939Sdes#define WPRINTF(params) printf params 128254939Sdes 129254939Sdes/* 130254939Sdes * Per-device softc 131254939Sdes */ 132254939Sdesstruct pci_vtnet_softc { 133254939Sdes struct pci_devinst *vsc_pi; 134254939Sdes pthread_mutex_t vsc_mtx; 135254939Sdes struct mevent *vsc_mevp; 136254939Sdes 137254939Sdes int vsc_curq; 138254939Sdes int vsc_status; 139254939Sdes int vsc_isr; 140254939Sdes int vsc_tapfd; 141254939Sdes int vsc_rx_ready; 142254939Sdes int vsc_rxpend; 143254939Sdes 144254939Sdes uint32_t vsc_features; 145254939Sdes uint8_t vsc_macaddr[6]; 
146254939Sdes 147254939Sdes uint64_t vsc_pfn[VTNET_MAXQ]; 148254939Sdes struct vring_hqueue vsc_hq[VTNET_MAXQ]; 149254939Sdes uint16_t vsc_msix_table_idx[VTNET_MAXQ]; 150254939Sdes}; 151254939Sdes 152254939Sdes/* 153254939Sdes * Return the size of IO BAR that maps virtio header and device specific 154254939Sdes * region. The size would vary depending on whether MSI-X is enabled or 155254939Sdes * not. 156254939Sdes */ 157254939Sdesstatic uint64_t 158254939Sdespci_vtnet_iosize(struct pci_devinst *pi) 159254939Sdes{ 160254939Sdes if (pci_msix_enabled(pi)) 161254939Sdes return (VTNET_REGSZ); 162254939Sdes else 163254939Sdes return (VTNET_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX)); 164254939Sdes} 165254939Sdes 166254939Sdes/* 167254939Sdes * Return the number of available descriptors in the vring taking care 168254939Sdes * of the 16-bit index wraparound. 169254939Sdes */ 170254939Sdesstatic int 171254939Sdeshq_num_avail(struct vring_hqueue *hq) 172254939Sdes{ 173254939Sdes uint16_t ndesc; 174254939Sdes 175254939Sdes /* 176254939Sdes * We're just computing (a-b) in GF(216). 177254939Sdes * 178254939Sdes * The only glitch here is that in standard C, 179254939Sdes * uint16_t promotes to (signed) int when int has 180254939Sdes * more than 16 bits (pretty much always now), so 181254939Sdes * we have to force it back to unsigned. 182254939Sdes */ 183254939Sdes ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx; 184254939Sdes 185254939Sdes assert(ndesc <= hq->hq_size); 186254939Sdes 187254939Sdes return (ndesc); 188254939Sdes} 189254939Sdes 190254939Sdesstatic uint16_t 191254939Sdespci_vtnet_qsize(int qnum) 192254939Sdes{ 193254939Sdes /* XXX no ctl queue currently */ 194254939Sdes if (qnum == VTNET_CTLQ) { 195254939Sdes return (0); 196254939Sdes } 197254939Sdes 198254939Sdes /* XXX fixed currently. 
Maybe different for tx/rx/ctl */ 199254939Sdes return (VTNET_RINGSZ); 200254939Sdes} 201254939Sdes 202254939Sdesstatic void 203254939Sdespci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring) 204255403Sdes{ 205254939Sdes struct vring_hqueue *hq; 206255403Sdes 207255403Sdes assert(ring < VTNET_MAXQ); 208255403Sdes 209254939Sdes hq = &sc->vsc_hq[ring]; 210254939Sdes 211254939Sdes /* 212254939Sdes * Reset all soft state 213254939Sdes */ 214254939Sdes hq->hq_cur_aidx = 0; 215254939Sdes} 216254939Sdes 217254939Sdesstatic void 218254939Sdespci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value) 219254939Sdes{ 220254939Sdes 221254939Sdes if (value == 0) { 222254939Sdes DPRINTF(("vtnet: device reset requested !\n")); 223254939Sdes pci_vtnet_ring_reset(sc, VTNET_RXQ); 224254939Sdes pci_vtnet_ring_reset(sc, VTNET_TXQ); 225254939Sdes sc->vsc_rx_ready = 0; 226254939Sdes } 227254939Sdes 228254939Sdes sc->vsc_status = value; 229254939Sdes} 230254939Sdes 231254939Sdes/* 232254939Sdes * Called to send a buffer chain out to the tap device 233254939Sdes */ 234254939Sdesstatic void 235254939Sdespci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, 236254939Sdes int len) 237254939Sdes{ 238254939Sdes char pad[60]; 239254939Sdes 240254939Sdes if (sc->vsc_tapfd == -1) 241254939Sdes return; 242254939Sdes 243254939Sdes /* 244254939Sdes * If the length is < 60, pad out to that and add the 245254939Sdes * extra zero'd segment to the iov. It is guaranteed that 246254939Sdes * there is always an extra iov available by the caller. 247254939Sdes */ 248254939Sdes if (len < 60) { 249254939Sdes memset(pad, 0, 60 - len); 250254939Sdes iov[iovcnt].iov_base = pad; 251254939Sdes iov[iovcnt].iov_len = 60 - len; 252254939Sdes iovcnt++; 253254939Sdes } 254254939Sdes (void) writev(sc->vsc_tapfd, iov, iovcnt); 255254939Sdes} 256254939Sdes 257254939Sdes/* 258254939Sdes * Called when there is read activity on the tap file descriptor. 
259254939Sdes * Each buffer posted by the guest is assumed to be able to contain 260254939Sdes * an entire ethernet frame + rx header. 261254939Sdes * MP note: the dummybuf is only used for discarding frames, so there 262254939Sdes * is no need for it to be per-vtnet or locked. 263254939Sdes */ 264254939Sdesstatic uint8_t dummybuf[2048]; 265254939Sdes 266254939Sdesstatic void 267254939Sdespci_vtnet_tap_rx(struct pci_vtnet_softc *sc) 268254939Sdes{ 269254939Sdes struct virtio_desc *vd; 270254939Sdes struct virtio_used *vu; 271254939Sdes struct vring_hqueue *hq; 272254939Sdes struct virtio_net_rxhdr *vrx; 273254939Sdes uint8_t *buf; 274254939Sdes int i; 275254939Sdes int len; 276254939Sdes int ndescs; 277254939Sdes int didx, uidx, aidx; /* descriptor, avail and used index */ 278254939Sdes 279254939Sdes /* 280254939Sdes * Should never be called without a valid tap fd 281254939Sdes */ 282254939Sdes assert(sc->vsc_tapfd != -1); 283254939Sdes 284254939Sdes /* 285254939Sdes * But, will be called when the rx ring hasn't yet 286254939Sdes * been set up. 287254939Sdes */ 288254939Sdes if (sc->vsc_rx_ready == 0) { 289254939Sdes /* 290254939Sdes * Drop the packet and try later. 
291254939Sdes */ 292254939Sdes (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 293254939Sdes return; 294254939Sdes } 295254939Sdes 296254939Sdes /* 297254939Sdes * Calculate the number of available rx buffers 298254939Sdes */ 299254939Sdes hq = &sc->vsc_hq[VTNET_RXQ]; 300254939Sdes 301254939Sdes ndescs = hq_num_avail(hq); 302254939Sdes 303254939Sdes if (ndescs == 0) { 304254939Sdes /* 305254939Sdes * Need to wait for host notification to read 306254939Sdes */ 307254939Sdes if (sc->vsc_rxpend == 0) { 308254939Sdes WPRINTF(("vtnet: no rx descriptors !\n")); 309254939Sdes sc->vsc_rxpend = 1; 310254939Sdes } 311254939Sdes 312254939Sdes /* 313254939Sdes * Drop the packet and try later 314254939Sdes */ 315254939Sdes (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 316254939Sdes return; 317254939Sdes } 318254939Sdes 319254939Sdes aidx = hq->hq_cur_aidx; 320254939Sdes uidx = *hq->hq_used_idx; 321254939Sdes for (i = 0; i < ndescs; i++) { 322254939Sdes /* 323254939Sdes * 'aidx' indexes into the an array of descriptor indexes 324254939Sdes */ 325254939Sdes didx = hq->hq_avail_ring[aidx % hq->hq_size]; 326254939Sdes assert(didx >= 0 && didx < hq->hq_size); 327254939Sdes 328254939Sdes vd = &hq->hq_dtable[didx]; 329254939Sdes 330254939Sdes /* 331254939Sdes * Get a pointer to the rx header, and use the 332254939Sdes * data immediately following it for the packet buffer. 333254939Sdes */ 334254939Sdes vrx = paddr_guest2host(vd->vd_addr, vd->vd_len); 335254939Sdes buf = (uint8_t *)(vrx + 1); 336254939Sdes 337254939Sdes len = read(sc->vsc_tapfd, buf, 338254939Sdes vd->vd_len - sizeof(struct virtio_net_rxhdr)); 339254939Sdes 340254939Sdes if (len < 0 && errno == EWOULDBLOCK) { 341254939Sdes break; 342254939Sdes } 343254939Sdes 344254939Sdes /* 345254939Sdes * The only valid field in the rx packet header is the 346254939Sdes * number of buffers, which is always 1 without TSO 347254939Sdes * support. 
348254939Sdes */ 349254939Sdes memset(vrx, 0, sizeof(struct virtio_net_rxhdr)); 350254939Sdes vrx->vrh_bufs = 1; 351254939Sdes 352254939Sdes /* 353254939Sdes * Write this descriptor into the used ring 354254939Sdes */ 355254939Sdes vu = &hq->hq_used_ring[uidx % hq->hq_size]; 356254939Sdes vu->vu_idx = didx; 357254939Sdes vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr); 358254939Sdes uidx++; 359254939Sdes aidx++; 360254939Sdes } 361254939Sdes 362254939Sdes /* 363254939Sdes * Update the used pointer, and signal an interrupt if allowed 364254939Sdes */ 365254939Sdes *hq->hq_used_idx = uidx; 366254939Sdes hq->hq_cur_aidx = aidx; 367254939Sdes 368254939Sdes if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) { 369254939Sdes if (use_msix) { 370254939Sdes pci_generate_msix(sc->vsc_pi, 371254939Sdes sc->vsc_msix_table_idx[VTNET_RXQ]); 372254939Sdes } else { 373254939Sdes sc->vsc_isr |= 1; 374254939Sdes pci_generate_msi(sc->vsc_pi, 0); 375254939Sdes } 376254939Sdes } 377254939Sdes} 378254939Sdes 379254939Sdesstatic void 380254939Sdespci_vtnet_tap_callback(int fd, enum ev_type type, void *param) 381254939Sdes{ 382254939Sdes struct pci_vtnet_softc *sc = param; 383254939Sdes 384254939Sdes pthread_mutex_lock(&sc->vsc_mtx); 385254939Sdes pci_vtnet_tap_rx(sc); 386254939Sdes pthread_mutex_unlock(&sc->vsc_mtx); 387254939Sdes 388254939Sdes} 389254939Sdes 390254939Sdesstatic void 391254939Sdespci_vtnet_ping_rxq(struct pci_vtnet_softc *sc) 392254939Sdes{ 393254939Sdes /* 394254939Sdes * A qnotify means that the rx process can now begin 395254939Sdes */ 396254939Sdes if (sc->vsc_rx_ready == 0) { 397254939Sdes sc->vsc_rx_ready = 1; 398254939Sdes } 399254939Sdes 400254939Sdes /* 401254939Sdes * If the rx queue was empty, attempt to receive a 402254939Sdes * packet that was previously blocked due to no rx bufs 403254939Sdes * available 404254939Sdes */ 405254939Sdes if (sc->vsc_rxpend) { 406254939Sdes WPRINTF(("vtnet: rx resumed\n\r")); 407254939Sdes sc->vsc_rxpend = 0; 
408254939Sdes pci_vtnet_tap_rx(sc); 409254939Sdes } 410254939Sdes} 411254939Sdes 412254939Sdesstatic void 413254939Sdespci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq) 414254939Sdes{ 415254939Sdes struct iovec iov[VTNET_MAXSEGS + 1]; 416254939Sdes struct virtio_desc *vd; 417254939Sdes struct virtio_used *vu; 418254939Sdes int i; 419254939Sdes int plen; 420254939Sdes int tlen; 421254939Sdes int uidx, aidx, didx; 422254939Sdes 423254939Sdes uidx = *hq->hq_used_idx; 424254939Sdes aidx = hq->hq_cur_aidx; 425254939Sdes didx = hq->hq_avail_ring[aidx % hq->hq_size]; 426254939Sdes assert(didx >= 0 && didx < hq->hq_size); 427254939Sdes 428254939Sdes vd = &hq->hq_dtable[didx]; 429254939Sdes 430254939Sdes /* 431254939Sdes * Run through the chain of descriptors, ignoring the 432254939Sdes * first header descriptor. However, include the header 433254939Sdes * length in the total length that will be put into the 434254939Sdes * used queue. 435254939Sdes */ 436254939Sdes tlen = vd->vd_len; 437254939Sdes vd = &hq->hq_dtable[vd->vd_next]; 438254939Sdes 439254939Sdes for (i = 0, plen = 0; 440254939Sdes i < VTNET_MAXSEGS; 441254939Sdes i++, vd = &hq->hq_dtable[vd->vd_next]) { 442254939Sdes iov[i].iov_base = paddr_guest2host(vd->vd_addr, vd->vd_len); 443254939Sdes iov[i].iov_len = vd->vd_len; 444254939Sdes plen += vd->vd_len; 445254939Sdes tlen += vd->vd_len; 446254939Sdes 447254939Sdes if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0) 448254939Sdes break; 449254939Sdes } 450254939Sdes assert(i < VTNET_MAXSEGS); 451254939Sdes 452254939Sdes DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1)); 453254939Sdes pci_vtnet_tap_tx(sc, iov, i + 1, plen); 454254939Sdes 455254939Sdes /* 456254939Sdes * Return this chain back to the host 457254939Sdes */ 458254939Sdes vu = &hq->hq_used_ring[uidx % hq->hq_size]; 459254939Sdes vu->vu_idx = didx; 460254939Sdes vu->vu_tlen = tlen; 461254939Sdes hq->hq_cur_aidx = aidx + 1; 462254939Sdes *hq->hq_used_idx = uidx + 1; 
463254939Sdes 464255403Sdes /* 465255403Sdes * Generate an interrupt if able 466254939Sdes */ 467254939Sdes if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) { 468254939Sdes if (use_msix) { 469254939Sdes pci_generate_msix(sc->vsc_pi, 470254939Sdes sc->vsc_msix_table_idx[VTNET_TXQ]); 471254939Sdes } else { 472254939Sdes sc->vsc_isr |= 1; 473254939Sdes pci_generate_msi(sc->vsc_pi, 0); 474254939Sdes } 475254939Sdes } 476254939Sdes} 477254939Sdes 478254939Sdesstatic void 479254939Sdespci_vtnet_ping_txq(struct pci_vtnet_softc *sc) 480254939Sdes{ 481254939Sdes struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ]; 482254939Sdes int i; 483254939Sdes int ndescs; 484254939Sdes 485254939Sdes /* 486254939Sdes * Calculate number of ring entries to process 487254939Sdes */ 488254939Sdes ndescs = hq_num_avail(hq); 489254939Sdes 490254939Sdes if (ndescs == 0) 491254939Sdes return; 492254939Sdes 493254939Sdes /* 494254939Sdes * Run through all the entries, placing them into iovecs and 495254939Sdes * sending when an end-of-packet is found 496254939Sdes */ 497254939Sdes for (i = 0; i < ndescs; i++) 498254939Sdes pci_vtnet_proctx(sc, hq); 499254939Sdes} 500254939Sdes 501254939Sdesstatic void 502254939Sdespci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc) 503254939Sdes{ 504254939Sdes 505254939Sdes DPRINTF(("vtnet: control qnotify!\n\r")); 506254939Sdes} 507254939Sdes 508254939Sdesstatic void 509254939Sdespci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn) 510254939Sdes{ 511254939Sdes struct vring_hqueue *hq; 512254939Sdes int qnum = sc->vsc_curq; 513254939Sdes 514254939Sdes assert(qnum < VTNET_MAXQ); 515254939Sdes 516254939Sdes sc->vsc_pfn[qnum] = pfn << VRING_PFN; 517254939Sdes 518254939Sdes /* 519254939Sdes * Set up host pointers to the various parts of the 520254939Sdes * queue 521254939Sdes */ 522254939Sdes hq = &sc->vsc_hq[qnum]; 523254939Sdes hq->hq_size = pci_vtnet_qsize(qnum); 524254939Sdes 525254939Sdes hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN, 
526254939Sdes vring_size(hq->hq_size)); 527254939Sdes hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size); 528254939Sdes hq->hq_avail_idx = hq->hq_avail_flags + 1; 529254939Sdes hq->hq_avail_ring = hq->hq_avail_flags + 2; 530254939Sdes hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring, 531254939Sdes VRING_ALIGN); 532254939Sdes hq->hq_used_idx = hq->hq_used_flags + 1; 533254939Sdes hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2); 534254939Sdes 535254939Sdes /* 536254939Sdes * Initialize queue indexes 537254939Sdes */ 538254939Sdes hq->hq_cur_aidx = 0; 539254939Sdes} 540254939Sdes 541254939Sdesstatic int 542254939Sdespci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 543254939Sdes{ 544254939Sdes MD5_CTX mdctx; 545254939Sdes unsigned char digest[16]; 546254939Sdes char nstr[80]; 547254939Sdes struct pci_vtnet_softc *sc; 548254939Sdes const char *env_msi; 549254939Sdes 550254939Sdes sc = malloc(sizeof(struct pci_vtnet_softc)); 551254939Sdes memset(sc, 0, sizeof(struct pci_vtnet_softc)); 552254939Sdes 553254939Sdes pi->pi_arg = sc; 554254939Sdes sc->vsc_pi = pi; 555254939Sdes 556254939Sdes pthread_mutex_init(&sc->vsc_mtx, NULL); 557254939Sdes 558254939Sdes /* 559254939Sdes * Use MSI if set by user 560254939Sdes */ 561254939Sdes if ((env_msi = getenv("BHYVE_USE_MSI")) != NULL) { 562254939Sdes if (strcasecmp(env_msi, "yes") == 0) 563254939Sdes use_msix = 0; 564254939Sdes } 565254939Sdes 566254939Sdes /* 567254939Sdes * Attempt to open the tap device 568254939Sdes */ 569254939Sdes sc->vsc_tapfd = -1; 570254939Sdes if (opts != NULL) { 571254939Sdes char tbuf[80]; 572254939Sdes 573254939Sdes strcpy(tbuf, "/dev/"); 574254939Sdes strlcat(tbuf, opts, sizeof(tbuf)); 575254939Sdes 576254939Sdes sc->vsc_tapfd = open(tbuf, O_RDWR); 577254939Sdes if (sc->vsc_tapfd == -1) { 578254939Sdes WPRINTF(("open of tap device %s failed\n", tbuf)); 579254939Sdes } else { 580254939Sdes /* 581254939Sdes * Set non-blocking and 
register for read 582254939Sdes * notifications with the event loop 583254939Sdes */ 584254939Sdes int opt = 1; 585254939Sdes if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { 586254939Sdes WPRINTF(("tap device O_NONBLOCK failed\n")); 587254939Sdes close(sc->vsc_tapfd); 588254939Sdes sc->vsc_tapfd = -1; 589254939Sdes } 590254939Sdes 591254939Sdes sc->vsc_mevp = mevent_add(sc->vsc_tapfd, 592254939Sdes EVF_READ, 593254939Sdes pci_vtnet_tap_callback, 594254939Sdes sc); 595254939Sdes if (sc->vsc_mevp == NULL) { 596254939Sdes WPRINTF(("Could not register event\n")); 597254939Sdes close(sc->vsc_tapfd); 598254939Sdes sc->vsc_tapfd = -1; 599254939Sdes } 600254939Sdes } 601254939Sdes } 602254939Sdes 603254939Sdes /* 604254939Sdes * The MAC address is the standard NetApp OUI of 00-a0-98, 605254939Sdes * followed by an MD5 of the vm name. The slot/func number is 606254939Sdes * prepended to this for slots other than 1:0, so that 607254939Sdes * a bootloader can netboot from the equivalent of slot 1. 608254939Sdes */ 609254939Sdes if (pi->pi_slot == 1 && pi->pi_func == 0) { 610254939Sdes strncpy(nstr, vmname, sizeof(nstr)); 611254939Sdes } else { 612254939Sdes snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, 613254939Sdes pi->pi_func, vmname); 614254939Sdes } 615254939Sdes 616254939Sdes MD5Init(&mdctx); 617254939Sdes MD5Update(&mdctx, nstr, strlen(nstr)); 618254939Sdes MD5Final(digest, &mdctx); 619254939Sdes 620254939Sdes sc->vsc_macaddr[0] = 0x00; 621254939Sdes sc->vsc_macaddr[1] = 0xa0; 622254939Sdes sc->vsc_macaddr[2] = 0x98; 623254939Sdes sc->vsc_macaddr[3] = digest[0]; 624254939Sdes sc->vsc_macaddr[4] = digest[1]; 625254939Sdes sc->vsc_macaddr[5] = digest[2]; 626254939Sdes 627254939Sdes /* initialize config space */ 628254939Sdes pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 629254939Sdes pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 630254939Sdes pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 631254939Sdes pci_set_cfgdata16(pi, PCIR_SUBDEV_0, 
VIRTIO_TYPE_NET); 632254939Sdes 633254939Sdes if (use_msix) { 634254939Sdes /* MSI-X support */ 635254939Sdes int i; 636254939Sdes 637254939Sdes for (i = 0; i < VTNET_MAXQ; i++) 638254939Sdes sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR; 639254939Sdes 640254939Sdes /* 641254939Sdes * BAR 1 used to map MSI-X table and PBA 642254939Sdes */ 643254939Sdes if (pci_emul_add_msixcap(pi, VTNET_MAXQ, 1)) 644254939Sdes return (1); 645254939Sdes } else { 646254939Sdes /* MSI support */ 647254939Sdes pci_emul_add_msicap(pi, 1); 648254939Sdes } 649254939Sdes 650254939Sdes pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTNET_REGSZ); 651254939Sdes 652254939Sdes return (0); 653254939Sdes} 654254939Sdes 655254939Sdes/* 656254939Sdes * Function pointer array to handle queue notifications 657254939Sdes */ 658254939Sdesstatic void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = { 659254939Sdes pci_vtnet_ping_rxq, 660254939Sdes pci_vtnet_ping_txq, 661254939Sdes pci_vtnet_ping_ctlq 662254939Sdes}; 663254939Sdes 664254939Sdesstatic uint64_t 665254939Sdesvtnet_adjust_offset(struct pci_devinst *pi, uint64_t offset) 666254939Sdes{ 667254939Sdes /* 668254939Sdes * Device specific offsets used by guest would change based on 669254939Sdes * whether MSI-X capability is enabled or not 670254939Sdes */ 671254939Sdes if (!pci_msix_enabled(pi)) { 672254939Sdes if (offset >= VTCFG_R_MSIX) 673254939Sdes return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX)); 674254939Sdes } 675254939Sdes 676254939Sdes return (offset); 677254939Sdes} 678254939Sdes 679254939Sdesstatic void 680254939Sdespci_vtnet_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 681254939Sdes int baridx, uint64_t offset, int size, uint64_t value) 682254939Sdes{ 683254939Sdes struct pci_vtnet_softc *sc = pi->pi_arg; 684254939Sdes void *ptr; 685254939Sdes 686254939Sdes if (use_msix) { 687254939Sdes if (baridx == pci_msix_table_bar(pi) || 688254939Sdes baridx == pci_msix_pba_bar(pi)) { 689254939Sdes pci_emul_msix_twrite(pi, 
offset, size, value); 690254939Sdes return; 691254939Sdes } 692254939Sdes } 693254939Sdes 694254939Sdes assert(baridx == 0); 695254939Sdes 696254939Sdes if (offset + size > pci_vtnet_iosize(pi)) { 697254939Sdes DPRINTF(("vtnet_write: 2big, offset %ld size %d\n", 698254939Sdes offset, size)); 699254939Sdes return; 700254939Sdes } 701254939Sdes 702254939Sdes pthread_mutex_lock(&sc->vsc_mtx); 703254939Sdes 704254939Sdes offset = vtnet_adjust_offset(pi, offset); 705254939Sdes 706254939Sdes switch (offset) { 707254939Sdes case VTCFG_R_GUESTCAP: 708254939Sdes assert(size == 4); 709254939Sdes sc->vsc_features = value & VTNET_S_HOSTCAPS; 710254939Sdes break; 711254939Sdes case VTCFG_R_PFN: 712254939Sdes assert(size == 4); 713254939Sdes pci_vtnet_ring_init(sc, value); 714254939Sdes break; 715254939Sdes case VTCFG_R_QSEL: 716254939Sdes assert(size == 2); 717254939Sdes assert(value < VTNET_MAXQ); 718254939Sdes sc->vsc_curq = value; 719254939Sdes break; 720254939Sdes case VTCFG_R_QNOTIFY: 721254939Sdes assert(size == 2); 722254939Sdes assert(value < VTNET_MAXQ); 723254939Sdes (*pci_vtnet_qnotify[value])(sc); 724254939Sdes break; 725254939Sdes case VTCFG_R_STATUS: 726254939Sdes assert(size == 1); 727254939Sdes pci_vtnet_update_status(sc, value); 728254939Sdes break; 729254939Sdes case VTCFG_R_CFGVEC: 730254939Sdes assert(size == 2); 731254939Sdes sc->vsc_msix_table_idx[VTNET_CTLQ] = value; 732254939Sdes break; 733254939Sdes case VTCFG_R_QVEC: 734254939Sdes assert(size == 2); 735254939Sdes assert(sc->vsc_curq != VTNET_CTLQ); 736254939Sdes sc->vsc_msix_table_idx[sc->vsc_curq] = value; 737254939Sdes break; 738254939Sdes case VTNET_R_CFG0: 739254939Sdes case VTNET_R_CFG1: 740254939Sdes case VTNET_R_CFG2: 741254939Sdes case VTNET_R_CFG3: 742254939Sdes case VTNET_R_CFG4: 743254939Sdes case VTNET_R_CFG5: 744254939Sdes assert((size + offset) <= (VTNET_R_CFG5 + 1)); 745254939Sdes ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 746254939Sdes /* 747254939Sdes * The driver is allowed to 
change the MAC address 748254939Sdes */ 749254939Sdes sc->vsc_macaddr[offset - VTNET_R_CFG0] = value; 750254939Sdes if (size == 1) { 751254939Sdes *(uint8_t *) ptr = value; 752254939Sdes } else if (size == 2) { 753254939Sdes *(uint16_t *) ptr = value; 754254939Sdes } else { 755254939Sdes *(uint32_t *) ptr = value; 756254939Sdes } 757254939Sdes break; 758254939Sdes case VTCFG_R_HOSTCAP: 759254939Sdes case VTCFG_R_QNUM: 760254939Sdes case VTCFG_R_ISR: 761254939Sdes case VTNET_R_CFG6: 762254939Sdes case VTNET_R_CFG7: 763254939Sdes DPRINTF(("vtnet: write to readonly reg %ld\n\r", offset)); 764254939Sdes break; 765254939Sdes default: 766254939Sdes DPRINTF(("vtnet: unknown i/o write offset %ld\n\r", offset)); 767254939Sdes value = 0; 768254939Sdes break; 769254939Sdes } 770254939Sdes 771254939Sdes pthread_mutex_unlock(&sc->vsc_mtx); 772254939Sdes} 773254939Sdes 774254939Sdesuint64_t 775254939Sdespci_vtnet_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 776254939Sdes int baridx, uint64_t offset, int size) 777254939Sdes{ 778254939Sdes struct pci_vtnet_softc *sc = pi->pi_arg; 779254939Sdes void *ptr; 780254939Sdes uint64_t value; 781254939Sdes 782255403Sdes if (use_msix) { 783255403Sdes if (baridx == pci_msix_table_bar(pi) || 784255403Sdes baridx == pci_msix_pba_bar(pi)) { 785255403Sdes return (pci_emul_msix_tread(pi, offset, size)); 786254939Sdes } 787254939Sdes } 788254939Sdes 789254939Sdes assert(baridx == 0); 790254939Sdes 791254939Sdes if (offset + size > pci_vtnet_iosize(pi)) { 792254939Sdes DPRINTF(("vtnet_read: 2big, offset %ld size %d\n", 793254939Sdes offset, size)); 794254939Sdes return (0); 795254939Sdes } 796254939Sdes 797254939Sdes pthread_mutex_lock(&sc->vsc_mtx); 798254939Sdes 799254939Sdes offset = vtnet_adjust_offset(pi, offset); 800254939Sdes 801254939Sdes switch (offset) { 802254939Sdes case VTCFG_R_HOSTCAP: 803254939Sdes assert(size == 4); 804254939Sdes value = VTNET_S_HOSTCAPS; 805254939Sdes break; 806254939Sdes case VTCFG_R_GUESTCAP: 
807254939Sdes assert(size == 4); 808254939Sdes value = sc->vsc_features; /* XXX never read ? */ 809254939Sdes break; 810254939Sdes case VTCFG_R_PFN: 811254939Sdes assert(size == 4); 812254939Sdes value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN; 813254939Sdes break; 814254939Sdes case VTCFG_R_QNUM: 815254939Sdes assert(size == 2); 816254939Sdes value = pci_vtnet_qsize(sc->vsc_curq); 817254939Sdes break; 818254939Sdes case VTCFG_R_QSEL: 819254939Sdes assert(size == 2); 820254939Sdes value = sc->vsc_curq; /* XXX never read ? */ 821254939Sdes break; 822254939Sdes case VTCFG_R_QNOTIFY: 823254939Sdes assert(size == 2); 824254939Sdes value = sc->vsc_curq; /* XXX never read ? */ 825254939Sdes break; 826254939Sdes case VTCFG_R_STATUS: 827254939Sdes assert(size == 1); 828254939Sdes value = sc->vsc_status; 829254939Sdes break; 830254939Sdes case VTCFG_R_ISR: 831254939Sdes assert(size == 1); 832254939Sdes value = sc->vsc_isr; 833254939Sdes sc->vsc_isr = 0; /* a read clears this flag */ 834254939Sdes break; 835254939Sdes case VTCFG_R_CFGVEC: 836254939Sdes assert(size == 2); 837254939Sdes value = sc->vsc_msix_table_idx[VTNET_CTLQ]; 838254939Sdes break; 839254939Sdes case VTCFG_R_QVEC: 840254939Sdes assert(size == 2); 841254939Sdes assert(sc->vsc_curq != VTNET_CTLQ); 842254939Sdes value = sc->vsc_msix_table_idx[sc->vsc_curq]; 843254939Sdes break; 844254939Sdes case VTNET_R_CFG0: 845254939Sdes case VTNET_R_CFG1: 846254939Sdes case VTNET_R_CFG2: 847254939Sdes case VTNET_R_CFG3: 848254939Sdes case VTNET_R_CFG4: 849254939Sdes case VTNET_R_CFG5: 850254939Sdes assert((size + offset) <= (VTNET_R_CFG5 + 1)); 851254939Sdes ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 852254939Sdes if (size == 1) { 853254939Sdes value = *(uint8_t *) ptr; 854254939Sdes } else if (size == 2) { 855254939Sdes value = *(uint16_t *) ptr; 856254939Sdes } else { 857254939Sdes value = *(uint32_t *) ptr; 858254939Sdes } 859254939Sdes break; 860254939Sdes case VTNET_R_CFG6: 861254939Sdes assert(size != 4); 
862254939Sdes value = 0x01; /* XXX link always up */ 863254939Sdes break; 864254939Sdes case VTNET_R_CFG7: 865254939Sdes assert(size == 1); 866254939Sdes value = 0; /* XXX link status in LSB */ 867254939Sdes break; 868254939Sdes default: 869254939Sdes DPRINTF(("vtnet: unknown i/o read offset %ld\n\r", offset)); 870254939Sdes value = 0; 871254939Sdes break; 872254939Sdes } 873254939Sdes 874254939Sdes pthread_mutex_unlock(&sc->vsc_mtx); 875254939Sdes 876254939Sdes return (value); 877254939Sdes} 878254939Sdes 879254939Sdesstruct pci_devemu pci_de_vnet = { 880254939Sdes .pe_emu = "virtio-net", 881254939Sdes .pe_init = pci_vtnet_init, 882254939Sdes .pe_barwrite = pci_vtnet_write, 883254939Sdes .pe_barread = pci_vtnet_read 884254939Sdes}; 885PCI_EMUL_SET(pci_de_vnet); 886