1221828Sgrehan/*- 2330449Seadler * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3330449Seadler * 4221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 5221828Sgrehan * All rights reserved. 6221828Sgrehan * 7221828Sgrehan * Redistribution and use in source and binary forms, with or without 8221828Sgrehan * modification, are permitted provided that the following conditions 9221828Sgrehan * are met: 10221828Sgrehan * 1. Redistributions of source code must retain the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer. 12221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 13221828Sgrehan * notice, this list of conditions and the following disclaimer in the 14221828Sgrehan * documentation and/or other materials provided with the distribution. 15221828Sgrehan * 16221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26221828Sgrehan * SUCH DAMAGE. 27221828Sgrehan * 28221828Sgrehan * $FreeBSD: stable/11/usr.sbin/bhyve/pci_virtio_net.c 349740 2019-07-04 18:21:01Z vmaffione $ 29221828Sgrehan */ 30221828Sgrehan 31221828Sgrehan#include <sys/cdefs.h> 32221828Sgrehan__FBSDID("$FreeBSD: stable/11/usr.sbin/bhyve/pci_virtio_net.c 349740 2019-07-04 18:21:01Z vmaffione $"); 33221828Sgrehan 34221828Sgrehan#include <sys/param.h> 35320866Sgrehan#ifndef WITHOUT_CAPSICUM 36320866Sgrehan#include <sys/capsicum.h> 37320866Sgrehan#endif 38221828Sgrehan#include <sys/linker_set.h> 39221828Sgrehan#include <sys/select.h> 40221828Sgrehan#include <sys/uio.h> 41221828Sgrehan#include <sys/ioctl.h> 42252682Sgrehan#include <net/ethernet.h> 43293459Sgnn#ifndef NETMAP_WITH_LIBS 44293459Sgnn#define NETMAP_WITH_LIBS 45293459Sgnn#endif 46293459Sgnn#include <net/netmap_user.h> 47221828Sgrehan 48320866Sgrehan#include <err.h> 49221828Sgrehan#include <errno.h> 50221828Sgrehan#include <fcntl.h> 51221828Sgrehan#include <stdio.h> 52221828Sgrehan#include <stdlib.h> 53221828Sgrehan#include <stdint.h> 54221828Sgrehan#include <string.h> 55221828Sgrehan#include <strings.h> 56221828Sgrehan#include <unistd.h> 57221828Sgrehan#include <assert.h> 58221828Sgrehan#include <md5.h> 59221828Sgrehan#include <pthread.h> 60249917Sgrehan#include <pthread_np.h> 61320866Sgrehan#include <sysexits.h> 62221828Sgrehan 63244167Sgrehan#include "bhyverun.h" 64221828Sgrehan#include "pci_emul.h" 65221828Sgrehan#include "mevent.h" 66221828Sgrehan#include "virtio.h" 67349739Svmaffione#include "net_utils.h" 68221828Sgrehan 69249917Sgrehan#define VTNET_RINGSZ 1024 70221828Sgrehan 71288470Sgrehan#define VTNET_MAXSEGS 256 72221828Sgrehan 73221828Sgrehan/* 74253440Sgrehan * Host capabilities. Note that we only offer a few of these. 75221828Sgrehan */ 76253440Sgrehan#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */ 77253440Sgrehan#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */ 78253440Sgrehan#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */ 79253440Sgrehan#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */ 80253440Sgrehan#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */ 81253440Sgrehan#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */ 82253440Sgrehan#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */ 83253440Sgrehan#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */ 84253440Sgrehan#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */ 85253440Sgrehan#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */ 86253440Sgrehan#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */ 87253440Sgrehan#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */ 88253440Sgrehan#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */ 89253440Sgrehan#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */ 90253440Sgrehan#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */ 91253440Sgrehan#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */ 92253440Sgrehan#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */ 93253440Sgrehan#define VIRTIO_NET_F_GUEST_ANNOUNCE \ 94253440Sgrehan (1 << 21) /* guest can send gratuitous pkts */ 95221828Sgrehan 96253440Sgrehan#define VTNET_S_HOSTCAPS \ 97253440Sgrehan ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \ 98288470Sgrehan VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) 99221828Sgrehan 100221828Sgrehan/* 101253440Sgrehan * PCI config-space "registers" 102221828Sgrehan */ 103253440Sgrehanstruct virtio_net_config { 104253440Sgrehan uint8_t mac[6]; 105253440Sgrehan uint16_t status; 106253440Sgrehan} __packed; 107221828Sgrehan 108221828Sgrehan/* 109221828Sgrehan * Queue definitions. 110221828Sgrehan */ 111221828Sgrehan#define VTNET_RXQ 0 112221828Sgrehan#define VTNET_TXQ 1 113253440Sgrehan#define VTNET_CTLQ 2 /* NB: not yet supported */ 114221828Sgrehan 115221828Sgrehan#define VTNET_MAXQ 3 116221828Sgrehan 117221828Sgrehan/* 118221828Sgrehan * Fixed network header size 119221828Sgrehan */ 120221828Sgrehanstruct virtio_net_rxhdr { 121221828Sgrehan uint8_t vrh_flags; 122221828Sgrehan uint8_t vrh_gso_type; 123221828Sgrehan uint16_t vrh_hdr_len; 124221828Sgrehan uint16_t vrh_gso_size; 125221828Sgrehan uint16_t vrh_csum_start; 126221828Sgrehan uint16_t vrh_csum_offset; 127221828Sgrehan uint16_t vrh_bufs; 128221828Sgrehan} __packed; 129221828Sgrehan 130221828Sgrehan/* 131221828Sgrehan * Debug printf 132221828Sgrehan */ 133221828Sgrehanstatic int pci_vtnet_debug; 134221828Sgrehan#define DPRINTF(params) if (pci_vtnet_debug) printf params 135221828Sgrehan#define WPRINTF(params) printf params 136221828Sgrehan 137221828Sgrehan/* 138221828Sgrehan * Per-device softc 139221828Sgrehan */ 140221828Sgrehanstruct pci_vtnet_softc { 141253440Sgrehan struct virtio_softc vsc_vs; 142253440Sgrehan struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; 143221828Sgrehan pthread_mutex_t vsc_mtx; 144221828Sgrehan struct mevent *vsc_mevp; 145221828Sgrehan 146221828Sgrehan int vsc_tapfd; 147293459Sgnn struct nm_desc *vsc_nmd; 148293459Sgnn 149221828Sgrehan int vsc_rx_ready; 150349698Svmaffione int resetting; /* protected by tx_mtx */ 151221828Sgrehan 152271338Sgrehan uint64_t vsc_features; /* negotiated features */ 153271338Sgrehan 154253440Sgrehan struct virtio_net_config vsc_config; 155221828Sgrehan 156250083Sneel pthread_mutex_t rx_mtx; 157271338Sgrehan int rx_vhdrlen; 158271338Sgrehan int rx_merge; /* merged rx bufs in use */ 159250083Sneel 160249917Sgrehan pthread_t tx_tid; 161249917Sgrehan pthread_mutex_t tx_mtx; 162249917Sgrehan pthread_cond_t tx_cond; 163250083Sneel int tx_in_progress; 164293459Sgnn 165293459Sgnn void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc); 166293459Sgnn void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov, 167293459Sgnn int iovcnt, int len); 168221828Sgrehan}; 169221828Sgrehan 170253440Sgrehanstatic void pci_vtnet_reset(void *); 171253440Sgrehan/* static void pci_vtnet_notify(void *, struct vqueue_info *); */ 172253440Sgrehanstatic int pci_vtnet_cfgread(void *, int, int, uint32_t *); 173253440Sgrehanstatic int pci_vtnet_cfgwrite(void *, int, int, uint32_t); 174271338Sgrehanstatic void pci_vtnet_neg_features(void *, uint64_t); 175246109Sneel 176253440Sgrehanstatic struct virtio_consts vtnet_vi_consts = { 177253440Sgrehan "vtnet", /* our name */ 178253440Sgrehan VTNET_MAXQ - 1, /* we currently support 2 virtqueues */ 179253440Sgrehan sizeof(struct virtio_net_config), /* config reg size */ 180253440Sgrehan pci_vtnet_reset, /* reset */ 181253440Sgrehan NULL, /* device-wide qnotify -- not used */ 182253440Sgrehan pci_vtnet_cfgread, /* read PCI config */ 183253440Sgrehan pci_vtnet_cfgwrite, /* write PCI config */ 184271338Sgrehan pci_vtnet_neg_features, /* apply negotiated features */ 185253440Sgrehan VTNET_S_HOSTCAPS, /* our capabilities */ 186253440Sgrehan}; 187221828Sgrehan 188244160Sgrehanstatic void 189349698Svmaffionepci_vtnet_reset(void *vsc) 190250083Sneel{ 191349698Svmaffione struct pci_vtnet_softc *sc = vsc; 192250083Sneel 193349698Svmaffione DPRINTF(("vtnet: device reset requested !\n")); 194349698Svmaffione 195349698Svmaffione /* Acquire the RX lock to block RX processing. */ 196349698Svmaffione pthread_mutex_lock(&sc->rx_mtx); 197349698Svmaffione 198349698Svmaffione /* Set sc->resetting and give a chance to the TX thread to stop. */ 199250083Sneel pthread_mutex_lock(&sc->tx_mtx); 200349698Svmaffione sc->resetting = 1; 201250083Sneel while (sc->tx_in_progress) { 202250083Sneel pthread_mutex_unlock(&sc->tx_mtx); 203250083Sneel usleep(10000); 204250083Sneel pthread_mutex_lock(&sc->tx_mtx); 205250083Sneel } 206250083Sneel 207253440Sgrehan sc->vsc_rx_ready = 0; 208271338Sgrehan sc->rx_merge = 1; 209271338Sgrehan sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); 210250086Sneel 211349698Svmaffione /* 212349698Svmaffione * Now reset rings, MSI-X vectors, and negotiated capabilities. 213349698Svmaffione * Do that with the TX lock held, since we need to reset 214349698Svmaffione * sc->resetting. 215349698Svmaffione */ 216253440Sgrehan vi_reset_dev(&sc->vsc_vs); 217250086Sneel 218253440Sgrehan sc->resetting = 0; 219349698Svmaffione pthread_mutex_unlock(&sc->tx_mtx); 220349698Svmaffione pthread_mutex_unlock(&sc->rx_mtx); 221221828Sgrehan} 222221828Sgrehan 223221828Sgrehan/* 224221828Sgrehan * Called to send a buffer chain out to the tap device 225221828Sgrehan */ 226221828Sgrehanstatic void 227221828Sgrehanpci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, 228221828Sgrehan int len) 229221828Sgrehan{ 230253440Sgrehan static char pad[60]; /* all zero bytes */ 231221828Sgrehan 232221828Sgrehan if (sc->vsc_tapfd == -1) 233221828Sgrehan return; 234221828Sgrehan 235221828Sgrehan /* 236221828Sgrehan * If the length is < 60, pad out to that and add the 237221828Sgrehan * extra zero'd segment to the iov. It is guaranteed that 238221828Sgrehan * there is always an extra iov available by the caller. 239221828Sgrehan */ 240221828Sgrehan if (len < 60) { 241221828Sgrehan iov[iovcnt].iov_base = pad; 242221828Sgrehan iov[iovcnt].iov_len = 60 - len; 243221828Sgrehan iovcnt++; 244221828Sgrehan } 245221828Sgrehan (void) writev(sc->vsc_tapfd, iov, iovcnt); 246221828Sgrehan} 247221828Sgrehan 248221828Sgrehan/* 249221828Sgrehan * Called when there is read activity on the tap file descriptor. 250221828Sgrehan * Each buffer posted by the guest is assumed to be able to contain 251221828Sgrehan * an entire ethernet frame + rx header. 252221828Sgrehan * MP note: the dummybuf is only used for discarding frames, so there 253221828Sgrehan * is no need for it to be per-vtnet or locked. 254221828Sgrehan */ 255221828Sgrehanstatic uint8_t dummybuf[2048]; 256221828Sgrehan 257271338Sgrehanstatic __inline struct iovec * 258271338Sgrehanrx_iov_trim(struct iovec *iov, int *niov, int tlen) 259271338Sgrehan{ 260271338Sgrehan struct iovec *riov; 261271338Sgrehan 262271338Sgrehan /* XXX short-cut: assume first segment is >= tlen */ 263271338Sgrehan assert(iov[0].iov_len >= tlen); 264271338Sgrehan 265271338Sgrehan iov[0].iov_len -= tlen; 266271338Sgrehan if (iov[0].iov_len == 0) { 267271338Sgrehan assert(*niov > 1); 268271338Sgrehan *niov -= 1; 269271338Sgrehan riov = &iov[1]; 270271338Sgrehan } else { 271271338Sgrehan iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); 272271338Sgrehan riov = &iov[0]; 273271338Sgrehan } 274271338Sgrehan 275271338Sgrehan return (riov); 276271338Sgrehan} 277271338Sgrehan 278221828Sgrehanstatic void 279221828Sgrehanpci_vtnet_tap_rx(struct pci_vtnet_softc *sc) 280221828Sgrehan{ 281271338Sgrehan struct iovec iov[VTNET_MAXSEGS], *riov; 282253440Sgrehan struct vqueue_info *vq; 283271338Sgrehan void *vrx; 284271338Sgrehan int len, n; 285280026Smav uint16_t idx; 286221828Sgrehan 287221828Sgrehan /* 288221828Sgrehan * Should never be called without a valid tap fd 289221828Sgrehan */ 290221828Sgrehan assert(sc->vsc_tapfd != -1); 291221828Sgrehan 292221828Sgrehan /* 293221828Sgrehan * But, will be called when the rx ring hasn't yet 294349698Svmaffione * been set up. 295221828Sgrehan */ 296349698Svmaffione if (!sc->vsc_rx_ready) { 297221828Sgrehan /* 298221828Sgrehan * Drop the packet and try later. 299221828Sgrehan */ 300221828Sgrehan (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 301221828Sgrehan return; 302221828Sgrehan } 303221828Sgrehan 304221828Sgrehan /* 305253440Sgrehan * Check for available rx buffers 306221828Sgrehan */ 307253440Sgrehan vq = &sc->vsc_queues[VTNET_RXQ]; 308253440Sgrehan if (!vq_has_descs(vq)) { 309221828Sgrehan /* 310253440Sgrehan * Drop the packet and try later. Interrupt on 311253440Sgrehan * empty, if that's negotiated. 312221828Sgrehan */ 313221828Sgrehan (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 314253440Sgrehan vq_endchains(vq, 1); 315221828Sgrehan return; 316221828Sgrehan } 317221828Sgrehan 318253440Sgrehan do { 319221828Sgrehan /* 320271338Sgrehan * Get descriptor chain. 321221828Sgrehan */ 322280026Smav n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); 323271338Sgrehan assert(n >= 1 && n <= VTNET_MAXSEGS); 324221828Sgrehan 325221828Sgrehan /* 326221828Sgrehan * Get a pointer to the rx header, and use the 327221828Sgrehan * data immediately following it for the packet buffer. 328221828Sgrehan */ 329271338Sgrehan vrx = iov[0].iov_base; 330271338Sgrehan riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); 331221828Sgrehan 332271338Sgrehan len = readv(sc->vsc_tapfd, riov, n); 333221828Sgrehan 334221828Sgrehan if (len < 0 && errno == EWOULDBLOCK) { 335253440Sgrehan /* 336253440Sgrehan * No more packets, but still some avail ring 337253440Sgrehan * entries. Interrupt if needed/appropriate. 338253440Sgrehan */ 339280041Smav vq_retchain(vq); 340253440Sgrehan vq_endchains(vq, 0); 341253440Sgrehan return; 342221828Sgrehan } 343221828Sgrehan 344221828Sgrehan /* 345221828Sgrehan * The only valid field in the rx packet header is the 346271338Sgrehan * number of buffers if merged rx bufs were negotiated. 347221828Sgrehan */ 348271338Sgrehan memset(vrx, 0, sc->rx_vhdrlen); 349221828Sgrehan 350271338Sgrehan if (sc->rx_merge) { 351271338Sgrehan struct virtio_net_rxhdr *vrxh; 352271338Sgrehan 353271338Sgrehan vrxh = vrx; 354271338Sgrehan vrxh->vrh_bufs = 1; 355271338Sgrehan } 356271338Sgrehan 357221828Sgrehan /* 358253440Sgrehan * Release this chain and handle more chains. 359221828Sgrehan */ 360280026Smav vq_relchain(vq, idx, len + sc->rx_vhdrlen); 361253440Sgrehan } while (vq_has_descs(vq)); 362221828Sgrehan 363253440Sgrehan /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ 364253440Sgrehan vq_endchains(vq, 1); 365221828Sgrehan} 366221828Sgrehan 367296829Sgnnstatic __inline int 368293459Sgnnpci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt) 369293459Sgnn{ 370293459Sgnn int r, i; 371293459Sgnn int len = 0; 372293459Sgnn 373293459Sgnn for (r = nmd->cur_tx_ring; ; ) { 374293459Sgnn struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r); 375293459Sgnn uint32_t cur, idx; 376293459Sgnn char *buf; 377293459Sgnn 378293459Sgnn if (nm_ring_empty(ring)) { 379293459Sgnn r++; 380293459Sgnn if (r > nmd->last_tx_ring) 381293459Sgnn r = nmd->first_tx_ring; 382296829Sgnn if (r == nmd->cur_tx_ring) 383293459Sgnn break; 384293459Sgnn continue; 385293459Sgnn } 386293459Sgnn cur = ring->cur; 387293459Sgnn idx = ring->slot[cur].buf_idx; 388293459Sgnn buf = NETMAP_BUF(ring, idx); 389293459Sgnn 390293459Sgnn for (i = 0; i < iovcnt; i++) { 391296829Sgnn if (len + iov[i].iov_len > 2048) 392296829Sgnn break; 393293459Sgnn memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len); 394293459Sgnn len += iov[i].iov_len; 395293459Sgnn } 396293459Sgnn ring->slot[cur].len = len; 397293459Sgnn ring->head = ring->cur = nm_ring_next(ring, cur); 398293459Sgnn nmd->cur_tx_ring = r; 399293459Sgnn ioctl(nmd->fd, NIOCTXSYNC, NULL); 400293459Sgnn break; 401293459Sgnn } 402293459Sgnn 403293459Sgnn return (len); 404293459Sgnn} 405293459Sgnn 406296829Sgnnstatic __inline int 407293459Sgnnpci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt) 408293459Sgnn{ 409293459Sgnn int len = 0; 410293459Sgnn int i = 0; 411293459Sgnn int r; 412293459Sgnn 413293459Sgnn for (r = nmd->cur_rx_ring; ; ) { 414293459Sgnn struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r); 415293459Sgnn uint32_t cur, idx; 416293459Sgnn char *buf; 417293459Sgnn size_t left; 418293459Sgnn 419293459Sgnn if (nm_ring_empty(ring)) { 420293459Sgnn r++; 421293459Sgnn if (r > nmd->last_rx_ring) 422293459Sgnn r = nmd->first_rx_ring; 423293459Sgnn if (r == nmd->cur_rx_ring) 424293459Sgnn break; 425293459Sgnn continue; 426293459Sgnn } 427293459Sgnn cur = ring->cur; 428293459Sgnn idx = ring->slot[cur].buf_idx; 429293459Sgnn buf = NETMAP_BUF(ring, idx); 430293459Sgnn left = ring->slot[cur].len; 431293459Sgnn 432293459Sgnn for (i = 0; i < iovcnt && left > 0; i++) { 433293459Sgnn if (iov[i].iov_len > left) 434293459Sgnn iov[i].iov_len = left; 435293459Sgnn memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len); 436293459Sgnn len += iov[i].iov_len; 437293459Sgnn left -= iov[i].iov_len; 438293459Sgnn } 439293459Sgnn ring->head = ring->cur = nm_ring_next(ring, cur); 440293459Sgnn nmd->cur_rx_ring = r; 441293459Sgnn ioctl(nmd->fd, NIOCRXSYNC, NULL); 442293459Sgnn break; 443293459Sgnn } 444293459Sgnn for (; i < iovcnt; i++) 445293459Sgnn iov[i].iov_len = 0; 446293459Sgnn 447293459Sgnn return (len); 448293459Sgnn} 449293459Sgnn 450293459Sgnn/* 451293459Sgnn * Called to send a buffer chain out to the vale port 452293459Sgnn */ 453221828Sgrehanstatic void 454293459Sgnnpci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, 455293459Sgnn int len) 456221828Sgrehan{ 457293459Sgnn static char pad[60]; /* all zero bytes */ 458293459Sgnn 459293459Sgnn if (sc->vsc_nmd == NULL) 460293459Sgnn return; 461293459Sgnn 462293459Sgnn /* 463293459Sgnn * If the length is < 60, pad out to that and add the 464293459Sgnn * extra zero'd segment to the iov. It is guaranteed that 465293459Sgnn * there is always an extra iov available by the caller. 466293459Sgnn */ 467293459Sgnn if (len < 60) { 468293459Sgnn iov[iovcnt].iov_base = pad; 469293459Sgnn iov[iovcnt].iov_len = 60 - len; 470293459Sgnn iovcnt++; 471293459Sgnn } 472293459Sgnn (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt); 473293459Sgnn} 474293459Sgnn 475293459Sgnnstatic void 476293459Sgnnpci_vtnet_netmap_rx(struct pci_vtnet_softc *sc) 477293459Sgnn{ 478293459Sgnn struct iovec iov[VTNET_MAXSEGS], *riov; 479293459Sgnn struct vqueue_info *vq; 480293459Sgnn void *vrx; 481293459Sgnn int len, n; 482293459Sgnn uint16_t idx; 483293459Sgnn 484293459Sgnn /* 485293459Sgnn * Should never be called without a valid netmap descriptor 486293459Sgnn */ 487293459Sgnn assert(sc->vsc_nmd != NULL); 488293459Sgnn 489293459Sgnn /* 490293459Sgnn * But, will be called when the rx ring hasn't yet 491349698Svmaffione * been set up. 492293459Sgnn */ 493349698Svmaffione if (!sc->vsc_rx_ready) { 494293459Sgnn /* 495293459Sgnn * Drop the packet and try later. 496293459Sgnn */ 497293459Sgnn (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); 498293459Sgnn return; 499293459Sgnn } 500293459Sgnn 501293459Sgnn /* 502293459Sgnn * Check for available rx buffers 503293459Sgnn */ 504293459Sgnn vq = &sc->vsc_queues[VTNET_RXQ]; 505293459Sgnn if (!vq_has_descs(vq)) { 506293459Sgnn /* 507293459Sgnn * Drop the packet and try later. Interrupt on 508293459Sgnn * empty, if that's negotiated. 509293459Sgnn */ 510293459Sgnn (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); 511293459Sgnn vq_endchains(vq, 1); 512293459Sgnn return; 513293459Sgnn } 514293459Sgnn 515293459Sgnn do { 516293459Sgnn /* 517293459Sgnn * Get descriptor chain. 518293459Sgnn */ 519293459Sgnn n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); 520293459Sgnn assert(n >= 1 && n <= VTNET_MAXSEGS); 521293459Sgnn 522293459Sgnn /* 523293459Sgnn * Get a pointer to the rx header, and use the 524293459Sgnn * data immediately following it for the packet buffer. 525293459Sgnn */ 526293459Sgnn vrx = iov[0].iov_base; 527293459Sgnn riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); 528293459Sgnn 529293459Sgnn len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n); 530293459Sgnn 531293459Sgnn if (len == 0) { 532293459Sgnn /* 533293459Sgnn * No more packets, but still some avail ring 534293459Sgnn * entries. Interrupt if needed/appropriate. 535293459Sgnn */ 536296829Sgnn vq_retchain(vq); 537293459Sgnn vq_endchains(vq, 0); 538293459Sgnn return; 539293459Sgnn } 540293459Sgnn 541293459Sgnn /* 542293459Sgnn * The only valid field in the rx packet header is the 543293459Sgnn * number of buffers if merged rx bufs were negotiated. 544293459Sgnn */ 545293459Sgnn memset(vrx, 0, sc->rx_vhdrlen); 546293459Sgnn 547293459Sgnn if (sc->rx_merge) { 548293459Sgnn struct virtio_net_rxhdr *vrxh; 549293459Sgnn 550293459Sgnn vrxh = vrx; 551293459Sgnn vrxh->vrh_bufs = 1; 552293459Sgnn } 553293459Sgnn 554293459Sgnn /* 555293459Sgnn * Release this chain and handle more chains. 556293459Sgnn */ 557293459Sgnn vq_relchain(vq, idx, len + sc->rx_vhdrlen); 558293459Sgnn } while (vq_has_descs(vq)); 559293459Sgnn 560293459Sgnn /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ 561293459Sgnn vq_endchains(vq, 1); 562293459Sgnn} 563293459Sgnn 564293459Sgnnstatic void 565293459Sgnnpci_vtnet_rx_callback(int fd, enum ev_type type, void *param) 566293459Sgnn{ 567221828Sgrehan struct pci_vtnet_softc *sc = param; 568221828Sgrehan 569250083Sneel pthread_mutex_lock(&sc->rx_mtx); 570293459Sgnn sc->pci_vtnet_rx(sc); 571250083Sneel pthread_mutex_unlock(&sc->rx_mtx); 572221828Sgrehan 573221828Sgrehan} 574221828Sgrehan 575221828Sgrehanstatic void 576253440Sgrehanpci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) 577221828Sgrehan{ 578253440Sgrehan struct pci_vtnet_softc *sc = vsc; 579253440Sgrehan 580221828Sgrehan /* 581221828Sgrehan * A qnotify means that the rx process can now begin 582221828Sgrehan */ 583349740Svmaffione pthread_mutex_lock(&sc->rx_mtx); 584221828Sgrehan if (sc->vsc_rx_ready == 0) { 585221828Sgrehan sc->vsc_rx_ready = 1; 586349704Svmaffione vq_kick_disable(vq); 587221828Sgrehan } 588349740Svmaffione pthread_mutex_unlock(&sc->rx_mtx); 589221828Sgrehan} 590221828Sgrehan 591221828Sgrehanstatic void 592253440Sgrehanpci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) 593221828Sgrehan{ 594221828Sgrehan struct iovec iov[VTNET_MAXSEGS + 1]; 595253440Sgrehan int i, n; 596253440Sgrehan int plen, tlen; 597280026Smav uint16_t idx; 598221828Sgrehan 599221828Sgrehan /* 600253440Sgrehan * Obtain chain of descriptors. The first one is 601253440Sgrehan * really the header descriptor, so we need to sum 602253440Sgrehan * up two lengths: packet length and transfer length. 603221828Sgrehan */ 604280026Smav n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); 605253440Sgrehan assert(n >= 1 && n <= VTNET_MAXSEGS); 606253440Sgrehan plen = 0; 607253440Sgrehan tlen = iov[0].iov_len; 608253440Sgrehan for (i = 1; i < n; i++) { 609253440Sgrehan plen += iov[i].iov_len; 610253440Sgrehan tlen += iov[i].iov_len; 611221828Sgrehan } 612221828Sgrehan 613253440Sgrehan DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n)); 614293459Sgnn sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen); 615221828Sgrehan 616253440Sgrehan /* chain is processed, release it and set tlen */ 617280026Smav vq_relchain(vq, idx, tlen); 618221828Sgrehan} 619221828Sgrehan 620221828Sgrehanstatic void 621253440Sgrehanpci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) 622221828Sgrehan{ 623253440Sgrehan struct pci_vtnet_softc *sc = vsc; 624221828Sgrehan 625221828Sgrehan /* 626253440Sgrehan * Any ring entries to process? 627221828Sgrehan */ 628253440Sgrehan if (!vq_has_descs(vq)) 629221828Sgrehan return; 630221828Sgrehan 631249917Sgrehan /* Signal the tx thread for processing */ 632249917Sgrehan pthread_mutex_lock(&sc->tx_mtx); 633349704Svmaffione vq_kick_disable(vq); 634249917Sgrehan if (sc->tx_in_progress == 0) 635249917Sgrehan pthread_cond_signal(&sc->tx_cond); 636249917Sgrehan pthread_mutex_unlock(&sc->tx_mtx); 637221828Sgrehan} 638221828Sgrehan 639249917Sgrehan/* 640249917Sgrehan * Thread which will handle processing of TX desc 641249917Sgrehan */ 642249917Sgrehanstatic void * 643249917Sgrehanpci_vtnet_tx_thread(void *param) 644249917Sgrehan{ 645253440Sgrehan struct pci_vtnet_softc *sc = param; 646253440Sgrehan struct vqueue_info *vq; 647282563Smav int error; 648253440Sgrehan 649253440Sgrehan vq = &sc->vsc_queues[VTNET_TXQ]; 650253440Sgrehan 651253440Sgrehan /* 652253440Sgrehan * Let us wait till the tx queue pointers get initialised & 653253440Sgrehan * first tx signaled 654249917Sgrehan */ 655249917Sgrehan pthread_mutex_lock(&sc->tx_mtx); 656249917Sgrehan error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 657249917Sgrehan assert(error == 0); 658253440Sgrehan 659249917Sgrehan for (;;) { 660253440Sgrehan /* note - tx mutex is locked here */ 661282563Smav while (sc->resetting || !vq_has_descs(vq)) { 662349704Svmaffione vq_kick_enable(vq); 663282563Smav if (!sc->resetting && vq_has_descs(vq)) 664282563Smav break; 665250197Sneel 666282563Smav sc->tx_in_progress = 0; 667282563Smav error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 668282563Smav assert(error == 0); 669282563Smav } 670349704Svmaffione vq_kick_disable(vq); 671249917Sgrehan sc->tx_in_progress = 1; 672249917Sgrehan pthread_mutex_unlock(&sc->tx_mtx); 673249917Sgrehan 674253440Sgrehan do { 675249917Sgrehan /* 676253440Sgrehan * Run through entries, placing them into 677253440Sgrehan * iovecs and sending when an end-of-packet 678253440Sgrehan * is found 679249917Sgrehan */ 680253440Sgrehan pci_vtnet_proctx(sc, vq); 681253440Sgrehan } while (vq_has_descs(vq)); 682250197Sneel 683250197Sneel /* 684250197Sneel * Generate an interrupt if needed. 685250197Sneel */ 686253440Sgrehan vq_endchains(vq, 1); 687253440Sgrehan 688253440Sgrehan pthread_mutex_lock(&sc->tx_mtx); 689249917Sgrehan } 690221828Sgrehan} 691221828Sgrehan 692253440Sgrehan#ifdef notyet 693221828Sgrehanstatic void 694253440Sgrehanpci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) 695221828Sgrehan{ 696221828Sgrehan 697253440Sgrehan DPRINTF(("vtnet: control qnotify!\n\r")); 698221828Sgrehan} 699253440Sgrehan#endif 700221828Sgrehan 701293459Sgnnstatic void 702293459Sgnnpci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname) 703293459Sgnn{ 704293459Sgnn char tbuf[80]; 705320866Sgrehan#ifndef WITHOUT_CAPSICUM 706320866Sgrehan cap_rights_t rights; 707320866Sgrehan#endif 708252682Sgrehan 709293459Sgnn strcpy(tbuf, "/dev/"); 710293459Sgnn strlcat(tbuf, devname, sizeof(tbuf)); 711293459Sgnn 712293459Sgnn sc->pci_vtnet_rx = pci_vtnet_tap_rx; 713293459Sgnn sc->pci_vtnet_tx = pci_vtnet_tap_tx; 714293459Sgnn 715293459Sgnn sc->vsc_tapfd = open(tbuf, O_RDWR); 716293459Sgnn if (sc->vsc_tapfd == -1) { 717293459Sgnn WPRINTF(("open of tap device %s failed\n", tbuf)); 718293459Sgnn return; 719293459Sgnn } 720293459Sgnn 721293459Sgnn /* 722293459Sgnn * Set non-blocking and register for read 723293459Sgnn * notifications with the event loop 724293459Sgnn */ 725293459Sgnn int opt = 1; 726293459Sgnn if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { 727293459Sgnn WPRINTF(("tap device O_NONBLOCK failed\n")); 728293459Sgnn close(sc->vsc_tapfd); 729293459Sgnn sc->vsc_tapfd = -1; 730293459Sgnn } 731293459Sgnn 732320866Sgrehan#ifndef WITHOUT_CAPSICUM 733320866Sgrehan cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 734320866Sgrehan if (cap_rights_limit(sc->vsc_tapfd, &rights) == -1 && errno != ENOSYS) 735320866Sgrehan errx(EX_OSERR, "Unable to apply rights for sandbox"); 736320866Sgrehan#endif 737320866Sgrehan 738293459Sgnn sc->vsc_mevp = mevent_add(sc->vsc_tapfd, 739293459Sgnn EVF_READ, 740293459Sgnn pci_vtnet_rx_callback, 741293459Sgnn sc); 742293459Sgnn if (sc->vsc_mevp == NULL) { 743293459Sgnn WPRINTF(("Could not register event\n")); 744293459Sgnn close(sc->vsc_tapfd); 745293459Sgnn sc->vsc_tapfd = -1; 746293459Sgnn } 747293459Sgnn} 748293459Sgnn 749293459Sgnnstatic void 750293459Sgnnpci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname) 751293459Sgnn{ 752293459Sgnn sc->pci_vtnet_rx = pci_vtnet_netmap_rx; 753293459Sgnn sc->pci_vtnet_tx = pci_vtnet_netmap_tx; 754293459Sgnn 755293459Sgnn sc->vsc_nmd = nm_open(ifname, NULL, 0, 0); 756293459Sgnn if (sc->vsc_nmd == NULL) { 757293459Sgnn WPRINTF(("open of netmap device %s failed\n", ifname)); 758293459Sgnn return; 759293459Sgnn } 760293459Sgnn 761293459Sgnn sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd, 762293459Sgnn EVF_READ, 763293459Sgnn pci_vtnet_rx_callback, 764293459Sgnn sc); 765293459Sgnn if (sc->vsc_mevp == NULL) { 766293459Sgnn WPRINTF(("Could not register event\n")); 767293459Sgnn nm_close(sc->vsc_nmd); 768293459Sgnn sc->vsc_nmd = NULL; 769293459Sgnn } 770293459Sgnn} 771293459Sgnn 772252682Sgrehanstatic int 773221828Sgrehanpci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 774221828Sgrehan{ 775249917Sgrehan char tname[MAXCOMLEN + 1]; 776221828Sgrehan struct pci_vtnet_softc *sc; 777252682Sgrehan char *devname; 778252682Sgrehan char *vtopts; 779252682Sgrehan int mac_provided; 780221828Sgrehan 781264770Sdelphij sc = calloc(1, sizeof(struct pci_vtnet_softc)); 782221828Sgrehan 783253440Sgrehan pthread_mutex_init(&sc->vsc_mtx, NULL); 784221828Sgrehan 785253440Sgrehan vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues); 786261268Sjhb sc->vsc_vs.vs_mtx = &sc->vsc_mtx; 787261268Sjhb 788253440Sgrehan sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; 789253440Sgrehan sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; 790253440Sgrehan sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; 791253440Sgrehan sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; 792253440Sgrehan#ifdef notyet 793253440Sgrehan sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; 794253440Sgrehan sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; 795253440Sgrehan#endif 796246109Sneel 797246109Sneel /* 798252682Sgrehan * Attempt to open the tap device and read the MAC address 799252682Sgrehan * if specified 800221828Sgrehan */ 801252682Sgrehan mac_provided = 0; 802221828Sgrehan sc->vsc_tapfd = -1; 803293459Sgnn sc->vsc_nmd = NULL; 804221828Sgrehan if (opts != NULL) { 805252682Sgrehan int err; 806221828Sgrehan 807252682Sgrehan devname = vtopts = strdup(opts); 808252682Sgrehan (void) strsep(&vtopts, ","); 809252682Sgrehan 810252682Sgrehan if (vtopts != NULL) { 811349739Svmaffione err = net_parsemac(vtopts, sc->vsc_config.mac); 812252682Sgrehan if (err != 0) { 813252682Sgrehan free(devname); 814252682Sgrehan return (err); 815252682Sgrehan } 816252682Sgrehan mac_provided = 1; 817252682Sgrehan } 818252682Sgrehan 819293459Sgnn if (strncmp(devname, "vale", 4) == 0) 820293459Sgnn pci_vtnet_netmap_setup(sc, devname); 821293643Sglebius if (strncmp(devname, "tap", 3) == 0 || 822293643Sglebius strncmp(devname, "vmnet", 5) == 0) 823293459Sgnn pci_vtnet_tap_setup(sc, devname); 824221828Sgrehan 825252682Sgrehan free(devname); 826221828Sgrehan } 827221828Sgrehan 828252682Sgrehan if (!mac_provided) { 829349739Svmaffione net_genmac(pi, sc->vsc_config.mac); 830252682Sgrehan } 831221828Sgrehan 832221828Sgrehan /* initialize config space */ 833221828Sgrehan pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 834221828Sgrehan pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 835221828Sgrehan pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 836221828Sgrehan pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); 837282865Sgrehan pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); 838253440Sgrehan 839296829Sgnn /* Link is up if we managed to open tap device or vale port. */ 840296829Sgnn sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0 || 841296829Sgnn sc->vsc_nmd != NULL); 842246109Sneel 843253440Sgrehan /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ 844256711Sgrehan if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) 845253440Sgrehan return (1); 846246109Sneel 847253440Sgrehan /* use BAR 0 to map config regs in IO space */ 848253440Sgrehan vi_set_io_bar(&sc->vsc_vs, 0); 849246109Sneel 850250083Sneel sc->resetting = 0; 851250083Sneel 852271338Sgrehan sc->rx_merge = 1; 853271338Sgrehan sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); 854250083Sneel pthread_mutex_init(&sc->rx_mtx, NULL); 855250083Sneel 856249917Sgrehan /* 857253440Sgrehan * Initialize tx semaphore & spawn TX processing thread. 858249917Sgrehan * As of now, only one thread for TX desc processing is 859249917Sgrehan * spawned. 860249917Sgrehan */ 861249917Sgrehan sc->tx_in_progress = 0; 862249917Sgrehan pthread_mutex_init(&sc->tx_mtx, NULL); 863249917Sgrehan pthread_cond_init(&sc->tx_cond, NULL); 864249917Sgrehan pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); 865257729Sgrehan snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, 866257729Sgrehan pi->pi_func); 867336161Saraujo pthread_set_name_np(sc->tx_tid, tname); 868221828Sgrehan 869221828Sgrehan return (0); 870221828Sgrehan} 871221828Sgrehan 872253440Sgrehanstatic int 873253440Sgrehanpci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) 874246109Sneel{ 875253440Sgrehan struct pci_vtnet_softc *sc = vsc; 876222830Sgrehan void *ptr; 877222830Sgrehan 878253440Sgrehan if (offset < 6) { 879253440Sgrehan assert(offset + size <= 6); 880221828Sgrehan /* 881221828Sgrehan * The driver is allowed to change the MAC address 882221828Sgrehan */ 883253440Sgrehan ptr = &sc->vsc_config.mac[offset]; 884253440Sgrehan memcpy(ptr, &value, size); 885253440Sgrehan } else { 886271338Sgrehan /* silently ignore other writes */ 887253440Sgrehan DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); 888221828Sgrehan } 889271338Sgrehan 890253440Sgrehan return (0); 891221828Sgrehan} 892221828Sgrehan 893253440Sgrehanstatic int 894253440Sgrehanpci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval) 895221828Sgrehan{ 896253440Sgrehan struct pci_vtnet_softc *sc = vsc; 897222830Sgrehan void *ptr; 898221828Sgrehan 899253440Sgrehan ptr = (uint8_t *)&sc->vsc_config + offset; 900253440Sgrehan memcpy(retval, ptr, size); 901253440Sgrehan return (0); 902221828Sgrehan} 903221828Sgrehan 904271338Sgrehanstatic void 905271338Sgrehanpci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) 906271338Sgrehan{ 907271338Sgrehan struct pci_vtnet_softc *sc = vsc; 908271338Sgrehan 909271338Sgrehan sc->vsc_features = negotiated_features; 910271338Sgrehan 911271338Sgrehan if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) { 912271338Sgrehan sc->rx_merge = 0; 913271338Sgrehan /* non-merge rx header is 2 bytes shorter */ 914271338Sgrehan sc->rx_vhdrlen -= 2; 915271338Sgrehan } 916271338Sgrehan} 917271338Sgrehan 918221828Sgrehanstruct pci_devemu pci_de_vnet = { 919241744Sgrehan .pe_emu = "virtio-net", 920241744Sgrehan .pe_init = pci_vtnet_init, 921253440Sgrehan .pe_barwrite = vi_pci_write, 922253440Sgrehan .pe_barread = vi_pci_read 923221828Sgrehan}; 924221828SgrehanPCI_EMUL_SET(pci_de_vnet); 925