/******************************************************************************

  Copyright (c) 2001-2017, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/10/sys/dev/ixgbe/ixv_netmap.c 315333 2017-03-15 21:20:17Z erj $*/

/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD: stable/10/sys/dev/ixgbe/ixv_netmap.c 315333 2017-03-15 21:20:17Z erj $
 *
 * netmap support for: ixgbe
 *
 * This file is meant to be a reference on how to implement
 * netmap support for a network driver.
 * This file contains code but only static or inline functions used
 * by a single driver. To avoid replication of code we just #include
 * it near the beginning of the standard driver.
 */

#ifdef DEV_NETMAP
/*
 * Some drivers may need the following headers. Others
 * already include them by default

#include <vm/vm.h>
#include <vm/pmap.h>

 */
#include "ixv.h"

/*
 * device-specific sysctl variables:
 *
 * ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
 *	During regular operations the CRC is stripped, but on some
 *	hardware reception of frames not multiple of 64 is slower,
 *	so using crcstrip=0 helps in benchmarks.
 *
 * ix_rx_miss, ix_rx_miss_bufs:
 *	count packets that might be missed due to lost interrupts.
 */
SYSCTL_DECL(_dev_netmap);
static int ix_rx_miss, ix_rx_miss_bufs;
int ix_crcstrip;
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
    CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
    CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
    CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs");


/*
 * Program the CRC-strip bits of HLREG0 and RDRXCTL.
 *
 * hw:    hardware handle passed to the register accessors.
 * onoff: non-zero when entering netmap mode, zero when leaving it.
 *
 * The CRC is kept (strip bits cleared) only when onoff is non-zero and
 * the ix_crcstrip sysctl is zero; in every other case the strip bits
 * are set.  RSCFRSTSIZE is always cleared and RSCACKC always set in
 * RDRXCTL.  When netmap_verbose is set, the values read and written
 * are logged.
 *
 * NOTE(review): this copy is ixv_netmap.c and includes "ixv.h", yet it
 * uses the IXGBE_HLREG0/IXGBE_RDRXCTL register names -- confirm these
 * registers are valid for the device this file is built for.
 */
static void
set_crcstrip(struct ixgbe_hw *hw, int onoff)
{
	/* crc stripping is set in two places:
	 * IXGBE_HLREG0 (modified on init_locked and hw reset)
	 * IXGBE_RDRXCTL (set by the original driver in
	 *	ixgbe_setup_hw_rsc() called in init_locked.
	 *	We disable the setting when netmap is compiled in).
	 * We update the values here, but also in ixgbe.c because
	 * init_locked sometimes is called outside our control.
	 */
	uint32_t hl, rxc;

	hl = IXGBE_READ_REG(hw, IXGBE_HLREG0);
	rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	if (netmap_verbose)
		D("%s read  HLREG 0x%x rxc 0x%x",
			onoff ? "enter" : "exit", hl, rxc);
	/* hw requirements ... */
	rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
	rxc |= IXGBE_RDRXCTL_RSCACKC;
	if (onoff && !ix_crcstrip) {
		/* keep the crc. Fast rx */
		hl &= ~IXGBE_HLREG0_RXCRCSTRP;
		rxc &= ~IXGBE_RDRXCTL_CRCSTRIP;
	} else {
		/* reset default mode */
		hl |= IXGBE_HLREG0_RXCRCSTRP;
		rxc |= IXGBE_RDRXCTL_CRCSTRIP;
	}
	if (netmap_verbose)
		D("%s write HLREG 0x%x rxc 0x%x",
			onoff ? "enter" : "exit", hl, rxc);
	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl);
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
}


/*
 * Register/unregister. We are already under netmap lock.
 * Only called on the first register or the last unregister.
 *
 * na:    netmap adapter; na->ifp->if_softc is the driver softc.
 * onoff: non-zero to enter netmap mode, zero to leave it.
 *
 * Under the core lock the interface is stopped, the native netmap
 * flags are set or cleared, and the interface is re-initialized.
 *
 * Returns 0 if IFF_DRV_RUNNING is set in ifp->if_drv_flags after the
 * re-initialization, 1 otherwise.
 */
static int
ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	struct adapter *adapter = ifp->if_softc;

	IXGBE_CORE_LOCK(adapter);
	adapter->stop_locked(adapter);

	set_crcstrip(&adapter->hw, onoff);
	/* enable or disable flags and callbacks in na and ifp */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	adapter->init_locked(adapter);	/* also enables intr */
	set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
	IXGBE_CORE_UNLOCK(adapter);
	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}


/*
 * Reconcile kernel and user view of the transmit ring.
 *
 * All information is in the kring.
 * Userspace wants to send packets up to the one before kring->rhead,
 * kernel knows kring->nr_hwcur is the first unsent packet.
 *
 * Here we push packets out (as many as possible), and possibly
 * reclaim buffers from previously completed transmission.
 *
 * The caller (netmap) guarantees that there is only one instance
 * running at any time. Any interference with other driver
 * methods should be handled by the individual drivers.
 *
 * kring: the netmap kernel ring to synchronize.
 * flags: NAF_FORCE_RECLAIM forces the reclaim step.
 *
 * Always returns 0.
 */
static int
ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	/*
	 * interrupts on every tx packet are expensive so request
	 * them every half ring, or where NS_REPORT is set
	 */
	u_int report_frequency = kring->nkr_num_slots >> 1;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
	int reclaim_tx;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
			BUS_DMASYNC_POSTREAD);

	/*
	 * First part: process new packets to send.
	 * nm_i is the current index in the netmap ring,
	 * nic_i is the corresponding index in the NIC ring.
	 * The two numbers differ because upon a *_init() we reset
	 * the NIC ring but leave the netmap ring unchanged.
	 * For the transmit ring, we have
	 *
	 *		nm_i = kring->nr_hwcur
	 *		nic_i = IXGBE_TDT (not tracked in the driver)
	 * and
	 * 		nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 *
	 * In this driver kring->nkr_hwofs >= 0, but for other
	 * drivers it might be negative as well.
	 */

	/*
	 * If we have packets to send (kring->nr_hwcur != kring->rhead)
	 * iterate over the netmap ring, fetch length and update
	 * the corresponding slot in the NIC ring. Some drivers also
	 * need to update the buffer's physical address in the NIC slot
	 * even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
	 *
	 * The netmap_reload_map() call is especially expensive,
	 * even when (as in this case) the tag is 0, so do only
	 * when the buffer has actually changed.
	 *
	 * If possible do not set the report/intr bit on all slots,
	 * but only a few times per ring or when NS_REPORT is set.
	 *
	 * Finally, on 10G and faster drivers, it might be useful
	 * to prefetch the next slot and txr entry.
	 */

	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		nic_i = netmap_idx_k2n(kring, nm_i);

		__builtin_prefetch(&ring->slot[nm_i]);
		__builtin_prefetch(&txr->tx_buffers[nic_i]);

		for (n = 0; nm_i != head; n++) {
			/*
			 * NOTE(review): NM_CHECK_ADDR_LEN is a macro defined
			 * elsewhere; presumably it may refer to locals by
			 * name -- confirm before renaming any of them.
			 */
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(na, slot, &paddr);

			/* device-specific */
			union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
			struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
			/*
			 * Descriptor command bits; named cmd_flags so that
			 * the 'flags' parameter is not shadowed.
			 */
			int cmd_flags = (slot->flags & NS_REPORT ||
				nic_i == 0 || nic_i == report_frequency) ?
				IXGBE_TXD_CMD_RS : 0;

			/* prefetch for next round */
			__builtin_prefetch(&ring->slot[nm_i + 1]);
			__builtin_prefetch(&txr->tx_buffers[nic_i + 1]);

			NM_CHECK_ADDR_LEN(na, addr, len);

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				netmap_reload_map(na, txr->txtag, txbuf->map, addr);
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);

			/* Fill the slot in the NIC ring. */
			/* Use legacy descriptor, they are faster? */
			curr->read.buffer_addr = htole64(paddr);
			curr->read.olinfo_status = 0;
			curr->read.cmd_type_len = htole32(len | cmd_flags |
				IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);

			/* make sure changes to the buffer are synced */
			bus_dmamap_sync(txr->txtag, txbuf->map,
				BUS_DMASYNC_PREWRITE);

			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		/* synchronize the NIC ring */
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* (re)start the tx unit up to slot nic_i (excluded) */
		IXGBE_WRITE_REG(&adapter->hw, txr->tail, nic_i);
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 * Because this is expensive (we read a NIC register etc.)
	 * we only do it in specific cases (see below).
	 */
	if (flags & NAF_FORCE_RECLAIM) {
		reclaim_tx = 1; /* forced reclaim */
	} else if (!nm_kr_txempty(kring)) {
		reclaim_tx = 0; /* have buffers, no reclaim */
	} else {
		/*
		 * No buffers available. Locate previous slot with
		 * REPORT_STATUS set.
		 * If the slot has DD set, we can reclaim space,
		 * otherwise wait for the next interrupt.
		 * This enables interrupt moderation on the tx
		 * side though it might reduce throughput.
		 */
		struct ixgbe_legacy_tx_desc *txd =
		    (struct ixgbe_legacy_tx_desc *)txr->tx_base;

		nic_i = txr->next_to_clean + report_frequency;
		if (nic_i > lim)
			nic_i -= lim + 1;
		// round to the closest with dd set
		nic_i = (nic_i < kring->nkr_num_slots / 4 ||
			 nic_i >= kring->nkr_num_slots*3/4) ?
			0 : report_frequency;
		reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD;	// XXX cpu_to_le32 ?
	}
	if (reclaim_tx) {
		/*
		 * Record completed transmissions.
		 * We (re)use the driver's txr->next_to_clean to keep
		 * track of the most recently completed transmission.
		 *
		 * The datasheet discourages the use of TDH to find
		 * out the number of sent packets, but we only set
		 * REPORT_STATUS in a few slots so TDH is the only
		 * good way.
		 *
		 * NOTE(review): the tail is written through txr->tail
		 * above while the head is read through IXGBE_TDH() --
		 * confirm IXGBE_TDH() is the right register for the
		 * device this file (ixv_netmap.c) is built for.
		 */
		nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(kring->ring_id));
		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
			/* nic_i is unsigned, so print it with %u */
			D("TDH wrap %u", nic_i);
			nic_i -= kring->nkr_num_slots;
		}
		if (nic_i != txr->next_to_clean) {
			/* some tx completed, increment avail */
			txr->next_to_clean = nic_i;
			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
		}
	}

	return 0;
}


/*
 * Reconcile kernel and user view of the receive ring.
 * Same as for the txsync, this routine must be efficient.
 * The caller guarantees a single invocation, but races against
 * the rest of the driver should be handled here.
 *
 * On call, kring->rhead is the first packet that userspace wants
 * to keep, and kring->rcur is the wakeup point.
 * The kernel has previously reported packets up to kring->rtail.
 *
 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
 * of whether or not we received an interrupt.
 *
 * kring: the netmap kernel ring to synchronize.
 * flags: NAF_FORCE_READ forces the import of received packets.
 *
 * Returns 0 on success.  If kring->rhead is beyond the last slot, or a
 * released slot maps to NETMAP_BUF_BASE(na), returns the result of
 * netmap_ring_reinit(kring).
 */
static int
ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	/* explicit parentheses: '&' already binds tighter than '||' */
	int force_update = (flags & NAF_FORCE_READ) ||
	    (kring->nr_kflags & NKR_PENDINTR);

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];

	if (head > lim)
		return netmap_ring_reinit(kring);

	/* XXX check sync modes */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 *
	 * nm_i is the index of the next free slot in the netmap ring,
	 * nic_i is the index of the next received packet in the NIC ring,
	 * and they may differ in case if_init() has been called while
	 * in netmap mode. For the receive ring we have
	 *
	 *	nic_i = rxr->next_to_check;
	 *	nm_i = kring->nr_hwtail (previous)
	 * and
	 *	nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 *
	 * rxr->next_to_check is set to 0 on a ring reinit
	 */
	if (netmap_no_pendintr || force_update) {
		/* bytes subtracted from the reported length of each frame */
		int crclen = (ix_crcstrip) ? 0 : 4;
		uint16_t slot_flags = kring->nkr_slot_flags;

		nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
		nm_i = netmap_idx_n2k(kring, nic_i);

		/* walk descriptors until one without DD set is found */
		for (n = 0; ; n++) {
			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
			uint32_t staterr = le32toh(curr->wb.upper.status_error);

			if ((staterr & IXGBE_RXD_STAT_DD) == 0)
				break;
			ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
			ring->slot[nm_i].flags = slot_flags;
			bus_dmamap_sync(rxr->ptag,
			    rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		if (n) { /* update the state variables */
			if (netmap_no_pendintr && !force_update) {
				/* diagnostics */
				ix_rx_miss ++;
				ix_rx_miss_bufs += n;
			}
			rxr->next_to_check = nic_i;
			kring->nr_hwtail = nm_i;
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 * (kring->nr_hwcur to kring->rhead excluded),
	 * and make the buffers available for reception.
	 * As usual nm_i is the index in the netmap ring,
	 * nic_i is the index in the NIC ring, and
	 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(na, slot, &paddr);

			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
			struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];

			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
				goto ring_reset;

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}
			/* clear the status, then hand the buffer to the NIC */
			curr->wb.upper.status_error = 0;
			curr->read.pkt_addr = htole64(paddr);
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/*
		 * IMPORTANT: we must leave one free slot in the ring,
		 * so move nic_i back by one unit
		 */
		nic_i = nm_prev(nic_i, lim);
		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, nic_i);
	}

	return 0;

ring_reset:
	return netmap_ring_reinit(kring);
}


/*
 * The attach routine, called near the end of ixgbe_attach(),
 * fills the parameters for netmap_attach() and calls it.
 * It cannot fail, in the worst case (such as no memory)
 * netmap mode will be disabled and the driver will only
 * operate in standard mode.
 *
 * adapter: driver softc; supplies the ifnet, the descriptor counts
 *          and the number of queues (used for both tx and rx rings).
 *
 * The result of netmap_attach() is not examined.
 */
void
ixgbe_netmap_attach(struct adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = adapter->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_txsync = ixgbe_netmap_txsync;
	na.nm_rxsync = ixgbe_netmap_rxsync;
	na.nm_register = ixgbe_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
	netmap_attach(&na);
}

#endif /* DEV_NETMAP */

/* end of file */