1/* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26/* 27 * $FreeBSD: stable/11/sys/dev/netmap/ixgbe_netmap.h 343771 2019-02-05 10:33:22Z vmaffione $ 28 * 29 * netmap support for: ixgbe (both ix and ixv) 30 * 31 * This file is meant to be a reference on how to implement 32 * netmap support for a network driver. 33 * This file contains code but only static or inline functions used 34 * by a single driver. To avoid replication of code we just #include 35 * it near the beginning of the standard driver. 36 */ 37 38 39#include <net/netmap.h> 40#include <sys/selinfo.h> 41/* 42 * Some drivers may need the following headers. Others 43 * already include them by default 44 45#include <vm/vm.h> 46#include <vm/pmap.h> 47 48 */ 49#include <dev/netmap/netmap_kern.h> 50 51void ixgbe_netmap_attach(struct adapter *adapter); 52 53/* 54 * device-specific sysctl variables: 55 * 56 * ix_crcstrip: 0: NIC keeps CRC in rx frames (default), 1: NIC strips it. 57 * During regular operations the CRC is stripped, but on some 58 * hardware reception of frames not multiple of 64 is slower, 59 * so using crcstrip=0 helps in benchmarks. 60 * 61 * ix_rx_miss, ix_rx_miss_bufs: 62 * count packets that might be missed due to lost interrupts. 63 */ 64SYSCTL_DECL(_dev_netmap); 65static int ix_rx_miss, ix_rx_miss_bufs; 66int ix_crcstrip; 67SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip, 68 CTLFLAG_RW, &ix_crcstrip, 0, "NIC strips CRC on rx frames"); 69SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss, 70 CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr"); 71SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs, 72 CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs"); 73 74 75static void 76set_crcstrip(struct ixgbe_hw *hw, int onoff) 77{ 78 /* crc stripping is set in two places: 79 * IXGBE_HLREG0 (modified on init_locked and hw reset) 80 * IXGBE_RDRXCTL (set by the original driver in 81 * ixgbe_setup_hw_rsc() called in init_locked. 82 * We disable the setting when netmap is compiled in). 83 * We update the values here, but also in ixgbe.c because 84 * init_locked sometimes is called outside our control. 85 */ 86 uint32_t hl, rxc; 87 88 hl = IXGBE_READ_REG(hw, IXGBE_HLREG0); 89 rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); 90 if (netmap_verbose) 91 nm_prinf("%s read HLREG 0x%x rxc 0x%x", 92 onoff ? "enter" : "exit", hl, rxc); 93 /* hw requirements ... */ 94 rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; 95 rxc |= IXGBE_RDRXCTL_RSCACKC; 96 if (onoff && !ix_crcstrip) { 97 /* keep the crc. Fast rx */ 98 hl &= ~IXGBE_HLREG0_RXCRCSTRP; 99 rxc &= ~IXGBE_RDRXCTL_CRCSTRIP; 100 } else { 101 /* reset default mode */ 102 hl |= IXGBE_HLREG0_RXCRCSTRP; 103 rxc |= IXGBE_RDRXCTL_CRCSTRIP; 104 } 105 if (netmap_verbose) 106 nm_prinf("%s write HLREG 0x%x rxc 0x%x", 107 onoff ? "enter" : "exit", hl, rxc); 108 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl); 109 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc); 110} 111 112static void 113ixgbe_netmap_intr(struct netmap_adapter *na, int onoff) 114{ 115 struct ifnet *ifp = na->ifp; 116 struct adapter *adapter = ifp->if_softc; 117 118 IXGBE_CORE_LOCK(adapter); 119 if (onoff) { 120 ixgbe_enable_intr(adapter); // XXX maybe ixgbe_stop ? 121 } else { 122 ixgbe_disable_intr(adapter); // XXX maybe ixgbe_stop ? 123 } 124 IXGBE_CORE_UNLOCK(adapter); 125} 126 127/* 128 * Register/unregister. We are already under netmap lock. 129 * Only called on the first register or the last unregister. 130 */ 131static int 132ixgbe_netmap_reg(struct netmap_adapter *na, int onoff) 133{ 134 struct ifnet *ifp = na->ifp; 135 struct adapter *adapter = ifp->if_softc; 136 137 IXGBE_CORE_LOCK(adapter); 138 adapter->stop_locked(adapter); 139 140 if (!IXGBE_IS_VF(adapter)) 141 set_crcstrip(&adapter->hw, onoff); 142 /* enable or disable flags and callbacks in na and ifp */ 143 if (onoff) { 144 nm_set_native_flags(na); 145 } else { 146 nm_clear_native_flags(na); 147 } 148 adapter->init_locked(adapter); /* also enables intr */ 149 if (!IXGBE_IS_VF(adapter)) 150 set_crcstrip(&adapter->hw, onoff); // XXX why twice ? 151 IXGBE_CORE_UNLOCK(adapter); 152 return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); 153} 154 155 156/* 157 * Reconcile kernel and user view of the transmit ring. 158 * 159 * All information is in the kring. 160 * Userspace wants to send packets up to the one before kring->rhead, 161 * kernel knows kring->nr_hwcur is the first unsent packet. 162 * 163 * Here we push packets out (as many as possible), and possibly 164 * reclaim buffers from previously completed transmission. 165 * 166 * The caller (netmap) guarantees that there is only one instance 167 * running at any time. Any interference with other driver 168 * methods should be handled by the individual drivers. 169 */ 170static int 171ixgbe_netmap_txsync(struct netmap_kring *kring, int flags) 172{ 173 struct netmap_adapter *na = kring->na; 174 struct ifnet *ifp = na->ifp; 175 struct netmap_ring *ring = kring->ring; 176 u_int nm_i; /* index into the netmap ring */ 177 u_int nic_i; /* index into the NIC ring */ 178 u_int n; 179 u_int const lim = kring->nkr_num_slots - 1; 180 u_int const head = kring->rhead; 181 /* 182 * interrupts on every tx packet are expensive so request 183 * them every half ring, or where NS_REPORT is set 184 */ 185 u_int report_frequency = kring->nkr_num_slots >> 1; 186 187 /* device-specific */ 188 struct adapter *adapter = ifp->if_softc; 189 struct tx_ring *txr = &adapter->tx_rings[kring->ring_id]; 190 int reclaim_tx; 191 192 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 193 BUS_DMASYNC_POSTREAD); 194 195 /* 196 * First part: process new packets to send. 197 * nm_i is the current index in the netmap ring, 198 * nic_i is the corresponding index in the NIC ring. 199 * The two numbers differ because upon a *_init() we reset 200 * the NIC ring but leave the netmap ring unchanged. 201 * For the transmit ring, we have 202 * 203 * nm_i = kring->nr_hwcur 204 * nic_i = IXGBE_TDT (not tracked in the driver) 205 * and 206 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 207 * 208 * In this driver kring->nkr_hwofs >= 0, but for other 209 * drivers it might be negative as well. 210 */ 211 212 /* 213 * If we have packets to send (kring->nr_hwcur != kring->rhead) 214 * iterate over the netmap ring, fetch length and update 215 * the corresponding slot in the NIC ring. Some drivers also 216 * need to update the buffer's physical address in the NIC slot 217 * even NS_BUF_CHANGED is not set (PNMB computes the addresses). 218 * 219 * The netmap_reload_map() calls is especially expensive, 220 * even when (as in this case) the tag is 0, so do only 221 * when the buffer has actually changed. 222 * 223 * If possible do not set the report/intr bit on all slots, 224 * but only a few times per ring or when NS_REPORT is set. 225 * 226 * Finally, on 10G and faster drivers, it might be useful 227 * to prefetch the next slot and txr entry. 228 */ 229 230 nm_i = kring->nr_hwcur; 231 if (nm_i != head) { /* we have new packets to send */ 232 nic_i = netmap_idx_k2n(kring, nm_i); 233 234 __builtin_prefetch(&ring->slot[nm_i]); 235 __builtin_prefetch(&txr->tx_buffers[nic_i]); 236 237 for (n = 0; nm_i != head; n++) { 238 struct netmap_slot *slot = &ring->slot[nm_i]; 239 u_int len = slot->len; 240 uint64_t paddr; 241 void *addr = PNMB(na, slot, &paddr); 242 243 /* device-specific */ 244 union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i]; 245 struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i]; 246 int flags = (slot->flags & NS_REPORT || 247 nic_i == 0 || nic_i == report_frequency) ? 248 IXGBE_TXD_CMD_RS : 0; 249 250 /* prefetch for next round */ 251 __builtin_prefetch(&ring->slot[nm_i + 1]); 252 __builtin_prefetch(&txr->tx_buffers[nic_i + 1]); 253 254 NM_CHECK_ADDR_LEN(na, addr, len); 255 256 if (slot->flags & NS_BUF_CHANGED) { 257 /* buffer has changed, reload map */ 258 netmap_reload_map(na, txr->txtag, txbuf->map, addr); 259 } 260 slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); 261 262 /* Fill the slot in the NIC ring. */ 263 /* Use legacy descriptor, they are faster? */ 264 curr->read.buffer_addr = htole64(paddr); 265 curr->read.olinfo_status = 0; 266 curr->read.cmd_type_len = htole32(len | flags | 267 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP); 268 269 /* make sure changes to the buffer are synced */ 270 bus_dmamap_sync(txr->txtag, txbuf->map, 271 BUS_DMASYNC_PREWRITE); 272 273 nm_i = nm_next(nm_i, lim); 274 nic_i = nm_next(nic_i, lim); 275 } 276 kring->nr_hwcur = head; 277 278 /* synchronize the NIC ring */ 279 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 280 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 281 282 /* (re)start the tx unit up to slot nic_i (excluded) */ 283 IXGBE_WRITE_REG(&adapter->hw, txr->tail, nic_i); 284 } 285 286 /* 287 * Second part: reclaim buffers for completed transmissions. 288 * Because this is expensive (we read a NIC register etc.) 289 * we only do it in specific cases (see below). 290 */ 291 if (flags & NAF_FORCE_RECLAIM) { 292 reclaim_tx = 1; /* forced reclaim */ 293 } else if (!nm_kr_txempty(kring)) { 294 reclaim_tx = 0; /* have buffers, no reclaim */ 295 } else { 296 /* 297 * No buffers available. Locate previous slot with 298 * REPORT_STATUS set. 299 * If the slot has DD set, we can reclaim space, 300 * otherwise wait for the next interrupt. 301 * This enables interrupt moderation on the tx 302 * side though it might reduce throughput. 303 */ 304 struct ixgbe_legacy_tx_desc *txd = 305 (struct ixgbe_legacy_tx_desc *)txr->tx_base; 306 307 nic_i = txr->next_to_clean + report_frequency; 308 if (nic_i > lim) 309 nic_i -= lim + 1; 310 // round to the closest with dd set 311 nic_i = (nic_i < kring->nkr_num_slots / 4 || 312 nic_i >= kring->nkr_num_slots*3/4) ? 313 0 : report_frequency; 314 reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ? 315 } 316 if (reclaim_tx) { 317 /* 318 * Record completed transmissions. 319 * We (re)use the driver's txr->next_to_clean to keep 320 * track of the most recently completed transmission. 321 * 322 * The datasheet discourages the use of TDH to find 323 * out the number of sent packets, but we only set 324 * REPORT_STATUS in a few slots so TDH is the only 325 * good way. 326 */ 327 nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_IS_VF(adapter) ? 328 IXGBE_VFTDH(kring->ring_id) : IXGBE_TDH(kring->ring_id)); 329 if (unlikely(nic_i >= kring->nkr_num_slots)) { 330 nm_prerr("TDH wrap at idx %d", nic_i); 331 nic_i -= kring->nkr_num_slots; 332 } 333 if (nic_i != txr->next_to_clean) { 334 /* some tx completed, increment avail */ 335 txr->next_to_clean = nic_i; 336 kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); 337 } 338 } 339 340 return 0; 341} 342 343 344/* 345 * Reconcile kernel and user view of the receive ring. 346 * Same as for the txsync, this routine must be efficient. 347 * The caller guarantees a single invocations, but races against 348 * the rest of the driver should be handled here. 349 * 350 * On call, kring->rhead is the first packet that userspace wants 351 * to keep, and kring->rcur is the wakeup point. 352 * The kernel has previously reported packets up to kring->rtail. 353 * 354 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective 355 * of whether or not we received an interrupt. 356 */ 357static int 358ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) 359{ 360 struct netmap_adapter *na = kring->na; 361 struct ifnet *ifp = na->ifp; 362 struct netmap_ring *ring = kring->ring; 363 u_int nm_i; /* index into the netmap ring */ 364 u_int nic_i; /* index into the NIC ring */ 365 u_int n; 366 u_int const lim = kring->nkr_num_slots - 1; 367 u_int const head = kring->rhead; 368 int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; 369 370 /* device-specific */ 371 struct adapter *adapter = ifp->if_softc; 372 struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id]; 373 374 if (head > lim) 375 return netmap_ring_reinit(kring); 376 377 /* XXX check sync modes */ 378 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 379 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 380 381 /* 382 * First part: import newly received packets. 383 * 384 * nm_i is the index of the next free slot in the netmap ring, 385 * nic_i is the index of the next received packet in the NIC ring, 386 * and they may differ in case if_init() has been called while 387 * in netmap mode. For the receive ring we have 388 * 389 * nic_i = rxr->next_to_check; 390 * nm_i = kring->nr_hwtail (previous) 391 * and 392 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 393 * 394 * rxr->next_to_check is set to 0 on a ring reinit 395 */ 396 if (netmap_no_pendintr || force_update) { 397 int crclen = (ix_crcstrip || IXGBE_IS_VF(adapter) ) ? 0 : 4; 398 399 nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail) 400 nm_i = netmap_idx_n2k(kring, nic_i); 401 402 for (n = 0; ; n++) { 403 union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i]; 404 uint32_t staterr = le32toh(curr->wb.upper.status_error); 405 406 if ((staterr & IXGBE_RXD_STAT_DD) == 0) 407 break; 408 ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen; 409 ring->slot[nm_i].flags = 0; 410 bus_dmamap_sync(rxr->ptag, 411 rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); 412 nm_i = nm_next(nm_i, lim); 413 nic_i = nm_next(nic_i, lim); 414 } 415 if (n) { /* update the state variables */ 416 if (netmap_no_pendintr && !force_update) { 417 /* diagnostics */ 418 ix_rx_miss ++; 419 ix_rx_miss_bufs += n; 420 } 421 rxr->next_to_check = nic_i; 422 kring->nr_hwtail = nm_i; 423 } 424 kring->nr_kflags &= ~NKR_PENDINTR; 425 } 426 427 /* 428 * Second part: skip past packets that userspace has released. 429 * (kring->nr_hwcur to kring->rhead excluded), 430 * and make the buffers available for reception. 431 * As usual nm_i is the index in the netmap ring, 432 * nic_i is the index in the NIC ring, and 433 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 434 */ 435 nm_i = kring->nr_hwcur; 436 if (nm_i != head) { 437 nic_i = netmap_idx_k2n(kring, nm_i); 438 for (n = 0; nm_i != head; n++) { 439 struct netmap_slot *slot = &ring->slot[nm_i]; 440 uint64_t paddr; 441 void *addr = PNMB(na, slot, &paddr); 442 443 union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i]; 444 struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i]; 445 446 if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ 447 goto ring_reset; 448 449 if (slot->flags & NS_BUF_CHANGED) { 450 /* buffer has changed, reload map */ 451 netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr); 452 slot->flags &= ~NS_BUF_CHANGED; 453 } 454 curr->wb.upper.status_error = 0; 455 curr->read.pkt_addr = htole64(paddr); 456 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 457 BUS_DMASYNC_PREREAD); 458 nm_i = nm_next(nm_i, lim); 459 nic_i = nm_next(nic_i, lim); 460 } 461 kring->nr_hwcur = head; 462 463 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 464 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 465 /* 466 * IMPORTANT: we must leave one free slot in the ring, 467 * so move nic_i back by one unit 468 */ 469 nic_i = nm_prev(nic_i, lim); 470 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, nic_i); 471 } 472 473 return 0; 474 475ring_reset: 476 return netmap_ring_reinit(kring); 477} 478 479 480/* 481 * The attach routine, called near the end of ixgbe_attach(), 482 * fills the parameters for netmap_attach() and calls it. 483 * It cannot fail, in the worst case (such as no memory) 484 * netmap mode will be disabled and the driver will only 485 * operate in standard mode. 486 */ 487void 488ixgbe_netmap_attach(struct adapter *adapter) 489{ 490 struct netmap_adapter na; 491 492 bzero(&na, sizeof(na)); 493 494 na.ifp = adapter->ifp; 495 na.na_flags = NAF_BDG_MAYSLEEP; 496 na.num_tx_desc = adapter->num_tx_desc; 497 na.num_rx_desc = adapter->num_rx_desc; 498 na.nm_txsync = ixgbe_netmap_txsync; 499 na.nm_rxsync = ixgbe_netmap_rxsync; 500 na.nm_register = ixgbe_netmap_reg; 501 na.num_tx_rings = na.num_rx_rings = adapter->num_queues; 502 na.nm_intr = ixgbe_netmap_intr; 503 netmap_attach(&na); 504} 505 506/* end of file */ 507