1/* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26/* 27 * $FreeBSD: stable/10/sys/dev/ixgbe/ixgbe_netmap.c 323830 2017-09-20 21:22:20Z marius $ 28 * 29 * netmap support for: ixgbe 30 * 31 * This file is meant to be a reference on how to implement 32 * netmap support for a network driver. 33 * This file contains code but only static or inline functions used 34 * by a single driver. To avoid replication of code we just #include 35 * it near the beginning of the standard driver. 36 */ 37 38#ifdef DEV_NETMAP 39/* 40 * Some drivers may need the following headers. Others 41 * already include them by default 42 43#include <vm/vm.h> 44#include <vm/pmap.h> 45 46 */ 47#include "ixgbe.h" 48 49/* 50 * device-specific sysctl variables: 51 * 52 * ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. 53 * During regular operations the CRC is stripped, but on some 54 * hardware reception of frames not multiple of 64 is slower, 55 * so using crcstrip=0 helps in benchmarks. 56 * 57 * ix_rx_miss, ix_rx_miss_bufs: 58 * count packets that might be missed due to lost interrupts. 59 */ 60SYSCTL_DECL(_dev_netmap); 61static int ix_rx_miss, ix_rx_miss_bufs; 62int ix_crcstrip; 63SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip, 64 CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames"); 65SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss, 66 CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr"); 67SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs, 68 CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs"); 69 70 71static void 72set_crcstrip(struct ixgbe_hw *hw, int onoff) 73{ 74 /* crc stripping is set in two places: 75 * IXGBE_HLREG0 (modified on init_locked and hw reset) 76 * IXGBE_RDRXCTL (set by the original driver in 77 * ixgbe_setup_hw_rsc() called in init_locked. 78 * We disable the setting when netmap is compiled in). 79 * We update the values here, but also in ixgbe.c because 80 * init_locked sometimes is called outside our control. 81 */ 82 uint32_t hl, rxc; 83 84 hl = IXGBE_READ_REG(hw, IXGBE_HLREG0); 85 rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); 86 if (netmap_verbose) 87 D("%s read HLREG 0x%x rxc 0x%x", 88 onoff ? "enter" : "exit", hl, rxc); 89 /* hw requirements ... */ 90 rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; 91 rxc |= IXGBE_RDRXCTL_RSCACKC; 92 if (onoff && !ix_crcstrip) { 93 /* keep the crc. Fast rx */ 94 hl &= ~IXGBE_HLREG0_RXCRCSTRP; 95 rxc &= ~IXGBE_RDRXCTL_CRCSTRIP; 96 } else { 97 /* reset default mode */ 98 hl |= IXGBE_HLREG0_RXCRCSTRP; 99 rxc |= IXGBE_RDRXCTL_CRCSTRIP; 100 } 101 if (netmap_verbose) 102 D("%s write HLREG 0x%x rxc 0x%x", 103 onoff ? "enter" : "exit", hl, rxc); 104 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl); 105 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc); 106} 107 108 109/* 110 * Register/unregister. We are already under netmap lock. 111 * Only called on the first register or the last unregister. 112 */ 113static int 114ixgbe_netmap_reg(struct netmap_adapter *na, int onoff) 115{ 116 struct ifnet *ifp = na->ifp; 117 struct adapter *adapter = ifp->if_softc; 118 119 IXGBE_CORE_LOCK(adapter); 120 adapter->stop_locked(adapter); 121 122 set_crcstrip(&adapter->hw, onoff); 123 /* enable or disable flags and callbacks in na and ifp */ 124 if (onoff) { 125 nm_set_native_flags(na); 126 } else { 127 nm_clear_native_flags(na); 128 } 129 adapter->init_locked(adapter); /* also enables intr */ 130 set_crcstrip(&adapter->hw, onoff); // XXX why twice ? 131 IXGBE_CORE_UNLOCK(adapter); 132 return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); 133} 134 135 136/* 137 * Reconcile kernel and user view of the transmit ring. 138 * 139 * All information is in the kring. 140 * Userspace wants to send packets up to the one before kring->rhead, 141 * kernel knows kring->nr_hwcur is the first unsent packet. 142 * 143 * Here we push packets out (as many as possible), and possibly 144 * reclaim buffers from previously completed transmission. 145 * 146 * The caller (netmap) guarantees that there is only one instance 147 * running at any time. Any interference with other driver 148 * methods should be handled by the individual drivers. 149 */ 150static int 151ixgbe_netmap_txsync(struct netmap_kring *kring, int flags) 152{ 153 struct netmap_adapter *na = kring->na; 154 struct ifnet *ifp = na->ifp; 155 struct netmap_ring *ring = kring->ring; 156 u_int nm_i; /* index into the netmap ring */ 157 u_int nic_i; /* index into the NIC ring */ 158 u_int n; 159 u_int const lim = kring->nkr_num_slots - 1; 160 u_int const head = kring->rhead; 161 /* 162 * interrupts on every tx packet are expensive so request 163 * them every half ring, or where NS_REPORT is set 164 */ 165 u_int report_frequency = kring->nkr_num_slots >> 1; 166 167 /* device-specific */ 168 struct adapter *adapter = ifp->if_softc; 169 struct tx_ring *txr = &adapter->tx_rings[kring->ring_id]; 170 int reclaim_tx; 171 172 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 173 BUS_DMASYNC_POSTREAD); 174 175 /* 176 * First part: process new packets to send. 177 * nm_i is the current index in the netmap ring, 178 * nic_i is the corresponding index in the NIC ring. 179 * The two numbers differ because upon a *_init() we reset 180 * the NIC ring but leave the netmap ring unchanged. 181 * For the transmit ring, we have 182 * 183 * nm_i = kring->nr_hwcur 184 * nic_i = IXGBE_TDT (not tracked in the driver) 185 * and 186 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 187 * 188 * In this driver kring->nkr_hwofs >= 0, but for other 189 * drivers it might be negative as well. 190 */ 191 192 /* 193 * If we have packets to send (kring->nr_hwcur != kring->rhead) 194 * iterate over the netmap ring, fetch length and update 195 * the corresponding slot in the NIC ring. Some drivers also 196 * need to update the buffer's physical address in the NIC slot 197 * even NS_BUF_CHANGED is not set (PNMB computes the addresses). 198 * 199 * The netmap_reload_map() calls is especially expensive, 200 * even when (as in this case) the tag is 0, so do only 201 * when the buffer has actually changed. 202 * 203 * If possible do not set the report/intr bit on all slots, 204 * but only a few times per ring or when NS_REPORT is set. 205 * 206 * Finally, on 10G and faster drivers, it might be useful 207 * to prefetch the next slot and txr entry. 208 */ 209 210 nm_i = kring->nr_hwcur; 211 if (nm_i != head) { /* we have new packets to send */ 212 nic_i = netmap_idx_k2n(kring, nm_i); 213 214 __builtin_prefetch(&ring->slot[nm_i]); 215 __builtin_prefetch(&txr->tx_buffers[nic_i]); 216 217 for (n = 0; nm_i != head; n++) { 218 struct netmap_slot *slot = &ring->slot[nm_i]; 219 u_int len = slot->len; 220 uint64_t paddr; 221 void *addr = PNMB(na, slot, &paddr); 222 223 /* device-specific */ 224 union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i]; 225 struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i]; 226 int flags = (slot->flags & NS_REPORT || 227 nic_i == 0 || nic_i == report_frequency) ? 228 IXGBE_TXD_CMD_RS : 0; 229 230 /* prefetch for next round */ 231 __builtin_prefetch(&ring->slot[nm_i + 1]); 232 __builtin_prefetch(&txr->tx_buffers[nic_i + 1]); 233 234 NM_CHECK_ADDR_LEN(na, addr, len); 235 236 if (slot->flags & NS_BUF_CHANGED) { 237 /* buffer has changed, reload map */ 238 netmap_reload_map(na, txr->txtag, txbuf->map, addr); 239 } 240 slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); 241 242 /* Fill the slot in the NIC ring. */ 243 /* Use legacy descriptor, they are faster? */ 244 curr->read.buffer_addr = htole64(paddr); 245 curr->read.olinfo_status = 0; 246 curr->read.cmd_type_len = htole32(len | flags | 247 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP); 248 249 /* make sure changes to the buffer are synced */ 250 bus_dmamap_sync(txr->txtag, txbuf->map, 251 BUS_DMASYNC_PREWRITE); 252 253 nm_i = nm_next(nm_i, lim); 254 nic_i = nm_next(nic_i, lim); 255 } 256 kring->nr_hwcur = head; 257 258 /* synchronize the NIC ring */ 259 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 260 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 261 262 /* (re)start the tx unit up to slot nic_i (excluded) */ 263 IXGBE_WRITE_REG(&adapter->hw, txr->tail, nic_i); 264 } 265 266 /* 267 * Second part: reclaim buffers for completed transmissions. 268 * Because this is expensive (we read a NIC register etc.) 269 * we only do it in specific cases (see below). 270 */ 271 if (flags & NAF_FORCE_RECLAIM) { 272 reclaim_tx = 1; /* forced reclaim */ 273 } else if (!nm_kr_txempty(kring)) { 274 reclaim_tx = 0; /* have buffers, no reclaim */ 275 } else { 276 /* 277 * No buffers available. Locate previous slot with 278 * REPORT_STATUS set. 279 * If the slot has DD set, we can reclaim space, 280 * otherwise wait for the next interrupt. 281 * This enables interrupt moderation on the tx 282 * side though it might reduce throughput. 283 */ 284 struct ixgbe_legacy_tx_desc *txd = 285 (struct ixgbe_legacy_tx_desc *)txr->tx_base; 286 287 nic_i = txr->next_to_clean + report_frequency; 288 if (nic_i > lim) 289 nic_i -= lim + 1; 290 // round to the closest with dd set 291 nic_i = (nic_i < kring->nkr_num_slots / 4 || 292 nic_i >= kring->nkr_num_slots*3/4) ? 293 0 : report_frequency; 294 reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ? 295 } 296 if (reclaim_tx) { 297 /* 298 * Record completed transmissions. 299 * We (re)use the driver's txr->next_to_clean to keep 300 * track of the most recently completed transmission. 301 * 302 * The datasheet discourages the use of TDH to find 303 * out the number of sent packets, but we only set 304 * REPORT_STATUS in a few slots so TDH is the only 305 * good way. 306 */ 307 nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(kring->ring_id)); 308 if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ 309 D("TDH wrap %d", nic_i); 310 nic_i -= kring->nkr_num_slots; 311 } 312 if (nic_i != txr->next_to_clean) { 313 /* some tx completed, increment avail */ 314 txr->next_to_clean = nic_i; 315 kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); 316 } 317 } 318 319 nm_txsync_finalize(kring); 320 321 return 0; 322} 323 324 325/* 326 * Reconcile kernel and user view of the receive ring. 327 * Same as for the txsync, this routine must be efficient. 328 * The caller guarantees a single invocations, but races against 329 * the rest of the driver should be handled here. 330 * 331 * On call, kring->rhead is the first packet that userspace wants 332 * to keep, and kring->rcur is the wakeup point. 333 * The kernel has previously reported packets up to kring->rtail. 334 * 335 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective 336 * of whether or not we received an interrupt. 337 */ 338static int 339ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) 340{ 341 struct netmap_adapter *na = kring->na; 342 struct ifnet *ifp = na->ifp; 343 struct netmap_ring *ring = kring->ring; 344 u_int nm_i; /* index into the netmap ring */ 345 u_int nic_i; /* index into the NIC ring */ 346 u_int n; 347 u_int const lim = kring->nkr_num_slots - 1; 348 u_int const head = nm_rxsync_prologue(kring); 349 int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; 350 351 /* device-specific */ 352 struct adapter *adapter = ifp->if_softc; 353 struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id]; 354 355 if (head > lim) 356 return netmap_ring_reinit(kring); 357 358 /* XXX check sync modes */ 359 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 360 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 361 362 /* 363 * First part: import newly received packets. 364 * 365 * nm_i is the index of the next free slot in the netmap ring, 366 * nic_i is the index of the next received packet in the NIC ring, 367 * and they may differ in case if_init() has been called while 368 * in netmap mode. For the receive ring we have 369 * 370 * nic_i = rxr->next_to_check; 371 * nm_i = kring->nr_hwtail (previous) 372 * and 373 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 374 * 375 * rxr->next_to_check is set to 0 on a ring reinit 376 */ 377 if (netmap_no_pendintr || force_update) { 378 int crclen = (ix_crcstrip) ? 0 : 4; 379 uint16_t slot_flags = kring->nkr_slot_flags; 380 381 nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail) 382 nm_i = netmap_idx_n2k(kring, nic_i); 383 384 for (n = 0; ; n++) { 385 union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i]; 386 uint32_t staterr = le32toh(curr->wb.upper.status_error); 387 388 if ((staterr & IXGBE_RXD_STAT_DD) == 0) 389 break; 390 ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen; 391 ring->slot[nm_i].flags = slot_flags; 392 bus_dmamap_sync(rxr->ptag, 393 rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); 394 nm_i = nm_next(nm_i, lim); 395 nic_i = nm_next(nic_i, lim); 396 } 397 if (n) { /* update the state variables */ 398 if (netmap_no_pendintr && !force_update) { 399 /* diagnostics */ 400 ix_rx_miss ++; 401 ix_rx_miss_bufs += n; 402 } 403 rxr->next_to_check = nic_i; 404 kring->nr_hwtail = nm_i; 405 } 406 kring->nr_kflags &= ~NKR_PENDINTR; 407 } 408 409 /* 410 * Second part: skip past packets that userspace has released. 411 * (kring->nr_hwcur to kring->rhead excluded), 412 * and make the buffers available for reception. 413 * As usual nm_i is the index in the netmap ring, 414 * nic_i is the index in the NIC ring, and 415 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 416 */ 417 nm_i = kring->nr_hwcur; 418 if (nm_i != head) { 419 nic_i = netmap_idx_k2n(kring, nm_i); 420 for (n = 0; nm_i != head; n++) { 421 struct netmap_slot *slot = &ring->slot[nm_i]; 422 uint64_t paddr; 423 void *addr = PNMB(na, slot, &paddr); 424 425 union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i]; 426 struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i]; 427 428 if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ 429 goto ring_reset; 430 431 if (slot->flags & NS_BUF_CHANGED) { 432 /* buffer has changed, reload map */ 433 netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr); 434 slot->flags &= ~NS_BUF_CHANGED; 435 } 436 curr->wb.upper.status_error = 0; 437 curr->read.pkt_addr = htole64(paddr); 438 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 439 BUS_DMASYNC_PREREAD); 440 nm_i = nm_next(nm_i, lim); 441 nic_i = nm_next(nic_i, lim); 442 } 443 kring->nr_hwcur = head; 444 445 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 446 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 447 /* 448 * IMPORTANT: we must leave one free slot in the ring, 449 * so move nic_i back by one unit 450 */ 451 nic_i = nm_prev(nic_i, lim); 452 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, nic_i); 453 } 454 455 /* tell userspace that there might be new packets */ 456 nm_rxsync_finalize(kring); 457 458 return 0; 459 460ring_reset: 461 return netmap_ring_reinit(kring); 462} 463 464 465/* 466 * The attach routine, called near the end of ixgbe_attach(), 467 * fills the parameters for netmap_attach() and calls it. 468 * It cannot fail, in the worst case (such as no memory) 469 * netmap mode will be disabled and the driver will only 470 * operate in standard mode. 471 */ 472void 473ixgbe_netmap_attach(struct adapter *adapter) 474{ 475 struct netmap_adapter na; 476 477 bzero(&na, sizeof(na)); 478 479 na.ifp = adapter->ifp; 480 na.na_flags = NAF_BDG_MAYSLEEP; 481 na.num_tx_desc = adapter->num_tx_desc; 482 na.num_rx_desc = adapter->num_rx_desc; 483 na.nm_txsync = ixgbe_netmap_txsync; 484 na.nm_rxsync = ixgbe_netmap_rxsync; 485 na.nm_register = ixgbe_netmap_reg; 486 na.num_tx_rings = na.num_rx_rings = adapter->num_queues; 487 netmap_attach(&na); 488} 489 490#endif /* DEV_NETMAP */ 491 492/* end of file */ 493