ix_txrx.c revision 283620
1/****************************************************************************** 2 3 Copyright (c) 2001-2015, Intel Corporation 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Intel Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 
31 32******************************************************************************/ 33/*$FreeBSD: stable/10/sys/dev/ixgbe/ix_txrx.c 283620 2015-05-27 17:44:11Z erj $*/ 34 35 36#ifndef IXGBE_STANDALONE_BUILD 37#include "opt_inet.h" 38#include "opt_inet6.h" 39#endif 40 41#include "ixgbe.h" 42 43#ifdef DEV_NETMAP 44#include <net/netmap.h> 45#include <sys/selinfo.h> 46#include <dev/netmap/netmap_kern.h> 47 48extern int ix_crcstrip; 49#endif 50 51/* 52** HW RSC control: 53** this feature only works with 54** IPv4, and only on 82599 and later. 55** Also this will cause IP forwarding to 56** fail and that can't be controlled by 57** the stack as LRO can. For all these 58** reasons I've deemed it best to leave 59** this off and not bother with a tuneable 60** interface, this would need to be compiled 61** to enable. 62*/ 63static bool ixgbe_rsc_enable = FALSE; 64 65#ifdef IXGBE_FDIR 66/* 67** For Flow Director: this is the 68** number of TX packets we sample 69** for the filter pool, this means 70** every 20th packet will be probed. 71** 72** This feature can be disabled by 73** setting this to 0. 
74*/ 75static int atr_sample_rate = 20; 76#endif 77 78/* Shared PCI config read/write */ 79inline u16 80ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg) 81{ 82 u16 value; 83 84 value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev, 85 reg, 2); 86 87 return (value); 88} 89 90inline void 91ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value) 92{ 93 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev, 94 reg, value, 2); 95 96 return; 97} 98 99/********************************************************************* 100 * Local Function prototypes 101 *********************************************************************/ 102static void ixgbe_setup_transmit_ring(struct tx_ring *); 103static void ixgbe_free_transmit_buffers(struct tx_ring *); 104static int ixgbe_setup_receive_ring(struct rx_ring *); 105static void ixgbe_free_receive_buffers(struct rx_ring *); 106 107static void ixgbe_rx_checksum(u32, struct mbuf *, u32); 108static void ixgbe_refresh_mbufs(struct rx_ring *, int); 109static int ixgbe_xmit(struct tx_ring *, struct mbuf **); 110static int ixgbe_tx_ctx_setup(struct tx_ring *, 111 struct mbuf *, u32 *, u32 *); 112static int ixgbe_tso_setup(struct tx_ring *, 113 struct mbuf *, u32 *, u32 *); 114#ifdef IXGBE_FDIR 115static void ixgbe_atr(struct tx_ring *, struct mbuf *); 116#endif 117static __inline void ixgbe_rx_discard(struct rx_ring *, int); 118static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, 119 struct mbuf *, u32); 120 121#ifdef IXGBE_LEGACY_TX 122/********************************************************************* 123 * Transmit entry point 124 * 125 * ixgbe_start is called by the stack to initiate a transmit. 126 * The driver will remain in this routine as long as there are 127 * packets to transmit and transmit resources are available. 128 * In case resources are not available stack is notified and 129 * the packet is requeued. 
130 **********************************************************************/ 131 132void 133ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) 134{ 135 struct mbuf *m_head; 136 struct adapter *adapter = txr->adapter; 137 138 IXGBE_TX_LOCK_ASSERT(txr); 139 140 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 141 return; 142 if (!adapter->link_active) 143 return; 144 145 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 146 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) 147 break; 148 149 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 150 if (m_head == NULL) 151 break; 152 153 if (ixgbe_xmit(txr, &m_head)) { 154 if (m_head != NULL) 155 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 156 break; 157 } 158 /* Send a copy of the frame to the BPF listener */ 159 ETHER_BPF_MTAP(ifp, m_head); 160 } 161 return; 162} 163 164/* 165 * Legacy TX start - called by the stack, this 166 * always uses the first tx ring, and should 167 * not be used with multiqueue tx enabled. 168 */ 169void 170ixgbe_start(struct ifnet *ifp) 171{ 172 struct adapter *adapter = ifp->if_softc; 173 struct tx_ring *txr = adapter->tx_rings; 174 175 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 176 IXGBE_TX_LOCK(txr); 177 ixgbe_start_locked(txr, ifp); 178 IXGBE_TX_UNLOCK(txr); 179 } 180 return; 181} 182 183#else /* ! IXGBE_LEGACY_TX */ 184 185/* 186** Multiqueue Transmit driver 187** 188*/ 189int 190ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) 191{ 192 struct adapter *adapter = ifp->if_softc; 193 struct ix_queue *que; 194 struct tx_ring *txr; 195 int i, err = 0; 196 197 /* 198 * When doing RSS, map it to the same outbound queue 199 * as the incoming flow would be mapped to. 200 * 201 * If everything is setup correctly, it should be the 202 * same bucket that the current CPU we're on is. 
203 */ 204 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 205 i = m->m_pkthdr.flowid % adapter->num_queues; 206 else 207 i = curcpu % adapter->num_queues; 208 209 /* Check for a hung queue and pick alternative */ 210 if (((1 << i) & adapter->active_queues) == 0) 211 i = ffsl(adapter->active_queues); 212 213 txr = &adapter->tx_rings[i]; 214 que = &adapter->queues[i]; 215 216 err = drbr_enqueue(ifp, txr->br, m); 217 if (err) 218 return (err); 219 if (IXGBE_TX_TRYLOCK(txr)) { 220 ixgbe_mq_start_locked(ifp, txr); 221 IXGBE_TX_UNLOCK(txr); 222 } else 223 taskqueue_enqueue(que->tq, &txr->txq_task); 224 225 return (0); 226} 227 228int 229ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) 230{ 231 struct adapter *adapter = txr->adapter; 232 struct mbuf *next; 233 int enqueued = 0, err = 0; 234 235 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || 236 adapter->link_active == 0) 237 return (ENETDOWN); 238 239 /* Process the queue */ 240#if __FreeBSD_version < 901504 241 next = drbr_dequeue(ifp, txr->br); 242 while (next != NULL) { 243 if ((err = ixgbe_xmit(txr, &next)) != 0) { 244 if (next != NULL) 245 err = drbr_enqueue(ifp, txr->br, next); 246#else 247 while ((next = drbr_peek(ifp, txr->br)) != NULL) { 248 if ((err = ixgbe_xmit(txr, &next)) != 0) { 249 if (next == NULL) { 250 drbr_advance(ifp, txr->br); 251 } else { 252 drbr_putback(ifp, txr->br, next); 253 } 254#endif 255 break; 256 } 257#if __FreeBSD_version >= 901504 258 drbr_advance(ifp, txr->br); 259#endif 260 enqueued++; 261#if 0 // this is VF-only 262#if __FreeBSD_version >= 1100036 263 /* 264 * Since we're looking at the tx ring, we can check 265 * to see if we're a VF by examing our tail register 266 * address. 
267 */ 268 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST) 269 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 270#endif 271#endif 272 /* Send a copy of the frame to the BPF listener */ 273 ETHER_BPF_MTAP(ifp, next); 274 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 275 break; 276#if __FreeBSD_version < 901504 277 next = drbr_dequeue(ifp, txr->br); 278#endif 279 } 280 281 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD) 282 ixgbe_txeof(txr); 283 284 return (err); 285} 286 287/* 288 * Called from a taskqueue to drain queued transmit packets. 289 */ 290void 291ixgbe_deferred_mq_start(void *arg, int pending) 292{ 293 struct tx_ring *txr = arg; 294 struct adapter *adapter = txr->adapter; 295 struct ifnet *ifp = adapter->ifp; 296 297 IXGBE_TX_LOCK(txr); 298 if (!drbr_empty(ifp, txr->br)) 299 ixgbe_mq_start_locked(ifp, txr); 300 IXGBE_TX_UNLOCK(txr); 301} 302 303/* 304 * Flush all ring buffers 305 */ 306void 307ixgbe_qflush(struct ifnet *ifp) 308{ 309 struct adapter *adapter = ifp->if_softc; 310 struct tx_ring *txr = adapter->tx_rings; 311 struct mbuf *m; 312 313 for (int i = 0; i < adapter->num_queues; i++, txr++) { 314 IXGBE_TX_LOCK(txr); 315 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) 316 m_freem(m); 317 IXGBE_TX_UNLOCK(txr); 318 } 319 if_qflush(ifp); 320} 321#endif /* IXGBE_LEGACY_TX */ 322 323 324/********************************************************************* 325 * 326 * This routine maps the mbufs to tx descriptors, allowing the 327 * TX engine to transmit the packets. 
328 * - return 0 on success, positive on failure 329 * 330 **********************************************************************/ 331 332static int 333ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) 334{ 335 struct adapter *adapter = txr->adapter; 336 u32 olinfo_status = 0, cmd_type_len; 337 int i, j, error, nsegs; 338 int first; 339 bool remap = TRUE; 340 struct mbuf *m_head; 341 bus_dma_segment_t segs[adapter->num_segs]; 342 bus_dmamap_t map; 343 struct ixgbe_tx_buf *txbuf; 344 union ixgbe_adv_tx_desc *txd = NULL; 345 346 m_head = *m_headp; 347 348 /* Basic descriptor defines */ 349 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | 350 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); 351 352 if (m_head->m_flags & M_VLANTAG) 353 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; 354 355 /* 356 * Important to capture the first descriptor 357 * used because it will contain the index of 358 * the one we tell the hardware to report back 359 */ 360 first = txr->next_avail_desc; 361 txbuf = &txr->tx_buffers[first]; 362 map = txbuf->map; 363 364 /* 365 * Map the packet for DMA. 366 */ 367retry: 368 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, 369 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 370 371 if (__predict_false(error)) { 372 struct mbuf *m; 373 374 switch (error) { 375 case EFBIG: 376 /* Try it again? 
- one try */ 377 if (remap == TRUE) { 378 remap = FALSE; 379 /* 380 * XXX: m_defrag will choke on 381 * non-MCLBYTES-sized clusters 382 */ 383 m = m_defrag(*m_headp, M_NOWAIT); 384 if (m == NULL) { 385 adapter->mbuf_defrag_failed++; 386 m_freem(*m_headp); 387 *m_headp = NULL; 388 return (ENOBUFS); 389 } 390 *m_headp = m; 391 goto retry; 392 } else 393 return (error); 394 case ENOMEM: 395 txr->no_tx_dma_setup++; 396 return (error); 397 default: 398 txr->no_tx_dma_setup++; 399 m_freem(*m_headp); 400 *m_headp = NULL; 401 return (error); 402 } 403 } 404 405 /* Make certain there are enough descriptors */ 406 if (nsegs > txr->tx_avail - 2) { 407 txr->no_desc_avail++; 408 bus_dmamap_unload(txr->txtag, map); 409 return (ENOBUFS); 410 } 411 m_head = *m_headp; 412 413 /* 414 * Set up the appropriate offload context 415 * this will consume the first descriptor 416 */ 417 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); 418 if (__predict_false(error)) { 419 if (error == ENOBUFS) 420 *m_headp = NULL; 421 return (error); 422 } 423 424#ifdef IXGBE_FDIR 425 /* Do the flow director magic */ 426 if ((txr->atr_sample) && (!adapter->fdir_reinit)) { 427 ++txr->atr_count; 428 if (txr->atr_count >= atr_sample_rate) { 429 ixgbe_atr(txr, m_head); 430 txr->atr_count = 0; 431 } 432 } 433#endif 434 435 i = txr->next_avail_desc; 436 for (j = 0; j < nsegs; j++) { 437 bus_size_t seglen; 438 bus_addr_t segaddr; 439 440 txbuf = &txr->tx_buffers[i]; 441 txd = &txr->tx_base[i]; 442 seglen = segs[j].ds_len; 443 segaddr = htole64(segs[j].ds_addr); 444 445 txd->read.buffer_addr = segaddr; 446 txd->read.cmd_type_len = htole32(txr->txd_cmd | 447 cmd_type_len |seglen); 448 txd->read.olinfo_status = htole32(olinfo_status); 449 450 if (++i == txr->num_desc) 451 i = 0; 452 } 453 454 txd->read.cmd_type_len |= 455 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); 456 txr->tx_avail -= nsegs; 457 txr->next_avail_desc = i; 458 459 txbuf->m_head = m_head; 460 /* 461 * Here we swap the map so 
the last descriptor, 462 * which gets the completion interrupt has the 463 * real map, and the first descriptor gets the 464 * unused map from this descriptor. 465 */ 466 txr->tx_buffers[first].map = txbuf->map; 467 txbuf->map = map; 468 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); 469 470 /* Set the EOP descriptor that will be marked done */ 471 txbuf = &txr->tx_buffers[first]; 472 txbuf->eop = txd; 473 474 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 475 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 476 /* 477 * Advance the Transmit Descriptor Tail (Tdt), this tells the 478 * hardware that this frame is available to transmit. 479 */ 480 ++txr->total_packets; 481 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i); 482 483 /* Mark queue as having work */ 484 if (txr->busy == 0) 485 txr->busy = 1; 486 487 return (0); 488} 489 490 491/********************************************************************* 492 * 493 * Allocate memory for tx_buffer structures. The tx_buffer stores all 494 * the information needed to transmit a packet on the wire. This is 495 * called only once at attach, setup is done every reset. 496 * 497 **********************************************************************/ 498int 499ixgbe_allocate_transmit_buffers(struct tx_ring *txr) 500{ 501 struct adapter *adapter = txr->adapter; 502 device_t dev = adapter->dev; 503 struct ixgbe_tx_buf *txbuf; 504 int error, i; 505 506 /* 507 * Setup DMA descriptor areas. 
508 */ 509 if ((error = bus_dma_tag_create( 510 bus_get_dma_tag(adapter->dev), /* parent */ 511 1, 0, /* alignment, bounds */ 512 BUS_SPACE_MAXADDR, /* lowaddr */ 513 BUS_SPACE_MAXADDR, /* highaddr */ 514 NULL, NULL, /* filter, filterarg */ 515 IXGBE_TSO_SIZE, /* maxsize */ 516 adapter->num_segs, /* nsegments */ 517 PAGE_SIZE, /* maxsegsize */ 518 0, /* flags */ 519 NULL, /* lockfunc */ 520 NULL, /* lockfuncarg */ 521 &txr->txtag))) { 522 device_printf(dev,"Unable to allocate TX DMA tag\n"); 523 goto fail; 524 } 525 526 if (!(txr->tx_buffers = 527 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * 528 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 529 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 530 error = ENOMEM; 531 goto fail; 532 } 533 534 /* Create the descriptor buffer dma maps */ 535 txbuf = txr->tx_buffers; 536 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 537 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); 538 if (error != 0) { 539 device_printf(dev, "Unable to create TX DMA map\n"); 540 goto fail; 541 } 542 } 543 544 return 0; 545fail: 546 /* We free all, it handles case where we are in the middle */ 547 ixgbe_free_transmit_structures(adapter); 548 return (error); 549} 550 551/********************************************************************* 552 * 553 * Initialize a transmit ring. 554 * 555 **********************************************************************/ 556static void 557ixgbe_setup_transmit_ring(struct tx_ring *txr) 558{ 559 struct adapter *adapter = txr->adapter; 560 struct ixgbe_tx_buf *txbuf; 561 int i; 562#ifdef DEV_NETMAP 563 struct netmap_adapter *na = NA(adapter->ifp); 564 struct netmap_slot *slot; 565#endif /* DEV_NETMAP */ 566 567 /* Clear the old ring contents */ 568 IXGBE_TX_LOCK(txr); 569#ifdef DEV_NETMAP 570 /* 571 * (under lock): if in netmap mode, do some consistency 572 * checks and set slot to entry 0 of the netmap ring. 
573 */ 574 slot = netmap_reset(na, NR_TX, txr->me, 0); 575#endif /* DEV_NETMAP */ 576 bzero((void *)txr->tx_base, 577 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); 578 /* Reset indices */ 579 txr->next_avail_desc = 0; 580 txr->next_to_clean = 0; 581 582 /* Free any existing tx buffers. */ 583 txbuf = txr->tx_buffers; 584 for (i = 0; i < txr->num_desc; i++, txbuf++) { 585 if (txbuf->m_head != NULL) { 586 bus_dmamap_sync(txr->txtag, txbuf->map, 587 BUS_DMASYNC_POSTWRITE); 588 bus_dmamap_unload(txr->txtag, txbuf->map); 589 m_freem(txbuf->m_head); 590 txbuf->m_head = NULL; 591 } 592#ifdef DEV_NETMAP 593 /* 594 * In netmap mode, set the map for the packet buffer. 595 * NOTE: Some drivers (not this one) also need to set 596 * the physical buffer address in the NIC ring. 597 * Slots in the netmap ring (indexed by "si") are 598 * kring->nkr_hwofs positions "ahead" wrt the 599 * corresponding slot in the NIC ring. In some drivers 600 * (not here) nkr_hwofs can be negative. Function 601 * netmap_idx_n2k() handles wraparounds properly. 602 */ 603 if (slot) { 604 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); 605 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si)); 606 } 607#endif /* DEV_NETMAP */ 608 /* Clear the EOP descriptor pointer */ 609 txbuf->eop = NULL; 610 } 611 612#ifdef IXGBE_FDIR 613 /* Set the rate at which we sample packets */ 614 if (adapter->hw.mac.type != ixgbe_mac_82598EB) 615 txr->atr_sample = atr_sample_rate; 616#endif 617 618 /* Set number of descriptors available */ 619 txr->tx_avail = adapter->num_tx_desc; 620 621 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 622 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 623 IXGBE_TX_UNLOCK(txr); 624} 625 626/********************************************************************* 627 * 628 * Initialize all transmit rings. 
629 * 630 **********************************************************************/ 631int 632ixgbe_setup_transmit_structures(struct adapter *adapter) 633{ 634 struct tx_ring *txr = adapter->tx_rings; 635 636 for (int i = 0; i < adapter->num_queues; i++, txr++) 637 ixgbe_setup_transmit_ring(txr); 638 639 return (0); 640} 641 642/********************************************************************* 643 * 644 * Free all transmit rings. 645 * 646 **********************************************************************/ 647void 648ixgbe_free_transmit_structures(struct adapter *adapter) 649{ 650 struct tx_ring *txr = adapter->tx_rings; 651 652 for (int i = 0; i < adapter->num_queues; i++, txr++) { 653 IXGBE_TX_LOCK(txr); 654 ixgbe_free_transmit_buffers(txr); 655 ixgbe_dma_free(adapter, &txr->txdma); 656 IXGBE_TX_UNLOCK(txr); 657 IXGBE_TX_LOCK_DESTROY(txr); 658 } 659 free(adapter->tx_rings, M_DEVBUF); 660} 661 662/********************************************************************* 663 * 664 * Free transmit ring related data structures. 
665 * 666 **********************************************************************/ 667static void 668ixgbe_free_transmit_buffers(struct tx_ring *txr) 669{ 670 struct adapter *adapter = txr->adapter; 671 struct ixgbe_tx_buf *tx_buffer; 672 int i; 673 674 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin"); 675 676 if (txr->tx_buffers == NULL) 677 return; 678 679 tx_buffer = txr->tx_buffers; 680 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { 681 if (tx_buffer->m_head != NULL) { 682 bus_dmamap_sync(txr->txtag, tx_buffer->map, 683 BUS_DMASYNC_POSTWRITE); 684 bus_dmamap_unload(txr->txtag, 685 tx_buffer->map); 686 m_freem(tx_buffer->m_head); 687 tx_buffer->m_head = NULL; 688 if (tx_buffer->map != NULL) { 689 bus_dmamap_destroy(txr->txtag, 690 tx_buffer->map); 691 tx_buffer->map = NULL; 692 } 693 } else if (tx_buffer->map != NULL) { 694 bus_dmamap_unload(txr->txtag, 695 tx_buffer->map); 696 bus_dmamap_destroy(txr->txtag, 697 tx_buffer->map); 698 tx_buffer->map = NULL; 699 } 700 } 701#ifdef IXGBE_LEGACY_TX 702 if (txr->br != NULL) 703 buf_ring_free(txr->br, M_DEVBUF); 704#endif 705 if (txr->tx_buffers != NULL) { 706 free(txr->tx_buffers, M_DEVBUF); 707 txr->tx_buffers = NULL; 708 } 709 if (txr->txtag != NULL) { 710 bus_dma_tag_destroy(txr->txtag); 711 txr->txtag = NULL; 712 } 713 return; 714} 715 716/********************************************************************* 717 * 718 * Advanced Context Descriptor setup for VLAN, CSUM or TSO 719 * 720 **********************************************************************/ 721 722static int 723ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, 724 u32 *cmd_type_len, u32 *olinfo_status) 725{ 726 struct adapter *adapter = txr->adapter; 727 struct ixgbe_adv_tx_context_desc *TXD; 728 struct ether_vlan_header *eh; 729 struct ip *ip; 730 struct ip6_hdr *ip6; 731 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 732 int ehdrlen, ip_hlen = 0; 733 u16 etype; 734 u8 ipproto = 0; 735 int offload = TRUE; 736 int ctxd = 
txr->next_avail_desc; 737 u16 vtag = 0; 738 739 /* First check if TSO is to be used */ 740 if (mp->m_pkthdr.csum_flags & CSUM_TSO) 741 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status)); 742 743 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) 744 offload = FALSE; 745 746 /* Indicate the whole packet as payload when not doing TSO */ 747 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; 748 749 /* Now ready a context descriptor */ 750 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 751 752 /* 753 ** In advanced descriptors the vlan tag must 754 ** be placed into the context descriptor. Hence 755 ** we need to make one even if not doing offloads. 756 */ 757 if (mp->m_flags & M_VLANTAG) { 758 vtag = htole16(mp->m_pkthdr.ether_vtag); 759 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); 760 } 761 else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE)) 762 return (0); 763 764 /* 765 * Determine where frame payload starts. 766 * Jump over vlan headers if already present, 767 * helpful for QinQ too. 768 */ 769 eh = mtod(mp, struct ether_vlan_header *); 770 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 771 etype = ntohs(eh->evl_proto); 772 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 773 } else { 774 etype = ntohs(eh->evl_encap_proto); 775 ehdrlen = ETHER_HDR_LEN; 776 } 777 778 /* Set the ether header length */ 779 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 780 781 if (offload == FALSE) 782 goto no_offloads; 783 784 switch (etype) { 785 case ETHERTYPE_IP: 786 ip = (struct ip *)(mp->m_data + ehdrlen); 787 ip_hlen = ip->ip_hl << 2; 788 ipproto = ip->ip_p; 789 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 790 break; 791 case ETHERTYPE_IPV6: 792 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 793 ip_hlen = sizeof(struct ip6_hdr); 794 /* XXX-BZ this will go badly in case of ext hdrs. 
*/ 795 ipproto = ip6->ip6_nxt; 796 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 797 break; 798 default: 799 offload = FALSE; 800 break; 801 } 802 803 vlan_macip_lens |= ip_hlen; 804 805 switch (ipproto) { 806 case IPPROTO_TCP: 807 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 808 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 809 break; 810 811 case IPPROTO_UDP: 812 if (mp->m_pkthdr.csum_flags & CSUM_UDP) 813 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; 814 break; 815 816#if __FreeBSD_version >= 800000 817 case IPPROTO_SCTP: 818 if (mp->m_pkthdr.csum_flags & CSUM_SCTP) 819 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; 820 break; 821#endif 822 default: 823 offload = FALSE; 824 break; 825 } 826 827 if (offload) /* For the TX descriptor setup */ 828 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 829 830no_offloads: 831 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 832 833 /* Now copy bits into descriptor */ 834 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 835 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 836 TXD->seqnum_seed = htole32(0); 837 TXD->mss_l4len_idx = htole32(0); 838 839 /* We've consumed the first desc, adjust counters */ 840 if (++ctxd == txr->num_desc) 841 ctxd = 0; 842 txr->next_avail_desc = ctxd; 843 --txr->tx_avail; 844 845 return (0); 846} 847 848/********************************************************************** 849 * 850 * Setup work for hardware segmentation offload (TSO) on 851 * adapters using advanced tx descriptors 852 * 853 **********************************************************************/ 854static int 855ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, 856 u32 *cmd_type_len, u32 *olinfo_status) 857{ 858 struct ixgbe_adv_tx_context_desc *TXD; 859 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 860 u32 mss_l4len_idx = 0, paylen; 861 u16 vtag = 0, eh_type; 862 int ctxd, ehdrlen, ip_hlen, tcp_hlen; 863 struct ether_vlan_header *eh; 864#ifdef INET6 865 struct ip6_hdr *ip6; 866#endif 867#ifdef INET 868 struct 
ip *ip; 869#endif 870 struct tcphdr *th; 871 872 873 /* 874 * Determine where frame payload starts. 875 * Jump over vlan headers if already present 876 */ 877 eh = mtod(mp, struct ether_vlan_header *); 878 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 879 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 880 eh_type = eh->evl_proto; 881 } else { 882 ehdrlen = ETHER_HDR_LEN; 883 eh_type = eh->evl_encap_proto; 884 } 885 886 switch (ntohs(eh_type)) { 887#ifdef INET6 888 case ETHERTYPE_IPV6: 889 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 890 /* XXX-BZ For now we do not pretend to support ext. hdrs. */ 891 if (ip6->ip6_nxt != IPPROTO_TCP) 892 return (ENXIO); 893 ip_hlen = sizeof(struct ip6_hdr); 894 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 895 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 896 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 897 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 898 break; 899#endif 900#ifdef INET 901 case ETHERTYPE_IP: 902 ip = (struct ip *)(mp->m_data + ehdrlen); 903 if (ip->ip_p != IPPROTO_TCP) 904 return (ENXIO); 905 ip->ip_sum = 0; 906 ip_hlen = ip->ip_hl << 2; 907 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 908 th->th_sum = in_pseudo(ip->ip_src.s_addr, 909 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 910 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 911 /* Tell transmit desc to also do IPv4 checksum. 
*/ 912 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; 913 break; 914#endif 915 default: 916 panic("%s: CSUM_TSO but no supported IP version (0x%04x)", 917 __func__, ntohs(eh_type)); 918 break; 919 } 920 921 ctxd = txr->next_avail_desc; 922 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 923 924 tcp_hlen = th->th_off << 2; 925 926 /* This is used in the transmit desc in encap */ 927 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; 928 929 /* VLAN MACLEN IPLEN */ 930 if (mp->m_flags & M_VLANTAG) { 931 vtag = htole16(mp->m_pkthdr.ether_vtag); 932 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); 933 } 934 935 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 936 vlan_macip_lens |= ip_hlen; 937 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 938 939 /* ADV DTYPE TUCMD */ 940 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 941 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 942 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 943 944 /* MSS L4LEN IDX */ 945 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); 946 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); 947 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 948 949 TXD->seqnum_seed = htole32(0); 950 951 if (++ctxd == txr->num_desc) 952 ctxd = 0; 953 954 txr->tx_avail--; 955 txr->next_avail_desc = ctxd; 956 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; 957 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 958 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; 959 ++txr->tso_tx; 960 return (0); 961} 962 963 964/********************************************************************** 965 * 966 * Examine each tx_buffer in the used queue. If the hardware is done 967 * processing the packet then free associated resources. The 968 * tx_buffer is put back on the free queue. 
969 * 970 **********************************************************************/ 971void 972ixgbe_txeof(struct tx_ring *txr) 973{ 974#ifdef DEV_NETMAP 975 struct adapter *adapter = txr->adapter; 976 struct ifnet *ifp = adapter->ifp; 977#endif 978 u32 work, processed = 0; 979 u16 limit = txr->process_limit; 980 struct ixgbe_tx_buf *buf; 981 union ixgbe_adv_tx_desc *txd; 982 983 mtx_assert(&txr->tx_mtx, MA_OWNED); 984 985#ifdef DEV_NETMAP 986 if (ifp->if_capenable & IFCAP_NETMAP) { 987 struct netmap_adapter *na = NA(ifp); 988 struct netmap_kring *kring = &na->tx_rings[txr->me]; 989 txd = txr->tx_base; 990 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 991 BUS_DMASYNC_POSTREAD); 992 /* 993 * In netmap mode, all the work is done in the context 994 * of the client thread. Interrupt handlers only wake up 995 * clients, which may be sleeping on individual rings 996 * or on a global resource for all rings. 997 * To implement tx interrupt mitigation, we wake up the client 998 * thread roughly every half ring, even if the NIC interrupts 999 * more frequently. This is implemented as follows: 1000 * - ixgbe_txsync() sets kring->nr_kflags with the index of 1001 * the slot that should wake up the thread (nkr_num_slots 1002 * means the user thread should not be woken up); 1003 * - the driver ignores tx interrupts unless netmap_mitigate=0 1004 * or the slot has the DD bit set. 
1005 */ 1006 if (!netmap_mitigate || 1007 (kring->nr_kflags < kring->nkr_num_slots && 1008 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { 1009 netmap_tx_irq(ifp, txr->me); 1010 } 1011 return; 1012 } 1013#endif /* DEV_NETMAP */ 1014 1015 if (txr->tx_avail == txr->num_desc) { 1016 txr->busy = 0; 1017 return; 1018 } 1019 1020 /* Get work starting point */ 1021 work = txr->next_to_clean; 1022 buf = &txr->tx_buffers[work]; 1023 txd = &txr->tx_base[work]; 1024 work -= txr->num_desc; /* The distance to ring end */ 1025 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1026 BUS_DMASYNC_POSTREAD); 1027 1028 do { 1029 union ixgbe_adv_tx_desc *eop= buf->eop; 1030 if (eop == NULL) /* No work */ 1031 break; 1032 1033 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0) 1034 break; /* I/O not complete */ 1035 1036 if (buf->m_head) { 1037 txr->bytes += 1038 buf->m_head->m_pkthdr.len; 1039 bus_dmamap_sync(txr->txtag, 1040 buf->map, 1041 BUS_DMASYNC_POSTWRITE); 1042 bus_dmamap_unload(txr->txtag, 1043 buf->map); 1044 m_freem(buf->m_head); 1045 buf->m_head = NULL; 1046 } 1047 buf->eop = NULL; 1048 ++txr->tx_avail; 1049 1050 /* We clean the range if multi segment */ 1051 while (txd != eop) { 1052 ++txd; 1053 ++buf; 1054 ++work; 1055 /* wrap the ring? 
*/ 1056 if (__predict_false(!work)) { 1057 work -= txr->num_desc; 1058 buf = txr->tx_buffers; 1059 txd = txr->tx_base; 1060 } 1061 if (buf->m_head) { 1062 txr->bytes += 1063 buf->m_head->m_pkthdr.len; 1064 bus_dmamap_sync(txr->txtag, 1065 buf->map, 1066 BUS_DMASYNC_POSTWRITE); 1067 bus_dmamap_unload(txr->txtag, 1068 buf->map); 1069 m_freem(buf->m_head); 1070 buf->m_head = NULL; 1071 } 1072 ++txr->tx_avail; 1073 buf->eop = NULL; 1074 1075 } 1076 ++txr->packets; 1077 ++processed; 1078 1079 /* Try the next packet */ 1080 ++txd; 1081 ++buf; 1082 ++work; 1083 /* reset with a wrap */ 1084 if (__predict_false(!work)) { 1085 work -= txr->num_desc; 1086 buf = txr->tx_buffers; 1087 txd = txr->tx_base; 1088 } 1089 prefetch(txd); 1090 } while (__predict_true(--limit)); 1091 1092 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1093 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1094 1095 work += txr->num_desc; 1096 txr->next_to_clean = work; 1097 1098 /* 1099 ** Queue Hang detection, we know there's 1100 ** work outstanding or the first return 1101 ** would have been taken, so increment busy 1102 ** if nothing managed to get cleaned, then 1103 ** in local_timer it will be checked and 1104 ** marked as HUNG if it exceeds a MAX attempt. 1105 */ 1106 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG)) 1107 ++txr->busy; 1108 /* 1109 ** If anything gets cleaned we reset state to 1, 1110 ** note this will turn off HUNG if its set. 1111 */ 1112 if (processed) 1113 txr->busy = 1; 1114 1115 if (txr->tx_avail == txr->num_desc) 1116 txr->busy = 0; 1117 1118 return; 1119} 1120 1121 1122#ifdef IXGBE_FDIR 1123/* 1124** This routine parses packet headers so that Flow 1125** Director can make a hashed filter table entry 1126** allowing traffic flows to be identified and kept 1127** on the same cpu. This would be a performance 1128** hit, but we only do it at IXGBE_FDIR_RATE of 1129** packets. 
*/
/*
** ixgbe_atr: build and install a Flow Director signature filter.
**
** Parses the Ethernet/IPv4/L4 headers of an outgoing packet and hands
** a signature-hash filter to the 82599 Flow Director so subsequent RX
** traffic for this flow lands on this queue's MSIX vector.  Non-IPv4
** and non-TCP/UDP packets are ignored.
**
** NOTE(review): headers are read in place via mtod(); this assumes the
** Ethernet + IP + L4 headers are contiguous in the first mbuf — confirm
** callers only sample fully-parsed TX packets.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
    struct adapter			*adapter = txr->adapter;
    struct ix_queue			*que;
    struct ip			*ip;
    struct tcphdr			*th;
    struct udphdr			*uh;
    struct ether_vlan_header	*eh;
    /* Hash inputs; zero-initialized, so the "^=" below act as stores */
    union ixgbe_atr_hash_dword	input = {.dword = 0};
    union ixgbe_atr_hash_dword	common = {.dword = 0};
    int  				ehdrlen, ip_hlen;
    u16				etype;

    eh = mtod(mp, struct ether_vlan_header *);
    if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
        /* 802.1Q tagged frame: ethertype follows the VLAN tag */
        ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
        etype = eh->evl_proto;
    } else {
        ehdrlen = ETHER_HDR_LEN;
        etype = eh->evl_encap_proto;
    }

    /* Only handling IPv4 */
    if (etype != htons(ETHERTYPE_IP))
        return;

    ip = (struct ip *)(mp->m_data + ehdrlen);
    ip_hlen = ip->ip_hl << 2;	/* header length in bytes (incl. options) */

    /* check if we're UDP or TCP */
    switch (ip->ip_p) {
    case IPPROTO_TCP:
        th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
        /* src and dst are inverted */
        common.port.dst ^= th->th_sport;
        common.port.src ^= th->th_dport;
        input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
        break;
    case IPPROTO_UDP:
        uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
        /* src and dst are inverted */
        common.port.dst ^= uh->uh_sport;
        common.port.src ^= uh->uh_dport;
        input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
        break;
    default:
        /* Flow Director only hashes TCP/UDP over IPv4 */
        return;
    }

    input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
    if (mp->m_pkthdr.ether_vtag)
        common.flex_bytes ^= htons(ETHERTYPE_VLAN);
    else
        common.flex_bytes ^= etype;
    common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

    que = &adapter->queues[txr->me];
    /*
    ** This assumes the Rx queue and Tx
    ** queue are bound to the same CPU
    */
    ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
        input, common, que->msix);
}
#endif /* IXGBE_FDIR */

1198/* 1199** Used to detect a descriptor that has 1200** been merged by Hardware RSC. 1201*/ 1202static inline u32 1203ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx) 1204{ 1205 return (le32toh(rx->wb.lower.lo_dword.data) & 1206 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; 1207} 1208 1209/********************************************************************* 1210 * 1211 * Initialize Hardware RSC (LRO) feature on 82599 1212 * for an RX ring, this is toggled by the LRO capability 1213 * even though it is transparent to the stack. 1214 * 1215 * NOTE: since this HW feature only works with IPV4 and 1216 * our testing has shown soft LRO to be as effective 1217 * I have decided to disable this by default. 1218 * 1219 **********************************************************************/ 1220static void 1221ixgbe_setup_hw_rsc(struct rx_ring *rxr) 1222{ 1223 struct adapter *adapter = rxr->adapter; 1224 struct ixgbe_hw *hw = &adapter->hw; 1225 u32 rscctrl, rdrxctl; 1226 1227 /* If turning LRO/RSC off we need to disable it */ 1228 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) { 1229 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); 1230 rscctrl &= ~IXGBE_RSCCTL_RSCEN; 1231 return; 1232 } 1233 1234 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); 1235 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; 1236#ifdef DEV_NETMAP /* crcstrip is optional in netmap */ 1237 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) 1238#endif /* DEV_NETMAP */ 1239 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; 1240 rdrxctl |= IXGBE_RDRXCTL_RSCACKC; 1241 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); 1242 1243 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); 1244 rscctrl |= IXGBE_RSCCTL_RSCEN; 1245 /* 1246 ** Limit the total number of descriptors that 1247 ** can be combined, so it does not exceed 64K 1248 */ 1249 if (rxr->mbuf_sz == MCLBYTES) 1250 rscctrl |= IXGBE_RSCCTL_MAXDESC_16; 1251 else if (rxr->mbuf_sz == MJUMPAGESIZE) 1252 rscctrl |= IXGBE_RSCCTL_MAXDESC_8; 1253 else if (rxr->mbuf_sz 
== MJUM9BYTES) 1254 rscctrl |= IXGBE_RSCCTL_MAXDESC_4; 1255 else /* Using 16K cluster */ 1256 rscctrl |= IXGBE_RSCCTL_MAXDESC_1; 1257 1258 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); 1259 1260 /* Enable TCP header recognition */ 1261 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 1262 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | 1263 IXGBE_PSRTYPE_TCPHDR)); 1264 1265 /* Disable RSC for ACK packets */ 1266 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, 1267 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); 1268 1269 rxr->hw_rsc = TRUE; 1270} 1271/********************************************************************* 1272 * 1273 * Refresh mbuf buffers for RX descriptor rings 1274 * - now keeps its own state so discards due to resource 1275 * exhaustion are unnecessary, if an mbuf cannot be obtained 1276 * it just returns, keeping its placeholder, thus it can simply 1277 * be recalled to try again. 1278 * 1279 **********************************************************************/ 1280static void 1281ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit) 1282{ 1283 struct adapter *adapter = rxr->adapter; 1284 bus_dma_segment_t seg[1]; 1285 struct ixgbe_rx_buf *rxbuf; 1286 struct mbuf *mp; 1287 int i, j, nsegs, error; 1288 bool refreshed = FALSE; 1289 1290 i = j = rxr->next_to_refresh; 1291 /* Control the loop with one beyond */ 1292 if (++j == rxr->num_desc) 1293 j = 0; 1294 1295 while (j != limit) { 1296 rxbuf = &rxr->rx_buffers[i]; 1297 if (rxbuf->buf == NULL) { 1298 mp = m_getjcl(M_NOWAIT, MT_DATA, 1299 M_PKTHDR, rxr->mbuf_sz); 1300 if (mp == NULL) 1301 goto update; 1302 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) 1303 m_adj(mp, ETHER_ALIGN); 1304 } else 1305 mp = rxbuf->buf; 1306 1307 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1308 1309 /* If we're dealing with an mbuf that was copied rather 1310 * than replaced, there's no need to go through busdma. 
1311 */ 1312 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { 1313 /* Get the memory mapping */ 1314 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 1315 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1316 rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT); 1317 if (error != 0) { 1318 printf("Refresh mbufs: payload dmamap load" 1319 " failure - %d\n", error); 1320 m_free(mp); 1321 rxbuf->buf = NULL; 1322 goto update; 1323 } 1324 rxbuf->buf = mp; 1325 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 1326 BUS_DMASYNC_PREREAD); 1327 rxbuf->addr = rxr->rx_base[i].read.pkt_addr = 1328 htole64(seg[0].ds_addr); 1329 } else { 1330 rxr->rx_base[i].read.pkt_addr = rxbuf->addr; 1331 rxbuf->flags &= ~IXGBE_RX_COPY; 1332 } 1333 1334 refreshed = TRUE; 1335 /* Next is precalculated */ 1336 i = j; 1337 rxr->next_to_refresh = i; 1338 if (++j == rxr->num_desc) 1339 j = 0; 1340 } 1341update: 1342 if (refreshed) /* Update hardware tail index */ 1343 IXGBE_WRITE_REG(&adapter->hw, 1344 rxr->tail, rxr->next_to_refresh); 1345 return; 1346} 1347 1348/********************************************************************* 1349 * 1350 * Allocate memory for rx_buffer structures. Since we use one 1351 * rx_buffer per received packet, the maximum number of rx_buffer's 1352 * that we'll need is equal to the number of receive descriptors 1353 * that we've allocated. 
1354 * 1355 **********************************************************************/ 1356int 1357ixgbe_allocate_receive_buffers(struct rx_ring *rxr) 1358{ 1359 struct adapter *adapter = rxr->adapter; 1360 device_t dev = adapter->dev; 1361 struct ixgbe_rx_buf *rxbuf; 1362 int i, bsize, error; 1363 1364 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; 1365 if (!(rxr->rx_buffers = 1366 (struct ixgbe_rx_buf *) malloc(bsize, 1367 M_DEVBUF, M_NOWAIT | M_ZERO))) { 1368 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 1369 error = ENOMEM; 1370 goto fail; 1371 } 1372 1373 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1374 1, 0, /* alignment, bounds */ 1375 BUS_SPACE_MAXADDR, /* lowaddr */ 1376 BUS_SPACE_MAXADDR, /* highaddr */ 1377 NULL, NULL, /* filter, filterarg */ 1378 MJUM16BYTES, /* maxsize */ 1379 1, /* nsegments */ 1380 MJUM16BYTES, /* maxsegsize */ 1381 0, /* flags */ 1382 NULL, /* lockfunc */ 1383 NULL, /* lockfuncarg */ 1384 &rxr->ptag))) { 1385 device_printf(dev, "Unable to create RX DMA tag\n"); 1386 goto fail; 1387 } 1388 1389 for (i = 0; i < rxr->num_desc; i++, rxbuf++) { 1390 rxbuf = &rxr->rx_buffers[i]; 1391 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); 1392 if (error) { 1393 device_printf(dev, "Unable to create RX dma map\n"); 1394 goto fail; 1395 } 1396 } 1397 1398 return (0); 1399 1400fail: 1401 /* Frees all, but can handle partial completion */ 1402 ixgbe_free_receive_structures(adapter); 1403 return (error); 1404} 1405 1406 1407static void 1408ixgbe_free_receive_ring(struct rx_ring *rxr) 1409{ 1410 struct ixgbe_rx_buf *rxbuf; 1411 int i; 1412 1413 for (i = 0; i < rxr->num_desc; i++) { 1414 rxbuf = &rxr->rx_buffers[i]; 1415 if (rxbuf->buf != NULL) { 1416 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 1417 BUS_DMASYNC_POSTREAD); 1418 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 1419 rxbuf->buf->m_flags |= M_PKTHDR; 1420 m_freem(rxbuf->buf); 1421 rxbuf->buf = NULL; 1422 rxbuf->flags = 0; 1423 } 1424 } 1425} 1426 1427 
/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 *  Called with no locks; takes and releases the RX lock internally.
 *  Returns 0 on success or an errno; on failure the ring's mbufs
 *  have already been freed (the lock is still released).
 *
 **********************************************************************/
static int
ixgbe_setup_receive_ring(struct rx_ring *rxr)
{
    struct	adapter 	*adapter;
    struct ifnet		*ifp;
    device_t		dev;
    struct ixgbe_rx_buf	*rxbuf;
    bus_dma_segment_t	seg[1];
    struct lro_ctrl		*lro = &rxr->lro;
    int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
    struct netmap_adapter *na = NA(rxr->adapter->ifp);
    struct netmap_slot *slot;
#endif /* DEV_NETMAP */

    adapter = rxr->adapter;
    ifp = adapter->ifp;
    dev = adapter->dev;

    /* Clear the ring contents */
    IXGBE_RX_LOCK(rxr);
#ifdef DEV_NETMAP
    /* same as in ixgbe_setup_transmit_ring() */
    slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif /* DEV_NETMAP */
    rsize = roundup2(adapter->num_rx_desc *
        sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
    bzero((void *)rxr->rx_base, rsize);
    /* Cache the size */
    rxr->mbuf_sz = adapter->rx_mbuf_sz;

    /* Free current RX buffer structs and their mbufs */
    ixgbe_free_receive_ring(rxr);

    /* Now replenish the mbufs */
    for (int j = 0; j != rxr->num_desc; ++j) {
        struct mbuf	*mp;

        rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
        /*
         * In netmap mode, fill the map and set the buffer
         * address in the NIC ring, considering the offset
         * between the netmap and NIC rings (see comment in
         * ixgbe_setup_transmit_ring() ). No need to allocate
         * an mbuf, so end the block with a continue;
         */
        if (slot) {
            int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
            uint64_t paddr;
            void *addr;

            addr = PNMB(na, slot + sj, &paddr);
            netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
            /* Update descriptor and the cached value */
            rxr->rx_base[j].read.pkt_addr = htole64(paddr);
            rxbuf->addr = htole64(paddr);
            continue;
        }
#endif /* DEV_NETMAP */
        rxbuf->flags = 0;
        rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
            M_PKTHDR, adapter->rx_mbuf_sz);
        if (rxbuf->buf == NULL) {
            error = ENOBUFS;
            goto fail;
        }
        mp = rxbuf->buf;
        mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
        /* Get the memory mapping */
        error = bus_dmamap_load_mbuf_sg(rxr->ptag,
            rxbuf->pmap, mp, seg,
            &nsegs, BUS_DMA_NOWAIT);
        if (error != 0)
            goto fail;
        bus_dmamap_sync(rxr->ptag,
            rxbuf->pmap, BUS_DMASYNC_PREREAD);
        /* Update the descriptor and the cached value */
        rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
        rxbuf->addr = htole64(seg[0].ds_addr);
    }


    /* Setup our descriptor indices */
    rxr->next_to_check = 0;
    rxr->next_to_refresh = 0;
    rxr->lro_enabled = FALSE;
    rxr->rx_copies = 0;
    rxr->rx_bytes = 0;
    rxr->vtag_strip = FALSE;

    bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

    /*
    ** Now set up the LRO interface:
    ** hardware RSC and software LRO are mutually exclusive;
    ** ixgbe_rsc_enable is a compile-time switch (see top of file).
    */
    if (ixgbe_rsc_enable)
        ixgbe_setup_hw_rsc(rxr);
    else if (ifp->if_capenable & IFCAP_LRO) {
        int err = tcp_lro_init(lro);
        if (err) {
            device_printf(dev, "LRO Initialization failed!\n");
            goto fail;
        }
        INIT_DEBUGOUT("RX Soft LRO Initialized\n");
        rxr->lro_enabled = TRUE;
        lro->ifp = adapter->ifp;
    }

    IXGBE_RX_UNLOCK(rxr);
    return (0);

fail:
    ixgbe_free_receive_ring(rxr);
    IXGBE_RX_UNLOCK(rxr);
    return (error);
}

/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 *  Returns 0, or ENOBUFS if any ring failed to set up.
 *
 **********************************************************************/
int
ixgbe_setup_receive_structures(struct adapter *adapter)
{
    struct rx_ring *rxr = adapter->rx_rings;
    int j;

    for (j = 0; j < adapter->num_queues; j++, rxr++)
        if (ixgbe_setup_receive_ring(rxr))
            goto fail;

    return (0);
fail:
    /*
     * Free RX buffers allocated so far, we will only handle
     * the rings that completed, the failing case will have
     * cleaned up for itself. 'j' failed, so its the terminus.
     */
    for (int i = 0; i < j; ++i) {
        rxr = &adapter->rx_rings[i];
        ixgbe_free_receive_ring(rxr);
    }

    return (ENOBUFS);
}


/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
void
ixgbe_free_receive_structures(struct adapter *adapter)
{
    struct rx_ring *rxr = adapter->rx_rings;

    INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");

    for (int i = 0; i < adapter->num_queues; i++, rxr++) {
        struct lro_ctrl		*lro = &rxr->lro;
        /* buffers/maps/tag first, then LRO, then descriptor DMA */
        ixgbe_free_receive_buffers(rxr);
        /* Free LRO memory */
        tcp_lro_free(lro);
        /* Free the ring memory as well */
        ixgbe_dma_free(adapter, &rxr->rxdma);
    }

    free(adapter->rx_rings, M_DEVBUF);
}


/*********************************************************************
 *
 *  Free receive ring data structures: mbufs, DMA maps, the buffer
 *  array and the ring's DMA tag.  Safe on partially-initialized
 *  rings (every pointer is NULL-checked).
 *
 **********************************************************************/
void
ixgbe_free_receive_buffers(struct rx_ring *rxr)
{
    struct adapter		*adapter = rxr->adapter;
    struct ixgbe_rx_buf	*rxbuf;

    INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");

    /* Cleanup any existing buffers */
    if (rxr->rx_buffers != NULL) {
        for (int i = 0; i < adapter->num_rx_desc; i++) {
            rxbuf = &rxr->rx_buffers[i];
            if (rxbuf->buf != NULL) {
                bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
                    BUS_DMASYNC_POSTREAD);
                bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
                /* restore pkthdr so m_freem frees the chain */
                rxbuf->buf->m_flags |= M_PKTHDR;
                m_freem(rxbuf->buf);
            }
            rxbuf->buf = NULL;
            if (rxbuf->pmap != NULL) {
                bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
                rxbuf->pmap = NULL;
            }
        }
        if (rxr->rx_buffers != NULL) {
            free(rxr->rx_buffers, M_DEVBUF);
            rxr->rx_buffers = NULL;
        }
    }

    if (rxr->ptag != NULL) {
        bus_dma_tag_destroy(rxr->ptag);
        rxr->ptag = NULL;
    }

    return;
}

/*
** Hand a completed frame up the stack, trying soft LRO first when
** it is eligible.  Drops and retakes the RX lock around if_input()
** so the stack is not entered with the driver lock held.
*/
static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
{

    /*
     * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
     * should be computed by hardware. Also it should not have VLAN tag in
     * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
     */
    if (rxr->lro_enabled &&
        (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
        (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
        ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
        (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
        (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
        (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
        (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
        (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
        /*
         * Send to the stack if:
         **  - LRO not enabled, or
         **  - no LRO resources, or
         **  - lro enqueue fails
         */
        if (rxr->lro.lro_cnt != 0)
            if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
                return;
    }
    IXGBE_RX_UNLOCK(rxr);
    (*ifp->if_input)(ifp, m);
    IXGBE_RX_LOCK(rxr);
}

/*
** Drop the frame (or partial chain) held at ring slot i and unload
** its DMA map; the slot will be repopulated by the refresh path.
*/
static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
{
    struct ixgbe_rx_buf	*rbuf;

    rbuf = &rxr->rx_buffers[i];


    /*
    ** With advanced descriptors the writeback
    ** clobbers the buffer addrs, so its easier
    ** to just free the existing mbufs and take
    ** the normal refresh path to get new buffers
    ** and mapping.
    */

    if (rbuf->fmp != NULL) {/* Partial chain ? */
        rbuf->fmp->m_flags |= M_PKTHDR;
        m_freem(rbuf->fmp);
        rbuf->fmp = NULL;
        rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
    } else if (rbuf->buf) {
        m_free(rbuf->buf);
        rbuf->buf = NULL;
    }
    bus_dmamap_unload(rxr->ptag, rbuf->pmap);

    rbuf->flags = 0;

    return;
}


/*********************************************************************
 *
 *  This routine executes in interrupt context.
It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
    struct adapter		*adapter = que->adapter;
    struct rx_ring		*rxr = que->rxr;
    struct ifnet		*ifp = adapter->ifp;
    struct lro_ctrl		*lro = &rxr->lro;
    struct lro_entry	*queued;
    int			i, nextp, processed = 0;
    u32			staterr = 0;
    u16			count = rxr->process_limit;	/* budget per call */
    union ixgbe_adv_rx_desc	*cur;
    struct ixgbe_rx_buf	*rbuf, *nbuf;
    u16			pkt_info;

    IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
    /* Same as the txeof routine: wakeup clients on intr. */
    if (netmap_rx_irq(ifp, rxr->me, &processed)) {
        IXGBE_RX_UNLOCK(rxr);
        return (FALSE);
    }
#endif /* DEV_NETMAP */

    for (i = rxr->next_to_check; count != 0;) {
        struct mbuf	*sendmp, *mp;
        u32		rsc, ptype;
        u16		len;
        u16		vtag = 0;
        bool		eop;

        /* Sync the ring. */
        bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

        cur = &rxr->rx_base[i];
        staterr = le32toh(cur->wb.upper.status_error);
        pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

        /* DD clear means the hardware has not written this slot yet */
        if ((staterr & IXGBE_RXD_STAT_DD) == 0)
            break;
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
            break;

        count--;
        sendmp = NULL;
        nbuf = NULL;
        rsc = 0;
        cur->wb.upper.status_error = 0;
        rbuf = &rxr->rx_buffers[i];
        mp = rbuf->buf;

        len = le16toh(cur->wb.upper.length);
        ptype = le32toh(cur->wb.lower.lo_dword.data) &
            IXGBE_RXDADV_PKTTYPE_MASK;
        eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

        /* Make sure bad packets are discarded */
        if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
            if (IXGBE_IS_VF(adapter))
                if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
            rxr->rx_discarded++;
            ixgbe_rx_discard(rxr, i);
            goto next_desc;
        }

        /*
        ** On 82599 which supports a hardware
        ** LRO (called HW RSC), packets need
        ** not be fragmented across sequential
        ** descriptors, rather the next descriptor
        ** is indicated in bits of the descriptor.
        ** This also means that we might proceses
        ** more than one packet at a time, something
        ** that has never been true before, it
        ** required eliminating global chain pointers
        ** in favor of what we are doing here. -jfv
        */
        if (!eop) {
            /*
            ** Figure out the next descriptor
            ** of this frame.
            */
            if (rxr->hw_rsc == TRUE) {
                rsc = ixgbe_rsc_count(cur);
                rxr->rsc_num += (rsc - 1);
            }
            if (rsc) { /* Get hardware index */
                nextp = ((staterr &
                    IXGBE_RXDADV_NEXTP_MASK) >>
                    IXGBE_RXDADV_NEXTP_SHIFT);
            } else { /* Just sequential */
                nextp = i + 1;
                if (nextp == adapter->num_rx_desc)
                    nextp = 0;
            }
            nbuf = &rxr->rx_buffers[nextp];
            prefetch(nbuf);
        }
        /*
        ** Rather than using the fmp/lmp global pointers
        ** we now keep the head of a packet chain in the
        ** buffer struct and pass this along from one
        ** descriptor to the next, until we get EOP.
        */
        mp->m_len = len;
        /*
        ** See if there is a stored head
        ** that determines what we are
        */
        sendmp = rbuf->fmp;
        if (sendmp != NULL) {  /* secondary frag */
            rbuf->buf = rbuf->fmp = NULL;
            mp->m_flags &= ~M_PKTHDR;
            sendmp->m_pkthdr.len += mp->m_len;
        } else {
            /*
             * Optimize.  This might be a small packet,
             * maybe just a TCP ACK.  Do a fast copy that
             * is cache aligned into a new mbuf, and
             * leave the old mbuf+cluster for re-use.
             */
            if (eop && len <= IXGBE_RX_COPY_LEN) {
                sendmp = m_gethdr(M_NOWAIT, MT_DATA);
                if (sendmp != NULL) {
                    sendmp->m_data +=
                        IXGBE_RX_COPY_ALIGN;
                    ixgbe_bcopy(mp->m_data,
                        sendmp->m_data, len);
                    sendmp->m_len = len;
                    rxr->rx_copies++;
                    /* cluster kept; skip remap on refresh */
                    rbuf->flags |= IXGBE_RX_COPY;
                }
            }
            if (sendmp == NULL) {
                rbuf->buf = rbuf->fmp = NULL;
                sendmp = mp;
            }

            /* first desc of a non-ps chain */
            sendmp->m_flags |= M_PKTHDR;
            sendmp->m_pkthdr.len = mp->m_len;
        }
        ++processed;

        /* Pass the head pointer on */
        if (eop == 0) {
            nbuf->fmp = sendmp;
            sendmp = NULL;
            mp->m_next = nbuf->buf;
        } else { /* Sending this frame */
            sendmp->m_pkthdr.rcvif = ifp;
            rxr->rx_packets++;
            /* capture data for AIM */
            rxr->bytes += sendmp->m_pkthdr.len;
            rxr->rx_bytes += sendmp->m_pkthdr.len;
            /* Process vlan info */
            if ((rxr->vtag_strip) &&
                (staterr & IXGBE_RXD_STAT_VP))
                vtag = le16toh(cur->wb.upper.vlan);
            if (vtag) {
                sendmp->m_pkthdr.ether_vtag = vtag;
                sendmp->m_flags |= M_VLANTAG;
            }
            if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
                ixgbe_rx_checksum(staterr, sendmp, ptype);
#if __FreeBSD_version >= 800000
            sendmp->m_pkthdr.flowid = que->msix;
#endif /* FreeBSD_version */
        }
next_desc:
        bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        /* Advance our pointers to the next descriptor. */
        if (++i == rxr->num_desc)
            i = 0;

        /* Now send to the stack or do LRO */
        if (sendmp != NULL) {
            /*
             * next_to_check is published before the lock is
             * dropped inside ixgbe_rx_input(), and re-read
             * after, in case it moved while unlocked.
             */
            rxr->next_to_check = i;
            ixgbe_rx_input(rxr, ifp, sendmp, ptype);
            i = rxr->next_to_check;
        }

        /* Every 8 descriptors we go to refresh mbufs */
        if (processed == 8) {
            ixgbe_refresh_mbufs(rxr, i);
            processed = 0;
        }
    }

    /* Refresh any remaining buf structs */
    if (ixgbe_rx_unrefreshed(rxr))
        ixgbe_refresh_mbufs(rxr, i);

    rxr->next_to_check = i;

    /*
     * Flush any outstanding LRO work
     */
    while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
        SLIST_REMOVE_HEAD(&lro->lro_active, next);
        tcp_lro_flush(lro, queued);
    }

    IXGBE_RX_UNLOCK(rxr);

    /*
    ** Still have cleaning to do?
    ** (the last descriptor examined had DD set, so we
    ** stopped on budget, not because the ring was empty)
    */
    if ((staterr & IXGBE_RXD_STAT_DD) != 0)
        return (TRUE);
    else
        return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that stack
 *  doesn't spend time verifying the checksum.
1952 * 1953 *********************************************************************/ 1954static void 1955ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype) 1956{ 1957 u16 status = (u16) staterr; 1958 u8 errors = (u8) (staterr >> 24); 1959 bool sctp = FALSE; 1960 1961 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && 1962 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0) 1963 sctp = TRUE; 1964 1965 if (status & IXGBE_RXD_STAT_IPCS) { 1966 if (!(errors & IXGBE_RXD_ERR_IPE)) { 1967 /* IP Checksum Good */ 1968 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 1969 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; 1970 1971 } else 1972 mp->m_pkthdr.csum_flags = 0; 1973 } 1974 if (status & IXGBE_RXD_STAT_L4CS) { 1975 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1976#if __FreeBSD_version >= 800000 1977 if (sctp) 1978 type = CSUM_SCTP_VALID; 1979#endif 1980 if (!(errors & IXGBE_RXD_ERR_TCPE)) { 1981 mp->m_pkthdr.csum_flags |= type; 1982 if (!sctp) 1983 mp->m_pkthdr.csum_data = htons(0xffff); 1984 } 1985 } 1986 return; 1987} 1988 1989/******************************************************************** 1990 * Manage DMA'able memory. 
 *******************************************************************/
/*
** busdma load callback: stash the single segment's bus address
** into the caller-supplied bus_addr_t.  Called synchronously by
** bus_dmamap_load() below (BUS_DMA_NOWAIT).
*/
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
{
    if (error)
        return;
    *(bus_addr_t *) arg = segs->ds_addr;
    return;
}

/*
** Allocate a single contiguous DMA region of 'size' bytes for a
** descriptor ring: creates the tag, allocates the memory, and loads
** the map, filling in 'dma'.  Returns 0 or a busdma error; on error
** anything created so far is torn down (goto unwind below).
*/
int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
        struct ixgbe_dma_alloc *dma, int mapflags)
{
    device_t dev = adapter->dev;
    int             r;

    r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
               DBA_ALIGN, 0,	/* alignment, bounds */
               BUS_SPACE_MAXADDR,	/* lowaddr */
               BUS_SPACE_MAXADDR,	/* highaddr */
               NULL, NULL,	/* filter, filterarg */
               size,	/* maxsize */
               1,	/* nsegments */
               size,	/* maxsegsize */
               BUS_DMA_ALLOCNOW,	/* flags */
               NULL,	/* lockfunc */
               NULL,	/* lockfuncarg */
               &dma->dma_tag);
    if (r != 0) {
        device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
               "error %u\n", r);
        goto fail_0;
    }
    r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
                 BUS_DMA_NOWAIT, &dma->dma_map);
    if (r != 0) {
        device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
               "error %u\n", r);
        goto fail_1;
    }
    r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
                size,
                ixgbe_dmamap_cb,
                &dma->dma_paddr,
                mapflags | BUS_DMA_NOWAIT);
    if (r != 0) {
        device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
               "error %u\n", r);
        goto fail_2;
    }
    dma->dma_size = size;
    return (0);
fail_2:
    bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
    bus_dma_tag_destroy(dma->dma_tag);
fail_0:
    dma->dma_tag = NULL;
    return (r);
}

/*
** Release a region allocated by ixgbe_dma_malloc(): sync, unload,
** free the memory and destroy the tag, in that order.
*/
void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
    bus_dmamap_sync(dma->dma_tag, dma->dma_map,
        BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
    bus_dmamap_unload(dma->dma_tag, dma->dma_map);
    bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
    bus_dma_tag_destroy(dma->dma_tag);
}


/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 *  Returns 0 or ENOMEM; on failure everything allocated here is
 *  unwound through the labelled error chain at the bottom.
 *
 **********************************************************************/
int
ixgbe_allocate_queues(struct adapter *adapter)
{
    device_t	dev = adapter->dev;
    struct ix_queue	*que;
    struct tx_ring	*txr;
    struct rx_ring	*rxr;
    int rsize, tsize, error = IXGBE_SUCCESS;
    int txconf = 0, rxconf = 0;

    /* First allocate the top level queue structs */
    if (!(adapter->queues =
        (struct ix_queue *) malloc(sizeof(struct ix_queue) *
        adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
        device_printf(dev, "Unable to allocate queue memory\n");
        error = ENOMEM;
        goto fail;
    }

    /* First allocate the TX ring struct memory */
    if (!(adapter->tx_rings =
        (struct tx_ring *) malloc(sizeof(struct tx_ring) *
        adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
        device_printf(dev, "Unable to allocate TX ring memory\n");
        error = ENOMEM;
        goto tx_fail;
    }

    /* Next allocate the RX */
    if (!(adapter->rx_rings =
        (struct rx_ring *) malloc(sizeof(struct rx_ring) *
        adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
        device_printf(dev, "Unable to allocate RX ring memory\n");
        error = ENOMEM;
        goto rx_fail;
    }

    /* For the ring itself */
    tsize = roundup2(adapter->num_tx_desc *
        sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

    /*
     * Now set up the TX queues, txconf is needed to handle the
     * possibility that things fail midcourse and we need to
     * undo memory gracefully
     */
    for (int i = 0; i < adapter->num_queues; i++, txconf++) {
        /* Set up some basics */
        txr = &adapter->tx_rings[i];
        txr->adapter = adapter;
        txr->me = i;
        txr->num_desc = adapter->num_tx_desc;

        /* Initialize the TX side lock */
        snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
            device_get_nameunit(dev), txr->me);
        mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

        if (ixgbe_dma_malloc(adapter, tsize,
            &txr->txdma, BUS_DMA_NOWAIT)) {
            device_printf(dev,
                "Unable to allocate TX Descriptor memory\n");
            error = ENOMEM;
            goto err_tx_desc;
        }
        txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
        bzero((void *)txr->tx_base, tsize);

        /* Now allocate transmit buffers for the ring */
        if (ixgbe_allocate_transmit_buffers(txr)) {
            device_printf(dev,
                "Critical Failure setting up transmit buffers\n");
            error = ENOMEM;
            goto err_tx_desc;
        }
#ifndef IXGBE_LEGACY_TX
        /* Allocate a buf ring */
        txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
            M_WAITOK, &txr->tx_mtx);
        if (txr->br == NULL) {
            device_printf(dev,
                "Critical Failure setting up buf ring\n");
            error = ENOMEM;
            goto err_tx_desc;
        }
#endif
    }

    /*
     * Next the RX queues...
     */
    rsize = roundup2(adapter->num_rx_desc *
        sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
    for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
        rxr = &adapter->rx_rings[i];
        /* Set up some basics */
        rxr->adapter = adapter;
        rxr->me = i;
        rxr->num_desc = adapter->num_rx_desc;

        /* Initialize the RX side lock */
        snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
            device_get_nameunit(dev), rxr->me);
        mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

        if (ixgbe_dma_malloc(adapter, rsize,
            &rxr->rxdma, BUS_DMA_NOWAIT)) {
            device_printf(dev,
                "Unable to allocate RxDescriptor memory\n");
            error = ENOMEM;
            goto err_rx_desc;
        }
        rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
        bzero((void *)rxr->rx_base, rsize);

        /* Allocate receive buffers for the ring*/
        if (ixgbe_allocate_receive_buffers(rxr)) {
            device_printf(dev,
                "Critical Failure setting up receive buffers\n");
            error = ENOMEM;
            goto err_rx_desc;
        }
    }

    /*
    ** Finally set up the queue holding structs
    */
    for (int i = 0; i < adapter->num_queues; i++) {
        que = &adapter->queues[i];
        que->adapter = adapter;
        que->me = i;
        que->txr = &adapter->tx_rings[i];
        que->rxr = &adapter->rx_rings[i];
    }

    return (0);

err_rx_desc:
    /* only unwind the rings whose descriptor DMA succeeded */
    for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
        ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
    for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
        ixgbe_dma_free(adapter, &txr->txdma);
    free(adapter->rx_rings, M_DEVBUF);
rx_fail:
    free(adapter->tx_rings, M_DEVBUF);
tx_fail:
    free(adapter->queues, M_DEVBUF);
fail:
    return (error);
}