ixl_txrx.c revision 323211
/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/11/sys/dev/ixl/ixl_txrx.c 323211 2017-09-06 07:08:52Z rlibby $*/

/*
** IXL driver TX/RX Routines:
**  This was separated to allow usage by
**  both the PF and VF drivers.
*/

#ifndef IXL_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif

/* Local Prototypes */
static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
static void	ixl_refresh_mbufs(struct ixl_queue *, int);
static int	ixl_xmit(struct ixl_queue *, struct mbuf **);
static int	ixl_tx_setup_offload(struct ixl_queue *,
		    struct mbuf *, u32 *, u32 *);
static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);

static inline void ixl_rx_discard(struct rx_ring *, int);
static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u8);

static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
static inline u32 ixl_get_tx_head(struct ixl_queue *que);

#ifdef DEV_NETMAP
#include <dev/netmap/if_ixl_netmap.h>
#endif /* DEV_NETMAP */

/*
 * @key key is saved into this parameter
 */
void
ixl_get_default_rss_key(u32 *key)
{
	MPASS(key != NULL);

	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
	    0x183cfd8c, 0xce880440, 0x580cbc3c,
	    0x35897377, 0x328b25e1, 0x4fa98922,
	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
	    0x0, 0x0, 0x0};

	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
}

/*
** Multiqueue Transmit driver
*/
int
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct ixl_vsi *vsi = ifp->if_softc;
	struct ixl_queue *que;
	struct tx_ring *txr;
	int err, i;
#ifdef RSS
	u32 bucket_id;
#endif

	/*
	** Which queue to use:
	**
	** When doing RSS, map it to the same outbound
	** queue as the incoming flow would be mapped to.
	** If everything is setup correctly, it should be
	** the same bucket that the current CPU we're on is.
	*/
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % vsi->num_queues;
		} else
#endif
			i = m->m_pkthdr.flowid % vsi->num_queues;
	} else
		i = curcpu % vsi->num_queues;

	que = &vsi->queues[i];
	txr = &que->txr;

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXL_TX_TRYLOCK(txr)) {
		ixl_mq_start_locked(ifp, txr);
		IXL_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &que->tx_task);

	return (0);
}

int
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct ixl_queue *que = txr->que;
	struct ixl_vsi *vsi = que->vsi;
	struct mbuf *next;
	int err = 0;


	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    vsi->link_active == 0)
		return (ENETDOWN);

	/* Process the transmit queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixl_xmit(que, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
		ixl_txeof(que);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
	struct ixl_queue *que = arg;
	struct tx_ring *txr = &que->txr;
	struct ixl_vsi *vsi = que->vsi;
	struct ifnet *ifp = vsi->ifp;

	IXL_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixl_mq_start_locked(ifp, txr);
	IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers
*/
void
ixl_qflush(struct ifnet *ifp)
{
	struct ixl_vsi *vsi = ifp->if_softc;

	for (int i = 0; i < vsi->num_queues; i++) {
		struct ixl_queue *que = &vsi->queues[i];
		struct tx_ring *txr = &que->txr;
		struct mbuf *m;
		IXL_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXL_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

/*
** Find mbuf chains passed to the driver
** that are 'sparse', using more than 8
** mbufs to deliver an mss-size chunk of data
*/
static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
	struct mbuf *m;
	int num, mss;

	num = 0;
	mss = mp->m_pkthdr.tso_segsz;

	/* Exclude first mbuf; assume it contains all headers */
	for (m = mp->m_next; m != NULL; m = m->m_next) {
		if (m == NULL)
			break;
		num++;
		mss -= m->m_len % mp->m_pkthdr.tso_segsz;

		if (mss < 1) {
			if (num > IXL_SPARSE_CHAIN)
				return (true);
			num = (mss == 0) ? 0 : 1;
			mss += mp->m_pkthdr.tso_segsz;
		}
	}

	return (false);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
	struct ixl_vsi *vsi = que->vsi;
	struct i40e_hw *hw = vsi->hw;
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;
	struct i40e_tx_desc *txd = NULL;
	struct mbuf *m_head, *m;
	int i, j, error, nsegs;
	int first, last = 0;
	u16 vtag = 0;
	u32 cmd, off;
	bus_dmamap_t map;
	bus_dma_tag_t tag;
	bus_dma_segment_t segs[IXL_MAX_TSO_SEGS];

	cmd = off = 0;
	m_head = *m_headp;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail;
	buf = &txr->buffers[first];
	map = buf->map;
	tag = txr->tx_tag;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		/* Use larger mapping for TSO */
		tag = txr->tso_tag;
		if (ixl_tso_detect_sparse(m_head)) {
			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
			*m_headp = m;
		}
	}

	/*
	 * Map the packet for DMA.
	 */
	error = bus_dmamap_load_mbuf_sg(tag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			que->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(tag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error != 0) {
			que->tx_dmamap_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		que->tx_dmamap_failed++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->avail - 2) {
		txr->no_desc++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/* Set up the TSO/CSUM offload */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
		if (error)
			goto xmit_fail;
	}

	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Grab the VLAN tag */
	if (m_head->m_flags & M_VLANTAG) {
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		vtag = htole16(m_head->m_pkthdr.ether_vtag);
	}

	i = txr->next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		buf = &txr->buffers[i];
		buf->tag = tag; /* Keep track of the type tag */
		txd = &txr->base[i];
		seglen = segs[j].ds_len;

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
		    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

		last = i; /* descriptor that will get completion IRQ */

		if (++i == que->num_desc)
			i = 0;

		buf->m_head = NULL;
		buf->eop_index = -1;
	}
	/* Set the last descriptor for report */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	txr->avail -= nsegs;
	txr->next_avail = i;

	buf->m_head = m_head;
	/* Swap the dma map between the first and last descriptor */
	txr->buffers[first].map = buf->map;
	buf->map = map;
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	buf = &txr->buffers[first];
	buf->eop_index = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	wr32(hw, txr->tail, i);

	/* Mark outstanding work */
	atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
	return (0);

xmit_fail:
	bus_dmamap_unload(tag, buf->map);
	return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_vsi *vsi = que->vsi;
	device_t dev = vsi->dev;
	struct ixl_tx_buf *buf;
	int error = 0;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    IXL_TSO_SIZE,		/* maxsize */
	    IXL_MAX_TX_SEGS,		/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->tx_tag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Make a special tag for TSO */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    IXL_TSO_SIZE,		/* maxsize */
	    IXL_MAX_TSO_SEGS,		/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->tso_tag))) {
		device_printf(dev, "Unable to allocate TX TSO DMA tag\n");
		goto fail;
	}

	if (!(txr->buffers =
	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer default dma maps */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		buf->tag = txr->tx_tag;
		error = bus_dmamap_create(buf->tag, 0, &buf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}
fail:
	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *	- called by init, it clears the descriptor ring,
 *	  and frees any stale mbufs
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	/* Clear the old ring contents */
	IXL_TX_LOCK(txr);

#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->base,
	    (sizeof(struct i40e_tx_desc)) * que->num_desc);

	/* Reset indices */
	txr->next_avail = 0;
	txr->next_to_clean = 0;

	/* Reset watchdog status */
	txr->watchdog_timer = 0;

#ifdef IXL_FDIR
	/* Initialize flow director */
	txr->atr_rate = ixl_atr_rate;
	txr->atr_count = 0;
#endif
	/* Free any existing tx mbufs. */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, i, into the corresponding
		 * netmap slot index, si
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
			netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		buf->eop_index = -1;
	}

	/* Set number of descriptors available */
	txr->avail = que->num_desc;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXL_TX_UNLOCK(txr);
}


/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	for (int i = 0; i < que->num_desc; i++) {
		buf = &txr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
			if (buf->map != NULL) {
				bus_dmamap_destroy(buf->tag,
				    buf->map);
				buf->map = NULL;
			}
		} else if (buf->map != NULL) {
			bus_dmamap_unload(buf->tag,
			    buf->map);
			bus_dmamap_destroy(buf->tag,
			    buf->map);
			buf->map = NULL;
		}
	}
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->buffers != NULL) {
		free(txr->buffers, M_DEVBUF);
		txr->buffers = NULL;
	}
	if (txr->tx_tag != NULL) {
		bus_dma_tag_destroy(txr->tx_tag);
		txr->tx_tag = NULL;
	}
	if (txr->tso_tag != NULL) {
		bus_dma_tag_destroy(txr->tso_tag);
		txr->tso_tag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

/*********************************************************************
 *
 *  Setup descriptor for hw offloads
 *
 **********************************************************************/

static int
ixl_tx_setup_offload(struct ixl_queue *que,
    struct mbuf *mp, u32 *cmd, u32 *off)
{
	struct ether_vlan_header *eh;
#ifdef INET
	struct ip *ip = NULL;
#endif
	struct tcphdr *th = NULL;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	int elen, ip_hlen = 0, tcp_hlen;
	u16 etype;
	u8 ipproto = 0;
	bool tso = FALSE;

	/* Set up the TSO context descriptor if required */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
		tso = ixl_tso_setup(que, mp);
		if (tso)
			++que->tso;
		else
			return (ENXIO);
	}

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		elen = ETHER_HDR_LEN;
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* The IP checksum must be recalculated with TSO */
		if (tso)
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		break;
#endif
	default:
		break;
	}

	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

	switch (ipproto) {
	case IPPROTO_TCP:
		tcp_hlen = th->th_off << 2;
		if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
			*off |= (tcp_hlen >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
#ifdef IXL_FDIR
		ixl_atr(que, th, etype);
#endif
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
			*off |= (sizeof(struct udphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		break;

	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
			*off |= (sizeof(struct sctphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		/* Fall Thru */
	default:
		break;
	}

	return (0);
}


/**********************************************************************
 *
 *  Setup context for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static bool
ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
{
	struct tx_ring *txr = &que->txr;
	struct i40e_tx_context_desc *TXD;
	struct ixl_tx_buf *buf;
	u32 cmd, mss, type, tsolen;
	u16 etype;
	int idx, elen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
#ifdef INET
	struct ip *ip;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#if defined(INET6) || defined(INET)
	struct tcphdr *th;
#endif
	u64 type_cmd_tso_mss;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		elen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	switch (ntohs(etype)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (ENXIO);
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		tcp_hlen = th->th_off << 2;
		/*
		 * The corresponding flag is set by the stack in the IPv4
		 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
		 * So, set it here because the rest of the flow requires it.
		 */
		mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		if (ip->ip_p != IPPROTO_TCP)
			return (ENXIO);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		tcp_hlen = th->th_off << 2;
		break;
#endif
	default:
		printf("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(etype));
		return FALSE;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
		return FALSE;

	idx = txr->next_avail;
	buf = &txr->buffers[idx];
	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	/* TSO MSS must not be less than 64 */
	if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
		que->mss_too_small++;
		mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
	}
	mss = mp->m_pkthdr.tso_segsz;

	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	buf->m_head = NULL;
	buf->eop_index = -1;

	if (++idx == que->num_desc)
		idx = 0;

	txr->avail--;
	txr->next_avail = idx;

	return TRUE;
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	void *head = &txr->base[que->num_desc];
	return LE32_TO_CPU(*(volatile __le32 *)head);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
bool
ixl_txeof(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	u32 first, last, head, done, processed;
	struct ixl_tx_buf *buf;
	struct i40e_tx_desc *tx_desc, *eop_desc;


	mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
	// XXX todo: implement moderation
	if (netmap_tx_irq(que->vsi->ifp, que->me))
		return FALSE;
#endif /* DEF_NETMAP */

	/* These are not the descriptors you seek, move along :) */
	if (txr->avail == que->num_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	buf = &txr->buffers[first];
	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
	last = buf->eop_index;
	if (last == -1)
		return FALSE;
	eop_desc = (struct i40e_tx_desc *)&txr->base[last];

	/* Get the Head WB value */
	head = ixl_get_tx_head(que);

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == que->num_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** The HEAD index of the ring is written in a
	** defined location, this rather than a done bit
	** is what is used to keep track of what must be
	** 'cleaned'.
	*/
	while (first != head) {
		/* We clean the range of the packet */
		while (first != done) {
			++txr->avail;
			++processed;

			if (buf->m_head) {
				txr->bytes += /* for ITR adjustment */
				    buf->m_head->m_pkthdr.len;
				txr->tx_bytes += /* for TX stats */
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
				buf->map = NULL;
			}
			buf->eop_index = -1;

			if (++first == que->num_desc)
				first = 0;

			buf = &txr->buffers[first];
			tx_desc = &txr->base[first];
		}
		++txr->packets;
		/* See if there is more work now */
		last = buf->eop_index;
		if (last != -1) {
			eop_desc = &txr->base[last];
			/* Get next done point */
			if (++last == que->num_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;


	/*
	 * If there are no pending descriptors, clear the timeout.
	 */
	if (txr->avail == que->num_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	return TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
	struct ixl_vsi *vsi = que->vsi;
	struct rx_ring *rxr = &que->rxr;
	bus_dma_segment_t hseg[1];
	bus_dma_segment_t pseg[1];
	struct ixl_rx_buf *buf;
	struct mbuf *mh, *mp;
	int i, j, nsegs, error;
	bool refreshed = FALSE;

	i = j = rxr->next_refresh;
	/* Control the loop with one beyond */
	if (++j == que->num_desc)
		j = 0;

	while (j != limit) {
		buf = &rxr->buffers[i];
		if (rxr->hdr_split == FALSE)
			goto no_split;

		if (buf->m_head == NULL) {
			mh = m_gethdr(M_NOWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			mh = buf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(mh);
			buf->m_head = NULL;
			goto update;
		}
		buf->m_head = mh;
		bus_dmamap_sync(rxr->htag, buf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.hdr_addr =
		    htole64(hseg[0].ds_addr);

no_split:
		if (buf->m_pack == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
		} else
			mp = buf->m_pack;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			m_free(mp);
			buf->m_pack = NULL;
			goto update;
		}
		buf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, buf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.pkt_addr =
		    htole64(pseg[0].ds_addr);
		/* Used only when doing header split */
		rxr->base[i].read.hdr_addr = 0;

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_refresh = i;
		if (++j == que->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've defined.
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
	struct rx_ring *rxr = &que->rxr;
	struct ixl_vsi *vsi = que->vsi;
	device_t dev = vsi->dev;
	struct ixl_rx_buf *buf;
	int i, bsize, error;

	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
	if (!(rxr->buffers =
	    (struct ixl_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,		/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MSIZE,			/* maxsize */
	    1,				/* nsegments */
	    MSIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA htag\n");
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,		/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MJUM16BYTES,		/* maxsize */
	    1,				/* nsegments */
	    MJUM16BYTES,		/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA ptag\n");
		return (error);
	}

	for (i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &buf->hmap);
		if (error) {
			device_printf(dev, "Unable to create RX head map\n");
			break;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &buf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX pkt map\n");
			break;
		}
	}

	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize the queue receive ring and its buffers.
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
	struct rx_ring *rxr = &que->rxr;
	struct ixl_vsi *vsi = que->vsi;
#if defined(INET6) || defined(INET)
	struct ifnet *ifp = vsi->ifp;
	struct lro_ctrl *lro = &rxr->lro;
#endif
	struct ixl_rx_buf *buf;
	bus_dma_segment_t pseg[1], hseg[1];
	int rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	IXL_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixl_init_tx_ring() */
	slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
	/* Clear the ring contents */
	rsize = roundup2(que->num_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->base, rsize);
	/* Cleanup any existing buffers */
	for (int i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, buf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, buf->hmap);
			buf->m_head->m_flags |= M_PKTHDR;
			m_freem(buf->m_head);
		}
		if (buf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, buf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, buf->pmap);
			buf->m_pack->m_flags |= M_PKTHDR;
			m_freem(buf->m_pack);
		}
		buf->m_head = NULL;
		buf->m_pack = NULL;
	}

	/* header split is off */
	rxr->hdr_split = FALSE;

	/* Now replenish the mbufs */
	for (int j = 0; j != que->num_desc; ++j) {
		struct mbuf *mh, *mp;

		buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ).
		 * No need to allocate
		 * an mbuf, so end the block with a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->base[j].read.pkt_addr = htole64(paddr);
			rxr->base[j].read.hdr_addr = 0;
			continue;
		}
#endif /* DEV_NETMAP */
		/*
		** Don't allocate mbufs if not
		** doing header split, its wasteful
		*/
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
		if (buf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m_adj(buf->m_head, ETHER_ALIGN);
		mh = buf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, buf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
			goto fail;
		bus_dmamap_sync(rxr->htag,
		    buf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, rxr->mbuf_sz);
		if (buf->m_pack == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = buf->m_pack;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    buf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
		rxr->base[j].read.hdr_addr = 0;
	}


	/* Setup our descriptor indices */
	rxr->next_check = 0;
	rxr->next_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->split = 0;
	rxr->bytes = 0;
	rxr->discard = FALSE;

	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
	ixl_flush(vsi->hw);

#if defined(INET6) || defined(INET)
	/*
	** Now set up the LRO interface:
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
			goto fail;
		}
		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
		rxr->lro_enabled = TRUE;
		lro->ifp = vsi->ifp;
	}
#endif

	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	IXL_RX_UNLOCK(rxr);
	return (error);
}


/*********************************************************************
 *
 *  Free station receive ring data structures
 *
 **********************************************************************/
void
ixl_free_que_rx(struct ixl_queue *que)
{
	struct rx_ring *rxr = &que->rxr;
	struct ixl_rx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	/* Cleanup any existing buffers */
	if (rxr->buffers != NULL) {
		for (int i = 0; i < que->num_desc; i++) {
			buf = &rxr->buffers[i];
			if (buf->m_head != NULL) {
				bus_dmamap_sync(rxr->htag, buf->hmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->htag,
				    buf->hmap);
				buf->m_head->m_flags |= M_PKTHDR;
				m_freem(buf->m_head);
			}
			if (buf->m_pack != NULL) {
				bus_dmamap_sync(rxr->ptag, buf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, buf->pmap);
				buf->m_pack->m_flags |= M_PKTHDR;
				m_freem(buf->m_pack);
			}
			buf->m_head = NULL;
			buf->m_pack = NULL;
			if (buf->hmap != NULL) {
				bus_dmamap_destroy(rxr->htag, buf->hmap);
				buf->hmap = NULL;
			}
			if (buf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, buf->pmap);
				buf->pmap = NULL;
			}
		}
		if (rxr->buffers != NULL) {
			free(rxr->buffers, M_DEVBUF);
			rxr->buffers = NULL;
		}
	}

	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

static inline void
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
{

#if defined(INET6) || defined(INET)
	/*
	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
	 * should be computed by hardware. Also it should not have VLAN tag in
	 * ethernet header.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 **  - LRO not enabled, or
		 **  - no LRO resources, or
		 **  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
#endif
	IXL_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXL_RX_LOCK(rxr);
}


static inline void
ixl_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixl_rx_buf *rbuf;

	rbuf = &rxr->buffers[i];

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
	}

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so its easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}

	if (rbuf->m_pack) {
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
	}

	return;
}

#ifdef RSS
/*
** i40e_ptype_to_hash: parse the packet type
** to determine the appropriate hash.
*/
static inline int
ixl_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;
	u8 ex = 0;

	decoded = decode_rx_desc_ptype(ptype);
	ex = decoded.outer_frag;

	if (!decoded.known)
		return M_HASHTYPE_OPAQUE_HASH;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE_HASH;

	/* Note: anything that gets to this point is IP */
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			if (ex)
				return M_HASHTYPE_RSS_TCP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_TCP_IPV6;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV6;
		default:
			if (ex)
				return M_HASHTYPE_RSS_IPV6_EX;
			else
				return M_HASHTYPE_RSS_IPV6;
		}
	}
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV4;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV4_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV4;
		default:
			return M_HASHTYPE_RSS_IPV4;
		}
	}
	/* We should never get here!! */
	return M_HASHTYPE_OPAQUE_HASH;
}
#endif /* RSS */

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixl_rxeof(struct ixl_queue *que, int count)
{
	struct ixl_vsi *vsi = que->vsi;
	struct rx_ring *rxr = &que->rxr;
	struct ifnet *ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
	struct lro_ctrl *lro = &rxr->lro;
#endif
	int i, nextp, processed = 0;
	union i40e_rx_desc *cur;
	struct ixl_rx_buf *rbuf, *nbuf;


	IXL_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	if (netmap_rx_irq(ifp, que->me, &count)) {
		IXL_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_check; count != 0;) {
		struct mbuf *sendmp, *mh, *mp;
		u32 status, error;
		u16 hlen, plen, vtag;
		u64 qword;
		u8 ptype;
		bool eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->base[i];
		qword = le64toh(cur->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
		    >> I40E_RXD_QW1_STATUS_SHIFT;
		error = (qword & I40E_RXD_QW1_ERROR_MASK)
		    >> I40E_RXD_QW1_ERROR_SHIFT;
		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
		    >> I40E_RXD_QW1_PTYPE_SHIFT;

		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
			++rxr->not_done;
			break;
		}
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		cur->wb.qword1.status_error_len = 0;
		rbuf = &rxr->buffers[i];
		mh = rbuf->m_head;
		mp = rbuf->m_pack;
		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
		else
			vtag = 0;

		/*
		** Make sure bad packets are discarded,
		** note that only EOP descriptor has valid
		** error results.
		*/
		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			rxr->desc_errs++;
			ixl_rx_discard(rxr, i);
			goto next_desc;
		}

		/* Prefetch the next buffer */
		if (!eop) {
			nextp = i + 1;
			if (nextp == que->num_desc)
				nextp = 0;
			nbuf = &rxr->buffers[nextp];
			prefetch(nbuf);
		}

		/*
		** The header mbuf is ONLY used when header
		** split is enabled, otherwise we get normal
		** behavior, ie, both header and payload
		** are DMA'd into the payload buffer.
		**
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
			if (hlen > IXL_RX_HDR)
				hlen = IXL_RX_HDR;
			mh->m_len = hlen;
			mh->m_flags |= M_PKTHDR;
			mh->m_next = NULL;
			mh->m_pkthdr.len = mh->m_len;
			/* Null buf pointer so it is refreshed */
			rbuf->m_head = NULL;
			/*
			** Check the payload length, this
			** could be zero if its a small
			** packet.
			*/
			if (plen > 0) {
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_pkthdr.len += mp->m_len;
				/* Null buf pointer so it is refreshed */
				rbuf->m_pack = NULL;
				rxr->split++;
			}
			/*
			** Now create the forward
			** chain so when complete
			** we wont have to.
			*/
			if (eop == 0) {
				/* stash the chain head */
				nbuf->fmp = mh;
				/* Make forward chain */
				if (plen)
					mp->m_next = nbuf->m_pack;
				else
					mh->m_next = nbuf->m_pack;
			} else {
				/* Singlet, prepare to send */
				sendmp = mh;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mp->m_len = plen;
			/*
			** See if there is a stored head
			** that determines what we are
			*/
			sendmp = rbuf->fmp;
			rbuf->m_pack = rbuf->fmp = NULL;

			if (sendmp != NULL) /* secondary frag */
				sendmp->m_pkthdr.len += mp->m_len;
			else {
				/* first desc of a non-ps chain */
				sendmp = mp;
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;
			}
			/* Pass the head pointer on */
			if (eop == 0) {
				nbuf->fmp = sendmp;
				sendmp = NULL;
				mp->m_next = nbuf->m_pack;
			}
		}
		++processed;
		/* Sending this frame? */
		if (eop) {
			sendmp->m_pkthdr.rcvif = ifp;
			/* gather stats */
			rxr->rx_packets++;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* capture data for dynamic ITR adjustment */
			rxr->packets++;
			rxr->bytes += sendmp->m_pkthdr.len;
			/* Set VLAN tag (field only valid in eop desc) */
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixl_rx_checksum(sendmp, status, error, ptype);
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.qword0.hi_dword.rss);
			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
		}
next_desc:
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == que->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_check = i;
			ixl_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixl_refresh_mbufs(que, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixl_rx_unrefreshed(que))
		ixl_refresh_mbufs(que, i);

	rxr->next_check = i;

#if defined(INET6) || defined(INET)
	/*
	 * Flush any outstanding LRO work
	 */
#if __FreeBSD_version >= 1100105
	tcp_lro_flush_all(lro);
#else
	struct lro_entry *queued;
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif
#endif /* defined(INET6) || defined(INET) */

	IXL_RX_UNLOCK(rxr);
	return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that stack
 *  doesn't spend time verifying the checksum.
 *
 *********************************************************************/
static void
ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;

	decoded = decode_rx_desc_ptype(ptype);

	/* Errors? */
	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* IPv6 with extension headers likely have bad csum */
	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
		if (status &
		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
			mp->m_pkthdr.csum_flags = 0;
			return;
		}


	/* IP Checksum Good */
	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;

	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
		mp->m_pkthdr.csum_flags |=
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		mp->m_pkthdr.csum_data |= htons(0xffff);
	}
	return;
}

#if __FreeBSD_version >= 1100000
uint64_t
ixl_get_counter(if_t ifp, ift_counter cnt)
{
	struct ixl_vsi *vsi;

	vsi = if_getsoftc(ifp);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (vsi->ipackets);
	case IFCOUNTER_IERRORS:
		return (vsi->ierrors);
	case IFCOUNTER_OPACKETS:
		return (vsi->opackets);
	case IFCOUNTER_OERRORS:
		return (vsi->oerrors);
	case IFCOUNTER_COLLISIONS:
		/* Collisions are by standard impossible in 40G/10G Ethernet */
		return (0);
	case IFCOUNTER_IBYTES:
		return (vsi->ibytes);
	case IFCOUNTER_OBYTES:
		return (vsi->obytes);
	case IFCOUNTER_IMCASTS:
		return (vsi->imcasts);
	case IFCOUNTER_OMCASTS:
		return (vsi->omcasts);
	case IFCOUNTER_IQDROPS:
		return (vsi->iqdrops);
	case IFCOUNTER_OQDROPS:
		return (vsi->oqdrops);
	case IFCOUNTER_NOPROTO:
		return (vsi->noproto);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif