/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

  1. Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.

  2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.

  3. Neither the name of the Intel Corporation nor the names of its
     contributors may be used to endorse or promote products derived from
     this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/11/sys/dev/ixl/ixl_txrx.c 318357 2017-05-16 17:49:15Z erj $*/

/*
** IXL driver TX/RX Routines:
** This was separated to allow usage by
** both the PF and VF drivers.
*/

#ifndef IXL_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif

/* Local Prototypes */
static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
static void	ixl_refresh_mbufs(struct ixl_queue *, int);
static int	ixl_xmit(struct ixl_queue *, struct mbuf **);
static int	ixl_tx_setup_offload(struct ixl_queue *,
		    struct mbuf *, u32 *, u32 *);
static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);

static inline void ixl_rx_discard(struct rx_ring *, int);
static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u8);

static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
static inline u32 ixl_get_tx_head(struct ixl_queue *que);

#ifdef DEV_NETMAP
#include <dev/netmap/if_ixl_netmap.h>
#endif /* DEV_NETMAP */

/*
 * The driver's default RSS key is written into @key.
 */
void
ixl_get_default_rss_key(u32 *key)
{
	MPASS(key != NULL);

	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
	    0x183cfd8c, 0xce880440, 0x580cbc3c,
	    0x35897377, 0x328b25e1, 0x4fa98922,
	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
	    0x0, 0x0, 0x0};

	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
}

/*
** Multiqueue Transmit driver
*/
int
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct ixl_vsi *vsi = ifp->if_softc;
	struct ixl_queue *que;
	struct tx_ring *txr;
	int err, i;
#ifdef RSS
	u32 bucket_id;
#endif

	/*
	** Which queue to use:
	**
	** When doing RSS, map it to the same outbound
	** queue as the incoming flow would be mapped to.
	** If everything is set up correctly, it should be
	** the same bucket that the current CPU belongs to.
	*/
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % vsi->num_queues;
		} else
#endif
			i = m->m_pkthdr.flowid % vsi->num_queues;
	} else
		i = curcpu % vsi->num_queues;

	que = &vsi->queues[i];
	txr = &que->txr;

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXL_TX_TRYLOCK(txr)) {
		ixl_mq_start_locked(ifp, txr);
		IXL_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &que->tx_task);

	return (0);
}

int
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct ixl_queue *que = txr->que;
	struct ixl_vsi *vsi = que->vsi;
	struct mbuf *next;
	int err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    vsi->link_active == 0)
		return (ENETDOWN);

	/* Process the transmit queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixl_xmit(que, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
		ixl_txeof(que);

	return (err);
}
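/*
 * Note on the ENETDOWN path above: by the time ixl_mq_start_locked()
 * runs, the mbuf has already been committed to the buf_ring by
 * ixl_mq_start(), so it stays queued and is either drained by a later
 * pass through the transmit path or reclaimed by ixl_qflush().
 */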
/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
	struct ixl_queue *que = arg;
	struct tx_ring *txr = &que->txr;
	struct ixl_vsi *vsi = que->vsi;
	struct ifnet *ifp = vsi->ifp;

	IXL_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixl_mq_start_locked(ifp, txr);
	IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers
*/
void
ixl_qflush(struct ifnet *ifp)
{
	struct ixl_vsi *vsi = ifp->if_softc;

	for (int i = 0; i < vsi->num_queues; i++) {
		struct ixl_queue *que = &vsi->queues[i];
		struct tx_ring *txr = &que->txr;
		struct mbuf *m;

		IXL_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXL_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

/*
** Find mbuf chains passed to the driver
** that are 'sparse', using more than 8
** mbufs to deliver an mss-size chunk of data
*/
static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
	struct mbuf *m;
	int num, mss;

	num = 0;
	mss = mp->m_pkthdr.tso_segsz;

	/* Exclude first mbuf; assume it contains all headers */
	for (m = mp->m_next; m != NULL; m = m->m_next) {
		num++;
		mss -= m->m_len % mp->m_pkthdr.tso_segsz;

		if (mss < 1) {
			if (num > IXL_SPARSE_CHAIN)
				return (true);
			num = (mss == 0) ? 0 : 1;
			mss += mp->m_pkthdr.tso_segsz;
		}
	}

	return (false);
}
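/*
 * Worked example for the check above (illustrative values only): with
 * tso_segsz = 1448 and a chain whose non-header mbufs each carry 100
 * bytes, 15 mbufs are consumed before one MSS of payload is covered.
 * Since 15 exceeds IXL_SPARSE_CHAIN (8), the chain is reported sparse
 * and ixl_xmit() runs it through m_defrag() before the DMA load.
 */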
/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
	struct ixl_vsi *vsi = que->vsi;
	struct i40e_hw *hw = vsi->hw;
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;
	struct i40e_tx_desc *txd = NULL;
	struct mbuf *m_head, *m;
	int i, j, error, nsegs;
	int first, last = 0;
	u16 vtag = 0;
	u32 cmd, off;
	bus_dmamap_t map;
	bus_dma_tag_t tag;
	bus_dma_segment_t segs[IXL_MAX_TSO_SEGS];

	cmd = off = 0;
	m_head = *m_headp;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail;
	buf = &txr->buffers[first];
	map = buf->map;
	tag = txr->tx_tag;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		/* Use larger mapping for TSO */
		tag = txr->tso_tag;
		if (ixl_tso_detect_sparse(m_head)) {
			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
			*m_headp = m;
		}
	}

	/*
	 * Map the packet for DMA.
	 */
	error = bus_dmamap_load_mbuf_sg(tag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			que->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(tag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error != 0) {
			que->tx_dmamap_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		que->tx_dmamap_failed++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->avail - 2) {
		txr->no_desc++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/* Set up the TSO/CSUM offload */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
		if (error)
			goto xmit_fail;
	}

	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Grab the VLAN tag */
	if (m_head->m_flags & M_VLANTAG) {
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		vtag = htole16(m_head->m_pkthdr.ether_vtag);
	}

	i = txr->next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		buf = &txr->buffers[i];
		buf->tag = tag; /* Keep track of the type tag */
		txd = &txr->base[i];
		seglen = segs[j].ds_len;

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
		    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

		last = i; /* descriptor that will get completion IRQ */

		if (++i == que->num_desc)
			i = 0;

		buf->m_head = NULL;
		buf->eop_index = -1;
	}
	/* Set the last descriptor for report */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	txr->avail -= nsegs;
	txr->next_avail = i;
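	/*
	 * The chain was DMA-loaded into 'map', which belongs to the
	 * first buffer, but the mbuf itself is stored on the last
	 * buffer. The swap below keeps each ixl_tx_buf's map paired
	 * with the mbuf that ixl_txeof() will eventually unload.
	 */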
	buf->m_head = m_head;
	/* Swap the dma map between the first and last descriptor */
	txr->buffers[first].map = buf->map;
	buf->map = map;
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	buf = &txr->buffers[first];
	buf->eop_index = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	wr32(hw, txr->tail, i);

	/* Mark outstanding work */
	atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
	return (0);

xmit_fail:
	bus_dmamap_unload(tag, buf->map);
	return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_vsi *vsi = que->vsi;
	device_t dev = vsi->dev;
	struct ixl_tx_buf *buf;
	int error = 0;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(NULL,	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    IXL_TSO_SIZE,		/* maxsize */
	    IXL_MAX_TX_SEGS,		/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->tx_tag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Make a special tag for TSO */
	if ((error = bus_dma_tag_create(NULL,	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    IXL_TSO_SIZE,		/* maxsize */
	    IXL_MAX_TSO_SEGS,		/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->tso_tag))) {
		device_printf(dev, "Unable to allocate TX TSO DMA tag\n");
		goto fail;
	}

	if (!(txr->buffers =
	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer default dma maps */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		buf->tag = txr->tx_tag;
		error = bus_dmamap_create(buf->tag, 0, &buf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}
fail:
	return (error);
}
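/*
 * As created above, the two tags differ only in their segment limit
 * (IXL_MAX_TX_SEGS vs. IXL_MAX_TSO_SEGS): ixl_xmit() selects tso_tag
 * for CSUM_TSO packets so that a long TSO chain can be DMA-loaded
 * without first being defragmented.
 */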
/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *	- called by init, it clears the descriptor ring,
 *	  and frees any stale mbufs
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	/* Clear the old ring contents */
	IXL_TX_LOCK(txr);

#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->base,
	    (sizeof(struct i40e_tx_desc)) * que->num_desc);

	/* Reset indices */
	txr->next_avail = 0;
	txr->next_to_clean = 0;

	/* Reset watchdog status */
	txr->watchdog_timer = 0;

#ifdef IXL_FDIR
	/* Initialize flow director */
	txr->atr_rate = ixl_atr_rate;
	txr->atr_count = 0;
#endif
	/* Free any existing tx mbufs. */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, i, into the
		 * corresponding netmap slot index, si.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
			netmap_load_map(na, buf->tag, buf->map,
			    NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		buf->eop_index = -1;
	}

	/* Set number of descriptors available */
	txr->avail = que->num_desc;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXL_TX_UNLOCK(txr);
}


/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	for (int i = 0; i < que->num_desc; i++) {
		buf = &txr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
			if (buf->map != NULL) {
				bus_dmamap_destroy(buf->tag, buf->map);
				buf->map = NULL;
			}
		} else if (buf->map != NULL) {
			bus_dmamap_unload(buf->tag, buf->map);
			bus_dmamap_destroy(buf->tag, buf->map);
			buf->map = NULL;
		}
	}
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->buffers != NULL) {
		free(txr->buffers, M_DEVBUF);
		txr->buffers = NULL;
	}
	if (txr->tx_tag != NULL) {
		bus_dma_tag_destroy(txr->tx_tag);
		txr->tx_tag = NULL;
	}
	if (txr->tso_tag != NULL) {
		bus_dma_tag_destroy(txr->tso_tag);
		txr->tso_tag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

/*********************************************************************
 *
 *  Setup descriptor for hw offloads
 *
 **********************************************************************/

static int
ixl_tx_setup_offload(struct ixl_queue *que,
    struct mbuf *mp, u32 *cmd, u32 *off)
{
	struct ether_vlan_header *eh;
#ifdef INET
	struct ip *ip = NULL;
#endif
	struct tcphdr *th = NULL;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	int elen, ip_hlen = 0, tcp_hlen;
	u16 etype;
	u8 ipproto = 0;
	bool tso = FALSE;

	/* Set up the TSO context descriptor if required */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
		tso = ixl_tso_setup(que, mp);
		if (tso)
			++que->tso;
		else
			return (ENXIO);
	}

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		elen = ETHER_HDR_LEN;
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* The IP checksum must be recalculated with TSO */
		if (tso)
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		break;
#endif
	default:
		break;
	}

	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
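	/*
	 * The offset field encodes the header lengths in hardware
	 * units: MACLEN in 2-byte words (hence elen >> 1), IPLEN and
	 * the L4 lengths below in 4-byte words (hence the >> 2).
	 */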
	switch (ipproto) {
	case IPPROTO_TCP:
		tcp_hlen = th->th_off << 2;
		if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
			*off |= (tcp_hlen >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
#ifdef IXL_FDIR
		ixl_atr(que, th, etype);
#endif
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
			*off |= (sizeof(struct udphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		break;

	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
			*off |= (sizeof(struct sctphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		/* Fall Thru */
	default:
		break;
	}

	return (0);
}


/**********************************************************************
 *
 *  Setup context for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static bool
ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
{
	struct tx_ring *txr = &que->txr;
	struct i40e_tx_context_desc *TXD;
	struct ixl_tx_buf *buf;
	u32 cmd, mss, type, tsolen;
	u16 etype;
	int idx, elen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
#ifdef INET
	struct ip *ip;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#if defined(INET6) || defined(INET)
	struct tcphdr *th;
#endif
	u64 type_cmd_tso_mss;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		elen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	switch (ntohs(etype)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (FALSE);
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		tcp_hlen = th->th_off << 2;
		/*
		 * The corresponding flag is set by the stack in the IPv4
		 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
		 * So, set it here because the rest of the flow requires it.
		 */
		mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		if (ip->ip_p != IPPROTO_TCP)
			return (FALSE);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		tcp_hlen = th->th_off << 2;
		break;
#endif
	default:
		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
		    __func__, ntohs(etype));
		return FALSE;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
		return FALSE;

	idx = txr->next_avail;
	buf = &txr->buffers[idx];
	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	/* TSO MSS must not be less than 64 */
	if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
		que->mss_too_small++;
		mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
	}
	mss = mp->m_pkthdr.tso_segsz;

	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	buf->m_head = NULL;
	buf->eop_index = -1;

	if (++idx == que->num_desc)
		idx = 0;

	txr->avail--;
	txr->next_avail = idx;

	return TRUE;
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	void *head = &txr->base[que->num_desc];
	return LE32_TO_CPU(*(volatile __le32 *)head);
}
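/*
 * Background note: with head writeback the hardware DMA-writes the
 * index of the last completed descriptor into host memory just past
 * the ring proper (base[num_desc]), which is why completion state is
 * read from RAM here rather than from a register; the ring allocation
 * is expected to reserve that extra slot.
 */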
/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
bool
ixl_txeof(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	u32 first, last, head, done, processed;
	struct ixl_tx_buf *buf;
	struct i40e_tx_desc *tx_desc, *eop_desc;

	mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
	// XXX todo: implement moderation
	if (netmap_tx_irq(que->vsi->ifp, que->me))
		return FALSE;
#endif /* DEV_NETMAP */

	/* These are not the descriptors you seek, move along :) */
	if (txr->avail == que->num_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	buf = &txr->buffers[first];
	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
	last = buf->eop_index;
	if (last == -1)
		return FALSE;
	eop_desc = (struct i40e_tx_desc *)&txr->base[last];

	/* Get the Head WB value */
	head = ixl_get_tx_head(que);

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == que->num_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** The HEAD index of the ring is written in a
	** defined location; this, rather than a done bit,
	** is what is used to keep track of what must be
	** 'cleaned'.
	*/
	while (first != head) {
		/* We clean the range of the packet */
		while (first != done) {
			++txr->avail;
			++processed;

			if (buf->m_head) {
				txr->bytes += /* for ITR adjustment */
				    buf->m_head->m_pkthdr.len;
				txr->tx_bytes += /* for TX stats */
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
				buf->map = NULL;
			}
			buf->eop_index = -1;

			if (++first == que->num_desc)
				first = 0;

			buf = &txr->buffers[first];
			tx_desc = &txr->base[first];
		}
		++txr->packets;
		/* See if there is more work now */
		last = buf->eop_index;
		if (last != -1) {
			eop_desc = &txr->base[last];
			/* Get next done point */
			if (++last == que->num_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	 * If there are no pending descriptors, clear the timeout.
	 */
	if (txr->avail == que->num_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	return TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state, so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, and can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
	struct ixl_vsi *vsi = que->vsi;
	struct rx_ring *rxr = &que->rxr;
	bus_dma_segment_t hseg[1];
	bus_dma_segment_t pseg[1];
	struct ixl_rx_buf *buf;
	struct mbuf *mh, *mp;
	int i, j, nsegs, error;
	bool refreshed = FALSE;

	i = j = rxr->next_refresh;
	/* Control the loop with one beyond */
	if (++j == que->num_desc)
		j = 0;

	while (j != limit) {
		buf = &rxr->buffers[i];
		if (rxr->hdr_split == FALSE)
			goto no_split;

		if (buf->m_head == NULL) {
			mh = m_gethdr(M_NOWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			mh = buf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(mh);
			buf->m_head = NULL;
			goto update;
		}
		buf->m_head = mh;
		bus_dmamap_sync(rxr->htag, buf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.hdr_addr =
		    htole64(hseg[0].ds_addr);

no_split:
		if (buf->m_pack == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
		} else
			mp = buf->m_pack;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			m_free(mp);
			buf->m_pack = NULL;
			goto update;
		}
		buf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, buf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.pkt_addr =
		    htole64(pseg[0].ds_addr);
		/* Used only when doing header split */
		rxr->base[i].read.hdr_addr = 0;

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_refresh = i;
		if (++j == que->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've defined.
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
	struct rx_ring *rxr = &que->rxr;
	struct ixl_vsi *vsi = que->vsi;
	device_t dev = vsi->dev;
	struct ixl_rx_buf *buf;
	int i, bsize, error;

	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
	if (!(rxr->buffers =
	    (struct ixl_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,	/* parent */
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    MSIZE,		/* maxsize */
	    1,			/* nsegments */
	    MSIZE,		/* maxsegsize */
	    0,			/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA htag\n");
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,	/* parent */
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    MJUM16BYTES,	/* maxsize */
	    1,			/* nsegments */
	    MJUM16BYTES,	/* maxsegsize */
	    0,			/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA ptag\n");
		return (error);
	}

	for (i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &buf->hmap);
		if (error) {
			device_printf(dev, "Unable to create RX head map\n");
			break;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &buf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX pkt map\n");
			break;
		}
	}

	return (error);
}
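/*
 * Each RX buffer carries two maps: hmap (from htag, MSIZE) for the
 * small header mbuf used by the header-split path, and pmap (from
 * ptag, up to MJUM16BYTES) for the payload cluster. Header split is
 * currently forced off in ixl_init_rx_ring(), but both maps are
 * created so the split path could be enabled without reallocation.
 */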
/*********************************************************************
 *
 *  (Re)Initialize the queue receive ring and its buffers.
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
	struct rx_ring *rxr = &que->rxr;
	struct ixl_vsi *vsi = que->vsi;
#if defined(INET6) || defined(INET)
	struct ifnet *ifp = vsi->ifp;
	struct lro_ctrl *lro = &rxr->lro;
#endif
	struct ixl_rx_buf *buf;
	bus_dma_segment_t pseg[1], hseg[1];
	int rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	IXL_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixl_init_tx_ring() */
	slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
	/* Clear the ring contents */
	rsize = roundup2(que->num_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->base, rsize);
	/* Cleanup any existing buffers */
	for (int i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, buf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, buf->hmap);
			buf->m_head->m_flags |= M_PKTHDR;
			m_freem(buf->m_head);
		}
		if (buf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, buf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, buf->pmap);
			buf->m_pack->m_flags |= M_PKTHDR;
			m_freem(buf->m_pack);
		}
		buf->m_head = NULL;
		buf->m_pack = NULL;
	}

	/* header split is off */
	rxr->hdr_split = FALSE;

	/* Now replenish the mbufs */
	for (int j = 0; j != que->num_desc; ++j) {
		struct mbuf *mh, *mp;

		buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ).
		 * No need to allocate an mbuf, so end the
		 * block with a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->base[j].read.pkt_addr = htole64(paddr);
			rxr->base[j].read.hdr_addr = 0;
			continue;
		}
#endif /* DEV_NETMAP */
		/*
		** Don't allocate mbufs if not
		** doing header split, it's wasteful
		*/
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
		if (buf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m_adj(buf->m_head, ETHER_ALIGN);
		mh = buf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, buf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
			goto fail;
		bus_dmamap_sync(rxr->htag,
		    buf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, rxr->mbuf_sz);
		if (buf->m_pack == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = buf->m_pack;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    buf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
		rxr->base[j].read.hdr_addr = 0;
	}


	/* Setup our descriptor indices */
	rxr->next_check = 0;
	rxr->next_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->split = 0;
	rxr->bytes = 0;
	rxr->discard = FALSE;

	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
	ixl_flush(vsi->hw);
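	/*
	 * Software LRO setup: when IFCAP_LRO is enabled, incoming TCP
	 * segments are handed to tcp_lro_rx() in ixl_rx_input() and
	 * coalesced before going up the stack; the accumulated work is
	 * flushed at the end of each ixl_rxeof() pass.
	 */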
#if defined(INET6) || defined(INET)
	/*
	** Now set up the LRO interface:
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			if_printf(ifp, "queue %d: LRO Initialization failed!\n",
			    que->me);
			goto fail;
		}
		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
		rxr->lro_enabled = TRUE;
		lro->ifp = vsi->ifp;
	}
#endif

	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	IXL_RX_UNLOCK(rxr);
	return (error);
}


/*********************************************************************
 *
 *  Free station receive ring data structures
 *
 **********************************************************************/
void
ixl_free_que_rx(struct ixl_queue *que)
{
	struct rx_ring *rxr = &que->rxr;
	struct ixl_rx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	/* Cleanup any existing buffers */
	if (rxr->buffers != NULL) {
		for (int i = 0; i < que->num_desc; i++) {
			buf = &rxr->buffers[i];
			if (buf->m_head != NULL) {
				bus_dmamap_sync(rxr->htag, buf->hmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->htag, buf->hmap);
				buf->m_head->m_flags |= M_PKTHDR;
				m_freem(buf->m_head);
			}
			if (buf->m_pack != NULL) {
				bus_dmamap_sync(rxr->ptag, buf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, buf->pmap);
				buf->m_pack->m_flags |= M_PKTHDR;
				m_freem(buf->m_pack);
			}
			buf->m_head = NULL;
			buf->m_pack = NULL;
			if (buf->hmap != NULL) {
				bus_dmamap_destroy(rxr->htag, buf->hmap);
				buf->hmap = NULL;
			}
			if (buf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, buf->pmap);
				buf->pmap = NULL;
			}
		}
		if (rxr->buffers != NULL) {
			free(rxr->buffers, M_DEVBUF);
			rxr->buffers = NULL;
		}
	}

	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

static inline void
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
{

#if defined(INET6) || defined(INET)
	/*
	 * At the moment LRO is only for IPv4/TCP packets, and the TCP
	 * checksum of the packet should have been computed by hardware.
	 * Also, it should not have a VLAN tag in its ethernet header.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
#endif
	IXL_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXL_RX_LOCK(rxr);
}


static inline void
ixl_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixl_rx_buf *rbuf;

	rbuf = &rxr->buffers[i];

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
	}

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}

	if (rbuf->m_pack) {
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
	}

	return;
}

#ifdef RSS
/*
** ixl_ptype_to_hash: parse the packet type
** to determine the appropriate hash.
*/
static inline int
ixl_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;
	u8 ex = 0;

	decoded = decode_rx_desc_ptype(ptype);
	ex = decoded.outer_frag;

	if (!decoded.known)
		return M_HASHTYPE_OPAQUE_HASH;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE_HASH;

	/* Note: anything that gets to this point is IP */
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			if (ex)
				return M_HASHTYPE_RSS_TCP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_TCP_IPV6;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV6;
		default:
			if (ex)
				return M_HASHTYPE_RSS_IPV6_EX;
			else
				return M_HASHTYPE_RSS_IPV6;
		}
	}
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV4;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV4_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV4;
		default:
			return M_HASHTYPE_RSS_IPV4;
		}
	}
	/* We should never get here!! */
	return M_HASHTYPE_OPAQUE_HASH;
}
#endif /* RSS */

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and passes data that has been
 *  DMA'd into host memory up to the upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixl_rxeof(struct ixl_queue *que, int count)
{
	struct ixl_vsi *vsi = que->vsi;
	struct rx_ring *rxr = &que->rxr;
	struct ifnet *ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
	struct lro_ctrl *lro = &rxr->lro;
#endif
	int i, nextp, processed = 0;
	union i40e_rx_desc *cur;
	struct ixl_rx_buf *rbuf, *nbuf;

	IXL_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	if (netmap_rx_irq(ifp, que->me, &count)) {
		IXL_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_check; count != 0;) {
		struct mbuf *sendmp, *mh, *mp;
		u32 status, error;
		u16 hlen, plen, vtag;
		u64 qword;
		u8 ptype;
		bool eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->base[i];
		qword = le64toh(cur->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
		    >> I40E_RXD_QW1_STATUS_SHIFT;
		error = (qword & I40E_RXD_QW1_ERROR_MASK)
		    >> I40E_RXD_QW1_ERROR_SHIFT;
		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
		    >> I40E_RXD_QW1_PTYPE_SHIFT;
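		/*
		 * qword1 of the writeback descriptor packs the status
		 * and error bits, the packet type, and the header and
		 * payload buffer lengths into a single 64-bit field;
		 * everything below works from the values just decoded.
		 */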
		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
			++rxr->not_done;
			break;
		}
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		cur->wb.qword1.status_error_len = 0;
		rbuf = &rxr->buffers[i];
		mh = rbuf->m_head;
		mp = rbuf->m_pack;
		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
		else
			vtag = 0;

		/*
		** Make sure bad packets are discarded;
		** note that only the EOP descriptor has
		** valid error results.
		*/
		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			rxr->desc_errs++;
			ixl_rx_discard(rxr, i);
			goto next_desc;
		}

		/* Prefetch the next buffer */
		if (!eop) {
			nextp = i + 1;
			if (nextp == que->num_desc)
				nextp = 0;
			nbuf = &rxr->buffers[nextp];
			prefetch(nbuf);
		}

		/*
		** The header mbuf is ONLY used when header
		** split is enabled, otherwise we get normal
		** behavior, i.e., both header and payload
		** are DMA'd into the payload buffer.
		**
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
			if (hlen > IXL_RX_HDR)
				hlen = IXL_RX_HDR;
			mh->m_len = hlen;
			mh->m_flags |= M_PKTHDR;
			mh->m_next = NULL;
			mh->m_pkthdr.len = mh->m_len;
			/* Null buf pointer so it is refreshed */
			rbuf->m_head = NULL;
			/*
			** Check the payload length; this
			** could be zero if it's a small
			** packet.
			*/
			if (plen > 0) {
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_pkthdr.len += mp->m_len;
				/* Null buf pointer so it is refreshed */
				rbuf->m_pack = NULL;
				rxr->split++;
			}
			/*
			** Now create the forward
			** chain so when complete
			** we won't have to.
			*/
			if (eop == 0) {
				/* stash the chain head */
				nbuf->fmp = mh;
				/* Make forward chain */
				if (plen)
					mp->m_next = nbuf->m_pack;
				else
					mh->m_next = nbuf->m_pack;
			} else {
				/* Singlet, prepare to send */
				sendmp = mh;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mp->m_len = plen;
			/*
			** See if there is a stored head
			** that determines what we are
			*/
			sendmp = rbuf->fmp;
			rbuf->m_pack = rbuf->fmp = NULL;

			if (sendmp != NULL) /* secondary frag */
				sendmp->m_pkthdr.len += mp->m_len;
			else {
				/* first desc of a non-ps chain */
				sendmp = mp;
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;
			}
			/* Pass the head pointer on */
			if (eop == 0) {
				nbuf->fmp = sendmp;
				sendmp = NULL;
				mp->m_next = nbuf->m_pack;
			}
		}
		++processed;
		/* Sending this frame? */
		if (eop) {
			sendmp->m_pkthdr.rcvif = ifp;
			/* gather stats */
			rxr->rx_packets++;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* capture data for dynamic ITR adjustment */
			rxr->packets++;
			rxr->bytes += sendmp->m_pkthdr.len;
			/* Set VLAN tag (field only valid in eop desc) */
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixl_rx_checksum(sendmp, status, error, ptype);
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.qword0.hi_dword.rss);
			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
		}
next_desc:
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == que->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_check = i;
			ixl_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixl_refresh_mbufs(que, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixl_rx_unrefreshed(que))
		ixl_refresh_mbufs(que, i);

	rxr->next_check = i;

#if defined(INET6) || defined(INET)
	/*
	 * Flush any outstanding LRO work
	 */
#if __FreeBSD_version >= 1100105
	tcp_lro_flush_all(lro);
#else
	struct lro_entry *queued;
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif
#endif /* defined(INET6) || defined(INET) */

	IXL_RX_UNLOCK(rxr);
	return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it.
 *
 *********************************************************************/
static void
ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;

	decoded = decode_rx_desc_ptype(ptype);

	/* Errors? */
	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* IPv6 with extension headers likely have bad csum */
	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
		if (status &
		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
			mp->m_pkthdr.csum_flags = 0;
			return;
		}

	/* IP Checksum Good */
	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
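	/*
	 * When the hardware has also validated the L4 payload (the
	 * L3L4P status bit), advertise a ready-made pseudo-header
	 * checksum of 0xffff so the stack skips its own verification.
	 */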
	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
		mp->m_pkthdr.csum_flags |=
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		mp->m_pkthdr.csum_data |= htons(0xffff);
	}
	return;
}

#if __FreeBSD_version >= 1100000
uint64_t
ixl_get_counter(if_t ifp, ift_counter cnt)
{
	struct ixl_vsi *vsi;

	vsi = if_getsoftc(ifp);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (vsi->ipackets);
	case IFCOUNTER_IERRORS:
		return (vsi->ierrors);
	case IFCOUNTER_OPACKETS:
		return (vsi->opackets);
	case IFCOUNTER_OERRORS:
		return (vsi->oerrors);
	case IFCOUNTER_COLLISIONS:
		/* Collisions are by standard impossible in 40G/10G Ethernet */
		return (0);
	case IFCOUNTER_IBYTES:
		return (vsi->ibytes);
	case IFCOUNTER_OBYTES:
		return (vsi->obytes);
	case IFCOUNTER_IMCASTS:
		return (vsi->imcasts);
	case IFCOUNTER_OMCASTS:
		return (vsi->omcasts);
	case IFCOUNTER_IQDROPS:
		return (vsi->iqdrops);
	case IFCOUNTER_OQDROPS:
		return (vsi->oqdrops);
	case IFCOUNTER_NOPROTO:
		return (vsi->noproto);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif