1/******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 280182 2015-03-17 18:32:28Z jfv $*/
33/*$FreeBSD: stable/10/sys/dev/ixgbe/ix_txrx.c 283620 2015-05-27 17:44:11Z erj $*/
34
35
36#ifndef IXGBE_STANDALONE_BUILD
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#include "opt_rss.h"
39#endif
40
41#include "ixgbe.h"
42
 44#ifdef RSS
 45#include <netinet/in_rss.h>
#endif
43#ifdef DEV_NETMAP
44#include <net/netmap.h>
45#include <sys/selinfo.h>
46#include <dev/netmap/netmap_kern.h>
47
48extern int ix_crcstrip;
49#endif
50
51/*
52** HW RSC control:
53** this feature only works with
54** IPv4, and only on 82599 and later.
55** Also this will cause IP forwarding to
56** fail and that can't be controlled by
57** the stack as LRO can. For all these
 58** reasons I've deemed it best to leave
 59** this off and not bother with a tunable
 60** interface; enabling it requires
 61** recompiling the driver.
62*/
63static bool ixgbe_rsc_enable = FALSE;
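/*
** As the comment notes there is no sysctl/tunable for this: to experiment
** with HW RSC, change the initializer above to TRUE and rebuild the
** driver; ixgbe_setup_receive_ring() will then call ixgbe_setup_hw_rsc()
** instead of initializing software LRO.
*/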
64
65#ifdef IXGBE_FDIR
66/*
67** For Flow Director: this is the
68** number of TX packets we sample
 69** for the filter pool; this means
70** every 20th packet will be probed.
71**
72** This feature can be disabled by
73** setting this to 0.
74*/
75static int atr_sample_rate = 20;
76#endif
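/*
** Worked example of the sampling above: with atr_sample_rate = 20,
** ixgbe_xmit() increments txr->atr_count for each frame sent and, once
** the count reaches 20, calls ixgbe_atr() to program a signature filter
** and resets the count, so one frame in twenty is probed.
*/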
77
78/* Shared PCI config read/write */
79inline u16
80ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
81{
82 u16 value;
83
84 value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
85 reg, 2);
86
87 return (value);
88}
89
90inline void
91ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
92{
93 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
94 reg, value, 2);
95
96 return;
97}
98
99/*********************************************************************
100 * Local Function prototypes
101 *********************************************************************/
102static void ixgbe_setup_transmit_ring(struct tx_ring *);
103static void ixgbe_free_transmit_buffers(struct tx_ring *);
104static int ixgbe_setup_receive_ring(struct rx_ring *);
105static void ixgbe_free_receive_buffers(struct rx_ring *);
106
107static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
108static void ixgbe_refresh_mbufs(struct rx_ring *, int);
109static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
110static int ixgbe_tx_ctx_setup(struct tx_ring *,
111 struct mbuf *, u32 *, u32 *);
112static int ixgbe_tso_setup(struct tx_ring *,
113 struct mbuf *, u32 *, u32 *);
114#ifdef IXGBE_FDIR
115static void ixgbe_atr(struct tx_ring *, struct mbuf *);
116#endif
117static __inline void ixgbe_rx_discard(struct rx_ring *, int);
118static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
119 struct mbuf *, u32);
120
121#ifdef IXGBE_LEGACY_TX
122/*********************************************************************
123 * Transmit entry point
124 *
125 * ixgbe_start is called by the stack to initiate a transmit.
126 * The driver will remain in this routine as long as there are
127 * packets to transmit and transmit resources are available.
 128 *	In case resources are not available, the stack is notified and
129 * the packet is requeued.
130 **********************************************************************/
131
132void
133ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
134{
135 struct mbuf *m_head;
136 struct adapter *adapter = txr->adapter;
137
138 IXGBE_TX_LOCK_ASSERT(txr);
139
140 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
141 return;
142 if (!adapter->link_active)
143 return;
144
145 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
146 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
147 break;
148
149 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
150 if (m_head == NULL)
151 break;
152
153 if (ixgbe_xmit(txr, &m_head)) {
154 if (m_head != NULL)
155 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
156 break;
157 }
158 /* Send a copy of the frame to the BPF listener */
159 ETHER_BPF_MTAP(ifp, m_head);
160 }
161 return;
162}
163
164/*
165 * Legacy TX start - called by the stack, this
166 * always uses the first tx ring, and should
167 * not be used with multiqueue tx enabled.
168 */
169void
170ixgbe_start(struct ifnet *ifp)
171{
172 struct adapter *adapter = ifp->if_softc;
173 struct tx_ring *txr = adapter->tx_rings;
174
175 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
176 IXGBE_TX_LOCK(txr);
177 ixgbe_start_locked(txr, ifp);
178 IXGBE_TX_UNLOCK(txr);
179 }
180 return;
181}
182
183#else /* ! IXGBE_LEGACY_TX */
184
185/*
186** Multiqueue Transmit driver
187**
188*/
189int
190ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
191{
192 struct adapter *adapter = ifp->if_softc;
193 struct ix_queue *que;
194 struct tx_ring *txr;
195 int i, err = 0;
193#ifdef RSS
194 uint32_t bucket_id;
195#endif
196
197 /*
198 * When doing RSS, map it to the same outbound queue
199 * as the incoming flow would be mapped to.
200 *
 201	 * If everything is set up correctly, it should be the
 202	 * same bucket as the one the current CPU is mapped to.
203 */
204 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
205#ifdef RSS
206 if (rss_hash2bucket(m->m_pkthdr.flowid,
207 M_HASHTYPE_GET(m), &bucket_id) == 0)
208 /* TODO: spit out something if bucket_id > num_queues? */
209 i = bucket_id % adapter->num_queues;
210 else
211#endif
212 i = m->m_pkthdr.flowid % adapter->num_queues;
213 } else
207 i = curcpu % adapter->num_queues;
208
209 /* Check for a hung queue and pick alternative */
210 if (((1 << i) & adapter->active_queues) == 0)
211 i = ffsl(adapter->active_queues);
212
213 txr = &adapter->tx_rings[i];
214 que = &adapter->queues[i];
215
216 err = drbr_enqueue(ifp, txr->br, m);
217 if (err)
218 return (err);
219 if (IXGBE_TX_TRYLOCK(txr)) {
220 ixgbe_mq_start_locked(ifp, txr);
221 IXGBE_TX_UNLOCK(txr);
222 } else
223 taskqueue_enqueue(que->tq, &txr->txq_task);
224
225 return (0);
226}
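/*
** A note on the pattern in ixgbe_mq_start() above: the frame is always
** placed on the ring's buf_ring before any transmit attempt; if the TX
** lock cannot be taken right away, the drain is deferred to the per-queue
** taskqueue (ixgbe_deferred_mq_start()), so the frame is never dropped on
** lock contention.
*/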
227
228int
229ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
230{
231 struct adapter *adapter = txr->adapter;
232 struct mbuf *next;
233 int enqueued = 0, err = 0;
234
235 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
236 adapter->link_active == 0)
237 return (ENETDOWN);
238
239 /* Process the queue */
240#if __FreeBSD_version < 901504
241 next = drbr_dequeue(ifp, txr->br);
242 while (next != NULL) {
243 if ((err = ixgbe_xmit(txr, &next)) != 0) {
244 if (next != NULL)
245 err = drbr_enqueue(ifp, txr->br, next);
246#else
247 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
248 if ((err = ixgbe_xmit(txr, &next)) != 0) {
249 if (next == NULL) {
250 drbr_advance(ifp, txr->br);
251 } else {
252 drbr_putback(ifp, txr->br, next);
253 }
254#endif
255 break;
256 }
257#if __FreeBSD_version >= 901504
258 drbr_advance(ifp, txr->br);
259#endif
260 enqueued++;
261#if 0 // this is VF-only
262#if __FreeBSD_version >= 1100036
263 /*
264 * Since we're looking at the tx ring, we can check
 266			 * to see if we're a VF by examining our tail register
266 * address.
267 */
268 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
269 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
270#endif
271#endif
272 /* Send a copy of the frame to the BPF listener */
273 ETHER_BPF_MTAP(ifp, next);
274 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
275 break;
276#if __FreeBSD_version < 901504
277 next = drbr_dequeue(ifp, txr->br);
278#endif
279 }
280
281 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
282 ixgbe_txeof(txr);
283
284 return (err);
285}
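/*
** The two preprocessor branches in ixgbe_mq_start_locked() differ only in
** the buf_ring API available: kernels older than 901504 dequeue the mbuf
** up front and re-enqueue it if ixgbe_xmit() fails, while newer kernels
** peek first and then either advance past the consumed mbuf or put the
** untouched one back.
*/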
286
287/*
288 * Called from a taskqueue to drain queued transmit packets.
289 */
290void
291ixgbe_deferred_mq_start(void *arg, int pending)
292{
293 struct tx_ring *txr = arg;
294 struct adapter *adapter = txr->adapter;
295 struct ifnet *ifp = adapter->ifp;
296
297 IXGBE_TX_LOCK(txr);
298 if (!drbr_empty(ifp, txr->br))
299 ixgbe_mq_start_locked(ifp, txr);
300 IXGBE_TX_UNLOCK(txr);
301}
302
303/*
304 * Flush all ring buffers
305 */
306void
307ixgbe_qflush(struct ifnet *ifp)
308{
309 struct adapter *adapter = ifp->if_softc;
310 struct tx_ring *txr = adapter->tx_rings;
311 struct mbuf *m;
312
313 for (int i = 0; i < adapter->num_queues; i++, txr++) {
314 IXGBE_TX_LOCK(txr);
315 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
316 m_freem(m);
317 IXGBE_TX_UNLOCK(txr);
318 }
319 if_qflush(ifp);
320}
321#endif /* IXGBE_LEGACY_TX */
322
323
324/*********************************************************************
325 *
326 * This routine maps the mbufs to tx descriptors, allowing the
327 * TX engine to transmit the packets.
328 * - return 0 on success, positive on failure
329 *
330 **********************************************************************/
331
332static int
333ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
334{
335 struct adapter *adapter = txr->adapter;
336 u32 olinfo_status = 0, cmd_type_len;
337 int i, j, error, nsegs;
338 int first;
339 bool remap = TRUE;
340 struct mbuf *m_head;
341 bus_dma_segment_t segs[adapter->num_segs];
342 bus_dmamap_t map;
343 struct ixgbe_tx_buf *txbuf;
344 union ixgbe_adv_tx_desc *txd = NULL;
345
346 m_head = *m_headp;
347
348 /* Basic descriptor defines */
349 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
350 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
351
352 if (m_head->m_flags & M_VLANTAG)
353 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
354
355 /*
356 * Important to capture the first descriptor
357 * used because it will contain the index of
358 * the one we tell the hardware to report back
359 */
360 first = txr->next_avail_desc;
361 txbuf = &txr->tx_buffers[first];
362 map = txbuf->map;
363
364 /*
365 * Map the packet for DMA.
366 */
367retry:
368 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
369 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
370
371 if (__predict_false(error)) {
372 struct mbuf *m;
373
374 switch (error) {
375 case EFBIG:
376 /* Try it again? - one try */
377 if (remap == TRUE) {
378 remap = FALSE;
379 /*
380 * XXX: m_defrag will choke on
381 * non-MCLBYTES-sized clusters
382 */
383 m = m_defrag(*m_headp, M_NOWAIT);
384 if (m == NULL) {
385 adapter->mbuf_defrag_failed++;
386 m_freem(*m_headp);
387 *m_headp = NULL;
388 return (ENOBUFS);
389 }
390 *m_headp = m;
391 goto retry;
392 } else
393 return (error);
394 case ENOMEM:
395 txr->no_tx_dma_setup++;
396 return (error);
397 default:
398 txr->no_tx_dma_setup++;
399 m_freem(*m_headp);
400 *m_headp = NULL;
401 return (error);
402 }
403 }
404
405 /* Make certain there are enough descriptors */
406 if (nsegs > txr->tx_avail - 2) {
407 txr->no_desc_avail++;
408 bus_dmamap_unload(txr->txtag, map);
409 return (ENOBUFS);
410 }
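	/*
	 * The "- 2" in the check above keeps headroom beyond the data
	 * segments: one slot for the offload/TSO context descriptor that
	 * ixgbe_tx_ctx_setup() may add below, plus one descriptor of slack.
	 */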
411 m_head = *m_headp;
412
413 /*
414 * Set up the appropriate offload context
415 * this will consume the first descriptor
416 */
417 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
418 if (__predict_false(error)) {
419 if (error == ENOBUFS)
420 *m_headp = NULL;
421 return (error);
422 }
423
424#ifdef IXGBE_FDIR
425 /* Do the flow director magic */
426 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
427 ++txr->atr_count;
428 if (txr->atr_count >= atr_sample_rate) {
429 ixgbe_atr(txr, m_head);
430 txr->atr_count = 0;
431 }
432 }
433#endif
434
433 olinfo_status |= IXGBE_ADVTXD_CC;
435 i = txr->next_avail_desc;
436 for (j = 0; j < nsegs; j++) {
437 bus_size_t seglen;
438 bus_addr_t segaddr;
439
440 txbuf = &txr->tx_buffers[i];
441 txd = &txr->tx_base[i];
442 seglen = segs[j].ds_len;
443 segaddr = htole64(segs[j].ds_addr);
444
445 txd->read.buffer_addr = segaddr;
446 txd->read.cmd_type_len = htole32(txr->txd_cmd |
447 cmd_type_len |seglen);
448 txd->read.olinfo_status = htole32(olinfo_status);
449
450 if (++i == txr->num_desc)
451 i = 0;
452 }
453
454 txd->read.cmd_type_len |=
455 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456 txr->tx_avail -= nsegs;
457 txr->next_avail_desc = i;
458
459 txbuf->m_head = m_head;
460 /*
 461	 * Here we swap the map so the last descriptor,
 462	 * which gets the completion interrupt, has the
 463	 * real map, and the first descriptor gets the
 464	 * unused map from this descriptor.
 465	 */
466 txr->tx_buffers[first].map = txbuf->map;
467 txbuf->map = map;
468 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
469
470 /* Set the EOP descriptor that will be marked done */
471 txbuf = &txr->tx_buffers[first];
472 txbuf->eop = txd;
473
474 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
476 /*
477 * Advance the Transmit Descriptor Tail (Tdt), this tells the
478 * hardware that this frame is available to transmit.
479 */
480 ++txr->total_packets;
481 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
482
483 /* Mark queue as having work */
484 if (txr->busy == 0)
485 txr->busy = 1;
486
487 return (0);
487
488}
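/*
** Only the final data descriptor of a frame carries EOP|RS, so the
** hardware reports one completion per frame; ixgbe_txeof() later locates
** it through the eop pointer stored in the first tx_buffer of the chain.
*/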
489
490
491/*********************************************************************
492 *
493 * Allocate memory for tx_buffer structures. The tx_buffer stores all
494 * the information needed to transmit a packet on the wire. This is
495 * called only once at attach, setup is done every reset.
496 *
497 **********************************************************************/
498int
499ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
500{
501 struct adapter *adapter = txr->adapter;
502 device_t dev = adapter->dev;
503 struct ixgbe_tx_buf *txbuf;
504 int error, i;
505
506 /*
507 * Setup DMA descriptor areas.
508 */
509 if ((error = bus_dma_tag_create(
510 bus_get_dma_tag(adapter->dev), /* parent */
511 1, 0, /* alignment, bounds */
512 BUS_SPACE_MAXADDR, /* lowaddr */
513 BUS_SPACE_MAXADDR, /* highaddr */
514 NULL, NULL, /* filter, filterarg */
515 IXGBE_TSO_SIZE, /* maxsize */
516 adapter->num_segs, /* nsegments */
517 PAGE_SIZE, /* maxsegsize */
518 0, /* flags */
519 NULL, /* lockfunc */
520 NULL, /* lockfuncarg */
521 &txr->txtag))) {
522 device_printf(dev,"Unable to allocate TX DMA tag\n");
523 goto fail;
524 }
525
526 if (!(txr->tx_buffers =
527 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529 device_printf(dev, "Unable to allocate tx_buffer memory\n");
530 error = ENOMEM;
531 goto fail;
532 }
533
534 /* Create the descriptor buffer dma maps */
535 txbuf = txr->tx_buffers;
536 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
538 if (error != 0) {
539 device_printf(dev, "Unable to create TX DMA map\n");
540 goto fail;
541 }
542 }
543
544 return 0;
545fail:
 546	/* We free all; this handles the case where we are in the middle */
547 ixgbe_free_transmit_structures(adapter);
548 return (error);
549}
550
551/*********************************************************************
552 *
553 * Initialize a transmit ring.
554 *
555 **********************************************************************/
556static void
557ixgbe_setup_transmit_ring(struct tx_ring *txr)
558{
559 struct adapter *adapter = txr->adapter;
560 struct ixgbe_tx_buf *txbuf;
561 int i;
562#ifdef DEV_NETMAP
563 struct netmap_adapter *na = NA(adapter->ifp);
564 struct netmap_slot *slot;
565#endif /* DEV_NETMAP */
566
567 /* Clear the old ring contents */
568 IXGBE_TX_LOCK(txr);
569#ifdef DEV_NETMAP
570 /*
571 * (under lock): if in netmap mode, do some consistency
572 * checks and set slot to entry 0 of the netmap ring.
573 */
574 slot = netmap_reset(na, NR_TX, txr->me, 0);
575#endif /* DEV_NETMAP */
576 bzero((void *)txr->tx_base,
577 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
578 /* Reset indices */
579 txr->next_avail_desc = 0;
580 txr->next_to_clean = 0;
581
582 /* Free any existing tx buffers. */
583 txbuf = txr->tx_buffers;
584 for (i = 0; i < txr->num_desc; i++, txbuf++) {
585 if (txbuf->m_head != NULL) {
586 bus_dmamap_sync(txr->txtag, txbuf->map,
587 BUS_DMASYNC_POSTWRITE);
588 bus_dmamap_unload(txr->txtag, txbuf->map);
589 m_freem(txbuf->m_head);
590 txbuf->m_head = NULL;
591 }
592#ifdef DEV_NETMAP
593 /*
594 * In netmap mode, set the map for the packet buffer.
595 * NOTE: Some drivers (not this one) also need to set
596 * the physical buffer address in the NIC ring.
597 * Slots in the netmap ring (indexed by "si") are
598 * kring->nkr_hwofs positions "ahead" wrt the
599 * corresponding slot in the NIC ring. In some drivers
600 * (not here) nkr_hwofs can be negative. Function
601 * netmap_idx_n2k() handles wraparounds properly.
602 */
603 if (slot) {
604 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
605 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
606 }
607#endif /* DEV_NETMAP */
608 /* Clear the EOP descriptor pointer */
609 txbuf->eop = NULL;
610 }
611
612#ifdef IXGBE_FDIR
613 /* Set the rate at which we sample packets */
614 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615 txr->atr_sample = atr_sample_rate;
616#endif
617
618 /* Set number of descriptors available */
619 txr->tx_avail = adapter->num_tx_desc;
620
621 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623 IXGBE_TX_UNLOCK(txr);
624}
625
626/*********************************************************************
627 *
628 * Initialize all transmit rings.
629 *
630 **********************************************************************/
631int
632ixgbe_setup_transmit_structures(struct adapter *adapter)
633{
634 struct tx_ring *txr = adapter->tx_rings;
635
636 for (int i = 0; i < adapter->num_queues; i++, txr++)
637 ixgbe_setup_transmit_ring(txr);
638
639 return (0);
640}
641
642/*********************************************************************
643 *
644 * Free all transmit rings.
645 *
646 **********************************************************************/
647void
648ixgbe_free_transmit_structures(struct adapter *adapter)
649{
650 struct tx_ring *txr = adapter->tx_rings;
651
652 for (int i = 0; i < adapter->num_queues; i++, txr++) {
653 IXGBE_TX_LOCK(txr);
654 ixgbe_free_transmit_buffers(txr);
655 ixgbe_dma_free(adapter, &txr->txdma);
656 IXGBE_TX_UNLOCK(txr);
657 IXGBE_TX_LOCK_DESTROY(txr);
658 }
659 free(adapter->tx_rings, M_DEVBUF);
660}
661
662/*********************************************************************
663 *
664 * Free transmit ring related data structures.
665 *
666 **********************************************************************/
667static void
668ixgbe_free_transmit_buffers(struct tx_ring *txr)
669{
670 struct adapter *adapter = txr->adapter;
671 struct ixgbe_tx_buf *tx_buffer;
672 int i;
673
674 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
675
676 if (txr->tx_buffers == NULL)
677 return;
678
679 tx_buffer = txr->tx_buffers;
680 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681 if (tx_buffer->m_head != NULL) {
682 bus_dmamap_sync(txr->txtag, tx_buffer->map,
683 BUS_DMASYNC_POSTWRITE);
684 bus_dmamap_unload(txr->txtag,
685 tx_buffer->map);
686 m_freem(tx_buffer->m_head);
687 tx_buffer->m_head = NULL;
688 if (tx_buffer->map != NULL) {
689 bus_dmamap_destroy(txr->txtag,
690 tx_buffer->map);
691 tx_buffer->map = NULL;
692 }
693 } else if (tx_buffer->map != NULL) {
694 bus_dmamap_unload(txr->txtag,
695 tx_buffer->map);
696 bus_dmamap_destroy(txr->txtag,
697 tx_buffer->map);
698 tx_buffer->map = NULL;
699 }
700 }
701#ifdef IXGBE_LEGACY_TX
702 if (txr->br != NULL)
703 buf_ring_free(txr->br, M_DEVBUF);
704#endif
705 if (txr->tx_buffers != NULL) {
706 free(txr->tx_buffers, M_DEVBUF);
707 txr->tx_buffers = NULL;
708 }
709 if (txr->txtag != NULL) {
710 bus_dma_tag_destroy(txr->txtag);
711 txr->txtag = NULL;
712 }
713 return;
714}
715
716/*********************************************************************
717 *
718 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
719 *
720 **********************************************************************/
721
722static int
723ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724 u32 *cmd_type_len, u32 *olinfo_status)
725{
726 struct adapter *adapter = txr->adapter;
727 struct ixgbe_adv_tx_context_desc *TXD;
728 struct ether_vlan_header *eh;
729 struct ip *ip;
730 struct ip6_hdr *ip6;
731 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
732 int ehdrlen, ip_hlen = 0;
733 u16 etype;
734 u8 ipproto = 0;
735 int offload = TRUE;
736 int ctxd = txr->next_avail_desc;
737 u16 vtag = 0;
738
739 /* First check if TSO is to be used */
740 if (mp->m_pkthdr.csum_flags & CSUM_TSO)
741 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
742
743 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
744 offload = FALSE;
745
746 /* Indicate the whole packet as payload when not doing TSO */
747 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
748
749 /* Now ready a context descriptor */
750 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
751
752 /*
753 ** In advanced descriptors the vlan tag must
754 ** be placed into the context descriptor. Hence
755 ** we need to make one even if not doing offloads.
756 */
757 if (mp->m_flags & M_VLANTAG) {
758 vtag = htole16(mp->m_pkthdr.ether_vtag);
759 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
760 }
761 else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
762 return (0);
763
764 /*
765 * Determine where frame payload starts.
766 * Jump over vlan headers if already present,
767 * helpful for QinQ too.
768 */
769 eh = mtod(mp, struct ether_vlan_header *);
770 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
771 etype = ntohs(eh->evl_proto);
772 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
773 } else {
774 etype = ntohs(eh->evl_encap_proto);
775 ehdrlen = ETHER_HDR_LEN;
776 }
777
778 /* Set the ether header length */
779 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
780
781 if (offload == FALSE)
782 goto no_offloads;
783
784 switch (etype) {
785 case ETHERTYPE_IP:
786 ip = (struct ip *)(mp->m_data + ehdrlen);
787 ip_hlen = ip->ip_hl << 2;
788 ipproto = ip->ip_p;
789 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
790 break;
791 case ETHERTYPE_IPV6:
792 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
793 ip_hlen = sizeof(struct ip6_hdr);
794 /* XXX-BZ this will go badly in case of ext hdrs. */
795 ipproto = ip6->ip6_nxt;
796 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
797 break;
798 default:
799 offload = FALSE;
800 break;
801 }
802
803 vlan_macip_lens |= ip_hlen;
804
805 switch (ipproto) {
806 case IPPROTO_TCP:
807 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
808 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
809 break;
810
811 case IPPROTO_UDP:
812 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
813 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
814 break;
815
816#if __FreeBSD_version >= 800000
817 case IPPROTO_SCTP:
818 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
819 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
820 break;
821#endif
822 default:
823 offload = FALSE;
824 break;
825 }
826
827 if (offload) /* For the TX descriptor setup */
828 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
829
830no_offloads:
831 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
832
833 /* Now copy bits into descriptor */
834 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
835 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
836 TXD->seqnum_seed = htole32(0);
837 TXD->mss_l4len_idx = htole32(0);
838
839 /* We've consumed the first desc, adjust counters */
840 if (++ctxd == txr->num_desc)
841 ctxd = 0;
842 txr->next_avail_desc = ctxd;
843 --txr->tx_avail;
844
845 return (0);
846}
847
848/**********************************************************************
849 *
850 * Setup work for hardware segmentation offload (TSO) on
851 * adapters using advanced tx descriptors
852 *
853 **********************************************************************/
854static int
855ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
856 u32 *cmd_type_len, u32 *olinfo_status)
857{
858 struct ixgbe_adv_tx_context_desc *TXD;
859 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
860 u32 mss_l4len_idx = 0, paylen;
861 u16 vtag = 0, eh_type;
862 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
863 struct ether_vlan_header *eh;
864#ifdef INET6
865 struct ip6_hdr *ip6;
866#endif
867#ifdef INET
868 struct ip *ip;
869#endif
870 struct tcphdr *th;
871
872
873 /*
874 * Determine where frame payload starts.
875 * Jump over vlan headers if already present
876 */
877 eh = mtod(mp, struct ether_vlan_header *);
878 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
879 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
880 eh_type = eh->evl_proto;
881 } else {
882 ehdrlen = ETHER_HDR_LEN;
883 eh_type = eh->evl_encap_proto;
884 }
885
886 switch (ntohs(eh_type)) {
887#ifdef INET6
888 case ETHERTYPE_IPV6:
889 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
890 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
891 if (ip6->ip6_nxt != IPPROTO_TCP)
892 return (ENXIO);
893 ip_hlen = sizeof(struct ip6_hdr);
894 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
895 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
896 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
897 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
898 break;
899#endif
900#ifdef INET
901 case ETHERTYPE_IP:
902 ip = (struct ip *)(mp->m_data + ehdrlen);
903 if (ip->ip_p != IPPROTO_TCP)
904 return (ENXIO);
905 ip->ip_sum = 0;
906 ip_hlen = ip->ip_hl << 2;
907 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
908 th->th_sum = in_pseudo(ip->ip_src.s_addr,
909 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
910 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
911 /* Tell transmit desc to also do IPv4 checksum. */
912 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
913 break;
914#endif
915 default:
916 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
917 __func__, ntohs(eh_type));
918 break;
919 }
920
921 ctxd = txr->next_avail_desc;
922 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
923
924 tcp_hlen = th->th_off << 2;
925
926 /* This is used in the transmit desc in encap */
927 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
928
929 /* VLAN MACLEN IPLEN */
930 if (mp->m_flags & M_VLANTAG) {
931 vtag = htole16(mp->m_pkthdr.ether_vtag);
932 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
933 }
934
935 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
936 vlan_macip_lens |= ip_hlen;
937 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
938
939 /* ADV DTYPE TUCMD */
940 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
941 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
942 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
943
944 /* MSS L4LEN IDX */
945 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
946 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
947 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
948
949 TXD->seqnum_seed = htole32(0);
950
951 if (++ctxd == txr->num_desc)
952 ctxd = 0;
953
954 txr->tx_avail--;
955 txr->next_avail_desc = ctxd;
956 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
957 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
958 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
959 ++txr->tso_tx;
960 return (0);
961}
962
963
964/**********************************************************************
965 *
966 * Examine each tx_buffer in the used queue. If the hardware is done
967 * processing the packet then free associated resources. The
968 * tx_buffer is put back on the free queue.
969 *
970 **********************************************************************/
971void
972ixgbe_txeof(struct tx_ring *txr)
973{
974#ifdef DEV_NETMAP
975 struct adapter *adapter = txr->adapter;
976 struct ifnet *ifp = adapter->ifp;
977#endif
978 u32 work, processed = 0;
979 u16 limit = txr->process_limit;
980 struct ixgbe_tx_buf *buf;
981 union ixgbe_adv_tx_desc *txd;
982
983 mtx_assert(&txr->tx_mtx, MA_OWNED);
984
985#ifdef DEV_NETMAP
986 if (ifp->if_capenable & IFCAP_NETMAP) {
987 struct netmap_adapter *na = NA(ifp);
988 struct netmap_kring *kring = &na->tx_rings[txr->me];
989 txd = txr->tx_base;
990 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
991 BUS_DMASYNC_POSTREAD);
992 /*
993 * In netmap mode, all the work is done in the context
994 * of the client thread. Interrupt handlers only wake up
995 * clients, which may be sleeping on individual rings
996 * or on a global resource for all rings.
997 * To implement tx interrupt mitigation, we wake up the client
998 * thread roughly every half ring, even if the NIC interrupts
999 * more frequently. This is implemented as follows:
1000 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1001 * the slot that should wake up the thread (nkr_num_slots
1002 * means the user thread should not be woken up);
1003 * - the driver ignores tx interrupts unless netmap_mitigate=0
1004 * or the slot has the DD bit set.
1005 */
1006 if (!netmap_mitigate ||
1007 (kring->nr_kflags < kring->nkr_num_slots &&
1008 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1009 netmap_tx_irq(ifp, txr->me);
1010 }
1011 return;
1012 }
1013#endif /* DEV_NETMAP */
1014
1015 if (txr->tx_avail == txr->num_desc) {
1016 txr->busy = 0;
1017 return;
1018 }
1019
1020 /* Get work starting point */
1021 work = txr->next_to_clean;
1022 buf = &txr->tx_buffers[work];
1023 txd = &txr->tx_base[work];
1024 work -= txr->num_desc; /* The distance to ring end */
1025 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1026 BUS_DMASYNC_POSTREAD);
1027
1028 do {
1029 union ixgbe_adv_tx_desc *eop= buf->eop;
1030 if (eop == NULL) /* No work */
1031 break;
1032
1033 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1034 break; /* I/O not complete */
1035
1036 if (buf->m_head) {
1037 txr->bytes +=
1038 buf->m_head->m_pkthdr.len;
1039 bus_dmamap_sync(txr->txtag,
1040 buf->map,
1041 BUS_DMASYNC_POSTWRITE);
1042 bus_dmamap_unload(txr->txtag,
1043 buf->map);
1044 m_freem(buf->m_head);
1045 buf->m_head = NULL;
1043 buf->map = NULL;
1046 }
1047 buf->eop = NULL;
1048 ++txr->tx_avail;
1049
1050 /* We clean the range if multi segment */
1051 while (txd != eop) {
1052 ++txd;
1053 ++buf;
1054 ++work;
1055 /* wrap the ring? */
1056 if (__predict_false(!work)) {
1057 work -= txr->num_desc;
1058 buf = txr->tx_buffers;
1059 txd = txr->tx_base;
1060 }
1061 if (buf->m_head) {
1062 txr->bytes +=
1063 buf->m_head->m_pkthdr.len;
1064 bus_dmamap_sync(txr->txtag,
1065 buf->map,
1066 BUS_DMASYNC_POSTWRITE);
1067 bus_dmamap_unload(txr->txtag,
1068 buf->map);
1069 m_freem(buf->m_head);
1070 buf->m_head = NULL;
1069 buf->map = NULL;
1071 }
1072 ++txr->tx_avail;
1073 buf->eop = NULL;
1074
1075 }
1076 ++txr->packets;
1077 ++processed;
1078
1079 /* Try the next packet */
1080 ++txd;
1081 ++buf;
1082 ++work;
1083 /* reset with a wrap */
1084 if (__predict_false(!work)) {
1085 work -= txr->num_desc;
1086 buf = txr->tx_buffers;
1087 txd = txr->tx_base;
1088 }
1089 prefetch(txd);
1090 } while (__predict_true(--limit));
1091
1092 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1093 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1094
1095 work += txr->num_desc;
1096 txr->next_to_clean = work;
1097
1098 /*
 1099** Queue Hang detection: we know there's
 1100** work outstanding or the first return
 1101** would have been taken, so increment busy
 1102** if nothing managed to get cleaned; then
1103 ** in local_timer it will be checked and
1104 ** marked as HUNG if it exceeds a MAX attempt.
1105 */
1106 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1107 ++txr->busy;
1108 /*
 1109** If anything gets cleaned we reset state to 1;
 1110** note this will turn off HUNG if it's set.
1111 */
1112 if (processed)
1113 txr->busy = 1;
1114
1115 if (txr->tx_avail == txr->num_desc)
1116 txr->busy = 0;
1117
1118 return;
1119}
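/*
** Summary of the txr->busy values managed above: 0 means the ring is
** completely clean, 1 means work is outstanding and progress is being
** made, higher values count cleaning passes that freed nothing, and the
** local_timer watchdog promotes the queue to IXGBE_QUEUE_HUNG once that
** count exceeds its maximum number of attempts.
*/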
1120
1121
1122#ifdef IXGBE_FDIR
1123/*
1124** This routine parses packet headers so that Flow
1125** Director can make a hashed filter table entry
1126** allowing traffic flows to be identified and kept
1127** on the same cpu. This would be a performance
1128** hit, but we only do it at IXGBE_FDIR_RATE of
1129** packets.
1130*/
1131static void
1132ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1133{
1134 struct adapter *adapter = txr->adapter;
1135 struct ix_queue *que;
1136 struct ip *ip;
1137 struct tcphdr *th;
1138 struct udphdr *uh;
1139 struct ether_vlan_header *eh;
1140 union ixgbe_atr_hash_dword input = {.dword = 0};
1141 union ixgbe_atr_hash_dword common = {.dword = 0};
1142 int ehdrlen, ip_hlen;
1143 u16 etype;
1144
1145 eh = mtod(mp, struct ether_vlan_header *);
1146 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1147 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1148 etype = eh->evl_proto;
1149 } else {
1150 ehdrlen = ETHER_HDR_LEN;
1151 etype = eh->evl_encap_proto;
1152 }
1153
1154 /* Only handling IPv4 */
1155 if (etype != htons(ETHERTYPE_IP))
1156 return;
1157
1158 ip = (struct ip *)(mp->m_data + ehdrlen);
1159 ip_hlen = ip->ip_hl << 2;
1160
1161 /* check if we're UDP or TCP */
1162 switch (ip->ip_p) {
1163 case IPPROTO_TCP:
1164 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1165 /* src and dst are inverted */
1166 common.port.dst ^= th->th_sport;
1167 common.port.src ^= th->th_dport;
1168 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1169 break;
1170 case IPPROTO_UDP:
1171 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1172 /* src and dst are inverted */
1173 common.port.dst ^= uh->uh_sport;
1174 common.port.src ^= uh->uh_dport;
1175 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1176 break;
1177 default:
1178 return;
1179 }
1180
1181 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1182 if (mp->m_pkthdr.ether_vtag)
1183 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1184 else
1185 common.flex_bytes ^= etype;
1186 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1187
1188 que = &adapter->queues[txr->me];
1189 /*
1190 ** This assumes the Rx queue and Tx
1191 ** queue are bound to the same CPU
1192 */
1193 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1194 input, common, que->msix);
1195}
1196#endif /* IXGBE_FDIR */
1197
1198/*
1199** Used to detect a descriptor that has
1200** been merged by Hardware RSC.
1201*/
1202static inline u32
1203ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1204{
1205 return (le32toh(rx->wb.lower.lo_dword.data) &
1206 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1207}
1208
1209/*********************************************************************
1210 *
1211 * Initialize Hardware RSC (LRO) feature on 82599
1212 * for an RX ring, this is toggled by the LRO capability
1213 * even though it is transparent to the stack.
1214 *
1215 * NOTE: since this HW feature only works with IPV4 and
 1216 *  our testing has shown soft LRO to be as effective,
1217 * I have decided to disable this by default.
1218 *
1219 **********************************************************************/
1220static void
1221ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1222{
1223 struct adapter *adapter = rxr->adapter;
1224 struct ixgbe_hw *hw = &adapter->hw;
1225 u32 rscctrl, rdrxctl;
1226
1227 /* If turning LRO/RSC off we need to disable it */
1228 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1229 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1230 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1231 return;
1232 }
1233
1234 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1235 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1236#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1237 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1238#endif /* DEV_NETMAP */
1239 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1240 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1241 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1242
1243 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1244 rscctrl |= IXGBE_RSCCTL_RSCEN;
1245 /*
1246 ** Limit the total number of descriptors that
1247 ** can be combined, so it does not exceed 64K
1248 */
1249 if (rxr->mbuf_sz == MCLBYTES)
1250 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1251 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1252 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1253 else if (rxr->mbuf_sz == MJUM9BYTES)
1254 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1255 else /* Using 16K cluster */
1256 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1257
1258 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1259
1260 /* Enable TCP header recognition */
1261 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1262 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1263 IXGBE_PSRTYPE_TCPHDR));
1264
1265 /* Disable RSC for ACK packets */
1266 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1267 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1268
1269 rxr->hw_rsc = TRUE;
1270}
1271/*********************************************************************
1272 *
1273 * Refresh mbuf buffers for RX descriptor rings
1274 * - now keeps its own state so discards due to resource
 1275 *     exhaustion are unnecessary; if an mbuf cannot be obtained
1276 * it just returns, keeping its placeholder, thus it can simply
1277 * be recalled to try again.
1278 *
1279 **********************************************************************/
1280static void
1281ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1282{
1283 struct adapter *adapter = rxr->adapter;
1284 bus_dma_segment_t seg[1];
1285 struct ixgbe_rx_buf *rxbuf;
1286 struct mbuf *mp;
1287 int i, j, nsegs, error;
1288 bool refreshed = FALSE;
1289
1290 i = j = rxr->next_to_refresh;
1291 /* Control the loop with one beyond */
1292 if (++j == rxr->num_desc)
1293 j = 0;
1294
1295 while (j != limit) {
1296 rxbuf = &rxr->rx_buffers[i];
1297 if (rxbuf->buf == NULL) {
1298 mp = m_getjcl(M_NOWAIT, MT_DATA,
1299 M_PKTHDR, rxr->mbuf_sz);
1300 if (mp == NULL)
1301 goto update;
1302 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1303 m_adj(mp, ETHER_ALIGN);
1304 } else
1305 mp = rxbuf->buf;
1306
1307 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1308
1309 /* If we're dealing with an mbuf that was copied rather
1310 * than replaced, there's no need to go through busdma.
1311 */
1312 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1313 /* Get the memory mapping */
1314 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1315 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1316 rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1317 if (error != 0) {
1318 printf("Refresh mbufs: payload dmamap load"
1319 " failure - %d\n", error);
1320 m_free(mp);
1321 rxbuf->buf = NULL;
1322 goto update;
1323 }
1324 rxbuf->buf = mp;
1325 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1326 BUS_DMASYNC_PREREAD);
1327 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1328 htole64(seg[0].ds_addr);
1329 } else {
1330 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1331 rxbuf->flags &= ~IXGBE_RX_COPY;
1332 }
1333
1334 refreshed = TRUE;
1335 /* Next is precalculated */
1336 i = j;
1337 rxr->next_to_refresh = i;
1338 if (++j == rxr->num_desc)
1339 j = 0;
1340 }
1341update:
1342 if (refreshed) /* Update hardware tail index */
1343 IXGBE_WRITE_REG(&adapter->hw,
1344 rxr->tail, rxr->next_to_refresh);
1345 return;
1346}
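/*
** Note on the indexing in ixgbe_refresh_mbufs(): j always runs one slot
** ahead of i, so refilling stops one descriptor short of 'limit', and
** next_to_refresh is only advanced after slot i has a buffer in place;
** a failed allocation leaves the index untouched so a later call simply
** retries the same slot.
*/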
1347
1348/*********************************************************************
1349 *
1350 * Allocate memory for rx_buffer structures. Since we use one
 1351 *  rx_buffer per received packet, the maximum number of rx_buffers
1352 * that we'll need is equal to the number of receive descriptors
1353 * that we've allocated.
1354 *
1355 **********************************************************************/
1356int
1357ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1358{
1359 struct adapter *adapter = rxr->adapter;
1360 device_t dev = adapter->dev;
1361 struct ixgbe_rx_buf *rxbuf;
1362 int i, bsize, error;
1363
1364 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1365 if (!(rxr->rx_buffers =
1366 (struct ixgbe_rx_buf *) malloc(bsize,
1367 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1368 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1369 error = ENOMEM;
1370 goto fail;
1371 }
1372
1373 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1374 1, 0, /* alignment, bounds */
1375 BUS_SPACE_MAXADDR, /* lowaddr */
1376 BUS_SPACE_MAXADDR, /* highaddr */
1377 NULL, NULL, /* filter, filterarg */
1378 MJUM16BYTES, /* maxsize */
1379 1, /* nsegments */
1380 MJUM16BYTES, /* maxsegsize */
1381 0, /* flags */
1382 NULL, /* lockfunc */
1383 NULL, /* lockfuncarg */
1384 &rxr->ptag))) {
1385 device_printf(dev, "Unable to create RX DMA tag\n");
1386 goto fail;
1387 }
1388
1389 for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1390 rxbuf = &rxr->rx_buffers[i];
1391 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1392 if (error) {
1393 device_printf(dev, "Unable to create RX dma map\n");
1394 goto fail;
1395 }
1396 }
1397
1398 return (0);
1399
1400fail:
1401 /* Frees all, but can handle partial completion */
1402 ixgbe_free_receive_structures(adapter);
1403 return (error);
1404}
1405
1406
1407static void
1408ixgbe_free_receive_ring(struct rx_ring *rxr)
1409{
1410 struct ixgbe_rx_buf *rxbuf;
1411 int i;
1412
1413 for (i = 0; i < rxr->num_desc; i++) {
1414 rxbuf = &rxr->rx_buffers[i];
1415 if (rxbuf->buf != NULL) {
1416 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1417 BUS_DMASYNC_POSTREAD);
1418 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1419 rxbuf->buf->m_flags |= M_PKTHDR;
1420 m_freem(rxbuf->buf);
1421 rxbuf->buf = NULL;
1422 rxbuf->flags = 0;
1423 }
1424 }
1425}
1426
1427
1428/*********************************************************************
1429 *
1430 * Initialize a receive ring and its buffers.
1431 *
1432 **********************************************************************/
1433static int
1434ixgbe_setup_receive_ring(struct rx_ring *rxr)
1435{
1436 struct adapter *adapter;
1437 struct ifnet *ifp;
1438 device_t dev;
1439 struct ixgbe_rx_buf *rxbuf;
1440 bus_dma_segment_t seg[1];
1441 struct lro_ctrl *lro = &rxr->lro;
1442 int rsize, nsegs, error = 0;
1443#ifdef DEV_NETMAP
1444 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1445 struct netmap_slot *slot;
1446#endif /* DEV_NETMAP */
1447
1448 adapter = rxr->adapter;
1449 ifp = adapter->ifp;
1450 dev = adapter->dev;
1451
1452 /* Clear the ring contents */
1453 IXGBE_RX_LOCK(rxr);
1454#ifdef DEV_NETMAP
1455 /* same as in ixgbe_setup_transmit_ring() */
1456 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1457#endif /* DEV_NETMAP */
1458 rsize = roundup2(adapter->num_rx_desc *
1459 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1460 bzero((void *)rxr->rx_base, rsize);
1461 /* Cache the size */
1462 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1463
1464 /* Free current RX buffer structs and their mbufs */
1465 ixgbe_free_receive_ring(rxr);
1466
1467 /* Now replenish the mbufs */
1468 for (int j = 0; j != rxr->num_desc; ++j) {
1469 struct mbuf *mp;
1470
1471 rxbuf = &rxr->rx_buffers[j];
1472#ifdef DEV_NETMAP
1473 /*
1474 * In netmap mode, fill the map and set the buffer
1475 * address in the NIC ring, considering the offset
1476 * between the netmap and NIC rings (see comment in
1477 * ixgbe_setup_transmit_ring() ). No need to allocate
1478 * an mbuf, so end the block with a continue;
1479 */
1480 if (slot) {
1481 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1482 uint64_t paddr;
1483 void *addr;
1484
1485 addr = PNMB(na, slot + sj, &paddr);
1486 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1487 /* Update descriptor and the cached value */
1488 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1489 rxbuf->addr = htole64(paddr);
1490 continue;
1491 }
1492#endif /* DEV_NETMAP */
1493 rxbuf->flags = 0;
1494 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1495 M_PKTHDR, adapter->rx_mbuf_sz);
1496 if (rxbuf->buf == NULL) {
1497 error = ENOBUFS;
1498 goto fail;
1499 }
1500 mp = rxbuf->buf;
1501 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1502 /* Get the memory mapping */
1503 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1504 rxbuf->pmap, mp, seg,
1505 &nsegs, BUS_DMA_NOWAIT);
1506 if (error != 0)
1507 goto fail;
1508 bus_dmamap_sync(rxr->ptag,
1509 rxbuf->pmap, BUS_DMASYNC_PREREAD);
1510 /* Update the descriptor and the cached value */
1511 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1512 rxbuf->addr = htole64(seg[0].ds_addr);
1513 }
1514
1515
1516 /* Setup our descriptor indices */
1517 rxr->next_to_check = 0;
1518 rxr->next_to_refresh = 0;
1519 rxr->lro_enabled = FALSE;
1520 rxr->rx_copies = 0;
1521 rxr->rx_bytes = 0;
1522 rxr->vtag_strip = FALSE;
1523
1524 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1525 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1526
1527 /*
1528 ** Now set up the LRO interface:
1529 */
1530 if (ixgbe_rsc_enable)
1531 ixgbe_setup_hw_rsc(rxr);
1532 else if (ifp->if_capenable & IFCAP_LRO) {
1533 int err = tcp_lro_init(lro);
1534 if (err) {
1535 device_printf(dev, "LRO Initialization failed!\n");
1536 goto fail;
1537 }
1538 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1539 rxr->lro_enabled = TRUE;
1540 lro->ifp = adapter->ifp;
1541 }
1542
1543 IXGBE_RX_UNLOCK(rxr);
1544 return (0);
1545
1546fail:
1547 ixgbe_free_receive_ring(rxr);
1548 IXGBE_RX_UNLOCK(rxr);
1549 return (error);
1550}
1551
1552/*********************************************************************
1553 *
1554 * Initialize all receive rings.
1555 *
1556 **********************************************************************/
1557int
1558ixgbe_setup_receive_structures(struct adapter *adapter)
1559{
1560 struct rx_ring *rxr = adapter->rx_rings;
1561 int j;
1562
1563 for (j = 0; j < adapter->num_queues; j++, rxr++)
1564 if (ixgbe_setup_receive_ring(rxr))
1565 goto fail;
1566
1567 return (0);
1568fail:
1569 /*
1570 * Free RX buffers allocated so far, we will only handle
1571 * the rings that completed, the failing case will have
 1572	 * cleaned up for itself. 'j' failed, so it's the terminus.
1573 */
1574 for (int i = 0; i < j; ++i) {
1575 rxr = &adapter->rx_rings[i];
1576 ixgbe_free_receive_ring(rxr);
1577 }
1578
1579 return (ENOBUFS);
1580}
1581
1582
1583/*********************************************************************
1584 *
1585 * Free all receive rings.
1586 *
1587 **********************************************************************/
1588void
1589ixgbe_free_receive_structures(struct adapter *adapter)
1590{
1591 struct rx_ring *rxr = adapter->rx_rings;
1592
1593 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1594
1595 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1596 struct lro_ctrl *lro = &rxr->lro;
1597 ixgbe_free_receive_buffers(rxr);
1598 /* Free LRO memory */
1599 tcp_lro_free(lro);
1600 /* Free the ring memory as well */
1601 ixgbe_dma_free(adapter, &rxr->rxdma);
1602 }
1603
1604 free(adapter->rx_rings, M_DEVBUF);
1605}
1606
1607
1608/*********************************************************************
1609 *
1610 * Free receive ring data structures
1611 *
1612 **********************************************************************/
1613void
1614ixgbe_free_receive_buffers(struct rx_ring *rxr)
1615{
1616 struct adapter *adapter = rxr->adapter;
1617 struct ixgbe_rx_buf *rxbuf;
1618
1619 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1620
1621 /* Cleanup any existing buffers */
1622 if (rxr->rx_buffers != NULL) {
1623 for (int i = 0; i < adapter->num_rx_desc; i++) {
1624 rxbuf = &rxr->rx_buffers[i];
1625 if (rxbuf->buf != NULL) {
1626 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1627 BUS_DMASYNC_POSTREAD);
1628 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1629 rxbuf->buf->m_flags |= M_PKTHDR;
1630 m_freem(rxbuf->buf);
1631 }
1632 rxbuf->buf = NULL;
1633 if (rxbuf->pmap != NULL) {
1634 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1635 rxbuf->pmap = NULL;
1636 }
1637 }
1638 if (rxr->rx_buffers != NULL) {
1639 free(rxr->rx_buffers, M_DEVBUF);
1640 rxr->rx_buffers = NULL;
1641 }
1642 }
1643
1644 if (rxr->ptag != NULL) {
1645 bus_dma_tag_destroy(rxr->ptag);
1646 rxr->ptag = NULL;
1647 }
1648
1649 return;
1650}
1651
1652static __inline void
1653ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1654{
1655
1656 /*
 1657	 * ATM LRO is only for IP/TCP packets and the TCP checksum of the packet
 1658	 * should be computed by hardware. Also it should not have a VLAN tag in
 1659	 * the ethernet header. In case of IPv6 we do not yet support ext. hdrs.
1660 */
1661 if (rxr->lro_enabled &&
1662 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1663 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1664 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1665 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1666 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1667 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1668 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1669 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1670 /*
1671 * Send to the stack if:
1672 ** - LRO not enabled, or
1673 ** - no LRO resources, or
1674 ** - lro enqueue fails
1675 */
1676 if (rxr->lro.lro_cnt != 0)
1677 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1678 return;
1679 }
1680 IXGBE_RX_UNLOCK(rxr);
1681 (*ifp->if_input)(ifp, m);
1682 IXGBE_RX_LOCK(rxr);
1683}
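/*
** ixgbe_rx_input() drops the RX ring lock around if_input() so the
** network stack is never entered while the ring lock is held, then
** re-takes it before returning to the rxeof loop.
*/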
1684
1685static __inline void
1686ixgbe_rx_discard(struct rx_ring *rxr, int i)
1687{
1688 struct ixgbe_rx_buf *rbuf;
1689
1690 rbuf = &rxr->rx_buffers[i];
1691
1692
1693 /*
1694 ** With advanced descriptors the writeback
 1695	** clobbers the buffer addrs, so it's easier
1696 ** to just free the existing mbufs and take
1697 ** the normal refresh path to get new buffers
1698 ** and mapping.
1699 */
1700
1701 if (rbuf->fmp != NULL) {/* Partial chain ? */
1702 rbuf->fmp->m_flags |= M_PKTHDR;
1703 m_freem(rbuf->fmp);
1704 rbuf->fmp = NULL;
1705 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1706 } else if (rbuf->buf) {
1707 m_free(rbuf->buf);
1708 rbuf->buf = NULL;
1709 }
1710 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1711
1712 rbuf->flags = 0;
1713
1714 return;
1715}
1716
1717
1718/*********************************************************************
1719 *
1720 * This routine executes in interrupt context. It replenishes
1721 * the mbufs in the descriptor and sends data which has been
1722 * dma'ed into host memory to upper layer.
1723 *
1722 * We loop at most count times if count is > 0, or until done if
1723 * count < 0.
1724 *
1724 * Return TRUE for more work, FALSE for all clean.
1725 *********************************************************************/
1726bool
1727ixgbe_rxeof(struct ix_queue *que)
1728{
1729 struct adapter *adapter = que->adapter;
1730 struct rx_ring *rxr = que->rxr;
1731 struct ifnet *ifp = adapter->ifp;
1732 struct lro_ctrl *lro = &rxr->lro;
1733 struct lro_entry *queued;
1734 int i, nextp, processed = 0;
1735 u32 staterr = 0;
1736 u16 count = rxr->process_limit;
1737 union ixgbe_adv_rx_desc *cur;
1738 struct ixgbe_rx_buf *rbuf, *nbuf;
1739 u16 pkt_info;
1740
1741 IXGBE_RX_LOCK(rxr);
1742
1743#ifdef DEV_NETMAP
1744 /* Same as the txeof routine: wakeup clients on intr. */
1745 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1746 IXGBE_RX_UNLOCK(rxr);
1747 return (FALSE);
1748 }
1749#endif /* DEV_NETMAP */
1750
1751 for (i = rxr->next_to_check; count != 0;) {
1752 struct mbuf *sendmp, *mp;
1753 u32 rsc, ptype;
1754 u16 len;
1755 u16 vtag = 0;
1756 bool eop;
1757
1758 /* Sync the ring. */
1759 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1760 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1761
1762 cur = &rxr->rx_base[i];
1763 staterr = le32toh(cur->wb.upper.status_error);
1764 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1765
1766 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1767 break;
1768 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1769 break;
1770
1771 count--;
1772 sendmp = NULL;
1773 nbuf = NULL;
1774 rsc = 0;
1775 cur->wb.upper.status_error = 0;
1776 rbuf = &rxr->rx_buffers[i];
1777 mp = rbuf->buf;
1778
1779 len = le16toh(cur->wb.upper.length);
1780 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1781 IXGBE_RXDADV_PKTTYPE_MASK;
1782 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1783
1784 /* Make sure bad packets are discarded */
1785 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1787#if 0 // VF-only
1786#if __FreeBSD_version >= 1100036
1787 if (IXGBE_IS_VF(adapter))
1788 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1789#endif
1791#endif
1790 rxr->rx_discarded++;
1791 ixgbe_rx_discard(rxr, i);
1792 goto next_desc;
1793 }
1794
1795 /*
1796 ** On 82599 which supports a hardware
1797 ** LRO (called HW RSC), packets need
1798 ** not be fragmented across sequential
1799 ** descriptors, rather the next descriptor
1800 ** is indicated in bits of the descriptor.
 1801	** This also means that we might process
 1802	** more than one packet at a time, something
 1803	** that has never been true before; it
1804 ** required eliminating global chain pointers
1805 ** in favor of what we are doing here. -jfv
1806 */
1807 if (!eop) {
1808 /*
1809 ** Figure out the next descriptor
1810 ** of this frame.
1811 */
1812 if (rxr->hw_rsc == TRUE) {
1813 rsc = ixgbe_rsc_count(cur);
1814 rxr->rsc_num += (rsc - 1);
1815 }
1816 if (rsc) { /* Get hardware index */
1817 nextp = ((staterr &
1818 IXGBE_RXDADV_NEXTP_MASK) >>
1819 IXGBE_RXDADV_NEXTP_SHIFT);
1820 } else { /* Just sequential */
1821 nextp = i + 1;
1822 if (nextp == adapter->num_rx_desc)
1823 nextp = 0;
1824 }
1825 nbuf = &rxr->rx_buffers[nextp];
1826 prefetch(nbuf);
1827 }
1828 /*
1829 ** Rather than using the fmp/lmp global pointers
1830 ** we now keep the head of a packet chain in the
1831 ** buffer struct and pass this along from one
1832 ** descriptor to the next, until we get EOP.
1833 */
1834 mp->m_len = len;
1835 /*
1836 ** See if there is a stored head
1837 ** that determines what we are
1838 */
1839 sendmp = rbuf->fmp;
1840 if (sendmp != NULL) { /* secondary frag */
1841 rbuf->buf = rbuf->fmp = NULL;
1842 mp->m_flags &= ~M_PKTHDR;
1843 sendmp->m_pkthdr.len += mp->m_len;
1844 } else {
1845 /*
1846 * Optimize. This might be a small packet,
1847 * maybe just a TCP ACK. Do a fast copy that
1848 * is cache aligned into a new mbuf, and
1849 * leave the old mbuf+cluster for re-use.
1850 */
1851 if (eop && len <= IXGBE_RX_COPY_LEN) {
1852 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1853 if (sendmp != NULL) {
1854 sendmp->m_data +=
1855 IXGBE_RX_COPY_ALIGN;
1856 ixgbe_bcopy(mp->m_data,
1857 sendmp->m_data, len);
1858 sendmp->m_len = len;
1859 rxr->rx_copies++;
1860 rbuf->flags |= IXGBE_RX_COPY;
1861 }
1862 }
1863 if (sendmp == NULL) {
1864 rbuf->buf = rbuf->fmp = NULL;
1865 sendmp = mp;
1866 }
1867
1868 /* first desc of a non-ps chain */
1869 sendmp->m_flags |= M_PKTHDR;
1870 sendmp->m_pkthdr.len = mp->m_len;
1871 }
1872 ++processed;
1873
1874 /* Pass the head pointer on */
1875 if (eop == 0) {
1876 nbuf->fmp = sendmp;
1877 sendmp = NULL;
1878 mp->m_next = nbuf->buf;
1879 } else { /* Sending this frame */
1880 sendmp->m_pkthdr.rcvif = ifp;
1881 rxr->rx_packets++;
1882 /* capture data for AIM */
1883 rxr->bytes += sendmp->m_pkthdr.len;
1884 rxr->rx_bytes += sendmp->m_pkthdr.len;
1885 /* Process vlan info */
1886 if ((rxr->vtag_strip) &&
1887 (staterr & IXGBE_RXD_STAT_VP))
1888 vtag = le16toh(cur->wb.upper.vlan);
1889 if (vtag) {
1890 sendmp->m_pkthdr.ether_vtag = vtag;
1891 sendmp->m_flags |= M_VLANTAG;
1892 }
1893 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1894 ixgbe_rx_checksum(staterr, sendmp, ptype);
1895#if __FreeBSD_version >= 800000
1898#ifdef RSS
1899 sendmp->m_pkthdr.flowid =
1900 le32toh(cur->wb.lower.hi_dword.rss);
1901 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1902 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1903 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
1904 break;
1905 case IXGBE_RXDADV_RSSTYPE_IPV4:
1906 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
1907 break;
1908 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1909 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
1910 break;
1911 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1912 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
1913 break;
1914 case IXGBE_RXDADV_RSSTYPE_IPV6:
1915 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
1916 break;
1917 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1918 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
1919 break;
1920 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1921 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
1922 break;
1923 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1924 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
1925 break;
1926 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1927 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
1928 break;
1929 default:
1930 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1931 }
1932#else /* RSS */
1896 sendmp->m_pkthdr.flowid = que->msix;
1934 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1935#endif /* RSS */
1897#endif /* FreeBSD_version */
1898 }
1899next_desc:
1900 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1901 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1902
1903 /* Advance our pointers to the next descriptor. */
1904 if (++i == rxr->num_desc)
1905 i = 0;
1906
1907 /* Now send to the stack or do LRO */
1908 if (sendmp != NULL) {
1909 rxr->next_to_check = i;
1910 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1911 i = rxr->next_to_check;
1912 }
1913
1914 /* Every 8 descriptors we go to refresh mbufs */
1915 if (processed == 8) {
1916 ixgbe_refresh_mbufs(rxr, i);
1917 processed = 0;
1918 }
1919 }
1920
1921 /* Refresh any remaining buf structs */
1922 if (ixgbe_rx_unrefreshed(rxr))
1923 ixgbe_refresh_mbufs(rxr, i);
1924
1925 rxr->next_to_check = i;
1926
1927 /*
1928 * Flush any outstanding LRO work
1929 */
1930 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1931 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1932 tcp_lro_flush(lro, queued);
1933 }
1934
1935 IXGBE_RX_UNLOCK(rxr);
1936
1937 /*
1938 ** Still have cleaning to do?
1939 */
1940 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1941 return (TRUE);
1942 else
1943 return (FALSE);
1944}
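
/*
** Illustrative only (added for exposition, not part of the driver): a
** minimal sketch of how a queue service routine might drive the
** ixgbe_rxeof() above.  The function name and budget value are made up
** for the example; the driver's real MSI-X handler is the authoritative
** caller.
*/
#if 0	/* example sketch, never compiled */
static void
example_que_service(struct ix_queue *que)
{
	int	budget = 16;	/* arbitrary polling budget */
	bool	more;

	do {
		/* TRUE from ixgbe_rxeof() means descriptors remain */
		more = ixgbe_rxeof(que);
	} while (more && --budget > 0);

	/*
	** A real handler would now either reschedule its task (if more
	** work remains) or re-enable the queue's interrupt; omitted here.
	*/
}
#endif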
1945
1946
1947/*********************************************************************
1948 *
1949 *  Verify that the hardware indicated that the checksum is valid.
1950 *  Inform the stack of the checksum status so that the stack
1951 *  does not spend time verifying the checksum itself.
1952 *
1953 *********************************************************************/
1954static void
1955ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1956{
1957 u16 status = (u16) staterr;
1958 u8 errors = (u8) (staterr >> 24);
1959 bool sctp = FALSE;
1960
1961 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1962 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1963 sctp = TRUE;
1964
1965 if (status & IXGBE_RXD_STAT_IPCS) {
1966 if (!(errors & IXGBE_RXD_ERR_IPE)) {
1967 /* IP Checksum Good */
1968 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1969 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1970
1971 } else
1972 mp->m_pkthdr.csum_flags = 0;
1973 }
1974 if (status & IXGBE_RXD_STAT_L4CS) {
1975 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1976#if __FreeBSD_version >= 800000
1977 if (sctp)
1978 type = CSUM_SCTP_VALID;
1979#endif
1980 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1981 mp->m_pkthdr.csum_flags |= type;
1982 if (!sctp)
1983 mp->m_pkthdr.csum_data = htons(0xffff);
1984 }
1985 }
1986 return;
1987}
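
/*
** Illustrative only (added for exposition): how a consumer of the mbuf
** can interpret the csum_flags set by ixgbe_rx_checksum() above.  This
** mirrors the checks the IP/TCP input paths perform; it is not used by
** the driver itself, and the function name is made up.
*/
#if 0	/* example sketch, never compiled */
static bool
example_csum_already_verified(struct mbuf *m)
{
	/* L3: hardware looked at the IP header checksum and it was good */
	if ((m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED | CSUM_IP_VALID)) !=
	    (CSUM_IP_CHECKED | CSUM_IP_VALID))
		return (FALSE);

	/* L4: payload checksum valid; csum_data was set to 0xffff above */
	if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) !=
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))
		return (FALSE);

	return (TRUE);
}
#endif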
1988
1989/********************************************************************
1990 * Manage DMA'able memory.
1991 *******************************************************************/
1992static void
1993ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1994{
1995 if (error)
1996 return;
1997 *(bus_addr_t *) arg = segs->ds_addr;
1998 return;
1999}
2000
2001int
2002ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2003 struct ixgbe_dma_alloc *dma, int mapflags)
2004{
2005 device_t dev = adapter->dev;
2006 int r;
2007
2008 r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2009 DBA_ALIGN, 0, /* alignment, bounds */
2010 BUS_SPACE_MAXADDR, /* lowaddr */
2011 BUS_SPACE_MAXADDR, /* highaddr */
2012 NULL, NULL, /* filter, filterarg */
2013 size, /* maxsize */
2014 1, /* nsegments */
2015 size, /* maxsegsize */
2016 BUS_DMA_ALLOCNOW, /* flags */
2017 NULL, /* lockfunc */
2018 NULL, /* lockfuncarg */
2019 &dma->dma_tag);
2020 if (r != 0) {
2021 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2022 "error %u\n", r);
2023 goto fail_0;
2024 }
2025 r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2026 BUS_DMA_NOWAIT, &dma->dma_map);
2027 if (r != 0) {
2028 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2029 "error %u\n", r);
2030 goto fail_1;
2031 }
2032 r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2033 size,
2034 ixgbe_dmamap_cb,
2035 &dma->dma_paddr,
2036 mapflags | BUS_DMA_NOWAIT);
2037 if (r != 0) {
2038 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2039 "error %u\n", r);
2040 goto fail_2;
2041 }
2042 dma->dma_size = size;
2043 return (0);
2044fail_2:
2045 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2046fail_1:
2047 bus_dma_tag_destroy(dma->dma_tag);
2048fail_0:
2049 dma->dma_tag = NULL;
2050 return (r);
2051}
2052
2053void
2054ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2055{
2056 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2057 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2058 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2059 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2060 bus_dma_tag_destroy(dma->dma_tag);
2061}
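
/*
** Illustrative only (added for exposition): the intended life cycle of
** an ixgbe_dma_alloc area using the two helpers above.  This is a
** condensed version of what ixgbe_allocate_queues() below does for each
** descriptor ring; the function name and size are made up.
*/
#if 0	/* example sketch, never compiled */
static int
example_desc_area(struct adapter *adapter, bus_size_t size)
{
	struct ixgbe_dma_alloc	dma;

	/* One contiguous, DBA_ALIGN-aligned region, mapped for DMA */
	if (ixgbe_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT))
		return (ENOMEM);

	/* dma_vaddr is our view, dma_paddr is what the hardware is given */
	bzero(dma.dma_vaddr, size);

	/* ... hand dma.dma_paddr to the device and use the ring ... */

	/* Unload, free and destroy everything when the ring goes away */
	ixgbe_dma_free(adapter, &dma);
	return (0);
}
#endif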
2062
2063
2064/*********************************************************************
2065 *
2066 *  Allocate memory for the transmit and receive rings, and then
2067 *  the descriptors associated with each; called only once at attach.
2068 *
2069 **********************************************************************/
2070int
2071ixgbe_allocate_queues(struct adapter *adapter)
2072{
2073 device_t dev = adapter->dev;
2074 struct ix_queue *que;
2075 struct tx_ring *txr;
2076 struct rx_ring *rxr;
2077 int rsize, tsize, error = IXGBE_SUCCESS;
2078 int txconf = 0, rxconf = 0;
2079
2080 /* First allocate the top level queue structs */
2081 if (!(adapter->queues =
2082 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2083 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2084 device_printf(dev, "Unable to allocate queue memory\n");
2085 error = ENOMEM;
2086 goto fail;
2087 }
2088
2089 /* First allocate the TX ring struct memory */
2090 if (!(adapter->tx_rings =
2091 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2092 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2093 device_printf(dev, "Unable to allocate TX ring memory\n");
2094 error = ENOMEM;
2095 goto tx_fail;
2096 }
2097
2098 /* Next allocate the RX */
2099 if (!(adapter->rx_rings =
2100 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2101 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2102 device_printf(dev, "Unable to allocate RX ring memory\n");
2103 error = ENOMEM;
2104 goto rx_fail;
2105 }
2106
2107	/* For the TX descriptor ring itself */
2108 tsize = roundup2(adapter->num_tx_desc *
2109 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2110
2111	/*
2112	 * Now set up the TX queues; txconf counts how many have been
2113	 * configured so that, if anything fails midcourse, we can
2114	 * unwind the allocated memory gracefully.
2115	 */
2116 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2117 /* Set up some basics */
2118 txr = &adapter->tx_rings[i];
2119 txr->adapter = adapter;
2120 txr->me = i;
2121 txr->num_desc = adapter->num_tx_desc;
2122
2123 /* Initialize the TX side lock */
2124 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2125 device_get_nameunit(dev), txr->me);
2126 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2127
2128 if (ixgbe_dma_malloc(adapter, tsize,
2129 &txr->txdma, BUS_DMA_NOWAIT)) {
2130 device_printf(dev,
2131 "Unable to allocate TX Descriptor memory\n");
2132 error = ENOMEM;
2133 goto err_tx_desc;
2134 }
2135 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2136 bzero((void *)txr->tx_base, tsize);
2137
2138 /* Now allocate transmit buffers for the ring */
2139 if (ixgbe_allocate_transmit_buffers(txr)) {
2140 device_printf(dev,
2141 "Critical Failure setting up transmit buffers\n");
2142 error = ENOMEM;
2143 goto err_tx_desc;
2144 }
2145#ifndef IXGBE_LEGACY_TX
2146 /* Allocate a buf ring */
2147 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2148 M_WAITOK, &txr->tx_mtx);
2149 if (txr->br == NULL) {
2150 device_printf(dev,
2151 "Critical Failure setting up buf ring\n");
2152 error = ENOMEM;
2153 goto err_tx_desc;
2154 }
2155#endif
2156 }
2157
2158 /*
2159 * Next the RX queues...
2160 */
2161 rsize = roundup2(adapter->num_rx_desc *
2162 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2163 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2164 rxr = &adapter->rx_rings[i];
2165 /* Set up some basics */
2166 rxr->adapter = adapter;
2167 rxr->me = i;
2168 rxr->num_desc = adapter->num_rx_desc;
2169
2170 /* Initialize the RX side lock */
2171 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2172 device_get_nameunit(dev), rxr->me);
2173 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2174
2175 if (ixgbe_dma_malloc(adapter, rsize,
2176 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2177 device_printf(dev,
2178			    "Unable to allocate RX Descriptor memory\n");
2179 error = ENOMEM;
2180 goto err_rx_desc;
2181 }
2182 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2183 bzero((void *)rxr->rx_base, rsize);
2184
2185		/* Allocate receive buffers for the ring */
2186 if (ixgbe_allocate_receive_buffers(rxr)) {
2187 device_printf(dev,
2188 "Critical Failure setting up receive buffers\n");
2189 error = ENOMEM;
2190 goto err_rx_desc;
2191 }
2192 }
2193
2194 /*
2195 ** Finally set up the queue holding structs
2196 */
2197 for (int i = 0; i < adapter->num_queues; i++) {
2198 que = &adapter->queues[i];
2199 que->adapter = adapter;
2200 que->me = i;
2201 que->txr = &adapter->tx_rings[i];
2202 que->rxr = &adapter->rx_rings[i];
2203 }
2204
2205 return (0);
2206
2207err_rx_desc:
2208 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2209 ixgbe_dma_free(adapter, &rxr->rxdma);
2210err_tx_desc:
2211 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2212 ixgbe_dma_free(adapter, &txr->txdma);
2213 free(adapter->rx_rings, M_DEVBUF);
2214rx_fail:
2215 free(adapter->tx_rings, M_DEVBUF);
2216tx_fail:
2217 free(adapter->queues, M_DEVBUF);
2218fail:
2219 return (error);
2220}
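
/*
** Illustrative only (added for exposition): attach-time use of
** ixgbe_allocate_queues().  Unwinding of partially constructed rings is
** handled inside the function itself (the err_* labels above), so the
** caller only checks the return value.  The surrounding variables and
** the error label come from a hypothetical attach context.
*/
#if 0	/* example sketch, never compiled */
	/* ... in attach, after num_queues and num_tx/rx_desc are set ... */
	if (ixgbe_allocate_queues(adapter)) {
		device_printf(dev, "queue memory allocation failed\n");
		error = ENOMEM;
		goto err_late;		/* hypothetical attach error label */
	}
#endif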