1/******************************************************************************
2
3 Copyright (c) 2001-2014, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 280182 2015-03-17 18:32:28Z jfv $*/
34
35
36#ifndef IXGBE_STANDALONE_BUILD
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#include "opt_rss.h"
40#endif
41
42#include "ixgbe.h"
43
44#ifdef RSS
45#include <netinet/in_rss.h>
46#endif
47
48/*
49** HW RSC control:
50** this feature only works with
51** IPv4, and only on 82599 and later.
52** Also, this causes IP forwarding to
53** fail, and unlike LRO that cannot be
54** controlled by the stack. For all
55** these reasons it is best left off,
56** with no tunable interface; enabling
57** it requires changing this flag and
58** recompiling.
59*/
60static bool ixgbe_rsc_enable = FALSE;
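/*
** Illustrative sketch only, not part of this driver: if a runtime
** knob were ever preferred over recompiling, the flag could be made
** an int and wired to a loader tunable roughly as below (the name
** "hw.ix.rsc_enable" is hypothetical).
*/
#if 0
static int ixgbe_rsc_enable = 0;
TUNABLE_INT("hw.ix.rsc_enable", &ixgbe_rsc_enable);
#endif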
61
62#ifdef IXGBE_FDIR
63/*
64** For Flow Director: this is the
65** rate at which we sample TX packets
66** for the filter pool; by default
67** every 20th packet will be probed.
68**
69** This feature can be disabled by
70** setting this to 0.
71*/
72static int atr_sample_rate = 20;
73#endif
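/*
** Worked example of the sampling rate above (illustration only):
** with atr_sample_rate = 20, a ring forwarding 1,000,000 packets
** per second attempts roughly 1,000,000 / 20 = 50,000 Flow Director
** filter updates per second on that ring.
*/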
74
75/* Shared PCI config read/write */
76inline u16
77ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
78{
79 u16 value;
80
81 value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
82 reg, 2);
83
84 return (value);
85}
86
87inline void
88ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
89{
90 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
91 reg, value, 2);
92
93 return;
94}
95
96/*********************************************************************
97 * Local Function prototypes
98 *********************************************************************/
99static void ixgbe_setup_transmit_ring(struct tx_ring *);
100static void ixgbe_free_transmit_buffers(struct tx_ring *);
101static int ixgbe_setup_receive_ring(struct rx_ring *);
102static void ixgbe_free_receive_buffers(struct rx_ring *);
103
104static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
105static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
107static int ixgbe_tx_ctx_setup(struct tx_ring *,
108 struct mbuf *, u32 *, u32 *);
109static int ixgbe_tso_setup(struct tx_ring *,
110 struct mbuf *, u32 *, u32 *);
111#ifdef IXGBE_FDIR
112static void ixgbe_atr(struct tx_ring *, struct mbuf *);
113#endif
114static __inline void ixgbe_rx_discard(struct rx_ring *, int);
115static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
116 struct mbuf *, u32);
117
118#ifdef IXGBE_LEGACY_TX
119/*********************************************************************
120 * Transmit entry point
121 *
122 * ixgbe_start is called by the stack to initiate a transmit.
123 * The driver will remain in this routine as long as there are
124 * packets to transmit and transmit resources are available.
125 * In case resources are not available, the stack is notified and
126 * the packet is requeued.
127 **********************************************************************/
128
129void
130ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
131{
132 struct mbuf *m_head;
133 struct adapter *adapter = txr->adapter;
134
135 IXGBE_TX_LOCK_ASSERT(txr);
136
137 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
138 return;
139 if (!adapter->link_active)
140 return;
141
142 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
143 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
144 break;
145
146 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
147 if (m_head == NULL)
148 break;
149
150 if (ixgbe_xmit(txr, &m_head)) {
151 if (m_head != NULL)
152 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
153 break;
154 }
155 /* Send a copy of the frame to the BPF listener */
156 ETHER_BPF_MTAP(ifp, m_head);
157 }
158 return;
159}
160
161/*
162 * Legacy TX start - called by the stack; this
163 * always uses the first tx ring and should
164 * not be used with multiqueue tx enabled.
165 */
166void
167ixgbe_start(struct ifnet *ifp)
168{
169 struct adapter *adapter = ifp->if_softc;
170 struct tx_ring *txr = adapter->tx_rings;
171
172 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
173 IXGBE_TX_LOCK(txr);
174 ixgbe_start_locked(txr, ifp);
175 IXGBE_TX_UNLOCK(txr);
176 }
177 return;
178}
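/*
** Illustrative sketch, not from this file: the entry points in this
** file are hooked up at attach time (in ixgbe.c), conceptually along
** these lines for the legacy versus multiqueue paths.
*/
#if 0
	/* legacy path */
	ifp->if_start = ixgbe_start;
	/* multiqueue path */
	ifp->if_transmit = ixgbe_mq_start;
	ifp->if_qflush = ixgbe_qflush;
#endif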
179
180#else /* ! IXGBE_LEGACY_TX */
181
182/*
183** Multiqueue Transmit driver
184**
185*/
186int
187ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
188{
189 struct adapter *adapter = ifp->if_softc;
190 struct ix_queue *que;
191 struct tx_ring *txr;
192 int i, err = 0;
193#ifdef RSS
194 uint32_t bucket_id;
195#endif
196
197 /*
198 * When doing RSS, map it to the same outbound queue
199 * as the incoming flow would be mapped to.
200 *
201	 * If everything is set up correctly, it should be the
202	 * same bucket that the current CPU maps to.
203 */
204 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
205#ifdef RSS
206 if (rss_hash2bucket(m->m_pkthdr.flowid,
207 M_HASHTYPE_GET(m), &bucket_id) == 0)
208 /* TODO: spit out something if bucket_id > num_queues? */
209 i = bucket_id % adapter->num_queues;
210 else
211#endif
212 i = m->m_pkthdr.flowid % adapter->num_queues;
213 } else
214 i = curcpu % adapter->num_queues;
215
216 /* Check for a hung queue and pick alternative */
217 if (((1 << i) & adapter->active_queues) == 0)
218 i = ffsl(adapter->active_queues);
219
220 txr = &adapter->tx_rings[i];
221 que = &adapter->queues[i];
222
223 err = drbr_enqueue(ifp, txr->br, m);
224 if (err)
225 return (err);
226 if (IXGBE_TX_TRYLOCK(txr)) {
227 ixgbe_mq_start_locked(ifp, txr);
228 IXGBE_TX_UNLOCK(txr);
229 } else
230 taskqueue_enqueue(que->tq, &txr->txq_task);
231
232 return (0);
233}
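/*
** Minimal sketch of the queue-selection policy used in
** ixgbe_mq_start() above, stripped of the RSS bucket lookup and the
** hung-queue fallback (ix_pick_txq is a hypothetical name, shown for
** illustration only).
*/
#if 0
static inline int
ix_pick_txq(struct mbuf *m, int num_queues)
{
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		return (m->m_pkthdr.flowid % num_queues);	/* follow the RX flow */
	return (curcpu % num_queues);				/* no hash: use current CPU */
}
#endif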
234
235int
236ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
237{
238 struct adapter *adapter = txr->adapter;
239 struct mbuf *next;
240 int enqueued = 0, err = 0;
241
242 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
243 adapter->link_active == 0)
244 return (ENETDOWN);
245
246 /* Process the queue */
247#if __FreeBSD_version < 901504
248 next = drbr_dequeue(ifp, txr->br);
249 while (next != NULL) {
250 if ((err = ixgbe_xmit(txr, &next)) != 0) {
251 if (next != NULL)
252 err = drbr_enqueue(ifp, txr->br, next);
253#else
254 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
255 if ((err = ixgbe_xmit(txr, &next)) != 0) {
256 if (next == NULL) {
257 drbr_advance(ifp, txr->br);
258 } else {
259 drbr_putback(ifp, txr->br, next);
260 }
261#endif
262 break;
263 }
264#if __FreeBSD_version >= 901504
265 drbr_advance(ifp, txr->br);
266#endif
267 enqueued++;
268#if 0 // this is VF-only
269#if __FreeBSD_version >= 1100036
270 if (next->m_flags & M_MCAST)
271 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
272#endif
273#endif
274 /* Send a copy of the frame to the BPF listener */
275 ETHER_BPF_MTAP(ifp, next);
276 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
277 break;
278#if __FreeBSD_version < 901504
279 next = drbr_dequeue(ifp, txr->br);
280#endif
281 }
282
283 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
284 ixgbe_txeof(txr);
285
286 return (err);
287}
288
289/*
290 * Called from a taskqueue to drain queued transmit packets.
291 */
292void
293ixgbe_deferred_mq_start(void *arg, int pending)
294{
295 struct tx_ring *txr = arg;
296 struct adapter *adapter = txr->adapter;
297 struct ifnet *ifp = adapter->ifp;
298
299 IXGBE_TX_LOCK(txr);
300 if (!drbr_empty(ifp, txr->br))
301 ixgbe_mq_start_locked(ifp, txr);
302 IXGBE_TX_UNLOCK(txr);
303}
304
305/*
306** Flush all ring buffers
307*/
308void
309ixgbe_qflush(struct ifnet *ifp)
310{
311 struct adapter *adapter = ifp->if_softc;
312 struct tx_ring *txr = adapter->tx_rings;
313 struct mbuf *m;
314
315 for (int i = 0; i < adapter->num_queues; i++, txr++) {
316 IXGBE_TX_LOCK(txr);
317 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
318 m_freem(m);
319 IXGBE_TX_UNLOCK(txr);
320 }
321 if_qflush(ifp);
322}
323#endif /* IXGBE_LEGACY_TX */
324
325
326/*********************************************************************
327 *
328 * This routine maps the mbufs to tx descriptors, allowing the
329 * TX engine to transmit the packets.
330 * - return 0 on success, positive on failure
331 *
332 **********************************************************************/
333
334static int
335ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
336{
337 struct adapter *adapter = txr->adapter;
338 u32 olinfo_status = 0, cmd_type_len;
339 int i, j, error, nsegs;
340 int first;
341 bool remap = TRUE;
342 struct mbuf *m_head;
343 bus_dma_segment_t segs[adapter->num_segs];
344 bus_dmamap_t map;
345 struct ixgbe_tx_buf *txbuf;
346 union ixgbe_adv_tx_desc *txd = NULL;
347
348 m_head = *m_headp;
349
350 /* Basic descriptor defines */
351 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
352 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
353
354 if (m_head->m_flags & M_VLANTAG)
355 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
356
357 /*
358 * Important to capture the first descriptor
359 * used because it will contain the index of
360 * the one we tell the hardware to report back
361 */
362 first = txr->next_avail_desc;
363 txbuf = &txr->tx_buffers[first];
364 map = txbuf->map;
365
366 /*
367 * Map the packet for DMA.
368 */
369retry:
370 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
371 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
372
373 if (__predict_false(error)) {
374 struct mbuf *m;
375
376 switch (error) {
377 case EFBIG:
378 /* Try it again? - one try */
379 if (remap == TRUE) {
380 remap = FALSE;
381 m = m_defrag(*m_headp, M_NOWAIT);
382 if (m == NULL) {
383 adapter->mbuf_defrag_failed++;
384 m_freem(*m_headp);
385 *m_headp = NULL;
386 return (ENOBUFS);
387 }
388 *m_headp = m;
389 goto retry;
390 } else
391 return (error);
392 case ENOMEM:
393 txr->no_tx_dma_setup++;
394 return (error);
395 default:
396 txr->no_tx_dma_setup++;
397 m_freem(*m_headp);
398 *m_headp = NULL;
399 return (error);
400 }
401 }
402
403 /* Make certain there are enough descriptors */
404 if (nsegs > txr->tx_avail - 2) {
405 txr->no_desc_avail++;
406 bus_dmamap_unload(txr->txtag, map);
407 return (ENOBUFS);
408 }
409 m_head = *m_headp;
410
411 /*
412	** Set up the appropriate offload context;
413	** this will consume the first descriptor.
414 */
415 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
416 if (__predict_false(error)) {
417 if (error == ENOBUFS)
418 *m_headp = NULL;
419 return (error);
420 }
421
422#ifdef IXGBE_FDIR
423 /* Do the flow director magic */
424 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
425 ++txr->atr_count;
426 if (txr->atr_count >= atr_sample_rate) {
427 ixgbe_atr(txr, m_head);
428 txr->atr_count = 0;
429 }
430 }
431#endif
432
433 olinfo_status |= IXGBE_ADVTXD_CC;
434 i = txr->next_avail_desc;
435 for (j = 0; j < nsegs; j++) {
436 bus_size_t seglen;
437 bus_addr_t segaddr;
438
439 txbuf = &txr->tx_buffers[i];
440 txd = &txr->tx_base[i];
441 seglen = segs[j].ds_len;
442 segaddr = htole64(segs[j].ds_addr);
443
444 txd->read.buffer_addr = segaddr;
445 txd->read.cmd_type_len = htole32(txr->txd_cmd |
446 cmd_type_len |seglen);
447 txd->read.olinfo_status = htole32(olinfo_status);
448
449 if (++i == txr->num_desc)
450 i = 0;
451 }
452
453 txd->read.cmd_type_len |=
454 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
455 txr->tx_avail -= nsegs;
456 txr->next_avail_desc = i;
457
458 txbuf->m_head = m_head;
459 /*
460 ** Here we swap the map so the last descriptor,
461	** which gets the completion interrupt, has the
462 ** real map, and the first descriptor gets the
463 ** unused map from this descriptor.
464 */
465 txr->tx_buffers[first].map = txbuf->map;
466 txbuf->map = map;
467 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
468
469 /* Set the EOP descriptor that will be marked done */
470 txbuf = &txr->tx_buffers[first];
471 txbuf->eop = txd;
472
473 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
474 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
475 /*
476 * Advance the Transmit Descriptor Tail (Tdt), this tells the
477 * hardware that this frame is available to transmit.
478 */
479 ++txr->total_packets;
480 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
481
482 /* Mark queue as having work */
483 if (txr->busy == 0)
484 txr->busy = 1;
485
486 return (0);
487
488}
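/*
** Worked example of descriptor accounting in ixgbe_xmit() above
** (illustration only): a frame that maps to 4 DMA segments consumes
** 1 context descriptor (always set up by ixgbe_tx_ctx_setup() or
** ixgbe_tso_setup()) plus 4 data descriptors, so 5 ring slots in
** total; the "nsegs > tx_avail - 2" test keeps that headroom free.
*/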
489
490
491/*********************************************************************
492 *
493 * Allocate memory for tx_buffer structures. The tx_buffer stores all
494 * the information needed to transmit a packet on the wire. This is
495 * called only once at attach, setup is done every reset.
496 *
497 **********************************************************************/
498int
499ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
500{
501 struct adapter *adapter = txr->adapter;
502 device_t dev = adapter->dev;
503 struct ixgbe_tx_buf *txbuf;
504 int error, i;
505
506 /*
507 * Setup DMA descriptor areas.
508 */
509 if ((error = bus_dma_tag_create(
510 bus_get_dma_tag(adapter->dev), /* parent */
511 1, 0, /* alignment, bounds */
512 BUS_SPACE_MAXADDR, /* lowaddr */
513 BUS_SPACE_MAXADDR, /* highaddr */
514 NULL, NULL, /* filter, filterarg */
515 IXGBE_TSO_SIZE, /* maxsize */
516 adapter->num_segs, /* nsegments */
517 PAGE_SIZE, /* maxsegsize */
518 0, /* flags */
519 NULL, /* lockfunc */
520 NULL, /* lockfuncarg */
521 &txr->txtag))) {
522 device_printf(dev,"Unable to allocate TX DMA tag\n");
523 goto fail;
524 }
525
526 if (!(txr->tx_buffers =
527 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529 device_printf(dev, "Unable to allocate tx_buffer memory\n");
530 error = ENOMEM;
531 goto fail;
532 }
533
534 /* Create the descriptor buffer dma maps */
535 txbuf = txr->tx_buffers;
536 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
538 if (error != 0) {
539 device_printf(dev, "Unable to create TX DMA map\n");
540 goto fail;
541 }
542 }
543
544 return 0;
545fail:
546	/* Free everything; this handles the case where we failed partway through */
547 ixgbe_free_transmit_structures(adapter);
548 return (error);
549}
550
551/*********************************************************************
552 *
553 * Initialize a transmit ring.
554 *
555 **********************************************************************/
556static void
557ixgbe_setup_transmit_ring(struct tx_ring *txr)
558{
559 struct adapter *adapter = txr->adapter;
560 struct ixgbe_tx_buf *txbuf;
561 int i;
562#ifdef DEV_NETMAP
563 struct netmap_adapter *na = NA(adapter->ifp);
564 struct netmap_slot *slot;
565#endif /* DEV_NETMAP */
566
567 /* Clear the old ring contents */
568 IXGBE_TX_LOCK(txr);
569#ifdef DEV_NETMAP
570 /*
571 * (under lock): if in netmap mode, do some consistency
572 * checks and set slot to entry 0 of the netmap ring.
573 */
574 slot = netmap_reset(na, NR_TX, txr->me, 0);
575#endif /* DEV_NETMAP */
576 bzero((void *)txr->tx_base,
577 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
578 /* Reset indices */
579 txr->next_avail_desc = 0;
580 txr->next_to_clean = 0;
581
582 /* Free any existing tx buffers. */
583 txbuf = txr->tx_buffers;
584 for (i = 0; i < txr->num_desc; i++, txbuf++) {
585 if (txbuf->m_head != NULL) {
586 bus_dmamap_sync(txr->txtag, txbuf->map,
587 BUS_DMASYNC_POSTWRITE);
588 bus_dmamap_unload(txr->txtag, txbuf->map);
589 m_freem(txbuf->m_head);
590 txbuf->m_head = NULL;
591 }
592#ifdef DEV_NETMAP
593 /*
594 * In netmap mode, set the map for the packet buffer.
595 * NOTE: Some drivers (not this one) also need to set
596 * the physical buffer address in the NIC ring.
597 * Slots in the netmap ring (indexed by "si") are
598 * kring->nkr_hwofs positions "ahead" wrt the
599 * corresponding slot in the NIC ring. In some drivers
600 * (not here) nkr_hwofs can be negative. Function
601 * netmap_idx_n2k() handles wraparounds properly.
602 */
603 if (slot) {
604 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
605 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
606 }
607#endif /* DEV_NETMAP */
608 /* Clear the EOP descriptor pointer */
609 txbuf->eop = NULL;
610 }
611
612#ifdef IXGBE_FDIR
613 /* Set the rate at which we sample packets */
614 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615 txr->atr_sample = atr_sample_rate;
616#endif
617
618 /* Set number of descriptors available */
619 txr->tx_avail = adapter->num_tx_desc;
620
621 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623 IXGBE_TX_UNLOCK(txr);
624}
625
626/*********************************************************************
627 *
628 * Initialize all transmit rings.
629 *
630 **********************************************************************/
631int
632ixgbe_setup_transmit_structures(struct adapter *adapter)
633{
634 struct tx_ring *txr = adapter->tx_rings;
635
636 for (int i = 0; i < adapter->num_queues; i++, txr++)
637 ixgbe_setup_transmit_ring(txr);
638
639 return (0);
640}
641
642/*********************************************************************
643 *
644 * Free all transmit rings.
645 *
646 **********************************************************************/
647void
648ixgbe_free_transmit_structures(struct adapter *adapter)
649{
650 struct tx_ring *txr = adapter->tx_rings;
651
652 for (int i = 0; i < adapter->num_queues; i++, txr++) {
653 IXGBE_TX_LOCK(txr);
654 ixgbe_free_transmit_buffers(txr);
655 ixgbe_dma_free(adapter, &txr->txdma);
656 IXGBE_TX_UNLOCK(txr);
657 IXGBE_TX_LOCK_DESTROY(txr);
658 }
659 free(adapter->tx_rings, M_DEVBUF);
660}
661
662/*********************************************************************
663 *
664 * Free transmit ring related data structures.
665 *
666 **********************************************************************/
667static void
668ixgbe_free_transmit_buffers(struct tx_ring *txr)
669{
670 struct adapter *adapter = txr->adapter;
671 struct ixgbe_tx_buf *tx_buffer;
672 int i;
673
674 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
675
676 if (txr->tx_buffers == NULL)
677 return;
678
679 tx_buffer = txr->tx_buffers;
680 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681 if (tx_buffer->m_head != NULL) {
682 bus_dmamap_sync(txr->txtag, tx_buffer->map,
683 BUS_DMASYNC_POSTWRITE);
684 bus_dmamap_unload(txr->txtag,
685 tx_buffer->map);
686 m_freem(tx_buffer->m_head);
687 tx_buffer->m_head = NULL;
688 if (tx_buffer->map != NULL) {
689 bus_dmamap_destroy(txr->txtag,
690 tx_buffer->map);
691 tx_buffer->map = NULL;
692 }
693 } else if (tx_buffer->map != NULL) {
694 bus_dmamap_unload(txr->txtag,
695 tx_buffer->map);
696 bus_dmamap_destroy(txr->txtag,
697 tx_buffer->map);
698 tx_buffer->map = NULL;
699 }
700 }
701#ifdef IXGBE_LEGACY_TX
702 if (txr->br != NULL)
703 buf_ring_free(txr->br, M_DEVBUF);
704#endif
705 if (txr->tx_buffers != NULL) {
706 free(txr->tx_buffers, M_DEVBUF);
707 txr->tx_buffers = NULL;
708 }
709 if (txr->txtag != NULL) {
710 bus_dma_tag_destroy(txr->txtag);
711 txr->txtag = NULL;
712 }
713 return;
714}
715
716/*********************************************************************
717 *
718 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
719 *
720 **********************************************************************/
721
722static int
723ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724 u32 *cmd_type_len, u32 *olinfo_status)
725{
726 struct ixgbe_adv_tx_context_desc *TXD;
727 struct ether_vlan_header *eh;
728 struct ip *ip;
729 struct ip6_hdr *ip6;
730 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
731 int ehdrlen, ip_hlen = 0;
732 u16 etype;
733 u8 ipproto = 0;
734 int offload = TRUE;
735 int ctxd = txr->next_avail_desc;
736 u16 vtag = 0;
737
738 /* First check if TSO is to be used */
739 if (mp->m_pkthdr.csum_flags & CSUM_TSO)
740 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
741
742 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
743 offload = FALSE;
744
745 /* Indicate the whole packet as payload when not doing TSO */
746 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
747
748 /* Now ready a context descriptor */
749 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
750
751 /*
752 ** In advanced descriptors the vlan tag must
753 ** be placed into the context descriptor. Hence
754 ** we need to make one even if not doing offloads.
755 */
756 if (mp->m_flags & M_VLANTAG) {
757 vtag = htole16(mp->m_pkthdr.ether_vtag);
758 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
759 }
760
761 /*
762 * Determine where frame payload starts.
763 * Jump over vlan headers if already present,
764 * helpful for QinQ too.
765 */
766 eh = mtod(mp, struct ether_vlan_header *);
767 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
768 etype = ntohs(eh->evl_proto);
769 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
770 } else {
771 etype = ntohs(eh->evl_encap_proto);
772 ehdrlen = ETHER_HDR_LEN;
773 }
774
775 /* Set the ether header length */
776 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
777
778 if (offload == FALSE)
779 goto no_offloads;
780
781 switch (etype) {
782 case ETHERTYPE_IP:
783 ip = (struct ip *)(mp->m_data + ehdrlen);
784 ip_hlen = ip->ip_hl << 2;
785 ipproto = ip->ip_p;
786 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
787 break;
788 case ETHERTYPE_IPV6:
789 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
790 ip_hlen = sizeof(struct ip6_hdr);
791 /* XXX-BZ this will go badly in case of ext hdrs. */
792 ipproto = ip6->ip6_nxt;
793 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
794 break;
795 default:
796 offload = FALSE;
797 break;
798 }
799
800 vlan_macip_lens |= ip_hlen;
801
802 switch (ipproto) {
803 case IPPROTO_TCP:
804 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
805 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
806 break;
807
808 case IPPROTO_UDP:
809 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
810 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
811 break;
812
813#if __FreeBSD_version >= 800000
814 case IPPROTO_SCTP:
815 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
816 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
817 break;
818#endif
819 default:
820 offload = FALSE;
821 break;
822 }
823
824 if (offload) /* For the TX descriptor setup */
825 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
826
827no_offloads:
828 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
829
830 /* Now copy bits into descriptor */
831 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
832 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
833 TXD->seqnum_seed = htole32(0);
834 TXD->mss_l4len_idx = htole32(0);
835
836 /* We've consumed the first desc, adjust counters */
837 if (++ctxd == txr->num_desc)
838 ctxd = 0;
839 txr->next_avail_desc = ctxd;
840 --txr->tx_avail;
841
842 return (0);
843}
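/*
** Worked example of the vlan_macip_lens packing done in
** ixgbe_tx_ctx_setup() above (illustration only): an untagged
** IPv4/TCP frame with a 14-byte Ethernet header and a 20-byte IP
** header yields
**
**	vlan_macip_lens = (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | 20;
**
** while type_tucmd_mlhl carries TUCMD_IPV4 | TUCMD_L4T_TCP plus the
** DEXT/CTXT bits, telling the hardware where L3 starts and how long
** the IP header is.
*/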
844
845/**********************************************************************
846 *
847 * Setup work for hardware segmentation offload (TSO) on
848 * adapters using advanced tx descriptors
849 *
850 **********************************************************************/
851static int
852ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
853 u32 *cmd_type_len, u32 *olinfo_status)
854{
855 struct ixgbe_adv_tx_context_desc *TXD;
856 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
857 u32 mss_l4len_idx = 0, paylen;
858 u16 vtag = 0, eh_type;
859 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
860 struct ether_vlan_header *eh;
861#ifdef INET6
862 struct ip6_hdr *ip6;
863#endif
864#ifdef INET
865 struct ip *ip;
866#endif
867 struct tcphdr *th;
868
869
870 /*
871 * Determine where frame payload starts.
872 * Jump over vlan headers if already present
873 */
874 eh = mtod(mp, struct ether_vlan_header *);
875 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
876 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
877 eh_type = eh->evl_proto;
878 } else {
879 ehdrlen = ETHER_HDR_LEN;
880 eh_type = eh->evl_encap_proto;
881 }
882
883 switch (ntohs(eh_type)) {
884#ifdef INET6
885 case ETHERTYPE_IPV6:
886 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
887 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
888 if (ip6->ip6_nxt != IPPROTO_TCP)
889 return (ENXIO);
890 ip_hlen = sizeof(struct ip6_hdr);
891 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
892 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
893 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
894 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
895 break;
896#endif
897#ifdef INET
898 case ETHERTYPE_IP:
899 ip = (struct ip *)(mp->m_data + ehdrlen);
900 if (ip->ip_p != IPPROTO_TCP)
901 return (ENXIO);
902 ip->ip_sum = 0;
903 ip_hlen = ip->ip_hl << 2;
904 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
905 th->th_sum = in_pseudo(ip->ip_src.s_addr,
906 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
907 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
908 /* Tell transmit desc to also do IPv4 checksum. */
909 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
910 break;
911#endif
912 default:
913 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
914 __func__, ntohs(eh_type));
915 break;
916 }
917
918 ctxd = txr->next_avail_desc;
919 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
920
921 tcp_hlen = th->th_off << 2;
922
923 /* This is used in the transmit desc in encap */
924 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
925
926 /* VLAN MACLEN IPLEN */
927 if (mp->m_flags & M_VLANTAG) {
928 vtag = htole16(mp->m_pkthdr.ether_vtag);
929 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
930 }
931
932 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
933 vlan_macip_lens |= ip_hlen;
934 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
935
936 /* ADV DTYPE TUCMD */
937 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
938 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
939 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
940
941 /* MSS L4LEN IDX */
942 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
943 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
944 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
945
946 TXD->seqnum_seed = htole32(0);
947
948 if (++ctxd == txr->num_desc)
949 ctxd = 0;
950
951 txr->tx_avail--;
952 txr->next_avail_desc = ctxd;
953 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
954 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
955 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
956 ++txr->tso_tx;
957 return (0);
958}
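/*
** Worked example of the TSO fields set up above (illustration only):
** a 45,066-byte TCP send with a 14-byte Ethernet header, 20-byte
** IPv4 header and 32-byte TCP header (tso_segsz = 1448) gives
** paylen = 45066 - 14 - 20 - 32 = 45000, and
**
**	mss_l4len_idx = (1448 << IXGBE_ADVTXD_MSS_SHIFT) |
**	    (32 << IXGBE_ADVTXD_L4LEN_SHIFT);
**
** The hardware then cuts the payload into 1448-byte segments and
** replicates/fixes up the headers for each one.
*/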
959
960
961/**********************************************************************
962 *
963 * Examine each tx_buffer in the used queue. If the hardware is done
964 * processing the packet then free associated resources. The
965 * tx_buffer is put back on the free queue.
966 *
967 **********************************************************************/
968void
969ixgbe_txeof(struct tx_ring *txr)
970{
971#ifdef DEV_NETMAP
972 struct adapter *adapter = txr->adapter;
973 struct ifnet *ifp = adapter->ifp;
974#endif
975 u32 work, processed = 0;
976 u16 limit = txr->process_limit;
977 struct ixgbe_tx_buf *buf;
978 union ixgbe_adv_tx_desc *txd;
979
980 mtx_assert(&txr->tx_mtx, MA_OWNED);
981
982#ifdef DEV_NETMAP
983 if (ifp->if_capenable & IFCAP_NETMAP) {
984 struct netmap_adapter *na = NA(ifp);
985 struct netmap_kring *kring = &na->tx_rings[txr->me];
986 txd = txr->tx_base;
987 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
988 BUS_DMASYNC_POSTREAD);
989 /*
990 * In netmap mode, all the work is done in the context
991 * of the client thread. Interrupt handlers only wake up
992 * clients, which may be sleeping on individual rings
993 * or on a global resource for all rings.
994 * To implement tx interrupt mitigation, we wake up the client
995 * thread roughly every half ring, even if the NIC interrupts
996 * more frequently. This is implemented as follows:
997 * - ixgbe_txsync() sets kring->nr_kflags with the index of
998 * the slot that should wake up the thread (nkr_num_slots
999 * means the user thread should not be woken up);
1000 * - the driver ignores tx interrupts unless netmap_mitigate=0
1001 * or the slot has the DD bit set.
1002 */
1003 if (!netmap_mitigate ||
1004 (kring->nr_kflags < kring->nkr_num_slots &&
1005 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1006 netmap_tx_irq(ifp, txr->me);
1007 }
1008 return;
1009 }
1010#endif /* DEV_NETMAP */
1011
1012 if (txr->tx_avail == txr->num_desc) {
1013 txr->busy = 0;
1014 return;
1015 }
1016
1017 /* Get work starting point */
1018 work = txr->next_to_clean;
1019 buf = &txr->tx_buffers[work];
1020 txd = &txr->tx_base[work];
1021 work -= txr->num_desc; /* The distance to ring end */
1022 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1023 BUS_DMASYNC_POSTREAD);
1024
1025 do {
1026 union ixgbe_adv_tx_desc *eop= buf->eop;
1027 if (eop == NULL) /* No work */
1028 break;
1029
1030 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1031 break; /* I/O not complete */
1032
1033 if (buf->m_head) {
1034 txr->bytes +=
1035 buf->m_head->m_pkthdr.len;
1036 bus_dmamap_sync(txr->txtag,
1037 buf->map,
1038 BUS_DMASYNC_POSTWRITE);
1039 bus_dmamap_unload(txr->txtag,
1040 buf->map);
1041 m_freem(buf->m_head);
1042 buf->m_head = NULL;
1043 buf->map = NULL;
1044 }
1045 buf->eop = NULL;
1046 ++txr->tx_avail;
1047
1048 /* We clean the range if multi segment */
1049 while (txd != eop) {
1050 ++txd;
1051 ++buf;
1052 ++work;
1053 /* wrap the ring? */
1054 if (__predict_false(!work)) {
1055 work -= txr->num_desc;
1056 buf = txr->tx_buffers;
1057 txd = txr->tx_base;
1058 }
1059 if (buf->m_head) {
1060 txr->bytes +=
1061 buf->m_head->m_pkthdr.len;
1062 bus_dmamap_sync(txr->txtag,
1063 buf->map,
1064 BUS_DMASYNC_POSTWRITE);
1065 bus_dmamap_unload(txr->txtag,
1066 buf->map);
1067 m_freem(buf->m_head);
1068 buf->m_head = NULL;
1069 buf->map = NULL;
1070 }
1071 ++txr->tx_avail;
1072 buf->eop = NULL;
1073
1074 }
1075 ++txr->packets;
1076 ++processed;
1077
1078 /* Try the next packet */
1079 ++txd;
1080 ++buf;
1081 ++work;
1082 /* reset with a wrap */
1083 if (__predict_false(!work)) {
1084 work -= txr->num_desc;
1085 buf = txr->tx_buffers;
1086 txd = txr->tx_base;
1087 }
1088 prefetch(txd);
1089 } while (__predict_true(--limit));
1090
1091 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1092 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1093
1094 work += txr->num_desc;
1095 txr->next_to_clean = work;
1096
1097 /*
1098	** Queue Hang detection: we know there is
1099	** work outstanding or the early return above
1100	** would have been taken, so increment busy
1101	** if nothing managed to get cleaned; the
1102	** local timer will then check it and mark
1103	** the queue HUNG if it exceeds the MAX attempts.
1104 */
1105 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1106 ++txr->busy;
1107 /*
1108 ** If anything gets cleaned we reset state to 1,
1109	** note this will turn off HUNG if it's set.
1110 */
1111 if (processed)
1112 txr->busy = 1;
1113
1114 if (txr->tx_avail == txr->num_desc)
1115 txr->busy = 0;
1116
1117 return;
1118}
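/*
** Worked example of the negative-index trick in ixgbe_txeof() above
** (illustration only): with num_desc = 1024 and next_to_clean = 1022,
** work starts at 1022 - 1024 = -2.  After two increments work reaches
** 0, which flags the wrap; work is then rebased by another -1024
** while buf/txd are reset to the start of the ring.  When the loop
** ends, work + num_desc recovers the real ring index that is stored
** back into next_to_clean.
*/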
1119
1120
1121#ifdef IXGBE_FDIR
1122/*
1123** This routine parses packet headers so that Flow
1124** Director can make a hashed filter table entry
1125** allowing traffic flows to be identified and kept
1126** on the same cpu. Parsing every packet would be
1127** a performance hit, so we only do it for 1 in
1128** IXGBE_FDIR_RATE packets.
1129*/
1130static void
1131ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1132{
1133 struct adapter *adapter = txr->adapter;
1134 struct ix_queue *que;
1135 struct ip *ip;
1136 struct tcphdr *th;
1137 struct udphdr *uh;
1138 struct ether_vlan_header *eh;
1139 union ixgbe_atr_hash_dword input = {.dword = 0};
1140 union ixgbe_atr_hash_dword common = {.dword = 0};
1141 int ehdrlen, ip_hlen;
1142 u16 etype;
1143
1144 eh = mtod(mp, struct ether_vlan_header *);
1145 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1146 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1147 etype = eh->evl_proto;
1148 } else {
1149 ehdrlen = ETHER_HDR_LEN;
1150 etype = eh->evl_encap_proto;
1151 }
1152
1153 /* Only handling IPv4 */
1154 if (etype != htons(ETHERTYPE_IP))
1155 return;
1156
1157 ip = (struct ip *)(mp->m_data + ehdrlen);
1158 ip_hlen = ip->ip_hl << 2;
1159
1160 /* check if we're UDP or TCP */
1161 switch (ip->ip_p) {
1162 case IPPROTO_TCP:
1163 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1164 /* src and dst are inverted */
1165 common.port.dst ^= th->th_sport;
1166 common.port.src ^= th->th_dport;
1167 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1168 break;
1169 case IPPROTO_UDP:
1170 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1171 /* src and dst are inverted */
1172 common.port.dst ^= uh->uh_sport;
1173 common.port.src ^= uh->uh_dport;
1174 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1175 break;
1176 default:
1177 return;
1178 }
1179
1180 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1181 if (mp->m_pkthdr.ether_vtag)
1182 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1183 else
1184 common.flex_bytes ^= etype;
1185 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1186
1187 que = &adapter->queues[txr->me];
1188 /*
1189 ** This assumes the Rx queue and Tx
1190 ** queue are bound to the same CPU
1191 */
1192 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1193 input, common, que->msix);
1194}
1195#endif /* IXGBE_FDIR */
1196
1197/*
1198** Used to detect a descriptor that has
1199** been merged by Hardware RSC.
1200*/
1201static inline u32
1202ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1203{
1204 return (le32toh(rx->wb.lower.lo_dword.data) &
1205 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1206}
1207
1208/*********************************************************************
1209 *
1210 * Initialize Hardware RSC (LRO) feature on 82599
1211 * for an RX ring; it is toggled by the LRO capability
1212 * even though it is transparent to the stack.
1213 *
1214 * NOTE: since this HW feature only works with IPv4 and
1215 * our testing has shown soft LRO to be as effective,
1216 * it is disabled by default.
1217 *
1218 **********************************************************************/
1219static void
1220ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1221{
1222 struct adapter *adapter = rxr->adapter;
1223 struct ixgbe_hw *hw = &adapter->hw;
1224 u32 rscctrl, rdrxctl;
1225
1226 /* If turning LRO/RSC off we need to disable it */
1227 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1228 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1229		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); /* push the cleared enable bit to HW */
1230 return;
1231 }
1232
1233 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1234 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1235#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1236 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1237#endif /* DEV_NETMAP */
1238 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1239 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1240 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1241
1242 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1243 rscctrl |= IXGBE_RSCCTL_RSCEN;
1244 /*
1245 ** Limit the total number of descriptors that
1246 ** can be combined, so it does not exceed 64K
1247 */
1248 if (rxr->mbuf_sz == MCLBYTES)
1249 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1250 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1251 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1252 else if (rxr->mbuf_sz == MJUM9BYTES)
1253 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1254 else /* Using 16K cluster */
1255 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1256
1257 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1258
1259 /* Enable TCP header recognition */
1260 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1261 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1262 IXGBE_PSRTYPE_TCPHDR));
1263
1264 /* Disable RSC for ACK packets */
1265 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1266 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1267
1268 rxr->hw_rsc = TRUE;
1269}
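/*
** Worked example of the MAXDESC limit chosen above (illustration
** only): with 2KB (MCLBYTES) clusters and MAXDESC_16, a merged RSC
** frame is at most 16 * 2048 = 32768 bytes; with 4KB (MJUMPAGESIZE)
** clusters and MAXDESC_8 it is 8 * 4096 = 32768 bytes.  Either way
** the combined total stays safely under the 64K cap mentioned in
** the comment.
*/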
1270/*********************************************************************
1271 *
1272 * Refresh mbuf buffers for RX descriptor rings
1273 *  - now keeps its own state so discards due to resource
1274 *    exhaustion are unnecessary; if an mbuf cannot be obtained
1275 *    it just returns, keeping its placeholder, and can simply
1276 *    be called again later to retry.
1277 *
1278 **********************************************************************/
1279static void
1280ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1281{
1282 struct adapter *adapter = rxr->adapter;
1283 bus_dma_segment_t seg[1];
1284 struct ixgbe_rx_buf *rxbuf;
1285 struct mbuf *mp;
1286 int i, j, nsegs, error;
1287 bool refreshed = FALSE;
1288
1289 i = j = rxr->next_to_refresh;
1290 /* Control the loop with one beyond */
1291 if (++j == rxr->num_desc)
1292 j = 0;
1293
1294 while (j != limit) {
1295 rxbuf = &rxr->rx_buffers[i];
1296 if (rxbuf->buf == NULL) {
1297 mp = m_getjcl(M_NOWAIT, MT_DATA,
1298 M_PKTHDR, rxr->mbuf_sz);
1299 if (mp == NULL)
1300 goto update;
1301 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1302 m_adj(mp, ETHER_ALIGN);
1303 } else
1304 mp = rxbuf->buf;
1305
1306 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1307
1308 /* If we're dealing with an mbuf that was copied rather
1309 * than replaced, there's no need to go through busdma.
1310 */
1311 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1312 /* Get the memory mapping */
1313 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1314 rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1315 if (error != 0) {
1316 printf("Refresh mbufs: payload dmamap load"
1317 " failure - %d\n", error);
1318 m_free(mp);
1319 rxbuf->buf = NULL;
1320 goto update;
1321 }
1322 rxbuf->buf = mp;
1323 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1324 BUS_DMASYNC_PREREAD);
1325 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1326 htole64(seg[0].ds_addr);
1327 } else {
1328 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1329 rxbuf->flags &= ~IXGBE_RX_COPY;
1330 }
1331
1332 refreshed = TRUE;
1333 /* Next is precalculated */
1334 i = j;
1335 rxr->next_to_refresh = i;
1336 if (++j == rxr->num_desc)
1337 j = 0;
1338 }
1339update:
1340 if (refreshed) /* Update hardware tail index */
1341 IXGBE_WRITE_REG(&adapter->hw,
1342 rxr->tail, rxr->next_to_refresh);
1343 return;
1344}
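/*
** Worked example of the "one beyond" loop control in
** ixgbe_refresh_mbufs() above (illustration only): with
** num_desc = 512, next_to_refresh = 508 and limit = 2, the loop
** refreshes slots 508-511 and 0, leaves next_to_refresh at 1, and
** writes 1 to the tail register; it always stops short of the
** cleaner's position (the limit) so the hardware is never handed
** descriptors that software has not finished with.
*/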
1345
1346/*********************************************************************
1347 *
1348 * Allocate memory for rx_buffer structures. Since we use one
1349 * rx_buffer per received packet, the maximum number of rx_buffer's
1350 * that we'll need is equal to the number of receive descriptors
1351 * that we've allocated.
1352 *
1353 **********************************************************************/
1354int
1355ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1356{
1357 struct adapter *adapter = rxr->adapter;
1358 device_t dev = adapter->dev;
1359 struct ixgbe_rx_buf *rxbuf;
1360 int i, bsize, error;
1361
1362 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1363 if (!(rxr->rx_buffers =
1364 (struct ixgbe_rx_buf *) malloc(bsize,
1365 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1366 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1367 error = ENOMEM;
1368 goto fail;
1369 }
1370
1371 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1372 1, 0, /* alignment, bounds */
1373 BUS_SPACE_MAXADDR, /* lowaddr */
1374 BUS_SPACE_MAXADDR, /* highaddr */
1375 NULL, NULL, /* filter, filterarg */
1376 MJUM16BYTES, /* maxsize */
1377 1, /* nsegments */
1378 MJUM16BYTES, /* maxsegsize */
1379 0, /* flags */
1380 NULL, /* lockfunc */
1381 NULL, /* lockfuncarg */
1382 &rxr->ptag))) {
1383 device_printf(dev, "Unable to create RX DMA tag\n");
1384 goto fail;
1385 }
1386
1387 for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1388 rxbuf = &rxr->rx_buffers[i];
1389 error = bus_dmamap_create(rxr->ptag,
1390 BUS_DMA_NOWAIT, &rxbuf->pmap);
1391 if (error) {
1392 device_printf(dev, "Unable to create RX dma map\n");
1393 goto fail;
1394 }
1395 }
1396
1397 return (0);
1398
1399fail:
1400 /* Frees all, but can handle partial completion */
1401 ixgbe_free_receive_structures(adapter);
1402 return (error);
1403}
1404
1405
1406static void
1407ixgbe_free_receive_ring(struct rx_ring *rxr)
1408{
1409 struct ixgbe_rx_buf *rxbuf;
1410 int i;
1411
1412 for (i = 0; i < rxr->num_desc; i++) {
1413 rxbuf = &rxr->rx_buffers[i];
1414 if (rxbuf->buf != NULL) {
1415 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1416 BUS_DMASYNC_POSTREAD);
1417 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1418 rxbuf->buf->m_flags |= M_PKTHDR;
1419 m_freem(rxbuf->buf);
1420 rxbuf->buf = NULL;
1421 rxbuf->flags = 0;
1422 }
1423 }
1424}
1425
1426
1427/*********************************************************************
1428 *
1429 * Initialize a receive ring and its buffers.
1430 *
1431 **********************************************************************/
1432static int
1433ixgbe_setup_receive_ring(struct rx_ring *rxr)
1434{
1435 struct adapter *adapter;
1436 struct ifnet *ifp;
1437 device_t dev;
1438 struct ixgbe_rx_buf *rxbuf;
1439 bus_dma_segment_t seg[1];
1440 struct lro_ctrl *lro = &rxr->lro;
1441 int rsize, nsegs, error = 0;
1442#ifdef DEV_NETMAP
1443 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1444 struct netmap_slot *slot;
1445#endif /* DEV_NETMAP */
1446
1447 adapter = rxr->adapter;
1448 ifp = adapter->ifp;
1449 dev = adapter->dev;
1450
1451 /* Clear the ring contents */
1452 IXGBE_RX_LOCK(rxr);
1453#ifdef DEV_NETMAP
1454 /* same as in ixgbe_setup_transmit_ring() */
1455 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1456#endif /* DEV_NETMAP */
1457 rsize = roundup2(adapter->num_rx_desc *
1458 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1459 bzero((void *)rxr->rx_base, rsize);
1460 /* Cache the size */
1461 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1462
1463 /* Free current RX buffer structs and their mbufs */
1464 ixgbe_free_receive_ring(rxr);
1465
1466 /* Now replenish the mbufs */
1467 for (int j = 0; j != rxr->num_desc; ++j) {
1468 struct mbuf *mp;
1469
1470 rxbuf = &rxr->rx_buffers[j];
1471#ifdef DEV_NETMAP
1472 /*
1473 * In netmap mode, fill the map and set the buffer
1474 * address in the NIC ring, considering the offset
1475 * between the netmap and NIC rings (see comment in
1476 * ixgbe_setup_transmit_ring() ). No need to allocate
1477 * an mbuf, so end the block with a continue;
1478 */
1479 if (slot) {
1480 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1481 uint64_t paddr;
1482 void *addr;
1483
1484 addr = PNMB(na, slot + sj, &paddr);
1485 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1486 /* Update descriptor and the cached value */
1487 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1488 rxbuf->addr = htole64(paddr);
1489 continue;
1490 }
1491#endif /* DEV_NETMAP */
1492 rxbuf->flags = 0;
1493 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1494 M_PKTHDR, adapter->rx_mbuf_sz);
1495 if (rxbuf->buf == NULL) {
1496 error = ENOBUFS;
1497 goto fail;
1498 }
1499 mp = rxbuf->buf;
1500 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1501 /* Get the memory mapping */
1502 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1503 rxbuf->pmap, mp, seg,
1504 &nsegs, BUS_DMA_NOWAIT);
1505 if (error != 0)
1506 goto fail;
1507 bus_dmamap_sync(rxr->ptag,
1508 rxbuf->pmap, BUS_DMASYNC_PREREAD);
1509 /* Update the descriptor and the cached value */
1510 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1511 rxbuf->addr = htole64(seg[0].ds_addr);
1512 }
1513
1514
1515 /* Setup our descriptor indices */
1516 rxr->next_to_check = 0;
1517 rxr->next_to_refresh = 0;
1518 rxr->lro_enabled = FALSE;
1519 rxr->rx_copies = 0;
1520 rxr->rx_bytes = 0;
1521 rxr->vtag_strip = FALSE;
1522
1523 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1524 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1525
1526 /*
1527 ** Now set up the LRO interface:
1528 */
1529 if (ixgbe_rsc_enable)
1530 ixgbe_setup_hw_rsc(rxr);
1531 else if (ifp->if_capenable & IFCAP_LRO) {
1532 int err = tcp_lro_init(lro);
1533 if (err) {
1534 device_printf(dev, "LRO Initialization failed!\n");
1535 goto fail;
1536 }
1537 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1538 rxr->lro_enabled = TRUE;
1539 lro->ifp = adapter->ifp;
1540 }
1541
1542 IXGBE_RX_UNLOCK(rxr);
1543 return (0);
1544
1545fail:
1546 ixgbe_free_receive_ring(rxr);
1547 IXGBE_RX_UNLOCK(rxr);
1548 return (error);
1549}
1550
1551/*********************************************************************
1552 *
1553 * Initialize all receive rings.
1554 *
1555 **********************************************************************/
1556int
1557ixgbe_setup_receive_structures(struct adapter *adapter)
1558{
1559 struct rx_ring *rxr = adapter->rx_rings;
1560 int j;
1561
1562 for (j = 0; j < adapter->num_queues; j++, rxr++)
1563 if (ixgbe_setup_receive_ring(rxr))
1564 goto fail;
1565
1566 return (0);
1567fail:
1568 /*
1569	 * Free RX buffers allocated so far; we only handle
1570	 * the rings that completed, since the failing case will
1571	 * have cleaned up after itself. 'j' failed, so it's the terminus.
1572 */
1573 for (int i = 0; i < j; ++i) {
1574 rxr = &adapter->rx_rings[i];
1575 ixgbe_free_receive_ring(rxr);
1576 }
1577
1578 return (ENOBUFS);
1579}
1580
1581
1582/*********************************************************************
1583 *
1584 * Free all receive rings.
1585 *
1586 **********************************************************************/
1587void
1588ixgbe_free_receive_structures(struct adapter *adapter)
1589{
1590 struct rx_ring *rxr = adapter->rx_rings;
1591
1592 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1593
1594 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1595 struct lro_ctrl *lro = &rxr->lro;
1596 ixgbe_free_receive_buffers(rxr);
1597 /* Free LRO memory */
1598 tcp_lro_free(lro);
1599 /* Free the ring memory as well */
1600 ixgbe_dma_free(adapter, &rxr->rxdma);
1601 }
1602
1603 free(adapter->rx_rings, M_DEVBUF);
1604}
1605
1606
1607/*********************************************************************
1608 *
1609 * Free receive ring data structures
1610 *
1611 **********************************************************************/
1612void
1613ixgbe_free_receive_buffers(struct rx_ring *rxr)
1614{
1615 struct adapter *adapter = rxr->adapter;
1616 struct ixgbe_rx_buf *rxbuf;
1617
1618 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1619
1620 /* Cleanup any existing buffers */
1621 if (rxr->rx_buffers != NULL) {
1622 for (int i = 0; i < adapter->num_rx_desc; i++) {
1623 rxbuf = &rxr->rx_buffers[i];
1624 if (rxbuf->buf != NULL) {
1625 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1626 BUS_DMASYNC_POSTREAD);
1627 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1628 rxbuf->buf->m_flags |= M_PKTHDR;
1629 m_freem(rxbuf->buf);
1630 }
1631 rxbuf->buf = NULL;
1632 if (rxbuf->pmap != NULL) {
1633 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1634 rxbuf->pmap = NULL;
1635 }
1636 }
1637 if (rxr->rx_buffers != NULL) {
1638 free(rxr->rx_buffers, M_DEVBUF);
1639 rxr->rx_buffers = NULL;
1640 }
1641 }
1642
1643 if (rxr->ptag != NULL) {
1644 bus_dma_tag_destroy(rxr->ptag);
1645 rxr->ptag = NULL;
1646 }
1647
1648 return;
1649}
1650
1651static __inline void
1652ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1653{
1654
1655 /*
1656	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
1657	 * has been computed by hardware, and which carry no VLAN tag in the
1658	 * ethernet header. In the case of IPv6 we do not yet support ext. hdrs.
1659 */
1660 if (rxr->lro_enabled &&
1661 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1662 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1663 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1664 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1665 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1666 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1667 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1668 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1669 /*
1670 * Send to the stack if:
1671 ** - LRO not enabled, or
1672 ** - no LRO resources, or
1673 ** - lro enqueue fails
1674 */
1675 if (rxr->lro.lro_cnt != 0)
1676 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1677 return;
1678 }
1679 IXGBE_RX_UNLOCK(rxr);
1680 (*ifp->if_input)(ifp, m);
1681 IXGBE_RX_LOCK(rxr);
1682}
1683
1684static __inline void
1685ixgbe_rx_discard(struct rx_ring *rxr, int i)
1686{
1687 struct ixgbe_rx_buf *rbuf;
1688
1689 rbuf = &rxr->rx_buffers[i];
1690
1691
1692 /*
1693 ** With advanced descriptors the writeback
1694 ** clobbers the buffer addrs, so its easier
1695 ** to just free the existing mbufs and take
1696 ** the normal refresh path to get new buffers
1697 ** and mapping.
1698 */
1699
1700 if (rbuf->fmp != NULL) {/* Partial chain ? */
1701 rbuf->fmp->m_flags |= M_PKTHDR;
1702 m_freem(rbuf->fmp);
1703 rbuf->fmp = NULL;
1704 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1705 } else if (rbuf->buf) {
1706 m_free(rbuf->buf);
1707 rbuf->buf = NULL;
1708 }
1709
1710 rbuf->flags = 0;
1711
1712 return;
1713}
1714
1715
1716/*********************************************************************
1717 *
1718 * This routine executes in interrupt context. It replenishes
1719 * the mbufs in the descriptor ring and sends data which has
1720 * been dma'ed into host memory to the upper layer.
1721 *
1722 * We loop at most count times if count is > 0, or until done if
1723 * count < 0.
1724 *
1725 * Return TRUE for more work, FALSE for all clean.
1726 *********************************************************************/
1727bool
1728ixgbe_rxeof(struct ix_queue *que)
1729{
1730 struct adapter *adapter = que->adapter;
1731 struct rx_ring *rxr = que->rxr;
1732 struct ifnet *ifp = adapter->ifp;
1733 struct lro_ctrl *lro = &rxr->lro;
1734 struct lro_entry *queued;
1735 int i, nextp, processed = 0;
1736 u32 staterr = 0;
1737 u16 count = rxr->process_limit;
1738 union ixgbe_adv_rx_desc *cur;
1739 struct ixgbe_rx_buf *rbuf, *nbuf;
1740 u16 pkt_info;
1741
1742 IXGBE_RX_LOCK(rxr);
1743
1744#ifdef DEV_NETMAP
1745 /* Same as the txeof routine: wakeup clients on intr. */
1746 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1747 IXGBE_RX_UNLOCK(rxr);
1748 return (FALSE);
1749 }
1750#endif /* DEV_NETMAP */
1751
1752 for (i = rxr->next_to_check; count != 0;) {
1753 struct mbuf *sendmp, *mp;
1754 u32 rsc, ptype;
1755 u16 len;
1756 u16 vtag = 0;
1757 bool eop;
1758
1759 /* Sync the ring. */
1760 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1761 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1762
1763 cur = &rxr->rx_base[i];
1764 staterr = le32toh(cur->wb.upper.status_error);
1765 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1766
1767 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1768 break;
1769 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1770 break;
1771
1772 count--;
1773 sendmp = NULL;
1774 nbuf = NULL;
1775 rsc = 0;
1776 cur->wb.upper.status_error = 0;
1777 rbuf = &rxr->rx_buffers[i];
1778 mp = rbuf->buf;
1779
1780 len = le16toh(cur->wb.upper.length);
1781 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1782 IXGBE_RXDADV_PKTTYPE_MASK;
1783 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1784
1785 /* Make sure bad packets are discarded */
1786 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1787#if 0 // VF-only
1788#if __FreeBSD_version >= 1100036
1789 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1790#endif
1791#endif
1792 rxr->rx_discarded++;
1793 ixgbe_rx_discard(rxr, i);
1794 goto next_desc;
1795 }
1796
1797 /*
1798 ** On 82599 which supports a hardware
1799 ** LRO (called HW RSC), packets need
1800 ** not be fragmented across sequential
1801	** descriptors; instead, the next descriptor
1802	** is indicated in bits of the descriptor.
1803	** This also means that we might process
1804	** more than one packet at a time, something
1805	** that has never been true before; it
1806	** required eliminating global chain pointers
1807	** in favor of what we are doing here. -jfv
1808 */
1809 if (!eop) {
1810 /*
1811 ** Figure out the next descriptor
1812 ** of this frame.
1813 */
1814 if (rxr->hw_rsc == TRUE) {
1815 rsc = ixgbe_rsc_count(cur);
1816 rxr->rsc_num += (rsc - 1);
1817 }
1818 if (rsc) { /* Get hardware index */
1819 nextp = ((staterr &
1820 IXGBE_RXDADV_NEXTP_MASK) >>
1821 IXGBE_RXDADV_NEXTP_SHIFT);
1822 } else { /* Just sequential */
1823 nextp = i + 1;
1824 if (nextp == adapter->num_rx_desc)
1825 nextp = 0;
1826 }
1827 nbuf = &rxr->rx_buffers[nextp];
1828 prefetch(nbuf);
1829 }
1830 /*
1831 ** Rather than using the fmp/lmp global pointers
1832 ** we now keep the head of a packet chain in the
1833 ** buffer struct and pass this along from one
1834 ** descriptor to the next, until we get EOP.
1835 */
1836 mp->m_len = len;
1837 /*
1838	** See if there is a stored head;
1839	** that determines how we proceed.
1840 */
1841 sendmp = rbuf->fmp;
1842 if (sendmp != NULL) { /* secondary frag */
1843 rbuf->buf = rbuf->fmp = NULL;
1844 mp->m_flags &= ~M_PKTHDR;
1845 sendmp->m_pkthdr.len += mp->m_len;
1846 } else {
1847 /*
1848 * Optimize. This might be a small packet,
1849 * maybe just a TCP ACK. Do a fast copy that
1850 * is cache aligned into a new mbuf, and
1851 * leave the old mbuf+cluster for re-use.
1852 */
1853 if (eop && len <= IXGBE_RX_COPY_LEN) {
1854 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1855 if (sendmp != NULL) {
1856 sendmp->m_data +=
1857 IXGBE_RX_COPY_ALIGN;
1858 ixgbe_bcopy(mp->m_data,
1859 sendmp->m_data, len);
1860 sendmp->m_len = len;
1861 rxr->rx_copies++;
1862 rbuf->flags |= IXGBE_RX_COPY;
1863 }
1864 }
1865 if (sendmp == NULL) {
1866 rbuf->buf = rbuf->fmp = NULL;
1867 sendmp = mp;
1868 }
1869
1870 /* first desc of a non-ps chain */
1871 sendmp->m_flags |= M_PKTHDR;
1872 sendmp->m_pkthdr.len = mp->m_len;
1873 }
1874 ++processed;
1875
1876 /* Pass the head pointer on */
1877 if (eop == 0) {
1878 nbuf->fmp = sendmp;
1879 sendmp = NULL;
1880 mp->m_next = nbuf->buf;
1881 } else { /* Sending this frame */
1882 sendmp->m_pkthdr.rcvif = ifp;
1883 rxr->rx_packets++;
1884 /* capture data for AIM */
1885 rxr->bytes += sendmp->m_pkthdr.len;
1886 rxr->rx_bytes += sendmp->m_pkthdr.len;
1887 /* Process vlan info */
1888 if ((rxr->vtag_strip) &&
1889 (staterr & IXGBE_RXD_STAT_VP))
1890 vtag = le16toh(cur->wb.upper.vlan);
1891 if (vtag) {
1892 sendmp->m_pkthdr.ether_vtag = vtag;
1893 sendmp->m_flags |= M_VLANTAG;
1894 }
1895 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1896 ixgbe_rx_checksum(staterr, sendmp, ptype);
1897#if __FreeBSD_version >= 800000
1898#ifdef RSS
1899 sendmp->m_pkthdr.flowid =
1900 le32toh(cur->wb.lower.hi_dword.rss);
1901 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1902 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1903 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
1904 break;
1905 case IXGBE_RXDADV_RSSTYPE_IPV4:
1906 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
1907 break;
1908 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1909 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
1910 break;
1911 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1912 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
1913 break;
1914 case IXGBE_RXDADV_RSSTYPE_IPV6:
1915 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
1916 break;
1917 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1918 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
1919 break;
1920 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1921 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
1922 break;
1923 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1924 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
1925 break;
1926 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1927 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
1928 break;
1929 default:
1930 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1931 }
1932#else /* RSS */
1933 sendmp->m_pkthdr.flowid = que->msix;
1934 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1935#endif /* RSS */
1936#endif /* FreeBSD_version */
1937 }
1938next_desc:
1939 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1940 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1941
1942 /* Advance our pointers to the next descriptor. */
1943 if (++i == rxr->num_desc)
1944 i = 0;
1945
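		/*
		 * ixgbe_rx_input() may hand the mbuf to the stack with the
		 * RX lock dropped, so publish next_to_check before the call
		 * and re-read it afterwards.
		 */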
1946 /* Now send to the stack or do LRO */
1947 if (sendmp != NULL) {
1948 rxr->next_to_check = i;
1949 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1950 i = rxr->next_to_check;
1951 }
1952
1953 /* Every eight descriptors, refresh the mbufs */
1954 if (processed == 8) {
1955 ixgbe_refresh_mbufs(rxr, i);
1956 processed = 0;
1957 }
1958 }
1959
1960 /* Refresh any remaining buf structs */
1961 if (ixgbe_rx_unrefreshed(rxr))
1962 ixgbe_refresh_mbufs(rxr, i);
1963
1964 rxr->next_to_check = i;
1965
1966 /*
1967 * Flush any outstanding LRO work
1968 */
1969 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1970 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1971 tcp_lro_flush(lro, queued);
1972 }
1973
1974 IXGBE_RX_UNLOCK(rxr);
1975
1976 /*
1977 ** Still have cleaning to do?
1978 */
1979 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1980 return (TRUE);
1981 else
1982 return (FALSE);
1983}
1984
1985
1986/*********************************************************************
1987 *
1988 * Verify that the hardware indicated that the checksum is valid.
1989 * Inform the stack of the checksum status so that it
1990 * does not have to spend time verifying the checksum itself.
1991 *
1992 *********************************************************************/
1993static void
1994ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1995{
1996 u16 status = (u16) staterr;
1997 u8 errors = (u8) (staterr >> 24);
1998 bool sctp = FALSE;
1999
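	/*
	 * The writeback staterr word carries the status bits in its low
	 * half and the error bits in its top byte, which is what the two
	 * extractions above rely on.
	 */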
2000 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2001 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2002 sctp = TRUE;
2003
2004 if (status & IXGBE_RXD_STAT_IPCS) {
2005 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2006 /* IP Checksum Good */
2007 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
2008 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2009
2010 } else
2011 mp->m_pkthdr.csum_flags = 0;
2012 }
2013 if (status & IXGBE_RXD_STAT_L4CS) {
2014 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2015#if __FreeBSD_version >= 800000
2016 if (sctp)
2017 type = CSUM_SCTP_VALID;
2018#endif
2019 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2020 mp->m_pkthdr.csum_flags |= type;
2021 if (!sctp)
2022 mp->m_pkthdr.csum_data = htons(0xffff);
2023 }
2024 }
2025 return;
2026}
2027
2028/********************************************************************
2029 * Manage DMA'able memory.
2030 *******************************************************************/
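/*
 * bus_dmamap_load() callback: the DMA tag created below allows only a
 * single segment, so simply record the one physical address reported.
 */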
2031static void
2032ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2033{
2034 if (error)
2035 return;
2036 *(bus_addr_t *) arg = segs->ds_addr;
2037 return;
2038}
2039
2040int
2041ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2042 struct ixgbe_dma_alloc *dma, int mapflags)
2043{
2044 device_t dev = adapter->dev;
2045 int r;
2046
2047 r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2048 DBA_ALIGN, 0, /* alignment, bounds */
2049 BUS_SPACE_MAXADDR, /* lowaddr */
2050 BUS_SPACE_MAXADDR, /* highaddr */
2051 NULL, NULL, /* filter, filterarg */
2052 size, /* maxsize */
2053 1, /* nsegments */
2054 size, /* maxsegsize */
2055 BUS_DMA_ALLOCNOW, /* flags */
2056 NULL, /* lockfunc */
2057 NULL, /* lockfuncarg */
2058 &dma->dma_tag);
2059 if (r != 0) {
2060 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2061 "error %u\n", r);
2062 goto fail_0;
2063 }
2064 r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2065 BUS_DMA_NOWAIT, &dma->dma_map);
2066 if (r != 0) {
2067 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2068 "error %u\n", r);
2069 goto fail_1;
2070 }
2071 r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2072 size,
2073 ixgbe_dmamap_cb,
2074 &dma->dma_paddr,
2075 mapflags | BUS_DMA_NOWAIT);
2076 if (r != 0) {
2077 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2078 "error %u\n", r);
2079 goto fail_2;
2080 }
2081 dma->dma_size = size;
2082 return (0);
2083fail_2:
2084 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2085fail_1:
2086 bus_dma_tag_destroy(dma->dma_tag);
2087fail_0:
2088 dma->dma_tag = NULL;
2089 return (r);
2090}
2091
2092void
2093ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2094{
2095 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2096 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2097 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2098 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2099 bus_dma_tag_destroy(dma->dma_tag);
2100}
2101
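/*
 * Typical use of the two helpers above (an illustrative sketch only,
 * not driver code): allocate a DMA area for a descriptor ring, hand
 * dma_paddr to the hardware, access the ring through dma_vaddr, and
 * release everything with ixgbe_dma_free() at teardown.
 *
 *	struct ixgbe_dma_alloc dma;
 *
 *	if (ixgbe_dma_malloc(adapter, rsize, &dma, BUS_DMA_NOWAIT) == 0) {
 *		bzero(dma.dma_vaddr, rsize);
 *		... program the ring base registers with dma.dma_paddr ...
 *		ixgbe_dma_free(adapter, &dma);
 *	}
 */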
2102
2103/*********************************************************************
2104 *
2105 * Allocate memory for the transmit and receive rings, and then
2106 * the descriptors associated with each; called only once at attach.
2107 *
2108 **********************************************************************/
2109int
2110ixgbe_allocate_queues(struct adapter *adapter)
2111{
2112 device_t dev = adapter->dev;
2113 struct ix_queue *que;
2114 struct tx_ring *txr;
2115 struct rx_ring *rxr;
2116 int rsize, tsize, error = IXGBE_SUCCESS;
2117 int txconf = 0, rxconf = 0;
2118
2119 /* First allocate the top level queue structs */
2120 if (!(adapter->queues =
2121 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2122 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2123 device_printf(dev, "Unable to allocate queue memory\n");
2124 error = ENOMEM;
2125 goto fail;
2126 }
2127
2128 /* Then allocate the TX ring struct memory */
2129 if (!(adapter->tx_rings =
2130 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2131 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2132 device_printf(dev, "Unable to allocate TX ring memory\n");
2133 error = ENOMEM;
2134 goto tx_fail;
2135 }
2136
2137 /* Next allocate the RX ring struct memory */
2138 if (!(adapter->rx_rings =
2139 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2140 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2141 device_printf(dev, "Unable to allocate RX ring memory\n");
2142 error = ENOMEM;
2143 goto rx_fail;
2144 }
2145
2146 /* Size of each TX descriptor ring, rounded up to DBA_ALIGN */
2147 tsize = roundup2(adapter->num_tx_desc *
2148 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2149
2150 /*
2151 * Now set up the TX queues. txconf counts how many have been
2152 * configured so that, if anything fails midway, the memory
2153 * already allocated can be undone gracefully.
2154 */
2155 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2156 /* Set up some basics */
2157 txr = &adapter->tx_rings[i];
2158 txr->adapter = adapter;
2159 txr->me = i;
2160 txr->num_desc = adapter->num_tx_desc;
2161
2162 /* Initialize the TX side lock */
2163 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2164 device_get_nameunit(dev), txr->me);
2165 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2166
2167 if (ixgbe_dma_malloc(adapter, tsize,
2168 &txr->txdma, BUS_DMA_NOWAIT)) {
2169 device_printf(dev,
2170 "Unable to allocate TX Descriptor memory\n");
2171 error = ENOMEM;
2172 goto err_tx_desc;
2173 }
2174 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2175 bzero((void *)txr->tx_base, tsize);
2176
2177 /* Now allocate transmit buffers for the ring */
2178 if (ixgbe_allocate_transmit_buffers(txr)) {
2179 device_printf(dev,
2180 "Critical Failure setting up transmit buffers\n");
2181 error = ENOMEM;
2182 goto err_tx_desc;
2183 }
2184#ifndef IXGBE_LEGACY_TX
2185 /* Allocate a buf ring */
2186 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2187 M_WAITOK, &txr->tx_mtx);
2188 if (txr->br == NULL) {
2189 device_printf(dev,
2190 "Critical Failure setting up buf ring\n");
2191 error = ENOMEM;
2192 goto err_tx_desc;
2193 }
2194#endif
2195 }
2196
2197 /*
2198 * Next the RX queues...
2199 */
2200 rsize = roundup2(adapter->num_rx_desc *
2201 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2202 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2203 rxr = &adapter->rx_rings[i];
2204 /* Set up some basics */
2205 rxr->adapter = adapter;
2206 rxr->me = i;
2207 rxr->num_desc = adapter->num_rx_desc;
2208
2209 /* Initialize the RX side lock */
2210 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2211 device_get_nameunit(dev), rxr->me);
2212 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2213
2214 if (ixgbe_dma_malloc(adapter, rsize,
2215 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2216 device_printf(dev,
2217 "Unable to allocate RX Descriptor memory\n");
2218 error = ENOMEM;
2219 goto err_rx_desc;
2220 }
2221 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2222 bzero((void *)rxr->rx_base, rsize);
2223
2224 /* Allocate receive buffers for the ring */
2225 if (ixgbe_allocate_receive_buffers(rxr)) {
2226 device_printf(dev,
2227 "Critical Failure setting up receive buffers\n");
2228 error = ENOMEM;
2229 goto err_rx_desc;
2230 }
2231 }
2232
2233 /*
2234 ** Finally set up the queue holding structs
2235 */
2236 for (int i = 0; i < adapter->num_queues; i++) {
2237 que = &adapter->queues[i];
2238 que->adapter = adapter;
2239 que->me = i;
2240 que->txr = &adapter->tx_rings[i];
2241 que->rxr = &adapter->rx_rings[i];
2242 }
2243
2244 return (0);
2245
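	/*
	 * Error unwind: txconf and rxconf record how many rings had
	 * descriptor memory set up, so the loops below free only those
	 * before the ring and queue arrays themselves are released.
	 */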
2246err_rx_desc:
2247 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2248 ixgbe_dma_free(adapter, &rxr->rxdma);
2249err_tx_desc:
2250 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2251 ixgbe_dma_free(adapter, &txr->txdma);
2252 free(adapter->rx_rings, M_DEVBUF);
2253rx_fail:
2254 free(adapter->tx_rings, M_DEVBUF);
2255tx_fail:
2256 free(adapter->queues, M_DEVBUF);
2257fail:
2258 return (error);
2259}