1/******************************************************************************
2
3  Copyright (c) 2001-2015, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/ixgbe/ix_txrx.c 283620 2015-05-27 17:44:11Z erj $*/
34
35
36#ifndef IXGBE_STANDALONE_BUILD
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#endif
40
41#include "ixgbe.h"
42
43#ifdef DEV_NETMAP
44#include <net/netmap.h>
45#include <sys/selinfo.h>
46#include <dev/netmap/netmap_kern.h>
47
48extern int ix_crcstrip;
49#endif
50
51/*
52** HW RSC control:
53**  this feature only works with
54**  IPv4, and only on 82599 and later.
55**  It also causes IP forwarding to fail
56**  and, unlike software LRO, cannot be
57**  controlled by the stack. For these
58**  reasons it is left off by default with
59**  no tunable interface; enabling it
60**  requires changing this value and
61**  recompiling the driver.
62*/
63static bool ixgbe_rsc_enable = FALSE;
64
65#ifdef IXGBE_FDIR
66/*
67** For Flow Director: this is the
68** number of TX packets we sample
69** for the filter pool; at the default
70** rate of 20, every 20th packet is
71** probed.
72**
73** Setting this to 0 disables the feature.
74*/
75static int atr_sample_rate = 20;
76#endif
77
78/* Shared PCI config read/write */
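/*
** hw->back points at the driver's ixgbe_osdep structure, which
** carries the device_t needed by the PCI config space accessors.
*/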
79inline u16
80ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
81{
82	u16 value;
83
84	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
85	    reg, 2);
86
87	return (value);
88}
89
90inline void
91ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
92{
93	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
94	    reg, value, 2);
95
96	return;
97}
98
99/*********************************************************************
100 *  Local Function prototypes
101 *********************************************************************/
102static void	ixgbe_setup_transmit_ring(struct tx_ring *);
103static void     ixgbe_free_transmit_buffers(struct tx_ring *);
104static int	ixgbe_setup_receive_ring(struct rx_ring *);
105static void     ixgbe_free_receive_buffers(struct rx_ring *);
106
107static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
108static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
109static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
110static int	ixgbe_tx_ctx_setup(struct tx_ring *,
111		    struct mbuf *, u32 *, u32 *);
112static int	ixgbe_tso_setup(struct tx_ring *,
113		    struct mbuf *, u32 *, u32 *);
114#ifdef IXGBE_FDIR
115static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
116#endif
117static __inline void ixgbe_rx_discard(struct rx_ring *, int);
118static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
119		    struct mbuf *, u32);
120
121#ifdef IXGBE_LEGACY_TX
122/*********************************************************************
123 *  Transmit entry point
124 *
125 *  ixgbe_start is called by the stack to initiate a transmit.
126 *  The driver will remain in this routine as long as there are
127 *  packets to transmit and transmit resources are available.
128 *  In case resources are not available, the stack is notified
129 *  and the packet is requeued.
130 **********************************************************************/
131
132void
133ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
134{
135	struct mbuf    *m_head;
136	struct adapter *adapter = txr->adapter;
137
138	IXGBE_TX_LOCK_ASSERT(txr);
139
140	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
141		return;
142	if (!adapter->link_active)
143		return;
144
145	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
146		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
147			break;
148
149		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
150		if (m_head == NULL)
151			break;
152
153		if (ixgbe_xmit(txr, &m_head)) {
154			if (m_head != NULL)
155				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
156			break;
157		}
158		/* Send a copy of the frame to the BPF listener */
159		ETHER_BPF_MTAP(ifp, m_head);
160	}
161	return;
162}
163
164/*
165 * Legacy TX start - called by the stack; this
166 * always uses the first tx ring, and should
167 * not be used with multiqueue tx enabled.
168 */
169void
170ixgbe_start(struct ifnet *ifp)
171{
172	struct adapter *adapter = ifp->if_softc;
173	struct tx_ring	*txr = adapter->tx_rings;
174
175	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
176		IXGBE_TX_LOCK(txr);
177		ixgbe_start_locked(txr, ifp);
178		IXGBE_TX_UNLOCK(txr);
179	}
180	return;
181}
182
183#else /* ! IXGBE_LEGACY_TX */
184
185/*
186** Multiqueue Transmit driver
187**
188*/
189int
190ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
191{
192	struct adapter	*adapter = ifp->if_softc;
193	struct ix_queue	*que;
194	struct tx_ring	*txr;
195	int 		i, err = 0;
196
197	/*
198	 * When doing RSS, map it to the same outbound queue
199	 * as the incoming flow would be mapped to.
200	 *
201	 * If everything is set up correctly, this should be the
202	 * same bucket the current CPU is assigned to.
203	 */
204	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
205		i = m->m_pkthdr.flowid % adapter->num_queues;
206	else
207		i = curcpu % adapter->num_queues;
208
209	/* Check for a hung queue and pick alternative */
210	if (((1 << i) & adapter->active_queues) == 0)
211		i = ffsl(adapter->active_queues);
212
213	txr = &adapter->tx_rings[i];
214	que = &adapter->queues[i];
215
216	err = drbr_enqueue(ifp, txr->br, m);
217	if (err)
218		return (err);
219	if (IXGBE_TX_TRYLOCK(txr)) {
220		ixgbe_mq_start_locked(ifp, txr);
221		IXGBE_TX_UNLOCK(txr);
222	} else
223		taskqueue_enqueue(que->tq, &txr->txq_task);
224
225	return (0);
226}
227
228int
229ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
230{
231	struct adapter  *adapter = txr->adapter;
232        struct mbuf     *next;
233        int             enqueued = 0, err = 0;
234
235	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
236	    adapter->link_active == 0)
237		return (ENETDOWN);
238
239	/* Process the queue */
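	/*
	 * Older kernels (__FreeBSD_version < 901504) lack the
	 * drbr_peek/drbr_advance/drbr_putback interface, so packets are
	 * dequeued up front and re-enqueued on failure; newer kernels
	 * peek first and only advance once ixgbe_xmit() accepts the mbuf.
	 */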
240#if __FreeBSD_version < 901504
241	next = drbr_dequeue(ifp, txr->br);
242	while (next != NULL) {
243		if ((err = ixgbe_xmit(txr, &next)) != 0) {
244			if (next != NULL)
245				err = drbr_enqueue(ifp, txr->br, next);
246#else
247	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
248		if ((err = ixgbe_xmit(txr, &next)) != 0) {
249			if (next == NULL) {
250				drbr_advance(ifp, txr->br);
251			} else {
252				drbr_putback(ifp, txr->br, next);
253			}
254#endif
255			break;
256		}
257#if __FreeBSD_version >= 901504
258		drbr_advance(ifp, txr->br);
259#endif
260		enqueued++;
261#if 0 // this is VF-only
262#if __FreeBSD_version >= 1100036
263		/*
264		 * Since we're looking at the tx ring, we can check
265		 * to see if we're a VF by examining our tail register
266		 * address.
267		 */
268		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
269			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
270#endif
271#endif
272		/* Send a copy of the frame to the BPF listener */
273		ETHER_BPF_MTAP(ifp, next);
274		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
275			break;
276#if __FreeBSD_version < 901504
277		next = drbr_dequeue(ifp, txr->br);
278#endif
279	}
280
281	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
282		ixgbe_txeof(txr);
283
284	return (err);
285}
286
287/*
288 * Called from a taskqueue to drain queued transmit packets.
289 */
290void
291ixgbe_deferred_mq_start(void *arg, int pending)
292{
293	struct tx_ring *txr = arg;
294	struct adapter *adapter = txr->adapter;
295	struct ifnet *ifp = adapter->ifp;
296
297	IXGBE_TX_LOCK(txr);
298	if (!drbr_empty(ifp, txr->br))
299		ixgbe_mq_start_locked(ifp, txr);
300	IXGBE_TX_UNLOCK(txr);
301}
302
303/*
304 * Flush all ring buffers
305 */
306void
307ixgbe_qflush(struct ifnet *ifp)
308{
309	struct adapter	*adapter = ifp->if_softc;
310	struct tx_ring	*txr = adapter->tx_rings;
311	struct mbuf	*m;
312
313	for (int i = 0; i < adapter->num_queues; i++, txr++) {
314		IXGBE_TX_LOCK(txr);
315		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
316			m_freem(m);
317		IXGBE_TX_UNLOCK(txr);
318	}
319	if_qflush(ifp);
320}
321#endif /* IXGBE_LEGACY_TX */
322
323
324/*********************************************************************
325 *
326 *  This routine maps the mbufs to tx descriptors, allowing the
327 *  TX engine to transmit the packets.
328 *  	- return 0 on success, positive on failure
329 *
330 **********************************************************************/
331
332static int
333ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
334{
335	struct adapter  *adapter = txr->adapter;
336	u32		olinfo_status = 0, cmd_type_len;
337	int             i, j, error, nsegs;
338	int		first;
339	bool		remap = TRUE;
340	struct mbuf	*m_head;
341	bus_dma_segment_t segs[adapter->num_segs];
342	bus_dmamap_t	map;
343	struct ixgbe_tx_buf *txbuf;
344	union ixgbe_adv_tx_desc *txd = NULL;
345
346	m_head = *m_headp;
347
348	/* Basic descriptor defines */
349        cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
350	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
351
352	if (m_head->m_flags & M_VLANTAG)
353        	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
354
355        /*
356         * It is important to capture the first descriptor
357         * used, because its buffer struct will hold the EOP
358         * descriptor we tell the hardware to report back on.
359         */
360        first = txr->next_avail_desc;
361	txbuf = &txr->tx_buffers[first];
362	map = txbuf->map;
363
364	/*
365	 * Map the packet for DMA.
366	 */
367retry:
368	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
369	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
370
371	if (__predict_false(error)) {
372		struct mbuf *m;
373
374		switch (error) {
375		case EFBIG:
376			/* Try it again? - one try */
377			if (remap == TRUE) {
378				remap = FALSE;
379				/*
380				 * XXX: m_defrag will choke on
381				 * non-MCLBYTES-sized clusters
382				 */
383				m = m_defrag(*m_headp, M_NOWAIT);
384				if (m == NULL) {
385					adapter->mbuf_defrag_failed++;
386					m_freem(*m_headp);
387					*m_headp = NULL;
388					return (ENOBUFS);
389				}
390				*m_headp = m;
391				goto retry;
392			} else
393				return (error);
394		case ENOMEM:
395			txr->no_tx_dma_setup++;
396			return (error);
397		default:
398			txr->no_tx_dma_setup++;
399			m_freem(*m_headp);
400			*m_headp = NULL;
401			return (error);
402		}
403	}
404
405	/* Make certain there are enough descriptors */
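	/* That is: the data segments, a possible context descriptor, and one spare slot */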
406	if (nsegs > txr->tx_avail - 2) {
407		txr->no_desc_avail++;
408		bus_dmamap_unload(txr->txtag, map);
409		return (ENOBUFS);
410	}
411	m_head = *m_headp;
412
413	/*
414	 * Set up the appropriate offload context;
415	 * this will consume the first descriptor
416	 */
417	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
418	if (__predict_false(error)) {
419		if (error == ENOBUFS)
420			*m_headp = NULL;
421		return (error);
422	}
423
424#ifdef IXGBE_FDIR
425	/* Do the flow director magic */
426	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
427		++txr->atr_count;
428		if (txr->atr_count >= atr_sample_rate) {
429			ixgbe_atr(txr, m_head);
430			txr->atr_count = 0;
431		}
432	}
433#endif
434
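	/*
	 * Write one advanced data descriptor per DMA segment.  Every
	 * descriptor carries the same command and offload status bits;
	 * EOP and RS are OR'd into the final descriptor after the loop.
	 */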
435	i = txr->next_avail_desc;
436	for (j = 0; j < nsegs; j++) {
437		bus_size_t seglen;
438		bus_addr_t segaddr;
439
440		txbuf = &txr->tx_buffers[i];
441		txd = &txr->tx_base[i];
442		seglen = segs[j].ds_len;
443		segaddr = htole64(segs[j].ds_addr);
444
445		txd->read.buffer_addr = segaddr;
446		txd->read.cmd_type_len = htole32(txr->txd_cmd |
447		    cmd_type_len |seglen);
448		txd->read.olinfo_status = htole32(olinfo_status);
449
450		if (++i == txr->num_desc)
451			i = 0;
452	}
453
454	txd->read.cmd_type_len |=
455	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456	txr->tx_avail -= nsegs;
457	txr->next_avail_desc = i;
458
459	txbuf->m_head = m_head;
460	/*
461	 * Here we swap the map so the last descriptor,
462	 * which gets the completion interrupt, has the
463	 * real map, and the first descriptor gets the
464	 * now-unused map from the last one.
465	 */
466	txr->tx_buffers[first].map = txbuf->map;
467	txbuf->map = map;
468	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
469
470        /* Set the EOP descriptor that will be marked done */
471        txbuf = &txr->tx_buffers[first];
472	txbuf->eop = txd;
473
474        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
476	/*
477	 * Advance the Transmit Descriptor Tail (TDT); this tells the
478	 * hardware that this frame is available to transmit.
479	 */
480	++txr->total_packets;
481	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
482
483	/* Mark queue as having work */
484	if (txr->busy == 0)
485		txr->busy = 1;
486
487	return (0);
488}
489
490
491/*********************************************************************
492 *
493 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
494 *  the information needed to transmit a packet on the wire. This is
495 *  called only once at attach; setup is done on every reset.
496 *
497 **********************************************************************/
498int
499ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
500{
501	struct adapter *adapter = txr->adapter;
502	device_t dev = adapter->dev;
503	struct ixgbe_tx_buf *txbuf;
504	int error, i;
505
506	/*
507	 * Setup DMA descriptor areas.
508	 */
509	if ((error = bus_dma_tag_create(
510			       bus_get_dma_tag(adapter->dev),	/* parent */
511			       1, 0,		/* alignment, bounds */
512			       BUS_SPACE_MAXADDR,	/* lowaddr */
513			       BUS_SPACE_MAXADDR,	/* highaddr */
514			       NULL, NULL,		/* filter, filterarg */
515			       IXGBE_TSO_SIZE,		/* maxsize */
516			       adapter->num_segs,	/* nsegments */
517			       PAGE_SIZE,		/* maxsegsize */
518			       0,			/* flags */
519			       NULL,			/* lockfunc */
520			       NULL,			/* lockfuncarg */
521			       &txr->txtag))) {
522		device_printf(dev,"Unable to allocate TX DMA tag\n");
523		goto fail;
524	}
525
526	if (!(txr->tx_buffers =
527	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529		device_printf(dev, "Unable to allocate tx_buffer memory\n");
530		error = ENOMEM;
531		goto fail;
532	}
533
534        /* Create the descriptor buffer dma maps */
535	txbuf = txr->tx_buffers;
536	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
538		if (error != 0) {
539			device_printf(dev, "Unable to create TX DMA map\n");
540			goto fail;
541		}
542	}
543
544	return 0;
545fail:
546	/* Free everything; this handles the case where we failed partway through */
547	ixgbe_free_transmit_structures(adapter);
548	return (error);
549}
550
551/*********************************************************************
552 *
553 *  Initialize a transmit ring.
554 *
555 **********************************************************************/
556static void
557ixgbe_setup_transmit_ring(struct tx_ring *txr)
558{
559	struct adapter *adapter = txr->adapter;
560	struct ixgbe_tx_buf *txbuf;
561	int i;
562#ifdef DEV_NETMAP
563	struct netmap_adapter *na = NA(adapter->ifp);
564	struct netmap_slot *slot;
565#endif /* DEV_NETMAP */
566
567	/* Clear the old ring contents */
568	IXGBE_TX_LOCK(txr);
569#ifdef DEV_NETMAP
570	/*
571	 * (under lock): if in netmap mode, do some consistency
572	 * checks and set slot to entry 0 of the netmap ring.
573	 */
574	slot = netmap_reset(na, NR_TX, txr->me, 0);
575#endif /* DEV_NETMAP */
576	bzero((void *)txr->tx_base,
577	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
578	/* Reset indices */
579	txr->next_avail_desc = 0;
580	txr->next_to_clean = 0;
581
582	/* Free any existing tx buffers. */
583        txbuf = txr->tx_buffers;
584	for (i = 0; i < txr->num_desc; i++, txbuf++) {
585		if (txbuf->m_head != NULL) {
586			bus_dmamap_sync(txr->txtag, txbuf->map,
587			    BUS_DMASYNC_POSTWRITE);
588			bus_dmamap_unload(txr->txtag, txbuf->map);
589			m_freem(txbuf->m_head);
590			txbuf->m_head = NULL;
591		}
592#ifdef DEV_NETMAP
593		/*
594		 * In netmap mode, set the map for the packet buffer.
595		 * NOTE: Some drivers (not this one) also need to set
596		 * the physical buffer address in the NIC ring.
597		 * Slots in the netmap ring (indexed by "si") are
598		 * kring->nkr_hwofs positions "ahead" wrt the
599		 * corresponding slot in the NIC ring. In some drivers
600		 * (not here) nkr_hwofs can be negative. Function
601		 * netmap_idx_n2k() handles wraparounds properly.
602		 */
603		if (slot) {
604			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
605			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
606		}
607#endif /* DEV_NETMAP */
608		/* Clear the EOP descriptor pointer */
609		txbuf->eop = NULL;
610        }
611
612#ifdef IXGBE_FDIR
613	/* Set the rate at which we sample packets */
614	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615		txr->atr_sample = atr_sample_rate;
616#endif
617
618	/* Set number of descriptors available */
619	txr->tx_avail = adapter->num_tx_desc;
620
621	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623	IXGBE_TX_UNLOCK(txr);
624}
625
626/*********************************************************************
627 *
628 *  Initialize all transmit rings.
629 *
630 **********************************************************************/
631int
632ixgbe_setup_transmit_structures(struct adapter *adapter)
633{
634	struct tx_ring *txr = adapter->tx_rings;
635
636	for (int i = 0; i < adapter->num_queues; i++, txr++)
637		ixgbe_setup_transmit_ring(txr);
638
639	return (0);
640}
641
642/*********************************************************************
643 *
644 *  Free all transmit rings.
645 *
646 **********************************************************************/
647void
648ixgbe_free_transmit_structures(struct adapter *adapter)
649{
650	struct tx_ring *txr = adapter->tx_rings;
651
652	for (int i = 0; i < adapter->num_queues; i++, txr++) {
653		IXGBE_TX_LOCK(txr);
654		ixgbe_free_transmit_buffers(txr);
655		ixgbe_dma_free(adapter, &txr->txdma);
656		IXGBE_TX_UNLOCK(txr);
657		IXGBE_TX_LOCK_DESTROY(txr);
658	}
659	free(adapter->tx_rings, M_DEVBUF);
660}
661
662/*********************************************************************
663 *
664 *  Free transmit ring related data structures.
665 *
666 **********************************************************************/
667static void
668ixgbe_free_transmit_buffers(struct tx_ring *txr)
669{
670	struct adapter *adapter = txr->adapter;
671	struct ixgbe_tx_buf *tx_buffer;
672	int             i;
673
674	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
675
676	if (txr->tx_buffers == NULL)
677		return;
678
679	tx_buffer = txr->tx_buffers;
680	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681		if (tx_buffer->m_head != NULL) {
682			bus_dmamap_sync(txr->txtag, tx_buffer->map,
683			    BUS_DMASYNC_POSTWRITE);
684			bus_dmamap_unload(txr->txtag,
685			    tx_buffer->map);
686			m_freem(tx_buffer->m_head);
687			tx_buffer->m_head = NULL;
688			if (tx_buffer->map != NULL) {
689				bus_dmamap_destroy(txr->txtag,
690				    tx_buffer->map);
691				tx_buffer->map = NULL;
692			}
693		} else if (tx_buffer->map != NULL) {
694			bus_dmamap_unload(txr->txtag,
695			    tx_buffer->map);
696			bus_dmamap_destroy(txr->txtag,
697			    tx_buffer->map);
698			tx_buffer->map = NULL;
699		}
700	}
701#ifdef IXGBE_LEGACY_TX
702	if (txr->br != NULL)
703		buf_ring_free(txr->br, M_DEVBUF);
704#endif
705	if (txr->tx_buffers != NULL) {
706		free(txr->tx_buffers, M_DEVBUF);
707		txr->tx_buffers = NULL;
708	}
709	if (txr->txtag != NULL) {
710		bus_dma_tag_destroy(txr->txtag);
711		txr->txtag = NULL;
712	}
713	return;
714}
715
716/*********************************************************************
717 *
718 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
719 *
720 **********************************************************************/
721
722static int
723ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724    u32 *cmd_type_len, u32 *olinfo_status)
725{
726	struct adapter *adapter = txr->adapter;
727	struct ixgbe_adv_tx_context_desc *TXD;
728	struct ether_vlan_header *eh;
729	struct ip *ip;
730	struct ip6_hdr *ip6;
731	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
732	int	ehdrlen, ip_hlen = 0;
733	u16	etype;
734	u8	ipproto = 0;
735	int	offload = TRUE;
736	int	ctxd = txr->next_avail_desc;
737	u16	vtag = 0;
738
739	/* First check if TSO is to be used */
740	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
741		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
742
743	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
744		offload = FALSE;
745
746	/* Indicate the whole packet as payload when not doing TSO */
747       	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
748
749	/* Now ready a context descriptor */
750	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
751
752	/*
753	** In advanced descriptors the vlan tag must
754	** be placed into the context descriptor. Hence
755	** we need to make one even if not doing offloads.
756	*/
757	if (mp->m_flags & M_VLANTAG) {
758		vtag = htole16(mp->m_pkthdr.ether_vtag);
759		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
760	}
761	else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
762		return (0);
763
764	/*
765	 * Determine where frame payload starts.
766	 * Jump over vlan headers if already present,
767	 * helpful for QinQ too.
768	 */
769	eh = mtod(mp, struct ether_vlan_header *);
770	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
771		etype = ntohs(eh->evl_proto);
772		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
773	} else {
774		etype = ntohs(eh->evl_encap_proto);
775		ehdrlen = ETHER_HDR_LEN;
776	}
777
778	/* Set the ether header length */
779	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
780
781	if (offload == FALSE)
782		goto no_offloads;
783
784	switch (etype) {
785		case ETHERTYPE_IP:
786			ip = (struct ip *)(mp->m_data + ehdrlen);
787			ip_hlen = ip->ip_hl << 2;
788			ipproto = ip->ip_p;
789			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
790			break;
791		case ETHERTYPE_IPV6:
792			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
793			ip_hlen = sizeof(struct ip6_hdr);
794			/* XXX-BZ this will go badly in case of ext hdrs. */
795			ipproto = ip6->ip6_nxt;
796			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
797			break;
798		default:
799			offload = FALSE;
800			break;
801	}
802
803	vlan_macip_lens |= ip_hlen;
804
805	switch (ipproto) {
806		case IPPROTO_TCP:
807			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
808				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
809			break;
810
811		case IPPROTO_UDP:
812			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
813				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
814			break;
815
816#if __FreeBSD_version >= 800000
817		case IPPROTO_SCTP:
818			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
819				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
820			break;
821#endif
822		default:
823			offload = FALSE;
824			break;
825	}
826
827	if (offload) /* For the TX descriptor setup */
828		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
829
830no_offloads:
831	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
832
833	/* Now copy bits into descriptor */
834	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
835	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
836	TXD->seqnum_seed = htole32(0);
837	TXD->mss_l4len_idx = htole32(0);
838
839	/* We've consumed the first desc, adjust counters */
840	if (++ctxd == txr->num_desc)
841		ctxd = 0;
842	txr->next_avail_desc = ctxd;
843	--txr->tx_avail;
844
845        return (0);
846}
847
848/**********************************************************************
849 *
850 *  Setup work for hardware segmentation offload (TSO) on
851 *  adapters using advanced tx descriptors
852 *
853 **********************************************************************/
854static int
855ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
856    u32 *cmd_type_len, u32 *olinfo_status)
857{
858	struct ixgbe_adv_tx_context_desc *TXD;
859	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
860	u32 mss_l4len_idx = 0, paylen;
861	u16 vtag = 0, eh_type;
862	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
863	struct ether_vlan_header *eh;
864#ifdef INET6
865	struct ip6_hdr *ip6;
866#endif
867#ifdef INET
868	struct ip *ip;
869#endif
870	struct tcphdr *th;
871
872
873	/*
874	 * Determine where frame payload starts.
875	 * Jump over vlan headers if already present
876	 */
877	eh = mtod(mp, struct ether_vlan_header *);
878	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
879		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
880		eh_type = eh->evl_proto;
881	} else {
882		ehdrlen = ETHER_HDR_LEN;
883		eh_type = eh->evl_encap_proto;
884	}
885
886	switch (ntohs(eh_type)) {
887#ifdef INET6
888	case ETHERTYPE_IPV6:
889		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
890		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
891		if (ip6->ip6_nxt != IPPROTO_TCP)
892			return (ENXIO);
893		ip_hlen = sizeof(struct ip6_hdr);
894		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
895		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
896		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
897		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
898		break;
899#endif
900#ifdef INET
901	case ETHERTYPE_IP:
902		ip = (struct ip *)(mp->m_data + ehdrlen);
903		if (ip->ip_p != IPPROTO_TCP)
904			return (ENXIO);
905		ip->ip_sum = 0;
906		ip_hlen = ip->ip_hl << 2;
907		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
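		/*
		 * Seed th_sum with the pseudo-header checksum (without the
		 * length); the hardware inserts the final TCP checksum in
		 * each segment it generates.
		 */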
908		th->th_sum = in_pseudo(ip->ip_src.s_addr,
909		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
910		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
911		/* Tell transmit desc to also do IPv4 checksum. */
912		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
913		break;
914#endif
915	default:
916		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
917		    __func__, ntohs(eh_type));
918		break;
919	}
920
921	ctxd = txr->next_avail_desc;
922	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
923
924	tcp_hlen = th->th_off << 2;
925
926	/* This is used in the transmit desc in encap */
927	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
928
929	/* VLAN MACLEN IPLEN */
930	if (mp->m_flags & M_VLANTAG) {
931		vtag = htole16(mp->m_pkthdr.ether_vtag);
932                vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
933	}
934
935	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
936	vlan_macip_lens |= ip_hlen;
937	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
938
939	/* ADV DTYPE TUCMD */
940	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
941	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
942	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
943
944	/* MSS L4LEN IDX */
945	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
946	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
947	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
948
949	TXD->seqnum_seed = htole32(0);
950
951	if (++ctxd == txr->num_desc)
952		ctxd = 0;
953
954	txr->tx_avail--;
955	txr->next_avail_desc = ctxd;
956	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
957	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
958	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
959	++txr->tso_tx;
960	return (0);
961}
962
963
964/**********************************************************************
965 *
966 *  Examine each tx_buffer in the used queue. If the hardware is done
967 *  processing the packet then free associated resources. The
968 *  tx_buffer is put back on the free queue.
969 *
970 **********************************************************************/
971void
972ixgbe_txeof(struct tx_ring *txr)
973{
974#ifdef DEV_NETMAP
975	struct adapter		*adapter = txr->adapter;
976	struct ifnet		*ifp = adapter->ifp;
977#endif
978	u32			work, processed = 0;
979	u16			limit = txr->process_limit;
980	struct ixgbe_tx_buf	*buf;
981	union ixgbe_adv_tx_desc *txd;
982
983	mtx_assert(&txr->tx_mtx, MA_OWNED);
984
985#ifdef DEV_NETMAP
986	if (ifp->if_capenable & IFCAP_NETMAP) {
987		struct netmap_adapter *na = NA(ifp);
988		struct netmap_kring *kring = &na->tx_rings[txr->me];
989		txd = txr->tx_base;
990		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
991		    BUS_DMASYNC_POSTREAD);
992		/*
993		 * In netmap mode, all the work is done in the context
994		 * of the client thread. Interrupt handlers only wake up
995		 * clients, which may be sleeping on individual rings
996		 * or on a global resource for all rings.
997		 * To implement tx interrupt mitigation, we wake up the client
998		 * thread roughly every half ring, even if the NIC interrupts
999		 * more frequently. This is implemented as follows:
1000		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1001		 *   the slot that should wake up the thread (nkr_num_slots
1002		 *   means the user thread should not be woken up);
1003		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1004		 *   or the slot has the DD bit set.
1005		 */
1006		if (!netmap_mitigate ||
1007		    (kring->nr_kflags < kring->nkr_num_slots &&
1008		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1009			netmap_tx_irq(ifp, txr->me);
1010		}
1011		return;
1012	}
1013#endif /* DEV_NETMAP */
1014
1015	if (txr->tx_avail == txr->num_desc) {
1016		txr->busy = 0;
1017		return;
1018	}
1019
1020	/* Get work starting point */
1021	work = txr->next_to_clean;
1022	buf = &txr->tx_buffers[work];
1023	txd = &txr->tx_base[work];
1024	work -= txr->num_desc; /* The distance to ring end */
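	/*
	 * 'work' now holds next_to_clean minus num_desc (as an unsigned
	 * value); incrementing it in step with the descriptor pointer
	 * makes it reach zero exactly when the ring wraps, which is what
	 * the "if (!work)" tests below detect.
	 */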
1025        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1026            BUS_DMASYNC_POSTREAD);
1027
1028	do {
1029		union ixgbe_adv_tx_desc *eop= buf->eop;
1030		if (eop == NULL) /* No work */
1031			break;
1032
1033		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1034			break;	/* I/O not complete */
1035
1036		if (buf->m_head) {
1037			txr->bytes +=
1038			    buf->m_head->m_pkthdr.len;
1039			bus_dmamap_sync(txr->txtag,
1040			    buf->map,
1041			    BUS_DMASYNC_POSTWRITE);
1042			bus_dmamap_unload(txr->txtag,
1043			    buf->map);
1044			m_freem(buf->m_head);
1045			buf->m_head = NULL;
1046		}
1047		buf->eop = NULL;
1048		++txr->tx_avail;
1049
1050		/* We clean the range if multi segment */
1051		while (txd != eop) {
1052			++txd;
1053			++buf;
1054			++work;
1055			/* wrap the ring? */
1056			if (__predict_false(!work)) {
1057				work -= txr->num_desc;
1058				buf = txr->tx_buffers;
1059				txd = txr->tx_base;
1060			}
1061			if (buf->m_head) {
1062				txr->bytes +=
1063				    buf->m_head->m_pkthdr.len;
1064				bus_dmamap_sync(txr->txtag,
1065				    buf->map,
1066				    BUS_DMASYNC_POSTWRITE);
1067				bus_dmamap_unload(txr->txtag,
1068				    buf->map);
1069				m_freem(buf->m_head);
1070				buf->m_head = NULL;
1071			}
1072			++txr->tx_avail;
1073			buf->eop = NULL;
1074
1075		}
1076		++txr->packets;
1077		++processed;
1078
1079		/* Try the next packet */
1080		++txd;
1081		++buf;
1082		++work;
1083		/* reset with a wrap */
1084		if (__predict_false(!work)) {
1085			work -= txr->num_desc;
1086			buf = txr->tx_buffers;
1087			txd = txr->tx_base;
1088		}
1089		prefetch(txd);
1090	} while (__predict_true(--limit));
1091
1092	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1093	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1094
1095	work += txr->num_desc;
1096	txr->next_to_clean = work;
1097
1098	/*
1099	** Queue hang detection: we know there is
1100	** work outstanding or the early return
1101	** above would have been taken, so bump the
1102	** busy count if nothing was cleaned;
1103	** local_timer checks it and marks the queue
1104	** HUNG once it exceeds the maximum attempts.
1105	*/
1106	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1107		++txr->busy;
1108	/*
1109	** If anything was cleaned we reset the state to 1;
1110	** note this clears HUNG if it was set.
1111	*/
1112	if (processed)
1113		txr->busy = 1;
1114
1115	if (txr->tx_avail == txr->num_desc)
1116		txr->busy = 0;
1117
1118	return;
1119}
1120
1121
1122#ifdef IXGBE_FDIR
1123/*
1124** This routine parses packet headers so that Flow
1125** Director can make a hashed filter table entry
1126** allowing traffic flows to be identified and kept
1127** on the same CPU.  Parsing every packet would be a
1128** performance hit, so we only sample one in every
1129** atr_sample_rate packets.
1130*/
1131static void
1132ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1133{
1134	struct adapter			*adapter = txr->adapter;
1135	struct ix_queue			*que;
1136	struct ip			*ip;
1137	struct tcphdr			*th;
1138	struct udphdr			*uh;
1139	struct ether_vlan_header	*eh;
1140	union ixgbe_atr_hash_dword	input = {.dword = 0};
1141	union ixgbe_atr_hash_dword	common = {.dword = 0};
1142	int  				ehdrlen, ip_hlen;
1143	u16				etype;
1144
1145	eh = mtod(mp, struct ether_vlan_header *);
1146	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1147		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1148		etype = eh->evl_proto;
1149	} else {
1150		ehdrlen = ETHER_HDR_LEN;
1151		etype = eh->evl_encap_proto;
1152	}
1153
1154	/* Only handling IPv4 */
1155	if (etype != htons(ETHERTYPE_IP))
1156		return;
1157
1158	ip = (struct ip *)(mp->m_data + ehdrlen);
1159	ip_hlen = ip->ip_hl << 2;
1160
1161	/* check if we're UDP or TCP */
1162	switch (ip->ip_p) {
1163	case IPPROTO_TCP:
1164		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1165		/* src and dst are inverted */
1166		common.port.dst ^= th->th_sport;
1167		common.port.src ^= th->th_dport;
1168		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1169		break;
1170	case IPPROTO_UDP:
1171		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1172		/* src and dst are inverted */
1173		common.port.dst ^= uh->uh_sport;
1174		common.port.src ^= uh->uh_dport;
1175		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1176		break;
1177	default:
1178		return;
1179	}
1180
1181	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1182	if (mp->m_pkthdr.ether_vtag)
1183		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1184	else
1185		common.flex_bytes ^= etype;
1186	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1187
1188	que = &adapter->queues[txr->me];
1189	/*
1190	** This assumes the Rx queue and Tx
1191	** queue are bound to the same CPU
1192	*/
1193	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1194	    input, common, que->msix);
1195}
1196#endif /* IXGBE_FDIR */
1197
1198/*
1199** Used to detect a descriptor that has
1200** been merged by Hardware RSC.
1201*/
1202static inline u32
1203ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1204{
1205	return (le32toh(rx->wb.lower.lo_dword.data) &
1206	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1207}
1208
1209/*********************************************************************
1210 *
1211 *  Initialize Hardware RSC (LRO) feature on 82599
1212 *  for an RX ring; it is toggled by the LRO capability
1213 *  even though it is transparent to the stack.
1214 *
1215 *  NOTE: since this HW feature only works with IPv4, and
1216 *        our testing has shown software LRO to be just as
1217 *        effective, it is disabled by default.
1218 *
1219 **********************************************************************/
1220static void
1221ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1222{
1223	struct	adapter 	*adapter = rxr->adapter;
1224	struct	ixgbe_hw	*hw = &adapter->hw;
1225	u32			rscctrl, rdrxctl;
1226
1227	/* If turning LRO/RSC off we need to disable it */
1228	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1229		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1230		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1231		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1232	}
1233
1234	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1235	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1236#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1237	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1238#endif /* DEV_NETMAP */
1239	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1240	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1241	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1242
1243	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1244	rscctrl |= IXGBE_RSCCTL_RSCEN;
1245	/*
1246	** Limit the total number of descriptors that
1247	** can be combined, so it does not exceed 64K
1248	*/
1249	if (rxr->mbuf_sz == MCLBYTES)
1250		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1251	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1252		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1253	else if (rxr->mbuf_sz == MJUM9BYTES)
1254		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1255	else  /* Using 16K cluster */
1256		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1257
1258	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1259
1260	/* Enable TCP header recognition */
1261	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1262	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1263	    IXGBE_PSRTYPE_TCPHDR));
1264
1265	/* Disable RSC for ACK packets */
1266	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1267	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1268
1269	rxr->hw_rsc = TRUE;
1270}
1271/*********************************************************************
1272 *
1273 *  Refresh mbuf buffers for RX descriptor rings
1274 *   - keeps its own state so discards due to resource
1275 *     exhaustion are unnecessary; if an mbuf cannot be obtained
1276 *     it just returns, keeping its placeholder, so it can simply
1277 *     be called again later to retry.
1278 *
1279 **********************************************************************/
1280static void
1281ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1282{
1283	struct adapter		*adapter = rxr->adapter;
1284	bus_dma_segment_t	seg[1];
1285	struct ixgbe_rx_buf	*rxbuf;
1286	struct mbuf		*mp;
1287	int			i, j, nsegs, error;
1288	bool			refreshed = FALSE;
1289
1290	i = j = rxr->next_to_refresh;
1291	/* Control the loop with one beyond */
1292	if (++j == rxr->num_desc)
1293		j = 0;
1294
1295	while (j != limit) {
1296		rxbuf = &rxr->rx_buffers[i];
1297		if (rxbuf->buf == NULL) {
1298			mp = m_getjcl(M_NOWAIT, MT_DATA,
1299			    M_PKTHDR, rxr->mbuf_sz);
1300			if (mp == NULL)
1301				goto update;
1302			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1303				m_adj(mp, ETHER_ALIGN);
1304		} else
1305			mp = rxbuf->buf;
1306
1307		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1308
1309		/* If we're dealing with an mbuf that was copied rather
1310		 * than replaced, there's no need to go through busdma.
1311		 */
1312		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1313			/* Get the memory mapping */
1314			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1315			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1316			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1317			if (error != 0) {
1318				printf("Refresh mbufs: payload dmamap load"
1319				    " failure - %d\n", error);
1320				m_free(mp);
1321				rxbuf->buf = NULL;
1322				goto update;
1323			}
1324			rxbuf->buf = mp;
1325			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1326			    BUS_DMASYNC_PREREAD);
1327			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1328			    htole64(seg[0].ds_addr);
1329		} else {
1330			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1331			rxbuf->flags &= ~IXGBE_RX_COPY;
1332		}
1333
1334		refreshed = TRUE;
1335		/* Next is precalculated */
1336		i = j;
1337		rxr->next_to_refresh = i;
1338		if (++j == rxr->num_desc)
1339			j = 0;
1340	}
1341update:
1342	if (refreshed) /* Update hardware tail index */
1343		IXGBE_WRITE_REG(&adapter->hw,
1344		    rxr->tail, rxr->next_to_refresh);
1345	return;
1346}
1347
1348/*********************************************************************
1349 *
1350 *  Allocate memory for rx_buffer structures. Since we use one
1351 *  rx_buffer per received packet, the maximum number of rx_buffers
1352 *  that we'll need is equal to the number of receive descriptors
1353 *  that we've allocated.
1354 *
1355 **********************************************************************/
1356int
1357ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1358{
1359	struct	adapter 	*adapter = rxr->adapter;
1360	device_t 		dev = adapter->dev;
1361	struct ixgbe_rx_buf 	*rxbuf;
1362	int             	i, bsize, error;
1363
1364	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1365	if (!(rxr->rx_buffers =
1366	    (struct ixgbe_rx_buf *) malloc(bsize,
1367	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1368		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1369		error = ENOMEM;
1370		goto fail;
1371	}
1372
1373	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
1374				   1, 0,	/* alignment, bounds */
1375				   BUS_SPACE_MAXADDR,	/* lowaddr */
1376				   BUS_SPACE_MAXADDR,	/* highaddr */
1377				   NULL, NULL,		/* filter, filterarg */
1378				   MJUM16BYTES,		/* maxsize */
1379				   1,			/* nsegments */
1380				   MJUM16BYTES,		/* maxsegsize */
1381				   0,			/* flags */
1382				   NULL,		/* lockfunc */
1383				   NULL,		/* lockfuncarg */
1384				   &rxr->ptag))) {
1385		device_printf(dev, "Unable to create RX DMA tag\n");
1386		goto fail;
1387	}
1388
1389	for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1390		rxbuf = &rxr->rx_buffers[i];
1391		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1392		if (error) {
1393			device_printf(dev, "Unable to create RX dma map\n");
1394			goto fail;
1395		}
1396	}
1397
1398	return (0);
1399
1400fail:
1401	/* Frees all, but can handle partial completion */
1402	ixgbe_free_receive_structures(adapter);
1403	return (error);
1404}
1405
1406
1407static void
1408ixgbe_free_receive_ring(struct rx_ring *rxr)
1409{
1410	struct ixgbe_rx_buf       *rxbuf;
1411	int i;
1412
1413	for (i = 0; i < rxr->num_desc; i++) {
1414		rxbuf = &rxr->rx_buffers[i];
1415		if (rxbuf->buf != NULL) {
1416			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1417			    BUS_DMASYNC_POSTREAD);
1418			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1419			rxbuf->buf->m_flags |= M_PKTHDR;
1420			m_freem(rxbuf->buf);
1421			rxbuf->buf = NULL;
1422			rxbuf->flags = 0;
1423		}
1424	}
1425}
1426
1427
1428/*********************************************************************
1429 *
1430 *  Initialize a receive ring and its buffers.
1431 *
1432 **********************************************************************/
1433static int
1434ixgbe_setup_receive_ring(struct rx_ring *rxr)
1435{
1436	struct	adapter 	*adapter;
1437	struct ifnet		*ifp;
1438	device_t		dev;
1439	struct ixgbe_rx_buf	*rxbuf;
1440	bus_dma_segment_t	seg[1];
1441	struct lro_ctrl		*lro = &rxr->lro;
1442	int			rsize, nsegs, error = 0;
1443#ifdef DEV_NETMAP
1444	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1445	struct netmap_slot *slot;
1446#endif /* DEV_NETMAP */
1447
1448	adapter = rxr->adapter;
1449	ifp = adapter->ifp;
1450	dev = adapter->dev;
1451
1452	/* Clear the ring contents */
1453	IXGBE_RX_LOCK(rxr);
1454#ifdef DEV_NETMAP
1455	/* same as in ixgbe_setup_transmit_ring() */
1456	slot = netmap_reset(na, NR_RX, rxr->me, 0);
1457#endif /* DEV_NETMAP */
1458	rsize = roundup2(adapter->num_rx_desc *
1459	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1460	bzero((void *)rxr->rx_base, rsize);
1461	/* Cache the size */
1462	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1463
1464	/* Free current RX buffer structs and their mbufs */
1465	ixgbe_free_receive_ring(rxr);
1466
1467	/* Now replenish the mbufs */
1468	for (int j = 0; j != rxr->num_desc; ++j) {
1469		struct mbuf	*mp;
1470
1471		rxbuf = &rxr->rx_buffers[j];
1472#ifdef DEV_NETMAP
1473		/*
1474		 * In netmap mode, fill the map and set the buffer
1475		 * address in the NIC ring, considering the offset
1476		 * between the netmap and NIC rings (see comment in
1477		 * ixgbe_setup_transmit_ring() ). No need to allocate
1478		 * an mbuf, so end the block with a continue;
1479		 */
1480		if (slot) {
1481			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1482			uint64_t paddr;
1483			void *addr;
1484
1485			addr = PNMB(na, slot + sj, &paddr);
1486			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1487			/* Update descriptor and the cached value */
1488			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1489			rxbuf->addr = htole64(paddr);
1490			continue;
1491		}
1492#endif /* DEV_NETMAP */
1493		rxbuf->flags = 0;
1494		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1495		    M_PKTHDR, adapter->rx_mbuf_sz);
1496		if (rxbuf->buf == NULL) {
1497			error = ENOBUFS;
1498                        goto fail;
1499		}
1500		mp = rxbuf->buf;
1501		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1502		/* Get the memory mapping */
1503		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1504		    rxbuf->pmap, mp, seg,
1505		    &nsegs, BUS_DMA_NOWAIT);
1506		if (error != 0)
1507                        goto fail;
1508		bus_dmamap_sync(rxr->ptag,
1509		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
1510		/* Update the descriptor and the cached value */
1511		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1512		rxbuf->addr = htole64(seg[0].ds_addr);
1513	}
1514
1515
1516	/* Setup our descriptor indices */
1517	rxr->next_to_check = 0;
1518	rxr->next_to_refresh = 0;
1519	rxr->lro_enabled = FALSE;
1520	rxr->rx_copies = 0;
1521	rxr->rx_bytes = 0;
1522	rxr->vtag_strip = FALSE;
1523
1524	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1525	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1526
1527	/*
1528	** Now set up the LRO interface:
1529	*/
1530	if (ixgbe_rsc_enable)
1531		ixgbe_setup_hw_rsc(rxr);
1532	else if (ifp->if_capenable & IFCAP_LRO) {
1533		int err = tcp_lro_init(lro);
1534		if (err) {
1535			device_printf(dev, "LRO Initialization failed!\n");
1536			goto fail;
1537		}
1538		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1539		rxr->lro_enabled = TRUE;
1540		lro->ifp = adapter->ifp;
1541	}
1542
1543	IXGBE_RX_UNLOCK(rxr);
1544	return (0);
1545
1546fail:
1547	ixgbe_free_receive_ring(rxr);
1548	IXGBE_RX_UNLOCK(rxr);
1549	return (error);
1550}
1551
1552/*********************************************************************
1553 *
1554 *  Initialize all receive rings.
1555 *
1556 **********************************************************************/
1557int
1558ixgbe_setup_receive_structures(struct adapter *adapter)
1559{
1560	struct rx_ring *rxr = adapter->rx_rings;
1561	int j;
1562
1563	for (j = 0; j < adapter->num_queues; j++, rxr++)
1564		if (ixgbe_setup_receive_ring(rxr))
1565			goto fail;
1566
1567	return (0);
1568fail:
1569	/*
1570	 * Free the RX buffers allocated so far; we only handle
1571	 * the rings that completed, since the failing case has
1572	 * cleaned up after itself. Ring 'j' failed, so it is the terminus.
1573	 */
1574	for (int i = 0; i < j; ++i) {
1575		rxr = &adapter->rx_rings[i];
1576		ixgbe_free_receive_ring(rxr);
1577	}
1578
1579	return (ENOBUFS);
1580}
1581
1582
1583/*********************************************************************
1584 *
1585 *  Free all receive rings.
1586 *
1587 **********************************************************************/
1588void
1589ixgbe_free_receive_structures(struct adapter *adapter)
1590{
1591	struct rx_ring *rxr = adapter->rx_rings;
1592
1593	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1594
1595	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1596		struct lro_ctrl		*lro = &rxr->lro;
1597		ixgbe_free_receive_buffers(rxr);
1598		/* Free LRO memory */
1599		tcp_lro_free(lro);
1600		/* Free the ring memory as well */
1601		ixgbe_dma_free(adapter, &rxr->rxdma);
1602	}
1603
1604	free(adapter->rx_rings, M_DEVBUF);
1605}
1606
1607
1608/*********************************************************************
1609 *
1610 *  Free receive ring data structures
1611 *
1612 **********************************************************************/
1613void
1614ixgbe_free_receive_buffers(struct rx_ring *rxr)
1615{
1616	struct adapter		*adapter = rxr->adapter;
1617	struct ixgbe_rx_buf	*rxbuf;
1618
1619	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1620
1621	/* Cleanup any existing buffers */
1622	if (rxr->rx_buffers != NULL) {
1623		for (int i = 0; i < adapter->num_rx_desc; i++) {
1624			rxbuf = &rxr->rx_buffers[i];
1625			if (rxbuf->buf != NULL) {
1626				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1627				    BUS_DMASYNC_POSTREAD);
1628				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1629				rxbuf->buf->m_flags |= M_PKTHDR;
1630				m_freem(rxbuf->buf);
1631			}
1632			rxbuf->buf = NULL;
1633			if (rxbuf->pmap != NULL) {
1634				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1635				rxbuf->pmap = NULL;
1636			}
1637		}
1638		if (rxr->rx_buffers != NULL) {
1639			free(rxr->rx_buffers, M_DEVBUF);
1640			rxr->rx_buffers = NULL;
1641		}
1642	}
1643
1644	if (rxr->ptag != NULL) {
1645		bus_dma_tag_destroy(rxr->ptag);
1646		rxr->ptag = NULL;
1647	}
1648
1649	return;
1650}
1651
1652static __inline void
1653ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1654{
1655
1656        /*
1657         * At the moment LRO is only for IP/TCP packets whose TCP checksum
1658         * was verified by hardware, with no VLAN tag in the ethernet
1659         * header.  For IPv6 we do not yet support extension headers.
1660         */
1661        if (rxr->lro_enabled &&
1662            (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1663            (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1664            ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1665            (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1666            (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1667            (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1668            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1669            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1670                /*
1671                 * Send to the stack if:
1672                 *  - LRO not enabled, or
1673                 *  - no LRO resources, or
1674                 *  - lro enqueue fails
1675                 */
1676                if (rxr->lro.lro_cnt != 0)
1677                        if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1678                                return;
1679        }
1680	IXGBE_RX_UNLOCK(rxr);
1681        (*ifp->if_input)(ifp, m);
1682	IXGBE_RX_LOCK(rxr);
1683}
1684
1685static __inline void
1686ixgbe_rx_discard(struct rx_ring *rxr, int i)
1687{
1688	struct ixgbe_rx_buf	*rbuf;
1689
1690	rbuf = &rxr->rx_buffers[i];
1691
1692
1693	/*
1694	** With advanced descriptors the writeback
1695	** clobbers the buffer addrs, so it's easier
1696	** to just free the existing mbufs and take
1697	** the normal refresh path to get new buffers
1698	** and mapping.
1699	*/
1700
1701	if (rbuf->fmp != NULL) {/* Partial chain ? */
1702		rbuf->fmp->m_flags |= M_PKTHDR;
1703		m_freem(rbuf->fmp);
1704		rbuf->fmp = NULL;
1705		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1706	} else if (rbuf->buf) {
1707		m_free(rbuf->buf);
1708		rbuf->buf = NULL;
1709	}
1710	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1711
1712	rbuf->flags = 0;
1713
1714	return;
1715}
1716
1717
1718/*********************************************************************
1719 *
1720 *  This routine executes in interrupt context. It replenishes
1721 *  the mbufs in the descriptor ring and sends data which has been
1722 *  DMA'ed into host memory to the upper layer.
1723 *
1724 *  Return TRUE for more work, FALSE for all clean.
1725 *********************************************************************/
1726bool
1727ixgbe_rxeof(struct ix_queue *que)
1728{
1729	struct adapter		*adapter = que->adapter;
1730	struct rx_ring		*rxr = que->rxr;
1731	struct ifnet		*ifp = adapter->ifp;
1732	struct lro_ctrl		*lro = &rxr->lro;
1733	struct lro_entry	*queued;
1734	int			i, nextp, processed = 0;
1735	u32			staterr = 0;
1736	u16			count = rxr->process_limit;
1737	union ixgbe_adv_rx_desc	*cur;
1738	struct ixgbe_rx_buf	*rbuf, *nbuf;
1739	u16			pkt_info;
1740
1741	IXGBE_RX_LOCK(rxr);
1742
1743#ifdef DEV_NETMAP
1744	/* Same as the txeof routine: wakeup clients on intr. */
1745	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1746		IXGBE_RX_UNLOCK(rxr);
1747		return (FALSE);
1748	}
1749#endif /* DEV_NETMAP */
1750
1751	for (i = rxr->next_to_check; count != 0;) {
1752		struct mbuf	*sendmp, *mp;
1753		u32		rsc, ptype;
1754		u16		len;
1755		u16		vtag = 0;
1756		bool		eop;
1757
1758		/* Sync the ring. */
1759		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1760		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1761
1762		cur = &rxr->rx_base[i];
1763		staterr = le32toh(cur->wb.upper.status_error);
1764		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1765
1766		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1767			break;
1768		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1769			break;
1770
1771		count--;
1772		sendmp = NULL;
1773		nbuf = NULL;
1774		rsc = 0;
1775		cur->wb.upper.status_error = 0;
1776		rbuf = &rxr->rx_buffers[i];
1777		mp = rbuf->buf;
1778
1779		len = le16toh(cur->wb.upper.length);
1780		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1781		    IXGBE_RXDADV_PKTTYPE_MASK;
1782		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1783
1784		/* Make sure bad packets are discarded */
1785		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1786#if __FreeBSD_version >= 1100036
1787			if (IXGBE_IS_VF(adapter))
1788				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1789#endif
1790			rxr->rx_discarded++;
1791			ixgbe_rx_discard(rxr, i);
1792			goto next_desc;
1793		}
1794
1795		/*
1796		** On 82599 which supports a hardware
1797		** LRO (called HW RSC), packets need
1798		** not be fragmented across sequential
1799		** descriptors, rather the next descriptor
1800		** is indicated in bits of the descriptor.
1801		** This also means that we might process
1802		** more than one packet at a time, something
1803		** that has never been true before, it
1804		** required eliminating global chain pointers
1805		** in favor of what we are doing here.  -jfv
1806		*/
1807		if (!eop) {
1808			/*
1809			** Figure out the next descriptor
1810			** of this frame.
1811			*/
1812			if (rxr->hw_rsc == TRUE) {
1813				rsc = ixgbe_rsc_count(cur);
1814				rxr->rsc_num += (rsc - 1);
1815			}
1816			if (rsc) { /* Get hardware index */
1817				nextp = ((staterr &
1818				    IXGBE_RXDADV_NEXTP_MASK) >>
1819				    IXGBE_RXDADV_NEXTP_SHIFT);
1820			} else { /* Just sequential */
1821				nextp = i + 1;
1822				if (nextp == adapter->num_rx_desc)
1823					nextp = 0;
1824			}
1825			nbuf = &rxr->rx_buffers[nextp];
1826			prefetch(nbuf);
1827		}
1828		/*
1829		** Rather than using the fmp/lmp global pointers
1830		** we now keep the head of a packet chain in the
1831		** buffer struct and pass this along from one
1832		** descriptor to the next, until we get EOP.
1833		*/
1834		mp->m_len = len;
1835		/*
1836		** See if there is a stored head; if so, this
1837		** buffer continues an earlier packet.
1838		*/
1839		sendmp = rbuf->fmp;
1840		if (sendmp != NULL) {  /* secondary frag */
1841			rbuf->buf = rbuf->fmp = NULL;
1842			mp->m_flags &= ~M_PKTHDR;
1843			sendmp->m_pkthdr.len += mp->m_len;
1844		} else {
1845			/*
1846			 * Optimize.  This might be a small packet,
1847			 * maybe just a TCP ACK.  Do a fast copy that
1848			 * is cache aligned into a new mbuf, and
1849			 * leave the old mbuf+cluster for re-use.
1850			 */
1851			if (eop && len <= IXGBE_RX_COPY_LEN) {
1852				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1853				if (sendmp != NULL) {
1854					sendmp->m_data +=
1855					    IXGBE_RX_COPY_ALIGN;
1856					ixgbe_bcopy(mp->m_data,
1857					    sendmp->m_data, len);
1858					sendmp->m_len = len;
1859					rxr->rx_copies++;
1860					rbuf->flags |= IXGBE_RX_COPY;
1861				}
1862			}
1863			if (sendmp == NULL) {
1864				rbuf->buf = rbuf->fmp = NULL;
1865				sendmp = mp;
1866			}
1867
1868			/* first descriptor of a non-packet-split chain */
1869			sendmp->m_flags |= M_PKTHDR;
1870			sendmp->m_pkthdr.len = mp->m_len;
1871		}
1872		++processed;
1873
1874		/* Pass the head pointer on */
1875		if (eop == 0) {
1876			nbuf->fmp = sendmp;
1877			sendmp = NULL;
1878			mp->m_next = nbuf->buf;
1879		} else { /* Sending this frame */
1880			sendmp->m_pkthdr.rcvif = ifp;
1881			rxr->rx_packets++;
1882			/* capture data for AIM */
1883			rxr->bytes += sendmp->m_pkthdr.len;
1884			rxr->rx_bytes += sendmp->m_pkthdr.len;
1885			/* Process vlan info */
1886			if ((rxr->vtag_strip) &&
1887			    (staterr & IXGBE_RXD_STAT_VP))
1888				vtag = le16toh(cur->wb.upper.vlan);
1889			if (vtag) {
1890				sendmp->m_pkthdr.ether_vtag = vtag;
1891				sendmp->m_flags |= M_VLANTAG;
1892			}
1893			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1894				ixgbe_rx_checksum(staterr, sendmp, ptype);
1895#if __FreeBSD_version >= 800000
1896			sendmp->m_pkthdr.flowid = que->msix;
1897#endif /* FreeBSD_version */
1898		}
1899next_desc:
1900		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1901		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1902
1903		/* Advance our pointers to the next descriptor. */
1904		if (++i == rxr->num_desc)
1905			i = 0;
1906
1907		/* Now send to the stack or do LRO */
1908		if (sendmp != NULL) {
1909			rxr->next_to_check = i;
1910			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1911			i = rxr->next_to_check;
1912		}
1913
1914		/* Every 8 descriptors, refresh the mbufs */
1915		if (processed == 8) {
1916			ixgbe_refresh_mbufs(rxr, i);
1917			processed = 0;
1918		}
1919	}
1920
1921	/* Refresh any remaining buf structs */
1922	if (ixgbe_rx_unrefreshed(rxr))
1923		ixgbe_refresh_mbufs(rxr, i);
1924
1925	rxr->next_to_check = i;
1926
1927	/*
1928	 * Flush any outstanding LRO work
1929	 */
1930	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1931		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1932		tcp_lro_flush(lro, queued);
1933	}
1934
1935	IXGBE_RX_UNLOCK(rxr);
1936
1937	/*
1938	** Still have cleaning to do?
1939	*/
1940	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1941		return (TRUE);
1942	else
1943		return (FALSE);
1944}
1945
1946
1947/*********************************************************************
1948 *
1949 *  Verify that the hardware indicated that the checksum is valid.
1950 *  Inform the stack about the status of the checksum so that the
1951 *  stack doesn't spend time verifying it.
1952 *
1953 *********************************************************************/
1954static void
1955ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1956{
1957	u16	status = (u16) staterr;
1958	u8	errors = (u8) (staterr >> 24);
1959	bool	sctp = FALSE;
1960
1961	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1962	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1963		sctp = TRUE;
1964
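	/*
	** IPCS/L4CS indicate the hardware computed the IP and L4
	** checksums; IPE/TCPE indicate it found them to be bad.
	*/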
1965	if (status & IXGBE_RXD_STAT_IPCS) {
1966		if (!(errors & IXGBE_RXD_ERR_IPE)) {
1967			/* IP Checksum Good */
1968			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1969			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1970
1971		} else
1972			mp->m_pkthdr.csum_flags = 0;
1973	}
1974	if (status & IXGBE_RXD_STAT_L4CS) {
1975		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1976#if __FreeBSD_version >= 800000
1977		if (sctp)
1978			type = CSUM_SCTP_VALID;
1979#endif
1980		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1981			mp->m_pkthdr.csum_flags |= type;
1982			if (!sctp)
1983				mp->m_pkthdr.csum_data = htons(0xffff);
1984		}
1985	}
1986	return;
1987}
1988
1989/********************************************************************
1990 * Manage DMA'able memory.
1991 *******************************************************************/
1992static void
1993ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1994{
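	/*
	** bus_dmamap_load() callback: the tag is created with a
	** single segment, so just pass that segment's bus address
	** back through the caller-supplied pointer.
	*/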
1995	if (error)
1996		return;
1997	*(bus_addr_t *) arg = segs->ds_addr;
1998	return;
1999}
2000
2001int
2002ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2003		struct ixgbe_dma_alloc *dma, int mapflags)
2004{
2005	device_t dev = adapter->dev;
2006	int             r;
2007
2008	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
2009			       DBA_ALIGN, 0,	/* alignment, bounds */
2010			       BUS_SPACE_MAXADDR,	/* lowaddr */
2011			       BUS_SPACE_MAXADDR,	/* highaddr */
2012			       NULL, NULL,	/* filter, filterarg */
2013			       size,	/* maxsize */
2014			       1,	/* nsegments */
2015			       size,	/* maxsegsize */
2016			       BUS_DMA_ALLOCNOW,	/* flags */
2017			       NULL,	/* lockfunc */
2018			       NULL,	/* lockfuncarg */
2019			       &dma->dma_tag);
2020	if (r != 0) {
2021		device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
2022		       "error %u\n", r);
2023		goto fail_0;
2024	}
2025	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2026			     BUS_DMA_NOWAIT, &dma->dma_map);
2027	if (r != 0) {
2028		device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2029		       "error %u\n", r);
2030		goto fail_1;
2031	}
2032	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2033			    size,
2034			    ixgbe_dmamap_cb,
2035			    &dma->dma_paddr,
2036			    mapflags | BUS_DMA_NOWAIT);
2037	if (r != 0) {
2038		device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
2039		       "error %u\n", r);
2040		goto fail_2;
2041	}
2042	dma->dma_size = size;
2043	return (0);
2044fail_2:
2045	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2046fail_1:
2047	bus_dma_tag_destroy(dma->dma_tag);
2048fail_0:
2049	dma->dma_tag = NULL;
2050	return (r);
2051}
2052
2053void
2054ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2055{
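	/*
	** Tear down in the reverse order of ixgbe_dma_malloc():
	** sync, unload the map, free the memory, destroy the tag.
	*/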
2056	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2057	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2058	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2059	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2060	bus_dma_tag_destroy(dma->dma_tag);
2061}
2062
2063
2064/*********************************************************************
2065 *
2066 *  Allocate memory for the transmit and receive rings, and then
2067 *  the descriptors associated with each, called only once at attach.
2068 *
2069 **********************************************************************/
2070int
2071ixgbe_allocate_queues(struct adapter *adapter)
2072{
2073	device_t	dev = adapter->dev;
2074	struct ix_queue	*que;
2075	struct tx_ring	*txr;
2076	struct rx_ring	*rxr;
2077	int rsize, tsize, error = IXGBE_SUCCESS;
2078	int txconf = 0, rxconf = 0;
2079
2080	/* First allocate the top level queue structs */
2081	if (!(adapter->queues =
2082	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2083	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2084		device_printf(dev, "Unable to allocate queue memory\n");
2085		error = ENOMEM;
2086		goto fail;
2087	}
2088
2089	/* Next allocate the TX ring struct memory */
2090	if (!(adapter->tx_rings =
2091	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2092	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2093		device_printf(dev, "Unable to allocate TX ring memory\n");
2094		error = ENOMEM;
2095		goto tx_fail;
2096	}
2097
2098	/* Next allocate the RX */
2099	if (!(adapter->rx_rings =
2100	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2101	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2102		device_printf(dev, "Unable to allocate RX ring memory\n");
2103		error = ENOMEM;
2104		goto rx_fail;
2105	}
2106
2107	/* For the ring itself */
2108	tsize = roundup2(adapter->num_tx_desc *
2109	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2110
2111	/*
2112	 * Now set up the TX queues; txconf is needed to handle the
2113	 * possibility that things fail midcourse, in which case we
2114	 * need to unwind the allocations gracefully.
2115	 */
2116	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2117		/* Set up some basics */
2118		txr = &adapter->tx_rings[i];
2119		txr->adapter = adapter;
2120		txr->me = i;
2121		txr->num_desc = adapter->num_tx_desc;
2122
2123		/* Initialize the TX side lock */
2124		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2125		    device_get_nameunit(dev), txr->me);
2126		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2127
2128		if (ixgbe_dma_malloc(adapter, tsize,
2129			&txr->txdma, BUS_DMA_NOWAIT)) {
2130			device_printf(dev,
2131			    "Unable to allocate TX Descriptor memory\n");
2132			error = ENOMEM;
2133			goto err_tx_desc;
2134		}
2135		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2136		bzero((void *)txr->tx_base, tsize);
2137
2138		/* Now allocate transmit buffers for the ring */
2139		if (ixgbe_allocate_transmit_buffers(txr)) {
2140			device_printf(dev,
2141			    "Critical Failure setting up transmit buffers\n");
2142			error = ENOMEM;
2143			goto err_tx_desc;
2144		}
2145#ifndef IXGBE_LEGACY_TX
2146		/* Allocate a buf ring */
2147		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2148		    M_WAITOK, &txr->tx_mtx);
2149		if (txr->br == NULL) {
2150			device_printf(dev,
2151			    "Critical Failure setting up buf ring\n");
2152			error = ENOMEM;
2153			goto err_tx_desc;
2154		}
2155#endif
2156	}
2157
2158	/*
2159	 * Next the RX queues...
2160	 */
2161	rsize = roundup2(adapter->num_rx_desc *
2162	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2163	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2164		rxr = &adapter->rx_rings[i];
2165		/* Set up some basics */
2166		rxr->adapter = adapter;
2167		rxr->me = i;
2168		rxr->num_desc = adapter->num_rx_desc;
2169
2170		/* Initialize the RX side lock */
2171		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2172		    device_get_nameunit(dev), rxr->me);
2173		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2174
2175		if (ixgbe_dma_malloc(adapter, rsize,
2176			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2177			device_printf(dev,
2178			    "Unable to allocate RxDescriptor memory\n");
2179			error = ENOMEM;
2180			goto err_rx_desc;
2181		}
2182		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2183		bzero((void *)rxr->rx_base, rsize);
2184
2185		/* Allocate receive buffers for the ring */
2186		if (ixgbe_allocate_receive_buffers(rxr)) {
2187			device_printf(dev,
2188			    "Critical Failure setting up receive buffers\n");
2189			error = ENOMEM;
2190			goto err_rx_desc;
2191		}
2192	}
2193
2194	/*
2195	** Finally set up the queue holding structs
2196	*/
2197	for (int i = 0; i < adapter->num_queues; i++) {
2198		que = &adapter->queues[i];
2199		que->adapter = adapter;
2200		que->me = i;
2201		que->txr = &adapter->tx_rings[i];
2202		que->rxr = &adapter->rx_rings[i];
2203	}
2204
2205	return (0);
2206
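	/*
	** Error unwinding: rxconf and txconf count how many rings
	** were set up, so only those have their descriptor DMA areas
	** freed before the ring and queue arrays themselves.
	*/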
2207err_rx_desc:
2208	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2209		ixgbe_dma_free(adapter, &rxr->rxdma);
2210err_tx_desc:
2211	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2212		ixgbe_dma_free(adapter, &txr->txdma);
2213	free(adapter->rx_rings, M_DEVBUF);
2214rx_fail:
2215	free(adapter->tx_rings, M_DEVBUF);
2216tx_fail:
2217	free(adapter->queues, M_DEVBUF);
2218fail:
2219	return (error);
2220}
2221