1/******************************************************************************
2
3  Copyright (c) 2001-2017, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/ixgbe/ixv_txrx.c 315333 2017-03-15 21:20:17Z erj $*/
34
35
36#ifndef IXGBE_STANDALONE_BUILD
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#endif
40
41#include "ixv.h"
42
43extern int ix_crcstrip;
44
45/*
46 * HW RSC control:
47 *  This feature only works with
48 *  IPv4, and only on 82599 and later.
49 *  It will also cause IP forwarding to
50 *  fail, and that cannot be controlled by
51 *  the stack as LRO can. For all these
52 *  reasons it is best left off, with no
53 *  tunable interface; enabling it requires
54 *  setting ixgbe_rsc_enable below and
55 *  recompiling.
56 */
57static bool ixgbe_rsc_enable = FALSE;
58
59/************************************************************************
60 *  Local Function prototypes
61 ************************************************************************/
62static void          ixgbe_setup_transmit_ring(struct tx_ring *);
63static void          ixgbe_free_transmit_buffers(struct tx_ring *);
64static int           ixgbe_setup_receive_ring(struct rx_ring *);
65static void          ixgbe_free_receive_buffers(struct rx_ring *);
66static void          ixgbe_rx_checksum(u32, struct mbuf *, u32);
67static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
68static int           ixgbe_xmit(struct tx_ring *, struct mbuf **);
69static int           ixgbe_tx_ctx_setup(struct tx_ring *,
70                                        struct mbuf *, u32 *, u32 *);
71static int           ixgbe_tso_setup(struct tx_ring *,
72                                     struct mbuf *, u32 *, u32 *);
73static __inline void ixgbe_rx_discard(struct rx_ring *, int);
74static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
75                                    struct mbuf *, u32);
76static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
77                                      struct ixgbe_dma_alloc *, int);
78static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
79
80MALLOC_DECLARE(M_IXV);
81
82/************************************************************************
83 * ixv_legacy_start_locked - Transmit entry point
84 *
85 *   Called by the stack to initiate a transmit.
86 *   The driver will remain in this routine as long as there are
87 *   packets to transmit and transmit resources are available.
88 *   In case resources are not available, the stack is notified
89 *   and the packet is requeued.
90 ************************************************************************/
91int
92ixv_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
93{
94	struct mbuf    *m_head;
95	struct adapter *adapter = txr->adapter;
96
97	IXGBE_TX_LOCK_ASSERT(txr);
98
99	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
100		return (ENETDOWN);
101	if (!adapter->link_active)
102		return (ENETDOWN);
103
104	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
105		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
106			break;
107
108		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
109		if (m_head == NULL)
110			break;
111
112		if (ixgbe_xmit(txr, &m_head)) {
113			if (m_head != NULL)
114				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
115			break;
116		}
117		/* Send a copy of the frame to the BPF listener */
118		ETHER_BPF_MTAP(ifp, m_head);
119	}
120
121	return IXGBE_SUCCESS;
122} /* ixv_legacy_start_locked */
123
124/************************************************************************
125 * ixv_legacy_start
126 *
127 *   Called by the stack, this always uses the first tx ring,
128 *   and should not be used with multiqueue tx enabled.
129 ************************************************************************/
130void
131ixv_legacy_start(struct ifnet *ifp)
132{
133	struct adapter *adapter = ifp->if_softc;
134	struct tx_ring *txr = adapter->tx_rings;
135
136	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
137		IXGBE_TX_LOCK(txr);
138		ixv_legacy_start_locked(ifp, txr);
139		IXGBE_TX_UNLOCK(txr);
140	}
141} /* ixv_legacy_start */
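/*
 * Illustrative sketch (not driver code): any other in-driver path that
 * wants to kick the legacy transmit loop by hand must hold the ring
 * lock around the _locked variant, exactly as the wrapper above does.
 * "txr" here stands for whichever ring the caller selected.
 *
 *	IXGBE_TX_LOCK(txr);
 *	(void)ixv_legacy_start_locked(ifp, txr);
 *	IXGBE_TX_UNLOCK(txr);
 */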
142
143/************************************************************************
144 * ixv_mq_start - Multiqueue Transmit Entry Point
145 *
146 *   (if_transmit function)
147 ************************************************************************/
148int
149ixv_mq_start(struct ifnet *ifp, struct mbuf *m)
150{
151	struct adapter  *adapter = ifp->if_softc;
152	struct ix_queue *que;
153	struct tx_ring  *txr;
154	int             i, err = 0;
155	uint32_t        bucket_id;
156
157	/*
158	 * When doing RSS, map the packet to the same outbound
159	 * queue that the incoming flow would be mapped to.
160	 *
161	 * If everything is set up correctly, that is the same bucket
162	 * the current CPU maps to (see the sketch after this function).
163	 */
164	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
165		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
166		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
167		    &bucket_id) == 0)) {
168			i = bucket_id % adapter->num_queues;
169#ifdef IXGBE_DEBUG
170			if (bucket_id > adapter->num_queues)
171				if_printf(ifp,
172				    "bucket_id (%d) > num_queues (%d)\n",
173				    bucket_id, adapter->num_queues);
174#endif
175		} else
176			i = m->m_pkthdr.flowid % adapter->num_queues;
177	} else
178		i = curcpu % adapter->num_queues;
179
180	/* Check for a hung queue and pick alternative */
181	if (((1 << i) & adapter->active_queues) == 0)
182		i = ffsl(adapter->active_queues);
183
184	txr = &adapter->tx_rings[i];
185	que = &adapter->queues[i];
186
187	err = drbr_enqueue(ifp, txr->br, m);
188	if (err)
189		return (err);
190	if (IXGBE_TX_TRYLOCK(txr)) {
191		ixv_mq_start_locked(ifp, txr);
192		IXGBE_TX_UNLOCK(txr);
193	} else
194		taskqueue_enqueue(que->tq, &txr->txq_task);
195
196	return (0);
197} /* ixv_mq_start */
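/*
 * Illustrative sketch (hypothetical helper, not compiled into the
 * driver): ignoring the hung-queue fallback, the queue selection above
 * reduces to a pure function of the packet's hash state, the RSS bucket
 * lookup, the flow id, and the queue count:
 *
 *	static inline int
 *	example_pick_queue(int hashed, int have_bucket, uint32_t bucket_id,
 *	    uint32_t flowid, int num_queues, int cpu)
 *	{
 *		if (hashed && have_bucket)
 *			return (bucket_id % num_queues);
 *		if (hashed)
 *			return (flowid % num_queues);
 *		return (cpu % num_queues);
 *	}
 */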
198
199/************************************************************************
200 * ixv_mq_start_locked
201 ************************************************************************/
202int
203ixv_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
204{
205	struct mbuf    *next;
206	int            enqueued = 0, err = 0;
207
208	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
209		return (ENETDOWN);
210	if (!txr->adapter->link_active)
211		return (ENETDOWN);
212
213	/* Process the queue */
214	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
215		err = ixgbe_xmit(txr, &next);
216		if (err != 0) {
217			if (next == NULL)
218				drbr_advance(ifp, txr->br);
219			else
220				drbr_putback(ifp, txr->br, next);
221			break;
222		}
223		drbr_advance(ifp, txr->br);
224		enqueued++;
225#if __FreeBSD_version >= 1100036
226		/*
227		 * Count outbound multicast frames (the
228		 * if_inc_counter() KPI is guarded by the
229		 * __FreeBSD_version check above).
230		 */
231		if (next->m_flags & M_MCAST)
232			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
233#endif
234		/* Send a copy of the frame to the BPF listener */
235		ETHER_BPF_MTAP(ifp, next);
236		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
237			break;
238	}
239
240	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
241		ixv_txeof(txr);
242
243	return (err);
244} /* ixv_mq_start_locked */
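/*
 * Illustrative sketch of the drbr contract relied on above (hypothetical
 * consumer; "try_transmit" is a stand-in for ixgbe_xmit): a peeked mbuf
 * stays in the ring until the transmit attempt is resolved, so the
 * consumer advances past it on success, advances when the callee
 * consumed or freed it (pointer set to NULL), or puts it back to retry:
 *
 *	while ((m = drbr_peek(ifp, br)) != NULL) {
 *		if (try_transmit(&m) != 0) {
 *			if (m == NULL)
 *				drbr_advance(ifp, br);
 *			else
 *				drbr_putback(ifp, br, m);
 *			break;
 *		}
 *		drbr_advance(ifp, br);
 *	}
 */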
245
246/************************************************************************
247 * ixv_deferred_mq_start
248 *
249 *   Called from a taskqueue to drain queued transmit packets.
250 ************************************************************************/
251void
252ixv_deferred_mq_start(void *arg, int pending)
253{
254	struct tx_ring *txr = arg;
255	struct adapter *adapter = txr->adapter;
256	struct ifnet   *ifp = adapter->ifp;
257
258	IXGBE_TX_LOCK(txr);
259	if (!drbr_empty(ifp, txr->br))
260		ixv_mq_start_locked(ifp, txr);
261	IXGBE_TX_UNLOCK(txr);
262} /* ixv_deferred_mq_start */
263
264/************************************************************************
265 * ixv_qflush - Flush all ring buffers
266 ************************************************************************/
267void
268ixv_qflush(struct ifnet *ifp)
269{
270	struct adapter *adapter = ifp->if_softc;
271	struct tx_ring *txr = adapter->tx_rings;
272	struct mbuf    *m;
273
274	for (int i = 0; i < adapter->num_queues; i++, txr++) {
275		IXGBE_TX_LOCK(txr);
276		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
277			m_freem(m);
278		IXGBE_TX_UNLOCK(txr);
279	}
280	if_qflush(ifp);
281} /* ixv_qflush */
282
283
284/************************************************************************
285 * ixgbe_xmit
286 *
287 *   This routine maps the mbufs to tx descriptors, allowing the
288 *   TX engine to transmit the packets.
289 *
290 *   Return 0 on success, positive on failure
291 ************************************************************************/
292static int
293ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
294{
295	struct adapter          *adapter = txr->adapter;
296	struct ixgbe_tx_buf     *txbuf;
297	union ixgbe_adv_tx_desc *txd = NULL;
298	struct mbuf             *m_head;
299	int                     i, j, error, nsegs;
300	int                     first;
301	u32                     olinfo_status = 0, cmd_type_len;
302	bool                    remap = TRUE;
303	bus_dma_segment_t       segs[adapter->num_segs];
304	bus_dmamap_t            map;
305
306	m_head = *m_headp;
307
308	/* Basic descriptor defines */
309	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
310	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
311
312	if (m_head->m_flags & M_VLANTAG)
313		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
314
315	/*
316	 * Important to capture the first descriptor
317	 * used because it will contain the index of
318	 * the one we tell the hardware to report back
319	 */
320	first = txr->next_avail_desc;
321	txbuf = &txr->tx_buffers[first];
322	map = txbuf->map;
323
324	/*
325	 * Map the packet for DMA.
326	 */
327retry:
328	error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
329	    &nsegs, BUS_DMA_NOWAIT);
330
331	if (__predict_false(error)) {
332		struct mbuf *m;
333
334		switch (error) {
335		case EFBIG:
336			/* Try it again? - one try */
337			if (remap == TRUE) {
338				remap = FALSE;
339				/*
340				 * XXX: m_defrag will choke on
341				 * non-MCLBYTES-sized clusters
342				 */
343				m = m_defrag(*m_headp, M_NOWAIT);
344				if (m == NULL) {
345					adapter->mbuf_defrag_failed++;
346					m_freem(*m_headp);
347					*m_headp = NULL;
348					return (ENOBUFS);
349				}
350				*m_headp = m;
351				goto retry;
352			} else
353				return (error);
354		case ENOMEM:
355			txr->no_tx_dma_setup++;
356			return (error);
357		default:
358			txr->no_tx_dma_setup++;
359			m_freem(*m_headp);
360			*m_headp = NULL;
361			return (error);
362		}
363	}
364
365	/* Make certain there are enough descriptors */
366	if (txr->tx_avail < (nsegs + 2)) {
367		txr->no_desc_avail++;
368		bus_dmamap_unload(txr->txtag, map);
369		return (ENOBUFS);
370	}
371	m_head = *m_headp;
372
373	/*
374	 * Set up the appropriate offload context
375	 * this will consume the first descriptor
376	 */
377	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
378	if (__predict_false(error)) {
379		if (error == ENOBUFS)
380			*m_headp = NULL;
381		return (error);
382	}
383
384	olinfo_status |= IXGBE_ADVTXD_CC;
385	i = txr->next_avail_desc;
386	for (j = 0; j < nsegs; j++) {
387		bus_size_t seglen;
388		bus_addr_t segaddr;
389
390		txbuf = &txr->tx_buffers[i];
391		txd = &txr->tx_base[i];
392		seglen = segs[j].ds_len;
393		segaddr = htole64(segs[j].ds_addr);
394
395		txd->read.buffer_addr = segaddr;
396		txd->read.cmd_type_len = htole32(txr->txd_cmd |
397		    cmd_type_len | seglen);
398		txd->read.olinfo_status = htole32(olinfo_status);
399
400		if (++i == txr->num_desc)
401			i = 0;
402	}
403
404	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
405	txr->tx_avail -= nsegs;
406	txr->next_avail_desc = i;
407
408	txbuf->m_head = m_head;
409	/*
410	 * Here we swap the maps so that the last descriptor,
411	 * which gets the completion interrupt, has the
412	 * real map, and the first descriptor gets the
413	 * now-unused map from the last descriptor.
414	 */
415	txr->tx_buffers[first].map = txbuf->map;
416	txbuf->map = map;
417	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
418
419	/* Set the EOP descriptor that will be marked done */
420	txbuf = &txr->tx_buffers[first];
421	txbuf->eop = txd;
422
423	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
424	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
425	/*
426	 * Advance the Transmit Descriptor Tail (TDT); this tells the
427	 * hardware that this frame is available to transmit.
428	 */
429	++txr->total_packets;
430	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
431
432	/* Mark queue as having work */
433	if (txr->busy == 0)
434		txr->busy = 1;
435
436	return (0);
437} /* ixgbe_xmit */
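/*
 * Worked example of the descriptor accounting above (hypothetical
 * numbers): a frame that maps to 3 DMA segments consumes one context
 * descriptor from ixgbe_tx_ctx_setup() plus 3 data descriptors, i.e.
 * 4 ring entries, so the "tx_avail < (nsegs + 2)" guard insists on at
 * least 5 free entries; the extra slot of headroom keeps the producer
 * from ever driving the ring completely full.
 */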
438
439
440/************************************************************************
441 * ixgbe_allocate_transmit_buffers
442 *
443 *   Allocate memory for tx_buffer structures. The tx_buffer stores all
444 *   the information needed to transmit a packet on the wire. This is
445 *   called only once at attach, setup is done every reset.
446 ************************************************************************/
447static int
448ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
449{
450	struct adapter      *adapter = txr->adapter;
451	device_t            dev = adapter->dev;
452	struct ixgbe_tx_buf *txbuf;
453	int                 error, i;
454
455	/*
456	 * Setup DMA descriptor areas.
457	 */
458	error = bus_dma_tag_create(
459	         /*      parent */ bus_get_dma_tag(adapter->dev),
460	         /*   alignment */ 1,
461	         /*      bounds */ 0,
462	         /*     lowaddr */ BUS_SPACE_MAXADDR,
463	         /*    highaddr */ BUS_SPACE_MAXADDR,
464	         /*      filter */ NULL,
465	         /*   filterarg */ NULL,
466	         /*     maxsize */ IXGBE_TSO_SIZE,
467	         /*   nsegments */ adapter->num_segs,
468	         /*  maxsegsize */ PAGE_SIZE,
469	         /*       flags */ 0,
470	         /*    lockfunc */ NULL,
471	         /* lockfuncarg */ NULL,
472	                           &txr->txtag);
473	if (error) {
474		device_printf(dev, "Unable to allocate TX DMA tag\n");
475		goto fail;
476	}
477
478	txr->tx_buffers =
479	    (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
480	    adapter->num_tx_desc, M_IXV, M_NOWAIT | M_ZERO);
481	if (!txr->tx_buffers) {
482		device_printf(dev, "Unable to allocate tx_buffer memory\n");
483		error = ENOMEM;
484		goto fail;
485	}
486
487	/* Create the descriptor buffer dma maps */
488	txbuf = txr->tx_buffers;
489	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
490		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
491		if (error != 0) {
492			device_printf(dev, "Unable to create TX DMA map\n");
493			goto fail;
494		}
495	}
496
497	return 0;
498fail:
499	/* Free everything; this handles the case where we failed partway through */
500	ixv_free_transmit_structures(adapter);
501
502	return (error);
503} /* ixgbe_allocate_transmit_buffers */
504
505/************************************************************************
506 * ixgbe_setup_transmit_ring
507 *
508 *  Initialize a transmit ring.
509 ************************************************************************/
510static void
511ixgbe_setup_transmit_ring(struct tx_ring *txr)
512{
513	struct adapter        *adapter = txr->adapter;
514	struct ixgbe_tx_buf   *txbuf;
515#ifdef DEV_NETMAP
516	struct netmap_adapter *na = NA(adapter->ifp);
517	struct netmap_slot    *slot;
518#endif /* DEV_NETMAP */
519
520	/* Clear the old ring contents */
521	IXGBE_TX_LOCK(txr);
522
523#ifdef DEV_NETMAP
524	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
525		/*
526		 * (under lock): if in netmap mode, do some consistency
527		 * checks and set slot to entry 0 of the netmap ring.
528		 */
529		slot = netmap_reset(na, NR_TX, txr->me, 0);
530	}
531#endif /* DEV_NETMAP */
532
533	bzero((void *)txr->tx_base,
534	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
535	/* Reset indices */
536	txr->next_avail_desc = 0;
537	txr->next_to_clean = 0;
538
539	/* Free any existing tx buffers. */
540	txbuf = txr->tx_buffers;
541	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
542		if (txbuf->m_head != NULL) {
543			bus_dmamap_sync(txr->txtag, txbuf->map,
544			    BUS_DMASYNC_POSTWRITE);
545			bus_dmamap_unload(txr->txtag, txbuf->map);
546			m_freem(txbuf->m_head);
547			txbuf->m_head = NULL;
548		}
549
550#ifdef DEV_NETMAP
551		/*
552		 * In netmap mode, set the map for the packet buffer.
553		 * NOTE: Some drivers (not this one) also need to set
554		 * the physical buffer address in the NIC ring.
555		 * Slots in the netmap ring (indexed by "si") are
556		 * kring->nkr_hwofs positions "ahead" wrt the
557		 * corresponding slot in the NIC ring. In some drivers
558		 * (not here) nkr_hwofs can be negative. Function
559		 * netmap_idx_n2k() handles wraparounds properly.
560		 */
561		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
562			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
563			netmap_load_map(na, txr->txtag,
564			    txbuf->map, NMB(na, slot + si));
565		}
566#endif /* DEV_NETMAP */
567
568		/* Clear the EOP descriptor pointer */
569		txbuf->eop = NULL;
570	}
571
572	/* Set number of descriptors available */
573	txr->tx_avail = adapter->num_tx_desc;
574
575	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
576	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
577	IXGBE_TX_UNLOCK(txr);
578} /* ixgbe_setup_transmit_ring */
579
580/************************************************************************
581 * ixv_setup_transmit_structures - Initialize all transmit rings.
582 ************************************************************************/
583int
584ixv_setup_transmit_structures(struct adapter *adapter)
585{
586	struct tx_ring *txr = adapter->tx_rings;
587
588	for (int i = 0; i < adapter->num_queues; i++, txr++)
589		ixgbe_setup_transmit_ring(txr);
590
591	return (0);
592} /* ixv_setup_transmit_structures */
593
594/************************************************************************
595 * ixv_free_transmit_structures - Free all transmit rings.
596 ************************************************************************/
597void
598ixv_free_transmit_structures(struct adapter *adapter)
599{
600	struct tx_ring *txr = adapter->tx_rings;
601
602	for (int i = 0; i < adapter->num_queues; i++, txr++) {
603		IXGBE_TX_LOCK(txr);
604		ixgbe_free_transmit_buffers(txr);
605		ixgbe_dma_free(adapter, &txr->txdma);
606		IXGBE_TX_UNLOCK(txr);
607		IXGBE_TX_LOCK_DESTROY(txr);
608	}
609	free(adapter->tx_rings, M_IXV);
610} /* ixv_free_transmit_structures */
611
612/************************************************************************
613 * ixgbe_free_transmit_buffers
614 *
615 *   Free transmit ring related data structures.
616 ************************************************************************/
617static void
618ixgbe_free_transmit_buffers(struct tx_ring *txr)
619{
620	struct adapter      *adapter = txr->adapter;
621	struct ixgbe_tx_buf *tx_buffer;
622	int                 i;
623
624	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
625
626	if (txr->tx_buffers == NULL)
627		return;
628
629	tx_buffer = txr->tx_buffers;
630	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
631		if (tx_buffer->m_head != NULL) {
632			bus_dmamap_sync(txr->txtag, tx_buffer->map,
633			    BUS_DMASYNC_POSTWRITE);
634			bus_dmamap_unload(txr->txtag, tx_buffer->map);
635			m_freem(tx_buffer->m_head);
636			tx_buffer->m_head = NULL;
637			if (tx_buffer->map != NULL) {
638				bus_dmamap_destroy(txr->txtag, tx_buffer->map);
639				tx_buffer->map = NULL;
640			}
641		} else if (tx_buffer->map != NULL) {
642			bus_dmamap_unload(txr->txtag, tx_buffer->map);
643			bus_dmamap_destroy(txr->txtag, tx_buffer->map);
644			tx_buffer->map = NULL;
645		}
646	}
647	if (txr->br != NULL)
648		buf_ring_free(txr->br, M_IXV);
649	if (txr->tx_buffers != NULL) {
650		free(txr->tx_buffers, M_IXV);
651		txr->tx_buffers = NULL;
652	}
653	if (txr->txtag != NULL) {
654		bus_dma_tag_destroy(txr->txtag);
655		txr->txtag = NULL;
656	}
657} /* ixgbe_free_transmit_buffers */
658
659/************************************************************************
660 * ixgbe_tx_ctx_setup
661 *
662 *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
663 ************************************************************************/
664static int
665ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
666    u32 *cmd_type_len, u32 *olinfo_status)
667{
668	struct ixgbe_adv_tx_context_desc *TXD;
669	struct ether_vlan_header         *eh;
670#ifdef INET
671	struct ip                        *ip;
672#endif
673#ifdef INET6
674	struct ip6_hdr                   *ip6;
675#endif
676	int                              ehdrlen, ip_hlen = 0;
677	int                              offload = TRUE;
678	int                              ctxd = txr->next_avail_desc;
679	u32                              vlan_macip_lens = 0;
680	u32                              type_tucmd_mlhl = 0;
681	u16                              vtag = 0;
682	u16                              etype;
683	u8                               ipproto = 0;
684	caddr_t                          l3d;
685
686
687	/* First check if TSO is to be used */
688	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
689		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
690
691	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
692		offload = FALSE;
693
694	/* Indicate the whole packet as payload when not doing TSO */
695	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
696
697	/* Now ready a context descriptor */
698	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
699
700	/*
701	 * In advanced descriptors the vlan tag must
702	 * be placed into the context descriptor. Hence
703	 * we need to make one even if not doing offloads.
704	 */
705	if (mp->m_flags & M_VLANTAG) {
706		vtag = htole16(mp->m_pkthdr.ether_vtag);
707		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
708	} else if (!IXGBE_IS_X550VF(txr->adapter) && (offload == FALSE))
709		return (0);
710
711	/*
712	 * Determine where frame payload starts.
713	 * Jump over vlan headers if already present,
714	 * helpful for QinQ too.
715	 */
716	eh = mtod(mp, struct ether_vlan_header *);
717	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
718		etype = ntohs(eh->evl_proto);
719		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
720	} else {
721		etype = ntohs(eh->evl_encap_proto);
722		ehdrlen = ETHER_HDR_LEN;
723	}
724
725	/* Set the ether header length */
726	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
727
728	if (offload == FALSE)
729		goto no_offloads;
730
731	/*
732	 * If the first mbuf only includes the ethernet header,
733	 * jump to the next one
734	 * XXX: This assumes the stack splits mbufs containing headers
735	 *      on header boundaries
736	 * XXX: And assumes the entire IP header is contained in one mbuf
737	 */
738	if (mp->m_len == ehdrlen && mp->m_next)
739		l3d = mtod(mp->m_next, caddr_t);
740	else
741		l3d = mtod(mp, caddr_t) + ehdrlen;
742
743	switch (etype) {
744#ifdef INET
745		case ETHERTYPE_IP:
746			ip = (struct ip *)(l3d);
747			ip_hlen = ip->ip_hl << 2;
748			ipproto = ip->ip_p;
749			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
750			/* Insert IPv4 checksum into data descriptors */
751			if (mp->m_pkthdr.csum_flags & CSUM_IP) {
752				ip->ip_sum = 0;
753				*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
754			}
755			break;
756#endif
757#ifdef INET6
758		case ETHERTYPE_IPV6:
759			ip6 = (struct ip6_hdr *)(l3d);
760			ip_hlen = sizeof(struct ip6_hdr);
761			ipproto = ip6->ip6_nxt;
762			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
763			break;
764#endif
765		default:
766			offload = FALSE;
767			break;
768	}
769
770	vlan_macip_lens |= ip_hlen;
771
772	/* No support for offloads for non-L4 next headers */
773	switch (ipproto) {
774		case IPPROTO_TCP:
775			if (mp->m_pkthdr.csum_flags &
776			    (CSUM_IP_TCP | CSUM_IP6_TCP))
777				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
778			else
779				offload = false;
780			break;
781		case IPPROTO_UDP:
782			if (mp->m_pkthdr.csum_flags &
783			    (CSUM_IP_UDP | CSUM_IP6_UDP))
784				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
785			else
786				offload = false;
787			break;
788		case IPPROTO_SCTP:
789			if (mp->m_pkthdr.csum_flags &
790			    (CSUM_IP_SCTP | CSUM_IP6_SCTP))
791				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
792			else
793				offload = false;
794			break;
795		default:
796			offload = false;
797			break;
798	}
799
800	if (offload) /* Insert L4 checksum into data descriptors */
801		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
802
803no_offloads:
804	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
805
806	/* Now copy bits into descriptor */
807	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
808	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
809	TXD->seqnum_seed = htole32(0);
810	TXD->mss_l4len_idx = htole32(0);
811
812	/* We've consumed the first desc, adjust counters */
813	if (++ctxd == txr->num_desc)
814		ctxd = 0;
815	txr->next_avail_desc = ctxd;
816	--txr->tx_avail;
817
818	return (0);
819} /* ixgbe_tx_ctx_setup */
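/*
 * Worked example of the context-descriptor packing above (assumed
 * header sizes; untagged IPv4/TCP frame with both CSUM_IP and
 * CSUM_IP_TCP requested): with a 14-byte Ethernet header and a
 * 20-byte IP header the fields end up as
 *
 *	vlan_macip_lens = (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | 20;
 *	type_tucmd_mlhl = IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT |
 *	    IXGBE_ADVTXD_TUCMD_IPV4 | IXGBE_ADVTXD_TUCMD_L4T_TCP;
 *	olinfo_status |= (IXGBE_TXD_POPTS_IXSM | IXGBE_TXD_POPTS_TXSM) << 8;
 *
 * with the total frame length already folded into olinfo_status via
 * IXGBE_ADVTXD_PAYLEN_SHIFT.
 */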
820
821/************************************************************************
822 * ixgbe_tso_setup
823 *
824 *   Setup work for hardware segmentation offload (TSO) on
825 *   adapters using advanced tx descriptors
826 ************************************************************************/
827static int
828ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
829    u32 *olinfo_status)
830{
831	struct ixgbe_adv_tx_context_desc *TXD;
832	struct ether_vlan_header         *eh;
833#ifdef INET6
834	struct ip6_hdr                   *ip6;
835#endif
836#ifdef INET
837	struct ip                        *ip;
838#endif
839	struct tcphdr                    *th;
840	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
841	u32                              vlan_macip_lens = 0;
842	u32                              type_tucmd_mlhl = 0;
843	u32                              mss_l4len_idx = 0, paylen;
844	u16                              vtag = 0, eh_type;
845
846	/*
847	 * Determine where frame payload starts.
848	 * Jump over vlan headers if already present
849	 */
850	eh = mtod(mp, struct ether_vlan_header *);
851	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
852		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
853		eh_type = eh->evl_proto;
854	} else {
855		ehdrlen = ETHER_HDR_LEN;
856		eh_type = eh->evl_encap_proto;
857	}
858
859	switch (ntohs(eh_type)) {
860#ifdef INET
861	case ETHERTYPE_IP:
862		ip = (struct ip *)(mp->m_data + ehdrlen);
863		if (ip->ip_p != IPPROTO_TCP)
864			return (ENXIO);
865		ip->ip_sum = 0;
866		ip_hlen = ip->ip_hl << 2;
867		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
868		th->th_sum = in_pseudo(ip->ip_src.s_addr,
869		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
870		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
871		/* Tell transmit desc to also do IPv4 checksum. */
872		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
873		break;
874#endif
875#ifdef INET6
876	case ETHERTYPE_IPV6:
877		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
878		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
879		if (ip6->ip6_nxt != IPPROTO_TCP)
880			return (ENXIO);
881		ip_hlen = sizeof(struct ip6_hdr);
882		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
883		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
884		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
885		break;
886#endif
887	default:
888		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
889		    __func__, ntohs(eh_type));
890		break;
891	}
892
893	ctxd = txr->next_avail_desc;
894	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
895
896	tcp_hlen = th->th_off << 2;
897
898	/* This is used in the transmit desc in encap */
899	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
900
901	/* VLAN MACLEN IPLEN */
902	if (mp->m_flags & M_VLANTAG) {
903		vtag = htole16(mp->m_pkthdr.ether_vtag);
904		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
905	}
906
907	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
908	vlan_macip_lens |= ip_hlen;
909	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
910
911	/* ADV DTYPE TUCMD */
912	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
913	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
914	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
915
916	/* MSS L4LEN IDX */
917	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
918	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
919	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
920
921	TXD->seqnum_seed = htole32(0);
922
923	if (++ctxd == txr->num_desc)
924		ctxd = 0;
925
926	txr->tx_avail--;
927	txr->next_avail_desc = ctxd;
928	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
929	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
930	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
931	++txr->tso_tx;
932
933	return (0);
934} /* ixgbe_tso_setup */
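/*
 * Worked example of the TSO arithmetic above (assumed sizes): for a
 * 5894-byte IPv4/TCP chain with a 14-byte Ethernet header, a 20-byte
 * IP header, a 20-byte TCP header and tso_segsz (MSS) of 1460,
 *
 *	paylen = 5894 - 14 - 20 - 20 = 5840;
 *	mss_l4len_idx = (1460 << IXGBE_ADVTXD_MSS_SHIFT) |
 *	    (20 << IXGBE_ADVTXD_L4LEN_SHIFT);
 *
 * and the hardware emits four 1460-byte segments, replicating and
 * fixing up the headers for each one.
 */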
935
936
937/************************************************************************
938 * ixv_txeof
939 *
940 *   Examine each tx_buffer in the used queue. If the hardware is done
941 *   processing the packet then free associated resources. The
942 *   tx_buffer is put back on the free queue.
943 ************************************************************************/
944void
945ixv_txeof(struct tx_ring *txr)
946{
947	struct adapter          *adapter = txr->adapter;
948	struct ixgbe_tx_buf     *buf;
949	union ixgbe_adv_tx_desc *txd;
950	u32                     work, processed = 0;
951	u32                     limit = adapter->tx_process_limit;
952
953	mtx_assert(&txr->tx_mtx, MA_OWNED);
954
955#ifdef DEV_NETMAP
956	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
957	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
958		struct netmap_adapter *na = NA(adapter->ifp);
959		struct netmap_kring *kring = &na->tx_rings[txr->me];
960		txd = txr->tx_base;
961		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
962		    BUS_DMASYNC_POSTREAD);
963		/*
964		 * In netmap mode, all the work is done in the context
965		 * of the client thread. Interrupt handlers only wake up
966		 * clients, which may be sleeping on individual rings
967		 * or on a global resource for all rings.
968		 * To implement tx interrupt mitigation, we wake up the client
969		 * thread roughly every half ring, even if the NIC interrupts
970		 * more frequently. This is implemented as follows:
971		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
972		 *   the slot that should wake up the thread (nkr_num_slots
973		 *   means the user thread should not be woken up);
974		 * - the driver ignores tx interrupts unless netmap_mitigate=0
975		 *   or the slot has the DD bit set.
976		 */
977		if (!netmap_mitigate ||
978		    (kring->nr_kflags < kring->nkr_num_slots &&
979		     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
980			netmap_tx_irq(adapter->ifp, txr->me);
981		}
982		return;
983	}
984#endif /* DEV_NETMAP */
985
986	if (txr->tx_avail == txr->num_desc) {
987		txr->busy = 0;
988		return;
989	}
990
991	/* Get work starting point */
992	work = txr->next_to_clean;
993	buf = &txr->tx_buffers[work];
994	txd = &txr->tx_base[work];
995	work -= txr->num_desc; /* The distance to ring end */
996	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
997	    BUS_DMASYNC_POSTREAD);
998
999	do {
1000		union ixgbe_adv_tx_desc *eop = buf->eop;
1001		if (eop == NULL) /* No work */
1002			break;
1003
1004		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1005			break;	/* I/O not complete */
1006
1007		if (buf->m_head) {
1008			txr->bytes += buf->m_head->m_pkthdr.len;
1009			bus_dmamap_sync(txr->txtag, buf->map,
1010			    BUS_DMASYNC_POSTWRITE);
1011			bus_dmamap_unload(txr->txtag, buf->map);
1012			m_freem(buf->m_head);
1013			buf->m_head = NULL;
1014		}
1015		buf->eop = NULL;
1016		++txr->tx_avail;
1017
1018		/* Clean the remaining descriptors of a multi-segment packet */
1019		while (txd != eop) {
1020			++txd;
1021			++buf;
1022			++work;
1023			/* wrap the ring? */
1024			if (__predict_false(!work)) {
1025				work -= txr->num_desc;
1026				buf = txr->tx_buffers;
1027				txd = txr->tx_base;
1028			}
1029			if (buf->m_head) {
1030				txr->bytes += buf->m_head->m_pkthdr.len;
1031				bus_dmamap_sync(txr->txtag, buf->map,
1032				    BUS_DMASYNC_POSTWRITE);
1033				bus_dmamap_unload(txr->txtag, buf->map);
1034				m_freem(buf->m_head);
1035				buf->m_head = NULL;
1036			}
1037			++txr->tx_avail;
1038			buf->eop = NULL;
1039
1040		}
1041		++txr->packets;
1042		++processed;
1043
1044		/* Try the next packet */
1045		++txd;
1046		++buf;
1047		++work;
1048		/* reset with a wrap */
1049		if (__predict_false(!work)) {
1050			work -= txr->num_desc;
1051			buf = txr->tx_buffers;
1052			txd = txr->tx_base;
1053		}
1054		prefetch(txd);
1055	} while (__predict_true(--limit));
1056
1057	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1058	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1059
1060	work += txr->num_desc;
1061	txr->next_to_clean = work;
1062
1063	/*
1064	 * Queue hang detection: we know there is
1065	 * work outstanding or the early return above
1066	 * would have been taken, so increment busy
1067	 * if nothing managed to get cleaned; the
1068	 * local timer will then check it and mark
1069	 * the queue HUNG if it exceeds a MAX attempt.
1070	 */
1071	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1072		++txr->busy;
1073	/*
1074	 * If anything was cleaned, reset the state to 1;
1075	 * note this will turn off HUNG if it is set.
1076	 */
1077	if (processed)
1078		txr->busy = 1;
1079
1080	if (txr->tx_avail == txr->num_desc)
1081		txr->busy = 0;
1082
1083	return;
1084} /* ixv_txeof */
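/*
 * Illustrative walk-through of the "work" index trick above
 * (hypothetical numbers): with num_desc = 8 and next_to_clean = 5,
 * work starts at 5 - 8 = -3; ring slots 5, 6 and 7 correspond to work
 * values -3, -2 and -1, and the ring wraps exactly when an increment
 * drives work to 0, at which point work is rewound by num_desc (back
 * to -8, i.e. slot 0) and buf/txd are reset to the start of the ring.
 * Adding num_desc back at the end recovers the real ring index for
 * next_to_clean.
 */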
1085
1086/************************************************************************
1087 * ixgbe_rsc_count
1088 *
1089 *   Used to detect a descriptor that has been merged by Hardware RSC.
1090 ************************************************************************/
1091static inline u32
1092ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1093{
1094	return (le32toh(rx->wb.lower.lo_dword.data) &
1095	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1096} /* ixgbe_rsc_count */
1097
1098/************************************************************************
1099 * ixgbe_setup_hw_rsc
1100 *
1101 *   Initialize Hardware RSC (LRO) feature on 82599
1102 *   for an RX ring, this is toggled by the LRO capability
1103 *   even though it is transparent to the stack.
1104 *
1105 *   NOTE: Since this HW feature only works with IPv4 and
1106 *         testing has shown soft LRO to be as effective,
1107 *         this feature will be disabled by default.
1108 ************************************************************************/
1109static void
1110ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1111{
1112	struct adapter  *adapter = rxr->adapter;
1113	struct ixgbe_hw *hw = &adapter->hw;
1114	u32             rscctrl, rdrxctl;
1115
1116	/* If turning LRO/RSC off we need to disable it */
1117	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1118		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1119		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1120		return;
1121	}
1122
1123	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1124	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1125#ifdef DEV_NETMAP
1126	/* Always strip CRC unless Netmap disabled it */
1127	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1128	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1129	    ix_crcstrip)
1130#endif /* DEV_NETMAP */
1131		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1132	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1133	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1134
1135	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1136	rscctrl |= IXGBE_RSCCTL_RSCEN;
1137	/*
1138	 * Limit the total number of descriptors that
1139	 * can be combined, so it does not exceed 64K
1140	 */
1141	if (rxr->mbuf_sz == MCLBYTES)
1142		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1143	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1144		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1145	else if (rxr->mbuf_sz == MJUM9BYTES)
1146		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1147	else  /* Using 16K cluster */
1148		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1149
1150	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1151
1152	/* Enable TCP header recognition */
1153	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1154	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1155
1156	/* Disable RSC for ACK packets */
1157	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1158	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1159
1160	rxr->hw_rsc = TRUE;
1161} /* ixgbe_setup_hw_rsc */
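/*
 * Worked numbers for the MAXDESC selection above: RSC may merge at
 * most 64KB per coalesced packet, so 2KB clusters allow 16 merged
 * descriptors (32KB), 4KB page-sized clusters allow 8 (32KB), 9KB
 * jumbo clusters allow 4 (36KB), and 16KB clusters are limited to a
 * single descriptor.
 */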
1162
1163/************************************************************************
1164 * ixgbe_refresh_mbufs
1165 *
1166 *   Refresh mbuf buffers for RX descriptor rings.
1167 *    - Keeps its own state, so discards due to resource
1168 *      exhaustion are unnecessary.  If an mbuf cannot be obtained
1169 *      it just returns, keeping its placeholder; it can simply
1170 *      be called again later to try the refresh again.
1171 ************************************************************************/
1172static void
1173ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1174{
1175	struct adapter      *adapter = rxr->adapter;
1176	struct ixgbe_rx_buf *rxbuf;
1177	struct mbuf         *mp;
1178	bus_dma_segment_t   seg[1];
1179	int                 i, j, nsegs, error;
1180	bool                refreshed = FALSE;
1181
1182	i = j = rxr->next_to_refresh;
1183	/* Control the loop with one beyond */
1184	if (++j == rxr->num_desc)
1185		j = 0;
1186
1187	while (j != limit) {
1188		rxbuf = &rxr->rx_buffers[i];
1189		if (rxbuf->buf == NULL) {
1190			mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1191			    rxr->mbuf_sz);
1192			if (mp == NULL)
1193				goto update;
1194			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1195				m_adj(mp, ETHER_ALIGN);
1196		} else
1197			mp = rxbuf->buf;
1198
1199		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1200
1201		/* If we're dealing with an mbuf that was copied rather
1202		 * than replaced, there's no need to go through busdma.
1203		 */
1204		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1205			/* Get the memory mapping */
1206			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1207			error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1208			    mp, seg, &nsegs, BUS_DMA_NOWAIT);
1209			if (error != 0) {
1210				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1211				m_free(mp);
1212				rxbuf->buf = NULL;
1213				goto update;
1214			}
1215			rxbuf->buf = mp;
1216			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1217			    BUS_DMASYNC_PREREAD);
1218			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1219			    htole64(seg[0].ds_addr);
1220		} else {
1221			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1222			rxbuf->flags &= ~IXGBE_RX_COPY;
1223		}
1224
1225		refreshed = TRUE;
1226		/* Next is precalculated */
1227		i = j;
1228		rxr->next_to_refresh = i;
1229		if (++j == rxr->num_desc)
1230			j = 0;
1231	}
1232
1233update:
1234	if (refreshed) /* Update hardware tail index */
1235		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1236
1237	return;
1238} /* ixgbe_refresh_mbufs */
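/*
 * Illustrative trace of the loop control above (hypothetical numbers):
 * with num_desc = 8, next_to_refresh = 6 and limit = 2, the index j
 * always runs one slot ahead of i (7, 0, 1, 2, ...), the loop refreshes
 * slots 6, 7 and 0, and it stops as soon as j reaches the caller's
 * limit, so the refresh never overruns descriptors the driver has not
 * finished processing yet.
 */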
1239
1240/************************************************************************
1241 * ixgbe_allocate_receive_buffers
1242 *
1243 *   Allocate memory for rx_buffer structures. Since we use one
1244 *   rx_buffer per received packet, the maximum number of rx_buffer's
1245 *   that we'll need is equal to the number of receive descriptors
1246 *   that we've allocated.
1247 ************************************************************************/
1248static int
1249ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1250{
1251	struct adapter      *adapter = rxr->adapter;
1252	device_t            dev = adapter->dev;
1253	struct ixgbe_rx_buf *rxbuf;
1254	int                 bsize, error;
1255
1256	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1257	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_IXV,
1258	    M_NOWAIT | M_ZERO);
1259	if (!rxr->rx_buffers) {
1260		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1261		error = ENOMEM;
1262		goto fail;
1263	}
1264
1265	error = bus_dma_tag_create(
1266	         /*      parent */ bus_get_dma_tag(dev),
1267	         /*   alignment */ 1,
1268	         /*      bounds */ 0,
1269	         /*     lowaddr */ BUS_SPACE_MAXADDR,
1270	         /*    highaddr */ BUS_SPACE_MAXADDR,
1271	         /*      filter */ NULL,
1272	         /*   filterarg */ NULL,
1273	         /*     maxsize */ MJUM16BYTES,
1274	         /*   nsegments */ 1,
1275	         /*  maxsegsize */ MJUM16BYTES,
1276	         /*       flags */ 0,
1277	         /*    lockfunc */ NULL,
1278	         /* lockfuncarg */ NULL,
1279	                           &rxr->ptag);
1280	if (error) {
1281		device_printf(dev, "Unable to create RX DMA tag\n");
1282		goto fail;
1283	}
1284
1285	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1286		rxbuf = &rxr->rx_buffers[i];
1287		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1288		if (error) {
1289			device_printf(dev, "Unable to create RX dma map\n");
1290			goto fail;
1291		}
1292	}
1293
1294	return (0);
1295
1296fail:
1297	/* Frees all, but can handle partial completion */
1298	ixv_free_receive_structures(adapter);
1299
1300	return (error);
1301} /* ixgbe_allocate_receive_buffers */
1302
1303/************************************************************************
1304 * ixgbe_free_receive_ring
1305 ************************************************************************/
1306static void
1307ixgbe_free_receive_ring(struct rx_ring *rxr)
1308{
1309	struct ixgbe_rx_buf *rxbuf;
1310
1311	for (int i = 0; i < rxr->num_desc; i++) {
1312		rxbuf = &rxr->rx_buffers[i];
1313		if (rxbuf->buf != NULL) {
1314			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1315			    BUS_DMASYNC_POSTREAD);
1316			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1317			rxbuf->buf->m_flags |= M_PKTHDR;
1318			m_freem(rxbuf->buf);
1319			rxbuf->buf = NULL;
1320			rxbuf->flags = 0;
1321		}
1322	}
1323} /* ixgbe_free_receive_ring */
1324
1325/************************************************************************
1326 * ixgbe_setup_receive_ring
1327 *
1328 *   Initialize a receive ring and its buffers.
1329 ************************************************************************/
1330static int
1331ixgbe_setup_receive_ring(struct rx_ring *rxr)
1332{
1333	struct adapter        *adapter;
1334	struct ifnet          *ifp;
1335	device_t              dev;
1336	struct ixgbe_rx_buf   *rxbuf;
1337	struct lro_ctrl       *lro = &rxr->lro;
1338#ifdef DEV_NETMAP
1339	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1340	struct netmap_slot    *slot;
1341#endif /* DEV_NETMAP */
1342	bus_dma_segment_t     seg[1];
1343	int                   rsize, nsegs, error = 0;
1344
1345	adapter = rxr->adapter;
1346	ifp = adapter->ifp;
1347	dev = adapter->dev;
1348
1349	/* Clear the ring contents */
1350	IXGBE_RX_LOCK(rxr);
1351
1352#ifdef DEV_NETMAP
1353	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1354		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1355#endif /* DEV_NETMAP */
1356
1357	rsize = roundup2(adapter->num_rx_desc *
1358	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1359	bzero((void *)rxr->rx_base, rsize);
1360	/* Cache the size */
1361	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1362
1363	/* Free current RX buffer structs and their mbufs */
1364	ixgbe_free_receive_ring(rxr);
1365
1366	/* Now replenish the mbufs */
1367	for (int j = 0; j != rxr->num_desc; ++j) {
1368		struct mbuf *mp;
1369
1370		rxbuf = &rxr->rx_buffers[j];
1371
1372#ifdef DEV_NETMAP
1373		/*
1374		 * In netmap mode, fill the map and set the buffer
1375		 * address in the NIC ring, considering the offset
1376		 * between the netmap and NIC rings (see comment in
1377		 * ixgbe_setup_transmit_ring() ). No need to allocate
1378		 * an mbuf, so end the block with a continue;
1379		 */
1380		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1381			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1382			uint64_t paddr;
1383			void *addr;
1384
1385			addr = PNMB(na, slot + sj, &paddr);
1386			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1387			/* Update descriptor and the cached value */
1388			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1389			rxbuf->addr = htole64(paddr);
1390			continue;
1391		}
1392#endif /* DEV_NETMAP */
1393
1394		rxbuf->flags = 0;
1395		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1396		    adapter->rx_mbuf_sz);
1397		if (rxbuf->buf == NULL) {
1398			error = ENOBUFS;
1399			goto fail;
1400		}
1401		mp = rxbuf->buf;
1402		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1403		/* Get the memory mapping */
1404		error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1405		    &nsegs, BUS_DMA_NOWAIT);
1406		if (error != 0)
1407			goto fail;
1408		bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1409		/* Update the descriptor and the cached value */
1410		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1411		rxbuf->addr = htole64(seg[0].ds_addr);
1412	}
1413
1414
1415	/* Setup our descriptor indices */
1416	rxr->next_to_check = 0;
1417	rxr->next_to_refresh = 0;
1418	rxr->lro_enabled = FALSE;
1419	rxr->rx_copies = 0;
1420	rxr->rx_bytes = 0;
1421	rxr->vtag_strip = FALSE;
1422
1423	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1424	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1425
1426	/*
1427	 * Now set up the LRO interface
1428	 */
1429	if (ixgbe_rsc_enable)
1430		ixgbe_setup_hw_rsc(rxr);
1431	else if (ifp->if_capenable & IFCAP_LRO) {
1432		int err = tcp_lro_init(lro);
1433		if (err) {
1434			device_printf(dev, "LRO Initialization failed!\n");
1435			goto fail;
1436		}
1437		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1438		rxr->lro_enabled = TRUE;
1439		lro->ifp = adapter->ifp;
1440	}
1441
1442	IXGBE_RX_UNLOCK(rxr);
1443
1444	return (0);
1445
1446fail:
1447	ixgbe_free_receive_ring(rxr);
1448	IXGBE_RX_UNLOCK(rxr);
1449
1450	return (error);
1451} /* ixgbe_setup_receive_ring */
1452
1453/************************************************************************
1454 * ixv_setup_receive_structures - Initialize all receive rings.
1455 ************************************************************************/
1456int
1457ixv_setup_receive_structures(struct adapter *adapter)
1458{
1459	struct rx_ring *rxr = adapter->rx_rings;
1460	int            j;
1461
1462	for (j = 0; j < adapter->num_queues; j++, rxr++)
1463		if (ixgbe_setup_receive_ring(rxr))
1464			goto fail;
1465
1466	return (0);
1467fail:
1468	/*
1469	 * Free RX buffers allocated so far, we will only handle
1470	 * the rings that completed, the failing case will have
1471 * cleaned up for itself. 'j' failed, so it's the terminus.
1472	 */
1473	for (int i = 0; i < j; ++i) {
1474		rxr = &adapter->rx_rings[i];
1475		ixgbe_free_receive_ring(rxr);
1476	}
1477
1478	return (ENOBUFS);
1479} /* ixv_setup_receive_structures */
1480
1481
1482/************************************************************************
1483 * ixv_free_receive_structures - Free all receive rings.
1484 ************************************************************************/
1485void
1486ixv_free_receive_structures(struct adapter *adapter)
1487{
1488	struct rx_ring *rxr = adapter->rx_rings;
1489	struct lro_ctrl *lro;
1490
1491	INIT_DEBUGOUT("ixv_free_receive_structures: begin");
1492
1493	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1494		lro = &rxr->lro;
1495		ixgbe_free_receive_buffers(rxr);
1496		/* Free LRO memory */
1497		tcp_lro_free(lro);
1498		/* Free the ring memory as well */
1499		ixgbe_dma_free(adapter, &rxr->rxdma);
1500	}
1501
1502	free(adapter->rx_rings, M_IXV);
1503} /* ixv_free_receive_structures */
1504
1505
1506/************************************************************************
1507 * ixgbe_free_receive_buffers - Free receive ring data structures
1508 ************************************************************************/
1509static void
1510ixgbe_free_receive_buffers(struct rx_ring *rxr)
1511{
1512	struct adapter      *adapter = rxr->adapter;
1513	struct ixgbe_rx_buf *rxbuf;
1514
1515	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1516
1517	/* Cleanup any existing buffers */
1518	if (rxr->rx_buffers != NULL) {
1519		for (int i = 0; i < adapter->num_rx_desc; i++) {
1520			rxbuf = &rxr->rx_buffers[i];
1521			if (rxbuf->buf != NULL) {
1522				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1523				    BUS_DMASYNC_POSTREAD);
1524				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1525				rxbuf->buf->m_flags |= M_PKTHDR;
1526				m_freem(rxbuf->buf);
1527			}
1528			rxbuf->buf = NULL;
1529			if (rxbuf->pmap != NULL) {
1530				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1531				rxbuf->pmap = NULL;
1532			}
1533		}
1534		if (rxr->rx_buffers != NULL) {
1535			free(rxr->rx_buffers, M_IXV);
1536			rxr->rx_buffers = NULL;
1537		}
1538	}
1539
1540	if (rxr->ptag != NULL) {
1541		bus_dma_tag_destroy(rxr->ptag);
1542		rxr->ptag = NULL;
1543	}
1544
1545	return;
1546} /* ixgbe_free_receive_buffers */
1547
1548/************************************************************************
1549 * ixgbe_rx_input
1550 ************************************************************************/
1551static __inline void
1552ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1553    u32 ptype)
1554{
1555	/*
1556	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
1557	 * was verified by hardware, and the packet must not carry a VLAN tag
1558	 * in its Ethernet header.  For IPv6 we do not yet support ext. hdrs.
1559	 */
1560	if (rxr->lro_enabled &&
1561	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1562	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1563	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1564	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1565	     (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1566	     (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1567	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1568	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1569		/*
1570		 * Send to the stack if:
1571		 *  - LRO not enabled, or
1572		 *  - no LRO resources, or
1573		 *  - lro enqueue fails
1574		 */
1575		if (rxr->lro.lro_cnt != 0)
1576			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1577				return;
1578	}
1579	IXGBE_RX_UNLOCK(rxr);
1580	(*ifp->if_input)(ifp, m);
1581	IXGBE_RX_LOCK(rxr);
1582} /* ixgbe_rx_input */
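/*
 * Illustrative condensation of the dispatch above (hypothetical helper
 * name): once the checks establish a hardware-verified, untagged
 * TCP/IPv4 or TCP/IPv6 frame on an LRO-enabled ring, the frame is
 * offered to software LRO first and only handed to if_input() if LRO
 * does not take it:
 *
 *	if (frame_is_lro_candidate(rxr, ifp, ptype, m) &&
 *	    rxr->lro.lro_cnt != 0 && tcp_lro_rx(&rxr->lro, m, 0) == 0)
 *		return;
 *	(*ifp->if_input)(ifp, m);
 */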
1583
1584/************************************************************************
1585 * ixgbe_rx_discard
1586 ************************************************************************/
1587static __inline void
1588ixgbe_rx_discard(struct rx_ring *rxr, int i)
1589{
1590	struct ixgbe_rx_buf *rbuf;
1591
1592	rbuf = &rxr->rx_buffers[i];
1593
1594	/*
1595	 * With advanced descriptors the writeback
1596	 * clobbers the buffer addresses, so it's easier
1597	 * to just free the existing mbufs and take
1598	 * the normal refresh path to get new buffers
1599	 * and mappings.
1600	 */
1601
1602	if (rbuf->fmp != NULL) {/* Partial chain ? */
1603		rbuf->fmp->m_flags |= M_PKTHDR;
1604		m_freem(rbuf->fmp);
1605		rbuf->fmp = NULL;
1606		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1607	} else if (rbuf->buf) {
1608		m_free(rbuf->buf);
1609		rbuf->buf = NULL;
1610	}
1611	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1612
1613	rbuf->flags = 0;
1614
1615	return;
1616} /* ixgbe_rx_discard */
1617
1618
1619/************************************************************************
1620 * ixv_rxeof
1621 *
1622 *   This routine executes in interrupt context. It replenishes
1623 *   the mbufs in the descriptor ring and sends data which has
1624 *   been DMA'ed into host memory up to the stack.
1625 *
1626 *   Return TRUE for more work, FALSE for all clean.
1627 ************************************************************************/
1628bool
1629ixv_rxeof(struct ix_queue *que)
1630{
1631	struct adapter          *adapter = que->adapter;
1632	struct rx_ring          *rxr = que->rxr;
1633	struct ifnet            *ifp = adapter->ifp;
1634	struct lro_ctrl         *lro = &rxr->lro;
1635#if __FreeBSD_version < 1100105
1636	struct lro_entry        *queued;
1637#endif
1638	union ixgbe_adv_rx_desc *cur;
1639	struct ixgbe_rx_buf     *rbuf, *nbuf;
1640	int                     i, nextp, processed = 0;
1641	u32                     staterr = 0;
1642	u32                     count = adapter->rx_process_limit;
1643	u16                     pkt_info;
1644
1645	IXGBE_RX_LOCK(rxr);
1646
1647#ifdef DEV_NETMAP
1648	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1649		/* Same as the txeof routine: wakeup clients on intr. */
1650		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1651			IXGBE_RX_UNLOCK(rxr);
1652			return (FALSE);
1653		}
1654	}
1655#endif /* DEV_NETMAP */
1656
1657	for (i = rxr->next_to_check; count != 0;) {
1658		struct mbuf *sendmp, *mp;
1659		u32         rsc, ptype;
1660		u16         len;
1661		u16         vtag = 0;
1662		bool        eop;
1663
1664		/* Sync the ring. */
1665		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1666		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1667
1668		cur = &rxr->rx_base[i];
1669		staterr = le32toh(cur->wb.upper.status_error);
1670		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1671
1672		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1673			break;
1674		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1675			break;
1676
1677		count--;
1678		sendmp = NULL;
1679		nbuf = NULL;
1680		rsc = 0;
1681		cur->wb.upper.status_error = 0;
1682		rbuf = &rxr->rx_buffers[i];
1683		mp = rbuf->buf;
1684
1685		len = le16toh(cur->wb.upper.length);
1686		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1687		    IXGBE_RXDADV_PKTTYPE_MASK;
1688		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1689
1690		/* Make sure bad packets are discarded */
1691		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1692#if __FreeBSD_version >= 1100036
1693			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1694#endif
1695			rxr->rx_discarded++;
1696			ixgbe_rx_discard(rxr, i);
1697			goto next_desc;
1698		}
1699
1700		/*
1701		 * On the 82599, which supports a hardware
1702		 * LRO (called HW RSC), packets need not
1703		 * be fragmented across sequential
1704		 * descriptors; rather, the next descriptor
1705		 * is indicated in bits of the descriptor.
1706		 * This also means that we might process
1707		 * more than one packet at a time, something
1708		 * that has never been true before; it
1709		 * required eliminating global chain pointers
1710		 * in favor of what we are doing here.  -jfv
1711		 */
1712		if (!eop) {
1713			/*
1714			 * Figure out the next descriptor
1715			 * of this frame.
1716			 */
1717			if (rxr->hw_rsc == TRUE) {
1718				rsc = ixgbe_rsc_count(cur);
1719				rxr->rsc_num += (rsc - 1);
1720			}
1721			if (rsc) { /* Get hardware index */
1722				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1723				    IXGBE_RXDADV_NEXTP_SHIFT);
1724			} else { /* Just sequential */
1725				nextp = i + 1;
1726				if (nextp == adapter->num_rx_desc)
1727					nextp = 0;
1728			}
1729			nbuf = &rxr->rx_buffers[nextp];
1730			prefetch(nbuf);
1731		}
1732		/*
1733		 * Rather than using the fmp/lmp global pointers
1734		 * we now keep the head of a packet chain in the
1735		 * buffer struct and pass this along from one
1736		 * descriptor to the next, until we get EOP.
1737		 */
1738		mp->m_len = len;
1739		/*
1740		 * See if a previous descriptor stored a packet
1741		 * head; that tells us whether this is a continuation.
1742		 */
1743		sendmp = rbuf->fmp;
1744		if (sendmp != NULL) {  /* secondary frag */
1745			rbuf->buf = rbuf->fmp = NULL;
1746			mp->m_flags &= ~M_PKTHDR;
1747			sendmp->m_pkthdr.len += mp->m_len;
1748		} else {
1749			/*
1750			 * Optimize.  This might be a small packet,
1751			 * maybe just a TCP ACK.  Do a fast copy that
1752			 * is cache aligned into a new mbuf, and
1753			 * leave the old mbuf+cluster for re-use.
1754			 */
1755			if (eop && len <= IXGBE_RX_COPY_LEN) {
1756				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1757				if (sendmp != NULL) {
1758					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1759					ixv_bcopy(mp->m_data, sendmp->m_data,
1760					    len);
1761					sendmp->m_len = len;
1762					rxr->rx_copies++;
1763					rbuf->flags |= IXGBE_RX_COPY;
1764				}
1765			}
1766			if (sendmp == NULL) {
1767				rbuf->buf = rbuf->fmp = NULL;
1768				sendmp = mp;
1769			}
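			/*
			 * If the copy path was not taken, the receive cluster
			 * itself is passed up the stack; rbuf->buf was cleared
			 * above so that ixgbe_refresh_mbufs() will attach a
			 * fresh cluster to this slot.
			 */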
1770
1771			/* first desc of a non-ps chain */
1772			sendmp->m_flags |= M_PKTHDR;
1773			sendmp->m_pkthdr.len = mp->m_len;
1774		}
1775		++processed;
1776
1777		/* Pass the head pointer on */
1778		if (eop == 0) {
1779			nbuf->fmp = sendmp;
1780			sendmp = NULL;
1781			mp->m_next = nbuf->buf;
1782		} else { /* Sending this frame */
1783			sendmp->m_pkthdr.rcvif = ifp;
1784			rxr->rx_packets++;
1785			/* capture data for AIM */
1786			rxr->bytes += sendmp->m_pkthdr.len;
1787			rxr->rx_bytes += sendmp->m_pkthdr.len;
1788			/* Process vlan info */
1789			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1790				vtag = le16toh(cur->wb.upper.vlan);
1791			if (vtag) {
1792				sendmp->m_pkthdr.ether_vtag = vtag;
1793				sendmp->m_flags |= M_VLANTAG;
1794			}
1795			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1796				ixgbe_rx_checksum(staterr, sendmp, ptype);
1797
1798			/*
1799			 * In case of multiqueue, we have RXCSUM.PCSD bit set
1800			 * and never cleared. This means we have RSS hash
1801			 * available to be used.
1802			 */
1803			if (adapter->num_queues > 1) {
1804				sendmp->m_pkthdr.flowid =
1805				    le32toh(cur->wb.lower.hi_dword.rss);
1806				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1807				case IXGBE_RXDADV_RSSTYPE_IPV4:
1808					M_HASHTYPE_SET(sendmp,
1809					    M_HASHTYPE_RSS_IPV4);
1810					break;
1811				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1812					M_HASHTYPE_SET(sendmp,
1813					    M_HASHTYPE_RSS_TCP_IPV4);
1814					break;
1815				case IXGBE_RXDADV_RSSTYPE_IPV6:
1816					M_HASHTYPE_SET(sendmp,
1817					    M_HASHTYPE_RSS_IPV6);
1818					break;
1819				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1820					M_HASHTYPE_SET(sendmp,
1821					    M_HASHTYPE_RSS_TCP_IPV6);
1822					break;
1823				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1824					M_HASHTYPE_SET(sendmp,
1825					    M_HASHTYPE_RSS_IPV6_EX);
1826					break;
1827				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1828					M_HASHTYPE_SET(sendmp,
1829					    M_HASHTYPE_RSS_TCP_IPV6_EX);
1830					break;
1831#if __FreeBSD_version > 1100000
1832				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1833					M_HASHTYPE_SET(sendmp,
1834					    M_HASHTYPE_RSS_UDP_IPV4);
1835					break;
1836				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1837					M_HASHTYPE_SET(sendmp,
1838					    M_HASHTYPE_RSS_UDP_IPV6);
1839					break;
1840				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1841					M_HASHTYPE_SET(sendmp,
1842					    M_HASHTYPE_RSS_UDP_IPV6_EX);
1843					break;
1844#endif
1845				default:
1846#if __FreeBSD_version < 1100116
1847					M_HASHTYPE_SET(sendmp,
1848					    M_HASHTYPE_OPAQUE);
1849#else
1850					M_HASHTYPE_SET(sendmp,
1851					    M_HASHTYPE_OPAQUE_HASH);
1852#endif
1853				}
1854			} else {
1855				sendmp->m_pkthdr.flowid = que->msix;
1856				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1857			}
1858		}
1859next_desc:
1860		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1861		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1862
1863		/* Advance our pointers to the next descriptor. */
1864		if (++i == rxr->num_desc)
1865			i = 0;
1866
1867		/* Now send to the stack or do LRO */
1868		if (sendmp != NULL) {
1869			rxr->next_to_check = i;
1870			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1871			i = rxr->next_to_check;
1872		}
1873
1874		/* Every 8 descriptors we go to refresh mbufs */
1875		/* Refresh mbufs every eight processed descriptors */
1876			ixgbe_refresh_mbufs(rxr, i);
1877			processed = 0;
1878		}
1879	}
1880
1881	/* Refresh any remaining buf structs */
1882	if (ixgbe_rx_unrefreshed(rxr))
1883		ixgbe_refresh_mbufs(rxr, i);
1884
1885	rxr->next_to_check = i;
1886
1887	/*
1888	 * Flush any outstanding LRO work
1889	 */
1890#if __FreeBSD_version < 1100105
1891	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1892		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1893		tcp_lro_flush(lro, queued);
1894	}
1895#else
1896	tcp_lro_flush_all(lro);
1897#endif
1898
1899	IXGBE_RX_UNLOCK(rxr);
1900
1901	/*
1902	 * Still have cleaning to do?
1903	 */
1904	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1905		return (TRUE);
1906
1907	return (FALSE);
1908} /* ixv_rxeof */
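
/*
 * Illustrative call pattern for ixv_rxeof() (a sketch only; the real
 * queue interrupt/taskqueue handler lives elsewhere in the driver and
 * the handler name below is hypothetical):
 *
 *	static void
 *	example_que_handler(void *arg)
 *	{
 *		struct ix_queue *que = arg;
 *		bool more = ixv_rxeof(que);
 *
 *		if (more) {
 *			// Descriptors remain: reschedule the cleanup task
 *			// instead of re-enabling the queue interrupt.
 *		}
 *	}
 */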
1909
1910
1911/************************************************************************
1912 * ixgbe_rx_checksum
1913 *
1914 *   Verify that the hardware indicated that the checksum is valid.
1915 *   Inform the stack about the status of checksum so that stack
1916 *   doesn't spend time verifying the checksum.
1917 ************************************************************************/
1918static void
1919ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1920{
1921	u16  status = (u16)staterr;
1922	u8   errors = (u8)(staterr >> 24);
1923	bool sctp = false;
1924
1925	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1926	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1927		sctp = true;
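	/*
	 * Note: SCTP frames carry a CRC32c rather than a 16-bit ones-
	 * complement checksum, so csum_data is not set for them below.
	 */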
1928
1929	/* IPv4 checksum */
1930	if (status & IXGBE_RXD_STAT_IPCS) {
1931		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1932		/* IP Checksum Good */
1933		if (!(errors & IXGBE_RXD_ERR_IPE))
1934			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1935	}
1936	/* TCP/UDP/SCTP checksum */
1937	if (status & IXGBE_RXD_STAT_L4CS) {
1938		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1939		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1940			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1941			if (!sctp)
1942				mp->m_pkthdr.csum_data = htons(0xffff);
1943		}
1944	}
1945} /* ixgbe_rx_checksum */
1946
1947/************************************************************************
1948 * ixgbe_dmamap_cb - bus_dmamap_load() callback; records the mapped address.
1949 ************************************************************************/
1950static void
1951ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1952{
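	/*
	 * The DMA tag is created with nsegments == 1, so on success there
	 * is exactly one segment; record its bus address for the caller.
	 */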
1953	if (error)
1954		return;
1955	*(bus_addr_t *)arg = segs->ds_addr;
1956
1957	return;
1958} /* ixgbe_dmamap_cb */
1959
1960/************************************************************************
1961 * ixgbe_dma_malloc
1962 ************************************************************************/
1963static int
1964ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1965                 struct ixgbe_dma_alloc *dma, int mapflags)
1966{
1967	device_t dev = adapter->dev;
1968	int      r;
1969
1970	r = bus_dma_tag_create(
1971	     /*      parent */ bus_get_dma_tag(adapter->dev),
1972	     /*   alignment */ DBA_ALIGN,
1973	     /*      bounds */ 0,
1974	     /*     lowaddr */ BUS_SPACE_MAXADDR,
1975	     /*    highaddr */ BUS_SPACE_MAXADDR,
1976	     /*      filter */ NULL,
1977	     /*   filterarg */ NULL,
1978	     /*     maxsize */ size,
1979	     /*   nsegments */ 1,
1980	     /*  maxsegsize */ size,
1981	     /*       flags */ BUS_DMA_ALLOCNOW,
1982	     /*    lockfunc */ NULL,
1983	     /* lockfuncarg */ NULL,
1984	                       &dma->dma_tag);
1985	if (r != 0) {
1986		device_printf(dev,
1987		    "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
1988		    r);
1989		goto fail_0;
1990	}
1991	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
1992	    BUS_DMA_NOWAIT, &dma->dma_map);
1993	if (r != 0) {
1994		device_printf(dev,
1995		    "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
1996		goto fail_1;
1997	}
1998	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
1999	    ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2000	if (r != 0) {
2001		device_printf(dev,
2002		    "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2003		goto fail_2;
2004	}
2005	dma->dma_size = size;
2006
2007	return (0);
2008fail_2:
2009	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2010fail_1:
2011	bus_dma_tag_destroy(dma->dma_tag);
2012fail_0:
2013	dma->dma_tag = NULL;
2014
2015	return (r);
2016} /* ixgbe_dma_malloc */
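
/*
 * Illustrative pairing of ixgbe_dma_malloc()/ixgbe_dma_free() (sketch
 * only, mirroring how ixv_allocate_queues() uses them below; 'size' is
 * a hypothetical placeholder):
 *
 *	struct ixgbe_dma_alloc dma;
 *
 *	if (ixgbe_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) == 0) {
 *		// dma.dma_vaddr is the kernel virtual address,
 *		// dma.dma_paddr the bus address programmed into hardware.
 *		...
 *		ixgbe_dma_free(adapter, &dma);
 *	}
 */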
2017
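/************************************************************************
 * ixgbe_dma_free
 ************************************************************************/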
2018static void
2019ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2020{
2021	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2022	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2023	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2024	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2025	bus_dma_tag_destroy(dma->dma_tag);
2026} /* ixgbe_dma_free */
2027
2028
2029/************************************************************************
2030 * ixv_allocate_queues
2031 *
2032 *   Allocate memory for the transmit and receive rings, and then
2033 *   the descriptors associated with each; called only once at attach.
2034 ************************************************************************/
2035int
2036ixv_allocate_queues(struct adapter *adapter)
2037{
2038	device_t        dev = adapter->dev;
2039	struct ix_queue *que;
2040	struct tx_ring  *txr;
2041	struct rx_ring  *rxr;
2042	int             rsize, tsize, error = IXGBE_SUCCESS;
2043	int             txconf = 0, rxconf = 0;
2044
2045	/* First, allocate the top level queue structs */
2046	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2047	    adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2048	if (!adapter->queues) {
2049		device_printf(dev, "Unable to allocate queue memory\n");
2050		error = ENOMEM;
2051		goto fail;
2052	}
2053
2054	/* Second, allocate the TX ring struct memory */
2055	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2056	    adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2057	if (!adapter->tx_rings) {
2058		device_printf(dev, "Unable to allocate TX ring memory\n");
2059		error = ENOMEM;
2060		goto tx_fail;
2061	}
2062
2063	/* Third, allocate the RX ring struct memory */
2064	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2065	    adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2066	if (!adapter->rx_rings) {
2067		device_printf(dev, "Unable to allocate RX ring memory\n");
2068		error = ENOMEM;
2069		goto rx_fail;
2070	}
2071
2072	/* Size of each TX descriptor ring, rounded for DMA alignment */
2073	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2074	    DBA_ALIGN);
2075
2076	/*
2077	 * Now set up the TX queues.  txconf is needed to handle the
2078	 * possibility that things fail mid-course, in which case the
2079	 * allocated memory must be unwound gracefully.
2080	 */
2081	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2082		/* Set up some basics */
2083		txr = &adapter->tx_rings[i];
2084		txr->adapter = adapter;
2085		txr->br = NULL;
2086		txr->me = i;
2087		txr->num_desc = adapter->num_tx_desc;
2088
2089		/* Initialize the TX side lock */
2090		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2091		    device_get_nameunit(dev), txr->me);
2092		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2093
2094		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2095		    BUS_DMA_NOWAIT)) {
2096			device_printf(dev,
2097			    "Unable to allocate TX Descriptor memory\n");
2098			error = ENOMEM;
2099			goto err_tx_desc;
2100		}
2101		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2102		bzero((void *)txr->tx_base, tsize);
2103
2104		/* Now allocate transmit buffers for the ring */
2105		if (ixgbe_allocate_transmit_buffers(txr)) {
2106			device_printf(dev,
2107			    "Critical Failure setting up transmit buffers\n");
2108			error = ENOMEM;
2109			goto err_tx_desc;
2110		}
2111		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2112			/* Allocate a buf ring */
2113			txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_IXV,
2114			    M_WAITOK, &txr->tx_mtx);
2115			if (txr->br == NULL) {
2116				device_printf(dev,
2117				    "Critical Failure setting up buf ring\n");
2118				error = ENOMEM;
2119				goto err_tx_desc;
2120			}
2121		}
2122	}
2123
2124	/*
2125	 * Next the RX queues...
2126	 */
2127	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2128	    DBA_ALIGN);
2129	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2130		rxr = &adapter->rx_rings[i];
2131		/* Set up some basics */
2132		rxr->adapter = adapter;
2133		rxr->me = i;
2134		rxr->num_desc = adapter->num_rx_desc;
2135
2136		/* Initialize the RX side lock */
2137		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2138		    device_get_nameunit(dev), rxr->me);
2139		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2140
2141		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2142		    BUS_DMA_NOWAIT)) {
2143			device_printf(dev,
2144			    "Unable to allocate RX Descriptor memory\n");
2145			error = ENOMEM;
2146			goto err_rx_desc;
2147		}
2148		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2149		bzero((void *)rxr->rx_base, rsize);
2150
2151		/* Allocate receive buffers for the ring */
2152		if (ixgbe_allocate_receive_buffers(rxr)) {
2153			device_printf(dev,
2154			    "Critical Failure setting up receive buffers\n");
2155			error = ENOMEM;
2156			goto err_rx_desc;
2157		}
2158	}
2159
2160	/*
2161	 * Finally set up the queue holding structs
2162	 */
2163	for (int i = 0; i < adapter->num_queues; i++) {
2164		que = &adapter->queues[i];
2165		que->adapter = adapter;
2166		que->me = i;
2167		que->txr = &adapter->tx_rings[i];
2168		que->rxr = &adapter->rx_rings[i];
2169	}
2170
2171	return (0);
2172
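/*
 * Error unwind: the rxconf/txconf counters limit the loops below to the
 * rings whose descriptor memory was successfully allocated.
 */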
2173err_rx_desc:
2174	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2175		ixgbe_dma_free(adapter, &rxr->rxdma);
2176err_tx_desc:
2177	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2178		ixgbe_dma_free(adapter, &txr->txdma);
2179	free(adapter->rx_rings, M_IXV);
2180rx_fail:
2181	free(adapter->tx_rings, M_IXV);
2182tx_fail:
2183	free(adapter->queues, M_IXV);
2184fail:
2185	return (error);
2186} /* ixv_allocate_queues */
2187