1/******************************************************************************
2
3  Copyright (c) 2001-2017, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/ixgbe/ix_txrx.c 315333 2017-03-15 21:20:17Z erj $*/
34
35
36#ifndef IXGBE_STANDALONE_BUILD
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#endif
40
41#include "ixgbe.h"
42
43extern int ix_crcstrip;
44
45/*
46 * HW RSC control:
47 *  This feature only works with
48 *  IPv4, and only on 82599 and later.
49 *  It also causes IP forwarding to
50 *  fail, and unlike LRO that cannot
51 *  be controlled by the stack. For
52 *  these reasons it is left off by
53 *  default with no tunable interface;
54 *  enabling it requires changing this
55 *  setting and recompiling.
56 */
57static bool ixgbe_rsc_enable = FALSE;
58
59/*
60 * For Flow Director: this is the
61 * number of TX packets we sample
62 * for the filter pool; this means
63 * every 20th packet will be probed.
64 *
65 * This feature can be disabled by
66 * setting this to 0.
67 */
68static int atr_sample_rate = 20;
69
70/************************************************************************
71 *  Local Function prototypes
72 ************************************************************************/
73static void          ixgbe_setup_transmit_ring(struct tx_ring *);
74static void          ixgbe_free_transmit_buffers(struct tx_ring *);
75static int           ixgbe_setup_receive_ring(struct rx_ring *);
76static void          ixgbe_free_receive_buffers(struct rx_ring *);
77static void          ixgbe_rx_checksum(u32, struct mbuf *, u32);
78static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
79static int           ixgbe_xmit(struct tx_ring *, struct mbuf **);
80static int           ixgbe_tx_ctx_setup(struct tx_ring *,
81                                        struct mbuf *, u32 *, u32 *);
82static int           ixgbe_tso_setup(struct tx_ring *,
83                                     struct mbuf *, u32 *, u32 *);
84static __inline void ixgbe_rx_discard(struct rx_ring *, int);
85static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
86                                    struct mbuf *, u32);
87static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
88                                      struct ixgbe_dma_alloc *, int);
89static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
90
91MALLOC_DECLARE(M_IXGBE);
92
93/************************************************************************
94 * ixgbe_legacy_start_locked - Transmit entry point
95 *
96 *   Called by the stack to initiate a transmit.
97 *   The driver will remain in this routine as long as there are
98 *   packets to transmit and transmit resources are available.
99 *   In case resources are not available, the stack is notified
100 *   and the packet is requeued.
101 ************************************************************************/
102int
103ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
104{
105	struct mbuf    *m_head;
106	struct adapter *adapter = txr->adapter;
107
108	IXGBE_TX_LOCK_ASSERT(txr);
109
110	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
111		return (ENETDOWN);
112	if (!adapter->link_active)
113		return (ENETDOWN);
114
115	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
116		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
117			break;
118
119		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
120		if (m_head == NULL)
121			break;
122
123		if (ixgbe_xmit(txr, &m_head)) {
124			if (m_head != NULL)
125				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
126			break;
127		}
128		/* Send a copy of the frame to the BPF listener */
129		ETHER_BPF_MTAP(ifp, m_head);
130	}
131
132	return IXGBE_SUCCESS;
133} /* ixgbe_legacy_start_locked */
134
135/************************************************************************
136 * ixgbe_legacy_start
137 *
138 *   Called by the stack, this always uses the first tx ring,
139 *   and should not be used with multiqueue tx enabled.
140 ************************************************************************/
141void
142ixgbe_legacy_start(struct ifnet *ifp)
143{
144	struct adapter *adapter = ifp->if_softc;
145	struct tx_ring *txr = adapter->tx_rings;
146
147	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
148		IXGBE_TX_LOCK(txr);
149		ixgbe_legacy_start_locked(ifp, txr);
150		IXGBE_TX_UNLOCK(txr);
151	}
152} /* ixgbe_legacy_start */
153
154/************************************************************************
155 * ixgbe_mq_start - Multiqueue Transmit Entry Point
156 *
157 *   (if_transmit function)
158 ************************************************************************/
159int
160ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
161{
162	struct adapter  *adapter = ifp->if_softc;
163	struct ix_queue *que;
164	struct tx_ring  *txr;
165	int             i, err = 0;
166	uint32_t        bucket_id;
167
168	/*
169	 * When doing RSS, map it to the same outbound queue
170	 * as the incoming flow would be mapped to.
171	 *
172	 * If everything is set up correctly, this should be the
173	 * same bucket as the one the current CPU maps to.
174	 */
175	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
176		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
177		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
178		    &bucket_id) == 0)) {
179			i = bucket_id % adapter->num_queues;
180#ifdef IXGBE_DEBUG
181			if (bucket_id > adapter->num_queues)
182				if_printf(ifp,
183				    "bucket_id (%d) > num_queues (%d)\n",
184				    bucket_id, adapter->num_queues);
185#endif
186		} else
187			i = m->m_pkthdr.flowid % adapter->num_queues;
188	} else
189		i = curcpu % adapter->num_queues;
190
191	/* Check for a hung queue and pick alternative */
192	if (((1 << i) & adapter->active_queues) == 0)
193		i = ffsl(adapter->active_queues);
194
195	txr = &adapter->tx_rings[i];
196	que = &adapter->queues[i];
197
198	err = drbr_enqueue(ifp, txr->br, m);
199	if (err)
200		return (err);
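	/*
	 * Try to drain the ring right away if the TX lock is free;
	 * otherwise hand the work to the queue's taskqueue, which
	 * runs ixgbe_deferred_mq_start() to drain it later.
	 */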
201	if (IXGBE_TX_TRYLOCK(txr)) {
202		ixgbe_mq_start_locked(ifp, txr);
203		IXGBE_TX_UNLOCK(txr);
204	} else
205		taskqueue_enqueue(que->tq, &txr->txq_task);
206
207	return (0);
208} /* ixgbe_mq_start */
209
210/************************************************************************
211 * ixgbe_mq_start_locked
212 ************************************************************************/
213int
214ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
215{
216	struct mbuf    *next;
217	int            enqueued = 0, err = 0;
218
219	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
220		return (ENETDOWN);
221	if (!txr->adapter->link_active)
222		return (ENETDOWN);
223
224	/* Process the queue */
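	/*
	 * On newer FreeBSD the ring is drained with drbr_peek(): a
	 * successful ixgbe_xmit() is followed by drbr_advance() to
	 * consume the mbuf, while a failure either puts it back with
	 * drbr_putback() or advances past it if ixgbe_xmit() already
	 * freed it.  Older releases simply dequeue and, on failure,
	 * re-enqueue.
	 */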
225#if __FreeBSD_version < 901504
226	next = drbr_dequeue(ifp, txr->br);
227	while (next != NULL) {
228		err = ixgbe_xmit(txr, &next);
229		if (err != 0) {
230			if (next != NULL)
231				err = drbr_enqueue(ifp, txr->br, next);
232#else
233	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
234		err = ixgbe_xmit(txr, &next);
235		if (err != 0) {
236			if (next == NULL)
237				drbr_advance(ifp, txr->br);
238			else
239				drbr_putback(ifp, txr->br, next);
240#endif
241			break;
242		}
243#if __FreeBSD_version >= 901504
244		drbr_advance(ifp, txr->br);
245#endif
246		enqueued++;
247		/* Send a copy of the frame to the BPF listener */
248		ETHER_BPF_MTAP(ifp, next);
249		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
250			break;
251#if __FreeBSD_version < 901504
252		next = drbr_dequeue(ifp, txr->br);
253#endif
254	}
255
256	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
257		ixgbe_txeof(txr);
258
259	return (err);
260} /* ixgbe_mq_start_locked */
261
262/************************************************************************
263 * ixgbe_deferred_mq_start
264 *
265 *   Called from a taskqueue to drain queued transmit packets.
266 ************************************************************************/
267void
268ixgbe_deferred_mq_start(void *arg, int pending)
269{
270	struct tx_ring *txr = arg;
271	struct adapter *adapter = txr->adapter;
272	struct ifnet   *ifp = adapter->ifp;
273
274	IXGBE_TX_LOCK(txr);
275	if (!drbr_empty(ifp, txr->br))
276		ixgbe_mq_start_locked(ifp, txr);
277	IXGBE_TX_UNLOCK(txr);
278} /* ixgbe_deferred_mq_start */
279
280/************************************************************************
281 * ixgbe_qflush - Flush all ring buffers
282 ************************************************************************/
283void
284ixgbe_qflush(struct ifnet *ifp)
285{
286	struct adapter *adapter = ifp->if_softc;
287	struct tx_ring *txr = adapter->tx_rings;
288	struct mbuf    *m;
289
290	for (int i = 0; i < adapter->num_queues; i++, txr++) {
291		IXGBE_TX_LOCK(txr);
292		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
293			m_freem(m);
294		IXGBE_TX_UNLOCK(txr);
295	}
296	if_qflush(ifp);
297} /* ixgbe_qflush */
298
299
300/************************************************************************
301 * ixgbe_xmit
302 *
303 *   This routine maps the mbufs to tx descriptors, allowing the
304 *   TX engine to transmit the packets.
305 *
306 *   Return 0 on success, positive on failure
307 ************************************************************************/
308static int
309ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
310{
311	struct adapter          *adapter = txr->adapter;
312	struct ixgbe_tx_buf     *txbuf;
313	union ixgbe_adv_tx_desc *txd = NULL;
314	struct mbuf             *m_head;
315	int                     i, j, error, nsegs;
316	int                     first;
317	u32                     olinfo_status = 0, cmd_type_len;
318	bool                    remap = TRUE;
319	bus_dma_segment_t       segs[adapter->num_segs];
320	bus_dmamap_t            map;
321
322	m_head = *m_headp;
323
324	/* Basic descriptor defines */
325	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
326	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
327
328	if (m_head->m_flags & M_VLANTAG)
329		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
330
331	/*
332	 * Important to capture the first descriptor
333	 * used because it will contain the index of
334	 * the one we tell the hardware to report back
335	 */
336	first = txr->next_avail_desc;
337	txbuf = &txr->tx_buffers[first];
338	map = txbuf->map;
339
340	/*
341	 * Map the packet for DMA.
342	 */
343retry:
344	error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
345	    &nsegs, BUS_DMA_NOWAIT);
346
347	if (__predict_false(error)) {
348		struct mbuf *m;
349
350		switch (error) {
351		case EFBIG:
352			/* Try it again? - one try */
353			if (remap == TRUE) {
354				remap = FALSE;
355				/*
356				 * XXX: m_defrag will choke on
357				 * non-MCLBYTES-sized clusters
358				 */
359				m = m_defrag(*m_headp, M_NOWAIT);
360				if (m == NULL) {
361					adapter->mbuf_defrag_failed++;
362					m_freem(*m_headp);
363					*m_headp = NULL;
364					return (ENOBUFS);
365				}
366				*m_headp = m;
367				goto retry;
368			} else
369				return (error);
370		case ENOMEM:
371			txr->no_tx_dma_setup++;
372			return (error);
373		default:
374			txr->no_tx_dma_setup++;
375			m_freem(*m_headp);
376			*m_headp = NULL;
377			return (error);
378		}
379	}
380
381	/* Make certain there are enough descriptors */
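	/*
	 * The frame will consume 'nsegs' data descriptors plus one
	 * context descriptor (set up below), so keep a little headroom.
	 */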
382	if (txr->tx_avail < (nsegs + 2)) {
383		txr->no_desc_avail++;
384		bus_dmamap_unload(txr->txtag, map);
385		return (ENOBUFS);
386	}
387	m_head = *m_headp;
388
389	/*
390	 * Set up the appropriate offload context
391	 * this will consume the first descriptor
392	 */
393	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
394	if (__predict_false(error)) {
395		if (error == ENOBUFS)
396			*m_headp = NULL;
397		return (error);
398	}
399
400	/* Flow Director (ATR): sample TX packets to program RX flow steering */
401	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
402	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
403		++txr->atr_count;
404		if (txr->atr_count >= atr_sample_rate) {
405			ixgbe_atr(txr, m_head);
406			txr->atr_count = 0;
407		}
408	}
409
410	olinfo_status |= IXGBE_ADVTXD_CC;
411	i = txr->next_avail_desc;
412	for (j = 0; j < nsegs; j++) {
413		bus_size_t seglen;
414		bus_addr_t segaddr;
415
416		txbuf = &txr->tx_buffers[i];
417		txd = &txr->tx_base[i];
418		seglen = segs[j].ds_len;
419		segaddr = htole64(segs[j].ds_addr);
420
421		txd->read.buffer_addr = segaddr;
422		txd->read.cmd_type_len = htole32(txr->txd_cmd |
423		    cmd_type_len | seglen);
424		txd->read.olinfo_status = htole32(olinfo_status);
425
426		if (++i == txr->num_desc)
427			i = 0;
428	}
429
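	/*
	 * EOP marks the last descriptor of this frame; RS asks the
	 * hardware to write back DD status for it, which is what
	 * ixgbe_txeof() polls to reclaim the buffers.
	 */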
430	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
431	txr->tx_avail -= nsegs;
432	txr->next_avail_desc = i;
433
434	txbuf->m_head = m_head;
435	/*
436	 * Here we swap the maps so the last descriptor,
437	 * which gets the completion interrupt, has the
438	 * real map, and the first descriptor gets the
439	 * unused map from this last descriptor.
440	 */
441	txr->tx_buffers[first].map = txbuf->map;
442	txbuf->map = map;
443	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
444
445	/* Set the EOP descriptor that will be marked done */
446	txbuf = &txr->tx_buffers[first];
447	txbuf->eop = txd;
448
449	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
450	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
451	/*
452	 * Advance the Transmit Descriptor Tail (TDT); this tells the
453	 * hardware that this frame is available to transmit.
454	 */
455	++txr->total_packets;
456	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
457
458	/* Mark queue as having work */
459	if (txr->busy == 0)
460		txr->busy = 1;
461
462	return (0);
463} /* ixgbe_xmit */
464
465
466/************************************************************************
467 * ixgbe_allocate_transmit_buffers
468 *
469 *   Allocate memory for tx_buffer structures. The tx_buffer stores all
470 *   the information needed to transmit a packet on the wire. This is
471 *   called only once at attach, setup is done every reset.
472 ************************************************************************/
473static int
474ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
475{
476	struct adapter      *adapter = txr->adapter;
477	device_t            dev = adapter->dev;
478	struct ixgbe_tx_buf *txbuf;
479	int                 error, i;
480
481	/*
482	 * Setup DMA descriptor areas.
483	 */
484	error = bus_dma_tag_create(
485	         /*      parent */ bus_get_dma_tag(adapter->dev),
486	         /*   alignment */ 1,
487	         /*      bounds */ 0,
488	         /*     lowaddr */ BUS_SPACE_MAXADDR,
489	         /*    highaddr */ BUS_SPACE_MAXADDR,
490	         /*      filter */ NULL,
491	         /*   filterarg */ NULL,
492	         /*     maxsize */ IXGBE_TSO_SIZE,
493	         /*   nsegments */ adapter->num_segs,
494	         /*  maxsegsize */ PAGE_SIZE,
495	         /*       flags */ 0,
496	         /*    lockfunc */ NULL,
497	         /* lockfuncarg */ NULL,
498	                           &txr->txtag);
499	if (error) {
500		device_printf(dev, "Unable to allocate TX DMA tag\n");
501		goto fail;
502	}
503
504	txr->tx_buffers =
505	    (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
506	    adapter->num_tx_desc, M_IXGBE, M_NOWAIT | M_ZERO);
507	if (!txr->tx_buffers) {
508		device_printf(dev, "Unable to allocate tx_buffer memory\n");
509		error = ENOMEM;
510		goto fail;
511	}
512
513	/* Create the descriptor buffer dma maps */
514	txbuf = txr->tx_buffers;
515	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
516		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
517		if (error != 0) {
518			device_printf(dev, "Unable to create TX DMA map\n");
519			goto fail;
520		}
521	}
522
523	return 0;
524fail:
525	/* We free all, it handles case where we are in the middle */
526	ixgbe_free_transmit_structures(adapter);
527
528	return (error);
529} /* ixgbe_allocate_transmit_buffers */
530
531/************************************************************************
532 *
533 *  Initialize a transmit ring.
534 *
535 ************************************************************************/
536static void
537ixgbe_setup_transmit_ring(struct tx_ring *txr)
538{
539	struct adapter        *adapter = txr->adapter;
540	struct ixgbe_tx_buf   *txbuf;
541#ifdef DEV_NETMAP
542	struct netmap_adapter *na = NA(adapter->ifp);
543	struct netmap_slot    *slot;
544#endif /* DEV_NETMAP */
545
546	/* Clear the old ring contents */
547	IXGBE_TX_LOCK(txr);
548
549#ifdef DEV_NETMAP
550	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
551		/*
552		 * (under lock): if in netmap mode, do some consistency
553		 * checks and set slot to entry 0 of the netmap ring.
554		 */
555		slot = netmap_reset(na, NR_TX, txr->me, 0);
556	}
557#endif /* DEV_NETMAP */
558
559	bzero((void *)txr->tx_base,
560	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
561	/* Reset indices */
562	txr->next_avail_desc = 0;
563	txr->next_to_clean = 0;
564
565	/* Free any existing tx buffers. */
566	txbuf = txr->tx_buffers;
567	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
568		if (txbuf->m_head != NULL) {
569			bus_dmamap_sync(txr->txtag, txbuf->map,
570			    BUS_DMASYNC_POSTWRITE);
571			bus_dmamap_unload(txr->txtag, txbuf->map);
572			m_freem(txbuf->m_head);
573			txbuf->m_head = NULL;
574		}
575
576#ifdef DEV_NETMAP
577		/*
578		 * In netmap mode, set the map for the packet buffer.
579		 * NOTE: Some drivers (not this one) also need to set
580		 * the physical buffer address in the NIC ring.
581		 * Slots in the netmap ring (indexed by "si") are
582		 * kring->nkr_hwofs positions "ahead" wrt the
583		 * corresponding slot in the NIC ring. In some drivers
584		 * (not here) nkr_hwofs can be negative. Function
585		 * netmap_idx_n2k() handles wraparounds properly.
586		 */
587		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
588			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
589			netmap_load_map(na, txr->txtag,
590			    txbuf->map, NMB(na, slot + si));
591		}
592#endif /* DEV_NETMAP */
593
594		/* Clear the EOP descriptor pointer */
595		txbuf->eop = NULL;
596	}
597
598	/* Set the rate at which we sample packets */
599	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
600		txr->atr_sample = atr_sample_rate;
601
602	/* Set number of descriptors available */
603	txr->tx_avail = adapter->num_tx_desc;
604
605	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
606	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
607	IXGBE_TX_UNLOCK(txr);
608} /* ixgbe_setup_transmit_ring */
609
610/************************************************************************
611 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
612 ************************************************************************/
613int
614ixgbe_setup_transmit_structures(struct adapter *adapter)
615{
616	struct tx_ring *txr = adapter->tx_rings;
617
618	for (int i = 0; i < adapter->num_queues; i++, txr++)
619		ixgbe_setup_transmit_ring(txr);
620
621	return (0);
622} /* ixgbe_setup_transmit_structures */
623
624/************************************************************************
625 * ixgbe_free_transmit_structures - Free all transmit rings.
626 ************************************************************************/
627void
628ixgbe_free_transmit_structures(struct adapter *adapter)
629{
630	struct tx_ring *txr = adapter->tx_rings;
631
632	for (int i = 0; i < adapter->num_queues; i++, txr++) {
633		IXGBE_TX_LOCK(txr);
634		ixgbe_free_transmit_buffers(txr);
635		ixgbe_dma_free(adapter, &txr->txdma);
636		IXGBE_TX_UNLOCK(txr);
637		IXGBE_TX_LOCK_DESTROY(txr);
638	}
639	free(adapter->tx_rings, M_IXGBE);
640} /* ixgbe_free_transmit_structures */
641
642/************************************************************************
643 * ixgbe_free_transmit_buffers
644 *
645 *   Free transmit ring related data structures.
646 ************************************************************************/
647static void
648ixgbe_free_transmit_buffers(struct tx_ring *txr)
649{
650	struct adapter      *adapter = txr->adapter;
651	struct ixgbe_tx_buf *tx_buffer;
652	int                 i;
653
654	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
655
656	if (txr->tx_buffers == NULL)
657		return;
658
659	tx_buffer = txr->tx_buffers;
660	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
661		if (tx_buffer->m_head != NULL) {
662			bus_dmamap_sync(txr->txtag, tx_buffer->map,
663			    BUS_DMASYNC_POSTWRITE);
664			bus_dmamap_unload(txr->txtag, tx_buffer->map);
665			m_freem(tx_buffer->m_head);
666			tx_buffer->m_head = NULL;
667			if (tx_buffer->map != NULL) {
668				bus_dmamap_destroy(txr->txtag, tx_buffer->map);
669				tx_buffer->map = NULL;
670			}
671		} else if (tx_buffer->map != NULL) {
672			bus_dmamap_unload(txr->txtag, tx_buffer->map);
673			bus_dmamap_destroy(txr->txtag, tx_buffer->map);
674			tx_buffer->map = NULL;
675		}
676	}
677	if (txr->br != NULL)
678		buf_ring_free(txr->br, M_IXGBE);
679	if (txr->tx_buffers != NULL) {
680		free(txr->tx_buffers, M_IXGBE);
681		txr->tx_buffers = NULL;
682	}
683	if (txr->txtag != NULL) {
684		bus_dma_tag_destroy(txr->txtag);
685		txr->txtag = NULL;
686	}
687} /* ixgbe_free_transmit_buffers */
688
689/************************************************************************
690 * ixgbe_tx_ctx_setup
691 *
692 *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
693 ************************************************************************/
694static int
695ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
696    u32 *cmd_type_len, u32 *olinfo_status)
697{
698	struct ixgbe_adv_tx_context_desc *TXD;
699	struct ether_vlan_header         *eh;
700#ifdef INET
701	struct ip                        *ip;
702#endif
703#ifdef INET6
704	struct ip6_hdr                   *ip6;
705#endif
706	int                              ehdrlen, ip_hlen = 0;
707	int                              offload = TRUE;
708	int                              ctxd = txr->next_avail_desc;
709	u32                              vlan_macip_lens = 0;
710	u32                              type_tucmd_mlhl = 0;
711	u16                              vtag = 0;
712	u16                              etype;
713	u8                               ipproto = 0;
714	caddr_t                          l3d;
715
716
717	/* First check if TSO is to be used */
718	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
719		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
720
721	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
722		offload = FALSE;
723
724	/* Indicate the whole packet as payload when not doing TSO */
725	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
726
727	/* Now ready a context descriptor */
728	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
729
730	/*
731	 * In advanced descriptors the vlan tag must
732	 * be placed into the context descriptor. Hence
733	 * we need to make one even if not doing offloads.
734	 */
735	if (mp->m_flags & M_VLANTAG) {
736		vtag = htole16(mp->m_pkthdr.ether_vtag);
737		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
738	} else if (!IXGBE_IS_X550VF(txr->adapter) && (offload == FALSE))
739		return (0);
740
741	/*
742	 * Determine where frame payload starts.
743	 * Jump over vlan headers if already present,
744	 * helpful for QinQ too.
745	 */
746	eh = mtod(mp, struct ether_vlan_header *);
747	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
748		etype = ntohs(eh->evl_proto);
749		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
750	} else {
751		etype = ntohs(eh->evl_encap_proto);
752		ehdrlen = ETHER_HDR_LEN;
753	}
754
755	/* Set the ether header length */
756	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
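	/*
	 * vlan_macip_lens packs the VLAN tag into bits 31:16, the MAC
	 * header length into bits 15:9 and the IP header length into
	 * bits 8:0 of the context descriptor.
	 */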
757
758	if (offload == FALSE)
759		goto no_offloads;
760
761	/*
762	 * If the first mbuf only includes the ethernet header,
763	 * jump to the next one
764	 * XXX: This assumes the stack splits mbufs containing headers
765	 *      on header boundaries
766	 * XXX: And assumes the entire IP header is contained in one mbuf
767	 */
768	if (mp->m_len == ehdrlen && mp->m_next)
769		l3d = mtod(mp->m_next, caddr_t);
770	else
771		l3d = mtod(mp, caddr_t) + ehdrlen;
772
773	switch (etype) {
774#ifdef INET
775		case ETHERTYPE_IP:
776			ip = (struct ip *)(l3d);
777			ip_hlen = ip->ip_hl << 2;
778			ipproto = ip->ip_p;
779			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
780			/* Insert IPv4 checksum into data descriptors */
781			if (mp->m_pkthdr.csum_flags & CSUM_IP) {
782				ip->ip_sum = 0;
783				*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
784			}
785			break;
786#endif
787#ifdef INET6
788		case ETHERTYPE_IPV6:
789			ip6 = (struct ip6_hdr *)(l3d);
790			ip_hlen = sizeof(struct ip6_hdr);
791			ipproto = ip6->ip6_nxt;
792			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
793			break;
794#endif
795		default:
796			offload = FALSE;
797			break;
798	}
799
800	vlan_macip_lens |= ip_hlen;
801
802	/* No support for offloads for non-L4 next headers */
803	switch (ipproto) {
804		case IPPROTO_TCP:
805			if (mp->m_pkthdr.csum_flags &
806			    (CSUM_IP_TCP | CSUM_IP6_TCP))
807				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
808			else
809				offload = false;
810			break;
811		case IPPROTO_UDP:
812			if (mp->m_pkthdr.csum_flags &
813			    (CSUM_IP_UDP | CSUM_IP6_UDP))
814				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
815			else
816				offload = false;
817			break;
818		case IPPROTO_SCTP:
819			if (mp->m_pkthdr.csum_flags &
820			    (CSUM_IP_SCTP | CSUM_IP6_SCTP))
821				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
822			else
823				offload = false;
824			break;
825		default:
826			offload = false;
827			break;
828	}
829
830	if (offload) /* Insert L4 checksum into data descriptors */
831		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
832
833no_offloads:
834	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
835
836	/* Now copy bits into descriptor */
837	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
838	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
839	TXD->seqnum_seed = htole32(0);
840	TXD->mss_l4len_idx = htole32(0);
841
842	/* We've consumed the first desc, adjust counters */
843	if (++ctxd == txr->num_desc)
844		ctxd = 0;
845	txr->next_avail_desc = ctxd;
846	--txr->tx_avail;
847
848	return (0);
849} /* ixgbe_tx_ctx_setup */
850
851/************************************************************************
852 * ixgbe_tso_setup
853 *
854 *   Setup work for hardware segmentation offload (TSO) on
855 *   adapters using advanced tx descriptors
856 ************************************************************************/
857static int
858ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
859    u32 *olinfo_status)
860{
861	struct ixgbe_adv_tx_context_desc *TXD;
862	struct ether_vlan_header         *eh;
863#ifdef INET6
864	struct ip6_hdr                   *ip6;
865#endif
866#ifdef INET
867	struct ip                        *ip;
868#endif
869	struct tcphdr                    *th;
870	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
871	u32                              vlan_macip_lens = 0;
872	u32                              type_tucmd_mlhl = 0;
873	u32                              mss_l4len_idx = 0, paylen;
874	u16                              vtag = 0, eh_type;
875
876	/*
877	 * Determine where frame payload starts.
878	 * Jump over vlan headers if already present
879	 */
880	eh = mtod(mp, struct ether_vlan_header *);
881	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
882		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
883		eh_type = eh->evl_proto;
884	} else {
885		ehdrlen = ETHER_HDR_LEN;
886		eh_type = eh->evl_encap_proto;
887	}
888
889	switch (ntohs(eh_type)) {
890#ifdef INET
891	case ETHERTYPE_IP:
892		ip = (struct ip *)(mp->m_data + ehdrlen);
893		if (ip->ip_p != IPPROTO_TCP)
894			return (ENXIO);
895		ip->ip_sum = 0;
896		ip_hlen = ip->ip_hl << 2;
897		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
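		/*
		 * For TSO the TCP checksum field is seeded with the
		 * pseudo-header sum (addresses and protocol, no length);
		 * the hardware completes it for each segment it builds.
		 */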
898		th->th_sum = in_pseudo(ip->ip_src.s_addr,
899		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
900		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
901		/* Tell transmit desc to also do IPv4 checksum. */
902		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
903		break;
904#endif
905#ifdef INET6
906	case ETHERTYPE_IPV6:
907		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
908		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
909		if (ip6->ip6_nxt != IPPROTO_TCP)
910			return (ENXIO);
911		ip_hlen = sizeof(struct ip6_hdr);
912		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
913		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
914		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
915		break;
916#endif
917	default:
918		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
919		    __func__, ntohs(eh_type));
920		break;
921	}
922
923	ctxd = txr->next_avail_desc;
924	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
925
926	tcp_hlen = th->th_off << 2;
927
928	/* This is used in the transmit desc in encap */
929	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
930
931	/* VLAN MACLEN IPLEN */
932	if (mp->m_flags & M_VLANTAG) {
933		vtag = htole16(mp->m_pkthdr.ether_vtag);
934		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
935	}
936
937	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
938	vlan_macip_lens |= ip_hlen;
939	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
940
941	/* ADV DTYPE TUCMD */
942	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
943	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
944	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
945
946	/* MSS L4LEN IDX */
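	/* MSS goes into bits 31:16, the TCP header length into bits 15:8 */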
947	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
948	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
949	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
950
951	TXD->seqnum_seed = htole32(0);
952
953	if (++ctxd == txr->num_desc)
954		ctxd = 0;
955
956	txr->tx_avail--;
957	txr->next_avail_desc = ctxd;
958	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
959	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
960	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
961	++txr->tso_tx;
962
963	return (0);
964} /* ixgbe_tso_setup */
965
966
967/************************************************************************
968 * ixgbe_txeof
969 *
970 *   Examine each tx_buffer in the used queue. If the hardware is done
971 *   processing the packet then free associated resources. The
972 *   tx_buffer is put back on the free queue.
973 ************************************************************************/
974void
975ixgbe_txeof(struct tx_ring *txr)
976{
977	struct adapter          *adapter = txr->adapter;
978	struct ixgbe_tx_buf     *buf;
979	union ixgbe_adv_tx_desc *txd;
980	u32                     work, processed = 0;
981	u32                     limit = adapter->tx_process_limit;
982
983	mtx_assert(&txr->tx_mtx, MA_OWNED);
984
985#ifdef DEV_NETMAP
986	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
987	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
988		struct netmap_adapter *na = NA(adapter->ifp);
989		struct netmap_kring *kring = &na->tx_rings[txr->me];
990		txd = txr->tx_base;
991		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
992		    BUS_DMASYNC_POSTREAD);
993		/*
994		 * In netmap mode, all the work is done in the context
995		 * of the client thread. Interrupt handlers only wake up
996		 * clients, which may be sleeping on individual rings
997		 * or on a global resource for all rings.
998		 * To implement tx interrupt mitigation, we wake up the client
999		 * thread roughly every half ring, even if the NIC interrupts
1000		 * more frequently. This is implemented as follows:
1001		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1002		 *   the slot that should wake up the thread (nkr_num_slots
1003		 *   means the user thread should not be woken up);
1004		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1005		 *   or the slot has the DD bit set.
1006		 */
1007		if (!netmap_mitigate ||
1008		    (kring->nr_kflags < kring->nkr_num_slots &&
1009		     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1010			netmap_tx_irq(adapter->ifp, txr->me);
1011		}
1012		return;
1013	}
1014#endif /* DEV_NETMAP */
1015
1016	if (txr->tx_avail == txr->num_desc) {
1017		txr->busy = 0;
1018		return;
1019	}
1020
1021	/* Get work starting point */
1022	work = txr->next_to_clean;
1023	buf = &txr->tx_buffers[work];
1024	txd = &txr->tx_base[work];
1025	work -= txr->num_desc; /* The distance to ring end */
1026	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1027	    BUS_DMASYNC_POSTREAD);
1028
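	/*
	 * 'work' is biased by -num_desc: it counts up toward zero as
	 * descriptors are processed, and reaching zero means we just
	 * stepped past the end of the ring, so the buffer/descriptor
	 * pointers are reset to the start.  Adding num_desc back after
	 * the loop yields the new next_to_clean index.
	 */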
1029	do {
1030		union ixgbe_adv_tx_desc *eop = buf->eop;
1031		if (eop == NULL) /* No work */
1032			break;
1033
1034		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1035			break;	/* I/O not complete */
1036
1037		if (buf->m_head) {
1038			txr->bytes += buf->m_head->m_pkthdr.len;
1039			bus_dmamap_sync(txr->txtag, buf->map,
1040			    BUS_DMASYNC_POSTWRITE);
1041			bus_dmamap_unload(txr->txtag, buf->map);
1042			m_freem(buf->m_head);
1043			buf->m_head = NULL;
1044		}
1045		buf->eop = NULL;
1046		++txr->tx_avail;
1047
1048		/* We clean the range if multi segment */
1049		while (txd != eop) {
1050			++txd;
1051			++buf;
1052			++work;
1053			/* wrap the ring? */
1054			if (__predict_false(!work)) {
1055				work -= txr->num_desc;
1056				buf = txr->tx_buffers;
1057				txd = txr->tx_base;
1058			}
1059			if (buf->m_head) {
1060				txr->bytes += buf->m_head->m_pkthdr.len;
1061				bus_dmamap_sync(txr->txtag, buf->map,
1062				    BUS_DMASYNC_POSTWRITE);
1063				bus_dmamap_unload(txr->txtag, buf->map);
1064				m_freem(buf->m_head);
1065				buf->m_head = NULL;
1066			}
1067			++txr->tx_avail;
1068			buf->eop = NULL;
1069
1070		}
1071		++txr->packets;
1072		++processed;
1073
1074		/* Try the next packet */
1075		++txd;
1076		++buf;
1077		++work;
1078		/* reset with a wrap */
1079		if (__predict_false(!work)) {
1080			work -= txr->num_desc;
1081			buf = txr->tx_buffers;
1082			txd = txr->tx_base;
1083		}
1084		prefetch(txd);
1085	} while (__predict_true(--limit));
1086
1087	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1088	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1089
1090	work += txr->num_desc;
1091	txr->next_to_clean = work;
1092
1093	/*
1094	 * Queue hang detection: we know there is
1095	 * work outstanding or the first return
1096	 * would have been taken, so increment busy
1097	 * if nothing managed to get cleaned; then
1098	 * local_timer checks the counter and marks
1099	 * the queue HUNG if it exceeds the maximum.
1100	 */
1101	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1102		++txr->busy;
1103	/*
1104	 * If anything gets cleaned we reset the state to 1;
1105	 * note this turns off HUNG if it is set.
1106	 */
1107	if (processed)
1108		txr->busy = 1;
1109
1110	if (txr->tx_avail == txr->num_desc)
1111		txr->busy = 0;
1112
1113	return;
1114} /* ixgbe_txeof */
1115
1116/************************************************************************
1117 * ixgbe_rsc_count
1118 *
1119 *   Used to detect a descriptor that has been merged by Hardware RSC.
1120 ************************************************************************/
1121static inline u32
1122ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1123{
1124	return (le32toh(rx->wb.lower.lo_dword.data) &
1125	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1126} /* ixgbe_rsc_count */
1127
1128/************************************************************************
1129 * ixgbe_setup_hw_rsc
1130 *
1131 *   Initialize Hardware RSC (LRO) feature on 82599
1132 *   for an RX ring, this is toggled by the LRO capability
1133 *   even though it is transparent to the stack.
1134 *
1135 *   NOTE: Since this HW feature only works with IPv4 and
1136 *         testing has shown soft LRO to be as effective,
1137 *         this feature will be disabled by default.
1138 ************************************************************************/
1139static void
1140ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1141{
1142	struct adapter  *adapter = rxr->adapter;
1143	struct ixgbe_hw *hw = &adapter->hw;
1144	u32             rscctrl, rdrxctl;
1145
1146	/* If turning LRO/RSC off we need to disable it */
1147	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1148		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1149		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write it back, otherwise clearing RSCEN has no effect */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1150		return;
1151	}
1152
1153	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1154	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1155#ifdef DEV_NETMAP
1156	/* Always strip CRC unless Netmap disabled it */
1157	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1158	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1159	    ix_crcstrip)
1160#endif /* DEV_NETMAP */
1161		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1162	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1163	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1164
1165	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1166	rscctrl |= IXGBE_RSCCTL_RSCEN;
1167	/*
1168	 * Limit the total number of descriptors that
1169	 * can be combined, so it does not exceed 64K
1170	 */
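	/*
	 * For example, with 2KB (MCLBYTES) clusters up to 16
	 * descriptors may be merged, i.e. at most 32KB of coalesced
	 * data per RSC.
	 */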
1171	if (rxr->mbuf_sz == MCLBYTES)
1172		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1173	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1174		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1175	else if (rxr->mbuf_sz == MJUM9BYTES)
1176		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1177	else  /* Using 16K cluster */
1178		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1179
1180	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1181
1182	/* Enable TCP header recognition */
1183	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1184	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1185
1186	/* Disable RSC for ACK packets */
1187	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1188	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1189
1190	rxr->hw_rsc = TRUE;
1191} /* ixgbe_setup_hw_rsc */
1192
1193/************************************************************************
1194 * ixgbe_refresh_mbufs
1195 *
1196 *   Refresh mbuf buffers for RX descriptor rings
1197 *    - now keeps its own state, so discards due to resource
1198 *      exhaustion are unnecessary; if an mbuf cannot be obtained
1199 *      it just returns, keeping its placeholder, and can simply
1200 *      be called again later to retry.
1201 ************************************************************************/
1202static void
1203ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1204{
1205	struct adapter      *adapter = rxr->adapter;
1206	struct ixgbe_rx_buf *rxbuf;
1207	struct mbuf         *mp;
1208	bus_dma_segment_t   seg[1];
1209	int                 i, j, nsegs, error;
1210	bool                refreshed = FALSE;
1211
1212	i = j = rxr->next_to_refresh;
1213	/* Control the loop with one beyond */
1214	if (++j == rxr->num_desc)
1215		j = 0;
1216
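	/*
	 * 'i' is the slot being refreshed and 'j' runs one slot ahead;
	 * the loop stops when 'j' reaches 'limit' (the caller's current
	 * position), so descriptors the hardware still owns are never
	 * touched.
	 */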
1217	while (j != limit) {
1218		rxbuf = &rxr->rx_buffers[i];
1219		if (rxbuf->buf == NULL) {
1220			mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1221			    rxr->mbuf_sz);
1222			if (mp == NULL)
1223				goto update;
1224			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1225				m_adj(mp, ETHER_ALIGN);
1226		} else
1227			mp = rxbuf->buf;
1228
1229		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1230
1231		/* If we're dealing with an mbuf that was copied rather
1232		 * than replaced, there's no need to go through busdma.
1233		 */
1234		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1235			/* Get the memory mapping */
1236			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1237			error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1238			    mp, seg, &nsegs, BUS_DMA_NOWAIT);
1239			if (error != 0) {
1240				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1241				m_free(mp);
1242				rxbuf->buf = NULL;
1243				goto update;
1244			}
1245			rxbuf->buf = mp;
1246			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1247			    BUS_DMASYNC_PREREAD);
1248			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1249			    htole64(seg[0].ds_addr);
1250		} else {
1251			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1252			rxbuf->flags &= ~IXGBE_RX_COPY;
1253		}
1254
1255		refreshed = TRUE;
1256		/* Next is precalculated */
1257		i = j;
1258		rxr->next_to_refresh = i;
1259		if (++j == rxr->num_desc)
1260			j = 0;
1261	}
1262
1263update:
1264	if (refreshed) /* Update hardware tail index */
1265		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1266
1267	return;
1268} /* ixgbe_refresh_mbufs */
1269
1270/************************************************************************
1271 * ixgbe_allocate_receive_buffers
1272 *
1273 *   Allocate memory for rx_buffer structures. Since we use one
1274 *   rx_buffer per received packet, the maximum number of rx_buffer's
1275 *   that we'll need is equal to the number of receive descriptors
1276 *   that we've allocated.
1277 ************************************************************************/
1278static int
1279ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1280{
1281	struct adapter      *adapter = rxr->adapter;
1282	device_t            dev = adapter->dev;
1283	struct ixgbe_rx_buf *rxbuf;
1284	int                 bsize, error;
1285
1286	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1287	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_IXGBE,
1288	    M_NOWAIT | M_ZERO);
1289	if (!rxr->rx_buffers) {
1290		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1291		error = ENOMEM;
1292		goto fail;
1293	}
1294
1295	error = bus_dma_tag_create(
1296	         /*      parent */ bus_get_dma_tag(dev),
1297	         /*   alignment */ 1,
1298	         /*      bounds */ 0,
1299	         /*     lowaddr */ BUS_SPACE_MAXADDR,
1300	         /*    highaddr */ BUS_SPACE_MAXADDR,
1301	         /*      filter */ NULL,
1302	         /*   filterarg */ NULL,
1303	         /*     maxsize */ MJUM16BYTES,
1304	         /*   nsegments */ 1,
1305	         /*  maxsegsize */ MJUM16BYTES,
1306	         /*       flags */ 0,
1307	         /*    lockfunc */ NULL,
1308	         /* lockfuncarg */ NULL,
1309	                           &rxr->ptag);
1310	if (error) {
1311		device_printf(dev, "Unable to create RX DMA tag\n");
1312		goto fail;
1313	}
1314
1315	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1316		rxbuf = &rxr->rx_buffers[i];
1317		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1318		if (error) {
1319			device_printf(dev, "Unable to create RX dma map\n");
1320			goto fail;
1321		}
1322	}
1323
1324	return (0);
1325
1326fail:
1327	/* Frees all, but can handle partial completion */
1328	ixgbe_free_receive_structures(adapter);
1329
1330	return (error);
1331} /* ixgbe_allocate_receive_buffers */
1332
1333/************************************************************************
1334 * ixgbe_free_receive_ring
1335 ************************************************************************/
1336static void
1337ixgbe_free_receive_ring(struct rx_ring *rxr)
1338{
1339	struct ixgbe_rx_buf *rxbuf;
1340
1341	for (int i = 0; i < rxr->num_desc; i++) {
1342		rxbuf = &rxr->rx_buffers[i];
1343		if (rxbuf->buf != NULL) {
1344			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1345			    BUS_DMASYNC_POSTREAD);
1346			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1347			rxbuf->buf->m_flags |= M_PKTHDR;
1348			m_freem(rxbuf->buf);
1349			rxbuf->buf = NULL;
1350			rxbuf->flags = 0;
1351		}
1352	}
1353} /* ixgbe_free_receive_ring */
1354
1355/************************************************************************
1356 * ixgbe_setup_receive_ring
1357 *
1358 *   Initialize a receive ring and its buffers.
1359 ************************************************************************/
1360static int
1361ixgbe_setup_receive_ring(struct rx_ring *rxr)
1362{
1363	struct adapter        *adapter;
1364	struct ifnet          *ifp;
1365	device_t              dev;
1366	struct ixgbe_rx_buf   *rxbuf;
1367	struct lro_ctrl       *lro = &rxr->lro;
1368#ifdef DEV_NETMAP
1369	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1370	struct netmap_slot    *slot;
1371#endif /* DEV_NETMAP */
1372	bus_dma_segment_t     seg[1];
1373	int                   rsize, nsegs, error = 0;
1374
1375	adapter = rxr->adapter;
1376	ifp = adapter->ifp;
1377	dev = adapter->dev;
1378
1379	/* Clear the ring contents */
1380	IXGBE_RX_LOCK(rxr);
1381
1382#ifdef DEV_NETMAP
1383	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1384		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1385#endif /* DEV_NETMAP */
1386
1387	rsize = roundup2(adapter->num_rx_desc *
1388	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1389	bzero((void *)rxr->rx_base, rsize);
1390	/* Cache the size */
1391	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1392
1393	/* Free current RX buffer structs and their mbufs */
1394	ixgbe_free_receive_ring(rxr);
1395
1396	/* Now replenish the mbufs */
1397	for (int j = 0; j != rxr->num_desc; ++j) {
1398		struct mbuf *mp;
1399
1400		rxbuf = &rxr->rx_buffers[j];
1401
1402#ifdef DEV_NETMAP
1403		/*
1404		 * In netmap mode, fill the map and set the buffer
1405		 * address in the NIC ring, considering the offset
1406		 * between the netmap and NIC rings (see comment in
1407		 * ixgbe_setup_transmit_ring() ). No need to allocate
1408		 * an mbuf, so end the block with a continue;
1409		 */
1410		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1411			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1412			uint64_t paddr;
1413			void *addr;
1414
1415			addr = PNMB(na, slot + sj, &paddr);
1416			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1417			/* Update descriptor and the cached value */
1418			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1419			rxbuf->addr = htole64(paddr);
1420			continue;
1421		}
1422#endif /* DEV_NETMAP */
1423
1424		rxbuf->flags = 0;
1425		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1426		    adapter->rx_mbuf_sz);
1427		if (rxbuf->buf == NULL) {
1428			error = ENOBUFS;
1429			goto fail;
1430		}
1431		mp = rxbuf->buf;
1432		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1433		/* Get the memory mapping */
1434		error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1435		    &nsegs, BUS_DMA_NOWAIT);
1436		if (error != 0)
1437			goto fail;
1438		bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1439		/* Update the descriptor and the cached value */
1440		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1441		rxbuf->addr = htole64(seg[0].ds_addr);
1442	}
1443
1444
1445	/* Setup our descriptor indices */
1446	rxr->next_to_check = 0;
1447	rxr->next_to_refresh = 0;
1448	rxr->lro_enabled = FALSE;
1449	rxr->rx_copies = 0;
1450	rxr->rx_bytes = 0;
1451	rxr->vtag_strip = FALSE;
1452
1453	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1454	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1455
1456	/*
1457	 * Now set up the LRO interface
1458	 */
1459	if (ixgbe_rsc_enable)
1460		ixgbe_setup_hw_rsc(rxr);
1461	else if (ifp->if_capenable & IFCAP_LRO) {
1462		int err = tcp_lro_init(lro);
1463		if (err) {
1464			device_printf(dev, "LRO Initialization failed!\n");
1465			goto fail;
1466		}
1467		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1468		rxr->lro_enabled = TRUE;
1469		lro->ifp = adapter->ifp;
1470	}
1471
1472	IXGBE_RX_UNLOCK(rxr);
1473
1474	return (0);
1475
1476fail:
1477	ixgbe_free_receive_ring(rxr);
1478	IXGBE_RX_UNLOCK(rxr);
1479
1480	return (error);
1481} /* ixgbe_setup_receive_ring */
1482
1483/************************************************************************
1484 * ixgbe_setup_receive_structures - Initialize all receive rings.
1485 ************************************************************************/
1486int
1487ixgbe_setup_receive_structures(struct adapter *adapter)
1488{
1489	struct rx_ring *rxr = adapter->rx_rings;
1490	int            j;
1491
1492	for (j = 0; j < adapter->num_queues; j++, rxr++)
1493		if (ixgbe_setup_receive_ring(rxr))
1494			goto fail;
1495
1496	return (0);
1497fail:
1498	/*
1499	 * Free the RX buffers allocated so far; we only handle
1500	 * the rings that completed, since the failing case has
1501	 * cleaned up for itself. 'j' failed, so it is the terminus.
1502	 */
1503	for (int i = 0; i < j; ++i) {
1504		rxr = &adapter->rx_rings[i];
1505		ixgbe_free_receive_ring(rxr);
1506	}
1507
1508	return (ENOBUFS);
1509} /* ixgbe_setup_receive_structures */
1510
1511
1512/************************************************************************
1513 * ixgbe_free_receive_structures - Free all receive rings.
1514 ************************************************************************/
1515void
1516ixgbe_free_receive_structures(struct adapter *adapter)
1517{
1518	struct rx_ring *rxr = adapter->rx_rings;
1519	struct lro_ctrl *lro;
1520
1521	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1522
1523	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1524		lro = &rxr->lro;
1525		ixgbe_free_receive_buffers(rxr);
1526		/* Free LRO memory */
1527		tcp_lro_free(lro);
1528		/* Free the ring memory as well */
1529		ixgbe_dma_free(adapter, &rxr->rxdma);
1530	}
1531
1532	free(adapter->rx_rings, M_IXGBE);
1533} /* ixgbe_free_receive_structures */
1534
1535
1536/************************************************************************
1537 * ixgbe_free_receive_buffers - Free receive ring data structures
1538 ************************************************************************/
1539void
1540ixgbe_free_receive_buffers(struct rx_ring *rxr)
1541{
1542	struct adapter      *adapter = rxr->adapter;
1543	struct ixgbe_rx_buf *rxbuf;
1544
1545	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1546
1547	/* Cleanup any existing buffers */
1548	if (rxr->rx_buffers != NULL) {
1549		for (int i = 0; i < adapter->num_rx_desc; i++) {
1550			rxbuf = &rxr->rx_buffers[i];
1551			if (rxbuf->buf != NULL) {
1552				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1553				    BUS_DMASYNC_POSTREAD);
1554				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1555				rxbuf->buf->m_flags |= M_PKTHDR;
1556				m_freem(rxbuf->buf);
1557			}
1558			rxbuf->buf = NULL;
1559			if (rxbuf->pmap != NULL) {
1560				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1561				rxbuf->pmap = NULL;
1562			}
1563		}
1564		if (rxr->rx_buffers != NULL) {
1565			free(rxr->rx_buffers, M_IXGBE);
1566			rxr->rx_buffers = NULL;
1567		}
1568	}
1569
1570	if (rxr->ptag != NULL) {
1571		bus_dma_tag_destroy(rxr->ptag);
1572		rxr->ptag = NULL;
1573	}
1574
1575	return;
1576} /* ixgbe_free_receive_buffers */
1577
1578/************************************************************************
1579 * ixgbe_rx_input
1580 ************************************************************************/
1581static __inline void
1582ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1583    u32 ptype)
1584{
1585	/*
1586	 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1587	 * been verified by hardware, and which carry no VLAN tag in the
1588	 * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
1589	 */
1590	if (rxr->lro_enabled &&
1591	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1592	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1593	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1594	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1595	     (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1596	     (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1597	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1598	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1599		/*
1600		 * Send to the stack if:
1601		 *  - LRO not enabled, or
1602		 *  - no LRO resources, or
1603		 *  - lro enqueue fails
1604		 */
1605		if (rxr->lro.lro_cnt != 0)
1606			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1607				return;
1608	}
1609	IXGBE_RX_UNLOCK(rxr);
1610	(*ifp->if_input)(ifp, m);
1611	IXGBE_RX_LOCK(rxr);
1612} /* ixgbe_rx_input */
1613
1614/************************************************************************
1615 * ixgbe_rx_discard
1616 ************************************************************************/
1617static __inline void
1618ixgbe_rx_discard(struct rx_ring *rxr, int i)
1619{
1620	struct ixgbe_rx_buf *rbuf;
1621
1622	rbuf = &rxr->rx_buffers[i];
1623
1624	/*
1625	 * With advanced descriptors the writeback
1626	 * clobbers the buffer addrs, so it's easier
1627	 * to just free the existing mbufs and take
1628	 * the normal refresh path to get new buffers
1629	 * and mapping.
1630	 */
1631
1632	if (rbuf->fmp != NULL) {/* Partial chain ? */
1633		rbuf->fmp->m_flags |= M_PKTHDR;
1634		m_freem(rbuf->fmp);
1635		rbuf->fmp = NULL;
1636		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1637	} else if (rbuf->buf) {
1638		m_free(rbuf->buf);
1639		rbuf->buf = NULL;
1640	}
1641	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1642
1643	rbuf->flags = 0;
1644
1645	return;
1646} /* ixgbe_rx_discard */
1647
1648
1649/************************************************************************
1650 * ixgbe_rxeof
1651 *
1652 *   This routine executes in interrupt context. It replenishes
1653 *   the mbufs in the descriptor and sends data which has been
1654 *   dma'ed into host memory to upper layer.
1655 *
1656 *   Return TRUE for more work, FALSE for all clean.
1657 ************************************************************************/
1658bool
1659ixgbe_rxeof(struct ix_queue *que)
1660{
1661	struct adapter          *adapter = que->adapter;
1662	struct rx_ring          *rxr = que->rxr;
1663	struct ifnet            *ifp = adapter->ifp;
1664	struct lro_ctrl         *lro = &rxr->lro;
1665#if __FreeBSD_version < 1100105
1666	struct lro_entry        *queued;
1667#endif
1668	union ixgbe_adv_rx_desc *cur;
1669	struct ixgbe_rx_buf     *rbuf, *nbuf;
1670	int                     i, nextp, processed = 0;
1671	u32                     staterr = 0;
1672	u32                     count = adapter->rx_process_limit;
1673	u16                     pkt_info;
1674
1675	IXGBE_RX_LOCK(rxr);
1676
1677#ifdef DEV_NETMAP
1678	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1679		/* Same as the txeof routine: wakeup clients on intr. */
1680		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1681			IXGBE_RX_UNLOCK(rxr);
1682			return (FALSE);
1683		}
1684	}
1685#endif /* DEV_NETMAP */
1686
1687	for (i = rxr->next_to_check; count != 0;) {
1688		struct mbuf *sendmp, *mp;
1689		u32         rsc, ptype;
1690		u16         len;
1691		u16         vtag = 0;
1692		bool        eop;
1693
1694		/* Sync the ring. */
1695		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1696		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1697
1698		cur = &rxr->rx_base[i];
1699		staterr = le32toh(cur->wb.upper.status_error);
1700		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1701
1702		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1703			break;
1704		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1705			break;
1706
1707		count--;
1708		sendmp = NULL;
1709		nbuf = NULL;
1710		rsc = 0;
1711		cur->wb.upper.status_error = 0;
1712		rbuf = &rxr->rx_buffers[i];
1713		mp = rbuf->buf;
1714
1715		len = le16toh(cur->wb.upper.length);
1716		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1717		    IXGBE_RXDADV_PKTTYPE_MASK;
1718		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1719
1720		/* Make sure bad packets are discarded */
1721		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1722			rxr->rx_discarded++;
1723			ixgbe_rx_discard(rxr, i);
1724			goto next_desc;
1725		}
1726
1727		/*
1728		 * On the 82599, which supports a hardware
1729		 * LRO (called HW RSC), packets need not
1730		 * be fragmented across sequential
1731		 * descriptors; rather, the next descriptor
1732		 * is indicated in bits of this descriptor.
1733		 * This also means that we might process
1734		 * more than one packet at a time, something
1735		 * that had never been true before and
1736		 * required eliminating global chain pointers
1737		 * in favor of what we are doing here.  -jfv
1738		 */
1739		if (!eop) {
1740			/*
1741			 * Figure out the next descriptor
1742			 * of this frame.
1743			 */
1744			if (rxr->hw_rsc == TRUE) {
1745				rsc = ixgbe_rsc_count(cur);
1746				rxr->rsc_num += (rsc - 1);
1747			}
1748			if (rsc) { /* Get hardware index */
1749				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1750				    IXGBE_RXDADV_NEXTP_SHIFT);
1751			} else { /* Just sequential */
1752				nextp = i + 1;
1753				if (nextp == adapter->num_rx_desc)
1754					nextp = 0;
1755			}
1756			nbuf = &rxr->rx_buffers[nextp];
1757			prefetch(nbuf);
1758		}
1759		/*
1760		 * Rather than using the fmp/lmp global pointers
1761		 * we now keep the head of a packet chain in the
1762		 * buffer struct and pass this along from one
1763		 * descriptor to the next, until we get EOP.
1764		 */
1765		mp->m_len = len;
1766		/*
1767		 * See if there is a stored head; if so, this
1768		 * buffer continues an existing packet chain.
1769		 */
1770		sendmp = rbuf->fmp;
1771		if (sendmp != NULL) {  /* secondary frag */
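			/*
			 * This buffer now belongs to the chain headed by
			 * fmp; clear the ring references so the refresh
			 * path allocates a replacement for this slot.
			 */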
1772			rbuf->buf = rbuf->fmp = NULL;
1773			mp->m_flags &= ~M_PKTHDR;
1774			sendmp->m_pkthdr.len += mp->m_len;
1775		} else {
1776			/*
1777			 * Optimize.  This might be a small packet,
1778			 * maybe just a TCP ACK.  Do a fast copy that
1779			 * is cache aligned into a new mbuf, and
1780			 * leave the old mbuf+cluster for re-use.
1781			 */
1782			if (eop && len <= IXGBE_RX_COPY_LEN) {
1783				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1784				if (sendmp != NULL) {
1785					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1786					ixgbe_bcopy(mp->m_data, sendmp->m_data,
1787					    len);
1788					sendmp->m_len = len;
1789					rxr->rx_copies++;
1790					rbuf->flags |= IXGBE_RX_COPY;
1791				}
1792			}
1793			if (sendmp == NULL) {
1794				rbuf->buf = rbuf->fmp = NULL;
1795				sendmp = mp;
1796			}
1797
1798			/* First descriptor of a non-packet-split chain */
1799			sendmp->m_flags |= M_PKTHDR;
1800			sendmp->m_pkthdr.len = mp->m_len;
1801		}
1802		++processed;
1803
1804		/* Pass the head pointer on */
1805		if (eop == 0) {
1806			nbuf->fmp = sendmp;
1807			sendmp = NULL;
1808			mp->m_next = nbuf->buf;
1809		} else { /* Sending this frame */
1810			sendmp->m_pkthdr.rcvif = ifp;
1811			rxr->rx_packets++;
1812			/* capture data for AIM */
1813			rxr->bytes += sendmp->m_pkthdr.len;
1814			rxr->rx_bytes += sendmp->m_pkthdr.len;
1815			/* Process vlan info */
1816			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1817				vtag = le16toh(cur->wb.upper.vlan);
1818			if (vtag) {
1819				sendmp->m_pkthdr.ether_vtag = vtag;
1820				sendmp->m_flags |= M_VLANTAG;
1821			}
1822			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1823				ixgbe_rx_checksum(staterr, sendmp, ptype);
1824
1825			/*
1826			 * With multiple queues the RXCSUM.PCSD bit is set
1827			 * and never cleared, which means the RSS hash is
1828			 * available for use.
1829			 */
1830			if (adapter->num_queues > 1) {
1831				sendmp->m_pkthdr.flowid =
1832				    le32toh(cur->wb.lower.hi_dword.rss);
1833				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1834				case IXGBE_RXDADV_RSSTYPE_IPV4:
1835					M_HASHTYPE_SET(sendmp,
1836					    M_HASHTYPE_RSS_IPV4);
1837					break;
1838				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1839					M_HASHTYPE_SET(sendmp,
1840					    M_HASHTYPE_RSS_TCP_IPV4);
1841					break;
1842				case IXGBE_RXDADV_RSSTYPE_IPV6:
1843					M_HASHTYPE_SET(sendmp,
1844					    M_HASHTYPE_RSS_IPV6);
1845					break;
1846				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1847					M_HASHTYPE_SET(sendmp,
1848					    M_HASHTYPE_RSS_TCP_IPV6);
1849					break;
1850				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1851					M_HASHTYPE_SET(sendmp,
1852					    M_HASHTYPE_RSS_IPV6_EX);
1853					break;
1854				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1855					M_HASHTYPE_SET(sendmp,
1856					    M_HASHTYPE_RSS_TCP_IPV6_EX);
1857					break;
1858#if __FreeBSD_version > 1100000
1859				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1860					M_HASHTYPE_SET(sendmp,
1861					    M_HASHTYPE_RSS_UDP_IPV4);
1862					break;
1863				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1864					M_HASHTYPE_SET(sendmp,
1865					    M_HASHTYPE_RSS_UDP_IPV6);
1866					break;
1867				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1868					M_HASHTYPE_SET(sendmp,
1869					    M_HASHTYPE_RSS_UDP_IPV6_EX);
1870					break;
1871#endif
1872				default:
1873#if __FreeBSD_version < 1100116
1874					M_HASHTYPE_SET(sendmp,
1875					    M_HASHTYPE_OPAQUE);
1876#else
1877					M_HASHTYPE_SET(sendmp,
1878					    M_HASHTYPE_OPAQUE_HASH);
1879#endif
1880				}
1881			} else {
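				/*
				 * Single queue: no RSS hash is computed, so
				 * use the MSI-X vector as an opaque flowid.
				 */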
1882				sendmp->m_pkthdr.flowid = que->msix;
1883				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1884			}
1885		}
1886next_desc:
1887		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1888		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1889
1890		/* Advance our pointers to the next descriptor. */
1891		if (++i == rxr->num_desc)
1892			i = 0;
1893
1894		/* Now send to the stack or do LRO */
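		/*
		 * ixgbe_rx_input() drops and re-takes the RX lock around
		 * if_input(), so publish next_to_check before the call and
		 * re-read it afterwards.
		 */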
1895		if (sendmp != NULL) {
1896			rxr->next_to_check = i;
1897			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1898			i = rxr->next_to_check;
1899		}
1900
1901		/* Every 8 processed descriptors, refresh the RX buffers */
1902		if (processed == 8) {
1903			ixgbe_refresh_mbufs(rxr, i);
1904			processed = 0;
1905		}
1906	}
1907
1908	/* Refresh any remaining buf structs */
1909	if (ixgbe_rx_unrefreshed(rxr))
1910		ixgbe_refresh_mbufs(rxr, i);
1911
1912	rxr->next_to_check = i;
1913
1914	/*
1915	 * Flush any outstanding LRO work
1916	 */
1917#if __FreeBSD_version < 1100105
1918	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1919		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1920		tcp_lro_flush(lro, queued);
1921	}
1922#else
1923	tcp_lro_flush_all(lro);
1924#endif
1925
1926	IXGBE_RX_UNLOCK(rxr);
1927
1928	/*
1929	 * Still have cleaning to do?
1930	 */
1931	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1932		return (TRUE);
1933
1934	return (FALSE);
1935} /* ixgbe_rxeof */
1936
1937
1938/************************************************************************
1939 * ixgbe_rx_checksum
1940 *
1941 *   Verify that the hardware indicated that the checksum is valid.
1942 *   Inform the stack about the status of the checksum so that the
1943 *   stack doesn't spend time verifying the checksum itself.
1944 ************************************************************************/
1945static void
1946ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1947{
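	/*
	 * The status bits used here live in the low 16 bits of staterr;
	 * the error bits of interest live in the top byte.
	 */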
1948	u16  status = (u16)staterr;
1949	u8   errors = (u8)(staterr >> 24);
1950	bool sctp = false;
1951
1952	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1953	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1954		sctp = true;
1955
1956	/* IPv4 checksum */
1957	if (status & IXGBE_RXD_STAT_IPCS) {
1958		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1959		/* IP Checksum Good */
1960		if (!(errors & IXGBE_RXD_ERR_IPE))
1961			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1962	}
1963	/* TCP/UDP/SCTP checksum */
1964	if (status & IXGBE_RXD_STAT_L4CS) {
1965		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1966		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1967			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1968			if (!sctp)
1969				mp->m_pkthdr.csum_data = htons(0xffff);
1970		}
1971	}
1972} /* ixgbe_rx_checksum */
1973
1974/************************************************************************
1975 * ixgbe_dmamap_cb - Callback for bus_dmamap_load(); records the DMA address.
1976 ************************************************************************/
1977static void
1978ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1979{
1980	if (error)
1981		return;
1982	*(bus_addr_t *)arg = segs->ds_addr;
1983
1984	return;
1985} /* ixgbe_dmamap_cb */
1986
1987/************************************************************************
1988 * ixgbe_dma_malloc
1989 ************************************************************************/
1990static int
1991ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1992                 struct ixgbe_dma_alloc *dma, int mapflags)
1993{
1994	device_t dev = adapter->dev;
1995	int      r;
1996
1997	r = bus_dma_tag_create(
1998	     /*      parent */ bus_get_dma_tag(adapter->dev),
1999	     /*   alignment */ DBA_ALIGN,
2000	     /*      bounds */ 0,
2001	     /*     lowaddr */ BUS_SPACE_MAXADDR,
2002	     /*    highaddr */ BUS_SPACE_MAXADDR,
2003	     /*      filter */ NULL,
2004	     /*   filterarg */ NULL,
2005	     /*     maxsize */ size,
2006	     /*   nsegments */ 1,
2007	     /*  maxsegsize */ size,
2008	     /*       flags */ BUS_DMA_ALLOCNOW,
2009	     /*    lockfunc */ NULL,
2010	     /* lockfuncarg */ NULL,
2011	                       &dma->dma_tag);
2012	if (r != 0) {
2013		device_printf(dev,
2014		    "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
2015		    r);
2016		goto fail_0;
2017	}
2018	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2019	    BUS_DMA_NOWAIT, &dma->dma_map);
2020	if (r != 0) {
2021		device_printf(dev,
2022		    "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
2023		goto fail_1;
2024	}
2025	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
2026	    ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2027	if (r != 0) {
2028		device_printf(dev,
2029		    "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2030		goto fail_2;
2031	}
2032	dma->dma_size = size;
2033
2034	return (0);
2035fail_2:
2036	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2037fail_1:
2038	bus_dma_tag_destroy(dma->dma_tag);
2039fail_0:
2040	dma->dma_tag = NULL;
2041
2042	return (r);
2043} /* ixgbe_dma_malloc */
2044
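/************************************************************************
 * ixgbe_dma_free
 ************************************************************************/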
2045static void
2046ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2047{
2048	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2049	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2050	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2051	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2052	bus_dma_tag_destroy(dma->dma_tag);
2053} /* ixgbe_dma_free */
2054
2055
2056/************************************************************************
2057 * ixgbe_allocate_queues
2058 *
2059 *   Allocate memory for the transmit and receive rings, and then
2060 *   the descriptors associated with each; called only once at attach.
2061 ************************************************************************/
2062int
2063ixgbe_allocate_queues(struct adapter *adapter)
2064{
2065	device_t        dev = adapter->dev;
2066	struct ix_queue *que;
2067	struct tx_ring  *txr;
2068	struct rx_ring  *rxr;
2069	int             rsize, tsize, error = IXGBE_SUCCESS;
2070	int             txconf = 0, rxconf = 0;
2071
2072	/* First, allocate the top level queue structs */
2073	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2074	    adapter->num_queues, M_IXGBE, M_NOWAIT | M_ZERO);
2075	if (!adapter->queues) {
2076		device_printf(dev, "Unable to allocate queue memory\n");
2077		error = ENOMEM;
2078		goto fail;
2079	}
2080
2081	/* Second, allocate the TX ring struct memory */
2082	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2083	    adapter->num_queues, M_IXGBE, M_NOWAIT | M_ZERO);
2084	if (!adapter->tx_rings) {
2085		device_printf(dev, "Unable to allocate TX ring memory\n");
2086		error = ENOMEM;
2087		goto tx_fail;
2088	}
2089
2090	/* Third, allocate the RX ring */
2091	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2092	    adapter->num_queues, M_IXGBE, M_NOWAIT | M_ZERO);
2093	if (!adapter->rx_rings) {
2094		device_printf(dev, "Unable to allocate RX ring memory\n");
2095		error = ENOMEM;
2096		goto rx_fail;
2097	}
2098
2099	/* For the ring itself */
2100	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2101	    DBA_ALIGN);
2102
2103	/*
2104	 * Now set up the TX queues; txconf is needed to handle the
2105	 * possibility that things fail midcourse and we need to
2106	 * undo the memory allocations gracefully.
2107	 */
2108	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2109		/* Set up some basics */
2110		txr = &adapter->tx_rings[i];
2111		txr->adapter = adapter;
2112		txr->br = NULL;
2113		/* In case SR-IOV is enabled, align the index properly */
2114		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2115		    i);
2116		txr->num_desc = adapter->num_tx_desc;
2117
2118		/* Initialize the TX side lock */
2119		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2120		    device_get_nameunit(dev), txr->me);
2121		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2122
2123		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2124		    BUS_DMA_NOWAIT)) {
2125			device_printf(dev,
2126			    "Unable to allocate TX Descriptor memory\n");
2127			error = ENOMEM;
2128			goto err_tx_desc;
2129		}
2130		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2131		bzero((void *)txr->tx_base, tsize);
2132
2133		/* Now allocate transmit buffers for the ring */
2134		if (ixgbe_allocate_transmit_buffers(txr)) {
2135			device_printf(dev,
2136			    "Critical Failure setting up transmit buffers\n");
2137			error = ENOMEM;
2138			goto err_tx_desc;
2139		}
2140		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2141			/* Allocate a buf ring */
2142			txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_IXGBE,
2143			    M_WAITOK, &txr->tx_mtx);
2144			if (txr->br == NULL) {
2145				device_printf(dev,
2146				    "Critical Failure setting up buf ring\n");
2147				error = ENOMEM;
2148				goto err_tx_desc;
2149			}
2150		}
2151	}
2152
2153	/*
2154	 * Next the RX queues...
2155	 */
2156	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2157	    DBA_ALIGN);
2158	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2159		rxr = &adapter->rx_rings[i];
2160		/* Set up some basics */
2161		rxr->adapter = adapter;
2162		/* In case SR-IOV is enabled, align the index properly */
2163		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2164		    i);
2165		rxr->num_desc = adapter->num_rx_desc;
2166
2167		/* Initialize the RX side lock */
2168		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2169		    device_get_nameunit(dev), rxr->me);
2170		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2171
2172		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2173		    BUS_DMA_NOWAIT)) {
2174			device_printf(dev,
2175			    "Unable to allocate RX Descriptor memory\n");
2176			error = ENOMEM;
2177			goto err_rx_desc;
2178		}
2179		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2180		bzero((void *)rxr->rx_base, rsize);
2181
2182		/* Allocate receive buffers for the ring */
2183		if (ixgbe_allocate_receive_buffers(rxr)) {
2184			device_printf(dev,
2185			    "Critical Failure setting up receive buffers\n");
2186			error = ENOMEM;
2187			goto err_rx_desc;
2188		}
2189	}
2190
2191	/*
2192	 * Finally set up the queue holding structs
2193	 */
2194	for (int i = 0; i < adapter->num_queues; i++) {
2195		que = &adapter->queues[i];
2196		que->adapter = adapter;
2197		que->me = i;
2198		que->txr = &adapter->tx_rings[i];
2199		que->rxr = &adapter->rx_rings[i];
2200	}
2201
2202	return (0);
2203
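/*
 * Error unwind: release DMA resources in reverse order of allocation;
 * txconf and rxconf record how many rings were successfully set up.
 */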
2204err_rx_desc:
2205	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2206		ixgbe_dma_free(adapter, &rxr->rxdma);
2207err_tx_desc:
2208	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2209		ixgbe_dma_free(adapter, &txr->txdma);
2210	free(adapter->rx_rings, M_IXGBE);
2211rx_fail:
2212	free(adapter->tx_rings, M_IXGBE);
2213tx_fail:
2214	free(adapter->queues, M_IXGBE);
2215fail:
2216	return (error);
2217} /* ixgbe_allocate_queues */
2218