1/******************************************************************************
2
3  Copyright (c) 2001-2017, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/11/sys/dev/ixgbe/ix_txrx.c 341477 2018-12-04 17:40:56Z vmaffione $*/
34
35
36#ifndef IXGBE_STANDALONE_BUILD
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#include "opt_rss.h"
40#endif
41
42#include "ixgbe.h"
43
/*
 * HW RSC control:
 *  This feature only works with IPv4, and only on 82599 and later
 *  hardware.  It also causes IP forwarding to fail and, unlike
 *  software LRO, the stack cannot disable it to work around that.
 *  For these reasons it is best left off; there is no tunable, so
 *  enabling it requires setting ixgbe_rsc_enable and recompiling.
 */
56static bool ixgbe_rsc_enable = FALSE;
57
/*
 * For Flow Director: this is the number of TX packets we sample
 * for the filter pool; with the default of 20, every 20th packet
 * is probed.
 *
 * This feature can be disabled by setting this to 0.
 */
67static int atr_sample_rate = 20;
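
/*
 * Example (not wired up here): the sample rate could be exposed as a
 * loader tunable, e.g.
 *
 *	TUNABLE_INT("hw.ix.atr_sample_rate", &atr_sample_rate);
 *
 * but as shipped it is a compile-time constant.
 */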
68
69/************************************************************************
70 *  Local Function prototypes
71 ************************************************************************/
72static void          ixgbe_setup_transmit_ring(struct tx_ring *);
73static void          ixgbe_free_transmit_buffers(struct tx_ring *);
74static int           ixgbe_setup_receive_ring(struct rx_ring *);
75static void          ixgbe_free_receive_buffers(struct rx_ring *);
76static void          ixgbe_rx_checksum(u32, struct mbuf *, u32);
77static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
78static int           ixgbe_xmit(struct tx_ring *, struct mbuf **);
79static int           ixgbe_tx_ctx_setup(struct tx_ring *,
80                                        struct mbuf *, u32 *, u32 *);
81static int           ixgbe_tso_setup(struct tx_ring *,
82                                     struct mbuf *, u32 *, u32 *);
83static __inline void ixgbe_rx_discard(struct rx_ring *, int);
84static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
85                                    struct mbuf *, u32);
86static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
87                                      struct ixgbe_dma_alloc *, int);
88static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
89
90/************************************************************************
91 * ixgbe_legacy_start_locked - Transmit entry point
92 *
93 *   Called by the stack to initiate a transmit.
94 *   The driver will remain in this routine as long as there are
95 *   packets to transmit and transmit resources are available.
96 *   In case resources are not available, the stack is notified
97 *   and the packet is requeued.
98 ************************************************************************/
99int
100ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
101{
102	struct mbuf    *m_head;
103	struct adapter *adapter = txr->adapter;
104
105	IXGBE_TX_LOCK_ASSERT(txr);
106
107	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
108		return (ENETDOWN);
109	if (!adapter->link_active)
110		return (ENETDOWN);
111
112	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
113		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
114			break;
115
116		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
117		if (m_head == NULL)
118			break;
119
120		if (ixgbe_xmit(txr, &m_head)) {
121			if (m_head != NULL)
122				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
123			break;
124		}
125		/* Send a copy of the frame to the BPF listener */
126		ETHER_BPF_MTAP(ifp, m_head);
127	}
128
129	return IXGBE_SUCCESS;
130} /* ixgbe_legacy_start_locked */
131
132/************************************************************************
133 * ixgbe_legacy_start
134 *
135 *   Called by the stack, this always uses the first tx ring,
136 *   and should not be used with multiqueue tx enabled.
137 ************************************************************************/
138void
139ixgbe_legacy_start(struct ifnet *ifp)
140{
141	struct adapter *adapter = ifp->if_softc;
142	struct tx_ring *txr = adapter->tx_rings;
143
144	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
145		IXGBE_TX_LOCK(txr);
146		ixgbe_legacy_start_locked(ifp, txr);
147		IXGBE_TX_UNLOCK(txr);
148	}
149} /* ixgbe_legacy_start */
150
151/************************************************************************
152 * ixgbe_mq_start - Multiqueue Transmit Entry Point
153 *
154 *   (if_transmit function)
155 ************************************************************************/
156int
157ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
158{
159	struct adapter  *adapter = ifp->if_softc;
160	struct ix_queue *que;
161	struct tx_ring  *txr;
162	int             i, err = 0;
163	uint32_t        bucket_id;
164
	/*
	 * When doing RSS, map the flow to the same outbound queue as
	 * the one its inbound traffic was hashed to.
	 *
	 * If everything is set up correctly, that should be the same
	 * bucket the current CPU belongs to.
	 */
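	/*
	 * Example (assuming four queues and four RSS buckets): a flow
	 * hashed to bucket 2 on receive is also transmitted on ring 2,
	 * since i = bucket_id % adapter->num_queues below.
	 */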
172	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
173		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
174		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
175		    &bucket_id) == 0)) {
176			i = bucket_id % adapter->num_queues;
177#ifdef IXGBE_DEBUG
178			if (bucket_id > adapter->num_queues)
179				if_printf(ifp,
180				    "bucket_id (%d) > num_queues (%d)\n",
181				    bucket_id, adapter->num_queues);
182#endif
183		} else
184			i = m->m_pkthdr.flowid % adapter->num_queues;
185	} else
186		i = curcpu % adapter->num_queues;
187
188	/* Check for a hung queue and pick alternative */
189	if (((1 << i) & adapter->active_queues) == 0)
190		i = ffsl(adapter->active_queues);
191
192	txr = &adapter->tx_rings[i];
193	que = &adapter->queues[i];
194
195	err = drbr_enqueue(ifp, txr->br, m);
196	if (err)
197		return (err);
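	/*
	 * The mbuf is now on this ring's buf_ring; if the TX lock is
	 * contended below, the per-queue taskqueue drains it later via
	 * ixgbe_deferred_mq_start().
	 */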
198	if (IXGBE_TX_TRYLOCK(txr)) {
199		ixgbe_mq_start_locked(ifp, txr);
200		IXGBE_TX_UNLOCK(txr);
201	} else
202		taskqueue_enqueue(que->tq, &txr->txq_task);
203
204	return (0);
205} /* ixgbe_mq_start */
206
207/************************************************************************
208 * ixgbe_mq_start_locked
209 ************************************************************************/
210int
211ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
212{
213	struct mbuf    *next;
214	int            enqueued = 0, err = 0;
215
216	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
217		return (ENETDOWN);
218	if (txr->adapter->link_active == 0)
219		return (ENETDOWN);
220
221	/* Process the queue */
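	/*
	 * On newer kernels drbr_peek() leaves the mbuf on the buf_ring:
	 * drbr_advance() commits its removal once ixgbe_xmit() accepts
	 * it, while drbr_putback() restores a (possibly modified) mbuf
	 * after a failure.
	 */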
222#if __FreeBSD_version < 901504
223	next = drbr_dequeue(ifp, txr->br);
224	while (next != NULL) {
225		if ((err = ixgbe_xmit(txr, &next)) != 0) {
226			if (next != NULL)
227				err = drbr_enqueue(ifp, txr->br, next);
228#else
229	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
230		err = ixgbe_xmit(txr, &next);
231		if (err != 0) {
232			if (next == NULL)
233				drbr_advance(ifp, txr->br);
234			else
235				drbr_putback(ifp, txr->br, next);
236#endif
237			break;
238		}
239#if __FreeBSD_version >= 901504
240		drbr_advance(ifp, txr->br);
241#endif
242		enqueued++;
243#if __FreeBSD_version >= 1100036
		/*
		 * Count outgoing multicast frames when running as a VF.
		 */
249		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
250		    (next->m_flags & M_MCAST))
251			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
252#endif
253		/* Send a copy of the frame to the BPF listener */
254		ETHER_BPF_MTAP(ifp, next);
255		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
256			break;
257#if __FreeBSD_version < 901504
258		next = drbr_dequeue(ifp, txr->br);
259#endif
260	}
261
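	/*
	 * Opportunistically reclaim completed descriptors while we
	 * still hold the TX lock.
	 */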
262	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
263		ixgbe_txeof(txr);
264
265	return (err);
266} /* ixgbe_mq_start_locked */
267
268/************************************************************************
269 * ixgbe_deferred_mq_start
270 *
271 *   Called from a taskqueue to drain queued transmit packets.
272 ************************************************************************/
273void
274ixgbe_deferred_mq_start(void *arg, int pending)
275{
276	struct tx_ring *txr = arg;
277	struct adapter *adapter = txr->adapter;
278	struct ifnet   *ifp = adapter->ifp;
279
280	IXGBE_TX_LOCK(txr);
281	if (!drbr_empty(ifp, txr->br))
282		ixgbe_mq_start_locked(ifp, txr);
283	IXGBE_TX_UNLOCK(txr);
284} /* ixgbe_deferred_mq_start */
285
286/************************************************************************
287 * ixgbe_qflush - Flush all ring buffers
288 ************************************************************************/
289void
290ixgbe_qflush(struct ifnet *ifp)
291{
292	struct adapter *adapter = ifp->if_softc;
293	struct tx_ring *txr = adapter->tx_rings;
294	struct mbuf    *m;
295
296	for (int i = 0; i < adapter->num_queues; i++, txr++) {
297		IXGBE_TX_LOCK(txr);
298		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
299			m_freem(m);
300		IXGBE_TX_UNLOCK(txr);
301	}
302	if_qflush(ifp);
303} /* ixgbe_qflush */
304
305
306/************************************************************************
307 * ixgbe_xmit
308 *
309 *   Maps the mbufs to tx descriptors, allowing the
310 *   TX engine to transmit the packets.
311 *
312 *   Return 0 on success, positive on failure
313 ************************************************************************/
314static int
315ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
316{
317	struct adapter          *adapter = txr->adapter;
318	struct ixgbe_tx_buf     *txbuf;
319	union ixgbe_adv_tx_desc *txd = NULL;
320	struct mbuf             *m_head;
321	int                     i, j, error, nsegs;
322	int                     first;
323	u32                     olinfo_status = 0, cmd_type_len;
324	bool                    remap = TRUE;
325	bus_dma_segment_t       segs[adapter->num_segs];
326	bus_dmamap_t            map;
327
328	m_head = *m_headp;
329
330	/* Basic descriptor defines */
331	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
332	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
333
334	if (m_head->m_flags & M_VLANTAG)
335		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
336
	/*
	 * Capture the index of the first descriptor used: its buffer
	 * will hold the EOP pointer (the descriptor we tell the
	 * hardware to report back on) and takes part in the map swap
	 * below.
	 */
342	first = txr->next_avail_desc;
343	txbuf = &txr->tx_buffers[first];
344	map = txbuf->map;
345
346	/*
347	 * Map the packet for DMA.
348	 */
349retry:
350	error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
351	    &nsegs, BUS_DMA_NOWAIT);
352
353	if (__predict_false(error)) {
354		struct mbuf *m;
355
356		switch (error) {
357		case EFBIG:
358			/* Try it again? - one try */
359			if (remap == TRUE) {
360				remap = FALSE;
361				/*
362				 * XXX: m_defrag will choke on
363				 * non-MCLBYTES-sized clusters
364				 */
365				m = m_defrag(*m_headp, M_NOWAIT);
366				if (m == NULL) {
367					adapter->mbuf_defrag_failed++;
368					m_freem(*m_headp);
369					*m_headp = NULL;
370					return (ENOBUFS);
371				}
372				*m_headp = m;
373				goto retry;
374			} else
375				return (error);
376		case ENOMEM:
377			txr->no_tx_dma_setup++;
378			return (error);
379		default:
380			txr->no_tx_dma_setup++;
381			m_freem(*m_headp);
382			*m_headp = NULL;
383			return (error);
384		}
385	}
386
	/*
	 * Make certain there are enough descriptors: one per segment,
	 * plus room for a possible context descriptor and a spare.
	 */
388	if (txr->tx_avail < (nsegs + 2)) {
389		txr->no_desc_avail++;
390		bus_dmamap_unload(txr->txtag, map);
391		return (ENOBUFS);
392	}
393	m_head = *m_headp;
394
	/*
	 * Set up the appropriate offload context; if one is needed
	 * this consumes the first descriptor.
	 */
399	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
400	if (__predict_false(error)) {
401		if (error == ENOBUFS)
402			*m_headp = NULL;
403		return (error);
404	}
405
406	/* Do the flow director magic */
407	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
408	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
409		++txr->atr_count;
410		if (txr->atr_count >= atr_sample_rate) {
411			ixgbe_atr(txr, m_head);
412			txr->atr_count = 0;
413		}
414	}
415
416	olinfo_status |= IXGBE_ADVTXD_CC;
417	i = txr->next_avail_desc;
418	for (j = 0; j < nsegs; j++) {
419		bus_size_t seglen;
420		bus_addr_t segaddr;
421
422		txbuf = &txr->tx_buffers[i];
423		txd = &txr->tx_base[i];
424		seglen = segs[j].ds_len;
425		segaddr = htole64(segs[j].ds_addr);
426
427		txd->read.buffer_addr = segaddr;
428		txd->read.cmd_type_len = htole32(txr->txd_cmd |
429		    cmd_type_len | seglen);
430		txd->read.olinfo_status = htole32(olinfo_status);
431
432		if (++i == txr->num_desc)
433			i = 0;
434	}
435
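	/*
	 * The final descriptor of the frame gets EOP and RS set, so the
	 * hardware knows where the frame ends and writes back its
	 * completion status there.
	 */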
436	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
437	txr->tx_avail -= nsegs;
438	txr->next_avail_desc = i;
439
440	txbuf->m_head = m_head;
	/*
	 * Swap maps: the buffer at the last descriptor (the one that
	 * gets the completion) receives the map the mbuf chain was
	 * loaded with, and the buffer at the first descriptor receives
	 * the last buffer's unused map.
	 */
447	txr->tx_buffers[first].map = txbuf->map;
448	txbuf->map = map;
449	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
450
451	/* Set the EOP descriptor that will be marked done */
452	txbuf = &txr->tx_buffers[first];
453	txbuf->eop = txd;
454
455	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
456	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
461	++txr->total_packets;
462	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
463
464	/* Mark queue as having work */
465	if (txr->busy == 0)
466		txr->busy = 1;
467
468	return (0);
469} /* ixgbe_xmit */
470
471
472/************************************************************************
473 * ixgbe_allocate_transmit_buffers
474 *
 *   Allocate memory for tx_buffer structures. The tx_buffer stores all
 *   the information needed to transmit a packet on the wire. This is
 *   called only once at attach; per-ring setup is redone on every reset.
478 ************************************************************************/
479static int
480ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
481{
482	struct adapter      *adapter = txr->adapter;
483	device_t            dev = adapter->dev;
484	struct ixgbe_tx_buf *txbuf;
485	int                 error, i;
486
	/*
	 * Create the DMA tag used to map transmit mbuf chains: up to
	 * IXGBE_TSO_SIZE bytes split across at most num_segs segments.
	 */
490	error = bus_dma_tag_create(
491	         /*      parent */ bus_get_dma_tag(adapter->dev),
492	         /*   alignment */ 1,
493	         /*      bounds */ 0,
494	         /*     lowaddr */ BUS_SPACE_MAXADDR,
495	         /*    highaddr */ BUS_SPACE_MAXADDR,
496	         /*      filter */ NULL,
497	         /*   filterarg */ NULL,
498	         /*     maxsize */ IXGBE_TSO_SIZE,
499	         /*   nsegments */ adapter->num_segs,
500	         /*  maxsegsize */ PAGE_SIZE,
501	         /*       flags */ 0,
502	         /*    lockfunc */ NULL,
503	         /* lockfuncarg */ NULL,
504	                           &txr->txtag);
505	if (error != 0) {
506		device_printf(dev, "Unable to allocate TX DMA tag\n");
507		goto fail;
508	}
509
510	txr->tx_buffers =
511	    (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
512	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
513	if (txr->tx_buffers == NULL) {
514		device_printf(dev, "Unable to allocate tx_buffer memory\n");
515		error = ENOMEM;
516		goto fail;
517	}
518
519	/* Create the descriptor buffer dma maps */
520	txbuf = txr->tx_buffers;
521	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
522		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
523		if (error != 0) {
524			device_printf(dev, "Unable to create TX DMA map\n");
525			goto fail;
526		}
527	}
528
529	return 0;
530fail:
531	/* We free all, it handles case where we are in the middle */
532	ixgbe_free_transmit_structures(adapter);
533
534	return (error);
535} /* ixgbe_allocate_transmit_buffers */
536
537/************************************************************************
538 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
539 ************************************************************************/
540static void
541ixgbe_setup_transmit_ring(struct tx_ring *txr)
542{
543	struct adapter        *adapter = txr->adapter;
544	struct ixgbe_tx_buf   *txbuf;
545#ifdef DEV_NETMAP
546	struct netmap_adapter *na = NA(adapter->ifp);
547	struct netmap_slot    *slot;
548#endif /* DEV_NETMAP */
549
550	/* Clear the old ring contents */
551	IXGBE_TX_LOCK(txr);
552
553#ifdef DEV_NETMAP
554	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
555		/*
556		 * (under lock): if in netmap mode, do some consistency
557		 * checks and set slot to entry 0 of the netmap ring.
558		 */
559		slot = netmap_reset(na, NR_TX, txr->me, 0);
560	}
561#endif /* DEV_NETMAP */
562
563	bzero((void *)txr->tx_base,
564	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
565	/* Reset indices */
566	txr->next_avail_desc = 0;
567	txr->next_to_clean = 0;
568
569	/* Free any existing tx buffers. */
570	txbuf = txr->tx_buffers;
571	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
572		if (txbuf->m_head != NULL) {
573			bus_dmamap_sync(txr->txtag, txbuf->map,
574			    BUS_DMASYNC_POSTWRITE);
575			bus_dmamap_unload(txr->txtag, txbuf->map);
576			m_freem(txbuf->m_head);
577			txbuf->m_head = NULL;
578		}
579
580#ifdef DEV_NETMAP
581		/*
582		 * In netmap mode, set the map for the packet buffer.
583		 * NOTE: Some drivers (not this one) also need to set
584		 * the physical buffer address in the NIC ring.
585		 * Slots in the netmap ring (indexed by "si") are
586		 * kring->nkr_hwofs positions "ahead" wrt the
587		 * corresponding slot in the NIC ring. In some drivers
588		 * (not here) nkr_hwofs can be negative. Function
589		 * netmap_idx_n2k() handles wraparounds properly.
590		 */
591		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
592			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
593			netmap_load_map(na, txr->txtag,
594			    txbuf->map, NMB(na, slot + si));
595		}
596#endif /* DEV_NETMAP */
597
598		/* Clear the EOP descriptor pointer */
599		txbuf->eop = NULL;
600	}
601
602	/* Set the rate at which we sample packets */
603	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
604		txr->atr_sample = atr_sample_rate;
605
606	/* Set number of descriptors available */
607	txr->tx_avail = adapter->num_tx_desc;
608
609	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
610	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
611	IXGBE_TX_UNLOCK(txr);
612} /* ixgbe_setup_transmit_ring */
613
614/************************************************************************
615 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
616 ************************************************************************/
617int
618ixgbe_setup_transmit_structures(struct adapter *adapter)
619{
620	struct tx_ring *txr = adapter->tx_rings;
621
622	for (int i = 0; i < adapter->num_queues; i++, txr++)
623		ixgbe_setup_transmit_ring(txr);
624
625	return (0);
626} /* ixgbe_setup_transmit_structures */
627
628/************************************************************************
629 * ixgbe_free_transmit_structures - Free all transmit rings.
630 ************************************************************************/
631void
632ixgbe_free_transmit_structures(struct adapter *adapter)
633{
634	struct tx_ring *txr = adapter->tx_rings;
635
636	for (int i = 0; i < adapter->num_queues; i++, txr++) {
637		IXGBE_TX_LOCK(txr);
638		ixgbe_free_transmit_buffers(txr);
639		ixgbe_dma_free(adapter, &txr->txdma);
640		IXGBE_TX_UNLOCK(txr);
641		IXGBE_TX_LOCK_DESTROY(txr);
642	}
643	free(adapter->tx_rings, M_DEVBUF);
644} /* ixgbe_free_transmit_structures */
645
646/************************************************************************
647 * ixgbe_free_transmit_buffers
648 *
649 *   Free transmit ring related data structures.
650 ************************************************************************/
651static void
652ixgbe_free_transmit_buffers(struct tx_ring *txr)
653{
654	struct adapter      *adapter = txr->adapter;
655	struct ixgbe_tx_buf *tx_buffer;
656	int                 i;
657
658	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
659
660	if (txr->tx_buffers == NULL)
661		return;
662
663	tx_buffer = txr->tx_buffers;
664	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
665		if (tx_buffer->m_head != NULL) {
666			bus_dmamap_sync(txr->txtag, tx_buffer->map,
667			    BUS_DMASYNC_POSTWRITE);
668			bus_dmamap_unload(txr->txtag, tx_buffer->map);
669			m_freem(tx_buffer->m_head);
670			tx_buffer->m_head = NULL;
671			if (tx_buffer->map != NULL) {
672				bus_dmamap_destroy(txr->txtag, tx_buffer->map);
673				tx_buffer->map = NULL;
674			}
675		} else if (tx_buffer->map != NULL) {
676			bus_dmamap_unload(txr->txtag, tx_buffer->map);
677			bus_dmamap_destroy(txr->txtag, tx_buffer->map);
678			tx_buffer->map = NULL;
679		}
680	}
681	if (txr->br != NULL)
682		buf_ring_free(txr->br, M_DEVBUF);
683	if (txr->tx_buffers != NULL) {
684		free(txr->tx_buffers, M_DEVBUF);
685		txr->tx_buffers = NULL;
686	}
687	if (txr->txtag != NULL) {
688		bus_dma_tag_destroy(txr->txtag);
689		txr->txtag = NULL;
690	}
691} /* ixgbe_free_transmit_buffers */
692
693/************************************************************************
694 * ixgbe_tx_ctx_setup
695 *
696 *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
697 ************************************************************************/
698static int
699ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
700    u32 *cmd_type_len, u32 *olinfo_status)
701{
702	struct ixgbe_adv_tx_context_desc *TXD;
703	struct ether_vlan_header         *eh;
704#ifdef INET
705	struct ip                        *ip;
706#endif
707#ifdef INET6
708	struct ip6_hdr                   *ip6;
709#endif
710	int                              ehdrlen, ip_hlen = 0;
711	int                              offload = TRUE;
712	int                              ctxd = txr->next_avail_desc;
713	u32                              vlan_macip_lens = 0;
714	u32                              type_tucmd_mlhl = 0;
715	u16                              vtag = 0;
716	u16                              etype;
717	u8                               ipproto = 0;
718	caddr_t                          l3d;
719
720
721	/* First check if TSO is to be used */
722	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
723		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
724
725	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
726		offload = FALSE;
727
728	/* Indicate the whole packet as payload when not doing TSO */
729	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
730
731	/* Now ready a context descriptor */
732	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
733
734	/*
735	 * In advanced descriptors the vlan tag must
736	 * be placed into the context descriptor. Hence
737	 * we need to make one even if not doing offloads.
738	 */
739	if (mp->m_flags & M_VLANTAG) {
740		vtag = htole16(mp->m_pkthdr.ether_vtag);
741		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
742	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
743	           (offload == FALSE))
744		return (0);
745
746	/*
747	 * Determine where frame payload starts.
748	 * Jump over vlan headers if already present,
749	 * helpful for QinQ too.
750	 */
751	eh = mtod(mp, struct ether_vlan_header *);
752	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
753		etype = ntohs(eh->evl_proto);
754		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
755	} else {
756		etype = ntohs(eh->evl_encap_proto);
757		ehdrlen = ETHER_HDR_LEN;
758	}
759
760	/* Set the ether header length */
761	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
762
763	if (offload == FALSE)
764		goto no_offloads;
765
766	/*
767	 * If the first mbuf only includes the ethernet header,
768	 * jump to the next one
769	 * XXX: This assumes the stack splits mbufs containing headers
770	 *      on header boundaries
771	 * XXX: And assumes the entire IP header is contained in one mbuf
772	 */
773	if (mp->m_len == ehdrlen && mp->m_next)
774		l3d = mtod(mp->m_next, caddr_t);
775	else
776		l3d = mtod(mp, caddr_t) + ehdrlen;
777
778	switch (etype) {
779#ifdef INET
780		case ETHERTYPE_IP:
781			ip = (struct ip *)(l3d);
782			ip_hlen = ip->ip_hl << 2;
783			ipproto = ip->ip_p;
784			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
785			/* Insert IPv4 checksum into data descriptors */
786			if (mp->m_pkthdr.csum_flags & CSUM_IP) {
787				ip->ip_sum = 0;
788				*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
789			}
790			break;
791#endif
792#ifdef INET6
793		case ETHERTYPE_IPV6:
794			ip6 = (struct ip6_hdr *)(l3d);
795			ip_hlen = sizeof(struct ip6_hdr);
796			ipproto = ip6->ip6_nxt;
797			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
798			break;
799#endif
800		default:
801			offload = FALSE;
802			break;
803	}
804
805	vlan_macip_lens |= ip_hlen;
806
807	/* No support for offloads for non-L4 next headers */
808	switch (ipproto) {
809		case IPPROTO_TCP:
810			if (mp->m_pkthdr.csum_flags &
811			    (CSUM_IP_TCP | CSUM_IP6_TCP))
812				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
813			else
814				offload = false;
815			break;
816		case IPPROTO_UDP:
817			if (mp->m_pkthdr.csum_flags &
818			    (CSUM_IP_UDP | CSUM_IP6_UDP))
819				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
820			else
821				offload = false;
822			break;
823		case IPPROTO_SCTP:
824			if (mp->m_pkthdr.csum_flags &
825			    (CSUM_IP_SCTP | CSUM_IP6_SCTP))
826				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
827			else
828				offload = false;
829			break;
830		default:
831			offload = false;
832			break;
833	}
834
835	if (offload) /* Insert L4 checksum into data descriptors */
836		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
837
838no_offloads:
839	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
840
841	/* Now copy bits into descriptor */
842	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
843	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
844	TXD->seqnum_seed = htole32(0);
845	TXD->mss_l4len_idx = htole32(0);
846
847	/* We've consumed the first desc, adjust counters */
848	if (++ctxd == txr->num_desc)
849		ctxd = 0;
850	txr->next_avail_desc = ctxd;
851	--txr->tx_avail;
852
853	return (0);
854} /* ixgbe_tx_ctx_setup */
855
856/************************************************************************
857 * ixgbe_tso_setup
858 *
859 *   Setup work for hardware segmentation offload (TSO) on
860 *   adapters using advanced tx descriptors
861 ************************************************************************/
862static int
863ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
864    u32 *olinfo_status)
865{
866	struct ixgbe_adv_tx_context_desc *TXD;
867	struct ether_vlan_header         *eh;
868#ifdef INET6
869	struct ip6_hdr                   *ip6;
870#endif
871#ifdef INET
872	struct ip                        *ip;
873#endif
874	struct tcphdr                    *th;
875	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
876	u32                              vlan_macip_lens = 0;
877	u32                              type_tucmd_mlhl = 0;
878	u32                              mss_l4len_idx = 0, paylen;
879	u16                              vtag = 0, eh_type;
880
881	/*
882	 * Determine where frame payload starts.
883	 * Jump over vlan headers if already present
884	 */
885	eh = mtod(mp, struct ether_vlan_header *);
886	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
887		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
888		eh_type = eh->evl_proto;
889	} else {
890		ehdrlen = ETHER_HDR_LEN;
891		eh_type = eh->evl_encap_proto;
892	}
893
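	/*
	 * Each case below seeds th_sum with the pseudo-header checksum
	 * (excluding the length), which the hardware expects so it can
	 * insert the proper per-segment length and finish the TCP
	 * checksum for every segment it generates.
	 */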
894	switch (ntohs(eh_type)) {
895#ifdef INET
896	case ETHERTYPE_IP:
897		ip = (struct ip *)(mp->m_data + ehdrlen);
898		if (ip->ip_p != IPPROTO_TCP)
899			return (ENXIO);
900		ip->ip_sum = 0;
901		ip_hlen = ip->ip_hl << 2;
902		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
903		th->th_sum = in_pseudo(ip->ip_src.s_addr,
904		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
905		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
906		/* Tell transmit desc to also do IPv4 checksum. */
907		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
908		break;
909#endif
910#ifdef INET6
911	case ETHERTYPE_IPV6:
912		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
913		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
914		if (ip6->ip6_nxt != IPPROTO_TCP)
915			return (ENXIO);
916		ip_hlen = sizeof(struct ip6_hdr);
917		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
918		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
919		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
920		break;
921#endif
922	default:
923		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
924		    __func__, ntohs(eh_type));
925		break;
926	}
927
928	ctxd = txr->next_avail_desc;
929	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
930
931	tcp_hlen = th->th_off << 2;
932
	/* The payload length (headers excluded) used in the data descriptors */
934	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
935
936	/* VLAN MACLEN IPLEN */
937	if (mp->m_flags & M_VLANTAG) {
938		vtag = htole16(mp->m_pkthdr.ether_vtag);
939		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
940	}
941
942	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
943	vlan_macip_lens |= ip_hlen;
944	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
945
946	/* ADV DTYPE TUCMD */
947	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
948	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
949	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
950
951	/* MSS L4LEN IDX */
952	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
953	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
954	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
955
956	TXD->seqnum_seed = htole32(0);
957
958	if (++ctxd == txr->num_desc)
959		ctxd = 0;
960
961	txr->tx_avail--;
962	txr->next_avail_desc = ctxd;
963	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
964	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
965	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
966	++txr->tso_tx;
967
968	return (0);
969} /* ixgbe_tso_setup */
970
971
972/************************************************************************
973 * ixgbe_txeof
974 *
975 *   Examine each tx_buffer in the used queue. If the hardware is done
976 *   processing the packet then free associated resources. The
977 *   tx_buffer is put back on the free queue.
978 ************************************************************************/
979void
980ixgbe_txeof(struct tx_ring *txr)
981{
982	struct adapter          *adapter = txr->adapter;
983	struct ixgbe_tx_buf     *buf;
984	union ixgbe_adv_tx_desc *txd;
985	u32                     work, processed = 0;
986	u32                     limit = adapter->tx_process_limit;
987
988	mtx_assert(&txr->tx_mtx, MA_OWNED);
989
990#ifdef DEV_NETMAP
991	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
992	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
993		struct netmap_adapter *na = NA(adapter->ifp);
994		struct netmap_kring *kring = na->tx_rings[txr->me];
995		txd = txr->tx_base;
996		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
997		    BUS_DMASYNC_POSTREAD);
998		/*
999		 * In netmap mode, all the work is done in the context
1000		 * of the client thread. Interrupt handlers only wake up
1001		 * clients, which may be sleeping on individual rings
1002		 * or on a global resource for all rings.
1003		 * To implement tx interrupt mitigation, we wake up the client
1004		 * thread roughly every half ring, even if the NIC interrupts
1005		 * more frequently. This is implemented as follows:
1006		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1007		 *   the slot that should wake up the thread (nkr_num_slots
1008		 *   means the user thread should not be woken up);
1009		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1010		 *   or the slot has the DD bit set.
1011		 */
1012		if (!netmap_mitigate ||
1013		    (kring->nr_kflags < kring->nkr_num_slots &&
1014		     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1015			netmap_tx_irq(adapter->ifp, txr->me);
1016		}
1017		return;
1018	}
1019#endif /* DEV_NETMAP */
1020
1021	if (txr->tx_avail == txr->num_desc) {
1022		txr->busy = 0;
1023		return;
1024	}
1025
1026	/* Get work starting point */
1027	work = txr->next_to_clean;
1028	buf = &txr->tx_buffers[work];
1029	txd = &txr->tx_base[work];
1030	work -= txr->num_desc; /* The distance to ring end */
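	/*
	 * 'work' is kept as a negative offset from the end of the ring:
	 * it counts up as descriptors are examined, hits zero exactly at
	 * the wrap point, and num_desc is added back at the end to turn
	 * it into a ring index again.
	 */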
1031	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1032	    BUS_DMASYNC_POSTREAD);
1033
1034	do {
1035		union ixgbe_adv_tx_desc *eop = buf->eop;
1036		if (eop == NULL) /* No work */
1037			break;
1038
1039		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1040			break;	/* I/O not complete */
1041
1042		if (buf->m_head) {
1043			txr->bytes += buf->m_head->m_pkthdr.len;
1044			bus_dmamap_sync(txr->txtag, buf->map,
1045			    BUS_DMASYNC_POSTWRITE);
1046			bus_dmamap_unload(txr->txtag, buf->map);
1047			m_freem(buf->m_head);
1048			buf->m_head = NULL;
1049		}
1050		buf->eop = NULL;
1051		++txr->tx_avail;
1052
		/* If the frame spanned multiple descriptors, clean the whole range */
1054		while (txd != eop) {
1055			++txd;
1056			++buf;
1057			++work;
1058			/* wrap the ring? */
1059			if (__predict_false(!work)) {
1060				work -= txr->num_desc;
1061				buf = txr->tx_buffers;
1062				txd = txr->tx_base;
1063			}
1064			if (buf->m_head) {
1065				txr->bytes += buf->m_head->m_pkthdr.len;
1066				bus_dmamap_sync(txr->txtag, buf->map,
1067				    BUS_DMASYNC_POSTWRITE);
1068				bus_dmamap_unload(txr->txtag, buf->map);
1069				m_freem(buf->m_head);
1070				buf->m_head = NULL;
1071			}
1072			++txr->tx_avail;
1073			buf->eop = NULL;
1074
1075		}
1076		++txr->packets;
1077		++processed;
1078
1079		/* Try the next packet */
1080		++txd;
1081		++buf;
1082		++work;
1083		/* reset with a wrap */
1084		if (__predict_false(!work)) {
1085			work -= txr->num_desc;
1086			buf = txr->tx_buffers;
1087			txd = txr->tx_base;
1088		}
1089		prefetch(txd);
1090	} while (__predict_true(--limit));
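	/*
	 * At most tx_process_limit completed packets are cleaned per
	 * call, so a busy ring cannot monopolize this context; anything
	 * left over is picked up on the next pass.
	 */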
1091
1092	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1093	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1094
1095	work += txr->num_desc;
1096	txr->next_to_clean = work;
1097
	/*
	 * Queue hang detection: we know there is work outstanding or
	 * the first return above would have been taken, so if nothing
	 * got cleaned, increment busy; the local timer checks this and
	 * marks the queue HUNG once it exceeds the maximum attempts.
	 */
1106	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1107		++txr->busy;
	/*
	 * If anything got cleaned, reset the state to 1; note that
	 * this clears HUNG if it was set.
	 */
1112	if (processed)
1113		txr->busy = 1;
1114
1115	if (txr->tx_avail == txr->num_desc)
1116		txr->busy = 0;
1117
1118	return;
1119} /* ixgbe_txeof */
1120
1121/************************************************************************
1122 * ixgbe_rsc_count
1123 *
1124 *   Used to detect a descriptor that has been merged by Hardware RSC.
1125 ************************************************************************/
1126static inline u32
1127ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1128{
1129	return (le32toh(rx->wb.lower.lo_dword.data) &
1130	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1131} /* ixgbe_rsc_count */
1132
1133/************************************************************************
1134 * ixgbe_setup_hw_rsc
1135 *
 *   Initialize the Hardware RSC (LRO) feature on 82599 for an RX
 *   ring; it is toggled by the LRO capability even though it is
 *   transparent to the stack.
 *
 *   NOTE: Since this HW feature only works with IPv4 and testing
 *         has shown soft LRO to be just as effective, it is
 *         disabled by default.
1143 ************************************************************************/
1144static void
1145ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1146{
1147	struct adapter  *adapter = rxr->adapter;
1148	struct ixgbe_hw *hw = &adapter->hw;
1149	u32             rscctrl, rdrxctl;
1150
	/* If turning LRO/RSC off, disable it in the hardware and return */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write back the cleared enable bit */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}
1157
1158	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1159	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1160#ifdef DEV_NETMAP
1161	/* Always strip CRC unless Netmap disabled it */
1162	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1163	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1164	    ix_crcstrip)
1165#endif /* DEV_NETMAP */
1166		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1167	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1168	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1169
1170	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1171	rscctrl |= IXGBE_RSCCTL_RSCEN;
1172	/*
1173	 * Limit the total number of descriptors that
1174	 * can be combined, so it does not exceed 64K
1175	 */
1176	if (rxr->mbuf_sz == MCLBYTES)
1177		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1178	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1179		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1180	else if (rxr->mbuf_sz == MJUM9BYTES)
1181		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1182	else  /* Using 16K cluster */
1183		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
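	/*
	 * Larger clusters need fewer descriptors to reach the same
	 * aggregate size, hence the smaller MAXDESC values above.
	 */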
1184
1185	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1186
1187	/* Enable TCP header recognition */
1188	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1189	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1190
1191	/* Disable RSC for ACK packets */
1192	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1193	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1194
1195	rxr->hw_rsc = TRUE;
1196} /* ixgbe_setup_hw_rsc */
1197
1198/************************************************************************
1199 * ixgbe_refresh_mbufs
1200 *
 *   Refresh mbuf buffers for RX descriptor rings.
 *    - Keeps its own state, so discards due to resource exhaustion
 *      are unnecessary: if an mbuf cannot be obtained it simply
 *      returns, keeping its placeholder, and can be called again
 *      later to retry.
1206 ************************************************************************/
1207static void
1208ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1209{
1210	struct adapter      *adapter = rxr->adapter;
1211	struct ixgbe_rx_buf *rxbuf;
1212	struct mbuf         *mp;
1213	bus_dma_segment_t   seg[1];
1214	int                 i, j, nsegs, error;
1215	bool                refreshed = FALSE;
1216
1217	i = j = rxr->next_to_refresh;
	/* Control the loop with one beyond: j runs one slot ahead of i */
1219	if (++j == rxr->num_desc)
1220		j = 0;
1221
1222	while (j != limit) {
1223		rxbuf = &rxr->rx_buffers[i];
1224		if (rxbuf->buf == NULL) {
1225			mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1226			    rxr->mbuf_sz);
1227			if (mp == NULL)
1228				goto update;
1229			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1230				m_adj(mp, ETHER_ALIGN);
1231		} else
1232			mp = rxbuf->buf;
1233
1234		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1235
1236		/* If we're dealing with an mbuf that was copied rather
1237		 * than replaced, there's no need to go through busdma.
1238		 */
1239		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1240			/* Get the memory mapping */
1241			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1242			error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1243			    mp, seg, &nsegs, BUS_DMA_NOWAIT);
1244			if (error != 0) {
1245				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1246				m_free(mp);
1247				rxbuf->buf = NULL;
1248				goto update;
1249			}
1250			rxbuf->buf = mp;
1251			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1252			    BUS_DMASYNC_PREREAD);
1253			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1254			    htole64(seg[0].ds_addr);
1255		} else {
1256			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1257			rxbuf->flags &= ~IXGBE_RX_COPY;
1258		}
1259
1260		refreshed = TRUE;
1261		/* Next is precalculated */
1262		i = j;
1263		rxr->next_to_refresh = i;
1264		if (++j == rxr->num_desc)
1265			j = 0;
1266	}
1267
1268update:
1269	if (refreshed) /* Update hardware tail index */
1270		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1271
1272	return;
1273} /* ixgbe_refresh_mbufs */
1274
1275/************************************************************************
1276 * ixgbe_allocate_receive_buffers
1277 *
 *   Allocate memory for rx_buffer structures. Since we use one
 *   rx_buffer per receive descriptor, the maximum number of rx_buffers
 *   we will need is equal to the number of receive descriptors
 *   we have allocated.
1282 ************************************************************************/
1283static int
1284ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1285{
1286	struct adapter      *adapter = rxr->adapter;
1287	device_t            dev = adapter->dev;
1288	struct ixgbe_rx_buf *rxbuf;
1289	int                 bsize, error;
1290
1291	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1292	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1293	    M_NOWAIT | M_ZERO);
1294	if (rxr->rx_buffers == NULL) {
1295		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1296		error = ENOMEM;
1297		goto fail;
1298	}
1299
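
	/*
	 * Each RX buffer is a single contiguous cluster, so the tag
	 * below allows exactly one segment of up to 16KB.
	 */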
1300	error = bus_dma_tag_create(
1301	         /*      parent */ bus_get_dma_tag(dev),
1302	         /*   alignment */ 1,
1303	         /*      bounds */ 0,
1304	         /*     lowaddr */ BUS_SPACE_MAXADDR,
1305	         /*    highaddr */ BUS_SPACE_MAXADDR,
1306	         /*      filter */ NULL,
1307	         /*   filterarg */ NULL,
1308	         /*     maxsize */ MJUM16BYTES,
1309	         /*   nsegments */ 1,
1310	         /*  maxsegsize */ MJUM16BYTES,
1311	         /*       flags */ 0,
1312	         /*    lockfunc */ NULL,
1313	         /* lockfuncarg */ NULL,
1314	                           &rxr->ptag);
1315	if (error != 0) {
1316		device_printf(dev, "Unable to create RX DMA tag\n");
1317		goto fail;
1318	}
1319
1320	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1321		rxbuf = &rxr->rx_buffers[i];
1322		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1323		if (error) {
1324			device_printf(dev, "Unable to create RX dma map\n");
1325			goto fail;
1326		}
1327	}
1328
1329	return (0);
1330
1331fail:
1332	/* Frees all, but can handle partial completion */
1333	ixgbe_free_receive_structures(adapter);
1334
1335	return (error);
1336} /* ixgbe_allocate_receive_buffers */
1337
1338/************************************************************************
1339 * ixgbe_free_receive_ring
1340 ************************************************************************/
1341static void
1342ixgbe_free_receive_ring(struct rx_ring *rxr)
1343{
1344	for (int i = 0; i < rxr->num_desc; i++) {
1345		ixgbe_rx_discard(rxr, i);
1346	}
1347} /* ixgbe_free_receive_ring */
1348
1349/************************************************************************
1350 * ixgbe_setup_receive_ring
1351 *
1352 *   Initialize a receive ring and its buffers.
1353 ************************************************************************/
1354static int
1355ixgbe_setup_receive_ring(struct rx_ring *rxr)
1356{
1357	struct adapter        *adapter;
1358	struct ifnet          *ifp;
1359	device_t              dev;
1360	struct ixgbe_rx_buf   *rxbuf;
1361	struct lro_ctrl       *lro = &rxr->lro;
1362#ifdef DEV_NETMAP
1363	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1364	struct netmap_slot    *slot;
1365#endif /* DEV_NETMAP */
1366	bus_dma_segment_t     seg[1];
1367	int                   rsize, nsegs, error = 0;
1368
1369	adapter = rxr->adapter;
1370	ifp = adapter->ifp;
1371	dev = adapter->dev;
1372
1373	/* Clear the ring contents */
1374	IXGBE_RX_LOCK(rxr);
1375
1376#ifdef DEV_NETMAP
1377	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1378		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1379#endif /* DEV_NETMAP */
1380
1381	rsize = roundup2(adapter->num_rx_desc *
1382	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1383	bzero((void *)rxr->rx_base, rsize);
1384	/* Cache the size */
1385	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1386
1387	/* Free current RX buffer structs and their mbufs */
1388	ixgbe_free_receive_ring(rxr);
1389
1390	/* Now replenish the mbufs */
1391	for (int j = 0; j != rxr->num_desc; ++j) {
1392		struct mbuf *mp;
1393
1394		rxbuf = &rxr->rx_buffers[j];
1395
1396#ifdef DEV_NETMAP
1397		/*
1398		 * In netmap mode, fill the map and set the buffer
1399		 * address in the NIC ring, considering the offset
1400		 * between the netmap and NIC rings (see comment in
1401		 * ixgbe_setup_transmit_ring() ). No need to allocate
1402		 * an mbuf, so end the block with a continue;
1403		 */
1404		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1405			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
1406			uint64_t paddr;
1407			void *addr;
1408
1409			addr = PNMB(na, slot + sj, &paddr);
1410			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1411			/* Update descriptor and the cached value */
1412			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1413			rxbuf->addr = htole64(paddr);
1414			continue;
1415		}
1416#endif /* DEV_NETMAP */
1417
1418		rxbuf->flags = 0;
1419		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1420		    adapter->rx_mbuf_sz);
1421		if (rxbuf->buf == NULL) {
1422			error = ENOBUFS;
1423			goto fail;
1424		}
1425		mp = rxbuf->buf;
1426		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1427		/* Get the memory mapping */
1428		error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1429		    &nsegs, BUS_DMA_NOWAIT);
1430		if (error != 0)
1431			goto fail;
1432		bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1433		/* Update the descriptor and the cached value */
1434		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1435		rxbuf->addr = htole64(seg[0].ds_addr);
1436	}
1437
1438
1439	/* Setup our descriptor indices */
1440	rxr->next_to_check = 0;
1441	rxr->next_to_refresh = 0;
1442	rxr->lro_enabled = FALSE;
1443	rxr->rx_copies = 0;
1444	rxr->rx_bytes = 0;
1445	rxr->vtag_strip = FALSE;
1446
1447	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1448	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1449
1450	/*
1451	 * Now set up the LRO interface
1452	 */
1453	if (ixgbe_rsc_enable)
1454		ixgbe_setup_hw_rsc(rxr);
1455	else if (ifp->if_capenable & IFCAP_LRO) {
1456		int err = tcp_lro_init(lro);
1457		if (err) {
1458			device_printf(dev, "LRO Initialization failed!\n");
1459			goto fail;
1460		}
1461		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1462		rxr->lro_enabled = TRUE;
1463		lro->ifp = adapter->ifp;
1464	}
1465
1466	IXGBE_RX_UNLOCK(rxr);
1467
1468	return (0);
1469
1470fail:
1471	ixgbe_free_receive_ring(rxr);
1472	IXGBE_RX_UNLOCK(rxr);
1473
1474	return (error);
1475} /* ixgbe_setup_receive_ring */
1476
1477/************************************************************************
1478 * ixgbe_setup_receive_structures - Initialize all receive rings.
1479 ************************************************************************/
1480int
1481ixgbe_setup_receive_structures(struct adapter *adapter)
1482{
1483	struct rx_ring *rxr = adapter->rx_rings;
1484	int            j;
1485
1486	for (j = 0; j < adapter->num_queues; j++, rxr++)
1487		if (ixgbe_setup_receive_ring(rxr))
1488			goto fail;
1489
1490	return (0);
1491fail:
	/*
	 * Free the RX buffers allocated so far. We only need to handle
	 * the rings that completed; the failing case cleaned up after
	 * itself. Ring 'j' failed, so it is the terminus.
	 */
1497	for (int i = 0; i < j; ++i) {
1498		rxr = &adapter->rx_rings[i];
1499		IXGBE_RX_LOCK(rxr);
1500		ixgbe_free_receive_ring(rxr);
1501		IXGBE_RX_UNLOCK(rxr);
1502	}
1503
1504	return (ENOBUFS);
1505} /* ixgbe_setup_receive_structures */
1506
1507
1508/************************************************************************
1509 * ixgbe_free_receive_structures - Free all receive rings.
1510 ************************************************************************/
1511void
1512ixgbe_free_receive_structures(struct adapter *adapter)
1513{
1514	struct rx_ring *rxr = adapter->rx_rings;
1515
1516	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1517
1518	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1519		ixgbe_free_receive_buffers(rxr);
1520		/* Free LRO memory */
1521		tcp_lro_free(&rxr->lro);
1522		/* Free the ring memory as well */
1523		ixgbe_dma_free(adapter, &rxr->rxdma);
1524	}
1525
1526	free(adapter->rx_rings, M_DEVBUF);
1527} /* ixgbe_free_receive_structures */
1528
1529
1530/************************************************************************
1531 * ixgbe_free_receive_buffers - Free receive ring data structures
1532 ************************************************************************/
1533static void
1534ixgbe_free_receive_buffers(struct rx_ring *rxr)
1535{
1536	struct adapter      *adapter = rxr->adapter;
1537	struct ixgbe_rx_buf *rxbuf;
1538
1539	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1540
1541	/* Cleanup any existing buffers */
1542	if (rxr->rx_buffers != NULL) {
1543		for (int i = 0; i < adapter->num_rx_desc; i++) {
1544			rxbuf = &rxr->rx_buffers[i];
1545			ixgbe_rx_discard(rxr, i);
1546			if (rxbuf->pmap != NULL) {
1547				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1548				rxbuf->pmap = NULL;
1549			}
1550		}
1551		if (rxr->rx_buffers != NULL) {
1552			free(rxr->rx_buffers, M_DEVBUF);
1553			rxr->rx_buffers = NULL;
1554		}
1555	}
1556
1557	if (rxr->ptag != NULL) {
1558		bus_dma_tag_destroy(rxr->ptag);
1559		rxr->ptag = NULL;
1560	}
1561
1562	return;
1563} /* ixgbe_free_receive_buffers */
1564
1565/************************************************************************
1566 * ixgbe_rx_input
1567 ************************************************************************/
1568static __inline void
1569ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1570    u32 ptype)
1571{
	/*
	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
	 * has been verified by hardware, and the frame must not carry a
	 * VLAN tag in its Ethernet header.  For IPv6 we do not yet support
	 * extension headers.
	 */
1577	if (rxr->lro_enabled &&
1578	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1579	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1580	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1581	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1582	     (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1583	     (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1584	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1585	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1586		/*
1587		 * Send to the stack if:
1588		 *  - LRO not enabled, or
1589		 *  - no LRO resources, or
1590		 *  - lro enqueue fails
1591		 */
1592		if (rxr->lro.lro_cnt != 0)
1593			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1594				return;
1595	}
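	/*
	 * Hand the frame to the stack directly; the RX lock is dropped
	 * around if_input() so we do not hold a driver lock while the
	 * stack processes the packet.
	 */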
1596	IXGBE_RX_UNLOCK(rxr);
1597	(*ifp->if_input)(ifp, m);
1598	IXGBE_RX_LOCK(rxr);
1599} /* ixgbe_rx_input */
1600
1601/************************************************************************
1602 * ixgbe_rx_discard
1603 ************************************************************************/
1604static __inline void
1605ixgbe_rx_discard(struct rx_ring *rxr, int i)
1606{
1607	struct ixgbe_rx_buf *rbuf;
1608
1609	rbuf = &rxr->rx_buffers[i];
1610
	/*
	 * With advanced descriptors the writeback clobbers the buffer
	 * addresses, so it is easier to just free the existing mbufs and
	 * take the normal refresh path to get new buffers and mappings.
	 */
1618
1619	if (rbuf->fmp != NULL) {/* Partial chain ? */
1620		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1621		m_freem(rbuf->fmp);
1622		rbuf->fmp = NULL;
1623		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1624	} else if (rbuf->buf) {
1625		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1626		m_free(rbuf->buf);
1627		rbuf->buf = NULL;
1628	}
1629	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1630
1631	rbuf->flags = 0;
1632
1633	return;
1634} /* ixgbe_rx_discard */
1635
1636
1637/************************************************************************
1638 * ixgbe_rxeof
1639 *
 *   Executes in interrupt context. It replenishes the mbufs in the
 *   descriptor ring and passes data that has been DMA'd into host
 *   memory up to the upper layer.
1643 *
1644 *   Return TRUE for more work, FALSE for all clean.
1645 ************************************************************************/
1646bool
1647ixgbe_rxeof(struct ix_queue *que)
1648{
1649	struct adapter          *adapter = que->adapter;
1650	struct rx_ring          *rxr = que->rxr;
1651	struct ifnet            *ifp = adapter->ifp;
1652	struct lro_ctrl         *lro = &rxr->lro;
1653	union ixgbe_adv_rx_desc *cur;
1654	struct ixgbe_rx_buf     *rbuf, *nbuf;
1655	int                     i, nextp, processed = 0;
1656	u32                     staterr = 0;
1657	u32                     count = adapter->rx_process_limit;
1658	u16                     pkt_info;
1659
1660	IXGBE_RX_LOCK(rxr);
1661
1662#ifdef DEV_NETMAP
1663	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1664		/* Same as the txeof routine: wakeup clients on intr. */
1665		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1666			IXGBE_RX_UNLOCK(rxr);
1667			return (FALSE);
1668		}
1669	}
1670#endif /* DEV_NETMAP */
1671
1672	for (i = rxr->next_to_check; count != 0;) {
1673		struct mbuf *sendmp, *mp;
1674		u32         rsc, ptype;
1675		u16         len;
1676		u16         vtag = 0;
1677		bool        eop;
1678
1679		/* Sync the ring. */
1680		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1681		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1682
1683		cur = &rxr->rx_base[i];
1684		staterr = le32toh(cur->wb.upper.status_error);
1685		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1686
1687		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1688			break;
1689		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1690			break;
1691
1692		count--;
1693		sendmp = NULL;
1694		nbuf = NULL;
1695		rsc = 0;
1696		cur->wb.upper.status_error = 0;
1697		rbuf = &rxr->rx_buffers[i];
1698		mp = rbuf->buf;
1699
1700		len = le16toh(cur->wb.upper.length);
1701		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1702		    IXGBE_RXDADV_PKTTYPE_MASK;
1703		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1704
1705		/* Make sure bad packets are discarded */
1706		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1707#if __FreeBSD_version >= 1100036
1708			if (adapter->feat_en & IXGBE_FEATURE_VF)
1709				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1710#endif
1711			rxr->rx_discarded++;
1712			ixgbe_rx_discard(rxr, i);
1713			goto next_desc;
1714		}
1715
1716		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1717
		/*
		 * On the 82599, which supports hardware LRO (called HW
		 * RSC), a frame's fragments need not occupy sequential
		 * descriptors; instead the next descriptor is indicated
		 * in bits of the current one.  This also means we may be
		 * processing more than one packet at a time, which
		 * required eliminating the global chain pointers in
		 * favor of the per-buffer chaining done here.  -jfv
		 */
1730		if (!eop) {
1731			/*
1732			 * Figure out the next descriptor
1733			 * of this frame.
1734			 */
1735			if (rxr->hw_rsc == TRUE) {
1736				rsc = ixgbe_rsc_count(cur);
1737				rxr->rsc_num += (rsc - 1);
1738			}
1739			if (rsc) { /* Get hardware index */
1740				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1741				    IXGBE_RXDADV_NEXTP_SHIFT);
1742			} else { /* Just sequential */
1743				nextp = i + 1;
1744				if (nextp == adapter->num_rx_desc)
1745					nextp = 0;
1746			}
1747			nbuf = &rxr->rx_buffers[nextp];
1748			prefetch(nbuf);
1749		}
1750		/*
1751		 * Rather than using the fmp/lmp global pointers
1752		 * we now keep the head of a packet chain in the
1753		 * buffer struct and pass this along from one
1754		 * descriptor to the next, until we get EOP.
1755		 */
1756		mp->m_len = len;
1757		/*
1758		 * See if a stored head exists, which tells us whether
1759		 * this descriptor continues an earlier frame.
1760		 */
1761		sendmp = rbuf->fmp;
1762		if (sendmp != NULL) {  /* secondary frag */
1763			rbuf->buf = rbuf->fmp = NULL;
1764			mp->m_flags &= ~M_PKTHDR;
1765			sendmp->m_pkthdr.len += mp->m_len;
1766		} else {
1767			/*
1768			 * Optimization: this might be a small packet,
1769			 * perhaps just a TCP ACK.  Do a fast, cache-
1770			 * aligned copy into a new mbuf and leave the
1771			 * old mbuf+cluster available for re-use.
1772			 */
1773			if (eop && len <= IXGBE_RX_COPY_LEN) {
1774				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1775				if (sendmp != NULL) {
1776					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1777					ixgbe_bcopy(mp->m_data, sendmp->m_data,
1778					    len);
1779					sendmp->m_len = len;
1780					rxr->rx_copies++;
1781					rbuf->flags |= IXGBE_RX_COPY;
1782				}
1783			}
1784			if (sendmp == NULL) {
1785				rbuf->buf = rbuf->fmp = NULL;
1786				sendmp = mp;
1787			}
1788
1789			/* First descriptor of a non-packet-split chain */
1790			sendmp->m_flags |= M_PKTHDR;
1791			sendmp->m_pkthdr.len = mp->m_len;
1792		}
1793		++processed;
1794
1795		/* Pass the head pointer on */
1796		if (eop == 0) {
1797			nbuf->fmp = sendmp;
1798			sendmp = NULL;
1799			mp->m_next = nbuf->buf;
1800		} else { /* Sending this frame */
1801			sendmp->m_pkthdr.rcvif = ifp;
1802			rxr->rx_packets++;
1803			/* capture data for AIM */
1804			rxr->bytes += sendmp->m_pkthdr.len;
1805			rxr->rx_bytes += sendmp->m_pkthdr.len;
1806			/* Process vlan info */
1807			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1808				vtag = le16toh(cur->wb.upper.vlan);
1809			if (vtag) {
1810				sendmp->m_pkthdr.ether_vtag = vtag;
1811				sendmp->m_flags |= M_VLANTAG;
1812			}
1813			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1814				ixgbe_rx_checksum(staterr, sendmp, ptype);
1815
1816			/*
1817			 * In the multiqueue case the RXCSUM.PCSD bit is set
1818			 * and never cleared, so the RSS hash is available
1819			 * for use; a consumer sketch follows this function.
1820			 */
1821			if (adapter->num_queues > 1) {
1822				sendmp->m_pkthdr.flowid =
1823				    le32toh(cur->wb.lower.hi_dword.rss);
1824				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1825				case IXGBE_RXDADV_RSSTYPE_IPV4:
1826					M_HASHTYPE_SET(sendmp,
1827					    M_HASHTYPE_RSS_IPV4);
1828					break;
1829				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1830					M_HASHTYPE_SET(sendmp,
1831					    M_HASHTYPE_RSS_TCP_IPV4);
1832					break;
1833				case IXGBE_RXDADV_RSSTYPE_IPV6:
1834					M_HASHTYPE_SET(sendmp,
1835					    M_HASHTYPE_RSS_IPV6);
1836					break;
1837				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1838					M_HASHTYPE_SET(sendmp,
1839					    M_HASHTYPE_RSS_TCP_IPV6);
1840					break;
1841				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1842					M_HASHTYPE_SET(sendmp,
1843					    M_HASHTYPE_RSS_IPV6_EX);
1844					break;
1845				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1846					M_HASHTYPE_SET(sendmp,
1847					    M_HASHTYPE_RSS_TCP_IPV6_EX);
1848					break;
1849#if __FreeBSD_version > 1100000
1850				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1851					M_HASHTYPE_SET(sendmp,
1852					    M_HASHTYPE_RSS_UDP_IPV4);
1853					break;
1854				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1855					M_HASHTYPE_SET(sendmp,
1856					    M_HASHTYPE_RSS_UDP_IPV6);
1857					break;
1858				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1859					M_HASHTYPE_SET(sendmp,
1860					    M_HASHTYPE_RSS_UDP_IPV6_EX);
1861					break;
1862#endif
1863				default:
1864					M_HASHTYPE_SET(sendmp,
1865					    M_HASHTYPE_OPAQUE_HASH);
1866				}
1867			} else {
1868				sendmp->m_pkthdr.flowid = que->msix;
1869				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1870			}
1871		}
1872next_desc:
1873		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1874		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1875
1876		/* Advance our pointers to the next descriptor. */
1877		if (++i == rxr->num_desc)
1878			i = 0;
1879
1880		/* Now send to the stack or do LRO */
1881		if (sendmp != NULL) {
1882			rxr->next_to_check = i;
1883			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1884			i = rxr->next_to_check;
1885		}
1886
1887		/* Refresh mbufs after every eight processed descriptors */
1888		if (processed == 8) {
1889			ixgbe_refresh_mbufs(rxr, i);
1890			processed = 0;
1891		}
1892	}
1893
1894	/* Refresh any remaining buf structs */
1895	if (ixgbe_rx_unrefreshed(rxr))
1896		ixgbe_refresh_mbufs(rxr, i);
1897
1898	rxr->next_to_check = i;
1899
1900	/*
1901	 * Flush any outstanding LRO work
1902	 */
1903	tcp_lro_flush_all(lro);
1904
1905	IXGBE_RX_UNLOCK(rxr);
1906
1907	/*
1908	 * Still have cleaning to do?
1909	 */
1910	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1911		return (TRUE);
1912
1913	return (FALSE);
1914} /* ixgbe_rxeof */
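
/*
 * Illustrative sketch (not referenced by the driver): how the index of
 * the next descriptor of a multi-descriptor frame is derived, mirroring
 * the inline logic in ixgbe_rxeof() above.  With HW RSC the hardware
 * stores the index in the NEXTP field of the status word; otherwise the
 * frame simply continues in the following descriptor, wrapping at the
 * end of the ring.
 */
static inline int
ixgbe_next_rx_index_sketch(u32 staterr, int i, int num_desc, bool rsc)
{
	if (rsc) {
		/* Hardware supplies the index of the next buffer */
		return ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
		    IXGBE_RXDADV_NEXTP_SHIFT);
	}
	/* Plain chained frame: simply the next slot, with wraparound */
	return ((i + 1 == num_desc) ? 0 : i + 1);
}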
1915
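/*
 * Illustrative sketch only: one way a consumer might use the
 * flowid/hashtype pair recorded in ixgbe_rxeof() above for its own
 * queue selection.  The helper name and the modulo policy are
 * placeholders, not part of this driver.
 */
static inline u32
ixgbe_flowid_to_queue_sketch(struct mbuf *m, u32 nqueues)
{
	/* Only trust the flowid when a hash type was actually recorded */
	if (nqueues > 1 && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		return (m->m_pkthdr.flowid % nqueues);
	return (0);
}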
1916
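/*
 * Usage sketch (comment only, not compiled): how a deferred queue
 * service routine might consume the boolean returned by ixgbe_rxeof().
 * The driver's real handlers live outside this file; "reschedule_task"
 * and "reenable_irq" are placeholders for its actual taskqueue and
 * interrupt re-arm calls.
 *
 *	more = ixgbe_rxeof(que);
 *	if (more)
 *		reschedule_task(que);	- hardware left work in the ring
 *	else
 *		reenable_irq(que);	- ring is clean, re-arm the vector
 */
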
1917/************************************************************************
1918 * ixgbe_rx_checksum
1919 *
1920 *   Verify that the hardware indicated that the checksum is valid.
1921 *   Inform the stack of the checksum status so that it does not spend
1922 *   time re-verifying it in software (see the sketch after this function).
1923 ************************************************************************/
1924static void
1925ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1926{
1927	u16  status = (u16)staterr;
1928	u8   errors = (u8)(staterr >> 24);
1929	bool sctp = false;
1930
1931	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1932	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1933		sctp = true;
1934
1935	/* IPv4 checksum */
1936	if (status & IXGBE_RXD_STAT_IPCS) {
1937		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1938		/* IP Checksum Good */
1939		if (!(errors & IXGBE_RXD_ERR_IPE))
1940			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1941	}
1942	/* TCP/UDP/SCTP checksum */
1943	if (status & IXGBE_RXD_STAT_L4CS) {
1944		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1945		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1946			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1947			if (!sctp)
1948				mp->m_pkthdr.csum_data = htons(0xffff);
1949		}
1950	}
1951} /* ixgbe_rx_checksum */
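
/*
 * Illustrative sketch (not referenced by the driver): how a consumer of
 * the flags set above can tell that the hardware already verified the
 * L4 checksum, which is what lets the stack skip its own verification.
 */
static inline bool
ixgbe_l4_csum_verified_sketch(struct mbuf *m)
{
	return ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC | CSUM_L4_VALID)) ==
	    (CSUM_L4_CALC | CSUM_L4_VALID));
}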
1952
1953/************************************************************************
1954 * ixgbe_dmamap_cb - Callback that records the bus address of DMA'able memory.
1955 ************************************************************************/
1956static void
1957ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1958{
1959	if (error)
1960		return;
1961	*(bus_addr_t *)arg = segs->ds_addr;
1962
1963	return;
1964} /* ixgbe_dmamap_cb */
1965
1966/************************************************************************
1967 * ixgbe_dma_malloc
1968 ************************************************************************/
1969static int
1970ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1971                 struct ixgbe_dma_alloc *dma, int mapflags)
1972{
1973	device_t dev = adapter->dev;
1974	int      r;
1975
1976	r = bus_dma_tag_create(
1977	     /*      parent */ bus_get_dma_tag(adapter->dev),
1978	     /*   alignment */ DBA_ALIGN,
1979	     /*      bounds */ 0,
1980	     /*     lowaddr */ BUS_SPACE_MAXADDR,
1981	     /*    highaddr */ BUS_SPACE_MAXADDR,
1982	     /*      filter */ NULL,
1983	     /*   filterarg */ NULL,
1984	     /*     maxsize */ size,
1985	     /*   nsegments */ 1,
1986	     /*  maxsegsize */ size,
1987	     /*       flags */ BUS_DMA_ALLOCNOW,
1988	     /*    lockfunc */ NULL,
1989	     /* lockfuncarg */ NULL,
1990	                       &dma->dma_tag);
1991	if (r != 0) {
1992		device_printf(dev,
1993		    "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
1994		    r);
1995		goto fail_0;
1996	}
1997	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
1998	    BUS_DMA_NOWAIT, &dma->dma_map);
1999	if (r != 0) {
2000		device_printf(dev,
2001		    "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
2002		goto fail_1;
2003	}
2004	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
2005	    ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2006	if (r != 0) {
2007		device_printf(dev,
2008		    "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2009		goto fail_2;
2010	}
2011	dma->dma_size = size;
2012
2013	return (0);
2014fail_2:
2015	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2016fail_1:
2017	bus_dma_tag_destroy(dma->dma_tag);
2018fail_0:
2019	dma->dma_tag = NULL;
2020
2021	return (r);
2022} /* ixgbe_dma_malloc */
2023
2024/************************************************************************
2025 * ixgbe_dma_free
2026 ************************************************************************/
2027static void
2028ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2029{
2030	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2031	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2032	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2033	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2034	bus_dma_tag_destroy(dma->dma_tag);
2035} /* ixgbe_dma_free */
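
/*
 * Illustrative sketch (not referenced by the driver): the typical
 * pairing of the two helpers above.  The size is a placeholder;
 * ixgbe_allocate_queues() below shows the real callers, which size the
 * allocation for a full descriptor ring.
 */
static inline int
ixgbe_dma_roundtrip_sketch(struct adapter *adapter)
{
	struct ixgbe_dma_alloc dma;
	int                    error;

	error = ixgbe_dma_malloc(adapter, PAGE_SIZE, &dma, BUS_DMA_NOWAIT);
	if (error != 0)
		return (error);
	/* dma.dma_vaddr is the KVA, dma.dma_paddr the bus address */
	ixgbe_dma_free(adapter, &dma);
	return (0);
}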
2036
2037
2038/************************************************************************
2039 * ixgbe_allocate_queues
2040 *
2041 *   Allocate memory for the transmit and receive rings, and then
2042 *   the descriptors associated with each.  Called only once at attach.
2043 ************************************************************************/
2044int
2045ixgbe_allocate_queues(struct adapter *adapter)
2046{
2047	device_t        dev = adapter->dev;
2048	struct ix_queue *que;
2049	struct tx_ring  *txr;
2050	struct rx_ring  *rxr;
2051	int             rsize, tsize, error = IXGBE_SUCCESS;
2052	int             txconf = 0, rxconf = 0;
2053
2054	/* First, allocate the top level queue structs */
2055	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2056	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2057	if (adapter->queues == NULL) {
2058		device_printf(dev, "Unable to allocate queue memory\n");
2059		error = ENOMEM;
2060		goto fail;
2061	}
2062
2063	/* Second, allocate the TX ring struct memory */
2064	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2065	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2066	if (adapter->tx_rings == NULL) {
2067		device_printf(dev, "Unable to allocate TX ring memory\n");
2068		error = ENOMEM;
2069		goto tx_fail;
2070	}
2071
2072	/* Third, allocate the RX ring struct memory */
2073	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2074	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2075	if (adapter->rx_rings == NULL) {
2076		device_printf(dev, "Unable to allocate RX ring memory\n");
2077		error = ENOMEM;
2078		goto rx_fail;
2079	}
2080
2081	/* For the ring itself */
2082	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2083	    DBA_ALIGN);
2084
2085	/*
2086	 * Now set up the TX queues.  txconf tracks how many have been
2087	 * configured so that, if anything fails midcourse, we can
2088	 * unwind the allocated memory gracefully.
2089	 */
2090	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2091		/* Set up some basics */
2092		txr = &adapter->tx_rings[i];
2093		txr->adapter = adapter;
2094		txr->br = NULL;
2095		/* In case SR-IOV is enabled, align the index properly */
2096		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2097		    i);
2098		txr->num_desc = adapter->num_tx_desc;
2099
2100		/* Initialize the TX side lock */
2101		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2102		    device_get_nameunit(dev), txr->me);
2103		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2104
2105		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2106		    BUS_DMA_NOWAIT)) {
2107			device_printf(dev,
2108			    "Unable to allocate TX Descriptor memory\n");
2109			error = ENOMEM;
2110			goto err_tx_desc;
2111		}
2112		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2113		bzero((void *)txr->tx_base, tsize);
2114
2115		/* Now allocate transmit buffers for the ring */
2116		if (ixgbe_allocate_transmit_buffers(txr)) {
2117			device_printf(dev,
2118			    "Critical Failure setting up transmit buffers\n");
2119			error = ENOMEM;
2120			goto err_tx_desc;
2121		}
2122		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2123			/* Allocate a buf ring */
2124			txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2125			    M_WAITOK, &txr->tx_mtx);
2126			if (txr->br == NULL) {
2127				device_printf(dev,
2128				    "Critical Failure setting up buf ring\n");
2129				error = ENOMEM;
2130				goto err_tx_desc;
2131			}
2132		}
2133	}
2134
2135	/*
2136	 * Next the RX queues...
2137	 */
2138	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2139	    DBA_ALIGN);
2140	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2141		rxr = &adapter->rx_rings[i];
2142		/* Set up some basics */
2143		rxr->adapter = adapter;
2144		/* In case SR-IOV is enabled, align the index properly */
2145		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2146		    i);
2147		rxr->num_desc = adapter->num_rx_desc;
2148
2149		/* Initialize the RX side lock */
2150		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2151		    device_get_nameunit(dev), rxr->me);
2152		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2153
2154		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2155		    BUS_DMA_NOWAIT)) {
2156			device_printf(dev,
2157			    "Unable to allocate RX Descriptor memory\n");
2158			error = ENOMEM;
2159			goto err_rx_desc;
2160		}
2161		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2162		bzero((void *)rxr->rx_base, rsize);
2163
2164		/* Allocate receive buffers for the ring */
2165		if (ixgbe_allocate_receive_buffers(rxr)) {
2166			device_printf(dev,
2167			    "Critical Failure setting up receive buffers\n");
2168			error = ENOMEM;
2169			goto err_rx_desc;
2170		}
2171	}
2172
2173	/*
2174	 * Finally set up the queue holding structs
2175	 */
2176	for (int i = 0; i < adapter->num_queues; i++) {
2177		que = &adapter->queues[i];
2178		que->adapter = adapter;
2179		que->me = i;
2180		que->txr = &adapter->tx_rings[i];
2181		que->rxr = &adapter->rx_rings[i];
2182	}
2183
2184	return (0);
2185
2186err_rx_desc:
2187	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2188		ixgbe_dma_free(adapter, &rxr->rxdma);
2189err_tx_desc:
2190	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2191		ixgbe_dma_free(adapter, &txr->txdma);
2192	free(adapter->rx_rings, M_DEVBUF);
2193rx_fail:
2194	free(adapter->tx_rings, M_DEVBUF);
2195tx_fail:
2196	free(adapter->queues, M_DEVBUF);
2197fail:
2198	return (error);
2199} /* ixgbe_allocate_queues */
2200