1/* $NetBSD: ix_txrx.c,v 1.116 2023/12/30 06:16:44 msaitoh Exp $ */
2
3/******************************************************************************
4
5  Copyright (c) 2001-2017, Intel Corporation
6  All rights reserved.
7
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions are met:
10
11   1. Redistributions of source code must retain the above copyright notice,
12      this list of conditions and the following disclaimer.
13
14   2. Redistributions in binary form must reproduce the above copyright
15      notice, this list of conditions and the following disclaimer in the
16      documentation and/or other materials provided with the distribution.
17
18   3. Neither the name of the Intel Corporation nor the names of its
19      contributors may be used to endorse or promote products derived from
20      this software without specific prior written permission.
21
22  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  POSSIBILITY OF SUCH DAMAGE.
33
34******************************************************************************/
35/*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37/*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 *    notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 *    notice, this list of conditions and the following disclaimer in the
51 *    documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66#include <sys/cdefs.h>
67__KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.116 2023/12/30 06:16:44 msaitoh Exp $");
68
69#include "opt_inet.h"
70#include "opt_inet6.h"
71
72#include "ixgbe.h"
73
74#ifdef RSC
75/*
76 * HW RSC control:
77 *  this feature only works with
78 *  IPv4, and only on 82599 and later.
79 *  Also this will cause IP forwarding to
80 *  fail and that can't be controlled by
81 *  the stack as LRO can. For all these
82 *  reasons I've deemed it best to leave
83 *  this off and not bother with a tuneable
84 *  interface, this would need to be compiled
85 *  to enable.
86 */
87static bool ixgbe_rsc_enable = FALSE;
88#endif
89
90#ifdef IXGBE_FDIR
91/*
92 * For Flow Director: this is the
93 * number of TX packets we sample
94 * for the filter pool, this means
95 * every 20th packet will be probed.
96 *
97 * This feature can be disabled by
98 * setting this to 0.
99 */
100static int atr_sample_rate = 20;
101#endif
102
103#define IXGBE_M_ADJ(sc, rxr, mp)					\
104	if (sc->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
105		m_adj(mp, ETHER_ALIGN)
106
107/************************************************************************
108 *  Local Function prototypes
109 ************************************************************************/
110static void          ixgbe_setup_transmit_ring(struct tx_ring *);
111static void          ixgbe_free_transmit_buffers(struct tx_ring *);
112static int           ixgbe_setup_receive_ring(struct rx_ring *);
113static void          ixgbe_free_receive_buffers(struct rx_ring *);
114static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
115                                       struct ixgbe_hw_stats *);
116static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
117static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
118static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
119static int           ixgbe_tx_ctx_setup(struct tx_ring *,
120                                        struct mbuf *, u32 *, u32 *);
121static int           ixgbe_tso_setup(struct tx_ring *,
122                                     struct mbuf *, u32 *, u32 *);
123static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125                                    struct mbuf *, u32);
126static int           ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
127                                      struct ixgbe_dma_alloc *, int);
128static void          ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
129#ifdef RSC
130static void	     ixgbe_setup_hw_rsc(struct rx_ring *);
131#endif
132
133/************************************************************************
134 * ixgbe_legacy_start_locked - Transmit entry point
135 *
136 *   Called by the stack to initiate a transmit.
137 *   The driver will remain in this routine as long as there are
138 *   packets to transmit and transmit resources are available.
139 *   In case resources are not available, the stack is notified
140 *   and the packet is requeued.
141 ************************************************************************/
142int
143ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
144{
145	int rc;
146	struct mbuf    *m_head;
147	struct ixgbe_softc *sc = txr->sc;
148
149	IXGBE_TX_LOCK_ASSERT(txr);
150
151	if (sc->link_active != LINK_STATE_UP) {
152		/*
153		 * discard all packets buffered in IFQ to avoid
154		 * sending old packets at next link up timing.
155		 */
156		ixgbe_drain(ifp, txr);
157		return (ENETDOWN);
158	}
159	if ((ifp->if_flags & IFF_RUNNING) == 0)
160		return (ENETDOWN);
161	if (txr->txr_no_space)
162		return (ENETDOWN);
163
164	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
165		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
166			break;
167
168		IFQ_POLL(&ifp->if_snd, m_head);
169		if (m_head == NULL)
170			break;
171
172		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
173			break;
174		}
175		IFQ_DEQUEUE(&ifp->if_snd, m_head);
176		if (rc != 0) {
177			m_freem(m_head);
178			continue;
179		}
180
181		/* Send a copy of the frame to the BPF listener */
182		bpf_mtap(ifp, m_head, BPF_D_OUT);
183	}
184
185	return IXGBE_SUCCESS;
186} /* ixgbe_legacy_start_locked */
187
188/************************************************************************
189 * ixgbe_legacy_start
190 *
191 *   Called by the stack, this always uses the first tx ring,
192 *   and should not be used with multiqueue tx enabled.
193 ************************************************************************/
194void
195ixgbe_legacy_start(struct ifnet *ifp)
196{
197	struct ixgbe_softc *sc = ifp->if_softc;
198	struct tx_ring *txr = sc->tx_rings;
199
200	if (ifp->if_flags & IFF_RUNNING) {
201		IXGBE_TX_LOCK(txr);
202		ixgbe_legacy_start_locked(ifp, txr);
203		IXGBE_TX_UNLOCK(txr);
204	}
205} /* ixgbe_legacy_start */
206
207/************************************************************************
208 * ixgbe_mq_start - Multiqueue Transmit Entry Point
209 *
210 *   (if_transmit function)
211 ************************************************************************/
212int
213ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
214{
215	struct ixgbe_softc *sc = ifp->if_softc;
216	struct tx_ring	*txr;
217	int		i;
218#ifdef RSS
219	uint32_t bucket_id;
220#endif
221
222	/*
223	 * When doing RSS, map it to the same outbound queue
224	 * as the incoming flow would be mapped to.
225	 *
226	 * If everything is setup correctly, it should be the
227	 * same bucket that the current CPU we're on is.
228	 */
229#ifdef RSS
230	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
231		if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
232		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
233		    &bucket_id) == 0)) {
234			i = bucket_id % sc->num_queues;
235#ifdef IXGBE_DEBUG
236			if (bucket_id > sc->num_queues)
237				if_printf(ifp,
238				    "bucket_id (%d) > num_queues (%d)\n",
239				    bucket_id, sc->num_queues);
240#endif
241		} else
242			i = m->m_pkthdr.flowid % sc->num_queues;
243	} else
244#endif /* 0 */
245		i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
246
247	/* Check for a hung queue and pick alternative */
248	if (((1ULL << i) & sc->active_queues) == 0)
249		i = ffs64(sc->active_queues);
250
251	txr = &sc->tx_rings[i];
252
253	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
254		m_freem(m);
255		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
256		return ENOBUFS;
257	}
258#ifdef IXGBE_ALWAYS_TXDEFER
259	kpreempt_disable();
260	softint_schedule(txr->txr_si);
261	kpreempt_enable();
262#else
263	if (IXGBE_TX_TRYLOCK(txr)) {
264		ixgbe_mq_start_locked(ifp, txr);
265		IXGBE_TX_UNLOCK(txr);
266	} else {
267		if (sc->txrx_use_workqueue) {
268			u_int *enqueued;
269
270			/*
271			 * This function itself is not called in interrupt
272			 * context, however it can be called in fast softint
273			 * context right after receiving forwarding packets.
274			 * So, it is required to protect workqueue from twice
275			 * enqueuing when the machine uses both spontaneous
276			 * packets and forwarding packets.
277			 */
278			enqueued = percpu_getref(sc->txr_wq_enqueued);
279			if (*enqueued == 0) {
280				*enqueued = 1;
281				percpu_putref(sc->txr_wq_enqueued);
282				workqueue_enqueue(sc->txr_wq,
283				    &txr->wq_cookie, curcpu());
284			} else
285				percpu_putref(sc->txr_wq_enqueued);
286		} else {
287			kpreempt_disable();
288			softint_schedule(txr->txr_si);
289			kpreempt_enable();
290		}
291	}
292#endif
293
294	return (0);
295} /* ixgbe_mq_start */
296
297/************************************************************************
298 * ixgbe_mq_start_locked
299 ************************************************************************/
300int
301ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
302{
303	struct mbuf    *next;
304	int            enqueued = 0, err = 0;
305
306	if (txr->sc->link_active != LINK_STATE_UP) {
307		/*
308		 * discard all packets buffered in txr_interq to avoid
309		 * sending old packets at next link up timing.
310		 */
311		ixgbe_drain(ifp, txr);
312		return (ENETDOWN);
313	}
314	if ((ifp->if_flags & IFF_RUNNING) == 0)
315		return (ENETDOWN);
316	if (txr->txr_no_space)
317		return (ENETDOWN);
318
319	/* Process the queue */
320	while ((next = pcq_get(txr->txr_interq)) != NULL) {
321		if ((err = ixgbe_xmit(txr, next)) != 0) {
322			m_freem(next);
323			/* All errors are counted in ixgbe_xmit() */
324			break;
325		}
326		enqueued++;
327#if __FreeBSD_version >= 1100036
328		/*
329		 * Since we're looking at the tx ring, we can check
330		 * to see if we're a VF by examining our tail register
331		 * address.
332		 */
333		if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
334		    (next->m_flags & M_MCAST))
335			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
336#endif
337		/* Send a copy of the frame to the BPF listener */
338		bpf_mtap(ifp, next, BPF_D_OUT);
339		if ((ifp->if_flags & IFF_RUNNING) == 0)
340			break;
341	}
342
343	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
344		ixgbe_txeof(txr);
345
346	return (err);
347} /* ixgbe_mq_start_locked */
348
349/************************************************************************
350 * ixgbe_deferred_mq_start
351 *
352 *   Called from a softint and workqueue (indirectly) to drain queued
353 *   transmit packets.
354 ************************************************************************/
355void
356ixgbe_deferred_mq_start(void *arg)
357{
358	struct tx_ring *txr = arg;
359	struct ixgbe_softc *sc = txr->sc;
360	struct ifnet   *ifp = sc->ifp;
361
362	IXGBE_TX_LOCK(txr);
363	if (pcq_peek(txr->txr_interq) != NULL)
364		ixgbe_mq_start_locked(ifp, txr);
365	IXGBE_TX_UNLOCK(txr);
366} /* ixgbe_deferred_mq_start */
367
368/************************************************************************
369 * ixgbe_deferred_mq_start_work
370 *
371 *   Called from a workqueue to drain queued transmit packets.
372 ************************************************************************/
373void
374ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
375{
376	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
377	struct ixgbe_softc *sc = txr->sc;
378	u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
379	*enqueued = 0;
380	percpu_putref(sc->txr_wq_enqueued);
381
382	ixgbe_deferred_mq_start(txr);
383} /* ixgbe_deferred_mq_start */
384
385/************************************************************************
386 * ixgbe_drain_all
387 ************************************************************************/
388void
389ixgbe_drain_all(struct ixgbe_softc *sc)
390{
391	struct ifnet *ifp = sc->ifp;
392	struct ix_queue *que = sc->queues;
393
394	for (int i = 0; i < sc->num_queues; i++, que++) {
395		struct tx_ring  *txr = que->txr;
396
397		IXGBE_TX_LOCK(txr);
398		ixgbe_drain(ifp, txr);
399		IXGBE_TX_UNLOCK(txr);
400	}
401}
402
403/************************************************************************
404 * ixgbe_xmit
405 *
406 *   Maps the mbufs to tx descriptors, allowing the
407 *   TX engine to transmit the packets.
408 *
409 *   Return 0 on success, positive on failure
410 ************************************************************************/
411static int
412ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
413{
414	struct ixgbe_softc      *sc = txr->sc;
415	struct ixgbe_tx_buf     *txbuf;
416	union ixgbe_adv_tx_desc *txd = NULL;
417	struct ifnet	        *ifp = sc->ifp;
418	int                     i, j, error;
419	int                     first;
420	u32                     olinfo_status = 0, cmd_type_len;
421	bool                    remap = TRUE;
422	bus_dmamap_t            map;
423
424	/* Basic descriptor defines */
425	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
426	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
427
428	if (vlan_has_tag(m_head))
429		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
430
431	/*
432	 * Important to capture the first descriptor
433	 * used because it will contain the index of
434	 * the one we tell the hardware to report back
435	 */
436	first = txr->next_avail_desc;
437	txbuf = &txr->tx_buffers[first];
438	map = txbuf->map;
439
440	/*
441	 * Map the packet for DMA.
442	 */
443retry:
444	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
445	    BUS_DMA_NOWAIT);
446
447	if (__predict_false(error)) {
448		struct mbuf *m;
449
450		switch (error) {
451		case EAGAIN:
452			txr->q_eagain_tx_dma_setup++;
453			return EAGAIN;
454		case ENOMEM:
455			txr->q_enomem_tx_dma_setup++;
456			return EAGAIN;
457		case EFBIG:
458			/* Try it again? - one try */
459			if (remap == TRUE) {
460				remap = FALSE;
461				/*
462				 * XXX: m_defrag will choke on
463				 * non-MCLBYTES-sized clusters
464				 */
465				txr->q_efbig_tx_dma_setup++;
466				m = m_defrag(m_head, M_NOWAIT);
467				if (m == NULL) {
468					txr->q_mbuf_defrag_failed++;
469					return ENOBUFS;
470				}
471				m_head = m;
472				goto retry;
473			} else {
474				txr->q_efbig2_tx_dma_setup++;
475				return error;
476			}
477		case EINVAL:
478			txr->q_einval_tx_dma_setup++;
479			return error;
480		default:
481			txr->q_other_tx_dma_setup++;
482			return error;
483		}
484	}
485
486	/* Make certain there are enough descriptors */
487	if (txr->tx_avail < (map->dm_nsegs + 2)) {
488		txr->txr_no_space = true;
489		IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
490		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
491		return EAGAIN;
492	}
493
494	/*
495	 * Set up the appropriate offload context if requested,
496	 * this may consume one TX descriptor.
497	 */
498	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
499	if (__predict_false(error)) {
500		return (error);
501	}
502
503#ifdef IXGBE_FDIR
504	/* Do the flow director magic */
505	if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
506	    (txr->atr_sample) && (!sc->fdir_reinit)) {
507		++txr->atr_count;
508		if (txr->atr_count >= atr_sample_rate) {
509			ixgbe_atr(txr, m_head);
510			txr->atr_count = 0;
511		}
512	}
513#endif
514
515	olinfo_status |= IXGBE_ADVTXD_CC;
516	i = txr->next_avail_desc;
517	for (j = 0; j < map->dm_nsegs; j++) {
518		bus_size_t seglen;
519		uint64_t segaddr;
520
521		txbuf = &txr->tx_buffers[i];
522		txd = &txr->tx_base[i];
523		seglen = map->dm_segs[j].ds_len;
524		segaddr = htole64(map->dm_segs[j].ds_addr);
525
526		txd->read.buffer_addr = segaddr;
527		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
528		txd->read.olinfo_status = htole32(olinfo_status);
529
530		if (++i == txr->num_desc)
531			i = 0;
532	}
533
534	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
535	txr->tx_avail -= map->dm_nsegs;
536	txr->next_avail_desc = i;
537
538	txbuf->m_head = m_head;
539	/*
540	 * Here we swap the map so the last descriptor,
541	 * which gets the completion interrupt has the
542	 * real map, and the first descriptor gets the
543	 * unused map from this descriptor.
544	 */
545	txr->tx_buffers[first].map = txbuf->map;
546	txbuf->map = map;
547	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
548	    BUS_DMASYNC_PREWRITE);
549
550	/* Set the EOP descriptor that will be marked done */
551	txbuf = &txr->tx_buffers[first];
552	txbuf->eop = txd;
553
554	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
555	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
556	/*
557	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
558	 * hardware that this frame is available to transmit.
559	 */
560	IXGBE_EVC_ADD(&txr->total_packets, 1);
561	IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
562
563	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
564	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
565	if (m_head->m_flags & M_MCAST)
566		if_statinc_ref(nsr, if_omcasts);
567	IF_STAT_PUTREF(ifp);
568
569	/* Mark queue as having work */
570	if (txr->busy == 0)
571		txr->busy = 1;
572
573	return (0);
574} /* ixgbe_xmit */
575
576/************************************************************************
577 * ixgbe_drain
578 ************************************************************************/
579static void
580ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
581{
582	struct mbuf *m;
583
584	IXGBE_TX_LOCK_ASSERT(txr);
585
586	if (txr->me == 0) {
587		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
588			IFQ_DEQUEUE(&ifp->if_snd, m);
589			m_freem(m);
590			IF_DROP(&ifp->if_snd);
591		}
592	}
593
594	while ((m = pcq_get(txr->txr_interq)) != NULL) {
595		m_freem(m);
596		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
597	}
598}
599
600/************************************************************************
601 * ixgbe_allocate_transmit_buffers
602 *
603 *   Allocate memory for tx_buffer structures. The tx_buffer stores all
604 *   the information needed to transmit a packet on the wire. This is
605 *   called only once at attach, setup is done every reset.
606 ************************************************************************/
607static int
608ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
609{
610	struct ixgbe_softc  *sc = txr->sc;
611	device_t            dev = sc->dev;
612	struct ixgbe_tx_buf *txbuf;
613	int                 error, i;
614
615	/*
616	 * Setup DMA descriptor areas.
617	 */
618	error = ixgbe_dma_tag_create(
619	         /*      parent */ sc->osdep.dmat,
620	         /*   alignment */ 1,
621	         /*      bounds */ 0,
622	         /*     maxsize */ IXGBE_TSO_SIZE,
623	         /*   nsegments */ sc->num_segs,
624	         /*  maxsegsize */ PAGE_SIZE,
625	         /*       flags */ 0,
626	                           &txr->txtag);
627	if (error != 0) {
628		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
629		goto fail;
630	}
631
632	txr->tx_buffers = kmem_zalloc(sizeof(struct ixgbe_tx_buf) *
633	    sc->num_tx_desc, KM_SLEEP);
634
635	/* Create the descriptor buffer dma maps */
636	txbuf = txr->tx_buffers;
637	for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
638		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
639		if (error != 0) {
640			aprint_error_dev(dev,
641			    "Unable to create TX DMA map (%d)\n", error);
642			goto fail;
643		}
644	}
645
646	return 0;
647fail:
648	/* We free all, it handles case where we are in the middle */
649#if 0 /* XXX was FreeBSD */
650	ixgbe_free_transmit_structures(sc);
651#else
652	ixgbe_free_transmit_buffers(txr);
653#endif
654	return (error);
655} /* ixgbe_allocate_transmit_buffers */
656
657/************************************************************************
658 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
659 ************************************************************************/
660static void
661ixgbe_setup_transmit_ring(struct tx_ring *txr)
662{
663	struct ixgbe_softc    *sc = txr->sc;
664	struct ixgbe_tx_buf   *txbuf;
665#ifdef DEV_NETMAP
666	struct netmap_sc      *na = NA(sc->ifp);
667	struct netmap_slot    *slot;
668#endif /* DEV_NETMAP */
669
670	/* Clear the old ring contents */
671	IXGBE_TX_LOCK(txr);
672
673#ifdef DEV_NETMAP
674	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
675		/*
676		 * (under lock): if in netmap mode, do some consistency
677		 * checks and set slot to entry 0 of the netmap ring.
678		 */
679		slot = netmap_reset(na, NR_TX, txr->me, 0);
680	}
681#endif /* DEV_NETMAP */
682
683	bzero((void *)txr->tx_base,
684	    (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
685	/* Reset indices */
686	txr->next_avail_desc = 0;
687	txr->next_to_clean = 0;
688
689	/* Free any existing tx buffers. */
690	txbuf = txr->tx_buffers;
691	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
692		if (txbuf->m_head != NULL) {
693			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
694			    0, txbuf->m_head->m_pkthdr.len,
695			    BUS_DMASYNC_POSTWRITE);
696			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
697			m_freem(txbuf->m_head);
698			txbuf->m_head = NULL;
699		}
700
701#ifdef DEV_NETMAP
702		/*
703		 * In netmap mode, set the map for the packet buffer.
704		 * NOTE: Some drivers (not this one) also need to set
705		 * the physical buffer address in the NIC ring.
706		 * Slots in the netmap ring (indexed by "si") are
707		 * kring->nkr_hwofs positions "ahead" wrt the
708		 * corresponding slot in the NIC ring. In some drivers
709		 * (not here) nkr_hwofs can be negative. Function
710		 * netmap_idx_n2k() handles wraparounds properly.
711		 */
712		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
713			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
714			netmap_load_map(na, txr->txtag,
715			    txbuf->map, NMB(na, slot + si));
716		}
717#endif /* DEV_NETMAP */
718
719		/* Clear the EOP descriptor pointer */
720		txbuf->eop = NULL;
721	}
722
723#ifdef IXGBE_FDIR
724	/* Set the rate at which we sample packets */
725	if (sc->feat_en & IXGBE_FEATURE_FDIR)
726		txr->atr_sample = atr_sample_rate;
727#endif
728
729	/* Set number of descriptors available */
730	txr->tx_avail = sc->num_tx_desc;
731
732	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
733	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
734	IXGBE_TX_UNLOCK(txr);
735} /* ixgbe_setup_transmit_ring */
736
737/************************************************************************
738 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
739 ************************************************************************/
740int
741ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
742{
743	struct tx_ring *txr = sc->tx_rings;
744
745	for (int i = 0; i < sc->num_queues; i++, txr++)
746		ixgbe_setup_transmit_ring(txr);
747
748	return (0);
749} /* ixgbe_setup_transmit_structures */
750
751/************************************************************************
752 * ixgbe_free_transmit_structures - Free all transmit rings.
753 ************************************************************************/
754void
755ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
756{
757	struct tx_ring *txr = sc->tx_rings;
758
759	for (int i = 0; i < sc->num_queues; i++, txr++) {
760		ixgbe_free_transmit_buffers(txr);
761		ixgbe_dma_free(sc, &txr->txdma);
762		IXGBE_TX_LOCK_DESTROY(txr);
763	}
764	kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
765} /* ixgbe_free_transmit_structures */
766
767/************************************************************************
768 * ixgbe_free_transmit_buffers
769 *
770 *   Free transmit ring related data structures.
771 ************************************************************************/
772static void
773ixgbe_free_transmit_buffers(struct tx_ring *txr)
774{
775	struct ixgbe_softc  *sc = txr->sc;
776	struct ixgbe_tx_buf *tx_buffer;
777	int                 i;
778
779	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
780
781	if (txr->tx_buffers == NULL)
782		return;
783
784	tx_buffer = txr->tx_buffers;
785	for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
786		if (tx_buffer->m_head != NULL) {
787			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
788			    0, tx_buffer->m_head->m_pkthdr.len,
789			    BUS_DMASYNC_POSTWRITE);
790			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
791			m_freem(tx_buffer->m_head);
792			tx_buffer->m_head = NULL;
793			if (tx_buffer->map != NULL) {
794				ixgbe_dmamap_destroy(txr->txtag,
795				    tx_buffer->map);
796				tx_buffer->map = NULL;
797			}
798		} else if (tx_buffer->map != NULL) {
799			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
800			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
801			tx_buffer->map = NULL;
802		}
803	}
804	if (txr->txr_interq != NULL) {
805		struct mbuf *m;
806
807		while ((m = pcq_get(txr->txr_interq)) != NULL)
808			m_freem(m);
809		pcq_destroy(txr->txr_interq);
810	}
811	if (txr->tx_buffers != NULL) {
812		kmem_free(txr->tx_buffers,
813		    sizeof(struct ixgbe_tx_buf) * sc->num_tx_desc);
814		txr->tx_buffers = NULL;
815	}
816	if (txr->txtag != NULL) {
817		ixgbe_dma_tag_destroy(txr->txtag);
818		txr->txtag = NULL;
819	}
820} /* ixgbe_free_transmit_buffers */
821
822/************************************************************************
823 * ixgbe_tx_ctx_setup
824 *
825 *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
826 ************************************************************************/
827static int
828ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
829    u32 *cmd_type_len, u32 *olinfo_status)
830{
831	struct ixgbe_softc               *sc = txr->sc;
832	struct ixgbe_adv_tx_context_desc *TXD;
833	struct ether_vlan_header         *eh;
834#ifdef INET
835	struct ip                        *ip;
836#endif
837#ifdef INET6
838	struct ip6_hdr                   *ip6;
839#endif
840	int                              ehdrlen, ip_hlen = 0;
841	int                              offload = TRUE;
842	int                              ctxd = txr->next_avail_desc;
843	u32                              vlan_macip_lens = 0;
844	u32                              type_tucmd_mlhl = 0;
845	u16                              vtag = 0;
846	u16                              etype;
847	u8                               ipproto = 0;
848	char                             *l3d;
849
850	/* First check if TSO is to be used */
851	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
852		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
853
854		if (rv != 0)
855			IXGBE_EVC_ADD(&sc->tso_err, 1);
856		return rv;
857	}
858
859	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
860		offload = FALSE;
861
862	/* Indicate the whole packet as payload when not doing TSO */
863	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
864
865	/*
866	 * In advanced descriptors the vlan tag must
867	 * be placed into the context descriptor. Hence
868	 * we need to make one even if not doing offloads.
869	 */
870	if (vlan_has_tag(mp)) {
871		vtag = htole16(vlan_get_tag(mp));
872		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
873	} else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
874	           (offload == FALSE))
875		return (0);
876
877	/*
878	 * Determine where frame payload starts.
879	 * Jump over vlan headers if already present,
880	 * helpful for QinQ too.
881	 */
882	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
883	eh = mtod(mp, struct ether_vlan_header *);
884	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
885		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
886		etype = ntohs(eh->evl_proto);
887		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
888	} else {
889		etype = ntohs(eh->evl_encap_proto);
890		ehdrlen = ETHER_HDR_LEN;
891	}
892
893	/* Set the ether header length */
894	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
895
896	if (offload == FALSE)
897		goto no_offloads;
898
899	/*
900	 * If the first mbuf only includes the ethernet header,
901	 * jump to the next one
902	 * XXX: This assumes the stack splits mbufs containing headers
903	 *      on header boundaries
904	 * XXX: And assumes the entire IP header is contained in one mbuf
905	 */
906	if (mp->m_len == ehdrlen && mp->m_next)
907		l3d = mtod(mp->m_next, char *);
908	else
909		l3d = mtod(mp, char *) + ehdrlen;
910
911	switch (etype) {
912#ifdef INET
913	case ETHERTYPE_IP:
914		ip = (struct ip *)(l3d);
915		ip_hlen = ip->ip_hl << 2;
916		ipproto = ip->ip_p;
917		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
918		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
919		    ip->ip_sum == 0);
920		break;
921#endif
922#ifdef INET6
923	case ETHERTYPE_IPV6:
924		ip6 = (struct ip6_hdr *)(l3d);
925		ip_hlen = sizeof(struct ip6_hdr);
926		ipproto = ip6->ip6_nxt;
927		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
928		break;
929#endif
930	default:
931		offload = false;
932		break;
933	}
934
935	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
936		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
937
938	vlan_macip_lens |= ip_hlen;
939
940	/* No support for offloads for non-L4 next headers */
941	switch (ipproto) {
942	case IPPROTO_TCP:
943		if (mp->m_pkthdr.csum_flags &
944		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
945			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
946		else
947			offload = false;
948		break;
949	case IPPROTO_UDP:
950		if (mp->m_pkthdr.csum_flags &
951		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
952			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
953		else
954			offload = false;
955		break;
956	default:
957		offload = false;
958		break;
959	}
960
961	if (offload) /* Insert L4 checksum into data descriptors */
962		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
963
964no_offloads:
965	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
966
967	/* Now ready a context descriptor */
968	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
969
970	/* Now copy bits into descriptor */
971	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
972	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
973	TXD->seqnum_seed = htole32(0);
974	TXD->mss_l4len_idx = htole32(0);
975
976	/* We've consumed the first desc, adjust counters */
977	if (++ctxd == txr->num_desc)
978		ctxd = 0;
979	txr->next_avail_desc = ctxd;
980	--txr->tx_avail;
981
982	return (0);
983} /* ixgbe_tx_ctx_setup */
984
985/************************************************************************
986 * ixgbe_tso_setup
987 *
988 *   Setup work for hardware segmentation offload (TSO) on
989 *   adapters using advanced tx descriptors
990 ************************************************************************/
991static int
992ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
993    u32 *olinfo_status)
994{
995	struct ixgbe_adv_tx_context_desc *TXD;
996	struct ether_vlan_header         *eh;
997#ifdef INET6
998	struct ip6_hdr                   *ip6;
999#endif
1000#ifdef INET
1001	struct ip                        *ip;
1002#endif
1003	struct tcphdr                    *th;
1004	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
1005	u32                              vlan_macip_lens = 0;
1006	u32                              type_tucmd_mlhl = 0;
1007	u32                              mss_l4len_idx = 0, paylen;
1008	u16                              vtag = 0, eh_type;
1009
1010	/*
1011	 * Determine where frame payload starts.
1012	 * Jump over vlan headers if already present
1013	 */
1014	eh = mtod(mp, struct ether_vlan_header *);
1015	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1016		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1017		eh_type = eh->evl_proto;
1018	} else {
1019		ehdrlen = ETHER_HDR_LEN;
1020		eh_type = eh->evl_encap_proto;
1021	}
1022
1023	switch (ntohs(eh_type)) {
1024#ifdef INET
1025	case ETHERTYPE_IP:
1026		ip = (struct ip *)(mp->m_data + ehdrlen);
1027		if (ip->ip_p != IPPROTO_TCP)
1028			return (ENXIO);
1029		ip->ip_sum = 0;
1030		ip_hlen = ip->ip_hl << 2;
1031		th = (struct tcphdr *)((char *)ip + ip_hlen);
1032		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1033		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1034		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1035		/* Tell transmit desc to also do IPv4 checksum. */
1036		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1037		break;
1038#endif
1039#ifdef INET6
1040	case ETHERTYPE_IPV6:
1041		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1042		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
1043		if (ip6->ip6_nxt != IPPROTO_TCP)
1044			return (ENXIO);
1045		ip_hlen = sizeof(struct ip6_hdr);
1046		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1047		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1048		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1049		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1050		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1051		break;
1052#endif
1053	default:
1054		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1055		    __func__, ntohs(eh_type));
1056		break;
1057	}
1058
1059	ctxd = txr->next_avail_desc;
1060	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1061
1062	tcp_hlen = th->th_off << 2;
1063
1064	/* This is used in the transmit desc in encap */
1065	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1066
1067	/* VLAN MACLEN IPLEN */
1068	if (vlan_has_tag(mp)) {
1069		vtag = htole16(vlan_get_tag(mp));
1070		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1071	}
1072
1073	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1074	vlan_macip_lens |= ip_hlen;
1075	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1076
1077	/* ADV DTYPE TUCMD */
1078	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1079	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1080	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1081
1082	/* MSS L4LEN IDX */
1083	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1084	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1085	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1086
1087	TXD->seqnum_seed = htole32(0);
1088
1089	if (++ctxd == txr->num_desc)
1090		ctxd = 0;
1091
1092	txr->tx_avail--;
1093	txr->next_avail_desc = ctxd;
1094	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1095	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1096	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1097	IXGBE_EVC_ADD(&txr->tso_tx, 1);
1098
1099	return (0);
1100} /* ixgbe_tso_setup */
1101
1102
1103/************************************************************************
1104 * ixgbe_txeof
1105 *
1106 *   Examine each tx_buffer in the used queue. If the hardware is done
1107 *   processing the packet then free associated resources. The
1108 *   tx_buffer is put back on the free queue.
1109 ************************************************************************/
1110bool
1111ixgbe_txeof(struct tx_ring *txr)
1112{
1113	struct ixgbe_softc	*sc = txr->sc;
1114	struct ifnet		*ifp = sc->ifp;
1115	struct ixgbe_tx_buf	*buf;
1116	union ixgbe_adv_tx_desc *txd;
1117	u32			work, processed = 0;
1118	u32			limit = sc->tx_process_limit;
1119	u16			avail;
1120
1121	KASSERT(mutex_owned(&txr->tx_mtx));
1122
1123#ifdef DEV_NETMAP
1124	if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
1125	    (sc->ifp->if_capenable & IFCAP_NETMAP)) {
1126		struct netmap_sc *na = NA(sc->ifp);
1127		struct netmap_kring *kring = na->tx_rings[txr->me];
1128		txd = txr->tx_base;
1129		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1130		    BUS_DMASYNC_POSTREAD);
1131		/*
1132		 * In netmap mode, all the work is done in the context
1133		 * of the client thread. Interrupt handlers only wake up
1134		 * clients, which may be sleeping on individual rings
1135		 * or on a global resource for all rings.
1136		 * To implement tx interrupt mitigation, we wake up the client
1137		 * thread roughly every half ring, even if the NIC interrupts
1138		 * more frequently. This is implemented as follows:
1139		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1140		 *   the slot that should wake up the thread (nkr_num_slots
1141		 *   means the user thread should not be woken up);
1142		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1143		 *   or the slot has the DD bit set.
1144		 */
1145		if (kring->nr_kflags < kring->nkr_num_slots &&
1146		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
1147			netmap_tx_irq(ifp, txr->me);
1148		}
1149		return false;
1150	}
1151#endif /* DEV_NETMAP */
1152
1153	if (txr->tx_avail == txr->num_desc) {
1154		txr->busy = 0;
1155		return false;
1156	}
1157
1158	/* Get work starting point */
1159	work = txr->next_to_clean;
1160	buf = &txr->tx_buffers[work];
1161	txd = &txr->tx_base[work];
1162	work -= txr->num_desc; /* The distance to ring end */
1163	avail = txr->tx_avail;
1164	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1165	    BUS_DMASYNC_POSTREAD);
1166
1167	do {
1168		union ixgbe_adv_tx_desc *eop = buf->eop;
1169		if (eop == NULL) /* No work */
1170			break;
1171
1172		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
1173			break;	/* I/O not complete */
1174
1175		if (buf->m_head) {
1176			txr->bytes += buf->m_head->m_pkthdr.len;
1177			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1178			    0, buf->m_head->m_pkthdr.len,
1179			    BUS_DMASYNC_POSTWRITE);
1180			ixgbe_dmamap_unload(txr->txtag, buf->map);
1181			m_freem(buf->m_head);
1182			buf->m_head = NULL;
1183		}
1184		buf->eop = NULL;
1185		++avail;
1186
1187		/* We clean the range if multi segment */
1188		while (txd != eop) {
1189			++txd;
1190			++buf;
1191			++work;
1192			/* wrap the ring? */
1193			if (__predict_false(!work)) {
1194				work -= txr->num_desc;
1195				buf = txr->tx_buffers;
1196				txd = txr->tx_base;
1197			}
1198			if (buf->m_head) {
1199				txr->bytes +=
1200				    buf->m_head->m_pkthdr.len;
1201				bus_dmamap_sync(txr->txtag->dt_dmat,
1202				    buf->map,
1203				    0, buf->m_head->m_pkthdr.len,
1204				    BUS_DMASYNC_POSTWRITE);
1205				ixgbe_dmamap_unload(txr->txtag,
1206				    buf->map);
1207				m_freem(buf->m_head);
1208				buf->m_head = NULL;
1209			}
1210			++avail;
1211			buf->eop = NULL;
1212
1213		}
1214		++processed;
1215
1216		/* Try the next packet */
1217		++txd;
1218		++buf;
1219		++work;
1220		/* reset with a wrap */
1221		if (__predict_false(!work)) {
1222			work -= txr->num_desc;
1223			buf = txr->tx_buffers;
1224			txd = txr->tx_base;
1225		}
1226		prefetch(txd);
1227	} while (__predict_true(--limit));
1228
1229	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1230	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1231
1232	work += txr->num_desc;
1233	txr->next_to_clean = work;
1234	if (processed) {
1235		txr->tx_avail = avail;
1236		txr->txr_no_space = false;
1237		txr->packets += processed;
1238		if_statadd(ifp, if_opackets, processed);
1239	}
1240
1241	/*
1242	 * Queue Hang detection, we know there's
1243	 * work outstanding or the first return
1244	 * would have been taken, so increment busy
1245	 * if nothing managed to get cleaned, then
1246	 * in local_timer it will be checked and
1247	 * marked as HUNG if it exceeds a MAX attempt.
1248	 */
1249	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1250		++txr->busy;
1251	/*
1252	 * If anything gets cleaned we reset state to 1,
1253	 * note this will turn off HUNG if its set.
1254	 */
1255	if (processed)
1256		txr->busy = 1;
1257
1258	if (txr->tx_avail == txr->num_desc)
1259		txr->busy = 0;
1260
1261	return ((limit > 0) ? false : true);
1262} /* ixgbe_txeof */
1263
1264#ifdef RSC
1265/************************************************************************
1266 * ixgbe_rsc_count
1267 *
1268 *   Used to detect a descriptor that has been merged by Hardware RSC.
1269 ************************************************************************/
1270static inline u32
1271ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1272{
1273	return (le32toh(rx->wb.lower.lo_dword.data) &
1274	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1275} /* ixgbe_rsc_count */
1276
1277/************************************************************************
1278 * ixgbe_setup_hw_rsc
1279 *
1280 *   Initialize Hardware RSC (LRO) feature on 82599
1281 *   for an RX ring, this is toggled by the LRO capability
1282 *   even though it is transparent to the stack.
1283 *
1284 *   NOTE: Since this HW feature only works with IPv4 and
1285 *         testing has shown soft LRO to be as effective,
1286 *         this feature will be disabled by default.
1287 ************************************************************************/
1288static void
1289ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1290{
1291	struct ixgbe_softc *sc = rxr->sc;
1292	struct ixgbe_hw	*hw = &sc->hw;
1293	u32		rscctrl, rdrxctl;
1294
1295	/* If turning LRO/RSC off we need to disable it */
1296	if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
1297		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1298		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1299		return;
1300	}
1301
1302	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1303	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1304#ifdef DEV_NETMAP
1305	/* Always strip CRC unless Netmap disabled it */
1306	if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
1307	    !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
1308	    ix_crcstrip)
1309#endif /* DEV_NETMAP */
1310		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1311	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1312	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1313
1314	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1315	rscctrl |= IXGBE_RSCCTL_RSCEN;
1316	/*
1317	 * Limit the total number of descriptors that
1318	 * can be combined, so it does not exceed 64K
1319	 */
1320	if (rxr->mbuf_sz == MCLBYTES)
1321		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1322	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1323		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1324	else if (rxr->mbuf_sz == MJUM9BYTES)
1325		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1326	else  /* Using 16K cluster */
1327		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1328
1329	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1330
1331	/* Enable TCP header recognition */
1332	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1333	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1334
1335	/* Disable RSC for ACK packets */
1336	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1337	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1338
1339	rxr->hw_rsc = TRUE;
1340} /* ixgbe_setup_hw_rsc */
1341#endif
1342
1343/************************************************************************
1344 * ixgbe_refresh_mbufs
1345 *
1346 *   Refresh mbuf buffers for RX descriptor rings
1347 *    - now keeps its own state so discards due to resource
1348 *      exhaustion are unnecessary, if an mbuf cannot be obtained
1349 *      it just returns, keeping its placeholder, thus it can simply
1350 *      be recalled to try again.
1351 ************************************************************************/
1352static void
1353ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1354{
1355	struct ixgbe_softc  *sc = rxr->sc;
1356	struct ixgbe_rx_buf *rxbuf;
1357	struct mbuf         *mp;
1358	int                 i, error;
1359	bool                refreshed = false;
1360
1361	i = rxr->next_to_refresh;
1362	/* next_to_refresh points to the previous one */
1363	if (++i == rxr->num_desc)
1364		i = 0;
1365
1366	while (i != limit) {
1367		rxbuf = &rxr->rx_buffers[i];
1368		if (__predict_false(rxbuf->buf == NULL)) {
1369			mp = ixgbe_getcl();
1370			if (mp == NULL) {
1371				IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1372				goto update;
1373			}
1374			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1375			IXGBE_M_ADJ(sc, rxr, mp);
1376		} else
1377			mp = rxbuf->buf;
1378
1379		/* If we're dealing with an mbuf that was copied rather
1380		 * than replaced, there's no need to go through busdma.
1381		 */
1382		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1383			/* Get the memory mapping */
1384			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1385			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1386			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1387			if (__predict_false(error != 0)) {
1388				device_printf(sc->dev, "Refresh mbufs: "
1389				    "payload dmamap load failure - %d\n",
1390				    error);
1391				m_free(mp);
1392				rxbuf->buf = NULL;
1393				goto update;
1394			}
1395			rxbuf->buf = mp;
1396			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1397			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1398			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1399			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1400		} else {
1401			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1402			rxbuf->flags &= ~IXGBE_RX_COPY;
1403		}
1404
1405		refreshed = true;
1406		/* next_to_refresh points to the previous one */
1407		rxr->next_to_refresh = i;
1408		if (++i == rxr->num_desc)
1409			i = 0;
1410	}
1411
1412update:
1413	if (refreshed) /* Update hardware tail index */
1414		IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
1415
1416	return;
1417} /* ixgbe_refresh_mbufs */
1418
1419/************************************************************************
1420 * ixgbe_allocate_receive_buffers
1421 *
1422 *   Allocate memory for rx_buffer structures. Since we use one
1423 *   rx_buffer per received packet, the maximum number of rx_buffer's
1424 *   that we'll need is equal to the number of receive descriptors
1425 *   that we've allocated.
1426 ************************************************************************/
1427static int
1428ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1429{
1430	struct ixgbe_softc  *sc = rxr->sc;
1431	device_t            dev = sc->dev;
1432	struct ixgbe_rx_buf *rxbuf;
1433	int                 bsize, error;
1434
1435	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1436	rxr->rx_buffers = kmem_zalloc(bsize, KM_SLEEP);
1437
1438	error = ixgbe_dma_tag_create(
1439	         /*      parent */ sc->osdep.dmat,
1440	         /*   alignment */ 1,
1441	         /*      bounds */ 0,
1442	         /*     maxsize */ MJUM16BYTES,
1443	         /*   nsegments */ 1,
1444	         /*  maxsegsize */ MJUM16BYTES,
1445	         /*       flags */ 0,
1446	                           &rxr->ptag);
1447	if (error != 0) {
1448		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1449		goto fail;
1450	}
1451
1452	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1453		rxbuf = &rxr->rx_buffers[i];
1454		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1455		if (error) {
1456			aprint_error_dev(dev, "Unable to create RX dma map\n");
1457			goto fail;
1458		}
1459	}
1460
1461	return (0);
1462
1463fail:
1464	/* Frees all, but can handle partial completion */
1465	ixgbe_free_receive_structures(sc);
1466
1467	return (error);
1468} /* ixgbe_allocate_receive_buffers */
1469
1470/************************************************************************
1471 * ixgbe_free_receive_ring
1472 ************************************************************************/
1473static void
1474ixgbe_free_receive_ring(struct rx_ring *rxr)
1475{
1476	for (int i = 0; i < rxr->num_desc; i++) {
1477		ixgbe_rx_discard(rxr, i);
1478	}
1479} /* ixgbe_free_receive_ring */
1480
1481/************************************************************************
1482 * ixgbe_setup_receive_ring
1483 *
1484 *   Initialize a receive ring and its buffers.
1485 ************************************************************************/
1486static int
1487ixgbe_setup_receive_ring(struct rx_ring *rxr)
1488{
1489	struct ixgbe_softc    *sc;
1490	struct ixgbe_rx_buf   *rxbuf;
1491#ifdef LRO
1492	struct ifnet          *ifp;
1493	struct lro_ctrl       *lro = &rxr->lro;
1494#endif /* LRO */
1495#ifdef DEV_NETMAP
1496	struct netmap_sc      *na = NA(rxr->sc->ifp);
1497	struct netmap_slot    *slot;
1498#endif /* DEV_NETMAP */
1499	int                   rsize, error = 0;
1500
1501	sc = rxr->sc;
1502#ifdef LRO
1503	ifp = sc->ifp;
1504#endif /* LRO */
1505
1506	/* Clear the ring contents */
1507	IXGBE_RX_LOCK(rxr);
1508
1509#ifdef DEV_NETMAP
1510	if (sc->feat_en & IXGBE_FEATURE_NETMAP)
1511		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1512#endif /* DEV_NETMAP */
1513
1514	rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
1515	KASSERT((rsize % DBA_ALIGN) == 0);
1516	bzero((void *)rxr->rx_base, rsize);
1517	/* Cache the size */
1518	rxr->mbuf_sz = sc->rx_mbuf_sz;
1519
1520	/* Free current RX buffer structs and their mbufs */
1521	ixgbe_free_receive_ring(rxr);
1522
1523	/* Now replenish the mbufs */
1524	for (int i = 0; i < rxr->num_desc; i++) {
1525		struct mbuf *mp;
1526
1527		rxbuf = &rxr->rx_buffers[i];
1528
1529#ifdef DEV_NETMAP
1530		/*
1531		 * In netmap mode, fill the map and set the buffer
1532		 * address in the NIC ring, considering the offset
1533		 * between the netmap and NIC rings (see comment in
1534		 * ixgbe_setup_transmit_ring() ). No need to allocate
1535		 * an mbuf, so end the block with a continue;
1536		 */
1537		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1538			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], i);
1539			uint64_t paddr;
1540			void *addr;
1541
1542			addr = PNMB(na, slot + sj, &paddr);
1543			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1544			/* Update descriptor and the cached value */
1545			rxr->rx_base[i].read.pkt_addr = htole64(paddr);
1546			rxbuf->addr = htole64(paddr);
1547			continue;
1548		}
1549#endif /* DEV_NETMAP */
1550
1551		rxbuf->flags = 0;
1552		rxbuf->buf = ixgbe_getcl();
1553		if (rxbuf->buf == NULL) {
1554			IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1555			error = ENOBUFS;
1556			goto fail;
1557		}
1558		mp = rxbuf->buf;
1559		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1560		IXGBE_M_ADJ(sc, rxr, mp);
1561		/* Get the memory mapping */
1562		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1563		    mp, BUS_DMA_NOWAIT);
1564		if (error != 0) {
1565			/*
1566			 * Clear this entry for later cleanup in
1567			 * ixgbe_discard() which is called via
1568			 * ixgbe_free_receive_ring().
1569			 */
1570			m_freem(mp);
1571			rxbuf->buf = NULL;
1572			goto fail;
1573		}
1574		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1575		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1576		/* Update the descriptor and the cached value */
1577		rxr->rx_base[i].read.pkt_addr =
1578		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1579		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1580	}
1581
1582	/* Setup our descriptor indices */
1583	rxr->next_to_check = 0;
1584	rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
1585#ifdef LRO
1586	rxr->lro_enabled = FALSE;
1587#endif
1588	rxr->discard_multidesc = false;
1589	IXGBE_EVC_STORE(&rxr->rx_copies, 0);
1590#if 0 /* NetBSD */
1591	IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
1592#if 1	/* Fix inconsistency */
1593	IXGBE_EVC_STORE(&rxr->rx_packets, 0);
1594#endif
1595#endif
1596	rxr->vtag_strip = FALSE;
1597
1598	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1599	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1600
1601	/*
1602	 * Now set up the LRO interface
1603	 */
1604#ifdef RSC
1605	if (ixgbe_rsc_enable)
1606		ixgbe_setup_hw_rsc(rxr);
1607#endif
1608#ifdef LRO
1609#ifdef RSC
1610	else
1611#endif
1612	if (ifp->if_capenable & IFCAP_LRO) {
1613		device_t dev = sc->dev;
1614		int err = tcp_lro_init(lro);
1615		if (err) {
1616			device_printf(dev, "LRO Initialization failed!\n");
1617			goto fail;
1618		}
1619		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1620		rxr->lro_enabled = TRUE;
1621		lro->ifp = sc->ifp;
1622	}
1623#endif /* LRO */
1624
1625	IXGBE_RX_UNLOCK(rxr);
1626
1627	return (0);
1628
1629fail:
1630	ixgbe_free_receive_ring(rxr);
1631	IXGBE_RX_UNLOCK(rxr);
1632
1633	return (error);
1634} /* ixgbe_setup_receive_ring */
1635
1636/************************************************************************
1637 * ixgbe_setup_receive_structures - Initialize all receive rings.
1638 ************************************************************************/
1639int
1640ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
1641{
1642	struct rx_ring *rxr = sc->rx_rings;
1643	int            j;
1644
1645	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
1646	for (j = 0; j < sc->num_queues; j++, rxr++)
1647		if (ixgbe_setup_receive_ring(rxr))
1648			goto fail;
1649
1650	return (0);
1651fail:
1652	/*
1653	 * Free RX buffers allocated so far, we will only handle
1654	 * the rings that completed, the failing case will have
1655	 * cleaned up for itself. 'j' failed, so its the terminus.
1656	 */
1657	for (int i = 0; i < j; ++i) {
1658		rxr = &sc->rx_rings[i];
1659		IXGBE_RX_LOCK(rxr);
1660		ixgbe_free_receive_ring(rxr);
1661		IXGBE_RX_UNLOCK(rxr);
1662	}
1663
1664	return (ENOBUFS);
1665} /* ixgbe_setup_receive_structures */
1666
1667
1668/************************************************************************
1669 * ixgbe_free_receive_structures - Free all receive rings.
1670 ************************************************************************/
1671void
1672ixgbe_free_receive_structures(struct ixgbe_softc *sc)
1673{
1674	struct rx_ring *rxr = sc->rx_rings;
1675
1676	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1677
1678	for (int i = 0; i < sc->num_queues; i++, rxr++) {
1679		ixgbe_free_receive_buffers(rxr);
1680#ifdef LRO
1681		/* Free LRO memory */
1682		tcp_lro_free(&rxr->lro);
1683#endif /* LRO */
1684		/* Free the ring memory as well */
1685		ixgbe_dma_free(sc, &rxr->rxdma);
1686		IXGBE_RX_LOCK_DESTROY(rxr);
1687	}
1688
1689	kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
1690} /* ixgbe_free_receive_structures */
1691
1692
1693/************************************************************************
1694 * ixgbe_free_receive_buffers - Free receive ring data structures
1695 ************************************************************************/
1696static void
1697ixgbe_free_receive_buffers(struct rx_ring *rxr)
1698{
1699	struct ixgbe_softc  *sc = rxr->sc;
1700	struct ixgbe_rx_buf *rxbuf;
1701
1702	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1703
1704	/* Cleanup any existing buffers */
1705	if (rxr->rx_buffers != NULL) {
1706		for (int i = 0; i < sc->num_rx_desc; i++) {
1707			rxbuf = &rxr->rx_buffers[i];
1708			ixgbe_rx_discard(rxr, i);
1709			if (rxbuf->pmap != NULL) {
1710				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1711				rxbuf->pmap = NULL;
1712			}
1713		}
1714
1715		if (rxr->rx_buffers != NULL) {
1716			kmem_free(rxr->rx_buffers,
1717			    sizeof(struct ixgbe_rx_buf) * rxr->num_desc);
1718			rxr->rx_buffers = NULL;
1719		}
1720	}
1721
1722	if (rxr->ptag != NULL) {
1723		ixgbe_dma_tag_destroy(rxr->ptag);
1724		rxr->ptag = NULL;
1725	}
1726
1727	return;
1728} /* ixgbe_free_receive_buffers */
1729
1730/************************************************************************
1731 * ixgbe_rx_input
1732 ************************************************************************/
1733static __inline void
1734ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1735    u32 ptype)
1736{
1737	struct ixgbe_softc *sc = ifp->if_softc;
1738
1739#ifdef LRO
1740	struct ethercom *ec = &sc->osdep.ec;
1741
1742	/*
1743	 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
1744	 * should be computed by hardware. Also it should not have VLAN tag in
1745	 * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
1746	 */
1747        if (rxr->lro_enabled &&
1748            (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1749            (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1750            ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1751            (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1752            (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1753            (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1754            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1755            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1756                /*
1757                 * Send to the stack if:
1758                 *  - LRO not enabled, or
1759                 *  - no LRO resources, or
1760                 *  - lro enqueue fails
1761                 */
1762                if (rxr->lro.lro_cnt != 0)
1763                        if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1764                                return;
1765        }
1766#endif /* LRO */
1767
1768	if_percpuq_enqueue(sc->ipq, m);
1769} /* ixgbe_rx_input */
1770
1771/************************************************************************
1772 * ixgbe_rx_discard
1773 ************************************************************************/
1774static __inline void
1775ixgbe_rx_discard(struct rx_ring *rxr, int i)
1776{
1777	struct ixgbe_rx_buf *rbuf;
1778
1779	rbuf = &rxr->rx_buffers[i];
1780
1781	/*
1782	 * With advanced descriptors the writeback clobbers the buffer addrs,
1783	 * so its easier to just free the existing mbufs and take the normal
1784	 * refresh path to get new buffers and mapping.
1785	 */
1786
1787	if (rbuf->fmp != NULL) {/* Partial chain ? */
1788		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1789		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1790		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1791		m_freem(rbuf->fmp);
1792		rbuf->fmp = NULL;
1793		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1794	} else if (rbuf->buf) {
1795		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1796		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1797		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1798		m_free(rbuf->buf);
1799		rbuf->buf = NULL;
1800	}
1801
1802	rbuf->flags = 0;
1803
1804	return;
1805} /* ixgbe_rx_discard */
1806
1807
1808/************************************************************************
1809 * ixgbe_rxeof
1810 *
1811 *   Executes in interrupt context. It replenishes the
1812 *   mbufs in the descriptor and sends data which has
1813 *   been dma'ed into host memory to upper layer.
1814 *
1815 *   Return TRUE for more work, FALSE for all clean.
1816 ************************************************************************/
1817bool
1818ixgbe_rxeof(struct ix_queue *que)
1819{
1820	struct ixgbe_softc	*sc = que->sc;
1821	struct rx_ring		*rxr = que->rxr;
1822	struct ifnet		*ifp = sc->ifp;
1823#ifdef LRO
1824	struct lro_ctrl		*lro = &rxr->lro;
1825#endif /* LRO */
1826	union ixgbe_adv_rx_desc	*cur;
1827	struct ixgbe_rx_buf	*rbuf, *nbuf;
1828	int			i, nextp, processed = 0;
1829	u32			staterr = 0;
1830	u32			loopcount = 0, numdesc;
1831	u32			limit = sc->rx_process_limit;
1832	u32			rx_copy_len = sc->rx_copy_len;
1833	bool			discard_multidesc = rxr->discard_multidesc;
1834	bool			wraparound = false;
1835	unsigned int		syncremain;
1836#ifdef RSS
1837	u16			pkt_info;
1838#endif
1839
1840	IXGBE_RX_LOCK(rxr);
1841
1842#ifdef DEV_NETMAP
1843	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
1844		/* Same as the txeof routine: wakeup clients on intr. */
1845		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1846			IXGBE_RX_UNLOCK(rxr);
1847			return (FALSE);
1848		}
1849	}
1850#endif /* DEV_NETMAP */
1851
1852	/* Sync the ring. The size is rx_process_limit or the first half */
1853	if ((rxr->next_to_check + limit) <= rxr->num_desc) {
1854		/* Non-wraparound */
1855		numdesc = limit;
1856		syncremain = 0;
1857	} else {
1858		/* Wraparound. Sync the first half. */
1859		numdesc = rxr->num_desc - rxr->next_to_check;
1860
1861		/* Set the size of the last half */
1862		syncremain = limit - numdesc;
1863	}
1864	bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1865	    rxr->rxdma.dma_map,
1866	    sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
1867	    sizeof(union ixgbe_adv_rx_desc) * numdesc,
1868	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1869
1870	/*
1871	 * The max number of loop is rx_process_limit. If discard_multidesc is
1872	 * true, continue processing to not to send broken packet to the upper
1873	 * layer.
1874	 */
1875	for (i = rxr->next_to_check;
1876	     (loopcount < limit) || (discard_multidesc == true);) {
1877
1878		struct mbuf *sendmp, *mp;
1879		struct mbuf *newmp;
1880#ifdef RSC
1881		u32         rsc;
1882#endif
1883		u32         ptype;
1884		u16         len;
1885		u16         vtag = 0;
1886		bool        eop;
1887		bool        discard = false;
1888
1889		if (wraparound) {
1890			/* Sync the last half. */
1891			KASSERT(syncremain != 0);
1892			numdesc = syncremain;
1893			wraparound = false;
1894		} else if (__predict_false(loopcount >= limit)) {
1895			KASSERT(discard_multidesc == true);
1896			numdesc = 1;
1897		} else
1898			numdesc = 0;
1899
1900		if (numdesc != 0)
1901			bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1902			    rxr->rxdma.dma_map, 0,
1903			    sizeof(union ixgbe_adv_rx_desc) * numdesc,
1904			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1905
1906		cur = &rxr->rx_base[i];
1907		staterr = le32toh(cur->wb.upper.status_error);
1908#ifdef RSS
1909		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1910#endif
1911
1912		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1913			break;
1914
1915		loopcount++;
1916		sendmp = newmp = NULL;
1917		nbuf = NULL;
1918#ifdef RSC
1919		rsc = 0;
1920#endif
1921		cur->wb.upper.status_error = 0;
1922		rbuf = &rxr->rx_buffers[i];
1923		mp = rbuf->buf;
1924
1925		len = le16toh(cur->wb.upper.length);
1926		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1927		    IXGBE_RXDADV_PKTTYPE_MASK;
1928		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1929
1930		/* Make sure bad packets are discarded */
1931		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1932#if __FreeBSD_version >= 1100036
1933			if (sc->feat_en & IXGBE_FEATURE_VF)
1934				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1935#endif
1936			IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
1937			ixgbe_rx_discard(rxr, i);
1938			discard_multidesc = false;
1939			goto next_desc;
1940		}
1941
1942		if (__predict_false(discard_multidesc))
1943			discard = true;
1944		else {
1945			/* Pre-alloc new mbuf. */
1946
1947			if ((rbuf->fmp == NULL) &&
1948			    eop && (len <= rx_copy_len)) {
1949				/* For short packet. See below. */
1950				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1951				if (__predict_false(sendmp == NULL)) {
1952					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1953					discard = true;
1954				}
1955			} else {
1956				/* For long packet. */
1957				newmp = ixgbe_getcl();
1958				if (__predict_false(newmp == NULL)) {
1959					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1960					discard = true;
1961				}
1962			}
1963		}
1964
1965		if (__predict_false(discard)) {
1966			/*
1967			 * Descriptor initialization is already done by the
1968			 * above code (cur->wb.upper.status_error = 0).
1969			 * So, we can reuse current rbuf->buf for new packet.
1970			 *
1971			 * Rewrite the buffer addr, see comment in
1972			 * ixgbe_rx_discard().
1973			 */
1974			cur->read.pkt_addr = rbuf->addr;
1975			m_freem(rbuf->fmp);
1976			rbuf->fmp = NULL;
1977			if (!eop) {
1978				/* Discard the entire packet. */
1979				discard_multidesc = true;
1980			} else
1981				discard_multidesc = false;
1982			goto next_desc;
1983		}
1984		discard_multidesc = false;
1985
1986		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1987		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1988
1989		/*
1990		 * On 82599 which supports a hardware
1991		 * LRO (called HW RSC), packets need
1992		 * not be fragmented across sequential
1993		 * descriptors, rather the next descriptor
1994		 * is indicated in bits of the descriptor.
1995		 * This also means that we might process
1996		 * more than one packet at a time, something
1997		 * that has never been true before, it
1998		 * required eliminating global chain pointers
1999		 * in favor of what we are doing here.  -jfv
2000		 */
2001		if (!eop) {
2002			/*
2003			 * Figure out the next descriptor
2004			 * of this frame.
2005			 */
2006#ifdef RSC
2007			if (rxr->hw_rsc == TRUE) {
2008				rsc = ixgbe_rsc_count(cur);
2009				rxr->rsc_num += (rsc - 1);
2010			}
2011			if (rsc) { /* Get hardware index */
2012				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2013				    IXGBE_RXDADV_NEXTP_SHIFT);
2014			} else
2015#endif
2016			{ /* Just sequential */
2017				nextp = i + 1;
2018				if (nextp == sc->num_rx_desc)
2019					nextp = 0;
2020			}
2021			nbuf = &rxr->rx_buffers[nextp];
2022			prefetch(nbuf);
2023		}
2024		/*
2025		 * Rather than using the fmp/lmp global pointers
2026		 * we now keep the head of a packet chain in the
2027		 * buffer struct and pass this along from one
2028		 * descriptor to the next, until we get EOP.
2029		 */
2030		/*
2031		 * See if there is a stored head
2032		 * that determines what we are
2033		 */
2034		if (rbuf->fmp != NULL) {
2035			/* Secondary frag */
2036			sendmp = rbuf->fmp;
2037
2038			/* Update new (used in future) mbuf */
2039			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
2040			IXGBE_M_ADJ(sc, rxr, newmp);
2041			rbuf->buf = newmp;
2042			rbuf->fmp = NULL;
2043
2044			/* For secondary frag */
2045			mp->m_len = len;
2046			mp->m_flags &= ~M_PKTHDR;
2047
2048			/* For sendmp */
2049			sendmp->m_pkthdr.len += mp->m_len;
2050		} else {
2051			/*
2052			 * It's the first segment of a multi descriptor
2053			 * packet or a single segment which contains a full
2054			 * packet.
2055			 */
2056
2057			if (eop && (len <= rx_copy_len)) {
2058				/*
2059				 * Optimize.  This might be a small packet, may
2060				 * be just a TCP ACK. Copy into a new mbuf, and
2061				 * Leave the old mbuf+cluster for re-use.
2062				 */
2063				sendmp->m_data += ETHER_ALIGN;
2064				memcpy(mtod(sendmp, void *),
2065				    mtod(mp, void *), len);
2066				IXGBE_EVC_ADD(&rxr->rx_copies, 1);
2067				rbuf->flags |= IXGBE_RX_COPY;
2068			} else {
2069				/* For long packet */
2070
2071				/* Update new (used in future) mbuf */
2072				newmp->m_pkthdr.len = newmp->m_len
2073				    = rxr->mbuf_sz;
2074				IXGBE_M_ADJ(sc, rxr, newmp);
2075				rbuf->buf = newmp;
2076				rbuf->fmp = NULL;
2077
2078				/* For sendmp */
2079				sendmp = mp;
2080			}
2081
2082			/* first desc of a non-ps chain */
2083			sendmp->m_pkthdr.len = sendmp->m_len = len;
2084		}
2085		++processed;
2086
2087		/* Pass the head pointer on */
2088		if (eop == 0) {
2089			nbuf->fmp = sendmp;
2090			sendmp = NULL;
2091			mp->m_next = nbuf->buf;
2092		} else { /* Sending this frame */
2093			m_set_rcvif(sendmp, ifp);
2094			++rxr->packets;
2095			IXGBE_EVC_ADD(&rxr->rx_packets, 1);
2096			/* capture data for AIM */
2097			rxr->bytes += sendmp->m_pkthdr.len;
2098			IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
2099			/* Process vlan info */
2100			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
2101				vtag = le16toh(cur->wb.upper.vlan);
2102			if (vtag) {
2103				vlan_set_tag(sendmp, vtag);
2104			}
2105			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
2106				ixgbe_rx_checksum(staterr, sendmp, ptype,
2107				   &sc->stats.pf);
2108			}
2109
2110#if 0 /* FreeBSD */
2111			/*
2112			 * In case of multiqueue, we have RXCSUM.PCSD bit set
2113			 * and never cleared. This means we have RSS hash
2114			 * available to be used.
2115			 */
2116			if (sc->num_queues > 1) {
2117				sendmp->m_pkthdr.flowid =
2118				    le32toh(cur->wb.lower.hi_dword.rss);
2119				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
2120				case IXGBE_RXDADV_RSSTYPE_IPV4:
2121					M_HASHTYPE_SET(sendmp,
2122					    M_HASHTYPE_RSS_IPV4);
2123					break;
2124				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2125					M_HASHTYPE_SET(sendmp,
2126					    M_HASHTYPE_RSS_TCP_IPV4);
2127					break;
2128				case IXGBE_RXDADV_RSSTYPE_IPV6:
2129					M_HASHTYPE_SET(sendmp,
2130					    M_HASHTYPE_RSS_IPV6);
2131					break;
2132				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2133					M_HASHTYPE_SET(sendmp,
2134					    M_HASHTYPE_RSS_TCP_IPV6);
2135					break;
2136				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2137					M_HASHTYPE_SET(sendmp,
2138					    M_HASHTYPE_RSS_IPV6_EX);
2139					break;
2140				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2141					M_HASHTYPE_SET(sendmp,
2142					    M_HASHTYPE_RSS_TCP_IPV6_EX);
2143					break;
2144#if __FreeBSD_version > 1100000
2145				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2146					M_HASHTYPE_SET(sendmp,
2147					    M_HASHTYPE_RSS_UDP_IPV4);
2148					break;
2149				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2150					M_HASHTYPE_SET(sendmp,
2151					    M_HASHTYPE_RSS_UDP_IPV6);
2152					break;
2153				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2154					M_HASHTYPE_SET(sendmp,
2155					    M_HASHTYPE_RSS_UDP_IPV6_EX);
2156					break;
2157#endif
2158				default:
2159					M_HASHTYPE_SET(sendmp,
2160					    M_HASHTYPE_OPAQUE_HASH);
2161				}
2162			} else {
2163				sendmp->m_pkthdr.flowid = que->msix;
2164				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2165			}
2166#endif
2167		}
2168next_desc:
2169		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2170		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2171
2172		/* Advance our pointers to the next descriptor. */
2173		if (++i == rxr->num_desc) {
2174			wraparound = true;
2175			i = 0;
2176		}
2177		rxr->next_to_check = i;
2178
2179		/* Now send to the stack or do LRO */
2180		if (sendmp != NULL)
2181			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2182
2183		/* Every 8 descriptors we go to refresh mbufs */
2184		if (processed == 8) {
2185			ixgbe_refresh_mbufs(rxr, i);
2186			processed = 0;
2187		}
2188	}
2189
2190	/* Save the current status */
2191	rxr->discard_multidesc = discard_multidesc;
2192
2193	/* Refresh any remaining buf structs */
2194	if (ixgbe_rx_unrefreshed(rxr))
2195		ixgbe_refresh_mbufs(rxr, i);
2196
2197	IXGBE_RX_UNLOCK(rxr);
2198
2199#ifdef LRO
2200	/*
2201	 * Flush any outstanding LRO work
2202	 */
2203	tcp_lro_flush_all(lro);
2204#endif /* LRO */
2205
2206	/*
2207	 * Still have cleaning to do?
2208	 */
2209	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2210		return (TRUE);
2211
2212	return (FALSE);
2213} /* ixgbe_rxeof */
2214
2215
2216/************************************************************************
2217 * ixgbe_rx_checksum
2218 *
2219 *   Verify that the hardware indicated that the checksum is valid.
2220 *   Inform the stack about the status of checksum so that stack
2221 *   doesn't spend time verifying the checksum.
2222 ************************************************************************/
2223static void
2224ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2225    struct ixgbe_hw_stats *stats)
2226{
2227	u16  status = (u16)staterr;
2228	u8   errors = (u8)(staterr >> 24);
2229#if 0
2230	bool sctp = false;
2231
2232	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2233	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2234		sctp = true;
2235#endif
2236
2237	/* IPv4 checksum */
2238	if (status & IXGBE_RXD_STAT_IPCS) {
2239		IXGBE_EVC_ADD(&stats->ipcs, 1);
2240		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2241			/* IP Checksum Good */
2242			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2243		} else {
2244			IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
2245			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2246		}
2247	}
2248	/* TCP/UDP/SCTP checksum */
2249	if (status & IXGBE_RXD_STAT_L4CS) {
2250		IXGBE_EVC_ADD(&stats->l4cs, 1);
2251		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2252		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2253			mp->m_pkthdr.csum_flags |= type;
2254		} else {
2255			IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
2256			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2257		}
2258	}
2259} /* ixgbe_rx_checksum */
2260
2261/************************************************************************
2262 * ixgbe_dma_malloc
2263 ************************************************************************/
2264int
2265ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
2266		struct ixgbe_dma_alloc *dma, const int mapflags)
2267{
2268	device_t dev = sc->dev;
2269	int      r, rsegs;
2270
2271	r = ixgbe_dma_tag_create(
2272	     /*      parent */ sc->osdep.dmat,
2273	     /*   alignment */ DBA_ALIGN,
2274	     /*      bounds */ 0,
2275	     /*     maxsize */ size,
2276	     /*   nsegments */ 1,
2277	     /*  maxsegsize */ size,
2278	     /*       flags */ BUS_DMA_ALLOCNOW,
2279			       &dma->dma_tag);
2280	if (r != 0) {
2281		aprint_error_dev(dev,
2282		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2283		    r);
2284		goto fail_0;
2285	}
2286
2287	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2288	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2289	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2290	if (r != 0) {
2291		aprint_error_dev(dev,
2292		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2293		goto fail_1;
2294	}
2295
2296	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2297	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
2298	if (r != 0) {
2299		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2300		    __func__, r);
2301		goto fail_2;
2302	}
2303
2304	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2305	if (r != 0) {
2306		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2307		    __func__, r);
2308		goto fail_3;
2309	}
2310
2311	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2312	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2313	if (r != 0) {
2314		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2315		    __func__, r);
2316		goto fail_4;
2317	}
2318	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2319	dma->dma_size = size;
2320	return 0;
2321fail_4:
2322	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2323fail_3:
2324	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2325fail_2:
2326	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2327fail_1:
2328	ixgbe_dma_tag_destroy(dma->dma_tag);
2329fail_0:
2330
2331	return (r);
2332} /* ixgbe_dma_malloc */
2333
2334/************************************************************************
2335 * ixgbe_dma_free
2336 ************************************************************************/
2337void
2338ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
2339{
2340	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2341	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2342	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2343	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
2344	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2345	ixgbe_dma_tag_destroy(dma->dma_tag);
2346} /* ixgbe_dma_free */
2347
2348
2349/************************************************************************
2350 * ixgbe_allocate_queues
2351 *
2352 *   Allocate memory for the transmit and receive rings, and then
2353 *   the descriptors associated with each, called only once at attach.
2354 ************************************************************************/
2355int
2356ixgbe_allocate_queues(struct ixgbe_softc *sc)
2357{
2358	device_t	dev = sc->dev;
2359	struct ix_queue	*que;
2360	struct tx_ring	*txr;
2361	struct rx_ring	*rxr;
2362	int             rsize, tsize, error = IXGBE_SUCCESS;
2363	int             txconf = 0, rxconf = 0;
2364
2365	/* First, allocate the top level queue structs */
2366	sc->queues = kmem_zalloc(sizeof(struct ix_queue) * sc->num_queues,
2367	    KM_SLEEP);
2368
2369	/* Second, allocate the TX ring struct memory */
2370	sc->tx_rings = kmem_zalloc(sizeof(struct tx_ring) * sc->num_queues,
2371	    KM_SLEEP);
2372
2373	/* Third, allocate the RX ring */
2374	sc->rx_rings = kmem_zalloc(sizeof(struct rx_ring) * sc->num_queues,
2375	    KM_SLEEP);
2376
2377	/* For the ring itself */
2378	tsize = sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc);
2379	KASSERT((tsize % DBA_ALIGN) == 0);
2380
2381	/*
2382	 * Now set up the TX queues, txconf is needed to handle the
2383	 * possibility that things fail midcourse and we need to
2384	 * undo memory gracefully
2385	 */
2386	for (int i = 0; i < sc->num_queues; i++, txconf++) {
2387		/* Set up some basics */
2388		txr = &sc->tx_rings[i];
2389		txr->sc = sc;
2390		txr->txr_interq = NULL;
2391		/* In case SR-IOV is enabled, align the index properly */
2392#ifdef PCI_IOV
2393		txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2394		    i);
2395#else
2396		txr->me = i;
2397#endif
2398		txr->num_desc = sc->num_tx_desc;
2399
2400		/* Initialize the TX side lock */
2401		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2402
2403		if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
2404		    BUS_DMA_NOWAIT)) {
2405			aprint_error_dev(dev,
2406			    "Unable to allocate TX Descriptor memory\n");
2407			error = ENOMEM;
2408			goto err_tx_desc;
2409		}
2410		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2411		bzero((void *)txr->tx_base, tsize);
2412
2413		/* Now allocate transmit buffers for the ring */
2414		if (ixgbe_allocate_transmit_buffers(txr)) {
2415			aprint_error_dev(dev,
2416			    "Critical Failure setting up transmit buffers\n");
2417			error = ENOMEM;
2418			goto err_tx_desc;
2419		}
2420		if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2421			/* Allocate a buf ring */
2422			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2423			if (txr->txr_interq == NULL) {
2424				aprint_error_dev(dev,
2425				    "Critical Failure setting up buf ring\n");
2426				error = ENOMEM;
2427				goto err_tx_desc;
2428			}
2429		}
2430	}
2431
2432	/*
2433	 * Next the RX queues...
2434	 */
2435	rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
2436	KASSERT((rsize % DBA_ALIGN) == 0);
2437	for (int i = 0; i < sc->num_queues; i++, rxconf++) {
2438		rxr = &sc->rx_rings[i];
2439		/* Set up some basics */
2440		rxr->sc = sc;
2441#ifdef PCI_IOV
2442		/* In case SR-IOV is enabled, align the index properly */
2443		rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2444		    i);
2445#else
2446		rxr->me = i;
2447#endif
2448		rxr->num_desc = sc->num_rx_desc;
2449
2450		/* Initialize the RX side lock */
2451		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2452
2453		if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
2454		    BUS_DMA_NOWAIT)) {
2455			aprint_error_dev(dev,
2456			    "Unable to allocate RxDescriptor memory\n");
2457			error = ENOMEM;
2458			goto err_rx_desc;
2459		}
2460		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2461		bzero((void *)rxr->rx_base, rsize);
2462
2463		/* Allocate receive buffers for the ring */
2464		if (ixgbe_allocate_receive_buffers(rxr)) {
2465			aprint_error_dev(dev,
2466			    "Critical Failure setting up receive buffers\n");
2467			error = ENOMEM;
2468			goto err_rx_desc;
2469		}
2470	}
2471
2472	/*
2473	 * Finally set up the queue holding structs
2474	 */
2475	for (int i = 0; i < sc->num_queues; i++) {
2476		que = &sc->queues[i];
2477		que->sc = sc;
2478		que->me = i;
2479		que->txr = &sc->tx_rings[i];
2480		que->rxr = &sc->rx_rings[i];
2481
2482		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2483		que->disabled_count = 0;
2484	}
2485
2486	return (0);
2487
2488err_rx_desc:
2489	for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
2490		ixgbe_dma_free(sc, &rxr->rxdma);
2491err_tx_desc:
2492	for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
2493		ixgbe_dma_free(sc, &txr->txdma);
2494	kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
2495	kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
2496	kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
2497	return (error);
2498} /* ixgbe_allocate_queues */
2499
2500/************************************************************************
2501 * ixgbe_free_queues
2502 *
2503 *   Free descriptors for the transmit and receive rings, and then
2504 *   the memory associated with each.
2505 ************************************************************************/
2506void
2507ixgbe_free_queues(struct ixgbe_softc *sc)
2508{
2509	struct ix_queue *que;
2510	int i;
2511
2512	ixgbe_free_transmit_structures(sc);
2513	ixgbe_free_receive_structures(sc);
2514	for (i = 0; i < sc->num_queues; i++) {
2515		que = &sc->queues[i];
2516		mutex_destroy(&que->dc_mtx);
2517	}
2518	kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
2519} /* ixgbe_free_queues */
2520