1/******************************************************************************
2
3  Copyright (c) 2013-2014, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/ixl/ixl_txrx.c 270631 2014-08-25 22:04:29Z jfv $*/
34
35/*
36**	IXL driver TX/RX Routines:
37**	    This was separated to allow usage by
38** 	    both the BASE and the VF drivers.
39*/
40
41#include "opt_inet.h"
42#include "opt_inet6.h"
43#include "ixl.h"
44
45/* Local Prototypes */
46static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
47static void	ixl_refresh_mbufs(struct ixl_queue *, int);
48static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
49static int	ixl_tx_setup_offload(struct ixl_queue *,
50		    struct mbuf *, u32 *, u32 *);
51static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);
52
53static __inline void ixl_rx_discard(struct rx_ring *, int);
54static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
55		    struct mbuf *, u8);
56
57/*
58** Multiqueue Transmit driver
59**
60*/
61int
62ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
63{
64	struct ixl_vsi		*vsi = ifp->if_softc;
65	struct ixl_queue	*que;
66	struct tx_ring		*txr;
67	int 			err, i;
68
69	/* Which queue to use */
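	/*
	** A valid flowid (e.g. an RSS hash set by the stack) keeps all
	** packets of a flow on the same ring; otherwise fall back to a
	** simple mapping from the current CPU.
	*/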
70	if ((m->m_flags & M_FLOWID) != 0)
71		i = m->m_pkthdr.flowid % vsi->num_queues;
72	else
73		i = curcpu % vsi->num_queues;
74
75	/* Check for a hung queue and pick alternative */
76	if (((1 << i) & vsi->active_queues) == 0)
77		i = ffsl(vsi->active_queues);
78
79	que = &vsi->queues[i];
80	txr = &que->txr;
81
82	err = drbr_enqueue(ifp, txr->br, m);
83	if (err)
84		return(err);
85	if (IXL_TX_TRYLOCK(txr)) {
86		ixl_mq_start_locked(ifp, txr);
87		IXL_TX_UNLOCK(txr);
88	} else
89		taskqueue_enqueue(que->tq, &que->tx_task);
90
91	return (0);
92}
93
94int
95ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
96{
97	struct ixl_queue	*que = txr->que;
98	struct ixl_vsi		*vsi = que->vsi;
99        struct mbuf		*next;
100        int			err = 0;
101
102
103	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
104	    vsi->link_active == 0)
105		return (ENETDOWN);
106
107	/* Process the transmit queue */
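	/*
	** drbr_peek() leaves the mbuf at the head of the ring. On a
	** failed ixl_xmit() the mbuf may already have been consumed and
	** freed (next == NULL), in which case we just advance; otherwise
	** the untouched chain is put back to be retried later.
	*/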
108	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
109		if ((err = ixl_xmit(que, &next)) != 0) {
110			if (next == NULL)
111				drbr_advance(ifp, txr->br);
112			else
113				drbr_putback(ifp, txr->br, next);
114			break;
115		}
116		drbr_advance(ifp, txr->br);
117		/* Send a copy of the frame to the BPF listener */
118		ETHER_BPF_MTAP(ifp, next);
119		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
120			break;
121	}
122
123	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
124		ixl_txeof(que);
125
126	return (err);
127}
128
129/*
130 * Called from a taskqueue to drain queued transmit packets.
131 */
132void
133ixl_deferred_mq_start(void *arg, int pending)
134{
135	struct ixl_queue	*que = arg;
136        struct tx_ring		*txr = &que->txr;
137	struct ixl_vsi		*vsi = que->vsi;
138        struct ifnet		*ifp = vsi->ifp;
139
140	IXL_TX_LOCK(txr);
141	if (!drbr_empty(ifp, txr->br))
142		ixl_mq_start_locked(ifp, txr);
143	IXL_TX_UNLOCK(txr);
144}
145
146/*
147** Flush all queue ring buffers
148*/
149void
150ixl_qflush(struct ifnet *ifp)
151{
152	struct ixl_vsi	*vsi = ifp->if_softc;
153
154        for (int i = 0; i < vsi->num_queues; i++) {
155		struct ixl_queue *que = &vsi->queues[i];
156		struct tx_ring	*txr = &que->txr;
157		struct mbuf	*m;
158		IXL_TX_LOCK(txr);
159		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
160			m_freem(m);
161		IXL_TX_UNLOCK(txr);
162	}
163	if_qflush(ifp);
164}
165
166/*
167** Find mbuf chains passed to the driver
168** that are 'sparse', using more than 8
169** mbufs to deliver an mss-size chunk of data
170*/
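/*
** A chain is "sparse" when more than IXL_SPARSE_CHAIN mbufs are needed
** to cover a single MSS worth of payload; ixl_xmit() defragments such
** chains with m_defrag() before handing them to the hardware.
*/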
171static inline bool
172ixl_tso_detect_sparse(struct mbuf *mp)
173{
174	struct mbuf	*m;
175	int		num = 0, mss;
176	bool		ret = FALSE;
177
178	mss = mp->m_pkthdr.tso_segsz;
179	for (m = mp->m_next; m != NULL; m = m->m_next) {
180		num++;
181		mss -= m->m_len;
182		if (mss < 1)
183			break;
184		if (m->m_next == NULL)
185			break;
186	}
187	if (num > IXL_SPARSE_CHAIN)
188		ret = TRUE;
189
190	return (ret);
191}
192
193
194/*********************************************************************
195 *
196 *  This routine maps the mbufs to tx descriptors, allowing the
197 *  TX engine to transmit the packets.
198 *  	- return 0 on success, positive on failure
199 *
200 **********************************************************************/
201#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
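/*
** EOP marks the last data descriptor of a frame; RS (Report Status)
** asks the hardware to report completion for that descriptor, which
** this driver observes through the head write-back word read by
** ixl_get_tx_head().
*/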
202
203static int
204ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
205{
206	struct ixl_vsi		*vsi = que->vsi;
207	struct i40e_hw		*hw = vsi->hw;
208	struct tx_ring		*txr = &que->txr;
209	struct ixl_tx_buf	*buf;
210	struct i40e_tx_desc	*txd = NULL;
211	struct mbuf		*m_head, *m;
212	int             	i, j, error, nsegs, maxsegs;
213	int			first, last = 0;
214	u16			vtag = 0;
215	u32			cmd, off;
216	bus_dmamap_t		map;
217	bus_dma_tag_t		tag;
218	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
219
220
221	cmd = off = 0;
222	m_head = *m_headp;
223
224        /*
225         * Important to capture the first descriptor
226         * used because it will contain the index of
227         * the one we tell the hardware to report back
228         */
229        first = txr->next_avail;
230	buf = &txr->buffers[first];
231	map = buf->map;
232	tag = txr->tx_tag;
233	maxsegs = IXL_MAX_TX_SEGS;
234
235	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
236		/* Use larger mapping for TSO */
237		tag = txr->tso_tag;
238		maxsegs = IXL_MAX_TSO_SEGS;
239		if (ixl_tso_detect_sparse(m_head)) {
240			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				/* m_defrag() leaves the original chain intact on failure */
				que->mbuf_defrag_failed++;
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
241			*m_headp = m;
242		}
243	}
244
245	/*
246	 * Map the packet for DMA.
247	 */
248	error = bus_dmamap_load_mbuf_sg(tag, map,
249	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
250
251	if (error == EFBIG) {
252		struct mbuf *m;
253
254		m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
255		if (m == NULL) {
256			que->mbuf_defrag_failed++;
257			m_freem(*m_headp);
258			*m_headp = NULL;
259			return (ENOBUFS);
260		}
261		*m_headp = m;
262
263		/* Try it again */
264		error = bus_dmamap_load_mbuf_sg(tag, map,
265		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
266
267		if (error == ENOMEM) {
268			que->tx_dma_setup++;
269			return (error);
270		} else if (error != 0) {
271			que->tx_dma_setup++;
272			m_freem(*m_headp);
273			*m_headp = NULL;
274			return (error);
275		}
276	} else if (error == ENOMEM) {
277		que->tx_dma_setup++;
278		return (error);
279	} else if (error != 0) {
280		que->tx_dma_setup++;
281		m_freem(*m_headp);
282		*m_headp = NULL;
283		return (error);
284	}
285
286	/* Make certain there are enough descriptors */
287	if (nsegs > txr->avail - 2) {
288		txr->no_desc++;
289		error = ENOBUFS;
290		goto xmit_fail;
291	}
292	m_head = *m_headp;
293
294	/* Set up the TSO/CSUM offload */
295	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
296		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
297		if (error)
298			goto xmit_fail;
299	}
300
301	cmd |= I40E_TX_DESC_CMD_ICRC;
302	/* Grab the VLAN tag */
303	if (m_head->m_flags & M_VLANTAG) {
304		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
305		vtag = htole16(m_head->m_pkthdr.ether_vtag);
306	}
307
308	i = txr->next_avail;
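	/*
	** Each data descriptor packs the descriptor type, the command
	** flags (cmd), the header offsets computed above (off), the
	** buffer length and the VLAN tag into a single 64-bit qword.
	*/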
309	for (j = 0; j < nsegs; j++) {
310		bus_size_t seglen;
311
312		buf = &txr->buffers[i];
313		buf->tag = tag; /* Keep track of the type tag */
314		txd = &txr->base[i];
315		seglen = segs[j].ds_len;
316
317		txd->buffer_addr = htole64(segs[j].ds_addr);
318		txd->cmd_type_offset_bsz =
319		    htole64(I40E_TX_DESC_DTYPE_DATA
320		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
321		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
322		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
323		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
324
325		last = i; /* descriptor that will get completion IRQ */
326
327		if (++i == que->num_desc)
328			i = 0;
329
330		buf->m_head = NULL;
331		buf->eop_index = -1;
332	}
333	/* Set the last descriptor for report */
334	txd->cmd_type_offset_bsz |=
335	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
336	txr->avail -= nsegs;
337	txr->next_avail = i;
338
339	buf->m_head = m_head;
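	/*
	** The loaded map originally belonged to buffers[first]; it must
	** stay with the buffer that owns m_head so ixl_txeof() can unload
	** it, while buffers[first] inherits the spare map for reuse.
	*/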
340	/* Swap the dma map between the first and last descriptor */
341	txr->buffers[first].map = buf->map;
342	buf->map = map;
343	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
344
345        /* Set the index of the descriptor that will be marked done */
346        buf = &txr->buffers[first];
347	buf->eop_index = last;
348
349        bus_dmamap_sync(txr->dma.tag, txr->dma.map,
350            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
351	/*
352	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
353	 * hardware that this frame is available to transmit.
354	 */
355	++txr->total_packets;
356	wr32(hw, txr->tail, i);
357
358	ixl_flush(hw);
359	/* Mark outstanding work */
360	if (que->busy == 0)
361		que->busy = 1;
362	return (0);
363
364xmit_fail:
365	bus_dmamap_unload(tag, buf->map);
366	return (error);
367}
368
369
370/*********************************************************************
371 *
372 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
373 *  the information needed to transmit a packet on the wire. This is
374 *  called only once at attach, setup is done every reset.
375 *
376 **********************************************************************/
377int
378ixl_allocate_tx_data(struct ixl_queue *que)
379{
380	struct tx_ring		*txr = &que->txr;
381	struct ixl_vsi		*vsi = que->vsi;
382	device_t		dev = vsi->dev;
383	struct ixl_tx_buf	*buf;
384	int			error = 0;
385
386	/*
387	 * Setup DMA descriptor areas.
388	 */
389	if ((error = bus_dma_tag_create(NULL,		/* parent */
390			       1, 0,			/* alignment, bounds */
391			       BUS_SPACE_MAXADDR,	/* lowaddr */
392			       BUS_SPACE_MAXADDR,	/* highaddr */
393			       NULL, NULL,		/* filter, filterarg */
394			       IXL_TSO_SIZE,		/* maxsize */
395			       IXL_MAX_TX_SEGS,		/* nsegments */
396			       PAGE_SIZE,		/* maxsegsize */
397			       0,			/* flags */
398			       NULL,			/* lockfunc */
399			       NULL,			/* lockfuncarg */
400			       &txr->tx_tag))) {
401		device_printf(dev,"Unable to allocate TX DMA tag\n");
402		goto fail;
403	}
404
405	/* Make a special tag for TSO */
406	if ((error = bus_dma_tag_create(NULL,		/* parent */
407			       1, 0,			/* alignment, bounds */
408			       BUS_SPACE_MAXADDR,	/* lowaddr */
409			       BUS_SPACE_MAXADDR,	/* highaddr */
410			       NULL, NULL,		/* filter, filterarg */
411			       IXL_TSO_SIZE,		/* maxsize */
412			       IXL_MAX_TSO_SEGS,	/* nsegments */
413			       PAGE_SIZE,		/* maxsegsize */
414			       0,			/* flags */
415			       NULL,			/* lockfunc */
416			       NULL,			/* lockfuncarg */
417			       &txr->tso_tag))) {
418		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
419		goto fail;
420	}
421
422	if (!(txr->buffers =
423	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
424	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
425		device_printf(dev, "Unable to allocate tx_buffer memory\n");
426		error = ENOMEM;
427		goto fail;
428	}
429
430        /* Create the descriptor buffer default dma maps */
431	buf = txr->buffers;
432	for (int i = 0; i < que->num_desc; i++, buf++) {
433		buf->tag = txr->tx_tag;
434		error = bus_dmamap_create(buf->tag, 0, &buf->map);
435		if (error != 0) {
436			device_printf(dev, "Unable to create TX DMA map\n");
437			goto fail;
438		}
439	}
440fail:
441	return (error);
442}
443
444
445/*********************************************************************
446 *
447 *  (Re)Initialize a queue transmit ring.
448 *	- called by init, it clears the descriptor ring,
449 *	  and frees any stale mbufs
450 *
451 **********************************************************************/
452void
453ixl_init_tx_ring(struct ixl_queue *que)
454{
455	struct tx_ring *txr = &que->txr;
456	struct ixl_tx_buf *buf;
457#ifdef DEV_NETMAP
458	struct ixl_vsi *vsi = que->vsi;
459	struct netmap_adapter *na = NA(vsi->ifp);
460	struct netmap_slot *slot;
461#endif /* DEV_NETMAP */
462
463	/* Clear the old ring contents */
464	IXL_TX_LOCK(txr);
465#ifdef DEV_NETMAP
466	slot = netmap_reset(na, NR_TX, que->me, 0);
467#endif
468	bzero((void *)txr->base,
469	      (sizeof(struct i40e_tx_desc)) * que->num_desc);
470
471	/* Reset indices */
472	txr->next_avail = 0;
473	txr->next_to_clean = 0;
474
475#ifdef IXL_FDIR
476	/* Initialize flow director */
477	txr->atr_rate = ixl_atr_rate;
478	txr->atr_count = 0;
479#endif
480
481	/* Free any existing tx mbufs. */
482        buf = txr->buffers;
483	for (int i = 0; i < que->num_desc; i++, buf++) {
484		if (buf->m_head != NULL) {
485			bus_dmamap_sync(buf->tag, buf->map,
486			    BUS_DMASYNC_POSTWRITE);
487			bus_dmamap_unload(buf->tag, buf->map);
488			m_freem(buf->m_head);
489			buf->m_head = NULL;
490		}
491#ifdef DEV_NETMAP
492		if (slot)
493		{
494			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
495			netmap_load_map(txr->tag, buf->map, NMB(slot + si));
496		}
497#endif
498		/* Clear the EOP index */
499		buf->eop_index = -1;
500        }
501
502	/* Set number of descriptors available */
503	txr->avail = que->num_desc;
504
505	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
506	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
507	IXL_TX_UNLOCK(txr);
508}
509
510
511/*********************************************************************
512 *
513 *  Free transmit ring related data structures.
514 *
515 **********************************************************************/
516void
517ixl_free_que_tx(struct ixl_queue *que)
518{
519	struct tx_ring *txr = &que->txr;
520	struct ixl_tx_buf *buf;
521
522	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
523
524	for (int i = 0; i < que->num_desc; i++) {
525		buf = &txr->buffers[i];
526		if (buf->m_head != NULL) {
527			bus_dmamap_sync(buf->tag, buf->map,
528			    BUS_DMASYNC_POSTWRITE);
529			bus_dmamap_unload(buf->tag,
530			    buf->map);
531			m_freem(buf->m_head);
532			buf->m_head = NULL;
533			if (buf->map != NULL) {
534				bus_dmamap_destroy(buf->tag,
535				    buf->map);
536				buf->map = NULL;
537			}
538		} else if (buf->map != NULL) {
539			bus_dmamap_unload(buf->tag,
540			    buf->map);
541			bus_dmamap_destroy(buf->tag,
542			    buf->map);
543			buf->map = NULL;
544		}
545	}
546	if (txr->br != NULL)
547		buf_ring_free(txr->br, M_DEVBUF);
548	if (txr->buffers != NULL) {
549		free(txr->buffers, M_DEVBUF);
550		txr->buffers = NULL;
551	}
552	if (txr->tx_tag != NULL) {
553		bus_dma_tag_destroy(txr->tx_tag);
554		txr->tx_tag = NULL;
555	}
556	if (txr->tso_tag != NULL) {
557		bus_dma_tag_destroy(txr->tso_tag);
558		txr->tso_tag = NULL;
559	}
560
561	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
562	return;
563}
564
565/*********************************************************************
566 *
567 *  Setup descriptor for hw offloads
568 *
569 **********************************************************************/
570
571static int
572ixl_tx_setup_offload(struct ixl_queue *que,
573    struct mbuf *mp, u32 *cmd, u32 *off)
574{
575	struct ether_vlan_header	*eh;
576	struct ip			*ip = NULL;
577	struct tcphdr			*th = NULL;
578	struct ip6_hdr			*ip6;
579	int				elen, ip_hlen = 0, tcp_hlen;
580	u16				etype;
581	u8				ipproto = 0;
582	bool				tso = FALSE;
583
584
585	/* Set up the TSO context descriptor if required */
586	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
587		tso = ixl_tso_setup(que, mp);
588		if (tso)
589			++que->tso;
590		else
591			return (ENXIO);
592	}
593
594	/*
595	 * Determine where frame payload starts.
596	 * Jump over vlan headers if already present,
597	 * helpful for QinQ too.
598	 */
599	eh = mtod(mp, struct ether_vlan_header *);
600	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
601		etype = ntohs(eh->evl_proto);
602		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
603	} else {
604		etype = ntohs(eh->evl_encap_proto);
605		elen = ETHER_HDR_LEN;
606	}
607
608	switch (etype) {
609		case ETHERTYPE_IP:
610			ip = (struct ip *)(mp->m_data + elen);
611			ip_hlen = ip->ip_hl << 2;
612			ipproto = ip->ip_p;
613			th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
614			/* The IP checksum must be recalculated with TSO */
615			if (tso)
616				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
617			else
618				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
619			break;
620		case ETHERTYPE_IPV6:
621			ip6 = (struct ip6_hdr *)(mp->m_data + elen);
622			ip_hlen = sizeof(struct ip6_hdr);
623			ipproto = ip6->ip6_nxt;
624			th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
625			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
626			/* Falls thru */
627		default:
628			break;
629	}
630
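	/*
	** MACLEN is expressed in 2-byte units and IPLEN (like the L4
	** length fields below) in 4-byte units; e.g. a plain 14-byte
	** Ethernet header gives MACLEN 7 and a 20-byte IPv4 header IPLEN 5.
	*/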
631	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
632	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
633
634	switch (ipproto) {
635		case IPPROTO_TCP:
636			tcp_hlen = th->th_off << 2;
637			if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
638				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
639				*off |= (tcp_hlen >> 2) <<
640				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
641			}
642#ifdef IXL_FDIR
643			ixl_atr(que, th, etype);
644#endif
645			break;
646		case IPPROTO_UDP:
647			if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
648				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
649				*off |= (sizeof(struct udphdr) >> 2) <<
650				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
651			}
652			break;
653
654		case IPPROTO_SCTP:
655			if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
656				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
657				*off |= (sizeof(struct sctphdr) >> 2) <<
658				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
659			}
660			/* Fall Thru */
661		default:
662			break;
663	}
664
665        return (0);
666}
667
668
669/**********************************************************************
670 *
671 *  Setup context for hardware segmentation offload (TSO)
672 *
673 **********************************************************************/
674static bool
675ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
676{
677	struct tx_ring			*txr = &que->txr;
678	struct i40e_tx_context_desc	*TXD;
679	struct ixl_tx_buf		*buf;
680	u32				cmd, mss, type, tsolen;
681	u16				etype;
682	int				idx, elen, ip_hlen, tcp_hlen;
683	struct ether_vlan_header	*eh;
684	struct ip			*ip;
685	struct ip6_hdr			*ip6;
686	struct tcphdr			*th;
687	u64				type_cmd_tso_mss;
688
689	/*
690	 * Determine where frame payload starts.
691	 * Jump over vlan headers if already present
692	 */
693	eh = mtod(mp, struct ether_vlan_header *);
694	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
695		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
696		etype = eh->evl_proto;
697	} else {
698		elen = ETHER_HDR_LEN;
699		etype = eh->evl_encap_proto;
700	}
701
702        switch (ntohs(etype)) {
703#ifdef INET6
704	case ETHERTYPE_IPV6:
705		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
706		if (ip6->ip6_nxt != IPPROTO_TCP)
707			return (FALSE);	/* bool function: ENXIO would read as TRUE */
708		ip_hlen = sizeof(struct ip6_hdr);
709		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
710		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
711		tcp_hlen = th->th_off << 2;
712		break;
713#endif
714#ifdef INET
715	case ETHERTYPE_IP:
716		ip = (struct ip *)(mp->m_data + elen);
717		if (ip->ip_p != IPPROTO_TCP)
718			return (FALSE);	/* bool function: ENXIO would read as TRUE */
719		ip->ip_sum = 0;
720		ip_hlen = ip->ip_hl << 2;
721		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
722		th->th_sum = in_pseudo(ip->ip_src.s_addr,
723		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
724		tcp_hlen = th->th_off << 2;
725		break;
726#endif
727	default:
728		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
729		    __func__, ntohs(etype));
730		break;
731        }
732
733        /* Ensure we have at least the IP+TCP header in the first mbuf. */
734        if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
735		return FALSE;
736
737	idx = txr->next_avail;
738	buf = &txr->buffers[idx];
739	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
740	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
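	/*
	** tsolen is the TCP payload length only; the L2/L3/L4 headers are
	** excluded since they are regenerated for every segment.
	*/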
741
742	type = I40E_TX_DESC_DTYPE_CONTEXT;
743	cmd = I40E_TX_CTX_DESC_TSO;
744	mss = mp->m_pkthdr.tso_segsz;
745
746	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
747	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
748	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
749	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
750	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
751
752	TXD->tunneling_params = htole32(0);
753	buf->m_head = NULL;
754	buf->eop_index = -1;
755
756	if (++idx == que->num_desc)
757		idx = 0;
758
759	txr->avail--;
760	txr->next_avail = idx;
761
762	return TRUE;
763}
764
765/*
766** ixl_get_tx_head - Retrieve the value from the
767**    location the HW records its HEAD index
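**    (head write-back). That location is the word just past the last
**    descriptor, &txr->base[num_desc], so the ring allocation presumably
**    reserves one extra descriptor-sized slot for it.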
768*/
769static inline u32
770ixl_get_tx_head(struct ixl_queue *que)
771{
772	struct tx_ring  *txr = &que->txr;
773	void *head = &txr->base[que->num_desc];
774	return LE32_TO_CPU(*(volatile __le32 *)head);
775}
776
777/**********************************************************************
778 *
779 *  Examine each tx_buffer in the used queue. If the hardware is done
780 *  processing the packet then free associated resources. The
781 *  tx_buffer is put back on the free queue.
782 *
783 **********************************************************************/
784bool
785ixl_txeof(struct ixl_queue *que)
786{
787	struct ixl_vsi		*vsi = que->vsi;
788	struct ifnet		*ifp = vsi->ifp;
789	struct tx_ring		*txr = &que->txr;
790	u32			first, last, head, done, processed;
791	struct ixl_tx_buf	*buf;
792	struct i40e_tx_desc	*tx_desc, *eop_desc;
793
794
795	mtx_assert(&txr->mtx, MA_OWNED);
796
797#ifdef DEV_NETMAP
798	if (ifp->if_capenable & IFCAP_NETMAP) {
799		struct netmap_adapter *na = NA(ifp);
800		struct netmap_kring *kring = &na->tx_rings[que->me];
801		tx_desc = txr->base;
802		bus_dmamap_sync(txr->dma.tag, txr->dma.map,
803		     BUS_DMASYNC_POSTREAD);
804		if (!netmap_mitigate ||
805		    (kring->nr_kflags < kring->nkr_num_slots &&
806		    tx_desc[kring->nr_kflags].cmd_type_offset_bsz &
807		        htole32(I40E_TX_DESC_DTYPE_DESC_DONE)))
808		{
809#if NETMAP_API < 4
810			struct ixl_pf *pf = vsi->pf;
811			kring->nr_kflags = kring->nkr_num_slots;
812			selwakeuppri(&na->tx_rings[que->me].si, PI_NET);
813			IXL_TX_UNLOCK(txr);
814			IXL_PF_LOCK(pf);
815			selwakeuppri(&na->tx_si, PI_NET);
816			IXL_PF_UNLOCK(pf);
817			IXL_TX_LOCK(txr);
818#else /* NETMAP_API >= 4 */
819			netmap_tx_irq(ifp, txr->que->me);
820#endif /* NETMAP_API */
821		}
822		// XXX guessing there is no more work to be done
823		return FALSE;
824	}
825#endif /* DEV_NETMAP */
826
827	/* These are not the descriptors you seek, move along :) */
828	if (txr->avail == que->num_desc) {
829		que->busy = 0;
830		return FALSE;
831	}
832
833	processed = 0;
834	first = txr->next_to_clean;
835	buf = &txr->buffers[first];
836	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
837	last = buf->eop_index;
838	if (last == -1)
839		return FALSE;
840	eop_desc = (struct i40e_tx_desc *)&txr->base[last];
841
842	/* Get the Head WB value */
843	head = ixl_get_tx_head(que);
844
845	/*
846	** Get the index of the first descriptor
847	** BEYOND the EOP and call that 'done'.
848	** I do this so the comparison in the
849	** inner while loop below can be simple
850	*/
851	if (++last == que->num_desc) last = 0;
852	done = last;
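	/*
	** For example, with a 1024-descriptor ring and the EOP at index
	** 1023, 'done' wraps around to 0.
	*/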
853
854        bus_dmamap_sync(txr->dma.tag, txr->dma.map,
855            BUS_DMASYNC_POSTREAD);
856	/*
857	** The HEAD index of the ring is written in a
858	** defined location; this, rather than a done bit,
859	** is what is used to keep track of what must be
860	** 'cleaned'.
861	*/
862	while (first != head) {
863		/* We clean the range of the packet */
864		while (first != done) {
865			++txr->avail;
866			++processed;
867
868			if (buf->m_head) {
869				txr->bytes += /* for ITR adjustment */
870				    buf->m_head->m_pkthdr.len;
871				txr->tx_bytes += /* for TX stats */
872				    buf->m_head->m_pkthdr.len;
873				bus_dmamap_sync(buf->tag,
874				    buf->map,
875				    BUS_DMASYNC_POSTWRITE);
876				bus_dmamap_unload(buf->tag,
877				    buf->map);
878				m_freem(buf->m_head);
879				buf->m_head = NULL;
880				buf->map = NULL;
881			}
882			buf->eop_index = -1;
883
884			if (++first == que->num_desc)
885				first = 0;
886
887			buf = &txr->buffers[first];
888			tx_desc = &txr->base[first];
889		}
890		++txr->packets;
891		++ifp->if_opackets;
892		/* See if there is more work now */
893		last = buf->eop_index;
894		if (last != -1) {
895			eop_desc = &txr->base[last];
896			/* Get next done point */
897			if (++last == que->num_desc) last = 0;
898			done = last;
899		} else
900			break;
901	}
902	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
903	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
904
905	txr->next_to_clean = first;
906
907
908	/*
909	** Hang detection: we know there is work
910	** outstanding, or the early return above
911	** would have been taken, so indicate an
912	** unsuccessful pass; in the local timer,
913	** if the value grows too large the queue
914	** will be considered hung. If anything has
915	** been cleaned, reset the state.
916	*/
917	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
918		++que->busy;
919
920	if (processed)
921		que->busy = 1; /* Note this turns off HUNG */
922
923	/*
924	 * If there are no pending descriptors, clear the timeout.
925	 */
926	if (txr->avail == que->num_desc) {
927		que->busy = 0;
928		return FALSE;
929	}
930
931	return TRUE;
932}
933
934/*********************************************************************
935 *
936 *  Refresh mbuf buffers for RX descriptor rings
937 *   - now keeps its own state so discards due to resource
938 *     exhaustion are unnecessary; if an mbuf cannot be obtained
939 *     it just returns, keeping its placeholder, so it can simply
940 *     be recalled to try again.
941 *
942 **********************************************************************/
943static void
944ixl_refresh_mbufs(struct ixl_queue *que, int limit)
945{
946	struct ixl_vsi		*vsi = que->vsi;
947	struct rx_ring		*rxr = &que->rxr;
948	bus_dma_segment_t	hseg[1];
949	bus_dma_segment_t	pseg[1];
950	struct ixl_rx_buf	*buf;
951	struct mbuf		*mh, *mp;
952	int			i, j, nsegs, error;
953	bool			refreshed = FALSE;
954
955	i = j = rxr->next_refresh;
956	/* Control the loop with one beyond */
957	if (++j == que->num_desc)
958		j = 0;
959
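	/*
	** j runs one slot ahead of i, so the loop below stops one
	** descriptor short of 'limit' (the next slot the RX cleanup
	** routine will examine); next_refresh never catches up to it.
	*/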
960	while (j != limit) {
961		buf = &rxr->buffers[i];
962		if (rxr->hdr_split == FALSE)
963			goto no_split;
964
965		if (buf->m_head == NULL) {
966			mh = m_gethdr(M_NOWAIT, MT_DATA);
967			if (mh == NULL)
968				goto update;
969		} else
970			mh = buf->m_head;
971
972		mh->m_pkthdr.len = MHLEN;
973		mh->m_len = MHLEN;
974		mh->m_flags |= M_PKTHDR;
975		/* Get the memory mapping */
976		error = bus_dmamap_load_mbuf_sg(rxr->htag,
977		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
978		if (error != 0) {
979			printf("Refresh mbufs: hdr dmamap load"
980			    " failure - %d\n", error);
981			m_free(mh);
982			buf->m_head = NULL;
983			goto update;
984		}
985		buf->m_head = mh;
986		bus_dmamap_sync(rxr->htag, buf->hmap,
987		    BUS_DMASYNC_PREREAD);
988		rxr->base[i].read.hdr_addr =
989		   htole64(hseg[0].ds_addr);
990
991no_split:
992		if (buf->m_pack == NULL) {
993			mp = m_getjcl(M_NOWAIT, MT_DATA,
994			    M_PKTHDR, rxr->mbuf_sz);
995			if (mp == NULL)
996				goto update;
997		} else
998			mp = buf->m_pack;
999
1000		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1001		/* Get the memory mapping */
1002		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1003		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1004		if (error != 0) {
1005			printf("Refresh mbufs: payload dmamap load"
1006			    " failure - %d\n", error);
1007			m_free(mp);
1008			buf->m_pack = NULL;
1009			goto update;
1010		}
1011		buf->m_pack = mp;
1012		bus_dmamap_sync(rxr->ptag, buf->pmap,
1013		    BUS_DMASYNC_PREREAD);
1014#ifdef DEV_NETMAP
1015		rxr->base[i].read.pkt_addr = buf->addr;
1016#else /* !DEV_NETMAP */
1017		rxr->base[i].read.pkt_addr =
1018		   htole64(pseg[0].ds_addr);
1019#endif /* DEV_NETMAP */
1020		/* Used only when doing header split */
1021		rxr->base[i].read.hdr_addr = 0;
1022
1023		refreshed = TRUE;
1024		/* Next is precalculated */
1025		i = j;
1026		rxr->next_refresh = i;
1027		if (++j == que->num_desc)
1028			j = 0;
1029	}
1030update:
1031	if (refreshed) /* Update hardware tail index */
1032		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1033	return;
1034}
1035
1036
1037/*********************************************************************
1038 *
1039 *  Allocate memory for rx_buffer structures. Since we use one
1040 *  rx_buffer per descriptor, the maximum number of rx_buffer's
1041 *  that we'll need is equal to the number of receive descriptors
1042 *  that we've defined.
1043 *
1044 **********************************************************************/
1045int
1046ixl_allocate_rx_data(struct ixl_queue *que)
1047{
1048	struct rx_ring		*rxr = &que->rxr;
1049	struct ixl_vsi		*vsi = que->vsi;
1050	device_t 		dev = vsi->dev;
1051	struct ixl_rx_buf 	*buf;
1052	int             	i, bsize, error;
1053
1054	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1055	if (!(rxr->buffers =
1056	    (struct ixl_rx_buf *) malloc(bsize,
1057	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1058		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1059		error = ENOMEM;
1060		return (error);
1061	}
1062
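	/*
	** Two tags are used: htag maps the small header mbufs (MSIZE,
	** only used when header split is enabled) and ptag maps the
	** payload clusters (up to MJUM16BYTES).
	*/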
1063	if ((error = bus_dma_tag_create(NULL,	/* parent */
1064				   1, 0,	/* alignment, bounds */
1065				   BUS_SPACE_MAXADDR,	/* lowaddr */
1066				   BUS_SPACE_MAXADDR,	/* highaddr */
1067				   NULL, NULL,		/* filter, filterarg */
1068				   MSIZE,		/* maxsize */
1069				   1,			/* nsegments */
1070				   MSIZE,		/* maxsegsize */
1071				   0,			/* flags */
1072				   NULL,		/* lockfunc */
1073				   NULL,		/* lockfuncarg */
1074				   &rxr->htag))) {
1075		device_printf(dev, "Unable to create RX DMA htag\n");
1076		return (error);
1077	}
1078
1079	if ((error = bus_dma_tag_create(NULL,	/* parent */
1080				   1, 0,	/* alignment, bounds */
1081				   BUS_SPACE_MAXADDR,	/* lowaddr */
1082				   BUS_SPACE_MAXADDR,	/* highaddr */
1083				   NULL, NULL,		/* filter, filterarg */
1084				   MJUM16BYTES,		/* maxsize */
1085				   1,			/* nsegments */
1086				   MJUM16BYTES,		/* maxsegsize */
1087				   0,			/* flags */
1088				   NULL,		/* lockfunc */
1089				   NULL,		/* lockfuncarg */
1090				   &rxr->ptag))) {
1091		device_printf(dev, "Unable to create RX DMA ptag\n");
1092		return (error);
1093	}
1094
1095	for (i = 0; i < que->num_desc; i++) {
1096		buf = &rxr->buffers[i];
1097		error = bus_dmamap_create(rxr->htag,
1098		    BUS_DMA_NOWAIT, &buf->hmap);
1099		if (error) {
1100			device_printf(dev, "Unable to create RX head map\n");
1101			break;
1102		}
1103		error = bus_dmamap_create(rxr->ptag,
1104		    BUS_DMA_NOWAIT, &buf->pmap);
1105		if (error) {
1106			device_printf(dev, "Unable to create RX pkt map\n");
1107			break;
1108		}
1109	}
1110
1111	return (error);
1112}
1113
1114
1115/*********************************************************************
1116 *
1117 *  (Re)Initialize the queue receive ring and its buffers.
1118 *
1119 **********************************************************************/
1120int
1121ixl_init_rx_ring(struct ixl_queue *que)
1122{
1123	struct ixl_vsi		*vsi = que->vsi;
1124	struct ifnet		*ifp = vsi->ifp;
1125	struct	rx_ring 	*rxr = &que->rxr;
1126	struct lro_ctrl		*lro = &rxr->lro;
1127	struct ixl_rx_buf	*buf;
1128	bus_dma_segment_t	pseg[1], hseg[1];
1129	int			rsize, nsegs, error = 0;
1130#ifdef DEV_NETMAP
1131	struct netmap_adapter *na = NA(ifp);
1132	struct netmap_slot *slot;
1133#endif /* DEV_NETMAP */
1134
1135	IXL_RX_LOCK(rxr);
1136#ifdef DEV_NETMAP
1137	slot = netmap_reset(na, NR_RX, que->me, 0);
1138#endif
1139	/* Clear the ring contents */
1140	rsize = roundup2(que->num_desc *
1141	    sizeof(union i40e_rx_desc), DBA_ALIGN);
1142	bzero((void *)rxr->base, rsize);
1143	/* Cleanup any existing buffers */
1144	for (int i = 0; i < que->num_desc; i++) {
1145		buf = &rxr->buffers[i];
1146		if (buf->m_head != NULL) {
1147			bus_dmamap_sync(rxr->htag, buf->hmap,
1148			    BUS_DMASYNC_POSTREAD);
1149			bus_dmamap_unload(rxr->htag, buf->hmap);
1150			buf->m_head->m_flags |= M_PKTHDR;
1151			m_freem(buf->m_head);
1152		}
1153		if (buf->m_pack != NULL) {
1154			bus_dmamap_sync(rxr->ptag, buf->pmap,
1155			    BUS_DMASYNC_POSTREAD);
1156			bus_dmamap_unload(rxr->ptag, buf->pmap);
1157			buf->m_pack->m_flags |= M_PKTHDR;
1158			m_freem(buf->m_pack);
1159		}
1160		buf->m_head = NULL;
1161		buf->m_pack = NULL;
1162	}
1163
1164	/* header split is off */
1165	rxr->hdr_split = FALSE;
1166
1167	/* Now replenish the mbufs */
1168	for (int j = 0; j != que->num_desc; ++j) {
1169		struct mbuf	*mh, *mp;
1170
1171		buf = &rxr->buffers[j];
1172#ifdef DEV_NETMAP
1173		if (slot)
1174		{
1175			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1176			u64 paddr;
1177			void *addr;
1178
1179			addr = PNMB(slot + sj, &paddr);
1180			netmap_load_map(rxr->ptag, buf->pmap, addr);
1181			/* Update descriptor and cached value */
1182			rxr->base[j].read.pkt_addr = htole64(paddr);
1183			buf->addr = htole64(paddr);
1184			continue;
1185		}
1186#endif /* DEV_NETMAP */
1187		/*
1188		** Don't allocate mbufs if not
1189		** doing header split, it's wasteful
1190		*/
1191		if (rxr->hdr_split == FALSE)
1192			goto skip_head;
1193
1194		/* First the header */
1195		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1196		if (buf->m_head == NULL) {
1197			error = ENOBUFS;
1198			goto fail;
1199		}
1200		m_adj(buf->m_head, ETHER_ALIGN);
1201		mh = buf->m_head;
1202		mh->m_len = mh->m_pkthdr.len = MHLEN;
1203		mh->m_flags |= M_PKTHDR;
1204		/* Get the memory mapping */
1205		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1206		    buf->hmap, buf->m_head, hseg,
1207		    &nsegs, BUS_DMA_NOWAIT);
1208		if (error != 0) /* Nothing elegant to do here */
1209			goto fail;
1210		bus_dmamap_sync(rxr->htag,
1211		    buf->hmap, BUS_DMASYNC_PREREAD);
1212		/* Update descriptor */
1213		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1214
1215skip_head:
1216		/* Now the payload cluster */
1217		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1218		    M_PKTHDR, rxr->mbuf_sz);
1219		if (buf->m_pack == NULL) {
1220			error = ENOBUFS;
1221                        goto fail;
1222		}
1223		mp = buf->m_pack;
1224		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1225		/* Get the memory mapping */
1226		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1227		    buf->pmap, mp, pseg,
1228		    &nsegs, BUS_DMA_NOWAIT);
1229		if (error != 0)
1230                        goto fail;
1231		bus_dmamap_sync(rxr->ptag,
1232		    buf->pmap, BUS_DMASYNC_PREREAD);
1233		/* Update descriptor */
1234		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1235		rxr->base[j].read.hdr_addr = 0;
1236	}
1237
1238
1239	/* Setup our descriptor indices */
1240	rxr->next_check = 0;
1241	rxr->next_refresh = 0;
1242	rxr->lro_enabled = FALSE;
1243	rxr->split = 0;
1244	rxr->bytes = 0;
1245	rxr->discard = FALSE;
1246
1247	/*
1248	** Now set up the LRO interface:
1249	*/
1250	if (ifp->if_capenable & IFCAP_LRO) {
1251		int err = tcp_lro_init(lro);
1252		if (err) {
1253			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1254			goto fail;
1255		}
1256		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1257		rxr->lro_enabled = TRUE;
1258		lro->ifp = vsi->ifp;
1259	}
1260
1261	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1262	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1263
1264fail:
1265	IXL_RX_UNLOCK(rxr);
1266	return (error);
1267}
1268
1269
1270/*********************************************************************
1271 *
1272 *  Free station receive ring data structures
1273 *
1274 **********************************************************************/
1275void
1276ixl_free_que_rx(struct ixl_queue *que)
1277{
1278	struct rx_ring		*rxr = &que->rxr;
1279	struct ixl_rx_buf	*buf;
1280
1281	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1282
1283	/* Cleanup any existing buffers */
1284	if (rxr->buffers != NULL) {
1285		for (int i = 0; i < que->num_desc; i++) {
1286			buf = &rxr->buffers[i];
1287			if (buf->m_head != NULL) {
1288				bus_dmamap_sync(rxr->htag, buf->hmap,
1289				    BUS_DMASYNC_POSTREAD);
1290				bus_dmamap_unload(rxr->htag, buf->hmap);
1291				buf->m_head->m_flags |= M_PKTHDR;
1292				m_freem(buf->m_head);
1293			}
1294			if (buf->m_pack != NULL) {
1295				bus_dmamap_sync(rxr->ptag, buf->pmap,
1296				    BUS_DMASYNC_POSTREAD);
1297				bus_dmamap_unload(rxr->ptag, buf->pmap);
1298				buf->m_pack->m_flags |= M_PKTHDR;
1299				m_freem(buf->m_pack);
1300			}
1301			buf->m_head = NULL;
1302			buf->m_pack = NULL;
1303			if (buf->hmap != NULL) {
1304				bus_dmamap_destroy(rxr->htag, buf->hmap);
1305				buf->hmap = NULL;
1306			}
1307			if (buf->pmap != NULL) {
1308				bus_dmamap_destroy(rxr->ptag, buf->pmap);
1309				buf->pmap = NULL;
1310			}
1311		}
1312		if (rxr->buffers != NULL) {
1313			free(rxr->buffers, M_DEVBUF);
1314			rxr->buffers = NULL;
1315		}
1316	}
1317
1318	if (rxr->htag != NULL) {
1319		bus_dma_tag_destroy(rxr->htag);
1320		rxr->htag = NULL;
1321	}
1322	if (rxr->ptag != NULL) {
1323		bus_dma_tag_destroy(rxr->ptag);
1324		rxr->ptag = NULL;
1325	}
1326
1327	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1328	return;
1329}
1330
1331static __inline void
1332ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1333{
1334        /*
1335         * At the moment LRO is only done for IPv4/TCP packets whose TCP
1336         * checksum has been verified by hardware and which carry no VLAN
1337         * tag in the ethernet header.
1338         */
1339        if (rxr->lro_enabled &&
1340            (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1341            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1342            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1343                /*
1344                 * Send to the stack if:
1345                 **  - LRO not enabled, or
1346                 **  - no LRO resources, or
1347                 **  - lro enqueue fails
1348                 */
1349                if (rxr->lro.lro_cnt != 0)
1350                        if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1351                                return;
1352        }
1353	IXL_RX_UNLOCK(rxr);
1354        (*ifp->if_input)(ifp, m);
1355	IXL_RX_LOCK(rxr);
1356}
1357
1358
1359static __inline void
1360ixl_rx_discard(struct rx_ring *rxr, int i)
1361{
1362	struct ixl_rx_buf	*rbuf;
1363
1364	rbuf = &rxr->buffers[i];
1365
1366        if (rbuf->fmp != NULL) {/* Partial chain ? */
1367		rbuf->fmp->m_flags |= M_PKTHDR;
1368                m_freem(rbuf->fmp);
1369                rbuf->fmp = NULL;
1370	}
1371
1372	/*
1373	** With advanced descriptors the writeback
1374	** clobbers the buffer addrs, so it's easier
1375	** to just free the existing mbufs and take
1376	** the normal refresh path to get new buffers
1377	** and mapping.
1378	*/
1379	if (rbuf->m_head) {
1380		m_free(rbuf->m_head);
1381		rbuf->m_head = NULL;
1382	}
1383
1384	if (rbuf->m_pack) {
1385		m_free(rbuf->m_pack);
1386		rbuf->m_pack = NULL;
1387	}
1388
1389	return;
1390}
1391
1392
1393/*********************************************************************
1394 *
1395 *  This routine executes in interrupt context. It replenishes
1396 *  the mbufs in the descriptor ring and sends data which has been
1397 *  dma'ed into host memory to the upper layer.
1398 *
1399 *  We loop at most count times if count is > 0, or until done if
1400 *  count < 0.
1401 *
1402 *  Return TRUE for more work, FALSE for all clean.
1403 *********************************************************************/
1404bool
1405ixl_rxeof(struct ixl_queue *que, int count)
1406{
1407	struct ixl_vsi		*vsi = que->vsi;
1408	struct rx_ring		*rxr = &que->rxr;
1409	struct ifnet		*ifp = vsi->ifp;
1410	struct lro_ctrl		*lro = &rxr->lro;
1411	struct lro_entry	*queued;
1412	int			i, nextp, processed = 0;
1413	union i40e_rx_desc	*cur;
1414	struct ixl_rx_buf	*rbuf, *nbuf;
1415
1416
1417	IXL_RX_LOCK(rxr);
1418
1419#ifdef DEV_NETMAP
1420#if NETMAP_API < 4
1421	if (ifp->if_capenable & IFCAP_NETMAP)
1422	{
1423		struct netmap_adapter *na = NA(ifp);
1424
1425		na->rx_rings[que->me].nr_kflags |= NKR_PENDINTR;
1426		selwakeuppri(&na->rx_rings[que->me].si, PI_NET);
1427		IXL_RX_UNLOCK(rxr);
1428		IXL_PF_LOCK(vsi->pf);
1429		selwakeuppri(&na->rx_si, PI_NET);
1430		IXL_PF_UNLOCK(vsi->pf);
1431		return (FALSE);
1432	}
1433#else /* NETMAP_API >= 4 */
1434	if (netmap_rx_irq(ifp, que->me, &processed))
1435	{
1436		IXL_RX_UNLOCK(rxr);
1437		return (FALSE);
1438	}
1439#endif /* NETMAP_API */
1440#endif /* DEV_NETMAP */
1441
1442	for (i = rxr->next_check; count != 0;) {
1443		struct mbuf	*sendmp, *mh, *mp;
1444		u32		rsc, status, error;
1445		u16		hlen, plen, vtag;
1446		u64		qword;
1447		u8		ptype;
1448		bool		eop;
1449
1450		/* Sync the ring. */
1451		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1452		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1453
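		/*
		** Qword 1 of the write-back descriptor packs the DD/EOP
		** status bits, the error bits, the header and payload
		** buffer lengths and the packet type; the shifts below
		** extract each field.
		*/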
1454		cur = &rxr->base[i];
1455		qword = le64toh(cur->wb.qword1.status_error_len);
1456		status = (qword & I40E_RXD_QW1_STATUS_MASK)
1457		    >> I40E_RXD_QW1_STATUS_SHIFT;
1458		error = (qword & I40E_RXD_QW1_ERROR_MASK)
1459		    >> I40E_RXD_QW1_ERROR_SHIFT;
1460		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1461		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1462		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1463		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1464		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1465		    >> I40E_RXD_QW1_PTYPE_SHIFT;
1466
1467		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1468			++rxr->not_done;
1469			break;
1470		}
1471		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1472			break;
1473
1474		count--;
1475		sendmp = NULL;
1476		nbuf = NULL;
1477		rsc = 0;
1478		cur->wb.qword1.status_error_len = 0;
1479		rbuf = &rxr->buffers[i];
1480		mh = rbuf->m_head;
1481		mp = rbuf->m_pack;
1482		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1483		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1484			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1485		else
1486			vtag = 0;
1487
1488		/*
1489		** Make sure bad packets are discarded;
1490		** note that only the EOP descriptor has valid
1491		** error results.
1492		*/
1493                if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1494			ifp->if_ierrors++;
1495			rxr->discarded++;
1496			ixl_rx_discard(rxr, i);
1497			goto next_desc;
1498		}
1499
1500		/* Prefetch the next buffer */
1501		if (!eop) {
1502			nextp = i + 1;
1503			if (nextp == que->num_desc)
1504				nextp = 0;
1505			nbuf = &rxr->buffers[nextp];
1506			prefetch(nbuf);
1507		}
1508
1509		/*
1510		** The header mbuf is ONLY used when header
1511		** split is enabled, otherwise we get normal
1512		** behavior, i.e., both header and payload
1513		** are DMA'd into the payload buffer.
1514		**
1515		** Rather than using the fmp/lmp global pointers
1516		** we now keep the head of a packet chain in the
1517		** buffer struct and pass this along from one
1518		** descriptor to the next, until we get EOP.
1519		*/
1520		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1521			if (hlen > IXL_RX_HDR)
1522				hlen = IXL_RX_HDR;
1523			mh->m_len = hlen;
1524			mh->m_flags |= M_PKTHDR;
1525			mh->m_next = NULL;
1526			mh->m_pkthdr.len = mh->m_len;
1527			/* Null buf pointer so it is refreshed */
1528			rbuf->m_head = NULL;
1529			/*
1530			** Check the payload length, this
1531			** could be zero if it's a small
1532			** packet.
1533			*/
1534			if (plen > 0) {
1535				mp->m_len = plen;
1536				mp->m_next = NULL;
1537				mp->m_flags &= ~M_PKTHDR;
1538				mh->m_next = mp;
1539				mh->m_pkthdr.len += mp->m_len;
1540				/* Null buf pointer so it is refreshed */
1541				rbuf->m_pack = NULL;
1542				rxr->split++;
1543			}
1544			/*
1545			** Now create the forward
1546			** chain so when complete
1547			** we won't have to.
1548			*/
1549                        if (eop == 0) {
1550				/* stash the chain head */
1551                                nbuf->fmp = mh;
1552				/* Make forward chain */
1553                                if (plen)
1554                                        mp->m_next = nbuf->m_pack;
1555                                else
1556                                        mh->m_next = nbuf->m_pack;
1557                        } else {
1558				/* Singlet, prepare to send */
1559                                sendmp = mh;
1560                                if (vtag) {
1561                                        sendmp->m_pkthdr.ether_vtag = vtag;
1562                                        sendmp->m_flags |= M_VLANTAG;
1563                                }
1564                        }
1565		} else {
1566			/*
1567			** Either no header split, or a
1568			** secondary piece of a fragmented
1569			** split packet.
1570			*/
1571			mp->m_len = plen;
1572			/*
1573			** See if there is a stored head
1574			** that determines what we are
1575			*/
1576			sendmp = rbuf->fmp;
1577			rbuf->m_pack = rbuf->fmp = NULL;
1578
1579			if (sendmp != NULL) /* secondary frag */
1580				sendmp->m_pkthdr.len += mp->m_len;
1581			else {
1582				/* first desc of a non-ps chain */
1583				sendmp = mp;
1584				sendmp->m_flags |= M_PKTHDR;
1585				sendmp->m_pkthdr.len = mp->m_len;
1586				if (vtag) {
1587					sendmp->m_pkthdr.ether_vtag = vtag;
1588					sendmp->m_flags |= M_VLANTAG;
1589				}
1590                        }
1591			/* Pass the head pointer on */
1592			if (eop == 0) {
1593				nbuf->fmp = sendmp;
1594				sendmp = NULL;
1595				mp->m_next = nbuf->m_pack;
1596			}
1597		}
1598		++processed;
1599		/* Sending this frame? */
1600		if (eop) {
1601			sendmp->m_pkthdr.rcvif = ifp;
1602			/* gather stats */
1603			ifp->if_ipackets++;
1604			rxr->rx_packets++;
1605			rxr->rx_bytes += sendmp->m_pkthdr.len;
1606			/* capture data for dynamic ITR adjustment */
1607			rxr->packets++;
1608			rxr->bytes += sendmp->m_pkthdr.len;
1609			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1610				ixl_rx_checksum(sendmp, status, error, ptype);
1611			sendmp->m_pkthdr.flowid = que->msix;
1612			sendmp->m_flags |= M_FLOWID;
1613		}
1614next_desc:
1615		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1616		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1617
1618		/* Advance our pointers to the next descriptor. */
1619		if (++i == que->num_desc)
1620			i = 0;
1621
1622		/* Now send to the stack or do LRO */
1623		if (sendmp != NULL) {
1624			rxr->next_check = i;
1625			ixl_rx_input(rxr, ifp, sendmp, ptype);
1626			i = rxr->next_check;
1627		}
1628
1629               /* Every 8 descriptors we go to refresh mbufs */
1630		if (processed == 8) {
1631			ixl_refresh_mbufs(que, i);
1632			processed = 0;
1633		}
1634	}
1635
1636	/* Refresh any remaining buf structs */
1637	if (ixl_rx_unrefreshed(que))
1638		ixl_refresh_mbufs(que, i);
1639
1640	rxr->next_check = i;
1641
1642	/*
1643	 * Flush any outstanding LRO work
1644	 */
1645	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1646		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1647		tcp_lro_flush(lro, queued);
1648	}
1649
1650	IXL_RX_UNLOCK(rxr);
1651	return (FALSE);
1652}
1653
1654
1655/*********************************************************************
1656 *
1657 *  Verify that the hardware indicated that the checksum is valid.
1658 *  Inform the stack about the status of the checksum so that the stack
1659 *  doesn't spend time verifying the checksum.
1660 *
1661 *********************************************************************/
1662static void
1663ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1664{
1665	struct i40e_rx_ptype_decoded decoded;
1666
1667	decoded = decode_rx_desc_ptype(ptype);
1668
1669	/* Errors? */
1670 	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1671	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1672		mp->m_pkthdr.csum_flags = 0;
1673		return;
1674	}
1675
1676	/* IPv6 packets with extension headers likely have a bad csum */
1677	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1678	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1679		if (status &
1680		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1681			mp->m_pkthdr.csum_flags = 0;
1682			return;
1683		}
1684
1685
1686	/* IP Checksum Good */
1687	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1688	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1689
1690	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1691		mp->m_pkthdr.csum_flags |=
1692		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1693		mp->m_pkthdr.csum_data |= htons(0xffff);
1694	}
1695	return;
1696}
1697