ixl_txrx.c revision 318357
1/******************************************************************************
2
3  Copyright (c) 2013-2015, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/11/sys/dev/ixl/ixl_txrx.c 318357 2017-05-16 17:49:15Z erj $*/
34
35/*
36**	IXL driver TX/RX Routines:
37**	    This was separated to allow usage by
38**	    both the PF and VF drivers.
39*/
40
41#ifndef IXL_STANDALONE_BUILD
42#include "opt_inet.h"
43#include "opt_inet6.h"
44#include "opt_rss.h"
45#endif
46
47#include "ixl.h"
48
49#ifdef RSS
50#include <net/rss_config.h>
51#endif
52
53/* Local Prototypes */
54static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55static void	ixl_refresh_mbufs(struct ixl_queue *, int);
56static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57static int	ixl_tx_setup_offload(struct ixl_queue *,
58		    struct mbuf *, u32 *, u32 *);
59static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60
61static inline void ixl_rx_discard(struct rx_ring *, int);
62static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
63		    struct mbuf *, u8);
64
65static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
66static int	ixl_tx_setup_offload(struct ixl_queue *que,
67    struct mbuf *mp, u32 *cmd, u32 *off);
68static inline u32 ixl_get_tx_head(struct ixl_queue *que);
69
70#ifdef DEV_NETMAP
71#include <dev/netmap/if_ixl_netmap.h>
72#endif /* DEV_NETMAP */
73
74/*
75 * @key: buffer into which the default RSS key is copied
76 */
77void
78ixl_get_default_rss_key(u32 *key)
79{
80	MPASS(key != NULL);
81
82	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
83	    0x183cfd8c, 0xce880440, 0x580cbc3c,
84	    0x35897377, 0x328b25e1, 0x4fa98922,
85	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
86	    0x0, 0x0, 0x0};
87
88	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
89}
90
91/*
92** Multiqueue Transmit driver
93*/
94int
95ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
96{
97	struct ixl_vsi		*vsi = ifp->if_softc;
98	struct ixl_queue	*que;
99	struct tx_ring		*txr;
100	int 			err, i;
101#ifdef RSS
102	u32			bucket_id;
103#endif
104
105	/*
106	** Which queue to use:
107	**
108	** When doing RSS, map it to the same outbound
109	** queue as the incoming flow would be mapped to.
110	** If everything is set up correctly, it should be
111	** the same bucket that the current CPU is mapped to.
112	*/
113	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
114#ifdef  RSS
115		if (rss_hash2bucket(m->m_pkthdr.flowid,
116		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
117			i = bucket_id % vsi->num_queues;
118                } else
119#endif
120                        i = m->m_pkthdr.flowid % vsi->num_queues;
121        } else
122		i = curcpu % vsi->num_queues;
123
124	que = &vsi->queues[i];
125	txr = &que->txr;
126
127	err = drbr_enqueue(ifp, txr->br, m);
128	if (err)
129		return (err);
130	if (IXL_TX_TRYLOCK(txr)) {
131		ixl_mq_start_locked(ifp, txr);
132		IXL_TX_UNLOCK(txr);
133	} else
134		taskqueue_enqueue(que->tq, &que->tx_task);
135
136	return (0);
137}
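/*
** Note on the deferred path above: when the TX lock cannot be taken,
** the mbuf stays on the buf_ring and que->tx_task (normally serviced
** by ixl_deferred_mq_start() below) drains it once the taskqueue runs,
** so ixl_mq_start() itself never blocks on the ring lock.
*/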
138
139int
140ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
141{
142	struct ixl_queue	*que = txr->que;
143	struct ixl_vsi		*vsi = que->vsi;
144        struct mbuf		*next;
145        int			err = 0;
146
147
148	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
149	    vsi->link_active == 0)
150		return (ENETDOWN);
151
152	/* Process the transmit queue */
153	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
154		if ((err = ixl_xmit(que, &next)) != 0) {
155			if (next == NULL)
156				drbr_advance(ifp, txr->br);
157			else
158				drbr_putback(ifp, txr->br, next);
159			break;
160		}
161		drbr_advance(ifp, txr->br);
162		/* Send a copy of the frame to the BPF listener */
163		ETHER_BPF_MTAP(ifp, next);
164		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
165			break;
166	}
167
168	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
169		ixl_txeof(que);
170
171	return (err);
172}
173
174/*
175 * Called from a taskqueue to drain queued transmit packets.
176 */
177void
178ixl_deferred_mq_start(void *arg, int pending)
179{
180	struct ixl_queue	*que = arg;
181        struct tx_ring		*txr = &que->txr;
182	struct ixl_vsi		*vsi = que->vsi;
183        struct ifnet		*ifp = vsi->ifp;
184
185	IXL_TX_LOCK(txr);
186	if (!drbr_empty(ifp, txr->br))
187		ixl_mq_start_locked(ifp, txr);
188	IXL_TX_UNLOCK(txr);
189}
190
191/*
192** Flush all queue ring buffers
193*/
194void
195ixl_qflush(struct ifnet *ifp)
196{
197	struct ixl_vsi	*vsi = ifp->if_softc;
198
199        for (int i = 0; i < vsi->num_queues; i++) {
200		struct ixl_queue *que = &vsi->queues[i];
201		struct tx_ring	*txr = &que->txr;
202		struct mbuf	*m;
203		IXL_TX_LOCK(txr);
204		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
205			m_freem(m);
206		IXL_TX_UNLOCK(txr);
207	}
208	if_qflush(ifp);
209}
210
211/*
212** Find mbuf chains passed to the driver
213** that are 'sparse', i.e. that use more than
214** IXL_SPARSE_CHAIN mbufs to deliver an MSS-sized chunk of data.
215*/
216static inline bool
217ixl_tso_detect_sparse(struct mbuf *mp)
218{
219	struct mbuf	*m;
220	int		num, mss;
221
222	num = 0;
223	mss = mp->m_pkthdr.tso_segsz;
224
225	/* Exclude first mbuf; assume it contains all headers */
226	for (m = mp->m_next; m != NULL; m = m->m_next) {
229		num++;
230		mss -= m->m_len % mp->m_pkthdr.tso_segsz;
231
232		if (mss < 1) {
233			if (num > IXL_SPARSE_CHAIN)
234				return (true);
235			num = (mss == 0) ? 0 : 1;
236			mss += mp->m_pkthdr.tso_segsz;
237		}
238	}
239
240	return (false);
241}
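/*
** Illustration of the heuristic above (hypothetical numbers): with
** tso_segsz = 1448 and a chain of 100-byte mbufs following the header
** mbuf, 'mss' drops by 100 per mbuf, so roughly 15 mbufs are needed to
** cover one segment.  Since that is well above IXL_SPARSE_CHAIN, the
** chain is considered sparse and ixl_xmit() will m_defrag() it first.
*/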
242
243
244/*********************************************************************
245 *
246 *  This routine maps the mbufs to tx descriptors, allowing the
247 *  TX engine to transmit the packets.
248 *  	- return 0 on success, positive on failure
249 *
250 **********************************************************************/
251#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
252
253static int
254ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
255{
256	struct ixl_vsi		*vsi = que->vsi;
257	struct i40e_hw		*hw = vsi->hw;
258	struct tx_ring		*txr = &que->txr;
259	struct ixl_tx_buf	*buf;
260	struct i40e_tx_desc	*txd = NULL;
261	struct mbuf		*m_head, *m;
262	int             	i, j, error, nsegs;
263	int			first, last = 0;
264	u16			vtag = 0;
265	u32			cmd, off;
266	bus_dmamap_t		map;
267	bus_dma_tag_t		tag;
268	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
269
270	cmd = off = 0;
271	m_head = *m_headp;
272
273        /*
274         * It is important to capture the first descriptor
275         * used, because its buffer will record the index of
276         * the descriptor we tell the hardware to report back on.
277         */
278        first = txr->next_avail;
279	buf = &txr->buffers[first];
280	map = buf->map;
281	tag = txr->tx_tag;
282
283	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
284		/* Use larger mapping for TSO */
285		tag = txr->tso_tag;
286		if (ixl_tso_detect_sparse(m_head)) {
287			m = m_defrag(m_head, M_NOWAIT);
288			if (m == NULL) {
289				m_freem(*m_headp);
290				*m_headp = NULL;
291				return (ENOBUFS);
292			}
293			*m_headp = m;
294		}
295	}
296
297	/*
298	 * Map the packet for DMA.
299	 */
300	error = bus_dmamap_load_mbuf_sg(tag, map,
301	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
302
303	if (error == EFBIG) {
304		struct mbuf *m;
305
306		m = m_defrag(*m_headp, M_NOWAIT);
307		if (m == NULL) {
308			que->mbuf_defrag_failed++;
309			m_freem(*m_headp);
310			*m_headp = NULL;
311			return (ENOBUFS);
312		}
313		*m_headp = m;
314
315		/* Try it again */
316		error = bus_dmamap_load_mbuf_sg(tag, map,
317		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
318
319		if (error != 0) {
320			que->tx_dmamap_failed++;
321			m_freem(*m_headp);
322			*m_headp = NULL;
323			return (error);
324		}
325	} else if (error != 0) {
326		que->tx_dmamap_failed++;
327		m_freem(*m_headp);
328		*m_headp = NULL;
329		return (error);
330	}
331
332	/* Make certain there are enough descriptors */
333	if (nsegs > txr->avail - 2) {
334		txr->no_desc++;
335		error = ENOBUFS;
336		goto xmit_fail;
337	}
338	m_head = *m_headp;
339
340	/* Set up the TSO/CSUM offload */
341	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
342		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
343		if (error)
344			goto xmit_fail;
345	}
346
347	cmd |= I40E_TX_DESC_CMD_ICRC;
348	/* Grab the VLAN tag */
349	if (m_head->m_flags & M_VLANTAG) {
350		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
351		vtag = htole16(m_head->m_pkthdr.ether_vtag);
352	}
353
354	i = txr->next_avail;
355	for (j = 0; j < nsegs; j++) {
356		bus_size_t seglen;
357
358		buf = &txr->buffers[i];
359		buf->tag = tag; /* Keep track of the type tag */
360		txd = &txr->base[i];
361		seglen = segs[j].ds_len;
362
363		txd->buffer_addr = htole64(segs[j].ds_addr);
364		txd->cmd_type_offset_bsz =
365		    htole64(I40E_TX_DESC_DTYPE_DATA
366		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
367		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
368		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
369		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
370
371		last = i; /* descriptor that will get completion IRQ */
372
373		if (++i == que->num_desc)
374			i = 0;
375
376		buf->m_head = NULL;
377		buf->eop_index = -1;
378	}
379	/* Set the last descriptor for report */
380	txd->cmd_type_offset_bsz |=
381	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
382	txr->avail -= nsegs;
383	txr->next_avail = i;
384
385	buf->m_head = m_head;
386	/* Swap the dma map between the first and last descriptor */
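	/*
	 * (The loaded map must end up on the last buffer, which also
	 * holds m_head, so that ixl_txeof() unloads the mapping for
	 * the whole chain when the packet completes.)
	 */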
387	txr->buffers[first].map = buf->map;
388	buf->map = map;
389	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
390
391        /* Set the index of the descriptor that will be marked done */
392        buf = &txr->buffers[first];
393	buf->eop_index = last;
394
395        bus_dmamap_sync(txr->dma.tag, txr->dma.map,
396            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
397	/*
398	 * Advance the Transmit Descriptor Tail (TDT); this tells the
399	 * hardware that this frame is available to transmit.
400	 */
401	++txr->total_packets;
402	wr32(hw, txr->tail, i);
403
404	/* Mark outstanding work */
405	atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
406	return (0);
407
408xmit_fail:
409	bus_dmamap_unload(tag, buf->map);
410	return (error);
411}
412
413
414/*********************************************************************
415 *
416 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
417 *  the information needed to transmit a packet on the wire. This is
418 *  called only once at attach; setup is done on every reset.
419 *
420 **********************************************************************/
421int
422ixl_allocate_tx_data(struct ixl_queue *que)
423{
424	struct tx_ring		*txr = &que->txr;
425	struct ixl_vsi		*vsi = que->vsi;
426	device_t		dev = vsi->dev;
427	struct ixl_tx_buf	*buf;
428	int			error = 0;
429
430	/*
431	 * Setup DMA descriptor areas.
432	 */
433	if ((error = bus_dma_tag_create(NULL,		/* parent */
434			       1, 0,			/* alignment, bounds */
435			       BUS_SPACE_MAXADDR,	/* lowaddr */
436			       BUS_SPACE_MAXADDR,	/* highaddr */
437			       NULL, NULL,		/* filter, filterarg */
438			       IXL_TSO_SIZE,		/* maxsize */
439			       IXL_MAX_TX_SEGS,		/* nsegments */
440			       PAGE_SIZE,		/* maxsegsize */
441			       0,			/* flags */
442			       NULL,			/* lockfunc */
443			       NULL,			/* lockfuncarg */
444			       &txr->tx_tag))) {
445		device_printf(dev,"Unable to allocate TX DMA tag\n");
446		goto fail;
447	}
448
449	/* Make a special tag for TSO */
450	if ((error = bus_dma_tag_create(NULL,		/* parent */
451			       1, 0,			/* alignment, bounds */
452			       BUS_SPACE_MAXADDR,	/* lowaddr */
453			       BUS_SPACE_MAXADDR,	/* highaddr */
454			       NULL, NULL,		/* filter, filterarg */
455			       IXL_TSO_SIZE,		/* maxsize */
456			       IXL_MAX_TSO_SEGS,	/* nsegments */
457			       PAGE_SIZE,		/* maxsegsize */
458			       0,			/* flags */
459			       NULL,			/* lockfunc */
460			       NULL,			/* lockfuncarg */
461			       &txr->tso_tag))) {
462		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
463		goto fail;
464	}
465
466	if (!(txr->buffers =
467	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
468	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
469		device_printf(dev, "Unable to allocate tx_buffer memory\n");
470		error = ENOMEM;
471		goto fail;
472	}
473
474        /* Create the descriptor buffer default dma maps */
475	buf = txr->buffers;
476	for (int i = 0; i < que->num_desc; i++, buf++) {
477		buf->tag = txr->tx_tag;
478		error = bus_dmamap_create(buf->tag, 0, &buf->map);
479		if (error != 0) {
480			device_printf(dev, "Unable to create TX DMA map\n");
481			goto fail;
482		}
483	}
484fail:
485	return (error);
486}
487
488
489/*********************************************************************
490 *
491 *  (Re)Initialize a queue transmit ring.
492 *	- called by init, it clears the descriptor ring,
493 *	  and frees any stale mbufs
494 *
495 **********************************************************************/
496void
497ixl_init_tx_ring(struct ixl_queue *que)
498{
499#ifdef DEV_NETMAP
500	struct netmap_adapter *na = NA(que->vsi->ifp);
501	struct netmap_slot *slot;
502#endif /* DEV_NETMAP */
503	struct tx_ring		*txr = &que->txr;
504	struct ixl_tx_buf	*buf;
505
506	/* Clear the old ring contents */
507	IXL_TX_LOCK(txr);
508
509#ifdef DEV_NETMAP
510	/*
511	 * (under lock): if in netmap mode, do some consistency
512	 * checks and set slot to entry 0 of the netmap ring.
513	 */
514	slot = netmap_reset(na, NR_TX, que->me, 0);
515#endif /* DEV_NETMAP */
516
517	bzero((void *)txr->base,
518	      (sizeof(struct i40e_tx_desc)) * que->num_desc);
519
520	/* Reset indices */
521	txr->next_avail = 0;
522	txr->next_to_clean = 0;
523
524	/* Reset watchdog status */
525	txr->watchdog_timer = 0;
526
527#ifdef IXL_FDIR
528	/* Initialize flow director */
529	txr->atr_rate = ixl_atr_rate;
530	txr->atr_count = 0;
531#endif
532	/* Free any existing tx mbufs. */
533        buf = txr->buffers;
534	for (int i = 0; i < que->num_desc; i++, buf++) {
535		if (buf->m_head != NULL) {
536			bus_dmamap_sync(buf->tag, buf->map,
537			    BUS_DMASYNC_POSTWRITE);
538			bus_dmamap_unload(buf->tag, buf->map);
539			m_freem(buf->m_head);
540			buf->m_head = NULL;
541		}
542#ifdef DEV_NETMAP
543		/*
544		 * In netmap mode, set the map for the packet buffer.
545		 * NOTE: Some drivers (not this one) also need to set
546		 * the physical buffer address in the NIC ring.
547		 * netmap_idx_n2k() maps a nic index, i, into the corresponding
548		 * netmap slot index, si
549		 */
550		if (slot) {
551			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
552			netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
553		}
554#endif /* DEV_NETMAP */
555		/* Clear the EOP index */
556		buf->eop_index = -1;
557        }
558
559	/* Set number of descriptors available */
560	txr->avail = que->num_desc;
561
562	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
563	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
564	IXL_TX_UNLOCK(txr);
565}
566
567
568/*********************************************************************
569 *
570 *  Free transmit ring related data structures.
571 *
572 **********************************************************************/
573void
574ixl_free_que_tx(struct ixl_queue *que)
575{
576	struct tx_ring *txr = &que->txr;
577	struct ixl_tx_buf *buf;
578
579	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
580
581	for (int i = 0; i < que->num_desc; i++) {
582		buf = &txr->buffers[i];
583		if (buf->m_head != NULL) {
584			bus_dmamap_sync(buf->tag, buf->map,
585			    BUS_DMASYNC_POSTWRITE);
586			bus_dmamap_unload(buf->tag,
587			    buf->map);
588			m_freem(buf->m_head);
589			buf->m_head = NULL;
590			if (buf->map != NULL) {
591				bus_dmamap_destroy(buf->tag,
592				    buf->map);
593				buf->map = NULL;
594			}
595		} else if (buf->map != NULL) {
596			bus_dmamap_unload(buf->tag,
597			    buf->map);
598			bus_dmamap_destroy(buf->tag,
599			    buf->map);
600			buf->map = NULL;
601		}
602	}
603	if (txr->br != NULL)
604		buf_ring_free(txr->br, M_DEVBUF);
605	if (txr->buffers != NULL) {
606		free(txr->buffers, M_DEVBUF);
607		txr->buffers = NULL;
608	}
609	if (txr->tx_tag != NULL) {
610		bus_dma_tag_destroy(txr->tx_tag);
611		txr->tx_tag = NULL;
612	}
613	if (txr->tso_tag != NULL) {
614		bus_dma_tag_destroy(txr->tso_tag);
615		txr->tso_tag = NULL;
616	}
617
618	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
619	return;
620}
621
622/*********************************************************************
623 *
624 *  Setup descriptor for hw offloads
625 *
626 **********************************************************************/
627
628static int
629ixl_tx_setup_offload(struct ixl_queue *que,
630    struct mbuf *mp, u32 *cmd, u32 *off)
631{
632	struct ether_vlan_header	*eh;
633#ifdef INET
634	struct ip			*ip = NULL;
635#endif
636	struct tcphdr			*th = NULL;
637#ifdef INET6
638	struct ip6_hdr			*ip6;
639#endif
640	int				elen, ip_hlen = 0, tcp_hlen;
641	u16				etype;
642	u8				ipproto = 0;
643	bool				tso = FALSE;
644
645	/* Set up the TSO context descriptor if required */
646	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
647		tso = ixl_tso_setup(que, mp);
648		if (tso)
649			++que->tso;
650		else
651			return (ENXIO);
652	}
653
654	/*
655	 * Determine where frame payload starts.
656	 * Jump over vlan headers if already present,
657	 * helpful for QinQ too.
658	 */
659	eh = mtod(mp, struct ether_vlan_header *);
660	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
661		etype = ntohs(eh->evl_proto);
662		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
663	} else {
664		etype = ntohs(eh->evl_encap_proto);
665		elen = ETHER_HDR_LEN;
666	}
667
668	switch (etype) {
669#ifdef INET
670		case ETHERTYPE_IP:
671			ip = (struct ip *)(mp->m_data + elen);
672			ip_hlen = ip->ip_hl << 2;
673			ipproto = ip->ip_p;
674			th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
675			/* The IP checksum must be recalculated with TSO */
676			if (tso)
677				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
678			else
679				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
680			break;
681#endif
682#ifdef INET6
683		case ETHERTYPE_IPV6:
684			ip6 = (struct ip6_hdr *)(mp->m_data + elen);
685			ip_hlen = sizeof(struct ip6_hdr);
686			ipproto = ip6->ip6_nxt;
687			th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
688			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
689			break;
690#endif
691		default:
692			break;
693	}
694
695	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
696	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
697
698	switch (ipproto) {
699		case IPPROTO_TCP:
700			tcp_hlen = th->th_off << 2;
701			if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
702				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
703				*off |= (tcp_hlen >> 2) <<
704				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
705			}
706#ifdef IXL_FDIR
707			ixl_atr(que, th, etype);
708#endif
709			break;
710		case IPPROTO_UDP:
711			if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
712				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
713				*off |= (sizeof(struct udphdr) >> 2) <<
714				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
715			}
716			break;
717
718		case IPPROTO_SCTP:
719			if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
720				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
721				*off |= (sizeof(struct sctphdr) >> 2) <<
722				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
723			}
724			/* Fall Thru */
725		default:
726			break;
727	}
728
729        return (0);
730}
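/*
** Worked example for the offset encoding above (hypothetical frame):
** a plain TCP/IPv4 packet with no VLAN tag has elen = 14, ip_hlen = 20
** and tcp_hlen = 20, so the descriptor gets MACLEN = 7 (2-byte words),
** IPLEN = 5 and L4LEN = 5 (both in 4-byte words), matching the
** elen >> 1, ip_hlen >> 2 and tcp_hlen >> 2 shifts used above.
*/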
731
732
733/**********************************************************************
734 *
735 *  Setup context for hardware segmentation offload (TSO)
736 *
737 **********************************************************************/
738static bool
739ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
740{
741	struct tx_ring			*txr = &que->txr;
742	struct i40e_tx_context_desc	*TXD;
743	struct ixl_tx_buf		*buf;
744	u32				cmd, mss, type, tsolen;
745	u16				etype;
746	int				idx, elen, ip_hlen, tcp_hlen;
747	struct ether_vlan_header	*eh;
748#ifdef INET
749	struct ip			*ip;
750#endif
751#ifdef INET6
752	struct ip6_hdr			*ip6;
753#endif
754#if defined(INET6) || defined(INET)
755	struct tcphdr			*th;
756#endif
757	u64				type_cmd_tso_mss;
758
759	/*
760	 * Determine where frame payload starts.
761	 * Jump over vlan headers if already present
762	 */
763	eh = mtod(mp, struct ether_vlan_header *);
764	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
765		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
766		etype = eh->evl_proto;
767	} else {
768		elen = ETHER_HDR_LEN;
769		etype = eh->evl_encap_proto;
770	}
771
772        switch (ntohs(etype)) {
773#ifdef INET6
774	case ETHERTYPE_IPV6:
775		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
776		if (ip6->ip6_nxt != IPPROTO_TCP)
777			return (FALSE);
778		ip_hlen = sizeof(struct ip6_hdr);
779		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
780		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
781		tcp_hlen = th->th_off << 2;
782		/*
783		 * The corresponding flag is set by the stack in the IPv4
784		 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
785		 * So, set it here because the rest of the flow requires it.
786		 */
787		mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
788		break;
789#endif
790#ifdef INET
791	case ETHERTYPE_IP:
792		ip = (struct ip *)(mp->m_data + elen);
793		if (ip->ip_p != IPPROTO_TCP)
794			return (FALSE);
795		ip->ip_sum = 0;
796		ip_hlen = ip->ip_hl << 2;
797		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
798		th->th_sum = in_pseudo(ip->ip_src.s_addr,
799		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
800		tcp_hlen = th->th_off << 2;
801		break;
802#endif
803	default:
804		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
805		    __func__, ntohs(etype));
806		return FALSE;
807        }
808
809        /* Ensure we have at least the IP+TCP header in the first mbuf. */
810        if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
811		return FALSE;
812
813	idx = txr->next_avail;
814	buf = &txr->buffers[idx];
815	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
816	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
817
818	type = I40E_TX_DESC_DTYPE_CONTEXT;
819	cmd = I40E_TX_CTX_DESC_TSO;
820	/* TSO MSS must not be less than 64 */
821	if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
822		que->mss_too_small++;
823		mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
824	}
825	mss = mp->m_pkthdr.tso_segsz;
826
827	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
828	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
829	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
830	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
831	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
832
833	TXD->tunneling_params = htole32(0);
834	buf->m_head = NULL;
835	buf->eop_index = -1;
836
837	if (++idx == que->num_desc)
838		idx = 0;
839
840	txr->avail--;
841	txr->next_avail = idx;
842
843	return TRUE;
844}
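/*
** Worked example for the context descriptor above (hypothetical
** numbers): a 7254-byte TCP/IPv4 send with 14 + 20 + 20 bytes of
** headers and tso_segsz = 1448 yields tsolen = 7200, so the hardware
** produces four full 1448-byte segments plus one 1408-byte tail
** segment, each with the replicated headers prepended.
*/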
845
846/*
847** ixl_get_tx_head - Retrieve the value from the
848**    location where the HW records its HEAD index
849*/
850static inline u32
851ixl_get_tx_head(struct ixl_queue *que)
852{
853	struct tx_ring  *txr = &que->txr;
854	void *head = &txr->base[que->num_desc];
855	return LE32_TO_CPU(*(volatile __le32 *)head);
856}
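/*
** Note: the head write-back value lives in host memory immediately
** after the last descriptor of the ring (index que->num_desc); the
** ring allocation is assumed to reserve that extra slot for the
** hardware to update.
*/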
857
858/**********************************************************************
859 *
860 *  Examine each tx_buffer in the used queue. If the hardware is done
861 *  processing the packet then free associated resources. The
862 *  tx_buffer is put back on the free queue.
863 *
864 **********************************************************************/
865bool
866ixl_txeof(struct ixl_queue *que)
867{
868	struct tx_ring		*txr = &que->txr;
869	u32			first, last, head, done, processed;
870	struct ixl_tx_buf	*buf;
871	struct i40e_tx_desc	*tx_desc, *eop_desc;
872
873
874	mtx_assert(&txr->mtx, MA_OWNED);
875
876#ifdef DEV_NETMAP
877	// XXX todo: implement moderation
878	if (netmap_tx_irq(que->vsi->ifp, que->me))
879		return FALSE;
880#endif /* DEF_NETMAP */
881
882	/* These are not the descriptors you seek, move along :) */
883	if (txr->avail == que->num_desc) {
884		atomic_store_rel_32(&txr->watchdog_timer, 0);
885		return FALSE;
886	}
887
888	processed = 0;
889	first = txr->next_to_clean;
890	buf = &txr->buffers[first];
891	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
892	last = buf->eop_index;
893	if (last == -1)
894		return FALSE;
895	eop_desc = (struct i40e_tx_desc *)&txr->base[last];
896
897	/* Get the Head WB value */
898	head = ixl_get_tx_head(que);
899
900	/*
901	** Get the index of the first descriptor
902	** BEYOND the EOP and call that 'done'.
903	** I do this so the comparison in the
904	** inner while loop below can be simple
905	*/
906	if (++last == que->num_desc) last = 0;
907	done = last;
908
909        bus_dmamap_sync(txr->dma.tag, txr->dma.map,
910            BUS_DMASYNC_POSTREAD);
911	/*
912	** The HEAD index of the ring is written to a
913	** defined location; this, rather than a done bit,
914	** is what is used to keep track of what must be
915	** 'cleaned'.
916	*/
917	while (first != head) {
918		/* We clean the range of the packet */
919		while (first != done) {
920			++txr->avail;
921			++processed;
922
923			if (buf->m_head) {
924				txr->bytes += /* for ITR adjustment */
925				    buf->m_head->m_pkthdr.len;
926				txr->tx_bytes += /* for TX stats */
927				    buf->m_head->m_pkthdr.len;
928				bus_dmamap_sync(buf->tag,
929				    buf->map,
930				    BUS_DMASYNC_POSTWRITE);
931				bus_dmamap_unload(buf->tag,
932				    buf->map);
933				m_freem(buf->m_head);
934				buf->m_head = NULL;
935				buf->map = NULL;
936			}
937			buf->eop_index = -1;
938
939			if (++first == que->num_desc)
940				first = 0;
941
942			buf = &txr->buffers[first];
943			tx_desc = &txr->base[first];
944		}
945		++txr->packets;
946		/* See if there is more work now */
947		last = buf->eop_index;
948		if (last != -1) {
949			eop_desc = &txr->base[last];
950			/* Get next done point */
951			if (++last == que->num_desc) last = 0;
952			done = last;
953		} else
954			break;
955	}
956	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
957	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
958
959	txr->next_to_clean = first;
960
961
962	/*
963	 * If there are no pending descriptors, clear the timeout.
964	 */
965	if (txr->avail == que->num_desc) {
966		atomic_store_rel_32(&txr->watchdog_timer, 0);
967		return FALSE;
968	}
969
970	return TRUE;
971}
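/*
** Example of the cleanup above (hypothetical indices): with
** next_to_clean = 10, a packet spanning descriptors 10-12 (eop_index
** = 12) and a head write-back of 13, the inner loop releases buffers
** 10 through 12 and next_to_clean advances to 13.
*/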
972
973/*********************************************************************
974 *
975 *  Refresh mbuf buffers for RX descriptor rings
976 *   - the ring keeps its own state, so discards due to resource
977 *     exhaustion are unnecessary; if an mbuf cannot be obtained
978 *     the routine just returns, keeping its placeholder, and can
979 *     simply be called again later to retry.
980 *
981 **********************************************************************/
982static void
983ixl_refresh_mbufs(struct ixl_queue *que, int limit)
984{
985	struct ixl_vsi		*vsi = que->vsi;
986	struct rx_ring		*rxr = &que->rxr;
987	bus_dma_segment_t	hseg[1];
988	bus_dma_segment_t	pseg[1];
989	struct ixl_rx_buf	*buf;
990	struct mbuf		*mh, *mp;
991	int			i, j, nsegs, error;
992	bool			refreshed = FALSE;
993
994	i = j = rxr->next_refresh;
995	/* Control the loop with one beyond */
996	if (++j == que->num_desc)
997		j = 0;
998
999	while (j != limit) {
1000		buf = &rxr->buffers[i];
1001		if (rxr->hdr_split == FALSE)
1002			goto no_split;
1003
1004		if (buf->m_head == NULL) {
1005			mh = m_gethdr(M_NOWAIT, MT_DATA);
1006			if (mh == NULL)
1007				goto update;
1008		} else
1009			mh = buf->m_head;
1010
1011		mh->m_pkthdr.len = mh->m_len = MHLEN;
1013		mh->m_flags |= M_PKTHDR;
1014		/* Get the memory mapping */
1015		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1016		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1017		if (error != 0) {
1018			printf("Refresh mbufs: hdr dmamap load"
1019			    " failure - %d\n", error);
1020			m_free(mh);
1021			buf->m_head = NULL;
1022			goto update;
1023		}
1024		buf->m_head = mh;
1025		bus_dmamap_sync(rxr->htag, buf->hmap,
1026		    BUS_DMASYNC_PREREAD);
1027		rxr->base[i].read.hdr_addr =
1028		   htole64(hseg[0].ds_addr);
1029
1030no_split:
1031		if (buf->m_pack == NULL) {
1032			mp = m_getjcl(M_NOWAIT, MT_DATA,
1033			    M_PKTHDR, rxr->mbuf_sz);
1034			if (mp == NULL)
1035				goto update;
1036		} else
1037			mp = buf->m_pack;
1038
1039		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1040		/* Get the memory mapping */
1041		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1042		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1043		if (error != 0) {
1044			printf("Refresh mbufs: payload dmamap load"
1045			    " failure - %d\n", error);
1046			m_free(mp);
1047			buf->m_pack = NULL;
1048			goto update;
1049		}
1050		buf->m_pack = mp;
1051		bus_dmamap_sync(rxr->ptag, buf->pmap,
1052		    BUS_DMASYNC_PREREAD);
1053		rxr->base[i].read.pkt_addr =
1054		   htole64(pseg[0].ds_addr);
1055		/* Used only when doing header split */
1056		rxr->base[i].read.hdr_addr = 0;
1057
1058		refreshed = TRUE;
1059		/* Next is precalculated */
1060		i = j;
1061		rxr->next_refresh = i;
1062		if (++j == que->num_desc)
1063			j = 0;
1064	}
1065update:
1066	if (refreshed) /* Update hardware tail index */
1067		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1068	return;
1069}
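/*
** Example of the refresh loop above (hypothetical indices): with
** num_desc = 1024, next_refresh = 1020 and limit = 4, descriptors
** 1020 through 1023 and then 0 through 2 are refilled; the loop stops
** one short of 'limit' and the new tail (3) is written to hardware.
*/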
1070
1071
1072/*********************************************************************
1073 *
1074 *  Allocate memory for rx_buffer structures. Since we use one
1075 *  rx_buffer per descriptor, the maximum number of rx_buffers
1076 *  that we'll need is equal to the number of receive descriptors
1077 *  that we've defined.
1078 *
1079 **********************************************************************/
1080int
1081ixl_allocate_rx_data(struct ixl_queue *que)
1082{
1083	struct rx_ring		*rxr = &que->rxr;
1084	struct ixl_vsi		*vsi = que->vsi;
1085	device_t 		dev = vsi->dev;
1086	struct ixl_rx_buf 	*buf;
1087	int             	i, bsize, error;
1088
1089	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1090	if (!(rxr->buffers =
1091	    (struct ixl_rx_buf *) malloc(bsize,
1092	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1093		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1094		error = ENOMEM;
1095		return (error);
1096	}
1097
1098	if ((error = bus_dma_tag_create(NULL,	/* parent */
1099				   1, 0,	/* alignment, bounds */
1100				   BUS_SPACE_MAXADDR,	/* lowaddr */
1101				   BUS_SPACE_MAXADDR,	/* highaddr */
1102				   NULL, NULL,		/* filter, filterarg */
1103				   MSIZE,		/* maxsize */
1104				   1,			/* nsegments */
1105				   MSIZE,		/* maxsegsize */
1106				   0,			/* flags */
1107				   NULL,		/* lockfunc */
1108				   NULL,		/* lockfuncarg */
1109				   &rxr->htag))) {
1110		device_printf(dev, "Unable to create RX DMA htag\n");
1111		return (error);
1112	}
1113
1114	if ((error = bus_dma_tag_create(NULL,	/* parent */
1115				   1, 0,	/* alignment, bounds */
1116				   BUS_SPACE_MAXADDR,	/* lowaddr */
1117				   BUS_SPACE_MAXADDR,	/* highaddr */
1118				   NULL, NULL,		/* filter, filterarg */
1119				   MJUM16BYTES,		/* maxsize */
1120				   1,			/* nsegments */
1121				   MJUM16BYTES,		/* maxsegsize */
1122				   0,			/* flags */
1123				   NULL,		/* lockfunc */
1124				   NULL,		/* lockfuncarg */
1125				   &rxr->ptag))) {
1126		device_printf(dev, "Unable to create RX DMA ptag\n");
1127		return (error);
1128	}
1129
1130	for (i = 0; i < que->num_desc; i++) {
1131		buf = &rxr->buffers[i];
1132		error = bus_dmamap_create(rxr->htag,
1133		    BUS_DMA_NOWAIT, &buf->hmap);
1134		if (error) {
1135			device_printf(dev, "Unable to create RX head map\n");
1136			break;
1137		}
1138		error = bus_dmamap_create(rxr->ptag,
1139		    BUS_DMA_NOWAIT, &buf->pmap);
1140		if (error) {
1141			device_printf(dev, "Unable to create RX pkt map\n");
1142			break;
1143		}
1144	}
1145
1146	return (error);
1147}
1148
1149
1150/*********************************************************************
1151 *
1152 *  (Re)Initialize the queue receive ring and its buffers.
1153 *
1154 **********************************************************************/
1155int
1156ixl_init_rx_ring(struct ixl_queue *que)
1157{
1158	struct	rx_ring 	*rxr = &que->rxr;
1159	struct ixl_vsi		*vsi = que->vsi;
1160#if defined(INET6) || defined(INET)
1161	struct ifnet		*ifp = vsi->ifp;
1162	struct lro_ctrl		*lro = &rxr->lro;
1163#endif
1164	struct ixl_rx_buf	*buf;
1165	bus_dma_segment_t	pseg[1], hseg[1];
1166	int			rsize, nsegs, error = 0;
1167#ifdef DEV_NETMAP
1168	struct netmap_adapter *na = NA(que->vsi->ifp);
1169	struct netmap_slot *slot;
1170#endif /* DEV_NETMAP */
1171
1172	IXL_RX_LOCK(rxr);
1173#ifdef DEV_NETMAP
1174	/* same as in ixl_init_tx_ring() */
1175	slot = netmap_reset(na, NR_RX, que->me, 0);
1176#endif /* DEV_NETMAP */
1177	/* Clear the ring contents */
1178	rsize = roundup2(que->num_desc *
1179	    sizeof(union i40e_rx_desc), DBA_ALIGN);
1180	bzero((void *)rxr->base, rsize);
1181	/* Cleanup any existing buffers */
1182	for (int i = 0; i < que->num_desc; i++) {
1183		buf = &rxr->buffers[i];
1184		if (buf->m_head != NULL) {
1185			bus_dmamap_sync(rxr->htag, buf->hmap,
1186			    BUS_DMASYNC_POSTREAD);
1187			bus_dmamap_unload(rxr->htag, buf->hmap);
1188			buf->m_head->m_flags |= M_PKTHDR;
1189			m_freem(buf->m_head);
1190		}
1191		if (buf->m_pack != NULL) {
1192			bus_dmamap_sync(rxr->ptag, buf->pmap,
1193			    BUS_DMASYNC_POSTREAD);
1194			bus_dmamap_unload(rxr->ptag, buf->pmap);
1195			buf->m_pack->m_flags |= M_PKTHDR;
1196			m_freem(buf->m_pack);
1197		}
1198		buf->m_head = NULL;
1199		buf->m_pack = NULL;
1200	}
1201
1202	/* header split is off */
1203	rxr->hdr_split = FALSE;
1204
1205	/* Now replenish the mbufs */
1206	for (int j = 0; j != que->num_desc; ++j) {
1207		struct mbuf	*mh, *mp;
1208
1209		buf = &rxr->buffers[j];
1210#ifdef DEV_NETMAP
1211		/*
1212		 * In netmap mode, fill the map and set the buffer
1213		 * address in the NIC ring, considering the offset
1214		 * between the netmap and NIC rings (see comment in
1215		 * ixgbe_setup_transmit_ring() ). No need to allocate
1216		 * an mbuf, so end the block with a continue;
1217		 */
1218		if (slot) {
1219			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1220			uint64_t paddr;
1221			void *addr;
1222
1223			addr = PNMB(na, slot + sj, &paddr);
1224			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1225			/* Update descriptor and the cached value */
1226			rxr->base[j].read.pkt_addr = htole64(paddr);
1227			rxr->base[j].read.hdr_addr = 0;
1228			continue;
1229		}
1230#endif /* DEV_NETMAP */
1231		/*
1232		** Don't allocate header mbufs if we're not
1233		** doing header split; it's wasteful.
1234		*/
1235		if (rxr->hdr_split == FALSE)
1236			goto skip_head;
1237
1238		/* First the header */
1239		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1240		if (buf->m_head == NULL) {
1241			error = ENOBUFS;
1242			goto fail;
1243		}
1244		m_adj(buf->m_head, ETHER_ALIGN);
1245		mh = buf->m_head;
1246		mh->m_len = mh->m_pkthdr.len = MHLEN;
1247		mh->m_flags |= M_PKTHDR;
1248		/* Get the memory mapping */
1249		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1250		    buf->hmap, buf->m_head, hseg,
1251		    &nsegs, BUS_DMA_NOWAIT);
1252		if (error != 0) /* Nothing elegant to do here */
1253			goto fail;
1254		bus_dmamap_sync(rxr->htag,
1255		    buf->hmap, BUS_DMASYNC_PREREAD);
1256		/* Update descriptor */
1257		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1258
1259skip_head:
1260		/* Now the payload cluster */
1261		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1262		    M_PKTHDR, rxr->mbuf_sz);
1263		if (buf->m_pack == NULL) {
1264			error = ENOBUFS;
1265                        goto fail;
1266		}
1267		mp = buf->m_pack;
1268		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1269		/* Get the memory mapping */
1270		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1271		    buf->pmap, mp, pseg,
1272		    &nsegs, BUS_DMA_NOWAIT);
1273		if (error != 0)
1274                        goto fail;
1275		bus_dmamap_sync(rxr->ptag,
1276		    buf->pmap, BUS_DMASYNC_PREREAD);
1277		/* Update descriptor */
1278		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1279		rxr->base[j].read.hdr_addr = 0;
1280	}
1281
1282
1283	/* Setup our descriptor indices */
1284	rxr->next_check = 0;
1285	rxr->next_refresh = 0;
1286	rxr->lro_enabled = FALSE;
1287	rxr->split = 0;
1288	rxr->bytes = 0;
1289	rxr->discard = FALSE;
1290
1291	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1292	ixl_flush(vsi->hw);
1293
1294#if defined(INET6) || defined(INET)
1295	/*
1296	** Now set up the LRO interface:
1297	*/
1298	if (ifp->if_capenable & IFCAP_LRO) {
1299		int err = tcp_lro_init(lro);
1300		if (err) {
1301			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1302			goto fail;
1303		}
1304		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1305		rxr->lro_enabled = TRUE;
1306		lro->ifp = vsi->ifp;
1307	}
1308#endif
1309
1310	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1311	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1312
1313fail:
1314	IXL_RX_UNLOCK(rxr);
1315	return (error);
1316}
1317
1318
1319/*********************************************************************
1320 *
1321 *  Free station receive ring data structures
1322 *
1323 **********************************************************************/
1324void
1325ixl_free_que_rx(struct ixl_queue *que)
1326{
1327	struct rx_ring		*rxr = &que->rxr;
1328	struct ixl_rx_buf	*buf;
1329
1330	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1331
1332	/* Cleanup any existing buffers */
1333	if (rxr->buffers != NULL) {
1334		for (int i = 0; i < que->num_desc; i++) {
1335			buf = &rxr->buffers[i];
1336			if (buf->m_head != NULL) {
1337				bus_dmamap_sync(rxr->htag, buf->hmap,
1338				    BUS_DMASYNC_POSTREAD);
1339				bus_dmamap_unload(rxr->htag, buf->hmap);
1340				buf->m_head->m_flags |= M_PKTHDR;
1341				m_freem(buf->m_head);
1342			}
1343			if (buf->m_pack != NULL) {
1344				bus_dmamap_sync(rxr->ptag, buf->pmap,
1345				    BUS_DMASYNC_POSTREAD);
1346				bus_dmamap_unload(rxr->ptag, buf->pmap);
1347				buf->m_pack->m_flags |= M_PKTHDR;
1348				m_freem(buf->m_pack);
1349			}
1350			buf->m_head = NULL;
1351			buf->m_pack = NULL;
1352			if (buf->hmap != NULL) {
1353				bus_dmamap_destroy(rxr->htag, buf->hmap);
1354				buf->hmap = NULL;
1355			}
1356			if (buf->pmap != NULL) {
1357				bus_dmamap_destroy(rxr->ptag, buf->pmap);
1358				buf->pmap = NULL;
1359			}
1360		}
1361		if (rxr->buffers != NULL) {
1362			free(rxr->buffers, M_DEVBUF);
1363			rxr->buffers = NULL;
1364		}
1365	}
1366
1367	if (rxr->htag != NULL) {
1368		bus_dma_tag_destroy(rxr->htag);
1369		rxr->htag = NULL;
1370	}
1371	if (rxr->ptag != NULL) {
1372		bus_dma_tag_destroy(rxr->ptag);
1373		rxr->ptag = NULL;
1374	}
1375
1376	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1377	return;
1378}
1379
1380static inline void
1381ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1382{
1383
1384#if defined(INET6) || defined(INET)
1385        /*
1386         * At the moment LRO is only done for IPv4/TCP packets whose TCP
1387         * checksum has been verified by hardware, and which carry no VLAN
1388         * tag in the Ethernet header.
1389         */
1390        if (rxr->lro_enabled &&
1391            (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1392            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1393            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1394                /*
1395                 * Send to the stack if:
1396                 **  - LRO not enabled, or
1397                 **  - no LRO resources, or
1398                 **  - lro enqueue fails
1399                 */
1400                if (rxr->lro.lro_cnt != 0)
1401                        if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1402                                return;
1403        }
1404#endif
1405	IXL_RX_UNLOCK(rxr);
1406        (*ifp->if_input)(ifp, m);
1407	IXL_RX_LOCK(rxr);
1408}
1409
1410
1411static inline void
1412ixl_rx_discard(struct rx_ring *rxr, int i)
1413{
1414	struct ixl_rx_buf	*rbuf;
1415
1416	rbuf = &rxr->buffers[i];
1417
1418        if (rbuf->fmp != NULL) {/* Partial chain ? */
1419		rbuf->fmp->m_flags |= M_PKTHDR;
1420                m_freem(rbuf->fmp);
1421                rbuf->fmp = NULL;
1422	}
1423
1424	/*
1425	** With advanced descriptors the writeback
1426	** clobbers the buffer addresses, so it's easier
1427	** to just free the existing mbufs and take
1428	** the normal refresh path to get new buffers
1429	** and mapping.
1430	*/
1431	if (rbuf->m_head) {
1432		m_free(rbuf->m_head);
1433		rbuf->m_head = NULL;
1434	}
1435
1436	if (rbuf->m_pack) {
1437		m_free(rbuf->m_pack);
1438		rbuf->m_pack = NULL;
1439	}
1440
1441	return;
1442}
1443
1444#ifdef RSS
1445/*
1446** ixl_ptype_to_hash: parse the packet type
1447** to determine the appropriate hash.
1448*/
1449static inline int
1450ixl_ptype_to_hash(u8 ptype)
1451{
1452        struct i40e_rx_ptype_decoded	decoded;
1453	u8				ex = 0;
1454
1455	decoded = decode_rx_desc_ptype(ptype);
1456	ex = decoded.outer_frag;
1457
1458	if (!decoded.known)
1459		return M_HASHTYPE_OPAQUE_HASH;
1460
1461	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
1462		return M_HASHTYPE_OPAQUE_HASH;
1463
1464	/* Note: anything that gets to this point is IP */
1465        if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
1466		switch (decoded.inner_prot) {
1467			case I40E_RX_PTYPE_INNER_PROT_TCP:
1468				if (ex)
1469					return M_HASHTYPE_RSS_TCP_IPV6_EX;
1470				else
1471					return M_HASHTYPE_RSS_TCP_IPV6;
1472			case I40E_RX_PTYPE_INNER_PROT_UDP:
1473				if (ex)
1474					return M_HASHTYPE_RSS_UDP_IPV6_EX;
1475				else
1476					return M_HASHTYPE_RSS_UDP_IPV6;
1477			default:
1478				if (ex)
1479					return M_HASHTYPE_RSS_IPV6_EX;
1480				else
1481					return M_HASHTYPE_RSS_IPV6;
1482		}
1483	}
1484        if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
1485		switch (decoded.inner_prot) {
1486			case I40E_RX_PTYPE_INNER_PROT_TCP:
1487					return M_HASHTYPE_RSS_TCP_IPV4;
1488			case I40E_RX_PTYPE_INNER_PROT_UDP:
1489				if (ex)
1490					return M_HASHTYPE_RSS_UDP_IPV4_EX;
1491				else
1492					return M_HASHTYPE_RSS_UDP_IPV4;
1493			default:
1494					return M_HASHTYPE_RSS_IPV4;
1495		}
1496	}
1497	/* We should never get here!! */
1498	return M_HASHTYPE_OPAQUE_HASH;
1499}
1500#endif /* RSS */
1501
1502/*********************************************************************
1503 *
1504 *  This routine executes in interrupt context. It replenishes
1505 *  the mbufs in the descriptor ring and sends data which has been
1506 *  DMA'd into host memory to the upper layer.
1507 *
1508 *  We loop at most count times if count is > 0, or until done if
1509 *  count < 0.
1510 *
1511 *  Return TRUE for more work, FALSE for all clean.
1512 *********************************************************************/
1513bool
1514ixl_rxeof(struct ixl_queue *que, int count)
1515{
1516	struct ixl_vsi		*vsi = que->vsi;
1517	struct rx_ring		*rxr = &que->rxr;
1518	struct ifnet		*ifp = vsi->ifp;
1519#if defined(INET6) || defined(INET)
1520	struct lro_ctrl		*lro = &rxr->lro;
1521#endif
1522	int			i, nextp, processed = 0;
1523	union i40e_rx_desc	*cur;
1524	struct ixl_rx_buf	*rbuf, *nbuf;
1525
1526
1527	IXL_RX_LOCK(rxr);
1528
1529#ifdef DEV_NETMAP
1530	if (netmap_rx_irq(ifp, que->me, &count)) {
1531		IXL_RX_UNLOCK(rxr);
1532		return (FALSE);
1533	}
1534#endif /* DEV_NETMAP */
1535
1536	for (i = rxr->next_check; count != 0;) {
1537		struct mbuf	*sendmp, *mh, *mp;
1538		u32		status, error;
1539		u16		hlen, plen, vtag;
1540		u64		qword;
1541		u8		ptype;
1542		bool		eop;
1543
1544		/* Sync the ring. */
1545		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1546		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1547
1548		cur = &rxr->base[i];
1549		qword = le64toh(cur->wb.qword1.status_error_len);
1550		status = (qword & I40E_RXD_QW1_STATUS_MASK)
1551		    >> I40E_RXD_QW1_STATUS_SHIFT;
1552		error = (qword & I40E_RXD_QW1_ERROR_MASK)
1553		    >> I40E_RXD_QW1_ERROR_SHIFT;
1554		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1555		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1556		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1557		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1558		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1559		    >> I40E_RXD_QW1_PTYPE_SHIFT;
1560
1561		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1562			++rxr->not_done;
1563			break;
1564		}
1565		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1566			break;
1567
1568		count--;
1569		sendmp = NULL;
1570		nbuf = NULL;
1571		cur->wb.qword1.status_error_len = 0;
1572		rbuf = &rxr->buffers[i];
1573		mh = rbuf->m_head;
1574		mp = rbuf->m_pack;
1575		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1576		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1577			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1578		else
1579			vtag = 0;
1580
1581		/*
1582		** Make sure bad packets are discarded;
1583		** note that only the EOP descriptor has valid
1584		** error results.
1585		*/
1586                if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1587			rxr->desc_errs++;
1588			ixl_rx_discard(rxr, i);
1589			goto next_desc;
1590		}
1591
1592		/* Prefetch the next buffer */
1593		if (!eop) {
1594			nextp = i + 1;
1595			if (nextp == que->num_desc)
1596				nextp = 0;
1597			nbuf = &rxr->buffers[nextp];
1598			prefetch(nbuf);
1599		}
1600
1601		/*
1602		** The header mbuf is ONLY used when header
1603		** split is enabled; otherwise we get normal
1604		** behavior, i.e., both header and payload
1605		** are DMA'd into the payload buffer.
1606		**
1607		** Rather than using the fmp/lmp global pointers
1608		** we now keep the head of a packet chain in the
1609		** buffer struct and pass this along from one
1610		** descriptor to the next, until we get EOP.
1611		*/
1612		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1613			if (hlen > IXL_RX_HDR)
1614				hlen = IXL_RX_HDR;
1615			mh->m_len = hlen;
1616			mh->m_flags |= M_PKTHDR;
1617			mh->m_next = NULL;
1618			mh->m_pkthdr.len = mh->m_len;
1619			/* Null buf pointer so it is refreshed */
1620			rbuf->m_head = NULL;
1621			/*
1622			** Check the payload length; this
1623			** could be zero if it's a small
1624			** packet.
1625			*/
1626			if (plen > 0) {
1627				mp->m_len = plen;
1628				mp->m_next = NULL;
1629				mp->m_flags &= ~M_PKTHDR;
1630				mh->m_next = mp;
1631				mh->m_pkthdr.len += mp->m_len;
1632				/* Null buf pointer so it is refreshed */
1633				rbuf->m_pack = NULL;
1634				rxr->split++;
1635			}
1636			/*
1637			** Now create the forward
1638			** chain so that when complete
1639			** we won't have to.
1640			*/
1641                        if (eop == 0) {
1642				/* stash the chain head */
1643                                nbuf->fmp = mh;
1644				/* Make forward chain */
1645                                if (plen)
1646                                        mp->m_next = nbuf->m_pack;
1647                                else
1648                                        mh->m_next = nbuf->m_pack;
1649                        } else {
1650				/* Singlet, prepare to send */
1651                                sendmp = mh;
1652                                if (vtag) {
1653                                        sendmp->m_pkthdr.ether_vtag = vtag;
1654                                        sendmp->m_flags |= M_VLANTAG;
1655                                }
1656                        }
1657		} else {
1658			/*
1659			** Either no header split, or a
1660			** secondary piece of a fragmented
1661			** split packet.
1662			*/
1663			mp->m_len = plen;
1664			/*
1665			** See if there is a stored head that
1666			** tells us what this buffer is part of.
1667			*/
1668			sendmp = rbuf->fmp;
1669			rbuf->m_pack = rbuf->fmp = NULL;
1670
1671			if (sendmp != NULL) /* secondary frag */
1672				sendmp->m_pkthdr.len += mp->m_len;
1673			else {
1674				/* first desc of a non-ps chain */
1675				sendmp = mp;
1676				sendmp->m_flags |= M_PKTHDR;
1677				sendmp->m_pkthdr.len = mp->m_len;
1678                        }
1679			/* Pass the head pointer on */
1680			if (eop == 0) {
1681				nbuf->fmp = sendmp;
1682				sendmp = NULL;
1683				mp->m_next = nbuf->m_pack;
1684			}
1685		}
1686		++processed;
1687		/* Sending this frame? */
1688		if (eop) {
1689			sendmp->m_pkthdr.rcvif = ifp;
1690			/* gather stats */
1691			rxr->rx_packets++;
1692			rxr->rx_bytes += sendmp->m_pkthdr.len;
1693			/* capture data for dynamic ITR adjustment */
1694			rxr->packets++;
1695			rxr->bytes += sendmp->m_pkthdr.len;
1696			/* Set VLAN tag (field only valid in eop desc) */
1697			if (vtag) {
1698				sendmp->m_pkthdr.ether_vtag = vtag;
1699				sendmp->m_flags |= M_VLANTAG;
1700			}
1701			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1702				ixl_rx_checksum(sendmp, status, error, ptype);
1703#ifdef RSS
1704			sendmp->m_pkthdr.flowid =
1705			    le32toh(cur->wb.qword0.hi_dword.rss);
1706			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1707#else
1708			sendmp->m_pkthdr.flowid = que->msix;
1709			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1710#endif
1711		}
1712next_desc:
1713		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1714		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1715
1716		/* Advance our pointers to the next descriptor. */
1717		if (++i == que->num_desc)
1718			i = 0;
1719
1720		/* Now send to the stack or do LRO */
1721		if (sendmp != NULL) {
1722			rxr->next_check = i;
1723			ixl_rx_input(rxr, ifp, sendmp, ptype);
1724			i = rxr->next_check;
1725		}
1726
1727               /* Every 8 descriptors we go to refresh mbufs */
1728		if (processed == 8) {
1729			ixl_refresh_mbufs(que, i);
1730			processed = 0;
1731		}
1732	}
1733
1734	/* Refresh any remaining buf structs */
1735	if (ixl_rx_unrefreshed(que))
1736		ixl_refresh_mbufs(que, i);
1737
1738	rxr->next_check = i;
1739
1740#if defined(INET6) || defined(INET)
1741	/*
1742	 * Flush any outstanding LRO work
1743	 */
1744#if __FreeBSD_version >= 1100105
1745	tcp_lro_flush_all(lro);
1746#else
1747	struct lro_entry *queued;
1748	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1749		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1750		tcp_lro_flush(lro, queued);
1751	}
1752#endif
1753#endif /* defined(INET6) || defined(INET) */
1754
1755	IXL_RX_UNLOCK(rxr);
1756	return (FALSE);
1757}
1758
1759
1760/*********************************************************************
1761 *
1762 *  Verify that the hardware indicated that the checksum is valid.
1763 *  Inform the stack about the status of the checksum so that the stack
1764 *  doesn't spend time verifying the checksum.
1765 *
1766 *********************************************************************/
1767static void
1768ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1769{
1770	struct i40e_rx_ptype_decoded decoded;
1771
1772	decoded = decode_rx_desc_ptype(ptype);
1773
1774	/* Errors? */
1775 	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1776	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1777		mp->m_pkthdr.csum_flags = 0;
1778		return;
1779	}
1780
1781	/* IPv6 packets with extension headers likely have a bad csum */
1782	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1783	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1784		if (status &
1785		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1786			mp->m_pkthdr.csum_flags = 0;
1787			return;
1788		}
1789
1790
1791	/* IP Checksum Good */
1792	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1793	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1794
1795	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1796		mp->m_pkthdr.csum_flags |=
1797		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1798		mp->m_pkthdr.csum_data |= htons(0xffff);
1799	}
1800	return;
1801}
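/*
** Illustrative sketch (not compiled into the driver): how a consumer
** would typically test the flags set above.  The helper name below is
** hypothetical.
*/
#if 0
static inline bool
ixl_example_rx_csum_ok(struct mbuf *m)
{
	/* True when both the IP header and the L4 data were verified. */
	return ((m->m_pkthdr.csum_flags &
	    (CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR));
}
#endif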
1802
1803#if __FreeBSD_version >= 1100000
1804uint64_t
1805ixl_get_counter(if_t ifp, ift_counter cnt)
1806{
1807	struct ixl_vsi *vsi;
1808
1809	vsi = if_getsoftc(ifp);
1810
1811	switch (cnt) {
1812	case IFCOUNTER_IPACKETS:
1813		return (vsi->ipackets);
1814	case IFCOUNTER_IERRORS:
1815		return (vsi->ierrors);
1816	case IFCOUNTER_OPACKETS:
1817		return (vsi->opackets);
1818	case IFCOUNTER_OERRORS:
1819		return (vsi->oerrors);
1820	case IFCOUNTER_COLLISIONS:
1821		/* Collisions are by standard impossible in 40G/10G Ethernet */
1822		return (0);
1823	case IFCOUNTER_IBYTES:
1824		return (vsi->ibytes);
1825	case IFCOUNTER_OBYTES:
1826		return (vsi->obytes);
1827	case IFCOUNTER_IMCASTS:
1828		return (vsi->imcasts);
1829	case IFCOUNTER_OMCASTS:
1830		return (vsi->omcasts);
1831	case IFCOUNTER_IQDROPS:
1832		return (vsi->iqdrops);
1833	case IFCOUNTER_OQDROPS:
1834		return (vsi->oqdrops);
1835	case IFCOUNTER_NOPROTO:
1836		return (vsi->noproto);
1837	default:
1838		return (if_get_counter_default(ifp, cnt));
1839	}
1840}
1841#endif
1842
1843