ixl_txrx.c revision 323211
1/******************************************************************************
2
3  Copyright (c) 2013-2015, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/11/sys/dev/ixl/ixl_txrx.c 323211 2017-09-06 07:08:52Z rlibby $*/
34
35/*
36**	IXL driver TX/RX Routines:
37**	    This was separated to allow usage by
38** 	    both the PF and VF drivers.
39*/
40
41#ifndef IXL_STANDALONE_BUILD
42#include "opt_inet.h"
43#include "opt_inet6.h"
44#include "opt_rss.h"
45#endif
46
47#include "ixl.h"
48
49#ifdef RSS
50#include <net/rss_config.h>
51#endif
52
53/* Local Prototypes */
54static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55static void	ixl_refresh_mbufs(struct ixl_queue *, int);
56static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57static int	ixl_tx_setup_offload(struct ixl_queue *,
58		    struct mbuf *, u32 *, u32 *);
59static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60
61static inline void ixl_rx_discard(struct rx_ring *, int);
62static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
63		    struct mbuf *, u8);
64
65static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
66static inline u32 ixl_get_tx_head(struct ixl_queue *que);
67
68#ifdef DEV_NETMAP
69#include <dev/netmap/if_ixl_netmap.h>
70#endif /* DEV_NETMAP */
71
72/*
73 * @key: buffer into which the default RSS key is copied
74 */
75void
76ixl_get_default_rss_key(u32 *key)
77{
78	MPASS(key != NULL);
79
80	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
81	    0x183cfd8c, 0xce880440, 0x580cbc3c,
82	    0x35897377, 0x328b25e1, 0x4fa98922,
83	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
84	    0x0, 0x0, 0x0};
85
86	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
87}
88
89/*
90** Multiqueue Transmit driver
91*/
92int
93ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
94{
95	struct ixl_vsi		*vsi = ifp->if_softc;
96	struct ixl_queue	*que;
97	struct tx_ring		*txr;
98	int 			err, i;
99#ifdef RSS
100	u32			bucket_id;
101#endif
102
103	/*
104	** Which queue to use:
105	**
106	** When doing RSS, map it to the same outbound
107	** queue as the incoming flow would be mapped to.
108	** If everything is set up correctly, it should match
109	** the RSS bucket of the CPU we're currently running on.
110	*/
111	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
112#ifdef  RSS
113		if (rss_hash2bucket(m->m_pkthdr.flowid,
114		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
115			i = bucket_id % vsi->num_queues;
116                } else
117#endif
118                        i = m->m_pkthdr.flowid % vsi->num_queues;
119        } else
120		i = curcpu % vsi->num_queues;
121
122	que = &vsi->queues[i];
123	txr = &que->txr;
124
125	err = drbr_enqueue(ifp, txr->br, m);
126	if (err)
127		return (err);
128	if (IXL_TX_TRYLOCK(txr)) {
129		ixl_mq_start_locked(ifp, txr);
130		IXL_TX_UNLOCK(txr);
131	} else
132		taskqueue_enqueue(que->tq, &que->tx_task);
133
134	return (0);
135}
136
137int
138ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
139{
140	struct ixl_queue	*que = txr->que;
141	struct ixl_vsi		*vsi = que->vsi;
142        struct mbuf		*next;
143        int			err = 0;
144
145
146	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
147	    vsi->link_active == 0)
148		return (ENETDOWN);
149
150	/* Process the transmit queue */
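	/*
	 * On error ixl_xmit() either consumed the mbuf (next == NULL, so
	 * just advance past it) or left it intact, in which case it is put
	 * back on the ring to be retried later.
	 */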
151	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
152		if ((err = ixl_xmit(que, &next)) != 0) {
153			if (next == NULL)
154				drbr_advance(ifp, txr->br);
155			else
156				drbr_putback(ifp, txr->br, next);
157			break;
158		}
159		drbr_advance(ifp, txr->br);
160		/* Send a copy of the frame to the BPF listener */
161		ETHER_BPF_MTAP(ifp, next);
162		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
163			break;
164	}
165
166	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
167		ixl_txeof(que);
168
169	return (err);
170}
171
172/*
173 * Called from a taskqueue to drain queued transmit packets.
174 */
175void
176ixl_deferred_mq_start(void *arg, int pending)
177{
178	struct ixl_queue	*que = arg;
179        struct tx_ring		*txr = &que->txr;
180	struct ixl_vsi		*vsi = que->vsi;
181        struct ifnet		*ifp = vsi->ifp;
182
183	IXL_TX_LOCK(txr);
184	if (!drbr_empty(ifp, txr->br))
185		ixl_mq_start_locked(ifp, txr);
186	IXL_TX_UNLOCK(txr);
187}
188
189/*
190** Flush all queue ring buffers
191*/
192void
193ixl_qflush(struct ifnet *ifp)
194{
195	struct ixl_vsi	*vsi = ifp->if_softc;
196
197        for (int i = 0; i < vsi->num_queues; i++) {
198		struct ixl_queue *que = &vsi->queues[i];
199		struct tx_ring	*txr = &que->txr;
200		struct mbuf	*m;
201		IXL_TX_LOCK(txr);
202		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
203			m_freem(m);
204		IXL_TX_UNLOCK(txr);
205	}
206	if_qflush(ifp);
207}
208
209/*
210** Find mbuf chains passed to the driver
211** that are 'sparse', i.e. that use more than
212** IXL_SPARSE_CHAIN mbufs to deliver an MSS-sized chunk of data
213*/
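/*
** Worked example (figures are illustrative only, and assume
** IXL_SPARSE_CHAIN is 8): with tso_segsz = 1000 and a chain of 100-byte
** mbufs following the header, each pass drops 'mss' by 100; by the 10th
** mbuf mss hits 0 with num = 10, which exceeds the limit, so the chain
** is reported sparse and ixl_xmit() defragments it before mapping.
*/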
214static inline bool
215ixl_tso_detect_sparse(struct mbuf *mp)
216{
217	struct mbuf	*m;
218	int		num, mss;
219
220	num = 0;
221	mss = mp->m_pkthdr.tso_segsz;
222
223	/* Exclude first mbuf; assume it contains all headers */
224	for (m = mp->m_next; m != NULL; m = m->m_next) {
227		num++;
228		mss -= m->m_len % mp->m_pkthdr.tso_segsz;
229
230		if (mss < 1) {
231			if (num > IXL_SPARSE_CHAIN)
232				return (true);
233			num = (mss == 0) ? 0 : 1;
234			mss += mp->m_pkthdr.tso_segsz;
235		}
236	}
237
238	return (false);
239}
240
241
242/*********************************************************************
243 *
244 *  This routine maps the mbufs to tx descriptors, allowing the
245 *  TX engine to transmit the packets.
246 *  	- return 0 on success, positive on failure
247 *
248 **********************************************************************/
249#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
250
251static int
252ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
253{
254	struct ixl_vsi		*vsi = que->vsi;
255	struct i40e_hw		*hw = vsi->hw;
256	struct tx_ring		*txr = &que->txr;
257	struct ixl_tx_buf	*buf;
258	struct i40e_tx_desc	*txd = NULL;
259	struct mbuf		*m_head, *m;
260	int             	i, j, error, nsegs;
261	int			first, last = 0;
262	u16			vtag = 0;
263	u32			cmd, off;
264	bus_dmamap_t		map;
265	bus_dma_tag_t		tag;
266	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
267
268	cmd = off = 0;
269	m_head = *m_headp;
270
271        /*
272         * Important to capture the first descriptor
273         * used because it will contain the index of
274         * the one we tell the hardware to report back
275         */
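        /*
         * The buffer at 'first' later records the EOP index so that
         * ixl_txeof() knows how far this packet extends; the mbuf and
         * its loaded DMA map end up on the last buffer used (see the
         * map swap further down).
         */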
276        first = txr->next_avail;
277	buf = &txr->buffers[first];
278	map = buf->map;
279	tag = txr->tx_tag;
280
281	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
282		/* Use larger mapping for TSO */
283		tag = txr->tso_tag;
284		if (ixl_tso_detect_sparse(m_head)) {
285			m = m_defrag(m_head, M_NOWAIT);
286			if (m == NULL) {
287				m_freem(*m_headp);
288				*m_headp = NULL;
289				return (ENOBUFS);
290			}
291			*m_headp = m;
292		}
293	}
294
295	/*
296	 * Map the packet for DMA.
297	 */
298	error = bus_dmamap_load_mbuf_sg(tag, map,
299	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
300
301	if (error == EFBIG) {
302		struct mbuf *m;
303
304		m = m_defrag(*m_headp, M_NOWAIT);
305		if (m == NULL) {
306			que->mbuf_defrag_failed++;
307			m_freem(*m_headp);
308			*m_headp = NULL;
309			return (ENOBUFS);
310		}
311		*m_headp = m;
312
313		/* Try it again */
314		error = bus_dmamap_load_mbuf_sg(tag, map,
315		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
316
317		if (error != 0) {
318			que->tx_dmamap_failed++;
319			m_freem(*m_headp);
320			*m_headp = NULL;
321			return (error);
322		}
323	} else if (error != 0) {
324		que->tx_dmamap_failed++;
325		m_freem(*m_headp);
326		*m_headp = NULL;
327		return (error);
328	}
329
330	/* Make certain there are enough descriptors */
331	if (nsegs > txr->avail - 2) {
332		txr->no_desc++;
333		error = ENOBUFS;
334		goto xmit_fail;
335	}
336	m_head = *m_headp;
337
338	/* Set up the TSO/CSUM offload */
339	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
340		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
341		if (error)
342			goto xmit_fail;
343	}
344
345	cmd |= I40E_TX_DESC_CMD_ICRC;
346	/* Grab the VLAN tag */
347	if (m_head->m_flags & M_VLANTAG) {
348		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
349		vtag = htole16(m_head->m_pkthdr.ether_vtag);
350	}
351
352	i = txr->next_avail;
353	for (j = 0; j < nsegs; j++) {
354		bus_size_t seglen;
355
356		buf = &txr->buffers[i];
357		buf->tag = tag; /* Keep track of the type tag */
358		txd = &txr->base[i];
359		seglen = segs[j].ds_len;
360
361		txd->buffer_addr = htole64(segs[j].ds_addr);
362		txd->cmd_type_offset_bsz =
363		    htole64(I40E_TX_DESC_DTYPE_DATA
364		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
365		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
366		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
367		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
368
369		last = i; /* descriptor that will get completion IRQ */
370
371		if (++i == que->num_desc)
372			i = 0;
373
374		buf->m_head = NULL;
375		buf->eop_index = -1;
376	}
377	/* Set the last descriptor for report */
378	txd->cmd_type_offset_bsz |=
379	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
380	txr->avail -= nsegs;
381	txr->next_avail = i;
382
383	buf->m_head = m_head;
384	/* Swap the dma map between the first and last descriptor */
385	txr->buffers[first].map = buf->map;
386	buf->map = map;
387	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
388
389        /* Set the index of the descriptor that will be marked done */
390        buf = &txr->buffers[first];
391	buf->eop_index = last;
392
393        bus_dmamap_sync(txr->dma.tag, txr->dma.map,
394            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
395	/*
396	 * Advance the Transmit Descriptor Tail (TDT); this tells the
397	 * hardware that this frame is available to transmit.
398	 */
399	++txr->total_packets;
400	wr32(hw, txr->tail, i);
401
402	/* Mark outstanding work; ixl_txeof() clears this when the ring drains */
403	atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
404	return (0);
405
406xmit_fail:
407	bus_dmamap_unload(tag, buf->map);
408	return (error);
409}
410
411
412/*********************************************************************
413 *
414 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
415 *  the information needed to transmit a packet on the wire. This is
416 *  called only once at attach, setup is done every reset.
417 *
418 **********************************************************************/
419int
420ixl_allocate_tx_data(struct ixl_queue *que)
421{
422	struct tx_ring		*txr = &que->txr;
423	struct ixl_vsi		*vsi = que->vsi;
424	device_t		dev = vsi->dev;
425	struct ixl_tx_buf	*buf;
426	int			error = 0;
427
428	/*
429	 * Setup DMA descriptor areas.
430	 */
431	if ((error = bus_dma_tag_create(NULL,		/* parent */
432			       1, 0,			/* alignment, bounds */
433			       BUS_SPACE_MAXADDR,	/* lowaddr */
434			       BUS_SPACE_MAXADDR,	/* highaddr */
435			       NULL, NULL,		/* filter, filterarg */
436			       IXL_TSO_SIZE,		/* maxsize */
437			       IXL_MAX_TX_SEGS,		/* nsegments */
438			       PAGE_SIZE,		/* maxsegsize */
439			       0,			/* flags */
440			       NULL,			/* lockfunc */
441			       NULL,			/* lockfuncarg */
442			       &txr->tx_tag))) {
443		device_printf(dev,"Unable to allocate TX DMA tag\n");
444		goto fail;
445	}
446
447	/* Make a special tag for TSO */
448	if ((error = bus_dma_tag_create(NULL,		/* parent */
449			       1, 0,			/* alignment, bounds */
450			       BUS_SPACE_MAXADDR,	/* lowaddr */
451			       BUS_SPACE_MAXADDR,	/* highaddr */
452			       NULL, NULL,		/* filter, filterarg */
453			       IXL_TSO_SIZE,		/* maxsize */
454			       IXL_MAX_TSO_SEGS,	/* nsegments */
455			       PAGE_SIZE,		/* maxsegsize */
456			       0,			/* flags */
457			       NULL,			/* lockfunc */
458			       NULL,			/* lockfuncarg */
459			       &txr->tso_tag))) {
460		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
461		goto fail;
462	}
463
464	if (!(txr->buffers =
465	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
466	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
467		device_printf(dev, "Unable to allocate tx_buffer memory\n");
468		error = ENOMEM;
469		goto fail;
470	}
471
472        /* Create the descriptor buffer default dma maps */
473	buf = txr->buffers;
474	for (int i = 0; i < que->num_desc; i++, buf++) {
475		buf->tag = txr->tx_tag;
476		error = bus_dmamap_create(buf->tag, 0, &buf->map);
477		if (error != 0) {
478			device_printf(dev, "Unable to create TX DMA map\n");
479			goto fail;
480		}
481	}
482fail:
483	return (error);
484}
485
486
487/*********************************************************************
488 *
489 *  (Re)Initialize a queue transmit ring.
490 *	- called by init, it clears the descriptor ring,
491 *	  and frees any stale mbufs
492 *
493 **********************************************************************/
494void
495ixl_init_tx_ring(struct ixl_queue *que)
496{
497#ifdef DEV_NETMAP
498	struct netmap_adapter *na = NA(que->vsi->ifp);
499	struct netmap_slot *slot;
500#endif /* DEV_NETMAP */
501	struct tx_ring		*txr = &que->txr;
502	struct ixl_tx_buf	*buf;
503
504	/* Clear the old ring contents */
505	IXL_TX_LOCK(txr);
506
507#ifdef DEV_NETMAP
508	/*
509	 * (under lock): if in netmap mode, do some consistency
510	 * checks and set slot to entry 0 of the netmap ring.
511	 */
512	slot = netmap_reset(na, NR_TX, que->me, 0);
513#endif /* DEV_NETMAP */
514
515	bzero((void *)txr->base,
516	      (sizeof(struct i40e_tx_desc)) * que->num_desc);
517
518	/* Reset indices */
519	txr->next_avail = 0;
520	txr->next_to_clean = 0;
521
522	/* Reset watchdog status */
523	txr->watchdog_timer = 0;
524
525#ifdef IXL_FDIR
526	/* Initialize flow director */
527	txr->atr_rate = ixl_atr_rate;
528	txr->atr_count = 0;
529#endif
530	/* Free any existing tx mbufs. */
531        buf = txr->buffers;
532	for (int i = 0; i < que->num_desc; i++, buf++) {
533		if (buf->m_head != NULL) {
534			bus_dmamap_sync(buf->tag, buf->map,
535			    BUS_DMASYNC_POSTWRITE);
536			bus_dmamap_unload(buf->tag, buf->map);
537			m_freem(buf->m_head);
538			buf->m_head = NULL;
539		}
540#ifdef DEV_NETMAP
541		/*
542		 * In netmap mode, set the map for the packet buffer.
543		 * NOTE: Some drivers (not this one) also need to set
544		 * the physical buffer address in the NIC ring.
545		 * netmap_idx_n2k() maps a nic index, i, into the corresponding
546		 * netmap slot index, si
547		 */
548		if (slot) {
549			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
550			netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
551		}
552#endif /* DEV_NETMAP */
553		/* Clear the EOP index */
554		buf->eop_index = -1;
555        }
556
557	/* Set number of descriptors available */
558	txr->avail = que->num_desc;
559
560	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
561	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
562	IXL_TX_UNLOCK(txr);
563}
564
565
566/*********************************************************************
567 *
568 *  Free transmit ring related data structures.
569 *
570 **********************************************************************/
571void
572ixl_free_que_tx(struct ixl_queue *que)
573{
574	struct tx_ring *txr = &que->txr;
575	struct ixl_tx_buf *buf;
576
577	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
578
579	for (int i = 0; i < que->num_desc; i++) {
580		buf = &txr->buffers[i];
581		if (buf->m_head != NULL) {
582			bus_dmamap_sync(buf->tag, buf->map,
583			    BUS_DMASYNC_POSTWRITE);
584			bus_dmamap_unload(buf->tag,
585			    buf->map);
586			m_freem(buf->m_head);
587			buf->m_head = NULL;
588			if (buf->map != NULL) {
589				bus_dmamap_destroy(buf->tag,
590				    buf->map);
591				buf->map = NULL;
592			}
593		} else if (buf->map != NULL) {
594			bus_dmamap_unload(buf->tag,
595			    buf->map);
596			bus_dmamap_destroy(buf->tag,
597			    buf->map);
598			buf->map = NULL;
599		}
600	}
601	if (txr->br != NULL)
602		buf_ring_free(txr->br, M_DEVBUF);
603	if (txr->buffers != NULL) {
604		free(txr->buffers, M_DEVBUF);
605		txr->buffers = NULL;
606	}
607	if (txr->tx_tag != NULL) {
608		bus_dma_tag_destroy(txr->tx_tag);
609		txr->tx_tag = NULL;
610	}
611	if (txr->tso_tag != NULL) {
612		bus_dma_tag_destroy(txr->tso_tag);
613		txr->tso_tag = NULL;
614	}
615
616	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
617	return;
618}
619
620/*********************************************************************
621 *
622 *  Setup descriptor for hw offloads
623 *
624 **********************************************************************/
625
626static int
627ixl_tx_setup_offload(struct ixl_queue *que,
628    struct mbuf *mp, u32 *cmd, u32 *off)
629{
630	struct ether_vlan_header	*eh;
631#ifdef INET
632	struct ip			*ip = NULL;
633#endif
634	struct tcphdr			*th = NULL;
635#ifdef INET6
636	struct ip6_hdr			*ip6;
637#endif
638	int				elen, ip_hlen = 0, tcp_hlen;
639	u16				etype;
640	u8				ipproto = 0;
641	bool				tso = FALSE;
642
643	/* Set up the TSO context descriptor if required */
644	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
645		tso = ixl_tso_setup(que, mp);
646		if (tso)
647			++que->tso;
648		else
649			return (ENXIO);
650	}
651
652	/*
653	 * Determine where frame payload starts.
654	 * Jump over vlan headers if already present,
655	 * helpful for QinQ too.
656	 */
657	eh = mtod(mp, struct ether_vlan_header *);
658	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
659		etype = ntohs(eh->evl_proto);
660		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
661	} else {
662		etype = ntohs(eh->evl_encap_proto);
663		elen = ETHER_HDR_LEN;
664	}
665
666	switch (etype) {
667#ifdef INET
668		case ETHERTYPE_IP:
669			ip = (struct ip *)(mp->m_data + elen);
670			ip_hlen = ip->ip_hl << 2;
671			ipproto = ip->ip_p;
672			th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
673			/* The IP checksum must be recalculated with TSO */
674			if (tso)
675				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
676			else
677				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
678			break;
679#endif
680#ifdef INET6
681		case ETHERTYPE_IPV6:
682			ip6 = (struct ip6_hdr *)(mp->m_data + elen);
683			ip_hlen = sizeof(struct ip6_hdr);
684			ipproto = ip6->ip6_nxt;
685			th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
686			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
687			break;
688#endif
689		default:
690			break;
691	}
692
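	/*
	 * Note on units: the descriptor's MACLEN field is counted in
	 * 2-byte words and IPLEN/L4LEN in 4-byte words, which is why the
	 * lengths are shifted by 1 and 2 below.
	 */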
693	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
694	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
695
696	switch (ipproto) {
697		case IPPROTO_TCP:
698			tcp_hlen = th->th_off << 2;
699			if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
700				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
701				*off |= (tcp_hlen >> 2) <<
702				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
703			}
704#ifdef IXL_FDIR
705			ixl_atr(que, th, etype);
706#endif
707			break;
708		case IPPROTO_UDP:
709			if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
710				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
711				*off |= (sizeof(struct udphdr) >> 2) <<
712				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
713			}
714			break;
715
716		case IPPROTO_SCTP:
717			if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
718				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
719				*off |= (sizeof(struct sctphdr) >> 2) <<
720				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
721			}
722			/* Fall Thru */
723		default:
724			break;
725	}
726
727        return (0);
728}
729
730
731/**********************************************************************
732 *
733 *  Setup context for hardware segmentation offload (TSO)
734 *
735 **********************************************************************/
736static bool
737ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
738{
739	struct tx_ring			*txr = &que->txr;
740	struct i40e_tx_context_desc	*TXD;
741	struct ixl_tx_buf		*buf;
742	u32				cmd, mss, type, tsolen;
743	u16				etype;
744	int				idx, elen, ip_hlen, tcp_hlen;
745	struct ether_vlan_header	*eh;
746#ifdef INET
747	struct ip			*ip;
748#endif
749#ifdef INET6
750	struct ip6_hdr			*ip6;
751#endif
752#if defined(INET6) || defined(INET)
753	struct tcphdr			*th;
754#endif
755	u64				type_cmd_tso_mss;
756
757	/*
758	 * Determine where frame payload starts.
759	 * Jump over vlan headers if already present
760	 */
761	eh = mtod(mp, struct ether_vlan_header *);
762	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
763		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
764		etype = eh->evl_proto;
765	} else {
766		elen = ETHER_HDR_LEN;
767		etype = eh->evl_encap_proto;
768	}
769
770        switch (ntohs(etype)) {
771#ifdef INET6
772	case ETHERTYPE_IPV6:
773		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
774		if (ip6->ip6_nxt != IPPROTO_TCP)
775			return (FALSE);
776		ip_hlen = sizeof(struct ip6_hdr);
777		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
778		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
779		tcp_hlen = th->th_off << 2;
780		/*
781		 * The corresponding flag is set by the stack in the IPv4
782		 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
783		 * So, set it here because the rest of the flow requires it.
784		 */
785		mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
786		break;
787#endif
788#ifdef INET
789	case ETHERTYPE_IP:
790		ip = (struct ip *)(mp->m_data + elen);
791		if (ip->ip_p != IPPROTO_TCP)
792			return (FALSE);
793		ip->ip_sum = 0;
794		ip_hlen = ip->ip_hl << 2;
795		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
796		th->th_sum = in_pseudo(ip->ip_src.s_addr,
797		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
798		tcp_hlen = th->th_off << 2;
799		break;
800#endif
801	default:
802		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
803		    __func__, ntohs(etype));
804		return FALSE;
805        }
806
807        /* Ensure we have at least the IP+TCP header in the first mbuf. */
808        if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
809		return FALSE;
810
811	idx = txr->next_avail;
812	buf = &txr->buffers[idx];
813	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
814	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
815
816	type = I40E_TX_DESC_DTYPE_CONTEXT;
817	cmd = I40E_TX_CTX_DESC_TSO;
818	/* TSO MSS must not be less than 64 */
819	if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
820		que->mss_too_small++;
821		mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
822	}
823	mss = mp->m_pkthdr.tso_segsz;
824
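	/*
	 * The TSO context descriptor occupies a ring slot of its own,
	 * ahead of the data descriptors that ixl_xmit() builds next;
	 * hence 'avail' is decremented and 'next_avail' advanced below.
	 */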
825	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
826	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
827	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
828	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
829	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
830
831	TXD->tunneling_params = htole32(0);
832	buf->m_head = NULL;
833	buf->eop_index = -1;
834
835	if (++idx == que->num_desc)
836		idx = 0;
837
838	txr->avail--;
839	txr->next_avail = idx;
840
841	return TRUE;
842}
843
844/*
845** ixl_get_tx_head - Retrieve the value from the
846**    location the HW records its HEAD index
847*/
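/*
** The driver configures the hardware (elsewhere, at queue setup) to
** write the consumed-descriptor index back into host memory one entry
** past the end of the descriptor ring, i.e. at index que->num_desc;
** reading it there avoids a device register read in the clean path.
*/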
848static inline u32
849ixl_get_tx_head(struct ixl_queue *que)
850{
851	struct tx_ring  *txr = &que->txr;
852	void *head = &txr->base[que->num_desc];
853	return LE32_TO_CPU(*(volatile __le32 *)head);
854}
855
856/**********************************************************************
857 *
858 *  Examine each tx_buffer in the used queue. If the hardware is done
859 *  processing the packet then free associated resources. The
860 *  tx_buffer is put back on the free queue.
861 *
862 **********************************************************************/
863bool
864ixl_txeof(struct ixl_queue *que)
865{
866	struct tx_ring		*txr = &que->txr;
867	u32			first, last, head, done, processed;
868	struct ixl_tx_buf	*buf;
869	struct i40e_tx_desc	*tx_desc, *eop_desc;
870
871
872	mtx_assert(&txr->mtx, MA_OWNED);
873
874#ifdef DEV_NETMAP
875	// XXX todo: implement moderation
876	if (netmap_tx_irq(que->vsi->ifp, que->me))
877		return FALSE;
878#endif /* DEV_NETMAP */
879
880	/* These are not the descriptors you seek, move along :) */
881	if (txr->avail == que->num_desc) {
882		atomic_store_rel_32(&txr->watchdog_timer, 0);
883		return FALSE;
884	}
885
886	processed = 0;
887	first = txr->next_to_clean;
888	buf = &txr->buffers[first];
889	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
890	last = buf->eop_index;
891	if (last == -1)
892		return FALSE;
893	eop_desc = (struct i40e_tx_desc *)&txr->base[last];
894
895	/* Get the Head WB value */
896	head = ixl_get_tx_head(que);
897
898	/*
899	** Get the index of the first descriptor
900	** BEYOND the EOP and call that 'done'.
901	** I do this so the comparison in the
902	** inner while loop below can be simple
903	*/
904	if (++last == que->num_desc) last = 0;
905	done = last;
906
907        bus_dmamap_sync(txr->dma.tag, txr->dma.map,
908            BUS_DMASYNC_POSTREAD);
909	/*
910	** The HEAD index of the ring is written back by the
911	** hardware to a defined location in host memory; it,
912	** rather than a done bit, is what is used to keep
913	** track of what must be 'cleaned'.
914	*/
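	/*
	 * Illustrative walk-through: with next_to_clean = 10 and the
	 * packet's eop_index = 12, 'done' becomes 13; the inner loop
	 * releases descriptors 10..12, then the outer loop checks whether
	 * buffer 13 begins another completed packet and stops once 'first'
	 * catches up with 'head'.
	 */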
915	while (first != head) {
916		/* We clean the range of the packet */
917		while (first != done) {
918			++txr->avail;
919			++processed;
920
921			if (buf->m_head) {
922				txr->bytes += /* for ITR adjustment */
923				    buf->m_head->m_pkthdr.len;
924				txr->tx_bytes += /* for TX stats */
925				    buf->m_head->m_pkthdr.len;
926				bus_dmamap_sync(buf->tag,
927				    buf->map,
928				    BUS_DMASYNC_POSTWRITE);
929				bus_dmamap_unload(buf->tag,
930				    buf->map);
931				m_freem(buf->m_head);
932				buf->m_head = NULL;
933				buf->map = NULL;
934			}
935			buf->eop_index = -1;
936
937			if (++first == que->num_desc)
938				first = 0;
939
940			buf = &txr->buffers[first];
941			tx_desc = &txr->base[first];
942		}
943		++txr->packets;
944		/* See if there is more work now */
945		last = buf->eop_index;
946		if (last != -1) {
947			eop_desc = &txr->base[last];
948			/* Get next done point */
949			if (++last == que->num_desc) last = 0;
950			done = last;
951		} else
952			break;
953	}
954	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
955	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
956
957	txr->next_to_clean = first;
958
959
960	/*
961	 * If there are no pending descriptors, clear the timeout.
962	 */
963	if (txr->avail == que->num_desc) {
964		atomic_store_rel_32(&txr->watchdog_timer, 0);
965		return FALSE;
966	}
967
968	return TRUE;
969}
970
971/*********************************************************************
972 *
973 *  Refresh mbuf buffers for RX descriptor rings
974 *   - now keeps its own state so discards due to resource
975 *     exhaustion are unnecessary; if an mbuf cannot be obtained
976 *     it just returns, keeping its placeholder, so it can simply
977 *     be called again to try later.
978 *
979 **********************************************************************/
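/*
** The loop below advances 'i' with 'j' kept one slot ahead; it stops
** when 'j' reaches 'limit' (the descriptor index handed in by
** ixl_rxeof()), so the refresh never catches all the way up to the
** slot the receive path is still working on.
*/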
980static void
981ixl_refresh_mbufs(struct ixl_queue *que, int limit)
982{
983	struct ixl_vsi		*vsi = que->vsi;
984	struct rx_ring		*rxr = &que->rxr;
985	bus_dma_segment_t	hseg[1];
986	bus_dma_segment_t	pseg[1];
987	struct ixl_rx_buf	*buf;
988	struct mbuf		*mh, *mp;
989	int			i, j, nsegs, error;
990	bool			refreshed = FALSE;
991
992	i = j = rxr->next_refresh;
993	/* Control the loop with one beyond */
994	if (++j == que->num_desc)
995		j = 0;
996
997	while (j != limit) {
998		buf = &rxr->buffers[i];
999		if (rxr->hdr_split == FALSE)
1000			goto no_split;
1001
1002		if (buf->m_head == NULL) {
1003			mh = m_gethdr(M_NOWAIT, MT_DATA);
1004			if (mh == NULL)
1005				goto update;
1006		} else
1007			mh = buf->m_head;
1008
1009		mh->m_pkthdr.len = mh->m_len = MHLEN;
1011		mh->m_flags |= M_PKTHDR;
1012		/* Get the memory mapping */
1013		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1014		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1015		if (error != 0) {
1016			printf("Refresh mbufs: hdr dmamap load"
1017			    " failure - %d\n", error);
1018			m_free(mh);
1019			buf->m_head = NULL;
1020			goto update;
1021		}
1022		buf->m_head = mh;
1023		bus_dmamap_sync(rxr->htag, buf->hmap,
1024		    BUS_DMASYNC_PREREAD);
1025		rxr->base[i].read.hdr_addr =
1026		   htole64(hseg[0].ds_addr);
1027
1028no_split:
1029		if (buf->m_pack == NULL) {
1030			mp = m_getjcl(M_NOWAIT, MT_DATA,
1031			    M_PKTHDR, rxr->mbuf_sz);
1032			if (mp == NULL)
1033				goto update;
1034		} else
1035			mp = buf->m_pack;
1036
1037		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1038		/* Get the memory mapping */
1039		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1040		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1041		if (error != 0) {
1042			printf("Refresh mbufs: payload dmamap load"
1043			    " failure - %d\n", error);
1044			m_free(mp);
1045			buf->m_pack = NULL;
1046			goto update;
1047		}
1048		buf->m_pack = mp;
1049		bus_dmamap_sync(rxr->ptag, buf->pmap,
1050		    BUS_DMASYNC_PREREAD);
1051		rxr->base[i].read.pkt_addr =
1052		   htole64(pseg[0].ds_addr);
1053		/* Used only when doing header split */
1054		rxr->base[i].read.hdr_addr = 0;
1055
1056		refreshed = TRUE;
1057		/* Next is precalculated */
1058		i = j;
1059		rxr->next_refresh = i;
1060		if (++j == que->num_desc)
1061			j = 0;
1062	}
1063update:
1064	if (refreshed) /* Update hardware tail index */
1065		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1066	return;
1067}
1068
1069
1070/*********************************************************************
1071 *
1072 *  Allocate memory for rx_buffer structures. Since we use one
1073 *  rx_buffer per descriptor, the maximum number of rx_buffer's
1074 *  that we'll need is equal to the number of receive descriptors
1075 *  that we've defined.
1076 *
1077 **********************************************************************/
1078int
1079ixl_allocate_rx_data(struct ixl_queue *que)
1080{
1081	struct rx_ring		*rxr = &que->rxr;
1082	struct ixl_vsi		*vsi = que->vsi;
1083	device_t 		dev = vsi->dev;
1084	struct ixl_rx_buf 	*buf;
1085	int             	i, bsize, error;
1086
1087	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1088	if (!(rxr->buffers =
1089	    (struct ixl_rx_buf *) malloc(bsize,
1090	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1091		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1092		error = ENOMEM;
1093		return (error);
1094	}
1095
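	/*
	 * Two DMA tags are used below: 'htag' maps the small header mbufs
	 * used when header split is enabled (up to MSIZE), while 'ptag'
	 * maps the payload clusters (up to MJUM16BYTES).
	 */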
1096	if ((error = bus_dma_tag_create(NULL,	/* parent */
1097				   1, 0,	/* alignment, bounds */
1098				   BUS_SPACE_MAXADDR,	/* lowaddr */
1099				   BUS_SPACE_MAXADDR,	/* highaddr */
1100				   NULL, NULL,		/* filter, filterarg */
1101				   MSIZE,		/* maxsize */
1102				   1,			/* nsegments */
1103				   MSIZE,		/* maxsegsize */
1104				   0,			/* flags */
1105				   NULL,		/* lockfunc */
1106				   NULL,		/* lockfuncarg */
1107				   &rxr->htag))) {
1108		device_printf(dev, "Unable to create RX DMA htag\n");
1109		return (error);
1110	}
1111
1112	if ((error = bus_dma_tag_create(NULL,	/* parent */
1113				   1, 0,	/* alignment, bounds */
1114				   BUS_SPACE_MAXADDR,	/* lowaddr */
1115				   BUS_SPACE_MAXADDR,	/* highaddr */
1116				   NULL, NULL,		/* filter, filterarg */
1117				   MJUM16BYTES,		/* maxsize */
1118				   1,			/* nsegments */
1119				   MJUM16BYTES,		/* maxsegsize */
1120				   0,			/* flags */
1121				   NULL,		/* lockfunc */
1122				   NULL,		/* lockfuncarg */
1123				   &rxr->ptag))) {
1124		device_printf(dev, "Unable to create RX DMA ptag\n");
1125		return (error);
1126	}
1127
1128	for (i = 0; i < que->num_desc; i++) {
1129		buf = &rxr->buffers[i];
1130		error = bus_dmamap_create(rxr->htag,
1131		    BUS_DMA_NOWAIT, &buf->hmap);
1132		if (error) {
1133			device_printf(dev, "Unable to create RX head map\n");
1134			break;
1135		}
1136		error = bus_dmamap_create(rxr->ptag,
1137		    BUS_DMA_NOWAIT, &buf->pmap);
1138		if (error) {
1139			device_printf(dev, "Unable to create RX pkt map\n");
1140			break;
1141		}
1142	}
1143
1144	return (error);
1145}
1146
1147
1148/*********************************************************************
1149 *
1150 *  (Re)Initialize the queue receive ring and its buffers.
1151 *
1152 **********************************************************************/
1153int
1154ixl_init_rx_ring(struct ixl_queue *que)
1155{
1156	struct	rx_ring 	*rxr = &que->rxr;
1157	struct ixl_vsi		*vsi = que->vsi;
1158#if defined(INET6) || defined(INET)
1159	struct ifnet		*ifp = vsi->ifp;
1160	struct lro_ctrl		*lro = &rxr->lro;
1161#endif
1162	struct ixl_rx_buf	*buf;
1163	bus_dma_segment_t	pseg[1], hseg[1];
1164	int			rsize, nsegs, error = 0;
1165#ifdef DEV_NETMAP
1166	struct netmap_adapter *na = NA(que->vsi->ifp);
1167	struct netmap_slot *slot;
1168#endif /* DEV_NETMAP */
1169
1170	IXL_RX_LOCK(rxr);
1171#ifdef DEV_NETMAP
1172	/* same as in ixl_init_tx_ring() */
1173	slot = netmap_reset(na, NR_RX, que->me, 0);
1174#endif /* DEV_NETMAP */
1175	/* Clear the ring contents */
1176	rsize = roundup2(que->num_desc *
1177	    sizeof(union i40e_rx_desc), DBA_ALIGN);
1178	bzero((void *)rxr->base, rsize);
1179	/* Cleanup any existing buffers */
1180	for (int i = 0; i < que->num_desc; i++) {
1181		buf = &rxr->buffers[i];
1182		if (buf->m_head != NULL) {
1183			bus_dmamap_sync(rxr->htag, buf->hmap,
1184			    BUS_DMASYNC_POSTREAD);
1185			bus_dmamap_unload(rxr->htag, buf->hmap);
1186			buf->m_head->m_flags |= M_PKTHDR;
1187			m_freem(buf->m_head);
1188		}
1189		if (buf->m_pack != NULL) {
1190			bus_dmamap_sync(rxr->ptag, buf->pmap,
1191			    BUS_DMASYNC_POSTREAD);
1192			bus_dmamap_unload(rxr->ptag, buf->pmap);
1193			buf->m_pack->m_flags |= M_PKTHDR;
1194			m_freem(buf->m_pack);
1195		}
1196		buf->m_head = NULL;
1197		buf->m_pack = NULL;
1198	}
1199
1200	/* header split is off */
1201	rxr->hdr_split = FALSE;
1202
1203	/* Now replenish the mbufs */
1204	for (int j = 0; j != que->num_desc; ++j) {
1205		struct mbuf	*mh, *mp;
1206
1207		buf = &rxr->buffers[j];
1208#ifdef DEV_NETMAP
1209		/*
1210		 * In netmap mode, fill the map and set the buffer
1211		 * address in the NIC ring, considering the offset
1212		 * between the netmap and NIC rings (see comment in
1213		 * ixl_init_tx_ring() above). No need to allocate
1214		 * an mbuf, so end the block with a continue;
1215		 */
1216		if (slot) {
1217			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1218			uint64_t paddr;
1219			void *addr;
1220
1221			addr = PNMB(na, slot + sj, &paddr);
1222			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1223			/* Update descriptor and the cached value */
1224			rxr->base[j].read.pkt_addr = htole64(paddr);
1225			rxr->base[j].read.hdr_addr = 0;
1226			continue;
1227		}
1228#endif /* DEV_NETMAP */
1229		/*
1230		** Don't allocate a header mbuf if we're not
1231		** doing header split; it's wasteful
1232		*/
1233		if (rxr->hdr_split == FALSE)
1234			goto skip_head;
1235
1236		/* First the header */
1237		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1238		if (buf->m_head == NULL) {
1239			error = ENOBUFS;
1240			goto fail;
1241		}
1242		m_adj(buf->m_head, ETHER_ALIGN);
1243		mh = buf->m_head;
1244		mh->m_len = mh->m_pkthdr.len = MHLEN;
1245		mh->m_flags |= M_PKTHDR;
1246		/* Get the memory mapping */
1247		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1248		    buf->hmap, buf->m_head, hseg,
1249		    &nsegs, BUS_DMA_NOWAIT);
1250		if (error != 0) /* Nothing elegant to do here */
1251			goto fail;
1252		bus_dmamap_sync(rxr->htag,
1253		    buf->hmap, BUS_DMASYNC_PREREAD);
1254		/* Update descriptor */
1255		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1256
1257skip_head:
1258		/* Now the payload cluster */
1259		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1260		    M_PKTHDR, rxr->mbuf_sz);
1261		if (buf->m_pack == NULL) {
1262			error = ENOBUFS;
1263                        goto fail;
1264		}
1265		mp = buf->m_pack;
1266		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1267		/* Get the memory mapping */
1268		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1269		    buf->pmap, mp, pseg,
1270		    &nsegs, BUS_DMA_NOWAIT);
1271		if (error != 0)
1272                        goto fail;
1273		bus_dmamap_sync(rxr->ptag,
1274		    buf->pmap, BUS_DMASYNC_PREREAD);
1275		/* Update descriptor */
1276		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1277		rxr->base[j].read.hdr_addr = 0;
1278	}
1279
1280
1281	/* Setup our descriptor indices */
1282	rxr->next_check = 0;
1283	rxr->next_refresh = 0;
1284	rxr->lro_enabled = FALSE;
1285	rxr->split = 0;
1286	rxr->bytes = 0;
1287	rxr->discard = FALSE;
1288
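	/*
	 * Hand the ring to the hardware; writing num_desc - 1 as the tail
	 * leaves one slot unowned, a common convention so that a full ring
	 * remains distinguishable from an empty one.
	 */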
1289	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1290	ixl_flush(vsi->hw);
1291
1292#if defined(INET6) || defined(INET)
1293	/*
1294	** Now set up the LRO interface:
1295	*/
1296	if (ifp->if_capenable & IFCAP_LRO) {
1297		int err = tcp_lro_init(lro);
1298		if (err) {
1299			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1300			goto fail;
1301		}
1302		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1303		rxr->lro_enabled = TRUE;
1304		lro->ifp = vsi->ifp;
1305	}
1306#endif
1307
1308	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1309	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1310
1311fail:
1312	IXL_RX_UNLOCK(rxr);
1313	return (error);
1314}
1315
1316
1317/*********************************************************************
1318 *
1319 *  Free station receive ring data structures
1320 *
1321 **********************************************************************/
1322void
1323ixl_free_que_rx(struct ixl_queue *que)
1324{
1325	struct rx_ring		*rxr = &que->rxr;
1326	struct ixl_rx_buf	*buf;
1327
1328	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1329
1330	/* Cleanup any existing buffers */
1331	if (rxr->buffers != NULL) {
1332		for (int i = 0; i < que->num_desc; i++) {
1333			buf = &rxr->buffers[i];
1334			if (buf->m_head != NULL) {
1335				bus_dmamap_sync(rxr->htag, buf->hmap,
1336				    BUS_DMASYNC_POSTREAD);
1337				bus_dmamap_unload(rxr->htag, buf->hmap);
1338				buf->m_head->m_flags |= M_PKTHDR;
1339				m_freem(buf->m_head);
1340			}
1341			if (buf->m_pack != NULL) {
1342				bus_dmamap_sync(rxr->ptag, buf->pmap,
1343				    BUS_DMASYNC_POSTREAD);
1344				bus_dmamap_unload(rxr->ptag, buf->pmap);
1345				buf->m_pack->m_flags |= M_PKTHDR;
1346				m_freem(buf->m_pack);
1347			}
1348			buf->m_head = NULL;
1349			buf->m_pack = NULL;
1350			if (buf->hmap != NULL) {
1351				bus_dmamap_destroy(rxr->htag, buf->hmap);
1352				buf->hmap = NULL;
1353			}
1354			if (buf->pmap != NULL) {
1355				bus_dmamap_destroy(rxr->ptag, buf->pmap);
1356				buf->pmap = NULL;
1357			}
1358		}
1359		if (rxr->buffers != NULL) {
1360			free(rxr->buffers, M_DEVBUF);
1361			rxr->buffers = NULL;
1362		}
1363	}
1364
1365	if (rxr->htag != NULL) {
1366		bus_dma_tag_destroy(rxr->htag);
1367		rxr->htag = NULL;
1368	}
1369	if (rxr->ptag != NULL) {
1370		bus_dma_tag_destroy(rxr->ptag);
1371		rxr->ptag = NULL;
1372	}
1373
1374	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1375	return;
1376}
1377
1378static inline void
1379ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1380{
1381
1382#if defined(INET6) || defined(INET)
1383        /*
1384         * At the moment LRO is only done for IPv4/TCP packets whose TCP
1385         * checksum has been verified by hardware, and which carry no VLAN
1386         * tag in the ethernet header.
1387         */
1388        if (rxr->lro_enabled &&
1389            (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1390            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1391            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1392                /*
1393                 * Send to the stack if:
1394                 **  - LRO not enabled, or
1395                 **  - no LRO resources, or
1396                 **  - lro enqueue fails
1397                 */
1398                if (rxr->lro.lro_cnt != 0)
1399                        if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1400                                return;
1401        }
1402#endif
1403	IXL_RX_UNLOCK(rxr);
1404        (*ifp->if_input)(ifp, m);
1405	IXL_RX_LOCK(rxr);
1406}
1407
1408
1409static inline void
1410ixl_rx_discard(struct rx_ring *rxr, int i)
1411{
1412	struct ixl_rx_buf	*rbuf;
1413
1414	rbuf = &rxr->buffers[i];
1415
1416        if (rbuf->fmp != NULL) {/* Partial chain ? */
1417		rbuf->fmp->m_flags |= M_PKTHDR;
1418                m_freem(rbuf->fmp);
1419                rbuf->fmp = NULL;
1420	}
1421
1422	/*
1423	** With advanced descriptors the writeback
1424	** clobbers the buffer addrs, so it's easier
1425	** to just free the existing mbufs and take
1426	** the normal refresh path to get new buffers
1427	** and mapping.
1428	*/
1429	if (rbuf->m_head) {
1430		m_free(rbuf->m_head);
1431		rbuf->m_head = NULL;
1432	}
1433
1434	if (rbuf->m_pack) {
1435		m_free(rbuf->m_pack);
1436		rbuf->m_pack = NULL;
1437	}
1438
1439	return;
1440}
1441
1442#ifdef RSS
1443/*
1444** ixl_ptype_to_hash: parse the packet type
1445** to determine the appropriate hash.
1446*/
1447static inline int
1448ixl_ptype_to_hash(u8 ptype)
1449{
1450        struct i40e_rx_ptype_decoded	decoded;
1451	u8				ex = 0;
1452
1453	decoded = decode_rx_desc_ptype(ptype);
1454	ex = decoded.outer_frag;
1455
1456	if (!decoded.known)
1457		return M_HASHTYPE_OPAQUE_HASH;
1458
1459	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
1460		return M_HASHTYPE_OPAQUE_HASH;
1461
1462	/* Note: anything that gets to this point is IP */
1463        if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
1464		switch (decoded.inner_prot) {
1465			case I40E_RX_PTYPE_INNER_PROT_TCP:
1466				if (ex)
1467					return M_HASHTYPE_RSS_TCP_IPV6_EX;
1468				else
1469					return M_HASHTYPE_RSS_TCP_IPV6;
1470			case I40E_RX_PTYPE_INNER_PROT_UDP:
1471				if (ex)
1472					return M_HASHTYPE_RSS_UDP_IPV6_EX;
1473				else
1474					return M_HASHTYPE_RSS_UDP_IPV6;
1475			default:
1476				if (ex)
1477					return M_HASHTYPE_RSS_IPV6_EX;
1478				else
1479					return M_HASHTYPE_RSS_IPV6;
1480		}
1481	}
1482        if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
1483		switch (decoded.inner_prot) {
1484			case I40E_RX_PTYPE_INNER_PROT_TCP:
1485					return M_HASHTYPE_RSS_TCP_IPV4;
1486			case I40E_RX_PTYPE_INNER_PROT_UDP:
1487				if (ex)
1488					return M_HASHTYPE_RSS_UDP_IPV4_EX;
1489				else
1490					return M_HASHTYPE_RSS_UDP_IPV4;
1491			default:
1492					return M_HASHTYPE_RSS_IPV4;
1493		}
1494	}
1495	/* We should never get here!! */
1496	return M_HASHTYPE_OPAQUE_HASH;
1497}
1498#endif /* RSS */
1499
1500/*********************************************************************
1501 *
1502 *  This routine executes in interrupt context. It replenishes
1503 *  the mbufs in the descriptor and sends data which has been
1504 *  dma'ed into host memory to upper layer.
1505 *
1506 *  We loop at most count times if count is > 0, or until done if
1507 *  count < 0.
1508 *
1509 *  Return TRUE for more work, FALSE for all clean.
1510 *********************************************************************/
1511bool
1512ixl_rxeof(struct ixl_queue *que, int count)
1513{
1514	struct ixl_vsi		*vsi = que->vsi;
1515	struct rx_ring		*rxr = &que->rxr;
1516	struct ifnet		*ifp = vsi->ifp;
1517#if defined(INET6) || defined(INET)
1518	struct lro_ctrl		*lro = &rxr->lro;
1519#endif
1520	int			i, nextp, processed = 0;
1521	union i40e_rx_desc	*cur;
1522	struct ixl_rx_buf	*rbuf, *nbuf;
1523
1524
1525	IXL_RX_LOCK(rxr);
1526
1527#ifdef DEV_NETMAP
1528	if (netmap_rx_irq(ifp, que->me, &count)) {
1529		IXL_RX_UNLOCK(rxr);
1530		return (FALSE);
1531	}
1532#endif /* DEV_NETMAP */
1533
1534	for (i = rxr->next_check; count != 0;) {
1535		struct mbuf	*sendmp, *mh, *mp;
1536		u32		status, error;
1537		u16		hlen, plen, vtag;
1538		u64		qword;
1539		u8		ptype;
1540		bool		eop;
1541
1542		/* Sync the ring. */
1543		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1544		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1545
1546		cur = &rxr->base[i];
1547		qword = le64toh(cur->wb.qword1.status_error_len);
1548		status = (qword & I40E_RXD_QW1_STATUS_MASK)
1549		    >> I40E_RXD_QW1_STATUS_SHIFT;
1550		error = (qword & I40E_RXD_QW1_ERROR_MASK)
1551		    >> I40E_RXD_QW1_ERROR_SHIFT;
1552		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1553		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1554		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1555		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1556		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1557		    >> I40E_RXD_QW1_PTYPE_SHIFT;
1558
1559		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1560			++rxr->not_done;
1561			break;
1562		}
1563		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1564			break;
1565
1566		count--;
1567		sendmp = NULL;
1568		nbuf = NULL;
1569		cur->wb.qword1.status_error_len = 0;
1570		rbuf = &rxr->buffers[i];
1571		mh = rbuf->m_head;
1572		mp = rbuf->m_pack;
1573		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1574		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1575			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1576		else
1577			vtag = 0;
1578
1579		/*
1580		** Make sure bad packets are discarded;
1581		** note that only the EOP descriptor has
1582		** valid error results.
1583		*/
1584                if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1585			rxr->desc_errs++;
1586			ixl_rx_discard(rxr, i);
1587			goto next_desc;
1588		}
1589
1590		/* Prefetch the next buffer */
1591		if (!eop) {
1592			nextp = i + 1;
1593			if (nextp == que->num_desc)
1594				nextp = 0;
1595			nbuf = &rxr->buffers[nextp];
1596			prefetch(nbuf);
1597		}
1598
1599		/*
1600		** The header mbuf is ONLY used when header
1601		** split is enabled, otherwise we get normal
1602		** behavior, i.e., both header and payload
1603		** are DMA'd into the payload buffer.
1604		**
1605		** Rather than using the fmp/lmp global pointers
1606		** we now keep the head of a packet chain in the
1607		** buffer struct and pass this along from one
1608		** descriptor to the next, until we get EOP.
1609		*/
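		/*
		 * Example: a header-split packet spanning three descriptors
		 * has desc 0 stash the chain head in the next buffer's fmp
		 * and link its cluster forward, desc 1 do the same, and
		 * desc 2 (the EOP) pick the chain up as sendmp and hand the
		 * completed packet to the stack.
		 */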
1610		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1611			if (hlen > IXL_RX_HDR)
1612				hlen = IXL_RX_HDR;
1613			mh->m_len = hlen;
1614			mh->m_flags |= M_PKTHDR;
1615			mh->m_next = NULL;
1616			mh->m_pkthdr.len = mh->m_len;
1617			/* Null buf pointer so it is refreshed */
1618			rbuf->m_head = NULL;
1619			/*
1620			** Check the payload length; this
1621			** could be zero if it's a small
1622			** packet.
1623			*/
1624			if (plen > 0) {
1625				mp->m_len = plen;
1626				mp->m_next = NULL;
1627				mp->m_flags &= ~M_PKTHDR;
1628				mh->m_next = mp;
1629				mh->m_pkthdr.len += mp->m_len;
1630				/* Null buf pointer so it is refreshed */
1631				rbuf->m_pack = NULL;
1632				rxr->split++;
1633			}
1634			/*
1635			** Now create the forward
1636			** chain so when complete
1637			** we won't have to.
1638			*/
1639                        if (eop == 0) {
1640				/* stash the chain head */
1641                                nbuf->fmp = mh;
1642				/* Make forward chain */
1643                                if (plen)
1644                                        mp->m_next = nbuf->m_pack;
1645                                else
1646                                        mh->m_next = nbuf->m_pack;
1647                        } else {
1648				/* Singlet, prepare to send */
1649                                sendmp = mh;
1650                                if (vtag) {
1651                                        sendmp->m_pkthdr.ether_vtag = vtag;
1652                                        sendmp->m_flags |= M_VLANTAG;
1653                                }
1654                        }
1655		} else {
1656			/*
1657			** Either no header split, or a
1658			** secondary piece of a fragmented
1659			** split packet.
1660			*/
1661			mp->m_len = plen;
1662			/*
1663			** See if there is a stored head from an
1664			** earlier descriptor that tells us what this is
1665			*/
1666			sendmp = rbuf->fmp;
1667			rbuf->m_pack = rbuf->fmp = NULL;
1668
1669			if (sendmp != NULL) /* secondary frag */
1670				sendmp->m_pkthdr.len += mp->m_len;
1671			else {
1672				/* first desc of a non-ps chain */
1673				sendmp = mp;
1674				sendmp->m_flags |= M_PKTHDR;
1675				sendmp->m_pkthdr.len = mp->m_len;
1676                        }
1677			/* Pass the head pointer on */
1678			if (eop == 0) {
1679				nbuf->fmp = sendmp;
1680				sendmp = NULL;
1681				mp->m_next = nbuf->m_pack;
1682			}
1683		}
1684		++processed;
1685		/* Sending this frame? */
1686		if (eop) {
1687			sendmp->m_pkthdr.rcvif = ifp;
1688			/* gather stats */
1689			rxr->rx_packets++;
1690			rxr->rx_bytes += sendmp->m_pkthdr.len;
1691			/* capture data for dynamic ITR adjustment */
1692			rxr->packets++;
1693			rxr->bytes += sendmp->m_pkthdr.len;
1694			/* Set VLAN tag (field only valid in eop desc) */
1695			if (vtag) {
1696				sendmp->m_pkthdr.ether_vtag = vtag;
1697				sendmp->m_flags |= M_VLANTAG;
1698			}
1699			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1700				ixl_rx_checksum(sendmp, status, error, ptype);
1701#ifdef RSS
1702			sendmp->m_pkthdr.flowid =
1703			    le32toh(cur->wb.qword0.hi_dword.rss);
1704			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1705#else
1706			sendmp->m_pkthdr.flowid = que->msix;
1707			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1708#endif
1709		}
1710next_desc:
1711		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1712		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1713
1714		/* Advance our pointers to the next descriptor. */
1715		if (++i == que->num_desc)
1716			i = 0;
1717
1718		/* Now send to the stack or do LRO */
1719		if (sendmp != NULL) {
1720			rxr->next_check = i;
1721			ixl_rx_input(rxr, ifp, sendmp, ptype);
1722			i = rxr->next_check;
1723		}
1724
1725		/* Every 8 processed descriptors, refresh mbufs so the hardware is not starved of buffers */
1726		if (processed == 8) {
1727			ixl_refresh_mbufs(que, i);
1728			processed = 0;
1729		}
1730	}
1731
1732	/* Refresh any remaining buf structs */
1733	if (ixl_rx_unrefreshed(que))
1734		ixl_refresh_mbufs(que, i);
1735
1736	rxr->next_check = i;
1737
1738#if defined(INET6) || defined(INET)
1739	/*
1740	 * Flush any outstanding LRO work
1741	 */
1742#if __FreeBSD_version >= 1100105
1743	tcp_lro_flush_all(lro);
1744#else
1745	struct lro_entry *queued;
1746	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1747		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1748		tcp_lro_flush(lro, queued);
1749	}
1750#endif
1751#endif /* defined(INET6) || defined(INET) */
1752
1753	IXL_RX_UNLOCK(rxr);
1754	return (FALSE);
1755}
1756
1757
1758/*********************************************************************
1759 *
1760 *  Verify that the hardware indicated that the checksum is valid.
1761 *  Inform the stack about the status of the checksum so that it
1762 *  does not spend time re-verifying it.
1763 *
1764 *********************************************************************/
1765static void
1766ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1767{
1768	struct i40e_rx_ptype_decoded decoded;
1769
1770	decoded = decode_rx_desc_ptype(ptype);
1771
1772	/* Errors? */
1773 	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1774	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1775		mp->m_pkthdr.csum_flags = 0;
1776		return;
1777	}
1778
1779	/* IPv6 with extension headers likely have bad csum */
1780	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1781	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1782		if (status &
1783		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1784			mp->m_pkthdr.csum_flags = 0;
1785			return;
1786		}
1787
1788
1789	/* IP Checksum Good */
1790	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1791	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1792
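	/*
	 * L3L4P indicates the hardware ran its L3/L4 integrity checks;
	 * with the error bits clear (tested above), report the L4 checksum
	 * to the stack as verified (pseudo-header included, csum_data
	 * 0xffff).
	 */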
1793	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1794		mp->m_pkthdr.csum_flags |=
1795		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1796		mp->m_pkthdr.csum_data |= htons(0xffff);
1797	}
1798	return;
1799}
1800
1801#if __FreeBSD_version >= 1100000
1802uint64_t
1803ixl_get_counter(if_t ifp, ift_counter cnt)
1804{
1805	struct ixl_vsi *vsi;
1806
1807	vsi = if_getsoftc(ifp);
1808
1809	switch (cnt) {
1810	case IFCOUNTER_IPACKETS:
1811		return (vsi->ipackets);
1812	case IFCOUNTER_IERRORS:
1813		return (vsi->ierrors);
1814	case IFCOUNTER_OPACKETS:
1815		return (vsi->opackets);
1816	case IFCOUNTER_OERRORS:
1817		return (vsi->oerrors);
1818	case IFCOUNTER_COLLISIONS:
1819		/* Collisions are by standard impossible in 40G/10G Ethernet */
1820		return (0);
1821	case IFCOUNTER_IBYTES:
1822		return (vsi->ibytes);
1823	case IFCOUNTER_OBYTES:
1824		return (vsi->obytes);
1825	case IFCOUNTER_IMCASTS:
1826		return (vsi->imcasts);
1827	case IFCOUNTER_OMCASTS:
1828		return (vsi->omcasts);
1829	case IFCOUNTER_IQDROPS:
1830		return (vsi->iqdrops);
1831	case IFCOUNTER_OQDROPS:
1832		return (vsi->oqdrops);
1833	case IFCOUNTER_NOPROTO:
1834		return (vsi->noproto);
1835	default:
1836		return (if_get_counter_default(ifp, cnt));
1837	}
1838}
1839#endif
1840
1841