/*
 * Copyright (C) 2015 Cavium Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/dev/vnic/nicvf_queues.c 296039 2016-02-25 14:29:57Z zbb $
 *
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/vnic/nicvf_queues.c 296039 2016-02-25 14:29:57Z zbb $");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitset.h>
#include <sys/bitstring.h>
#include <sys/buf_ring.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/pciio.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sockio.h>
#include <sys/socket.h>
#include <sys/stdatomic.h>
#include <sys/cpuset.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/smp.h>
#include <sys/taskqueue.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/vmparam.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_media.h>
#include <net/ifq.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/sctp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "thunder_bgx.h"
#include "nic_reg.h"
#include "nic.h"
#include "q_struct.h"
#include "nicvf_queues.h"

#define	DEBUG
#undef DEBUG

#ifdef DEBUG
#define	dprintf(dev, fmt, ...)	device_printf(dev, fmt, ##__VA_ARGS__)
#else
#define	dprintf(dev, fmt, ...)
#endif

MALLOC_DECLARE(M_NICVF);

static void nicvf_free_snd_queue(struct nicvf *, struct snd_queue *);
static struct mbuf * nicvf_get_rcv_mbuf(struct nicvf *, struct cqe_rx_t *);
static void nicvf_sq_disable(struct nicvf *, int);
static void nicvf_sq_enable(struct nicvf *, struct snd_queue *, int);
static void nicvf_put_sq_desc(struct snd_queue *, int);
static void nicvf_cmp_queue_config(struct nicvf *, struct queue_set *, int,
    boolean_t);
static void nicvf_sq_free_used_descs(struct nicvf *, struct snd_queue *, int);

static void nicvf_rbdr_task(void *, int);
static void nicvf_rbdr_task_nowait(void *, int);

struct rbuf_info {
	bus_dma_tag_t	dmat;
	bus_dmamap_t	dmap;
	struct mbuf *	mbuf;
};

#define GET_RBUF_INFO(x) ((struct rbuf_info *)((x) - NICVF_RCV_BUF_ALIGN_BYTES))
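/*
 * Every receive buffer reserves NICVF_RCV_BUF_ALIGN_BYTES in front of the
 * packet data for the rbuf_info header above (see nicvf_alloc_rcv_buffer()),
 * so, given the DMAP'ed address of the data, stepping back by that amount
 * recovers the header.
 */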

/* Poll a register for a specific value */
static int
nicvf_poll_reg(struct nicvf *nic, int qidx, uint64_t reg, int bit_pos,
    int bits, int val)
{
	uint64_t bit_mask;
	uint64_t reg_val;
	int timeout = 10;

	bit_mask = (1UL << bits) - 1;
	bit_mask = (bit_mask << bit_pos);

	while (timeout) {
		reg_val = nicvf_queue_reg_read(nic, reg, qidx);
		if (((reg_val & bit_mask) >> bit_pos) == val)
			return (0);

		DELAY(1000);
		timeout--;
	}
	device_printf(nic->dev, "Poll on reg 0x%lx failed\n", reg);
	return (ETIMEDOUT);
}

/* Callback for bus_dmamap_load() */
static void
nicvf_dmamap_q_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *paddr;

	KASSERT(nseg == 1, ("wrong number of segments, should be 1"));
	paddr = arg;
	*paddr = segs->ds_addr;
}

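/*
 * Descriptor ring memory follows the usual busdma sequence: create a tag
 * describing the constraints, allocate DMA-safe memory against it and load
 * the map to learn the physical base address.  Teardown (see
 * nicvf_free_q_desc_mem()) mirrors these steps in reverse order.
 */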
/* Allocate memory for a queue's descriptors */
static int
nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
    int q_len, int desc_size, int align_bytes)
{
	int err, err_dmat;

	/* Create DMA tag first */
	err = bus_dma_tag_create(
	    bus_get_dma_tag(nic->dev),		/* parent tag */
	    align_bytes,			/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filtfunc, filtfuncarg */
	    (q_len * desc_size),		/* maxsize */
	    1,					/* nsegments */
	    (q_len * desc_size),		/* maxsegsize */
	    0,					/* flags */
	    NULL, NULL,				/* lockfunc, lockfuncarg */
	    &dmem->dmat);			/* dmat */

	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to create busdma tag for descriptors ring\n");
		return (err);
	}

	/* Allocate a segment of contiguous DMA-safe memory */
	err = bus_dmamem_alloc(
	    dmem->dmat,				/* DMA tag */
	    &dmem->base,			/* virtual address */
	    (BUS_DMA_NOWAIT | BUS_DMA_ZERO),	/* flags */
	    &dmem->dmap);			/* DMA map */
	if (err != 0) {
		device_printf(nic->dev, "Failed to allocate DMA safe memory "
		    "for descriptors ring\n");
		goto dmamem_fail;
	}

	err = bus_dmamap_load(
	    dmem->dmat,
	    dmem->dmap,
	    dmem->base,
	    (q_len * desc_size),		/* allocation size */
	    nicvf_dmamap_q_cb,			/* map to DMA address cb. */
	    &dmem->phys_base,			/* physical address */
	    BUS_DMA_NOWAIT);
	if (err != 0) {
		device_printf(nic->dev,
		    "Cannot load DMA map of descriptors ring\n");
		goto dmamap_fail;
	}

	dmem->q_len = q_len;
	dmem->size = (desc_size * q_len);

	return (0);

dmamap_fail:
	bus_dmamem_free(dmem->dmat, dmem->base, dmem->dmap);
	dmem->phys_base = 0;
dmamem_fail:
	err_dmat = bus_dma_tag_destroy(dmem->dmat);
	dmem->base = NULL;
	KASSERT(err_dmat == 0,
	    ("%s: Trying to destroy BUSY DMA tag", __func__));

	return (err);
}

/* Free queue's descriptor memory */
static void
nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
{
	int err;

	if ((dmem == NULL) || (dmem->base == NULL))
		return;

	/* Unload the map */
	bus_dmamap_sync(dmem->dmat, dmem->dmap, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(dmem->dmat, dmem->dmap);
	/* Free DMA memory */
	bus_dmamem_free(dmem->dmat, dmem->base, dmem->dmap);
	/* Destroy DMA tag */
	err = bus_dma_tag_destroy(dmem->dmat);

	KASSERT(err == 0,
	    ("%s: Trying to destroy BUSY DMA tag", __func__));

	dmem->phys_base = 0;
	dmem->base = NULL;
}

/*
 * Allocate a buffer for packet reception.
 * The HW returns the memory address to which the packet was DMA'ed, not a
 * pointer into the RBDR ring, so save the buffer address at the start of
 * the fragment and align the start address to a cache-line boundary.
 */
static __inline int
nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
    bus_dmamap_t dmap, int mflags, uint32_t buf_len, bus_addr_t *rbuf)
{
	struct mbuf *mbuf;
	struct rbuf_info *rinfo;
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	mbuf = m_getjcl(mflags, MT_DATA, M_PKTHDR, MCLBYTES);
	if (mbuf == NULL)
		return (ENOMEM);

	/*
	 * The length is equal to the actual length + one 128-byte line
	 * used as room for the rbuf_info structure.
	 */
	mbuf->m_len = mbuf->m_pkthdr.len = buf_len;

	err = bus_dmamap_load_mbuf_sg(rbdr->rbdr_buff_dmat, dmap, mbuf, segs,
	    &nsegs, BUS_DMA_NOWAIT);
	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to map mbuf into DMA visible memory, err: %d\n",
		    err);
		m_freem(mbuf);
		bus_dmamap_destroy(rbdr->rbdr_buff_dmat, dmap);
		return (err);
	}
	if (nsegs != 1)
		panic("Unexpected number of DMA segments for RB: %d", nsegs);
	/*
	 * Now use the room for rbuf_info structure
	 * and adjust mbuf data and length.
	 */
	rinfo = (struct rbuf_info *)mbuf->m_data;
	m_adj(mbuf, NICVF_RCV_BUF_ALIGN_BYTES);

	rinfo->dmat = rbdr->rbdr_buff_dmat;
	rinfo->dmap = dmap;
	rinfo->mbuf = mbuf;

	*rbuf = segs[0].ds_addr + NICVF_RCV_BUF_ALIGN_BYTES;

	return (0);
}
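/*
 * The resulting buffer layout is roughly:
 *
 *   m_data (original) -> [ rbuf_info | pad to 128 bytes | packet data ]
 *                                                         ^- *rbuf (DMA)
 *
 * i.e. the address handed to the hardware points at the packet data area,
 * one aligned line past the rbuf_info header.
 */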

/* Retrieve mbuf for received packet */
static struct mbuf *
nicvf_rb_ptr_to_mbuf(struct nicvf *nic, bus_addr_t rb_ptr)
{
	struct mbuf *mbuf;
	struct rbuf_info *rinfo;

	/* Get buffer start address and alignment offset */
	rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(rb_ptr));

	/* Now retrieve mbuf to give to stack */
	mbuf = rinfo->mbuf;
	if (__predict_false(mbuf == NULL)) {
		panic("%s: Received packet fragment with NULL mbuf",
		    device_get_nameunit(nic->dev));
	}
	/*
	 * Clear the mbuf in the descriptor to indicate
	 * that this slot is processed and free to use.
	 */
	rinfo->mbuf = NULL;

	bus_dmamap_sync(rinfo->dmat, rinfo->dmap, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rinfo->dmat, rinfo->dmap);

	return (mbuf);
}

/* Allocate RBDR ring and populate receive buffers */
static int
nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, int ring_len,
    int buf_size, int qidx)
{
	bus_dmamap_t dmap;
	bus_addr_t rbuf;
	struct rbdr_entry_t *desc;
	int idx;
	int err;

	/* Allocate rbdr descriptors ring */
	err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
	    sizeof(struct rbdr_entry_t), NICVF_RCV_BUF_ALIGN_BYTES);
	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to create RBDR descriptors ring\n");
		return (err);
	}

	rbdr->desc = rbdr->dmem.base;
	/*
	 * Buffer size has to be in multiples of 128 bytes.
	 * Make room for metadata of the size of one line (128 bytes).
	 */
	rbdr->dma_size = buf_size - NICVF_RCV_BUF_ALIGN_BYTES;
	rbdr->enable = TRUE;
	rbdr->thresh = RBDR_THRESH;
	rbdr->nic = nic;
	rbdr->idx = qidx;

	/*
	 * Create DMA tag for Rx buffers.
	 * Each map created using this tag is intended to store Rx payload for
	 * one fragment and one header structure containing rbuf_info (thus
	 * an additional 128-byte line since an RB must be a multiple of the
	 * 128-byte cache line).
	 */
	if (buf_size > MCLBYTES) {
		device_printf(nic->dev,
		    "Buffer size too large for mbuf cluster\n");
		return (EINVAL);
	}
	err = bus_dma_tag_create(
	    bus_get_dma_tag(nic->dev),		/* parent tag */
	    NICVF_RCV_BUF_ALIGN_BYTES,		/* alignment */
	    0,					/* boundary */
	    DMAP_MAX_PHYSADDR,			/* lowaddr */
	    DMAP_MIN_PHYSADDR,			/* highaddr */
	    NULL, NULL,				/* filtfunc, filtfuncarg */
	    roundup2(buf_size, MCLBYTES),	/* maxsize */
	    1,					/* nsegments */
	    roundup2(buf_size, MCLBYTES),	/* maxsegsize */
	    0,					/* flags */
	    NULL, NULL,				/* lockfunc, lockfuncarg */
	    &rbdr->rbdr_buff_dmat);		/* dmat */

	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to create busdma tag for RBDR buffers\n");
		return (err);
	}

	rbdr->rbdr_buff_dmaps = malloc(sizeof(*rbdr->rbdr_buff_dmaps) *
	    ring_len, M_NICVF, (M_WAITOK | M_ZERO));

	for (idx = 0; idx < ring_len; idx++) {
		err = bus_dmamap_create(rbdr->rbdr_buff_dmat, 0, &dmap);
		if (err != 0) {
			device_printf(nic->dev,
			    "Failed to create DMA map for RB\n");
			return (err);
		}
		rbdr->rbdr_buff_dmaps[idx] = dmap;

		err = nicvf_alloc_rcv_buffer(nic, rbdr, dmap, M_WAITOK,
		    DMA_BUFFER_LEN, &rbuf);
		if (err != 0)
			return (err);

		desc = GET_RBDR_DESC(rbdr, idx);
		desc->buf_addr = (rbuf >> NICVF_RCV_BUF_ALIGN);
	}

	/* Allocate taskqueue */
	TASK_INIT(&rbdr->rbdr_task, 0, nicvf_rbdr_task, rbdr);
	TASK_INIT(&rbdr->rbdr_task_nowait, 0, nicvf_rbdr_task_nowait, rbdr);
	rbdr->rbdr_taskq = taskqueue_create_fast("nicvf_rbdr_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &rbdr->rbdr_taskq);
	taskqueue_start_threads(&rbdr->rbdr_taskq, 1, PI_NET, "%s: rbdr_taskq",
	    device_get_nameunit(nic->dev));

	return (0);
}

/* Free RBDR ring and its receive buffers */
static void
nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
{
	struct mbuf *mbuf;
	struct queue_set *qs;
	struct rbdr_entry_t *desc;
	struct rbuf_info *rinfo;
	bus_addr_t buf_addr;
	int head, tail, idx;
	int err;

	qs = nic->qs;

	if ((qs == NULL) || (rbdr == NULL))
		return;

	rbdr->enable = FALSE;
	if (rbdr->rbdr_taskq != NULL) {
		/* Remove tasks */
		while (taskqueue_cancel(rbdr->rbdr_taskq,
		    &rbdr->rbdr_task_nowait, NULL) != 0) {
			/* Finish the nowait task first */
			taskqueue_drain(rbdr->rbdr_taskq,
			    &rbdr->rbdr_task_nowait);
		}
		taskqueue_free(rbdr->rbdr_taskq);
		rbdr->rbdr_taskq = NULL;

		while (taskqueue_cancel(taskqueue_thread,
		    &rbdr->rbdr_task, NULL) != 0) {
			/* Now finish the sleepable task */
			taskqueue_drain(taskqueue_thread, &rbdr->rbdr_task);
		}
	}

	/*
	 * Free all of the memory under the RB descriptors.
	 * There are assumptions here:
	 * 1. The corresponding RBDR is disabled
	 *    - it is safe to operate using head and tail indexes
	 * 2. All buffers that were received were properly freed by
	 *    the receive handler
	 *    - there is no need to unload the DMA map and free the MBUF for
	 *      descriptors other than the unused ones
	 */
	if (rbdr->rbdr_buff_dmat != NULL) {
		head = rbdr->head;
		tail = rbdr->tail;
		while (head != tail) {
			desc = GET_RBDR_DESC(rbdr, head);
			buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
			rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(buf_addr));
			bus_dmamap_unload(rbdr->rbdr_buff_dmat, rinfo->dmap);
			mbuf = rinfo->mbuf;
			/* This will destroy everything including rinfo! */
			m_freem(mbuf);
			head++;
			head &= (rbdr->dmem.q_len - 1);
		}
		/* Free tail descriptor */
		desc = GET_RBDR_DESC(rbdr, tail);
		buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
		rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(buf_addr));
		bus_dmamap_unload(rbdr->rbdr_buff_dmat, rinfo->dmap);
		mbuf = rinfo->mbuf;
		/* This will destroy everything including rinfo! */
		m_freem(mbuf);

		/* Destroy DMA maps */
		for (idx = 0; idx < qs->rbdr_len; idx++) {
			if (rbdr->rbdr_buff_dmaps[idx] == NULL)
				continue;
			err = bus_dmamap_destroy(rbdr->rbdr_buff_dmat,
			    rbdr->rbdr_buff_dmaps[idx]);
			KASSERT(err == 0,
			    ("%s: Could not destroy DMA map for RB, desc: %d",
			    __func__, idx));
			rbdr->rbdr_buff_dmaps[idx] = NULL;
		}

		/* Now destroy the tag */
		err = bus_dma_tag_destroy(rbdr->rbdr_buff_dmat);
		KASSERT(err == 0,
		    ("%s: Trying to destroy BUSY DMA tag", __func__));

		rbdr->head = 0;
		rbdr->tail = 0;
	}

	/* Free RBDR ring */
	nicvf_free_q_desc_mem(nic, &rbdr->dmem);
}

/*
 * Refill receive buffer descriptors with new buffers.
 */
static int
nicvf_refill_rbdr(struct rbdr *rbdr, int mflags)
{
	struct nicvf *nic;
	struct queue_set *qs;
	int rbdr_idx;
	int tail, qcount;
	int refill_rb_cnt;
	struct rbdr_entry_t *desc;
	bus_dmamap_t dmap;
	bus_addr_t rbuf;
	boolean_t rb_alloc_fail;
	int new_rb;

	rb_alloc_fail = TRUE;
	new_rb = 0;
	nic = rbdr->nic;
	qs = nic->qs;
	rbdr_idx = rbdr->idx;

	/* Check if it's enabled */
	if (!rbdr->enable)
		return (0);

	/* Get the number of descriptors to be refilled */
	qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
	qcount &= 0x7FFFF;
	/* The doorbell can be rung with at most "ring size minus 1" */
	if (qcount >= (qs->rbdr_len - 1)) {
		rb_alloc_fail = FALSE;
		goto out;
	} else
		refill_rb_cnt = qs->rbdr_len - qcount - 1;

	/* Start filling descs from tail */
	tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
	while (refill_rb_cnt) {
		tail++;
		tail &= (rbdr->dmem.q_len - 1);

		dmap = rbdr->rbdr_buff_dmaps[tail];
		if (nicvf_alloc_rcv_buffer(nic, rbdr, dmap, mflags,
		    DMA_BUFFER_LEN, &rbuf)) {
			/* Something went wrong. Give up. */
			break;
		}
		desc = GET_RBDR_DESC(rbdr, tail);
		desc->buf_addr = (rbuf >> NICVF_RCV_BUF_ALIGN);
		refill_rb_cnt--;
		new_rb++;
	}

	/* Make sure all memory stores are done before ringing the doorbell */
	wmb();

	/* Check if buffer allocation failed */
	if (refill_rb_cnt == 0)
		rb_alloc_fail = FALSE;

	/* Notify HW */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
			      rbdr_idx, new_rb);
out:
	if (!rb_alloc_fail) {
		/*
		 * Re-enable RBDR interrupts only
		 * if buffer allocation succeeded.
		 */
		nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);

		return (0);
	}

	return (ENOMEM);
}

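/*
 * RBDR refill runs in two stages: the fast task below first tries M_NOWAIT
 * (suitable for the interrupt path) and, if allocation fails, defers to the
 * sleepable task on taskqueue_thread, which may wait for memory to be
 * reclaimed.
 */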
/* Refill RBs even if sleep is needed to reclaim memory */
static void
nicvf_rbdr_task(void *arg, int pending)
{
	struct rbdr *rbdr;
	int err;

	rbdr = (struct rbdr *)arg;

	err = nicvf_refill_rbdr(rbdr, M_WAITOK);
	if (__predict_false(err != 0)) {
		panic("%s: Failed to refill RBs even when sleep enabled",
		    __func__);
	}
}

/* Refill RBs as soon as possible without waiting */
static void
nicvf_rbdr_task_nowait(void *arg, int pending)
{
	struct rbdr *rbdr;
	int err;

	rbdr = (struct rbdr *)arg;

	err = nicvf_refill_rbdr(rbdr, M_NOWAIT);
	if (err != 0) {
		/*
		 * Schedule another, sleepable kernel thread
		 * that is guaranteed to refill the buffers.
		 */
		taskqueue_enqueue(taskqueue_thread, &rbdr->rbdr_task);
	}
}

static int
nicvf_rcv_pkt_handler(struct nicvf *nic, struct cmp_queue *cq,
    struct cqe_rx_t *cqe_rx, int cqe_type)
{
	struct mbuf *mbuf;
	struct rcv_queue *rq;
	int rq_idx;
	int err = 0;

	rq_idx = cqe_rx->rq_idx;
	rq = &nic->qs->rq[rq_idx];

	/* Check for errors */
	err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
	if (err && !cqe_rx->rb_cnt)
		return (0);

	mbuf = nicvf_get_rcv_mbuf(nic, cqe_rx);
	if (mbuf == NULL) {
		dprintf(nic->dev, "Packet not received\n");
		return (0);
	}

	/* If this is an error packet, drop it */
	if (err != 0) {
		m_freem(mbuf);
		return (0);
	}

	if (rq->lro_enabled &&
	    ((cqe_rx->l3_type == L3TYPE_IPV4) && (cqe_rx->l4_type == L4TYPE_TCP)) &&
	    (mbuf->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * At this point it is known that there are no errors in the
		 * packet. Attempt to LRO enqueue. Send to stack if no resources
		 * or enqueue error.
		 */
		if ((rq->lro.lro_cnt != 0) &&
		    (tcp_lro_rx(&rq->lro, mbuf, 0) == 0))
			return (0);
	}
	/*
	 * Push this packet to the stack later to avoid
	 * unlocking completion task in the middle of work.
	 */
	err = buf_ring_enqueue(cq->rx_br, mbuf);
	if (err != 0) {
		/*
		 * Failed to enqueue this mbuf.
		 * We don't drop it, just schedule another task.
		 */
		return (err);
	}

	return (0);
}
static int
nicvf_snd_pkt_handler(struct nicvf *nic, struct cmp_queue *cq,
    struct cqe_send_t *cqe_tx, int cqe_type)
{
	bus_dmamap_t dmap;
	struct mbuf *mbuf;
	struct snd_queue *sq;
	struct sq_hdr_subdesc *hdr;

	mbuf = NULL;
	sq = &nic->qs->sq[cqe_tx->sq_idx];
	/* Avoid blocking here since we hold a non-sleepable NICVF_CMP_LOCK */
	if (NICVF_TX_TRYLOCK(sq) == 0)
		return (EAGAIN);

	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
	if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
		NICVF_TX_UNLOCK(sq);
		return (0);
	}

	dprintf(nic->dev,
	    "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n",
	    __func__, cqe_tx->sq_qs, cqe_tx->sq_idx,
	    cqe_tx->sqe_ptr, hdr->subdesc_cnt);

	dmap = (bus_dmamap_t)sq->snd_buff[cqe_tx->sqe_ptr].dmap;
	bus_dmamap_unload(sq->snd_buff_dmat, dmap);

	mbuf = (struct mbuf *)sq->snd_buff[cqe_tx->sqe_ptr].mbuf;
	if (mbuf != NULL) {
		m_freem(mbuf);
		sq->snd_buff[cqe_tx->sqe_ptr].mbuf = NULL;
	}

	nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
	nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);

	NICVF_TX_UNLOCK(sq);
	return (0);
}

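/*
 * Process pending CQEs for the given completion queue: hand receive
 * completions to nicvf_rcv_pkt_handler() and transmit completions to
 * nicvf_snd_pkt_handler(), then ring the doorbell so the processed CQEs can
 * be reused.  Returns 0, or an error if the CQ could not be fully drained.
 */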
static int
nicvf_cq_intr_handler(struct nicvf *nic, uint8_t cq_idx)
{
	struct mbuf *mbuf;
	struct ifnet *ifp;
	int processed_cqe, work_done = 0, tx_done = 0;
	int cqe_count, cqe_head;
	struct queue_set *qs = nic->qs;
	struct cmp_queue *cq = &qs->cq[cq_idx];
	struct rcv_queue *rq;
	struct cqe_rx_t *cq_desc;
	struct lro_ctrl	*lro;
	struct lro_entry *queued;
	int rq_idx;
	int cmp_err;

	NICVF_CMP_LOCK(cq);
	cmp_err = 0;
	processed_cqe = 0;
	/* Get the number of valid CQ entries to process */
	cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
	cqe_count &= CQ_CQE_COUNT;
	if (cqe_count == 0)
		goto out;

	/* Get head of the valid CQ entries */
	cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
	cqe_head &= 0xFFFF;

	dprintf(nic->dev, "%s CQ%d cqe_count %d cqe_head %d\n",
	    __func__, cq_idx, cqe_count, cqe_head);
	while (processed_cqe < cqe_count) {
		/* Get the CQ descriptor */
		cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
		cqe_head++;
		cqe_head &= (cq->dmem.q_len - 1);
		/* Prefetch next CQ descriptor */
		__builtin_prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));

		dprintf(nic->dev, "CQ%d cq_desc->cqe_type %d\n", cq_idx,
		    cq_desc->cqe_type);
		switch (cq_desc->cqe_type) {
		case CQE_TYPE_RX:
			cmp_err = nicvf_rcv_pkt_handler(nic, cq, cq_desc,
			    CQE_TYPE_RX);
			if (__predict_false(cmp_err != 0)) {
				/*
				 * Oops. Cannot finish now.
				 * Let's try again later.
				 */
				goto done;
			}
			work_done++;
			break;
		case CQE_TYPE_SEND:
			cmp_err = nicvf_snd_pkt_handler(nic, cq,
			    (void *)cq_desc, CQE_TYPE_SEND);
			if (__predict_false(cmp_err != 0)) {
				/*
				 * Oops. Cannot finish now.
				 * Let's try again later.
				 */
				goto done;
			}

			tx_done++;
			break;
		case CQE_TYPE_INVALID:
		case CQE_TYPE_RX_SPLIT:
		case CQE_TYPE_RX_TCP:
		case CQE_TYPE_SEND_PTP:
			/* Ignore for now */
			break;
		}
		processed_cqe++;
	}
done:
	dprintf(nic->dev,
	    "%s CQ%d processed_cqe %d work_done %d\n",
	    __func__, cq_idx, processed_cqe, work_done);

	/* Ring doorbell to inform H/W to reuse processed CQEs */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, cq_idx, processed_cqe);

	if ((tx_done > 0) &&
	    ((if_getdrvflags(nic->ifp) & IFF_DRV_RUNNING) != 0)) {
		/* Reenable TXQ if it was stopped earlier due to SQ full */
		if_setdrvflagbits(nic->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
	}
out:
	/*
	 * Flush any outstanding LRO work
	 */
	rq_idx = cq_idx;
	rq = &nic->qs->rq[rq_idx];
	lro = &rq->lro;
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	NICVF_CMP_UNLOCK(cq);

	ifp = nic->ifp;
	/* Push received MBUFs to the stack */
	while (!buf_ring_empty(cq->rx_br)) {
		mbuf = buf_ring_dequeue_mc(cq->rx_br);
		if (__predict_true(mbuf != NULL))
			(*ifp->if_input)(ifp, mbuf);
	}

	return (cmp_err);
}

/*
 * Qset error interrupt handler
 *
 * As of now only CQ errors are handled
 */
static void
nicvf_qs_err_task(void *arg, int pending)
{
	struct nicvf *nic;
	struct queue_set *qs;
	int qidx;
	uint64_t status;
	boolean_t enable = TRUE;

	nic = (struct nicvf *)arg;
	qs = nic->qs;

	/* Deactivate network interface */
	if_setdrvflagbits(nic->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	/* Check if it is a CQ err */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
		    qidx);
		if ((status & CQ_ERR_MASK) == 0)
			continue;
		/* Process already queued CQEs and reconfig CQ */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_sq_disable(nic, qidx);
		(void)nicvf_cq_intr_handler(nic, qidx);
		nicvf_cmp_queue_config(nic, qs, qidx, enable);
		nicvf_sq_free_used_descs(nic, &qs->sq[qidx], qidx);
		nicvf_sq_enable(nic, &qs->sq[qidx], qidx);
		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
	}

	if_setdrvflagbits(nic->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
	/* Re-enable Qset error interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
}

static void
nicvf_cmp_task(void *arg, int pending)
{
	uint64_t cq_head;
	struct cmp_queue *cq;
	struct nicvf *nic;
	int cmp_err;

	cq = (struct cmp_queue *)arg;
	nic = cq->nic;

	/* Handle CQ descriptors */
	cmp_err = nicvf_cq_intr_handler(nic, cq->idx);
	/* Re-enable interrupts */
	cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq->idx);
	nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->idx);
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD, cq->idx, cq_head);

	if (__predict_false(cmp_err != 0)) {
		/*
		 * Schedule another thread here since we did not
		 * process the entire CQ due to a Tx or Rx CQ parse error.
		 */
		taskqueue_enqueue(cq->cmp_taskq, &cq->cmp_task);
	}

	/* Reenable interrupt (previously disabled in nicvf_intr_handler()) */
	nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->idx);
}

/* Initialize completion queue */
static int
nicvf_init_cmp_queue(struct nicvf *nic, struct cmp_queue *cq, int q_len,
    int qidx)
{
	int err;

	/* Initialize lock */
	snprintf(cq->mtx_name, sizeof(cq->mtx_name), "%s: CQ(%d) lock",
	    device_get_nameunit(nic->dev), qidx);
	mtx_init(&cq->mtx, cq->mtx_name, NULL, MTX_DEF);

	err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
				     NICVF_CQ_BASE_ALIGN_BYTES);

	if (err != 0) {
		device_printf(nic->dev,
		    "Could not allocate DMA memory for CQ\n");
		return (err);
	}

	cq->desc = cq->dmem.base;
	cq->thresh = pass1_silicon(nic->dev) ? 0 : CMP_QUEUE_CQE_THRESH;
	cq->nic = nic;
	cq->idx = qidx;
	nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;

	cq->rx_br = buf_ring_alloc(CMP_QUEUE_LEN * 8, M_DEVBUF, M_WAITOK,
	    &cq->mtx);

	/* Allocate taskqueue */
	TASK_INIT(&cq->cmp_task, 0, nicvf_cmp_task, cq);
	cq->cmp_taskq = taskqueue_create_fast("nicvf_cmp_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &cq->cmp_taskq);
	taskqueue_start_threads(&cq->cmp_taskq, 1, PI_NET, "%s: cmp_taskq(%d)",
	    device_get_nameunit(nic->dev), qidx);

	return (0);
}

static void
nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
{

	if (cq == NULL)
		return;
	/*
	 * The completion queue itself should be disabled by now
	 * (ref. nicvf_snd_queue_config()).
	 * Ensure that it is safe to disable it or panic.
	 */
	if (cq->enable)
		panic("%s: Trying to free working CQ(%d)", __func__, cq->idx);

	if (cq->cmp_taskq != NULL) {
		/* Remove task */
		while (taskqueue_cancel(cq->cmp_taskq, &cq->cmp_task, NULL) != 0)
			taskqueue_drain(cq->cmp_taskq, &cq->cmp_task);

		taskqueue_free(cq->cmp_taskq);
		cq->cmp_taskq = NULL;
	}
	/*
	 * The completion task may have re-enabled interrupts, so disable
	 * them now that the task has finished. It is safe to do so since
	 * the corresponding CQ was already disabled.
	 */
	nicvf_disable_intr(nic, NICVF_INTR_CQ, cq->idx);
	nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->idx);

	NICVF_CMP_LOCK(cq);
	nicvf_free_q_desc_mem(nic, &cq->dmem);
	drbr_free(cq->rx_br, M_DEVBUF);
	NICVF_CMP_UNLOCK(cq);
	mtx_destroy(&cq->mtx);
	memset(cq->mtx_name, 0, sizeof(cq->mtx_name));
}

static void
nicvf_snd_task(void *arg, int pending)
{
	struct snd_queue *sq = (struct snd_queue *)arg;
	struct mbuf *mbuf;

	NICVF_TX_LOCK(sq);
	while (1) {
		mbuf = drbr_dequeue(NULL, sq->br);
		if (mbuf == NULL)
			break;

		if (nicvf_tx_mbuf_locked(sq, mbuf) != 0) {
			/* XXX ARM64TODO: Increase Tx drop counter */
			m_freem(mbuf);
			break;
		}
	}
	NICVF_TX_UNLOCK(sq);
}

/* Initialize transmit queue */
static int
nicvf_init_snd_queue(struct nicvf *nic, struct snd_queue *sq, int q_len,
    int qidx)
{
	size_t i;
	int err;

	/* Initialize TX lock for this queue */
	snprintf(sq->mtx_name, sizeof(sq->mtx_name), "%s: SQ(%d) lock",
	    device_get_nameunit(nic->dev), qidx);
	mtx_init(&sq->mtx, sq->mtx_name, NULL, MTX_DEF);

	NICVF_TX_LOCK(sq);
	/* Allocate buffer ring */
	sq->br = buf_ring_alloc(q_len / MIN_SQ_DESC_PER_PKT_XMIT, M_DEVBUF,
	    M_NOWAIT, &sq->mtx);
	if (sq->br == NULL) {
		device_printf(nic->dev,
		    "ERROR: Could not set up buf ring for SQ(%d)\n", qidx);
		err = ENOMEM;
		goto error;
	}

	/* Allocate DMA memory for Tx descriptors */
	err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
				     NICVF_SQ_BASE_ALIGN_BYTES);
	if (err != 0) {
		device_printf(nic->dev,
		    "Could not allocate DMA memory for SQ\n");
		goto error;
	}

	sq->desc = sq->dmem.base;
	sq->head = sq->tail = 0;
	atomic_store_rel_int(&sq->free_cnt, q_len - 1);
	sq->thresh = SND_QUEUE_THRESH;
	sq->idx = qidx;
	sq->nic = nic;

	/*
	 * Allocate DMA maps for Tx buffers
	 */

	/* Create DMA tag first */
	err = bus_dma_tag_create(
	    bus_get_dma_tag(nic->dev),		/* parent tag */
	    1,					/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filtfunc, filtfuncarg */
	    NICVF_TSO_MAXSIZE,			/* maxsize */
	    NICVF_TSO_NSEGS,			/* nsegments */
	    MCLBYTES,				/* maxsegsize */
	    0,					/* flags */
	    NULL, NULL,				/* lockfunc, lockfuncarg */
	    &sq->snd_buff_dmat);		/* dmat */

	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to create busdma tag for Tx buffers\n");
		goto error;
	}

	/* Allocate send buffers array */
	sq->snd_buff = malloc(sizeof(*sq->snd_buff) * q_len, M_NICVF,
	    (M_NOWAIT | M_ZERO));
	if (sq->snd_buff == NULL) {
		device_printf(nic->dev,
		    "Could not allocate memory for Tx buffers array\n");
		err = ENOMEM;
		goto error;
	}

	/* Now populate maps */
	for (i = 0; i < q_len; i++) {
		err = bus_dmamap_create(sq->snd_buff_dmat, 0,
		    &sq->snd_buff[i].dmap);
		if (err != 0) {
			device_printf(nic->dev,
			    "Failed to create DMA maps for Tx buffers\n");
			goto error;
		}
	}
	NICVF_TX_UNLOCK(sq);

	/* Allocate taskqueue */
	TASK_INIT(&sq->snd_task, 0, nicvf_snd_task, sq);
	sq->snd_taskq = taskqueue_create_fast("nicvf_snd_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &sq->snd_taskq);
	taskqueue_start_threads(&sq->snd_taskq, 1, PI_NET, "%s: snd_taskq(%d)",
	    device_get_nameunit(nic->dev), qidx);

	return (0);
error:
	NICVF_TX_UNLOCK(sq);
	return (err);
}

static void
nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
{
	struct queue_set *qs = nic->qs;
	size_t i;
	int err;

	if (sq == NULL)
		return;

	if (sq->snd_taskq != NULL) {
		/* Remove task */
		while (taskqueue_cancel(sq->snd_taskq, &sq->snd_task, NULL) != 0)
			taskqueue_drain(sq->snd_taskq, &sq->snd_task);

		taskqueue_free(sq->snd_taskq);
		sq->snd_taskq = NULL;
	}

	NICVF_TX_LOCK(sq);
	if (sq->snd_buff_dmat != NULL) {
		if (sq->snd_buff != NULL) {
			for (i = 0; i < qs->sq_len; i++) {
				m_freem(sq->snd_buff[i].mbuf);
				sq->snd_buff[i].mbuf = NULL;

				bus_dmamap_unload(sq->snd_buff_dmat,
				    sq->snd_buff[i].dmap);
				err = bus_dmamap_destroy(sq->snd_buff_dmat,
				    sq->snd_buff[i].dmap);
				/*
				 * If bus_dmamap_destroy() fails it can cause
				 * a random panic later if the tag is also
				 * destroyed in the process.
				 */
				KASSERT(err == 0,
				    ("%s: Could not destroy DMA map for SQ",
				    __func__));
			}
		}

		free(sq->snd_buff, M_NICVF);

		err = bus_dma_tag_destroy(sq->snd_buff_dmat);
		KASSERT(err == 0,
		    ("%s: Trying to destroy BUSY DMA tag", __func__));
	}

	/* Free private driver ring for this send queue */
	if (sq->br != NULL)
		drbr_free(sq->br, M_DEVBUF);

	if (sq->dmem.base != NULL)
		nicvf_free_q_desc_mem(nic, &sq->dmem);

	NICVF_TX_UNLOCK(sq);
	/* Destroy Tx lock */
	mtx_destroy(&sq->mtx);
	memset(sq->mtx_name, 0, sizeof(sq->mtx_name));
}

static void
nicvf_reclaim_snd_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
{

	/* Disable send queue */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
	/* Check if SQ is stopped */
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
		return;
	/* Reset send queue */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
}

static void
nicvf_reclaim_rcv_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
{
	union nic_mbx mbx = {};

	/* Make sure all packets in the pipeline are written back into mem */
	mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
	nicvf_send_msg_to_pf(nic, &mbx);
}

static void
nicvf_reclaim_cmp_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
{

	/* Disable timer threshold (doesn't get reset upon CQ reset) */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
	/* Disable completion queue */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
	/* Reset completion queue */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
}

static void
nicvf_reclaim_rbdr(struct nicvf *nic, struct rbdr *rbdr, int qidx)
{
	uint64_t tmp, fifo_state;
	int timeout = 10;

	/* Save head and tail pointers for freeing up buffers */
	rbdr->head =
	    nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_HEAD, qidx) >> 3;
	rbdr->tail =
	    nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, qidx) >> 3;

	/*
	 * If RBDR FIFO is in 'FAIL' state then do a reset first
	 * before reclaiming.
	 */
	fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
	if (((fifo_state >> 62) & 0x03) == 0x3) {
		nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
		    qidx, NICVF_RBDR_RESET);
	}

	/* Disable RBDR */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
		return;
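	/*
	 * Wait until the RBDR prefetch machinery goes idle; the two 32-bit
	 * halves of the prefetch status register are presumed to hold
	 * matching values once all outstanding prefetches have completed.
	 */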
	while (1) {
		tmp = nicvf_queue_reg_read(nic,
		    NIC_QSET_RBDR_0_1_PREFETCH_STATUS, qidx);
		if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
			break;

		DELAY(1000);
		timeout--;
		if (!timeout) {
			device_printf(nic->dev,
			    "Failed polling on prefetch status\n");
			return;
		}
	}
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx,
	    NICVF_RBDR_RESET);

	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
		return;
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
		return;
}

/* Configures receive queue */
static void
nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
    int qidx, bool enable)
{
	union nic_mbx mbx = {};
	struct rcv_queue *rq;
	struct rq_cfg rq_cfg;
	struct ifnet *ifp;
	struct lro_ctrl	*lro;

	ifp = nic->ifp;

	rq = &qs->rq[qidx];
	rq->enable = enable;

	lro = &rq->lro;

	/* Disable receive queue */
	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);

	if (!rq->enable) {
		nicvf_reclaim_rcv_queue(nic, qs, qidx);
		/* Free LRO memory */
		tcp_lro_free(lro);
		rq->lro_enabled = FALSE;
		return;
	}

	/* Configure LRO if enabled */
	rq->lro_enabled = FALSE;
	if ((if_getcapenable(ifp) & IFCAP_LRO) != 0) {
		if (tcp_lro_init(lro) != 0) {
			device_printf(nic->dev,
			    "Failed to initialize LRO for RXQ%d\n", qidx);
		} else {
			rq->lro_enabled = TRUE;
			lro->ifp = nic->ifp;
		}
	}

	rq->cq_qs = qs->vnic_id;
	rq->cq_idx = qidx;
	rq->start_rbdr_qs = qs->vnic_id;
	rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
	rq->cont_rbdr_qs = qs->vnic_id;
	rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
	/* All writes of RBDR data are to be loaded into the L2 cache as well */
	rq->caching = 1;

	/* Send a mailbox msg to PF to config RQ */
	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
	mbx.rq.qs_num = qs->vnic_id;
	mbx.rq.rq_num = qidx;
	mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) |
	    (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
	    (rq->cont_qs_rbdr_idx << 8) | (rq->start_rbdr_qs << 1) |
	    (rq->start_qs_rbdr_idx);
	nicvf_send_msg_to_pf(nic, &mbx);

	mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
	mbx.rq.cfg = (1UL << 63) | (1UL << 62) | (qs->vnic_id << 0);
	nicvf_send_msg_to_pf(nic, &mbx);

	/*
	 * RQ drop config
	 * Enable CQ drop to reserve sufficient CQEs for all tx packets
	 */
	mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
	mbx.rq.cfg = (1UL << 62) | (RQ_CQ_DROP << 8);
	nicvf_send_msg_to_pf(nic, &mbx);

	nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00);

	/* Enable Receive queue */
	rq_cfg.ena = 1;
	rq_cfg.tcp_ena = 0;
	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx,
	    *(uint64_t *)&rq_cfg);
}

/* Configures completion queue */
static void
nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
    int qidx, boolean_t enable)
{
	struct cmp_queue *cq;
	struct cq_cfg cq_cfg;

	cq = &qs->cq[qidx];
	cq->enable = enable;

	if (!cq->enable) {
		nicvf_reclaim_cmp_queue(nic, qs, qidx);
		return;
	}

	/* Reset completion queue */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);

	/* Set completion queue base address */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE, qidx,
	    (uint64_t)(cq->dmem.phys_base));

	/* Enable Completion queue */
	cq_cfg.ena = 1;
	cq_cfg.reset = 0;
	cq_cfg.caching = 0;
	cq_cfg.qsize = CMP_QSIZE;
	cq_cfg.avg_con = 0;
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx,
	    *(uint64_t *)&cq_cfg);

	/* Set threshold value for interrupt generation */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx,
	    nic->cq_coalesce_usecs);
}

/* Configures transmit queue */
static void
nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs, int qidx,
    boolean_t enable)
{
	union nic_mbx mbx = {};
	struct snd_queue *sq;
	struct sq_cfg sq_cfg;

	sq = &qs->sq[qidx];
	sq->enable = enable;

	if (!sq->enable) {
		nicvf_reclaim_snd_queue(nic, qs, qidx);
		return;
	}

	/* Reset send queue */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);

	sq->cq_qs = qs->vnic_id;
	sq->cq_idx = qidx;

	/* Send a mailbox msg to PF to config SQ */
	mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
	mbx.sq.qs_num = qs->vnic_id;
	mbx.sq.sq_num = qidx;
	mbx.sq.sqs_mode = nic->sqs_mode;
	mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
	nicvf_send_msg_to_pf(nic, &mbx);

	/* Set queue base address */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE, qidx,
	    (uint64_t)(sq->dmem.phys_base));

	/* Enable send queue & set queue size */
	sq_cfg.ena = 1;
	sq_cfg.reset = 0;
	sq_cfg.ldwb = 0;
	sq_cfg.qsize = SND_QSIZE;
	sq_cfg.tstmp_bgx_intf = 0;
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx,
	    *(uint64_t *)&sq_cfg);

	/* Set threshold value for interrupt generation */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
}

/* Configures receive buffer descriptor ring */
static void
nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs, int qidx,
    boolean_t enable)
{
	struct rbdr *rbdr;
	struct rbdr_cfg rbdr_cfg;

	rbdr = &qs->rbdr[qidx];
	nicvf_reclaim_rbdr(nic, rbdr, qidx);
	if (!enable)
		return;

	/* Set descriptor base address */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE, qidx,
	    (uint64_t)(rbdr->dmem.phys_base));

	/* Enable RBDR & set queue size */
	/* Buffer size should be in multiples of 128 bytes */
	rbdr_cfg.ena = 1;
	rbdr_cfg.reset = 0;
	rbdr_cfg.ldwb = 0;
	rbdr_cfg.qsize = RBDR_SIZE;
	rbdr_cfg.avg_con = 0;
	rbdr_cfg.lines = rbdr->dma_size / 128;
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx,
	    *(uint64_t *)&rbdr_cfg);

	/* Notify HW */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, qidx,
	    qs->rbdr_len - 1);

	/* Set threshold value for interrupt generation */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH, qidx,
	    rbdr->thresh - 1);
}

/* Requests PF to assign and enable Qset */
void
nicvf_qset_config(struct nicvf *nic, boolean_t enable)
{
	union nic_mbx mbx = {};
	struct queue_set *qs;
	struct qs_cfg *qs_cfg;

	qs = nic->qs;
	if (qs == NULL) {
		device_printf(nic->dev,
		    "Qset is not allocated yet, not initializing queues\n");
		return;
	}

	qs->enable = enable;
	qs->vnic_id = nic->vf_id;

	/* Send a mailbox msg to PF to config Qset */
	mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
	mbx.qs.num = qs->vnic_id;

	mbx.qs.cfg = 0;
	qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
	if (qs->enable) {
		qs_cfg->ena = 1;
		qs_cfg->vnic = qs->vnic_id;
	}
	nicvf_send_msg_to_pf(nic, &mbx);
}

static void
nicvf_free_resources(struct nicvf *nic)
{
	int qidx;
	struct queue_set *qs;

	qs = nic->qs;
	/*
	 * Remove QS error task first since it has to be dead
	 * to safely free completion queue tasks.
	 */
	if (qs->qs_err_taskq != NULL) {
		/* Shut down QS error tasks */
		while (taskqueue_cancel(qs->qs_err_taskq,
		    &qs->qs_err_task, NULL) != 0) {
			taskqueue_drain(qs->qs_err_taskq, &qs->qs_err_task);
		}
		taskqueue_free(qs->qs_err_taskq);
		qs->qs_err_taskq = NULL;
	}
	/* Free receive buffer descriptor ring */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
		nicvf_free_rbdr(nic, &qs->rbdr[qidx]);

	/* Free completion queue */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
		nicvf_free_cmp_queue(nic, &qs->cq[qidx]);

	/* Free send queue */
	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
		nicvf_free_snd_queue(nic, &qs->sq[qidx]);
}

static int
nicvf_alloc_resources(struct nicvf *nic)
{
	struct queue_set *qs = nic->qs;
	int qidx;

	/* Alloc receive buffer descriptor ring */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
		if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
				    DMA_BUFFER_LEN, qidx))
			goto alloc_fail;
	}

	/* Alloc send queue */
	for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
		if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
			goto alloc_fail;
	}

	/* Alloc completion queue */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len, qidx))
			goto alloc_fail;
	}

	/* Allocate QS error taskqueue */
	TASK_INIT(&qs->qs_err_task, 0, nicvf_qs_err_task, nic);
	qs->qs_err_taskq = taskqueue_create_fast("nicvf_qs_err_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &qs->qs_err_taskq);
	taskqueue_start_threads(&qs->qs_err_taskq, 1, PI_NET, "%s: qs_taskq",
	    device_get_nameunit(nic->dev));

	return (0);
alloc_fail:
	nicvf_free_resources(nic);
	return (ENOMEM);
}

int
nicvf_set_qset_resources(struct nicvf *nic)
{
	struct queue_set *qs;

	qs = malloc(sizeof(*qs), M_NICVF, (M_ZERO | M_WAITOK));
	nic->qs = qs;

	/* Set count of each queue */
	qs->rbdr_cnt = RBDR_CNT;
	/* With no RSS we stay with single RQ */
	qs->rq_cnt = 1;

	qs->sq_cnt = SND_QUEUE_CNT;
	qs->cq_cnt = CMP_QUEUE_CNT;

	/* Set queue lengths */
	qs->rbdr_len = RCV_BUF_COUNT;
	qs->sq_len = SND_QUEUE_LEN;
	qs->cq_len = CMP_QUEUE_LEN;

	nic->rx_queues = qs->rq_cnt;
	nic->tx_queues = qs->sq_cnt;

	return (0);
}

int
nicvf_config_data_transfer(struct nicvf *nic, boolean_t enable)
{
	boolean_t disable = FALSE;
	struct queue_set *qs;
	int qidx;

	qs = nic->qs;
	if (qs == NULL)
		return (0);

	if (enable) {
		if (nicvf_alloc_resources(nic) != 0)
			return (ENOMEM);

		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
			nicvf_snd_queue_config(nic, qs, qidx, enable);
		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
			nicvf_cmp_queue_config(nic, qs, qidx, enable);
		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
			nicvf_rbdr_config(nic, qs, qidx, enable);
		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
			nicvf_rcv_queue_config(nic, qs, qidx, enable);
	} else {
		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
			nicvf_rcv_queue_config(nic, qs, qidx, disable);
		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
			nicvf_rbdr_config(nic, qs, qidx, disable);
		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
			nicvf_snd_queue_config(nic, qs, qidx, disable);
		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
			nicvf_cmp_queue_config(nic, qs, qidx, disable);

		nicvf_free_resources(nic);
	}

	return (0);
}

/*
 * Get free descriptors from the SQ.
 * Returns the index of the first reserved descriptor.
 */
static __inline int
nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
{
	int qentry;

	qentry = sq->tail;
	atomic_subtract_int(&sq->free_cnt, desc_cnt);
	sq->tail += desc_cnt;
	sq->tail &= (sq->dmem.q_len - 1);

	return (qentry);
}

/* Free descriptors back to the SQ for future use */
static void
nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
{

	atomic_add_int(&sq->free_cnt, desc_cnt);
	sq->head += desc_cnt;
	sq->head &= (sq->dmem.q_len - 1);
}
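/*
 * The index updates above rely on the ring length being a power of two, so
 * masking with (q_len - 1) implements the wrap-around, and on free_cnt
 * being updated atomically, since the producer (transmit path) and the
 * consumer (completion handler) run concurrently.
 */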

static __inline int
nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
{
	qentry++;
	qentry &= (sq->dmem.q_len - 1);
	return (qentry);
}

static void
nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
{
	uint64_t sq_cfg;

	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
	sq_cfg |= NICVF_SQ_EN;
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
	/* Ring doorbell so that H/W restarts processing SQEs */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
}

static void
nicvf_sq_disable(struct nicvf *nic, int qidx)
{
	uint64_t sq_cfg;

	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
	sq_cfg &= ~NICVF_SQ_EN;
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
}

static void
nicvf_sq_free_used_descs(struct nicvf *nic, struct snd_queue *sq, int qidx)
{
	uint64_t head, tail;
	struct snd_buff *snd_buff;
	struct sq_hdr_subdesc *hdr;

	NICVF_TX_LOCK(sq);
	head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
	tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
	while (sq->head != head) {
		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
		if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
			nicvf_put_sq_desc(sq, 1);
			continue;
		}
		snd_buff = &sq->snd_buff[sq->head];
		if (snd_buff->mbuf != NULL) {
			bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
			m_freem(snd_buff->mbuf);
			sq->snd_buff[sq->head].mbuf = NULL;
		}
		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
	}
	NICVF_TX_UNLOCK(sq);
}

/*
 * Add SQ HEADER subdescriptor.
 * First subdescriptor for every send descriptor.
 */
static __inline int
nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
    int subdesc_cnt, struct mbuf *mbuf, int len)
{
	struct nicvf *nic;
	struct sq_hdr_subdesc *hdr;
	struct ether_vlan_header *eh;
#ifdef INET
	struct ip *ip;
	struct tcphdr *th;
#endif
	uint16_t etype;
	int ehdrlen, iphlen, poff;

	nic = sq->nic;

	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
	sq->snd_buff[qentry].mbuf = mbuf;

	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
	/* Enable notification via CQE after processing SQE */
	hdr->post_cqe = 1;
	/* Number of subdescriptors following this one */
	hdr->subdesc_cnt = subdesc_cnt;
	hdr->tot_len = len;

	eh = mtod(mbuf, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = ntohs(eh->evl_proto);
	} else {
		ehdrlen = ETHER_HDR_LEN;
		etype = ntohs(eh->evl_encap_proto);
	}

	switch (etype) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		/* ARM64TODO: Add support for IPv6 */
		hdr->csum_l3 = 0;
		sq->snd_buff[qentry].mbuf = NULL;
		return (ENXIO);
#endif
#ifdef INET
	case ETHERTYPE_IP:
		if (mbuf->m_len < ehdrlen + sizeof(struct ip)) {
			mbuf = m_pullup(mbuf, ehdrlen + sizeof(struct ip));
			sq->snd_buff[qentry].mbuf = mbuf;
			if (mbuf == NULL)
				return (ENOBUFS);
		}

		ip = (struct ip *)(mbuf->m_data + ehdrlen);
		ip->ip_sum = 0;
		iphlen = ip->ip_hl << 2;
		poff = ehdrlen + iphlen;

		if (mbuf->m_pkthdr.csum_flags != 0) {
			hdr->csum_l3 = 1; /* Enable IP csum calculation */
			switch (ip->ip_p) {
			case IPPROTO_TCP:
				if ((mbuf->m_pkthdr.csum_flags & CSUM_TCP) == 0)
					break;

				if (mbuf->m_len < (poff + sizeof(struct tcphdr))) {
					mbuf = m_pullup(mbuf,
					    poff + sizeof(struct tcphdr));
					sq->snd_buff[qentry].mbuf = mbuf;
					if (mbuf == NULL)
						return (ENOBUFS);
				}
				hdr->csum_l4 = SEND_L4_CSUM_TCP;
				break;
			case IPPROTO_UDP:
				if ((mbuf->m_pkthdr.csum_flags & CSUM_UDP) == 0)
					break;

				if (mbuf->m_len < (poff + sizeof(struct udphdr))) {
					mbuf = m_pullup(mbuf,
					    poff + sizeof(struct udphdr));
					sq->snd_buff[qentry].mbuf = mbuf;
					if (mbuf == NULL)
						return (ENOBUFS);
				}
				hdr->csum_l4 = SEND_L4_CSUM_UDP;
				break;
			case IPPROTO_SCTP:
				if ((mbuf->m_pkthdr.csum_flags & CSUM_SCTP) == 0)
					break;

				if (mbuf->m_len < (poff + sizeof(struct sctphdr))) {
					mbuf = m_pullup(mbuf,
					    poff + sizeof(struct sctphdr));
					sq->snd_buff[qentry].mbuf = mbuf;
					if (mbuf == NULL)
						return (ENOBUFS);
				}
				hdr->csum_l4 = SEND_L4_CSUM_SCTP;
				break;
			default:
				break;
			}
			hdr->l3_offset = ehdrlen;
			hdr->l4_offset = ehdrlen + iphlen;
		}

		if ((mbuf->m_pkthdr.tso_segsz != 0) && nic->hw_tso) {
			/*
			 * Extract ip again as m_data could have been modified.
			 */
			ip = (struct ip *)(mbuf->m_data + ehdrlen);
			th = (struct tcphdr *)((caddr_t)ip + iphlen);

			hdr->tso = 1;
			hdr->tso_start = ehdrlen + iphlen + (th->th_off * 4);
			hdr->tso_max_paysize = mbuf->m_pkthdr.tso_segsz;
			hdr->inner_l3_offset = ehdrlen - 2;
			nic->drv_stats.tx_tso++;
		}
		break;
#endif
	default:
		hdr->csum_l3 = 0;
	}

	return (0);
}

/*
 * SQ GATHER subdescriptor
 * Must follow HDR descriptor
 */
static inline void
nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, int size,
    uint64_t data)
{
	struct sq_gather_subdesc *gather;

	qentry &= (sq->dmem.q_len - 1);
	gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);

	memset(gather, 0, SND_QUEUE_DESC_SIZE);
	gather->subdesc_type = SQ_DESC_TYPE_GATHER;
	gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
	gather->size = size;
	gather->addr = data;
}

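/*
 * A transmitted packet thus occupies one HEADER subdescriptor followed by
 * one GATHER subdescriptor per DMA segment, which is presumably what
 * MIN_SQ_DESC_PER_PKT_XMIT accounts for in the subdesc_cnt computation in
 * nicvf_tx_mbuf_locked() below.
 */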
/* Put an mbuf to a SQ for packet transfer. */
int
nicvf_tx_mbuf_locked(struct snd_queue *sq, struct mbuf *mbuf)
{
	bus_dma_segment_t segs[256];
	struct nicvf *nic;
	struct snd_buff *snd_buff;
	size_t seg;
	int nsegs, qentry;
	int subdesc_cnt;
	int err;

	NICVF_TX_LOCK_ASSERT(sq);

	if (sq->free_cnt == 0)
		return (ENOBUFS);

	snd_buff = &sq->snd_buff[sq->tail];

	err = bus_dmamap_load_mbuf_sg(sq->snd_buff_dmat, snd_buff->dmap,
	    mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err != 0) {
		/* ARM64TODO: Add mbuf defragmenting if we lack maps */
		return (err);
	}

	/* Set how many subdescriptors are required */
	nic = sq->nic;
	if (mbuf->m_pkthdr.tso_segsz != 0 && nic->hw_tso)
		subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
	else
		subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT + nsegs - 1;

	if (subdesc_cnt > sq->free_cnt) {
		/* ARM64TODO: Add mbuf defragmentation if we lack descriptors */
		bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
		return (ENOBUFS);
	}

	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);

	/* Add SQ header subdesc */
	err = nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, mbuf,
	    mbuf->m_pkthdr.len);
	if (err != 0) {
		bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
		return (err);
	}

	/* Add SQ gather subdescs */
	for (seg = 0; seg < nsegs; seg++) {
		qentry = nicvf_get_nxt_sqentry(sq, qentry);
		nicvf_sq_add_gather_subdesc(sq, qentry, segs[seg].ds_len,
		    segs[seg].ds_addr);
	}

	/* Make sure all memory stores are done before ringing the doorbell */
	bus_dmamap_sync(sq->dmem.dmat, sq->dmem.dmap, BUS_DMASYNC_PREWRITE);

	dprintf(sq->nic->dev, "%s: sq->idx: %d, subdesc_cnt: %d\n",
	    __func__, sq->idx, subdesc_cnt);
	/* Inform HW to xmit new packet */
	nicvf_queue_reg_write(sq->nic, NIC_QSET_SQ_0_7_DOOR,
	    sq->idx, subdesc_cnt);
	return (0);
}
1937
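/*
 * Illustrative sketch of a hypothetical call site, not part of the
 * driver: nicvf_tx_mbuf_locked() must run with the SQ lock held.  Note
 * that ENOBUFS may also come from a failed m_pullup(), in which case
 * the mbuf has already been freed and must not be retried.
 */
#if 0
	NICVF_TX_LOCK(sq);
	err = nicvf_tx_mbuf_locked(sq, mbuf);
	NICVF_TX_UNLOCK(sq);
#endif
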
1938static __inline u_int
1939frag_num(u_int i)
1940{
1941#if BYTE_ORDER == BIG_ENDIAN
1942	return ((i & ~3) + 3 - (i & 3));
1943#else
1944	return (i);
1945#endif
1946}
1947
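/*
 * Example: the receive-buffer lengths are packed four 16-bit entries
 * per 64-bit word, so on a big-endian host frag_num() reverses the
 * order within each word (0,1,2,3 -> 3,2,1,0 and 4,5,6,7 -> 7,6,5,4),
 * while on a little-endian host it is the identity mapping.
 */
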
1948/* Returns an mbuf for a received packet */
1949struct mbuf *
1950nicvf_get_rcv_mbuf(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
1951{
1952	int frag;
1953	int payload_len = 0;
1954	struct mbuf *mbuf;
1955	struct mbuf *mbuf_frag;
1956	uint16_t *rb_lens = NULL;
1957	uint64_t *rb_ptrs = NULL;
1958
1959	mbuf = NULL;
1960	rb_lens = (uint16_t *)((uint8_t *)cqe_rx + (3 * sizeof(uint64_t)));
1961	rb_ptrs = (uint64_t *)((uint8_t *)cqe_rx + (6 * sizeof(uint64_t)));
1962
1963	dprintf(nic->dev, "%s rb_cnt %d rb0_ptr %lx rb0_sz %d\n",
1964	    __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
1965
1966	for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
1967		payload_len = rb_lens[frag_num(frag)];
1968		if (frag == 0) {
1969			/* First fragment */
1970			mbuf = nicvf_rb_ptr_to_mbuf(nic,
1971			    (*rb_ptrs - cqe_rx->align_pad));
1972			mbuf->m_len = payload_len;
1973			mbuf->m_data += cqe_rx->align_pad;
1974			if_setrcvif(mbuf, nic->ifp);
1975		} else {
1976			/* Add fragments */
1977			mbuf_frag = nicvf_rb_ptr_to_mbuf(nic, *rb_ptrs);
1978			m_append(mbuf, payload_len, mbuf_frag->m_data);
1979			m_freem(mbuf_frag);
1980		}
1981		/* Next buffer pointer */
1982		rb_ptrs++;
1983	}
1984
1985	if (__predict_true(mbuf != NULL)) {
1986		m_fixhdr(mbuf);
1987		mbuf->m_pkthdr.flowid = cqe_rx->rq_idx;
1988		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE);
1989		if (__predict_true((if_getcapenable(nic->ifp) & IFCAP_RXCSUM) != 0)) {
1990			/*
1991			 * HW by default verifies IP & TCP/UDP/SCTP checksums
1992			 */
1993
1994			/* XXX: Do we need to include IP with options too? */
1995			if (__predict_true(cqe_rx->l3_type == L3TYPE_IPV4 ||
1996			    cqe_rx->l3_type == L3TYPE_IPV6)) {
1997				mbuf->m_pkthdr.csum_flags =
1998				    (CSUM_IP_CHECKED | CSUM_IP_VALID);
1999			}
2000			if (cqe_rx->l4_type == L4TYPE_TCP ||
2001			    cqe_rx->l4_type == L4TYPE_UDP ||
2002			    cqe_rx->l4_type == L4TYPE_SCTP) {
2003				mbuf->m_pkthdr.csum_flags |=
2004				    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2005				mbuf->m_pkthdr.csum_data = htons(0xffff);
2006			}
2007		}
2008	}
2009
2010	return (mbuf);
2011}
2012
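/*
 * Illustrative sketch, not part of the driver: a completion-queue
 * handler would typically reassemble the frame as above and hand it to
 * the stack.  Note also that m_append() in the loop above can fail on
 * mbuf allocation and its return value is not checked.
 */
#if 0
	mbuf = nicvf_get_rcv_mbuf(nic, cqe_rx);
	if (mbuf != NULL)
		if_input(nic->ifp, mbuf);	/* pass up to the network stack */
#endif
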
2013/* Enable interrupt */
2014void
2015nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
2016{
2017	uint64_t reg_val;
2018
2019	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
2020
2021	switch (int_type) {
2022	case NICVF_INTR_CQ:
2023		reg_val |= ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2024		break;
2025	case NICVF_INTR_SQ:
2026		reg_val |= ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2027		break;
2028	case NICVF_INTR_RBDR:
2029		reg_val |= ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2030		break;
2031	case NICVF_INTR_PKT_DROP:
2032		reg_val |= (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2033		break;
2034	case NICVF_INTR_TCP_TIMER:
2035		reg_val |= (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2036		break;
2037	case NICVF_INTR_MBOX:
2038		reg_val |= (1UL << NICVF_INTR_MBOX_SHIFT);
2039		break;
2040	case NICVF_INTR_QS_ERR:
2041		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2042		break;
2043	default:
2044		device_printf(nic->dev,
2045			   "Failed to enable interrupt: unknown type\n");
2046		break;
2047	}
2048
2049	nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val);
2050}
2051
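/*
 * Example (hypothetical call sites): enable the completion-queue
 * interrupt for queue 0 and the mailbox interrupt; q_idx is ignored
 * for the non-per-queue sources.
 */
#if 0
	nicvf_enable_intr(nic, NICVF_INTR_CQ, 0);
	nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0);
#endif
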
2052/* Disable interrupt */
2053void
2054nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
2055{
2056	uint64_t reg_val = 0;
2057
2058	switch (int_type) {
2059	case NICVF_INTR_CQ:
2060		reg_val |= ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2061		break;
2062	case NICVF_INTR_SQ:
2063		reg_val |= ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2064		break;
2065	case NICVF_INTR_RBDR:
2066		reg_val |= ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2067		break;
2068	case NICVF_INTR_PKT_DROP:
2069		reg_val |= (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2070		break;
2071	case NICVF_INTR_TCP_TIMER:
2072		reg_val |= (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2073		break;
2074	case NICVF_INTR_MBOX:
2075		reg_val |= (1UL << NICVF_INTR_MBOX_SHIFT);
2076		break;
2077	case NICVF_INTR_QS_ERR:
2078		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2079		break;
2080	default:
2081		device_printf(nic->dev,
2082			   "Failed to disable interrupt: unknown type\n");
2083		break;
2084	}
2085
2086	nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val);
2087}
2088
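/*
 * Note: NIC_VF_ENA_W1S and NIC_VF_ENA_W1C are write-1-to-set and
 * write-1-to-clear views of the same enable mask, so writing the
 * computed bit through W1C disables only the selected source without
 * a read-modify-write.
 */
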
2089/* Clear interrupt */
2090void
2091nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
2092{
2093	uint64_t reg_val = 0;
2094
2095	switch (int_type) {
2096	case NICVF_INTR_CQ:
2097		reg_val = ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2098		break;
2099	case NICVF_INTR_SQ:
2100		reg_val = ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2101		break;
2102	case NICVF_INTR_RBDR:
2103		reg_val = ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2104		break;
2105	case NICVF_INTR_PKT_DROP:
2106		reg_val = (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2107		break;
2108	case NICVF_INTR_TCP_TIMER:
2109		reg_val = (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2110		break;
2111	case NICVF_INTR_MBOX:
2112		reg_val = (1UL << NICVF_INTR_MBOX_SHIFT);
2113		break;
2114	case NICVF_INTR_QS_ERR:
2115		reg_val = (1UL << NICVF_INTR_QS_ERR_SHIFT);
2116		break;
2117	default:
2118		device_printf(nic->dev,
2119			   "Failed to clear interrupt: unknown type\n");
2120		break;
2121	}
2122
2123	nicvf_reg_write(nic, NIC_VF_INT, reg_val);
2124}
2125
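/*
 * Note: NIC_VF_INT holds the latched interrupt status; writing a 1
 * acknowledges (clears) the corresponding pending bit.
 */
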
2126/* Check whether a given interrupt is enabled; returns nonzero if so */
2127int
2128nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
2129{
2130	uint64_t reg_val;
2131	uint64_t mask = 0xff;
2132
2133	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
2134
2135	switch (int_type) {
2136	case NICVF_INTR_CQ:
2137		mask = ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2138		break;
2139	case NICVF_INTR_SQ:
2140		mask = ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2141		break;
2142	case NICVF_INTR_RBDR:
2143		mask = ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2144		break;
2145	case NICVF_INTR_PKT_DROP:
2146		mask = NICVF_INTR_PKT_DROP_MASK;
2147		break;
2148	case NICVF_INTR_TCP_TIMER:
2149		mask = NICVF_INTR_TCP_TIMER_MASK;
2150		break;
2151	case NICVF_INTR_MBOX:
2152		mask = NICVF_INTR_MBOX_MASK;
2153		break;
2154	case NICVF_INTR_QS_ERR:
2155		mask = NICVF_INTR_QS_ERR_MASK;
2156		break;
2157	default:
2158		device_printf(nic->dev,
2159			   "Failed to check interrupt enable: unknown type\n");
2160		break;
2161	}
2162
2163	return (reg_val & mask);
2164}
2165
2166void
2167nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
2168{
2169	struct rcv_queue *rq;
2170
2171#define GET_RQ_STATS(reg) \
2172	nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
2173			    (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
2174
2175	rq = &nic->qs->rq[rq_idx];
2176	rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
2177	rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
2178}
2179
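/*
 * The GET_RQ_STATS() macro above and GET_SQ_STATS() below compose a
 * register address: the queue number goes into the NIC_Q_NUM_SHIFT
 * field and the statistic index (octets vs. packets) selects one of
 * two adjacent 64-bit counters via (reg << 3).
 */
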
2180void
2181nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
2182{
2183	struct snd_queue *sq;
2184
2185#define GET_SQ_STATS(reg) \
2186	nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
2187			    (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
2188
2189	sq = &nic->qs->sq[sq_idx];
2190	sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
2191	sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
2192}
2193
2194/* Check for errors in a receive completion queue entry; returns nonzero on error */
2195int
2196nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cmp_queue *cq,
2197    struct cqe_rx_t *cqe_rx)
2198{
2199	struct nicvf_hw_stats *stats = &nic->hw_stats;
2200	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
2201
2202	if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
2203		drv_stats->rx_frames_ok++;
2204		return (0);
2205	}
2206
2207	switch (cqe_rx->err_opcode) {
2208	case CQ_RX_ERROP_RE_PARTIAL:
2209		stats->rx_bgx_truncated_pkts++;
2210		break;
2211	case CQ_RX_ERROP_RE_JABBER:
2212		stats->rx_jabber_errs++;
2213		break;
2214	case CQ_RX_ERROP_RE_FCS:
2215		stats->rx_fcs_errs++;
2216		break;
2217	case CQ_RX_ERROP_RE_RX_CTL:
2218		stats->rx_bgx_errs++;
2219		break;
2220	case CQ_RX_ERROP_PREL2_ERR:
2221		stats->rx_prel2_errs++;
2222		break;
2223	case CQ_RX_ERROP_L2_MAL:
2224		stats->rx_l2_hdr_malformed++;
2225		break;
2226	case CQ_RX_ERROP_L2_OVERSIZE:
2227		stats->rx_oversize++;
2228		break;
2229	case CQ_RX_ERROP_L2_UNDERSIZE:
2230		stats->rx_undersize++;
2231		break;
2232	case CQ_RX_ERROP_L2_LENMISM:
2233		stats->rx_l2_len_mismatch++;
2234		break;
2235	case CQ_RX_ERROP_L2_PCLP:
2236		stats->rx_l2_pclp++;
2237		break;
2238	case CQ_RX_ERROP_IP_NOT:
2239		stats->rx_ip_ver_errs++;
2240		break;
2241	case CQ_RX_ERROP_IP_CSUM_ERR:
2242		stats->rx_ip_csum_errs++;
2243		break;
2244	case CQ_RX_ERROP_IP_MAL:
2245		stats->rx_ip_hdr_malformed++;
2246		break;
2247	case CQ_RX_ERROP_IP_MALD:
2248		stats->rx_ip_payload_malformed++;
2249		break;
2250	case CQ_RX_ERROP_IP_HOP:
2251		stats->rx_ip_ttl_errs++;
2252		break;
2253	case CQ_RX_ERROP_L3_PCLP:
2254		stats->rx_l3_pclp++;
2255		break;
2256	case CQ_RX_ERROP_L4_MAL:
2257		stats->rx_l4_malformed++;
2258		break;
2259	case CQ_RX_ERROP_L4_CHK:
2260		stats->rx_l4_csum_errs++;
2261		break;
2262	case CQ_RX_ERROP_UDP_LEN:
2263		stats->rx_udp_len_errs++;
2264		break;
2265	case CQ_RX_ERROP_L4_PORT:
2266		stats->rx_l4_port_errs++;
2267		break;
2268	case CQ_RX_ERROP_TCP_FLAG:
2269		stats->rx_tcp_flag_errs++;
2270		break;
2271	case CQ_RX_ERROP_TCP_OFFSET:
2272		stats->rx_tcp_offset_errs++;
2273		break;
2274	case CQ_RX_ERROP_L4_PCLP:
2275		stats->rx_l4_pclp++;
2276		break;
2277	case CQ_RX_ERROP_RBDR_TRUNC:
2278		stats->rx_truncated_pkts++;
2279		break;
2280	}
2281
2282	return (1);
2283}
2284
2285/* Check for errors in a send completion queue entry; returns nonzero on error */
2286int
2287nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cmp_queue *cq,
2288    struct cqe_send_t *cqe_tx)
2289{
2290	struct cmp_queue_stats *stats = &cq->stats;
2291
2292	switch (cqe_tx->send_status) {
2293	case CQ_TX_ERROP_GOOD:
2294		stats->tx.good++;
2295		return (0);
2296	case CQ_TX_ERROP_DESC_FAULT:
2297		stats->tx.desc_fault++;
2298		break;
2299	case CQ_TX_ERROP_HDR_CONS_ERR:
2300		stats->tx.hdr_cons_err++;
2301		break;
2302	case CQ_TX_ERROP_SUBDC_ERR:
2303		stats->tx.subdesc_err++;
2304		break;
2305	case CQ_TX_ERROP_IMM_SIZE_OFLOW:
2306		stats->tx.imm_size_oflow++;
2307		break;
2308	case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
2309		stats->tx.data_seq_err++;
2310		break;
2311	case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
2312		stats->tx.mem_seq_err++;
2313		break;
2314	case CQ_TX_ERROP_LOCK_VIOL:
2315		stats->tx.lock_viol++;
2316		break;
2317	case CQ_TX_ERROP_DATA_FAULT:
2318		stats->tx.data_fault++;
2319		break;
2320	case CQ_TX_ERROP_TSTMP_CONFLICT:
2321		stats->tx.tstmp_conflict++;
2322		break;
2323	case CQ_TX_ERROP_TSTMP_TIMEOUT:
2324		stats->tx.tstmp_timeout++;
2325		break;
2326	case CQ_TX_ERROP_MEM_FAULT:
2327		stats->tx.mem_fault++;
2328		break;
2329	case CQ_TX_ERROP_CK_OVERLAP:
2330		stats->tx.csum_overlap++;
2331		break;
2332	case CQ_TX_ERROP_CK_OFLOW:
2333		stats->tx.csum_overflow++;
2334		break;
2335	}
2336
2337	return (1);
2338}
2339
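/*
 * Note: as with the receive check above, only CQ_TX_ERROP_GOOD yields
 * 0; any other send status bumps its counter and returns 1 so that the
 * caller can account the transmission as failed.
 */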