nicvf_queues.c revision 296032
1/*
2 * Copyright (C) 2015 Cavium Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/dev/vnic/nicvf_queues.c 296032 2016-02-25 14:17:13Z zbb $
27 *
28 */
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/vnic/nicvf_queues.c 296032 2016-02-25 14:17:13Z zbb $");
31
32#include "opt_inet.h"
33#include "opt_inet6.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/bitset.h>
38#include <sys/bitstring.h>
39#include <sys/buf_ring.h>
40#include <sys/bus.h>
41#include <sys/endian.h>
42#include <sys/kernel.h>
43#include <sys/malloc.h>
44#include <sys/module.h>
45#include <sys/rman.h>
46#include <sys/pciio.h>
47#include <sys/pcpu.h>
48#include <sys/proc.h>
49#include <sys/sockio.h>
50#include <sys/socket.h>
51#include <sys/stdatomic.h>
52#include <sys/cpuset.h>
53#include <sys/lock.h>
54#include <sys/mutex.h>
55#include <sys/smp.h>
56#include <sys/taskqueue.h>
57
58#include <vm/vm.h>
59#include <vm/pmap.h>
60
61#include <machine/bus.h>
62#include <machine/vmparam.h>
63
64#include <net/ethernet.h>
65#include <net/if.h>
66#include <net/if_var.h>
67#include <net/if_media.h>
68#include <net/ifq.h>
69
70#include <netinet/in_systm.h>
71#include <netinet/in.h>
72#include <netinet/if_ether.h>
73#include <netinet/ip.h>
74#include <netinet/ip6.h>
75#include <netinet/sctp.h>
76#include <netinet/tcp.h>
77#include <netinet/tcp_lro.h>
78#include <netinet/udp.h>
79
80#include <dev/pci/pcireg.h>
81#include <dev/pci/pcivar.h>
82
83#include "thunder_bgx.h"
84#include "nic_reg.h"
85#include "nic.h"
86#include "q_struct.h"
87#include "nicvf_queues.h"
88
89#define	DEBUG
90#undef DEBUG
91
92#ifdef DEBUG
93#define	dprintf(dev, fmt, ...)	device_printf(dev, fmt, ##__VA_ARGS__)
94#else
95#define	dprintf(dev, fmt, ...)
96#endif
97
98MALLOC_DECLARE(M_NICVF);
99
100static void nicvf_free_snd_queue(struct nicvf *, struct snd_queue *);
101static int nicvf_tx_mbuf_locked(struct snd_queue *, struct mbuf *);
102static struct mbuf * nicvf_get_rcv_mbuf(struct nicvf *, struct cqe_rx_t *);
103static void nicvf_sq_disable(struct nicvf *, int);
104static void nicvf_sq_enable(struct nicvf *, struct snd_queue *, int);
105static void nicvf_put_sq_desc(struct snd_queue *, int);
106static void nicvf_cmp_queue_config(struct nicvf *, struct queue_set *, int,
107    boolean_t);
108static void nicvf_sq_free_used_descs(struct nicvf *, struct snd_queue *, int);
109
110static void nicvf_rbdr_task(void *, int);
111static void nicvf_rbdr_task_nowait(void *, int);
112
/*
 * Per-receive-buffer metadata, stored in the headroom (the first
 * NICVF_RCV_BUF_ALIGN_BYTES) of the buffer itself so the HW buffer
 * address returned in a CQE can be translated back to its mbuf.
 */
struct rbuf_info {
	bus_dma_tag_t	dmat;	/* tag the buffer was mapped with */
	bus_dmamap_t	dmap;	/* per-buffer DMA map */
	struct mbuf *	mbuf;	/* backing mbuf; NULL once handed to stack */
};

/* Recover the rbuf_info placed just before the HW-visible data start */
#define GET_RBUF_INFO(x) ((struct rbuf_info *)((x) - NICVF_RCV_BUF_ALIGN_BYTES))
120
121/* Poll a register for a specific value */
122static int nicvf_poll_reg(struct nicvf *nic, int qidx,
123			  uint64_t reg, int bit_pos, int bits, int val)
124{
125	uint64_t bit_mask;
126	uint64_t reg_val;
127	int timeout = 10;
128
129	bit_mask = (1UL << bits) - 1;
130	bit_mask = (bit_mask << bit_pos);
131
132	while (timeout) {
133		reg_val = nicvf_queue_reg_read(nic, reg, qidx);
134		if (((reg_val & bit_mask) >> bit_pos) == val)
135			return (0);
136
137		DELAY(1000);
138		timeout--;
139	}
140	device_printf(nic->dev, "Poll on reg 0x%lx failed\n", reg);
141	return (ETIMEDOUT);
142}
143
144/* Callback for bus_dmamap_load() */
145static void
146nicvf_dmamap_q_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
147{
148	bus_addr_t *paddr;
149
150	KASSERT(nseg == 1, ("wrong number of segments, should be 1"));
151	paddr = arg;
152	*paddr = segs->ds_addr;
153}
154
/*
 * Allocate DMA-able memory for a ring of 'q_len' descriptors, each
 * 'desc_size' bytes, aligned to 'align_bytes'.  On success dmem's tag,
 * map, virtual base and bus address are all valid and the memory is
 * zeroed.  On failure every busdma resource created here is released
 * again and a busdma errno is returned.
 */
static int
nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
    int q_len, int desc_size, int align_bytes)
{
	int err, err_dmat;

	/* Create DMA tag first */
	err = bus_dma_tag_create(
	    bus_get_dma_tag(nic->dev),		/* parent tag */
	    align_bytes,			/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filtfunc, filtfuncarg */
	    (q_len * desc_size),		/* maxsize */
	    1,					/* nsegments */
	    (q_len * desc_size),		/* maxsegsize */
	    0,					/* flags */
	    NULL, NULL,				/* lockfunc, lockfuncarg */
	    &dmem->dmat);			/* dmat */

	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to create busdma tag for descriptors ring\n");
		return (err);
	}

	/* Allocate segment of continuous DMA safe memory */
	err = bus_dmamem_alloc(
	    dmem->dmat,				/* DMA tag */
	    &dmem->base,			/* virtual address */
	    (BUS_DMA_NOWAIT | BUS_DMA_ZERO),	/* flags */
	    &dmem->dmap);			/* DMA map */
	if (err != 0) {
		device_printf(nic->dev, "Failed to allocate DMA safe memory for"
		    "descriptors ring\n");
		goto dmamem_fail;
	}

	/* Resolve the ring's bus address (single segment expected) */
	err = bus_dmamap_load(
	    dmem->dmat,
	    dmem->dmap,
	    dmem->base,
	    (q_len * desc_size),		/* allocation size */
	    nicvf_dmamap_q_cb,			/* map to DMA address cb. */
	    &dmem->phys_base,			/* physical address */
	    BUS_DMA_NOWAIT);
	if (err != 0) {
		device_printf(nic->dev,
		    "Cannot load DMA map of descriptors ring\n");
		goto dmamap_fail;
	}

	dmem->q_len = q_len;
	dmem->size = (desc_size * q_len);

	return (0);

dmamap_fail:
	bus_dmamem_free(dmem->dmat, dmem->base, dmem->dmap);
	dmem->phys_base = 0;
dmamem_fail:
	/* err_dmat is only evaluated by KASSERT (INVARIANTS builds) */
	err_dmat = bus_dma_tag_destroy(dmem->dmat);
	dmem->base = NULL;
	KASSERT(err_dmat == 0,
	    ("%s: Trying to destroy BUSY DMA tag", __func__));

	return (err);
}
225
/*
 * Free queue's descriptor memory: unload the map, free the DMA memory
 * and destroy the tag created by nicvf_alloc_q_desc_mem().  Safe to
 * call with a NULL dmem or when nothing was allocated (base == NULL),
 * and idempotent since base is cleared on completion.
 */
static void
nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
{
	int err;

	if ((dmem == NULL) || (dmem->base == NULL))
		return;

	/* Unload a map */
	bus_dmamap_sync(dmem->dmat, dmem->dmap, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(dmem->dmat, dmem->dmap);
	/* Free DMA memory */
	bus_dmamem_free(dmem->dmat, dmem->base, dmem->dmap);
	/* Destroy DMA tag */
	err = bus_dma_tag_destroy(dmem->dmat);

	KASSERT(err == 0,
	    ("%s: Trying to destroy BUSY DMA tag", __func__));

	/* Mark as freed so repeated calls are harmless */
	dmem->phys_base = 0;
	dmem->base = NULL;
}
249
/*
 * Allocate buffer for packet reception
 * HW returns memory address where packet is DMA'ed but not a pointer
 * into RBDR ring, so save buffer address at the start of fragment and
 * align the start address to a cache aligned address
 *
 * On success *rbuf holds the bus address to program into the RBDR
 * descriptor (i.e. past the rbuf_info metadata line).  'mflags'
 * (M_WAITOK/M_NOWAIT) controls whether the mbuf allocation may sleep.
 */
static __inline int
nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
    bus_dmamap_t dmap, int mflags, uint32_t buf_len, bus_addr_t *rbuf)
{
	struct mbuf *mbuf;
	struct rbuf_info *rinfo;
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	mbuf = m_getjcl(mflags, MT_DATA, M_PKTHDR, MCLBYTES);
	if (mbuf == NULL)
		return (ENOMEM);

	/*
	 * The length is equal to the actual length + one 128b line
	 * used as a room for rbuf_info structure.
	 */
	mbuf->m_len = mbuf->m_pkthdr.len = buf_len;

	err = bus_dmamap_load_mbuf_sg(rbdr->rbdr_buff_dmat, dmap, mbuf, segs,
	    &nsegs, BUS_DMA_NOWAIT);
	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to map mbuf into DMA visible memory, err: %d\n",
		    err);
		m_freem(mbuf);
		/*
		 * NOTE(review): this destroys a map owned by the caller
		 * (kept in rbdr->rbdr_buff_dmaps[]), which then holds a
		 * stale pointer - confirm callers account for this on
		 * the error path.
		 */
		bus_dmamap_destroy(rbdr->rbdr_buff_dmat, dmap);
		return (err);
	}
	if (nsegs != 1)
		panic("Unexpected number of DMA segments for RB: %d", nsegs);
	/*
	 * Now use the room for rbuf_info structure
	 * and adjust mbuf data and length.
	 */
	rinfo = (struct rbuf_info *)mbuf->m_data;
	m_adj(mbuf, NICVF_RCV_BUF_ALIGN_BYTES);

	rinfo->dmat = rbdr->rbdr_buff_dmat;
	rinfo->dmap = dmap;
	rinfo->mbuf = mbuf;

	/* HW address skips the metadata line at the buffer start */
	*rbuf = segs[0].ds_addr + NICVF_RCV_BUF_ALIGN_BYTES;

	return (0);
}
303
304/* Retrieve mbuf for received packet */
305static struct mbuf *
306nicvf_rb_ptr_to_mbuf(struct nicvf *nic, bus_addr_t rb_ptr)
307{
308	struct mbuf *mbuf;
309	struct rbuf_info *rinfo;
310
311	/* Get buffer start address and alignment offset */
312	rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(rb_ptr));
313
314	/* Now retrieve mbuf to give to stack */
315	mbuf = rinfo->mbuf;
316	if (__predict_false(mbuf == NULL)) {
317		panic("%s: Received packet fragment with NULL mbuf",
318		    device_get_nameunit(nic->dev));
319	}
320	/*
321	 * Clear the mbuf in the descriptor to indicate
322	 * that this slot is processed and free to use.
323	 */
324	rinfo->mbuf = NULL;
325
326	bus_dmamap_sync(rinfo->dmat, rinfo->dmap, BUS_DMASYNC_POSTREAD);
327	bus_dmamap_unload(rinfo->dmat, rinfo->dmap);
328
329	return (mbuf);
330}
331
/*
 * Allocate RBDR ring and populate receive buffers
 *
 * Creates the descriptor ring ('ring_len' entries), a DMA tag plus a
 * per-entry DMA map for Rx buffers of 'buf_size' bytes, preloads a
 * buffer into every slot and starts the refill taskqueue.  Runs in a
 * sleepable context (M_WAITOK allocations).
 *
 * NOTE(review): intermediate error paths return without unwinding what
 * was already allocated here - presumably the caller invokes
 * nicvf_free_rbdr() on failure; confirm.
 */
static int
nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, int ring_len,
    int buf_size, int qidx)
{
	bus_dmamap_t dmap;
	bus_addr_t rbuf;
	struct rbdr_entry_t *desc;
	int idx;
	int err;

	/* Allocate rbdr descriptors ring */
	err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
	    sizeof(struct rbdr_entry_t), NICVF_RCV_BUF_ALIGN_BYTES);
	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to create RBDR descriptors ring\n");
		return (err);
	}

	rbdr->desc = rbdr->dmem.base;
	/*
	 * Buffer size has to be in multiples of 128 bytes.
	 * Make room for metadata of size of one line (128 bytes).
	 */
	rbdr->dma_size = buf_size - NICVF_RCV_BUF_ALIGN_BYTES;
	rbdr->enable = TRUE;
	rbdr->thresh = RBDR_THRESH;
	rbdr->nic = nic;
	rbdr->idx = qidx;

	/*
	 * Create DMA tag for Rx buffers.
	 * Each map created using this tag is intended to store Rx payload for
	 * one fragment and one header structure containing rbuf_info (thus
	 * additional 128 byte line since RB must be a multiple of 128 byte
	 * cache line).
	 */
	if (buf_size > MCLBYTES) {
		device_printf(nic->dev,
		    "Buffer size to large for mbuf cluster\n");
		return (EINVAL);
	}
	err = bus_dma_tag_create(
	    bus_get_dma_tag(nic->dev),		/* parent tag */
	    NICVF_RCV_BUF_ALIGN_BYTES,		/* alignment */
	    0,					/* boundary */
	    DMAP_MAX_PHYSADDR,			/* lowaddr */
	    DMAP_MIN_PHYSADDR,			/* highaddr */
	    NULL, NULL,				/* filtfunc, filtfuncarg */
	    roundup2(buf_size, MCLBYTES),	/* maxsize */
	    1,					/* nsegments */
	    roundup2(buf_size, MCLBYTES),	/* maxsegsize */
	    0,					/* flags */
	    NULL, NULL,				/* lockfunc, lockfuncarg */
	    &rbdr->rbdr_buff_dmat);		/* dmat */

	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to create busdma tag for RBDR buffers\n");
		return (err);
	}

	rbdr->rbdr_buff_dmaps = malloc(sizeof(*rbdr->rbdr_buff_dmaps) *
	    ring_len, M_NICVF, (M_WAITOK | M_ZERO));

	/* Create a DMA map per slot and preload a receive buffer into it */
	for (idx = 0; idx < ring_len; idx++) {
		err = bus_dmamap_create(rbdr->rbdr_buff_dmat, 0, &dmap);
		if (err != 0) {
			device_printf(nic->dev,
			    "Failed to create DMA map for RB\n");
			return (err);
		}
		rbdr->rbdr_buff_dmaps[idx] = dmap;

		err = nicvf_alloc_rcv_buffer(nic, rbdr, dmap, M_WAITOK,
		    DMA_BUFFER_LEN, &rbuf);
		if (err != 0)
			return (err);

		desc = GET_RBDR_DESC(rbdr, idx);
		/* HW stores buffer addresses shifted by the alignment */
		desc->buf_addr = (rbuf >> NICVF_RCV_BUF_ALIGN);
	}

	/* Allocate taskqueue */
	TASK_INIT(&rbdr->rbdr_task, 0, nicvf_rbdr_task, rbdr);
	TASK_INIT(&rbdr->rbdr_task_nowait, 0, nicvf_rbdr_task_nowait, rbdr);
	rbdr->rbdr_taskq = taskqueue_create_fast("nicvf_rbdr_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &rbdr->rbdr_taskq);
	taskqueue_start_threads(&rbdr->rbdr_taskq, 1, PI_NET, "%s: rbdr_taskq",
	    device_get_nameunit(nic->dev));

	return (0);
}
426
/*
 * Free RBDR ring and its receive buffers
 *
 * Cancels and drains the refill tasks first, then walks the ring from
 * head to tail releasing every buffer still owned by the ring, destroys
 * all per-slot DMA maps and the buffer tag, and finally frees the
 * descriptor ring memory.
 */
static void
nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
{
	struct mbuf *mbuf;
	struct queue_set *qs;
	struct rbdr_entry_t *desc;
	struct rbuf_info *rinfo;
	bus_addr_t buf_addr;
	int head, tail, idx;
	int err;

	qs = nic->qs;

	if ((qs == NULL) || (rbdr == NULL))
		return;

	/* Stop further refills before tearing anything down */
	rbdr->enable = FALSE;
	if (rbdr->rbdr_taskq != NULL) {
		/* Remove tasks */
		while (taskqueue_cancel(rbdr->rbdr_taskq,
		    &rbdr->rbdr_task_nowait, NULL) != 0) {
			/* Finish the nowait task first */
			taskqueue_drain(rbdr->rbdr_taskq,
			    &rbdr->rbdr_task_nowait);
		}
		taskqueue_free(rbdr->rbdr_taskq);
		rbdr->rbdr_taskq = NULL;

		while (taskqueue_cancel(taskqueue_thread,
		    &rbdr->rbdr_task, NULL) != 0) {
			/* Now finish the sleepable task */
			taskqueue_drain(taskqueue_thread, &rbdr->rbdr_task);
		}
	}

	/*
	 * Free all of the memory under the RB descriptors.
	 * There are assumptions here:
	 * 1. Corresponding RBDR is disabled
	 *    - it is safe to operate using head and tail indexes
	 * 2. All bffers that were received are properly freed by
	 *    the receive handler
	 *    - there is no need to unload DMA map and free MBUF for other
	 *      descriptors than unused ones
	 */
	if (rbdr->rbdr_buff_dmat != NULL) {
		head = rbdr->head;
		tail = rbdr->tail;
		while (head != tail) {
			desc = GET_RBDR_DESC(rbdr, head);
			/* Undo the alignment shift applied at fill time */
			buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
			rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(buf_addr));
			bus_dmamap_unload(rbdr->rbdr_buff_dmat, rinfo->dmap);
			mbuf = rinfo->mbuf;
			/* This will destroy everything including rinfo! */
			m_freem(mbuf);
			head++;
			head &= (rbdr->dmem.q_len - 1);
		}
		/* Free tail descriptor */
		desc = GET_RBDR_DESC(rbdr, tail);
		buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
		rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(buf_addr));
		bus_dmamap_unload(rbdr->rbdr_buff_dmat, rinfo->dmap);
		mbuf = rinfo->mbuf;
		/* This will destroy everything including rinfo! */
		m_freem(mbuf);

		/* Destroy DMA maps */
		for (idx = 0; idx < qs->rbdr_len; idx++) {
			if (rbdr->rbdr_buff_dmaps[idx] == NULL)
				continue;
			err = bus_dmamap_destroy(rbdr->rbdr_buff_dmat,
			    rbdr->rbdr_buff_dmaps[idx]);
			KASSERT(err == 0,
			    ("%s: Could not destroy DMA map for RB, desc: %d",
			    __func__, idx));
			rbdr->rbdr_buff_dmaps[idx] = NULL;
		}

		/* Now destroy the tag */
		err = bus_dma_tag_destroy(rbdr->rbdr_buff_dmat);
		KASSERT(err == 0,
		    ("%s: Trying to destroy BUSY DMA tag", __func__));

		rbdr->head = 0;
		rbdr->tail = 0;
	}

	/* Free RBDR ring */
	nicvf_free_q_desc_mem(nic, &rbdr->dmem);
}
520
/*
 * Refill receive buffer descriptors with new buffers.
 *
 * Reads the HW free-entry count and tail pointer, allocates and maps a
 * new buffer for every empty slot (minus one, since the doorbell may
 * only be rung with at most ring size - 1 entries) and notifies HW.
 * 'mflags' (M_WAITOK/M_NOWAIT) controls whether allocation may sleep.
 * Returns 0 on complete refill (RBDR interrupt re-enabled), ENOMEM
 * when some buffers could not be allocated.
 */
static int
nicvf_refill_rbdr(struct rbdr *rbdr, int mflags)
{
	struct nicvf *nic;
	struct queue_set *qs;
	int rbdr_idx;
	int tail, qcount;
	int refill_rb_cnt;
	struct rbdr_entry_t *desc;
	bus_dmamap_t dmap;
	bus_addr_t rbuf;
	boolean_t rb_alloc_fail;
	int new_rb;

	rb_alloc_fail = TRUE;
	new_rb = 0;
	nic = rbdr->nic;
	qs = nic->qs;
	rbdr_idx = rbdr->idx;

	/* Check if it's enabled */
	if (!rbdr->enable)
		return (0);

	/* Get no of desc's to be refilled */
	qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
	qcount &= 0x7FFFF;
	/* Doorbell can be ringed with a max of ring size minus 1 */
	if (qcount >= (qs->rbdr_len - 1)) {
		/* Ring is already full - nothing to refill */
		rb_alloc_fail = FALSE;
		goto out;
	} else
		refill_rb_cnt = qs->rbdr_len - qcount - 1;

	/* Start filling descs from tail */
	tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
	while (refill_rb_cnt) {
		tail++;
		tail &= (rbdr->dmem.q_len - 1);

		/* Reuse the per-slot DMA map created at init time */
		dmap = rbdr->rbdr_buff_dmaps[tail];
		if (nicvf_alloc_rcv_buffer(nic, rbdr, dmap, mflags,
		    DMA_BUFFER_LEN, &rbuf)) {
			/* Something went wrong. Resign */
			break;
		}
		desc = GET_RBDR_DESC(rbdr, tail);
		desc->buf_addr = (rbuf >> NICVF_RCV_BUF_ALIGN);
		refill_rb_cnt--;
		new_rb++;
	}

	/* make sure all memory stores are done before ringing doorbell */
	wmb();

	/* Check if buffer allocation failed */
	if (refill_rb_cnt == 0)
		rb_alloc_fail = FALSE;

	/* Notify HW */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
			      rbdr_idx, new_rb);
out:
	if (!rb_alloc_fail) {
		/*
		 * Re-enable RBDR interrupts only
		 * if buffer allocation is success.
		 */
		nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);

		return (0);
	}

	return (ENOMEM);
}
599
600/* Refill RBs even if sleep is needed to reclaim memory */
601static void
602nicvf_rbdr_task(void *arg, int pending)
603{
604	struct rbdr *rbdr;
605	int err;
606
607	rbdr = (struct rbdr *)arg;
608
609	err = nicvf_refill_rbdr(rbdr, M_WAITOK);
610	if (__predict_false(err != 0)) {
611		panic("%s: Failed to refill RBs even when sleep enabled",
612		    __func__);
613	}
614}
615
616/* Refill RBs as soon as possible without waiting */
617static void
618nicvf_rbdr_task_nowait(void *arg, int pending)
619{
620	struct rbdr *rbdr;
621	int err;
622
623	rbdr = (struct rbdr *)arg;
624
625	err = nicvf_refill_rbdr(rbdr, M_NOWAIT);
626	if (err != 0) {
627		/*
628		 * Schedule another, sleepable kernel thread
629		 * that will for sure refill the buffers.
630		 */
631		taskqueue_enqueue(taskqueue_thread, &rbdr->rbdr_task);
632	}
633}
634
/*
 * Handle a CQE_TYPE_RX completion: build the mbuf chain for the
 * received frame, attempt LRO for clean IPv4/TCP packets with verified
 * checksums and otherwise stage the mbuf on cq->rx_br to be pushed to
 * the stack after the CQ lock is dropped.
 * Returns 0 when the CQE was consumed (delivered or dropped), or the
 * buf_ring_enqueue() error when the staging ring is full so the caller
 * can retry this CQE later.
 */
static int
nicvf_rcv_pkt_handler(struct nicvf *nic, struct cmp_queue *cq,
    struct cqe_rx_t *cqe_rx, int cqe_type)
{
	struct mbuf *mbuf;
	struct rcv_queue *rq;
	int rq_idx;
	int err = 0;

	rq_idx = cqe_rx->rq_idx;
	rq = &nic->qs->rq[rq_idx];

	/* Check for errors */
	err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
	/* Errored CQE with no buffers attached: nothing to reclaim */
	if (err && !cqe_rx->rb_cnt)
		return (0);

	mbuf = nicvf_get_rcv_mbuf(nic, cqe_rx);
	if (mbuf == NULL) {
		dprintf(nic->dev, "Packet not received\n");
		return (0);
	}

	/* If error packet */
	if (err != 0) {
		m_freem(mbuf);
		return (0);
	}

	if (rq->lro_enabled &&
	    ((cqe_rx->l3_type == L3TYPE_IPV4) && (cqe_rx->l4_type == L4TYPE_TCP)) &&
	    (mbuf->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * At this point it is known that there are no errors in the
		 * packet. Attempt to LRO enqueue. Send to stack if no resources
		 * or enqueue error.
		 */
		if ((rq->lro.lro_cnt != 0) &&
		    (tcp_lro_rx(&rq->lro, mbuf, 0) == 0))
			return (0);
	}
	/*
	 * Push this packet to the stack later to avoid
	 * unlocking completion task in the middle of work.
	 */
	err = buf_ring_enqueue(cq->rx_br, mbuf);
	if (err != 0) {
		/*
		 * Failed to enqueue this mbuf.
		 * We don't drop it, just schedule another task.
		 */
		return (err);
	}

	return (0);
}
692
/*
 * Handle a CQE_TYPE_SEND completion: release the DMA map and mbuf of
 * the transmitted packet and return its subdescriptors to the SQ free
 * pool.  Returns EAGAIN when the SQ lock cannot be taken without
 * blocking (the caller retries the CQ pass later), 0 otherwise.
 */
static int
nicvf_snd_pkt_handler(struct nicvf *nic, struct cmp_queue *cq,
    struct cqe_send_t *cqe_tx, int cqe_type)
{
	bus_dmamap_t dmap;
	struct mbuf *mbuf;
	struct snd_queue *sq;
	struct sq_hdr_subdesc *hdr;

	mbuf = NULL;
	sq = &nic->qs->sq[cqe_tx->sq_idx];
	/* Avoid blocking here since we hold a non-sleepable NICVF_CMP_LOCK */
	if (NICVF_TX_TRYLOCK(sq) == 0)
		return (EAGAIN);

	/* Only header subdescriptors mark the start of a packet */
	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
	if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
		NICVF_TX_UNLOCK(sq);
		return (0);
	}

	dprintf(nic->dev,
	    "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n",
	    __func__, cqe_tx->sq_qs, cqe_tx->sq_idx,
	    cqe_tx->sqe_ptr, hdr->subdesc_cnt);

	/* Release DMA resources and the mbuf of the completed packet */
	dmap = (bus_dmamap_t)sq->snd_buff[cqe_tx->sqe_ptr].dmap;
	bus_dmamap_unload(sq->snd_buff_dmat, dmap);

	mbuf = (struct mbuf *)sq->snd_buff[cqe_tx->sqe_ptr].mbuf;
	if (mbuf != NULL) {
		m_freem(mbuf);
		sq->snd_buff[cqe_tx->sqe_ptr].mbuf = NULL;
	}

	nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
	/* Header subdescriptor plus 'subdesc_cnt' payload descriptors */
	nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);

	NICVF_TX_UNLOCK(sq);
	return (0);
}
734
/*
 * Drain completion queue 'cq_idx': process all currently valid CQEs,
 * ring the doorbell to return the consumed entries to HW, flush any
 * pending LRO work, and finally (outside of the CQ lock) push staged
 * Rx mbufs up the stack.  Returns non-zero when processing stopped
 * early (Rx staging ring full or SQ lock contention) so the caller can
 * reschedule the completion task.
 */
static int
nicvf_cq_intr_handler(struct nicvf *nic, uint8_t cq_idx)
{
	struct mbuf *mbuf;
	struct ifnet *ifp;
	int processed_cqe, work_done = 0, tx_done = 0;
	int cqe_count, cqe_head;
	struct queue_set *qs = nic->qs;
	struct cmp_queue *cq = &qs->cq[cq_idx];
	struct rcv_queue *rq;
	struct cqe_rx_t *cq_desc;
	struct lro_ctrl	*lro;
	struct lro_entry *queued;
	int rq_idx;
	int cmp_err;

	NICVF_CMP_LOCK(cq);
	cmp_err = 0;
	processed_cqe = 0;
	/* Get no of valid CQ entries to process */
	cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
	cqe_count &= CQ_CQE_COUNT;
	if (cqe_count == 0)
		goto out;

	/* Get head of the valid CQ entries */
	cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
	cqe_head &= 0xFFFF;

	dprintf(nic->dev, "%s CQ%d cqe_count %d cqe_head %d\n",
	    __func__, cq_idx, cqe_count, cqe_head);
	while (processed_cqe < cqe_count) {
		/* Get the CQ descriptor */
		cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
		cqe_head++;
		cqe_head &= (cq->dmem.q_len - 1);
		/* Prefetch next CQ descriptor */
		__builtin_prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));

		dprintf(nic->dev, "CQ%d cq_desc->cqe_type %d\n", cq_idx,
		    cq_desc->cqe_type);
		switch (cq_desc->cqe_type) {
		case CQE_TYPE_RX:
			cmp_err = nicvf_rcv_pkt_handler(nic, cq, cq_desc,
			    CQE_TYPE_RX);
			if (__predict_false(cmp_err != 0)) {
				/*
				 * Ups. Cannot finish now.
				 * Let's try again later.
				 */
				goto done;
			}
			work_done++;
			break;
		case CQE_TYPE_SEND:
			cmp_err = nicvf_snd_pkt_handler(nic, cq,
			    (void *)cq_desc, CQE_TYPE_SEND);
			if (__predict_false(cmp_err != 0)) {
				/*
				 * Ups. Cannot finish now.
				 * Let's try again later.
				 */
				goto done;
			}

			tx_done++;
			break;
		case CQE_TYPE_INVALID:
		case CQE_TYPE_RX_SPLIT:
		case CQE_TYPE_RX_TCP:
		case CQE_TYPE_SEND_PTP:
			/* Ignore for now */
			break;
		}
		processed_cqe++;
	}
done:
	dprintf(nic->dev,
	    "%s CQ%d processed_cqe %d work_done %d\n",
	    __func__, cq_idx, processed_cqe, work_done);

	/* Ring doorbell to inform H/W to reuse processed CQEs */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, cq_idx, processed_cqe);

	if ((tx_done > 0) &&
	    ((if_getdrvflags(nic->ifp) & IFF_DRV_RUNNING) != 0)) {
		/* Reenable TXQ if its stopped earlier due to SQ full */
		if_setdrvflagbits(nic->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
	}
out:
	/*
	 * Flush any outstanding LRO work
	 */
	rq_idx = cq_idx;
	rq = &nic->qs->rq[rq_idx];
	lro = &rq->lro;
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	NICVF_CMP_UNLOCK(cq);

	ifp = nic->ifp;
	/* Push received MBUFs to the stack (done unlocked on purpose) */
	while (!buf_ring_empty(cq->rx_br)) {
		mbuf = buf_ring_dequeue_mc(cq->rx_br);
		if (__predict_true(mbuf != NULL))
			(*ifp->if_input)(ifp, mbuf);
	}

	return (cmp_err);
}
848
/*
 * Qset error interrupt handler
 *
 * As of now only CQ errors are handled
 *
 * Pauses the interface, then for every CQ that reports an error:
 * quiesces its interrupt and SQ, drains queued CQEs, reconfigures the
 * CQ, reclaims used SQ descriptors and re-enables the SQ and CQ
 * interrupt.  Finally resumes the interface and re-arms the Qset error
 * interrupt.
 */
static void
nicvf_qs_err_task(void *arg, int pending)
{
	struct nicvf *nic;
	struct queue_set *qs;
	int qidx;
	uint64_t status;
	boolean_t enable = TRUE;

	nic = (struct nicvf *)arg;
	qs = nic->qs;

	/* Deactivate network interface */
	if_setdrvflagbits(nic->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	/* Check if it is CQ err */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
		    qidx);
		if ((status & CQ_ERR_MASK) == 0)
			continue;
		/* Process already queued CQEs and reconfig CQ */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_sq_disable(nic, qidx);
		(void)nicvf_cq_intr_handler(nic, qidx);
		nicvf_cmp_queue_config(nic, qs, qidx, enable);
		nicvf_sq_free_used_descs(nic, &qs->sq[qidx], qidx);
		nicvf_sq_enable(nic, &qs->sq[qidx], qidx);
		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
	}

	if_setdrvflagbits(nic->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
	/* Re-enable Qset error interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
}
889
/*
 * Completion taskqueue handler: drain the CQ, acknowledge the consumed
 * entries and re-enable the CQ interrupt.  If the pass ended early
 * (Rx staging ring full or SQ lock contention) the task re-enqueues
 * itself to finish the remainder.
 */
static void
nicvf_cmp_task(void *arg, int pending)
{
	uint64_t cq_head;
	struct cmp_queue *cq;
	struct nicvf *nic;
	int cmp_err;

	cq = (struct cmp_queue *)arg;
	nic = cq->nic;

	/* Handle CQ descriptors */
	cmp_err = nicvf_cq_intr_handler(nic, cq->idx);
	/* Re-enable interrupts */
	cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq->idx);
	nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->idx);
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD, cq->idx, cq_head);

	if (__predict_false(cmp_err != 0)) {
		/*
		 * Schedule another thread here since we did not
		 * process the entire CQ due to Tx or Rx CQ parse error.
		 */
		taskqueue_enqueue(cq->cmp_taskq, &cq->cmp_task);

	}

	/* Reenable interrupt (previously disabled in nicvf_intr_handler() */
	nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->idx);

}
921
922/* Initialize completion queue */
923static int
924nicvf_init_cmp_queue(struct nicvf *nic, struct cmp_queue *cq, int q_len,
925    int qidx)
926{
927	int err;
928
929	/* Initizalize lock */
930	snprintf(cq->mtx_name, sizeof(cq->mtx_name), "%s: CQ(%d) lock",
931	    device_get_nameunit(nic->dev), qidx);
932	mtx_init(&cq->mtx, cq->mtx_name, NULL, MTX_DEF);
933
934	err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
935				     NICVF_CQ_BASE_ALIGN_BYTES);
936
937	if (err != 0) {
938		device_printf(nic->dev,
939		    "Could not allocate DMA memory for CQ\n");
940		return (err);
941	}
942
943	cq->desc = cq->dmem.base;
944	cq->thresh = CMP_QUEUE_CQE_THRESH;
945	cq->nic = nic;
946	cq->idx = qidx;
947	nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;
948
949	cq->rx_br = buf_ring_alloc(CMP_QUEUE_LEN * 8, M_DEVBUF, M_WAITOK,
950	    &cq->mtx);
951
952	/* Allocate taskqueue */
953	TASK_INIT(&cq->cmp_task, 0, nicvf_cmp_task, cq);
954	cq->cmp_taskq = taskqueue_create_fast("nicvf_cmp_taskq", M_WAITOK,
955	    taskqueue_thread_enqueue, &cq->cmp_taskq);
956	taskqueue_start_threads(&cq->cmp_taskq, 1, PI_NET, "%s: cmp_taskq(%d)",
957	    device_get_nameunit(nic->dev), qidx);
958
959	return (0);
960}
961
/*
 * Release all resources of a completion queue: taskqueue, interrupt
 * state, descriptor ring memory, Rx buf_ring and the CQ mutex.
 * The CQ itself must already be disabled by the caller.
 */
static void
nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
{

	if (cq == NULL)
		return;
	/*
	 * The completion queue itself should be disabled by now
	 * (ref. nicvf_snd_queue_config()).
	 * Ensure that it is safe to disable it or panic.
	 */
	if (cq->enable)
		panic("%s: Trying to free working CQ(%d)", __func__, cq->idx);

	if (cq->cmp_taskq != NULL) {
		/* Remove task */
		while (taskqueue_cancel(cq->cmp_taskq, &cq->cmp_task, NULL) != 0)
			taskqueue_drain(cq->cmp_taskq, &cq->cmp_task);

		taskqueue_free(cq->cmp_taskq);
		cq->cmp_taskq = NULL;
	}
	/*
	 * Completion interrupt will possibly enable interrupts again
	 * so disable interrupting now after we finished processing
	 * completion task. It is safe to do so since the corresponding CQ
	 * was already disabled.
	 */
	nicvf_disable_intr(nic, NICVF_INTR_CQ, cq->idx);
	nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->idx);

	NICVF_CMP_LOCK(cq);
	nicvf_free_q_desc_mem(nic, &cq->dmem);
	drbr_free(cq->rx_br, M_DEVBUF);
	NICVF_CMP_UNLOCK(cq);
	mtx_destroy(&cq->mtx);
	memset(cq->mtx_name, 0, sizeof(cq->mtx_name));
}
999}
1000
1001static void
1002nicvf_snd_task(void *arg, int pending)
1003{
1004	struct snd_queue *sq = (struct snd_queue *)arg;
1005	struct mbuf *mbuf;
1006
1007	NICVF_TX_LOCK(sq);
1008	while (1) {
1009		mbuf = drbr_dequeue(NULL, sq->br);
1010		if (mbuf == NULL)
1011			break;
1012
1013		if (nicvf_tx_mbuf_locked(sq, mbuf) != 0) {
1014			/* XXX ARM64TODO: Increase Tx drop counter */
1015			m_freem(mbuf);
1016			break;
1017		}
1018	}
1019	NICVF_TX_UNLOCK(sq);
1020}
1021
/*
 * Initialize transmit queue: the TX mutex, a buf_ring for staged
 * packets, DMA memory for the SQ descriptor ring, a DMA tag and per-
 * buffer maps for Tx payloads, and the send taskqueue.
 *
 * NOTE(review): error paths return without releasing earlier
 * allocations - presumably nicvf_free_snd_queue() is called by the
 * caller on failure; confirm.
 */
static int
nicvf_init_snd_queue(struct nicvf *nic, struct snd_queue *sq, int q_len,
    int qidx)
{
	size_t i;
	int err;

	/* Initialize TX lock for this queue */
	snprintf(sq->mtx_name, sizeof(sq->mtx_name), "%s: SQ(%d) lock",
	    device_get_nameunit(nic->dev), qidx);
	mtx_init(&sq->mtx, sq->mtx_name, NULL, MTX_DEF);

	NICVF_TX_LOCK(sq);
	/* Allocate buffer ring */
	sq->br = buf_ring_alloc(q_len / MIN_SQ_DESC_PER_PKT_XMIT, M_DEVBUF,
	    M_NOWAIT, &sq->mtx);
	if (sq->br == NULL) {
		device_printf(nic->dev,
		    "ERROR: Could not set up buf ring for SQ(%d)\n", qidx);
		err = ENOMEM;
		goto error;
	}

	/* Allocate DMA memory for Tx descriptors */
	err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
				     NICVF_SQ_BASE_ALIGN_BYTES);
	if (err != 0) {
		device_printf(nic->dev,
		    "Could not allocate DMA memory for SQ\n");
		goto error;
	}

	sq->desc = sq->dmem.base;
	sq->head = sq->tail = 0;
	/* One descriptor is kept unused to distinguish full from empty */
	atomic_store_rel_int(&sq->free_cnt, q_len - 1);
	sq->thresh = SND_QUEUE_THRESH;
	sq->idx = qidx;
	sq->nic = nic;

	/*
	 * Allocate DMA maps for Tx buffers
	 */

	/* Create DMA tag first */
	err = bus_dma_tag_create(
	    bus_get_dma_tag(nic->dev),		/* parent tag */
	    1,					/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filtfunc, filtfuncarg */
	    NICVF_TXBUF_MAXSIZE,		/* maxsize */
	    NICVF_TXBUF_NSEGS,			/* nsegments */
	    MCLBYTES,				/* maxsegsize */
	    0,					/* flags */
	    NULL, NULL,				/* lockfunc, lockfuncarg */
	    &sq->snd_buff_dmat);		/* dmat */

	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to create busdma tag for Tx buffers\n");
		goto error;
	}

	/* Allocate send buffers array */
	sq->snd_buff = malloc(sizeof(*sq->snd_buff) * q_len, M_NICVF,
	    (M_NOWAIT | M_ZERO));
	if (sq->snd_buff == NULL) {
		device_printf(nic->dev,
		    "Could not allocate memory for Tx buffers array\n");
		err = ENOMEM;
		goto error;
	}

	/* Now populate maps */
	for (i = 0; i < q_len; i++) {
		err = bus_dmamap_create(sq->snd_buff_dmat, 0,
		    &sq->snd_buff[i].dmap);
		if (err != 0) {
			device_printf(nic->dev,
			    "Failed to create DMA maps for Tx buffers\n");
			goto error;
		}
	}
	NICVF_TX_UNLOCK(sq);

	/* Allocate taskqueue */
	TASK_INIT(&sq->snd_task, 0, nicvf_snd_task, sq);
	sq->snd_taskq = taskqueue_create_fast("nicvf_snd_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &sq->snd_taskq);
	taskqueue_start_threads(&sq->snd_taskq, 1, PI_NET, "%s: snd_taskq(%d)",
	    device_get_nameunit(nic->dev), qidx);

	return (0);
error:
	NICVF_TX_UNLOCK(sq);
	return (err);
}
1121
/*
 * Free a transmit queue.
 *
 * Stops and frees its taskqueue first, then — under the TX lock —
 * releases all pending mbufs, their DMA maps, the busdma tag, the
 * staging buf_ring and the descriptor memory.  Finally destroys the
 * queue mutex.  Safe to call on a partially initialized queue: every
 * resource is checked for NULL before being released.
 */
static void
nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
{
	struct queue_set *qs = nic->qs;
	size_t i;
	int err;	/* only consumed by KASSERT(); unused without INVARIANTS */

	if (sq == NULL)
		return;

	if (sq->snd_taskq != NULL) {
		/* Remove task; loop because cancel fails while it is running */
		while (taskqueue_cancel(sq->snd_taskq, &sq->snd_task, NULL) != 0)
			taskqueue_drain(sq->snd_taskq, &sq->snd_task);

		taskqueue_free(sq->snd_taskq);
		sq->snd_taskq = NULL;
	}

	NICVF_TX_LOCK(sq);
	if (sq->snd_buff_dmat != NULL) {
		if (sq->snd_buff != NULL) {
			/*
			 * Iterates qs->sq_len entries — assumed equal to the
			 * q_len this queue was initialized with; TODO confirm.
			 */
			for (i = 0; i < qs->sq_len; i++) {
				m_freem(sq->snd_buff[i].mbuf);
				sq->snd_buff[i].mbuf = NULL;

				bus_dmamap_unload(sq->snd_buff_dmat,
				    sq->snd_buff[i].dmap);
				err = bus_dmamap_destroy(sq->snd_buff_dmat,
				    sq->snd_buff[i].dmap);
				/*
				 * If bus_dmamap_destroy fails it can cause
				 * random panic later if the tag is also
				 * destroyed in the process.
				 */
				KASSERT(err == 0,
				    ("%s: Could not destroy DMA map for SQ",
				    __func__));
			}
		}

		free(sq->snd_buff, M_NICVF);

		err = bus_dma_tag_destroy(sq->snd_buff_dmat);
		KASSERT(err == 0,
		    ("%s: Trying to destroy BUSY DMA tag", __func__));
	}

	/* Free private driver ring for this send queue */
	if (sq->br != NULL)
		drbr_free(sq->br, M_DEVBUF);

	if (sq->dmem.base != NULL)
		nicvf_free_q_desc_mem(nic, &sq->dmem);

	NICVF_TX_UNLOCK(sq);
	/* Destroy Tx lock */
	mtx_destroy(&sq->mtx);
	memset(sq->mtx_name, 0, sizeof(sq->mtx_name));
}
1182
/*
 * Quiesce and reset a send queue in hardware: disable the SQ, wait for
 * the stopped indication (status bit field polled via nicvf_poll_reg),
 * then issue a reset.  Gives up silently if the poll times out.
 */
static void
nicvf_reclaim_snd_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
{

	/* Disable send queue */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
	/* Check if SQ is stopped */
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
		return;
	/* Reset send queue */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
}
1195
/*
 * Reclaim a receive queue: ask the PF, via mailbox, to sync the RQ so
 * that all packets still in the pipeline are written back to memory
 * before the queue is torn down.
 */
static void
nicvf_reclaim_rcv_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
{
	union nic_mbx mbx = {};

	/* Make sure all packets in the pipeline are written back into mem */
	mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
	nicvf_send_msg_to_pf(nic, &mbx);
}
1205
/*
 * Quiesce and reset a completion queue in hardware.  The timer-threshold
 * register is cleared explicitly because it survives a CQ reset.
 */
static void
nicvf_reclaim_cmp_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
{

	/* Disable timer threshold (doesn't get reset upon CQ reset) */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
	/* Disable completion queue */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
	/* Reset completion queue */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
}
1217
/*
 * Quiesce and reset a receive buffer descriptor ring.
 *
 * Saves the hardware head/tail pointers (needed later to free the
 * buffers still posted to the ring), recovers the FIFO from a possible
 * FAIL state, waits for the prefetch machinery to drain, then resets
 * and finally disables the ring.  Each poll bails out silently on
 * timeout.
 */
static void
nicvf_reclaim_rbdr(struct nicvf *nic, struct rbdr *rbdr, int qidx)
{
	uint64_t tmp, fifo_state;
	int timeout = 10;

	/* Save head and tail pointers for freeing up buffers */
	rbdr->head =
	    nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_HEAD, qidx) >> 3;
	rbdr->tail =
	    nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, qidx) >> 3;

	/*
	 * If RBDR FIFO is in 'FAIL' state then do a reset first
	 * before reclaiming.
	 */
	fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
	if (((fifo_state >> 62) & 0x03) == 0x3) {
		nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
		    qidx, NICVF_RBDR_RESET);
	}

	/* Disable RBDR */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
		return;
	/*
	 * Wait until the prefetch status halves match, i.e. no more
	 * prefetches are outstanding; up to ~10ms.
	 */
	while (1) {
		tmp = nicvf_queue_reg_read(nic,
		    NIC_QSET_RBDR_0_1_PREFETCH_STATUS, qidx);
		if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
			break;

		DELAY(1000);
		timeout--;
		if (!timeout) {
			device_printf(nic->dev,
			    "Failed polling on prefetch status\n");
			return;
		}
	}
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx,
	    NICVF_RBDR_RESET);

	/* Wait for reset to take effect, then leave the ring disabled */
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
		return;
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
		return;
}
1267
1268/* Configures receive queue */
1269static void
1270nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
1271    int qidx, bool enable)
1272{
1273	union nic_mbx mbx = {};
1274	struct rcv_queue *rq;
1275	struct rq_cfg rq_cfg;
1276	struct ifnet *ifp;
1277	struct lro_ctrl	*lro;
1278
1279	ifp = nic->ifp;
1280
1281	rq = &qs->rq[qidx];
1282	rq->enable = enable;
1283
1284	lro = &rq->lro;
1285
1286	/* Disable receive queue */
1287	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);
1288
1289	if (!rq->enable) {
1290		nicvf_reclaim_rcv_queue(nic, qs, qidx);
1291		/* Free LRO memory */
1292		tcp_lro_free(lro);
1293		rq->lro_enabled = FALSE;
1294		return;
1295	}
1296
1297	/* Configure LRO if enabled */
1298	rq->lro_enabled = FALSE;
1299	if ((if_getcapenable(ifp) & IFCAP_LRO) != 0) {
1300		if (tcp_lro_init(lro) != 0) {
1301			device_printf(nic->dev,
1302			    "Failed to initialize LRO for RXQ%d\n", qidx);
1303		} else {
1304			rq->lro_enabled = TRUE;
1305			lro->ifp = nic->ifp;
1306		}
1307	}
1308
1309	rq->cq_qs = qs->vnic_id;
1310	rq->cq_idx = qidx;
1311	rq->start_rbdr_qs = qs->vnic_id;
1312	rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
1313	rq->cont_rbdr_qs = qs->vnic_id;
1314	rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
1315	/* all writes of RBDR data to be loaded into L2 Cache as well*/
1316	rq->caching = 1;
1317
1318	/* Send a mailbox msg to PF to config RQ */
1319	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
1320	mbx.rq.qs_num = qs->vnic_id;
1321	mbx.rq.rq_num = qidx;
1322	mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) |
1323	    (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
1324	    (rq->cont_qs_rbdr_idx << 8) | (rq->start_rbdr_qs << 1) |
1325	    (rq->start_qs_rbdr_idx);
1326	nicvf_send_msg_to_pf(nic, &mbx);
1327
1328	mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
1329	mbx.rq.cfg = (1UL << 63) | (1UL << 62) | (qs->vnic_id << 0);
1330	nicvf_send_msg_to_pf(nic, &mbx);
1331
1332	/*
1333	 * RQ drop config
1334	 * Enable CQ drop to reserve sufficient CQEs for all tx packets
1335	 */
1336	mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
1337	mbx.rq.cfg = (1UL << 62) | (RQ_CQ_DROP << 8);
1338	nicvf_send_msg_to_pf(nic, &mbx);
1339
1340	nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00);
1341
1342	/* Enable Receive queue */
1343	rq_cfg.ena = 1;
1344	rq_cfg.tcp_ena = 0;
1345	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx,
1346	    *(uint64_t *)&rq_cfg);
1347}
1348
1349/* Configures completion queue */
1350static void
1351nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
1352    int qidx, boolean_t enable)
1353{
1354	struct cmp_queue *cq;
1355	struct cq_cfg cq_cfg;
1356
1357	cq = &qs->cq[qidx];
1358	cq->enable = enable;
1359
1360	if (!cq->enable) {
1361		nicvf_reclaim_cmp_queue(nic, qs, qidx);
1362		return;
1363	}
1364
1365	/* Reset completion queue */
1366	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
1367
1368	/* Set completion queue base address */
1369	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE, qidx,
1370	    (uint64_t)(cq->dmem.phys_base));
1371
1372	/* Enable Completion queue */
1373	cq_cfg.ena = 1;
1374	cq_cfg.reset = 0;
1375	cq_cfg.caching = 0;
1376	cq_cfg.qsize = CMP_QSIZE;
1377	cq_cfg.avg_con = 0;
1378	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(uint64_t *)&cq_cfg);
1379
1380	/* Set threshold value for interrupt generation */
1381	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
1382	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx,
1383	    nic->cq_coalesce_usecs);
1384}
1385
1386/* Configures transmit queue */
1387static void
1388nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs, int qidx,
1389    boolean_t enable)
1390{
1391	union nic_mbx mbx = {};
1392	struct snd_queue *sq;
1393	struct sq_cfg sq_cfg;
1394
1395	sq = &qs->sq[qidx];
1396	sq->enable = enable;
1397
1398	if (!sq->enable) {
1399		nicvf_reclaim_snd_queue(nic, qs, qidx);
1400		return;
1401	}
1402
1403	/* Reset send queue */
1404	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
1405
1406	sq->cq_qs = qs->vnic_id;
1407	sq->cq_idx = qidx;
1408
1409	/* Send a mailbox msg to PF to config SQ */
1410	mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
1411	mbx.sq.qs_num = qs->vnic_id;
1412	mbx.sq.sq_num = qidx;
1413	mbx.sq.sqs_mode = nic->sqs_mode;
1414	mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
1415	nicvf_send_msg_to_pf(nic, &mbx);
1416
1417	/* Set queue base address */
1418	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE, qidx,
1419	    (uint64_t)(sq->dmem.phys_base));
1420
1421	/* Enable send queue  & set queue size */
1422	sq_cfg.ena = 1;
1423	sq_cfg.reset = 0;
1424	sq_cfg.ldwb = 0;
1425	sq_cfg.qsize = SND_QSIZE;
1426	sq_cfg.tstmp_bgx_intf = 0;
1427	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(uint64_t *)&sq_cfg);
1428
1429	/* Set threshold value for interrupt generation */
1430	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
1431}
1432
1433/* Configures receive buffer descriptor ring */
1434static void
1435nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs, int qidx,
1436    boolean_t enable)
1437{
1438	struct rbdr *rbdr;
1439	struct rbdr_cfg rbdr_cfg;
1440
1441	rbdr = &qs->rbdr[qidx];
1442	nicvf_reclaim_rbdr(nic, rbdr, qidx);
1443	if (!enable)
1444		return;
1445
1446	/* Set descriptor base address */
1447	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE, qidx,
1448	    (uint64_t)(rbdr->dmem.phys_base));
1449
1450	/* Enable RBDR  & set queue size */
1451	/* Buffer size should be in multiples of 128 bytes */
1452	rbdr_cfg.ena = 1;
1453	rbdr_cfg.reset = 0;
1454	rbdr_cfg.ldwb = 0;
1455	rbdr_cfg.qsize = RBDR_SIZE;
1456	rbdr_cfg.avg_con = 0;
1457	rbdr_cfg.lines = rbdr->dma_size / 128;
1458	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx,
1459	    *(uint64_t *)&rbdr_cfg);
1460
1461	/* Notify HW */
1462	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, qidx,
1463	    qs->rbdr_len - 1);
1464
1465	/* Set threshold value for interrupt generation */
1466	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH, qidx,
1467	    rbdr->thresh - 1);
1468}
1469
1470/* Requests PF to assign and enable Qset */
1471void
1472nicvf_qset_config(struct nicvf *nic, boolean_t enable)
1473{
1474	union nic_mbx mbx = {};
1475	struct queue_set *qs;
1476	struct qs_cfg *qs_cfg;
1477
1478	qs = nic->qs;
1479	if (qs == NULL) {
1480		device_printf(nic->dev,
1481		    "Qset is still not allocated, don't init queues\n");
1482		return;
1483	}
1484
1485	qs->enable = enable;
1486	qs->vnic_id = nic->vf_id;
1487
1488	/* Send a mailbox msg to PF to config Qset */
1489	mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
1490	mbx.qs.num = qs->vnic_id;
1491
1492	mbx.qs.cfg = 0;
1493	qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
1494	if (qs->enable) {
1495		qs_cfg->ena = 1;
1496		qs_cfg->vnic = qs->vnic_id;
1497	}
1498	nicvf_send_msg_to_pf(nic, &mbx);
1499}
1500
1501static void
1502nicvf_free_resources(struct nicvf *nic)
1503{
1504	int qidx;
1505	struct queue_set *qs;
1506
1507	qs = nic->qs;
1508	/*
1509	 * Remove QS error task first since it has to be dead
1510	 * to safely free completion queue tasks.
1511	 */
1512	if (qs->qs_err_taskq != NULL) {
1513		/* Shut down QS error tasks */
1514		while (taskqueue_cancel(qs->qs_err_taskq,
1515		    &qs->qs_err_task,  NULL) != 0) {
1516			taskqueue_drain(qs->qs_err_taskq, &qs->qs_err_task);
1517
1518		}
1519		taskqueue_free(qs->qs_err_taskq);
1520		qs->qs_err_taskq = NULL;
1521	}
1522	/* Free receive buffer descriptor ring */
1523	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1524		nicvf_free_rbdr(nic, &qs->rbdr[qidx]);
1525
1526	/* Free completion queue */
1527	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1528		nicvf_free_cmp_queue(nic, &qs->cq[qidx]);
1529
1530	/* Free send queue */
1531	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1532		nicvf_free_snd_queue(nic, &qs->sq[qidx]);
1533}
1534
1535static int
1536nicvf_alloc_resources(struct nicvf *nic)
1537{
1538	struct queue_set *qs = nic->qs;
1539	int qidx;
1540
1541	/* Alloc receive buffer descriptor ring */
1542	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
1543		if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
1544				    DMA_BUFFER_LEN, qidx))
1545			goto alloc_fail;
1546	}
1547
1548	/* Alloc send queue */
1549	for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
1550		if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
1551			goto alloc_fail;
1552	}
1553
1554	/* Alloc completion queue */
1555	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
1556		if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len, qidx))
1557			goto alloc_fail;
1558	}
1559
1560	/* Allocate QS error taskqueue */
1561	TASK_INIT(&qs->qs_err_task, 0, nicvf_qs_err_task, nic);
1562	qs->qs_err_taskq = taskqueue_create_fast("nicvf_qs_err_taskq", M_WAITOK,
1563	    taskqueue_thread_enqueue, &qs->qs_err_taskq);
1564	taskqueue_start_threads(&qs->qs_err_taskq, 1, PI_NET, "%s: qs_taskq",
1565	    device_get_nameunit(nic->dev));
1566
1567	return (0);
1568alloc_fail:
1569	nicvf_free_resources(nic);
1570	return (ENOMEM);
1571}
1572
1573int
1574nicvf_set_qset_resources(struct nicvf *nic)
1575{
1576	struct queue_set *qs;
1577
1578	qs = malloc(sizeof(*qs), M_NICVF, (M_ZERO | M_WAITOK));
1579	nic->qs = qs;
1580
1581	/* Set count of each queue */
1582	qs->rbdr_cnt = RBDR_CNT;
1583	/* With no RSS we stay with single RQ */
1584	qs->rq_cnt = 1;
1585
1586	qs->sq_cnt = SND_QUEUE_CNT;
1587	qs->cq_cnt = CMP_QUEUE_CNT;
1588
1589	/* Set queue lengths */
1590	qs->rbdr_len = RCV_BUF_COUNT;
1591	qs->sq_len = SND_QUEUE_LEN;
1592	qs->cq_len = CMP_QUEUE_LEN;
1593
1594	nic->rx_queues = qs->rq_cnt;
1595	nic->tx_queues = qs->sq_cnt;
1596
1597	return (0);
1598}
1599
1600int
1601nicvf_config_data_transfer(struct nicvf *nic, boolean_t enable)
1602{
1603	boolean_t disable = FALSE;
1604	struct queue_set *qs;
1605	int qidx;
1606
1607	qs = nic->qs;
1608	if (qs == NULL)
1609		return (0);
1610
1611	if (enable) {
1612		if (nicvf_alloc_resources(nic) != 0)
1613			return (ENOMEM);
1614
1615		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1616			nicvf_snd_queue_config(nic, qs, qidx, enable);
1617		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1618			nicvf_cmp_queue_config(nic, qs, qidx, enable);
1619		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1620			nicvf_rbdr_config(nic, qs, qidx, enable);
1621		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1622			nicvf_rcv_queue_config(nic, qs, qidx, enable);
1623	} else {
1624		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1625			nicvf_rcv_queue_config(nic, qs, qidx, disable);
1626		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1627			nicvf_rbdr_config(nic, qs, qidx, disable);
1628		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1629			nicvf_snd_queue_config(nic, qs, qidx, disable);
1630		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1631			nicvf_cmp_queue_config(nic, qs, qidx, disable);
1632
1633		nicvf_free_resources(nic);
1634	}
1635
1636	return (0);
1637}
1638
1639/*
1640 * Get a free desc from SQ
1641 * returns descriptor ponter & descriptor number
1642 */
1643static __inline int
1644nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
1645{
1646	int qentry;
1647
1648	qentry = sq->tail;
1649	atomic_subtract_int(&sq->free_cnt, desc_cnt);
1650	sq->tail += desc_cnt;
1651	sq->tail &= (sq->dmem.q_len - 1);
1652
1653	return (qentry);
1654}
1655
1656/* Free descriptor back to SQ for future use */
1657static void
1658nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
1659{
1660
1661	atomic_add_int(&sq->free_cnt, desc_cnt);
1662	sq->head += desc_cnt;
1663	sq->head &= (sq->dmem.q_len - 1);
1664}
1665
1666static __inline int
1667nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
1668{
1669	qentry++;
1670	qentry &= (sq->dmem.q_len - 1);
1671	return (qentry);
1672}
1673
1674static void
1675nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
1676{
1677	uint64_t sq_cfg;
1678
1679	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1680	sq_cfg |= NICVF_SQ_EN;
1681	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1682	/* Ring doorbell so that H/W restarts processing SQEs */
1683	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
1684}
1685
1686static void
1687nicvf_sq_disable(struct nicvf *nic, int qidx)
1688{
1689	uint64_t sq_cfg;
1690
1691	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1692	sq_cfg &= ~NICVF_SQ_EN;
1693	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1694}
1695
/*
 * Release driver-side state for SQ descriptors already consumed by the
 * hardware: walk from the driver's head up to the hardware head, free
 * the mbuf and unload the DMA map of every HEADER subdescriptor, and
 * return all descriptors to the ring.
 */
static void
nicvf_sq_free_used_descs(struct nicvf *nic, struct snd_queue *sq, int qidx)
{
	uint64_t head, tail;
	struct snd_buff *snd_buff;
	struct sq_hdr_subdesc *hdr;

	NICVF_TX_LOCK(sq);
	/* >> 4 presumably converts byte offsets to descriptor indices —
	 * TODO confirm against SND_QUEUE_DESC_SIZE. */
	head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
	/* NOTE(review): tail is read here but never used below. */
	tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
	while (sq->head != head) {
		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
		if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
			/* Non-header subdescriptors carry no mbuf state. */
			nicvf_put_sq_desc(sq, 1);
			continue;
		}
		snd_buff = &sq->snd_buff[sq->head];
		if (snd_buff->mbuf != NULL) {
			bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
			m_freem(snd_buff->mbuf);
			sq->snd_buff[sq->head].mbuf = NULL;
		}
		/* Skip the header plus all of its trailing subdescriptors. */
		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
	}
	NICVF_TX_UNLOCK(sq);
}
1722
1723/*
1724 * Add SQ HEADER subdescriptor.
1725 * First subdescriptor for every send descriptor.
1726 */
1727static __inline int
1728nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
1729			 int subdesc_cnt, struct mbuf *mbuf, int len)
1730{
1731	struct sq_hdr_subdesc *hdr;
1732	struct ether_vlan_header *eh;
1733#ifdef INET
1734	struct ip *ip;
1735#endif
1736	uint16_t etype;
1737	int ehdrlen, iphlen, poff;
1738
1739	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1740	sq->snd_buff[qentry].mbuf = mbuf;
1741
1742	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1743	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1744	/* Enable notification via CQE after processing SQE */
1745	hdr->post_cqe = 1;
1746	/* No of subdescriptors following this */
1747	hdr->subdesc_cnt = subdesc_cnt;
1748	hdr->tot_len = len;
1749
1750	if (mbuf->m_pkthdr.csum_flags != 0) {
1751		hdr->csum_l3 = 1; /* Enable IP csum calculation */
1752
1753		eh = mtod(mbuf, struct ether_vlan_header *);
1754		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1755			ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1756			etype = ntohs(eh->evl_proto);
1757		} else {
1758			ehdrlen = ETHER_HDR_LEN;
1759			etype = ntohs(eh->evl_encap_proto);
1760		}
1761
1762		if (mbuf->m_len < ehdrlen + sizeof(struct ip)) {
1763			mbuf = m_pullup(mbuf, ehdrlen + sizeof(struct ip));
1764			sq->snd_buff[qentry].mbuf = mbuf;
1765			if (mbuf == NULL)
1766				return (ENOBUFS);
1767		}
1768
1769		switch (etype) {
1770#ifdef INET6
1771		case ETHERTYPE_IPV6:
1772			/* ARM64TODO: Add support for IPv6 */
1773			hdr->csum_l3 = 0;
1774			sq->snd_buff[qentry].mbuf = NULL;
1775			return (ENXIO);
1776#endif
1777#ifdef INET
1778		case ETHERTYPE_IP:
1779			ip = (struct ip *)(mbuf->m_data + ehdrlen);
1780			ip->ip_sum = 0;
1781			iphlen = ip->ip_hl << 2;
1782			poff = ehdrlen + iphlen;
1783
1784			switch (ip->ip_p) {
1785			case IPPROTO_TCP:
1786				if ((mbuf->m_pkthdr.csum_flags & CSUM_TCP) == 0)
1787					break;
1788
1789				if (mbuf->m_len < (poff + sizeof(struct tcphdr))) {
1790					mbuf = m_pullup(mbuf, poff + sizeof(struct tcphdr));
1791					sq->snd_buff[qentry].mbuf = mbuf;
1792					if (mbuf == NULL)
1793						return (ENOBUFS);
1794				}
1795				hdr->csum_l4 = SEND_L4_CSUM_TCP;
1796				break;
1797			case IPPROTO_UDP:
1798				if ((mbuf->m_pkthdr.csum_flags & CSUM_UDP) == 0)
1799					break;
1800
1801				if (mbuf->m_len < (poff + sizeof(struct udphdr))) {
1802					mbuf = m_pullup(mbuf, poff + sizeof(struct udphdr));
1803					sq->snd_buff[qentry].mbuf = mbuf;
1804					if (mbuf == NULL)
1805						return (ENOBUFS);
1806				}
1807				hdr->csum_l4 = SEND_L4_CSUM_UDP;
1808				break;
1809			case IPPROTO_SCTP:
1810				if ((mbuf->m_pkthdr.csum_flags & CSUM_SCTP) == 0)
1811					break;
1812
1813				if (mbuf->m_len < (poff + sizeof(struct sctphdr))) {
1814					mbuf = m_pullup(mbuf, poff + sizeof(struct sctphdr));
1815					sq->snd_buff[qentry].mbuf = mbuf;
1816					if (mbuf == NULL)
1817						return (ENOBUFS);
1818				}
1819				hdr->csum_l4 = SEND_L4_CSUM_SCTP;
1820				break;
1821			default:
1822				break;
1823			}
1824			break;
1825#endif
1826		default:
1827			hdr->csum_l3 = 0;
1828			return (0);
1829		}
1830
1831		hdr->l3_offset = ehdrlen;
1832		hdr->l4_offset = ehdrlen + iphlen;
1833	} else
1834		hdr->csum_l3 = 0;
1835
1836	return (0);
1837}
1838
1839/*
1840 * SQ GATHER subdescriptor
1841 * Must follow HDR descriptor
1842 */
1843static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
1844					       int size, uint64_t data)
1845{
1846	struct sq_gather_subdesc *gather;
1847
1848	qentry &= (sq->dmem.q_len - 1);
1849	gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);
1850
1851	memset(gather, 0, SND_QUEUE_DESC_SIZE);
1852	gather->subdesc_type = SQ_DESC_TYPE_GATHER;
1853	gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
1854	gather->size = size;
1855	gather->addr = data;
1856}
1857
1858/* Put an mbuf to a SQ for packet transfer. */
1859static int
1860nicvf_tx_mbuf_locked(struct snd_queue *sq, struct mbuf *mbuf)
1861{
1862	bus_dma_segment_t segs[256];
1863	struct snd_buff *snd_buff;
1864	size_t seg;
1865	int nsegs, qentry;
1866	int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT - 1;
1867	int err;
1868
1869	NICVF_TX_LOCK_ASSERT(sq);
1870
1871	if (sq->free_cnt == 0)
1872		return (ENOBUFS);
1873
1874	snd_buff = &sq->snd_buff[sq->tail];
1875
1876	err = bus_dmamap_load_mbuf_sg(sq->snd_buff_dmat, snd_buff->dmap,
1877	    mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
1878	if (err != 0) {
1879		/* ARM64TODO: Add mbuf defragmenting if we lack maps */
1880		return (err);
1881	}
1882
1883	/* Set how many subdescriptors is required */
1884	subdesc_cnt += nsegs;
1885
1886	if (subdesc_cnt > sq->free_cnt) {
1887		/* ARM64TODO: Add mbuf defragmentation if we lack descriptors */
1888		bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
1889		return (ENOBUFS);
1890	}
1891
1892	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1893
1894	/* Add SQ header subdesc */
1895	err = nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, mbuf,
1896	    mbuf->m_pkthdr.len);
1897	if (err != 0) {
1898		bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
1899		return (err);
1900	}
1901
1902	/* Add SQ gather subdescs */
1903	for (seg = 0; seg < nsegs; seg++) {
1904		qentry = nicvf_get_nxt_sqentry(sq, qentry);
1905		nicvf_sq_add_gather_subdesc(sq, qentry, segs[seg].ds_len,
1906		    segs[seg].ds_addr);
1907	}
1908
1909	/* make sure all memory stores are done before ringing doorbell */
1910	bus_dmamap_sync(sq->dmem.dmat, sq->dmem.dmap, BUS_DMASYNC_PREWRITE);
1911
1912	dprintf(sq->nic->dev, "%s: sq->idx: %d, subdesc_cnt: %d\n",
1913	    __func__, sq->idx, subdesc_cnt);
1914	/* Inform HW to xmit new packet */
1915	nicvf_queue_reg_write(sq->nic, NIC_QSET_SQ_0_7_DOOR,
1916	    sq->idx, subdesc_cnt);
1917	return (0);
1918}
1919
1920static __inline u_int
1921frag_num(u_int i)
1922{
1923#if BYTE_ORDER == BIG_ENDIAN
1924	return ((i & ~3) + 3 - (i & 3));
1925#else
1926	return (i);
1927#endif
1928}
1929
/*
 * Returns MBUF for a received packet.
 *
 * Reassembles the receive-buffer fragments listed in the CQE into a
 * single mbuf chain, strips the alignment pad from the first fragment,
 * and translates the hardware checksum verdicts into mbuf csum flags
 * when RXCSUM is enabled.  May return NULL if the CQE lists no buffers.
 */
struct mbuf *
nicvf_get_rcv_mbuf(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
{
	int frag;
	int payload_len = 0;
	struct mbuf *mbuf;
	struct mbuf *mbuf_frag;
	uint16_t *rb_lens = NULL;
	uint64_t *rb_ptrs = NULL;

	mbuf = NULL;
	/*
	 * Buffer lengths live at byte offset 24 and buffer pointers at
	 * byte offset 48 of the CQE — assumed to match the hardware
	 * cqe_rx_t layout; TODO confirm against the structure definition.
	 */
	rb_lens = (uint16_t *)((uint8_t *)cqe_rx + (3 * sizeof(uint64_t)));
	rb_ptrs = (uint64_t *)((uint8_t *)cqe_rx + (6 * sizeof(uint64_t)));

	dprintf(nic->dev, "%s rb_cnt %d rb0_ptr %lx rb0_sz %d\n",
	    __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);

	for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
		/* frag_num() compensates for per-word ordering on BE hosts */
		payload_len = rb_lens[frag_num(frag)];
		if (frag == 0) {
			/* First fragment */
			mbuf = nicvf_rb_ptr_to_mbuf(nic,
			    (*rb_ptrs - cqe_rx->align_pad));
			mbuf->m_len = payload_len;
			mbuf->m_data += cqe_rx->align_pad;
			if_setrcvif(mbuf, nic->ifp);
		} else {
			/* Add fragments */
			mbuf_frag = nicvf_rb_ptr_to_mbuf(nic, *rb_ptrs);
			/* NOTE(review): m_append() may fail on allocation and
			 * the return value is ignored — the packet would be
			 * silently truncated. */
			m_append(mbuf, payload_len, mbuf_frag->m_data);
			m_freem(mbuf_frag);
		}
		/* Next buffer pointer */
		rb_ptrs++;
	}

	if (__predict_true(mbuf != NULL)) {
		/* Recompute pkthdr.len from the chain just built. */
		m_fixhdr(mbuf);
		mbuf->m_pkthdr.flowid = cqe_rx->rq_idx;
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE);
		if (__predict_true((if_getcapenable(nic->ifp) & IFCAP_RXCSUM) != 0)) {
			/*
			 * HW by default verifies IP & TCP/UDP/SCTP checksums
			 */

			/* XXX: Do we need to include IP with options too? */
			if (__predict_true(cqe_rx->l3_type == L3TYPE_IPV4 ||
			    cqe_rx->l3_type == L3TYPE_IPV6)) {
				mbuf->m_pkthdr.csum_flags =
				    (CSUM_IP_CHECKED | CSUM_IP_VALID);
			}
			if (cqe_rx->l4_type == L4TYPE_TCP ||
			    cqe_rx->l4_type == L4TYPE_UDP ||
			    cqe_rx->l4_type == L4TYPE_SCTP) {
				mbuf->m_pkthdr.csum_flags |=
				    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
				mbuf->m_pkthdr.csum_data = htons(0xffff);
			}
		}
	}

	return (mbuf);
}
1994
1995/* Enable interrupt */
1996void
1997nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
1998{
1999	uint64_t reg_val;
2000
2001	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
2002
2003	switch (int_type) {
2004	case NICVF_INTR_CQ:
2005		reg_val |= ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2006		break;
2007	case NICVF_INTR_SQ:
2008		reg_val |= ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2009		break;
2010	case NICVF_INTR_RBDR:
2011		reg_val |= ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2012		break;
2013	case NICVF_INTR_PKT_DROP:
2014		reg_val |= (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2015		break;
2016	case NICVF_INTR_TCP_TIMER:
2017		reg_val |= (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2018		break;
2019	case NICVF_INTR_MBOX:
2020		reg_val |= (1UL << NICVF_INTR_MBOX_SHIFT);
2021		break;
2022	case NICVF_INTR_QS_ERR:
2023		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2024		break;
2025	default:
2026		device_printf(nic->dev,
2027			   "Failed to enable interrupt: unknown type\n");
2028		break;
2029	}
2030
2031	nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val);
2032}
2033
2034/* Disable interrupt */
2035void
2036nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
2037{
2038	uint64_t reg_val = 0;
2039
2040	switch (int_type) {
2041	case NICVF_INTR_CQ:
2042		reg_val |= ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2043		break;
2044	case NICVF_INTR_SQ:
2045		reg_val |= ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2046		break;
2047	case NICVF_INTR_RBDR:
2048		reg_val |= ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2049		break;
2050	case NICVF_INTR_PKT_DROP:
2051		reg_val |= (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2052		break;
2053	case NICVF_INTR_TCP_TIMER:
2054		reg_val |= (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2055		break;
2056	case NICVF_INTR_MBOX:
2057		reg_val |= (1UL << NICVF_INTR_MBOX_SHIFT);
2058		break;
2059	case NICVF_INTR_QS_ERR:
2060		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2061		break;
2062	default:
2063		device_printf(nic->dev,
2064			   "Failed to disable interrupt: unknown type\n");
2065		break;
2066	}
2067
2068	nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val);
2069}
2070
2071/* Clear interrupt */
2072void
2073nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
2074{
2075	uint64_t reg_val = 0;
2076
2077	switch (int_type) {
2078	case NICVF_INTR_CQ:
2079		reg_val = ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2080		break;
2081	case NICVF_INTR_SQ:
2082		reg_val = ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2083		break;
2084	case NICVF_INTR_RBDR:
2085		reg_val = ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2086		break;
2087	case NICVF_INTR_PKT_DROP:
2088		reg_val = (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2089		break;
2090	case NICVF_INTR_TCP_TIMER:
2091		reg_val = (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2092		break;
2093	case NICVF_INTR_MBOX:
2094		reg_val = (1UL << NICVF_INTR_MBOX_SHIFT);
2095		break;
2096	case NICVF_INTR_QS_ERR:
2097		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2098		break;
2099	default:
2100		device_printf(nic->dev,
2101			   "Failed to clear interrupt: unknown type\n");
2102		break;
2103	}
2104
2105	nicvf_reg_write(nic, NIC_VF_INT, reg_val);
2106}
2107
2108/* Check if interrupt is enabled */
2109int
2110nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
2111{
2112	uint64_t reg_val;
2113	uint64_t mask = 0xff;
2114
2115	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
2116
2117	switch (int_type) {
2118	case NICVF_INTR_CQ:
2119		mask = ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2120		break;
2121	case NICVF_INTR_SQ:
2122		mask = ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2123		break;
2124	case NICVF_INTR_RBDR:
2125		mask = ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2126		break;
2127	case NICVF_INTR_PKT_DROP:
2128		mask = NICVF_INTR_PKT_DROP_MASK;
2129		break;
2130	case NICVF_INTR_TCP_TIMER:
2131		mask = NICVF_INTR_TCP_TIMER_MASK;
2132		break;
2133	case NICVF_INTR_MBOX:
2134		mask = NICVF_INTR_MBOX_MASK;
2135		break;
2136	case NICVF_INTR_QS_ERR:
2137		mask = NICVF_INTR_QS_ERR_MASK;
2138		break;
2139	default:
2140		device_printf(nic->dev,
2141			   "Failed to check interrupt enable: unknown type\n");
2142		break;
2143	}
2144
2145	return (reg_val & mask);
2146}
2147
2148void
2149nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
2150{
2151	struct rcv_queue *rq;
2152
2153#define GET_RQ_STATS(reg) \
2154	nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
2155			    (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
2156
2157	rq = &nic->qs->rq[rq_idx];
2158	rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
2159	rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
2160}
2161
2162void
2163nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
2164{
2165	struct snd_queue *sq;
2166
2167#define GET_SQ_STATS(reg) \
2168	nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
2169			    (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
2170
2171	sq = &nic->qs->sq[sq_idx];
2172	sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
2173	sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
2174}
2175
2176/* Check for errors in the receive cmp.queue entry */
2177int
2178nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cmp_queue *cq,
2179    struct cqe_rx_t *cqe_rx)
2180{
2181	struct nicvf_hw_stats *stats = &nic->hw_stats;
2182	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
2183
2184	if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
2185		drv_stats->rx_frames_ok++;
2186		return (0);
2187	}
2188
2189	switch (cqe_rx->err_opcode) {
2190	case CQ_RX_ERROP_RE_PARTIAL:
2191		stats->rx_bgx_truncated_pkts++;
2192		break;
2193	case CQ_RX_ERROP_RE_JABBER:
2194		stats->rx_jabber_errs++;
2195		break;
2196	case CQ_RX_ERROP_RE_FCS:
2197		stats->rx_fcs_errs++;
2198		break;
2199	case CQ_RX_ERROP_RE_RX_CTL:
2200		stats->rx_bgx_errs++;
2201		break;
2202	case CQ_RX_ERROP_PREL2_ERR:
2203		stats->rx_prel2_errs++;
2204		break;
2205	case CQ_RX_ERROP_L2_MAL:
2206		stats->rx_l2_hdr_malformed++;
2207		break;
2208	case CQ_RX_ERROP_L2_OVERSIZE:
2209		stats->rx_oversize++;
2210		break;
2211	case CQ_RX_ERROP_L2_UNDERSIZE:
2212		stats->rx_undersize++;
2213		break;
2214	case CQ_RX_ERROP_L2_LENMISM:
2215		stats->rx_l2_len_mismatch++;
2216		break;
2217	case CQ_RX_ERROP_L2_PCLP:
2218		stats->rx_l2_pclp++;
2219		break;
2220	case CQ_RX_ERROP_IP_NOT:
2221		stats->rx_ip_ver_errs++;
2222		break;
2223	case CQ_RX_ERROP_IP_CSUM_ERR:
2224		stats->rx_ip_csum_errs++;
2225		break;
2226	case CQ_RX_ERROP_IP_MAL:
2227		stats->rx_ip_hdr_malformed++;
2228		break;
2229	case CQ_RX_ERROP_IP_MALD:
2230		stats->rx_ip_payload_malformed++;
2231		break;
2232	case CQ_RX_ERROP_IP_HOP:
2233		stats->rx_ip_ttl_errs++;
2234		break;
2235	case CQ_RX_ERROP_L3_PCLP:
2236		stats->rx_l3_pclp++;
2237		break;
2238	case CQ_RX_ERROP_L4_MAL:
2239		stats->rx_l4_malformed++;
2240		break;
2241	case CQ_RX_ERROP_L4_CHK:
2242		stats->rx_l4_csum_errs++;
2243		break;
2244	case CQ_RX_ERROP_UDP_LEN:
2245		stats->rx_udp_len_errs++;
2246		break;
2247	case CQ_RX_ERROP_L4_PORT:
2248		stats->rx_l4_port_errs++;
2249		break;
2250	case CQ_RX_ERROP_TCP_FLAG:
2251		stats->rx_tcp_flag_errs++;
2252		break;
2253	case CQ_RX_ERROP_TCP_OFFSET:
2254		stats->rx_tcp_offset_errs++;
2255		break;
2256	case CQ_RX_ERROP_L4_PCLP:
2257		stats->rx_l4_pclp++;
2258		break;
2259	case CQ_RX_ERROP_RBDR_TRUNC:
2260		stats->rx_truncated_pkts++;
2261		break;
2262	}
2263
2264	return (1);
2265}
2266
2267/* Check for errors in the send cmp.queue entry */
2268int
2269nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cmp_queue *cq,
2270    struct cqe_send_t *cqe_tx)
2271{
2272	struct cmp_queue_stats *stats = &cq->stats;
2273
2274	switch (cqe_tx->send_status) {
2275	case CQ_TX_ERROP_GOOD:
2276		stats->tx.good++;
2277		return (0);
2278	case CQ_TX_ERROP_DESC_FAULT:
2279		stats->tx.desc_fault++;
2280		break;
2281	case CQ_TX_ERROP_HDR_CONS_ERR:
2282		stats->tx.hdr_cons_err++;
2283		break;
2284	case CQ_TX_ERROP_SUBDC_ERR:
2285		stats->tx.subdesc_err++;
2286		break;
2287	case CQ_TX_ERROP_IMM_SIZE_OFLOW:
2288		stats->tx.imm_size_oflow++;
2289		break;
2290	case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
2291		stats->tx.data_seq_err++;
2292		break;
2293	case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
2294		stats->tx.mem_seq_err++;
2295		break;
2296	case CQ_TX_ERROP_LOCK_VIOL:
2297		stats->tx.lock_viol++;
2298		break;
2299	case CQ_TX_ERROP_DATA_FAULT:
2300		stats->tx.data_fault++;
2301		break;
2302	case CQ_TX_ERROP_TSTMP_CONFLICT:
2303		stats->tx.tstmp_conflict++;
2304		break;
2305	case CQ_TX_ERROP_TSTMP_TIMEOUT:
2306		stats->tx.tstmp_timeout++;
2307		break;
2308	case CQ_TX_ERROP_MEM_FAULT:
2309		stats->tx.mem_fault++;
2310		break;
2311	case CQ_TX_ERROP_CK_OVERLAP:
2312		stats->tx.csum_overlap++;
2313		break;
2314	case CQ_TX_ERROP_CK_OFLOW:
2315		stats->tx.csum_overflow++;
2316		break;
2317	}
2318
2319	return (1);
2320}
2321