nicvf_queues.c revision 297389
1/*
2 * Copyright (C) 2015 Cavium Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/dev/vnic/nicvf_queues.c 297389 2016-03-29 13:31:09Z zbb $
27 *
28 */
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/vnic/nicvf_queues.c 297389 2016-03-29 13:31:09Z zbb $");
31
32#include "opt_inet.h"
33#include "opt_inet6.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/bitset.h>
38#include <sys/bitstring.h>
39#include <sys/buf_ring.h>
40#include <sys/bus.h>
41#include <sys/endian.h>
42#include <sys/kernel.h>
43#include <sys/malloc.h>
44#include <sys/module.h>
45#include <sys/rman.h>
46#include <sys/pciio.h>
47#include <sys/pcpu.h>
48#include <sys/proc.h>
49#include <sys/sockio.h>
50#include <sys/socket.h>
51#include <sys/cpuset.h>
52#include <sys/lock.h>
53#include <sys/mutex.h>
54#include <sys/smp.h>
55#include <sys/taskqueue.h>
56
57#include <vm/vm.h>
58#include <vm/pmap.h>
59
60#include <machine/bus.h>
61#include <machine/vmparam.h>
62
63#include <net/ethernet.h>
64#include <net/if.h>
65#include <net/if_var.h>
66#include <net/if_media.h>
67#include <net/ifq.h>
68
69#include <netinet/in_systm.h>
70#include <netinet/in.h>
71#include <netinet/if_ether.h>
72#include <netinet/ip.h>
73#include <netinet/ip6.h>
74#include <netinet/sctp.h>
75#include <netinet/tcp.h>
76#include <netinet/tcp_lro.h>
77#include <netinet/udp.h>
78
79#include <dev/pci/pcireg.h>
80#include <dev/pci/pcivar.h>
81
82#include "thunder_bgx.h"
83#include "nic_reg.h"
84#include "nic.h"
85#include "q_struct.h"
86#include "nicvf_queues.h"
87
88#define	DEBUG
89#undef DEBUG
90
91#ifdef DEBUG
92#define	dprintf(dev, fmt, ...)	device_printf(dev, fmt, ##__VA_ARGS__)
93#else
94#define	dprintf(dev, fmt, ...)
95#endif
96
97MALLOC_DECLARE(M_NICVF);
98
99static void nicvf_free_snd_queue(struct nicvf *, struct snd_queue *);
100static struct mbuf * nicvf_get_rcv_mbuf(struct nicvf *, struct cqe_rx_t *);
101static void nicvf_sq_disable(struct nicvf *, int);
102static void nicvf_sq_enable(struct nicvf *, struct snd_queue *, int);
103static void nicvf_put_sq_desc(struct snd_queue *, int);
104static void nicvf_cmp_queue_config(struct nicvf *, struct queue_set *, int,
105    boolean_t);
106static void nicvf_sq_free_used_descs(struct nicvf *, struct snd_queue *, int);
107
108static void nicvf_rbdr_task(void *, int);
109static void nicvf_rbdr_task_nowait(void *, int);
110
/*
 * Per-receive-buffer metadata stored in the headroom reserved at the
 * start of each buffer (one NICVF_RCV_BUF_ALIGN_BYTES line, see
 * nicvf_alloc_rcv_buffer()).  Given only the buffer's address (which is
 * all the HW reports), this lets us recover the owning mbuf and the
 * DMA map it was loaded with.
 */
struct rbuf_info {
	bus_dma_tag_t	dmat;	/* tag the buffer was mapped with */
	bus_dmamap_t	dmap;	/* DMA map backing this buffer */
	struct mbuf *	mbuf;	/* owning mbuf; NULL once consumed */
};

/* Step back from a buffer's direct-mapped address to its rbuf_info. */
#define GET_RBUF_INFO(x) ((struct rbuf_info *)((x) - NICVF_RCV_BUF_ALIGN_BYTES))
118
119/* Poll a register for a specific value */
120static int nicvf_poll_reg(struct nicvf *nic, int qidx,
121			  uint64_t reg, int bit_pos, int bits, int val)
122{
123	uint64_t bit_mask;
124	uint64_t reg_val;
125	int timeout = 10;
126
127	bit_mask = (1UL << bits) - 1;
128	bit_mask = (bit_mask << bit_pos);
129
130	while (timeout) {
131		reg_val = nicvf_queue_reg_read(nic, reg, qidx);
132		if (((reg_val & bit_mask) >> bit_pos) == val)
133			return (0);
134
135		DELAY(1000);
136		timeout--;
137	}
138	device_printf(nic->dev, "Poll on reg 0x%lx failed\n", reg);
139	return (ETIMEDOUT);
140}
141
142/* Callback for bus_dmamap_load() */
143static void
144nicvf_dmamap_q_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
145{
146	bus_addr_t *paddr;
147
148	KASSERT(nseg == 1, ("wrong number of segments, should be 1"));
149	paddr = arg;
150	*paddr = segs->ds_addr;
151}
152
153/* Allocate memory for a queue's descriptors */
154static int
155nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
156    int q_len, int desc_size, int align_bytes)
157{
158	int err, err_dmat;
159
160	/* Create DMA tag first */
161	err = bus_dma_tag_create(
162	    bus_get_dma_tag(nic->dev),		/* parent tag */
163	    align_bytes,			/* alignment */
164	    0,					/* boundary */
165	    BUS_SPACE_MAXADDR,			/* lowaddr */
166	    BUS_SPACE_MAXADDR,			/* highaddr */
167	    NULL, NULL,				/* filtfunc, filtfuncarg */
168	    (q_len * desc_size),		/* maxsize */
169	    1,					/* nsegments */
170	    (q_len * desc_size),		/* maxsegsize */
171	    0,					/* flags */
172	    NULL, NULL,				/* lockfunc, lockfuncarg */
173	    &dmem->dmat);			/* dmat */
174
175	if (err != 0) {
176		device_printf(nic->dev,
177		    "Failed to create busdma tag for descriptors ring\n");
178		return (err);
179	}
180
181	/* Allocate segment of continuous DMA safe memory */
182	err = bus_dmamem_alloc(
183	    dmem->dmat,				/* DMA tag */
184	    &dmem->base,			/* virtual address */
185	    (BUS_DMA_NOWAIT | BUS_DMA_ZERO),	/* flags */
186	    &dmem->dmap);			/* DMA map */
187	if (err != 0) {
188		device_printf(nic->dev, "Failed to allocate DMA safe memory for"
189		    "descriptors ring\n");
190		goto dmamem_fail;
191	}
192
193	err = bus_dmamap_load(
194	    dmem->dmat,
195	    dmem->dmap,
196	    dmem->base,
197	    (q_len * desc_size),		/* allocation size */
198	    nicvf_dmamap_q_cb,			/* map to DMA address cb. */
199	    &dmem->phys_base,			/* physical address */
200	    BUS_DMA_NOWAIT);
201	if (err != 0) {
202		device_printf(nic->dev,
203		    "Cannot load DMA map of descriptors ring\n");
204		goto dmamap_fail;
205	}
206
207	dmem->q_len = q_len;
208	dmem->size = (desc_size * q_len);
209
210	return (0);
211
212dmamap_fail:
213	bus_dmamem_free(dmem->dmat, dmem->base, dmem->dmap);
214	dmem->phys_base = 0;
215dmamem_fail:
216	err_dmat = bus_dma_tag_destroy(dmem->dmat);
217	dmem->base = NULL;
218	KASSERT(err_dmat == 0,
219	    ("%s: Trying to destroy BUSY DMA tag", __func__));
220
221	return (err);
222}
223
/*
 * Free queue's descriptor memory allocated by nicvf_alloc_q_desc_mem():
 * sync/unload the map, free the DMA segment, destroy the tag.
 * Safe to call with a NULL or never-initialized dmem (no-op).
 */
static void
nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
{
	int err;

	if ((dmem == NULL) || (dmem->base == NULL))
		return;

	/* Unload a map (sync first so CPU sees final device writes) */
	bus_dmamap_sync(dmem->dmat, dmem->dmap, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(dmem->dmat, dmem->dmap);
	/* Free DMA memory */
	bus_dmamem_free(dmem->dmat, dmem->base, dmem->dmap);
	/* Destroy DMA tag; must succeed now that the map is unloaded */
	err = bus_dma_tag_destroy(dmem->dmat);

	KASSERT(err == 0,
	    ("%s: Trying to destroy BUSY DMA tag", __func__));

	/* Clear pointers so repeated calls are harmless. */
	dmem->phys_base = 0;
	dmem->base = NULL;
}
247
248/*
249 * Allocate buffer for packet reception
250 * HW returns memory address where packet is DMA'ed but not a pointer
251 * into RBDR ring, so save buffer address at the start of fragment and
252 * align the start address to a cache aligned address
253 */
254static __inline int
255nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
256    bus_dmamap_t dmap, int mflags, uint32_t buf_len, bus_addr_t *rbuf)
257{
258	struct mbuf *mbuf;
259	struct rbuf_info *rinfo;
260	bus_dma_segment_t segs[1];
261	int nsegs;
262	int err;
263
264	mbuf = m_getjcl(mflags, MT_DATA, M_PKTHDR, MCLBYTES);
265	if (mbuf == NULL)
266		return (ENOMEM);
267
268	/*
269	 * The length is equal to the actual length + one 128b line
270	 * used as a room for rbuf_info structure.
271	 */
272	mbuf->m_len = mbuf->m_pkthdr.len = buf_len;
273
274	err = bus_dmamap_load_mbuf_sg(rbdr->rbdr_buff_dmat, dmap, mbuf, segs,
275	    &nsegs, BUS_DMA_NOWAIT);
276	if (err != 0) {
277		device_printf(nic->dev,
278		    "Failed to map mbuf into DMA visible memory, err: %d\n",
279		    err);
280		m_freem(mbuf);
281		bus_dmamap_destroy(rbdr->rbdr_buff_dmat, dmap);
282		return (err);
283	}
284	if (nsegs != 1)
285		panic("Unexpected number of DMA segments for RB: %d", nsegs);
286	/*
287	 * Now use the room for rbuf_info structure
288	 * and adjust mbuf data and length.
289	 */
290	rinfo = (struct rbuf_info *)mbuf->m_data;
291	m_adj(mbuf, NICVF_RCV_BUF_ALIGN_BYTES);
292
293	rinfo->dmat = rbdr->rbdr_buff_dmat;
294	rinfo->dmap = dmap;
295	rinfo->mbuf = mbuf;
296
297	*rbuf = segs[0].ds_addr + NICVF_RCV_BUF_ALIGN_BYTES;
298
299	return (0);
300}
301
302/* Retrieve mbuf for received packet */
303static struct mbuf *
304nicvf_rb_ptr_to_mbuf(struct nicvf *nic, bus_addr_t rb_ptr)
305{
306	struct mbuf *mbuf;
307	struct rbuf_info *rinfo;
308
309	/* Get buffer start address and alignment offset */
310	rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(rb_ptr));
311
312	/* Now retrieve mbuf to give to stack */
313	mbuf = rinfo->mbuf;
314	if (__predict_false(mbuf == NULL)) {
315		panic("%s: Received packet fragment with NULL mbuf",
316		    device_get_nameunit(nic->dev));
317	}
318	/*
319	 * Clear the mbuf in the descriptor to indicate
320	 * that this slot is processed and free to use.
321	 */
322	rinfo->mbuf = NULL;
323
324	bus_dmamap_sync(rinfo->dmat, rinfo->dmap, BUS_DMASYNC_POSTREAD);
325	bus_dmamap_unload(rinfo->dmat, rinfo->dmap);
326
327	return (mbuf);
328}
329
330/* Allocate RBDR ring and populate receive buffers */
331static int
332nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, int ring_len,
333    int buf_size, int qidx)
334{
335	bus_dmamap_t dmap;
336	bus_addr_t rbuf;
337	struct rbdr_entry_t *desc;
338	int idx;
339	int err;
340
341	/* Allocate rbdr descriptors ring */
342	err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
343	    sizeof(struct rbdr_entry_t), NICVF_RCV_BUF_ALIGN_BYTES);
344	if (err != 0) {
345		device_printf(nic->dev,
346		    "Failed to create RBDR descriptors ring\n");
347		return (err);
348	}
349
350	rbdr->desc = rbdr->dmem.base;
351	/*
352	 * Buffer size has to be in multiples of 128 bytes.
353	 * Make room for metadata of size of one line (128 bytes).
354	 */
355	rbdr->dma_size = buf_size - NICVF_RCV_BUF_ALIGN_BYTES;
356	rbdr->enable = TRUE;
357	rbdr->thresh = RBDR_THRESH;
358	rbdr->nic = nic;
359	rbdr->idx = qidx;
360
361	/*
362	 * Create DMA tag for Rx buffers.
363	 * Each map created using this tag is intended to store Rx payload for
364	 * one fragment and one header structure containing rbuf_info (thus
365	 * additional 128 byte line since RB must be a multiple of 128 byte
366	 * cache line).
367	 */
368	if (buf_size > MCLBYTES) {
369		device_printf(nic->dev,
370		    "Buffer size to large for mbuf cluster\n");
371		return (EINVAL);
372	}
373	err = bus_dma_tag_create(
374	    bus_get_dma_tag(nic->dev),		/* parent tag */
375	    NICVF_RCV_BUF_ALIGN_BYTES,		/* alignment */
376	    0,					/* boundary */
377	    DMAP_MAX_PHYSADDR,			/* lowaddr */
378	    DMAP_MIN_PHYSADDR,			/* highaddr */
379	    NULL, NULL,				/* filtfunc, filtfuncarg */
380	    roundup2(buf_size, MCLBYTES),	/* maxsize */
381	    1,					/* nsegments */
382	    roundup2(buf_size, MCLBYTES),	/* maxsegsize */
383	    0,					/* flags */
384	    NULL, NULL,				/* lockfunc, lockfuncarg */
385	    &rbdr->rbdr_buff_dmat);		/* dmat */
386
387	if (err != 0) {
388		device_printf(nic->dev,
389		    "Failed to create busdma tag for RBDR buffers\n");
390		return (err);
391	}
392
393	rbdr->rbdr_buff_dmaps = malloc(sizeof(*rbdr->rbdr_buff_dmaps) *
394	    ring_len, M_NICVF, (M_WAITOK | M_ZERO));
395
396	for (idx = 0; idx < ring_len; idx++) {
397		err = bus_dmamap_create(rbdr->rbdr_buff_dmat, 0, &dmap);
398		if (err != 0) {
399			device_printf(nic->dev,
400			    "Failed to create DMA map for RB\n");
401			return (err);
402		}
403		rbdr->rbdr_buff_dmaps[idx] = dmap;
404
405		err = nicvf_alloc_rcv_buffer(nic, rbdr, dmap, M_WAITOK,
406		    DMA_BUFFER_LEN, &rbuf);
407		if (err != 0)
408			return (err);
409
410		desc = GET_RBDR_DESC(rbdr, idx);
411		desc->buf_addr = (rbuf >> NICVF_RCV_BUF_ALIGN);
412	}
413
414	/* Allocate taskqueue */
415	TASK_INIT(&rbdr->rbdr_task, 0, nicvf_rbdr_task, rbdr);
416	TASK_INIT(&rbdr->rbdr_task_nowait, 0, nicvf_rbdr_task_nowait, rbdr);
417	rbdr->rbdr_taskq = taskqueue_create_fast("nicvf_rbdr_taskq", M_WAITOK,
418	    taskqueue_thread_enqueue, &rbdr->rbdr_taskq);
419	taskqueue_start_threads(&rbdr->rbdr_taskq, 1, PI_NET, "%s: rbdr_taskq",
420	    device_get_nameunit(nic->dev));
421
422	return (0);
423}
424
/*
 * Free RBDR ring and its receive buffers: stop refill tasks, free all
 * still-posted buffers between head and tail, destroy the per-entry
 * DMA maps and the buffer tag, then release the descriptor ring itself.
 */
static void
nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
{
	struct mbuf *mbuf;
	struct queue_set *qs;
	struct rbdr_entry_t *desc;
	struct rbuf_info *rinfo;
	bus_addr_t buf_addr;
	int head, tail, idx;
	int err;

	qs = nic->qs;

	if ((qs == NULL) || (rbdr == NULL))
		return;

	/* Stop refills before tearing anything down. */
	rbdr->enable = FALSE;
	if (rbdr->rbdr_taskq != NULL) {
		/* Remove tasks */
		while (taskqueue_cancel(rbdr->rbdr_taskq,
		    &rbdr->rbdr_task_nowait, NULL) != 0) {
			/* Finish the nowait task first */
			taskqueue_drain(rbdr->rbdr_taskq,
			    &rbdr->rbdr_task_nowait);
		}
		taskqueue_free(rbdr->rbdr_taskq);
		rbdr->rbdr_taskq = NULL;

		while (taskqueue_cancel(taskqueue_thread,
		    &rbdr->rbdr_task, NULL) != 0) {
			/* Now finish the sleepable task */
			taskqueue_drain(taskqueue_thread, &rbdr->rbdr_task);
		}
	}

	/*
	 * Free all of the memory under the RB descriptors.
	 * There are assumptions here:
	 * 1. Corresponding RBDR is disabled
	 *    - it is safe to operate using head and tail indexes
	 * 2. All buffers that were received are properly freed by
	 *    the receive handler
	 *    - there is no need to unload DMA map and free MBUF for other
	 *      descriptors than unused ones
	 */
	if (rbdr->rbdr_buff_dmat != NULL) {
		head = rbdr->head;
		tail = rbdr->tail;
		/* Walk the unused descriptors [head, tail) and free them. */
		while (head != tail) {
			desc = GET_RBDR_DESC(rbdr, head);
			buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
			rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(buf_addr));
			bus_dmamap_unload(rbdr->rbdr_buff_dmat, rinfo->dmap);
			mbuf = rinfo->mbuf;
			/* This will destroy everything including rinfo! */
			m_freem(mbuf);
			head++;
			head &= (rbdr->dmem.q_len - 1);
		}
		/* Free tail descriptor */
		desc = GET_RBDR_DESC(rbdr, tail);
		buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
		rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(buf_addr));
		bus_dmamap_unload(rbdr->rbdr_buff_dmat, rinfo->dmap);
		mbuf = rinfo->mbuf;
		/* This will destroy everything including rinfo! */
		m_freem(mbuf);

		/* Destroy DMA maps */
		for (idx = 0; idx < qs->rbdr_len; idx++) {
			if (rbdr->rbdr_buff_dmaps[idx] == NULL)
				continue;
			err = bus_dmamap_destroy(rbdr->rbdr_buff_dmat,
			    rbdr->rbdr_buff_dmaps[idx]);
			KASSERT(err == 0,
			    ("%s: Could not destroy DMA map for RB, desc: %d",
			    __func__, idx));
			rbdr->rbdr_buff_dmaps[idx] = NULL;
		}

		/* Now destroy the tag */
		err = bus_dma_tag_destroy(rbdr->rbdr_buff_dmat);
		KASSERT(err == 0,
		    ("%s: Trying to destroy BUSY DMA tag", __func__));

		rbdr->head = 0;
		rbdr->tail = 0;
	}

	/* Free RBDR ring */
	nicvf_free_q_desc_mem(nic, &rbdr->dmem);
}
518
/*
 * Refill receive buffer descriptors with new buffers.
 *
 * Reads the HW count of posted descriptors, allocates buffers for the
 * missing ones starting at the HW tail, then rings the doorbell with
 * the number of newly posted buffers.  'mflags' selects M_WAITOK (may
 * sleep) or M_NOWAIT allocation.  Returns 0 on full refill (and
 * re-enables the RBDR interrupt), ENOMEM if any allocation failed.
 */
static int
nicvf_refill_rbdr(struct rbdr *rbdr, int mflags)
{
	struct nicvf *nic;
	struct queue_set *qs;
	int rbdr_idx;
	int tail, qcount;
	int refill_rb_cnt;
	struct rbdr_entry_t *desc;
	bus_dmamap_t dmap;
	bus_addr_t rbuf;
	boolean_t rb_alloc_fail;
	int new_rb;

	rb_alloc_fail = TRUE;
	new_rb = 0;
	nic = rbdr->nic;
	qs = nic->qs;
	rbdr_idx = rbdr->idx;

	/* Check if it's enabled */
	if (!rbdr->enable)
		return (0);

	/* Get no of desc's to be refilled */
	qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
	qcount &= 0x7FFFF;
	/* Doorbell can be ringed with a max of ring size minus 1 */
	if (qcount >= (qs->rbdr_len - 1)) {
		/* Ring already full: nothing to do, report success. */
		rb_alloc_fail = FALSE;
		goto out;
	} else
		refill_rb_cnt = qs->rbdr_len - qcount - 1;

	/* Start filling descs from tail */
	tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
	while (refill_rb_cnt) {
		tail++;
		tail &= (rbdr->dmem.q_len - 1);

		/* Reuse the DMA map pre-created for this slot. */
		dmap = rbdr->rbdr_buff_dmaps[tail];
		if (nicvf_alloc_rcv_buffer(nic, rbdr, dmap, mflags,
		    DMA_BUFFER_LEN, &rbuf)) {
			/* Something went wrong. Resign */
			break;
		}
		desc = GET_RBDR_DESC(rbdr, tail);
		desc->buf_addr = (rbuf >> NICVF_RCV_BUF_ALIGN);
		refill_rb_cnt--;
		new_rb++;
	}

	/* make sure all memory stores are done before ringing doorbell */
	wmb();

	/* Check if buffer allocation failed */
	if (refill_rb_cnt == 0)
		rb_alloc_fail = FALSE;

	/* Notify HW */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
			      rbdr_idx, new_rb);
out:
	if (!rb_alloc_fail) {
		/*
		 * Re-enable RBDR interrupts only
		 * if buffer allocation is success.
		 */
		nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);

		return (0);
	}

	return (ENOMEM);
}
597
598/* Refill RBs even if sleep is needed to reclaim memory */
599static void
600nicvf_rbdr_task(void *arg, int pending)
601{
602	struct rbdr *rbdr;
603	int err;
604
605	rbdr = (struct rbdr *)arg;
606
607	err = nicvf_refill_rbdr(rbdr, M_WAITOK);
608	if (__predict_false(err != 0)) {
609		panic("%s: Failed to refill RBs even when sleep enabled",
610		    __func__);
611	}
612}
613
614/* Refill RBs as soon as possible without waiting */
615static void
616nicvf_rbdr_task_nowait(void *arg, int pending)
617{
618	struct rbdr *rbdr;
619	int err;
620
621	rbdr = (struct rbdr *)arg;
622
623	err = nicvf_refill_rbdr(rbdr, M_NOWAIT);
624	if (err != 0) {
625		/*
626		 * Schedule another, sleepable kernel thread
627		 * that will for sure refill the buffers.
628		 */
629		taskqueue_enqueue(taskqueue_thread, &rbdr->rbdr_task);
630	}
631}
632
/*
 * Handle one Rx completion entry: reassemble the packet's mbuf chain,
 * drop errored packets, try LRO for clean IPv4/TCP with valid checksum,
 * otherwise stage the mbuf on cq->rx_br for later delivery to the stack
 * (done outside the CQ lock in nicvf_cq_intr_handler()).
 * Returns 0 on success/drop, non-zero if the mbuf could not be staged.
 */
static int
nicvf_rcv_pkt_handler(struct nicvf *nic, struct cmp_queue *cq,
    struct cqe_rx_t *cqe_rx, int cqe_type)
{
	struct mbuf *mbuf;
	struct rcv_queue *rq;
	int rq_idx;
	int err = 0;

	rq_idx = cqe_rx->rq_idx;
	rq = &nic->qs->rq[rq_idx];

	/* Check for errors */
	err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
	if (err && !cqe_rx->rb_cnt)
		return (0);

	mbuf = nicvf_get_rcv_mbuf(nic, cqe_rx);
	if (mbuf == NULL) {
		dprintf(nic->dev, "Packet not received\n");
		return (0);
	}

	/* If error packet */
	if (err != 0) {
		m_freem(mbuf);
		return (0);
	}

	/* LRO candidate: clean IPv4/TCP with HW-validated checksum. */
	if (rq->lro_enabled &&
	    ((cqe_rx->l3_type == L3TYPE_IPV4) && (cqe_rx->l4_type == L4TYPE_TCP)) &&
	    (mbuf->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * At this point it is known that there are no errors in the
		 * packet. Attempt to LRO enqueue. Send to stack if no resources
		 * or enqueue error.
		 */
		if ((rq->lro.lro_cnt != 0) &&
		    (tcp_lro_rx(&rq->lro, mbuf, 0) == 0))
			return (0);
	}
	/*
	 * Push this packet to the stack later to avoid
	 * unlocking completion task in the middle of work.
	 */
	err = buf_ring_enqueue(cq->rx_br, mbuf);
	if (err != 0) {
		/*
		 * Failed to enqueue this mbuf.
		 * We don't drop it, just schedule another task.
		 * NOTE(review): the mbuf is neither freed nor enqueued here,
		 * yet nicvf_get_rcv_mbuf() has already cleared rinfo->mbuf
		 * for its buffers - verify that reprocessing this CQE cannot
		 * hit the NULL-mbuf panic in nicvf_rb_ptr_to_mbuf().
		 */
		return (err);
	}

	return (0);
}
690
/*
 * Handle one Tx completion entry: unload the DMA map, free the sent
 * mbuf and return its SQ descriptors to the free pool.
 * Returns EAGAIN if the SQ lock could not be taken without blocking
 * (the caller aborts CQ processing and retries later), 0 otherwise.
 */
static int
nicvf_snd_pkt_handler(struct nicvf *nic, struct cmp_queue *cq,
    struct cqe_send_t *cqe_tx, int cqe_type)
{
	bus_dmamap_t dmap;
	struct mbuf *mbuf;
	struct snd_queue *sq;
	struct sq_hdr_subdesc *hdr;

	mbuf = NULL;
	sq = &nic->qs->sq[cqe_tx->sq_idx];
	/* Avoid blocking here since we hold a non-sleepable NICVF_CMP_LOCK */
	if (NICVF_TX_TRYLOCK(sq) == 0)
		return (EAGAIN);

	/* The SQE pointed to by the CQE must be a header subdescriptor. */
	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
	if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
		NICVF_TX_UNLOCK(sq);
		return (0);
	}

	dprintf(nic->dev,
	    "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n",
	    __func__, cqe_tx->sq_qs, cqe_tx->sq_idx,
	    cqe_tx->sqe_ptr, hdr->subdesc_cnt);

	dmap = (bus_dmamap_t)sq->snd_buff[cqe_tx->sqe_ptr].dmap;
	bus_dmamap_unload(sq->snd_buff_dmat, dmap);

	mbuf = (struct mbuf *)sq->snd_buff[cqe_tx->sqe_ptr].mbuf;
	if (mbuf != NULL) {
		m_freem(mbuf);
		sq->snd_buff[cqe_tx->sqe_ptr].mbuf = NULL;
		/* Header subdescriptor plus its payload subdescriptors. */
		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
	}

	nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);

	NICVF_TX_UNLOCK(sq);
	return (0);
}
732
/*
 * Drain a completion queue: dispatch Rx and Tx completions, ring the
 * doorbell for every processed CQE, flush outstanding LRO work and
 * finally (outside the CQ lock) push staged mbufs up the stack.
 * Returns non-zero when a handler aborted early (e.g. Tx lock busy),
 * meaning unprocessed CQEs remain and the caller should reschedule.
 */
static int
nicvf_cq_intr_handler(struct nicvf *nic, uint8_t cq_idx)
{
	struct mbuf *mbuf;
	struct ifnet *ifp;
	int processed_cqe, work_done = 0, tx_done = 0;
	int cqe_count, cqe_head;
	struct queue_set *qs = nic->qs;
	struct cmp_queue *cq = &qs->cq[cq_idx];
	struct rcv_queue *rq;
	struct cqe_rx_t *cq_desc;
	struct lro_ctrl	*lro;
	struct lro_entry *queued;
	int rq_idx;
	int cmp_err;

	NICVF_CMP_LOCK(cq);
	cmp_err = 0;
	processed_cqe = 0;
	/* Get no of valid CQ entries to process */
	cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
	cqe_count &= CQ_CQE_COUNT;
	if (cqe_count == 0)
		goto out;

	/* Get head of the valid CQ entries */
	cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
	cqe_head &= 0xFFFF;

	dprintf(nic->dev, "%s CQ%d cqe_count %d cqe_head %d\n",
	    __func__, cq_idx, cqe_count, cqe_head);
	while (processed_cqe < cqe_count) {
		/* Get the CQ descriptor */
		cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
		cqe_head++;
		cqe_head &= (cq->dmem.q_len - 1);
		/* Prefetch next CQ descriptor */
		__builtin_prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));

		dprintf(nic->dev, "CQ%d cq_desc->cqe_type %d\n", cq_idx,
		    cq_desc->cqe_type);
		switch (cq_desc->cqe_type) {
		case CQE_TYPE_RX:
			cmp_err = nicvf_rcv_pkt_handler(nic, cq, cq_desc,
			    CQE_TYPE_RX);
			if (__predict_false(cmp_err != 0)) {
				/*
				 * Ups. Cannot finish now.
				 * Let's try again later.
				 */
				goto done;
			}
			work_done++;
			break;
		case CQE_TYPE_SEND:
			cmp_err = nicvf_snd_pkt_handler(nic, cq,
			    (void *)cq_desc, CQE_TYPE_SEND);
			if (__predict_false(cmp_err != 0)) {
				/*
				 * Ups. Cannot finish now.
				 * Let's try again later.
				 */
				goto done;
			}

			tx_done++;
			break;
		case CQE_TYPE_INVALID:
		case CQE_TYPE_RX_SPLIT:
		case CQE_TYPE_RX_TCP:
		case CQE_TYPE_SEND_PTP:
			/* Ignore for now */
			break;
		}
		processed_cqe++;
	}
done:
	dprintf(nic->dev,
	    "%s CQ%d processed_cqe %d work_done %d\n",
	    __func__, cq_idx, processed_cqe, work_done);

	/* Ring doorbell to inform H/W to reuse processed CQEs */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, cq_idx, processed_cqe);

	if ((tx_done > 0) &&
	    ((if_getdrvflags(nic->ifp) & IFF_DRV_RUNNING) != 0)) {
		/* Reenable TXQ if its stopped earlier due to SQ full */
		if_setdrvflagbits(nic->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
	}
out:
	/*
	 * Flush any outstanding LRO work
	 */
	rq_idx = cq_idx;
	rq = &nic->qs->rq[rq_idx];
	lro = &rq->lro;
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	NICVF_CMP_UNLOCK(cq);

	ifp = nic->ifp;
	/* Push received MBUFs to the stack (safe now - CQ lock released) */
	while (!buf_ring_empty(cq->rx_br)) {
		mbuf = buf_ring_dequeue_mc(cq->rx_br);
		if (__predict_true(mbuf != NULL))
			(*ifp->if_input)(ifp, mbuf);
	}

	return (cmp_err);
}
846
/*
 * Qset error interrupt handler
 *
 * As of now only CQ errors are handled
 *
 * For each CQ reporting an error: mask its interrupt, stop the paired
 * SQ, drain already queued CQEs, reconfigure the CQ, reclaim used SQ
 * descriptors and restart the SQ.  The interface is marked inactive
 * while this runs and reactivated afterwards.
 */
static void
nicvf_qs_err_task(void *arg, int pending)
{
	struct nicvf *nic;
	struct queue_set *qs;
	int qidx;
	uint64_t status;
	boolean_t enable = TRUE;

	nic = (struct nicvf *)arg;
	qs = nic->qs;

	/* Deactivate network interface */
	if_setdrvflagbits(nic->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	/* Check if it is CQ err */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
		    qidx);
		if ((status & CQ_ERR_MASK) == 0)
			continue;
		/* Process already queued CQEs and reconfig CQ */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_sq_disable(nic, qidx);
		(void)nicvf_cq_intr_handler(nic, qidx);
		nicvf_cmp_queue_config(nic, qs, qidx, enable);
		nicvf_sq_free_used_descs(nic, &qs->sq[qidx], qidx);
		nicvf_sq_enable(nic, &qs->sq[qidx], qidx);
		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
	}

	if_setdrvflagbits(nic->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
	/* Re-enable Qset error interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
}
887
888static void
889nicvf_cmp_task(void *arg, int pending)
890{
891	struct cmp_queue *cq;
892	struct nicvf *nic;
893	int cmp_err;
894
895	cq = (struct cmp_queue *)arg;
896	nic = cq->nic;
897
898	/* Handle CQ descriptors */
899	cmp_err = nicvf_cq_intr_handler(nic, cq->idx);
900	if (__predict_false(cmp_err != 0)) {
901		/*
902		 * Schedule another thread here since we did not
903		 * process the entire CQ due to Tx or Rx CQ parse error.
904		 */
905		taskqueue_enqueue(cq->cmp_taskq, &cq->cmp_task);
906
907	}
908
909	nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->idx);
910	/* Reenable interrupt (previously disabled in nicvf_intr_handler() */
911	nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->idx);
912
913}
914
/*
 * Initialize completion queue: lock, descriptor ring DMA memory,
 * staging buf_ring for received mbufs, and the completion taskqueue.
 * Returns 0 or an errno from the descriptor memory allocation.
 */
static int
nicvf_init_cmp_queue(struct nicvf *nic, struct cmp_queue *cq, int q_len,
    int qidx)
{
	int err;

	/* Initialize lock */
	snprintf(cq->mtx_name, sizeof(cq->mtx_name), "%s: CQ(%d) lock",
	    device_get_nameunit(nic->dev), qidx);
	mtx_init(&cq->mtx, cq->mtx_name, NULL, MTX_DEF);

	err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
				     NICVF_CQ_BASE_ALIGN_BYTES);

	if (err != 0) {
		device_printf(nic->dev,
		    "Could not allocate DMA memory for CQ\n");
		return (err);
	}

	cq->desc = cq->dmem.base;
	/* Pass-1 silicon cannot use a CQE threshold. */
	cq->thresh = pass1_silicon(nic->dev) ? 0 : CMP_QUEUE_CQE_THRESH;
	cq->nic = nic;
	cq->idx = qidx;
	/*
	 * NOTE(review): converts the timer threshold to microseconds,
	 * presumably one timer tick == 0.05us - confirm against HW manual.
	 */
	nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;

	cq->rx_br = buf_ring_alloc(CMP_QUEUE_LEN * 8, M_DEVBUF, M_WAITOK,
	    &cq->mtx);

	/* Allocate taskqueue */
	TASK_INIT(&cq->cmp_task, 0, nicvf_cmp_task, cq);
	cq->cmp_taskq = taskqueue_create_fast("nicvf_cmp_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &cq->cmp_taskq);
	taskqueue_start_threads(&cq->cmp_taskq, 1, PI_NET, "%s: cmp_taskq(%d)",
	    device_get_nameunit(nic->dev), qidx);

	return (0);
}
954
/*
 * Tear down a completion queue: stop the completion task, mask its
 * interrupt, then free the descriptor ring, the Rx staging buf_ring
 * and the lock.  The CQ itself must already be disabled.
 */
static void
nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
{

	if (cq == NULL)
		return;
	/*
	 * The completion queue itself should be disabled by now
	 * (ref. nicvf_snd_queue_config()).
	 * Ensure that it is safe to disable it or panic.
	 */
	if (cq->enable)
		panic("%s: Trying to free working CQ(%d)", __func__, cq->idx);

	if (cq->cmp_taskq != NULL) {
		/* Remove task */
		while (taskqueue_cancel(cq->cmp_taskq, &cq->cmp_task, NULL) != 0)
			taskqueue_drain(cq->cmp_taskq, &cq->cmp_task);

		taskqueue_free(cq->cmp_taskq);
		cq->cmp_taskq = NULL;
	}
	/*
	 * Completion interrupt will possibly enable interrupts again
	 * so disable interrupting now after we finished processing
	 * completion task. It is safe to do so since the corresponding CQ
	 * was already disabled.
	 */
	nicvf_disable_intr(nic, NICVF_INTR_CQ, cq->idx);
	nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->idx);

	NICVF_CMP_LOCK(cq);
	nicvf_free_q_desc_mem(nic, &cq->dmem);
	/* drbr_free() drains any staged mbufs before freeing the ring. */
	drbr_free(cq->rx_br, M_DEVBUF);
	NICVF_CMP_UNLOCK(cq);
	mtx_destroy(&cq->mtx);
	memset(cq->mtx_name, 0, sizeof(cq->mtx_name));
}
993
994static void
995nicvf_snd_task(void *arg, int pending)
996{
997	struct snd_queue *sq = (struct snd_queue *)arg;
998	struct mbuf *mbuf;
999
1000	NICVF_TX_LOCK(sq);
1001	while (1) {
1002		mbuf = drbr_dequeue(NULL, sq->br);
1003		if (mbuf == NULL)
1004			break;
1005
1006		if (nicvf_tx_mbuf_locked(sq, mbuf) != 0) {
1007			/* XXX ARM64TODO: Increase Tx drop counter */
1008			m_freem(mbuf);
1009			break;
1010		}
1011	}
1012	NICVF_TX_UNLOCK(sq);
1013}
1014
/*
 * Initialize transmit queue: lock, staging buf_ring, descriptor ring
 * DMA memory, per-buffer DMA tag and maps, and the transmit taskqueue.
 * Returns 0 or an errno; on failure the caller is expected to release
 * the partial state via nicvf_free_snd_queue().
 */
static int
nicvf_init_snd_queue(struct nicvf *nic, struct snd_queue *sq, int q_len,
    int qidx)
{
	size_t i;
	int err;

	/* Initialize TX lock for this queue */
	snprintf(sq->mtx_name, sizeof(sq->mtx_name), "%s: SQ(%d) lock",
	    device_get_nameunit(nic->dev), qidx);
	mtx_init(&sq->mtx, sq->mtx_name, NULL, MTX_DEF);

	NICVF_TX_LOCK(sq);
	/* Allocate buffer ring (each packet needs >= MIN_SQ_DESC_PER_PKT_XMIT descs) */
	sq->br = buf_ring_alloc(q_len / MIN_SQ_DESC_PER_PKT_XMIT, M_DEVBUF,
	    M_NOWAIT, &sq->mtx);
	if (sq->br == NULL) {
		device_printf(nic->dev,
		    "ERROR: Could not set up buf ring for SQ(%d)\n", qidx);
		err = ENOMEM;
		goto error;
	}

	/* Allocate DMA memory for Tx descriptors */
	err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
				     NICVF_SQ_BASE_ALIGN_BYTES);
	if (err != 0) {
		device_printf(nic->dev,
		    "Could not allocate DMA memory for SQ\n");
		goto error;
	}

	sq->desc = sq->dmem.base;
	sq->head = sq->tail = 0;
	/* One descriptor is kept unused to distinguish full from empty. */
	sq->free_cnt = q_len - 1;
	sq->thresh = SND_QUEUE_THRESH;
	sq->idx = qidx;
	sq->nic = nic;

	/*
	 * Allocate DMA maps for Tx buffers
	 */

	/* Create DMA tag first */
	err = bus_dma_tag_create(
	    bus_get_dma_tag(nic->dev),		/* parent tag */
	    1,					/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filtfunc, filtfuncarg */
	    NICVF_TSO_MAXSIZE,			/* maxsize */
	    NICVF_TSO_NSEGS,			/* nsegments */
	    MCLBYTES,				/* maxsegsize */
	    0,					/* flags */
	    NULL, NULL,				/* lockfunc, lockfuncarg */
	    &sq->snd_buff_dmat);		/* dmat */

	if (err != 0) {
		device_printf(nic->dev,
		    "Failed to create busdma tag for Tx buffers\n");
		goto error;
	}

	/* Allocate send buffers array */
	sq->snd_buff = malloc(sizeof(*sq->snd_buff) * q_len, M_NICVF,
	    (M_NOWAIT | M_ZERO));
	if (sq->snd_buff == NULL) {
		device_printf(nic->dev,
		    "Could not allocate memory for Tx buffers array\n");
		err = ENOMEM;
		goto error;
	}

	/* Now populate maps */
	for (i = 0; i < q_len; i++) {
		err = bus_dmamap_create(sq->snd_buff_dmat, 0,
		    &sq->snd_buff[i].dmap);
		if (err != 0) {
			device_printf(nic->dev,
			    "Failed to create DMA maps for Tx buffers\n");
			goto error;
		}
	}
	NICVF_TX_UNLOCK(sq);

	/* Allocate taskqueue */
	TASK_INIT(&sq->snd_task, 0, nicvf_snd_task, sq);
	sq->snd_taskq = taskqueue_create_fast("nicvf_snd_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &sq->snd_taskq);
	taskqueue_start_threads(&sq->snd_taskq, 1, PI_NET, "%s: snd_taskq(%d)",
	    device_get_nameunit(nic->dev), qidx);

	return (0);
error:
	NICVF_TX_UNLOCK(sq);
	return (err);
}
1114
1115static void
1116nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
1117{
1118	struct queue_set *qs = nic->qs;
1119	size_t i;
1120	int err;
1121
1122	if (sq == NULL)
1123		return;
1124
1125	if (sq->snd_taskq != NULL) {
1126		/* Remove task */
1127		while (taskqueue_cancel(sq->snd_taskq, &sq->snd_task, NULL) != 0)
1128			taskqueue_drain(sq->snd_taskq, &sq->snd_task);
1129
1130		taskqueue_free(sq->snd_taskq);
1131		sq->snd_taskq = NULL;
1132	}
1133
1134	NICVF_TX_LOCK(sq);
1135	if (sq->snd_buff_dmat != NULL) {
1136		if (sq->snd_buff != NULL) {
1137			for (i = 0; i < qs->sq_len; i++) {
1138				m_freem(sq->snd_buff[i].mbuf);
1139				sq->snd_buff[i].mbuf = NULL;
1140
1141				bus_dmamap_unload(sq->snd_buff_dmat,
1142				    sq->snd_buff[i].dmap);
1143				err = bus_dmamap_destroy(sq->snd_buff_dmat,
1144				    sq->snd_buff[i].dmap);
1145				/*
1146				 * If bus_dmamap_destroy fails it can cause
1147				 * random panic later if the tag is also
1148				 * destroyed in the process.
1149				 */
1150				KASSERT(err == 0,
1151				    ("%s: Could not destroy DMA map for SQ",
1152				    __func__));
1153			}
1154		}
1155
1156		free(sq->snd_buff, M_NICVF);
1157
1158		err = bus_dma_tag_destroy(sq->snd_buff_dmat);
1159		KASSERT(err == 0,
1160		    ("%s: Trying to destroy BUSY DMA tag", __func__));
1161	}
1162
1163	/* Free private driver ring for this send queue */
1164	if (sq->br != NULL)
1165		drbr_free(sq->br, M_DEVBUF);
1166
1167	if (sq->dmem.base != NULL)
1168		nicvf_free_q_desc_mem(nic, &sq->dmem);
1169
1170	NICVF_TX_UNLOCK(sq);
1171	/* Destroy Tx lock */
1172	mtx_destroy(&sq->mtx);
1173	memset(sq->mtx_name, 0, sizeof(sq->mtx_name));
1174}
1175
/*
 * Quiesce and reset a send queue: disable it, poll status bit 21 for
 * the stopped indication, then issue the reset.  If the poll does not
 * succeed (nonzero return from nicvf_poll_reg) the reset is skipped.
 */
static void
nicvf_reclaim_snd_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
{

	/* Disable send queue */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
	/* Check if SQ is stopped */
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
		return;
	/* Reset send queue */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
}
1188
/*
 * Reclaim a receive queue: ask the PF over the mailbox to sync so that
 * all packets still in the pipeline are written back to memory before
 * the queue is torn down.
 */
static void
nicvf_reclaim_rcv_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
{
	union nic_mbx mbx = {};

	/* Make sure all packets in the pipeline are written back into mem */
	mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
	nicvf_send_msg_to_pf(nic, &mbx);
}
1198
/*
 * Quiesce and reset a completion queue: clear the timer threshold
 * first (it survives a CQ reset), then disable and reset the CQ.
 */
static void
nicvf_reclaim_cmp_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
{

	/* Disable timer threshold (doesn't get reset upon CQ reset) */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
	/* Disable completion queue */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
	/* Reset completion queue */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
}
1210
/*
 * Quiesce and reset a receive buffer descriptor ring (RBDR).
 * Saves the current head/tail so the attached buffers can be freed
 * later, waits for the prefetch machinery to go idle, then drives the
 * ring through reset back to the disabled state.  Any failed poll
 * aborts the sequence early.
 */
static void
nicvf_reclaim_rbdr(struct nicvf *nic, struct rbdr *rbdr, int qidx)
{
	uint64_t tmp, fifo_state;
	int timeout = 10;

	/* Save head and tail pointers for freeing up buffers */
	rbdr->head =
	    nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_HEAD, qidx) >> 3;
	rbdr->tail =
	    nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, qidx) >> 3;

	/*
	 * If RBDR FIFO is in 'FAIL' state (status bits 63:62 == 0x3) then
	 * do a reset first before reclaiming.
	 */
	fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
	if (((fifo_state >> 62) & 0x03) == 0x3) {
		nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
		    qidx, NICVF_RBDR_RESET);
	}

	/* Disable RBDR */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
		return;
	/*
	 * Wait (up to ~10ms) until the two 32-bit halves of the prefetch
	 * status register match.  NOTE(review): the halves presumably hold
	 * issued vs. completed prefetch counts -- confirm against the HRM.
	 */
	while (1) {
		tmp = nicvf_queue_reg_read(nic,
		    NIC_QSET_RBDR_0_1_PREFETCH_STATUS, qidx);
		if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
			break;

		DELAY(1000);
		timeout--;
		if (!timeout) {
			device_printf(nic->dev,
			    "Failed polling on prefetch status\n");
			return;
		}
	}
	/* Assert reset, then release it, polling the FIFO state each step */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx,
	    NICVF_RBDR_RESET);

	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
		return;
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
		return;
}
1260
1261/* Configures receive queue */
1262static void
1263nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
1264    int qidx, bool enable)
1265{
1266	union nic_mbx mbx = {};
1267	struct rcv_queue *rq;
1268	struct rq_cfg rq_cfg;
1269	struct ifnet *ifp;
1270	struct lro_ctrl	*lro;
1271
1272	ifp = nic->ifp;
1273
1274	rq = &qs->rq[qidx];
1275	rq->enable = enable;
1276
1277	lro = &rq->lro;
1278
1279	/* Disable receive queue */
1280	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);
1281
1282	if (!rq->enable) {
1283		nicvf_reclaim_rcv_queue(nic, qs, qidx);
1284		/* Free LRO memory */
1285		tcp_lro_free(lro);
1286		rq->lro_enabled = FALSE;
1287		return;
1288	}
1289
1290	/* Configure LRO if enabled */
1291	rq->lro_enabled = FALSE;
1292	if ((if_getcapenable(ifp) & IFCAP_LRO) != 0) {
1293		if (tcp_lro_init(lro) != 0) {
1294			device_printf(nic->dev,
1295			    "Failed to initialize LRO for RXQ%d\n", qidx);
1296		} else {
1297			rq->lro_enabled = TRUE;
1298			lro->ifp = nic->ifp;
1299		}
1300	}
1301
1302	rq->cq_qs = qs->vnic_id;
1303	rq->cq_idx = qidx;
1304	rq->start_rbdr_qs = qs->vnic_id;
1305	rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
1306	rq->cont_rbdr_qs = qs->vnic_id;
1307	rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
1308	/* all writes of RBDR data to be loaded into L2 Cache as well*/
1309	rq->caching = 1;
1310
1311	/* Send a mailbox msg to PF to config RQ */
1312	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
1313	mbx.rq.qs_num = qs->vnic_id;
1314	mbx.rq.rq_num = qidx;
1315	mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) |
1316	    (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
1317	    (rq->cont_qs_rbdr_idx << 8) | (rq->start_rbdr_qs << 1) |
1318	    (rq->start_qs_rbdr_idx);
1319	nicvf_send_msg_to_pf(nic, &mbx);
1320
1321	mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
1322	mbx.rq.cfg = (1UL << 63) | (1UL << 62) | (qs->vnic_id << 0);
1323	nicvf_send_msg_to_pf(nic, &mbx);
1324
1325	/*
1326	 * RQ drop config
1327	 * Enable CQ drop to reserve sufficient CQEs for all tx packets
1328	 */
1329	mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
1330	mbx.rq.cfg = (1UL << 62) | (RQ_CQ_DROP << 8);
1331	nicvf_send_msg_to_pf(nic, &mbx);
1332
1333	nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00);
1334
1335	/* Enable Receive queue */
1336	rq_cfg.ena = 1;
1337	rq_cfg.tcp_ena = 0;
1338	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx,
1339	    *(uint64_t *)&rq_cfg);
1340}
1341
1342/* Configures completion queue */
1343static void
1344nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
1345    int qidx, boolean_t enable)
1346{
1347	struct cmp_queue *cq;
1348	struct cq_cfg cq_cfg;
1349
1350	cq = &qs->cq[qidx];
1351	cq->enable = enable;
1352
1353	if (!cq->enable) {
1354		nicvf_reclaim_cmp_queue(nic, qs, qidx);
1355		return;
1356	}
1357
1358	/* Reset completion queue */
1359	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
1360
1361	/* Set completion queue base address */
1362	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE, qidx,
1363	    (uint64_t)(cq->dmem.phys_base));
1364
1365	/* Enable Completion queue */
1366	cq_cfg.ena = 1;
1367	cq_cfg.reset = 0;
1368	cq_cfg.caching = 0;
1369	cq_cfg.qsize = CMP_QSIZE;
1370	cq_cfg.avg_con = 0;
1371	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(uint64_t *)&cq_cfg);
1372
1373	/* Set threshold value for interrupt generation */
1374	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
1375	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx,
1376	    nic->cq_coalesce_usecs);
1377}
1378
1379/* Configures transmit queue */
1380static void
1381nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs, int qidx,
1382    boolean_t enable)
1383{
1384	union nic_mbx mbx = {};
1385	struct snd_queue *sq;
1386	struct sq_cfg sq_cfg;
1387
1388	sq = &qs->sq[qidx];
1389	sq->enable = enable;
1390
1391	if (!sq->enable) {
1392		nicvf_reclaim_snd_queue(nic, qs, qidx);
1393		return;
1394	}
1395
1396	/* Reset send queue */
1397	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
1398
1399	sq->cq_qs = qs->vnic_id;
1400	sq->cq_idx = qidx;
1401
1402	/* Send a mailbox msg to PF to config SQ */
1403	mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
1404	mbx.sq.qs_num = qs->vnic_id;
1405	mbx.sq.sq_num = qidx;
1406	mbx.sq.sqs_mode = nic->sqs_mode;
1407	mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
1408	nicvf_send_msg_to_pf(nic, &mbx);
1409
1410	/* Set queue base address */
1411	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE, qidx,
1412	    (uint64_t)(sq->dmem.phys_base));
1413
1414	/* Enable send queue  & set queue size */
1415	sq_cfg.ena = 1;
1416	sq_cfg.reset = 0;
1417	sq_cfg.ldwb = 0;
1418	sq_cfg.qsize = SND_QSIZE;
1419	sq_cfg.tstmp_bgx_intf = 0;
1420	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(uint64_t *)&sq_cfg);
1421
1422	/* Set threshold value for interrupt generation */
1423	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
1424}
1425
1426/* Configures receive buffer descriptor ring */
1427static void
1428nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs, int qidx,
1429    boolean_t enable)
1430{
1431	struct rbdr *rbdr;
1432	struct rbdr_cfg rbdr_cfg;
1433
1434	rbdr = &qs->rbdr[qidx];
1435	nicvf_reclaim_rbdr(nic, rbdr, qidx);
1436	if (!enable)
1437		return;
1438
1439	/* Set descriptor base address */
1440	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE, qidx,
1441	    (uint64_t)(rbdr->dmem.phys_base));
1442
1443	/* Enable RBDR  & set queue size */
1444	/* Buffer size should be in multiples of 128 bytes */
1445	rbdr_cfg.ena = 1;
1446	rbdr_cfg.reset = 0;
1447	rbdr_cfg.ldwb = 0;
1448	rbdr_cfg.qsize = RBDR_SIZE;
1449	rbdr_cfg.avg_con = 0;
1450	rbdr_cfg.lines = rbdr->dma_size / 128;
1451	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx,
1452	    *(uint64_t *)&rbdr_cfg);
1453
1454	/* Notify HW */
1455	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, qidx,
1456	    qs->rbdr_len - 1);
1457
1458	/* Set threshold value for interrupt generation */
1459	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH, qidx,
1460	    rbdr->thresh - 1);
1461}
1462
1463/* Requests PF to assign and enable Qset */
1464void
1465nicvf_qset_config(struct nicvf *nic, boolean_t enable)
1466{
1467	union nic_mbx mbx = {};
1468	struct queue_set *qs;
1469	struct qs_cfg *qs_cfg;
1470
1471	qs = nic->qs;
1472	if (qs == NULL) {
1473		device_printf(nic->dev,
1474		    "Qset is still not allocated, don't init queues\n");
1475		return;
1476	}
1477
1478	qs->enable = enable;
1479	qs->vnic_id = nic->vf_id;
1480
1481	/* Send a mailbox msg to PF to config Qset */
1482	mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
1483	mbx.qs.num = qs->vnic_id;
1484
1485	mbx.qs.cfg = 0;
1486	qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
1487	if (qs->enable) {
1488		qs_cfg->ena = 1;
1489		qs_cfg->vnic = qs->vnic_id;
1490	}
1491	nicvf_send_msg_to_pf(nic, &mbx);
1492}
1493
/*
 * Release all Qset resources: the QS error taskqueue first (it must be
 * dead before completion queue tasks can be freed safely), then the
 * RBDRs, completion queues and send queues.
 */
static void
nicvf_free_resources(struct nicvf *nic)
{
	int qidx;
	struct queue_set *qs;

	qs = nic->qs;
	/*
	 * Remove QS error task first since it has to be dead
	 * to safely free completion queue tasks.
	 */
	if (qs->qs_err_taskq != NULL) {
		/* Shut down QS error tasks */
		while (taskqueue_cancel(qs->qs_err_taskq,
		    &qs->qs_err_task,  NULL) != 0) {
			taskqueue_drain(qs->qs_err_taskq, &qs->qs_err_task);

		}
		taskqueue_free(qs->qs_err_taskq);
		qs->qs_err_taskq = NULL;
	}
	/* Free receive buffer descriptor ring */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
		nicvf_free_rbdr(nic, &qs->rbdr[qidx]);

	/* Free completion queue */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
		nicvf_free_cmp_queue(nic, &qs->cq[qidx]);

	/* Free send queue */
	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
		nicvf_free_snd_queue(nic, &qs->sq[qidx]);
}
1527
/*
 * Allocate all Qset resources: RBDRs, send queues, completion queues
 * and the QS error taskqueue.  On any failure everything allocated so
 * far is released via nicvf_free_resources() and ENOMEM is returned.
 */
static int
nicvf_alloc_resources(struct nicvf *nic)
{
	struct queue_set *qs = nic->qs;
	int qidx;

	/* Alloc receive buffer descriptor ring */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
		if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
				    DMA_BUFFER_LEN, qidx))
			goto alloc_fail;
	}

	/* Alloc send queue */
	for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
		if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
			goto alloc_fail;
	}

	/* Alloc completion queue */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len, qidx))
			goto alloc_fail;
	}

	/* Allocate QS error taskqueue */
	TASK_INIT(&qs->qs_err_task, 0, nicvf_qs_err_task, nic);
	qs->qs_err_taskq = taskqueue_create_fast("nicvf_qs_err_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &qs->qs_err_taskq);
	taskqueue_start_threads(&qs->qs_err_taskq, 1, PI_NET, "%s: qs_taskq",
	    device_get_nameunit(nic->dev));

	return (0);
alloc_fail:
	nicvf_free_resources(nic);
	return (ENOMEM);
}
1565
1566int
1567nicvf_set_qset_resources(struct nicvf *nic)
1568{
1569	struct queue_set *qs;
1570
1571	qs = malloc(sizeof(*qs), M_NICVF, (M_ZERO | M_WAITOK));
1572	nic->qs = qs;
1573
1574	/* Set count of each queue */
1575	qs->rbdr_cnt = RBDR_CNT;
1576	/* With no RSS we stay with single RQ */
1577	qs->rq_cnt = 1;
1578
1579	qs->sq_cnt = SND_QUEUE_CNT;
1580	qs->cq_cnt = CMP_QUEUE_CNT;
1581
1582	/* Set queue lengths */
1583	qs->rbdr_len = RCV_BUF_COUNT;
1584	qs->sq_len = SND_QUEUE_LEN;
1585	qs->cq_len = CMP_QUEUE_LEN;
1586
1587	nic->rx_queues = qs->rq_cnt;
1588	nic->tx_queues = qs->sq_cnt;
1589
1590	return (0);
1591}
1592
1593int
1594nicvf_config_data_transfer(struct nicvf *nic, boolean_t enable)
1595{
1596	boolean_t disable = FALSE;
1597	struct queue_set *qs;
1598	int qidx;
1599
1600	qs = nic->qs;
1601	if (qs == NULL)
1602		return (0);
1603
1604	if (enable) {
1605		if (nicvf_alloc_resources(nic) != 0)
1606			return (ENOMEM);
1607
1608		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1609			nicvf_snd_queue_config(nic, qs, qidx, enable);
1610		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1611			nicvf_cmp_queue_config(nic, qs, qidx, enable);
1612		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1613			nicvf_rbdr_config(nic, qs, qidx, enable);
1614		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1615			nicvf_rcv_queue_config(nic, qs, qidx, enable);
1616	} else {
1617		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1618			nicvf_rcv_queue_config(nic, qs, qidx, disable);
1619		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1620			nicvf_rbdr_config(nic, qs, qidx, disable);
1621		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1622			nicvf_snd_queue_config(nic, qs, qidx, disable);
1623		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1624			nicvf_cmp_queue_config(nic, qs, qidx, disable);
1625
1626		nicvf_free_resources(nic);
1627	}
1628
1629	return (0);
1630}
1631
1632/*
1633 * Get a free desc from SQ
1634 * returns descriptor ponter & descriptor number
1635 */
1636static __inline int
1637nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
1638{
1639	int qentry;
1640
1641	qentry = sq->tail;
1642	sq->free_cnt -= desc_cnt;
1643	sq->tail += desc_cnt;
1644	sq->tail &= (sq->dmem.q_len - 1);
1645
1646	return (qentry);
1647}
1648
1649/* Free descriptor back to SQ for future use */
1650static void
1651nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
1652{
1653
1654	sq->free_cnt += desc_cnt;
1655	sq->head += desc_cnt;
1656	sq->head &= (sq->dmem.q_len - 1);
1657}
1658
1659static __inline int
1660nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
1661{
1662	qentry++;
1663	qentry &= (sq->dmem.q_len - 1);
1664	return (qentry);
1665}
1666
/*
 * Re-enable a send queue and ring its doorbell with a zero count so
 * the hardware resumes processing any SQEs already queued.
 */
static void
nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
{
	uint64_t sq_cfg;

	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
	sq_cfg |= NICVF_SQ_EN;
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
	/* Ring doorbell so that H/W restarts processing SQEs */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
}
1678
1679static void
1680nicvf_sq_disable(struct nicvf *nic, int qidx)
1681{
1682	uint64_t sq_cfg;
1683
1684	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1685	sq_cfg &= ~NICVF_SQ_EN;
1686	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1687}
1688
/*
 * Reclaim SQ descriptors the hardware has already consumed: walk from
 * the software head up to the hardware head, freeing the mbuf and
 * unloading the DMA map of every HEADER descriptor (plus its
 * subdescriptors) along the way.
 */
static void
nicvf_sq_free_used_descs(struct nicvf *nic, struct snd_queue *sq, int qidx)
{
	uint64_t head, tail;
	struct snd_buff *snd_buff;
	struct sq_hdr_subdesc *hdr;

	NICVF_TX_LOCK(sq);
	/* Register values are shifted >> 4, presumably converting byte
	 * offsets to descriptor indices -- confirm against the HRM. */
	head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
	/* NOTE(review): 'tail' is read here but never used below */
	tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
	while (sq->head != head) {
		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
		if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
			/* Skip non-header entries one at a time */
			nicvf_put_sq_desc(sq, 1);
			continue;
		}
		snd_buff = &sq->snd_buff[sq->head];
		if (snd_buff->mbuf != NULL) {
			bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
			m_freem(snd_buff->mbuf);
			sq->snd_buff[sq->head].mbuf = NULL;
		}
		/* Release the header plus all of its subdescriptors */
		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
	}
	NICVF_TX_UNLOCK(sq);
}
1715
1716/*
1717 * Add SQ HEADER subdescriptor.
1718 * First subdescriptor for every send descriptor.
1719 */
1720static __inline int
1721nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
1722			 int subdesc_cnt, struct mbuf *mbuf, int len)
1723{
1724	struct nicvf *nic;
1725	struct sq_hdr_subdesc *hdr;
1726	struct ether_vlan_header *eh;
1727#ifdef INET
1728	struct ip *ip;
1729	struct tcphdr *th;
1730#endif
1731	uint16_t etype;
1732	int ehdrlen, iphlen, poff;
1733
1734	nic = sq->nic;
1735
1736	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1737	sq->snd_buff[qentry].mbuf = mbuf;
1738
1739	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1740	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1741	/* Enable notification via CQE after processing SQE */
1742	hdr->post_cqe = 1;
1743	/* No of subdescriptors following this */
1744	hdr->subdesc_cnt = subdesc_cnt;
1745	hdr->tot_len = len;
1746
1747	eh = mtod(mbuf, struct ether_vlan_header *);
1748	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1749		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1750		etype = ntohs(eh->evl_proto);
1751	} else {
1752		ehdrlen = ETHER_HDR_LEN;
1753		etype = ntohs(eh->evl_encap_proto);
1754	}
1755
1756	switch (etype) {
1757#ifdef INET6
1758	case ETHERTYPE_IPV6:
1759		/* ARM64TODO: Add support for IPv6 */
1760		hdr->csum_l3 = 0;
1761		sq->snd_buff[qentry].mbuf = NULL;
1762		return (ENXIO);
1763#endif
1764#ifdef INET
1765	case ETHERTYPE_IP:
1766		if (mbuf->m_len < ehdrlen + sizeof(struct ip)) {
1767			mbuf = m_pullup(mbuf, ehdrlen + sizeof(struct ip));
1768			sq->snd_buff[qentry].mbuf = mbuf;
1769			if (mbuf == NULL)
1770				return (ENOBUFS);
1771		}
1772
1773		ip = (struct ip *)(mbuf->m_data + ehdrlen);
1774		iphlen = ip->ip_hl << 2;
1775		poff = ehdrlen + iphlen;
1776
1777		if (mbuf->m_pkthdr.csum_flags != 0) {
1778			hdr->csum_l3 = 1; /* Enable IP csum calculation */
1779			switch (ip->ip_p) {
1780			case IPPROTO_TCP:
1781				if ((mbuf->m_pkthdr.csum_flags & CSUM_TCP) == 0)
1782					break;
1783
1784				if (mbuf->m_len < (poff + sizeof(struct tcphdr))) {
1785					mbuf = m_pullup(mbuf, poff + sizeof(struct tcphdr));
1786					sq->snd_buff[qentry].mbuf = mbuf;
1787					if (mbuf == NULL)
1788						return (ENOBUFS);
1789				}
1790				hdr->csum_l4 = SEND_L4_CSUM_TCP;
1791				break;
1792			case IPPROTO_UDP:
1793				if ((mbuf->m_pkthdr.csum_flags & CSUM_UDP) == 0)
1794					break;
1795
1796				if (mbuf->m_len < (poff + sizeof(struct udphdr))) {
1797					mbuf = m_pullup(mbuf, poff + sizeof(struct udphdr));
1798					sq->snd_buff[qentry].mbuf = mbuf;
1799					if (mbuf == NULL)
1800						return (ENOBUFS);
1801				}
1802				hdr->csum_l4 = SEND_L4_CSUM_UDP;
1803				break;
1804			case IPPROTO_SCTP:
1805				if ((mbuf->m_pkthdr.csum_flags & CSUM_SCTP) == 0)
1806					break;
1807
1808				if (mbuf->m_len < (poff + sizeof(struct sctphdr))) {
1809					mbuf = m_pullup(mbuf, poff + sizeof(struct sctphdr));
1810					sq->snd_buff[qentry].mbuf = mbuf;
1811					if (mbuf == NULL)
1812						return (ENOBUFS);
1813				}
1814				hdr->csum_l4 = SEND_L4_CSUM_SCTP;
1815				break;
1816			default:
1817				break;
1818			}
1819			hdr->l3_offset = ehdrlen;
1820			hdr->l4_offset = ehdrlen + iphlen;
1821		}
1822
1823		if ((mbuf->m_pkthdr.tso_segsz != 0) && nic->hw_tso) {
1824			/*
1825			 * Extract ip again as m_data could have been modified.
1826			 */
1827			ip = (struct ip *)(mbuf->m_data + ehdrlen);
1828			th = (struct tcphdr *)((caddr_t)ip + iphlen);
1829
1830			hdr->tso = 1;
1831			hdr->tso_start = ehdrlen + iphlen + (th->th_off * 4);
1832			hdr->tso_max_paysize = mbuf->m_pkthdr.tso_segsz;
1833			hdr->inner_l3_offset = ehdrlen - 2;
1834			nic->drv_stats.tx_tso++;
1835		}
1836		break;
1837#endif
1838	default:
1839		hdr->csum_l3 = 0;
1840	}
1841
1842	return (0);
1843}
1844
1845/*
1846 * SQ GATHER subdescriptor
1847 * Must follow HDR descriptor
1848 */
1849static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
1850					       int size, uint64_t data)
1851{
1852	struct sq_gather_subdesc *gather;
1853
1854	qentry &= (sq->dmem.q_len - 1);
1855	gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);
1856
1857	memset(gather, 0, SND_QUEUE_DESC_SIZE);
1858	gather->subdesc_type = SQ_DESC_TYPE_GATHER;
1859	gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
1860	gather->size = size;
1861	gather->addr = data;
1862}
1863
1864/* Put an mbuf to a SQ for packet transfer. */
1865int
1866nicvf_tx_mbuf_locked(struct snd_queue *sq, struct mbuf *mbuf)
1867{
1868	bus_dma_segment_t segs[256];
1869	struct nicvf *nic;
1870	struct snd_buff *snd_buff;
1871	size_t seg;
1872	int nsegs, qentry;
1873	int subdesc_cnt;
1874	int err;
1875
1876	NICVF_TX_LOCK_ASSERT(sq);
1877
1878	if (sq->free_cnt == 0)
1879		return (ENOBUFS);
1880
1881	snd_buff = &sq->snd_buff[sq->tail];
1882
1883	err = bus_dmamap_load_mbuf_sg(sq->snd_buff_dmat, snd_buff->dmap,
1884	    mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
1885	if (err != 0) {
1886		/* ARM64TODO: Add mbuf defragmenting if we lack maps */
1887		return (err);
1888	}
1889
1890	/* Set how many subdescriptors is required */
1891	nic = sq->nic;
1892	if (mbuf->m_pkthdr.tso_segsz != 0 && nic->hw_tso)
1893		subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
1894	else
1895		subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT + nsegs - 1;
1896
1897	if (subdesc_cnt > sq->free_cnt) {
1898		/* ARM64TODO: Add mbuf defragmentation if we lack descriptors */
1899		bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
1900		return (ENOBUFS);
1901	}
1902
1903	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1904
1905	/* Add SQ header subdesc */
1906	err = nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, mbuf,
1907	    mbuf->m_pkthdr.len);
1908	if (err != 0) {
1909		bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
1910		return (err);
1911	}
1912
1913	/* Add SQ gather subdescs */
1914	for (seg = 0; seg < nsegs; seg++) {
1915		qentry = nicvf_get_nxt_sqentry(sq, qentry);
1916		nicvf_sq_add_gather_subdesc(sq, qentry, segs[seg].ds_len,
1917		    segs[seg].ds_addr);
1918	}
1919
1920	/* make sure all memory stores are done before ringing doorbell */
1921	bus_dmamap_sync(sq->dmem.dmat, sq->dmem.dmap, BUS_DMASYNC_PREWRITE);
1922
1923	dprintf(sq->nic->dev, "%s: sq->idx: %d, subdesc_cnt: %d\n",
1924	    __func__, sq->idx, subdesc_cnt);
1925	/* Inform HW to xmit new packet */
1926	nicvf_queue_reg_write(sq->nic, NIC_QSET_SQ_0_7_DOOR,
1927	    sq->idx, subdesc_cnt);
1928	return (0);
1929}
1930
1931static __inline u_int
1932frag_num(u_int i)
1933{
1934#if BYTE_ORDER == BIG_ENDIAN
1935	return ((i & ~3) + 3 - (i & 3));
1936#else
1937	return (i);
1938#endif
1939}
1940
/*
 * Assemble and return an mbuf for a received packet described by a
 * CQE_RX entry: the first receive buffer becomes the mbuf, subsequent
 * buffers are appended, and checksum-offload results are translated
 * into mbuf csum flags when IFCAP_RXCSUM is enabled.
 */
struct mbuf *
nicvf_get_rcv_mbuf(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
{
	int frag;
	int payload_len = 0;
	struct mbuf *mbuf;
	struct mbuf *mbuf_frag;
	uint16_t *rb_lens = NULL;
	uint64_t *rb_ptrs = NULL;

	mbuf = NULL;
	/* Buffer lengths and pointers sit at fixed offsets inside the CQE */
	rb_lens = (uint16_t *)((uint8_t *)cqe_rx + (3 * sizeof(uint64_t)));
	rb_ptrs = (uint64_t *)((uint8_t *)cqe_rx + (6 * sizeof(uint64_t)));

	dprintf(nic->dev, "%s rb_cnt %d rb0_ptr %lx rb0_sz %d\n",
	    __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);

	for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
		/* frag_num() compensates for the endian layout of rb_lens[] */
		payload_len = rb_lens[frag_num(frag)];
		if (frag == 0) {
			/* First fragment */
			mbuf = nicvf_rb_ptr_to_mbuf(nic,
			    (*rb_ptrs - cqe_rx->align_pad));
			mbuf->m_len = payload_len;
			mbuf->m_data += cqe_rx->align_pad;
			if_setrcvif(mbuf, nic->ifp);
		} else {
			/*
			 * Add fragments.
			 * NOTE(review): the m_append() return value is not
			 * checked; a failed copy would silently truncate
			 * the packet.
			 */
			mbuf_frag = nicvf_rb_ptr_to_mbuf(nic, *rb_ptrs);
			m_append(mbuf, payload_len, mbuf_frag->m_data);
			m_freem(mbuf_frag);
		}
		/* Next buffer pointer */
		rb_ptrs++;
	}

	if (__predict_true(mbuf != NULL)) {
		m_fixhdr(mbuf);
		mbuf->m_pkthdr.flowid = cqe_rx->rq_idx;
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE);
		if (__predict_true((if_getcapenable(nic->ifp) & IFCAP_RXCSUM) != 0)) {
			/*
			 * HW by default verifies IP & TCP/UDP/SCTP checksums
			 */
			if (__predict_true(cqe_rx->l3_type == L3TYPE_IPV4)) {
				mbuf->m_pkthdr.csum_flags =
				    (CSUM_IP_CHECKED | CSUM_IP_VALID);
			}

			switch (cqe_rx->l4_type) {
			case L4TYPE_UDP:
			case L4TYPE_TCP: /* fall through */
				mbuf->m_pkthdr.csum_flags |=
				    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
				mbuf->m_pkthdr.csum_data = 0xffff;
				break;
			case L4TYPE_SCTP:
				mbuf->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
				break;
			default:
				break;
			}
		}
	}

	return (mbuf);
}
2009
2010/* Enable interrupt */
2011void
2012nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
2013{
2014	uint64_t reg_val;
2015
2016	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
2017
2018	switch (int_type) {
2019	case NICVF_INTR_CQ:
2020		reg_val |= ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2021		break;
2022	case NICVF_INTR_SQ:
2023		reg_val |= ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2024		break;
2025	case NICVF_INTR_RBDR:
2026		reg_val |= ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2027		break;
2028	case NICVF_INTR_PKT_DROP:
2029		reg_val |= (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2030		break;
2031	case NICVF_INTR_TCP_TIMER:
2032		reg_val |= (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2033		break;
2034	case NICVF_INTR_MBOX:
2035		reg_val |= (1UL << NICVF_INTR_MBOX_SHIFT);
2036		break;
2037	case NICVF_INTR_QS_ERR:
2038		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2039		break;
2040	default:
2041		device_printf(nic->dev,
2042			   "Failed to enable interrupt: unknown type\n");
2043		break;
2044	}
2045
2046	nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val);
2047}
2048
2049/* Disable interrupt */
2050void
2051nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
2052{
2053	uint64_t reg_val = 0;
2054
2055	switch (int_type) {
2056	case NICVF_INTR_CQ:
2057		reg_val |= ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2058		break;
2059	case NICVF_INTR_SQ:
2060		reg_val |= ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2061		break;
2062	case NICVF_INTR_RBDR:
2063		reg_val |= ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2064		break;
2065	case NICVF_INTR_PKT_DROP:
2066		reg_val |= (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2067		break;
2068	case NICVF_INTR_TCP_TIMER:
2069		reg_val |= (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2070		break;
2071	case NICVF_INTR_MBOX:
2072		reg_val |= (1UL << NICVF_INTR_MBOX_SHIFT);
2073		break;
2074	case NICVF_INTR_QS_ERR:
2075		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2076		break;
2077	default:
2078		device_printf(nic->dev,
2079			   "Failed to disable interrupt: unknown type\n");
2080		break;
2081	}
2082
2083	nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val);
2084}
2085
2086/* Clear interrupt */
2087void
2088nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
2089{
2090	uint64_t reg_val = 0;
2091
2092	switch (int_type) {
2093	case NICVF_INTR_CQ:
2094		reg_val = ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2095		break;
2096	case NICVF_INTR_SQ:
2097		reg_val = ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2098		break;
2099	case NICVF_INTR_RBDR:
2100		reg_val = ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2101		break;
2102	case NICVF_INTR_PKT_DROP:
2103		reg_val = (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2104		break;
2105	case NICVF_INTR_TCP_TIMER:
2106		reg_val = (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2107		break;
2108	case NICVF_INTR_MBOX:
2109		reg_val = (1UL << NICVF_INTR_MBOX_SHIFT);
2110		break;
2111	case NICVF_INTR_QS_ERR:
2112		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2113		break;
2114	default:
2115		device_printf(nic->dev,
2116			   "Failed to clear interrupt: unknown type\n");
2117		break;
2118	}
2119
2120	nicvf_reg_write(nic, NIC_VF_INT, reg_val);
2121}
2122
2123/* Check if interrupt is enabled */
2124int
2125nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
2126{
2127	uint64_t reg_val;
2128	uint64_t mask = 0xff;
2129
2130	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
2131
2132	switch (int_type) {
2133	case NICVF_INTR_CQ:
2134		mask = ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2135		break;
2136	case NICVF_INTR_SQ:
2137		mask = ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2138		break;
2139	case NICVF_INTR_RBDR:
2140		mask = ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2141		break;
2142	case NICVF_INTR_PKT_DROP:
2143		mask = NICVF_INTR_PKT_DROP_MASK;
2144		break;
2145	case NICVF_INTR_TCP_TIMER:
2146		mask = NICVF_INTR_TCP_TIMER_MASK;
2147		break;
2148	case NICVF_INTR_MBOX:
2149		mask = NICVF_INTR_MBOX_MASK;
2150		break;
2151	case NICVF_INTR_QS_ERR:
2152		mask = NICVF_INTR_QS_ERR_MASK;
2153		break;
2154	default:
2155		device_printf(nic->dev,
2156			   "Failed to check interrupt enable: unknown type\n");
2157		break;
2158	}
2159
2160	return (reg_val & mask);
2161}
2162
2163void
2164nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
2165{
2166	struct rcv_queue *rq;
2167
2168#define GET_RQ_STATS(reg) \
2169	nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
2170			    (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
2171
2172	rq = &nic->qs->rq[rq_idx];
2173	rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
2174	rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
2175}
2176
2177void
2178nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
2179{
2180	struct snd_queue *sq;
2181
2182#define GET_SQ_STATS(reg) \
2183	nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
2184			    (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
2185
2186	sq = &nic->qs->sq[sq_idx];
2187	sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
2188	sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
2189}
2190
2191/* Check for errors in the receive cmp.queue entry */
2192int
2193nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cmp_queue *cq,
2194    struct cqe_rx_t *cqe_rx)
2195{
2196	struct nicvf_hw_stats *stats = &nic->hw_stats;
2197	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
2198
2199	if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
2200		drv_stats->rx_frames_ok++;
2201		return (0);
2202	}
2203
2204	switch (cqe_rx->err_opcode) {
2205	case CQ_RX_ERROP_RE_PARTIAL:
2206		stats->rx_bgx_truncated_pkts++;
2207		break;
2208	case CQ_RX_ERROP_RE_JABBER:
2209		stats->rx_jabber_errs++;
2210		break;
2211	case CQ_RX_ERROP_RE_FCS:
2212		stats->rx_fcs_errs++;
2213		break;
2214	case CQ_RX_ERROP_RE_RX_CTL:
2215		stats->rx_bgx_errs++;
2216		break;
2217	case CQ_RX_ERROP_PREL2_ERR:
2218		stats->rx_prel2_errs++;
2219		break;
2220	case CQ_RX_ERROP_L2_MAL:
2221		stats->rx_l2_hdr_malformed++;
2222		break;
2223	case CQ_RX_ERROP_L2_OVERSIZE:
2224		stats->rx_oversize++;
2225		break;
2226	case CQ_RX_ERROP_L2_UNDERSIZE:
2227		stats->rx_undersize++;
2228		break;
2229	case CQ_RX_ERROP_L2_LENMISM:
2230		stats->rx_l2_len_mismatch++;
2231		break;
2232	case CQ_RX_ERROP_L2_PCLP:
2233		stats->rx_l2_pclp++;
2234		break;
2235	case CQ_RX_ERROP_IP_NOT:
2236		stats->rx_ip_ver_errs++;
2237		break;
2238	case CQ_RX_ERROP_IP_CSUM_ERR:
2239		stats->rx_ip_csum_errs++;
2240		break;
2241	case CQ_RX_ERROP_IP_MAL:
2242		stats->rx_ip_hdr_malformed++;
2243		break;
2244	case CQ_RX_ERROP_IP_MALD:
2245		stats->rx_ip_payload_malformed++;
2246		break;
2247	case CQ_RX_ERROP_IP_HOP:
2248		stats->rx_ip_ttl_errs++;
2249		break;
2250	case CQ_RX_ERROP_L3_PCLP:
2251		stats->rx_l3_pclp++;
2252		break;
2253	case CQ_RX_ERROP_L4_MAL:
2254		stats->rx_l4_malformed++;
2255		break;
2256	case CQ_RX_ERROP_L4_CHK:
2257		stats->rx_l4_csum_errs++;
2258		break;
2259	case CQ_RX_ERROP_UDP_LEN:
2260		stats->rx_udp_len_errs++;
2261		break;
2262	case CQ_RX_ERROP_L4_PORT:
2263		stats->rx_l4_port_errs++;
2264		break;
2265	case CQ_RX_ERROP_TCP_FLAG:
2266		stats->rx_tcp_flag_errs++;
2267		break;
2268	case CQ_RX_ERROP_TCP_OFFSET:
2269		stats->rx_tcp_offset_errs++;
2270		break;
2271	case CQ_RX_ERROP_L4_PCLP:
2272		stats->rx_l4_pclp++;
2273		break;
2274	case CQ_RX_ERROP_RBDR_TRUNC:
2275		stats->rx_truncated_pkts++;
2276		break;
2277	}
2278
2279	return (1);
2280}
2281
2282/* Check for errors in the send cmp.queue entry */
2283int
2284nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cmp_queue *cq,
2285    struct cqe_send_t *cqe_tx)
2286{
2287	struct cmp_queue_stats *stats = &cq->stats;
2288
2289	switch (cqe_tx->send_status) {
2290	case CQ_TX_ERROP_GOOD:
2291		stats->tx.good++;
2292		return (0);
2293	case CQ_TX_ERROP_DESC_FAULT:
2294		stats->tx.desc_fault++;
2295		break;
2296	case CQ_TX_ERROP_HDR_CONS_ERR:
2297		stats->tx.hdr_cons_err++;
2298		break;
2299	case CQ_TX_ERROP_SUBDC_ERR:
2300		stats->tx.subdesc_err++;
2301		break;
2302	case CQ_TX_ERROP_IMM_SIZE_OFLOW:
2303		stats->tx.imm_size_oflow++;
2304		break;
2305	case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
2306		stats->tx.data_seq_err++;
2307		break;
2308	case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
2309		stats->tx.mem_seq_err++;
2310		break;
2311	case CQ_TX_ERROP_LOCK_VIOL:
2312		stats->tx.lock_viol++;
2313		break;
2314	case CQ_TX_ERROP_DATA_FAULT:
2315		stats->tx.data_fault++;
2316		break;
2317	case CQ_TX_ERROP_TSTMP_CONFLICT:
2318		stats->tx.tstmp_conflict++;
2319		break;
2320	case CQ_TX_ERROP_TSTMP_TIMEOUT:
2321		stats->tx.tstmp_timeout++;
2322		break;
2323	case CQ_TX_ERROP_MEM_FAULT:
2324		stats->tx.mem_fault++;
2325		break;
2326	case CQ_TX_ERROP_CK_OVERLAP:
2327		stats->tx.csum_overlap++;
2328		break;
2329	case CQ_TX_ERROP_CK_OFLOW:
2330		stats->tx.csum_overflow++;
2331		break;
2332	}
2333
2334	return (1);
2335}
2336