nicvf_queues.c revision 326062
1/*
2 * Copyright (C) 2015 Cavium Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/11/sys/dev/vnic/nicvf_queues.c 326062 2017-11-21 15:34:25Z emaste $
27 *
28 */
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/sys/dev/vnic/nicvf_queues.c 326062 2017-11-21 15:34:25Z emaste $");
31
32#include "opt_inet.h"
33#include "opt_inet6.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/bitset.h>
38#include <sys/bitstring.h>
39#include <sys/buf_ring.h>
40#include <sys/bus.h>
41#include <sys/endian.h>
42#include <sys/kernel.h>
43#include <sys/malloc.h>
44#include <sys/module.h>
45#include <sys/rman.h>
46#include <sys/pciio.h>
47#include <sys/pcpu.h>
48#include <sys/proc.h>
49#include <sys/sockio.h>
50#include <sys/socket.h>
51#include <sys/stdatomic.h>
52#include <sys/cpuset.h>
53#include <sys/lock.h>
54#include <sys/mutex.h>
55#include <sys/smp.h>
56#include <sys/taskqueue.h>
57
58#include <vm/vm.h>
59#include <vm/pmap.h>
60
61#include <machine/bus.h>
62#include <machine/vmparam.h>
63
64#include <net/if.h>
65#include <net/if_var.h>
66#include <net/if_media.h>
67#include <net/ifq.h>
68#include <net/bpf.h>
69#include <net/ethernet.h>
70
71#include <netinet/in_systm.h>
72#include <netinet/in.h>
73#include <netinet/if_ether.h>
74#include <netinet/ip.h>
75#include <netinet/ip6.h>
76#include <netinet/sctp.h>
77#include <netinet/tcp.h>
78#include <netinet/tcp_lro.h>
79#include <netinet/udp.h>
80
81#include <netinet6/ip6_var.h>
82
83#include <dev/pci/pcireg.h>
84#include <dev/pci/pcivar.h>
85
86#include "thunder_bgx.h"
87#include "nic_reg.h"
88#include "nic.h"
89#include "q_struct.h"
90#include "nicvf_queues.h"
91
/*
 * Debug tracing switch.  DEBUG is defined and immediately undefined, so
 * tracing is compiled out as the file stands; drop the #undef to turn
 * dprintf() into device_printf().
 */
#define	DEBUG
#undef DEBUG

#ifndef DEBUG
/* Tracing disabled: dprintf() expands to nothing. */
#define	dprintf(dev, fmt, ...)
#else
/* Tracing enabled: forward everything to device_printf(). */
#define	dprintf(dev, fmt, ...)	device_printf(dev, fmt, ##__VA_ARGS__)
#endif
100
101MALLOC_DECLARE(M_NICVF);
102
103static void nicvf_free_snd_queue(struct nicvf *, struct snd_queue *);
104static struct mbuf * nicvf_get_rcv_mbuf(struct nicvf *, struct cqe_rx_t *);
105static void nicvf_sq_disable(struct nicvf *, int);
106static void nicvf_sq_enable(struct nicvf *, struct snd_queue *, int);
107static void nicvf_put_sq_desc(struct snd_queue *, int);
108static void nicvf_cmp_queue_config(struct nicvf *, struct queue_set *, int,
109    boolean_t);
110static void nicvf_sq_free_used_descs(struct nicvf *, struct snd_queue *, int);
111
112static int nicvf_tx_mbuf_locked(struct snd_queue *, struct mbuf **);
113
114static void nicvf_rbdr_task(void *, int);
115static void nicvf_rbdr_task_nowait(void *, int);
116
117struct rbuf_info {
118	bus_dma_tag_t	dmat;
119	bus_dmamap_t	dmap;
120	struct mbuf *	mbuf;
121};
122
123#define GET_RBUF_INFO(x) ((struct rbuf_info *)((x) - NICVF_RCV_BUF_ALIGN_BYTES))
124
125/* Poll a register for a specific value */
126static int nicvf_poll_reg(struct nicvf *nic, int qidx,
127			  uint64_t reg, int bit_pos, int bits, int val)
128{
129	uint64_t bit_mask;
130	uint64_t reg_val;
131	int timeout = 10;
132
133	bit_mask = (1UL << bits) - 1;
134	bit_mask = (bit_mask << bit_pos);
135
136	while (timeout) {
137		reg_val = nicvf_queue_reg_read(nic, reg, qidx);
138		if (((reg_val & bit_mask) >> bit_pos) == val)
139			return (0);
140
141		DELAY(1000);
142		timeout--;
143	}
144	device_printf(nic->dev, "Poll on reg 0x%lx failed\n", reg);
145	return (ETIMEDOUT);
146}
147
148/* Callback for bus_dmamap_load() */
149static void
150nicvf_dmamap_q_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
151{
152	bus_addr_t *paddr;
153
154	KASSERT(nseg == 1, ("wrong number of segments, should be 1"));
155	paddr = arg;
156	*paddr = segs->ds_addr;
157}
158
159/* Allocate memory for a queue's descriptors */
160static int
161nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
162    int q_len, int desc_size, int align_bytes)
163{
164	int err, err_dmat;
165
166	/* Create DMA tag first */
167	err = bus_dma_tag_create(
168	    bus_get_dma_tag(nic->dev),		/* parent tag */
169	    align_bytes,			/* alignment */
170	    0,					/* boundary */
171	    BUS_SPACE_MAXADDR,			/* lowaddr */
172	    BUS_SPACE_MAXADDR,			/* highaddr */
173	    NULL, NULL,				/* filtfunc, filtfuncarg */
174	    (q_len * desc_size),		/* maxsize */
175	    1,					/* nsegments */
176	    (q_len * desc_size),		/* maxsegsize */
177	    0,					/* flags */
178	    NULL, NULL,				/* lockfunc, lockfuncarg */
179	    &dmem->dmat);			/* dmat */
180
181	if (err != 0) {
182		device_printf(nic->dev,
183		    "Failed to create busdma tag for descriptors ring\n");
184		return (err);
185	}
186
187	/* Allocate segment of continuous DMA safe memory */
188	err = bus_dmamem_alloc(
189	    dmem->dmat,				/* DMA tag */
190	    &dmem->base,			/* virtual address */
191	    (BUS_DMA_NOWAIT | BUS_DMA_ZERO),	/* flags */
192	    &dmem->dmap);			/* DMA map */
193	if (err != 0) {
194		device_printf(nic->dev, "Failed to allocate DMA safe memory for"
195		    "descriptors ring\n");
196		goto dmamem_fail;
197	}
198
199	err = bus_dmamap_load(
200	    dmem->dmat,
201	    dmem->dmap,
202	    dmem->base,
203	    (q_len * desc_size),		/* allocation size */
204	    nicvf_dmamap_q_cb,			/* map to DMA address cb. */
205	    &dmem->phys_base,			/* physical address */
206	    BUS_DMA_NOWAIT);
207	if (err != 0) {
208		device_printf(nic->dev,
209		    "Cannot load DMA map of descriptors ring\n");
210		goto dmamap_fail;
211	}
212
213	dmem->q_len = q_len;
214	dmem->size = (desc_size * q_len);
215
216	return (0);
217
218dmamap_fail:
219	bus_dmamem_free(dmem->dmat, dmem->base, dmem->dmap);
220	dmem->phys_base = 0;
221dmamem_fail:
222	err_dmat = bus_dma_tag_destroy(dmem->dmat);
223	dmem->base = NULL;
224	KASSERT(err_dmat == 0,
225	    ("%s: Trying to destroy BUSY DMA tag", __func__));
226
227	return (err);
228}
229
230/* Free queue's descriptor memory */
231static void
232nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
233{
234	int err;
235
236	if ((dmem == NULL) || (dmem->base == NULL))
237		return;
238
239	/* Unload a map */
240	bus_dmamap_sync(dmem->dmat, dmem->dmap, BUS_DMASYNC_POSTREAD);
241	bus_dmamap_unload(dmem->dmat, dmem->dmap);
242	/* Free DMA memory */
243	bus_dmamem_free(dmem->dmat, dmem->base, dmem->dmap);
244	/* Destroy DMA tag */
245	err = bus_dma_tag_destroy(dmem->dmat);
246
247	KASSERT(err == 0,
248	    ("%s: Trying to destroy BUSY DMA tag", __func__));
249
250	dmem->phys_base = 0;
251	dmem->base = NULL;
252}
253
254/*
255 * Allocate buffer for packet reception
256 * HW returns memory address where packet is DMA'ed but not a pointer
257 * into RBDR ring, so save buffer address at the start of fragment and
258 * align the start address to a cache aligned address
259 */
260static __inline int
261nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
262    bus_dmamap_t dmap, int mflags, uint32_t buf_len, bus_addr_t *rbuf)
263{
264	struct mbuf *mbuf;
265	struct rbuf_info *rinfo;
266	bus_dma_segment_t segs[1];
267	int nsegs;
268	int err;
269
270	mbuf = m_getjcl(mflags, MT_DATA, M_PKTHDR, MCLBYTES);
271	if (mbuf == NULL)
272		return (ENOMEM);
273
274	/*
275	 * The length is equal to the actual length + one 128b line
276	 * used as a room for rbuf_info structure.
277	 */
278	mbuf->m_len = mbuf->m_pkthdr.len = buf_len;
279
280	err = bus_dmamap_load_mbuf_sg(rbdr->rbdr_buff_dmat, dmap, mbuf, segs,
281	    &nsegs, BUS_DMA_NOWAIT);
282	if (err != 0) {
283		device_printf(nic->dev,
284		    "Failed to map mbuf into DMA visible memory, err: %d\n",
285		    err);
286		m_freem(mbuf);
287		bus_dmamap_destroy(rbdr->rbdr_buff_dmat, dmap);
288		return (err);
289	}
290	if (nsegs != 1)
291		panic("Unexpected number of DMA segments for RB: %d", nsegs);
292	/*
293	 * Now use the room for rbuf_info structure
294	 * and adjust mbuf data and length.
295	 */
296	rinfo = (struct rbuf_info *)mbuf->m_data;
297	m_adj(mbuf, NICVF_RCV_BUF_ALIGN_BYTES);
298
299	rinfo->dmat = rbdr->rbdr_buff_dmat;
300	rinfo->dmap = dmap;
301	rinfo->mbuf = mbuf;
302
303	*rbuf = segs[0].ds_addr + NICVF_RCV_BUF_ALIGN_BYTES;
304
305	return (0);
306}
307
308/* Retrieve mbuf for received packet */
309static struct mbuf *
310nicvf_rb_ptr_to_mbuf(struct nicvf *nic, bus_addr_t rb_ptr)
311{
312	struct mbuf *mbuf;
313	struct rbuf_info *rinfo;
314
315	/* Get buffer start address and alignment offset */
316	rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(rb_ptr));
317
318	/* Now retrieve mbuf to give to stack */
319	mbuf = rinfo->mbuf;
320	if (__predict_false(mbuf == NULL)) {
321		panic("%s: Received packet fragment with NULL mbuf",
322		    device_get_nameunit(nic->dev));
323	}
324	/*
325	 * Clear the mbuf in the descriptor to indicate
326	 * that this slot is processed and free to use.
327	 */
328	rinfo->mbuf = NULL;
329
330	bus_dmamap_sync(rinfo->dmat, rinfo->dmap, BUS_DMASYNC_POSTREAD);
331	bus_dmamap_unload(rinfo->dmat, rinfo->dmap);
332
333	return (mbuf);
334}
335
336/* Allocate RBDR ring and populate receive buffers */
337static int
338nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, int ring_len,
339    int buf_size, int qidx)
340{
341	bus_dmamap_t dmap;
342	bus_addr_t rbuf;
343	struct rbdr_entry_t *desc;
344	int idx;
345	int err;
346
347	/* Allocate rbdr descriptors ring */
348	err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
349	    sizeof(struct rbdr_entry_t), NICVF_RCV_BUF_ALIGN_BYTES);
350	if (err != 0) {
351		device_printf(nic->dev,
352		    "Failed to create RBDR descriptors ring\n");
353		return (err);
354	}
355
356	rbdr->desc = rbdr->dmem.base;
357	/*
358	 * Buffer size has to be in multiples of 128 bytes.
359	 * Make room for metadata of size of one line (128 bytes).
360	 */
361	rbdr->dma_size = buf_size - NICVF_RCV_BUF_ALIGN_BYTES;
362	rbdr->enable = TRUE;
363	rbdr->thresh = RBDR_THRESH;
364	rbdr->nic = nic;
365	rbdr->idx = qidx;
366
367	/*
368	 * Create DMA tag for Rx buffers.
369	 * Each map created using this tag is intended to store Rx payload for
370	 * one fragment and one header structure containing rbuf_info (thus
371	 * additional 128 byte line since RB must be a multiple of 128 byte
372	 * cache line).
373	 */
374	if (buf_size > MCLBYTES) {
375		device_printf(nic->dev,
376		    "Buffer size to large for mbuf cluster\n");
377		return (EINVAL);
378	}
379	err = bus_dma_tag_create(
380	    bus_get_dma_tag(nic->dev),		/* parent tag */
381	    NICVF_RCV_BUF_ALIGN_BYTES,		/* alignment */
382	    0,					/* boundary */
383	    DMAP_MAX_PHYSADDR,			/* lowaddr */
384	    DMAP_MIN_PHYSADDR,			/* highaddr */
385	    NULL, NULL,				/* filtfunc, filtfuncarg */
386	    roundup2(buf_size, MCLBYTES),	/* maxsize */
387	    1,					/* nsegments */
388	    roundup2(buf_size, MCLBYTES),	/* maxsegsize */
389	    0,					/* flags */
390	    NULL, NULL,				/* lockfunc, lockfuncarg */
391	    &rbdr->rbdr_buff_dmat);		/* dmat */
392
393	if (err != 0) {
394		device_printf(nic->dev,
395		    "Failed to create busdma tag for RBDR buffers\n");
396		return (err);
397	}
398
399	rbdr->rbdr_buff_dmaps = malloc(sizeof(*rbdr->rbdr_buff_dmaps) *
400	    ring_len, M_NICVF, (M_WAITOK | M_ZERO));
401
402	for (idx = 0; idx < ring_len; idx++) {
403		err = bus_dmamap_create(rbdr->rbdr_buff_dmat, 0, &dmap);
404		if (err != 0) {
405			device_printf(nic->dev,
406			    "Failed to create DMA map for RB\n");
407			return (err);
408		}
409		rbdr->rbdr_buff_dmaps[idx] = dmap;
410
411		err = nicvf_alloc_rcv_buffer(nic, rbdr, dmap, M_WAITOK,
412		    DMA_BUFFER_LEN, &rbuf);
413		if (err != 0)
414			return (err);
415
416		desc = GET_RBDR_DESC(rbdr, idx);
417		desc->buf_addr = (rbuf >> NICVF_RCV_BUF_ALIGN);
418	}
419
420	/* Allocate taskqueue */
421	TASK_INIT(&rbdr->rbdr_task, 0, nicvf_rbdr_task, rbdr);
422	TASK_INIT(&rbdr->rbdr_task_nowait, 0, nicvf_rbdr_task_nowait, rbdr);
423	rbdr->rbdr_taskq = taskqueue_create_fast("nicvf_rbdr_taskq", M_WAITOK,
424	    taskqueue_thread_enqueue, &rbdr->rbdr_taskq);
425	taskqueue_start_threads(&rbdr->rbdr_taskq, 1, PI_NET, "%s: rbdr_taskq",
426	    device_get_nameunit(nic->dev));
427
428	return (0);
429}
430
431/* Free RBDR ring and its receive buffers */
432static void
433nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
434{
435	struct mbuf *mbuf;
436	struct queue_set *qs;
437	struct rbdr_entry_t *desc;
438	struct rbuf_info *rinfo;
439	bus_addr_t buf_addr;
440	int head, tail, idx;
441	int err;
442
443	qs = nic->qs;
444
445	if ((qs == NULL) || (rbdr == NULL))
446		return;
447
448	rbdr->enable = FALSE;
449	if (rbdr->rbdr_taskq != NULL) {
450		/* Remove tasks */
451		while (taskqueue_cancel(rbdr->rbdr_taskq,
452		    &rbdr->rbdr_task_nowait, NULL) != 0) {
453			/* Finish the nowait task first */
454			taskqueue_drain(rbdr->rbdr_taskq,
455			    &rbdr->rbdr_task_nowait);
456		}
457		taskqueue_free(rbdr->rbdr_taskq);
458		rbdr->rbdr_taskq = NULL;
459
460		while (taskqueue_cancel(taskqueue_thread,
461		    &rbdr->rbdr_task, NULL) != 0) {
462			/* Now finish the sleepable task */
463			taskqueue_drain(taskqueue_thread, &rbdr->rbdr_task);
464		}
465	}
466
467	/*
468	 * Free all of the memory under the RB descriptors.
469	 * There are assumptions here:
470	 * 1. Corresponding RBDR is disabled
471	 *    - it is safe to operate using head and tail indexes
472	 * 2. All bffers that were received are properly freed by
473	 *    the receive handler
474	 *    - there is no need to unload DMA map and free MBUF for other
475	 *      descriptors than unused ones
476	 */
477	if (rbdr->rbdr_buff_dmat != NULL) {
478		head = rbdr->head;
479		tail = rbdr->tail;
480		while (head != tail) {
481			desc = GET_RBDR_DESC(rbdr, head);
482			buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
483			rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(buf_addr));
484			bus_dmamap_unload(rbdr->rbdr_buff_dmat, rinfo->dmap);
485			mbuf = rinfo->mbuf;
486			/* This will destroy everything including rinfo! */
487			m_freem(mbuf);
488			head++;
489			head &= (rbdr->dmem.q_len - 1);
490		}
491		/* Free tail descriptor */
492		desc = GET_RBDR_DESC(rbdr, tail);
493		buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
494		rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(buf_addr));
495		bus_dmamap_unload(rbdr->rbdr_buff_dmat, rinfo->dmap);
496		mbuf = rinfo->mbuf;
497		/* This will destroy everything including rinfo! */
498		m_freem(mbuf);
499
500		/* Destroy DMA maps */
501		for (idx = 0; idx < qs->rbdr_len; idx++) {
502			if (rbdr->rbdr_buff_dmaps[idx] == NULL)
503				continue;
504			err = bus_dmamap_destroy(rbdr->rbdr_buff_dmat,
505			    rbdr->rbdr_buff_dmaps[idx]);
506			KASSERT(err == 0,
507			    ("%s: Could not destroy DMA map for RB, desc: %d",
508			    __func__, idx));
509			rbdr->rbdr_buff_dmaps[idx] = NULL;
510		}
511
512		/* Now destroy the tag */
513		err = bus_dma_tag_destroy(rbdr->rbdr_buff_dmat);
514		KASSERT(err == 0,
515		    ("%s: Trying to destroy BUSY DMA tag", __func__));
516
517		rbdr->head = 0;
518		rbdr->tail = 0;
519	}
520
521	/* Free RBDR ring */
522	nicvf_free_q_desc_mem(nic, &rbdr->dmem);
523}
524
525/*
526 * Refill receive buffer descriptors with new buffers.
527 */
528static int
529nicvf_refill_rbdr(struct rbdr *rbdr, int mflags)
530{
531	struct nicvf *nic;
532	struct queue_set *qs;
533	int rbdr_idx;
534	int tail, qcount;
535	int refill_rb_cnt;
536	struct rbdr_entry_t *desc;
537	bus_dmamap_t dmap;
538	bus_addr_t rbuf;
539	boolean_t rb_alloc_fail;
540	int new_rb;
541
542	rb_alloc_fail = TRUE;
543	new_rb = 0;
544	nic = rbdr->nic;
545	qs = nic->qs;
546	rbdr_idx = rbdr->idx;
547
548	/* Check if it's enabled */
549	if (!rbdr->enable)
550		return (0);
551
552	/* Get no of desc's to be refilled */
553	qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
554	qcount &= 0x7FFFF;
555	/* Doorbell can be ringed with a max of ring size minus 1 */
556	if (qcount >= (qs->rbdr_len - 1)) {
557		rb_alloc_fail = FALSE;
558		goto out;
559	} else
560		refill_rb_cnt = qs->rbdr_len - qcount - 1;
561
562	/* Start filling descs from tail */
563	tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
564	while (refill_rb_cnt) {
565		tail++;
566		tail &= (rbdr->dmem.q_len - 1);
567
568		dmap = rbdr->rbdr_buff_dmaps[tail];
569		if (nicvf_alloc_rcv_buffer(nic, rbdr, dmap, mflags,
570		    DMA_BUFFER_LEN, &rbuf)) {
571			/* Something went wrong. Resign */
572			break;
573		}
574		desc = GET_RBDR_DESC(rbdr, tail);
575		desc->buf_addr = (rbuf >> NICVF_RCV_BUF_ALIGN);
576		refill_rb_cnt--;
577		new_rb++;
578	}
579
580	/* make sure all memory stores are done before ringing doorbell */
581	wmb();
582
583	/* Check if buffer allocation failed */
584	if (refill_rb_cnt == 0)
585		rb_alloc_fail = FALSE;
586
587	/* Notify HW */
588	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
589			      rbdr_idx, new_rb);
590out:
591	if (!rb_alloc_fail) {
592		/*
593		 * Re-enable RBDR interrupts only
594		 * if buffer allocation is success.
595		 */
596		nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);
597
598		return (0);
599	}
600
601	return (ENOMEM);
602}
603
604/* Refill RBs even if sleep is needed to reclaim memory */
605static void
606nicvf_rbdr_task(void *arg, int pending)
607{
608	struct rbdr *rbdr;
609	int err;
610
611	rbdr = (struct rbdr *)arg;
612
613	err = nicvf_refill_rbdr(rbdr, M_WAITOK);
614	if (__predict_false(err != 0)) {
615		panic("%s: Failed to refill RBs even when sleep enabled",
616		    __func__);
617	}
618}
619
620/* Refill RBs as soon as possible without waiting */
621static void
622nicvf_rbdr_task_nowait(void *arg, int pending)
623{
624	struct rbdr *rbdr;
625	int err;
626
627	rbdr = (struct rbdr *)arg;
628
629	err = nicvf_refill_rbdr(rbdr, M_NOWAIT);
630	if (err != 0) {
631		/*
632		 * Schedule another, sleepable kernel thread
633		 * that will for sure refill the buffers.
634		 */
635		taskqueue_enqueue(taskqueue_thread, &rbdr->rbdr_task);
636	}
637}
638
639static int
640nicvf_rcv_pkt_handler(struct nicvf *nic, struct cmp_queue *cq,
641    struct cqe_rx_t *cqe_rx, int cqe_type)
642{
643	struct mbuf *mbuf;
644	struct rcv_queue *rq;
645	int rq_idx;
646	int err = 0;
647
648	rq_idx = cqe_rx->rq_idx;
649	rq = &nic->qs->rq[rq_idx];
650
651	/* Check for errors */
652	err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
653	if (err && !cqe_rx->rb_cnt)
654		return (0);
655
656	mbuf = nicvf_get_rcv_mbuf(nic, cqe_rx);
657	if (mbuf == NULL) {
658		dprintf(nic->dev, "Packet not received\n");
659		return (0);
660	}
661
662	/* If error packet */
663	if (err != 0) {
664		m_freem(mbuf);
665		return (0);
666	}
667
668	if (rq->lro_enabled &&
669	    ((cqe_rx->l3_type == L3TYPE_IPV4) && (cqe_rx->l4_type == L4TYPE_TCP)) &&
670	    (mbuf->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
671            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
672		/*
673		 * At this point it is known that there are no errors in the
674		 * packet. Attempt to LRO enqueue. Send to stack if no resources
675		 * or enqueue error.
676		 */
677		if ((rq->lro.lro_cnt != 0) &&
678		    (tcp_lro_rx(&rq->lro, mbuf, 0) == 0))
679			return (0);
680	}
681	/*
682	 * Push this packet to the stack later to avoid
683	 * unlocking completion task in the middle of work.
684	 */
685	err = buf_ring_enqueue(cq->rx_br, mbuf);
686	if (err != 0) {
687		/*
688		 * Failed to enqueue this mbuf.
689		 * We don't drop it, just schedule another task.
690		 */
691		return (err);
692	}
693
694	return (0);
695}
696
697static void
698nicvf_snd_pkt_handler(struct nicvf *nic, struct cmp_queue *cq,
699    struct cqe_send_t *cqe_tx, int cqe_type)
700{
701	bus_dmamap_t dmap;
702	struct mbuf *mbuf;
703	struct snd_queue *sq;
704	struct sq_hdr_subdesc *hdr;
705
706	mbuf = NULL;
707	sq = &nic->qs->sq[cqe_tx->sq_idx];
708
709	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
710	if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER)
711		return;
712
713	dprintf(nic->dev,
714	    "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n",
715	    __func__, cqe_tx->sq_qs, cqe_tx->sq_idx,
716	    cqe_tx->sqe_ptr, hdr->subdesc_cnt);
717
718	dmap = (bus_dmamap_t)sq->snd_buff[cqe_tx->sqe_ptr].dmap;
719	bus_dmamap_unload(sq->snd_buff_dmat, dmap);
720
721	mbuf = (struct mbuf *)sq->snd_buff[cqe_tx->sqe_ptr].mbuf;
722	if (mbuf != NULL) {
723		m_freem(mbuf);
724		sq->snd_buff[cqe_tx->sqe_ptr].mbuf = NULL;
725		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
726	}
727
728	nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
729}
730
731static int
732nicvf_cq_intr_handler(struct nicvf *nic, uint8_t cq_idx)
733{
734	struct mbuf *mbuf;
735	struct ifnet *ifp;
736	int processed_cqe, work_done = 0, tx_done = 0;
737	int cqe_count, cqe_head;
738	struct queue_set *qs = nic->qs;
739	struct cmp_queue *cq = &qs->cq[cq_idx];
740	struct snd_queue *sq = &qs->sq[cq_idx];
741	struct rcv_queue *rq;
742	struct cqe_rx_t *cq_desc;
743	struct lro_ctrl	*lro;
744	int rq_idx;
745	int cmp_err;
746
747	NICVF_CMP_LOCK(cq);
748	cmp_err = 0;
749	processed_cqe = 0;
750	/* Get no of valid CQ entries to process */
751	cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
752	cqe_count &= CQ_CQE_COUNT;
753	if (cqe_count == 0)
754		goto out;
755
756	/* Get head of the valid CQ entries */
757	cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
758	cqe_head &= 0xFFFF;
759
760	dprintf(nic->dev, "%s CQ%d cqe_count %d cqe_head %d\n",
761	    __func__, cq_idx, cqe_count, cqe_head);
762	while (processed_cqe < cqe_count) {
763		/* Get the CQ descriptor */
764		cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
765		cqe_head++;
766		cqe_head &= (cq->dmem.q_len - 1);
767		/* Prefetch next CQ descriptor */
768		__builtin_prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));
769
770		dprintf(nic->dev, "CQ%d cq_desc->cqe_type %d\n", cq_idx,
771		    cq_desc->cqe_type);
772		switch (cq_desc->cqe_type) {
773		case CQE_TYPE_RX:
774			cmp_err = nicvf_rcv_pkt_handler(nic, cq, cq_desc,
775			    CQE_TYPE_RX);
776			if (__predict_false(cmp_err != 0)) {
777				/*
778				 * Ups. Cannot finish now.
779				 * Let's try again later.
780				 */
781				goto done;
782			}
783			work_done++;
784			break;
785		case CQE_TYPE_SEND:
786			nicvf_snd_pkt_handler(nic, cq, (void *)cq_desc,
787			    CQE_TYPE_SEND);
788			tx_done++;
789			break;
790		case CQE_TYPE_INVALID:
791		case CQE_TYPE_RX_SPLIT:
792		case CQE_TYPE_RX_TCP:
793		case CQE_TYPE_SEND_PTP:
794			/* Ignore for now */
795			break;
796		}
797		processed_cqe++;
798	}
799done:
800	dprintf(nic->dev,
801	    "%s CQ%d processed_cqe %d work_done %d\n",
802	    __func__, cq_idx, processed_cqe, work_done);
803
804	/* Ring doorbell to inform H/W to reuse processed CQEs */
805	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, cq_idx, processed_cqe);
806
807	if ((tx_done > 0) &&
808	    ((if_getdrvflags(nic->ifp) & IFF_DRV_RUNNING) != 0)) {
809		/* Reenable TXQ if its stopped earlier due to SQ full */
810		if_setdrvflagbits(nic->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
811		taskqueue_enqueue(sq->snd_taskq, &sq->snd_task);
812	}
813out:
814	/*
815	 * Flush any outstanding LRO work
816	 */
817	rq_idx = cq_idx;
818	rq = &nic->qs->rq[rq_idx];
819	lro = &rq->lro;
820	tcp_lro_flush_all(lro);
821
822	NICVF_CMP_UNLOCK(cq);
823
824	ifp = nic->ifp;
825	/* Push received MBUFs to the stack */
826	while (!buf_ring_empty(cq->rx_br)) {
827		mbuf = buf_ring_dequeue_mc(cq->rx_br);
828		if (__predict_true(mbuf != NULL))
829			(*ifp->if_input)(ifp, mbuf);
830	}
831
832	return (cmp_err);
833}
834
835/*
836 * Qset error interrupt handler
837 *
838 * As of now only CQ errors are handled
839 */
840static void
841nicvf_qs_err_task(void *arg, int pending)
842{
843	struct nicvf *nic;
844	struct queue_set *qs;
845	int qidx;
846	uint64_t status;
847	boolean_t enable = TRUE;
848
849	nic = (struct nicvf *)arg;
850	qs = nic->qs;
851
852	/* Deactivate network interface */
853	if_setdrvflagbits(nic->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
854
855	/* Check if it is CQ err */
856	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
857		status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
858		    qidx);
859		if ((status & CQ_ERR_MASK) == 0)
860			continue;
861		/* Process already queued CQEs and reconfig CQ */
862		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
863		nicvf_sq_disable(nic, qidx);
864		(void)nicvf_cq_intr_handler(nic, qidx);
865		nicvf_cmp_queue_config(nic, qs, qidx, enable);
866		nicvf_sq_free_used_descs(nic, &qs->sq[qidx], qidx);
867		nicvf_sq_enable(nic, &qs->sq[qidx], qidx);
868		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
869	}
870
871	if_setdrvflagbits(nic->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
872	/* Re-enable Qset error interrupt */
873	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
874}
875
876static void
877nicvf_cmp_task(void *arg, int pending)
878{
879	struct cmp_queue *cq;
880	struct nicvf *nic;
881	int cmp_err;
882
883	cq = (struct cmp_queue *)arg;
884	nic = cq->nic;
885
886	/* Handle CQ descriptors */
887	cmp_err = nicvf_cq_intr_handler(nic, cq->idx);
888	if (__predict_false(cmp_err != 0)) {
889		/*
890		 * Schedule another thread here since we did not
891		 * process the entire CQ due to Tx or Rx CQ parse error.
892		 */
893		taskqueue_enqueue(cq->cmp_taskq, &cq->cmp_task);
894
895	}
896
897	nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->idx);
898	/* Reenable interrupt (previously disabled in nicvf_intr_handler() */
899	nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->idx);
900
901}
902
903/* Initialize completion queue */
904static int
905nicvf_init_cmp_queue(struct nicvf *nic, struct cmp_queue *cq, int q_len,
906    int qidx)
907{
908	int err;
909
910	/* Initizalize lock */
911	snprintf(cq->mtx_name, sizeof(cq->mtx_name), "%s: CQ(%d) lock",
912	    device_get_nameunit(nic->dev), qidx);
913	mtx_init(&cq->mtx, cq->mtx_name, NULL, MTX_DEF);
914
915	err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
916				     NICVF_CQ_BASE_ALIGN_BYTES);
917
918	if (err != 0) {
919		device_printf(nic->dev,
920		    "Could not allocate DMA memory for CQ\n");
921		return (err);
922	}
923
924	cq->desc = cq->dmem.base;
925	cq->thresh = pass1_silicon(nic->dev) ? 0 : CMP_QUEUE_CQE_THRESH;
926	cq->nic = nic;
927	cq->idx = qidx;
928	nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;
929
930	cq->rx_br = buf_ring_alloc(CMP_QUEUE_LEN * 8, M_DEVBUF, M_WAITOK,
931	    &cq->mtx);
932
933	/* Allocate taskqueue */
934	TASK_INIT(&cq->cmp_task, 0, nicvf_cmp_task, cq);
935	cq->cmp_taskq = taskqueue_create_fast("nicvf_cmp_taskq", M_WAITOK,
936	    taskqueue_thread_enqueue, &cq->cmp_taskq);
937	taskqueue_start_threads(&cq->cmp_taskq, 1, PI_NET, "%s: cmp_taskq(%d)",
938	    device_get_nameunit(nic->dev), qidx);
939
940	return (0);
941}
942
943static void
944nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
945{
946
947	if (cq == NULL)
948		return;
949	/*
950	 * The completion queue itself should be disabled by now
951	 * (ref. nicvf_snd_queue_config()).
952	 * Ensure that it is safe to disable it or panic.
953	 */
954	if (cq->enable)
955		panic("%s: Trying to free working CQ(%d)", __func__, cq->idx);
956
957	if (cq->cmp_taskq != NULL) {
958		/* Remove task */
959		while (taskqueue_cancel(cq->cmp_taskq, &cq->cmp_task, NULL) != 0)
960			taskqueue_drain(cq->cmp_taskq, &cq->cmp_task);
961
962		taskqueue_free(cq->cmp_taskq);
963		cq->cmp_taskq = NULL;
964	}
965	/*
966	 * Completion interrupt will possibly enable interrupts again
967	 * so disable interrupting now after we finished processing
968	 * completion task. It is safe to do so since the corresponding CQ
969	 * was already disabled.
970	 */
971	nicvf_disable_intr(nic, NICVF_INTR_CQ, cq->idx);
972	nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->idx);
973
974	NICVF_CMP_LOCK(cq);
975	nicvf_free_q_desc_mem(nic, &cq->dmem);
976	drbr_free(cq->rx_br, M_DEVBUF);
977	NICVF_CMP_UNLOCK(cq);
978	mtx_destroy(&cq->mtx);
979	memset(cq->mtx_name, 0, sizeof(cq->mtx_name));
980}
981
982int
983nicvf_xmit_locked(struct snd_queue *sq)
984{
985	struct nicvf *nic;
986	struct ifnet *ifp;
987	struct mbuf *next;
988	int err;
989
990	NICVF_TX_LOCK_ASSERT(sq);
991
992	nic = sq->nic;
993	ifp = nic->ifp;
994	err = 0;
995
996	while ((next = drbr_peek(ifp, sq->br)) != NULL) {
997		/* Send a copy of the frame to the BPF listener */
998		ETHER_BPF_MTAP(ifp, next);
999
1000		err = nicvf_tx_mbuf_locked(sq, &next);
1001		if (err != 0) {
1002			if (next == NULL)
1003				drbr_advance(ifp, sq->br);
1004			else
1005				drbr_putback(ifp, sq->br, next);
1006
1007			break;
1008		}
1009		drbr_advance(ifp, sq->br);
1010	}
1011	return (err);
1012}
1013
1014static void
1015nicvf_snd_task(void *arg, int pending)
1016{
1017	struct snd_queue *sq = (struct snd_queue *)arg;
1018	struct nicvf *nic;
1019	struct ifnet *ifp;
1020	int err;
1021
1022	nic = sq->nic;
1023	ifp = nic->ifp;
1024
1025	/*
1026	 * Skip sending anything if the driver is not running,
1027	 * SQ full or link is down.
1028	 */
1029	if (((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1030	    IFF_DRV_RUNNING) || !nic->link_up)
1031		return;
1032
1033	NICVF_TX_LOCK(sq);
1034	err = nicvf_xmit_locked(sq);
1035	NICVF_TX_UNLOCK(sq);
1036	/* Try again */
1037	if (err != 0)
1038		taskqueue_enqueue(sq->snd_taskq, &sq->snd_task);
1039}
1040
1041/* Initialize transmit queue */
1042static int
1043nicvf_init_snd_queue(struct nicvf *nic, struct snd_queue *sq, int q_len,
1044    int qidx)
1045{
1046	size_t i;
1047	int err;
1048
1049	/* Initizalize TX lock for this queue */
1050	snprintf(sq->mtx_name, sizeof(sq->mtx_name), "%s: SQ(%d) lock",
1051	    device_get_nameunit(nic->dev), qidx);
1052	mtx_init(&sq->mtx, sq->mtx_name, NULL, MTX_DEF);
1053
1054	NICVF_TX_LOCK(sq);
1055	/* Allocate buffer ring */
1056	sq->br = buf_ring_alloc(q_len / MIN_SQ_DESC_PER_PKT_XMIT, M_DEVBUF,
1057	    M_NOWAIT, &sq->mtx);
1058	if (sq->br == NULL) {
1059		device_printf(nic->dev,
1060		    "ERROR: Could not set up buf ring for SQ(%d)\n", qidx);
1061		err = ENOMEM;
1062		goto error;
1063	}
1064
1065	/* Allocate DMA memory for Tx descriptors */
1066	err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
1067				     NICVF_SQ_BASE_ALIGN_BYTES);
1068	if (err != 0) {
1069		device_printf(nic->dev,
1070		    "Could not allocate DMA memory for SQ\n");
1071		goto error;
1072	}
1073
1074	sq->desc = sq->dmem.base;
1075	sq->head = sq->tail = 0;
1076	atomic_store_rel_int(&sq->free_cnt, q_len - 1);
1077	sq->thresh = SND_QUEUE_THRESH;
1078	sq->idx = qidx;
1079	sq->nic = nic;
1080
1081	/*
1082	 * Allocate DMA maps for Tx buffers
1083	 */
1084
1085	/* Create DMA tag first */
1086	err = bus_dma_tag_create(
1087	    bus_get_dma_tag(nic->dev),		/* parent tag */
1088	    1,					/* alignment */
1089	    0,					/* boundary */
1090	    BUS_SPACE_MAXADDR,			/* lowaddr */
1091	    BUS_SPACE_MAXADDR,			/* highaddr */
1092	    NULL, NULL,				/* filtfunc, filtfuncarg */
1093	    NICVF_TSO_MAXSIZE,			/* maxsize */
1094	    NICVF_TSO_NSEGS,			/* nsegments */
1095	    MCLBYTES,				/* maxsegsize */
1096	    0,					/* flags */
1097	    NULL, NULL,				/* lockfunc, lockfuncarg */
1098	    &sq->snd_buff_dmat);		/* dmat */
1099
1100	if (err != 0) {
1101		device_printf(nic->dev,
1102		    "Failed to create busdma tag for Tx buffers\n");
1103		goto error;
1104	}
1105
1106	/* Allocate send buffers array */
1107	sq->snd_buff = malloc(sizeof(*sq->snd_buff) * q_len, M_NICVF,
1108	    (M_NOWAIT | M_ZERO));
1109	if (sq->snd_buff == NULL) {
1110		device_printf(nic->dev,
1111		    "Could not allocate memory for Tx buffers array\n");
1112		err = ENOMEM;
1113		goto error;
1114	}
1115
1116	/* Now populate maps */
1117	for (i = 0; i < q_len; i++) {
1118		err = bus_dmamap_create(sq->snd_buff_dmat, 0,
1119		    &sq->snd_buff[i].dmap);
1120		if (err != 0) {
1121			device_printf(nic->dev,
1122			    "Failed to create DMA maps for Tx buffers\n");
1123			goto error;
1124		}
1125	}
1126	NICVF_TX_UNLOCK(sq);
1127
1128	/* Allocate taskqueue */
1129	TASK_INIT(&sq->snd_task, 0, nicvf_snd_task, sq);
1130	sq->snd_taskq = taskqueue_create_fast("nicvf_snd_taskq", M_WAITOK,
1131	    taskqueue_thread_enqueue, &sq->snd_taskq);
1132	taskqueue_start_threads(&sq->snd_taskq, 1, PI_NET, "%s: snd_taskq(%d)",
1133	    device_get_nameunit(nic->dev), qidx);
1134
1135	return (0);
1136error:
1137	NICVF_TX_UNLOCK(sq);
1138	return (err);
1139}
1140
1141static void
1142nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
1143{
1144	struct queue_set *qs = nic->qs;
1145	size_t i;
1146	int err;
1147
1148	if (sq == NULL)
1149		return;
1150
1151	if (sq->snd_taskq != NULL) {
1152		/* Remove task */
1153		while (taskqueue_cancel(sq->snd_taskq, &sq->snd_task, NULL) != 0)
1154			taskqueue_drain(sq->snd_taskq, &sq->snd_task);
1155
1156		taskqueue_free(sq->snd_taskq);
1157		sq->snd_taskq = NULL;
1158	}
1159
1160	NICVF_TX_LOCK(sq);
1161	if (sq->snd_buff_dmat != NULL) {
1162		if (sq->snd_buff != NULL) {
1163			for (i = 0; i < qs->sq_len; i++) {
1164				m_freem(sq->snd_buff[i].mbuf);
1165				sq->snd_buff[i].mbuf = NULL;
1166
1167				bus_dmamap_unload(sq->snd_buff_dmat,
1168				    sq->snd_buff[i].dmap);
1169				err = bus_dmamap_destroy(sq->snd_buff_dmat,
1170				    sq->snd_buff[i].dmap);
1171				/*
1172				 * If bus_dmamap_destroy fails it can cause
1173				 * random panic later if the tag is also
1174				 * destroyed in the process.
1175				 */
1176				KASSERT(err == 0,
1177				    ("%s: Could not destroy DMA map for SQ",
1178				    __func__));
1179			}
1180		}
1181
1182		free(sq->snd_buff, M_NICVF);
1183
1184		err = bus_dma_tag_destroy(sq->snd_buff_dmat);
1185		KASSERT(err == 0,
1186		    ("%s: Trying to destroy BUSY DMA tag", __func__));
1187	}
1188
1189	/* Free private driver ring for this send queue */
1190	if (sq->br != NULL)
1191		drbr_free(sq->br, M_DEVBUF);
1192
1193	if (sq->dmem.base != NULL)
1194		nicvf_free_q_desc_mem(nic, &sq->dmem);
1195
1196	NICVF_TX_UNLOCK(sq);
1197	/* Destroy Tx lock */
1198	mtx_destroy(&sq->mtx);
1199	memset(sq->mtx_name, 0, sizeof(sq->mtx_name));
1200}
1201
1202static void
1203nicvf_reclaim_snd_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
1204{
1205
1206	/* Disable send queue */
1207	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
1208	/* Check if SQ is stopped */
1209	if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
1210		return;
1211	/* Reset send queue */
1212	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
1213}
1214
1215static void
1216nicvf_reclaim_rcv_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
1217{
1218	union nic_mbx mbx = {};
1219
1220	/* Make sure all packets in the pipeline are written back into mem */
1221	mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
1222	nicvf_send_msg_to_pf(nic, &mbx);
1223}
1224
1225static void
1226nicvf_reclaim_cmp_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
1227{
1228
1229	/* Disable timer threshold (doesn't get reset upon CQ reset */
1230	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
1231	/* Disable completion queue */
1232	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
1233	/* Reset completion queue */
1234	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
1235}
1236
1237static void
1238nicvf_reclaim_rbdr(struct nicvf *nic, struct rbdr *rbdr, int qidx)
1239{
1240	uint64_t tmp, fifo_state;
1241	int timeout = 10;
1242
1243	/* Save head and tail pointers for feeing up buffers */
1244	rbdr->head =
1245	    nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_HEAD, qidx) >> 3;
1246	rbdr->tail =
1247	    nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, qidx) >> 3;
1248
1249	/*
1250	 * If RBDR FIFO is in 'FAIL' state then do a reset first
1251	 * before relaiming.
1252	 */
1253	fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
1254	if (((fifo_state >> 62) & 0x03) == 0x3) {
1255		nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
1256		    qidx, NICVF_RBDR_RESET);
1257	}
1258
1259	/* Disable RBDR */
1260	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
1261	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
1262		return;
1263	while (1) {
1264		tmp = nicvf_queue_reg_read(nic,
1265		    NIC_QSET_RBDR_0_1_PREFETCH_STATUS, qidx);
1266		if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
1267			break;
1268
1269		DELAY(1000);
1270		timeout--;
1271		if (!timeout) {
1272			device_printf(nic->dev,
1273			    "Failed polling on prefetch status\n");
1274			return;
1275		}
1276	}
1277	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx,
1278	    NICVF_RBDR_RESET);
1279
1280	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
1281		return;
1282	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
1283	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
1284		return;
1285}
1286
1287/* Configures receive queue */
1288static void
1289nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
1290    int qidx, bool enable)
1291{
1292	union nic_mbx mbx = {};
1293	struct rcv_queue *rq;
1294	struct rq_cfg rq_cfg;
1295	struct ifnet *ifp;
1296	struct lro_ctrl	*lro;
1297
1298	ifp = nic->ifp;
1299
1300	rq = &qs->rq[qidx];
1301	rq->enable = enable;
1302
1303	lro = &rq->lro;
1304
1305	/* Disable receive queue */
1306	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);
1307
1308	if (!rq->enable) {
1309		nicvf_reclaim_rcv_queue(nic, qs, qidx);
1310		/* Free LRO memory */
1311		tcp_lro_free(lro);
1312		rq->lro_enabled = FALSE;
1313		return;
1314	}
1315
1316	/* Configure LRO if enabled */
1317	rq->lro_enabled = FALSE;
1318	if ((if_getcapenable(ifp) & IFCAP_LRO) != 0) {
1319		if (tcp_lro_init(lro) != 0) {
1320			device_printf(nic->dev,
1321			    "Failed to initialize LRO for RXQ%d\n", qidx);
1322		} else {
1323			rq->lro_enabled = TRUE;
1324			lro->ifp = nic->ifp;
1325		}
1326	}
1327
1328	rq->cq_qs = qs->vnic_id;
1329	rq->cq_idx = qidx;
1330	rq->start_rbdr_qs = qs->vnic_id;
1331	rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
1332	rq->cont_rbdr_qs = qs->vnic_id;
1333	rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
1334	/* all writes of RBDR data to be loaded into L2 Cache as well*/
1335	rq->caching = 1;
1336
1337	/* Send a mailbox msg to PF to config RQ */
1338	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
1339	mbx.rq.qs_num = qs->vnic_id;
1340	mbx.rq.rq_num = qidx;
1341	mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) |
1342	    (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
1343	    (rq->cont_qs_rbdr_idx << 8) | (rq->start_rbdr_qs << 1) |
1344	    (rq->start_qs_rbdr_idx);
1345	nicvf_send_msg_to_pf(nic, &mbx);
1346
1347	mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
1348	mbx.rq.cfg = (1UL << 63) | (1UL << 62) | (qs->vnic_id << 0);
1349	nicvf_send_msg_to_pf(nic, &mbx);
1350
1351	/*
1352	 * RQ drop config
1353	 * Enable CQ drop to reserve sufficient CQEs for all tx packets
1354	 */
1355	mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
1356	mbx.rq.cfg = (1UL << 62) | (RQ_CQ_DROP << 8);
1357	nicvf_send_msg_to_pf(nic, &mbx);
1358
1359	nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00);
1360
1361	/* Enable Receive queue */
1362	rq_cfg.ena = 1;
1363	rq_cfg.tcp_ena = 0;
1364	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx,
1365	    *(uint64_t *)&rq_cfg);
1366}
1367
1368/* Configures completion queue */
1369static void
1370nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
1371    int qidx, boolean_t enable)
1372{
1373	struct cmp_queue *cq;
1374	struct cq_cfg cq_cfg;
1375
1376	cq = &qs->cq[qidx];
1377	cq->enable = enable;
1378
1379	if (!cq->enable) {
1380		nicvf_reclaim_cmp_queue(nic, qs, qidx);
1381		return;
1382	}
1383
1384	/* Reset completion queue */
1385	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
1386
1387	/* Set completion queue base address */
1388	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE, qidx,
1389	    (uint64_t)(cq->dmem.phys_base));
1390
1391	/* Enable Completion queue */
1392	cq_cfg.ena = 1;
1393	cq_cfg.reset = 0;
1394	cq_cfg.caching = 0;
1395	cq_cfg.qsize = CMP_QSIZE;
1396	cq_cfg.avg_con = 0;
1397	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(uint64_t *)&cq_cfg);
1398
1399	/* Set threshold value for interrupt generation */
1400	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
1401	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx,
1402	    nic->cq_coalesce_usecs);
1403}
1404
1405/* Configures transmit queue */
1406static void
1407nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs, int qidx,
1408    boolean_t enable)
1409{
1410	union nic_mbx mbx = {};
1411	struct snd_queue *sq;
1412	struct sq_cfg sq_cfg;
1413
1414	sq = &qs->sq[qidx];
1415	sq->enable = enable;
1416
1417	if (!sq->enable) {
1418		nicvf_reclaim_snd_queue(nic, qs, qidx);
1419		return;
1420	}
1421
1422	/* Reset send queue */
1423	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
1424
1425	sq->cq_qs = qs->vnic_id;
1426	sq->cq_idx = qidx;
1427
1428	/* Send a mailbox msg to PF to config SQ */
1429	mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
1430	mbx.sq.qs_num = qs->vnic_id;
1431	mbx.sq.sq_num = qidx;
1432	mbx.sq.sqs_mode = nic->sqs_mode;
1433	mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
1434	nicvf_send_msg_to_pf(nic, &mbx);
1435
1436	/* Set queue base address */
1437	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE, qidx,
1438	    (uint64_t)(sq->dmem.phys_base));
1439
1440	/* Enable send queue  & set queue size */
1441	sq_cfg.ena = 1;
1442	sq_cfg.reset = 0;
1443	sq_cfg.ldwb = 0;
1444	sq_cfg.qsize = SND_QSIZE;
1445	sq_cfg.tstmp_bgx_intf = 0;
1446	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(uint64_t *)&sq_cfg);
1447
1448	/* Set threshold value for interrupt generation */
1449	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
1450}
1451
1452/* Configures receive buffer descriptor ring */
1453static void
1454nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs, int qidx,
1455    boolean_t enable)
1456{
1457	struct rbdr *rbdr;
1458	struct rbdr_cfg rbdr_cfg;
1459
1460	rbdr = &qs->rbdr[qidx];
1461	nicvf_reclaim_rbdr(nic, rbdr, qidx);
1462	if (!enable)
1463		return;
1464
1465	/* Set descriptor base address */
1466	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE, qidx,
1467	    (uint64_t)(rbdr->dmem.phys_base));
1468
1469	/* Enable RBDR  & set queue size */
1470	/* Buffer size should be in multiples of 128 bytes */
1471	rbdr_cfg.ena = 1;
1472	rbdr_cfg.reset = 0;
1473	rbdr_cfg.ldwb = 0;
1474	rbdr_cfg.qsize = RBDR_SIZE;
1475	rbdr_cfg.avg_con = 0;
1476	rbdr_cfg.lines = rbdr->dma_size / 128;
1477	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx,
1478	    *(uint64_t *)&rbdr_cfg);
1479
1480	/* Notify HW */
1481	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, qidx,
1482	    qs->rbdr_len - 1);
1483
1484	/* Set threshold value for interrupt generation */
1485	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH, qidx,
1486	    rbdr->thresh - 1);
1487}
1488
1489/* Requests PF to assign and enable Qset */
1490void
1491nicvf_qset_config(struct nicvf *nic, boolean_t enable)
1492{
1493	union nic_mbx mbx = {};
1494	struct queue_set *qs;
1495	struct qs_cfg *qs_cfg;
1496
1497	qs = nic->qs;
1498	if (qs == NULL) {
1499		device_printf(nic->dev,
1500		    "Qset is still not allocated, don't init queues\n");
1501		return;
1502	}
1503
1504	qs->enable = enable;
1505	qs->vnic_id = nic->vf_id;
1506
1507	/* Send a mailbox msg to PF to config Qset */
1508	mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
1509	mbx.qs.num = qs->vnic_id;
1510
1511	mbx.qs.cfg = 0;
1512	qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
1513	if (qs->enable) {
1514		qs_cfg->ena = 1;
1515		qs_cfg->vnic = qs->vnic_id;
1516	}
1517	nicvf_send_msg_to_pf(nic, &mbx);
1518}
1519
1520static void
1521nicvf_free_resources(struct nicvf *nic)
1522{
1523	int qidx;
1524	struct queue_set *qs;
1525
1526	qs = nic->qs;
1527	/*
1528	 * Remove QS error task first since it has to be dead
1529	 * to safely free completion queue tasks.
1530	 */
1531	if (qs->qs_err_taskq != NULL) {
1532		/* Shut down QS error tasks */
1533		while (taskqueue_cancel(qs->qs_err_taskq,
1534		    &qs->qs_err_task,  NULL) != 0) {
1535			taskqueue_drain(qs->qs_err_taskq, &qs->qs_err_task);
1536
1537		}
1538		taskqueue_free(qs->qs_err_taskq);
1539		qs->qs_err_taskq = NULL;
1540	}
1541	/* Free receive buffer descriptor ring */
1542	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1543		nicvf_free_rbdr(nic, &qs->rbdr[qidx]);
1544
1545	/* Free completion queue */
1546	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1547		nicvf_free_cmp_queue(nic, &qs->cq[qidx]);
1548
1549	/* Free send queue */
1550	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1551		nicvf_free_snd_queue(nic, &qs->sq[qidx]);
1552}
1553
1554static int
1555nicvf_alloc_resources(struct nicvf *nic)
1556{
1557	struct queue_set *qs = nic->qs;
1558	int qidx;
1559
1560	/* Alloc receive buffer descriptor ring */
1561	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
1562		if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
1563				    DMA_BUFFER_LEN, qidx))
1564			goto alloc_fail;
1565	}
1566
1567	/* Alloc send queue */
1568	for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
1569		if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
1570			goto alloc_fail;
1571	}
1572
1573	/* Alloc completion queue */
1574	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
1575		if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len, qidx))
1576			goto alloc_fail;
1577	}
1578
1579	/* Allocate QS error taskqueue */
1580	TASK_INIT(&qs->qs_err_task, 0, nicvf_qs_err_task, nic);
1581	qs->qs_err_taskq = taskqueue_create_fast("nicvf_qs_err_taskq", M_WAITOK,
1582	    taskqueue_thread_enqueue, &qs->qs_err_taskq);
1583	taskqueue_start_threads(&qs->qs_err_taskq, 1, PI_NET, "%s: qs_taskq",
1584	    device_get_nameunit(nic->dev));
1585
1586	return (0);
1587alloc_fail:
1588	nicvf_free_resources(nic);
1589	return (ENOMEM);
1590}
1591
1592int
1593nicvf_set_qset_resources(struct nicvf *nic)
1594{
1595	struct queue_set *qs;
1596
1597	qs = malloc(sizeof(*qs), M_NICVF, (M_ZERO | M_WAITOK));
1598	nic->qs = qs;
1599
1600	/* Set count of each queue */
1601	qs->rbdr_cnt = RBDR_CNT;
1602	qs->rq_cnt = RCV_QUEUE_CNT;
1603
1604	qs->sq_cnt = SND_QUEUE_CNT;
1605	qs->cq_cnt = CMP_QUEUE_CNT;
1606
1607	/* Set queue lengths */
1608	qs->rbdr_len = RCV_BUF_COUNT;
1609	qs->sq_len = SND_QUEUE_LEN;
1610	qs->cq_len = CMP_QUEUE_LEN;
1611
1612	nic->rx_queues = qs->rq_cnt;
1613	nic->tx_queues = qs->sq_cnt;
1614
1615	return (0);
1616}
1617
1618int
1619nicvf_config_data_transfer(struct nicvf *nic, boolean_t enable)
1620{
1621	boolean_t disable = FALSE;
1622	struct queue_set *qs;
1623	int qidx;
1624
1625	qs = nic->qs;
1626	if (qs == NULL)
1627		return (0);
1628
1629	if (enable) {
1630		if (nicvf_alloc_resources(nic) != 0)
1631			return (ENOMEM);
1632
1633		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1634			nicvf_snd_queue_config(nic, qs, qidx, enable);
1635		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1636			nicvf_cmp_queue_config(nic, qs, qidx, enable);
1637		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1638			nicvf_rbdr_config(nic, qs, qidx, enable);
1639		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1640			nicvf_rcv_queue_config(nic, qs, qidx, enable);
1641	} else {
1642		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1643			nicvf_rcv_queue_config(nic, qs, qidx, disable);
1644		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1645			nicvf_rbdr_config(nic, qs, qidx, disable);
1646		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1647			nicvf_snd_queue_config(nic, qs, qidx, disable);
1648		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1649			nicvf_cmp_queue_config(nic, qs, qidx, disable);
1650
1651		nicvf_free_resources(nic);
1652	}
1653
1654	return (0);
1655}
1656
1657/*
1658 * Get a free desc from SQ
1659 * returns descriptor ponter & descriptor number
1660 */
1661static __inline int
1662nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
1663{
1664	int qentry;
1665
1666	qentry = sq->tail;
1667	atomic_subtract_int(&sq->free_cnt, desc_cnt);
1668	sq->tail += desc_cnt;
1669	sq->tail &= (sq->dmem.q_len - 1);
1670
1671	return (qentry);
1672}
1673
1674/* Free descriptor back to SQ for future use */
1675static void
1676nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
1677{
1678
1679	atomic_add_int(&sq->free_cnt, desc_cnt);
1680	sq->head += desc_cnt;
1681	sq->head &= (sq->dmem.q_len - 1);
1682}
1683
1684static __inline int
1685nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
1686{
1687	qentry++;
1688	qentry &= (sq->dmem.q_len - 1);
1689	return (qentry);
1690}
1691
1692static void
1693nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
1694{
1695	uint64_t sq_cfg;
1696
1697	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1698	sq_cfg |= NICVF_SQ_EN;
1699	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1700	/* Ring doorbell so that H/W restarts processing SQEs */
1701	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
1702}
1703
1704static void
1705nicvf_sq_disable(struct nicvf *nic, int qidx)
1706{
1707	uint64_t sq_cfg;
1708
1709	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1710	sq_cfg &= ~NICVF_SQ_EN;
1711	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1712}
1713
1714static void
1715nicvf_sq_free_used_descs(struct nicvf *nic, struct snd_queue *sq, int qidx)
1716{
1717	uint64_t head, tail;
1718	struct snd_buff *snd_buff;
1719	struct sq_hdr_subdesc *hdr;
1720
1721	NICVF_TX_LOCK(sq);
1722	head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
1723	tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
1724	while (sq->head != head) {
1725		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
1726		if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
1727			nicvf_put_sq_desc(sq, 1);
1728			continue;
1729		}
1730		snd_buff = &sq->snd_buff[sq->head];
1731		if (snd_buff->mbuf != NULL) {
1732			bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
1733			m_freem(snd_buff->mbuf);
1734			sq->snd_buff[sq->head].mbuf = NULL;
1735		}
1736		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
1737	}
1738	NICVF_TX_UNLOCK(sq);
1739}
1740
1741/*
1742 * Add SQ HEADER subdescriptor.
1743 * First subdescriptor for every send descriptor.
1744 */
1745static __inline int
1746nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
1747			 int subdesc_cnt, struct mbuf *mbuf, int len)
1748{
1749	struct nicvf *nic;
1750	struct sq_hdr_subdesc *hdr;
1751	struct ether_vlan_header *eh;
1752#ifdef INET
1753	struct ip *ip;
1754	struct tcphdr *th;
1755#endif
1756	uint16_t etype;
1757	int ehdrlen, iphlen, poff, proto;
1758
1759	nic = sq->nic;
1760
1761	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1762	sq->snd_buff[qentry].mbuf = mbuf;
1763
1764	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1765	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1766	/* Enable notification via CQE after processing SQE */
1767	hdr->post_cqe = 1;
1768	/* No of subdescriptors following this */
1769	hdr->subdesc_cnt = subdesc_cnt;
1770	hdr->tot_len = len;
1771
1772	eh = mtod(mbuf, struct ether_vlan_header *);
1773	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1774		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1775		etype = ntohs(eh->evl_proto);
1776	} else {
1777		ehdrlen = ETHER_HDR_LEN;
1778		etype = ntohs(eh->evl_encap_proto);
1779	}
1780
1781	poff = proto = -1;
1782	switch (etype) {
1783#ifdef INET6
1784	case ETHERTYPE_IPV6:
1785		if (mbuf->m_len < ehdrlen + sizeof(struct ip6_hdr)) {
1786			mbuf = m_pullup(mbuf, ehdrlen +sizeof(struct ip6_hdr));
1787			sq->snd_buff[qentry].mbuf = NULL;
1788			if (mbuf == NULL)
1789				return (ENOBUFS);
1790		}
1791		poff = ip6_lasthdr(mbuf, ehdrlen, IPPROTO_IPV6, &proto);
1792		if (poff < 0)
1793			return (ENOBUFS);
1794		poff += ehdrlen;
1795		break;
1796#endif
1797#ifdef INET
1798	case ETHERTYPE_IP:
1799		if (mbuf->m_len < ehdrlen + sizeof(struct ip)) {
1800			mbuf = m_pullup(mbuf, ehdrlen + sizeof(struct ip));
1801			sq->snd_buff[qentry].mbuf = mbuf;
1802			if (mbuf == NULL)
1803				return (ENOBUFS);
1804		}
1805
1806		ip = (struct ip *)(mbuf->m_data + ehdrlen);
1807		iphlen = ip->ip_hl << 2;
1808		poff = ehdrlen + iphlen;
1809		proto = ip->ip_p;
1810		break;
1811#endif
1812	default:
1813		hdr->csum_l3 = 0;
1814	}
1815
1816#if defined(INET6) || defined(INET)
1817	if (poff > 0 && mbuf->m_pkthdr.csum_flags != 0) {
1818		hdr->csum_l3 = 1; /* Enable IP csum calculation */
1819		switch (proto) {
1820		case IPPROTO_TCP:
1821			if ((mbuf->m_pkthdr.csum_flags & CSUM_TCP) == 0)
1822				break;
1823
1824			if (mbuf->m_len < (poff + sizeof(struct tcphdr))) {
1825				mbuf = m_pullup(mbuf, poff + sizeof(struct tcphdr));
1826				sq->snd_buff[qentry].mbuf = mbuf;
1827				if (mbuf == NULL)
1828					return (ENOBUFS);
1829			}
1830			hdr->csum_l4 = SEND_L4_CSUM_TCP;
1831			break;
1832		case IPPROTO_UDP:
1833			if ((mbuf->m_pkthdr.csum_flags & CSUM_UDP) == 0)
1834				break;
1835
1836			if (mbuf->m_len < (poff + sizeof(struct udphdr))) {
1837				mbuf = m_pullup(mbuf, poff + sizeof(struct udphdr));
1838				sq->snd_buff[qentry].mbuf = mbuf;
1839				if (mbuf == NULL)
1840					return (ENOBUFS);
1841			}
1842			hdr->csum_l4 = SEND_L4_CSUM_UDP;
1843			break;
1844		case IPPROTO_SCTP:
1845			if ((mbuf->m_pkthdr.csum_flags & CSUM_SCTP) == 0)
1846				break;
1847
1848			if (mbuf->m_len < (poff + sizeof(struct sctphdr))) {
1849				mbuf = m_pullup(mbuf, poff + sizeof(struct sctphdr));
1850				sq->snd_buff[qentry].mbuf = mbuf;
1851				if (mbuf == NULL)
1852					return (ENOBUFS);
1853			}
1854			hdr->csum_l4 = SEND_L4_CSUM_SCTP;
1855			break;
1856		default:
1857			break;
1858		}
1859		hdr->l3_offset = ehdrlen;
1860		hdr->l4_offset = poff;
1861	}
1862
1863	if ((mbuf->m_pkthdr.tso_segsz != 0) && nic->hw_tso) {
1864		th = (struct tcphdr *)((caddr_t)(mbuf->m_data + poff));
1865
1866		hdr->tso = 1;
1867		hdr->tso_start = poff + (th->th_off * 4);
1868		hdr->tso_max_paysize = mbuf->m_pkthdr.tso_segsz;
1869		hdr->inner_l3_offset = ehdrlen - 2;
1870		nic->drv_stats.tx_tso++;
1871	}
1872#endif
1873
1874	return (0);
1875}
1876
1877/*
1878 * SQ GATHER subdescriptor
1879 * Must follow HDR descriptor
1880 */
1881static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
1882					       int size, uint64_t data)
1883{
1884	struct sq_gather_subdesc *gather;
1885
1886	qentry &= (sq->dmem.q_len - 1);
1887	gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);
1888
1889	memset(gather, 0, SND_QUEUE_DESC_SIZE);
1890	gather->subdesc_type = SQ_DESC_TYPE_GATHER;
1891	gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
1892	gather->size = size;
1893	gather->addr = data;
1894}
1895
1896/* Put an mbuf to a SQ for packet transfer. */
1897static int
1898nicvf_tx_mbuf_locked(struct snd_queue *sq, struct mbuf **mbufp)
1899{
1900	bus_dma_segment_t segs[256];
1901	struct snd_buff *snd_buff;
1902	size_t seg;
1903	int nsegs, qentry;
1904	int subdesc_cnt;
1905	int err;
1906
1907	NICVF_TX_LOCK_ASSERT(sq);
1908
1909	if (sq->free_cnt == 0)
1910		return (ENOBUFS);
1911
1912	snd_buff = &sq->snd_buff[sq->tail];
1913
1914	err = bus_dmamap_load_mbuf_sg(sq->snd_buff_dmat, snd_buff->dmap,
1915	    *mbufp, segs, &nsegs, BUS_DMA_NOWAIT);
1916	if (__predict_false(err != 0)) {
1917		/* ARM64TODO: Add mbuf defragmenting if we lack maps */
1918		m_freem(*mbufp);
1919		*mbufp = NULL;
1920		return (err);
1921	}
1922
1923	/* Set how many subdescriptors is required */
1924	subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT + nsegs - 1;
1925	if (subdesc_cnt > sq->free_cnt) {
1926		/* ARM64TODO: Add mbuf defragmentation if we lack descriptors */
1927		bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
1928		return (ENOBUFS);
1929	}
1930
1931	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1932
1933	/* Add SQ header subdesc */
1934	err = nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, *mbufp,
1935	    (*mbufp)->m_pkthdr.len);
1936	if (err != 0) {
1937		nicvf_put_sq_desc(sq, subdesc_cnt);
1938		bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
1939		if (err == ENOBUFS) {
1940			m_freem(*mbufp);
1941			*mbufp = NULL;
1942		}
1943		return (err);
1944	}
1945
1946	/* Add SQ gather subdescs */
1947	for (seg = 0; seg < nsegs; seg++) {
1948		qentry = nicvf_get_nxt_sqentry(sq, qentry);
1949		nicvf_sq_add_gather_subdesc(sq, qentry, segs[seg].ds_len,
1950		    segs[seg].ds_addr);
1951	}
1952
1953	/* make sure all memory stores are done before ringing doorbell */
1954	bus_dmamap_sync(sq->dmem.dmat, sq->dmem.dmap, BUS_DMASYNC_PREWRITE);
1955
1956	dprintf(sq->nic->dev, "%s: sq->idx: %d, subdesc_cnt: %d\n",
1957	    __func__, sq->idx, subdesc_cnt);
1958	/* Inform HW to xmit new packet */
1959	nicvf_queue_reg_write(sq->nic, NIC_QSET_SQ_0_7_DOOR,
1960	    sq->idx, subdesc_cnt);
1961	return (0);
1962}
1963
/*
 * Map a fragment number to its index in the CQE length array.
 * On big-endian builds the position inside each group of four entries is
 * reversed (0<->3, 1<->2); otherwise the number is returned unchanged.
 */
static __inline u_int
frag_num(u_int i)
{
#if BYTE_ORDER == BIG_ENDIAN
	/* Flipping the two low bits mirrors the index within its group */
	return (i ^ 3);
#else
	return (i);
#endif
}
1973
1974/* Returns MBUF for a received packet */
1975struct mbuf *
1976nicvf_get_rcv_mbuf(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
1977{
1978	int frag;
1979	int payload_len = 0;
1980	struct mbuf *mbuf;
1981	struct mbuf *mbuf_frag;
1982	uint16_t *rb_lens = NULL;
1983	uint64_t *rb_ptrs = NULL;
1984
1985	mbuf = NULL;
1986	rb_lens = (uint16_t *)((uint8_t *)cqe_rx + (3 * sizeof(uint64_t)));
1987	rb_ptrs = (uint64_t *)((uint8_t *)cqe_rx + (6 * sizeof(uint64_t)));
1988
1989	dprintf(nic->dev, "%s rb_cnt %d rb0_ptr %lx rb0_sz %d\n",
1990	    __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
1991
1992	for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
1993		payload_len = rb_lens[frag_num(frag)];
1994		if (frag == 0) {
1995			/* First fragment */
1996			mbuf = nicvf_rb_ptr_to_mbuf(nic,
1997			    (*rb_ptrs - cqe_rx->align_pad));
1998			mbuf->m_len = payload_len;
1999			mbuf->m_data += cqe_rx->align_pad;
2000			if_setrcvif(mbuf, nic->ifp);
2001		} else {
2002			/* Add fragments */
2003			mbuf_frag = nicvf_rb_ptr_to_mbuf(nic, *rb_ptrs);
2004			m_append(mbuf, payload_len, mbuf_frag->m_data);
2005			m_freem(mbuf_frag);
2006		}
2007		/* Next buffer pointer */
2008		rb_ptrs++;
2009	}
2010
2011	if (__predict_true(mbuf != NULL)) {
2012		m_fixhdr(mbuf);
2013		mbuf->m_pkthdr.flowid = cqe_rx->rq_idx;
2014		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE);
2015		if (__predict_true((if_getcapenable(nic->ifp) & IFCAP_RXCSUM) != 0)) {
2016			/*
2017			 * HW by default verifies IP & TCP/UDP/SCTP checksums
2018			 */
2019			if (__predict_true(cqe_rx->l3_type == L3TYPE_IPV4)) {
2020				mbuf->m_pkthdr.csum_flags =
2021				    (CSUM_IP_CHECKED | CSUM_IP_VALID);
2022			}
2023
2024			switch (cqe_rx->l4_type) {
2025			case L4TYPE_UDP:
2026			case L4TYPE_TCP: /* fall through */
2027				mbuf->m_pkthdr.csum_flags |=
2028				    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2029				mbuf->m_pkthdr.csum_data = 0xffff;
2030				break;
2031			case L4TYPE_SCTP:
2032				mbuf->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
2033				break;
2034			default:
2035				break;
2036			}
2037		}
2038	}
2039
2040	return (mbuf);
2041}
2042
2043/* Enable interrupt */
2044void
2045nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
2046{
2047	uint64_t reg_val;
2048
2049	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
2050
2051	switch (int_type) {
2052	case NICVF_INTR_CQ:
2053		reg_val |= ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2054		break;
2055	case NICVF_INTR_SQ:
2056		reg_val |= ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2057		break;
2058	case NICVF_INTR_RBDR:
2059		reg_val |= ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2060		break;
2061	case NICVF_INTR_PKT_DROP:
2062		reg_val |= (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2063		break;
2064	case NICVF_INTR_TCP_TIMER:
2065		reg_val |= (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2066		break;
2067	case NICVF_INTR_MBOX:
2068		reg_val |= (1UL << NICVF_INTR_MBOX_SHIFT);
2069		break;
2070	case NICVF_INTR_QS_ERR:
2071		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2072		break;
2073	default:
2074		device_printf(nic->dev,
2075			   "Failed to enable interrupt: unknown type\n");
2076		break;
2077	}
2078
2079	nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val);
2080}
2081
2082/* Disable interrupt */
2083void
2084nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
2085{
2086	uint64_t reg_val = 0;
2087
2088	switch (int_type) {
2089	case NICVF_INTR_CQ:
2090		reg_val |= ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2091		break;
2092	case NICVF_INTR_SQ:
2093		reg_val |= ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2094		break;
2095	case NICVF_INTR_RBDR:
2096		reg_val |= ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2097		break;
2098	case NICVF_INTR_PKT_DROP:
2099		reg_val |= (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2100		break;
2101	case NICVF_INTR_TCP_TIMER:
2102		reg_val |= (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2103		break;
2104	case NICVF_INTR_MBOX:
2105		reg_val |= (1UL << NICVF_INTR_MBOX_SHIFT);
2106		break;
2107	case NICVF_INTR_QS_ERR:
2108		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2109		break;
2110	default:
2111		device_printf(nic->dev,
2112			   "Failed to disable interrupt: unknown type\n");
2113		break;
2114	}
2115
2116	nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val);
2117}
2118
2119/* Clear interrupt */
2120void
2121nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
2122{
2123	uint64_t reg_val = 0;
2124
2125	switch (int_type) {
2126	case NICVF_INTR_CQ:
2127		reg_val = ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2128		break;
2129	case NICVF_INTR_SQ:
2130		reg_val = ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2131		break;
2132	case NICVF_INTR_RBDR:
2133		reg_val = ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2134		break;
2135	case NICVF_INTR_PKT_DROP:
2136		reg_val = (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2137		break;
2138	case NICVF_INTR_TCP_TIMER:
2139		reg_val = (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2140		break;
2141	case NICVF_INTR_MBOX:
2142		reg_val = (1UL << NICVF_INTR_MBOX_SHIFT);
2143		break;
2144	case NICVF_INTR_QS_ERR:
2145		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2146		break;
2147	default:
2148		device_printf(nic->dev,
2149			   "Failed to clear interrupt: unknown type\n");
2150		break;
2151	}
2152
2153	nicvf_reg_write(nic, NIC_VF_INT, reg_val);
2154}
2155
2156/* Check if interrupt is enabled */
2157int
2158nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
2159{
2160	uint64_t reg_val;
2161	uint64_t mask = 0xff;
2162
2163	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
2164
2165	switch (int_type) {
2166	case NICVF_INTR_CQ:
2167		mask = ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2168		break;
2169	case NICVF_INTR_SQ:
2170		mask = ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2171		break;
2172	case NICVF_INTR_RBDR:
2173		mask = ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2174		break;
2175	case NICVF_INTR_PKT_DROP:
2176		mask = NICVF_INTR_PKT_DROP_MASK;
2177		break;
2178	case NICVF_INTR_TCP_TIMER:
2179		mask = NICVF_INTR_TCP_TIMER_MASK;
2180		break;
2181	case NICVF_INTR_MBOX:
2182		mask = NICVF_INTR_MBOX_MASK;
2183		break;
2184	case NICVF_INTR_QS_ERR:
2185		mask = NICVF_INTR_QS_ERR_MASK;
2186		break;
2187	default:
2188		device_printf(nic->dev,
2189			   "Failed to check interrupt enable: unknown type\n");
2190		break;
2191	}
2192
2193	return (reg_val & mask);
2194}
2195
2196void
2197nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
2198{
2199	struct rcv_queue *rq;
2200
2201#define GET_RQ_STATS(reg) \
2202	nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
2203			    (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
2204
2205	rq = &nic->qs->rq[rq_idx];
2206	rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
2207	rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
2208}
2209
2210void
2211nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
2212{
2213	struct snd_queue *sq;
2214
2215#define GET_SQ_STATS(reg) \
2216	nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
2217			    (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
2218
2219	sq = &nic->qs->sq[sq_idx];
2220	sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
2221	sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
2222}
2223
2224/* Check for errors in the receive cmp.queue entry */
2225int
2226nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cmp_queue *cq,
2227    struct cqe_rx_t *cqe_rx)
2228{
2229	struct nicvf_hw_stats *stats = &nic->hw_stats;
2230	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
2231
2232	if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
2233		drv_stats->rx_frames_ok++;
2234		return (0);
2235	}
2236
2237	switch (cqe_rx->err_opcode) {
2238	case CQ_RX_ERROP_RE_PARTIAL:
2239		stats->rx_bgx_truncated_pkts++;
2240		break;
2241	case CQ_RX_ERROP_RE_JABBER:
2242		stats->rx_jabber_errs++;
2243		break;
2244	case CQ_RX_ERROP_RE_FCS:
2245		stats->rx_fcs_errs++;
2246		break;
2247	case CQ_RX_ERROP_RE_RX_CTL:
2248		stats->rx_bgx_errs++;
2249		break;
2250	case CQ_RX_ERROP_PREL2_ERR:
2251		stats->rx_prel2_errs++;
2252		break;
2253	case CQ_RX_ERROP_L2_MAL:
2254		stats->rx_l2_hdr_malformed++;
2255		break;
2256	case CQ_RX_ERROP_L2_OVERSIZE:
2257		stats->rx_oversize++;
2258		break;
2259	case CQ_RX_ERROP_L2_UNDERSIZE:
2260		stats->rx_undersize++;
2261		break;
2262	case CQ_RX_ERROP_L2_LENMISM:
2263		stats->rx_l2_len_mismatch++;
2264		break;
2265	case CQ_RX_ERROP_L2_PCLP:
2266		stats->rx_l2_pclp++;
2267		break;
2268	case CQ_RX_ERROP_IP_NOT:
2269		stats->rx_ip_ver_errs++;
2270		break;
2271	case CQ_RX_ERROP_IP_CSUM_ERR:
2272		stats->rx_ip_csum_errs++;
2273		break;
2274	case CQ_RX_ERROP_IP_MAL:
2275		stats->rx_ip_hdr_malformed++;
2276		break;
2277	case CQ_RX_ERROP_IP_MALD:
2278		stats->rx_ip_payload_malformed++;
2279		break;
2280	case CQ_RX_ERROP_IP_HOP:
2281		stats->rx_ip_ttl_errs++;
2282		break;
2283	case CQ_RX_ERROP_L3_PCLP:
2284		stats->rx_l3_pclp++;
2285		break;
2286	case CQ_RX_ERROP_L4_MAL:
2287		stats->rx_l4_malformed++;
2288		break;
2289	case CQ_RX_ERROP_L4_CHK:
2290		stats->rx_l4_csum_errs++;
2291		break;
2292	case CQ_RX_ERROP_UDP_LEN:
2293		stats->rx_udp_len_errs++;
2294		break;
2295	case CQ_RX_ERROP_L4_PORT:
2296		stats->rx_l4_port_errs++;
2297		break;
2298	case CQ_RX_ERROP_TCP_FLAG:
2299		stats->rx_tcp_flag_errs++;
2300		break;
2301	case CQ_RX_ERROP_TCP_OFFSET:
2302		stats->rx_tcp_offset_errs++;
2303		break;
2304	case CQ_RX_ERROP_L4_PCLP:
2305		stats->rx_l4_pclp++;
2306		break;
2307	case CQ_RX_ERROP_RBDR_TRUNC:
2308		stats->rx_truncated_pkts++;
2309		break;
2310	}
2311
2312	return (1);
2313}
2314
2315/* Check for errors in the send cmp.queue entry */
2316int
2317nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cmp_queue *cq,
2318    struct cqe_send_t *cqe_tx)
2319{
2320	struct cmp_queue_stats *stats = &cq->stats;
2321
2322	switch (cqe_tx->send_status) {
2323	case CQ_TX_ERROP_GOOD:
2324		stats->tx.good++;
2325		return (0);
2326	case CQ_TX_ERROP_DESC_FAULT:
2327		stats->tx.desc_fault++;
2328		break;
2329	case CQ_TX_ERROP_HDR_CONS_ERR:
2330		stats->tx.hdr_cons_err++;
2331		break;
2332	case CQ_TX_ERROP_SUBDC_ERR:
2333		stats->tx.subdesc_err++;
2334		break;
2335	case CQ_TX_ERROP_IMM_SIZE_OFLOW:
2336		stats->tx.imm_size_oflow++;
2337		break;
2338	case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
2339		stats->tx.data_seq_err++;
2340		break;
2341	case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
2342		stats->tx.mem_seq_err++;
2343		break;
2344	case CQ_TX_ERROP_LOCK_VIOL:
2345		stats->tx.lock_viol++;
2346		break;
2347	case CQ_TX_ERROP_DATA_FAULT:
2348		stats->tx.data_fault++;
2349		break;
2350	case CQ_TX_ERROP_TSTMP_CONFLICT:
2351		stats->tx.tstmp_conflict++;
2352		break;
2353	case CQ_TX_ERROP_TSTMP_TIMEOUT:
2354		stats->tx.tstmp_timeout++;
2355		break;
2356	case CQ_TX_ERROP_MEM_FAULT:
2357		stats->tx.mem_fault++;
2358		break;
2359	case CQ_TX_ERROP_CK_OVERLAP:
2360		stats->tx.csum_overlap++;
2361		break;
2362	case CQ_TX_ERROP_CK_OFLOW:
2363		stats->tx.csum_overflow++;
2364		break;
2365	}
2366
2367	return (1);
2368}
2369