nicvf_queues.c revision 297450
1/*
2 * Copyright (C) 2015 Cavium Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/dev/vnic/nicvf_queues.c 297450 2016-03-31 13:10:29Z zbb $
27 *
28 */
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/vnic/nicvf_queues.c 297450 2016-03-31 13:10:29Z zbb $");
31
32#include "opt_inet.h"
33#include "opt_inet6.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/bitset.h>
38#include <sys/bitstring.h>
39#include <sys/buf_ring.h>
40#include <sys/bus.h>
41#include <sys/endian.h>
42#include <sys/kernel.h>
43#include <sys/malloc.h>
44#include <sys/module.h>
45#include <sys/rman.h>
46#include <sys/pciio.h>
47#include <sys/pcpu.h>
48#include <sys/proc.h>
49#include <sys/sockio.h>
50#include <sys/socket.h>
51#include <sys/cpuset.h>
52#include <sys/lock.h>
53#include <sys/mutex.h>
54#include <sys/smp.h>
55#include <sys/taskqueue.h>
56
57#include <vm/vm.h>
58#include <vm/pmap.h>
59
60#include <machine/bus.h>
61#include <machine/vmparam.h>
62
63#include <net/if.h>
64#include <net/if_var.h>
65#include <net/if_media.h>
66#include <net/ifq.h>
67#include <net/bpf.h>
68#include <net/ethernet.h>
69
70#include <netinet/in_systm.h>
71#include <netinet/in.h>
72#include <netinet/if_ether.h>
73#include <netinet/ip.h>
74#include <netinet/ip6.h>
75#include <netinet/sctp.h>
76#include <netinet/tcp.h>
77#include <netinet/tcp_lro.h>
78#include <netinet/udp.h>
79
80#include <dev/pci/pcireg.h>
81#include <dev/pci/pcivar.h>
82
83#include "thunder_bgx.h"
84#include "nic_reg.h"
85#include "nic.h"
86#include "q_struct.h"
87#include "nicvf_queues.h"
88
/*
 * Driver debug tracing.
 *
 * DEBUG is defined and then immediately undefined, so dprintf() expands
 * to nothing by default.  Remove the #undef line to route dprintf()
 * to device_printf().
 */
#define	DEBUG
#undef DEBUG

#ifndef DEBUG
#define	dprintf(dev, fmt, ...)
#else
#define	dprintf(dev, fmt, ...)	device_printf(dev, fmt, ##__VA_ARGS__)
#endif
97
98MALLOC_DECLARE(M_NICVF);
99
100static void nicvf_free_snd_queue(struct nicvf *, struct snd_queue *);
101static struct mbuf * nicvf_get_rcv_mbuf(struct nicvf *, struct cqe_rx_t *);
102static void nicvf_sq_disable(struct nicvf *, int);
103static void nicvf_sq_enable(struct nicvf *, struct snd_queue *, int);
104static void nicvf_put_sq_desc(struct snd_queue *, int);
105static void nicvf_cmp_queue_config(struct nicvf *, struct queue_set *, int,
106    boolean_t);
107static void nicvf_sq_free_used_descs(struct nicvf *, struct snd_queue *, int);
108
109static int nicvf_tx_mbuf_locked(struct snd_queue *, struct mbuf **);
110
111static void nicvf_rbdr_task(void *, int);
112static void nicvf_rbdr_task_nowait(void *, int);
113
114struct rbuf_info {
115	bus_dma_tag_t	dmat;
116	bus_dmamap_t	dmap;
117	struct mbuf *	mbuf;
118};
119
120#define GET_RBUF_INFO(x) ((struct rbuf_info *)((x) - NICVF_RCV_BUF_ALIGN_BYTES))
121
122/* Poll a register for a specific value */
123static int nicvf_poll_reg(struct nicvf *nic, int qidx,
124			  uint64_t reg, int bit_pos, int bits, int val)
125{
126	uint64_t bit_mask;
127	uint64_t reg_val;
128	int timeout = 10;
129
130	bit_mask = (1UL << bits) - 1;
131	bit_mask = (bit_mask << bit_pos);
132
133	while (timeout) {
134		reg_val = nicvf_queue_reg_read(nic, reg, qidx);
135		if (((reg_val & bit_mask) >> bit_pos) == val)
136			return (0);
137
138		DELAY(1000);
139		timeout--;
140	}
141	device_printf(nic->dev, "Poll on reg 0x%lx failed\n", reg);
142	return (ETIMEDOUT);
143}
144
145/* Callback for bus_dmamap_load() */
146static void
147nicvf_dmamap_q_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
148{
149	bus_addr_t *paddr;
150
151	KASSERT(nseg == 1, ("wrong number of segments, should be 1"));
152	paddr = arg;
153	*paddr = segs->ds_addr;
154}
155
156/* Allocate memory for a queue's descriptors */
157static int
158nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
159    int q_len, int desc_size, int align_bytes)
160{
161	int err, err_dmat;
162
163	/* Create DMA tag first */
164	err = bus_dma_tag_create(
165	    bus_get_dma_tag(nic->dev),		/* parent tag */
166	    align_bytes,			/* alignment */
167	    0,					/* boundary */
168	    BUS_SPACE_MAXADDR,			/* lowaddr */
169	    BUS_SPACE_MAXADDR,			/* highaddr */
170	    NULL, NULL,				/* filtfunc, filtfuncarg */
171	    (q_len * desc_size),		/* maxsize */
172	    1,					/* nsegments */
173	    (q_len * desc_size),		/* maxsegsize */
174	    0,					/* flags */
175	    NULL, NULL,				/* lockfunc, lockfuncarg */
176	    &dmem->dmat);			/* dmat */
177
178	if (err != 0) {
179		device_printf(nic->dev,
180		    "Failed to create busdma tag for descriptors ring\n");
181		return (err);
182	}
183
184	/* Allocate segment of continuous DMA safe memory */
185	err = bus_dmamem_alloc(
186	    dmem->dmat,				/* DMA tag */
187	    &dmem->base,			/* virtual address */
188	    (BUS_DMA_NOWAIT | BUS_DMA_ZERO),	/* flags */
189	    &dmem->dmap);			/* DMA map */
190	if (err != 0) {
191		device_printf(nic->dev, "Failed to allocate DMA safe memory for"
192		    "descriptors ring\n");
193		goto dmamem_fail;
194	}
195
196	err = bus_dmamap_load(
197	    dmem->dmat,
198	    dmem->dmap,
199	    dmem->base,
200	    (q_len * desc_size),		/* allocation size */
201	    nicvf_dmamap_q_cb,			/* map to DMA address cb. */
202	    &dmem->phys_base,			/* physical address */
203	    BUS_DMA_NOWAIT);
204	if (err != 0) {
205		device_printf(nic->dev,
206		    "Cannot load DMA map of descriptors ring\n");
207		goto dmamap_fail;
208	}
209
210	dmem->q_len = q_len;
211	dmem->size = (desc_size * q_len);
212
213	return (0);
214
215dmamap_fail:
216	bus_dmamem_free(dmem->dmat, dmem->base, dmem->dmap);
217	dmem->phys_base = 0;
218dmamem_fail:
219	err_dmat = bus_dma_tag_destroy(dmem->dmat);
220	dmem->base = NULL;
221	KASSERT(err_dmat == 0,
222	    ("%s: Trying to destroy BUSY DMA tag", __func__));
223
224	return (err);
225}
226
227/* Free queue's descriptor memory */
228static void
229nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
230{
231	int err;
232
233	if ((dmem == NULL) || (dmem->base == NULL))
234		return;
235
236	/* Unload a map */
237	bus_dmamap_sync(dmem->dmat, dmem->dmap, BUS_DMASYNC_POSTREAD);
238	bus_dmamap_unload(dmem->dmat, dmem->dmap);
239	/* Free DMA memory */
240	bus_dmamem_free(dmem->dmat, dmem->base, dmem->dmap);
241	/* Destroy DMA tag */
242	err = bus_dma_tag_destroy(dmem->dmat);
243
244	KASSERT(err == 0,
245	    ("%s: Trying to destroy BUSY DMA tag", __func__));
246
247	dmem->phys_base = 0;
248	dmem->base = NULL;
249}
250
251/*
252 * Allocate buffer for packet reception
253 * HW returns memory address where packet is DMA'ed but not a pointer
254 * into RBDR ring, so save buffer address at the start of fragment and
255 * align the start address to a cache aligned address
256 */
257static __inline int
258nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
259    bus_dmamap_t dmap, int mflags, uint32_t buf_len, bus_addr_t *rbuf)
260{
261	struct mbuf *mbuf;
262	struct rbuf_info *rinfo;
263	bus_dma_segment_t segs[1];
264	int nsegs;
265	int err;
266
267	mbuf = m_getjcl(mflags, MT_DATA, M_PKTHDR, MCLBYTES);
268	if (mbuf == NULL)
269		return (ENOMEM);
270
271	/*
272	 * The length is equal to the actual length + one 128b line
273	 * used as a room for rbuf_info structure.
274	 */
275	mbuf->m_len = mbuf->m_pkthdr.len = buf_len;
276
277	err = bus_dmamap_load_mbuf_sg(rbdr->rbdr_buff_dmat, dmap, mbuf, segs,
278	    &nsegs, BUS_DMA_NOWAIT);
279	if (err != 0) {
280		device_printf(nic->dev,
281		    "Failed to map mbuf into DMA visible memory, err: %d\n",
282		    err);
283		m_freem(mbuf);
284		bus_dmamap_destroy(rbdr->rbdr_buff_dmat, dmap);
285		return (err);
286	}
287	if (nsegs != 1)
288		panic("Unexpected number of DMA segments for RB: %d", nsegs);
289	/*
290	 * Now use the room for rbuf_info structure
291	 * and adjust mbuf data and length.
292	 */
293	rinfo = (struct rbuf_info *)mbuf->m_data;
294	m_adj(mbuf, NICVF_RCV_BUF_ALIGN_BYTES);
295
296	rinfo->dmat = rbdr->rbdr_buff_dmat;
297	rinfo->dmap = dmap;
298	rinfo->mbuf = mbuf;
299
300	*rbuf = segs[0].ds_addr + NICVF_RCV_BUF_ALIGN_BYTES;
301
302	return (0);
303}
304
305/* Retrieve mbuf for received packet */
306static struct mbuf *
307nicvf_rb_ptr_to_mbuf(struct nicvf *nic, bus_addr_t rb_ptr)
308{
309	struct mbuf *mbuf;
310	struct rbuf_info *rinfo;
311
312	/* Get buffer start address and alignment offset */
313	rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(rb_ptr));
314
315	/* Now retrieve mbuf to give to stack */
316	mbuf = rinfo->mbuf;
317	if (__predict_false(mbuf == NULL)) {
318		panic("%s: Received packet fragment with NULL mbuf",
319		    device_get_nameunit(nic->dev));
320	}
321	/*
322	 * Clear the mbuf in the descriptor to indicate
323	 * that this slot is processed and free to use.
324	 */
325	rinfo->mbuf = NULL;
326
327	bus_dmamap_sync(rinfo->dmat, rinfo->dmap, BUS_DMASYNC_POSTREAD);
328	bus_dmamap_unload(rinfo->dmat, rinfo->dmap);
329
330	return (mbuf);
331}
332
333/* Allocate RBDR ring and populate receive buffers */
334static int
335nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, int ring_len,
336    int buf_size, int qidx)
337{
338	bus_dmamap_t dmap;
339	bus_addr_t rbuf;
340	struct rbdr_entry_t *desc;
341	int idx;
342	int err;
343
344	/* Allocate rbdr descriptors ring */
345	err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
346	    sizeof(struct rbdr_entry_t), NICVF_RCV_BUF_ALIGN_BYTES);
347	if (err != 0) {
348		device_printf(nic->dev,
349		    "Failed to create RBDR descriptors ring\n");
350		return (err);
351	}
352
353	rbdr->desc = rbdr->dmem.base;
354	/*
355	 * Buffer size has to be in multiples of 128 bytes.
356	 * Make room for metadata of size of one line (128 bytes).
357	 */
358	rbdr->dma_size = buf_size - NICVF_RCV_BUF_ALIGN_BYTES;
359	rbdr->enable = TRUE;
360	rbdr->thresh = RBDR_THRESH;
361	rbdr->nic = nic;
362	rbdr->idx = qidx;
363
364	/*
365	 * Create DMA tag for Rx buffers.
366	 * Each map created using this tag is intended to store Rx payload for
367	 * one fragment and one header structure containing rbuf_info (thus
368	 * additional 128 byte line since RB must be a multiple of 128 byte
369	 * cache line).
370	 */
371	if (buf_size > MCLBYTES) {
372		device_printf(nic->dev,
373		    "Buffer size to large for mbuf cluster\n");
374		return (EINVAL);
375	}
376	err = bus_dma_tag_create(
377	    bus_get_dma_tag(nic->dev),		/* parent tag */
378	    NICVF_RCV_BUF_ALIGN_BYTES,		/* alignment */
379	    0,					/* boundary */
380	    DMAP_MAX_PHYSADDR,			/* lowaddr */
381	    DMAP_MIN_PHYSADDR,			/* highaddr */
382	    NULL, NULL,				/* filtfunc, filtfuncarg */
383	    roundup2(buf_size, MCLBYTES),	/* maxsize */
384	    1,					/* nsegments */
385	    roundup2(buf_size, MCLBYTES),	/* maxsegsize */
386	    0,					/* flags */
387	    NULL, NULL,				/* lockfunc, lockfuncarg */
388	    &rbdr->rbdr_buff_dmat);		/* dmat */
389
390	if (err != 0) {
391		device_printf(nic->dev,
392		    "Failed to create busdma tag for RBDR buffers\n");
393		return (err);
394	}
395
396	rbdr->rbdr_buff_dmaps = malloc(sizeof(*rbdr->rbdr_buff_dmaps) *
397	    ring_len, M_NICVF, (M_WAITOK | M_ZERO));
398
399	for (idx = 0; idx < ring_len; idx++) {
400		err = bus_dmamap_create(rbdr->rbdr_buff_dmat, 0, &dmap);
401		if (err != 0) {
402			device_printf(nic->dev,
403			    "Failed to create DMA map for RB\n");
404			return (err);
405		}
406		rbdr->rbdr_buff_dmaps[idx] = dmap;
407
408		err = nicvf_alloc_rcv_buffer(nic, rbdr, dmap, M_WAITOK,
409		    DMA_BUFFER_LEN, &rbuf);
410		if (err != 0)
411			return (err);
412
413		desc = GET_RBDR_DESC(rbdr, idx);
414		desc->buf_addr = (rbuf >> NICVF_RCV_BUF_ALIGN);
415	}
416
417	/* Allocate taskqueue */
418	TASK_INIT(&rbdr->rbdr_task, 0, nicvf_rbdr_task, rbdr);
419	TASK_INIT(&rbdr->rbdr_task_nowait, 0, nicvf_rbdr_task_nowait, rbdr);
420	rbdr->rbdr_taskq = taskqueue_create_fast("nicvf_rbdr_taskq", M_WAITOK,
421	    taskqueue_thread_enqueue, &rbdr->rbdr_taskq);
422	taskqueue_start_threads(&rbdr->rbdr_taskq, 1, PI_NET, "%s: rbdr_taskq",
423	    device_get_nameunit(nic->dev));
424
425	return (0);
426}
427
428/* Free RBDR ring and its receive buffers */
429static void
430nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
431{
432	struct mbuf *mbuf;
433	struct queue_set *qs;
434	struct rbdr_entry_t *desc;
435	struct rbuf_info *rinfo;
436	bus_addr_t buf_addr;
437	int head, tail, idx;
438	int err;
439
440	qs = nic->qs;
441
442	if ((qs == NULL) || (rbdr == NULL))
443		return;
444
445	rbdr->enable = FALSE;
446	if (rbdr->rbdr_taskq != NULL) {
447		/* Remove tasks */
448		while (taskqueue_cancel(rbdr->rbdr_taskq,
449		    &rbdr->rbdr_task_nowait, NULL) != 0) {
450			/* Finish the nowait task first */
451			taskqueue_drain(rbdr->rbdr_taskq,
452			    &rbdr->rbdr_task_nowait);
453		}
454		taskqueue_free(rbdr->rbdr_taskq);
455		rbdr->rbdr_taskq = NULL;
456
457		while (taskqueue_cancel(taskqueue_thread,
458		    &rbdr->rbdr_task, NULL) != 0) {
459			/* Now finish the sleepable task */
460			taskqueue_drain(taskqueue_thread, &rbdr->rbdr_task);
461		}
462	}
463
464	/*
465	 * Free all of the memory under the RB descriptors.
466	 * There are assumptions here:
467	 * 1. Corresponding RBDR is disabled
468	 *    - it is safe to operate using head and tail indexes
469	 * 2. All bffers that were received are properly freed by
470	 *    the receive handler
471	 *    - there is no need to unload DMA map and free MBUF for other
472	 *      descriptors than unused ones
473	 */
474	if (rbdr->rbdr_buff_dmat != NULL) {
475		head = rbdr->head;
476		tail = rbdr->tail;
477		while (head != tail) {
478			desc = GET_RBDR_DESC(rbdr, head);
479			buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
480			rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(buf_addr));
481			bus_dmamap_unload(rbdr->rbdr_buff_dmat, rinfo->dmap);
482			mbuf = rinfo->mbuf;
483			/* This will destroy everything including rinfo! */
484			m_freem(mbuf);
485			head++;
486			head &= (rbdr->dmem.q_len - 1);
487		}
488		/* Free tail descriptor */
489		desc = GET_RBDR_DESC(rbdr, tail);
490		buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
491		rinfo = GET_RBUF_INFO(PHYS_TO_DMAP(buf_addr));
492		bus_dmamap_unload(rbdr->rbdr_buff_dmat, rinfo->dmap);
493		mbuf = rinfo->mbuf;
494		/* This will destroy everything including rinfo! */
495		m_freem(mbuf);
496
497		/* Destroy DMA maps */
498		for (idx = 0; idx < qs->rbdr_len; idx++) {
499			if (rbdr->rbdr_buff_dmaps[idx] == NULL)
500				continue;
501			err = bus_dmamap_destroy(rbdr->rbdr_buff_dmat,
502			    rbdr->rbdr_buff_dmaps[idx]);
503			KASSERT(err == 0,
504			    ("%s: Could not destroy DMA map for RB, desc: %d",
505			    __func__, idx));
506			rbdr->rbdr_buff_dmaps[idx] = NULL;
507		}
508
509		/* Now destroy the tag */
510		err = bus_dma_tag_destroy(rbdr->rbdr_buff_dmat);
511		KASSERT(err == 0,
512		    ("%s: Trying to destroy BUSY DMA tag", __func__));
513
514		rbdr->head = 0;
515		rbdr->tail = 0;
516	}
517
518	/* Free RBDR ring */
519	nicvf_free_q_desc_mem(nic, &rbdr->dmem);
520}
521
522/*
523 * Refill receive buffer descriptors with new buffers.
524 */
525static int
526nicvf_refill_rbdr(struct rbdr *rbdr, int mflags)
527{
528	struct nicvf *nic;
529	struct queue_set *qs;
530	int rbdr_idx;
531	int tail, qcount;
532	int refill_rb_cnt;
533	struct rbdr_entry_t *desc;
534	bus_dmamap_t dmap;
535	bus_addr_t rbuf;
536	boolean_t rb_alloc_fail;
537	int new_rb;
538
539	rb_alloc_fail = TRUE;
540	new_rb = 0;
541	nic = rbdr->nic;
542	qs = nic->qs;
543	rbdr_idx = rbdr->idx;
544
545	/* Check if it's enabled */
546	if (!rbdr->enable)
547		return (0);
548
549	/* Get no of desc's to be refilled */
550	qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
551	qcount &= 0x7FFFF;
552	/* Doorbell can be ringed with a max of ring size minus 1 */
553	if (qcount >= (qs->rbdr_len - 1)) {
554		rb_alloc_fail = FALSE;
555		goto out;
556	} else
557		refill_rb_cnt = qs->rbdr_len - qcount - 1;
558
559	/* Start filling descs from tail */
560	tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
561	while (refill_rb_cnt) {
562		tail++;
563		tail &= (rbdr->dmem.q_len - 1);
564
565		dmap = rbdr->rbdr_buff_dmaps[tail];
566		if (nicvf_alloc_rcv_buffer(nic, rbdr, dmap, mflags,
567		    DMA_BUFFER_LEN, &rbuf)) {
568			/* Something went wrong. Resign */
569			break;
570		}
571		desc = GET_RBDR_DESC(rbdr, tail);
572		desc->buf_addr = (rbuf >> NICVF_RCV_BUF_ALIGN);
573		refill_rb_cnt--;
574		new_rb++;
575	}
576
577	/* make sure all memory stores are done before ringing doorbell */
578	wmb();
579
580	/* Check if buffer allocation failed */
581	if (refill_rb_cnt == 0)
582		rb_alloc_fail = FALSE;
583
584	/* Notify HW */
585	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
586			      rbdr_idx, new_rb);
587out:
588	if (!rb_alloc_fail) {
589		/*
590		 * Re-enable RBDR interrupts only
591		 * if buffer allocation is success.
592		 */
593		nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);
594
595		return (0);
596	}
597
598	return (ENOMEM);
599}
600
601/* Refill RBs even if sleep is needed to reclaim memory */
602static void
603nicvf_rbdr_task(void *arg, int pending)
604{
605	struct rbdr *rbdr;
606	int err;
607
608	rbdr = (struct rbdr *)arg;
609
610	err = nicvf_refill_rbdr(rbdr, M_WAITOK);
611	if (__predict_false(err != 0)) {
612		panic("%s: Failed to refill RBs even when sleep enabled",
613		    __func__);
614	}
615}
616
617/* Refill RBs as soon as possible without waiting */
618static void
619nicvf_rbdr_task_nowait(void *arg, int pending)
620{
621	struct rbdr *rbdr;
622	int err;
623
624	rbdr = (struct rbdr *)arg;
625
626	err = nicvf_refill_rbdr(rbdr, M_NOWAIT);
627	if (err != 0) {
628		/*
629		 * Schedule another, sleepable kernel thread
630		 * that will for sure refill the buffers.
631		 */
632		taskqueue_enqueue(taskqueue_thread, &rbdr->rbdr_task);
633	}
634}
635
636static int
637nicvf_rcv_pkt_handler(struct nicvf *nic, struct cmp_queue *cq,
638    struct cqe_rx_t *cqe_rx, int cqe_type)
639{
640	struct mbuf *mbuf;
641	struct rcv_queue *rq;
642	int rq_idx;
643	int err = 0;
644
645	rq_idx = cqe_rx->rq_idx;
646	rq = &nic->qs->rq[rq_idx];
647
648	/* Check for errors */
649	err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
650	if (err && !cqe_rx->rb_cnt)
651		return (0);
652
653	mbuf = nicvf_get_rcv_mbuf(nic, cqe_rx);
654	if (mbuf == NULL) {
655		dprintf(nic->dev, "Packet not received\n");
656		return (0);
657	}
658
659	/* If error packet */
660	if (err != 0) {
661		m_freem(mbuf);
662		return (0);
663	}
664
665	if (rq->lro_enabled &&
666	    ((cqe_rx->l3_type == L3TYPE_IPV4) && (cqe_rx->l4_type == L4TYPE_TCP)) &&
667	    (mbuf->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
668            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
669		/*
670		 * At this point it is known that there are no errors in the
671		 * packet. Attempt to LRO enqueue. Send to stack if no resources
672		 * or enqueue error.
673		 */
674		if ((rq->lro.lro_cnt != 0) &&
675		    (tcp_lro_rx(&rq->lro, mbuf, 0) == 0))
676			return (0);
677	}
678	/*
679	 * Push this packet to the stack later to avoid
680	 * unlocking completion task in the middle of work.
681	 */
682	err = buf_ring_enqueue(cq->rx_br, mbuf);
683	if (err != 0) {
684		/*
685		 * Failed to enqueue this mbuf.
686		 * We don't drop it, just schedule another task.
687		 */
688		return (err);
689	}
690
691	return (0);
692}
693
694static int
695nicvf_snd_pkt_handler(struct nicvf *nic, struct cmp_queue *cq,
696    struct cqe_send_t *cqe_tx, int cqe_type)
697{
698	bus_dmamap_t dmap;
699	struct mbuf *mbuf;
700	struct snd_queue *sq;
701	struct sq_hdr_subdesc *hdr;
702
703	mbuf = NULL;
704	sq = &nic->qs->sq[cqe_tx->sq_idx];
705	/* Avoid blocking here since we hold a non-sleepable NICVF_CMP_LOCK */
706	if (NICVF_TX_TRYLOCK(sq) == 0)
707		return (EAGAIN);
708
709	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
710	if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
711		NICVF_TX_UNLOCK(sq);
712		return (0);
713	}
714
715	dprintf(nic->dev,
716	    "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n",
717	    __func__, cqe_tx->sq_qs, cqe_tx->sq_idx,
718	    cqe_tx->sqe_ptr, hdr->subdesc_cnt);
719
720	dmap = (bus_dmamap_t)sq->snd_buff[cqe_tx->sqe_ptr].dmap;
721	bus_dmamap_unload(sq->snd_buff_dmat, dmap);
722
723	mbuf = (struct mbuf *)sq->snd_buff[cqe_tx->sqe_ptr].mbuf;
724	if (mbuf != NULL) {
725		m_freem(mbuf);
726		sq->snd_buff[cqe_tx->sqe_ptr].mbuf = NULL;
727		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
728	}
729
730	nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
731
732	NICVF_TX_UNLOCK(sq);
733	return (0);
734}
735
736static int
737nicvf_cq_intr_handler(struct nicvf *nic, uint8_t cq_idx)
738{
739	struct mbuf *mbuf;
740	struct ifnet *ifp;
741	int processed_cqe, work_done = 0, tx_done = 0;
742	int cqe_count, cqe_head;
743	struct queue_set *qs = nic->qs;
744	struct cmp_queue *cq = &qs->cq[cq_idx];
745	struct snd_queue *sq = &qs->sq[cq_idx];
746	struct rcv_queue *rq;
747	struct cqe_rx_t *cq_desc;
748	struct lro_ctrl	*lro;
749	struct lro_entry *queued;
750	int rq_idx;
751	int cmp_err;
752
753	NICVF_CMP_LOCK(cq);
754	cmp_err = 0;
755	processed_cqe = 0;
756	/* Get no of valid CQ entries to process */
757	cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
758	cqe_count &= CQ_CQE_COUNT;
759	if (cqe_count == 0)
760		goto out;
761
762	/* Get head of the valid CQ entries */
763	cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
764	cqe_head &= 0xFFFF;
765
766	dprintf(nic->dev, "%s CQ%d cqe_count %d cqe_head %d\n",
767	    __func__, cq_idx, cqe_count, cqe_head);
768	while (processed_cqe < cqe_count) {
769		/* Get the CQ descriptor */
770		cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
771		cqe_head++;
772		cqe_head &= (cq->dmem.q_len - 1);
773		/* Prefetch next CQ descriptor */
774		__builtin_prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));
775
776		dprintf(nic->dev, "CQ%d cq_desc->cqe_type %d\n", cq_idx,
777		    cq_desc->cqe_type);
778		switch (cq_desc->cqe_type) {
779		case CQE_TYPE_RX:
780			cmp_err = nicvf_rcv_pkt_handler(nic, cq, cq_desc,
781			    CQE_TYPE_RX);
782			if (__predict_false(cmp_err != 0)) {
783				/*
784				 * Ups. Cannot finish now.
785				 * Let's try again later.
786				 */
787				goto done;
788			}
789			work_done++;
790			break;
791		case CQE_TYPE_SEND:
792			cmp_err = nicvf_snd_pkt_handler(nic, cq,
793			    (void *)cq_desc, CQE_TYPE_SEND);
794			if (__predict_false(cmp_err != 0)) {
795				/*
796				 * Ups. Cannot finish now.
797				 * Let's try again later.
798				 */
799				goto done;
800			}
801
802			tx_done++;
803			break;
804		case CQE_TYPE_INVALID:
805		case CQE_TYPE_RX_SPLIT:
806		case CQE_TYPE_RX_TCP:
807		case CQE_TYPE_SEND_PTP:
808			/* Ignore for now */
809			break;
810		}
811		processed_cqe++;
812	}
813done:
814	dprintf(nic->dev,
815	    "%s CQ%d processed_cqe %d work_done %d\n",
816	    __func__, cq_idx, processed_cqe, work_done);
817
818	/* Ring doorbell to inform H/W to reuse processed CQEs */
819	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, cq_idx, processed_cqe);
820
821	if ((tx_done > 0) &&
822	    ((if_getdrvflags(nic->ifp) & IFF_DRV_RUNNING) != 0)) {
823		/* Reenable TXQ if its stopped earlier due to SQ full */
824		if_setdrvflagbits(nic->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
825		taskqueue_enqueue(sq->snd_taskq, &sq->snd_task);
826	}
827out:
828	/*
829	 * Flush any outstanding LRO work
830	 */
831	rq_idx = cq_idx;
832	rq = &nic->qs->rq[rq_idx];
833	lro = &rq->lro;
834	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
835		SLIST_REMOVE_HEAD(&lro->lro_active, next);
836		tcp_lro_flush(lro, queued);
837	}
838
839	NICVF_CMP_UNLOCK(cq);
840
841	ifp = nic->ifp;
842	/* Push received MBUFs to the stack */
843	while (!buf_ring_empty(cq->rx_br)) {
844		mbuf = buf_ring_dequeue_mc(cq->rx_br);
845		if (__predict_true(mbuf != NULL))
846			(*ifp->if_input)(ifp, mbuf);
847	}
848
849	return (cmp_err);
850}
851
852/*
853 * Qset error interrupt handler
854 *
855 * As of now only CQ errors are handled
856 */
857static void
858nicvf_qs_err_task(void *arg, int pending)
859{
860	struct nicvf *nic;
861	struct queue_set *qs;
862	int qidx;
863	uint64_t status;
864	boolean_t enable = TRUE;
865
866	nic = (struct nicvf *)arg;
867	qs = nic->qs;
868
869	/* Deactivate network interface */
870	if_setdrvflagbits(nic->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
871
872	/* Check if it is CQ err */
873	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
874		status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
875		    qidx);
876		if ((status & CQ_ERR_MASK) == 0)
877			continue;
878		/* Process already queued CQEs and reconfig CQ */
879		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
880		nicvf_sq_disable(nic, qidx);
881		(void)nicvf_cq_intr_handler(nic, qidx);
882		nicvf_cmp_queue_config(nic, qs, qidx, enable);
883		nicvf_sq_free_used_descs(nic, &qs->sq[qidx], qidx);
884		nicvf_sq_enable(nic, &qs->sq[qidx], qidx);
885		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
886	}
887
888	if_setdrvflagbits(nic->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
889	/* Re-enable Qset error interrupt */
890	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
891}
892
893static void
894nicvf_cmp_task(void *arg, int pending)
895{
896	struct cmp_queue *cq;
897	struct nicvf *nic;
898	int cmp_err;
899
900	cq = (struct cmp_queue *)arg;
901	nic = cq->nic;
902
903	/* Handle CQ descriptors */
904	cmp_err = nicvf_cq_intr_handler(nic, cq->idx);
905	if (__predict_false(cmp_err != 0)) {
906		/*
907		 * Schedule another thread here since we did not
908		 * process the entire CQ due to Tx or Rx CQ parse error.
909		 */
910		taskqueue_enqueue(cq->cmp_taskq, &cq->cmp_task);
911
912	}
913
914	nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->idx);
915	/* Reenable interrupt (previously disabled in nicvf_intr_handler() */
916	nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->idx);
917
918}
919
920/* Initialize completion queue */
921static int
922nicvf_init_cmp_queue(struct nicvf *nic, struct cmp_queue *cq, int q_len,
923    int qidx)
924{
925	int err;
926
927	/* Initizalize lock */
928	snprintf(cq->mtx_name, sizeof(cq->mtx_name), "%s: CQ(%d) lock",
929	    device_get_nameunit(nic->dev), qidx);
930	mtx_init(&cq->mtx, cq->mtx_name, NULL, MTX_DEF);
931
932	err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
933				     NICVF_CQ_BASE_ALIGN_BYTES);
934
935	if (err != 0) {
936		device_printf(nic->dev,
937		    "Could not allocate DMA memory for CQ\n");
938		return (err);
939	}
940
941	cq->desc = cq->dmem.base;
942	cq->thresh = pass1_silicon(nic->dev) ? 0 : CMP_QUEUE_CQE_THRESH;
943	cq->nic = nic;
944	cq->idx = qidx;
945	nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;
946
947	cq->rx_br = buf_ring_alloc(CMP_QUEUE_LEN * 8, M_DEVBUF, M_WAITOK,
948	    &cq->mtx);
949
950	/* Allocate taskqueue */
951	TASK_INIT(&cq->cmp_task, 0, nicvf_cmp_task, cq);
952	cq->cmp_taskq = taskqueue_create_fast("nicvf_cmp_taskq", M_WAITOK,
953	    taskqueue_thread_enqueue, &cq->cmp_taskq);
954	taskqueue_start_threads(&cq->cmp_taskq, 1, PI_NET, "%s: cmp_taskq(%d)",
955	    device_get_nameunit(nic->dev), qidx);
956
957	return (0);
958}
959
960static void
961nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
962{
963
964	if (cq == NULL)
965		return;
966	/*
967	 * The completion queue itself should be disabled by now
968	 * (ref. nicvf_snd_queue_config()).
969	 * Ensure that it is safe to disable it or panic.
970	 */
971	if (cq->enable)
972		panic("%s: Trying to free working CQ(%d)", __func__, cq->idx);
973
974	if (cq->cmp_taskq != NULL) {
975		/* Remove task */
976		while (taskqueue_cancel(cq->cmp_taskq, &cq->cmp_task, NULL) != 0)
977			taskqueue_drain(cq->cmp_taskq, &cq->cmp_task);
978
979		taskqueue_free(cq->cmp_taskq);
980		cq->cmp_taskq = NULL;
981	}
982	/*
983	 * Completion interrupt will possibly enable interrupts again
984	 * so disable interrupting now after we finished processing
985	 * completion task. It is safe to do so since the corresponding CQ
986	 * was already disabled.
987	 */
988	nicvf_disable_intr(nic, NICVF_INTR_CQ, cq->idx);
989	nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->idx);
990
991	NICVF_CMP_LOCK(cq);
992	nicvf_free_q_desc_mem(nic, &cq->dmem);
993	drbr_free(cq->rx_br, M_DEVBUF);
994	NICVF_CMP_UNLOCK(cq);
995	mtx_destroy(&cq->mtx);
996	memset(cq->mtx_name, 0, sizeof(cq->mtx_name));
997}
998
999int
1000nicvf_xmit_locked(struct snd_queue *sq)
1001{
1002	struct nicvf *nic;
1003	struct ifnet *ifp;
1004	struct mbuf *next;
1005	int err;
1006
1007	NICVF_TX_LOCK_ASSERT(sq);
1008
1009	nic = sq->nic;
1010	ifp = nic->ifp;
1011	err = 0;
1012
1013	while ((next = drbr_peek(ifp, sq->br)) != NULL) {
1014		err = nicvf_tx_mbuf_locked(sq, &next);
1015		if (err != 0) {
1016			if (next == NULL)
1017				drbr_advance(ifp, sq->br);
1018			else
1019				drbr_putback(ifp, sq->br, next);
1020
1021			break;
1022		}
1023		drbr_advance(ifp, sq->br);
1024		/* Send a copy of the frame to the BPF listener */
1025		ETHER_BPF_MTAP(ifp, next);
1026	}
1027	return (err);
1028}
1029
1030static void
1031nicvf_snd_task(void *arg, int pending)
1032{
1033	struct snd_queue *sq = (struct snd_queue *)arg;
1034	struct nicvf *nic;
1035	struct ifnet *ifp;
1036	int err;
1037
1038	nic = sq->nic;
1039	ifp = nic->ifp;
1040
1041	/*
1042	 * Skip sending anything if the driver is not running,
1043	 * SQ full or link is down.
1044	 */
1045	if (((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1046	    IFF_DRV_RUNNING) || !nic->link_up)
1047		return;
1048
1049	NICVF_TX_LOCK(sq);
1050	err = nicvf_xmit_locked(sq);
1051	NICVF_TX_UNLOCK(sq);
1052	/* Try again */
1053	if (err != 0)
1054		taskqueue_enqueue(sq->snd_taskq, &sq->snd_task);
1055}
1056
1057/* Initialize transmit queue */
1058static int
1059nicvf_init_snd_queue(struct nicvf *nic, struct snd_queue *sq, int q_len,
1060    int qidx)
1061{
1062	size_t i;
1063	int err;
1064
1065	/* Initizalize TX lock for this queue */
1066	snprintf(sq->mtx_name, sizeof(sq->mtx_name), "%s: SQ(%d) lock",
1067	    device_get_nameunit(nic->dev), qidx);
1068	mtx_init(&sq->mtx, sq->mtx_name, NULL, MTX_DEF);
1069
1070	NICVF_TX_LOCK(sq);
1071	/* Allocate buffer ring */
1072	sq->br = buf_ring_alloc(q_len / MIN_SQ_DESC_PER_PKT_XMIT, M_DEVBUF,
1073	    M_NOWAIT, &sq->mtx);
1074	if (sq->br == NULL) {
1075		device_printf(nic->dev,
1076		    "ERROR: Could not set up buf ring for SQ(%d)\n", qidx);
1077		err = ENOMEM;
1078		goto error;
1079	}
1080
1081	/* Allocate DMA memory for Tx descriptors */
1082	err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
1083				     NICVF_SQ_BASE_ALIGN_BYTES);
1084	if (err != 0) {
1085		device_printf(nic->dev,
1086		    "Could not allocate DMA memory for SQ\n");
1087		goto error;
1088	}
1089
1090	sq->desc = sq->dmem.base;
1091	sq->head = sq->tail = 0;
1092	sq->free_cnt = q_len - 1;
1093	sq->thresh = SND_QUEUE_THRESH;
1094	sq->idx = qidx;
1095	sq->nic = nic;
1096
1097	/*
1098	 * Allocate DMA maps for Tx buffers
1099	 */
1100
1101	/* Create DMA tag first */
1102	err = bus_dma_tag_create(
1103	    bus_get_dma_tag(nic->dev),		/* parent tag */
1104	    1,					/* alignment */
1105	    0,					/* boundary */
1106	    BUS_SPACE_MAXADDR,			/* lowaddr */
1107	    BUS_SPACE_MAXADDR,			/* highaddr */
1108	    NULL, NULL,				/* filtfunc, filtfuncarg */
1109	    NICVF_TSO_MAXSIZE,			/* maxsize */
1110	    NICVF_TSO_NSEGS,			/* nsegments */
1111	    MCLBYTES,				/* maxsegsize */
1112	    0,					/* flags */
1113	    NULL, NULL,				/* lockfunc, lockfuncarg */
1114	    &sq->snd_buff_dmat);		/* dmat */
1115
1116	if (err != 0) {
1117		device_printf(nic->dev,
1118		    "Failed to create busdma tag for Tx buffers\n");
1119		goto error;
1120	}
1121
1122	/* Allocate send buffers array */
1123	sq->snd_buff = malloc(sizeof(*sq->snd_buff) * q_len, M_NICVF,
1124	    (M_NOWAIT | M_ZERO));
1125	if (sq->snd_buff == NULL) {
1126		device_printf(nic->dev,
1127		    "Could not allocate memory for Tx buffers array\n");
1128		err = ENOMEM;
1129		goto error;
1130	}
1131
1132	/* Now populate maps */
1133	for (i = 0; i < q_len; i++) {
1134		err = bus_dmamap_create(sq->snd_buff_dmat, 0,
1135		    &sq->snd_buff[i].dmap);
1136		if (err != 0) {
1137			device_printf(nic->dev,
1138			    "Failed to create DMA maps for Tx buffers\n");
1139			goto error;
1140		}
1141	}
1142	NICVF_TX_UNLOCK(sq);
1143
1144	/* Allocate taskqueue */
1145	TASK_INIT(&sq->snd_task, 0, nicvf_snd_task, sq);
1146	sq->snd_taskq = taskqueue_create_fast("nicvf_snd_taskq", M_WAITOK,
1147	    taskqueue_thread_enqueue, &sq->snd_taskq);
1148	taskqueue_start_threads(&sq->snd_taskq, 1, PI_NET, "%s: snd_taskq(%d)",
1149	    device_get_nameunit(nic->dev), qidx);
1150
1151	return (0);
1152error:
1153	NICVF_TX_UNLOCK(sq);
1154	return (err);
1155}
1156
1157static void
1158nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
1159{
1160	struct queue_set *qs = nic->qs;
1161	size_t i;
1162	int err;
1163
1164	if (sq == NULL)
1165		return;
1166
1167	if (sq->snd_taskq != NULL) {
1168		/* Remove task */
1169		while (taskqueue_cancel(sq->snd_taskq, &sq->snd_task, NULL) != 0)
1170			taskqueue_drain(sq->snd_taskq, &sq->snd_task);
1171
1172		taskqueue_free(sq->snd_taskq);
1173		sq->snd_taskq = NULL;
1174	}
1175
1176	NICVF_TX_LOCK(sq);
1177	if (sq->snd_buff_dmat != NULL) {
1178		if (sq->snd_buff != NULL) {
1179			for (i = 0; i < qs->sq_len; i++) {
1180				m_freem(sq->snd_buff[i].mbuf);
1181				sq->snd_buff[i].mbuf = NULL;
1182
1183				bus_dmamap_unload(sq->snd_buff_dmat,
1184				    sq->snd_buff[i].dmap);
1185				err = bus_dmamap_destroy(sq->snd_buff_dmat,
1186				    sq->snd_buff[i].dmap);
1187				/*
1188				 * If bus_dmamap_destroy fails it can cause
1189				 * random panic later if the tag is also
1190				 * destroyed in the process.
1191				 */
1192				KASSERT(err == 0,
1193				    ("%s: Could not destroy DMA map for SQ",
1194				    __func__));
1195			}
1196		}
1197
1198		free(sq->snd_buff, M_NICVF);
1199
1200		err = bus_dma_tag_destroy(sq->snd_buff_dmat);
1201		KASSERT(err == 0,
1202		    ("%s: Trying to destroy BUSY DMA tag", __func__));
1203	}
1204
1205	/* Free private driver ring for this send queue */
1206	if (sq->br != NULL)
1207		drbr_free(sq->br, M_DEVBUF);
1208
1209	if (sq->dmem.base != NULL)
1210		nicvf_free_q_desc_mem(nic, &sq->dmem);
1211
1212	NICVF_TX_UNLOCK(sq);
1213	/* Destroy Tx lock */
1214	mtx_destroy(&sq->mtx);
1215	memset(sq->mtx_name, 0, sizeof(sq->mtx_name));
1216}
1217
1218static void
1219nicvf_reclaim_snd_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
1220{
1221
1222	/* Disable send queue */
1223	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
1224	/* Check if SQ is stopped */
1225	if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
1226		return;
1227	/* Reset send queue */
1228	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
1229}
1230
1231static void
1232nicvf_reclaim_rcv_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
1233{
1234	union nic_mbx mbx = {};
1235
1236	/* Make sure all packets in the pipeline are written back into mem */
1237	mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
1238	nicvf_send_msg_to_pf(nic, &mbx);
1239}
1240
1241static void
1242nicvf_reclaim_cmp_queue(struct nicvf *nic, struct queue_set *qs, int qidx)
1243{
1244
1245	/* Disable timer threshold (doesn't get reset upon CQ reset */
1246	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
1247	/* Disable completion queue */
1248	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
1249	/* Reset completion queue */
1250	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
1251}
1252
1253static void
1254nicvf_reclaim_rbdr(struct nicvf *nic, struct rbdr *rbdr, int qidx)
1255{
1256	uint64_t tmp, fifo_state;
1257	int timeout = 10;
1258
1259	/* Save head and tail pointers for feeing up buffers */
1260	rbdr->head =
1261	    nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_HEAD, qidx) >> 3;
1262	rbdr->tail =
1263	    nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, qidx) >> 3;
1264
1265	/*
1266	 * If RBDR FIFO is in 'FAIL' state then do a reset first
1267	 * before relaiming.
1268	 */
1269	fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
1270	if (((fifo_state >> 62) & 0x03) == 0x3) {
1271		nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
1272		    qidx, NICVF_RBDR_RESET);
1273	}
1274
1275	/* Disable RBDR */
1276	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
1277	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
1278		return;
1279	while (1) {
1280		tmp = nicvf_queue_reg_read(nic,
1281		    NIC_QSET_RBDR_0_1_PREFETCH_STATUS, qidx);
1282		if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
1283			break;
1284
1285		DELAY(1000);
1286		timeout--;
1287		if (!timeout) {
1288			device_printf(nic->dev,
1289			    "Failed polling on prefetch status\n");
1290			return;
1291		}
1292	}
1293	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx,
1294	    NICVF_RBDR_RESET);
1295
1296	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
1297		return;
1298	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
1299	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
1300		return;
1301}
1302
1303/* Configures receive queue */
1304static void
1305nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
1306    int qidx, bool enable)
1307{
1308	union nic_mbx mbx = {};
1309	struct rcv_queue *rq;
1310	struct rq_cfg rq_cfg;
1311	struct ifnet *ifp;
1312	struct lro_ctrl	*lro;
1313
1314	ifp = nic->ifp;
1315
1316	rq = &qs->rq[qidx];
1317	rq->enable = enable;
1318
1319	lro = &rq->lro;
1320
1321	/* Disable receive queue */
1322	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);
1323
1324	if (!rq->enable) {
1325		nicvf_reclaim_rcv_queue(nic, qs, qidx);
1326		/* Free LRO memory */
1327		tcp_lro_free(lro);
1328		rq->lro_enabled = FALSE;
1329		return;
1330	}
1331
1332	/* Configure LRO if enabled */
1333	rq->lro_enabled = FALSE;
1334	if ((if_getcapenable(ifp) & IFCAP_LRO) != 0) {
1335		if (tcp_lro_init(lro) != 0) {
1336			device_printf(nic->dev,
1337			    "Failed to initialize LRO for RXQ%d\n", qidx);
1338		} else {
1339			rq->lro_enabled = TRUE;
1340			lro->ifp = nic->ifp;
1341		}
1342	}
1343
1344	rq->cq_qs = qs->vnic_id;
1345	rq->cq_idx = qidx;
1346	rq->start_rbdr_qs = qs->vnic_id;
1347	rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
1348	rq->cont_rbdr_qs = qs->vnic_id;
1349	rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
1350	/* all writes of RBDR data to be loaded into L2 Cache as well*/
1351	rq->caching = 1;
1352
1353	/* Send a mailbox msg to PF to config RQ */
1354	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
1355	mbx.rq.qs_num = qs->vnic_id;
1356	mbx.rq.rq_num = qidx;
1357	mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) |
1358	    (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
1359	    (rq->cont_qs_rbdr_idx << 8) | (rq->start_rbdr_qs << 1) |
1360	    (rq->start_qs_rbdr_idx);
1361	nicvf_send_msg_to_pf(nic, &mbx);
1362
1363	mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
1364	mbx.rq.cfg = (1UL << 63) | (1UL << 62) | (qs->vnic_id << 0);
1365	nicvf_send_msg_to_pf(nic, &mbx);
1366
1367	/*
1368	 * RQ drop config
1369	 * Enable CQ drop to reserve sufficient CQEs for all tx packets
1370	 */
1371	mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
1372	mbx.rq.cfg = (1UL << 62) | (RQ_CQ_DROP << 8);
1373	nicvf_send_msg_to_pf(nic, &mbx);
1374
1375	nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00);
1376
1377	/* Enable Receive queue */
1378	rq_cfg.ena = 1;
1379	rq_cfg.tcp_ena = 0;
1380	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx,
1381	    *(uint64_t *)&rq_cfg);
1382}
1383
1384/* Configures completion queue */
1385static void
1386nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
1387    int qidx, boolean_t enable)
1388{
1389	struct cmp_queue *cq;
1390	struct cq_cfg cq_cfg;
1391
1392	cq = &qs->cq[qidx];
1393	cq->enable = enable;
1394
1395	if (!cq->enable) {
1396		nicvf_reclaim_cmp_queue(nic, qs, qidx);
1397		return;
1398	}
1399
1400	/* Reset completion queue */
1401	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
1402
1403	/* Set completion queue base address */
1404	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE, qidx,
1405	    (uint64_t)(cq->dmem.phys_base));
1406
1407	/* Enable Completion queue */
1408	cq_cfg.ena = 1;
1409	cq_cfg.reset = 0;
1410	cq_cfg.caching = 0;
1411	cq_cfg.qsize = CMP_QSIZE;
1412	cq_cfg.avg_con = 0;
1413	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(uint64_t *)&cq_cfg);
1414
1415	/* Set threshold value for interrupt generation */
1416	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
1417	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx,
1418	    nic->cq_coalesce_usecs);
1419}
1420
1421/* Configures transmit queue */
1422static void
1423nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs, int qidx,
1424    boolean_t enable)
1425{
1426	union nic_mbx mbx = {};
1427	struct snd_queue *sq;
1428	struct sq_cfg sq_cfg;
1429
1430	sq = &qs->sq[qidx];
1431	sq->enable = enable;
1432
1433	if (!sq->enable) {
1434		nicvf_reclaim_snd_queue(nic, qs, qidx);
1435		return;
1436	}
1437
1438	/* Reset send queue */
1439	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
1440
1441	sq->cq_qs = qs->vnic_id;
1442	sq->cq_idx = qidx;
1443
1444	/* Send a mailbox msg to PF to config SQ */
1445	mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
1446	mbx.sq.qs_num = qs->vnic_id;
1447	mbx.sq.sq_num = qidx;
1448	mbx.sq.sqs_mode = nic->sqs_mode;
1449	mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
1450	nicvf_send_msg_to_pf(nic, &mbx);
1451
1452	/* Set queue base address */
1453	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE, qidx,
1454	    (uint64_t)(sq->dmem.phys_base));
1455
1456	/* Enable send queue  & set queue size */
1457	sq_cfg.ena = 1;
1458	sq_cfg.reset = 0;
1459	sq_cfg.ldwb = 0;
1460	sq_cfg.qsize = SND_QSIZE;
1461	sq_cfg.tstmp_bgx_intf = 0;
1462	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(uint64_t *)&sq_cfg);
1463
1464	/* Set threshold value for interrupt generation */
1465	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
1466}
1467
1468/* Configures receive buffer descriptor ring */
1469static void
1470nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs, int qidx,
1471    boolean_t enable)
1472{
1473	struct rbdr *rbdr;
1474	struct rbdr_cfg rbdr_cfg;
1475
1476	rbdr = &qs->rbdr[qidx];
1477	nicvf_reclaim_rbdr(nic, rbdr, qidx);
1478	if (!enable)
1479		return;
1480
1481	/* Set descriptor base address */
1482	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE, qidx,
1483	    (uint64_t)(rbdr->dmem.phys_base));
1484
1485	/* Enable RBDR  & set queue size */
1486	/* Buffer size should be in multiples of 128 bytes */
1487	rbdr_cfg.ena = 1;
1488	rbdr_cfg.reset = 0;
1489	rbdr_cfg.ldwb = 0;
1490	rbdr_cfg.qsize = RBDR_SIZE;
1491	rbdr_cfg.avg_con = 0;
1492	rbdr_cfg.lines = rbdr->dma_size / 128;
1493	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx,
1494	    *(uint64_t *)&rbdr_cfg);
1495
1496	/* Notify HW */
1497	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, qidx,
1498	    qs->rbdr_len - 1);
1499
1500	/* Set threshold value for interrupt generation */
1501	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH, qidx,
1502	    rbdr->thresh - 1);
1503}
1504
1505/* Requests PF to assign and enable Qset */
1506void
1507nicvf_qset_config(struct nicvf *nic, boolean_t enable)
1508{
1509	union nic_mbx mbx = {};
1510	struct queue_set *qs;
1511	struct qs_cfg *qs_cfg;
1512
1513	qs = nic->qs;
1514	if (qs == NULL) {
1515		device_printf(nic->dev,
1516		    "Qset is still not allocated, don't init queues\n");
1517		return;
1518	}
1519
1520	qs->enable = enable;
1521	qs->vnic_id = nic->vf_id;
1522
1523	/* Send a mailbox msg to PF to config Qset */
1524	mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
1525	mbx.qs.num = qs->vnic_id;
1526
1527	mbx.qs.cfg = 0;
1528	qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
1529	if (qs->enable) {
1530		qs_cfg->ena = 1;
1531		qs_cfg->vnic = qs->vnic_id;
1532	}
1533	nicvf_send_msg_to_pf(nic, &mbx);
1534}
1535
1536static void
1537nicvf_free_resources(struct nicvf *nic)
1538{
1539	int qidx;
1540	struct queue_set *qs;
1541
1542	qs = nic->qs;
1543	/*
1544	 * Remove QS error task first since it has to be dead
1545	 * to safely free completion queue tasks.
1546	 */
1547	if (qs->qs_err_taskq != NULL) {
1548		/* Shut down QS error tasks */
1549		while (taskqueue_cancel(qs->qs_err_taskq,
1550		    &qs->qs_err_task,  NULL) != 0) {
1551			taskqueue_drain(qs->qs_err_taskq, &qs->qs_err_task);
1552
1553		}
1554		taskqueue_free(qs->qs_err_taskq);
1555		qs->qs_err_taskq = NULL;
1556	}
1557	/* Free receive buffer descriptor ring */
1558	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1559		nicvf_free_rbdr(nic, &qs->rbdr[qidx]);
1560
1561	/* Free completion queue */
1562	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1563		nicvf_free_cmp_queue(nic, &qs->cq[qidx]);
1564
1565	/* Free send queue */
1566	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1567		nicvf_free_snd_queue(nic, &qs->sq[qidx]);
1568}
1569
1570static int
1571nicvf_alloc_resources(struct nicvf *nic)
1572{
1573	struct queue_set *qs = nic->qs;
1574	int qidx;
1575
1576	/* Alloc receive buffer descriptor ring */
1577	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
1578		if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
1579				    DMA_BUFFER_LEN, qidx))
1580			goto alloc_fail;
1581	}
1582
1583	/* Alloc send queue */
1584	for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
1585		if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
1586			goto alloc_fail;
1587	}
1588
1589	/* Alloc completion queue */
1590	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
1591		if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len, qidx))
1592			goto alloc_fail;
1593	}
1594
1595	/* Allocate QS error taskqueue */
1596	TASK_INIT(&qs->qs_err_task, 0, nicvf_qs_err_task, nic);
1597	qs->qs_err_taskq = taskqueue_create_fast("nicvf_qs_err_taskq", M_WAITOK,
1598	    taskqueue_thread_enqueue, &qs->qs_err_taskq);
1599	taskqueue_start_threads(&qs->qs_err_taskq, 1, PI_NET, "%s: qs_taskq",
1600	    device_get_nameunit(nic->dev));
1601
1602	return (0);
1603alloc_fail:
1604	nicvf_free_resources(nic);
1605	return (ENOMEM);
1606}
1607
1608int
1609nicvf_set_qset_resources(struct nicvf *nic)
1610{
1611	struct queue_set *qs;
1612
1613	qs = malloc(sizeof(*qs), M_NICVF, (M_ZERO | M_WAITOK));
1614	nic->qs = qs;
1615
1616	/* Set count of each queue */
1617	qs->rbdr_cnt = RBDR_CNT;
1618	/* With no RSS we stay with single RQ */
1619	qs->rq_cnt = 1;
1620
1621	qs->sq_cnt = SND_QUEUE_CNT;
1622	qs->cq_cnt = CMP_QUEUE_CNT;
1623
1624	/* Set queue lengths */
1625	qs->rbdr_len = RCV_BUF_COUNT;
1626	qs->sq_len = SND_QUEUE_LEN;
1627	qs->cq_len = CMP_QUEUE_LEN;
1628
1629	nic->rx_queues = qs->rq_cnt;
1630	nic->tx_queues = qs->sq_cnt;
1631
1632	return (0);
1633}
1634
1635int
1636nicvf_config_data_transfer(struct nicvf *nic, boolean_t enable)
1637{
1638	boolean_t disable = FALSE;
1639	struct queue_set *qs;
1640	int qidx;
1641
1642	qs = nic->qs;
1643	if (qs == NULL)
1644		return (0);
1645
1646	if (enable) {
1647		if (nicvf_alloc_resources(nic) != 0)
1648			return (ENOMEM);
1649
1650		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1651			nicvf_snd_queue_config(nic, qs, qidx, enable);
1652		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1653			nicvf_cmp_queue_config(nic, qs, qidx, enable);
1654		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1655			nicvf_rbdr_config(nic, qs, qidx, enable);
1656		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1657			nicvf_rcv_queue_config(nic, qs, qidx, enable);
1658	} else {
1659		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1660			nicvf_rcv_queue_config(nic, qs, qidx, disable);
1661		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1662			nicvf_rbdr_config(nic, qs, qidx, disable);
1663		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1664			nicvf_snd_queue_config(nic, qs, qidx, disable);
1665		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1666			nicvf_cmp_queue_config(nic, qs, qidx, disable);
1667
1668		nicvf_free_resources(nic);
1669	}
1670
1671	return (0);
1672}
1673
1674/*
1675 * Get a free desc from SQ
1676 * returns descriptor ponter & descriptor number
1677 */
1678static __inline int
1679nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
1680{
1681	int qentry;
1682
1683	qentry = sq->tail;
1684	sq->free_cnt -= desc_cnt;
1685	sq->tail += desc_cnt;
1686	sq->tail &= (sq->dmem.q_len - 1);
1687
1688	return (qentry);
1689}
1690
1691/* Free descriptor back to SQ for future use */
1692static void
1693nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
1694{
1695
1696	sq->free_cnt += desc_cnt;
1697	sq->head += desc_cnt;
1698	sq->head &= (sq->dmem.q_len - 1);
1699}
1700
1701static __inline int
1702nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
1703{
1704	qentry++;
1705	qentry &= (sq->dmem.q_len - 1);
1706	return (qentry);
1707}
1708
1709static void
1710nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
1711{
1712	uint64_t sq_cfg;
1713
1714	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1715	sq_cfg |= NICVF_SQ_EN;
1716	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1717	/* Ring doorbell so that H/W restarts processing SQEs */
1718	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
1719}
1720
1721static void
1722nicvf_sq_disable(struct nicvf *nic, int qidx)
1723{
1724	uint64_t sq_cfg;
1725
1726	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1727	sq_cfg &= ~NICVF_SQ_EN;
1728	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1729}
1730
1731static void
1732nicvf_sq_free_used_descs(struct nicvf *nic, struct snd_queue *sq, int qidx)
1733{
1734	uint64_t head, tail;
1735	struct snd_buff *snd_buff;
1736	struct sq_hdr_subdesc *hdr;
1737
1738	NICVF_TX_LOCK(sq);
1739	head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
1740	tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
1741	while (sq->head != head) {
1742		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
1743		if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
1744			nicvf_put_sq_desc(sq, 1);
1745			continue;
1746		}
1747		snd_buff = &sq->snd_buff[sq->head];
1748		if (snd_buff->mbuf != NULL) {
1749			bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
1750			m_freem(snd_buff->mbuf);
1751			sq->snd_buff[sq->head].mbuf = NULL;
1752		}
1753		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
1754	}
1755	NICVF_TX_UNLOCK(sq);
1756}
1757
1758/*
1759 * Add SQ HEADER subdescriptor.
1760 * First subdescriptor for every send descriptor.
1761 */
1762static __inline int
1763nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
1764			 int subdesc_cnt, struct mbuf *mbuf, int len)
1765{
1766	struct nicvf *nic;
1767	struct sq_hdr_subdesc *hdr;
1768	struct ether_vlan_header *eh;
1769#ifdef INET
1770	struct ip *ip;
1771	struct tcphdr *th;
1772#endif
1773	uint16_t etype;
1774	int ehdrlen, iphlen, poff;
1775
1776	nic = sq->nic;
1777
1778	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1779	sq->snd_buff[qentry].mbuf = mbuf;
1780
1781	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1782	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1783	/* Enable notification via CQE after processing SQE */
1784	hdr->post_cqe = 1;
1785	/* No of subdescriptors following this */
1786	hdr->subdesc_cnt = subdesc_cnt;
1787	hdr->tot_len = len;
1788
1789	eh = mtod(mbuf, struct ether_vlan_header *);
1790	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1791		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1792		etype = ntohs(eh->evl_proto);
1793	} else {
1794		ehdrlen = ETHER_HDR_LEN;
1795		etype = ntohs(eh->evl_encap_proto);
1796	}
1797
1798	switch (etype) {
1799#ifdef INET6
1800	case ETHERTYPE_IPV6:
1801		/* ARM64TODO: Add support for IPv6 */
1802		hdr->csum_l3 = 0;
1803		sq->snd_buff[qentry].mbuf = NULL;
1804		return (ENXIO);
1805#endif
1806#ifdef INET
1807	case ETHERTYPE_IP:
1808		if (mbuf->m_len < ehdrlen + sizeof(struct ip)) {
1809			mbuf = m_pullup(mbuf, ehdrlen + sizeof(struct ip));
1810			sq->snd_buff[qentry].mbuf = mbuf;
1811			if (mbuf == NULL)
1812				return (ENOBUFS);
1813		}
1814
1815		ip = (struct ip *)(mbuf->m_data + ehdrlen);
1816		iphlen = ip->ip_hl << 2;
1817		poff = ehdrlen + iphlen;
1818
1819		if (mbuf->m_pkthdr.csum_flags != 0) {
1820			hdr->csum_l3 = 1; /* Enable IP csum calculation */
1821			switch (ip->ip_p) {
1822			case IPPROTO_TCP:
1823				if ((mbuf->m_pkthdr.csum_flags & CSUM_TCP) == 0)
1824					break;
1825
1826				if (mbuf->m_len < (poff + sizeof(struct tcphdr))) {
1827					mbuf = m_pullup(mbuf, poff + sizeof(struct tcphdr));
1828					sq->snd_buff[qentry].mbuf = mbuf;
1829					if (mbuf == NULL)
1830						return (ENOBUFS);
1831				}
1832				hdr->csum_l4 = SEND_L4_CSUM_TCP;
1833				break;
1834			case IPPROTO_UDP:
1835				if ((mbuf->m_pkthdr.csum_flags & CSUM_UDP) == 0)
1836					break;
1837
1838				if (mbuf->m_len < (poff + sizeof(struct udphdr))) {
1839					mbuf = m_pullup(mbuf, poff + sizeof(struct udphdr));
1840					sq->snd_buff[qentry].mbuf = mbuf;
1841					if (mbuf == NULL)
1842						return (ENOBUFS);
1843				}
1844				hdr->csum_l4 = SEND_L4_CSUM_UDP;
1845				break;
1846			case IPPROTO_SCTP:
1847				if ((mbuf->m_pkthdr.csum_flags & CSUM_SCTP) == 0)
1848					break;
1849
1850				if (mbuf->m_len < (poff + sizeof(struct sctphdr))) {
1851					mbuf = m_pullup(mbuf, poff + sizeof(struct sctphdr));
1852					sq->snd_buff[qentry].mbuf = mbuf;
1853					if (mbuf == NULL)
1854						return (ENOBUFS);
1855				}
1856				hdr->csum_l4 = SEND_L4_CSUM_SCTP;
1857				break;
1858			default:
1859				break;
1860			}
1861			hdr->l3_offset = ehdrlen;
1862			hdr->l4_offset = ehdrlen + iphlen;
1863		}
1864
1865		if ((mbuf->m_pkthdr.tso_segsz != 0) && nic->hw_tso) {
1866			/*
1867			 * Extract ip again as m_data could have been modified.
1868			 */
1869			ip = (struct ip *)(mbuf->m_data + ehdrlen);
1870			th = (struct tcphdr *)((caddr_t)ip + iphlen);
1871
1872			hdr->tso = 1;
1873			hdr->tso_start = ehdrlen + iphlen + (th->th_off * 4);
1874			hdr->tso_max_paysize = mbuf->m_pkthdr.tso_segsz;
1875			hdr->inner_l3_offset = ehdrlen - 2;
1876			nic->drv_stats.tx_tso++;
1877		}
1878		break;
1879#endif
1880	default:
1881		hdr->csum_l3 = 0;
1882	}
1883
1884	return (0);
1885}
1886
1887/*
1888 * SQ GATHER subdescriptor
1889 * Must follow HDR descriptor
1890 */
1891static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
1892					       int size, uint64_t data)
1893{
1894	struct sq_gather_subdesc *gather;
1895
1896	qentry &= (sq->dmem.q_len - 1);
1897	gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);
1898
1899	memset(gather, 0, SND_QUEUE_DESC_SIZE);
1900	gather->subdesc_type = SQ_DESC_TYPE_GATHER;
1901	gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
1902	gather->size = size;
1903	gather->addr = data;
1904}
1905
1906/* Put an mbuf to a SQ for packet transfer. */
1907static int
1908nicvf_tx_mbuf_locked(struct snd_queue *sq, struct mbuf **mbufp)
1909{
1910	bus_dma_segment_t segs[256];
1911	struct nicvf *nic;
1912	struct snd_buff *snd_buff;
1913	size_t seg;
1914	int nsegs, qentry;
1915	int subdesc_cnt;
1916	int err;
1917
1918	NICVF_TX_LOCK_ASSERT(sq);
1919
1920	if (sq->free_cnt == 0)
1921		return (ENOBUFS);
1922
1923	snd_buff = &sq->snd_buff[sq->tail];
1924
1925	err = bus_dmamap_load_mbuf_sg(sq->snd_buff_dmat, snd_buff->dmap,
1926	    *mbufp, segs, &nsegs, BUS_DMA_NOWAIT);
1927	if (__predict_false(err != 0)) {
1928		/* ARM64TODO: Add mbuf defragmenting if we lack maps */
1929		m_freem(*mbufp);
1930		*mbufp = NULL;
1931		return (err);
1932	}
1933
1934	/* Set how many subdescriptors is required */
1935	nic = sq->nic;
1936	if ((*mbufp)->m_pkthdr.tso_segsz != 0 && nic->hw_tso)
1937		subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
1938	else
1939		subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT + nsegs - 1;
1940
1941	if (subdesc_cnt > sq->free_cnt) {
1942		/* ARM64TODO: Add mbuf defragmentation if we lack descriptors */
1943		bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
1944		return (ENOBUFS);
1945	}
1946
1947	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1948
1949	/* Add SQ header subdesc */
1950	err = nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, *mbufp,
1951	    (*mbufp)->m_pkthdr.len);
1952	if (err != 0) {
1953		nicvf_put_sq_desc(sq, subdesc_cnt);
1954		bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
1955		if (err == ENOBUFS) {
1956			m_freem(*mbufp);
1957			*mbufp = NULL;
1958		}
1959		return (err);
1960	}
1961
1962	/* Add SQ gather subdescs */
1963	for (seg = 0; seg < nsegs; seg++) {
1964		qentry = nicvf_get_nxt_sqentry(sq, qentry);
1965		nicvf_sq_add_gather_subdesc(sq, qentry, segs[seg].ds_len,
1966		    segs[seg].ds_addr);
1967	}
1968
1969	/* make sure all memory stores are done before ringing doorbell */
1970	bus_dmamap_sync(sq->dmem.dmat, sq->dmem.dmap, BUS_DMASYNC_PREWRITE);
1971
1972	dprintf(sq->nic->dev, "%s: sq->idx: %d, subdesc_cnt: %d\n",
1973	    __func__, sq->idx, subdesc_cnt);
1974	/* Inform HW to xmit new packet */
1975	nicvf_queue_reg_write(sq->nic, NIC_QSET_SQ_0_7_DOOR,
1976	    sq->idx, subdesc_cnt);
1977	return (0);
1978}
1979
1980static __inline u_int
1981frag_num(u_int i)
1982{
1983#if BYTE_ORDER == BIG_ENDIAN
1984	return ((i & ~3) + 3 - (i & 3));
1985#else
1986	return (i);
1987#endif
1988}
1989
1990/* Returns MBUF for a received packet */
1991struct mbuf *
1992nicvf_get_rcv_mbuf(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
1993{
1994	int frag;
1995	int payload_len = 0;
1996	struct mbuf *mbuf;
1997	struct mbuf *mbuf_frag;
1998	uint16_t *rb_lens = NULL;
1999	uint64_t *rb_ptrs = NULL;
2000
2001	mbuf = NULL;
2002	rb_lens = (uint16_t *)((uint8_t *)cqe_rx + (3 * sizeof(uint64_t)));
2003	rb_ptrs = (uint64_t *)((uint8_t *)cqe_rx + (6 * sizeof(uint64_t)));
2004
2005	dprintf(nic->dev, "%s rb_cnt %d rb0_ptr %lx rb0_sz %d\n",
2006	    __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
2007
2008	for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
2009		payload_len = rb_lens[frag_num(frag)];
2010		if (frag == 0) {
2011			/* First fragment */
2012			mbuf = nicvf_rb_ptr_to_mbuf(nic,
2013			    (*rb_ptrs - cqe_rx->align_pad));
2014			mbuf->m_len = payload_len;
2015			mbuf->m_data += cqe_rx->align_pad;
2016			if_setrcvif(mbuf, nic->ifp);
2017		} else {
2018			/* Add fragments */
2019			mbuf_frag = nicvf_rb_ptr_to_mbuf(nic, *rb_ptrs);
2020			m_append(mbuf, payload_len, mbuf_frag->m_data);
2021			m_freem(mbuf_frag);
2022		}
2023		/* Next buffer pointer */
2024		rb_ptrs++;
2025	}
2026
2027	if (__predict_true(mbuf != NULL)) {
2028		m_fixhdr(mbuf);
2029		mbuf->m_pkthdr.flowid = cqe_rx->rq_idx;
2030		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE);
2031		if (__predict_true((if_getcapenable(nic->ifp) & IFCAP_RXCSUM) != 0)) {
2032			/*
2033			 * HW by default verifies IP & TCP/UDP/SCTP checksums
2034			 */
2035			if (__predict_true(cqe_rx->l3_type == L3TYPE_IPV4)) {
2036				mbuf->m_pkthdr.csum_flags =
2037				    (CSUM_IP_CHECKED | CSUM_IP_VALID);
2038			}
2039
2040			switch (cqe_rx->l4_type) {
2041			case L4TYPE_UDP:
2042			case L4TYPE_TCP: /* fall through */
2043				mbuf->m_pkthdr.csum_flags |=
2044				    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2045				mbuf->m_pkthdr.csum_data = 0xffff;
2046				break;
2047			case L4TYPE_SCTP:
2048				mbuf->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
2049				break;
2050			default:
2051				break;
2052			}
2053		}
2054	}
2055
2056	return (mbuf);
2057}
2058
2059/* Enable interrupt */
2060void
2061nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
2062{
2063	uint64_t reg_val;
2064
2065	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
2066
2067	switch (int_type) {
2068	case NICVF_INTR_CQ:
2069		reg_val |= ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2070		break;
2071	case NICVF_INTR_SQ:
2072		reg_val |= ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2073		break;
2074	case NICVF_INTR_RBDR:
2075		reg_val |= ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2076		break;
2077	case NICVF_INTR_PKT_DROP:
2078		reg_val |= (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2079		break;
2080	case NICVF_INTR_TCP_TIMER:
2081		reg_val |= (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2082		break;
2083	case NICVF_INTR_MBOX:
2084		reg_val |= (1UL << NICVF_INTR_MBOX_SHIFT);
2085		break;
2086	case NICVF_INTR_QS_ERR:
2087		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2088		break;
2089	default:
2090		device_printf(nic->dev,
2091			   "Failed to enable interrupt: unknown type\n");
2092		break;
2093	}
2094
2095	nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val);
2096}
2097
2098/* Disable interrupt */
2099void
2100nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
2101{
2102	uint64_t reg_val = 0;
2103
2104	switch (int_type) {
2105	case NICVF_INTR_CQ:
2106		reg_val |= ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2107		break;
2108	case NICVF_INTR_SQ:
2109		reg_val |= ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2110		break;
2111	case NICVF_INTR_RBDR:
2112		reg_val |= ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2113		break;
2114	case NICVF_INTR_PKT_DROP:
2115		reg_val |= (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2116		break;
2117	case NICVF_INTR_TCP_TIMER:
2118		reg_val |= (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2119		break;
2120	case NICVF_INTR_MBOX:
2121		reg_val |= (1UL << NICVF_INTR_MBOX_SHIFT);
2122		break;
2123	case NICVF_INTR_QS_ERR:
2124		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2125		break;
2126	default:
2127		device_printf(nic->dev,
2128			   "Failed to disable interrupt: unknown type\n");
2129		break;
2130	}
2131
2132	nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val);
2133}
2134
2135/* Clear interrupt */
2136void
2137nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
2138{
2139	uint64_t reg_val = 0;
2140
2141	switch (int_type) {
2142	case NICVF_INTR_CQ:
2143		reg_val = ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2144		break;
2145	case NICVF_INTR_SQ:
2146		reg_val = ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2147		break;
2148	case NICVF_INTR_RBDR:
2149		reg_val = ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2150		break;
2151	case NICVF_INTR_PKT_DROP:
2152		reg_val = (1UL << NICVF_INTR_PKT_DROP_SHIFT);
2153		break;
2154	case NICVF_INTR_TCP_TIMER:
2155		reg_val = (1UL << NICVF_INTR_TCP_TIMER_SHIFT);
2156		break;
2157	case NICVF_INTR_MBOX:
2158		reg_val = (1UL << NICVF_INTR_MBOX_SHIFT);
2159		break;
2160	case NICVF_INTR_QS_ERR:
2161		reg_val |= (1UL << NICVF_INTR_QS_ERR_SHIFT);
2162		break;
2163	default:
2164		device_printf(nic->dev,
2165			   "Failed to clear interrupt: unknown type\n");
2166		break;
2167	}
2168
2169	nicvf_reg_write(nic, NIC_VF_INT, reg_val);
2170}
2171
2172/* Check if interrupt is enabled */
2173int
2174nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
2175{
2176	uint64_t reg_val;
2177	uint64_t mask = 0xff;
2178
2179	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
2180
2181	switch (int_type) {
2182	case NICVF_INTR_CQ:
2183		mask = ((1UL << q_idx) << NICVF_INTR_CQ_SHIFT);
2184		break;
2185	case NICVF_INTR_SQ:
2186		mask = ((1UL << q_idx) << NICVF_INTR_SQ_SHIFT);
2187		break;
2188	case NICVF_INTR_RBDR:
2189		mask = ((1UL << q_idx) << NICVF_INTR_RBDR_SHIFT);
2190		break;
2191	case NICVF_INTR_PKT_DROP:
2192		mask = NICVF_INTR_PKT_DROP_MASK;
2193		break;
2194	case NICVF_INTR_TCP_TIMER:
2195		mask = NICVF_INTR_TCP_TIMER_MASK;
2196		break;
2197	case NICVF_INTR_MBOX:
2198		mask = NICVF_INTR_MBOX_MASK;
2199		break;
2200	case NICVF_INTR_QS_ERR:
2201		mask = NICVF_INTR_QS_ERR_MASK;
2202		break;
2203	default:
2204		device_printf(nic->dev,
2205			   "Failed to check interrupt enable: unknown type\n");
2206		break;
2207	}
2208
2209	return (reg_val & mask);
2210}
2211
2212void
2213nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
2214{
2215	struct rcv_queue *rq;
2216
2217#define GET_RQ_STATS(reg) \
2218	nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
2219			    (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
2220
2221	rq = &nic->qs->rq[rq_idx];
2222	rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
2223	rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
2224}
2225
2226void
2227nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
2228{
2229	struct snd_queue *sq;
2230
2231#define GET_SQ_STATS(reg) \
2232	nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
2233			    (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
2234
2235	sq = &nic->qs->sq[sq_idx];
2236	sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
2237	sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
2238}
2239
2240/* Check for errors in the receive cmp.queue entry */
2241int
2242nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cmp_queue *cq,
2243    struct cqe_rx_t *cqe_rx)
2244{
2245	struct nicvf_hw_stats *stats = &nic->hw_stats;
2246	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
2247
2248	if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
2249		drv_stats->rx_frames_ok++;
2250		return (0);
2251	}
2252
2253	switch (cqe_rx->err_opcode) {
2254	case CQ_RX_ERROP_RE_PARTIAL:
2255		stats->rx_bgx_truncated_pkts++;
2256		break;
2257	case CQ_RX_ERROP_RE_JABBER:
2258		stats->rx_jabber_errs++;
2259		break;
2260	case CQ_RX_ERROP_RE_FCS:
2261		stats->rx_fcs_errs++;
2262		break;
2263	case CQ_RX_ERROP_RE_RX_CTL:
2264		stats->rx_bgx_errs++;
2265		break;
2266	case CQ_RX_ERROP_PREL2_ERR:
2267		stats->rx_prel2_errs++;
2268		break;
2269	case CQ_RX_ERROP_L2_MAL:
2270		stats->rx_l2_hdr_malformed++;
2271		break;
2272	case CQ_RX_ERROP_L2_OVERSIZE:
2273		stats->rx_oversize++;
2274		break;
2275	case CQ_RX_ERROP_L2_UNDERSIZE:
2276		stats->rx_undersize++;
2277		break;
2278	case CQ_RX_ERROP_L2_LENMISM:
2279		stats->rx_l2_len_mismatch++;
2280		break;
2281	case CQ_RX_ERROP_L2_PCLP:
2282		stats->rx_l2_pclp++;
2283		break;
2284	case CQ_RX_ERROP_IP_NOT:
2285		stats->rx_ip_ver_errs++;
2286		break;
2287	case CQ_RX_ERROP_IP_CSUM_ERR:
2288		stats->rx_ip_csum_errs++;
2289		break;
2290	case CQ_RX_ERROP_IP_MAL:
2291		stats->rx_ip_hdr_malformed++;
2292		break;
2293	case CQ_RX_ERROP_IP_MALD:
2294		stats->rx_ip_payload_malformed++;
2295		break;
2296	case CQ_RX_ERROP_IP_HOP:
2297		stats->rx_ip_ttl_errs++;
2298		break;
2299	case CQ_RX_ERROP_L3_PCLP:
2300		stats->rx_l3_pclp++;
2301		break;
2302	case CQ_RX_ERROP_L4_MAL:
2303		stats->rx_l4_malformed++;
2304		break;
2305	case CQ_RX_ERROP_L4_CHK:
2306		stats->rx_l4_csum_errs++;
2307		break;
2308	case CQ_RX_ERROP_UDP_LEN:
2309		stats->rx_udp_len_errs++;
2310		break;
2311	case CQ_RX_ERROP_L4_PORT:
2312		stats->rx_l4_port_errs++;
2313		break;
2314	case CQ_RX_ERROP_TCP_FLAG:
2315		stats->rx_tcp_flag_errs++;
2316		break;
2317	case CQ_RX_ERROP_TCP_OFFSET:
2318		stats->rx_tcp_offset_errs++;
2319		break;
2320	case CQ_RX_ERROP_L4_PCLP:
2321		stats->rx_l4_pclp++;
2322		break;
2323	case CQ_RX_ERROP_RBDR_TRUNC:
2324		stats->rx_truncated_pkts++;
2325		break;
2326	}
2327
2328	return (1);
2329}
2330
2331/* Check for errors in the send cmp.queue entry */
2332int
2333nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cmp_queue *cq,
2334    struct cqe_send_t *cqe_tx)
2335{
2336	struct cmp_queue_stats *stats = &cq->stats;
2337
2338	switch (cqe_tx->send_status) {
2339	case CQ_TX_ERROP_GOOD:
2340		stats->tx.good++;
2341		return (0);
2342	case CQ_TX_ERROP_DESC_FAULT:
2343		stats->tx.desc_fault++;
2344		break;
2345	case CQ_TX_ERROP_HDR_CONS_ERR:
2346		stats->tx.hdr_cons_err++;
2347		break;
2348	case CQ_TX_ERROP_SUBDC_ERR:
2349		stats->tx.subdesc_err++;
2350		break;
2351	case CQ_TX_ERROP_IMM_SIZE_OFLOW:
2352		stats->tx.imm_size_oflow++;
2353		break;
2354	case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
2355		stats->tx.data_seq_err++;
2356		break;
2357	case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
2358		stats->tx.mem_seq_err++;
2359		break;
2360	case CQ_TX_ERROP_LOCK_VIOL:
2361		stats->tx.lock_viol++;
2362		break;
2363	case CQ_TX_ERROP_DATA_FAULT:
2364		stats->tx.data_fault++;
2365		break;
2366	case CQ_TX_ERROP_TSTMP_CONFLICT:
2367		stats->tx.tstmp_conflict++;
2368		break;
2369	case CQ_TX_ERROP_TSTMP_TIMEOUT:
2370		stats->tx.tstmp_timeout++;
2371		break;
2372	case CQ_TX_ERROP_MEM_FAULT:
2373		stats->tx.mem_fault++;
2374		break;
2375	case CQ_TX_ERROP_CK_OVERLAP:
2376		stats->tx.csum_overlap++;
2377		break;
2378	case CQ_TX_ERROP_CK_OFLOW:
2379		stats->tx.csum_overflow++;
2380		break;
2381	}
2382
2383	return (1);
2384}
2385