ena.c revision 361467
1/*-
2 * BSD LICENSE
3 *
4 * Copyright (c) 2015-2019 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/11/sys/dev/ena/ena.c 361467 2020-05-25 17:41:20Z mw $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/bus.h>
36#include <sys/endian.h>
37#include <sys/kernel.h>
38#include <sys/kthread.h>
39#include <sys/malloc.h>
40#include <sys/mbuf.h>
41#include <sys/module.h>
42#include <sys/rman.h>
43#include <sys/smp.h>
44#include <sys/socket.h>
45#include <sys/sockio.h>
46#include <sys/sysctl.h>
47#include <sys/taskqueue.h>
48#include <sys/time.h>
49#include <sys/eventhandler.h>
50
51#include <machine/bus.h>
52#include <machine/resource.h>
53#include <machine/in_cksum.h>
54
55#include <net/bpf.h>
56#include <net/ethernet.h>
57#include <net/if.h>
58#include <net/if_var.h>
59#include <net/if_arp.h>
60#include <net/if_dl.h>
61#include <net/if_media.h>
62#include <net/if_types.h>
63#include <net/if_vlan_var.h>
64
65#include <netinet/in_systm.h>
66#include <netinet/in.h>
67#include <netinet/if_ether.h>
68#include <netinet/ip.h>
69#include <netinet/ip6.h>
70#include <netinet/tcp.h>
71#include <netinet/udp.h>
72
73#include <dev/pci/pcivar.h>
74#include <dev/pci/pcireg.h>
75
76#include <vm/vm.h>
77#include <vm/pmap.h>
78
79#include "ena.h"
80#include "ena_sysctl.h"
81
82/*********************************************************
83 *  Function prototypes
84 *********************************************************/
85static int	ena_probe(device_t);
86static void	ena_intr_msix_mgmnt(void *);
87static void	ena_free_pci_resources(struct ena_adapter *);
88static int	ena_change_mtu(if_t, int);
89static inline void ena_alloc_counters(counter_u64_t *, int);
90static inline void ena_free_counters(counter_u64_t *, int);
91static inline void ena_reset_counters(counter_u64_t *, int);
92static void	ena_init_io_rings_common(struct ena_adapter *,
93    struct ena_ring *, uint16_t);
94static void	ena_init_io_rings(struct ena_adapter *);
95static void	ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
96static void	ena_free_all_io_rings_resources(struct ena_adapter *);
97static int	ena_setup_tx_dma_tag(struct ena_adapter *);
98static int	ena_free_tx_dma_tag(struct ena_adapter *);
99static int	ena_setup_rx_dma_tag(struct ena_adapter *);
100static int	ena_free_rx_dma_tag(struct ena_adapter *);
101static int	ena_setup_tx_resources(struct ena_adapter *, int);
102static void	ena_free_tx_resources(struct ena_adapter *, int);
103static int	ena_setup_all_tx_resources(struct ena_adapter *);
104static void	ena_free_all_tx_resources(struct ena_adapter *);
105static inline int validate_rx_req_id(struct ena_ring *, uint16_t);
106static int	ena_setup_rx_resources(struct ena_adapter *, unsigned int);
107static void	ena_free_rx_resources(struct ena_adapter *, unsigned int);
108static int	ena_setup_all_rx_resources(struct ena_adapter *);
109static void	ena_free_all_rx_resources(struct ena_adapter *);
110static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
111    struct ena_rx_buffer *);
112static void	ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
113    struct ena_rx_buffer *);
114static int	ena_refill_rx_bufs(struct ena_ring *, uint32_t);
115static void	ena_free_rx_bufs(struct ena_adapter *, unsigned int);
116static void	ena_refill_all_rx_bufs(struct ena_adapter *);
117static void	ena_free_all_rx_bufs(struct ena_adapter *);
118static void	ena_free_tx_bufs(struct ena_adapter *, unsigned int);
119static void	ena_free_all_tx_bufs(struct ena_adapter *);
120static void	ena_destroy_all_tx_queues(struct ena_adapter *);
121static void	ena_destroy_all_rx_queues(struct ena_adapter *);
122static void	ena_destroy_all_io_queues(struct ena_adapter *);
123static int	ena_create_io_queues(struct ena_adapter *);
124static int	ena_tx_cleanup(struct ena_ring *);
125static int	ena_rx_cleanup(struct ena_ring *);
126static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
127static void	ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
128    struct mbuf *);
129static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
130    struct ena_com_rx_ctx *, uint16_t *);
131static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
132    struct mbuf *);
133static void	ena_cleanup(void *arg, int pending);
134static int	ena_handle_msix(void *);
135static int	ena_enable_msix(struct ena_adapter *);
136static void	ena_setup_mgmnt_intr(struct ena_adapter *);
137static int	ena_setup_io_intr(struct ena_adapter *);
138static int	ena_request_mgmnt_irq(struct ena_adapter *);
139static int	ena_request_io_irq(struct ena_adapter *);
140static void	ena_free_mgmnt_irq(struct ena_adapter *);
141static void	ena_free_io_irq(struct ena_adapter *);
142static void	ena_free_irqs(struct ena_adapter*);
143static void	ena_disable_msix(struct ena_adapter *);
144static void	ena_unmask_all_io_irqs(struct ena_adapter *);
145static int	ena_rss_configure(struct ena_adapter *);
146static int	ena_up_complete(struct ena_adapter *);
147static int	ena_up(struct ena_adapter *);
148static void	ena_down(struct ena_adapter *);
149static uint64_t	ena_get_counter(if_t, ift_counter);
150static int	ena_media_change(if_t);
151static void	ena_media_status(if_t, struct ifmediareq *);
152static void	ena_init(void *);
153static int	ena_ioctl(if_t, u_long, caddr_t);
154static int	ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
155static void	ena_update_host_info(struct ena_admin_host_info *, if_t);
156static void	ena_update_hwassist(struct ena_adapter *);
157static int	ena_setup_ifnet(device_t, struct ena_adapter *,
158    struct ena_com_dev_get_features_ctx *);
159static void	ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *);
160static int	ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
161    struct mbuf **mbuf);
162static void	ena_dmamap_llq(void *, bus_dma_segment_t *, int, int);
163static int	ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
164static void	ena_start_xmit(struct ena_ring *);
165static int	ena_mq_start(if_t, struct mbuf *);
166static void	ena_deferred_mq_start(void *, int);
167static void	ena_qflush(if_t);
168static int	ena_enable_wc(struct resource *);
169static int	ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
170    struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
171static int	ena_calc_io_queue_num(struct ena_adapter *,
172    struct ena_com_dev_get_features_ctx *);
173static int	ena_calc_queue_size(struct ena_adapter *,
174    struct ena_calc_queue_size_ctx *);
175static int	ena_handle_updated_queues(struct ena_adapter *,
176    struct ena_com_dev_get_features_ctx *);
177static int	ena_rss_init_default(struct ena_adapter *);
178static void	ena_rss_init_default_deferred(void *);
179static void	ena_config_host_info(struct ena_com_dev *, device_t);
180static int	ena_attach(device_t);
181static int	ena_detach(device_t);
182static int	ena_device_init(struct ena_adapter *, device_t,
183    struct ena_com_dev_get_features_ctx *, int *);
184static int	ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *,
185    int);
186static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
187static void	unimplemented_aenq_handler(void *,
188    struct ena_admin_aenq_entry *);
189static void	ena_timer_service(void *);
190
191static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;
192
193static ena_vendor_info_t ena_vendor_info_array[] = {
194    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
195    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_PF, 0},
196    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0},
197    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_VF, 0},
198    /* Last entry */
199    { 0, 0, 0 }
200};
201
202/*
203 * Contains pointers to event handlers, e.g. link state chage.
204 */
205static struct ena_aenq_handlers aenq_handlers;
206
207void
208ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
209{
210	if (error != 0)
211		return;
212	*(bus_addr_t *) arg = segs[0].ds_addr;
213}
214
215int
216ena_dma_alloc(device_t dmadev, bus_size_t size,
217    ena_mem_handle_t *dma , int mapflags)
218{
219	struct ena_adapter* adapter = device_get_softc(dmadev);
220	uint32_t maxsize;
221	uint64_t dma_space_addr;
222	int error;
223
224	maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
225
226	dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
227	if (unlikely(dma_space_addr == 0))
228		dma_space_addr = BUS_SPACE_MAXADDR;
229
230	error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
231	    8, 0,	      /* alignment, bounds 		*/
232	    dma_space_addr,   /* lowaddr of exclusion window	*/
233	    BUS_SPACE_MAXADDR,/* highaddr of exclusion window	*/
234	    NULL, NULL,	      /* filter, filterarg 		*/
235	    maxsize,	      /* maxsize 			*/
236	    1,		      /* nsegments 			*/
237	    maxsize,	      /* maxsegsize 			*/
238	    BUS_DMA_ALLOCNOW, /* flags 				*/
239	    NULL,	      /* lockfunc 			*/
240	    NULL,	      /* lockarg 			*/
241	    &dma->tag);
242	if (unlikely(error != 0)) {
243		ena_trace(ENA_ALERT, "bus_dma_tag_create failed: %d\n", error);
244		goto fail_tag;
245	}
246
247	error = bus_dmamem_alloc(dma->tag, (void**) &dma->vaddr,
248	    BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
249	if (unlikely(error != 0)) {
250		ena_trace(ENA_ALERT, "bus_dmamem_alloc(%ju) failed: %d\n",
251		    (uintmax_t)size, error);
252		goto fail_map_create;
253	}
254
255	dma->paddr = 0;
256	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr,
257	    size, ena_dmamap_callback, &dma->paddr, mapflags);
258	if (unlikely((error != 0) || (dma->paddr == 0))) {
259		ena_trace(ENA_ALERT, ": bus_dmamap_load failed: %d\n", error);
260		goto fail_map_load;
261	}
262
263	bus_dmamap_sync(dma->tag, dma->map,
264	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
265
266	return (0);
267
268fail_map_load:
269	bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
270fail_map_create:
271	bus_dma_tag_destroy(dma->tag);
272fail_tag:
273	dma->tag = NULL;
274	dma->vaddr = NULL;
275	dma->paddr = 0;
276
277	return (error);
278}
279
280static void
281ena_free_pci_resources(struct ena_adapter *adapter)
282{
283	device_t pdev = adapter->pdev;
284
285	if (adapter->memory != NULL) {
286		bus_release_resource(pdev, SYS_RES_MEMORY,
287		    PCIR_BAR(ENA_MEM_BAR), adapter->memory);
288	}
289
290	if (adapter->registers != NULL) {
291		bus_release_resource(pdev, SYS_RES_MEMORY,
292		    PCIR_BAR(ENA_REG_BAR), adapter->registers);
293	}
294}
295
296static int
297ena_probe(device_t dev)
298{
299	ena_vendor_info_t *ent;
300	char		adapter_name[60];
301	uint16_t	pci_vendor_id = 0;
302	uint16_t	pci_device_id = 0;
303
304	pci_vendor_id = pci_get_vendor(dev);
305	pci_device_id = pci_get_device(dev);
306
307	ent = ena_vendor_info_array;
308	while (ent->vendor_id != 0) {
309		if ((pci_vendor_id == ent->vendor_id) &&
310		    (pci_device_id == ent->device_id)) {
311			ena_trace(ENA_DBG, "vendor=%x device=%x\n",
312			    pci_vendor_id, pci_device_id);
313
314			sprintf(adapter_name, DEVICE_DESC);
315			device_set_desc_copy(dev, adapter_name);
316			return (BUS_PROBE_DEFAULT);
317		}
318
319		ent++;
320
321	}
322
323	return (ENXIO);
324}
325
326static int
327ena_change_mtu(if_t ifp, int new_mtu)
328{
329	struct ena_adapter *adapter = if_getsoftc(ifp);
330	int rc;
331
332	if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
333		device_printf(adapter->pdev, "Invalid MTU setting. "
334		    "new_mtu: %d max mtu: %d min mtu: %d\n",
335		    new_mtu, adapter->max_mtu, ENA_MIN_MTU);
336		return (EINVAL);
337	}
338
339	rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
340	if (likely(rc == 0)) {
341		ena_trace(ENA_DBG, "set MTU to %d\n", new_mtu);
342		if_setmtu(ifp, new_mtu);
343	} else {
344		device_printf(adapter->pdev, "Failed to set MTU to %d\n",
345		    new_mtu);
346	}
347
348	return (rc);
349}
350
351static inline void
352ena_alloc_counters(counter_u64_t *begin, int size)
353{
354	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
355
356	for (; begin < end; ++begin)
357		*begin = counter_u64_alloc(M_WAITOK);
358}
359
360static inline void
361ena_free_counters(counter_u64_t *begin, int size)
362{
363	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
364
365	for (; begin < end; ++begin)
366		counter_u64_free(*begin);
367}
368
369static inline void
370ena_reset_counters(counter_u64_t *begin, int size)
371{
372	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
373
374	for (; begin < end; ++begin)
375		counter_u64_zero(*begin);
376}
377
378static void
379ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
380    uint16_t qid)
381{
382
383	ring->qid = qid;
384	ring->adapter = adapter;
385	ring->ena_dev = adapter->ena_dev;
386	ring->first_interrupt = false;
387	ring->no_interrupt_event_cnt = 0;
388}
389
390static void
391ena_init_io_rings(struct ena_adapter *adapter)
392{
393	struct ena_com_dev *ena_dev;
394	struct ena_ring *txr, *rxr;
395	struct ena_que *que;
396	int i;
397
398	ena_dev = adapter->ena_dev;
399
400	for (i = 0; i < adapter->num_queues; i++) {
401		txr = &adapter->tx_ring[i];
402		rxr = &adapter->rx_ring[i];
403
404		/* TX/RX common ring state */
405		ena_init_io_rings_common(adapter, txr, i);
406		ena_init_io_rings_common(adapter, rxr, i);
407
408		/* TX specific ring state */
409		txr->ring_size = adapter->tx_ring_size;
410		txr->tx_max_header_size = ena_dev->tx_max_header_size;
411		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
412		txr->smoothed_interval =
413		    ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
414
415		/* Allocate a buf ring */
416		txr->buf_ring_size = adapter->buf_ring_size;
417		txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF,
418		    M_WAITOK, &txr->ring_mtx);
419
420		/* Alloc TX statistics. */
421		ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
422		    sizeof(txr->tx_stats));
423
424		/* RX specific ring state */
425		rxr->ring_size = adapter->rx_ring_size;
426		rxr->smoothed_interval =
427		    ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
428
429		/* Alloc RX statistics. */
430		ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
431		    sizeof(rxr->rx_stats));
432
433		/* Initialize locks */
434		snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
435		    device_get_nameunit(adapter->pdev), i);
436		snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
437		    device_get_nameunit(adapter->pdev), i);
438
439		mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
440
441		que = &adapter->que[i];
442		que->adapter = adapter;
443		que->id = i;
444		que->tx_ring = txr;
445		que->rx_ring = rxr;
446
447		txr->que = que;
448		rxr->que = que;
449
450		rxr->empty_rx_queue = 0;
451	}
452}
453
454static void
455ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
456{
457	struct ena_ring *txr = &adapter->tx_ring[qid];
458	struct ena_ring *rxr = &adapter->rx_ring[qid];
459
460	ena_free_counters((counter_u64_t *)&txr->tx_stats,
461	    sizeof(txr->tx_stats));
462	ena_free_counters((counter_u64_t *)&rxr->rx_stats,
463	    sizeof(rxr->rx_stats));
464
465	ENA_RING_MTX_LOCK(txr);
466	drbr_free(txr->br, M_DEVBUF);
467	ENA_RING_MTX_UNLOCK(txr);
468
469	mtx_destroy(&txr->ring_mtx);
470}
471
472static void
473ena_free_all_io_rings_resources(struct ena_adapter *adapter)
474{
475	int i;
476
477	for (i = 0; i < adapter->num_queues; i++)
478		ena_free_io_ring_resources(adapter, i);
479
480}
481
482static int
483ena_setup_tx_dma_tag(struct ena_adapter *adapter)
484{
485	int ret;
486
487	/* Create DMA tag for Tx buffers */
488	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
489	    1, 0,				  /* alignment, bounds 	     */
490	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
491	    BUS_SPACE_MAXADDR, 			  /* highaddr of excl window */
492	    NULL, NULL,				  /* filter, filterarg 	     */
493	    ENA_TSO_MAXSIZE,			  /* maxsize 		     */
494	    adapter->max_tx_sgl_size - 1,	  /* nsegments 		     */
495	    ENA_TSO_MAXSIZE,			  /* maxsegsize 	     */
496	    0,					  /* flags 		     */
497	    NULL,				  /* lockfunc 		     */
498	    NULL,				  /* lockfuncarg 	     */
499	    &adapter->tx_buf_tag);
500
501	return (ret);
502}
503
504static int
505ena_free_tx_dma_tag(struct ena_adapter *adapter)
506{
507	int ret;
508
509	ret = bus_dma_tag_destroy(adapter->tx_buf_tag);
510
511	if (likely(ret == 0))
512		adapter->tx_buf_tag = NULL;
513
514	return (ret);
515}
516
517static int
518ena_setup_rx_dma_tag(struct ena_adapter *adapter)
519{
520	int ret;
521
522	/* Create DMA tag for Rx buffers*/
523	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent   */
524	    1, 0,				  /* alignment, bounds 	     */
525	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
526	    BUS_SPACE_MAXADDR, 			  /* highaddr of excl window */
527	    NULL, NULL,				  /* filter, filterarg 	     */
528	    MJUM16BYTES,			  /* maxsize 		     */
529	    adapter->max_rx_sgl_size,		  /* nsegments 		     */
530	    MJUM16BYTES,			  /* maxsegsize 	     */
531	    0,					  /* flags 		     */
532	    NULL,				  /* lockfunc 		     */
533	    NULL,				  /* lockarg 		     */
534	    &adapter->rx_buf_tag);
535
536	return (ret);
537}
538
539static int
540ena_free_rx_dma_tag(struct ena_adapter *adapter)
541{
542	int ret;
543
544	ret = bus_dma_tag_destroy(adapter->rx_buf_tag);
545
546	if (likely(ret == 0))
547		adapter->rx_buf_tag = NULL;
548
549	return (ret);
550}
551
552/**
553 * ena_setup_tx_resources - allocate Tx resources (Descriptors)
554 * @adapter: network interface device structure
555 * @qid: queue index
556 *
557 * Returns 0 on success, otherwise on failure.
558 **/
559static int
560ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
561{
562	struct ena_que *que = &adapter->que[qid];
563	struct ena_ring *tx_ring = que->tx_ring;
564	int size, i, err;
565
566	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
567
568	tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
569	if (unlikely(tx_ring->tx_buffer_info == NULL))
570		return (ENOMEM);
571
572	size = sizeof(uint16_t) * tx_ring->ring_size;
573	tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
574	if (unlikely(tx_ring->free_tx_ids == NULL))
575		goto err_buf_info_free;
576
577	size = tx_ring->tx_max_header_size;
578	tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
579	    M_NOWAIT | M_ZERO);
580	if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
581		goto err_tx_ids_free;
582
583	/* Req id stack for TX OOO completions */
584	for (i = 0; i < tx_ring->ring_size; i++)
585		tx_ring->free_tx_ids[i] = i;
586
587	/* Reset TX statistics. */
588	ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
589	    sizeof(tx_ring->tx_stats));
590
591	tx_ring->next_to_use = 0;
592	tx_ring->next_to_clean = 0;
593	tx_ring->acum_pkts = 0;
594
595	/* Make sure that drbr is empty */
596	ENA_RING_MTX_LOCK(tx_ring);
597	drbr_flush(adapter->ifp, tx_ring->br);
598	ENA_RING_MTX_UNLOCK(tx_ring);
599
600	/* ... and create the buffer DMA maps */
601	for (i = 0; i < tx_ring->ring_size; i++) {
602		err = bus_dmamap_create(adapter->tx_buf_tag, 0,
603		    &tx_ring->tx_buffer_info[i].map_head);
604		if (unlikely(err != 0)) {
605			ena_trace(ENA_ALERT,
606			    "Unable to create Tx DMA map_head for buffer %d\n",
607			    i);
608			goto err_buf_info_unmap;
609		}
610		tx_ring->tx_buffer_info[i].seg_mapped = false;
611
612		err = bus_dmamap_create(adapter->tx_buf_tag, 0,
613		    &tx_ring->tx_buffer_info[i].map_seg);
614		if (unlikely(err != 0)) {
615			ena_trace(ENA_ALERT,
616			    "Unable to create Tx DMA map_seg for buffer %d\n",
617			    i);
618			goto err_buf_info_head_unmap;
619		}
620		tx_ring->tx_buffer_info[i].head_mapped = false;
621	}
622
623	/* Allocate taskqueues */
624	TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
625	tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
626	    taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
627	if (unlikely(tx_ring->enqueue_tq == NULL)) {
628		ena_trace(ENA_ALERT,
629		    "Unable to create taskqueue for enqueue task\n");
630		i = tx_ring->ring_size;
631		goto err_buf_info_unmap;
632	}
633
634	tx_ring->running = true;
635
636	taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET,
637	    "%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu);
638
639	return (0);
640
641err_buf_info_head_unmap:
642	bus_dmamap_destroy(adapter->tx_buf_tag,
643	    tx_ring->tx_buffer_info[i].map_head);
644err_buf_info_unmap:
645	while (i--) {
646		bus_dmamap_destroy(adapter->tx_buf_tag,
647		    tx_ring->tx_buffer_info[i].map_head);
648		bus_dmamap_destroy(adapter->tx_buf_tag,
649		    tx_ring->tx_buffer_info[i].map_seg);
650	}
651	free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
652err_tx_ids_free:
653	free(tx_ring->free_tx_ids, M_DEVBUF);
654	tx_ring->free_tx_ids = NULL;
655err_buf_info_free:
656	free(tx_ring->tx_buffer_info, M_DEVBUF);
657	tx_ring->tx_buffer_info = NULL;
658
659	return (ENOMEM);
660}
661
662/**
663 * ena_free_tx_resources - Free Tx Resources per Queue
664 * @adapter: network interface device structure
665 * @qid: queue index
666 *
667 * Free all transmit software resources
668 **/
669static void
670ena_free_tx_resources(struct ena_adapter *adapter, int qid)
671{
672	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
673
674	while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
675	    NULL))
676		taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
677
678	taskqueue_free(tx_ring->enqueue_tq);
679
680	ENA_RING_MTX_LOCK(tx_ring);
681	/* Flush buffer ring, */
682	drbr_flush(adapter->ifp, tx_ring->br);
683
684	/* Free buffer DMA maps, */
685	for (int i = 0; i < tx_ring->ring_size; i++) {
686		if (tx_ring->tx_buffer_info[i].head_mapped == true) {
687			bus_dmamap_sync(adapter->tx_buf_tag,
688			    tx_ring->tx_buffer_info[i].map_head,
689			    BUS_DMASYNC_POSTWRITE);
690			bus_dmamap_unload(adapter->tx_buf_tag,
691			    tx_ring->tx_buffer_info[i].map_head);
692			tx_ring->tx_buffer_info[i].head_mapped = false;
693		}
694		bus_dmamap_destroy(adapter->tx_buf_tag,
695		    tx_ring->tx_buffer_info[i].map_head);
696
697		if (tx_ring->tx_buffer_info[i].seg_mapped == true) {
698			bus_dmamap_sync(adapter->tx_buf_tag,
699			    tx_ring->tx_buffer_info[i].map_seg,
700			    BUS_DMASYNC_POSTWRITE);
701			bus_dmamap_unload(adapter->tx_buf_tag,
702			    tx_ring->tx_buffer_info[i].map_seg);
703			tx_ring->tx_buffer_info[i].seg_mapped = false;
704		}
705		bus_dmamap_destroy(adapter->tx_buf_tag,
706		    tx_ring->tx_buffer_info[i].map_seg);
707
708		m_freem(tx_ring->tx_buffer_info[i].mbuf);
709		tx_ring->tx_buffer_info[i].mbuf = NULL;
710	}
711	ENA_RING_MTX_UNLOCK(tx_ring);
712
713	/* And free allocated memory. */
714	free(tx_ring->tx_buffer_info, M_DEVBUF);
715	tx_ring->tx_buffer_info = NULL;
716
717	free(tx_ring->free_tx_ids, M_DEVBUF);
718	tx_ring->free_tx_ids = NULL;
719
720	ENA_MEM_FREE(adapter->ena_dev->dmadev,
721	    tx_ring->push_buf_intermediate_buf);
722	tx_ring->push_buf_intermediate_buf = NULL;
723}
724
725/**
726 * ena_setup_all_tx_resources - allocate all queues Tx resources
727 * @adapter: network interface device structure
728 *
729 * Returns 0 on success, otherwise on failure.
730 **/
731static int
732ena_setup_all_tx_resources(struct ena_adapter *adapter)
733{
734	int i, rc;
735
736	for (i = 0; i < adapter->num_queues; i++) {
737		rc = ena_setup_tx_resources(adapter, i);
738		if (rc != 0) {
739			device_printf(adapter->pdev,
740			    "Allocation for Tx Queue %u failed\n", i);
741			goto err_setup_tx;
742		}
743	}
744
745	return (0);
746
747err_setup_tx:
748	/* Rewind the index freeing the rings as we go */
749	while (i--)
750		ena_free_tx_resources(adapter, i);
751	return (rc);
752}
753
754/**
755 * ena_free_all_tx_resources - Free Tx Resources for All Queues
756 * @adapter: network interface device structure
757 *
758 * Free all transmit software resources
759 **/
760static void
761ena_free_all_tx_resources(struct ena_adapter *adapter)
762{
763	int i;
764
765	for (i = 0; i < adapter->num_queues; i++)
766		ena_free_tx_resources(adapter, i);
767}
768
769static inline int
770validate_rx_req_id(struct ena_ring *rx_ring, uint16_t req_id)
771{
772	if (likely(req_id < rx_ring->ring_size))
773		return (0);
774
775	device_printf(rx_ring->adapter->pdev, "Invalid rx req_id: %hu\n",
776	    req_id);
777	counter_u64_add(rx_ring->rx_stats.bad_req_id, 1);
778
779	/* Trigger device reset */
780	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter))) {
781		rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
782		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter);
783	}
784
785	return (EFAULT);
786}
787
788/**
789 * ena_setup_rx_resources - allocate Rx resources (Descriptors)
790 * @adapter: network interface device structure
791 * @qid: queue index
792 *
793 * Returns 0 on success, otherwise on failure.
794 **/
795static int
796ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
797{
798	struct ena_que *que = &adapter->que[qid];
799	struct ena_ring *rx_ring = que->rx_ring;
800	int size, err, i;
801
802	size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
803
804	/*
805	 * Alloc extra element so in rx path
806	 * we can always prefetch rx_info + 1
807	 */
808	size += sizeof(struct ena_rx_buffer);
809
810	rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
811
812	size = sizeof(uint16_t) * rx_ring->ring_size;
813	rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);
814
815	for (i = 0; i < rx_ring->ring_size; i++)
816		rx_ring->free_rx_ids[i] = i;
817
818	/* Reset RX statistics. */
819	ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
820	    sizeof(rx_ring->rx_stats));
821
822	rx_ring->next_to_clean = 0;
823	rx_ring->next_to_use = 0;
824
825	/* ... and create the buffer DMA maps */
826	for (i = 0; i < rx_ring->ring_size; i++) {
827		err = bus_dmamap_create(adapter->rx_buf_tag, 0,
828		    &(rx_ring->rx_buffer_info[i].map));
829		if (err != 0) {
830			ena_trace(ENA_ALERT,
831			    "Unable to create Rx DMA map for buffer %d\n", i);
832			goto err_buf_info_unmap;
833		}
834	}
835
836	/* Create LRO for the ring */
837	if ((adapter->ifp->if_capenable & IFCAP_LRO) != 0) {
838		int err = tcp_lro_init(&rx_ring->lro);
839		if (err != 0) {
840			device_printf(adapter->pdev,
841			    "LRO[%d] Initialization failed!\n", qid);
842		} else {
843			ena_trace(ENA_INFO,
844			    "RX Soft LRO[%d] Initialized\n", qid);
845			rx_ring->lro.ifp = adapter->ifp;
846		}
847	}
848
849	return (0);
850
851err_buf_info_unmap:
852	while (i--) {
853		bus_dmamap_destroy(adapter->rx_buf_tag,
854		    rx_ring->rx_buffer_info[i].map);
855	}
856
857	free(rx_ring->free_rx_ids, M_DEVBUF);
858	rx_ring->free_rx_ids = NULL;
859	free(rx_ring->rx_buffer_info, M_DEVBUF);
860	rx_ring->rx_buffer_info = NULL;
861	return (ENOMEM);
862}
863
864/**
865 * ena_free_rx_resources - Free Rx Resources
866 * @adapter: network interface device structure
867 * @qid: queue index
868 *
869 * Free all receive software resources
870 **/
871static void
872ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
873{
874	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
875
876	/* Free buffer DMA maps, */
877	for (int i = 0; i < rx_ring->ring_size; i++) {
878		bus_dmamap_sync(adapter->rx_buf_tag,
879		    rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
880		m_freem(rx_ring->rx_buffer_info[i].mbuf);
881		rx_ring->rx_buffer_info[i].mbuf = NULL;
882		bus_dmamap_unload(adapter->rx_buf_tag,
883		    rx_ring->rx_buffer_info[i].map);
884		bus_dmamap_destroy(adapter->rx_buf_tag,
885		    rx_ring->rx_buffer_info[i].map);
886	}
887
888	/* free LRO resources, */
889	tcp_lro_free(&rx_ring->lro);
890
891	/* free allocated memory */
892	free(rx_ring->rx_buffer_info, M_DEVBUF);
893	rx_ring->rx_buffer_info = NULL;
894
895	free(rx_ring->free_rx_ids, M_DEVBUF);
896	rx_ring->free_rx_ids = NULL;
897}
898
899/**
900 * ena_setup_all_rx_resources - allocate all queues Rx resources
901 * @adapter: network interface device structure
902 *
903 * Returns 0 on success, otherwise on failure.
904 **/
905static int
906ena_setup_all_rx_resources(struct ena_adapter *adapter)
907{
908	int i, rc = 0;
909
910	for (i = 0; i < adapter->num_queues; i++) {
911		rc = ena_setup_rx_resources(adapter, i);
912		if (rc != 0) {
913			device_printf(adapter->pdev,
914			    "Allocation for Rx Queue %u failed\n", i);
915			goto err_setup_rx;
916		}
917	}
918	return (0);
919
920err_setup_rx:
921	/* rewind the index freeing the rings as we go */
922	while (i--)
923		ena_free_rx_resources(adapter, i);
924	return (rc);
925}
926
927/**
928 * ena_free_all_rx_resources - Free Rx resources for all queues
929 * @adapter: network interface device structure
930 *
931 * Free all receive software resources
932 **/
933static void
934ena_free_all_rx_resources(struct ena_adapter *adapter)
935{
936	int i;
937
938	for (i = 0; i < adapter->num_queues; i++)
939		ena_free_rx_resources(adapter, i);
940}
941
942static inline int
943ena_alloc_rx_mbuf(struct ena_adapter *adapter,
944    struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
945{
946	struct ena_com_buf *ena_buf;
947	bus_dma_segment_t segs[1];
948	int nsegs, error;
949	int mlen;
950
951	/* if previous allocated frag is not used */
952	if (unlikely(rx_info->mbuf != NULL))
953		return (0);
954
955	/* Get mbuf using UMA allocator */
956	rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES);
957
958	if (unlikely(rx_info->mbuf == NULL)) {
959		counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
960		rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
961		if (unlikely(rx_info->mbuf == NULL)) {
962			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
963			return (ENOMEM);
964		}
965		mlen = MCLBYTES;
966	} else {
967		mlen = MJUM16BYTES;
968	}
969	/* Set mbuf length*/
970	rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
971
972	/* Map packets for DMA */
973	ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
974	    "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
975	    adapter->rx_buf_tag,rx_info->mbuf, rx_info->mbuf->m_len);
976	error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
977	    rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
978	if (unlikely((error != 0) || (nsegs != 1))) {
979		ena_trace(ENA_WARNING, "failed to map mbuf, error: %d, "
980		    "nsegs: %d\n", error, nsegs);
981		counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
982		goto exit;
983
984	}
985
986	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
987
988	ena_buf = &rx_info->ena_buf;
989	ena_buf->paddr = segs[0].ds_addr;
990	ena_buf->len = mlen;
991
992	ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
993	    "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
994	    rx_info->mbuf, rx_info,ena_buf->len, (uintmax_t)ena_buf->paddr);
995
996	return (0);
997
998exit:
999	m_freem(rx_info->mbuf);
1000	rx_info->mbuf = NULL;
1001	return (EFAULT);
1002}
1003
1004static void
1005ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
1006    struct ena_rx_buffer *rx_info)
1007{
1008
1009	if (rx_info->mbuf == NULL) {
1010		ena_trace(ENA_WARNING, "Trying to free unallocated buffer\n");
1011		return;
1012	}
1013
1014	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1015	    BUS_DMASYNC_POSTREAD);
1016	bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
1017	m_freem(rx_info->mbuf);
1018	rx_info->mbuf = NULL;
1019}
1020
1021/**
1022 * ena_refill_rx_bufs - Refills ring with descriptors
1023 * @rx_ring: the ring which we want to feed with free descriptors
1024 * @num: number of descriptors to refill
1025 * Refills the ring with newly allocated DMA-mapped mbufs for receiving
1026 **/
1027static int
1028ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
1029{
1030	struct ena_adapter *adapter = rx_ring->adapter;
1031	uint16_t next_to_use, req_id;
1032	uint32_t i;
1033	int rc;
1034
1035	ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d\n",
1036	    rx_ring->qid);
1037
1038	next_to_use = rx_ring->next_to_use;
1039
1040	for (i = 0; i < num; i++) {
1041		struct ena_rx_buffer *rx_info;
1042
1043		ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC,
1044		    "RX buffer - next to use: %d\n", next_to_use);
1045
1046		req_id = rx_ring->free_rx_ids[next_to_use];
1047		rx_info = &rx_ring->rx_buffer_info[req_id];
1048
1049		rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1050		if (unlikely(rc != 0)) {
1051			ena_trace(ENA_WARNING,
1052			    "failed to alloc buffer for rx queue %d\n",
1053			    rx_ring->qid);
1054			break;
1055		}
1056		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1057		    &rx_info->ena_buf, req_id);
1058		if (unlikely(rc != 0)) {
1059			ena_trace(ENA_WARNING,
1060			    "failed to add buffer for rx queue %d\n",
1061			    rx_ring->qid);
1062			break;
1063		}
1064		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1065		    rx_ring->ring_size);
1066	}
1067
1068	if (unlikely(i < num)) {
1069		counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1070		ena_trace(ENA_WARNING,
1071		     "refilled rx qid %d with only %d mbufs (from %d)\n",
1072		     rx_ring->qid, i, num);
1073	}
1074
1075	if (likely(i != 0)) {
1076		wmb();
1077		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1078	}
1079	rx_ring->next_to_use = next_to_use;
1080	return (i);
1081}
1082
1083static void
1084ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1085{
1086	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1087	unsigned int i;
1088
1089	for (i = 0; i < rx_ring->ring_size; i++) {
1090		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1091
1092		if (rx_info->mbuf != NULL)
1093			ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1094	}
1095}
1096
1097/**
1098 * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1099 * @adapter: network interface device structure
1100 *
1101 */
1102static void
1103ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1104{
1105	struct ena_ring *rx_ring;
1106	int i, rc, bufs_num;
1107
1108	for (i = 0; i < adapter->num_queues; i++) {
1109		rx_ring = &adapter->rx_ring[i];
1110		bufs_num = rx_ring->ring_size - 1;
1111		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1112
1113		if (unlikely(rc != bufs_num))
1114			ena_trace(ENA_WARNING, "refilling Queue %d failed. "
1115			    "Allocated %d buffers from: %d\n", i, rc, bufs_num);
1116	}
1117}
1118
1119static void
1120ena_free_all_rx_bufs(struct ena_adapter *adapter)
1121{
1122	int i;
1123
1124	for (i = 0; i < adapter->num_queues; i++)
1125		ena_free_rx_bufs(adapter, i);
1126}
1127
1128/**
1129 * ena_free_tx_bufs - Free Tx Buffers per Queue
1130 * @adapter: network interface device structure
1131 * @qid: queue index
1132 **/
1133static void
1134ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1135{
1136	bool print_once = true;
1137	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1138
1139	ENA_RING_MTX_LOCK(tx_ring);
1140	for (int i = 0; i < tx_ring->ring_size; i++) {
1141		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1142
1143		if (tx_info->mbuf == NULL)
1144			continue;
1145
1146		if (print_once) {
1147			device_printf(adapter->pdev,
1148			    "free uncompleted tx mbuf qid %d idx 0x%x\n",
1149			    qid, i);
1150			print_once = false;
1151		} else {
1152			ena_trace(ENA_DBG,
1153			    "free uncompleted tx mbuf qid %d idx 0x%x\n",
1154			     qid, i);
1155		}
1156
1157		if (tx_info->head_mapped == true) {
1158			bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_head,
1159			    BUS_DMASYNC_POSTWRITE);
1160			bus_dmamap_unload(adapter->tx_buf_tag,
1161			    tx_info->map_head);
1162			tx_info->head_mapped = false;
1163		}
1164
1165		if (tx_info->seg_mapped == true) {
1166			bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_seg,
1167			    BUS_DMASYNC_POSTWRITE);
1168			bus_dmamap_unload(adapter->tx_buf_tag,
1169			    tx_info->map_seg);
1170			tx_info->seg_mapped = false;
1171		}
1172
1173		m_free(tx_info->mbuf);
1174		tx_info->mbuf = NULL;
1175	}
1176	ENA_RING_MTX_UNLOCK(tx_ring);
1177}
1178
1179static void
1180ena_free_all_tx_bufs(struct ena_adapter *adapter)
1181{
1182
1183	for (int i = 0; i < adapter->num_queues; i++)
1184		ena_free_tx_bufs(adapter, i);
1185}
1186
1187static void
1188ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1189{
1190	uint16_t ena_qid;
1191	int i;
1192
1193	for (i = 0; i < adapter->num_queues; i++) {
1194		ena_qid = ENA_IO_TXQ_IDX(i);
1195		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1196	}
1197}
1198
1199static void
1200ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1201{
1202	uint16_t ena_qid;
1203	int i;
1204
1205	for (i = 0; i < adapter->num_queues; i++) {
1206		ena_qid = ENA_IO_RXQ_IDX(i);
1207		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1208	}
1209}
1210
1211static void
1212ena_destroy_all_io_queues(struct ena_adapter *adapter)
1213{
1214	struct ena_que *queue;
1215	int i;
1216
1217	for (i = 0; i < adapter->num_queues; i++) {
1218		queue = &adapter->que[i];
1219		while (taskqueue_cancel(queue->cleanup_tq,
1220		    &queue->cleanup_task, NULL))
1221			taskqueue_drain(queue->cleanup_tq,
1222			    &queue->cleanup_task);
1223		taskqueue_free(queue->cleanup_tq);
1224	}
1225
1226	ena_destroy_all_tx_queues(adapter);
1227	ena_destroy_all_rx_queues(adapter);
1228}
1229
1230static inline int
1231validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
1232{
1233	struct ena_adapter *adapter = tx_ring->adapter;
1234	struct ena_tx_buffer *tx_info = NULL;
1235
1236	if (likely(req_id < tx_ring->ring_size)) {
1237		tx_info = &tx_ring->tx_buffer_info[req_id];
1238		if (tx_info->mbuf != NULL)
1239			return (0);
1240		device_printf(adapter->pdev,
1241		    "tx_info doesn't have valid mbuf\n");
1242	}
1243
1244	device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id);
1245	counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
1246
1247	/* Trigger device reset */
1248	adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
1249	ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
1250
1251	return (EFAULT);
1252}
1253
1254static int
1255ena_create_io_queues(struct ena_adapter *adapter)
1256{
1257	struct ena_com_dev *ena_dev = adapter->ena_dev;
1258	struct ena_com_create_io_ctx ctx;
1259	struct ena_ring *ring;
1260	struct ena_que *queue;
1261	uint16_t ena_qid;
1262	uint32_t msix_vector;
1263	int rc, i;
1264
1265	/* Create TX queues */
1266	for (i = 0; i < adapter->num_queues; i++) {
1267		msix_vector = ENA_IO_IRQ_IDX(i);
1268		ena_qid = ENA_IO_TXQ_IDX(i);
1269		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1270		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1271		ctx.queue_size = adapter->tx_ring_size;
1272		ctx.msix_vector = msix_vector;
1273		ctx.qid = ena_qid;
1274		rc = ena_com_create_io_queue(ena_dev, &ctx);
1275		if (rc != 0) {
1276			device_printf(adapter->pdev,
1277			    "Failed to create io TX queue #%d rc: %d\n", i, rc);
1278			goto err_tx;
1279		}
1280		ring = &adapter->tx_ring[i];
1281		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1282		    &ring->ena_com_io_sq,
1283		    &ring->ena_com_io_cq);
1284		if (rc != 0) {
1285			device_printf(adapter->pdev,
1286			    "Failed to get TX queue handlers. TX queue num"
1287			    " %d rc: %d\n", i, rc);
1288			ena_com_destroy_io_queue(ena_dev, ena_qid);
1289			goto err_tx;
1290		}
1291	}
1292
1293	/* Create RX queues */
1294	for (i = 0; i < adapter->num_queues; i++) {
1295		msix_vector = ENA_IO_IRQ_IDX(i);
1296		ena_qid = ENA_IO_RXQ_IDX(i);
1297		ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1298		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1299		ctx.queue_size = adapter->rx_ring_size;
1300		ctx.msix_vector = msix_vector;
1301		ctx.qid = ena_qid;
1302		rc = ena_com_create_io_queue(ena_dev, &ctx);
1303		if (unlikely(rc != 0)) {
1304			device_printf(adapter->pdev,
1305			    "Failed to create io RX queue[%d] rc: %d\n", i, rc);
1306			goto err_rx;
1307		}
1308
1309		ring = &adapter->rx_ring[i];
1310		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1311		    &ring->ena_com_io_sq,
1312		    &ring->ena_com_io_cq);
1313		if (unlikely(rc != 0)) {
1314			device_printf(adapter->pdev,
1315			    "Failed to get RX queue handlers. RX queue num"
1316			    " %d rc: %d\n", i, rc);
1317			ena_com_destroy_io_queue(ena_dev, ena_qid);
1318			goto err_rx;
1319		}
1320	}
1321
1322	for (i = 0; i < adapter->num_queues; i++) {
1323		queue = &adapter->que[i];
1324
1325		TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
1326		queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
1327		    M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);
1328
1329		taskqueue_start_threads(&queue->cleanup_tq, 1, PI_NET,
1330		    "%s queue %d cleanup",
1331		    device_get_nameunit(adapter->pdev), i);
1332	}
1333
1334	return (0);
1335
1336err_rx:
1337	while (i--)
1338		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1339	i = adapter->num_queues;
1340err_tx:
1341	while (i--)
1342		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1343
1344	return (ENXIO);
1345}
1346
1347/**
1348 * ena_tx_cleanup - clear sent packets and corresponding descriptors
1349 * @tx_ring: ring for which we want to clean packets
1350 *
1351 * Once packets are sent, we ask the device in a loop for no longer used
1352 * descriptors. We find the related mbuf chain in a map (index in an array)
1353 * and free it, then update ring state.
1354 * This is performed in "endless" loop, updating ring pointers every
1355 * TX_COMMIT. The first check of free descriptor is performed before the actual
1356 * loop, then repeated at the loop end.
1357 **/
1358static int
1359ena_tx_cleanup(struct ena_ring *tx_ring)
1360{
1361	struct ena_adapter *adapter;
1362	struct ena_com_io_cq* io_cq;
1363	uint16_t next_to_clean;
1364	uint16_t req_id;
1365	uint16_t ena_qid;
1366	unsigned int total_done = 0;
1367	int rc;
1368	int commit = TX_COMMIT;
1369	int budget = TX_BUDGET;
1370	int work_done;
1371	bool above_thresh;
1372
1373	adapter = tx_ring->que->adapter;
1374	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
1375	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1376	next_to_clean = tx_ring->next_to_clean;
1377
1378	do {
1379		struct ena_tx_buffer *tx_info;
1380		struct mbuf *mbuf;
1381
1382		rc = ena_com_tx_comp_req_id_get(io_cq, &req_id);
1383		if (unlikely(rc != 0))
1384			break;
1385
1386		rc = validate_tx_req_id(tx_ring, req_id);
1387		if (unlikely(rc != 0))
1388			break;
1389
1390		tx_info = &tx_ring->tx_buffer_info[req_id];
1391
1392		mbuf = tx_info->mbuf;
1393
1394		tx_info->mbuf = NULL;
1395		bintime_clear(&tx_info->timestamp);
1396
1397		/* Map is no longer required */
1398		if (tx_info->head_mapped == true) {
1399			bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_head,
1400			    BUS_DMASYNC_POSTWRITE);
1401			bus_dmamap_unload(adapter->tx_buf_tag,
1402			    tx_info->map_head);
1403			tx_info->head_mapped = false;
1404		}
1405		if (tx_info->seg_mapped == true) {
1406			bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_seg,
1407			    BUS_DMASYNC_POSTWRITE);
1408			bus_dmamap_unload(adapter->tx_buf_tag,
1409			    tx_info->map_seg);
1410			tx_info->seg_mapped = false;
1411		}
1412
1413		ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed\n",
1414		    tx_ring->qid, mbuf);
1415
1416		m_freem(mbuf);
1417
1418		total_done += tx_info->tx_descs;
1419
1420		tx_ring->free_tx_ids[next_to_clean] = req_id;
1421		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1422		    tx_ring->ring_size);
1423
1424		if (unlikely(--commit == 0)) {
1425			commit = TX_COMMIT;
1426			/* update ring state every TX_COMMIT descriptor */
1427			tx_ring->next_to_clean = next_to_clean;
1428			ena_com_comp_ack(
1429			    &adapter->ena_dev->io_sq_queues[ena_qid],
1430			    total_done);
1431			ena_com_update_dev_comp_head(io_cq);
1432			total_done = 0;
1433		}
1434	} while (likely(--budget));
1435
1436	work_done = TX_BUDGET - budget;
1437
1438	ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d\n",
1439	tx_ring->qid, work_done);
1440
1441	/* If there is still something to commit update ring state */
1442	if (likely(commit != TX_COMMIT)) {
1443		tx_ring->next_to_clean = next_to_clean;
1444		ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
1445		    total_done);
1446		ena_com_update_dev_comp_head(io_cq);
1447	}
1448
1449	/*
1450	 * Need to make the rings circular update visible to
1451	 * ena_xmit_mbuf() before checking for tx_ring->running.
1452	 */
1453	mb();
1454
1455	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1456	    ENA_TX_RESUME_THRESH);
1457	if (unlikely(!tx_ring->running && above_thresh)) {
1458		ENA_RING_MTX_LOCK(tx_ring);
1459		above_thresh =
1460		    ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1461		    ENA_TX_RESUME_THRESH);
1462		if (!tx_ring->running && above_thresh) {
1463			tx_ring->running = true;
1464			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
1465			taskqueue_enqueue(tx_ring->enqueue_tq,
1466			    &tx_ring->enqueue_task);
1467		}
1468		ENA_RING_MTX_UNLOCK(tx_ring);
1469	}
1470
1471	return (work_done);
1472}
1473
1474static void
1475ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
1476    struct mbuf *mbuf)
1477{
1478	struct ena_adapter *adapter = rx_ring->adapter;
1479
1480	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1481		mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
1482
1483		if (ena_rx_ctx->frag &&
1484		    (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
1485			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1486			return;
1487		}
1488
1489		switch (ena_rx_ctx->l3_proto) {
1490		case ENA_ETH_IO_L3_PROTO_IPV4:
1491			switch (ena_rx_ctx->l4_proto) {
1492			case ENA_ETH_IO_L4_PROTO_TCP:
1493				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
1494				break;
1495			case ENA_ETH_IO_L4_PROTO_UDP:
1496				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
1497				break;
1498			default:
1499				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
1500			}
1501			break;
1502		case ENA_ETH_IO_L3_PROTO_IPV6:
1503			switch (ena_rx_ctx->l4_proto) {
1504			case ENA_ETH_IO_L4_PROTO_TCP:
1505				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
1506				break;
1507			case ENA_ETH_IO_L4_PROTO_UDP:
1508				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
1509				break;
1510			default:
1511				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
1512			}
1513			break;
1514		case ENA_ETH_IO_L3_PROTO_UNKNOWN:
1515			M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1516			break;
1517		default:
1518			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1519		}
1520	} else {
1521		mbuf->m_pkthdr.flowid = rx_ring->qid;
1522		M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1523	}
1524}
1525
1526/**
1527 * ena_rx_mbuf - assemble mbuf from descriptors
1528 * @rx_ring: ring for which we want to clean packets
1529 * @ena_bufs: buffer info
1530 * @ena_rx_ctx: metadata for this packet(s)
1531 * @next_to_clean: ring pointer, will be updated only upon success
1532 *
1533 **/
1534static struct mbuf*
1535ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
1536    struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
1537{
1538	struct mbuf *mbuf;
1539	struct ena_rx_buffer *rx_info;
1540	struct ena_adapter *adapter;
1541	unsigned int descs = ena_rx_ctx->descs;
1542	int rc;
1543	uint16_t ntc, len, req_id, buf = 0;
1544
1545	ntc = *next_to_clean;
1546	adapter = rx_ring->adapter;
1547
1548	len = ena_bufs[buf].len;
1549	req_id = ena_bufs[buf].req_id;
1550	rc = validate_rx_req_id(rx_ring, req_id);
1551	if (unlikely(rc != 0))
1552		return (NULL);
1553
1554	rx_info = &rx_ring->rx_buffer_info[req_id];
1555	if (unlikely(rx_info->mbuf == NULL)) {
1556		device_printf(adapter->pdev, "NULL mbuf in rx_info");
1557		return (NULL);
1558	}
1559
1560	ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx\n",
1561	    rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
1562
1563	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1564	    BUS_DMASYNC_POSTREAD);
1565	mbuf = rx_info->mbuf;
1566	mbuf->m_flags |= M_PKTHDR;
1567	mbuf->m_pkthdr.len = len;
1568	mbuf->m_len = len;
1569	mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
1570
1571	/* Fill mbuf with hash key and it's interpretation for optimization */
1572	ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
1573
1574	ena_trace(ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d\n",
1575	    mbuf, mbuf->m_flags, mbuf->m_pkthdr.len);
1576
1577	/* DMA address is not needed anymore, unmap it */
1578	bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
1579
1580	rx_info->mbuf = NULL;
1581	rx_ring->free_rx_ids[ntc] = req_id;
1582	ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
1583
1584	/*
1585	 * While we have more than 1 descriptors for one rcvd packet, append
1586	 * other mbufs to the main one
1587	 */
1588	while (--descs) {
1589		++buf;
1590		len = ena_bufs[buf].len;
1591		req_id = ena_bufs[buf].req_id;
1592		rc = validate_rx_req_id(rx_ring, req_id);
1593		if (unlikely(rc != 0)) {
1594			/*
1595			 * If the req_id is invalid, then the device will be
1596			 * reset. In that case we must free all mbufs that
1597			 * were already gathered.
1598			 */
1599			m_freem(mbuf);
1600			return (NULL);
1601		}
1602		rx_info = &rx_ring->rx_buffer_info[req_id];
1603
1604		if (unlikely(rx_info->mbuf == NULL)) {
1605			device_printf(adapter->pdev, "NULL mbuf in rx_info");
1606			/*
1607			 * If one of the required mbufs was not allocated yet,
1608			 * we can break there.
1609			 * All earlier used descriptors will be reallocated
1610			 * later and not used mbufs can be reused.
1611			 * The next_to_clean pointer will not be updated in case
1612			 * of an error, so caller should advance it manually
1613			 * in error handling routine to keep it up to date
1614			 * with hw ring.
1615			 */
1616			m_freem(mbuf);
1617			return (NULL);
1618		}
1619
1620		bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1621		    BUS_DMASYNC_POSTREAD);
1622		if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
1623			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1624			ena_trace(ENA_WARNING, "Failed to append Rx mbuf %p\n",
1625			    mbuf);
1626		}
1627
1628		ena_trace(ENA_DBG | ENA_RXPTH,
1629		    "rx mbuf updated. len %d\n", mbuf->m_pkthdr.len);
1630
1631		/* Free already appended mbuf, it won't be useful anymore */
1632		bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
1633		m_freem(rx_info->mbuf);
1634		rx_info->mbuf = NULL;
1635
1636		rx_ring->free_rx_ids[ntc] = req_id;
1637		ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
1638	}
1639
1640	*next_to_clean = ntc;
1641
1642	return (mbuf);
1643}
1644
1645/**
1646 * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
1647 **/
1648static inline void
1649ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
1650    struct mbuf *mbuf)
1651{
1652
1653	/* if IP and error */
1654	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1655	    ena_rx_ctx->l3_csum_err)) {
1656		/* ipv4 checksum error */
1657		mbuf->m_pkthdr.csum_flags = 0;
1658		counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1659		ena_trace(ENA_DBG, "RX IPv4 header checksum error\n");
1660		return;
1661	}
1662
1663	/* if TCP/UDP */
1664	if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1665	    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
1666		if (ena_rx_ctx->l4_csum_err) {
1667			/* TCP/UDP checksum error */
1668			mbuf->m_pkthdr.csum_flags = 0;
1669			counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1670			ena_trace(ENA_DBG, "RX L4 checksum error\n");
1671		} else {
1672			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1673			mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1674		}
1675	}
1676}
1677
1678/**
1679 * ena_rx_cleanup - handle rx irq
1680 * @arg: ring for which irq is being handled
1681 **/
1682static int
1683ena_rx_cleanup(struct ena_ring *rx_ring)
1684{
1685	struct ena_adapter *adapter;
1686	struct mbuf *mbuf;
1687	struct ena_com_rx_ctx ena_rx_ctx;
1688	struct ena_com_io_cq* io_cq;
1689	struct ena_com_io_sq* io_sq;
1690	if_t ifp;
1691	uint16_t ena_qid;
1692	uint16_t next_to_clean;
1693	uint32_t refill_required;
1694	uint32_t refill_threshold;
1695	uint32_t do_if_input = 0;
1696	unsigned int qid;
1697	int rc, i;
1698	int budget = RX_BUDGET;
1699
1700	adapter = rx_ring->que->adapter;
1701	ifp = adapter->ifp;
1702	qid = rx_ring->que->id;
1703	ena_qid = ENA_IO_RXQ_IDX(qid);
1704	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1705	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
1706	next_to_clean = rx_ring->next_to_clean;
1707
1708	ena_trace(ENA_DBG, "rx: qid %d\n", qid);
1709
1710	do {
1711		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1712		ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
1713		ena_rx_ctx.descs = 0;
1714		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
1715		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
1716		rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
1717
1718		if (unlikely(rc != 0))
1719			goto error;
1720
1721		if (unlikely(ena_rx_ctx.descs == 0))
1722			break;
1723
1724		ena_trace(ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. "
1725		    "descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1726		    rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1727		    ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1728
1729		/* Receive mbuf from the ring */
1730		mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
1731		    &ena_rx_ctx, &next_to_clean);
1732		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
1733		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
1734		/* Exit if we failed to retrieve a buffer */
1735		if (unlikely(mbuf == NULL)) {
1736			for (i = 0; i < ena_rx_ctx.descs; ++i) {
1737				rx_ring->free_rx_ids[next_to_clean] =
1738				    rx_ring->ena_bufs[i].req_id;
1739				next_to_clean =
1740				    ENA_RX_RING_IDX_NEXT(next_to_clean,
1741				    rx_ring->ring_size);
1742
1743			}
1744			break;
1745		}
1746
1747		if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
1748		    ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
1749			ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
1750		}
1751
1752		counter_enter();
1753		counter_u64_add_protected(rx_ring->rx_stats.bytes,
1754		    mbuf->m_pkthdr.len);
1755		counter_u64_add_protected(adapter->hw_stats.rx_bytes,
1756		    mbuf->m_pkthdr.len);
1757		counter_exit();
1758		/*
1759		 * LRO is only for IP/TCP packets and TCP checksum of the packet
1760		 * should be computed by hardware.
1761		 */
1762		do_if_input = 1;
1763		if (((ifp->if_capenable & IFCAP_LRO) != 0)  &&
1764		    ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
1765		    (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
1766			/*
1767			 * Send to the stack if:
1768			 *  - LRO not enabled, or
1769			 *  - no LRO resources, or
1770			 *  - lro enqueue fails
1771			 */
1772			if ((rx_ring->lro.lro_cnt != 0) &&
1773			    (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
1774					do_if_input = 0;
1775		}
1776		if (do_if_input != 0) {
1777			ena_trace(ENA_DBG | ENA_RXPTH,
1778			    "calling if_input() with mbuf %p\n", mbuf);
1779			(*ifp->if_input)(ifp, mbuf);
1780		}
1781
1782		counter_enter();
1783		counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
1784		counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
1785		counter_exit();
1786	} while (--budget);
1787
1788	rx_ring->next_to_clean = next_to_clean;
1789
1790	refill_required = ena_com_free_desc(io_sq);
1791	refill_threshold = min_t(int,
1792	    rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
1793	    ENA_RX_REFILL_THRESH_PACKET);
1794
1795	if (refill_required > refill_threshold) {
1796		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1797		ena_refill_rx_bufs(rx_ring, refill_required);
1798	}
1799
1800	tcp_lro_flush_all(&rx_ring->lro);
1801
1802	return (RX_BUDGET - budget);
1803
1804error:
1805	counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1);
1806
1807	/* Too many desc from the device. Trigger reset */
1808	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
1809		adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
1810		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
1811	}
1812
1813	return (0);
1814}
1815
1816/*********************************************************************
1817 *
1818 *  MSIX & Interrupt Service routine
1819 *
1820 **********************************************************************/
1821
1822/**
1823 * ena_handle_msix - MSIX Interrupt Handler for admin/async queue
1824 * @arg: interrupt number
1825 **/
1826static void
1827ena_intr_msix_mgmnt(void *arg)
1828{
1829	struct ena_adapter *adapter = (struct ena_adapter *)arg;
1830
1831	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1832	if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
1833		ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1834}
1835
1836static void
1837ena_cleanup(void *arg, int pending)
1838{
1839	struct ena_que	*que = arg;
1840	struct ena_adapter *adapter = que->adapter;
1841	if_t ifp = adapter->ifp;
1842	struct ena_ring *tx_ring;
1843	struct ena_ring *rx_ring;
1844	struct ena_com_io_cq* io_cq;
1845	struct ena_eth_io_intr_reg intr_reg;
1846	int qid, ena_qid;
1847	int txc, rxc, i;
1848
1849	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1850		return;
1851
1852	ena_trace(ENA_DBG, "MSI-X TX/RX routine\n");
1853
1854	tx_ring = que->tx_ring;
1855	rx_ring = que->rx_ring;
1856	qid = que->id;
1857	ena_qid = ENA_IO_TXQ_IDX(qid);
1858	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1859
1860	tx_ring->first_interrupt = true;
1861	rx_ring->first_interrupt = true;
1862
1863	for (i = 0; i < CLEAN_BUDGET; ++i) {
1864		rxc = ena_rx_cleanup(rx_ring);
1865		txc = ena_tx_cleanup(tx_ring);
1866
1867		if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1868			return;
1869
1870		if ((txc != TX_BUDGET) && (rxc != RX_BUDGET))
1871		       break;
1872	}
1873
1874	/* Signal that work is done and unmask interrupt */
1875	ena_com_update_intr_reg(&intr_reg,
1876	    RX_IRQ_INTERVAL,
1877	    TX_IRQ_INTERVAL,
1878	    true);
1879	ena_com_unmask_intr(io_cq, &intr_reg);
1880}
1881
1882/**
1883 * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
1884 * @arg: queue
1885 **/
1886static int
1887ena_handle_msix(void *arg)
1888{
1889	struct ena_que *queue = arg;
1890	struct ena_adapter *adapter = queue->adapter;
1891	if_t ifp = adapter->ifp;
1892
1893	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1894		return (FILTER_STRAY);
1895
1896	taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);
1897
1898	return (FILTER_HANDLED);
1899}
1900
1901static int
1902ena_enable_msix(struct ena_adapter *adapter)
1903{
1904	device_t dev = adapter->pdev;
1905	int msix_vecs, msix_req;
1906	int i, rc = 0;
1907
1908	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1909		device_printf(dev, "Error, MSI-X is already enabled\n");
1910		return (EINVAL);
1911	}
1912
1913	/* Reserved the max msix vectors we might need */
1914	msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_queues);
1915
1916	adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
1917	    M_DEVBUF, M_WAITOK | M_ZERO);
1918
1919	ena_trace(ENA_DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs);
1920
1921	for (i = 0; i < msix_vecs; i++) {
1922		adapter->msix_entries[i].entry = i;
1923		/* Vectors must start from 1 */
1924		adapter->msix_entries[i].vector = i + 1;
1925	}
1926
1927	msix_req = msix_vecs;
1928	rc = pci_alloc_msix(dev, &msix_vecs);
1929	if (unlikely(rc != 0)) {
1930		device_printf(dev,
1931		    "Failed to enable MSIX, vectors %d rc %d\n", msix_vecs, rc);
1932
1933		rc = ENOSPC;
1934		goto err_msix_free;
1935	}
1936
1937	if (msix_vecs != msix_req) {
1938		if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
1939			device_printf(dev,
1940			    "Not enough number of MSI-x allocated: %d\n",
1941			    msix_vecs);
1942			pci_release_msi(dev);
1943			rc = ENOSPC;
1944			goto err_msix_free;
1945		}
1946		device_printf(dev, "Enable only %d MSI-x (out of %d), reduce "
1947		    "the number of queues\n", msix_vecs, msix_req);
1948		adapter->num_queues = msix_vecs - ENA_ADMIN_MSIX_VEC;
1949	}
1950
1951	adapter->msix_vecs = msix_vecs;
1952	ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1953
1954	return (0);
1955
1956err_msix_free:
1957	free(adapter->msix_entries, M_DEVBUF);
1958	adapter->msix_entries = NULL;
1959
1960	return (rc);
1961}
1962
1963static void
1964ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1965{
1966
1967	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1968	    ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1969	    device_get_nameunit(adapter->pdev));
1970	/*
1971	 * Handler is NULL on purpose, it will be set
1972	 * when mgmnt interrupt is acquired
1973	 */
1974	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1975	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1976	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1977	    adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1978}
1979
1980static int
1981ena_setup_io_intr(struct ena_adapter *adapter)
1982{
1983	static int last_bind_cpu = -1;
1984	int irq_idx;
1985
1986	if (adapter->msix_entries == NULL)
1987		return (EINVAL);
1988
1989	for (int i = 0; i < adapter->num_queues; i++) {
1990		irq_idx = ENA_IO_IRQ_IDX(i);
1991
1992		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1993		    "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
1994		adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1995		adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1996		adapter->irq_tbl[irq_idx].vector =
1997		    adapter->msix_entries[irq_idx].vector;
1998		ena_trace(ENA_INFO | ENA_IOQ, "ena_setup_io_intr vector: %d\n",
1999		    adapter->msix_entries[irq_idx].vector);
2000
2001		/*
2002		 * We want to bind rings to the corresponding cpu
2003		 * using something similar to the RSS round-robin technique.
2004		 */
2005		if (unlikely(last_bind_cpu < 0))
2006			last_bind_cpu = CPU_FIRST();
2007		adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
2008		    last_bind_cpu;
2009		last_bind_cpu = CPU_NEXT(last_bind_cpu);
2010	}
2011
2012	return (0);
2013}
2014
2015static int
2016ena_request_mgmnt_irq(struct ena_adapter *adapter)
2017{
2018	struct ena_irq *irq;
2019	unsigned long flags;
2020	int rc, rcc;
2021
2022	flags = RF_ACTIVE | RF_SHAREABLE;
2023
2024	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2025	irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
2026	    &irq->vector, flags);
2027
2028	if (unlikely(irq->res == NULL)) {
2029		device_printf(adapter->pdev, "could not allocate "
2030		    "irq vector: %d\n", irq->vector);
2031		return (ENXIO);
2032	}
2033
2034	rc = bus_setup_intr(adapter->pdev, irq->res,
2035	    INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt,
2036	    irq->data, &irq->cookie);
2037	if (unlikely(rc != 0)) {
2038		device_printf(adapter->pdev, "failed to register "
2039		    "interrupt handler for irq %ju: %d\n",
2040		    rman_get_start(irq->res), rc);
2041		goto err_res_free;
2042	}
2043	irq->requested = true;
2044
2045	return (rc);
2046
2047err_res_free:
2048	ena_trace(ENA_INFO | ENA_ADMQ, "releasing resource for irq %d\n",
2049	    irq->vector);
2050	rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2051	    irq->vector, irq->res);
2052	if (unlikely(rcc != 0))
2053		device_printf(adapter->pdev, "dev has no parent while "
2054		    "releasing res for irq: %d\n", irq->vector);
2055	irq->res = NULL;
2056
2057	return (rc);
2058}
2059
2060static int
2061ena_request_io_irq(struct ena_adapter *adapter)
2062{
2063	struct ena_irq *irq;
2064	unsigned long flags = 0;
2065	int rc = 0, i, rcc;
2066
2067	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) {
2068		device_printf(adapter->pdev,
2069		    "failed to request I/O IRQ: MSI-X is not enabled\n");
2070		return (EINVAL);
2071	} else {
2072		flags = RF_ACTIVE | RF_SHAREABLE;
2073	}
2074
2075	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
2076		irq = &adapter->irq_tbl[i];
2077
2078		if (unlikely(irq->requested))
2079			continue;
2080
2081		irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
2082		    &irq->vector, flags);
2083		if (unlikely(irq->res == NULL)) {
2084			rc = ENOMEM;
2085			device_printf(adapter->pdev, "could not allocate "
2086			    "irq vector: %d\n", irq->vector);
2087			goto err;
2088		}
2089
2090		rc = bus_setup_intr(adapter->pdev, irq->res,
2091		    INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL,
2092		    irq->data, &irq->cookie);
2093		 if (unlikely(rc != 0)) {
2094			device_printf(adapter->pdev, "failed to register "
2095			    "interrupt handler for irq %ju: %d\n",
2096			    rman_get_start(irq->res), rc);
2097			goto err;
2098		}
2099		irq->requested = true;
2100
2101		ena_trace(ENA_INFO, "queue %d - cpu %d\n",
2102		    i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
2103	}
2104
2105	return (rc);
2106
2107err:
2108
2109	for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
2110		irq = &adapter->irq_tbl[i];
2111		rcc = 0;
2112
2113		/* Once we entered err: section and irq->requested is true we
2114		   free both intr and resources */
2115		if (irq->requested)
2116			rcc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
2117		if (unlikely(rcc != 0))
2118			device_printf(adapter->pdev, "could not release"
2119			    " irq: %d, error: %d\n", irq->vector, rcc);
2120
2121		/* If we entred err: section without irq->requested set we know
2122		   it was bus_alloc_resource_any() that needs cleanup, provided
2123		   res is not NULL. In case res is NULL no work in needed in
2124		   this iteration */
2125		rcc = 0;
2126		if (irq->res != NULL) {
2127			rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2128			    irq->vector, irq->res);
2129		}
2130		if (unlikely(rcc != 0))
2131			device_printf(adapter->pdev, "dev has no parent while "
2132			    "releasing res for irq: %d\n", irq->vector);
2133		irq->requested = false;
2134		irq->res = NULL;
2135	}
2136
2137	return (rc);
2138}
2139
2140static void
2141ena_free_mgmnt_irq(struct ena_adapter *adapter)
2142{
2143	struct ena_irq *irq;
2144	int rc;
2145
2146	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2147	if (irq->requested) {
2148		ena_trace(ENA_INFO | ENA_ADMQ, "tear down irq: %d\n",
2149		    irq->vector);
2150		rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
2151		if (unlikely(rc != 0))
2152			device_printf(adapter->pdev, "failed to tear "
2153			    "down irq: %d\n", irq->vector);
2154		irq->requested = 0;
2155	}
2156
2157	if (irq->res != NULL) {
2158		ena_trace(ENA_INFO | ENA_ADMQ, "release resource irq: %d\n",
2159		    irq->vector);
2160		rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2161		    irq->vector, irq->res);
2162		irq->res = NULL;
2163		if (unlikely(rc != 0))
2164			device_printf(adapter->pdev, "dev has no parent while "
2165			    "releasing res for irq: %d\n", irq->vector);
2166	}
2167}
2168
2169static void
2170ena_free_io_irq(struct ena_adapter *adapter)
2171{
2172	struct ena_irq *irq;
2173	int rc;
2174
2175	for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
2176		irq = &adapter->irq_tbl[i];
2177		if (irq->requested) {
2178			ena_trace(ENA_INFO | ENA_IOQ, "tear down irq: %d\n",
2179			    irq->vector);
2180			rc = bus_teardown_intr(adapter->pdev, irq->res,
2181			    irq->cookie);
2182			if (unlikely(rc != 0)) {
2183				device_printf(adapter->pdev, "failed to tear "
2184				    "down irq: %d\n", irq->vector);
2185			}
2186			irq->requested = 0;
2187		}
2188
2189		if (irq->res != NULL) {
2190			ena_trace(ENA_INFO | ENA_IOQ, "release resource irq: %d\n",
2191			    irq->vector);
2192			rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2193			    irq->vector, irq->res);
2194			irq->res = NULL;
2195			if (unlikely(rc != 0)) {
2196				device_printf(adapter->pdev, "dev has no parent"
2197				    " while releasing res for irq: %d\n",
2198				    irq->vector);
2199			}
2200		}
2201	}
2202}
2203
2204static void
2205ena_free_irqs(struct ena_adapter* adapter)
2206{
2207
2208	ena_free_io_irq(adapter);
2209	ena_free_mgmnt_irq(adapter);
2210	ena_disable_msix(adapter);
2211}
2212
2213static void
2214ena_disable_msix(struct ena_adapter *adapter)
2215{
2216
2217	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
2218		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
2219		pci_release_msi(adapter->pdev);
2220	}
2221
2222	adapter->msix_vecs = 0;
2223	if (adapter->msix_entries != NULL)
2224		free(adapter->msix_entries, M_DEVBUF);
2225	adapter->msix_entries = NULL;
2226}
2227
2228static void
2229ena_unmask_all_io_irqs(struct ena_adapter *adapter)
2230{
2231	struct ena_com_io_cq* io_cq;
2232	struct ena_eth_io_intr_reg intr_reg;
2233	uint16_t ena_qid;
2234	int i;
2235
2236	/* Unmask interrupts for all queues */
2237	for (i = 0; i < adapter->num_queues; i++) {
2238		ena_qid = ENA_IO_TXQ_IDX(i);
2239		io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
2240		ena_com_update_intr_reg(&intr_reg, 0, 0, true);
2241		ena_com_unmask_intr(io_cq, &intr_reg);
2242	}
2243}
2244
2245/* Configure the Rx forwarding */
2246static int
2247ena_rss_configure(struct ena_adapter *adapter)
2248{
2249	struct ena_com_dev *ena_dev = adapter->ena_dev;
2250	int rc;
2251
2252	/* Set indirect table */
2253	rc = ena_com_indirect_table_set(ena_dev);
2254	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2255		return (rc);
2256
2257	/* Configure hash function (if supported) */
2258	rc = ena_com_set_hash_function(ena_dev);
2259	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2260		return (rc);
2261
2262	/* Configure hash inputs (if supported) */
2263	rc = ena_com_set_hash_ctrl(ena_dev);
2264	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2265		return (rc);
2266
2267	return (0);
2268}
2269
2270static int
2271ena_up_complete(struct ena_adapter *adapter)
2272{
2273	int rc;
2274
2275	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
2276		rc = ena_rss_configure(adapter);
2277		if (rc != 0)
2278			return (rc);
2279	}
2280
2281	rc = ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);
2282	if (unlikely(rc != 0))
2283		return (rc);
2284
2285	ena_refill_all_rx_bufs(adapter);
2286	ena_reset_counters((counter_u64_t *)&adapter->hw_stats,
2287	    sizeof(adapter->hw_stats));
2288
2289	return (0);
2290}
2291
2292static int
2293ena_up(struct ena_adapter *adapter)
2294{
2295	int rc = 0;
2296
2297	if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2298		device_printf(adapter->pdev, "device is not attached!\n");
2299		return (ENXIO);
2300	}
2301
2302	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2303		device_printf(adapter->pdev, "device is going UP\n");
2304
2305		/* setup interrupts for IO queues */
2306		rc = ena_setup_io_intr(adapter);
2307		if (unlikely(rc != 0)) {
2308			ena_trace(ENA_ALERT, "error setting up IO interrupt\n");
2309			goto error;
2310		}
2311		rc = ena_request_io_irq(adapter);
2312		if (unlikely(rc != 0)) {
2313			ena_trace(ENA_ALERT, "err_req_irq\n");
2314			goto error;
2315		}
2316
2317		/* allocate transmit descriptors */
2318		rc = ena_setup_all_tx_resources(adapter);
2319		if (unlikely(rc != 0)) {
2320			ena_trace(ENA_ALERT, "err_setup_tx\n");
2321			goto err_setup_tx;
2322		}
2323
2324		/* allocate receive descriptors */
2325		rc = ena_setup_all_rx_resources(adapter);
2326		if (unlikely(rc != 0)) {
2327			ena_trace(ENA_ALERT, "err_setup_rx\n");
2328			goto err_setup_rx;
2329		}
2330
2331		/* create IO queues for Rx & Tx */
2332		rc = ena_create_io_queues(adapter);
2333		if (unlikely(rc != 0)) {
2334			ena_trace(ENA_ALERT,
2335			    "create IO queues failed\n");
2336			goto err_io_que;
2337		}
2338
2339		if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
2340			if_link_state_change(adapter->ifp, LINK_STATE_UP);
2341
2342		rc = ena_up_complete(adapter);
2343		if (unlikely(rc != 0))
2344			goto err_up_complete;
2345
2346		counter_u64_add(adapter->dev_stats.interface_up, 1);
2347
2348		ena_update_hwassist(adapter);
2349
2350		if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING,
2351		    IFF_DRV_OACTIVE);
2352
2353		callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
2354		    ena_timer_service, (void *)adapter, 0);
2355
2356		ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2357
2358		ena_unmask_all_io_irqs(adapter);
2359	}
2360
2361	return (0);
2362
2363err_up_complete:
2364	ena_destroy_all_io_queues(adapter);
2365err_io_que:
2366	ena_free_all_rx_resources(adapter);
2367err_setup_rx:
2368	ena_free_all_tx_resources(adapter);
2369err_setup_tx:
2370	ena_free_io_irq(adapter);
2371error:
2372	return (rc);
2373}
2374
2375static uint64_t
2376ena_get_counter(if_t ifp, ift_counter cnt)
2377{
2378	struct ena_adapter *adapter;
2379	struct ena_hw_stats *stats;
2380
2381	adapter = if_getsoftc(ifp);
2382	stats = &adapter->hw_stats;
2383
2384	switch (cnt) {
2385	case IFCOUNTER_IPACKETS:
2386		return (counter_u64_fetch(stats->rx_packets));
2387	case IFCOUNTER_OPACKETS:
2388		return (counter_u64_fetch(stats->tx_packets));
2389	case IFCOUNTER_IBYTES:
2390		return (counter_u64_fetch(stats->rx_bytes));
2391	case IFCOUNTER_OBYTES:
2392		return (counter_u64_fetch(stats->tx_bytes));
2393	case IFCOUNTER_IQDROPS:
2394		return (counter_u64_fetch(stats->rx_drops));
2395	default:
2396		return (if_get_counter_default(ifp, cnt));
2397	}
2398}
2399
2400static int
2401ena_media_change(if_t ifp)
2402{
2403	/* Media Change is not supported by firmware */
2404	return (0);
2405}
2406
2407static void
2408ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2409{
2410	struct ena_adapter *adapter = if_getsoftc(ifp);
2411	ena_trace(ENA_DBG, "enter\n");
2412
2413	mtx_lock(&adapter->global_mtx);
2414
2415	ifmr->ifm_status = IFM_AVALID;
2416	ifmr->ifm_active = IFM_ETHER;
2417
2418	if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) {
2419		mtx_unlock(&adapter->global_mtx);
2420		ena_trace(ENA_INFO, "Link is down\n");
2421		return;
2422	}
2423
2424	ifmr->ifm_status |= IFM_ACTIVE;
2425	ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX;
2426
2427	mtx_unlock(&adapter->global_mtx);
2428}
2429
2430static void
2431ena_init(void *arg)
2432{
2433	struct ena_adapter *adapter = (struct ena_adapter *)arg;
2434
2435	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2436		sx_xlock(&adapter->ioctl_sx);
2437		ena_up(adapter);
2438		sx_unlock(&adapter->ioctl_sx);
2439	}
2440}
2441
2442static int
2443ena_ioctl(if_t ifp, u_long command, caddr_t data)
2444{
2445	struct ena_adapter *adapter;
2446	struct ifreq *ifr;
2447	int rc;
2448
2449	adapter = ifp->if_softc;
2450	ifr = (struct ifreq *)data;
2451
2452	/*
2453	 * Acquiring lock to prevent from running up and down routines parallel.
2454	 */
2455	rc = 0;
2456	switch (command) {
2457	case SIOCSIFMTU:
2458		if (ifp->if_mtu == ifr->ifr_mtu)
2459			break;
2460		sx_xlock(&adapter->ioctl_sx);
2461		ena_down(adapter);
2462
2463		ena_change_mtu(ifp, ifr->ifr_mtu);
2464
2465		rc = ena_up(adapter);
2466		sx_unlock(&adapter->ioctl_sx);
2467		break;
2468
2469	case SIOCSIFFLAGS:
2470		if ((ifp->if_flags & IFF_UP) != 0) {
2471			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2472				if ((ifp->if_flags & (IFF_PROMISC |
2473				    IFF_ALLMULTI)) != 0) {
2474					device_printf(adapter->pdev,
2475					    "ioctl promisc/allmulti\n");
2476				}
2477			} else {
2478				sx_xlock(&adapter->ioctl_sx);
2479				rc = ena_up(adapter);
2480				sx_unlock(&adapter->ioctl_sx);
2481			}
2482		} else {
2483			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2484				sx_xlock(&adapter->ioctl_sx);
2485				ena_down(adapter);
2486				sx_unlock(&adapter->ioctl_sx);
2487			}
2488		}
2489		break;
2490
2491	case SIOCADDMULTI:
2492	case SIOCDELMULTI:
2493		break;
2494
2495	case SIOCSIFMEDIA:
2496	case SIOCGIFMEDIA:
2497		rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2498		break;
2499
2500	case SIOCSIFCAP:
2501		{
2502			int reinit = 0;
2503
2504			if (ifr->ifr_reqcap != ifp->if_capenable) {
2505				ifp->if_capenable = ifr->ifr_reqcap;
2506				reinit = 1;
2507			}
2508
2509			if ((reinit != 0) &&
2510			    ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) {
2511				sx_xlock(&adapter->ioctl_sx);
2512				ena_down(adapter);
2513				rc = ena_up(adapter);
2514				sx_unlock(&adapter->ioctl_sx);
2515			}
2516		}
2517
2518		break;
2519	default:
2520		rc = ether_ioctl(ifp, command, data);
2521		break;
2522	}
2523
2524	return (rc);
2525}
2526
2527static int
2528ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2529{
2530	int caps = 0;
2531
2532	if ((feat->offload.tx &
2533	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2534	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2535	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2536		caps |= IFCAP_TXCSUM;
2537
2538	if ((feat->offload.tx &
2539	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2540	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2541		caps |= IFCAP_TXCSUM_IPV6;
2542
2543	if ((feat->offload.tx &
2544	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2545		caps |= IFCAP_TSO4;
2546
2547	if ((feat->offload.tx &
2548	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2549		caps |= IFCAP_TSO6;
2550
2551	if ((feat->offload.rx_supported &
2552	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2553	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2554		caps |= IFCAP_RXCSUM;
2555
2556	if ((feat->offload.rx_supported &
2557	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2558		caps |= IFCAP_RXCSUM_IPV6;
2559
2560	caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
2561
2562	return (caps);
2563}
2564
2565static void
2566ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2567{
2568
2569	host_info->supported_network_features[0] =
2570	    (uint32_t)if_getcapabilities(ifp);
2571}
2572
2573static void
2574ena_update_hwassist(struct ena_adapter *adapter)
2575{
2576	if_t ifp = adapter->ifp;
2577	uint32_t feat = adapter->tx_offload_cap;
2578	int cap = if_getcapenable(ifp);
2579	int flags = 0;
2580
2581	if_clearhwassist(ifp);
2582
2583	if ((cap & IFCAP_TXCSUM) != 0) {
2584		if ((feat &
2585		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2586			flags |= CSUM_IP;
2587		if ((feat &
2588		    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2589		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2590			flags |= CSUM_IP_UDP | CSUM_IP_TCP;
2591	}
2592
2593	if ((cap & IFCAP_TXCSUM_IPV6) != 0)
2594		flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2595
2596	if ((cap & IFCAP_TSO4) != 0)
2597		flags |= CSUM_IP_TSO;
2598
2599	if ((cap & IFCAP_TSO6) != 0)
2600		flags |= CSUM_IP6_TSO;
2601
2602	if_sethwassistbits(ifp, flags, 0);
2603}
2604
2605static int
2606ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2607    struct ena_com_dev_get_features_ctx *feat)
2608{
2609	if_t ifp;
2610	int caps = 0;
2611
2612	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2613	if (unlikely(ifp == NULL)) {
2614		ena_trace(ENA_ALERT, "can not allocate ifnet structure\n");
2615		return (ENXIO);
2616	}
2617	if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2618	if_setdev(ifp, pdev);
2619	if_setsoftc(ifp, adapter);
2620
2621	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2622	if_setinitfn(ifp, ena_init);
2623	if_settransmitfn(ifp, ena_mq_start);
2624	if_setqflushfn(ifp, ena_qflush);
2625	if_setioctlfn(ifp, ena_ioctl);
2626	if_setgetcounterfn(ifp, ena_get_counter);
2627
2628	if_setsendqlen(ifp, adapter->tx_ring_size);
2629	if_setsendqready(ifp);
2630	if_setmtu(ifp, ETHERMTU);
2631	if_setbaudrate(ifp, 0);
2632	/* Zeroize capabilities... */
2633	if_setcapabilities(ifp, 0);
2634	if_setcapenable(ifp, 0);
2635	/* check hardware support */
2636	caps = ena_get_dev_offloads(feat);
2637	/* ... and set them */
2638	if_setcapabilitiesbit(ifp, caps, 0);
2639
2640	/* TSO parameters */
2641	ifp->if_hw_tsomax = ENA_TSO_MAXSIZE -
2642	    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2643	ifp->if_hw_tsomaxsegcount = adapter->max_tx_sgl_size - 1;
2644	ifp->if_hw_tsomaxsegsize = ENA_TSO_MAXSIZE;
2645
2646	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2647	if_setcapenable(ifp, if_getcapabilities(ifp));
2648
2649	/*
2650	 * Specify the media types supported by this adapter and register
2651	 * callbacks to update media and link information
2652	 */
2653	ifmedia_init(&adapter->media, IFM_IMASK,
2654	    ena_media_change, ena_media_status);
2655	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2656	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2657
2658	ether_ifattach(ifp, adapter->mac_addr);
2659
2660	return (0);
2661}
2662
2663static void
2664ena_down(struct ena_adapter *adapter)
2665{
2666	int rc;
2667
2668	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2669		device_printf(adapter->pdev, "device is going DOWN\n");
2670
2671		callout_drain(&adapter->timer_service);
2672
2673		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2674		if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE,
2675		    IFF_DRV_RUNNING);
2676
2677		ena_free_io_irq(adapter);
2678
2679		if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) {
2680			rc = ena_com_dev_reset(adapter->ena_dev,
2681			    adapter->reset_reason);
2682			if (unlikely(rc != 0))
2683				device_printf(adapter->pdev,
2684				    "Device reset failed\n");
2685		}
2686
2687		ena_destroy_all_io_queues(adapter);
2688
2689		ena_free_all_tx_bufs(adapter);
2690		ena_free_all_rx_bufs(adapter);
2691		ena_free_all_tx_resources(adapter);
2692		ena_free_all_rx_resources(adapter);
2693
2694		counter_u64_add(adapter->dev_stats.interface_down, 1);
2695	}
2696}
2697
2698static void
2699ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf)
2700{
2701	struct ena_com_tx_meta *ena_meta;
2702	struct ether_vlan_header *eh;
2703	struct mbuf *mbuf_next;
2704	u32 mss;
2705	bool offload;
2706	uint16_t etype;
2707	int ehdrlen;
2708	struct ip *ip;
2709	int iphlen;
2710	struct tcphdr *th;
2711	int offset;
2712
2713	offload = false;
2714	ena_meta = &ena_tx_ctx->ena_meta;
2715	mss = mbuf->m_pkthdr.tso_segsz;
2716
2717	if (mss != 0)
2718		offload = true;
2719
2720	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2721		offload = true;
2722
2723	if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
2724		offload = true;
2725
2726	if (!offload) {
2727		ena_tx_ctx->meta_valid = 0;
2728		return;
2729	}
2730
2731	/* Determine where frame payload starts. */
2732	eh = mtod(mbuf, struct ether_vlan_header *);
2733	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2734		etype = ntohs(eh->evl_proto);
2735		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2736	} else {
2737		etype = ntohs(eh->evl_encap_proto);
2738		ehdrlen = ETHER_HDR_LEN;
2739	}
2740
2741	mbuf_next = m_getptr(mbuf, ehdrlen, &offset);
2742	ip = (struct ip *)(mtodo(mbuf_next, offset));
2743	iphlen = ip->ip_hl << 2;
2744
2745	mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
2746	th = (struct tcphdr *)(mtodo(mbuf_next, offset));
2747
2748	if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2749		ena_tx_ctx->l3_csum_enable = 1;
2750	}
2751	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
2752		ena_tx_ctx->tso_enable = 1;
2753		ena_meta->l4_hdr_len = (th->th_off);
2754	}
2755
2756	switch (etype) {
2757	case ETHERTYPE_IP:
2758		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2759		if ((ip->ip_off & htons(IP_DF)) != 0)
2760			ena_tx_ctx->df = 1;
2761		break;
2762	case ETHERTYPE_IPV6:
2763		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2764
2765	default:
2766		break;
2767	}
2768
2769	if (ip->ip_p == IPPROTO_TCP) {
2770		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2771		if ((mbuf->m_pkthdr.csum_flags &
2772		    (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
2773			ena_tx_ctx->l4_csum_enable = 1;
2774		else
2775			ena_tx_ctx->l4_csum_enable = 0;
2776	} else if (ip->ip_p == IPPROTO_UDP) {
2777		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2778		if ((mbuf->m_pkthdr.csum_flags &
2779		    (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
2780			ena_tx_ctx->l4_csum_enable = 1;
2781		else
2782			ena_tx_ctx->l4_csum_enable = 0;
2783	} else {
2784		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
2785		ena_tx_ctx->l4_csum_enable = 0;
2786	}
2787
2788	ena_meta->mss = mss;
2789	ena_meta->l3_hdr_len = iphlen;
2790	ena_meta->l3_hdr_offset = ehdrlen;
2791	ena_tx_ctx->meta_valid = 1;
2792}
2793
2794static int
2795ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
2796{
2797	struct ena_adapter *adapter;
2798	struct mbuf *collapsed_mbuf;
2799	int num_frags;
2800
2801	adapter = tx_ring->adapter;
2802	num_frags = ena_mbuf_count(*mbuf);
2803
2804	/* One segment must be reserved for configuration descriptor. */
2805	if (num_frags < adapter->max_tx_sgl_size)
2806		return (0);
2807	counter_u64_add(tx_ring->tx_stats.collapse, 1);
2808
2809	collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
2810	    adapter->max_tx_sgl_size - 1);
2811	if (unlikely(collapsed_mbuf == NULL)) {
2812		counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
2813		return (ENOMEM);
2814	}
2815
2816	/* If mbuf was collapsed succesfully, original mbuf is released. */
2817	*mbuf = collapsed_mbuf;
2818
2819	return (0);
2820}
2821
2822static void
2823ena_dmamap_llq(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2824{
2825	struct ena_com_buf *ena_buf = arg;
2826
2827	if (unlikely(error != 0)) {
2828		ena_buf->paddr = 0;
2829		return;
2830	}
2831
2832	KASSERT(nseg == 1, ("Invalid num of segments for LLQ dma"));
2833
2834	ena_buf->paddr = segs->ds_addr;
2835	ena_buf->len = segs->ds_len;
2836}
2837
2838static int
2839ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
2840    struct mbuf *mbuf, void **push_hdr, u16 *header_len)
2841{
2842	struct ena_adapter *adapter = tx_ring->adapter;
2843	struct ena_com_buf *ena_buf;
2844	bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
2845	uint32_t mbuf_head_len, frag_len;
2846	uint16_t push_len = 0;
2847	uint16_t delta = 0;
2848	int i, rc, nsegs;
2849
2850	mbuf_head_len = mbuf->m_len;
2851	tx_info->mbuf = mbuf;
2852	ena_buf = tx_info->bufs;
2853
2854	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2855		/*
2856		 * When the device is LLQ mode, the driver will copy
2857		 * the header into the device memory space.
2858		 * the ena_com layer assumes the header is in a linear
2859		 * memory space.
2860		 * This assumption might be wrong since part of the header
2861		 * can be in the fragmented buffers.
2862		 * First check if header fits in the mbuf. If not, copy it to
2863		 * separate buffer that will be holding linearized data.
2864		 */
2865		push_len = min_t(uint32_t, mbuf->m_pkthdr.len,
2866		    tx_ring->tx_max_header_size);
2867		*header_len = push_len;
2868		/* If header is in linear space, just point into mbuf's data. */
2869		if (likely(push_len <= mbuf_head_len)) {
2870			*push_hdr = mbuf->m_data;
2871		/*
2872		 * Otherwise, copy whole portion of header from multiple mbufs
2873		 * to intermediate buffer.
2874		 */
2875		} else {
2876			m_copydata(mbuf, 0, push_len,
2877			    tx_ring->push_buf_intermediate_buf);
2878			*push_hdr = tx_ring->push_buf_intermediate_buf;
2879
2880			counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
2881			delta = push_len - mbuf_head_len;
2882		}
2883
2884		ena_trace(ENA_DBG | ENA_TXPTH,
2885		    "mbuf: %p header_buf->vaddr: %p push_len: %d\n",
2886		    mbuf, *push_hdr, push_len);
2887
2888		/*
2889		* If header was in linear memory space, map for the dma rest of the data
2890		* in the first mbuf of the mbuf chain.
2891		*/
2892		if (mbuf_head_len > push_len) {
2893			rc = bus_dmamap_load(adapter->tx_buf_tag,
2894			    tx_info->map_head,
2895			mbuf->m_data + push_len, mbuf_head_len - push_len,
2896			ena_dmamap_llq, ena_buf, BUS_DMA_NOWAIT);
2897			if (unlikely((rc != 0) || (ena_buf->paddr == 0)))
2898				goto single_dma_error;
2899
2900			ena_buf++;
2901			tx_info->num_of_bufs++;
2902
2903			tx_info->head_mapped = true;
2904		}
2905		mbuf = mbuf->m_next;
2906	} else {
2907		*push_hdr = NULL;
2908		/*
2909		* header_len is just a hint for the device. Because FreeBSD is not
2910		* giving us information about packet header length and it is not
2911		* guaranteed that all packet headers will be in the 1st mbuf, setting
2912		* header_len to 0 is making the device ignore this value and resolve
2913		* header on it's own.
2914		*/
2915		*header_len = 0;
2916	}
2917
2918	/*
2919	 * If header is in non linear space (delta > 0), then skip mbufs
2920	 * containing header and map the last one containing both header and the
2921	 * packet data.
2922	 * The first segment is already counted in.
2923	 * If LLQ is not supported, the loop will be skipped.
2924	 */
2925	while (delta > 0) {
2926		frag_len = mbuf->m_len;
2927
2928		/*
2929		 * If whole segment contains header just move to the
2930		 * next one and reduce delta.
2931		 */
2932		if (unlikely(delta >= frag_len)) {
2933			delta -= frag_len;
2934		} else {
2935			/*
2936			 * Map rest of the packet data that was contained in
2937			 * the mbuf.
2938			 */
2939			rc = bus_dmamap_load(adapter->tx_buf_tag,
2940			    tx_info->map_head, mbuf->m_data + delta,
2941			    frag_len - delta, ena_dmamap_llq, ena_buf,
2942			    BUS_DMA_NOWAIT);
2943			if (unlikely((rc != 0) || (ena_buf->paddr == 0)))
2944				goto single_dma_error;
2945
2946			ena_buf++;
2947			tx_info->num_of_bufs++;
2948			tx_info->head_mapped = true;
2949
2950			delta = 0;
2951		}
2952
2953		mbuf = mbuf->m_next;
2954	}
2955
2956	if (mbuf == NULL) {
2957		return (0);
2958	}
2959
2960	/* Map rest of the mbufs */
2961	rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->map_seg, mbuf,
2962	    segs, &nsegs, BUS_DMA_NOWAIT);
2963	if (unlikely((rc != 0) || (nsegs == 0))) {
2964		ena_trace(ENA_WARNING,
2965		    "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
2966		goto dma_error;
2967	}
2968
2969	for (i = 0; i < nsegs; i++) {
2970		ena_buf->len = segs[i].ds_len;
2971		ena_buf->paddr = segs[i].ds_addr;
2972		ena_buf++;
2973	}
2974	tx_info->num_of_bufs += nsegs;
2975	tx_info->seg_mapped = true;
2976
2977	return (0);
2978
2979dma_error:
2980	if (tx_info->head_mapped == true)
2981		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map_head);
2982single_dma_error:
2983	counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
2984	tx_info->mbuf = NULL;
2985	return (rc);
2986}
2987
2988static int
2989ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
2990{
2991	struct ena_adapter *adapter;
2992	struct ena_tx_buffer *tx_info;
2993	struct ena_com_tx_ctx ena_tx_ctx;
2994	struct ena_com_dev *ena_dev;
2995	struct ena_com_io_sq* io_sq;
2996	void *push_hdr;
2997	uint16_t next_to_use;
2998	uint16_t req_id;
2999	uint16_t ena_qid;
3000	uint16_t header_len;
3001	int rc;
3002	int nb_hw_desc;
3003
3004	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
3005	adapter = tx_ring->que->adapter;
3006	ena_dev = adapter->ena_dev;
3007	io_sq = &ena_dev->io_sq_queues[ena_qid];
3008
3009	rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
3010	if (unlikely(rc != 0)) {
3011		ena_trace(ENA_WARNING,
3012		    "Failed to collapse mbuf! err: %d\n", rc);
3013		return (rc);
3014	}
3015
3016	ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);
3017
3018	next_to_use = tx_ring->next_to_use;
3019	req_id = tx_ring->free_tx_ids[next_to_use];
3020	tx_info = &tx_ring->tx_buffer_info[req_id];
3021	tx_info->num_of_bufs = 0;
3022
3023	rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
3024	if (unlikely(rc != 0)) {
3025		ena_trace(ENA_WARNING, "Failed to map TX mbuf\n");
3026		return (rc);
3027	}
3028	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
3029	ena_tx_ctx.ena_bufs = tx_info->bufs;
3030	ena_tx_ctx.push_header = push_hdr;
3031	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
3032	ena_tx_ctx.req_id = req_id;
3033	ena_tx_ctx.header_len = header_len;
3034
3035	/* Set flags and meta data */
3036	ena_tx_csum(&ena_tx_ctx, *mbuf);
3037
3038	if (tx_ring->acum_pkts == DB_THRESHOLD ||
3039	    ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
3040		ena_trace(ENA_DBG | ENA_TXPTH,
3041		    "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
3042		    tx_ring->que->id);
3043		wmb();
3044		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
3045		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
3046		tx_ring->acum_pkts = 0;
3047	}
3048
3049	/* Prepare the packet's descriptors and send them to device */
3050	rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
3051	if (unlikely(rc != 0)) {
3052		if (likely(rc == ENA_COM_NO_MEM)) {
3053			ena_trace(ENA_DBG | ENA_TXPTH,
3054			    "tx ring[%d] if out of space\n", tx_ring->que->id);
3055		} else {
3056			device_printf(adapter->pdev,
3057			    "failed to prepare tx bufs\n");
3058		}
3059		counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
3060		goto dma_error;
3061	}
3062
3063	counter_enter();
3064	counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
3065	counter_u64_add_protected(tx_ring->tx_stats.bytes,
3066	    (*mbuf)->m_pkthdr.len);
3067
3068	counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
3069	counter_u64_add_protected(adapter->hw_stats.tx_bytes,
3070	    (*mbuf)->m_pkthdr.len);
3071	counter_exit();
3072
3073	tx_info->tx_descs = nb_hw_desc;
3074	getbinuptime(&tx_info->timestamp);
3075	tx_info->print_once = true;
3076
3077	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
3078	    tx_ring->ring_size);
3079
3080	/* stop the queue when no more space available, the packet can have up
3081	 * to sgl_size + 2. one for the meta descriptor and one for header
3082	 * (if the header is larger than tx_max_header_size).
3083	 */
3084	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3085	    adapter->max_tx_sgl_size + 2))) {
3086		ena_trace(ENA_DBG | ENA_TXPTH, "Stop queue %d\n",
3087		    tx_ring->que->id);
3088
3089		tx_ring->running = false;
3090		counter_u64_add(tx_ring->tx_stats.queue_stop, 1);
3091
3092		/* There is a rare condition where this function decides to
3093		 * stop the queue but meanwhile tx_cleanup() updates
3094		 * next_to_completion and terminates.
3095		 * The queue will remain stopped forever.
3096		 * To solve this issue this function performs mb(), checks
3097		 * the wakeup condition and wakes up the queue if needed.
3098		 */
3099		mb();
3100
3101		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3102		    ENA_TX_RESUME_THRESH)) {
3103			tx_ring->running = true;
3104			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
3105		}
3106	}
3107
3108	if (tx_info->head_mapped == true)
3109		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_head,
3110		    BUS_DMASYNC_PREWRITE);
3111	if (tx_info->seg_mapped == true)
3112		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_seg,
3113		    BUS_DMASYNC_PREWRITE);
3114
3115	return (0);
3116
3117dma_error:
3118	tx_info->mbuf = NULL;
3119	if (tx_info->seg_mapped == true) {
3120		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map_seg);
3121		tx_info->seg_mapped = false;
3122	}
3123	if (tx_info->head_mapped == true) {
3124		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map_head);
3125		tx_info->head_mapped = false;
3126	}
3127
3128	return (rc);
3129}
3130
3131static void
3132ena_start_xmit(struct ena_ring *tx_ring)
3133{
3134	struct mbuf *mbuf;
3135	struct ena_adapter *adapter = tx_ring->adapter;
3136	struct ena_com_io_sq* io_sq;
3137	int ena_qid;
3138	int ret = 0;
3139
3140	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
3141		return;
3142
3143	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
3144		return;
3145
3146	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
3147	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
3148
3149	while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
3150		ena_trace(ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and"
3151		    " header csum flags %#jx\n",
3152		    mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
3153
3154		if (unlikely(!tx_ring->running)) {
3155			drbr_putback(adapter->ifp, tx_ring->br, mbuf);
3156			break;
3157		}
3158
3159		if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
3160			if (ret == ENA_COM_NO_MEM) {
3161				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
3162			} else if (ret == ENA_COM_NO_SPACE) {
3163				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
3164			} else {
3165				m_freem(mbuf);
3166				drbr_advance(adapter->ifp, tx_ring->br);
3167			}
3168
3169			break;
3170		}
3171
3172		drbr_advance(adapter->ifp, tx_ring->br);
3173
3174		if (unlikely((if_getdrvflags(adapter->ifp) &
3175		    IFF_DRV_RUNNING) == 0))
3176			return;
3177
3178		tx_ring->acum_pkts++;
3179
3180		BPF_MTAP(adapter->ifp, mbuf);
3181	}
3182
3183	if (likely(tx_ring->acum_pkts != 0)) {
3184		wmb();
3185		/* Trigger the dma engine */
3186		ena_com_write_sq_doorbell(io_sq);
3187		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
3188		tx_ring->acum_pkts = 0;
3189	}
3190
3191	if (unlikely(!tx_ring->running))
3192		taskqueue_enqueue(tx_ring->que->cleanup_tq,
3193		    &tx_ring->que->cleanup_task);
3194}
3195
3196static void
3197ena_deferred_mq_start(void *arg, int pending)
3198{
3199	struct ena_ring *tx_ring = (struct ena_ring *)arg;
3200	struct ifnet *ifp = tx_ring->adapter->ifp;
3201
3202	while (!drbr_empty(ifp, tx_ring->br) &&
3203	    tx_ring->running &&
3204	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
3205		ENA_RING_MTX_LOCK(tx_ring);
3206		ena_start_xmit(tx_ring);
3207		ENA_RING_MTX_UNLOCK(tx_ring);
3208	}
3209}
3210
3211static int
3212ena_mq_start(if_t ifp, struct mbuf *m)
3213{
3214	struct ena_adapter *adapter = ifp->if_softc;
3215	struct ena_ring *tx_ring;
3216	int ret, is_drbr_empty;
3217	uint32_t i;
3218
3219	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
3220		return (ENODEV);
3221
3222	/* Which queue to use */
3223	/*
3224	 * If everything is setup correctly, it should be the
3225	 * same bucket that the current CPU we're on is.
3226	 * It should improve performance.
3227	 */
3228	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
3229		i = m->m_pkthdr.flowid % adapter->num_queues;
3230	} else {
3231		i = curcpu % adapter->num_queues;
3232	}
3233	tx_ring = &adapter->tx_ring[i];
3234
3235	/* Check if drbr is empty before putting packet */
3236	is_drbr_empty = drbr_empty(ifp, tx_ring->br);
3237	ret = drbr_enqueue(ifp, tx_ring->br, m);
3238	if (unlikely(ret != 0)) {
3239		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
3240		return (ret);
3241	}
3242
3243	if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
3244		ena_start_xmit(tx_ring);
3245		ENA_RING_MTX_UNLOCK(tx_ring);
3246	} else {
3247		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
3248	}
3249
3250	return (0);
3251}
3252
3253static void
3254ena_qflush(if_t ifp)
3255{
3256	struct ena_adapter *adapter = ifp->if_softc;
3257	struct ena_ring *tx_ring = adapter->tx_ring;
3258	int i;
3259
3260	for(i = 0; i < adapter->num_queues; ++i, ++tx_ring)
3261		if (!drbr_empty(ifp, tx_ring->br)) {
3262			ENA_RING_MTX_LOCK(tx_ring);
3263			drbr_flush(ifp, tx_ring->br);
3264			ENA_RING_MTX_UNLOCK(tx_ring);
3265		}
3266
3267	if_qflush(ifp);
3268}
3269
3270static int
3271ena_calc_io_queue_num(struct ena_adapter *adapter,
3272    struct ena_com_dev_get_features_ctx *get_feat_ctx)
3273{
3274	struct ena_com_dev *ena_dev = adapter->ena_dev;
3275	int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num;
3276
3277	/* Regular queues capabilities */
3278	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3279		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3280		    &get_feat_ctx->max_queue_ext.max_queue_ext;
3281		io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
3282			max_queue_ext->max_rx_cq_num);
3283
3284		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
3285		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
3286	} else {
3287		struct ena_admin_queue_feature_desc *max_queues =
3288		    &get_feat_ctx->max_queues;
3289		io_tx_sq_num = max_queues->max_sq_num;
3290		io_tx_cq_num = max_queues->max_cq_num;
3291		io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
3292	}
3293
3294	/* In case of LLQ use the llq fields for the tx SQ/CQ */
3295	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3296		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
3297
3298	io_queue_num = min_t(int, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
3299	io_queue_num = min_t(int, io_queue_num, io_rx_num);
3300	io_queue_num = min_t(int, io_queue_num, io_tx_sq_num);
3301	io_queue_num = min_t(int, io_queue_num, io_tx_cq_num);
3302	/* 1 IRQ for for mgmnt and 1 IRQ for each TX/RX pair */
3303	io_queue_num = min_t(int, io_queue_num,
3304	    pci_msix_count(adapter->pdev) - 1);
3305
3306	return (io_queue_num);
3307}
3308
3309static int
3310ena_enable_wc(struct resource *res)
3311{
3312#if defined(__i386) || defined(__amd64)
3313	vm_offset_t va;
3314	vm_size_t len;
3315	int rc;
3316
3317	va = (vm_offset_t)rman_get_virtual(res);
3318	len = rman_get_size(res);
3319	/* Enable write combining */
3320	rc = pmap_change_attr(va, len, PAT_WRITE_COMBINING);
3321	if (unlikely(rc != 0)) {
3322		ena_trace(ENA_ALERT, "pmap_change_attr failed, %d\n", rc);
3323		return (rc);
3324	}
3325
3326	return (0);
3327#endif
3328	return (EOPNOTSUPP);
3329}
3330
3331static int
3332ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev,
3333    struct ena_admin_feature_llq_desc *llq,
3334    struct ena_llq_configurations *llq_default_configurations)
3335{
3336	struct ena_adapter *adapter = device_get_softc(pdev);
3337	int rc, rid;
3338	uint32_t llq_feature_mask;
3339
3340	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3341	if (!(ena_dev->supported_features & llq_feature_mask)) {
3342		device_printf(pdev,
3343		    "LLQ is not supported. Fallback to host mode policy.\n");
3344		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3345		return (0);
3346	}
3347
3348	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3349	if (unlikely(rc != 0)) {
3350		device_printf(pdev, "Failed to configure the device mode. "
3351		    "Fallback to host mode policy.\n");
3352		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3353		return (0);
3354	}
3355
3356	/* Nothing to config, exit */
3357	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
3358		return (0);
3359
3360	/* Try to allocate resources for LLQ bar */
3361	rid = PCIR_BAR(ENA_MEM_BAR);
3362	adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3363	    &rid, RF_ACTIVE);
3364	if (unlikely(adapter->memory == NULL)) {
3365		device_printf(pdev, "unable to allocate LLQ bar resource. "
3366		    "Fallback to host mode policy.\n");
3367		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3368		return (0);
3369	}
3370
3371	/* Enable write combining for better LLQ performance */
3372	rc = ena_enable_wc(adapter->memory);
3373	if (unlikely(rc != 0)) {
3374		device_printf(pdev, "failed to enable write combining.\n");
3375		return (rc);
3376	}
3377
3378	/*
3379	 * Save virtual address of the device's memory region
3380	 * for the ena_com layer.
3381	 */
3382	ena_dev->mem_bar = rman_get_virtual(adapter->memory);
3383
3384	return (0);
3385}
3386
3387static inline
3388void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
3389{
3390	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
3391	llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
3392	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
3393	llq_config->llq_num_decs_before_header =
3394	    ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
3395	llq_config->llq_ring_entry_size_value = 128;
3396}
3397
3398static int
3399ena_calc_queue_size(struct ena_adapter *adapter,
3400    struct ena_calc_queue_size_ctx *ctx)
3401{
3402	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
3403	struct ena_com_dev *ena_dev = ctx->ena_dev;
3404	uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE;
3405	uint32_t rx_queue_size = adapter->rx_ring_size;
3406
3407	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3408		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3409		    &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
3410		rx_queue_size = min_t(uint32_t, rx_queue_size,
3411		    max_queue_ext->max_rx_cq_depth);
3412		rx_queue_size = min_t(uint32_t, rx_queue_size,
3413		    max_queue_ext->max_rx_sq_depth);
3414		tx_queue_size = min_t(uint32_t, tx_queue_size,
3415		    max_queue_ext->max_tx_cq_depth);
3416
3417		if (ena_dev->tx_mem_queue_type ==
3418		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
3419			tx_queue_size = min_t(uint32_t, tx_queue_size,
3420			    llq->max_llq_depth);
3421		else
3422			tx_queue_size = min_t(uint32_t, tx_queue_size,
3423			    max_queue_ext->max_tx_sq_depth);
3424
3425		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3426		    max_queue_ext->max_per_packet_rx_descs);
3427		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3428		    max_queue_ext->max_per_packet_tx_descs);
3429	} else {
3430		struct ena_admin_queue_feature_desc *max_queues =
3431		    &ctx->get_feat_ctx->max_queues;
3432		rx_queue_size = min_t(uint32_t, rx_queue_size,
3433		    max_queues->max_cq_depth);
3434		rx_queue_size = min_t(uint32_t, rx_queue_size,
3435		    max_queues->max_sq_depth);
3436		tx_queue_size = min_t(uint32_t, tx_queue_size,
3437		    max_queues->max_cq_depth);
3438
3439		if (ena_dev->tx_mem_queue_type ==
3440		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
3441			tx_queue_size = min_t(uint32_t, tx_queue_size,
3442			    llq->max_llq_depth);
3443		else
3444			tx_queue_size = min_t(uint32_t, tx_queue_size,
3445			    max_queues->max_sq_depth);
3446
3447		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3448		    max_queues->max_packet_tx_descs);
3449		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3450		    max_queues->max_packet_rx_descs);
3451	}
3452
3453	/* round down to the nearest power of 2 */
3454	rx_queue_size = 1 << (fls(rx_queue_size) - 1);
3455	tx_queue_size = 1 << (fls(tx_queue_size) - 1);
3456
3457	if (unlikely(rx_queue_size == 0 || tx_queue_size == 0)) {
3458		device_printf(ctx->pdev, "Invalid queue size\n");
3459		return (EFAULT);
3460	}
3461
3462	ctx->rx_queue_size = rx_queue_size;
3463	ctx->tx_queue_size = tx_queue_size;
3464
3465	return (0);
3466}
3467
3468static int
3469ena_handle_updated_queues(struct ena_adapter *adapter,
3470    struct ena_com_dev_get_features_ctx *get_feat_ctx)
3471{
3472	struct ena_com_dev *ena_dev = adapter->ena_dev;
3473	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
3474	device_t pdev = adapter->pdev;
3475	bool are_queues_changed = false;
3476	int io_queue_num, rc;
3477
3478	calc_queue_ctx.ena_dev = ena_dev;
3479	calc_queue_ctx.get_feat_ctx = get_feat_ctx;
3480	calc_queue_ctx.pdev = pdev;
3481
3482	io_queue_num = ena_calc_io_queue_num(adapter, get_feat_ctx);
3483	rc = ena_calc_queue_size(adapter, &calc_queue_ctx);
3484	if (unlikely(rc != 0 || io_queue_num <= 0))
3485		return EFAULT;
3486
3487	if (adapter->tx_ring->buf_ring_size != adapter->buf_ring_size)
3488		are_queues_changed = true;
3489
3490	if (unlikely(adapter->tx_ring_size > calc_queue_ctx.tx_queue_size ||
3491	    adapter->rx_ring_size > calc_queue_ctx.rx_queue_size)) {
3492		device_printf(pdev,
3493		    "Not enough resources to allocate requested queue sizes "
3494		    "(TX,RX)=(%d,%d), falling back to queue sizes "
3495		    "(TX,RX)=(%d,%d)\n",
3496		    adapter->tx_ring_size,
3497		    adapter->rx_ring_size,
3498		    calc_queue_ctx.tx_queue_size,
3499		    calc_queue_ctx.rx_queue_size);
3500		adapter->tx_ring_size = calc_queue_ctx.tx_queue_size;
3501		adapter->rx_ring_size = calc_queue_ctx.rx_queue_size;
3502		adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
3503		adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
3504		are_queues_changed = true;
3505	}
3506
3507	if (unlikely(adapter->num_queues > io_queue_num)) {
3508		device_printf(pdev,
3509		    "Not enough resources to allocate %d queues, "
3510		    "falling back to %d queues\n",
3511		    adapter->num_queues, io_queue_num);
3512		adapter->num_queues = io_queue_num;
3513		if (ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)) {
3514			ena_com_rss_destroy(ena_dev);
3515			rc = ena_rss_init_default(adapter);
3516			if (unlikely(rc != 0) && (rc != EOPNOTSUPP)) {
3517				device_printf(pdev, "Cannot init RSS rc: %d\n",
3518				    rc);
3519				return (rc);
3520			}
3521		}
3522		are_queues_changed = true;
3523	}
3524
3525	if (unlikely(are_queues_changed)) {
3526		ena_free_all_io_rings_resources(adapter);
3527		ena_init_io_rings(adapter);
3528	}
3529
3530	return (0);
3531}
3532
3533static int
3534ena_rss_init_default(struct ena_adapter *adapter)
3535{
3536	struct ena_com_dev *ena_dev = adapter->ena_dev;
3537	device_t dev = adapter->pdev;
3538	int qid, rc, i;
3539
3540	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3541	if (unlikely(rc != 0)) {
3542		device_printf(dev, "Cannot init indirect table\n");
3543		return (rc);
3544	}
3545
3546	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3547		qid = i % adapter->num_queues;
3548		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
3549		    ENA_IO_RXQ_IDX(qid));
3550		if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3551			device_printf(dev, "Cannot fill indirect table\n");
3552			goto err_rss_destroy;
3553		}
3554	}
3555
3556	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
3557	    ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
3558	if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3559		device_printf(dev, "Cannot fill hash function\n");
3560		goto err_rss_destroy;
3561	}
3562
3563	rc = ena_com_set_default_hash_ctrl(ena_dev);
3564	if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3565		device_printf(dev, "Cannot fill hash control\n");
3566		goto err_rss_destroy;
3567	}
3568
3569	return (0);
3570
3571err_rss_destroy:
3572	ena_com_rss_destroy(ena_dev);
3573	return (rc);
3574}
3575
3576static void
3577ena_rss_init_default_deferred(void *arg)
3578{
3579	struct ena_adapter *adapter;
3580	devclass_t dc;
3581	int max;
3582	int rc;
3583
3584	dc = devclass_find("ena");
3585	if (unlikely(dc == NULL)) {
3586		ena_trace(ENA_ALERT, "No devclass ena\n");
3587		return;
3588	}
3589
3590	max = devclass_get_maxunit(dc);
3591	while (max-- >= 0) {
3592		adapter = devclass_get_softc(dc, max);
3593		if (adapter != NULL) {
3594			rc = ena_rss_init_default(adapter);
3595			ENA_FLAG_SET_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
3596			if (unlikely(rc != 0)) {
3597				device_printf(adapter->pdev,
3598				    "WARNING: RSS was not properly initialized,"
3599				    " it will affect bandwidth\n");
3600				ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
3601			}
3602		}
3603	}
3604}
3605SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL);
3606
3607static void
3608ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
3609{
3610	struct ena_admin_host_info *host_info;
3611	uintptr_t rid;
3612	int rc;
3613
3614	/* Allocate only the host info */
3615	rc = ena_com_allocate_host_info(ena_dev);
3616	if (unlikely(rc != 0)) {
3617		ena_trace(ENA_ALERT, "Cannot allocate host info\n");
3618		return;
3619	}
3620
3621	host_info = ena_dev->host_attr.host_info;
3622
3623	if (pci_get_id(dev, PCI_ID_RID, &rid) == 0)
3624		host_info->bdf = rid;
3625	host_info->os_type = ENA_ADMIN_OS_FREEBSD;
3626	host_info->kernel_ver = osreldate;
3627
3628	sprintf(host_info->kernel_ver_str, "%d", osreldate);
3629	host_info->os_dist = 0;
3630	strncpy(host_info->os_dist_str, osrelease,
3631	    sizeof(host_info->os_dist_str) - 1);
3632
3633	host_info->driver_version =
3634		(DRV_MODULE_VER_MAJOR) |
3635		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
3636		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
3637	host_info->num_cpus = mp_ncpus;
3638
3639	rc = ena_com_set_host_attributes(ena_dev);
3640	if (unlikely(rc != 0)) {
3641		if (rc == EOPNOTSUPP)
3642			ena_trace(ENA_WARNING, "Cannot set host attributes\n");
3643		else
3644			ena_trace(ENA_ALERT, "Cannot set host attributes\n");
3645
3646		goto err;
3647	}
3648
3649	return;
3650
3651err:
3652	ena_com_delete_host_info(ena_dev);
3653}
3654
3655static int
3656ena_device_init(struct ena_adapter *adapter, device_t pdev,
3657    struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
3658{
3659	struct ena_com_dev* ena_dev = adapter->ena_dev;
3660	bool readless_supported;
3661	uint32_t aenq_groups;
3662	int dma_width;
3663	int rc;
3664
3665	rc = ena_com_mmio_reg_read_request_init(ena_dev);
3666	if (unlikely(rc != 0)) {
3667		device_printf(pdev, "failed to init mmio read less\n");
3668		return (rc);
3669	}
3670
3671	/*
3672	 * The PCIe configuration space revision id indicate if mmio reg
3673	 * read is disabled
3674	 */
3675	readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
3676	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3677
3678	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3679	if (unlikely(rc != 0)) {
3680		device_printf(pdev, "Can not reset device\n");
3681		goto err_mmio_read_less;
3682	}
3683
3684	rc = ena_com_validate_version(ena_dev);
3685	if (unlikely(rc != 0)) {
3686		device_printf(pdev, "device version is too low\n");
3687		goto err_mmio_read_less;
3688	}
3689
3690	dma_width = ena_com_get_dma_width(ena_dev);
3691	if (unlikely(dma_width < 0)) {
3692		device_printf(pdev, "Invalid dma width value %d", dma_width);
3693		rc = dma_width;
3694		goto err_mmio_read_less;
3695	}
3696	adapter->dma_width = dma_width;
3697
3698	/* ENA admin level init */
3699	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
3700	if (unlikely(rc != 0)) {
3701		device_printf(pdev,
3702		    "Can not initialize ena admin queue with device\n");
3703		goto err_mmio_read_less;
3704	}
3705
3706	/*
3707	 * To enable the msix interrupts the driver needs to know the number
3708	 * of queues. So the driver uses polling mode to retrieve this
3709	 * information
3710	 */
3711	ena_com_set_admin_polling_mode(ena_dev, true);
3712
3713	ena_config_host_info(ena_dev, pdev);
3714
3715	/* Get Device Attributes */
3716	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3717	if (unlikely(rc != 0)) {
3718		device_printf(pdev,
3719		    "Cannot get attribute for ena device rc: %d\n", rc);
3720		goto err_admin_init;
3721	}
3722
3723	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
3724	    BIT(ENA_ADMIN_FATAL_ERROR) |
3725	    BIT(ENA_ADMIN_WARNING) |
3726	    BIT(ENA_ADMIN_NOTIFICATION) |
3727	    BIT(ENA_ADMIN_KEEP_ALIVE);
3728
3729	aenq_groups &= get_feat_ctx->aenq.supported_groups;
3730	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3731	if (unlikely(rc != 0)) {
3732		device_printf(pdev, "Cannot configure aenq groups rc: %d\n", rc);
3733		goto err_admin_init;
3734	}
3735
3736	*wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3737
3738	return (0);
3739
3740err_admin_init:
3741	ena_com_delete_host_info(ena_dev);
3742	ena_com_admin_destroy(ena_dev);
3743err_mmio_read_less:
3744	ena_com_mmio_reg_read_request_destroy(ena_dev);
3745
3746	return (rc);
3747}
3748
3749static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
3750    int io_vectors)
3751{
3752	struct ena_com_dev *ena_dev = adapter->ena_dev;
3753	int rc;
3754
3755	rc = ena_enable_msix(adapter);
3756	if (unlikely(rc != 0)) {
3757		device_printf(adapter->pdev, "Error with MSI-X enablement\n");
3758		return (rc);
3759	}
3760
3761	ena_setup_mgmnt_intr(adapter);
3762
3763	rc = ena_request_mgmnt_irq(adapter);
3764	if (unlikely(rc != 0)) {
3765		device_printf(adapter->pdev, "Cannot setup mgmnt queue intr\n");
3766		goto err_disable_msix;
3767	}
3768
3769	ena_com_set_admin_polling_mode(ena_dev, false);
3770
3771	ena_com_admin_aenq_enable(ena_dev);
3772
3773	return (0);
3774
3775err_disable_msix:
3776	ena_disable_msix(adapter);
3777
3778	return (rc);
3779}
3780
3781/* Function called on ENA_ADMIN_KEEP_ALIVE event */
3782static void ena_keep_alive_wd(void *adapter_data,
3783    struct ena_admin_aenq_entry *aenq_e)
3784{
3785	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3786	struct ena_admin_aenq_keep_alive_desc *desc;
3787	sbintime_t stime;
3788	uint64_t rx_drops;
3789
3790	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
3791
3792	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
3793	counter_u64_zero(adapter->hw_stats.rx_drops);
3794	counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
3795
3796	stime = getsbinuptime();
3797	atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
3798}
3799
3800/* Check for keep alive expiration */
3801static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3802{
3803	sbintime_t timestamp, time;
3804
3805	if (adapter->wd_active == 0)
3806		return;
3807
3808	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3809		return;
3810
3811	timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
3812	time = getsbinuptime() - timestamp;
3813	if (unlikely(time > adapter->keep_alive_timeout)) {
3814		device_printf(adapter->pdev,
3815		    "Keep alive watchdog timeout.\n");
3816		counter_u64_add(adapter->dev_stats.wd_expired, 1);
3817		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3818			adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
3819			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3820		}
3821	}
3822}
3823
3824/* Check if admin queue is enabled */
3825static void check_for_admin_com_state(struct ena_adapter *adapter)
3826{
3827	if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) ==
3828	    false)) {
3829		device_printf(adapter->pdev,
3830		    "ENA admin queue is not in running state!\n");
3831		counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
3832		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3833			adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
3834			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3835		}
3836	}
3837}
3838
3839static int
3840check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3841    struct ena_ring *rx_ring)
3842{
3843	if (likely(rx_ring->first_interrupt))
3844		return (0);
3845
3846	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3847		return (0);
3848
3849	rx_ring->no_interrupt_event_cnt++;
3850
3851	if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3852		device_printf(adapter->pdev, "Potential MSIX issue on Rx side "
3853		    "Queue = %d. Reset the device\n", rx_ring->qid);
3854		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3855			adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
3856			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3857		}
3858		return (EIO);
3859	}
3860
3861	return (0);
3862}
3863
3864static int
3865check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3866    struct ena_ring *tx_ring)
3867{
3868	struct bintime curtime, time;
3869	struct ena_tx_buffer *tx_buf;
3870	sbintime_t time_offset;
3871	uint32_t missed_tx = 0;
3872	int i, rc = 0;
3873
3874	getbinuptime(&curtime);
3875
3876	for (i = 0; i < tx_ring->ring_size; i++) {
3877		tx_buf = &tx_ring->tx_buffer_info[i];
3878
3879		if (bintime_isset(&tx_buf->timestamp) == 0)
3880			continue;
3881
3882		time = curtime;
3883		bintime_sub(&time, &tx_buf->timestamp);
3884		time_offset = bttosbt(time);
3885
3886		if (unlikely(!tx_ring->first_interrupt &&
3887		    time_offset > 2 * adapter->missing_tx_timeout)) {
3888			/*
3889			 * If after graceful period interrupt is still not
3890			 * received, we schedule a reset.
3891			 */
3892			device_printf(adapter->pdev,
3893			    "Potential MSIX issue on Tx side Queue = %d. "
3894			    "Reset the device\n", tx_ring->qid);
3895			if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET,
3896			    adapter))) {
3897				adapter->reset_reason =
3898				    ENA_REGS_RESET_MISS_INTERRUPT;
3899				ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET,
3900				    adapter);
3901			}
3902			return (EIO);
3903		}
3904
3905		/* Check again if packet is still waiting */
3906		if (unlikely(time_offset > adapter->missing_tx_timeout)) {
3907
3908			if (!tx_buf->print_once)
3909				ena_trace(ENA_WARNING, "Found a Tx that wasn't "
3910				    "completed on time, qid %d, index %d.\n",
3911				    tx_ring->qid, i);
3912
3913			tx_buf->print_once = true;
3914			missed_tx++;
3915		}
3916	}
3917
3918	if (unlikely(missed_tx > adapter->missing_tx_threshold)) {
3919		device_printf(adapter->pdev,
3920		    "The number of lost tx completion is above the threshold "
3921		    "(%d > %d). Reset the device\n",
3922		    missed_tx, adapter->missing_tx_threshold);
3923		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3924			adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
3925			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3926		}
3927		rc = EIO;
3928	}
3929
3930	counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx);
3931
3932	return (rc);
3933}
3934
3935/*
3936 * Check for TX which were not completed on time.
3937 * Timeout is defined by "missing_tx_timeout".
3938 * Reset will be performed if number of incompleted
3939 * transactions exceeds "missing_tx_threshold".
3940 */
3941static void
3942check_for_missing_completions(struct ena_adapter *adapter)
3943{
3944	struct ena_ring *tx_ring;
3945	struct ena_ring *rx_ring;
3946	int i, budget, rc;
3947
3948	/* Make sure the driver doesn't turn the device in other process */
3949	rmb();
3950
3951	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3952		return;
3953
3954	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3955		return;
3956
3957	if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3958		return;
3959
3960	budget = adapter->missing_tx_max_queues;
3961
3962	for (i = adapter->next_monitored_tx_qid; i < adapter->num_queues; i++) {
3963		tx_ring = &adapter->tx_ring[i];
3964		rx_ring = &adapter->rx_ring[i];
3965
3966		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3967		if (unlikely(rc != 0))
3968			return;
3969
3970		rc = check_for_rx_interrupt_queue(adapter, rx_ring);
3971		if (unlikely(rc != 0))
3972			return;
3973
3974		budget--;
3975		if (budget == 0) {
3976			i++;
3977			break;
3978		}
3979	}
3980
3981	adapter->next_monitored_tx_qid = i % adapter->num_queues;
3982}
3983
3984/* trigger rx cleanup after 2 consecutive detections */
3985#define EMPTY_RX_REFILL 2
3986/* For the rare case where the device runs out of Rx descriptors and the
3987 * msix handler failed to refill new Rx descriptors (due to a lack of memory
3988 * for example).
3989 * This case will lead to a deadlock:
3990 * The device won't send interrupts since all the new Rx packets will be dropped
3991 * The msix handler won't allocate new Rx descriptors so the device won't be
3992 * able to send new packets.
3993 *
3994 * When such a situation is detected - execute rx cleanup task in another thread
3995 */
3996static void
3997check_for_empty_rx_ring(struct ena_adapter *adapter)
3998{
3999	struct ena_ring *rx_ring;
4000	int i, refill_required;
4001
4002	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
4003		return;
4004
4005	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
4006		return;
4007
4008	for (i = 0; i < adapter->num_queues; i++) {
4009		rx_ring = &adapter->rx_ring[i];
4010
4011		refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq);
4012		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
4013			rx_ring->empty_rx_queue++;
4014
4015			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL)	{
4016				counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
4017				    1);
4018
4019				device_printf(adapter->pdev,
4020				    "trigger refill for ring %d\n", i);
4021
4022				taskqueue_enqueue(rx_ring->que->cleanup_tq,
4023				    &rx_ring->que->cleanup_task);
4024				rx_ring->empty_rx_queue = 0;
4025			}
4026		} else {
4027			rx_ring->empty_rx_queue = 0;
4028		}
4029	}
4030}
4031
4032static void ena_update_hints(struct ena_adapter *adapter,
4033			     struct ena_admin_ena_hw_hints *hints)
4034{
4035	struct ena_com_dev *ena_dev = adapter->ena_dev;
4036
4037	if (hints->admin_completion_tx_timeout)
4038		ena_dev->admin_queue.completion_timeout =
4039		    hints->admin_completion_tx_timeout * 1000;
4040
4041	if (hints->mmio_read_timeout)
4042		/* convert to usec */
4043		ena_dev->mmio_read.reg_read_to =
4044		    hints->mmio_read_timeout * 1000;
4045
4046	if (hints->missed_tx_completion_count_threshold_to_reset)
4047		adapter->missing_tx_threshold =
4048		    hints->missed_tx_completion_count_threshold_to_reset;
4049
4050	if (hints->missing_tx_completion_timeout) {
4051		if (hints->missing_tx_completion_timeout ==
4052		     ENA_HW_HINTS_NO_TIMEOUT)
4053			adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT;
4054		else
4055			adapter->missing_tx_timeout =
4056			    SBT_1MS * hints->missing_tx_completion_timeout;
4057	}
4058
4059	if (hints->driver_watchdog_timeout) {
4060		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
4061			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
4062		else
4063			adapter->keep_alive_timeout =
4064			    SBT_1MS * hints->driver_watchdog_timeout;
4065	}
4066}
4067
4068static void
4069ena_timer_service(void *data)
4070{
4071	struct ena_adapter *adapter = (struct ena_adapter *)data;
4072	struct ena_admin_host_info *host_info =
4073	    adapter->ena_dev->host_attr.host_info;
4074
4075	check_for_missing_keep_alive(adapter);
4076
4077	check_for_admin_com_state(adapter);
4078
4079	check_for_missing_completions(adapter);
4080
4081	check_for_empty_rx_ring(adapter);
4082
4083	if (host_info != NULL)
4084		ena_update_host_info(host_info, adapter->ifp);
4085
4086	if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
4087		device_printf(adapter->pdev, "Trigger reset is on\n");
4088		taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
4089		return;
4090	}
4091
4092	/*
4093	 * Schedule another timeout one second from now.
4094	 */
4095	callout_schedule_sbt(&adapter->timer_service, SBT_1S, SBT_1S, 0);
4096}
4097
4098static void
4099ena_destroy_device(struct ena_adapter *adapter, bool graceful)
4100{
4101	if_t ifp = adapter->ifp;
4102	struct ena_com_dev *ena_dev = adapter->ena_dev;
4103	bool dev_up;
4104
4105	if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
4106		return;
4107
4108	if_link_state_change(ifp, LINK_STATE_DOWN);
4109
4110	callout_drain(&adapter->timer_service);
4111
4112	dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
4113	if (dev_up)
4114		ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
4115	else
4116		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
4117
4118	if (!graceful)
4119		ena_com_set_admin_running_state(ena_dev, false);
4120
4121	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
4122		ena_down(adapter);
4123
4124	/*
4125	 * Stop the device from sending AENQ events (if the device was up, and
4126	 * the trigger reset was on, ena_down already performs device reset)
4127	 */
4128	if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up))
4129		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
4130
4131	ena_free_mgmnt_irq(adapter);
4132
4133	ena_disable_msix(adapter);
4134
4135	ena_com_abort_admin_commands(ena_dev);
4136
4137	ena_com_wait_for_abort_completion(ena_dev);
4138
4139	ena_com_admin_destroy(ena_dev);
4140
4141	ena_com_mmio_reg_read_request_destroy(ena_dev);
4142
4143	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
4144
4145	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
4146	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
4147}
4148
4149static int
4150ena_device_validate_params(struct ena_adapter *adapter,
4151    struct ena_com_dev_get_features_ctx *get_feat_ctx)
4152{
4153
4154	if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr,
4155	    ETHER_ADDR_LEN) != 0) {
4156		device_printf(adapter->pdev,
4157		    "Error, mac address are different\n");
4158		return (EINVAL);
4159	}
4160
4161	if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) {
4162		device_printf(adapter->pdev,
4163		    "Error, device max mtu is smaller than ifp MTU\n");
4164		return (EINVAL);
4165	}
4166
4167	return 0;
4168}
4169
4170static int
4171ena_restore_device(struct ena_adapter *adapter)
4172{
4173	struct ena_com_dev_get_features_ctx get_feat_ctx;
4174	struct ena_com_dev *ena_dev = adapter->ena_dev;
4175	if_t ifp = adapter->ifp;
4176	device_t dev = adapter->pdev;
4177	int wd_active;
4178	int rc;
4179
4180	ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
4181
4182	rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active);
4183	if (rc != 0) {
4184		device_printf(dev, "Cannot initialize device\n");
4185		goto err;
4186	}
4187	/*
4188	 * Only enable WD if it was enabled before reset, so it won't override
4189	 * value set by the user by the sysctl.
4190	 */
4191	if (adapter->wd_active != 0)
4192		adapter->wd_active = wd_active;
4193
4194	rc = ena_device_validate_params(adapter, &get_feat_ctx);
4195	if (rc != 0) {
4196		device_printf(dev, "Validation of device parameters failed\n");
4197		goto err_device_destroy;
4198	}
4199
4200	rc = ena_handle_updated_queues(adapter, &get_feat_ctx);
4201	if (rc != 0)
4202		goto err_device_destroy;
4203
4204	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
4205	/* Make sure we don't have a race with AENQ Links state handler */
4206	if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
4207		if_link_state_change(ifp, LINK_STATE_UP);
4208
4209	rc = ena_enable_msix_and_set_admin_interrupts(adapter,
4210	    adapter->num_queues);
4211	if (rc != 0) {
4212		device_printf(dev, "Enable MSI-X failed\n");
4213		goto err_device_destroy;
4214	}
4215
4216	/* If the interface was up before the reset bring it up */
4217	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
4218		rc = ena_up(adapter);
4219		if (rc != 0) {
4220			device_printf(dev, "Failed to create I/O queues\n");
4221			goto err_disable_msix;
4222		}
4223	}
4224
4225	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
4226	callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
4227	    ena_timer_service, (void *)adapter, 0);
4228
4229	device_printf(dev,
4230	    "Device reset completed successfully, Driver info: %s\n", ena_version);
4231
4232	return (rc);
4233
4234err_disable_msix:
4235	ena_free_mgmnt_irq(adapter);
4236	ena_disable_msix(adapter);
4237err_device_destroy:
4238	ena_com_abort_admin_commands(ena_dev);
4239	ena_com_wait_for_abort_completion(ena_dev);
4240	ena_com_admin_destroy(ena_dev);
4241	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
4242	ena_com_mmio_reg_read_request_destroy(ena_dev);
4243err:
4244	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
4245	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
4246	device_printf(dev, "Reset attempt failed. Can not reset the device\n");
4247
4248	return (rc);
4249}
4250
4251static void
4252ena_reset_task(void *arg, int pending)
4253{
4254	struct ena_adapter *adapter = (struct ena_adapter *)arg;
4255
4256	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
4257		device_printf(adapter->pdev,
4258		    "device reset scheduled but trigger_reset is off\n");
4259		return;
4260	}
4261
4262	sx_xlock(&adapter->ioctl_sx);
4263	ena_destroy_device(adapter, false);
4264	ena_restore_device(adapter);
4265	sx_unlock(&adapter->ioctl_sx);
4266}
4267
4268/**
4269 * ena_attach - Device Initialization Routine
4270 * @pdev: device information struct
4271 *
4272 * Returns 0 on success, otherwise on failure.
4273 *
4274 * ena_attach initializes an adapter identified by a device structure.
4275 * The OS initialization, configuring of the adapter private structure,
4276 * and a hardware reset occur.
4277 **/
4278static int
4279ena_attach(device_t pdev)
4280{
4281	struct ena_com_dev_get_features_ctx get_feat_ctx;
4282	struct ena_llq_configurations llq_config;
4283	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
4284	static int version_printed;
4285	struct ena_adapter *adapter;
4286	struct ena_com_dev *ena_dev = NULL;
4287	const char *queue_type_str;
4288	int io_queue_num;
4289	int rid, rc;
4290
4291	adapter = device_get_softc(pdev);
4292	adapter->pdev = pdev;
4293
4294	mtx_init(&adapter->global_mtx, "ENA global mtx", NULL, MTX_DEF);
4295	sx_init(&adapter->ioctl_sx, "ENA ioctl sx");
4296
4297	/* Set up the timer service */
4298	callout_init_mtx(&adapter->timer_service, &adapter->global_mtx, 0);
4299	adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
4300	adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
4301	adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
4302	adapter->missing_tx_threshold = DEFAULT_TX_CMP_THRESHOLD;
4303
4304	if (version_printed++ == 0)
4305		device_printf(pdev, "%s\n", ena_version);
4306
4307	/* Allocate memory for ena_dev structure */
4308	ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
4309	    M_WAITOK | M_ZERO);
4310
4311	adapter->ena_dev = ena_dev;
4312	ena_dev->dmadev = pdev;
4313
4314	rid = PCIR_BAR(ENA_REG_BAR);
4315	adapter->memory = NULL;
4316	adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
4317	    &rid, RF_ACTIVE);
4318	if (unlikely(adapter->registers == NULL)) {
4319		device_printf(pdev,
4320		    "unable to allocate bus resource: registers!\n");
4321		rc = ENOMEM;
4322		goto err_dev_free;
4323	}
4324
4325	ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
4326	    M_WAITOK | M_ZERO);
4327
4328	/* Store register resources */
4329	((struct ena_bus*)(ena_dev->bus))->reg_bar_t =
4330	    rman_get_bustag(adapter->registers);
4331	((struct ena_bus*)(ena_dev->bus))->reg_bar_h =
4332	    rman_get_bushandle(adapter->registers);
4333
4334	if (unlikely(((struct ena_bus*)(ena_dev->bus))->reg_bar_h == 0)) {
4335		device_printf(pdev, "failed to pmap registers bar\n");
4336		rc = ENXIO;
4337		goto err_bus_free;
4338	}
4339
4340	ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
4341
4342	/* Initially clear all the flags */
4343	ENA_FLAG_ZERO(adapter);
4344
4345	/* Device initialization */
4346	rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
4347	if (unlikely(rc != 0)) {
4348		device_printf(pdev, "ENA device init failed! (err: %d)\n", rc);
4349		rc = ENXIO;
4350		goto err_bus_free;
4351	}
4352
4353	set_default_llq_configurations(&llq_config);
4354
4355#if defined(__arm__) || defined(__aarch64__)
4356	/*
4357	 * Force LLQ disable, as the driver is not supporting WC enablement
4358	 * on the ARM architecture. Using LLQ without WC would affect
4359	 * performance in a negative way.
4360	 */
4361	ena_dev->supported_features &= ~(1 << ENA_ADMIN_LLQ);
4362#endif
4363	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
4364	     &llq_config);
4365	if (unlikely(rc != 0)) {
4366		device_printf(pdev, "failed to set placement policy\n");
4367		goto err_com_free;
4368	}
4369
4370	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
4371		queue_type_str = "Regular";
4372	else
4373		queue_type_str = "Low Latency";
4374	device_printf(pdev, "Placement policy: %s\n", queue_type_str);
4375
4376	adapter->keep_alive_timestamp = getsbinuptime();
4377
4378	adapter->tx_offload_cap = get_feat_ctx.offload.tx;
4379
4380	memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
4381	    ETHER_ADDR_LEN);
4382
4383	calc_queue_ctx.ena_dev = ena_dev;
4384	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
4385	calc_queue_ctx.pdev = pdev;
4386
4387	/* calculate IO queue number to create */
4388	io_queue_num = ena_calc_io_queue_num(adapter, &get_feat_ctx);
4389
4390	ENA_ASSERT(io_queue_num > 0, "Invalid queue number: %d\n",
4391	    io_queue_num);
4392	adapter->num_queues = io_queue_num;
4393
4394	adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
4395	// Set the requested Rx ring size
4396	adapter->rx_ring_size = ENA_DEFAULT_RING_SIZE;
4397	/* calculatre ring sizes */
4398	rc = ena_calc_queue_size(adapter, &calc_queue_ctx);
4399	if (unlikely((rc != 0) || (io_queue_num <= 0))) {
4400		rc = EFAULT;
4401		goto err_com_free;
4402	}
4403
4404	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
4405
4406	adapter->tx_ring_size = calc_queue_ctx.tx_queue_size;
4407	adapter->rx_ring_size = calc_queue_ctx.rx_queue_size;
4408
4409	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
4410	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
4411
4412	adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;
4413
4414	/* set up dma tags for rx and tx buffers */
4415	rc = ena_setup_tx_dma_tag(adapter);
4416	if (unlikely(rc != 0)) {
4417		device_printf(pdev, "Failed to create TX DMA tag\n");
4418		goto err_com_free;
4419	}
4420
4421	rc = ena_setup_rx_dma_tag(adapter);
4422	if (unlikely(rc != 0)) {
4423		device_printf(pdev, "Failed to create RX DMA tag\n");
4424		goto err_tx_tag_free;
4425	}
4426
4427	/* initialize rings basic information */
4428	device_printf(pdev,
4429	    "Creating %d io queues. Rx queue size: %d, Tx queue size: %d\n",
4430	    io_queue_num,
4431	    calc_queue_ctx.rx_queue_size,
4432	    calc_queue_ctx.tx_queue_size);
4433	ena_init_io_rings(adapter);
4434
4435	rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
4436	if (unlikely(rc != 0)) {
4437		device_printf(pdev,
4438		    "Failed to enable and set the admin interrupts\n");
4439		goto err_io_free;
4440	}
4441
4442	/* setup network interface */
4443	rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
4444	if (unlikely(rc != 0)) {
4445		device_printf(pdev, "Error with network interface setup\n");
4446		goto err_msix_free;
4447	}
4448
4449	/* Initialize reset task queue */
4450	TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
4451	adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
4452	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
4453	taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET,
4454	    "%s rstq", device_get_nameunit(adapter->pdev));
4455
4456	/* Initialize statistics */
4457	ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
4458	    sizeof(struct ena_stats_dev));
4459	ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
4460	    sizeof(struct ena_hw_stats));
4461	ena_sysctl_add_nodes(adapter);
4462
4463	/* Tell the stack that the interface is not active */
4464	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
4465	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
4466
4467	return (0);
4468
4469err_msix_free:
4470	ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
4471	ena_free_mgmnt_irq(adapter);
4472	ena_disable_msix(adapter);
4473err_io_free:
4474	ena_free_all_io_rings_resources(adapter);
4475	ena_free_rx_dma_tag(adapter);
4476err_tx_tag_free:
4477	ena_free_tx_dma_tag(adapter);
4478err_com_free:
4479	ena_com_admin_destroy(ena_dev);
4480	ena_com_delete_host_info(ena_dev);
4481	ena_com_mmio_reg_read_request_destroy(ena_dev);
4482err_bus_free:
4483	free(ena_dev->bus, M_DEVBUF);
4484	ena_free_pci_resources(adapter);
4485err_dev_free:
4486	free(ena_dev, M_DEVBUF);
4487
4488	return (rc);
4489}
4490
4491/**
4492 * ena_detach - Device Removal Routine
4493 * @pdev: device information struct
4494 *
4495 * ena_detach is called by the device subsystem to alert the driver
4496 * that it should release a PCI device.
4497 **/
4498static int
4499ena_detach(device_t pdev)
4500{
4501	struct ena_adapter *adapter = device_get_softc(pdev);
4502	struct ena_com_dev *ena_dev = adapter->ena_dev;
4503	int rc;
4504
4505	/* Make sure VLANS are not using driver */
4506	if (adapter->ifp->if_vlantrunk != NULL) {
4507		device_printf(adapter->pdev ,"VLAN is in use, detach first\n");
4508		return (EBUSY);
4509	}
4510
4511	ether_ifdetach(adapter->ifp);
4512
4513	/* Free reset task and callout */
4514	callout_drain(&adapter->timer_service);
4515	while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
4516		taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
4517	taskqueue_free(adapter->reset_tq);
4518
4519	sx_xlock(&adapter->ioctl_sx);
4520	ena_down(adapter);
4521	ena_destroy_device(adapter, true);
4522	sx_unlock(&adapter->ioctl_sx);
4523
4524	ena_free_all_io_rings_resources(adapter);
4525
4526	ena_free_counters((counter_u64_t *)&adapter->hw_stats,
4527	    sizeof(struct ena_hw_stats));
4528	ena_free_counters((counter_u64_t *)&adapter->dev_stats,
4529	    sizeof(struct ena_stats_dev));
4530
4531	rc = ena_free_rx_dma_tag(adapter);
4532	if (unlikely(rc != 0))
4533		device_printf(adapter->pdev,
4534		    "Unmapped RX DMA tag associations\n");
4535
4536	rc = ena_free_tx_dma_tag(adapter);
4537	if (unlikely(rc != 0))
4538		device_printf(adapter->pdev,
4539		    "Unmapped TX DMA tag associations\n");
4540
4541	ena_free_irqs(adapter);
4542
4543	ena_free_pci_resources(adapter);
4544
4545	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
4546		ena_com_rss_destroy(ena_dev);
4547
4548	ena_com_delete_host_info(ena_dev);
4549
4550	mtx_destroy(&adapter->global_mtx);
4551	sx_destroy(&adapter->ioctl_sx);
4552
4553	if_free(adapter->ifp);
4554
4555	if (ena_dev->bus != NULL)
4556		free(ena_dev->bus, M_DEVBUF);
4557
4558	if (ena_dev != NULL)
4559		free(ena_dev, M_DEVBUF);
4560
4561	return (bus_generic_detach(pdev));
4562}
4563
4564/******************************************************************************
4565 ******************************** AENQ Handlers *******************************
4566 *****************************************************************************/
4567/**
4568 * ena_update_on_link_change:
4569 * Notify the network interface about the change in link status
4570 **/
4571static void
4572ena_update_on_link_change(void *adapter_data,
4573    struct ena_admin_aenq_entry *aenq_e)
4574{
4575	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4576	struct ena_admin_aenq_link_change_desc *aenq_desc;
4577	int status;
4578	if_t ifp;
4579
4580	aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
4581	ifp = adapter->ifp;
4582	status = aenq_desc->flags &
4583	    ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
4584
4585	if (status != 0) {
4586		device_printf(adapter->pdev, "link is UP\n");
4587		ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
4588		if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
4589			if_link_state_change(ifp, LINK_STATE_UP);
4590	} else {
4591		device_printf(adapter->pdev, "link is DOWN\n");
4592		if_link_state_change(ifp, LINK_STATE_DOWN);
4593		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
4594	}
4595}
4596
4597static void ena_notification(void *adapter_data,
4598    struct ena_admin_aenq_entry *aenq_e)
4599{
4600	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4601	struct ena_admin_ena_hw_hints *hints;
4602
4603	ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
4604	    "Invalid group(%x) expected %x\n",	aenq_e->aenq_common_desc.group,
4605	    ENA_ADMIN_NOTIFICATION);
4606
4607	switch (aenq_e->aenq_common_desc.syndrom) {
4608	case ENA_ADMIN_UPDATE_HINTS:
4609		hints =
4610		    (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
4611		ena_update_hints(adapter, hints);
4612		break;
4613	default:
4614		device_printf(adapter->pdev,
4615		    "Invalid aenq notification link state %d\n",
4616		    aenq_e->aenq_common_desc.syndrom);
4617	}
4618}
4619
4620/**
4621 * This handler will called for unknown event group or unimplemented handlers
4622 **/
4623static void
4624unimplemented_aenq_handler(void *adapter_data,
4625    struct ena_admin_aenq_entry *aenq_e)
4626{
4627	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4628
4629	device_printf(adapter->pdev,
4630	    "Unknown event was received or event with unimplemented handler\n");
4631}
4632
4633static struct ena_aenq_handlers aenq_handlers = {
4634    .handlers = {
4635	    [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4636	    [ENA_ADMIN_NOTIFICATION] = ena_notification,
4637	    [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4638    },
4639    .unimplemented_handler = unimplemented_aenq_handler
4640};
4641
4642/*********************************************************************
4643 *  FreeBSD Device Interface Entry Points
4644 *********************************************************************/
4645
4646static device_method_t ena_methods[] = {
4647    /* Device interface */
4648    DEVMETHOD(device_probe, ena_probe),
4649    DEVMETHOD(device_attach, ena_attach),
4650    DEVMETHOD(device_detach, ena_detach),
4651    DEVMETHOD_END
4652};
4653
4654static driver_t ena_driver = {
4655    "ena", ena_methods, sizeof(struct ena_adapter),
4656};
4657
4658devclass_t ena_devclass;
4659DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, 0, 0);
4660MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
4661    sizeof(ena_vendor_info_array[0]), nitems(ena_vendor_info_array) - 1);
4662MODULE_DEPEND(ena, pci, 1, 1, 1);
4663MODULE_DEPEND(ena, ether, 1, 1, 1);
4664
4665/*********************************************************************/
4666