125540Sdfr/*-
225540Sdfr * SPDX-License-Identifier: BSD-2-Clause
325540Sdfr *
425540Sdfr * Copyright (c) 2015-2023 Amazon.com, Inc. or its affiliates.
525540Sdfr * All rights reserved.
625540Sdfr *
725540Sdfr * Redistribution and use in source and binary forms, with or without
825540Sdfr * modification, are permitted provided that the following conditions
925540Sdfr * are met:
1025540Sdfr *
1125540Sdfr * 1. Redistributions of source code must retain the above copyright
1225540Sdfr *    notice, this list of conditions and the following disclaimer.
1325540Sdfr *
1425540Sdfr * 2. Redistributions in binary form must reproduce the above copyright
1525540Sdfr *    notice, this list of conditions and the following disclaimer in the
1625540Sdfr *    documentation and/or other materials provided with the distribution.
1725540Sdfr *
1825540Sdfr * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1925540Sdfr * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2025540Sdfr * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2125540Sdfr * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2225540Sdfr * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2325540Sdfr * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2425540Sdfr * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2525540Sdfr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2625540Sdfr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27114589Sobrien * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28114589Sobrien * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2932269Scharnier */
30193475Sbenno#include <sys/cdefs.h>
31193473Sbenno#include "opt_rss.h"
32193473Sbenno
33193475Sbenno#include <sys/param.h>
34193475Sbenno#include <sys/systm.h>
3530573Sjmg#include <sys/bus.h>
3625540Sdfr#include <sys/endian.h>
3778732Sdd#include <sys/eventhandler.h>
38193475Sbenno#include <sys/kernel.h>
3925540Sdfr#include <sys/kthread.h>
40233109Shselasky#include <sys/malloc.h>
4125540Sdfr#include <sys/mbuf.h>
42193475Sbenno#include <sys/module.h>
43193475Sbenno#include <sys/rman.h>
44193475Sbenno#include <sys/smp.h>
45193473Sbenno#include <sys/socket.h>
46193473Sbenno#include <sys/sockio.h>
47193475Sbenno#include <sys/sysctl.h>
48193475Sbenno#include <sys/taskqueue.h>
49193475Sbenno#include <sys/time.h>
50193475Sbenno
51193475Sbenno#include <vm/vm.h>
52193475Sbenno#include <vm/pmap.h>
53193475Sbenno
54193475Sbenno#include <machine/atomic.h>
55193475Sbenno#include <machine/bus.h>
56193475Sbenno#include <machine/in_cksum.h>
57193475Sbenno#include <machine/resource.h>
58193475Sbenno
59193475Sbenno#include <dev/pci/pcireg.h>
60193475Sbenno#include <dev/pci/pcivar.h>
61193475Sbenno
62193475Sbenno#include <net/bpf.h>
63193475Sbenno#include <net/ethernet.h>
64193475Sbenno#include <net/if.h>
65193475Sbenno#include <net/if_arp.h>
66193475Sbenno#include <net/if_dl.h>
67193475Sbenno#include <net/if_media.h>
68193475Sbenno#include <net/if_types.h>
69193475Sbenno#include <net/if_var.h>
70193475Sbenno#include <net/if_vlan_var.h>
71193475Sbenno#include <netinet/in.h>
72193475Sbenno#include <netinet/in_systm.h>
73193475Sbenno#include <netinet/if_ether.h>
74193475Sbenno#include <netinet/ip.h>
75193475Sbenno#include <netinet/ip6.h>
76193475Sbenno#include <netinet/tcp.h>
77193475Sbenno#include <netinet/udp.h>
78193475Sbenno
79193475Sbenno#include "ena.h"
80193475Sbenno#include "ena_datapath.h"
81193475Sbenno#include "ena_rss.h"
82193475Sbenno#include "ena_sysctl.h"
83193475Sbenno
84193475Sbenno#ifdef DEV_NETMAP
85193475Sbenno#include "ena_netmap.h"
86193475Sbenno#endif /* DEV_NETMAP */
87193475Sbenno
88193475Sbenno/*********************************************************
89193475Sbenno *  Function prototypes
90193475Sbenno *********************************************************/
91193475Sbennostatic int ena_probe(device_t);
92193475Sbennostatic void ena_intr_msix_mgmnt(void *);
93193475Sbennostatic void ena_free_pci_resources(struct ena_adapter *);
94193475Sbennostatic int ena_change_mtu(if_t, int);
95193475Sbennostatic inline void ena_alloc_counters(counter_u64_t *, int);
96193475Sbennostatic inline void ena_free_counters(counter_u64_t *, int);
97193475Sbennostatic inline void ena_reset_counters(counter_u64_t *, int);
98193475Sbennostatic void ena_init_io_rings_common(struct ena_adapter *, struct ena_ring *,
99193475Sbenno    uint16_t);
100193475Sbennostatic void ena_init_io_rings_basic(struct ena_adapter *);
101193475Sbennostatic void ena_init_io_rings_advanced(struct ena_adapter *);
102193475Sbennostatic void ena_init_io_rings(struct ena_adapter *);
103193475Sbennostatic void ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
104193475Sbennostatic void ena_free_all_io_rings_resources(struct ena_adapter *);
105193475Sbennostatic int ena_setup_tx_dma_tag(struct ena_adapter *);
106193475Sbennostatic int ena_free_tx_dma_tag(struct ena_adapter *);
107193475Sbennostatic int ena_setup_rx_dma_tag(struct ena_adapter *);
108193475Sbennostatic int ena_free_rx_dma_tag(struct ena_adapter *);
109193475Sbennostatic void ena_release_all_tx_dmamap(struct ena_ring *);
110193475Sbennostatic int ena_setup_tx_resources(struct ena_adapter *, int);
111193475Sbennostatic void ena_free_tx_resources(struct ena_adapter *, int);
112193475Sbennostatic int ena_setup_all_tx_resources(struct ena_adapter *);
113193475Sbennostatic void ena_free_all_tx_resources(struct ena_adapter *);
114193475Sbennostatic int ena_setup_rx_resources(struct ena_adapter *, unsigned int);
115193475Sbennostatic void ena_free_rx_resources(struct ena_adapter *, unsigned int);
116193475Sbennostatic int ena_setup_all_rx_resources(struct ena_adapter *);
117193475Sbennostatic void ena_free_all_rx_resources(struct ena_adapter *);
118193475Sbennostatic inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
119193475Sbenno    struct ena_rx_buffer *);
120193475Sbennostatic void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
121193475Sbenno    struct ena_rx_buffer *);
122193475Sbennostatic void ena_free_rx_bufs(struct ena_adapter *, unsigned int);
123193475Sbennostatic void ena_refill_all_rx_bufs(struct ena_adapter *);
124193475Sbennostatic void ena_free_all_rx_bufs(struct ena_adapter *);
125193475Sbennostatic void ena_free_tx_bufs(struct ena_adapter *, unsigned int);
126193475Sbennostatic void ena_free_all_tx_bufs(struct ena_adapter *);
127193475Sbennostatic void ena_destroy_all_tx_queues(struct ena_adapter *);
128193475Sbennostatic void ena_destroy_all_rx_queues(struct ena_adapter *);
129193475Sbennostatic void ena_destroy_all_io_queues(struct ena_adapter *);
13030627Sjmgstatic int ena_create_io_queues(struct ena_adapter *);
13130627Sjmgstatic int ena_handle_msix(void *);
13225540Sdfrstatic int ena_enable_msix(struct ena_adapter *);
133233109Shselaskystatic void ena_setup_mgmnt_intr(struct ena_adapter *);
134193473Sbennostatic int ena_setup_io_intr(struct ena_adapter *);
13525540Sdfrstatic int ena_request_mgmnt_irq(struct ena_adapter *);
13625540Sdfrstatic int ena_request_io_irq(struct ena_adapter *);
13730627Sjmgstatic void ena_free_mgmnt_irq(struct ena_adapter *);
13830627Sjmgstatic void ena_free_io_irq(struct ena_adapter *);
13925540Sdfrstatic void ena_free_irqs(struct ena_adapter *);
140193473Sbennostatic void ena_disable_msix(struct ena_adapter *);
141193473Sbennostatic void ena_unmask_all_io_irqs(struct ena_adapter *);
142193473Sbennostatic int ena_up_complete(struct ena_adapter *);
143193473Sbennostatic uint64_t ena_get_counter(if_t, ift_counter);
144193475Sbennostatic int ena_media_change(if_t);
145233109Shselaskystatic void ena_media_status(if_t, struct ifmediareq *);
14625540Sdfrstatic void ena_init(void *);
147193473Sbennostatic int ena_ioctl(if_t, u_long, caddr_t);
148193473Sbennostatic int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
149193475Sbennostatic void ena_update_host_info(struct ena_admin_host_info *, if_t);
150233109Shselaskystatic void ena_update_hwassist(struct ena_adapter *);
151193475Sbennostatic int ena_setup_ifnet(device_t, struct ena_adapter *,
152233109Shselasky    struct ena_com_dev_get_features_ctx *);
153193473Sbennostatic int ena_enable_wc(device_t, struct resource *);
154193475Sbennostatic int ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
155193475Sbenno    struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
156193475Sbennostatic int ena_map_llq_mem_bar(device_t, struct ena_com_dev *);
157193475Sbennostatic uint32_t ena_calc_max_io_queue_num(device_t, struct ena_com_dev *,
158193473Sbenno    struct ena_com_dev_get_features_ctx *);
159193473Sbennostatic int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *);
160193475Sbennostatic void ena_config_host_info(struct ena_com_dev *, device_t);
161193473Sbennostatic int ena_attach(device_t);
162233109Shselaskystatic int ena_detach(device_t);
163233109Shselaskystatic int ena_device_init(struct ena_adapter *, device_t,
164233109Shselasky    struct ena_com_dev_get_features_ctx *, int *);
165193473Sbennostatic int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *);
166193473Sbennostatic void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
167193473Sbennostatic void unimplemented_aenq_handler(void *, struct ena_admin_aenq_entry *);
16825540Sdfrstatic int ena_copy_eni_metrics(struct ena_adapter *);
169193473Sbennostatic int ena_copy_srd_metrics(struct ena_adapter *);
170193473Sbennostatic int ena_copy_customer_metrics(struct ena_adapter *);
17125540Sdfrstatic void ena_timer_service(void *);
172193473Sbenno
173193473Sbennostatic char ena_version[] = ENA_DEVICE_NAME ENA_DRV_MODULE_NAME
17425540Sdfr    " v" ENA_DRV_MODULE_VERSION;
175193473Sbenno
176193475Sbennostatic ena_vendor_info_t ena_vendor_info_array[] = {
177193475Sbenno	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0 },
178193475Sbenno	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF_RSERV0, 0 },
179233109Shselasky	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0 },
180233109Shselasky	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF_RSERV0, 0 },
181233109Shselasky	/* Last entry */
182233109Shselasky	{ 0, 0, 0 }
183233109Shselasky};
184260483Sbapt
185260483Sbaptstruct sx ena_global_lock;
186260483Sbapt
/*
 * Contains pointers to event handlers, e.g. link state change.
 */
190260483Sbaptstatic struct ena_aenq_handlers aenq_handlers;
191260483Sbapt
192233109Shselaskyvoid
193193475Sbennoena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
194193475Sbenno{
195193475Sbenno	if (error != 0)
196193475Sbenno		return;
197193475Sbenno	*(bus_addr_t *)arg = segs[0].ds_addr;
198193475Sbenno}
199193473Sbenno
200193473Sbennoint
201193473Sbennoena_dma_alloc(device_t dmadev, bus_size_t size, ena_mem_handle_t *dma,
202193473Sbenno    int mapflags, bus_size_t alignment, int domain)
20325540Sdfr{
204193473Sbenno	struct ena_adapter *adapter = device_get_softc(dmadev);
20525540Sdfr	device_t pdev = adapter->pdev;
206	uint32_t maxsize;
207	uint64_t dma_space_addr;
208	int error;
209
210	maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
211
212	dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
213	if (unlikely(dma_space_addr == 0))
214		dma_space_addr = BUS_SPACE_MAXADDR;
215
216	error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
217	    alignment, 0,      /* alignment, bounds 		*/
218	    dma_space_addr,    /* lowaddr of exclusion window	*/
219	    BUS_SPACE_MAXADDR, /* highaddr of exclusion window	*/
220	    NULL, NULL,	       /* filter, filterarg 		*/
221	    maxsize,	       /* maxsize 			*/
222	    1,		       /* nsegments 			*/
223	    maxsize,	       /* maxsegsize 			*/
224	    BUS_DMA_ALLOCNOW,  /* flags 			*/
225	    NULL,	       /* lockfunc 			*/
226	    NULL,	       /* lockarg 			*/
227	    &dma->tag);
228	if (unlikely(error != 0)) {
229		ena_log(pdev, ERR, "bus_dma_tag_create failed: %d\n", error);
230		goto fail_tag;
231	}
232
233	error = bus_dma_tag_set_domain(dma->tag, domain);
234	if (unlikely(error != 0)) {
235		ena_log(pdev, ERR, "bus_dma_tag_set_domain failed: %d\n",
236		    error);
237		goto fail_map_create;
238	}
239
240	error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr,
241	    BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
242	if (unlikely(error != 0)) {
243		ena_log(pdev, ERR, "bus_dmamem_alloc(%ju) failed: %d\n",
244		    (uintmax_t)size, error);
245		goto fail_map_create;
246	}
247
248	dma->paddr = 0;
249	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size,
250	    ena_dmamap_callback, &dma->paddr, mapflags);
251	if (unlikely((error != 0) || (dma->paddr == 0))) {
252		ena_log(pdev, ERR, "bus_dmamap_load failed: %d\n", error);
253		goto fail_map_load;
254	}
255
256	bus_dmamap_sync(dma->tag, dma->map,
257	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
258
259	return (0);
260
261fail_map_load:
262	bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
263fail_map_create:
264	bus_dma_tag_destroy(dma->tag);
265fail_tag:
266	dma->tag = NULL;
267	dma->vaddr = NULL;
268	dma->paddr = 0;
269
270	return (error);
271}
272
273static void
274ena_free_pci_resources(struct ena_adapter *adapter)
275{
276	device_t pdev = adapter->pdev;
277
278	if (adapter->memory != NULL) {
279		bus_release_resource(pdev, SYS_RES_MEMORY,
280		    PCIR_BAR(ENA_MEM_BAR), adapter->memory);
281	}
282
283	if (adapter->registers != NULL) {
284		bus_release_resource(pdev, SYS_RES_MEMORY,
285		    PCIR_BAR(ENA_REG_BAR), adapter->registers);
286	}
287
288	if (adapter->msix != NULL) {
289		bus_release_resource(pdev, SYS_RES_MEMORY, adapter->msix_rid,
290		    adapter->msix);
291	}
292}
293
294static int
295ena_probe(device_t dev)
296{
297	ena_vendor_info_t *ent;
298	uint16_t pci_vendor_id = 0;
299	uint16_t pci_device_id = 0;
300
301	pci_vendor_id = pci_get_vendor(dev);
302	pci_device_id = pci_get_device(dev);
303
304	ent = ena_vendor_info_array;
305	while (ent->vendor_id != 0) {
306		if ((pci_vendor_id == ent->vendor_id) &&
307		    (pci_device_id == ent->device_id)) {
308			ena_log_raw(DBG, "vendor=%x device=%x\n", pci_vendor_id,
309			    pci_device_id);
310
311			device_set_desc(dev, ENA_DEVICE_DESC);
312			return (BUS_PROBE_DEFAULT);
313		}
314
315		ent++;
316	}
317
318	return (ENXIO);
319}
320
321static int
322ena_change_mtu(if_t ifp, int new_mtu)
323{
324	struct ena_adapter *adapter = if_getsoftc(ifp);
325	device_t pdev = adapter->pdev;
326	int rc;
327
328	if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
329		ena_log(pdev, ERR, "Invalid MTU setting. new_mtu: %d max mtu: %d min mtu: %d\n",
330		    new_mtu, adapter->max_mtu, ENA_MIN_MTU);
331		return (EINVAL);
332	}
333
334	rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
335	if (likely(rc == 0)) {
336		ena_log(pdev, DBG, "set MTU to %d\n", new_mtu);
337		if_setmtu(ifp, new_mtu);
338	} else {
339		ena_log(pdev, ERR, "Failed to set MTU to %d\n", new_mtu);
340	}
341
342	return (rc);
343}
344
345static inline void
346ena_alloc_counters(counter_u64_t *begin, int size)
347{
348	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
349
350	for (; begin < end; ++begin)
351		*begin = counter_u64_alloc(M_WAITOK);
352}
353
354static inline void
355ena_free_counters(counter_u64_t *begin, int size)
356{
357	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
358
359	for (; begin < end; ++begin)
360		counter_u64_free(*begin);
361}
362
363static inline void
364ena_reset_counters(counter_u64_t *begin, int size)
365{
366	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
367
368	for (; begin < end; ++begin)
369		counter_u64_zero(*begin);
370}
371
372static void
373ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
374    uint16_t qid)
375{
376	ring->qid = qid;
377	ring->adapter = adapter;
378	ring->ena_dev = adapter->ena_dev;
379	atomic_store_8(&ring->first_interrupt, 0);
380	ring->no_interrupt_event_cnt = 0;
381}
382
383static void
384ena_init_io_rings_basic(struct ena_adapter *adapter)
385{
386	struct ena_com_dev *ena_dev;
387	struct ena_ring *txr, *rxr;
388	struct ena_que *que;
389	int i;
390
391	ena_dev = adapter->ena_dev;
392
393	for (i = 0; i < adapter->num_io_queues; i++) {
394		txr = &adapter->tx_ring[i];
395		rxr = &adapter->rx_ring[i];
396
397		/* TX/RX common ring state */
398		ena_init_io_rings_common(adapter, txr, i);
399		ena_init_io_rings_common(adapter, rxr, i);
400
401		/* TX specific ring state */
402		txr->tx_max_header_size = ena_dev->tx_max_header_size;
403		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
404
405		que = &adapter->que[i];
406		que->adapter = adapter;
407		que->id = i;
408		que->tx_ring = txr;
409		que->rx_ring = rxr;
410
411		txr->que = que;
412		rxr->que = que;
413
414		rxr->empty_rx_queue = 0;
415		rxr->rx_mbuf_sz = ena_mbuf_sz;
416	}
417}
418
419static void
420ena_init_io_rings_advanced(struct ena_adapter *adapter)
421{
422	struct ena_ring *txr, *rxr;
423	int i;
424
425	for (i = 0; i < adapter->num_io_queues; i++) {
426		txr = &adapter->tx_ring[i];
427		rxr = &adapter->rx_ring[i];
428
429		/* Allocate a buf ring */
430		txr->buf_ring_size = adapter->buf_ring_size;
431		txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF, M_WAITOK,
432		    &txr->ring_mtx);
433
434		/* Allocate Tx statistics. */
435		ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
436		    sizeof(txr->tx_stats));
437		txr->tx_last_cleanup_ticks = ticks;
438
439		/* Allocate Rx statistics. */
440		ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
441		    sizeof(rxr->rx_stats));
442
443		/* Initialize locks */
444		snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
445		    device_get_nameunit(adapter->pdev), i);
446		snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
447		    device_get_nameunit(adapter->pdev), i);
448
449		mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
450	}
451}
452
/*
 * Initialize all IO rings in two passes:
 *   - basic:    assign initial values and copy settings from the
 *               adapter/ena_dev,
 *   - advanced: allocate the mutex, counters and buf_ring.
 */
static void
ena_init_io_rings(struct ena_adapter *adapter)
{
	ena_init_io_rings_basic(adapter);
	ena_init_io_rings_advanced(adapter);
}
465
466static void
467ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
468{
469	struct ena_ring *txr = &adapter->tx_ring[qid];
470	struct ena_ring *rxr = &adapter->rx_ring[qid];
471
472	ena_free_counters((counter_u64_t *)&txr->tx_stats,
473	    sizeof(txr->tx_stats));
474	ena_free_counters((counter_u64_t *)&rxr->rx_stats,
475	    sizeof(rxr->rx_stats));
476
477	ENA_RING_MTX_LOCK(txr);
478	drbr_free(txr->br, M_DEVBUF);
479	ENA_RING_MTX_UNLOCK(txr);
480
481	mtx_destroy(&txr->ring_mtx);
482}
483
484static void
485ena_free_all_io_rings_resources(struct ena_adapter *adapter)
486{
487	int i;
488
489	for (i = 0; i < adapter->num_io_queues; i++)
490		ena_free_io_ring_resources(adapter, i);
491}
492
493static int
494ena_setup_tx_dma_tag(struct ena_adapter *adapter)
495{
496	int ret;
497
498	/* Create DMA tag for Tx buffers */
499	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
500	    1, 0,				  /* alignment, bounds 	     */
501	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
502	    BUS_SPACE_MAXADDR,			  /* highaddr of excl window */
503	    NULL, NULL,				  /* filter, filterarg 	     */
504	    ENA_TSO_MAXSIZE,			  /* maxsize 		     */
505	    adapter->max_tx_sgl_size - 1,	  /* nsegments 		     */
506	    ENA_TSO_MAXSIZE,			  /* maxsegsize 	     */
507	    0,					  /* flags 		     */
508	    NULL,				  /* lockfunc 		     */
509	    NULL,				  /* lockfuncarg 	     */
510	    &adapter->tx_buf_tag);
511
512	return (ret);
513}
514
515static int
516ena_free_tx_dma_tag(struct ena_adapter *adapter)
517{
518	int ret;
519
520	ret = bus_dma_tag_destroy(adapter->tx_buf_tag);
521
522	if (likely(ret == 0))
523		adapter->tx_buf_tag = NULL;
524
525	return (ret);
526}
527
528static int
529ena_setup_rx_dma_tag(struct ena_adapter *adapter)
530{
531	int ret;
532
533	/* Create DMA tag for Rx buffers*/
534	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent   */
535	    1, 0,				  /* alignment, bounds 	     */
536	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
537	    BUS_SPACE_MAXADDR,			  /* highaddr of excl window */
538	    NULL, NULL,				  /* filter, filterarg 	     */
539	    ena_mbuf_sz,			  /* maxsize 		     */
540	    adapter->max_rx_sgl_size,		  /* nsegments 		     */
541	    ena_mbuf_sz,			  /* maxsegsize 	     */
542	    0,					  /* flags 		     */
543	    NULL,				  /* lockfunc 		     */
544	    NULL,				  /* lockarg 		     */
545	    &adapter->rx_buf_tag);
546
547	return (ret);
548}
549
550static int
551ena_free_rx_dma_tag(struct ena_adapter *adapter)
552{
553	int ret;
554
555	ret = bus_dma_tag_destroy(adapter->rx_buf_tag);
556
557	if (likely(ret == 0))
558		adapter->rx_buf_tag = NULL;
559
560	return (ret);
561}
562
563static void
564ena_release_all_tx_dmamap(struct ena_ring *tx_ring)
565{
566	struct ena_adapter *adapter = tx_ring->adapter;
567	struct ena_tx_buffer *tx_info;
568	bus_dma_tag_t tx_tag = adapter->tx_buf_tag;
569	int i;
570#ifdef DEV_NETMAP
571	struct ena_netmap_tx_info *nm_info;
572	int j;
573#endif /* DEV_NETMAP */
574
575	for (i = 0; i < tx_ring->ring_size; ++i) {
576		tx_info = &tx_ring->tx_buffer_info[i];
577#ifdef DEV_NETMAP
578		if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
579			nm_info = &tx_info->nm_info;
580			for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) {
581				if (nm_info->map_seg[j] != NULL) {
582					bus_dmamap_destroy(tx_tag,
583					    nm_info->map_seg[j]);
584					nm_info->map_seg[j] = NULL;
585				}
586			}
587		}
588#endif /* DEV_NETMAP */
589		if (tx_info->dmamap != NULL) {
590			bus_dmamap_destroy(tx_tag, tx_info->dmamap);
591			tx_info->dmamap = NULL;
592		}
593	}
594}
595
596/**
597 * ena_setup_tx_resources - allocate Tx resources (Descriptors)
598 * @adapter: network interface device structure
599 * @qid: queue index
600 *
601 * Returns 0 on success, otherwise on failure.
602 **/
603static int
604ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
605{
606	device_t pdev = adapter->pdev;
607	char thread_name[MAXCOMLEN + 1];
608	struct ena_que *que = &adapter->que[qid];
609	struct ena_ring *tx_ring = que->tx_ring;
610	cpuset_t *cpu_mask = NULL;
611	int size, i, err;
612#ifdef DEV_NETMAP
613	bus_dmamap_t *map;
614	int j;
615
616	ena_netmap_reset_tx_ring(adapter, qid);
617#endif /* DEV_NETMAP */
618
619	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
620
621	tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
622	if (unlikely(tx_ring->tx_buffer_info == NULL))
623		return (ENOMEM);
624
625	size = sizeof(uint16_t) * tx_ring->ring_size;
626	tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
627	if (unlikely(tx_ring->free_tx_ids == NULL))
628		goto err_buf_info_free;
629
630	size = tx_ring->tx_max_header_size;
631	tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
632	    M_NOWAIT | M_ZERO);
633	if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
634		goto err_tx_ids_free;
635
636	/* Req id stack for TX OOO completions */
637	for (i = 0; i < tx_ring->ring_size; i++)
638		tx_ring->free_tx_ids[i] = i;
639
640	/* Reset TX statistics. */
641	ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
642	    sizeof(tx_ring->tx_stats));
643
644	tx_ring->next_to_use = 0;
645	tx_ring->next_to_clean = 0;
646	tx_ring->acum_pkts = 0;
647
648	/* Make sure that drbr is empty */
649	ENA_RING_MTX_LOCK(tx_ring);
650	drbr_flush(adapter->ifp, tx_ring->br);
651	ENA_RING_MTX_UNLOCK(tx_ring);
652
653	/* ... and create the buffer DMA maps */
654	for (i = 0; i < tx_ring->ring_size; i++) {
655		err = bus_dmamap_create(adapter->tx_buf_tag, 0,
656		    &tx_ring->tx_buffer_info[i].dmamap);
657		if (unlikely(err != 0)) {
658			ena_log(pdev, ERR,
659			    "Unable to create Tx DMA map for buffer %d\n", i);
660			goto err_map_release;
661		}
662
663#ifdef DEV_NETMAP
664		if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
665			map = tx_ring->tx_buffer_info[i].nm_info.map_seg;
666			for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
667				err = bus_dmamap_create(adapter->tx_buf_tag, 0,
668				    &map[j]);
669				if (unlikely(err != 0)) {
670					ena_log(pdev, ERR,
671					    "Unable to create Tx DMA for buffer %d %d\n",
672					    i, j);
673					goto err_map_release;
674				}
675			}
676		}
677#endif /* DEV_NETMAP */
678	}
679
680	/* Allocate taskqueues */
681	TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
682	tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
683	    taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
684	if (unlikely(tx_ring->enqueue_tq == NULL)) {
685		ena_log(pdev, ERR,
686		    "Unable to create taskqueue for enqueue task\n");
687		i = tx_ring->ring_size;
688		goto err_map_release;
689	}
690
691	tx_ring->running = true;
692
693#ifdef RSS
694	cpu_mask = &que->cpu_mask;
695	snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
696	    device_get_nameunit(adapter->pdev), que->cpu);
697#else
698	snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
699	    device_get_nameunit(adapter->pdev), que->id);
700#endif
701	taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET,
702	    cpu_mask, "%s", thread_name);
703
704	return (0);
705
706err_map_release:
707	ena_release_all_tx_dmamap(tx_ring);
708err_tx_ids_free:
709	free(tx_ring->free_tx_ids, M_DEVBUF);
710	tx_ring->free_tx_ids = NULL;
711err_buf_info_free:
712	free(tx_ring->tx_buffer_info, M_DEVBUF);
713	tx_ring->tx_buffer_info = NULL;
714
715	return (ENOMEM);
716}
717
718/**
719 * ena_free_tx_resources - Free Tx Resources per Queue
720 * @adapter: network interface device structure
721 * @qid: queue index
722 *
723 * Free all transmit software resources
724 **/
725static void
726ena_free_tx_resources(struct ena_adapter *adapter, int qid)
727{
728	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
729#ifdef DEV_NETMAP
730	struct ena_netmap_tx_info *nm_info;
731	int j;
732#endif /* DEV_NETMAP */
733
734	while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task, NULL))
735		taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
736
737	taskqueue_free(tx_ring->enqueue_tq);
738
739	ENA_RING_MTX_LOCK(tx_ring);
740	/* Flush buffer ring, */
741	drbr_flush(adapter->ifp, tx_ring->br);
742
743	/* Free buffer DMA maps, */
744	for (int i = 0; i < tx_ring->ring_size; i++) {
745		bus_dmamap_sync(adapter->tx_buf_tag,
746		    tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE);
747		bus_dmamap_unload(adapter->tx_buf_tag,
748		    tx_ring->tx_buffer_info[i].dmamap);
749		bus_dmamap_destroy(adapter->tx_buf_tag,
750		    tx_ring->tx_buffer_info[i].dmamap);
751
752#ifdef DEV_NETMAP
753		if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
754			nm_info = &tx_ring->tx_buffer_info[i].nm_info;
755			for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
756				if (nm_info->socket_buf_idx[j] != 0) {
757					bus_dmamap_sync(adapter->tx_buf_tag,
758					    nm_info->map_seg[j],
759					    BUS_DMASYNC_POSTWRITE);
760					ena_netmap_unload(adapter,
761					    nm_info->map_seg[j]);
762				}
763				bus_dmamap_destroy(adapter->tx_buf_tag,
764				    nm_info->map_seg[j]);
765				nm_info->socket_buf_idx[j] = 0;
766			}
767		}
768#endif /* DEV_NETMAP */
769
770		m_freem(tx_ring->tx_buffer_info[i].mbuf);
771		tx_ring->tx_buffer_info[i].mbuf = NULL;
772	}
773	ENA_RING_MTX_UNLOCK(tx_ring);
774
775	/* And free allocated memory. */
776	free(tx_ring->tx_buffer_info, M_DEVBUF);
777	tx_ring->tx_buffer_info = NULL;
778
779	free(tx_ring->free_tx_ids, M_DEVBUF);
780	tx_ring->free_tx_ids = NULL;
781
782	free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
783	tx_ring->push_buf_intermediate_buf = NULL;
784}
785
786/**
787 * ena_setup_all_tx_resources - allocate all queues Tx resources
788 * @adapter: network interface device structure
789 *
790 * Returns 0 on success, otherwise on failure.
791 **/
792static int
793ena_setup_all_tx_resources(struct ena_adapter *adapter)
794{
795	int i, rc;
796
797	for (i = 0; i < adapter->num_io_queues; i++) {
798		rc = ena_setup_tx_resources(adapter, i);
799		if (rc != 0) {
800			ena_log(adapter->pdev, ERR,
801			    "Allocation for Tx Queue %u failed\n", i);
802			goto err_setup_tx;
803		}
804	}
805
806	return (0);
807
808err_setup_tx:
809	/* Rewind the index freeing the rings as we go */
810	while (i--)
811		ena_free_tx_resources(adapter, i);
812	return (rc);
813}
814
815/**
816 * ena_free_all_tx_resources - Free Tx Resources for All Queues
817 * @adapter: network interface device structure
818 *
819 * Free all transmit software resources
820 **/
821static void
822ena_free_all_tx_resources(struct ena_adapter *adapter)
823{
824	int i;
825
826	for (i = 0; i < adapter->num_io_queues; i++)
827		ena_free_tx_resources(adapter, i);
828}
829
830/**
831 * ena_setup_rx_resources - allocate Rx resources (Descriptors)
832 * @adapter: network interface device structure
833 * @qid: queue index
834 *
835 * Returns 0 on success, otherwise on failure.
836 **/
837static int
838ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
839{
840	device_t pdev = adapter->pdev;
841	struct ena_que *que = &adapter->que[qid];
842	struct ena_ring *rx_ring = que->rx_ring;
843	int size, err, i;
844
845	size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
846
847#ifdef DEV_NETMAP
848	ena_netmap_reset_rx_ring(adapter, qid);
849	rx_ring->initialized = false;
850#endif /* DEV_NETMAP */
851
852	/*
853	 * Alloc extra element so in rx path
854	 * we can always prefetch rx_info + 1
855	 */
856	size += sizeof(struct ena_rx_buffer);
857
858	rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
859
860	size = sizeof(uint16_t) * rx_ring->ring_size;
861	rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);
862
863	for (i = 0; i < rx_ring->ring_size; i++)
864		rx_ring->free_rx_ids[i] = i;
865
866	/* Reset RX statistics. */
867	ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
868	    sizeof(rx_ring->rx_stats));
869
870	rx_ring->next_to_clean = 0;
871	rx_ring->next_to_use = 0;
872
873	/* ... and create the buffer DMA maps */
874	for (i = 0; i < rx_ring->ring_size; i++) {
875		err = bus_dmamap_create(adapter->rx_buf_tag, 0,
876		    &(rx_ring->rx_buffer_info[i].map));
877		if (err != 0) {
878			ena_log(pdev, ERR,
879			    "Unable to create Rx DMA map for buffer %d\n", i);
880			goto err_buf_info_unmap;
881		}
882	}
883
884	/* Create LRO for the ring */
885	if ((if_getcapenable(adapter->ifp) & IFCAP_LRO) != 0) {
886		int err = tcp_lro_init(&rx_ring->lro);
887		if (err != 0) {
888			ena_log(pdev, ERR, "LRO[%d] Initialization failed!\n",
889			    qid);
890		} else {
891			ena_log(pdev, DBG, "RX Soft LRO[%d] Initialized\n",
892			    qid);
893			rx_ring->lro.ifp = adapter->ifp;
894		}
895	}
896
897	return (0);
898
899err_buf_info_unmap:
900	while (i--) {
901		bus_dmamap_destroy(adapter->rx_buf_tag,
902		    rx_ring->rx_buffer_info[i].map);
903	}
904
905	free(rx_ring->free_rx_ids, M_DEVBUF);
906	rx_ring->free_rx_ids = NULL;
907	free(rx_ring->rx_buffer_info, M_DEVBUF);
908	rx_ring->rx_buffer_info = NULL;
909	return (ENOMEM);
910}
911
912/**
913 * ena_free_rx_resources - Free Rx Resources
914 * @adapter: network interface device structure
915 * @qid: queue index
916 *
917 * Free all receive software resources
918 **/
919static void
920ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
921{
922	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
923
924	/* Free buffer DMA maps, */
925	for (int i = 0; i < rx_ring->ring_size; i++) {
926		bus_dmamap_sync(adapter->rx_buf_tag,
927		    rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
928		m_freem(rx_ring->rx_buffer_info[i].mbuf);
929		rx_ring->rx_buffer_info[i].mbuf = NULL;
930		bus_dmamap_unload(adapter->rx_buf_tag,
931		    rx_ring->rx_buffer_info[i].map);
932		bus_dmamap_destroy(adapter->rx_buf_tag,
933		    rx_ring->rx_buffer_info[i].map);
934	}
935
936	/* free LRO resources, */
937	tcp_lro_free(&rx_ring->lro);
938
939	/* free allocated memory */
940	free(rx_ring->rx_buffer_info, M_DEVBUF);
941	rx_ring->rx_buffer_info = NULL;
942
943	free(rx_ring->free_rx_ids, M_DEVBUF);
944	rx_ring->free_rx_ids = NULL;
945}
946
947/**
948 * ena_setup_all_rx_resources - allocate all queues Rx resources
949 * @adapter: network interface device structure
950 *
951 * Returns 0 on success, otherwise on failure.
952 **/
953static int
954ena_setup_all_rx_resources(struct ena_adapter *adapter)
955{
956	int i, rc = 0;
957
958	for (i = 0; i < adapter->num_io_queues; i++) {
959		rc = ena_setup_rx_resources(adapter, i);
960		if (rc != 0) {
961			ena_log(adapter->pdev, ERR,
962			    "Allocation for Rx Queue %u failed\n", i);
963			goto err_setup_rx;
964		}
965	}
966	return (0);
967
968err_setup_rx:
969	/* rewind the index freeing the rings as we go */
970	while (i--)
971		ena_free_rx_resources(adapter, i);
972	return (rc);
973}
974
975/**
976 * ena_free_all_rx_resources - Free Rx resources for all queues
977 * @adapter: network interface device structure
978 *
979 * Free all receive software resources
980 **/
981static void
982ena_free_all_rx_resources(struct ena_adapter *adapter)
983{
984	int i;
985
986	for (i = 0; i < adapter->num_io_queues; i++)
987		ena_free_rx_resources(adapter, i);
988}
989
990static inline int
991ena_alloc_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
992    struct ena_rx_buffer *rx_info)
993{
994	device_t pdev = adapter->pdev;
995	struct ena_com_buf *ena_buf;
996	bus_dma_segment_t segs[1];
997	int nsegs, error;
998	int mlen;
999
1000	/* if previous allocated frag is not used */
1001	if (unlikely(rx_info->mbuf != NULL))
1002		return (0);
1003
1004	/* Get mbuf using UMA allocator */
1005	rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1006	    rx_ring->rx_mbuf_sz);
1007
1008	if (unlikely(rx_info->mbuf == NULL)) {
1009		counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
1010		rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1011		if (unlikely(rx_info->mbuf == NULL)) {
1012			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1013			return (ENOMEM);
1014		}
1015		mlen = MCLBYTES;
1016	} else {
1017		mlen = rx_ring->rx_mbuf_sz;
1018	}
1019	/* Set mbuf length*/
1020	rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
1021
1022	/* Map packets for DMA */
1023	ena_log(pdev, DBG,
1024	    "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
1025	    adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len);
1026	error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
1027	    rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
1028	if (unlikely((error != 0) || (nsegs != 1))) {
1029		ena_log(pdev, WARN,
1030		    "failed to map mbuf, error: %d, nsegs: %d\n", error, nsegs);
1031		counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
1032		goto exit;
1033	}
1034
1035	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
1036
1037	ena_buf = &rx_info->ena_buf;
1038	ena_buf->paddr = segs[0].ds_addr;
1039	ena_buf->len = mlen;
1040
1041	ena_log(pdev, DBG,
1042	    "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
1043	    rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr);
1044
1045	return (0);
1046
1047exit:
1048	m_freem(rx_info->mbuf);
1049	rx_info->mbuf = NULL;
1050	return (EFAULT);
1051}
1052
1053static void
1054ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
1055    struct ena_rx_buffer *rx_info)
1056{
1057	if (rx_info->mbuf == NULL) {
1058		ena_log(adapter->pdev, WARN,
1059		    "Trying to free unallocated buffer\n");
1060		return;
1061	}
1062
1063	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1064	    BUS_DMASYNC_POSTREAD);
1065	bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
1066	m_freem(rx_info->mbuf);
1067	rx_info->mbuf = NULL;
1068}
1069
1070/**
1071 * ena_refill_rx_bufs - Refills ring with descriptors
1072 * @rx_ring: the ring which we want to feed with free descriptors
1073 * @num: number of descriptors to refill
1074 * Refills the ring with newly allocated DMA-mapped mbufs for receiving
1075 **/
1076int
1077ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
1078{
1079	struct ena_adapter *adapter = rx_ring->adapter;
1080	device_t pdev = adapter->pdev;
1081	uint16_t next_to_use, req_id;
1082	uint32_t i;
1083	int rc;
1084
1085	ena_log_io(adapter->pdev, DBG, "refill qid: %d\n", rx_ring->qid);
1086
1087	next_to_use = rx_ring->next_to_use;
1088
1089	for (i = 0; i < num; i++) {
1090		struct ena_rx_buffer *rx_info;
1091
1092		ena_log_io(pdev, DBG, "RX buffer - next to use: %d\n",
1093		    next_to_use);
1094
1095		req_id = rx_ring->free_rx_ids[next_to_use];
1096		rx_info = &rx_ring->rx_buffer_info[req_id];
1097#ifdef DEV_NETMAP
1098		if (ena_rx_ring_in_netmap(adapter, rx_ring->qid))
1099			rc = ena_netmap_alloc_rx_slot(adapter, rx_ring,
1100			    rx_info);
1101		else
1102#endif /* DEV_NETMAP */
1103			rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1104		if (unlikely(rc != 0)) {
1105			ena_log_io(pdev, WARN,
1106			    "failed to alloc buffer for rx queue %d\n",
1107			    rx_ring->qid);
1108			break;
1109		}
1110		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1111		    &rx_info->ena_buf, req_id);
1112		if (unlikely(rc != 0)) {
1113			ena_log_io(pdev, WARN,
1114			    "failed to add buffer for rx queue %d\n",
1115			    rx_ring->qid);
1116			break;
1117		}
1118		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1119		    rx_ring->ring_size);
1120	}
1121
1122	if (unlikely(i < num)) {
1123		counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1124		ena_log_io(pdev, WARN,
1125		    "refilled rx qid %d with only %d mbufs (from %d)\n",
1126		    rx_ring->qid, i, num);
1127	}
1128
1129	if (likely(i != 0))
1130		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1131
1132	rx_ring->next_to_use = next_to_use;
1133	return (i);
1134}
1135
1136int
1137ena_update_buf_ring_size(struct ena_adapter *adapter,
1138    uint32_t new_buf_ring_size)
1139{
1140	uint32_t old_buf_ring_size;
1141	int rc = 0;
1142	bool dev_was_up;
1143
1144	old_buf_ring_size = adapter->buf_ring_size;
1145	adapter->buf_ring_size = new_buf_ring_size;
1146
1147	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1148	ena_down(adapter);
1149
1150	/* Reconfigure buf ring for all Tx rings. */
1151	ena_free_all_io_rings_resources(adapter);
1152	ena_init_io_rings_advanced(adapter);
1153	if (dev_was_up) {
1154		/*
1155		 * If ena_up() fails, it's not because of recent buf_ring size
1156		 * changes. Because of that, we just want to revert old drbr
1157		 * value and trigger the reset because something else had to
1158		 * go wrong.
1159		 */
1160		rc = ena_up(adapter);
1161		if (unlikely(rc != 0)) {
1162			ena_log(adapter->pdev, ERR,
1163			    "Failed to configure device after setting new drbr size: %u. Reverting old value: %u and triggering the reset\n",
1164			    new_buf_ring_size, old_buf_ring_size);
1165
1166			/* Revert old size and trigger the reset */
1167			adapter->buf_ring_size = old_buf_ring_size;
1168			ena_free_all_io_rings_resources(adapter);
1169			ena_init_io_rings_advanced(adapter);
1170
1171			ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET,
1172			    adapter);
1173			ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER);
1174		}
1175	}
1176
1177	return (rc);
1178}
1179
1180int
1181ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
1182    uint32_t new_rx_size)
1183{
1184	uint32_t old_tx_size, old_rx_size;
1185	int rc = 0;
1186	bool dev_was_up;
1187
1188	old_tx_size = adapter->requested_tx_ring_size;
1189	old_rx_size = adapter->requested_rx_ring_size;
1190	adapter->requested_tx_ring_size = new_tx_size;
1191	adapter->requested_rx_ring_size = new_rx_size;
1192
1193	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1194	ena_down(adapter);
1195
1196	/* Configure queues with new size. */
1197	ena_init_io_rings_basic(adapter);
1198	if (dev_was_up) {
1199		rc = ena_up(adapter);
1200		if (unlikely(rc != 0)) {
1201			ena_log(adapter->pdev, ERR,
1202			    "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n",
1203			    new_tx_size, new_rx_size, old_tx_size, old_rx_size);
1204
1205			/* Revert old size. */
1206			adapter->requested_tx_ring_size = old_tx_size;
1207			adapter->requested_rx_ring_size = old_rx_size;
1208			ena_init_io_rings_basic(adapter);
1209
1210			/* And try again. */
1211			rc = ena_up(adapter);
1212			if (unlikely(rc != 0)) {
1213				ena_log(adapter->pdev, ERR,
1214				    "Failed to revert old queue sizes. Triggering device reset.\n");
1215				/*
1216				 * If we've failed again, something had to go
1217				 * wrong. After reset, the device should try to
1218				 * go up
1219				 */
1220				ENA_FLAG_SET_ATOMIC(
1221				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1222				ena_trigger_reset(adapter,
1223				    ENA_REGS_RESET_OS_TRIGGER);
1224			}
1225		}
1226	}
1227
1228	return (rc);
1229}
1230
/*
 * Rebuild the I/O rings for a new queue count: free the current ring
 * resources, destroy the RSS state, record the new number of I/O queues
 * and initialize the rings again.
 */
static void
ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
{
	ena_free_all_io_rings_resources(adapter);
	/* Force indirection table to be reinitialized */
	ena_com_rss_destroy(adapter->ena_dev);

	/* The new count must be in place before the rings are re-created. */
	adapter->num_io_queues = num;
	ena_init_io_rings(adapter);
}
1241
1242int
1243ena_update_base_cpu(struct ena_adapter *adapter, int new_num)
1244{
1245	int old_num;
1246	int rc = 0;
1247	bool dev_was_up;
1248
1249	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1250	old_num = adapter->irq_cpu_base;
1251
1252	ena_down(adapter);
1253
1254	adapter->irq_cpu_base = new_num;
1255
1256	if (dev_was_up) {
1257		rc = ena_up(adapter);
1258		if (unlikely(rc != 0)) {
1259			ena_log(adapter->pdev, ERR,
1260			    "Failed to configure device %d IRQ base CPU. "
1261			    "Reverting to previous value: %d\n",
1262			    new_num, old_num);
1263
1264			adapter->irq_cpu_base = old_num;
1265
1266			rc = ena_up(adapter);
1267			if (unlikely(rc != 0)) {
1268				ena_log(adapter->pdev, ERR,
1269				    "Failed to revert to previous setup."
1270				    "Triggering device reset.\n");
1271				ENA_FLAG_SET_ATOMIC(
1272				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1273				ena_trigger_reset(adapter,
1274				    ENA_REGS_RESET_OS_TRIGGER);
1275			}
1276		}
1277	}
1278	return (rc);
1279}
1280
1281int
1282ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num)
1283{
1284	uint32_t old_num;
1285	int rc = 0;
1286	bool dev_was_up;
1287
1288	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1289	old_num = adapter->irq_cpu_stride;
1290
1291	ena_down(adapter);
1292
1293	adapter->irq_cpu_stride = new_num;
1294
1295	if (dev_was_up) {
1296		rc = ena_up(adapter);
1297		if (unlikely(rc != 0)) {
1298			ena_log(adapter->pdev, ERR,
1299			    "Failed to configure device %d IRQ CPU stride. "
1300			    "Reverting to previous value: %d\n",
1301			    new_num, old_num);
1302
1303			adapter->irq_cpu_stride = old_num;
1304
1305			rc = ena_up(adapter);
1306			if (unlikely(rc != 0)) {
1307				ena_log(adapter->pdev, ERR,
1308				    "Failed to revert to previous setup."
1309				    "Triggering device reset.\n");
1310				ENA_FLAG_SET_ATOMIC(
1311				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1312				ena_trigger_reset(adapter,
1313				    ENA_REGS_RESET_OS_TRIGGER);
1314			}
1315		}
1316	}
1317	return (rc);
1318}
1319
1320/* Caller should sanitize new_num */
1321int
1322ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
1323{
1324	uint32_t old_num;
1325	int rc = 0;
1326	bool dev_was_up;
1327
1328	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1329	old_num = adapter->num_io_queues;
1330	ena_down(adapter);
1331
1332	ena_update_io_rings(adapter, new_num);
1333
1334	if (dev_was_up) {
1335		rc = ena_up(adapter);
1336		if (unlikely(rc != 0)) {
1337			ena_log(adapter->pdev, ERR,
1338			    "Failed to configure device with %u IO queues. "
1339			    "Reverting to previous value: %u\n",
1340			    new_num, old_num);
1341
1342			ena_update_io_rings(adapter, old_num);
1343
1344			rc = ena_up(adapter);
1345			if (unlikely(rc != 0)) {
1346				ena_log(adapter->pdev, ERR,
1347				    "Failed to revert to previous setup IO "
1348				    "queues. Triggering device reset.\n");
1349				ENA_FLAG_SET_ATOMIC(
1350				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1351				ena_trigger_reset(adapter,
1352				    ENA_REGS_RESET_OS_TRIGGER);
1353			}
1354		}
1355	}
1356
1357	return (rc);
1358}
1359
1360static void
1361ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1362{
1363	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1364	unsigned int i;
1365
1366	for (i = 0; i < rx_ring->ring_size; i++) {
1367		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1368
1369		if (rx_info->mbuf != NULL)
1370			ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1371#ifdef DEV_NETMAP
1372		if (((if_getflags(adapter->ifp) & IFF_DYING) == 0) &&
1373		    (if_getcapenable(adapter->ifp) & IFCAP_NETMAP)) {
1374			if (rx_info->netmap_buf_idx != 0)
1375				ena_netmap_free_rx_slot(adapter, rx_ring,
1376				    rx_info);
1377		}
1378#endif /* DEV_NETMAP */
1379	}
1380}
1381
1382/**
1383 * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1384 * @adapter: network interface device structure
1385 *
1386 */
1387static void
1388ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1389{
1390	struct ena_ring *rx_ring;
1391	int i, rc, bufs_num;
1392
1393	for (i = 0; i < adapter->num_io_queues; i++) {
1394		rx_ring = &adapter->rx_ring[i];
1395		bufs_num = rx_ring->ring_size - 1;
1396		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1397		if (unlikely(rc != bufs_num))
1398			ena_log_io(adapter->pdev, WARN,
1399			    "refilling Queue %d failed. "
1400			    "Allocated %d buffers from: %d\n",
1401			    i, rc, bufs_num);
1402#ifdef DEV_NETMAP
1403		rx_ring->initialized = true;
1404#endif /* DEV_NETMAP */
1405	}
1406}
1407
1408static void
1409ena_free_all_rx_bufs(struct ena_adapter *adapter)
1410{
1411	int i;
1412
1413	for (i = 0; i < adapter->num_io_queues; i++)
1414		ena_free_rx_bufs(adapter, i);
1415}
1416
1417/**
1418 * ena_free_tx_bufs - Free Tx Buffers per Queue
1419 * @adapter: network interface device structure
1420 * @qid: queue index
1421 **/
1422static void
1423ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1424{
1425	bool print_once = true;
1426	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1427
1428	ENA_RING_MTX_LOCK(tx_ring);
1429	for (int i = 0; i < tx_ring->ring_size; i++) {
1430		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1431
1432		if (tx_info->mbuf == NULL)
1433			continue;
1434
1435		if (print_once) {
1436			ena_log(adapter->pdev, WARN,
1437			    "free uncompleted tx mbuf qid %d idx 0x%x\n", qid,
1438			    i);
1439			print_once = false;
1440		} else {
1441			ena_log(adapter->pdev, DBG,
1442			    "free uncompleted tx mbuf qid %d idx 0x%x\n", qid,
1443			    i);
1444		}
1445
1446		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1447		    BUS_DMASYNC_POSTWRITE);
1448		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1449
1450		m_free(tx_info->mbuf);
1451		tx_info->mbuf = NULL;
1452	}
1453	ENA_RING_MTX_UNLOCK(tx_ring);
1454}
1455
1456static void
1457ena_free_all_tx_bufs(struct ena_adapter *adapter)
1458{
1459	for (int i = 0; i < adapter->num_io_queues; i++)
1460		ena_free_tx_bufs(adapter, i);
1461}
1462
1463static void
1464ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1465{
1466	uint16_t ena_qid;
1467	int i;
1468
1469	for (i = 0; i < adapter->num_io_queues; i++) {
1470		ena_qid = ENA_IO_TXQ_IDX(i);
1471		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1472	}
1473}
1474
1475static void
1476ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1477{
1478	uint16_t ena_qid;
1479	int i;
1480
1481	for (i = 0; i < adapter->num_io_queues; i++) {
1482		ena_qid = ENA_IO_RXQ_IDX(i);
1483		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1484	}
1485}
1486
1487static void
1488ena_destroy_all_io_queues(struct ena_adapter *adapter)
1489{
1490	struct ena_que *queue;
1491	int i;
1492
1493	for (i = 0; i < adapter->num_io_queues; i++) {
1494		queue = &adapter->que[i];
1495		while (taskqueue_cancel(queue->cleanup_tq, &queue->cleanup_task, NULL))
1496			taskqueue_drain(queue->cleanup_tq, &queue->cleanup_task);
1497		taskqueue_free(queue->cleanup_tq);
1498	}
1499
1500	ena_destroy_all_tx_queues(adapter);
1501	ena_destroy_all_rx_queues(adapter);
1502}
1503
/*
 * Create the device I/O queues for every queue pair and start the per-queue
 * cleanup taskqueues.
 *
 * All Tx queues are created first, then all Rx queues; for each one the
 * submission/completion queue handles are stored in the matching ring.
 * If any step fails, the queues created so far are destroyed again.
 *
 * Returns 0 on success and ENXIO on any failure.
 */
static int
ena_create_io_queues(struct ena_adapter *adapter)
{
	struct ena_com_dev *ena_dev = adapter->ena_dev;
	struct ena_com_create_io_ctx ctx;
	struct ena_ring *ring;
	struct ena_que *queue;
	uint16_t ena_qid;
	uint32_t msix_vector;
	cpuset_t *cpu_mask = NULL;
	int rc, i;

	/* Create TX queues */
	for (i = 0; i < adapter->num_io_queues; i++) {
		msix_vector = ENA_IO_IRQ_IDX(i);
		ena_qid = ENA_IO_TXQ_IDX(i);
		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
		ctx.queue_size = adapter->requested_tx_ring_size;
		ctx.msix_vector = msix_vector;
		ctx.qid = ena_qid;
		ctx.numa_node = adapter->que[i].domain;

		rc = ena_com_create_io_queue(ena_dev, &ctx);
		if (rc != 0) {
			ena_log(adapter->pdev, ERR,
			    "Failed to create io TX queue #%d rc: %d\n", i, rc);
			goto err_tx;
		}
		ring = &adapter->tx_ring[i];
		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
		    &ring->ena_com_io_sq, &ring->ena_com_io_cq);
		if (rc != 0) {
			ena_log(adapter->pdev, ERR,
			    "Failed to get TX queue handlers. TX queue num"
			    " %d rc: %d\n",
			    i, rc);
			/*
			 * Queue i exists but is unusable; destroy it here
			 * because the unwind below only covers queues < i.
			 */
			ena_com_destroy_io_queue(ena_dev, ena_qid);
			goto err_tx;
		}

		/* A negative domain means no NUMA node was assigned. */
		if (ctx.numa_node >= 0) {
			ena_com_update_numa_node(ring->ena_com_io_cq,
			    ctx.numa_node);
		}
	}

	/* Create RX queues */
	for (i = 0; i < adapter->num_io_queues; i++) {
		msix_vector = ENA_IO_IRQ_IDX(i);
		ena_qid = ENA_IO_RXQ_IDX(i);
		ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
		ctx.queue_size = adapter->requested_rx_ring_size;
		ctx.msix_vector = msix_vector;
		ctx.qid = ena_qid;
		ctx.numa_node = adapter->que[i].domain;

		rc = ena_com_create_io_queue(ena_dev, &ctx);
		if (unlikely(rc != 0)) {
			ena_log(adapter->pdev, ERR,
			    "Failed to create io RX queue[%d] rc: %d\n", i, rc);
			goto err_rx;
		}

		ring = &adapter->rx_ring[i];
		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
		    &ring->ena_com_io_sq, &ring->ena_com_io_cq);
		if (unlikely(rc != 0)) {
			ena_log(adapter->pdev, ERR,
			    "Failed to get RX queue handlers. RX queue num"
			    " %d rc: %d\n",
			    i, rc);
			/* Same as for Tx: queue i is not covered by the unwind. */
			ena_com_destroy_io_queue(ena_dev, ena_qid);
			goto err_rx;
		}

		if (ctx.numa_node >= 0) {
			ena_com_update_numa_node(ring->ena_com_io_cq,
			    ctx.numa_node);
		}
	}

	/* One cleanup task and single-threaded taskqueue per queue pair. */
	for (i = 0; i < adapter->num_io_queues; i++) {
		queue = &adapter->que[i];

		NET_TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
		queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
		    M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);

		/* Without RSS, cpu_mask stays NULL for the call below. */
#ifdef RSS
		cpu_mask = &queue->cpu_mask;
#endif
		taskqueue_start_threads_cpuset(&queue->cleanup_tq, 1, PI_NET,
		    cpu_mask, "%s queue %d cleanup",
		    device_get_nameunit(adapter->pdev), i);
	}

	return (0);

err_rx:
	/* Destroy the Rx queues created before the failing one ... */
	while (i--)
		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
	/* ... and then every Tx queue, since all of them were created. */
	i = adapter->num_io_queues;
err_tx:
	while (i--)
		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));

	return (ENXIO);
}
1614
1615/*********************************************************************
1616 *
1617 *  MSIX & Interrupt Service routine
1618 *
1619 **********************************************************************/
1620
1621/**
1622 * ena_handle_msix - MSIX Interrupt Handler for admin/async queue
1623 * @arg: interrupt number
1624 **/
1625static void
1626ena_intr_msix_mgmnt(void *arg)
1627{
1628	struct ena_adapter *adapter = (struct ena_adapter *)arg;
1629
1630	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1631	if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
1632		ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1633}
1634
1635/**
1636 * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
1637 * @arg: queue
1638 **/
1639static int
1640ena_handle_msix(void *arg)
1641{
1642	struct ena_que *queue = arg;
1643	struct ena_adapter *adapter = queue->adapter;
1644	if_t ifp = adapter->ifp;
1645
1646	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1647		return (FILTER_STRAY);
1648
1649	taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);
1650
1651	return (FILTER_HANDLED);
1652}
1653
1654static int
1655ena_enable_msix(struct ena_adapter *adapter)
1656{
1657	device_t dev = adapter->pdev;
1658	int msix_vecs, msix_req;
1659	int i, rc = 0;
1660
1661	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1662		ena_log(dev, ERR, "Error, MSI-X is already enabled\n");
1663		return (EINVAL);
1664	}
1665
1666	/* Reserved the max msix vectors we might need */
1667	msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1668
1669	adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
1670	    M_DEVBUF, M_WAITOK | M_ZERO);
1671
1672	ena_log(dev, DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs);
1673
1674	for (i = 0; i < msix_vecs; i++) {
1675		adapter->msix_entries[i].entry = i;
1676		/* Vectors must start from 1 */
1677		adapter->msix_entries[i].vector = i + 1;
1678	}
1679
1680	msix_req = msix_vecs;
1681	rc = pci_alloc_msix(dev, &msix_vecs);
1682	if (unlikely(rc != 0)) {
1683		ena_log(dev, ERR, "Failed to enable MSIX, vectors %d rc %d\n",
1684		    msix_vecs, rc);
1685
1686		rc = ENOSPC;
1687		goto err_msix_free;
1688	}
1689
1690	if (msix_vecs != msix_req) {
1691		if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
1692			ena_log(dev, ERR,
1693			    "Not enough number of MSI-x allocated: %d\n",
1694			    msix_vecs);
1695			pci_release_msi(dev);
1696			rc = ENOSPC;
1697			goto err_msix_free;
1698		}
1699		ena_log(dev, ERR,
1700		    "Enable only %d MSI-x (out of %d), reduce "
1701		    "the number of queues\n",
1702		    msix_vecs, msix_req);
1703	}
1704
1705	adapter->msix_vecs = msix_vecs;
1706	ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1707
1708	return (0);
1709
1710err_msix_free:
1711	free(adapter->msix_entries, M_DEVBUF);
1712	adapter->msix_entries = NULL;
1713
1714	return (rc);
1715}
1716
1717static void
1718ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1719{
1720	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, ENA_IRQNAME_SIZE,
1721	    "ena-mgmnt@pci:%s", device_get_nameunit(adapter->pdev));
1722	/*
1723	 * Handler is NULL on purpose, it will be set
1724	 * when mgmnt interrupt is acquired
1725	 */
1726	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1727	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1728	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1729	    adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1730}
1731
/*
 * Fill in the irq_tbl entry (name, handler, argument, vector) of every I/O
 * queue and choose the CPU and memory domain each queue is associated with.
 *
 * Returns EINVAL if the MSI-X entry table has not been allocated, 0
 * otherwise.
 */
static int
ena_setup_io_intr(struct ena_adapter *adapter)
{
#ifdef RSS
	int num_buckets = rss_getnumbuckets();
	/* Shared by all calls: the bucket the next unbound adapter starts at. */
	static int last_bind = 0;
	int cur_bind;
	int idx;
#endif
	int irq_idx;

	if (adapter->msix_entries == NULL)
		return (EINVAL);

#ifdef RSS
	/* A negative first_bind means this adapter has no start bucket yet. */
	if (adapter->first_bind < 0) {
		adapter->first_bind = last_bind;
		last_bind = (last_bind + adapter->num_io_queues) % num_buckets;
	}
	cur_bind = adapter->first_bind;
#endif

	for (int i = 0; i < adapter->num_io_queues; i++) {
		irq_idx = ENA_IO_IRQ_IDX(i);

		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
		    "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
		adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
		adapter->irq_tbl[irq_idx].data = &adapter->que[i];
		adapter->irq_tbl[irq_idx].vector =
		    adapter->msix_entries[irq_idx].vector;
		ena_log(adapter->pdev, DBG, "ena_setup_io_intr vector: %d\n",
		    adapter->msix_entries[irq_idx].vector);

		/*
		 * With an explicit base CPU, queue i gets CPU
		 * (base + i * stride) modulo the number of CPUs.
		 */
		if (adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
			adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
			    (unsigned)(adapter->irq_cpu_base +
			    i * adapter->irq_cpu_stride) % (unsigned)mp_ncpus;
			CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);
		}

#ifdef RSS
		/*
		 * With RSS the CPU comes from the current bucket; this
		 * overwrites any value assigned just above.
		 */
		adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
		    rss_getcpu(cur_bind);
		cur_bind = (cur_bind + 1) % num_buckets;
		CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);

		/* Find the first memory domain whose CPU set has this CPU. */
		for (idx = 0; idx < MAXMEMDOM; ++idx) {
			if (CPU_ISSET(adapter->que[i].cpu, &cpuset_domain[idx]))
				break;
		}
		adapter->que[i].domain = idx;
#else
		adapter->que[i].domain = -1;
#endif
	}

	return (0);
}
1791
1792static int
1793ena_request_mgmnt_irq(struct ena_adapter *adapter)
1794{
1795	device_t pdev = adapter->pdev;
1796	struct ena_irq *irq;
1797	unsigned long flags;
1798	int rc, rcc;
1799
1800	flags = RF_ACTIVE | RF_SHAREABLE;
1801
1802	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1803	irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1804	    &irq->vector, flags);
1805
1806	if (unlikely(irq->res == NULL)) {
1807		ena_log(pdev, ERR, "could not allocate irq vector: %d\n",
1808		    irq->vector);
1809		return (ENXIO);
1810	}
1811
1812	rc = bus_setup_intr(adapter->pdev, irq->res,
1813	    INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt, irq->data,
1814	    &irq->cookie);
1815	if (unlikely(rc != 0)) {
1816		ena_log(pdev, ERR,
1817		    "failed to register interrupt handler for irq %ju: %d\n",
1818		    rman_get_start(irq->res), rc);
1819		goto err_res_free;
1820	}
1821	irq->requested = true;
1822
1823	return (rc);
1824
1825err_res_free:
1826	ena_log(pdev, INFO, "releasing resource for irq %d\n", irq->vector);
1827	rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, irq->vector,
1828	    irq->res);
1829	if (unlikely(rcc != 0))
1830		ena_log(pdev, ERR,
1831		    "dev has no parent while releasing res for irq: %d\n",
1832		    irq->vector);
1833	irq->res = NULL;
1834
1835	return (rc);
1836}
1837
/*
 * Allocate the IRQ resource and install the filter handler for every I/O
 * interrupt that has not been requested yet; bind it to its CPU when RSS
 * is enabled or a base CPU was configured.
 *
 * Returns 0 on success, EINVAL if MSI-X is not enabled, ENOMEM if an IRQ
 * resource could not be allocated, or the error from bus_setup_intr() /
 * bus_bind_intr().  On failure, the entries from the failing index down to
 * the first I/O index are torn down again.
 */
static int
ena_request_io_irq(struct ena_adapter *adapter)
{
	device_t pdev = adapter->pdev;
	struct ena_irq *irq;
	unsigned long flags = 0;
	int rc = 0, i, rcc;

	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) {
		ena_log(pdev, ERR,
		    "failed to request I/O IRQ: MSI-X is not enabled\n");
		return (EINVAL);
	} else {
		flags = RF_ACTIVE | RF_SHAREABLE;
	}

	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
		irq = &adapter->irq_tbl[i];

		/* Entries that already have a handler are left alone. */
		if (unlikely(irq->requested))
			continue;

		irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
		    &irq->vector, flags);
		if (unlikely(irq->res == NULL)) {
			rc = ENOMEM;
			ena_log(pdev, ERR,
			    "could not allocate irq vector: %d\n", irq->vector);
			goto err;
		}

		/* I/O interrupts use a filter routine, no ithread handler. */
		rc = bus_setup_intr(adapter->pdev, irq->res,
		    INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL, irq->data,
		    &irq->cookie);
		if (unlikely(rc != 0)) {
			ena_log(pdev, ERR,
			    "failed to register interrupt handler for irq %ju: %d\n",
			    rman_get_start(irq->res), rc);
			goto err;
		}
		irq->requested = true;

		if (adapter->rss_enabled || adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
			rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu);
			if (unlikely(rc != 0)) {
				ena_log(pdev, ERR,
				    "failed to bind interrupt handler for irq %ju to cpu %d: %d\n",
				    rman_get_start(irq->res), irq->cpu, rc);
				goto err;
			}

			ena_log(pdev, INFO, "queue %d - cpu %d\n",
			    i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
		}
	}
	return (rc);

err:

	/* Walk back from the failing index, inclusive. */
	for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
		irq = &adapter->irq_tbl[i];
		rcc = 0;

		/*
		 * If irq->requested is set, the handler was installed, so
		 * both the handler and the resource have to be released.
		 */
		if (irq->requested) {
			rcc = bus_teardown_intr(adapter->pdev, irq->res,
			    irq->cookie);
			if (unlikely(rcc != 0))
				ena_log(pdev, ERR,
				    "could not release irq: %d, error: %d\n",
				    irq->vector, rcc);
		}

		/*
		 * Without irq->requested only bus_alloc_resource_any() may
		 * need undoing, and only if res is not NULL.  If res is NULL
		 * no work is needed in this iteration.
		 */
		rcc = 0;
		if (irq->res != NULL) {
			rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
			    irq->vector, irq->res);
		}
		if (unlikely(rcc != 0))
			ena_log(pdev, ERR,
			    "dev has no parent while releasing res for irq: %d\n",
			    irq->vector);
		irq->requested = false;
		irq->res = NULL;
	}

	return (rc);
}
1931
1932static void
1933ena_free_mgmnt_irq(struct ena_adapter *adapter)
1934{
1935	device_t pdev = adapter->pdev;
1936	struct ena_irq *irq;
1937	int rc;
1938
1939	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1940	if (irq->requested) {
1941		ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector);
1942		rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1943		if (unlikely(rc != 0))
1944			ena_log(pdev, ERR, "failed to tear down irq: %d\n",
1945			    irq->vector);
1946		irq->requested = 0;
1947	}
1948
1949	if (irq->res != NULL) {
1950		ena_log(pdev, DBG, "release resource irq: %d\n", irq->vector);
1951		rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1952		    irq->vector, irq->res);
1953		irq->res = NULL;
1954		if (unlikely(rc != 0))
1955			ena_log(pdev, ERR,
1956			    "dev has no parent while releasing res for irq: %d\n",
1957			    irq->vector);
1958	}
1959}
1960
1961static void
1962ena_free_io_irq(struct ena_adapter *adapter)
1963{
1964	device_t pdev = adapter->pdev;
1965	struct ena_irq *irq;
1966	int rc;
1967
1968	for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1969		irq = &adapter->irq_tbl[i];
1970		if (irq->requested) {
1971			ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector);
1972			rc = bus_teardown_intr(adapter->pdev, irq->res,
1973			    irq->cookie);
1974			if (unlikely(rc != 0)) {
1975				ena_log(pdev, ERR,
1976				    "failed to tear down irq: %d\n",
1977				    irq->vector);
1978			}
1979			irq->requested = 0;
1980		}
1981
1982		if (irq->res != NULL) {
1983			ena_log(pdev, DBG, "release resource irq: %d\n",
1984			    irq->vector);
1985			rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1986			    irq->vector, irq->res);
1987			irq->res = NULL;
1988			if (unlikely(rc != 0)) {
1989				ena_log(pdev, ERR,
1990				    "dev has no parent while releasing res for irq: %d\n",
1991				    irq->vector);
1992			}
1993		}
1994	}
1995}
1996
/*
 * Release all interrupts of the adapter: the I/O interrupts first, then
 * the management interrupt, and finally MSI-X itself.
 */
static void
ena_free_irqs(struct ena_adapter *adapter)
{
	ena_free_io_irq(adapter);
	ena_free_mgmnt_irq(adapter);
	ena_disable_msix(adapter);
}
2004
2005static void
2006ena_disable_msix(struct ena_adapter *adapter)
2007{
2008	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
2009		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
2010		pci_release_msi(adapter->pdev);
2011	}
2012
2013	adapter->msix_vecs = 0;
2014	free(adapter->msix_entries, M_DEVBUF);
2015	adapter->msix_entries = NULL;
2016}
2017
2018static void
2019ena_unmask_all_io_irqs(struct ena_adapter *adapter)
2020{
2021	struct ena_com_io_cq *io_cq;
2022	struct ena_eth_io_intr_reg intr_reg;
2023	struct ena_ring *tx_ring;
2024	uint16_t ena_qid;
2025	int i;
2026
2027	/* Unmask interrupts for all queues */
2028	for (i = 0; i < adapter->num_io_queues; i++) {
2029		ena_qid = ENA_IO_TXQ_IDX(i);
2030		io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
2031		ena_com_update_intr_reg(&intr_reg, 0, 0, true, false);
2032		tx_ring = &adapter->tx_ring[i];
2033		counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1);
2034		ena_com_unmask_intr(io_cq, &intr_reg);
2035	}
2036}
2037
2038static int
2039ena_up_complete(struct ena_adapter *adapter)
2040{
2041	int rc;
2042
2043	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
2044		rc = ena_rss_configure(adapter);
2045		if (rc != 0) {
2046			ena_log(adapter->pdev, ERR,
2047			    "Failed to configure RSS\n");
2048			return (rc);
2049		}
2050	}
2051
2052	rc = ena_change_mtu(adapter->ifp, if_getmtu(adapter->ifp));
2053	if (unlikely(rc != 0))
2054		return (rc);
2055
2056	ena_refill_all_rx_bufs(adapter);
2057	ena_reset_counters((counter_u64_t *)&adapter->hw_stats,
2058	    sizeof(adapter->hw_stats));
2059
2060	return (0);
2061}
2062
2063static void
2064set_io_rings_size(struct ena_adapter *adapter, int new_tx_size, int new_rx_size)
2065{
2066	int i;
2067
2068	for (i = 0; i < adapter->num_io_queues; i++) {
2069		adapter->tx_ring[i].ring_size = new_tx_size;
2070		adapter->rx_ring[i].ring_size = new_rx_size;
2071	}
2072}
2073
2074static int
2075create_queues_with_size_backoff(struct ena_adapter *adapter)
2076{
2077	device_t pdev = adapter->pdev;
2078	int rc;
2079	uint32_t cur_rx_ring_size, cur_tx_ring_size;
2080	uint32_t new_rx_ring_size, new_tx_ring_size;
2081
2082	/*
2083	 * Current queue sizes might be set to smaller than the requested
2084	 * ones due to past queue allocation failures.
2085	 */
2086	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2087	    adapter->requested_rx_ring_size);
2088
2089	while (1) {
2090		/* Allocate transmit descriptors */
2091		rc = ena_setup_all_tx_resources(adapter);
2092		if (unlikely(rc != 0)) {
2093			ena_log(pdev, ERR, "err_setup_tx\n");
2094			goto err_setup_tx;
2095		}
2096
2097		/* Allocate receive descriptors */
2098		rc = ena_setup_all_rx_resources(adapter);
2099		if (unlikely(rc != 0)) {
2100			ena_log(pdev, ERR, "err_setup_rx\n");
2101			goto err_setup_rx;
2102		}
2103
2104		/* Create IO queues for Rx & Tx */
2105		rc = ena_create_io_queues(adapter);
2106		if (unlikely(rc != 0)) {
2107			ena_log(pdev, ERR, "create IO queues failed\n");
2108			goto err_io_que;
2109		}
2110
2111		return (0);
2112
2113err_io_que:
2114		ena_free_all_rx_resources(adapter);
2115err_setup_rx:
2116		ena_free_all_tx_resources(adapter);
2117err_setup_tx:
2118		/*
2119		 * Lower the ring size if ENOMEM. Otherwise, return the
2120		 * error straightaway.
2121		 */
2122		if (unlikely(rc != ENOMEM)) {
2123			ena_log(pdev, ERR,
2124			    "Queue creation failed with error code: %d\n", rc);
2125			return (rc);
2126		}
2127
2128		cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2129		cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2130
2131		ena_log(pdev, ERR,
2132		    "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2133		    cur_tx_ring_size, cur_rx_ring_size);
2134
2135		new_tx_ring_size = cur_tx_ring_size;
2136		new_rx_ring_size = cur_rx_ring_size;
2137
2138		/*
2139		 * Decrease the size of a larger queue, or decrease both if they
2140		 * are the same size.
2141		 */
2142		if (cur_rx_ring_size <= cur_tx_ring_size)
2143			new_tx_ring_size = cur_tx_ring_size / 2;
2144		if (cur_rx_ring_size >= cur_tx_ring_size)
2145			new_rx_ring_size = cur_rx_ring_size / 2;
2146
2147		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2148		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
2149			ena_log(pdev, ERR,
2150			    "Queue creation failed with the smallest possible queue size"
2151			    "of %d for both queues. Not retrying with smaller queues\n",
2152			    ENA_MIN_RING_SIZE);
2153			return (rc);
2154		}
2155
2156		ena_log(pdev, INFO,
2157		    "Retrying queue creation with sizes TX=%d, RX=%d\n",
2158		    new_tx_ring_size, new_rx_ring_size);
2159
2160		set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size);
2161	}
2162}
2163
2164int
2165ena_up(struct ena_adapter *adapter)
2166{
2167	int rc = 0;
2168
2169	ENA_LOCK_ASSERT();
2170
2171	if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2172		ena_log(adapter->pdev, ERR, "device is not attached!\n");
2173		return (ENXIO);
2174	}
2175
2176	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2177		return (0);
2178
2179	ena_log(adapter->pdev, INFO, "device is going UP\n");
2180
2181	/* setup interrupts for IO queues */
2182	rc = ena_setup_io_intr(adapter);
2183	if (unlikely(rc != 0)) {
2184		ena_log(adapter->pdev, ERR, "error setting up IO interrupt\n");
2185		goto error;
2186	}
2187	rc = ena_request_io_irq(adapter);
2188	if (unlikely(rc != 0)) {
2189		ena_log(adapter->pdev, ERR, "err_req_irq\n");
2190		goto error;
2191	}
2192
2193	ena_log(adapter->pdev, INFO,
2194	    "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, LLQ is %s\n",
2195	    adapter->num_io_queues,
2196	    adapter->requested_rx_ring_size,
2197	    adapter->requested_tx_ring_size,
2198	    (adapter->ena_dev->tx_mem_queue_type ==
2199		ENA_ADMIN_PLACEMENT_POLICY_DEV) ? "ENABLED" : "DISABLED");
2200
2201	rc = create_queues_with_size_backoff(adapter);
2202	if (unlikely(rc != 0)) {
2203		ena_log(adapter->pdev, ERR,
2204		    "error creating queues with size backoff\n");
2205		goto err_create_queues_with_backoff;
2206	}
2207
2208	if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
2209		if_link_state_change(adapter->ifp, LINK_STATE_UP);
2210
2211	rc = ena_up_complete(adapter);
2212	if (unlikely(rc != 0))
2213		goto err_up_complete;
2214
2215	counter_u64_add(adapter->dev_stats.interface_up, 1);
2216
2217	ena_update_hwassist(adapter);
2218
2219	if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
2220
2221	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2222
2223	ena_unmask_all_io_irqs(adapter);
2224
2225	return (0);
2226
2227err_up_complete:
2228	ena_destroy_all_io_queues(adapter);
2229	ena_free_all_rx_resources(adapter);
2230	ena_free_all_tx_resources(adapter);
2231err_create_queues_with_backoff:
2232	ena_free_io_irq(adapter);
2233error:
2234	return (rc);
2235}
2236
2237static uint64_t
2238ena_get_counter(if_t ifp, ift_counter cnt)
2239{
2240	struct ena_adapter *adapter;
2241	struct ena_hw_stats *stats;
2242
2243	adapter = if_getsoftc(ifp);
2244	stats = &adapter->hw_stats;
2245
2246	switch (cnt) {
2247	case IFCOUNTER_IPACKETS:
2248		return (counter_u64_fetch(stats->rx_packets));
2249	case IFCOUNTER_OPACKETS:
2250		return (counter_u64_fetch(stats->tx_packets));
2251	case IFCOUNTER_IBYTES:
2252		return (counter_u64_fetch(stats->rx_bytes));
2253	case IFCOUNTER_OBYTES:
2254		return (counter_u64_fetch(stats->tx_bytes));
2255	case IFCOUNTER_IQDROPS:
2256		return (counter_u64_fetch(stats->rx_drops));
2257	case IFCOUNTER_OQDROPS:
2258		return (counter_u64_fetch(stats->tx_drops));
2259	default:
2260		return (if_get_counter_default(ifp, cnt));
2261	}
2262}
2263
2264static int
2265ena_media_change(if_t ifp)
2266{
2267	/* Media Change is not supported by firmware */
2268	return (0);
2269}
2270
2271static void
2272ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2273{
2274	struct ena_adapter *adapter = if_getsoftc(ifp);
2275	ena_log(adapter->pdev, DBG, "Media status update\n");
2276
2277	ENA_LOCK_LOCK();
2278
2279	ifmr->ifm_status = IFM_AVALID;
2280	ifmr->ifm_active = IFM_ETHER;
2281
2282	if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) {
2283		ENA_LOCK_UNLOCK();
2284		ena_log(adapter->pdev, INFO, "Link is down\n");
2285		return;
2286	}
2287
2288	ifmr->ifm_status |= IFM_ACTIVE;
2289	ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX;
2290
2291	ENA_LOCK_UNLOCK();
2292}
2293
2294static void
2295ena_init(void *arg)
2296{
2297	struct ena_adapter *adapter = (struct ena_adapter *)arg;
2298
2299	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2300		ENA_LOCK_LOCK();
2301		ena_up(adapter);
2302		ENA_LOCK_UNLOCK();
2303	}
2304}
2305
2306static int
2307ena_ioctl(if_t ifp, u_long command, caddr_t data)
2308{
2309	struct ena_adapter *adapter;
2310	struct ifreq *ifr;
2311	int rc;
2312
2313	adapter = if_getsoftc(ifp);
2314	ifr = (struct ifreq *)data;
2315
2316	/*
2317	 * Acquiring lock to prevent from running up and down routines parallel.
2318	 */
2319	rc = 0;
2320	switch (command) {
2321	case SIOCSIFMTU:
2322		if (if_getmtu(ifp) == ifr->ifr_mtu)
2323			break;
2324		ENA_LOCK_LOCK();
2325		ena_down(adapter);
2326
2327		ena_change_mtu(ifp, ifr->ifr_mtu);
2328
2329		rc = ena_up(adapter);
2330		ENA_LOCK_UNLOCK();
2331		break;
2332
2333	case SIOCSIFFLAGS:
2334		if ((if_getflags(ifp) & IFF_UP) != 0) {
2335			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2336				if ((if_getflags(ifp) & (IFF_PROMISC |
2337				    IFF_ALLMULTI)) != 0) {
2338					ena_log(adapter->pdev, INFO,
2339					    "ioctl promisc/allmulti\n");
2340				}
2341			} else {
2342				ENA_LOCK_LOCK();
2343				rc = ena_up(adapter);
2344				ENA_LOCK_UNLOCK();
2345			}
2346		} else {
2347			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2348				ENA_LOCK_LOCK();
2349				ena_down(adapter);
2350				ENA_LOCK_UNLOCK();
2351			}
2352		}
2353		break;
2354
2355	case SIOCADDMULTI:
2356	case SIOCDELMULTI:
2357		break;
2358
2359	case SIOCSIFMEDIA:
2360	case SIOCGIFMEDIA:
2361		rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2362		break;
2363
2364	case SIOCSIFCAP:
2365		{
2366			int reinit = 0;
2367
2368			if (ifr->ifr_reqcap != if_getcapenable(ifp)) {
2369				if_setcapenable(ifp, ifr->ifr_reqcap);
2370				reinit = 1;
2371			}
2372
2373			if ((reinit != 0) &&
2374			    ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) {
2375				ENA_LOCK_LOCK();
2376				ena_down(adapter);
2377				rc = ena_up(adapter);
2378				ENA_LOCK_UNLOCK();
2379			}
2380		}
2381
2382		break;
2383	default:
2384		rc = ether_ioctl(ifp, command, data);
2385		break;
2386	}
2387
2388	return (rc);
2389}
2390
2391static int
2392ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2393{
2394	int caps = 0;
2395
2396	if ((feat->offload.tx &
2397	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2398	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2399	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2400		caps |= IFCAP_TXCSUM;
2401
2402	if ((feat->offload.tx &
2403	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2404	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2405		caps |= IFCAP_TXCSUM_IPV6;
2406
2407	if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2408		caps |= IFCAP_TSO4;
2409
2410	if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2411		caps |= IFCAP_TSO6;
2412
2413	if ((feat->offload.rx_supported &
2414	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2415	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2416		caps |= IFCAP_RXCSUM;
2417
2418	if ((feat->offload.rx_supported &
2419	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2420		caps |= IFCAP_RXCSUM_IPV6;
2421
2422	caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
2423
2424	return (caps);
2425}
2426
2427static void
2428ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2429{
2430	host_info->supported_network_features[0] = (uint32_t)if_getcapabilities(ifp);
2431}
2432
2433static void
2434ena_update_hwassist(struct ena_adapter *adapter)
2435{
2436	if_t ifp = adapter->ifp;
2437	uint32_t feat = adapter->tx_offload_cap;
2438	int cap = if_getcapenable(ifp);
2439	int flags = 0;
2440
2441	if_clearhwassist(ifp);
2442
2443	if ((cap & IFCAP_TXCSUM) != 0) {
2444		if ((feat &
2445		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2446			flags |= CSUM_IP;
2447		if ((feat &
2448		    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2449		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2450			flags |= CSUM_IP_UDP | CSUM_IP_TCP;
2451	}
2452
2453	if ((cap & IFCAP_TXCSUM_IPV6) != 0)
2454		flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2455
2456	if ((cap & IFCAP_TSO4) != 0)
2457		flags |= CSUM_IP_TSO;
2458
2459	if ((cap & IFCAP_TSO6) != 0)
2460		flags |= CSUM_IP6_TSO;
2461
2462	if_sethwassistbits(ifp, flags, 0);
2463}
2464
2465static int
2466ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2467    struct ena_com_dev_get_features_ctx *feat)
2468{
2469	if_t ifp;
2470	int caps = 0;
2471
2472	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2473	if (unlikely(ifp == NULL)) {
2474		ena_log(pdev, ERR, "can not allocate ifnet structure\n");
2475		return (ENXIO);
2476	}
2477	if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2478	if_setdev(ifp, pdev);
2479	if_setsoftc(ifp, adapter);
2480
2481	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2482	if_setinitfn(ifp, ena_init);
2483	if_settransmitfn(ifp, ena_mq_start);
2484	if_setqflushfn(ifp, ena_qflush);
2485	if_setioctlfn(ifp, ena_ioctl);
2486	if_setgetcounterfn(ifp, ena_get_counter);
2487
2488	if_setsendqlen(ifp, adapter->requested_tx_ring_size);
2489	if_setsendqready(ifp);
2490	if_setmtu(ifp, ETHERMTU);
2491	if_setbaudrate(ifp, 0);
2492	/* Zeroize capabilities... */
2493	if_setcapabilities(ifp, 0);
2494	if_setcapenable(ifp, 0);
2495	/* check hardware support */
2496	caps = ena_get_dev_offloads(feat);
2497	/* ... and set them */
2498	if_setcapabilitiesbit(ifp, caps, 0);
2499
2500	/* TSO parameters */
2501	if_sethwtsomax(ifp, ENA_TSO_MAXSIZE -
2502	    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
2503	if_sethwtsomaxsegcount(ifp, adapter->max_tx_sgl_size - 1);
2504	if_sethwtsomaxsegsize(ifp, ENA_TSO_MAXSIZE);
2505
2506	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2507	if_setcapenable(ifp, if_getcapabilities(ifp));
2508
2509	/*
2510	 * Specify the media types supported by this adapter and register
2511	 * callbacks to update media and link information
2512	 */
2513	ifmedia_init(&adapter->media, IFM_IMASK, ena_media_change,
2514	    ena_media_status);
2515	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2516	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2517
2518	ether_ifattach(ifp, adapter->mac_addr);
2519
2520	return (0);
2521}
2522
2523void
2524ena_down(struct ena_adapter *adapter)
2525{
2526	int rc;
2527
2528	ENA_LOCK_ASSERT();
2529
2530	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2531		return;
2532
2533	ena_log(adapter->pdev, INFO, "device is going DOWN\n");
2534
2535	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2536	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2537
2538	ena_free_io_irq(adapter);
2539
2540	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) {
2541		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2542		if (unlikely(rc != 0))
2543			ena_log(adapter->pdev, ERR, "Device reset failed\n");
2544	}
2545
2546	ena_destroy_all_io_queues(adapter);
2547
2548	ena_free_all_tx_bufs(adapter);
2549	ena_free_all_rx_bufs(adapter);
2550	ena_free_all_tx_resources(adapter);
2551	ena_free_all_rx_resources(adapter);
2552
2553	counter_u64_add(adapter->dev_stats.interface_down, 1);
2554}
2555
2556static uint32_t
2557ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev,
2558    struct ena_com_dev_get_features_ctx *get_feat_ctx)
2559{
2560	uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
2561
2562	/* Regular queues capabilities */
2563	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2564		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2565		    &get_feat_ctx->max_queue_ext.max_queue_ext;
2566		io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
2567		    max_queue_ext->max_rx_cq_num);
2568
2569		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2570		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2571	} else {
2572		struct ena_admin_queue_feature_desc *max_queues =
2573		    &get_feat_ctx->max_queues;
2574		io_tx_sq_num = max_queues->max_sq_num;
2575		io_tx_cq_num = max_queues->max_cq_num;
2576		io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
2577	}
2578
2579	/* In case of LLQ use the llq fields for the tx SQ/CQ */
2580	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2581		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2582
2583	max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
2584	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num);
2585	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num);
2586	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num);
2587	/* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */
2588	max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2589	    pci_msix_count(pdev) - 1);
2590#ifdef RSS
2591	max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2592	    rss_getnumbuckets());
2593#endif
2594
2595	return (max_num_io_queues);
2596}
2597
2598static int
2599ena_enable_wc(device_t pdev, struct resource *res)
2600{
2601#if defined(__i386) || defined(__amd64) || defined(__aarch64__)
2602	vm_offset_t va;
2603	vm_size_t len;
2604	int rc;
2605
2606	va = (vm_offset_t)rman_get_virtual(res);
2607	len = rman_get_size(res);
2608	/* Enable write combining */
2609	rc = pmap_change_attr(va, len, VM_MEMATTR_WRITE_COMBINING);
2610	if (unlikely(rc != 0)) {
2611		ena_log(pdev, ERR, "pmap_change_attr failed, %d\n", rc);
2612		return (rc);
2613	}
2614
2615	return (0);
2616#endif
2617	return (EOPNOTSUPP);
2618}
2619
2620static int
2621ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev,
2622    struct ena_admin_feature_llq_desc *llq,
2623    struct ena_llq_configurations *llq_default_configurations)
2624{
2625	int rc;
2626	uint32_t llq_feature_mask;
2627
2628	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
2629	if (!(ena_dev->supported_features & llq_feature_mask)) {
2630		ena_log(pdev, WARN,
2631		    "LLQ is not supported. Fallback to host mode policy.\n");
2632		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2633		return (0);
2634	}
2635
2636	if (ena_dev->mem_bar == NULL) {
2637		ena_log(pdev, WARN,
2638		    "LLQ is advertised as supported but device doesn't expose mem bar.\n");
2639		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2640		return (0);
2641	}
2642
2643	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
2644	if (unlikely(rc != 0)) {
2645		ena_log(pdev, WARN,
2646		    "Failed to configure the device mode. "
2647		    "Fallback to host mode policy.\n");
2648		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2649	}
2650
2651	return (0);
2652}
2653
2654static int
2655ena_map_llq_mem_bar(device_t pdev, struct ena_com_dev *ena_dev)
2656{
2657	struct ena_adapter *adapter = device_get_softc(pdev);
2658	int rc, rid;
2659
2660	/* Try to allocate resources for LLQ bar */
2661	rid = PCIR_BAR(ENA_MEM_BAR);
2662	adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid,
2663	    RF_ACTIVE);
2664	if (unlikely(adapter->memory == NULL)) {
2665		ena_log(pdev, WARN,
2666		    "Unable to allocate LLQ bar resource. LLQ mode won't be used.\n");
2667		return (0);
2668	}
2669
2670	/* Enable write combining for better LLQ performance */
2671	rc = ena_enable_wc(adapter->pdev, adapter->memory);
2672	if (unlikely(rc != 0)) {
2673		ena_log(pdev, ERR, "failed to enable write combining.\n");
2674		return (rc);
2675	}
2676
2677	/*
2678	 * Save virtual address of the device's memory region
2679	 * for the ena_com layer.
2680	 */
2681	ena_dev->mem_bar = rman_get_virtual(adapter->memory);
2682
2683	return (0);
2684}
2685
2686static inline void
2687set_default_llq_configurations(struct ena_llq_configurations *llq_config,
2688    struct ena_admin_feature_llq_desc *llq)
2689{
2690	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
2691	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
2692	llq_config->llq_num_decs_before_header =
2693	    ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
2694	if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) !=
2695	    0 && ena_force_large_llq_header) {
2696		llq_config->llq_ring_entry_size =
2697		    ENA_ADMIN_LIST_ENTRY_SIZE_256B;
2698		llq_config->llq_ring_entry_size_value = 256;
2699	} else {
2700		llq_config->llq_ring_entry_size =
2701		    ENA_ADMIN_LIST_ENTRY_SIZE_128B;
2702		llq_config->llq_ring_entry_size_value = 128;
2703	}
2704}
2705
2706static int
2707ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
2708{
2709	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
2710	struct ena_com_dev *ena_dev = ctx->ena_dev;
2711	uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE;
2712	uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE;
2713	uint32_t max_tx_queue_size;
2714	uint32_t max_rx_queue_size;
2715
2716	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2717		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2718		    &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
2719		max_rx_queue_size = min_t(uint32_t,
2720		    max_queue_ext->max_rx_cq_depth,
2721		    max_queue_ext->max_rx_sq_depth);
2722		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
2723
2724		if (ena_dev->tx_mem_queue_type ==
2725		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
2726			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2727			    llq->max_llq_depth);
2728		else
2729			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2730			    max_queue_ext->max_tx_sq_depth);
2731
2732		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2733		    max_queue_ext->max_per_packet_tx_descs);
2734		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2735		    max_queue_ext->max_per_packet_rx_descs);
2736	} else {
2737		struct ena_admin_queue_feature_desc *max_queues =
2738		    &ctx->get_feat_ctx->max_queues;
2739		max_rx_queue_size = min_t(uint32_t, max_queues->max_cq_depth,
2740		    max_queues->max_sq_depth);
2741		max_tx_queue_size = max_queues->max_cq_depth;
2742
2743		if (ena_dev->tx_mem_queue_type ==
2744		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
2745			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2746			    llq->max_llq_depth);
2747		else
2748			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2749			    max_queues->max_sq_depth);
2750
2751		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2752		    max_queues->max_packet_tx_descs);
2753		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2754		    max_queues->max_packet_rx_descs);
2755	}
2756
2757	/* round down to the nearest power of 2 */
2758	max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1);
2759	max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1);
2760
2761	/*
2762	 * When forcing large headers, we multiply the entry size by 2,
2763	 * and therefore divide the queue size by 2, leaving the amount
2764	 * of memory used by the queues unchanged.
2765	 */
2766	if (ena_force_large_llq_header) {
2767		if ((llq->entry_size_ctrl_supported &
2768		    ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 0 &&
2769		    ena_dev->tx_mem_queue_type ==
2770		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2771			max_tx_queue_size /= 2;
2772			ena_log(ctx->pdev, INFO,
2773			    "Forcing large headers and decreasing maximum Tx queue size to %d\n",
2774			    max_tx_queue_size);
2775		} else {
2776			ena_log(ctx->pdev, WARN,
2777			    "Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
2778		}
2779	}
2780
2781	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
2782	    max_tx_queue_size);
2783	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
2784	    max_rx_queue_size);
2785
2786	tx_queue_size = 1 << (flsl(tx_queue_size) - 1);
2787	rx_queue_size = 1 << (flsl(rx_queue_size) - 1);
2788
2789	ctx->max_tx_queue_size = max_tx_queue_size;
2790	ctx->max_rx_queue_size = max_rx_queue_size;
2791	ctx->tx_queue_size = tx_queue_size;
2792	ctx->rx_queue_size = rx_queue_size;
2793
2794	return (0);
2795}
2796
2797static void
2798ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
2799{
2800	struct ena_admin_host_info *host_info;
2801	uintptr_t rid;
2802	int rc;
2803
2804	/* Allocate only the host info */
2805	rc = ena_com_allocate_host_info(ena_dev);
2806	if (unlikely(rc != 0)) {
2807		ena_log(dev, ERR, "Cannot allocate host info\n");
2808		return;
2809	}
2810
2811	host_info = ena_dev->host_attr.host_info;
2812
2813	if (pci_get_id(dev, PCI_ID_RID, &rid) == 0)
2814		host_info->bdf = rid;
2815	host_info->os_type = ENA_ADMIN_OS_FREEBSD;
2816	host_info->kernel_ver = osreldate;
2817
2818	sprintf(host_info->kernel_ver_str, "%d", osreldate);
2819	host_info->os_dist = 0;
2820	strncpy(host_info->os_dist_str, osrelease,
2821	    sizeof(host_info->os_dist_str) - 1);
2822
2823	host_info->driver_version = (ENA_DRV_MODULE_VER_MAJOR) |
2824	    (ENA_DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2825	    (ENA_DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
2826	host_info->num_cpus = mp_ncpus;
2827	host_info->driver_supported_features =
2828	    ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
2829	    ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
2830
2831	rc = ena_com_set_host_attributes(ena_dev);
2832	if (unlikely(rc != 0)) {
2833		if (rc == EOPNOTSUPP)
2834			ena_log(dev, WARN, "Cannot set host attributes\n");
2835		else
2836			ena_log(dev, ERR, "Cannot set host attributes\n");
2837
2838		goto err;
2839	}
2840
2841	return;
2842
2843err:
2844	ena_com_delete_host_info(ena_dev);
2845}
2846
2847static int
2848ena_device_init(struct ena_adapter *adapter, device_t pdev,
2849    struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
2850{
2851	struct ena_llq_configurations llq_config;
2852	struct ena_com_dev *ena_dev = adapter->ena_dev;
2853	bool readless_supported;
2854	uint32_t aenq_groups;
2855	int dma_width;
2856	int rc;
2857
2858	rc = ena_com_mmio_reg_read_request_init(ena_dev);
2859	if (unlikely(rc != 0)) {
2860		ena_log(pdev, ERR, "failed to init mmio read less\n");
2861		return (rc);
2862	}
2863
2864	/*
2865	 * The PCIe configuration space revision id indicate if mmio reg
2866	 * read is disabled
2867	 */
2868	readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
2869	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
2870
2871	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
2872	if (unlikely(rc != 0)) {
2873		ena_log(pdev, ERR, "Can not reset device\n");
2874		goto err_mmio_read_less;
2875	}
2876
2877	rc = ena_com_validate_version(ena_dev);
2878	if (unlikely(rc != 0)) {
2879		ena_log(pdev, ERR, "device version is too low\n");
2880		goto err_mmio_read_less;
2881	}
2882
2883	dma_width = ena_com_get_dma_width(ena_dev);
2884	if (unlikely(dma_width < 0)) {
2885		ena_log(pdev, ERR, "Invalid dma width value %d", dma_width);
2886		rc = dma_width;
2887		goto err_mmio_read_less;
2888	}
2889	adapter->dma_width = dma_width;
2890
2891	/* ENA admin level init */
2892	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
2893	if (unlikely(rc != 0)) {
2894		ena_log(pdev, ERR,
2895		    "Can not initialize ena admin queue with device\n");
2896		goto err_mmio_read_less;
2897	}
2898
2899	/*
2900	 * To enable the msix interrupts the driver needs to know the number
2901	 * of queues. So the driver uses polling mode to retrieve this
2902	 * information
2903	 */
2904	ena_com_set_admin_polling_mode(ena_dev, true);
2905
2906	ena_config_host_info(ena_dev, pdev);
2907
2908	/* Get Device Attributes */
2909	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
2910	if (unlikely(rc != 0)) {
2911		ena_log(pdev, ERR,
2912		    "Cannot get attribute for ena device rc: %d\n", rc);
2913		goto err_admin_init;
2914	}
2915
2916	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
2917	    BIT(ENA_ADMIN_FATAL_ERROR) |
2918	    BIT(ENA_ADMIN_WARNING) |
2919	    BIT(ENA_ADMIN_NOTIFICATION) |
2920	    BIT(ENA_ADMIN_KEEP_ALIVE);
2921
2922	aenq_groups &= get_feat_ctx->aenq.supported_groups;
2923	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
2924	if (unlikely(rc != 0)) {
2925		ena_log(pdev, ERR, "Cannot configure aenq groups rc: %d\n", rc);
2926		goto err_admin_init;
2927	}
2928
2929	*wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
2930
2931	set_default_llq_configurations(&llq_config, &get_feat_ctx->llq);
2932
2933	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
2934	    &llq_config);
2935	if (unlikely(rc != 0)) {
2936		ena_log(pdev, ERR, "Failed to set placement policy\n");
2937		goto err_admin_init;
2938	}
2939
2940	return (0);
2941
2942err_admin_init:
2943	ena_com_delete_host_info(ena_dev);
2944	ena_com_admin_destroy(ena_dev);
2945err_mmio_read_less:
2946	ena_com_mmio_reg_read_request_destroy(ena_dev);
2947
2948	return (rc);
2949}
2950
2951static int
2952ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
2953{
2954	struct ena_com_dev *ena_dev = adapter->ena_dev;
2955	int rc;
2956
2957	rc = ena_enable_msix(adapter);
2958	if (unlikely(rc != 0)) {
2959		ena_log(adapter->pdev, ERR, "Error with MSI-X enablement\n");
2960		return (rc);
2961	}
2962
2963	ena_setup_mgmnt_intr(adapter);
2964
2965	rc = ena_request_mgmnt_irq(adapter);
2966	if (unlikely(rc != 0)) {
2967		ena_log(adapter->pdev, ERR, "Cannot setup mgmnt queue intr\n");
2968		goto err_disable_msix;
2969	}
2970
2971	ena_com_set_admin_polling_mode(ena_dev, false);
2972
2973	ena_com_admin_aenq_enable(ena_dev);
2974
2975	return (0);
2976
2977err_disable_msix:
2978	ena_disable_msix(adapter);
2979
2980	return (rc);
2981}
2982
2983/* Function called on ENA_ADMIN_KEEP_ALIVE event */
2984static void
2985ena_keep_alive_wd(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
2986{
2987	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
2988	struct ena_admin_aenq_keep_alive_desc *desc;
2989	sbintime_t stime;
2990	uint64_t rx_drops;
2991	uint64_t tx_drops;
2992
2993	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
2994
2995	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
2996	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
2997	counter_u64_zero(adapter->hw_stats.rx_drops);
2998	counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
2999	counter_u64_zero(adapter->hw_stats.tx_drops);
3000	counter_u64_add(adapter->hw_stats.tx_drops, tx_drops);
3001
3002	stime = getsbinuptime();
3003	atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
3004}
3005
3006/* Check for keep alive expiration */
3007static void
3008check_for_missing_keep_alive(struct ena_adapter *adapter)
3009{
3010	sbintime_t timestamp, time;
3011
3012	if (adapter->wd_active == 0)
3013		return;
3014
3015	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3016		return;
3017
3018	timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
3019	time = getsbinuptime() - timestamp;
3020	if (unlikely(time > adapter->keep_alive_timeout)) {
3021		ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n");
3022		counter_u64_add(adapter->dev_stats.wd_expired, 1);
3023		ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
3024	}
3025}
3026
3027/* Check if admin queue is enabled */
3028static void
3029check_for_admin_com_state(struct ena_adapter *adapter)
3030{
3031	if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) == false)) {
3032		ena_log(adapter->pdev, ERR,
3033		    "ENA admin queue is not in running state!\n");
3034		counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
3035		ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
3036	}
3037}
3038
3039static int
3040check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3041    struct ena_ring *rx_ring)
3042{
3043	if (likely(atomic_load_8(&rx_ring->first_interrupt)))
3044		return (0);
3045
3046	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3047		return (0);
3048
3049	rx_ring->no_interrupt_event_cnt++;
3050
3051	if (rx_ring->no_interrupt_event_cnt ==
3052	    ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3053		ena_log(adapter->pdev, ERR,
3054		    "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
3055		    rx_ring->qid);
3056		ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3057		return (EIO);
3058	}
3059
3060	return (0);
3061}
3062
3063static int
3064check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3065    struct ena_ring *tx_ring)
3066{
3067	device_t pdev = adapter->pdev;
3068	struct bintime curtime, time;
3069	struct ena_tx_buffer *tx_buf;
3070	int time_since_last_cleanup;
3071	int missing_tx_comp_to;
3072	sbintime_t time_offset;
3073	uint32_t missed_tx = 0;
3074	int i, rc = 0;
3075
3076	getbinuptime(&curtime);
3077
3078	for (i = 0; i < tx_ring->ring_size; i++) {
3079		tx_buf = &tx_ring->tx_buffer_info[i];
3080
3081		if (bintime_isset(&tx_buf->timestamp) == 0)
3082			continue;
3083
3084		time = curtime;
3085		bintime_sub(&time, &tx_buf->timestamp);
3086		time_offset = bttosbt(time);
3087
3088		if (unlikely(!atomic_load_8(&tx_ring->first_interrupt) &&
3089		    time_offset > 2 * adapter->missing_tx_timeout)) {
3090			/*
3091			 * If after graceful period interrupt is still not
3092			 * received, we schedule a reset.
3093			 */
3094			ena_log(pdev, ERR,
3095			    "Potential MSIX issue on Tx side Queue = %d. "
3096			    "Reset the device\n",
3097			    tx_ring->qid);
3098			ena_trigger_reset(adapter,
3099			    ENA_REGS_RESET_MISS_INTERRUPT);
3100			return (EIO);
3101		}
3102
3103		/* Check again if packet is still waiting */
3104		if (unlikely(time_offset > adapter->missing_tx_timeout)) {
3105
3106			if (tx_buf->print_once) {
3107				time_since_last_cleanup = TICKS_2_MSEC(ticks -
3108				    tx_ring->tx_last_cleanup_ticks);
3109				missing_tx_comp_to = sbttoms(
3110				    adapter->missing_tx_timeout);
3111				ena_log(pdev, WARN,
3112				    "Found a Tx that wasn't completed on time, qid %d, index %d. "
3113				    "%d msecs have passed since last cleanup. Missing Tx timeout value %d msecs.\n",
3114				    tx_ring->qid, i, time_since_last_cleanup,
3115				    missing_tx_comp_to);
3116			}
3117
3118			tx_buf->print_once = false;
3119			missed_tx++;
3120		}
3121	}
3122
3123	if (unlikely(missed_tx > adapter->missing_tx_threshold)) {
3124		ena_log(pdev, ERR,
3125		    "The number of lost tx completion is above the threshold "
3126		    "(%d > %d). Reset the device\n",
3127		    missed_tx, adapter->missing_tx_threshold);
3128		ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
3129		rc = EIO;
3130	}
3131
3132	counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx);
3133
3134	return (rc);
3135}
3136
3137/*
3138 * Check for TX which were not completed on time.
3139 * Timeout is defined by "missing_tx_timeout".
3140 * Reset will be performed if number of incompleted
3141 * transactions exceeds "missing_tx_threshold".
3142 */
3143static void
3144check_for_missing_completions(struct ena_adapter *adapter)
3145{
3146	struct ena_ring *tx_ring;
3147	struct ena_ring *rx_ring;
3148	int i, budget, rc;
3149
3150	/* Make sure the driver doesn't turn the device in other process */
3151	rmb();
3152
3153	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3154		return;
3155
3156	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3157		return;
3158
3159	if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3160		return;
3161
3162	budget = adapter->missing_tx_max_queues;
3163
3164	for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) {
3165		tx_ring = &adapter->tx_ring[i];
3166		rx_ring = &adapter->rx_ring[i];
3167
3168		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3169		if (unlikely(rc != 0))
3170			return;
3171
3172		rc = check_for_rx_interrupt_queue(adapter, rx_ring);
3173		if (unlikely(rc != 0))
3174			return;
3175
3176		budget--;
3177		if (budget == 0) {
3178			i++;
3179			break;
3180		}
3181	}
3182
3183	adapter->next_monitored_tx_qid = i % adapter->num_io_queues;
3184}
3185
3186/* trigger rx cleanup after 2 consecutive detections */
3187#define EMPTY_RX_REFILL 2
3188/* For the rare case where the device runs out of Rx descriptors and the
3189 * msix handler failed to refill new Rx descriptors (due to a lack of memory
3190 * for example).
3191 * This case will lead to a deadlock:
3192 * The device won't send interrupts since all the new Rx packets will be dropped
3193 * The msix handler won't allocate new Rx descriptors so the device won't be
3194 * able to send new packets.
3195 *
3196 * When such a situation is detected - execute rx cleanup task in another thread
3197 */
3198static void
3199check_for_empty_rx_ring(struct ena_adapter *adapter)
3200{
3201	struct ena_ring *rx_ring;
3202	int i, refill_required;
3203
3204	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3205		return;
3206
3207	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3208		return;
3209
3210	for (i = 0; i < adapter->num_io_queues; i++) {
3211		rx_ring = &adapter->rx_ring[i];
3212
3213		refill_required = ena_com_free_q_entries(
3214		    rx_ring->ena_com_io_sq);
3215		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3216			rx_ring->empty_rx_queue++;
3217
3218			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3219				counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
3220				    1);
3221
3222				ena_log(adapter->pdev, WARN,
3223				    "Rx ring %d is stalled. Triggering the refill function\n",
3224				    i);
3225
3226				taskqueue_enqueue(rx_ring->que->cleanup_tq,
3227				    &rx_ring->que->cleanup_task);
3228				rx_ring->empty_rx_queue = 0;
3229			}
3230		} else {
3231			rx_ring->empty_rx_queue = 0;
3232		}
3233	}
3234}
3235
3236static void
3237ena_update_hints(struct ena_adapter *adapter,
3238    struct ena_admin_ena_hw_hints *hints)
3239{
3240	struct ena_com_dev *ena_dev = adapter->ena_dev;
3241
3242	if (hints->admin_completion_tx_timeout)
3243		ena_dev->admin_queue.completion_timeout =
3244		    hints->admin_completion_tx_timeout * 1000;
3245
3246	if (hints->mmio_read_timeout)
3247		/* convert to usec */
3248		ena_dev->mmio_read.reg_read_to = hints->mmio_read_timeout * 1000;
3249
3250	if (hints->missed_tx_completion_count_threshold_to_reset)
3251		adapter->missing_tx_threshold =
3252		    hints->missed_tx_completion_count_threshold_to_reset;
3253
3254	if (hints->missing_tx_completion_timeout) {
3255		if (hints->missing_tx_completion_timeout ==
3256		    ENA_HW_HINTS_NO_TIMEOUT)
3257			adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3258		else
3259			adapter->missing_tx_timeout = SBT_1MS *
3260			    hints->missing_tx_completion_timeout;
3261	}
3262
3263	if (hints->driver_watchdog_timeout) {
3264		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3265			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3266		else
3267			adapter->keep_alive_timeout = SBT_1MS *
3268			    hints->driver_watchdog_timeout;
3269	}
3270}
3271
3272/**
3273 * ena_copy_eni_metrics - Get and copy ENI metrics from the HW.
3274 * @adapter: ENA device adapter
3275 *
3276 * Returns 0 on success, EOPNOTSUPP if current HW doesn't support those metrics
3277 * and other error codes on failure.
3278 *
3279 * This function can possibly cause a race with other calls to the admin queue.
3280 * Because of that, the caller should either lock this function or make sure
3281 * that there is no race in the current context.
3282 */
3283static int
3284ena_copy_eni_metrics(struct ena_adapter *adapter)
3285{
3286	static bool print_once = true;
3287	int rc;
3288
3289	rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_metrics);
3290
3291	if (rc != 0) {
3292		if (rc == ENA_COM_UNSUPPORTED) {
3293			if (print_once) {
3294				ena_log(adapter->pdev, WARN,
3295				    "Retrieving ENI metrics is not supported.\n");
3296				print_once = false;
3297			} else {
3298				ena_log(adapter->pdev, DBG,
3299				    "Retrieving ENI metrics is not supported.\n");
3300			}
3301		} else {
3302			ena_log(adapter->pdev, ERR,
3303			    "Failed to get ENI metrics: %d\n", rc);
3304		}
3305	}
3306
3307	return (rc);
3308}
3309
3310static int
3311ena_copy_srd_metrics(struct ena_adapter *adapter)
3312{
3313	return ena_com_get_ena_srd_info(adapter->ena_dev, &adapter->ena_srd_info);
3314}
3315
3316static int
3317ena_copy_customer_metrics(struct ena_adapter *adapter)
3318{
3319	struct ena_com_dev *dev;
3320	u32 supported_metrics_count;
3321	int rc, len;
3322
3323	dev = adapter->ena_dev;
3324
3325	supported_metrics_count = ena_com_get_customer_metric_count(dev);
3326	len = supported_metrics_count * sizeof(u64);
3327
3328	/* Fill the data buffer */
3329	rc = ena_com_get_customer_metrics(adapter->ena_dev,
3330	    (char *)(adapter->customer_metrics_array), len);
3331
3332	return (rc);
3333}
3334
3335static void
3336ena_timer_service(void *data)
3337{
3338	struct ena_adapter *adapter = (struct ena_adapter *)data;
3339	struct ena_admin_host_info *host_info =
3340	    adapter->ena_dev->host_attr.host_info;
3341
3342	check_for_missing_keep_alive(adapter);
3343
3344	check_for_admin_com_state(adapter);
3345
3346	check_for_missing_completions(adapter);
3347
3348	check_for_empty_rx_ring(adapter);
3349
3350	/*
3351	 * User controller update of the ENA metrics.
3352	 * If the delay was set to 0, then the stats shouldn't be updated at
3353	 * all.
3354	 * Otherwise, wait 'metrics_sample_interval' seconds, before
3355	 * updating stats.
3356	 * As timer service is executed every second, it's enough to increment
3357	 * appropriate counter each time the timer service is executed.
3358	 */
3359	if ((adapter->metrics_sample_interval != 0) &&
3360	    (++adapter->metrics_sample_interval_cnt >=
3361	    adapter->metrics_sample_interval)) {
3362		taskqueue_enqueue(adapter->metrics_tq, &adapter->metrics_task);
3363		adapter->metrics_sample_interval_cnt = 0;
3364	}
3365
3366
3367	if (host_info != NULL)
3368		ena_update_host_info(host_info, adapter->ifp);
3369
3370	if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3371		/*
3372		 * Timeout when validating version indicates that the device
3373		 * became unresponsive. If that happens skip the reset and
3374		 * reschedule timer service, so the reset can be retried later.
3375		 */
3376		if (ena_com_validate_version(adapter->ena_dev) ==
3377		    ENA_COM_TIMER_EXPIRED) {
3378			ena_log(adapter->pdev, WARN,
3379			    "FW unresponsive, skipping reset\n");
3380			ENA_TIMER_RESET(adapter);
3381			return;
3382		}
3383		ena_log(adapter->pdev, WARN, "Trigger reset is on\n");
3384		taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
3385		return;
3386	}
3387
3388	/*
3389	 * Schedule another timeout one second from now.
3390	 */
3391	ENA_TIMER_RESET(adapter);
3392}
3393
3394void
3395ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3396{
3397	if_t ifp = adapter->ifp;
3398	struct ena_com_dev *ena_dev = adapter->ena_dev;
3399	bool dev_up;
3400
3401	if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
3402		return;
3403
3404	if (!graceful)
3405		if_link_state_change(ifp, LINK_STATE_DOWN);
3406
3407	ENA_TIMER_DRAIN(adapter);
3408
3409	dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
3410	if (dev_up)
3411		ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3412
3413	if (!graceful)
3414		ena_com_set_admin_running_state(ena_dev, false);
3415
3416	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3417		ena_down(adapter);
3418
3419	/*
3420	 * Stop the device from sending AENQ events (if the device was up, and
3421	 * the trigger reset was on, ena_down already performs device reset)
3422	 */
3423	if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up))
3424		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3425
3426	ena_free_mgmnt_irq(adapter);
3427
3428	ena_disable_msix(adapter);
3429
3430	/*
3431	 * IO rings resources should be freed because `ena_restore_device()`
3432	 * calls (not directly) `ena_enable_msix()`, which re-allocates MSIX
3433	 * vectors. The amount of MSIX vectors after destroy-restore may be
3434	 * different than before. Therefore, IO rings resources should be
3435	 * established from scratch each time.
3436	 */
3437	ena_free_all_io_rings_resources(adapter);
3438
3439	ena_com_abort_admin_commands(ena_dev);
3440
3441	ena_com_wait_for_abort_completion(ena_dev);
3442
3443	ena_com_admin_destroy(ena_dev);
3444
3445	ena_com_mmio_reg_read_request_destroy(ena_dev);
3446
3447	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3448
3449	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3450	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3451}
3452
3453static int
3454ena_device_validate_params(struct ena_adapter *adapter,
3455    struct ena_com_dev_get_features_ctx *get_feat_ctx)
3456{
3457	if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr,
3458	    ETHER_ADDR_LEN) != 0) {
3459		ena_log(adapter->pdev, ERR, "Error, mac addresses differ\n");
3460		return (EINVAL);
3461	}
3462
3463	if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) {
3464		ena_log(adapter->pdev, ERR,
3465		    "Error, device max mtu is smaller than ifp MTU\n");
3466		return (EINVAL);
3467	}
3468
3469	return 0;
3470}
3471
3472int
3473ena_restore_device(struct ena_adapter *adapter)
3474{
3475	struct ena_com_dev_get_features_ctx get_feat_ctx;
3476	struct ena_com_dev *ena_dev = adapter->ena_dev;
3477	if_t ifp = adapter->ifp;
3478	device_t dev = adapter->pdev;
3479	int wd_active;
3480	int rc;
3481
3482	ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3483
3484	rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active);
3485	if (rc != 0) {
3486		ena_log(dev, ERR, "Cannot initialize device\n");
3487		goto err;
3488	}
3489	/*
3490	 * Only enable WD if it was enabled before reset, so it won't override
3491	 * value set by the user by the sysctl.
3492	 */
3493	if (adapter->wd_active != 0)
3494		adapter->wd_active = wd_active;
3495
3496	rc = ena_device_validate_params(adapter, &get_feat_ctx);
3497	if (rc != 0) {
3498		ena_log(dev, ERR, "Validation of device parameters failed\n");
3499		goto err_device_destroy;
3500	}
3501
3502	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3503	/* Make sure we don't have a race with AENQ Links state handler */
3504	if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
3505		if_link_state_change(ifp, LINK_STATE_UP);
3506
3507	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3508	if (rc != 0) {
3509		ena_log(dev, ERR, "Enable MSI-X failed\n");
3510		goto err_device_destroy;
3511	}
3512
3513	/*
3514	 * Effective value of used MSIX vectors should be the same as before
3515	 * `ena_destroy_device()`, if possible, or closest to it if less vectors
3516	 * are available.
3517	 */
3518	if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues)
3519		adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
3520
3521	/* Re-initialize rings basic information */
3522	ena_init_io_rings(adapter);
3523
3524	/* If the interface was up before the reset bring it up */
3525	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
3526		rc = ena_up(adapter);
3527		if (rc != 0) {
3528			ena_log(dev, ERR, "Failed to create I/O queues\n");
3529			goto err_disable_msix;
3530		}
3531	}
3532
3533	/* Indicate that device is running again and ready to work */
3534	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3535
3536	/*
3537	 * As the AENQ handlers weren't executed during reset because
3538	 * the flag ENA_FLAG_DEVICE_RUNNING was turned off, the
3539	 * timestamp must be updated again That will prevent next reset
3540	 * caused by missing keep alive.
3541	 */
3542	adapter->keep_alive_timestamp = getsbinuptime();
3543	ENA_TIMER_RESET(adapter);
3544
3545	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3546
3547	return (rc);
3548
3549err_disable_msix:
3550	ena_free_mgmnt_irq(adapter);
3551	ena_disable_msix(adapter);
3552err_device_destroy:
3553	ena_com_abort_admin_commands(ena_dev);
3554	ena_com_wait_for_abort_completion(ena_dev);
3555	ena_com_admin_destroy(ena_dev);
3556	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3557	ena_com_mmio_reg_read_request_destroy(ena_dev);
3558err:
3559	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3560	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3561	ena_log(dev, ERR, "Reset attempt failed. Can not reset the device\n");
3562
3563	return (rc);
3564}
3565
3566static void
3567ena_metrics_task(void *arg, int pending)
3568{
3569	struct ena_adapter *adapter = (struct ena_adapter *)arg;
3570
3571	ENA_LOCK_LOCK();
3572
3573	if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_CUSTOMER_METRICS))
3574		(void)ena_copy_customer_metrics(adapter);
3575	else if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENI_STATS))
3576		(void)ena_copy_eni_metrics(adapter);
3577
3578	if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO))
3579		(void)ena_copy_srd_metrics(adapter);
3580
3581	ENA_LOCK_UNLOCK();
3582}
3583
3584static void
3585ena_reset_task(void *arg, int pending)
3586{
3587	struct ena_adapter *adapter = (struct ena_adapter *)arg;
3588
3589	ENA_LOCK_LOCK();
3590	if (likely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3591		ena_destroy_device(adapter, false);
3592		ena_restore_device(adapter);
3593
3594		ena_log(adapter->pdev, INFO,
3595		    "Device reset completed successfully, Driver info: %s\n",
3596		    ena_version);
3597	}
3598	ENA_LOCK_UNLOCK();
3599}
3600
3601static void
3602ena_free_stats(struct ena_adapter *adapter)
3603{
3604	ena_free_counters((counter_u64_t *)&adapter->hw_stats,
3605	    sizeof(struct ena_hw_stats));
3606	ena_free_counters((counter_u64_t *)&adapter->dev_stats,
3607	    sizeof(struct ena_stats_dev));
3608
3609}
3610/**
3611 * ena_attach - Device Initialization Routine
3612 * @pdev: device information struct
3613 *
3614 * Returns 0 on success, otherwise on failure.
3615 *
3616 * ena_attach initializes an adapter identified by a device structure.
3617 * The OS initialization, configuring of the adapter private structure,
3618 * and a hardware reset occur.
3619 **/
3620static int
3621ena_attach(device_t pdev)
3622{
3623	struct ena_com_dev_get_features_ctx get_feat_ctx;
3624	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
3625	static int version_printed;
3626	struct ena_adapter *adapter;
3627	struct ena_com_dev *ena_dev = NULL;
3628	uint32_t max_num_io_queues;
3629	int msix_rid;
3630	int rid, rc;
3631
3632	adapter = device_get_softc(pdev);
3633	adapter->pdev = pdev;
3634	adapter->first_bind = -1;
3635
3636	/*
3637	 * Set up the timer service - driver is responsible for avoiding
3638	 * concurrency, as the callout won't be using any locking inside.
3639	 */
3640	ENA_TIMER_INIT(adapter);
3641	adapter->keep_alive_timeout = ENA_DEFAULT_KEEP_ALIVE_TO;
3642	adapter->missing_tx_timeout = ENA_DEFAULT_TX_CMP_TO;
3643	adapter->missing_tx_max_queues = ENA_DEFAULT_TX_MONITORED_QUEUES;
3644	adapter->missing_tx_threshold = ENA_DEFAULT_TX_CMP_THRESHOLD;
3645
3646	adapter->irq_cpu_base = ENA_BASE_CPU_UNSPECIFIED;
3647	adapter->irq_cpu_stride = 0;
3648
3649#ifdef RSS
3650	adapter->rss_enabled = 1;
3651#endif
3652
3653	if (version_printed++ == 0)
3654		ena_log(pdev, INFO, "%s\n", ena_version);
3655
3656	/* Allocate memory for ena_dev structure */
3657	ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
3658	    M_WAITOK | M_ZERO);
3659
3660	adapter->ena_dev = ena_dev;
3661	ena_dev->dmadev = pdev;
3662
3663	rid = PCIR_BAR(ENA_REG_BAR);
3664	adapter->memory = NULL;
3665	adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid,
3666	    RF_ACTIVE);
3667	if (unlikely(adapter->registers == NULL)) {
3668		ena_log(pdev, ERR,
3669		    "unable to allocate bus resource: registers!\n");
3670		rc = ENOMEM;
3671		goto err_dev_free;
3672	}
3673
3674	/* MSIx vector table may reside on BAR0 with registers or on BAR1. */
3675	msix_rid = pci_msix_table_bar(pdev);
3676	if (msix_rid != rid) {
3677		adapter->msix = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3678		    &msix_rid, RF_ACTIVE);
3679		if (unlikely(adapter->msix == NULL)) {
3680			ena_log(pdev, ERR,
3681			    "unable to allocate bus resource: msix!\n");
3682			rc = ENOMEM;
3683			goto err_pci_free;
3684		}
3685		adapter->msix_rid = msix_rid;
3686	}
3687
3688	ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
3689	    M_WAITOK | M_ZERO);
3690
3691	/* Store register resources */
3692	((struct ena_bus *)(ena_dev->bus))->reg_bar_t = rman_get_bustag(
3693	    adapter->registers);
3694	((struct ena_bus *)(ena_dev->bus))->reg_bar_h = rman_get_bushandle(
3695	    adapter->registers);
3696
3697	if (unlikely(((struct ena_bus *)(ena_dev->bus))->reg_bar_h == 0)) {
3698		ena_log(pdev, ERR, "failed to pmap registers bar\n");
3699		rc = ENXIO;
3700		goto err_bus_free;
3701	}
3702
3703	rc = ena_map_llq_mem_bar(pdev, ena_dev);
3704	if (unlikely(rc != 0)) {
3705		ena_log(pdev, ERR, "Failed to map ENA mem bar");
3706		goto err_bus_free;
3707	}
3708
3709	/* Initially clear all the flags */
3710	ENA_FLAG_ZERO(adapter);
3711
3712	/* Device initialization */
3713	rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
3714	if (unlikely(rc != 0)) {
3715		ena_log(pdev, ERR, "ENA device init failed! (err: %d)\n", rc);
3716		rc = ENXIO;
3717		goto err_bus_free;
3718	}
3719
3720	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3721		adapter->disable_meta_caching = !!(
3722		    get_feat_ctx.llq.accel_mode.u.get.supported_flags &
3723		    BIT(ENA_ADMIN_DISABLE_META_CACHING));
3724
3725	adapter->keep_alive_timestamp = getsbinuptime();
3726
3727	adapter->tx_offload_cap = get_feat_ctx.offload.tx;
3728
3729	memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
3730	    ETHER_ADDR_LEN);
3731
3732	calc_queue_ctx.pdev = pdev;
3733	calc_queue_ctx.ena_dev = ena_dev;
3734	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
3735
3736	/* Calculate initial and maximum IO queue number and size */
3737	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev,
3738	    &get_feat_ctx);
3739	rc = ena_calc_io_queue_size(&calc_queue_ctx);
3740	if (unlikely((rc != 0) || (max_num_io_queues <= 0))) {
3741		rc = EFAULT;
3742		goto err_com_free;
3743	}
3744
3745	adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
3746	adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
3747	adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
3748	adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
3749	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
3750	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
3751
3752	adapter->max_num_io_queues = max_num_io_queues;
3753
3754	adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;
3755
3756	adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
3757
3758	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3759
3760	/* set up dma tags for rx and tx buffers */
3761	rc = ena_setup_tx_dma_tag(adapter);
3762	if (unlikely(rc != 0)) {
3763		ena_log(pdev, ERR, "Failed to create TX DMA tag\n");
3764		goto err_com_free;
3765	}
3766
3767	rc = ena_setup_rx_dma_tag(adapter);
3768	if (unlikely(rc != 0)) {
3769		ena_log(pdev, ERR, "Failed to create RX DMA tag\n");
3770		goto err_tx_tag_free;
3771	}
3772
3773	/*
3774	 * The amount of requested MSIX vectors is equal to
3775	 * adapter::max_num_io_queues (see `ena_enable_msix()`), plus a constant
3776	 * number of admin queue interrupts. The former is initially determined
3777	 * by HW capabilities (see `ena_calc_max_io_queue_num())` but may not be
3778	 * achieved if there are not enough system resources. By default, the
3779	 * number of effectively used IO queues is the same but later on it can
3780	 * be limited by the user using sysctl interface.
3781	 */
3782	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3783	if (unlikely(rc != 0)) {
3784		ena_log(pdev, ERR,
3785		    "Failed to enable and set the admin interrupts\n");
3786		goto err_io_free;
3787	}
3788	/* By default all of allocated MSIX vectors are actively used */
3789	adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
3790
3791	/* initialize rings basic information */
3792	ena_init_io_rings(adapter);
3793
3794	rc = ena_com_allocate_customer_metrics_buffer(ena_dev);
3795	if (rc) {
3796		ena_log(pdev, ERR, "Failed to allocate customer metrics buffer.\n");
3797		goto err_msix_free;
3798	}
3799
3800	rc = ena_sysctl_allocate_customer_metrics_buffer(adapter);
3801	if (unlikely(rc)){
3802		ena_log(pdev, ERR, "Failed to allocate sysctl customer metrics buffer.\n");
3803		goto err_metrics_buffer_destroy;
3804	}
3805
3806	/* Initialize statistics */
3807	ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
3808	    sizeof(struct ena_stats_dev));
3809	ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
3810	    sizeof(struct ena_hw_stats));
3811	ena_sysctl_add_nodes(adapter);
3812
3813	/* setup network interface */
3814	rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
3815	if (unlikely(rc != 0)) {
3816		ena_log(pdev, ERR, "Error with network interface setup\n");
3817		goto err_customer_metrics_alloc;
3818	}
3819
3820	/* Initialize reset task queue */
3821	TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
3822	adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
3823	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
3824	taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, "%s rstq",
3825	    device_get_nameunit(adapter->pdev));
3826
3827	/* Initialize metrics task queue */
3828	TASK_INIT(&adapter->metrics_task, 0, ena_metrics_task, adapter);
3829	adapter->metrics_tq = taskqueue_create("ena_metrics_enqueue",
3830	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->metrics_tq);
3831	taskqueue_start_threads(&adapter->metrics_tq, 1, PI_NET, "%s metricsq",
3832	    device_get_nameunit(adapter->pdev));
3833
3834#ifdef DEV_NETMAP
3835	rc = ena_netmap_attach(adapter);
3836	if (rc != 0) {
3837		ena_log(pdev, ERR, "netmap attach failed: %d\n", rc);
3838		goto err_detach;
3839	}
3840#endif /* DEV_NETMAP */
3841
3842	/* Tell the stack that the interface is not active */
3843	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
3844	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3845
3846	/* Run the timer service */
3847	ENA_TIMER_RESET(adapter);
3848
3849	return (0);
3850
3851#ifdef DEV_NETMAP
3852err_detach:
3853	ether_ifdetach(adapter->ifp);
3854#endif /* DEV_NETMAP */
3855err_customer_metrics_alloc:
3856	free(adapter->customer_metrics_array, M_DEVBUF);
3857err_metrics_buffer_destroy:
3858	ena_com_delete_customer_metrics_buffer(ena_dev);
3859err_msix_free:
3860	ena_free_stats(adapter);
3861	ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
3862	ena_free_mgmnt_irq(adapter);
3863	ena_disable_msix(adapter);
3864err_io_free:
3865	ena_free_all_io_rings_resources(adapter);
3866	ena_free_rx_dma_tag(adapter);
3867err_tx_tag_free:
3868	ena_free_tx_dma_tag(adapter);
3869err_com_free:
3870	ena_com_admin_destroy(ena_dev);
3871	ena_com_delete_host_info(ena_dev);
3872	ena_com_mmio_reg_read_request_destroy(ena_dev);
3873err_bus_free:
3874	free(ena_dev->bus, M_DEVBUF);
3875err_pci_free:
3876	ena_free_pci_resources(adapter);
3877err_dev_free:
3878	free(ena_dev, M_DEVBUF);
3879
3880	return (rc);
3881}
3882
3883/**
3884 * ena_detach - Device Removal Routine
3885 * @pdev: device information struct
3886 *
3887 * ena_detach is called by the device subsystem to alert the driver
3888 * that it should release a PCI device.
3889 **/
3890static int
3891ena_detach(device_t pdev)
3892{
3893	struct ena_adapter *adapter = device_get_softc(pdev);
3894	struct ena_com_dev *ena_dev = adapter->ena_dev;
3895	int rc;
3896
3897	/* Make sure VLANS are not using driver */
3898	if (if_vlantrunkinuse(adapter->ifp)) {
3899		ena_log(adapter->pdev, ERR, "VLAN is in use, detach first\n");
3900		return (EBUSY);
3901	}
3902
3903	ether_ifdetach(adapter->ifp);
3904
3905	/* Stop timer service */
3906	ENA_LOCK_LOCK();
3907	ENA_TIMER_DRAIN(adapter);
3908	ENA_LOCK_UNLOCK();
3909
3910	/* Release metrics task */
3911	while (taskqueue_cancel(adapter->metrics_tq, &adapter->metrics_task, NULL))
3912		taskqueue_drain(adapter->metrics_tq, &adapter->metrics_task);
3913	taskqueue_free(adapter->metrics_tq);
3914
3915	/* Release reset task */
3916	while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
3917		taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
3918	taskqueue_free(adapter->reset_tq);
3919
3920	ENA_LOCK_LOCK();
3921	ena_down(adapter);
3922	ena_destroy_device(adapter, true);
3923	ENA_LOCK_UNLOCK();
3924
3925	/* Restore unregistered sysctl queue nodes. */
3926	ena_sysctl_update_queue_node_nb(adapter, adapter->num_io_queues,
3927	    adapter->max_num_io_queues);
3928
3929#ifdef DEV_NETMAP
3930	netmap_detach(adapter->ifp);
3931#endif /* DEV_NETMAP */
3932
3933	ena_free_stats(adapter);
3934
3935	rc = ena_free_rx_dma_tag(adapter);
3936	if (unlikely(rc != 0))
3937		ena_log(adapter->pdev, WARN,
3938		    "Unmapped RX DMA tag associations\n");
3939
3940	rc = ena_free_tx_dma_tag(adapter);
3941	if (unlikely(rc != 0))
3942		ena_log(adapter->pdev, WARN,
3943		    "Unmapped TX DMA tag associations\n");
3944
3945	ena_free_irqs(adapter);
3946
3947	ena_free_pci_resources(adapter);
3948
3949	if (adapter->rss_indir != NULL)
3950		free(adapter->rss_indir, M_DEVBUF);
3951
3952	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
3953		ena_com_rss_destroy(ena_dev);
3954
3955	ena_com_delete_host_info(ena_dev);
3956
3957	free(adapter->customer_metrics_array, M_DEVBUF);
3958
3959	ena_com_delete_customer_metrics_buffer(ena_dev);
3960
3961	if_free(adapter->ifp);
3962
3963	free(ena_dev->bus, M_DEVBUF);
3964
3965	free(ena_dev, M_DEVBUF);
3966
3967	return (bus_generic_detach(pdev));
3968}
3969
3970/******************************************************************************
3971 ******************************** AENQ Handlers *******************************
3972 *****************************************************************************/
3973/**
3974 * ena_update_on_link_change:
3975 * Notify the network interface about the change in link status
3976 **/
3977static void
3978ena_update_on_link_change(void *adapter_data,
3979    struct ena_admin_aenq_entry *aenq_e)
3980{
3981	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3982	struct ena_admin_aenq_link_change_desc *aenq_desc;
3983	int status;
3984	if_t ifp;
3985
3986	aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
3987	ifp = adapter->ifp;
3988	status = aenq_desc->flags &
3989	    ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3990
3991	if (status != 0) {
3992		ena_log(adapter->pdev, INFO, "link is UP\n");
3993		ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
3994		if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
3995			if_link_state_change(ifp, LINK_STATE_UP);
3996	} else {
3997		ena_log(adapter->pdev, INFO, "link is DOWN\n");
3998		if_link_state_change(ifp, LINK_STATE_DOWN);
3999		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
4000	}
4001}
4002
4003static void
4004ena_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
4005{
4006	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4007	struct ena_admin_ena_hw_hints *hints;
4008
4009	ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
4010	    adapter->ena_dev, "Invalid group(%x) expected %x\n",
4011	    aenq_e->aenq_common_desc.group, ENA_ADMIN_NOTIFICATION);
4012
4013	switch (aenq_e->aenq_common_desc.syndrome) {
4014	case ENA_ADMIN_UPDATE_HINTS:
4015		hints =
4016		    (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
4017		ena_update_hints(adapter, hints);
4018		break;
4019	default:
4020		ena_log(adapter->pdev, ERR,
4021		    "Invalid aenq notification link state %d\n",
4022		    aenq_e->aenq_common_desc.syndrome);
4023	}
4024}
4025
4026static void
4027ena_lock_init(void *arg)
4028{
4029	ENA_LOCK_INIT();
4030}
4031SYSINIT(ena_lock_init, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_init, NULL);
4032
4033static void
4034ena_lock_uninit(void *arg)
4035{
4036	ENA_LOCK_DESTROY();
4037}
4038SYSUNINIT(ena_lock_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_uninit, NULL);
4039
4040/**
4041 * This handler will called for unknown event group or unimplemented handlers
4042 **/
4043static void
4044unimplemented_aenq_handler(void *adapter_data,
4045    struct ena_admin_aenq_entry *aenq_e)
4046{
4047	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4048
4049	ena_log(adapter->pdev, ERR,
4050	    "Unknown event was received or event with unimplemented handler\n");
4051}
4052
4053static struct ena_aenq_handlers aenq_handlers = {
4054    .handlers = {
4055	    [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4056	    [ENA_ADMIN_NOTIFICATION] = ena_notification,
4057	    [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4058    },
4059    .unimplemented_handler = unimplemented_aenq_handler
4060};
4061
4062/*********************************************************************
4063 *  FreeBSD Device Interface Entry Points
4064 *********************************************************************/
4065
4066static device_method_t ena_methods[] = { /* Device interface */
4067	DEVMETHOD(device_probe, ena_probe),
4068	DEVMETHOD(device_attach, ena_attach),
4069	DEVMETHOD(device_detach, ena_detach), DEVMETHOD_END
4070};
4071
4072static driver_t ena_driver = {
4073	"ena",
4074	ena_methods,
4075	sizeof(struct ena_adapter),
4076};
4077
4078DRIVER_MODULE(ena, pci, ena_driver, 0, 0);
4079MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
4080    nitems(ena_vendor_info_array) - 1);
4081MODULE_DEPEND(ena, pci, 1, 1, 1);
4082MODULE_DEPEND(ena, ether, 1, 1, 1);
4083#ifdef DEV_NETMAP
4084MODULE_DEPEND(ena, netmap, 1, 1, 1);
4085#endif /* DEV_NETMAP */
4086
4087/*********************************************************************/
4088