/*-
 * BSD LICENSE
 *
 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/ena/ena.c 361534 2020-05-26 17:54:11Z mw $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include "ena_datapath.h"
#include "ena.h"
#include "ena_sysctl.h"

#ifdef DEV_NETMAP
#include "ena_netmap.h"
#endif /* DEV_NETMAP */

/*********************************************************
 *  Function prototypes
 *********************************************************/
static int	ena_probe(device_t);
static void	ena_intr_msix_mgmnt(void *);
static void	ena_free_pci_resources(struct ena_adapter *);
static int	ena_change_mtu(if_t, int);
static inline void ena_alloc_counters(counter_u64_t *, int);
static inline void ena_free_counters(counter_u64_t *, int);
static inline void ena_reset_counters(counter_u64_t *, int);
static void	ena_init_io_rings_common(struct ena_adapter *,
    struct ena_ring *, uint16_t);
static void	ena_init_io_rings_basic(struct ena_adapter *);
static void	ena_init_io_rings_advanced(struct ena_adapter *);
static void	ena_init_io_rings(struct ena_adapter *);
static void	ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
static void	ena_free_all_io_rings_resources(struct ena_adapter *);
static int	ena_setup_tx_dma_tag(struct ena_adapter *);
static int	ena_free_tx_dma_tag(struct ena_adapter *);
static int	ena_setup_rx_dma_tag(struct ena_adapter *);
static int	ena_free_rx_dma_tag(struct ena_adapter *);
static void	ena_release_all_tx_dmamap(struct ena_ring *);
static int	ena_setup_tx_resources(struct ena_adapter *, int);
static void	ena_free_tx_resources(struct ena_adapter *, int);
static int	ena_setup_all_tx_resources(struct ena_adapter *);
static void	ena_free_all_tx_resources(struct ena_adapter *);
static int	ena_setup_rx_resources(struct ena_adapter *, unsigned int);
static void	ena_free_rx_resources(struct ena_adapter *, unsigned int);
static int	ena_setup_all_rx_resources(struct ena_adapter *);
static void	ena_free_all_rx_resources(struct ena_adapter *);
static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
    struct ena_rx_buffer *);
static void	ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
    struct ena_rx_buffer *);
static void	ena_free_rx_bufs(struct ena_adapter *, unsigned int);
static void	ena_refill_all_rx_bufs(struct ena_adapter *);
static void	ena_free_all_rx_bufs(struct ena_adapter *);
static void	ena_free_tx_bufs(struct ena_adapter *, unsigned int);
static void	ena_free_all_tx_bufs(struct ena_adapter *);
static void	ena_destroy_all_tx_queues(struct ena_adapter *);
static void	ena_destroy_all_rx_queues(struct ena_adapter *);
static void	ena_destroy_all_io_queues(struct ena_adapter *);
static int	ena_create_io_queues(struct ena_adapter *);
static int	ena_handle_msix(void *);
static int	ena_enable_msix(struct ena_adapter *);
static void	ena_setup_mgmnt_intr(struct ena_adapter *);
static int	ena_setup_io_intr(struct ena_adapter *);
static int	ena_request_mgmnt_irq(struct ena_adapter *);
static int	ena_request_io_irq(struct ena_adapter *);
static void	ena_free_mgmnt_irq(struct ena_adapter *);
static void	ena_free_io_irq(struct ena_adapter *);
static void	ena_free_irqs(struct ena_adapter*);
static void	ena_disable_msix(struct ena_adapter *);
static void	ena_unmask_all_io_irqs(struct ena_adapter *);
static int	ena_rss_configure(struct ena_adapter *);
static int	ena_up_complete(struct ena_adapter *);
static uint64_t	ena_get_counter(if_t, ift_counter);
static int	ena_media_change(if_t);
static void	ena_media_status(if_t, struct ifmediareq *);
static void	ena_init(void *);
static int	ena_ioctl(if_t, u_long, caddr_t);
static int	ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
static void	ena_update_host_info(struct ena_admin_host_info *, if_t);
static void	ena_update_hwassist(struct ena_adapter *);
static int	ena_setup_ifnet(device_t, struct ena_adapter *,
    struct ena_com_dev_get_features_ctx *);
static int	ena_enable_wc(struct resource *);
static int	ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
    struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
static uint32_t	ena_calc_max_io_queue_num(device_t, struct ena_com_dev *,
    struct ena_com_dev_get_features_ctx *);
static int	ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *);
static int	ena_rss_init_default(struct ena_adapter *);
static void	ena_rss_init_default_deferred(void *);
static void	ena_config_host_info(struct ena_com_dev *, device_t);
static int	ena_attach(device_t);
static int	ena_detach(device_t);
static int	ena_device_init(struct ena_adapter *, device_t,
    struct ena_com_dev_get_features_ctx *, int *);
static int	ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *);
static void	ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
static void	unimplemented_aenq_handler(void *,
    struct ena_admin_aenq_entry *);
static void	ena_timer_service(void *);

static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;

static ena_vendor_info_t ena_vendor_info_array[] = {
    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_PF, 0},
    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0},
    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_VF, 0},
    /* Last entry */
    { 0, 0, 0 }
};

/*
 * Contains pointers to event handlers, e.g. link state change.
 */
static struct ena_aenq_handlers aenq_handlers;

void
ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	if (error != 0)
		return;
	*(bus_addr_t *) arg = segs[0].ds_addr;
}

int
ena_dma_alloc(device_t dmadev, bus_size_t size,
    ena_mem_handle_t *dma, int mapflags)
{
	struct ena_adapter* adapter = device_get_softc(dmadev);
	uint32_t maxsize;
	uint64_t dma_space_addr;
	int error;

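	/* Round the requested size up to a whole number of pages. */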
	maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;

	dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
	if (unlikely(dma_space_addr == 0))
		dma_space_addr = BUS_SPACE_MAXADDR;

	error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
	    8, 0,	      /* alignment, bounds 		*/
	    dma_space_addr,   /* lowaddr of exclusion window	*/
	    BUS_SPACE_MAXADDR,/* highaddr of exclusion window	*/
	    NULL, NULL,	      /* filter, filterarg 		*/
	    maxsize,	      /* maxsize 			*/
	    1,		      /* nsegments 			*/
	    maxsize,	      /* maxsegsize 			*/
	    BUS_DMA_ALLOCNOW, /* flags 				*/
	    NULL,	      /* lockfunc 			*/
	    NULL,	      /* lockarg 			*/
	    &dma->tag);
	if (unlikely(error != 0)) {
		ena_trace(ENA_ALERT, "bus_dma_tag_create failed: %d\n", error);
		goto fail_tag;
	}

	error = bus_dmamem_alloc(dma->tag, (void**) &dma->vaddr,
	    BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
	if (unlikely(error != 0)) {
		ena_trace(ENA_ALERT, "bus_dmamem_alloc(%ju) failed: %d\n",
		    (uintmax_t)size, error);
		goto fail_map_create;
	}

	dma->paddr = 0;
	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr,
	    size, ena_dmamap_callback, &dma->paddr, mapflags);
	if (unlikely((error != 0) || (dma->paddr == 0))) {
		ena_trace(ENA_ALERT, ": bus_dmamap_load failed: %d\n", error);
		goto fail_map_load;
	}

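	/* Prepare the loaded buffer for both device reads and writes. */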
	bus_dmamap_sync(dma->tag, dma->map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	return (0);

fail_map_load:
	bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
fail_map_create:
	bus_dma_tag_destroy(dma->tag);
fail_tag:
	dma->tag = NULL;
	dma->vaddr = NULL;
	dma->paddr = 0;

	return (error);
}

/*
 * This function should generate a unique key for the whole driver.
 * If the key was already generated by a previous call (for example
 * for another adapter), then it is returned instead.
 */
void
ena_rss_key_fill(void *key, size_t size)
{
	static bool key_generated;
	static uint8_t default_key[ENA_HASH_KEY_SIZE];

	KASSERT(size <= ENA_HASH_KEY_SIZE, ("Requested more bytes than ENA RSS key can hold"));

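	/* Generate the key once and reuse it for every subsequent call. */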
	if (!key_generated) {
		arc4rand(default_key, ENA_HASH_KEY_SIZE, 0);
		key_generated = true;
	}

	memcpy(key, default_key, size);
}

static void
ena_free_pci_resources(struct ena_adapter *adapter)
{
	device_t pdev = adapter->pdev;

	if (adapter->memory != NULL) {
		bus_release_resource(pdev, SYS_RES_MEMORY,
		    PCIR_BAR(ENA_MEM_BAR), adapter->memory);
	}

	if (adapter->registers != NULL) {
		bus_release_resource(pdev, SYS_RES_MEMORY,
		    PCIR_BAR(ENA_REG_BAR), adapter->registers);
	}
}

static int
ena_probe(device_t dev)
{
	ena_vendor_info_t *ent;
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;

	pci_vendor_id = pci_get_vendor(dev);
	pci_device_id = pci_get_device(dev);

	ent = ena_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id)) {
			ena_trace(ENA_DBG, "vendor=%x device=%x\n",
			    pci_vendor_id, pci_device_id);

			sprintf(adapter_name, DEVICE_DESC);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}

		ent++;
	}

	return (ENXIO);
}

static int
ena_change_mtu(if_t ifp, int new_mtu)
{
	struct ena_adapter *adapter = if_getsoftc(ifp);
	int rc;

	if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
		device_printf(adapter->pdev, "Invalid MTU setting. "
		    "new_mtu: %d max mtu: %d min mtu: %d\n",
		    new_mtu, adapter->max_mtu, ENA_MIN_MTU);
		return (EINVAL);
	}

	rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
	if (likely(rc == 0)) {
		ena_trace(ENA_DBG, "set MTU to %d\n", new_mtu);
		if_setmtu(ifp, new_mtu);
	} else {
		device_printf(adapter->pdev, "Failed to set MTU to %d\n",
		    new_mtu);
	}

	return (rc);
}

static inline void
ena_alloc_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);

	for (; begin < end; ++begin)
		*begin = counter_u64_alloc(M_WAITOK);
}

static inline void
ena_free_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);

	for (; begin < end; ++begin)
		counter_u64_free(*begin);
}

static inline void
ena_reset_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);

	for (; begin < end; ++begin)
		counter_u64_zero(*begin);
}

static void
ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
    uint16_t qid)
{

	ring->qid = qid;
	ring->adapter = adapter;
	ring->ena_dev = adapter->ena_dev;
	ring->first_interrupt = false;
	ring->no_interrupt_event_cnt = 0;
}

static void
ena_init_io_rings_basic(struct ena_adapter *adapter)
{
	struct ena_com_dev *ena_dev;
	struct ena_ring *txr, *rxr;
	struct ena_que *que;
	int i;

	ena_dev = adapter->ena_dev;

	for (i = 0; i < adapter->num_io_queues; i++) {
		txr = &adapter->tx_ring[i];
		rxr = &adapter->rx_ring[i];

		/* TX/RX common ring state */
		ena_init_io_rings_common(adapter, txr, i);
		ena_init_io_rings_common(adapter, rxr, i);

		/* TX specific ring state */
		txr->tx_max_header_size = ena_dev->tx_max_header_size;
		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;

		que = &adapter->que[i];
		que->adapter = adapter;
		que->id = i;
		que->tx_ring = txr;
		que->rx_ring = rxr;

		txr->que = que;
		rxr->que = que;

		rxr->empty_rx_queue = 0;
		rxr->rx_mbuf_sz = ena_mbuf_sz;
	}
}

static void
ena_init_io_rings_advanced(struct ena_adapter *adapter)
{
	struct ena_ring *txr, *rxr;
	int i;

	for (i = 0; i < adapter->num_io_queues; i++) {
		txr = &adapter->tx_ring[i];
		rxr = &adapter->rx_ring[i];

		/* Allocate a buf ring */
		txr->buf_ring_size = adapter->buf_ring_size;
		txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF,
		    M_WAITOK, &txr->ring_mtx);

		/* Allocate Tx statistics. */
		ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
		    sizeof(txr->tx_stats));

		/* Allocate Rx statistics. */
		ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
		    sizeof(rxr->rx_stats));

		/* Initialize locks */
		snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(adapter->pdev), i);
		snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(adapter->pdev), i);

		mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
	}
}

static void
ena_init_io_rings(struct ena_adapter *adapter)
{
	/*
	 * IO rings initialization can be divided into two steps:
	 *   1. Initialize variables and fields with initial values and copy
	 *      them from adapter/ena_dev (basic)
	 *   2. Allocate mutex, counters and buf_ring (advanced)
	 */
	ena_init_io_rings_basic(adapter);
	ena_init_io_rings_advanced(adapter);
}

static void
ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
{
	struct ena_ring *txr = &adapter->tx_ring[qid];
	struct ena_ring *rxr = &adapter->rx_ring[qid];

	ena_free_counters((counter_u64_t *)&txr->tx_stats,
	    sizeof(txr->tx_stats));
	ena_free_counters((counter_u64_t *)&rxr->rx_stats,
	    sizeof(rxr->rx_stats));

	ENA_RING_MTX_LOCK(txr);
	drbr_free(txr->br, M_DEVBUF);
	ENA_RING_MTX_UNLOCK(txr);

	mtx_destroy(&txr->ring_mtx);
}

static void
ena_free_all_io_rings_resources(struct ena_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_io_queues; i++)
		ena_free_io_ring_resources(adapter, i);
}

static int
ena_setup_tx_dma_tag(struct ena_adapter *adapter)
{
	int ret;

	/* Create DMA tag for Tx buffers */
	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
	    1, 0,				  /* alignment, bounds 	     */
	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
	    BUS_SPACE_MAXADDR, 			  /* highaddr of excl window */
	    NULL, NULL,				  /* filter, filterarg 	     */
	    ENA_TSO_MAXSIZE,			  /* maxsize 		     */
	    adapter->max_tx_sgl_size - 1,	  /* nsegments 		     */
	    ENA_TSO_MAXSIZE,			  /* maxsegsize 	     */
	    0,					  /* flags 		     */
	    NULL,				  /* lockfunc 		     */
	    NULL,				  /* lockarg 		     */
	    &adapter->tx_buf_tag);

	return (ret);
}

static int
ena_free_tx_dma_tag(struct ena_adapter *adapter)
{
	int ret;

	ret = bus_dma_tag_destroy(adapter->tx_buf_tag);

	if (likely(ret == 0))
		adapter->tx_buf_tag = NULL;

	return (ret);
}

static int
ena_setup_rx_dma_tag(struct ena_adapter *adapter)
{
	int ret;

	/* Create DMA tag for Rx buffers */
	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent   */
	    1, 0,				  /* alignment, bounds 	     */
	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
	    BUS_SPACE_MAXADDR, 			  /* highaddr of excl window */
	    NULL, NULL,				  /* filter, filterarg 	     */
	    ena_mbuf_sz,			  /* maxsize 		     */
	    adapter->max_rx_sgl_size,		  /* nsegments 		     */
	    ena_mbuf_sz,			  /* maxsegsize 	     */
	    0,					  /* flags 		     */
	    NULL,				  /* lockfunc 		     */
	    NULL,				  /* lockarg 		     */
	    &adapter->rx_buf_tag);

	return (ret);
}

static int
ena_free_rx_dma_tag(struct ena_adapter *adapter)
{
	int ret;

	ret = bus_dma_tag_destroy(adapter->rx_buf_tag);

	if (likely(ret == 0))
		adapter->rx_buf_tag = NULL;

	return (ret);
}

static void
ena_release_all_tx_dmamap(struct ena_ring *tx_ring)
{
	struct ena_adapter *adapter = tx_ring->adapter;
	struct ena_tx_buffer *tx_info;
	bus_dma_tag_t tx_tag = adapter->tx_buf_tag;
	int i;
#ifdef DEV_NETMAP
	struct ena_netmap_tx_info *nm_info;
	int j;
#endif /* DEV_NETMAP */

	for (i = 0; i < tx_ring->ring_size; ++i) {
		tx_info = &tx_ring->tx_buffer_info[i];
#ifdef DEV_NETMAP
		if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
			nm_info = &tx_info->nm_info;
			for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) {
				if (nm_info->map_seg[j] != NULL) {
					bus_dmamap_destroy(tx_tag,
					    nm_info->map_seg[j]);
					nm_info->map_seg[j] = NULL;
				}
			}
		}
#endif /* DEV_NETMAP */
		if (tx_info->dmamap != NULL) {
			bus_dmamap_destroy(tx_tag, tx_info->dmamap);
			tx_info->dmamap = NULL;
		}
	}
}

/**
 * ena_setup_tx_resources - allocate Tx resources (Descriptors)
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Returns 0 on success, or an error code on failure.
 **/
static int
ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
{
	struct ena_que *que = &adapter->que[qid];
	struct ena_ring *tx_ring = que->tx_ring;
	int size, i, err;
#ifdef DEV_NETMAP
	bus_dmamap_t *map;
	int j;

	ena_netmap_reset_tx_ring(adapter, qid);
#endif /* DEV_NETMAP */

	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;

	tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (unlikely(tx_ring->tx_buffer_info == NULL))
		return (ENOMEM);

	size = sizeof(uint16_t) * tx_ring->ring_size;
	tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (unlikely(tx_ring->free_tx_ids == NULL))
		goto err_buf_info_free;

	size = tx_ring->tx_max_header_size;
	tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
		goto err_tx_ids_free;

	/* Req id stack for TX OOO completions */
	for (i = 0; i < tx_ring->ring_size; i++)
		tx_ring->free_tx_ids[i] = i;

	/* Reset TX statistics. */
	ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
	    sizeof(tx_ring->tx_stats));

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	tx_ring->acum_pkts = 0;

	/* Make sure that drbr is empty */
	ENA_RING_MTX_LOCK(tx_ring);
	drbr_flush(adapter->ifp, tx_ring->br);
	ENA_RING_MTX_UNLOCK(tx_ring);

	/* ... and create the buffer DMA maps */
	for (i = 0; i < tx_ring->ring_size; i++) {
		err = bus_dmamap_create(adapter->tx_buf_tag, 0,
		    &tx_ring->tx_buffer_info[i].dmamap);
		if (unlikely(err != 0)) {
			ena_trace(ENA_ALERT,
			    "Unable to create Tx DMA map for buffer %d\n",
			    i);
			goto err_map_release;
		}

#ifdef DEV_NETMAP
		if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
			map = tx_ring->tx_buffer_info[i].nm_info.map_seg;
			for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
				err = bus_dmamap_create(adapter->tx_buf_tag, 0,
				    &map[j]);
				if (unlikely(err != 0)) {
					ena_trace(ENA_ALERT, "Unable to create "
					    "Tx DMA for buffer %d %d\n", i, j);
					goto err_map_release;
				}
			}
		}
#endif /* DEV_NETMAP */
	}

	/* Allocate taskqueues */
	TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
	tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
	    taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
	if (unlikely(tx_ring->enqueue_tq == NULL)) {
		ena_trace(ENA_ALERT,
		    "Unable to create taskqueue for enqueue task\n");
		i = tx_ring->ring_size;
		goto err_map_release;
	}

	tx_ring->running = true;

	taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET,
	    "%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu);

	return (0);

err_map_release:
	ena_release_all_tx_dmamap(tx_ring);
err_tx_ids_free:
	free(tx_ring->free_tx_ids, M_DEVBUF);
	tx_ring->free_tx_ids = NULL;
err_buf_info_free:
	free(tx_ring->tx_buffer_info, M_DEVBUF);
	tx_ring->tx_buffer_info = NULL;

	return (ENOMEM);
}

/**
 * ena_free_tx_resources - Free Tx Resources per Queue
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Free all transmit software resources
 **/
static void
ena_free_tx_resources(struct ena_adapter *adapter, int qid)
{
	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
#ifdef DEV_NETMAP
	struct ena_netmap_tx_info *nm_info;
	int j;
#endif /* DEV_NETMAP */

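	/* Cancel the enqueue task; if it is already running, drain it. */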
	while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
	    NULL))
		taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);

	taskqueue_free(tx_ring->enqueue_tq);

	ENA_RING_MTX_LOCK(tx_ring);
	/* Flush buffer ring */
	drbr_flush(adapter->ifp, tx_ring->br);

	/* Free buffer DMA maps */
	for (int i = 0; i < tx_ring->ring_size; i++) {
		bus_dmamap_sync(adapter->tx_buf_tag,
		    tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(adapter->tx_buf_tag,
		    tx_ring->tx_buffer_info[i].dmamap);
		bus_dmamap_destroy(adapter->tx_buf_tag,
		    tx_ring->tx_buffer_info[i].dmamap);

#ifdef DEV_NETMAP
		if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
			nm_info = &tx_ring->tx_buffer_info[i].nm_info;
			for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
				if (nm_info->socket_buf_idx[j] != 0) {
					bus_dmamap_sync(adapter->tx_buf_tag,
					    nm_info->map_seg[j],
					    BUS_DMASYNC_POSTWRITE);
					ena_netmap_unload(adapter,
					    nm_info->map_seg[j]);
				}
				bus_dmamap_destroy(adapter->tx_buf_tag,
				    nm_info->map_seg[j]);
				nm_info->socket_buf_idx[j] = 0;
			}
		}
#endif /* DEV_NETMAP */

		m_freem(tx_ring->tx_buffer_info[i].mbuf);
		tx_ring->tx_buffer_info[i].mbuf = NULL;
	}
	ENA_RING_MTX_UNLOCK(tx_ring);

	/* And free allocated memory. */
	free(tx_ring->tx_buffer_info, M_DEVBUF);
	tx_ring->tx_buffer_info = NULL;

	free(tx_ring->free_tx_ids, M_DEVBUF);
	tx_ring->free_tx_ids = NULL;

	free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
	tx_ring->push_buf_intermediate_buf = NULL;
}

/**
 * ena_setup_all_tx_resources - allocate all queues Tx resources
 * @adapter: network interface device structure
 *
 * Returns 0 on success, or an error code on failure.
 **/
static int
ena_setup_all_tx_resources(struct ena_adapter *adapter)
{
	int i, rc;

	for (i = 0; i < adapter->num_io_queues; i++) {
		rc = ena_setup_tx_resources(adapter, i);
		if (rc != 0) {
			device_printf(adapter->pdev,
			    "Allocation for Tx Queue %u failed\n", i);
			goto err_setup_tx;
		}
	}

	return (0);

err_setup_tx:
	/* Rewind the index freeing the rings as we go */
	while (i--)
		ena_free_tx_resources(adapter, i);
	return (rc);
}

/**
 * ena_free_all_tx_resources - Free Tx Resources for All Queues
 * @adapter: network interface device structure
 *
 * Free all transmit software resources
 **/
static void
ena_free_all_tx_resources(struct ena_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_io_queues; i++)
		ena_free_tx_resources(adapter, i);
}

/**
 * ena_setup_rx_resources - allocate Rx resources (Descriptors)
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Returns 0 on success, or an error code on failure.
 **/
static int
ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
{
	struct ena_que *que = &adapter->que[qid];
	struct ena_ring *rx_ring = que->rx_ring;
	int size, err, i;

	size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;

#ifdef DEV_NETMAP
	ena_netmap_reset_rx_ring(adapter, qid);
	rx_ring->initialized = false;
#endif /* DEV_NETMAP */

	/*
	 * Allocate an extra element so that the Rx path can always
	 * prefetch rx_info + 1.
	 */
	size += sizeof(struct ena_rx_buffer);

	rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

	size = sizeof(uint16_t) * rx_ring->ring_size;
	rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);

	for (i = 0; i < rx_ring->ring_size; i++)
		rx_ring->free_rx_ids[i] = i;

	/* Reset RX statistics. */
	ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
	    sizeof(rx_ring->rx_stats));

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	/* ... and create the buffer DMA maps */
	for (i = 0; i < rx_ring->ring_size; i++) {
		err = bus_dmamap_create(adapter->rx_buf_tag, 0,
		    &(rx_ring->rx_buffer_info[i].map));
		if (err != 0) {
			ena_trace(ENA_ALERT,
			    "Unable to create Rx DMA map for buffer %d\n", i);
			goto err_buf_info_unmap;
		}
	}

	/* Create LRO for the ring */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) != 0) {
		int err = tcp_lro_init(&rx_ring->lro);
		if (err != 0) {
			device_printf(adapter->pdev,
			    "LRO[%d] Initialization failed!\n", qid);
		} else {
			ena_trace(ENA_INFO,
			    "RX Soft LRO[%d] Initialized\n", qid);
			rx_ring->lro.ifp = adapter->ifp;
		}
	}

	return (0);

err_buf_info_unmap:
	while (i--) {
		bus_dmamap_destroy(adapter->rx_buf_tag,
		    rx_ring->rx_buffer_info[i].map);
	}

	free(rx_ring->free_rx_ids, M_DEVBUF);
	rx_ring->free_rx_ids = NULL;
	free(rx_ring->rx_buffer_info, M_DEVBUF);
	rx_ring->rx_buffer_info = NULL;
	return (ENOMEM);
}

/**
 * ena_free_rx_resources - Free Rx Resources
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Free all receive software resources
 **/
static void
ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
{
	struct ena_ring *rx_ring = &adapter->rx_ring[qid];

	/* Free buffer DMA maps */
	for (int i = 0; i < rx_ring->ring_size; i++) {
		bus_dmamap_sync(adapter->rx_buf_tag,
		    rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
		m_freem(rx_ring->rx_buffer_info[i].mbuf);
		rx_ring->rx_buffer_info[i].mbuf = NULL;
		bus_dmamap_unload(adapter->rx_buf_tag,
		    rx_ring->rx_buffer_info[i].map);
		bus_dmamap_destroy(adapter->rx_buf_tag,
		    rx_ring->rx_buffer_info[i].map);
	}

	/* free LRO resources */
	tcp_lro_free(&rx_ring->lro);

	/* free allocated memory */
	free(rx_ring->rx_buffer_info, M_DEVBUF);
	rx_ring->rx_buffer_info = NULL;

	free(rx_ring->free_rx_ids, M_DEVBUF);
	rx_ring->free_rx_ids = NULL;
}

/**
 * ena_setup_all_rx_resources - allocate all queues Rx resources
 * @adapter: network interface device structure
 *
 * Returns 0 on success, or an error code on failure.
 **/
static int
ena_setup_all_rx_resources(struct ena_adapter *adapter)
{
	int i, rc = 0;

	for (i = 0; i < adapter->num_io_queues; i++) {
		rc = ena_setup_rx_resources(adapter, i);
		if (rc != 0) {
			device_printf(adapter->pdev,
			    "Allocation for Rx Queue %u failed\n", i);
			goto err_setup_rx;
		}
	}
	return (0);

err_setup_rx:
	/* rewind the index freeing the rings as we go */
	while (i--)
		ena_free_rx_resources(adapter, i);
	return (rc);
}

/**
 * ena_free_all_rx_resources - Free Rx resources for all queues
 * @adapter: network interface device structure
 *
 * Free all receive software resources
 **/
static void
ena_free_all_rx_resources(struct ena_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_io_queues; i++)
		ena_free_rx_resources(adapter, i);
}

static inline int
ena_alloc_rx_mbuf(struct ena_adapter *adapter,
    struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
{
	struct ena_com_buf *ena_buf;
	bus_dma_segment_t segs[1];
	int nsegs, error;
	int mlen;

	/* If the previously allocated frag is not yet used */
	if (unlikely(rx_info->mbuf != NULL))
		return (0);

	/* Get mbuf using UMA allocator */
	rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
	    rx_ring->rx_mbuf_sz);

	if (unlikely(rx_info->mbuf == NULL)) {
		counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
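		/* Fall back to a standard cluster if the jumbo allocation failed. */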
		rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (unlikely(rx_info->mbuf == NULL)) {
			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
			return (ENOMEM);
		}
		mlen = MCLBYTES;
	} else {
		mlen = rx_ring->rx_mbuf_sz;
	}
	/* Set mbuf length */
	rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;

	/* Map packets for DMA */
	ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
	    "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
	    adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len);
	error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
	    rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	if (unlikely((error != 0) || (nsegs != 1))) {
		ena_trace(ENA_WARNING, "failed to map mbuf, error: %d, "
		    "nsegs: %d\n", error, nsegs);
		counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
		goto exit;
	}

	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);

	ena_buf = &rx_info->ena_buf;
	ena_buf->paddr = segs[0].ds_addr;
	ena_buf->len = mlen;

	ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
	    "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
	    rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr);

	return (0);

exit:
	m_freem(rx_info->mbuf);
	rx_info->mbuf = NULL;
	return (EFAULT);
}

static void
ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
    struct ena_rx_buffer *rx_info)
{

	if (rx_info->mbuf == NULL) {
		ena_trace(ENA_WARNING, "Trying to free unallocated buffer\n");
		return;
	}

	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
	m_freem(rx_info->mbuf);
	rx_info->mbuf = NULL;
}

/**
 * ena_refill_rx_bufs - Refills ring with descriptors
 * @rx_ring: the ring which we want to feed with free descriptors
 * @num: number of descriptors to refill
 * Refills the ring with newly allocated DMA-mapped mbufs for receiving
 **/
int
ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
{
	struct ena_adapter *adapter = rx_ring->adapter;
	uint16_t next_to_use, req_id;
	uint32_t i;
	int rc;

	ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d\n",
	    rx_ring->qid);

	next_to_use = rx_ring->next_to_use;

	for (i = 0; i < num; i++) {
		struct ena_rx_buffer *rx_info;

		ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC,
		    "RX buffer - next to use: %d\n", next_to_use);

		req_id = rx_ring->free_rx_ids[next_to_use];
		rx_info = &rx_ring->rx_buffer_info[req_id];
#ifdef DEV_NETMAP
		if (ena_rx_ring_in_netmap(adapter, rx_ring->qid))
			rc = ena_netmap_alloc_rx_slot(adapter, rx_ring, rx_info);
		else
#endif /* DEV_NETMAP */
			rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
		if (unlikely(rc != 0)) {
			ena_trace(ENA_WARNING,
			    "failed to alloc buffer for rx queue %d\n",
			    rx_ring->qid);
			break;
		}
		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
		    &rx_info->ena_buf, req_id);
		if (unlikely(rc != 0)) {
			ena_trace(ENA_WARNING,
			    "failed to add buffer for rx queue %d\n",
			    rx_ring->qid);
			break;
		}
		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
		    rx_ring->ring_size);
	}

	if (unlikely(i < num)) {
		counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
		ena_trace(ENA_WARNING,
		     "refilled rx qid %d with only %d mbufs (from %d)\n",
		     rx_ring->qid, i, num);
	}

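	/* Notify the device that new Rx descriptors are available. */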
	if (likely(i != 0))
		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);

	rx_ring->next_to_use = next_to_use;
	return (i);
}

int
ena_update_buf_ring_size(struct ena_adapter *adapter,
    uint32_t new_buf_ring_size)
{
	uint32_t old_buf_ring_size;
	int rc = 0;
	bool dev_was_up;

	ENA_LOCK_LOCK(adapter);

	old_buf_ring_size = adapter->buf_ring_size;
	adapter->buf_ring_size = new_buf_ring_size;

	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
	ena_down(adapter);

	/* Reconfigure buf ring for all Tx rings. */
	ena_free_all_io_rings_resources(adapter);
	ena_init_io_rings_advanced(adapter);
	if (dev_was_up) {
		/*
		 * If ena_up() fails, it's not because of the recent buf_ring
		 * size changes. Because of that, we just want to revert to the
		 * old drbr value and trigger the reset, as something else must
		 * have gone wrong.
		 */
		rc = ena_up(adapter);
		if (unlikely(rc != 0)) {
			device_printf(adapter->pdev,
			    "Failed to configure device after setting new drbr size: %u. Reverting old value: %u and triggering the reset\n",
			    new_buf_ring_size, old_buf_ring_size);

			/* Revert old size and trigger the reset */
			adapter->buf_ring_size = old_buf_ring_size;
			ena_free_all_io_rings_resources(adapter);
			ena_init_io_rings_advanced(adapter);

			ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET,
			    adapter);
			ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER);
		}
	}

	ENA_LOCK_UNLOCK(adapter);

	return (rc);
}

int
ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
    uint32_t new_rx_size)
{
	uint32_t old_tx_size, old_rx_size;
	int rc = 0;
	bool dev_was_up;

	ENA_LOCK_LOCK(adapter);

	old_tx_size = adapter->requested_tx_ring_size;
	old_rx_size = adapter->requested_rx_ring_size;
	adapter->requested_tx_ring_size = new_tx_size;
	adapter->requested_rx_ring_size = new_rx_size;

	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
	ena_down(adapter);

	/* Configure queues with new size. */
	ena_init_io_rings_basic(adapter);
	if (dev_was_up) {
		rc = ena_up(adapter);
		if (unlikely(rc != 0)) {
			device_printf(adapter->pdev,
			    "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n",
			    new_tx_size, new_rx_size, old_tx_size, old_rx_size);

			/* Revert old size. */
			adapter->requested_tx_ring_size = old_tx_size;
			adapter->requested_rx_ring_size = old_rx_size;
			ena_init_io_rings_basic(adapter);

			/* And try again. */
			rc = ena_up(adapter);
			if (unlikely(rc != 0)) {
				device_printf(adapter->pdev,
				    "Failed to revert old queue sizes. Triggering device reset.\n");
				/*
				 * If we've failed again, something else must
				 * have gone wrong. After the reset, the device
				 * should try to go up.
				 */
				ENA_FLAG_SET_ATOMIC(
				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
				ena_trigger_reset(adapter,
				    ENA_REGS_RESET_OS_TRIGGER);
			}
		}
	}

	ENA_LOCK_UNLOCK(adapter);

	return (rc);
}

static void
ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
{
	ena_free_all_io_rings_resources(adapter);
	/* Force indirection table to be reinitialized */
	ena_com_rss_destroy(adapter->ena_dev);

	adapter->num_io_queues = num;
	ena_init_io_rings(adapter);
}

/* Caller should sanitize new_num */
int
ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
{
	uint32_t old_num;
	int rc = 0;
	bool dev_was_up;

	ENA_LOCK_LOCK(adapter);

	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
	old_num = adapter->num_io_queues;
	ena_down(adapter);

	ena_update_io_rings(adapter, new_num);

	if (dev_was_up) {
		rc = ena_up(adapter);
		if (unlikely(rc != 0)) {
			device_printf(adapter->pdev,
			    "Failed to configure device with %u IO queues. "
			    "Reverting to previous value: %u\n",
			    new_num, old_num);

			ena_update_io_rings(adapter, old_num);

			rc = ena_up(adapter);
			if (unlikely(rc != 0)) {
				device_printf(adapter->pdev,
				    "Failed to revert to previous setup IO "
				    "queues. Triggering device reset.\n");
				ENA_FLAG_SET_ATOMIC(
				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
				ena_trigger_reset(adapter,
				    ENA_REGS_RESET_OS_TRIGGER);
			}
		}
	}

	ENA_LOCK_UNLOCK(adapter);

	return (rc);
}

static void
ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
{
	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
	unsigned int i;

	for (i = 0; i < rx_ring->ring_size; i++) {
		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];

		if (rx_info->mbuf != NULL)
			ena_free_rx_mbuf(adapter, rx_ring, rx_info);
#ifdef DEV_NETMAP
		if (((if_getflags(adapter->ifp) & IFF_DYING) == 0) &&
		    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
			if (rx_info->netmap_buf_idx != 0)
				ena_netmap_free_rx_slot(adapter, rx_ring,
				    rx_info);
		}
#endif /* DEV_NETMAP */
	}
}

/**
 * ena_refill_all_rx_bufs - allocate all queues Rx buffers
 * @adapter: network interface device structure
 *
 */
static void
ena_refill_all_rx_bufs(struct ena_adapter *adapter)
{
	struct ena_ring *rx_ring;
	int i, rc, bufs_num;

	for (i = 0; i < adapter->num_io_queues; i++) {
		rx_ring = &adapter->rx_ring[i];
		bufs_num = rx_ring->ring_size - 1;
		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
		if (unlikely(rc != bufs_num))
			ena_trace(ENA_WARNING, "refilling Queue %d failed. "
			    "Allocated %d buffers from: %d\n", i, rc, bufs_num);
#ifdef DEV_NETMAP
		rx_ring->initialized = true;
#endif /* DEV_NETMAP */
	}
}

static void
ena_free_all_rx_bufs(struct ena_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_io_queues; i++)
		ena_free_rx_bufs(adapter, i);
}

/**
 * ena_free_tx_bufs - Free Tx Buffers per Queue
 * @adapter: network interface device structure
 * @qid: queue index
 **/
static void
ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
{
	bool print_once = true;
	struct ena_ring *tx_ring = &adapter->tx_ring[qid];

	ENA_RING_MTX_LOCK(tx_ring);
	for (int i = 0; i < tx_ring->ring_size; i++) {
		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];

		if (tx_info->mbuf == NULL)
			continue;

		if (print_once) {
			device_printf(adapter->pdev,
			    "free uncompleted tx mbuf qid %d idx 0x%x\n",
			    qid, i);
			print_once = false;
		} else {
			ena_trace(ENA_DBG,
			    "free uncompleted tx mbuf qid %d idx 0x%x\n",
			     qid, i);
		}

		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);

		m_free(tx_info->mbuf);
		tx_info->mbuf = NULL;
	}
	ENA_RING_MTX_UNLOCK(tx_ring);
}

static void
ena_free_all_tx_bufs(struct ena_adapter *adapter)
{

	for (int i = 0; i < adapter->num_io_queues; i++)
		ena_free_tx_bufs(adapter, i);
}

static void
ena_destroy_all_tx_queues(struct ena_adapter *adapter)
{
	uint16_t ena_qid;
	int i;

	for (i = 0; i < adapter->num_io_queues; i++) {
		ena_qid = ENA_IO_TXQ_IDX(i);
		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
	}
}

static void
ena_destroy_all_rx_queues(struct ena_adapter *adapter)
{
	uint16_t ena_qid;
	int i;

	for (i = 0; i < adapter->num_io_queues; i++) {
		ena_qid = ENA_IO_RXQ_IDX(i);
		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
	}
}

static void
ena_destroy_all_io_queues(struct ena_adapter *adapter)
{
	struct ena_que *queue;
	int i;

	for (i = 0; i < adapter->num_io_queues; i++) {
		queue = &adapter->que[i];
		while (taskqueue_cancel(queue->cleanup_tq,
		    &queue->cleanup_task, NULL))
			taskqueue_drain(queue->cleanup_tq,
			    &queue->cleanup_task);
		taskqueue_free(queue->cleanup_tq);
	}

	ena_destroy_all_tx_queues(adapter);
	ena_destroy_all_rx_queues(adapter);
}

static int
ena_create_io_queues(struct ena_adapter *adapter)
{
	struct ena_com_dev *ena_dev = adapter->ena_dev;
	struct ena_com_create_io_ctx ctx;
	struct ena_ring *ring;
	struct ena_que *queue;
	uint16_t ena_qid;
	uint32_t msix_vector;
	int rc, i;

	/* Create TX queues */
	for (i = 0; i < adapter->num_io_queues; i++) {
		msix_vector = ENA_IO_IRQ_IDX(i);
		ena_qid = ENA_IO_TXQ_IDX(i);
		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
		ctx.queue_size = adapter->requested_tx_ring_size;
		ctx.msix_vector = msix_vector;
		ctx.qid = ena_qid;
		rc = ena_com_create_io_queue(ena_dev, &ctx);
		if (rc != 0) {
			device_printf(adapter->pdev,
			    "Failed to create io TX queue #%d rc: %d\n", i, rc);
			goto err_tx;
		}
		ring = &adapter->tx_ring[i];
		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
		    &ring->ena_com_io_sq,
		    &ring->ena_com_io_cq);
		if (rc != 0) {
			device_printf(adapter->pdev,
			    "Failed to get TX queue handlers. TX queue num"
			    " %d rc: %d\n", i, rc);
			ena_com_destroy_io_queue(ena_dev, ena_qid);
			goto err_tx;
		}
	}

	/* Create RX queues */
	for (i = 0; i < adapter->num_io_queues; i++) {
		msix_vector = ENA_IO_IRQ_IDX(i);
		ena_qid = ENA_IO_RXQ_IDX(i);
		ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
		ctx.queue_size = adapter->requested_rx_ring_size;
		ctx.msix_vector = msix_vector;
		ctx.qid = ena_qid;
		rc = ena_com_create_io_queue(ena_dev, &ctx);
		if (unlikely(rc != 0)) {
			device_printf(adapter->pdev,
			    "Failed to create io RX queue[%d] rc: %d\n", i, rc);
			goto err_rx;
		}

		ring = &adapter->rx_ring[i];
		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
		    &ring->ena_com_io_sq,
		    &ring->ena_com_io_cq);
		if (unlikely(rc != 0)) {
			device_printf(adapter->pdev,
			    "Failed to get RX queue handlers. RX queue num"
			    " %d rc: %d\n", i, rc);
			ena_com_destroy_io_queue(ena_dev, ena_qid);
			goto err_rx;
		}
	}

	for (i = 0; i < adapter->num_io_queues; i++) {
		queue = &adapter->que[i];

		TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
		queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
		    M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);

		taskqueue_start_threads(&queue->cleanup_tq, 1, PI_NET,
		    "%s queue %d cleanup",
		    device_get_nameunit(adapter->pdev), i);
	}

	return (0);

err_rx:
	while (i--)
		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
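	/* All Tx queues were created at this point; unwind them all. */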
	i = adapter->num_io_queues;
err_tx:
	while (i--)
		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));

	return (ENXIO);
}

/*********************************************************************
 *
 *  MSIX & Interrupt Service routine
 *
 **********************************************************************/

/**
 * ena_intr_msix_mgmnt - MSI-X Interrupt Handler for admin/async queue
 * @arg: adapter
 **/
static void
ena_intr_msix_mgmnt(void *arg)
{
	struct ena_adapter *adapter = (struct ena_adapter *)arg;

	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
	if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
		ena_com_aenq_intr_handler(adapter->ena_dev, arg);
}

/**
 * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
 * @arg: queue
 **/
static int
ena_handle_msix(void *arg)
{
	struct ena_que *queue = arg;
	struct ena_adapter *adapter = queue->adapter;
	if_t ifp = adapter->ifp;

	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

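	/* Defer the actual Tx/Rx cleanup to the queue's taskqueue. */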
	taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);

	return (FILTER_HANDLED);
}

static int
ena_enable_msix(struct ena_adapter *adapter)
{
	device_t dev = adapter->pdev;
	int msix_vecs, msix_req;
	int i, rc = 0;

	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
		device_printf(dev, "Error, MSI-X is already enabled\n");
		return (EINVAL);
	}

1581	/* Reserve the maximum number of MSI-X vectors we might need */
1582	msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1583
1584	adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
1585	    M_DEVBUF, M_WAITOK | M_ZERO);
1586
1587	ena_trace(ENA_DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs);
1588
1589	for (i = 0; i < msix_vecs; i++) {
1590		adapter->msix_entries[i].entry = i;
1591		/* Vectors must start from 1 */
1592		adapter->msix_entries[i].vector = i + 1;
1593	}
1594
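	/*
	 * Save the requested count: pci_alloc_msix(9) may grant fewer
	 * vectors and updates msix_vecs in place with the number actually
	 * allocated.
	 */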
1595	msix_req = msix_vecs;
1596	rc = pci_alloc_msix(dev, &msix_vecs);
1597	if (unlikely(rc != 0)) {
1598		device_printf(dev,
1599		    "Failed to enable MSI-X, vectors %d rc %d\n", msix_vecs, rc);
1600
1601		rc = ENOSPC;
1602		goto err_msix_free;
1603	}
1604
1605	if (msix_vecs != msix_req) {
1606		if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
1607			device_printf(dev,
1608			    "Not enough MSI-X vectors allocated: %d\n",
1609			    msix_vecs);
1610			pci_release_msi(dev);
1611			rc = ENOSPC;
1612			goto err_msix_free;
1613		}
1614		device_printf(dev, "Enabled only %d MSI-X vectors (out of %d), "
1615		    "reducing the number of queues\n", msix_vecs, msix_req);
1616	}
1617
1618	adapter->msix_vecs = msix_vecs;
1619	ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1620
1621	return (0);
1622
1623err_msix_free:
1624	free(adapter->msix_entries, M_DEVBUF);
1625	adapter->msix_entries = NULL;
1626
1627	return (rc);
1628}
1629
1630static void
1631ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1632{
1633
1634	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1635	    ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1636	    device_get_nameunit(adapter->pdev));
1637	/*
1638	 * The handler is NULL on purpose; it will be set
1639	 * when the management interrupt is acquired.
1640	 */
1641	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1642	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1643	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1644	    adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1645}
1646
1647static int
1648ena_setup_io_intr(struct ena_adapter *adapter)
1649{
1650	static int last_bind_cpu = -1;
1651	int irq_idx;
1652
1653	if (adapter->msix_entries == NULL)
1654		return (EINVAL);
1655
1656	for (int i = 0; i < adapter->num_io_queues; i++) {
1657		irq_idx = ENA_IO_IRQ_IDX(i);
1658
1659		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1660		    "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
1661		adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1662		adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1663		adapter->irq_tbl[irq_idx].vector =
1664		    adapter->msix_entries[irq_idx].vector;
1665		ena_trace(ENA_INFO | ENA_IOQ, "ena_setup_io_intr vector: %d\n",
1666		    adapter->msix_entries[irq_idx].vector);
1667
1668		/*
1669		 * Bind each ring to its corresponding CPU in a round-robin
1670		 * fashion, similar to the RSS spreading technique.
1671		 */
1672		if (unlikely(last_bind_cpu < 0))
1673			last_bind_cpu = CPU_FIRST();
1674		adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1675		    last_bind_cpu;
1676		last_bind_cpu = CPU_NEXT(last_bind_cpu);
1677	}
1678
1679	return (0);
1680}
1681
1682static int
1683ena_request_mgmnt_irq(struct ena_adapter *adapter)
1684{
1685	struct ena_irq *irq;
1686	unsigned long flags;
1687	int rc, rcc;
1688
1689	flags = RF_ACTIVE | RF_SHAREABLE;
1690
1691	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1692	irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1693	    &irq->vector, flags);
1694
1695	if (unlikely(irq->res == NULL)) {
1696		device_printf(adapter->pdev, "could not allocate "
1697		    "irq vector: %d\n", irq->vector);
1698		return (ENXIO);
1699	}
1700
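	/*
	 * The filter argument is NULL and ena_intr_msix_mgmnt is passed as
	 * the ithread handler, so admin/AENQ processing runs in thread
	 * context rather than in the interrupt filter.
	 */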
1701	rc = bus_setup_intr(adapter->pdev, irq->res,
1702	    INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt,
1703	    irq->data, &irq->cookie);
1704	if (unlikely(rc != 0)) {
1705		device_printf(adapter->pdev, "failed to register "
1706		    "interrupt handler for irq %ju: %d\n",
1707		    rman_get_start(irq->res), rc);
1708		goto err_res_free;
1709	}
1710	irq->requested = true;
1711
1712	return (rc);
1713
1714err_res_free:
1715	ena_trace(ENA_INFO | ENA_ADMQ, "releasing resource for irq %d\n",
1716	    irq->vector);
1717	rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1718	    irq->vector, irq->res);
1719	if (unlikely(rcc != 0))
1720		device_printf(adapter->pdev, "dev has no parent while "
1721		    "releasing res for irq: %d\n", irq->vector);
1722	irq->res = NULL;
1723
1724	return (rc);
1725}
1726
1727static int
1728ena_request_io_irq(struct ena_adapter *adapter)
1729{
1730	struct ena_irq *irq;
1731	unsigned long flags = 0;
1732	int rc = 0, i, rcc;
1733
1734	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) {
1735		device_printf(adapter->pdev,
1736		    "failed to request I/O IRQ: MSI-X is not enabled\n");
1737		return (EINVAL);
1738	} else {
1739		flags = RF_ACTIVE | RF_SHAREABLE;
1740	}
1741
1742	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1743		irq = &adapter->irq_tbl[i];
1744
1745		if (unlikely(irq->requested))
1746			continue;
1747
1748		irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1749		    &irq->vector, flags);
1750		if (unlikely(irq->res == NULL)) {
1751			rc = ENOMEM;
1752			device_printf(adapter->pdev, "could not allocate "
1753			    "irq vector: %d\n", irq->vector);
1754			goto err;
1755		}
1756
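		/*
		 * Unlike the management IRQ, irq->handler (ena_handle_msix)
		 * is installed as a filter with no ithread handler; it only
		 * enqueues the cleanup task.
		 */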
1757		rc = bus_setup_intr(adapter->pdev, irq->res,
1758		    INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL,
1759		    irq->data, &irq->cookie);
1760		if (unlikely(rc != 0)) {
1761			device_printf(adapter->pdev, "failed to register "
1762			    "interrupt handler for irq %ju: %d\n",
1763			    rman_get_start(irq->res), rc);
1764			goto err;
1765		}
1766		irq->requested = true;
1767
1768		ena_trace(ENA_INFO, "queue %d - cpu %d\n",
1769		    i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
1770	}
1771
1772	return (rc);
1773
1774err:
1775
1776	for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
1777		irq = &adapter->irq_tbl[i];
1778		rcc = 0;
1779
1780		/* If the err: section was reached with irq->requested set,
1781		   both the interrupt handler and the resource must be freed. */
1782		if (irq->requested)
1783			rcc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1784		if (unlikely(rcc != 0))
1785			device_printf(adapter->pdev, "could not release"
1786			    " irq: %d, error: %d\n", irq->vector, rcc);
1787
1788		/* If we reached the err: section without irq->requested set, only
1789		   the bus_alloc_resource_any() allocation needs cleanup, provided
1790		   res is not NULL. If res is NULL, no work is needed in
1791		   this iteration. */
1792		rcc = 0;
1793		if (irq->res != NULL) {
1794			rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1795			    irq->vector, irq->res);
1796		}
1797		if (unlikely(rcc != 0))
1798			device_printf(adapter->pdev, "dev has no parent while "
1799			    "releasing res for irq: %d\n", irq->vector);
1800		irq->requested = false;
1801		irq->res = NULL;
1802	}
1803
1804	return (rc);
1805}
1806
1807static void
1808ena_free_mgmnt_irq(struct ena_adapter *adapter)
1809{
1810	struct ena_irq *irq;
1811	int rc;
1812
1813	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1814	if (irq->requested) {
1815		ena_trace(ENA_INFO | ENA_ADMQ, "tear down irq: %d\n",
1816		    irq->vector);
1817		rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1818		if (unlikely(rc != 0))
1819			device_printf(adapter->pdev, "failed to tear "
1820			    "down irq: %d\n", irq->vector);
1821		irq->requested = 0;
1822	}
1823
1824	if (irq->res != NULL) {
1825		ena_trace(ENA_INFO | ENA_ADMQ, "release resource irq: %d\n",
1826		    irq->vector);
1827		rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1828		    irq->vector, irq->res);
1829		irq->res = NULL;
1830		if (unlikely(rc != 0))
1831			device_printf(adapter->pdev, "dev has no parent while "
1832			    "releasing res for irq: %d\n", irq->vector);
1833	}
1834}
1835
1836static void
1837ena_free_io_irq(struct ena_adapter *adapter)
1838{
1839	struct ena_irq *irq;
1840	int rc;
1841
1842	for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1843		irq = &adapter->irq_tbl[i];
1844		if (irq->requested) {
1845			ena_trace(ENA_INFO | ENA_IOQ, "tear down irq: %d\n",
1846			    irq->vector);
1847			rc = bus_teardown_intr(adapter->pdev, irq->res,
1848			    irq->cookie);
1849			if (unlikely(rc != 0)) {
1850				device_printf(adapter->pdev, "failed to tear "
1851				    "down irq: %d\n", irq->vector);
1852			}
1853			irq->requested = 0;
1854		}
1855
1856		if (irq->res != NULL) {
1857			ena_trace(ENA_INFO | ENA_IOQ, "release resource irq: %d\n",
1858			    irq->vector);
1859			rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1860			    irq->vector, irq->res);
1861			irq->res = NULL;
1862			if (unlikely(rc != 0)) {
1863				device_printf(adapter->pdev, "dev has no parent"
1864				    " while releasing res for irq: %d\n",
1865				    irq->vector);
1866			}
1867		}
1868	}
1869}
1870
1871static void
1872ena_free_irqs(struct ena_adapter* adapter)
1873{
1874
1875	ena_free_io_irq(adapter);
1876	ena_free_mgmnt_irq(adapter);
1877	ena_disable_msix(adapter);
1878}
1879
1880static void
1881ena_disable_msix(struct ena_adapter *adapter)
1882{
1883
1884	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1885		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1886		pci_release_msi(adapter->pdev);
1887	}
1888
1889	adapter->msix_vecs = 0;
1890	if (adapter->msix_entries != NULL)
1891		free(adapter->msix_entries, M_DEVBUF);
1892	adapter->msix_entries = NULL;
1893}
1894
1895static void
1896ena_unmask_all_io_irqs(struct ena_adapter *adapter)
1897{
1898	struct ena_com_io_cq* io_cq;
1899	struct ena_eth_io_intr_reg intr_reg;
1900	uint16_t ena_qid;
1901	int i;
1902
1903	/* Unmask interrupts for all queues */
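	/*
	 * Both directions of a Tx/Rx pair are serviced by the same MSI-X
	 * vector, so the unmask doorbell is written once per pair, through
	 * the Tx CQ.
	 */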
1904	for (i = 0; i < adapter->num_io_queues; i++) {
1905		ena_qid = ENA_IO_TXQ_IDX(i);
1906		io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1907		ena_com_update_intr_reg(&intr_reg, 0, 0, true);
1908		ena_com_unmask_intr(io_cq, &intr_reg);
1909	}
1910}
1911
1912/* Configure the Rx forwarding */
1913static int
1914ena_rss_configure(struct ena_adapter *adapter)
1915{
1916	struct ena_com_dev *ena_dev = adapter->ena_dev;
1917	int rc;
1918
1919	/* In case the RSS table was destroyed */
1920	if (!ena_dev->rss.tbl_log_size) {
1921		rc = ena_rss_init_default(adapter);
1922		if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
1923			device_printf(adapter->pdev,
1924			    "WARNING: RSS was not properly re-initialized,"
1925			    " it will affect bandwidth\n");
1926			ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
1927			return (rc);
1928		}
1929	}
1930
1931	/* Set indirect table */
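	/*
	 * EOPNOTSUPP is tolerated in the steps below, as a device may
	 * implement only a subset of the RSS configuration interface.
	 */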
1932	rc = ena_com_indirect_table_set(ena_dev);
1933	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
1934		return (rc);
1935
1936	/* Configure hash function (if supported) */
1937	rc = ena_com_set_hash_function(ena_dev);
1938	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
1939		return (rc);
1940
1941	/* Configure hash inputs (if supported) */
1942	rc = ena_com_set_hash_ctrl(ena_dev);
1943	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
1944		return (rc);
1945
1946	return (0);
1947}
1948
1949static int
1950ena_up_complete(struct ena_adapter *adapter)
1951{
1952	int rc;
1953
1954	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1955		rc = ena_rss_configure(adapter);
1956		if (rc != 0) {
1957			device_printf(adapter->pdev,
1958			    "Failed to configure RSS\n");
1959			return (rc);
1960		}
1961	}
1962
1963	rc = ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);
1964	if (unlikely(rc != 0))
1965		return (rc);
1966
1967	ena_refill_all_rx_bufs(adapter);
1968	ena_reset_counters((counter_u64_t *)&adapter->hw_stats,
1969	    sizeof(adapter->hw_stats));
1970
1971	return (0);
1972}
1973
1974static void
1975set_io_rings_size(struct ena_adapter *adapter, int new_tx_size,
1976    int new_rx_size)
1977{
1978	int i;
1979
1980	for (i = 0; i < adapter->num_io_queues; i++) {
1981		adapter->tx_ring[i].ring_size = new_tx_size;
1982		adapter->rx_ring[i].ring_size = new_rx_size;
1983	}
1984}
1985
1986static int
1987create_queues_with_size_backoff(struct ena_adapter *adapter)
1988{
1989	int rc;
1990	uint32_t cur_rx_ring_size, cur_tx_ring_size;
1991	uint32_t new_rx_ring_size, new_tx_ring_size;
1992
1993	/*
1994	 * Current queue sizes might be set to smaller than the requested
1995	 * ones due to past queue allocation failures.
1996	 */
1997	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
1998	    adapter->requested_rx_ring_size);
1999
2000	while (1) {
2001		/* Allocate transmit descriptors */
2002		rc = ena_setup_all_tx_resources(adapter);
2003		if (unlikely(rc != 0)) {
2004			ena_trace(ENA_ALERT, "Tx resource setup failed\n");
2005			goto err_setup_tx;
2006		}
2007
2008		/* Allocate receive descriptors */
2009		rc = ena_setup_all_rx_resources(adapter);
2010		if (unlikely(rc != 0)) {
2011			ena_trace(ENA_ALERT, "Rx resource setup failed\n");
2012			goto err_setup_rx;
2013		}
2014
2015		/* Create IO queues for Rx & Tx */
2016		rc = ena_create_io_queues(adapter);
2017		if (unlikely(rc != 0)) {
2018			ena_trace(ENA_ALERT,
2019			    "create IO queues failed\n");
2020			goto err_io_que;
2021		}
2022
2023		return (0);
2024
2025err_io_que:
2026		ena_free_all_rx_resources(adapter);
2027err_setup_rx:
2028		ena_free_all_tx_resources(adapter);
2029err_setup_tx:
2030		/*
2031		 * Lower the ring size if ENOMEM. Otherwise, return the
2032		 * error straightaway.
2033		 */
2034		if (unlikely(rc != ENOMEM)) {
2035			ena_trace(ENA_ALERT,
2036			    "Queue creation failed with error code: %d\n", rc);
2037			return (rc);
2038		}
2039
2040		cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2041		cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2042
2043		device_printf(adapter->pdev,
2044		    "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2045		    cur_tx_ring_size, cur_rx_ring_size);
2046
2047		new_tx_ring_size = cur_tx_ring_size;
2048		new_rx_ring_size = cur_rx_ring_size;
2049
2050		/*
2051		 * Decrease the size of the larger queue, or decrease both if they are
2052		 * the same size.
2053		 */
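		/*
		 * E.g. TX=1024/RX=1024 halves both to 512, while
		 * TX=1024/RX=256 halves only TX, to 512.
		 */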
2054		if (cur_rx_ring_size <= cur_tx_ring_size)
2055			new_tx_ring_size = cur_tx_ring_size / 2;
2056		if (cur_rx_ring_size >= cur_tx_ring_size)
2057			new_rx_ring_size = cur_rx_ring_size / 2;
2058
2059		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2060		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
2061			device_printf(adapter->pdev,
2062			    "Queue creation failed with the smallest possible queue size "
2063			    "of %d for both queues. Not retrying with smaller queues\n",
2064			    ENA_MIN_RING_SIZE);
2065			return (rc);
2066		}
2067
2068		set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size);
2069	}
2070}
2071
2072int
2073ena_up(struct ena_adapter *adapter)
2074{
2075	int rc = 0;
2076
2077	if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2078		device_printf(adapter->pdev, "device is not attached!\n");
2079		return (ENXIO);
2080	}
2081
2082	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2083		return (0);
2084
2085	device_printf(adapter->pdev, "device is going UP\n");
2086
2087	/* setup interrupts for IO queues */
2088	rc = ena_setup_io_intr(adapter);
2089	if (unlikely(rc != 0)) {
2090		ena_trace(ENA_ALERT, "error setting up IO interrupt\n");
2091		goto error;
2092	}
2093	rc = ena_request_io_irq(adapter);
2094	if (unlikely(rc != 0)) {
2095		ena_trace(ENA_ALERT, "requesting I/O IRQs failed\n");
2096		goto error;
2097	}
2098
2099	device_printf(adapter->pdev,
2100	    "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, "
2101	    "LLQ is %s\n",
2102	    adapter->num_io_queues,
2103	    adapter->requested_rx_ring_size,
2104	    adapter->requested_tx_ring_size,
2105	    (adapter->ena_dev->tx_mem_queue_type ==
2106	        ENA_ADMIN_PLACEMENT_POLICY_DEV) ?  "ENABLED" : "DISABLED");
2107
2108	rc = create_queues_with_size_backoff(adapter);
2109	if (unlikely(rc != 0)) {
2110		ena_trace(ENA_ALERT,
2111		    "error creating queues with size backoff\n");
2112		goto err_create_queues_with_backoff;
2113	}
2114
2115	if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
2116		if_link_state_change(adapter->ifp, LINK_STATE_UP);
2117
2118	rc = ena_up_complete(adapter);
2119	if (unlikely(rc != 0))
2120		goto err_up_complete;
2121
2122	counter_u64_add(adapter->dev_stats.interface_up, 1);
2123
2124	ena_update_hwassist(adapter);
2125
2126	if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING,
2127	    IFF_DRV_OACTIVE);
2128
2129	/* Activate the timer service only if the device is running.
2130	 * If this flag is not set, the driver is being reset and the
2131	 * timer service will be activated afterwards.
2132	 */
2133	if (ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)) {
2134		callout_reset_sbt(&adapter->timer_service, SBT_1S,
2135		    SBT_1S, ena_timer_service, (void *)adapter, 0);
2136	}
2137
2138	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2139
2140	ena_unmask_all_io_irqs(adapter);
2141
2142	return (0);
2143
2144err_up_complete:
2145	ena_destroy_all_io_queues(adapter);
2146	ena_free_all_rx_resources(adapter);
2147	ena_free_all_tx_resources(adapter);
2148err_create_queues_with_backoff:
2149	ena_free_io_irq(adapter);
2150error:
2151	return (rc);
2152}
2153
2154static uint64_t
2155ena_get_counter(if_t ifp, ift_counter cnt)
2156{
2157	struct ena_adapter *adapter;
2158	struct ena_hw_stats *stats;
2159
2160	adapter = if_getsoftc(ifp);
2161	stats = &adapter->hw_stats;
2162
2163	switch (cnt) {
2164	case IFCOUNTER_IPACKETS:
2165		return (counter_u64_fetch(stats->rx_packets));
2166	case IFCOUNTER_OPACKETS:
2167		return (counter_u64_fetch(stats->tx_packets));
2168	case IFCOUNTER_IBYTES:
2169		return (counter_u64_fetch(stats->rx_bytes));
2170	case IFCOUNTER_OBYTES:
2171		return (counter_u64_fetch(stats->tx_bytes));
2172	case IFCOUNTER_IQDROPS:
2173		return (counter_u64_fetch(stats->rx_drops));
2174	case IFCOUNTER_OQDROPS:
2175		return (counter_u64_fetch(stats->tx_drops));
2176	default:
2177		return (if_get_counter_default(ifp, cnt));
2178	}
2179}
2180
2181static int
2182ena_media_change(if_t ifp)
2183{
2184	/* Media Change is not supported by firmware */
2185	return (0);
2186}
2187
2188static void
2189ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2190{
2191	struct ena_adapter *adapter = if_getsoftc(ifp);
2192	ena_trace(ENA_DBG, "enter\n");
2193
2194	ENA_LOCK_LOCK(adapter);
2195
2196	ifmr->ifm_status = IFM_AVALID;
2197	ifmr->ifm_active = IFM_ETHER;
2198
2199	if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) {
2200		ENA_LOCK_UNLOCK(adapter);
2201		ena_trace(ENA_INFO, "Link is down\n");
2202		return;
2203	}
2204
2205	ifmr->ifm_status |= IFM_ACTIVE;
2206	ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX;
2207
2208	ENA_LOCK_UNLOCK(adapter);
2209}
2210
2211static void
2212ena_init(void *arg)
2213{
2214	struct ena_adapter *adapter = (struct ena_adapter *)arg;
2215
2216	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2217		ENA_LOCK_LOCK(adapter);
2218		ena_up(adapter);
2219		ENA_LOCK_UNLOCK(adapter);
2220	}
2221}
2222
2223static int
2224ena_ioctl(if_t ifp, u_long command, caddr_t data)
2225{
2226	struct ena_adapter *adapter;
2227	struct ifreq *ifr;
2228	int rc;
2229
2230	adapter = ifp->if_softc;
2231	ifr = (struct ifreq *)data;
2232
2233	/*
2234	 * Acquire the lock to prevent the up and down routines from running in parallel.
2235	 */
2236	rc = 0;
2237	switch (command) {
2238	case SIOCSIFMTU:
2239		if (ifp->if_mtu == ifr->ifr_mtu)
2240			break;
2241		ENA_LOCK_LOCK(adapter);
2242		ena_down(adapter);
2243
2244		ena_change_mtu(ifp, ifr->ifr_mtu);
2245
2246		rc = ena_up(adapter);
2247		ENA_LOCK_UNLOCK(adapter);
2248		break;
2249
2250	case SIOCSIFFLAGS:
2251		if ((ifp->if_flags & IFF_UP) != 0) {
2252			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2253				if ((ifp->if_flags & (IFF_PROMISC |
2254				    IFF_ALLMULTI)) != 0) {
2255					device_printf(adapter->pdev,
2256					    "ioctl promisc/allmulti\n");
2257				}
2258			} else {
2259				ENA_LOCK_LOCK(adapter);
2260				rc = ena_up(adapter);
2261				ENA_LOCK_UNLOCK(adapter);
2262			}
2263		} else {
2264			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2265				ENA_LOCK_LOCK(adapter);
2266				ena_down(adapter);
2267				ENA_LOCK_UNLOCK(adapter);
2268			}
2269		}
2270		break;
2271
2272	case SIOCADDMULTI:
2273	case SIOCDELMULTI:
2274		break;
2275
2276	case SIOCSIFMEDIA:
2277	case SIOCGIFMEDIA:
2278		rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2279		break;
2280
2281	case SIOCSIFCAP:
2282		{
2283			int reinit = 0;
2284
2285			if (ifr->ifr_reqcap != ifp->if_capenable) {
2286				ifp->if_capenable = ifr->ifr_reqcap;
2287				reinit = 1;
2288			}
2289
2290			if ((reinit != 0) &&
2291			    ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) {
2292				ENA_LOCK_LOCK(adapter);
2293				ena_down(adapter);
2294				rc = ena_up(adapter);
2295				ENA_LOCK_UNLOCK(adapter);
2296			}
2297		}
2298
2299		break;
2300	default:
2301		rc = ether_ioctl(ifp, command, data);
2302		break;
2303	}
2304
2305	return (rc);
2306}
2307
2308static int
2309ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2310{
2311	int caps = 0;
2312
2313	if ((feat->offload.tx &
2314	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2315	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2316	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2317		caps |= IFCAP_TXCSUM;
2318
2319	if ((feat->offload.tx &
2320	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2321	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2322		caps |= IFCAP_TXCSUM_IPV6;
2323
2324	if ((feat->offload.tx &
2325	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2326		caps |= IFCAP_TSO4;
2327
2328	if ((feat->offload.tx &
2329	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2330		caps |= IFCAP_TSO6;
2331
2332	if ((feat->offload.rx_supported &
2333	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2334	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2335		caps |= IFCAP_RXCSUM;
2336
2337	if ((feat->offload.rx_supported &
2338	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2339		caps |= IFCAP_RXCSUM_IPV6;
2340
2341	caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
2342
2343	return (caps);
2344}
2345
2346static void
2347ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2348{
2349
2350	host_info->supported_network_features[0] =
2351	    (uint32_t)if_getcapabilities(ifp);
2352}
2353
2354static void
2355ena_update_hwassist(struct ena_adapter *adapter)
2356{
2357	if_t ifp = adapter->ifp;
2358	uint32_t feat = adapter->tx_offload_cap;
2359	int cap = if_getcapenable(ifp);
2360	int flags = 0;
2361
2362	if_clearhwassist(ifp);
2363
2364	if ((cap & IFCAP_TXCSUM) != 0) {
2365		if ((feat &
2366		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2367			flags |= CSUM_IP;
2368		if ((feat &
2369		    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2370		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2371			flags |= CSUM_IP_UDP | CSUM_IP_TCP;
2372	}
2373
2374	if ((cap & IFCAP_TXCSUM_IPV6) != 0)
2375		flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2376
2377	if ((cap & IFCAP_TSO4) != 0)
2378		flags |= CSUM_IP_TSO;
2379
2380	if ((cap & IFCAP_TSO6) != 0)
2381		flags |= CSUM_IP6_TSO;
2382
2383	if_sethwassistbits(ifp, flags, 0);
2384}
2385
2386static int
2387ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2388    struct ena_com_dev_get_features_ctx *feat)
2389{
2390	if_t ifp;
2391	int caps = 0;
2392
2393	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2394	if (unlikely(ifp == NULL)) {
2395		ena_trace(ENA_ALERT, "can not allocate ifnet structure\n");
2396		return (ENXIO);
2397	}
2398	if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2399	if_setdev(ifp, pdev);
2400	if_setsoftc(ifp, adapter);
2401
2402	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2403	if_setinitfn(ifp, ena_init);
2404	if_settransmitfn(ifp, ena_mq_start);
2405	if_setqflushfn(ifp, ena_qflush);
2406	if_setioctlfn(ifp, ena_ioctl);
2407	if_setgetcounterfn(ifp, ena_get_counter);
2408
2409	if_setsendqlen(ifp, adapter->requested_tx_ring_size);
2410	if_setsendqready(ifp);
2411	if_setmtu(ifp, ETHERMTU);
2412	if_setbaudrate(ifp, 0);
2413	/* Zeroize capabilities... */
2414	if_setcapabilities(ifp, 0);
2415	if_setcapenable(ifp, 0);
2416	/* check hardware support */
2417	caps = ena_get_dev_offloads(feat);
2418	/* ... and set them */
2419	if_setcapabilitiesbit(ifp, caps, 0);
2420
2421	/* TSO parameters */
2422	ifp->if_hw_tsomax = ENA_TSO_MAXSIZE -
2423	    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2424	ifp->if_hw_tsomaxsegcount = adapter->max_tx_sgl_size - 1;
2425	ifp->if_hw_tsomaxsegsize = ENA_TSO_MAXSIZE;
2426
2427	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2428	if_setcapenable(ifp, if_getcapabilities(ifp));
2429
2430	/*
2431	 * Specify the media types supported by this adapter and register
2432	 * callbacks to update media and link information
2433	 */
2434	ifmedia_init(&adapter->media, IFM_IMASK,
2435	    ena_media_change, ena_media_status);
2436	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2437	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2438
2439	ether_ifattach(ifp, adapter->mac_addr);
2440
2441	return (0);
2442}
2443
2444void
2445ena_down(struct ena_adapter *adapter)
2446{
2447	int rc;
2448
2449	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2450		return;
2451
2452	device_printf(adapter->pdev, "device is going DOWN\n");
2453
2454	callout_drain(&adapter->timer_service);
2455
2456	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2457	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE,
2458	    IFF_DRV_RUNNING);
2459
2460	ena_free_io_irq(adapter);
2461
2462	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) {
2463		rc = ena_com_dev_reset(adapter->ena_dev,
2464		    adapter->reset_reason);
2465		if (unlikely(rc != 0))
2466			device_printf(adapter->pdev,
2467			    "Device reset failed\n");
2468	}
2469
2470	ena_destroy_all_io_queues(adapter);
2471
2472	ena_free_all_tx_bufs(adapter);
2473	ena_free_all_rx_bufs(adapter);
2474	ena_free_all_tx_resources(adapter);
2475	ena_free_all_rx_resources(adapter);
2476
2477	counter_u64_add(adapter->dev_stats.interface_down, 1);
2478}
2479
2480static uint32_t
2481ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev,
2482    struct ena_com_dev_get_features_ctx *get_feat_ctx)
2483{
2484	uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
2485
2486	/* Regular queues capabilities */
2487	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2488		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2489		    &get_feat_ctx->max_queue_ext.max_queue_ext;
2490		io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
2491			max_queue_ext->max_rx_cq_num);
2492
2493		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2494		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2495	} else {
2496		struct ena_admin_queue_feature_desc *max_queues =
2497		    &get_feat_ctx->max_queues;
2498		io_tx_sq_num = max_queues->max_sq_num;
2499		io_tx_cq_num = max_queues->max_cq_num;
2500		io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
2501	}
2502
2503	/* In case of LLQ use the llq fields for the tx SQ/CQ */
2504	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2505		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2506
2507	max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
2508	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num);
2509	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num);
2510	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num);
2511	/* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */
2512	max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2513	    pci_msix_count(pdev) - 1);
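	/*
	 * The result is min(mp_ncpus, ENA_MAX_NUM_IO_QUEUES, io_rx_num,
	 * io_tx_sq_num, io_tx_cq_num, pci_msix_count() - 1).
	 */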
2514
2515	return (max_num_io_queues);
2516}
2517
2518static int
2519ena_enable_wc(struct resource *res)
2520{
2521#if defined(__i386) || defined(__amd64)
2522	vm_offset_t va;
2523	vm_size_t len;
2524	int rc;
2525
2526	va = (vm_offset_t)rman_get_virtual(res);
2527	len = rman_get_size(res);
2528	/* Enable write combining */
2529	rc = pmap_change_attr(va, len, PAT_WRITE_COMBINING);
2530	if (unlikely(rc != 0)) {
2531		ena_trace(ENA_ALERT, "pmap_change_attr failed, %d\n", rc);
2532		return (rc);
2533	}
2534
2535	return (0);
2536#endif
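	/* Write combining is only implemented for x86 above; any other
	 * architecture falls through and reports EOPNOTSUPP. */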
2537	return (EOPNOTSUPP);
2538}
2539
2540static int
2541ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev,
2542    struct ena_admin_feature_llq_desc *llq,
2543    struct ena_llq_configurations *llq_default_configurations)
2544{
2545	struct ena_adapter *adapter = device_get_softc(pdev);
2546	int rc, rid;
2547	uint32_t llq_feature_mask;
2548
2549	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
2550	if (!(ena_dev->supported_features & llq_feature_mask)) {
2551		device_printf(pdev,
2552		    "LLQ is not supported. Fallback to host mode policy.\n");
2553		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2554		return (0);
2555	}
2556
2557	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
2558	if (unlikely(rc != 0)) {
2559		device_printf(pdev, "Failed to configure the device mode. "
2560		    "Fallback to host mode policy.\n");
2561		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2562		return (0);
2563	}
2564
2565	/* Nothing to configure, exit */
2566	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
2567		return (0);
2568
2569	/* Try to allocate resources for the LLQ BAR */
2570	rid = PCIR_BAR(ENA_MEM_BAR);
2571	adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
2572	    &rid, RF_ACTIVE);
2573	if (unlikely(adapter->memory == NULL)) {
2574		device_printf(pdev, "unable to allocate LLQ bar resource. "
2575		    "Fallback to host mode policy.\n");
2576		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2577		return (0);
2578	}
2579
2580	/* Enable write combining for better LLQ performance */
2581	rc = ena_enable_wc(adapter->memory);
2582	if (unlikely(rc != 0)) {
2583		device_printf(pdev, "failed to enable write combining.\n");
2584		return (rc);
2585	}
2586
2587	/*
2588	 * Save virtual address of the device's memory region
2589	 * for the ena_com layer.
2590	 */
2591	ena_dev->mem_bar = rman_get_virtual(adapter->memory);
2592
2593	return (0);
2594}
2595
2596static inline void
set_default_llq_configurations(struct ena_llq_configurations *llq_config)
2598{
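	/*
	 * The defaults select inline headers in 128-byte LLQ entries, with
	 * two descriptors placed before the header (the _2 constant).
	 */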
2599	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
2600	llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
2601	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
2602	llq_config->llq_num_decs_before_header =
2603	    ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
2604	llq_config->llq_ring_entry_size_value = 128;
2605}
2606
2607static int
2608ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
2609{
2610	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
2611	struct ena_com_dev *ena_dev = ctx->ena_dev;
2612	uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE;
2613	uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE;
2614	uint32_t max_tx_queue_size;
2615	uint32_t max_rx_queue_size;
2616
2617	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2618		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2619		    &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
2620		max_rx_queue_size = min_t(uint32_t,
2621		    max_queue_ext->max_rx_cq_depth,
2622		    max_queue_ext->max_rx_sq_depth);
2623		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
2624
2625		if (ena_dev->tx_mem_queue_type ==
2626		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
2627			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2628			    llq->max_llq_depth);
2629		else
2630			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2631			    max_queue_ext->max_tx_sq_depth);
2632
2633		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2634		    max_queue_ext->max_per_packet_tx_descs);
2635		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2636		    max_queue_ext->max_per_packet_rx_descs);
2637	} else {
2638		struct ena_admin_queue_feature_desc *max_queues =
2639		    &ctx->get_feat_ctx->max_queues;
2640		max_rx_queue_size = min_t(uint32_t,
2641		    max_queues->max_cq_depth,
2642		    max_queues->max_sq_depth);
2643		max_tx_queue_size = max_queues->max_cq_depth;
2644
2645		if (ena_dev->tx_mem_queue_type ==
2646		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
2647			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2648			    llq->max_llq_depth);
2649		else
2650			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2651			    max_queues->max_sq_depth);
2652
2653		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2654		    max_queues->max_packet_tx_descs);
2655		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2656		    max_queues->max_packet_rx_descs);
2657	}
2658
2659	/* round down to the nearest power of 2 */
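	/* flsl() returns the one-based index of the most significant set
	 * bit, so 1 << (flsl(x) - 1) maps e.g. 1000 down to 512. */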
2660	max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1);
2661	max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1);
2662
2663	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
2664	    max_tx_queue_size);
2665	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
2666	    max_rx_queue_size);
2667
2668	tx_queue_size = 1 << (flsl(tx_queue_size) - 1);
2669	rx_queue_size = 1 << (flsl(rx_queue_size) - 1);
2670
2671	ctx->max_tx_queue_size = max_tx_queue_size;
2672	ctx->max_rx_queue_size = max_rx_queue_size;
2673	ctx->tx_queue_size = tx_queue_size;
2674	ctx->rx_queue_size = rx_queue_size;
2675
2676	return (0);
2677}
2678
2679static int
2680ena_rss_init_default(struct ena_adapter *adapter)
2681{
2682	struct ena_com_dev *ena_dev = adapter->ena_dev;
2683	device_t dev = adapter->pdev;
2684	int qid, rc, i;
2685
2686	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
2687	if (unlikely(rc != 0)) {
2688		device_printf(dev, "Cannot init indirect table\n");
2689		return (rc);
2690	}
2691
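	/*
	 * Stripe the RX queues across the indirection table round-robin:
	 * entry i points at queue (i % num_io_queues).
	 */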
2692	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
2693		qid = i % adapter->num_io_queues;
2694		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
2695		    ENA_IO_RXQ_IDX(qid));
2696		if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
2697			device_printf(dev, "Cannot fill indirect table\n");
2698			goto err_rss_destroy;
2699		}
2700	}
2701
2702	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
2703	    ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
2704	if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
2705		device_printf(dev, "Cannot fill hash function\n");
2706		goto err_rss_destroy;
2707	}
2708
2709	rc = ena_com_set_default_hash_ctrl(ena_dev);
2710	if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
2711		device_printf(dev, "Cannot fill hash control\n");
2712		goto err_rss_destroy;
2713	}
2714
2715	return (0);
2716
2717err_rss_destroy:
2718	ena_com_rss_destroy(ena_dev);
2719	return (rc);
2720}
2721
2722static void
2723ena_rss_init_default_deferred(void *arg)
2724{
2725	struct ena_adapter *adapter;
2726	devclass_t dc;
2727	int max;
2728	int rc;
2729
2730	dc = devclass_find("ena");
2731	if (unlikely(dc == NULL)) {
2732		ena_trace(ENA_ALERT, "No devclass ena\n");
2733		return;
2734	}
2735
2736	max = devclass_get_maxunit(dc);
2737	while (max-- >= 0) {
2738		adapter = devclass_get_softc(dc, max);
2739		if (adapter != NULL) {
2740			rc = ena_rss_init_default(adapter);
2741			ENA_FLAG_SET_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
2742			if (unlikely(rc != 0)) {
2743				device_printf(adapter->pdev,
2744				    "WARNING: RSS was not properly initialized,"
2745				    " it will affect bandwidth\n");
2746				ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
2747			}
2748		}
2749	}
2750}
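/*
 * The default RSS setup is deferred until SI_SUB_KICK_SCHEDULER so that
 * every attached ena adapter can be walked through its devclass.
 */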
2751SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL);
2752
2753static void
2754ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
2755{
2756	struct ena_admin_host_info *host_info;
2757	uintptr_t rid;
2758	int rc;
2759
2760	/* Allocate only the host info */
2761	rc = ena_com_allocate_host_info(ena_dev);
2762	if (unlikely(rc != 0)) {
2763		ena_trace(ENA_ALERT, "Cannot allocate host info\n");
2764		return;
2765	}
2766
2767	host_info = ena_dev->host_attr.host_info;
2768
2769	if (pci_get_id(dev, PCI_ID_RID, &rid) == 0)
2770		host_info->bdf = rid;
2771	host_info->os_type = ENA_ADMIN_OS_FREEBSD;
2772	host_info->kernel_ver = osreldate;
2773
2774	sprintf(host_info->kernel_ver_str, "%d", osreldate);
2775	host_info->os_dist = 0;
2776	strncpy(host_info->os_dist_str, osrelease,
2777	    sizeof(host_info->os_dist_str) - 1);
2778
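	/* Pack major/minor/subminor into a single 32-bit version field. */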
2779	host_info->driver_version =
2780		(DRV_MODULE_VER_MAJOR) |
2781		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2782		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
2783	host_info->num_cpus = mp_ncpus;
2784
2785	rc = ena_com_set_host_attributes(ena_dev);
2786	if (unlikely(rc != 0)) {
2787		if (rc == EOPNOTSUPP)
2788			ena_trace(ENA_WARNING, "Cannot set host attributes\n");
2789		else
2790			ena_trace(ENA_ALERT, "Cannot set host attributes\n");
2791
2792		goto err;
2793	}
2794
2795	return;
2796
2797err:
2798	ena_com_delete_host_info(ena_dev);
2799}
2800
2801static int
2802ena_device_init(struct ena_adapter *adapter, device_t pdev,
2803    struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
2804{
2805	struct ena_com_dev* ena_dev = adapter->ena_dev;
2806	bool readless_supported;
2807	uint32_t aenq_groups;
2808	int dma_width;
2809	int rc;
2810
2811	rc = ena_com_mmio_reg_read_request_init(ena_dev);
2812	if (unlikely(rc != 0)) {
2813		device_printf(pdev, "failed to init mmio read less\n");
2814		return (rc);
2815	}
2816
2817	/*
2818	 * The PCIe configuration space revision ID indicates whether MMIO
2819	 * register read is disabled.
2820	 */
2821	readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
2822	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
2823
2824	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
2825	if (unlikely(rc != 0)) {
2826		device_printf(pdev, "Can not reset device\n");
2827		goto err_mmio_read_less;
2828	}
2829
2830	rc = ena_com_validate_version(ena_dev);
2831	if (unlikely(rc != 0)) {
2832		device_printf(pdev, "device version is too low\n");
2833		goto err_mmio_read_less;
2834	}
2835
2836	dma_width = ena_com_get_dma_width(ena_dev);
2837	if (unlikely(dma_width < 0)) {
2838		device_printf(pdev, "Invalid dma width value %d\n", dma_width);
2839		rc = dma_width;
2840		goto err_mmio_read_less;
2841	}
2842	adapter->dma_width = dma_width;
2843
2844	/* ENA admin level init */
2845	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
2846	if (unlikely(rc != 0)) {
2847		device_printf(pdev,
2848		    "Can not initialize ena admin queue with device\n");
2849		goto err_mmio_read_less;
2850	}
2851
2852	/*
2853	 * To enable the MSI-X interrupts, the driver needs to know the number
2854	 * of queues, so it uses polling mode to retrieve this
2855	 * information.
2856	 */
2857	ena_com_set_admin_polling_mode(ena_dev, true);
2858
2859	ena_config_host_info(ena_dev, pdev);
2860
2861	/* Get Device Attributes */
2862	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
2863	if (unlikely(rc != 0)) {
2864		device_printf(pdev,
2865		    "Cannot get attribute for ena device rc: %d\n", rc);
2866		goto err_admin_init;
2867	}
2868
2869	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
2870	    BIT(ENA_ADMIN_FATAL_ERROR) |
2871	    BIT(ENA_ADMIN_WARNING) |
2872	    BIT(ENA_ADMIN_NOTIFICATION) |
2873	    BIT(ENA_ADMIN_KEEP_ALIVE);
2874
2875	aenq_groups &= get_feat_ctx->aenq.supported_groups;
2876	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
2877	if (unlikely(rc != 0)) {
2878		device_printf(pdev, "Cannot configure aenq groups rc: %d\n", rc);
2879		goto err_admin_init;
2880	}
2881
2882	*wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
2883
2884	return (0);
2885
2886err_admin_init:
2887	ena_com_delete_host_info(ena_dev);
2888	ena_com_admin_destroy(ena_dev);
2889err_mmio_read_less:
2890	ena_com_mmio_reg_read_request_destroy(ena_dev);
2891
2892	return (rc);
2893}
2894
2895static int
ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
2896{
2897	struct ena_com_dev *ena_dev = adapter->ena_dev;
2898	int rc;
2899
2900	rc = ena_enable_msix(adapter);
2901	if (unlikely(rc != 0)) {
2902		device_printf(adapter->pdev, "Error with MSI-X enablement\n");
2903		return (rc);
2904	}
2905
2906	ena_setup_mgmnt_intr(adapter);
2907
2908	rc = ena_request_mgmnt_irq(adapter);
2909	if (unlikely(rc != 0)) {
2910		device_printf(adapter->pdev, "Cannot setup mgmnt queue intr\n");
2911		goto err_disable_msix;
2912	}
2913
2914	ena_com_set_admin_polling_mode(ena_dev, false);
2915
2916	ena_com_admin_aenq_enable(ena_dev);
2917
2918	return (0);
2919
2920err_disable_msix:
2921	ena_disable_msix(adapter);
2922
2923	return (rc);
2924}
2925
2926/* Function called on ENA_ADMIN_KEEP_ALIVE event */
2927static void
ena_keep_alive_wd(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
2929{
2930	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
2931	struct ena_admin_aenq_keep_alive_desc *desc;
2932	sbintime_t stime;
2933	uint64_t rx_drops;
2934	uint64_t tx_drops;
2935
2936	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
2937
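	/*
	 * The device reports the drop counters as two 32-bit halves;
	 * reassemble them and replace (zero, then add) the 64-bit counters.
	 */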
2938	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
2939	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
2940	counter_u64_zero(adapter->hw_stats.rx_drops);
2941	counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
2942	counter_u64_zero(adapter->hw_stats.tx_drops);
2943	counter_u64_add(adapter->hw_stats.tx_drops, tx_drops);
2944
2945	stime = getsbinuptime();
2946	atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
2947}
2948
2949/* Check for keep alive expiration */
2950static void
check_for_missing_keep_alive(struct ena_adapter *adapter)
2951{
2952	sbintime_t timestamp, time;
2953
2954	if (adapter->wd_active == 0)
2955		return;
2956
2957	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2958		return;
2959
2960	timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
2961	time = getsbinuptime() - timestamp;
2962	if (unlikely(time > adapter->keep_alive_timeout)) {
2963		device_printf(adapter->pdev,
2964		    "Keep alive watchdog timeout.\n");
2965		counter_u64_add(adapter->dev_stats.wd_expired, 1);
2966		ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
2967	}
2968}
2969
2970/* Check if admin queue is enabled */
2971static void
check_for_admin_com_state(struct ena_adapter *adapter)
2972{
2973	if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) ==
2974	    false)) {
2975		device_printf(adapter->pdev,
2976		    "ENA admin queue is not in running state!\n");
2977		counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
2978		ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
2979	}
2980}
2981
2982static int
2983check_for_rx_interrupt_queue(struct ena_adapter *adapter,
2984    struct ena_ring *rx_ring)
2985{
2986	if (likely(rx_ring->first_interrupt))
2987		return (0);
2988
2989	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
2990		return (0);
2991
2992	rx_ring->no_interrupt_event_cnt++;
2993
2994	if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
2995		device_printf(adapter->pdev, "Potential MSI-X issue on Rx side, "
2996		    "Queue = %d. Resetting the device\n", rx_ring->qid);
2997		ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
2998		return (EIO);
2999	}
3000
3001	return (0);
3002}
3003
3004static int
3005check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3006    struct ena_ring *tx_ring)
3007{
3008	struct bintime curtime, time;
3009	struct ena_tx_buffer *tx_buf;
3010	sbintime_t time_offset;
3011	uint32_t missed_tx = 0;
3012	int i, rc = 0;
3013
3014	getbinuptime(&curtime);
3015
3016	for (i = 0; i < tx_ring->ring_size; i++) {
3017		tx_buf = &tx_ring->tx_buffer_info[i];
3018
3019		if (bintime_isset(&tx_buf->timestamp) == 0)
3020			continue;
3021
3022		time = curtime;
3023		bintime_sub(&time, &tx_buf->timestamp);
3024		time_offset = bttosbt(time);
3025
3026		if (unlikely(!tx_ring->first_interrupt &&
3027		    time_offset > 2 * adapter->missing_tx_timeout)) {
3028			/*
3029			 * If the interrupt is still not received after the
3030			 * grace period, schedule a reset.
3031			 */
3032			device_printf(adapter->pdev,
3033			    "Potential MSI-X issue on Tx side, Queue = %d. "
3034			    "Resetting the device\n", tx_ring->qid);
3035			ena_trigger_reset(adapter,
3036			    ENA_REGS_RESET_MISS_INTERRUPT);
3037			return (EIO);
3038		}
3039
3040		/* Check again if packet is still waiting */
3041		if (unlikely(time_offset > adapter->missing_tx_timeout)) {
3043			if (!tx_buf->print_once)
3044				ena_trace(ENA_WARNING, "Found a Tx that wasn't "
3045				    "completed on time, qid %d, index %d.\n",
3046				    tx_ring->qid, i);
3047
3048			tx_buf->print_once = true;
3049			missed_tx++;
3050		}
3051	}
3052
3053	if (unlikely(missed_tx > adapter->missing_tx_threshold)) {
3054		device_printf(adapter->pdev,
3055		    "The number of lost Tx completions is above the threshold "
3056		    "(%d > %d). Resetting the device\n",
3057		    missed_tx, adapter->missing_tx_threshold);
3058		ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
3059		rc = EIO;
3060	}
3061
3062	counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx);
3063
3064	return (rc);
3065}
3066
3067/*
3068 * Check for Tx completions which did not arrive on time.
3069 * The timeout is defined by "missing_tx_timeout".
3070 * A reset will be performed if the number of incomplete
3071 * transactions exceeds "missing_tx_threshold".
3072 */
3073static void
3074check_for_missing_completions(struct ena_adapter *adapter)
3075{
3076	struct ena_ring *tx_ring;
3077	struct ena_ring *rx_ring;
3078	int i, budget, rc;
3079
3080	/* Make sure the device isn't being turned off by another process */
3081	rmb();
3082
3083	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3084		return;
3085
3086	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3087		return;
3088
3089	if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3090		return;
3091
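	/*
	 * Scan at most missing_tx_max_queues rings per timer tick;
	 * next_monitored_tx_qid resumes the scan on the following tick.
	 */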
3092	budget = adapter->missing_tx_max_queues;
3093
3094	for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) {
3095		tx_ring = &adapter->tx_ring[i];
3096		rx_ring = &adapter->rx_ring[i];
3097
3098		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3099		if (unlikely(rc != 0))
3100			return;
3101
3102		rc = check_for_rx_interrupt_queue(adapter, rx_ring);
3103		if (unlikely(rc != 0))
3104			return;
3105
3106		budget--;
3107		if (budget == 0) {
3108			i++;
3109			break;
3110		}
3111	}
3112
3113	adapter->next_monitored_tx_qid = i % adapter->num_io_queues;
3114}
3115
3116/* trigger rx cleanup after 2 consecutive detections */
3117#define EMPTY_RX_REFILL 2
3118/* Handle the rare case where the device runs out of Rx descriptors and the
3119 * MSI-X handler failed to refill new Rx descriptors (due to a lack of memory,
3120 * for example).
3121 * This case will lead to a deadlock:
3122 * the device won't send interrupts since all the new Rx packets will be dropped,
3123 * and the MSI-X handler won't allocate new Rx descriptors, so the device won't
3124 * be able to send new packets.
3125 *
3126 * When such a situation is detected, the Rx cleanup task is executed in another thread.
3127 */
3128static void
3129check_for_empty_rx_ring(struct ena_adapter *adapter)
3130{
3131	struct ena_ring *rx_ring;
3132	int i, refill_required;
3133
3134	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3135		return;
3136
3137	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3138		return;
3139
3140	for (i = 0; i < adapter->num_io_queues; i++) {
3141		rx_ring = &adapter->rx_ring[i];
3142
3143		refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
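		/*
		 * ring_size - 1 free entries means that no buffers are
		 * currently posted; require EMPTY_RX_REFILL consecutive
		 * sightings before kicking the cleanup task to refill.
		 */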
3144		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3145			rx_ring->empty_rx_queue++;
3146
3147			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL)	{
3148				counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
3149				    1);
3150
3151				device_printf(adapter->pdev,
3152				    "trigger refill for ring %d\n", i);
3153
3154				taskqueue_enqueue(rx_ring->que->cleanup_tq,
3155				    &rx_ring->que->cleanup_task);
3156				rx_ring->empty_rx_queue = 0;
3157			}
3158		} else {
3159			rx_ring->empty_rx_queue = 0;
3160		}
3161	}
3162}
3163
3164static void
ena_update_hints(struct ena_adapter *adapter,
    struct ena_admin_ena_hw_hints *hints)
3166{
3167	struct ena_com_dev *ena_dev = adapter->ena_dev;
3168
3169	if (hints->admin_completion_tx_timeout)
3170		ena_dev->admin_queue.completion_timeout =
3171		    hints->admin_completion_tx_timeout * 1000;
3172
3173	if (hints->mmio_read_timeout)
3174		/* convert to usec */
3175		ena_dev->mmio_read.reg_read_to =
3176		    hints->mmio_read_timeout * 1000;
3177
3178	if (hints->missed_tx_completion_count_threshold_to_reset)
3179		adapter->missing_tx_threshold =
3180		    hints->missed_tx_completion_count_threshold_to_reset;
3181
3182	if (hints->missing_tx_completion_timeout) {
3183		if (hints->missing_tx_completion_timeout ==
3184		     ENA_HW_HINTS_NO_TIMEOUT)
3185			adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3186		else
3187			adapter->missing_tx_timeout =
3188			    SBT_1MS * hints->missing_tx_completion_timeout;
3189	}
3190
3191	if (hints->driver_watchdog_timeout) {
3192		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3193			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3194		else
3195			adapter->keep_alive_timeout =
3196			    SBT_1MS * hints->driver_watchdog_timeout;
3197	}
3198}
3199
3200static void
3201ena_timer_service(void *data)
3202{
3203	struct ena_adapter *adapter = (struct ena_adapter *)data;
3204	struct ena_admin_host_info *host_info =
3205	    adapter->ena_dev->host_attr.host_info;
3206
3207	check_for_missing_keep_alive(adapter);
3208
3209	check_for_admin_com_state(adapter);
3210
3211	check_for_missing_completions(adapter);
3212
3213	check_for_empty_rx_ring(adapter);
3214
3215	if (host_info != NULL)
3216		ena_update_host_info(host_info, adapter->ifp);
3217
3218	if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3219		device_printf(adapter->pdev, "Trigger reset is on\n");
3220		taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
3221		return;
3222	}
3223
3224	/*
3225	 * Schedule another timeout one second from now.
3226	 */
3227	callout_schedule_sbt(&adapter->timer_service, SBT_1S, SBT_1S, 0);
3228}
3229
3230void
3231ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3232{
3233	if_t ifp = adapter->ifp;
3234	struct ena_com_dev *ena_dev = adapter->ena_dev;
3235	bool dev_up;
3236
3237	if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
3238		return;
3239
3240	if_link_state_change(ifp, LINK_STATE_DOWN);
3241
3242	callout_drain(&adapter->timer_service);
3243
3244	dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
3245	if (dev_up)
3246		ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3247
3248	if (!graceful)
3249		ena_com_set_admin_running_state(ena_dev, false);
3250
3251	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3252		ena_down(adapter);
3253
3254	/*
3255	 * Stop the device from sending AENQ events (if the device was up, and
3256	 * the trigger reset was on, ena_down already performs device reset)
3257	 */
3258	if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up))
3259		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3260
3261	ena_free_mgmnt_irq(adapter);
3262
3263	ena_disable_msix(adapter);
3264
3265	/*
3266	 * IO rings resources should be freed because `ena_restore_device()`
3267	 * calls (not directly) `ena_enable_msix()`, which re-allocates MSIX
3268	 * vectors. The amount of MSIX vectors after destroy-restore may be
3269	 * different than before. Therefore, IO rings resources should be
3270	 * established from scratch each time.
3271	 */
3272	ena_free_all_io_rings_resources(adapter);
3273
3274	ena_com_abort_admin_commands(ena_dev);
3275
3276	ena_com_wait_for_abort_completion(ena_dev);
3277
3278	ena_com_admin_destroy(ena_dev);
3279
3280	ena_com_mmio_reg_read_request_destroy(ena_dev);
3281
3282	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3283
3284	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3285	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3286}
3287
3288static int
3289ena_device_validate_params(struct ena_adapter *adapter,
3290    struct ena_com_dev_get_features_ctx *get_feat_ctx)
3291{
3292
3293	if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr,
3294	    ETHER_ADDR_LEN) != 0) {
3295		device_printf(adapter->pdev,
3296		    "Error, MAC addresses are different\n");
3297		return (EINVAL);
3298	}
3299
3300	if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) {
3301		device_printf(adapter->pdev,
3302		    "Error, device max mtu is smaller than ifp MTU\n");
3303		return (EINVAL);
3304	}
3305
3306	return (0);
3307}
3308
3309int
3310ena_restore_device(struct ena_adapter *adapter)
3311{
3312	struct ena_com_dev_get_features_ctx get_feat_ctx;
3313	struct ena_com_dev *ena_dev = adapter->ena_dev;
3314	if_t ifp = adapter->ifp;
3315	device_t dev = adapter->pdev;
3316	int wd_active;
3317	int rc;
3318
3319	ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3320
3321	rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active);
3322	if (rc != 0) {
3323		device_printf(dev, "Cannot initialize device\n");
3324		goto err;
3325	}
3326	/*
3327	 * Only enable the WD if it was enabled before the reset, so it won't
3328	 * override a value set by the user via sysctl.
3329	 */
3330	if (adapter->wd_active != 0)
3331		adapter->wd_active = wd_active;
3332
3333	rc = ena_device_validate_params(adapter, &get_feat_ctx);
3334	if (rc != 0) {
3335		device_printf(dev, "Validation of device parameters failed\n");
3336		goto err_device_destroy;
3337	}
3338
3339	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3340	/* Make sure we don't have a race with the AENQ link state handler */
3341	if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
3342		if_link_state_change(ifp, LINK_STATE_UP);
3343
3344	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3345	if (rc != 0) {
3346		device_printf(dev, "Enable MSI-X failed\n");
3347		goto err_device_destroy;
3348	}
3349
3350	/*
3351	 * The effective number of MSI-X vectors in use should be the same as
3352	 * before `ena_destroy_device()`, if possible, or the closest value
3353	 * the currently available vectors allow.
3354	 */
3355	if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues)
3356		adapter->num_io_queues =
3357		    adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
3358
3359	/* Re-initialize rings basic information */
3360	ena_init_io_rings(adapter);
3361
3362	/* If the interface was up before the reset, bring it up again */
3363	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
3364		rc = ena_up(adapter);
3365		if (rc != 0) {
3366			device_printf(dev, "Failed to create I/O queues\n");
3367			goto err_disable_msix;
3368		}
3369	}
3370
3371	/* Indicate that device is running again and ready to work */
3372	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3373
3374	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
3375		/*
3376		 * As the AENQ handlers weren't executed during reset because
3377		 * the flag ENA_FLAG_DEVICE_RUNNING was turned off, the
3378		 * timestamp must be updated again. That will prevent the next
3379		 * reset caused by a missing keep alive.
3380		 */
3381		adapter->keep_alive_timestamp = getsbinuptime();
3382		callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
3383		    ena_timer_service, (void *)adapter, 0);
3384	}
3385	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3386
3387	device_printf(dev,
3388	    "Device reset completed successfully, Driver info: %s\n", ena_version);
3389
3390	return (rc);
3391
3392err_disable_msix:
3393	ena_free_mgmnt_irq(adapter);
3394	ena_disable_msix(adapter);
3395err_device_destroy:
3396	ena_com_abort_admin_commands(ena_dev);
3397	ena_com_wait_for_abort_completion(ena_dev);
3398	ena_com_admin_destroy(ena_dev);
3399	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3400	ena_com_mmio_reg_read_request_destroy(ena_dev);
3401err:
3402	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3403	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3404	device_printf(dev, "Reset attempt failed. Cannot reset the device\n");
3405
3406	return (rc);
3407}
3408
3409static void
3410ena_reset_task(void *arg, int pending)
3411{
3412	struct ena_adapter *adapter = (struct ena_adapter *)arg;
3413
3414	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3415		device_printf(adapter->pdev,
3416		    "device reset scheduled but trigger_reset is off\n");
3417		return;
3418	}
3419
	ENA_LOCK_LOCK(adapter);
	ena_destroy_device(adapter, false);
	ena_restore_device(adapter);
	ENA_LOCK_UNLOCK(adapter);
}

/**
 * ena_attach - Device Initialization Routine
 * @pdev: device information struct
 *
 * Returns 0 on success, otherwise an error code on failure.
 *
 * ena_attach initializes an adapter identified by a device structure.
 * The OS initialization, configuration of the adapter private structure,
 * and a hardware reset occur.
 **/
static int
ena_attach(device_t pdev)
{
	struct ena_com_dev_get_features_ctx get_feat_ctx;
	struct ena_llq_configurations llq_config;
	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
	static int version_printed;
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev = NULL;
	uint32_t max_num_io_queues;
	int rid, rc;

	adapter = device_get_softc(pdev);
	adapter->pdev = pdev;

	ENA_LOCK_INIT(adapter);

	/*
	 * Set up the timer service - the driver is responsible for avoiding
	 * concurrency, as the callout won't use any locking inside.
	 */
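	/*
	 * The second argument marks the callout as MPSAFE: its handler runs
	 * without the Giant lock, so all synchronization is up to the
	 * handler itself.
	 */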
	callout_init(&adapter->timer_service, true);
	adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
	adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
	adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
	adapter->missing_tx_threshold = DEFAULT_TX_CMP_THRESHOLD;

	if (version_printed++ == 0)
		device_printf(pdev, "%s\n", ena_version);

	/* Allocate memory for the ena_dev structure */
	ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
	    M_WAITOK | M_ZERO);

	adapter->ena_dev = ena_dev;
	ena_dev->dmadev = pdev;

	rid = PCIR_BAR(ENA_REG_BAR);
	adapter->memory = NULL;
	adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (unlikely(adapter->registers == NULL)) {
		device_printf(pdev,
		    "unable to allocate bus resource: registers!\n");
		rc = ENOMEM;
		goto err_dev_free;
	}

	ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
	    M_WAITOK | M_ZERO);

	/* Store register resources */
	((struct ena_bus *)(ena_dev->bus))->reg_bar_t =
	    rman_get_bustag(adapter->registers);
	((struct ena_bus *)(ena_dev->bus))->reg_bar_h =
	    rman_get_bushandle(adapter->registers);

	if (unlikely(((struct ena_bus *)(ena_dev->bus))->reg_bar_h == 0)) {
		device_printf(pdev, "failed to map registers bar\n");
		rc = ENXIO;
		goto err_bus_free;
	}

	ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;

	/* Initially clear all the flags */
	ENA_FLAG_ZERO(adapter);

	/* Device initialization */
	rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
	if (unlikely(rc != 0)) {
		device_printf(pdev, "ENA device init failed! (err: %d)\n", rc);
		rc = ENXIO;
		goto err_bus_free;
	}

	set_default_llq_configurations(&llq_config);

#if defined(__arm__) || defined(__aarch64__)
	/*
	 * Force LLQ disable, as the driver does not support write-combining
	 * (WC) enablement on the ARM architecture. Using LLQ without WC
	 * would hurt performance.
	 */
	ena_dev->supported_features &= ~(1 << ENA_ADMIN_LLQ);
#endif
	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
	    &llq_config);
	if (unlikely(rc != 0)) {
		device_printf(pdev, "failed to set placement policy\n");
		goto err_com_free;
	}

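	/*
	 * Meta-descriptor caching is only negotiated when the device
	 * placement policy (LLQ) is in use; with host-memory queues the
	 * flag simply keeps its zeroed default from the softc.
	 */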
	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
		adapter->disable_meta_caching =
		    !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
		    BIT(ENA_ADMIN_DISABLE_META_CACHING));

	adapter->keep_alive_timestamp = getsbinuptime();

	adapter->tx_offload_cap = get_feat_ctx.offload.tx;

	memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
	    ETHER_ADDR_LEN);

	calc_queue_ctx.pdev = pdev;
	calc_queue_ctx.ena_dev = ena_dev;
	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;

	/* Calculate the initial and maximum IO queue number and size */
	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev,
	    &get_feat_ctx);
	rc = ena_calc_io_queue_size(&calc_queue_ctx);
	if (unlikely((rc != 0) || (max_num_io_queues == 0))) {
		rc = EFAULT;
		goto err_com_free;
	}

	adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
	adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
	adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
	adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;

	adapter->max_num_io_queues = max_num_io_queues;

	adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;

	adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;

	adapter->reset_reason = ENA_REGS_RESET_NORMAL;

	/* Set up DMA tags for the RX and TX buffers */
	rc = ena_setup_tx_dma_tag(adapter);
	if (unlikely(rc != 0)) {
		device_printf(pdev, "Failed to create TX DMA tag\n");
		goto err_com_free;
	}

	rc = ena_setup_rx_dma_tag(adapter);
	if (unlikely(rc != 0)) {
		device_printf(pdev, "Failed to create RX DMA tag\n");
		goto err_tx_tag_free;
	}

	/*
	 * The number of requested MSI-X vectors is equal to
	 * adapter::max_num_io_queues (see `ena_enable_msix()`) plus a
	 * constant number of admin queue interrupts. The former is initially
	 * determined by HW capabilities (see `ena_calc_max_io_queue_num()`)
	 * but may not be achieved if there are not enough system resources.
	 * By default, the number of effectively used IO queues is the same,
	 * but later on it can be limited by the user via the sysctl
	 * interface.
	 */
	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
	if (unlikely(rc != 0)) {
		device_printf(pdev,
		    "Failed to enable and set the admin interrupts\n");
		goto err_io_free;
	}
	/* By default, all allocated MSI-X vectors are actively used */
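	/*
	 * For example (assuming ENA_ADMIN_MSIX_VEC == 1): with 9 MSI-X
	 * vectors granted, one serves the admin/AENQ interrupt and the
	 * remaining 8 are mapped 1:1 onto IO queue pairs.
	 */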
	adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;

	/* Initialize the rings' basic information */
	ena_init_io_rings(adapter);

	/* Set up the network interface */
	rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
	if (unlikely(rc != 0)) {
		device_printf(pdev, "Error with network interface setup\n");
		goto err_msix_free;
	}

	/* Initialize the reset task queue */
	TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
	adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
	taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET,
	    "%s rstq", device_get_nameunit(adapter->pdev));
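	/*
	 * A single taskqueue thread is sufficient here: resets are rare, and
	 * ena_reset_task() serializes on ENA_LOCK anyway, so extra threads
	 * would only contend on the lock.
	 */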

	/* Initialize statistics */
	ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
	    sizeof(struct ena_stats_dev));
	ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
	    sizeof(struct ena_hw_stats));
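	/*
	 * The casts rely on struct ena_stats_dev/ena_hw_stats being plain
	 * arrays of counter_u64_t; the byte size tells the helper how many
	 * per-CPU counter(9) counters to allocate.
	 */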
	ena_sysctl_add_nodes(adapter);

#ifdef DEV_NETMAP
	rc = ena_netmap_attach(adapter);
	if (rc != 0) {
		device_printf(pdev, "netmap attach failed: %d\n", rc);
		goto err_detach;
	}
#endif /* DEV_NETMAP */

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);

	return (0);

#ifdef DEV_NETMAP
err_detach:
	ether_ifdetach(adapter->ifp);
#endif /* DEV_NETMAP */
err_msix_free:
	ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
	ena_free_mgmnt_irq(adapter);
	ena_disable_msix(adapter);
err_io_free:
	ena_free_all_io_rings_resources(adapter);
	ena_free_rx_dma_tag(adapter);
err_tx_tag_free:
	ena_free_tx_dma_tag(adapter);
err_com_free:
	ena_com_admin_destroy(ena_dev);
	ena_com_delete_host_info(ena_dev);
	ena_com_mmio_reg_read_request_destroy(ena_dev);
err_bus_free:
	free(ena_dev->bus, M_DEVBUF);
	ena_free_pci_resources(adapter);
err_dev_free:
	free(ena_dev, M_DEVBUF);

	return (rc);
}

/**
 * ena_detach - Device Removal Routine
 * @pdev: device information struct
 *
 * ena_detach is called by the device subsystem to alert the driver
 * that it should release the PCI device.
 **/
static int
ena_detach(device_t pdev)
{
	struct ena_adapter *adapter = device_get_softc(pdev);
	struct ena_com_dev *ena_dev = adapter->ena_dev;
	int rc;

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(adapter->pdev, "VLAN is in use, detach first\n");
		return (EBUSY);
	}

	ether_ifdetach(adapter->ifp);

	/* Stop the timer service */
	ENA_LOCK_LOCK(adapter);
	callout_drain(&adapter->timer_service);
	ENA_LOCK_UNLOCK(adapter);

	/* Release the reset task */
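	/*
	 * taskqueue_cancel() returns non-zero while the task is still
	 * running, so drain and retry until no reset task is pending or
	 * executing before freeing the queue.
	 */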
	while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
		taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
	taskqueue_free(adapter->reset_tq);

	ENA_LOCK_LOCK(adapter);
	ena_down(adapter);
	ena_destroy_device(adapter, true);
	ENA_LOCK_UNLOCK(adapter);

#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */

	ena_free_counters((counter_u64_t *)&adapter->hw_stats,
	    sizeof(struct ena_hw_stats));
	ena_free_counters((counter_u64_t *)&adapter->dev_stats,
	    sizeof(struct ena_stats_dev));

	rc = ena_free_rx_dma_tag(adapter);
	if (unlikely(rc != 0))
		device_printf(adapter->pdev,
		    "Failed to free RX DMA tag; mappings still in use\n");

	rc = ena_free_tx_dma_tag(adapter);
	if (unlikely(rc != 0))
		device_printf(adapter->pdev,
		    "Failed to free TX DMA tag; mappings still in use\n");

	ena_free_irqs(adapter);

	ena_free_pci_resources(adapter);

	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
		ena_com_rss_destroy(ena_dev);

	ena_com_delete_host_info(ena_dev);

	ENA_LOCK_DESTROY(adapter);

	if_free(adapter->ifp);

	if (ena_dev->bus != NULL)
		free(ena_dev->bus, M_DEVBUF);

	if (ena_dev != NULL)
		free(ena_dev, M_DEVBUF);

	return (bus_generic_detach(pdev));
}

/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
/**
 * ena_update_on_link_change:
 * Notify the network interface about a change in the link status
 **/
static void
ena_update_on_link_change(void *adapter_data,
    struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
	struct ena_admin_aenq_link_change_desc *aenq_desc;
	int status;
	if_t ifp;

	aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
	ifp = adapter->ifp;
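	/*
	 * The link state is carried in a single bit of the descriptor's
	 * flags word; a non-zero masked value means the link is up.
	 */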
	status = aenq_desc->flags &
	    ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;

	if (status != 0) {
		device_printf(adapter->pdev, "link is UP\n");
		ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
		if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
			if_link_state_change(ifp, LINK_STATE_UP);
	} else {
		device_printf(adapter->pdev, "link is DOWN\n");
		if_link_state_change(ifp, LINK_STATE_DOWN);
		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
	}
}

static void
ena_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
	struct ena_admin_ena_hw_hints *hints;

	ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
	    "Invalid group(%x) expected %x\n", aenq_e->aenq_common_desc.group,
	    ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrom) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints =
		    (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		device_printf(adapter->pdev,
		    "Invalid AENQ notification syndrome %d\n",
		    aenq_e->aenq_common_desc.syndrom);
	}
}

/**
 * This handler will be called for an unknown event group or for a group
 * with an unimplemented handler.
 **/
static void
unimplemented_aenq_handler(void *adapter_data,
    struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;

	device_printf(adapter->pdev,
	    "Unknown event was received or event with unimplemented handler\n");
}

static struct ena_aenq_handlers aenq_handlers = {
    .handlers = {
	    [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
	    [ENA_ADMIN_NOTIFICATION] = ena_notification,
	    [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
    },
    .unimplemented_handler = unimplemented_aenq_handler
};

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t ena_methods[] = {
    /* Device interface */
    DEVMETHOD(device_probe, ena_probe),
    DEVMETHOD(device_attach, ena_attach),
    DEVMETHOD(device_detach, ena_detach),
    DEVMETHOD_END
};

static driver_t ena_driver = {
    "ena", ena_methods, sizeof(struct ena_adapter),
};

devclass_t ena_devclass;
DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
    sizeof(ena_vendor_info_array[0]), nitems(ena_vendor_info_array) - 1);
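/*
 * The PNP table lets devmatch(8) autoload the driver when a matching PCI
 * vendor/device ID shows up; nitems() - 1 presumably skips the zeroed
 * sentinel entry terminating ena_vendor_info_array.
 */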
MODULE_DEPEND(ena, pci, 1, 1, 1);
MODULE_DEPEND(ena, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(ena, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************/