/* ena.h revision 361468 */
/*-
 * BSD LICENSE
 *
 * Copyright (c) 2015-2019 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/dev/ena/ena.h 361468 2020-05-25 17:47:31Z mw $
 *
 */

34#ifndef ENA_H
35#define ENA_H
36
37#include <sys/types.h>
38
39#include "ena-com/ena_com.h"
40#include "ena-com/ena_eth_com.h"
41
/* Driver version components; combined into DRV_MODULE_VERSION below. */
#define DRV_MODULE_VER_MAJOR	2
#define DRV_MODULE_VER_MINOR	1
#define DRV_MODULE_VER_SUBMINOR 2

#define DRV_MODULE_NAME		"ena"

/*
 * Version string assembled from the three components above via __XSTRING,
 * joined with "." separators.  The #ifndef lets an earlier definition of
 * DRV_MODULE_VERSION take precedence over this one.
 */
#ifndef DRV_MODULE_VERSION
#define DRV_MODULE_VERSION				\
	__XSTRING(DRV_MODULE_VER_MAJOR) "."		\
	__XSTRING(DRV_MODULE_VER_MINOR) "."		\
	__XSTRING(DRV_MODULE_VER_SUBMINOR)
#endif
#define DEVICE_NAME	"Elastic Network Adapter (ENA)"
#define DEVICE_DESC	"ENA adapter"

/*
 * Calculate DMA mask: a value with the low `x` bits set.
 * Width for ena cannot exceed 48, so the shift is safe (x must stay below 64).
 */
#define ENA_DMA_BIT_MASK(x)		((1ULL << (x)) - 1ULL)

/* 1 for AENQ + ADMIN */
#define	ENA_ADMIN_MSIX_VEC		1
/* Total MSI-X vectors: the admin vector plus one per IO queue. */
#define	ENA_MAX_MSIX_VEC(io_queues)	(ENA_ADMIN_MSIX_VEC + (io_queues))

/* BAR indices used for the register and memory resources. */
#define	ENA_REG_BAR			0
#define	ENA_MEM_BAR			2

#define	ENA_BUS_DMA_SEGS		32

#define	ENA_DEFAULT_BUF_RING_SIZE	4096

#define	ENA_DEFAULT_RING_SIZE		1024

/*
 * Refill Rx queue when number of required descriptors is above
 * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET
 */
#define	ENA_RX_REFILL_THRESH_DIVIDER	8
#define	ENA_RX_REFILL_THRESH_PACKET	256

#define	ENA_IRQNAME_SIZE		40

#define	ENA_PKT_MAX_BUFS		19

/* RSS indirection table size is 2^ENA_RX_RSS_TABLE_LOG_SIZE entries. */
#define	ENA_RX_RSS_TABLE_LOG_SIZE	7
#define	ENA_RX_RSS_TABLE_SIZE		(1 << ENA_RX_RSS_TABLE_LOG_SIZE)

#define	ENA_HASH_KEY_SIZE		40

#define	ENA_MAX_FRAME_LEN		10000
#define	ENA_MIN_FRAME_LEN		60

#define ENA_TX_RESUME_THRESH		(ENA_PKT_MAX_BUFS + 2)

#define DB_THRESHOLD	64

#define TX_COMMIT	32
/*
 * TX budget for cleaning. It should be half of the RX budget to reduce amount
 * of TCP retransmissions.
 */
#define TX_BUDGET	128
/* RX cleanup budget. -1 stands for infinity. */
#define RX_BUDGET	256
/*
 * How many times we can repeat cleanup in the io irq handling routine if the
 * RX or TX budget was depleted.
 */
#define CLEAN_BUDGET	8

#define RX_IRQ_INTERVAL 20
#define TX_IRQ_INTERVAL 50

#define	ENA_MIN_MTU		128

#define	ENA_TSO_MAXSIZE		65536

#define	ENA_MMIO_DISABLE_REG_READ	BIT(0)

/*
 * Advance a ring index by one with wrap-around.  The wrap is done by masking
 * with (ring_size - 1), so ring_size must be a power of two.
 */
#define	ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

#define	ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

/* IO queue pair q maps to even (Tx) and odd (Rx) queue indices. */
#define	ENA_IO_TXQ_IDX(q)		(2 * (q))
#define	ENA_IO_RXQ_IDX(q)		(2 * (q) + 1)

/* Interrupt index 0 is the management interrupt; IO interrupts follow it. */
#define	ENA_MGMNT_IRQ_IDX		0
#define	ENA_IO_IRQ_FIRST_IDX		1
#define	ENA_IO_IRQ_IDX(q)		(ENA_IO_IRQ_FIRST_IDX + (q))

#define	ENA_MAX_NO_INTERRUPT_ITERATIONS	3

/*
 * ENA device should send keep alive msg every 1 sec.
 * We wait for 6 sec just to be on the safe side.
 */
#define DEFAULT_KEEP_ALIVE_TO		(SBT_1S * 6)

/*
 * Timeout, expressed as a multiple of SBT_1S, before concluding the
 * transmitter is hung.
 */
#define DEFAULT_TX_CMP_TO		(SBT_1S * 5)

/* Number of queues to check for missing queues per timer tick */
#define DEFAULT_TX_MONITORED_QUEUES	(4)

/* Max number of timeouted packets before device reset */
#define DEFAULT_TX_CMP_THRESHOLD	(128)

/*
 * Supported PCI vendor and devices IDs
 */
#define	PCI_VENDOR_ID_AMAZON	0x1d0f

#define	PCI_DEV_ID_ENA_PF	0x0ec2
#define	PCI_DEV_ID_ENA_LLQ_PF	0x1ec2
#define	PCI_DEV_ID_ENA_VF	0xec20
#define	PCI_DEV_ID_ENA_LLQ_VF	0xec21

/*
 * Flags indicating current ENA driver state.
 *
 * Each enumerator is used as a bit index by the ENA_FLAG_* accessor macros.
 * ENA_FLAGS_NUMBER must remain the last enumerator: it is the number of
 * flags, i.e. one past the highest valid bit index, and is the size passed
 * to the bitset macros.  (It used to alias ENA_FLAG_RSS_ACTIVE, which made
 * the declared bitset one bit too small for the highest flag.)
 */
enum ena_flags_t {
	ENA_FLAG_DEVICE_RUNNING,
	ENA_FLAG_DEV_UP,
	ENA_FLAG_LINK_UP,
	ENA_FLAG_MSIX_ENABLED,
	ENA_FLAG_TRIGGER_RESET,
	ENA_FLAG_ONGOING_RESET,
	ENA_FLAG_DEV_UP_BEFORE_RESET,
	ENA_FLAG_RSS_ACTIVE,
	ENA_FLAGS_NUMBER
};

/* Bitset type sized by ENA_FLAGS_NUMBER, used to hold the driver state flags. */
BITSET_DEFINE(_ena_state, ENA_FLAGS_NUMBER);
typedef struct _ena_state ena_state_t;

/*
 * Accessors for the state flags.  `adapter` must be a pointer to a structure
 * with a `flags` member of type ena_state_t; `bit` is an enum ena_flags_t
 * value.  Set and clear use the atomic bitset operations.
 */
#define ENA_FLAG_ZERO(adapter)		\
	BIT_ZERO(ENA_FLAGS_NUMBER, &(adapter)->flags)
#define ENA_FLAG_ISSET(bit, adapter)	\
	BIT_ISSET(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
#define ENA_FLAG_SET_ATOMIC(bit, adapter)	\
	BIT_SET_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
#define ENA_FLAG_CLEAR_ATOMIC(bit, adapter)	\
	BIT_CLR_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)

/* One MSI-X table slot: an entry number paired with its vector number. */
struct msix_entry {
	int entry;
	int vector;
};

/* PCI vendor/device ID pair plus an index value. */
typedef struct _ena_vendor_info_t {
	uint16_t vendor_id;
	uint16_t device_id;
	unsigned int index;
} ena_vendor_info_t;

195struct ena_irq {
196	/* Interrupt resources */
197	struct resource *res;
198	driver_filter_t *handler;
199	void *data;
200	void *cookie;
201	unsigned int vector;
202	bool requested;
203	int cpu;
204	char name[ENA_IRQNAME_SIZE];
205};
206
207struct ena_que {
208	struct ena_adapter *adapter;
209	struct ena_ring *tx_ring;
210	struct ena_ring *rx_ring;
211
212	struct task cleanup_task;
213	struct taskqueue *cleanup_tq;
214
215	uint32_t id;
216	int cpu;
217};
218
219struct ena_calc_queue_size_ctx {
220	struct ena_com_dev_get_features_ctx *get_feat_ctx;
221	struct ena_com_dev *ena_dev;
222	device_t pdev;
223	uint16_t rx_queue_size;
224	uint16_t tx_queue_size;
225	uint16_t max_tx_sgl_size;
226	uint16_t max_rx_sgl_size;
227};
228
#ifdef DEV_NETMAP
/*
 * Netmap-only Tx bookkeeping (compiled when DEV_NETMAP is defined): up to
 * ENA_PKT_MAX_BUFS buffer indices and DMA maps, and a count of those in use.
 */
struct ena_netmap_tx_info {
	uint32_t socket_buf_idx[ENA_PKT_MAX_BUFS];
	bus_dmamap_t map_seg[ENA_PKT_MAX_BUFS];
	unsigned int sockets_used;
};
#endif

237struct ena_tx_buffer {
238	struct mbuf *mbuf;
239	/* # of ena desc for this specific mbuf
240	 * (includes data desc and metadata desc) */
241	unsigned int tx_descs;
242	/* # of buffers used by this mbuf */
243	unsigned int num_of_bufs;
244
245	bus_dmamap_t dmamap;
246
247	/* Used to detect missing tx packets */
248	struct bintime timestamp;
249	bool print_once;
250
251#ifdef DEV_NETMAP
252	struct ena_netmap_tx_info nm_info;
253#endif /* DEV_NETMAP */
254
255	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
256} __aligned(CACHE_LINE_SIZE);
257
258struct ena_rx_buffer {
259	struct mbuf *mbuf;
260	bus_dmamap_t map;
261	struct ena_com_buf ena_buf;
262#ifdef DEV_NETMAP
263	uint32_t netmap_buf_idx;
264#endif /* DEV_NETMAP */
265} __aligned(CACHE_LINE_SIZE);
266
267struct ena_stats_tx {
268	counter_u64_t cnt;
269	counter_u64_t bytes;
270	counter_u64_t prepare_ctx_err;
271	counter_u64_t dma_mapping_err;
272	counter_u64_t doorbells;
273	counter_u64_t missing_tx_comp;
274	counter_u64_t bad_req_id;
275	counter_u64_t collapse;
276	counter_u64_t collapse_err;
277	counter_u64_t queue_wakeup;
278	counter_u64_t queue_stop;
279	counter_u64_t llq_buffer_copy;
280};
281
282struct ena_stats_rx {
283	counter_u64_t cnt;
284	counter_u64_t bytes;
285	counter_u64_t refil_partial;
286	counter_u64_t bad_csum;
287	counter_u64_t mjum_alloc_fail;
288	counter_u64_t mbuf_alloc_fail;
289	counter_u64_t dma_mapping_err;
290	counter_u64_t bad_desc_num;
291	counter_u64_t bad_req_id;
292	counter_u64_t empty_rx_ring;
293};
294
295struct ena_ring {
296	/* Holds the empty requests for TX/RX out of order completions */
297	union {
298		uint16_t *free_tx_ids;
299		uint16_t *free_rx_ids;
300	};
301	struct ena_com_dev *ena_dev;
302	struct ena_adapter *adapter;
303	struct ena_com_io_cq *ena_com_io_cq;
304	struct ena_com_io_sq *ena_com_io_sq;
305
306	uint16_t qid;
307
308	/* Determines if device will use LLQ or normal mode for TX */
309	enum ena_admin_placement_policy_type tx_mem_queue_type;
310	union {
311		/* The maximum length the driver can push to the device (For LLQ) */
312		uint8_t tx_max_header_size;
313		/* The maximum (and default) mbuf size for the Rx descriptor. */
314		uint16_t rx_mbuf_sz;
315
316	};
317
318	bool first_interrupt;
319	uint16_t no_interrupt_event_cnt;
320
321	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
322
323	/*
324	 * Fields used for Adaptive Interrupt Modulation - to be implemented in
325	 * the future releases
326	 */
327	uint32_t  smoothed_interval;
328	enum ena_intr_moder_level moder_tbl_idx;
329
330	struct ena_que *que;
331	struct lro_ctrl lro;
332
333	uint16_t next_to_use;
334	uint16_t next_to_clean;
335
336	union {
337		struct ena_tx_buffer *tx_buffer_info; /* contex of tx packet */
338		struct ena_rx_buffer *rx_buffer_info; /* contex of rx packet */
339	};
340	int ring_size; /* number of tx/rx_buffer_info's entries */
341
342	struct buf_ring *br; /* only for TX */
343	uint32_t buf_ring_size;
344
345	struct mtx ring_mtx;
346	char mtx_name[16];
347
348	struct {
349		struct task enqueue_task;
350		struct taskqueue *enqueue_tq;
351	};
352
353	union {
354		struct ena_stats_tx tx_stats;
355		struct ena_stats_rx rx_stats;
356	};
357
358	union {
359		int empty_rx_queue;
360		/* For Tx ring to indicate if it's running or not */
361		bool running;
362	};
363
364	/* How many packets are sent in one Tx loop, used for doorbells */
365	uint32_t acum_pkts;
366
367	/* Used for LLQ */
368	uint8_t *push_buf_intermediate_buf;
369
370#ifdef DEV_NETMAP
371	bool initialized;
372#endif /* DEV_NETMAP */
373} __aligned(CACHE_LINE_SIZE);
374
375struct ena_stats_dev {
376	counter_u64_t wd_expired;
377	counter_u64_t interface_up;
378	counter_u64_t interface_down;
379	counter_u64_t admin_q_pause;
380};
381
382struct ena_hw_stats {
383	counter_u64_t rx_packets;
384	counter_u64_t tx_packets;
385
386	counter_u64_t rx_bytes;
387	counter_u64_t tx_bytes;
388
389	counter_u64_t rx_drops;
390};
391
392/* Board specific private data structure */
393struct ena_adapter {
394	struct ena_com_dev *ena_dev;
395
396	/* OS defined structs */
397	if_t ifp;
398	device_t pdev;
399	struct ifmedia	media;
400
401	/* OS resources */
402	struct resource *memory;
403	struct resource *registers;
404
405	struct mtx global_mtx;
406	struct sx ioctl_sx;
407
408	/* MSI-X */
409	struct msix_entry *msix_entries;
410	int msix_vecs;
411
412	/* DMA tags used throughout the driver adapter for Tx and Rx */
413	bus_dma_tag_t tx_buf_tag;
414	bus_dma_tag_t rx_buf_tag;
415	int dma_width;
416
417	uint32_t max_mtu;
418
419	uint16_t max_tx_sgl_size;
420	uint16_t max_rx_sgl_size;
421
422	uint32_t tx_offload_cap;
423
424	/* Tx fast path data */
425	int num_queues;
426
427	unsigned int tx_ring_size;
428	unsigned int rx_ring_size;
429
430	uint16_t buf_ring_size;
431
432	/* RSS*/
433	uint8_t	rss_ind_tbl[ENA_RX_RSS_TABLE_SIZE];
434
435	uint8_t mac_addr[ETHER_ADDR_LEN];
436	/* mdio and phy*/
437
438	ena_state_t flags;
439
440	/* Queue will represent one TX and one RX ring */
441	struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
442	    __aligned(CACHE_LINE_SIZE);
443
444	/* TX */
445	struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
446	    __aligned(CACHE_LINE_SIZE);
447
448	/* RX */
449	struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
450	    __aligned(CACHE_LINE_SIZE);
451
452	struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
453
454	/* Timer service */
455	struct callout timer_service;
456	sbintime_t keep_alive_timestamp;
457	uint32_t next_monitored_tx_qid;
458	struct task reset_task;
459	struct taskqueue *reset_tq;
460	int wd_active;
461	sbintime_t keep_alive_timeout;
462	sbintime_t missing_tx_timeout;
463	uint32_t missing_tx_max_queues;
464	uint32_t missing_tx_threshold;
465
466	/* Statistics */
467	struct ena_stats_dev dev_stats;
468	struct ena_hw_stats hw_stats;
469
470	enum ena_regs_reset_reason_types reset_reason;
471};
472
/* Lock helpers for a ring's ring_mtx; `_ring` is a struct ena_ring pointer. */
#define	ENA_RING_MTX_LOCK(_ring)		mtx_lock(&(_ring)->ring_mtx)
#define	ENA_RING_MTX_TRYLOCK(_ring)		mtx_trylock(&(_ring)->ring_mtx)
#define	ENA_RING_MTX_UNLOCK(_ring)		mtx_unlock(&(_ring)->ring_mtx)

477static inline int ena_mbuf_count(struct mbuf *mbuf)
478{
479	int count = 1;
480
481	while ((mbuf = mbuf->m_next) != NULL)
482		++count;
483
484	return count;
485}
486
/* Driver entry points declared here and defined outside this header. */
int	ena_up(struct ena_adapter *);
void	ena_down(struct ena_adapter *);
int	ena_restore_device(struct ena_adapter *);
void	ena_destroy_device(struct ena_adapter *, bool);
int	ena_refill_rx_bufs(struct ena_ring *, uint32_t);

493static inline int
494validate_rx_req_id(struct ena_ring *rx_ring, uint16_t req_id)
495{
496	if (likely(req_id < rx_ring->ring_size))
497		return (0);
498
499	device_printf(rx_ring->adapter->pdev, "Invalid rx req_id: %hu\n",
500	    req_id);
501	counter_u64_add(rx_ring->rx_stats.bad_req_id, 1);
502
503	/* Trigger device reset */
504	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter))) {
505		rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
506		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter);
507	}
508
509	return (EFAULT);
510}
511
512#endif /* !(ENA_H) */
513