1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2023 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/param.h>
32#include "opt_rss.h"
33
34#include "ena_rss.h"
35#include "ena_sysctl.h"
36
/* Helpers that attach one group of sysctl nodes to the device tree. */
static void	ena_sysctl_add_wd(struct ena_adapter *adapter);
static void	ena_sysctl_add_stats(struct ena_adapter *adapter);
static void	ena_sysctl_add_eni_metrics(struct ena_adapter *adapter);
static void	ena_sysctl_add_customer_metrics(struct ena_adapter *adapter);
static void	ena_sysctl_add_srd_info(struct ena_adapter *adapter);
static void	ena_sysctl_add_tuneables(struct ena_adapter *adapter);
static void	ena_sysctl_add_irq_affinity(struct ena_adapter *adapter);

/* Handlers backing the writable (SYSCTL_ADD_PROC) leaves. */
static int	ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS);

/* Kernel option RSS prevents manipulation of key hash and indirection table. */
#ifndef RSS
static void	ena_sysctl_add_rss(struct ena_adapter *adapter);
static int	ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
#endif
58
/* Upper bound, in seconds, of the metrics sampling interval (one hour). */
#define ENA_METRICS_MAX_SAMPLE_INTERVAL	3600
/* Twice the hash key size plus one. */
#define ENA_HASH_KEY_MSG_SIZE		(ENA_HASH_KEY_SIZE * 2 + 1)

/* Capacity of the name/description buffers in the metric tables below. */
#define SYSCTL_GSTRING_LEN		128

/*
 * Initializer for a struct ena_hw_metrics element; both arguments are
 * stringified.
 */
#define ENA_METRIC_ENI_ENTRY(stat, desc)				\
	{								\
		.name = #stat,						\
		.description = #desc,					\
	}

/*
 * Initializer for a table element that also records where `stat` lives inside
 * struct ena_admin_<stat_type>, expressed as an index in u64 units.
 */
#define ENA_STAT_ENTRY(stat, desc, stat_type)				\
	{								\
		.name = #stat,						\
		.description = #desc,					\
		.stat_offset =						\
		    offsetof(struct ena_admin_##stat_type, stat) / sizeof(u64), \
	}

/* ENA_STAT_ENTRY specialised for struct ena_admin_ena_srd_stats members. */
#define ENA_STAT_ENA_SRD_ENTRY(stat, desc)				\
	ENA_STAT_ENTRY(stat, desc, ena_srd_stats)
78
79struct ena_hw_metrics {
80        char name[SYSCTL_GSTRING_LEN];
81        char description[SYSCTL_GSTRING_LEN];
82};
83
84struct ena_srd_metrics {
85        char name[SYSCTL_GSTRING_LEN];
86        char description[SYSCTL_GSTRING_LEN];
87        int stat_offset;
88};
89
90static const struct ena_srd_metrics ena_srd_stats_strings[] = {
91        ENA_STAT_ENA_SRD_ENTRY(
92	    ena_srd_tx_pkts, Number of packets transmitted over ENA SRD),
93        ENA_STAT_ENA_SRD_ENTRY(
94	    ena_srd_eligible_tx_pkts, Number of packets transmitted or could
95	    have been transmitted over ENA SRD),
96        ENA_STAT_ENA_SRD_ENTRY(
97	    ena_srd_rx_pkts, Number of packets received over ENA SRD),
98        ENA_STAT_ENA_SRD_ENTRY(
99	    ena_srd_resource_utilization, Percentage of the ENA SRD resources
100	    that are in use),
101};
102
103static const struct ena_hw_metrics ena_hw_stats_strings[] = {
104        ENA_METRIC_ENI_ENTRY(
105	    bw_in_allowance_exceeded, Inbound BW allowance exceeded),
106        ENA_METRIC_ENI_ENTRY(
107	    bw_out_allowance_exceeded, Outbound BW allowance exceeded),
108        ENA_METRIC_ENI_ENTRY(
109	    pps_allowance_exceeded, PPS allowance exceeded),
110        ENA_METRIC_ENI_ENTRY(
111	    conntrack_allowance_exceeded, Connection tracking allowance exceeded),
112        ENA_METRIC_ENI_ENTRY(
113	    linklocal_allowance_exceeded, Linklocal packet rate allowance),
114        ENA_METRIC_ENI_ENTRY(
115	    conntrack_allowance_available, Number of available conntracks),
116};
117
/*
 * Number of elements of a true array (not a pointer). The argument is
 * parenthesized so that any array-typed expression can be passed safely.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Element counts of the two metric description tables. */
#define ENA_CUSTOMER_METRICS_ARRAY_SIZE      ARRAY_SIZE(ena_hw_stats_strings)
#define ENA_SRD_METRICS_ARRAY_SIZE           ARRAY_SIZE(ena_srd_stats_strings)
124
125static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
126    "ENA driver parameters");
127
128/*
129 * Logging level for changing verbosity of the output
130 */
131int ena_log_level = ENA_INFO;
132SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0,
133    "Logging level indicating verbosity of the logs");
134
135SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
136    ENA_DRV_MODULE_VERSION, "ENA driver version");
137
138/*
139 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead).
140 * Using 9k mbufs in low memory conditions might cause allocation to take a lot
141 * of time and lead to the OS instability as it needs to look for the contiguous
142 * pages.
143 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if
144 * the network performance is the priority, the 9k mbufs can be used.
145 */
146int ena_enable_9k_mbufs = 0;
147SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
148    &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
149
150/*
151 * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to
152 * false. This option may be important for platforms, which often handle packet
153 * headers on Tx with total header size greater than 96B, as it may
154 * reduce the latency.
155 * It also reduces the maximum Tx queue size by half, so it may cause more Tx
156 * packet drops.
157 */
158bool ena_force_large_llq_header = false;
159SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
160    &ena_force_large_llq_header, 0,
161    "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n");
162
163int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
164
165int ena_sysctl_allocate_customer_metrics_buffer(struct ena_adapter *adapter)
166{
167	int rc = 0;
168
169	adapter->customer_metrics_array = malloc((sizeof(u64) * ENA_CUSTOMER_METRICS_ARRAY_SIZE),
170	    M_DEVBUF, M_NOWAIT | M_ZERO);
171	if (unlikely(adapter->customer_metrics_array == NULL))
172		rc = ENOMEM;
173
174	return rc;
175}
176void
177ena_sysctl_add_nodes(struct ena_adapter *adapter)
178{
179	struct ena_com_dev *dev = adapter->ena_dev;
180
181	if (ena_com_get_cap(dev, ENA_ADMIN_CUSTOMER_METRICS))
182		ena_sysctl_add_customer_metrics(adapter);
183	else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS))
184		ena_sysctl_add_eni_metrics(adapter);
185
186	if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO))
187		ena_sysctl_add_srd_info(adapter);
188
189	ena_sysctl_add_wd(adapter);
190	ena_sysctl_add_stats(adapter);
191	ena_sysctl_add_tuneables(adapter);
192	ena_sysctl_add_irq_affinity(adapter);
193#ifndef RSS
194	ena_sysctl_add_rss(adapter);
195#endif
196}
197
198static void
199ena_sysctl_add_wd(struct ena_adapter *adapter)
200{
201	device_t dev;
202
203	struct sysctl_ctx_list *ctx;
204	struct sysctl_oid *tree;
205	struct sysctl_oid_list *child;
206
207	dev = adapter->pdev;
208
209	ctx = device_get_sysctl_ctx(dev);
210	tree = device_get_sysctl_tree(dev);
211	child = SYSCTL_CHILDREN(tree);
212
213	/* Sysctl calls for Watchdog service */
214	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN,
215	    &adapter->wd_active, 0, "Watchdog is active");
216
217	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
218	    CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
219	    "Timeout for Keep Alive messages");
220
221	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
222	    CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
223	    "Timeout for TX completion");
224
225	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
226	    CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
227	    "Number of TX queues to check per run");
228
229	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
230	    CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
231	    "Max number of timeouted packets");
232}
233
234static void
235ena_sysctl_add_stats(struct ena_adapter *adapter)
236{
237	device_t dev;
238
239	struct ena_ring *tx_ring;
240	struct ena_ring *rx_ring;
241
242	struct ena_hw_stats *hw_stats;
243	struct ena_stats_dev *dev_stats;
244	struct ena_stats_tx *tx_stats;
245	struct ena_stats_rx *rx_stats;
246	struct ena_com_stats_admin *admin_stats;
247
248	struct sysctl_ctx_list *ctx;
249	struct sysctl_oid *tree;
250	struct sysctl_oid_list *child;
251
252	struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
253	struct sysctl_oid *admin_node;
254	struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
255	struct sysctl_oid_list *admin_list;
256
257#define QUEUE_NAME_LEN 32
258	char namebuf[QUEUE_NAME_LEN];
259	int i;
260
261	dev = adapter->pdev;
262
263	ctx = device_get_sysctl_ctx(dev);
264	tree = device_get_sysctl_tree(dev);
265	child = SYSCTL_CHILDREN(tree);
266
267	tx_ring = adapter->tx_ring;
268	rx_ring = adapter->rx_ring;
269
270	hw_stats = &adapter->hw_stats;
271	dev_stats = &adapter->dev_stats;
272	admin_stats = &adapter->ena_dev->admin_queue.stats;
273
274	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD,
275	    &dev_stats->wd_expired, "Watchdog expiry count");
276	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD,
277	    &dev_stats->interface_up, "Network interface up count");
278	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down",
279	    CTLFLAG_RD, &dev_stats->interface_down,
280	    "Network interface down count");
281	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause",
282	    CTLFLAG_RD, &dev_stats->admin_q_pause, "Admin queue pauses");
283
284	for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
285		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
286
287		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
288		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
289		queue_list = SYSCTL_CHILDREN(queue_node);
290
291		adapter->que[i].oid = queue_node;
292
293#ifdef RSS
294		/* Common stats */
295		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD,
296		    &adapter->que[i].cpu, 0, "CPU affinity");
297		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD,
298		    &adapter->que[i].domain, 0, "NUMA domain");
299#endif
300
301		/* TX specific stats */
302		tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring",
303		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
304		tx_list = SYSCTL_CHILDREN(tx_node);
305
306		tx_stats = &tx_ring->tx_stats;
307
308		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count",
309		    CTLFLAG_RD, &tx_stats->cnt, "Packets sent");
310		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes",
311		    CTLFLAG_RD, &tx_stats->bytes, "Bytes sent");
312		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
313		    "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err,
314		    "TX buffer preparation failures");
315		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
316		    "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err,
317		    "DMA mapping failures");
318		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells",
319		    CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells");
320		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
321		    "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp,
322		    "TX completions missed");
323		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id",
324		    CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count");
325		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses",
326		    CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count");
327		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
328		    "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err,
329		    "Mbuf collapse failures");
330		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups",
331		    CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups");
332		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops",
333		    CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops");
334		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
335		    "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy,
336		    "Header copies for llq transaction");
337		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
338		    "unmask_interrupt_num", CTLFLAG_RD,
339		    &tx_stats->unmask_interrupt_num,
340		    "Unmasked interrupt count");
341
342		/* RX specific stats */
343		rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring",
344		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
345		rx_list = SYSCTL_CHILDREN(rx_node);
346
347		rx_stats = &rx_ring->rx_stats;
348
349		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count",
350		    CTLFLAG_RD, &rx_stats->cnt, "Packets received");
351		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes",
352		    CTLFLAG_RD, &rx_stats->bytes, "Bytes received");
353		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial",
354		    CTLFLAG_RD, &rx_stats->refil_partial,
355		    "Partial refilled mbufs");
356		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad",
357		    CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum");
358		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
359		    "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail,
360		    "Failed mbuf allocs");
361		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
362		    "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail,
363		    "Failed jumbo mbuf allocs");
364		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
365		    "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err,
366		    "DMA mapping errors");
367		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num",
368		    CTLFLAG_RD, &rx_stats->bad_desc_num,
369		    "Bad descriptor count");
370		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id",
371		    CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count");
372		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring",
373		    CTLFLAG_RD, &rx_stats->empty_rx_ring,
374		    "RX descriptors depletion count");
375		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good",
376		    CTLFLAG_RD, &rx_stats->csum_good,
377		    "Valid RX checksum calculations");
378	}
379
380	/* Stats read from device */
381	hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
382	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
383	hw_list = SYSCTL_CHILDREN(hw_node);
384
385	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
386	    &hw_stats->rx_packets, "Packets received");
387	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
388	    &hw_stats->tx_packets, "Packets transmitted");
389	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
390	    &hw_stats->rx_bytes, "Bytes received");
391	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
392	    &hw_stats->tx_bytes, "Bytes transmitted");
393	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
394	    &hw_stats->rx_drops, "Receive packet drops");
395	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
396	    &hw_stats->tx_drops, "Transmit packet drops");
397
398	/* ENA Admin queue stats */
399	admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
400	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
401	admin_list = SYSCTL_CHILDREN(admin_node);
402
403	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
404	    &admin_stats->aborted_cmd, 0, "Aborted commands");
405	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
406	    &admin_stats->submitted_cmd, 0, "Submitted commands");
407	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
408	    &admin_stats->completed_cmd, 0, "Completed commands");
409	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
410	    &admin_stats->out_of_space, 0, "Queue out of space");
411	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
412	    &admin_stats->no_completion, 0, "Commands not completed");
413}
414
415static void
416ena_sysctl_add_srd_info(struct ena_adapter *adapter)
417{
418	device_t dev;
419
420	struct sysctl_oid *ena_srd_info;
421	struct sysctl_oid_list *srd_list;
422
423	struct sysctl_ctx_list *ctx;
424	struct sysctl_oid *tree;
425	struct sysctl_oid_list *child;
426
427	struct ena_admin_ena_srd_stats *srd_stats_ptr;
428	struct ena_srd_metrics cur_stat_strings;
429
430	int i;
431
432	dev = adapter->pdev;
433
434	ctx = device_get_sysctl_ctx(dev);
435	tree = device_get_sysctl_tree(dev);
436	child = SYSCTL_CHILDREN(tree);
437
438	ena_srd_info = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "ena_srd_info",
439	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's SRD information");
440	srd_list = SYSCTL_CHILDREN(ena_srd_info);
441
442	SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, "ena_srd_mode",
443            CTLFLAG_RD, &adapter->ena_srd_info.flags, 0,
444            "Describes which ENA-express features are enabled");
445
446	srd_stats_ptr = &adapter->ena_srd_info.ena_srd_stats;
447
448	for (i = 0 ; i < ENA_SRD_METRICS_ARRAY_SIZE; i++) {
449		cur_stat_strings = ena_srd_stats_strings[i];
450		SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, cur_stat_strings.name,
451		    CTLFLAG_RD, (u64 *)srd_stats_ptr + cur_stat_strings.stat_offset,
452		    0, cur_stat_strings.description);
453	}
454}
455
456static void
457ena_sysctl_add_customer_metrics(struct ena_adapter *adapter)
458{
459	device_t dev;
460	struct ena_com_dev *ena_dev;
461
462	struct sysctl_ctx_list *ctx;
463	struct sysctl_oid *tree;
464	struct sysctl_oid_list *child;
465
466	struct sysctl_oid *customer_metric;
467	struct sysctl_oid_list *customer_list;
468
469	int i;
470
471	dev = adapter->pdev;
472	ena_dev = adapter->ena_dev;
473
474	ctx = device_get_sysctl_ctx(dev);
475	tree = device_get_sysctl_tree(dev);
476	child = SYSCTL_CHILDREN(tree);
477	customer_metric = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "customer_metrics",
478	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's customer metrics");
479	customer_list = SYSCTL_CHILDREN(customer_metric);
480
481	for (i = 0; i < ENA_CUSTOMER_METRICS_ARRAY_SIZE; i++) {
482	        if (ena_com_get_customer_metric_support(ena_dev, i)) {
483	                SYSCTL_ADD_U64(ctx, customer_list, OID_AUTO, ena_hw_stats_strings[i].name,
484	                    CTLFLAG_RD, &adapter->customer_metrics_array[i], 0,
485	                    ena_hw_stats_strings[i].description);
486	         }
487	 }
488}
489
490static void
491ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
492{
493	device_t dev;
494	struct ena_admin_eni_stats *eni_metrics;
495
496	struct sysctl_ctx_list *ctx;
497	struct sysctl_oid *tree;
498	struct sysctl_oid_list *child;
499
500	struct sysctl_oid *eni_node;
501	struct sysctl_oid_list *eni_list;
502
503	dev = adapter->pdev;
504
505	ctx = device_get_sysctl_ctx(dev);
506	tree = device_get_sysctl_tree(dev);
507	child = SYSCTL_CHILDREN(tree);
508
509	eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
510	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
511	eni_list = SYSCTL_CHILDREN(eni_node);
512
513	eni_metrics = &adapter->eni_metrics;
514
515	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
516	    CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
517	    "Inbound BW allowance exceeded");
518	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
519	    CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
520	    "Outbound BW allowance exceeded");
521	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
522	    CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
523	    "PPS allowance exceeded");
524	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
525	    CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
526	    "Connection tracking allowance exceeded");
527	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
528	    CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
529	    "Linklocal packet rate allowance exceeded");
530}
531
532static void
533ena_sysctl_add_tuneables(struct ena_adapter *adapter)
534{
535	device_t dev;
536
537	struct sysctl_ctx_list *ctx;
538	struct sysctl_oid *tree;
539	struct sysctl_oid_list *child;
540
541	dev = adapter->pdev;
542
543	ctx = device_get_sysctl_ctx(dev);
544	tree = device_get_sysctl_tree(dev);
545	child = SYSCTL_CHILDREN(tree);
546
547	/* Tuneable number of buffers in the buf-ring (drbr) */
548	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
549	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
550	    ena_sysctl_buf_ring_size, "I",
551	    "Size of the Tx buffer ring (drbr).");
552
553	/* Tuneable number of the Rx ring size */
554	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
555	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
556	    ena_sysctl_rx_queue_size, "I",
557	    "Size of the Rx ring. The size should be a power of 2.");
558
559	/* Tuneable number of IO queues */
560	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
561	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
562	    ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
563
564	/*
565	 * Tuneable, which determines how often ENA metrics will be read.
566	 * 0 means it's turned off. Maximum allowed value is limited by:
567	 * ENA_METRICS_MAX_SAMPLE_INTERVAL.
568	 */
569	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "stats_sample_interval",
570	    CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
571	    ena_sysctl_metrics_interval, "SU",
572	    "Interval in seconds for updating Netword interface metrics. 0 turns off the update.");
573}
574
575/* Kernel option RSS prevents manipulation of key hash and indirection table. */
576#ifndef RSS
577static void
578ena_sysctl_add_rss(struct ena_adapter *adapter)
579{
580	device_t dev;
581
582	struct sysctl_ctx_list *ctx;
583	struct sysctl_oid *tree;
584	struct sysctl_oid_list *child;
585
586	dev = adapter->pdev;
587
588	ctx = device_get_sysctl_ctx(dev);
589	tree = device_get_sysctl_tree(dev);
590	child = SYSCTL_CHILDREN(tree);
591
592	/* RSS options */
593	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
594	    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
595	child = SYSCTL_CHILDREN(tree);
596
597	/* RSS hash key */
598	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
599	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
600	    ena_sysctl_rss_key, "A", "RSS key.");
601
602	/* Tuneable RSS indirection table */
603	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
604	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
605	    ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
606
607	/* RSS indirection table size */
608	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
609	    CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
610	    "RSS indirection table size.");
611}
612#endif /* RSS */
613
614static void
615ena_sysctl_add_irq_affinity(struct ena_adapter *adapter)
616{
617	device_t dev;
618
619	struct sysctl_ctx_list *ctx;
620	struct sysctl_oid *tree;
621	struct sysctl_oid_list *child;
622
623	dev = adapter->pdev;
624
625	ctx = device_get_sysctl_ctx(dev);
626	tree = device_get_sysctl_tree(dev);
627	child = SYSCTL_CHILDREN(tree);
628
629	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "irq_affinity",
630	    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Decide base CPU and stride for irqs affinity.");
631	child = SYSCTL_CHILDREN(tree);
632
633	/* Add base cpu leaf */
634	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "base_cpu",
635	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
636	    ena_sysctl_irq_base_cpu, "I", "Base cpu index for setting irq affinity.");
637
638	/* Add cpu stride leaf */
639	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "cpu_stride",
640	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
641	    ena_sysctl_irq_cpu_stride, "I", "Distance between irqs when setting affinity.");
642}
643
644
645/*
646 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
647 *
648 * Whether the nodes are registered or unregistered depends on a delta between
649 * the `old` and `new` parameters, representing the number of queues.
650 *
651 * This function is used to hide sysctl attributes for queue nodes which aren't
652 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
653 *
654 * NOTE:
655 * All unregistered nodes must be registered again at detach, i.e. by a call to
656 * this function.
657 */
658void
659ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
660{
661	struct sysctl_oid *oid;
662	int min, max, i;
663
664	min = MIN(old, new);
665	max = MIN(MAX(old, new), adapter->max_num_io_queues);
666
667	for (i = min; i < max; ++i) {
668		oid = adapter->que[i].oid;
669
670		sysctl_wlock();
671		if (old > new)
672			sysctl_unregister_oid(oid);
673		else
674			sysctl_register_oid(oid);
675		sysctl_wunlock();
676	}
677}
678
679static int
680ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
681{
682	struct ena_adapter *adapter = arg1;
683	uint32_t val;
684	int error;
685
686	ENA_LOCK_LOCK();
687	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
688		error = EINVAL;
689		goto unlock;
690	}
691
692	val = 0;
693	error = sysctl_wire_old_buffer(req, sizeof(val));
694	if (error == 0) {
695		val = adapter->buf_ring_size;
696		error = sysctl_handle_32(oidp, &val, 0, req);
697	}
698	if (error != 0 || req->newptr == NULL)
699		goto unlock;
700
701	if (!powerof2(val) || val == 0) {
702		ena_log(adapter->pdev, ERR,
703		    "Requested new Tx buffer ring size (%u) is not a power of 2\n",
704		    val);
705		error = EINVAL;
706		goto unlock;
707	}
708
709	if (val != adapter->buf_ring_size) {
710		ena_log(adapter->pdev, INFO,
711		    "Requested new Tx buffer ring size: %d. Old size: %d\n",
712		    val, adapter->buf_ring_size);
713
714		error = ena_update_buf_ring_size(adapter, val);
715	} else {
716		ena_log(adapter->pdev, ERR,
717		    "New Tx buffer ring size is the same as already used: %u\n",
718		    adapter->buf_ring_size);
719	}
720
721unlock:
722	ENA_LOCK_UNLOCK();
723
724	return (error);
725}
726
727static int
728ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
729{
730	struct ena_adapter *adapter = arg1;
731	uint32_t val;
732	int error;
733
734	ENA_LOCK_LOCK();
735	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
736		error = EINVAL;
737		goto unlock;
738	}
739
740	val = 0;
741	error = sysctl_wire_old_buffer(req, sizeof(val));
742	if (error == 0) {
743		val = adapter->requested_rx_ring_size;
744		error = sysctl_handle_32(oidp, &val, 0, req);
745	}
746	if (error != 0 || req->newptr == NULL)
747		goto unlock;
748
749	if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
750		ena_log(adapter->pdev, ERR,
751		    "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
752		    val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
753		error = EINVAL;
754		goto unlock;
755	}
756
757	/* Check if the parameter is power of 2 */
758	if (!powerof2(val)) {
759		ena_log(adapter->pdev, ERR,
760		    "Requested new Rx queue size (%u) is not a power of 2\n",
761		    val);
762		error = EINVAL;
763		goto unlock;
764	}
765
766	if (val != adapter->requested_rx_ring_size) {
767		ena_log(adapter->pdev, INFO,
768		    "Requested new Rx queue size: %u. Old size: %u\n", val,
769		    adapter->requested_rx_ring_size);
770
771		error = ena_update_queue_size(adapter,
772		    adapter->requested_tx_ring_size, val);
773	} else {
774		ena_log(adapter->pdev, ERR,
775		    "New Rx queue size is the same as already used: %u\n",
776		    adapter->requested_rx_ring_size);
777	}
778
779unlock:
780	ENA_LOCK_UNLOCK();
781
782	return (error);
783}
784
785/*
786 * Change number of effectively used IO queues adapter->num_io_queues
787 */
788static int
789ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
790{
791	struct ena_adapter *adapter = arg1;
792	uint32_t old_num_queues, tmp = 0;
793	int error;
794
795	ENA_LOCK_LOCK();
796	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
797		error = EINVAL;
798		goto unlock;
799	}
800
801	error = sysctl_wire_old_buffer(req, sizeof(tmp));
802	if (error == 0) {
803		tmp = adapter->num_io_queues;
804		error = sysctl_handle_int(oidp, &tmp, 0, req);
805	}
806	if (error != 0 || req->newptr == NULL)
807		goto unlock;
808
809	if (tmp == 0) {
810		ena_log(adapter->pdev, ERR,
811		    "Requested number of IO queues is zero\n");
812		error = EINVAL;
813		goto unlock;
814	}
815
816	/*
817	 * The adapter::max_num_io_queues is the HW capability. The system
818	 * resources availability may potentially be a tighter limit. Therefore
819	 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
820	 * always holds true, while the `adapter::msix_vecs` is variable across
821	 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
822	 */
823	if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
824		ena_log(adapter->pdev, ERR,
825		    "Requested number of IO queues is higher than maximum allowed (%u)\n",
826		    adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
827		error = EINVAL;
828		goto unlock;
829	}
830	if (tmp == adapter->num_io_queues) {
831		ena_log(adapter->pdev, ERR,
832		    "Requested number of IO queues is equal to current value "
833		    "(%u)\n",
834		    adapter->num_io_queues);
835	} else {
836		ena_log(adapter->pdev, INFO,
837		    "Requested new number of IO queues: %u, current value: "
838		    "%u\n",
839		    tmp, adapter->num_io_queues);
840
841		old_num_queues = adapter->num_io_queues;
842		error = ena_update_io_queue_nb(adapter, tmp);
843		if (error != 0)
844			return (error);
845
846		ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
847	}
848
849unlock:
850	ENA_LOCK_UNLOCK();
851
852	return (error);
853}
854
855static int
856ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS)
857{
858	struct ena_adapter *adapter = arg1;
859	uint16_t interval;
860	int error;
861
862	ENA_LOCK_LOCK();
863	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
864		error = EINVAL;
865		goto unlock;
866	}
867
868	error = sysctl_wire_old_buffer(req, sizeof(interval));
869	if (error == 0) {
870		interval = adapter->metrics_sample_interval;
871		error = sysctl_handle_16(oidp, &interval, 0, req);
872	}
873	if (error != 0 || req->newptr == NULL)
874		goto unlock;
875
876	if (interval > ENA_METRICS_MAX_SAMPLE_INTERVAL) {
877		ena_log(adapter->pdev, ERR,
878		    "ENA metrics update interval is out of range - maximum allowed value: %d seconds\n",
879		    ENA_METRICS_MAX_SAMPLE_INTERVAL);
880		error = EINVAL;
881		goto unlock;
882	}
883
884	if (interval == 0) {
885		ena_log(adapter->pdev, INFO,
886		    "ENA metrics update is now turned off\n");
887		bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
888	} else {
889		ena_log(adapter->pdev, INFO,
890		    "ENA metrics update interval is set to: %" PRIu16
891		    " seconds\n",
892		    interval);
893	}
894
895	adapter->metrics_sample_interval = interval;
896
897unlock:
898	ENA_LOCK_UNLOCK();
899
900	return (0);
901}
902
903static int
904ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS)
905{
906	struct ena_adapter *adapter = arg1;
907	int irq_base_cpu = 0;
908	int error;
909
910	ENA_LOCK_LOCK();
911	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
912		error = ENODEV;
913		goto unlock;
914	}
915
916	error = sysctl_wire_old_buffer(req, sizeof(irq_base_cpu));
917	if (error == 0) {
918		irq_base_cpu = adapter->irq_cpu_base;
919		error = sysctl_handle_int(oidp, &irq_base_cpu, 0, req);
920	}
921	if (error != 0 || req->newptr == NULL)
922		goto unlock;
923
924	if (irq_base_cpu <= ENA_BASE_CPU_UNSPECIFIED) {
925		ena_log(adapter->pdev, ERR,
926		    "Requested base CPU is less than zero.\n");
927		error = EINVAL;
928		goto unlock;
929	}
930
931	if (irq_base_cpu > mp_ncpus) {
932		ena_log(adapter->pdev, INFO,
933		    "Requested base CPU is larger than the number of available CPUs. \n");
934		error = EINVAL;
935		goto unlock;
936
937	}
938
939	if (irq_base_cpu == adapter->irq_cpu_base) {
940		ena_log(adapter->pdev, INFO,
941		    "Requested IRQ base CPU is equal to current value "
942		    "(%d)\n",
943		    adapter->irq_cpu_base);
944		goto unlock;
945	}
946
947	ena_log(adapter->pdev, INFO,
948	    "Requested new IRQ base CPU: %d, current value: %d\n",
949	    irq_base_cpu, adapter->irq_cpu_base);
950
951	error = ena_update_base_cpu(adapter, irq_base_cpu);
952
953unlock:
954	ENA_LOCK_UNLOCK();
955
956	return (error);
957}
958
959static int
960ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS)
961{
962	struct ena_adapter *adapter = arg1;
963	int32_t irq_cpu_stride = 0;
964	int error;
965
966	ENA_LOCK_LOCK();
967	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
968		error = ENODEV;
969		goto unlock;
970	}
971
972	error = sysctl_wire_old_buffer(req, sizeof(irq_cpu_stride));
973	if (error == 0) {
974		irq_cpu_stride = adapter->irq_cpu_stride;
975		error = sysctl_handle_int(oidp, &irq_cpu_stride, 0, req);
976	}
977	if (error != 0 || req->newptr == NULL)
978		goto unlock;
979
980	if (irq_cpu_stride < 0) {
981		ena_log(adapter->pdev, ERR,
982		    "Requested IRQ stride is less than zero.\n");
983		error = EINVAL;
984		goto unlock;
985	}
986
987	if (irq_cpu_stride > mp_ncpus) {
988		ena_log(adapter->pdev, INFO,
989		    "Warning: Requested IRQ stride is larger than the number of available CPUs.\n");
990	}
991
992	if (irq_cpu_stride == adapter->irq_cpu_stride) {
993		ena_log(adapter->pdev, INFO,
994		    "Requested IRQ CPU stride is equal to current value "
995		    "(%u)\n",
996		    adapter->irq_cpu_stride);
997		goto unlock;
998	}
999
1000	ena_log(adapter->pdev, INFO,
1001	    "Requested new IRQ CPU stride: %u, current value: %u\n",
1002	    irq_cpu_stride, adapter->irq_cpu_stride);
1003
1004	error = ena_update_cpu_stride(adapter, irq_cpu_stride);
1005	if (error != 0)
1006		goto unlock;
1007
1008unlock:
1009	ENA_LOCK_UNLOCK();
1010
1011	return (error);
1012}
1013
1014#ifndef RSS
1015/*
1016 * Change the Receive Side Scaling hash key.
1017 */
1018static int
1019ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
1020{
1021	struct ena_adapter *adapter = arg1;
1022	struct ena_com_dev *ena_dev = adapter->ena_dev;
1023	enum ena_admin_hash_functions ena_func;
1024	char msg[ENA_HASH_KEY_MSG_SIZE];
1025	char elem[3] = { 0 };
1026	char *endp;
1027	u8 rss_key[ENA_HASH_KEY_SIZE];
1028	int error, i;
1029
1030	ENA_LOCK_LOCK();
1031	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
1032		error = EINVAL;
1033		goto unlock;
1034	}
1035
1036	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1037		error = ENOTSUP;
1038		goto unlock;
1039	}
1040
1041	error = sysctl_wire_old_buffer(req, sizeof(msg));
1042	if (error != 0)
1043		goto unlock;
1044
1045	error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
1046	if (error != 0) {
1047		device_printf(adapter->pdev, "Cannot get hash function\n");
1048		goto unlock;
1049	}
1050
1051	if (ena_func != ENA_ADMIN_TOEPLITZ) {
1052		error = EINVAL;
1053		device_printf(adapter->pdev, "Unsupported hash algorithm\n");
1054		goto unlock;
1055	}
1056
1057	error = ena_rss_get_hash_key(ena_dev, rss_key);
1058	if (error != 0) {
1059		device_printf(adapter->pdev, "Cannot get hash key\n");
1060		goto unlock;
1061	}
1062
1063	for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
1064		snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
1065
1066	error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
1067	if (error != 0 || req->newptr == NULL)
1068		goto unlock;
1069
1070	if (strlen(msg) != sizeof(msg) - 1) {
1071		error = EINVAL;
1072		device_printf(adapter->pdev, "Invalid key size\n");
1073		goto unlock;
1074	}
1075
1076	for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
1077		strncpy(elem, &msg[i * 2], 2);
1078		rss_key[i] = strtol(elem, &endp, 16);
1079
1080		/* Both hex nibbles in the string must be valid to continue. */
1081		if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
1082			error = EINVAL;
1083			device_printf(adapter->pdev,
1084			    "Invalid key hex value: '%c'\n", *endp);
1085			goto unlock;
1086		}
1087	}
1088
1089	error = ena_rss_set_hash(ena_dev, rss_key);
1090	if (error != 0)
1091		device_printf(adapter->pdev, "Cannot fill hash key\n");
1092
1093unlock:
1094	ENA_LOCK_UNLOCK();
1095
1096	return (error);
1097}
1098
1099/*
1100 * Change the Receive Side Scaling indirection table.
1101 *
1102 * The sysctl entry string consists of one or more `x:y` keypairs, where
1103 * x stands for the table index and y for its new value.
1104 * Table indices that don't need to be updated can be omitted from the string
1105 * and will retain their existing values. If an index is entered more than once,
1106 * the last value is used.
1107 *
1108 * Example:
1109 * To update two selected indices in the RSS indirection table, e.g. setting
1110 * index 0 to queue 5 and then index 5 to queue 0, the below command should be
1111 * used:
1112 *   sysctl dev.ena.0.rss.indir_table="0:5 5:0"
1113 */
1114static int
1115ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
1116{
1117	int num_queues, error;
1118	struct ena_adapter *adapter = arg1;
1119	struct ena_indir *indir;
1120	char *msg, *buf, *endp;
1121	uint32_t idx, value;
1122
1123	ENA_LOCK_LOCK();
1124	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
1125		error = EINVAL;
1126		goto unlock;
1127	}
1128
1129	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1130		error = ENOTSUP;
1131		goto unlock;
1132	}
1133
1134	indir = adapter->rss_indir;
1135	msg = indir->sysctl_buf;
1136
1137	if (unlikely(indir == NULL)) {
1138		error = ENOTSUP;
1139		goto unlock;
1140	}
1141
1142	error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
1143	if (error != 0 || req->newptr == NULL)
1144		goto unlock;
1145
1146	num_queues = adapter->num_io_queues;
1147
1148	/*
1149	 * This sysctl expects msg to be a list of `x:y` record pairs,
1150	 * where x is the indirection table index and y is its value.
1151	 */
1152	for (buf = msg; *buf != '\0'; buf = endp) {
1153		idx = strtol(buf, &endp, 10);
1154
1155		if (endp == buf || idx < 0) {
1156			device_printf(adapter->pdev, "Invalid index: %s\n",
1157			    buf);
1158			error = EINVAL;
1159			break;
1160		}
1161
1162		if (idx >= ENA_RX_RSS_TABLE_SIZE) {
1163			device_printf(adapter->pdev, "Index %d out of range\n",
1164			    idx);
1165			error = ERANGE;
1166			break;
1167		}
1168
1169		buf = endp;
1170
1171		if (*buf++ != ':') {
1172			device_printf(adapter->pdev, "Missing ':' separator\n");
1173			error = EINVAL;
1174			break;
1175		}
1176
1177		value = strtol(buf, &endp, 10);
1178
1179		if (endp == buf || value < 0) {
1180			device_printf(adapter->pdev, "Invalid value: %s\n",
1181			    buf);
1182			error = EINVAL;
1183			break;
1184		}
1185
1186		if (value >= num_queues) {
1187			device_printf(adapter->pdev, "Value %d out of range\n",
1188			    value);
1189			error = ERANGE;
1190			break;
1191		}
1192
1193		indir->table[idx] = value;
1194	}
1195
1196	if (error != 0) /* Reload indirection table with last good data. */
1197		ena_rss_indir_get(adapter, indir->table);
1198
1199	/* At this point msg has been clobbered by sysctl_handle_string. */
1200	ena_rss_copy_indir_buf(msg, indir->table);
1201
1202	if (error == 0)
1203		error = ena_rss_indir_set(adapter, indir->table);
1204
1205unlock:
1206	ENA_LOCK_UNLOCK();
1207
1208	return (error);
1209}
1210#endif /* RSS */
1211