/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2012-2016 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_nvme.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/sysctl.h>

#include "nvme_private.h"

#ifndef NVME_USE_NVD
#define NVME_USE_NVD 0
#endif

int nvme_use_nvd = NVME_USE_NVD;
bool nvme_verbose_cmd_dump = false;

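/*
 * The global tunables below live under the hw.nvme sysctl tree created here;
 * the per-controller and per-queue nodes are attached to each device's own
 * sysctl tree in nvme_sysctl_initialize_ctrlr() further down.
 */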
SYSCTL_NODE(_hw, OID_AUTO, nvme, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NVMe sysctl tunables");
SYSCTL_INT(_hw_nvme, OID_AUTO, use_nvd, CTLFLAG_RDTUN,
    &nvme_use_nvd, 1, "1 = Create NVD devices, 0 = Create NDA devices");
SYSCTL_BOOL(_hw_nvme, OID_AUTO, verbose_cmd_dump, CTLFLAG_RWTUN,
    &nvme_verbose_cmd_dump, 0,
    "enable verbose command printing when a command fails");

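/*
 * Print every completion and submission queue entry of a queue pair to the
 * console.  Invoked from the per-queue "dump_debug" sysctl handler below.
 */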
static void
nvme_dump_queue(struct nvme_qpair *qpair)
{
	struct nvme_completion *cpl;
	struct nvme_command *cmd;
	int i;

	printf("id:%04Xh phase:%d\n", qpair->id, qpair->phase);

	printf("Completion queue:\n");
	for (i = 0; i < qpair->num_entries; i++) {
		cpl = &qpair->cpl[i];
		printf("%05d: ", i);
		nvme_qpair_print_completion(qpair, cpl);
	}

	printf("Submission queue:\n");
	for (i = 0; i < qpair->num_entries; i++) {
		cmd = &qpair->cmd[i];
		printf("%05d: ", i);
		nvme_qpair_print_command(qpair, cmd);
	}
}

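/*
 * Sysctl handler backing the per-queue "dump_debug" node: writing any
 * non-zero value dumps the queue pair's contents via nvme_dump_queue().
 */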
static int
nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS)
{
	struct nvme_qpair	*qpair = arg1;
	uint32_t		val = 0;

	int error = sysctl_handle_int(oidp, &val, 0, req);

	if (error)
		return (error);

	if (val != 0)
		nvme_dump_queue(qpair);

	return (0);
}

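/*
 * Update the interrupt coalescing time (in microseconds).  If the value
 * changed, reprogram the controller with the new time/threshold pair via
 * nvme_ctrlr_cmd_set_interrupt_coalescing().
 */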
static int
nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	uint32_t oldval = ctrlr->int_coal_time;
	int error = sysctl_handle_int(oidp, &ctrlr->int_coal_time, 0,
	    req);

	if (error)
		return (error);

	if (oldval != ctrlr->int_coal_time)
		nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
		    ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
		    NULL);

	return (0);
}

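/*
 * Update the interrupt coalescing aggregation threshold.  As above, a changed
 * value is pushed to the controller together with the current coalescing time.
 */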
static int
nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	uint32_t oldval = ctrlr->int_coal_threshold;
	int error = sysctl_handle_int(oidp, &ctrlr->int_coal_threshold, 0,
	    req);

	if (error)
		return (error);

	if (oldval != ctrlr->int_coal_threshold)
		nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
		    ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
		    NULL);

	return (0);
}

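/*
 * Shared handler for the admin and I/O timeout_period sysctls.  New values
 * are accepted only within [NVME_MIN_TIMEOUT_PERIOD, NVME_MAX_TIMEOUT_PERIOD];
 * anything outside that range is rejected with EINVAL.
 */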
static int
nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)
{
	uint32_t *ptr = arg1;
	uint32_t newval = *ptr;
	int error = sysctl_handle_int(oidp, &newval, 0, req);

	if (error || (req->newptr == NULL))
		return (error);

	if (newval > NVME_MAX_TIMEOUT_PERIOD ||
	    newval < NVME_MIN_TIMEOUT_PERIOD) {
		return (EINVAL);
	} else {
		*ptr = newval;
	}

	return (0);
}

static void
nvme_qpair_reset_stats(struct nvme_qpair *qpair)
{

	/*
	 * Reset the values. Due to sanity checks in
	 * nvme_qpair_process_completions, we reset the number of interrupt
	 * calls to 1.
	 */
	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 1;
	qpair->num_retries = 0;
	qpair->num_failures = 0;
	qpair->num_ignored = 0;
	qpair->num_recovery_nolock = 0;
}

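/*
 * The read-only statistics handlers below all follow the same pattern: sum a
 * per-queue counter over the admin queue and every I/O queue and report the
 * total as a 64-bit value.
 */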
static int
nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller	*ctrlr = arg1;
	int64_t			num_cmds = 0;
	int			i;

	num_cmds = ctrlr->adminq.num_cmds;

	for (i = 0; i < ctrlr->num_io_queues; i++)
		num_cmds += ctrlr->ioq[i].num_cmds;

	return (sysctl_handle_64(oidp, &num_cmds, 0, req));
}

static int
nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller	*ctrlr = arg1;
	int64_t			num_intr_handler_calls = 0;
	int			i;

	num_intr_handler_calls = ctrlr->adminq.num_intr_handler_calls;

	for (i = 0; i < ctrlr->num_io_queues; i++)
		num_intr_handler_calls += ctrlr->ioq[i].num_intr_handler_calls;

	return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req));
}

static int
nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller	*ctrlr = arg1;
	int64_t			num_retries = 0;
	int			i;

	num_retries = ctrlr->adminq.num_retries;

	for (i = 0; i < ctrlr->num_io_queues; i++)
		num_retries += ctrlr->ioq[i].num_retries;

	return (sysctl_handle_64(oidp, &num_retries, 0, req));
}

static int
nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller	*ctrlr = arg1;
	int64_t			num_failures = 0;
	int			i;

	num_failures = ctrlr->adminq.num_failures;

	for (i = 0; i < ctrlr->num_io_queues; i++)
		num_failures += ctrlr->ioq[i].num_failures;

	return (sysctl_handle_64(oidp, &num_failures, 0, req));
}

static int
nvme_sysctl_num_ignored(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller	*ctrlr = arg1;
	int64_t			num_ignored = 0;
	int			i;

	num_ignored = ctrlr->adminq.num_ignored;

	for (i = 0; i < ctrlr->num_io_queues; i++)
		num_ignored += ctrlr->ioq[i].num_ignored;

	return (sysctl_handle_64(oidp, &num_ignored, 0, req));
}

static int
nvme_sysctl_num_recovery_nolock(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller	*ctrlr = arg1;
	int64_t			num;
	int			i;

	num = ctrlr->adminq.num_recovery_nolock;

	for (i = 0; i < ctrlr->num_io_queues; i++)
		num += ctrlr->ioq[i].num_recovery_nolock;

	return (sysctl_handle_64(oidp, &num, 0, req));
}

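/*
 * Writing a non-zero value to the reset_stats sysctl clears the statistics of
 * the admin queue and of every I/O queue.
 */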
static int
nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller	*ctrlr = arg1;
	uint32_t		i, val = 0;

	int error = sysctl_handle_int(oidp, &val, 0, req);

	if (error)
		return (error);

	if (val != 0) {
		nvme_qpair_reset_stats(&ctrlr->adminq);

		for (i = 0; i < ctrlr->num_io_queues; i++)
			nvme_qpair_reset_stats(&ctrlr->ioq[i]);
	}

	return (0);
}

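/*
 * Attach the per-queue sysctl nodes (queue geometry, head/tail indices,
 * statistics counters, and the dump_debug handler) under the given tree.
 */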
static void
nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
    struct sysctl_ctx_list *ctrlr_ctx, struct sysctl_oid *que_tree)
{
	struct sysctl_oid_list	*que_list = SYSCTL_CHILDREN(que_tree);

	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries",
	    CTLFLAG_RD, &qpair->num_entries, 0,
	    "Number of entries in hardware queue");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers",
	    CTLFLAG_RD, &qpair->num_trackers, 0,
	    "Number of trackers pre-allocated for this queue pair");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head",
	    CTLFLAG_RD, &qpair->sq_head, 0,
	    "Current head of submission queue (as observed by driver)");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_tail",
	    CTLFLAG_RD, &qpair->sq_tail, 0,
	    "Current tail of submission queue (as observed by driver)");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "cq_head",
	    CTLFLAG_RD, &qpair->cq_head, 0,
	    "Current head of completion queue (as observed by driver)");

	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_cmds",
	    CTLFLAG_RD, &qpair->num_cmds, "Number of commands submitted");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_intr_handler_calls",
	    CTLFLAG_RD, &qpair->num_intr_handler_calls,
	    "Number of times interrupt handler was invoked (will typically be "
	    "less than number of actual interrupts generated due to "
	    "coalescing)");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_retries",
	    CTLFLAG_RD, &qpair->num_retries, "Number of commands retried");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_failures",
	    CTLFLAG_RD, &qpair->num_failures,
	    "Number of commands ending in failure after all retries");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_ignored",
	    CTLFLAG_RD, &qpair->num_ignored,
	    "Number of interrupts posted but administratively ignored");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_recovery_nolock",
	    CTLFLAG_RD, &qpair->num_recovery_nolock,
	    "Number of times that we failed to lock recovery in the ISR");

	SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO,
	    "dump_debug", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    qpair, 0, nvme_sysctl_dump_debug, "IU", "Dump debug data");
}

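/*
 * Create the per-controller sysctl tree: tunables, aggregate statistics, the
 * CAP register halves, and one sub-tree per queue pair ("adminq" and "ioqN").
 */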
void
nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr)
{
	struct sysctl_ctx_list	*ctrlr_ctx;
	struct sysctl_oid	*ctrlr_tree, *que_tree;
	struct sysctl_oid_list	*ctrlr_list;
#define QUEUE_NAME_LENGTH	16
	char			queue_name[QUEUE_NAME_LENGTH];
	int			i;

	ctrlr_ctx = device_get_sysctl_ctx(ctrlr->dev);
	ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
	ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_io_queues",
	    CTLFLAG_RD, &ctrlr->num_io_queues, 0,
	    "Number of I/O queue pairs");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_int_coal_time, "IU",
	    "Interrupt coalescing timeout (in microseconds)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "int_coal_threshold",
	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0,
	    nvme_sysctl_int_coal_threshold, "IU",
	    "Interrupt coalescing threshold");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "admin_timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    &ctrlr->admin_timeout_period, 0, nvme_sysctl_timeout_period, "IU",
	    "Timeout period for Admin queue (in seconds)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    &ctrlr->timeout_period, 0, nvme_sysctl_timeout_period, "IU",
	    "Timeout period for I/O queues (in seconds)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_cmds", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_cmds, "IU",
	    "Number of commands submitted");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_intr_handler_calls",
	    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, ctrlr, 0,
	    nvme_sysctl_num_intr_handler_calls, "IU",
	    "Number of times interrupt handler was invoked (will "
	    "typically be less than number of actual interrupts "
	    "generated due to coalescing)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_retries", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_retries, "IU",
	    "Number of commands retried");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_failures", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_failures, "IU",
	    "Number of commands ending in failure after all retries");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_ignored", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_ignored, "IU",
	    "Number of interrupts ignored administratively");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_recovery_nolock", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_recovery_nolock, "IU",
	    "Number of times that we failed to lock recovery in the ISR");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "reset_stats", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr,
	    0, nvme_sysctl_reset_stats, "IU", "Reset statistics to zero");

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap_lo",
	    CTLFLAG_RD, &ctrlr->cap_lo, 0,
	    "Low 32 bits of the controller capabilities (CAP) register");

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap_hi",
	    CTLFLAG_RD, &ctrlr->cap_hi, 0,
	    "High 32 bits of the controller capabilities (CAP) register");

	que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, "adminq",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");

	nvme_sysctl_initialize_queue(&ctrlr->adminq, ctrlr_ctx, que_tree);

	for (i = 0; i < ctrlr->num_io_queues; i++) {
		snprintf(queue_name, QUEUE_NAME_LENGTH, "ioq%d", i);
		que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO,
		    queue_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IO Queue");
		nvme_sysctl_initialize_queue(&ctrlr->ioq[i], ctrlr_ctx,
		    que_tree);
	}
}