nvme_sysctl.c revision 292074
1276305Sngie/*-
2246149Ssjg * Copyright (C) 2012-2013 Intel Corporation
3246149Ssjg * All rights reserved.
4246149Ssjg *
5246149Ssjg * Redistribution and use in source and binary forms, with or without
6246149Ssjg * modification, are permitted provided that the following conditions
7246149Ssjg * are met:
8246149Ssjg * 1. Redistributions of source code must retain the above copyright
9246149Ssjg *    notice, this list of conditions and the following disclaimer.
10246149Ssjg * 2. Redistributions in binary form must reproduce the above copyright
11246149Ssjg *    notice, this list of conditions and the following disclaimer in the
12246149Ssjg *    documentation and/or other materials provided with the distribution.
13246149Ssjg *
14246149Ssjg * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15276305Sngie * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16246149Ssjg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17246149Ssjg * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18246149Ssjg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19246149Ssjg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20246149Ssjg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21246149Ssjg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22246149Ssjg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23246149Ssjg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24246149Ssjg * SUCH DAMAGE.
25246149Ssjg */
26246149Ssjg
27246149Ssjg#include <sys/cdefs.h>
28246149Ssjg__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_sysctl.c 292074 2015-12-11 02:06:03Z smh $");
29246149Ssjg
30246149Ssjg#include <sys/param.h>
31246149Ssjg#include <sys/bus.h>
32246149Ssjg#include <sys/sysctl.h>
33246149Ssjg
34246149Ssjg#include "nvme_private.h"
35246149Ssjg
/* Root of the driver's sysctl namespace: kern.nvme. */
SYSCTL_NODE(_kern, OID_AUTO, nvme, CTLFLAG_RD, 0, "NVM Express");
/*
 * Intel NVMe controllers have a slow path for I/Os that span a 128KB
 * stripe boundary but ZFS limits ashift, which is derived from
 * d_stripesize, to 13 (8KB) so we limit the stripesize reported to
 * geom(8) to 4KB by default.
 *
 * This may result in a small number of additional I/Os to require
 * splitting in nvme(4), however the NVMe I/O path is very efficient
 * so these additional I/Os will cause very minimal (if any) difference
 * in performance or CPU utilisation.
 */
int nvme_max_optimal_sectorsize = 1<<12;	/* 4KB */
SYSCTL_INT(_kern_nvme, OID_AUTO, max_optimal_sectorsize, CTLFLAG_RWTUN,
    &nvme_max_optimal_sectorsize, 0, "The maximum optimal sectorsize reported");

/*
 * CTLTYPE_S64 and sysctl_handle_64 were added in r217616.  Define these
 *  explicitly here for older kernels that don't include the r217616
 *  changeset.
 */
#ifndef CTLTYPE_S64
#define CTLTYPE_S64		CTLTYPE_QUAD
#define sysctl_handle_64	sysctl_handle_quad
#endif
61246149Ssjg
62246149Ssjgstatic void
63246149Ssjgnvme_dump_queue(struct nvme_qpair *qpair)
64246149Ssjg{
65246149Ssjg	struct nvme_completion *cpl;
66246149Ssjg	struct nvme_command *cmd;
67246149Ssjg	int i;
68246149Ssjg
69246149Ssjg	printf("id:%04Xh phase:%d\n", qpair->id, qpair->phase);
70246149Ssjg
71246149Ssjg	printf("Completion queue:\n");
72246149Ssjg	for (i = 0; i < qpair->num_entries; i++) {
73246149Ssjg		cpl = &qpair->cpl[i];
74246149Ssjg		printf("%05d: ", i);
75246149Ssjg		nvme_dump_completion(cpl);
76246149Ssjg	}
77246149Ssjg
78246149Ssjg	printf("Submission queue:\n");
79246149Ssjg	for (i = 0; i < qpair->num_entries; i++) {
80246149Ssjg		cmd = &qpair->cmd[i];
81246149Ssjg		printf("%05d: ", i);
82246149Ssjg		nvme_dump_command(cmd);
83246149Ssjg	}
84246149Ssjg}
85246149Ssjg
86246149Ssjg
87246149Ssjgstatic int
88246149Ssjgnvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS)
89246149Ssjg{
90246149Ssjg	struct nvme_qpair 	*qpair = arg1;
91246149Ssjg	uint32_t		val = 0;
92246149Ssjg
93246149Ssjg	int error = sysctl_handle_int(oidp, &val, 0, req);
94246149Ssjg
95246149Ssjg	if (error)
96246149Ssjg		return (error);
97246149Ssjg
98246149Ssjg	if (val != 0)
99246149Ssjg		nvme_dump_queue(qpair);
100246149Ssjg
101246149Ssjg	return (0);
102246149Ssjg}
103246149Ssjg
104246149Ssjgstatic int
105246149Ssjgnvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS)
106246149Ssjg{
107246149Ssjg	struct nvme_controller *ctrlr = arg1;
108246149Ssjg	uint32_t oldval = ctrlr->int_coal_time;
109246149Ssjg	int error = sysctl_handle_int(oidp, &ctrlr->int_coal_time, 0,
110246149Ssjg	    req);
111246149Ssjg
112246149Ssjg	if (error)
113246149Ssjg		return (error);
114246149Ssjg
115246149Ssjg	if (oldval != ctrlr->int_coal_time)
116246149Ssjg		nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
117246149Ssjg		    ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
118246149Ssjg		    NULL);
119246149Ssjg
120246149Ssjg	return (0);
121246149Ssjg}
122246149Ssjg
123246149Ssjgstatic int
124246149Ssjgnvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)
125246149Ssjg{
126246149Ssjg	struct nvme_controller *ctrlr = arg1;
127246149Ssjg	uint32_t oldval = ctrlr->int_coal_threshold;
128246149Ssjg	int error = sysctl_handle_int(oidp, &ctrlr->int_coal_threshold, 0,
129246149Ssjg	    req);
130246149Ssjg
131246149Ssjg	if (error)
132246149Ssjg		return (error);
133246149Ssjg
134246149Ssjg	if (oldval != ctrlr->int_coal_threshold)
135246149Ssjg		nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
136246149Ssjg		    ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
137246149Ssjg		    NULL);
138246149Ssjg
139246149Ssjg	return (0);
140246149Ssjg}
141246149Ssjg
142246149Ssjgstatic int
143246149Ssjgnvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)
144246149Ssjg{
145246149Ssjg	struct nvme_controller *ctrlr = arg1;
146246149Ssjg	uint32_t oldval = ctrlr->timeout_period;
147246149Ssjg	int error = sysctl_handle_int(oidp, &ctrlr->timeout_period, 0, req);
148246149Ssjg
149246149Ssjg	if (error)
150246149Ssjg		return (error);
151246149Ssjg
152246149Ssjg	if (ctrlr->timeout_period > NVME_MAX_TIMEOUT_PERIOD ||
153246149Ssjg	    ctrlr->timeout_period < NVME_MIN_TIMEOUT_PERIOD) {
154246149Ssjg		ctrlr->timeout_period = oldval;
155246149Ssjg		return (EINVAL);
156246149Ssjg	}
157246149Ssjg
158246149Ssjg	return (0);
159246149Ssjg}
160246149Ssjg
161246149Ssjgstatic void
162246149Ssjgnvme_qpair_reset_stats(struct nvme_qpair *qpair)
163246149Ssjg{
164246149Ssjg
165246149Ssjg	qpair->num_cmds = 0;
166246149Ssjg	qpair->num_intr_handler_calls = 0;
167246149Ssjg}
168246149Ssjg
169246149Ssjgstatic int
170246149Ssjgnvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS)
171246149Ssjg{
172246149Ssjg	struct nvme_controller 	*ctrlr = arg1;
173246149Ssjg	int64_t			num_cmds = 0;
174246149Ssjg	int			i;
175246149Ssjg
176246149Ssjg	num_cmds = ctrlr->adminq.num_cmds;
177246149Ssjg
178246149Ssjg	for (i = 0; i < ctrlr->num_io_queues; i++)
179246149Ssjg		num_cmds += ctrlr->ioq[i].num_cmds;
180246149Ssjg
181246149Ssjg	return (sysctl_handle_64(oidp, &num_cmds, 0, req));
182246149Ssjg}
183246149Ssjg
184246149Ssjgstatic int
185246149Ssjgnvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS)
186246149Ssjg{
187246149Ssjg	struct nvme_controller 	*ctrlr = arg1;
188246149Ssjg	int64_t			num_intr_handler_calls = 0;
189246149Ssjg	int			i;
190246149Ssjg
191246149Ssjg	num_intr_handler_calls = ctrlr->adminq.num_intr_handler_calls;
192246149Ssjg
193246149Ssjg	for (i = 0; i < ctrlr->num_io_queues; i++)
194246149Ssjg		num_intr_handler_calls += ctrlr->ioq[i].num_intr_handler_calls;
195246149Ssjg
196246149Ssjg	return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req));
197246149Ssjg}
198246149Ssjg
199246149Ssjgstatic int
200246149Ssjgnvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)
201246149Ssjg{
202246149Ssjg	struct nvme_controller 	*ctrlr = arg1;
203246149Ssjg	uint32_t		i, val = 0;
204246149Ssjg
205246149Ssjg	int error = sysctl_handle_int(oidp, &val, 0, req);
206246149Ssjg
207246149Ssjg	if (error)
208246149Ssjg		return (error);
209246149Ssjg
210246149Ssjg	if (val != 0) {
211246149Ssjg		nvme_qpair_reset_stats(&ctrlr->adminq);
212246149Ssjg
213246149Ssjg		for (i = 0; i < ctrlr->num_io_queues; i++)
214246149Ssjg			nvme_qpair_reset_stats(&ctrlr->ioq[i]);
215246149Ssjg	}
216246149Ssjg
217246149Ssjg	return (0);
218246149Ssjg}
219246149Ssjg
220246149Ssjg
/*
 * Attach the per-queue-pair sysctls beneath the given queue node:
 * ring geometry (num_entries, num_trackers), the driver's view of the
 * hardware ring indices (sq_head, sq_tail, cq_head), the command and
 * interrupt counters, and a write-to-trigger dump_debug proc.
 */
static void
nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
    struct sysctl_ctx_list *ctrlr_ctx, struct sysctl_oid *que_tree)
{
	struct sysctl_oid_list	*que_list = SYSCTL_CHILDREN(que_tree);

	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries",
	    CTLFLAG_RD, &qpair->num_entries, 0,
	    "Number of entries in hardware queue");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers",
	    CTLFLAG_RD, &qpair->num_trackers, 0,
	    "Number of trackers pre-allocated for this queue pair");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head",
	    CTLFLAG_RD, &qpair->sq_head, 0,
	    "Current head of submission queue (as observed by driver)");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_tail",
	    CTLFLAG_RD, &qpair->sq_tail, 0,
	    "Current tail of submission queue (as observed by driver)");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "cq_head",
	    CTLFLAG_RD, &qpair->cq_head, 0,
	    "Current head of completion queue (as observed by driver)");

	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_cmds",
	    CTLFLAG_RD, &qpair->num_cmds, "Number of commands submitted");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_intr_handler_calls",
	    CTLFLAG_RD, &qpair->num_intr_handler_calls,
	    "Number of times interrupt handler was invoked (will typically be "
	    "less than number of actual interrupts generated due to "
	    "coalescing)");

	/* Writing a non-zero value dumps the queue rings to the console. */
	SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO,
	    "dump_debug", CTLTYPE_UINT | CTLFLAG_RW, qpair, 0,
	    nvme_sysctl_dump_debug, "IU", "Dump debug data");
}
255
256void
257nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr)
258{
259	struct sysctl_ctx_list	*ctrlr_ctx;
260	struct sysctl_oid	*ctrlr_tree, *que_tree;
261	struct sysctl_oid_list	*ctrlr_list;
262#define QUEUE_NAME_LENGTH	16
263	char			queue_name[QUEUE_NAME_LENGTH];
264	int			i;
265
266	ctrlr_ctx = device_get_sysctl_ctx(ctrlr->dev);
267	ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
268	ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);
269
270	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
271	    "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
272	    nvme_sysctl_int_coal_time, "IU",
273	    "Interrupt coalescing timeout (in microseconds)");
274
275	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
276	    "int_coal_threshold", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
277	    nvme_sysctl_int_coal_threshold, "IU",
278	    "Interrupt coalescing threshold");
279
280	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
281	    "timeout_period", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
282	    nvme_sysctl_timeout_period, "IU",
283	    "Timeout period (in seconds)");
284
285	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
286	    "num_cmds", CTLTYPE_S64 | CTLFLAG_RD,
287	    ctrlr, 0, nvme_sysctl_num_cmds, "IU",
288	    "Number of commands submitted");
289
290	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
291	    "num_intr_handler_calls", CTLTYPE_S64 | CTLFLAG_RD,
292	    ctrlr, 0, nvme_sysctl_num_intr_handler_calls, "IU",
293	    "Number of times interrupt handler was invoked (will "
294	    "typically be less than number of actual interrupts "
295	    "generated due to coalescing)");
296
297	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
298	    "reset_stats", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
299	    nvme_sysctl_reset_stats, "IU", "Reset statistics to zero");
300
301	que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, "adminq",
302	    CTLFLAG_RD, NULL, "Admin Queue");
303
304	nvme_sysctl_initialize_queue(&ctrlr->adminq, ctrlr_ctx, que_tree);
305
306	for (i = 0; i < ctrlr->num_io_queues; i++) {
307		snprintf(queue_name, QUEUE_NAME_LENGTH, "ioq%d", i);
308		que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO,
309		    queue_name, CTLFLAG_RD, NULL, "IO Queue");
310		nvme_sysctl_initialize_queue(&ctrlr->ioq[i], ctrlr_ctx,
311		    que_tree);
312	}
313}
314