nvme_sysctl.c revision 292074
1276305Sngie/*- 2246149Ssjg * Copyright (C) 2012-2013 Intel Corporation 3246149Ssjg * All rights reserved. 4246149Ssjg * 5246149Ssjg * Redistribution and use in source and binary forms, with or without 6246149Ssjg * modification, are permitted provided that the following conditions 7246149Ssjg * are met: 8246149Ssjg * 1. Redistributions of source code must retain the above copyright 9246149Ssjg * notice, this list of conditions and the following disclaimer. 10246149Ssjg * 2. Redistributions in binary form must reproduce the above copyright 11246149Ssjg * notice, this list of conditions and the following disclaimer in the 12246149Ssjg * documentation and/or other materials provided with the distribution. 13246149Ssjg * 14246149Ssjg * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15276305Sngie * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16246149Ssjg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17246149Ssjg * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18246149Ssjg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19246149Ssjg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20246149Ssjg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21246149Ssjg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22246149Ssjg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23246149Ssjg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24246149Ssjg * SUCH DAMAGE. 25246149Ssjg */ 26246149Ssjg 27246149Ssjg#include <sys/cdefs.h> 28246149Ssjg__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_sysctl.c 292074 2015-12-11 02:06:03Z smh $"); 29246149Ssjg 30246149Ssjg#include <sys/param.h> 31246149Ssjg#include <sys/bus.h> 32246149Ssjg#include <sys/sysctl.h> 33246149Ssjg 34246149Ssjg#include "nvme_private.h" 35246149Ssjg 36246149SsjgSYSCTL_NODE(_kern, OID_AUTO, nvme, CTLFLAG_RD, 0, "NVM Express"); 37246149Ssjg/* 38246149Ssjg * Intel NVMe controllers have a slow path for I/Os that span a 128KB 39246149Ssjg * stripe boundary but ZFS limits ashift, which is derived from 40246149Ssjg * d_stripesize, to 13 (8KB) so we limit the stripesize reported to 41246149Ssjg * geom(8) to 4KB by default. 42246149Ssjg * 43246149Ssjg * This may result in a small number of additional I/Os to require 44246149Ssjg * splitting in nvme(4), however the NVMe I/O path is very efficient 45246149Ssjg * so these additional I/Os will cause very minimal (if any) difference 46246149Ssjg * in performance or CPU utilisation. 47246149Ssjg */ 48246149Ssjgint nvme_max_optimal_sectorsize = 1<<12; 49246149SsjgSYSCTL_INT(_kern_nvme, OID_AUTO, max_optimal_sectorsize, CTLFLAG_RWTUN, 50246149Ssjg &nvme_max_optimal_sectorsize, 0, "The maximum optimal sectorsize reported"); 51246149Ssjg 52246149Ssjg/* 53246149Ssjg * CTLTYPE_S64 and sysctl_handle_64 were added in r217616. Define these 54246149Ssjg * explicitly here for older kernels that don't include the r217616 55246149Ssjg * changeset. 56246149Ssjg */ 57246149Ssjg#ifndef CTLTYPE_S64 58246149Ssjg#define CTLTYPE_S64 CTLTYPE_QUAD 59246149Ssjg#define sysctl_handle_64 sysctl_handle_quad 60246149Ssjg#endif 61246149Ssjg 62246149Ssjgstatic void 63246149Ssjgnvme_dump_queue(struct nvme_qpair *qpair) 64246149Ssjg{ 65246149Ssjg struct nvme_completion *cpl; 66246149Ssjg struct nvme_command *cmd; 67246149Ssjg int i; 68246149Ssjg 69246149Ssjg printf("id:%04Xh phase:%d\n", qpair->id, qpair->phase); 70246149Ssjg 71246149Ssjg printf("Completion queue:\n"); 72246149Ssjg for (i = 0; i < qpair->num_entries; i++) { 73246149Ssjg cpl = &qpair->cpl[i]; 74246149Ssjg printf("%05d: ", i); 75246149Ssjg nvme_dump_completion(cpl); 76246149Ssjg } 77246149Ssjg 78246149Ssjg printf("Submission queue:\n"); 79246149Ssjg for (i = 0; i < qpair->num_entries; i++) { 80246149Ssjg cmd = &qpair->cmd[i]; 81246149Ssjg printf("%05d: ", i); 82246149Ssjg nvme_dump_command(cmd); 83246149Ssjg } 84246149Ssjg} 85246149Ssjg 86246149Ssjg 87246149Ssjgstatic int 88246149Ssjgnvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS) 89246149Ssjg{ 90246149Ssjg struct nvme_qpair *qpair = arg1; 91246149Ssjg uint32_t val = 0; 92246149Ssjg 93246149Ssjg int error = sysctl_handle_int(oidp, &val, 0, req); 94246149Ssjg 95246149Ssjg if (error) 96246149Ssjg return (error); 97246149Ssjg 98246149Ssjg if (val != 0) 99246149Ssjg nvme_dump_queue(qpair); 100246149Ssjg 101246149Ssjg return (0); 102246149Ssjg} 103246149Ssjg 104246149Ssjgstatic int 105246149Ssjgnvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS) 106246149Ssjg{ 107246149Ssjg struct nvme_controller *ctrlr = arg1; 108246149Ssjg uint32_t oldval = ctrlr->int_coal_time; 109246149Ssjg int error = sysctl_handle_int(oidp, &ctrlr->int_coal_time, 0, 110246149Ssjg req); 111246149Ssjg 112246149Ssjg if (error) 113246149Ssjg return (error); 114246149Ssjg 115246149Ssjg if (oldval != ctrlr->int_coal_time) 116246149Ssjg nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, 117246149Ssjg ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL, 118246149Ssjg NULL); 119246149Ssjg 120246149Ssjg return (0); 121246149Ssjg} 122246149Ssjg 123246149Ssjgstatic int 124246149Ssjgnvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS) 125246149Ssjg{ 126246149Ssjg struct nvme_controller *ctrlr = arg1; 127246149Ssjg uint32_t oldval = ctrlr->int_coal_threshold; 128246149Ssjg int error = sysctl_handle_int(oidp, &ctrlr->int_coal_threshold, 0, 129246149Ssjg req); 130246149Ssjg 131246149Ssjg if (error) 132246149Ssjg return (error); 133246149Ssjg 134246149Ssjg if (oldval != ctrlr->int_coal_threshold) 135246149Ssjg nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, 136246149Ssjg ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL, 137246149Ssjg NULL); 138246149Ssjg 139246149Ssjg return (0); 140246149Ssjg} 141246149Ssjg 142246149Ssjgstatic int 143246149Ssjgnvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS) 144246149Ssjg{ 145246149Ssjg struct nvme_controller *ctrlr = arg1; 146246149Ssjg uint32_t oldval = ctrlr->timeout_period; 147246149Ssjg int error = sysctl_handle_int(oidp, &ctrlr->timeout_period, 0, req); 148246149Ssjg 149246149Ssjg if (error) 150246149Ssjg return (error); 151246149Ssjg 152246149Ssjg if (ctrlr->timeout_period > NVME_MAX_TIMEOUT_PERIOD || 153246149Ssjg ctrlr->timeout_period < NVME_MIN_TIMEOUT_PERIOD) { 154246149Ssjg ctrlr->timeout_period = oldval; 155246149Ssjg return (EINVAL); 156246149Ssjg } 157246149Ssjg 158246149Ssjg return (0); 159246149Ssjg} 160246149Ssjg 161246149Ssjgstatic void 162246149Ssjgnvme_qpair_reset_stats(struct nvme_qpair *qpair) 163246149Ssjg{ 164246149Ssjg 165246149Ssjg qpair->num_cmds = 0; 166246149Ssjg qpair->num_intr_handler_calls = 0; 167246149Ssjg} 168246149Ssjg 169246149Ssjgstatic int 170246149Ssjgnvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS) 171246149Ssjg{ 172246149Ssjg struct nvme_controller *ctrlr = arg1; 173246149Ssjg int64_t num_cmds = 0; 174246149Ssjg int i; 175246149Ssjg 176246149Ssjg num_cmds = ctrlr->adminq.num_cmds; 177246149Ssjg 178246149Ssjg for (i = 0; i < ctrlr->num_io_queues; i++) 179246149Ssjg num_cmds += ctrlr->ioq[i].num_cmds; 180246149Ssjg 181246149Ssjg return (sysctl_handle_64(oidp, &num_cmds, 0, req)); 182246149Ssjg} 183246149Ssjg 184246149Ssjgstatic int 185246149Ssjgnvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS) 186246149Ssjg{ 187246149Ssjg struct nvme_controller *ctrlr = arg1; 188246149Ssjg int64_t num_intr_handler_calls = 0; 189246149Ssjg int i; 190246149Ssjg 191246149Ssjg num_intr_handler_calls = ctrlr->adminq.num_intr_handler_calls; 192246149Ssjg 193246149Ssjg for (i = 0; i < ctrlr->num_io_queues; i++) 194246149Ssjg num_intr_handler_calls += ctrlr->ioq[i].num_intr_handler_calls; 195246149Ssjg 196246149Ssjg return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req)); 197246149Ssjg} 198246149Ssjg 199246149Ssjgstatic int 200246149Ssjgnvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS) 201246149Ssjg{ 202246149Ssjg struct nvme_controller *ctrlr = arg1; 203246149Ssjg uint32_t i, val = 0; 204246149Ssjg 205246149Ssjg int error = sysctl_handle_int(oidp, &val, 0, req); 206246149Ssjg 207246149Ssjg if (error) 208246149Ssjg return (error); 209246149Ssjg 210246149Ssjg if (val != 0) { 211246149Ssjg nvme_qpair_reset_stats(&ctrlr->adminq); 212246149Ssjg 213246149Ssjg for (i = 0; i < ctrlr->num_io_queues; i++) 214246149Ssjg nvme_qpair_reset_stats(&ctrlr->ioq[i]); 215246149Ssjg } 216246149Ssjg 217246149Ssjg return (0); 218246149Ssjg} 219246149Ssjg 220246149Ssjg 221246149Ssjgstatic void 222246149Ssjgnvme_sysctl_initialize_queue(struct nvme_qpair *qpair, 223246149Ssjg struct sysctl_ctx_list *ctrlr_ctx, struct sysctl_oid *que_tree) 224246149Ssjg{ 225246149Ssjg struct sysctl_oid_list *que_list = SYSCTL_CHILDREN(que_tree); 226246149Ssjg 227246149Ssjg SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries", 228246149Ssjg CTLFLAG_RD, &qpair->num_entries, 0, 229246149Ssjg "Number of entries in hardware queue"); 230246149Ssjg SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers", 231246149Ssjg CTLFLAG_RD, &qpair->num_trackers, 0, 232246149Ssjg "Number of trackers pre-allocated for this queue pair"); 233 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head", 234 CTLFLAG_RD, &qpair->sq_head, 0, 235 "Current head of submission queue (as observed by driver)"); 236 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_tail", 237 CTLFLAG_RD, &qpair->sq_tail, 0, 238 "Current tail of submission queue (as observed by driver)"); 239 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "cq_head", 240 CTLFLAG_RD, &qpair->cq_head, 0, 241 "Current head of completion queue (as observed by driver)"); 242 243 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_cmds", 244 CTLFLAG_RD, &qpair->num_cmds, "Number of commands submitted"); 245 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_intr_handler_calls", 246 CTLFLAG_RD, &qpair->num_intr_handler_calls, 247 "Number of times interrupt handler was invoked (will typically be " 248 "less than number of actual interrupts generated due to " 249 "coalescing)"); 250 251 SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO, 252 "dump_debug", CTLTYPE_UINT | CTLFLAG_RW, qpair, 0, 253 nvme_sysctl_dump_debug, "IU", "Dump debug data"); 254} 255 256void 257nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr) 258{ 259 struct sysctl_ctx_list *ctrlr_ctx; 260 struct sysctl_oid *ctrlr_tree, *que_tree; 261 struct sysctl_oid_list *ctrlr_list; 262#define QUEUE_NAME_LENGTH 16 263 char queue_name[QUEUE_NAME_LENGTH]; 264 int i; 265 266 ctrlr_ctx = device_get_sysctl_ctx(ctrlr->dev); 267 ctrlr_tree = device_get_sysctl_tree(ctrlr->dev); 268 ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree); 269 270 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, 271 "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0, 272 nvme_sysctl_int_coal_time, "IU", 273 "Interrupt coalescing timeout (in microseconds)"); 274 275 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, 276 "int_coal_threshold", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0, 277 nvme_sysctl_int_coal_threshold, "IU", 278 "Interrupt coalescing threshold"); 279 280 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, 281 "timeout_period", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0, 282 nvme_sysctl_timeout_period, "IU", 283 "Timeout period (in seconds)"); 284 285 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, 286 "num_cmds", CTLTYPE_S64 | CTLFLAG_RD, 287 ctrlr, 0, nvme_sysctl_num_cmds, "IU", 288 "Number of commands submitted"); 289 290 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, 291 "num_intr_handler_calls", CTLTYPE_S64 | CTLFLAG_RD, 292 ctrlr, 0, nvme_sysctl_num_intr_handler_calls, "IU", 293 "Number of times interrupt handler was invoked (will " 294 "typically be less than number of actual interrupts " 295 "generated due to coalescing)"); 296 297 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, 298 "reset_stats", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0, 299 nvme_sysctl_reset_stats, "IU", "Reset statistics to zero"); 300 301 que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, "adminq", 302 CTLFLAG_RD, NULL, "Admin Queue"); 303 304 nvme_sysctl_initialize_queue(&ctrlr->adminq, ctrlr_ctx, que_tree); 305 306 for (i = 0; i < ctrlr->num_io_queues; i++) { 307 snprintf(queue_name, QUEUE_NAME_LENGTH, "ioq%d", i); 308 que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, 309 queue_name, CTLFLAG_RD, NULL, "IO Queue"); 310 nvme_sysctl_initialize_queue(&ctrlr->ioq[i], ctrlr_ctx, 311 que_tree); 312 } 313} 314