nvme_ctrlr.c revision 240616
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_ctrlr.c 240616 2012-09-17 19:23:01Z jimharris $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "nvme_private.h"
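
/*
 * Note (editor's commentary, not in the original revision): nvme_ctrlr_cb
 * is the completion callback used by the synchronous admin-command helpers
 * throughout this file.  The caller allocates a struct nvme_completion on
 * its stack, looks up the pool mutex hashed from that address, and
 * msleep()s on the address; the callback copies the completion status into
 * it and wakeup()s the sleeper.  Using mtx_pool_find() on the same address
 * on both sides guarantees caller and callback rendezvous on the same
 * mutex without needing a dedicated per-request lock.
 */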
static void
nvme_ctrlr_cb(void *arg, const struct nvme_completion *status)
{
	struct nvme_completion	*cpl = arg;
	struct mtx		*mtx;

	/*
	 * Copy status into the argument passed by the caller, so that
	 * the caller can check the status to determine whether the
	 * request passed or failed.
	 */
	memcpy(cpl, status, sizeof(*cpl));
	mtx = mtx_pool_find(mtxpool_sleep, cpl);
	mtx_lock(mtx);
	wakeup(cpl);
	mtx_unlock(mtx);
}

static int
nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
{

	/* Chatham puts the NVMe MMRs behind BAR 2/3, not BAR 0/1. */
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		ctrlr->resource_id = PCIR_BAR(2);
	else
		ctrlr->resource_id = PCIR_BAR(0);

	ctrlr->resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
	    &ctrlr->resource_id, 0, ~0, 1, RF_ACTIVE);

	if (ctrlr->resource == NULL) {
		device_printf(ctrlr->dev, "unable to allocate pci resource\n");
		return (ENOMEM);
	}

	ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
	ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
	ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;

	return (0);
}

#ifdef CHATHAM2
static int
nvme_ctrlr_allocate_chatham_bar(struct nvme_controller *ctrlr)
{

	ctrlr->chatham_resource_id = PCIR_BAR(CHATHAM_CONTROL_BAR);
	ctrlr->chatham_resource = bus_alloc_resource(ctrlr->dev,
	    SYS_RES_MEMORY, &ctrlr->chatham_resource_id, 0, ~0, 1,
	    RF_ACTIVE);

	if (ctrlr->chatham_resource == NULL) {
		device_printf(ctrlr->dev, "unable to alloc pci resource\n");
		return (ENOMEM);
	}

	ctrlr->chatham_bus_tag = rman_get_bustag(ctrlr->chatham_resource);
	ctrlr->chatham_bus_handle =
	    rman_get_bushandle(ctrlr->chatham_resource);

	return (0);
}

static void
nvme_ctrlr_setup_chatham(struct nvme_controller *ctrlr)
{
	uint64_t	reg1, reg2, reg3;
	uint64_t	temp1, temp2;
	uint32_t	temp3;
	uint32_t	use_flash_timings = 0;

	DELAY(10000);

	temp3 = chatham_read_4(ctrlr, 0x8080);

	device_printf(ctrlr->dev, "Chatham version: 0x%x\n", temp3);

	ctrlr->chatham_lbas = chatham_read_4(ctrlr, 0x8068) - 0x110;
	ctrlr->chatham_size = ctrlr->chatham_lbas * 512;

	device_printf(ctrlr->dev, "Chatham size: %lld\n",
	    (long long)ctrlr->chatham_size);

	reg1 = reg2 = reg3 = ctrlr->chatham_size - 1;

	TUNABLE_INT_FETCH("hw.nvme.use_flash_timings", &use_flash_timings);
	if (use_flash_timings) {
		device_printf(ctrlr->dev, "Chatham: using flash timings\n");
		temp1 = 0x00001b58000007d0LL;
		temp2 = 0x000000cb00000131LL;
	} else {
		device_printf(ctrlr->dev, "Chatham: using DDR timings\n");
		temp1 = temp2 = 0x0LL;
	}

	chatham_write_8(ctrlr, 0x8000, reg1);
	chatham_write_8(ctrlr, 0x8008, reg2);
	chatham_write_8(ctrlr, 0x8010, reg3);

	chatham_write_8(ctrlr, 0x8020, temp1);
	temp3 = chatham_read_4(ctrlr, 0x8020);

	chatham_write_8(ctrlr, 0x8028, temp2);
	temp3 = chatham_read_4(ctrlr, 0x8028);

	chatham_write_8(ctrlr, 0x8030, temp1);
	chatham_write_8(ctrlr, 0x8038, temp2);
	chatham_write_8(ctrlr, 0x8040, temp1);
	chatham_write_8(ctrlr, 0x8048, temp2);
	chatham_write_8(ctrlr, 0x8050, temp1);
	chatham_write_8(ctrlr, 0x8058, temp2);

	DELAY(10000);
}

static void
nvme_chatham_populate_cdata(struct nvme_controller *ctrlr)
{
	struct nvme_controller_data *cdata;

	cdata = &ctrlr->cdata;

	cdata->vid = 0x8086;
	cdata->ssvid = 0x2011;

	/*
	 * Chatham2 puts garbage data in these fields when we
	 * invoke IDENTIFY_CONTROLLER, so we need to re-zero
	 * the fields before calling memcpy().
	 */
	memset(cdata->sn, 0, sizeof(cdata->sn));
	memcpy(cdata->sn, "2012", strlen("2012"));
	memset(cdata->mn, 0, sizeof(cdata->mn));
	memcpy(cdata->mn, "CHATHAM2", strlen("CHATHAM2"));
	memset(cdata->fr, 0, sizeof(cdata->fr));
	memcpy(cdata->fr, "0", strlen("0"));
	cdata->rab = 8;
	cdata->aerl = 3;
	cdata->lpa.ns_smart = 1;
	cdata->sqes.min = 6;
	cdata->sqes.max = 6;
	/*
	 * The original revision assigned to sqes a second time here; the
	 * second pair of assignments clearly meant the completion queue
	 * entry size (16 bytes == 2^4), so set cqes instead.
	 */
	cdata->cqes.min = 4;
	cdata->cqes.max = 4;
	cdata->nn = 1;

	/* Chatham2 doesn't support the DSM command. */
	cdata->oncs.dsm = 0;

	cdata->vwc.present = 1;
}
#endif /* CHATHAM2 */
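
/*
 * Note (editor's commentary): queue-pair layout used below.  The admin
 * queue is always qpair ID 0 and, when MSI-X is available, interrupt
 * vector 0.  I/O queues occupy IDs (and vectors) 1..num_io_queues, one
 * per CPU when hw.nvme.per_cpu_io_queues is enabled, so that each I/O
 * queue's interrupt can be bound to the CPU that submits to it.
 */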
static void
nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
{
	struct nvme_qpair	*qpair;
	uint32_t		num_entries;

	qpair = &ctrlr->adminq;

	num_entries = NVME_ADMIN_ENTRIES;
	TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries);
	/*
	 * If admin_entries was overridden to an invalid value, revert it
	 * back to our default value.
	 */
	if (num_entries < NVME_MIN_ADMIN_ENTRIES ||
	    num_entries > NVME_MAX_ADMIN_ENTRIES) {
		printf("nvme: invalid hw.nvme.admin_entries=%d specified\n",
		    num_entries);
		num_entries = NVME_ADMIN_ENTRIES;
	}

	/*
	 * The admin queue's max xfer size is treated differently than the
	 * max I/O xfer size.  16KB is sufficient here - maybe even less?
	 */
	nvme_qpair_construct(qpair, 0, 0, num_entries, 16*1024, ctrlr);
}

static int
nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
{
	struct nvme_qpair	*qpair;
	union cap_lo_register	cap_lo;
	int			i, num_entries;

	num_entries = NVME_IO_ENTRIES;
	TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);

	num_entries = max(num_entries, NVME_MIN_IO_ENTRIES);

	/*
	 * The NVMe spec sets a hard limit of 64K max entries, but devices
	 * may specify a smaller limit, so we need to check the MQES field
	 * in the capabilities register.
	 */
	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
	num_entries = min(num_entries, cap_lo.bits.mqes+1);

	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
	TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size);
	/*
	 * Check that the tunable doesn't specify a size greater than what
	 * our driver supports, and that it is an even PAGE_SIZE multiple.
	 */
	if (ctrlr->max_xfer_size > NVME_MAX_XFER_SIZE ||
	    ctrlr->max_xfer_size % PAGE_SIZE)
		ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;

	ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair),
	    M_NVME, M_ZERO | M_NOWAIT);

	if (ctrlr->ioq == NULL)
		return (ENOMEM);

	for (i = 0; i < ctrlr->num_io_queues; i++) {
		qpair = &ctrlr->ioq[i];

		/*
		 * The admin queue has ID=0.  I/O queues start at ID=1 -
		 * hence the 'i+1' here.
		 *
		 * For I/O queues, use the controller-wide max_xfer_size
		 * calculated in nvme_attach().
		 */
		nvme_qpair_construct(qpair,
		    i+1,				/* qpair ID */
		    ctrlr->msix_enabled ? i+1 : 0,	/* vector */
		    num_entries,
		    ctrlr->max_xfer_size,
		    ctrlr);

		if (ctrlr->per_cpu_io_queues)
			bus_bind_intr(ctrlr->dev, qpair->res, i);
	}

	return (0);
}
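
/*
 * Note (editor's commentary): the next three routines drive the
 * controller enable/disable handshake defined by the NVMe spec.
 * Software toggles CC.EN and then polls CSTS.RDY, which the controller
 * sets to 1 once it has accepted the admin queue configuration (or
 * clears to 0 after a disable).  CAP.TO bounds how long that transition
 * may take; that is where ctrlr->ready_timeout_in_ms comes from.
 */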
static int
nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr)
{
	int			ms_waited;
	union cc_register	cc;
	union csts_register	csts;

	cc.raw = nvme_mmio_read_4(ctrlr, cc);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (!cc.bits.en) {
		device_printf(ctrlr->dev, "%s called with cc.en = 0\n",
		    __func__);
		return (ENXIO);
	}

	ms_waited = 0;

	while (!csts.bits.rdy) {
		DELAY(1000);
		if (ms_waited++ > ctrlr->ready_timeout_in_ms) {
			device_printf(ctrlr->dev, "controller did not become "
			    "ready within %d ms\n", ctrlr->ready_timeout_in_ms);
			return (ENXIO);
		}
		csts.raw = nvme_mmio_read_4(ctrlr, csts);
	}

	return (0);
}

static void
nvme_ctrlr_disable(struct nvme_controller *ctrlr)
{
	union cc_register	cc;
	union csts_register	csts;

	cc.raw = nvme_mmio_read_4(ctrlr, cc);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (cc.bits.en == 1 && csts.bits.rdy == 0)
		nvme_ctrlr_wait_for_ready(ctrlr);

	cc.bits.en = 0;
	nvme_mmio_write_4(ctrlr, cc, cc.raw);
	DELAY(5000);
}

static int
nvme_ctrlr_enable(struct nvme_controller *ctrlr)
{
	union cc_register	cc;
	union csts_register	csts;
	union aqa_register	aqa;

	cc.raw = nvme_mmio_read_4(ctrlr, cc);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (cc.bits.en == 1) {
		if (csts.bits.rdy == 1)
			return (0);
		else
			return (nvme_ctrlr_wait_for_ready(ctrlr));
	}

	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
	DELAY(5000);
	nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
	DELAY(5000);

	aqa.raw = 0;
	/* acqs and asqs are 0-based. */
	aqa.bits.acqs = ctrlr->adminq.num_entries-1;
	aqa.bits.asqs = ctrlr->adminq.num_entries-1;
	nvme_mmio_write_4(ctrlr, aqa, aqa.raw);
	DELAY(5000);

	cc.bits.en = 1;
	cc.bits.css = 0;
	cc.bits.ams = 0;
	cc.bits.shn = 0;
	cc.bits.iosqes = 6;	/* SQ entry size == 64 == 2^6 */
	cc.bits.iocqes = 4;	/* CQ entry size == 16 == 2^4 */

	/*
	 * CC.MPS encodes log2(memory page size) - 12, so for a 4KB
	 * PAGE_SIZE this expression evaluates to 0, which is the
	 * minimum page size the spec requires.
	 */
	cc.bits.mps = (PAGE_SIZE >> 13);

	nvme_mmio_write_4(ctrlr, cc, cc.raw);
	DELAY(5000);

	return (nvme_ctrlr_wait_for_ready(ctrlr));
}

int
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
{

	nvme_ctrlr_disable(ctrlr);
	return (nvme_ctrlr_enable(ctrlr));
}
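
/*
 * Note (editor's commentary): asynchronous event requests are admin
 * commands that the controller holds until an event (SMART warning,
 * error status, etc.) occurs.  Completing one consumes it, so the
 * callback below immediately submits a replacement to keep a request
 * outstanding with the controller.
 */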
static void
nvme_async_event_cb(void *arg, const struct nvme_completion *status)
{
	struct nvme_controller *ctrlr = arg;

	printf("Asynchronous event occurred.\n");

	/* TODO: decode async event type based on status */
	/* TODO: check status for any error bits */

	/*
	 * Repost an asynchronous event request so that it can be
	 * used again by the controller.
	 */
	nvme_ctrlr_cmd_asynchronous_event_request(ctrlr, nvme_async_event_cb,
	    ctrlr);
}

static int
nvme_ctrlr_identify(struct nvme_controller *ctrlr)
{
	struct mtx		*mtx;
	struct nvme_completion	cpl;
	int			status;

	mtx = mtx_pool_find(mtxpool_sleep, &cpl);

	mtx_lock(mtx);
	nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
	    nvme_ctrlr_cb, &cpl);
	status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
	mtx_unlock(mtx);
	if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
		printf("nvme_identify_controller failed!\n");
		return (ENXIO);
	}

#ifdef CHATHAM2
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		nvme_chatham_populate_cdata(ctrlr);
#endif

	return (0);
}

static int
nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
{
	struct mtx		*mtx;
	struct nvme_completion	cpl;
	int			cq_allocated, sq_allocated, status;

	mtx = mtx_pool_find(mtxpool_sleep, &cpl);

	mtx_lock(mtx);
	nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues,
	    nvme_ctrlr_cb, &cpl);
	status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
	mtx_unlock(mtx);
	if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
		printf("nvme_set_num_queues failed!\n");
		return (ENXIO);
	}

	/*
	 * Data in cdw0 is 0-based.
	 * Lower 16-bits indicate number of submission queues allocated.
	 * Upper 16-bits indicate number of completion queues allocated.
	 */
	sq_allocated = (cpl.cdw0 & 0xFFFF) + 1;
	cq_allocated = (cpl.cdw0 >> 16) + 1;

	/*
	 * Check that the controller was able to allocate the number of
	 * queues we requested.  If not, revert to one IO queue.
	 */
	if (sq_allocated < ctrlr->num_io_queues ||
	    cq_allocated < ctrlr->num_io_queues) {
		ctrlr->num_io_queues = 1;
		ctrlr->per_cpu_io_queues = 0;

		/*
		 * TODO: destroy extra queues that were created
		 * previously but now found to be not needed.
		 */
	}

	return (0);
}
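
/*
 * Note (editor's commentary): for each I/O queue pair, the completion
 * queue must be created before the submission queue, because the
 * CREATE IO SQ command identifies the completion queue that its
 * submissions will complete to.  That is why the loop below issues
 * create_io_cq first and only then create_io_sq for the same qpair.
 */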
static int
nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
{
	struct mtx		*mtx;
	struct nvme_qpair	*qpair;
	struct nvme_completion	cpl;
	int			i, status;

	mtx = mtx_pool_find(mtxpool_sleep, &cpl);

	for (i = 0; i < ctrlr->num_io_queues; i++) {
		qpair = &ctrlr->ioq[i];

		mtx_lock(mtx);
		nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector,
		    nvme_ctrlr_cb, &cpl);
		status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
		mtx_unlock(mtx);
		if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
			printf("nvme_create_io_cq failed!\n");
			return (ENXIO);
		}

		mtx_lock(mtx);
		nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
		    nvme_ctrlr_cb, &cpl);
		status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
		mtx_unlock(mtx);
		if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
			printf("nvme_create_io_sq failed!\n");
			return (ENXIO);
		}
	}

	return (0);
}

static int
nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
{
	struct nvme_namespace	*ns;
	int			i, status;

	for (i = 0; i < ctrlr->cdata.nn; i++) {
		ns = &ctrlr->ns[i];
		/* Namespace IDs are 1-based, hence the 'i+1' here. */
		status = nvme_ns_construct(ns, i+1, ctrlr);
		if (status != 0)
			return (status);
	}

	return (0);
}

static void
nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
{
	union nvme_critical_warning_state	state;
	uint8_t					num_async_events;

	state.raw = 0xFF;
	state.bits.reserved = 0;
	nvme_ctrlr_cmd_set_asynchronous_event_config(ctrlr, state, NULL, NULL);

	/* aerl is a zero-based value, so we need to add 1 here. */
	num_async_events = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));

	/*
	 * Disable this code for now, since Chatham doesn't support
	 * AERs, so there is no good way to test them.
	 */
#if 0
	for (int i = 0; i < num_async_events; i++)
		nvme_ctrlr_cmd_asynchronous_event_request(ctrlr,
		    nvme_async_event_cb, ctrlr);
#endif
}

static void
nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
{

	ctrlr->int_coal_time = 0;
	TUNABLE_INT_FETCH("hw.nvme.int_coal_time",
	    &ctrlr->int_coal_time);

	ctrlr->int_coal_threshold = 0;
	TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold",
	    &ctrlr->int_coal_threshold);

	nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time,
	    ctrlr->int_coal_threshold, NULL, NULL);
}

void
nvme_ctrlr_start(void *ctrlr_arg)
{
	struct nvme_controller *ctrlr = ctrlr_arg;

	if (nvme_ctrlr_identify(ctrlr) != 0)
		goto err;

	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
		goto err;

	if (nvme_ctrlr_create_qpairs(ctrlr) != 0)
		goto err;

	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0)
		goto err;

	nvme_ctrlr_configure_aer(ctrlr);
	nvme_ctrlr_configure_int_coalescing(ctrlr);

	ctrlr->is_started = TRUE;

	/* The success path intentionally falls through to the label below. */
err:
	/*
	 * Initialize sysctls, even if the controller failed to start,
	 * to assist with debugging the admin queue pair.
	 */
	nvme_sysctl_initialize_ctrlr(ctrlr);
	config_intrhook_disestablish(&ctrlr->config_hook);
}
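
/*
 * Note (editor's commentary): the legacy INTx path below cannot cheaply
 * determine which queue interrupted, so the hard interrupt handler masks
 * the controller's interrupt (INTMS), defers to a fast taskqueue, and
 * the task drains both the admin queue and the single I/O queue before
 * unmasking (INTMC).  This is also why INTx operation is limited to one
 * I/O queue in nvme_ctrlr_configure_intx().
 */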
static void
nvme_ctrlr_intx_task(void *arg, int pending)
{
	struct nvme_controller *ctrlr = arg;

	nvme_qpair_process_completions(&ctrlr->adminq);

	if (ctrlr->ioq[0].cpl)
		nvme_qpair_process_completions(&ctrlr->ioq[0]);

	nvme_mmio_write_4(ctrlr, intmc, 1);
}

static void
nvme_ctrlr_intx_handler(void *arg)
{
	struct nvme_controller *ctrlr = arg;

	nvme_mmio_write_4(ctrlr, intms, 1);
	taskqueue_enqueue_fast(ctrlr->taskqueue, &ctrlr->task);
}

static int
nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
{

	ctrlr->num_io_queues = 1;
	ctrlr->per_cpu_io_queues = 0;
	ctrlr->rid = 0;
	ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
	    &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);

	if (ctrlr->res == NULL) {
		device_printf(ctrlr->dev, "unable to allocate shared IRQ\n");
		return (ENOMEM);
	}

	bus_setup_intr(ctrlr->dev, ctrlr->res,
	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
	    ctrlr, &ctrlr->tag);

	if (ctrlr->tag == NULL) {
		device_printf(ctrlr->dev,
		    "unable to set up legacy interrupt handler\n");
		return (ENOMEM);
	}

	TASK_INIT(&ctrlr->task, 0, nvme_ctrlr_intx_task, ctrlr);
	ctrlr->taskqueue = taskqueue_create_fast("nvme_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &ctrlr->taskqueue);
	taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_NET,
	    "%s intx taskq", device_get_nameunit(ctrlr->dev));

	return (0);
}

static int
nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvme_controller	*ctrlr;
	struct nvme_completion	cpl;
	struct mtx		*mtx;

	ctrlr = cdev->si_drv1;

	switch (cmd) {
	case NVME_IDENTIFY_CONTROLLER:
#ifdef CHATHAM2
		/*
		 * Don't refresh data on Chatham, since Chatham returns
		 * garbage on IDENTIFY anyways.
		 */
		if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) {
			memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
			break;
		}
#endif
		/* Refresh data before returning to user. */
		mtx = mtx_pool_find(mtxpool_sleep, &cpl);
		mtx_lock(mtx);
		nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
		    nvme_ctrlr_cb, &cpl);
		msleep(&cpl, mtx, PRIBIO, "nvme_ioctl", 0);
		mtx_unlock(mtx);
		if (cpl.sf_sc || cpl.sf_sct)
			return (ENXIO);
		memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
		break;
	default:
		return (ENOTTY);
	}

	return (0);
}

static struct cdevsw nvme_ctrlr_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	0,
	.d_ioctl =	nvme_ctrlr_ioctl
};
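
/*
 * Note (editor's commentary): nvme_ctrlr_construct() performs the
 * attach-time setup: map the registers, size things from the CAP
 * register, pick an interrupt strategy (MSI-X with one vector per I/O
 * queue plus one for the admin queue, falling back to INTx), construct
 * the queue pairs, and create the /dev/nvme%d character device.  The
 * admin commands that finish bringing the controller up run later from
 * nvme_ctrlr_start().
 */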
int
nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
{
	union cap_lo_register	cap_lo;
	union cap_hi_register	cap_hi;
	int			num_vectors, per_cpu_io_queues, status = 0;

	ctrlr->dev = dev;
	ctrlr->is_started = FALSE;

	status = nvme_ctrlr_allocate_bar(ctrlr);

	if (status != 0)
		return (status);

#ifdef CHATHAM2
	if (pci_get_devid(dev) == CHATHAM_PCI_ID) {
		status = nvme_ctrlr_allocate_chatham_bar(ctrlr);
		if (status != 0)
			return (status);
		nvme_ctrlr_setup_chatham(ctrlr);
	}
#endif

	/*
	 * Software emulators may set the doorbell stride to something
	 * other than zero, but this driver is not set up to handle that.
	 */
	cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi);
	if (cap_hi.bits.dstrd != 0)
		return (ENXIO);

	/* Get ready timeout value from controller, in units of 500ms. */
	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
	ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500;

	per_cpu_io_queues = 1;
	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
	ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE;

	if (ctrlr->per_cpu_io_queues)
		ctrlr->num_io_queues = mp_ncpus;
	else
		ctrlr->num_io_queues = 1;

	ctrlr->force_intx = 0;
	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);

	ctrlr->msix_enabled = 1;

	if (ctrlr->force_intx) {
		ctrlr->msix_enabled = 0;
		goto intx;
	}

	/* One vector per IO queue, plus one vector for the admin queue. */
	num_vectors = ctrlr->num_io_queues + 1;

	if (pci_msix_count(dev) < num_vectors) {
		ctrlr->msix_enabled = 0;
		goto intx;
	}

	if (pci_alloc_msix(dev, &num_vectors) != 0)
		ctrlr->msix_enabled = 0;

intx:
	if (!ctrlr->msix_enabled)
		nvme_ctrlr_configure_intx(ctrlr);

	nvme_ctrlr_construct_admin_qpair(ctrlr);

	status = nvme_ctrlr_construct_io_qpairs(ctrlr);

	if (status != 0)
		return (status);

	ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL,
	    0600, "nvme%d", device_get_unit(dev));

	if (ctrlr->cdev == NULL)
		return (ENXIO);

	ctrlr->cdev->si_drv1 = (void *)ctrlr;

	return (0);
}