nvme_qpair.c revision 248766
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * NVMe queue pair (submission queue + completion queue) management:
 * construction/teardown of qpairs and their command trackers, command
 * submission, completion processing, timeout/abort handling, and
 * enable/disable across controller resets.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_qpair.c 248766 2013-03-26 21:48:41Z jimharris $");

#include <sys/param.h>
#include <sys/bus.h>

#include <dev/pci/pcivar.h>

#include "nvme_private.h"

static void	_nvme_qpair_submit_request(struct nvme_qpair *qpair,
					   struct nvme_request *req);

/*
 * Decide whether a failed completion should be retried rather than
 * reported to the caller.  Returns 1 (retry) or 0 (fail permanently).
 */
static boolean_t
nvme_completion_is_retry(const struct nvme_completion *cpl)
{
	/*
	 * TODO: spec is not clear how commands that are aborted due
	 *  to TLER will be marked.  So for now, it seems
	 *  NAMESPACE_NOT_READY is the only case where we should
	 *  look at the DNR bit.
	 */
	switch (cpl->status.sct) {
	case NVME_SCT_GENERIC:
		switch (cpl->status.sc) {
		case NVME_SC_ABORTED_BY_REQUEST:
			return (1);
		case NVME_SC_NAMESPACE_NOT_READY:
			/* DNR (Do Not Retry) set means the failure is permanent. */
			if (cpl->status.dnr)
				return (0);
			else
				return (1);
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return (0);
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return (0);
	}
}

/*
 * One-time initialization of a tracker: create its payload and PRP DMA
 * maps, load the PRP list so its bus address is known, and set up the
 * per-tracker timeout callout.  cid is the command identifier this
 * tracker will stamp into submitted commands.
 */
static void
nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    uint16_t cid)
{

	bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);

	/* Resolve the bus address of the tracker's PRP list up front. */
	bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
	    sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);

	callout_init(&tr->timer, 1);
	tr->cid = cid;
	tr->qpair = qpair;
}

/*
 * Finish a tracker for which a completion (real or manufactured) is
 * available: invoke the request callback (unless retrying), then either
 * resubmit the request or free it, return the tracker to the free list,
 * and kick one queued request if any are waiting and no reset is in
 * progress.  Note the callback is deliberately invoked before taking
 * the qpair lock.
 */
static void
nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    struct nvme_completion *cpl, boolean_t print_on_error)
{
	struct nvme_request	*req;
	boolean_t		retry, error;

	req = tr->req;
	error = nvme_completion_is_error(cpl);
	/* Retry only transient errors, and only below the retry cap. */
	retry = error && nvme_completion_is_retry(cpl) &&
	    req->retries < nvme_retry_count;

	if (error && print_on_error) {
		nvme_dump_completion(cpl);
		nvme_dump_command(&req->cmd);
	}

	qpair->act_tr[cpl->cid] = NULL;

	KASSERT(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n"));

	if (req->cb_fn && !retry)
		req->cb_fn(req->cb_arg, cpl);

	mtx_lock(&qpair->lock);
	callout_stop(&tr->timer);

	if (retry) {
		req->retries++;
		nvme_qpair_submit_tracker(qpair, tr);
	} else {
		/* A loaded payload map implies there was a data transfer. */
		if (req->payload_size > 0 || req->uio != NULL)
			bus_dmamap_unload(qpair->dma_tag,
			    tr->payload_dma_map);

		nvme_free_request(req);
		tr->req = NULL;

		TAILQ_REMOVE(&qpair->outstanding_tr, tr, tailq);
		TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);

		/*
		 * If the controller is in the middle of resetting, don't
		 *  try to submit queued requests here - let the reset logic
		 *  handle that instead.
		 */
		if (!STAILQ_EMPTY(&qpair->queued_req) &&
		    !qpair->ctrlr->is_resetting) {
			req = STAILQ_FIRST(&qpair->queued_req);
			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
			_nvme_qpair_submit_request(qpair, req);
		}
	}

	mtx_unlock(&qpair->lock);
}

/*
 * Complete a tracker without a hardware completion by synthesizing a
 * completion entry with the given status code type (sct) and status
 * code (sc).  Used for aborting AERs and timed-out I/O.
 */
static void
nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
    struct nvme_tracker *tr, uint32_t sct, uint32_t sc,
    boolean_t print_on_error)
{
	struct nvme_completion	cpl;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.cid = tr->cid;
	cpl.status.sct = sct;
	cpl.status.sc = sc;
	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}

/*
 * Drain the completion queue: consume entries whose phase bit matches
 * the qpair's current phase, complete the associated trackers, and ring
 * the CQ head doorbell.  Called from the interrupt handler and from
 * polling paths.
 */
void
nvme_qpair_process_completions(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;
	struct nvme_completion	*cpl;

	qpair->num_intr_handler_calls++;

	if (!qpair->is_enabled)
		/*
		 * qpair is not enabled, likely because a controller reset
		 *  is in progress.  Ignore the interrupt - any I/O that was
		 *  associated with this interrupt will get retried when the
		 *  reset is complete.
		 */
		return;

	while (1) {
		cpl = &qpair->cpl[qpair->cq_head];

		/* Phase mismatch means this entry has not been posted yet. */
		if (cpl->status.p != qpair->phase)
			break;

		tr = qpair->act_tr[cpl->cid];

		if (tr != NULL) {
			nvme_qpair_complete_tracker(qpair, tr, cpl, TRUE);
			qpair->sq_head = cpl->sqhd;
		} else {
			printf("cpl does not map to outstanding cmd\n");
			nvme_dump_completion(cpl);
			KASSERT(0, ("received completion for unknown cmd\n"));
		}

		/* Toggle the expected phase each time the CQ wraps. */
		if (++qpair->cq_head == qpair->num_entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

		nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
		    qpair->cq_head);
	}
}

/* MSI-X interrupt handler: one vector per qpair. */
static void
nvme_qpair_msix_handler(void *arg)
{
	struct nvme_qpair *qpair = arg;

	nvme_qpair_process_completions(qpair);
}

/*
 * Construct a qpair: set up the interrupt (when MSI-X is in use), the
 * DMA tag, the contiguous submission/completion queue memory, the
 * doorbell offsets, and the tracker pools.
 *
 * NOTE(review): contigmalloc/malloc/bus_dmamap_load failures are not
 * checked here (see TODO below); callers presumably assume success.
 */
void
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
    uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
    uint32_t max_xfer_size, struct nvme_controller *ctrlr)
{
	struct nvme_tracker	*tr;
	uint32_t		i;

	qpair->id = id;
	qpair->vector = vector;
	qpair->num_entries = num_entries;
#ifdef CHATHAM2
	/*
	 * Chatham prototype board starts having issues at higher queue
	 *  depths.  So use a conservative estimate here of no more than 64
	 *  outstanding I/O per queue at any one point.
	 */
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		num_trackers = min(num_trackers, 64);
#endif
	qpair->num_trackers = num_trackers;
	qpair->max_xfer_size = max_xfer_size;
	qpair->ctrlr = ctrlr;

	if (ctrlr->msix_enabled) {

		/*
		 * MSI-X vector resource IDs start at 1, so we add one to
		 *  the queue's vector to get the corresponding rid to use.
		 */
		qpair->rid = vector + 1;

		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
		    &qpair->rid, RF_ACTIVE);

		bus_setup_intr(ctrlr->dev, qpair->res,
		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
		    nvme_qpair_msix_handler, qpair, &qpair->tag);
	}

	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);

	/*
	 * Max transfer bounded by max_xfer_size; one segment per page
	 * plus one for an unaligned first segment.
	 */
	bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
	    sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
	    (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
	    NULL, NULL, &qpair->dma_tag);

	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 0;

	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
	qpair->cmd = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	qpair->cpl = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);

	/* Resolve bus addresses of the SQ/CQ memory for the controller. */
	bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
	    qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
	    nvme_single_map, &qpair->cmd_bus_addr, 0);
	bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
	    qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
	    nvme_single_map, &qpair->cpl_bus_addr, 0);

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

	TAILQ_INIT(&qpair->free_tr);
	TAILQ_INIT(&qpair->outstanding_tr);
	STAILQ_INIT(&qpair->queued_req);

	for (i = 0; i < qpair->num_trackers; i++) {
		tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_NOWAIT);

		if (tr == NULL) {
			printf("warning: nvme tracker malloc failed\n");
			break;
		}

		/* Tracker index doubles as its command identifier (cid). */
		nvme_qpair_construct_tracker(qpair, tr, i);
		TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
	}

	/* act_tr maps cid -> active tracker for completion lookup. */
	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
	    M_NVME, M_ZERO | M_NOWAIT);
}

/*
 * Release everything nvme_qpair_construct() allocated: interrupt,
 * queue memory, DMA maps/tag, active-tracker array, and trackers on
 * the free list.  Assumes no I/O remains outstanding.
 */
static void
nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;

	if (qpair->tag)
		bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);

	if (qpair->res)
		bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(qpair->res), qpair->res);

	if (qpair->cmd) {
		bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
		bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
		contigfree(qpair->cmd,
		    qpair->num_entries * sizeof(struct nvme_command), M_NVME);
	}

	if (qpair->cpl) {
		bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
		bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
		contigfree(qpair->cpl,
		    qpair->num_entries * sizeof(struct nvme_completion),
		    M_NVME);
	}

	if (qpair->dma_tag)
		bus_dma_tag_destroy(qpair->dma_tag);

	if (qpair->act_tr)
		free(qpair->act_tr, M_NVME);

	while (!TAILQ_EMPTY(&qpair->free_tr)) {
		tr = TAILQ_FIRST(&qpair->free_tr);
		TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
		bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map);
		bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map);
		free(tr, M_NVME);
	}
}

/*
 * Manually abort all outstanding asynchronous event requests (AERs) on
 * the admin qpair.  AERs never complete on their own, so they must be
 * aborted before the qpair can be torn down or disabled.  Restart the
 * scan from the list head after each abort, since completing a tracker
 * removes it from (or, on retry, reorders within) outstanding_tr.
 */
static void
nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;

	tr = TAILQ_FIRST(&qpair->outstanding_tr);
	while (tr != NULL) {
		if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
			nvme_qpair_manual_complete_tracker(qpair, tr,
			    NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION,
			    FALSE);
			tr = TAILQ_FIRST(&qpair->outstanding_tr);
		} else {
			tr = TAILQ_NEXT(tr, tailq);
		}
	}
}

/* Tear down the admin qpair, aborting any outstanding AERs first. */
void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

	nvme_admin_qpair_abort_aers(qpair);
	nvme_qpair_destroy(qpair);
}

/* Tear down an I/O qpair. */
void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{

	nvme_qpair_destroy(qpair);
}

/*
 * Completion callback for the ABORT command issued from nvme_timeout().
 * arg is the tracker of the timed-out command being aborted.
 */
static void
nvme_abort_complete(void *arg, const struct nvme_completion *status)
{
	struct nvme_tracker	*tr = arg;

	/*
	 * If cdw0 == 1, the controller was not able to abort the command
	 *  we requested.  We still need to check the active tracker array,
	 *  to cover race where I/O timed out at same time controller was
	 *  completing the I/O.
	 */
	if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) {
		/*
		 * An I/O has timed out, and the controller was unable to
		 *  abort it for some reason.  Construct a fake completion
		 *  status, and then complete the I/O's tracker manually.
		 */
		printf("abort command failed, aborting command manually\n");
		nvme_qpair_manual_complete_tracker(tr->qpair, tr,
		    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
	}
}

/*
 * Callout handler fired when a submitted command exceeds the
 * controller's timeout period.  Try a targeted ABORT when the
 * controller is healthy; otherwise reset the whole controller.
 */
static void
nvme_timeout(void *arg)
{
	struct nvme_tracker	*tr = arg;
	struct nvme_qpair	*qpair = tr->qpair;
	struct nvme_controller	*ctrlr = qpair->ctrlr;
	union csts_register	csts;

	/* Read csts to get value of cfs - controller fatal status. */
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
		/*
		 * If aborts are enabled, only use them if the controller is
		 *  not reporting fatal status.
		 */
		nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
		    nvme_abort_complete, tr);
	} else
		nvme_ctrlr_reset(ctrlr);
}

/*
 * Copy the tracker's command into the submission queue, arm the timeout
 * callout, and ring the SQ tail doorbell.  Caller must hold the qpair
 * lock and have already linked tr->req.
 */
void
nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_request	*req;
	struct nvme_controller	*ctrlr;

	mtx_assert(&qpair->lock, MA_OWNED);

	req = tr->req;
	req->cmd.cid = tr->cid;
	qpair->act_tr[tr->cid] = tr;
	ctrlr = qpair->ctrlr;

	if (req->timeout)
#if __FreeBSD_version >= 800030
		callout_reset_curcpu(&tr->timer, ctrlr->timeout_period * hz,
		    nvme_timeout, tr);
#else
		callout_reset(&tr->timer, ctrlr->timeout_period * hz,
		    nvme_timeout, tr);
#endif

	/* Copy the command from the tracker to the submission queue. */
	memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));

	if (++qpair->sq_tail == qpair->num_entries)
		qpair->sq_tail = 0;

	/* Ensure the command is visible in memory before the doorbell write. */
	wmb();
	nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
	    qpair->sq_tail);

	qpair->num_cmds++;
}

/*
 * Internal submit path (qpair lock held): claim a free tracker and
 * either submit immediately (no payload) or start the DMA load whose
 * callback performs the actual submission.  Requests that cannot get a
 * tracker, or arrive while the qpair is disabled, are deferred to
 * queued_req.
 */
static void
_nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{
	struct nvme_tracker	*tr;
	int			err;

	mtx_assert(&qpair->lock, MA_OWNED);

	tr = TAILQ_FIRST(&qpair->free_tr);

	if (tr == NULL || !qpair->is_enabled) {
		/*
		 * No tracker is available, or the qpair is disabled due to
		 *  an in-progress controller-level reset.
		 *
		 * Put the request on the qpair's request queue to be processed
		 *  when a tracker frees up via a command completion or when
		 *  the controller reset is completed.
		 */
		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
		return;
	}

	TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
	TAILQ_INSERT_TAIL(&qpair->outstanding_tr, tr, tailq);
	tr->req = req;

	if (req->uio == NULL) {
		if (req->payload_size > 0) {
			/* nvme_payload_map submits the tracker once mapped. */
			err = bus_dmamap_load(tr->qpair->dma_tag,
			    tr->payload_dma_map, req->payload,
			    req->payload_size,
			    nvme_payload_map, tr, 0);
			if (err != 0)
				panic("bus_dmamap_load returned non-zero!\n");
		} else
			/* No payload - submit directly. */
			nvme_qpair_submit_tracker(tr->qpair, tr);
	} else {
		err = bus_dmamap_load_uio(tr->qpair->dma_tag,
		    tr->payload_dma_map, req->uio,
		    nvme_payload_map_uio, tr, 0);
		if (err != 0)
			panic("bus_dmamap_load returned non-zero!\n");
	}
}

/* Public submit entry point: takes the qpair lock around submission. */
void
nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{

	mtx_lock(&qpair->lock);
	_nvme_qpair_submit_request(qpair, req);
	mtx_unlock(&qpair->lock);
}

/* Mark the qpair ready to accept submissions and process completions. */
static void
nvme_qpair_enable(struct nvme_qpair *qpair)
{

	qpair->is_enabled = TRUE;
}

/*
 * Reset qpair software state after a controller reset: rewind the
 * queue indices, reset the expected phase, and clear the SQ/CQ memory.
 */
void
nvme_qpair_reset(struct nvme_qpair *qpair)
{

	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

	/*
	 * First time through the completion queue, HW will set phase
	 *  bit on completions to 1.  So set this to 1 here, indicating
	 *  we're looking for a 1 to know which entries have completed.
	 *  we'll toggle the bit each time when the completion queue
	 *  rolls over.
	 */
	qpair->phase = 1;

	memset(qpair->cmd, 0,
	    qpair->num_entries * sizeof(struct nvme_command));
	memset(qpair->cpl, 0,
	    qpair->num_entries * sizeof(struct nvme_completion));
}

/* Enable the admin qpair. */
void
nvme_admin_qpair_enable(struct nvme_qpair *qpair)
{

	nvme_qpair_enable(qpair);
}

/*
 * Enable an I/O qpair after a reset: abort (and thereby typically
 * retry) every I/O that was outstanding when the reset began, then
 * resubmit everything that was sitting on the deferred queue.
 */
void
nvme_io_qpair_enable(struct nvme_qpair *qpair)
{
	STAILQ_HEAD(, nvme_request)	temp;
	struct nvme_tracker		*tr;
	struct nvme_tracker		*tr_temp;
	struct nvme_request		*req;

	/*
	 * Manually abort each outstanding I/O.  This normally results in a
	 *  retry, unless the retry count on the associated request has
	 *  reached its limit.
	 */
	TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
		device_printf(qpair->ctrlr->dev,
		    "aborting outstanding i/o\n");
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
		    NVME_SC_ABORTED_BY_REQUEST, TRUE);
	}

	mtx_lock(&qpair->lock);

	nvme_qpair_enable(qpair);

	/*
	 * Move queued requests to a local list first: resubmission may
	 * re-append to queued_req if trackers run out.
	 */
	STAILQ_INIT(&temp);
	STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);

	while (!STAILQ_EMPTY(&temp)) {
		req = STAILQ_FIRST(&temp);
		STAILQ_REMOVE_HEAD(&temp, stailq);
		device_printf(qpair->ctrlr->dev,
		    "resubmitting queued i/o\n");
		nvme_dump_command(&req->cmd);
		_nvme_qpair_submit_request(qpair, req);
	}

	mtx_unlock(&qpair->lock);
}

/*
 * Stop accepting new submissions and cancel pending timeout callouts
 * for outstanding I/O (the I/O itself is dealt with by the reset path).
 */
static void
nvme_qpair_disable(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;

	qpair->is_enabled = FALSE;
	mtx_lock(&qpair->lock);
	TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
		callout_stop(&tr->timer);
	mtx_unlock(&qpair->lock);
}

/* Disable the admin qpair and abort its outstanding AERs. */
void
nvme_admin_qpair_disable(struct nvme_qpair *qpair)
{

	nvme_qpair_disable(qpair);
	nvme_admin_qpair_abort_aers(qpair);
}

/* Disable an I/O qpair. */
void
nvme_io_qpair_disable(struct nvme_qpair *qpair)
{

	nvme_qpair_disable(qpair);
}