nvme_qpair.c revision 248741
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_qpair.c 248741 2013-03-26 18:45:16Z jimharris $");

#include <sys/param.h>
#include <sys/bus.h>

#include <dev/pci/pcivar.h>

#include "nvme_private.h"

static void	_nvme_qpair_submit_request(struct nvme_qpair *qpair,
					   struct nvme_request *req);

static boolean_t
nvme_completion_is_error(struct nvme_completion *cpl)
{

	return (cpl->sf_sc != 0 || cpl->sf_sct != 0);
}

static boolean_t
nvme_completion_is_retry(const struct nvme_completion *cpl)
{
	/*
	 * TODO: spec is not clear how commands that are aborted due
	 *  to TLER will be marked.  So for now, it seems
	 *  NAMESPACE_NOT_READY is the only case where we should
	 *  look at the DNR bit.
	 */
	switch (cpl->sf_sct) {
	case NVME_SCT_GENERIC:
		switch (cpl->sf_sc) {
		case NVME_SC_ABORTED_BY_REQUEST:
			return (1);
		case NVME_SC_NAMESPACE_NOT_READY:
			if (cpl->sf_dnr)
				return (0);
			else
				return (1);
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return (0);
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return (0);
	}
}

static struct nvme_tracker *
nvme_qpair_find_tracker(struct nvme_qpair *qpair, struct nvme_request *req)
{
	struct nvme_tracker	*tr;
	uint32_t		i;

	KASSERT(req != NULL, ("%s: called with NULL req\n", __func__));

	for (i = 0; i < qpair->num_entries; ++i) {
		tr = qpair->act_tr[i];
		if (tr != NULL && tr->req == req)
			return (tr);
	}

	return (NULL);
}

static void
nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    uint16_t cid)
{

	bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);

	bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
	    sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);

	callout_init_mtx(&tr->timer, &qpair->lock, 0);
	tr->cid = cid;
	tr->qpair = qpair;
}

static void
nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    struct nvme_completion *cpl, boolean_t print_on_error)
{
	struct nvme_request	*req;
	boolean_t		retry, error;

	req = tr->req;
	error = nvme_completion_is_error(cpl);
	retry = error && nvme_completion_is_retry(cpl);

	if (error && print_on_error) {
		nvme_dump_completion(cpl);
		nvme_dump_command(&req->cmd);
	}

	qpair->act_tr[cpl->cid] = NULL;

	KASSERT(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n"));

	if (req->cb_fn && !retry)
		req->cb_fn(req->cb_arg, cpl);

	mtx_lock(&qpair->lock);
	callout_stop(&tr->timer);

	if (retry)
		nvme_qpair_submit_cmd(qpair, tr);
	else {
		if (req->payload_size > 0 || req->uio != NULL)
			bus_dmamap_unload(qpair->dma_tag,
			    tr->payload_dma_map);

		nvme_free_request(req);
		tr->req = NULL;

		TAILQ_REMOVE(&qpair->outstanding_tr, tr, tailq);
		TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);

		if (!STAILQ_EMPTY(&qpair->queued_req)) {
			req = STAILQ_FIRST(&qpair->queued_req);
			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
			_nvme_qpair_submit_request(qpair, req);
		}
	}

	mtx_unlock(&qpair->lock);
}

void
nvme_qpair_process_completions(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;
	struct nvme_completion	*cpl;

	qpair->num_intr_handler_calls++;

	while (1) {
		cpl = &qpair->cpl[qpair->cq_head];

		if (cpl->p != qpair->phase)
			break;

		tr = qpair->act_tr[cpl->cid];

		if (tr != NULL) {
			nvme_qpair_complete_tracker(qpair, tr, cpl, TRUE);
			qpair->sq_head = cpl->sqhd;
		} else {
			printf("cpl does not map to outstanding cmd\n");
			nvme_dump_completion(cpl);
			KASSERT(0, ("received completion for unknown cmd\n"));
		}

		if (++qpair->cq_head == qpair->num_entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

		nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
		    qpair->cq_head);
	}
}

static void
nvme_qpair_msix_handler(void *arg)
{
	struct nvme_qpair *qpair = arg;

	nvme_qpair_process_completions(qpair);
}

void
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
    uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
    uint32_t max_xfer_size, struct nvme_controller *ctrlr)
{
	struct nvme_tracker	*tr;
	uint32_t		i;

	qpair->id = id;
	qpair->vector = vector;
	qpair->num_entries = num_entries;
#ifdef CHATHAM2
	/*
	 * Chatham prototype board starts having issues at higher queue
	 *  depths.  So use a conservative estimate here of no more than 64
	 *  outstanding I/O per queue at any one point.
	 */
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		num_trackers = min(num_trackers, 64);
#endif
	qpair->num_trackers = num_trackers;
	qpair->max_xfer_size = max_xfer_size;
	qpair->ctrlr = ctrlr;

	/*
	 * First time through the completion queue, HW will set the phase
	 *  bit on completions to 1.  So set this to 1 here, indicating
	 *  we're looking for a 1 to know which entries have completed.
	 *  We'll toggle the bit each time the completion queue rolls over.
	 */
	qpair->phase = 1;

	if (ctrlr->msix_enabled) {

		/*
		 * MSI-X vector resource IDs start at 1, so we add one to
		 *  the queue's vector to get the corresponding rid to use.
		 */
		qpair->rid = vector + 1;

		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
		    &qpair->rid, RF_ACTIVE);

		bus_setup_intr(ctrlr->dev, qpair->res,
		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
		    nvme_qpair_msix_handler, qpair, &qpair->tag);
	}

	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);

	bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
	    sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
	    (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
	    NULL, NULL, &qpair->dma_tag);

	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 0;
	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
	qpair->cmd = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	qpair->cpl = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);

	bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
	    qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
	    nvme_single_map, &qpair->cmd_bus_addr, 0);
	bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
	    qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
	    nvme_single_map, &qpair->cpl_bus_addr, 0);

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

	TAILQ_INIT(&qpair->free_tr);
	TAILQ_INIT(&qpair->outstanding_tr);
	STAILQ_INIT(&qpair->queued_req);

	for (i = 0; i < qpair->num_trackers; i++) {
		tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_NOWAIT);

		if (tr == NULL) {
			printf("warning: nvme tracker malloc failed\n");
			break;
		}

		nvme_qpair_construct_tracker(qpair, tr, i);
		TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
	}

	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
	    M_NVME, M_ZERO | M_NOWAIT);
}

static void
nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;

	if (qpair->tag)
		bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);

	if (qpair->res)
		bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(qpair->res), qpair->res);

	if (qpair->dma_tag)
		bus_dma_tag_destroy(qpair->dma_tag);

	if (qpair->act_tr)
		free(qpair->act_tr, M_NVME);

	while (!TAILQ_EMPTY(&qpair->free_tr)) {
		tr = TAILQ_FIRST(&qpair->free_tr);
		TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
		bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map);
		bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map);
		free(tr, M_NVME);
	}
}

void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

	/*
	 * For NVMe, you don't send delete queue commands for the admin
	 *  queue, so we just need to unload and free the cmd and cpl memory.
	 */
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);

	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);

	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);

	nvme_qpair_destroy(qpair);
}

static void
nvme_free_cmd_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);
	qpair->cmd = NULL;
}

static void
nvme_free_cpl_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;
	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);
	qpair->cpl = NULL;
}

void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_controller *ctrlr = qpair->ctrlr;

	if (qpair->num_entries > 0) {

		nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring,
		    qpair);
		/* Spin until free_cmd_ring sets qpair->cmd to NULL. */
		while (qpair->cmd)
			DELAY(5);

		nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring,
		    qpair);
		/* Spin until free_cpl_ring sets qpair->cpl to NULL. */
		while (qpair->cpl)
			DELAY(5);

		nvme_qpair_destroy(qpair);
	}
}

static void
nvme_qpair_manual_abort_tracker(struct nvme_qpair *qpair,
    struct nvme_tracker *tr, uint32_t sct, uint32_t sc,
    boolean_t print_on_error)
{
	struct nvme_completion	cpl;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.cid = tr->cid;
	cpl.sf_sct = sct;
	cpl.sf_sc = sc;
	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}

void
nvme_qpair_manual_abort_request(struct nvme_qpair *qpair,
    struct nvme_request *req, uint32_t sct, uint32_t sc,
    boolean_t print_on_error)
{
	struct nvme_tracker	*tr;

	tr = nvme_qpair_find_tracker(qpair, req);

	if (tr == NULL) {
		printf("%s: request not found\n", __func__);
		nvme_dump_command(&req->cmd);
		return;
	}

	nvme_qpair_manual_abort_tracker(qpair, tr, sct, sc, print_on_error);
}

static void
nvme_abort_complete(void *arg, const struct nvme_completion *status)
{
	struct nvme_tracker	*tr = arg;

	/*
	 * If cdw0 == 1, the controller was not able to abort the command
	 *  we requested.  We still need to check the active tracker array,
	 *  to cover race where I/O timed out at same time controller was
	 *  completing the I/O.
	 */
	if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) {
		/*
		 * An I/O has timed out, and the controller was unable to
		 *  abort it for some reason.  Construct a fake completion
		 *  status, and then complete the I/O's tracker manually.
		 */
		printf("abort command failed, aborting command manually\n");
		nvme_qpair_manual_abort_tracker(tr->qpair, tr,
		    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
	}
}

static void
nvme_timeout(void *arg)
{
	struct nvme_tracker	*tr = arg;

	nvme_ctrlr_cmd_abort(tr->qpair->ctrlr, tr->cid, tr->qpair->id,
	    nvme_abort_complete, tr);
}

void
nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_request	*req;

	req = tr->req;
	req->cmd.cid = tr->cid;
	qpair->act_tr[tr->cid] = tr;

	if (req->timeout > 0)
#if __FreeBSD_version >= 800030
		callout_reset_curcpu(&tr->timer, req->timeout * hz,
		    nvme_timeout, tr);
#else
		callout_reset(&tr->timer, req->timeout * hz, nvme_timeout, tr);
#endif

	/* Copy the command from the tracker to the submission queue. */
	memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));

	if (++qpair->sq_tail == qpair->num_entries)
		qpair->sq_tail = 0;

	wmb();
	nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
	    qpair->sq_tail);

	qpair->num_cmds++;
}

static void
_nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{
	struct nvme_tracker	*tr;
	int			err;

	mtx_assert(&qpair->lock, MA_OWNED);

	tr = TAILQ_FIRST(&qpair->free_tr);

	if (tr == NULL) {
		/*
		 * No tracker is available.  Put the request on the qpair's
		 *  request queue to be processed when a tracker frees up
		 *  via a command completion.
		 */
		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
		return;
	}

	TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
	TAILQ_INSERT_TAIL(&qpair->outstanding_tr, tr, tailq);
	tr->req = req;

	if (req->uio == NULL) {
		if (req->payload_size > 0) {
			err = bus_dmamap_load(tr->qpair->dma_tag,
			    tr->payload_dma_map, req->payload,
			    req->payload_size,
			    nvme_payload_map, tr, 0);
			if (err != 0)
				panic("bus_dmamap_load returned non-zero!\n");
		} else
			nvme_qpair_submit_cmd(tr->qpair, tr);
	} else {
		err = bus_dmamap_load_uio(tr->qpair->dma_tag,
		    tr->payload_dma_map, req->uio,
		    nvme_payload_map_uio, tr, 0);
		if (err != 0)
			panic("bus_dmamap_load returned non-zero!\n");
	}
}

void
nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{

	mtx_lock(&qpair->lock);
	_nvme_qpair_submit_request(qpair, req);
	mtx_unlock(&qpair->lock);
}