nvme_qpair.c revision 241661
1/*- 2 * Copyright (C) 2012 Intel Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_qpair.c 241661 2012-10-18 00:40:40Z jimharris $"); 29 30#include <sys/param.h> 31#include <sys/bus.h> 32 33#include "nvme_private.h" 34 35static boolean_t 36nvme_completion_check_retry(const struct nvme_completion *cpl) 37{ 38 /* 39 * TODO: spec is not clear how commands that are aborted due 40 * to TLER will be marked. So for now, it seems 41 * NAMESPACE_NOT_READY is the only case where we should 42 * look at the DNR bit. 43 */ 44 switch (cpl->sf_sct) { 45 case NVME_SCT_GENERIC: 46 switch (cpl->sf_sc) { 47 case NVME_SC_NAMESPACE_NOT_READY: 48 if (cpl->sf_dnr) 49 return (0); 50 else 51 return (1); 52 case NVME_SC_INVALID_OPCODE: 53 case NVME_SC_INVALID_FIELD: 54 case NVME_SC_COMMAND_ID_CONFLICT: 55 case NVME_SC_DATA_TRANSFER_ERROR: 56 case NVME_SC_ABORTED_POWER_LOSS: 57 case NVME_SC_INTERNAL_DEVICE_ERROR: 58 case NVME_SC_ABORTED_BY_REQUEST: 59 case NVME_SC_ABORTED_SQ_DELETION: 60 case NVME_SC_ABORTED_FAILED_FUSED: 61 case NVME_SC_ABORTED_MISSING_FUSED: 62 case NVME_SC_INVALID_NAMESPACE_OR_FORMAT: 63 case NVME_SC_COMMAND_SEQUENCE_ERROR: 64 case NVME_SC_LBA_OUT_OF_RANGE: 65 case NVME_SC_CAPACITY_EXCEEDED: 66 default: 67 return (0); 68 } 69 case NVME_SCT_COMMAND_SPECIFIC: 70 case NVME_SCT_MEDIA_ERROR: 71 case NVME_SCT_VENDOR_SPECIFIC: 72 default: 73 return (0); 74 } 75} 76 77struct nvme_tracker * 78nvme_qpair_allocate_tracker(struct nvme_qpair *qpair) 79{ 80 struct nvme_tracker *tr; 81 82 mtx_lock(&qpair->lock); 83 84 tr = SLIST_FIRST(&qpair->free_tr); 85 if (tr == NULL) { 86 /* 87 * We can't support more trackers than we have entries in 88 * our queue, because it would generate invalid indices 89 * into the qpair's active tracker array. 90 */ 91 if (qpair->num_tr == qpair->num_entries) { 92 mtx_unlock(&qpair->lock); 93 return (NULL); 94 } 95 96 tr = malloc(sizeof(struct nvme_tracker), M_NVME, 97 M_ZERO | M_NOWAIT); 98 99 if (tr == NULL) { 100 mtx_unlock(&qpair->lock); 101 return (NULL); 102 } 103 104 bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map); 105 bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map); 106 107 bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp, 108 sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0); 109 110 callout_init_mtx(&tr->timer, &qpair->lock, 0); 111 tr->cid = qpair->num_tr++; 112 tr->qpair = qpair; 113 } else 114 SLIST_REMOVE_HEAD(&qpair->free_tr, slist); 115 116 return (tr); 117} 118 119void 120nvme_qpair_process_completions(struct nvme_qpair *qpair) 121{ 122 struct nvme_tracker *tr; 123 struct nvme_request *req; 124 struct nvme_completion *cpl; 125 boolean_t retry, error; 126 127 qpair->num_intr_handler_calls++; 128 129 while (1) { 130 cpl = &qpair->cpl[qpair->cq_head]; 131 132 if (cpl->p != qpair->phase) 133 break; 134 135 tr = qpair->act_tr[cpl->cid]; 136 req = tr->req; 137 138 KASSERT(tr, 139 ("completion queue has entries but no active trackers\n")); 140 141 error = cpl->sf_sc || cpl->sf_sct; 142 retry = error && nvme_completion_check_retry(cpl); 143 144 if (error) { 145 nvme_dump_completion(cpl); 146 nvme_dump_command(&tr->req->cmd); 147 } 148 149 qpair->act_tr[cpl->cid] = NULL; 150 151 KASSERT(cpl->cid == tr->cmd.cid, 152 ("cpl cid does not match cmd cid\n")); 153 154 if (req->cb_fn && !retry) 155 req->cb_fn(req->cb_arg, cpl); 156 157 qpair->sq_head = cpl->sqhd; 158 159 mtx_lock(&qpair->lock); 160 callout_stop(&tr->timer); 161 162 if (retry) 163 /* nvme_qpair_submit_cmd() will release the lock. */ 164 nvme_qpair_submit_cmd(qpair, tr); 165 else { 166 if (req->payload_size > 0 || req->uio != NULL) 167 bus_dmamap_unload(qpair->dma_tag, 168 tr->payload_dma_map); 169 170 nvme_free_request(req); 171 SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist); 172 173 mtx_unlock(&qpair->lock); 174 } 175 176 if (++qpair->cq_head == qpair->num_entries) { 177 qpair->cq_head = 0; 178 qpair->phase = !qpair->phase; 179 } 180 181 nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl, 182 qpair->cq_head); 183 } 184} 185 186static void 187nvme_qpair_msix_handler(void *arg) 188{ 189 struct nvme_qpair *qpair = arg; 190 191 nvme_qpair_process_completions(qpair); 192} 193 194void 195nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, 196 uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size, 197 struct nvme_controller *ctrlr) 198{ 199 200 qpair->id = id; 201 qpair->vector = vector; 202 qpair->num_entries = num_entries; 203 qpair->max_xfer_size = max_xfer_size; 204 qpair->ctrlr = ctrlr; 205 206 /* 207 * First time through the completion queue, HW will set phase 208 * bit on completions to 1. So set this to 1 here, indicating 209 * we're looking for a 1 to know which entries have completed. 210 * we'll toggle the bit each time when the completion queue 211 * rolls over. 212 */ 213 qpair->phase = 1; 214 215 if (ctrlr->msix_enabled) { 216 217 /* 218 * MSI-X vector resource IDs start at 1, so we add one to 219 * the queue's vector to get the corresponding rid to use. 220 */ 221 qpair->rid = vector + 1; 222 223 qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ, 224 &qpair->rid, RF_ACTIVE); 225 226 bus_setup_intr(ctrlr->dev, qpair->res, 227 INTR_TYPE_MISC | INTR_MPSAFE, NULL, 228 nvme_qpair_msix_handler, qpair, &qpair->tag); 229 } 230 231 mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF); 232 233 bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), 234 sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR, 235 BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size, 236 (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0, 237 NULL, NULL, &qpair->dma_tag); 238 239 qpair->num_cmds = 0; 240 qpair->num_intr_handler_calls = 0; 241 qpair->num_tr = 0; 242 qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0; 243 244 /* TODO: error checking on contigmalloc, bus_dmamap_load calls */ 245 qpair->cmd = contigmalloc(qpair->num_entries * 246 sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT, 247 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); 248 qpair->cpl = contigmalloc(qpair->num_entries * 249 sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT, 250 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); 251 252 bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map); 253 bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map); 254 255 bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map, 256 qpair->cmd, qpair->num_entries * sizeof(struct nvme_command), 257 nvme_single_map, &qpair->cmd_bus_addr, 0); 258 bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map, 259 qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion), 260 nvme_single_map, &qpair->cpl_bus_addr, 0); 261 262 qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl); 263 qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl); 264 265 SLIST_INIT(&qpair->free_tr); 266 267 qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries, 268 M_NVME, M_ZERO | M_NOWAIT); 269} 270 271static void 272nvme_qpair_destroy(struct nvme_qpair *qpair) 273{ 274 struct nvme_tracker *tr; 275 276 if (qpair->tag) 277 bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag); 278 279 if (qpair->res) 280 bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ, 281 rman_get_rid(qpair->res), qpair->res); 282 283 if (qpair->dma_tag) 284 bus_dma_tag_destroy(qpair->dma_tag); 285 286 if (qpair->act_tr) 287 free(qpair->act_tr, M_NVME); 288 289 while (!SLIST_EMPTY(&qpair->free_tr)) { 290 tr = SLIST_FIRST(&qpair->free_tr); 291 SLIST_REMOVE_HEAD(&qpair->free_tr, slist); 292 bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map); 293 bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map); 294 free(tr, M_NVME); 295 } 296} 297 298void 299nvme_admin_qpair_destroy(struct nvme_qpair *qpair) 300{ 301 302 /* 303 * For NVMe, you don't send delete queue commands for the admin 304 * queue, so we just need to unload and free the cmd and cpl memory. 305 */ 306 bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map); 307 bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map); 308 309 contigfree(qpair->cmd, 310 qpair->num_entries * sizeof(struct nvme_command), M_NVME); 311 312 bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map); 313 bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map); 314 contigfree(qpair->cpl, 315 qpair->num_entries * sizeof(struct nvme_completion), M_NVME); 316 317 nvme_qpair_destroy(qpair); 318} 319 320static void 321nvme_free_cmd_ring(void *arg, const struct nvme_completion *status) 322{ 323 struct nvme_qpair *qpair; 324 325 qpair = (struct nvme_qpair *)arg; 326 bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map); 327 bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map); 328 contigfree(qpair->cmd, 329 qpair->num_entries * sizeof(struct nvme_command), M_NVME); 330 qpair->cmd = NULL; 331} 332 333static void 334nvme_free_cpl_ring(void *arg, const struct nvme_completion *status) 335{ 336 struct nvme_qpair *qpair; 337 338 qpair = (struct nvme_qpair *)arg; 339 bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map); 340 bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map); 341 contigfree(qpair->cpl, 342 qpair->num_entries * sizeof(struct nvme_completion), M_NVME); 343 qpair->cpl = NULL; 344} 345 346void 347nvme_io_qpair_destroy(struct nvme_qpair *qpair) 348{ 349 struct nvme_controller *ctrlr = qpair->ctrlr; 350 351 if (qpair->num_entries > 0) { 352 353 nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring, 354 qpair); 355 /* Spin until free_cmd_ring sets qpair->cmd to NULL. */ 356 while (qpair->cmd) 357 DELAY(5); 358 359 nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring, 360 qpair); 361 /* Spin until free_cpl_ring sets qpair->cmd to NULL. */ 362 while (qpair->cpl) 363 DELAY(5); 364 365 nvme_qpair_destroy(qpair); 366 } 367} 368 369static void 370nvme_timeout(void *arg) 371{ 372 /* 373 * TODO: Add explicit abort operation here, once nvme(4) supports 374 * abort commands. 375 */ 376} 377 378void 379nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr) 380{ 381 struct nvme_request *req; 382 383 req = tr->req; 384 req->cmd.cid = tr->cid; 385 qpair->act_tr[tr->cid] = tr; 386 387 /* 388 * TODO: rather than spin until entries free up, put this tracker 389 * on a queue, and submit from the interrupt handler when 390 * entries free up. 391 */ 392 if ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head) { 393 do { 394 mtx_unlock(&qpair->lock); 395 DELAY(5); 396 mtx_lock(&qpair->lock); 397 } while ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head); 398 } 399 400 callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr); 401 402 /* Copy the command from the tracker to the submission queue. */ 403 memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd)); 404 405 if (++qpair->sq_tail == qpair->num_entries) 406 qpair->sq_tail = 0; 407 408 wmb(); 409 nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl, 410 qpair->sq_tail); 411 412 qpair->num_cmds++; 413 414 mtx_unlock(&qpair->lock); 415} 416