nvme_qpair.c revision 241658
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_qpair.c 241658 2012-10-18 00:37:11Z jimharris $");

#include <sys/param.h>
#include <sys/bus.h>

#include "nvme_private.h"

static boolean_t
nvme_completion_check_retry(const struct nvme_completion *cpl)
{
	/*
	 * TODO: spec is not clear how commands that are aborted due
	 * to TLER will be marked.  So for now, it seems
	 * NAMESPACE_NOT_READY is the only case where we should
	 * look at the DNR bit.
	 */
	switch (cpl->sf_sct) {
	case NVME_SCT_GENERIC:
		switch (cpl->sf_sc) {
		case NVME_SC_NAMESPACE_NOT_READY:
			if (cpl->sf_dnr)
				return (0);
			else
				return (1);
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_BY_REQUEST:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return (0);
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return (0);
	}
}

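/*
 * Allocate a command tracker for this queue pair: reuse one from the
 * free list if possible, otherwise lazily create a new tracker (with
 * its payload/PRP DMA maps and timeout callout), up to num_entries
 * trackers.  On success the tracker is returned with qpair->lock still
 * held; nvme_qpair_submit_cmd() releases the lock after submission.
 */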
struct nvme_tracker *
nvme_qpair_allocate_tracker(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;

	mtx_lock(&qpair->lock);

	tr = SLIST_FIRST(&qpair->free_tr);
	if (tr == NULL) {
		/*
		 * We can't support more trackers than we have entries in
		 * our queue, because it would generate invalid indices
		 * into the qpair's active tracker array.
		 */
		if (qpair->num_tr == qpair->num_entries) {
			mtx_unlock(&qpair->lock);
			return (NULL);
		}

		tr = malloc(sizeof(struct nvme_tracker), M_NVME,
		    M_ZERO | M_NOWAIT);

		if (tr == NULL) {
			mtx_unlock(&qpair->lock);
			return (NULL);
		}

		bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
		bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);

		bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
		    sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);

		callout_init_mtx(&tr->timer, &qpair->lock, 0);
		tr->cid = qpair->num_tr++;
	} else
		SLIST_REMOVE_HEAD(&qpair->free_tr, slist);

	return (tr);
}

void
nvme_qpair_process_completions(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;
	struct nvme_completion	*cpl;
	boolean_t		retry, error;

	qpair->num_intr_handler_calls++;

	while (1) {
		cpl = &qpair->cpl[qpair->cq_head];

		if (cpl->p != qpair->phase)
			break;

		tr = qpair->act_tr[cpl->cid];
		KASSERT(tr,
		    ("completion queue has entries but no active trackers\n"));

		error = cpl->sf_sc || cpl->sf_sct;
		retry = error && nvme_completion_check_retry(cpl);

		if (error) {
			nvme_dump_completion(cpl);
			nvme_dump_command(&tr->cmd);
		}

		qpair->act_tr[cpl->cid] = NULL;

		KASSERT(cpl->cid == tr->cmd.cid,
		    ("cpl cid does not match cmd cid\n"));

		if (tr->cb_fn && !retry)
			tr->cb_fn(tr->cb_arg, cpl);

		qpair->sq_head = cpl->sqhd;

		mtx_lock(&qpair->lock);
		callout_stop(&tr->timer);

		if (retry)
			/* nvme_qpair_submit_cmd() will release the lock. */
			nvme_qpair_submit_cmd(qpair, tr);
		else {
			if (tr->payload_size > 0)
				bus_dmamap_unload(qpair->dma_tag,
				    tr->payload_dma_map);

			SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);

			mtx_unlock(&qpair->lock);
		}

		if (++qpair->cq_head == qpair->num_entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

		nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
		    qpair->cq_head);
	}
}

static void
nvme_qpair_msix_handler(void *arg)
{
	struct nvme_qpair *qpair = arg;

	nvme_qpair_process_completions(qpair);
}

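/*
 * Set up a single submission/completion queue pair: hook up the MSI-X
 * interrupt handler (when enabled), create the DMA tag, allocate and
 * map the contiguous command and completion rings, and record the
 * doorbell register offsets for this queue ID.
 */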
void
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
    uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size,
    struct nvme_controller *ctrlr)
{

	qpair->id = id;
	qpair->vector = vector;
	qpair->num_entries = num_entries;
	qpair->max_xfer_size = max_xfer_size;
	qpair->ctrlr = ctrlr;

	/*
	 * First time through the completion queue, HW will set the phase
	 * bit on completions to 1.  So set this to 1 here, indicating
	 * we're looking for a 1 to know which entries have completed.
	 * We'll toggle the bit each time the completion queue rolls over.
	 */
	qpair->phase = 1;

	if (ctrlr->msix_enabled) {

		/*
		 * MSI-X vector resource IDs start at 1, so we add one to
		 * the queue's vector to get the corresponding rid to use.
		 */
		qpair->rid = vector + 1;

		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
		    &qpair->rid, RF_ACTIVE);

		bus_setup_intr(ctrlr->dev, qpair->res,
		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
		    nvme_qpair_msix_handler, qpair, &qpair->tag);
	}

	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);

	bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
	    sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
	    (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
	    NULL, NULL, &qpair->dma_tag);

	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 0;
	qpair->num_tr = 0;
	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
	qpair->cmd = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	qpair->cpl = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);

	bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
	    qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
	    nvme_single_map, &qpair->cmd_bus_addr, 0);
	bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
	    qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
	    nvme_single_map, &qpair->cpl_bus_addr, 0);

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

	SLIST_INIT(&qpair->free_tr);

	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
	    M_NVME, M_ZERO | M_NOWAIT);
}

static void
nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;

	if (qpair->tag)
		bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);

	if (qpair->res)
		bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(qpair->res), qpair->res);

	if (qpair->dma_tag)
		bus_dma_tag_destroy(qpair->dma_tag);

	if (qpair->act_tr)
		free(qpair->act_tr, M_NVME);

	while (!SLIST_EMPTY(&qpair->free_tr)) {
		tr = SLIST_FIRST(&qpair->free_tr);
		SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
		bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map);
		bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map);
		free(tr, M_NVME);
	}
}

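/*
 * The admin and I/O queue pairs are torn down differently: the admin
 * rings are simply unmapped and freed here, while the I/O rings are
 * freed from the completion callbacks of the delete-queue admin
 * commands issued in nvme_io_qpair_destroy() below.
 */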
void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

	/*
	 * For NVMe, you don't send delete queue commands for the admin
	 * queue, so we just need to unload and free the cmd and cpl memory.
	 */
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);

	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);

	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);

	nvme_qpair_destroy(qpair);
}

static void
nvme_free_cmd_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);
	qpair->cmd = NULL;
}

static void
nvme_free_cpl_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;
	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);
	qpair->cpl = NULL;
}

void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_controller *ctrlr = qpair->ctrlr;

	if (qpair->num_entries > 0) {

		nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring,
		    qpair);
		/* Spin until nvme_free_cmd_ring() sets qpair->cmd to NULL. */
		while (qpair->cmd)
			DELAY(5);

		nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring,
		    qpair);
		/* Spin until nvme_free_cpl_ring() sets qpair->cpl to NULL. */
		while (qpair->cpl)
			DELAY(5);

		nvme_qpair_destroy(qpair);
	}
}

static void
nvme_timeout(void *arg)
{
	/*
	 * TODO: Add explicit abort operation here, once nvme(4) supports
	 * abort commands.
	 */
}

void
nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{

	tr->cmd.cid = tr->cid;
	qpair->act_tr[tr->cid] = tr;

	/*
	 * TODO: rather than spin until entries free up, put this tracker
	 * on a queue, and submit from the interrupt handler when
	 * entries free up.
	 */
	if ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head) {
		do {
			mtx_unlock(&qpair->lock);
			DELAY(5);
			mtx_lock(&qpair->lock);
		} while ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head);
	}

	callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr);

	/* Copy the command from the tracker to the submission queue. */
	memcpy(&qpair->cmd[qpair->sq_tail], &tr->cmd, sizeof(tr->cmd));

	if (++qpair->sq_tail == qpair->num_entries)
		qpair->sq_tail = 0;

	wmb();
	nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
	    qpair->sq_tail);

	qpair->num_cmds++;

	mtx_unlock(&qpair->lock);
}