1/* $NetBSD: nvme.c,v 1.69 2024/03/11 21:10:46 riastradh Exp $ */ 2/* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */ 3 4/* 5 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20#include <sys/cdefs.h> 21__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.69 2024/03/11 21:10:46 riastradh Exp $"); 22 23#include <sys/param.h> 24#include <sys/systm.h> 25#include <sys/kernel.h> 26#include <sys/atomic.h> 27#include <sys/bus.h> 28#include <sys/buf.h> 29#include <sys/conf.h> 30#include <sys/device.h> 31#include <sys/kmem.h> 32#include <sys/once.h> 33#include <sys/proc.h> 34#include <sys/queue.h> 35#include <sys/mutex.h> 36 37#include <uvm/uvm_extern.h> 38 39#include <dev/ic/nvmereg.h> 40#include <dev/ic/nvmevar.h> 41#include <dev/ic/nvmeio.h> 42 43#include "ioconf.h" 44#include "locators.h" 45 46#define B4_CHK_RDY_DELAY_MS 2300 /* workaround controller bug */ 47 48int nvme_adminq_size = 32; 49int nvme_ioq_size = 1024; 50 51static int nvme_print(void *, const char *); 52 53static int nvme_ready(struct nvme_softc *, uint32_t); 54static int nvme_enable(struct nvme_softc *, u_int); 55static int nvme_disable(struct nvme_softc *); 56static int nvme_shutdown(struct nvme_softc *); 57 58uint32_t nvme_op_sq_enter(struct nvme_softc *, 59 struct nvme_queue *, struct nvme_ccb *); 60void nvme_op_sq_leave(struct nvme_softc *, 61 struct nvme_queue *, struct nvme_ccb *); 62uint32_t nvme_op_sq_enter_locked(struct nvme_softc *, 63 struct nvme_queue *, struct nvme_ccb *); 64void nvme_op_sq_leave_locked(struct nvme_softc *, 65 struct nvme_queue *, struct nvme_ccb *); 66 67void nvme_op_cq_done(struct nvme_softc *, 68 struct nvme_queue *, struct nvme_ccb *); 69 70static const struct nvme_ops nvme_ops = { 71 .op_sq_enter = nvme_op_sq_enter, 72 .op_sq_leave = nvme_op_sq_leave, 73 .op_sq_enter_locked = nvme_op_sq_enter_locked, 74 .op_sq_leave_locked = nvme_op_sq_leave_locked, 75 76 .op_cq_done = nvme_op_cq_done, 77}; 78 79#ifdef NVME_DEBUG 80static void nvme_dumpregs(struct nvme_softc *); 81#endif 82static int nvme_identify(struct nvme_softc *, u_int); 83static void nvme_fill_identify(struct nvme_queue *, struct nvme_ccb *, 84 void *); 85 86static int nvme_ccbs_alloc(struct nvme_queue *, uint16_t); 87static void nvme_ccbs_free(struct nvme_queue *); 88 89static struct nvme_ccb * 90 nvme_ccb_get(struct nvme_queue *, bool); 91static struct nvme_ccb * 92 nvme_ccb_get_bio(struct nvme_softc *, struct buf *, 93 struct nvme_queue **); 94static void nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *); 95 96static int nvme_poll(struct nvme_softc *, struct nvme_queue *, 97 struct nvme_ccb *, void (*)(struct nvme_queue *, 98 struct nvme_ccb *, void *), int); 99static void nvme_poll_fill(struct nvme_queue *, struct nvme_ccb *, void *); 100static void nvme_poll_done(struct nvme_queue *, struct 
nvme_ccb *, 101 struct nvme_cqe *); 102static void nvme_sqe_fill(struct nvme_queue *, struct nvme_ccb *, void *); 103static void nvme_empty_done(struct nvme_queue *, struct nvme_ccb *, 104 struct nvme_cqe *); 105 106static struct nvme_queue * 107 nvme_q_alloc(struct nvme_softc *, uint16_t, u_int, u_int); 108static int nvme_q_create(struct nvme_softc *, struct nvme_queue *); 109static void nvme_q_reset(struct nvme_softc *, struct nvme_queue *); 110static int nvme_q_delete(struct nvme_softc *, struct nvme_queue *); 111static void nvme_q_submit(struct nvme_softc *, struct nvme_queue *, 112 struct nvme_ccb *, void (*)(struct nvme_queue *, 113 struct nvme_ccb *, void *)); 114static int nvme_q_complete(struct nvme_softc *, struct nvme_queue *q); 115static void nvme_q_free(struct nvme_softc *, struct nvme_queue *); 116static void nvme_q_wait_complete(struct nvme_softc *, struct nvme_queue *, 117 bool (*)(void *), void *); 118 119static void nvme_ns_io_fill(struct nvme_queue *, struct nvme_ccb *, 120 void *); 121static void nvme_ns_io_done(struct nvme_queue *, struct nvme_ccb *, 122 struct nvme_cqe *); 123static void nvme_ns_sync_fill(struct nvme_queue *, struct nvme_ccb *, 124 void *); 125static void nvme_ns_sync_done(struct nvme_queue *, struct nvme_ccb *, 126 struct nvme_cqe *); 127static void nvme_getcache_fill(struct nvme_queue *, struct nvme_ccb *, 128 void *); 129static void nvme_getcache_done(struct nvme_queue *, struct nvme_ccb *, 130 struct nvme_cqe *); 131 132static void nvme_pt_fill(struct nvme_queue *, struct nvme_ccb *, 133 void *); 134static void nvme_pt_done(struct nvme_queue *, struct nvme_ccb *, 135 struct nvme_cqe *); 136static int nvme_command_passthrough(struct nvme_softc *, 137 struct nvme_pt_command *, uint32_t, struct lwp *, bool); 138 139static int nvme_set_number_of_queues(struct nvme_softc *, u_int, u_int *, 140 u_int *); 141 142#define NVME_TIMO_QOP 5 /* queue create and delete timeout */ 143#define NVME_TIMO_IDENT 10 /* probe identify timeout */ 144#define NVME_TIMO_PT -1 /* passthrough cmd timeout */ 145#define NVME_TIMO_SY 60 /* sync cache timeout */ 146 147/* 148 * Some controllers, at least Apple NVMe, always require split 149 * transfers, so don't use bus_space_{read,write}_8() on LP64. 
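 * Instead, 64-bit registers are accessed as two 32-bit halves; the
 * byte-order conditionals below keep the low register dword in the
 * low half of the assembled value on both little- and big-endian
 * hosts.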
150 */ 151uint64_t 152nvme_read8(struct nvme_softc *sc, bus_size_t r) 153{ 154 uint64_t v; 155 uint32_t *a = (uint32_t *)&v; 156 157#if _BYTE_ORDER == _LITTLE_ENDIAN 158 a[0] = nvme_read4(sc, r); 159 a[1] = nvme_read4(sc, r + 4); 160#else /* _BYTE_ORDER == _LITTLE_ENDIAN */ 161 a[1] = nvme_read4(sc, r); 162 a[0] = nvme_read4(sc, r + 4); 163#endif 164 165 return v; 166} 167 168void 169nvme_write8(struct nvme_softc *sc, bus_size_t r, uint64_t v) 170{ 171 uint32_t *a = (uint32_t *)&v; 172 173#if _BYTE_ORDER == _LITTLE_ENDIAN 174 nvme_write4(sc, r, a[0]); 175 nvme_write4(sc, r + 4, a[1]); 176#else /* _BYTE_ORDER == _LITTLE_ENDIAN */ 177 nvme_write4(sc, r, a[1]); 178 nvme_write4(sc, r + 4, a[0]); 179#endif 180} 181 182#ifdef NVME_DEBUG 183static __used void 184nvme_dumpregs(struct nvme_softc *sc) 185{ 186 uint64_t r8; 187 uint32_t r4; 188 189#define DEVNAME(_sc) device_xname((_sc)->sc_dev) 190 r8 = nvme_read8(sc, NVME_CAP); 191 printf("%s: cap 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP)); 192 printf("%s: mpsmax %u (%u)\n", DEVNAME(sc), 193 (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8))); 194 printf("%s: mpsmin %u (%u)\n", DEVNAME(sc), 195 (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8))); 196 printf("%s: css %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CSS(r8)); 197 printf("%s: nssrs %"PRIu64"\n", DEVNAME(sc), NVME_CAP_NSSRS(r8)); 198 printf("%s: dstrd %"PRIu64"\n", DEVNAME(sc), NVME_CAP_DSTRD(r8)); 199 printf("%s: to %"PRIu64" msec\n", DEVNAME(sc), NVME_CAP_TO(r8)); 200 printf("%s: ams %"PRIu64"\n", DEVNAME(sc), NVME_CAP_AMS(r8)); 201 printf("%s: cqr %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CQR(r8)); 202 printf("%s: mqes %"PRIu64"\n", DEVNAME(sc), NVME_CAP_MQES(r8)); 203 204 printf("%s: vs 0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS)); 205 206 r4 = nvme_read4(sc, NVME_CC); 207 printf("%s: cc 0x%04x\n", DEVNAME(sc), r4); 208 printf("%s: iocqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4), 209 (1 << NVME_CC_IOCQES_R(r4))); 210 printf("%s: iosqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4), 211 (1 << NVME_CC_IOSQES_R(r4))); 212 printf("%s: shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4)); 213 printf("%s: ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4)); 214 printf("%s: mps %u (%u)\n", DEVNAME(sc), NVME_CC_MPS_R(r4), 215 (1 << NVME_CC_MPS_R(r4))); 216 printf("%s: css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4)); 217 printf("%s: en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN) ? 
1 : 0);

	r4 = nvme_read4(sc, NVME_CSTS);
	printf("%s: csts 0x%08x\n", DEVNAME(sc), r4);
	printf("%s: rdy %u\n", DEVNAME(sc), r4 & NVME_CSTS_RDY);
	printf("%s: cfs %u\n", DEVNAME(sc), r4 & NVME_CSTS_CFS);
	printf("%s: shst %x\n", DEVNAME(sc), r4 & NVME_CSTS_SHST_MASK);

	r4 = nvme_read4(sc, NVME_AQA);
	printf("%s: aqa 0x%08x\n", DEVNAME(sc), r4);
	printf("%s: acqs %u\n", DEVNAME(sc), NVME_AQA_ACQS_R(r4));
	printf("%s: asqs %u\n", DEVNAME(sc), NVME_AQA_ASQS_R(r4));

	printf("%s: asq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ));
	printf("%s: acq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ));
#undef DEVNAME
}
#endif /* NVME_DEBUG */

static int
nvme_ready(struct nvme_softc *sc, uint32_t rdy)
{
	u_int i = 0;

	while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) {
		if (i++ > sc->sc_rdy_to)
			return ENXIO;

		delay(1000);
		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
	}

	return 0;
}

static int
nvme_enable(struct nvme_softc *sc, u_int mps)
{
	uint32_t cc, csts;
	int error;

	cc = nvme_read4(sc, NVME_CC);
	csts = nvme_read4(sc, NVME_CSTS);

	/*
	 * See note in nvme_disable. Short circuit if we're already enabled.
	 */
	if (ISSET(cc, NVME_CC_EN)) {
		if (ISSET(csts, NVME_CSTS_RDY))
			return 0;

		goto waitready;
	} else {
		/* EN == 0 already; wait for RDY == 0 or fail */
		error = nvme_ready(sc, 0);
		if (error)
			return error;
	}

	if (sc->sc_ops->op_enable != NULL)
		sc->sc_ops->op_enable(sc);

	nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);
	nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);

	nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
	    NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);

	CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
	    NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
	SET(cc, NVME_CC_IOSQES(ffs(64) - 1) | NVME_CC_IOCQES(ffs(16) - 1));
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
	SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
	SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
	SET(cc, NVME_CC_MPS(mps));
	SET(cc, NVME_CC_EN);

	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios,
	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);

 waitready:
	return nvme_ready(sc, NVME_CSTS_RDY);
}

static int
nvme_disable(struct nvme_softc *sc)
{
	uint32_t cc, csts;
	int error;

	cc = nvme_read4(sc, NVME_CC);
	csts = nvme_read4(sc, NVME_CSTS);

	/*
	 * Per 3.1.5 in the NVMe 1.3 spec, transitioning CC.EN from 0 to 1
	 * when CSTS.RDY is 1, or transitioning CC.EN from 1 to 0 when
	 * CSTS.RDY is 0, "has undefined results".  So make sure that
	 * CSTS.RDY isn't the desired value.  Short circuit if we're
	 * already disabled.
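	 * The EN/RDY pair is effectively a handshake: with EN set we must
	 * first wait for RDY to rise before clearing EN, and with EN
	 * already clear we only have to wait for RDY to drop.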
	 */
	if (ISSET(cc, NVME_CC_EN)) {
		if (!ISSET(csts, NVME_CSTS_RDY)) {
			/* EN == 1, wait for RDY == 1 or fail */
			error = nvme_ready(sc, NVME_CSTS_RDY);
			if (error)
				return error;
		}
	} else {
		/* EN == 0 already; wait for RDY == 0 */
		if (!ISSET(csts, NVME_CSTS_RDY))
			return 0;

		goto waitready;
	}

	CLR(cc, NVME_CC_EN);
	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_READ);

	/*
	 * Some drives have issues with accessing the mmio after we disable,
	 * so delay for a bit after we write the bit to cope with these issues.
	 */
	if (ISSET(sc->sc_quirks, NVME_QUIRK_DELAY_B4_CHK_RDY))
		delay(B4_CHK_RDY_DELAY_MS);

 waitready:
	return nvme_ready(sc, 0);
}

int
nvme_attach(struct nvme_softc *sc)
{
	uint64_t cap;
	uint32_t reg;
	u_int mps = PAGE_SHIFT;
	u_int ncq, nsq;
	uint16_t adminq_entries = nvme_adminq_size;
	uint16_t ioq_entries = nvme_ioq_size;
	int i;

	if (sc->sc_ops == NULL)
		sc->sc_ops = &nvme_ops;

	reg = nvme_read4(sc, NVME_VS);
	if (reg == 0xffffffff) {
		aprint_error_dev(sc->sc_dev, "invalid mapping\n");
		return 1;
	}

	if (NVME_VS_TER(reg) == 0)
		aprint_normal_dev(sc->sc_dev, "NVMe %d.%d\n", NVME_VS_MJR(reg),
		    NVME_VS_MNR(reg));
	else
		aprint_normal_dev(sc->sc_dev, "NVMe %d.%d.%d\n", NVME_VS_MJR(reg),
		    NVME_VS_MNR(reg), NVME_VS_TER(reg));

	cap = nvme_read8(sc, NVME_CAP);
	sc->sc_dstrd = NVME_CAP_DSTRD(cap);
	if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) {
		aprint_error_dev(sc->sc_dev, "NVMe minimum page size %u "
		    "is greater than CPU page size %u\n",
		    1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT);
		return 1;
	}
	if (NVME_CAP_MPSMAX(cap) < mps)
		mps = NVME_CAP_MPSMAX(cap);
	if (ioq_entries > NVME_CAP_MQES(cap))
		ioq_entries = NVME_CAP_MQES(cap);

	/* set initial values to be used for admin queue during probe */
	sc->sc_rdy_to = NVME_CAP_TO(cap);
	sc->sc_mps = 1 << mps;
	sc->sc_mdts = MAXPHYS;
	sc->sc_max_sgl = btoc(round_page(sc->sc_mdts));

	if (nvme_disable(sc) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to disable controller\n");
		return 1;
	}

	sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, adminq_entries,
	    sc->sc_dstrd);
	if (sc->sc_admin_q == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to allocate admin queue\n");
		return 1;
	}
	if (sc->sc_intr_establish(sc, NVME_ADMIN_Q, sc->sc_admin_q))
		goto free_admin_q;

	if (nvme_enable(sc, mps) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to enable controller\n");
		goto disestablish_admin_q;
	}

	if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to identify controller\n");
		goto disable;
	}
	if (sc->sc_nn == 0) {
		aprint_error_dev(sc->sc_dev, "namespace not found\n");
		goto disable;
	}

	/* we know how big things are now */
	sc->sc_max_sgl = sc->sc_mdts / sc->sc_mps;

	/* reallocate ccbs of admin queue with new max sgl.
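	 * The admin queue's dmamaps and PRP lists were sized with the
	 * conservative pre-identify sc_max_sgl, so rebuild them now that
	 * the controller's real MDTS is known.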
*/ 432 nvme_ccbs_free(sc->sc_admin_q); 433 nvme_ccbs_alloc(sc->sc_admin_q, sc->sc_admin_q->q_entries); 434 435 if (sc->sc_use_mq) { 436 /* Limit the number of queues to the number allocated in HW */ 437 if (nvme_set_number_of_queues(sc, sc->sc_nq, &ncq, &nsq) != 0) { 438 aprint_error_dev(sc->sc_dev, 439 "unable to get number of queues\n"); 440 goto disable; 441 } 442 if (sc->sc_nq > ncq) 443 sc->sc_nq = ncq; 444 if (sc->sc_nq > nsq) 445 sc->sc_nq = nsq; 446 } 447 448 sc->sc_q = kmem_zalloc(sizeof(*sc->sc_q) * sc->sc_nq, KM_SLEEP); 449 for (i = 0; i < sc->sc_nq; i++) { 450 sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, 451 sc->sc_dstrd); 452 if (sc->sc_q[i] == NULL) { 453 aprint_error_dev(sc->sc_dev, 454 "unable to allocate io queue\n"); 455 goto free_q; 456 } 457 if (nvme_q_create(sc, sc->sc_q[i]) != 0) { 458 aprint_error_dev(sc->sc_dev, 459 "unable to create io queue\n"); 460 nvme_q_free(sc, sc->sc_q[i]); 461 goto free_q; 462 } 463 } 464 465 if (!sc->sc_use_mq) 466 nvme_write4(sc, NVME_INTMC, 1); 467 468 /* probe subdevices */ 469 sc->sc_namespaces = kmem_zalloc(sizeof(*sc->sc_namespaces) * sc->sc_nn, 470 KM_SLEEP); 471 nvme_rescan(sc->sc_dev, NULL, NULL); 472 473 return 0; 474 475free_q: 476 while (--i >= 0) { 477 nvme_q_delete(sc, sc->sc_q[i]); 478 nvme_q_free(sc, sc->sc_q[i]); 479 } 480disable: 481 nvme_disable(sc); 482disestablish_admin_q: 483 sc->sc_intr_disestablish(sc, NVME_ADMIN_Q); 484free_admin_q: 485 nvme_q_free(sc, sc->sc_admin_q); 486 487 return 1; 488} 489 490int 491nvme_rescan(device_t self, const char *ifattr, const int *locs) 492{ 493 struct nvme_softc *sc = device_private(self); 494 struct nvme_attach_args naa; 495 struct nvm_namespace_format *f; 496 struct nvme_namespace *ns; 497 uint64_t cap; 498 int ioq_entries = nvme_ioq_size; 499 int i, mlocs[NVMECF_NLOCS]; 500 int error; 501 502 cap = nvme_read8(sc, NVME_CAP); 503 if (ioq_entries > NVME_CAP_MQES(cap)) 504 ioq_entries = NVME_CAP_MQES(cap); 505 506 for (i = 1; i <= sc->sc_nn; i++) { 507 if (sc->sc_namespaces[i - 1].dev) 508 continue; 509 510 /* identify to check for availability */ 511 error = nvme_ns_identify(sc, i); 512 if (error) { 513 aprint_error_dev(self, "couldn't identify namespace #%d\n", i); 514 continue; 515 } 516 517 ns = nvme_ns_get(sc, i); 518 KASSERT(ns); 519 520 f = &ns->ident->lbaf[NVME_ID_NS_FLBAS(ns->ident->flbas)]; 521 522 /* 523 * NVME1.0e 6.11 Identify command 524 * 525 * LBADS values smaller than 9 are not supported, a value 526 * of zero means that the format is not used. 
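		 * (LBADS is the log2 of the LBA data size, so the
		 * minimum of 9 corresponds to 512-byte sectors.)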
		 */
		if (f->lbads < 9) {
			if (f->lbads > 0)
				aprint_error_dev(self,
				    "unsupported logical data size %u\n", f->lbads);
			continue;
		}

		mlocs[NVMECF_NSID] = i;

		memset(&naa, 0, sizeof(naa));
		naa.naa_nsid = i;
		naa.naa_qentries = (ioq_entries - 1) * sc->sc_nq;
		naa.naa_maxphys = sc->sc_mdts;
		naa.naa_typename = sc->sc_modelname;
		sc->sc_namespaces[i - 1].dev =
		    config_found(sc->sc_dev, &naa, nvme_print,
			CFARGS(.submatch = config_stdsubmatch,
			    .locators = mlocs));
	}
	return 0;
}

static int
nvme_print(void *aux, const char *pnp)
{
	struct nvme_attach_args *naa = aux;

	if (pnp)
		aprint_normal("ld at %s", pnp);

	if (naa->naa_nsid > 0)
		aprint_normal(" nsid %d", naa->naa_nsid);

	return UNCONF;
}

int
nvme_detach(struct nvme_softc *sc, int flags)
{
	int i, error;

	error = config_detach_children(sc->sc_dev, flags);
	if (error)
		return error;

	error = nvme_shutdown(sc);
	if (error)
		return error;

	/* from now on we are committed to detach; the following will never fail */
	for (i = 0; i < sc->sc_nq; i++)
		nvme_q_free(sc, sc->sc_q[i]);
	kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq);
	nvme_q_free(sc, sc->sc_admin_q);

	return 0;
}

int
nvme_suspend(struct nvme_softc *sc)
{

	return nvme_shutdown(sc);
}

int
nvme_resume(struct nvme_softc *sc)
{
	int i, error;

	error = nvme_disable(sc);
	if (error) {
		device_printf(sc->sc_dev, "unable to disable controller\n");
		return error;
	}

	nvme_q_reset(sc, sc->sc_admin_q);
	if (sc->sc_intr_establish(sc, NVME_ADMIN_Q, sc->sc_admin_q)) {
		error = EIO;
		device_printf(sc->sc_dev, "unable to establish admin q\n");
		goto disable;
	}

	error = nvme_enable(sc, ffs(sc->sc_mps) - 1);
	if (error) {
		device_printf(sc->sc_dev, "unable to enable controller\n");
		return error;
	}

	for (i = 0; i < sc->sc_nq; i++) {
		nvme_q_reset(sc, sc->sc_q[i]);
		if (nvme_q_create(sc, sc->sc_q[i]) != 0) {
			error = EIO;
			device_printf(sc->sc_dev, "unable to create io q %d"
			    "\n", i);
			goto disable;
		}
	}

	if (!sc->sc_use_mq)
		nvme_write4(sc, NVME_INTMC, 1);

	return 0;

disable:
	(void)nvme_disable(sc);

	return error;
}

static int
nvme_shutdown(struct nvme_softc *sc)
{
	uint32_t cc, csts;
	bool disabled = false;
	int i;

	if (!sc->sc_use_mq)
		nvme_write4(sc, NVME_INTMS, 1);

	for (i = 0; i < sc->sc_nq; i++) {
		if (nvme_q_delete(sc, sc->sc_q[i]) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to delete io queue %d, disabling\n", i + 1);
			disabled = true;
		}
	}
	if (disabled)
		goto disable;

	sc->sc_intr_disestablish(sc, NVME_ADMIN_Q);

	cc = nvme_read4(sc, NVME_CC);
	CLR(cc, NVME_CC_SHN_MASK);
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
	nvme_write4(sc, NVME_CC, cc);

	for (i = 0; i < 4000; i++) {
		nvme_barrier(sc, 0, sc->sc_ios,
		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
		csts = nvme_read4(sc, NVME_CSTS);
		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
			return 0;

		delay(1000);
	}

	aprint_error_dev(sc->sc_dev, "unable to shut down, disabling\n");

disable:
	nvme_disable(sc);
	return 0;
}

void
nvme_childdet(device_t self, device_t child)
{
	struct nvme_softc *sc = device_private(self);
	int
i; 687 688 for (i = 0; i < sc->sc_nn; i++) { 689 if (sc->sc_namespaces[i].dev == child) { 690 /* Already freed ns->ident. */ 691 sc->sc_namespaces[i].dev = NULL; 692 break; 693 } 694 } 695} 696 697int 698nvme_ns_identify(struct nvme_softc *sc, uint16_t nsid) 699{ 700 struct nvme_sqe sqe; 701 struct nvm_identify_namespace *identify; 702 struct nvme_dmamem *mem; 703 struct nvme_ccb *ccb; 704 struct nvme_namespace *ns; 705 int rv; 706 707 KASSERT(nsid > 0); 708 709 ns = nvme_ns_get(sc, nsid); 710 KASSERT(ns); 711 712 if (ns->ident != NULL) 713 return 0; 714 715 ccb = nvme_ccb_get(sc->sc_admin_q, false); 716 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 717 718 mem = nvme_dmamem_alloc(sc, sizeof(*identify)); 719 if (mem == NULL) { 720 nvme_ccb_put(sc->sc_admin_q, ccb); 721 return ENOMEM; 722 } 723 724 memset(&sqe, 0, sizeof(sqe)); 725 sqe.opcode = NVM_ADMIN_IDENTIFY; 726 htolem32(&sqe.nsid, nsid); 727 htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem)); 728 htolem32(&sqe.cdw10, 0); 729 730 ccb->ccb_done = nvme_empty_done; 731 ccb->ccb_cookie = &sqe; 732 733 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD); 734 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT); 735 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); 736 737 nvme_ccb_put(sc->sc_admin_q, ccb); 738 739 if (rv != 0) { 740 rv = EIO; 741 goto done; 742 } 743 744 /* commit */ 745 746 identify = kmem_zalloc(sizeof(*identify), KM_SLEEP); 747 *identify = *((volatile struct nvm_identify_namespace *)NVME_DMA_KVA(mem)); 748 749 /* Convert data to host endian */ 750 nvme_identify_namespace_swapbytes(identify); 751 752 ns->ident = identify; 753 754done: 755 nvme_dmamem_free(sc, mem); 756 757 return rv; 758} 759 760int 761nvme_ns_dobio(struct nvme_softc *sc, uint16_t nsid, void *cookie, 762 struct buf *bp, void *data, size_t datasize, 763 int secsize, daddr_t blkno, int flags, nvme_nnc_done nnc_done) 764{ 765 struct nvme_queue *q; 766 struct nvme_ccb *ccb; 767 bus_dmamap_t dmap; 768 int i, error; 769 770 ccb = nvme_ccb_get_bio(sc, bp, &q); 771 if (ccb == NULL) 772 return EAGAIN; 773 774 ccb->ccb_done = nvme_ns_io_done; 775 ccb->ccb_cookie = cookie; 776 777 /* namespace context */ 778 ccb->nnc_nsid = nsid; 779 ccb->nnc_flags = flags; 780 ccb->nnc_buf = bp; 781 ccb->nnc_datasize = datasize; 782 ccb->nnc_secsize = secsize; 783 ccb->nnc_blkno = blkno; 784 ccb->nnc_done = nnc_done; 785 786 dmap = ccb->ccb_dmamap; 787 error = bus_dmamap_load(sc->sc_dmat, dmap, data, 788 datasize, NULL, 789 (ISSET(flags, NVME_NS_CTX_F_POLL) ? 790 BUS_DMA_NOWAIT : BUS_DMA_WAITOK) | 791 (ISSET(flags, NVME_NS_CTX_F_READ) ? 792 BUS_DMA_READ : BUS_DMA_WRITE)); 793 if (error) { 794 nvme_ccb_put(q, ccb); 795 return error; 796 } 797 798 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 799 ISSET(flags, NVME_NS_CTX_F_READ) ? 
800 BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); 801 802 if (dmap->dm_nsegs > 2) { 803 for (i = 1; i < dmap->dm_nsegs; i++) { 804 htolem64(&ccb->ccb_prpl[i - 1], 805 dmap->dm_segs[i].ds_addr); 806 } 807 bus_dmamap_sync(sc->sc_dmat, 808 NVME_DMA_MAP(q->q_ccb_prpls), 809 ccb->ccb_prpl_off, 810 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 811 BUS_DMASYNC_PREWRITE); 812 } 813 814 if (ISSET(flags, NVME_NS_CTX_F_POLL)) { 815 if (nvme_poll(sc, q, ccb, nvme_ns_io_fill, NVME_TIMO_PT) != 0) 816 return EIO; 817 return 0; 818 } 819 820 nvme_q_submit(sc, q, ccb, nvme_ns_io_fill); 821 return 0; 822} 823 824static void 825nvme_ns_io_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 826{ 827 struct nvme_sqe_io *sqe = slot; 828 bus_dmamap_t dmap = ccb->ccb_dmamap; 829 830 sqe->opcode = ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ? 831 NVM_CMD_READ : NVM_CMD_WRITE; 832 htolem32(&sqe->nsid, ccb->nnc_nsid); 833 834 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr); 835 switch (dmap->dm_nsegs) { 836 case 1: 837 break; 838 case 2: 839 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr); 840 break; 841 default: 842 /* the prp list is already set up and synced */ 843 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva); 844 break; 845 } 846 847 htolem64(&sqe->slba, ccb->nnc_blkno); 848 849 if (ISSET(ccb->nnc_flags, NVME_NS_CTX_F_FUA)) 850 htolem16(&sqe->ioflags, NVM_SQE_IO_FUA); 851 852 /* guaranteed by upper layers, but check just in case */ 853 KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0); 854 htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1); 855} 856 857static void 858nvme_ns_io_done(struct nvme_queue *q, struct nvme_ccb *ccb, 859 struct nvme_cqe *cqe) 860{ 861 struct nvme_softc *sc = q->q_sc; 862 bus_dmamap_t dmap = ccb->ccb_dmamap; 863 void *nnc_cookie = ccb->ccb_cookie; 864 nvme_nnc_done nnc_done = ccb->nnc_done; 865 struct buf *bp = ccb->nnc_buf; 866 867 if (dmap->dm_nsegs > 2) { 868 bus_dmamap_sync(sc->sc_dmat, 869 NVME_DMA_MAP(q->q_ccb_prpls), 870 ccb->ccb_prpl_off, 871 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 872 BUS_DMASYNC_POSTWRITE); 873 } 874 875 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 876 ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ? 877 BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); 878 879 bus_dmamap_unload(sc->sc_dmat, dmap); 880 nvme_ccb_put(q, ccb); 881 882 nnc_done(nnc_cookie, bp, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0)); 883} 884 885/* 886 * If there is no volatile write cache, it makes no sense to issue 887 * flush commands or query for the status. 
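 * The controller advertises the cache in the VWC field of the
 * Identify Controller data, which is cached in sc_identify at
 * attach time.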
888 */ 889static bool 890nvme_has_volatile_write_cache(struct nvme_softc *sc) 891{ 892 /* sc_identify is filled during attachment */ 893 return ((sc->sc_identify.vwc & NVME_ID_CTRLR_VWC_PRESENT) != 0); 894} 895 896static bool 897nvme_ns_sync_finished(void *cookie) 898{ 899 int *result = cookie; 900 901 return (*result != 0); 902} 903 904int 905nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags) 906{ 907 struct nvme_queue *q = nvme_get_q(sc); 908 struct nvme_ccb *ccb; 909 int result = 0; 910 911 if (!nvme_has_volatile_write_cache(sc)) { 912 /* cache not present, no value in trying to flush it */ 913 return 0; 914 } 915 916 ccb = nvme_ccb_get(q, true); 917 KASSERT(ccb != NULL); 918 919 ccb->ccb_done = nvme_ns_sync_done; 920 ccb->ccb_cookie = &result; 921 922 /* namespace context */ 923 ccb->nnc_nsid = nsid; 924 ccb->nnc_flags = flags; 925 ccb->nnc_done = NULL; 926 927 if (ISSET(flags, NVME_NS_CTX_F_POLL)) { 928 if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0) 929 return EIO; 930 return 0; 931 } 932 933 nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill); 934 935 /* wait for completion */ 936 nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result); 937 KASSERT(result != 0); 938 939 return (result > 0) ? 0 : EIO; 940} 941 942static void 943nvme_ns_sync_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 944{ 945 struct nvme_sqe *sqe = slot; 946 947 sqe->opcode = NVM_CMD_FLUSH; 948 htolem32(&sqe->nsid, ccb->nnc_nsid); 949} 950 951static void 952nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb, 953 struct nvme_cqe *cqe) 954{ 955 int *result = ccb->ccb_cookie; 956 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 957 958 if (status == NVME_CQE_SC_SUCCESS) 959 *result = 1; 960 else 961 *result = -1; 962 963 nvme_ccb_put(q, ccb); 964} 965 966static bool 967nvme_getcache_finished(void *xc) 968{ 969 int *addr = xc; 970 971 return (*addr != 0); 972} 973 974/* 975 * Get status of volatile write cache. Always asynchronous. 
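 * Implemented as a Get Features command for the Volatile Write Cache
 * feature; the WCE bit of the completion dword is translated into
 * DKCACHE_* flags.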
976 */ 977int 978nvme_admin_getcache(struct nvme_softc *sc, int *addr) 979{ 980 struct nvme_ccb *ccb; 981 struct nvme_queue *q = sc->sc_admin_q; 982 int result = 0, error; 983 984 if (!nvme_has_volatile_write_cache(sc)) { 985 /* cache simply not present */ 986 *addr = 0; 987 return 0; 988 } 989 990 ccb = nvme_ccb_get(q, true); 991 KASSERT(ccb != NULL); 992 993 ccb->ccb_done = nvme_getcache_done; 994 ccb->ccb_cookie = &result; 995 996 /* namespace context */ 997 ccb->nnc_flags = 0; 998 ccb->nnc_done = NULL; 999 1000 nvme_q_submit(sc, q, ccb, nvme_getcache_fill); 1001 1002 /* wait for completion */ 1003 nvme_q_wait_complete(sc, q, nvme_getcache_finished, &result); 1004 KASSERT(result != 0); 1005 1006 if (result > 0) { 1007 *addr = result; 1008 error = 0; 1009 } else 1010 error = EINVAL; 1011 1012 return error; 1013} 1014 1015static void 1016nvme_getcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1017{ 1018 struct nvme_sqe *sqe = slot; 1019 1020 sqe->opcode = NVM_ADMIN_GET_FEATURES; 1021 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE); 1022 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE); 1023} 1024 1025static void 1026nvme_getcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, 1027 struct nvme_cqe *cqe) 1028{ 1029 int *addr = ccb->ccb_cookie; 1030 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 1031 uint32_t cdw0 = lemtoh32(&cqe->cdw0); 1032 int result; 1033 1034 if (status == NVME_CQE_SC_SUCCESS) { 1035 result = 0; 1036 1037 /* 1038 * DPO not supported, Dataset Management (DSM) field doesn't 1039 * specify the same semantics. FUA is always supported. 1040 */ 1041 result = DKCACHE_FUA; 1042 1043 if (cdw0 & NVM_VOLATILE_WRITE_CACHE_WCE) 1044 result |= DKCACHE_WRITE; 1045 1046 /* 1047 * If volatile write cache is present, the flag shall also be 1048 * settable. 1049 */ 1050 result |= DKCACHE_WCHANGE; 1051 1052 /* 1053 * ONCS field indicates whether the optional SAVE is also 1054 * supported for Set Features. According to spec v1.3, 1055 * Volatile Write Cache however doesn't support persistency 1056 * across power cycle/reset. 1057 */ 1058 1059 } else { 1060 result = -1; 1061 } 1062 1063 *addr = result; 1064 1065 nvme_ccb_put(q, ccb); 1066} 1067 1068struct nvme_setcache_state { 1069 int dkcache; 1070 int result; 1071}; 1072 1073static bool 1074nvme_setcache_finished(void *xc) 1075{ 1076 struct nvme_setcache_state *st = xc; 1077 1078 return (st->result != 0); 1079} 1080 1081static void 1082nvme_setcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1083{ 1084 struct nvme_sqe *sqe = slot; 1085 struct nvme_setcache_state *st = ccb->ccb_cookie; 1086 1087 sqe->opcode = NVM_ADMIN_SET_FEATURES; 1088 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE); 1089 if (st->dkcache & DKCACHE_WRITE) 1090 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE); 1091} 1092 1093static void 1094nvme_setcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, 1095 struct nvme_cqe *cqe) 1096{ 1097 struct nvme_setcache_state *st = ccb->ccb_cookie; 1098 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 1099 1100 if (status == NVME_CQE_SC_SUCCESS) { 1101 st->result = 1; 1102 } else { 1103 st->result = -1; 1104 } 1105 1106 nvme_ccb_put(q, ccb); 1107} 1108 1109/* 1110 * Set status of volatile write cache. Always asynchronous. 
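 * Implemented as a Set Features command; only DKCACHE_WRITE can be
 * changed, any other flag is rejected with EOPNOTSUPP.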
1111 */ 1112int 1113nvme_admin_setcache(struct nvme_softc *sc, int dkcache) 1114{ 1115 struct nvme_ccb *ccb; 1116 struct nvme_queue *q = sc->sc_admin_q; 1117 int error; 1118 struct nvme_setcache_state st; 1119 1120 if (!nvme_has_volatile_write_cache(sc)) { 1121 /* cache simply not present */ 1122 return EOPNOTSUPP; 1123 } 1124 1125 if (dkcache & ~(DKCACHE_WRITE)) { 1126 /* unsupported parameters */ 1127 return EOPNOTSUPP; 1128 } 1129 1130 ccb = nvme_ccb_get(q, true); 1131 KASSERT(ccb != NULL); 1132 1133 memset(&st, 0, sizeof(st)); 1134 st.dkcache = dkcache; 1135 1136 ccb->ccb_done = nvme_setcache_done; 1137 ccb->ccb_cookie = &st; 1138 1139 /* namespace context */ 1140 ccb->nnc_flags = 0; 1141 ccb->nnc_done = NULL; 1142 1143 nvme_q_submit(sc, q, ccb, nvme_setcache_fill); 1144 1145 /* wait for completion */ 1146 nvme_q_wait_complete(sc, q, nvme_setcache_finished, &st); 1147 KASSERT(st.result != 0); 1148 1149 if (st.result > 0) 1150 error = 0; 1151 else 1152 error = EINVAL; 1153 1154 return error; 1155} 1156 1157void 1158nvme_ns_free(struct nvme_softc *sc, uint16_t nsid) 1159{ 1160 struct nvme_namespace *ns; 1161 struct nvm_identify_namespace *identify; 1162 1163 ns = nvme_ns_get(sc, nsid); 1164 KASSERT(ns); 1165 1166 identify = ns->ident; 1167 ns->ident = NULL; 1168 if (identify != NULL) 1169 kmem_free(identify, sizeof(*identify)); 1170} 1171 1172struct nvme_pt_state { 1173 struct nvme_pt_command *pt; 1174 bool finished; 1175}; 1176 1177static void 1178nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1179{ 1180 struct nvme_softc *sc = q->q_sc; 1181 struct nvme_sqe *sqe = slot; 1182 struct nvme_pt_state *state = ccb->ccb_cookie; 1183 struct nvme_pt_command *pt = state->pt; 1184 bus_dmamap_t dmap = ccb->ccb_dmamap; 1185 int i; 1186 1187 sqe->opcode = pt->cmd.opcode; 1188 htolem32(&sqe->nsid, pt->cmd.nsid); 1189 1190 if (pt->buf != NULL && pt->len > 0) { 1191 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr); 1192 switch (dmap->dm_nsegs) { 1193 case 1: 1194 break; 1195 case 2: 1196 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr); 1197 break; 1198 default: 1199 for (i = 1; i < dmap->dm_nsegs; i++) { 1200 htolem64(&ccb->ccb_prpl[i - 1], 1201 dmap->dm_segs[i].ds_addr); 1202 } 1203 bus_dmamap_sync(sc->sc_dmat, 1204 NVME_DMA_MAP(q->q_ccb_prpls), 1205 ccb->ccb_prpl_off, 1206 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 1207 BUS_DMASYNC_PREWRITE); 1208 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva); 1209 break; 1210 } 1211 } 1212 1213 htolem32(&sqe->cdw10, pt->cmd.cdw10); 1214 htolem32(&sqe->cdw11, pt->cmd.cdw11); 1215 htolem32(&sqe->cdw12, pt->cmd.cdw12); 1216 htolem32(&sqe->cdw13, pt->cmd.cdw13); 1217 htolem32(&sqe->cdw14, pt->cmd.cdw14); 1218 htolem32(&sqe->cdw15, pt->cmd.cdw15); 1219} 1220 1221static void 1222nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe) 1223{ 1224 struct nvme_softc *sc = q->q_sc; 1225 struct nvme_pt_state *state = ccb->ccb_cookie; 1226 struct nvme_pt_command *pt = state->pt; 1227 bus_dmamap_t dmap = ccb->ccb_dmamap; 1228 1229 if (pt->buf != NULL && pt->len > 0) { 1230 if (dmap->dm_nsegs > 2) { 1231 bus_dmamap_sync(sc->sc_dmat, 1232 NVME_DMA_MAP(q->q_ccb_prpls), 1233 ccb->ccb_prpl_off, 1234 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 1235 BUS_DMASYNC_POSTWRITE); 1236 } 1237 1238 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 1239 pt->is_read ? 
BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); 1240 bus_dmamap_unload(sc->sc_dmat, dmap); 1241 } 1242 1243 pt->cpl.cdw0 = lemtoh32(&cqe->cdw0); 1244 pt->cpl.flags = lemtoh16(&cqe->flags) & ~NVME_CQE_PHASE; 1245 1246 state->finished = true; 1247 1248 nvme_ccb_put(q, ccb); 1249} 1250 1251static bool 1252nvme_pt_finished(void *cookie) 1253{ 1254 struct nvme_pt_state *state = cookie; 1255 1256 return state->finished; 1257} 1258 1259static int 1260nvme_command_passthrough(struct nvme_softc *sc, struct nvme_pt_command *pt, 1261 uint32_t nsid, struct lwp *l, bool is_adminq) 1262{ 1263 struct nvme_queue *q; 1264 struct nvme_ccb *ccb; 1265 void *buf = NULL; 1266 struct nvme_pt_state state; 1267 int error; 1268 1269 /* limit command size to maximum data transfer size */ 1270 if ((pt->buf == NULL && pt->len > 0) || 1271 (pt->buf != NULL && (pt->len == 0 || pt->len > sc->sc_mdts))) 1272 return EINVAL; 1273 1274 q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc); 1275 ccb = nvme_ccb_get(q, true); 1276 KASSERT(ccb != NULL); 1277 1278 if (pt->buf != NULL) { 1279 KASSERT(pt->len > 0); 1280 buf = kmem_alloc(pt->len, KM_SLEEP); 1281 if (!pt->is_read) { 1282 error = copyin(pt->buf, buf, pt->len); 1283 if (error) 1284 goto kmem_free; 1285 } 1286 error = bus_dmamap_load(sc->sc_dmat, ccb->ccb_dmamap, buf, 1287 pt->len, NULL, 1288 BUS_DMA_WAITOK | 1289 (pt->is_read ? BUS_DMA_READ : BUS_DMA_WRITE)); 1290 if (error) 1291 goto kmem_free; 1292 bus_dmamap_sync(sc->sc_dmat, ccb->ccb_dmamap, 1293 0, ccb->ccb_dmamap->dm_mapsize, 1294 pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); 1295 } 1296 1297 memset(&state, 0, sizeof(state)); 1298 state.pt = pt; 1299 state.finished = false; 1300 1301 ccb->ccb_done = nvme_pt_done; 1302 ccb->ccb_cookie = &state; 1303 1304 pt->cmd.nsid = nsid; 1305 1306 nvme_q_submit(sc, q, ccb, nvme_pt_fill); 1307 1308 /* wait for completion */ 1309 nvme_q_wait_complete(sc, q, nvme_pt_finished, &state); 1310 KASSERT(state.finished); 1311 1312 error = 0; 1313 1314 if (buf != NULL) { 1315 if (error == 0 && pt->is_read) 1316 error = copyout(buf, pt->buf, pt->len); 1317kmem_free: 1318 kmem_free(buf, pt->len); 1319 } 1320 1321 return error; 1322} 1323 1324uint32_t 1325nvme_op_sq_enter(struct nvme_softc *sc, 1326 struct nvme_queue *q, struct nvme_ccb *ccb) 1327{ 1328 mutex_enter(&q->q_sq_mtx); 1329 1330 return nvme_op_sq_enter_locked(sc, q, ccb); 1331} 1332 1333uint32_t 1334nvme_op_sq_enter_locked(struct nvme_softc *sc, 1335 struct nvme_queue *q, struct nvme_ccb *ccb) 1336{ 1337 return q->q_sq_tail; 1338} 1339 1340void 1341nvme_op_sq_leave_locked(struct nvme_softc *sc, 1342 struct nvme_queue *q, struct nvme_ccb *ccb) 1343{ 1344 uint32_t tail; 1345 1346 tail = ++q->q_sq_tail; 1347 if (tail >= q->q_entries) 1348 tail = 0; 1349 q->q_sq_tail = tail; 1350 nvme_write4(sc, q->q_sqtdbl, tail); 1351} 1352 1353void 1354nvme_op_sq_leave(struct nvme_softc *sc, 1355 struct nvme_queue *q, struct nvme_ccb *ccb) 1356{ 1357 nvme_op_sq_leave_locked(sc, q, ccb); 1358 1359 mutex_exit(&q->q_sq_mtx); 1360} 1361 1362static void 1363nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb, 1364 void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *)) 1365{ 1366 struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem); 1367 uint32_t tail; 1368 1369 tail = sc->sc_ops->op_sq_enter(sc, q, ccb); 1370 1371 sqe += tail; 1372 1373 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem), 1374 sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE); 1375 memset(sqe, 0, sizeof(*sqe)); 1376 (*fill)(q, ccb, 
sqe);
	htolem16(&sqe->cid, ccb->ccb_id);
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);

	sc->sc_ops->op_sq_leave(sc, q, ccb);
}

struct nvme_poll_state {
	struct nvme_sqe s;
	struct nvme_cqe c;
	void *cookie;
	void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *);
};

static int
nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec)
{
	struct nvme_poll_state state;
	uint16_t flags;
	int step = 10;
	int maxloop = timo_sec * 1000000 / step;
	int error = 0;

	memset(&state, 0, sizeof(state));
	(*fill)(q, ccb, &state.s);

	state.done = ccb->ccb_done;
	state.cookie = ccb->ccb_cookie;

	ccb->ccb_done = nvme_poll_done;
	ccb->ccb_cookie = &state;

	nvme_q_submit(sc, q, ccb, nvme_poll_fill);
	while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) {
		if (nvme_q_complete(sc, q) == 0)
			delay(step);

		if (timo_sec >= 0 && --maxloop <= 0) {
			error = ETIMEDOUT;
			break;
		}
	}

	if (error == 0) {
		flags = lemtoh16(&state.c.flags);
		return flags & ~NVME_CQE_PHASE;
	} else {
		/*
		 * If it succeeds later, it would hit a ccb which will have
		 * been already reused for something else. Not good. Cross
		 * fingers and hope for the best. XXX do controller reset?
		 */
		aprint_error_dev(sc->sc_dev, "polled command timed out\n");

		/* Invoke the callback to clean state anyway */
		struct nvme_cqe cqe;
		memset(&cqe, 0, sizeof(cqe));
		ccb->ccb_done(q, ccb, &cqe);

		return 1;
	}
}

static void
nvme_poll_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct nvme_poll_state *state = ccb->ccb_cookie;

	*sqe = state->s;
}

static void
nvme_poll_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct nvme_poll_state *state = ccb->ccb_cookie;

	state->c = *cqe;
	SET(state->c.flags, htole16(NVME_CQE_PHASE));

	ccb->ccb_cookie = state->cookie;
	state->done(q, ccb, &state->c);
}

static void
nvme_sqe_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *src = ccb->ccb_cookie;
	struct nvme_sqe *dst = slot;

	*dst = *src;
}

static void
nvme_empty_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
}

void
nvme_op_cq_done(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	/* nop */
}

static int
nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_ccb *ccb;
	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
	uint16_t flags;
	int rv = 0;

	mutex_enter(&q->q_cq_mtx);

	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	for (;;) {
		cqe = &ring[q->q_cq_head];
		flags = lemtoh16(&cqe->flags);
		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
			break;

		/*
		 * Make sure we have read the flags _before_ we read
		 * the cid. Otherwise the CPU might speculatively read
		 * the cid before the entry has been assigned to our
		 * phase.
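		 * The nvme_dmamem_sync(BUS_DMASYNC_POSTREAD) below is
		 * what provides that read ordering.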
1507 */ 1508 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD); 1509 1510 ccb = &q->q_ccbs[lemtoh16(&cqe->cid)]; 1511 1512 if (++q->q_cq_head >= q->q_entries) { 1513 q->q_cq_head = 0; 1514 q->q_cq_phase ^= NVME_CQE_PHASE; 1515 } 1516 1517#ifdef DEBUG 1518 /* 1519 * If we get spurious completion notification, something 1520 * is seriously hosed up. Very likely DMA to some random 1521 * memory place happened, so just bail out. 1522 */ 1523 if ((intptr_t)ccb->ccb_cookie == NVME_CCB_FREE) { 1524 panic("%s: invalid ccb detected", 1525 device_xname(sc->sc_dev)); 1526 /* NOTREACHED */ 1527 } 1528#endif 1529 1530 rv++; 1531 1532 sc->sc_ops->op_cq_done(sc, q, ccb); 1533 1534 /* 1535 * Unlock the mutex before calling the ccb_done callback 1536 * and re-lock afterwards. The callback triggers lddone() 1537 * which schedules another i/o, and also calls nvme_ccb_put(). 1538 * Unlock/relock avoids possibility of deadlock. 1539 */ 1540 mutex_exit(&q->q_cq_mtx); 1541 ccb->ccb_done(q, ccb, cqe); 1542 mutex_enter(&q->q_cq_mtx); 1543 } 1544 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD); 1545 1546 if (rv) 1547 nvme_write4(sc, q->q_cqhdbl, q->q_cq_head); 1548 1549 mutex_exit(&q->q_cq_mtx); 1550 1551 return rv; 1552} 1553 1554static void 1555nvme_q_wait_complete(struct nvme_softc *sc, 1556 struct nvme_queue *q, bool (*finished)(void *), void *cookie) 1557{ 1558 mutex_enter(&q->q_ccb_mtx); 1559 if (finished(cookie)) 1560 goto out; 1561 1562 for(;;) { 1563 q->q_ccb_waiting = true; 1564 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); 1565 1566 if (finished(cookie)) 1567 break; 1568 } 1569 1570out: 1571 mutex_exit(&q->q_ccb_mtx); 1572} 1573 1574static int 1575nvme_identify(struct nvme_softc *sc, u_int mps) 1576{ 1577 char sn[41], mn[81], fr[17]; 1578 struct nvm_identify_controller *identify; 1579 struct nvme_dmamem *mem; 1580 struct nvme_ccb *ccb; 1581 u_int mdts; 1582 int rv = 1; 1583 1584 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1585 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 1586 1587 mem = nvme_dmamem_alloc(sc, sizeof(*identify)); 1588 if (mem == NULL) 1589 return 1; 1590 1591 ccb->ccb_done = nvme_empty_done; 1592 ccb->ccb_cookie = mem; 1593 1594 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD); 1595 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify, 1596 NVME_TIMO_IDENT); 1597 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); 1598 1599 nvme_ccb_put(sc->sc_admin_q, ccb); 1600 1601 if (rv != 0) 1602 goto done; 1603 1604 identify = NVME_DMA_KVA(mem); 1605 sc->sc_identify = *identify; 1606 identify = NULL; 1607 1608 /* Convert data to host endian */ 1609 nvme_identify_controller_swapbytes(&sc->sc_identify); 1610 1611 strnvisx(sn, sizeof(sn), (const char *)sc->sc_identify.sn, 1612 sizeof(sc->sc_identify.sn), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1613 strnvisx(mn, sizeof(mn), (const char *)sc->sc_identify.mn, 1614 sizeof(sc->sc_identify.mn), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1615 strnvisx(fr, sizeof(fr), (const char *)sc->sc_identify.fr, 1616 sizeof(sc->sc_identify.fr), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1617 aprint_normal_dev(sc->sc_dev, "%s, firmware %s, serial %s\n", mn, fr, 1618 sn); 1619 1620 strlcpy(sc->sc_modelname, mn, sizeof(sc->sc_modelname)); 1621 1622 if (sc->sc_identify.mdts > 0) { 1623 mdts = (1 << sc->sc_identify.mdts) * (1 << mps); 1624 if (mdts < sc->sc_mdts) 1625 sc->sc_mdts = mdts; 1626 } 1627 1628 sc->sc_nn = sc->sc_identify.nn; 1629 1630done: 1631 nvme_dmamem_free(sc, mem); 1632 1633 return rv; 1634} 1635 1636static int 1637nvme_q_create(struct nvme_softc *sc, 
struct nvme_queue *q) 1638{ 1639 struct nvme_sqe_q sqe; 1640 struct nvme_ccb *ccb; 1641 int rv; 1642 1643 if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0) 1644 return 1; 1645 1646 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1647 KASSERT(ccb != NULL); 1648 1649 ccb->ccb_done = nvme_empty_done; 1650 ccb->ccb_cookie = &sqe; 1651 1652 memset(&sqe, 0, sizeof(sqe)); 1653 sqe.opcode = NVM_ADMIN_ADD_IOCQ; 1654 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem)); 1655 htolem16(&sqe.qsize, q->q_entries - 1); 1656 htolem16(&sqe.qid, q->q_id); 1657 sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC; 1658 if (sc->sc_use_mq) 1659 htolem16(&sqe.cqid, q->q_id); /* qid == vector */ 1660 1661 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1662 if (rv != 0) 1663 goto fail; 1664 1665 ccb->ccb_done = nvme_empty_done; 1666 ccb->ccb_cookie = &sqe; 1667 1668 memset(&sqe, 0, sizeof(sqe)); 1669 sqe.opcode = NVM_ADMIN_ADD_IOSQ; 1670 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem)); 1671 htolem16(&sqe.qsize, q->q_entries - 1); 1672 htolem16(&sqe.qid, q->q_id); 1673 htolem16(&sqe.cqid, q->q_id); 1674 sqe.qflags = NVM_SQE_Q_PC; 1675 1676 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1677 if (rv != 0) 1678 goto fail; 1679 1680 nvme_ccb_put(sc->sc_admin_q, ccb); 1681 return 0; 1682 1683fail: 1684 if (sc->sc_use_mq) 1685 sc->sc_intr_disestablish(sc, q->q_id); 1686 1687 nvme_ccb_put(sc->sc_admin_q, ccb); 1688 return rv; 1689} 1690 1691static int 1692nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q) 1693{ 1694 struct nvme_sqe_q sqe; 1695 struct nvme_ccb *ccb; 1696 int rv; 1697 1698 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1699 KASSERT(ccb != NULL); 1700 1701 ccb->ccb_done = nvme_empty_done; 1702 ccb->ccb_cookie = &sqe; 1703 1704 memset(&sqe, 0, sizeof(sqe)); 1705 sqe.opcode = NVM_ADMIN_DEL_IOSQ; 1706 htolem16(&sqe.qid, q->q_id); 1707 1708 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1709 if (rv != 0) 1710 goto fail; 1711 1712 ccb->ccb_done = nvme_empty_done; 1713 ccb->ccb_cookie = &sqe; 1714 1715 memset(&sqe, 0, sizeof(sqe)); 1716 sqe.opcode = NVM_ADMIN_DEL_IOCQ; 1717 htolem16(&sqe.qid, q->q_id); 1718 1719 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1720 if (rv != 0) 1721 goto fail; 1722 1723fail: 1724 nvme_ccb_put(sc->sc_admin_q, ccb); 1725 1726 if (rv == 0 && sc->sc_use_mq) { 1727 if (sc->sc_intr_disestablish(sc, q->q_id)) 1728 rv = 1; 1729 } 1730 1731 return rv; 1732} 1733 1734static void 1735nvme_fill_identify(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1736{ 1737 struct nvme_sqe *sqe = slot; 1738 struct nvme_dmamem *mem = ccb->ccb_cookie; 1739 1740 sqe->opcode = NVM_ADMIN_IDENTIFY; 1741 htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem)); 1742 htolem32(&sqe->cdw10, 1); 1743} 1744 1745static int 1746nvme_set_number_of_queues(struct nvme_softc *sc, u_int nq, u_int *ncqa, 1747 u_int *nsqa) 1748{ 1749 struct nvme_pt_state state; 1750 struct nvme_pt_command pt; 1751 struct nvme_ccb *ccb; 1752 int rv; 1753 1754 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1755 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 1756 1757 memset(&pt, 0, sizeof(pt)); 1758 pt.cmd.opcode = NVM_ADMIN_SET_FEATURES; 1759 pt.cmd.cdw10 = NVM_FEATURE_NUMBER_OF_QUEUES; 1760 pt.cmd.cdw11 = ((nq - 1) << 16) | (nq - 1); 1761 1762 memset(&state, 0, sizeof(state)); 1763 state.pt = &pt; 1764 state.finished = false; 1765 1766 ccb->ccb_done = nvme_pt_done; 1767 ccb->ccb_cookie = &state; 1768 1769 rv = nvme_poll(sc, 
sc->sc_admin_q, ccb, nvme_pt_fill, NVME_TIMO_QOP); 1770 1771 if (rv != 0) { 1772 *ncqa = *nsqa = 0; 1773 return EIO; 1774 } 1775 1776 *ncqa = (pt.cpl.cdw0 >> 16) + 1; 1777 *nsqa = (pt.cpl.cdw0 & 0xffff) + 1; 1778 1779 return 0; 1780} 1781 1782static int 1783nvme_ccbs_alloc(struct nvme_queue *q, uint16_t nccbs) 1784{ 1785 struct nvme_softc *sc = q->q_sc; 1786 struct nvme_ccb *ccb; 1787 bus_addr_t off; 1788 uint64_t *prpl; 1789 u_int i; 1790 1791 mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO); 1792 cv_init(&q->q_ccb_wait, "nvmeqw"); 1793 q->q_ccb_waiting = false; 1794 SIMPLEQ_INIT(&q->q_ccb_list); 1795 1796 q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP); 1797 1798 q->q_nccbs = nccbs; 1799 q->q_ccb_prpls = nvme_dmamem_alloc(sc, 1800 sizeof(*prpl) * sc->sc_max_sgl * nccbs); 1801 1802 prpl = NVME_DMA_KVA(q->q_ccb_prpls); 1803 off = 0; 1804 1805 for (i = 0; i < nccbs; i++) { 1806 ccb = &q->q_ccbs[i]; 1807 1808 if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts, 1809 sc->sc_max_sgl + 1 /* we get a free prp in the sqe */, 1810 sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 1811 &ccb->ccb_dmamap) != 0) 1812 goto free_maps; 1813 1814 ccb->ccb_id = i; 1815 ccb->ccb_prpl = prpl; 1816 ccb->ccb_prpl_off = off; 1817 ccb->ccb_prpl_dva = NVME_DMA_DVA(q->q_ccb_prpls) + off; 1818 1819 SIMPLEQ_INSERT_TAIL(&q->q_ccb_list, ccb, ccb_entry); 1820 1821 prpl += sc->sc_max_sgl; 1822 off += sizeof(*prpl) * sc->sc_max_sgl; 1823 } 1824 1825 return 0; 1826 1827free_maps: 1828 nvme_ccbs_free(q); 1829 return 1; 1830} 1831 1832static struct nvme_ccb * 1833nvme_ccb_get(struct nvme_queue *q, bool wait) 1834{ 1835 struct nvme_ccb *ccb = NULL; 1836 1837 mutex_enter(&q->q_ccb_mtx); 1838again: 1839 ccb = SIMPLEQ_FIRST(&q->q_ccb_list); 1840 if (ccb != NULL) { 1841 SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry); 1842#ifdef DEBUG 1843 ccb->ccb_cookie = NULL; 1844#endif 1845 } else { 1846 if (__predict_false(wait)) { 1847 q->q_ccb_waiting = true; 1848 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); 1849 goto again; 1850 } 1851 } 1852 mutex_exit(&q->q_ccb_mtx); 1853 1854 return ccb; 1855} 1856 1857static struct nvme_ccb * 1858nvme_ccb_get_bio(struct nvme_softc *sc, struct buf *bp, 1859 struct nvme_queue **selq) 1860{ 1861 u_int cpuindex = cpu_index((bp && bp->b_ci) ? bp->b_ci : curcpu()); 1862 1863 /* 1864 * Find a queue with available ccbs, preferring the originating 1865 * CPU's queue. 
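	 * This keeps submissions CPU-local in the common case, while still
	 * letting I/O make progress when the local queue has no free ccbs.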
	 */

	for (u_int qoff = 0; qoff < sc->sc_nq; qoff++) {
		struct nvme_queue *q = sc->sc_q[(cpuindex + qoff) % sc->sc_nq];
		struct nvme_ccb *ccb;

		mutex_enter(&q->q_ccb_mtx);
		ccb = SIMPLEQ_FIRST(&q->q_ccb_list);
		if (ccb != NULL) {
			SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
#ifdef DEBUG
			ccb->ccb_cookie = NULL;
#endif
		}
		mutex_exit(&q->q_ccb_mtx);

		if (ccb != NULL) {
			*selq = q;
			return ccb;
		}
	}

	return NULL;
}

static void
nvme_ccb_put(struct nvme_queue *q, struct nvme_ccb *ccb)
{

	mutex_enter(&q->q_ccb_mtx);
#ifdef DEBUG
	ccb->ccb_cookie = (void *)NVME_CCB_FREE;
#endif
	SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry);

	/*
	 * It's unlikely there are any waiters, as this path is not used
	 * for regular I/O.
	 */
	if (__predict_false(q->q_ccb_waiting)) {
		q->q_ccb_waiting = false;
		cv_broadcast(&q->q_ccb_wait);
	}

	mutex_exit(&q->q_ccb_mtx);
}

static void
nvme_ccbs_free(struct nvme_queue *q)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_ccb *ccb;

	mutex_enter(&q->q_ccb_mtx);
	while ((ccb = SIMPLEQ_FIRST(&q->q_ccb_list)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
		/*
		 * bus_dmamap_destroy() may call vm_map_lock() and rw_enter()
		 * internally; don't hold the spin mutex across it.
		 */
		mutex_exit(&q->q_ccb_mtx);
		bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
		mutex_enter(&q->q_ccb_mtx);
	}
	mutex_exit(&q->q_ccb_mtx);

	nvme_dmamem_free(sc, q->q_ccb_prpls);
	kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs);
	q->q_ccbs = NULL;
	cv_destroy(&q->q_ccb_wait);
	mutex_destroy(&q->q_ccb_mtx);
}

static struct nvme_queue *
nvme_q_alloc(struct nvme_softc *sc, uint16_t id, u_int entries, u_int dstrd)
{
	struct nvme_queue *q;

	q = kmem_alloc(sizeof(*q), KM_SLEEP);
	q->q_sc = sc;
	q->q_sq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_sqe) * entries);
	if (q->q_sq_dmamem == NULL)
		goto free;

	q->q_cq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_cqe) * entries);
	if (q->q_cq_dmamem == NULL)
		goto free_sq;

	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

	mutex_init(&q->q_sq_mtx, MUTEX_DEFAULT, IPL_BIO);
	mutex_init(&q->q_cq_mtx, MUTEX_DEFAULT, IPL_BIO);
	q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
	q->q_cqhdbl = NVME_CQHDBL(id, dstrd);
	q->q_id = id;
	q->q_entries = entries;
	q->q_sq_tail = 0;
	q->q_cq_head = 0;
	q->q_cq_phase = NVME_CQE_PHASE;

	if (sc->sc_ops->op_q_alloc != NULL) {
		if (sc->sc_ops->op_q_alloc(sc, q) != 0)
			goto free_cq;
	}

	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	/*
	 * Due to the definition of full and empty queue (queue is empty
	 * when head == tail, full when tail is one less than head),
	 * we can actually only have (entries - 1) in-flight commands.
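	 * For example, a queue created with 1024 entries can have at most
	 * 1023 commands outstanding, hence the (entries - 1) below.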
1978 */ 1979 if (nvme_ccbs_alloc(q, entries - 1) != 0) { 1980 aprint_error_dev(sc->sc_dev, "unable to allocate ccbs\n"); 1981 goto free_cq; 1982 } 1983 1984 return q; 1985 1986free_cq: 1987 nvme_dmamem_free(sc, q->q_cq_dmamem); 1988free_sq: 1989 nvme_dmamem_free(sc, q->q_sq_dmamem); 1990free: 1991 kmem_free(q, sizeof(*q)); 1992 1993 return NULL; 1994} 1995 1996static void 1997nvme_q_reset(struct nvme_softc *sc, struct nvme_queue *q) 1998{ 1999 2000 memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem)); 2001 memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem)); 2002 2003 q->q_sq_tail = 0; 2004 q->q_cq_head = 0; 2005 q->q_cq_phase = NVME_CQE_PHASE; 2006 2007 nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE); 2008 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD); 2009} 2010 2011static void 2012nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q) 2013{ 2014 nvme_ccbs_free(q); 2015 mutex_destroy(&q->q_sq_mtx); 2016 mutex_destroy(&q->q_cq_mtx); 2017 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD); 2018 nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE); 2019 2020 if (sc->sc_ops->op_q_alloc != NULL) 2021 sc->sc_ops->op_q_free(sc, q); 2022 2023 nvme_dmamem_free(sc, q->q_cq_dmamem); 2024 nvme_dmamem_free(sc, q->q_sq_dmamem); 2025 kmem_free(q, sizeof(*q)); 2026} 2027 2028int 2029nvme_intr(void *xsc) 2030{ 2031 struct nvme_softc *sc = xsc; 2032 2033 KASSERT(!sc->sc_use_mq); 2034 2035 /* 2036 * INTx is level triggered, controller deasserts the interrupt only 2037 * when we advance command queue head via write to the doorbell. 2038 * Tell the controller to block the interrupts while we process 2039 * the queue(s). 2040 */ 2041 nvme_write4(sc, NVME_INTMS, 1); 2042 2043 softint_schedule(sc->sc_softih[0]); 2044 2045 /* don't know, might not have been for us */ 2046 return 1; 2047} 2048 2049void 2050nvme_softintr_intx(void *xq) 2051{ 2052 struct nvme_queue *q = xq; 2053 struct nvme_softc *sc = q->q_sc; 2054 2055 KASSERT(!sc->sc_use_mq); 2056 2057 nvme_q_complete(sc, sc->sc_admin_q); 2058 if (sc->sc_q != NULL) 2059 nvme_q_complete(sc, sc->sc_q[0]); 2060 2061 /* 2062 * Processing done, tell controller to issue interrupts again. There 2063 * is no race, as NVMe spec requires the controller to maintain state, 2064 * and assert the interrupt whenever there are unacknowledged 2065 * completion queue entries. 2066 */ 2067 nvme_write4(sc, NVME_INTMC, 1); 2068} 2069 2070int 2071nvme_intr_msi(void *xq) 2072{ 2073 struct nvme_queue *q = xq; 2074 2075 KASSERT(q); 2076 KASSERT(q->q_sc); 2077 KASSERT(q->q_sc->sc_softih); 2078 KASSERT(q->q_sc->sc_softih[q->q_id]); 2079 2080 /* 2081 * MSI/MSI-X are edge triggered, so can handover processing to softint 2082 * without masking the interrupt. 
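	 * This contrasts with the INTx path above, which masks interrupts
	 * via NVME_INTMS until the softint has advanced the completion
	 * queue head.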
2083 */ 2084 softint_schedule(q->q_sc->sc_softih[q->q_id]); 2085 2086 return 1; 2087} 2088 2089void 2090nvme_softintr_msi(void *xq) 2091{ 2092 struct nvme_queue *q = xq; 2093 struct nvme_softc *sc = q->q_sc; 2094 2095 nvme_q_complete(sc, q); 2096} 2097 2098struct nvme_dmamem * 2099nvme_dmamem_alloc(struct nvme_softc *sc, size_t size) 2100{ 2101 struct nvme_dmamem *ndm; 2102 int nsegs; 2103 2104 ndm = kmem_zalloc(sizeof(*ndm), KM_SLEEP); 2105 if (ndm == NULL) 2106 return NULL; 2107 2108 ndm->ndm_size = size; 2109 2110 if (bus_dmamap_create(sc->sc_dmat, size, btoc(round_page(size)), size, 0, 2111 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &ndm->ndm_map) != 0) 2112 goto ndmfree; 2113 2114 if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg, 2115 1, &nsegs, BUS_DMA_WAITOK) != 0) 2116 goto destroy; 2117 2118 if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size, 2119 &ndm->ndm_kva, BUS_DMA_WAITOK) != 0) 2120 goto free; 2121 2122 if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size, 2123 NULL, BUS_DMA_WAITOK) != 0) 2124 goto unmap; 2125 2126 memset(ndm->ndm_kva, 0, size); 2127 bus_dmamap_sync(sc->sc_dmat, ndm->ndm_map, 0, size, BUS_DMASYNC_PREREAD); 2128 2129 return ndm; 2130 2131unmap: 2132 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size); 2133free: 2134 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 2135destroy: 2136 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 2137ndmfree: 2138 kmem_free(ndm, sizeof(*ndm)); 2139 return NULL; 2140} 2141 2142void 2143nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops) 2144{ 2145 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem), 2146 0, NVME_DMA_LEN(mem), ops); 2147} 2148 2149void 2150nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm) 2151{ 2152 bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map); 2153 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size); 2154 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 2155 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 2156 kmem_free(ndm, sizeof(*ndm)); 2157} 2158 2159/* 2160 * ioctl 2161 */ 2162 2163dev_type_open(nvmeopen); 2164dev_type_close(nvmeclose); 2165dev_type_ioctl(nvmeioctl); 2166 2167const struct cdevsw nvme_cdevsw = { 2168 .d_open = nvmeopen, 2169 .d_close = nvmeclose, 2170 .d_read = noread, 2171 .d_write = nowrite, 2172 .d_ioctl = nvmeioctl, 2173 .d_stop = nostop, 2174 .d_tty = notty, 2175 .d_poll = nopoll, 2176 .d_mmap = nommap, 2177 .d_kqfilter = nokqfilter, 2178 .d_discard = nodiscard, 2179 .d_flag = D_OTHER, 2180}; 2181 2182/* 2183 * Accept an open operation on the control device. 2184 */ 2185int 2186nvmeopen(dev_t dev, int flag, int mode, struct lwp *l) 2187{ 2188 struct nvme_softc *sc; 2189 int unit = minor(dev) / 0x10000; 2190 int nsid = minor(dev) & 0xffff; 2191 int nsidx; 2192 2193 if ((sc = device_lookup_private(&nvme_cd, unit)) == NULL) 2194 return ENXIO; 2195 if ((sc->sc_flags & NVME_F_ATTACHED) == 0) 2196 return ENXIO; 2197 2198 if (nsid == 0) { 2199 /* controller */ 2200 if (ISSET(sc->sc_flags, NVME_F_OPEN)) 2201 return EBUSY; 2202 SET(sc->sc_flags, NVME_F_OPEN); 2203 } else { 2204 /* namespace */ 2205 nsidx = nsid - 1; 2206 if (nsidx >= sc->sc_nn || sc->sc_namespaces[nsidx].dev == NULL) 2207 return ENXIO; 2208 if (ISSET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN)) 2209 return EBUSY; 2210 SET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN); 2211 } 2212 return 0; 2213} 2214 2215/* 2216 * Accept the last close on the control device. 
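 * As in nvmeopen(), the minor number encodes the unit in its upper
 * bits (minor / 0x10000) and the namespace id in its low 16 bits;
 * nsid 0 addresses the controller node itself.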
2217 */ 2218int 2219nvmeclose(dev_t dev, int flag, int mode, struct lwp *l) 2220{ 2221 struct nvme_softc *sc; 2222 int unit = minor(dev) / 0x10000; 2223 int nsid = minor(dev) & 0xffff; 2224 int nsidx; 2225 2226 sc = device_lookup_private(&nvme_cd, unit); 2227 if (sc == NULL) 2228 return ENXIO; 2229 2230 if (nsid == 0) { 2231 /* controller */ 2232 CLR(sc->sc_flags, NVME_F_OPEN); 2233 } else { 2234 /* namespace */ 2235 nsidx = nsid - 1; 2236 if (nsidx >= sc->sc_nn) 2237 return ENXIO; 2238 CLR(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN); 2239 } 2240 2241 return 0; 2242} 2243 2244/* 2245 * Handle control operations. 2246 */ 2247int 2248nvmeioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 2249{ 2250 struct nvme_softc *sc; 2251 int unit = minor(dev) / 0x10000; 2252 int nsid = minor(dev) & 0xffff; 2253 struct nvme_pt_command *pt; 2254 2255 sc = device_lookup_private(&nvme_cd, unit); 2256 if (sc == NULL) 2257 return ENXIO; 2258 2259 switch (cmd) { 2260 case NVME_PASSTHROUGH_CMD: 2261 pt = data; 2262 return nvme_command_passthrough(sc, data, 2263 nsid == 0 ? pt->cmd.nsid : (uint32_t)nsid, l, nsid == 0); 2264 } 2265 2266 return ENOTTY; 2267} 2268