// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <threads.h>

#include <ddk/binding.h>
#include <ddk/debug.h>
#include <ddk/device.h>
#include <ddk/driver.h>
#include <ddk/protocol/block.h>
#include <ddk/protocol/pci.h>
#include <ddk/io-buffer.h>

#include <hw/reg.h>
#include <hw/pci.h>

#include <lib/sync/completion.h>

#include <zircon/device/block.h>
#include <zircon/syscalls.h>
#include <zircon/types.h>
#include <zircon/listnode.h>

#include "nvme-hw.h"

// If enabled, gather stats on concurrent io ops,
// pending txns, etc. Stats are retrieved via
// IOCTL_BLOCK_GET_STATS.
#define WITH_STATS 1

#define TXN_FLAG_FAILED 1

typedef struct {
    block_op_t op;
    list_node_t node;
    uint16_t pending_utxns;
    uint8_t opcode;
    uint8_t flags;
} nvme_txn_t;

typedef struct {
    zx_paddr_t phys;  // io buffer phys base (1 page)
    void* virt;       // io buffer virt base
    zx_handle_t pmt;  // pinned memory
    nvme_txn_t* txn;  // related txn
    uint16_t id;
    uint16_t reserved0;
    uint32_t reserved1;
} nvme_utxn_t;

#define UTXN_COUNT 63

// There's no system constant for this. Ensure it matches reality.
#define PAGE_SHIFT (12ULL)
static_assert(PAGE_SIZE == (1ULL << PAGE_SHIFT), "");

#define PAGE_MASK (PAGE_SIZE - 1ULL)

// Limit maximum transfer size to 1MB, which fits comfortably
// within our single scatter-gather page per utxn setup.
#define MAX_XFER (1024*1024)

// Maximum submission and completion queue item counts, for
// queues that are a single page in size.
#define SQMAX (PAGE_SIZE / sizeof(nvme_cmd_t))
#define CQMAX (PAGE_SIZE / sizeof(nvme_cpl_t))

// global driver state bits
#define FLAG_IRQ_THREAD_STARTED 0x0001
#define FLAG_IO_THREAD_STARTED  0x0002
#define FLAG_SHUTDOWN           0x0004

#define FLAG_HAS_VWC            0x0100

typedef struct {
    void* io;
    zx_handle_t ioh;
    zx_handle_t irqh;
    zx_handle_t bti;
    uint32_t flags;
    mtx_t lock;

    // io queue doorbell registers
    void* io_sq_tail_db;
    void* io_cq_head_db;

    nvme_cpl_t* io_cq;
    nvme_cmd_t* io_sq;
    uint32_t io_nsid;
    uint16_t io_cq_head;
    uint16_t io_cq_toggle;
    uint16_t io_sq_tail;
    uint16_t io_sq_head;

    uint64_t utxn_avail;  // bitmask of available utxns

    // The pending list contains txns that have been received
    // via nvme_queue() and are waiting for io to start.
    // The exception is the head of the pending list, which may
    // be partially started and waiting for more utxns to become
    // available.
    // The active list consists of txns where all utxns have
    // been created, and we're waiting for them to complete or
    // error out.
    list_node_t pending_txns;  // inbound txns to process
    list_node_t active_txns;   // txns in flight

    // The io_signal completion is signaled from nvme_queue()
    // or from the irq thread, notifying the io thread that
    // it has work to do.
    sync_completion_t io_signal;

    uint32_t max_xfer;
    block_info_t info;

    // admin queue doorbell registers
    void* io_admin_sq_tail_db;
    void* io_admin_cq_head_db;

    // admin queues and state
    nvme_cpl_t* admin_cq;
    nvme_cmd_t* admin_sq;
    uint16_t admin_cq_head;
    uint16_t admin_cq_toggle;
    uint16_t admin_sq_tail;
    uint16_t admin_sq_head;

    // context for admin transactions
    // presently we serialize these under the admin_lock
    mtx_t admin_lock;
    sync_completion_t admin_signal;
    nvme_cpl_t admin_result;

    pci_protocol_t pci;
    zx_device_t* zxdev;

    size_t iosz;

    // source of physical pages for queues and admin commands
    io_buffer_t iob;

    thrd_t irqthread;
    thrd_t iothread;

#if WITH_STATS
    size_t stat_concur;
    size_t stat_pending;
    size_t stat_max_concur;
    size_t stat_max_pending;
    size_t stat_total_ops;
    size_t stat_total_blocks;
#endif

    // pool of utxns
    nvme_utxn_t utxn[UTXN_COUNT];
} nvme_device_t;

#if WITH_STATS
#define STAT_INC(name) do { nvme->stat_##name++; } while (0)
#define STAT_DEC(name) do { nvme->stat_##name--; } while (0)
#define STAT_DEC_IF(name, c) do { if (c) nvme->stat_##name--; } while (0)
#define STAT_ADD(name, num) do { nvme->stat_##name += num; } while (0)
#define STAT_INC_MAX(name) do { \
    if (++nvme->stat_##name > nvme->stat_max_##name) { \
        nvme->stat_max_##name = nvme->stat_##name; \
    }} while (0)
#else
#define STAT_INC(name) do { } while (0)
#define STAT_DEC(name) do { } while (0)
#define STAT_DEC_IF(name, c) do { } while (0)
#define STAT_ADD(name, num) do { } while (0)
#define STAT_INC_MAX(name) do { } while (0)
#endif

// We break IO transactions down into one or more "micro transactions" (utxn)
// based on the transfer limits of the controller, etc. Each utxn has an
// id associated with it, which is used as the command id for the command
// queued to the NVME device. This id is the same as its index into the
// pool of utxns and into the bitmask of free utxns, to simplify management.
//
// We maintain a pool of 63 of these, which is the number of commands
// that can be submitted to NVME via a single page submit queue.
//
// The utxns are not protected by locks. Instead, after initialization,
// they may only be touched by the io thread, which is responsible for
// queueing commands and dequeuing completion messages.
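
// Illustrative walkthrough (values are hypothetical): with bits 0 and 2 of
// utxn_avail clear (utxns 0 and 2 in flight), the lowest set bit is bit 1,
// so __builtin_ffsll() below returns 2 (a 1-based position); utxn_get()
// then clears bit 1 and hands out utxn[1]. utxn_put() simply sets the
// bit again to return the utxn to the pool.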
static nvme_utxn_t* utxn_get(nvme_device_t* nvme) {
    uint64_t n = __builtin_ffsll(nvme->utxn_avail);
    if (n == 0) {
        return NULL;
    }
    n--;
    nvme->utxn_avail &= ~(1ULL << n);
    STAT_INC_MAX(concur);
    return nvme->utxn + n;
}

static void utxn_put(nvme_device_t* nvme, nvme_utxn_t* utxn) {
    uint64_t n = utxn->id;
    STAT_DEC(concur);
    nvme->utxn_avail |= (1ULL << n);
}
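
// Per the NVMe spec, bit 0 of a completion entry's status field is a phase
// tag that the controller inverts on each pass through the circular queue.
// A queue slot therefore holds a fresh completion only while its phase bit
// matches the phase we expect (admin_cq_toggle / io_cq_toggle), which
// starts at 1 for a zeroed queue and flips each time our head index wraps
// back to 0.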
static zx_status_t nvme_admin_cq_get(nvme_device_t* nvme, nvme_cpl_t* cpl) {
    if ((readw(&nvme->admin_cq[nvme->admin_cq_head].status) & 1) != nvme->admin_cq_toggle) {
        return ZX_ERR_SHOULD_WAIT;
    }
    *cpl = nvme->admin_cq[nvme->admin_cq_head];

    // advance the head pointer, wrapping and inverting toggle at max
    uint16_t next = (nvme->admin_cq_head + 1) & (CQMAX - 1);
    if ((nvme->admin_cq_head = next) == 0) {
        nvme->admin_cq_toggle ^= 1;
    }

    // note the new sq head reported by hw
    nvme->admin_sq_head = cpl->sq_head;

    // ring the doorbell
    writel(next, nvme->io_admin_cq_head_db);
    return ZX_OK;
}
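
// The submission queues are circular with SQMAX slots; a queue is treated
// as full when advancing the tail would land on the head (for example,
// head == 5 and tail == 4 gives next == 5 == head), so one slot is always
// sacrificed to distinguish full from empty.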
static zx_status_t nvme_admin_sq_put(nvme_device_t* nvme, nvme_cmd_t* cmd) {
    uint16_t next = (nvme->admin_sq_tail + 1) & (SQMAX - 1);

    // if tail+1 == head, the queue is full
    if (next == nvme->admin_sq_head) {
        return ZX_ERR_SHOULD_WAIT;
    }

    nvme->admin_sq[nvme->admin_sq_tail] = *cmd;
    nvme->admin_sq_tail = next;

    // ring the doorbell
    writel(next, nvme->io_admin_sq_tail_db);
    return ZX_OK;
}

static zx_status_t nvme_io_cq_get(nvme_device_t* nvme, nvme_cpl_t* cpl) {
    if ((readw(&nvme->io_cq[nvme->io_cq_head].status) & 1) != nvme->io_cq_toggle) {
        return ZX_ERR_SHOULD_WAIT;
    }
    *cpl = nvme->io_cq[nvme->io_cq_head];

    // advance the head pointer, wrapping and inverting toggle at max
    uint16_t next = (nvme->io_cq_head + 1) & (CQMAX - 1);
    if ((nvme->io_cq_head = next) == 0) {
        nvme->io_cq_toggle ^= 1;
    }

    // note the new sq head reported by hw
    nvme->io_sq_head = cpl->sq_head;
    return ZX_OK;
}

static void nvme_io_cq_ack(nvme_device_t* nvme) {
    // ring the doorbell
    writel(nvme->io_cq_head, nvme->io_cq_head_db);
}

static zx_status_t nvme_io_sq_put(nvme_device_t* nvme, nvme_cmd_t* cmd) {
    uint16_t next = (nvme->io_sq_tail + 1) & (SQMAX - 1);

    // if tail+1 == head, the queue is full
    if (next == nvme->io_sq_head) {
        return ZX_ERR_SHOULD_WAIT;
    }

    nvme->io_sq[nvme->io_sq_tail] = *cmd;
    nvme->io_sq_tail = next;

    // ring the doorbell
    writel(next, nvme->io_sq_tail_db);
    return ZX_OK;
}

static int irq_thread(void* arg) {
    nvme_device_t* nvme = arg;
    for (;;) {
        zx_status_t r;
        if ((r = zx_interrupt_wait(nvme->irqh, NULL)) != ZX_OK) {
            zxlogf(ERROR, "nvme: irq wait failed: %d\n", r);
            break;
        }

        nvme_cpl_t cpl;
        if (nvme_admin_cq_get(nvme, &cpl) == ZX_OK) {
            nvme->admin_result = cpl;
            sync_completion_signal(&nvme->admin_signal);
        }

        sync_completion_signal(&nvme->io_signal);
    }
    return 0;
}

static zx_status_t nvme_admin_txn(nvme_device_t* nvme, nvme_cmd_t* cmd, nvme_cpl_t* cpl) {
    zx_status_t r;
    mtx_lock(&nvme->admin_lock);
    sync_completion_reset(&nvme->admin_signal);
    if ((r = nvme_admin_sq_put(nvme, cmd)) != ZX_OK) {
        goto done;
    }
    if ((r = sync_completion_wait(&nvme->admin_signal, ZX_SEC(1))) != ZX_OK) {
        zxlogf(ERROR, "nvme: admin txn: timed out\n");
        goto done;
    }

    unsigned code = NVME_CPL_STATUS_CODE(nvme->admin_result.status);
    if (code != 0) {
        zxlogf(ERROR, "nvme: admin txn: nvm error %03x\n", code);
        r = ZX_ERR_IO;
    }
    if (cpl != NULL) {
        *cpl = nvme->admin_result;
    }
done:
    mtx_unlock(&nvme->admin_lock);
    return r;
}

static inline void txn_complete(nvme_txn_t* txn, zx_status_t status) {
    txn->op.completion_cb(&txn->op, status);
}
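
// Worked example of the PRP setup in io_process_txn() below (hypothetical
// values): a 12288-byte read starting 512 bytes into a page pins
// pagecount == 4 pages. Their physical addresses land in pages[] (the
// utxn's io page), prp[0] covers pages[0] plus the 512-byte offset, and,
// because pagecount > 2, prp[1] holds the physical address of &pages[1],
// i.e. a PRP list naming the remaining pages.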
// Attempt to generate utxns and queue nvme commands for a txn.
// Returns true if this could not be completed due to temporary
// lack of resources, or false if it either succeeded or errored out.
static bool io_process_txn(nvme_device_t* nvme, nvme_txn_t* txn) {
    zx_handle_t vmo = txn->op.rw.vmo;
    nvme_utxn_t* utxn;
    zx_paddr_t* pages;
    zx_status_t r;

    for (;;) {
        // If there are no available utxns, we can't proceed,
        // and we tell the caller to retain the txn (true)
        if ((utxn = utxn_get(nvme)) == NULL) {
            return true;
        }

        uint32_t blocks = txn->op.rw.length;
        if (blocks > nvme->max_xfer) {
            blocks = nvme->max_xfer;
        }

        // Total transfer size in bytes
        size_t bytes = ((size_t) blocks) * ((size_t) nvme->info.block_size);

        // Page offset of first page of transfer
        size_t pageoffset = txn->op.rw.offset_vmo & (~PAGE_MASK);

        // Byte offset into first page of transfer
        size_t byteoffset = txn->op.rw.offset_vmo & PAGE_MASK;

        // Total pages mapped / touched
        size_t pagecount = (byteoffset + bytes + PAGE_MASK) >> PAGE_SHIFT;

        // read disk (OP_READ) -> memory (PERM_WRITE) or
        // write memory (PERM_READ) -> disk (OP_WRITE)
        uint32_t opt = (txn->opcode == NVME_OP_READ) ? ZX_BTI_PERM_WRITE : ZX_BTI_PERM_READ;

        pages = utxn->virt;

        if ((r = zx_bti_pin(nvme->bti, opt, vmo, pageoffset, pagecount << PAGE_SHIFT,
                            pages, pagecount, &utxn->pmt)) != ZX_OK) {
            zxlogf(ERROR, "nvme: could not pin pages: %d\n", r);
            break;
        }

        nvme_cmd_t cmd;
        memset(&cmd, 0, sizeof(cmd));
        cmd.cmd = NVME_CMD_CID(utxn->id) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(txn->opcode);
        cmd.nsid = 1;
        cmd.u.rw.start_lba = txn->op.rw.offset_dev;
        cmd.u.rw.block_count = blocks - 1;
        // The NVME command has room for two data pointers inline.
        // The first is always the pointer to the first page where data is.
        // The second is the second page if pagecount == 2,
        // or the address of an array of pages 2..n if pagecount > 2.
        cmd.dptr.prp[0] = pages[0] | byteoffset;
        if (pagecount == 2) {
            cmd.dptr.prp[1] = pages[1];
        } else if (pagecount > 2) {
            cmd.dptr.prp[1] = utxn->phys + sizeof(uint64_t);
        }

        zxlogf(TRACE, "nvme: txn=%p utxn id=%u pages=%zu op=%s\n", txn, utxn->id, pagecount,
               txn->opcode == NVME_OP_WRITE ? "WR" : "RD");
        zxlogf(SPEW, "nvme: prp[0]=%016zx prp[1]=%016zx\n", cmd.dptr.prp[0], cmd.dptr.prp[1]);
        zxlogf(SPEW, "nvme: pages[] = { %016zx, %016zx, %016zx, %016zx, ... }\n",
               pages[0], pages[1], pages[2], pages[3]);

        if ((r = nvme_io_sq_put(nvme, &cmd)) != ZX_OK) {
            zxlogf(ERROR, "nvme: could not submit cmd (txn=%p id=%u)\n", txn, utxn->id);
            // the pages were pinned above, so unpin them here; the shared
            // failure path below must not unpin, since it is also reached
            // when zx_bti_pin() itself failed
            zx_status_t r2;
            if ((r2 = zx_pmt_unpin(utxn->pmt)) != ZX_OK) {
                zxlogf(ERROR, "nvme: cannot unpin io buffer: %d\n", r2);
            }
            break;
        }

        utxn->txn = txn;

        // keep track of where we are
        txn->op.rw.offset_dev += blocks;
        txn->op.rw.offset_vmo += bytes;
        txn->op.rw.length -= blocks;
        txn->pending_utxns++;

        // If there's nothing remaining, we're done: we move
        // this txn to the active list and tell the caller
        // not to retain the txn (false)
        if (txn->op.rw.length == 0) {
            mtx_lock(&nvme->lock);
            list_add_tail(&nvme->active_txns, &txn->node);
            mtx_unlock(&nvme->lock);
            return false;
        }
    }

    // failure
    utxn_put(nvme, utxn);

    mtx_lock(&nvme->lock);
    txn->flags |= TXN_FLAG_FAILED;
    if (txn->pending_utxns) {
        // if there are earlier uncompleted IOs, we become active now
        // and will finish erroring out when they complete
        list_add_tail(&nvme->active_txns, &txn->node);
        txn = NULL;
    }
    mtx_unlock(&nvme->lock);

    if (txn != NULL) {
        txn_complete(txn, ZX_ERR_INTERNAL);
    }

    // Either way, we tell the caller not to retain the txn (false)
    return false;
}

static void io_process_txns(nvme_device_t* nvme) {
    nvme_txn_t* txn;

    for (;;) {
        mtx_lock(&nvme->lock);
        txn = list_remove_head_type(&nvme->pending_txns, nvme_txn_t, node);
        STAT_DEC_IF(pending, txn != NULL);
        mtx_unlock(&nvme->lock);

        if (txn == NULL) {
            return;
        }

        if (io_process_txn(nvme, txn)) {
            // put txn back at front of queue for further processing later
            mtx_lock(&nvme->lock);
            list_add_head(&nvme->pending_txns, &txn->node);
            STAT_INC_MAX(pending);
            mtx_unlock(&nvme->lock);
            return;
        }
    }
}
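
// A completion's cmd_id is the index of the utxn that carried the command
// (see the pool comment above), which lets us unpin that utxn's pages and
// return it to the pool; once the owning txn has no utxns in flight and no
// blocks left to queue, the txn itself completes.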
static void io_process_cpls(nvme_device_t* nvme) {
    bool ring_doorbell = false;
    nvme_cpl_t cpl;

    while (nvme_io_cq_get(nvme, &cpl) == ZX_OK) {
        ring_doorbell = true;

        if (cpl.cmd_id >= UTXN_COUNT) {
            zxlogf(ERROR, "nvme: unexpected cmd id %u\n", cpl.cmd_id);
            continue;
        }
        nvme_utxn_t* utxn = nvme->utxn + cpl.cmd_id;
        nvme_txn_t* txn = utxn->txn;

        if (txn == NULL) {
            zxlogf(ERROR, "nvme: inactive utxn #%u completed?!\n", cpl.cmd_id);
            continue;
        }

        uint32_t code = NVME_CPL_STATUS_CODE(cpl.status);
        if (code != 0) {
            zxlogf(ERROR, "nvme: utxn #%u txn %p failed: status=%03x\n",
                   cpl.cmd_id, txn, code);
            txn->flags |= TXN_FLAG_FAILED;
            // discard any remaining bytes -- no reason to keep creating
            // further utxns once one has failed
            txn->op.rw.length = 0;
        } else {
            zxlogf(SPEW, "nvme: utxn #%u txn %p OKAY\n", cpl.cmd_id, txn);
        }

        zx_status_t r;
        if ((r = zx_pmt_unpin(utxn->pmt)) != ZX_OK) {
            zxlogf(ERROR, "nvme: cannot unpin io buffer: %d\n", r);
        }

        // release the microtransaction
        utxn->txn = NULL;
        utxn_put(nvme, utxn);

        txn->pending_utxns--;
        if ((txn->pending_utxns == 0) && (txn->op.rw.length == 0)) {
            // remove from either pending or active list
            mtx_lock(&nvme->lock);
            list_delete(&txn->node);
            mtx_unlock(&nvme->lock);
            zxlogf(TRACE, "nvme: txn %p %s\n", txn, txn->flags & TXN_FLAG_FAILED ? "error" : "okay");
            txn_complete(txn, txn->flags & TXN_FLAG_FAILED ? ZX_ERR_IO : ZX_OK);
        }
    }

    if (ring_doorbell) {
        nvme_io_cq_ack(nvme);
    }
}

static int io_thread(void* arg) {
    nvme_device_t* nvme = arg;
    for (;;) {
        if (sync_completion_wait(&nvme->io_signal, ZX_TIME_INFINITE)) {
            break;
        }
        if (nvme->flags & FLAG_SHUTDOWN) {
            // TODO: cancel out pending IO
            zxlogf(INFO, "nvme: io thread exiting\n");
            break;
        }

        sync_completion_reset(&nvme->io_signal);

        // process completion messages
        io_process_cpls(nvme);

        // process work queue
        io_process_txns(nvme);
    }
    return 0;
}
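
// Block protocol entry point. The block_op_t is embedded in our nvme_txn_t
// (see nvme_query() below). For a read, for example, the client sets
// command to BLOCK_OP_READ, rw.vmo to the target vmo, rw.length to the
// block count, and rw.offset_dev / rw.offset_vmo in blocks; completion is
// asynchronous via completion_cb.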
static void nvme_queue(void* ctx, block_op_t* op) {
    nvme_device_t* nvme = ctx;
    nvme_txn_t* txn = containerof(op, nvme_txn_t, op);

    switch (txn->op.command & BLOCK_OP_MASK) {
    case BLOCK_OP_READ:
        txn->opcode = NVME_OP_READ;
        break;
    case BLOCK_OP_WRITE:
        txn->opcode = NVME_OP_WRITE;
        break;
    case BLOCK_OP_FLUSH:
        // TODO
        txn_complete(txn, ZX_OK);
        return;
    default:
        txn_complete(txn, ZX_ERR_NOT_SUPPORTED);
        return;
    }

    if (txn->op.rw.length == 0) {
        txn_complete(txn, ZX_ERR_INVALID_ARGS);
        return;
    }
    // Transaction must fit within the device
    if ((txn->op.rw.offset_dev >= nvme->info.block_count) ||
        (nvme->info.block_count - txn->op.rw.offset_dev < txn->op.rw.length)) {
        txn_complete(txn, ZX_ERR_OUT_OF_RANGE);
        return;
    }

    // convert vmo offset to a byte offset
    txn->op.rw.offset_vmo *= nvme->info.block_size;

    txn->pending_utxns = 0;
    txn->flags = 0;

    zxlogf(SPEW, "nvme: io: %s: %ublks @ blk#%zu\n",
           txn->opcode == NVME_OP_WRITE ? "wr" : "rd",
           txn->op.rw.length, txn->op.rw.offset_dev);

    STAT_INC(total_ops);
    STAT_ADD(total_blocks, txn->op.rw.length);

    mtx_lock(&nvme->lock);
    list_add_tail(&nvme->pending_txns, &txn->node);
    STAT_INC_MAX(pending);
    mtx_unlock(&nvme->lock);

    sync_completion_signal(&nvme->io_signal);
}

static void nvme_query(void* ctx, block_info_t* info_out, size_t* block_op_size_out) {
    nvme_device_t* nvme = ctx;
    *info_out = nvme->info;
    *block_op_size_out = sizeof(nvme_txn_t);
}

static zx_status_t nvme_ioctl(void* ctx, uint32_t op, const void* cmd, size_t cmdlen, void* reply,
                              size_t max, size_t* out_actual) {
    nvme_device_t* nvme = ctx;
    switch (op) {
    case IOCTL_BLOCK_GET_INFO: {
        if (max < sizeof(block_info_t)) {
            return ZX_ERR_BUFFER_TOO_SMALL;
        }
        size_t sz;
        nvme_query(nvme, reply, &sz);
        *out_actual = sizeof(block_info_t);
        return ZX_OK;
    }
    case IOCTL_BLOCK_GET_STATS: {
#if WITH_STATS
        if (cmdlen != sizeof(bool)) {
            return ZX_ERR_INVALID_ARGS;
        }
        block_stats_t* out = reply;
        if (max < sizeof(*out)) {
            return ZX_ERR_BUFFER_TOO_SMALL;
        }
        mtx_lock(&nvme->lock);
        out->max_concur = nvme->stat_max_concur;
        out->max_pending = nvme->stat_max_pending;
        out->total_ops = nvme->stat_total_ops;
        out->total_blocks = nvme->stat_total_blocks;
        bool clear = *(bool *)cmd;
        if (clear) {
            nvme->stat_max_concur = 0;
            nvme->stat_max_pending = 0;
            nvme->stat_total_ops = 0;
            nvme->stat_total_blocks = 0;
        }
        mtx_unlock(&nvme->lock);
        *out_actual = sizeof(*out);
        return ZX_OK;
#else
        return ZX_ERR_NOT_SUPPORTED;
#endif
    }
    case IOCTL_DEVICE_SYNC: {
        return ZX_OK;
    }
    default:
        return ZX_ERR_NOT_SUPPORTED;
    }
}

static zx_off_t nvme_get_size(void* ctx) {
    nvme_device_t* nvme = ctx;
    return nvme->info.block_count * nvme->info.block_size;
}

static zx_status_t nvme_suspend(void* ctx, uint32_t flags) {
    return ZX_OK;
}

static zx_status_t nvme_resume(void* ctx, uint32_t flags) {
    return ZX_OK;
}

static void nvme_release(void* ctx) {
    nvme_device_t* nvme = ctx;
    int r;

    zxlogf(INFO, "nvme: release\n");
    nvme->flags |= FLAG_SHUTDOWN;
    if (nvme->ioh != ZX_HANDLE_INVALID) {
        pci_enable_bus_master(&nvme->pci, false);
        zx_handle_close(nvme->bti);
        zx_handle_close(nvme->ioh);
        // TODO: risks a handle use-after-close, will be resolved by IRQ api
        // changes coming soon
        zx_handle_close(nvme->irqh);
    }
    if (nvme->flags & FLAG_IRQ_THREAD_STARTED) {
        thrd_join(nvme->irqthread, &r);
    }
    if (nvme->flags & FLAG_IO_THREAD_STARTED) {
        sync_completion_signal(&nvme->io_signal);
        thrd_join(nvme->iothread, &r);
    }

    // error out any pending txns
    mtx_lock(&nvme->lock);
    nvme_txn_t* txn;
    while ((txn = list_remove_head_type(&nvme->active_txns, nvme_txn_t, node)) != NULL) {
        txn_complete(txn, ZX_ERR_PEER_CLOSED);
    }
    while ((txn = list_remove_head_type(&nvme->pending_txns, nvme_txn_t, node)) != NULL) {
        txn_complete(txn, ZX_ERR_PEER_CLOSED);
    }
    mtx_unlock(&nvme->lock);

    io_buffer_release(&nvme->iob);
    free(nvme);
}

static zx_protocol_device_t device_ops = {
    .version = DEVICE_OPS_VERSION,

    .ioctl = nvme_ioctl,
    .get_size = nvme_get_size,

    .suspend = nvme_suspend,
    .resume = nvme_resume,
    .release = nvme_release,
};

// Log a fixed-length, space-padded identify string, squashing
// non-printable characters and trimming trailing spaces.
static void infostring(const char* prefix, uint8_t* str, size_t len) {
    char tmp[len + 1];
    size_t i;
    for (i = 0; i < len; i++) {
        uint8_t c = str[i];
        if (c == 0) {
            break;
        }
        if ((c < ' ') || (c > 127)) {
            c = ' ';
        }
        tmp[i] = c;
    }
    tmp[i] = 0;
    while (i > 0) {
        i--;
        if (tmp[i] == ' ') {
            tmp[i] = 0;
        } else {
            break;
        }
    }
    zxlogf(INFO, "nvme: %s'%s'\n", prefix, tmp);
}

// Convenience accessors for BAR0 registers
#define rd32(r) readl(nvme->io + NVME_REG_##r)
#define rd64(r) readll(nvme->io + NVME_REG_##r)
#define wr32(v,r) writel(v, nvme->io + NVME_REG_##r)
#define wr64(v,r) writell(v, nvme->io + NVME_REG_##r)

// dedicated pages from the page pool
#define IDX_ADMIN_SQ 0
#define IDX_ADMIN_CQ 1
#define IDX_IO_SQ 2
#define IDX_IO_CQ 3
#define IDX_SCRATCH 4
#define IDX_UTXN_POOL 5 // this must always be last

#define IO_PAGE_COUNT (IDX_UTXN_POOL + UTXN_COUNT)

static inline uint64_t U64(uint8_t* x) {
    return *((uint64_t*) (void*) x);
}
static inline uint32_t U32(uint8_t* x) {
    return *((uint32_t*) (void*) x);
}
static inline uint16_t U16(uint8_t* x) {
    return *((uint16_t*) (void*) x);
}

#define WAIT_MS 5000
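
// Bring the controller from reset to a working one-queue-pair setup:
// disable and wait for CSTS.RDY to clear, install the admin queues, enable,
// IDENTIFY the controller, create one IO completion and one IO submission
// queue, IDENTIFY namespace 1, then derive block size and transfer limits
// from the results.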
static zx_status_t nvme_init(nvme_device_t* nvme) {
    uint32_t n = rd32(VS);
    uint64_t cap = rd64(CAP);

    zxlogf(INFO, "nvme: version %d.%d.%d\n", n >> 16, (n >> 8) & 0xFF, n & 0xFF);
    zxlogf(INFO, "nvme: page size: (MPSMIN): %u (MPSMAX): %u\n",
           (unsigned) (1 << NVME_CAP_MPSMIN(cap)),
           (unsigned) (1 << NVME_CAP_MPSMAX(cap)));
    zxlogf(INFO, "nvme: doorbell stride: %u\n", (unsigned) (1 << NVME_CAP_DSTRD(cap)));
    zxlogf(INFO, "nvme: timeout: %u ms\n", (unsigned) (1 << NVME_CAP_TO(cap)));
    zxlogf(INFO, "nvme: boot partition support (BPS): %c\n", NVME_CAP_BPS(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: supports NVM command set (CSS:NVM): %c\n", NVME_CAP_CSS_NVM(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: subsystem reset supported (NSSRS): %c\n", NVME_CAP_NSSRS(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: weighted-round-robin (AMS:WRR): %c\n", NVME_CAP_AMS_WRR(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: vendor-specific arbitration (AMS:VS): %c\n", NVME_CAP_AMS_VS(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: contiguous queues required (CQR): %c\n", NVME_CAP_CQR(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: maximum queue entries supported (MQES): %u\n", ((unsigned) NVME_CAP_MQES(cap)) + 1);

    if ((1 << NVME_CAP_MPSMIN(cap)) > PAGE_SIZE) {
        zxlogf(ERROR, "nvme: minimum page size larger than platform page size\n");
        return ZX_ERR_NOT_SUPPORTED;
    }
    // allocate pages for the various queues and the utxn scatter lists
    // TODO: these should all be RO to hardware apart from the scratch io page(s)
    if (io_buffer_init(&nvme->iob, nvme->bti, PAGE_SIZE * IO_PAGE_COUNT, IO_BUFFER_RW) ||
        io_buffer_physmap(&nvme->iob)) {
        zxlogf(ERROR, "nvme: could not allocate io buffers\n");
        return ZX_ERR_NO_MEMORY;
    }

    // initialize the microtransaction pool
    nvme->utxn_avail = 0x7FFFFFFFFFFFFFFFULL;
    for (unsigned n = 0; n < UTXN_COUNT; n++) {
        nvme->utxn[n].id = n;
        nvme->utxn[n].phys = nvme->iob.phys_list[IDX_UTXN_POOL + n];
        nvme->utxn[n].virt = nvme->iob.virt + (IDX_UTXN_POOL + n) * PAGE_SIZE;
    }

    if (rd32(CSTS) & NVME_CSTS_RDY) {
        zxlogf(INFO, "nvme: controller is active. resetting...\n");
        wr32(rd32(CC) & ~NVME_CC_EN, CC); // disable
    }

    // ensure previous shutdown (by us or the bootloader) has completed
    unsigned ms_remain = WAIT_MS;
    while (rd32(CSTS) & NVME_CSTS_RDY) {
        if (--ms_remain == 0) {
            zxlogf(ERROR, "nvme: timed out waiting for CSTS ~RDY\n");
            return ZX_ERR_INTERNAL;
        }
        zx_nanosleep(zx_deadline_after(ZX_MSEC(1)));
    }

    zxlogf(INFO, "nvme: controller inactive. (after %u ms)\n", WAIT_MS - ms_remain);

    // configure admin submission and completion queues
    wr64(nvme->iob.phys_list[IDX_ADMIN_SQ], ASQ);
    wr64(nvme->iob.phys_list[IDX_ADMIN_CQ], ACQ);
    wr32(NVME_AQA_ASQS(SQMAX - 1) | NVME_AQA_ACQS(CQMAX - 1), AQA);

    zxlogf(INFO, "nvme: enabling\n");
    wr32(NVME_CC_EN | NVME_CC_AMS_RR | NVME_CC_MPS(0) |
         NVME_CC_IOCQES(NVME_CPL_SHIFT) |
         NVME_CC_IOSQES(NVME_CMD_SHIFT), CC);

    ms_remain = WAIT_MS;
    while (!(rd32(CSTS) & NVME_CSTS_RDY)) {
        if (--ms_remain == 0) {
            zxlogf(ERROR, "nvme: timed out waiting for CSTS RDY\n");
            return ZX_ERR_INTERNAL;
        }
        zx_nanosleep(zx_deadline_after(ZX_MSEC(1)));
    }
    zxlogf(INFO, "nvme: controller ready. (after %u ms)\n", WAIT_MS - ms_remain);

    // registers and buffers for admin queues
    nvme->io_admin_sq_tail_db = nvme->io + NVME_REG_SQnTDBL(0, cap);
    nvme->io_admin_cq_head_db = nvme->io + NVME_REG_CQnHDBL(0, cap);

    nvme->admin_sq = nvme->iob.virt + PAGE_SIZE * IDX_ADMIN_SQ;
    nvme->admin_sq_head = 0;
    nvme->admin_sq_tail = 0;

    nvme->admin_cq = nvme->iob.virt + PAGE_SIZE * IDX_ADMIN_CQ;
    nvme->admin_cq_head = 0;
    nvme->admin_cq_toggle = 1;

    // registers and buffers for IO queues
    nvme->io_sq_tail_db = nvme->io + NVME_REG_SQnTDBL(1, cap);
    nvme->io_cq_head_db = nvme->io + NVME_REG_CQnHDBL(1, cap);

    nvme->io_sq = nvme->iob.virt + PAGE_SIZE * IDX_IO_SQ;
    nvme->io_sq_head = 0;
    nvme->io_sq_tail = 0;

    nvme->io_cq = nvme->iob.virt + PAGE_SIZE * IDX_IO_CQ;
    nvme->io_cq_head = 0;
    nvme->io_cq_toggle = 1;

    // scratch page for admin ops
    void* scratch = nvme->iob.virt + PAGE_SIZE * IDX_SCRATCH;

    if (thrd_create_with_name(&nvme->irqthread, irq_thread, nvme, "nvme-irq-thread")) {
        zxlogf(ERROR, "nvme: cannot create irq thread\n");
        return ZX_ERR_INTERNAL;
    }
    nvme->flags |= FLAG_IRQ_THREAD_STARTED;

    if (thrd_create_with_name(&nvme->iothread, io_thread, nvme, "nvme-io-thread")) {
        zxlogf(ERROR, "nvme: cannot create io thread\n");
        return ZX_ERR_INTERNAL;
    }
    nvme->flags |= FLAG_IO_THREAD_STARTED;

    nvme_cmd_t cmd;

    // identify device
    // zero the whole command so the unused command dwords are not stack garbage
    memset(&cmd, 0, sizeof(cmd));
    cmd.cmd = NVME_CMD_CID(0) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(NVME_ADMIN_OP_IDENTIFY);
    cmd.nsid = 0;
    cmd.reserved = 0;
    cmd.mptr = 0;
    cmd.dptr.prp[0] = nvme->iob.phys_list[IDX_SCRATCH];
    cmd.dptr.prp[1] = 0;
    cmd.u.raw[0] = 1; // CNS 01: identify controller

    if (nvme_admin_txn(nvme, &cmd, NULL) != ZX_OK) {
        zxlogf(ERROR, "nvme: device identify op failed\n");
        return ZX_ERR_INTERNAL;
    }

    nvme_identify_t* ci = scratch;
    infostring("model: ", ci->MN, sizeof(ci->MN));
    infostring("serial number: ", ci->SN, sizeof(ci->SN));
    infostring("firmware: ", ci->FR, sizeof(ci->FR));

    if ((ci->SQES & 0xF) != NVME_CMD_SHIFT) {
        zxlogf(ERROR, "nvme: SQES minimum is not %ub\n", NVME_CMD_SIZE);
        return ZX_ERR_NOT_SUPPORTED;
    }
    if ((ci->CQES & 0xF) != NVME_CPL_SHIFT) {
        zxlogf(ERROR, "nvme: CQES minimum is not %ub\n", NVME_CPL_SIZE);
        return ZX_ERR_NOT_SUPPORTED;
    }
    zxlogf(INFO, "nvme: max outstanding commands: %u\n", ci->MAXCMD);

    uint32_t nscount = ci->NN;
    zxlogf(INFO, "nvme: max namespaces: %u\n", nscount);
    zxlogf(INFO, "nvme: scatter gather lists (SGL): %c %08x\n",
           (ci->SGLS & 3) ? 'Y' : 'N', ci->SGLS);

    // Maximum transfer size (MDTS) is in units of 2^n pages; n == 0 means no limit
    nvme->max_xfer = 0xFFFFFFFF;
    if ((ci->MDTS != 0) && (ci->MDTS < (31 - PAGE_SHIFT))) {
        nvme->max_xfer = (1 << ci->MDTS) * PAGE_SIZE;
    }

    zxlogf(INFO, "nvme: max data transfer: %u bytes\n", nvme->max_xfer);
    zxlogf(INFO, "nvme: sanitize caps: %u\n", ci->SANICAP & 3);

    zxlogf(INFO, "nvme: abort command limit (ACL): %u\n", ci->ACL + 1);
    zxlogf(INFO, "nvme: asynch event req limit (AERL): %u\n", ci->AERL + 1);
    zxlogf(INFO, "nvme: firmware: slots: %u reset: %c slot1ro: %c\n", (ci->FRMW >> 1) & 3,
           (ci->FRMW & (1 << 4)) ? 'N' : 'Y', (ci->FRMW & 1) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: host buffer: min/preferred: %u/%u pages\n", ci->HMMIN, ci->HMPRE);
    zxlogf(INFO, "nvme: capacity: total/unalloc: %zu/%zu\n", ci->TNVMCAP_LO, ci->UNVMCAP_LO);

    if (ci->VWC & 1) {
        nvme->flags |= FLAG_HAS_VWC;
    }
    uint32_t awun = ci->AWUN + 1;
    uint32_t awupf = ci->AWUPF + 1;
    zxlogf(INFO, "nvme: volatile write cache (VWC): %s\n", nvme->flags & FLAG_HAS_VWC ? "Y" : "N");
    zxlogf(INFO, "nvme: atomic write unit (AWUN)/(AWUPF): %u/%u blks\n", awun, awupf);

#define FEATURE(a,b) if (ci->a & a##_##b) zxlogf(INFO, "nvme: feature: %s\n", #b)
    FEATURE(OACS, DOORBELL_BUFFER_CONFIG);
    FEATURE(OACS, VIRTUALIZATION_MANAGEMENT);
    FEATURE(OACS, NVME_MI_SEND_RECV);
    FEATURE(OACS, DIRECTIVE_SEND_RECV);
    FEATURE(OACS, DEVICE_SELF_TEST);
    FEATURE(OACS, NAMESPACE_MANAGEMENT);
    FEATURE(OACS, FIRMWARE_DOWNLOAD_COMMIT);
    FEATURE(OACS, FORMAT_NVM);
    FEATURE(OACS, SECURITY_SEND_RECV);
    FEATURE(ONCS, TIMESTAMP);
    FEATURE(ONCS, RESERVATIONS);
    FEATURE(ONCS, SAVE_SELECT_NONZERO);
    FEATURE(ONCS, WRITE_UNCORRECTABLE);
    FEATURE(ONCS, COMPARE);
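
    // Per the NVMe spec, the Number of Queues feature takes zero-based
    // counts in CDW11 (submission queues in bits 15:0, completion queues
    // in bits 31:16), so raw[1] == 0 below requests one of each; the
    // granted counts come back in completion dword 0, which appears here
    // as the cpl.cmd value logged below.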
    // set feature (number of queues) to 1 iosq and 1 iocq
    memset(&cmd, 0, sizeof(cmd));
    cmd.cmd = NVME_CMD_CID(0) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(NVME_ADMIN_OP_SET_FEATURE);
    cmd.u.raw[0] = NVME_FEATURE_NUMBER_OF_QUEUES;
    cmd.u.raw[1] = 0;

    nvme_cpl_t cpl;
    if (nvme_admin_txn(nvme, &cmd, &cpl) != ZX_OK) {
        zxlogf(ERROR, "nvme: set feature (number of queues) op failed\n");
        return ZX_ERR_INTERNAL;
    }
    zxlogf(INFO, "nvme: cpl.cmd %08x\n", cpl.cmd);

    // create the IO completion queue
    memset(&cmd, 0, sizeof(cmd));
    cmd.cmd = NVME_CMD_CID(0) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(NVME_ADMIN_OP_CREATE_IOCQ);
    cmd.dptr.prp[0] = nvme->iob.phys_list[IDX_IO_CQ];
    cmd.u.raw[0] = ((CQMAX - 1) << 16) | 1; // queue size, queue id
    cmd.u.raw[1] = (0 << 16) | 2 | 1;       // irq vector, irq enable, phys contig

    if (nvme_admin_txn(nvme, &cmd, NULL) != ZX_OK) {
        zxlogf(ERROR, "nvme: completion queue creation op failed\n");
        return ZX_ERR_INTERNAL;
    }

    // create the IO submission queue
    memset(&cmd, 0, sizeof(cmd));
    cmd.cmd = NVME_CMD_CID(0) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(NVME_ADMIN_OP_CREATE_IOSQ);
    cmd.dptr.prp[0] = nvme->iob.phys_list[IDX_IO_SQ];
    cmd.u.raw[0] = ((SQMAX - 1) << 16) | 1; // queue size, queue id
    cmd.u.raw[1] = (1 << 16) | 0 | 1;       // cqid, qprio, phys contig

    if (nvme_admin_txn(nvme, &cmd, NULL) != ZX_OK) {
        zxlogf(ERROR, "nvme: submission queue creation op failed\n");
        return ZX_ERR_INTERNAL;
    }

    // identify namespace 1
    memset(&cmd, 0, sizeof(cmd));
    cmd.cmd = NVME_CMD_CID(0) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(NVME_ADMIN_OP_IDENTIFY);
    cmd.nsid = 1;
    cmd.dptr.prp[0] = nvme->iob.phys_list[IDX_SCRATCH];

    if (nvme_admin_txn(nvme, &cmd, NULL) != ZX_OK) {
        zxlogf(ERROR, "nvme: namespace identify op failed\n");
        return ZX_ERR_INTERNAL;
    }

    nvme_identify_ns_t* ni = scratch;

    uint32_t nawun = (ni->NSFEAT & NSFEAT_LOCAL_ATOMIC_SIZES) ? (ni->NAWUN + 1U) : awun;
    uint32_t nawupf = (ni->NSFEAT & NSFEAT_LOCAL_ATOMIC_SIZES) ? (ni->NAWUPF + 1U) : awupf;
    zxlogf(INFO, "nvme: ns: atomic write unit (AWUN)/(AWUPF): %u/%u blks\n", nawun, nawupf);
    zxlogf(INFO, "nvme: ns: NABSN/NABO/NABSPF/NOIOB: %u/%u/%u/%u\n",
           ni->NABSN, ni->NABO, ni->NABSPF, ni->NOIOB);

    // table of block formats
    for (unsigned i = 0; i < 16; i++) {
        if (ni->LBAF[i]) {
            zxlogf(INFO, "nvme: ns: LBA FMT %02d: RP=%u LBADS=2^%ub MS=%ub\n",
                   i, NVME_LBAFMT_RP(ni->LBAF[i]), NVME_LBAFMT_LBADS(ni->LBAF[i]),
                   NVME_LBAFMT_MS(ni->LBAF[i]));
        }
    }

    zxlogf(INFO, "nvme: ns: LBA FMT #%u active\n", ni->FLBAS & 0xF);
    zxlogf(INFO, "nvme: ns: data protection: caps/set: 0x%02x/%u\n",
           ni->DPC & 0x3F, ni->DPS & 3);

    uint32_t fmt = ni->LBAF[ni->FLBAS & 0xF];

    zxlogf(INFO, "nvme: ns: size/cap/util: %zu/%zu/%zu blks\n", ni->NSSZ, ni->NCAP, ni->NUSE);

    nvme->info.block_count = ni->NSSZ;
    nvme->info.block_size = 1 << NVME_LBAFMT_LBADS(fmt);
    nvme->info.max_transfer_size = BLOCK_MAX_TRANSFER_UNBOUNDED;

    if (NVME_LBAFMT_MS(fmt)) {
        zxlogf(ERROR, "nvme: cannot handle LBA format with metadata\n");
        return ZX_ERR_NOT_SUPPORTED;
    }
    if ((nvme->info.block_size < 512) || (nvme->info.block_size > 32768)) {
        zxlogf(ERROR, "nvme: cannot handle LBA size of %u\n", nvme->info.block_size);
        return ZX_ERR_NOT_SUPPORTED;
    }

    // NVME r/w commands operate in block units, maximum of 64K blocks per command:
    size_t max_bytes_per_cmd = ((size_t) nvme->info.block_size) * ((size_t) 65536);

    if (nvme->max_xfer > max_bytes_per_cmd) {
        nvme->max_xfer = max_bytes_per_cmd;
    }

    // The device may allow transfers larger than we are prepared
    // to handle. Clip to our limit.
    if (nvme->max_xfer > MAX_XFER) {
        nvme->max_xfer = MAX_XFER;
    }

    // convert to block units
    nvme->max_xfer /= nvme->info.block_size;
    zxlogf(INFO, "nvme: max transfer per r/w op: %u blocks (%u bytes)\n",
           nvme->max_xfer, nvme->max_xfer * nvme->info.block_size);

    device_make_visible(nvme->zxdev);
    return ZX_OK;
}

block_protocol_ops_t block_ops = {
    .query = nvme_query,
    .queue = nvme_queue,
};
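
// Driver entry point: map BAR0, pick an IRQ mode (trying MSI-X, then MSI,
// then legacy), enable bus mastering, obtain the BTI used for pinning vmo
// pages, and publish the device invisibly so nvme_init() can finish
// bringing it up before it becomes visible to the block stack.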
static zx_status_t nvme_bind(void* ctx, zx_device_t* dev) {
    nvme_device_t* nvme;
    if ((nvme = calloc(1, sizeof(nvme_device_t))) == NULL) {
        return ZX_ERR_NO_MEMORY;
    }
    list_initialize(&nvme->pending_txns);
    list_initialize(&nvme->active_txns);
    mtx_init(&nvme->lock, mtx_plain);
    mtx_init(&nvme->admin_lock, mtx_plain);

    if (device_get_protocol(dev, ZX_PROTOCOL_PCI, &nvme->pci)) {
        goto fail;
    }

    if (pci_map_bar(&nvme->pci, 0u, ZX_CACHE_POLICY_UNCACHED_DEVICE,
                    &nvme->io, &nvme->iosz, &nvme->ioh)) {
        zxlogf(ERROR, "nvme: cannot map registers\n");
        goto fail;
    }

    uint32_t modes[3] = {
        ZX_PCIE_IRQ_MODE_MSI_X, ZX_PCIE_IRQ_MODE_MSI, ZX_PCIE_IRQ_MODE_LEGACY,
    };
    uint32_t nirq = 0;
    for (unsigned n = 0; n < countof(modes); n++) {
        if ((pci_query_irq_mode(&nvme->pci, modes[n], &nirq) == ZX_OK) &&
            (pci_set_irq_mode(&nvme->pci, modes[n], 1) == ZX_OK)) {
            zxlogf(INFO, "nvme: irq mode %u, irq count %u (#%u)\n", modes[n], nirq, n);
            goto irq_configured;
        }
    }
    zxlogf(ERROR, "nvme: could not configure irqs\n");
    goto fail;

irq_configured:
    if (pci_map_interrupt(&nvme->pci, 0, &nvme->irqh) != ZX_OK) {
        zxlogf(ERROR, "nvme: could not map irq\n");
        goto fail;
    }
    if (pci_enable_bus_master(&nvme->pci, true)) {
        zxlogf(ERROR, "nvme: cannot enable bus mastering\n");
        goto fail;
    }
    if (pci_get_bti(&nvme->pci, 0, &nvme->bti) != ZX_OK) {
        zxlogf(ERROR, "nvme: cannot obtain bti handle\n");
        goto fail;
    }

    device_add_args_t args = {
        .version = DEVICE_ADD_ARGS_VERSION,
        .name = "nvme",
        .ctx = nvme,
        .ops = &device_ops,
        .flags = DEVICE_ADD_INVISIBLE,
        .proto_id = ZX_PROTOCOL_BLOCK_IMPL,
        .proto_ops = &block_ops,
    };

    if (device_add(dev, &args, &nvme->zxdev)) {
        goto fail;
    }

    if (nvme_init(nvme) != ZX_OK) {
        zxlogf(ERROR, "nvme: init failed\n");
        device_remove(nvme->zxdev);
        return ZX_ERR_INTERNAL;
    }

    return ZX_OK;

fail:
    nvme_release(nvme);
    return ZX_ERR_NOT_SUPPORTED;
}

static zx_driver_ops_t driver_ops = {
    .version = DRIVER_OPS_VERSION,
    .bind = nvme_bind,
};

ZIRCON_DRIVER_BEGIN(nvme, driver_ops, "zircon", "0.1", 4)
    BI_ABORT_IF(NE, BIND_PROTOCOL, ZX_PROTOCOL_PCI),
    BI_ABORT_IF(NE, BIND_PCI_CLASS, 1),     // Mass Storage
    BI_ABORT_IF(NE, BIND_PCI_SUBCLASS, 8),  // NVM
    BI_MATCH_IF(EQ, BIND_PCI_INTERFACE, 2), // NVMHCI
ZIRCON_DRIVER_END(nvme)