nvme_ctrlr.c revision 248770
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
2590792Sgshapiro */ 2690792Sgshapiro 2790792Sgshapiro#include <sys/cdefs.h> 2890792Sgshapiro__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_ctrlr.c 248770 2013-03-26 22:11:34Z jimharris $"); 2990792Sgshapiro 3090792Sgshapiro#include <sys/param.h> 3190792Sgshapiro#include <sys/bus.h> 3290792Sgshapiro#include <sys/conf.h> 3390792Sgshapiro#include <sys/ioccom.h> 3490792Sgshapiro#include <sys/smp.h> 3590792Sgshapiro 36141858Sgshapiro#include <dev/pci/pcireg.h> 3790792Sgshapiro#include <dev/pci/pcivar.h> 3890792Sgshapiro 3990792Sgshapiro#include "nvme_private.h" 4090792Sgshapiro 4190792Sgshapirostatic void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, 4290792Sgshapiro struct nvme_async_event_request *aer); 4390792Sgshapiro 4490792Sgshapirostatic int 4590792Sgshapironvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr) 4690792Sgshapiro{ 4790792Sgshapiro 4890792Sgshapiro /* Chatham puts the NVMe MMRs behind BAR 2/3, not BAR 0/1. */ 4990792Sgshapiro if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) 5090792Sgshapiro ctrlr->resource_id = PCIR_BAR(2); 5190792Sgshapiro else 5290792Sgshapiro ctrlr->resource_id = PCIR_BAR(0); 5390792Sgshapiro 5490792Sgshapiro ctrlr->resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY, 5590792Sgshapiro &ctrlr->resource_id, 0, ~0, 1, RF_ACTIVE); 5690792Sgshapiro 5790792Sgshapiro if(ctrlr->resource == NULL) { 5890792Sgshapiro device_printf(ctrlr->dev, "unable to allocate pci resource\n"); 5990792Sgshapiro return (ENOMEM); 6090792Sgshapiro } 6190792Sgshapiro 6290792Sgshapiro ctrlr->bus_tag = rman_get_bustag(ctrlr->resource); 6390792Sgshapiro ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource); 6490792Sgshapiro ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle; 6590792Sgshapiro 6690792Sgshapiro /* 6790792Sgshapiro * The NVMe spec allows for the MSI-X table to be placed behind 6890792Sgshapiro * BAR 4/5, separate from the control/doorbell registers. 
Always 6990792Sgshapiro * try to map this bar, because it must be mapped prior to calling 7090792Sgshapiro * pci_alloc_msix(). If the table isn't behind BAR 4/5, 7190792Sgshapiro * bus_alloc_resource() will just return NULL which is OK. 7290792Sgshapiro */ 7390792Sgshapiro ctrlr->bar4_resource_id = PCIR_BAR(4); 7490792Sgshapiro ctrlr->bar4_resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY, 7590792Sgshapiro &ctrlr->bar4_resource_id, 0, ~0, 1, RF_ACTIVE); 7690792Sgshapiro 7790792Sgshapiro return (0); 7890792Sgshapiro} 7990792Sgshapiro 8090792Sgshapiro#ifdef CHATHAM2 8190792Sgshapirostatic int 8290792Sgshapironvme_ctrlr_allocate_chatham_bar(struct nvme_controller *ctrlr) 8390792Sgshapiro{ 8490792Sgshapiro 8590792Sgshapiro ctrlr->chatham_resource_id = PCIR_BAR(CHATHAM_CONTROL_BAR); 8690792Sgshapiro ctrlr->chatham_resource = bus_alloc_resource(ctrlr->dev, 8790792Sgshapiro SYS_RES_MEMORY, &ctrlr->chatham_resource_id, 0, ~0, 1, 8890792Sgshapiro RF_ACTIVE); 8990792Sgshapiro 9090792Sgshapiro if(ctrlr->chatham_resource == NULL) { 9190792Sgshapiro device_printf(ctrlr->dev, "unable to alloc pci resource\n"); 9290792Sgshapiro return (ENOMEM); 9390792Sgshapiro } 9490792Sgshapiro 9590792Sgshapiro ctrlr->chatham_bus_tag = rman_get_bustag(ctrlr->chatham_resource); 9690792Sgshapiro ctrlr->chatham_bus_handle = 9790792Sgshapiro rman_get_bushandle(ctrlr->chatham_resource); 9890792Sgshapiro 99141858Sgshapiro return (0); 10090792Sgshapiro} 10190792Sgshapiro 10290792Sgshapirostatic void 10390792Sgshapironvme_ctrlr_setup_chatham(struct nvme_controller *ctrlr) 10490792Sgshapiro{ 10590792Sgshapiro uint64_t reg1, reg2, reg3; 10690792Sgshapiro uint64_t temp1, temp2; 10790792Sgshapiro uint32_t temp3; 10890792Sgshapiro uint32_t use_flash_timings = 0; 10990792Sgshapiro 11090792Sgshapiro DELAY(10000); 11190792Sgshapiro 11290792Sgshapiro temp3 = chatham_read_4(ctrlr, 0x8080); 11390792Sgshapiro 11490792Sgshapiro device_printf(ctrlr->dev, "Chatham version: 0x%x\n", temp3); 11590792Sgshapiro 
11690792Sgshapiro ctrlr->chatham_lbas = chatham_read_4(ctrlr, 0x8068) - 0x110; 11790792Sgshapiro ctrlr->chatham_size = ctrlr->chatham_lbas * 512; 11890792Sgshapiro 11990792Sgshapiro device_printf(ctrlr->dev, "Chatham size: %jd\n", 12090792Sgshapiro (intmax_t)ctrlr->chatham_size); 12190792Sgshapiro 12290792Sgshapiro reg1 = reg2 = reg3 = ctrlr->chatham_size - 1; 12390792Sgshapiro 12490792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.use_flash_timings", &use_flash_timings); 12590792Sgshapiro if (use_flash_timings) { 12690792Sgshapiro device_printf(ctrlr->dev, "Chatham: using flash timings\n"); 12790792Sgshapiro temp1 = 0x00001b58000007d0LL; 12890792Sgshapiro temp2 = 0x000000cb00000131LL; 12990792Sgshapiro } else { 13090792Sgshapiro device_printf(ctrlr->dev, "Chatham: using DDR timings\n"); 13190792Sgshapiro temp1 = temp2 = 0x0LL; 13290792Sgshapiro } 13390792Sgshapiro 13490792Sgshapiro chatham_write_8(ctrlr, 0x8000, reg1); 13590792Sgshapiro chatham_write_8(ctrlr, 0x8008, reg2); 13690792Sgshapiro chatham_write_8(ctrlr, 0x8010, reg3); 13790792Sgshapiro 13890792Sgshapiro chatham_write_8(ctrlr, 0x8020, temp1); 13990792Sgshapiro temp3 = chatham_read_4(ctrlr, 0x8020); 14090792Sgshapiro 14190792Sgshapiro chatham_write_8(ctrlr, 0x8028, temp2); 14290792Sgshapiro temp3 = chatham_read_4(ctrlr, 0x8028); 14390792Sgshapiro 14490792Sgshapiro chatham_write_8(ctrlr, 0x8030, temp1); 14590792Sgshapiro chatham_write_8(ctrlr, 0x8038, temp2); 14690792Sgshapiro chatham_write_8(ctrlr, 0x8040, temp1); 14790792Sgshapiro chatham_write_8(ctrlr, 0x8048, temp2); 14890792Sgshapiro chatham_write_8(ctrlr, 0x8050, temp1); 14990792Sgshapiro chatham_write_8(ctrlr, 0x8058, temp2); 15090792Sgshapiro 15190792Sgshapiro DELAY(10000); 15290792Sgshapiro} 15390792Sgshapiro 15490792Sgshapirostatic void 15590792Sgshapironvme_chatham_populate_cdata(struct nvme_controller *ctrlr) 15690792Sgshapiro{ 15790792Sgshapiro struct nvme_controller_data *cdata; 15890792Sgshapiro 15990792Sgshapiro cdata = &ctrlr->cdata; 
16090792Sgshapiro 16190792Sgshapiro cdata->vid = 0x8086; 16290792Sgshapiro cdata->ssvid = 0x2011; 16390792Sgshapiro 16490792Sgshapiro /* 16590792Sgshapiro * Chatham2 puts garbage data in these fields when we 16690792Sgshapiro * invoke IDENTIFY_CONTROLLER, so we need to re-zero 16790792Sgshapiro * the fields before calling bcopy(). 16890792Sgshapiro */ 16990792Sgshapiro memset(cdata->sn, 0, sizeof(cdata->sn)); 17090792Sgshapiro memcpy(cdata->sn, "2012", strlen("2012")); 17190792Sgshapiro memset(cdata->mn, 0, sizeof(cdata->mn)); 17290792Sgshapiro memcpy(cdata->mn, "CHATHAM2", strlen("CHATHAM2")); 17390792Sgshapiro memset(cdata->fr, 0, sizeof(cdata->fr)); 17490792Sgshapiro memcpy(cdata->fr, "0", strlen("0")); 17590792Sgshapiro cdata->rab = 8; 17690792Sgshapiro cdata->aerl = 3; 17790792Sgshapiro cdata->lpa.ns_smart = 1; 17890792Sgshapiro cdata->sqes.min = 6; 17990792Sgshapiro cdata->sqes.max = 6; 18090792Sgshapiro cdata->sqes.min = 4; 18190792Sgshapiro cdata->sqes.max = 4; 18290792Sgshapiro cdata->nn = 1; 18390792Sgshapiro 18490792Sgshapiro /* Chatham2 doesn't support DSM command */ 18590792Sgshapiro cdata->oncs.dsm = 0; 18690792Sgshapiro 18790792Sgshapiro cdata->vwc.present = 1; 18890792Sgshapiro} 18990792Sgshapiro#endif /* CHATHAM2 */ 19090792Sgshapiro 19190792Sgshapirostatic void 19290792Sgshapironvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr) 19390792Sgshapiro{ 19490792Sgshapiro struct nvme_qpair *qpair; 19590792Sgshapiro uint32_t num_entries; 19690792Sgshapiro 19790792Sgshapiro qpair = &ctrlr->adminq; 19890792Sgshapiro 19990792Sgshapiro num_entries = NVME_ADMIN_ENTRIES; 20090792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries); 20190792Sgshapiro /* 20290792Sgshapiro * If admin_entries was overridden to an invalid value, revert it 20390792Sgshapiro * back to our default value. 
20490792Sgshapiro */ 20590792Sgshapiro if (num_entries < NVME_MIN_ADMIN_ENTRIES || 20690792Sgshapiro num_entries > NVME_MAX_ADMIN_ENTRIES) { 20790792Sgshapiro printf("nvme: invalid hw.nvme.admin_entries=%d specified\n", 20890792Sgshapiro num_entries); 20990792Sgshapiro num_entries = NVME_ADMIN_ENTRIES; 21090792Sgshapiro } 21190792Sgshapiro 21290792Sgshapiro /* 21390792Sgshapiro * The admin queue's max xfer size is treated differently than the 21490792Sgshapiro * max I/O xfer size. 16KB is sufficient here - maybe even less? 21590792Sgshapiro */ 21690792Sgshapiro nvme_qpair_construct(qpair, 21790792Sgshapiro 0, /* qpair ID */ 21890792Sgshapiro 0, /* vector */ 21990792Sgshapiro num_entries, 22090792Sgshapiro NVME_ADMIN_TRACKERS, 22190792Sgshapiro 16*1024, /* max xfer size */ 22290792Sgshapiro ctrlr); 22390792Sgshapiro} 22490792Sgshapiro 22590792Sgshapirostatic int 22690792Sgshapironvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr) 22790792Sgshapiro{ 22890792Sgshapiro struct nvme_qpair *qpair; 22990792Sgshapiro union cap_lo_register cap_lo; 23090792Sgshapiro int i, num_entries, num_trackers; 23190792Sgshapiro 23290792Sgshapiro num_entries = NVME_IO_ENTRIES; 23390792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries); 23490792Sgshapiro 23590792Sgshapiro /* 23690792Sgshapiro * NVMe spec sets a hard limit of 64K max entries, but 23790792Sgshapiro * devices may specify a smaller limit, so we need to check 23890792Sgshapiro * the MQES field in the capabilities register. 
23990792Sgshapiro */ 24090792Sgshapiro cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); 24190792Sgshapiro num_entries = min(num_entries, cap_lo.bits.mqes+1); 24290792Sgshapiro 24390792Sgshapiro num_trackers = NVME_IO_TRACKERS; 24490792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers); 24590792Sgshapiro 24690792Sgshapiro num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS); 24790792Sgshapiro num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS); 24890792Sgshapiro /* 24990792Sgshapiro * No need to have more trackers than entries in the submit queue. 25090792Sgshapiro * Note also that for a queue size of N, we can only have (N-1) 25190792Sgshapiro * commands outstanding, hence the "-1" here. 25290792Sgshapiro */ 25390792Sgshapiro num_trackers = min(num_trackers, (num_entries-1)); 25490792Sgshapiro 25590792Sgshapiro ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE; 25690792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size); 25790792Sgshapiro /* 25890792Sgshapiro * Check that tunable doesn't specify a size greater than what our 25990792Sgshapiro * driver supports, and is an even PAGE_SIZE multiple. 26090792Sgshapiro */ 26190792Sgshapiro if (ctrlr->max_xfer_size > NVME_MAX_XFER_SIZE || 26290792Sgshapiro ctrlr->max_xfer_size % PAGE_SIZE) 26390792Sgshapiro ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE; 26490792Sgshapiro 26590792Sgshapiro ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair), 26690792Sgshapiro M_NVME, M_ZERO | M_WAITOK); 26790792Sgshapiro 26890792Sgshapiro for (i = 0; i < ctrlr->num_io_queues; i++) { 26990792Sgshapiro qpair = &ctrlr->ioq[i]; 27090792Sgshapiro 27190792Sgshapiro /* 27290792Sgshapiro * Admin queue has ID=0. IO queues start at ID=1 - 27390792Sgshapiro * hence the 'i+1' here. 27490792Sgshapiro * 27590792Sgshapiro * For I/O queues, use the controller-wide max_xfer_size 27690792Sgshapiro * calculated in nvme_attach(). 
27790792Sgshapiro */ 27890792Sgshapiro nvme_qpair_construct(qpair, 27990792Sgshapiro i+1, /* qpair ID */ 28090792Sgshapiro ctrlr->msix_enabled ? i+1 : 0, /* vector */ 28190792Sgshapiro num_entries, 28290792Sgshapiro num_trackers, 28390792Sgshapiro ctrlr->max_xfer_size, 28490792Sgshapiro ctrlr); 28590792Sgshapiro 28690792Sgshapiro if (ctrlr->per_cpu_io_queues) 28790792Sgshapiro bus_bind_intr(ctrlr->dev, qpair->res, i); 28890792Sgshapiro } 28990792Sgshapiro 29090792Sgshapiro return (0); 29190792Sgshapiro} 29290792Sgshapiro 29390792Sgshapirostatic void 29490792Sgshapironvme_ctrlr_fail(struct nvme_controller *ctrlr) 29590792Sgshapiro{ 29690792Sgshapiro int i; 29790792Sgshapiro 29890792Sgshapiro ctrlr->is_failed = TRUE; 29990792Sgshapiro nvme_qpair_fail(&ctrlr->adminq); 30090792Sgshapiro for (i = 0; i < ctrlr->num_io_queues; i++) 30190792Sgshapiro nvme_qpair_fail(&ctrlr->ioq[i]); 30290792Sgshapiro nvme_notify_fail_consumers(ctrlr); 30390792Sgshapiro} 30490792Sgshapiro 30590792Sgshapirovoid 30690792Sgshapironvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr, 30790792Sgshapiro struct nvme_request *req) 30890792Sgshapiro{ 30990792Sgshapiro 31090792Sgshapiro mtx_lock(&ctrlr->fail_req_lock); 31190792Sgshapiro STAILQ_INSERT_TAIL(&ctrlr->fail_req, req, stailq); 31290792Sgshapiro mtx_unlock(&ctrlr->fail_req_lock); 31390792Sgshapiro taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->fail_req_task); 31490792Sgshapiro} 31590792Sgshapiro 31690792Sgshapirostatic void 31790792Sgshapironvme_ctrlr_fail_req_task(void *arg, int pending) 31890792Sgshapiro{ 31990792Sgshapiro struct nvme_controller *ctrlr = arg; 32090792Sgshapiro struct nvme_request *req; 32190792Sgshapiro 32290792Sgshapiro mtx_lock(&ctrlr->fail_req_lock); 32390792Sgshapiro while (!STAILQ_EMPTY(&ctrlr->fail_req)) { 32490792Sgshapiro req = STAILQ_FIRST(&ctrlr->fail_req); 32590792Sgshapiro STAILQ_REMOVE_HEAD(&ctrlr->fail_req, stailq); 32690792Sgshapiro nvme_qpair_manual_complete_request(req->qpair, req, 32790792Sgshapiro 
NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE); 32890792Sgshapiro } 32990792Sgshapiro mtx_unlock(&ctrlr->fail_req_lock); 33090792Sgshapiro} 33190792Sgshapiro 33290792Sgshapirostatic int 33390792Sgshapironvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr) 33490792Sgshapiro{ 33590792Sgshapiro int ms_waited; 33690792Sgshapiro union cc_register cc; 33790792Sgshapiro union csts_register csts; 33890792Sgshapiro 33990792Sgshapiro cc.raw = nvme_mmio_read_4(ctrlr, cc); 34090792Sgshapiro csts.raw = nvme_mmio_read_4(ctrlr, csts); 34190792Sgshapiro 34290792Sgshapiro if (!cc.bits.en) { 34390792Sgshapiro device_printf(ctrlr->dev, "%s called with cc.en = 0\n", 34490792Sgshapiro __func__); 34590792Sgshapiro return (ENXIO); 34690792Sgshapiro } 34790792Sgshapiro 34890792Sgshapiro ms_waited = 0; 34990792Sgshapiro 35090792Sgshapiro while (!csts.bits.rdy) { 35190792Sgshapiro DELAY(1000); 35290792Sgshapiro if (ms_waited++ > ctrlr->ready_timeout_in_ms) { 35390792Sgshapiro device_printf(ctrlr->dev, "controller did not become " 35490792Sgshapiro "ready within %d ms\n", ctrlr->ready_timeout_in_ms); 35590792Sgshapiro return (ENXIO); 35690792Sgshapiro } 35790792Sgshapiro csts.raw = nvme_mmio_read_4(ctrlr, csts); 35890792Sgshapiro } 35990792Sgshapiro 36090792Sgshapiro return (0); 36190792Sgshapiro} 36290792Sgshapiro 36390792Sgshapirostatic void 36490792Sgshapironvme_ctrlr_disable(struct nvme_controller *ctrlr) 36590792Sgshapiro{ 36690792Sgshapiro union cc_register cc; 36790792Sgshapiro union csts_register csts; 36890792Sgshapiro 36990792Sgshapiro cc.raw = nvme_mmio_read_4(ctrlr, cc); 37090792Sgshapiro csts.raw = nvme_mmio_read_4(ctrlr, csts); 37190792Sgshapiro 37290792Sgshapiro if (cc.bits.en == 1 && csts.bits.rdy == 0) 37390792Sgshapiro nvme_ctrlr_wait_for_ready(ctrlr); 37490792Sgshapiro 37590792Sgshapiro cc.bits.en = 0; 37690792Sgshapiro nvme_mmio_write_4(ctrlr, cc, cc.raw); 37790792Sgshapiro DELAY(5000); 37890792Sgshapiro} 37990792Sgshapiro 38090792Sgshapirostatic int 
38190792Sgshapironvme_ctrlr_enable(struct nvme_controller *ctrlr) 38290792Sgshapiro{ 38390792Sgshapiro union cc_register cc; 38490792Sgshapiro union csts_register csts; 38590792Sgshapiro union aqa_register aqa; 38690792Sgshapiro 38790792Sgshapiro cc.raw = nvme_mmio_read_4(ctrlr, cc); 38890792Sgshapiro csts.raw = nvme_mmio_read_4(ctrlr, csts); 38990792Sgshapiro 39090792Sgshapiro if (cc.bits.en == 1) { 39190792Sgshapiro if (csts.bits.rdy == 1) 39290792Sgshapiro return (0); 39390792Sgshapiro else 39490792Sgshapiro return (nvme_ctrlr_wait_for_ready(ctrlr)); 39590792Sgshapiro } 39690792Sgshapiro 39790792Sgshapiro nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr); 39890792Sgshapiro DELAY(5000); 39990792Sgshapiro nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr); 40090792Sgshapiro DELAY(5000); 40190792Sgshapiro 40290792Sgshapiro aqa.raw = 0; 40390792Sgshapiro /* acqs and asqs are 0-based. */ 40490792Sgshapiro aqa.bits.acqs = ctrlr->adminq.num_entries-1; 40590792Sgshapiro aqa.bits.asqs = ctrlr->adminq.num_entries-1; 40690792Sgshapiro nvme_mmio_write_4(ctrlr, aqa, aqa.raw); 40790792Sgshapiro DELAY(5000); 40890792Sgshapiro 40990792Sgshapiro cc.bits.en = 1; 41090792Sgshapiro cc.bits.css = 0; 41190792Sgshapiro cc.bits.ams = 0; 41290792Sgshapiro cc.bits.shn = 0; 41390792Sgshapiro cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ 41490792Sgshapiro cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ 41590792Sgshapiro 41690792Sgshapiro /* This evaluates to 0, which is according to spec. 
*/ 41790792Sgshapiro cc.bits.mps = (PAGE_SIZE >> 13); 41890792Sgshapiro 41990792Sgshapiro nvme_mmio_write_4(ctrlr, cc, cc.raw); 42090792Sgshapiro DELAY(5000); 42190792Sgshapiro 42290792Sgshapiro return (nvme_ctrlr_wait_for_ready(ctrlr)); 42390792Sgshapiro} 42490792Sgshapiro 42590792Sgshapiroint 42690792Sgshapironvme_ctrlr_hw_reset(struct nvme_controller *ctrlr) 42790792Sgshapiro{ 42890792Sgshapiro int i; 42990792Sgshapiro 43090792Sgshapiro nvme_admin_qpair_disable(&ctrlr->adminq); 43190792Sgshapiro for (i = 0; i < ctrlr->num_io_queues; i++) 43290792Sgshapiro nvme_io_qpair_disable(&ctrlr->ioq[i]); 43390792Sgshapiro 43490792Sgshapiro DELAY(100*1000); 43590792Sgshapiro 43690792Sgshapiro nvme_ctrlr_disable(ctrlr); 43790792Sgshapiro return (nvme_ctrlr_enable(ctrlr)); 43890792Sgshapiro} 43990792Sgshapiro 44090792Sgshapirovoid 44190792Sgshapironvme_ctrlr_reset(struct nvme_controller *ctrlr) 44290792Sgshapiro{ 44390792Sgshapiro int cmpset; 44490792Sgshapiro 44590792Sgshapiro cmpset = atomic_cmpset_32(&ctrlr->is_resetting, 0, 1); 44690792Sgshapiro 44790792Sgshapiro if (cmpset == 0 || ctrlr->is_failed) 44890792Sgshapiro /* 44990792Sgshapiro * Controller is already resetting or has failed. Return 45090792Sgshapiro * immediately since there is no need to kick off another 45190792Sgshapiro * reset in these cases. 
45290792Sgshapiro */ 45390792Sgshapiro return; 45490792Sgshapiro 45590792Sgshapiro taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task); 45690792Sgshapiro} 45790792Sgshapiro 45890792Sgshapirostatic int 45990792Sgshapironvme_ctrlr_identify(struct nvme_controller *ctrlr) 46090792Sgshapiro{ 46190792Sgshapiro struct nvme_completion_poll_status status; 46290792Sgshapiro 46390792Sgshapiro status.done = FALSE; 46490792Sgshapiro nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata, 46590792Sgshapiro nvme_completion_poll_cb, &status); 46690792Sgshapiro while (status.done == FALSE) 46790792Sgshapiro DELAY(5); 46890792Sgshapiro if (nvme_completion_is_error(&status.cpl)) { 46990792Sgshapiro printf("nvme_identify_controller failed!\n"); 47090792Sgshapiro return (ENXIO); 47190792Sgshapiro } 47290792Sgshapiro 47390792Sgshapiro#ifdef CHATHAM2 47490792Sgshapiro if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) 47590792Sgshapiro nvme_chatham_populate_cdata(ctrlr); 47690792Sgshapiro#endif 47790792Sgshapiro 47890792Sgshapiro /* 47990792Sgshapiro * Use MDTS to ensure our default max_xfer_size doesn't exceed what the 48090792Sgshapiro * controller supports. 
48190792Sgshapiro */ 48290792Sgshapiro if (ctrlr->cdata.mdts > 0) 48390792Sgshapiro ctrlr->max_xfer_size = min(ctrlr->max_xfer_size, 48490792Sgshapiro ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts))); 48590792Sgshapiro 48690792Sgshapiro return (0); 48790792Sgshapiro} 48890792Sgshapiro 48990792Sgshapirostatic int 49090792Sgshapironvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr) 49190792Sgshapiro{ 49290792Sgshapiro struct nvme_completion_poll_status status; 49390792Sgshapiro int cq_allocated, sq_allocated; 49490792Sgshapiro 49590792Sgshapiro status.done = FALSE; 49690792Sgshapiro nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues, 49790792Sgshapiro nvme_completion_poll_cb, &status); 49890792Sgshapiro while (status.done == FALSE) 49990792Sgshapiro DELAY(5); 50090792Sgshapiro if (nvme_completion_is_error(&status.cpl)) { 50190792Sgshapiro printf("nvme_set_num_queues failed!\n"); 50290792Sgshapiro return (ENXIO); 50390792Sgshapiro } 50490792Sgshapiro 50590792Sgshapiro /* 50690792Sgshapiro * Data in cdw0 is 0-based. 50790792Sgshapiro * Lower 16-bits indicate number of submission queues allocated. 50890792Sgshapiro * Upper 16-bits indicate number of completion queues allocated. 50990792Sgshapiro */ 51090792Sgshapiro sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1; 51190792Sgshapiro cq_allocated = (status.cpl.cdw0 >> 16) + 1; 51290792Sgshapiro 51390792Sgshapiro /* 51490792Sgshapiro * Check that the controller was able to allocate the number of 51590792Sgshapiro * queues we requested. If not, revert to one IO queue. 51690792Sgshapiro */ 51790792Sgshapiro if (sq_allocated < ctrlr->num_io_queues || 51890792Sgshapiro cq_allocated < ctrlr->num_io_queues) { 51990792Sgshapiro ctrlr->num_io_queues = 1; 52090792Sgshapiro ctrlr->per_cpu_io_queues = 0; 52190792Sgshapiro 52290792Sgshapiro /* TODO: destroy extra queues that were created 52390792Sgshapiro * previously but now found to be not needed. 
52490792Sgshapiro */ 52590792Sgshapiro } 52690792Sgshapiro 52790792Sgshapiro return (0); 52890792Sgshapiro} 52990792Sgshapiro 53090792Sgshapirostatic int 53190792Sgshapironvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr) 53290792Sgshapiro{ 53390792Sgshapiro struct nvme_completion_poll_status status; 53490792Sgshapiro struct nvme_qpair *qpair; 53590792Sgshapiro int i; 53690792Sgshapiro 53790792Sgshapiro for (i = 0; i < ctrlr->num_io_queues; i++) { 538157001Sgshapiro qpair = &ctrlr->ioq[i]; 539157001Sgshapiro 540157001Sgshapiro status.done = FALSE; 541157001Sgshapiro nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector, 54290792Sgshapiro nvme_completion_poll_cb, &status); 543157001Sgshapiro while (status.done == FALSE) 54490792Sgshapiro DELAY(5); 545157001Sgshapiro if (nvme_completion_is_error(&status.cpl)) { 546157001Sgshapiro printf("nvme_create_io_cq failed!\n"); 547157001Sgshapiro return (ENXIO); 548157001Sgshapiro } 54990792Sgshapiro 550157001Sgshapiro status.done = FALSE; 55190792Sgshapiro nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, 55290792Sgshapiro nvme_completion_poll_cb, &status); 55390792Sgshapiro while (status.done == FALSE) 55490792Sgshapiro DELAY(5); 55590792Sgshapiro if (nvme_completion_is_error(&status.cpl)) { 55690792Sgshapiro printf("nvme_create_io_sq failed!\n"); 55790792Sgshapiro return (ENXIO); 55890792Sgshapiro } 55990792Sgshapiro } 56090792Sgshapiro 56190792Sgshapiro return (0); 56290792Sgshapiro} 56390792Sgshapiro 56490792Sgshapirostatic int 56590792Sgshapironvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr) 56690792Sgshapiro{ 56790792Sgshapiro struct nvme_namespace *ns; 56890792Sgshapiro int i, status; 56990792Sgshapiro 57090792Sgshapiro for (i = 0; i < ctrlr->cdata.nn; i++) { 57190792Sgshapiro ns = &ctrlr->ns[i]; 57290792Sgshapiro status = nvme_ns_construct(ns, i+1, ctrlr); 57390792Sgshapiro if (status != 0) 57490792Sgshapiro return (status); 57590792Sgshapiro } 57690792Sgshapiro 57790792Sgshapiro return (0); 
57890792Sgshapiro} 57990792Sgshapiro 58090792Sgshapirostatic boolean_t 58190792Sgshapirois_log_page_id_valid(uint8_t page_id) 58290792Sgshapiro{ 58390792Sgshapiro 58490792Sgshapiro switch (page_id) { 58590792Sgshapiro case NVME_LOG_ERROR: 58690792Sgshapiro case NVME_LOG_HEALTH_INFORMATION: 58790792Sgshapiro case NVME_LOG_FIRMWARE_SLOT: 58890792Sgshapiro return (TRUE); 58990792Sgshapiro } 59090792Sgshapiro 59190792Sgshapiro return (FALSE); 59290792Sgshapiro} 59390792Sgshapiro 59490792Sgshapirostatic uint32_t 59590792Sgshapironvme_ctrlr_get_log_page_size(struct nvme_controller *ctrlr, uint8_t page_id) 59690792Sgshapiro{ 59790792Sgshapiro uint32_t log_page_size; 59890792Sgshapiro 59990792Sgshapiro switch (page_id) { 60090792Sgshapiro case NVME_LOG_ERROR: 60190792Sgshapiro log_page_size = min( 60290792Sgshapiro sizeof(struct nvme_error_information_entry) * 60390792Sgshapiro ctrlr->cdata.elpe, 60490792Sgshapiro NVME_MAX_AER_LOG_SIZE); 60590792Sgshapiro break; 60690792Sgshapiro case NVME_LOG_HEALTH_INFORMATION: 60790792Sgshapiro log_page_size = sizeof(struct nvme_health_information_page); 60890792Sgshapiro break; 60990792Sgshapiro case NVME_LOG_FIRMWARE_SLOT: 61090792Sgshapiro log_page_size = sizeof(struct nvme_firmware_page); 61190792Sgshapiro break; 61290792Sgshapiro default: 61390792Sgshapiro log_page_size = 0; 61490792Sgshapiro break; 61590792Sgshapiro } 61690792Sgshapiro 61790792Sgshapiro return (log_page_size); 61890792Sgshapiro} 61990792Sgshapiro 62090792Sgshapirostatic void 62190792Sgshapironvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl) 62290792Sgshapiro{ 62390792Sgshapiro struct nvme_async_event_request *aer = arg; 62490792Sgshapiro 62590792Sgshapiro /* 62690792Sgshapiro * If the log page fetch for some reason completed with an error, 62790792Sgshapiro * don't pass log page data to the consumers. In practice, this case 62890792Sgshapiro * should never happen. 
62990792Sgshapiro */ 63090792Sgshapiro if (nvme_completion_is_error(cpl)) 63190792Sgshapiro nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, 63290792Sgshapiro aer->log_page_id, NULL, 0); 63390792Sgshapiro else 63490792Sgshapiro /* 63590792Sgshapiro * Pass the cpl data from the original async event completion, 63690792Sgshapiro * not the log page fetch. 63790792Sgshapiro */ 63890792Sgshapiro nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, 63990792Sgshapiro aer->log_page_id, aer->log_page_buffer, aer->log_page_size); 64090792Sgshapiro 64190792Sgshapiro /* 64290792Sgshapiro * Repost another asynchronous event request to replace the one 64390792Sgshapiro * that just completed. 64490792Sgshapiro */ 64590792Sgshapiro nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer); 64690792Sgshapiro} 64790792Sgshapiro 64890792Sgshapirostatic void 64990792Sgshapironvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl) 65090792Sgshapiro{ 65190792Sgshapiro struct nvme_async_event_request *aer = arg; 65290792Sgshapiro 65390792Sgshapiro if (cpl->status.sc == NVME_SC_ABORTED_SQ_DELETION) { 65490792Sgshapiro /* 65590792Sgshapiro * This is simulated when controller is being shut down, to 65690792Sgshapiro * effectively abort outstanding asynchronous event requests 65790792Sgshapiro * and make sure all memory is freed. Do not repost the 65890792Sgshapiro * request in this case. 65990792Sgshapiro */ 66090792Sgshapiro return; 66190792Sgshapiro } 66290792Sgshapiro 66390792Sgshapiro printf("Asynchronous event occurred.\n"); 66490792Sgshapiro 66590792Sgshapiro /* Associated log page is in bits 23:16 of completion entry dw0. 
*/ 66690792Sgshapiro aer->log_page_id = (cpl->cdw0 & 0xFF0000) >> 16; 66790792Sgshapiro 66890792Sgshapiro if (is_log_page_id_valid(aer->log_page_id)) { 66990792Sgshapiro aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr, 67090792Sgshapiro aer->log_page_id); 67190792Sgshapiro memcpy(&aer->cpl, cpl, sizeof(*cpl)); 67290792Sgshapiro nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id, 67390792Sgshapiro NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer, 67490792Sgshapiro aer->log_page_size, nvme_ctrlr_async_event_log_page_cb, 67590792Sgshapiro aer); 67690792Sgshapiro /* Wait to notify consumers until after log page is fetched. */ 67790792Sgshapiro } else { 67890792Sgshapiro nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id, 67990792Sgshapiro NULL, 0); 68090792Sgshapiro 68190792Sgshapiro /* 68290792Sgshapiro * Repost another asynchronous event request to replace the one 68390792Sgshapiro * that just completed. 68490792Sgshapiro */ 68590792Sgshapiro nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer); 68690792Sgshapiro } 68790792Sgshapiro} 68890792Sgshapiro 68990792Sgshapirostatic void 69090792Sgshapironvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, 69190792Sgshapiro struct nvme_async_event_request *aer) 69290792Sgshapiro{ 69390792Sgshapiro struct nvme_request *req; 69490792Sgshapiro 69590792Sgshapiro aer->ctrlr = ctrlr; 69690792Sgshapiro req = nvme_allocate_request(NULL, 0, nvme_ctrlr_async_event_cb, aer); 69790792Sgshapiro aer->req = req; 69890792Sgshapiro 69990792Sgshapiro /* 70090792Sgshapiro * Disable timeout here, since asynchronous event requests should by 70190792Sgshapiro * nature never be timed out. 
70290792Sgshapiro */ 70390792Sgshapiro req->timeout = FALSE; 70490792Sgshapiro req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST; 70590792Sgshapiro nvme_ctrlr_submit_admin_request(ctrlr, req); 70690792Sgshapiro} 70790792Sgshapiro 70890792Sgshapirostatic void 70990792Sgshapironvme_ctrlr_configure_aer(struct nvme_controller *ctrlr) 71090792Sgshapiro{ 71190792Sgshapiro union nvme_critical_warning_state state; 71290792Sgshapiro struct nvme_async_event_request *aer; 71390792Sgshapiro uint32_t i; 71490792Sgshapiro 71590792Sgshapiro state.raw = 0xFF; 71690792Sgshapiro state.bits.reserved = 0; 71790792Sgshapiro nvme_ctrlr_cmd_set_async_event_config(ctrlr, state, NULL, NULL); 71890792Sgshapiro 71990792Sgshapiro /* aerl is a zero-based value, so we need to add 1 here. */ 72090792Sgshapiro ctrlr->num_aers = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1)); 72190792Sgshapiro 72290792Sgshapiro /* Chatham doesn't support AERs. */ 72390792Sgshapiro if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) 72490792Sgshapiro ctrlr->num_aers = 0; 72590792Sgshapiro 72690792Sgshapiro for (i = 0; i < ctrlr->num_aers; i++) { 72790792Sgshapiro aer = &ctrlr->aer[i]; 72890792Sgshapiro nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); 72990792Sgshapiro } 73090792Sgshapiro} 73190792Sgshapiro 73290792Sgshapirostatic void 73390792Sgshapironvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr) 73490792Sgshapiro{ 73590792Sgshapiro 73690792Sgshapiro ctrlr->int_coal_time = 0; 73790792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.int_coal_time", 73890792Sgshapiro &ctrlr->int_coal_time); 73990792Sgshapiro 74090792Sgshapiro ctrlr->int_coal_threshold = 0; 74190792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold", 74290792Sgshapiro &ctrlr->int_coal_threshold); 74390792Sgshapiro 74490792Sgshapiro nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time, 74590792Sgshapiro ctrlr->int_coal_threshold, NULL, NULL); 74690792Sgshapiro} 74790792Sgshapiro 74890792Sgshapirostatic void 
74990792Sgshapironvme_ctrlr_start(void *ctrlr_arg) 75090792Sgshapiro{ 75190792Sgshapiro struct nvme_controller *ctrlr = ctrlr_arg; 75290792Sgshapiro int i; 75390792Sgshapiro 75490792Sgshapiro nvme_qpair_reset(&ctrlr->adminq); 75590792Sgshapiro for (i = 0; i < ctrlr->num_io_queues; i++) 75690792Sgshapiro nvme_qpair_reset(&ctrlr->ioq[i]); 75790792Sgshapiro 75890792Sgshapiro nvme_admin_qpair_enable(&ctrlr->adminq); 75990792Sgshapiro 76090792Sgshapiro if (nvme_ctrlr_identify(ctrlr) != 0) { 76190792Sgshapiro nvme_ctrlr_fail(ctrlr); 76290792Sgshapiro return; 76390792Sgshapiro } 76490792Sgshapiro 76590792Sgshapiro if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) { 76690792Sgshapiro nvme_ctrlr_fail(ctrlr); 76790792Sgshapiro return; 76890792Sgshapiro } 76990792Sgshapiro 77090792Sgshapiro if (nvme_ctrlr_create_qpairs(ctrlr) != 0) { 77190792Sgshapiro nvme_ctrlr_fail(ctrlr); 77290792Sgshapiro return; 77390792Sgshapiro } 77490792Sgshapiro 77590792Sgshapiro if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) { 77690792Sgshapiro nvme_ctrlr_fail(ctrlr); 77790792Sgshapiro return; 77890792Sgshapiro } 77990792Sgshapiro 78090792Sgshapiro nvme_ctrlr_configure_aer(ctrlr); 78190792Sgshapiro nvme_ctrlr_configure_int_coalescing(ctrlr); 78290792Sgshapiro 78390792Sgshapiro for (i = 0; i < ctrlr->num_io_queues; i++) 78490792Sgshapiro nvme_io_qpair_enable(&ctrlr->ioq[i]); 78590792Sgshapiro 78690792Sgshapiro /* 78790792Sgshapiro * Clear software progress marker to 0, to indicate to pre-boot 78890792Sgshapiro * software that OS driver load was successful. 78990792Sgshapiro * 79090792Sgshapiro * Chatham does not support this feature. 
79190792Sgshapiro */ 79290792Sgshapiro if (pci_get_devid(ctrlr->dev) != CHATHAM_PCI_ID) 79390792Sgshapiro nvme_ctrlr_cmd_set_feature(ctrlr, 79490792Sgshapiro NVME_FEAT_SOFTWARE_PROGRESS_MARKER, 0, NULL, 0, NULL, NULL); 79590792Sgshapiro} 79690792Sgshapiro 79790792Sgshapirovoid 79890792Sgshapironvme_ctrlr_start_config_hook(void *arg) 79990792Sgshapiro{ 80090792Sgshapiro struct nvme_controller *ctrlr = arg; 80190792Sgshapiro 80290792Sgshapiro nvme_ctrlr_start(ctrlr); 80390792Sgshapiro config_intrhook_disestablish(&ctrlr->config_hook); 80490792Sgshapiro} 80590792Sgshapiro 80690792Sgshapirostatic void 80790792Sgshapironvme_ctrlr_reset_task(void *arg, int pending) 80890792Sgshapiro{ 80990792Sgshapiro struct nvme_controller *ctrlr = arg; 81090792Sgshapiro int status; 81190792Sgshapiro 81290792Sgshapiro device_printf(ctrlr->dev, "resetting controller"); 81390792Sgshapiro status = nvme_ctrlr_hw_reset(ctrlr); 81490792Sgshapiro /* 81590792Sgshapiro * Use pause instead of DELAY, so that we yield to any nvme interrupt 81690792Sgshapiro * handlers on this CPU that were blocked on a qpair lock. We want 81790792Sgshapiro * all nvme interrupts completed before proceeding with restarting the 81890792Sgshapiro * controller. 81990792Sgshapiro * 82090792Sgshapiro * XXX - any way to guarantee the interrupt handlers have quiesced? 
82190792Sgshapiro */ 82290792Sgshapiro pause("nvmereset", hz / 10); 82390792Sgshapiro if (status == 0) 82490792Sgshapiro nvme_ctrlr_start(ctrlr); 82590792Sgshapiro else 82690792Sgshapiro nvme_ctrlr_fail(ctrlr); 82790792Sgshapiro 82890792Sgshapiro atomic_cmpset_32(&ctrlr->is_resetting, 1, 0); 82990792Sgshapiro} 83090792Sgshapiro 83190792Sgshapirostatic void 83290792Sgshapironvme_ctrlr_intx_handler(void *arg) 83390792Sgshapiro{ 83490792Sgshapiro struct nvme_controller *ctrlr = arg; 83590792Sgshapiro 83690792Sgshapiro nvme_mmio_write_4(ctrlr, intms, 1); 83790792Sgshapiro 83890792Sgshapiro nvme_qpair_process_completions(&ctrlr->adminq); 83990792Sgshapiro 84090792Sgshapiro if (ctrlr->ioq[0].cpl) 84190792Sgshapiro nvme_qpair_process_completions(&ctrlr->ioq[0]); 84290792Sgshapiro 84390792Sgshapiro nvme_mmio_write_4(ctrlr, intmc, 1); 84490792Sgshapiro} 84590792Sgshapiro 84690792Sgshapirostatic int 84790792Sgshapironvme_ctrlr_configure_intx(struct nvme_controller *ctrlr) 84890792Sgshapiro{ 84990792Sgshapiro 85090792Sgshapiro ctrlr->num_io_queues = 1; 85190792Sgshapiro ctrlr->per_cpu_io_queues = 0; 85290792Sgshapiro ctrlr->rid = 0; 85390792Sgshapiro ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ, 85490792Sgshapiro &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE); 85590792Sgshapiro 85690792Sgshapiro if (ctrlr->res == NULL) { 85790792Sgshapiro device_printf(ctrlr->dev, "unable to allocate shared IRQ\n"); 85890792Sgshapiro return (ENOMEM); 85990792Sgshapiro } 86090792Sgshapiro 86190792Sgshapiro bus_setup_intr(ctrlr->dev, ctrlr->res, 86290792Sgshapiro INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler, 86390792Sgshapiro ctrlr, &ctrlr->tag); 86490792Sgshapiro 86590792Sgshapiro if (ctrlr->tag == NULL) { 86690792Sgshapiro device_printf(ctrlr->dev, 86790792Sgshapiro "unable to setup legacy interrupt handler\n"); 86890792Sgshapiro return (ENOMEM); 86990792Sgshapiro } 87090792Sgshapiro 87190792Sgshapiro return (0); 87290792Sgshapiro} 87390792Sgshapiro 
87490792Sgshapirostatic int 87590792Sgshapironvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, 87690792Sgshapiro struct thread *td) 87790792Sgshapiro{ 87890792Sgshapiro struct nvme_completion_poll_status status; 87990792Sgshapiro struct nvme_controller *ctrlr; 88090792Sgshapiro 88190792Sgshapiro ctrlr = cdev->si_drv1; 88290792Sgshapiro 88390792Sgshapiro switch (cmd) { 88490792Sgshapiro case NVME_IDENTIFY_CONTROLLER: 88590792Sgshapiro#ifdef CHATHAM2 88690792Sgshapiro /* 88790792Sgshapiro * Don't refresh data on Chatham, since Chatham returns 88890792Sgshapiro * garbage on IDENTIFY anyways. 88990792Sgshapiro */ 89090792Sgshapiro if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) { 89190792Sgshapiro memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata)); 89290792Sgshapiro break; 89390792Sgshapiro } 89490792Sgshapiro#endif 89590792Sgshapiro /* Refresh data before returning to user. */ 89690792Sgshapiro status.done = FALSE; 89790792Sgshapiro nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata, 89890792Sgshapiro nvme_completion_poll_cb, &status); 89990792Sgshapiro while (status.done == FALSE) 90090792Sgshapiro DELAY(5); 90190792Sgshapiro if (nvme_completion_is_error(&status.cpl)) 90290792Sgshapiro return (ENXIO); 90390792Sgshapiro memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata)); 90490792Sgshapiro break; 90590792Sgshapiro case NVME_RESET_CONTROLLER: 90690792Sgshapiro nvme_ctrlr_reset(ctrlr); 90790792Sgshapiro break; 90890792Sgshapiro default: 90990792Sgshapiro return (ENOTTY); 91090792Sgshapiro } 91190792Sgshapiro 91290792Sgshapiro return (0); 91390792Sgshapiro} 91490792Sgshapiro 91590792Sgshapirostatic struct cdevsw nvme_ctrlr_cdevsw = { 91690792Sgshapiro .d_version = D_VERSION, 91790792Sgshapiro .d_flags = 0, 91890792Sgshapiro .d_ioctl = nvme_ctrlr_ioctl 91990792Sgshapiro}; 92090792Sgshapiro 92190792Sgshapiroint 92290792Sgshapironvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) 92390792Sgshapiro{ 92490792Sgshapiro union cap_lo_register 
cap_lo; 92590792Sgshapiro union cap_hi_register cap_hi; 92690792Sgshapiro int num_vectors, per_cpu_io_queues, status = 0; 92790792Sgshapiro int timeout_period; 92890792Sgshapiro 92990792Sgshapiro ctrlr->dev = dev; 93090792Sgshapiro 93190792Sgshapiro status = nvme_ctrlr_allocate_bar(ctrlr); 93290792Sgshapiro 93390792Sgshapiro if (status != 0) 93490792Sgshapiro return (status); 93590792Sgshapiro 93690792Sgshapiro#ifdef CHATHAM2 93790792Sgshapiro if (pci_get_devid(dev) == CHATHAM_PCI_ID) { 93890792Sgshapiro status = nvme_ctrlr_allocate_chatham_bar(ctrlr); 93990792Sgshapiro if (status != 0) 94090792Sgshapiro return (status); 94190792Sgshapiro nvme_ctrlr_setup_chatham(ctrlr); 94290792Sgshapiro } 94390792Sgshapiro#endif 94490792Sgshapiro 94590792Sgshapiro /* 94690792Sgshapiro * Software emulators may set the doorbell stride to something 94790792Sgshapiro * other than zero, but this driver is not set up to handle that. 94890792Sgshapiro */ 94990792Sgshapiro cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi); 95090792Sgshapiro if (cap_hi.bits.dstrd != 0) 95190792Sgshapiro return (ENXIO); 95290792Sgshapiro 95390792Sgshapiro ctrlr->min_page_size = 1 << (12 + cap_hi.bits.mpsmin); 95490792Sgshapiro 95590792Sgshapiro /* Get ready timeout value from controller, in units of 500ms. 
*/ 95690792Sgshapiro cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); 95790792Sgshapiro ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500; 95890792Sgshapiro 95990792Sgshapiro timeout_period = NVME_DEFAULT_TIMEOUT_PERIOD; 96090792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.timeout_period", &timeout_period); 96190792Sgshapiro timeout_period = min(timeout_period, NVME_MAX_TIMEOUT_PERIOD); 96290792Sgshapiro timeout_period = max(timeout_period, NVME_MIN_TIMEOUT_PERIOD); 96390792Sgshapiro ctrlr->timeout_period = timeout_period; 96490792Sgshapiro 96590792Sgshapiro nvme_retry_count = NVME_DEFAULT_RETRY_COUNT; 96690792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.retry_count", &nvme_retry_count); 96790792Sgshapiro 96890792Sgshapiro per_cpu_io_queues = 1; 96990792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues); 97090792Sgshapiro ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE; 97190792Sgshapiro 97290792Sgshapiro if (ctrlr->per_cpu_io_queues) 97390792Sgshapiro ctrlr->num_io_queues = mp_ncpus; 97490792Sgshapiro else 97590792Sgshapiro ctrlr->num_io_queues = 1; 97690792Sgshapiro 97790792Sgshapiro ctrlr->force_intx = 0; 97890792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx); 97990792Sgshapiro 98090792Sgshapiro ctrlr->enable_aborts = 0; 98190792Sgshapiro TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts); 98290792Sgshapiro 98390792Sgshapiro ctrlr->msix_enabled = 1; 98490792Sgshapiro 98590792Sgshapiro if (ctrlr->force_intx) { 98690792Sgshapiro ctrlr->msix_enabled = 0; 98790792Sgshapiro goto intx; 98890792Sgshapiro } 98990792Sgshapiro 99090792Sgshapiro /* One vector per IO queue, plus one vector for admin queue. 
*/ 99190792Sgshapiro num_vectors = ctrlr->num_io_queues + 1; 99290792Sgshapiro 99390792Sgshapiro if (pci_msix_count(dev) < num_vectors) { 99490792Sgshapiro ctrlr->msix_enabled = 0; 99590792Sgshapiro goto intx; 99690792Sgshapiro } 99790792Sgshapiro 99890792Sgshapiro if (pci_alloc_msix(dev, &num_vectors) != 0) 99990792Sgshapiro ctrlr->msix_enabled = 0; 100090792Sgshapiro 100190792Sgshapirointx: 100290792Sgshapiro 100390792Sgshapiro if (!ctrlr->msix_enabled) 100490792Sgshapiro nvme_ctrlr_configure_intx(ctrlr); 100590792Sgshapiro 100690792Sgshapiro nvme_ctrlr_construct_admin_qpair(ctrlr); 100790792Sgshapiro 100890792Sgshapiro status = nvme_ctrlr_construct_io_qpairs(ctrlr); 100990792Sgshapiro 101090792Sgshapiro if (status != 0) 101190792Sgshapiro return (status); 101290792Sgshapiro 101390792Sgshapiro ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 101490792Sgshapiro "nvme%d", device_get_unit(dev)); 101590792Sgshapiro 101690792Sgshapiro if (ctrlr->cdev == NULL) 101790792Sgshapiro return (ENXIO); 101890792Sgshapiro 101990792Sgshapiro ctrlr->cdev->si_drv1 = (void *)ctrlr; 102090792Sgshapiro 102190792Sgshapiro ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK, 102290792Sgshapiro taskqueue_thread_enqueue, &ctrlr->taskqueue); 102390792Sgshapiro taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq"); 102490792Sgshapiro 102590792Sgshapiro ctrlr->is_resetting = 0; 102690792Sgshapiro TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr); 102790792Sgshapiro 102890792Sgshapiro TASK_INIT(&ctrlr->fail_req_task, 0, nvme_ctrlr_fail_req_task, ctrlr); 102990792Sgshapiro mtx_init(&ctrlr->fail_req_lock, "nvme ctrlr fail req lock", NULL, 103090792Sgshapiro MTX_DEF); 103190792Sgshapiro STAILQ_INIT(&ctrlr->fail_req); 103290792Sgshapiro ctrlr->is_failed = FALSE; 103390792Sgshapiro 103490792Sgshapiro return (0); 103590792Sgshapiro} 103690792Sgshapiro 103790792Sgshapirovoid 103890792Sgshapironvme_ctrlr_destruct(struct nvme_controller 
*ctrlr, device_t dev) 103990792Sgshapiro{ 104090792Sgshapiro int i; 104190792Sgshapiro 104290792Sgshapiro nvme_ctrlr_disable(ctrlr); 104390792Sgshapiro taskqueue_free(ctrlr->taskqueue); 104490792Sgshapiro 104590792Sgshapiro for (i = 0; i < NVME_MAX_NAMESPACES; i++) 104690792Sgshapiro nvme_ns_destruct(&ctrlr->ns[i]); 104790792Sgshapiro 104890792Sgshapiro if (ctrlr->cdev) 104990792Sgshapiro destroy_dev(ctrlr->cdev); 105090792Sgshapiro 105190792Sgshapiro for (i = 0; i < ctrlr->num_io_queues; i++) { 105290792Sgshapiro nvme_io_qpair_destroy(&ctrlr->ioq[i]); 105390792Sgshapiro } 105490792Sgshapiro 105590792Sgshapiro free(ctrlr->ioq, M_NVME); 105690792Sgshapiro 105790792Sgshapiro nvme_admin_qpair_destroy(&ctrlr->adminq); 105890792Sgshapiro 105990792Sgshapiro if (ctrlr->resource != NULL) { 106090792Sgshapiro bus_release_resource(dev, SYS_RES_MEMORY, 106190792Sgshapiro ctrlr->resource_id, ctrlr->resource); 106290792Sgshapiro } 106390792Sgshapiro 106490792Sgshapiro if (ctrlr->bar4_resource != NULL) { 106590792Sgshapiro bus_release_resource(dev, SYS_RES_MEMORY, 106690792Sgshapiro ctrlr->bar4_resource_id, ctrlr->bar4_resource); 106790792Sgshapiro } 106890792Sgshapiro 106990792Sgshapiro#ifdef CHATHAM2 107090792Sgshapiro if (ctrlr->chatham_resource != NULL) { 107190792Sgshapiro bus_release_resource(dev, SYS_RES_MEMORY, 107290792Sgshapiro ctrlr->chatham_resource_id, ctrlr->chatham_resource); 107390792Sgshapiro } 107490792Sgshapiro#endif 107590792Sgshapiro 107690792Sgshapiro if (ctrlr->tag) 107790792Sgshapiro bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag); 107890792Sgshapiro 107990792Sgshapiro if (ctrlr->res) 108090792Sgshapiro bus_release_resource(ctrlr->dev, SYS_RES_IRQ, 108190792Sgshapiro rman_get_rid(ctrlr->res), ctrlr->res); 108290792Sgshapiro 108390792Sgshapiro if (ctrlr->msix_enabled) 108490792Sgshapiro pci_release_msi(dev); 108590792Sgshapiro} 108690792Sgshapiro 108790792Sgshapirovoid 108890792Sgshapironvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr, 
108990792Sgshapiro struct nvme_request *req) 109090792Sgshapiro{ 109190792Sgshapiro 109290792Sgshapiro nvme_qpair_submit_request(&ctrlr->adminq, req); 109390792Sgshapiro} 109490792Sgshapiro 109590792Sgshapirovoid 109690792Sgshapironvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr, 109790792Sgshapiro struct nvme_request *req) 109890792Sgshapiro{ 109990792Sgshapiro struct nvme_qpair *qpair; 110090792Sgshapiro 110190792Sgshapiro if (ctrlr->per_cpu_io_queues) 110290792Sgshapiro qpair = &ctrlr->ioq[curcpu]; 110390792Sgshapiro else 110490792Sgshapiro qpair = &ctrlr->ioq[0]; 110590792Sgshapiro 110690792Sgshapiro nvme_qpair_submit_request(qpair, req); 110790792Sgshapiro} 110890792Sgshapiro 110990792Sgshapirodevice_t 111090792Sgshapironvme_ctrlr_get_device(struct nvme_controller *ctrlr) 111190792Sgshapiro{ 111290792Sgshapiro 111390792Sgshapiro return (ctrlr->dev); 111490792Sgshapiro} 111590792Sgshapiro 111690792Sgshapiroconst struct nvme_controller_data * 111790792Sgshapironvme_ctrlr_get_data(struct nvme_controller *ctrlr) 111890792Sgshapiro{ 1119 1120 return (&ctrlr->cdata); 1121} 1122