nvme_ctrlr.c revision 248770
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_ctrlr.c 248770 2013-03-26 22:11:34Z jimharris $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "nvme_private.h"

static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
						struct nvme_async_event_request *aer);

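/*
 * Map the controller's memory-mapped register set.  The NVMe registers
 *  normally sit behind BAR 0/1 (BAR 2/3 on Chatham); BAR 4/5 is also
 *  mapped opportunistically in case the MSI-X table resides there.
 */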
static int
nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
{

	/* Chatham puts the NVMe MMRs behind BAR 2/3, not BAR 0/1. */
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		ctrlr->resource_id = PCIR_BAR(2);
	else
		ctrlr->resource_id = PCIR_BAR(0);

	ctrlr->resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
	    &ctrlr->resource_id, 0, ~0, 1, RF_ACTIVE);

	if (ctrlr->resource == NULL) {
		device_printf(ctrlr->dev, "unable to allocate pci resource\n");
		return (ENOMEM);
	}

	ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
	ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
	ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;

	/*
	 * The NVMe spec allows for the MSI-X table to be placed behind
	 *  BAR 4/5, separate from the control/doorbell registers.  Always
	 *  try to map this bar, because it must be mapped prior to calling
	 *  pci_alloc_msix().  If the table isn't behind BAR 4/5,
	 *  bus_alloc_resource() will just return NULL which is OK.
	 */
	ctrlr->bar4_resource_id = PCIR_BAR(4);
	ctrlr->bar4_resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
	    &ctrlr->bar4_resource_id, 0, ~0, 1, RF_ACTIVE);

	return (0);
}

#ifdef CHATHAM2
static int
nvme_ctrlr_allocate_chatham_bar(struct nvme_controller *ctrlr)
{

	ctrlr->chatham_resource_id = PCIR_BAR(CHATHAM_CONTROL_BAR);
	ctrlr->chatham_resource = bus_alloc_resource(ctrlr->dev,
	    SYS_RES_MEMORY, &ctrlr->chatham_resource_id, 0, ~0, 1,
	    RF_ACTIVE);

	if (ctrlr->chatham_resource == NULL) {
		device_printf(ctrlr->dev, "unable to alloc pci resource\n");
		return (ENOMEM);
	}

	ctrlr->chatham_bus_tag = rman_get_bustag(ctrlr->chatham_resource);
	ctrlr->chatham_bus_handle =
	    rman_get_bushandle(ctrlr->chatham_resource);

	return (0);
}

static void
nvme_ctrlr_setup_chatham(struct nvme_controller *ctrlr)
{
	uint64_t reg1, reg2, reg3;
	uint64_t temp1, temp2;
	uint32_t temp3;
	uint32_t use_flash_timings = 0;

	DELAY(10000);

	temp3 = chatham_read_4(ctrlr, 0x8080);

	device_printf(ctrlr->dev, "Chatham version: 0x%x\n", temp3);

	ctrlr->chatham_lbas = chatham_read_4(ctrlr, 0x8068) - 0x110;
	ctrlr->chatham_size = ctrlr->chatham_lbas * 512;

	device_printf(ctrlr->dev, "Chatham size: %jd\n",
	    (intmax_t)ctrlr->chatham_size);

	reg1 = reg2 = reg3 = ctrlr->chatham_size - 1;

	TUNABLE_INT_FETCH("hw.nvme.use_flash_timings", &use_flash_timings);
	if (use_flash_timings) {
		device_printf(ctrlr->dev, "Chatham: using flash timings\n");
		temp1 = 0x00001b58000007d0LL;
		temp2 = 0x000000cb00000131LL;
	} else {
		device_printf(ctrlr->dev, "Chatham: using DDR timings\n");
		temp1 = temp2 = 0x0LL;
	}

	chatham_write_8(ctrlr, 0x8000, reg1);
	chatham_write_8(ctrlr, 0x8008, reg2);
	chatham_write_8(ctrlr, 0x8010, reg3);

	chatham_write_8(ctrlr, 0x8020, temp1);
	temp3 = chatham_read_4(ctrlr, 0x8020);

	chatham_write_8(ctrlr, 0x8028, temp2);
	temp3 = chatham_read_4(ctrlr, 0x8028);

	chatham_write_8(ctrlr, 0x8030, temp1);
	chatham_write_8(ctrlr, 0x8038, temp2);
	chatham_write_8(ctrlr, 0x8040, temp1);
	chatham_write_8(ctrlr, 0x8048, temp2);
	chatham_write_8(ctrlr, 0x8050, temp1);
	chatham_write_8(ctrlr, 0x8058, temp2);

	DELAY(10000);
}

static void
nvme_chatham_populate_cdata(struct nvme_controller *ctrlr)
{
	struct nvme_controller_data *cdata;

	cdata = &ctrlr->cdata;

	cdata->vid = 0x8086;
	cdata->ssvid = 0x2011;

	/*
	 * Chatham2 puts garbage data in these fields when we
	 *  invoke IDENTIFY_CONTROLLER, so we need to re-zero
	 *  the fields before copying in the replacement values.
	 */
	memset(cdata->sn, 0, sizeof(cdata->sn));
	memcpy(cdata->sn, "2012", strlen("2012"));
	memset(cdata->mn, 0, sizeof(cdata->mn));
	memcpy(cdata->mn, "CHATHAM2", strlen("CHATHAM2"));
	memset(cdata->fr, 0, sizeof(cdata->fr));
	memcpy(cdata->fr, "0", strlen("0"));
	cdata->rab = 8;
	cdata->aerl = 3;
	cdata->lpa.ns_smart = 1;
	cdata->sqes.min = 6;
	cdata->sqes.max = 6;
	cdata->cqes.min = 4;
	cdata->cqes.max = 4;
	cdata->nn = 1;

	/* Chatham2 doesn't support DSM command */
	cdata->oncs.dsm = 0;

	cdata->vwc.present = 1;
}
#endif /* CHATHAM2 */

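/*
 * Construct the admin queue pair (qpair ID 0, vector 0).  The queue depth
 *  defaults to NVME_ADMIN_ENTRIES and may be overridden via the
 *  hw.nvme.admin_entries loader tunable, e.g. in /boot/loader.conf
 *  (hypothetical value):
 *
 *	hw.nvme.admin_entries="256"
 */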
static void
nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
{
	struct nvme_qpair	*qpair;
	uint32_t		num_entries;

	qpair = &ctrlr->adminq;

	num_entries = NVME_ADMIN_ENTRIES;
	TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries);
	/*
	 * If admin_entries was overridden to an invalid value, revert it
	 *  back to our default value.
	 */
	if (num_entries < NVME_MIN_ADMIN_ENTRIES ||
	    num_entries > NVME_MAX_ADMIN_ENTRIES) {
		printf("nvme: invalid hw.nvme.admin_entries=%d specified\n",
		    num_entries);
		num_entries = NVME_ADMIN_ENTRIES;
	}

	/*
	 * The admin queue's max xfer size is treated differently than the
	 *  max I/O xfer size.  16KB is sufficient here - maybe even less?
	 */
	nvme_qpair_construct(qpair,
			     0, /* qpair ID */
			     0, /* vector */
			     num_entries,
			     NVME_ADMIN_TRACKERS,
			     16*1024, /* max xfer size */
			     ctrlr);
}

static int
nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
{
	struct nvme_qpair	*qpair;
	union cap_lo_register	cap_lo;
	int			i, num_entries, num_trackers;

	num_entries = NVME_IO_ENTRIES;
	TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);

	/*
	 * NVMe spec sets a hard limit of 64K max entries, but
	 *  devices may specify a smaller limit, so we need to check
	 *  the MQES field in the capabilities register.
	 */
	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
	num_entries = min(num_entries, cap_lo.bits.mqes+1);

	num_trackers = NVME_IO_TRACKERS;
	TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers);

	num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS);
	num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS);
	/*
	 * No need to have more trackers than entries in the submit queue.
	 *  Note also that for a queue size of N, we can only have (N-1)
	 *  commands outstanding, hence the "-1" here.
	 */
	num_trackers = min(num_trackers, (num_entries-1));

	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
	TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size);
	/*
	 * Check that tunable doesn't specify a size greater than what our
	 *  driver supports, and is an even PAGE_SIZE multiple.
	 */
	if (ctrlr->max_xfer_size > NVME_MAX_XFER_SIZE ||
	    ctrlr->max_xfer_size % PAGE_SIZE)
		ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;

	ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair),
	    M_NVME, M_ZERO | M_WAITOK);

	for (i = 0; i < ctrlr->num_io_queues; i++) {
		qpair = &ctrlr->ioq[i];

		/*
		 * Admin queue has ID=0. IO queues start at ID=1 -
		 *  hence the 'i+1' here.
		 *
		 * For I/O queues, use the controller-wide max_xfer_size
		 *  calculated in nvme_attach().
		 */
		nvme_qpair_construct(qpair,
				     i+1, /* qpair ID */
				     ctrlr->msix_enabled ? i+1 : 0, /* vector */
				     num_entries,
				     num_trackers,
				     ctrlr->max_xfer_size,
				     ctrlr);

		if (ctrlr->per_cpu_io_queues)
			bus_bind_intr(ctrlr->dev, qpair->res, i);
	}

	return (0);
}
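
/*
 * Sketch (hypothetical values) of overriding the I/O queue tunables
 *  fetched above from /boot/loader.conf:
 *
 *	hw.nvme.io_entries="256"
 *	hw.nvme.io_trackers="128"
 *	hw.nvme.max_xfer_size="131072"	# must be a multiple of PAGE_SIZE
 */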

static void
nvme_ctrlr_fail(struct nvme_controller *ctrlr)
{
	int i;

	ctrlr->is_failed = TRUE;
	nvme_qpair_fail(&ctrlr->adminq);
	for (i = 0; i < ctrlr->num_io_queues; i++)
		nvme_qpair_fail(&ctrlr->ioq[i]);
	nvme_notify_fail_consumers(ctrlr);
}

void
nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr,
    struct nvme_request *req)
{

	mtx_lock(&ctrlr->fail_req_lock);
	STAILQ_INSERT_TAIL(&ctrlr->fail_req, req, stailq);
	mtx_unlock(&ctrlr->fail_req_lock);
	taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->fail_req_task);
}

static void
nvme_ctrlr_fail_req_task(void *arg, int pending)
{
	struct nvme_controller	*ctrlr = arg;
	struct nvme_request	*req;

	mtx_lock(&ctrlr->fail_req_lock);
	while (!STAILQ_EMPTY(&ctrlr->fail_req)) {
		req = STAILQ_FIRST(&ctrlr->fail_req);
		STAILQ_REMOVE_HEAD(&ctrlr->fail_req, stailq);
		nvme_qpair_manual_complete_request(req->qpair, req,
		    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
	}
	mtx_unlock(&ctrlr->fail_req_lock);
}

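/*
 * Poll CSTS.RDY roughly once per millisecond until the controller reports
 *  ready, or until ready_timeout_in_ms (derived from CAP.TO in
 *  nvme_ctrlr_construct()) expires.  The caller must have set CC.EN first.
 */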
static int
nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr)
{
	int ms_waited;
	union cc_register cc;
	union csts_register csts;

	cc.raw = nvme_mmio_read_4(ctrlr, cc);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (!cc.bits.en) {
		device_printf(ctrlr->dev, "%s called with cc.en = 0\n",
		    __func__);
		return (ENXIO);
	}

	ms_waited = 0;

	while (!csts.bits.rdy) {
		DELAY(1000);
		if (ms_waited++ > ctrlr->ready_timeout_in_ms) {
			device_printf(ctrlr->dev, "controller did not become "
			    "ready within %d ms\n", ctrlr->ready_timeout_in_ms);
			return (ENXIO);
		}
		csts.raw = nvme_mmio_read_4(ctrlr, csts);
	}

	return (0);
}

static void
nvme_ctrlr_disable(struct nvme_controller *ctrlr)
{
	union cc_register cc;
	union csts_register csts;

	cc.raw = nvme_mmio_read_4(ctrlr, cc);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (cc.bits.en == 1 && csts.bits.rdy == 0)
		nvme_ctrlr_wait_for_ready(ctrlr);

	cc.bits.en = 0;
	nvme_mmio_write_4(ctrlr, cc, cc.raw);
	DELAY(5000);
}

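/*
 * Bring the controller to the ready state: program the admin submission
 *  and completion queue base addresses (ASQ/ACQ) and sizes (AQA), then set
 *  CC.EN along with the queue entry sizes and memory page size, and wait
 *  for CSTS.RDY.
 */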
static int
nvme_ctrlr_enable(struct nvme_controller *ctrlr)
{
	union cc_register	cc;
	union csts_register	csts;
	union aqa_register	aqa;

	cc.raw = nvme_mmio_read_4(ctrlr, cc);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (cc.bits.en == 1) {
		if (csts.bits.rdy == 1)
			return (0);
		else
			return (nvme_ctrlr_wait_for_ready(ctrlr));
	}

	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
	DELAY(5000);
	nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
	DELAY(5000);

	aqa.raw = 0;
	/* acqs and asqs are 0-based. */
	aqa.bits.acqs = ctrlr->adminq.num_entries-1;
	aqa.bits.asqs = ctrlr->adminq.num_entries-1;
	nvme_mmio_write_4(ctrlr, aqa, aqa.raw);
	DELAY(5000);

	cc.bits.en = 1;
	cc.bits.css = 0;
	cc.bits.ams = 0;
	cc.bits.shn = 0;
	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */

	/*
	 * PAGE_SIZE >> 13 evaluates to 0 for 4KB pages; MPS=0 encodes a
	 *  4KB memory page size (2^(12+MPS)) per the spec.
	 */
	cc.bits.mps = (PAGE_SIZE >> 13);

	nvme_mmio_write_4(ctrlr, cc, cc.raw);
	DELAY(5000);

	return (nvme_ctrlr_wait_for_ready(ctrlr));
}

int
nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
{
	int i;

	nvme_admin_qpair_disable(&ctrlr->adminq);
	for (i = 0; i < ctrlr->num_io_queues; i++)
		nvme_io_qpair_disable(&ctrlr->ioq[i]);

	DELAY(100*1000);

	nvme_ctrlr_disable(ctrlr);
	return (nvme_ctrlr_enable(ctrlr));
}

void
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
{
	int cmpset;

	cmpset = atomic_cmpset_32(&ctrlr->is_resetting, 0, 1);

	if (cmpset == 0 || ctrlr->is_failed)
		/*
		 * Controller is already resetting or has failed.  Return
		 *  immediately since there is no need to kick off another
		 *  reset in these cases.
		 */
		return;

	taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
}
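
/*
 * Note: the is_resetting compare-and-set above makes nvme_ctrlr_reset()
 *  safe to call from multiple contexts (e.g. I/O timeout handling); the
 *  actual reset work is deferred to reset_task on the controller's
 *  taskqueue rather than performed inline.
 */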

static int
nvme_ctrlr_identify(struct nvme_controller *ctrlr)
{
	struct nvme_completion_poll_status	status;

	status.done = FALSE;
	nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
	    nvme_completion_poll_cb, &status);
	while (status.done == FALSE)
		DELAY(5);
	if (nvme_completion_is_error(&status.cpl)) {
		printf("nvme_identify_controller failed!\n");
		return (ENXIO);
	}

#ifdef CHATHAM2
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		nvme_chatham_populate_cdata(ctrlr);
#endif

	/*
	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
	 *  controller supports.
	 */
	if (ctrlr->cdata.mdts > 0)
		ctrlr->max_xfer_size = min(ctrlr->max_xfer_size,
		    ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));

	return (0);
}

static int
nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
{
	struct nvme_completion_poll_status	status;
	int					cq_allocated, sq_allocated;

	status.done = FALSE;
	nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues,
	    nvme_completion_poll_cb, &status);
	while (status.done == FALSE)
		DELAY(5);
	if (nvme_completion_is_error(&status.cpl)) {
		printf("nvme_set_num_queues failed!\n");
		return (ENXIO);
	}

	/*
	 * Data in cdw0 is 0-based.
	 * Lower 16-bits indicate number of submission queues allocated.
	 * Upper 16-bits indicate number of completion queues allocated.
	 */
	sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
	cq_allocated = (status.cpl.cdw0 >> 16) + 1;

	/*
	 * Check that the controller was able to allocate the number of
	 *  queues we requested.  If not, revert to one IO queue.
	 */
	if (sq_allocated < ctrlr->num_io_queues ||
	    cq_allocated < ctrlr->num_io_queues) {
		ctrlr->num_io_queues = 1;
		ctrlr->per_cpu_io_queues = 0;

		/* TODO: destroy extra queues that were created
		 *  previously but now found to be not needed.
		 */
	}

	return (0);
}
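
/*
 * Worked example of the 0-based cdw0 encoding above: a completion with
 *  cdw0 == 0x00030003 means the controller allocated 4 submission queues
 *  (0x0003 + 1) and 4 completion queues ((0x00030003 >> 16) + 1).
 */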

static int
nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
{
	struct nvme_completion_poll_status	status;
	struct nvme_qpair			*qpair;
	int					i;

	for (i = 0; i < ctrlr->num_io_queues; i++) {
		qpair = &ctrlr->ioq[i];

		status.done = FALSE;
		nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector,
		    nvme_completion_poll_cb, &status);
		while (status.done == FALSE)
			DELAY(5);
		if (nvme_completion_is_error(&status.cpl)) {
			printf("nvme_create_io_cq failed!\n");
			return (ENXIO);
		}

		status.done = FALSE;
		nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
		    nvme_completion_poll_cb, &status);
		while (status.done == FALSE)
			DELAY(5);
		if (nvme_completion_is_error(&status.cpl)) {
			printf("nvme_create_io_sq failed!\n");
			return (ENXIO);
		}
	}

	return (0);
}

static int
nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
{
	struct nvme_namespace	*ns;
	int			i, status;

	for (i = 0; i < ctrlr->cdata.nn; i++) {
		ns = &ctrlr->ns[i];
		status = nvme_ns_construct(ns, i+1, ctrlr);
		if (status != 0)
			return (status);
	}

	return (0);
}

static boolean_t
is_log_page_id_valid(uint8_t page_id)
{

	switch (page_id) {
	case NVME_LOG_ERROR:
	case NVME_LOG_HEALTH_INFORMATION:
	case NVME_LOG_FIRMWARE_SLOT:
		return (TRUE);
	}

	return (FALSE);
}

static uint32_t
nvme_ctrlr_get_log_page_size(struct nvme_controller *ctrlr, uint8_t page_id)
{
	uint32_t	log_page_size;

	switch (page_id) {
	case NVME_LOG_ERROR:
		log_page_size = min(
		    sizeof(struct nvme_error_information_entry) *
		    ctrlr->cdata.elpe,
		    NVME_MAX_AER_LOG_SIZE);
		break;
	case NVME_LOG_HEALTH_INFORMATION:
		log_page_size = sizeof(struct nvme_health_information_page);
		break;
	case NVME_LOG_FIRMWARE_SLOT:
		log_page_size = sizeof(struct nvme_firmware_page);
		break;
	default:
		log_page_size = 0;
		break;
	}

	return (log_page_size);
}

static void
nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
{
	struct nvme_async_event_request	*aer = arg;

	/*
	 * If the log page fetch for some reason completed with an error,
	 *  don't pass log page data to the consumers.  In practice, this case
	 *  should never happen.
	 */
	if (nvme_completion_is_error(cpl))
		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
		    aer->log_page_id, NULL, 0);
	else
		/*
		 * Pass the cpl data from the original async event completion,
		 *  not the log page fetch.
		 */
		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
		    aer->log_page_id, aer->log_page_buffer, aer->log_page_size);

	/*
	 * Repost another asynchronous event request to replace the one
	 *  that just completed.
	 */
	nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
}

static void
nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
{
	struct nvme_async_event_request	*aer = arg;

	if (cpl->status.sc == NVME_SC_ABORTED_SQ_DELETION) {
		/*
		 *  This is simulated when controller is being shut down, to
		 *  effectively abort outstanding asynchronous event requests
		 *  and make sure all memory is freed.  Do not repost the
		 *  request in this case.
		 */
		return;
	}

	printf("Asynchronous event occurred.\n");

	/* Associated log page is in bits 23:16 of completion entry dw0. */
	aer->log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;

	if (is_log_page_id_valid(aer->log_page_id)) {
		aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr,
		    aer->log_page_id);
		memcpy(&aer->cpl, cpl, sizeof(*cpl));
		nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
		    NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer,
		    aer->log_page_size, nvme_ctrlr_async_event_log_page_cb,
		    aer);
		/* Wait to notify consumers until after log page is fetched. */
	} else {
		nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id,
		    NULL, 0);

		/*
		 * Repost another asynchronous event request to replace the one
		 *  that just completed.
		 */
		nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
	}
}

static void
nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
    struct nvme_async_event_request *aer)
{
	struct nvme_request *req;

	aer->ctrlr = ctrlr;
	req = nvme_allocate_request(NULL, 0, nvme_ctrlr_async_event_cb, aer);
	aer->req = req;

	/*
	 * Disable timeout here, since asynchronous event requests should by
	 *  nature never be timed out.
	 */
	req->timeout = FALSE;
	req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;
	nvme_ctrlr_submit_admin_request(ctrlr, req);
}

static void
nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
{
	union nvme_critical_warning_state	state;
	struct nvme_async_event_request		*aer;
	uint32_t				i;

	state.raw = 0xFF;
	state.bits.reserved = 0;
	nvme_ctrlr_cmd_set_async_event_config(ctrlr, state, NULL, NULL);

	/* aerl is a zero-based value, so we need to add 1 here. */
	ctrlr->num_aers = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));

	/* Chatham doesn't support AERs. */
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		ctrlr->num_aers = 0;

	for (i = 0; i < ctrlr->num_aers; i++) {
		aer = &ctrlr->aer[i];
		nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
	}
}

static void
nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
{

	ctrlr->int_coal_time = 0;
	TUNABLE_INT_FETCH("hw.nvme.int_coal_time",
	    &ctrlr->int_coal_time);

	ctrlr->int_coal_threshold = 0;
	TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold",
	    &ctrlr->int_coal_threshold);

	nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time,
	    ctrlr->int_coal_threshold, NULL, NULL);
}
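
/*
 * Hypothetical /boot/loader.conf override of the coalescing tunables
 *  fetched above; both values are passed through to the Set Features
 *  (interrupt coalescing) command:
 *
 *	hw.nvme.int_coal_time="100"
 *	hw.nvme.int_coal_threshold="4"
 */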

static void
nvme_ctrlr_start(void *ctrlr_arg)
{
	struct nvme_controller *ctrlr = ctrlr_arg;
	int i;

	nvme_qpair_reset(&ctrlr->adminq);
	for (i = 0; i < ctrlr->num_io_queues; i++)
		nvme_qpair_reset(&ctrlr->ioq[i]);

	nvme_admin_qpair_enable(&ctrlr->adminq);

	if (nvme_ctrlr_identify(ctrlr) != 0) {
		nvme_ctrlr_fail(ctrlr);
		return;
	}

	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) {
		nvme_ctrlr_fail(ctrlr);
		return;
	}

	if (nvme_ctrlr_create_qpairs(ctrlr) != 0) {
		nvme_ctrlr_fail(ctrlr);
		return;
	}

	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) {
		nvme_ctrlr_fail(ctrlr);
		return;
	}

	nvme_ctrlr_configure_aer(ctrlr);
	nvme_ctrlr_configure_int_coalescing(ctrlr);

	for (i = 0; i < ctrlr->num_io_queues; i++)
		nvme_io_qpair_enable(&ctrlr->ioq[i]);

	/*
	 * Clear software progress marker to 0, to indicate to pre-boot
	 *  software that OS driver load was successful.
	 *
	 * Chatham does not support this feature.
	 */
	if (pci_get_devid(ctrlr->dev) != CHATHAM_PCI_ID)
		nvme_ctrlr_cmd_set_feature(ctrlr,
		    NVME_FEAT_SOFTWARE_PROGRESS_MARKER, 0, NULL, 0, NULL, NULL);
}

void
nvme_ctrlr_start_config_hook(void *arg)
{
	struct nvme_controller *ctrlr = arg;

	nvme_ctrlr_start(ctrlr);
	config_intrhook_disestablish(&ctrlr->config_hook);
}

static void
nvme_ctrlr_reset_task(void *arg, int pending)
{
	struct nvme_controller	*ctrlr = arg;
	int			status;

	device_printf(ctrlr->dev, "resetting controller\n");
	status = nvme_ctrlr_hw_reset(ctrlr);
	/*
	 * Use pause instead of DELAY, so that we yield to any nvme interrupt
	 *  handlers on this CPU that were blocked on a qpair lock. We want
	 *  all nvme interrupts completed before proceeding with restarting the
	 *  controller.
	 *
	 * XXX - any way to guarantee the interrupt handlers have quiesced?
	 */
	pause("nvmereset", hz / 10);
	if (status == 0)
		nvme_ctrlr_start(ctrlr);
	else
		nvme_ctrlr_fail(ctrlr);

	atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
}

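/*
 * Legacy INTx handler: mask further interrupts via INTMS, process admin
 *  and I/O completions, then unmask via INTMC.  Only ioq[0] exists in
 *  INTx mode, since nvme_ctrlr_configure_intx() forces num_io_queues = 1.
 */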
static void
nvme_ctrlr_intx_handler(void *arg)
{
	struct nvme_controller *ctrlr = arg;

	nvme_mmio_write_4(ctrlr, intms, 1);

	nvme_qpair_process_completions(&ctrlr->adminq);

	if (ctrlr->ioq[0].cpl)
		nvme_qpair_process_completions(&ctrlr->ioq[0]);

	nvme_mmio_write_4(ctrlr, intmc, 1);
}

static int
nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
{

	ctrlr->num_io_queues = 1;
	ctrlr->per_cpu_io_queues = 0;
	ctrlr->rid = 0;
	ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
	    &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);

	if (ctrlr->res == NULL) {
		device_printf(ctrlr->dev, "unable to allocate shared IRQ\n");
		return (ENOMEM);
	}

	bus_setup_intr(ctrlr->dev, ctrlr->res,
	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
	    ctrlr, &ctrlr->tag);

	if (ctrlr->tag == NULL) {
		device_printf(ctrlr->dev,
		    "unable to setup legacy interrupt handler\n");
		return (ENOMEM);
	}

	return (0);
}

static int
nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvme_completion_poll_status	status;
	struct nvme_controller			*ctrlr;

	ctrlr = cdev->si_drv1;

	switch (cmd) {
	case NVME_IDENTIFY_CONTROLLER:
#ifdef CHATHAM2
		/*
		 * Don't refresh data on Chatham, since Chatham returns
		 *  garbage on IDENTIFY anyways.
		 */
		if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) {
			memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
			break;
		}
#endif
		/* Refresh data before returning to user. */
		status.done = FALSE;
		nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
		    nvme_completion_poll_cb, &status);
		while (status.done == FALSE)
			DELAY(5);
		if (nvme_completion_is_error(&status.cpl))
			return (ENXIO);
		memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
		break;
	case NVME_RESET_CONTROLLER:
		nvme_ctrlr_reset(ctrlr);
		break;
	default:
		return (ENOTTY);
	}

	return (0);
}
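
/*
 * Illustrative (hypothetical, untested) userland use of the ioctls above
 *  against the /dev/nvme%d node created in nvme_ctrlr_construct():
 *
 *	struct nvme_controller_data cdata;
 *	int fd = open("/dev/nvme0", O_RDWR);
 *	if (fd >= 0 && ioctl(fd, NVME_IDENTIFY_CONTROLLER, &cdata) == 0)
 *		printf("model: %.*s\n", (int)sizeof(cdata.mn), cdata.mn);
 */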

static struct cdevsw nvme_ctrlr_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	0,
	.d_ioctl =	nvme_ctrlr_ioctl
};

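/*
 * One-time controller construction, called from the bus attach path: map
 *  the BARs, validate CAP (doorbell stride, ready timeout), fetch the
 *  hw.nvme.* tunables, set up MSI-X (falling back to INTx), construct the
 *  admin and I/O qpairs, create the /dev/nvme%d node, and start the
 *  taskqueue used by the reset and failed-request tasks.
 */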
int
nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
{
	union cap_lo_register	cap_lo;
	union cap_hi_register	cap_hi;
	int			num_vectors, per_cpu_io_queues, status = 0;
	int			timeout_period;

	ctrlr->dev = dev;

	status = nvme_ctrlr_allocate_bar(ctrlr);

	if (status != 0)
		return (status);

#ifdef CHATHAM2
	if (pci_get_devid(dev) == CHATHAM_PCI_ID) {
		status = nvme_ctrlr_allocate_chatham_bar(ctrlr);
		if (status != 0)
			return (status);
		nvme_ctrlr_setup_chatham(ctrlr);
	}
#endif

	/*
	 * Software emulators may set the doorbell stride to something
	 *  other than zero, but this driver is not set up to handle that.
	 */
	cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi);
	if (cap_hi.bits.dstrd != 0)
		return (ENXIO);

	ctrlr->min_page_size = 1 << (12 + cap_hi.bits.mpsmin);

	/* Get ready timeout value from controller, in units of 500ms. */
	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
	ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500;

	timeout_period = NVME_DEFAULT_TIMEOUT_PERIOD;
	TUNABLE_INT_FETCH("hw.nvme.timeout_period", &timeout_period);
	timeout_period = min(timeout_period, NVME_MAX_TIMEOUT_PERIOD);
	timeout_period = max(timeout_period, NVME_MIN_TIMEOUT_PERIOD);
	ctrlr->timeout_period = timeout_period;

	nvme_retry_count = NVME_DEFAULT_RETRY_COUNT;
	TUNABLE_INT_FETCH("hw.nvme.retry_count", &nvme_retry_count);

	per_cpu_io_queues = 1;
	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
	ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE;

	if (ctrlr->per_cpu_io_queues)
		ctrlr->num_io_queues = mp_ncpus;
	else
		ctrlr->num_io_queues = 1;

	ctrlr->force_intx = 0;
	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);

	ctrlr->enable_aborts = 0;
	TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);

	ctrlr->msix_enabled = 1;

	if (ctrlr->force_intx) {
		ctrlr->msix_enabled = 0;
		goto intx;
	}

	/* One vector per IO queue, plus one vector for admin queue. */
	num_vectors = ctrlr->num_io_queues + 1;

	if (pci_msix_count(dev) < num_vectors) {
		ctrlr->msix_enabled = 0;
		goto intx;
	}

	if (pci_alloc_msix(dev, &num_vectors) != 0)
		ctrlr->msix_enabled = 0;

intx:

	if (!ctrlr->msix_enabled)
		nvme_ctrlr_configure_intx(ctrlr);

	nvme_ctrlr_construct_admin_qpair(ctrlr);

	status = nvme_ctrlr_construct_io_qpairs(ctrlr);

	if (status != 0)
		return (status);

	ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
	    "nvme%d", device_get_unit(dev));

	if (ctrlr->cdev == NULL)
		return (ENXIO);

	ctrlr->cdev->si_drv1 = (void *)ctrlr;

	ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &ctrlr->taskqueue);
	taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");

	ctrlr->is_resetting = 0;
	TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);

	TASK_INIT(&ctrlr->fail_req_task, 0, nvme_ctrlr_fail_req_task, ctrlr);
	mtx_init(&ctrlr->fail_req_lock, "nvme ctrlr fail req lock", NULL,
	    MTX_DEF);
	STAILQ_INIT(&ctrlr->fail_req);
	ctrlr->is_failed = FALSE;

	return (0);
}

void
nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
{
	int				i;

	nvme_ctrlr_disable(ctrlr);
	taskqueue_free(ctrlr->taskqueue);

	for (i = 0; i < NVME_MAX_NAMESPACES; i++)
		nvme_ns_destruct(&ctrlr->ns[i]);

	if (ctrlr->cdev)
		destroy_dev(ctrlr->cdev);

	for (i = 0; i < ctrlr->num_io_queues; i++) {
		nvme_io_qpair_destroy(&ctrlr->ioq[i]);
	}

	free(ctrlr->ioq, M_NVME);

	nvme_admin_qpair_destroy(&ctrlr->adminq);

	if (ctrlr->resource != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    ctrlr->resource_id, ctrlr->resource);
	}

	if (ctrlr->bar4_resource != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    ctrlr->bar4_resource_id, ctrlr->bar4_resource);
	}

#ifdef CHATHAM2
	if (ctrlr->chatham_resource != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    ctrlr->chatham_resource_id, ctrlr->chatham_resource);
	}
#endif

	if (ctrlr->tag)
		bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);

	if (ctrlr->res)
		bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(ctrlr->res), ctrlr->res);

	if (ctrlr->msix_enabled)
		pci_release_msi(dev);
}

void
nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
    struct nvme_request *req)
{

	nvme_qpair_submit_request(&ctrlr->adminq, req);
}

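/*
 * Submit an I/O request on the qpair for the current CPU.  With per-CPU
 *  I/O queues, num_io_queues == mp_ncpus, so curcpu indexes ioq[] directly;
 *  otherwise all I/O is funneled through ioq[0].
 */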
void
nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
    struct nvme_request *req)
{
	struct nvme_qpair	*qpair;

	if (ctrlr->per_cpu_io_queues)
		qpair = &ctrlr->ioq[curcpu];
	else
		qpair = &ctrlr->ioq[0];

	nvme_qpair_submit_request(qpair, req);
}

device_t
nvme_ctrlr_get_device(struct nvme_controller *ctrlr)
{

	return (ctrlr->dev);
}

const struct nvme_controller_data *
nvme_ctrlr_get_data(struct nvme_controller *ctrlr)
{

	return (&ctrlr->cdata);
}