nvme_ctrlr.c revision 241661
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26210006Srdivacky
27210006Srdivacky#include <sys/cdefs.h>
28210006Srdivacky__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_ctrlr.c 241661 2012-10-18 00:40:40Z jimharris $");
29210006Srdivacky
30210006Srdivacky#include <sys/param.h>
31210006Srdivacky#include <sys/bus.h>
32210006Srdivacky#include <sys/conf.h>
33212904Sdim#include <sys/ioccom.h>
34210006Srdivacky#include <sys/smp.h>
35210006Srdivacky
36212904Sdim#include <dev/pci/pcireg.h>
37210006Srdivacky#include <dev/pci/pcivar.h>
38210006Srdivacky
39210006Srdivacky#include "nvme_private.h"
40210006Srdivacky
41210006Srdivackystatic void
42210006Srdivackynvme_ctrlr_cb(void *arg, const struct nvme_completion *status)
43210006Srdivacky{
44212904Sdim	struct nvme_completion	*cpl = arg;
45212904Sdim	struct mtx		*mtx;
46210006Srdivacky
47210006Srdivacky	/*
48212904Sdim	 * Copy status into the argument passed by the caller, so that
49210006Srdivacky	 *  the caller can check the status to determine if the
50210006Srdivacky	 *  the request passed or failed.
51210006Srdivacky	 */
52210006Srdivacky	memcpy(cpl, status, sizeof(*cpl));
53210006Srdivacky	mtx = mtx_pool_find(mtxpool_sleep, cpl);
54210006Srdivacky	mtx_lock(mtx);
55210006Srdivacky	wakeup(cpl);
56210006Srdivacky	mtx_unlock(mtx);
57210006Srdivacky}
58210006Srdivacky
59210006Srdivackystatic int
60210006Srdivackynvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
61210006Srdivacky{
62212904Sdim
63212904Sdim	/* Chatham puts the NVMe MMRs behind BAR 2/3, not BAR 0/1. */
64212904Sdim	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
65212904Sdim		ctrlr->resource_id = PCIR_BAR(2);
66212904Sdim	else
67212904Sdim		ctrlr->resource_id = PCIR_BAR(0);
68212904Sdim
69212904Sdim	ctrlr->resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
70212904Sdim	    &ctrlr->resource_id, 0, ~0, 1, RF_ACTIVE);
71212904Sdim
72212904Sdim	if(ctrlr->resource == NULL) {
73212904Sdim		device_printf(ctrlr->dev, "unable to allocate pci resource\n");
74212904Sdim		return (ENOMEM);
75212904Sdim	}
76210006Srdivacky
77210006Srdivacky	ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
78212904Sdim	ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
79212904Sdim	ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
80210006Srdivacky
81210006Srdivacky	return (0);
82212904Sdim}
83212904Sdim
84210006Srdivacky#ifdef CHATHAM2
85210006Srdivackystatic int
86210006Srdivackynvme_ctrlr_allocate_chatham_bar(struct nvme_controller *ctrlr)
87210006Srdivacky{
88210006Srdivacky
89212904Sdim	ctrlr->chatham_resource_id = PCIR_BAR(CHATHAM_CONTROL_BAR);
90210006Srdivacky	ctrlr->chatham_resource = bus_alloc_resource(ctrlr->dev,
91210006Srdivacky	    SYS_RES_MEMORY, &ctrlr->chatham_resource_id, 0, ~0, 1,
92210006Srdivacky	    RF_ACTIVE);
93210006Srdivacky
94210006Srdivacky	if(ctrlr->chatham_resource == NULL) {
95210006Srdivacky		device_printf(ctrlr->dev, "unable to alloc pci resource\n");
96210006Srdivacky		return (ENOMEM);
97210006Srdivacky	}
98212904Sdim
99212904Sdim	ctrlr->chatham_bus_tag = rman_get_bustag(ctrlr->chatham_resource);
100212904Sdim	ctrlr->chatham_bus_handle =
101212904Sdim	    rman_get_bushandle(ctrlr->chatham_resource);
102210006Srdivacky
103210006Srdivacky	return (0);
104210006Srdivacky}
105212904Sdim
106212904Sdimstatic void
107212904Sdimnvme_ctrlr_setup_chatham(struct nvme_controller *ctrlr)
108212904Sdim{
109212904Sdim	uint64_t reg1, reg2, reg3;
110212904Sdim	uint64_t temp1, temp2;
111212904Sdim	uint32_t temp3;
112212904Sdim	uint32_t use_flash_timings = 0;
113212904Sdim
114212904Sdim	DELAY(10000);
115212904Sdim
116212904Sdim	temp3 = chatham_read_4(ctrlr, 0x8080);
117212904Sdim
118212904Sdim	device_printf(ctrlr->dev, "Chatham version: 0x%x\n", temp3);
119212904Sdim
120212904Sdim	ctrlr->chatham_lbas = chatham_read_4(ctrlr, 0x8068) - 0x110;
121212904Sdim	ctrlr->chatham_size = ctrlr->chatham_lbas * 512;
122212904Sdim
123212904Sdim	device_printf(ctrlr->dev, "Chatham size: %lld\n",
124212904Sdim	    (long long)ctrlr->chatham_size);
125212904Sdim
126212904Sdim	reg1 = reg2 = reg3 = ctrlr->chatham_size - 1;
127212904Sdim
128212904Sdim	TUNABLE_INT_FETCH("hw.nvme.use_flash_timings", &use_flash_timings);
129212904Sdim	if (use_flash_timings) {
130212904Sdim		device_printf(ctrlr->dev, "Chatham: using flash timings\n");
131212904Sdim		temp1 = 0x00001b58000007d0LL;
132212904Sdim		temp2 = 0x000000cb00000131LL;
133212904Sdim	} else {
134212904Sdim		device_printf(ctrlr->dev, "Chatham: using DDR timings\n");
135212904Sdim		temp1 = temp2 = 0x0LL;
136212904Sdim	}
137210006Srdivacky
138210006Srdivacky	chatham_write_8(ctrlr, 0x8000, reg1);
139210006Srdivacky	chatham_write_8(ctrlr, 0x8008, reg2);
140210006Srdivacky	chatham_write_8(ctrlr, 0x8010, reg3);
141210006Srdivacky
142210006Srdivacky	chatham_write_8(ctrlr, 0x8020, temp1);
143210006Srdivacky	temp3 = chatham_read_4(ctrlr, 0x8020);
144210006Srdivacky
145210006Srdivacky	chatham_write_8(ctrlr, 0x8028, temp2);
146210006Srdivacky	temp3 = chatham_read_4(ctrlr, 0x8028);
147210006Srdivacky
148210006Srdivacky	chatham_write_8(ctrlr, 0x8030, temp1);
149210006Srdivacky	chatham_write_8(ctrlr, 0x8038, temp2);
150210006Srdivacky	chatham_write_8(ctrlr, 0x8040, temp1);
151210006Srdivacky	chatham_write_8(ctrlr, 0x8048, temp2);
152210006Srdivacky	chatham_write_8(ctrlr, 0x8050, temp1);
153210006Srdivacky	chatham_write_8(ctrlr, 0x8058, temp2);
154210006Srdivacky
155210006Srdivacky	DELAY(10000);
156210006Srdivacky}
157210006Srdivacky
158210006Srdivackystatic void
159210006Srdivackynvme_chatham_populate_cdata(struct nvme_controller *ctrlr)
160210006Srdivacky{
161210006Srdivacky	struct nvme_controller_data *cdata;
162210006Srdivacky
163210006Srdivacky	cdata = &ctrlr->cdata;
164210006Srdivacky
165210006Srdivacky	cdata->vid = 0x8086;
166210006Srdivacky	cdata->ssvid = 0x2011;
167210006Srdivacky
168210006Srdivacky	/*
169210006Srdivacky	 * Chatham2 puts garbage data in these fields when we
170210006Srdivacky	 *  invoke IDENTIFY_CONTROLLER, so we need to re-zero
171210006Srdivacky	 *  the fields before calling bcopy().
172210006Srdivacky	 */
173210006Srdivacky	memset(cdata->sn, 0, sizeof(cdata->sn));
174210006Srdivacky	memcpy(cdata->sn, "2012", strlen("2012"));
175210006Srdivacky	memset(cdata->mn, 0, sizeof(cdata->mn));
176210006Srdivacky	memcpy(cdata->mn, "CHATHAM2", strlen("CHATHAM2"));
177210006Srdivacky	memset(cdata->fr, 0, sizeof(cdata->fr));
178210006Srdivacky	memcpy(cdata->fr, "0", strlen("0"));
179210006Srdivacky	cdata->rab = 8;
180210006Srdivacky	cdata->aerl = 3;
181210006Srdivacky	cdata->lpa.ns_smart = 1;
182210006Srdivacky	cdata->sqes.min = 6;
183210006Srdivacky	cdata->sqes.max = 6;
184210006Srdivacky	cdata->sqes.min = 4;
185210006Srdivacky	cdata->sqes.max = 4;
186210006Srdivacky	cdata->nn = 1;
187210006Srdivacky
188210006Srdivacky	/* Chatham2 doesn't support DSM command */
189210006Srdivacky	cdata->oncs.dsm = 0;
190210006Srdivacky
191210006Srdivacky	cdata->vwc.present = 1;
192210006Srdivacky}
193210006Srdivacky#endif /* CHATHAM2 */
194210006Srdivacky
195210006Srdivackystatic void
196210006Srdivackynvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
197210006Srdivacky{
198210006Srdivacky	struct nvme_qpair	*qpair;
199210006Srdivacky	uint32_t		num_entries;
200210006Srdivacky
201210006Srdivacky	qpair = &ctrlr->adminq;
202210006Srdivacky
203210006Srdivacky	num_entries = NVME_ADMIN_ENTRIES;
204210006Srdivacky	TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries);
205210006Srdivacky	/*
206210006Srdivacky	 * If admin_entries was overridden to an invalid value, revert it
207210006Srdivacky	 *  back to our default value.
208210006Srdivacky	 */
209210006Srdivacky	if (num_entries < NVME_MIN_ADMIN_ENTRIES ||
210210006Srdivacky	    num_entries > NVME_MAX_ADMIN_ENTRIES) {
211210006Srdivacky		printf("nvme: invalid hw.nvme.admin_entries=%d specified\n",
212210006Srdivacky		    num_entries);
213210006Srdivacky		num_entries = NVME_ADMIN_ENTRIES;
214210006Srdivacky	}
215210006Srdivacky
216210006Srdivacky	/*
217210006Srdivacky	 * The admin queue's max xfer size is treated differently than the
218210006Srdivacky	 *  max I/O xfer size.  16KB is sufficient here - maybe even less?
219210006Srdivacky	 */
220210006Srdivacky	nvme_qpair_construct(qpair, 0, 0, num_entries, 16*1024, ctrlr);
221210006Srdivacky}
222210006Srdivacky
223210006Srdivackystatic int
224210006Srdivackynvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
225210006Srdivacky{
226210006Srdivacky	struct nvme_qpair	*qpair;
227210006Srdivacky	union cap_lo_register	cap_lo;
228210006Srdivacky	int			i, num_entries;
229210006Srdivacky
230210006Srdivacky	num_entries = NVME_IO_ENTRIES;
231210006Srdivacky	TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);
232210006Srdivacky
233210006Srdivacky	num_entries = max(num_entries, NVME_MIN_IO_ENTRIES);
234210006Srdivacky
235210006Srdivacky	/*
236210006Srdivacky	 * NVMe spec sets a hard limit of 64K max entries, but
237210006Srdivacky	 *  devices may specify a smaller limit, so we need to check
238210006Srdivacky	 *  the MQES field in the capabilities register.
239210006Srdivacky	 */
240210006Srdivacky	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
241210006Srdivacky	num_entries = min(num_entries, cap_lo.bits.mqes+1);
242210006Srdivacky
243210006Srdivacky	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
244210006Srdivacky	TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size);
245210006Srdivacky	/*
246210006Srdivacky	 * Check that tunable doesn't specify a size greater than what our
247210006Srdivacky	 *  driver supports, and is an even PAGE_SIZE multiple.
248210006Srdivacky	 */
249210006Srdivacky	if (ctrlr->max_xfer_size > NVME_MAX_XFER_SIZE ||
250210006Srdivacky	    ctrlr->max_xfer_size % PAGE_SIZE)
251210006Srdivacky		ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
252210006Srdivacky
253210006Srdivacky	ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair),
254210006Srdivacky	    M_NVME, M_ZERO | M_NOWAIT);
255210006Srdivacky
256210006Srdivacky	if (ctrlr->ioq == NULL)
257210006Srdivacky		return (ENOMEM);
258210006Srdivacky
259210006Srdivacky	for (i = 0; i < ctrlr->num_io_queues; i++) {
260210006Srdivacky		qpair = &ctrlr->ioq[i];
261210006Srdivacky
262210006Srdivacky		/*
263210006Srdivacky		 * Admin queue has ID=0. IO queues start at ID=1 -
264210006Srdivacky		 *  hence the 'i+1' here.
265210006Srdivacky		 *
266210006Srdivacky		 * For I/O queues, use the controller-wide max_xfer_size
267210006Srdivacky		 *  calculated in nvme_attach().
268210006Srdivacky		 */
269210006Srdivacky		nvme_qpair_construct(qpair,
270210006Srdivacky				     i+1, /* qpair ID */
271210006Srdivacky				     ctrlr->msix_enabled ? i+1 : 0, /* vector */
272210006Srdivacky				     num_entries,
273210006Srdivacky				     ctrlr->max_xfer_size,
274210006Srdivacky				     ctrlr);
275210006Srdivacky
276210006Srdivacky		if (ctrlr->per_cpu_io_queues)
277210006Srdivacky			bus_bind_intr(ctrlr->dev, qpair->res, i);
278210006Srdivacky	}
279210006Srdivacky
280210006Srdivacky	return (0);
281210006Srdivacky}
282210006Srdivacky
283210006Srdivackystatic int
284210006Srdivackynvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr)
285210006Srdivacky{
286212904Sdim	int ms_waited;
287210006Srdivacky	union cc_register cc;
288210006Srdivacky	union csts_register csts;
289210006Srdivacky
290210006Srdivacky	cc.raw = nvme_mmio_read_4(ctrlr, cc);
291210006Srdivacky	csts.raw = nvme_mmio_read_4(ctrlr, csts);
292210006Srdivacky
293210006Srdivacky	if (!cc.bits.en) {
294210006Srdivacky		device_printf(ctrlr->dev, "%s called with cc.en = 0\n",
295210006Srdivacky		    __func__);
296210006Srdivacky		return (ENXIO);
297210006Srdivacky	}
298210006Srdivacky
299210006Srdivacky	ms_waited = 0;
300210006Srdivacky
301210006Srdivacky	while (!csts.bits.rdy) {
302212904Sdim		DELAY(1000);
303212904Sdim		if (ms_waited++ > ctrlr->ready_timeout_in_ms) {
304210006Srdivacky			device_printf(ctrlr->dev, "controller did not become "
305210006Srdivacky			    "ready within %d ms\n", ctrlr->ready_timeout_in_ms);
306210006Srdivacky			return (ENXIO);
307210006Srdivacky		}
308210006Srdivacky		csts.raw = nvme_mmio_read_4(ctrlr, csts);
309210006Srdivacky	}
310210006Srdivacky
311212904Sdim	return (0);
312212904Sdim}
313212904Sdim
314212904Sdimstatic void
315212904Sdimnvme_ctrlr_disable(struct nvme_controller *ctrlr)
316212904Sdim{
317212904Sdim	union cc_register cc;
318212904Sdim	union csts_register csts;
319212904Sdim
320212904Sdim	cc.raw = nvme_mmio_read_4(ctrlr, cc);
321212904Sdim	csts.raw = nvme_mmio_read_4(ctrlr, csts);
322212904Sdim
323212904Sdim	if (cc.bits.en == 1 && csts.bits.rdy == 0)
324212904Sdim		nvme_ctrlr_wait_for_ready(ctrlr);
325212904Sdim
326212904Sdim	cc.bits.en = 0;
327212904Sdim	nvme_mmio_write_4(ctrlr, cc, cc.raw);
328212904Sdim	DELAY(5000);
329210006Srdivacky}
330210006Srdivacky
331210006Srdivackystatic int
332210006Srdivackynvme_ctrlr_enable(struct nvme_controller *ctrlr)
333210006Srdivacky{
334210006Srdivacky	union cc_register	cc;
335210006Srdivacky	union csts_register	csts;
336210006Srdivacky	union aqa_register	aqa;
337210006Srdivacky
338210006Srdivacky	cc.raw = nvme_mmio_read_4(ctrlr, cc);
339210006Srdivacky	csts.raw = nvme_mmio_read_4(ctrlr, csts);
340210006Srdivacky
341210006Srdivacky	if (cc.bits.en == 1) {
342210006Srdivacky		if (csts.bits.rdy == 1)
343210006Srdivacky			return (0);
344210006Srdivacky		else
345210006Srdivacky			return (nvme_ctrlr_wait_for_ready(ctrlr));
346210006Srdivacky	}
347210006Srdivacky
348210006Srdivacky	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
349210006Srdivacky	DELAY(5000);
350210006Srdivacky	nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
351210006Srdivacky	DELAY(5000);
352210006Srdivacky
353210006Srdivacky	aqa.raw = 0;
354210006Srdivacky	/* acqs and asqs are 0-based. */
355210006Srdivacky	aqa.bits.acqs = ctrlr->adminq.num_entries-1;
356210006Srdivacky	aqa.bits.asqs = ctrlr->adminq.num_entries-1;
357210006Srdivacky	nvme_mmio_write_4(ctrlr, aqa, aqa.raw);
358210006Srdivacky	DELAY(5000);
359210006Srdivacky
360210006Srdivacky	cc.bits.en = 1;
361210006Srdivacky	cc.bits.css = 0;
362210006Srdivacky	cc.bits.ams = 0;
363210006Srdivacky	cc.bits.shn = 0;
364210006Srdivacky	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
365210006Srdivacky	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
366210006Srdivacky
367210006Srdivacky	/* This evaluates to 0, which is according to spec. */
368210006Srdivacky	cc.bits.mps = (PAGE_SIZE >> 13);
369210006Srdivacky
370210006Srdivacky	nvme_mmio_write_4(ctrlr, cc, cc.raw);
371210006Srdivacky	DELAY(5000);
372210006Srdivacky
373210006Srdivacky	return (nvme_ctrlr_wait_for_ready(ctrlr));
374210006Srdivacky}
375210006Srdivacky
/*
 * Reset the controller by disabling and then re-enabling it.
 *
 * Returns the result of nvme_ctrlr_enable().
 */
int
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
{
	int	error;

	nvme_ctrlr_disable(ctrlr);
	error = nvme_ctrlr_enable(ctrlr);

	return (error);
}
383210006Srdivacky
384210006Srdivacky/*
385210006Srdivacky * Disable this code for now, since Chatham doesn't support
386210006Srdivacky *  AERs so I have no good way to test them.
387210006Srdivacky */
388210006Srdivacky#if 0
/*
 * Completion callback for an asynchronous event request; 'arg' is the
 *  owning controller.  Currently compiled out.
 */
static void
nvme_async_event_cb(void *arg, const struct nvme_completion *status)
{
	struct nvme_controller *owner;

	owner = arg;

	printf("Asynchronous event occurred.\n");

	/* TODO: decode async event type based on status */
	/* TODO: check status for any error bits */

	/*
	 * Post a fresh asynchronous event request so the controller has
	 *  one available for the next event.
	 */
	nvme_ctrlr_cmd_asynchronous_event_request(owner, nvme_async_event_cb,
	    owner);
}
406210006Srdivacky#endif
407210006Srdivacky
408210006Srdivackystatic int
409210006Srdivackynvme_ctrlr_identify(struct nvme_controller *ctrlr)
410212904Sdim{
411212904Sdim	struct mtx		*mtx;
412212904Sdim	struct nvme_completion	cpl;
413210006Srdivacky	int			status;
414210006Srdivacky
415210006Srdivacky	mtx = mtx_pool_find(mtxpool_sleep, &cpl);
416210006Srdivacky
417210006Srdivacky	mtx_lock(mtx);
418210006Srdivacky	nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
419210006Srdivacky	    nvme_ctrlr_cb, &cpl);
420210006Srdivacky	status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
421210006Srdivacky	mtx_unlock(mtx);
422210006Srdivacky	if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
423210006Srdivacky		printf("nvme_identify_controller failed!\n");
424210006Srdivacky		return (ENXIO);
425210006Srdivacky	}
426210006Srdivacky
427210006Srdivacky#ifdef CHATHAM2
428210006Srdivacky	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
429210006Srdivacky		nvme_chatham_populate_cdata(ctrlr);
430210006Srdivacky#endif
431210006Srdivacky
432210006Srdivacky	return (0);
433210006Srdivacky}
434210006Srdivacky
435210006Srdivackystatic int
436212904Sdimnvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
437212904Sdim{
438212904Sdim	struct mtx		*mtx;
439212904Sdim	struct nvme_completion	cpl;
440210006Srdivacky	int			cq_allocated, sq_allocated, status;
441210006Srdivacky
442210006Srdivacky	mtx = mtx_pool_find(mtxpool_sleep, &cpl);
443210006Srdivacky
444210006Srdivacky	mtx_lock(mtx);
445210006Srdivacky	nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues,
446210006Srdivacky	    nvme_ctrlr_cb, &cpl);
447210006Srdivacky	status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
448210006Srdivacky	mtx_unlock(mtx);
449210006Srdivacky	if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
450210006Srdivacky		printf("nvme_set_num_queues failed!\n");
451210006Srdivacky		return (ENXIO);
452210006Srdivacky	}
453210006Srdivacky
454210006Srdivacky	/*
455210006Srdivacky	 * Data in cdw0 is 0-based.
456210006Srdivacky	 * Lower 16-bits indicate number of submission queues allocated.
457210006Srdivacky	 * Upper 16-bits indicate number of completion queues allocated.
458210006Srdivacky	 */
459210006Srdivacky	sq_allocated = (cpl.cdw0 & 0xFFFF) + 1;
460210006Srdivacky	cq_allocated = (cpl.cdw0 >> 16) + 1;
461210006Srdivacky
462210006Srdivacky	/*
463210006Srdivacky	 * Check that the controller was able to allocate the number of
464210006Srdivacky	 *  queues we requested.  If not, revert to one IO queue.
465210006Srdivacky	 */
466210006Srdivacky	if (sq_allocated < ctrlr->num_io_queues ||
467210006Srdivacky	    cq_allocated < ctrlr->num_io_queues) {
468210006Srdivacky		ctrlr->num_io_queues = 1;
469210006Srdivacky		ctrlr->per_cpu_io_queues = 0;
470210006Srdivacky
471210006Srdivacky		/* TODO: destroy extra queues that were created
472210006Srdivacky		 *  previously but now found to be not needed.
473210006Srdivacky		 */
474210006Srdivacky	}
475210006Srdivacky
476210006Srdivacky	return (0);
477210006Srdivacky}
478210006Srdivacky
479210006Srdivackystatic int
480210006Srdivackynvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
481{
482	struct mtx		*mtx;
483	struct nvme_qpair	*qpair;
484	struct nvme_completion	cpl;
485	int			i, status;
486
487	mtx = mtx_pool_find(mtxpool_sleep, &cpl);
488
489	for (i = 0; i < ctrlr->num_io_queues; i++) {
490		qpair = &ctrlr->ioq[i];
491
492		mtx_lock(mtx);
493		nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector,
494		    nvme_ctrlr_cb, &cpl);
495		status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
496		mtx_unlock(mtx);
497		if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
498			printf("nvme_create_io_cq failed!\n");
499			return (ENXIO);
500		}
501
502		mtx_lock(mtx);
503		nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
504		    nvme_ctrlr_cb, &cpl);
505		status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
506		mtx_unlock(mtx);
507		if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
508			printf("nvme_create_io_sq failed!\n");
509			return (ENXIO);
510		}
511	}
512
513	return (0);
514}
515
516static int
517nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
518{
519	struct nvme_namespace	*ns;
520	int			i, status;
521
522	for (i = 0; i < ctrlr->cdata.nn; i++) {
523		ns = &ctrlr->ns[i];
524		status = nvme_ns_construct(ns, i+1, ctrlr);
525		if (status != 0)
526			return (status);
527	}
528
529	return (0);
530}
531
532static void
533nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
534{
535	union nvme_critical_warning_state	state;
536	uint8_t					num_async_events;
537
538	state.raw = 0xFF;
539	state.bits.reserved = 0;
540	nvme_ctrlr_cmd_set_asynchronous_event_config(ctrlr, state, NULL, NULL);
541
542	/* aerl is a zero-based value, so we need to add 1 here. */
543	num_async_events = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));
544
545	/*
546	 * Disable this code for now, since Chatham doesn't support
547	 *  AERs so I have no good way to test them.
548	 */
549#if 0
550	for (int i = 0; i < num_async_events; i++)
551		nvme_ctrlr_cmd_asynchronous_event_request(ctrlr,
552		    nvme_async_event_cb, ctrlr);
553#endif
554}
555
556static void
557nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
558{
559
560	ctrlr->int_coal_time = 0;
561	TUNABLE_INT_FETCH("hw.nvme.int_coal_time",
562	    &ctrlr->int_coal_time);
563
564	ctrlr->int_coal_threshold = 0;
565	TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold",
566	    &ctrlr->int_coal_threshold);
567
568	nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time,
569	    ctrlr->int_coal_threshold, NULL, NULL);
570}
571
572void
573nvme_ctrlr_start(void *ctrlr_arg)
574{
575	struct nvme_controller *ctrlr = ctrlr_arg;
576
577	if (nvme_ctrlr_identify(ctrlr) != 0)
578		goto err;
579
580	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
581		goto err;
582
583	if (nvme_ctrlr_create_qpairs(ctrlr) != 0)
584		goto err;
585
586	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0)
587		goto err;
588
589	nvme_ctrlr_configure_aer(ctrlr);
590	nvme_ctrlr_configure_int_coalescing(ctrlr);
591
592	ctrlr->is_started = TRUE;
593
594err:
595
596	/*
597	 * Initialize sysctls, even if controller failed to start, to
598	 *  assist with debugging admin queue pair.
599	 */
600	nvme_sysctl_initialize_ctrlr(ctrlr);
601	config_intrhook_disestablish(&ctrlr->config_hook);
602}
603
604static void
605nvme_ctrlr_intx_task(void *arg, int pending)
606{
607	struct nvme_controller *ctrlr = arg;
608
609	nvme_qpair_process_completions(&ctrlr->adminq);
610
611	if (ctrlr->ioq[0].cpl)
612		nvme_qpair_process_completions(&ctrlr->ioq[0]);
613
614	nvme_mmio_write_4(ctrlr, intmc, 1);
615}
616
617static void
618nvme_ctrlr_intx_handler(void *arg)
619{
620	struct nvme_controller *ctrlr = arg;
621
622	nvme_mmio_write_4(ctrlr, intms, 1);
623	taskqueue_enqueue_fast(ctrlr->taskqueue, &ctrlr->task);
624}
625
626static int
627nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
628{
629
630	ctrlr->num_io_queues = 1;
631	ctrlr->per_cpu_io_queues = 0;
632	ctrlr->rid = 0;
633	ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
634	    &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
635
636	if (ctrlr->res == NULL) {
637		device_printf(ctrlr->dev, "unable to allocate shared IRQ\n");
638		return (ENOMEM);
639	}
640
641	bus_setup_intr(ctrlr->dev, ctrlr->res,
642	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
643	    ctrlr, &ctrlr->tag);
644
645	if (ctrlr->tag == NULL) {
646		device_printf(ctrlr->dev,
647		    "unable to setup legacy interrupt handler\n");
648		return (ENOMEM);
649	}
650
651	TASK_INIT(&ctrlr->task, 0, nvme_ctrlr_intx_task, ctrlr);
652	ctrlr->taskqueue = taskqueue_create_fast("nvme_taskq", M_NOWAIT,
653	    taskqueue_thread_enqueue, &ctrlr->taskqueue);
654	taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_NET,
655	    "%s intx taskq", device_get_nameunit(ctrlr->dev));
656
657	return (0);
658}
659
660static int
661nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
662    struct thread *td)
663{
664	struct nvme_controller	*ctrlr;
665	struct nvme_completion	cpl;
666	struct mtx		*mtx;
667
668	ctrlr = cdev->si_drv1;
669
670	switch (cmd) {
671	case NVME_IDENTIFY_CONTROLLER:
672#ifdef CHATHAM2
673		/*
674		 * Don't refresh data on Chatham, since Chatham returns
675		 *  garbage on IDENTIFY anyways.
676		 */
677		if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) {
678			memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
679			break;
680		}
681#endif
682		/* Refresh data before returning to user. */
683		mtx = mtx_pool_find(mtxpool_sleep, &cpl);
684		mtx_lock(mtx);
685		nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
686		    nvme_ctrlr_cb, &cpl);
687		msleep(&cpl, mtx, PRIBIO, "nvme_ioctl", 0);
688		mtx_unlock(mtx);
689		if (cpl.sf_sc || cpl.sf_sct)
690			return (ENXIO);
691		memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
692		break;
693	default:
694		return (ENOTTY);
695	}
696
697	return (0);
698}
699
700static struct cdevsw nvme_ctrlr_cdevsw = {
701	.d_version =	D_VERSION,
702	.d_flags =	0,
703	.d_ioctl =	nvme_ctrlr_ioctl
704};
705
706int
707nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
708{
709	union cap_lo_register	cap_lo;
710	union cap_hi_register	cap_hi;
711	int			num_vectors, per_cpu_io_queues, status = 0;
712
713	ctrlr->dev = dev;
714	ctrlr->is_started = FALSE;
715
716	status = nvme_ctrlr_allocate_bar(ctrlr);
717
718	if (status != 0)
719		return (status);
720
721#ifdef CHATHAM2
722	if (pci_get_devid(dev) == CHATHAM_PCI_ID) {
723		status = nvme_ctrlr_allocate_chatham_bar(ctrlr);
724		if (status != 0)
725			return (status);
726		nvme_ctrlr_setup_chatham(ctrlr);
727	}
728#endif
729
730	/*
731	 * Software emulators may set the doorbell stride to something
732	 *  other than zero, but this driver is not set up to handle that.
733	 */
734	cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi);
735	if (cap_hi.bits.dstrd != 0)
736		return (ENXIO);
737
738	/* Get ready timeout value from controller, in units of 500ms. */
739	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
740	ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500;
741
742	per_cpu_io_queues = 1;
743	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
744	ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE;
745
746	if (ctrlr->per_cpu_io_queues)
747		ctrlr->num_io_queues = mp_ncpus;
748	else
749		ctrlr->num_io_queues = 1;
750
751	ctrlr->force_intx = 0;
752	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
753
754	ctrlr->msix_enabled = 1;
755
756	if (ctrlr->force_intx) {
757		ctrlr->msix_enabled = 0;
758		goto intx;
759	}
760
761	/* One vector per IO queue, plus one vector for admin queue. */
762	num_vectors = ctrlr->num_io_queues + 1;
763
764	if (pci_msix_count(dev) < num_vectors) {
765		ctrlr->msix_enabled = 0;
766		goto intx;
767	}
768
769	if (pci_alloc_msix(dev, &num_vectors) != 0)
770		ctrlr->msix_enabled = 0;
771
772intx:
773
774	if (!ctrlr->msix_enabled)
775		nvme_ctrlr_configure_intx(ctrlr);
776
777	nvme_ctrlr_construct_admin_qpair(ctrlr);
778
779	status = nvme_ctrlr_construct_io_qpairs(ctrlr);
780
781	if (status != 0)
782		return (status);
783
784	ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
785	    "nvme%d", device_get_unit(dev));
786
787	if (ctrlr->cdev == NULL)
788		return (ENXIO);
789
790	ctrlr->cdev->si_drv1 = (void *)ctrlr;
791
792	return (0);
793}
794
795void
796nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
797    struct nvme_request *req)
798{
799	struct nvme_qpair       *qpair;
800	struct nvme_tracker     *tr;
801	int                     err;
802
803	qpair = &ctrlr->adminq;
804
805	tr = nvme_qpair_allocate_tracker(qpair);
806
807	tr->req = req;
808
809	if (req->payload_size > 0) {
810		err = bus_dmamap_load(tr->qpair->dma_tag, tr->payload_dma_map,
811				      req->payload, req->payload_size,
812				      nvme_payload_map, tr, 0);
813		if (err != 0)
814			panic("bus_dmamap_load returned non-zero!\n");
815	} else
816		nvme_qpair_submit_cmd(tr->qpair, tr);
817}
818
819void
820nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
821    struct nvme_request *req)
822{
823	struct nvme_qpair       *qpair;
824	struct nvme_tracker     *tr;
825	int                     err;
826
827	if (ctrlr->per_cpu_io_queues)
828		qpair = &ctrlr->ioq[curcpu];
829	else
830		qpair = &ctrlr->ioq[0];
831
832	tr = nvme_qpair_allocate_tracker(qpair);
833
834	tr->req = req;
835
836	if (req->uio == NULL) {
837		if (req->payload_size > 0) {
838			err = bus_dmamap_load(tr->qpair->dma_tag,
839					      tr->payload_dma_map, req->payload,
840					      req->payload_size,
841					      nvme_payload_map, tr, 0);
842			if (err != 0)
843				panic("bus_dmamap_load returned non-zero!\n");
844		} else
845			nvme_qpair_submit_cmd(tr->qpair, tr);
846	} else {
847		err = bus_dmamap_load_uio(tr->qpair->dma_tag,
848					  tr->payload_dma_map, req->uio,
849					  nvme_payload_map_uio, tr, 0);
850		if (err != 0)
851			panic("bus_dmamap_load returned non-zero!\n");
852	}
853}
854