nvme_ctrlr.c revision 241663
1/*-
2 * Copyright (C) 2012 Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_ctrlr.c 241663 2012-10-18 00:43:25Z jimharris $");
29
30#include <sys/param.h>
31#include <sys/bus.h>
32#include <sys/conf.h>
33#include <sys/ioccom.h>
34#include <sys/smp.h>
35
36#include <dev/pci/pcireg.h>
37#include <dev/pci/pcivar.h>
38
39#include "nvme_private.h"
40
41static void
42nvme_ctrlr_cb(void *arg, const struct nvme_completion *status)
43{
44	struct nvme_completion	*cpl = arg;
45	struct mtx		*mtx;
46
47	/*
48	 * Copy status into the argument passed by the caller, so that
49	 *  the caller can check the status to determine if the
50	 *  the request passed or failed.
51	 */
52	memcpy(cpl, status, sizeof(*cpl));
53	mtx = mtx_pool_find(mtxpool_sleep, cpl);
54	mtx_lock(mtx);
55	wakeup(cpl);
56	mtx_unlock(mtx);
57}
58
59static int
60nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
61{
62
63	/* Chatham puts the NVMe MMRs behind BAR 2/3, not BAR 0/1. */
64	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
65		ctrlr->resource_id = PCIR_BAR(2);
66	else
67		ctrlr->resource_id = PCIR_BAR(0);
68
69	ctrlr->resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
70	    &ctrlr->resource_id, 0, ~0, 1, RF_ACTIVE);
71
72	if(ctrlr->resource == NULL) {
73		device_printf(ctrlr->dev, "unable to allocate pci resource\n");
74		return (ENOMEM);
75	}
76
77	ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
78	ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
79	ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
80
81	return (0);
82}
83
84#ifdef CHATHAM2
85static int
86nvme_ctrlr_allocate_chatham_bar(struct nvme_controller *ctrlr)
87{
88
89	ctrlr->chatham_resource_id = PCIR_BAR(CHATHAM_CONTROL_BAR);
90	ctrlr->chatham_resource = bus_alloc_resource(ctrlr->dev,
91	    SYS_RES_MEMORY, &ctrlr->chatham_resource_id, 0, ~0, 1,
92	    RF_ACTIVE);
93
94	if(ctrlr->chatham_resource == NULL) {
95		device_printf(ctrlr->dev, "unable to alloc pci resource\n");
96		return (ENOMEM);
97	}
98
99	ctrlr->chatham_bus_tag = rman_get_bustag(ctrlr->chatham_resource);
100	ctrlr->chatham_bus_handle =
101	    rman_get_bushandle(ctrlr->chatham_resource);
102
103	return (0);
104}
105
106static void
107nvme_ctrlr_setup_chatham(struct nvme_controller *ctrlr)
108{
109	uint64_t reg1, reg2, reg3;
110	uint64_t temp1, temp2;
111	uint32_t temp3;
112	uint32_t use_flash_timings = 0;
113
114	DELAY(10000);
115
116	temp3 = chatham_read_4(ctrlr, 0x8080);
117
118	device_printf(ctrlr->dev, "Chatham version: 0x%x\n", temp3);
119
120	ctrlr->chatham_lbas = chatham_read_4(ctrlr, 0x8068) - 0x110;
121	ctrlr->chatham_size = ctrlr->chatham_lbas * 512;
122
123	device_printf(ctrlr->dev, "Chatham size: %lld\n",
124	    (long long)ctrlr->chatham_size);
125
126	reg1 = reg2 = reg3 = ctrlr->chatham_size - 1;
127
128	TUNABLE_INT_FETCH("hw.nvme.use_flash_timings", &use_flash_timings);
129	if (use_flash_timings) {
130		device_printf(ctrlr->dev, "Chatham: using flash timings\n");
131		temp1 = 0x00001b58000007d0LL;
132		temp2 = 0x000000cb00000131LL;
133	} else {
134		device_printf(ctrlr->dev, "Chatham: using DDR timings\n");
135		temp1 = temp2 = 0x0LL;
136	}
137
138	chatham_write_8(ctrlr, 0x8000, reg1);
139	chatham_write_8(ctrlr, 0x8008, reg2);
140	chatham_write_8(ctrlr, 0x8010, reg3);
141
142	chatham_write_8(ctrlr, 0x8020, temp1);
143	temp3 = chatham_read_4(ctrlr, 0x8020);
144
145	chatham_write_8(ctrlr, 0x8028, temp2);
146	temp3 = chatham_read_4(ctrlr, 0x8028);
147
148	chatham_write_8(ctrlr, 0x8030, temp1);
149	chatham_write_8(ctrlr, 0x8038, temp2);
150	chatham_write_8(ctrlr, 0x8040, temp1);
151	chatham_write_8(ctrlr, 0x8048, temp2);
152	chatham_write_8(ctrlr, 0x8050, temp1);
153	chatham_write_8(ctrlr, 0x8058, temp2);
154
155	DELAY(10000);
156}
157
158static void
159nvme_chatham_populate_cdata(struct nvme_controller *ctrlr)
160{
161	struct nvme_controller_data *cdata;
162
163	cdata = &ctrlr->cdata;
164
165	cdata->vid = 0x8086;
166	cdata->ssvid = 0x2011;
167
168	/*
169	 * Chatham2 puts garbage data in these fields when we
170	 *  invoke IDENTIFY_CONTROLLER, so we need to re-zero
171	 *  the fields before calling bcopy().
172	 */
173	memset(cdata->sn, 0, sizeof(cdata->sn));
174	memcpy(cdata->sn, "2012", strlen("2012"));
175	memset(cdata->mn, 0, sizeof(cdata->mn));
176	memcpy(cdata->mn, "CHATHAM2", strlen("CHATHAM2"));
177	memset(cdata->fr, 0, sizeof(cdata->fr));
178	memcpy(cdata->fr, "0", strlen("0"));
179	cdata->rab = 8;
180	cdata->aerl = 3;
181	cdata->lpa.ns_smart = 1;
182	cdata->sqes.min = 6;
183	cdata->sqes.max = 6;
184	cdata->sqes.min = 4;
185	cdata->sqes.max = 4;
186	cdata->nn = 1;
187
188	/* Chatham2 doesn't support DSM command */
189	cdata->oncs.dsm = 0;
190
191	cdata->vwc.present = 1;
192}
193#endif /* CHATHAM2 */
194
195static void
196nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
197{
198	struct nvme_qpair	*qpair;
199	uint32_t		num_entries;
200
201	qpair = &ctrlr->adminq;
202
203	num_entries = NVME_ADMIN_ENTRIES;
204	TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries);
205	/*
206	 * If admin_entries was overridden to an invalid value, revert it
207	 *  back to our default value.
208	 */
209	if (num_entries < NVME_MIN_ADMIN_ENTRIES ||
210	    num_entries > NVME_MAX_ADMIN_ENTRIES) {
211		printf("nvme: invalid hw.nvme.admin_entries=%d specified\n",
212		    num_entries);
213		num_entries = NVME_ADMIN_ENTRIES;
214	}
215
216	/*
217	 * The admin queue's max xfer size is treated differently than the
218	 *  max I/O xfer size.  16KB is sufficient here - maybe even less?
219	 */
220	nvme_qpair_construct(qpair, 0, 0, num_entries, 16*1024, ctrlr);
221}
222
223static int
224nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
225{
226	struct nvme_qpair	*qpair;
227	union cap_lo_register	cap_lo;
228	int			i, num_entries;
229
230	num_entries = NVME_IO_ENTRIES;
231	TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);
232
233	num_entries = max(num_entries, NVME_MIN_IO_ENTRIES);
234
235	/*
236	 * NVMe spec sets a hard limit of 64K max entries, but
237	 *  devices may specify a smaller limit, so we need to check
238	 *  the MQES field in the capabilities register.
239	 */
240	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
241	num_entries = min(num_entries, cap_lo.bits.mqes+1);
242
243	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
244	TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size);
245	/*
246	 * Check that tunable doesn't specify a size greater than what our
247	 *  driver supports, and is an even PAGE_SIZE multiple.
248	 */
249	if (ctrlr->max_xfer_size > NVME_MAX_XFER_SIZE ||
250	    ctrlr->max_xfer_size % PAGE_SIZE)
251		ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
252
253	ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair),
254	    M_NVME, M_ZERO | M_NOWAIT);
255
256	if (ctrlr->ioq == NULL)
257		return (ENOMEM);
258
259	for (i = 0; i < ctrlr->num_io_queues; i++) {
260		qpair = &ctrlr->ioq[i];
261
262		/*
263		 * Admin queue has ID=0. IO queues start at ID=1 -
264		 *  hence the 'i+1' here.
265		 *
266		 * For I/O queues, use the controller-wide max_xfer_size
267		 *  calculated in nvme_attach().
268		 */
269		nvme_qpair_construct(qpair,
270				     i+1, /* qpair ID */
271				     ctrlr->msix_enabled ? i+1 : 0, /* vector */
272				     num_entries,
273				     ctrlr->max_xfer_size,
274				     ctrlr);
275
276		if (ctrlr->per_cpu_io_queues)
277			bus_bind_intr(ctrlr->dev, qpair->res, i);
278	}
279
280	return (0);
281}
282
283static int
284nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr)
285{
286	int ms_waited;
287	union cc_register cc;
288	union csts_register csts;
289
290	cc.raw = nvme_mmio_read_4(ctrlr, cc);
291	csts.raw = nvme_mmio_read_4(ctrlr, csts);
292
293	if (!cc.bits.en) {
294		device_printf(ctrlr->dev, "%s called with cc.en = 0\n",
295		    __func__);
296		return (ENXIO);
297	}
298
299	ms_waited = 0;
300
301	while (!csts.bits.rdy) {
302		DELAY(1000);
303		if (ms_waited++ > ctrlr->ready_timeout_in_ms) {
304			device_printf(ctrlr->dev, "controller did not become "
305			    "ready within %d ms\n", ctrlr->ready_timeout_in_ms);
306			return (ENXIO);
307		}
308		csts.raw = nvme_mmio_read_4(ctrlr, csts);
309	}
310
311	return (0);
312}
313
314static void
315nvme_ctrlr_disable(struct nvme_controller *ctrlr)
316{
317	union cc_register cc;
318	union csts_register csts;
319
320	cc.raw = nvme_mmio_read_4(ctrlr, cc);
321	csts.raw = nvme_mmio_read_4(ctrlr, csts);
322
323	if (cc.bits.en == 1 && csts.bits.rdy == 0)
324		nvme_ctrlr_wait_for_ready(ctrlr);
325
326	cc.bits.en = 0;
327	nvme_mmio_write_4(ctrlr, cc, cc.raw);
328	DELAY(5000);
329}
330
331static int
332nvme_ctrlr_enable(struct nvme_controller *ctrlr)
333{
334	union cc_register	cc;
335	union csts_register	csts;
336	union aqa_register	aqa;
337
338	cc.raw = nvme_mmio_read_4(ctrlr, cc);
339	csts.raw = nvme_mmio_read_4(ctrlr, csts);
340
341	if (cc.bits.en == 1) {
342		if (csts.bits.rdy == 1)
343			return (0);
344		else
345			return (nvme_ctrlr_wait_for_ready(ctrlr));
346	}
347
348	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
349	DELAY(5000);
350	nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
351	DELAY(5000);
352
353	aqa.raw = 0;
354	/* acqs and asqs are 0-based. */
355	aqa.bits.acqs = ctrlr->adminq.num_entries-1;
356	aqa.bits.asqs = ctrlr->adminq.num_entries-1;
357	nvme_mmio_write_4(ctrlr, aqa, aqa.raw);
358	DELAY(5000);
359
360	cc.bits.en = 1;
361	cc.bits.css = 0;
362	cc.bits.ams = 0;
363	cc.bits.shn = 0;
364	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
365	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
366
367	/* This evaluates to 0, which is according to spec. */
368	cc.bits.mps = (PAGE_SIZE >> 13);
369
370	nvme_mmio_write_4(ctrlr, cc, cc.raw);
371	DELAY(5000);
372
373	return (nvme_ctrlr_wait_for_ready(ctrlr));
374}
375
/*
 * Reset the controller by disabling it and then enabling it again.
 *
 * Returns the status of nvme_ctrlr_enable().
 */
int
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
{
	int	status;

	nvme_ctrlr_disable(ctrlr);
	status = nvme_ctrlr_enable(ctrlr);

	return (status);
}
383
384/*
385 * Disable this code for now, since Chatham doesn't support
386 *  AERs so I have no good way to test them.
387 */
388#if 0
/*
 * Completion callback for an asynchronous event request.  'arg' is the
 *  controller the request was posted to.
 */
static void
nvme_async_event_cb(void *arg, const struct nvme_completion *status)
{
	struct nvme_controller *ctrlr;

	ctrlr = arg;

	printf("Asynchronous event occurred.\n");

	/* TODO: decode async event type based on status */
	/* TODO: check status for any error bits */

	/*
	 * Post a fresh asynchronous event request to replace the one
	 *  that just completed, so the controller can use it again.
	 */
	nvme_ctrlr_cmd_asynchronous_event_request(ctrlr, nvme_async_event_cb,
	    ctrlr);
}
406#endif
407
408static int
409nvme_ctrlr_identify(struct nvme_controller *ctrlr)
410{
411	struct mtx		*mtx;
412	struct nvme_completion	cpl;
413	int			status;
414
415	mtx = mtx_pool_find(mtxpool_sleep, &cpl);
416
417	mtx_lock(mtx);
418	nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
419	    nvme_ctrlr_cb, &cpl);
420	status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
421	mtx_unlock(mtx);
422	if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
423		printf("nvme_identify_controller failed!\n");
424		return (ENXIO);
425	}
426
427#ifdef CHATHAM2
428	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
429		nvme_chatham_populate_cdata(ctrlr);
430#endif
431
432	return (0);
433}
434
435static int
436nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
437{
438	struct mtx		*mtx;
439	struct nvme_completion	cpl;
440	int			cq_allocated, sq_allocated, status;
441
442	mtx = mtx_pool_find(mtxpool_sleep, &cpl);
443
444	mtx_lock(mtx);
445	nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues,
446	    nvme_ctrlr_cb, &cpl);
447	status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
448	mtx_unlock(mtx);
449	if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
450		printf("nvme_set_num_queues failed!\n");
451		return (ENXIO);
452	}
453
454	/*
455	 * Data in cdw0 is 0-based.
456	 * Lower 16-bits indicate number of submission queues allocated.
457	 * Upper 16-bits indicate number of completion queues allocated.
458	 */
459	sq_allocated = (cpl.cdw0 & 0xFFFF) + 1;
460	cq_allocated = (cpl.cdw0 >> 16) + 1;
461
462	/*
463	 * Check that the controller was able to allocate the number of
464	 *  queues we requested.  If not, revert to one IO queue.
465	 */
466	if (sq_allocated < ctrlr->num_io_queues ||
467	    cq_allocated < ctrlr->num_io_queues) {
468		ctrlr->num_io_queues = 1;
469		ctrlr->per_cpu_io_queues = 0;
470
471		/* TODO: destroy extra queues that were created
472		 *  previously but now found to be not needed.
473		 */
474	}
475
476	return (0);
477}
478
479static int
480nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
481{
482	struct mtx		*mtx;
483	struct nvme_qpair	*qpair;
484	struct nvme_completion	cpl;
485	int			i, status;
486
487	mtx = mtx_pool_find(mtxpool_sleep, &cpl);
488
489	for (i = 0; i < ctrlr->num_io_queues; i++) {
490		qpair = &ctrlr->ioq[i];
491
492		mtx_lock(mtx);
493		nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector,
494		    nvme_ctrlr_cb, &cpl);
495		status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
496		mtx_unlock(mtx);
497		if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
498			printf("nvme_create_io_cq failed!\n");
499			return (ENXIO);
500		}
501
502		mtx_lock(mtx);
503		nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
504		    nvme_ctrlr_cb, &cpl);
505		status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
506		mtx_unlock(mtx);
507		if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
508			printf("nvme_create_io_sq failed!\n");
509			return (ENXIO);
510		}
511	}
512
513	return (0);
514}
515
516static int
517nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
518{
519	struct nvme_namespace	*ns;
520	int			i, status;
521
522	for (i = 0; i < ctrlr->cdata.nn; i++) {
523		ns = &ctrlr->ns[i];
524		status = nvme_ns_construct(ns, i+1, ctrlr);
525		if (status != 0)
526			return (status);
527	}
528
529	return (0);
530}
531
532static void
533nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
534{
535	union nvme_critical_warning_state	state;
536	uint8_t					num_async_events;
537
538	state.raw = 0xFF;
539	state.bits.reserved = 0;
540	nvme_ctrlr_cmd_set_asynchronous_event_config(ctrlr, state, NULL, NULL);
541
542	/* aerl is a zero-based value, so we need to add 1 here. */
543	num_async_events = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));
544
545	/*
546	 * Disable this code for now, since Chatham doesn't support
547	 *  AERs so I have no good way to test them.
548	 */
549#if 0
550	for (int i = 0; i < num_async_events; i++)
551		nvme_ctrlr_cmd_asynchronous_event_request(ctrlr,
552		    nvme_async_event_cb, ctrlr);
553#endif
554}
555
556static void
557nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
558{
559
560	ctrlr->int_coal_time = 0;
561	TUNABLE_INT_FETCH("hw.nvme.int_coal_time",
562	    &ctrlr->int_coal_time);
563
564	ctrlr->int_coal_threshold = 0;
565	TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold",
566	    &ctrlr->int_coal_threshold);
567
568	nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time,
569	    ctrlr->int_coal_threshold, NULL, NULL);
570}
571
572void
573nvme_ctrlr_start(void *ctrlr_arg)
574{
575	struct nvme_controller *ctrlr = ctrlr_arg;
576
577	if (nvme_ctrlr_identify(ctrlr) != 0)
578		goto err;
579
580	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
581		goto err;
582
583	if (nvme_ctrlr_create_qpairs(ctrlr) != 0)
584		goto err;
585
586	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0)
587		goto err;
588
589	nvme_ctrlr_configure_aer(ctrlr);
590	nvme_ctrlr_configure_int_coalescing(ctrlr);
591
592	ctrlr->is_started = TRUE;
593
594err:
595
596	/*
597	 * Initialize sysctls, even if controller failed to start, to
598	 *  assist with debugging admin queue pair.
599	 */
600	nvme_sysctl_initialize_ctrlr(ctrlr);
601	config_intrhook_disestablish(&ctrlr->config_hook);
602}
603
604static void
605nvme_ctrlr_intx_task(void *arg, int pending)
606{
607	struct nvme_controller *ctrlr = arg;
608
609	nvme_qpair_process_completions(&ctrlr->adminq);
610
611	if (ctrlr->ioq[0].cpl)
612		nvme_qpair_process_completions(&ctrlr->ioq[0]);
613
614	nvme_mmio_write_4(ctrlr, intmc, 1);
615}
616
617static void
618nvme_ctrlr_intx_handler(void *arg)
619{
620	struct nvme_controller *ctrlr = arg;
621
622	nvme_mmio_write_4(ctrlr, intms, 1);
623	taskqueue_enqueue_fast(ctrlr->taskqueue, &ctrlr->task);
624}
625
626static int
627nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
628{
629
630	ctrlr->num_io_queues = 1;
631	ctrlr->per_cpu_io_queues = 0;
632	ctrlr->rid = 0;
633	ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
634	    &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
635
636	if (ctrlr->res == NULL) {
637		device_printf(ctrlr->dev, "unable to allocate shared IRQ\n");
638		return (ENOMEM);
639	}
640
641	bus_setup_intr(ctrlr->dev, ctrlr->res,
642	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
643	    ctrlr, &ctrlr->tag);
644
645	if (ctrlr->tag == NULL) {
646		device_printf(ctrlr->dev,
647		    "unable to setup legacy interrupt handler\n");
648		return (ENOMEM);
649	}
650
651	TASK_INIT(&ctrlr->task, 0, nvme_ctrlr_intx_task, ctrlr);
652	ctrlr->taskqueue = taskqueue_create_fast("nvme_taskq", M_NOWAIT,
653	    taskqueue_thread_enqueue, &ctrlr->taskqueue);
654	taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_NET,
655	    "%s intx taskq", device_get_nameunit(ctrlr->dev));
656
657	return (0);
658}
659
660static int
661nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
662    struct thread *td)
663{
664	struct nvme_controller	*ctrlr;
665	struct nvme_completion	cpl;
666	struct mtx		*mtx;
667
668	ctrlr = cdev->si_drv1;
669
670	switch (cmd) {
671	case NVME_IDENTIFY_CONTROLLER:
672#ifdef CHATHAM2
673		/*
674		 * Don't refresh data on Chatham, since Chatham returns
675		 *  garbage on IDENTIFY anyways.
676		 */
677		if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) {
678			memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
679			break;
680		}
681#endif
682		/* Refresh data before returning to user. */
683		mtx = mtx_pool_find(mtxpool_sleep, &cpl);
684		mtx_lock(mtx);
685		nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
686		    nvme_ctrlr_cb, &cpl);
687		msleep(&cpl, mtx, PRIBIO, "nvme_ioctl", 0);
688		mtx_unlock(mtx);
689		if (cpl.sf_sc || cpl.sf_sct)
690			return (ENXIO);
691		memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
692		break;
693	default:
694		return (ENOTTY);
695	}
696
697	return (0);
698}
699
700static struct cdevsw nvme_ctrlr_cdevsw = {
701	.d_version =	D_VERSION,
702	.d_flags =	0,
703	.d_ioctl =	nvme_ctrlr_ioctl
704};
705
706int
707nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
708{
709	union cap_lo_register	cap_lo;
710	union cap_hi_register	cap_hi;
711	int			num_vectors, per_cpu_io_queues, status = 0;
712
713	ctrlr->dev = dev;
714	ctrlr->is_started = FALSE;
715
716	status = nvme_ctrlr_allocate_bar(ctrlr);
717
718	if (status != 0)
719		return (status);
720
721#ifdef CHATHAM2
722	if (pci_get_devid(dev) == CHATHAM_PCI_ID) {
723		status = nvme_ctrlr_allocate_chatham_bar(ctrlr);
724		if (status != 0)
725			return (status);
726		nvme_ctrlr_setup_chatham(ctrlr);
727	}
728#endif
729
730	/*
731	 * Software emulators may set the doorbell stride to something
732	 *  other than zero, but this driver is not set up to handle that.
733	 */
734	cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi);
735	if (cap_hi.bits.dstrd != 0)
736		return (ENXIO);
737
738	/* Get ready timeout value from controller, in units of 500ms. */
739	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
740	ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500;
741
742	per_cpu_io_queues = 1;
743	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
744	ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE;
745
746	if (ctrlr->per_cpu_io_queues)
747		ctrlr->num_io_queues = mp_ncpus;
748	else
749		ctrlr->num_io_queues = 1;
750
751	ctrlr->force_intx = 0;
752	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
753
754	ctrlr->msix_enabled = 1;
755
756	if (ctrlr->force_intx) {
757		ctrlr->msix_enabled = 0;
758		goto intx;
759	}
760
761	/* One vector per IO queue, plus one vector for admin queue. */
762	num_vectors = ctrlr->num_io_queues + 1;
763
764	if (pci_msix_count(dev) < num_vectors) {
765		ctrlr->msix_enabled = 0;
766		goto intx;
767	}
768
769	if (pci_alloc_msix(dev, &num_vectors) != 0)
770		ctrlr->msix_enabled = 0;
771
772intx:
773
774	if (!ctrlr->msix_enabled)
775		nvme_ctrlr_configure_intx(ctrlr);
776
777	nvme_ctrlr_construct_admin_qpair(ctrlr);
778
779	status = nvme_ctrlr_construct_io_qpairs(ctrlr);
780
781	if (status != 0)
782		return (status);
783
784	ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
785	    "nvme%d", device_get_unit(dev));
786
787	if (ctrlr->cdev == NULL)
788		return (ENXIO);
789
790	ctrlr->cdev->si_drv1 = (void *)ctrlr;
791
792	return (0);
793}
794
795void
796nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
797    struct nvme_request *req)
798{
799
800	nvme_qpair_submit_request(&ctrlr->adminq, req);
801}
802
803void
804nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
805    struct nvme_request *req)
806{
807	struct nvme_qpair       *qpair;
808
809	if (ctrlr->per_cpu_io_queues)
810		qpair = &ctrlr->ioq[curcpu];
811	else
812		qpair = &ctrlr->ioq[0];
813
814	nvme_qpair_submit_request(qpair, req);
815}
816