nvme.c revision 247963
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26185377Ssam
27185377Ssam#include <sys/cdefs.h>
28185377Ssam__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme.c 247963 2013-03-07 22:54:28Z obrien $");
29185377Ssam
30185377Ssam#include <sys/param.h>
31185377Ssam#include <sys/bus.h>
32185377Ssam#include <sys/conf.h>
33185377Ssam#include <sys/module.h>
34185377Ssam
35185377Ssam#include <vm/uma.h>
36185377Ssam
37185377Ssam#include <dev/pci/pcireg.h>
38185377Ssam#include <dev/pci/pcivar.h>
39185377Ssam
40185377Ssam#include "nvme_private.h"
41185377Ssam
42185377Ssamstruct nvme_consumer {
43185377Ssam	nvme_consumer_cb_fn_t		cb_fn;
44185377Ssam	void				*cb_arg;
45185377Ssam};
46185377Ssam
47185377Ssamstruct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];
48185377Ssam
49185377Ssamuma_zone_t nvme_request_zone;
50185377Ssam
51185377SsamMALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
52185377Ssam
53185377Ssamstatic int    nvme_probe(device_t);
54185377Ssamstatic int    nvme_attach(device_t);
55185377Ssamstatic int    nvme_detach(device_t);
56185377Ssamstatic int    nvme_modevent(module_t mod, int type, void *arg);
57185377Ssam
58185377Ssamstatic devclass_t nvme_devclass;
59185377Ssam
60185377Ssamstatic device_method_t nvme_pci_methods[] = {
61185377Ssam	/* Device interface */
62185377Ssam	DEVMETHOD(device_probe,     nvme_probe),
63185377Ssam	DEVMETHOD(device_attach,    nvme_attach),
64185377Ssam	DEVMETHOD(device_detach,    nvme_detach),
65185377Ssam	{ 0, 0 }
66185377Ssam};
67185377Ssam
68185377Ssamstatic driver_t nvme_pci_driver = {
69185377Ssam	"nvme",
70185377Ssam	nvme_pci_methods,
71185377Ssam	sizeof(struct nvme_controller),
72185377Ssam};
73185377Ssam
74185377SsamDRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, nvme_modevent, 0);
75185377SsamMODULE_VERSION(nvme, 1);
76185377Ssam
77185377Ssamstatic struct _pcsid
78185377Ssam{
79185377Ssam	u_int32_t   type;
80185377Ssam	const char  *desc;
81185377Ssam} pci_ids[] = {
82185377Ssam	{ 0x01118086,		"NVMe Controller"  },
83185377Ssam	{ CHATHAM_PCI_ID,	"Chatham Prototype NVMe Controller"  },
84185377Ssam	{ IDT32_PCI_ID,		"IDT NVMe Controller (32 channel)"  },
85185377Ssam	{ IDT8_PCI_ID,		"IDT NVMe Controller (8 channel)" },
86185377Ssam	{ 0x00000000,		NULL  }
87185377Ssam};
88185377Ssam
89185377Ssamstatic int
90185377Ssamnvme_probe (device_t device)
91185377Ssam{
92185377Ssam	struct _pcsid	*ep;
93185377Ssam	u_int32_t	type;
94185377Ssam
95185377Ssam	type = pci_get_devid(device);
96185377Ssam	ep = pci_ids;
97185377Ssam
98185377Ssam	while (ep->type && ep->type != type)
99185377Ssam		++ep;
100185377Ssam
101185377Ssam	if (ep->desc) {
102185377Ssam		device_set_desc(device, ep->desc);
103185377Ssam		return (BUS_PROBE_DEFAULT);
104185377Ssam	}
105185377Ssam
106185377Ssam#if defined(PCIS_STORAGE_NVM)
107185377Ssam	if (pci_get_class(device)    == PCIC_STORAGE &&
108185377Ssam	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
109185377Ssam	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
110185377Ssam		device_set_desc(device, "Generic NVMe Device");
111185377Ssam		return (BUS_PROBE_GENERIC);
112185377Ssam	}
113185377Ssam#endif
114185377Ssam
115185377Ssam	return (ENXIO);
116185377Ssam}
117185377Ssam
118185377Ssamstatic void
119185377Ssamnvme_init(void)
120185377Ssam{
121185377Ssam	nvme_request_zone = uma_zcreate("nvme_request",
122185377Ssam	    sizeof(struct nvme_request), NULL, NULL, NULL, NULL, 0, 0);
123185377Ssam}
124185377Ssam
125185377SsamSYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL);
126185377Ssam
127185377Ssamstatic void
128185377Ssamnvme_uninit(void)
129185377Ssam{
130185377Ssam	uma_zdestroy(nvme_request_zone);
131185377Ssam}
132185377Ssam
133185377SsamSYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
134185377Ssam
/* MOD_LOAD handler called from nvme_modevent(); currently does nothing. */
static void
nvme_load(void)
{

}
139185377Ssam
/* MOD_UNLOAD handler called from nvme_modevent(); currently does nothing. */
static void
nvme_unload(void)
{

}
144
145static void
146nvme_shutdown(void)
147{
148	device_t		*devlist;
149	struct nvme_controller	*ctrlr;
150	union cc_register	cc;
151	union csts_register	csts;
152	int			dev, devcount;
153
154	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
155		return;
156
157	for (dev = 0; dev < devcount; dev++) {
158		/*
159		 * Only notify controller of shutdown when a real shutdown is
160		 *  in process, not when a module unload occurs.  It seems at
161		 *  least some controllers (Chatham at least) don't let you
162		 *  re-enable the controller after shutdown notification has
163		 *  been received.
164		 */
165		ctrlr = DEVICE2SOFTC(devlist[dev]);
166		cc.raw = nvme_mmio_read_4(ctrlr, cc);
167		cc.bits.shn = NVME_SHN_NORMAL;
168		nvme_mmio_write_4(ctrlr, cc, cc.raw);
169		csts.raw = nvme_mmio_read_4(ctrlr, csts);
170		while (csts.bits.shst != NVME_SHST_COMPLETE) {
171			DELAY(5);
172			csts.raw = nvme_mmio_read_4(ctrlr, csts);
173		}
174	}
175
176	free(devlist, M_TEMP);
177}
178
179static int
180nvme_modevent(module_t mod, int type, void *arg)
181{
182
183	switch (type) {
184	case MOD_LOAD:
185		nvme_load();
186		break;
187	case MOD_UNLOAD:
188		nvme_unload();
189		break;
190	case MOD_SHUTDOWN:
191		nvme_shutdown();
192		break;
193	default:
194		break;
195	}
196
197	return (0);
198}
199
200void
201nvme_dump_command(struct nvme_command *cmd)
202{
203	printf(
204"opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n",
205	    cmd->opc, cmd->fuse, cmd->rsvd1, cmd->cid, cmd->nsid,
206	    cmd->rsvd2, cmd->rsvd3,
207	    (uintmax_t)cmd->mptr, (uintmax_t)cmd->prp1, (uintmax_t)cmd->prp2,
208	    cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14,
209	    cmd->cdw15);
210}
211
212void
213nvme_dump_completion(struct nvme_completion *cpl)
214{
215	printf("cdw0:%08x sqhd:%04x sqid:%04x "
216	    "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n",
217	    cpl->cdw0, cpl->sqhd, cpl->sqid,
218	    cpl->cid, cpl->p, cpl->sf_sc, cpl->sf_sct, cpl->sf_m,
219	    cpl->sf_dnr);
220}
221
222void
223nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
224{
225	struct nvme_tracker 	*tr = arg;
226	uint32_t		cur_nseg;
227
228	KASSERT(error == 0, ("nvme_payload_map error != 0\n"));
229
230	/*
231	 * Note that we specified PAGE_SIZE for alignment and max
232	 *  segment size when creating the bus dma tags.  So here
233	 *  we can safely just transfer each segment to its
234	 *  associated PRP entry.
235	 */
236	tr->req->cmd.prp1 = seg[0].ds_addr;
237
238	if (nseg == 2) {
239		tr->req->cmd.prp2 = seg[1].ds_addr;
240	} else if (nseg > 2) {
241		cur_nseg = 1;
242		tr->req->cmd.prp2 = (uint64_t)tr->prp_bus_addr;
243		while (cur_nseg < nseg) {
244			tr->prp[cur_nseg-1] =
245			    (uint64_t)seg[cur_nseg].ds_addr;
246			cur_nseg++;
247		}
248	}
249
250	nvme_qpair_submit_cmd(tr->qpair, tr);
251}
252
253static int
254nvme_attach(device_t dev)
255{
256	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
257	int			status;
258
259	status = nvme_ctrlr_construct(ctrlr, dev);
260
261	if (status != 0)
262		return (status);
263
264	/*
265	 * Reset controller twice to ensure we do a transition from cc.en==1
266	 *  to cc.en==0.  This is because we don't really know what status
267	 *  the controller was left in when boot handed off to OS.
268	 */
269	status = nvme_ctrlr_reset(ctrlr);
270	if (status != 0)
271		return (status);
272
273	status = nvme_ctrlr_reset(ctrlr);
274	if (status != 0)
275		return (status);
276
277	ctrlr->config_hook.ich_func = nvme_ctrlr_start;
278	ctrlr->config_hook.ich_arg = ctrlr;
279
280	config_intrhook_establish(&ctrlr->config_hook);
281
282	return (0);
283}
284
285static int
286nvme_detach (device_t dev)
287{
288	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
289	struct nvme_namespace	*ns;
290	int			i;
291
292	for (i = 0; i < NVME_MAX_NAMESPACES; i++) {
293		ns = &ctrlr->ns[i];
294		if (ns->cdev)
295			destroy_dev(ns->cdev);
296	}
297
298	if (ctrlr->cdev)
299		destroy_dev(ctrlr->cdev);
300
301	for (i = 0; i < ctrlr->num_io_queues; i++) {
302		nvme_io_qpair_destroy(&ctrlr->ioq[i]);
303	}
304
305	free(ctrlr->ioq, M_NVME);
306
307	nvme_admin_qpair_destroy(&ctrlr->adminq);
308
309	if (ctrlr->resource != NULL) {
310		bus_release_resource(dev, SYS_RES_MEMORY,
311		    ctrlr->resource_id, ctrlr->resource);
312	}
313
314	if (ctrlr->bar4_resource != NULL) {
315		bus_release_resource(dev, SYS_RES_MEMORY,
316		    ctrlr->bar4_resource_id, ctrlr->bar4_resource);
317	}
318
319#ifdef CHATHAM2
320	if (ctrlr->chatham_resource != NULL) {
321		bus_release_resource(dev, SYS_RES_MEMORY,
322		    ctrlr->chatham_resource_id, ctrlr->chatham_resource);
323	}
324#endif
325
326	if (ctrlr->tag)
327		bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
328
329	if (ctrlr->res)
330		bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
331		    rman_get_rid(ctrlr->res), ctrlr->res);
332
333	if (ctrlr->msix_enabled)
334		pci_release_msi(dev);
335
336	return (0);
337}
338
339static void
340nvme_notify_consumer(struct nvme_consumer *consumer)
341{
342	device_t		*devlist;
343	struct nvme_controller	*ctrlr;
344	int			dev, ns, devcount;
345
346	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
347		return;
348
349	for (dev = 0; dev < devcount; dev++) {
350		ctrlr = DEVICE2SOFTC(devlist[dev]);
351		for (ns = 0; ns < ctrlr->cdata.nn; ns++)
352			(*consumer->cb_fn)(consumer->cb_arg, &ctrlr->ns[ns]);
353	}
354
355	free(devlist, M_TEMP);
356}
357
358struct nvme_consumer *
359nvme_register_consumer(nvme_consumer_cb_fn_t cb_fn, void *cb_arg)
360{
361	int i;
362
363	/*
364	 * TODO: add locking around consumer registration.  Not an issue
365	 *  right now since we only have one nvme consumer - nvd(4).
366	 */
367	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
368		if (nvme_consumer[i].cb_fn == NULL) {
369			nvme_consumer[i].cb_fn = cb_fn;
370			nvme_consumer[i].cb_arg = cb_arg;
371
372			nvme_notify_consumer(&nvme_consumer[i]);
373			return (&nvme_consumer[i]);
374		}
375
376	printf("nvme(4): consumer not registered - no slots available\n");
377	return (NULL);
378}
379
380void
381nvme_unregister_consumer(struct nvme_consumer *consumer)
382{
383
384	consumer->cb_fn = NULL;
385	consumer->cb_arg = NULL;
386}
387
388