nvme.c revision 248736
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
260Sstevel@tonic-gate
270Sstevel@tonic-gate#include <sys/cdefs.h>
280Sstevel@tonic-gate__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme.c 248736 2013-03-26 18:34:19Z jimharris $");
290Sstevel@tonic-gate
300Sstevel@tonic-gate#include <sys/param.h>
310Sstevel@tonic-gate#include <sys/bus.h>
320Sstevel@tonic-gate#include <sys/conf.h>
330Sstevel@tonic-gate#include <sys/module.h>
340Sstevel@tonic-gate
350Sstevel@tonic-gate#include <vm/uma.h>
360Sstevel@tonic-gate
370Sstevel@tonic-gate#include <dev/pci/pcireg.h>
380Sstevel@tonic-gate#include <dev/pci/pcivar.h>
390Sstevel@tonic-gate
400Sstevel@tonic-gate#include "nvme_private.h"
410Sstevel@tonic-gate
420Sstevel@tonic-gatestruct nvme_consumer {
430Sstevel@tonic-gate	nvme_consumer_cb_fn_t		cb_fn;
440Sstevel@tonic-gate	void				*cb_arg;
450Sstevel@tonic-gate};
460Sstevel@tonic-gate
470Sstevel@tonic-gatestruct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];
480Sstevel@tonic-gate
490Sstevel@tonic-gateuma_zone_t nvme_request_zone;
500Sstevel@tonic-gate
510Sstevel@tonic-gateMALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
520Sstevel@tonic-gate
530Sstevel@tonic-gatestatic int    nvme_probe(device_t);
540Sstevel@tonic-gatestatic int    nvme_attach(device_t);
550Sstevel@tonic-gatestatic int    nvme_detach(device_t);
560Sstevel@tonic-gatestatic int    nvme_modevent(module_t mod, int type, void *arg);
570Sstevel@tonic-gate
580Sstevel@tonic-gatestatic devclass_t nvme_devclass;
590Sstevel@tonic-gate
600Sstevel@tonic-gatestatic device_method_t nvme_pci_methods[] = {
610Sstevel@tonic-gate	/* Device interface */
620Sstevel@tonic-gate	DEVMETHOD(device_probe,     nvme_probe),
630Sstevel@tonic-gate	DEVMETHOD(device_attach,    nvme_attach),
640Sstevel@tonic-gate	DEVMETHOD(device_detach,    nvme_detach),
650Sstevel@tonic-gate	{ 0, 0 }
660Sstevel@tonic-gate};
670Sstevel@tonic-gate
680Sstevel@tonic-gatestatic driver_t nvme_pci_driver = {
690Sstevel@tonic-gate	"nvme",
700Sstevel@tonic-gate	nvme_pci_methods,
710Sstevel@tonic-gate	sizeof(struct nvme_controller),
720Sstevel@tonic-gate};
730Sstevel@tonic-gate
740Sstevel@tonic-gateDRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, nvme_modevent, 0);
750Sstevel@tonic-gateMODULE_VERSION(nvme, 1);
760Sstevel@tonic-gate
770Sstevel@tonic-gatestatic struct _pcsid
780Sstevel@tonic-gate{
790Sstevel@tonic-gate	u_int32_t   type;
800Sstevel@tonic-gate	const char  *desc;
810Sstevel@tonic-gate} pci_ids[] = {
820Sstevel@tonic-gate	{ 0x01118086,		"NVMe Controller"  },
830Sstevel@tonic-gate	{ CHATHAM_PCI_ID,	"Chatham Prototype NVMe Controller"  },
840Sstevel@tonic-gate	{ IDT32_PCI_ID,		"IDT NVMe Controller (32 channel)"  },
850Sstevel@tonic-gate	{ IDT8_PCI_ID,		"IDT NVMe Controller (8 channel)" },
860Sstevel@tonic-gate	{ 0x00000000,		NULL  }
870Sstevel@tonic-gate};
880Sstevel@tonic-gate
890Sstevel@tonic-gatestatic int
900Sstevel@tonic-gatenvme_probe (device_t device)
910Sstevel@tonic-gate{
920Sstevel@tonic-gate	struct _pcsid	*ep;
930Sstevel@tonic-gate	u_int32_t	type;
940Sstevel@tonic-gate
950Sstevel@tonic-gate	type = pci_get_devid(device);
960Sstevel@tonic-gate	ep = pci_ids;
970Sstevel@tonic-gate
980Sstevel@tonic-gate	while (ep->type && ep->type != type)
990Sstevel@tonic-gate		++ep;
1000Sstevel@tonic-gate
1010Sstevel@tonic-gate	if (ep->desc) {
1020Sstevel@tonic-gate		device_set_desc(device, ep->desc);
1030Sstevel@tonic-gate		return (BUS_PROBE_DEFAULT);
1040Sstevel@tonic-gate	}
1050Sstevel@tonic-gate
1060Sstevel@tonic-gate#if defined(PCIS_STORAGE_NVM)
1070Sstevel@tonic-gate	if (pci_get_class(device)    == PCIC_STORAGE &&
1080Sstevel@tonic-gate	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
1090Sstevel@tonic-gate	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
1100Sstevel@tonic-gate		device_set_desc(device, "Generic NVMe Device");
1110Sstevel@tonic-gate		return (BUS_PROBE_GENERIC);
1120Sstevel@tonic-gate	}
1130Sstevel@tonic-gate#endif
1140Sstevel@tonic-gate
1150Sstevel@tonic-gate	return (ENXIO);
1160Sstevel@tonic-gate}
1170Sstevel@tonic-gate
1180Sstevel@tonic-gatestatic void
1190Sstevel@tonic-gatenvme_init(void)
1200Sstevel@tonic-gate{
1210Sstevel@tonic-gate	nvme_request_zone = uma_zcreate("nvme_request",
1220Sstevel@tonic-gate	    sizeof(struct nvme_request), NULL, NULL, NULL, NULL, 0, 0);
1230Sstevel@tonic-gate}
1240Sstevel@tonic-gate
1250Sstevel@tonic-gateSYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL);
1260Sstevel@tonic-gate
1270Sstevel@tonic-gatestatic void
1280Sstevel@tonic-gatenvme_uninit(void)
1290Sstevel@tonic-gate{
1300Sstevel@tonic-gate	uma_zdestroy(nvme_request_zone);
1310Sstevel@tonic-gate}
1320Sstevel@tonic-gate
1330Sstevel@tonic-gateSYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
1340Sstevel@tonic-gate
/* MOD_LOAD handler; intentionally does nothing. */
static void
nvme_load(void)
{

}
1390Sstevel@tonic-gate
/* MOD_UNLOAD handler; intentionally does nothing. */
static void
nvme_unload(void)
{

}
1440Sstevel@tonic-gate
1450Sstevel@tonic-gatestatic void
1460Sstevel@tonic-gatenvme_shutdown(void)
1470Sstevel@tonic-gate{
1480Sstevel@tonic-gate	device_t		*devlist;
1490Sstevel@tonic-gate	struct nvme_controller	*ctrlr;
1500Sstevel@tonic-gate	union cc_register	cc;
1510Sstevel@tonic-gate	union csts_register	csts;
1520Sstevel@tonic-gate	int			dev, devcount;
1530Sstevel@tonic-gate
1540Sstevel@tonic-gate	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
1550Sstevel@tonic-gate		return;
1560Sstevel@tonic-gate
1570Sstevel@tonic-gate	for (dev = 0; dev < devcount; dev++) {
1580Sstevel@tonic-gate		/*
1590Sstevel@tonic-gate		 * Only notify controller of shutdown when a real shutdown is
1600Sstevel@tonic-gate		 *  in process, not when a module unload occurs.  It seems at
1610Sstevel@tonic-gate		 *  least some controllers (Chatham at least) don't let you
1620Sstevel@tonic-gate		 *  re-enable the controller after shutdown notification has
1630Sstevel@tonic-gate		 *  been received.
1640Sstevel@tonic-gate		 */
1650Sstevel@tonic-gate		ctrlr = DEVICE2SOFTC(devlist[dev]);
1660Sstevel@tonic-gate		cc.raw = nvme_mmio_read_4(ctrlr, cc);
1670Sstevel@tonic-gate		cc.bits.shn = NVME_SHN_NORMAL;
1680Sstevel@tonic-gate		nvme_mmio_write_4(ctrlr, cc, cc.raw);
1690Sstevel@tonic-gate		csts.raw = nvme_mmio_read_4(ctrlr, csts);
1700Sstevel@tonic-gate		while (csts.bits.shst != NVME_SHST_COMPLETE) {
1710Sstevel@tonic-gate			DELAY(5);
1720Sstevel@tonic-gate			csts.raw = nvme_mmio_read_4(ctrlr, csts);
1730Sstevel@tonic-gate		}
1740Sstevel@tonic-gate	}
1750Sstevel@tonic-gate
1760Sstevel@tonic-gate	free(devlist, M_TEMP);
177}
178
179static int
180nvme_modevent(module_t mod, int type, void *arg)
181{
182
183	switch (type) {
184	case MOD_LOAD:
185		nvme_load();
186		break;
187	case MOD_UNLOAD:
188		nvme_unload();
189		break;
190	case MOD_SHUTDOWN:
191		nvme_shutdown();
192		break;
193	default:
194		break;
195	}
196
197	return (0);
198}
199
200void
201nvme_dump_command(struct nvme_command *cmd)
202{
203	printf(
204"opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n",
205	    cmd->opc, cmd->fuse, cmd->rsvd1, cmd->cid, cmd->nsid,
206	    cmd->rsvd2, cmd->rsvd3,
207	    (uintmax_t)cmd->mptr, (uintmax_t)cmd->prp1, (uintmax_t)cmd->prp2,
208	    cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14,
209	    cmd->cdw15);
210}
211
212void
213nvme_dump_completion(struct nvme_completion *cpl)
214{
215	printf("cdw0:%08x sqhd:%04x sqid:%04x "
216	    "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n",
217	    cpl->cdw0, cpl->sqhd, cpl->sqid,
218	    cpl->cid, cpl->p, cpl->sf_sc, cpl->sf_sct, cpl->sf_m,
219	    cpl->sf_dnr);
220}
221
222void
223nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
224{
225	struct nvme_tracker 	*tr = arg;
226	uint32_t		cur_nseg;
227
228	KASSERT(error == 0, ("nvme_payload_map error != 0\n"));
229
230	/*
231	 * Note that we specified PAGE_SIZE for alignment and max
232	 *  segment size when creating the bus dma tags.  So here
233	 *  we can safely just transfer each segment to its
234	 *  associated PRP entry.
235	 */
236	tr->req->cmd.prp1 = seg[0].ds_addr;
237
238	if (nseg == 2) {
239		tr->req->cmd.prp2 = seg[1].ds_addr;
240	} else if (nseg > 2) {
241		cur_nseg = 1;
242		tr->req->cmd.prp2 = (uint64_t)tr->prp_bus_addr;
243		while (cur_nseg < nseg) {
244			tr->prp[cur_nseg-1] =
245			    (uint64_t)seg[cur_nseg].ds_addr;
246			cur_nseg++;
247		}
248	}
249
250	nvme_qpair_submit_cmd(tr->qpair, tr);
251}
252
253static int
254nvme_attach(device_t dev)
255{
256	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
257	int			status;
258
259	status = nvme_ctrlr_construct(ctrlr, dev);
260
261	if (status != 0)
262		return (status);
263
264	/*
265	 * Reset controller twice to ensure we do a transition from cc.en==1
266	 *  to cc.en==0.  This is because we don't really know what status
267	 *  the controller was left in when boot handed off to OS.
268	 */
269	status = nvme_ctrlr_reset(ctrlr);
270	if (status != 0)
271		return (status);
272
273	status = nvme_ctrlr_reset(ctrlr);
274	if (status != 0)
275		return (status);
276
277	ctrlr->config_hook.ich_func = nvme_ctrlr_start;
278	ctrlr->config_hook.ich_arg = ctrlr;
279
280	config_intrhook_establish(&ctrlr->config_hook);
281
282	return (0);
283}
284
285static int
286nvme_detach (device_t dev)
287{
288	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
289
290	nvme_ctrlr_destruct(ctrlr, dev);
291	return (0);
292}
293
294static void
295nvme_notify_consumer(struct nvme_consumer *consumer)
296{
297	device_t		*devlist;
298	struct nvme_controller	*ctrlr;
299	int			dev, ns, devcount;
300
301	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
302		return;
303
304	for (dev = 0; dev < devcount; dev++) {
305		ctrlr = DEVICE2SOFTC(devlist[dev]);
306		for (ns = 0; ns < ctrlr->cdata.nn; ns++)
307			(*consumer->cb_fn)(consumer->cb_arg, &ctrlr->ns[ns]);
308	}
309
310	free(devlist, M_TEMP);
311}
312
313struct nvme_consumer *
314nvme_register_consumer(nvme_consumer_cb_fn_t cb_fn, void *cb_arg)
315{
316	int i;
317
318	/*
319	 * TODO: add locking around consumer registration.  Not an issue
320	 *  right now since we only have one nvme consumer - nvd(4).
321	 */
322	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
323		if (nvme_consumer[i].cb_fn == NULL) {
324			nvme_consumer[i].cb_fn = cb_fn;
325			nvme_consumer[i].cb_arg = cb_arg;
326
327			nvme_notify_consumer(&nvme_consumer[i]);
328			return (&nvme_consumer[i]);
329		}
330
331	printf("nvme(4): consumer not registered - no slots available\n");
332	return (NULL);
333}
334
335void
336nvme_unregister_consumer(struct nvme_consumer *consumer)
337{
338
339	consumer->cb_fn = NULL;
340	consumer->cb_arg = NULL;
341}
342
343