nvme_qpair.c revision 248734
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_qpair.c 248734 2013-03-26 18:29:04Z jimharris $");

#include <sys/param.h>
#include <sys/bus.h>

#include <dev/pci/pcivar.h>

#include "nvme_private.h"

static void	_nvme_qpair_submit_request(struct nvme_qpair *qpair,
					   struct nvme_request *req);

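/*
 * A completion is an error if either the status code (SC) or the status
 *  code type (SCT) field of the completion entry is non-zero.
 */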
static boolean_t
nvme_completion_is_error(struct nvme_completion *cpl)
{

	return (cpl->sf_sc != 0 || cpl->sf_sct != 0);
}

static boolean_t
nvme_completion_is_retry(const struct nvme_completion *cpl)
{
	/*
	 * TODO: spec is not clear how commands that are aborted due
	 *  to TLER will be marked.  So for now, it seems
	 *  NAMESPACE_NOT_READY is the only case where we should
	 *  look at the DNR bit.
	 */
	switch (cpl->sf_sct) {
	case NVME_SCT_GENERIC:
		switch (cpl->sf_sc) {
		case NVME_SC_ABORTED_BY_REQUEST:
			return (1);
		case NVME_SC_NAMESPACE_NOT_READY:
			if (cpl->sf_dnr)
				return (0);
			else
				return (1);
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return (0);
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return (0);
	}
}

static void
nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    uint16_t cid)
{

	bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);

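	/*
	 * Map the tracker's PRP list once at construction time and cache
	 *  its bus address, so it can be reused for every command later
	 *  submitted on this tracker.
	 */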
	bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
	    sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);

	callout_init_mtx(&tr->timer, &qpair->lock, 0);
	tr->cid = cid;
	tr->qpair = qpair;
}

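/*
 * Complete the request associated with a tracker: invoke the caller's
 *  completion callback (unless the command will be retried), then either
 *  resubmit the command or return the tracker to the free list and kick
 *  off the next queued request, if any.
 */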
static void
nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    struct nvme_completion *cpl, boolean_t print_on_error)
{
	struct nvme_request	*req;
	boolean_t		retry, error;

	req = tr->req;
	error = nvme_completion_is_error(cpl);
	retry = error && nvme_completion_is_retry(cpl);

	if (error && print_on_error) {
		nvme_dump_completion(cpl);
		nvme_dump_command(&req->cmd);
	}

	qpair->act_tr[cpl->cid] = NULL;

	KASSERT(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n"));

	if (req->cb_fn && !retry)
		req->cb_fn(req->cb_arg, cpl);

	mtx_lock(&qpair->lock);
	callout_stop(&tr->timer);

	if (retry)
		nvme_qpair_submit_cmd(qpair, tr);
	else {
		if (req->payload_size > 0 || req->uio != NULL)
			bus_dmamap_unload(qpair->dma_tag,
			    tr->payload_dma_map);

		nvme_free_request(req);

		SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);

		if (!STAILQ_EMPTY(&qpair->queued_req)) {
			req = STAILQ_FIRST(&qpair->queued_req);
			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
			_nvme_qpair_submit_request(qpair, req);
		}
	}

	mtx_unlock(&qpair->lock);
}

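/*
 * Drain the completion queue: consume entries whose phase bit matches the
 *  qpair's current phase, match each completion to its active tracker via
 *  the command ID, and update the CQ head doorbell as entries are consumed.
 *  The phase is toggled each time the queue wraps around.
 */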
void
nvme_qpair_process_completions(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;
	struct nvme_completion	*cpl;

	qpair->num_intr_handler_calls++;

	while (1) {
		cpl = &qpair->cpl[qpair->cq_head];

		if (cpl->p != qpair->phase)
			break;

		tr = qpair->act_tr[cpl->cid];

		if (tr != NULL) {
			nvme_qpair_complete_tracker(qpair, tr, cpl, TRUE);
			qpair->sq_head = cpl->sqhd;
		} else {
			printf("cpl does not map to outstanding cmd\n");
			nvme_dump_completion(cpl);
			KASSERT(0, ("received completion for unknown cmd\n"));
		}

		if (++qpair->cq_head == qpair->num_entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

		nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
		    qpair->cq_head);
	}
}

static void
nvme_qpair_msix_handler(void *arg)
{
	struct nvme_qpair *qpair = arg;

	nvme_qpair_process_completions(qpair);
}

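/*
 * Construct a queue pair: set up the MSI-X interrupt (if enabled), the
 *  DMA tag, physically contiguous submission and completion queue memory,
 *  the doorbell register offsets, and the pool of command trackers.
 */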
void
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
    uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
    uint32_t max_xfer_size, struct nvme_controller *ctrlr)
{
	struct nvme_tracker	*tr;
	uint32_t		i;

	qpair->id = id;
	qpair->vector = vector;
	qpair->num_entries = num_entries;
#ifdef CHATHAM2
	/*
	 * Chatham prototype board starts having issues at higher queue
	 *  depths.  So use a conservative estimate here of no more than 64
	 *  outstanding I/O per queue at any one point.
	 */
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		num_trackers = min(num_trackers, 64);
#endif
	qpair->num_trackers = num_trackers;
	qpair->max_xfer_size = max_xfer_size;
	qpair->ctrlr = ctrlr;

	/*
	 * First time through the completion queue, HW will set the phase
	 *  bit on completions to 1.  So set this to 1 here, indicating
	 *  we're looking for a 1 to know which entries have completed.
	 *  We'll toggle the bit each time the completion queue rolls over.
	 */
	qpair->phase = 1;

	if (ctrlr->msix_enabled) {

		/*
		 * MSI-X vector resource IDs start at 1, so we add one to
		 *  the queue's vector to get the corresponding rid to use.
		 */
		qpair->rid = vector + 1;

		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
		    &qpair->rid, RF_ACTIVE);

		bus_setup_intr(ctrlr->dev, qpair->res,
		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
		    nvme_qpair_msix_handler, qpair, &qpair->tag);
	}

	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);

	bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
	    sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
	    (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
	    NULL, NULL, &qpair->dma_tag);

	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 0;
	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
	qpair->cmd = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	qpair->cpl = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);

	bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
	    qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
	    nvme_single_map, &qpair->cmd_bus_addr, 0);
	bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
	    qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
	    nvme_single_map, &qpair->cpl_bus_addr, 0);

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

	SLIST_INIT(&qpair->free_tr);
	STAILQ_INIT(&qpair->queued_req);

	for (i = 0; i < qpair->num_trackers; i++) {
		tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_NOWAIT);

		if (tr == NULL) {
			printf("warning: nvme tracker malloc failed\n");
			break;
		}

		nvme_qpair_construct_tracker(qpair, tr, i);
		SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);
	}

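	/*
	 * act_tr maps a command ID to its active tracker, so that a
	 *  completion entry can be matched back to the command that
	 *  produced it.
	 */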
	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
	    M_NVME, M_ZERO | M_NOWAIT);
}

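/*
 * Common teardown path shared by the admin and I/O queue pair destroy
 *  routines: release the interrupt resources, the DMA tag, the active
 *  tracker array, and the tracker pool itself.
 */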
static void
nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr;

	if (qpair->tag)
		bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);

	if (qpair->res)
		bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(qpair->res), qpair->res);

	if (qpair->dma_tag)
		bus_dma_tag_destroy(qpair->dma_tag);

	if (qpair->act_tr)
		free(qpair->act_tr, M_NVME);

	while (!SLIST_EMPTY(&qpair->free_tr)) {
		tr = SLIST_FIRST(&qpair->free_tr);
		SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
		bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map);
		bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map);
		free(tr, M_NVME);
	}
}

void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

	/*
	 * For NVMe, you don't send delete queue commands for the admin
	 *  queue, so we just need to unload and free the cmd and cpl memory.
	 */
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);

	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);

	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);

	nvme_qpair_destroy(qpair);
}

static void
nvme_free_cmd_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);
	qpair->cmd = NULL;
}

static void
nvme_free_cpl_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;
	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);
	qpair->cpl = NULL;
}

void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_controller *ctrlr = qpair->ctrlr;

	if (qpair->num_entries > 0) {

		nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring,
		    qpair);
		/* Spin until free_cmd_ring sets qpair->cmd to NULL. */
		while (qpair->cmd)
			DELAY(5);

		nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring,
		    qpair);
		/* Spin until free_cpl_ring sets qpair->cpl to NULL. */
		while (qpair->cpl)
			DELAY(5);

		nvme_qpair_destroy(qpair);
	}
}

static void
nvme_abort_complete(void *arg, const struct nvme_completion *status)
{
	struct nvme_completion	cpl;
	struct nvme_tracker	*tr = arg;

	/*
	 * If cdw0 == 1, the controller was not able to abort the command
	 *  we requested.  We still need to check the active tracker array,
	 *  to cover the race where the I/O timed out at the same time the
	 *  controller was completing it.
	 */
	if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) {
		/*
		 * An I/O has timed out, and the controller was unable to
		 *  abort it for some reason.  Construct a fake completion
		 *  status, and then complete the I/O's tracker manually.
		 */
		printf("abort command failed, aborting command manually\n");
		memset(&cpl, 0, sizeof(cpl));
		cpl.sqid = tr->qpair->id;
		cpl.cid = tr->cid;
		cpl.sf_sct = NVME_SCT_GENERIC;
		cpl.sf_sc = NVME_SC_ABORTED_BY_REQUEST;
		nvme_qpair_complete_tracker(tr->qpair, tr, &cpl, TRUE);
	}
}

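/*
 * Called when a command's timeout callout fires: issue an ABORT admin
 *  command for the timed-out command.  nvme_abort_complete() handles the
 *  case where the controller cannot abort it.
 */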
static void
nvme_timeout(void *arg)
{
	struct nvme_tracker	*tr = arg;

	nvme_ctrlr_cmd_abort(tr->qpair->ctrlr, tr->cid, tr->qpair->id,
	    nvme_abort_complete, tr);
}

void
nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_request *req;

	req = tr->req;
	req->cmd.cid = tr->cid;
	qpair->act_tr[tr->cid] = tr;

#if __FreeBSD_version >= 800030
	callout_reset_curcpu(&tr->timer, NVME_TIMEOUT_IN_SEC * hz,
	    nvme_timeout, tr);
#else
	callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr);
#endif

	/* Copy the command from the tracker to the submission queue. */
	memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));

	if (++qpair->sq_tail == qpair->num_entries)
		qpair->sq_tail = 0;

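	/*
	 * Ensure the command is visible in host memory before ringing the
	 *  submission queue tail doorbell.
	 */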
	wmb();
	nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
	    qpair->sq_tail);

	qpair->num_cmds++;
}

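/*
 * Submit a request on a qpair whose lock is already held.  If no tracker
 *  is free, queue the request until a completion frees one.  For requests
 *  with a payload, the command is not submitted here; it is submitted
 *  from the bus_dma callback (nvme_payload_map or nvme_payload_map_uio)
 *  once the DMA mapping is complete.
 */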
static void
_nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{
	struct nvme_tracker	*tr;
	int			err;

	mtx_assert(&qpair->lock, MA_OWNED);

	tr = SLIST_FIRST(&qpair->free_tr);

	if (tr == NULL) {
		/*
		 * No tracker is available.  Put the request on the qpair's
		 *  request queue to be processed when a tracker frees up
		 *  via a command completion.
		 */
		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
		return;
	}

	SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
	tr->req = req;

	if (req->uio == NULL) {
		if (req->payload_size > 0) {
			err = bus_dmamap_load(tr->qpair->dma_tag,
					      tr->payload_dma_map, req->payload,
					      req->payload_size,
					      nvme_payload_map, tr, 0);
			if (err != 0)
				panic("bus_dmamap_load returned non-zero!\n");
		} else
			nvme_qpair_submit_cmd(tr->qpair, tr);
	} else {
		err = bus_dmamap_load_uio(tr->qpair->dma_tag,
					  tr->payload_dma_map, req->uio,
					  nvme_payload_map_uio, tr, 0);
		if (err != 0)
			panic("bus_dmamap_load_uio returned non-zero!\n");
	}
}

void
nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{

	mtx_lock(&qpair->lock);
	_nvme_qpair_submit_request(qpair, req);
	mtx_unlock(&qpair->lock);
}