nvme_qpair.c revision 248741
/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_qpair.c 248741 2013-03-26 18:45:16Z jimharris $");

#include <sys/param.h>
#include <sys/bus.h>

#include <dev/pci/pcivar.h>

#include "nvme_private.h"

static void	_nvme_qpair_submit_request(struct nvme_qpair *qpair,
					   struct nvme_request *req);

static boolean_t
nvme_completion_is_error(struct nvme_completion *cpl)
{

	return (cpl->sf_sc != 0 || cpl->sf_sct != 0);
}

static boolean_t
nvme_completion_is_retry(const struct nvme_completion *cpl)
{
	/*
	 * TODO: spec is not clear how commands that are aborted due
	 *  to TLER will be marked.  So for now, it seems
	 *  NAMESPACE_NOT_READY is the only case where we should
	 *  look at the DNR bit.
	 */
	switch (cpl->sf_sct) {
	case NVME_SCT_GENERIC:
		switch (cpl->sf_sc) {
		case NVME_SC_ABORTED_BY_REQUEST:
			return (1);
		case NVME_SC_NAMESPACE_NOT_READY:
			if (cpl->sf_dnr)
				return (0);
			else
				return (1);
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return (0);
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return (0);
	}
}

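/*
 * Scan the active tracker array for the tracker that owns the given
 *  request.  This is O(num_entries), but it is only used on the manual
 *  abort path, not on the I/O fast path.
 */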
static struct nvme_tracker *
nvme_qpair_find_tracker(struct nvme_qpair *qpair, struct nvme_request *req)
{
	struct nvme_tracker	*tr;
	uint32_t		i;

	KASSERT(req != NULL, ("%s: called with NULL req\n", __func__));

	for (i = 0; i < qpair->num_entries; ++i) {
		tr = qpair->act_tr[i];
		if (tr != NULL && tr->req == req)
			return (tr);
	}

	return (NULL);
}

static void
nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    uint16_t cid)
{

	bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);

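	/*
	 * The PRP list is embedded in the tracker, so map it once here and
	 *  cache its bus address for use by the payload mapping code when
	 *  a command needs more than two PRP entries.
	 */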
	bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
	    sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);

	callout_init_mtx(&tr->timer, &qpair->lock, 0);
	tr->cid = cid;
	tr->qpair = qpair;
}

static void
nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    struct nvme_completion *cpl, boolean_t print_on_error)
{
	struct nvme_request	*req;
	boolean_t		retry, error;

	req = tr->req;
	error = nvme_completion_is_error(cpl);
	retry = error && nvme_completion_is_retry(cpl);

	if (error && print_on_error) {
		nvme_dump_completion(cpl);
		nvme_dump_command(&req->cmd);
	}

	qpair->act_tr[cpl->cid] = NULL;

	KASSERT(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n"));

	if (req->cb_fn && !retry)
		req->cb_fn(req->cb_arg, cpl);

	mtx_lock(&qpair->lock);
	callout_stop(&tr->timer);

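	/*
	 * On retry, resubmit the same tracker without invoking the caller's
	 *  callback.  Otherwise release the request, recycle the tracker,
	 *  and submit the next queued request (if any) now that a tracker
	 *  has freed up.
	 */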
	if (retry)
		nvme_qpair_submit_cmd(qpair, tr);
	else {
		if (req->payload_size > 0 || req->uio != NULL)
			bus_dmamap_unload(qpair->dma_tag,
			    tr->payload_dma_map);

		nvme_free_request(req);
		tr->req = NULL;

		TAILQ_REMOVE(&qpair->outstanding_tr, tr, tailq);
		TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);

		if (!STAILQ_EMPTY(&qpair->queued_req)) {
			req = STAILQ_FIRST(&qpair->queued_req);
			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
			_nvme_qpair_submit_request(qpair, req);
		}
	}

	mtx_unlock(&qpair->lock);
}

void
nvme_qpair_process_completions(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;
	struct nvme_completion	*cpl;

	qpair->num_intr_handler_calls++;

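	/*
	 * Consume completion queue entries until we reach one whose phase
	 *  bit does not match the queue's current phase, which indicates
	 *  the controller has not yet posted a new entry at cq_head.
	 */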
	while (1) {
		cpl = &qpair->cpl[qpair->cq_head];

		if (cpl->p != qpair->phase)
			break;

		tr = qpair->act_tr[cpl->cid];

		if (tr != NULL) {
			nvme_qpair_complete_tracker(qpair, tr, cpl, TRUE);
			qpair->sq_head = cpl->sqhd;
		} else {
			printf("cpl does not map to outstanding cmd\n");
			nvme_dump_completion(cpl);
			KASSERT(0, ("received completion for unknown cmd\n"));
		}

		if (++qpair->cq_head == qpair->num_entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

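		/*
		 * Ring the completion queue head doorbell to hand the
		 *  consumed entry back to the controller.
		 */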
		nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
		    qpair->cq_head);
	}
}

static void
nvme_qpair_msix_handler(void *arg)
{
	struct nvme_qpair *qpair = arg;

	nvme_qpair_process_completions(qpair);
}

void
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
    uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
    uint32_t max_xfer_size, struct nvme_controller *ctrlr)
{
	struct nvme_tracker	*tr;
	uint32_t		i;

	qpair->id = id;
	qpair->vector = vector;
	qpair->num_entries = num_entries;
#ifdef CHATHAM2
	/*
	 * Chatham prototype board starts having issues at higher queue
	 *  depths.  So use a conservative estimate here of no more than 64
	 *  outstanding I/O per queue at any one point.
	 */
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		num_trackers = min(num_trackers, 64);
#endif
	qpair->num_trackers = num_trackers;
	qpair->max_xfer_size = max_xfer_size;
	qpair->ctrlr = ctrlr;

	/*
	 * First time through the completion queue, HW will set the phase
	 *  bit on completions to 1.  So set this to 1 here, indicating
	 *  we're looking for a 1 to know which entries have completed.
	 *  We'll toggle the bit each time the completion queue rolls over.
	 */
	qpair->phase = 1;

	if (ctrlr->msix_enabled) {

		/*
		 * MSI-X vector resource IDs start at 1, so we add one to
		 *  the queue's vector to get the corresponding rid to use.
		 */
		qpair->rid = vector + 1;

		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
		    &qpair->rid, RF_ACTIVE);

		bus_setup_intr(ctrlr->dev, qpair->res,
		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
		    nvme_qpair_msix_handler, qpair, &qpair->tag);
	}

	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);

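	/*
	 * Create a DMA tag sized for the largest transfer this queue pair
	 *  will see: up to max_xfer_size bytes, split into page-sized
	 *  segments, with one extra segment to allow for an unaligned
	 *  starting address.
	 */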
	bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
	    sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
	    (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
	    NULL, NULL, &qpair->dma_tag);

	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 0;
	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
	qpair->cmd = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	qpair->cpl = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);

	bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
	    qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
	    nvme_single_map, &qpair->cmd_bus_addr, 0);
	bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
	    qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
	    nvme_single_map, &qpair->cpl_bus_addr, 0);

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

	TAILQ_INIT(&qpair->free_tr);
	TAILQ_INIT(&qpair->outstanding_tr);
	STAILQ_INIT(&qpair->queued_req);

	for (i = 0; i < qpair->num_trackers; i++) {
		tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_NOWAIT);

		if (tr == NULL) {
			printf("warning: nvme tracker malloc failed\n");
			break;
		}

		nvme_qpair_construct_tracker(qpair, tr, i);
		TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
	}

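	/*
	 * act_tr is indexed by command id (cid) and records which tracker,
	 *  if any, is outstanding for each cid on this queue pair.
	 */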
	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
	    M_NVME, M_ZERO | M_NOWAIT);
}

static void
nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;

	if (qpair->tag)
		bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);

	if (qpair->res)
		bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(qpair->res), qpair->res);

	if (qpair->dma_tag)
		bus_dma_tag_destroy(qpair->dma_tag);

	if (qpair->act_tr)
		free(qpair->act_tr, M_NVME);

	while (!TAILQ_EMPTY(&qpair->free_tr)) {
		tr = TAILQ_FIRST(&qpair->free_tr);
		TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
		bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map);
		bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map);
		free(tr, M_NVME);
	}
}

void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

	/*
	 * For NVMe, you don't send delete queue commands for the admin
	 *  queue, so we just need to unload and free the cmd and cpl memory.
	 */
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);

	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);

	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);

	nvme_qpair_destroy(qpair);
}

static void
nvme_free_cmd_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);
	qpair->cmd = NULL;
}

static void
nvme_free_cpl_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;
	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);
	qpair->cpl = NULL;
}

void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_controller *ctrlr = qpair->ctrlr;

	if (qpair->num_entries > 0) {

		nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring,
		    qpair);
		/* Spin until free_cmd_ring sets qpair->cmd to NULL. */
		while (qpair->cmd)
			DELAY(5);

		nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring,
		    qpair);
		/* Spin until free_cpl_ring sets qpair->cpl to NULL. */
		while (qpair->cpl)
			DELAY(5);

		nvme_qpair_destroy(qpair);
	}
}

static void
nvme_qpair_manual_abort_tracker(struct nvme_qpair *qpair,
    struct nvme_tracker *tr, uint32_t sct, uint32_t sc,
    boolean_t print_on_error)
{
	struct nvme_completion	cpl;

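	/*
	 * Build a synthetic completion entry carrying the caller-specified
	 *  status and push it through the normal completion path.
	 */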
	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.cid = tr->cid;
	cpl.sf_sct = sct;
	cpl.sf_sc = sc;
	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}

void
nvme_qpair_manual_abort_request(struct nvme_qpair *qpair,
    struct nvme_request *req, uint32_t sct, uint32_t sc,
    boolean_t print_on_error)
{
	struct nvme_tracker	*tr;

	tr = nvme_qpair_find_tracker(qpair, req);

	if (tr == NULL) {
		printf("%s: request not found\n", __func__);
		nvme_dump_command(&req->cmd);
		return;
	}

	nvme_qpair_manual_abort_tracker(qpair, tr, sct, sc, print_on_error);
}

static void
nvme_abort_complete(void *arg, const struct nvme_completion *status)
{
	struct nvme_tracker	*tr = arg;

	/*
	 * If cdw0 == 1, the controller was not able to abort the command
	 *  we requested.  We still need to check the active tracker array
	 *  to cover the race where the I/O timed out at the same time the
	 *  controller was completing it.
	 */
	if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) {
		/*
		 * An I/O has timed out, and the controller was unable to
		 *  abort it for some reason.  Construct a fake completion
		 *  status, and then complete the I/O's tracker manually.
		 */
		printf("abort command failed, aborting command manually\n");
		nvme_qpair_manual_abort_tracker(tr->qpair, tr,
		    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
	}
}

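/*
 * Per-tracker callout handler, invoked when a command exceeds its timeout.
 *  Issue an ABORT admin command for the timed-out cid/sqid; if the
 *  controller cannot abort the command, nvme_abort_complete() above
 *  completes the tracker manually.
 */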
static void
nvme_timeout(void *arg)
{
	struct nvme_tracker	*tr = arg;

	nvme_ctrlr_cmd_abort(tr->qpair->ctrlr, tr->cid, tr->qpair->id,
	    nvme_abort_complete, tr);
}

void
nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_request *req;

	req = tr->req;
	req->cmd.cid = tr->cid;
	qpair->act_tr[tr->cid] = tr;

	if (req->timeout > 0)
#if __FreeBSD_version >= 800030
		callout_reset_curcpu(&tr->timer, req->timeout * hz,
		    nvme_timeout, tr);
#else
		callout_reset(&tr->timer, req->timeout * hz, nvme_timeout, tr);
#endif

	/* Copy the command from the request into the submission queue entry. */
	memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));

	if (++qpair->sq_tail == qpair->num_entries)
		qpair->sq_tail = 0;

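	/*
	 * Ensure the command is visible in host memory before ringing the
	 *  submission queue tail doorbell.
	 */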
	wmb();
	nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
	    qpair->sq_tail);

	qpair->num_cmds++;
}

static void
_nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{
	struct nvme_tracker	*tr;
	int			err;

	mtx_assert(&qpair->lock, MA_OWNED);

	tr = TAILQ_FIRST(&qpair->free_tr);

	if (tr == NULL) {
		/*
		 * No tracker is available.  Put the request on the qpair's
		 *  request queue to be processed when a tracker frees up
		 *  via a command completion.
		 */
		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
		return;
	}

	TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
	TAILQ_INSERT_TAIL(&qpair->outstanding_tr, tr, tailq);
	tr->req = req;

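	/*
	 * For requests with a payload, submission is deferred to the
	 *  nvme_payload_map()/nvme_payload_map_uio() callbacks once the
	 *  DMA mapping completes; payload-less requests are submitted
	 *  directly below.
	 */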
	if (req->uio == NULL) {
		if (req->payload_size > 0) {
			err = bus_dmamap_load(tr->qpair->dma_tag,
					      tr->payload_dma_map, req->payload,
					      req->payload_size,
					      nvme_payload_map, tr, 0);
			if (err != 0)
				panic("bus_dmamap_load returned non-zero!\n");
		} else
			nvme_qpair_submit_cmd(tr->qpair, tr);
	} else {
		err = bus_dmamap_load_uio(tr->qpair->dma_tag,
					  tr->payload_dma_map, req->uio,
					  nvme_payload_map_uio, tr, 0);
		if (err != 0)
			panic("bus_dmamap_load_uio returned non-zero!\n");
	}
}

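/*
 * A sketch of typical usage, assuming the request-allocation helpers
 *  declared in nvme_private.h (details abbreviated for illustration):
 *
 *	req = nvme_allocate_request(payload, payload_size, cb_fn, cb_arg);
 *	req->cmd.opc = NVME_OPC_READ;
 *	... fill in the remaining command fields ...
 *	nvme_qpair_submit_request(qpair, req);
 *
 *  cb_fn runs from completion processing once the command completes
 *  without being retried.
 */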
void
nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{

	mtx_lock(&qpair->lock);
	_nvme_qpair_submit_request(qpair, req);
	mtx_unlock(&qpair->lock);
}
