nvme_qpair.c revision 241662
1/*-
2 * Copyright (C) 2012 Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_qpair.c 241662 2012-10-18 00:41:31Z jimharris $");
29
30#include <sys/param.h>
31#include <sys/bus.h>
32
33#include "nvme_private.h"
34
35static boolean_t
36nvme_completion_check_retry(const struct nvme_completion *cpl)
37{
38	/*
39	 * TODO: spec is not clear how commands that are aborted due
40	 *  to TLER will be marked.  So for now, it seems
41	 *  NAMESPACE_NOT_READY is the only case where we should
42	 *  look at the DNR bit.
43	 */
44	switch (cpl->sf_sct) {
45	case NVME_SCT_GENERIC:
46		switch (cpl->sf_sc) {
47		case NVME_SC_NAMESPACE_NOT_READY:
48			if (cpl->sf_dnr)
49				return (0);
50			else
51				return (1);
52		case NVME_SC_INVALID_OPCODE:
53		case NVME_SC_INVALID_FIELD:
54		case NVME_SC_COMMAND_ID_CONFLICT:
55		case NVME_SC_DATA_TRANSFER_ERROR:
56		case NVME_SC_ABORTED_POWER_LOSS:
57		case NVME_SC_INTERNAL_DEVICE_ERROR:
58		case NVME_SC_ABORTED_BY_REQUEST:
59		case NVME_SC_ABORTED_SQ_DELETION:
60		case NVME_SC_ABORTED_FAILED_FUSED:
61		case NVME_SC_ABORTED_MISSING_FUSED:
62		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
63		case NVME_SC_COMMAND_SEQUENCE_ERROR:
64		case NVME_SC_LBA_OUT_OF_RANGE:
65		case NVME_SC_CAPACITY_EXCEEDED:
66		default:
67			return (0);
68		}
69	case NVME_SCT_COMMAND_SPECIFIC:
70	case NVME_SCT_MEDIA_ERROR:
71	case NVME_SCT_VENDOR_SPECIFIC:
72	default:
73		return (0);
74	}
75}
76
/*
 * Return a free command tracker for this queue pair, allocating and
 *  initializing a new one on demand when the free list is empty.
 *
 * Returns NULL when the queue already has one tracker per queue entry
 *  (a larger cid would index past the act_tr array) or when the M_NOWAIT
 *  allocation fails.
 *
 * NOTE(review): callout_init_mtx() ties the timer to qpair->lock and the
 *  free list is manipulated without explicit locking here -- presumably
 *  the caller holds qpair->lock; confirm against callers.
 */
struct nvme_tracker *
nvme_qpair_allocate_tracker(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;

	tr = SLIST_FIRST(&qpair->free_tr);
	if (tr == NULL) {
		/*
		 * We can't support more trackers than we have entries in
		 *  our queue, because it would generate invalid indices
		 *  into the qpair's active tracker array.
		 */
		if (qpair->num_tr == qpair->num_entries) {
			return (NULL);
		}

		tr = malloc(sizeof(struct nvme_tracker), M_NVME,
		    M_ZERO | M_NOWAIT);

		if (tr == NULL) {
			return (NULL);
		}

		/*
		 * NOTE(review): bus_dmamap_create()/bus_dmamap_load() return
		 *  values are not checked -- a failure here would leave the
		 *  tracker with invalid maps.
		 */
		bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
		bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);

		/* PRP list is mapped once for the tracker's lifetime. */
		bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
		    sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);

		callout_init_mtx(&tr->timer, &qpair->lock, 0);
		/* cid doubles as the index into qpair->act_tr[]. */
		tr->cid = qpair->num_tr++;
		tr->qpair = qpair;
	} else
		SLIST_REMOVE_HEAD(&qpair->free_tr, slist);

	return (tr);
}
114
115void
116nvme_qpair_process_completions(struct nvme_qpair *qpair)
117{
118	struct nvme_tracker	*tr;
119	struct nvme_request	*req;
120	struct nvme_completion	*cpl;
121	boolean_t		retry, error;
122
123	qpair->num_intr_handler_calls++;
124
125	while (1) {
126		cpl = &qpair->cpl[qpair->cq_head];
127
128		if (cpl->p != qpair->phase)
129			break;
130
131		tr = qpair->act_tr[cpl->cid];
132		req = tr->req;
133
134		KASSERT(tr,
135		    ("completion queue has entries but no active trackers\n"));
136
137		error = cpl->sf_sc || cpl->sf_sct;
138		retry = error && nvme_completion_check_retry(cpl);
139
140		if (error) {
141			nvme_dump_completion(cpl);
142			nvme_dump_command(&tr->req->cmd);
143		}
144
145		qpair->act_tr[cpl->cid] = NULL;
146
147		KASSERT(cpl->cid == tr->cmd.cid,
148		    ("cpl cid does not match cmd cid\n"));
149
150		if (req->cb_fn && !retry)
151			req->cb_fn(req->cb_arg, cpl);
152
153		qpair->sq_head = cpl->sqhd;
154
155		mtx_lock(&qpair->lock);
156		callout_stop(&tr->timer);
157
158		if (retry)
159			nvme_qpair_submit_cmd(qpair, tr);
160		else {
161			if (req->payload_size > 0 || req->uio != NULL)
162				bus_dmamap_unload(qpair->dma_tag,
163				    tr->payload_dma_map);
164
165			nvme_free_request(req);
166			SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);
167		}
168
169		mtx_unlock(&qpair->lock);
170
171		if (++qpair->cq_head == qpair->num_entries) {
172			qpair->cq_head = 0;
173			qpair->phase = !qpair->phase;
174		}
175
176		nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
177		    qpair->cq_head);
178	}
179}
180
/* MSI-X interrupt trampoline: arg is the queue pair for this vector. */
static void
nvme_qpair_msix_handler(void *arg)
{

	nvme_qpair_process_completions((struct nvme_qpair *)arg);
}
188
/*
 * One-time construction of a queue pair: wire up the MSI-X interrupt
 *  (when enabled), create the DMA tag, allocate and bus-map the
 *  submission (cmd) and completion (cpl) rings, and initialize the
 *  tracker bookkeeping.
 *
 * Ordering matters here: the DMA tag must exist before any map is
 *  created, and maps must exist before they are loaded.
 */
void
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
    uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size,
    struct nvme_controller *ctrlr)
{

	qpair->id = id;
	qpair->vector = vector;
	qpair->num_entries = num_entries;
	qpair->max_xfer_size = max_xfer_size;
	qpair->ctrlr = ctrlr;

	/*
	 * First time through the completion queue, HW will set phase
	 *  bit on completions to 1.  So set this to 1 here, indicating
	 *  we're looking for a 1 to know which entries have completed.
	 *  we'll toggle the bit each time when the completion queue
	 *  rolls over.
	 */
	qpair->phase = 1;

	if (ctrlr->msix_enabled) {

		/*
		 * MSI-X vector resource IDs start at 1, so we add one to
		 *  the queue's vector to get the corresponding rid to use.
		 */
		qpair->rid = vector + 1;

		/*
		 * NOTE(review): bus_alloc_resource_any() and
		 *  bus_setup_intr() results are not checked -- a NULL
		 *  res here would be dereferenced below.
		 */
		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
		    &qpair->rid, RF_ACTIVE);

		bus_setup_intr(ctrlr->dev, qpair->res,
		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
		    nvme_qpair_msix_handler, qpair, &qpair->tag);
	}

	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);

	/*
	 * Tag bounds: 8-byte alignment, PAGE_SIZE boundary, segments
	 *  sized so a max_xfer_size transfer fits in one PRP list.
	 */
	bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
	    sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
	    (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
	    NULL, NULL, &qpair->dma_tag);

	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 0;
	qpair->num_tr = 0;
	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
	qpair->cmd = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	qpair->cpl = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);

	/* nvme_single_map stashes the ring's bus address in *_bus_addr. */
	bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
	    qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
	    nvme_single_map, &qpair->cmd_bus_addr, 0);
	bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
	    qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
	    nvme_single_map, &qpair->cpl_bus_addr, 0);

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

	SLIST_INIT(&qpair->free_tr);

	/* Active-tracker table, indexed by command cid. */
	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
	    M_NVME, M_ZERO | M_NOWAIT);
}
265
266static void
267nvme_qpair_destroy(struct nvme_qpair *qpair)
268{
269	struct nvme_tracker *tr;
270
271	if (qpair->tag)
272		bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);
273
274	if (qpair->res)
275		bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
276		    rman_get_rid(qpair->res), qpair->res);
277
278	if (qpair->dma_tag)
279		bus_dma_tag_destroy(qpair->dma_tag);
280
281	if (qpair->act_tr)
282		free(qpair->act_tr, M_NVME);
283
284	while (!SLIST_EMPTY(&qpair->free_tr)) {
285		tr = SLIST_FIRST(&qpair->free_tr);
286		SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
287		bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map);
288		bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map);
289		free(tr, M_NVME);
290	}
291}
292
/*
 * Tear down the admin queue pair's ring memory, then perform the common
 *  queue pair teardown.  The unload -> map destroy -> contigfree order
 *  for each ring must be preserved: a map cannot be destroyed while
 *  loaded, and the memory cannot be freed while mapped.
 */
void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

	/*
	 * For NVMe, you don't send delete queue commands for the admin
	 *  queue, so we just need to unload and free the cmd and cpl memory.
	 */
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);

	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);

	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);

	nvme_qpair_destroy(qpair);
}
314
315static void
316nvme_free_cmd_ring(void *arg, const struct nvme_completion *status)
317{
318	struct nvme_qpair *qpair;
319
320	qpair = (struct nvme_qpair *)arg;
321	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
322	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
323	contigfree(qpair->cmd,
324	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);
325	qpair->cmd = NULL;
326}
327
328static void
329nvme_free_cpl_ring(void *arg, const struct nvme_completion *status)
330{
331	struct nvme_qpair *qpair;
332
333	qpair = (struct nvme_qpair *)arg;
334	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
335	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
336	contigfree(qpair->cpl,
337	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);
338	qpair->cpl = NULL;
339}
340
/*
 * Destroy an I/O queue pair: ask the controller to delete the hardware
 *  SQ and CQ (in that order -- the SQ must go first), busy-waiting for
 *  each callback to free the corresponding ring, then perform the common
 *  software teardown.
 *
 * NOTE(review): these spin-waits never time out; a controller that never
 *  completes the delete command would hang here forever.
 */
void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_controller *ctrlr = qpair->ctrlr;

	/* num_entries == 0 means this qpair was never constructed. */
	if (qpair->num_entries > 0) {

		nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring,
		    qpair);
		/* Spin until free_cmd_ring sets qpair->cmd to NULL. */
		while (qpair->cmd)
			DELAY(5);

		nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring,
		    qpair);
		/* Spin until free_cpl_ring sets qpair->cpl to NULL. */
		while (qpair->cpl)
			DELAY(5);

		nvme_qpair_destroy(qpair);
	}
}
363
/*
 * Command timeout callout handler.  Currently a no-op stub; arg (the
 *  struct nvme_tracker * passed to callout_reset()) is unused.
 */
static void
nvme_timeout(void *arg)
{
	/*
	 * TODO: Add explicit abort operation here, once nvme(4) supports
	 *  abort commands.
	 */
}
372
/*
 * Submit the tracker's command to the hardware submission queue.
 *
 * Must be called with qpair->lock held (the full-queue spin below
 *  drops and re-acquires it, and process_completions calls this with
 *  the lock held).
 */
void
nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_request *req;

	req = tr->req;
	/* Stamp the command with the tracker's cid and mark it active. */
	req->cmd.cid = tr->cid;
	qpair->act_tr[tr->cid] = tr;

	/*
	 * TODO: rather than spin until entries free up, put this tracker
	 *  on a queue, and submit from the interrupt handler when
	 *  entries free up.
	 */
	if ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head) {
		do {
			/* Drop the lock so the interrupt handler can
			 *  process completions and advance sq_head. */
			mtx_unlock(&qpair->lock);
			DELAY(5);
			mtx_lock(&qpair->lock);
		} while ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head);
	}

	/* Arm the (currently no-op) per-command timeout. */
	callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr);

	/* Copy the command from the tracker to the submission queue. */
	memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));

	if (++qpair->sq_tail == qpair->num_entries)
		qpair->sq_tail = 0;

	/* Ensure the command is visible in memory before ringing the
	 *  doorbell. */
	wmb();
	nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
	    qpair->sq_tail);

	qpair->num_cmds++;
}
409