/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_qpair.c 248766 2013-03-26 21:48:41Z jimharris $");

#include <sys/param.h>
#include <sys/bus.h>

#include <dev/pci/pcivar.h>

#include "nvme_private.h"

static void	_nvme_qpair_submit_request(struct nvme_qpair *qpair,
					   struct nvme_request *req);

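/*
 * Return TRUE if a failed command may be retried: aborts requested by
 *  the host, and namespace-not-ready completions without the DNR bit
 *  set, are treated as transient conditions.
 */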
static boolean_t
nvme_completion_is_retry(const struct nvme_completion *cpl)
{
	/*
	 * TODO: the spec is not clear on how commands that are aborted
	 *  due to TLER will be marked.  So for now, NAMESPACE_NOT_READY
	 *  is the only case where we look at the DNR bit.
	 */
	switch (cpl->status.sct) {
	case NVME_SCT_GENERIC:
		switch (cpl->status.sc) {
		case NVME_SC_ABORTED_BY_REQUEST:
			return (1);
		case NVME_SC_NAMESPACE_NOT_READY:
			if (cpl->status.dnr)
				return (0);
			else
				return (1);
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return (0);
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return (0);
	}
}

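/*
 * Initialize a tracker: create DMA maps for the payload and the PRP
 *  list, load the PRP list so its bus address is known up front, and
 *  set up the timeout callout.
 */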
static void
nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    uint16_t cid)
{

	bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);

	bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
	    sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);

	callout_init(&tr->timer, 1);
	tr->cid = cid;
	tr->qpair = qpair;
}

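/*
 * Complete a tracker: invoke the request's callback unless the command
 *  will be retried, then either resubmit the tracker (retry) or recycle
 *  it and kick off the next queued request, if any.
 */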
static void
nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    struct nvme_completion *cpl, boolean_t print_on_error)
{
	struct nvme_request	*req;
	boolean_t		retry, error;

	req = tr->req;
	error = nvme_completion_is_error(cpl);
	retry = error && nvme_completion_is_retry(cpl) &&
	   req->retries < nvme_retry_count;

	if (error && print_on_error) {
		nvme_dump_completion(cpl);
		nvme_dump_command(&req->cmd);
	}

	qpair->act_tr[cpl->cid] = NULL;

	KASSERT(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n"));

	if (req->cb_fn && !retry)
		req->cb_fn(req->cb_arg, cpl);

	mtx_lock(&qpair->lock);
	callout_stop(&tr->timer);

	if (retry) {
		req->retries++;
		nvme_qpair_submit_tracker(qpair, tr);
	} else {
		if (req->payload_size > 0 || req->uio != NULL)
			bus_dmamap_unload(qpair->dma_tag,
			    tr->payload_dma_map);

		nvme_free_request(req);
		tr->req = NULL;

		TAILQ_REMOVE(&qpair->outstanding_tr, tr, tailq);
		TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);

		/*
		 * If the controller is in the middle of resetting, don't
		 *  try to submit queued requests here - let the reset logic
		 *  handle that instead.
		 */
		if (!STAILQ_EMPTY(&qpair->queued_req) &&
		    !qpair->ctrlr->is_resetting) {
			req = STAILQ_FIRST(&qpair->queued_req);
			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
			_nvme_qpair_submit_request(qpair, req);
		}
	}

	mtx_unlock(&qpair->lock);
}

static void
nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
    struct nvme_tracker *tr, uint32_t sct, uint32_t sc,
    boolean_t print_on_error)
{
	struct nvme_completion	cpl;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.cid = tr->cid;
	cpl.status.sct = sct;
	cpl.status.sc = sc;
	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}

void
nvme_qpair_process_completions(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;
	struct nvme_completion	*cpl;

	qpair->num_intr_handler_calls++;

	if (!qpair->is_enabled)
		/*
		 * qpair is not enabled, likely because a controller reset
		 *  is in progress.  Ignore the interrupt - any I/O that was
		 *  associated with this interrupt will get retried when the
		 *  reset is complete.
		 */
		return;

	while (1) {
		cpl = &qpair->cpl[qpair->cq_head];

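		/*
		 * An entry is new only while its phase bit matches the
		 *  qpair's current phase; once it differs, all new
		 *  completions have been consumed.
		 */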
		if (cpl->status.p != qpair->phase)
			break;

		tr = qpair->act_tr[cpl->cid];

		if (tr != NULL) {
			nvme_qpair_complete_tracker(qpair, tr, cpl, TRUE);
			qpair->sq_head = cpl->sqhd;
		} else {
			printf("cpl does not map to outstanding cmd\n");
			nvme_dump_completion(cpl);
			KASSERT(0, ("received completion for unknown cmd\n"));
		}

		if (++qpair->cq_head == qpair->num_entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

		nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
		    qpair->cq_head);
	}
}

214
215static void
216nvme_qpair_msix_handler(void *arg)
217{
218	struct nvme_qpair *qpair = arg;
219
220	nvme_qpair_process_completions(qpair);
221}
222
223void
224nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
225    uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
226    uint32_t max_xfer_size, struct nvme_controller *ctrlr)
227{
228	struct nvme_tracker	*tr;
229	uint32_t		i;
230
231	qpair->id = id;
232	qpair->vector = vector;
233	qpair->num_entries = num_entries;
234#ifdef CHATHAM2
235	/*
236	 * Chatham prototype board starts having issues at higher queue
237	 *  depths.  So use a conservative estimate here of no more than 64
238	 *  outstanding I/O per queue at any one point.
239	 */
240	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
241		num_trackers = min(num_trackers, 64);
242#endif
243	qpair->num_trackers = num_trackers;
244	qpair->max_xfer_size = max_xfer_size;
245	qpair->ctrlr = ctrlr;
246
247	if (ctrlr->msix_enabled) {
248
249		/*
250		 * MSI-X vector resource IDs start at 1, so we add one to
251		 *  the queue's vector to get the corresponding rid to use.
252		 */
253		qpair->rid = vector + 1;
254
255		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
256		    &qpair->rid, RF_ACTIVE);
257
258		bus_setup_intr(ctrlr->dev, qpair->res,
259		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
260		    nvme_qpair_msix_handler, qpair, &qpair->tag);
261	}
262
263	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);
264
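	/*
	 * Create a DMA tag covering the largest allowed transfer:
	 *  8-byte alignment, segments bounded at PAGE_SIZE, and one
	 *  segment per page plus one extra in case the buffer does not
	 *  start on a page boundary.
	 */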
	bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
	    sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
	    (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
	    NULL, NULL, &qpair->dma_tag);

	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 0;

	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
	qpair->cmd = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	qpair->cpl = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);

	bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
	    qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
	    nvme_single_map, &qpair->cmd_bus_addr, 0);
	bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
	    qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
	    nvme_single_map, &qpair->cpl_bus_addr, 0);

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

	TAILQ_INIT(&qpair->free_tr);
	TAILQ_INIT(&qpair->outstanding_tr);
	STAILQ_INIT(&qpair->queued_req);

	for (i = 0; i < qpair->num_trackers; i++) {
		tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_NOWAIT);

		if (tr == NULL) {
			printf("warning: nvme tracker malloc failed\n");
			break;
		}

		nvme_qpair_construct_tracker(qpair, tr, i);
		TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
	}

	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
	    M_NVME, M_ZERO | M_NOWAIT);
}

static void
nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;

	if (qpair->tag)
		bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);

	if (qpair->res)
		bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(qpair->res), qpair->res);

	if (qpair->cmd) {
		bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
		bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
		contigfree(qpair->cmd,
		    qpair->num_entries * sizeof(struct nvme_command), M_NVME);
	}

	if (qpair->cpl) {
		bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
		bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
		contigfree(qpair->cpl,
		    qpair->num_entries * sizeof(struct nvme_completion),
		    M_NVME);
	}

	if (qpair->dma_tag)
		bus_dma_tag_destroy(qpair->dma_tag);

	if (qpair->act_tr)
		free(qpair->act_tr, M_NVME);

	while (!TAILQ_EMPTY(&qpair->free_tr)) {
		tr = TAILQ_FIRST(&qpair->free_tr);
		TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
		bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map);
		bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map);
		free(tr, M_NVME);
	}
}

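/*
 * Manually complete any outstanding asynchronous event requests.  AERs
 *  are held by the controller until an event occurs, so they would
 *  otherwise never complete during teardown.
 */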
static void
nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;

	tr = TAILQ_FIRST(&qpair->outstanding_tr);
	while (tr != NULL) {
		if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
			nvme_qpair_manual_complete_tracker(qpair, tr,
			    NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION,
			    FALSE);
			tr = TAILQ_FIRST(&qpair->outstanding_tr);
		} else {
			tr = TAILQ_NEXT(tr, tailq);
		}
	}
}

void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

	nvme_admin_qpair_abort_aers(qpair);
	nvme_qpair_destroy(qpair);
}

void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{

	nvme_qpair_destroy(qpair);
}

static void
nvme_abort_complete(void *arg, const struct nvme_completion *status)
{
	struct nvme_tracker	*tr = arg;

	/*
	 * If cdw0 == 1, the controller was not able to abort the command
	 *  we requested.  We still need to check the active tracker array,
	 *  to cover the race where the I/O timed out at the same time the
	 *  controller was completing it.
	 */
	if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) {
		/*
		 * An I/O has timed out, and the controller was unable to
		 *  abort it for some reason.  Construct a fake completion
		 *  status, and then complete the I/O's tracker manually.
		 */
		printf("abort command failed, aborting command manually\n");
		nvme_qpair_manual_complete_tracker(tr->qpair, tr,
		    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
	}
}

static void
nvme_timeout(void *arg)
{
	struct nvme_tracker	*tr = arg;
	struct nvme_qpair	*qpair = tr->qpair;
	struct nvme_controller	*ctrlr = qpair->ctrlr;
	union csts_register	csts;

	/* Read csts to get value of cfs - controller fatal status. */
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
		/*
		 * If aborts are enabled, only use them if the controller is
		 *  not reporting fatal status.
		 */
		nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
		    nvme_abort_complete, tr);
	} else
		nvme_ctrlr_reset(ctrlr);
}

void
nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_request	*req;
	struct nvme_controller	*ctrlr;

	mtx_assert(&qpair->lock, MA_OWNED);

	req = tr->req;
	req->cmd.cid = tr->cid;
	qpair->act_tr[tr->cid] = tr;
	ctrlr = qpair->ctrlr;

	if (req->timeout)
#if __FreeBSD_version >= 800030
		callout_reset_curcpu(&tr->timer, ctrlr->timeout_period * hz,
		    nvme_timeout, tr);
#else
		callout_reset(&tr->timer, ctrlr->timeout_period * hz,
		    nvme_timeout, tr);
#endif

	/* Copy the command from the tracker to the submission queue. */
	memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));

	if (++qpair->sq_tail == qpair->num_entries)
		qpair->sq_tail = 0;

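	/*
	 * Ensure the command is globally visible in host memory before
	 *  ringing the doorbell.
	 */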
	wmb();
	nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
	    qpair->sq_tail);

	qpair->num_cmds++;
}

static void
_nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{
	struct nvme_tracker	*tr;
	int			err;

	mtx_assert(&qpair->lock, MA_OWNED);

	tr = TAILQ_FIRST(&qpair->free_tr);

	if (tr == NULL || !qpair->is_enabled) {
		/*
		 * No tracker is available, or the qpair is disabled due to
		 *  an in-progress controller-level reset.
		 *
		 * Put the request on the qpair's request queue to be processed
		 *  when a tracker frees up via a command completion or when
		 *  the controller reset is completed.
		 */
		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
		return;
	}

	TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
	TAILQ_INSERT_TAIL(&qpair->outstanding_tr, tr, tailq);
	tr->req = req;

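	/*
	 * Map the payload before submission: uio-based requests go
	 *  through bus_dmamap_load_uio, flat payloads through
	 *  bus_dmamap_load, and zero-length commands are submitted
	 *  directly.  The map callbacks fill in the PRPs and submit
	 *  the tracker once the mapping completes.
	 */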
	if (req->uio == NULL) {
		if (req->payload_size > 0) {
			err = bus_dmamap_load(tr->qpair->dma_tag,
					      tr->payload_dma_map, req->payload,
					      req->payload_size,
					      nvme_payload_map, tr, 0);
			if (err != 0)
				panic("bus_dmamap_load returned non-zero!\n");
		} else
			nvme_qpair_submit_tracker(tr->qpair, tr);
	} else {
		err = bus_dmamap_load_uio(tr->qpair->dma_tag,
					  tr->payload_dma_map, req->uio,
					  nvme_payload_map_uio, tr, 0);
		if (err != 0)
			panic("bus_dmamap_load_uio returned non-zero!\n");
	}
}

515
516void
517nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
518{
519
520	mtx_lock(&qpair->lock);
521	_nvme_qpair_submit_request(qpair, req);
522	mtx_unlock(&qpair->lock);
523}
524
525static void
526nvme_qpair_enable(struct nvme_qpair *qpair)
527{
528
529	qpair->is_enabled = TRUE;
530}
531
532void
533nvme_qpair_reset(struct nvme_qpair *qpair)
534{
535
536	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
537
	/*
	 * First time through the completion queue, HW will set the phase
	 *  bit on completions to 1.  So set this to 1 here, indicating
	 *  we're looking for a 1 to know which entries have completed.
	 *  We'll toggle the bit each time the completion queue rolls over.
	 */
	qpair->phase = 1;

	memset(qpair->cmd, 0,
	    qpair->num_entries * sizeof(struct nvme_command));
	memset(qpair->cpl, 0,
	    qpair->num_entries * sizeof(struct nvme_completion));
}

void
nvme_admin_qpair_enable(struct nvme_qpair *qpair)
{

	nvme_qpair_enable(qpair);
}

void
nvme_io_qpair_enable(struct nvme_qpair *qpair)
{
	STAILQ_HEAD(, nvme_request)	temp;
	struct nvme_tracker		*tr;
	struct nvme_tracker		*tr_temp;
	struct nvme_request		*req;

	/*
	 * Manually abort each outstanding I/O.  This normally results in a
	 *  retry, unless the retry count on the associated request has
	 *  reached its limit.
	 */
	TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
		device_printf(qpair->ctrlr->dev,
		    "aborting outstanding i/o\n");
		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
		    NVME_SC_ABORTED_BY_REQUEST, TRUE);
	}

	mtx_lock(&qpair->lock);

	nvme_qpair_enable(qpair);

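	/*
	 * Requests that arrived while the qpair was disabled were parked
	 *  on queued_req.  Move them to a local list and resubmit each
	 *  one now that the qpair is enabled again.
	 */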
	STAILQ_INIT(&temp);
	STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);

	while (!STAILQ_EMPTY(&temp)) {
		req = STAILQ_FIRST(&temp);
		STAILQ_REMOVE_HEAD(&temp, stailq);
		device_printf(qpair->ctrlr->dev,
		    "resubmitting queued i/o\n");
		nvme_dump_command(&req->cmd);
		_nvme_qpair_submit_request(qpair, req);
	}

	mtx_unlock(&qpair->lock);
}

static void
nvme_qpair_disable(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr;

	qpair->is_enabled = FALSE;
	mtx_lock(&qpair->lock);
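	/*
	 * Stop the timeout callouts so in-flight commands do not trigger
	 *  nvme_timeout while the qpair is disabled; the trackers
	 *  themselves will be completed or retried later.
	 */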
	TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
		callout_stop(&tr->timer);
	mtx_unlock(&qpair->lock);
}

void
nvme_admin_qpair_disable(struct nvme_qpair *qpair)
{

	nvme_qpair_disable(qpair);
	nvme_admin_qpair_abort_aers(qpair);
}

void
nvme_io_qpair_disable(struct nvme_qpair *qpair)
{

	nvme_qpair_disable(qpair);
}
625