/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

struct nvmf_host_command {
	struct nvmf_request *req;
	TAILQ_ENTRY(nvmf_host_command) link;
	uint16_t cid;
};

struct nvmf_host_qpair {
	struct nvmf_softc *sc;
	struct nvmf_qpair *qp;

	bool	sq_flow_control;
	bool	shutting_down;
	u_int	allocating;
	u_int	num_commands;
	uint16_t sqhd;
	uint16_t sqtail;

	struct mtx lock;

	TAILQ_HEAD(, nvmf_host_command) free_commands;
	STAILQ_HEAD(, nvmf_request) pending_requests;

	/* Indexed by cid. */
	struct nvmf_host_command **active_commands;

	char	name[16];
};

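/*
 * Allocate a request and its command capsule for submission on a
 * queue pair.  The "allocating" counter keeps a concurrent shutdown
 * from freeing the transport queue pair while the (possibly
 * sleeping) capsule allocation is in progress.
 */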
struct nvmf_request *
nvmf_allocate_request(struct nvmf_host_qpair *qp, void *sqe,
    nvmf_request_complete_t *cb, void *cb_arg, int how)
{
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));

	req = malloc(sizeof(*req), M_NVMF, how | M_ZERO);
	if (req == NULL)
		return (NULL);

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		free(req, M_NVMF);
		return (NULL);
	}
	qp->allocating++;
	MPASS(qp->allocating != 0);
	mtx_unlock(&qp->lock);

	req->qp = qp;
	req->cb = cb;
	req->cb_arg = cb_arg;
	req->nc = nvmf_allocate_command(nq, sqe, how);
	if (req->nc == NULL) {
		free(req, M_NVMF);
		req = NULL;
	}

	mtx_lock(&qp->lock);
	qp->allocating--;
	if (qp->allocating == 0 && qp->shutting_down)
		wakeup(qp);
	mtx_unlock(&qp->lock);

	return (req);
}

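/*
 * Complete a request locally with a synthesized Command Aborted By
 * Host status rather than a completion from the controller.
 */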
static void
nvmf_abort_request(struct nvmf_request *req, uint16_t cid)
{
	struct nvme_completion cqe;

	memset(&cqe, 0, sizeof(cqe));
	cqe.cid = cid;
	cqe.status = htole16(NVMEF(NVME_STATUS_SCT, NVME_SCT_PATH_RELATED) |
	    NVMEF(NVME_STATUS_SC, NVME_SC_COMMAND_ABORTED_BY_HOST));
	req->cb(req->cb_arg, &cqe);
}

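/* Free a request and its command capsule, if any. */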
void
nvmf_free_request(struct nvmf_request *req)
{
	if (req->nc != NULL)
		nvmf_free_capsule(req->nc);
	free(req, M_NVMF);
}

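/*
 * Transmit a request's command capsule using the command slot's CID.
 * A transmit failure disconnects the association.
 */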
static void
nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd)
{
	struct nvmf_softc *sc = qp->sc;
	struct nvme_command *sqe;
	struct nvmf_capsule *nc;
	int error;

	nc = cmd->req->nc;
	sqe = nvmf_capsule_sqe(nc);

	/*
	 * NB: Don't bother byte-swapping the cid so that receive
	 * doesn't have to swap.
	 */
	sqe->cid = cmd->cid;

	error = nvmf_transmit_capsule(nc);
	if (error != 0) {
		device_printf(sc->dev,
		    "failed to transmit capsule: %d, disconnecting\n", error);
		nvmf_disconnect(sc);
		return;
	}

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_tx_traffic, 1);
}

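/*
 * Transport error callback.  Report the error unless it is an
 * orderly close of a queue pair during detach, then disconnect the
 * association.
 */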
static void
nvmf_qp_error(void *arg, int error)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;

	/* Ignore simple close of queue pairs during shutdown. */
	if (!(sc->detaching && error == 0))
		device_printf(sc->dev, "error %d on %s, disconnecting\n", error,
		    qp->name);
	nvmf_disconnect(sc);
}

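/*
 * Handle a response capsule from the controller.  The CID selects
 * the command slot; the slot is either returned to the free list or
 * reused at once to dispatch the next pending request before the
 * completion callback runs.
 */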
static void
nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	const struct nvme_completion *cqe;
	uint16_t cid;

	cqe = nvmf_capsule_cqe(nc);

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_rx_traffic, 1);

	/*
	 * NB: Don't bother byte-swapping the cid as transmit doesn't
	 * swap either.
	 */
	cid = cqe->cid;

	if (cid >= qp->num_commands) {
		device_printf(sc->dev,
		    "received invalid CID %u, disconnecting\n", cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	/*
	 * If the queue has been shutdown due to an error, drop the
	 * response.
	 */
	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		device_printf(sc->dev,
		    "received completion for CID %u on shutdown %s\n", cid,
		    qp->name);
		mtx_unlock(&qp->lock);
		nvmf_free_capsule(nc);
		return;
	}

	cmd = qp->active_commands[cid];
	if (cmd == NULL) {
		mtx_unlock(&qp->lock);
		device_printf(sc->dev,
		    "received completion for inactive CID %u, disconnecting\n",
		    cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	KASSERT(cmd->cid == cid, ("%s: CID mismatch", __func__));
	req = cmd->req;
	cmd->req = NULL;
	if (STAILQ_EMPTY(&qp->pending_requests)) {
		qp->active_commands[cid] = NULL;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
		mtx_unlock(&qp->lock);
	} else {
		cmd->req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		mtx_unlock(&qp->lock);
		nvmf_dispatch_command(qp, cmd);
	}

	req->cb(req->cb_arg, cqe);
	nvmf_free_capsule(nc);
	nvmf_free_request(req);
}

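/*
 * Construct a host queue pair from connection handoff parameters and
 * allocate its fixed pool of command slots.
 */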
struct nvmf_host_qpair *
nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
    struct nvmf_handoff_qpair_params *handoff, const char *name)
{
	struct nvmf_host_command *cmd, *ncmd;
	struct nvmf_host_qpair *qp;
	u_int i;

	qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO);
	qp->sc = sc;
	qp->sq_flow_control = handoff->sq_flow_control;
	qp->sqhd = handoff->sqhd;
	qp->sqtail = handoff->sqtail;
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF);

	/*
	 * Allocate a spare command slot for each pending AER command
	 * on the admin queue.
	 */
	qp->num_commands = handoff->qsize - 1;
	if (handoff->admin)
		qp->num_commands += sc->num_aer;

	qp->active_commands = malloc(sizeof(*qp->active_commands) *
	    qp->num_commands, M_NVMF, M_WAITOK | M_ZERO);
	TAILQ_INIT(&qp->free_commands);
	for (i = 0; i < qp->num_commands; i++) {
		cmd = malloc(sizeof(*cmd), M_NVMF, M_WAITOK | M_ZERO);
		cmd->cid = i;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
	}
	STAILQ_INIT(&qp->pending_requests);

	qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error,
	    qp, nvmf_receive_capsule, qp);
	if (qp->qp == NULL) {
		TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
			TAILQ_REMOVE(&qp->free_commands, cmd, link);
			free(cmd, M_NVMF);
		}
		free(qp->active_commands, M_NVMF);
		mtx_destroy(&qp->lock);
		free(qp, M_NVMF);
		return (NULL);
	}

	return (qp);
}

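/*
 * Shut down a queue pair: detach it from the transport queue pair,
 * wait for in-progress request allocations to drain, and abort all
 * active and pending requests.
 */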
void
nvmf_shutdown_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;

	if (nq == NULL) {
		while (qp->shutting_down)
			mtx_sleep(qp, &qp->lock, 0, "nvmfqpsh", 0);
		mtx_unlock(&qp->lock);
		return;
	}
	qp->shutting_down = true;
	while (qp->allocating != 0)
		mtx_sleep(qp, &qp->lock, 0, "nvmfqpqu", 0);
	mtx_unlock(&qp->lock);

	nvmf_free_qpair(nq);

	/*
	 * Abort outstanding requests.  Active requests will have
	 * their I/O completions invoked and associated capsules freed
	 * by the transport layer via nvmf_free_qpair.  Pending
	 * requests must have their I/O completion invoked via
	 * nvmf_abort_capsule_data.
	 */
	for (u_int i = 0; i < qp->num_commands; i++) {
		cmd = qp->active_commands[i];
		if (cmd != NULL) {
			if (!cmd->req->aer)
				printf("%s: aborted active command %p (CID %u)\n",
				    __func__, cmd->req, cmd->cid);

			/* This was freed by nvmf_free_qpair. */
			cmd->req->nc = NULL;
			nvmf_abort_request(cmd->req, cmd->cid);
			nvmf_free_request(cmd->req);
			free(cmd, M_NVMF);
		}
	}
	while (!STAILQ_EMPTY(&qp->pending_requests)) {
		req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		if (!req->aer)
			printf("%s: aborted pending command %p\n", __func__,
			    req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
	}

	mtx_lock(&qp->lock);
	qp->shutting_down = false;
	mtx_unlock(&qp->lock);
	wakeup(qp);
}

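/* Shut down a queue pair and release all of its resources. */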
void
nvmf_destroy_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd, *ncmd;

	nvmf_shutdown_qp(qp);

	TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
		TAILQ_REMOVE(&qp->free_commands, cmd, link);
		free(cmd, M_NVMF);
	}
	free(qp->active_commands, M_NVMF);
	mtx_destroy(&qp->lock);
	free(qp, M_NVMF);
}

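/*
 * Submit a request on its queue pair.  If no command slot is free,
 * the request is queued and dispatched later from
 * nvmf_receive_capsule() when a slot is released.
 */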
void
nvmf_submit_request(struct nvmf_request *req)
{
	struct nvmf_host_qpair *qp;
	struct nvmf_host_command *cmd;

	qp = req->qp;
	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		printf("%s: aborted pending command %p\n", __func__, req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
		return;
	}
	cmd = TAILQ_FIRST(&qp->free_commands);
	if (cmd == NULL) {
		/*
		 * Queue this request.  Will be sent after enough
		 * in-flight requests have completed.
		 */
		STAILQ_INSERT_TAIL(&qp->pending_requests, req, link);
		mtx_unlock(&qp->lock);
		return;
	}

	TAILQ_REMOVE(&qp->free_commands, cmd, link);
	KASSERT(qp->active_commands[cmd->cid] == NULL,
	    ("%s: CID already busy", __func__));
	qp->active_commands[cmd->cid] = cmd;
	cmd->req = req;
	mtx_unlock(&qp->lock);
	nvmf_dispatch_command(qp, cmd);
}