/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/_bitset.h>
#include <sys/bitset.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_var.h>

/*
 * A bitmask of command ID values.  This is used to detect duplicate
 * commands with the same ID.
 */
#define	NUM_CIDS	(UINT16_MAX + 1)
BITSET_DEFINE(cidset, NUM_CIDS);
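
/*
 * A CID is claimed with BIT_TEST_SET_ATOMIC() when its command
 * arrives and released with BIT_CLR_ATOMIC() when the completion is
 * sent, so a set bit marks a command that is still in flight.
 */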

struct nvmft_qpair {
	struct nvmft_controller *ctrlr;
	struct nvmf_qpair *qp;
	struct cidset *cids;

	bool	admin;
	bool	sq_flow_control;
	uint16_t qid;
	u_int	qsize;
	uint16_t sqhd;
	uint16_t sqtail;
	volatile u_int qp_refs;		/* Internal references on 'qp'. */

	struct mtx lock;

	char	name[16];
};

static int	_nvmft_send_generic_error(struct nvmft_qpair *qp,
    struct nvmf_capsule *nc, uint8_t sc_status);

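/*
 * Error callback invoked by the transport when a queue pair fails.
 * An error of 0 indicates the connection was closed cleanly.
 */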
static void
nvmft_qpair_error(void *arg, int error)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;

	/*
	 * XXX: The Linux TCP initiator sends a RST immediately after
	 * the FIN, so treat ECONNRESET as plain EOF to avoid spurious
	 * errors on shutdown.
	 */
	if (error == ECONNRESET)
		error = 0;

	if (error != 0)
		nvmft_printf(ctrlr, "error %d on %s\n", error, qp->name);
	nvmft_controller_error(ctrlr, qp, error);
}

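/*
 * Receive callback for incoming command capsules: drop commands that
 * arrive before the queue is bound to a controller, validate the
 * capsule, reject duplicate CIDs, and dispatch the command to the
 * admin or I/O handler.
 */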
static void
nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;
	const struct nvme_command *cmd;
	uint8_t sc_status;

	cmd = nvmf_capsule_sqe(nc);
	if (ctrlr == NULL) {
		printf("NVMFT: %s received CID %u opcode %u on newborn queue\n",
		    qp->name, le16toh(cmd->cid), cmd->opc);
		nvmf_free_capsule(nc);
		return;
	}

	sc_status = nvmf_validate_command_capsule(nc);
	if (sc_status != NVME_SC_SUCCESS) {
		_nvmft_send_generic_error(qp, nc, sc_status);
		nvmf_free_capsule(nc);
		return;
	}

	/* Don't bother byte-swapping CID. */
	if (BIT_TEST_SET_ATOMIC(NUM_CIDS, cmd->cid, qp->cids)) {
		_nvmft_send_generic_error(qp, nc, NVME_SC_COMMAND_ID_CONFLICT);
		nvmf_free_capsule(nc);
		return;
	}

	if (qp->admin)
		nvmft_handle_admin_command(ctrlr, nc);
	else
		nvmft_handle_io_command(qp, qp->qid, nc);
}

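/*
 * Create the in-kernel state for a queue pair handed off from
 * userland and register the error and receive callbacks with the
 * transport.  Returns NULL on failure.
 */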
struct nvmft_qpair *
nvmft_qpair_init(enum nvmf_trtype trtype,
    const struct nvmf_handoff_qpair_params *handoff, uint16_t qid,
    const char *name)
{
	struct nvmft_qpair *qp;

	qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO);
	qp->admin = handoff->admin;
	qp->sq_flow_control = handoff->sq_flow_control;
	qp->qsize = handoff->qsize;
	qp->qid = qid;
	qp->sqhd = handoff->sqhd;
	qp->sqtail = handoff->sqtail;
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF);
	qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);

	qp->qp = nvmf_allocate_qpair(trtype, true, handoff, nvmft_qpair_error,
	    qp, nvmft_receive_capsule, qp);
	if (qp->qp == NULL) {
		mtx_destroy(&qp->lock);
		free(qp->cids, M_NVMFT);
		free(qp, M_NVMFT);
		return (NULL);
	}

	refcount_init(&qp->qp_refs, 1);
	return (qp);
}

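/*
 * Disassociate the transport queue pair and drop the initial
 * reference.  The transport queue pair is freed once any in-flight
 * transmits holding temporary references complete.
 */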
void
nvmft_qpair_shutdown(struct nvmft_qpair *qp)
{
	struct nvmf_qpair *nq;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;
	mtx_unlock(&qp->lock);
	if (nq != NULL && refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
}

void
nvmft_qpair_destroy(struct nvmft_qpair *qp)
{
	nvmft_qpair_shutdown(qp);
	mtx_destroy(&qp->lock);
	free(qp->cids, M_NVMFT);
	free(qp, M_NVMFT);
}

struct nvmft_controller *
nvmft_qpair_ctrlr(struct nvmft_qpair *qp)
{
	return (qp->ctrlr);
}

uint16_t
nvmft_qpair_id(struct nvmft_qpair *qp)
{
	return (qp->qid);
}

const char *
nvmft_qpair_name(struct nvmft_qpair *qp)
{
	return (qp->name);
}

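/*
 * Transmit a completion capsule.  A temporary reference keeps the
 * transport queue pair alive across the transmit; if the queue pair
 * has already been shut down, fail with ENOTCONN.  SQHD is advanced
 * only when SQ flow control is enabled.
 */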
static int
_nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	struct nvme_completion cpl;
	struct nvmf_qpair *nq;
	struct nvmf_capsule *rc;
	int error;

	memcpy(&cpl, cqe, sizeof(cpl));
	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);

	/* Set SQHD. */
	if (qp->sq_flow_control) {
		qp->sqhd = (qp->sqhd + 1) % qp->qsize;
		cpl.sqhd = htole16(qp->sqhd);
	} else
		cpl.sqhd = 0;
	mtx_unlock(&qp->lock);

	rc = nvmf_allocate_response(nq, &cpl, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

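/*
 * Mark a command as completed, releasing its CID for reuse without
 * transmitting a completion capsule.
 */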
void
nvmft_command_completed(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cmd->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cmd->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cmd->cid, qp->cids);
}

int
nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	const struct nvme_completion *cpl = cqe;

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cpl->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cpl->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cpl->cid, qp->cids);
	return (_nvmft_send_response(qp, cqe));
}

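/*
 * Initialize a completion queue entry for the command in 'nc',
 * copying its CID and setting the given status.
 */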
void
nvmft_init_cqe(void *cqe, struct nvmf_capsule *nc, uint16_t status)
{
	struct nvme_completion *cpl = cqe;
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	memset(cpl, 0, sizeof(*cpl));
	cpl->cid = cmd->cid;
	cpl->status = htole16(status);
}

int
nvmft_send_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_type, uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (nvmft_send_response(qp, &cpl));
}

int
nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	return (nvmft_send_error(qp, nc, NVME_SCT_GENERIC, sc_status));
}

/*
 * This version doesn't clear CID in qp->cids and is used for errors
 * before the CID is validated.
 */
static int
_nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, NVME_SCT_GENERIC) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (_nvmft_send_response(qp, &cpl));
}

int
nvmft_send_success(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	return (nvmft_send_generic_error(qp, nc, NVME_SC_SUCCESS));
}

static void
nvmft_init_connect_rsp(struct nvmf_fabric_connect_rsp *rsp,
    const struct nvmf_fabric_connect_cmd *cmd, uint16_t status)
{
	memset(rsp, 0, sizeof(*rsp));
	rsp->cid = cmd->cid;
	rsp->status = htole16(status);
}

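/*
 * Transmit a fabrics CONNECT response capsule, mirroring the
 * reference handling in _nvmft_send_response().
 */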
static int
nvmft_send_connect_response(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_rsp *rsp)
{
	struct nvmf_capsule *rc;
	struct nvmf_qpair *nq;
	int error;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);
	mtx_unlock(&qp->lock);

	/*
	 * Use the referenced 'nq' rather than qp->qp, which a
	 * concurrent nvmft_qpair_shutdown() may clear once the lock
	 * is dropped.
	 */
	rc = nvmf_allocate_response(nq, rsp, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

void
nvmft_connect_error(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, uint8_t sc_type,
    uint8_t sc_status)
{
	struct nvmf_fabric_connect_rsp rsp;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_connect_rsp(&rsp, cmd, status);
	nvmft_send_connect_response(qp, &rsp);
}

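/*
 * Fail a CONNECT command with "invalid parameters" status, encoding
 * the byte offset of the offending field (IPO) and whether the field
 * lies in the command itself or in the CONNECT data (IATTR).
 */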
void
nvmft_connect_invalid_parameters(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, bool data, uint16_t offset)
{
	struct nvmf_fabric_connect_rsp rsp;

	nvmft_init_connect_rsp(&rsp, cmd,
	    NVMEF(NVME_STATUS_SCT, NVME_SCT_COMMAND_SPECIFIC) |
	    NVMEF(NVME_STATUS_SC, NVMF_FABRIC_SC_INVALID_PARAM));
	rsp.status_code_specific.invalid.ipo = htole16(offset);
	rsp.status_code_specific.invalid.iattr = data ? 1 : 0;
	nvmft_send_connect_response(qp, &rsp);
}

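/*
 * Accept a CONNECT command: bind the queue pair to its controller
 * and send a successful response carrying the controller ID and,
 * when SQ flow control is enabled, the current SQHD (0xffff
 * otherwise).
 */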
int
nvmft_finish_accept(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, struct nvmft_controller *ctrlr)
{
	struct nvmf_fabric_connect_rsp rsp;

	qp->ctrlr = ctrlr;
	nvmft_init_connect_rsp(&rsp, cmd, 0);
	if (qp->sq_flow_control)
		rsp.sqhd = htole16(qp->sqhd);
	else
		rsp.sqhd = htole16(0xffff);
	rsp.status_code_specific.success.cntlid = htole16(ctrlr->cntlid);
	return (nvmft_send_connect_response(qp, &rsp));
}