/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2022-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/refcount.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/nvmf_transport_internal.h>

/* Transport-independent support for fabrics queue pairs and commands. */

struct nvmf_transport {
	struct nvmf_transport_ops *nt_ops;

	volatile u_int nt_active_qpairs;
	SLIST_ENTRY(nvmf_transport) nt_link;
};

/* nvmf_transports[nvmf_trtype] is sorted by descending priority. */
static SLIST_HEAD(, nvmf_transport) nvmf_transports[NVMF_TRTYPE_TCP + 1];
static struct sx nvmf_transports_lock;

static MALLOC_DEFINE(M_NVMF_TRANSPORT, "nvmf_xport",
    "NVMe over Fabrics transport");

SYSCTL_NODE(_kern, OID_AUTO, nvmf, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NVMe over Fabrics");

static bool
nvmf_supported_trtype(enum nvmf_trtype trtype)
{
	return (trtype < nitems(nvmf_transports));
}

struct nvmf_qpair *
nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller,
    const struct nvmf_handoff_qpair_params *params,
    nvmf_qpair_error_t *error_cb, void *error_cb_arg,
    nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg)
{
	struct nvmf_transport *nt;
	struct nvmf_qpair *qp;

	if (!nvmf_supported_trtype(trtype))
		return (NULL);

	sx_slock(&nvmf_transports_lock);
	SLIST_FOREACH(nt, &nvmf_transports[trtype], nt_link) {
		qp = nt->nt_ops->allocate_qpair(controller, params);
		if (qp != NULL) {
			refcount_acquire(&nt->nt_active_qpairs);
			break;
		}
	}
	sx_sunlock(&nvmf_transports_lock);
	/*
	 * SLIST_FOREACH leaves nt NULL if no transport accepted the
	 * queue pair; qp is uninitialized in that case, so test nt.
	 */
	if (nt == NULL)
		return (NULL);

	qp->nq_transport = nt;
	qp->nq_ops = nt->nt_ops;
	qp->nq_controller = controller;
	qp->nq_error = error_cb;
	qp->nq_error_arg = error_cb_arg;
	qp->nq_receive = receive_cb;
	qp->nq_receive_arg = receive_cb_arg;
	qp->nq_admin = params->admin;
	return (qp);
}
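
/*
 * Usage sketch (hypothetical caller, not part of this file): a host-side
 * consumer hands a freshly connected queue pair to the highest-priority
 * transport that will accept it.  "softc", "my_error_cb", and
 * "my_receive_cb" are illustrative names.
 *
 *	qp = nvmf_allocate_qpair(NVMF_TRTYPE_TCP, false, &params,
 *	    my_error_cb, softc, my_receive_cb, softc);
 *	if (qp == NULL)
 *		... no registered transport accepted the handoff ...
 */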

void
nvmf_free_qpair(struct nvmf_qpair *qp)
{
	struct nvmf_transport *nt;

	nt = qp->nq_transport;
	qp->nq_ops->free_qpair(qp);
	if (refcount_release(&nt->nt_active_qpairs))
		wakeup(nt);
}

struct nvmf_capsule *
nvmf_allocate_command(struct nvmf_qpair *qp, const void *sqe, int how)
{
	struct nvmf_capsule *nc;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));
	nc = qp->nq_ops->allocate_capsule(qp, how);
	if (nc == NULL)
		return (NULL);

	nc->nc_qpair = qp;
	nc->nc_qe_len = sizeof(struct nvme_command);
	memcpy(&nc->nc_sqe, sqe, nc->nc_qe_len);

	/* Section 4.2 of the NVMe base spec: Fabrics always uses SGLs. */
	nc->nc_sqe.fuse &= ~NVMEM(NVME_CMD_PSDT);
	nc->nc_sqe.fuse |= NVMEF(NVME_CMD_PSDT, NVME_PSDT_SGL);
	return (nc);
}

struct nvmf_capsule *
nvmf_allocate_response(struct nvmf_qpair *qp, const void *cqe, int how)
{
	struct nvmf_capsule *nc;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));
	nc = qp->nq_ops->allocate_capsule(qp, how);
	if (nc == NULL)
		return (NULL);

	nc->nc_qpair = qp;
	nc->nc_qe_len = sizeof(struct nvme_completion);
	memcpy(&nc->nc_cqe, cqe, nc->nc_qe_len);
	return (nc);
}
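
/*
 * Usage sketch (hypothetical controller completion path, not part of
 * this file): echo the command's CID into a completion queue entry and
 * send it back on the same queue pair.  "rc" and the cqe setup are
 * illustrative.
 *
 *	struct nvme_completion cqe;
 *
 *	memset(&cqe, 0, sizeof(cqe));
 *	cqe.cid = nc->nc_sqe.cid;
 *	rc = nvmf_allocate_response(nc->nc_qpair, &cqe, M_WAITOK);
 *	nvmf_transmit_capsule(rc);
 *	nvmf_free_capsule(rc);
 */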

int
nvmf_capsule_append_data(struct nvmf_capsule *nc, struct memdesc *mem,
    size_t len, bool send, nvmf_io_complete_t *complete_cb,
    void *cb_arg)
{
	if (nc->nc_data.io_len != 0)
		return (EBUSY);

	nc->nc_send_data = send;
	nc->nc_data.io_mem = *mem;
	nc->nc_data.io_len = len;
	nc->nc_data.io_complete = complete_cb;
	nc->nc_data.io_complete_arg = cb_arg;
	return (0);
}
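
/*
 * Usage sketch (hypothetical host write path, not part of this file):
 * build a command capsule, attach a host buffer as the data payload,
 * and hand the capsule to the transport.  "buf", "len", and
 * "my_io_done" are illustrative names.
 *
 *	struct memdesc mem;
 *
 *	nc = nvmf_allocate_command(qp, &sqe, M_WAITOK);
 *	mem = memdesc_vaddr(buf, len);
 *	error = nvmf_capsule_append_data(nc, &mem, len, true,
 *	    my_io_done, arg);
 *	if (error == 0)
 *		error = nvmf_transmit_capsule(nc);
 */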

void
nvmf_free_capsule(struct nvmf_capsule *nc)
{
	nc->nc_qpair->nq_ops->free_capsule(nc);
}

int
nvmf_transmit_capsule(struct nvmf_capsule *nc)
{
	return (nc->nc_qpair->nq_ops->transmit_capsule(nc));
}

void
nvmf_abort_capsule_data(struct nvmf_capsule *nc, int error)
{
	if (nc->nc_data.io_len != 0)
		nvmf_complete_io_request(&nc->nc_data, 0, error);
}

void *
nvmf_capsule_sqe(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_command),
	    ("%s: capsule %p is not a command capsule", __func__, nc));
	return (&nc->nc_sqe);
}

void *
nvmf_capsule_cqe(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion),
	    ("%s: capsule %p is not a response capsule", __func__, nc));
	return (&nc->nc_cqe);
}

uint8_t
nvmf_validate_command_capsule(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_command),
	    ("%s: capsule %p is not a command capsule", __func__, nc));

	if (NVMEV(NVME_CMD_PSDT, nc->nc_sqe.fuse) != NVME_PSDT_SGL)
		return (NVME_SC_INVALID_FIELD);

	return (nc->nc_qpair->nq_ops->validate_command_capsule(nc));
}

size_t
nvmf_capsule_data_len(const struct nvmf_capsule *nc)
{
	return (nc->nc_qpair->nq_ops->capsule_data_len(nc));
}

int
nvmf_receive_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct memdesc *mem, size_t len, nvmf_io_complete_t *complete_cb,
    void *cb_arg)
{
	struct nvmf_io_request io;

	io.io_mem = *mem;
	io.io_len = len;
	io.io_complete = complete_cb;
	io.io_complete_arg = cb_arg;
	return (nc->nc_qpair->nq_ops->receive_controller_data(nc, data_offset,
	    &io));
}

u_int
nvmf_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct mbuf *m, size_t len)
{
	MPASS(m_length(m, NULL) == len);
	return (nc->nc_qpair->nq_ops->send_controller_data(nc, data_offset, m,
	    len));
}
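
/*
 * Usage sketch (hypothetical controller read path, not part of this
 * file): answer a host read by pushing an mbuf chain through the
 * transport.  "m" and the error handling are illustrative; the returned
 * status is transport-defined and may indicate that the transport has
 * already sent a success completion on the caller's behalf.
 *
 *	m = m_getm2(NULL, len, M_WAITOK, MT_DATA, 0);
 *	... copy len bytes of payload into the chain ...
 *	status = nvmf_send_controller_data(nc, 0, m, len);
 */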

int
nvmf_transport_module_handler(struct module *mod, int what, void *arg)
{
	struct nvmf_transport_ops *ops = arg;
	struct nvmf_transport *nt, *nt2, *prev;
	int error;

	switch (what) {
	case MOD_LOAD:
		if (!nvmf_supported_trtype(ops->trtype)) {
			printf("NVMF: Unsupported transport %u\n",
			    ops->trtype);
			return (EINVAL);
		}

		nt = malloc(sizeof(*nt), M_NVMF_TRANSPORT, M_WAITOK | M_ZERO);
		nt->nt_ops = arg;

		sx_xlock(&nvmf_transports_lock);
		if (SLIST_EMPTY(&nvmf_transports[ops->trtype])) {
			SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype], nt,
			    nt_link);
		} else {
			prev = NULL;
			SLIST_FOREACH(nt2, &nvmf_transports[ops->trtype],
			    nt_link) {
				if (ops->priority > nt2->nt_ops->priority)
					break;
				prev = nt2;
			}
			if (prev == NULL)
				SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype],
				    nt, nt_link);
			else
				SLIST_INSERT_AFTER(prev, nt, nt_link);
		}
		sx_xunlock(&nvmf_transports_lock);
		return (0);

	case MOD_QUIESCE:
		if (!nvmf_supported_trtype(ops->trtype))
			return (0);

		sx_slock(&nvmf_transports_lock);
		SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) {
			if (nt->nt_ops == ops)
				break;
		}
		if (nt == NULL) {
			sx_sunlock(&nvmf_transports_lock);
			return (0);
		}
		if (nt->nt_active_qpairs != 0) {
			sx_sunlock(&nvmf_transports_lock);
			return (EBUSY);
		}
		sx_sunlock(&nvmf_transports_lock);
		return (0);

	case MOD_UNLOAD:
		if (!nvmf_supported_trtype(ops->trtype))
			return (0);

		sx_xlock(&nvmf_transports_lock);
		prev = NULL;
		SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) {
			if (nt->nt_ops == ops)
				break;
			prev = nt;
		}
		if (nt == NULL) {
			sx_xunlock(&nvmf_transports_lock);
			return (0);
		}

		if (prev == NULL)
			SLIST_REMOVE_HEAD(&nvmf_transports[ops->trtype],
			    nt_link);
		else
			SLIST_REMOVE_AFTER(prev, nt_link);

		error = 0;
		while (nt->nt_active_qpairs != 0 && error == 0)
			error = sx_sleep(nt, &nvmf_transports_lock, PCATCH,
			    "nftunld", 0);
		sx_xunlock(&nvmf_transports_lock);
		if (error != 0)
			return (error);
		free(nt, M_NVMF_TRANSPORT);
		return (0);

	default:
		return (EOPNOTSUPP);
	}
}
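
/*
 * Registration sketch (hypothetical transport, not part of this file):
 * a transport module routes its module events through
 * nvmf_transport_module_handler, passing its nvmf_transport_ops as the
 * module argument.  The "foo" names are illustrative; other ops fields
 * are elided.
 *
 *	static struct nvmf_transport_ops foo_ops = {
 *		.trtype = NVMF_TRTYPE_TCP,
 *		.priority = 10,
 *		.allocate_qpair = foo_allocate_qpair,
 *		...
 *	};
 *
 *	static moduledata_t nvmf_foo_mod = {
 *		"nvmf_foo",
 *		nvmf_transport_module_handler,
 *		&foo_ops
 *	};
 *	DECLARE_MODULE(nvmf_foo, nvmf_foo_mod, SI_SUB_DRIVERS,
 *	    SI_ORDER_ANY);
 */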

static int
nvmf_transport_modevent(module_t mod __unused, int what, void *arg __unused)
{
	switch (what) {
	case MOD_LOAD:
		for (u_int i = 0; i < nitems(nvmf_transports); i++)
			SLIST_INIT(&nvmf_transports[i]);
		sx_init(&nvmf_transports_lock, "nvmf transports");
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static moduledata_t nvmf_transport_mod = {
	"nvmf_transport",
	nvmf_transport_modevent,
	0
};

DECLARE_MODULE(nvmf_transport, nvmf_transport_mod, SI_SUB_DRIVERS,
    SI_ORDER_FIRST);
MODULE_VERSION(nvmf_transport, 1);
