1/*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022-2024 Chelsio Communications, Inc. 5 * Written by: John Baldwin <jhb@FreeBSD.org> 6 */ 7 8#include <sys/param.h> 9#include <sys/kernel.h> 10#include <sys/limits.h> 11#include <sys/lock.h> 12#include <sys/malloc.h> 13#include <sys/mbuf.h> 14#include <sys/module.h> 15#include <sys/refcount.h> 16#include <sys/sysctl.h> 17#include <sys/sx.h> 18#include <dev/nvme/nvme.h> 19#include <dev/nvmf/nvmf.h> 20#include <dev/nvmf/nvmf_transport.h> 21#include <dev/nvmf/nvmf_transport_internal.h> 22 23/* Transport-independent support for fabrics queue pairs and commands. */ 24 25struct nvmf_transport { 26 struct nvmf_transport_ops *nt_ops; 27 28 volatile u_int nt_active_qpairs; 29 SLIST_ENTRY(nvmf_transport) nt_link; 30}; 31 32/* nvmf_transports[nvmf_trtype] is sorted by priority */ 33static SLIST_HEAD(, nvmf_transport) nvmf_transports[NVMF_TRTYPE_TCP + 1]; 34static struct sx nvmf_transports_lock; 35 36static MALLOC_DEFINE(M_NVMF_TRANSPORT, "nvmf_xport", 37 "NVMe over Fabrics transport"); 38 39SYSCTL_NODE(_kern, OID_AUTO, nvmf, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 40 "NVMe over Fabrics"); 41 42static bool 43nvmf_supported_trtype(enum nvmf_trtype trtype) 44{ 45 return (trtype < nitems(nvmf_transports)); 46} 47 48struct nvmf_qpair * 49nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller, 50 const struct nvmf_handoff_qpair_params *params, 51 nvmf_qpair_error_t *error_cb, void *error_cb_arg, 52 nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg) 53{ 54 struct nvmf_transport *nt; 55 struct nvmf_qpair *qp; 56 57 if (!nvmf_supported_trtype(trtype)) 58 return (NULL); 59 60 sx_slock(&nvmf_transports_lock); 61 SLIST_FOREACH(nt, &nvmf_transports[trtype], nt_link) { 62 qp = nt->nt_ops->allocate_qpair(controller, params); 63 if (qp != NULL) { 64 refcount_acquire(&nt->nt_active_qpairs); 65 break; 66 } 67 } 68 sx_sunlock(&nvmf_transports_lock); 69 if (qp == NULL) 70 return (NULL); 71 72 qp->nq_transport = nt; 73 qp->nq_ops = nt->nt_ops; 74 qp->nq_controller = controller; 75 qp->nq_error = error_cb; 76 qp->nq_error_arg = error_cb_arg; 77 qp->nq_receive = receive_cb; 78 qp->nq_receive_arg = receive_cb_arg; 79 qp->nq_admin = params->admin; 80 return (qp); 81} 82 83void 84nvmf_free_qpair(struct nvmf_qpair *qp) 85{ 86 struct nvmf_transport *nt; 87 88 nt = qp->nq_transport; 89 qp->nq_ops->free_qpair(qp); 90 if (refcount_release(&nt->nt_active_qpairs)) 91 wakeup(nt); 92} 93 94struct nvmf_capsule * 95nvmf_allocate_command(struct nvmf_qpair *qp, const void *sqe, int how) 96{ 97 struct nvmf_capsule *nc; 98 99 KASSERT(how == M_WAITOK || how == M_NOWAIT, 100 ("%s: invalid how", __func__)); 101 nc = qp->nq_ops->allocate_capsule(qp, how); 102 if (nc == NULL) 103 return (NULL); 104 105 nc->nc_qpair = qp; 106 nc->nc_qe_len = sizeof(struct nvme_command); 107 memcpy(&nc->nc_sqe, sqe, nc->nc_qe_len); 108 109 /* 4.2 of NVMe base spec: Fabrics always uses SGL. */ 110 nc->nc_sqe.fuse &= ~NVMEM(NVME_CMD_PSDT); 111 nc->nc_sqe.fuse |= NVMEF(NVME_CMD_PSDT, NVME_PSDT_SGL); 112 return (nc); 113} 114 115struct nvmf_capsule * 116nvmf_allocate_response(struct nvmf_qpair *qp, const void *cqe, int how) 117{ 118 struct nvmf_capsule *nc; 119 120 KASSERT(how == M_WAITOK || how == M_NOWAIT, 121 ("%s: invalid how", __func__)); 122 nc = qp->nq_ops->allocate_capsule(qp, how); 123 if (nc == NULL) 124 return (NULL); 125 126 nc->nc_qpair = qp; 127 nc->nc_qe_len = sizeof(struct nvme_completion); 128 memcpy(&nc->nc_cqe, cqe, nc->nc_qe_len); 129 return (nc); 130} 131 132int 133nvmf_capsule_append_data(struct nvmf_capsule *nc, struct memdesc *mem, 134 size_t len, bool send, nvmf_io_complete_t *complete_cb, 135 void *cb_arg) 136{ 137 if (nc->nc_data.io_len != 0) 138 return (EBUSY); 139 140 nc->nc_send_data = send; 141 nc->nc_data.io_mem = *mem; 142 nc->nc_data.io_len = len; 143 nc->nc_data.io_complete = complete_cb; 144 nc->nc_data.io_complete_arg = cb_arg; 145 return (0); 146} 147 148void 149nvmf_free_capsule(struct nvmf_capsule *nc) 150{ 151 nc->nc_qpair->nq_ops->free_capsule(nc); 152} 153 154int 155nvmf_transmit_capsule(struct nvmf_capsule *nc) 156{ 157 return (nc->nc_qpair->nq_ops->transmit_capsule(nc)); 158} 159 160void 161nvmf_abort_capsule_data(struct nvmf_capsule *nc, int error) 162{ 163 if (nc->nc_data.io_len != 0) 164 nvmf_complete_io_request(&nc->nc_data, 0, error); 165} 166 167void * 168nvmf_capsule_sqe(struct nvmf_capsule *nc) 169{ 170 KASSERT(nc->nc_qe_len == sizeof(struct nvme_command), 171 ("%s: capsule %p is not a command capsule", __func__, nc)); 172 return (&nc->nc_sqe); 173} 174 175void * 176nvmf_capsule_cqe(struct nvmf_capsule *nc) 177{ 178 KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion), 179 ("%s: capsule %p is not a response capsule", __func__, nc)); 180 return (&nc->nc_cqe); 181} 182 183uint8_t 184nvmf_validate_command_capsule(struct nvmf_capsule *nc) 185{ 186 KASSERT(nc->nc_qe_len == sizeof(struct nvme_command), 187 ("%s: capsule %p is not a command capsule", __func__, nc)); 188 189 if (NVMEV(NVME_CMD_PSDT, nc->nc_sqe.fuse) != NVME_PSDT_SGL) 190 return (NVME_SC_INVALID_FIELD); 191 192 return (nc->nc_qpair->nq_ops->validate_command_capsule(nc)); 193} 194 195size_t 196nvmf_capsule_data_len(const struct nvmf_capsule *nc) 197{ 198 return (nc->nc_qpair->nq_ops->capsule_data_len(nc)); 199} 200 201int 202nvmf_receive_controller_data(struct nvmf_capsule *nc, uint32_t data_offset, 203 struct memdesc *mem, size_t len, nvmf_io_complete_t *complete_cb, 204 void *cb_arg) 205{ 206 struct nvmf_io_request io; 207 208 io.io_mem = *mem; 209 io.io_len = len; 210 io.io_complete = complete_cb; 211 io.io_complete_arg = cb_arg; 212 return (nc->nc_qpair->nq_ops->receive_controller_data(nc, data_offset, 213 &io)); 214} 215 216u_int 217nvmf_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset, 218 struct mbuf *m, size_t len) 219{ 220 MPASS(m_length(m, NULL) == len); 221 return (nc->nc_qpair->nq_ops->send_controller_data(nc, data_offset, m, 222 len)); 223} 224 225int 226nvmf_transport_module_handler(struct module *mod, int what, void *arg) 227{ 228 struct nvmf_transport_ops *ops = arg; 229 struct nvmf_transport *nt, *nt2, *prev; 230 int error; 231 232 switch (what) { 233 case MOD_LOAD: 234 if (!nvmf_supported_trtype(ops->trtype)) { 235 printf("NVMF: Unsupported transport %u", ops->trtype); 236 return (EINVAL); 237 } 238 239 nt = malloc(sizeof(*nt), M_NVMF_TRANSPORT, M_WAITOK | M_ZERO); 240 nt->nt_ops = arg; 241 242 sx_xlock(&nvmf_transports_lock); 243 if (SLIST_EMPTY(&nvmf_transports[ops->trtype])) { 244 SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype], nt, 245 nt_link); 246 } else { 247 prev = NULL; 248 SLIST_FOREACH(nt2, &nvmf_transports[ops->trtype], 249 nt_link) { 250 if (ops->priority > nt2->nt_ops->priority) 251 break; 252 prev = nt2; 253 } 254 if (prev == NULL) 255 SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype], 256 nt, nt_link); 257 else 258 SLIST_INSERT_AFTER(prev, nt, nt_link); 259 } 260 sx_xunlock(&nvmf_transports_lock); 261 return (0); 262 263 case MOD_QUIESCE: 264 if (!nvmf_supported_trtype(ops->trtype)) 265 return (0); 266 267 sx_slock(&nvmf_transports_lock); 268 SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) { 269 if (nt->nt_ops == ops) 270 break; 271 } 272 if (nt == NULL) { 273 sx_sunlock(&nvmf_transports_lock); 274 return (0); 275 } 276 if (nt->nt_active_qpairs != 0) { 277 sx_sunlock(&nvmf_transports_lock); 278 return (EBUSY); 279 } 280 sx_sunlock(&nvmf_transports_lock); 281 return (0); 282 283 case MOD_UNLOAD: 284 if (!nvmf_supported_trtype(ops->trtype)) 285 return (0); 286 287 sx_xlock(&nvmf_transports_lock); 288 prev = NULL; 289 SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) { 290 if (nt->nt_ops == ops) 291 break; 292 prev = nt; 293 } 294 if (nt == NULL) { 295 sx_xunlock(&nvmf_transports_lock); 296 return (0); 297 } 298 299 if (prev == NULL) 300 SLIST_REMOVE_HEAD(&nvmf_transports[ops->trtype], 301 nt_link); 302 else 303 SLIST_REMOVE_AFTER(prev, nt_link); 304 305 error = 0; 306 while (nt->nt_active_qpairs != 0 && error == 0) 307 error = sx_sleep(nt, &nvmf_transports_lock, PCATCH, 308 "nftunld", 0); 309 sx_xunlock(&nvmf_transports_lock); 310 if (error != 0) 311 return (error); 312 free(nt, M_NVMF_TRANSPORT); 313 return (0); 314 315 default: 316 return (EOPNOTSUPP); 317 } 318} 319 320static int 321nvmf_transport_modevent(module_t mod __unused, int what, void *arg __unused) 322{ 323 switch (what) { 324 case MOD_LOAD: 325 for (u_int i = 0; i < nitems(nvmf_transports); i++) 326 SLIST_INIT(&nvmf_transports[i]); 327 sx_init(&nvmf_transports_lock, "nvmf transports"); 328 return (0); 329 default: 330 return (EOPNOTSUPP); 331 } 332} 333 334static moduledata_t nvmf_transport_mod = { 335 "nvmf_transport", 336 nvmf_transport_modevent, 337 0 338}; 339 340DECLARE_MODULE(nvmf_transport, nvmf_transport_mod, SI_SUB_DRIVERS, 341 SI_ORDER_FIRST); 342MODULE_VERSION(nvmf_transport, 1); 343