/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/dnv.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/sbuf.h>
#include <sys/sx.h>

#include <machine/bus.h>
#include <machine/bus_dma.h>

#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl_frontend.h>

/*
 * Store pointers to the capsule and qpair in the two pointer members
 * of CTL_PRIV_FRONTEND.
 */
#define	NVMFT_NC(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[0])
#define	NVMFT_QP(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[1])
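/*
 * These private pointers are stashed in nvmft_dispatch_command() when a
 * capsule is handed to CTL and read back in nvmft_datamove() and
 * nvmft_done() to recover the originating capsule and queue pair.
 */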

static void	nvmft_done(union ctl_io *io);
static int	nvmft_init(void);
static int	nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data,
    int flag, struct thread *td);
static int	nvmft_shutdown(void);

static TAILQ_HEAD(, nvmft_port) nvmft_ports;
static struct sx nvmft_ports_lock;

MALLOC_DEFINE(M_NVMFT, "nvmft", "NVMe over Fabrics controller");

static struct ctl_frontend nvmft_frontend = {
	.name = "nvmf",
	.init = nvmft_init,
	.ioctl = nvmft_ioctl,
	.fe_dump = NULL,
	.shutdown = nvmft_shutdown,
};

static void
nvmft_online(void *arg)
{
	struct nvmft_port *np = arg;

	sx_xlock(&np->lock);
	np->online = true;
	sx_xunlock(&np->lock);
}

static void
nvmft_offline(void *arg)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;

	sx_xlock(&np->lock);
	np->online = false;

	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_printf(ctrlr,
		    "shutting down due to port going offline\n");
		nvmft_controller_error(ctrlr, NULL, ENODEV);
	}

	while (!TAILQ_EMPTY(&np->controllers))
		sx_sleep(np, &np->lock, 0, "nvmfoff", 0);
	sx_xunlock(&np->lock);
}

static int
nvmft_lun_enable(void *arg, int lun_id)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	uint32_t *old_ns, *new_ns;
	uint32_t nsid;
	u_int i;

	if (lun_id >= le32toh(np->cdata.nn)) {
		printf("NVMFT: %s lun %d larger than maximum nsid %u\n",
		    np->cdata.subnqn, lun_id, le32toh(np->cdata.nn));
		return (EOPNOTSUPP);
	}
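	/*
	 * NVMe namespace IDs are 1-based (NSID 0 is invalid), so LUN n
	 * is exposed as NSID n + 1.
	 */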
	nsid = lun_id + 1;

	sx_xlock(&np->lock);
	new_ns = mallocarray(np->num_ns + 1, sizeof(*new_ns), M_NVMFT,
	    M_WAITOK);
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] < nsid)
			continue;
		if (np->active_ns[i] == nsid) {
			sx_xunlock(&np->lock);
			free(new_ns, M_NVMFT);
			printf("NVMFT: %s duplicate lun %d\n",
			    np->cdata.subnqn, lun_id);
			return (EINVAL);
		}
		break;
	}
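	/*
	 * The active_ns array is kept sorted in ascending order so the
	 * Active Namespace ID list can be generated by a simple scan.
	 */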

	/* Copy over IDs smaller than nsid. */
	memcpy(new_ns, np->active_ns, i * sizeof(*np->active_ns));

	/* Insert nsid. */
	new_ns[i] = nsid;

	/* Copy over IDs greater than nsid. */
	memcpy(new_ns + i + 1, np->active_ns + i, (np->num_ns - i) *
	    sizeof(*np->active_ns));

	np->num_ns++;
	old_ns = np->active_ns;
	np->active_ns = new_ns;

	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_controller_lun_changed(ctrlr, lun_id);
	}

	sx_xunlock(&np->lock);
	free(old_ns, M_NVMFT);

	return (0);
}

static int
nvmft_lun_disable(void *arg, int lun_id)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	uint32_t nsid;
	u_int i;

	if (lun_id >= le32toh(np->cdata.nn))
		return (0);
	nsid = lun_id + 1;

	sx_xlock(&np->lock);
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] == nsid)
			goto found;
	}
	sx_xunlock(&np->lock);
	printf("NVMFT: %s request to disable nonexistent lun %d\n",
	    np->cdata.subnqn, lun_id);
	return (EINVAL);

found:
	/* Move down IDs greater than nsid. */
	memmove(np->active_ns + i, np->active_ns + i + 1,
	    (np->num_ns - (i + 1)) * sizeof(*np->active_ns));
	np->num_ns--;

	/* NB: Don't bother freeing the old active_ns array. */

	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_controller_lun_changed(ctrlr, lun_id);
	}

	sx_xunlock(&np->lock);

	return (0);
}

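/*
 * Build an Active Namespace ID list (Identify CNS 02h): up to
 * nitems(nslist->ns) NSIDs greater than the requested nsid, in
 * ascending order.
 */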
void
nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid,
    struct nvme_ns_list *nslist)
{
	u_int i, count;

	sx_slock(&np->lock);
	count = 0;
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] <= nsid)
			continue;
		nslist->ns[count] = htole32(np->active_ns[i]);
		count++;
		if (count == nitems(nslist->ns))
			break;
	}
	sx_sunlock(&np->lock);
}

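/*
 * Translate a command capsule into a CTL I/O request and hand it to
 * CTL.  The pending_commands count and start_busy timestamp track the
 * controller's busy periods; nvmft_done() accumulates the elapsed time
 * into busy_total when the count drops back to zero.
 */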
void
nvmft_dispatch_command(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    bool admin)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
	struct nvmft_port *np = ctrlr->np;
	union ctl_io *io;
	int error;

	if (cmd->nsid == htole32(0)) {
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
		nvmf_free_capsule(nc);
		return;
	}

	mtx_lock(&ctrlr->lock);
	if (ctrlr->pending_commands == 0)
		ctrlr->start_busy = sbinuptime();
	ctrlr->pending_commands++;
	mtx_unlock(&ctrlr->lock);
	io = ctl_alloc_io(np->port.ctl_pool_ref);
	ctl_zero_io(io);
	NVMFT_NC(io) = nc;
	NVMFT_QP(io) = qp;
	io->io_hdr.io_type = admin ? CTL_IO_NVME_ADMIN : CTL_IO_NVME;
	io->io_hdr.nexus.initid = ctrlr->cntlid;
	io->io_hdr.nexus.targ_port = np->port.targ_port;
	io->io_hdr.nexus.targ_lun = le32toh(cmd->nsid) - 1;
	io->nvmeio.cmd = *cmd;
	error = ctl_run(io);
	if (error != 0) {
		nvmft_printf(ctrlr, "ctl_run failed for command on %s: %d\n",
		    nvmft_qpair_name(qp), error);
		ctl_nvme_set_generic_error(&io->nvmeio,
		    NVME_SC_INTERNAL_DEVICE_ERROR);
		nvmft_done(io);

		nvmft_controller_error(ctrlr, qp, ENXIO);
	}
}

void
nvmft_terminate_commands(struct nvmft_controller *ctrlr)
{
	struct nvmft_port *np = ctrlr->np;
	union ctl_io *io;
	int error;

	mtx_lock(&ctrlr->lock);
	if (ctrlr->pending_commands == 0)
		ctrlr->start_busy = sbinuptime();
	ctrlr->pending_commands++;
	mtx_unlock(&ctrlr->lock);
	io = ctl_alloc_io(np->port.ctl_pool_ref);
	ctl_zero_io(io);
	NVMFT_QP(io) = ctrlr->admin;
	io->io_hdr.io_type = CTL_IO_TASK;
	io->io_hdr.nexus.initid = ctrlr->cntlid;
	io->io_hdr.nexus.targ_port = np->port.targ_port;
	io->io_hdr.nexus.targ_lun = 0;
	io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX: unused? */
	io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
	error = ctl_run(io);
	if (error != CTL_RETVAL_COMPLETE) {
		nvmft_printf(ctrlr, "failed to terminate tasks: %d\n", error);
#ifdef INVARIANTS
		io->io_hdr.status = CTL_SUCCESS;
#endif
		nvmft_done(io);
	}
}

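/*
 * Handle data for a host-to-controller (WRITE-like) transfer: wrap the
 * CTL data buffers in a memdesc and ask the transport to fetch the
 * capsule data.  The callback below runs once the transfer completes.
 */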
static void
nvmft_datamove_out_cb(void *arg, size_t xfered, int error)
{
	struct ctl_nvmeio *ctnio = arg;

	if (error != 0) {
		ctl_nvme_set_data_transfer_error(ctnio);
	} else {
		MPASS(xfered == ctnio->kern_data_len);
		ctnio->kern_data_resid -= xfered;
	}

	if (ctnio->kern_sg_entries) {
		free(ctnio->ext_data_ptr, M_NVMFT);
		ctnio->ext_data_ptr = NULL;
	} else
		MPASS(ctnio->ext_data_ptr == NULL);
	ctl_datamove_done((union ctl_io *)ctnio, false);
}

static void
nvmft_datamove_out(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
	struct memdesc mem;
	int error;

	MPASS(ctnio->ext_data_ptr == NULL);
	if (ctnio->kern_sg_entries > 0) {
		struct ctl_sg_entry *sgl;
		struct bus_dma_segment *vlist;

		vlist = mallocarray(ctnio->kern_sg_entries, sizeof(*vlist),
		    M_NVMFT, M_WAITOK);
		ctnio->ext_data_ptr = (void *)vlist;
		sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
		for (u_int i = 0; i < ctnio->kern_sg_entries; i++) {
			vlist[i].ds_addr = (uintptr_t)sgl[i].addr;
			vlist[i].ds_len = sgl[i].len;
		}
		mem = memdesc_vlist(vlist, ctnio->kern_sg_entries);
	} else
		mem = memdesc_vaddr(ctnio->kern_data_ptr, ctnio->kern_data_len);

	error = nvmf_receive_controller_data(nc, ctnio->kern_rel_offset, &mem,
	    ctnio->kern_data_len, nvmft_datamove_out_cb, ctnio);
	if (error == 0)
		return;

	nvmft_printf(nvmft_qpair_ctrlr(qp),
	    "Failed to request capsule data: %d\n", error);
	ctl_nvme_set_data_transfer_error(ctnio);

	if (ctnio->kern_sg_entries) {
		free(ctnio->ext_data_ptr, M_NVMFT);
		ctnio->ext_data_ptr = NULL;
	} else
		MPASS(ctnio->ext_data_ptr == NULL);
	ctl_datamove_done((union ctl_io *)ctnio, true);
}

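/*
 * Controller-to-host data is sent as an mbuf chain.  nvmft_copy_data()
 * copies the CTL buffers into ordinary mbufs, while nvmft_ref_data()
 * further below wraps the buffers in external-storage mbufs that
 * borrow a reference on the CTL data instead of copying it.
 */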
static struct mbuf *
nvmft_copy_data(struct ctl_nvmeio *ctnio)
{
	struct ctl_sg_entry *sgl;
	struct mbuf *m0, *m;
	uint32_t resid, off, todo;
	int mlen;

	MPASS(ctnio->kern_data_len != 0);

	m0 = m_getm2(NULL, ctnio->kern_data_len, M_WAITOK, MT_DATA, 0);

	if (ctnio->kern_sg_entries == 0) {
		m_copyback(m0, 0, ctnio->kern_data_len, ctnio->kern_data_ptr);
		return (m0);
	}

	resid = ctnio->kern_data_len;
	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
	off = 0;
	m = m0;
	mlen = M_TRAILINGSPACE(m);
	for (;;) {
		todo = MIN(mlen, sgl->len - off);
		memcpy(mtod(m, char *) + m->m_len, (char *)sgl->addr + off,
		    todo);
		m->m_len += todo;
		resid -= todo;
		if (resid == 0) {
			MPASS(m->m_next == NULL);
			break;
		}

		off += todo;
		if (off == sgl->len) {
			sgl++;
			off = 0;
		}
		mlen -= todo;
		if (mlen == 0) {
			m = m->m_next;
			mlen = M_TRAILINGSPACE(m);
		}
	}

	return (m0);
}

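/*
 * Reference-counted mbufs: m_get_ref_data() takes an extra reference
 * on the CTL data buffer via kern_data_ref() and m_free_ref_data()
 * drops it when the mbuf's external storage is freed.
 */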
static void
m_free_ref_data(struct mbuf *m)
{
	ctl_ref kern_data_ref = m->m_ext.ext_arg1;

	kern_data_ref(m->m_ext.ext_arg2, -1);
}

static struct mbuf *
m_get_ref_data(struct ctl_nvmeio *ctnio, void *buf, u_int size)
{
	struct mbuf *m;

	m = m_get(M_WAITOK, MT_DATA);
	m_extadd(m, buf, size, m_free_ref_data, ctnio->kern_data_ref,
	    ctnio->kern_data_arg, M_RDONLY, EXT_CTL);
	m->m_len = size;
	ctnio->kern_data_ref(ctnio->kern_data_arg, 1);
	return (m);
}

static struct mbuf *
nvmft_ref_data(struct ctl_nvmeio *ctnio)
{
	struct ctl_sg_entry *sgl;
	struct mbuf *m0, *m;

	MPASS(ctnio->kern_data_len != 0);

	if (ctnio->kern_sg_entries == 0)
		return (m_get_ref_data(ctnio, ctnio->kern_data_ptr,
		    ctnio->kern_data_len));

	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
	m0 = m_get_ref_data(ctnio, sgl[0].addr, sgl[0].len);
	m = m0;
	for (u_int i = 1; i < ctnio->kern_sg_entries; i++) {
		m->m_next = m_get_ref_data(ctnio, sgl[i].addr, sgl[i].len);
		m = m->m_next;
	}
	return (m0);
}

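/*
 * Send data for a controller-to-host (READ-like) transfer.  A return
 * of NVMF_SUCCESS_SENT means the transport delivered a success
 * completion together with the data, so nvmft_done() must not send a
 * separate response capsule later.
 */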
static void
nvmft_datamove_in(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
	struct mbuf *m;
	u_int status;

	if (ctnio->kern_data_ref != NULL)
		m = nvmft_ref_data(ctnio);
	else
		m = nvmft_copy_data(ctnio);
	status = nvmf_send_controller_data(nc, ctnio->kern_rel_offset, m,
	    ctnio->kern_data_len);
	switch (status) {
	case NVMF_SUCCESS_SENT:
		ctnio->success_sent = true;
		nvmft_command_completed(qp, nc);
		/* FALLTHROUGH */
	case NVMF_MORE:
	case NVME_SC_SUCCESS:
		break;
	default:
		ctl_nvme_set_generic_error(ctnio, status);
		break;
	}
	ctl_datamove_done((union ctl_io *)ctnio, true);
}

static void
nvmft_datamove(union ctl_io *io)
{
	struct nvmf_capsule *nc;
	struct nvmft_qpair *qp;

	/* Some CTL commands preemptively set a success status. */
	MPASS(io->io_hdr.status == CTL_STATUS_NONE ||
	    io->io_hdr.status == CTL_SUCCESS);
	MPASS(!io->nvmeio.success_sent);

	nc = NVMFT_NC(io);
	qp = NVMFT_QP(io);

	if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN)
		nvmft_datamove_in(&io->nvmeio, qp, nc);
	else
		nvmft_datamove_out(&io->nvmeio, qp, nc);
}

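/*
 * The SMART / Health Information Page counters are 128-bit
 * little-endian values stored as a pair of 64-bit words; hip_add()
 * adds to the low word and propagates any carry into the high word.
 */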
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, new;

	old = le64toh(pair[0]);
	new = old + addend;
	pair[0] = htole64(new);
	if (new < old)
		pair[1] = htole64(le64toh(pair[1]) + 1);
}

static void
nvmft_done(union ctl_io *io)
{
	struct nvmft_controller *ctrlr;
	const struct nvme_command *cmd;
	struct nvmft_qpair *qp;
	struct nvmf_capsule *nc;
	size_t len;

	KASSERT(io->io_hdr.status == CTL_SUCCESS ||
	    io->io_hdr.status == CTL_NVME_ERROR,
	    ("%s: bad status %u", __func__, io->io_hdr.status));

	nc = NVMFT_NC(io);
	qp = NVMFT_QP(io);
	ctrlr = nvmft_qpair_ctrlr(qp);

	if (nc == NULL) {
		/* Completion of nvmft_terminate_commands. */
		goto end;
	}

	cmd = nvmf_capsule_sqe(nc);

	if (io->io_hdr.status == CTL_SUCCESS)
		len = nvmf_capsule_data_len(nc) / 512;
	else
		len = 0;
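	/*
	 * Data Units Read and Data Units Written are reported in units
	 * of 1000 512-byte blocks, so partial units are carried over in
	 * partial_dur/partial_duw until a full unit accumulates.
	 */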
	switch (cmd->opc) {
	case NVME_OPC_WRITE:
		mtx_lock(&ctrlr->lock);
		hip_add(ctrlr->hip.host_write_commands, 1);
		len += ctrlr->partial_duw;
		if (len >= 1000)
			hip_add(ctrlr->hip.data_units_written, len / 1000);
		ctrlr->partial_duw = len % 1000;
		mtx_unlock(&ctrlr->lock);
		break;
	case NVME_OPC_READ:
	case NVME_OPC_COMPARE:
	case NVME_OPC_VERIFY:
		mtx_lock(&ctrlr->lock);
		if (cmd->opc != NVME_OPC_VERIFY)
			hip_add(ctrlr->hip.host_read_commands, 1);
		len += ctrlr->partial_dur;
		if (len >= 1000)
			hip_add(ctrlr->hip.data_units_read, len / 1000);
		ctrlr->partial_dur = len % 1000;
		mtx_unlock(&ctrlr->lock);
		break;
	}

	if (io->nvmeio.success_sent) {
		MPASS(io->io_hdr.status == CTL_SUCCESS);
	} else {
		io->nvmeio.cpl.cid = cmd->cid;
		nvmft_send_response(qp, &io->nvmeio.cpl);
	}
	nvmf_free_capsule(nc);
end:
	ctl_free_io(io);
	mtx_lock(&ctrlr->lock);
	ctrlr->pending_commands--;
	if (ctrlr->pending_commands == 0)
		ctrlr->busy_total += sbinuptime() - ctrlr->start_busy;
	mtx_unlock(&ctrlr->lock);
}

static int
nvmft_init(void)
{
	TAILQ_INIT(&nvmft_ports);
	sx_init(&nvmft_ports_lock, "nvmft ports");
	return (0);
}

void
nvmft_port_free(struct nvmft_port *np)
{
	KASSERT(TAILQ_EMPTY(&np->controllers),
	    ("%s(%p): active controllers", __func__, np));

	if (np->port.targ_port != -1) {
		if (ctl_port_deregister(&np->port) != 0)
			printf("%s: ctl_port_deregister() failed\n", __func__);
	}

	free(np->active_ns, M_NVMFT);
	clean_unrhdr(np->ids);
	delete_unrhdr(np->ids);
	sx_destroy(&np->lock);
	free(np, M_NVMFT);
}

static struct nvmft_port *
nvmft_port_find(const char *subnqn)
{
	struct nvmft_port *np;

	KASSERT(nvmf_nqn_valid(subnqn), ("%s: invalid nqn", __func__));

	sx_assert(&nvmft_ports_lock, SA_LOCKED);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		if (strcmp(np->cdata.subnqn, subnqn) == 0)
			break;
	}
	return (np);
}

static struct nvmft_port *
nvmft_port_find_by_id(int port_id)
{
	struct nvmft_port *np;

	sx_assert(&nvmft_ports_lock, SA_LOCKED);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		if (np->port.targ_port == port_id)
			break;
	}
	return (np);
}

/*
 * Helper function to fetch a number stored as a string in an nvlist.
 * Returns false if the string was not a valid number.
 */
static bool
dnvlist_get_strnum(nvlist_t *nvl, const char *name, u_long default_value,
	u_long *value)
{
	const char *str;
	char *cp;

	str = dnvlist_get_string(nvl, name, NULL);
	if (str == NULL) {
		*value = default_value;
		return (true);
	}
	if (*str == '\0')
		return (false);
	*value = strtoul(str, &cp, 0);
	if (*cp != '\0')
		return (false);
	return (true);
}

/*
 * NVMeoF ports support the following parameters:
 *
 * Mandatory:
 *
 * subnqn: subsystem NVMe Qualified Name
 * portid: integer port ID from Discovery Log Page entry
 *
 * Optional:
 * serial: Serial Number string
 * max_io_qsize: Maximum number of I/O queue entries
 * enable_timeout: Timeout for controller enable in milliseconds
 * ioccsz: Maximum command capsule size
 * iorcsz: Maximum response capsule size
 * nn: Number of namespaces
 */
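/*
 * A port is typically created from userland via ctladm(8).  An
 * illustrative (not authoritative) invocation:
 *
 *	ctladm port -c -d "nvmf" -O subnqn=nqn.2012-06.com.example:target0 \
 *	    -O portid=1
 */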
static void
nvmft_port_create(struct ctl_req *req)
{
	struct nvmft_port *np;
	struct ctl_port *port;
	const char *serial, *subnqn;
	char serial_buf[NVME_SERIAL_NUMBER_LENGTH];
	u_long enable_timeout, hostid, ioccsz, iorcsz, max_io_qsize, nn, portid;
	int error;

	/* Required parameters. */
	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
	if (subnqn == NULL || !nvlist_exists_string(req->args_nvl, "portid")) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Missing required argument");
		return;
	}
	if (!nvmf_nqn_valid(subnqn)) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid SubNQN");
		return;
	}
	if (!dnvlist_get_strnum(req->args_nvl, "portid", UINT16_MAX, &portid) ||
	    portid > UINT16_MAX) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid port ID");
		return;
	}

	/* Optional parameters. */
	if (!dnvlist_get_strnum(req->args_nvl, "max_io_qsize",
	    NVMF_MAX_IO_ENTRIES, &max_io_qsize) ||
	    max_io_qsize < NVME_MIN_IO_ENTRIES ||
	    max_io_qsize > NVME_MAX_IO_ENTRIES) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid maximum I/O queue size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "enable_timeout",
	    NVMF_CC_EN_TIMEOUT * 500, &enable_timeout) ||
	    (enable_timeout % 500) != 0 || (enable_timeout / 500) > 255) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid enable timeout");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "ioccsz", NVMF_IOCCSZ,
	    &ioccsz) || ioccsz < sizeof(struct nvme_command) ||
	    (ioccsz % 16) != 0) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid Command Capsule size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "iorcsz", NVMF_IORCSZ,
	    &iorcsz) || iorcsz < sizeof(struct nvme_completion) ||
	    (iorcsz % 16) != 0) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid Response Capsule size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "nn", NVMF_NN, &nn) ||
	    nn < 1 || nn > UINT32_MAX) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid number of namespaces");
		return;
	}

	serial = dnvlist_get_string(req->args_nvl, "serial", NULL);
	if (serial == NULL) {
		getcredhostid(curthread->td_ucred, &hostid);
		nvmf_controller_serial(serial_buf, sizeof(serial_buf), hostid);
		serial = serial_buf;
	}

	sx_xlock(&nvmft_ports_lock);

	np = nvmft_port_find(subnqn);
	if (np != NULL) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "SubNQN \"%s\" already exists", subnqn);
		sx_xunlock(&nvmft_ports_lock);
		return;
	}

	np = malloc(sizeof(*np), M_NVMFT, M_WAITOK | M_ZERO);
	refcount_init(&np->refs, 1);
	np->max_io_qsize = max_io_qsize;
	np->cap = _nvmf_controller_cap(max_io_qsize, enable_timeout / 500);
	sx_init(&np->lock, "nvmft port");
	np->ids = new_unrhdr(0, MIN(CTL_MAX_INIT_PER_PORT - 1,
	    NVMF_CNTLID_STATIC_MAX), UNR_NO_MTX);
	TAILQ_INIT(&np->controllers);

	/* The controller ID is set later for individual controllers. */
	_nvmf_init_io_controller_data(0, max_io_qsize, serial, ostype,
	    osrelease, subnqn, nn, ioccsz, iorcsz, &np->cdata);
	np->cdata.aerl = NVMFT_NUM_AER - 1;
	np->cdata.oaes = htole32(NVME_ASYNC_EVENT_NS_ATTRIBUTE);
	np->cdata.oncs = htole16(NVMEF(NVME_CTRLR_DATA_ONCS_VERIFY, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_WRZERO, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_DSM, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_COMPARE, 1));
	np->cdata.fuses = NVMEF(NVME_CTRLR_DATA_FUSES_CNW, 1);

	np->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
	memcpy(np->fp.revision[0], np->cdata.fr, sizeof(np->cdata.fr));

	port = &np->port;

	port->frontend = &nvmft_frontend;
	port->port_type = CTL_PORT_NVMF;
	port->num_requested_ctl_io = max_io_qsize;
	port->port_name = "nvmf";
	port->physical_port = portid;
	port->virtual_port = 0;
	port->port_online = nvmft_online;
	port->port_offline = nvmft_offline;
	port->onoff_arg = np;
	port->lun_enable = nvmft_lun_enable;
	port->lun_disable = nvmft_lun_disable;
	port->targ_lun_arg = np;
	port->fe_datamove = nvmft_datamove;
	port->fe_done = nvmft_done;
	port->targ_port = -1;
	port->options = nvlist_clone(req->args_nvl);

	error = ctl_port_register(port);
	if (error != 0) {
		sx_xunlock(&nvmft_ports_lock);
		nvlist_destroy(port->options);
		nvmft_port_rele(np);
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Failed to register CTL port with error %d", error);
		return;
	}

	TAILQ_INSERT_TAIL(&nvmft_ports, np, link);
	sx_xunlock(&nvmft_ports_lock);

	req->status = CTL_LUN_OK;
	req->result_nvl = nvlist_create(0);
	nvlist_add_number(req->result_nvl, "port_id", port->targ_port);
}

static void
nvmft_port_remove(struct ctl_req *req)
{
	struct nvmft_port *np;
	const char *subnqn;
	u_long port_id;

	/*
	 * ctladm port -r just provides the port_id, so permit looking
	 * up a port either by "subnqn" or "port_id".
	 */
	port_id = ULONG_MAX;
	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
	if (subnqn == NULL) {
		if (!nvlist_exists_string(req->args_nvl, "port_id")) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Missing required argument");
			return;
		}
		if (!dnvlist_get_strnum(req->args_nvl, "port_id", ULONG_MAX,
		    &port_id)) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Invalid CTL port ID");
			return;
		}
	} else {
		if (nvlist_exists_string(req->args_nvl, "port_id")) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Ambiguous port removal request");
			return;
		}
	}

	sx_xlock(&nvmft_ports_lock);

	if (subnqn != NULL) {
		np = nvmft_port_find(subnqn);
		if (np == NULL) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "SubNQN \"%s\" does not exist", subnqn);
			sx_xunlock(&nvmft_ports_lock);
			return;
		}
	} else {
		np = nvmft_port_find_by_id(port_id);
		if (np == NULL) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "CTL port %lu is not an NVMF port", port_id);
			sx_xunlock(&nvmft_ports_lock);
			return;
		}
	}

	TAILQ_REMOVE(&nvmft_ports, np, link);
	sx_xunlock(&nvmft_ports_lock);

	ctl_port_offline(&np->port);
	nvmft_port_rele(np);
	req->status = CTL_LUN_OK;
}

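/*
 * Accept an association handed off from userland: the daemon (e.g.
 * nvmfd(8)) establishes the transport connection and passes the
 * connected queue pair to the kernel along with the CONNECT command
 * and data, which identify the target port by SubNQN.
 */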
static void
nvmft_handoff(struct ctl_nvmf *cn)
{
	struct nvmf_fabric_connect_cmd cmd;
	struct nvmf_handoff_controller_qpair *handoff;
	struct nvmf_fabric_connect_data *data;
	struct nvmft_port *np;
	int error;

	np = NULL;
	data = NULL;
	handoff = &cn->data.handoff;
	error = copyin(handoff->cmd, &cmd, sizeof(cmd));
	if (error != 0) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyin CONNECT SQE");
		return;
	}

	data = malloc(sizeof(*data), M_NVMFT, M_WAITOK);
	error = copyin(handoff->data, data, sizeof(*data));
	if (error != 0) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyin CONNECT data");
		goto out;
	}

	if (!nvmf_nqn_valid(data->subnqn)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Invalid SubNQN");
		goto out;
	}

	sx_slock(&nvmft_ports_lock);
	np = nvmft_port_find(data->subnqn);
	if (np == NULL) {
		sx_sunlock(&nvmft_ports_lock);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Unknown SubNQN");
		goto out;
	}
	if (!np->online) {
		sx_sunlock(&nvmft_ports_lock);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "CTL port offline");
		np = NULL;
		goto out;
	}
	nvmft_port_ref(np);
	sx_sunlock(&nvmft_ports_lock);

	if (handoff->params.admin) {
		error = nvmft_handoff_admin_queue(np, handoff, &cmd, data);
		if (error != 0) {
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Failed to handoff admin queue: %d", error);
			goto out;
		}
	} else {
		error = nvmft_handoff_io_queue(np, handoff, &cmd, data);
		if (error != 0) {
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Failed to handoff I/O queue: %d", error);
			goto out;
		}
	}

	cn->status = CTL_NVMF_OK;
out:
	if (np != NULL)
		nvmft_port_rele(np);
	free(data, M_NVMFT);
}

static void
nvmft_list(struct ctl_nvmf *cn)
{
	struct ctl_nvmf_list_params *lp;
	struct nvmft_controller *ctrlr;
	struct nvmft_port *np;
	struct sbuf *sb;
	int error;

	lp = &cn->data.list;

	sb = sbuf_new(NULL, NULL, lp->alloc_len, SBUF_FIXEDLEN |
	    SBUF_INCLUDENUL);
	if (sb == NULL) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to allocate NVMeoF session list");
		return;
	}

	sbuf_printf(sb, "<ctlnvmflist>\n");
	sx_slock(&nvmft_ports_lock);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		sx_slock(&np->lock);
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			sbuf_printf(sb, "<connection id=\"%d\">"
			    "<hostnqn>%s</hostnqn>"
			    "<subnqn>%s</subnqn>"
			    "<trtype>%u</trtype>"
			    "</connection>\n",
			    ctrlr->cntlid,
			    ctrlr->hostnqn,
			    np->cdata.subnqn,
			    ctrlr->trtype);
		}
		sx_sunlock(&np->lock);
	}
	sx_sunlock(&nvmft_ports_lock);
	sbuf_printf(sb, "</ctlnvmflist>\n");
	if (sbuf_finish(sb) != 0) {
		sbuf_delete(sb);
		cn->status = CTL_NVMF_LIST_NEED_MORE_SPACE;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Out of space, %d bytes is too small", lp->alloc_len);
		return;
	}

	error = copyout(sbuf_data(sb), lp->conn_xml, sbuf_len(sb));
	if (error != 0) {
		sbuf_delete(sb);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyout session list: %d", error);
		return;
	}
	lp->fill_len = sbuf_len(sb);
	cn->status = CTL_NVMF_OK;
	sbuf_delete(sb);
}

static void
nvmft_terminate(struct ctl_nvmf *cn)
{
	struct ctl_nvmf_terminate_params *tp;
	struct nvmft_controller *ctrlr;
	struct nvmft_port *np;
	bool found, match;

	tp = &cn->data.terminate;

	found = false;
	sx_slock(&nvmft_ports_lock);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		sx_slock(&np->lock);
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			if (tp->all != 0)
				match = true;
			else if (tp->cntlid != -1)
				match = tp->cntlid == ctrlr->cntlid;
			else if (tp->hostnqn[0] != '\0')
				match = strncmp(tp->hostnqn, ctrlr->hostnqn,
				    sizeof(tp->hostnqn)) == 0;
			else
				match = false;
			if (!match)
				continue;
			nvmft_printf(ctrlr,
			    "disconnecting due to administrative request\n");
			nvmft_controller_error(ctrlr, NULL, ECONNABORTED);
			found = true;
		}
		sx_sunlock(&np->lock);
	}
	sx_sunlock(&nvmft_ports_lock);

	if (!found) {
		cn->status = CTL_NVMF_ASSOCIATION_NOT_FOUND;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "No matching associations found");
		return;
	}
	cn->status = CTL_NVMF_OK;
}

static int
nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
	struct ctl_nvmf *cn;
	struct ctl_req *req;

	switch (cmd) {
	case CTL_PORT_REQ:
		req = (struct ctl_req *)data;
		switch (req->reqtype) {
		case CTL_REQ_CREATE:
			nvmft_port_create(req);
			break;
		case CTL_REQ_REMOVE:
			nvmft_port_remove(req);
			break;
		default:
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Unsupported request type %d", req->reqtype);
			break;
		}
		return (0);
	case CTL_NVMF:
		cn = (struct ctl_nvmf *)data;
		switch (cn->type) {
		case CTL_NVMF_HANDOFF:
			nvmft_handoff(cn);
			break;
		case CTL_NVMF_LIST:
			nvmft_list(cn);
			break;
		case CTL_NVMF_TERMINATE:
			nvmft_terminate(cn);
			break;
		default:
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Invalid NVMeoF request type %d", cn->type);
			break;
		}
		return (0);
	default:
		return (ENOTTY);
	}
}

static int
nvmft_shutdown(void)
{
	/* TODO: Need to check for active controllers. */
	if (!TAILQ_EMPTY(&nvmft_ports))
		return (EBUSY);

	sx_destroy(&nvmft_ports_lock);
	return (0);
}

CTL_FRONTEND_DECLARE(nvmft, nvmft_frontend);
MODULE_DEPEND(nvmft, nvmf_transport, 1, 1, 1);