// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics common host code.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
#include "nvme.h"
#include "fabrics.h"
#include <linux/nvme-keyring.h>

static LIST_HEAD(nvmf_transports);
static DECLARE_RWSEM(nvmf_transports_rwsem);

static LIST_HEAD(nvmf_hosts);
static DEFINE_MUTEX(nvmf_hosts_mutex);

static struct nvmf_host *nvmf_default_host;

static struct nvmf_host *nvmf_host_alloc(const char *hostnqn, uuid_t *id)
{
	struct nvmf_host *host;

	host = kmalloc(sizeof(*host), GFP_KERNEL);
	if (!host)
		return NULL;

	kref_init(&host->ref);
	uuid_copy(&host->id, id);
	strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE);

	return host;
}

static struct nvmf_host *nvmf_host_add(const char *hostnqn, uuid_t *id)
{
	struct nvmf_host *host;

	mutex_lock(&nvmf_hosts_mutex);

	/*
	 * A host is defined by how the target perceives it. Therefore, we
	 * don't allow different Host NQNs with the same Host ID, nor the
	 * same Host NQN with different Host IDs. This keeps host
	 * identification unambiguous.
	 */
	list_for_each_entry(host, &nvmf_hosts, list) {
		bool same_hostnqn = !strcmp(host->nqn, hostnqn);
		bool same_hostid = uuid_equal(&host->id, id);

		if (same_hostnqn && same_hostid) {
			kref_get(&host->ref);
			goto out_unlock;
		}
		if (same_hostnqn) {
			pr_err("found same hostnqn %s but different hostid %pUb\n",
			       hostnqn, id);
			host = ERR_PTR(-EINVAL);
			goto out_unlock;
		}
		if (same_hostid) {
			pr_err("found same hostid %pUb but different hostnqn %s\n",
			       id, hostnqn);
			host = ERR_PTR(-EINVAL);
			goto out_unlock;
		}
	}

	host = nvmf_host_alloc(hostnqn, id);
	if (!host) {
		host = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}

	list_add_tail(&host->list, &nvmf_hosts);
out_unlock:
	mutex_unlock(&nvmf_hosts_mutex);
	return host;
}

static struct nvmf_host *nvmf_host_default(void)
{
	struct nvmf_host *host;
	char nqn[NVMF_NQN_SIZE];
	uuid_t id;

	uuid_gen(&id);
	snprintf(nqn, NVMF_NQN_SIZE,
		"nqn.2014-08.org.nvmexpress:uuid:%pUb", &id);

	host = nvmf_host_alloc(nqn, &id);
	if (!host)
		return NULL;

	mutex_lock(&nvmf_hosts_mutex);
	list_add_tail(&host->list, &nvmf_hosts);
	mutex_unlock(&nvmf_hosts_mutex);

	return host;
}

static void nvmf_host_destroy(struct kref *ref)
{
	struct nvmf_host *host = container_of(ref, struct nvmf_host, ref);

	mutex_lock(&nvmf_hosts_mutex);
	list_del(&host->list);
	mutex_unlock(&nvmf_hosts_mutex);

	kfree(host);
}

static void nvmf_host_put(struct nvmf_host *host)
{
	if (host)
		kref_put(&host->ref, nvmf_host_destroy);
}

/**
 * nvmf_get_address() -  Get address/port
 * @ctrl:	Host NVMe controller instance from which to get the address
 * @buf:	OUTPUT parameter that will contain the address/port
 * @size:	buffer size
 */
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
{
	int len = 0;

	if (ctrl->opts->mask & NVMF_OPT_TRADDR)
		len += scnprintf(buf, size, "traddr=%s", ctrl->opts->traddr);
	if (ctrl->opts->mask & NVMF_OPT_TRSVCID)
		len += scnprintf(buf + len, size - len, "%strsvcid=%s",
				(len) ? "," : "", ctrl->opts->trsvcid);
	if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
		len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
				(len) ? "," : "", ctrl->opts->host_traddr);
	if (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)
		len += scnprintf(buf + len, size - len, "%shost_iface=%s",
				(len) ? "," : "", ctrl->opts->host_iface);
	len += scnprintf(buf + len, size - len, "\n");

	return len;
}
EXPORT_SYMBOL_GPL(nvmf_get_address);
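
/*
 * Illustrative only: for a TCP controller created with traddr, trsvcid
 * and host_traddr set, the buffer ends up holding something like
 *
 *	traddr=192.168.1.100,trsvcid=4420,host_traddr=192.168.1.50
 *
 * (addresses are example values). Fabrics transports typically wire this
 * helper up as the ->get_address() method in their nvme_ctrl_ops so the
 * controller's sysfs "address" attribute can report it.
 */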

/**
 * nvmf_reg_read32() -  NVMe Fabrics "Property Get" API function.
 * @ctrl:	Host NVMe controller instance maintaining the admin
 *		queue used to submit the property read command to
 *		the allocated NVMe controller resource on the target system.
 * @off:	Starting offset value of the targeted property
 *		register (see the fabrics section of the NVMe standard).
 * @val:	OUTPUT parameter that will contain the value of
 *		the property after a successful read.
 *
 * Used by the host system to retrieve a 32-bit capsule property value
 * from an NVMe controller on the target system.
 *
 * ("Capsule property" is a "PCIe register" concept applied to the
 * NVMe fabrics space.)
 *
 * Return:
 *	0: successful read
 *	> 0: NVMe error status code
 *	< 0: Linux errno error code
 */
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
	struct nvme_command cmd = { };
	union nvme_result res;
	int ret;

	cmd.prop_get.opcode = nvme_fabrics_command;
	cmd.prop_get.fctype = nvme_fabrics_type_property_get;
	cmd.prop_get.offset = cpu_to_le32(off);

	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
			NVME_QID_ANY, 0);

	if (ret >= 0)
		*val = le64_to_cpu(res.u64);
	if (unlikely(ret != 0))
		dev_err(ctrl->device,
			"Property Get error: %d, offset %#x\n",
			ret > 0 ? ret & ~NVME_SC_DNR : ret, off);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmf_reg_read32);
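
/*
 * Usage sketch (illustrative): transports publish this helper as the
 * ->reg_read32() method in their nvme_ctrl_ops; the core then uses it,
 * for example, to poll the Controller Status property during enable:
 *
 *	u32 csts;
 *
 *	ret = nvmf_reg_read32(ctrl, NVME_REG_CSTS, &csts);
 *	if (!ret && (csts & NVME_CSTS_RDY))
 *		;	// controller reports ready
 */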

/**
 * nvmf_reg_read64() -  NVMe Fabrics "Property Get" API function.
 * @ctrl:	Host NVMe controller instance maintaining the admin
 *		queue used to submit the property read command to
 *		the allocated controller resource on the target system.
 * @off:	Starting offset value of the targeted property
 *		register (see the fabrics section of the NVMe standard).
 * @val:	OUTPUT parameter that will contain the value of
 *		the property after a successful read.
 *
 * Used by the host system to retrieve a 64-bit capsule property value
 * from an NVMe controller on the target system.
 *
 * ("Capsule property" is a "PCIe register" concept applied to the
 * NVMe fabrics space.)
 *
 * Return:
 *	0: successful read
 *	> 0: NVMe error status code
 *	< 0: Linux errno error code
 */
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
{
	struct nvme_command cmd = { };
	union nvme_result res;
	int ret;

	cmd.prop_get.opcode = nvme_fabrics_command;
	cmd.prop_get.fctype = nvme_fabrics_type_property_get;
	cmd.prop_get.attrib = 1;
	cmd.prop_get.offset = cpu_to_le32(off);

	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
			NVME_QID_ANY, 0);

	if (ret >= 0)
		*val = le64_to_cpu(res.u64);
	if (unlikely(ret != 0))
		dev_err(ctrl->device,
			"Property Get error: %d, offset %#x\n",
			ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
	return ret;
}
EXPORT_SYMBOL_GPL(nvmf_reg_read64);
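
/*
 * Usage sketch (illustrative): wired up as ->reg_read64(), this lets the
 * core fetch the 64-bit Controller Capabilities property at init time:
 *
 *	u64 cap;
 *
 *	ret = nvmf_reg_read64(ctrl, NVME_REG_CAP, &cap);
 *	if (!ret)
 *		ctrl->sqsize = min_t(u16, NVME_CAP_MQES(cap), ctrl->sqsize);
 */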

/**
 * nvmf_reg_write32() -  NVMe Fabrics "Property Write" API function.
 * @ctrl:	Host NVMe controller instance maintaining the admin
 *		queue used to submit the property write command to
 *		the allocated NVMe controller resource on the target system.
 * @off:	Starting offset value of the targeted property
 *		register (see the fabrics section of the NVMe standard).
 * @val:	Input parameter that contains the value to be
 *		written to the property.
 *
 * Used by the NVMe host system to write a 32-bit capsule property value
 * to an NVMe controller on the target system.
 *
 * ("Capsule property" is a "PCIe register" concept applied to the
 * NVMe fabrics space.)
 *
 * Return:
 *	0: successful write
 *	> 0: NVMe error status code
 *	< 0: Linux errno error code
 */
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
{
	struct nvme_command cmd = { };
	int ret;

	cmd.prop_set.opcode = nvme_fabrics_command;
	cmd.prop_set.fctype = nvme_fabrics_type_property_set;
	cmd.prop_set.attrib = 0;
	cmd.prop_set.offset = cpu_to_le32(off);
	cmd.prop_set.value = cpu_to_le64(val);

	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0,
			NVME_QID_ANY, 0);
	if (unlikely(ret))
		dev_err(ctrl->device,
			"Property Set error: %d, offset %#x\n",
			ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
	return ret;
}
EXPORT_SYMBOL_GPL(nvmf_reg_write32);
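
/*
 * Usage sketch (illustrative): the core enables or shuts down a fabrics
 * controller by writing the Controller Configuration property through
 * this hook:
 *
 *	ctrl->ctrl_config |= NVME_CC_ENABLE;
 *	ret = nvmf_reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
 */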

/**
 * nvmf_log_connect_error() - Error-parsing diagnostic printout function
 * 				for connect() errors.
 * @ctrl:	The specific /dev/nvmeX device that had the error.
 * @errval:	Error code to be decoded in a more human-friendly
 * 		printout.
 * @offset:	For use with the NVMe error code
 * 		NVME_SC_CONNECT_INVALID_PARAM.
 * @cmd:	This is the SQE portion of a submission capsule.
 * @data:	This is the "Data" portion of a submission capsule.
 */
static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
		int errval, int offset, struct nvme_command *cmd,
		struct nvmf_connect_data *data)
{
	int err_sctype = errval & ~NVME_SC_DNR;

	if (errval < 0) {
		dev_err(ctrl->device,
			"Connect command failed, errno: %d\n", errval);
		return;
	}

	switch (err_sctype) {
	case NVME_SC_CONNECT_INVALID_PARAM:
		if (offset >> 16) {
			char *inv_data = "Connect Invalid Data Parameter";

			switch (offset & 0xffff) {
			case (offsetof(struct nvmf_connect_data, cntlid)):
				dev_err(ctrl->device,
					"%s, cntlid: %d\n",
					inv_data, data->cntlid);
				break;
			case (offsetof(struct nvmf_connect_data, hostnqn)):
				dev_err(ctrl->device,
					"%s, hostnqn \"%s\"\n",
					inv_data, data->hostnqn);
				break;
			case (offsetof(struct nvmf_connect_data, subsysnqn)):
				dev_err(ctrl->device,
					"%s, subsysnqn \"%s\"\n",
					inv_data, data->subsysnqn);
				break;
			default:
				dev_err(ctrl->device,
					"%s, starting byte offset: %d\n",
					inv_data, offset & 0xffff);
				break;
			}
		} else {
			char *inv_sqe = "Connect Invalid SQE Parameter";

			switch (offset) {
			case (offsetof(struct nvmf_connect_command, qid)):
				dev_err(ctrl->device,
					"%s, qid %d\n",
					inv_sqe, cmd->connect.qid);
				break;
			default:
				dev_err(ctrl->device,
					"%s, starting byte offset: %d\n",
					inv_sqe, offset);
			}
		}
		break;
	case NVME_SC_CONNECT_INVALID_HOST:
		dev_err(ctrl->device,
			"Connect for subsystem %s is not allowed, hostnqn: %s\n",
			data->subsysnqn, data->hostnqn);
		break;
	case NVME_SC_CONNECT_CTRL_BUSY:
		dev_err(ctrl->device,
			"Connect command failed: controller is busy or not available\n");
		break;
	case NVME_SC_CONNECT_FORMAT:
		dev_err(ctrl->device,
			"Connect incompatible format: %d\n",
			cmd->connect.recfmt);
		break;
	case NVME_SC_HOST_PATH_ERROR:
		dev_err(ctrl->device,
			"Connect command failed: host path error\n");
		break;
	case NVME_SC_AUTH_REQUIRED:
		dev_err(ctrl->device,
			"Connect command failed: authentication required\n");
		break;
	default:
		dev_err(ctrl->device,
			"Connect command failed, error wo/DNR bit: %d\n",
			err_sctype);
		break;
	}
}

static struct nvmf_connect_data *nvmf_connect_data_prep(struct nvme_ctrl *ctrl,
		u16 cntlid)
{
	struct nvmf_connect_data *data;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return NULL;

	uuid_copy(&data->hostid, &ctrl->opts->host->id);
	data->cntlid = cpu_to_le16(cntlid);
	strscpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
	strscpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);

	return data;
}

static void nvmf_connect_cmd_prep(struct nvme_ctrl *ctrl, u16 qid,
		struct nvme_command *cmd)
{
	cmd->connect.opcode = nvme_fabrics_command;
	cmd->connect.fctype = nvme_fabrics_type_connect;
	cmd->connect.qid = cpu_to_le16(qid);

	if (qid) {
		cmd->connect.sqsize = cpu_to_le16(ctrl->sqsize);
	} else {
		cmd->connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);

		/*
		 * The connect keep-alive timeout field is in milliseconds,
		 * while ctrl->kato is kept in seconds, so scale it up here.
		 */
		cmd->connect.kato = cpu_to_le32(ctrl->kato * 1000);
	}

	if (ctrl->opts->disable_sqflow)
		cmd->connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;
}

/**
 * nvmf_connect_admin_queue() - NVMe Fabrics Admin Queue "Connect"
 *				API function.
 * @ctrl:	Host NVMe controller instance used to request
 *		a new NVMe controller allocation on the target
 *		system and establish an NVMe Admin connection to
 *		that controller.
 *
 * This function enables an NVMe host device to request a new allocation of
 * an NVMe controller resource on a target system as well as establish a
 * fabrics-protocol connection of the NVMe Admin queue between the
 * host system device and the allocated NVMe controller on the
 * target system via an NVMe Fabrics "Connect" command.
 *
 * Return:
 *	0: success
 *	> 0: NVMe error status code
 *	< 0: Linux errno error code
 *
 */
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
{
	struct nvme_command cmd = { };
	union nvme_result res;
	struct nvmf_connect_data *data;
	int ret;
	u32 result;

	nvmf_connect_cmd_prep(ctrl, 0, &cmd);

	data = nvmf_connect_data_prep(ctrl, 0xffff);
	if (!data)
		return -ENOMEM;

	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res,
			data, sizeof(*data), NVME_QID_ANY,
			NVME_SUBMIT_AT_HEAD |
			NVME_SUBMIT_NOWAIT |
			NVME_SUBMIT_RESERVED);
	if (ret) {
		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
				       &cmd, data);
		goto out_free_data;
	}

	result = le32_to_cpu(res.u32);
	ctrl->cntlid = result & 0xFFFF;
	if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) {
		/* Secure concatenation is not implemented */
		if (result & NVME_CONNECT_AUTHREQ_ASCR) {
			dev_warn(ctrl->device,
				 "qid 0: secure concatenation is not supported\n");
			ret = NVME_SC_AUTH_REQUIRED;
			goto out_free_data;
		}
		/* Authentication required */
		ret = nvme_auth_negotiate(ctrl, 0);
		if (ret) {
			dev_warn(ctrl->device,
				 "qid 0: authentication setup failed\n");
			ret = NVME_SC_AUTH_REQUIRED;
			goto out_free_data;
		}
		ret = nvme_auth_wait(ctrl, 0);
		if (ret)
			dev_warn(ctrl->device,
				 "qid 0: authentication failed\n");
		else
			dev_info(ctrl->device,
				 "qid 0: authenticated\n");
	}
out_free_data:
	kfree(data);
	return ret;
}
EXPORT_SYMBOL_GPL(nvmf_connect_admin_queue);
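
/*
 * Caller sketch (illustrative, mirroring what the tcp/rdma transports
 * do): the transport allocates and starts its admin queue, then issues
 * the fabrics Connect before reading controller properties:
 *
 *	ret = nvmf_connect_admin_queue(ctrl);
 *	if (ret)
 *		goto out_stop_queue;
 */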

/**
 * nvmf_connect_io_queue() - NVMe Fabrics I/O Queue "Connect"
 *			     API function.
 * @ctrl:	Host NVMe controller instance used to establish an
 *		NVMe I/O queue connection to the already allocated NVMe
 *		controller on the target system.
 * @qid:	NVMe I/O queue number for the new I/O connection between
 *		host and target (note qid == 0 is illegal as this is
 *		the Admin queue, per NVMe standard).
 *
 * This function issues a fabrics-protocol connection
 * of an NVMe I/O queue (via the NVMe Fabrics "Connect" command)
 * between the host system device and the allocated NVMe controller
 * on the target system.
 *
 * Return:
 *	0: success
 *	> 0: NVMe error status code
 *	< 0: Linux errno error code
 */
int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
{
	struct nvme_command cmd = { };
	struct nvmf_connect_data *data;
	union nvme_result res;
	int ret;
	u32 result;

	nvmf_connect_cmd_prep(ctrl, qid, &cmd);

	data = nvmf_connect_data_prep(ctrl, ctrl->cntlid);
	if (!data)
		return -ENOMEM;

	ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res,
			data, sizeof(*data), qid,
			NVME_SUBMIT_AT_HEAD |
			NVME_SUBMIT_RESERVED |
			NVME_SUBMIT_NOWAIT);
	if (ret) {
		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
				       &cmd, data);
		goto out_free_data;
	}
	result = le32_to_cpu(res.u32);
	if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) {
		/* Secure concatenation is not implemented */
		if (result & NVME_CONNECT_AUTHREQ_ASCR) {
			dev_warn(ctrl->device,
				 "qid %u: secure concatenation is not supported\n",
				 qid);
			ret = NVME_SC_AUTH_REQUIRED;
			goto out_free_data;
		}
		/* Authentication required */
		ret = nvme_auth_negotiate(ctrl, qid);
		if (ret) {
			dev_warn(ctrl->device,
				 "qid %u: authentication setup failed\n", qid);
			ret = NVME_SC_AUTH_REQUIRED;
		} else {
			ret = nvme_auth_wait(ctrl, qid);
			if (ret)
				dev_warn(ctrl->device,
					 "qid %u: authentication failed\n", qid);
		}
	}
out_free_data:
	kfree(data);
	return ret;
}
EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
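
/*
 * Caller sketch (illustrative): transports connect each I/O queue after
 * creating it; qid 0 is reserved for the admin queue:
 *
 *	for (i = 1; i < ctrl->queue_count; i++) {
 *		ret = nvmf_connect_io_queue(ctrl, i);
 *		if (ret)
 *			goto out_stop_queues;
 *	}
 */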

bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
{
	return ctrl->opts->max_reconnects == -1 ||
	       ctrl->nr_reconnects < ctrl->opts->max_reconnects;
}
EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
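
/*
 * Usage sketch (illustrative; the connect work item is transport
 * private): transports call this from their error recovery path to
 * decide between scheduling another reconnect attempt and tearing the
 * controller down:
 *
 *	if (nvmf_should_reconnect(ctrl))
 *		queue_delayed_work(nvme_wq, &tcp_ctrl->connect_work,
 *				   ctrl->opts->reconnect_delay * HZ);
 *	else
 *		nvme_delete_ctrl(ctrl);
 */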

/**
 * nvmf_register_transport() - NVMe Fabrics Library registration function.
 * @ops:	Transport ops instance to be registered to the
 *		common fabrics library.
 *
 * API function that registers a specific transport fabric implementation
 * with the common NVMe fabrics library. Part of the overall init
 * sequence of starting up a fabrics driver.
 */
int nvmf_register_transport(struct nvmf_transport_ops *ops)
{
	if (!ops->create_ctrl)
		return -EINVAL;

	down_write(&nvmf_transports_rwsem);
	list_add_tail(&ops->entry, &nvmf_transports);
	up_write(&nvmf_transports_rwsem);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmf_register_transport);
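
/*
 * Registration sketch (illustrative; "mytransport" and my_create_ctrl
 * are hypothetical): a transport driver fills in an ops table and
 * registers it from its module init:
 *
 *	static struct nvmf_transport_ops my_transport_ops = {
 *		.name		= "mytransport",
 *		.module		= THIS_MODULE,
 *		.required_opts	= NVMF_OPT_TRADDR,
 *		.allowed_opts	= NVMF_OPT_TRSVCID,
 *		.create_ctrl	= my_create_ctrl,
 *	};
 *
 *	ret = nvmf_register_transport(&my_transport_ops);
 */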

/**
 * nvmf_unregister_transport() - NVMe Fabrics Library unregistration function.
 * @ops:	Transport ops instance to be unregistered from the
 *		common fabrics library.
 *
 * Fabrics API function that unregisters a specific transport fabric
 * implementation from the common NVMe fabrics library.
 * Part of the overall exit sequence of unloading the implemented driver.
 */
void nvmf_unregister_transport(struct nvmf_transport_ops *ops)
{
	down_write(&nvmf_transports_rwsem);
	list_del(&ops->entry);
	up_write(&nvmf_transports_rwsem);
}
EXPORT_SYMBOL_GPL(nvmf_unregister_transport);

static struct nvmf_transport_ops *nvmf_lookup_transport(
		struct nvmf_ctrl_options *opts)
{
	struct nvmf_transport_ops *ops;

	lockdep_assert_held(&nvmf_transports_rwsem);

	list_for_each_entry(ops, &nvmf_transports, entry) {
		if (strcmp(ops->name, opts->transport) == 0)
			return ops;
	}

	return NULL;
}

static struct key *nvmf_parse_key(int key_id)
{
	struct key *key;

	if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) {
		pr_err("TLS is not supported\n");
		return ERR_PTR(-EINVAL);
	}

	key = key_lookup(key_id);
	if (IS_ERR(key))
		pr_err("key id %08x not found\n", key_id);
	else
		pr_debug("Using key id %08x\n", key_id);
	return key;
}

static const match_table_t opt_tokens = {
	{ NVMF_OPT_TRANSPORT,		"transport=%s"		},
	{ NVMF_OPT_TRADDR,		"traddr=%s"		},
	{ NVMF_OPT_TRSVCID,		"trsvcid=%s"		},
	{ NVMF_OPT_NQN,			"nqn=%s"		},
	{ NVMF_OPT_QUEUE_SIZE,		"queue_size=%d"		},
	{ NVMF_OPT_NR_IO_QUEUES,	"nr_io_queues=%d"	},
	{ NVMF_OPT_RECONNECT_DELAY,	"reconnect_delay=%d"	},
	{ NVMF_OPT_CTRL_LOSS_TMO,	"ctrl_loss_tmo=%d"	},
	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
	{ NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
	{ NVMF_OPT_HOST_IFACE,		"host_iface=%s"		},
	{ NVMF_OPT_HOST_ID,		"hostid=%s"		},
	{ NVMF_OPT_DUP_CONNECT,		"duplicate_connect"	},
	{ NVMF_OPT_DISABLE_SQFLOW,	"disable_sqflow"	},
	{ NVMF_OPT_HDR_DIGEST,		"hdr_digest"		},
	{ NVMF_OPT_DATA_DIGEST,		"data_digest"		},
	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
	{ NVMF_OPT_TOS,			"tos=%d"		},
#ifdef CONFIG_NVME_TCP_TLS
	{ NVMF_OPT_KEYRING,		"keyring=%d"		},
	{ NVMF_OPT_TLS_KEY,		"tls_key=%d"		},
#endif
	{ NVMF_OPT_FAIL_FAST_TMO,	"fast_io_fail_tmo=%d"	},
	{ NVMF_OPT_DISCOVERY,		"discovery"		},
#ifdef CONFIG_NVME_HOST_AUTH
	{ NVMF_OPT_DHCHAP_SECRET,	"dhchap_secret=%s"	},
	{ NVMF_OPT_DHCHAP_CTRL_SECRET,	"dhchap_ctrl_secret=%s"	},
#endif
#ifdef CONFIG_NVME_TCP_TLS
	{ NVMF_OPT_TLS,			"tls"			},
#endif
	{ NVMF_OPT_ERR,			NULL			}
};
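
/*
 * A connect request, as written to /dev/nvme-fabrics, is a single line
 * of comma-separated tokens from the table above; for example
 * (illustrative values, shown wrapped here although the actual string
 * contains no line break):
 *
 *	transport=tcp,traddr=192.168.1.100,trsvcid=4420,
 *	nqn=nqn.2014-08.org.example:subsys1,nr_io_queues=8,keep_alive_tmo=5
 */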

static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
		const char *buf)
{
	substring_t args[MAX_OPT_ARGS];
	char *options, *o, *p;
	int token, ret = 0;
	size_t nqnlen = 0;
	int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO, key_id;
	uuid_t hostid;
	char hostnqn[NVMF_NQN_SIZE];
	struct key *key;

	/* Set defaults */
	opts->queue_size = NVMF_DEF_QUEUE_SIZE;
	opts->nr_io_queues = num_online_cpus();
	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
	opts->kato = 0;
	opts->duplicate_connect = false;
	opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
	opts->hdr_digest = false;
	opts->data_digest = false;
	opts->tos = -1; /* < 0 == use transport default */
	opts->tls = false;
	opts->tls_key = NULL;
	opts->keyring = NULL;

	options = o = kstrdup(buf, GFP_KERNEL);
	if (!options)
		return -ENOMEM;

	/* use default host if not given by user space */
	uuid_copy(&hostid, &nvmf_default_host->id);
	strscpy(hostnqn, nvmf_default_host->nqn, NVMF_NQN_SIZE);

	while ((p = strsep(&o, ",\n")) != NULL) {
		if (!*p)
			continue;

		token = match_token(p, opt_tokens, args);
		opts->mask |= token;
		switch (token) {
		case NVMF_OPT_TRANSPORT:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->transport);
			opts->transport = p;
			break;
		case NVMF_OPT_NQN:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->subsysnqn);
			opts->subsysnqn = p;
			nqnlen = strlen(opts->subsysnqn);
			if (nqnlen >= NVMF_NQN_SIZE) {
				pr_err("%s needs to be < %d bytes\n",
					opts->subsysnqn, NVMF_NQN_SIZE);
				ret = -EINVAL;
				goto out;
			}
			opts->discovery_nqn =
				!(strcmp(opts->subsysnqn,
					 NVME_DISC_SUBSYS_NAME));
			break;
		case NVMF_OPT_TRADDR:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->traddr);
			opts->traddr = p;
			break;
		case NVMF_OPT_TRSVCID:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->trsvcid);
			opts->trsvcid = p;
			break;
		case NVMF_OPT_QUEUE_SIZE:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token < NVMF_MIN_QUEUE_SIZE ||
			    token > NVMF_MAX_QUEUE_SIZE) {
				pr_err("Invalid queue_size %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->queue_size = token;
			break;
		case NVMF_OPT_NR_IO_QUEUES:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token <= 0) {
				pr_err("Invalid number of IOQs %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			if (opts->discovery_nqn) {
				pr_debug("Ignoring nr_io_queues value for discovery controller\n");
				break;
			}

			opts->nr_io_queues = min_t(unsigned int,
					num_online_cpus(), token);
			break;
		case NVMF_OPT_KATO:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}

			if (token < 0) {
				pr_err("Invalid keep_alive_tmo %d\n", token);
				ret = -EINVAL;
				goto out;
			} else if (token == 0 && !opts->discovery_nqn) {
				/* Allowed for debug */
				pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
			}
			opts->kato = token;
			break;
		case NVMF_OPT_CTRL_LOSS_TMO:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}

			if (token < 0)
				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
			ctrl_loss_tmo = token;
			break;
		case NVMF_OPT_FAIL_FAST_TMO:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}

			if (token >= 0)
				pr_warn("I/O fail on reconnect controller after %d sec\n",
					token);
			else
				token = -1;

			opts->fast_io_fail_tmo = token;
			break;
		case NVMF_OPT_HOSTNQN:
			if (opts->host) {
				pr_err("hostnqn already user-assigned: %s\n",
				       opts->host->nqn);
				ret = -EADDRINUSE;
				goto out;
			}
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			nqnlen = strlen(p);
			if (nqnlen >= NVMF_NQN_SIZE) {
				pr_err("%s needs to be < %d bytes\n",
					p, NVMF_NQN_SIZE);
				kfree(p);
				ret = -EINVAL;
				goto out;
			}
			strscpy(hostnqn, p, NVMF_NQN_SIZE);
			kfree(p);
			break;
		case NVMF_OPT_RECONNECT_DELAY:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token <= 0) {
				pr_err("Invalid reconnect_delay %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->reconnect_delay = token;
			break;
		case NVMF_OPT_HOST_TRADDR:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->host_traddr);
			opts->host_traddr = p;
			break;
		case NVMF_OPT_HOST_IFACE:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->host_iface);
			opts->host_iface = p;
			break;
		case NVMF_OPT_HOST_ID:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			ret = uuid_parse(p, &hostid);
			if (ret) {
				pr_err("Invalid hostid %s\n", p);
				ret = -EINVAL;
				kfree(p);
				goto out;
			}
			kfree(p);
			break;
		case NVMF_OPT_DUP_CONNECT:
			opts->duplicate_connect = true;
			break;
		case NVMF_OPT_DISABLE_SQFLOW:
			opts->disable_sqflow = true;
			break;
		case NVMF_OPT_HDR_DIGEST:
			opts->hdr_digest = true;
			break;
		case NVMF_OPT_DATA_DIGEST:
			opts->data_digest = true;
			break;
		case NVMF_OPT_NR_WRITE_QUEUES:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token <= 0) {
				pr_err("Invalid nr_write_queues %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->nr_write_queues = token;
			break;
		case NVMF_OPT_NR_POLL_QUEUES:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token <= 0) {
				pr_err("Invalid nr_poll_queues %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->nr_poll_queues = token;
			break;
		case NVMF_OPT_TOS:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token < 0) {
				pr_err("Invalid type of service %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			if (token > 255) {
				pr_warn("Clamping type of service to 255\n");
				token = 255;
			}
			opts->tos = token;
			break;
		case NVMF_OPT_KEYRING:
			if (match_int(args, &key_id) || key_id <= 0) {
				ret = -EINVAL;
				goto out;
			}
			key = nvmf_parse_key(key_id);
			if (IS_ERR(key)) {
				ret = PTR_ERR(key);
				goto out;
			}
			key_put(opts->keyring);
			opts->keyring = key;
			break;
		case NVMF_OPT_TLS_KEY:
			if (match_int(args, &key_id) || key_id <= 0) {
				ret = -EINVAL;
				goto out;
			}
			key = nvmf_parse_key(key_id);
			if (IS_ERR(key)) {
				ret = PTR_ERR(key);
				goto out;
			}
			key_put(opts->tls_key);
			opts->tls_key = key;
			break;
		case NVMF_OPT_DISCOVERY:
			opts->discovery_nqn = true;
			break;
		case NVMF_OPT_DHCHAP_SECRET:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			if (strlen(p) < 11 || strncmp(p, "DHHC-1:", 7)) {
				pr_err("Invalid DH-CHAP secret %s\n", p);
				ret = -EINVAL;
				goto out;
			}
			kfree(opts->dhchap_secret);
			opts->dhchap_secret = p;
			break;
		case NVMF_OPT_DHCHAP_CTRL_SECRET:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			if (strlen(p) < 11 || strncmp(p, "DHHC-1:", 7)) {
				pr_err("Invalid DH-CHAP secret %s\n", p);
				ret = -EINVAL;
				goto out;
			}
			kfree(opts->dhchap_ctrl_secret);
			opts->dhchap_ctrl_secret = p;
			break;
		case NVMF_OPT_TLS:
			if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) {
				pr_err("TLS is not supported\n");
				ret = -EINVAL;
				goto out;
			}
			opts->tls = true;
			break;
		default:
			pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
				p);
			ret = -EINVAL;
			goto out;
		}
	}

	if (opts->discovery_nqn) {
		opts->nr_io_queues = 0;
		opts->nr_write_queues = 0;
		opts->nr_poll_queues = 0;
		opts->duplicate_connect = true;
	} else {
		if (!opts->kato)
			opts->kato = NVME_DEFAULT_KATO;
	}
	if (ctrl_loss_tmo < 0) {
		opts->max_reconnects = -1;
	} else {
		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
						opts->reconnect_delay);
		if (ctrl_loss_tmo < opts->fast_io_fail_tmo)
			pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n",
				opts->fast_io_fail_tmo, ctrl_loss_tmo);
	}

	opts->host = nvmf_host_add(hostnqn, &hostid);
	if (IS_ERR(opts->host)) {
		ret = PTR_ERR(opts->host);
		opts->host = NULL;
		goto out;
	}

out:
	kfree(options);
	return ret;
}

void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues,
			u32 io_queues[HCTX_MAX_TYPES])
{
	if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
		/*
		 * separate read/write queues
		 * hand out dedicated default queues only after we have
		 * sufficient read queues.
		 */
		io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
		nr_io_queues -= io_queues[HCTX_TYPE_READ];
		io_queues[HCTX_TYPE_DEFAULT] =
			min(opts->nr_write_queues, nr_io_queues);
		nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT];
	} else {
		/*
		 * shared read/write queues
		 * either no write queues were requested, or we don't have
		 * sufficient queue count to have dedicated default queues.
		 */
		io_queues[HCTX_TYPE_DEFAULT] =
			min(opts->nr_io_queues, nr_io_queues);
		nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT];
	}

	if (opts->nr_poll_queues && nr_io_queues) {
		/* map dedicated poll queues only if we have queues left */
		io_queues[HCTX_TYPE_POLL] =
			min(opts->nr_poll_queues, nr_io_queues);
	}
}
EXPORT_SYMBOL_GPL(nvmf_set_io_queues);
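
/*
 * Worked example (illustrative numbers): with nr_io_queues = 8 available
 * and the user requesting nr_io_queues=4 (reads), nr_write_queues=2 and
 * nr_poll_queues=2, the split comes out as:
 *
 *	io_queues[HCTX_TYPE_READ]    = 4
 *	io_queues[HCTX_TYPE_DEFAULT] = 2
 *	io_queues[HCTX_TYPE_POLL]    = 2
 */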

void nvmf_map_queues(struct blk_mq_tag_set *set, struct nvme_ctrl *ctrl,
		     u32 io_queues[HCTX_MAX_TYPES])
{
	struct nvmf_ctrl_options *opts = ctrl->opts;

	if (opts->nr_write_queues && io_queues[HCTX_TYPE_READ]) {
		/* separate read/write queues */
		set->map[HCTX_TYPE_DEFAULT].nr_queues =
			io_queues[HCTX_TYPE_DEFAULT];
		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
		set->map[HCTX_TYPE_READ].nr_queues =
			io_queues[HCTX_TYPE_READ];
		set->map[HCTX_TYPE_READ].queue_offset =
			io_queues[HCTX_TYPE_DEFAULT];
	} else {
		/* shared read/write queues */
		set->map[HCTX_TYPE_DEFAULT].nr_queues =
			io_queues[HCTX_TYPE_DEFAULT];
		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
		set->map[HCTX_TYPE_READ].nr_queues =
			io_queues[HCTX_TYPE_DEFAULT];
		set->map[HCTX_TYPE_READ].queue_offset = 0;
	}

	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
	if (opts->nr_poll_queues && io_queues[HCTX_TYPE_POLL]) {
		/* map dedicated poll queues only if we have queues left */
		set->map[HCTX_TYPE_POLL].nr_queues = io_queues[HCTX_TYPE_POLL];
		set->map[HCTX_TYPE_POLL].queue_offset =
			io_queues[HCTX_TYPE_DEFAULT] +
			io_queues[HCTX_TYPE_READ];
		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
	}

	dev_info(ctrl->device,
		"mapped %d/%d/%d default/read/poll queues.\n",
		io_queues[HCTX_TYPE_DEFAULT],
		io_queues[HCTX_TYPE_READ],
		io_queues[HCTX_TYPE_POLL]);
}
EXPORT_SYMBOL_GPL(nvmf_map_queues);

static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
		unsigned int required_opts)
{
	if ((opts->mask & required_opts) != required_opts) {
		unsigned int i;

		for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
			if ((opt_tokens[i].token & required_opts) &&
			    !(opt_tokens[i].token & opts->mask)) {
				pr_warn("missing parameter '%s'\n",
					opt_tokens[i].pattern);
			}
		}

		return -EINVAL;
	}

	return 0;
}

bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
		struct nvmf_ctrl_options *opts)
{
	if (!nvmf_ctlr_matches_baseopts(ctrl, opts) ||
	    strcmp(opts->traddr, ctrl->opts->traddr) ||
	    strcmp(opts->trsvcid, ctrl->opts->trsvcid))
		return false;

	/*
	 * Checking the local address or host interfaces is rough.
	 *
	 * In most cases, none is specified and the host port or
	 * host interface is selected by the stack.
	 *
	 * Assume no match if:
	 * -  local address or host interface is specified and address
	 *    or host interface is not the same
	 * -  local address or host interface is not specified but
	 *    remote is, or vice versa (admin using specific
	 *    host_traddr/host_iface when it matters).
	 */
	if ((opts->mask & NVMF_OPT_HOST_TRADDR) &&
	    (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) {
		if (strcmp(opts->host_traddr, ctrl->opts->host_traddr))
			return false;
	} else if ((opts->mask & NVMF_OPT_HOST_TRADDR) ||
		   (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) {
		return false;
	}

	if ((opts->mask & NVMF_OPT_HOST_IFACE) &&
	    (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)) {
		if (strcmp(opts->host_iface, ctrl->opts->host_iface))
			return false;
	} else if ((opts->mask & NVMF_OPT_HOST_IFACE) ||
		   (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)) {
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(nvmf_ip_options_match);
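
/*
 * Example (illustrative): two connect requests to the same traddr and
 * trsvcid match only if both specify the same host_traddr/host_iface or
 * neither specifies one; host_traddr=192.168.1.50 on one side and no
 * host_traddr on the other counts as a different path and does not match.
 */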

static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts,
		unsigned int allowed_opts)
{
	if (opts->mask & ~allowed_opts) {
		unsigned int i;

		for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
			if ((opt_tokens[i].token & opts->mask) &&
			    (opt_tokens[i].token & ~allowed_opts)) {
				pr_warn("invalid parameter '%s'\n",
					opt_tokens[i].pattern);
			}
		}

		return -EINVAL;
	}

	return 0;
}

void nvmf_free_options(struct nvmf_ctrl_options *opts)
{
	nvmf_host_put(opts->host);
	key_put(opts->keyring);
	key_put(opts->tls_key);
	kfree(opts->transport);
	kfree(opts->traddr);
	kfree(opts->trsvcid);
	kfree(opts->subsysnqn);
	kfree(opts->host_traddr);
	kfree(opts->host_iface);
	kfree(opts->dhchap_secret);
	kfree(opts->dhchap_ctrl_secret);
	kfree(opts);
}
EXPORT_SYMBOL_GPL(nvmf_free_options);

#define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
#define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
				 NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_DISCOVERY |\
				 NVMF_OPT_FAIL_FAST_TMO | NVMF_OPT_DHCHAP_SECRET |\
				 NVMF_OPT_DHCHAP_CTRL_SECRET)

static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf)
{
	struct nvmf_ctrl_options *opts;
	struct nvmf_transport_ops *ops;
	struct nvme_ctrl *ctrl;
	int ret;

	opts = kzalloc(sizeof(*opts), GFP_KERNEL);
	if (!opts)
		return ERR_PTR(-ENOMEM);

	ret = nvmf_parse_options(opts, buf);
	if (ret)
		goto out_free_opts;

	request_module("nvme-%s", opts->transport);

	/*
	 * Check the generic options first as we need a valid transport for
	 * the lookup below.  Then clear the generic flags so that transport
	 * drivers don't have to care about them.
	 */
	ret = nvmf_check_required_opts(opts, NVMF_REQUIRED_OPTS);
	if (ret)
		goto out_free_opts;
	opts->mask &= ~NVMF_REQUIRED_OPTS;

	down_read(&nvmf_transports_rwsem);
	ops = nvmf_lookup_transport(opts);
	if (!ops) {
		pr_info("no handler found for transport %s.\n",
			opts->transport);
		ret = -EINVAL;
		goto out_unlock;
	}

	if (!try_module_get(ops->module)) {
		ret = -EBUSY;
		goto out_unlock;
	}
	up_read(&nvmf_transports_rwsem);

	ret = nvmf_check_required_opts(opts, ops->required_opts);
	if (ret)
		goto out_module_put;
	ret = nvmf_check_allowed_opts(opts, NVMF_ALLOWED_OPTS |
				ops->allowed_opts | ops->required_opts);
	if (ret)
		goto out_module_put;

	ctrl = ops->create_ctrl(dev, opts);
	if (IS_ERR(ctrl)) {
		ret = PTR_ERR(ctrl);
		goto out_module_put;
	}

	module_put(ops->module);
	return ctrl;

out_module_put:
	module_put(ops->module);
	goto out_free_opts;
out_unlock:
	up_read(&nvmf_transports_rwsem);
out_free_opts:
	nvmf_free_options(opts);
	return ERR_PTR(ret);
}

static const struct class nvmf_class = {
	.name = "nvme-fabrics",
};

static struct device *nvmf_device;
static DEFINE_MUTEX(nvmf_dev_mutex);

static ssize_t nvmf_dev_write(struct file *file, const char __user *ubuf,
		size_t count, loff_t *pos)
{
	struct seq_file *seq_file = file->private_data;
	struct nvme_ctrl *ctrl;
	const char *buf;
	int ret = 0;

	if (count > PAGE_SIZE)
		return -ENOMEM;

	buf = memdup_user_nul(ubuf, count);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	mutex_lock(&nvmf_dev_mutex);
	if (seq_file->private) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ctrl = nvmf_create_ctrl(nvmf_device, buf);
	if (IS_ERR(ctrl)) {
		ret = PTR_ERR(ctrl);
		goto out_unlock;
	}

	seq_file->private = ctrl;

out_unlock:
	mutex_unlock(&nvmf_dev_mutex);
	kfree(buf);
	return ret ? ret : count;
}
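
/*
 * Interface sketch (illustrative): user space (typically nvme-cli's
 * "nvme connect") opens /dev/nvme-fabrics once, writes the options
 * string, and then reads "instance=%d,cntlid=%d" back on the same file
 * descriptor; the created controller is tied to that open file via
 * seq_file->private, so a separate open would not see it.
 */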

static void __nvmf_concat_opt_tokens(struct seq_file *seq_file)
{
	const struct match_token *tok;
	int idx;

	/*
	 * Add dummy entries for instance and cntlid to
	 * signal an invalid/non-existing controller
	 */
	seq_puts(seq_file, "instance=-1,cntlid=-1");
	for (idx = 0; idx < ARRAY_SIZE(opt_tokens); idx++) {
		tok = &opt_tokens[idx];
		if (tok->token == NVMF_OPT_ERR)
			continue;
		seq_puts(seq_file, ",");
		seq_puts(seq_file, tok->pattern);
	}
	seq_puts(seq_file, "\n");
}

static int nvmf_dev_show(struct seq_file *seq_file, void *private)
{
	struct nvme_ctrl *ctrl;

	mutex_lock(&nvmf_dev_mutex);
	ctrl = seq_file->private;
	if (!ctrl) {
		__nvmf_concat_opt_tokens(seq_file);
		goto out_unlock;
	}

	seq_printf(seq_file, "instance=%d,cntlid=%d\n",
			ctrl->instance, ctrl->cntlid);

out_unlock:
	mutex_unlock(&nvmf_dev_mutex);
	return 0;
}

static int nvmf_dev_open(struct inode *inode, struct file *file)
{
	/*
	 * The miscdevice code initializes file->private_data, but doesn't
	 * make use of it later.
	 */
	file->private_data = NULL;
	return single_open(file, nvmf_dev_show, NULL);
}

static int nvmf_dev_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq_file = file->private_data;
	struct nvme_ctrl *ctrl = seq_file->private;

	if (ctrl)
		nvme_put_ctrl(ctrl);
	return single_release(inode, file);
}

static const struct file_operations nvmf_dev_fops = {
	.owner		= THIS_MODULE,
	.write		= nvmf_dev_write,
	.read		= seq_read,
	.open		= nvmf_dev_open,
	.release	= nvmf_dev_release,
};

static struct miscdevice nvmf_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "nvme-fabrics",
	.fops		= &nvmf_dev_fops,
};

static int __init nvmf_init(void)
{
	int ret;

	nvmf_default_host = nvmf_host_default();
	if (!nvmf_default_host)
		return -ENOMEM;

	ret = class_register(&nvmf_class);
	if (ret) {
		pr_err("couldn't register class nvme-fabrics\n");
		goto out_free_host;
	}

	nvmf_device =
		device_create(&nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
	if (IS_ERR(nvmf_device)) {
		pr_err("couldn't create nvme-fabrics device!\n");
		ret = PTR_ERR(nvmf_device);
		goto out_destroy_class;
	}

	ret = misc_register(&nvmf_misc);
	if (ret) {
		pr_err("couldn't register misc device: %d\n", ret);
		goto out_destroy_device;
	}

	return 0;

out_destroy_device:
	device_destroy(&nvmf_class, MKDEV(0, 0));
out_destroy_class:
	class_unregister(&nvmf_class);
out_free_host:
	nvmf_host_put(nvmf_default_host);
	return ret;
}

static void __exit nvmf_exit(void)
{
	misc_deregister(&nvmf_misc);
	device_destroy(&nvmf_class, MKDEV(0, 0));
	class_unregister(&nvmf_class);
	nvmf_host_put(nvmf_default_host);

	BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvmf_auth_send_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvmf_auth_receive_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvmf_connect_data) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_negotiate_data) != 8);
	BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_challenge_data) != 16);
	BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_reply_data) != 16);
	BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_success1_data) != 16);
	BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_success2_data) != 16);
}

MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("NVMe host fabrics library");

module_init(nvmf_init);
module_exit(nvmf_exit);