1/*
2 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35#include <linux/module.h>
36#include <linux/init.h>
37#include <linux/slab.h>
38#include <linux/err.h>
39#include <linux/string.h>
40#include <linux/parser.h>
41#include <linux/random.h>
42#include <linux/jiffies.h>
43#include <linux/lockdep.h>
44#include <linux/inet.h>
45#include <rdma/ib_cache.h>
46
47#include <linux/atomic.h>
48
49#include <scsi/scsi.h>
50#include <scsi/scsi_device.h>
51#include <scsi/scsi_dbg.h>
52#include <scsi/scsi_tcq.h>
53#include <scsi/srp.h>
54#include <scsi/scsi_transport_srp.h>
55
56#include "ib_srp.h"
57
58#define DRV_NAME	"ib_srp"
59#define PFX		DRV_NAME ": "
60
61MODULE_AUTHOR("Roland Dreier");
62MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63MODULE_LICENSE("Dual BSD/GPL");
64
65static unsigned int srp_sg_tablesize;
66static unsigned int cmd_sg_entries;
67static unsigned int indirect_sg_entries;
68static bool allow_ext_sg;
69static bool register_always = true;
70static bool never_register;
71static int topspin_workarounds = 1;
72
73module_param(srp_sg_tablesize, uint, 0444);
74MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
75
76module_param(cmd_sg_entries, uint, 0444);
77MODULE_PARM_DESC(cmd_sg_entries,
78		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
79
80module_param(indirect_sg_entries, uint, 0444);
81MODULE_PARM_DESC(indirect_sg_entries,
82		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
83
84module_param(allow_ext_sg, bool, 0444);
85MODULE_PARM_DESC(allow_ext_sg,
86		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
87
88module_param(topspin_workarounds, int, 0444);
89MODULE_PARM_DESC(topspin_workarounds,
90		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
91
92module_param(register_always, bool, 0444);
93MODULE_PARM_DESC(register_always,
94		 "Use memory registration even for contiguous memory regions");
95
96module_param(never_register, bool, 0444);
97MODULE_PARM_DESC(never_register, "Never register memory");
98
99static const struct kernel_param_ops srp_tmo_ops;
100
101static int srp_reconnect_delay = 10;
102module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
103		S_IRUGO | S_IWUSR);
104MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
105
106static int srp_fast_io_fail_tmo = 15;
107module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
108		S_IRUGO | S_IWUSR);
109MODULE_PARM_DESC(fast_io_fail_tmo,
110		 "Number of seconds between the observation of a transport"
111		 " layer error and failing all I/O. \"off\" means that this"
112		 " functionality is disabled.");
113
114static int srp_dev_loss_tmo = 600;
115module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
116		S_IRUGO | S_IWUSR);
117MODULE_PARM_DESC(dev_loss_tmo,
118		 "Maximum number of seconds that the SRP transport should"
119		 " insulate transport layer errors. After this time has been"
120		 " exceeded the SCSI host is removed. Should be"
121		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
122		 " if fast_io_fail_tmo has not been set. \"off\" means that"
123		 " this functionality is disabled.");
124
125static bool srp_use_imm_data = true;
126module_param_named(use_imm_data, srp_use_imm_data, bool, 0644);
127MODULE_PARM_DESC(use_imm_data,
128		 "Whether or not to request permission to use immediate data during SRP login.");
129
130static unsigned int srp_max_imm_data = 8 * 1024;
131module_param_named(max_imm_data, srp_max_imm_data, uint, 0644);
132MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size.");
133
134static unsigned ch_count;
135module_param(ch_count, uint, 0444);
136MODULE_PARM_DESC(ch_count,
137		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
138
139static int srp_add_one(struct ib_device *device);
140static void srp_remove_one(struct ib_device *device, void *client_data);
141static void srp_rename_dev(struct ib_device *device, void *client_data);
142static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
143static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
144		const char *opname);
145static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
146			     const struct ib_cm_event *event);
147static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
148			       struct rdma_cm_event *event);
149
150static struct scsi_transport_template *ib_srp_transport_template;
151static struct workqueue_struct *srp_remove_wq;
152
153static struct ib_client srp_client = {
154	.name   = "srp",
155	.add    = srp_add_one,
156	.remove = srp_remove_one,
157	.rename = srp_rename_dev
158};
159
160static struct ib_sa_client srp_sa_client;
161
162static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
163{
164	int tmo = *(int *)kp->arg;
165
166	if (tmo >= 0)
167		return sysfs_emit(buffer, "%d\n", tmo);
168	else
169		return sysfs_emit(buffer, "off\n");
170}
171
172static int srp_tmo_set(const char *val, const struct kernel_param *kp)
173{
174	int tmo, res;
175
176	res = srp_parse_tmo(&tmo, val);
177	if (res)
178		goto out;
179
180	if (kp->arg == &srp_reconnect_delay)
181		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
182				    srp_dev_loss_tmo);
183	else if (kp->arg == &srp_fast_io_fail_tmo)
184		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
185	else
186		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
187				    tmo);
188	if (res)
189		goto out;
190	*(int *)kp->arg = tmo;
191
192out:
193	return res;
194}
195
196static const struct kernel_param_ops srp_tmo_ops = {
197	.get = srp_tmo_get,
198	.set = srp_tmo_set,
199};
200
201static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
202{
203	return (struct srp_target_port *) host->hostdata;
204}
205
206static const char *srp_target_info(struct Scsi_Host *host)
207{
208	return host_to_target(host)->target_name;
209}
210
211static int srp_target_is_topspin(struct srp_target_port *target)
212{
213	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
214	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
215
216	return topspin_workarounds &&
217		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
218		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
219}
220
221static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
222				   gfp_t gfp_mask,
223				   enum dma_data_direction direction)
224{
225	struct srp_iu *iu;
226
227	iu = kmalloc(sizeof *iu, gfp_mask);
228	if (!iu)
229		goto out;
230
231	iu->buf = kzalloc(size, gfp_mask);
232	if (!iu->buf)
233		goto out_free_iu;
234
235	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
236				    direction);
237	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
238		goto out_free_buf;
239
240	iu->size      = size;
241	iu->direction = direction;
242
243	return iu;
244
245out_free_buf:
246	kfree(iu->buf);
247out_free_iu:
248	kfree(iu);
249out:
250	return NULL;
251}
252
253static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
254{
255	if (!iu)
256		return;
257
258	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
259			    iu->direction);
260	kfree(iu->buf);
261	kfree(iu);
262}
263
264static void srp_qp_event(struct ib_event *event, void *context)
265{
266	pr_debug("QP event %s (%d)\n",
267		 ib_event_msg(event->event), event->event);
268}
269
270static int srp_init_ib_qp(struct srp_target_port *target,
271			  struct ib_qp *qp)
272{
273	struct ib_qp_attr *attr;
274	int ret;
275
276	attr = kmalloc(sizeof *attr, GFP_KERNEL);
277	if (!attr)
278		return -ENOMEM;
279
280	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
281				  target->srp_host->port,
282				  be16_to_cpu(target->ib_cm.pkey),
283				  &attr->pkey_index);
284	if (ret)
285		goto out;
286
287	attr->qp_state        = IB_QPS_INIT;
288	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
289				    IB_ACCESS_REMOTE_WRITE);
290	attr->port_num        = target->srp_host->port;
291
292	ret = ib_modify_qp(qp, attr,
293			   IB_QP_STATE		|
294			   IB_QP_PKEY_INDEX	|
295			   IB_QP_ACCESS_FLAGS	|
296			   IB_QP_PORT);
297
298out:
299	kfree(attr);
300	return ret;
301}
302
303static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
304{
305	struct srp_target_port *target = ch->target;
306	struct ib_cm_id *new_cm_id;
307
308	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
309				    srp_ib_cm_handler, ch);
310	if (IS_ERR(new_cm_id))
311		return PTR_ERR(new_cm_id);
312
313	if (ch->ib_cm.cm_id)
314		ib_destroy_cm_id(ch->ib_cm.cm_id);
315	ch->ib_cm.cm_id = new_cm_id;
316	if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
317			    target->srp_host->port))
318		ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
319	else
320		ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
321	ch->ib_cm.path.sgid = target->sgid;
322	ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
323	ch->ib_cm.path.pkey = target->ib_cm.pkey;
324	ch->ib_cm.path.service_id = target->ib_cm.service_id;
325
326	return 0;
327}
328
329static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
330{
331	struct srp_target_port *target = ch->target;
332	struct rdma_cm_id *new_cm_id;
333	int ret;
334
335	new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
336				   RDMA_PS_TCP, IB_QPT_RC);
337	if (IS_ERR(new_cm_id)) {
338		ret = PTR_ERR(new_cm_id);
339		new_cm_id = NULL;
340		goto out;
341	}
342
343	init_completion(&ch->done);
344	ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
345				&target->rdma_cm.src.sa : NULL,
346				&target->rdma_cm.dst.sa,
347				SRP_PATH_REC_TIMEOUT_MS);
348	if (ret) {
349		pr_err("No route available from %pISpsc to %pISpsc (%d)\n",
350		       &target->rdma_cm.src, &target->rdma_cm.dst, ret);
351		goto out;
352	}
353	ret = wait_for_completion_interruptible(&ch->done);
354	if (ret < 0)
355		goto out;
356
357	ret = ch->status;
358	if (ret) {
359		pr_err("Resolving address %pISpsc failed (%d)\n",
360		       &target->rdma_cm.dst, ret);
361		goto out;
362	}
363
364	swap(ch->rdma_cm.cm_id, new_cm_id);
365
366out:
367	if (new_cm_id)
368		rdma_destroy_id(new_cm_id);
369
370	return ret;
371}
372
373static int srp_new_cm_id(struct srp_rdma_ch *ch)
374{
375	struct srp_target_port *target = ch->target;
376
377	return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
378		srp_new_ib_cm_id(ch);
379}
380
381/**
382 * srp_destroy_fr_pool() - free the resources owned by a pool
383 * @pool: Fast registration pool to be destroyed.
384 */
385static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
386{
387	int i;
388	struct srp_fr_desc *d;
389
390	if (!pool)
391		return;
392
393	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
394		if (d->mr)
395			ib_dereg_mr(d->mr);
396	}
397	kfree(pool);
398}
399
400/**
401 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
402 * @device:            IB device to allocate fast registration descriptors for.
403 * @pd:                Protection domain associated with the FR descriptors.
404 * @pool_size:         Number of descriptors to allocate.
405 * @max_page_list_len: Maximum fast registration work request page list length.
406 */
407static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
408					      struct ib_pd *pd, int pool_size,
409					      int max_page_list_len)
410{
411	struct srp_fr_pool *pool;
412	struct srp_fr_desc *d;
413	struct ib_mr *mr;
414	int i, ret = -EINVAL;
415	enum ib_mr_type mr_type;
416
417	if (pool_size <= 0)
418		goto err;
419	ret = -ENOMEM;
420	pool = kzalloc(struct_size(pool, desc, pool_size), GFP_KERNEL);
421	if (!pool)
422		goto err;
423	pool->size = pool_size;
424	pool->max_page_list_len = max_page_list_len;
425	spin_lock_init(&pool->lock);
426	INIT_LIST_HEAD(&pool->free_list);
427
428	if (device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
429		mr_type = IB_MR_TYPE_SG_GAPS;
430	else
431		mr_type = IB_MR_TYPE_MEM_REG;
432
433	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
434		mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
435		if (IS_ERR(mr)) {
436			ret = PTR_ERR(mr);
437			if (ret == -ENOMEM)
438				pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
439					dev_name(&device->dev));
440			goto destroy_pool;
441		}
442		d->mr = mr;
443		list_add_tail(&d->entry, &pool->free_list);
444	}
445
446out:
447	return pool;
448
449destroy_pool:
450	srp_destroy_fr_pool(pool);
451
452err:
453	pool = ERR_PTR(ret);
454	goto out;
455}
456
457/**
458 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
459 * @pool: Pool to obtain descriptor from.
460 */
461static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
462{
463	struct srp_fr_desc *d = NULL;
464	unsigned long flags;
465
466	spin_lock_irqsave(&pool->lock, flags);
467	if (!list_empty(&pool->free_list)) {
468		d = list_first_entry(&pool->free_list, typeof(*d), entry);
469		list_del(&d->entry);
470	}
471	spin_unlock_irqrestore(&pool->lock, flags);
472
473	return d;
474}
475
476/**
477 * srp_fr_pool_put() - put an FR descriptor back in the free list
478 * @pool: Pool the descriptor was allocated from.
479 * @desc: Pointer to an array of fast registration descriptor pointers.
480 * @n:    Number of descriptors to put back.
481 *
482 * Note: The caller must already have queued an invalidation request for
483 * desc->mr->rkey before calling this function.
484 */
485static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
486			    int n)
487{
488	unsigned long flags;
489	int i;
490
491	spin_lock_irqsave(&pool->lock, flags);
492	for (i = 0; i < n; i++)
493		list_add(&desc[i]->entry, &pool->free_list);
494	spin_unlock_irqrestore(&pool->lock, flags);
495}
496
497static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
498{
499	struct srp_device *dev = target->srp_host->srp_dev;
500
501	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
502				  dev->max_pages_per_mr);
503}
504
505/**
506 * srp_destroy_qp() - destroy an RDMA queue pair
507 * @ch: SRP RDMA channel.
508 *
509 * Drain the qp before destroying it.  This avoids that the receive
510 * completion handler can access the queue pair while it is
511 * being destroyed.
512 */
513static void srp_destroy_qp(struct srp_rdma_ch *ch)
514{
515	spin_lock_irq(&ch->lock);
516	ib_process_cq_direct(ch->send_cq, -1);
517	spin_unlock_irq(&ch->lock);
518
519	ib_drain_qp(ch->qp);
520	ib_destroy_qp(ch->qp);
521}
522
523static int srp_create_ch_ib(struct srp_rdma_ch *ch)
524{
525	struct srp_target_port *target = ch->target;
526	struct srp_device *dev = target->srp_host->srp_dev;
527	const struct ib_device_attr *attr = &dev->dev->attrs;
528	struct ib_qp_init_attr *init_attr;
529	struct ib_cq *recv_cq, *send_cq;
530	struct ib_qp *qp;
531	struct srp_fr_pool *fr_pool = NULL;
532	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
533	int ret;
534
535	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
536	if (!init_attr)
537		return -ENOMEM;
538
539	/* queue_size + 1 for ib_drain_rq() */
540	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
541				ch->comp_vector, IB_POLL_SOFTIRQ);
542	if (IS_ERR(recv_cq)) {
543		ret = PTR_ERR(recv_cq);
544		goto err;
545	}
546
547	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
548				ch->comp_vector, IB_POLL_DIRECT);
549	if (IS_ERR(send_cq)) {
550		ret = PTR_ERR(send_cq);
551		goto err_recv_cq;
552	}
553
554	init_attr->event_handler       = srp_qp_event;
555	init_attr->cap.max_send_wr     = m * target->queue_size;
556	init_attr->cap.max_recv_wr     = target->queue_size + 1;
557	init_attr->cap.max_recv_sge    = 1;
558	init_attr->cap.max_send_sge    = min(SRP_MAX_SGE, attr->max_send_sge);
559	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
560	init_attr->qp_type             = IB_QPT_RC;
561	init_attr->send_cq             = send_cq;
562	init_attr->recv_cq             = recv_cq;
563
564	ch->max_imm_sge = min(init_attr->cap.max_send_sge - 1U, 255U);
565
566	if (target->using_rdma_cm) {
567		ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
568		qp = ch->rdma_cm.cm_id->qp;
569	} else {
570		qp = ib_create_qp(dev->pd, init_attr);
571		if (!IS_ERR(qp)) {
572			ret = srp_init_ib_qp(target, qp);
573			if (ret)
574				ib_destroy_qp(qp);
575		} else {
576			ret = PTR_ERR(qp);
577		}
578	}
579	if (ret) {
580		pr_err("QP creation failed for dev %s: %d\n",
581		       dev_name(&dev->dev->dev), ret);
582		goto err_send_cq;
583	}
584
585	if (dev->use_fast_reg) {
586		fr_pool = srp_alloc_fr_pool(target);
587		if (IS_ERR(fr_pool)) {
588			ret = PTR_ERR(fr_pool);
589			shost_printk(KERN_WARNING, target->scsi_host, PFX
590				     "FR pool allocation failed (%d)\n", ret);
591			goto err_qp;
592		}
593	}
594
595	if (ch->qp)
596		srp_destroy_qp(ch);
597	if (ch->recv_cq)
598		ib_free_cq(ch->recv_cq);
599	if (ch->send_cq)
600		ib_free_cq(ch->send_cq);
601
602	ch->qp = qp;
603	ch->recv_cq = recv_cq;
604	ch->send_cq = send_cq;
605
606	if (dev->use_fast_reg) {
607		if (ch->fr_pool)
608			srp_destroy_fr_pool(ch->fr_pool);
609		ch->fr_pool = fr_pool;
610	}
611
612	kfree(init_attr);
613	return 0;
614
615err_qp:
616	if (target->using_rdma_cm)
617		rdma_destroy_qp(ch->rdma_cm.cm_id);
618	else
619		ib_destroy_qp(qp);
620
621err_send_cq:
622	ib_free_cq(send_cq);
623
624err_recv_cq:
625	ib_free_cq(recv_cq);
626
627err:
628	kfree(init_attr);
629	return ret;
630}
631
632/*
633 * Note: this function may be called without srp_alloc_iu_bufs() having been
634 * invoked. Hence the ch->[rt]x_ring checks.
635 */
636static void srp_free_ch_ib(struct srp_target_port *target,
637			   struct srp_rdma_ch *ch)
638{
639	struct srp_device *dev = target->srp_host->srp_dev;
640	int i;
641
642	if (!ch->target)
643		return;
644
645	if (target->using_rdma_cm) {
646		if (ch->rdma_cm.cm_id) {
647			rdma_destroy_id(ch->rdma_cm.cm_id);
648			ch->rdma_cm.cm_id = NULL;
649		}
650	} else {
651		if (ch->ib_cm.cm_id) {
652			ib_destroy_cm_id(ch->ib_cm.cm_id);
653			ch->ib_cm.cm_id = NULL;
654		}
655	}
656
657	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
658	if (!ch->qp)
659		return;
660
661	if (dev->use_fast_reg) {
662		if (ch->fr_pool)
663			srp_destroy_fr_pool(ch->fr_pool);
664	}
665
666	srp_destroy_qp(ch);
667	ib_free_cq(ch->send_cq);
668	ib_free_cq(ch->recv_cq);
669
670	/*
671	 * Avoid that the SCSI error handler tries to use this channel after
672	 * it has been freed. The SCSI error handler can namely continue
673	 * trying to perform recovery actions after scsi_remove_host()
674	 * returned.
675	 */
676	ch->target = NULL;
677
678	ch->qp = NULL;
679	ch->send_cq = ch->recv_cq = NULL;
680
681	if (ch->rx_ring) {
682		for (i = 0; i < target->queue_size; ++i)
683			srp_free_iu(target->srp_host, ch->rx_ring[i]);
684		kfree(ch->rx_ring);
685		ch->rx_ring = NULL;
686	}
687	if (ch->tx_ring) {
688		for (i = 0; i < target->queue_size; ++i)
689			srp_free_iu(target->srp_host, ch->tx_ring[i]);
690		kfree(ch->tx_ring);
691		ch->tx_ring = NULL;
692	}
693}
694
695static void srp_path_rec_completion(int status,
696				    struct sa_path_rec *pathrec,
697				    unsigned int num_paths, void *ch_ptr)
698{
699	struct srp_rdma_ch *ch = ch_ptr;
700	struct srp_target_port *target = ch->target;
701
702	ch->status = status;
703	if (status)
704		shost_printk(KERN_ERR, target->scsi_host,
705			     PFX "Got failed path rec status %d\n", status);
706	else
707		ch->ib_cm.path = *pathrec;
708	complete(&ch->done);
709}
710
711static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
712{
713	struct srp_target_port *target = ch->target;
714	int ret;
715
716	ch->ib_cm.path.numb_path = 1;
717
718	init_completion(&ch->done);
719
720	ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
721					       target->srp_host->srp_dev->dev,
722					       target->srp_host->port,
723					       &ch->ib_cm.path,
724					       IB_SA_PATH_REC_SERVICE_ID |
725					       IB_SA_PATH_REC_DGID	 |
726					       IB_SA_PATH_REC_SGID	 |
727					       IB_SA_PATH_REC_NUMB_PATH	 |
728					       IB_SA_PATH_REC_PKEY,
729					       SRP_PATH_REC_TIMEOUT_MS,
730					       GFP_KERNEL,
731					       srp_path_rec_completion,
732					       ch, &ch->ib_cm.path_query);
733	if (ch->ib_cm.path_query_id < 0)
734		return ch->ib_cm.path_query_id;
735
736	ret = wait_for_completion_interruptible(&ch->done);
737	if (ret < 0)
738		return ret;
739
740	if (ch->status < 0)
741		shost_printk(KERN_WARNING, target->scsi_host,
742			     PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
743			     ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
744			     be16_to_cpu(target->ib_cm.pkey),
745			     be64_to_cpu(target->ib_cm.service_id));
746
747	return ch->status;
748}
749
750static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
751{
752	struct srp_target_port *target = ch->target;
753	int ret;
754
755	init_completion(&ch->done);
756
757	ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
758	if (ret)
759		return ret;
760
761	wait_for_completion_interruptible(&ch->done);
762
763	if (ch->status != 0)
764		shost_printk(KERN_WARNING, target->scsi_host,
765			     PFX "Path resolution failed\n");
766
767	return ch->status;
768}
769
770static int srp_lookup_path(struct srp_rdma_ch *ch)
771{
772	struct srp_target_port *target = ch->target;
773
774	return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
775		srp_ib_lookup_path(ch);
776}
777
778static u8 srp_get_subnet_timeout(struct srp_host *host)
779{
780	struct ib_port_attr attr;
781	int ret;
782	u8 subnet_timeout = 18;
783
784	ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
785	if (ret == 0)
786		subnet_timeout = attr.subnet_timeout;
787
788	if (unlikely(subnet_timeout < 15))
789		pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
790			dev_name(&host->srp_dev->dev->dev), subnet_timeout);
791
792	return subnet_timeout;
793}
794
795static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len,
796			bool multich)
797{
798	struct srp_target_port *target = ch->target;
799	struct {
800		struct rdma_conn_param	  rdma_param;
801		struct srp_login_req_rdma rdma_req;
802		struct ib_cm_req_param	  ib_param;
803		struct srp_login_req	  ib_req;
804	} *req = NULL;
805	char *ipi, *tpi;
806	int status;
807
808	req = kzalloc(sizeof *req, GFP_KERNEL);
809	if (!req)
810		return -ENOMEM;
811
812	req->ib_param.flow_control = 1;
813	req->ib_param.retry_count = target->tl_retry_count;
814
815	/*
816	 * Pick some arbitrary defaults here; we could make these
817	 * module parameters if anyone cared about setting them.
818	 */
819	req->ib_param.responder_resources = 4;
820	req->ib_param.rnr_retry_count = 7;
821	req->ib_param.max_cm_retries = 15;
822
823	req->ib_req.opcode = SRP_LOGIN_REQ;
824	req->ib_req.tag = 0;
825	req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len);
826	req->ib_req.req_buf_fmt	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
827					      SRP_BUF_FORMAT_INDIRECT);
828	req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
829				 SRP_MULTICHAN_SINGLE);
830	if (srp_use_imm_data) {
831		req->ib_req.req_flags |= SRP_IMMED_REQUESTED;
832		req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET);
833	}
834
835	if (target->using_rdma_cm) {
836		req->rdma_param.flow_control = req->ib_param.flow_control;
837		req->rdma_param.responder_resources =
838			req->ib_param.responder_resources;
839		req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
840		req->rdma_param.retry_count = req->ib_param.retry_count;
841		req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
842		req->rdma_param.private_data = &req->rdma_req;
843		req->rdma_param.private_data_len = sizeof(req->rdma_req);
844
845		req->rdma_req.opcode = req->ib_req.opcode;
846		req->rdma_req.tag = req->ib_req.tag;
847		req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
848		req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
849		req->rdma_req.req_flags	= req->ib_req.req_flags;
850		req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset;
851
852		ipi = req->rdma_req.initiator_port_id;
853		tpi = req->rdma_req.target_port_id;
854	} else {
855		u8 subnet_timeout;
856
857		subnet_timeout = srp_get_subnet_timeout(target->srp_host);
858
859		req->ib_param.primary_path = &ch->ib_cm.path;
860		req->ib_param.alternate_path = NULL;
861		req->ib_param.service_id = target->ib_cm.service_id;
862		get_random_bytes(&req->ib_param.starting_psn, 4);
863		req->ib_param.starting_psn &= 0xffffff;
864		req->ib_param.qp_num = ch->qp->qp_num;
865		req->ib_param.qp_type = ch->qp->qp_type;
866		req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
867		req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
868		req->ib_param.private_data = &req->ib_req;
869		req->ib_param.private_data_len = sizeof(req->ib_req);
870
871		ipi = req->ib_req.initiator_port_id;
872		tpi = req->ib_req.target_port_id;
873	}
874
875	/*
876	 * In the published SRP specification (draft rev. 16a), the
877	 * port identifier format is 8 bytes of ID extension followed
878	 * by 8 bytes of GUID.  Older drafts put the two halves in the
879	 * opposite order, so that the GUID comes first.
880	 *
881	 * Targets conforming to these obsolete drafts can be
882	 * recognized by the I/O Class they report.
883	 */
884	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
885		memcpy(ipi,     &target->sgid.global.interface_id, 8);
886		memcpy(ipi + 8, &target->initiator_ext, 8);
887		memcpy(tpi,     &target->ioc_guid, 8);
888		memcpy(tpi + 8, &target->id_ext, 8);
889	} else {
890		memcpy(ipi,     &target->initiator_ext, 8);
891		memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
892		memcpy(tpi,     &target->id_ext, 8);
893		memcpy(tpi + 8, &target->ioc_guid, 8);
894	}
895
896	/*
897	 * Topspin/Cisco SRP targets will reject our login unless we
898	 * zero out the first 8 bytes of our initiator port ID and set
899	 * the second 8 bytes to the local node GUID.
900	 */
901	if (srp_target_is_topspin(target)) {
902		shost_printk(KERN_DEBUG, target->scsi_host,
903			     PFX "Topspin/Cisco initiator port ID workaround "
904			     "activated for target GUID %016llx\n",
905			     be64_to_cpu(target->ioc_guid));
906		memset(ipi, 0, 8);
907		memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
908	}
909
910	if (target->using_rdma_cm)
911		status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
912	else
913		status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);
914
915	kfree(req);
916
917	return status;
918}
919
920static bool srp_queue_remove_work(struct srp_target_port *target)
921{
922	bool changed = false;
923
924	spin_lock_irq(&target->lock);
925	if (target->state != SRP_TARGET_REMOVED) {
926		target->state = SRP_TARGET_REMOVED;
927		changed = true;
928	}
929	spin_unlock_irq(&target->lock);
930
931	if (changed)
932		queue_work(srp_remove_wq, &target->remove_work);
933
934	return changed;
935}
936
937static void srp_disconnect_target(struct srp_target_port *target)
938{
939	struct srp_rdma_ch *ch;
940	int i, ret;
941
942	/* XXX should send SRP_I_LOGOUT request */
943
944	for (i = 0; i < target->ch_count; i++) {
945		ch = &target->ch[i];
946		ch->connected = false;
947		ret = 0;
948		if (target->using_rdma_cm) {
949			if (ch->rdma_cm.cm_id)
950				rdma_disconnect(ch->rdma_cm.cm_id);
951		} else {
952			if (ch->ib_cm.cm_id)
953				ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
954						      NULL, 0);
955		}
956		if (ret < 0) {
957			shost_printk(KERN_DEBUG, target->scsi_host,
958				     PFX "Sending CM DREQ failed\n");
959		}
960	}
961}
962
963static int srp_exit_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
964{
965	struct srp_target_port *target = host_to_target(shost);
966	struct srp_device *dev = target->srp_host->srp_dev;
967	struct ib_device *ibdev = dev->dev;
968	struct srp_request *req = scsi_cmd_priv(cmd);
969
970	kfree(req->fr_list);
971	if (req->indirect_dma_addr) {
972		ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
973				    target->indirect_size,
974				    DMA_TO_DEVICE);
975	}
976	kfree(req->indirect_desc);
977
978	return 0;
979}
980
981static int srp_init_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
982{
983	struct srp_target_port *target = host_to_target(shost);
984	struct srp_device *srp_dev = target->srp_host->srp_dev;
985	struct ib_device *ibdev = srp_dev->dev;
986	struct srp_request *req = scsi_cmd_priv(cmd);
987	dma_addr_t dma_addr;
988	int ret = -ENOMEM;
989
990	if (srp_dev->use_fast_reg) {
991		req->fr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *),
992					GFP_KERNEL);
993		if (!req->fr_list)
994			goto out;
995	}
996	req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
997	if (!req->indirect_desc)
998		goto out;
999
1000	dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
1001				     target->indirect_size,
1002				     DMA_TO_DEVICE);
1003	if (ib_dma_mapping_error(ibdev, dma_addr)) {
1004		srp_exit_cmd_priv(shost, cmd);
1005		goto out;
1006	}
1007
1008	req->indirect_dma_addr = dma_addr;
1009	ret = 0;
1010
1011out:
1012	return ret;
1013}
1014
1015/**
1016 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
1017 * @shost: SCSI host whose attributes to remove from sysfs.
1018 *
1019 * Note: Any attributes defined in the host template and that did not exist
1020 * before invocation of this function will be ignored.
1021 */
1022static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
1023{
1024	const struct attribute_group **g;
1025	struct attribute **attr;
1026
1027	for (g = shost->hostt->shost_groups; *g; ++g) {
1028		for (attr = (*g)->attrs; *attr; ++attr) {
1029			struct device_attribute *dev_attr =
1030				container_of(*attr, typeof(*dev_attr), attr);
1031
1032			device_remove_file(&shost->shost_dev, dev_attr);
1033		}
1034	}
1035}
1036
1037static void srp_remove_target(struct srp_target_port *target)
1038{
1039	struct srp_rdma_ch *ch;
1040	int i;
1041
1042	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1043
1044	srp_del_scsi_host_attr(target->scsi_host);
1045	srp_rport_get(target->rport);
1046	srp_remove_host(target->scsi_host);
1047	scsi_remove_host(target->scsi_host);
1048	srp_stop_rport_timers(target->rport);
1049	srp_disconnect_target(target);
1050	kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
1051	for (i = 0; i < target->ch_count; i++) {
1052		ch = &target->ch[i];
1053		srp_free_ch_ib(target, ch);
1054	}
1055	cancel_work_sync(&target->tl_err_work);
1056	srp_rport_put(target->rport);
1057	kfree(target->ch);
1058	target->ch = NULL;
1059
1060	spin_lock(&target->srp_host->target_lock);
1061	list_del(&target->list);
1062	spin_unlock(&target->srp_host->target_lock);
1063
1064	scsi_host_put(target->scsi_host);
1065}
1066
1067static void srp_remove_work(struct work_struct *work)
1068{
1069	struct srp_target_port *target =
1070		container_of(work, struct srp_target_port, remove_work);
1071
1072	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1073
1074	srp_remove_target(target);
1075}
1076
1077static void srp_rport_delete(struct srp_rport *rport)
1078{
1079	struct srp_target_port *target = rport->lld_data;
1080
1081	srp_queue_remove_work(target);
1082}
1083
1084/**
1085 * srp_connected_ch() - number of connected channels
1086 * @target: SRP target port.
1087 */
1088static int srp_connected_ch(struct srp_target_port *target)
1089{
1090	int i, c = 0;
1091
1092	for (i = 0; i < target->ch_count; i++)
1093		c += target->ch[i].connected;
1094
1095	return c;
1096}
1097
1098static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len,
1099			  bool multich)
1100{
1101	struct srp_target_port *target = ch->target;
1102	int ret;
1103
1104	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
1105
1106	ret = srp_lookup_path(ch);
1107	if (ret)
1108		goto out;
1109
1110	while (1) {
1111		init_completion(&ch->done);
1112		ret = srp_send_req(ch, max_iu_len, multich);
1113		if (ret)
1114			goto out;
1115		ret = wait_for_completion_interruptible(&ch->done);
1116		if (ret < 0)
1117			goto out;
1118
1119		/*
1120		 * The CM event handling code will set status to
1121		 * SRP_PORT_REDIRECT if we get a port redirect REJ
1122		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1123		 * redirect REJ back.
1124		 */
1125		ret = ch->status;
1126		switch (ret) {
1127		case 0:
1128			ch->connected = true;
1129			goto out;
1130
1131		case SRP_PORT_REDIRECT:
1132			ret = srp_lookup_path(ch);
1133			if (ret)
1134				goto out;
1135			break;
1136
1137		case SRP_DLID_REDIRECT:
1138			break;
1139
1140		case SRP_STALE_CONN:
1141			shost_printk(KERN_ERR, target->scsi_host, PFX
1142				     "giving up on stale connection\n");
1143			ret = -ECONNRESET;
1144			goto out;
1145
1146		default:
1147			goto out;
1148		}
1149	}
1150
1151out:
1152	return ret <= 0 ? ret : -ENODEV;
1153}
1154
1155static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1156{
1157	srp_handle_qp_err(cq, wc, "INV RKEY");
1158}
1159
1160static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1161		u32 rkey)
1162{
1163	struct ib_send_wr wr = {
1164		.opcode		    = IB_WR_LOCAL_INV,
1165		.next		    = NULL,
1166		.num_sge	    = 0,
1167		.send_flags	    = 0,
1168		.ex.invalidate_rkey = rkey,
1169	};
1170
1171	wr.wr_cqe = &req->reg_cqe;
1172	req->reg_cqe.done = srp_inv_rkey_err_done;
1173	return ib_post_send(ch->qp, &wr, NULL);
1174}
1175
1176static void srp_unmap_data(struct scsi_cmnd *scmnd,
1177			   struct srp_rdma_ch *ch,
1178			   struct srp_request *req)
1179{
1180	struct srp_target_port *target = ch->target;
1181	struct srp_device *dev = target->srp_host->srp_dev;
1182	struct ib_device *ibdev = dev->dev;
1183	int i, res;
1184
1185	if (!scsi_sglist(scmnd) ||
1186	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1187	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1188		return;
1189
1190	if (dev->use_fast_reg) {
1191		struct srp_fr_desc **pfr;
1192
1193		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1194			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1195			if (res < 0) {
1196				shost_printk(KERN_ERR, target->scsi_host, PFX
1197				  "Queueing INV WR for rkey %#x failed (%d)\n",
1198				  (*pfr)->mr->rkey, res);
1199				queue_work(system_long_wq,
1200					   &target->tl_err_work);
1201			}
1202		}
1203		if (req->nmdesc)
1204			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1205					req->nmdesc);
1206	}
1207
1208	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1209			scmnd->sc_data_direction);
1210}
1211
1212/**
1213 * srp_claim_req - Take ownership of the scmnd associated with a request.
1214 * @ch: SRP RDMA channel.
1215 * @req: SRP request.
1216 * @sdev: If not NULL, only take ownership for this SCSI device.
1217 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1218 *         ownership of @req->scmnd if it equals @scmnd.
1219 *
1220 * Return value:
1221 * Either NULL or a pointer to the SCSI command the caller became owner of.
1222 */
1223static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1224				       struct srp_request *req,
1225				       struct scsi_device *sdev,
1226				       struct scsi_cmnd *scmnd)
1227{
1228	unsigned long flags;
1229
1230	spin_lock_irqsave(&ch->lock, flags);
1231	if (req->scmnd &&
1232	    (!sdev || req->scmnd->device == sdev) &&
1233	    (!scmnd || req->scmnd == scmnd)) {
1234		scmnd = req->scmnd;
1235		req->scmnd = NULL;
1236	} else {
1237		scmnd = NULL;
1238	}
1239	spin_unlock_irqrestore(&ch->lock, flags);
1240
1241	return scmnd;
1242}
1243
1244/**
1245 * srp_free_req() - Unmap data and adjust ch->req_lim.
1246 * @ch:     SRP RDMA channel.
1247 * @req:    Request to be freed.
1248 * @scmnd:  SCSI command associated with @req.
1249 * @req_lim_delta: Amount to be added to @target->req_lim.
1250 */
1251static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1252			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1253{
1254	unsigned long flags;
1255
1256	srp_unmap_data(scmnd, ch, req);
1257
1258	spin_lock_irqsave(&ch->lock, flags);
1259	ch->req_lim += req_lim_delta;
1260	spin_unlock_irqrestore(&ch->lock, flags);
1261}
1262
1263static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1264			   struct scsi_device *sdev, int result)
1265{
1266	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1267
1268	if (scmnd) {
1269		srp_free_req(ch, req, scmnd, 0);
1270		scmnd->result = result;
1271		scsi_done(scmnd);
1272	}
1273}
1274
1275struct srp_terminate_context {
1276	struct srp_target_port *srp_target;
1277	int scsi_result;
1278};
1279
1280static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr)
1281{
1282	struct srp_terminate_context *context = context_ptr;
1283	struct srp_target_port *target = context->srp_target;
1284	u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd));
1285	struct srp_rdma_ch *ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
1286	struct srp_request *req = scsi_cmd_priv(scmnd);
1287
1288	srp_finish_req(ch, req, NULL, context->scsi_result);
1289
1290	return true;
1291}
1292
1293static void srp_terminate_io(struct srp_rport *rport)
1294{
1295	struct srp_target_port *target = rport->lld_data;
1296	struct srp_terminate_context context = { .srp_target = target,
1297		.scsi_result = DID_TRANSPORT_FAILFAST << 16 };
1298
1299	scsi_host_busy_iter(target->scsi_host, srp_terminate_cmd, &context);
1300}
1301
1302/* Calculate maximum initiator to target information unit length. */
1303static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data,
1304				  uint32_t max_it_iu_size)
1305{
1306	uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN +
1307		sizeof(struct srp_indirect_buf) +
1308		cmd_sg_cnt * sizeof(struct srp_direct_buf);
1309
1310	if (use_imm_data)
1311		max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET +
1312				 srp_max_imm_data);
1313
1314	if (max_it_iu_size)
1315		max_iu_len = min(max_iu_len, max_it_iu_size);
1316
1317	pr_debug("max_iu_len = %d\n", max_iu_len);
1318
1319	return max_iu_len;
1320}
1321
1322/*
1323 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1324 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1325 * srp_reset_device() or srp_reset_host() calls will occur while this function
1326 * is in progress. One way to realize that is not to call this function
1327 * directly but to call srp_reconnect_rport() instead since that last function
1328 * serializes calls of this function via rport->mutex and also blocks
1329 * srp_queuecommand() calls before invoking this function.
1330 */
1331static int srp_rport_reconnect(struct srp_rport *rport)
1332{
1333	struct srp_target_port *target = rport->lld_data;
1334	struct srp_rdma_ch *ch;
1335	uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
1336						srp_use_imm_data,
1337						target->max_it_iu_size);
1338	int i, j, ret = 0;
1339	bool multich = false;
1340
1341	srp_disconnect_target(target);
1342
1343	if (target->state == SRP_TARGET_SCANNING)
1344		return -ENODEV;
1345
1346	/*
1347	 * Now get a new local CM ID so that we avoid confusing the target in
1348	 * case things are really fouled up. Doing so also ensures that all CM
1349	 * callbacks will have finished before a new QP is allocated.
1350	 */
1351	for (i = 0; i < target->ch_count; i++) {
1352		ch = &target->ch[i];
1353		ret += srp_new_cm_id(ch);
1354	}
1355	{
1356		struct srp_terminate_context context = {
1357			.srp_target = target, .scsi_result = DID_RESET << 16};
1358
1359		scsi_host_busy_iter(target->scsi_host, srp_terminate_cmd,
1360				    &context);
1361	}
1362	for (i = 0; i < target->ch_count; i++) {
1363		ch = &target->ch[i];
1364		/*
1365		 * Whether or not creating a new CM ID succeeded, create a new
1366		 * QP. This guarantees that all completion callback function
1367		 * invocations have finished before request resetting starts.
1368		 */
1369		ret += srp_create_ch_ib(ch);
1370
1371		INIT_LIST_HEAD(&ch->free_tx);
1372		for (j = 0; j < target->queue_size; ++j)
1373			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1374	}
1375
1376	target->qp_in_error = false;
1377
1378	for (i = 0; i < target->ch_count; i++) {
1379		ch = &target->ch[i];
1380		if (ret)
1381			break;
1382		ret = srp_connect_ch(ch, max_iu_len, multich);
1383		multich = true;
1384	}
1385
1386	if (ret == 0)
1387		shost_printk(KERN_INFO, target->scsi_host,
1388			     PFX "reconnect succeeded\n");
1389
1390	return ret;
1391}
1392
1393static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1394			 unsigned int dma_len, u32 rkey)
1395{
1396	struct srp_direct_buf *desc = state->desc;
1397
1398	WARN_ON_ONCE(!dma_len);
1399
1400	desc->va = cpu_to_be64(dma_addr);
1401	desc->key = cpu_to_be32(rkey);
1402	desc->len = cpu_to_be32(dma_len);
1403
1404	state->total_len += dma_len;
1405	state->desc++;
1406	state->ndesc++;
1407}
1408
1409static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1410{
1411	srp_handle_qp_err(cq, wc, "FAST REG");
1412}
1413
1414/*
1415 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1416 * where to start in the first element. If sg_offset_p != NULL then
1417 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1418 * byte that has not yet been mapped.
1419 */
1420static int srp_map_finish_fr(struct srp_map_state *state,
1421			     struct srp_request *req,
1422			     struct srp_rdma_ch *ch, int sg_nents,
1423			     unsigned int *sg_offset_p)
1424{
1425	struct srp_target_port *target = ch->target;
1426	struct srp_device *dev = target->srp_host->srp_dev;
1427	struct ib_reg_wr wr;
1428	struct srp_fr_desc *desc;
1429	u32 rkey;
1430	int n, err;
1431
1432	if (state->fr.next >= state->fr.end) {
1433		shost_printk(KERN_ERR, ch->target->scsi_host,
1434			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1435			     ch->target->mr_per_cmd);
1436		return -ENOMEM;
1437	}
1438
1439	WARN_ON_ONCE(!dev->use_fast_reg);
1440
1441	if (sg_nents == 1 && target->global_rkey) {
1442		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1443
1444		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1445			     sg_dma_len(state->sg) - sg_offset,
1446			     target->global_rkey);
1447		if (sg_offset_p)
1448			*sg_offset_p = 0;
1449		return 1;
1450	}
1451
1452	desc = srp_fr_pool_get(ch->fr_pool);
1453	if (!desc)
1454		return -ENOMEM;
1455
1456	rkey = ib_inc_rkey(desc->mr->rkey);
1457	ib_update_fast_reg_key(desc->mr, rkey);
1458
1459	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1460			 dev->mr_page_size);
1461	if (unlikely(n < 0)) {
1462		srp_fr_pool_put(ch->fr_pool, &desc, 1);
1463		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1464			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1465			 sg_offset_p ? *sg_offset_p : -1, n);
1466		return n;
1467	}
1468
1469	WARN_ON_ONCE(desc->mr->length == 0);
1470
1471	req->reg_cqe.done = srp_reg_mr_err_done;
1472
1473	wr.wr.next = NULL;
1474	wr.wr.opcode = IB_WR_REG_MR;
1475	wr.wr.wr_cqe = &req->reg_cqe;
1476	wr.wr.num_sge = 0;
1477	wr.wr.send_flags = 0;
1478	wr.mr = desc->mr;
1479	wr.key = desc->mr->rkey;
1480	wr.access = (IB_ACCESS_LOCAL_WRITE |
1481		     IB_ACCESS_REMOTE_READ |
1482		     IB_ACCESS_REMOTE_WRITE);
1483
1484	*state->fr.next++ = desc;
1485	state->nmdesc++;
1486
1487	srp_map_desc(state, desc->mr->iova,
1488		     desc->mr->length, desc->mr->rkey);
1489
1490	err = ib_post_send(ch->qp, &wr.wr, NULL);
1491	if (unlikely(err)) {
1492		WARN_ON_ONCE(err == -ENOMEM);
1493		return err;
1494	}
1495
1496	return n;
1497}
1498
1499static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1500			 struct srp_request *req, struct scatterlist *scat,
1501			 int count)
1502{
1503	unsigned int sg_offset = 0;
1504
1505	state->fr.next = req->fr_list;
1506	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1507	state->sg = scat;
1508
1509	if (count == 0)
1510		return 0;
1511
1512	while (count) {
1513		int i, n;
1514
1515		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1516		if (unlikely(n < 0))
1517			return n;
1518
1519		count -= n;
1520		for (i = 0; i < n; i++)
1521			state->sg = sg_next(state->sg);
1522	}
1523
1524	return 0;
1525}
1526
1527static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1528			  struct srp_request *req, struct scatterlist *scat,
1529			  int count)
1530{
1531	struct srp_target_port *target = ch->target;
1532	struct scatterlist *sg;
1533	int i;
1534
1535	for_each_sg(scat, sg, count, i) {
1536		srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg),
1537			     target->global_rkey);
1538	}
1539
1540	return 0;
1541}
1542
1543/*
1544 * Register the indirect data buffer descriptor with the HCA.
1545 *
1546 * Note: since the indirect data buffer descriptor has been allocated with
1547 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1548 * memory buffer.
1549 */
1550static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1551		       void **next_mr, void **end_mr, u32 idb_len,
1552		       __be32 *idb_rkey)
1553{
1554	struct srp_target_port *target = ch->target;
1555	struct srp_device *dev = target->srp_host->srp_dev;
1556	struct srp_map_state state;
1557	struct srp_direct_buf idb_desc;
1558	struct scatterlist idb_sg[1];
1559	int ret;
1560
1561	memset(&state, 0, sizeof(state));
1562	memset(&idb_desc, 0, sizeof(idb_desc));
1563	state.gen.next = next_mr;
1564	state.gen.end = end_mr;
1565	state.desc = &idb_desc;
1566	state.base_dma_addr = req->indirect_dma_addr;
1567	state.dma_len = idb_len;
1568
1569	if (dev->use_fast_reg) {
1570		state.sg = idb_sg;
1571		sg_init_one(idb_sg, req->indirect_desc, idb_len);
1572		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1573#ifdef CONFIG_NEED_SG_DMA_LENGTH
1574		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
1575#endif
1576		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1577		if (ret < 0)
1578			return ret;
1579		WARN_ON_ONCE(ret < 1);
1580	} else {
1581		return -EINVAL;
1582	}
1583
1584	*idb_rkey = idb_desc.key;
1585
1586	return 0;
1587}
1588
1589static void srp_check_mapping(struct srp_map_state *state,
1590			      struct srp_rdma_ch *ch, struct srp_request *req,
1591			      struct scatterlist *scat, int count)
1592{
1593	struct srp_device *dev = ch->target->srp_host->srp_dev;
1594	struct srp_fr_desc **pfr;
1595	u64 desc_len = 0, mr_len = 0;
1596	int i;
1597
1598	for (i = 0; i < state->ndesc; i++)
1599		desc_len += be32_to_cpu(req->indirect_desc[i].len);
1600	if (dev->use_fast_reg)
1601		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1602			mr_len += (*pfr)->mr->length;
1603	if (desc_len != scsi_bufflen(req->scmnd) ||
1604	    mr_len > scsi_bufflen(req->scmnd))
1605		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1606		       scsi_bufflen(req->scmnd), desc_len, mr_len,
1607		       state->ndesc, state->nmdesc);
1608}
1609
1610/**
1611 * srp_map_data() - map SCSI data buffer onto an SRP request
1612 * @scmnd: SCSI command to map
1613 * @ch: SRP RDMA channel
1614 * @req: SRP request
1615 *
1616 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1617 * mapping failed. The size of any immediate data is not included in the
1618 * return value.
1619 */
1620static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1621			struct srp_request *req)
1622{
1623	struct srp_target_port *target = ch->target;
1624	struct scatterlist *scat, *sg;
1625	struct srp_cmd *cmd = req->cmd->buf;
1626	int i, len, nents, count, ret;
1627	struct srp_device *dev;
1628	struct ib_device *ibdev;
1629	struct srp_map_state state;
1630	struct srp_indirect_buf *indirect_hdr;
1631	u64 data_len;
1632	u32 idb_len, table_len;
1633	__be32 idb_rkey;
1634	u8 fmt;
1635
1636	req->cmd->num_sge = 1;
1637
1638	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1639		return sizeof(struct srp_cmd) + cmd->add_cdb_len;
1640
1641	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1642	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
1643		shost_printk(KERN_WARNING, target->scsi_host,
1644			     PFX "Unhandled data direction %d\n",
1645			     scmnd->sc_data_direction);
1646		return -EINVAL;
1647	}
1648
1649	nents = scsi_sg_count(scmnd);
1650	scat  = scsi_sglist(scmnd);
1651	data_len = scsi_bufflen(scmnd);
1652
1653	dev = target->srp_host->srp_dev;
1654	ibdev = dev->dev;
1655
1656	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1657	if (unlikely(count == 0))
1658		return -EIO;
1659
1660	if (ch->use_imm_data &&
1661	    count <= ch->max_imm_sge &&
1662	    SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len &&
1663	    scmnd->sc_data_direction == DMA_TO_DEVICE) {
1664		struct srp_imm_buf *buf;
1665		struct ib_sge *sge = &req->cmd->sge[1];
1666
1667		fmt = SRP_DATA_DESC_IMM;
1668		len = SRP_IMM_DATA_OFFSET;
1669		req->nmdesc = 0;
1670		buf = (void *)cmd->add_data + cmd->add_cdb_len;
1671		buf->len = cpu_to_be32(data_len);
1672		WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
1673		for_each_sg(scat, sg, count, i) {
1674			sge[i].addr   = sg_dma_address(sg);
1675			sge[i].length = sg_dma_len(sg);
1676			sge[i].lkey   = target->lkey;
1677		}
1678		req->cmd->num_sge += count;
1679		goto map_complete;
1680	}
1681
1682	fmt = SRP_DATA_DESC_DIRECT;
1683	len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
1684		sizeof(struct srp_direct_buf);
1685
1686	if (count == 1 && target->global_rkey) {
1687		/*
1688		 * The midlayer only generated a single gather/scatter
1689		 * entry, or DMA mapping coalesced everything to a
1690		 * single entry.  So a direct descriptor along with
1691		 * the DMA MR suffices.
1692		 */
1693		struct srp_direct_buf *buf;
1694
1695		buf = (void *)cmd->add_data + cmd->add_cdb_len;
1696		buf->va  = cpu_to_be64(sg_dma_address(scat));
1697		buf->key = cpu_to_be32(target->global_rkey);
1698		buf->len = cpu_to_be32(sg_dma_len(scat));
1699
1700		req->nmdesc = 0;
1701		goto map_complete;
1702	}
1703
1704	/*
1705	 * We have more than one scatter/gather entry, so build our indirect
1706	 * descriptor table, trying to merge as many entries as we can.
1707	 */
1708	indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len;
1709
1710	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1711				   target->indirect_size, DMA_TO_DEVICE);
1712
1713	memset(&state, 0, sizeof(state));
1714	state.desc = req->indirect_desc;
1715	if (dev->use_fast_reg)
1716		ret = srp_map_sg_fr(&state, ch, req, scat, count);
1717	else
1718		ret = srp_map_sg_dma(&state, ch, req, scat, count);
1719	req->nmdesc = state.nmdesc;
1720	if (ret < 0)
1721		goto unmap;
1722
1723	{
1724		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1725			"Memory mapping consistency check");
1726		if (DYNAMIC_DEBUG_BRANCH(ddm))
1727			srp_check_mapping(&state, ch, req, scat, count);
1728	}
1729
1730	/* We've mapped the request, now pull as much of the indirect
1731	 * descriptor table as we can into the command buffer. If this
1732	 * target is not using an external indirect table, we are
1733	 * guaranteed to fit into the command, as the SCSI layer won't
1734	 * give us more S/G entries than we allow.
1735	 */
1736	if (state.ndesc == 1) {
1737		/*
1738		 * Memory registration collapsed the sg-list into one entry,
1739		 * so use a direct descriptor.
1740		 */
1741		struct srp_direct_buf *buf;
1742
1743		buf = (void *)cmd->add_data + cmd->add_cdb_len;
1744		*buf = req->indirect_desc[0];
1745		goto map_complete;
1746	}
1747
1748	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1749						!target->allow_ext_sg)) {
1750		shost_printk(KERN_ERR, target->scsi_host,
1751			     "Could not fit S/G list into SRP_CMD\n");
1752		ret = -EIO;
1753		goto unmap;
1754	}
1755
1756	count = min(state.ndesc, target->cmd_sg_cnt);
1757	table_len = state.ndesc * sizeof (struct srp_direct_buf);
1758	idb_len = sizeof(struct srp_indirect_buf) + table_len;
1759
1760	fmt = SRP_DATA_DESC_INDIRECT;
1761	len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
1762		sizeof(struct srp_indirect_buf);
1763	len += count * sizeof (struct srp_direct_buf);
1764
1765	memcpy(indirect_hdr->desc_list, req->indirect_desc,
1766	       count * sizeof (struct srp_direct_buf));
1767
1768	if (!target->global_rkey) {
1769		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1770				  idb_len, &idb_rkey);
1771		if (ret < 0)
1772			goto unmap;
1773		req->nmdesc++;
1774	} else {
1775		idb_rkey = cpu_to_be32(target->global_rkey);
1776	}
1777
1778	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1779	indirect_hdr->table_desc.key = idb_rkey;
1780	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1781	indirect_hdr->len = cpu_to_be32(state.total_len);
1782
1783	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1784		cmd->data_out_desc_cnt = count;
1785	else
1786		cmd->data_in_desc_cnt = count;
1787
1788	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1789				      DMA_TO_DEVICE);
1790
1791map_complete:
1792	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1793		cmd->buf_fmt = fmt << 4;
1794	else
1795		cmd->buf_fmt = fmt;
1796
1797	return len;
1798
1799unmap:
1800	srp_unmap_data(scmnd, ch, req);
1801	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1802		ret = -E2BIG;
1803	return ret;
1804}
1805
1806/*
1807 * Return an IU and possible credit to the free pool
1808 */
1809static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1810			  enum srp_iu_type iu_type)
1811{
1812	unsigned long flags;
1813
1814	spin_lock_irqsave(&ch->lock, flags);
1815	list_add(&iu->list, &ch->free_tx);
1816	if (iu_type != SRP_IU_RSP)
1817		++ch->req_lim;
1818	spin_unlock_irqrestore(&ch->lock, flags);
1819}
1820
1821/*
1822 * Must be called with ch->lock held to protect req_lim and free_tx.
1823 * If IU is not sent, it must be returned using srp_put_tx_iu().
1824 *
1825 * Note:
1826 * An upper limit for the number of allocated information units for each
1827 * request type is:
1828 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1829 *   more than Scsi_Host.can_queue requests.
1830 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1831 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1832 *   one unanswered SRP request to an initiator.
1833 */
1834static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1835				      enum srp_iu_type iu_type)
1836{
1837	struct srp_target_port *target = ch->target;
1838	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1839	struct srp_iu *iu;
1840
1841	lockdep_assert_held(&ch->lock);
1842
1843	ib_process_cq_direct(ch->send_cq, -1);
1844
1845	if (list_empty(&ch->free_tx))
1846		return NULL;
1847
1848	/* Initiator responses to target requests do not consume credits */
1849	if (iu_type != SRP_IU_RSP) {
1850		if (ch->req_lim <= rsv) {
1851			++target->zero_req_lim;
1852			return NULL;
1853		}
1854
1855		--ch->req_lim;
1856	}
1857
1858	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1859	list_del(&iu->list);
1860	return iu;
1861}
1862
1863/*
1864 * Note: if this function is called from inside ib_drain_sq() then it runs
1865 * without ch->lock held. That is safe because ib_drain_sq() only dequeues
1866 * flushed WQEs; a drained WQE with status IB_WC_SUCCESS would be a bug.
1867 */
1868static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1869{
1870	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1871	struct srp_rdma_ch *ch = cq->cq_context;
1872
1873	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1874		srp_handle_qp_err(cq, wc, "SEND");
1875		return;
1876	}
1877
1878	lockdep_assert_held(&ch->lock);
1879
1880	list_add(&iu->list, &ch->free_tx);
1881}
1882
1883/**
1884 * srp_post_send() - send an SRP information unit
1885 * @ch: RDMA channel over which to send the information unit.
1886 * @iu: Information unit to send.
1887 * @len: Length of the information unit excluding immediate data.
1888 */
1889static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1890{
1891	struct srp_target_port *target = ch->target;
1892	struct ib_send_wr wr;
1893
1894	if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE))
1895		return -EINVAL;
1896
1897	iu->sge[0].addr   = iu->dma;
1898	iu->sge[0].length = len;
1899	iu->sge[0].lkey   = target->lkey;
1900
1901	iu->cqe.done = srp_send_done;
1902
1903	wr.next       = NULL;
1904	wr.wr_cqe     = &iu->cqe;
1905	wr.sg_list    = &iu->sge[0];
1906	wr.num_sge    = iu->num_sge;
1907	wr.opcode     = IB_WR_SEND;
1908	wr.send_flags = IB_SEND_SIGNALED;
1909
1910	return ib_post_send(ch->qp, &wr, NULL);
1911}
1912
1913static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1914{
1915	struct srp_target_port *target = ch->target;
1916	struct ib_recv_wr wr;
1917	struct ib_sge list;
1918
1919	list.addr   = iu->dma;
1920	list.length = iu->size;
1921	list.lkey   = target->lkey;
1922
1923	iu->cqe.done = srp_recv_done;
1924
1925	wr.next     = NULL;
1926	wr.wr_cqe   = &iu->cqe;
1927	wr.sg_list  = &list;
1928	wr.num_sge  = 1;
1929
1930	return ib_post_recv(ch->qp, &wr, NULL);
1931}
1932
1933static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1934{
1935	struct srp_target_port *target = ch->target;
1936	struct srp_request *req;
1937	struct scsi_cmnd *scmnd;
1938	unsigned long flags;
1939
1940	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1941		spin_lock_irqsave(&ch->lock, flags);
1942		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1943		if (rsp->tag == ch->tsk_mgmt_tag) {
1944			ch->tsk_mgmt_status = -1;
1945			if (be32_to_cpu(rsp->resp_data_len) >= 4)
1946				ch->tsk_mgmt_status = rsp->data[3];
1947			complete(&ch->tsk_mgmt_done);
1948		} else {
1949			shost_printk(KERN_ERR, target->scsi_host,
1950				     "Received tsk mgmt response too late for tag %#llx\n",
1951				     rsp->tag);
1952		}
1953		spin_unlock_irqrestore(&ch->lock, flags);
1954	} else {
1955		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1956		if (scmnd) {
1957			req = scsi_cmd_priv(scmnd);
1958			scmnd = srp_claim_req(ch, req, NULL, scmnd);
1959		}
1960		if (!scmnd) {
1961			shost_printk(KERN_ERR, target->scsi_host,
1962				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1963				     rsp->tag, ch - target->ch, ch->qp->qp_num);
1964
1965			spin_lock_irqsave(&ch->lock, flags);
1966			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1967			spin_unlock_irqrestore(&ch->lock, flags);
1968
1969			return;
1970		}
1971		scmnd->result = rsp->status;
1972
1973		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1974			memcpy(scmnd->sense_buffer, rsp->data +
1975			       be32_to_cpu(rsp->resp_data_len),
1976			       min_t(int, be32_to_cpu(rsp->sense_data_len),
1977				     SCSI_SENSE_BUFFERSIZE));
1978		}
1979
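		/* Report data-in or data-out underflow as a SCSI residual. */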
1980		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1981			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1982		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1983			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1984
1985		srp_free_req(ch, req, scmnd,
1986			     be32_to_cpu(rsp->req_lim_delta));
1987
1988		scsi_done(scmnd);
1989	}
1990}
1991
1992static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1993			       void *rsp, int len)
1994{
1995	struct srp_target_port *target = ch->target;
1996	struct ib_device *dev = target->srp_host->srp_dev->dev;
1997	unsigned long flags;
1998	struct srp_iu *iu;
1999	int err;
2000
2001	spin_lock_irqsave(&ch->lock, flags);
2002	ch->req_lim += req_delta;
2003	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
2004	spin_unlock_irqrestore(&ch->lock, flags);
2005
2006	if (!iu) {
2007		shost_printk(KERN_ERR, target->scsi_host, PFX
2008			     "no IU available to send response\n");
2009		return 1;
2010	}
2011
2012	iu->num_sge = 1;
2013	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
2014	memcpy(iu->buf, rsp, len);
2015	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
2016
2017	err = srp_post_send(ch, iu, len);
2018	if (err) {
2019		shost_printk(KERN_ERR, target->scsi_host, PFX
2020			     "unable to post response: %d\n", err);
2021		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
2022	}
2023
2024	return err;
2025}
2026
2027static void srp_process_cred_req(struct srp_rdma_ch *ch,
2028				 struct srp_cred_req *req)
2029{
2030	struct srp_cred_rsp rsp = {
2031		.opcode = SRP_CRED_RSP,
2032		.tag = req->tag,
2033	};
2034	s32 delta = be32_to_cpu(req->req_lim_delta);
2035
2036	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2037		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2038			     "problems processing SRP_CRED_REQ\n");
2039}
2040
2041static void srp_process_aer_req(struct srp_rdma_ch *ch,
2042				struct srp_aer_req *req)
2043{
2044	struct srp_target_port *target = ch->target;
2045	struct srp_aer_rsp rsp = {
2046		.opcode = SRP_AER_RSP,
2047		.tag = req->tag,
2048	};
2049	s32 delta = be32_to_cpu(req->req_lim_delta);
2050
2051	shost_printk(KERN_ERR, target->scsi_host, PFX
2052		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2053
2054	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2055		shost_printk(KERN_ERR, target->scsi_host, PFX
2056			     "problems processing SRP_AER_REQ\n");
2057}
2058
2059static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2060{
2061	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2062	struct srp_rdma_ch *ch = cq->cq_context;
2063	struct srp_target_port *target = ch->target;
2064	struct ib_device *dev = target->srp_host->srp_dev->dev;
2065	int res;
2066	u8 opcode;
2067
2068	if (unlikely(wc->status != IB_WC_SUCCESS)) {
2069		srp_handle_qp_err(cq, wc, "RECV");
2070		return;
2071	}
2072
2073	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2074				   DMA_FROM_DEVICE);
2075
2076	opcode = *(u8 *) iu->buf;
2077
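	/*
	 * Compile-time disabled debug aid: change "if (0)" to "if (1)" below
	 * to hex-dump every received IU.
	 */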
2078	if (0) {
2079		shost_printk(KERN_ERR, target->scsi_host,
2080			     PFX "recv completion, opcode 0x%02x\n", opcode);
2081		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2082			       iu->buf, wc->byte_len, true);
2083	}
2084
2085	switch (opcode) {
2086	case SRP_RSP:
2087		srp_process_rsp(ch, iu->buf);
2088		break;
2089
2090	case SRP_CRED_REQ:
2091		srp_process_cred_req(ch, iu->buf);
2092		break;
2093
2094	case SRP_AER_REQ:
2095		srp_process_aer_req(ch, iu->buf);
2096		break;
2097
2098	case SRP_T_LOGOUT:
2099		/* XXX Handle target logout */
2100		shost_printk(KERN_WARNING, target->scsi_host,
2101			     PFX "Got target logout request\n");
2102		break;
2103
2104	default:
2105		shost_printk(KERN_WARNING, target->scsi_host,
2106			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2107		break;
2108	}
2109
2110	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2111				      DMA_FROM_DEVICE);
2112
2113	res = srp_post_recv(ch, iu);
2114	if (res != 0)
2115		shost_printk(KERN_ERR, target->scsi_host,
2116			     PFX "Recv failed with error code %d\n", res);
2117}
2118
2119/**
2120 * srp_tl_err_work() - handle a transport layer error
2121 * @work: Work structure embedded in an SRP target port.
2122 *
2123 * Note: This function may get invoked before the rport has been created,
2124 * hence the target->rport test.
2125 */
2126static void srp_tl_err_work(struct work_struct *work)
2127{
2128	struct srp_target_port *target;
2129
2130	target = container_of(work, struct srp_target_port, tl_err_work);
2131	if (target->rport)
2132		srp_start_tl_fail_timers(target->rport);
2133}
2134
2135static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2136		const char *opname)
2137{
2138	struct srp_rdma_ch *ch = cq->cq_context;
2139	struct srp_target_port *target = ch->target;
2140
2141	if (ch->connected && !target->qp_in_error) {
2142		shost_printk(KERN_ERR, target->scsi_host,
2143			     PFX "failed %s status %s (%d) for CQE %p\n",
2144			     opname, ib_wc_status_msg(wc->status), wc->status,
2145			     wc->wr_cqe);
2146		queue_work(system_long_wq, &target->tl_err_work);
2147	}
2148	target->qp_in_error = true;
2149}
2150
2151static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2152{
2153	struct request *rq = scsi_cmd_to_rq(scmnd);
2154	struct srp_target_port *target = host_to_target(shost);
2155	struct srp_rdma_ch *ch;
2156	struct srp_request *req = scsi_cmd_priv(scmnd);
2157	struct srp_iu *iu;
2158	struct srp_cmd *cmd;
2159	struct ib_device *dev;
2160	unsigned long flags;
2161	u32 tag;
2162	int len, ret;
2163
2164	scmnd->result = srp_chkready(target->rport);
2165	if (unlikely(scmnd->result))
2166		goto err;
2167
2168	WARN_ON_ONCE(rq->tag < 0);
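	/*
	 * blk_mq_unique_tag() combines the blk-mq hardware queue index (upper
	 * 16 bits) with the per-queue tag (lower 16 bits), so the command is
	 * issued on the RDMA channel that matches its hardware queue.
	 */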
2169	tag = blk_mq_unique_tag(rq);
2170	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2171
2172	spin_lock_irqsave(&ch->lock, flags);
2173	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2174	spin_unlock_irqrestore(&ch->lock, flags);
2175
2176	if (!iu)
2177		goto err;
2178
2179	dev = target->srp_host->srp_dev->dev;
2180	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len,
2181				   DMA_TO_DEVICE);
2182
2183	cmd = iu->buf;
2184	memset(cmd, 0, sizeof *cmd);
2185
2186	cmd->opcode = SRP_CMD;
2187	int_to_scsilun(scmnd->device->lun, &cmd->lun);
2188	cmd->tag    = tag;
2189	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2190	if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) {
2191		cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb),
2192					    4);
2193		if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN))
2194			goto err_iu;
2195	}
2196
2197	req->scmnd    = scmnd;
2198	req->cmd      = iu;
2199
2200	len = srp_map_data(scmnd, ch, req);
2201	if (len < 0) {
2202		shost_printk(KERN_ERR, target->scsi_host,
2203			     PFX "Failed to map data (%d)\n", len);
2204		/*
2205		 * If we ran out of memory descriptors (-ENOMEM) because an
2206		 * application is queuing many requests with more than
2207		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2208		 * to reduce queue depth temporarily.
2209		 */
2210		scmnd->result = len == -ENOMEM ?
2211			DID_OK << 16 | SAM_STAT_TASK_SET_FULL : DID_ERROR << 16;
2212		goto err_iu;
2213	}
2214
2215	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len,
2216				      DMA_TO_DEVICE);
2217
2218	if (srp_post_send(ch, iu, len)) {
2219		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2220		scmnd->result = DID_ERROR << 16;
2221		goto err_unmap;
2222	}
2223
2224	return 0;
2225
2226err_unmap:
2227	srp_unmap_data(scmnd, ch, req);
2228
2229err_iu:
2230	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2231
2232	/*
2233	 * Prevent the loops that iterate over the request ring from
2234	 * encountering a dangling SCSI command pointer.
2235	 */
2236	req->scmnd = NULL;
2237
2238err:
2239	if (scmnd->result) {
2240		scsi_done(scmnd);
2241		ret = 0;
2242	} else {
2243		ret = SCSI_MLQUEUE_HOST_BUSY;
2244	}
2245
2246	return ret;
2247}
2248
2249/*
2250 * Note: the resources allocated in this function are freed in
2251 * srp_free_ch_ib().
2252 */
2253static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2254{
2255	struct srp_target_port *target = ch->target;
2256	int i;
2257
2258	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2259			      GFP_KERNEL);
2260	if (!ch->rx_ring)
2261		goto err_no_ring;
2262	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2263			      GFP_KERNEL);
2264	if (!ch->tx_ring)
2265		goto err_no_ring;
2266
2267	for (i = 0; i < target->queue_size; ++i) {
2268		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2269					      ch->max_ti_iu_len,
2270					      GFP_KERNEL, DMA_FROM_DEVICE);
2271		if (!ch->rx_ring[i])
2272			goto err;
2273	}
2274
2275	for (i = 0; i < target->queue_size; ++i) {
2276		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2277					      ch->max_it_iu_len,
2278					      GFP_KERNEL, DMA_TO_DEVICE);
2279		if (!ch->tx_ring[i])
2280			goto err;
2281
2282		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2283	}
2284
2285	return 0;
2286
2287err:
2288	for (i = 0; i < target->queue_size; ++i) {
2289		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2290		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2291	}
2292
2294err_no_ring:
2295	kfree(ch->tx_ring);
2296	ch->tx_ring = NULL;
2297	kfree(ch->rx_ring);
2298	ch->rx_ring = NULL;
2299
2300	return -ENOMEM;
2301}
2302
2303static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2304{
2305	uint64_t T_tr_ns, max_compl_time_ms;
2306	uint32_t rq_tmo_jiffies;
2307
2308	/*
2309	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2310	 * table 91), both the QP timeout and the retry count have to be set
2311	 * for RC QPs during the RTR to RTS transition.
2312	 */
2313	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2314		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2315
2316	/*
2317	 * Set target->rq_tmo_jiffies to one second more than the largest time
2318	 * it can take before an error completion is generated. See also
2319	 * C9-140..142 in the IBTA spec for more information about how to
2320	 * convert the QP Local ACK Timeout value to nanoseconds.
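	 *
	 * For example (illustrative values): with timeout = 14 and
	 * retry_cnt = 7, T_tr = 4096 ns * 2^14 ~= 67 ms, the worst-case
	 * completion time is 4 * 7 * 67 ms ~= 1.9 s and rq_tmo_jiffies
	 * corresponds to roughly 2.9 s.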
2321	 */
2322	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2323	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2324	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2325	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2326
2327	return rq_tmo_jiffies;
2328}
2329
2330static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2331			       const struct srp_login_rsp *lrsp,
2332			       struct srp_rdma_ch *ch)
2333{
2334	struct srp_target_port *target = ch->target;
2335	struct ib_qp_attr *qp_attr = NULL;
2336	int attr_mask = 0;
2337	int ret = 0;
2338	int i;
2339
2340	if (lrsp->opcode == SRP_LOGIN_RSP) {
2341		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2342		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2343		ch->use_imm_data  = srp_use_imm_data &&
2344			(lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP);
2345		ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
2346						      ch->use_imm_data,
2347						      target->max_it_iu_size);
2348		WARN_ON_ONCE(ch->max_it_iu_len >
2349			     be32_to_cpu(lrsp->max_it_iu_len));
2350
2351		if (ch->use_imm_data)
2352			shost_printk(KERN_DEBUG, target->scsi_host,
2353				     PFX "using immediate data\n");
2354
2355		/*
2356		 * Reserve credits for task management so we don't
2357		 * bounce requests back to the SCSI mid-layer.
2358		 */
2359		target->scsi_host->can_queue
2360			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2361			      target->scsi_host->can_queue);
2362		target->scsi_host->cmd_per_lun
2363			= min_t(int, target->scsi_host->can_queue,
2364				target->scsi_host->cmd_per_lun);
2365	} else {
2366		shost_printk(KERN_WARNING, target->scsi_host,
2367			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2368		ret = -ECONNRESET;
2369		goto error;
2370	}
2371
2372	if (!ch->rx_ring) {
2373		ret = srp_alloc_iu_bufs(ch);
2374		if (ret)
2375			goto error;
2376	}
2377
2378	for (i = 0; i < target->queue_size; i++) {
2379		struct srp_iu *iu = ch->rx_ring[i];
2380
2381		ret = srp_post_recv(ch, iu);
2382		if (ret)
2383			goto error;
2384	}
2385
2386	if (!target->using_rdma_cm) {
2387		ret = -ENOMEM;
2388		qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
2389		if (!qp_attr)
2390			goto error;
2391
2392		qp_attr->qp_state = IB_QPS_RTR;
2393		ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2394		if (ret)
2395			goto error_free;
2396
2397		ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2398		if (ret)
2399			goto error_free;
2400
2401		qp_attr->qp_state = IB_QPS_RTS;
2402		ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2403		if (ret)
2404			goto error_free;
2405
2406		target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2407
2408		ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2409		if (ret)
2410			goto error_free;
2411
2412		ret = ib_send_cm_rtu(cm_id, NULL, 0);
2413	}
2414
2415error_free:
2416	kfree(qp_attr);
2417
2418error:
2419	ch->status = ret;
2420}
2421
2422static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2423				  const struct ib_cm_event *event,
2424				  struct srp_rdma_ch *ch)
2425{
2426	struct srp_target_port *target = ch->target;
2427	struct Scsi_Host *shost = target->scsi_host;
2428	struct ib_class_port_info *cpi;
2429	int opcode;
2430	u16 dlid;
2431
2432	switch (event->param.rej_rcvd.reason) {
2433	case IB_CM_REJ_PORT_CM_REDIRECT:
2434		cpi = event->param.rej_rcvd.ari;
2435		dlid = be16_to_cpu(cpi->redirect_lid);
2436		sa_path_set_dlid(&ch->ib_cm.path, dlid);
2437		ch->ib_cm.path.pkey = cpi->redirect_pkey;
2438		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2439		memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
2440
2441		ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2442		break;
2443
2444	case IB_CM_REJ_PORT_REDIRECT:
2445		if (srp_target_is_topspin(target)) {
2446			union ib_gid *dgid = &ch->ib_cm.path.dgid;
2447
2448			/*
2449			 * Topspin/Cisco SRP gateways incorrectly send
2450			 * reject reason code 25 when they mean 24
2451			 * (port redirect).
2452			 */
2453			memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
2454
2455			shost_printk(KERN_DEBUG, shost,
2456				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2457				     be64_to_cpu(dgid->global.subnet_prefix),
2458				     be64_to_cpu(dgid->global.interface_id));
2459
2460			ch->status = SRP_PORT_REDIRECT;
2461		} else {
2462			shost_printk(KERN_WARNING, shost,
2463				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2464			ch->status = -ECONNRESET;
2465		}
2466		break;
2467
2468	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2469		shost_printk(KERN_WARNING, shost,
2470			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2471		ch->status = -ECONNRESET;
2472		break;
2473
2474	case IB_CM_REJ_CONSUMER_DEFINED:
2475		opcode = *(u8 *) event->private_data;
2476		if (opcode == SRP_LOGIN_REJ) {
2477			struct srp_login_rej *rej = event->private_data;
2478			u32 reason = be32_to_cpu(rej->reason);
2479
2480			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2481				shost_printk(KERN_WARNING, shost,
2482					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2483			else
2484				shost_printk(KERN_WARNING, shost, PFX
2485					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2486					     target->sgid.raw,
2487					     target->ib_cm.orig_dgid.raw,
2488					     reason);
2489		} else {
2490			shost_printk(KERN_WARNING, shost,
2491				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2492				     opcode);
		}
2493		ch->status = -ECONNRESET;
2494		break;
2495
2496	case IB_CM_REJ_STALE_CONN:
2497		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2498		ch->status = SRP_STALE_CONN;
2499		break;
2500
2501	default:
2502		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2503			     event->param.rej_rcvd.reason);
2504		ch->status = -ECONNRESET;
2505	}
2506}
2507
2508static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
2509			     const struct ib_cm_event *event)
2510{
2511	struct srp_rdma_ch *ch = cm_id->context;
2512	struct srp_target_port *target = ch->target;
2513	int comp = 0;
2514
2515	switch (event->event) {
2516	case IB_CM_REQ_ERROR:
2517		shost_printk(KERN_DEBUG, target->scsi_host,
2518			     PFX "Sending CM REQ failed\n");
2519		comp = 1;
2520		ch->status = -ECONNRESET;
2521		break;
2522
2523	case IB_CM_REP_RECEIVED:
2524		comp = 1;
2525		srp_cm_rep_handler(cm_id, event->private_data, ch);
2526		break;
2527
2528	case IB_CM_REJ_RECEIVED:
2529		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2530		comp = 1;
2531
2532		srp_ib_cm_rej_handler(cm_id, event, ch);
2533		break;
2534
2535	case IB_CM_DREQ_RECEIVED:
2536		shost_printk(KERN_WARNING, target->scsi_host,
2537			     PFX "DREQ received - connection closed\n");
2538		ch->connected = false;
2539		if (ib_send_cm_drep(cm_id, NULL, 0))
2540			shost_printk(KERN_ERR, target->scsi_host,
2541				     PFX "Sending CM DREP failed\n");
2542		queue_work(system_long_wq, &target->tl_err_work);
2543		break;
2544
2545	case IB_CM_TIMEWAIT_EXIT:
2546		shost_printk(KERN_ERR, target->scsi_host,
2547			     PFX "connection closed\n");
2548		comp = 1;
2549
2550		ch->status = 0;
2551		break;
2552
2553	case IB_CM_MRA_RECEIVED:
2554	case IB_CM_DREQ_ERROR:
2555	case IB_CM_DREP_RECEIVED:
2556		break;
2557
2558	default:
2559		shost_printk(KERN_WARNING, target->scsi_host,
2560			     PFX "Unhandled CM event %d\n", event->event);
2561		break;
2562	}
2563
2564	if (comp)
2565		complete(&ch->done);
2566
2567	return 0;
2568}
2569
2570static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
2571				    struct rdma_cm_event *event)
2572{
2573	struct srp_target_port *target = ch->target;
2574	struct Scsi_Host *shost = target->scsi_host;
2575	int opcode;
2576
2577	switch (event->status) {
2578	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2579		shost_printk(KERN_WARNING, shost,
2580			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2581		ch->status = -ECONNRESET;
2582		break;
2583
2584	case IB_CM_REJ_CONSUMER_DEFINED:
2585		opcode = *(u8 *) event->param.conn.private_data;
2586		if (opcode == SRP_LOGIN_REJ) {
2587			struct srp_login_rej *rej =
2588				(struct srp_login_rej *)
2589				event->param.conn.private_data;
2590			u32 reason = be32_to_cpu(rej->reason);
2591
2592			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2593				shost_printk(KERN_WARNING, shost,
2594					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2595			else
2596				shost_printk(KERN_WARNING, shost,
2597					    PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
2598		} else {
2599			shost_printk(KERN_WARNING, shost,
2600				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2601				     opcode);
2602		}
2603		ch->status = -ECONNRESET;
2604		break;
2605
2606	case IB_CM_REJ_STALE_CONN:
2607		shost_printk(KERN_WARNING, shost,
2608			     "  REJ reason: stale connection\n");
2609		ch->status = SRP_STALE_CONN;
2610		break;
2611
2612	default:
2613		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2614			     event->status);
2615		ch->status = -ECONNRESET;
2616		break;
2617	}
2618}
2619
2620static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
2621			       struct rdma_cm_event *event)
2622{
2623	struct srp_rdma_ch *ch = cm_id->context;
2624	struct srp_target_port *target = ch->target;
2625	int comp = 0;
2626
2627	switch (event->event) {
2628	case RDMA_CM_EVENT_ADDR_RESOLVED:
2629		ch->status = 0;
2630		comp = 1;
2631		break;
2632
2633	case RDMA_CM_EVENT_ADDR_ERROR:
2634		ch->status = -ENXIO;
2635		comp = 1;
2636		break;
2637
2638	case RDMA_CM_EVENT_ROUTE_RESOLVED:
2639		ch->status = 0;
2640		comp = 1;
2641		break;
2642
2643	case RDMA_CM_EVENT_ROUTE_ERROR:
2644	case RDMA_CM_EVENT_UNREACHABLE:
2645		ch->status = -EHOSTUNREACH;
2646		comp = 1;
2647		break;
2648
2649	case RDMA_CM_EVENT_CONNECT_ERROR:
2650		shost_printk(KERN_DEBUG, target->scsi_host,
2651			     PFX "Sending CM REQ failed\n");
2652		comp = 1;
2653		ch->status = -ECONNRESET;
2654		break;
2655
2656	case RDMA_CM_EVENT_ESTABLISHED:
2657		comp = 1;
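		/*
		 * srp_cm_rep_handler() only dereferences its cm_id argument
		 * on the IB/CM path, so passing NULL here is safe.
		 */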
2658		srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
2659		break;
2660
2661	case RDMA_CM_EVENT_REJECTED:
2662		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2663		comp = 1;
2664
2665		srp_rdma_cm_rej_handler(ch, event);
2666		break;
2667
2668	case RDMA_CM_EVENT_DISCONNECTED:
2669		if (ch->connected) {
2670			shost_printk(KERN_WARNING, target->scsi_host,
2671				     PFX "received DREQ\n");
2672			rdma_disconnect(ch->rdma_cm.cm_id);
2673			comp = 1;
2674			ch->status = 0;
2675			queue_work(system_long_wq, &target->tl_err_work);
2676		}
2677		break;
2678
2679	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2680		shost_printk(KERN_ERR, target->scsi_host,
2681			     PFX "connection closed\n");
2682
2683		comp = 1;
2684		ch->status = 0;
2685		break;
2686
2687	default:
2688		shost_printk(KERN_WARNING, target->scsi_host,
2689			     PFX "Unhandled CM event %d\n", event->event);
2690		break;
2691	}
2692
2693	if (comp)
2694		complete(&ch->done);
2695
2696	return 0;
2697}
2698
2699/**
2700 * srp_change_queue_depth - set the device queue depth
2701 * @sdev: scsi device struct
2702 * @qdepth: requested queue depth
2703 *
2704 * Returns queue depth.
2705 */
2706static int
2707srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2708{
2709	if (!sdev->tagged_supported)
2710		qdepth = 1;
2711	return scsi_change_queue_depth(sdev, qdepth);
2712}
2713
2714static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2715			     u8 func, u8 *status)
2716{
2717	struct srp_target_port *target = ch->target;
2718	struct srp_rport *rport = target->rport;
2719	struct ib_device *dev = target->srp_host->srp_dev->dev;
2720	struct srp_iu *iu;
2721	struct srp_tsk_mgmt *tsk_mgmt;
2722	int res;
2723
2724	if (!ch->connected || target->qp_in_error)
2725		return -1;
2726
2727	/*
2728	 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2729	 * invoked while a task management function is being sent.
2730	 */
2731	mutex_lock(&rport->mutex);
2732	spin_lock_irq(&ch->lock);
2733	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2734	spin_unlock_irq(&ch->lock);
2735
2736	if (!iu) {
2737		mutex_unlock(&rport->mutex);
2738
2739		return -1;
2740	}
2741
2742	iu->num_sge = 1;
2743
2744	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2745				   DMA_TO_DEVICE);
2746	tsk_mgmt = iu->buf;
2747	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2748
2749	tsk_mgmt->opcode		= SRP_TSK_MGMT;
2750	int_to_scsilun(lun, &tsk_mgmt->lun);
2751	tsk_mgmt->tsk_mgmt_func = func;
2752	tsk_mgmt->task_tag	= req_tag;
2753
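	/*
	 * Task management tags have the SRP_TAG_TSK_MGMT bit set so that
	 * srp_process_rsp() can distinguish them from SCSI command tags.
	 */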
2754	spin_lock_irq(&ch->lock);
2755	ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2756	tsk_mgmt->tag = ch->tsk_mgmt_tag;
2757	spin_unlock_irq(&ch->lock);
2758
2759	init_completion(&ch->tsk_mgmt_done);
2760
2761	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2762				      DMA_TO_DEVICE);
2763	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2764		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2765		mutex_unlock(&rport->mutex);
2766
2767		return -1;
2768	}
2769	res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2770					msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2771	if (res > 0 && status)
2772		*status = ch->tsk_mgmt_status;
2773	mutex_unlock(&rport->mutex);
2774
2775	WARN_ON_ONCE(res < 0);
2776
2777	return res > 0 ? 0 : -1;
2778}
2779
2780static int srp_abort(struct scsi_cmnd *scmnd)
2781{
2782	struct srp_target_port *target = host_to_target(scmnd->device->host);
2783	struct srp_request *req = scsi_cmd_priv(scmnd);
2784	u32 tag;
2785	u16 ch_idx;
2786	struct srp_rdma_ch *ch;
2787
2788	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2789
2790	tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd));
2791	ch_idx = blk_mq_unique_tag_to_hwq(tag);
2792	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2793		return SUCCESS;
2794	ch = &target->ch[ch_idx];
2795	if (!srp_claim_req(ch, req, NULL, scmnd))
2796		return SUCCESS;
2797	shost_printk(KERN_ERR, target->scsi_host,
2798		     "Sending SRP abort for tag %#x\n", tag);
2799	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2800			      SRP_TSK_ABORT_TASK, NULL) == 0) {
2801		srp_free_req(ch, req, scmnd, 0);
2802		return SUCCESS;
2803	}
2804	if (target->rport->state == SRP_RPORT_LOST)
2805		return FAST_IO_FAIL;
2806
2807	return FAILED;
2808}
2809
2810static int srp_reset_device(struct scsi_cmnd *scmnd)
2811{
2812	struct srp_target_port *target = host_to_target(scmnd->device->host);
2813	struct srp_rdma_ch *ch;
2814	u8 status;
2815
2816	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2817
2818	ch = &target->ch[0];
2819	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2820			      SRP_TSK_LUN_RESET, &status))
2821		return FAILED;
2822	if (status)
2823		return FAILED;
2824
2825	return SUCCESS;
2826}
2827
2828static int srp_reset_host(struct scsi_cmnd *scmnd)
2829{
2830	struct srp_target_port *target = host_to_target(scmnd->device->host);
2831
2832	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2833
2834	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2835}
2836
2837static int srp_target_alloc(struct scsi_target *starget)
2838{
2839	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
2840	struct srp_target_port *target = host_to_target(shost);
2841
2842	if (target->target_can_queue)
2843		starget->can_queue = target->target_can_queue;
2844	return 0;
2845}
2846
2847static int srp_slave_configure(struct scsi_device *sdev)
2848{
2849	struct Scsi_Host *shost = sdev->host;
2850	struct srp_target_port *target = host_to_target(shost);
2851	struct request_queue *q = sdev->request_queue;
2852	unsigned long timeout;
2853
2854	if (sdev->type == TYPE_DISK) {
2855		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2856		blk_queue_rq_timeout(q, timeout);
2857	}
2858
2859	return 0;
2860}
2861
2862static ssize_t id_ext_show(struct device *dev, struct device_attribute *attr,
2863			   char *buf)
2864{
2865	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2866
2867	return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2868}
2869
2870static DEVICE_ATTR_RO(id_ext);
2871
2872static ssize_t ioc_guid_show(struct device *dev, struct device_attribute *attr,
2873			     char *buf)
2874{
2875	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2876
2877	return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2878}
2879
2880static DEVICE_ATTR_RO(ioc_guid);
2881
2882static ssize_t service_id_show(struct device *dev,
2883			       struct device_attribute *attr, char *buf)
2884{
2885	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2886
2887	if (target->using_rdma_cm)
2888		return -ENOENT;
2889	return sysfs_emit(buf, "0x%016llx\n",
2890			  be64_to_cpu(target->ib_cm.service_id));
2891}
2892
2893static DEVICE_ATTR_RO(service_id);
2894
2895static ssize_t pkey_show(struct device *dev, struct device_attribute *attr,
2896			 char *buf)
2897{
2898	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2899
2900	if (target->using_rdma_cm)
2901		return -ENOENT;
2902
2903	return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
2904}
2905
2906static DEVICE_ATTR_RO(pkey);
2907
2908static ssize_t sgid_show(struct device *dev, struct device_attribute *attr,
2909			 char *buf)
2910{
2911	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2912
2913	return sysfs_emit(buf, "%pI6\n", target->sgid.raw);
2914}
2915
2916static DEVICE_ATTR_RO(sgid);
2917
2918static ssize_t dgid_show(struct device *dev, struct device_attribute *attr,
2919			 char *buf)
2920{
2921	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2922	struct srp_rdma_ch *ch = &target->ch[0];
2923
2924	if (target->using_rdma_cm)
2925		return -ENOENT;
2926
2927	return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
2928}
2929
2930static DEVICE_ATTR_RO(dgid);
2931
2932static ssize_t orig_dgid_show(struct device *dev, struct device_attribute *attr,
2933			      char *buf)
2934{
2935	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2936
2937	if (target->using_rdma_cm)
2938		return -ENOENT;
2939
2940	return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
2941}
2942
2943static DEVICE_ATTR_RO(orig_dgid);
2944
2945static ssize_t req_lim_show(struct device *dev, struct device_attribute *attr,
2946			    char *buf)
2947{
2948	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2949	struct srp_rdma_ch *ch;
2950	int i, req_lim = INT_MAX;
2951
2952	for (i = 0; i < target->ch_count; i++) {
2953		ch = &target->ch[i];
2954		req_lim = min(req_lim, ch->req_lim);
2955	}
2956
2957	return sysfs_emit(buf, "%d\n", req_lim);
2958}
2959
2960static DEVICE_ATTR_RO(req_lim);
2961
2962static ssize_t zero_req_lim_show(struct device *dev,
2963				 struct device_attribute *attr, char *buf)
2964{
2965	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2966
2967	return sysfs_emit(buf, "%d\n", target->zero_req_lim);
2968}
2969
2970static DEVICE_ATTR_RO(zero_req_lim);
2971
2972static ssize_t local_ib_port_show(struct device *dev,
2973				  struct device_attribute *attr, char *buf)
2974{
2975	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2976
2977	return sysfs_emit(buf, "%u\n", target->srp_host->port);
2978}
2979
2980static DEVICE_ATTR_RO(local_ib_port);
2981
2982static ssize_t local_ib_device_show(struct device *dev,
2983				    struct device_attribute *attr, char *buf)
2984{
2985	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2986
2987	return sysfs_emit(buf, "%s\n",
2988			  dev_name(&target->srp_host->srp_dev->dev->dev));
2989}
2990
2991static DEVICE_ATTR_RO(local_ib_device);
2992
2993static ssize_t ch_count_show(struct device *dev, struct device_attribute *attr,
2994			     char *buf)
2995{
2996	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2997
2998	return sysfs_emit(buf, "%d\n", target->ch_count);
2999}
3000
3001static DEVICE_ATTR_RO(ch_count);
3002
3003static ssize_t comp_vector_show(struct device *dev,
3004				struct device_attribute *attr, char *buf)
3005{
3006	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3007
3008	return sysfs_emit(buf, "%d\n", target->comp_vector);
3009}
3010
3011static DEVICE_ATTR_RO(comp_vector);
3012
3013static ssize_t tl_retry_count_show(struct device *dev,
3014				   struct device_attribute *attr, char *buf)
3015{
3016	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3017
3018	return sysfs_emit(buf, "%d\n", target->tl_retry_count);
3019}
3020
3021static DEVICE_ATTR_RO(tl_retry_count);
3022
3023static ssize_t cmd_sg_entries_show(struct device *dev,
3024				   struct device_attribute *attr, char *buf)
3025{
3026	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3027
3028	return sysfs_emit(buf, "%u\n", target->cmd_sg_cnt);
3029}
3030
3031static DEVICE_ATTR_RO(cmd_sg_entries);
3032
3033static ssize_t allow_ext_sg_show(struct device *dev,
3034				 struct device_attribute *attr, char *buf)
3035{
3036	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3037
3038	return sysfs_emit(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
3039}
3040
3041static DEVICE_ATTR_RO(allow_ext_sg);
3042
3043static struct attribute *srp_host_attrs[] = {
3044	&dev_attr_id_ext.attr,
3045	&dev_attr_ioc_guid.attr,
3046	&dev_attr_service_id.attr,
3047	&dev_attr_pkey.attr,
3048	&dev_attr_sgid.attr,
3049	&dev_attr_dgid.attr,
3050	&dev_attr_orig_dgid.attr,
3051	&dev_attr_req_lim.attr,
3052	&dev_attr_zero_req_lim.attr,
3053	&dev_attr_local_ib_port.attr,
3054	&dev_attr_local_ib_device.attr,
3055	&dev_attr_ch_count.attr,
3056	&dev_attr_comp_vector.attr,
3057	&dev_attr_tl_retry_count.attr,
3058	&dev_attr_cmd_sg_entries.attr,
3059	&dev_attr_allow_ext_sg.attr,
3060	NULL
3061};
3062
3063ATTRIBUTE_GROUPS(srp_host);
3064
3065static const struct scsi_host_template srp_template = {
3066	.module				= THIS_MODULE,
3067	.name				= "InfiniBand SRP initiator",
3068	.proc_name			= DRV_NAME,
3069	.target_alloc			= srp_target_alloc,
3070	.slave_configure		= srp_slave_configure,
3071	.info				= srp_target_info,
3072	.init_cmd_priv			= srp_init_cmd_priv,
3073	.exit_cmd_priv			= srp_exit_cmd_priv,
3074	.queuecommand			= srp_queuecommand,
3075	.change_queue_depth             = srp_change_queue_depth,
3076	.eh_timed_out			= srp_timed_out,
3077	.eh_abort_handler		= srp_abort,
3078	.eh_device_reset_handler	= srp_reset_device,
3079	.eh_host_reset_handler		= srp_reset_host,
3080	.skip_settle_delay		= true,
3081	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
3082	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
3083	.this_id			= -1,
3084	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
3085	.shost_groups			= srp_host_groups,
3086	.track_queue_depth		= 1,
3087	.cmd_size			= sizeof(struct srp_request),
3088};
3089
3090static int srp_sdev_count(struct Scsi_Host *host)
3091{
3092	struct scsi_device *sdev;
3093	int c = 0;
3094
3095	shost_for_each_device(sdev, host)
3096		c++;
3097
3098	return c;
3099}
3100
3101/*
3102 * Return values:
3103 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
3104 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
3105 *    removal has been scheduled.
3106 * 0 and target->state != SRP_TARGET_REMOVED upon success.
3107 */
3108static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
3109{
3110	struct srp_rport_identifiers ids;
3111	struct srp_rport *rport;
3112
3113	target->state = SRP_TARGET_SCANNING;
3114	sprintf(target->target_name, "SRP.T10:%016llX",
3115		be64_to_cpu(target->id_ext));
3116
3117	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
3118		return -ENODEV;
3119
3120	memcpy(ids.port_id, &target->id_ext, 8);
3121	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
3122	ids.roles = SRP_RPORT_ROLE_TARGET;
3123	rport = srp_rport_add(target->scsi_host, &ids);
3124	if (IS_ERR(rport)) {
3125		scsi_remove_host(target->scsi_host);
3126		return PTR_ERR(rport);
3127	}
3128
3129	rport->lld_data = target;
3130	target->rport = rport;
3131
3132	spin_lock(&host->target_lock);
3133	list_add_tail(&target->list, &host->target_list);
3134	spin_unlock(&host->target_lock);
3135
3136	scsi_scan_target(&target->scsi_host->shost_gendev,
3137			 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
3138
3139	if (srp_connected_ch(target) < target->ch_count ||
3140	    target->qp_in_error) {
3141		shost_printk(KERN_INFO, target->scsi_host,
3142			     PFX "SCSI scan failed - removing SCSI host\n");
3143		srp_queue_remove_work(target);
3144		goto out;
3145	}
3146
3147	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3148		 dev_name(&target->scsi_host->shost_gendev),
3149		 srp_sdev_count(target->scsi_host));
3150
3151	spin_lock_irq(&target->lock);
3152	if (target->state == SRP_TARGET_SCANNING)
3153		target->state = SRP_TARGET_LIVE;
3154	spin_unlock_irq(&target->lock);
3155
3156out:
3157	return 0;
3158}
3159
3160static void srp_release_dev(struct device *dev)
3161{
3162	struct srp_host *host =
3163		container_of(dev, struct srp_host, dev);
3164
3165	kfree(host);
3166}
3167
3168static struct attribute *srp_class_attrs[];
3169
3170ATTRIBUTE_GROUPS(srp_class);
3171
3172static struct class srp_class = {
3173	.name    = "infiniband_srp",
3174	.dev_groups = srp_class_groups,
3175	.dev_release = srp_release_dev
3176};
3177
3178/**
3179 * srp_conn_unique() - check whether the connection to a target is unique
3180 * @host:   SRP host.
3181 * @target: SRP target port.
3182 */
3183static bool srp_conn_unique(struct srp_host *host,
3184			    struct srp_target_port *target)
3185{
3186	struct srp_target_port *t;
3187	bool ret = false;
3188
3189	if (target->state == SRP_TARGET_REMOVED)
3190		goto out;
3191
3192	ret = true;
3193
3194	spin_lock(&host->target_lock);
3195	list_for_each_entry(t, &host->target_list, list) {
3196		if (t != target &&
3197		    target->id_ext == t->id_ext &&
3198		    target->ioc_guid == t->ioc_guid &&
3199		    target->initiator_ext == t->initiator_ext) {
3200			ret = false;
3201			break;
3202		}
3203	}
3204	spin_unlock(&host->target_lock);
3205
3206out:
3207	return ret;
3208}
3209
3210/*
3211 * Target ports are added by writing
3212 *
3213 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3214 *     pkey=<P_Key>,service_id=<service ID>
3215 * or
3216 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
3217 *     [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
3218 *
3219 * to the add_target sysfs attribute.
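 *
 * For example (illustrative values), from within an SRP host's directory
 * under /sys/class/infiniband_srp/:
 *
 *     echo "id_ext=200400A0B8114527,ioc_guid=0002c90200402ef0,dest=192.168.1.5:5555" > add_target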
3220 */
3221enum {
3222	SRP_OPT_ERR		= 0,
3223	SRP_OPT_ID_EXT		= 1 << 0,
3224	SRP_OPT_IOC_GUID	= 1 << 1,
3225	SRP_OPT_DGID		= 1 << 2,
3226	SRP_OPT_PKEY		= 1 << 3,
3227	SRP_OPT_SERVICE_ID	= 1 << 4,
3228	SRP_OPT_MAX_SECT	= 1 << 5,
3229	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
3230	SRP_OPT_IO_CLASS	= 1 << 7,
3231	SRP_OPT_INITIATOR_EXT	= 1 << 8,
3232	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
3233	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
3234	SRP_OPT_SG_TABLESIZE	= 1 << 11,
3235	SRP_OPT_COMP_VECTOR	= 1 << 12,
3236	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
3237	SRP_OPT_QUEUE_SIZE	= 1 << 14,
3238	SRP_OPT_IP_SRC		= 1 << 15,
3239	SRP_OPT_IP_DEST		= 1 << 16,
3240	SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
3241	SRP_OPT_MAX_IT_IU_SIZE  = 1 << 18,
3242	SRP_OPT_CH_COUNT	= 1 << 19,
3243};
3244
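/*
 * Either the complete IB/CM parameter set (first entry below) or the RDMA/CM
 * parameter set (second entry) must be present in a target creation request.
 */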
3245static unsigned int srp_opt_mandatory[] = {
3246	SRP_OPT_ID_EXT		|
3247	SRP_OPT_IOC_GUID	|
3248	SRP_OPT_DGID		|
3249	SRP_OPT_PKEY		|
3250	SRP_OPT_SERVICE_ID,
3251	SRP_OPT_ID_EXT		|
3252	SRP_OPT_IOC_GUID	|
3253	SRP_OPT_IP_DEST,
3254};
3255
3256static const match_table_t srp_opt_tokens = {
3257	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
3258	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
3259	{ SRP_OPT_DGID,			"dgid=%s" 		},
3260	{ SRP_OPT_PKEY,			"pkey=%x" 		},
3261	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
3262	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
3263	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
3264	{ SRP_OPT_TARGET_CAN_QUEUE,	"target_can_queue=%d"	},
3265	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
3266	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
3267	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
3268	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
3269	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
3270	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
3271	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
3272	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
3273	{ SRP_OPT_IP_SRC,		"src=%s"		},
3274	{ SRP_OPT_IP_DEST,		"dest=%s"		},
3275	{ SRP_OPT_MAX_IT_IU_SIZE,	"max_it_iu_size=%d"	},
3276	{ SRP_OPT_CH_COUNT,		"ch_count=%u"		},
3277	{ SRP_OPT_ERR,			NULL 			}
3278};
3279
3280/**
3281 * srp_parse_in - parse an IP address and port number combination
3282 * @net:	   [in]  Network namespace.
3283 * @sa:		   [out] Address family, IP address and port number.
3284 * @addr_port_str: [in]  IP address and port number.
3285 * @has_port:	   [out] Whether or not @addr_port_str includes a port number.
3286 *
3287 * Parse the following address formats:
3288 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
3289 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
3290 */
3291static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
3292			const char *addr_port_str, bool *has_port)
3293{
3294	char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
3295	char *port_str;
3296	int ret;
3297
3298	if (!addr)
3299		return -ENOMEM;
3300	port_str = strrchr(addr, ':');
3301	if (port_str && strchr(port_str, ']'))
3302		port_str = NULL;
3303	if (port_str)
3304		*port_str++ = '\0';
3305	if (has_port)
3306		*has_port = port_str != NULL;
3307	ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
3308	if (ret && addr[0]) {
3309		addr_end = addr + strlen(addr) - 1;
3310		if (addr[0] == '[' && *addr_end == ']') {
3311			*addr_end = '\0';
3312			ret = inet_pton_with_scope(net, AF_INET6, addr + 1,
3313						   port_str, sa);
3314		}
3315	}
3316	kfree(addr);
3317	pr_debug("%s -> %pISpfsc\n", addr_port_str, sa);
3318	return ret;
3319}
3320
3321static int srp_parse_options(struct net *net, const char *buf,
3322			     struct srp_target_port *target)
3323{
3324	char *options, *sep_opt;
3325	char *p;
3326	substring_t args[MAX_OPT_ARGS];
3327	unsigned long long ull;
3328	bool has_port;
3329	int opt_mask = 0;
3330	int token;
3331	int ret = -EINVAL;
3332	int i;
3333
3334	options = kstrdup(buf, GFP_KERNEL);
3335	if (!options)
3336		return -ENOMEM;
3337
3338	sep_opt = options;
3339	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3340		if (!*p)
3341			continue;
3342
3343		token = match_token(p, srp_opt_tokens, args);
3344		opt_mask |= token;
3345
3346		switch (token) {
3347		case SRP_OPT_ID_EXT:
3348			p = match_strdup(args);
3349			if (!p) {
3350				ret = -ENOMEM;
3351				goto out;
3352			}
3353			ret = kstrtoull(p, 16, &ull);
3354			if (ret) {
3355				pr_warn("invalid id_ext parameter '%s'\n", p);
3356				kfree(p);
3357				goto out;
3358			}
3359			target->id_ext = cpu_to_be64(ull);
3360			kfree(p);
3361			break;
3362
3363		case SRP_OPT_IOC_GUID:
3364			p = match_strdup(args);
3365			if (!p) {
3366				ret = -ENOMEM;
3367				goto out;
3368			}
3369			ret = kstrtoull(p, 16, &ull);
3370			if (ret) {
3371				pr_warn("invalid ioc_guid parameter '%s'\n", p);
3372				kfree(p);
3373				goto out;
3374			}
3375			target->ioc_guid = cpu_to_be64(ull);
3376			kfree(p);
3377			break;
3378
3379		case SRP_OPT_DGID:
3380			p = match_strdup(args);
3381			if (!p) {
3382				ret = -ENOMEM;
3383				goto out;
3384			}
3385			if (strlen(p) != 32) {
3386				pr_warn("bad dest GID parameter '%s'\n", p);
3387				kfree(p);
3388				goto out;
3389			}
3390
3391			ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
3392			kfree(p);
3393			if (ret < 0)
3394				goto out;
3395			break;
3396
3397		case SRP_OPT_PKEY:
3398			ret = match_hex(args, &token);
3399			if (ret) {
3400				pr_warn("bad P_Key parameter '%s'\n", p);
3401				goto out;
3402			}
3403			target->ib_cm.pkey = cpu_to_be16(token);
3404			break;
3405
3406		case SRP_OPT_SERVICE_ID:
3407			p = match_strdup(args);
3408			if (!p) {
3409				ret = -ENOMEM;
3410				goto out;
3411			}
3412			ret = kstrtoull(p, 16, &ull);
3413			if (ret) {
3414				pr_warn("bad service_id parameter '%s'\n", p);
3415				kfree(p);
3416				goto out;
3417			}
3418			target->ib_cm.service_id = cpu_to_be64(ull);
3419			kfree(p);
3420			break;
3421
3422		case SRP_OPT_IP_SRC:
3423			p = match_strdup(args);
3424			if (!p) {
3425				ret = -ENOMEM;
3426				goto out;
3427			}
3428			ret = srp_parse_in(net, &target->rdma_cm.src.ss, p,
3429					   NULL);
3430			if (ret < 0) {
3431				pr_warn("bad source parameter '%s'\n", p);
3432				kfree(p);
3433				goto out;
3434			}
3435			target->rdma_cm.src_specified = true;
3436			kfree(p);
3437			break;
3438
3439		case SRP_OPT_IP_DEST:
3440			p = match_strdup(args);
3441			if (!p) {
3442				ret = -ENOMEM;
3443				goto out;
3444			}
3445			ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p,
3446					   &has_port);
3447			if (!has_port)
3448				ret = -EINVAL;
3449			if (ret < 0) {
3450				pr_warn("bad dest parameter '%s'\n", p);
3451				kfree(p);
3452				goto out;
3453			}
3454			target->using_rdma_cm = true;
3455			kfree(p);
3456			break;
3457
3458		case SRP_OPT_MAX_SECT:
3459			ret = match_int(args, &token);
3460			if (ret) {
3461				pr_warn("bad max sect parameter '%s'\n", p);
3462				goto out;
3463			}
3464			target->scsi_host->max_sectors = token;
3465			break;
3466
3467		case SRP_OPT_QUEUE_SIZE:
3468			ret = match_int(args, &token);
3469			if (ret) {
3470				pr_warn("match_int() failed for queue_size parameter '%s', Error %d\n",
3471					p, ret);
3472				goto out;
3473			}
3474			if (token < 1) {
3475				pr_warn("bad queue_size parameter '%s'\n", p);
3476				ret = -EINVAL;
3477				goto out;
3478			}
3479			target->scsi_host->can_queue = token;
3480			target->queue_size = token + SRP_RSP_SQ_SIZE +
3481					     SRP_TSK_MGMT_SQ_SIZE;
3482			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3483				target->scsi_host->cmd_per_lun = token;
3484			break;
3485
3486		case SRP_OPT_MAX_CMD_PER_LUN:
3487			ret = match_int(args, &token);
3488			if (ret) {
3489				pr_warn("match_int() failed for max cmd_per_lun parameter '%s', Error %d\n",
3490					p, ret);
3491				goto out;
3492			}
3493			if (token < 1) {
3494				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3495					p);
3496				ret = -EINVAL;
3497				goto out;
3498			}
3499			target->scsi_host->cmd_per_lun = token;
3500			break;
3501
3502		case SRP_OPT_TARGET_CAN_QUEUE:
3503			ret = match_int(args, &token);
3504			if (ret) {
3505				pr_warn("match_int() failed for max target_can_queue parameter '%s', Error %d\n",
3506					p, ret);
3507				goto out;
3508			}
3509			if (token < 1) {
3510				pr_warn("bad max target_can_queue parameter '%s'\n",
3511					p);
3512				ret = -EINVAL;
3513				goto out;
3514			}
3515			target->target_can_queue = token;
3516			break;
3517
3518		case SRP_OPT_IO_CLASS:
3519			ret = match_hex(args, &token);
3520			if (ret) {
3521				pr_warn("bad IO class parameter '%s'\n", p);
3522				goto out;
3523			}
3524			if (token != SRP_REV10_IB_IO_CLASS &&
3525			    token != SRP_REV16A_IB_IO_CLASS) {
3526				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3527					token, SRP_REV10_IB_IO_CLASS,
3528					SRP_REV16A_IB_IO_CLASS);
3529				ret = -EINVAL;
3530				goto out;
3531			}
3532			target->io_class = token;
3533			break;
3534
3535		case SRP_OPT_INITIATOR_EXT:
3536			p = match_strdup(args);
3537			if (!p) {
3538				ret = -ENOMEM;
3539				goto out;
3540			}
3541			ret = kstrtoull(p, 16, &ull);
3542			if (ret) {
3543				pr_warn("bad initiator_ext value '%s'\n", p);
3544				kfree(p);
3545				goto out;
3546			}
3547			target->initiator_ext = cpu_to_be64(ull);
3548			kfree(p);
3549			break;
3550
3551		case SRP_OPT_CMD_SG_ENTRIES:
3552			ret = match_int(args, &token);
3553			if (ret) {
3554				pr_warn("match_int() failed for max cmd_sg_entries parameter '%s', Error %d\n",
3555					p, ret);
3556				goto out;
3557			}
3558			if (token < 1 || token > 255) {
3559				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3560					p);
3561				ret = -EINVAL;
3562				goto out;
3563			}
3564			target->cmd_sg_cnt = token;
3565			break;
3566
3567		case SRP_OPT_ALLOW_EXT_SG:
3568			ret = match_int(args, &token);
3569			if (ret) {
3570				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3571				goto out;
3572			}
3573			target->allow_ext_sg = !!token;
3574			break;
3575
3576		case SRP_OPT_SG_TABLESIZE:
3577			ret = match_int(args, &token);
3578			if (ret) {
3579				pr_warn("match_int() failed for max sg_tablesize parameter '%s', Error %d\n",
3580					p, ret);
3581				goto out;
3582			}
3583			if (token < 1 || token > SG_MAX_SEGMENTS) {
3584				pr_warn("bad max sg_tablesize parameter '%s'\n",
3585					p);
3586				ret = -EINVAL;
3587				goto out;
3588			}
3589			target->sg_tablesize = token;
3590			break;
3591
3592		case SRP_OPT_COMP_VECTOR:
3593			ret = match_int(args, &token);
3594			if (ret) {
3595				pr_warn("match_int() failed for comp_vector parameter '%s', Error %d\n",
3596					p, ret);
3597				goto out;
3598			}
3599			if (token < 0) {
3600				pr_warn("bad comp_vector parameter '%s'\n", p);
3601				ret = -EINVAL;
3602				goto out;
3603			}
3604			target->comp_vector = token;
3605			break;
3606
3607		case SRP_OPT_TL_RETRY_COUNT:
3608			ret = match_int(args, &token);
3609			if (ret) {
3610				pr_warn("match_int() failed for tl_retry_count parameter '%s', Error %d\n",
3611					p, ret);
3612				goto out;
3613			}
3614			if (token < 2 || token > 7) {
3615				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3616					p);
3617				ret = -EINVAL;
3618				goto out;
3619			}
3620			target->tl_retry_count = token;
3621			break;
3622
3623		case SRP_OPT_MAX_IT_IU_SIZE:
3624			ret = match_int(args, &token);
3625			if (ret) {
3626				pr_warn("match_int() failed for max it_iu_size parameter '%s', Error %d\n",
3627					p, ret);
3628				goto out;
3629			}
3630			if (token < 0) {
3631				pr_warn("bad maximum initiator to target IU size '%s'\n", p);
3632				ret = -EINVAL;
3633				goto out;
3634			}
3635			target->max_it_iu_size = token;
3636			break;
3637
3638		case SRP_OPT_CH_COUNT:
3639			ret = match_int(args, &token);
3640			if (ret) {
3641				pr_warn("match_int() failed for channel count parameter '%s', Error %d\n",
3642					p, ret);
3643				goto out;
3644			}
3645			if (token < 1) {
3646				pr_warn("bad channel count parameter '%s'\n", p);
3647				ret = -EINVAL;
3648				goto out;
3649			}
3650			target->ch_count = token;
3651			break;
3652
3653		default:
3654			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3655				p);
3656			ret = -EINVAL;
3657			goto out;
3658		}
3659	}
3660
3661	for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
3662		if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
3663			ret = 0;
3664			break;
3665		}
3666	}
3667	if (ret)
3668		pr_warn("target creation request is missing one or more parameters\n");
3669
3670	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3671	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3672		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3673			target->scsi_host->cmd_per_lun,
3674			target->scsi_host->can_queue);
3675
3676out:
3677	kfree(options);
3678	return ret;
3679}
3680
3681static ssize_t add_target_store(struct device *dev,
3682				struct device_attribute *attr, const char *buf,
3683				size_t count)
3684{
3685	struct srp_host *host =
3686		container_of(dev, struct srp_host, dev);
3687	struct Scsi_Host *target_host;
3688	struct srp_target_port *target;
3689	struct srp_rdma_ch *ch;
3690	struct srp_device *srp_dev = host->srp_dev;
3691	struct ib_device *ibdev = srp_dev->dev;
3692	int ret, i, ch_idx;
3693	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3694	bool multich = false;
3695	uint32_t max_iu_len;
3696
3697	target_host = scsi_host_alloc(&srp_template,
3698				      sizeof (struct srp_target_port));
3699	if (!target_host)
3700		return -ENOMEM;
3701
3702	target_host->transportt  = ib_srp_transport_template;
3703	target_host->max_channel = 0;
3704	target_host->max_id      = 1;
3705	target_host->max_lun     = -1LL;
3706	target_host->max_cmd_len = sizeof_field(struct srp_cmd, cdb);
3707	target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
3708
3709	if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
3710		target_host->virt_boundary_mask = ~srp_dev->mr_page_mask;
3711
3712	target = host_to_target(target_host);
3713
3714	target->net		= kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
3715	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3716	target->scsi_host	= target_host;
3717	target->srp_host	= host;
3718	target->lkey		= host->srp_dev->pd->local_dma_lkey;
3719	target->global_rkey	= host->srp_dev->global_rkey;
3720	target->cmd_sg_cnt	= cmd_sg_entries;
3721	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3722	target->allow_ext_sg	= allow_ext_sg;
3723	target->tl_retry_count	= 7;
3724	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3725
3726	/*
3727	 * Prevent the SCSI host from being removed by srp_remove_target()
3728	 * before this function returns.
3729	 */
3730	scsi_host_get(target->scsi_host);
3731
3732	ret = mutex_lock_interruptible(&host->add_target_mutex);
3733	if (ret < 0)
3734		goto put;
3735
3736	ret = srp_parse_options(target->net, buf, target);
3737	if (ret)
3738		goto out;
3739
3740	if (!srp_conn_unique(target->srp_host, target)) {
3741		if (target->using_rdma_cm) {
3742			shost_printk(KERN_INFO, target->scsi_host,
3743				     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
3744				     be64_to_cpu(target->id_ext),
3745				     be64_to_cpu(target->ioc_guid),
3746				     &target->rdma_cm.dst);
3747		} else {
3748			shost_printk(KERN_INFO, target->scsi_host,
3749				     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3750				     be64_to_cpu(target->id_ext),
3751				     be64_to_cpu(target->ioc_guid),
3752				     be64_to_cpu(target->initiator_ext));
3753		}
3754		ret = -EEXIST;
3755		goto out;
3756	}
3757
3758	if (!srp_dev->has_fr && !target->allow_ext_sg &&
3759	    target->cmd_sg_cnt < target->sg_tablesize) {
3760		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3761		target->sg_tablesize = target->cmd_sg_cnt;
3762	}
3763
3764	if (srp_dev->use_fast_reg) {
3765		bool gaps_reg = ibdev->attrs.kernel_cap_flags &
3766				 IBK_SG_GAPS_REG;
3767
3768		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3769				  (ilog2(srp_dev->mr_page_size) - 9);
3770		if (!gaps_reg) {
			/*
			 * FR can only map one HCA page per entry. If the
			 * start address is not aligned on an HCA page
			 * boundary, two entries are needed for the head and
			 * the tail even though together they hold at most
			 * one HCA page of data. Hence the "+ 1" in the
			 * calculation below.
			 *
			 * The indirect data buffer descriptor is contiguous,
			 * so the memory for that buffer is only registered
			 * when register_always is true. Hence add one to
			 * mr_per_cmd if register_always has been set.
			 */
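			/*
			 * Worked example with illustrative numbers: for
			 * max_sectors = 1024, mr_page_size = 4096 and
			 * max_pages_per_mr = 256, max_sectors_per_mr is
			 * 256 << 3 = 2048 and, with register_always set,
			 * mr_per_cmd = 1 + (1024 + 1 + 2047) / 2048 = 2.
			 */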
3784			mr_per_cmd = register_always +
3785				(target->scsi_host->max_sectors + 1 +
3786				 max_sectors_per_mr - 1) / max_sectors_per_mr;
3787		} else {
3788			mr_per_cmd = register_always +
3789				(target->sg_tablesize +
3790				 srp_dev->max_pages_per_mr - 1) /
3791				srp_dev->max_pages_per_mr;
3792		}
3793		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3794			 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3795			 max_sectors_per_mr, mr_per_cmd);
3796	}
3797
3798	target_host->sg_tablesize = target->sg_tablesize;
3799	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3800	target->mr_per_cmd = mr_per_cmd;
3801	target->indirect_size = target->sg_tablesize *
3802				sizeof (struct srp_direct_buf);
3803	max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
3804				       srp_use_imm_data,
3805				       target->max_it_iu_size);
3806
3807	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3808	INIT_WORK(&target->remove_work, srp_remove_work);
3809	spin_lock_init(&target->lock);
3810	ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
3811	if (ret)
3812		goto out;
3813
3814	ret = -ENOMEM;
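	/*
	 * If the login string did not set a channel count, use the ch_count
	 * parameter or, when that is zero, one channel per completion vector
	 * but at least four per NUMA node, never exceeding the number of
	 * online CPUs.
	 */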
3815	if (target->ch_count == 0) {
3816		target->ch_count =
3817			min(ch_count ?:
3818				max(4 * num_online_nodes(),
3819				    ibdev->num_comp_vectors),
3820				num_online_cpus());
3821	}
3822
3823	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3824			     GFP_KERNEL);
3825	if (!target->ch)
3826		goto out;
3827
3828	for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) {
3829		ch = &target->ch[ch_idx];
3830		ch->target = target;
3831		ch->comp_vector = ch_idx % ibdev->num_comp_vectors;
3832		spin_lock_init(&ch->lock);
3833		INIT_LIST_HEAD(&ch->free_tx);
3834		ret = srp_new_cm_id(ch);
3835		if (ret)
3836			goto err_disconnect;
3837
3838		ret = srp_create_ch_ib(ch);
3839		if (ret)
3840			goto err_disconnect;
3841
3842		ret = srp_connect_ch(ch, max_iu_len, multich);
3843		if (ret) {
3844			char dst[64];
3845
3846			if (target->using_rdma_cm)
3847				snprintf(dst, sizeof(dst), "%pIS",
3848					&target->rdma_cm.dst);
3849			else
3850				snprintf(dst, sizeof(dst), "%pI6",
3851					target->ib_cm.orig_dgid.raw);
3852			shost_printk(KERN_ERR, target->scsi_host,
3853				PFX "Connection %d/%d to %s failed\n",
3854				ch_idx,
3855				target->ch_count, dst);
3856			if (ch_idx == 0) {
3857				goto free_ch;
3858			} else {
3859				srp_free_ch_ib(target, ch);
3860				target->ch_count = ch - target->ch;
3861				goto connected;
3862			}
3863		}
3864		multich = true;
3865	}
3866
3867connected:
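	/* Expose each established RDMA channel as a blk-mq hardware queue. */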
3868	target->scsi_host->nr_hw_queues = target->ch_count;
3869
3870	ret = srp_add_target(host, target);
3871	if (ret)
3872		goto err_disconnect;
3873
3874	if (target->state != SRP_TARGET_REMOVED) {
3875		if (target->using_rdma_cm) {
3876			shost_printk(KERN_DEBUG, target->scsi_host, PFX
3877				     "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
3878				     be64_to_cpu(target->id_ext),
3879				     be64_to_cpu(target->ioc_guid),
3880				     target->sgid.raw, &target->rdma_cm.dst);
3881		} else {
3882			shost_printk(KERN_DEBUG, target->scsi_host, PFX
3883				     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3884				     be64_to_cpu(target->id_ext),
3885				     be64_to_cpu(target->ioc_guid),
3886				     be16_to_cpu(target->ib_cm.pkey),
3887				     be64_to_cpu(target->ib_cm.service_id),
3888				     target->sgid.raw,
3889				     target->ib_cm.orig_dgid.raw);
3890		}
3891	}
3892
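	/* Success: report that the entire option string has been consumed. */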
3893	ret = count;
3894
3895out:
3896	mutex_unlock(&host->add_target_mutex);
3897
3898put:
3899	scsi_host_put(target->scsi_host);
3900	if (ret < 0) {
		/*
		 * If no call to srp_remove_target() has been scheduled, drop
		 * the network namespace reference that was obtained earlier
		 * in this function.
		 */
3906		if (target->state != SRP_TARGET_REMOVED)
3907			kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
3908		scsi_host_put(target->scsi_host);
3909	}
3910
3911	return ret;
3912
3913err_disconnect:
3914	srp_disconnect_target(target);
3915
3916free_ch:
3917	for (i = 0; i < target->ch_count; i++) {
3918		ch = &target->ch[i];
3919		srp_free_ch_ib(target, ch);
3920	}
3921
3922	kfree(target->ch);
3923	goto out;
3924}
3925
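/*
 * Writing a comma-separated option string to this attribute creates a new
 * target port. Illustrative example (made-up parameter values; see
 * srp_parse_options() for the accepted options):
 *
 *   echo id_ext=0x0002c90300a01234,ioc_guid=0x0002c90300a01234,\
 *   dgid=fe80:0000:0000:0000:0002:c903:00a0:1234,pkey=ffff,\
 *   service_id=0x0002c90300a01234 \
 *     > /sys/class/infiniband_srp/srp-<ibdev>-<port>/add_target
 */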
3926static DEVICE_ATTR_WO(add_target);
3927
3928static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr,
3929			  char *buf)
3930{
3931	struct srp_host *host = container_of(dev, struct srp_host, dev);
3932
3933	return sysfs_emit(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
3934}
3935
3936static DEVICE_ATTR_RO(ibdev);
3937
3938static ssize_t port_show(struct device *dev, struct device_attribute *attr,
3939			 char *buf)
3940{
3941	struct srp_host *host = container_of(dev, struct srp_host, dev);
3942
3943	return sysfs_emit(buf, "%u\n", host->port);
3944}
3945
3946static DEVICE_ATTR_RO(port);
3947
3948static struct attribute *srp_class_attrs[] = {
3949	&dev_attr_add_target.attr,
3950	&dev_attr_ibdev.attr,
3951	&dev_attr_port.attr,
3952	NULL
3953};
3954
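/*
 * Allocate an srp_host for one IB port and register its "srp-<ibdev>-<port>"
 * device in the infiniband_srp class so that the add_target, ibdev and port
 * attributes become available for that port.
 */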
3955static struct srp_host *srp_add_port(struct srp_device *device, u32 port)
3956{
3957	struct srp_host *host;
3958
3959	host = kzalloc(sizeof *host, GFP_KERNEL);
3960	if (!host)
3961		return NULL;
3962
3963	INIT_LIST_HEAD(&host->target_list);
3964	spin_lock_init(&host->target_lock);
3965	mutex_init(&host->add_target_mutex);
3966	host->srp_dev = device;
3967	host->port = port;
3968
3969	device_initialize(&host->dev);
3970	host->dev.class = &srp_class;
3971	host->dev.parent = device->dev->dev.parent;
3972	if (dev_set_name(&host->dev, "srp-%s-%u", dev_name(&device->dev->dev),
3973			 port))
3974		goto put_host;
3975	if (device_add(&host->dev))
3976		goto put_host;
3977
3978	return host;
3979
3980put_host:
3981	device_del(&host->dev);
3982	put_device(&host->dev);
3983	return NULL;
3984}
3985
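/* Keep the per-port device names in sync when an IB device is renamed. */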
3986static void srp_rename_dev(struct ib_device *device, void *client_data)
3987{
3988	struct srp_device *srp_dev = client_data;
3989	struct srp_host *host, *tmp_host;
3990
3991	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3992		char name[IB_DEVICE_NAME_MAX + 8];
3993
3994		snprintf(name, sizeof(name), "srp-%s-%u",
3995			 dev_name(&device->dev), host->port);
3996		device_rename(&host->dev, name);
3997	}
3998}
3999
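/*
 * IB client ->add() callback: derive the memory registration limits of the
 * newly registered IB device, allocate a protection domain and create one
 * srp_host per port.
 */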
4000static int srp_add_one(struct ib_device *device)
4001{
4002	struct srp_device *srp_dev;
4003	struct ib_device_attr *attr = &device->attrs;
4004	struct srp_host *host;
4005	int mr_page_shift;
4006	u32 p;
4007	u64 max_pages_per_mr;
4008	unsigned int flags = 0;
4009
4010	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
4011	if (!srp_dev)
4012		return -ENOMEM;
4013
4014	/*
4015	 * Use the smallest page size supported by the HCA, down to a
4016	 * minimum of 4096 bytes. We're unlikely to build large sglists
4017	 * out of smaller entries.
4018	 */
4019	mr_page_shift		= max(12, ffs(attr->page_size_cap) - 1);
4020	srp_dev->mr_page_size	= 1 << mr_page_shift;
4021	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
4022	max_pages_per_mr	= attr->max_mr_size;
4023	do_div(max_pages_per_mr, srp_dev->mr_page_size);
4024	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
4025		 attr->max_mr_size, srp_dev->mr_page_size,
4026		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
4027	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
4028					  max_pages_per_mr);
4029
4030	srp_dev->has_fr = (attr->device_cap_flags &
4031			   IB_DEVICE_MEM_MGT_EXTENSIONS);
4032	if (!never_register && !srp_dev->has_fr)
4033		dev_warn(&device->dev, "FR is not supported\n");
4034	else if (!never_register &&
4035		 attr->max_mr_size >= 2 * srp_dev->mr_page_size)
4036		srp_dev->use_fast_reg = srp_dev->has_fr;
4037
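	/*
	 * Only request an unsafe global rkey if memory registration is
	 * disabled or will not be used for every request.
	 */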
4038	if (never_register || !register_always || !srp_dev->has_fr)
4039		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
4040
4041	if (srp_dev->use_fast_reg) {
4042		srp_dev->max_pages_per_mr =
4043			min_t(u32, srp_dev->max_pages_per_mr,
4044			      attr->max_fast_reg_page_list_len);
4045	}
4046	srp_dev->mr_max_size	= srp_dev->mr_page_size *
4047				   srp_dev->max_pages_per_mr;
4048	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
4049		 dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
4050		 attr->max_fast_reg_page_list_len,
4051		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
4052
4053	INIT_LIST_HEAD(&srp_dev->dev_list);
4054
4055	srp_dev->dev = device;
4056	srp_dev->pd  = ib_alloc_pd(device, flags);
4057	if (IS_ERR(srp_dev->pd)) {
4058		int ret = PTR_ERR(srp_dev->pd);
4059
4060		kfree(srp_dev);
4061		return ret;
4062	}
4063
4064	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
4065		srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
4066		WARN_ON_ONCE(srp_dev->global_rkey == 0);
4067	}
4068
4069	rdma_for_each_port (device, p) {
4070		host = srp_add_port(srp_dev, p);
4071		if (host)
4072			list_add_tail(&host->list, &srp_dev->dev_list);
4073	}
4074
4075	ib_set_client_data(device, &srp_client, srp_dev);
4076	return 0;
4077}
4078
4079static void srp_remove_one(struct ib_device *device, void *client_data)
4080{
4081	struct srp_device *srp_dev;
4082	struct srp_host *host, *tmp_host;
4083	struct srp_target_port *target;
4084
4085	srp_dev = client_data;
4086
4087	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
4088		/*
4089		 * Remove the add_target sysfs entry so that no new target ports
4090		 * can be created.
4091		 */
4092		device_del(&host->dev);
4093
4094		/*
4095		 * Remove all target ports.
4096		 */
4097		spin_lock(&host->target_lock);
4098		list_for_each_entry(target, &host->target_list, list)
4099			srp_queue_remove_work(target);
4100		spin_unlock(&host->target_lock);
4101
		/*
		 * srp_queue_remove_work() queues a call to
		 * srp_remove_target(). The latter function cancels
		 * target->tl_err_work, so waiting for the queued remove
		 * work to finish is sufficient.
		 */
4108		flush_workqueue(srp_remove_wq);
4109
4110		put_device(&host->dev);
4111	}
4112
4113	ib_dealloc_pd(srp_dev->pd);
4114
4115	kfree(srp_dev);
4116}
4117
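/* Callbacks and timeout parameters handed to the SRP transport class. */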
4118static struct srp_function_template ib_srp_transport_functions = {
4119	.has_rport_state	 = true,
4120	.reset_timer_if_blocked	 = true,
4121	.reconnect_delay	 = &srp_reconnect_delay,
4122	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
4123	.dev_loss_tmo		 = &srp_dev_loss_tmo,
4124	.reconnect		 = srp_rport_reconnect,
4125	.rport_delete		 = srp_rport_delete,
4126	.terminate_rport_io	 = srp_terminate_io,
4127};
4128
4129static int __init srp_init_module(void)
4130{
4131	int ret;
4132
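	/* Compile-time sanity checks of the on-the-wire SRP structure sizes. */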
4133	BUILD_BUG_ON(sizeof(struct srp_aer_req) != 36);
4134	BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);
4135	BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
4136	BUILD_BUG_ON(sizeof(struct srp_indirect_buf) != 20);
4137	BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
4138	BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
4139	BUILD_BUG_ON(sizeof(struct srp_rsp) != 36);
4140
4141	if (srp_sg_tablesize) {
4142		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
4143		if (!cmd_sg_entries)
4144			cmd_sg_entries = srp_sg_tablesize;
4145	}
4146
4147	if (!cmd_sg_entries)
4148		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
4149
4150	if (cmd_sg_entries > 255) {
4151		pr_warn("Clamping cmd_sg_entries to 255\n");
4152		cmd_sg_entries = 255;
4153	}
4154
4155	if (!indirect_sg_entries)
4156		indirect_sg_entries = cmd_sg_entries;
4157	else if (indirect_sg_entries < cmd_sg_entries) {
4158		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
4159			cmd_sg_entries);
4160		indirect_sg_entries = cmd_sg_entries;
4161	}
4162
4163	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
4164		pr_warn("Clamping indirect_sg_entries to %u\n",
4165			SG_MAX_SEGMENTS);
4166		indirect_sg_entries = SG_MAX_SEGMENTS;
4167	}
4168
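	/*
	 * srp_remove_one() flushes this workqueue, so use a dedicated queue
	 * instead of relying on the system workqueue.
	 */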
4169	srp_remove_wq = create_workqueue("srp_remove");
4170	if (!srp_remove_wq) {
4171		ret = -ENOMEM;
4172		goto out;
4173	}
4174
4175	ret = -ENOMEM;
4176	ib_srp_transport_template =
4177		srp_attach_transport(&ib_srp_transport_functions);
4178	if (!ib_srp_transport_template)
4179		goto destroy_wq;
4180
4181	ret = class_register(&srp_class);
4182	if (ret) {
4183		pr_err("couldn't register class infiniband_srp\n");
4184		goto release_tr;
4185	}
4186
4187	ib_sa_register_client(&srp_sa_client);
4188
4189	ret = ib_register_client(&srp_client);
4190	if (ret) {
4191		pr_err("couldn't register IB client\n");
4192		goto unreg_sa;
4193	}
4194
4195out:
4196	return ret;
4197
4198unreg_sa:
4199	ib_sa_unregister_client(&srp_sa_client);
4200	class_unregister(&srp_class);
4201
4202release_tr:
4203	srp_release_transport(ib_srp_transport_template);
4204
4205destroy_wq:
4206	destroy_workqueue(srp_remove_wq);
4207	goto out;
4208}
4209
4210static void __exit srp_cleanup_module(void)
4211{
4212	ib_unregister_client(&srp_client);
4213	ib_sa_unregister_client(&srp_sa_client);
4214	class_unregister(&srp_class);
4215	srp_release_transport(ib_srp_transport_template);
4216	destroy_workqueue(srp_remove_wq);
4217}
4218
4219module_init(srp_init_module);
4220module_exit(srp_cleanup_module);
4221