cma.c revision 325945
1/*
2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6 * Copyright (c) 2016 Chelsio Communications.  All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses.  You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 *     Redistribution and use in source and binary forms, with or
15 *     without modification, are permitted provided that the following
16 *     conditions are met:
17 *
18 *      - Redistributions of source code must retain the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer.
21 *
22 *      - Redistributions in binary form must reproduce the above
23 *        copyright notice, this list of conditions and the following
24 *        disclaimer in the documentation and/or other materials
25 *        provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 */
36
37#define	LINUXKPI_PARAM_PREFIX ibcore_
38
39#include <linux/completion.h>
40#include <linux/in.h>
41#include <linux/in6.h>
42#include <linux/mutex.h>
43#include <linux/random.h>
44#include <linux/idr.h>
45#include <linux/inetdevice.h>
46
47#include <net/tcp.h>
48#include <net/ipv6.h>
49
50#include <netinet6/scope6_var.h>
51#include <netinet6/ip6_var.h>
52
53#include <rdma/rdma_cm.h>
54#include <rdma/rdma_cm_ib.h>
55#include <rdma/ib_cache.h>
56#include <rdma/ib_cm.h>
57#include <rdma/ib_sa.h>
58#include <rdma/iw_cm.h>
59
60MODULE_AUTHOR("Sean Hefty");
61MODULE_DESCRIPTION("Generic RDMA CM Agent");
62MODULE_LICENSE("Dual BSD/GPL");
63
64static int tavor_quirk = 0;
65module_param_named(tavor_quirk, tavor_quirk, int, 0644);
66MODULE_PARM_DESC(tavor_quirk, "Tavor performance quirk: limit MTU to 1K if > 0");
67
68#define CMA_CM_RESPONSE_TIMEOUT 20
69#define CMA_MAX_CM_RETRIES 15
70#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
71#define IBOE_PACKET_LIFETIME 18
72
73static int cma_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
74module_param_named(cma_response_timeout, cma_response_timeout, int, 0644);
75MODULE_PARM_DESC(cma_response_timeout, "CMA_CM_RESPONSE_TIMEOUT default=20");
76
77static int def_prec2sl = 3;
78module_param_named(def_prec2sl, def_prec2sl, int, 0644);
79MODULE_PARM_DESC(def_prec2sl, "Default value for SL priority with RoCE. Valid values 0 - 7");
80
81static void cma_add_one(struct ib_device *device);
82static void cma_remove_one(struct ib_device *device);
83
84static struct ib_client cma_client = {
85	.name   = "cma",
86	.add    = cma_add_one,
87	.remove = cma_remove_one
88};
89
90static struct ib_sa_client sa_client;
91static struct rdma_addr_client addr_client;
92static LIST_HEAD(dev_list);
93static LIST_HEAD(listen_any_list);
94static DEFINE_MUTEX(lock);
95static struct workqueue_struct *cma_wq;
96static DEFINE_IDR(sdp_ps);
97static DEFINE_IDR(tcp_ps);
98static DEFINE_IDR(udp_ps);
99static DEFINE_IDR(ipoib_ps);
100#if defined(INET)
101static int next_port;
102#endif
103
104struct cma_device {
105	struct list_head	list;
106	struct ib_device	*device;
107	struct completion	comp;
108	atomic_t		refcount;
109	struct list_head	id_list;
110};
111
112enum cma_state {
113	CMA_IDLE,
114	CMA_ADDR_QUERY,
115	CMA_ADDR_RESOLVED,
116	CMA_ROUTE_QUERY,
117	CMA_ROUTE_RESOLVED,
118	CMA_CONNECT,
119	CMA_DISCONNECT,
120	CMA_ADDR_BOUND,
121	CMA_LISTEN,
122	CMA_DEVICE_REMOVAL,
123	CMA_DESTROYING
124};
125
126struct rdma_bind_list {
127	struct idr		*ps;
128	struct hlist_head	owners;
129	unsigned short		port;
130};
131
132/*
133 * Device removal can occur at any time, so we need extra handling to
134 * serialize notifying the user of device removal with other callbacks.
135 * We do this by disabling removal notification while a callback is in progress,
136 * and reporting it after the callback completes.
137 */
138struct rdma_id_private {
139	struct rdma_cm_id	id;
140
141	struct rdma_bind_list	*bind_list;
142	struct socket		*sock;
143	struct hlist_node	node;
144	struct list_head	list; /* listen_any_list or cma_device.list */
145	struct list_head	listen_list; /* per device listens */
146	struct cma_device	*cma_dev;
147	struct list_head	mc_list;
148
149	int			internal_id;
150	enum cma_state		state;
151	spinlock_t		lock;
152	struct mutex		qp_mutex;
153
154	struct completion	comp;
155	atomic_t		refcount;
156	struct mutex		handler_mutex;
157
158	int			backlog;
159	int			timeout_ms;
160	struct ib_sa_query	*query;
161	int			query_id;
162	union {
163		struct ib_cm_id	*ib;
164		struct iw_cm_id	*iw;
165	} cm_id;
166
167	u32			seq_num;
168	u32			qkey;
169	u32			qp_num;
170	u8			srq;
171	u8			tos;
172	int			unify_ps_tcp;
173};
174
175struct cma_multicast {
176	struct rdma_id_private *id_priv;
177	union {
178		struct ib_sa_multicast *ib;
179	} multicast;
180	struct list_head	list;
181	void			*context;
182	struct sockaddr_storage	addr;
183	struct kref		mcref;
184};
185
186struct cma_work {
187	struct work_struct	work;
188	struct rdma_id_private	*id;
189	enum cma_state		old_state;
190	enum cma_state		new_state;
191	struct rdma_cm_event	event;
192};
193
194struct cma_ndev_work {
195	struct work_struct	work;
196	struct rdma_id_private	*id;
197	struct rdma_cm_event	event;
198};
199
200struct iboe_mcast_work {
201	struct work_struct	 work;
202	struct rdma_id_private	*id;
203	struct cma_multicast	*mc;
204};
205
206union cma_ip_addr {
207	struct in6_addr ip6;
208	struct {
209		__be32 pad[3];
210		__be32 addr;
211	} ip4;
212};
213
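/*
 * Wire format of the CMA private-data header.  cma_hdr (and the SDP
 * variants below) is carried at the start of the CM REQ/SIDR REQ
 * private data; the IP version lives in the high nibble of
 * ip_version, see cma_get_ip_ver()/cma_set_ip_ver().
 */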
214struct cma_hdr {
215	u8 cma_version;
216	u8 ip_version;	/* IP version: 7:4 */
217	__be16 port;
218	union cma_ip_addr src_addr;
219	union cma_ip_addr dst_addr;
220};
221
222struct sdp_hh {
223	u8 bsdh[16];
224	u8 sdp_version; /* Major version: 7:4 */
225	u8 ip_version;	/* IP version: 7:4 */
226	u8 sdp_specific1[10];
227	__be16 port;
228	__be16 sdp_specific2;
229	union cma_ip_addr src_addr;
230	union cma_ip_addr dst_addr;
231};
232
233struct sdp_hah {
234	u8 bsdh[16];
235	u8 sdp_version;
236};
237
238#define CMA_VERSION 0x00
239#define SDP_MAJ_VERSION 0x2
240
241static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
242{
243	unsigned long flags;
244	int ret;
245
246	spin_lock_irqsave(&id_priv->lock, flags);
247	ret = (id_priv->state == comp);
248	spin_unlock_irqrestore(&id_priv->lock, flags);
249	return ret;
250}
251
252static int cma_comp_exch(struct rdma_id_private *id_priv,
253			 enum cma_state comp, enum cma_state exch)
254{
255	unsigned long flags;
256	int ret;
257
258	spin_lock_irqsave(&id_priv->lock, flags);
259	if ((ret = (id_priv->state == comp)))
260		id_priv->state = exch;
261	spin_unlock_irqrestore(&id_priv->lock, flags);
262	return ret;
263}
264
265static enum cma_state cma_exch(struct rdma_id_private *id_priv,
266			       enum cma_state exch)
267{
268	unsigned long flags;
269	enum cma_state old;
270
271	spin_lock_irqsave(&id_priv->lock, flags);
272	old = id_priv->state;
273	id_priv->state = exch;
274	spin_unlock_irqrestore(&id_priv->lock, flags);
275	return old;
276}
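
/*
 * The helpers above implement the rdma_id_private state machine:
 * cma_comp() tests the current state, cma_comp_exch() transitions only
 * when the id is in the expected state, and cma_exch() forces a new
 * state while returning the old one.  All three serialize on
 * id_priv->lock.  A typical caller pattern (illustrative sketch, see
 * e.g. rdma_listen() below) is:
 *
 *	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
 *		return -EINVAL;		(wrong state, bail out)
 *	... perform the transition's work ...
 *	if (ret)			(on failure, roll the state back)
 *		cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
 */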
277
278static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
279{
280	return hdr->ip_version >> 4;
281}
282
283static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
284{
285	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
286}
287
288static inline u8 sdp_get_majv(u8 sdp_version)
289{
290	return sdp_version >> 4;
291}
292
293static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
294{
295	return hh->ip_version >> 4;
296}
297
298static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
299{
300	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
301}
302
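/*
 * RDMA_PS_UDP and RDMA_PS_IPOIB are the unreliable-datagram port
 * spaces: they are serviced with UD QPs and SIDR (service ID
 * resolution) rather than the connected CM REQ/REP exchange.
 */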
303static inline int cma_is_ud_ps(enum rdma_port_space ps)
304{
305	return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
306}
307
308static void cma_attach_to_dev(struct rdma_id_private *id_priv,
309			      struct cma_device *cma_dev)
310{
311	atomic_inc(&cma_dev->refcount);
312	id_priv->cma_dev = cma_dev;
313	id_priv->id.device = cma_dev->device;
314	id_priv->id.route.addr.dev_addr.transport =
315		rdma_node_get_transport(cma_dev->device->node_type);
316	list_add_tail(&id_priv->list, &cma_dev->id_list);
317}
318
319static inline void cma_deref_dev(struct cma_device *cma_dev)
320{
321	if (atomic_dec_and_test(&cma_dev->refcount))
322		complete(&cma_dev->comp);
323}
324
325static inline void release_mc(struct kref *kref)
326{
327	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
328
329	kfree(mc->multicast.ib);
330	kfree(mc);
331}
332
333static void cma_detach_from_dev(struct rdma_id_private *id_priv)
334{
335	list_del(&id_priv->list);
336	cma_deref_dev(id_priv->cma_dev);
337	id_priv->cma_dev = NULL;
338}
339
340static int cma_set_qkey(struct rdma_id_private *id_priv)
341{
342	struct ib_sa_mcmember_rec rec;
343	int ret = 0;
344
345	if (id_priv->qkey)
346		return 0;
347
348	switch (id_priv->id.ps) {
349	case RDMA_PS_UDP:
350		id_priv->qkey = RDMA_UDP_QKEY;
351		break;
352	case RDMA_PS_IPOIB:
353		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
354		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
355					     id_priv->id.port_num, &rec.mgid,
356					     &rec);
357		if (!ret)
358			id_priv->qkey = be32_to_cpu(rec.qkey);
359		break;
360	default:
361		break;
362	}
363	return ret;
364}
365
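/*
 * Return 0 when the GID is present in port_num's GID table, -EAGAIN
 * when the table was scanned without a match, and 1 when the port or
 * GID query itself failed.
 */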
366static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
367{
368	int i;
369	int err;
370	struct ib_port_attr props;
371	union ib_gid tmp;
372
373	err = ib_query_port(device, port_num, &props);
374	if (err)
375		return 1;
376
377	for (i = 0; i < props.gid_tbl_len; ++i) {
378		err = ib_query_gid(device, port_num, i, &tmp);
379		if (err)
380			return 1;
381		if (!memcmp(&tmp, gid, sizeof tmp))
382			return 0;
383	}
384
385	return -EAGAIN;
386}
387
388int
389rdma_find_cmid_laddr(struct sockaddr_in *local_addr, unsigned short dev_type,
390							void **cm_id)
391{
392	int ret;
393	u8 port;
394	int found_dev = 0, found_cmid = 0;
395	struct rdma_id_private  *id_priv;
396	struct rdma_id_private  *dev_id_priv;
397	struct cma_device	*cma_dev;
398	struct rdma_dev_addr	dev_addr;
399	union ib_gid		gid;
400	enum rdma_link_layer dev_ll = dev_type == ARPHRD_INFINIBAND ?
401		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
402
403	memset(&dev_addr, 0, sizeof(dev_addr));
404
405	ret = rdma_translate_ip((struct sockaddr *)local_addr,
406							&dev_addr);
407	if (ret)
408		goto err;
409
410	/* find rdma device based on MAC address/gid */
411	mutex_lock(&lock);
412
413	memcpy(&gid, dev_addr.src_dev_addr +
414	       rdma_addr_gid_offset(&dev_addr), sizeof(gid));
415
416	list_for_each_entry(cma_dev, &dev_list, list)
417		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port)
418			if ((rdma_port_get_link_layer(cma_dev->device, port) ==
419								 dev_ll) &&
420			 (rdma_node_get_transport(cma_dev->device->node_type) ==
421							RDMA_TRANSPORT_IWARP)) {
422					ret = find_gid_port(cma_dev->device,
423								&gid, port);
424					if (!ret) {
425						found_dev = 1;
426						goto out;
427					} else if (ret == 1) {
428						mutex_unlock(&lock);
429						goto err;
430					}
431			}
432out:
433	mutex_unlock(&lock);
434
435	if (!found_dev)
436		goto err;
437
438	/* Traverse the list of listening cm_ids to find the
439	 * desired cm_id based on rdma device & port number.
440	 */
441	list_for_each_entry(id_priv, &listen_any_list, list)
442		list_for_each_entry(dev_id_priv, &id_priv->listen_list,
443						 listen_list)
444			if (dev_id_priv->cma_dev == cma_dev)
445				if (dev_id_priv->cm_id.iw->local_addr.sin_port
446						== local_addr->sin_port) {
447					*cm_id = (void *)dev_id_priv->cm_id.iw;
448					found_cmid = 1;
449				}
450	return found_cmid ? 0 : -ENODEV;
451
452err:
453	return -ENODEV;
454}
455EXPORT_SYMBOL(rdma_find_cmid_laddr);
456
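/*
 * Bind an id to the cma_device whose cached GID table contains the GID
 * derived from the resolved hardware address.  For non-IB (RoCE) link
 * types the iboe SGID is tried first; on success the id is attached to
 * the matching device and id.port_num is set.  Callers serialize on
 * the global 'lock' mutex.
 */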
457static int cma_acquire_dev(struct rdma_id_private *id_priv)
458{
459	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
460	struct cma_device *cma_dev;
461	union ib_gid gid;
462	int ret = -ENODEV;
463
464	if (dev_addr->dev_type != ARPHRD_INFINIBAND) {
465		iboe_addr_get_sgid(dev_addr, &gid);
466		list_for_each_entry(cma_dev, &dev_list, list) {
467			ret = ib_find_cached_gid(cma_dev->device, &gid,
468						 &id_priv->id.port_num, NULL);
469			if (!ret)
470				goto out;
471		}
472	}
473
474	memcpy(&gid, dev_addr->src_dev_addr +
475	       rdma_addr_gid_offset(dev_addr), sizeof gid);
476	list_for_each_entry(cma_dev, &dev_list, list) {
477		ret = ib_find_cached_gid(cma_dev->device, &gid,
478					 &id_priv->id.port_num, NULL);
479		if (!ret)
480			break;
481	}
482
483out:
484	if (!ret)
485		cma_attach_to_dev(id_priv, cma_dev);
486
487	return ret;
488}
489
490static void cma_deref_id(struct rdma_id_private *id_priv)
491{
492	if (atomic_dec_and_test(&id_priv->refcount))
493		complete(&id_priv->comp);
494}
495
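/*
 * Implements the scheme described at the top of this file: take the
 * handler mutex and confirm the id is still in the expected state.  On
 * success the callback may run (and must drop handler_mutex when
 * done); on a state mismatch -EINVAL is returned with the mutex
 * already released.
 */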
496static int cma_disable_callback(struct rdma_id_private *id_priv,
497			      enum cma_state state)
498{
499	mutex_lock(&id_priv->handler_mutex);
500	if (id_priv->state != state) {
501		mutex_unlock(&id_priv->handler_mutex);
502		return -EINVAL;
503	}
504	return 0;
505}
506
507static int cma_has_cm_dev(struct rdma_id_private *id_priv)
508{
509	return (id_priv->id.device && id_priv->cm_id.ib);
510}
511
512struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
513				  void *context, enum rdma_port_space ps)
514{
515	struct rdma_id_private *id_priv;
516
517	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
518	if (!id_priv)
519		return ERR_PTR(-ENOMEM);
520
521	id_priv->state = CMA_IDLE;
522	id_priv->id.context = context;
523	id_priv->id.event_handler = event_handler;
524	id_priv->id.ps = ps;
525	spin_lock_init(&id_priv->lock);
526	mutex_init(&id_priv->qp_mutex);
527	init_completion(&id_priv->comp);
528	atomic_set(&id_priv->refcount, 1);
529	mutex_init(&id_priv->handler_mutex);
530	INIT_LIST_HEAD(&id_priv->listen_list);
531	INIT_LIST_HEAD(&id_priv->mc_list);
532	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
533
534	return &id_priv->id;
535}
536EXPORT_SYMBOL(rdma_create_id);
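
/*
 * Typical active-side usage of the API exported above (illustrative
 * sketch only -- the handler, PD, addresses and attribute structures
 * are hypothetical consumer objects, not part of this file):
 *
 *	struct rdma_cm_id *id;
 *
 *	id = rdma_create_id(my_handler, my_ctx, RDMA_PS_TCP);
 *	if (IS_ERR(id))
 *		return PTR_ERR(id);
 *	rdma_resolve_addr(id, src_addr, dst_addr, 2000);
 *	(my_handler() sees RDMA_CM_EVENT_ADDR_RESOLVED, then calls)
 *	rdma_resolve_route(id, 2000);
 *	(on RDMA_CM_EVENT_ROUTE_RESOLVED)
 *	rdma_create_qp(id, pd, &init_attr);
 *	rdma_connect(id, &conn_param);
 *	(on RDMA_CM_EVENT_ESTABLISHED the QP is ready for I/O)
 */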
537
538static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
539{
540	struct ib_qp_attr qp_attr;
541	int qp_attr_mask, ret;
542
543	qp_attr.qp_state = IB_QPS_INIT;
544	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
545	if (ret)
546		return ret;
547
548	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
549	if (ret)
550		return ret;
551
552	qp_attr.qp_state = IB_QPS_RTR;
553	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
554	if (ret)
555		return ret;
556
557	qp_attr.qp_state = IB_QPS_RTS;
558	qp_attr.sq_psn = 0;
559	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
560
561	return ret;
562}
563
564static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
565{
566	struct ib_qp_attr qp_attr;
567	int qp_attr_mask, ret;
568
569	qp_attr.qp_state = IB_QPS_INIT;
570	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
571	if (ret)
572		return ret;
573
574	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
575}
576
577int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
578		   struct ib_qp_init_attr *qp_init_attr)
579{
580	struct rdma_id_private *id_priv;
581	struct ib_qp *qp;
582	int ret;
583
584	id_priv = container_of(id, struct rdma_id_private, id);
585	if (id->device != pd->device)
586		return -EINVAL;
587
588	qp = ib_create_qp(pd, qp_init_attr);
589	if (IS_ERR(qp))
590		return PTR_ERR(qp);
591
592	if (cma_is_ud_ps(id_priv->id.ps))
593		ret = cma_init_ud_qp(id_priv, qp);
594	else
595		ret = cma_init_conn_qp(id_priv, qp);
596	if (ret)
597		goto err;
598
599	id->qp = qp;
600	id_priv->qp_num = qp->qp_num;
601	id_priv->srq = (qp->srq != NULL);
602	return 0;
603err:
604	ib_destroy_qp(qp);
605	return ret;
606}
607EXPORT_SYMBOL(rdma_create_qp);
608
609void rdma_destroy_qp(struct rdma_cm_id *id)
610{
611	struct rdma_id_private *id_priv;
612
613	id_priv = container_of(id, struct rdma_id_private, id);
614	mutex_lock(&id_priv->qp_mutex);
615	ib_destroy_qp(id_priv->id.qp);
616	id_priv->id.qp = NULL;
617	mutex_unlock(&id_priv->qp_mutex);
618}
619EXPORT_SYMBOL(rdma_destroy_qp);
620
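/*
 * The two helpers below walk a connected QP through the standard IB
 * INIT -> RTR -> RTS sequence, pulling the per-transition attributes
 * from rdma_init_qp_attr() and the responder_resources/initiator_depth
 * values supplied in the connection parameters.
 */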
621static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
622			     struct rdma_conn_param *conn_param)
623{
624	struct ib_qp_attr qp_attr;
625	int qp_attr_mask, ret;
626
627	mutex_lock(&id_priv->qp_mutex);
628	if (!id_priv->id.qp) {
629		ret = 0;
630		goto out;
631	}
632
633	/* Need to update QP attributes from default values. */
634	qp_attr.qp_state = IB_QPS_INIT;
635	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
636	if (ret)
637		goto out;
638
639	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
640	if (ret)
641		goto out;
642
643	qp_attr.qp_state = IB_QPS_RTR;
644	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
645	if (ret)
646		goto out;
647
648	if (conn_param)
649		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
650	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
651out:
652	mutex_unlock(&id_priv->qp_mutex);
653	return ret;
654}
655
656static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
657			     struct rdma_conn_param *conn_param)
658{
659	struct ib_qp_attr qp_attr;
660	int qp_attr_mask, ret;
661
662	mutex_lock(&id_priv->qp_mutex);
663	if (!id_priv->id.qp) {
664		ret = 0;
665		goto out;
666	}
667
668	qp_attr.qp_state = IB_QPS_RTS;
669	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
670	if (ret)
671		goto out;
672
673	if (conn_param)
674		qp_attr.max_rd_atomic = conn_param->initiator_depth;
675	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
676out:
677	mutex_unlock(&id_priv->qp_mutex);
678	return ret;
679}
680
681static int cma_modify_qp_err(struct rdma_id_private *id_priv)
682{
683	struct ib_qp_attr qp_attr;
684	int ret;
685
686	mutex_lock(&id_priv->qp_mutex);
687	if (!id_priv->id.qp) {
688		ret = 0;
689		goto out;
690	}
691
692	qp_attr.qp_state = IB_QPS_ERR;
693	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
694out:
695	mutex_unlock(&id_priv->qp_mutex);
696	return ret;
697}
698
699static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
700			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
701{
702	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
703	int ret;
704	u16 pkey;
705
706	if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) ==
707	    IB_LINK_LAYER_INFINIBAND)
708		pkey = ib_addr_get_pkey(dev_addr);
709	else
710		pkey = 0xffff;
711
712	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
713				  pkey, &qp_attr->pkey_index);
714	if (ret)
715		return ret;
716
717	qp_attr->port_num = id_priv->id.port_num;
718	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
719
720	if (cma_is_ud_ps(id_priv->id.ps)) {
721		ret = cma_set_qkey(id_priv);
722		if (ret)
723			return ret;
724
725		qp_attr->qkey = id_priv->qkey;
726		*qp_attr_mask |= IB_QP_QKEY;
727	} else {
728		qp_attr->qp_access_flags = 0;
729		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
730	}
731	return 0;
732}
733
734int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
735		       int *qp_attr_mask)
736{
737	struct rdma_id_private *id_priv;
738	int ret = 0;
739
740	id_priv = container_of(id, struct rdma_id_private, id);
741	if (rdma_cap_ib_cm(id->device, id->port_num)) {
742		if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
743			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
744		else
745			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
746						 qp_attr_mask);
747
748		if (qp_attr->qp_state == IB_QPS_RTR)
749			qp_attr->rq_psn = id_priv->seq_num;
750	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
751		if (!id_priv->cm_id.iw) {
752			qp_attr->qp_access_flags = 0;
753			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
754		} else
755			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
756						 qp_attr_mask);
757	} else
758		ret = -ENOSYS;
759
760	return ret;
761}
762EXPORT_SYMBOL(rdma_init_qp_attr);
763
764static inline int cma_zero_addr(struct sockaddr *addr)
765{
766	struct in6_addr *ip6;
767
768	if (addr->sa_family == AF_INET)
769		return ipv4_is_zeronet(
770			((struct sockaddr_in *)addr)->sin_addr.s_addr);
771	else {
772		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
773		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
774			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
775	}
776}
777
778static inline int cma_loopback_addr(struct sockaddr *addr)
779{
780	if (addr->sa_family == AF_INET)
781		return ipv4_is_loopback(
782			((struct sockaddr_in *) addr)->sin_addr.s_addr);
783	else
784		return ipv6_addr_loopback(
785			&((struct sockaddr_in6 *) addr)->sin6_addr);
786}
787
788static inline int cma_any_addr(struct sockaddr *addr)
789{
790	return cma_zero_addr(addr) || cma_loopback_addr(addr);
791}
792int
793rdma_cma_any_addr(struct sockaddr *addr)
794{
795	return cma_any_addr(addr);
796}
797EXPORT_SYMBOL(rdma_cma_any_addr);
798
799static inline __be16 cma_port(struct sockaddr *addr)
800{
801	if (addr->sa_family == AF_INET)
802		return ((struct sockaddr_in *) addr)->sin_port;
803	else
804		return ((struct sockaddr_in6 *) addr)->sin6_port;
805}
806
807static inline int cma_any_port(struct sockaddr *addr)
808{
809	return !cma_port(addr);
810}
811
812static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
813			    u8 *ip_ver, __be16 *port,
814			    union cma_ip_addr **src, union cma_ip_addr **dst)
815{
816	switch (ps) {
817	case RDMA_PS_SDP:
818		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
819		    SDP_MAJ_VERSION)
820			return -EINVAL;
821
822		*ip_ver	= sdp_get_ip_ver(hdr);
823		*port	= ((struct sdp_hh *) hdr)->port;
824		*src	= &((struct sdp_hh *) hdr)->src_addr;
825		*dst	= &((struct sdp_hh *) hdr)->dst_addr;
826		break;
827	default:
828		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
829			return -EINVAL;
830
831		*ip_ver	= cma_get_ip_ver(hdr);
832		*port	= ((struct cma_hdr *) hdr)->port;
833		*src	= &((struct cma_hdr *) hdr)->src_addr;
834		*dst	= &((struct cma_hdr *) hdr)->dst_addr;
835		break;
836	}
837
838	if (*ip_ver != 4 && *ip_ver != 6)
839		return -EINVAL;
840	return 0;
841}
842
843static void cma_ip6_clear_scope_id(struct in6_addr *addr)
844{
845	/* make sure the link-local scope ID gets zeroed */
846	if (IN6_IS_SCOPE_LINKLOCAL(addr) ||
847	    IN6_IS_ADDR_MC_INTFACELOCAL(addr)) {
848		/* use byte-access to be alignment safe */
849		addr->s6_addr[2] = 0;
850		addr->s6_addr[3] = 0;
851	}
852}
853
854static void cma_save_net_info(struct rdma_addr *addr,
855			      struct rdma_addr *listen_addr,
856			      u8 ip_ver, __be16 port,
857			      union cma_ip_addr *src, union cma_ip_addr *dst)
858{
859	struct sockaddr_in *listen4, *ip4;
860	struct sockaddr_in6 *listen6, *ip6;
861
862	switch (ip_ver) {
863	case 4:
864		listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
865		ip4 = (struct sockaddr_in *) &addr->src_addr;
866		ip4->sin_family = listen4->sin_family;
867		ip4->sin_addr.s_addr = dst->ip4.addr;
868		ip4->sin_port = listen4->sin_port;
869		ip4->sin_len = sizeof(struct sockaddr_in);
870		memset(ip4->sin_zero, 0, sizeof(ip4->sin_zero));
871
872		ip4 = (struct sockaddr_in *) &addr->dst_addr;
873		ip4->sin_family = listen4->sin_family;
874		ip4->sin_addr.s_addr = src->ip4.addr;
875		ip4->sin_port = port;
876		ip4->sin_len = sizeof(struct sockaddr_in);
877		memset(ip4->sin_zero, 0, sizeof(ip4->sin_zero));
878		break;
879	case 6:
880		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
881		ip6 = (struct sockaddr_in6 *) &addr->src_addr;
882		ip6->sin6_family = listen6->sin6_family;
883		ip6->sin6_addr = dst->ip6;
884		ip6->sin6_port = listen6->sin6_port;
885		ip6->sin6_len = sizeof(struct sockaddr_in6);
886		ip6->sin6_scope_id = listen6->sin6_scope_id;
887		cma_ip6_clear_scope_id(&ip6->sin6_addr);
888
889		ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
890		ip6->sin6_family = listen6->sin6_family;
891		ip6->sin6_addr = src->ip6;
892		ip6->sin6_port = port;
893		ip6->sin6_len = sizeof(struct sockaddr_in6);
894		ip6->sin6_scope_id = listen6->sin6_scope_id;
895		cma_ip6_clear_scope_id(&ip6->sin6_addr);
896		break;
897	default:
898		break;
899	}
900}
901
902static inline int cma_user_data_offset(enum rdma_port_space ps)
903{
904	switch (ps) {
905	case RDMA_PS_SDP:
906		return 0;
907	default:
908		return sizeof(struct cma_hdr);
909	}
910}
911
912static void cma_cancel_route(struct rdma_id_private *id_priv)
913{
914	switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) {
915	case IB_LINK_LAYER_INFINIBAND:
916		if (id_priv->query)
917			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
918		break;
919	default:
920		break;
921	}
922}
923
924static void cma_cancel_listens(struct rdma_id_private *id_priv)
925{
926	struct rdma_id_private *dev_id_priv;
927
928	/*
929	 * Remove from listen_any_list to prevent added devices from spawning
930	 * additional listen requests.
931	 */
932	mutex_lock(&lock);
933	list_del(&id_priv->list);
934
935	while (!list_empty(&id_priv->listen_list)) {
936		dev_id_priv = list_entry(id_priv->listen_list.next,
937					 struct rdma_id_private, listen_list);
938		/* sync with device removal to avoid duplicate destruction */
939		list_del_init(&dev_id_priv->list);
940		list_del(&dev_id_priv->listen_list);
941		mutex_unlock(&lock);
942
943		rdma_destroy_id(&dev_id_priv->id);
944		mutex_lock(&lock);
945	}
946	mutex_unlock(&lock);
947}
948
949static void cma_cancel_operation(struct rdma_id_private *id_priv,
950				 enum cma_state state)
951{
952	switch (state) {
953	case CMA_ADDR_QUERY:
954		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
955		break;
956	case CMA_ROUTE_QUERY:
957		cma_cancel_route(id_priv);
958		break;
959	case CMA_LISTEN:
960		if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
961				&& !id_priv->cma_dev)
962			cma_cancel_listens(id_priv);
963		break;
964	default:
965		break;
966	}
967}
968
969static void cma_release_port(struct rdma_id_private *id_priv)
970{
971	struct rdma_bind_list *bind_list = id_priv->bind_list;
972
973	if (!bind_list)
974		return;
975
976	mutex_lock(&lock);
977	hlist_del(&id_priv->node);
978	if (hlist_empty(&bind_list->owners)) {
979		idr_remove(bind_list->ps, bind_list->port);
980		kfree(bind_list);
981	}
982	mutex_unlock(&lock);
983}
984
985static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
986{
987	struct cma_multicast *mc;
988
989	while (!list_empty(&id_priv->mc_list)) {
990		mc = container_of(id_priv->mc_list.next,
991				  struct cma_multicast, list);
992		list_del(&mc->list);
993		switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) {
994		case IB_LINK_LAYER_INFINIBAND:
995			ib_sa_free_multicast(mc->multicast.ib);
996			kfree(mc);
997			break;
998		case IB_LINK_LAYER_ETHERNET:
999			kref_put(&mc->mcref, release_mc);
1000			break;
1001		default:
1002			break;
1003		}
1004	}
1005}
1006
1007void rdma_destroy_id(struct rdma_cm_id *id)
1008{
1009	struct rdma_id_private *id_priv;
1010	enum cma_state state;
1011
1012	id_priv = container_of(id, struct rdma_id_private, id);
1013	state = cma_exch(id_priv, CMA_DESTROYING);
1014	cma_cancel_operation(id_priv, state);
1015
1016	mutex_lock(&lock);
1017	if (id_priv->cma_dev) {
1018		mutex_unlock(&lock);
1019		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
1020			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
1021				ib_destroy_cm_id(id_priv->cm_id.ib);
1022		} else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
1023			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
1024				iw_destroy_cm_id(id_priv->cm_id.iw);
1025		}
1026		cma_leave_mc_groups(id_priv);
1027		mutex_lock(&lock);
1028		cma_detach_from_dev(id_priv);
1029	}
1030	mutex_unlock(&lock);
1031
1032	cma_release_port(id_priv);
1033	cma_deref_id(id_priv);
1034	wait_for_completion(&id_priv->comp);
1035
1036	if (id_priv->internal_id)
1037		cma_deref_id(id_priv->id.context);
1038
1039	if (id_priv->sock != NULL && !id_priv->internal_id &&
1040	    !id_priv->unify_ps_tcp)
1041		sock_release(id_priv->sock);
1042
1043	kfree(id_priv->id.route.path_rec);
1044	kfree(id_priv);
1045}
1046EXPORT_SYMBOL(rdma_destroy_id);
1047
1048static int cma_rep_recv(struct rdma_id_private *id_priv)
1049{
1050	int ret;
1051
1052	ret = cma_modify_qp_rtr(id_priv, NULL);
1053	if (ret)
1054		goto reject;
1055
1056	ret = cma_modify_qp_rts(id_priv, NULL);
1057	if (ret)
1058		goto reject;
1059
1060	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
1061	if (ret)
1062		goto reject;
1063
1064	return 0;
1065reject:
1066	cma_modify_qp_err(id_priv);
1067	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
1068		       NULL, 0, NULL, 0);
1069	return ret;
1070}
1071
1072static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
1073{
1074	if (id_priv->id.ps == RDMA_PS_SDP &&
1075	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
1076	    SDP_MAJ_VERSION)
1077		return -EINVAL;
1078
1079	return 0;
1080}
1081
1082static void cma_set_rep_event_data(struct rdma_cm_event *event,
1083				   struct ib_cm_rep_event_param *rep_data,
1084				   void *private_data)
1085{
1086	event->param.conn.private_data = private_data;
1087	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
1088	event->param.conn.responder_resources = rep_data->responder_resources;
1089	event->param.conn.initiator_depth = rep_data->initiator_depth;
1090	event->param.conn.flow_control = rep_data->flow_control;
1091	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
1092	event->param.conn.srq = rep_data->srq;
1093	event->param.conn.qp_num = rep_data->remote_qpn;
1094}
1095
1096static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1097{
1098	struct rdma_id_private *id_priv = cm_id->context;
1099	struct rdma_cm_event event;
1100	int ret = 0;
1101
1102	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
1103		cma_disable_callback(id_priv, CMA_CONNECT)) ||
1104	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
1105		cma_disable_callback(id_priv, CMA_DISCONNECT)))
1106		return 0;
1107
1108	memset(&event, 0, sizeof event);
1109	switch (ib_event->event) {
1110	case IB_CM_REQ_ERROR:
1111	case IB_CM_REP_ERROR:
1112		event.event = RDMA_CM_EVENT_UNREACHABLE;
1113		event.status = -ETIMEDOUT;
1114		break;
1115	case IB_CM_REP_RECEIVED:
1116		event.status = cma_verify_rep(id_priv, ib_event->private_data);
1117		if (event.status)
1118			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1119		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
1120			event.status = cma_rep_recv(id_priv);
1121			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
1122						     RDMA_CM_EVENT_ESTABLISHED;
1123		} else
1124			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
1125		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
1126				       ib_event->private_data);
1127		break;
1128	case IB_CM_RTU_RECEIVED:
1129	case IB_CM_USER_ESTABLISHED:
1130		event.event = RDMA_CM_EVENT_ESTABLISHED;
1131		break;
1132	case IB_CM_DREQ_ERROR:
1133		event.status = -ETIMEDOUT; /* fall through */
1134	case IB_CM_DREQ_RECEIVED:
1135	case IB_CM_DREP_RECEIVED:
1136		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
1137			goto out;
1138		event.event = RDMA_CM_EVENT_DISCONNECTED;
1139		break;
1140	case IB_CM_TIMEWAIT_EXIT:
1141		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
1142		break;
1143	case IB_CM_MRA_RECEIVED:
1144		/* ignore event */
1145		goto out;
1146	case IB_CM_REJ_RECEIVED:
1147		cma_modify_qp_err(id_priv);
1148		event.status = ib_event->param.rej_rcvd.reason;
1149		event.event = RDMA_CM_EVENT_REJECTED;
1150		event.param.conn.private_data = ib_event->private_data;
1151		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
1152		break;
1153	default:
1154		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
1155		       ib_event->event);
1156		goto out;
1157	}
1158
1159	ret = id_priv->id.event_handler(&id_priv->id, &event);
1160	if (ret) {
1161		/* Destroy the CM ID by returning a non-zero value. */
1162		id_priv->cm_id.ib = NULL;
1163		cma_exch(id_priv, CMA_DESTROYING);
1164		mutex_unlock(&id_priv->handler_mutex);
1165		rdma_destroy_id(&id_priv->id);
1166		return ret;
1167	}
1168out:
1169	mutex_unlock(&id_priv->handler_mutex);
1170	return ret;
1171}
1172
1173static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1174					       struct ib_cm_event *ib_event)
1175{
1176	struct rdma_id_private *id_priv;
1177	struct rdma_cm_id *id;
1178	struct rdma_route *rt;
1179	union cma_ip_addr *src, *dst;
1180	__be16 port;
1181	u8 ip_ver;
1182	int ret;
1183
1184	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1185			     &ip_ver, &port, &src, &dst))
1186		goto err;
1187
1188	id = rdma_create_id(listen_id->event_handler, listen_id->context,
1189			    listen_id->ps);
1190	if (IS_ERR(id))
1191		goto err;
1192
1193	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1194			  ip_ver, port, src, dst);
1195
1196	rt = &id->route;
1197	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
1198	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
1199			       GFP_KERNEL);
1200	if (!rt->path_rec)
1201		goto destroy_id;
1202
1203	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
1204	if (rt->num_paths == 2)
1205		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1206
1207	if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
1208		rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
1209		rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1210		ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
1211	} else {
1212		ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
1213					&rt->addr.dev_addr);
1214		if (ret)
1215			goto destroy_id;
1216	}
1217	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1218
1219	id_priv = container_of(id, struct rdma_id_private, id);
1220	id_priv->state = CMA_CONNECT;
1221	return id_priv;
1222
1223destroy_id:
1224	rdma_destroy_id(id);
1225err:
1226	return NULL;
1227}
1228
1229static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1230					      struct ib_cm_event *ib_event)
1231{
1232	struct rdma_id_private *id_priv;
1233	struct rdma_cm_id *id;
1234	union cma_ip_addr *src, *dst;
1235	__be16 port;
1236	u8 ip_ver;
1237	int ret;
1238
1239	id = rdma_create_id(listen_id->event_handler, listen_id->context,
1240			    listen_id->ps);
1241	if (IS_ERR(id))
1242		return NULL;
1243
1244
1245	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1246			     &ip_ver, &port, &src, &dst))
1247		goto err;
1248
1249	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1250			  ip_ver, port, src, dst);
1251
1252	if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
1253		ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
1254					&id->route.addr.dev_addr);
1255		if (ret)
1256			goto err;
1257	}
1258
1259	id_priv = container_of(id, struct rdma_id_private, id);
1260	id_priv->state = CMA_CONNECT;
1261	return id_priv;
1262err:
1263	rdma_destroy_id(id);
1264	return NULL;
1265}
1266
1267static void cma_set_req_event_data(struct rdma_cm_event *event,
1268				   struct ib_cm_req_event_param *req_data,
1269				   void *private_data, int offset)
1270{
1271	event->param.conn.private_data = private_data + offset;
1272	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1273	event->param.conn.responder_resources = req_data->responder_resources;
1274	event->param.conn.initiator_depth = req_data->initiator_depth;
1275	event->param.conn.flow_control = req_data->flow_control;
1276	event->param.conn.retry_count = req_data->retry_count;
1277	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1278	event->param.conn.srq = req_data->srq;
1279	event->param.conn.qp_num = req_data->remote_qpn;
1280}
1281
1282static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1283{
1284	struct rdma_id_private *listen_id, *conn_id;
1285	struct rdma_cm_event event;
1286	int offset, ret;
1287
1288	listen_id = cm_id->context;
1289	if (cma_disable_callback(listen_id, CMA_LISTEN))
1290		return -ECONNABORTED;
1291
1292	memset(&event, 0, sizeof event);
1293	offset = cma_user_data_offset(listen_id->id.ps);
1294	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1295	if (cma_is_ud_ps(listen_id->id.ps)) {
1296		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
1297		event.param.ud.private_data = ib_event->private_data + offset;
1298		event.param.ud.private_data_len =
1299				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1300	} else {
1301		conn_id = cma_new_conn_id(&listen_id->id, ib_event);
1302		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1303				       ib_event->private_data, offset);
1304	}
1305	if (!conn_id) {
1306		ret = -ENOMEM;
1307		goto out;
1308	}
1309
1310	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1311	mutex_lock(&lock);
1312	ret = cma_acquire_dev(conn_id);
1313	mutex_unlock(&lock);
1314	if (ret)
1315		goto release_conn_id;
1316
1317	conn_id->cm_id.ib = cm_id;
1318	cm_id->context = conn_id;
1319	cm_id->cm_handler = cma_ib_handler;
1320
1321	ret = conn_id->id.event_handler(&conn_id->id, &event);
1322	if (!ret) {
1323		/*
1324		 * Acquire mutex to prevent the user from executing rdma_destroy_id()
1325		 * while we're accessing the cm_id.
1326		 */
1327		mutex_lock(&lock);
1328		if (cma_comp(conn_id, CMA_CONNECT) &&
1329		    !cma_is_ud_ps(conn_id->id.ps))
1330			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1331		mutex_unlock(&lock);
1332		mutex_unlock(&conn_id->handler_mutex);
1333		goto out;
1334	}
1335
1336	/* Destroy the CM ID by returning a non-zero value. */
1337	conn_id->cm_id.ib = NULL;
1338
1339release_conn_id:
1340	cma_exch(conn_id, CMA_DESTROYING);
1341	mutex_unlock(&conn_id->handler_mutex);
1342	rdma_destroy_id(&conn_id->id);
1343
1344out:
1345	mutex_unlock(&listen_id->handler_mutex);
1346	return ret;
1347}
1348
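/*
 * The IB service ID is ((u64)ps << 16) + port, byte-swapped to
 * big-endian.  For example, RDMA_PS_TCP (0x0106) with TCP port 5000
 * (0x1388) yields the service ID 0x0000000001061388.
 */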
1349static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
1350{
1351	return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
1352}
1353
1354static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1355				 struct ib_cm_compare_data *compare)
1356{
1357	struct cma_hdr *cma_data, *cma_mask;
1358	struct sdp_hh *sdp_data, *sdp_mask;
1359	__be32 ip4_addr;
1360#ifdef INET6
1361	struct in6_addr ip6_addr;
1362#endif
1363
1364	memset(compare, 0, sizeof *compare);
1365	cma_data = (void *) compare->data;
1366	cma_mask = (void *) compare->mask;
1367	sdp_data = (void *) compare->data;
1368	sdp_mask = (void *) compare->mask;
1369
1370	switch (addr->sa_family) {
1371	case AF_INET:
1372		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1373		if (ps == RDMA_PS_SDP) {
1374			sdp_set_ip_ver(sdp_data, 4);
1375			sdp_set_ip_ver(sdp_mask, 0xF);
1376			sdp_data->dst_addr.ip4.addr = ip4_addr;
1377			sdp_mask->dst_addr.ip4.addr = htonl(~0);
1378		} else {
1379			cma_set_ip_ver(cma_data, 4);
1380			cma_set_ip_ver(cma_mask, 0xF);
1381			cma_data->dst_addr.ip4.addr = ip4_addr;
1382			cma_mask->dst_addr.ip4.addr = htonl(~0);
1383		}
1384		break;
1385#ifdef INET6
1386	case AF_INET6:
1387		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1388		cma_ip6_clear_scope_id(&ip6_addr);
1389		if (ps == RDMA_PS_SDP) {
1390			sdp_set_ip_ver(sdp_data, 6);
1391			sdp_set_ip_ver(sdp_mask, 0xF);
1392			sdp_data->dst_addr.ip6 = ip6_addr;
1393			memset(&sdp_mask->dst_addr.ip6, 0xFF,
1394			       sizeof sdp_mask->dst_addr.ip6);
1395		} else {
1396			cma_set_ip_ver(cma_data, 6);
1397			cma_set_ip_ver(cma_mask, 0xF);
1398			cma_data->dst_addr.ip6 = ip6_addr;
1399			memset(&cma_mask->dst_addr.ip6, 0xFF,
1400			       sizeof cma_mask->dst_addr.ip6);
1401		}
1402		break;
1403#endif
1404	default:
1405		break;
1406	}
1407}
1408
1409static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1410{
1411	struct rdma_id_private *id_priv = iw_id->context;
1412	struct rdma_cm_event event;
1413	struct sockaddr_in *sin;
1414	int ret = 0;
1415
1416	if (cma_disable_callback(id_priv, CMA_CONNECT))
1417		return 0;
1418
1419	memset(&event, 0, sizeof event);
1420	switch (iw_event->event) {
1421	case IW_CM_EVENT_CLOSE:
1422		event.event = RDMA_CM_EVENT_DISCONNECTED;
1423		break;
1424	case IW_CM_EVENT_CONNECT_REPLY:
1425		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1426		*sin = iw_event->local_addr;
1427		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
1428		*sin = iw_event->remote_addr;
1429		switch ((int)iw_event->status) {
1430		case 0:
1431			event.event = RDMA_CM_EVENT_ESTABLISHED;
1432			break;
1433		case -ECONNRESET:
1434		case -ECONNREFUSED:
1435			event.event = RDMA_CM_EVENT_REJECTED;
1436			break;
1437		case -ETIMEDOUT:
1438			event.event = RDMA_CM_EVENT_UNREACHABLE;
1439			break;
1440		default:
1441			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1442			break;
1443		}
1444		break;
1445	case IW_CM_EVENT_ESTABLISHED:
1446		event.event = RDMA_CM_EVENT_ESTABLISHED;
1447		break;
1448	default:
1449		BUG_ON(1);
1450	}
1451
1452	event.status = iw_event->status;
1453	event.param.conn.private_data = iw_event->private_data;
1454	event.param.conn.private_data_len = iw_event->private_data_len;
1455	ret = id_priv->id.event_handler(&id_priv->id, &event);
1456	if (ret) {
1457		/* Destroy the CM ID by returning a non-zero value. */
1458		id_priv->cm_id.iw = NULL;
1459		cma_exch(id_priv, CMA_DESTROYING);
1460		mutex_unlock(&id_priv->handler_mutex);
1461		rdma_destroy_id(&id_priv->id);
1462		return ret;
1463	}
1464
1465	mutex_unlock(&id_priv->handler_mutex);
1466	return ret;
1467}
1468
1469static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1470			       struct iw_cm_event *iw_event)
1471{
1472	struct rdma_cm_id *new_cm_id;
1473	struct rdma_id_private *listen_id, *conn_id;
1474	struct sockaddr_in *sin;
1475	struct net_device *dev = NULL;
1476	struct rdma_cm_event event;
1477	int ret;
1478	struct ib_device_attr attr;
1479
1480	listen_id = cm_id->context;
1481	if (cma_disable_callback(listen_id, CMA_LISTEN))
1482		return -ECONNABORTED;
1483
1484	/* Create a new RDMA id for the new IW CM ID */
1485	new_cm_id = rdma_create_id(listen_id->id.event_handler,
1486				   listen_id->id.context,
1487				   RDMA_PS_TCP);
1488	if (IS_ERR(new_cm_id)) {
1489		ret = -ENOMEM;
1490		goto out;
1491	}
1492	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1493	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1494	conn_id->state = CMA_CONNECT;
1495
1496	dev = ip_dev_find(NULL, iw_event->local_addr.sin_addr.s_addr);
1497	if (!dev) {
1498		ret = -EADDRNOTAVAIL;
1499		mutex_unlock(&conn_id->handler_mutex);
1500		rdma_destroy_id(new_cm_id);
1501		goto out;
1502	}
1503	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1504	if (ret) {
1505		mutex_unlock(&conn_id->handler_mutex);
1506		rdma_destroy_id(new_cm_id);
1507		goto out;
1508	}
1509
1510	mutex_lock(&lock);
1511	ret = cma_acquire_dev(conn_id);
1512	mutex_unlock(&lock);
1513	if (ret) {
1514		mutex_unlock(&conn_id->handler_mutex);
1515		rdma_destroy_id(new_cm_id);
1516		goto out;
1517	}
1518
1519	conn_id->cm_id.iw = cm_id;
1520	cm_id->context = conn_id;
1521	cm_id->cm_handler = cma_iw_handler;
1522
1523	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
1524	*sin = iw_event->local_addr;
1525	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1526	*sin = iw_event->remote_addr;
1527
1528	ret = ib_query_device(conn_id->id.device, &attr);
1529	if (ret) {
1530		mutex_unlock(&conn_id->handler_mutex);
1531		rdma_destroy_id(new_cm_id);
1532		goto out;
1533	}
1534
1535	memset(&event, 0, sizeof event);
1536	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1537	event.param.conn.private_data = iw_event->private_data;
1538	event.param.conn.private_data_len = iw_event->private_data_len;
1539	event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
1540	event.param.conn.responder_resources = attr.max_qp_rd_atom;
1541	ret = conn_id->id.event_handler(&conn_id->id, &event);
1542	if (ret) {
1543		/* User wants to destroy the CM ID */
1544		conn_id->cm_id.iw = NULL;
1545		cma_exch(conn_id, CMA_DESTROYING);
1546		mutex_unlock(&conn_id->handler_mutex);
1547		rdma_destroy_id(&conn_id->id);
1548		goto out;
1549	}
1550
1551	mutex_unlock(&conn_id->handler_mutex);
1552
1553out:
1554	if (dev)
1555		dev_put(dev);
1556	mutex_unlock(&listen_id->handler_mutex);
1557	return ret;
1558}
1559
1560static int cma_ib_listen(struct rdma_id_private *id_priv)
1561{
1562	struct ib_cm_compare_data compare_data;
1563	struct sockaddr *addr;
1564	__be64 svc_id;
1565	int ret;
1566
1567	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
1568					    id_priv);
1569	if (IS_ERR(id_priv->cm_id.ib))
1570		return PTR_ERR(id_priv->cm_id.ib);
1571
1572	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
1573	svc_id = cma_get_service_id(id_priv->id.ps, addr);
1574	if (cma_any_addr(addr))
1575		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1576	else {
1577		cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1578		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1579	}
1580
1581	if (ret) {
1582		ib_destroy_cm_id(id_priv->cm_id.ib);
1583		id_priv->cm_id.ib = NULL;
1584	}
1585
1586	return ret;
1587}
1588
1589static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1590{
1591	int ret;
1592	struct sockaddr_in *sin;
1593
1594	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
1595					    id_priv->sock,
1596					    iw_conn_req_handler,
1597					    id_priv);
1598	if (IS_ERR(id_priv->cm_id.iw))
1599		return PTR_ERR(id_priv->cm_id.iw);
1600
1601	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1602	id_priv->cm_id.iw->local_addr = *sin;
1603
1604	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1605
1606	if (ret) {
1607		iw_destroy_cm_id(id_priv->cm_id.iw);
1608		id_priv->cm_id.iw = NULL;
1609	}
1610
1611	return ret;
1612}
1613
1614static int cma_listen_handler(struct rdma_cm_id *id,
1615			      struct rdma_cm_event *event)
1616{
1617	struct rdma_id_private *id_priv = id->context;
1618
1619	id->context = id_priv->id.context;
1620	id->event_handler = id_priv->id.event_handler;
1621	return id_priv->id.event_handler(id, event);
1622}
1623
1624static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1625			      struct cma_device *cma_dev)
1626{
1627	struct rdma_id_private *dev_id_priv;
1628	struct rdma_cm_id *id;
1629	int ret;
1630
1631	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1632	if (IS_ERR(id))
1633		return;
1634
1635	dev_id_priv = container_of(id, struct rdma_id_private, id);
1636
1637	dev_id_priv->state = CMA_ADDR_BOUND;
1638	dev_id_priv->sock = id_priv->sock;
1639	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1640	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
1641
1642	cma_attach_to_dev(dev_id_priv, cma_dev);
1643	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1644	atomic_inc(&id_priv->refcount);
1645	dev_id_priv->internal_id = 1;
1646
1647	ret = rdma_listen(id, id_priv->backlog);
1648	if (ret)
1649		printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
1650		       "listening on device %s\n", ret, cma_dev->device->name);
1651}
1652
1653static void cma_listen_on_all(struct rdma_id_private *id_priv)
1654{
1655	struct cma_device *cma_dev;
1656
1657	mutex_lock(&lock);
1658	list_add_tail(&id_priv->list, &listen_any_list);
1659	list_for_each_entry(cma_dev, &dev_list, list)
1660		cma_listen_on_dev(id_priv, cma_dev);
1661	mutex_unlock(&lock);
1662}
1663
1664int rdma_listen(struct rdma_cm_id *id, int backlog)
1665{
1666	struct rdma_id_private *id_priv;
1667	int ret;
1668
1669	id_priv = container_of(id, struct rdma_id_private, id);
1670	if (id_priv->state == CMA_IDLE) {
1671		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
1672		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
1673		if (ret)
1674			return ret;
1675	}
1676
1677	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1678		return -EINVAL;
1679
1680	id_priv->backlog = backlog;
1681	if (id->device) {
1682		if (rdma_cap_ib_cm(id->device, 1)) {
1683			ret = cma_ib_listen(id_priv);
1684			if (ret)
1685				goto err;
1686		} else if (rdma_cap_iw_cm(id->device, 1)) {
1687			ret = cma_iw_listen(id_priv, backlog);
1688			if (ret)
1689				goto err;
1690		} else {
1691			ret = -ENOSYS;
1692			goto err;
1693		}
1694	} else
1695		cma_listen_on_all(id_priv);
1696
1697	return 0;
1698err:
1699	id_priv->backlog = 0;
1700	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1701	return ret;
1702}
1703EXPORT_SYMBOL(rdma_listen);
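
/*
 * Passive-side usage sketch (illustrative only; the handler and the
 * sockaddr below are hypothetical consumer code):
 *
 *	id = rdma_create_id(my_listen_handler, ctx, RDMA_PS_TCP);
 *	rdma_bind_addr(id, (struct sockaddr *)&sin);
 *	rdma_listen(id, 16);
 *
 * Each incoming request is reported to my_listen_handler() as
 * RDMA_CM_EVENT_CONNECT_REQUEST on a freshly created child id, which
 * the consumer completes with rdma_create_qp() and rdma_accept() or
 * drops with rdma_reject().
 */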
1704
1705void rdma_set_service_type(struct rdma_cm_id *id, int tos)
1706{
1707	struct rdma_id_private *id_priv;
1708
1709	id_priv = container_of(id, struct rdma_id_private, id);
1710	id_priv->tos = (u8) tos;
1711}
1712EXPORT_SYMBOL(rdma_set_service_type);
1713
1714static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1715			      void *context)
1716{
1717	struct cma_work *work = context;
1718	struct rdma_route *route;
1719
1720	route = &work->id->id.route;
1721
1722	if (!status) {
1723		route->num_paths = 1;
1724		*route->path_rec = *path_rec;
1725	} else {
1726		work->old_state = CMA_ROUTE_QUERY;
1727		work->new_state = CMA_ADDR_RESOLVED;
1728		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1729		work->event.status = status;
1730	}
1731
1732	queue_work(cma_wq, &work->work);
1733}
1734
1735static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1736			      struct cma_work *work)
1737{
1738	struct rdma_addr *addr = &id_priv->id.route.addr;
1739	struct ib_sa_path_rec path_rec;
1740	ib_sa_comp_mask comp_mask;
1741	struct sockaddr_in6 *sin6;
1742
1743	memset(&path_rec, 0, sizeof path_rec);
1744	rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
1745	rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
1746	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
1747	path_rec.numb_path = 1;
1748	path_rec.reversible = 1;
1749	path_rec.service_id = cma_get_service_id(id_priv->id.ps,
1750							(struct sockaddr *) &addr->dst_addr);
1751
1752	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1753		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1754		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1755
1756	if (addr->src_addr.ss_family == AF_INET) {
1757		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1758		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1759	} else {
1760		sin6 = (struct sockaddr_in6 *) &addr->src_addr;
1761		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1762		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1763	}
1764
1765	if (tavor_quirk) {
1766		path_rec.mtu_selector = IB_SA_LT;
1767		path_rec.mtu = IB_MTU_2048;
1768	}
1769
1770	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1771					       id_priv->id.port_num, &path_rec,
1772					       comp_mask, timeout_ms,
1773					       GFP_KERNEL, cma_query_handler,
1774					       work, &id_priv->query);
1775
1776	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1777}
1778
1779static void cma_work_handler(struct work_struct *_work)
1780{
1781	struct cma_work *work = container_of(_work, struct cma_work, work);
1782	struct rdma_id_private *id_priv = work->id;
1783	int destroy = 0;
1784
1785	mutex_lock(&id_priv->handler_mutex);
1786	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1787		goto out;
1788
1789	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1790		cma_exch(id_priv, CMA_DESTROYING);
1791		destroy = 1;
1792	}
1793out:
1794	mutex_unlock(&id_priv->handler_mutex);
1795	cma_deref_id(id_priv);
1796	if (destroy)
1797		rdma_destroy_id(&id_priv->id);
1798	kfree(work);
1799}
1800
1801static void cma_ndev_work_handler(struct work_struct *_work)
1802{
1803	struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
1804	struct rdma_id_private *id_priv = work->id;
1805	int destroy = 0;
1806
1807	mutex_lock(&id_priv->handler_mutex);
1808	if (id_priv->state == CMA_DESTROYING ||
1809	    id_priv->state == CMA_DEVICE_REMOVAL)
1810		goto out;
1811
1812	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1813		cma_exch(id_priv, CMA_DESTROYING);
1814		destroy = 1;
1815	}
1816
1817out:
1818	mutex_unlock(&id_priv->handler_mutex);
1819	cma_deref_id(id_priv);
1820	if (destroy)
1821		rdma_destroy_id(&id_priv->id);
1822	kfree(work);
1823}
1824
1825static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1826{
1827	struct rdma_route *route = &id_priv->id.route;
1828	struct cma_work *work;
1829	int ret;
1830
1831	work = kzalloc(sizeof *work, GFP_KERNEL);
1832	if (!work)
1833		return -ENOMEM;
1834
1835	work->id = id_priv;
1836	INIT_WORK(&work->work, cma_work_handler);
1837	work->old_state = CMA_ROUTE_QUERY;
1838	work->new_state = CMA_ROUTE_RESOLVED;
1839	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1840
1841	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1842	if (!route->path_rec) {
1843		ret = -ENOMEM;
1844		goto err1;
1845	}
1846
1847	ret = cma_query_ib_route(id_priv, timeout_ms, work);
1848	if (ret)
1849		goto err2;
1850
1851	return 0;
1852err2:
1853	kfree(route->path_rec);
1854	route->path_rec = NULL;
1855err1:
1856	kfree(work);
1857	return ret;
1858}
1859
1860int rdma_set_ib_paths(struct rdma_cm_id *id,
1861		      struct ib_sa_path_rec *path_rec, int num_paths)
1862{
1863	struct rdma_id_private *id_priv;
1864	int ret;
1865
1866	id_priv = container_of(id, struct rdma_id_private, id);
1867	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1868		return -EINVAL;
1869
1870	id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1871	if (!id->route.path_rec) {
1872		ret = -ENOMEM;
1873		goto err;
1874	}
1875
1876	memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1877	return 0;
1878err:
1879	cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1880	return ret;
1881}
1882EXPORT_SYMBOL(rdma_set_ib_paths);
1883
1884static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1885{
1886	struct cma_work *work;
1887
1888	work = kzalloc(sizeof *work, GFP_KERNEL);
1889	if (!work)
1890		return -ENOMEM;
1891
1892	work->id = id_priv;
1893	INIT_WORK(&work->work, cma_work_handler);
1894	work->old_state = CMA_ROUTE_QUERY;
1895	work->new_state = CMA_ROUTE_RESOLVED;
1896	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1897	queue_work(cma_wq, &work->work);
1898	return 0;
1899}
1900
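/*
 * Note: the ToS argument is currently unused; the RoCE SL is taken
 * from the def_prec2sl module parameter, masked to the 3-bit range
 * advertised in its parameter description.
 */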
1901static u8 tos_to_sl(u8 tos)
1902{
1903	return def_prec2sl & 7;
1904}
1905
1906static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
1907{
1908	struct rdma_route *route = &id_priv->id.route;
1909	struct rdma_addr *addr = &route->addr;
1910	struct cma_work *work;
1911	int ret;
1912	struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
1913	struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
1914	struct net_device *ndev = NULL;
1915	u16 vid;
1916
1917	if (src_addr->sin_family != dst_addr->sin_family)
1918		return -EINVAL;
1919
1920	work = kzalloc(sizeof *work, GFP_KERNEL);
1921	if (!work)
1922		return -ENOMEM;
1923
1924	work->id = id_priv;
1925	INIT_WORK(&work->work, cma_work_handler);
1926
1927	route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
1928	if (!route->path_rec) {
1929		ret = -ENOMEM;
1930		goto err1;
1931	}
1932
1933	route->num_paths = 1;
1934
1935	if (addr->dev_addr.bound_dev_if)
1936		ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
1937	if (!ndev) {
1938		ret = -ENODEV;
1939		goto err2;
1940	}
1941
1942	vid = rdma_vlan_dev_vlan_id(ndev);
1943
1944	iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
1945	iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
1946
1947	route->path_rec->hop_limit = 1;
1948	route->path_rec->reversible = 1;
1949	route->path_rec->pkey = cpu_to_be16(0xffff);
1950	route->path_rec->mtu_selector = IB_SA_EQ;
1951	route->path_rec->sl = tos_to_sl(id_priv->tos);
1952
1953#ifdef __linux__
1954	route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
1955#else
1956	route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu);
1957#endif
1958	route->path_rec->rate_selector = IB_SA_EQ;
1959	route->path_rec->rate = iboe_get_rate(ndev);
1960	dev_put(ndev);
1961	route->path_rec->packet_life_time_selector = IB_SA_EQ;
1962	route->path_rec->packet_life_time = IBOE_PACKET_LIFETIME;
1963	if (!route->path_rec->mtu) {
1964		ret = -EINVAL;
1965		goto err2;
1966	}
1967
1968	work->old_state = CMA_ROUTE_QUERY;
1969	work->new_state = CMA_ROUTE_RESOLVED;
1970	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1971	work->event.status = 0;
1972
1973	queue_work(cma_wq, &work->work);
1974
1975	return 0;
1976
1977err2:
1978	kfree(route->path_rec);
1979	route->path_rec = NULL;
1980err1:
1981	kfree(work);
1982	return ret;
1983}
1984
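/*
 * Resolve routing information for a connection after the destination
 * address has been resolved.  IB ports query the SA, RoCE ports build
 * the route locally, and iWARP ports have nothing to resolve and simply
 * report completion.
 */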
1985int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1986{
1987	struct rdma_id_private *id_priv;
1988	int ret;
1989
1990	id_priv = container_of(id, struct rdma_id_private, id);
1991	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1992		return -EINVAL;
1993
1994	atomic_inc(&id_priv->refcount);
1995	if (rdma_cap_ib_sa(id->device, id->port_num))
1996		ret = cma_resolve_ib_route(id_priv, timeout_ms);
1997	else if (rdma_protocol_roce(id->device, id->port_num))
1998		ret = cma_resolve_iboe_route(id_priv);
1999	else if (rdma_protocol_iwarp(id->device, id->port_num))
2000		ret = cma_resolve_iw_route(id_priv, timeout_ms);
2001	else
2002		ret = -ENOSYS;
2003
2004	if (ret)
2005		goto err;
2006
2007	return 0;
2008err:
2009	cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
2010	cma_deref_id(id_priv);
2011	return ret;
2012}
2013EXPORT_SYMBOL(rdma_resolve_route);
2014
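/*
 * Bind an unbound ID to some local device for loopback use: pick the
 * first active port found on any registered device (falling back to
 * port 1 of the first device) and take its GID and P_Key as the local
 * address.
 */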
2015static int cma_bind_loopback(struct rdma_id_private *id_priv)
2016{
2017	struct cma_device *cma_dev;
2018	struct ib_port_attr port_attr;
2019	union ib_gid gid;
2020	u16 pkey;
2021	int ret;
2022	u8 p;
2023
2024	mutex_lock(&lock);
2025	if (list_empty(&dev_list)) {
2026		ret = -ENODEV;
2027		goto out;
2028	}
2029	list_for_each_entry(cma_dev, &dev_list, list)
2030		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
2031			if (!ib_query_port(cma_dev->device, p, &port_attr) &&
2032			    port_attr.state == IB_PORT_ACTIVE)
2033				goto port_found;
2034
2035	p = 1;
2036	cma_dev = list_entry(dev_list.next, struct cma_device, list);
2037
2038port_found:
2039	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
2040	if (ret)
2041		goto out;
2042
2043	ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
2044	if (ret)
2045		goto out;
2046
2047	id_priv->id.route.addr.dev_addr.dev_type =
2048		(rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ?
2049		ARPHRD_INFINIBAND : ARPHRD_ETHER;
2050
2051	rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2052	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
2053	id_priv->id.port_num = p;
2054	cma_attach_to_dev(id_priv, cma_dev);
2055out:
2056	mutex_unlock(&lock);
2057	return ret;
2058}
2059
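/*
 * Callback from the address resolution service.  On success, bind the
 * ID to an RDMA device matching the resolved source address and report
 * RDMA_CM_EVENT_ADDR_RESOLVED; on failure, fall back to the bound state
 * and report RDMA_CM_EVENT_ADDR_ERROR.
 */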
2060static void addr_handler(int status, struct sockaddr *src_addr,
2061			 struct rdma_dev_addr *dev_addr, void *context)
2062{
2063	struct rdma_id_private *id_priv = context;
2064	struct rdma_cm_event event;
2065
2066	memset(&event, 0, sizeof event);
2067	mutex_lock(&id_priv->handler_mutex);
2068
2069	/*
2070	 * Grab mutex to block rdma_destroy_id() from removing the device while
2071	 * we're trying to acquire it.
2072	 */
2073	mutex_lock(&lock);
2074	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
2075		mutex_unlock(&lock);
2076		goto out;
2077	}
2078
2079	if (!status && !id_priv->cma_dev)
2080		status = cma_acquire_dev(id_priv);
2081	mutex_unlock(&lock);
2082
2083	if (status) {
2084		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
2085			goto out;
2086		event.event = RDMA_CM_EVENT_ADDR_ERROR;
2087		event.status = status;
2088	} else {
2089		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
2090		       ip_addr_size(src_addr));
2091		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2092	}
2093
2094	if (id_priv->id.event_handler(&id_priv->id, &event)) {
2095		cma_exch(id_priv, CMA_DESTROYING);
2096		mutex_unlock(&id_priv->handler_mutex);
2097		cma_deref_id(id_priv);
2098		rdma_destroy_id(&id_priv->id);
2099		return;
2100	}
2101out:
2102	mutex_unlock(&id_priv->handler_mutex);
2103	cma_deref_id(id_priv);
2104}
2105
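/*
 * Resolve a loopback destination: bind to a local device if needed, use
 * our own GID as the destination GID, copy the destination IP into the
 * source address when the latter is still unspecified, and report
 * address resolution from the work queue.
 */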
2106static int cma_resolve_loopback(struct rdma_id_private *id_priv)
2107{
2108	struct cma_work *work;
2109	struct sockaddr *src, *dst;
2110	union ib_gid gid;
2111	int ret;
2112
2113	work = kzalloc(sizeof *work, GFP_KERNEL);
2114	if (!work)
2115		return -ENOMEM;
2116
2117	if (!id_priv->cma_dev) {
2118		ret = cma_bind_loopback(id_priv);
2119		if (ret)
2120			goto err;
2121	}
2122
2123	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2124	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
2125
2126	src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
2127	if (cma_zero_addr(src)) {
2128		dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
2129		if ((src->sa_family = dst->sa_family) == AF_INET) {
2130			((struct sockaddr_in *) src)->sin_addr.s_addr =
2131				((struct sockaddr_in *) dst)->sin_addr.s_addr;
2132		} else {
2133			ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
2134				       &((struct sockaddr_in6 *) dst)->sin6_addr);
2135		}
2136	}
2137
2138	work->id = id_priv;
2139	INIT_WORK(&work->work, cma_work_handler);
2140	work->old_state = CMA_ADDR_QUERY;
2141	work->new_state = CMA_ADDR_RESOLVED;
2142	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2143	queue_work(cma_wq, &work->work);
2144	return 0;
2145err:
2146	kfree(work);
2147	return ret;
2148}
2149
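/*
 * Make sure the ID is bound before address resolution.  When no usable
 * source address is given, the destination's family (and IPv6 scope,
 * for link-local destinations) is adopted and a wildcard bind of that
 * family is performed so that a port gets reserved.
 */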
2150static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2151			 struct sockaddr *dst_addr)
2152{
2153	if (!src_addr || !src_addr->sa_family) {
2154		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2155		src_addr->sa_family = dst_addr->sa_family;
2156#ifdef INET6
2157		if (dst_addr->sa_family == AF_INET6) {
2158			struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
2159			struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
2160			src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
2161			if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr) ||
2162			    IN6_IS_ADDR_MC_INTFACELOCAL(&dst_addr6->sin6_addr))
2163				id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
2164		}
2165#endif
2166	}
2167	if (!cma_any_addr(src_addr))
2168		return rdma_bind_addr(id, src_addr);
2169	else {
2170#if defined(INET6) || defined(INET)
2171		union {
2172#ifdef INET
2173			struct sockaddr_in in;
2174#endif
2175#ifdef INET6
2176			struct sockaddr_in6 in6;
2177#endif
2178		} addr;
2179#endif
2180
2181		switch (dst_addr->sa_family) {
2182#ifdef INET
2183		case AF_INET:
2184			memset(&addr.in, 0, sizeof(addr.in));
2185			addr.in.sin_family = dst_addr->sa_family;
2186			addr.in.sin_len = sizeof(addr.in);
2187			return rdma_bind_addr(id, (struct sockaddr *)&addr.in);
2188#endif
2189#ifdef INET6
2190		case AF_INET6:
2191			memset(&addr.in6, 0, sizeof(addr.in6));
2192			addr.in6.sin6_family = dst_addr->sa_family;
2193			addr.in6.sin6_len = sizeof(addr.in6);
2194			addr.in6.sin6_scope_id =
2195			    ((struct sockaddr_in6 *)dst_addr)->sin6_scope_id;
2196			return rdma_bind_addr(id, (struct sockaddr *)&addr.in6);
2197#endif
2198		default:
2199			return -EINVAL;
2200		}
2201	}
2202}
2203
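/*
 * Resolve a destination IP address into RDMA addressing information and
 * bind the ID to a local device.  Resolution completes asynchronously:
 * addr_handler() (or the loopback work item) delivers
 * RDMA_CM_EVENT_ADDR_RESOLVED or RDMA_CM_EVENT_ADDR_ERROR to the
 * consumer's event handler.
 *
 * Typical active-side flow, as an illustrative sketch only (the
 * variable names, port and timeout below are hypothetical):
 *
 *	struct sockaddr_in dst = { .sin_family = AF_INET,
 *				   .sin_port = htons(my_port) };
 *	(dst.sin_addr filled in by the caller)
 *	ret = rdma_resolve_addr(cm_id, NULL, (struct sockaddr *)&dst, 2000);
 *	(wait for RDMA_CM_EVENT_ADDR_RESOLVED, then call
 *	 rdma_resolve_route() and finally rdma_connect())
 */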
2204int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2205		      struct sockaddr *dst_addr, int timeout_ms)
2206{
2207	struct rdma_id_private *id_priv;
2208	int ret;
2209
2210	id_priv = container_of(id, struct rdma_id_private, id);
2211	if (id_priv->state == CMA_IDLE) {
2212		ret = cma_bind_addr(id, src_addr, dst_addr);
2213		if (ret)
2214			return ret;
2215	}
2216
2217	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
2218		return -EINVAL;
2219
2220	atomic_inc(&id_priv->refcount);
2221	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
2222	if (cma_any_addr(dst_addr))
2223		ret = cma_resolve_loopback(id_priv);
2224	else
2225		ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
2226				      dst_addr, &id->route.addr.dev_addr,
2227				      timeout_ms, addr_handler, id_priv);
2228	if (ret)
2229		goto err;
2230
2231	return 0;
2232err:
2233	cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
2234	cma_deref_id(id_priv);
2235	return ret;
2236}
2237EXPORT_SYMBOL(rdma_resolve_addr);
2238
2239static void cma_bind_port(struct rdma_bind_list *bind_list,
2240			  struct rdma_id_private *id_priv)
2241{
2242	struct sockaddr_in *sin;
2243
2244	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2245	sin->sin_port = htons(bind_list->port);
2246	id_priv->bind_list = bind_list;
2247	hlist_add_head(&id_priv->node, &bind_list->owners);
2248}
2249
2250static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
2251			  unsigned short snum)
2252{
2253	struct rdma_bind_list *bind_list;
2254	int port, ret;
2255
2256	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
2257	if (!bind_list)
2258		return -ENOMEM;
2259
2260	do {
2261		ret = idr_get_new_above(ps, bind_list, snum, &port);
2262	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
2263
2264	if (ret)
2265		goto err1;
2266
2267	if (port != snum) {
2268		ret = -EADDRNOTAVAIL;
2269		goto err2;
2270	}
2271
2272	bind_list->ps = ps;
2273	bind_list->port = (unsigned short) port;
2274	cma_bind_port(bind_list, id_priv);
2275	return 0;
2276err2:
2277	idr_remove(ps, port);
2278err1:
2279	kfree(bind_list);
2280	return ret;
2281}
2282
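/*
 * Allocate an ephemeral port from the local port range, starting at the
 * rotating next_port hint and wrapping around to the low end once
 * before giving up with -EADDRNOTAVAIL.
 */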
2283static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
2284{
2285#if defined(INET)
2286	struct rdma_bind_list *bind_list;
2287	int port, ret, low, high;
2288
2289	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
2290	if (!bind_list)
2291		return -ENOMEM;
2292
2293retry:
2294	/* FIXME: add proper port randomization, as inet_csk_get_port() does */
2295	do {
2296		ret = idr_get_new_above(ps, bind_list, next_port, &port);
2297	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
2298
2299	if (ret)
2300		goto err1;
2301
2302	inet_get_local_port_range(&low, &high);
2303	if (port > high) {
2304		if (next_port != low) {
2305			idr_remove(ps, port);
2306			next_port = low;
2307			goto retry;
2308		}
2309		ret = -EADDRNOTAVAIL;
2310		goto err2;
2311	}
2312
2313	if (port == high)
2314		next_port = low;
2315	else
2316		next_port = port + 1;
2317
2318	bind_list->ps = ps;
2319	bind_list->port = (unsigned short) port;
2320	cma_bind_port(bind_list, id_priv);
2321	return 0;
2322err2:
2323	idr_remove(ps, port);
2324err1:
2325	kfree(bind_list);
2326	return ret;
2327#else
2328	return -ENOSPC;
2329#endif
2330}
2331
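/*
 * Bind to a caller-specified port.  If nobody owns the port yet it is
 * simply allocated; otherwise reuse is only allowed when neither the
 * new ID nor any existing owner is bound to the wildcard address and
 * the specific addresses do not collide.
 */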
2332static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
2333{
2334	struct rdma_id_private *cur_id;
2335	struct sockaddr_in *sin, *cur_sin;
2336	struct rdma_bind_list *bind_list;
2337	struct hlist_node *node;
2338	unsigned short snum;
2339
2340	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2341	snum = ntohs(sin->sin_port);
2342#ifdef __linux__
2343	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
2344		return -EACCES;
2345#endif
2346
2347	bind_list = idr_find(ps, snum);
2348	if (!bind_list)
2349		return cma_alloc_port(ps, id_priv, snum);
2350
2351	/*
2352	 * Binding to the wildcard (any) address is not supported when another
2353	 * ID is already bound to a specific address on the same port.
2354	 */
2355	if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr))
2356		return -EADDRNOTAVAIL;
2357
2358	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
2359		if (cma_any_addr((struct sockaddr *) &cur_id->id.route.addr.src_addr))
2360			return -EADDRNOTAVAIL;
2361
2362		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
2363		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
2364			return -EADDRINUSE;
2365	}
2366
2367	cma_bind_port(bind_list, id_priv);
2368	return 0;
2369}
2370
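/*
 * Reserve the TCP port in the host network stack by binding a kernel
 * socket to the ID's source address; used when an iWARP device shares
 * the TCP port space.  The socket is kept in id_priv->sock so the
 * reservation lives as long as the ID.
 */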
2371static int cma_get_tcp_port(struct rdma_id_private *id_priv)
2372{
2373	int ret;
2374	int size;
2375	struct socket *sock;
2376
2377	ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
2378	if (ret)
2379		return ret;
2380#ifdef __linux__
2381	ret = sock->ops->bind(sock,
2382			(struct sockaddr *) &id_priv->id.route.addr.src_addr,
2383			ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
2384#else
2385	SOCK_LOCK(sock);
2386	sock->so_options |= SO_REUSEADDR;
2387	SOCK_UNLOCK(sock);
2388
2389	ret = -sobind(sock,
2390			(struct sockaddr *)&id_priv->id.route.addr.src_addr,
2391			curthread);
2392#endif
2393	if (ret) {
2394		sock_release(sock);
2395		return ret;
2396	}
2397
2398	size = ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr);
2399	ret = sock_getname(sock,
2400			(struct sockaddr *) &id_priv->id.route.addr.src_addr,
2401			&size, 0);
2402	if (ret) {
2403		sock_release(sock);
2404		return ret;
2405	}
2406
2407	id_priv->sock = sock;
2408	return 0;
2409}
2410
2411static int cma_get_port(struct rdma_id_private *id_priv)
2412{
2413	struct cma_device *cma_dev;
2414	struct idr *ps;
2415	int ret;
2416
2417	switch (id_priv->id.ps) {
2418	case RDMA_PS_SDP:
2419		ps = &sdp_ps;
2420		break;
2421	case RDMA_PS_TCP:
2422		ps = &tcp_ps;
2423
2424		mutex_lock(&lock);
2425		/* iWARP devices share the host TCP port space. */
2426		list_for_each_entry(cma_dev, &dev_list, list) {
2427			if (rdma_protocol_iwarp(cma_dev->device, 1)) {
2428				id_priv->unify_ps_tcp = 1;
2429				break;
2430			}
2431		}
2432		mutex_unlock(&lock);
2433
2434		if (id_priv->unify_ps_tcp) {
2435			ret = cma_get_tcp_port(id_priv);
2436			if (ret)
2437				goto out;
2438		}
2439		break;
2440	case RDMA_PS_UDP:
2441		ps = &udp_ps;
2442		break;
2443	case RDMA_PS_IPOIB:
2444		ps = &ipoib_ps;
2445		break;
2446	default:
2447		return -EPROTONOSUPPORT;
2448	}
2449
2450	mutex_lock(&lock);
2451	if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr))
2452		ret = cma_alloc_any_port(ps, id_priv);
2453	else
2454		ret = cma_use_port(ps, id_priv);
2455	mutex_unlock(&lock);
2456out:
2457	return ret;
2458}
2459
2460static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
2461			       struct sockaddr *addr)
2462{
2463#ifdef INET6
2464	struct sockaddr_in6 sin6;
2465
2466	if (addr->sa_family != AF_INET6)
2467		return 0;
2468
2469	sin6 = *(struct sockaddr_in6 *)addr;
2470
2471	if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr) ||
2472	    IN6_IS_ADDR_MC_INTFACELOCAL(&sin6.sin6_addr)) {
2473		/* check if IPv6 scope ID is set */
2474		/* A link-local address must carry a valid scope ID. */
2475			return -EINVAL;
2476		dev_addr->bound_dev_if = sin6.sin6_scope_id;
2477	}
2478#endif
2479	return (0);
2480}
2481
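/*
 * Associate a local IP address with the ID and reserve a port in the
 * port space chosen at ID creation.  Binding to a specific address also
 * attaches the ID to the RDMA device that owns it; binding to the
 * wildcard address defers device selection until later.
 *
 * Minimal listener-side sketch, for illustration only (variable names
 * and the port are the caller's, not part of this file):
 *
 *	struct sockaddr_in src = { .sin_family = AF_INET,
 *				   .sin_port = htons(my_port) };
 *	(sin_addr left as INADDR_ANY for a wildcard bind)
 *	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&src);
 *	if (!ret)
 *		ret = rdma_listen(cm_id, backlog);
 */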
2482int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2483{
2484	struct rdma_id_private *id_priv;
2485	int ret;
2486
2487	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
2488		return -EAFNOSUPPORT;
2489
2490	id_priv = container_of(id, struct rdma_id_private, id);
2491	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
2492		return -EINVAL;
2493
2494	ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
2495	if (ret)
2496		goto err1;
2497
2498	if (!cma_any_addr(addr)) {
2499		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2500		if (ret)
2501			goto err1;
2502
2503		mutex_lock(&lock);
2504		ret = cma_acquire_dev(id_priv);
2505		mutex_unlock(&lock);
2506		if (ret)
2507			goto err1;
2508	}
2509
2510	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2511	ret = cma_get_port(id_priv);
2512	if (ret)
2513		goto err2;
2514
2515	return 0;
2516err2:
2517	if (id_priv->cma_dev) {
2518		mutex_lock(&lock);
2519		cma_detach_from_dev(id_priv);
2520		mutex_unlock(&lock);
2521	}
2522err1:
2523	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
2524	return ret;
2525}
2526EXPORT_SYMBOL(rdma_bind_addr);
2527
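/*
 * Fill in the private-data header carried in the CM request so the
 * passive side can recover the IP addresses and port.  SDP uses its own
 * hello header format; all other port spaces use the CMA header.
 */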
2528static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
2529			  struct rdma_route *route)
2530{
2531	struct cma_hdr *cma_hdr;
2532	struct sdp_hh *sdp_hdr;
2533
2534	if (route->addr.src_addr.ss_family == AF_INET) {
2535		struct sockaddr_in *src4, *dst4;
2536
2537		src4 = (struct sockaddr_in *) &route->addr.src_addr;
2538		dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
2539
2540		switch (ps) {
2541		case RDMA_PS_SDP:
2542			sdp_hdr = hdr;
2543			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2544				return -EINVAL;
2545			sdp_set_ip_ver(sdp_hdr, 4);
2546			sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2547			sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2548			sdp_hdr->port = src4->sin_port;
2549			break;
2550		default:
2551			cma_hdr = hdr;
2552			cma_hdr->cma_version = CMA_VERSION;
2553			cma_set_ip_ver(cma_hdr, 4);
2554			cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2555			cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2556			cma_hdr->port = src4->sin_port;
2557			break;
2558		}
2559	} else {
2560		struct sockaddr_in6 *src6, *dst6;
2561
2562		src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
2563		dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
2564
2565		switch (ps) {
2566		case RDMA_PS_SDP:
2567			sdp_hdr = hdr;
2568			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2569				return -EINVAL;
2570			sdp_set_ip_ver(sdp_hdr, 6);
2571			sdp_hdr->src_addr.ip6 = src6->sin6_addr;
2572			sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
2573			sdp_hdr->port = src6->sin6_port;
2574			break;
2575		default:
2576			cma_hdr = hdr;
2577			cma_hdr->cma_version = CMA_VERSION;
2578			cma_set_ip_ver(cma_hdr, 6);
2579			cma_hdr->src_addr.ip6 = src6->sin6_addr;
2580			cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
2581			cma_hdr->port = src6->sin6_port;
2582			cma_ip6_clear_scope_id(&cma_hdr->src_addr.ip6);
2583			cma_ip6_clear_scope_id(&cma_hdr->dst_addr.ip6);
2584			break;
2585		}
2586	}
2587	return 0;
2588}
2589
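/*
 * IB CM callback for the active side of a UD (SIDR) exchange.  Translate
 * the SIDR reply into an RDMA CM event: a successful reply carrying the
 * expected Q_Key becomes RDMA_CM_EVENT_ESTABLISHED with an AH ready for
 * the remote QP; anything else becomes an unreachable/error event.
 */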
2590static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2591				struct ib_cm_event *ib_event)
2592{
2593	struct rdma_id_private *id_priv = cm_id->context;
2594	struct rdma_cm_event event;
2595	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2596	int ret = 0;
2597
2598	if (cma_disable_callback(id_priv, CMA_CONNECT))
2599		return 0;
2600
2601	memset(&event, 0, sizeof event);
2602	switch (ib_event->event) {
2603	case IB_CM_SIDR_REQ_ERROR:
2604		event.event = RDMA_CM_EVENT_UNREACHABLE;
2605		event.status = -ETIMEDOUT;
2606		break;
2607	case IB_CM_SIDR_REP_RECEIVED:
2608		event.param.ud.private_data = ib_event->private_data;
2609		event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2610		if (rep->status != IB_SIDR_SUCCESS) {
2611			event.event = RDMA_CM_EVENT_UNREACHABLE;
2612			event.status = ib_event->param.sidr_rep_rcvd.status;
2613			break;
2614		}
2615		ret = cma_set_qkey(id_priv);
2616		if (ret) {
2617			event.event = RDMA_CM_EVENT_ADDR_ERROR;
2618			event.status = -EINVAL;
2619			break;
2620		}
2621		if (id_priv->qkey != rep->qkey) {
2622			event.event = RDMA_CM_EVENT_UNREACHABLE;
2623			event.status = -EINVAL;
2624			break;
2625		}
2626		ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2627				     id_priv->id.route.path_rec,
2628				     &event.param.ud.ah_attr);
2629		event.param.ud.qp_num = rep->qpn;
2630		event.param.ud.qkey = rep->qkey;
2631		event.event = RDMA_CM_EVENT_ESTABLISHED;
2632		event.status = 0;
2633		break;
2634	default:
2635		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
2636		       ib_event->event);
2637		goto out;
2638	}
2639
2640	ret = id_priv->id.event_handler(&id_priv->id, &event);
2641	if (ret) {
2642		/* Destroy the CM ID by returning a non-zero value. */
2643		id_priv->cm_id.ib = NULL;
2644		cma_exch(id_priv, CMA_DESTROYING);
2645		mutex_unlock(&id_priv->handler_mutex);
2646		rdma_destroy_id(&id_priv->id);
2647		return ret;
2648	}
2649out:
2650	mutex_unlock(&id_priv->handler_mutex);
2651	return ret;
2652}
2653
2654static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2655			      struct rdma_conn_param *conn_param)
2656{
2657	struct ib_cm_sidr_req_param req;
2658	struct rdma_route *route;
2659	int ret;
2660
2661	req.private_data_len = sizeof(struct cma_hdr) +
2662			       conn_param->private_data_len;
2663	req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2664	if (!req.private_data)
2665		return -ENOMEM;
2666
2667	if (conn_param->private_data && conn_param->private_data_len)
2668		memcpy((void *) req.private_data + sizeof(struct cma_hdr),
2669		       conn_param->private_data, conn_param->private_data_len);
2670
2671	route = &id_priv->id.route;
2672	ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
2673	if (ret)
2674		goto out;
2675
2676	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
2677					    cma_sidr_rep_handler, id_priv);
2678	if (IS_ERR(id_priv->cm_id.ib)) {
2679		ret = PTR_ERR(id_priv->cm_id.ib);
2680		goto out;
2681	}
2682
2683	req.path = route->path_rec;
2684	req.service_id = cma_get_service_id(id_priv->id.ps,
2685					    (struct sockaddr *) &route->addr.dst_addr);
2686	req.timeout_ms = 1 << (cma_response_timeout - 8);
2687	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2688
2689	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2690	if (ret) {
2691		ib_destroy_cm_id(id_priv->cm_id.ib);
2692		id_priv->cm_id.ib = NULL;
2693	}
2694out:
2695	kfree(req.private_data);
2696	return ret;
2697}
2698
2699static int cma_connect_ib(struct rdma_id_private *id_priv,
2700			  struct rdma_conn_param *conn_param)
2701{
2702	struct ib_cm_req_param req;
2703	struct rdma_route *route;
2704	void *private_data;
2705	int offset, ret;
2706
2707	memset(&req, 0, sizeof req);
2708	offset = cma_user_data_offset(id_priv->id.ps);
2709	req.private_data_len = offset + conn_param->private_data_len;
2710	private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2711	if (!private_data)
2712		return -ENOMEM;
2713
2714	if (conn_param->private_data && conn_param->private_data_len)
2715		memcpy(private_data + offset, conn_param->private_data,
2716		       conn_param->private_data_len);
2717
2718	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
2719					    id_priv);
2720	if (IS_ERR(id_priv->cm_id.ib)) {
2721		ret = PTR_ERR(id_priv->cm_id.ib);
2722		goto out;
2723	}
2724
2725	route = &id_priv->id.route;
2726	ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2727	if (ret)
2728		goto out;
2729	req.private_data = private_data;
2730
2731	req.primary_path = &route->path_rec[0];
2732	if (route->num_paths == 2)
2733		req.alternate_path = &route->path_rec[1];
2734
2735	req.service_id = cma_get_service_id(id_priv->id.ps,
2736					    (struct sockaddr *) &route->addr.dst_addr);
2737	req.qp_num = id_priv->qp_num;
2738	req.qp_type = IB_QPT_RC;
2739	req.starting_psn = id_priv->seq_num;
2740	req.responder_resources = conn_param->responder_resources;
2741	req.initiator_depth = conn_param->initiator_depth;
2742	req.flow_control = conn_param->flow_control;
2743	req.retry_count = conn_param->retry_count;
2744	req.rnr_retry_count = conn_param->rnr_retry_count;
2745	req.remote_cm_response_timeout = cma_response_timeout;
2746	req.local_cm_response_timeout = cma_response_timeout;
2747	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2748	req.srq = id_priv->srq ? 1 : 0;
2749
2750	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2751out:
2752	if (ret && !IS_ERR(id_priv->cm_id.ib)) {
2753		ib_destroy_cm_id(id_priv->cm_id.ib);
2754		id_priv->cm_id.ib = NULL;
2755	}
2756
2757	kfree(private_data);
2758	return ret;
2759}
2760
2761static int cma_connect_iw(struct rdma_id_private *id_priv,
2762			  struct rdma_conn_param *conn_param)
2763{
2764	struct iw_cm_id *cm_id;
2765	struct sockaddr_in *sin;
2766	int ret;
2767	struct iw_cm_conn_param iw_param;
2768
2769	cm_id = iw_create_cm_id(id_priv->id.device, id_priv->sock,
2770				cma_iw_handler, id_priv);
2771	if (IS_ERR(cm_id)) {
2772		ret = PTR_ERR(cm_id);
2773		goto out;
2774	}
2775
2776	id_priv->cm_id.iw = cm_id;
2777
2778	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2779	cm_id->local_addr = *sin;
2780
2781	sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
2782	cm_id->remote_addr = *sin;
2783
2784	ret = cma_modify_qp_rtr(id_priv, conn_param);
2785	if (ret)
2786		goto out;
2787
2788	iw_param.ord = conn_param->initiator_depth;
2789	iw_param.ird = conn_param->responder_resources;
2790	iw_param.private_data = conn_param->private_data;
2791	iw_param.private_data_len = conn_param->private_data_len;
2792	if (id_priv->id.qp)
2793		iw_param.qpn = id_priv->qp_num;
2794	else
2795		iw_param.qpn = conn_param->qp_num;
2796	ret = iw_cm_connect(cm_id, &iw_param);
2797out:
2798	if (ret && !IS_ERR(cm_id)) {
2799		iw_destroy_cm_id(cm_id);
2800		id_priv->cm_id.iw = NULL;
2801	}
2802	return ret;
2803}
2804
2805int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2806{
2807	struct rdma_id_private *id_priv;
2808	int ret;
2809
2810	id_priv = container_of(id, struct rdma_id_private, id);
2811	if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
2812		return -EINVAL;
2813
2814	if (!id->qp) {
2815		id_priv->qp_num = conn_param->qp_num;
2816		id_priv->srq = conn_param->srq;
2817	}
2818
2819	if (rdma_cap_ib_cm(id->device, id->port_num)) {
2820		if (cma_is_ud_ps(id->ps))
2821			ret = cma_resolve_ib_udp(id_priv, conn_param);
2822		else
2823			ret = cma_connect_ib(id_priv, conn_param);
2824	} else if (rdma_cap_iw_cm(id->device, id->port_num))
2825		ret = cma_connect_iw(id_priv, conn_param);
2826	else
2827		ret = -ENOSYS;
2828	if (ret)
2829		goto err;
2830
2831	return 0;
2832err:
2833	cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
2834	return ret;
2835}
2836EXPORT_SYMBOL(rdma_connect);
2837
2838static int cma_accept_ib(struct rdma_id_private *id_priv,
2839			 struct rdma_conn_param *conn_param)
2840{
2841	struct ib_cm_rep_param rep;
2842	int ret;
2843
2844	ret = cma_modify_qp_rtr(id_priv, conn_param);
2845	if (ret)
2846		goto out;
2847
2848	ret = cma_modify_qp_rts(id_priv, conn_param);
2849	if (ret)
2850		goto out;
2851
2852	memset(&rep, 0, sizeof rep);
2853	rep.qp_num = id_priv->qp_num;
2854	rep.starting_psn = id_priv->seq_num;
2855	rep.private_data = conn_param->private_data;
2856	rep.private_data_len = conn_param->private_data_len;
2857	rep.responder_resources = conn_param->responder_resources;
2858	rep.initiator_depth = conn_param->initiator_depth;
2859	rep.failover_accepted = 0;
2860	rep.flow_control = conn_param->flow_control;
2861	rep.rnr_retry_count = conn_param->rnr_retry_count;
2862	rep.srq = id_priv->srq ? 1 : 0;
2863
2864	ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2865out:
2866	return ret;
2867}
2868
2869static int cma_accept_iw(struct rdma_id_private *id_priv,
2870		  struct rdma_conn_param *conn_param)
2871{
2872	struct iw_cm_conn_param iw_param;
2873	int ret;
2874
2875	ret = cma_modify_qp_rtr(id_priv, conn_param);
2876	if (ret)
2877		return ret;
2878
2879	iw_param.ord = conn_param->initiator_depth;
2880	iw_param.ird = conn_param->responder_resources;
2881	iw_param.private_data = conn_param->private_data;
2882	iw_param.private_data_len = conn_param->private_data_len;
2883	if (id_priv->id.qp)
2884		iw_param.qpn = id_priv->qp_num;
2885	else
2886		iw_param.qpn = conn_param->qp_num;
2887
2888	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2889}
2890
2891static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2892			     enum ib_cm_sidr_status status,
2893			     const void *private_data, int private_data_len)
2894{
2895	struct ib_cm_sidr_rep_param rep;
2896	int ret;
2897
2898	memset(&rep, 0, sizeof rep);
2899	rep.status = status;
2900	if (status == IB_SIDR_SUCCESS) {
2901		ret = cma_set_qkey(id_priv);
2902		if (ret)
2903			return ret;
2904		rep.qp_num = id_priv->qp_num;
2905		rep.qkey = id_priv->qkey;
2906	}
2907	rep.private_data = private_data;
2908	rep.private_data_len = private_data_len;
2909
2910	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2911}
2912
2913int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2914{
2915	struct rdma_id_private *id_priv;
2916	int ret;
2917
2918	id_priv = container_of(id, struct rdma_id_private, id);
2919	if (!cma_comp(id_priv, CMA_CONNECT))
2920		return -EINVAL;
2921
2922	if (!id->qp && conn_param) {
2923		id_priv->qp_num = conn_param->qp_num;
2924		id_priv->srq = conn_param->srq;
2925	}
2926
2927	if (rdma_cap_ib_cm(id->device, id->port_num)) {
2928		if (cma_is_ud_ps(id->ps))
2929			ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2930						conn_param->private_data,
2931						conn_param->private_data_len);
2932		else if (conn_param)
2933			ret = cma_accept_ib(id_priv, conn_param);
2934		else
2935			ret = cma_rep_recv(id_priv);
2936	} else if (rdma_cap_iw_cm(id->device, id->port_num))
2937		ret = cma_accept_iw(id_priv, conn_param);
2938	else
2939		ret = -ENOSYS;
2940
2941	if (ret)
2942		goto reject;
2943
2944	return 0;
2945reject:
2946	cma_modify_qp_err(id_priv);
2947	rdma_reject(id, NULL, 0);
2948	return ret;
2949}
2950EXPORT_SYMBOL(rdma_accept);
2951
2952int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2953{
2954	struct rdma_id_private *id_priv;
2955	int ret;
2956
2957	id_priv = container_of(id, struct rdma_id_private, id);
2958	if (!cma_has_cm_dev(id_priv))
2959		return -EINVAL;
2960
2961	switch (id->device->node_type) {
2962	case RDMA_NODE_IB_CA:
2963		ret = ib_cm_notify(id_priv->cm_id.ib, event);
2964		break;
2965	default:
2966		ret = 0;
2967		break;
2968	}
2969	return ret;
2970}
2971EXPORT_SYMBOL(rdma_notify);
2972
2973int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2974		u8 private_data_len)
2975{
2976	struct rdma_id_private *id_priv;
2977	int ret;
2978
2979	id_priv = container_of(id, struct rdma_id_private, id);
2980	if (!cma_has_cm_dev(id_priv))
2981		return -EINVAL;
2982
2983	if (rdma_cap_ib_cm(id->device, id->port_num)) {
2984		if (cma_is_ud_ps(id->ps))
2985			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2986						private_data, private_data_len);
2987		else
2988			ret = ib_send_cm_rej(id_priv->cm_id.ib,
2989					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
2990					     0, private_data, private_data_len);
2991	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
2992		ret = iw_cm_reject(id_priv->cm_id.iw,
2993				   private_data, private_data_len);
2994	} else
2995		ret = -ENOSYS;
2996
2997	return ret;
2998}
2999EXPORT_SYMBOL(rdma_reject);
3000
3001int rdma_disconnect(struct rdma_cm_id *id)
3002{
3003	struct rdma_id_private *id_priv;
3004	int ret;
3005
3006	id_priv = container_of(id, struct rdma_id_private, id);
3007	if (!cma_has_cm_dev(id_priv))
3008		return -EINVAL;
3009
3010	if (rdma_cap_ib_cm(id->device, id->port_num)) {
3011		ret = cma_modify_qp_err(id_priv);
3012		if (ret)
3013			goto out;
3014		/* Initiate or respond to a disconnect. */
3015		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
3016			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
3017	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
3018		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
3019	} else
3020		ret = -EINVAL;
3021
3022out:
3023	return ret;
3024}
3025EXPORT_SYMBOL(rdma_disconnect);
3026
3027static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
3028{
3029	struct rdma_id_private *id_priv;
3030	struct cma_multicast *mc = multicast->context;
3031	struct rdma_cm_event event;
3032	int ret;
3033
3034	id_priv = mc->id_priv;
3035	if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
3036	    cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
3037		return 0;
3038
3039	mutex_lock(&id_priv->qp_mutex);
3040	if (!status && id_priv->id.qp)
3041		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
3042					 multicast->rec.mlid);
3043	mutex_unlock(&id_priv->qp_mutex);
3044
3045	memset(&event, 0, sizeof event);
3046	event.status = status;
3047	event.param.ud.private_data = mc->context;
3048	if (!status) {
3049		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
3050		ib_init_ah_from_mcmember(id_priv->id.device,
3051					 id_priv->id.port_num, &multicast->rec,
3052					 &event.param.ud.ah_attr);
3053		event.param.ud.qp_num = 0xFFFFFF;
3054		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
3055	} else
3056		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
3057
3058	ret = id_priv->id.event_handler(&id_priv->id, &event);
3059	if (ret) {
3060		cma_exch(id_priv, CMA_DESTROYING);
3061		mutex_unlock(&id_priv->handler_mutex);
3062		rdma_destroy_id(&id_priv->id);
3063		return 0;
3064	}
3065
3066	mutex_unlock(&id_priv->handler_mutex);
3067	return 0;
3068}
3069
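/*
 * Derive the multicast GID to join from the caller-supplied socket
 * address: the wildcard address maps to a zero MGID, an SA-assigned
 * IPv6 MGID is used as-is, and ordinary IPv4/IPv6 addresses are mapped
 * the same way IPoIB maps multicast IPs, with the RDMA CM signature
 * byte set for the UDP port space.
 */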
3070static void cma_set_mgid(struct rdma_id_private *id_priv,
3071			 struct sockaddr *addr, union ib_gid *mgid)
3072{
3073#if defined(INET) || defined(INET6)
3074	unsigned char mc_map[MAX_ADDR_LEN];
3075	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3076#endif
3077#ifdef INET
3078	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
3079#endif
3080#ifdef INET6
3081	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
3082#endif
3083
3084	if (cma_any_addr(addr)) {
3085		memset(mgid, 0, sizeof *mgid);
3086#ifdef INET6
3087	} else if ((addr->sa_family == AF_INET6) &&
3088		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
3089								 0xFF10A01B)) {
3090		/* IPv6 address is an SA assigned MGID. */
3091		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3092	} else if (addr->sa_family == AF_INET6) {
3093		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
3094		if (id_priv->id.ps == RDMA_PS_UDP)
3095			mc_map[7] = 0x01;	/* Use RDMA CM signature */
3096		*mgid = *(union ib_gid *) (mc_map + 4);
3097#endif
3098#ifdef INET
3099	} else {
3100		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
3101		if (id_priv->id.ps == RDMA_PS_UDP)
3102			mc_map[7] = 0x01;	/* Use RDMA CM signature */
3103		*mgid = *(union ib_gid *) (mc_map + 4);
3104#endif
3105	}
3106}
3107
3108static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3109				 struct cma_multicast *mc)
3110{
3111	struct ib_sa_mcmember_rec rec;
3112	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3113	ib_sa_comp_mask comp_mask;
3114	int ret;
3115
3116	ib_addr_get_mgid(dev_addr, &rec.mgid);
3117	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
3118				     &rec.mgid, &rec);
3119	if (ret)
3120		return ret;
3121
3122	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
3123	if (id_priv->id.ps == RDMA_PS_UDP)
3124		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3125	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
3126	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
3127	rec.join_state = 1;
3128
3129	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
3130		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
3131		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
3132		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
3133		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
3134
3135	if (id_priv->id.ps == RDMA_PS_IPOIB)
3136		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
3137			     IB_SA_MCMEMBER_REC_RATE_SELECTOR;
3138
3139	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
3140						id_priv->id.port_num, &rec,
3141						comp_mask, GFP_KERNEL,
3142						cma_ib_mc_handler, mc);
3143	if (IS_ERR(mc->multicast.ib))
3144		return PTR_ERR(mc->multicast.ib);
3145
3146	return 0;
3147}
3148
3149
3150static void iboe_mcast_work_handler(struct work_struct *work)
3151{
3152	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
3153	struct cma_multicast *mc = mw->mc;
3154	struct ib_sa_multicast *m = mc->multicast.ib;
3155
3156	mc->multicast.ib->context = mc;
3157	cma_ib_mc_handler(0, m);
3158	kref_put(&mc->mcref, release_mc);
3159	kfree(mw);
3160}
3161
3162static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
3163{
3164	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
3165	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
3166
3167	if (cma_any_addr(addr)) {
3168		memset(mgid, 0, sizeof *mgid);
3169	} else if (addr->sa_family == AF_INET6)
3170		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3171	else {
3172		mgid->raw[0] = 0xff;
3173		mgid->raw[1] = 0x0e;
3174		mgid->raw[2] = 0;
3175		mgid->raw[3] = 0;
3176		mgid->raw[4] = 0;
3177		mgid->raw[5] = 0;
3178		mgid->raw[6] = 0;
3179		mgid->raw[7] = 0;
3180		mgid->raw[8] = 0;
3181		mgid->raw[9] = 0;
3182		mgid->raw[10] = 0xff;
3183		mgid->raw[11] = 0xff;
3184		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
3185	}
3186}
3187
3188static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
3189				   struct cma_multicast *mc)
3190{
3191	struct iboe_mcast_work *work;
3192	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3193	int err;
3194	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
3195	struct net_device *ndev = NULL;
3196
3197	if (cma_zero_addr((struct sockaddr *)&mc->addr))
3198		return -EINVAL;
3199
3200	work = kzalloc(sizeof *work, GFP_KERNEL);
3201	if (!work)
3202		return -ENOMEM;
3203
3204	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
3205	if (!mc->multicast.ib) {
3206		err = -ENOMEM;
3207		goto out1;
3208	}
3209
3210	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
3211
3212	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
3213	if (id_priv->id.ps == RDMA_PS_UDP)
3214		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3215
3216	if (dev_addr->bound_dev_if)
3217		ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
3218	if (!ndev) {
3219		err = -ENODEV;
3220		goto out2;
3221	}
3222
3223	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
3224	mc->multicast.ib->rec.hop_limit = 1;
3225#ifdef __linux__
3226	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
3227#else
3228	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu);
3229#endif
3230	dev_put(ndev);
3231	if (!mc->multicast.ib->rec.mtu) {
3232		err = -EINVAL;
3233		goto out2;
3234	}
3235	iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
3236	work->id = id_priv;
3237	work->mc = mc;
3238	INIT_WORK(&work->work, iboe_mcast_work_handler);
3239	kref_get(&mc->mcref);
3240	queue_work(cma_wq, &work->work);
3241
3242	return 0;
3243
3244out2:
3245	kfree(mc->multicast.ib);
3246out1:
3247	kfree(work);
3248	return err;
3249}
3250
3251int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
3252			void *context)
3253{
3254	struct rdma_id_private *id_priv;
3255	struct cma_multicast *mc;
3256	int ret;
3257
3258	id_priv = container_of(id, struct rdma_id_private, id);
3259	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
3260	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
3261		return -EINVAL;
3262
3263	mc = kmalloc(sizeof *mc, GFP_KERNEL);
3264	if (!mc)
3265		return -ENOMEM;
3266
3267	memcpy(&mc->addr, addr, ip_addr_size(addr));
3268	mc->context = context;
3269	mc->id_priv = id_priv;
3270
3271	spin_lock(&id_priv->lock);
3272	list_add(&mc->list, &id_priv->mc_list);
3273	spin_unlock(&id_priv->lock);
3274
3275	switch (rdma_node_get_transport(id->device->node_type)) {
3276	case RDMA_TRANSPORT_IB:
3277		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3278		case IB_LINK_LAYER_INFINIBAND:
3279			ret = cma_join_ib_multicast(id_priv, mc);
3280			break;
3281		case IB_LINK_LAYER_ETHERNET:
3282			kref_init(&mc->mcref);
3283			ret = cma_iboe_join_multicast(id_priv, mc);
3284			break;
3285		default:
3286			ret = -EINVAL;
3287		}
3288		break;
3289	default:
3290		ret = -ENOSYS;
3291		break;
3292	}
3293
3294	if (ret) {
3295		spin_lock_irq(&id_priv->lock);
3296		list_del(&mc->list);
3297		spin_unlock_irq(&id_priv->lock);
3298		kfree(mc);
3299	}
3300
3301	return ret;
3302}
3303EXPORT_SYMBOL(rdma_join_multicast);
3304
3305void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
3306{
3307	struct rdma_id_private *id_priv;
3308	struct cma_multicast *mc;
3309
3310	id_priv = container_of(id, struct rdma_id_private, id);
3311	spin_lock_irq(&id_priv->lock);
3312	list_for_each_entry(mc, &id_priv->mc_list, list) {
3313		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
3314			list_del(&mc->list);
3315			spin_unlock_irq(&id_priv->lock);
3316
3317			if (id->qp)
3318				ib_detach_mcast(id->qp,
3319						&mc->multicast.ib->rec.mgid,
3320						mc->multicast.ib->rec.mlid);
3321			if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
3322				switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3323				case IB_LINK_LAYER_INFINIBAND:
3324					ib_sa_free_multicast(mc->multicast.ib);
3325					kfree(mc);
3326					break;
3327				case IB_LINK_LAYER_ETHERNET:
3328					kref_put(&mc->mcref, release_mc);
3329					break;
3330				default:
3331					break;
3332				}
3333			}
3334			return;
3335		}
3336	}
3337	spin_unlock_irq(&id_priv->lock);
3338}
3339EXPORT_SYMBOL(rdma_leave_multicast);
3340
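/*
 * The link-layer address of the net device bound to this ID no longer
 * matches what was cached; queue a work item that delivers
 * RDMA_CM_EVENT_ADDR_CHANGE to the consumer.
 */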
3341static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
3342{
3343	struct rdma_dev_addr *dev_addr;
3344	struct cma_ndev_work *work;
3345
3346	dev_addr = &id_priv->id.route.addr.dev_addr;
3347
3348	if ((dev_addr->bound_dev_if == ndev->if_index) &&
3349	    memcmp(dev_addr->src_dev_addr, IF_LLADDR(ndev), ndev->if_addrlen)) {
3350		printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
3351		       ndev->if_xname, &id_priv->id);
3352		work = kzalloc(sizeof *work, GFP_KERNEL);
3353		if (!work)
3354			return -ENOMEM;
3355
3356		INIT_WORK(&work->work, cma_ndev_work_handler);
3357		work->id = id_priv;
3358		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
3359		atomic_inc(&id_priv->refcount);
3360		queue_work(cma_wq, &work->work);
3361	}
3362
3363	return 0;
3364}
3365
3366static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
3367			       void *ctx)
3368{
3369	struct net_device *ndev = (struct net_device *)ctx;
3370	struct cma_device *cma_dev;
3371	struct rdma_id_private *id_priv;
3372	int ret = NOTIFY_DONE;
3373
3374#ifdef __linux__
3375	if (dev_net(ndev) != &init_net)
3376		return NOTIFY_DONE;
3377
3378	if (event != NETDEV_BONDING_FAILOVER)
3379		return NOTIFY_DONE;
3380
3381	if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
3382		return NOTIFY_DONE;
3383#else
3384	if (event != NETDEV_DOWN && event != NETDEV_UNREGISTER)
3385		return NOTIFY_DONE;
3386#endif
3387
3388	mutex_lock(&lock);
3389	list_for_each_entry(cma_dev, &dev_list, list)
3390		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
3391			ret = cma_netdev_change(ndev, id_priv);
3392			if (ret)
3393				goto out;
3394		}
3395
3396out:
3397	mutex_unlock(&lock);
3398	return ret;
3399}
3400
3401static struct notifier_block cma_nb = {
3402	.notifier_call = cma_netdev_callback
3403};
3404
3405static void cma_add_one(struct ib_device *device)
3406{
3407	struct cma_device *cma_dev;
3408	struct rdma_id_private *id_priv;
3409
3410	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
3411	if (!cma_dev)
3412		return;
3413
3414	cma_dev->device = device;
3415
3416	init_completion(&cma_dev->comp);
3417	atomic_set(&cma_dev->refcount, 1);
3418	INIT_LIST_HEAD(&cma_dev->id_list);
3419	ib_set_client_data(device, &cma_client, cma_dev);
3420
3421	mutex_lock(&lock);
3422	list_add_tail(&cma_dev->list, &dev_list);
3423	list_for_each_entry(id_priv, &listen_any_list, list)
3424		cma_listen_on_dev(id_priv, cma_dev);
3425	mutex_unlock(&lock);
3426}
3427
3428static int cma_remove_id_dev(struct rdma_id_private *id_priv)
3429{
3430	struct rdma_cm_event event;
3431	enum cma_state state;
3432	int ret = 0;
3433
3434	/* Record that we want to remove the device */
3435	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
3436	if (state == CMA_DESTROYING)
3437		return 0;
3438
3439	cma_cancel_operation(id_priv, state);
3440	mutex_lock(&id_priv->handler_mutex);
3441
3442	/* Check for destruction from another callback. */
3443	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
3444		goto out;
3445
3446	memset(&event, 0, sizeof event);
3447	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
3448	ret = id_priv->id.event_handler(&id_priv->id, &event);
3449out:
3450	mutex_unlock(&id_priv->handler_mutex);
3451	return ret;
3452}
3453
3454static void cma_process_remove(struct cma_device *cma_dev)
3455{
3456	struct rdma_id_private *id_priv;
3457	int ret;
3458
3459	mutex_lock(&lock);
3460	while (!list_empty(&cma_dev->id_list)) {
3461		id_priv = list_entry(cma_dev->id_list.next,
3462				     struct rdma_id_private, list);
3463
3464		list_del(&id_priv->listen_list);
3465		list_del_init(&id_priv->list);
3466		atomic_inc(&id_priv->refcount);
3467		mutex_unlock(&lock);
3468
3469		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
3470		cma_deref_id(id_priv);
3471		if (ret)
3472			rdma_destroy_id(&id_priv->id);
3473
3474		mutex_lock(&lock);
3475	}
3476	mutex_unlock(&lock);
3477
3478	cma_deref_dev(cma_dev);
3479	wait_for_completion(&cma_dev->comp);
3480}
3481
3482static void cma_remove_one(struct ib_device *device)
3483{
3484	struct cma_device *cma_dev;
3485
3486	cma_dev = ib_get_client_data(device, &cma_client);
3487	if (!cma_dev)
3488		return;
3489
3490	mutex_lock(&lock);
3491	list_del(&cma_dev->list);
3492	mutex_unlock(&lock);
3493
3494	cma_process_remove(cma_dev);
3495	kfree(cma_dev);
3496}
3497
3498static int cma_init(void)
3499{
3500	int ret;
3501#if defined(INET)
3502	int low, high, remaining;
3503
3504	get_random_bytes(&next_port, sizeof next_port);
3505	inet_get_local_port_range(&low, &high);
3506	remaining = (high - low) + 1;
3507	next_port = ((unsigned int) next_port % remaining) + low;
3508#endif
3509
3510	cma_wq = create_singlethread_workqueue("rdma_cm");
3511	if (!cma_wq)
3512		return -ENOMEM;
3513
3514	ib_sa_register_client(&sa_client);
3515	rdma_addr_register_client(&addr_client);
3516	register_netdevice_notifier(&cma_nb);
3517
3518	ret = ib_register_client(&cma_client);
3519	if (ret)
3520		goto err;
3521	return 0;
3522
3523err:
3524	unregister_netdevice_notifier(&cma_nb);
3525	rdma_addr_unregister_client(&addr_client);
3526	ib_sa_unregister_client(&sa_client);
3527	destroy_workqueue(cma_wq);
3528	return ret;
3529}
3530
3531static void cma_cleanup(void)
3532{
3533	ib_unregister_client(&cma_client);
3534	unregister_netdevice_notifier(&cma_nb);
3535	rdma_addr_unregister_client(&addr_client);
3536	ib_sa_unregister_client(&sa_client);
3537	destroy_workqueue(cma_wq);
3538	idr_destroy(&sdp_ps);
3539	idr_destroy(&tcp_ps);
3540	idr_destroy(&udp_ps);
3541	idr_destroy(&ipoib_ps);
3542}
3543
3544module_init(cma_init);
3545module_exit(cma_cleanup);
3546