1/*-
2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3 *
4 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
6 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
8 *
9 * This software is available to you under a choice of one of two
10 * licenses.  You may choose to be licensed under the terms of the GNU
11 * General Public License (GPL) Version 2, available from the file
12 * COPYING in the main directory of this source tree, or the
13 * OpenIB.org BSD license below:
14 *
15 *     Redistribution and use in source and binary forms, with or
16 *     without modification, are permitted provided that the following
17 *     conditions are met:
18 *
19 *      - Redistributions of source code must retain the above
20 *        copyright notice, this list of conditions and the following
21 *        disclaimer.
22 *
23 *      - Redistributions in binary form must reproduce the above
24 *        copyright notice, this list of conditions and the following
25 *        disclaimer in the documentation and/or other materials
26 *        provided with the distribution.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 * SOFTWARE.
36 */
37
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD$");
40
41#include <linux/completion.h>
42#include <linux/dma-mapping.h>
43#include <linux/device.h>
44#include <linux/module.h>
45#include <linux/err.h>
46#include <linux/idr.h>
47#include <linux/interrupt.h>
48#include <linux/random.h>
49#include <linux/rbtree.h>
50#include <linux/spinlock.h>
51#include <linux/slab.h>
52#include <linux/sysfs.h>
53#include <linux/workqueue.h>
54#include <linux/kdev_t.h>
55#include <linux/etherdevice.h>
56
57#include <asm/atomic-long.h>
58
59#include <rdma/ib_cache.h>
60#include <rdma/ib_cm.h>
61#include "cm_msgs.h"
62
63MODULE_AUTHOR("Sean Hefty");
64MODULE_DESCRIPTION("InfiniBand CM");
65MODULE_LICENSE("Dual BSD/GPL");
66
67static void cm_add_one(struct ib_device *device);
68static void cm_remove_one(struct ib_device *device, void *client_data);
69
70static struct ib_client cm_client = {
71	.name   = "cm",
72	.add    = cm_add_one,
73	.remove = cm_remove_one
74};
75
76static struct ib_cm {
77	spinlock_t lock;
78	struct list_head device_list;
79	rwlock_t device_lock;
80	struct rb_root listen_service_table;
81	u64 listen_service_id;
82	/* struct rb_root peer_service_table; todo: fix peer to peer */
83	struct rb_root remote_qp_table;
84	struct rb_root remote_id_table;
85	struct rb_root remote_sidr_table;
86	struct idr local_id_table;
87	__be32 random_id_operand;
88	struct list_head timewait_list;
89	struct workqueue_struct *wq;
90	/* Sync on cm change port state */
91	spinlock_t state_lock;
92} cm;
93
94/* Counter indexes ordered by attribute ID */
95enum {
96	CM_REQ_COUNTER,
97	CM_MRA_COUNTER,
98	CM_REJ_COUNTER,
99	CM_REP_COUNTER,
100	CM_RTU_COUNTER,
101	CM_DREQ_COUNTER,
102	CM_DREP_COUNTER,
103	CM_SIDR_REQ_COUNTER,
104	CM_SIDR_REP_COUNTER,
105	CM_LAP_COUNTER,
106	CM_APR_COUNTER,
107	CM_ATTR_COUNT,
108	CM_ATTR_ID_OFFSET = 0x0010,
109};
110
111enum {
112	CM_XMIT,
113	CM_XMIT_RETRIES,
114	CM_RECV,
115	CM_RECV_DUPLICATES,
116	CM_COUNTER_GROUPS
117};
118
119static char const counter_group_names[CM_COUNTER_GROUPS]
120				     [sizeof("cm_rx_duplicates")] = {
121	"cm_tx_msgs", "cm_tx_retries",
122	"cm_rx_msgs", "cm_rx_duplicates"
123};
124
125struct cm_counter_group {
126	struct kobject obj;
127	atomic_long_t counter[CM_ATTR_COUNT];
128};
129
130struct cm_counter_attribute {
131	struct attribute attr;
132	int index;
133};
134
135#define CM_COUNTER_ATTR(_name, _index) \
136struct cm_counter_attribute cm_##_name##_counter_attr = { \
137	.attr = { .name = __stringify(_name), .mode = 0444 }, \
138	.index = _index \
139}
140
141static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
142static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
143static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
144static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
145static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
146static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
147static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
148static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
149static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
150static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
151static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
152
153static struct attribute *cm_counter_default_attrs[] = {
154	&cm_req_counter_attr.attr,
155	&cm_mra_counter_attr.attr,
156	&cm_rej_counter_attr.attr,
157	&cm_rep_counter_attr.attr,
158	&cm_rtu_counter_attr.attr,
159	&cm_dreq_counter_attr.attr,
160	&cm_drep_counter_attr.attr,
161	&cm_sidr_req_counter_attr.attr,
162	&cm_sidr_rep_counter_attr.attr,
163	&cm_lap_counter_attr.attr,
164	&cm_apr_counter_attr.attr,
165	NULL
166};
167
168struct cm_port {
169	struct cm_device *cm_dev;
170	struct ib_mad_agent *mad_agent;
171	struct kobject port_obj;
172	u8 port_num;
173	struct list_head cm_priv_prim_list;
174	struct list_head cm_priv_altr_list;
175	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
176};
177
178struct cm_device {
179	struct list_head list;
180	struct ib_device *ib_device;
181	struct device *device;
182	u8 ack_delay;
183	int going_down;
184	struct cm_port *port[0];
185};
186
187struct cm_av {
188	struct cm_port *port;
189	union ib_gid dgid;
190	struct ib_ah_attr ah_attr;
191	u16 pkey_index;
192	u8 timeout;
193};
194
195struct cm_work {
196	struct delayed_work work;
197	struct list_head list;
198	struct cm_port *port;
199	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
200	__be32 local_id;			/* Established / timewait */
201	__be32 remote_id;
202	struct ib_cm_event cm_event;
203	struct ib_sa_path_rec path[0];
204};
205
206struct cm_timewait_info {
207	struct cm_work work;			/* Must be first. */
208	struct list_head list;
209	struct rb_node remote_qp_node;
210	struct rb_node remote_id_node;
211	__be64 remote_ca_guid;
212	__be32 remote_qpn;
213	u8 inserted_remote_qp;
214	u8 inserted_remote_id;
215};
216
217struct cm_id_private {
218	struct ib_cm_id	id;
219
220	struct rb_node service_node;
221	struct rb_node sidr_id_node;
222	spinlock_t lock;	/* Do not acquire inside cm.lock */
223	struct completion comp;
224	atomic_t refcount;
225	/* Number of clients sharing this ib_cm_id. Only valid for listeners.
226	 * Protected by the cm.lock spinlock. */
227	int listen_sharecount;
228
229	struct ib_mad_send_buf *msg;
230	struct cm_timewait_info *timewait_info;
231	/* todo: use alternate port on send failure */
232	struct cm_av av;
233	struct cm_av alt_av;
234
235	void *private_data;
236	__be64 tid;
237	__be32 local_qpn;
238	__be32 remote_qpn;
239	enum ib_qp_type qp_type;
240	__be32 sq_psn;
241	__be32 rq_psn;
242	int timeout_ms;
243	enum ib_mtu path_mtu;
244	__be16 pkey;
245	u8 private_data_len;
246	u8 max_cm_retries;
247	u8 peer_to_peer;
248	u8 responder_resources;
249	u8 initiator_depth;
250	u8 retry_count;
251	u8 rnr_retry_count;
252	u8 service_timeout;
253	u8 target_ack_delay;
254
255	struct list_head prim_list;
256	struct list_head altr_list;
257	/* Indicates that the send port mad is registered and av is set */
258	int prim_send_port_not_ready;
259	int altr_send_port_not_ready;
260
261	struct list_head work_list;
262	atomic_t work_count;
263};
264
265static void cm_work_handler(struct work_struct *work);
266
267static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
268{
269	if (atomic_dec_and_test(&cm_id_priv->refcount))
270		complete(&cm_id_priv->comp);
271}
272
273static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
274			struct ib_mad_send_buf **msg)
275{
276	struct ib_mad_agent *mad_agent;
277	struct ib_mad_send_buf *m;
278	struct ib_ah *ah;
279	struct cm_av *av;
280	unsigned long flags, flags2;
281	int ret = 0;
282
283	/* don't let the port to be released till the agent is down */
284	spin_lock_irqsave(&cm.state_lock, flags2);
285	spin_lock_irqsave(&cm.lock, flags);
286	if (!cm_id_priv->prim_send_port_not_ready)
287		av = &cm_id_priv->av;
288	else if (!cm_id_priv->altr_send_port_not_ready &&
289		 (cm_id_priv->alt_av.port))
290		av = &cm_id_priv->alt_av;
291	else {
292		pr_info("%s: not valid CM id\n", __func__);
293		ret = -ENODEV;
294		spin_unlock_irqrestore(&cm.lock, flags);
295		goto out;
296	}
297	spin_unlock_irqrestore(&cm.lock, flags);
298	/* Make sure the port haven't released the mad yet */
299	mad_agent = cm_id_priv->av.port->mad_agent;
300	if (!mad_agent) {
301		pr_info("%s: not a valid MAD agent\n", __func__);
302		ret = -ENODEV;
303		goto out;
304	}
305	ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
306	if (IS_ERR(ah)) {
307		ret = PTR_ERR(ah);
308		goto out;
309	}
310
311	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
312			       av->pkey_index,
313			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
314			       GFP_ATOMIC,
315			       IB_MGMT_BASE_VERSION);
316	if (IS_ERR(m)) {
317		ib_destroy_ah(ah);
318		ret = PTR_ERR(m);
319		goto out;
320	}
321
322	/* Timeout set by caller if response is expected. */
323	m->ah = ah;
324	m->retries = cm_id_priv->max_cm_retries;
325
326	atomic_inc(&cm_id_priv->refcount);
327	m->context[0] = cm_id_priv;
328	*msg = m;
329
330out:
331	spin_unlock_irqrestore(&cm.state_lock, flags2);
332	return ret;
333}
334
335static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
336							   struct ib_mad_recv_wc *mad_recv_wc)
337{
338	return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
339				  0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
340				  GFP_ATOMIC,
341				  IB_MGMT_BASE_VERSION);
342}
343
344static int cm_create_response_msg_ah(struct cm_port *port,
345				     struct ib_mad_recv_wc *mad_recv_wc,
346				     struct ib_mad_send_buf *msg)
347{
348	struct ib_ah *ah;
349
350	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
351				  mad_recv_wc->recv_buf.grh, port->port_num);
352	if (IS_ERR(ah))
353		return PTR_ERR(ah);
354
355	msg->ah = ah;
356	return 0;
357}
358
359static void cm_free_msg(struct ib_mad_send_buf *msg)
360{
361	if (msg->ah)
362		ib_destroy_ah(msg->ah);
363	if (msg->context[0])
364		cm_deref_id(msg->context[0]);
365	ib_free_send_mad(msg);
366}
367
368static int cm_alloc_response_msg(struct cm_port *port,
369				 struct ib_mad_recv_wc *mad_recv_wc,
370				 struct ib_mad_send_buf **msg)
371{
372	struct ib_mad_send_buf *m;
373	int ret;
374
375	m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
376	if (IS_ERR(m))
377		return PTR_ERR(m);
378
379	ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
380	if (ret) {
381		cm_free_msg(m);
382		return ret;
383	}
384
385	*msg = m;
386	return 0;
387}
388
389static void * cm_copy_private_data(const void *private_data,
390				   u8 private_data_len)
391{
392	void *data;
393
394	if (!private_data || !private_data_len)
395		return NULL;
396
397	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
398	if (!data)
399		return ERR_PTR(-ENOMEM);
400
401	return data;
402}
403
404static void cm_set_private_data(struct cm_id_private *cm_id_priv,
405				 void *private_data, u8 private_data_len)
406{
407	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
408		kfree(cm_id_priv->private_data);
409
410	cm_id_priv->private_data = private_data;
411	cm_id_priv->private_data_len = private_data_len;
412}
413
414static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
415				   struct ib_grh *grh, struct cm_av *av)
416{
417	av->port = port;
418	av->pkey_index = wc->pkey_index;
419	return ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
420				  grh, &av->ah_attr);
421}
422
423static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
424			      struct cm_id_private *cm_id_priv)
425{
426	struct cm_device *cm_dev;
427	struct cm_port *port = NULL;
428	unsigned long flags;
429	int ret;
430	u8 p;
431	struct net_device *ndev = ib_get_ndev_from_path(path);
432
433	read_lock_irqsave(&cm.device_lock, flags);
434	list_for_each_entry(cm_dev, &cm.device_list, list) {
435		if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
436					path->gid_type, ndev, &p, NULL)) {
437			port = cm_dev->port[p-1];
438			break;
439		}
440	}
441	read_unlock_irqrestore(&cm.device_lock, flags);
442
443	if (ndev)
444		dev_put(ndev);
445
446	if (!port)
447		return -EINVAL;
448
449	ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
450				  be16_to_cpu(path->pkey), &av->pkey_index);
451	if (ret)
452		return ret;
453
454	av->port = port;
455	ret = ib_init_ah_from_path(cm_dev->ib_device, port->port_num,
456				   path, &av->ah_attr);
457	if (ret)
458		return ret;
459
460	av->timeout = path->packet_life_time + 1;
461
462	spin_lock_irqsave(&cm.lock, flags);
463	if (&cm_id_priv->av == av)
464		list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
465	else if (&cm_id_priv->alt_av == av)
466		list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
467	else
468		ret = -EINVAL;
469
470	spin_unlock_irqrestore(&cm.lock, flags);
471
472	return ret;
473}
474
475static int cm_alloc_id(struct cm_id_private *cm_id_priv)
476{
477	unsigned long flags;
478	int id;
479
480	idr_preload(GFP_KERNEL);
481	spin_lock_irqsave(&cm.lock, flags);
482
483	id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
484
485	spin_unlock_irqrestore(&cm.lock, flags);
486	idr_preload_end();
487
488	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
489	return id < 0 ? id : 0;
490}
491
492static void cm_free_id(__be32 local_id)
493{
494	spin_lock_irq(&cm.lock);
495	idr_remove(&cm.local_id_table,
496		   (__force int) (local_id ^ cm.random_id_operand));
497	spin_unlock_irq(&cm.lock);
498}
499
500static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
501{
502	struct cm_id_private *cm_id_priv;
503
504	cm_id_priv = idr_find(&cm.local_id_table,
505			      (__force int) (local_id ^ cm.random_id_operand));
506	if (cm_id_priv) {
507		if (cm_id_priv->id.remote_id == remote_id)
508			atomic_inc(&cm_id_priv->refcount);
509		else
510			cm_id_priv = NULL;
511	}
512
513	return cm_id_priv;
514}
515
516static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
517{
518	struct cm_id_private *cm_id_priv;
519
520	spin_lock_irq(&cm.lock);
521	cm_id_priv = cm_get_id(local_id, remote_id);
522	spin_unlock_irq(&cm.lock);
523
524	return cm_id_priv;
525}
526
527/*
528 * Trivial helpers to strip endian annotation and compare; the
529 * endianness doesn't actually matter since we just need a stable
530 * order for the RB tree.
531 */
532static int be32_lt(__be32 a, __be32 b)
533{
534	return (__force u32) a < (__force u32) b;
535}
536
537static int be32_gt(__be32 a, __be32 b)
538{
539	return (__force u32) a > (__force u32) b;
540}
541
542static int be64_lt(__be64 a, __be64 b)
543{
544	return (__force u64) a < (__force u64) b;
545}
546
547static int be64_gt(__be64 a, __be64 b)
548{
549	return (__force u64) a > (__force u64) b;
550}
551
552static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
553{
554	struct rb_node **link = &cm.listen_service_table.rb_node;
555	struct rb_node *parent = NULL;
556	struct cm_id_private *cur_cm_id_priv;
557	__be64 service_id = cm_id_priv->id.service_id;
558	__be64 service_mask = cm_id_priv->id.service_mask;
559
560	while (*link) {
561		parent = *link;
562		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
563					  service_node);
564		if ((cur_cm_id_priv->id.service_mask & service_id) ==
565		    (service_mask & cur_cm_id_priv->id.service_id) &&
566		    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
567			return cur_cm_id_priv;
568
569		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
570			link = &(*link)->rb_left;
571		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
572			link = &(*link)->rb_right;
573		else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
574			link = &(*link)->rb_left;
575		else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
576			link = &(*link)->rb_right;
577		else
578			link = &(*link)->rb_right;
579	}
580	rb_link_node(&cm_id_priv->service_node, parent, link);
581	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
582	return NULL;
583}
584
585static struct cm_id_private * cm_find_listen(struct ib_device *device,
586					     __be64 service_id)
587{
588	struct rb_node *node = cm.listen_service_table.rb_node;
589	struct cm_id_private *cm_id_priv;
590
591	while (node) {
592		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
593		if ((cm_id_priv->id.service_mask & service_id) ==
594		     cm_id_priv->id.service_id &&
595		    (cm_id_priv->id.device == device))
596			return cm_id_priv;
597
598		if (device < cm_id_priv->id.device)
599			node = node->rb_left;
600		else if (device > cm_id_priv->id.device)
601			node = node->rb_right;
602		else if (be64_lt(service_id, cm_id_priv->id.service_id))
603			node = node->rb_left;
604		else if (be64_gt(service_id, cm_id_priv->id.service_id))
605			node = node->rb_right;
606		else
607			node = node->rb_right;
608	}
609	return NULL;
610}
611
612static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
613						     *timewait_info)
614{
615	struct rb_node **link = &cm.remote_id_table.rb_node;
616	struct rb_node *parent = NULL;
617	struct cm_timewait_info *cur_timewait_info;
618	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
619	__be32 remote_id = timewait_info->work.remote_id;
620
621	while (*link) {
622		parent = *link;
623		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
624					     remote_id_node);
625		if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
626			link = &(*link)->rb_left;
627		else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
628			link = &(*link)->rb_right;
629		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
630			link = &(*link)->rb_left;
631		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
632			link = &(*link)->rb_right;
633		else
634			return cur_timewait_info;
635	}
636	timewait_info->inserted_remote_id = 1;
637	rb_link_node(&timewait_info->remote_id_node, parent, link);
638	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
639	return NULL;
640}
641
642static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
643						   __be32 remote_id)
644{
645	struct rb_node *node = cm.remote_id_table.rb_node;
646	struct cm_timewait_info *timewait_info;
647
648	while (node) {
649		timewait_info = rb_entry(node, struct cm_timewait_info,
650					 remote_id_node);
651		if (be32_lt(remote_id, timewait_info->work.remote_id))
652			node = node->rb_left;
653		else if (be32_gt(remote_id, timewait_info->work.remote_id))
654			node = node->rb_right;
655		else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
656			node = node->rb_left;
657		else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
658			node = node->rb_right;
659		else
660			return timewait_info;
661	}
662	return NULL;
663}
664
665static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
666						      *timewait_info)
667{
668	struct rb_node **link = &cm.remote_qp_table.rb_node;
669	struct rb_node *parent = NULL;
670	struct cm_timewait_info *cur_timewait_info;
671	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
672	__be32 remote_qpn = timewait_info->remote_qpn;
673
674	while (*link) {
675		parent = *link;
676		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
677					     remote_qp_node);
678		if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
679			link = &(*link)->rb_left;
680		else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
681			link = &(*link)->rb_right;
682		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
683			link = &(*link)->rb_left;
684		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
685			link = &(*link)->rb_right;
686		else
687			return cur_timewait_info;
688	}
689	timewait_info->inserted_remote_qp = 1;
690	rb_link_node(&timewait_info->remote_qp_node, parent, link);
691	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
692	return NULL;
693}
694
695static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
696						    *cm_id_priv)
697{
698	struct rb_node **link = &cm.remote_sidr_table.rb_node;
699	struct rb_node *parent = NULL;
700	struct cm_id_private *cur_cm_id_priv;
701	union ib_gid *port_gid = &cm_id_priv->av.dgid;
702	__be32 remote_id = cm_id_priv->id.remote_id;
703
704	while (*link) {
705		parent = *link;
706		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
707					  sidr_id_node);
708		if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
709			link = &(*link)->rb_left;
710		else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
711			link = &(*link)->rb_right;
712		else {
713			int cmp;
714			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
715				     sizeof *port_gid);
716			if (cmp < 0)
717				link = &(*link)->rb_left;
718			else if (cmp > 0)
719				link = &(*link)->rb_right;
720			else
721				return cur_cm_id_priv;
722		}
723	}
724	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
725	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
726	return NULL;
727}
728
729static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
730			       enum ib_cm_sidr_status status)
731{
732	struct ib_cm_sidr_rep_param param;
733
734	memset(&param, 0, sizeof param);
735	param.status = status;
736	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
737}
738
739struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
740				 ib_cm_handler cm_handler,
741				 void *context)
742{
743	struct cm_id_private *cm_id_priv;
744	int ret;
745
746	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
747	if (!cm_id_priv)
748		return ERR_PTR(-ENOMEM);
749
750	cm_id_priv->id.state = IB_CM_IDLE;
751	cm_id_priv->id.device = device;
752	cm_id_priv->id.cm_handler = cm_handler;
753	cm_id_priv->id.context = context;
754	cm_id_priv->id.remote_cm_qpn = 1;
755	ret = cm_alloc_id(cm_id_priv);
756	if (ret)
757		goto error;
758
759	spin_lock_init(&cm_id_priv->lock);
760	init_completion(&cm_id_priv->comp);
761	INIT_LIST_HEAD(&cm_id_priv->work_list);
762	INIT_LIST_HEAD(&cm_id_priv->prim_list);
763	INIT_LIST_HEAD(&cm_id_priv->altr_list);
764	atomic_set(&cm_id_priv->work_count, -1);
765	atomic_set(&cm_id_priv->refcount, 1);
766	return &cm_id_priv->id;
767
768error:
769	kfree(cm_id_priv);
770	return ERR_PTR(-ENOMEM);
771}
772EXPORT_SYMBOL(ib_create_cm_id);
773
774static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
775{
776	struct cm_work *work;
777
778	if (list_empty(&cm_id_priv->work_list))
779		return NULL;
780
781	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
782	list_del(&work->list);
783	return work;
784}
785
786static void cm_free_work(struct cm_work *work)
787{
788	if (work->mad_recv_wc)
789		ib_free_recv_mad(work->mad_recv_wc);
790	kfree(work);
791}
792
793static inline int cm_convert_to_ms(int iba_time)
794{
795	/* approximate conversion to ms from 4.096us x 2^iba_time */
796	return 1 << max(iba_time - 8, 0);
797}
798
799/*
800 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
801 * Because of how ack_timeout is stored, adding one doubles the timeout.
802 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
803 * increment it (round up) only if the other is within 50%.
804 */
805static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
806{
807	int ack_timeout = packet_life_time + 1;
808
809	if (ack_timeout >= ca_ack_delay)
810		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
811	else
812		ack_timeout = ca_ack_delay +
813			      (ack_timeout >= (ca_ack_delay - 1));
814
815	return min(31, ack_timeout);
816}
817
818static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
819{
820	if (timewait_info->inserted_remote_id) {
821		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
822		timewait_info->inserted_remote_id = 0;
823	}
824
825	if (timewait_info->inserted_remote_qp) {
826		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
827		timewait_info->inserted_remote_qp = 0;
828	}
829}
830
831static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
832{
833	struct cm_timewait_info *timewait_info;
834
835	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
836	if (!timewait_info)
837		return ERR_PTR(-ENOMEM);
838
839	timewait_info->work.local_id = local_id;
840	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
841	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
842	return timewait_info;
843}
844
845static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
846{
847	int wait_time;
848	unsigned long flags;
849	struct cm_device *cm_dev;
850
851	cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
852	if (!cm_dev)
853		return;
854
855	spin_lock_irqsave(&cm.lock, flags);
856	cm_cleanup_timewait(cm_id_priv->timewait_info);
857	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
858	spin_unlock_irqrestore(&cm.lock, flags);
859
860	/*
861	 * The cm_id could be destroyed by the user before we exit timewait.
862	 * To protect against this, we search for the cm_id after exiting
863	 * timewait before notifying the user that we've exited timewait.
864	 */
865	cm_id_priv->id.state = IB_CM_TIMEWAIT;
866	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
867
868	/* Check if the device started its remove_one */
869	spin_lock_irqsave(&cm.lock, flags);
870	if (!cm_dev->going_down)
871		queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
872				   msecs_to_jiffies(wait_time));
873	spin_unlock_irqrestore(&cm.lock, flags);
874
875	cm_id_priv->timewait_info = NULL;
876}
877
878static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
879{
880	unsigned long flags;
881
882	cm_id_priv->id.state = IB_CM_IDLE;
883	if (cm_id_priv->timewait_info) {
884		spin_lock_irqsave(&cm.lock, flags);
885		cm_cleanup_timewait(cm_id_priv->timewait_info);
886		spin_unlock_irqrestore(&cm.lock, flags);
887		kfree(cm_id_priv->timewait_info);
888		cm_id_priv->timewait_info = NULL;
889	}
890}
891
892static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
893{
894	struct cm_id_private *cm_id_priv;
895	struct cm_work *work;
896
897	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
898retest:
899	spin_lock_irq(&cm_id_priv->lock);
900	switch (cm_id->state) {
901	case IB_CM_LISTEN:
902		spin_unlock_irq(&cm_id_priv->lock);
903
904		spin_lock_irq(&cm.lock);
905		if (--cm_id_priv->listen_sharecount > 0) {
906			/* The id is still shared. */
907			cm_deref_id(cm_id_priv);
908			spin_unlock_irq(&cm.lock);
909			return;
910		}
911		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
912		spin_unlock_irq(&cm.lock);
913		break;
914	case IB_CM_SIDR_REQ_SENT:
915		cm_id->state = IB_CM_IDLE;
916		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
917		spin_unlock_irq(&cm_id_priv->lock);
918		break;
919	case IB_CM_SIDR_REQ_RCVD:
920		spin_unlock_irq(&cm_id_priv->lock);
921		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
922		spin_lock_irq(&cm.lock);
923		if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
924			rb_erase(&cm_id_priv->sidr_id_node,
925				 &cm.remote_sidr_table);
926		spin_unlock_irq(&cm.lock);
927		break;
928	case IB_CM_REQ_SENT:
929	case IB_CM_MRA_REQ_RCVD:
930		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
931		spin_unlock_irq(&cm_id_priv->lock);
932		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
933			       &cm_id_priv->id.device->node_guid,
934			       sizeof cm_id_priv->id.device->node_guid,
935			       NULL, 0);
936		break;
937	case IB_CM_REQ_RCVD:
938		if (err == -ENOMEM) {
939			/* Do not reject to allow future retries. */
940			cm_reset_to_idle(cm_id_priv);
941			spin_unlock_irq(&cm_id_priv->lock);
942		} else {
943			spin_unlock_irq(&cm_id_priv->lock);
944			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
945				       NULL, 0, NULL, 0);
946		}
947		break;
948	case IB_CM_REP_SENT:
949	case IB_CM_MRA_REP_RCVD:
950		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
951		/* Fall through */
952	case IB_CM_MRA_REQ_SENT:
953	case IB_CM_REP_RCVD:
954	case IB_CM_MRA_REP_SENT:
955		spin_unlock_irq(&cm_id_priv->lock);
956		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
957			       NULL, 0, NULL, 0);
958		break;
959	case IB_CM_ESTABLISHED:
960		spin_unlock_irq(&cm_id_priv->lock);
961		if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
962			break;
963		ib_send_cm_dreq(cm_id, NULL, 0);
964		goto retest;
965	case IB_CM_DREQ_SENT:
966		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
967		cm_enter_timewait(cm_id_priv);
968		spin_unlock_irq(&cm_id_priv->lock);
969		break;
970	case IB_CM_DREQ_RCVD:
971		spin_unlock_irq(&cm_id_priv->lock);
972		ib_send_cm_drep(cm_id, NULL, 0);
973		break;
974	default:
975		spin_unlock_irq(&cm_id_priv->lock);
976		break;
977	}
978
979	spin_lock_irq(&cm.lock);
980	if (!list_empty(&cm_id_priv->altr_list) &&
981	    (!cm_id_priv->altr_send_port_not_ready))
982		list_del(&cm_id_priv->altr_list);
983	if (!list_empty(&cm_id_priv->prim_list) &&
984	    (!cm_id_priv->prim_send_port_not_ready))
985		list_del(&cm_id_priv->prim_list);
986	spin_unlock_irq(&cm.lock);
987
988	cm_free_id(cm_id->local_id);
989	cm_deref_id(cm_id_priv);
990	wait_for_completion(&cm_id_priv->comp);
991	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
992		cm_free_work(work);
993	kfree(cm_id_priv->private_data);
994	kfree(cm_id_priv);
995}
996
997void ib_destroy_cm_id(struct ib_cm_id *cm_id)
998{
999	cm_destroy_id(cm_id, 0);
1000}
1001EXPORT_SYMBOL(ib_destroy_cm_id);
1002
1003/**
1004 * __ib_cm_listen - Initiates listening on the specified service ID for
1005 *   connection and service ID resolution requests.
1006 * @cm_id: Connection identifier associated with the listen request.
1007 * @service_id: Service identifier matched against incoming connection
1008 *   and service ID resolution requests.  The service ID should be specified
1009 *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
1010 *   assign a service ID to the caller.
1011 * @service_mask: Mask applied to service ID used to listen across a
1012 *   range of service IDs.  If set to 0, the service ID is matched
1013 *   exactly.  This parameter is ignored if %service_id is set to
1014 *   IB_CM_ASSIGN_SERVICE_ID.
1015 */
1016static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
1017			  __be64 service_mask)
1018{
1019	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
1020	int ret = 0;
1021
1022	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
1023	service_id &= service_mask;
1024	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
1025	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
1026		return -EINVAL;
1027
1028	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1029	if (cm_id->state != IB_CM_IDLE)
1030		return -EINVAL;
1031
1032	cm_id->state = IB_CM_LISTEN;
1033	++cm_id_priv->listen_sharecount;
1034
1035	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
1036		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
1037		cm_id->service_mask = ~cpu_to_be64(0);
1038	} else {
1039		cm_id->service_id = service_id;
1040		cm_id->service_mask = service_mask;
1041	}
1042	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
1043
1044	if (cur_cm_id_priv) {
1045		cm_id->state = IB_CM_IDLE;
1046		--cm_id_priv->listen_sharecount;
1047		ret = -EBUSY;
1048	}
1049	return ret;
1050}
1051
1052int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
1053{
1054	unsigned long flags;
1055	int ret;
1056
1057	spin_lock_irqsave(&cm.lock, flags);
1058	ret = __ib_cm_listen(cm_id, service_id, service_mask);
1059	spin_unlock_irqrestore(&cm.lock, flags);
1060
1061	return ret;
1062}
1063EXPORT_SYMBOL(ib_cm_listen);
1064
1065/**
1066 * Create a new listening ib_cm_id and listen on the given service ID.
1067 *
1068 * If there's an existing ID listening on that same device and service ID,
1069 * return it.
1070 *
1071 * @device: Device associated with the cm_id.  All related communication will
1072 * be associated with the specified device.
1073 * @cm_handler: Callback invoked to notify the user of CM events.
1074 * @service_id: Service identifier matched against incoming connection
1075 *   and service ID resolution requests.  The service ID should be specified
1076 *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
1077 *   assign a service ID to the caller.
1078 *
1079 * Callers should call ib_destroy_cm_id when done with the listener ID.
1080 */
1081struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
1082				     ib_cm_handler cm_handler,
1083				     __be64 service_id)
1084{
1085	struct cm_id_private *cm_id_priv;
1086	struct ib_cm_id *cm_id;
1087	unsigned long flags;
1088	int err = 0;
1089
1090	/* Create an ID in advance, since the creation may sleep */
1091	cm_id = ib_create_cm_id(device, cm_handler, NULL);
1092	if (IS_ERR(cm_id))
1093		return cm_id;
1094
1095	spin_lock_irqsave(&cm.lock, flags);
1096
1097	if (service_id == IB_CM_ASSIGN_SERVICE_ID)
1098		goto new_id;
1099
1100	/* Find an existing ID */
1101	cm_id_priv = cm_find_listen(device, service_id);
1102	if (cm_id_priv) {
1103		if (cm_id->cm_handler != cm_handler || cm_id->context) {
1104			/* Sharing an ib_cm_id with different handlers is not
1105			 * supported */
1106			spin_unlock_irqrestore(&cm.lock, flags);
1107			return ERR_PTR(-EINVAL);
1108		}
1109		atomic_inc(&cm_id_priv->refcount);
1110		++cm_id_priv->listen_sharecount;
1111		spin_unlock_irqrestore(&cm.lock, flags);
1112
1113		ib_destroy_cm_id(cm_id);
1114		cm_id = &cm_id_priv->id;
1115		return cm_id;
1116	}
1117
1118new_id:
1119	/* Use newly created ID */
1120	err = __ib_cm_listen(cm_id, service_id, 0);
1121
1122	spin_unlock_irqrestore(&cm.lock, flags);
1123
1124	if (err) {
1125		ib_destroy_cm_id(cm_id);
1126		return ERR_PTR(err);
1127	}
1128	return cm_id;
1129}
1130EXPORT_SYMBOL(ib_cm_insert_listen);
1131
1132static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
1133			  enum cm_msg_sequence msg_seq)
1134{
1135	u64 hi_tid, low_tid;
1136
1137	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
1138	low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
1139			  (msg_seq << 30));
1140	return cpu_to_be64(hi_tid | low_tid);
1141}
1142
1143static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
1144			      __be16 attr_id, __be64 tid)
1145{
1146	hdr->base_version  = IB_MGMT_BASE_VERSION;
1147	hdr->mgmt_class	   = IB_MGMT_CLASS_CM;
1148	hdr->class_version = IB_CM_CLASS_VERSION;
1149	hdr->method	   = IB_MGMT_METHOD_SEND;
1150	hdr->attr_id	   = attr_id;
1151	hdr->tid	   = tid;
1152}
1153
1154static void cm_format_req(struct cm_req_msg *req_msg,
1155			  struct cm_id_private *cm_id_priv,
1156			  struct ib_cm_req_param *param)
1157{
1158	struct ib_sa_path_rec *pri_path = param->primary_path;
1159	struct ib_sa_path_rec *alt_path = param->alternate_path;
1160
1161	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
1162			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
1163
1164	req_msg->local_comm_id = cm_id_priv->id.local_id;
1165	req_msg->service_id = param->service_id;
1166	req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1167	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
1168	cm_req_set_init_depth(req_msg, param->initiator_depth);
1169	cm_req_set_remote_resp_timeout(req_msg,
1170				       param->remote_cm_response_timeout);
1171	cm_req_set_qp_type(req_msg, param->qp_type);
1172	cm_req_set_flow_ctrl(req_msg, param->flow_control);
1173	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
1174	cm_req_set_local_resp_timeout(req_msg,
1175				      param->local_cm_response_timeout);
1176	req_msg->pkey = param->primary_path->pkey;
1177	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
1178	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
1179
1180	if (param->qp_type != IB_QPT_XRC_INI) {
1181		cm_req_set_resp_res(req_msg, param->responder_resources);
1182		cm_req_set_retry_count(req_msg, param->retry_count);
1183		cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
1184		cm_req_set_srq(req_msg, param->srq);
1185	}
1186
1187	if (pri_path->hop_limit <= 1) {
1188		req_msg->primary_local_lid = pri_path->slid;
1189		req_msg->primary_remote_lid = pri_path->dlid;
1190	} else {
1191		/* Work-around until there's a way to obtain remote LID info */
1192		req_msg->primary_local_lid = IB_LID_PERMISSIVE;
1193		req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
1194	}
1195	req_msg->primary_local_gid = pri_path->sgid;
1196	req_msg->primary_remote_gid = pri_path->dgid;
1197	cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
1198	cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
1199	req_msg->primary_traffic_class = pri_path->traffic_class;
1200	req_msg->primary_hop_limit = pri_path->hop_limit;
1201	cm_req_set_primary_sl(req_msg, pri_path->sl);
1202	cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
1203	cm_req_set_primary_local_ack_timeout(req_msg,
1204		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1205			       pri_path->packet_life_time));
1206
1207	if (alt_path) {
1208		if (alt_path->hop_limit <= 1) {
1209			req_msg->alt_local_lid = alt_path->slid;
1210			req_msg->alt_remote_lid = alt_path->dlid;
1211		} else {
1212			req_msg->alt_local_lid = IB_LID_PERMISSIVE;
1213			req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
1214		}
1215		req_msg->alt_local_gid = alt_path->sgid;
1216		req_msg->alt_remote_gid = alt_path->dgid;
1217		cm_req_set_alt_flow_label(req_msg,
1218					  alt_path->flow_label);
1219		cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
1220		req_msg->alt_traffic_class = alt_path->traffic_class;
1221		req_msg->alt_hop_limit = alt_path->hop_limit;
1222		cm_req_set_alt_sl(req_msg, alt_path->sl);
1223		cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
1224		cm_req_set_alt_local_ack_timeout(req_msg,
1225			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1226				       alt_path->packet_life_time));
1227	}
1228
1229	if (param->private_data && param->private_data_len)
1230		memcpy(req_msg->private_data, param->private_data,
1231		       param->private_data_len);
1232}
1233
1234static int cm_validate_req_param(struct ib_cm_req_param *param)
1235{
1236	/* peer-to-peer not supported */
1237	if (param->peer_to_peer)
1238		return -EINVAL;
1239
1240	if (!param->primary_path)
1241		return -EINVAL;
1242
1243	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
1244	    param->qp_type != IB_QPT_XRC_INI)
1245		return -EINVAL;
1246
1247	if (param->private_data &&
1248	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
1249		return -EINVAL;
1250
1251	if (param->alternate_path &&
1252	    (param->alternate_path->pkey != param->primary_path->pkey ||
1253	     param->alternate_path->mtu != param->primary_path->mtu))
1254		return -EINVAL;
1255
1256	return 0;
1257}
1258
1259int ib_send_cm_req(struct ib_cm_id *cm_id,
1260		   struct ib_cm_req_param *param)
1261{
1262	struct cm_id_private *cm_id_priv;
1263	struct cm_req_msg *req_msg;
1264	unsigned long flags;
1265	int ret;
1266
1267	ret = cm_validate_req_param(param);
1268	if (ret)
1269		return ret;
1270
1271	/* Verify that we're not in timewait. */
1272	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1273	spin_lock_irqsave(&cm_id_priv->lock, flags);
1274	if (cm_id->state != IB_CM_IDLE) {
1275		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1276		ret = -EINVAL;
1277		goto out;
1278	}
1279	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1280
1281	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1282							    id.local_id);
1283	if (IS_ERR(cm_id_priv->timewait_info)) {
1284		ret = PTR_ERR(cm_id_priv->timewait_info);
1285		goto out;
1286	}
1287
1288	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
1289				 cm_id_priv);
1290	if (ret)
1291		goto error1;
1292	if (param->alternate_path) {
1293		ret = cm_init_av_by_path(param->alternate_path,
1294					 &cm_id_priv->alt_av, cm_id_priv);
1295		if (ret)
1296			goto error1;
1297	}
1298	cm_id->service_id = param->service_id;
1299	cm_id->service_mask = ~cpu_to_be64(0);
1300	cm_id_priv->timeout_ms = cm_convert_to_ms(
1301				    param->primary_path->packet_life_time) * 2 +
1302				 cm_convert_to_ms(
1303				    param->remote_cm_response_timeout);
1304	cm_id_priv->max_cm_retries = param->max_cm_retries;
1305	cm_id_priv->initiator_depth = param->initiator_depth;
1306	cm_id_priv->responder_resources = param->responder_resources;
1307	cm_id_priv->retry_count = param->retry_count;
1308	cm_id_priv->path_mtu = param->primary_path->mtu;
1309	cm_id_priv->pkey = param->primary_path->pkey;
1310	cm_id_priv->qp_type = param->qp_type;
1311
1312	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
1313	if (ret)
1314		goto error1;
1315
1316	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
1317	cm_format_req(req_msg, cm_id_priv, param);
1318	cm_id_priv->tid = req_msg->hdr.tid;
1319	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
1320	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
1321
1322	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
1323	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
1324
1325	spin_lock_irqsave(&cm_id_priv->lock, flags);
1326	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
1327	if (ret) {
1328		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1329		goto error2;
1330	}
1331	BUG_ON(cm_id->state != IB_CM_IDLE);
1332	cm_id->state = IB_CM_REQ_SENT;
1333	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1334	return 0;
1335
1336error2:	cm_free_msg(cm_id_priv->msg);
1337error1:	kfree(cm_id_priv->timewait_info);
1338out:	return ret;
1339}
1340EXPORT_SYMBOL(ib_send_cm_req);
1341
1342static int cm_issue_rej(struct cm_port *port,
1343			struct ib_mad_recv_wc *mad_recv_wc,
1344			enum ib_cm_rej_reason reason,
1345			enum cm_msg_response msg_rejected,
1346			void *ari, u8 ari_length)
1347{
1348	struct ib_mad_send_buf *msg = NULL;
1349	struct cm_rej_msg *rej_msg, *rcv_msg;
1350	int ret;
1351
1352	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1353	if (ret)
1354		return ret;
1355
1356	/* We just need common CM header information.  Cast to any message. */
1357	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
1358	rej_msg = (struct cm_rej_msg *) msg->mad;
1359
1360	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
1361	rej_msg->remote_comm_id = rcv_msg->local_comm_id;
1362	rej_msg->local_comm_id = rcv_msg->remote_comm_id;
1363	cm_rej_set_msg_rejected(rej_msg, msg_rejected);
1364	rej_msg->reason = cpu_to_be16(reason);
1365
1366	if (ari && ari_length) {
1367		cm_rej_set_reject_info_len(rej_msg, ari_length);
1368		memcpy(rej_msg->ari, ari, ari_length);
1369	}
1370
1371	ret = ib_post_send_mad(msg, NULL);
1372	if (ret)
1373		cm_free_msg(msg);
1374
1375	return ret;
1376}
1377
1378static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1379					    struct ib_sa_path_rec *primary_path,
1380					    struct ib_sa_path_rec *alt_path)
1381{
1382	memset(primary_path, 0, sizeof *primary_path);
1383	primary_path->dgid = req_msg->primary_local_gid;
1384	primary_path->sgid = req_msg->primary_remote_gid;
1385	primary_path->dlid = req_msg->primary_local_lid;
1386	primary_path->slid = req_msg->primary_remote_lid;
1387	primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
1388	primary_path->hop_limit = req_msg->primary_hop_limit;
1389	primary_path->traffic_class = req_msg->primary_traffic_class;
1390	primary_path->reversible = 1;
1391	primary_path->pkey = req_msg->pkey;
1392	primary_path->sl = cm_req_get_primary_sl(req_msg);
1393	primary_path->mtu_selector = IB_SA_EQ;
1394	primary_path->mtu = cm_req_get_path_mtu(req_msg);
1395	primary_path->rate_selector = IB_SA_EQ;
1396	primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
1397	primary_path->packet_life_time_selector = IB_SA_EQ;
1398	primary_path->packet_life_time =
1399		cm_req_get_primary_local_ack_timeout(req_msg);
1400	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1401	primary_path->service_id = req_msg->service_id;
1402
1403	if (req_msg->alt_local_lid) {
1404		memset(alt_path, 0, sizeof *alt_path);
1405		alt_path->dgid = req_msg->alt_local_gid;
1406		alt_path->sgid = req_msg->alt_remote_gid;
1407		alt_path->dlid = req_msg->alt_local_lid;
1408		alt_path->slid = req_msg->alt_remote_lid;
1409		alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
1410		alt_path->hop_limit = req_msg->alt_hop_limit;
1411		alt_path->traffic_class = req_msg->alt_traffic_class;
1412		alt_path->reversible = 1;
1413		alt_path->pkey = req_msg->pkey;
1414		alt_path->sl = cm_req_get_alt_sl(req_msg);
1415		alt_path->mtu_selector = IB_SA_EQ;
1416		alt_path->mtu = cm_req_get_path_mtu(req_msg);
1417		alt_path->rate_selector = IB_SA_EQ;
1418		alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
1419		alt_path->packet_life_time_selector = IB_SA_EQ;
1420		alt_path->packet_life_time =
1421			cm_req_get_alt_local_ack_timeout(req_msg);
1422		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1423		alt_path->service_id = req_msg->service_id;
1424	}
1425}
1426
1427static u16 cm_get_bth_pkey(struct cm_work *work)
1428{
1429	struct ib_device *ib_dev = work->port->cm_dev->ib_device;
1430	u8 port_num = work->port->port_num;
1431	u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
1432	u16 pkey;
1433	int ret;
1434
1435	ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
1436	if (ret) {
1437		dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
1438				     port_num, pkey_index, ret);
1439		return 0;
1440	}
1441
1442	return pkey;
1443}
1444
1445static void cm_format_req_event(struct cm_work *work,
1446				struct cm_id_private *cm_id_priv,
1447				struct ib_cm_id *listen_id)
1448{
1449	struct cm_req_msg *req_msg;
1450	struct ib_cm_req_event_param *param;
1451
1452	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1453	param = &work->cm_event.param.req_rcvd;
1454	param->listen_id = listen_id;
1455	param->bth_pkey = cm_get_bth_pkey(work);
1456	param->port = cm_id_priv->av.port->port_num;
1457	param->primary_path = &work->path[0];
1458	if (req_msg->alt_local_lid)
1459		param->alternate_path = &work->path[1];
1460	else
1461		param->alternate_path = NULL;
1462	param->remote_ca_guid = req_msg->local_ca_guid;
1463	param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1464	param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
1465	param->qp_type = cm_req_get_qp_type(req_msg);
1466	param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
1467	param->responder_resources = cm_req_get_init_depth(req_msg);
1468	param->initiator_depth = cm_req_get_resp_res(req_msg);
1469	param->local_cm_response_timeout =
1470					cm_req_get_remote_resp_timeout(req_msg);
1471	param->flow_control = cm_req_get_flow_ctrl(req_msg);
1472	param->remote_cm_response_timeout =
1473					cm_req_get_local_resp_timeout(req_msg);
1474	param->retry_count = cm_req_get_retry_count(req_msg);
1475	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1476	param->srq = cm_req_get_srq(req_msg);
1477	param->ppath_sgid_index = cm_id_priv->av.ah_attr.grh.sgid_index;
1478	work->cm_event.private_data = &req_msg->private_data;
1479}
1480
1481static void cm_process_work(struct cm_id_private *cm_id_priv,
1482			    struct cm_work *work)
1483{
1484	int ret;
1485
1486	/* We will typically only have the current event to report. */
1487	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1488	cm_free_work(work);
1489
1490	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1491		spin_lock_irq(&cm_id_priv->lock);
1492		work = cm_dequeue_work(cm_id_priv);
1493		spin_unlock_irq(&cm_id_priv->lock);
1494		BUG_ON(!work);
1495		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1496						&work->cm_event);
1497		cm_free_work(work);
1498	}
1499	cm_deref_id(cm_id_priv);
1500	if (ret)
1501		cm_destroy_id(&cm_id_priv->id, ret);
1502}
1503
1504static void cm_format_mra(struct cm_mra_msg *mra_msg,
1505			  struct cm_id_private *cm_id_priv,
1506			  enum cm_msg_response msg_mraed, u8 service_timeout,
1507			  const void *private_data, u8 private_data_len)
1508{
1509	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1510	cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1511	mra_msg->local_comm_id = cm_id_priv->id.local_id;
1512	mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1513	cm_mra_set_service_timeout(mra_msg, service_timeout);
1514
1515	if (private_data && private_data_len)
1516		memcpy(mra_msg->private_data, private_data, private_data_len);
1517}
1518
1519static void cm_format_rej(struct cm_rej_msg *rej_msg,
1520			  struct cm_id_private *cm_id_priv,
1521			  enum ib_cm_rej_reason reason,
1522			  void *ari,
1523			  u8 ari_length,
1524			  const void *private_data,
1525			  u8 private_data_len)
1526{
1527	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1528	rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1529
1530	switch(cm_id_priv->id.state) {
1531	case IB_CM_REQ_RCVD:
1532		rej_msg->local_comm_id = 0;
1533		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1534		break;
1535	case IB_CM_MRA_REQ_SENT:
1536		rej_msg->local_comm_id = cm_id_priv->id.local_id;
1537		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1538		break;
1539	case IB_CM_REP_RCVD:
1540	case IB_CM_MRA_REP_SENT:
1541		rej_msg->local_comm_id = cm_id_priv->id.local_id;
1542		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1543		break;
1544	default:
1545		rej_msg->local_comm_id = cm_id_priv->id.local_id;
1546		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1547		break;
1548	}
1549
1550	rej_msg->reason = cpu_to_be16(reason);
1551	if (ari && ari_length) {
1552		cm_rej_set_reject_info_len(rej_msg, ari_length);
1553		memcpy(rej_msg->ari, ari, ari_length);
1554	}
1555
1556	if (private_data && private_data_len)
1557		memcpy(rej_msg->private_data, private_data, private_data_len);
1558}
1559
1560static void cm_dup_req_handler(struct cm_work *work,
1561			       struct cm_id_private *cm_id_priv)
1562{
1563	struct ib_mad_send_buf *msg = NULL;
1564	int ret;
1565
1566	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1567			counter[CM_REQ_COUNTER]);
1568
1569	/* Quick state check to discard duplicate REQs. */
1570	if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1571		return;
1572
1573	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1574	if (ret)
1575		return;
1576
1577	spin_lock_irq(&cm_id_priv->lock);
1578	switch (cm_id_priv->id.state) {
1579	case IB_CM_MRA_REQ_SENT:
1580		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1581			      CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1582			      cm_id_priv->private_data,
1583			      cm_id_priv->private_data_len);
1584		break;
1585	case IB_CM_TIMEWAIT:
1586		cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1587			      IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1588		break;
1589	default:
1590		goto unlock;
1591	}
1592	spin_unlock_irq(&cm_id_priv->lock);
1593
1594	ret = ib_post_send_mad(msg, NULL);
1595	if (ret)
1596		goto free;
1597	return;
1598
1599unlock:	spin_unlock_irq(&cm_id_priv->lock);
1600free:	cm_free_msg(msg);
1601}
1602
1603static struct cm_id_private * cm_match_req(struct cm_work *work,
1604					   struct cm_id_private *cm_id_priv)
1605{
1606	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1607	struct cm_timewait_info *timewait_info;
1608	struct cm_req_msg *req_msg;
1609
1610	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1611
1612	/* Check for possible duplicate REQ. */
1613	spin_lock_irq(&cm.lock);
1614	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1615	if (timewait_info) {
1616		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1617					   timewait_info->work.remote_id);
1618		spin_unlock_irq(&cm.lock);
1619		if (cur_cm_id_priv) {
1620			cm_dup_req_handler(work, cur_cm_id_priv);
1621			cm_deref_id(cur_cm_id_priv);
1622		}
1623		return NULL;
1624	}
1625
1626	/* Check for stale connections. */
1627	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1628	if (timewait_info) {
1629		cm_cleanup_timewait(cm_id_priv->timewait_info);
1630		spin_unlock_irq(&cm.lock);
1631		cm_issue_rej(work->port, work->mad_recv_wc,
1632			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1633			     NULL, 0);
1634		return NULL;
1635	}
1636
1637	/* Find matching listen request. */
1638	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1639					   req_msg->service_id);
1640	if (!listen_cm_id_priv) {
1641		cm_cleanup_timewait(cm_id_priv->timewait_info);
1642		spin_unlock_irq(&cm.lock);
1643		cm_issue_rej(work->port, work->mad_recv_wc,
1644			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1645			     NULL, 0);
1646		goto out;
1647	}
1648	atomic_inc(&listen_cm_id_priv->refcount);
1649	atomic_inc(&cm_id_priv->refcount);
1650	cm_id_priv->id.state = IB_CM_REQ_RCVD;
1651	atomic_inc(&cm_id_priv->work_count);
1652	spin_unlock_irq(&cm.lock);
1653out:
1654	return listen_cm_id_priv;
1655}
1656
1657/*
1658 * Work-around for inter-subnet connections.  If the LIDs are permissive,
1659 * we need to override the LID/SL data in the REQ with the LID information
1660 * in the work completion.
1661 */
1662static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1663{
1664	if (!cm_req_get_primary_subnet_local(req_msg)) {
1665		if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1666			req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1667			cm_req_set_primary_sl(req_msg, wc->sl);
1668		}
1669
1670		if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1671			req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1672	}
1673
1674	if (!cm_req_get_alt_subnet_local(req_msg)) {
1675		if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1676			req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1677			cm_req_set_alt_sl(req_msg, wc->sl);
1678		}
1679
1680		if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1681			req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1682	}
1683}
1684
1685static int cm_req_handler(struct cm_work *work)
1686{
1687	struct ib_cm_id *cm_id;
1688	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1689	struct cm_req_msg *req_msg;
1690	union ib_gid gid;
1691	struct ib_gid_attr gid_attr;
1692	int ret;
1693
1694	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1695
1696	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1697	if (IS_ERR(cm_id))
1698		return PTR_ERR(cm_id);
1699
1700	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1701	cm_id_priv->id.remote_id = req_msg->local_comm_id;
1702	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1703				      work->mad_recv_wc->recv_buf.grh,
1704				      &cm_id_priv->av);
1705	if (ret)
1706		goto destroy;
1707	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1708							    id.local_id);
1709	if (IS_ERR(cm_id_priv->timewait_info)) {
1710		ret = PTR_ERR(cm_id_priv->timewait_info);
1711		goto destroy;
1712	}
1713	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1714	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1715	cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1716
1717	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1718	if (!listen_cm_id_priv) {
1719		ret = -EINVAL;
1720		kfree(cm_id_priv->timewait_info);
1721		goto destroy;
1722	}
1723
1724	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1725	cm_id_priv->id.context = listen_cm_id_priv->id.context;
1726	cm_id_priv->id.service_id = req_msg->service_id;
1727	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1728
1729	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1730	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1731
1732	memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
1733	work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
1734	ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
1735				work->port->port_num,
1736				cm_id_priv->av.ah_attr.grh.sgid_index,
1737				&gid, &gid_attr);
1738	if (!ret) {
1739		if (gid_attr.ndev) {
1740			work->path[0].ifindex = gid_attr.ndev->if_index;
1741			work->path[0].net = dev_net(gid_attr.ndev);
1742			dev_put(gid_attr.ndev);
1743		}
1744		work->path[0].gid_type = gid_attr.gid_type;
1745		ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
1746					 cm_id_priv);
1747	}
1748	if (ret) {
1749		int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
1750					    work->port->port_num, 0,
1751					    &work->path[0].sgid,
1752					    &gid_attr);
1753		if (!err && gid_attr.ndev) {
1754			work->path[0].ifindex = gid_attr.ndev->if_index;
1755			work->path[0].net = dev_net(gid_attr.ndev);
1756			dev_put(gid_attr.ndev);
1757		}
1758		work->path[0].gid_type = gid_attr.gid_type;
1759		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1760			       &work->path[0].sgid, sizeof work->path[0].sgid,
1761			       NULL, 0);
1762		goto rejected;
1763	}
1764	if (req_msg->alt_local_lid) {
1765		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
1766					 cm_id_priv);
1767		if (ret) {
1768			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1769				       &work->path[0].sgid,
1770				       sizeof work->path[0].sgid, NULL, 0);
1771			goto rejected;
1772		}
1773	}
1774	cm_id_priv->tid = req_msg->hdr.tid;
1775	cm_id_priv->timeout_ms = cm_convert_to_ms(
1776					cm_req_get_local_resp_timeout(req_msg));
1777	cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1778	cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1779	cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1780	cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1781	cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1782	cm_id_priv->pkey = req_msg->pkey;
1783	cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1784	cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1785	cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1786	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1787
1788	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1789	cm_process_work(cm_id_priv, work);
1790	cm_deref_id(listen_cm_id_priv);
1791	return 0;
1792
1793rejected:
1794	atomic_dec(&cm_id_priv->refcount);
1795	cm_deref_id(listen_cm_id_priv);
1796destroy:
1797	ib_destroy_cm_id(cm_id);
1798	return ret;
1799}
1800
1801static void cm_format_rep(struct cm_rep_msg *rep_msg,
1802			  struct cm_id_private *cm_id_priv,
1803			  struct ib_cm_rep_param *param)
1804{
1805	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1806	rep_msg->local_comm_id = cm_id_priv->id.local_id;
1807	rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1808	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1809	rep_msg->resp_resources = param->responder_resources;
1810	cm_rep_set_target_ack_delay(rep_msg,
1811				    cm_id_priv->av.port->cm_dev->ack_delay);
1812	cm_rep_set_failover(rep_msg, param->failover_accepted);
1813	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1814	rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1815
1816	if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
1817		rep_msg->initiator_depth = param->initiator_depth;
1818		cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1819		cm_rep_set_srq(rep_msg, param->srq);
1820		cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1821	} else {
1822		cm_rep_set_srq(rep_msg, 1);
1823		cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
1824	}
1825
1826	if (param->private_data && param->private_data_len)
1827		memcpy(rep_msg->private_data, param->private_data,
1828		       param->private_data_len);
1829}
1830
1831int ib_send_cm_rep(struct ib_cm_id *cm_id,
1832		   struct ib_cm_rep_param *param)
1833{
1834	struct cm_id_private *cm_id_priv;
1835	struct ib_mad_send_buf *msg;
1836	struct cm_rep_msg *rep_msg;
1837	unsigned long flags;
1838	int ret;
1839
1840	if (param->private_data &&
1841	    param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1842		return -EINVAL;
1843
1844	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1845	spin_lock_irqsave(&cm_id_priv->lock, flags);
1846	if (cm_id->state != IB_CM_REQ_RCVD &&
1847	    cm_id->state != IB_CM_MRA_REQ_SENT) {
1848		ret = -EINVAL;
1849		goto out;
1850	}
1851
1852	ret = cm_alloc_msg(cm_id_priv, &msg);
1853	if (ret)
1854		goto out;
1855
1856	rep_msg = (struct cm_rep_msg *) msg->mad;
1857	cm_format_rep(rep_msg, cm_id_priv, param);
1858	msg->timeout_ms = cm_id_priv->timeout_ms;
1859	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1860
1861	ret = ib_post_send_mad(msg, NULL);
1862	if (ret) {
1863		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1864		cm_free_msg(msg);
1865		return ret;
1866	}
1867
1868	cm_id->state = IB_CM_REP_SENT;
1869	cm_id_priv->msg = msg;
1870	cm_id_priv->initiator_depth = param->initiator_depth;
1871	cm_id_priv->responder_resources = param->responder_resources;
1872	cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1873	cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
1874
1875out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1876	return ret;
1877}
1878EXPORT_SYMBOL(ib_send_cm_rep);
1879
1880static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1881			  struct cm_id_private *cm_id_priv,
1882			  const void *private_data,
1883			  u8 private_data_len)
1884{
1885	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1886	rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1887	rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1888
1889	if (private_data && private_data_len)
1890		memcpy(rtu_msg->private_data, private_data, private_data_len);
1891}
1892
1893int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1894		   const void *private_data,
1895		   u8 private_data_len)
1896{
1897	struct cm_id_private *cm_id_priv;
1898	struct ib_mad_send_buf *msg;
1899	unsigned long flags;
1900	void *data;
1901	int ret;
1902
1903	if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1904		return -EINVAL;
1905
1906	data = cm_copy_private_data(private_data, private_data_len);
1907	if (IS_ERR(data))
1908		return PTR_ERR(data);
1909
1910	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1911	spin_lock_irqsave(&cm_id_priv->lock, flags);
1912	if (cm_id->state != IB_CM_REP_RCVD &&
1913	    cm_id->state != IB_CM_MRA_REP_SENT) {
1914		ret = -EINVAL;
1915		goto error;
1916	}
1917
1918	ret = cm_alloc_msg(cm_id_priv, &msg);
1919	if (ret)
1920		goto error;
1921
1922	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1923		      private_data, private_data_len);
1924
1925	ret = ib_post_send_mad(msg, NULL);
1926	if (ret) {
1927		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1928		cm_free_msg(msg);
1929		kfree(data);
1930		return ret;
1931	}
1932
1933	cm_id->state = IB_CM_ESTABLISHED;
1934	cm_set_private_data(cm_id_priv, data, private_data_len);
1935	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1936	return 0;
1937
1938error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1939	kfree(data);
1940	return ret;
1941}
1942EXPORT_SYMBOL(ib_send_cm_rtu);
1943
1944static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
1945{
1946	struct cm_rep_msg *rep_msg;
1947	struct ib_cm_rep_event_param *param;
1948
1949	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1950	param = &work->cm_event.param.rep_rcvd;
1951	param->remote_ca_guid = rep_msg->local_ca_guid;
1952	param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1953	param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
1954	param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
1955	param->responder_resources = rep_msg->initiator_depth;
1956	param->initiator_depth = rep_msg->resp_resources;
1957	param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1958	param->failover_accepted = cm_rep_get_failover(rep_msg);
1959	param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1960	param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1961	param->srq = cm_rep_get_srq(rep_msg);
1962	work->cm_event.private_data = &rep_msg->private_data;
1963}
1964
1965static void cm_dup_rep_handler(struct cm_work *work)
1966{
1967	struct cm_id_private *cm_id_priv;
1968	struct cm_rep_msg *rep_msg;
1969	struct ib_mad_send_buf *msg = NULL;
1970	int ret;
1971
1972	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1973	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1974				   rep_msg->local_comm_id);
1975	if (!cm_id_priv)
1976		return;
1977
1978	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1979			counter[CM_REP_COUNTER]);
1980	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1981	if (ret)
1982		goto deref;
1983
1984	spin_lock_irq(&cm_id_priv->lock);
1985	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1986		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1987			      cm_id_priv->private_data,
1988			      cm_id_priv->private_data_len);
1989	else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1990		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1991			      CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1992			      cm_id_priv->private_data,
1993			      cm_id_priv->private_data_len);
1994	else
1995		goto unlock;
1996	spin_unlock_irq(&cm_id_priv->lock);
1997
1998	ret = ib_post_send_mad(msg, NULL);
1999	if (ret)
2000		goto free;
2001	goto deref;
2002
2003unlock:	spin_unlock_irq(&cm_id_priv->lock);
2004free:	cm_free_msg(msg);
2005deref:	cm_deref_id(cm_id_priv);
2006}
2007
2008static int cm_rep_handler(struct cm_work *work)
2009{
2010	struct cm_id_private *cm_id_priv;
2011	struct cm_rep_msg *rep_msg;
2012	int ret;
2013
2014	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2015	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
2016	if (!cm_id_priv) {
2017		cm_dup_rep_handler(work);
2018		return -EINVAL;
2019	}
2020
2021	cm_format_rep_event(work, cm_id_priv->qp_type);
2022
2023	spin_lock_irq(&cm_id_priv->lock);
2024	switch (cm_id_priv->id.state) {
2025	case IB_CM_REQ_SENT:
2026	case IB_CM_MRA_REQ_RCVD:
2027		break;
2028	default:
2029		spin_unlock_irq(&cm_id_priv->lock);
2030		ret = -EINVAL;
2031		goto error;
2032	}
2033
2034	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
2035	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
2036	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2037
2038	spin_lock(&cm.lock);
2039	/* Check for duplicate REP. */
2040	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
2041		spin_unlock(&cm.lock);
2042		spin_unlock_irq(&cm_id_priv->lock);
2043		ret = -EINVAL;
2044		goto error;
2045	}
2046	/* Check for a stale connection. */
2047	if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
2048		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
2049			 &cm.remote_id_table);
2050		cm_id_priv->timewait_info->inserted_remote_id = 0;
2051		spin_unlock(&cm.lock);
2052		spin_unlock_irq(&cm_id_priv->lock);
2053		cm_issue_rej(work->port, work->mad_recv_wc,
2054			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
2055			     NULL, 0);
2056		ret = -EINVAL;
2057		goto error;
2058	}
2059	spin_unlock(&cm.lock);
2060
2061	cm_id_priv->id.state = IB_CM_REP_RCVD;
2062	cm_id_priv->id.remote_id = rep_msg->local_comm_id;
2063	cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2064	cm_id_priv->initiator_depth = rep_msg->resp_resources;
2065	cm_id_priv->responder_resources = rep_msg->initiator_depth;
2066	cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
2067	cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2068	cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2069	cm_id_priv->av.timeout =
2070			cm_ack_timeout(cm_id_priv->target_ack_delay,
2071				       cm_id_priv->av.timeout - 1);
2072	cm_id_priv->alt_av.timeout =
2073			cm_ack_timeout(cm_id_priv->target_ack_delay,
2074				       cm_id_priv->alt_av.timeout - 1);
2075
2076	/* todo: handle peer_to_peer */
2077
2078	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2079	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2080	if (!ret)
2081		list_add_tail(&work->list, &cm_id_priv->work_list);
2082	spin_unlock_irq(&cm_id_priv->lock);
2083
2084	if (ret)
2085		cm_process_work(cm_id_priv, work);
2086	else
2087		cm_deref_id(cm_id_priv);
2088	return 0;
2089
2090error:
2091	cm_deref_id(cm_id_priv);
2092	return ret;
2093}
2094
2095static int cm_establish_handler(struct cm_work *work)
2096{
2097	struct cm_id_private *cm_id_priv;
2098	int ret;
2099
2100	/* See comment in cm_establish about lookup. */
2101	cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
2102	if (!cm_id_priv)
2103		return -EINVAL;
2104
2105	spin_lock_irq(&cm_id_priv->lock);
2106	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
2107		spin_unlock_irq(&cm_id_priv->lock);
2108		goto out;
2109	}
2110
2111	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2112	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2113	if (!ret)
2114		list_add_tail(&work->list, &cm_id_priv->work_list);
2115	spin_unlock_irq(&cm_id_priv->lock);
2116
2117	if (ret)
2118		cm_process_work(cm_id_priv, work);
2119	else
2120		cm_deref_id(cm_id_priv);
2121	return 0;
2122out:
2123	cm_deref_id(cm_id_priv);
2124	return -EINVAL;
2125}
2126
2127static int cm_rtu_handler(struct cm_work *work)
2128{
2129	struct cm_id_private *cm_id_priv;
2130	struct cm_rtu_msg *rtu_msg;
2131	int ret;
2132
2133	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2134	cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
2135				   rtu_msg->local_comm_id);
2136	if (!cm_id_priv)
2137		return -EINVAL;
2138
2139	work->cm_event.private_data = &rtu_msg->private_data;
2140
2141	spin_lock_irq(&cm_id_priv->lock);
2142	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2143	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2144		spin_unlock_irq(&cm_id_priv->lock);
2145		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2146				counter[CM_RTU_COUNTER]);
2147		goto out;
2148	}
2149	cm_id_priv->id.state = IB_CM_ESTABLISHED;
2150
2151	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2152	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2153	if (!ret)
2154		list_add_tail(&work->list, &cm_id_priv->work_list);
2155	spin_unlock_irq(&cm_id_priv->lock);
2156
2157	if (ret)
2158		cm_process_work(cm_id_priv, work);
2159	else
2160		cm_deref_id(cm_id_priv);
2161	return 0;
2162out:
2163	cm_deref_id(cm_id_priv);
2164	return -EINVAL;
2165}
2166
2167static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2168			  struct cm_id_private *cm_id_priv,
2169			  const void *private_data,
2170			  u8 private_data_len)
2171{
2172	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2173			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
2174	dreq_msg->local_comm_id = cm_id_priv->id.local_id;
2175	dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2176	cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
2177
2178	if (private_data && private_data_len)
2179		memcpy(dreq_msg->private_data, private_data, private_data_len);
2180}
2181
2182int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2183		    const void *private_data,
2184		    u8 private_data_len)
2185{
2186	struct cm_id_private *cm_id_priv;
2187	struct ib_mad_send_buf *msg;
2188	unsigned long flags;
2189	int ret;
2190
2191	if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2192		return -EINVAL;
2193
2194	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2195	spin_lock_irqsave(&cm_id_priv->lock, flags);
2196	if (cm_id->state != IB_CM_ESTABLISHED) {
2197		ret = -EINVAL;
2198		goto out;
2199	}
2200
2201	if (cm_id->lap_state == IB_CM_LAP_SENT ||
2202	    cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2203		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2204
2205	ret = cm_alloc_msg(cm_id_priv, &msg);
2206	if (ret) {
2207		cm_enter_timewait(cm_id_priv);
2208		goto out;
2209	}
2210
2211	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2212		       private_data, private_data_len);
2213	msg->timeout_ms = cm_id_priv->timeout_ms;
2214	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2215
2216	ret = ib_post_send_mad(msg, NULL);
2217	if (ret) {
2218		cm_enter_timewait(cm_id_priv);
2219		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2220		cm_free_msg(msg);
2221		return ret;
2222	}
2223
2224	cm_id->state = IB_CM_DREQ_SENT;
2225	cm_id_priv->msg = msg;
2226out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2227	return ret;
2228}
2229EXPORT_SYMBOL(ib_send_cm_dreq);
2230
2231static void cm_format_drep(struct cm_drep_msg *drep_msg,
2232			  struct cm_id_private *cm_id_priv,
2233			  const void *private_data,
2234			  u8 private_data_len)
2235{
2236	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2237	drep_msg->local_comm_id = cm_id_priv->id.local_id;
2238	drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2239
2240	if (private_data && private_data_len)
2241		memcpy(drep_msg->private_data, private_data, private_data_len);
2242}
2243
2244int ib_send_cm_drep(struct ib_cm_id *cm_id,
2245		    const void *private_data,
2246		    u8 private_data_len)
2247{
2248	struct cm_id_private *cm_id_priv;
2249	struct ib_mad_send_buf *msg;
2250	unsigned long flags;
2251	void *data;
2252	int ret;
2253
2254	if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2255		return -EINVAL;
2256
2257	data = cm_copy_private_data(private_data, private_data_len);
2258	if (IS_ERR(data))
2259		return PTR_ERR(data);
2260
2261	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2262	spin_lock_irqsave(&cm_id_priv->lock, flags);
2263	if (cm_id->state != IB_CM_DREQ_RCVD) {
2264		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2265		kfree(data);
2266		return -EINVAL;
2267	}
2268
2269	cm_set_private_data(cm_id_priv, data, private_data_len);
2270	cm_enter_timewait(cm_id_priv);
2271
2272	ret = cm_alloc_msg(cm_id_priv, &msg);
2273	if (ret)
2274		goto out;
2275
2276	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2277		       private_data, private_data_len);
2278
2279	ret = ib_post_send_mad(msg, NULL);
2280	if (ret) {
2281		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2282		cm_free_msg(msg);
2283		return ret;
2284	}
2285
2286out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2287	return ret;
2288}
2289EXPORT_SYMBOL(ib_send_cm_drep);
2290
2291static int cm_issue_drep(struct cm_port *port,
2292			 struct ib_mad_recv_wc *mad_recv_wc)
2293{
2294	struct ib_mad_send_buf *msg = NULL;
2295	struct cm_dreq_msg *dreq_msg;
2296	struct cm_drep_msg *drep_msg;
2297	int ret;
2298
2299	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2300	if (ret)
2301		return ret;
2302
2303	dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2304	drep_msg = (struct cm_drep_msg *) msg->mad;
2305
2306	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2307	drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2308	drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2309
2310	ret = ib_post_send_mad(msg, NULL);
2311	if (ret)
2312		cm_free_msg(msg);
2313
2314	return ret;
2315}
2316
2317static int cm_dreq_handler(struct cm_work *work)
2318{
2319	struct cm_id_private *cm_id_priv;
2320	struct cm_dreq_msg *dreq_msg;
2321	struct ib_mad_send_buf *msg = NULL;
2322	int ret;
2323
2324	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2325	cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2326				   dreq_msg->local_comm_id);
2327	if (!cm_id_priv) {
2328		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2329				counter[CM_DREQ_COUNTER]);
2330		cm_issue_drep(work->port, work->mad_recv_wc);
2331		return -EINVAL;
2332	}
2333
2334	work->cm_event.private_data = &dreq_msg->private_data;
2335
2336	spin_lock_irq(&cm_id_priv->lock);
2337	if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2338		goto unlock;
2339
2340	switch (cm_id_priv->id.state) {
2341	case IB_CM_REP_SENT:
2342	case IB_CM_DREQ_SENT:
2343		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2344		break;
2345	case IB_CM_ESTABLISHED:
2346		if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2347		    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2348			ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2349		break;
2350	case IB_CM_MRA_REP_RCVD:
2351		break;
2352	case IB_CM_TIMEWAIT:
2353		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2354				counter[CM_DREQ_COUNTER]);
2355		msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
2356		if (IS_ERR(msg))
2357			goto unlock;
2358
2359		cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2360			       cm_id_priv->private_data,
2361			       cm_id_priv->private_data_len);
2362		spin_unlock_irq(&cm_id_priv->lock);
2363
2364		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
2365		    ib_post_send_mad(msg, NULL))
2366			cm_free_msg(msg);
2367		goto deref;
2368	case IB_CM_DREQ_RCVD:
2369		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2370				counter[CM_DREQ_COUNTER]);
2371		goto unlock;
2372	default:
2373		goto unlock;
2374	}
2375	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2376	cm_id_priv->tid = dreq_msg->hdr.tid;
2377	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2378	if (!ret)
2379		list_add_tail(&work->list, &cm_id_priv->work_list);
2380	spin_unlock_irq(&cm_id_priv->lock);
2381
2382	if (ret)
2383		cm_process_work(cm_id_priv, work);
2384	else
2385		cm_deref_id(cm_id_priv);
2386	return 0;
2387
2388unlock:	spin_unlock_irq(&cm_id_priv->lock);
2389deref:	cm_deref_id(cm_id_priv);
2390	return -EINVAL;
2391}
2392
2393static int cm_drep_handler(struct cm_work *work)
2394{
2395	struct cm_id_private *cm_id_priv;
2396	struct cm_drep_msg *drep_msg;
2397	int ret;
2398
2399	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2400	cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2401				   drep_msg->local_comm_id);
2402	if (!cm_id_priv)
2403		return -EINVAL;
2404
2405	work->cm_event.private_data = &drep_msg->private_data;
2406
2407	spin_lock_irq(&cm_id_priv->lock);
2408	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2409	    cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2410		spin_unlock_irq(&cm_id_priv->lock);
2411		goto out;
2412	}
2413	cm_enter_timewait(cm_id_priv);
2414
2415	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2416	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2417	if (!ret)
2418		list_add_tail(&work->list, &cm_id_priv->work_list);
2419	spin_unlock_irq(&cm_id_priv->lock);
2420
2421	if (ret)
2422		cm_process_work(cm_id_priv, work);
2423	else
2424		cm_deref_id(cm_id_priv);
2425	return 0;
2426out:
2427	cm_deref_id(cm_id_priv);
2428	return -EINVAL;
2429}
2430
2431int ib_send_cm_rej(struct ib_cm_id *cm_id,
2432		   enum ib_cm_rej_reason reason,
2433		   void *ari,
2434		   u8 ari_length,
2435		   const void *private_data,
2436		   u8 private_data_len)
2437{
2438	struct cm_id_private *cm_id_priv;
2439	struct ib_mad_send_buf *msg;
2440	unsigned long flags;
2441	int ret;
2442
2443	if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2444	    (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2445		return -EINVAL;
2446
2447	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2448
2449	spin_lock_irqsave(&cm_id_priv->lock, flags);
2450	switch (cm_id->state) {
2451	case IB_CM_REQ_SENT:
2452	case IB_CM_MRA_REQ_RCVD:
2453	case IB_CM_REQ_RCVD:
2454	case IB_CM_MRA_REQ_SENT:
2455	case IB_CM_REP_RCVD:
2456	case IB_CM_MRA_REP_SENT:
2457		ret = cm_alloc_msg(cm_id_priv, &msg);
2458		if (!ret)
2459			cm_format_rej((struct cm_rej_msg *) msg->mad,
2460				      cm_id_priv, reason, ari, ari_length,
2461				      private_data, private_data_len);
2462
2463		cm_reset_to_idle(cm_id_priv);
2464		break;
2465	case IB_CM_REP_SENT:
2466	case IB_CM_MRA_REP_RCVD:
2467		ret = cm_alloc_msg(cm_id_priv, &msg);
2468		if (!ret)
2469			cm_format_rej((struct cm_rej_msg *) msg->mad,
2470				      cm_id_priv, reason, ari, ari_length,
2471				      private_data, private_data_len);
2472
2473		cm_enter_timewait(cm_id_priv);
2474		break;
2475	default:
2476		ret = -EINVAL;
2477		goto out;
2478	}
2479
2480	if (ret)
2481		goto out;
2482
2483	ret = ib_post_send_mad(msg, NULL);
2484	if (ret)
2485		cm_free_msg(msg);
2486
2487out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2488	return ret;
2489}
2490EXPORT_SYMBOL(ib_send_cm_rej);
2491
2492static void cm_format_rej_event(struct cm_work *work)
2493{
2494	struct cm_rej_msg *rej_msg;
2495	struct ib_cm_rej_event_param *param;
2496
2497	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2498	param = &work->cm_event.param.rej_rcvd;
2499	param->ari = rej_msg->ari;
2500	param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2501	param->reason = __be16_to_cpu(rej_msg->reason);
2502	work->cm_event.private_data = &rej_msg->private_data;
2503}
2504
2505static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2506{
2507	struct cm_timewait_info *timewait_info;
2508	struct cm_id_private *cm_id_priv;
2509	__be32 remote_id;
2510
2511	remote_id = rej_msg->local_comm_id;
2512
2513	if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2514		spin_lock_irq(&cm.lock);
2515		timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
2516						  remote_id);
2517		if (!timewait_info) {
2518			spin_unlock_irq(&cm.lock);
2519			return NULL;
2520		}
2521		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2522				      (timewait_info->work.local_id ^
2523				       cm.random_id_operand));
2524		if (cm_id_priv) {
2525			if (cm_id_priv->id.remote_id == remote_id)
2526				atomic_inc(&cm_id_priv->refcount);
2527			else
2528				cm_id_priv = NULL;
2529		}
2530		spin_unlock_irq(&cm.lock);
2531	} else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2532		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2533	else
2534		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2535
2536	return cm_id_priv;
2537}
2538
2539static int cm_rej_handler(struct cm_work *work)
2540{
2541	struct cm_id_private *cm_id_priv;
2542	struct cm_rej_msg *rej_msg;
2543	int ret;
2544
2545	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2546	cm_id_priv = cm_acquire_rejected_id(rej_msg);
2547	if (!cm_id_priv)
2548		return -EINVAL;
2549
2550	cm_format_rej_event(work);
2551
2552	spin_lock_irq(&cm_id_priv->lock);
2553	switch (cm_id_priv->id.state) {
2554	case IB_CM_REQ_SENT:
2555	case IB_CM_MRA_REQ_RCVD:
2556	case IB_CM_REP_SENT:
2557	case IB_CM_MRA_REP_RCVD:
2558		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2559		/* fall through */
2560	case IB_CM_REQ_RCVD:
2561	case IB_CM_MRA_REQ_SENT:
2562		if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2563			cm_enter_timewait(cm_id_priv);
2564		else
2565			cm_reset_to_idle(cm_id_priv);
2566		break;
2567	case IB_CM_DREQ_SENT:
2568		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2569		/* fall through */
2570	case IB_CM_REP_RCVD:
2571	case IB_CM_MRA_REP_SENT:
2572		cm_enter_timewait(cm_id_priv);
2573		break;
2574	case IB_CM_ESTABLISHED:
2575		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2576		    cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2577			if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2578				ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2579					      cm_id_priv->msg);
2580			cm_enter_timewait(cm_id_priv);
2581			break;
2582		}
2583		/* fall through */
2584	default:
2585		spin_unlock_irq(&cm_id_priv->lock);
2586		ret = -EINVAL;
2587		goto out;
2588	}
2589
2590	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2591	if (!ret)
2592		list_add_tail(&work->list, &cm_id_priv->work_list);
2593	spin_unlock_irq(&cm_id_priv->lock);
2594
2595	if (ret)
2596		cm_process_work(cm_id_priv, work);
2597	else
2598		cm_deref_id(cm_id_priv);
2599	return 0;
2600out:
2601	cm_deref_id(cm_id_priv);
2602	return -EINVAL;
2603}
2604
2605int ib_send_cm_mra(struct ib_cm_id *cm_id,
2606		   u8 service_timeout,
2607		   const void *private_data,
2608		   u8 private_data_len)
2609{
2610	struct cm_id_private *cm_id_priv;
2611	struct ib_mad_send_buf *msg;
2612	enum ib_cm_state cm_state;
2613	enum ib_cm_lap_state lap_state;
2614	enum cm_msg_response msg_response;
2615	void *data;
2616	unsigned long flags;
2617	int ret;
2618
2619	if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2620		return -EINVAL;
2621
2622	data = cm_copy_private_data(private_data, private_data_len);
2623	if (IS_ERR(data))
2624		return PTR_ERR(data);
2625
2626	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2627
2628	spin_lock_irqsave(&cm_id_priv->lock, flags);
2629	switch(cm_id_priv->id.state) {
2630	case IB_CM_REQ_RCVD:
2631		cm_state = IB_CM_MRA_REQ_SENT;
2632		lap_state = cm_id->lap_state;
2633		msg_response = CM_MSG_RESPONSE_REQ;
2634		break;
2635	case IB_CM_REP_RCVD:
2636		cm_state = IB_CM_MRA_REP_SENT;
2637		lap_state = cm_id->lap_state;
2638		msg_response = CM_MSG_RESPONSE_REP;
2639		break;
2640	case IB_CM_ESTABLISHED:
2641		if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2642			cm_state = cm_id->state;
2643			lap_state = IB_CM_MRA_LAP_SENT;
2644			msg_response = CM_MSG_RESPONSE_OTHER;
2645			break;
2646		}
2647	default:
2648		ret = -EINVAL;
2649		goto error1;
2650	}
2651
2652	if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2653		ret = cm_alloc_msg(cm_id_priv, &msg);
2654		if (ret)
2655			goto error1;
2656
2657		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2658			      msg_response, service_timeout,
2659			      private_data, private_data_len);
2660		ret = ib_post_send_mad(msg, NULL);
2661		if (ret)
2662			goto error2;
2663	}
2664
2665	cm_id->state = cm_state;
2666	cm_id->lap_state = lap_state;
2667	cm_id_priv->service_timeout = service_timeout;
2668	cm_set_private_data(cm_id_priv, data, private_data_len);
2669	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2670	return 0;
2671
2672error1:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2673	kfree(data);
2674	return ret;
2675
2676error2:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2677	kfree(data);
2678	cm_free_msg(msg);
2679	return ret;
2680}
2681EXPORT_SYMBOL(ib_send_cm_mra);
2682
2683static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2684{
2685	switch (cm_mra_get_msg_mraed(mra_msg)) {
2686	case CM_MSG_RESPONSE_REQ:
2687		return cm_acquire_id(mra_msg->remote_comm_id, 0);
2688	case CM_MSG_RESPONSE_REP:
2689	case CM_MSG_RESPONSE_OTHER:
2690		return cm_acquire_id(mra_msg->remote_comm_id,
2691				     mra_msg->local_comm_id);
2692	default:
2693		return NULL;
2694	}
2695}
2696
2697static int cm_mra_handler(struct cm_work *work)
2698{
2699	struct cm_id_private *cm_id_priv;
2700	struct cm_mra_msg *mra_msg;
2701	int timeout, ret;
2702
2703	mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2704	cm_id_priv = cm_acquire_mraed_id(mra_msg);
2705	if (!cm_id_priv)
2706		return -EINVAL;
2707
2708	work->cm_event.private_data = &mra_msg->private_data;
2709	work->cm_event.param.mra_rcvd.service_timeout =
2710					cm_mra_get_service_timeout(mra_msg);
2711	timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2712		  cm_convert_to_ms(cm_id_priv->av.timeout);
2713
2714	spin_lock_irq(&cm_id_priv->lock);
2715	switch (cm_id_priv->id.state) {
2716	case IB_CM_REQ_SENT:
2717		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2718		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2719				  cm_id_priv->msg, timeout))
2720			goto out;
2721		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2722		break;
2723	case IB_CM_REP_SENT:
2724		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2725		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2726				  cm_id_priv->msg, timeout))
2727			goto out;
2728		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2729		break;
2730	case IB_CM_ESTABLISHED:
2731		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2732		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2733		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2734				  cm_id_priv->msg, timeout)) {
2735			if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2736				atomic_long_inc(&work->port->
2737						counter_group[CM_RECV_DUPLICATES].
2738						counter[CM_MRA_COUNTER]);
2739			goto out;
2740		}
2741		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2742		break;
2743	case IB_CM_MRA_REQ_RCVD:
2744	case IB_CM_MRA_REP_RCVD:
2745		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2746				counter[CM_MRA_COUNTER]);
2747		/* fall through */
2748	default:
2749		goto out;
2750	}
2751
2752	cm_id_priv->msg->context[1] = (void *) (unsigned long)
2753				      cm_id_priv->id.state;
2754	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2755	if (!ret)
2756		list_add_tail(&work->list, &cm_id_priv->work_list);
2757	spin_unlock_irq(&cm_id_priv->lock);
2758
2759	if (ret)
2760		cm_process_work(cm_id_priv, work);
2761	else
2762		cm_deref_id(cm_id_priv);
2763	return 0;
2764out:
2765	spin_unlock_irq(&cm_id_priv->lock);
2766	cm_deref_id(cm_id_priv);
2767	return -EINVAL;
2768}
2769
2770static void cm_format_lap(struct cm_lap_msg *lap_msg,
2771			  struct cm_id_private *cm_id_priv,
2772			  struct ib_sa_path_rec *alternate_path,
2773			  const void *private_data,
2774			  u8 private_data_len)
2775{
2776	cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2777			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2778	lap_msg->local_comm_id = cm_id_priv->id.local_id;
2779	lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2780	cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2781	/* todo: need remote CM response timeout */
2782	cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2783	lap_msg->alt_local_lid = alternate_path->slid;
2784	lap_msg->alt_remote_lid = alternate_path->dlid;
2785	lap_msg->alt_local_gid = alternate_path->sgid;
2786	lap_msg->alt_remote_gid = alternate_path->dgid;
2787	cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2788	cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2789	lap_msg->alt_hop_limit = alternate_path->hop_limit;
2790	cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2791	cm_lap_set_sl(lap_msg, alternate_path->sl);
2792	cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2793	cm_lap_set_local_ack_timeout(lap_msg,
2794		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2795			       alternate_path->packet_life_time));
2796
2797	if (private_data && private_data_len)
2798		memcpy(lap_msg->private_data, private_data, private_data_len);
2799}
2800
2801int ib_send_cm_lap(struct ib_cm_id *cm_id,
2802		   struct ib_sa_path_rec *alternate_path,
2803		   const void *private_data,
2804		   u8 private_data_len)
2805{
2806	struct cm_id_private *cm_id_priv;
2807	struct ib_mad_send_buf *msg;
2808	unsigned long flags;
2809	int ret;
2810
2811	if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2812		return -EINVAL;
2813
2814	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2815	spin_lock_irqsave(&cm_id_priv->lock, flags);
2816	if (cm_id->state != IB_CM_ESTABLISHED ||
2817	    (cm_id->lap_state != IB_CM_LAP_UNINIT &&
2818	     cm_id->lap_state != IB_CM_LAP_IDLE)) {
2819		ret = -EINVAL;
2820		goto out;
2821	}
2822
2823	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
2824				 cm_id_priv);
2825	if (ret)
2826		goto out;
2827	cm_id_priv->alt_av.timeout =
2828			cm_ack_timeout(cm_id_priv->target_ack_delay,
2829				       cm_id_priv->alt_av.timeout - 1);
2830
2831	ret = cm_alloc_msg(cm_id_priv, &msg);
2832	if (ret)
2833		goto out;
2834
2835	cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2836		      alternate_path, private_data, private_data_len);
2837	msg->timeout_ms = cm_id_priv->timeout_ms;
2838	msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2839
2840	ret = ib_post_send_mad(msg, NULL);
2841	if (ret) {
2842		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2843		cm_free_msg(msg);
2844		return ret;
2845	}
2846
2847	cm_id->lap_state = IB_CM_LAP_SENT;
2848	cm_id_priv->msg = msg;
2849
2850out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2851	return ret;
2852}
2853EXPORT_SYMBOL(ib_send_cm_lap);
2854
2855static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
2856				    struct ib_sa_path_rec *path,
2857				    struct cm_lap_msg *lap_msg)
2858{
2859	memset(path, 0, sizeof *path);
2860	path->dgid = lap_msg->alt_local_gid;
2861	path->sgid = lap_msg->alt_remote_gid;
2862	path->dlid = lap_msg->alt_local_lid;
2863	path->slid = lap_msg->alt_remote_lid;
2864	path->flow_label = cm_lap_get_flow_label(lap_msg);
2865	path->hop_limit = lap_msg->alt_hop_limit;
2866	path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2867	path->reversible = 1;
2868	path->pkey = cm_id_priv->pkey;
2869	path->sl = cm_lap_get_sl(lap_msg);
2870	path->mtu_selector = IB_SA_EQ;
2871	path->mtu = cm_id_priv->path_mtu;
2872	path->rate_selector = IB_SA_EQ;
2873	path->rate = cm_lap_get_packet_rate(lap_msg);
2874	path->packet_life_time_selector = IB_SA_EQ;
2875	path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
2876	path->packet_life_time -= (path->packet_life_time > 0);
2877}
2878
2879static int cm_lap_handler(struct cm_work *work)
2880{
2881	struct cm_id_private *cm_id_priv;
2882	struct cm_lap_msg *lap_msg;
2883	struct ib_cm_lap_event_param *param;
2884	struct ib_mad_send_buf *msg = NULL;
2885	int ret;
2886
2887	/* todo: verify LAP request and send reject APR if invalid. */
2888	lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2889	cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2890				   lap_msg->local_comm_id);
2891	if (!cm_id_priv)
2892		return -EINVAL;
2893
2894	param = &work->cm_event.param.lap_rcvd;
2895	param->alternate_path = &work->path[0];
2896	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
2897	work->cm_event.private_data = &lap_msg->private_data;
2898
2899	spin_lock_irq(&cm_id_priv->lock);
2900	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2901		goto unlock;
2902
2903	switch (cm_id_priv->id.lap_state) {
2904	case IB_CM_LAP_UNINIT:
2905	case IB_CM_LAP_IDLE:
2906		break;
2907	case IB_CM_MRA_LAP_SENT:
2908		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2909				counter[CM_LAP_COUNTER]);
2910		msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
2911		if (IS_ERR(msg))
2912			goto unlock;
2913
2914		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2915			      CM_MSG_RESPONSE_OTHER,
2916			      cm_id_priv->service_timeout,
2917			      cm_id_priv->private_data,
2918			      cm_id_priv->private_data_len);
2919		spin_unlock_irq(&cm_id_priv->lock);
2920
2921		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
2922		    ib_post_send_mad(msg, NULL))
2923			cm_free_msg(msg);
2924		goto deref;
2925	case IB_CM_LAP_RCVD:
2926		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2927				counter[CM_LAP_COUNTER]);
2928		goto unlock;
2929	default:
2930		goto unlock;
2931	}
2932
2933	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2934	cm_id_priv->tid = lap_msg->hdr.tid;
2935	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2936				      work->mad_recv_wc->recv_buf.grh,
2937				      &cm_id_priv->av);
2938	if (ret)
2939		goto unlock;
2940	ret = cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
2941				 cm_id_priv);
2942	if (ret)
2943		goto unlock;
2944	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2945	if (!ret)
2946		list_add_tail(&work->list, &cm_id_priv->work_list);
2947	spin_unlock_irq(&cm_id_priv->lock);
2948
2949	if (ret)
2950		cm_process_work(cm_id_priv, work);
2951	else
2952		cm_deref_id(cm_id_priv);
2953	return 0;
2954
2955unlock:	spin_unlock_irq(&cm_id_priv->lock);
2956deref:	cm_deref_id(cm_id_priv);
2957	return -EINVAL;
2958}
2959
2960static void cm_format_apr(struct cm_apr_msg *apr_msg,
2961			  struct cm_id_private *cm_id_priv,
2962			  enum ib_cm_apr_status status,
2963			  void *info,
2964			  u8 info_length,
2965			  const void *private_data,
2966			  u8 private_data_len)
2967{
2968	cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2969	apr_msg->local_comm_id = cm_id_priv->id.local_id;
2970	apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2971	apr_msg->ap_status = (u8) status;
2972
2973	if (info && info_length) {
2974		apr_msg->info_length = info_length;
2975		memcpy(apr_msg->info, info, info_length);
2976	}
2977
2978	if (private_data && private_data_len)
2979		memcpy(apr_msg->private_data, private_data, private_data_len);
2980}
2981
2982int ib_send_cm_apr(struct ib_cm_id *cm_id,
2983		   enum ib_cm_apr_status status,
2984		   void *info,
2985		   u8 info_length,
2986		   const void *private_data,
2987		   u8 private_data_len)
2988{
2989	struct cm_id_private *cm_id_priv;
2990	struct ib_mad_send_buf *msg;
2991	unsigned long flags;
2992	int ret;
2993
2994	if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2995	    (info && info_length > IB_CM_APR_INFO_LENGTH))
2996		return -EINVAL;
2997
2998	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2999	spin_lock_irqsave(&cm_id_priv->lock, flags);
3000	if (cm_id->state != IB_CM_ESTABLISHED ||
3001	    (cm_id->lap_state != IB_CM_LAP_RCVD &&
3002	     cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
3003		ret = -EINVAL;
3004		goto out;
3005	}
3006
3007	ret = cm_alloc_msg(cm_id_priv, &msg);
3008	if (ret)
3009		goto out;
3010
3011	cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
3012		      info, info_length, private_data, private_data_len);
3013	ret = ib_post_send_mad(msg, NULL);
3014	if (ret) {
3015		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3016		cm_free_msg(msg);
3017		return ret;
3018	}
3019
3020	cm_id->lap_state = IB_CM_LAP_IDLE;
3021out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3022	return ret;
3023}
3024EXPORT_SYMBOL(ib_send_cm_apr);
3025
3026static int cm_apr_handler(struct cm_work *work)
3027{
3028	struct cm_id_private *cm_id_priv;
3029	struct cm_apr_msg *apr_msg;
3030	int ret;
3031
3032	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
3033	cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
3034				   apr_msg->local_comm_id);
3035	if (!cm_id_priv)
3036		return -EINVAL; /* Unmatched reply. */
3037
3038	work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
3039	work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
3040	work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
3041	work->cm_event.private_data = &apr_msg->private_data;
3042
3043	spin_lock_irq(&cm_id_priv->lock);
3044	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
3045	    (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
3046	     cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
3047		spin_unlock_irq(&cm_id_priv->lock);
3048		goto out;
3049	}
3050	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
3051	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3052	cm_id_priv->msg = NULL;
3053
3054	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3055	if (!ret)
3056		list_add_tail(&work->list, &cm_id_priv->work_list);
3057	spin_unlock_irq(&cm_id_priv->lock);
3058
3059	if (ret)
3060		cm_process_work(cm_id_priv, work);
3061	else
3062		cm_deref_id(cm_id_priv);
3063	return 0;
3064out:
3065	cm_deref_id(cm_id_priv);
3066	return -EINVAL;
3067}
3068
3069static int cm_timewait_handler(struct cm_work *work)
3070{
3071	struct cm_timewait_info *timewait_info;
3072	struct cm_id_private *cm_id_priv;
3073	int ret;
3074
3075	timewait_info = (struct cm_timewait_info *)work;
3076	spin_lock_irq(&cm.lock);
3077	list_del(&timewait_info->list);
3078	spin_unlock_irq(&cm.lock);
3079
3080	cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
3081				   timewait_info->work.remote_id);
3082	if (!cm_id_priv)
3083		return -EINVAL;
3084
3085	spin_lock_irq(&cm_id_priv->lock);
3086	if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
3087	    cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
3088		spin_unlock_irq(&cm_id_priv->lock);
3089		goto out;
3090	}
3091	cm_id_priv->id.state = IB_CM_IDLE;
3092	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3093	if (!ret)
3094		list_add_tail(&work->list, &cm_id_priv->work_list);
3095	spin_unlock_irq(&cm_id_priv->lock);
3096
3097	if (ret)
3098		cm_process_work(cm_id_priv, work);
3099	else
3100		cm_deref_id(cm_id_priv);
3101	return 0;
3102out:
3103	cm_deref_id(cm_id_priv);
3104	return -EINVAL;
3105}
3106
3107static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
3108			       struct cm_id_private *cm_id_priv,
3109			       struct ib_cm_sidr_req_param *param)
3110{
3111	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
3112			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
3113	sidr_req_msg->request_id = cm_id_priv->id.local_id;
3114	sidr_req_msg->pkey = param->path->pkey;
3115	sidr_req_msg->service_id = param->service_id;
3116
3117	if (param->private_data && param->private_data_len)
3118		memcpy(sidr_req_msg->private_data, param->private_data,
3119		       param->private_data_len);
3120}
3121
3122int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
3123			struct ib_cm_sidr_req_param *param)
3124{
3125	struct cm_id_private *cm_id_priv;
3126	struct ib_mad_send_buf *msg;
3127	unsigned long flags;
3128	int ret;
3129
3130	if (!param->path || (param->private_data &&
3131	     param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
3132		return -EINVAL;
3133
3134	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3135	ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
3136	if (ret)
3137		goto out;
3138
3139	cm_id->service_id = param->service_id;
3140	cm_id->service_mask = ~cpu_to_be64(0);
3141	cm_id_priv->timeout_ms = param->timeout_ms;
3142	cm_id_priv->max_cm_retries = param->max_cm_retries;
3143	ret = cm_alloc_msg(cm_id_priv, &msg);
3144	if (ret)
3145		goto out;
3146
3147	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
3148			   param);
3149	msg->timeout_ms = cm_id_priv->timeout_ms;
3150	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
3151
3152	spin_lock_irqsave(&cm_id_priv->lock, flags);
3153	if (cm_id->state == IB_CM_IDLE)
3154		ret = ib_post_send_mad(msg, NULL);
3155	else
3156		ret = -EINVAL;
3157
3158	if (ret) {
3159		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3160		cm_free_msg(msg);
3161		goto out;
3162	}
3163	cm_id->state = IB_CM_SIDR_REQ_SENT;
3164	cm_id_priv->msg = msg;
3165	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3166out:
3167	return ret;
3168}
3169EXPORT_SYMBOL(ib_send_cm_sidr_req);
3170
3171static void cm_format_sidr_req_event(struct cm_work *work,
3172				     const struct cm_id_private *rx_cm_id,
3173				     struct ib_cm_id *listen_id)
3174{
3175	struct cm_sidr_req_msg *sidr_req_msg;
3176	struct ib_cm_sidr_req_event_param *param;
3177
3178	sidr_req_msg = (struct cm_sidr_req_msg *)
3179				work->mad_recv_wc->recv_buf.mad;
3180	param = &work->cm_event.param.sidr_req_rcvd;
3181	param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
3182	param->listen_id = listen_id;
3183	param->service_id = sidr_req_msg->service_id;
3184	param->bth_pkey = cm_get_bth_pkey(work);
3185	param->port = work->port->port_num;
3186	param->sgid_index = rx_cm_id->av.ah_attr.grh.sgid_index;
3187	work->cm_event.private_data = &sidr_req_msg->private_data;
3188}
3189
3190static int cm_sidr_req_handler(struct cm_work *work)
3191{
3192	struct ib_cm_id *cm_id;
3193	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3194	struct cm_sidr_req_msg *sidr_req_msg;
3195	struct ib_wc *wc;
3196	int ret;
3197
3198	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3199	if (IS_ERR(cm_id))
3200		return PTR_ERR(cm_id);
3201	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3202
3203	/* Record SGID/SLID and request ID for lookup. */
3204	sidr_req_msg = (struct cm_sidr_req_msg *)
3205				work->mad_recv_wc->recv_buf.mad;
3206	wc = work->mad_recv_wc->wc;
3207	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3208	cm_id_priv->av.dgid.global.interface_id = 0;
3209	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3210				      work->mad_recv_wc->recv_buf.grh,
3211				      &cm_id_priv->av);
3212	if (ret)
3213		goto out;
3214	cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3215	cm_id_priv->tid = sidr_req_msg->hdr.tid;
3216	atomic_inc(&cm_id_priv->work_count);
3217
3218	spin_lock_irq(&cm.lock);
3219	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3220	if (cur_cm_id_priv) {
3221		spin_unlock_irq(&cm.lock);
3222		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3223				counter[CM_SIDR_REQ_COUNTER]);
3224		goto out; /* Duplicate message. */
3225	}
3226	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3227	cur_cm_id_priv = cm_find_listen(cm_id->device,
3228					sidr_req_msg->service_id);
3229	if (!cur_cm_id_priv) {
3230		spin_unlock_irq(&cm.lock);
3231		cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3232		goto out; /* No match. */
3233	}
3234	atomic_inc(&cur_cm_id_priv->refcount);
3235	atomic_inc(&cm_id_priv->refcount);
3236	spin_unlock_irq(&cm.lock);
3237
3238	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3239	cm_id_priv->id.context = cur_cm_id_priv->id.context;
3240	cm_id_priv->id.service_id = sidr_req_msg->service_id;
3241	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3242
3243	cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
3244	cm_process_work(cm_id_priv, work);
3245	cm_deref_id(cur_cm_id_priv);
3246	return 0;
3247out:
3248	ib_destroy_cm_id(&cm_id_priv->id);
3249	return -EINVAL;
3250}
3251
3252static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3253			       struct cm_id_private *cm_id_priv,
3254			       struct ib_cm_sidr_rep_param *param)
3255{
3256	cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3257			  cm_id_priv->tid);
3258	sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3259	sidr_rep_msg->status = param->status;
3260	cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3261	sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3262	sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3263
3264	if (param->info && param->info_length)
3265		memcpy(sidr_rep_msg->info, param->info, param->info_length);
3266
3267	if (param->private_data && param->private_data_len)
3268		memcpy(sidr_rep_msg->private_data, param->private_data,
3269		       param->private_data_len);
3270}
3271
3272int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3273			struct ib_cm_sidr_rep_param *param)
3274{
3275	struct cm_id_private *cm_id_priv;
3276	struct ib_mad_send_buf *msg;
3277	unsigned long flags;
3278	int ret;
3279
3280	if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3281	    (param->private_data &&
3282	     param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3283		return -EINVAL;
3284
3285	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3286	spin_lock_irqsave(&cm_id_priv->lock, flags);
3287	if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3288		ret = -EINVAL;
3289		goto error;
3290	}
3291
3292	ret = cm_alloc_msg(cm_id_priv, &msg);
3293	if (ret)
3294		goto error;
3295
3296	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3297			   param);
3298	ret = ib_post_send_mad(msg, NULL);
3299	if (ret) {
3300		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3301		cm_free_msg(msg);
3302		return ret;
3303	}
3304	cm_id->state = IB_CM_IDLE;
3305	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3306
3307	spin_lock_irqsave(&cm.lock, flags);
3308	if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
3309		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3310		RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
3311	}
3312	spin_unlock_irqrestore(&cm.lock, flags);
3313	return 0;
3314
3315error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3316	return ret;
3317}
3318EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3319
3320static void cm_format_sidr_rep_event(struct cm_work *work)
3321{
3322	struct cm_sidr_rep_msg *sidr_rep_msg;
3323	struct ib_cm_sidr_rep_event_param *param;
3324
3325	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3326				work->mad_recv_wc->recv_buf.mad;
3327	param = &work->cm_event.param.sidr_rep_rcvd;
3328	param->status = sidr_rep_msg->status;
3329	param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3330	param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3331	param->info = &sidr_rep_msg->info;
3332	param->info_len = sidr_rep_msg->info_length;
3333	work->cm_event.private_data = &sidr_rep_msg->private_data;
3334}
3335
3336static int cm_sidr_rep_handler(struct cm_work *work)
3337{
3338	struct cm_sidr_rep_msg *sidr_rep_msg;
3339	struct cm_id_private *cm_id_priv;
3340
3341	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3342				work->mad_recv_wc->recv_buf.mad;
3343	cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3344	if (!cm_id_priv)
3345		return -EINVAL; /* Unmatched reply. */
3346
3347	spin_lock_irq(&cm_id_priv->lock);
3348	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3349		spin_unlock_irq(&cm_id_priv->lock);
3350		goto out;
3351	}
3352	cm_id_priv->id.state = IB_CM_IDLE;
3353	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3354	spin_unlock_irq(&cm_id_priv->lock);
3355
3356	cm_format_sidr_rep_event(work);
3357	cm_process_work(cm_id_priv, work);
3358	return 0;
3359out:
3360	cm_deref_id(cm_id_priv);
3361	return -EINVAL;
3362}
3363
3364static void cm_process_send_error(struct ib_mad_send_buf *msg,
3365				  enum ib_wc_status wc_status)
3366{
3367	struct cm_id_private *cm_id_priv;
3368	struct ib_cm_event cm_event;
3369	enum ib_cm_state state;
3370	int ret;
3371
3372	memset(&cm_event, 0, sizeof cm_event);
3373	cm_id_priv = msg->context[0];
3374
3375	/* Discard old sends or ones without a response. */
3376	spin_lock_irq(&cm_id_priv->lock);
3377	state = (enum ib_cm_state) (unsigned long) msg->context[1];
3378	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3379		goto discard;
3380
3381	switch (state) {
3382	case IB_CM_REQ_SENT:
3383	case IB_CM_MRA_REQ_RCVD:
3384		cm_reset_to_idle(cm_id_priv);
3385		cm_event.event = IB_CM_REQ_ERROR;
3386		break;
3387	case IB_CM_REP_SENT:
3388	case IB_CM_MRA_REP_RCVD:
3389		cm_reset_to_idle(cm_id_priv);
3390		cm_event.event = IB_CM_REP_ERROR;
3391		break;
3392	case IB_CM_DREQ_SENT:
3393		cm_enter_timewait(cm_id_priv);
3394		cm_event.event = IB_CM_DREQ_ERROR;
3395		break;
3396	case IB_CM_SIDR_REQ_SENT:
3397		cm_id_priv->id.state = IB_CM_IDLE;
3398		cm_event.event = IB_CM_SIDR_REQ_ERROR;
3399		break;
3400	default:
3401		goto discard;
3402	}
3403	spin_unlock_irq(&cm_id_priv->lock);
3404	cm_event.param.send_status = wc_status;
3405
3406	/* No other events can occur on the cm_id at this point. */
3407	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3408	cm_free_msg(msg);
3409	if (ret)
3410		ib_destroy_cm_id(&cm_id_priv->id);
3411	return;
3412discard:
3413	spin_unlock_irq(&cm_id_priv->lock);
3414	cm_free_msg(msg);
3415}
3416
3417static void cm_send_handler(struct ib_mad_agent *mad_agent,
3418			    struct ib_mad_send_wc *mad_send_wc)
3419{
3420	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3421	struct cm_port *port;
3422	u16 attr_index;
3423
3424	port = mad_agent->context;
3425	attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3426				  msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3427
3428	/*
3429	 * If the send was in response to a received message (context[0] is not
3430	 * set to a cm_id), and is not a REJ, then it is a send that was
3431	 * manually retried.
3432	 */
3433	if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3434		msg->retries = 1;
3435
3436	atomic_long_add(1 + msg->retries,
3437			&port->counter_group[CM_XMIT].counter[attr_index]);
3438	if (msg->retries)
3439		atomic_long_add(msg->retries,
3440				&port->counter_group[CM_XMIT_RETRIES].
3441				counter[attr_index]);
3442
3443	switch (mad_send_wc->status) {
3444	case IB_WC_SUCCESS:
3445	case IB_WC_WR_FLUSH_ERR:
3446		cm_free_msg(msg);
3447		break;
3448	default:
3449		if (msg->context[0] && msg->context[1])
3450			cm_process_send_error(msg, mad_send_wc->status);
3451		else
3452			cm_free_msg(msg);
3453		break;
3454	}
3455}
3456
3457static void cm_work_handler(struct work_struct *_work)
3458{
3459	struct cm_work *work = container_of(_work, struct cm_work, work.work);
3460	int ret;
3461
3462	switch (work->cm_event.event) {
3463	case IB_CM_REQ_RECEIVED:
3464		ret = cm_req_handler(work);
3465		break;
3466	case IB_CM_MRA_RECEIVED:
3467		ret = cm_mra_handler(work);
3468		break;
3469	case IB_CM_REJ_RECEIVED:
3470		ret = cm_rej_handler(work);
3471		break;
3472	case IB_CM_REP_RECEIVED:
3473		ret = cm_rep_handler(work);
3474		break;
3475	case IB_CM_RTU_RECEIVED:
3476		ret = cm_rtu_handler(work);
3477		break;
3478	case IB_CM_USER_ESTABLISHED:
3479		ret = cm_establish_handler(work);
3480		break;
3481	case IB_CM_DREQ_RECEIVED:
3482		ret = cm_dreq_handler(work);
3483		break;
3484	case IB_CM_DREP_RECEIVED:
3485		ret = cm_drep_handler(work);
3486		break;
3487	case IB_CM_SIDR_REQ_RECEIVED:
3488		ret = cm_sidr_req_handler(work);
3489		break;
3490	case IB_CM_SIDR_REP_RECEIVED:
3491		ret = cm_sidr_rep_handler(work);
3492		break;
3493	case IB_CM_LAP_RECEIVED:
3494		ret = cm_lap_handler(work);
3495		break;
3496	case IB_CM_APR_RECEIVED:
3497		ret = cm_apr_handler(work);
3498		break;
3499	case IB_CM_TIMEWAIT_EXIT:
3500		ret = cm_timewait_handler(work);
3501		break;
3502	default:
3503		ret = -EINVAL;
3504		break;
3505	}
3506	if (ret)
3507		cm_free_work(work);
3508}
3509
3510static int cm_establish(struct ib_cm_id *cm_id)
3511{
3512	struct cm_id_private *cm_id_priv;
3513	struct cm_work *work;
3514	unsigned long flags;
3515	int ret = 0;
3516	struct cm_device *cm_dev;
3517
3518	cm_dev = ib_get_client_data(cm_id->device, &cm_client);
3519	if (!cm_dev)
3520		return -ENODEV;
3521
3522	work = kmalloc(sizeof *work, GFP_ATOMIC);
3523	if (!work)
3524		return -ENOMEM;
3525
3526	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3527	spin_lock_irqsave(&cm_id_priv->lock, flags);
3528	switch (cm_id->state)
3529	{
3530	case IB_CM_REP_SENT:
3531	case IB_CM_MRA_REP_RCVD:
3532		cm_id->state = IB_CM_ESTABLISHED;
3533		break;
3534	case IB_CM_ESTABLISHED:
3535		ret = -EISCONN;
3536		break;
3537	default:
3538		ret = -EINVAL;
3539		break;
3540	}
3541	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3542
3543	if (ret) {
3544		kfree(work);
3545		goto out;
3546	}
3547
3548	/*
3549	 * The CM worker thread may try to destroy the cm_id before it
3550	 * can execute this work item.  To prevent potential deadlock,
3551	 * we need to find the cm_id once we're in the context of the
3552	 * worker thread, rather than holding a reference on it.
3553	 */
3554	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3555	work->local_id = cm_id->local_id;
3556	work->remote_id = cm_id->remote_id;
3557	work->mad_recv_wc = NULL;
3558	work->cm_event.event = IB_CM_USER_ESTABLISHED;
3559
3560	/* Check if the device started its remove_one */
3561	spin_lock_irqsave(&cm.lock, flags);
3562	if (!cm_dev->going_down) {
3563		queue_delayed_work(cm.wq, &work->work, 0);
3564	} else {
3565		kfree(work);
3566		ret = -ENODEV;
3567	}
3568	spin_unlock_irqrestore(&cm.lock, flags);
3569
3570out:
3571	return ret;
3572}
3573
3574static int cm_migrate(struct ib_cm_id *cm_id)
3575{
3576	struct cm_id_private *cm_id_priv;
3577	struct cm_av tmp_av;
3578	unsigned long flags;
3579	int tmp_send_port_not_ready;
3580	int ret = 0;
3581
3582	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3583	spin_lock_irqsave(&cm_id_priv->lock, flags);
3584	if (cm_id->state == IB_CM_ESTABLISHED &&
3585	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3586	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
3587		cm_id->lap_state = IB_CM_LAP_IDLE;
3588		/* Swap address vector */
3589		tmp_av = cm_id_priv->av;
3590		cm_id_priv->av = cm_id_priv->alt_av;
3591		cm_id_priv->alt_av = tmp_av;
3592		/* Swap port send ready state */
3593		tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
3594		cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
3595		cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
3596	} else
3597		ret = -EINVAL;
3598	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3599
3600	return ret;
3601}
3602
3603int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3604{
3605	int ret;
3606
3607	switch (event) {
3608	case IB_EVENT_COMM_EST:
3609		ret = cm_establish(cm_id);
3610		break;
3611	case IB_EVENT_PATH_MIG:
3612		ret = cm_migrate(cm_id);
3613		break;
3614	default:
3615		ret = -EINVAL;
3616	}
3617	return ret;
3618}
3619EXPORT_SYMBOL(ib_cm_notify);
3620
3621static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3622			    struct ib_mad_send_buf *send_buf,
3623			    struct ib_mad_recv_wc *mad_recv_wc)
3624{
3625	struct cm_port *port = mad_agent->context;
3626	struct cm_work *work;
3627	enum ib_cm_event_type event;
3628	u16 attr_id;
3629	int paths = 0;
3630	int going_down = 0;
3631
3632	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3633	case CM_REQ_ATTR_ID:
3634		paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3635						    alt_local_lid != 0);
3636		event = IB_CM_REQ_RECEIVED;
3637		break;
3638	case CM_MRA_ATTR_ID:
3639		event = IB_CM_MRA_RECEIVED;
3640		break;
3641	case CM_REJ_ATTR_ID:
3642		event = IB_CM_REJ_RECEIVED;
3643		break;
3644	case CM_REP_ATTR_ID:
3645		event = IB_CM_REP_RECEIVED;
3646		break;
3647	case CM_RTU_ATTR_ID:
3648		event = IB_CM_RTU_RECEIVED;
3649		break;
3650	case CM_DREQ_ATTR_ID:
3651		event = IB_CM_DREQ_RECEIVED;
3652		break;
3653	case CM_DREP_ATTR_ID:
3654		event = IB_CM_DREP_RECEIVED;
3655		break;
3656	case CM_SIDR_REQ_ATTR_ID:
3657		event = IB_CM_SIDR_REQ_RECEIVED;
3658		break;
3659	case CM_SIDR_REP_ATTR_ID:
3660		event = IB_CM_SIDR_REP_RECEIVED;
3661		break;
3662	case CM_LAP_ATTR_ID:
3663		paths = 1;
3664		event = IB_CM_LAP_RECEIVED;
3665		break;
3666	case CM_APR_ATTR_ID:
3667		event = IB_CM_APR_RECEIVED;
3668		break;
3669	default:
3670		ib_free_recv_mad(mad_recv_wc);
3671		return;
3672	}
3673
3674	attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3675	atomic_long_inc(&port->counter_group[CM_RECV].
3676			counter[attr_id - CM_ATTR_ID_OFFSET]);
3677
3678	work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3679		       GFP_KERNEL);
3680	if (!work) {
3681		ib_free_recv_mad(mad_recv_wc);
3682		return;
3683	}
3684
3685	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3686	work->cm_event.event = event;
3687	work->mad_recv_wc = mad_recv_wc;
3688	work->port = port;
3689
3690	/* Check if the device started its remove_one */
3691	spin_lock_irq(&cm.lock);
3692	if (!port->cm_dev->going_down)
3693		queue_delayed_work(cm.wq, &work->work, 0);
3694	else
3695		going_down = 1;
3696	spin_unlock_irq(&cm.lock);
3697
3698	if (going_down) {
3699		kfree(work);
3700		ib_free_recv_mad(mad_recv_wc);
3701	}
3702}
3703
3704static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3705				struct ib_qp_attr *qp_attr,
3706				int *qp_attr_mask)
3707{
3708	unsigned long flags;
3709	int ret;
3710
3711	spin_lock_irqsave(&cm_id_priv->lock, flags);
3712	switch (cm_id_priv->id.state) {
3713	case IB_CM_REQ_SENT:
3714	case IB_CM_MRA_REQ_RCVD:
3715	case IB_CM_REQ_RCVD:
3716	case IB_CM_MRA_REQ_SENT:
3717	case IB_CM_REP_RCVD:
3718	case IB_CM_MRA_REP_SENT:
3719	case IB_CM_REP_SENT:
3720	case IB_CM_MRA_REP_RCVD:
3721	case IB_CM_ESTABLISHED:
3722		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3723				IB_QP_PKEY_INDEX | IB_QP_PORT;
3724		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
3725		if (cm_id_priv->responder_resources)
3726			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3727						    IB_ACCESS_REMOTE_ATOMIC;
3728		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3729		qp_attr->port_num = cm_id_priv->av.port->port_num;
3730		ret = 0;
3731		break;
3732	default:
3733		ret = -EINVAL;
3734		break;
3735	}
3736	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3737	return ret;
3738}
3739
3740static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3741			       struct ib_qp_attr *qp_attr,
3742			       int *qp_attr_mask)
3743{
3744	unsigned long flags;
3745	int ret;
3746
3747	spin_lock_irqsave(&cm_id_priv->lock, flags);
3748	switch (cm_id_priv->id.state) {
3749	case IB_CM_REQ_RCVD:
3750	case IB_CM_MRA_REQ_SENT:
3751	case IB_CM_REP_RCVD:
3752	case IB_CM_MRA_REP_SENT:
3753	case IB_CM_REP_SENT:
3754	case IB_CM_MRA_REP_RCVD:
3755	case IB_CM_ESTABLISHED:
3756		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3757				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3758		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3759		qp_attr->path_mtu = cm_id_priv->path_mtu;
3760		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3761		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3762		if (cm_id_priv->qp_type == IB_QPT_RC ||
3763		    cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
3764			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3765					 IB_QP_MIN_RNR_TIMER;
3766			qp_attr->max_dest_rd_atomic =
3767					cm_id_priv->responder_resources;
3768			qp_attr->min_rnr_timer = 0;
3769		}
3770		if (cm_id_priv->alt_av.ah_attr.dlid) {
3771			*qp_attr_mask |= IB_QP_ALT_PATH;
3772			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3773			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3774			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3775			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3776		}
3777		ret = 0;
3778		break;
3779	default:
3780		ret = -EINVAL;
3781		break;
3782	}
3783	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3784	return ret;
3785}
3786
3787static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3788			       struct ib_qp_attr *qp_attr,
3789			       int *qp_attr_mask)
3790{
3791	unsigned long flags;
3792	int ret;
3793
3794	spin_lock_irqsave(&cm_id_priv->lock, flags);
3795	switch (cm_id_priv->id.state) {
3796	/* Allow transition to RTS before sending REP */
3797	case IB_CM_REQ_RCVD:
3798	case IB_CM_MRA_REQ_SENT:
3799
3800	case IB_CM_REP_RCVD:
3801	case IB_CM_MRA_REP_SENT:
3802	case IB_CM_REP_SENT:
3803	case IB_CM_MRA_REP_RCVD:
3804	case IB_CM_ESTABLISHED:
3805		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3806			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3807			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3808			switch (cm_id_priv->qp_type) {
3809			case IB_QPT_RC:
3810			case IB_QPT_XRC_INI:
3811				*qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
3812						 IB_QP_MAX_QP_RD_ATOMIC;
3813				qp_attr->retry_cnt = cm_id_priv->retry_count;
3814				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3815				qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
3816				/* fall through */
3817			case IB_QPT_XRC_TGT:
3818				*qp_attr_mask |= IB_QP_TIMEOUT;
3819				qp_attr->timeout = cm_id_priv->av.timeout;
3820				break;
3821			default:
3822				break;
3823			}
3824			if (cm_id_priv->alt_av.ah_attr.dlid) {
3825				*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3826				qp_attr->path_mig_state = IB_MIG_REARM;
3827			}
3828		} else {
3829			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3830			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3831			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3832			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3833			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3834			qp_attr->path_mig_state = IB_MIG_REARM;
3835		}
3836		ret = 0;
3837		break;
3838	default:
3839		ret = -EINVAL;
3840		break;
3841	}
3842	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3843	return ret;
3844}
3845
3846int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3847		       struct ib_qp_attr *qp_attr,
3848		       int *qp_attr_mask)
3849{
3850	struct cm_id_private *cm_id_priv;
3851	int ret;
3852
3853	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3854	switch (qp_attr->qp_state) {
3855	case IB_QPS_INIT:
3856		ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3857		break;
3858	case IB_QPS_RTR:
3859		ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3860		break;
3861	case IB_QPS_RTS:
3862		ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3863		break;
3864	default:
3865		ret = -EINVAL;
3866		break;
3867	}
3868	return ret;
3869}
3870EXPORT_SYMBOL(ib_cm_init_qp_attr);
3871
3872static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3873			       char *buf)
3874{
3875	struct cm_counter_group *group;
3876	struct cm_counter_attribute *cm_attr;
3877
3878	group = container_of(obj, struct cm_counter_group, obj);
3879	cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3880
3881	return sprintf(buf, "%ld\n",
3882		       atomic_long_read(&group->counter[cm_attr->index]));
3883}
3884
3885static const struct sysfs_ops cm_counter_ops = {
3886	.show = cm_show_counter
3887};
3888
3889static struct kobj_type cm_counter_obj_type = {
3890	.sysfs_ops = &cm_counter_ops,
3891	.default_attrs = cm_counter_default_attrs
3892};
3893
3894static void cm_release_port_obj(struct kobject *obj)
3895{
3896	struct cm_port *cm_port;
3897
3898	cm_port = container_of(obj, struct cm_port, port_obj);
3899	kfree(cm_port);
3900}
3901
3902static struct kobj_type cm_port_obj_type = {
3903	.release = cm_release_port_obj
3904};
3905
3906static char *cm_devnode(struct device *dev, umode_t *mode)
3907{
3908	if (mode)
3909		*mode = 0666;
3910	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
3911}
3912
3913struct class cm_class = {
3914	.owner   = THIS_MODULE,
3915	.name    = "infiniband_cm",
3916	.devnode = cm_devnode,
3917};
3918EXPORT_SYMBOL(cm_class);
3919
3920static int cm_create_port_fs(struct cm_port *port)
3921{
3922	int i, ret;
3923
3924	ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
3925				   &port->cm_dev->device->kobj,
3926				   "%d", port->port_num);
3927	if (ret) {
3928		kfree(port);
3929		return ret;
3930	}
3931
3932	for (i = 0; i < CM_COUNTER_GROUPS; i++) {
3933		ret = kobject_init_and_add(&port->counter_group[i].obj,
3934					   &cm_counter_obj_type,
3935					   &port->port_obj,
3936					   "%s", counter_group_names[i]);
3937		if (ret)
3938			goto error;
3939	}
3940
3941	return 0;
3942
3943error:
3944	while (i--)
3945		kobject_put(&port->counter_group[i].obj);
3946	kobject_put(&port->port_obj);
3947	return ret;
3948
3949}
3950
3951static void cm_remove_port_fs(struct cm_port *port)
3952{
3953	int i;
3954
3955	for (i = 0; i < CM_COUNTER_GROUPS; i++)
3956		kobject_put(&port->counter_group[i].obj);
3957
3958	kobject_put(&port->port_obj);
3959}
3960
3961static void cm_add_one(struct ib_device *ib_device)
3962{
3963	struct cm_device *cm_dev;
3964	struct cm_port *port;
3965	struct ib_mad_reg_req reg_req = {
3966		.mgmt_class = IB_MGMT_CLASS_CM,
3967		.mgmt_class_version = IB_CM_CLASS_VERSION,
3968	};
3969	struct ib_port_modify port_modify = {
3970		.set_port_cap_mask = IB_PORT_CM_SUP
3971	};
3972	unsigned long flags;
3973	int ret;
3974	int count = 0;
3975	u8 i;
3976
3977	cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
3978			 ib_device->phys_port_cnt, GFP_KERNEL);
3979	if (!cm_dev)
3980		return;
3981
3982	cm_dev->ib_device = ib_device;
3983	cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
3984	cm_dev->going_down = 0;
3985	cm_dev->device = device_create(&cm_class, &ib_device->dev,
3986				       MKDEV(0, 0), NULL,
3987				       "%s", ib_device->name);
3988	if (IS_ERR(cm_dev->device)) {
3989		kfree(cm_dev);
3990		return;
3991	}
3992
3993	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3994	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3995		if (!rdma_cap_ib_cm(ib_device, i))
3996			continue;
3997
3998		port = kzalloc(sizeof *port, GFP_KERNEL);
3999		if (!port)
4000			goto error1;
4001
4002		cm_dev->port[i-1] = port;
4003		port->cm_dev = cm_dev;
4004		port->port_num = i;
4005
4006		INIT_LIST_HEAD(&port->cm_priv_prim_list);
4007		INIT_LIST_HEAD(&port->cm_priv_altr_list);
4008
4009		ret = cm_create_port_fs(port);
4010		if (ret)
4011			goto error1;
4012
4013		port->mad_agent = ib_register_mad_agent(ib_device, i,
4014							IB_QPT_GSI,
4015							&reg_req,
4016							0,
4017							cm_send_handler,
4018							cm_recv_handler,
4019							port,
4020							0);
4021		if (IS_ERR(port->mad_agent))
4022			goto error2;
4023
4024		ret = ib_modify_port(ib_device, i, 0, &port_modify);
4025		if (ret)
4026			goto error3;
4027
4028		count++;
4029	}
4030
4031	if (!count)
4032		goto free;
4033
4034	ib_set_client_data(ib_device, &cm_client, cm_dev);
4035
4036	write_lock_irqsave(&cm.device_lock, flags);
4037	list_add_tail(&cm_dev->list, &cm.device_list);
4038	write_unlock_irqrestore(&cm.device_lock, flags);
4039	return;
4040
4041error3:
4042	ib_unregister_mad_agent(port->mad_agent);
4043error2:
4044	cm_remove_port_fs(port);
4045error1:
4046	port_modify.set_port_cap_mask = 0;
4047	port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
4048	while (--i) {
4049		if (!rdma_cap_ib_cm(ib_device, i))
4050			continue;
4051
4052		port = cm_dev->port[i-1];
4053		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4054		ib_unregister_mad_agent(port->mad_agent);
4055		cm_remove_port_fs(port);
4056	}
4057free:
4058	device_unregister(cm_dev->device);
4059	kfree(cm_dev);
4060}
4061
4062static void cm_remove_one(struct ib_device *ib_device, void *client_data)
4063{
4064	struct cm_device *cm_dev = client_data;
4065	struct cm_port *port;
4066	struct cm_id_private *cm_id_priv;
4067	struct ib_mad_agent *cur_mad_agent;
4068	struct ib_port_modify port_modify = {
4069		.clr_port_cap_mask = IB_PORT_CM_SUP
4070	};
4071	unsigned long flags;
4072	int i;
4073
4074	if (!cm_dev)
4075		return;
4076
4077	write_lock_irqsave(&cm.device_lock, flags);
4078	list_del(&cm_dev->list);
4079	write_unlock_irqrestore(&cm.device_lock, flags);
4080
4081	spin_lock_irq(&cm.lock);
4082	cm_dev->going_down = 1;
4083	spin_unlock_irq(&cm.lock);
4084
4085	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4086		if (!rdma_cap_ib_cm(ib_device, i))
4087			continue;
4088
4089		port = cm_dev->port[i-1];
4090		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4091		/* Mark all the cm_id's as not valid */
4092		spin_lock_irq(&cm.lock);
4093		list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
4094			cm_id_priv->altr_send_port_not_ready = 1;
4095		list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
4096			cm_id_priv->prim_send_port_not_ready = 1;
4097		spin_unlock_irq(&cm.lock);
4098		/*
4099		 * We flush the queue here after the going_down set, this
4100		 * verify that no new works will be queued in the recv handler,
4101		 * after that we can call the unregister_mad_agent
4102		 */
4103		flush_workqueue(cm.wq);
4104		spin_lock_irq(&cm.state_lock);
4105		cur_mad_agent = port->mad_agent;
4106		port->mad_agent = NULL;
4107		spin_unlock_irq(&cm.state_lock);
4108		ib_unregister_mad_agent(cur_mad_agent);
4109		cm_remove_port_fs(port);
4110	}
4111
4112	device_unregister(cm_dev->device);
4113	kfree(cm_dev);
4114}
4115
4116static int __init ib_cm_init(void)
4117{
4118	int ret;
4119
4120	memset(&cm, 0, sizeof cm);
4121	INIT_LIST_HEAD(&cm.device_list);
4122	rwlock_init(&cm.device_lock);
4123	spin_lock_init(&cm.lock);
4124	spin_lock_init(&cm.state_lock);
4125	cm.listen_service_table = RB_ROOT;
4126	cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
4127	cm.remote_id_table = RB_ROOT;
4128	cm.remote_qp_table = RB_ROOT;
4129	cm.remote_sidr_table = RB_ROOT;
4130	idr_init(&cm.local_id_table);
4131	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
4132	INIT_LIST_HEAD(&cm.timewait_list);
4133
4134	ret = class_register(&cm_class);
4135	if (ret) {
4136		ret = -ENOMEM;
4137		goto error1;
4138	}
4139
4140	cm.wq = create_workqueue("ib_cm");
4141	if (!cm.wq) {
4142		ret = -ENOMEM;
4143		goto error2;
4144	}
4145
4146	ret = ib_register_client(&cm_client);
4147	if (ret)
4148		goto error3;
4149
4150	return 0;
4151error3:
4152	destroy_workqueue(cm.wq);
4153error2:
4154	class_unregister(&cm_class);
4155error1:
4156	idr_destroy(&cm.local_id_table);
4157	return ret;
4158}
4159
4160static void __exit ib_cm_cleanup(void)
4161{
4162	struct cm_timewait_info *timewait_info, *tmp;
4163
4164	spin_lock_irq(&cm.lock);
4165	list_for_each_entry(timewait_info, &cm.timewait_list, list)
4166		cancel_delayed_work(&timewait_info->work.work);
4167	spin_unlock_irq(&cm.lock);
4168
4169	ib_unregister_client(&cm_client);
4170	destroy_workqueue(cm.wq);
4171
4172	list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
4173		cancel_delayed_work_sync(&timewait_info->work.work);
4174		list_del(&timewait_info->list);
4175		kfree(timewait_info);
4176	}
4177
4178	class_unregister(&cm_class);
4179	idr_destroy(&cm.local_id_table);
4180}
4181
4182module_init_order(ib_cm_init, SI_ORDER_SECOND);
4183module_exit_order(ib_cm_cleanup, SI_ORDER_SECOND);
4184
4185