/* ib_iwcm.c, revision 331769 */
/*
 * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/wait.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
#include <rdma/iw_portmap.h>

#include "iwcm.h"

MODULE_AUTHOR("Tom Tucker");
MODULE_DESCRIPTION("iWARP CM");
MODULE_LICENSE("Dual BSD/GPL");

static struct workqueue_struct *iwcm_wq;
struct iwcm_work {
	struct work_struct work;
	struct iwcm_id_private *cm_id;
	struct list_head list;
	struct iw_cm_event event;
	struct list_head free_list;
};

static unsigned int default_backlog = 256;

/*
 * The following services provide a mechanism for pre-allocating iwcm_work
 * elements.  The design pre-allocates them based on the cm_id type:
 *	LISTENING IDS:	Get enough elements preallocated to handle the
 *			listen backlog.
 *	ACTIVE IDS:	4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
 *	PASSIVE IDS:	3: ESTABLISHED, DISCONNECT, CLOSE
 *
 * Allocating them in connect and listen avoids having to deal
 * with allocation failures on the event upcall from the provider (which
 * is called in the interrupt context).
 *
 * One exception is when creating the cm_id for incoming connection requests.
 * There are two cases:
 * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
 *    the backlog is exceeded, then no more connection request events will
 *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
 *    to the provider to reject the connection request.
 * 2) in the connection request workqueue handler, cm_conn_req_handler().
 *    If work elements cannot be allocated for the new connect request cm_id,
 *    then IWCM will call the provider reject method.  This is ok since
 *    cm_conn_req_handler() runs in the workqueue thread context.
 */
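
/*
 * Illustrative summary (comment only) of how the free-list helpers below
 * pair up in this file:
 *
 *	iw_cm_listen()        -> alloc_work_entries(cm_id_priv, backlog)
 *	iw_cm_connect()       -> alloc_work_entries(cm_id_priv, 4)
 *	cm_conn_req_handler() -> alloc_work_entries(child cm_id_priv, 3)
 *
 *	cm_event_handler()    -> get_work() takes a pre-allocated element
 *	cm_work_handler()     -> put_work() returns it to the free list
 *	free_cm_id()          -> dealloc_work_entries() frees the list
 */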

static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
{
	struct iwcm_work *work;

	if (list_empty(&cm_id_priv->work_free_list))
		return NULL;
	work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
			  free_list);
	list_del_init(&work->free_list);
	return work;
}

static void put_work(struct iwcm_work *work)
{
	list_add(&work->free_list, &work->cm_id->work_free_list);
}

static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
{
	struct list_head *e, *tmp;

	list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
		kfree(list_entry(e, struct iwcm_work, free_list));
}

static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
{
	struct iwcm_work *work;

	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
	while (count--) {
		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
		if (!work) {
			dealloc_work_entries(cm_id_priv);
			return -ENOMEM;
		}
		work->cm_id = cm_id_priv;
		INIT_LIST_HEAD(&work->list);
		put_work(work);
	}
	return 0;
}

/*
 * Save private data from incoming connection requests to
 * iw_cm_event, so the low level driver doesn't have to. Adjust
 * the event ptr to point to the local copy.
 */
static int copy_private_data(struct iw_cm_event *event)
{
	void *p;

	p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
	if (!p)
		return -ENOMEM;
	event->private_data = p;
	return 0;
}

static void free_cm_id(struct iwcm_id_private *cm_id_priv)
{
	dealloc_work_entries(cm_id_priv);
	kfree(cm_id_priv);
}

/*
 * Release a reference on cm_id. If the last reference is being
 * released, free the cm_id and return 1.
 */
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
	BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
		BUG_ON(!list_empty(&cm_id_priv->work_list));
		free_cm_id(cm_id_priv);
		return 1;
	}

	return 0;
}

static void add_ref(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;
	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	atomic_inc(&cm_id_priv->refcount);
}

static void rem_ref(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	(void)iwcm_deref_id(cm_id_priv);
}

static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);

struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
				 iw_cm_handler cm_handler,
				 void *context)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
	if (!cm_id_priv)
		return ERR_PTR(-ENOMEM);

	cm_id_priv->state = IW_CM_STATE_IDLE;
	cm_id_priv->id.device = device;
	cm_id_priv->id.cm_handler = cm_handler;
	cm_id_priv->id.context = context;
	cm_id_priv->id.event_handler = cm_event_handler;
	cm_id_priv->id.add_ref = add_ref;
	cm_id_priv->id.rem_ref = rem_ref;
	spin_lock_init(&cm_id_priv->lock);
	atomic_set(&cm_id_priv->refcount, 1);
	init_waitqueue_head(&cm_id_priv->connect_wait);
	init_completion(&cm_id_priv->destroy_comp);
	INIT_LIST_HEAD(&cm_id_priv->work_list);
	INIT_LIST_HEAD(&cm_id_priv->work_free_list);

	return &cm_id_priv->id;
}
EXPORT_SYMBOL(iw_create_cm_id);
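
/*
 * Illustrative sketch (comment only, not part of this module): a kernel
 * ULP typically supplies its own iw_cm_handler when creating a cm_id and
 * releases it with iw_destroy_cm_id().  The names my_handler, my_ctx and
 * ib_dev are hypothetical.
 *
 *	static int my_handler(struct iw_cm_id *id, struct iw_cm_event *ev)
 *	{
 *		switch (ev->event) {
 *		case IW_CM_EVENT_ESTABLISHED:
 *			break;
 *		case IW_CM_EVENT_CLOSE:
 *			break;
 *		default:
 *			break;
 *		}
 *		return 0;
 *	}
 *
 *	cm_id = iw_create_cm_id(ib_dev, my_handler, my_ctx);
 *	if (IS_ERR(cm_id))
 *		return PTR_ERR(cm_id);
 *	...
 *	iw_destroy_cm_id(cm_id);
 *
 * A nonzero return from the handler causes the IWCM to destroy the cm_id
 * (see cm_work_handler() below), so handlers normally return 0.
 */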


static int iwcm_modify_qp_err(struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;

	if (!qp)
		return -EINVAL;

	qp_attr.qp_state = IB_QPS_ERR;
	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * This is really the RDMAC CLOSING state. It is most similar to the
 * IB SQD QP state.
 */
static int iwcm_modify_qp_sqd(struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;

	BUG_ON(qp == NULL);
	qp_attr.qp_state = IB_QPS_SQD;
	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * CM_ID <-- CLOSING
 *
 * Block if a passive or active connection is currently being processed. Then
 * process the event as follows:
 * - If we are ESTABLISHED, move to CLOSING and modify the QP state
 *   based on the abrupt flag
 * - If the connection is already in the CLOSING or IDLE state, the peer is
 *   disconnecting concurrently with us and we've already seen the
 *   DISCONNECT event -- ignore the request and return 0
 * - Disconnect on a listening endpoint returns -EINVAL
 */
int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret = 0;
	struct ib_qp *qp = NULL;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	/* Wait if we're currently in a connect or accept downcall */
	wait_event(cm_id_priv->connect_wait,
		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_ESTABLISHED:
		cm_id_priv->state = IW_CM_STATE_CLOSING;

		/* QP could be NULL for a user-mode client */
		if (cm_id_priv->qp)
			qp = cm_id_priv->qp;
		else
			ret = -EINVAL;
		break;
	case IW_CM_STATE_LISTEN:
		ret = -EINVAL;
		break;
	case IW_CM_STATE_CLOSING:
		/* remote peer closed first */
	case IW_CM_STATE_IDLE:
		/* accept or connect returned !0 */
		break;
	case IW_CM_STATE_CONN_RECV:
		/*
		 * App called disconnect before/without calling accept after
		 * connect_request event delivered.
		 */
		break;
	case IW_CM_STATE_CONN_SENT:
		/* Can only get here if wait above fails */
	default:
		BUG();
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	if (qp) {
		if (abrupt)
			ret = iwcm_modify_qp_err(qp);
		else
			ret = iwcm_modify_qp_sqd(qp);

		/*
		 * If both sides are disconnecting the QP could
		 * already be in ERR or SQD states
		 */
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL(iw_cm_disconnect);
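
/*
 * Illustrative usage (comment only): with an ESTABLISHED cm_id,
 *
 *	iw_cm_disconnect(cm_id, 0);	orderly close, QP moved to SQD
 *	iw_cm_disconnect(cm_id, 1);	abrupt close, QP moved to ERR
 *
 * Either call returns 0 even if the peer has already moved the QP, and
 * -EINVAL on a listening cm_id or a cm_id with no associated QP.
 */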
320
321/*
322 * CM_ID <-- DESTROYING
323 *
324 * Clean up all resources associated with the connection and release
325 * the initial reference taken by iw_create_cm_id.
326 */
327static void destroy_cm_id(struct iw_cm_id *cm_id)
328{
329	struct iwcm_id_private *cm_id_priv;
330	unsigned long flags;
331
332	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
333	/*
334	 * Wait if we're currently in a connect or accept downcall. A
335	 * listening endpoint should never block here.
336	 */
337	wait_event(cm_id_priv->connect_wait,
338		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
339
340	/*
341	 * Since we're deleting the cm_id, drop any events that
342	 * might arrive before the last dereference.
343	 */
344	set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
345
346	spin_lock_irqsave(&cm_id_priv->lock, flags);
347	switch (cm_id_priv->state) {
348	case IW_CM_STATE_LISTEN:
349		cm_id_priv->state = IW_CM_STATE_DESTROYING;
350		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
351		/* destroy the listening endpoint */
352		cm_id->device->iwcm->destroy_listen(cm_id);
353		spin_lock_irqsave(&cm_id_priv->lock, flags);
354		break;
355	case IW_CM_STATE_ESTABLISHED:
356		cm_id_priv->state = IW_CM_STATE_DESTROYING;
357		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
358		/* Abrupt close of the connection */
359		(void)iwcm_modify_qp_err(cm_id_priv->qp);
360		spin_lock_irqsave(&cm_id_priv->lock, flags);
361		break;
362	case IW_CM_STATE_IDLE:
363	case IW_CM_STATE_CLOSING:
364		cm_id_priv->state = IW_CM_STATE_DESTROYING;
365		break;
366	case IW_CM_STATE_CONN_RECV:
367		/*
368		 * App called destroy before/without calling accept after
369		 * receiving connection request event notification or
		 * returned nonzero from the event callback function.
		 * In either case, must tell the provider to reject.
		 */
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		cm_id->device->iwcm->reject(cm_id, NULL, 0);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_DESTROYING:
	default:
		BUG();
		break;
	}
	if (cm_id_priv->qp) {
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	(void)iwcm_deref_id(cm_id_priv);
}

/*
 * This function is only called by the application thread and cannot
 * be called by the event thread. The function will wait for all
 * references to be released on the cm_id and then kfree the cm_id
 * object.
 */
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	destroy_cm_id(cm_id);
}
EXPORT_SYMBOL(iw_destroy_cm_id);

/**
 * iw_cm_map - set up the mapped addresses for a connection
 * @cm_id: connection manager pointer
 * @active: Indicates the active side when true
 *
 * In this implementation no port-mapper lookup is performed: the mapped
 * local and remote addresses are simply copied from local_addr and
 * remote_addr, and 0 is always returned.
 */
static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
{
	cm_id->m_local_addr = cm_id->local_addr;
	cm_id->m_remote_addr = cm_id->remote_addr;

	return 0;
}

/*
 * CM_ID <-- LISTEN
 *
 * Start listening for connect requests. Generates one CONNECT_REQUEST
 * event for each inbound connect request.
 */
int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	if (!backlog)
		backlog = default_backlog;

	ret = alloc_work_entries(cm_id_priv, backlog);
	if (ret)
		return ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
		cm_id_priv->state = IW_CM_STATE_LISTEN;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = iw_cm_map(cm_id, false);
		if (!ret)
			ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
		if (ret)
			cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	default:
		ret = -EINVAL;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	return ret;
}
EXPORT_SYMBOL(iw_cm_listen);
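
/*
 * Illustrative sketch (comment only): a passive-side ULP sets local_addr
 * on a cm_id created above and starts listening.  A backlog of 0 selects
 * default_backlog (256); the backlog also sets how many iwcm_work
 * elements are pre-allocated for this cm_id.  my_laddr is hypothetical.
 *
 *	cm_id->local_addr = my_laddr;
 *	ret = iw_cm_listen(cm_id, 0);
 */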

/*
 * CM_ID <-- IDLE
 *
 * Rejects an inbound connection request. No events are generated.
 */
int iw_cm_reject(struct iw_cm_id *cm_id,
		 const void *private_data,
		 u8 private_data_len)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	cm_id_priv->state = IW_CM_STATE_IDLE;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = cm_id->device->iwcm->reject(cm_id, private_data,
					  private_data_len);

	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}
EXPORT_SYMBOL(iw_cm_reject);

/*
 * CM_ID <-- ESTABLISHED
 *
 * Accepts an inbound connection request and generates an ESTABLISHED
 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
 * until the ESTABLISHED event is received from the provider.
 */
int iw_cm_accept(struct iw_cm_id *cm_id,
		 struct iw_cm_conn_param *iw_param)
{
	struct iwcm_id_private *cm_id_priv;
	struct ib_qp *qp;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	/* Get the ib_qp given the QPN */
	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
	if (!qp) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	cm_id->device->iwcm->add_ref(qp);
	cm_id_priv->qp = qp;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
	if (ret) {
		/* An error on accept precludes provider events */
		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
		cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		if (cm_id_priv->qp) {
			cm_id->device->iwcm->rem_ref(qp);
			cm_id_priv->qp = NULL;
		}
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
	}

	return ret;
}
EXPORT_SYMBOL(iw_cm_accept);
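
/*
 * Illustrative sketch (comment only): from the client handler for an
 * IW_CM_EVENT_CONNECT_REQUEST, the ULP accepts or rejects the child
 * cm_id passed to the handler.  The iw_cm_conn_param fields other than
 * qpn are assumed from <rdma/iw_cm.h>; my_qp and want_it are
 * hypothetical.
 *
 *	struct iw_cm_conn_param param = {
 *		.qpn		  = my_qp->qp_num,
 *		.ord		  = 1,
 *		.ird		  = 1,
 *		.private_data	  = NULL,
 *		.private_data_len = 0,
 *	};
 *
 *	if (want_it)
 *		ret = iw_cm_accept(child_cm_id, &param);
 *	else
 *		ret = iw_cm_reject(child_cm_id, NULL, 0);
 */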

/*
 * Active Side: CM_ID <-- CONN_SENT
 *
 * If successful, results in the generation of a CONNECT_REPLY
 * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
 * CONNECT_REPLY event is received from the provider.
 */
int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
	struct iwcm_id_private *cm_id_priv;
	int ret;
	unsigned long flags;
	struct ib_qp *qp;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	ret = alloc_work_entries(cm_id_priv, 4);
	if (ret)
		return ret;

	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	spin_lock_irqsave(&cm_id_priv->lock, flags);

	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
		ret = -EINVAL;
		goto err;
	}

	/* Get the ib_qp given the QPN */
	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
	if (!qp) {
		ret = -EINVAL;
		goto err;
	}
	cm_id->device->iwcm->add_ref(qp);
	cm_id_priv->qp = qp;
	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = iw_cm_map(cm_id, true);
	if (!ret)
		ret = cm_id->device->iwcm->connect(cm_id, iw_param);
	if (!ret)
		return 0;	/* success */

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->qp) {
		cm_id->device->iwcm->rem_ref(qp);
		cm_id_priv->qp = NULL;
	}
	cm_id_priv->state = IW_CM_STATE_IDLE;
err:
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	wake_up_all(&cm_id_priv->connect_wait);
	return ret;
}
EXPORT_SYMBOL(iw_cm_connect);
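
/*
 * Illustrative sketch (comment only): the active side fills in the
 * addresses and connection parameters and calls iw_cm_connect().  The
 * result is reported later as an IW_CM_EVENT_CONNECT_REPLY to the
 * handler given to iw_create_cm_id(); a zero event status means the
 * connection moved to ESTABLISHED.  my_laddr, my_raddr, my_qp and param
 * are hypothetical.
 *
 *	cm_id->local_addr  = my_laddr;
 *	cm_id->remote_addr = my_raddr;
 *	param.qpn = my_qp->qp_num;
 *	ret = iw_cm_connect(cm_id, &param);
 */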

/*
 * Passive Side: new CM_ID <-- CONN_RECV
 *
 * Handles an inbound connect request. The function creates a new
 * iw_cm_id to represent the new connection and inherits the client
 * callback function and other attributes from the listening parent.
 *
 * The work item contains a pointer to the listen_cm_id and the event. The
 * listen_cm_id contains the client cm_handler, context and
 * device. These are copied when the cm_id is cloned. The event
 * contains the new four tuple.
 *
 * An error on the child should not affect the parent, so this
 * function does not return a value.
 */
static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
				struct iw_cm_event *iw_event)
{
	unsigned long flags;
	struct iw_cm_id *cm_id;
	struct iwcm_id_private *cm_id_priv;
	int ret;

	/*
	 * The provider should never generate a connection request
	 * event with a bad status.
	 */
	BUG_ON(iw_event->status);

	cm_id = iw_create_cm_id(listen_id_priv->id.device,
				listen_id_priv->id.cm_handler,
				listen_id_priv->id.context);
	/* If the cm_id could not be created, ignore the request */
	if (IS_ERR(cm_id))
		goto out;

	cm_id->provider_data = iw_event->provider_data;
	cm_id->m_local_addr = iw_event->local_addr;
	cm_id->m_remote_addr = iw_event->remote_addr;
	cm_id->local_addr = listen_id_priv->id.local_addr;
	cm_id->remote_addr = iw_event->remote_addr;
	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	cm_id_priv->state = IW_CM_STATE_CONN_RECV;

	/*
	 * We could be destroying the listening id. If so, ignore this
	 * upcall.
	 */
	spin_lock_irqsave(&listen_id_priv->lock, flags);
	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
		goto out;
	}
	spin_unlock_irqrestore(&listen_id_priv->lock, flags);

	ret = alloc_work_entries(cm_id_priv, 3);
	if (ret) {
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
		goto out;
	}

	/* Call the client CM handler */
	ret = cm_id->cm_handler(cm_id, iw_event);
	if (ret) {
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
	}

out:
	if (iw_event->private_data_len)
		kfree(iw_event->private_data);
}

/*
 * Passive Side: CM_ID <-- ESTABLISHED
 *
 * The provider generated an ESTABLISHED event which means that
 * the MPA negotiation has completed successfully and we are now in MPA
 * FPDU mode.
 *
 * This event can only be received in the CONN_RECV state. If the
 * remote peer closed, the ESTABLISHED event would be received followed
 * by the CLOSE event. If the app closes, it will block until we wake
 * it up after processing this event.
 */
static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
			       struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);

	/*
	 * We clear the CONNECT_WAIT bit here to allow the callback
	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
	 * from a callback handler is not allowed.
	 */
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}

/*
 * Active Side: CM_ID <-- ESTABLISHED
 *
 * The app has called connect and is waiting for the established event to
 * post its requests to the server. This event will wake up anyone
 * blocked in iw_cm_disconnect or iw_destroy_cm_id.
 */
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
			       struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	/*
	 * Clear the connect wait bit so a callback function calling
	 * iw_cm_disconnect will not wait and deadlock this thread
	 */
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
	if (iw_event->status == 0) {
		cm_id_priv->id.m_local_addr = iw_event->local_addr;
		cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
		iw_event->local_addr = cm_id_priv->id.local_addr;
		iw_event->remote_addr = cm_id_priv->id.remote_addr;
		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
	} else {
		/* REJECTED or RESET */
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
		cm_id_priv->state = IW_CM_STATE_IDLE;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);

	if (iw_event->private_data_len)
		kfree(iw_event->private_data);

	/* Wake up waiters on connect complete */
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}

/*
 * CM_ID <-- CLOSING
 *
 * If in the ESTABLISHED state, move to CLOSING.
 */
static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
				  struct iw_cm_event *iw_event)
{
	unsigned long flags;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
		cm_id_priv->state = IW_CM_STATE_CLOSING;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * CM_ID <-- IDLE
 *
 * If in the ESTABLISHED or CLOSING states, the QP will have been
 * moved by the provider to the ERR state. Disassociate the CM_ID from
 * the QP, move to IDLE, and remove the 'connected' reference.
 *
 * If in some other state, the cm_id was destroyed asynchronously.
 * This is the last reference that will result in waking up
 * the app thread blocked in iw_destroy_cm_id.
 */
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
				  struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret = 0;
	spin_lock_irqsave(&cm_id_priv->lock, flags);

	if (cm_id_priv->qp) {
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
	}
	switch (cm_id_priv->state) {
	case IW_CM_STATE_ESTABLISHED:
	case IW_CM_STATE_CLOSING:
		cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_DESTROYING:
		break;
	default:
		BUG();
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	return ret;
}

static int process_event(struct iwcm_id_private *cm_id_priv,
			 struct iw_cm_event *iw_event)
{
	int ret = 0;

	switch (iw_event->event) {
	case IW_CM_EVENT_CONNECT_REQUEST:
		cm_conn_req_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_CONNECT_REPLY:
		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_ESTABLISHED:
		ret = cm_conn_est_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_DISCONNECT:
		cm_disconnect_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_CLOSE:
		ret = cm_close_handler(cm_id_priv, iw_event);
		break;
	default:
		BUG();
	}

	return ret;
}

/*
 * Process events on the work_list for the cm_id. If the callback
 * function requests that the cm_id be deleted, a flag is set in the
 * cm_id flags to indicate that when the last reference is
 * removed, the cm_id is to be destroyed. This is necessary to
 * distinguish between an object that will be destroyed by the app
 * thread asleep on the destroy_comp list vs. an object destroyed
 * here synchronously when the last reference is removed.
 */
static void cm_work_handler(struct work_struct *_work)
{
	struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
	struct iw_cm_event levent;
	struct iwcm_id_private *cm_id_priv = work->cm_id;
	unsigned long flags;
	int empty;
	int ret = 0;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	empty = list_empty(&cm_id_priv->work_list);
	while (!empty) {
		work = list_entry(cm_id_priv->work_list.next,
				  struct iwcm_work, list);
		list_del_init(&work->list);
		empty = list_empty(&cm_id_priv->work_list);
		levent = work->event;
		put_work(work);
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);

		if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
			ret = process_event(cm_id_priv, &levent);
			if (ret)
				destroy_cm_id(&cm_id_priv->id);
		} else
			pr_debug("dropping event %d\n", levent.event);
		if (iwcm_deref_id(cm_id_priv))
			return;
		if (empty)
			return;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * This function is called in interrupt context. Schedule events on
 * the iwcm_wq thread to allow callback functions to downcall into
 * the CM and/or block.  Events are queued to a per-CM_ID
 * work_list. If this is the first event on the work_list, the work
 * element is also queued on the iwcm_wq thread.
 *
 * Each event holds a reference on the cm_id. Until the last posted
 * event has been delivered and processed, the cm_id cannot be
 * deleted.
 *
 * Returns:
 *	      0	- the event was handled.
 *	-ENOMEM	- the event was not handled due to lack of resources.
 */
static int cm_event_handler(struct iw_cm_id *cm_id,
			     struct iw_cm_event *iw_event)
{
	struct iwcm_work *work;
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret = 0;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	work = get_work(cm_id_priv);
	if (!work) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_WORK(&work->work, cm_work_handler);
	work->cm_id = cm_id_priv;
	work->event = *iw_event;

	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
	    work->event.private_data_len) {
		ret = copy_private_data(&work->event);
		if (ret) {
			put_work(work);
			goto out;
		}
	}

	atomic_inc(&cm_id_priv->refcount);
	if (list_empty(&cm_id_priv->work_list)) {
		list_add_tail(&work->list, &cm_id_priv->work_list);
		queue_work(iwcm_wq, &work->work);
	} else
		list_add_tail(&work->list, &cm_id_priv->work_list);
out:
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}
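
/*
 * Illustrative sketch (comment only): a provider driver posts events by
 * calling the event_handler installed by iw_create_cm_id(), which is
 * this cm_event_handler().  The event is copied, so it may live on the
 * caller's stack; the field values shown are hypothetical.
 *
 *	struct iw_cm_event ev = {
 *		.event  = IW_CM_EVENT_ESTABLISHED,
 *		.status = 0,
 *	};
 *
 *	ret = cm_id->event_handler(cm_id, &ev);
 *
 * For IW_CM_EVENT_CONNECT_REQUEST a return of -ENOMEM means the listen
 * backlog is exhausted and the provider must reject the request itself.
 */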

static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
				  struct ib_qp_attr *qp_attr,
				  int *qp_attr_mask)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_CONN_RECV:
	case IW_CM_STATE_ESTABLISHED:
		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE|
					   IB_ACCESS_REMOTE_READ;
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}

static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
				  struct ib_qp_attr *qp_attr,
				  int *qp_attr_mask)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_CONN_RECV:
	case IW_CM_STATE_ESTABLISHED:
		*qp_attr_mask = 0;
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}

int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
		       struct ib_qp_attr *qp_attr,
		       int *qp_attr_mask)
{
	struct iwcm_id_private *cm_id_priv;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	switch (qp_attr->qp_state) {
	case IB_QPS_INIT:
	case IB_QPS_RTR:
		ret = iwcm_init_qp_init_attr(cm_id_priv,
					     qp_attr, qp_attr_mask);
		break;
	case IB_QPS_RTS:
		ret = iwcm_init_qp_rts_attr(cm_id_priv,
					    qp_attr, qp_attr_mask);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret;
}
EXPORT_SYMBOL(iw_cm_init_qp_attr);
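
/*
 * Illustrative sketch (comment only): a ULP transitioning its QP asks the
 * IWCM which attribute mask to apply and then calls ib_modify_qp().  For
 * IB_QPS_INIT and IB_QPS_RTR this yields IB_QP_STATE | IB_QP_ACCESS_FLAGS
 * with remote read/write access; for IB_QPS_RTS the mask is 0.  my_qp is
 * hypothetical.
 *
 *	struct ib_qp_attr attr = { .qp_state = IB_QPS_INIT };
 *	int mask;
 *
 *	ret = iw_cm_init_qp_attr(cm_id, &attr, &mask);
 *	if (!ret)
 *		ret = ib_modify_qp(my_qp, &attr, mask);
 */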

static int __init iw_cm_init(void)
{
	iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
	if (!iwcm_wq)
		return -ENOMEM;

	return 0;
}

static void __exit iw_cm_cleanup(void)
{
	destroy_workqueue(iwcm_wq);
}

module_init(iw_cm_init);
module_exit(iw_cm_cleanup);