/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/wait.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
#include <rdma/iw_portmap.h>

#include "iwcm.h"

MODULE_AUTHOR("Tom Tucker");
MODULE_DESCRIPTION("iWARP CM");
MODULE_LICENSE("Dual BSD/GPL");

static struct workqueue_struct *iwcm_wq;
struct iwcm_work {
	struct work_struct work;
	struct iwcm_id_private *cm_id;
	struct list_head list;
	struct iw_cm_event event;
	struct list_head free_list;
};

static unsigned int default_backlog = 256;

/*
 * The following services provide a mechanism for pre-allocating iwcm_work
 * elements.  The design pre-allocates them based on the cm_id type:
 *	LISTENING IDS:	Get enough elements preallocated to handle the
 *			listen backlog.
 *	ACTIVE IDS:	4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
 *	PASSIVE IDS:	3: ESTABLISHED, DISCONNECT, CLOSE
 *
 * Allocating them in connect and listen avoids having to deal
 * with allocation failures on the event upcall from the provider (which
 * is called in interrupt context).
 *
 * One exception is when creating the cm_id for incoming connection requests.
 * There are two cases:
 * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
 *    the backlog is exceeded, then no more connection request events will
 *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
 *    to the provider to reject the connection request.
 * 2) in the connection request workqueue handler, cm_conn_req_handler().
 *    If work elements cannot be allocated for the new connect request cm_id,
 *    then IWCM will call the provider reject method.  This is ok since
 *    cm_conn_req_handler() runs in the workqueue thread context.
 */

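/*
 * Take a pre-allocated work element off the cm_id's free list; returns
 * NULL if the list is empty.
 */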
static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
{
	struct iwcm_work *work;

	if (list_empty(&cm_id_priv->work_free_list))
		return NULL;
	work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
			  free_list);
	list_del_init(&work->free_list);
	return work;
}

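/*
 * Return a work element to its cm_id's free list.
 */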
static void put_work(struct iwcm_work *work)
{
	list_add(&work->free_list, &work->cm_id->work_free_list);
}

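/*
 * Free every work element remaining on the cm_id's free list.
 */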
static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
{
	struct list_head *e, *tmp;

	list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
		kfree(list_entry(e, struct iwcm_work, free_list));
}

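/*
 * Pre-allocate 'count' work elements and place them on the cm_id's free
 * list.  On allocation failure everything allocated so far is released
 * and -ENOMEM is returned.
 */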
static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
{
	struct iwcm_work *work;

	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
	while (count--) {
		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
		if (!work) {
			dealloc_work_entries(cm_id_priv);
			return -ENOMEM;
		}
		work->cm_id = cm_id_priv;
		INIT_LIST_HEAD(&work->list);
		put_work(work);
	}
	return 0;
}

/*
 * Save private data from incoming connection requests to
 * iw_cm_event, so the low level driver doesn't have to. Adjust
 * the event ptr to point to the local copy.
 */
static int copy_private_data(struct iw_cm_event *event)
{
	void *p;

	p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
	if (!p)
		return -ENOMEM;
	event->private_data = p;
	return 0;
}

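/*
 * Release the pre-allocated work elements and the cm_id itself.
 */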
static void free_cm_id(struct iwcm_id_private *cm_id_priv)
{
	dealloc_work_entries(cm_id_priv);
	kfree(cm_id_priv);
}

/*
 * Release a reference on cm_id. If the last reference is being
 * released, free the cm_id and return 1.
 */
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
	BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
		BUG_ON(!list_empty(&cm_id_priv->work_list));
		free_cm_id(cm_id_priv);
		return 1;
	}

	return 0;
}

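/*
 * Reference counting callbacks installed on each cm_id (id.add_ref and
 * id.rem_ref).  Dropping the last reference frees the cm_id.
 */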
static void add_ref(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;
	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	atomic_inc(&cm_id_priv->refcount);
}

static void rem_ref(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	(void)iwcm_deref_id(cm_id_priv);
}

static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);

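/*
 * Allocate and initialize a new cm_id in the IDLE state.  The caller
 * holds the initial reference, which is dropped by iw_destroy_cm_id().
 * Returns a pointer to the embedded iw_cm_id or ERR_PTR(-ENOMEM).
 */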
struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
				 iw_cm_handler cm_handler,
				 void *context)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
	if (!cm_id_priv)
		return ERR_PTR(-ENOMEM);

	cm_id_priv->state = IW_CM_STATE_IDLE;
	cm_id_priv->id.device = device;
	cm_id_priv->id.cm_handler = cm_handler;
	cm_id_priv->id.context = context;
	cm_id_priv->id.event_handler = cm_event_handler;
	cm_id_priv->id.add_ref = add_ref;
	cm_id_priv->id.rem_ref = rem_ref;
	spin_lock_init(&cm_id_priv->lock);
	atomic_set(&cm_id_priv->refcount, 1);
	init_waitqueue_head(&cm_id_priv->connect_wait);
	init_completion(&cm_id_priv->destroy_comp);
	INIT_LIST_HEAD(&cm_id_priv->work_list);
	INIT_LIST_HEAD(&cm_id_priv->work_free_list);

	return &cm_id_priv->id;
}
EXPORT_SYMBOL(iw_create_cm_id);


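/*
 * Move the QP to the ERROR state to force an abrupt close.
 */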
static int iwcm_modify_qp_err(struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;

	if (!qp)
		return -EINVAL;

	qp_attr.qp_state = IB_QPS_ERR;
	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * This is really the RDMAC CLOSING state. It is most similar to the
 * IB SQD QP state.
 */
static int iwcm_modify_qp_sqd(struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;

	BUG_ON(qp == NULL);
	qp_attr.qp_state = IB_QPS_SQD;
	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * CM_ID <-- CLOSING
 *
 * Block if a passive or active connection is currently being processed. Then
 * process the event as follows:
 * - If we are ESTABLISHED, move to CLOSING and modify the QP state
 *   based on the abrupt flag
 * - If the connection is already in the CLOSING or IDLE state, the peer is
 *   disconnecting concurrently with us and we've already seen the
 *   DISCONNECT event -- ignore the request and return 0
 * - Disconnect on a listening endpoint returns -EINVAL
 */
int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret = 0;
	struct ib_qp *qp = NULL;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	/* Wait if we're currently in a connect or accept downcall */
	wait_event(cm_id_priv->connect_wait,
		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_ESTABLISHED:
		cm_id_priv->state = IW_CM_STATE_CLOSING;

		/* QP could be NULL for a user-mode client */
		if (cm_id_priv->qp)
			qp = cm_id_priv->qp;
		else
			ret = -EINVAL;
		break;
	case IW_CM_STATE_LISTEN:
		ret = -EINVAL;
		break;
	case IW_CM_STATE_CLOSING:
		/* remote peer closed first */
	case IW_CM_STATE_IDLE:
		/* accept or connect returned !0 */
		break;
	case IW_CM_STATE_CONN_RECV:
		/*
		 * App called disconnect before/without calling accept after
		 * connect_request event delivered.
		 */
		break;
	case IW_CM_STATE_CONN_SENT:
		/* Can only get here if wait above fails */
	default:
		BUG();
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	if (qp) {
		if (abrupt)
			(void) iwcm_modify_qp_err(qp);
		else
			(void) iwcm_modify_qp_sqd(qp);

		/*
		 * If both sides are disconnecting the QP could
		 * already be in ERR or SQD states
		 */
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL(iw_cm_disconnect);

/*
 * CM_ID <-- DESTROYING
 *
 * Clean up all resources associated with the connection and release
 * the initial reference taken by iw_create_cm_id.
 */
static void destroy_cm_id(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	/*
	 * Wait if we're currently in a connect or accept downcall. A
	 * listening endpoint should never block here.
	 */
	wait_event(cm_id_priv->connect_wait,
		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

	/*
	 * Since we're deleting the cm_id, drop any events that
	 * might arrive before the last dereference.
	 */
	set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_LISTEN:
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		/* destroy the listening endpoint */
		cm_id->device->iwcm->destroy_listen(cm_id);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_ESTABLISHED:
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		/* Abrupt close of the connection */
		(void)iwcm_modify_qp_err(cm_id_priv->qp);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CLOSING:
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		break;
	case IW_CM_STATE_CONN_RECV:
		/*
		 * App called destroy before/without calling accept after
		 * receiving connection request event notification or
		 * returned nonzero from the event callback function.
		 * In either case, must tell the provider to reject.
		 */
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		cm_id->device->iwcm->reject(cm_id, NULL, 0);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_DESTROYING:
	default:
		BUG();
		break;
	}
	if (cm_id_priv->qp) {
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	(void)iwcm_deref_id(cm_id_priv);
}

/*
 * This function is only called by the application thread and cannot
 * be called by the event thread. It cleans up the cm_id and drops the
 * initial reference taken by iw_create_cm_id; the cm_id memory is
 * freed once the last reference has been released.
 */
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	destroy_cm_id(cm_id);
}
EXPORT_SYMBOL(iw_destroy_cm_id);

/**
 * iw_cm_map - set up the mapped connection addresses
 * @cm_id: connection manager pointer
 * @active: indicates the active side when true
 *
 * The port mapper is not used in this implementation: the local and
 * remote addresses are copied verbatim into the mapped address fields
 * and 0 is always returned.
 */
static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
{
	cm_id->m_local_addr = cm_id->local_addr;
	cm_id->m_remote_addr = cm_id->remote_addr;

	return 0;
}

/*
 * CM_ID <-- LISTEN
 *
 * Start listening for connect requests. Generates one CONNECT_REQUEST
 * event for each inbound connect request.
 */
int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	if (!backlog)
		backlog = default_backlog;

	ret = alloc_work_entries(cm_id_priv, backlog);
	if (ret)
		return ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
		cm_id_priv->state = IW_CM_STATE_LISTEN;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = iw_cm_map(cm_id, false);
		if (!ret)
			ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
		if (ret)
			cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	default:
		ret = -EINVAL;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	return ret;
}
EXPORT_SYMBOL(iw_cm_listen);

/*
 * CM_ID <-- IDLE
 *
 * Rejects an inbound connection request. No events are generated.
 */
int iw_cm_reject(struct iw_cm_id *cm_id,
		 const void *private_data,
		 u8 private_data_len)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	cm_id_priv->state = IW_CM_STATE_IDLE;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = cm_id->device->iwcm->reject(cm_id, private_data,
					  private_data_len);

	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}
EXPORT_SYMBOL(iw_cm_reject);

/*
 * CM_ID <-- ESTABLISHED
 *
 * Accepts an inbound connection request and generates an ESTABLISHED
 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
 * until the ESTABLISHED event is received from the provider.
 */
int iw_cm_accept(struct iw_cm_id *cm_id,
		 struct iw_cm_conn_param *iw_param)
{
	struct iwcm_id_private *cm_id_priv;
	struct ib_qp *qp;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	/* Get the ib_qp given the QPN */
	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
	if (!qp) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	cm_id->device->iwcm->add_ref(qp);
	cm_id_priv->qp = qp;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
	if (ret) {
		/* An error on accept precludes provider events */
		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
		cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		if (cm_id_priv->qp) {
			cm_id->device->iwcm->rem_ref(qp);
			cm_id_priv->qp = NULL;
		}
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
	}

	return ret;
}
EXPORT_SYMBOL(iw_cm_accept);

/*
 * Active Side: CM_ID <-- CONN_SENT
 *
 * If successful, results in the generation of a CONNECT_REPLY
 * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
 * CONNECT_REPLY event is received from the provider.
 */
int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
	struct iwcm_id_private *cm_id_priv;
	int ret;
	unsigned long flags;
	struct ib_qp *qp;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	ret = alloc_work_entries(cm_id_priv, 4);
	if (ret)
		return ret;

	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	spin_lock_irqsave(&cm_id_priv->lock, flags);

	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
		ret = -EINVAL;
		goto err;
	}

	/* Get the ib_qp given the QPN */
	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
	if (!qp) {
		ret = -EINVAL;
		goto err;
	}
	cm_id->device->iwcm->add_ref(qp);
	cm_id_priv->qp = qp;
	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = iw_cm_map(cm_id, true);
	if (!ret)
		ret = cm_id->device->iwcm->connect(cm_id, iw_param);
	if (!ret)
		return 0;	/* success */

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->qp) {
		cm_id->device->iwcm->rem_ref(qp);
		cm_id_priv->qp = NULL;
	}
	cm_id_priv->state = IW_CM_STATE_IDLE;
err:
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	wake_up_all(&cm_id_priv->connect_wait);
	return ret;
}
EXPORT_SYMBOL(iw_cm_connect);
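
/*
 * Illustrative sketch of the active-side call sequence a consumer might
 * use; my_event_handler, my_ctx and param are hypothetical names that do
 * not exist in this file:
 *
 *	id = iw_create_cm_id(ibdev, my_event_handler, my_ctx);
 *	if (IS_ERR(id))
 *		return PTR_ERR(id);
 *	(fill in id->local_addr, id->remote_addr and param)
 *	ret = iw_cm_connect(id, &param);
 *
 * my_event_handler() later receives IW_CM_EVENT_CONNECT_REPLY; the id is
 * eventually released with iw_destroy_cm_id().
 */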

/*
 * Passive Side: new CM_ID <-- CONN_RECV
 *
 * Handles an inbound connect request. The function creates a new
 * iw_cm_id to represent the new connection and inherits the client
 * callback function and other attributes from the listening parent.
 *
 * The work item contains a pointer to the listen_cm_id and the event. The
 * listen_cm_id contains the client cm_handler, context and
 * device. These are copied when the child cm_id is created. The event
 * contains the new four tuple.
 *
 * An error on the child should not affect the parent, so this
 * function does not return a value.
 */
static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
				struct iw_cm_event *iw_event)
{
	unsigned long flags;
	struct iw_cm_id *cm_id;
	struct iwcm_id_private *cm_id_priv;
	int ret;

	/*
	 * The provider should never generate a connection request
	 * event with a bad status.
	 */
	BUG_ON(iw_event->status);

	cm_id = iw_create_cm_id(listen_id_priv->id.device,
				listen_id_priv->id.cm_handler,
				listen_id_priv->id.context);
	/* If the cm_id could not be created, ignore the request */
	if (IS_ERR(cm_id))
		goto out;

	cm_id->provider_data = iw_event->provider_data;
	cm_id->m_local_addr = iw_event->local_addr;
	cm_id->m_remote_addr = iw_event->remote_addr;
	cm_id->local_addr = listen_id_priv->id.local_addr;
	cm_id->remote_addr = iw_event->remote_addr;
	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	cm_id_priv->state = IW_CM_STATE_CONN_RECV;

	/*
	 * We could be destroying the listening id. If so, ignore this
	 * upcall.
	 */
	spin_lock_irqsave(&listen_id_priv->lock, flags);
	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
		goto out;
	}
	spin_unlock_irqrestore(&listen_id_priv->lock, flags);

	ret = alloc_work_entries(cm_id_priv, 3);
	if (ret) {
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
		goto out;
	}

	/* Call the client CM handler */
	ret = cm_id->cm_handler(cm_id, iw_event);
	if (ret) {
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
	}

out:
	if (iw_event->private_data_len)
		kfree(iw_event->private_data);
}

/*
 * Passive Side: CM_ID <-- ESTABLISHED
 *
 * The provider generated an ESTABLISHED event which means that
 * the MPA negotiation has completed successfully and we are now in MPA
 * FPDU mode.
 *
 * This event can only be received in the CONN_RECV state. If the
 * remote peer closed, the ESTABLISHED event would be received followed
 * by the CLOSE event. If the app closes, it will block until we wake
 * it up after processing this event.
 */
static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
			       struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);

	/*
	 * We clear the CONNECT_WAIT bit here to allow the callback
	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
	 * from a callback handler is not allowed.
	 */
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}

/*
 * Active Side: CM_ID <-- ESTABLISHED
 *
 * The app has called connect and is waiting for the established event to
 * post its requests to the server. This event will wake up anyone
 * blocked in iw_cm_disconnect or iw_destroy_cm_id.
 */
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
			       struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	/*
	 * Clear the connect wait bit so a callback function calling
	 * iw_cm_disconnect will not wait and deadlock this thread
	 */
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
	if (iw_event->status == 0) {
		cm_id_priv->id.m_local_addr = iw_event->local_addr;
		cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
		iw_event->local_addr = cm_id_priv->id.local_addr;
		iw_event->remote_addr = cm_id_priv->id.remote_addr;
		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
	} else {
		/* REJECTED or RESET */
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
		cm_id_priv->state = IW_CM_STATE_IDLE;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);

	if (iw_event->private_data_len)
		kfree(iw_event->private_data);

	/* Wake up waiters on connect complete */
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}

/*
 * CM_ID <-- CLOSING
 *
 * If in the ESTABLISHED state, move to CLOSING.
 */
static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
				  struct iw_cm_event *iw_event)
{
	unsigned long flags;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
		cm_id_priv->state = IW_CM_STATE_CLOSING;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * CM_ID <-- IDLE
 *
 * If in the ESTABLISHED or CLOSING states, the QP will have been
 * moved by the provider to the ERR state. Disassociate the CM_ID from
 * the QP, move to IDLE, and remove the 'connected' reference.
 *
 * If in some other state, the cm_id was destroyed asynchronously;
 * dropping the reference held by this event may then free the cm_id.
 */
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
				  struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&cm_id_priv->lock, flags);

	if (cm_id_priv->qp) {
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
	}
	switch (cm_id_priv->state) {
	case IW_CM_STATE_ESTABLISHED:
	case IW_CM_STATE_CLOSING:
		cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_DESTROYING:
		break;
	default:
		BUG();
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	return ret;
}

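/*
 * Dispatch a single event to the handler for its event type.
 */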
static int process_event(struct iwcm_id_private *cm_id_priv,
			 struct iw_cm_event *iw_event)
{
	int ret = 0;

	switch (iw_event->event) {
	case IW_CM_EVENT_CONNECT_REQUEST:
		cm_conn_req_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_CONNECT_REPLY:
		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_ESTABLISHED:
		ret = cm_conn_est_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_DISCONNECT:
		cm_disconnect_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_CLOSE:
		ret = cm_close_handler(cm_id_priv, iw_event);
		break;
	default:
		BUG();
	}

	return ret;
}

/*
 * Process events on the work_list for the cm_id. If the callback
 * function requests that the cm_id be deleted, the cm_id is destroyed
 * here and the IWCM_F_DROP_EVENTS flag ensures that any events still
 * queued are dropped. Each queued event holds a reference on the
 * cm_id; the cm_id is freed when the last reference is released.
 */
static void cm_work_handler(struct work_struct *_work)
{
	struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
	struct iw_cm_event levent;
	struct iwcm_id_private *cm_id_priv = work->cm_id;
	unsigned long flags;
	int empty;
	int ret = 0;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	empty = list_empty(&cm_id_priv->work_list);
	while (!empty) {
		work = list_entry(cm_id_priv->work_list.next,
				  struct iwcm_work, list);
		list_del_init(&work->list);
		empty = list_empty(&cm_id_priv->work_list);
		levent = work->event;
		put_work(work);
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);

		if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
			ret = process_event(cm_id_priv, &levent);
			if (ret)
				destroy_cm_id(&cm_id_priv->id);
		} else
			pr_debug("dropping event %d\n", levent.event);
		if (iwcm_deref_id(cm_id_priv))
			return;
		if (empty)
			return;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * This function is called in interrupt context. Schedule events on
 * the iwcm_wq thread to allow callback functions to downcall into
 * the CM and/or block.  Events are queued to a per-CM_ID
 * work_list. If this is the first event on the work_list, the work
 * element is also queued on the iwcm_wq thread.
 *
 * Each event holds a reference on the cm_id. Until the last posted
 * event has been delivered and processed, the cm_id cannot be
 * deleted.
 *
 * Returns:
 * 	      0	- the event was handled.
 *	-ENOMEM	- the event was not handled due to lack of resources.
 */
static int cm_event_handler(struct iw_cm_id *cm_id,
			     struct iw_cm_event *iw_event)
{
	struct iwcm_work *work;
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret = 0;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	work = get_work(cm_id_priv);
	if (!work) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_WORK(&work->work, cm_work_handler);
	work->cm_id = cm_id_priv;
	work->event = *iw_event;

	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
	    work->event.private_data_len) {
		ret = copy_private_data(&work->event);
		if (ret) {
			put_work(work);
			goto out;
		}
	}

	atomic_inc(&cm_id_priv->refcount);
	if (list_empty(&cm_id_priv->work_list)) {
		list_add_tail(&work->list, &cm_id_priv->work_list);
		queue_work(iwcm_wq, &work->work);
	} else
		list_add_tail(&work->list, &cm_id_priv->work_list);
out:
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}

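/*
 * Fill in the QP attributes and mask used for the INIT and RTR
 * transitions; valid only while the cm_id is setting up or has an
 * established connection.
 */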
static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
				  struct ib_qp_attr *qp_attr,
				  int *qp_attr_mask)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_CONN_RECV:
	case IW_CM_STATE_ESTABLISHED:
		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE|
					   IB_ACCESS_REMOTE_READ;
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}

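/*
 * The RTS transition needs no CM-specific attributes; just clear the
 * mask when the cm_id is in a valid state.
 */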
static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
				  struct ib_qp_attr *qp_attr,
				  int *qp_attr_mask)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_CONN_RECV:
	case IW_CM_STATE_ESTABLISHED:
		*qp_attr_mask = 0;
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}

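/*
 * Fill in the QP attributes and attribute mask a consumer should use
 * for the requested qp_state transition (INIT, RTR or RTS) on a QP
 * associated with this cm_id.
 */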
int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
		       struct ib_qp_attr *qp_attr,
		       int *qp_attr_mask)
{
	struct iwcm_id_private *cm_id_priv;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	switch (qp_attr->qp_state) {
	case IB_QPS_INIT:
	case IB_QPS_RTR:
		ret = iwcm_init_qp_init_attr(cm_id_priv,
					     qp_attr, qp_attr_mask);
		break;
	case IB_QPS_RTS:
		ret = iwcm_init_qp_rts_attr(cm_id_priv,
					    qp_attr, qp_attr_mask);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret;
}
EXPORT_SYMBOL(iw_cm_init_qp_attr);

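/*
 * Create the ordered workqueue used to deliver CM events at module load
 * time and destroy it on unload.
 */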
static int __init iw_cm_init(void)
{
	iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
	if (!iwcm_wq)
		return -ENOMEM;

	return 0;
}

static void __exit iw_cm_cleanup(void)
{
	destroy_workqueue(iwcm_wq);
}

module_init_order(iw_cm_init, SI_ORDER_FIRST);
module_exit_order(iw_cm_cleanup, SI_ORDER_FIRST);
