ib_ucma.c revision 331769
1/*
2 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *	copyright notice, this list of conditions and the following
16 *	disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *	copyright notice, this list of conditions and the following
20 *	disclaimer in the documentation and/or other materials
21 *	provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/completion.h>
34#include <linux/file.h>
35#include <linux/mutex.h>
36#include <linux/poll.h>
37#include <linux/sched.h>
38#include <linux/idr.h>
39#include <linux/in.h>
40#include <linux/in6.h>
41#include <linux/miscdevice.h>
42#include <linux/slab.h>
43#include <linux/module.h>
44
45#include <sys/filio.h>
46
47#include <rdma/rdma_user_cm.h>
48#include <rdma/ib_marshall.h>
49#include <rdma/rdma_cm.h>
50#include <rdma/rdma_cm_ib.h>
51#include <rdma/ib_addr.h>
52#include <rdma/ib.h>
53
54MODULE_AUTHOR("Sean Hefty");
55MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
56MODULE_LICENSE("Dual BSD/GPL");
57
58static unsigned int max_backlog = 1024;
59
60struct ucma_file {
61	struct mutex		mut;
62	struct file		*filp;
63	struct list_head	ctx_list;
64	struct list_head	event_list;
65	wait_queue_head_t	poll_wait;
66	struct workqueue_struct	*close_wq;
67};
68
69struct ucma_context {
70	int			id;
71	struct completion	comp;
72	atomic_t		ref;
73	int			events_reported;
74	int			backlog;
75
76	struct ucma_file	*file;
77	struct rdma_cm_id	*cm_id;
78	u64			uid;
79
80	struct list_head	list;
81	struct list_head	mc_list;
82	/* mark that the device is in the process of destroying its internal
83	 * HW resources; protected by the global mut
84	 */
85	int			closing;
86	/* sync between removal event and id destroy, protected by file mut */
87	int			destroying;
88	struct work_struct	close_work;
89};
90
91struct ucma_multicast {
92	struct ucma_context	*ctx;
93	int			id;
94	int			events_reported;
95
96	u64			uid;
97	u8			join_state;
98	struct list_head	list;
99	struct sockaddr_storage	addr;
100};
101
102struct ucma_event {
103	struct ucma_context	*ctx;
104	struct ucma_multicast	*mc;
105	struct list_head	list;
106	struct rdma_cm_id	*cm_id;
107	struct rdma_ucm_event_resp resp;
108	struct work_struct	close_work;
109};
110
111static DEFINE_MUTEX(mut);
112static DEFINE_IDR(ctx_idr);
113static DEFINE_IDR(multicast_idr);
114
115static inline struct ucma_context *_ucma_find_context(int id,
116						      struct ucma_file *file)
117{
118	struct ucma_context *ctx;
119
120	ctx = idr_find(&ctx_idr, id);
121	if (!ctx)
122		ctx = ERR_PTR(-ENOENT);
123	else if (ctx->file != file)
124		ctx = ERR_PTR(-EINVAL);
125	return ctx;
126}
127
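/*
 * Look up a context by id and take a reference on it.  The global "mut"
 * mutex protects the idr lookup, and contexts whose device is being
 * removed (ctx->closing) are refused.  Every successful ucma_get_ctx()
 * must be balanced by ucma_put_ctx(); the final put completes ctx->comp
 * so that the close/destroy paths may safely tear down the cm_id.
 */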
128static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
129{
130	struct ucma_context *ctx;
131
132	mutex_lock(&mut);
133	ctx = _ucma_find_context(id, file);
134	if (!IS_ERR(ctx)) {
135		if (ctx->closing)
136			ctx = ERR_PTR(-EIO);
137		else
138			atomic_inc(&ctx->ref);
139	}
140	mutex_unlock(&mut);
141	return ctx;
142}
143
144static void ucma_put_ctx(struct ucma_context *ctx)
145{
146	if (atomic_dec_and_test(&ctx->ref))
147		complete(&ctx->comp);
148}
149
150static void ucma_close_event_id(struct work_struct *work)
151{
152	struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
153
154	rdma_destroy_id(uevent_close->cm_id);
155	kfree(uevent_close);
156}
157
158static void ucma_close_id(struct work_struct *work)
159{
160	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
161
162	/* Once all in-flight tasks are finished, we close all underlying
163	 * resources. The context remains alive until it is explicitly
164	 * destroyed by its creator.
165	 */
166	ucma_put_ctx(ctx);
167	wait_for_completion(&ctx->comp);
168	/* No new events will be generated after destroying the id. */
169	rdma_destroy_id(ctx->cm_id);
170}
171
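/*
 * Allocate a new context, publish it in ctx_idr under the global mutex
 * and link it onto the owning file's ctx_list.  Callers hold file->mut.
 */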
172static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
173{
174	struct ucma_context *ctx;
175
176	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
177	if (!ctx)
178		return NULL;
179
180	INIT_WORK(&ctx->close_work, ucma_close_id);
181	atomic_set(&ctx->ref, 1);
182	init_completion(&ctx->comp);
183	INIT_LIST_HEAD(&ctx->mc_list);
184	ctx->file = file;
185
186	mutex_lock(&mut);
187	ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
188	mutex_unlock(&mut);
189	if (ctx->id < 0)
190		goto error;
191
192	list_add_tail(&ctx->list, &file->ctx_list);
193	return ctx;
194
195error:
196	kfree(ctx);
197	return NULL;
198}
199
200static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
201{
202	struct ucma_multicast *mc;
203
204	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
205	if (!mc)
206		return NULL;
207
208	mutex_lock(&mut);
209	mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL);
210	mutex_unlock(&mut);
211	if (mc->id < 0)
212		goto error;
213
214	mc->ctx = ctx;
215	list_add_tail(&mc->list, &ctx->mc_list);
216	return mc;
217
218error:
219	kfree(mc);
220	return NULL;
221}
222
223static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
224				 struct rdma_conn_param *src)
225{
226	if (src->private_data_len)
227		memcpy(dst->private_data, src->private_data,
228		       src->private_data_len);
229	dst->private_data_len = src->private_data_len;
230	dst->responder_resources = src->responder_resources;
231	dst->initiator_depth = src->initiator_depth;
232	dst->flow_control = src->flow_control;
233	dst->retry_count = src->retry_count;
234	dst->rnr_retry_count = src->rnr_retry_count;
235	dst->srq = src->srq;
236	dst->qp_num = src->qp_num;
237}
238
239static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst,
240			       struct rdma_ud_param *src)
241{
242	if (src->private_data_len)
243		memcpy(dst->private_data, src->private_data,
244		       src->private_data_len);
245	dst->private_data_len = src->private_data_len;
246	ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
247	dst->qp_num = src->qp_num;
248	dst->qkey = src->qkey;
249}
250
251static void ucma_set_event_context(struct ucma_context *ctx,
252				   struct rdma_cm_event *event,
253				   struct ucma_event *uevent)
254{
255	uevent->ctx = ctx;
256	switch (event->event) {
257	case RDMA_CM_EVENT_MULTICAST_JOIN:
258	case RDMA_CM_EVENT_MULTICAST_ERROR:
259		uevent->mc = __DECONST(struct ucma_multicast *,
260		    event->param.ud.private_data);
261		uevent->resp.uid = uevent->mc->uid;
262		uevent->resp.id = uevent->mc->id;
263		break;
264	default:
265		uevent->resp.uid = ctx->uid;
266		uevent->resp.id = ctx->id;
267		break;
268	}
269}
270
271/* Called with file->mut locked for the relevant context. */
272static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
273{
274	struct ucma_context *ctx = cm_id->context;
275	struct ucma_event *con_req_eve;
276	int event_found = 0;
277
278	if (ctx->destroying)
279		return;
280
281	/* Only if the context owns this cm_id can it be queued to be closed.
282	 * Otherwise the cm_id is an in-flight one that is still on this
283	 * context's event list, waiting to be detached and reattached to its
284	 * new context by ucma_get_event(); that case is handled separately
285	 * below.
286	 */
287	if (ctx->cm_id == cm_id) {
288		mutex_lock(&mut);
289		ctx->closing = 1;
290		mutex_unlock(&mut);
291		queue_work(ctx->file->close_wq, &ctx->close_work);
292		return;
293	}
294
295	list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
296		if (con_req_eve->cm_id == cm_id &&
297		    con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
298			list_del(&con_req_eve->list);
299			INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
300			queue_work(ctx->file->close_wq, &con_req_eve->close_work);
301			event_found = 1;
302			break;
303		}
304	}
305	if (!event_found)
306		pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
307}
308
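/*
 * rdma_cm event callback.  Translates the kernel event into a
 * rdma_ucm_event_resp, queues it on the owning file's event_list and
 * wakes any pollers or blocked GET_EVENT callers.  Device-removal events
 * additionally kick off the asynchronous close path via
 * ucma_removal_event_handler().
 */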
309static int ucma_event_handler(struct rdma_cm_id *cm_id,
310			      struct rdma_cm_event *event)
311{
312	struct ucma_event *uevent;
313	struct ucma_context *ctx = cm_id->context;
314	int ret = 0;
315
316	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
317	if (!uevent)
318		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
319
320	mutex_lock(&ctx->file->mut);
321	uevent->cm_id = cm_id;
322	ucma_set_event_context(ctx, event, uevent);
323	uevent->resp.event = event->event;
324	uevent->resp.status = event->status;
325	if (cm_id->qp_type == IB_QPT_UD)
326		ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
327	else
328		ucma_copy_conn_event(&uevent->resp.param.conn,
329				     &event->param.conn);
330
331	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
332		if (!ctx->backlog) {
333			ret = -ENOMEM;
334			kfree(uevent);
335			goto out;
336		}
337		ctx->backlog--;
338	} else if (!ctx->uid || ctx->cm_id != cm_id) {
339		/*
340		 * We ignore events for new connections until userspace has set
341		 * their context.  This can only happen if an error occurs on a
342		 * new connection before the user accepts it.  This is okay,
343		 * since the accept will just fail later. However, we do need
344		 * to release the underlying HW resources in case of a device
345		 * removal event.
346		 */
347		if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
348			ucma_removal_event_handler(cm_id);
349
350		kfree(uevent);
351		goto out;
352	}
353
354	list_add_tail(&uevent->list, &ctx->file->event_list);
355	wake_up_interruptible(&ctx->file->poll_wait);
356	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
357		ucma_removal_event_handler(cm_id);
358out:
359	mutex_unlock(&ctx->file->mut);
360	return ret;
361}
362
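/*
 * RDMA_USER_CM_CMD_GET_EVENT: block (unless O_NONBLOCK) until an event is
 * queued on this file and copy it to userspace.  For connect requests a
 * fresh context is allocated here so the new child id can be accepted or
 * rejected independently of the listening id.
 */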
363static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
364			      int in_len, int out_len)
365{
366	struct ucma_context *ctx;
367	struct rdma_ucm_get_event cmd;
368	struct ucma_event *uevent;
369	int ret = 0;
370
371	if (out_len < sizeof uevent->resp)
372		return -ENOSPC;
373
374	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
375		return -EFAULT;
376
377	mutex_lock(&file->mut);
378	while (list_empty(&file->event_list)) {
379		mutex_unlock(&file->mut);
380
381		if (file->filp->f_flags & O_NONBLOCK)
382			return -EAGAIN;
383
384		if (wait_event_interruptible(file->poll_wait,
385					     !list_empty(&file->event_list)))
386			return -ERESTARTSYS;
387
388		mutex_lock(&file->mut);
389	}
390
391	uevent = list_entry(file->event_list.next, struct ucma_event, list);
392
393	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
394		ctx = ucma_alloc_ctx(file);
395		if (!ctx) {
396			ret = -ENOMEM;
397			goto done;
398		}
399		uevent->ctx->backlog++;
400		ctx->cm_id = uevent->cm_id;
401		ctx->cm_id->context = ctx;
402		uevent->resp.id = ctx->id;
403	}
404
405	if (copy_to_user((void __user *)(unsigned long)cmd.response,
406			 &uevent->resp, sizeof uevent->resp)) {
407		ret = -EFAULT;
408		goto done;
409	}
410
411	list_del(&uevent->list);
412	uevent->ctx->events_reported++;
413	if (uevent->mc)
414		uevent->mc->events_reported++;
415	kfree(uevent);
416done:
417	mutex_unlock(&file->mut);
418	return ret;
419}
420
421static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
422{
423	switch (cmd->ps) {
424	case RDMA_PS_TCP:
425		*qp_type = IB_QPT_RC;
426		return 0;
427	case RDMA_PS_UDP:
428	case RDMA_PS_IPOIB:
429		*qp_type = IB_QPT_UD;
430		return 0;
431	case RDMA_PS_IB:
432		*qp_type = cmd->qp_type;
433		return 0;
434	default:
435		return -EINVAL;
436	}
437}
438
439static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
440			      int in_len, int out_len)
441{
442	struct rdma_ucm_create_id cmd;
443	struct rdma_ucm_create_id_resp resp;
444	struct ucma_context *ctx;
445	enum ib_qp_type qp_type;
446	int ret;
447
448	if (out_len < sizeof(resp))
449		return -ENOSPC;
450
451	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
452		return -EFAULT;
453
454	ret = ucma_get_qp_type(&cmd, &qp_type);
455	if (ret)
456		return ret;
457
458	mutex_lock(&file->mut);
459	ctx = ucma_alloc_ctx(file);
460	mutex_unlock(&file->mut);
461	if (!ctx)
462		return -ENOMEM;
463
464	ctx->uid = cmd.uid;
465	ctx->cm_id = rdma_create_id(TD_TO_VNET(curthread),
466				    ucma_event_handler, ctx, cmd.ps, qp_type);
467	if (IS_ERR(ctx->cm_id)) {
468		ret = PTR_ERR(ctx->cm_id);
469		goto err1;
470	}
471
472	resp.id = ctx->id;
473	if (copy_to_user((void __user *)(unsigned long)cmd.response,
474			 &resp, sizeof(resp))) {
475		ret = -EFAULT;
476		goto err2;
477	}
478	return 0;
479
480err2:
481	rdma_destroy_id(ctx->cm_id);
482err1:
483	mutex_lock(&mut);
484	idr_remove(&ctx_idr, ctx->id);
485	mutex_unlock(&mut);
486	kfree(ctx);
487	return ret;
488}
489
490static void ucma_cleanup_multicast(struct ucma_context *ctx)
491{
492	struct ucma_multicast *mc, *tmp;
493
494	mutex_lock(&mut);
495	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
496		list_del(&mc->list);
497		idr_remove(&multicast_idr, mc->id);
498		kfree(mc);
499	}
500	mutex_unlock(&mut);
501}
502
503static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
504{
505	struct ucma_event *uevent, *tmp;
506
507	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
508		if (uevent->mc != mc)
509			continue;
510
511		list_del(&uevent->list);
512		kfree(uevent);
513	}
514}
515
516/*
517 * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
518 * this point, no new events will be reported from the hardware. However, we
519 * still need to clean up the UCMA context for this ID. Specifically, there
520 * might be events that have not yet been consumed by the user space software.
521 * These might include pending connect requests which we have not completed
522 * processing.  We cannot call rdma_destroy_id while holding the lock of the
523 * context (file->mut), as it might cause a deadlock. We therefore extract all
524 * relevant events from the context pending events list while holding the
525 * mutex. After that we release them as needed.
526 */
527static int ucma_free_ctx(struct ucma_context *ctx)
528{
529	int events_reported;
530	struct ucma_event *uevent, *tmp;
531	LIST_HEAD(list);
532
533
534	ucma_cleanup_multicast(ctx);
535
536	/* Clean up events not yet reported to the user. */
537	mutex_lock(&ctx->file->mut);
538	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
539		if (uevent->ctx == ctx)
540			list_move_tail(&uevent->list, &list);
541	}
542	list_del(&ctx->list);
543	mutex_unlock(&ctx->file->mut);
544
545	list_for_each_entry_safe(uevent, tmp, &list, list) {
546		list_del(&uevent->list);
547		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
548			rdma_destroy_id(uevent->cm_id);
549		kfree(uevent);
550	}
551
552	events_reported = ctx->events_reported;
553	kfree(ctx);
554	return events_reported;
555}
556
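/*
 * Explicit destroy requested by userspace: remove the context from the
 * idr so no new references can be taken, mark it as being destroyed,
 * flush any pending close work, wait for outstanding references to drain
 * and destroy the cm_id (unless the removal path already queued that),
 * then free the context and report the event count back.
 */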
557static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
558			       int in_len, int out_len)
559{
560	struct rdma_ucm_destroy_id cmd;
561	struct rdma_ucm_destroy_id_resp resp;
562	struct ucma_context *ctx;
563	int ret = 0;
564
565	if (out_len < sizeof(resp))
566		return -ENOSPC;
567
568	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
569		return -EFAULT;
570
571	mutex_lock(&mut);
572	ctx = _ucma_find_context(cmd.id, file);
573	if (!IS_ERR(ctx))
574		idr_remove(&ctx_idr, ctx->id);
575	mutex_unlock(&mut);
576
577	if (IS_ERR(ctx))
578		return PTR_ERR(ctx);
579
580	mutex_lock(&ctx->file->mut);
581	ctx->destroying = 1;
582	mutex_unlock(&ctx->file->mut);
583
584	flush_workqueue(ctx->file->close_wq);
585	/* At this point it is guaranteed that there is no in-flight
586	 * closing task. */
587	mutex_lock(&mut);
588	if (!ctx->closing) {
589		mutex_unlock(&mut);
590		ucma_put_ctx(ctx);
591		wait_for_completion(&ctx->comp);
592		rdma_destroy_id(ctx->cm_id);
593	} else {
594		mutex_unlock(&mut);
595	}
596
597	resp.events_reported = ucma_free_ctx(ctx);
598	if (copy_to_user((void __user *)(unsigned long)cmd.response,
599			 &resp, sizeof(resp)))
600		ret = -EFAULT;
601
602	return ret;
603}
604
605static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
606			      int in_len, int out_len)
607{
608	struct rdma_ucm_bind_ip cmd;
609	struct ucma_context *ctx;
610	int ret;
611
612	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
613		return -EFAULT;
614
615	ctx = ucma_get_ctx(file, cmd.id);
616	if (IS_ERR(ctx))
617		return PTR_ERR(ctx);
618
619	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
620	ucma_put_ctx(ctx);
621	return ret;
622}
623
624static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
625			 int in_len, int out_len)
626{
627	struct rdma_ucm_bind cmd;
628	struct sockaddr *addr;
629	struct ucma_context *ctx;
630	int ret;
631
632	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
633		return -EFAULT;
634
635	addr = (struct sockaddr *) &cmd.addr;
636	if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr)))
637		return -EINVAL;
638
639	ctx = ucma_get_ctx(file, cmd.id);
640	if (IS_ERR(ctx))
641		return PTR_ERR(ctx);
642
643	ret = rdma_bind_addr(ctx->cm_id, addr);
644	ucma_put_ctx(ctx);
645	return ret;
646}
647
648static ssize_t ucma_resolve_ip(struct ucma_file *file,
649			       const char __user *inbuf,
650			       int in_len, int out_len)
651{
652	struct rdma_ucm_resolve_ip cmd;
653	struct ucma_context *ctx;
654	int ret;
655
656	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
657		return -EFAULT;
658
659	ctx = ucma_get_ctx(file, cmd.id);
660	if (IS_ERR(ctx))
661		return PTR_ERR(ctx);
662
663	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
664				(struct sockaddr *) &cmd.dst_addr,
665				cmd.timeout_ms);
666	ucma_put_ctx(ctx);
667	return ret;
668}
669
670static ssize_t ucma_resolve_addr(struct ucma_file *file,
671				 const char __user *inbuf,
672				 int in_len, int out_len)
673{
674	struct rdma_ucm_resolve_addr cmd;
675	struct sockaddr *src, *dst;
676	struct ucma_context *ctx;
677	int ret;
678
679	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
680		return -EFAULT;
681
682	src = (struct sockaddr *) &cmd.src_addr;
683	dst = (struct sockaddr *) &cmd.dst_addr;
684	if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) ||
685	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst)))
686		return -EINVAL;
687
688	ctx = ucma_get_ctx(file, cmd.id);
689	if (IS_ERR(ctx))
690		return PTR_ERR(ctx);
691
692	ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
693	ucma_put_ctx(ctx);
694	return ret;
695}
696
697static ssize_t ucma_resolve_route(struct ucma_file *file,
698				  const char __user *inbuf,
699				  int in_len, int out_len)
700{
701	struct rdma_ucm_resolve_route cmd;
702	struct ucma_context *ctx;
703	int ret;
704
705	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
706		return -EFAULT;
707
708	ctx = ucma_get_ctx(file, cmd.id);
709	if (IS_ERR(ctx))
710		return PTR_ERR(ctx);
711
712	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
713	ucma_put_ctx(ctx);
714	return ret;
715}
716
717static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
718			       struct rdma_route *route)
719{
720	struct rdma_dev_addr *dev_addr;
721
722	resp->num_paths = route->num_paths;
723	switch (route->num_paths) {
724	case 0:
725		dev_addr = &route->addr.dev_addr;
726		rdma_addr_get_dgid(dev_addr,
727				   (union ib_gid *) &resp->ib_route[0].dgid);
728		rdma_addr_get_sgid(dev_addr,
729				   (union ib_gid *) &resp->ib_route[0].sgid);
730		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
731		break;
732	case 2:
733		ib_copy_path_rec_to_user(&resp->ib_route[1],
734					 &route->path_rec[1]);
735		/* fall through */
736	case 1:
737		ib_copy_path_rec_to_user(&resp->ib_route[0],
738					 &route->path_rec[0]);
739		break;
740	default:
741		break;
742	}
743}
744
745static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
746				 struct rdma_route *route)
747{
748
749	resp->num_paths = route->num_paths;
750	switch (route->num_paths) {
751	case 0:
752		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
753			    (union ib_gid *)&resp->ib_route[0].dgid);
754		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
755			    (union ib_gid *)&resp->ib_route[0].sgid);
756		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
757		break;
758	case 2:
759		ib_copy_path_rec_to_user(&resp->ib_route[1],
760					 &route->path_rec[1]);
761		/* fall through */
762	case 1:
763		ib_copy_path_rec_to_user(&resp->ib_route[0],
764					 &route->path_rec[0]);
765		break;
766	default:
767		break;
768	}
769}
770
771static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
772			       struct rdma_route *route)
773{
774	struct rdma_dev_addr *dev_addr;
775
776	dev_addr = &route->addr.dev_addr;
777	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
778	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
779}
780
781static ssize_t ucma_query_route(struct ucma_file *file,
782				const char __user *inbuf,
783				int in_len, int out_len)
784{
785	struct rdma_ucm_query cmd;
786	struct rdma_ucm_query_route_resp resp;
787	struct ucma_context *ctx;
788	struct sockaddr *addr;
789	int ret = 0;
790
791	if (out_len < sizeof(resp))
792		return -ENOSPC;
793
794	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
795		return -EFAULT;
796
797	ctx = ucma_get_ctx(file, cmd.id);
798	if (IS_ERR(ctx))
799		return PTR_ERR(ctx);
800
801	memset(&resp, 0, sizeof resp);
802	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
803	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
804				     sizeof(struct sockaddr_in) :
805				     sizeof(struct sockaddr_in6));
806	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
807	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
808				     sizeof(struct sockaddr_in) :
809				     sizeof(struct sockaddr_in6));
810	if (!ctx->cm_id->device)
811		goto out;
812
813	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
814	resp.port_num = ctx->cm_id->port_num;
815
816	if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
817		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
818	else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
819		ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
820	else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
821		ucma_copy_iw_route(&resp, &ctx->cm_id->route);
822
823out:
824	if (copy_to_user((void __user *)(unsigned long)cmd.response,
825			 &resp, sizeof(resp)))
826		ret = -EFAULT;
827
828	ucma_put_ctx(ctx);
829	return ret;
830}
831
832static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
833				   struct rdma_ucm_query_addr_resp *resp)
834{
835	if (!cm_id->device)
836		return;
837
838	resp->node_guid = (__force __u64) cm_id->device->node_guid;
839	resp->port_num = cm_id->port_num;
840	resp->pkey = (__force __u16) cpu_to_be16(
841		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
842}
843
844static ssize_t ucma_query_addr(struct ucma_context *ctx,
845			       void __user *response, int out_len)
846{
847	struct rdma_ucm_query_addr_resp resp;
848	struct sockaddr *addr;
849	int ret = 0;
850
851	if (out_len < sizeof(resp))
852		return -ENOSPC;
853
854	memset(&resp, 0, sizeof resp);
855
856	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
857	resp.src_size = rdma_addr_size(addr);
858	memcpy(&resp.src_addr, addr, resp.src_size);
859
860	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
861	resp.dst_size = rdma_addr_size(addr);
862	memcpy(&resp.dst_addr, addr, resp.dst_size);
863
864	ucma_query_device_addr(ctx->cm_id, &resp);
865
866	if (copy_to_user(response, &resp, sizeof(resp)))
867		ret = -EFAULT;
868
869	return ret;
870}
871
872static ssize_t ucma_query_path(struct ucma_context *ctx,
873			       void __user *response, int out_len)
874{
875	struct rdma_ucm_query_path_resp *resp;
876	int i, ret = 0;
877
878	if (out_len < sizeof(*resp))
879		return -ENOSPC;
880
881	resp = kzalloc(out_len, GFP_KERNEL);
882	if (!resp)
883		return -ENOMEM;
884
885	resp->num_paths = ctx->cm_id->route.num_paths;
886	for (i = 0, out_len -= sizeof(*resp);
887	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
888	     i++, out_len -= sizeof(struct ib_path_rec_data)) {
889
890		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
891					   IB_PATH_BIDIRECTIONAL;
892		ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
893				&resp->path_data[i].path_rec);
894	}
895
896	if (copy_to_user(response, resp,
897			 sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
898		ret = -EFAULT;
899
900	kfree(resp);
901	return ret;
902}
903
904static ssize_t ucma_query_gid(struct ucma_context *ctx,
905			      void __user *response, int out_len)
906{
907	struct rdma_ucm_query_addr_resp resp;
908	struct sockaddr_ib *addr;
909	int ret = 0;
910
911	if (out_len < sizeof(resp))
912		return -ENOSPC;
913
914	memset(&resp, 0, sizeof resp);
915
916	ucma_query_device_addr(ctx->cm_id, &resp);
917
918	addr = (struct sockaddr_ib *) &resp.src_addr;
919	resp.src_size = sizeof(*addr);
920	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
921		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
922	} else {
923		addr->sib_family = AF_IB;
924		addr->sib_pkey = (__force __be16) resp.pkey;
925		rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
926				   (union ib_gid *) &addr->sib_addr);
927		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
928						    &ctx->cm_id->route.addr.src_addr);
929	}
930
931	addr = (struct sockaddr_ib *) &resp.dst_addr;
932	resp.dst_size = sizeof(*addr);
933	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
934		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
935	} else {
936		addr->sib_family = AF_IB;
937		addr->sib_pkey = (__force __be16) resp.pkey;
938		rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
939				   (union ib_gid *) &addr->sib_addr);
940		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
941						    &ctx->cm_id->route.addr.dst_addr);
942	}
943
944	if (copy_to_user(response, &resp, sizeof(resp)))
945		ret = -EFAULT;
946
947	return ret;
948}
949
950static ssize_t ucma_query(struct ucma_file *file,
951			  const char __user *inbuf,
952			  int in_len, int out_len)
953{
954	struct rdma_ucm_query cmd;
955	struct ucma_context *ctx;
956	void __user *response;
957	int ret;
958
959	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
960		return -EFAULT;
961
962	response = (void __user *)(unsigned long) cmd.response;
963	ctx = ucma_get_ctx(file, cmd.id);
964	if (IS_ERR(ctx))
965		return PTR_ERR(ctx);
966
967	switch (cmd.option) {
968	case RDMA_USER_CM_QUERY_ADDR:
969		ret = ucma_query_addr(ctx, response, out_len);
970		break;
971	case RDMA_USER_CM_QUERY_PATH:
972		ret = ucma_query_path(ctx, response, out_len);
973		break;
974	case RDMA_USER_CM_QUERY_GID:
975		ret = ucma_query_gid(ctx, response, out_len);
976		break;
977	default:
978		ret = -ENOSYS;
979		break;
980	}
981
982	ucma_put_ctx(ctx);
983	return ret;
984}
985
986static void ucma_copy_conn_param(struct rdma_cm_id *id,
987				 struct rdma_conn_param *dst,
988				 struct rdma_ucm_conn_param *src)
989{
990	dst->private_data = src->private_data;
991	dst->private_data_len = src->private_data_len;
992	dst->responder_resources = src->responder_resources;
993	dst->initiator_depth = src->initiator_depth;
994	dst->flow_control = src->flow_control;
995	dst->retry_count = src->retry_count;
996	dst->rnr_retry_count = src->rnr_retry_count;
997	dst->srq = src->srq;
998	dst->qp_num = src->qp_num;
999	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
1000}
1001
1002static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
1003			    int in_len, int out_len)
1004{
1005	struct rdma_ucm_connect cmd;
1006	struct rdma_conn_param conn_param;
1007	struct ucma_context *ctx;
1008	int ret;
1009
1010	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1011		return -EFAULT;
1012
1013	if (!cmd.conn_param.valid)
1014		return -EINVAL;
1015
1016	ctx = ucma_get_ctx(file, cmd.id);
1017	if (IS_ERR(ctx))
1018		return PTR_ERR(ctx);
1019
1020	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
1021	ret = rdma_connect(ctx->cm_id, &conn_param);
1022	ucma_put_ctx(ctx);
1023	return ret;
1024}
1025
1026static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
1027			   int in_len, int out_len)
1028{
1029	struct rdma_ucm_listen cmd;
1030	struct ucma_context *ctx;
1031	int ret;
1032
1033	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1034		return -EFAULT;
1035
1036	ctx = ucma_get_ctx(file, cmd.id);
1037	if (IS_ERR(ctx))
1038		return PTR_ERR(ctx);
1039
1040	ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
1041		       cmd.backlog : max_backlog;
1042	ret = rdma_listen(ctx->cm_id, ctx->backlog);
1043	ucma_put_ctx(ctx);
1044	return ret;
1045}
1046
1047static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
1048			   int in_len, int out_len)
1049{
1050	struct rdma_ucm_accept cmd;
1051	struct rdma_conn_param conn_param;
1052	struct ucma_context *ctx;
1053	int ret;
1054
1055	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1056		return -EFAULT;
1057
1058	ctx = ucma_get_ctx(file, cmd.id);
1059	if (IS_ERR(ctx))
1060		return PTR_ERR(ctx);
1061
1062	if (cmd.conn_param.valid) {
1063		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
1064		mutex_lock(&file->mut);
1065		ret = rdma_accept(ctx->cm_id, &conn_param);
1066		if (!ret)
1067			ctx->uid = cmd.uid;
1068		mutex_unlock(&file->mut);
1069	} else
1070		ret = rdma_accept(ctx->cm_id, NULL);
1071
1072	ucma_put_ctx(ctx);
1073	return ret;
1074}
1075
1076static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
1077			   int in_len, int out_len)
1078{
1079	struct rdma_ucm_reject cmd;
1080	struct ucma_context *ctx;
1081	int ret;
1082
1083	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1084		return -EFAULT;
1085
1086	ctx = ucma_get_ctx(file, cmd.id);
1087	if (IS_ERR(ctx))
1088		return PTR_ERR(ctx);
1089
1090	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
1091	ucma_put_ctx(ctx);
1092	return ret;
1093}
1094
1095static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
1096			       int in_len, int out_len)
1097{
1098	struct rdma_ucm_disconnect cmd;
1099	struct ucma_context *ctx;
1100	int ret;
1101
1102	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1103		return -EFAULT;
1104
1105	ctx = ucma_get_ctx(file, cmd.id);
1106	if (IS_ERR(ctx))
1107		return PTR_ERR(ctx);
1108
1109	ret = rdma_disconnect(ctx->cm_id);
1110	ucma_put_ctx(ctx);
1111	return ret;
1112}
1113
1114static ssize_t ucma_init_qp_attr(struct ucma_file *file,
1115				 const char __user *inbuf,
1116				 int in_len, int out_len)
1117{
1118	struct rdma_ucm_init_qp_attr cmd;
1119	struct ib_uverbs_qp_attr resp;
1120	struct ucma_context *ctx;
1121	struct ib_qp_attr qp_attr;
1122	int ret;
1123
1124	if (out_len < sizeof(resp))
1125		return -ENOSPC;
1126
1127	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1128		return -EFAULT;
1129
1130	ctx = ucma_get_ctx(file, cmd.id);
1131	if (IS_ERR(ctx))
1132		return PTR_ERR(ctx);
1133
1134	resp.qp_attr_mask = 0;
1135	memset(&qp_attr, 0, sizeof qp_attr);
1136	qp_attr.qp_state = cmd.qp_state;
1137	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
1138	if (ret)
1139		goto out;
1140
1141	ib_copy_qp_attr_to_user(&resp, &qp_attr);
1142	if (copy_to_user((void __user *)(unsigned long)cmd.response,
1143			 &resp, sizeof(resp)))
1144		ret = -EFAULT;
1145
1146out:
1147	ucma_put_ctx(ctx);
1148	return ret;
1149}
1150
1151static int ucma_set_option_id(struct ucma_context *ctx, int optname,
1152			      void *optval, size_t optlen)
1153{
1154	int ret = 0;
1155
1156	switch (optname) {
1157	case RDMA_OPTION_ID_TOS:
1158		if (optlen != sizeof(u8)) {
1159			ret = -EINVAL;
1160			break;
1161		}
1162		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
1163		break;
1164	case RDMA_OPTION_ID_REUSEADDR:
1165		if (optlen != sizeof(int)) {
1166			ret = -EINVAL;
1167			break;
1168		}
1169		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
1170		break;
1171	case RDMA_OPTION_ID_AFONLY:
1172		if (optlen != sizeof(int)) {
1173			ret = -EINVAL;
1174			break;
1175		}
1176		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
1177		break;
1178	default:
1179		ret = -ENOSYS;
1180	}
1181
1182	return ret;
1183}
1184
1185static int ucma_set_ib_path(struct ucma_context *ctx,
1186			    struct ib_path_rec_data *path_data, size_t optlen)
1187{
1188	struct ib_sa_path_rec sa_path;
1189	struct rdma_cm_event event;
1190	int ret;
1191
1192	if (optlen % sizeof(*path_data))
1193		return -EINVAL;
1194
1195	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
1196		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
1197					 IB_PATH_BIDIRECTIONAL))
1198			break;
1199	}
1200
1201	if (!optlen)
1202		return -EINVAL;
1203
1204	memset(&sa_path, 0, sizeof(sa_path));
1205
1206	ib_sa_unpack_path(path_data->path_rec, &sa_path);
1207	ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
1208	if (ret)
1209		return ret;
1210
1211	memset(&event, 0, sizeof event);
1212	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1213	return ucma_event_handler(ctx->cm_id, &event);
1214}
1215
1216static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
1217			      void *optval, size_t optlen)
1218{
1219	int ret;
1220
1221	switch (optname) {
1222	case RDMA_OPTION_IB_PATH:
1223		ret = ucma_set_ib_path(ctx, optval, optlen);
1224		break;
1225	default:
1226		ret = -ENOSYS;
1227	}
1228
1229	return ret;
1230}
1231
1232static int ucma_set_option_level(struct ucma_context *ctx, int level,
1233				 int optname, void *optval, size_t optlen)
1234{
1235	int ret;
1236
1237	switch (level) {
1238	case RDMA_OPTION_ID:
1239		ret = ucma_set_option_id(ctx, optname, optval, optlen);
1240		break;
1241	case RDMA_OPTION_IB:
1242		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
1243		break;
1244	default:
1245		ret = -ENOSYS;
1246	}
1247
1248	return ret;
1249}
1250
1251static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
1252			       int in_len, int out_len)
1253{
1254	struct rdma_ucm_set_option cmd;
1255	struct ucma_context *ctx;
1256	void *optval;
1257	int ret;
1258
1259	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1260		return -EFAULT;
1261
1262	ctx = ucma_get_ctx(file, cmd.id);
1263	if (IS_ERR(ctx))
1264		return PTR_ERR(ctx);
1265
1266	optval = memdup_user((void __user *) (unsigned long) cmd.optval,
1267			     cmd.optlen);
1268	if (IS_ERR(optval)) {
1269		ret = PTR_ERR(optval);
1270		goto out;
1271	}
1272
1273	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
1274				    cmd.optlen);
1275	kfree(optval);
1276
1277out:
1278	ucma_put_ctx(ctx);
1279	return ret;
1280}
1281
1282static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
1283			   int in_len, int out_len)
1284{
1285	struct rdma_ucm_notify cmd;
1286	struct ucma_context *ctx;
1287	int ret;
1288
1289	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1290		return -EFAULT;
1291
1292	ctx = ucma_get_ctx(file, cmd.id);
1293	if (IS_ERR(ctx))
1294		return PTR_ERR(ctx);
1295
1296	ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
1297	ucma_put_ctx(ctx);
1298	return ret;
1299}
1300
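/*
 * Common helper for the IP and AF_IB multicast join commands: allocate
 * the ucma_multicast bookkeeping, join via rdma_join_multicast() and
 * return the new multicast id to userspace, unwinding fully on failure.
 */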
1301static ssize_t ucma_process_join(struct ucma_file *file,
1302				 struct rdma_ucm_join_mcast *cmd,  int out_len)
1303{
1304	struct rdma_ucm_create_id_resp resp;
1305	struct ucma_context *ctx;
1306	struct ucma_multicast *mc;
1307	struct sockaddr *addr;
1308	int ret;
1309	u8 join_state;
1310
1311	if (out_len < sizeof(resp))
1312		return -ENOSPC;
1313
1314	addr = (struct sockaddr *) &cmd->addr;
1315	if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
1316		return -EINVAL;
1317
1318	if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
1319		join_state = BIT(FULLMEMBER_JOIN);
1320	else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
1321		join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
1322	else
1323		return -EINVAL;
1324
1325	ctx = ucma_get_ctx(file, cmd->id);
1326	if (IS_ERR(ctx))
1327		return PTR_ERR(ctx);
1328
1329	mutex_lock(&file->mut);
1330	mc = ucma_alloc_multicast(ctx);
1331	if (!mc) {
1332		ret = -ENOMEM;
1333		goto err1;
1334	}
1335	mc->join_state = join_state;
1336	mc->uid = cmd->uid;
1337	memcpy(&mc->addr, addr, cmd->addr_size);
1338	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
1339				  join_state, mc);
1340	if (ret)
1341		goto err2;
1342
1343	resp.id = mc->id;
1344	if (copy_to_user((void __user *)(unsigned long) cmd->response,
1345			 &resp, sizeof(resp))) {
1346		ret = -EFAULT;
1347		goto err3;
1348	}
1349
1350	mutex_unlock(&file->mut);
1351	ucma_put_ctx(ctx);
1352	return 0;
1353
1354err3:
1355	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
1356	ucma_cleanup_mc_events(mc);
1357err2:
1358	mutex_lock(&mut);
1359	idr_remove(&multicast_idr, mc->id);
1360	mutex_unlock(&mut);
1361	list_del(&mc->list);
1362	kfree(mc);
1363err1:
1364	mutex_unlock(&file->mut);
1365	ucma_put_ctx(ctx);
1366	return ret;
1367}
1368
1369static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
1370				      const char __user *inbuf,
1371				      int in_len, int out_len)
1372{
1373	struct rdma_ucm_join_ip_mcast cmd;
1374	struct rdma_ucm_join_mcast join_cmd;
1375
1376	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1377		return -EFAULT;
1378
1379	join_cmd.response = cmd.response;
1380	join_cmd.uid = cmd.uid;
1381	join_cmd.id = cmd.id;
1382	join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
1383	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
1384	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
1385
1386	return ucma_process_join(file, &join_cmd, out_len);
1387}
1388
1389static ssize_t ucma_join_multicast(struct ucma_file *file,
1390				   const char __user *inbuf,
1391				   int in_len, int out_len)
1392{
1393	struct rdma_ucm_join_mcast cmd;
1394
1395	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1396		return -EFAULT;
1397
1398	return ucma_process_join(file, &cmd, out_len);
1399}
1400
1401static ssize_t ucma_leave_multicast(struct ucma_file *file,
1402				    const char __user *inbuf,
1403				    int in_len, int out_len)
1404{
1405	struct rdma_ucm_destroy_id cmd;
1406	struct rdma_ucm_destroy_id_resp resp;
1407	struct ucma_multicast *mc;
1408	int ret = 0;
1409
1410	if (out_len < sizeof(resp))
1411		return -ENOSPC;
1412
1413	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1414		return -EFAULT;
1415
1416	mutex_lock(&mut);
1417	mc = idr_find(&multicast_idr, cmd.id);
1418	if (!mc)
1419		mc = ERR_PTR(-ENOENT);
1420	else if (mc->ctx->file != file)
1421		mc = ERR_PTR(-EINVAL);
1422	else if (!atomic_inc_not_zero(&mc->ctx->ref))
1423		mc = ERR_PTR(-ENXIO);
1424	else
1425		idr_remove(&multicast_idr, mc->id);
1426	mutex_unlock(&mut);
1427
1428	if (IS_ERR(mc)) {
1429		ret = PTR_ERR(mc);
1430		goto out;
1431	}
1432
1433	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
1434	mutex_lock(&mc->ctx->file->mut);
1435	ucma_cleanup_mc_events(mc);
1436	list_del(&mc->list);
1437	mutex_unlock(&mc->ctx->file->mut);
1438
1439	ucma_put_ctx(mc->ctx);
1440	resp.events_reported = mc->events_reported;
1441	kfree(mc);
1442
1443	if (copy_to_user((void __user *)(unsigned long)cmd.response,
1444			 &resp, sizeof(resp)))
1445		ret = -EFAULT;
1446out:
1447	return ret;
1448}
1449
1450static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
1451{
1452	/* Acquire mutexes based on pointer comparison to prevent deadlock. */
1453	if (file1 < file2) {
1454		mutex_lock(&file1->mut);
1455		mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
1456	} else {
1457		mutex_lock(&file2->mut);
1458		mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
1459	}
1460}
1461
1462static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
1463{
1464	if (file1 < file2) {
1465		mutex_unlock(&file2->mut);
1466		mutex_unlock(&file1->mut);
1467	} else {
1468		mutex_unlock(&file1->mut);
1469		mutex_unlock(&file2->mut);
1470	}
1471}
1472
1473static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
1474{
1475	struct ucma_event *uevent, *tmp;
1476
1477	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
1478		if (uevent->ctx == ctx)
1479			list_move_tail(&uevent->list, &file->event_list);
1480}
1481
1482static ssize_t ucma_migrate_id(struct ucma_file *new_file,
1483			       const char __user *inbuf,
1484			       int in_len, int out_len)
1485{
1486	struct rdma_ucm_migrate_id cmd;
1487	struct rdma_ucm_migrate_resp resp;
1488	struct ucma_context *ctx;
1489	struct fd f;
1490	struct ucma_file *cur_file;
1491	int ret = 0;
1492
1493	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1494		return -EFAULT;
1495
1496	/* Get current fd to protect against it being closed */
1497	f = fdget(cmd.fd);
1498	if (!f.file)
1499		return -ENOENT;
1500
1501	/* Validate current fd and prevent destruction of id. */
1502	ctx = ucma_get_ctx(f.file->private_data, cmd.id);
1503	if (IS_ERR(ctx)) {
1504		ret = PTR_ERR(ctx);
1505		goto file_put;
1506	}
1507
1508	cur_file = ctx->file;
1509	if (cur_file == new_file) {
1510		resp.events_reported = ctx->events_reported;
1511		goto response;
1512	}
1513
1514	/*
1515	 * Migrate events between fds, maintaining order, and avoiding new
1516	 * events being added before existing events.
1517	 */
1518	ucma_lock_files(cur_file, new_file);
1519	mutex_lock(&mut);
1520
1521	list_move_tail(&ctx->list, &new_file->ctx_list);
1522	ucma_move_events(ctx, new_file);
1523	ctx->file = new_file;
1524	resp.events_reported = ctx->events_reported;
1525
1526	mutex_unlock(&mut);
1527	ucma_unlock_files(cur_file, new_file);
1528
1529response:
1530	if (copy_to_user((void __user *)(unsigned long)cmd.response,
1531			 &resp, sizeof(resp)))
1532		ret = -EFAULT;
1533
1534	ucma_put_ctx(ctx);
1535file_put:
1536	fdput(f);
1537	return ret;
1538}
1539
1540static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
1541				   const char __user *inbuf,
1542				   int in_len, int out_len) = {
1543	[RDMA_USER_CM_CMD_CREATE_ID] 	 = ucma_create_id,
1544	[RDMA_USER_CM_CMD_DESTROY_ID]	 = ucma_destroy_id,
1545	[RDMA_USER_CM_CMD_BIND_IP]	 = ucma_bind_ip,
1546	[RDMA_USER_CM_CMD_RESOLVE_IP]	 = ucma_resolve_ip,
1547	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
1548	[RDMA_USER_CM_CMD_QUERY_ROUTE]	 = ucma_query_route,
1549	[RDMA_USER_CM_CMD_CONNECT]	 = ucma_connect,
1550	[RDMA_USER_CM_CMD_LISTEN]	 = ucma_listen,
1551	[RDMA_USER_CM_CMD_ACCEPT]	 = ucma_accept,
1552	[RDMA_USER_CM_CMD_REJECT]	 = ucma_reject,
1553	[RDMA_USER_CM_CMD_DISCONNECT]	 = ucma_disconnect,
1554	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	 = ucma_init_qp_attr,
1555	[RDMA_USER_CM_CMD_GET_EVENT]	 = ucma_get_event,
1556	[RDMA_USER_CM_CMD_GET_OPTION]	 = NULL,
1557	[RDMA_USER_CM_CMD_SET_OPTION]	 = ucma_set_option,
1558	[RDMA_USER_CM_CMD_NOTIFY]	 = ucma_notify,
1559	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
1560	[RDMA_USER_CM_CMD_LEAVE_MCAST]	 = ucma_leave_multicast,
1561	[RDMA_USER_CM_CMD_MIGRATE_ID]	 = ucma_migrate_id,
1562	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query,
1563	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind,
1564	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	 = ucma_resolve_addr,
1565	[RDMA_USER_CM_CMD_JOIN_MCAST]	 = ucma_join_multicast
1566};
1567
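/*
 * Command dispatch.  Userspace drives this device by write()ing a struct
 * rdma_ucm_cmd_hdr immediately followed by the command payload; hdr.cmd
 * indexes ucma_cmd_table while hdr.in and hdr.out give the payload and
 * response sizes.  An illustrative userspace sketch (not part of this
 * file; fd and my_cookie are placeholders, error handling omitted) for
 * creating an id looks roughly like:
 *
 *	struct rdma_ucm_create_id_resp resp;
 *	struct {
 *		struct rdma_ucm_cmd_hdr hdr;
 *		struct rdma_ucm_create_id cmd;
 *	} msg = { 0 };
 *
 *	msg.hdr.cmd = RDMA_USER_CM_CMD_CREATE_ID;
 *	msg.hdr.in = sizeof(msg.cmd);
 *	msg.hdr.out = sizeof(resp);
 *	msg.cmd.uid = (uintptr_t) my_cookie;
 *	msg.cmd.response = (uintptr_t) &resp;
 *	msg.cmd.ps = RDMA_PS_TCP;
 *	write(fd, &msg, sizeof(msg));
 */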
1568static ssize_t ucma_write(struct file *filp, const char __user *buf,
1569			  size_t len, loff_t *pos)
1570{
1571	struct ucma_file *file = filp->private_data;
1572	struct rdma_ucm_cmd_hdr hdr;
1573	ssize_t ret;
1574
1575	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
1576		return -EACCES;
1577
1578	if (len < sizeof(hdr))
1579		return -EINVAL;
1580
1581	if (copy_from_user(&hdr, buf, sizeof(hdr)))
1582		return -EFAULT;
1583
1584	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
1585		return -EINVAL;
1586
1587	if (hdr.in + sizeof(hdr) > len)
1588		return -EINVAL;
1589
1590	if (!ucma_cmd_table[hdr.cmd])
1591		return -ENOSYS;
1592
1593	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
1594	if (!ret)
1595		ret = len;
1596
1597	return ret;
1598}
1599
1600static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait)
1601{
1602	struct ucma_file *file = filp->private_data;
1603	unsigned int mask = 0;
1604
1605	poll_wait(filp, &file->poll_wait, wait);
1606
1607	if (!list_empty(&file->event_list))
1608		mask = POLLIN | POLLRDNORM;
1609
1610	return mask;
1611}
1612
1613/*
1614 * ucma_open() does not need the BKL:
1615 *
1616 *  - no global state is referred to;
1617 *  - there is no ioctl method to race against;
1618 *  - no further module initialization is required for open to work
1619 *    after the device is registered.
1620 */
1621static int ucma_open(struct inode *inode, struct file *filp)
1622{
1623	struct ucma_file *file;
1624
1625	file = kmalloc(sizeof *file, GFP_KERNEL);
1626	if (!file)
1627		return -ENOMEM;
1628
1629	file->close_wq = alloc_ordered_workqueue("ucma_close_id",
1630						 WQ_MEM_RECLAIM);
1631	if (!file->close_wq) {
1632		kfree(file);
1633		return -ENOMEM;
1634	}
1635
1636	INIT_LIST_HEAD(&file->event_list);
1637	INIT_LIST_HEAD(&file->ctx_list);
1638	init_waitqueue_head(&file->poll_wait);
1639	mutex_init(&file->mut);
1640
1641	filp->private_data = file;
1642	file->filp = filp;
1643
1644	return nonseekable_open(inode, filp);
1645}
1646
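/*
 * File release: tear down every context still owned by this file,
 * dropping file->mut around the blocking teardown steps, and finally
 * destroy the ordered close workqueue.
 */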
1647static int ucma_close(struct inode *inode, struct file *filp)
1648{
1649	struct ucma_file *file = filp->private_data;
1650	struct ucma_context *ctx, *tmp;
1651
1652	mutex_lock(&file->mut);
1653	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
1654		ctx->destroying = 1;
1655		mutex_unlock(&file->mut);
1656
1657		mutex_lock(&mut);
1658		idr_remove(&ctx_idr, ctx->id);
1659		mutex_unlock(&mut);
1660
1661		flush_workqueue(file->close_wq);
1662		/* At this point the ctx has been marked as destroying and the
1663		 * workqueue has been flushed, so we are safe from any in-flight
1664		 * handlers that might queue another closing task.
1665		 */
1666		mutex_lock(&mut);
1667		if (!ctx->closing) {
1668			mutex_unlock(&mut);
1669			/* rdma_destroy_id ensures that no event handlers are
1670			 * in-flight for this id before releasing it.
1671			 */
1672			rdma_destroy_id(ctx->cm_id);
1673		} else {
1674			mutex_unlock(&mut);
1675		}
1676
1677		ucma_free_ctx(ctx);
1678		mutex_lock(&file->mut);
1679	}
1680	mutex_unlock(&file->mut);
1681	destroy_workqueue(file->close_wq);
1682	kfree(file);
1683	return 0;
1684}
1685
1686static long
1687ucma_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1688{
1689
1690	switch (cmd) {
1691	case FIONBIO:
1692	case FIOASYNC:
1693		return (0);
1694	default:
1695		return (-ENOTTY);
1696	}
1697}
1698
1699static const struct file_operations ucma_fops = {
1700	.owner 	 = THIS_MODULE,
1701	.open 	 = ucma_open,
1702	.release = ucma_close,
1703	.write	 = ucma_write,
1704	.unlocked_ioctl = ucma_ioctl,
1705	.poll    = ucma_poll,
1706	.llseek	 = no_llseek,
1707};
1708
1709static struct miscdevice ucma_misc = {
1710	.minor		= MISC_DYNAMIC_MINOR,
1711	.name		= "rdma_cm",
1712	.nodename	= "infiniband/rdma_cm",
1713	.mode		= 0666,
1714	.fops		= &ucma_fops,
1715};
1716
1717static ssize_t show_abi_version(struct device *dev,
1718				struct device_attribute *attr,
1719				char *buf)
1720{
1721	return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
1722}
1723static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
1724
1725static int __init ucma_init(void)
1726{
1727	int ret;
1728
1729	ret = misc_register(&ucma_misc);
1730	if (ret)
1731		return ret;
1732
1733	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
1734	if (ret) {
1735		pr_err("rdma_ucm: couldn't create abi_version attr\n");
1736		goto err1;
1737	}
1738
1739	return 0;
1740err1:
1741	misc_deregister(&ucma_misc);
1742	return ret;
1743}
1744
1745static void __exit ucma_cleanup(void)
1746{
1747	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
1748	misc_deregister(&ucma_misc);
1749	idr_destroy(&ctx_idr);
1750	idr_destroy(&multicast_idr);
1751}
1752
1753module_init(ucma_init);
1754module_exit(ucma_cleanup);
1755