/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: cm.c 3453 2005-09-15 21:43:21Z sean.hefty $
 */

#if HAVE_CONFIG_H
#  include <config.h>
#endif /* HAVE_CONFIG_H */

#include <stdlib.h>
#include <string.h>
#include <glob.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <stdint.h>
#include <poll.h>
#include <unistd.h>
#include <pthread.h>
#include <infiniband/endian.h>
#include <infiniband/byteswap.h>
#include <stddef.h>

#include <infiniband/driver.h>
#include <infiniband/marshall.h>
#include <rdma/rdma_cma.h>
#include <rdma/rdma_cma_abi.h>

#ifdef INCLUDE_VALGRIND
#   include <valgrind/memcheck.h>
#   ifndef VALGRIND_MAKE_MEM_DEFINED
#       warning "Valgrind requested, but VALGRIND_MAKE_MEM_DEFINED undefined"
#   endif
#endif

#ifndef VALGRIND_MAKE_MEM_DEFINED
#   define VALGRIND_MAKE_MEM_DEFINED(addr,len)
#endif

#define PFX "librdmacm: "

#if __BYTE_ORDER == __LITTLE_ENDIAN
static inline uint64_t htonll(uint64_t x) { return bswap_64(x); }
static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); }
#else
static inline uint64_t htonll(uint64_t x) { return x; }
static inline uint64_t ntohll(uint64_t x) { return x; }
#endif

static inline int ERR(int err)
{
	errno = err;
	return -1;
}

#define CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, type, size) \
do {                                        \
	struct ucma_abi_cmd_hdr *hdr;       \
                                            \
	size = sizeof(*hdr) + sizeof(*cmd); \
	msg = alloca(size);                 \
	if (!msg)                           \
		return ERR(ENOMEM);         \
	hdr = msg;                          \
	cmd = msg + sizeof(*hdr);           \
	hdr->cmd = type;                    \
	hdr->in  = sizeof(*cmd);            \
	hdr->out = sizeof(*resp);           \
	memset(cmd, 0, sizeof(*cmd));       \
	resp = alloca(sizeof(*resp));       \
	if (!resp)                          \
		return ERR(ENOMEM);         \
	cmd->response = (uintptr_t) resp;   \
} while (0)

#define CMA_CREATE_MSG_CMD(msg, cmd, type, size) \
do {                                        \
	struct ucma_abi_cmd_hdr *hdr;       \
                                            \
	size = sizeof(*hdr) + sizeof(*cmd); \
	msg = alloca(size);                 \
	if (!msg)                           \
		return ERR(ENOMEM);         \
	hdr = msg;                          \
	cmd = msg + sizeof(*hdr);           \
	hdr->cmd = type;                    \
	hdr->in  = sizeof(*cmd);            \
	hdr->out = 0;                       \
	memset(cmd, 0, sizeof(*cmd));       \
} while (0)
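
/*
 * Note (descriptive, added for clarity): the CMA_CREATE_MSG_CMD* macros
 * build the request that is handed to the rdma_cm kernel ABI with a
 * single write().  The message is a struct ucma_abi_cmd_hdr (opcode plus
 * input/output payload sizes) immediately followed by the command
 * structure.  When a reply is expected, a separate response buffer is
 * allocated on the stack and its address is stored in cmd->response so
 * the kernel can copy the result back to the caller.
 */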

struct cma_device {
	struct ibv_context *verbs;
	uint64_t	    guid;
	int		    port_cnt;
	uint8_t		    max_initiator_depth;
	uint8_t		    max_responder_resources;
};

struct cma_id_private {
	struct rdma_cm_id id;
	struct cma_device *cma_dev;
	int		  events_completed;
	int		  connect_error;
	pthread_cond_t	  cond;
	pthread_mutex_t	  mut;
	uint32_t	  handle;
	struct cma_multicast *mc_list;
};

struct cma_multicast {
	struct cma_multicast  *next;
	struct cma_id_private *id_priv;
	void		*context;
	int		events_completed;
	pthread_cond_t	cond;
	uint32_t	handle;
	union ibv_gid	mgid;
	uint16_t	mlid;
	struct sockaddr_storage addr;
};

struct cma_event {
	struct rdma_cm_event	event;
	uint8_t			private_data[RDMA_MAX_PRIVATE_DATA];
	struct cma_id_private	*id_priv;
	struct cma_multicast	*mc;
};

static struct cma_device *cma_dev_array;
static int cma_dev_cnt;
static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
static int abi_ver = RDMA_USER_CM_MAX_ABI_VERSION;

#define container_of(ptr, type, field) \
	((type *) ((void *)ptr - offsetof(type, field)))

static void ucma_cleanup(void)
{
	if (cma_dev_cnt) {
		while (cma_dev_cnt)
			ibv_close_device(cma_dev_array[--cma_dev_cnt].verbs);

		free(cma_dev_array);
		cma_dev_cnt = 0;
	}
}

static int check_abi_version(void)
{
	char value[8];

	if ((ibv_read_sysfs_file(ibv_get_sysfs_path(),
				 "class/misc/rdma_cm/abi_version",
				 value, sizeof value) < 0) &&
	    (ibv_read_sysfs_file(ibv_get_sysfs_path(),
				 "class/infiniband_ucma/abi_version",
				 value, sizeof value) < 0)) {
		/*
		 * Older versions of Linux do not have class/misc.  To support
		 * backports, assume the most recent version of the ABI.  If
		 * we're wrong, we'll simply fail later when calling the ABI.
		 */
		fprintf(stderr, "librdmacm: couldn't read ABI version.\n");
		fprintf(stderr, "librdmacm: assuming: %d\n", abi_ver);
		return 0;
	}

	abi_ver = strtol(value, NULL, 10);
	if (abi_ver < RDMA_USER_CM_MIN_ABI_VERSION ||
	    abi_ver > RDMA_USER_CM_MAX_ABI_VERSION) {
		fprintf(stderr, "librdmacm: kernel ABI version %d "
				"doesn't match library version %d.\n",
				abi_ver, RDMA_USER_CM_MAX_ABI_VERSION);
		return -1;
	}
	return 0;
}

static int ucma_init(void)
{
	struct ibv_device **dev_list = NULL;
	struct cma_device *cma_dev;
	struct ibv_device_attr attr;
	int i, ret, dev_cnt;

	pthread_mutex_lock(&mut);
	if (cma_dev_cnt) {
		pthread_mutex_unlock(&mut);
		return 0;
	}

	ret = check_abi_version();
	if (ret)
		goto err1;

	dev_list = ibv_get_device_list(&dev_cnt);
	if (!dev_list) {
		printf("CMA: unable to get RDMA device list\n");
		ret = ERR(ENODEV);
		goto err1;
	}

	cma_dev_array = malloc(sizeof *cma_dev * dev_cnt);
	if (!cma_dev_array) {
		ret = ERR(ENOMEM);
		goto err2;
	}

	for (i = 0; dev_list[i];) {
		cma_dev = &cma_dev_array[i];

		cma_dev->guid = ibv_get_device_guid(dev_list[i]);
		cma_dev->verbs = ibv_open_device(dev_list[i]);
		if (!cma_dev->verbs) {
			printf("CMA: unable to open RDMA device\n");
			ret = ERR(ENODEV);
			goto err3;
		}

		i++;
		ret = ibv_query_device(cma_dev->verbs, &attr);
		if (ret) {
			printf("CMA: unable to query RDMA device\n");
			goto err3;
		}

		cma_dev->port_cnt = attr.phys_port_cnt;
		cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
		cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
	}

	cma_dev_cnt = dev_cnt;
	pthread_mutex_unlock(&mut);
	ibv_free_device_list(dev_list);
	return 0;

err3:
	while (i--)
		ibv_close_device(cma_dev_array[i].verbs);
	free(cma_dev_array);
err2:
	ibv_free_device_list(dev_list);
err1:
	pthread_mutex_unlock(&mut);
	return ret;
}

struct ibv_context **rdma_get_devices(int *num_devices)
{
	struct ibv_context **devs = NULL;
	int i;

	if (!cma_dev_cnt && ucma_init())
		goto out;

	devs = malloc(sizeof *devs * (cma_dev_cnt + 1));
	if (!devs)
		goto out;

	for (i = 0; i < cma_dev_cnt; i++)
		devs[i] = cma_dev_array[i].verbs;
	devs[i] = NULL;
out:
	if (num_devices)
		*num_devices = devs ? cma_dev_cnt : 0;
	return devs;
}

void rdma_free_devices(struct ibv_context **list)
{
	free(list);
}

static void __attribute__((destructor)) rdma_cma_fini(void)
{
	ucma_cleanup();
}

struct rdma_event_channel *rdma_create_event_channel(void)
{
	struct rdma_event_channel *channel;

	if (!cma_dev_cnt && ucma_init())
		return NULL;

	channel = malloc(sizeof *channel);
	if (!channel)
		return NULL;

	channel->fd = open("/dev/rdma_cm", O_RDWR);
	if (channel->fd < 0) {
		printf("CMA: unable to open /dev/rdma_cm\n");
		goto err;
	}
	return channel;
err:
	free(channel);
	return NULL;
}

void rdma_destroy_event_channel(struct rdma_event_channel *channel)
{
	close(channel->fd);
	free(channel);
}

static int ucma_get_device(struct cma_id_private *id_priv, uint64_t guid)
{
	struct cma_device *cma_dev;
	int i;

	for (i = 0; i < cma_dev_cnt; i++) {
		cma_dev = &cma_dev_array[i];
		if (cma_dev->guid == guid) {
			id_priv->cma_dev = cma_dev;
			id_priv->id.verbs = cma_dev->verbs;
			return 0;
		}
	}

	return ERR(ENODEV);
}

static void ucma_free_id(struct cma_id_private *id_priv)
{
	pthread_cond_destroy(&id_priv->cond);
	pthread_mutex_destroy(&id_priv->mut);
	if (id_priv->id.route.path_rec)
		free(id_priv->id.route.path_rec);
	free(id_priv);
}

static struct cma_id_private *ucma_alloc_id(struct rdma_event_channel *channel,
					    void *context,
					    enum rdma_port_space ps)
{
	struct cma_id_private *id_priv;

	id_priv = malloc(sizeof *id_priv);
	if (!id_priv)
		return NULL;

	memset(id_priv, 0, sizeof *id_priv);
	id_priv->id.context = context;
	id_priv->id.ps = ps;
	id_priv->id.channel = channel;
	pthread_mutex_init(&id_priv->mut, NULL);
	if (pthread_cond_init(&id_priv->cond, NULL))
		goto err;

	return id_priv;

err:	ucma_free_id(id_priv);
	return NULL;
}

int rdma_create_id(struct rdma_event_channel *channel,
		   struct rdma_cm_id **id, void *context,
		   enum rdma_port_space ps)
{
	struct ucma_abi_create_id_resp *resp;
	struct ucma_abi_create_id *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	ret = cma_dev_cnt ? 0 : ucma_init();
	if (ret)
		return ret;

	id_priv = ucma_alloc_id(channel, context, ps);
	if (!id_priv)
		return ERR(ENOMEM);

	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_CREATE_ID, size);
	cmd->uid = (uintptr_t) id_priv;
	cmd->ps = ps;

	ret = write(channel->fd, msg, size);
	if (ret != size)
		goto err;

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	id_priv->handle = resp->id;
	*id = &id_priv->id;
	return 0;

err:	ucma_free_id(id_priv);
	return ret;
}
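
/*
 * Illustrative (hypothetical) caller code, not part of the library: a
 * typical application creates one event channel and then one or more
 * ids bound to it before resolving addresses and connecting.
 *
 *	struct rdma_event_channel *ch = rdma_create_event_channel();
 *	struct rdma_cm_id *id;
 *
 *	if (!ch || rdma_create_id(ch, &id, app_ctx, RDMA_PS_TCP))
 *		handle_error();
 *
 * "app_ctx" and "handle_error" are placeholders supplied by the caller.
 */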

static int ucma_destroy_kern_id(int fd, uint32_t handle)
{
	struct ucma_abi_destroy_id_resp *resp;
	struct ucma_abi_destroy_id *cmd;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_DESTROY_ID, size);
	cmd->id = handle;

	ret = write(fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	return resp->events_reported;
}

int rdma_destroy_id(struct rdma_cm_id *id)
{
	struct cma_id_private *id_priv;
	int ret;

	id_priv = container_of(id, struct cma_id_private, id);
	ret = ucma_destroy_kern_id(id->channel->fd, id_priv->handle);
	if (ret < 0)
		return ret;

	pthread_mutex_lock(&id_priv->mut);
	while (id_priv->events_completed < ret)
		pthread_cond_wait(&id_priv->cond, &id_priv->mut);
	pthread_mutex_unlock(&id_priv->mut);

	ucma_free_id(id_priv);
	return 0;
}

static int ucma_addrlen(struct sockaddr *addr)
{
	if (!addr)
		return 0;

	switch (addr->sa_family) {
	case PF_INET:
		return sizeof(struct sockaddr_in);
	case PF_INET6:
		return sizeof(struct sockaddr_in6);
	default:
		return 0;
	}
}

static int ucma_query_route(struct rdma_cm_id *id)
{
	struct ucma_abi_query_route_resp *resp;
	struct ucma_abi_query_route *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size, i;

	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_QUERY_ROUTE, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;

	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	if (resp->num_paths) {
		id->route.path_rec = malloc(sizeof *id->route.path_rec *
					    resp->num_paths);
		if (!id->route.path_rec)
			return ERR(ENOMEM);

		id->route.num_paths = resp->num_paths;
		for (i = 0; i < resp->num_paths; i++)
			ibv_copy_path_rec_from_kern(&id->route.path_rec[i],
						    &resp->ib_route[i]);
	}

	memcpy(id->route.addr.addr.ibaddr.sgid.raw, resp->ib_route[0].sgid,
	       sizeof id->route.addr.addr.ibaddr.sgid);
	memcpy(id->route.addr.addr.ibaddr.dgid.raw, resp->ib_route[0].dgid,
	       sizeof id->route.addr.addr.ibaddr.dgid);
	id->route.addr.addr.ibaddr.pkey = resp->ib_route[0].pkey;
	memcpy(&id->route.addr.src_addr, &resp->src_addr,
	       sizeof resp->src_addr);
	memcpy(&id->route.addr.dst_addr, &resp->dst_addr,
	       sizeof resp->dst_addr);

	if (!id_priv->cma_dev && resp->node_guid) {
		ret = ucma_get_device(id_priv, resp->node_guid);
		if (ret)
			return ret;
		id_priv->id.port_num = resp->port_num;
	}

	return 0;
}

int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct ucma_abi_bind_addr *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size, addrlen;

	addrlen = ucma_addrlen(addr);
	if (!addrlen)
		return ERR(EINVAL);

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_BIND_ADDR, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;
	memcpy(&cmd->addr, addr, addrlen);

	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	return ucma_query_route(id);
}

int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
		      struct sockaddr *dst_addr, int timeout_ms)
{
	struct ucma_abi_resolve_addr *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size, daddrlen;

	daddrlen = ucma_addrlen(dst_addr);
	if (!daddrlen)
		return ERR(EINVAL);

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_RESOLVE_ADDR, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;
	if (src_addr)
		memcpy(&cmd->src_addr, src_addr, ucma_addrlen(src_addr));
	memcpy(&cmd->dst_addr, dst_addr, daddrlen);
	cmd->timeout_ms = timeout_ms;

	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	memcpy(&id->route.addr.dst_addr, dst_addr, daddrlen);
	return 0;
}
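
/*
 * Illustrative (hypothetical) client-side sequence, not part of the
 * library: address and route resolution are asynchronous, so the caller
 * issues each request and then waits for the matching event on the id's
 * event channel.
 *
 *	rdma_resolve_addr(id, NULL, dst_sa, 2000);
 *	wait_for_event(ch, RDMA_CM_EVENT_ADDR_RESOLVED);
 *	rdma_resolve_route(id, 2000);
 *	wait_for_event(ch, RDMA_CM_EVENT_ROUTE_RESOLVED);
 *
 * "dst_sa" and "wait_for_event" are caller-supplied placeholders; the
 * events themselves are retrieved with rdma_get_cm_event() and released
 * with rdma_ack_cm_event().
 */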

int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
{
	struct ucma_abi_resolve_route *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_RESOLVE_ROUTE, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;
	cmd->timeout_ms = timeout_ms;

	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	return 0;
}

static int ucma_is_ud_ps(enum rdma_port_space ps)
{
	return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
}

static int rdma_init_qp_attr(struct rdma_cm_id *id, struct ibv_qp_attr *qp_attr,
			     int *qp_attr_mask)
{
	struct ucma_abi_init_qp_attr *cmd;
	struct ibv_kern_qp_attr *resp;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_INIT_QP_ATTR, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;
	cmd->qp_state = qp_attr->qp_state;

	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	ibv_copy_qp_attr_from_kern(qp_attr, resp);
	*qp_attr_mask = resp->qp_attr_mask;
	return 0;
}

static int ucma_modify_qp_rtr(struct rdma_cm_id *id,
			      struct rdma_conn_param *conn_param)
{
	struct ibv_qp_attr qp_attr;
	int qp_attr_mask, ret;

	if (!id->qp)
		return ERR(EINVAL);

	/* Need to update QP attributes from default values. */
	qp_attr.qp_state = IBV_QPS_INIT;
	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	ret = ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IBV_QPS_RTR;
	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	if (conn_param)
		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
	return ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask);
}

static int ucma_modify_qp_rts(struct rdma_cm_id *id)
{
	struct ibv_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IBV_QPS_RTS;
	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask);
}

static int ucma_modify_qp_sqd(struct rdma_cm_id *id)
{
	struct ibv_qp_attr qp_attr;

	if (!id->qp)
		return 0;

	qp_attr.qp_state = IBV_QPS_SQD;
	return ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE);
}

static int ucma_modify_qp_err(struct rdma_cm_id *id)
{
	struct ibv_qp_attr qp_attr;

	if (!id->qp)
		return 0;

	qp_attr.qp_state = IBV_QPS_ERR;
	return ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE);
}

static int ucma_find_pkey(struct cma_device *cma_dev, uint8_t port_num,
			  uint16_t pkey, uint16_t *pkey_index)
{
	int ret, i;
	uint16_t chk_pkey;

	for (i = 0, ret = 0; !ret; i++) {
		ret = ibv_query_pkey(cma_dev->verbs, port_num, i, &chk_pkey);
		if (!ret && pkey == chk_pkey) {
			*pkey_index = (uint16_t) i;
			return 0;
		}
	}
	return ERR(EINVAL);
}

static int ucma_init_conn_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp)
{
	struct ibv_qp_attr qp_attr;
	int ret;

	ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num,
			     id_priv->id.route.addr.addr.ibaddr.pkey,
			     &qp_attr.pkey_index);
	if (ret)
		return ret;

	qp_attr.port_num = id_priv->id.port_num;
	qp_attr.qp_state = IBV_QPS_INIT;
	qp_attr.qp_access_flags = 0;

	return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_ACCESS_FLAGS |
					   IBV_QP_PKEY_INDEX | IBV_QP_PORT);
}

static int ucma_init_conn_qp(struct cma_id_private *id_priv, struct ibv_qp *qp)
{
	struct ibv_qp_attr qp_attr;
	int qp_attr_mask, ret;

	if (abi_ver == 3)
		return ucma_init_conn_qp3(id_priv, qp);

	qp_attr.qp_state = IBV_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ibv_modify_qp(qp, &qp_attr, qp_attr_mask);
}

static int ucma_init_ud_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp)
{
	struct ibv_qp_attr qp_attr;
	int ret;

	ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num,
			     id_priv->id.route.addr.addr.ibaddr.pkey,
			     &qp_attr.pkey_index);
	if (ret)
		return ret;

	qp_attr.port_num = id_priv->id.port_num;
	qp_attr.qp_state = IBV_QPS_INIT;
	qp_attr.qkey = RDMA_UDP_QKEY;

	ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_QKEY |
					  IBV_QP_PKEY_INDEX | IBV_QP_PORT);
	if (ret)
		return ret;

	qp_attr.qp_state = IBV_QPS_RTR;
	ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
	if (ret)
		return ret;

	qp_attr.qp_state = IBV_QPS_RTS;
	qp_attr.sq_psn = 0;
	return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN);
}

static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp)
{
	struct ibv_qp_attr qp_attr;
	int qp_attr_mask, ret;

	if (abi_ver == 3)
		return ucma_init_ud_qp3(id_priv, qp);

	qp_attr.qp_state = IBV_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	ret = ibv_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IBV_QPS_RTR;
	ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
	if (ret)
		return ret;

	qp_attr.qp_state = IBV_QPS_RTS;
	qp_attr.sq_psn = 0;
	return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN);
}

int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd,
		   struct ibv_qp_init_attr *qp_init_attr)
{
	struct cma_id_private *id_priv;
	struct ibv_qp *qp;
	int ret;

	id_priv = container_of(id, struct cma_id_private, id);
	if (id->verbs != pd->context)
		return ERR(EINVAL);

	qp = ibv_create_qp(pd, qp_init_attr);
	if (!qp)
		return ERR(ENOMEM);

	if (ucma_is_ud_ps(id->ps))
		ret = ucma_init_ud_qp(id_priv, qp);
	else
		ret = ucma_init_conn_qp(id_priv, qp);
	if (ret)
		goto err;

	id->qp = qp;
	return 0;
err:
	ibv_destroy_qp(qp);
	return ret;
}
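
/*
 * Illustrative (hypothetical) use of rdma_create_qp(), not part of the
 * library: the QP must be created on a protection domain that belongs to
 * the verbs device the id resolved to (id->verbs), typically after route
 * resolution and before connecting or accepting.
 *
 *	struct ibv_qp_init_attr attr = {
 *		.send_cq = cq, .recv_cq = cq,
 *		.cap = { .max_send_wr = 16, .max_recv_wr = 16,
 *			 .max_send_sge = 1, .max_recv_sge = 1 },
 *		.qp_type = IBV_QPT_RC,
 *	};
 *
 *	if (rdma_create_qp(id, pd, &attr))
 *		handle_error();
 *
 * "cq", "pd" and "handle_error" are caller-supplied placeholders.
 */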

void rdma_destroy_qp(struct rdma_cm_id *id)
{
	ibv_destroy_qp(id->qp);
}

static int ucma_valid_param(struct cma_id_private *id_priv,
			    struct rdma_conn_param *conn_param)
{
	if (id_priv->id.ps != RDMA_PS_TCP)
		return 0;

	if ((conn_param->responder_resources >
	     id_priv->cma_dev->max_responder_resources) ||
	    (conn_param->initiator_depth >
	     id_priv->cma_dev->max_initiator_depth))
		return ERR(EINVAL);

	return 0;
}

static void ucma_copy_conn_param_to_kern(struct ucma_abi_conn_param *dst,
					 struct rdma_conn_param *src,
					 uint32_t qp_num, uint8_t srq)
{
	dst->qp_num = qp_num;
	dst->srq = srq;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->valid = 1;

	if (src->private_data && src->private_data_len) {
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
		dst->private_data_len = src->private_data_len;
	}
}

int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
	struct ucma_abi_connect *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	id_priv = container_of(id, struct cma_id_private, id);
	ret = ucma_valid_param(id_priv, conn_param);
	if (ret)
		return ret;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_CONNECT, size);
	cmd->id = id_priv->handle;
	if (id->qp)
		ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param,
					     id->qp->qp_num,
					     (id->qp->srq != NULL));
	else
		ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param,
					     conn_param->qp_num,
					     conn_param->srq);

	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	return 0;
}
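
/*
 * Illustrative (hypothetical) active-side connect, not part of the
 * library: rdma_connect() only posts the request; establishment is
 * reported asynchronously as RDMA_CM_EVENT_ESTABLISHED on the id's
 * event channel.
 *
 *	struct rdma_conn_param param = {
 *		.responder_resources = 1,
 *		.initiator_depth = 1,
 *		.retry_count = 7,
 *	};
 *
 *	if (rdma_connect(id, &param))
 *		handle_error();
 *	wait_for_event(ch, RDMA_CM_EVENT_ESTABLISHED);
 *
 * "handle_error" and "wait_for_event" are caller-supplied placeholders.
 */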

int rdma_listen(struct rdma_cm_id *id, int backlog)
{
	struct ucma_abi_listen *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_LISTEN, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;
	cmd->backlog = backlog;

	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	return ucma_query_route(id);
}

int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
	struct ucma_abi_accept *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	id_priv = container_of(id, struct cma_id_private, id);
	ret = ucma_valid_param(id_priv, conn_param);
	if (ret)
		return ret;

	if (!ucma_is_ud_ps(id->ps)) {
		ret = ucma_modify_qp_rtr(id, conn_param);
		if (ret)
			return ret;
	}

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_ACCEPT, size);
	cmd->id = id_priv->handle;
	cmd->uid = (uintptr_t) id_priv;
	if (id->qp)
		ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param,
					     id->qp->qp_num,
					     (id->qp->srq != NULL));
	else
		ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param,
					     conn_param->qp_num,
					     conn_param->srq);

	ret = write(id->channel->fd, msg, size);
	if (ret != size) {
		ucma_modify_qp_err(id);
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;
	}

	return 0;
}
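
/*
 * Illustrative (hypothetical) passive-side sequence, not part of the
 * library: a server binds and listens on one id, then accepts each
 * connection request that arrives as RDMA_CM_EVENT_CONNECT_REQUEST on
 * the newly created id carried in the event.
 *
 *	rdma_bind_addr(listen_id, (struct sockaddr *) &sin);
 *	rdma_listen(listen_id, 8);
 *	rdma_get_cm_event(ch, &event);          (CONNECT_REQUEST)
 *	rdma_create_qp(event->id, pd, &attr);
 *	rdma_accept(event->id, &param);
 *	rdma_ack_cm_event(event);
 *
 * "sin", "ch", "pd", "attr" and "param" are caller-supplied
 * placeholders.
 */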

int rdma_reject(struct rdma_cm_id *id, const void *private_data,
		uint8_t private_data_len)
{
	struct ucma_abi_reject *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_REJECT, size);

	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;
	if (private_data && private_data_len) {
		memcpy(cmd->private_data, private_data, private_data_len);
		cmd->private_data_len = private_data_len;
	} else
		cmd->private_data_len = 0;

	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	return 0;
}

int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event)
{
	struct ucma_abi_notify *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_NOTIFY, size);

	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;
	cmd->event = event;
	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	return 0;
}

int rdma_disconnect(struct rdma_cm_id *id)
{
	struct ucma_abi_disconnect *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	switch (id->verbs->device->transport_type) {
	case IBV_TRANSPORT_IB:
		ret = ucma_modify_qp_err(id);
		break;
	case IBV_TRANSPORT_IWARP:
		ret = ucma_modify_qp_sqd(id);
		break;
	default:
		ret = ERR(EINVAL);
	}
	if (ret)
		return ret;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_DISCONNECT, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;

	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	return 0;
}

int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			void *context)
{
	struct ucma_abi_join_mcast *cmd;
	struct ucma_abi_create_id_resp *resp;
	struct cma_id_private *id_priv;
	struct cma_multicast *mc, **pos;
	void *msg;
	int ret, size, addrlen;

	id_priv = container_of(id, struct cma_id_private, id);
	addrlen = ucma_addrlen(addr);
	if (!addrlen)
		return ERR(EINVAL);

	mc = malloc(sizeof *mc);
	if (!mc)
		return ERR(ENOMEM);

	memset(mc, 0, sizeof *mc);
	mc->context = context;
	mc->id_priv = id_priv;
	memcpy(&mc->addr, addr, addrlen);
	if (pthread_cond_init(&mc->cond, NULL)) {
		ret = -1;
		goto err1;
	}

	pthread_mutex_lock(&id_priv->mut);
	mc->next = id_priv->mc_list;
	id_priv->mc_list = mc;
	pthread_mutex_unlock(&id_priv->mut);

	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_JOIN_MCAST, size);
	cmd->id = id_priv->handle;
	memcpy(&cmd->addr, addr, addrlen);
	cmd->uid = (uintptr_t) mc;

	ret = write(id->channel->fd, msg, size);
	if (ret != size) {
		ret = (ret >= 0) ? ERR(ECONNREFUSED) : -1;
		goto err2;
	}

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	mc->handle = resp->id;
	return 0;
err2:
	pthread_mutex_lock(&id_priv->mut);
	for (pos = &id_priv->mc_list; *pos != mc; pos = &(*pos)->next)
		;
	*pos = mc->next;
	pthread_mutex_unlock(&id_priv->mut);
err1:
	free(mc);
	return ret;
}

int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct ucma_abi_destroy_id *cmd;
	struct ucma_abi_destroy_id_resp *resp;
	struct cma_id_private *id_priv;
	struct cma_multicast *mc, **pos;
	void *msg;
	int ret, size, addrlen;

	addrlen = ucma_addrlen(addr);
	if (!addrlen)
		return ERR(EINVAL);

	id_priv = container_of(id, struct cma_id_private, id);
	pthread_mutex_lock(&id_priv->mut);
	for (pos = &id_priv->mc_list; *pos; pos = &(*pos)->next)
		if (!memcmp(&(*pos)->addr, addr, addrlen))
			break;

	mc = *pos;
	if (*pos)
		*pos = mc->next;
	pthread_mutex_unlock(&id_priv->mut);
	if (!mc)
		return ERR(EADDRNOTAVAIL);

	if (id->qp)
		ibv_detach_mcast(id->qp, &mc->mgid, mc->mlid);

	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_LEAVE_MCAST, size);
	cmd->id = mc->handle;

	ret = write(id->channel->fd, msg, size);
	if (ret != size) {
		ret = (ret >= 0) ? ERR(ECONNREFUSED) : -1;
		goto free;
	}

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	pthread_mutex_lock(&id_priv->mut);
	while (mc->events_completed < resp->events_reported)
		pthread_cond_wait(&mc->cond, &id_priv->mut);
	pthread_mutex_unlock(&id_priv->mut);

	ret = 0;
free:
	free(mc);
	return ret;
}

static void ucma_complete_event(struct cma_id_private *id_priv)
{
	pthread_mutex_lock(&id_priv->mut);
	id_priv->events_completed++;
	pthread_cond_signal(&id_priv->cond);
	pthread_mutex_unlock(&id_priv->mut);
}

static void ucma_complete_mc_event(struct cma_multicast *mc)
{
	pthread_mutex_lock(&mc->id_priv->mut);
	mc->events_completed++;
	pthread_cond_signal(&mc->cond);
	mc->id_priv->events_completed++;
	pthread_cond_signal(&mc->id_priv->cond);
	pthread_mutex_unlock(&mc->id_priv->mut);
}

int rdma_ack_cm_event(struct rdma_cm_event *event)
{
	struct cma_event *evt;

	if (!event)
		return ERR(EINVAL);

	evt = container_of(event, struct cma_event, event);

	if (evt->mc)
		ucma_complete_mc_event(evt->mc);
	else
		ucma_complete_event(evt->id_priv);
	free(evt);
	return 0;
}

static int ucma_process_conn_req(struct cma_event *evt,
				 uint32_t handle)
{
	struct cma_id_private *id_priv;
	int ret;

	id_priv = ucma_alloc_id(evt->id_priv->id.channel,
				evt->id_priv->id.context, evt->id_priv->id.ps);
	if (!id_priv) {
		ucma_destroy_kern_id(evt->id_priv->id.channel->fd, handle);
		ret = ERR(ENOMEM);
		goto err;
	}

	evt->event.listen_id = &evt->id_priv->id;
	evt->event.id = &id_priv->id;
	id_priv->handle = handle;

	ret = ucma_query_route(&id_priv->id);
	if (ret) {
		rdma_destroy_id(&id_priv->id);
		goto err;
	}

	return 0;
err:
	ucma_complete_event(evt->id_priv);
	return ret;
}

static int ucma_process_conn_resp(struct cma_id_private *id_priv)
{
	struct ucma_abi_accept *cmd;
	void *msg;
	int ret, size;

	ret = ucma_modify_qp_rtr(&id_priv->id, NULL);
	if (ret)
		goto err;

	ret = ucma_modify_qp_rts(&id_priv->id);
	if (ret)
		goto err;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_ACCEPT, size);
	cmd->id = id_priv->handle;

	ret = write(id_priv->id.channel->fd, msg, size);
	if (ret != size) {
		ret = (ret >= 0) ? ERR(ECONNREFUSED) : -1;
		goto err;
	}

	return 0;
err:
	ucma_modify_qp_err(&id_priv->id);
	return ret;
}

static int ucma_process_establish(struct rdma_cm_id *id)
{
	int ret;

	ret = ucma_modify_qp_rts(id);
	if (ret)
		ucma_modify_qp_err(id);

	return ret;
}

static int ucma_process_join(struct cma_event *evt)
{
	evt->mc->mgid = evt->event.param.ud.ah_attr.grh.dgid;
	evt->mc->mlid = evt->event.param.ud.ah_attr.dlid;

	if (!evt->id_priv->id.qp)
		return 0;

	return ibv_attach_mcast(evt->id_priv->id.qp, &evt->mc->mgid,
				evt->mc->mlid);
}

static void ucma_copy_conn_event(struct cma_event *event,
				 struct ucma_abi_conn_param *src)
{
	struct rdma_conn_param *dst = &event->event.param.conn;

	dst->private_data_len = src->private_data_len;
	if (src->private_data_len) {
		dst->private_data = &event->private_data;
		memcpy(&event->private_data, src->private_data,
		       src->private_data_len);
	}

	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}

static void ucma_copy_ud_event(struct cma_event *event,
			       struct ucma_abi_ud_param *src)
{
	struct rdma_ud_param *dst = &event->event.param.ud;

	dst->private_data_len = src->private_data_len;
	if (src->private_data_len) {
		dst->private_data = &event->private_data;
		memcpy(&event->private_data, src->private_data,
		       src->private_data_len);
	}

	ibv_copy_ah_attr_from_kern(&dst->ah_attr, &src->ah_attr);
	dst->qp_num = src->qp_num;
	dst->qkey = src->qkey;
}

int rdma_get_cm_event(struct rdma_event_channel *channel,
		      struct rdma_cm_event **event)
{
	struct ucma_abi_event_resp *resp;
	struct ucma_abi_get_event *cmd;
	struct cma_event *evt;
	void *msg;
	int ret, size;

	ret = cma_dev_cnt ? 0 : ucma_init();
	if (ret)
		return ret;

	if (!event)
		return ERR(EINVAL);

	evt = malloc(sizeof *evt);
	if (!evt)
		return ERR(ENOMEM);

retry:
	memset(evt, 0, sizeof *evt);
	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_GET_EVENT, size);
	ret = write(channel->fd, msg, size);
	if (ret != size) {
		free(evt);
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;
	}

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	evt->event.event = resp->event;
	evt->id_priv = (void *) (uintptr_t) resp->uid;
	evt->event.id = &evt->id_priv->id;
	evt->event.status = resp->status;

	switch (resp->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		evt->event.status = ucma_query_route(&evt->id_priv->id);
		if (evt->event.status)
			evt->event.event = RDMA_CM_EVENT_ADDR_ERROR;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		evt->event.status = ucma_query_route(&evt->id_priv->id);
		if (evt->event.status)
			evt->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		evt->id_priv = (void *) (uintptr_t) resp->uid;
		if (ucma_is_ud_ps(evt->id_priv->id.ps))
			ucma_copy_ud_event(evt, &resp->param.ud);
		else
			ucma_copy_conn_event(evt, &resp->param.conn);

		ret = ucma_process_conn_req(evt, resp->id);
		if (ret)
			goto retry;
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		ucma_copy_conn_event(evt, &resp->param.conn);
		evt->event.status = ucma_process_conn_resp(evt->id_priv);
		if (!evt->event.status)
			evt->event.event = RDMA_CM_EVENT_ESTABLISHED;
		else {
			evt->event.event = RDMA_CM_EVENT_CONNECT_ERROR;
			evt->id_priv->connect_error = 1;
		}
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		if (ucma_is_ud_ps(evt->id_priv->id.ps)) {
			ucma_copy_ud_event(evt, &resp->param.ud);
			break;
		}

		ucma_copy_conn_event(evt, &resp->param.conn);
		evt->event.status = ucma_process_establish(&evt->id_priv->id);
		if (evt->event.status) {
			evt->event.event = RDMA_CM_EVENT_CONNECT_ERROR;
			evt->id_priv->connect_error = 1;
		}
		break;
	case RDMA_CM_EVENT_REJECTED:
		if (evt->id_priv->connect_error) {
			ucma_complete_event(evt->id_priv);
			goto retry;
		}
		ucma_copy_conn_event(evt, &resp->param.conn);
		ucma_modify_qp_err(evt->event.id);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
		if (evt->id_priv->connect_error) {
			ucma_complete_event(evt->id_priv);
			goto retry;
		}
		ucma_copy_conn_event(evt, &resp->param.conn);
		break;
	case RDMA_CM_EVENT_MULTICAST_JOIN:
		evt->mc = (void *) (uintptr_t) resp->uid;
		evt->id_priv = evt->mc->id_priv;
		evt->event.id = &evt->id_priv->id;
		ucma_copy_ud_event(evt, &resp->param.ud);
		evt->event.param.ud.private_data = evt->mc->context;
		evt->event.status = ucma_process_join(evt);
		if (evt->event.status)
			evt->event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
		break;
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		evt->mc = (void *) (uintptr_t) resp->uid;
		evt->id_priv = evt->mc->id_priv;
		evt->event.id = &evt->id_priv->id;
		evt->event.param.ud.private_data = evt->mc->context;
		break;
	default:
		evt->id_priv = (void *) (uintptr_t) resp->uid;
		evt->event.id = &evt->id_priv->id;
		evt->event.status = resp->status;
		if (ucma_is_ud_ps(evt->id_priv->id.ps))
			ucma_copy_ud_event(evt, &resp->param.ud);
		else
			ucma_copy_conn_event(evt, &resp->param.conn);
		break;
	}

	*event = &evt->event;
	return 0;
}
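
/*
 * Illustrative (hypothetical) event loop, not part of the library:
 * every event returned by rdma_get_cm_event() must be released with
 * rdma_ack_cm_event() so that rdma_destroy_id() and
 * rdma_leave_multicast() can account for outstanding events.
 *
 *	struct rdma_cm_event *event;
 *
 *	while (!rdma_get_cm_event(ch, &event)) {
 *		handle_event(event);
 *		rdma_ack_cm_event(event);
 *	}
 *
 * "ch" and "handle_event" are caller-supplied placeholders.
 */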

const char *rdma_event_str(enum rdma_cm_event_type event)
{
	switch (event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		return "RDMA_CM_EVENT_ADDR_RESOLVED";
	case RDMA_CM_EVENT_ADDR_ERROR:
		return "RDMA_CM_EVENT_ADDR_ERROR";
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		return "RDMA_CM_EVENT_ROUTE_RESOLVED";
	case RDMA_CM_EVENT_ROUTE_ERROR:
		return "RDMA_CM_EVENT_ROUTE_ERROR";
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		return "RDMA_CM_EVENT_CONNECT_REQUEST";
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		return "RDMA_CM_EVENT_CONNECT_RESPONSE";
	case RDMA_CM_EVENT_CONNECT_ERROR:
		return "RDMA_CM_EVENT_CONNECT_ERROR";
	case RDMA_CM_EVENT_UNREACHABLE:
		return "RDMA_CM_EVENT_UNREACHABLE";
	case RDMA_CM_EVENT_REJECTED:
		return "RDMA_CM_EVENT_REJECTED";
	case RDMA_CM_EVENT_ESTABLISHED:
		return "RDMA_CM_EVENT_ESTABLISHED";
	case RDMA_CM_EVENT_DISCONNECTED:
		return "RDMA_CM_EVENT_DISCONNECTED";
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		return "RDMA_CM_EVENT_DEVICE_REMOVAL";
	case RDMA_CM_EVENT_MULTICAST_JOIN:
		return "RDMA_CM_EVENT_MULTICAST_JOIN";
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		return "RDMA_CM_EVENT_MULTICAST_ERROR";
	case RDMA_CM_EVENT_ADDR_CHANGE:
		return "RDMA_CM_EVENT_ADDR_CHANGE";
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		return "RDMA_CM_EVENT_TIMEWAIT_EXIT";
	default:
		return "UNKNOWN EVENT";
	}
}

int rdma_set_option(struct rdma_cm_id *id, int level, int optname,
		    void *optval, size_t optlen)
{
	struct ucma_abi_set_option *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_SET_OPTION, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;
	cmd->optval = (uintptr_t) optval;
	cmd->level = level;
	cmd->optname = optname;
	cmd->optlen = optlen;

	ret = write(id->channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	return 0;
}

int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel)
{
	struct ucma_abi_migrate_resp *resp;
	struct ucma_abi_migrate_id *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	id_priv = container_of(id, struct cma_id_private, id);
	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_MIGRATE_ID, size);
	cmd->id = id_priv->handle;
	cmd->fd = id->channel->fd;

	ret = write(channel->fd, msg, size);
	if (ret != size)
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	/*
	 * Eventually if we want to support migrating channels while events are
	 * being processed on the current channel, we need to block here while
	 * there are any outstanding events on the current channel for this id
	 * to prevent the user from processing events for this id on the old
	 * channel after this call returns.
	 */
	pthread_mutex_lock(&id_priv->mut);
	id->channel = channel;
	while (id_priv->events_completed < resp->events_reported)
		pthread_cond_wait(&id_priv->cond, &id_priv->mut);
	pthread_mutex_unlock(&id_priv->mut);

	return 0;
}