1/*
2 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id$
33 */
34
35#include <stdlib.h>
36#include <string.h>
37#include <stdio.h>
38#include <errno.h>
39#include <sys/types.h>
40#include <netinet/in.h>
41#include <sys/socket.h>
42#include <netdb.h>
43#include <byteswap.h>
44#include <getopt.h>
45
46#include <rdma/rdma_cma.h>
47
/*
 * Host-to-big-endian conversion helpers.  On a big-endian host the value
 * is returned unchanged; on any other host the bytes are swapped with
 * bswap_64()/bswap_32().
 */
#if __BYTE_ORDER == __BIG_ENDIAN
static inline uint64_t cpu_to_be64(uint64_t x) { return x; }
static inline uint32_t cpu_to_be32(uint32_t x) { return x; }
#else
static inline uint64_t cpu_to_be64(uint64_t x) { return bswap_64(x); }
static inline uint32_t cpu_to_be32(uint32_t x) { return bswap_32(x); }
#endif
55
/* Per-connection state: one entry of test.nodes[] for every connection. */
struct cmatest_node {
	int			id;		/* index of this node in test.nodes[] */
	struct rdma_cm_id	*cma_id;	/* CM identifier; NULL when the node owns none */
	int			connected;	/* set to 1 on RDMA_CM_EVENT_ESTABLISHED */
	struct ibv_pd		*pd;		/* protection domain allocated in init_node() */
	struct ibv_cq		*cq[2];		/* completion queues, indexed by enum CQ_INDEX */
	struct ibv_mr		*mr;		/* registration of 'mem' */
	void			*mem;		/* message buffer of message_size bytes */
};
65
/* Indices into cmatest_node.cq[]: the send CQ and the receive CQ of a node's QP. */
enum CQ_INDEX {
	SEND_CQ_INDEX,
	RECV_CQ_INDEX
};
70
/* Global state of one test run (shared by the client and server paths). */
struct cmatest {
	struct rdma_event_channel *channel;	/* channel all CM events are read from */
	struct cmatest_node	*nodes;		/* array of 'connections' nodes */
	int			conn_index;	/* next node handed to an incoming connect request */
	int			connects_left;	/* connections not yet established or failed */
	int			disconnects_left; /* connections not yet disconnected or failed */

	struct rdma_addr	addr;		/* source and destination addresses */
};
80
/* Test state and command-line configuration (option letters as parsed in main()). */
static struct cmatest test;
static int connections = 1;		/* -c: number of connections */
static int message_size = 100;		/* -S: bytes per message; 0 disables data transfers */
static int message_count = 10;		/* -C: messages per connection */
static uint16_t port = 7471;		/* -p: port number */
static uint8_t set_tos = 0;		/* set to 1 when -t was given */
static uint8_t tos;			/* -t: type-of-service value */
static uint8_t migrate = 0;		/* -m: migrate IDs to a new event channel */
static char *dst_addr;			/* -s: server address; when set, run as client */
static char *src_addr;			/* -b: local bind address */
91
92static int create_message(struct cmatest_node *node)
93{
94	if (!message_size)
95		message_count = 0;
96
97	if (!message_count)
98		return 0;
99
100	node->mem = malloc(message_size);
101	if (!node->mem) {
102		printf("failed message allocation\n");
103		return -1;
104	}
105	node->mr = ibv_reg_mr(node->pd, node->mem, message_size,
106			     IBV_ACCESS_LOCAL_WRITE);
107	if (!node->mr) {
108		printf("failed to reg MR\n");
109		goto err;
110	}
111	return 0;
112err:
113	free(node->mem);
114	return -1;
115}
116
117static int init_node(struct cmatest_node *node)
118{
119	struct ibv_qp_init_attr init_qp_attr;
120	int cqe, ret;
121
122	node->pd = ibv_alloc_pd(node->cma_id->verbs);
123	if (!node->pd) {
124		ret = -ENOMEM;
125		printf("cmatose: unable to allocate PD\n");
126		goto out;
127	}
128
129	cqe = message_count ? message_count : 1;
130	node->cq[SEND_CQ_INDEX] = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0);
131	node->cq[RECV_CQ_INDEX] = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0);
132	if (!node->cq[SEND_CQ_INDEX] || !node->cq[RECV_CQ_INDEX]) {
133		ret = -ENOMEM;
134		printf("cmatose: unable to create CQ\n");
135		goto out;
136	}
137
138	memset(&init_qp_attr, 0, sizeof init_qp_attr);
139	init_qp_attr.cap.max_send_wr = cqe;
140	init_qp_attr.cap.max_recv_wr = cqe;
141	init_qp_attr.cap.max_send_sge = 1;
142	init_qp_attr.cap.max_recv_sge = 1;
143	init_qp_attr.qp_context = node;
144	init_qp_attr.sq_sig_all = 1;
145	init_qp_attr.qp_type = IBV_QPT_RC;
146	init_qp_attr.send_cq = node->cq[SEND_CQ_INDEX];
147	init_qp_attr.recv_cq = node->cq[RECV_CQ_INDEX];
148	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
149	if (ret) {
150		perror("cmatose: unable to create QP");
151		goto out;
152	}
153
154	ret = create_message(node);
155	if (ret) {
156		printf("cmatose: failed to create messages: %d\n", ret);
157		goto out;
158	}
159out:
160	return ret;
161}
162
163static int post_recvs(struct cmatest_node *node)
164{
165	struct ibv_recv_wr recv_wr, *recv_failure;
166	struct ibv_sge sge;
167	int i, ret = 0;
168
169	if (!message_count)
170		return 0;
171
172	recv_wr.next = NULL;
173	recv_wr.sg_list = &sge;
174	recv_wr.num_sge = 1;
175	recv_wr.wr_id = (uintptr_t) node;
176
177	sge.length = message_size;
178	sge.lkey = node->mr->lkey;
179	sge.addr = (uintptr_t) node->mem;
180
181	for (i = 0; i < message_count && !ret; i++ ) {
182		ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure);
183		if (ret) {
184			printf("failed to post receives: %d\n", ret);
185			break;
186		}
187	}
188	return ret;
189}
190
191static int post_sends(struct cmatest_node *node)
192{
193	struct ibv_send_wr send_wr, *bad_send_wr;
194	struct ibv_sge sge;
195	int i, ret = 0;
196
197	if (!node->connected || !message_count)
198		return 0;
199
200	send_wr.next = NULL;
201	send_wr.sg_list = &sge;
202	send_wr.num_sge = 1;
203	send_wr.opcode = IBV_WR_SEND;
204	send_wr.send_flags = 0;
205	send_wr.wr_id = (unsigned long)node;
206
207	sge.length = message_size;
208	sge.lkey = node->mr->lkey;
209	sge.addr = (uintptr_t) node->mem;
210
211	for (i = 0; i < message_count && !ret; i++) {
212		ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr);
213		if (ret)
214			printf("failed to post sends: %d\n", ret);
215	}
216	return ret;
217}
218
219static void connect_error(void)
220{
221	test.disconnects_left--;
222	test.connects_left--;
223}
224
225static int addr_handler(struct cmatest_node *node)
226{
227	int ret;
228
229	if (set_tos) {
230		ret = rdma_set_option(node->cma_id, RDMA_OPTION_ID,
231				      RDMA_OPTION_ID_TOS, &tos, sizeof tos);
232		if (ret)
233			perror("cmatose: set TOS option failed");
234	}
235
236	ret = rdma_resolve_route(node->cma_id, 2000);
237	if (ret) {
238		perror("cmatose: resolve route failed");
239		connect_error();
240	}
241	return ret;
242}
243
244static int route_handler(struct cmatest_node *node)
245{
246	struct rdma_conn_param conn_param;
247	int ret;
248
249	ret = init_node(node);
250	if (ret)
251		goto err;
252
253	ret = post_recvs(node);
254	if (ret)
255		goto err;
256
257	memset(&conn_param, 0, sizeof conn_param);
258	conn_param.responder_resources = 1;
259	conn_param.initiator_depth = 1;
260	conn_param.retry_count = 5;
261	ret = rdma_connect(node->cma_id, &conn_param);
262	if (ret) {
263		perror("cmatose: failure connecting");
264		goto err;
265	}
266	return 0;
267err:
268	connect_error();
269	return ret;
270}
271
272static int connect_handler(struct rdma_cm_id *cma_id)
273{
274	struct cmatest_node *node;
275	struct rdma_conn_param conn_param;
276	int ret;
277
278	if (test.conn_index == connections) {
279		ret = -ENOMEM;
280		goto err1;
281	}
282	node = &test.nodes[test.conn_index++];
283
284	node->cma_id = cma_id;
285	cma_id->context = node;
286
287	ret = init_node(node);
288	if (ret)
289		goto err2;
290
291	ret = post_recvs(node);
292	if (ret)
293		goto err2;
294
295	memset(&conn_param, 0, sizeof conn_param);
296	conn_param.responder_resources = 1;
297	conn_param.initiator_depth = 1;
298	ret = rdma_accept(node->cma_id, &conn_param);
299	if (ret) {
300		perror("cmatose: failure accepting");
301		goto err2;
302	}
303	return 0;
304
305err2:
306	node->cma_id = NULL;
307	connect_error();
308err1:
309	printf("cmatose: failing connection request\n");
310	rdma_reject(cma_id, NULL, 0);
311	return ret;
312}
313
314static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
315{
316	int ret = 0;
317
318	switch (event->event) {
319	case RDMA_CM_EVENT_ADDR_RESOLVED:
320		ret = addr_handler(cma_id->context);
321		break;
322	case RDMA_CM_EVENT_ROUTE_RESOLVED:
323		ret = route_handler(cma_id->context);
324		break;
325	case RDMA_CM_EVENT_CONNECT_REQUEST:
326		ret = connect_handler(cma_id);
327		break;
328	case RDMA_CM_EVENT_ESTABLISHED:
329		((struct cmatest_node *) cma_id->context)->connected = 1;
330		test.connects_left--;
331		break;
332	case RDMA_CM_EVENT_ADDR_ERROR:
333	case RDMA_CM_EVENT_ROUTE_ERROR:
334	case RDMA_CM_EVENT_CONNECT_ERROR:
335	case RDMA_CM_EVENT_UNREACHABLE:
336	case RDMA_CM_EVENT_REJECTED:
337		printf("cmatose: event: %s, error: %d\n",
338		       rdma_event_str(event->event), event->status);
339		connect_error();
340		break;
341	case RDMA_CM_EVENT_DISCONNECTED:
342		rdma_disconnect(cma_id);
343		test.disconnects_left--;
344		break;
345	case RDMA_CM_EVENT_DEVICE_REMOVAL:
346		/* Cleanup will occur after test completes. */
347		break;
348	default:
349		break;
350	}
351	return ret;
352}
353
354static void destroy_node(struct cmatest_node *node)
355{
356	if (!node->cma_id)
357		return;
358
359	if (node->cma_id->qp)
360		rdma_destroy_qp(node->cma_id);
361
362	if (node->cq[SEND_CQ_INDEX])
363		ibv_destroy_cq(node->cq[SEND_CQ_INDEX]);
364
365	if (node->cq[RECV_CQ_INDEX])
366		ibv_destroy_cq(node->cq[RECV_CQ_INDEX]);
367
368	if (node->mem) {
369		ibv_dereg_mr(node->mr);
370		free(node->mem);
371	}
372
373	if (node->pd)
374		ibv_dealloc_pd(node->pd);
375
376	/* Destroy the RDMA ID after all device resources */
377	rdma_destroy_id(node->cma_id);
378}
379
380static int alloc_nodes(void)
381{
382	int ret, i;
383
384	test.nodes = malloc(sizeof *test.nodes * connections);
385	if (!test.nodes) {
386		printf("cmatose: unable to allocate memory for test nodes\n");
387		return -ENOMEM;
388	}
389	memset(test.nodes, 0, sizeof *test.nodes * connections);
390
391	for (i = 0; i < connections; i++) {
392		test.nodes[i].id = i;
393		if (dst_addr) {
394			ret = rdma_create_id(test.channel,
395					     &test.nodes[i].cma_id,
396					     &test.nodes[i], RDMA_PS_TCP);
397			if (ret)
398				goto err;
399		}
400	}
401	return 0;
402err:
403	while (--i >= 0)
404		rdma_destroy_id(test.nodes[i].cma_id);
405	free(test.nodes);
406	return ret;
407}
408
409static void destroy_nodes(void)
410{
411	int i;
412
413	for (i = 0; i < connections; i++)
414		destroy_node(&test.nodes[i]);
415	free(test.nodes);
416}
417
418static int poll_cqs(enum CQ_INDEX index)
419{
420	struct ibv_wc wc[8];
421	int done, i, ret;
422
423	for (i = 0; i < connections; i++) {
424		if (!test.nodes[i].connected)
425			continue;
426
427		for (done = 0; done < message_count; done += ret) {
428			ret = ibv_poll_cq(test.nodes[i].cq[index], 8, wc);
429			if (ret < 0) {
430				printf("cmatose: failed polling CQ: %d\n", ret);
431				return ret;
432			}
433		}
434	}
435	return 0;
436}
437
438static int connect_events(void)
439{
440	struct rdma_cm_event *event;
441	int err = 0, ret = 0;
442
443	while (test.connects_left && !err) {
444		err = rdma_get_cm_event(test.channel, &event);
445		if (!err) {
446			cma_handler(event->id, event);
447			rdma_ack_cm_event(event);
448		} else {
449			perror("cmatose: failure in rdma_get_cm_event in connect events");
450			ret = errno;
451		}
452	}
453
454	return ret;
455}
456
457static int disconnect_events(void)
458{
459	struct rdma_cm_event *event;
460	int err = 0, ret = 0;
461
462	while (test.disconnects_left && !err) {
463		err = rdma_get_cm_event(test.channel, &event);
464		if (!err) {
465			cma_handler(event->id, event);
466			rdma_ack_cm_event(event);
467		} else {
468			perror("cmatose: failure in rdma_get_cm_event in disconnect events");
469			ret = errno;
470		}
471	}
472
473	return ret;
474}
475
476static int migrate_channel(struct rdma_cm_id *listen_id)
477{
478	struct rdma_event_channel *channel;
479	int i, ret;
480
481	printf("migrating to new event channel\n");
482
483	channel = rdma_create_event_channel();
484	if (!channel) {
485		perror("cmatose: failed to create event channel");
486		return -1;
487	}
488
489	ret = 0;
490	if (listen_id)
491		ret = rdma_migrate_id(listen_id, channel);
492
493	for (i = 0; i < connections && !ret; i++)
494		ret = rdma_migrate_id(test.nodes[i].cma_id, channel);
495
496	if (!ret) {
497		rdma_destroy_event_channel(test.channel);
498		test.channel = channel;
499	} else
500		perror("cmatose: failure migrating to channel");
501
502	return ret;
503}
504
/*
 * Resolve a host name or address string and copy the first result into
 * *addr.
 *
 * dst:  host name or numeric address handed to getaddrinfo().
 * addr: destination; must have room for a struct sockaddr_in6.
 *
 * Returns 0 on success, the getaddrinfo() error code when resolution
 * fails, or -1 when the first result is neither IPv4 nor IPv6 (in which
 * case *addr is left untouched).
 */
static int get_addr(char *dst, struct sockaddr *addr)
{
	struct addrinfo *info;
	size_t len;
	int ret;

	ret = getaddrinfo(dst, NULL, NULL, &info);
	if (ret) {
		printf("getaddrinfo failed - invalid hostname or IP address\n");
		return ret;
	}

	switch (info->ai_family) {
	case PF_INET:
		len = sizeof(struct sockaddr_in);
		break;
	case PF_INET6:
		len = sizeof(struct sockaddr_in6);
		break;
	default:
		len = 0;
		ret = -1;
		break;
	}

	if (len)
		memcpy(addr, info->ai_addr, len);

	freeaddrinfo(info);
	return ret;
}
526
527static int run_server(void)
528{
529	struct rdma_cm_id *listen_id;
530	int i, ret;
531
532	printf("cmatose: starting server\n");
533	ret = rdma_create_id(test.channel, &listen_id, &test, RDMA_PS_TCP);
534	if (ret) {
535		perror("cmatose: listen request failed");
536		return ret;
537	}
538
539	if (src_addr) {
540		ret = get_addr(src_addr, &test.addr.src_addr);
541		if (ret)
542			goto out;
543		if (test.addr.src_addr.sa_family == AF_INET)
544			((struct sockaddr_in *) &test.addr.src_addr)->sin_port = port;
545		else
546			((struct sockaddr_in6 *) &test.addr.src_addr)->sin6_port = port;
547
548	} else {
549		test.addr.src_addr.sa_family = PF_INET;
550		((struct sockaddr_in *) &test.addr.src_addr)->sin_port = port;
551	}
552
553	ret = rdma_bind_addr(listen_id, &test.addr.src_addr);
554
555	if (ret) {
556		perror("cmatose: bind address failed");
557		goto out;
558	}
559
560	ret = rdma_listen(listen_id, 0);
561	if (ret) {
562		perror("cmatose: failure trying to listen");
563		goto out;
564	}
565
566	ret = connect_events();
567	if (ret)
568		goto out;
569
570	if (message_count) {
571		printf("initiating data transfers\n");
572		for (i = 0; i < connections; i++) {
573			ret = post_sends(&test.nodes[i]);
574			if (ret)
575				goto out;
576		}
577
578		printf("completing sends\n");
579		ret = poll_cqs(SEND_CQ_INDEX);
580		if (ret)
581			goto out;
582
583		printf("receiving data transfers\n");
584		ret = poll_cqs(RECV_CQ_INDEX);
585		if (ret)
586			goto out;
587		printf("data transfers complete\n");
588
589	}
590
591	if (migrate) {
592		ret = migrate_channel(listen_id);
593		if (ret)
594			goto out;
595	}
596
597	printf("cmatose: disconnecting\n");
598	for (i = 0; i < connections; i++) {
599		if (!test.nodes[i].connected)
600			continue;
601
602		test.nodes[i].connected = 0;
603		rdma_disconnect(test.nodes[i].cma_id);
604	}
605
606	ret = disconnect_events();
607
608 	printf("disconnected\n");
609
610out:
611	rdma_destroy_id(listen_id);
612	return ret;
613}
614
615static int run_client(void)
616{
617	int i, ret, ret2;
618
619	printf("cmatose: starting client\n");
620	if (src_addr) {
621		ret = get_addr(src_addr, &test.addr.src_addr);
622		if (ret)
623			return ret;
624	}
625
626	ret = get_addr(dst_addr, &test.addr.dst_addr);
627	if (ret)
628		return ret;
629
630	if (test.addr.dst_addr.sa_family == AF_INET)
631		((struct sockaddr_in *) &test.addr.dst_addr)->sin_port = port;
632	else
633		((struct sockaddr_in6 *) &test.addr.dst_addr)->sin6_port = port;
634
635	printf("cmatose: connecting\n");
636	for (i = 0; i < connections; i++) {
637		ret = rdma_resolve_addr(test.nodes[i].cma_id,
638					src_addr ? &test.addr.src_addr : NULL,
639					&test.addr.dst_addr, 2000);
640		if (ret) {
641			perror("cmatose: failure getting addr");
642			connect_error();
643			return ret;
644		}
645	}
646
647	ret = connect_events();
648	if (ret)
649		goto disc;
650
651	if (message_count) {
652		printf("receiving data transfers\n");
653		ret = poll_cqs(RECV_CQ_INDEX);
654		if (ret)
655			goto disc;
656
657		printf("sending replies\n");
658		for (i = 0; i < connections; i++) {
659			ret = post_sends(&test.nodes[i]);
660			if (ret)
661				goto disc;
662		}
663
664		printf("data transfers complete\n");
665	}
666
667	ret = 0;
668
669	if (migrate) {
670		ret = migrate_channel(NULL);
671		if (ret)
672			goto out;
673	}
674disc:
675	ret2 = disconnect_events();
676	if (ret2)
677		ret = ret2;
678out:
679	return ret;
680}
681
682int main(int argc, char **argv)
683{
684	int op, ret;
685
686	while ((op = getopt(argc, argv, "s:b:c:C:S:t:p:m")) != -1) {
687		switch (op) {
688		case 's':
689			dst_addr = optarg;
690			break;
691		case 'b':
692			src_addr = optarg;
693			break;
694		case 'c':
695			connections = atoi(optarg);
696			break;
697		case 'C':
698			message_count = atoi(optarg);
699			break;
700		case 'S':
701			message_size = atoi(optarg);
702			break;
703		case 't':
704			set_tos = 1;
705			tos = (uint8_t) atoi(optarg);
706			break;
707		case 'p':
708			port = atoi(optarg);
709			break;
710		case 'm':
711			migrate = 1;
712			break;
713		default:
714			printf("usage: %s\n", argv[0]);
715			printf("\t[-s server_address]\n");
716			printf("\t[-b bind_address]\n");
717			printf("\t[-c connections]\n");
718			printf("\t[-C message_count]\n");
719			printf("\t[-S message_size]\n");
720			printf("\t[-t type_of_service]\n");
721			printf("\t[-p port_number]\n");
722			printf("\t[-m(igrate)]\n");
723			exit(1);
724		}
725	}
726
727	test.connects_left = connections;
728	test.disconnects_left = connections;
729
730	test.channel = rdma_create_event_channel();
731	if (!test.channel) {
732		printf("failed to create event channel\n");
733		exit(1);
734	}
735
736	if (alloc_nodes())
737		exit(1);
738
739	if (dst_addr)
740		ret = run_client();
741	else
742		ret = run_server();
743
744	printf("test complete\n");
745	destroy_nodes();
746	rdma_destroy_event_channel(test.channel);
747
748	printf("return status %d\n", ret);
749	return ret;
750}
751