1/*
2 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id$
33 */
34
35#include <stdlib.h>
36#include <string.h>
37#include <stdio.h>
38#include <errno.h>
39#include <sys/types.h>
40#include <sys/socket.h>
41#include <netdb.h>
42#include <getopt.h>
43
44#include <rdma/rdma_cma.h>
45#include "common.h"
46
/*
 * State kept for one connection under test: its CM identifier, the verbs
 * objects created on top of it, and the addressing data needed to send
 * datagrams to the peer.
 */
struct cmatest_node {
	/* Index of this node inside test.nodes. */
	int id;
	/* Communication identifier the node is bound to. */
	struct rdma_cm_id *cma_id;
	/* Non-zero once the connection was accepted or established. */
	int connected;
	/* Protection domain owning the CQ, QP, MR and AH below. */
	struct ibv_pd *pd;
	/* Completion queue shared by the send and receive queues. */
	struct ibv_cq *cq;
	/* Registration covering the buffer pointed to by mem. */
	struct ibv_mr *mr;
	/* Address handle used when sending to the peer. */
	struct ibv_ah *ah;
	/* Peer QP number placed in every send work request. */
	uint32_t remote_qpn;
	/* Peer Q_Key placed in every send work request. */
	uint32_t remote_qkey;
	/* Message buffer: message_size bytes plus room for a GRH. */
	void *mem;
};
59
/*
 * Global bookkeeping for a whole test run.
 */
struct cmatest {
	/* Event channel on which all CM events are reported. */
	struct rdma_event_channel *channel;
	/* Array holding one entry per requested connection. */
	struct cmatest_node *nodes;
	/* Next free slot in nodes handed to an incoming connection request. */
	int conn_index;
	/* Connections that have neither completed nor failed yet. */
	int connects_left;

	/* Resolved address information, released by main() on exit. */
	struct rdma_addrinfo *rai;
};
68
69static struct cmatest test;
70static int connections = 1;
71static int message_size = 100;
72static int message_count = 10;
73static const char *port = "7174";
74static uint8_t set_tos = 0;
75static uint8_t tos;
76static char *dst_addr;
77static char *src_addr;
78static struct rdma_addrinfo hints;
79
80static int create_message(struct cmatest_node *node)
81{
82	if (!message_size)
83		message_count = 0;
84
85	if (!message_count)
86		return 0;
87
88	node->mem = malloc(message_size + sizeof(struct ibv_grh));
89	if (!node->mem) {
90		printf("failed message allocation\n");
91		return -1;
92	}
93	node->mr = ibv_reg_mr(node->pd, node->mem,
94			      message_size + sizeof(struct ibv_grh),
95			      IBV_ACCESS_LOCAL_WRITE);
96	if (!node->mr) {
97		printf("failed to reg MR\n");
98		goto err;
99	}
100	return 0;
101err:
102	free(node->mem);
103	return -1;
104}
105
106static int verify_test_params(struct cmatest_node *node)
107{
108	struct ibv_port_attr port_attr;
109	int ret;
110
111	ret = ibv_query_port(node->cma_id->verbs, node->cma_id->port_num,
112			     &port_attr);
113	if (ret)
114		return ret;
115
116	if (message_count && message_size > (1 << (port_attr.active_mtu + 7))) {
117		printf("udaddy: message_size %d is larger than active mtu %d\n",
118		       message_size, 1 << (port_attr.active_mtu + 7));
119		return -EINVAL;
120	}
121
122	return 0;
123}
124
125static int init_node(struct cmatest_node *node)
126{
127	struct ibv_qp_init_attr init_qp_attr;
128	int cqe, ret;
129
130	node->pd = ibv_alloc_pd(node->cma_id->verbs);
131	if (!node->pd) {
132		ret = -ENOMEM;
133		printf("udaddy: unable to allocate PD\n");
134		goto out;
135	}
136
137	cqe = message_count ? message_count * 2 : 2;
138	node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, NULL, 0);
139	if (!node->cq) {
140		ret = -ENOMEM;
141		printf("udaddy: unable to create CQ\n");
142		goto out;
143	}
144
145	memset(&init_qp_attr, 0, sizeof init_qp_attr);
146	init_qp_attr.cap.max_send_wr = message_count ? message_count : 1;
147	init_qp_attr.cap.max_recv_wr = message_count ? message_count : 1;
148	init_qp_attr.cap.max_send_sge = 1;
149	init_qp_attr.cap.max_recv_sge = 1;
150	init_qp_attr.qp_context = node;
151	init_qp_attr.sq_sig_all = 0;
152	init_qp_attr.qp_type = IBV_QPT_UD;
153	init_qp_attr.send_cq = node->cq;
154	init_qp_attr.recv_cq = node->cq;
155	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
156	if (ret) {
157		perror("udaddy: unable to create QP");
158		goto out;
159	}
160
161	ret = create_message(node);
162	if (ret) {
163		printf("udaddy: failed to create messages: %d\n", ret);
164		goto out;
165	}
166out:
167	return ret;
168}
169
170static int post_recvs(struct cmatest_node *node)
171{
172	struct ibv_recv_wr recv_wr, *recv_failure;
173	struct ibv_sge sge;
174	int i, ret = 0;
175
176	if (!message_count)
177		return 0;
178
179	recv_wr.next = NULL;
180	recv_wr.sg_list = &sge;
181	recv_wr.num_sge = 1;
182	recv_wr.wr_id = (uintptr_t) node;
183
184	sge.length = message_size + sizeof(struct ibv_grh);
185	sge.lkey = node->mr->lkey;
186	sge.addr = (uintptr_t) node->mem;
187
188	for (i = 0; i < message_count && !ret; i++ ) {
189		ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure);
190		if (ret) {
191			printf("failed to post receives: %d\n", ret);
192			break;
193		}
194	}
195	return ret;
196}
197
198static int post_sends(struct cmatest_node *node, int signal_flag)
199{
200	struct ibv_send_wr send_wr, *bad_send_wr;
201	struct ibv_sge sge;
202	int i, ret = 0;
203
204	if (!node->connected || !message_count)
205		return 0;
206
207	send_wr.next = NULL;
208	send_wr.sg_list = &sge;
209	send_wr.num_sge = 1;
210	send_wr.opcode = IBV_WR_SEND_WITH_IMM;
211	send_wr.send_flags = signal_flag;
212	send_wr.wr_id = (unsigned long)node;
213	send_wr.imm_data = htobe32(node->cma_id->qp->qp_num);
214
215	send_wr.wr.ud.ah = node->ah;
216	send_wr.wr.ud.remote_qpn = node->remote_qpn;
217	send_wr.wr.ud.remote_qkey = node->remote_qkey;
218
219	sge.length = message_size;
220	sge.lkey = node->mr->lkey;
221	sge.addr = (uintptr_t) node->mem;
222
223	for (i = 0; i < message_count && !ret; i++) {
224		ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr);
225		if (ret)
226			printf("failed to post sends: %d\n", ret);
227	}
228	return ret;
229}
230
231static void connect_error(void)
232{
233	test.connects_left--;
234}
235
236static int addr_handler(struct cmatest_node *node)
237{
238	int ret;
239
240	if (set_tos) {
241		ret = rdma_set_option(node->cma_id, RDMA_OPTION_ID,
242				      RDMA_OPTION_ID_TOS, &tos, sizeof tos);
243		if (ret)
244			perror("udaddy: set TOS option failed");
245	}
246
247	ret = rdma_resolve_route(node->cma_id, 2000);
248	if (ret) {
249		perror("udaddy: resolve route failed");
250		connect_error();
251	}
252	return ret;
253}
254
255static int route_handler(struct cmatest_node *node)
256{
257	struct rdma_conn_param conn_param;
258	int ret;
259
260	ret = verify_test_params(node);
261	if (ret)
262		goto err;
263
264	ret = init_node(node);
265	if (ret)
266		goto err;
267
268	ret = post_recvs(node);
269	if (ret)
270		goto err;
271
272	memset(&conn_param, 0, sizeof conn_param);
273	conn_param.private_data = test.rai->ai_connect;
274	conn_param.private_data_len = test.rai->ai_connect_len;
275	ret = rdma_connect(node->cma_id, &conn_param);
276	if (ret) {
277		perror("udaddy: failure connecting");
278		goto err;
279	}
280	return 0;
281err:
282	connect_error();
283	return ret;
284}
285
286static int connect_handler(struct rdma_cm_id *cma_id)
287{
288	struct cmatest_node *node;
289	struct rdma_conn_param conn_param;
290	int ret;
291
292	if (test.conn_index == connections) {
293		ret = -ENOMEM;
294		goto err1;
295	}
296	node = &test.nodes[test.conn_index++];
297
298	node->cma_id = cma_id;
299	cma_id->context = node;
300
301	ret = verify_test_params(node);
302	if (ret)
303		goto err2;
304
305	ret = init_node(node);
306	if (ret)
307		goto err2;
308
309	ret = post_recvs(node);
310	if (ret)
311		goto err2;
312
313	memset(&conn_param, 0, sizeof conn_param);
314	conn_param.qp_num = node->cma_id->qp->qp_num;
315	ret = rdma_accept(node->cma_id, &conn_param);
316	if (ret) {
317		perror("udaddy: failure accepting");
318		goto err2;
319	}
320	node->connected = 1;
321	test.connects_left--;
322	return 0;
323
324err2:
325	node->cma_id = NULL;
326	connect_error();
327err1:
328	printf("udaddy: failing connection request\n");
329	rdma_reject(cma_id, NULL, 0);
330	return ret;
331}
332
333static int resolved_handler(struct cmatest_node *node,
334			    struct rdma_cm_event *event)
335{
336	node->remote_qpn = event->param.ud.qp_num;
337	node->remote_qkey = event->param.ud.qkey;
338	node->ah = ibv_create_ah(node->pd, &event->param.ud.ah_attr);
339	if (!node->ah) {
340		printf("udaddy: failure creating address handle\n");
341		goto err;
342	}
343
344	node->connected = 1;
345	test.connects_left--;
346	return 0;
347err:
348	connect_error();
349	return -1;
350}
351
352static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
353{
354	int ret = 0;
355
356	switch (event->event) {
357	case RDMA_CM_EVENT_ADDR_RESOLVED:
358		ret = addr_handler(cma_id->context);
359		break;
360	case RDMA_CM_EVENT_ROUTE_RESOLVED:
361		ret = route_handler(cma_id->context);
362		break;
363	case RDMA_CM_EVENT_CONNECT_REQUEST:
364		ret = connect_handler(cma_id);
365		break;
366	case RDMA_CM_EVENT_ESTABLISHED:
367		ret = resolved_handler(cma_id->context, event);
368		break;
369	case RDMA_CM_EVENT_ADDR_ERROR:
370	case RDMA_CM_EVENT_ROUTE_ERROR:
371	case RDMA_CM_EVENT_CONNECT_ERROR:
372	case RDMA_CM_EVENT_UNREACHABLE:
373	case RDMA_CM_EVENT_REJECTED:
374		printf("udaddy: event: %s, error: %d\n",
375		       rdma_event_str(event->event), event->status);
376		connect_error();
377		ret = event->status;
378		break;
379	case RDMA_CM_EVENT_DEVICE_REMOVAL:
380		/* Cleanup will occur after test completes. */
381		break;
382	default:
383		break;
384	}
385	return ret;
386}
387
388static void destroy_node(struct cmatest_node *node)
389{
390	if (!node->cma_id)
391		return;
392
393	if (node->ah)
394		ibv_destroy_ah(node->ah);
395
396	if (node->cma_id->qp)
397		rdma_destroy_qp(node->cma_id);
398
399	if (node->cq)
400		ibv_destroy_cq(node->cq);
401
402	if (node->mem) {
403		ibv_dereg_mr(node->mr);
404		free(node->mem);
405	}
406
407	if (node->pd)
408		ibv_dealloc_pd(node->pd);
409
410	/* Destroy the RDMA ID after all device resources */
411	rdma_destroy_id(node->cma_id);
412}
413
414static int alloc_nodes(void)
415{
416	int ret, i;
417
418	test.nodes = malloc(sizeof *test.nodes * connections);
419	if (!test.nodes) {
420		printf("udaddy: unable to allocate memory for test nodes\n");
421		return -ENOMEM;
422	}
423	memset(test.nodes, 0, sizeof *test.nodes * connections);
424
425	for (i = 0; i < connections; i++) {
426		test.nodes[i].id = i;
427		if (dst_addr) {
428			ret = rdma_create_id(test.channel,
429					     &test.nodes[i].cma_id,
430					     &test.nodes[i], hints.ai_port_space);
431			if (ret)
432				goto err;
433		}
434	}
435	return 0;
436err:
437	while (--i >= 0)
438		rdma_destroy_id(test.nodes[i].cma_id);
439	free(test.nodes);
440	return ret;
441}
442
443static void destroy_nodes(void)
444{
445	int i;
446
447	for (i = 0; i < connections; i++)
448		destroy_node(&test.nodes[i]);
449	free(test.nodes);
450}
451
452static void create_reply_ah(struct cmatest_node *node, struct ibv_wc *wc)
453{
454	struct ibv_qp_attr attr;
455	struct ibv_qp_init_attr init_attr;
456
457	node->ah = ibv_create_ah_from_wc(node->pd, wc, node->mem,
458					 node->cma_id->port_num);
459	node->remote_qpn = be32toh(wc->imm_data);
460
461	ibv_query_qp(node->cma_id->qp, &attr, IBV_QP_QKEY, &init_attr);
462	node->remote_qkey = attr.qkey;
463}
464
465static int poll_cqs(void)
466{
467	struct ibv_wc wc[8];
468	int done, i, ret;
469
470	for (i = 0; i < connections; i++) {
471		if (!test.nodes[i].connected)
472			continue;
473
474		for (done = 0; done < message_count; done += ret) {
475			ret = ibv_poll_cq(test.nodes[i].cq, 8, wc);
476			if (ret < 0) {
477				printf("udaddy: failed polling CQ: %d\n", ret);
478				return ret;
479			}
480
481			if (ret && !test.nodes[i].ah)
482				create_reply_ah(&test.nodes[i], wc);
483		}
484	}
485	return 0;
486}
487
488static int connect_events(void)
489{
490	struct rdma_cm_event *event;
491	int ret = 0;
492
493	while (test.connects_left && !ret) {
494		ret = rdma_get_cm_event(test.channel, &event);
495		if (!ret) {
496			ret = cma_handler(event->id, event);
497			rdma_ack_cm_event(event);
498		}
499	}
500	return ret;
501}
502
503static int run_server(void)
504{
505	struct rdma_cm_id *listen_id;
506	int i, ret;
507
508	printf("udaddy: starting server\n");
509	ret = rdma_create_id(test.channel, &listen_id, &test, hints.ai_port_space);
510	if (ret) {
511		perror("udaddy: listen request failed");
512		return ret;
513	}
514
515	ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &test.rai);
516	if (ret) {
517		printf("udaddy: getrdmaaddr error: %s\n", gai_strerror(ret));
518		goto out;
519	}
520
521	ret = rdma_bind_addr(listen_id, test.rai->ai_src_addr);
522	if (ret) {
523		perror("udaddy: bind address failed");
524		goto out;
525	}
526
527	ret = rdma_listen(listen_id, 0);
528	if (ret) {
529		perror("udaddy: failure trying to listen");
530		goto out;
531	}
532
533	connect_events();
534
535	if (message_count) {
536		printf("receiving data transfers\n");
537		ret = poll_cqs();
538		if (ret)
539			goto out;
540
541		printf("sending replies\n");
542		for (i = 0; i < connections; i++) {
543			ret = post_sends(&test.nodes[i], IBV_SEND_SIGNALED);
544			if (ret)
545				goto out;
546		}
547
548		ret = poll_cqs();
549		if (ret)
550			goto out;
551		printf("data transfers complete\n");
552	}
553out:
554	rdma_destroy_id(listen_id);
555	return ret;
556}
557
/*
 * Active side of the test: resolve the server address, start address
 * resolution on every node, drive the CM events until all connections are
 * settled, then send the messages and wait for the replies.
 *
 * Returns 0 on success or the error of the first failing step.
 */
static int run_client(void)
{
	int idx, rc;

	printf("udaddy: starting client\n");

	rc = get_rdma_addr(src_addr, dst_addr, port, &hints, &test.rai);
	if (rc) {
		printf("udaddy: getaddrinfo error: %s\n", gai_strerror(rc));
		return rc;
	}

	printf("udaddy: connecting\n");
	for (idx = 0; idx < connections; idx++) {
		rc = rdma_resolve_addr(test.nodes[idx].cma_id,
				       test.rai->ai_src_addr,
				       test.rai->ai_dst_addr, 2000);
		if (rc) {
			perror("udaddy: failure getting addr");
			connect_error();
			return rc;
		}
	}

	rc = connect_events();
	if (rc || !message_count)
		return rc;

	printf("initiating data transfers\n");
	for (idx = 0; idx < connections; idx++) {
		rc = post_sends(&test.nodes[idx], 0);
		if (rc)
			return rc;
	}

	printf("receiving data transfers\n");
	rc = poll_cqs();
	if (rc)
		return rc;

	printf("data transfers complete\n");
	return 0;
}
602
603int main(int argc, char **argv)
604{
605	int op, ret;
606
607	hints.ai_port_space = RDMA_PS_UDP;
608	while ((op = getopt(argc, argv, "s:b:c:C:S:t:p:P:f:")) != -1) {
609		switch (op) {
610		case 's':
611			dst_addr = optarg;
612			break;
613		case 'b':
614			src_addr = optarg;
615			break;
616		case 'c':
617			connections = atoi(optarg);
618			break;
619		case 'C':
620			message_count = atoi(optarg);
621			break;
622		case 'S':
623			message_size = atoi(optarg);
624			break;
625		case 't':
626			set_tos = 1;
627			tos = (uint8_t) strtoul(optarg, NULL, 0);
628			break;
629		case 'p': /* for backwards compatibility - use -P */
630			hints.ai_port_space = strtol(optarg, NULL, 0);
631			break;
632		case 'f':
633			if (!strncasecmp("ip", optarg, 2)) {
634				hints.ai_flags = RAI_NUMERICHOST;
635			} else if (!strncasecmp("gid", optarg, 3)) {
636				hints.ai_flags = RAI_NUMERICHOST | RAI_FAMILY;
637				hints.ai_family = AF_IB;
638			} else if (strncasecmp("name", optarg, 4)) {
639				fprintf(stderr, "Warning: unknown address format\n");
640			}
641			break;
642		case 'P':
643			if (!strncasecmp("ipoib", optarg, 5)) {
644				hints.ai_port_space = RDMA_PS_IPOIB;
645			} else if (strncasecmp("udp", optarg, 3)) {
646				fprintf(stderr, "Warning: unknown port space format\n");
647			}
648			break;
649		default:
650			printf("usage: %s\n", argv[0]);
651			printf("\t[-s server_address]\n");
652			printf("\t[-b bind_address]\n");
653			printf("\t[-f address_format]\n");
654			printf("\t    name, ip, ipv6, or gid\n");
655			printf("\t[-P port_space]\n");
656			printf("\t    udp or ipoib\n");
657			printf("\t[-c connections]\n");
658			printf("\t[-C message_count]\n");
659			printf("\t[-S message_size]\n");
660			printf("\t[-t type_of_service]\n");
661			printf("\t[-p port_space - %#x for UDP (default), "
662			       "%#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB);
663			exit(1);
664		}
665	}
666
667	test.connects_left = connections;
668
669	test.channel = rdma_create_event_channel();
670	if (!test.channel) {
671		perror("failed to create event channel");
672		exit(1);
673	}
674
675	if (alloc_nodes())
676		exit(1);
677
678	if (dst_addr) {
679		ret = run_client();
680	} else {
681		hints.ai_flags |= RAI_PASSIVE;
682		ret = run_server();
683	}
684
685	printf("test complete\n");
686	destroy_nodes();
687	rdma_destroy_event_channel(test.channel);
688	if (test.rai)
689		rdma_freeaddrinfo(test.rai);
690
691	printf("return status %d\n", ret);
692	return ret;
693}
694