1/*
2 * Copyright (c) 2005-2007 Intel Corporation.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id$
33 */
34
35#include <stdlib.h>
36#include <string.h>
37#include <stdio.h>
38#include <errno.h>
39#include <sys/types.h>
40#include <arpa/inet.h>
41#include <sys/socket.h>
42#include <netdb.h>
43#include <unistd.h>
44#include <getopt.h>
45
46#include <rdma/rdma_cma.h>
47#include <infiniband/ib.h>
48
/*
 * Per-connection test state: one RDMA CM identifier together with the
 * verbs resources and multicast destination details used with it.
 */
struct cmatest_node {
	int			id;		/* index of this node in test.nodes */
	struct rdma_cm_id	*cma_id;	/* RDMA CM identifier owned by this node */
	int			connected;	/* set to 1 once the multicast join succeeds */
	struct ibv_pd		*pd;		/* protection domain for the CQ/QP/MR/AH */
	struct ibv_cq		*cq;		/* single CQ used for both sends and receives */
	struct ibv_mr		*mr;		/* memory registration covering mem */
	struct ibv_ah		*ah;		/* address handle built from the join event */
	uint32_t		remote_qpn;	/* QP number reported by the join event */
	uint32_t		remote_qkey;	/* Q_Key reported by the join event */
	void			*mem;		/* message buffer: message_size plus a struct ibv_grh */
};
61
/*
 * Global test state shared by all nodes.
 */
struct cmatest {
	struct rdma_event_channel *channel;	/* event channel all CM IDs report on */
	pthread_t 		cmathread;	/* thread draining events after the joins */
	struct cmatest_node	*nodes;		/* array of "connections" nodes */
	int			conn_index;
	int			connects_left;	/* joins still pending (success or failure) */

	struct sockaddr_storage	dst_in;		/* storage for the multicast address */
	struct sockaddr		*dst_addr;	/* points at dst_in */
	struct sockaddr_storage	src_in;		/* storage for the bind address */
	struct sockaddr		*src_addr;	/* points at src_in when -b is given, else NULL */
};
74
75static struct cmatest test;
76static int connections = 1;
77static int message_size = 100;
78static int message_count = 10;
79static int is_sender;
80static int unmapped_addr;
81static char *dst_addr;
82static char *src_addr;
83static enum rdma_port_space port_space = RDMA_PS_UDP;
84
85static int create_message(struct cmatest_node *node)
86{
87	if (!message_size)
88		message_count = 0;
89
90	if (!message_count)
91		return 0;
92
93	node->mem = malloc(message_size + sizeof(struct ibv_grh));
94	if (!node->mem) {
95		printf("failed message allocation\n");
96		return -1;
97	}
98	node->mr = ibv_reg_mr(node->pd, node->mem,
99			      message_size + sizeof(struct ibv_grh),
100			      IBV_ACCESS_LOCAL_WRITE);
101	if (!node->mr) {
102		printf("failed to reg MR\n");
103		goto err;
104	}
105	return 0;
106err:
107	free(node->mem);
108	return -1;
109}
110
111static int verify_test_params(struct cmatest_node *node)
112{
113	struct ibv_port_attr port_attr;
114	int ret;
115
116	ret = ibv_query_port(node->cma_id->verbs, node->cma_id->port_num,
117			     &port_attr);
118	if (ret)
119		return ret;
120
121	if (message_count && message_size > (1 << (port_attr.active_mtu + 7))) {
122		printf("mckey: message_size %d is larger than active mtu %d\n",
123		       message_size, 1 << (port_attr.active_mtu + 7));
124		return -EINVAL;
125	}
126
127	return 0;
128}
129
130static int init_node(struct cmatest_node *node)
131{
132	struct ibv_qp_init_attr init_qp_attr;
133	int cqe, ret;
134
135	node->pd = ibv_alloc_pd(node->cma_id->verbs);
136	if (!node->pd) {
137		ret = -ENOMEM;
138		printf("mckey: unable to allocate PD\n");
139		goto out;
140	}
141
142	cqe = message_count ? message_count * 2 : 2;
143	node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, NULL, 0);
144	if (!node->cq) {
145		ret = -ENOMEM;
146		printf("mckey: unable to create CQ\n");
147		goto out;
148	}
149
150	memset(&init_qp_attr, 0, sizeof init_qp_attr);
151	init_qp_attr.cap.max_send_wr = message_count ? message_count : 1;
152	init_qp_attr.cap.max_recv_wr = message_count ? message_count : 1;
153	init_qp_attr.cap.max_send_sge = 1;
154	init_qp_attr.cap.max_recv_sge = 1;
155	init_qp_attr.qp_context = node;
156	init_qp_attr.sq_sig_all = 0;
157	init_qp_attr.qp_type = IBV_QPT_UD;
158	init_qp_attr.send_cq = node->cq;
159	init_qp_attr.recv_cq = node->cq;
160	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
161	if (ret) {
162		perror("mckey: unable to create QP");
163		goto out;
164	}
165
166	ret = create_message(node);
167	if (ret) {
168		printf("mckey: failed to create messages: %d\n", ret);
169		goto out;
170	}
171out:
172	return ret;
173}
174
175static int post_recvs(struct cmatest_node *node)
176{
177	struct ibv_recv_wr recv_wr, *recv_failure;
178	struct ibv_sge sge;
179	int i, ret = 0;
180
181	if (!message_count)
182		return 0;
183
184	recv_wr.next = NULL;
185	recv_wr.sg_list = &sge;
186	recv_wr.num_sge = 1;
187	recv_wr.wr_id = (uintptr_t) node;
188
189	sge.length = message_size + sizeof(struct ibv_grh);
190	sge.lkey = node->mr->lkey;
191	sge.addr = (uintptr_t) node->mem;
192
193	for (i = 0; i < message_count && !ret; i++ ) {
194		ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure);
195		if (ret) {
196			printf("failed to post receives: %d\n", ret);
197			break;
198		}
199	}
200	return ret;
201}
202
203static int post_sends(struct cmatest_node *node, int signal_flag)
204{
205	struct ibv_send_wr send_wr, *bad_send_wr;
206	struct ibv_sge sge;
207	int i, ret = 0;
208
209	if (!node->connected || !message_count)
210		return 0;
211
212	send_wr.next = NULL;
213	send_wr.sg_list = &sge;
214	send_wr.num_sge = 1;
215	send_wr.opcode = IBV_WR_SEND_WITH_IMM;
216	send_wr.send_flags = signal_flag;
217	send_wr.wr_id = (unsigned long)node;
218	send_wr.imm_data = htobe32(node->cma_id->qp->qp_num);
219
220	send_wr.wr.ud.ah = node->ah;
221	send_wr.wr.ud.remote_qpn = node->remote_qpn;
222	send_wr.wr.ud.remote_qkey = node->remote_qkey;
223
224	sge.length = message_size;
225	sge.lkey = node->mr->lkey;
226	sge.addr = (uintptr_t) node->mem;
227
228	for (i = 0; i < message_count && !ret; i++) {
229		ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr);
230		if (ret)
231			printf("failed to post sends: %d\n", ret);
232	}
233	return ret;
234}
235
236static void connect_error(void)
237{
238	test.connects_left--;
239}
240
241static int addr_handler(struct cmatest_node *node)
242{
243	int ret;
244
245	ret = verify_test_params(node);
246	if (ret)
247		goto err;
248
249	ret = init_node(node);
250	if (ret)
251		goto err;
252
253	if (!is_sender) {
254		ret = post_recvs(node);
255		if (ret)
256			goto err;
257	}
258
259	ret = rdma_join_multicast(node->cma_id, test.dst_addr, node);
260	if (ret) {
261		perror("mckey: failure joining");
262		goto err;
263	}
264	return 0;
265err:
266	connect_error();
267	return ret;
268}
269
270static int join_handler(struct cmatest_node *node,
271			struct rdma_ud_param *param)
272{
273	char buf[40];
274
275	inet_ntop(AF_INET6, param->ah_attr.grh.dgid.raw, buf, 40);
276	printf("mckey: joined dgid: %s mlid 0x%x sl %d\n", buf,
277		param->ah_attr.dlid, param->ah_attr.sl);
278
279	node->remote_qpn = param->qp_num;
280	node->remote_qkey = param->qkey;
281	node->ah = ibv_create_ah(node->pd, &param->ah_attr);
282	if (!node->ah) {
283		printf("mckey: failure creating address handle\n");
284		goto err;
285	}
286
287	node->connected = 1;
288	test.connects_left--;
289	return 0;
290err:
291	connect_error();
292	return -1;
293}
294
295static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
296{
297	int ret = 0;
298
299	switch (event->event) {
300	case RDMA_CM_EVENT_ADDR_RESOLVED:
301		ret = addr_handler(cma_id->context);
302		break;
303	case RDMA_CM_EVENT_MULTICAST_JOIN:
304		ret = join_handler(cma_id->context, &event->param.ud);
305		break;
306	case RDMA_CM_EVENT_ADDR_ERROR:
307	case RDMA_CM_EVENT_ROUTE_ERROR:
308	case RDMA_CM_EVENT_MULTICAST_ERROR:
309		printf("mckey: event: %s, error: %d\n",
310		       rdma_event_str(event->event), event->status);
311		connect_error();
312		ret = event->status;
313		break;
314	case RDMA_CM_EVENT_DEVICE_REMOVAL:
315		/* Cleanup will occur after test completes. */
316		break;
317	default:
318		break;
319	}
320	return ret;
321}
322
323static void *cma_thread(void *arg)
324{
325	struct rdma_cm_event *event;
326	int ret;
327
328	while (1) {
329		ret = rdma_get_cm_event(test.channel, &event);
330		if (ret) {
331			perror("rdma_get_cm_event");
332			break;
333		}
334
335		switch (event->event) {
336		case RDMA_CM_EVENT_MULTICAST_ERROR:
337		case RDMA_CM_EVENT_ADDR_CHANGE:
338			printf("mckey: event: %s, status: %d\n",
339			       rdma_event_str(event->event), event->status);
340			break;
341		default:
342			break;
343		}
344
345		rdma_ack_cm_event(event);
346	}
347	return NULL;
348}
349
350static void destroy_node(struct cmatest_node *node)
351{
352	if (!node->cma_id)
353		return;
354
355	if (node->ah)
356		ibv_destroy_ah(node->ah);
357
358	if (node->cma_id->qp)
359		rdma_destroy_qp(node->cma_id);
360
361	if (node->cq)
362		ibv_destroy_cq(node->cq);
363
364	if (node->mem) {
365		ibv_dereg_mr(node->mr);
366		free(node->mem);
367	}
368
369	if (node->pd)
370		ibv_dealloc_pd(node->pd);
371
372	/* Destroy the RDMA ID after all device resources */
373	rdma_destroy_id(node->cma_id);
374}
375
376static int alloc_nodes(void)
377{
378	int ret, i;
379
380	test.nodes = malloc(sizeof *test.nodes * connections);
381	if (!test.nodes) {
382		printf("mckey: unable to allocate memory for test nodes\n");
383		return -ENOMEM;
384	}
385	memset(test.nodes, 0, sizeof *test.nodes * connections);
386
387	for (i = 0; i < connections; i++) {
388		test.nodes[i].id = i;
389		ret = rdma_create_id(test.channel, &test.nodes[i].cma_id,
390				     &test.nodes[i], port_space);
391		if (ret)
392			goto err;
393	}
394	return 0;
395err:
396	while (--i >= 0)
397		rdma_destroy_id(test.nodes[i].cma_id);
398	free(test.nodes);
399	return ret;
400}
401
402static void destroy_nodes(void)
403{
404	int i;
405
406	for (i = 0; i < connections; i++)
407		destroy_node(&test.nodes[i]);
408	free(test.nodes);
409}
410
411static int poll_cqs(void)
412{
413	struct ibv_wc wc[8];
414	int done, i, ret;
415
416	for (i = 0; i < connections; i++) {
417		if (!test.nodes[i].connected)
418			continue;
419
420		for (done = 0; done < message_count; done += ret) {
421			ret = ibv_poll_cq(test.nodes[i].cq, 8, wc);
422			if (ret < 0) {
423				printf("mckey: failed polling CQ: %d\n", ret);
424				return ret;
425			}
426		}
427	}
428	return 0;
429}
430
431static int connect_events(void)
432{
433	struct rdma_cm_event *event;
434	int ret = 0;
435
436	while (test.connects_left && !ret) {
437		ret = rdma_get_cm_event(test.channel, &event);
438		if (!ret) {
439			ret = cma_handler(event->id, event);
440			rdma_ack_cm_event(event);
441		}
442	}
443	return ret;
444}
445
/*
 * Resolve a host name or address string with getaddrinfo() and copy the
 * first result into addr.  The caller must provide storage large enough
 * for the resolved address.
 *
 * Returns 0 on success or the getaddrinfo() error code.
 */
static int get_addr(char *dst, struct sockaddr *addr)
{
	struct addrinfo *info;
	int rc = getaddrinfo(dst, NULL, NULL, &info);

	if (rc != 0) {
		printf("getaddrinfo failed (%s) - invalid hostname or IP address\n", gai_strerror(rc));
		return rc;
	}

	/* Only the first entry of the result list is used. */
	memcpy(addr, info->ai_addr, info->ai_addrlen);
	freeaddrinfo(info);
	return 0;
}
461
462static int get_dst_addr(char *dst, struct sockaddr *addr)
463{
464	struct sockaddr_ib *sib;
465
466	if (!unmapped_addr)
467		return get_addr(dst, addr);
468
469	sib = (struct sockaddr_ib *) addr;
470	memset(sib, 0, sizeof *sib);
471	sib->sib_family = AF_IB;
472	inet_pton(AF_INET6, dst, &sib->sib_addr);
473	return 0;
474}
475
476static int run(void)
477{
478	int i, ret, err;
479
480	printf("mckey: starting %s\n", is_sender ? "client" : "server");
481	if (src_addr) {
482		ret = get_addr(src_addr, (struct sockaddr *) &test.src_in);
483		if (ret)
484			return ret;
485	}
486
487	ret = get_dst_addr(dst_addr, (struct sockaddr *) &test.dst_in);
488	if (ret)
489		return ret;
490
491	printf("mckey: joining\n");
492	for (i = 0; i < connections; i++) {
493		if (src_addr) {
494			ret = rdma_bind_addr(test.nodes[i].cma_id,
495					     test.src_addr);
496			if (ret) {
497				perror("mckey: addr bind failure");
498				connect_error();
499				return ret;
500			}
501		}
502
503		if (unmapped_addr)
504			ret = addr_handler(&test.nodes[i]);
505		else
506			ret = rdma_resolve_addr(test.nodes[i].cma_id,
507						test.src_addr, test.dst_addr,
508						2000);
509		if (ret) {
510			perror("mckey: resolve addr failure");
511			connect_error();
512			return ret;
513		}
514	}
515
516	ret = connect_events();
517	if (ret)
518		goto out;
519
520	pthread_create(&test.cmathread, NULL, cma_thread, NULL);
521
522	/*
523	 * Pause to give SM chance to configure switches.  We don't want to
524	 * handle reliability issue in this simple test program.
525	 */
526	sleep(3);
527
528	if (message_count) {
529		if (is_sender) {
530			printf("initiating data transfers\n");
531			for (i = 0; i < connections; i++) {
532				ret = post_sends(&test.nodes[i], 0);
533				if (ret)
534					goto out;
535			}
536		} else {
537			printf("receiving data transfers\n");
538			ret = poll_cqs();
539			if (ret)
540				goto out;
541		}
542		printf("data transfers complete\n");
543	}
544out:
545	for (i = 0; i < connections; i++) {
546		err = rdma_leave_multicast(test.nodes[i].cma_id,
547					   test.dst_addr);
548		if (err) {
549			perror("mckey: failure leaving");
550			ret = err;
551		}
552	}
553	return ret;
554}
555
556int main(int argc, char **argv)
557{
558	int op, ret;
559
560
561	while ((op = getopt(argc, argv, "m:M:sb:c:C:S:p:")) != -1) {
562		switch (op) {
563		case 'm':
564			dst_addr = optarg;
565			break;
566		case 'M':
567			unmapped_addr = 1;
568			dst_addr = optarg;
569			break;
570		case 's':
571			is_sender = 1;
572			break;
573		case 'b':
574			src_addr = optarg;
575			test.src_addr = (struct sockaddr *) &test.src_in;
576			break;
577		case 'c':
578			connections = atoi(optarg);
579			break;
580		case 'C':
581			message_count = atoi(optarg);
582			break;
583		case 'S':
584			message_size = atoi(optarg);
585			break;
586		case 'p':
587			port_space = strtol(optarg, NULL, 0);
588			break;
589		default:
590			printf("usage: %s\n", argv[0]);
591			printf("\t-m multicast_address\n");
592			printf("\t[-M unmapped_multicast_address]\n"
593			       "\t replaces -m and requires -b\n");
594			printf("\t[-s(ender)]\n");
595			printf("\t[-b bind_address]\n");
596			printf("\t[-c connections]\n");
597			printf("\t[-C message_count]\n");
598			printf("\t[-S message_size]\n");
599			printf("\t[-p port_space - %#x for UDP (default), "
600			       "%#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB);
601			exit(1);
602		}
603	}
604
605	if (unmapped_addr && !src_addr) {
606		printf("unmapped multicast address requires binding "
607			"to source address\n");
608		exit(1);
609	}
610
611	test.dst_addr = (struct sockaddr *) &test.dst_in;
612	test.connects_left = connections;
613
614	test.channel = rdma_create_event_channel();
615	if (!test.channel) {
616		perror("failed to create event channel");
617		exit(1);
618	}
619
620	if (alloc_nodes())
621		exit(1);
622
623	ret = run();
624
625	printf("test complete\n");
626	destroy_nodes();
627	rdma_destroy_event_channel(test.channel);
628
629	printf("return status %d\n", ret);
630	return ret;
631}
632