1/*
2 * Copyright (c) 2005-2007 Intel Corporation.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id$
33 */
34
35#include <stdlib.h>
36#include <string.h>
37#include <stdio.h>
38#include <errno.h>
39#include <sys/types.h>
40#include <netinet/in.h>
41#include <arpa/inet.h>
42#include <sys/socket.h>
43#include <netdb.h>
44#include <byteswap.h>
45#include <unistd.h>
46#include <getopt.h>
47
48#include <rdma/rdma_cma.h>
49
/*
 * State kept for one test connection: the RDMA CM identifier plus the
 * verbs objects and multicast addressing data used with it.
 */
struct cmatest_node {
	/* Index of this node within test.nodes (assigned in alloc_nodes). */
	int id;
	/* CM identifier; its context pointer refers back to this node. */
	struct rdma_cm_id *cma_id;
	/* Set to 1 once the multicast join completed and the AH exists. */
	int connected;
	/* Protection domain allocated on the identifier's device. */
	struct ibv_pd *pd;
	/* Single completion queue used for both sends and receives. */
	struct ibv_cq *cq;
	/* Registration covering the buffer pointed to by mem. */
	struct ibv_mr *mr;
	/* Address handle built from the join event's ah_attr. */
	struct ibv_ah *ah;
	/* QP number reported by the join event; destination for sends. */
	uint32_t remote_qpn;
	/* Q_Key reported by the join event. */
	uint32_t remote_qkey;
	/* Message buffer: message_size bytes plus room for a GRH. */
	void *mem;
};
62
/*
 * Global state for one run of the test: the event channel, the array of
 * nodes and the resolved source/destination addresses.
 */
struct cmatest {
	/* Event channel on which all CM events for the nodes arrive. */
	struct rdma_event_channel *channel;
	/* Thread that keeps draining CM events after the joins finished. */
	pthread_t cmathread;
	/* Array of `connections` nodes. */
	struct cmatest_node *nodes;
	int conn_index;
	/* Number of nodes that have neither joined nor failed yet. */
	int connects_left;

	/* Storage for the multicast address; dst_addr points at it. */
	struct sockaddr_in6 dst_in;
	struct sockaddr *dst_addr;
	/* Storage for the bind address; src_addr points at it when -b is used. */
	struct sockaddr_in6 src_in;
	struct sockaddr *src_addr;
};
75
76static struct cmatest test;
77static int connections = 1;
78static int message_size = 100;
79static int message_count = 10;
80static int is_sender;
81static int unmapped_addr;
82static char *dst_addr;
83static char *src_addr;
84static enum rdma_port_space port_space = RDMA_PS_UDP;
85
86static int create_message(struct cmatest_node *node)
87{
88	if (!message_size)
89		message_count = 0;
90
91	if (!message_count)
92		return 0;
93
94	node->mem = malloc(message_size + sizeof(struct ibv_grh));
95	if (!node->mem) {
96		printf("failed message allocation\n");
97		return -1;
98	}
99	node->mr = ibv_reg_mr(node->pd, node->mem,
100			      message_size + sizeof(struct ibv_grh),
101			      IBV_ACCESS_LOCAL_WRITE);
102	if (!node->mr) {
103		printf("failed to reg MR\n");
104		goto err;
105	}
106	return 0;
107err:
108	free(node->mem);
109	return -1;
110}
111
112static int verify_test_params(struct cmatest_node *node)
113{
114	struct ibv_port_attr port_attr;
115	int ret;
116
117	ret = ibv_query_port(node->cma_id->verbs, node->cma_id->port_num,
118			     &port_attr);
119	if (ret)
120		return ret;
121
122	if (message_count && message_size > (1 << (port_attr.active_mtu + 7))) {
123		printf("mckey: message_size %d is larger than active mtu %d\n",
124		       message_size, 1 << (port_attr.active_mtu + 7));
125		return -EINVAL;
126	}
127
128	return 0;
129}
130
131static int init_node(struct cmatest_node *node)
132{
133	struct ibv_qp_init_attr init_qp_attr;
134	int cqe, ret;
135
136	node->pd = ibv_alloc_pd(node->cma_id->verbs);
137	if (!node->pd) {
138		ret = -ENOMEM;
139		printf("mckey: unable to allocate PD\n");
140		goto out;
141	}
142
143	cqe = message_count ? message_count * 2 : 2;
144	node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0);
145	if (!node->cq) {
146		ret = -ENOMEM;
147		printf("mckey: unable to create CQ\n");
148		goto out;
149	}
150
151	memset(&init_qp_attr, 0, sizeof init_qp_attr);
152	init_qp_attr.cap.max_send_wr = message_count ? message_count : 1;
153	init_qp_attr.cap.max_recv_wr = message_count ? message_count : 1;
154	init_qp_attr.cap.max_send_sge = 1;
155	init_qp_attr.cap.max_recv_sge = 1;
156	init_qp_attr.qp_context = node;
157	init_qp_attr.sq_sig_all = 0;
158	init_qp_attr.qp_type = IBV_QPT_UD;
159	init_qp_attr.send_cq = node->cq;
160	init_qp_attr.recv_cq = node->cq;
161	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
162	if (ret) {
163		perror("mckey: unable to create QP");
164		goto out;
165	}
166
167	ret = create_message(node);
168	if (ret) {
169		printf("mckey: failed to create messages: %d\n", ret);
170		goto out;
171	}
172out:
173	return ret;
174}
175
176static int post_recvs(struct cmatest_node *node)
177{
178	struct ibv_recv_wr recv_wr, *recv_failure;
179	struct ibv_sge sge;
180	int i, ret = 0;
181
182	if (!message_count)
183		return 0;
184
185	recv_wr.next = NULL;
186	recv_wr.sg_list = &sge;
187	recv_wr.num_sge = 1;
188	recv_wr.wr_id = (uintptr_t) node;
189
190	sge.length = message_size + sizeof(struct ibv_grh);
191	sge.lkey = node->mr->lkey;
192	sge.addr = (uintptr_t) node->mem;
193
194	for (i = 0; i < message_count && !ret; i++ ) {
195		ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure);
196		if (ret) {
197			printf("failed to post receives: %d\n", ret);
198			break;
199		}
200	}
201	return ret;
202}
203
204static int post_sends(struct cmatest_node *node, int signal_flag)
205{
206	struct ibv_send_wr send_wr, *bad_send_wr;
207	struct ibv_sge sge;
208	int i, ret = 0;
209
210	if (!node->connected || !message_count)
211		return 0;
212
213	send_wr.next = NULL;
214	send_wr.sg_list = &sge;
215	send_wr.num_sge = 1;
216	send_wr.opcode = IBV_WR_SEND_WITH_IMM;
217	send_wr.send_flags = signal_flag;
218	send_wr.wr_id = (unsigned long)node;
219	send_wr.imm_data = htonl(node->cma_id->qp->qp_num);
220
221	send_wr.wr.ud.ah = node->ah;
222	send_wr.wr.ud.remote_qpn = node->remote_qpn;
223	send_wr.wr.ud.remote_qkey = node->remote_qkey;
224
225	sge.length = message_size;
226	sge.lkey = node->mr->lkey;
227	sge.addr = (uintptr_t) node->mem;
228
229	for (i = 0; i < message_count && !ret; i++) {
230		ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr);
231		if (ret)
232			printf("failed to post sends: %d\n", ret);
233	}
234	return ret;
235}
236
237static void connect_error(void)
238{
239	test.connects_left--;
240}
241
242static int addr_handler(struct cmatest_node *node)
243{
244	int ret;
245
246	ret = verify_test_params(node);
247	if (ret)
248		goto err;
249
250	ret = init_node(node);
251	if (ret)
252		goto err;
253
254	if (!is_sender) {
255		ret = post_recvs(node);
256		if (ret)
257			goto err;
258	}
259
260	ret = rdma_join_multicast(node->cma_id, test.dst_addr, node);
261	if (ret) {
262		perror("mckey: failure joining");
263		goto err;
264	}
265	return 0;
266err:
267	connect_error();
268	return ret;
269}
270
271static int join_handler(struct cmatest_node *node,
272			struct rdma_ud_param *param)
273{
274	char buf[40];
275
276	inet_ntop(AF_INET6, param->ah_attr.grh.dgid.raw, buf, 40);
277	printf("mckey: joined dgid: %s mlid 0x%x sl %d\n", buf,
278		param->ah_attr.dlid, param->ah_attr.sl);
279
280	node->remote_qpn = param->qp_num;
281	node->remote_qkey = param->qkey;
282	node->ah = ibv_create_ah(node->pd, &param->ah_attr);
283	if (!node->ah) {
284		printf("mckey: failure creating address handle\n");
285		goto err;
286	}
287
288	node->connected = 1;
289	test.connects_left--;
290	return 0;
291err:
292	connect_error();
293	return -1;
294}
295
296static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
297{
298	int ret = 0;
299
300	switch (event->event) {
301	case RDMA_CM_EVENT_ADDR_RESOLVED:
302		ret = addr_handler(cma_id->context);
303		break;
304	case RDMA_CM_EVENT_MULTICAST_JOIN:
305		ret = join_handler(cma_id->context, &event->param.ud);
306		break;
307	case RDMA_CM_EVENT_ADDR_ERROR:
308	case RDMA_CM_EVENT_ROUTE_ERROR:
309	case RDMA_CM_EVENT_MULTICAST_ERROR:
310		printf("mckey: event: %s, error: %d\n",
311		       rdma_event_str(event->event), event->status);
312		connect_error();
313		ret = event->status;
314		break;
315	case RDMA_CM_EVENT_DEVICE_REMOVAL:
316		/* Cleanup will occur after test completes. */
317		break;
318	default:
319		break;
320	}
321	return ret;
322}
323
324static void *cma_thread(void *arg)
325{
326	struct rdma_cm_event *event;
327	int ret;
328
329	while (1) {
330		ret = rdma_get_cm_event(test.channel, &event);
331		if (ret) {
332			perror("rdma_get_cm_event");
333			break;
334		}
335
336		switch (event->event) {
337		case RDMA_CM_EVENT_MULTICAST_ERROR:
338		case RDMA_CM_EVENT_ADDR_CHANGE:
339			printf("mckey: event: %s, status: %d\n",
340			       rdma_event_str(event->event), event->status);
341			break;
342		default:
343			break;
344		}
345
346		rdma_ack_cm_event(event);
347	}
348	return NULL;
349}
350
351static void destroy_node(struct cmatest_node *node)
352{
353	if (!node->cma_id)
354		return;
355
356	if (node->ah)
357		ibv_destroy_ah(node->ah);
358
359	if (node->cma_id->qp)
360		rdma_destroy_qp(node->cma_id);
361
362	if (node->cq)
363		ibv_destroy_cq(node->cq);
364
365	if (node->mem) {
366		ibv_dereg_mr(node->mr);
367		free(node->mem);
368	}
369
370	if (node->pd)
371		ibv_dealloc_pd(node->pd);
372
373	/* Destroy the RDMA ID after all device resources */
374	rdma_destroy_id(node->cma_id);
375}
376
377static int alloc_nodes(void)
378{
379	int ret, i;
380
381	test.nodes = malloc(sizeof *test.nodes * connections);
382	if (!test.nodes) {
383		printf("mckey: unable to allocate memory for test nodes\n");
384		return -ENOMEM;
385	}
386	memset(test.nodes, 0, sizeof *test.nodes * connections);
387
388	for (i = 0; i < connections; i++) {
389		test.nodes[i].id = i;
390		ret = rdma_create_id(test.channel, &test.nodes[i].cma_id,
391				     &test.nodes[i], port_space);
392		if (ret)
393			goto err;
394	}
395	return 0;
396err:
397	while (--i >= 0)
398		rdma_destroy_id(test.nodes[i].cma_id);
399	free(test.nodes);
400	return ret;
401}
402
403static void destroy_nodes(void)
404{
405	int i;
406
407	for (i = 0; i < connections; i++)
408		destroy_node(&test.nodes[i]);
409	free(test.nodes);
410}
411
412static int poll_cqs(void)
413{
414	struct ibv_wc wc[8];
415	int done, i, ret;
416
417	for (i = 0; i < connections; i++) {
418		if (!test.nodes[i].connected)
419			continue;
420
421		for (done = 0; done < message_count; done += ret) {
422			ret = ibv_poll_cq(test.nodes[i].cq, 8, wc);
423			if (ret < 0) {
424				printf("mckey: failed polling CQ: %d\n", ret);
425				return ret;
426			}
427		}
428	}
429	return 0;
430}
431
432static int connect_events(void)
433{
434	struct rdma_cm_event *event;
435	int ret = 0;
436
437	while (test.connects_left && !ret) {
438		ret = rdma_get_cm_event(test.channel, &event);
439		if (!ret) {
440			ret = cma_handler(event->id, event);
441			rdma_ack_cm_event(event);
442		}
443	}
444	return ret;
445}
446
/*
 * Resolve a host name or address string with getaddrinfo() and copy the
 * first result into addr.
 *
 * addr must be large enough for the returned address (ai_addrlen bytes
 * are copied); the callers in this file pass a struct sockaddr_in6.
 *
 * Returns 0 on success or the non-zero getaddrinfo() error code.
 */
static int get_addr(char *dst, struct sockaddr *addr)
{
	struct addrinfo *info;
	int err;

	err = getaddrinfo(dst, NULL, NULL, &info);
	if (err) {
		printf("getaddrinfo failed - invalid hostname or IP address\n");
		return err;
	}

	memcpy(addr, info->ai_addr, info->ai_addrlen);
	freeaddrinfo(info);
	return 0;
}
462
463static int run(void)
464{
465	int i, ret;
466
467	printf("mckey: starting %s\n", is_sender ? "client" : "server");
468	if (src_addr) {
469		ret = get_addr(src_addr, (struct sockaddr *) &test.src_in);
470		if (ret)
471			return ret;
472	}
473
474	ret = get_addr(dst_addr, (struct sockaddr *) &test.dst_in);
475	if (ret)
476		return ret;
477
478	printf("mckey: joining\n");
479	for (i = 0; i < connections; i++) {
480		if (src_addr) {
481			ret = rdma_bind_addr(test.nodes[i].cma_id,
482					     test.src_addr);
483			if (ret) {
484				perror("mckey: addr bind failure");
485				connect_error();
486				return ret;
487			}
488		}
489
490		if (unmapped_addr)
491			ret = addr_handler(&test.nodes[i]);
492		else
493			ret = rdma_resolve_addr(test.nodes[i].cma_id,
494						test.src_addr, test.dst_addr,
495						2000);
496		if (ret) {
497			perror("mckey: resolve addr failure");
498			connect_error();
499			return ret;
500		}
501	}
502
503	ret = connect_events();
504	if (ret)
505		goto out;
506
507	pthread_create(&test.cmathread, NULL, cma_thread, NULL);
508
509	/*
510	 * Pause to give SM chance to configure switches.  We don't want to
511	 * handle reliability issue in this simple test program.
512	 */
513	sleep(3);
514
515	if (message_count) {
516		if (is_sender) {
517			printf("initiating data transfers\n");
518			for (i = 0; i < connections; i++) {
519				ret = post_sends(&test.nodes[i], 0);
520				if (ret)
521					goto out;
522			}
523		} else {
524			printf("receiving data transfers\n");
525			ret = poll_cqs();
526			if (ret)
527				goto out;
528		}
529		printf("data transfers complete\n");
530	}
531out:
532	for (i = 0; i < connections; i++) {
533		ret = rdma_leave_multicast(test.nodes[i].cma_id,
534					   test.dst_addr);
535		if (ret)
536			perror("mckey: failure leaving");
537	}
538	return ret;
539}
540
541int main(int argc, char **argv)
542{
543	int op, ret;
544
545
546	while ((op = getopt(argc, argv, "m:M:sb:c:C:S:p:")) != -1) {
547		switch (op) {
548		case 'm':
549			dst_addr = optarg;
550			break;
551		case 'M':
552			unmapped_addr = 1;
553			dst_addr = optarg;
554			break;
555		case 's':
556			is_sender = 1;
557			break;
558		case 'b':
559			src_addr = optarg;
560			test.src_addr = (struct sockaddr *) &test.src_in;
561			break;
562		case 'c':
563			connections = atoi(optarg);
564			break;
565		case 'C':
566			message_count = atoi(optarg);
567			break;
568		case 'S':
569			message_size = atoi(optarg);
570			break;
571		case 'p':
572			port_space = strtol(optarg, NULL, 0);
573			break;
574		default:
575			printf("usage: %s\n", argv[0]);
576			printf("\t-m multicast_address\n");
577			printf("\t[-M unmapped_multicast_address]\n"
578			       "\t replaces -m and requires -b\n");
579			printf("\t[-s(ender)]\n");
580			printf("\t[-b bind_address]\n");
581			printf("\t[-c connections]\n");
582			printf("\t[-C message_count]\n");
583			printf("\t[-S message_size]\n");
584			printf("\t[-p port_space - %#x for UDP (default), "
585			       "%#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB);
586			exit(1);
587		}
588	}
589
590	if (unmapped_addr && !src_addr) {
591		printf("unmapped multicast address requires binding "
592			"to source address\n");
593		exit(1);
594	}
595
596	test.dst_addr = (struct sockaddr *) &test.dst_in;
597	test.connects_left = connections;
598
599	test.channel = rdma_create_event_channel();
600	if (!test.channel) {
601		perror("failed to create event channel");
602		exit(1);
603	}
604
605	if (alloc_nodes())
606		exit(1);
607
608	ret = run();
609
610	printf("test complete\n");
611	destroy_nodes();
612	rdma_destroy_event_channel(test.channel);
613
614	printf("return status %d\n", ret);
615	return ret;
616}
617