cmtime.c revision 331769
1/*
2 * Copyright (c) 2013 Intel Corporation.  All rights reserved.
3 *
4 * This software is available to you under the OpenIB.org BSD license
5 * below:
6 *
7 *     Redistribution and use in source and binary forms, with or
8 *     without modification, are permitted provided that the following
9 *     conditions are met:
10 *
11 *      - Redistributions of source code must retain the above
12 *        copyright notice, this list of conditions and the following
13 *        disclaimer.
14 *
15 *      - Redistributions in binary form must reproduce the above
16 *        copyright notice, this list of conditions and the following
17 *        disclaimer in the documentation and/or other materials
18 *        provided with the distribution.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
24 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 * SOFTWARE.
28 */
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <strings.h>
34#include <errno.h>
35#include <getopt.h>
36#include <sys/types.h>
37#include <sys/socket.h>
38#include <sys/time.h>
39#include <sys/wait.h>
40#include <netdb.h>
41#include <fcntl.h>
42#include <unistd.h>
43#include <netinet/tcp.h>
44
45#include <rdma/rdma_cma.h>
46#include "common.h"
47
48static struct rdma_addrinfo hints, *rai;
49static struct rdma_event_channel *channel;
50static const char *port = "7471";
51static char *dst_addr;
52static char *src_addr;
53static int timeout = 2000;
54static int retries = 2;
55
/*
 * Phases that are timed for every connection.  STEP_CNT is not a phase; it
 * is the number of phases and sizes the per-step arrays.
 */
enum step {
	STEP_CREATE_ID     = 0,
	STEP_BIND          = 1,
	STEP_RESOLVE_ADDR  = 2,
	STEP_RESOLVE_ROUTE = 3,
	STEP_CREATE_QP     = 4,
	STEP_CONNECT       = 5,
	STEP_DISCONNECT    = 6,
	STEP_DESTROY       = 7,
	STEP_CNT
};

/* Row labels printed by show_perf(), indexed by enum step. */
static const char *step_str[] = {
	[STEP_CREATE_ID]     = "create id",
	[STEP_BIND]          = "bind addr",
	[STEP_RESOLVE_ADDR]  = "resolve addr",
	[STEP_RESOLVE_ROUTE] = "resolve route",
	[STEP_CREATE_QP]     = "create qp",
	[STEP_CONNECT]       = "connect",
	[STEP_DISCONNECT]    = "disconnect",
	[STEP_DESTROY]       = "destroy"
};
78
79struct node {
80	struct rdma_cm_id *id;
81	struct timeval times[STEP_CNT][2];
82	int error;
83	int retries;
84};
85
/* Doubly linked ring entry; each queued entry carries the rdma_cm_id to act on. */
struct list_head {
	struct list_head *prev;
	struct list_head *next;
	struct rdma_cm_id *id;
};

/* A list of pending work plus the mutex/condition pair used around it. */
struct work_list {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	struct list_head list;	/* ring head; empty when it points at itself */
};

/* Make an entry a one-element ring.  Note that x is evaluated three times. */
#define INIT_LIST(x)	((x)->prev = (x)->next = (x))
99
100static struct work_list req_work;
101static struct work_list disc_work;
102static struct node *nodes;
103static struct timeval times[STEP_CNT][2];
104static int connections = 100;
105static volatile int started[STEP_CNT];
106static volatile int completed[STEP_CNT];
107static struct ibv_qp_init_attr init_qp_attr;
108static struct rdma_conn_param conn_param;
109
/* Stamp the start ([0]) or end ([1]) time of step s for node n. */
#define start_perf(n, s)	gettimeofday(&(n)->times[(s)][0], NULL)
#define end_perf(n, s)		gettimeofday(&(n)->times[(s)][1], NULL)
/* Same, for the file-scope times[] array covering the whole step. */
#define start_time(s)		gettimeofday(&times[(s)][0], NULL)
#define end_time(s)		gettimeofday(&times[(s)][1], NULL)
114
115static inline void __list_delete(struct list_head *list)
116{
117	struct list_head *prev, *next;
118	prev = list->prev;
119	next = list->next;
120	prev->next = next;
121	next->prev = prev;
122	INIT_LIST(list);
123}
124
125static inline int __list_empty(struct work_list *list)
126{
127	return list->list.next == &list->list;
128}
129
130static inline struct list_head *__list_remove_head(struct work_list *work_list)
131{
132	struct list_head *list_item;
133
134	list_item = work_list->list.next;
135	__list_delete(list_item);
136	return list_item;
137}
138
139static inline void list_add_tail(struct work_list *work_list, struct list_head *req)
140{
141	int empty;
142	pthread_mutex_lock(&work_list->lock);
143	empty = __list_empty(work_list);
144	req->prev = work_list->list.prev;
145	req->next = &work_list->list;
146	req->prev->next = work_list->list.prev = req;
147	pthread_mutex_unlock(&work_list->lock);
148	if (empty)
149		pthread_cond_signal(&work_list->cond);
150}
151
/* Return 1 when both fields of *t are zero (the timestamp was never taken), else 0. */
static int zero_time(struct timeval *t)
{
	return t->tv_sec == 0 && t->tv_usec == 0;
}
156
/* Return end - start in microseconds, computed in double and returned as float. */
static float diff_us(struct timeval *end, struct timeval *start)
{
	double whole_seconds_us = (end->tv_sec - start->tv_sec) * 1000000.;

	return whole_seconds_us + (end->tv_usec - start->tv_usec);
}
161
162static void show_perf(void)
163{
164	int c, i;
165	float us, max[STEP_CNT], min[STEP_CNT];
166
167	for (i = 0; i < STEP_CNT; i++) {
168		max[i] = 0;
169		min[i] = 999999999.;
170		for (c = 0; c < connections; c++) {
171			if (!zero_time(&nodes[c].times[i][0]) &&
172			    !zero_time(&nodes[c].times[i][1])) {
173				us = diff_us(&nodes[c].times[i][1], &nodes[c].times[i][0]);
174				if (us > max[i])
175					max[i] = us;
176				if (us < min[i])
177					min[i] = us;
178			}
179		}
180	}
181
182	printf("step              total ms     max ms     min us  us / conn\n");
183	for (i = 0; i < STEP_CNT; i++) {
184		if (i == STEP_BIND && !src_addr)
185			continue;
186
187		us = diff_us(&times[i][1], &times[i][0]);
188		printf("%-13s: %11.2f%11.2f%11.2f%11.2f\n", step_str[i], us / 1000.,
189			max[i] / 1000., min[i], us / connections);
190	}
191}
192
193static void addr_handler(struct node *n)
194{
195	end_perf(n, STEP_RESOLVE_ADDR);
196	completed[STEP_RESOLVE_ADDR]++;
197}
198
199static void route_handler(struct node *n)
200{
201	end_perf(n, STEP_RESOLVE_ROUTE);
202	completed[STEP_RESOLVE_ROUTE]++;
203}
204
205static void conn_handler(struct node *n)
206{
207	end_perf(n, STEP_CONNECT);
208	completed[STEP_CONNECT]++;
209}
210
211static void disc_handler(struct node *n)
212{
213	end_perf(n, STEP_DISCONNECT);
214	completed[STEP_DISCONNECT]++;
215}
216
217static void __req_handler(struct rdma_cm_id *id)
218{
219	int ret;
220
221	ret = rdma_create_qp(id, NULL, &init_qp_attr);
222	if (ret) {
223		perror("failure creating qp");
224		goto err;
225	}
226
227	ret = rdma_accept(id, NULL);
228	if (ret) {
229		perror("failure accepting");
230		goto err;
231	}
232	return;
233
234err:
235	printf("failing connection request\n");
236	rdma_reject(id, NULL, 0);
237	rdma_destroy_id(id);
238	return;
239}
240
241static void *req_handler_thread(void *arg)
242{
243	struct list_head *work;
244	do {
245		pthread_mutex_lock(&req_work.lock);
246		if (__list_empty(&req_work))
247			pthread_cond_wait(&req_work.cond, &req_work.lock);
248		work = __list_remove_head(&req_work);
249		pthread_mutex_unlock(&req_work.lock);
250		__req_handler(work->id);
251		free(work);
252	} while (1);
253	return NULL;
254}
255
256static void *disc_handler_thread(void *arg)
257{
258	struct list_head *work;
259	do {
260		pthread_mutex_lock(&disc_work.lock);
261		if (__list_empty(&disc_work))
262			pthread_cond_wait(&disc_work.cond, &disc_work.lock);
263		work = __list_remove_head(&disc_work);
264		pthread_mutex_unlock(&disc_work.lock);
265		rdma_disconnect(work->id);
266		rdma_destroy_id(work->id);
267		free(work);
268	} while (1);
269	return NULL;
270}
271
272static void cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
273{
274	struct node *n = id->context;
275	struct list_head *request;
276
277	switch (event->event) {
278	case RDMA_CM_EVENT_ADDR_RESOLVED:
279		addr_handler(n);
280		break;
281	case RDMA_CM_EVENT_ROUTE_RESOLVED:
282		route_handler(n);
283		break;
284	case RDMA_CM_EVENT_CONNECT_REQUEST:
285		request = malloc(sizeof *request);
286		if (!request) {
287			perror("out of memory accepting connect request");
288			rdma_reject(id, NULL, 0);
289			rdma_destroy_id(id);
290		} else {
291			INIT_LIST(request);
292			request->id = id;
293			list_add_tail(&req_work, request);
294		}
295		break;
296	case RDMA_CM_EVENT_ESTABLISHED:
297		if (n)
298			conn_handler(n);
299		break;
300	case RDMA_CM_EVENT_ADDR_ERROR:
301		if (n->retries--) {
302			if (!rdma_resolve_addr(n->id, rai->ai_src_addr,
303					       rai->ai_dst_addr, timeout))
304				break;
305		}
306		printf("RDMA_CM_EVENT_ADDR_ERROR, error: %d\n", event->status);
307		addr_handler(n);
308		n->error = 1;
309		break;
310	case RDMA_CM_EVENT_ROUTE_ERROR:
311		if (n->retries--) {
312			if (!rdma_resolve_route(n->id, timeout))
313				break;
314		}
315		printf("RDMA_CM_EVENT_ROUTE_ERROR, error: %d\n", event->status);
316		route_handler(n);
317		n->error = 1;
318		break;
319	case RDMA_CM_EVENT_CONNECT_ERROR:
320	case RDMA_CM_EVENT_UNREACHABLE:
321	case RDMA_CM_EVENT_REJECTED:
322		printf("event: %s, error: %d\n",
323		       rdma_event_str(event->event), event->status);
324		conn_handler(n);
325		n->error = 1;
326		break;
327	case RDMA_CM_EVENT_DISCONNECTED:
328		if (!n) {
329			request = malloc(sizeof *request);
330			if (!request) {
331				perror("out of memory queueing disconnect request, handling synchronously");
332				rdma_disconnect(id);
333				rdma_destroy_id(id);
334			} else {
335				INIT_LIST(request);
336				request->id = id;
337				list_add_tail(&disc_work, request);
338			}
339		} else
340			disc_handler(n);
341		break;
342	case RDMA_CM_EVENT_DEVICE_REMOVAL:
343		/* Cleanup will occur after test completes. */
344		break;
345	default:
346		break;
347	}
348	rdma_ack_cm_event(event);
349}
350
351static int alloc_nodes(void)
352{
353	int ret, i;
354
355	nodes = calloc(sizeof *nodes, connections);
356	if (!nodes)
357		return -ENOMEM;
358
359	printf("creating id\n");
360	start_time(STEP_CREATE_ID);
361	for (i = 0; i < connections; i++) {
362		start_perf(&nodes[i], STEP_CREATE_ID);
363		if (dst_addr) {
364			ret = rdma_create_id(channel, &nodes[i].id, &nodes[i],
365					     hints.ai_port_space);
366			if (ret)
367				goto err;
368		}
369		end_perf(&nodes[i], STEP_CREATE_ID);
370	}
371	end_time(STEP_CREATE_ID);
372	return 0;
373
374err:
375	while (--i >= 0)
376		rdma_destroy_id(nodes[i].id);
377	free(nodes);
378	return ret;
379}
380
381static void cleanup_nodes(void)
382{
383	int i;
384
385	printf("destroying id\n");
386	start_time(STEP_DESTROY);
387	for (i = 0; i < connections; i++) {
388		start_perf(&nodes[i], STEP_DESTROY);
389		if (nodes[i].id)
390			rdma_destroy_id(nodes[i].id);
391		end_perf(&nodes[i], STEP_DESTROY);
392	}
393	end_time(STEP_DESTROY);
394}
395
396static void *process_events(void *arg)
397{
398	struct rdma_cm_event *event;
399	int ret = 0;
400
401	while (!ret) {
402		ret = rdma_get_cm_event(channel, &event);
403		if (!ret) {
404			cma_handler(event->id, event);
405		} else {
406			perror("failure in rdma_get_cm_event in process_server_events");
407			ret = errno;
408		}
409	}
410	return NULL;
411}
412
413static int run_server(void)
414{
415	pthread_t req_thread, disc_thread;
416	struct rdma_cm_id *listen_id;
417	int ret;
418
419	INIT_LIST(&req_work.list);
420	INIT_LIST(&disc_work.list);
421	ret = pthread_mutex_init(&req_work.lock, NULL);
422	if (ret) {
423		perror("initializing mutex for req work");
424		return ret;
425	}
426
427	ret = pthread_mutex_init(&disc_work.lock, NULL);
428	if (ret) {
429		perror("initializing mutex for disc work");
430		return ret;
431	}
432
433	ret = pthread_cond_init(&req_work.cond, NULL);
434	if (ret) {
435		perror("initializing cond for req work");
436		return ret;
437	}
438
439	ret = pthread_cond_init(&disc_work.cond, NULL);
440	if (ret) {
441		perror("initializing cond for disc work");
442		return ret;
443	}
444
445	ret = pthread_create(&req_thread, NULL, req_handler_thread, NULL);
446	if (ret) {
447		perror("failed to create req handler thread");
448		return ret;
449	}
450
451	ret = pthread_create(&disc_thread, NULL, disc_handler_thread, NULL);
452	if (ret) {
453		perror("failed to create disconnect handler thread");
454		return ret;
455	}
456
457	ret = rdma_create_id(channel, &listen_id, NULL, hints.ai_port_space);
458	if (ret) {
459		perror("listen request failed");
460		return ret;
461	}
462
463	ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai);
464	if (ret) {
465		printf("getrdmaaddr error: %s\n", gai_strerror(ret));
466		goto out;
467	}
468
469	ret = rdma_bind_addr(listen_id, rai->ai_src_addr);
470	if (ret) {
471		perror("bind address failed");
472		goto out;
473	}
474
475	ret = rdma_listen(listen_id, 0);
476	if (ret) {
477		perror("failure trying to listen");
478		goto out;
479	}
480
481	process_events(NULL);
482 out:
483	rdma_destroy_id(listen_id);
484	return ret;
485}
486
/*
 * Client mode: drive every node through bind (only with a source address),
 * address resolution, route resolution, QP creation, connect and disconnect,
 * timing each step per node and as a whole.
 *
 * Asynchronous steps are started for all nodes first; the function then
 * spins until the event thread has completed as many operations as were
 * started.  A node that fails a step is marked with error = 1 and skipped by
 * the later steps.
 *
 * Returns early with the failing result if address lookup or thread creation
 * fails.  Otherwise the value returned is whatever the last call that
 * assigned ret produced; the disconnect loop does not assign it.
 */
static int run_client(void)
{
	pthread_t event_thread;
	int i, ret;

	ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai);
	if (ret) {
		printf("getaddrinfo error: %s\n", gai_strerror(ret));
		return ret;
	}

	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = retries;
	conn_param.private_data = rai->ai_connect;
	conn_param.private_data_len = rai->ai_connect_len;

	ret = pthread_create(&event_thread, NULL, process_events, NULL);
	if (ret) {
		/* pthread_create() returns the error code; errno is not set. */
		fprintf(stderr, "failure creating event thread: %s\n",
			strerror(ret));
		return ret;
	}

	if (src_addr) {
		printf("binding source address\n");
		start_time(STEP_BIND);
		for (i = 0; i < connections; i++) {
			start_perf(&nodes[i], STEP_BIND);
			ret = rdma_bind_addr(nodes[i].id, rai->ai_src_addr);
			if (ret) {
				perror("failure bind addr");
				nodes[i].error = 1;
				continue;
			}
			end_perf(&nodes[i], STEP_BIND);
		}
		end_time(STEP_BIND);
	}

	printf("resolving address\n");
	start_time(STEP_RESOLVE_ADDR);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		nodes[i].retries = retries;
		start_perf(&nodes[i], STEP_RESOLVE_ADDR);
		ret = rdma_resolve_addr(nodes[i].id, rai->ai_src_addr,
					rai->ai_dst_addr, timeout);
		if (ret) {
			perror("failure getting addr");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_RESOLVE_ADDR]++;
	}
	/* The end timestamps and completed[] are updated by the event thread. */
	while (started[STEP_RESOLVE_ADDR] != completed[STEP_RESOLVE_ADDR])
		sched_yield();
	end_time(STEP_RESOLVE_ADDR);

	printf("resolving route\n");
	start_time(STEP_RESOLVE_ROUTE);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		nodes[i].retries = retries;
		start_perf(&nodes[i], STEP_RESOLVE_ROUTE);
		ret = rdma_resolve_route(nodes[i].id, timeout);
		if (ret) {
			perror("failure resolving route");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_RESOLVE_ROUTE]++;
	}
	while (started[STEP_RESOLVE_ROUTE] != completed[STEP_RESOLVE_ROUTE])
		sched_yield();
	end_time(STEP_RESOLVE_ROUTE);

	printf("creating qp\n");
	start_time(STEP_CREATE_QP);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_CREATE_QP);
		ret = rdma_create_qp(nodes[i].id, NULL, &init_qp_attr);
		if (ret) {
			perror("failure creating qp");
			nodes[i].error = 1;
			continue;
		}
		end_perf(&nodes[i], STEP_CREATE_QP);
	}
	end_time(STEP_CREATE_QP);

	printf("connecting\n");
	start_time(STEP_CONNECT);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_CONNECT);
		ret = rdma_connect(nodes[i].id, &conn_param);
		if (ret) {
			perror("failure rconnecting");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_CONNECT]++;
	}
	while (started[STEP_CONNECT] != completed[STEP_CONNECT])
		sched_yield();
	end_time(STEP_CONNECT);

	printf("disconnecting\n");
	start_time(STEP_DISCONNECT);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_DISCONNECT);
		rdma_disconnect(nodes[i].id);
		started[STEP_DISCONNECT]++;
	}
	while (started[STEP_DISCONNECT] != completed[STEP_DISCONNECT])
		sched_yield();
	end_time(STEP_DISCONNECT);

	return ret;
}
610
611int main(int argc, char **argv)
612{
613	int op, ret;
614
615	hints.ai_port_space = RDMA_PS_TCP;
616	hints.ai_qp_type = IBV_QPT_RC;
617	while ((op = getopt(argc, argv, "s:b:c:p:r:t:")) != -1) {
618		switch (op) {
619		case 's':
620			dst_addr = optarg;
621			break;
622		case 'b':
623			src_addr = optarg;
624			break;
625		case 'c':
626			connections = atoi(optarg);
627			break;
628		case 'p':
629			port = optarg;
630			break;
631		case 'r':
632			retries = atoi(optarg);
633			break;
634		case 't':
635			timeout = atoi(optarg);
636			break;
637		default:
638			printf("usage: %s\n", argv[0]);
639			printf("\t[-s server_address]\n");
640			printf("\t[-b bind_address]\n");
641			printf("\t[-c connections]\n");
642			printf("\t[-p port_number]\n");
643			printf("\t[-r retries]\n");
644			printf("\t[-t timeout_ms]\n");
645			exit(1);
646		}
647	}
648
649	init_qp_attr.cap.max_send_wr = 1;
650	init_qp_attr.cap.max_recv_wr = 1;
651	init_qp_attr.cap.max_send_sge = 1;
652	init_qp_attr.cap.max_recv_sge = 1;
653	init_qp_attr.qp_type = IBV_QPT_RC;
654
655	channel = rdma_create_event_channel();
656	if (!channel) {
657		printf("failed to create event channel\n");
658		exit(1);
659	}
660
661	if (dst_addr) {
662		alloc_nodes();
663		ret = run_client();
664	} else {
665		hints.ai_flags |= RAI_PASSIVE;
666		ret = run_server();
667	}
668
669	cleanup_nodes();
670	rdma_destroy_event_channel(channel);
671	if (rai)
672		rdma_freeaddrinfo(rai);
673
674	show_perf();
675	free(nodes);
676	return ret;
677}
678