srq_pingpong.c revision 331769
1/*
2 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#define _GNU_SOURCE
33#include <config.h>
34
35#include <stdio.h>
36#include <stdlib.h>
37#include <unistd.h>
38#include <string.h>
39#include <sys/types.h>
40#include <sys/socket.h>
41#include <sys/time.h>
42#include <netdb.h>
43#include <stdlib.h>
44#include <getopt.h>
45#include <arpa/inet.h>
46#include <time.h>
47
48#include "pingpong.h"
49
/*
 * Work-request IDs and table sizes.
 *
 * The two WRID values are distinct bits so that they can be OR-ed together
 * and cleared individually in pingpong_context.pending[].  MAX_QP sizes the
 * per-QP arrays and is the number of address records exchanged over the
 * TCP side channel.
 */
enum {
	PINGPONG_RECV_WRID = 1 << 0,
	PINGPONG_SEND_WRID = 1 << 1,

	MAX_QP             = 256,
};
56
/* Alignment passed to memalign() for the work buffer; main() sets it from sysconf(). */
static int page_size = 0;
58
59struct pingpong_context {
60	struct ibv_context	*context;
61	struct ibv_comp_channel *channel;
62	struct ibv_pd		*pd;
63	struct ibv_mr		*mr;
64	struct ibv_cq		*cq;
65	struct ibv_srq		*srq;
66	struct ibv_qp		*qp[MAX_QP];
67	void			*buf;
68	int			 size;
69	int			 send_flags;
70	int			 num_qp;
71	int			 rx_depth;
72	int			 pending[MAX_QP];
73	struct ibv_port_attr	 portinfo;
74};
75
76struct pingpong_dest {
77	int lid;
78	int qpn;
79	int psn;
80	union ibv_gid gid;
81};
82
83static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
84			  int sl, const struct pingpong_dest *my_dest,
85			  const struct pingpong_dest *dest, int sgid_idx)
86{
87	int i;
88
89	for (i = 0; i < ctx->num_qp; ++i) {
90		struct ibv_qp_attr attr = {
91			.qp_state		= IBV_QPS_RTR,
92			.path_mtu		= mtu,
93			.dest_qp_num		= dest[i].qpn,
94			.rq_psn			= dest[i].psn,
95			.max_dest_rd_atomic	= 1,
96			.min_rnr_timer		= 12,
97			.ah_attr		= {
98				.is_global	= 0,
99				.dlid		= dest[i].lid,
100				.sl		= sl,
101				.src_path_bits	= 0,
102				.port_num	= port
103			}
104		};
105
106		if (dest->gid.global.interface_id) {
107			attr.ah_attr.is_global = 1;
108			attr.ah_attr.grh.hop_limit = 1;
109			attr.ah_attr.grh.dgid = dest->gid;
110			attr.ah_attr.grh.sgid_index = sgid_idx;
111		}
112		if (ibv_modify_qp(ctx->qp[i], &attr,
113				  IBV_QP_STATE              |
114				  IBV_QP_AV                 |
115				  IBV_QP_PATH_MTU           |
116				  IBV_QP_DEST_QPN           |
117				  IBV_QP_RQ_PSN             |
118				  IBV_QP_MAX_DEST_RD_ATOMIC |
119				  IBV_QP_MIN_RNR_TIMER)) {
120			fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i);
121			return 1;
122		}
123
124		attr.qp_state	    = IBV_QPS_RTS;
125		attr.timeout	    = 14;
126		attr.retry_cnt	    = 7;
127		attr.rnr_retry	    = 7;
128		attr.sq_psn	    = my_dest[i].psn;
129		attr.max_rd_atomic  = 1;
130		if (ibv_modify_qp(ctx->qp[i], &attr,
131				  IBV_QP_STATE              |
132				  IBV_QP_TIMEOUT            |
133				  IBV_QP_RETRY_CNT          |
134				  IBV_QP_RNR_RETRY          |
135				  IBV_QP_SQ_PSN             |
136				  IBV_QP_MAX_QP_RD_ATOMIC)) {
137			fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i);
138			return 1;
139		}
140	}
141
142	return 0;
143}
144
145static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
146						 const struct pingpong_dest *my_dest)
147{
148	struct addrinfo *res, *t;
149	struct addrinfo hints = {
150		.ai_family   = AF_INET,
151		.ai_socktype = SOCK_STREAM
152	};
153	char *service;
154	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
155	int n;
156	int r;
157	int i;
158	int sockfd = -1;
159	struct pingpong_dest *rem_dest = NULL;
160	char gid[33];
161
162	if (asprintf(&service, "%d", port) < 0)
163		return NULL;
164
165	n = getaddrinfo(servername, service, &hints, &res);
166
167	if (n < 0) {
168		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
169		free(service);
170		return NULL;
171	}
172
173	for (t = res; t; t = t->ai_next) {
174		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
175		if (sockfd >= 0) {
176			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
177				break;
178			close(sockfd);
179			sockfd = -1;
180		}
181	}
182
183	freeaddrinfo_null(res);
184	free(service);
185
186	if (sockfd < 0) {
187		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
188		return NULL;
189	}
190
191	for (i = 0; i < MAX_QP; ++i) {
192		gid_to_wire_gid(&my_dest[i].gid, gid);
193		sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
194					my_dest[i].qpn, my_dest[i].psn, gid);
195		if (write(sockfd, msg, sizeof msg) != sizeof msg) {
196			fprintf(stderr, "Couldn't send local address\n");
197			goto out;
198		}
199	}
200
201	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
202	if (!rem_dest)
203		goto out;
204
205	for (i = 0; i < MAX_QP; ++i) {
206		n = 0;
207		while (n < sizeof msg) {
208			r = read(sockfd, msg + n, sizeof msg - n);
209			if (r < 0) {
210				perror("client read");
211				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
212					n, (int) sizeof msg, i);
213				goto out;
214			}
215			n += r;
216		}
217
218		sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn,
219							&rem_dest[i].psn, gid);
220		wire_gid_to_gid(gid, &rem_dest[i].gid);
221	}
222
223	if (write(sockfd, "done", sizeof "done") != sizeof "done") {
224		perror("client write");
225		goto out;
226	}
227out:
228	close(sockfd);
229	return rem_dest;
230}
231
232static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
233						 int ib_port, enum ibv_mtu mtu,
234						 int port, int sl,
235						 const struct pingpong_dest *my_dest,
236						 int sgid_idx)
237{
238	struct addrinfo *res, *t;
239	struct addrinfo hints = {
240		.ai_flags    = AI_PASSIVE,
241		.ai_family   = AF_INET,
242		.ai_socktype = SOCK_STREAM
243	};
244	char *service;
245	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
246	int n;
247	int r;
248	int i;
249	int sockfd = -1, connfd;
250	struct pingpong_dest *rem_dest = NULL;
251	char gid[33];
252
253	if (asprintf(&service, "%d", port) < 0)
254		return NULL;
255
256	n = getaddrinfo(NULL, service, &hints, &res);
257
258	if (n < 0) {
259		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
260		free(service);
261		return NULL;
262	}
263
264	for (t = res; t; t = t->ai_next) {
265		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
266		if (sockfd >= 0) {
267			n = 1;
268
269			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
270
271			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
272				break;
273			close(sockfd);
274			sockfd = -1;
275		}
276	}
277
278	freeaddrinfo_null(res);
279	free(service);
280
281	if (sockfd < 0) {
282		fprintf(stderr, "Couldn't listen to port %d\n", port);
283		return NULL;
284	}
285
286	listen(sockfd, 1);
287	connfd = accept(sockfd, NULL, NULL);
288	close(sockfd);
289	if (connfd < 0) {
290		fprintf(stderr, "accept() failed\n");
291		return NULL;
292	}
293
294	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
295	if (!rem_dest)
296		goto out;
297
298	for (i = 0; i < MAX_QP; ++i) {
299		n = 0;
300		while (n < sizeof msg) {
301			r = read(connfd, msg + n, sizeof msg - n);
302			if (r < 0) {
303				perror("server read");
304				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
305					n, (int) sizeof msg, i);
306				goto out;
307			}
308			n += r;
309		}
310
311		sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn,
312							&rem_dest[i].psn, gid);
313		wire_gid_to_gid(gid, &rem_dest[i].gid);
314	}
315
316	if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
317								sgid_idx)) {
318		fprintf(stderr, "Couldn't connect to remote QP\n");
319		free(rem_dest);
320		rem_dest = NULL;
321		goto out;
322	}
323
324	for (i = 0; i < MAX_QP; ++i) {
325		gid_to_wire_gid(&my_dest[i].gid, gid);
326		sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
327					my_dest[i].qpn, my_dest[i].psn, gid);
328		if (write(connfd, msg, sizeof msg) != sizeof msg) {
329			fprintf(stderr, "Couldn't send local address\n");
330			free(rem_dest);
331			rem_dest = NULL;
332			goto out;
333		}
334	}
335
336	if (read(connfd, msg, sizeof msg) != sizeof "done") {
337		perror("client write");
338		free(rem_dest);
339		rem_dest = NULL;
340		goto out;
341	}
342
343out:
344	close(connfd);
345	return rem_dest;
346}
347
348static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
349					    int num_qp, int rx_depth, int port,
350					    int use_event)
351{
352	struct pingpong_context *ctx;
353	int i;
354
355	ctx = calloc(1, sizeof *ctx);
356	if (!ctx)
357		return NULL;
358
359	ctx->size       = size;
360	ctx->send_flags = IBV_SEND_SIGNALED;
361	ctx->num_qp     = num_qp;
362	ctx->rx_depth   = rx_depth;
363
364	ctx->buf = memalign(page_size, size);
365	if (!ctx->buf) {
366		fprintf(stderr, "Couldn't allocate work buf.\n");
367		goto clean_ctx;
368	}
369
370	memset(ctx->buf, 0, size);
371
372	ctx->context = ibv_open_device(ib_dev);
373	if (!ctx->context) {
374		fprintf(stderr, "Couldn't get context for %s\n",
375			ibv_get_device_name(ib_dev));
376		goto clean_buffer;
377	}
378
379	if (use_event) {
380		ctx->channel = ibv_create_comp_channel(ctx->context);
381		if (!ctx->channel) {
382			fprintf(stderr, "Couldn't create completion channel\n");
383			goto clean_device;
384		}
385	} else
386		ctx->channel = NULL;
387
388	ctx->pd = ibv_alloc_pd(ctx->context);
389	if (!ctx->pd) {
390		fprintf(stderr, "Couldn't allocate PD\n");
391		goto clean_comp_channel;
392	}
393
394	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
395	if (!ctx->mr) {
396		fprintf(stderr, "Couldn't register MR\n");
397		goto clean_pd;
398	}
399
400	ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL,
401				ctx->channel, 0);
402	if (!ctx->cq) {
403		fprintf(stderr, "Couldn't create CQ\n");
404		goto clean_mr;
405	}
406
407	{
408		struct ibv_srq_init_attr attr = {
409			.attr = {
410				.max_wr  = rx_depth,
411				.max_sge = 1
412			}
413		};
414
415		ctx->srq = ibv_create_srq(ctx->pd, &attr);
416		if (!ctx->srq)  {
417			fprintf(stderr, "Couldn't create SRQ\n");
418			goto clean_cq;
419		}
420	}
421
422	for (i = 0; i < num_qp; ++i) {
423		struct ibv_qp_attr attr;
424		struct ibv_qp_init_attr init_attr = {
425			.send_cq = ctx->cq,
426			.recv_cq = ctx->cq,
427			.srq     = ctx->srq,
428			.cap     = {
429				.max_send_wr  = 1,
430				.max_send_sge = 1,
431			},
432			.qp_type = IBV_QPT_RC
433		};
434
435		ctx->qp[i] = ibv_create_qp(ctx->pd, &init_attr);
436		if (!ctx->qp[i])  {
437			fprintf(stderr, "Couldn't create QP[%d]\n", i);
438			goto clean_qps;
439		}
440		ibv_query_qp(ctx->qp[i], &attr, IBV_QP_CAP, &init_attr);
441		if (init_attr.cap.max_inline_data >= size) {
442			ctx->send_flags |= IBV_SEND_INLINE;
443		}
444	}
445
446	for (i = 0; i < num_qp; ++i) {
447		struct ibv_qp_attr attr = {
448			.qp_state        = IBV_QPS_INIT,
449			.pkey_index      = 0,
450			.port_num        = port,
451			.qp_access_flags = 0
452		};
453
454		if (ibv_modify_qp(ctx->qp[i], &attr,
455				  IBV_QP_STATE              |
456				  IBV_QP_PKEY_INDEX         |
457				  IBV_QP_PORT               |
458				  IBV_QP_ACCESS_FLAGS)) {
459			fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i);
460			goto clean_qps_full;
461		}
462	}
463
464	return ctx;
465
466clean_qps_full:
467	i = num_qp;
468
469clean_qps:
470	for (--i; i >= 0; --i)
471		ibv_destroy_qp(ctx->qp[i]);
472
473	ibv_destroy_srq(ctx->srq);
474
475clean_cq:
476	ibv_destroy_cq(ctx->cq);
477
478clean_mr:
479	ibv_dereg_mr(ctx->mr);
480
481clean_pd:
482	ibv_dealloc_pd(ctx->pd);
483
484clean_comp_channel:
485	if (ctx->channel)
486		ibv_destroy_comp_channel(ctx->channel);
487
488clean_device:
489	ibv_close_device(ctx->context);
490
491clean_buffer:
492	free(ctx->buf);
493
494clean_ctx:
495	free(ctx);
496
497	return NULL;
498}
499
500static int pp_close_ctx(struct pingpong_context *ctx, int num_qp)
501{
502	int i;
503
504	for (i = 0; i < num_qp; ++i) {
505		if (ibv_destroy_qp(ctx->qp[i])) {
506			fprintf(stderr, "Couldn't destroy QP[%d]\n", i);
507			return 1;
508		}
509	}
510
511	if (ibv_destroy_srq(ctx->srq)) {
512		fprintf(stderr, "Couldn't destroy SRQ\n");
513		return 1;
514	}
515
516	if (ibv_destroy_cq(ctx->cq)) {
517		fprintf(stderr, "Couldn't destroy CQ\n");
518		return 1;
519	}
520
521	if (ibv_dereg_mr(ctx->mr)) {
522		fprintf(stderr, "Couldn't deregister MR\n");
523		return 1;
524	}
525
526	if (ibv_dealloc_pd(ctx->pd)) {
527		fprintf(stderr, "Couldn't deallocate PD\n");
528		return 1;
529	}
530
531	if (ctx->channel) {
532		if (ibv_destroy_comp_channel(ctx->channel)) {
533			fprintf(stderr, "Couldn't destroy completion channel\n");
534			return 1;
535		}
536	}
537
538	if (ibv_close_device(ctx->context)) {
539		fprintf(stderr, "Couldn't release context\n");
540		return 1;
541	}
542
543	free(ctx->buf);
544	free(ctx);
545
546	return 0;
547}
548
549static int pp_post_recv(struct pingpong_context *ctx, int n)
550{
551	struct ibv_sge list = {
552		.addr	= (uintptr_t) ctx->buf,
553		.length = ctx->size,
554		.lkey	= ctx->mr->lkey
555	};
556	struct ibv_recv_wr wr = {
557		.wr_id	    = PINGPONG_RECV_WRID,
558		.sg_list    = &list,
559		.num_sge    = 1,
560	};
561	struct ibv_recv_wr *bad_wr;
562	int i;
563
564	for (i = 0; i < n; ++i)
565		if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr))
566			break;
567
568	return i;
569}
570
571static int pp_post_send(struct pingpong_context *ctx, int qp_index)
572{
573	struct ibv_sge list = {
574		.addr	= (uintptr_t) ctx->buf,
575		.length = ctx->size,
576		.lkey	= ctx->mr->lkey
577	};
578	struct ibv_send_wr wr = {
579		.wr_id	    = PINGPONG_SEND_WRID,
580		.sg_list    = &list,
581		.num_sge    = 1,
582		.opcode     = IBV_WR_SEND,
583		.send_flags = ctx->send_flags,
584	};
585	struct ibv_send_wr *bad_wr;
586
587	return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr);
588}
589
590static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp)
591{
592	int i;
593
594	for (i = 0; i < num_qp; ++i)
595		if (ctx->qp[i]->qp_num == qpn)
596			return i;
597
598	return -1;
599}
600
/* Print the command-line synopsis and option summary to stdout. */
static void usage(const char *argv0)
{
	static const char *const option_lines[] = {
		"  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n",
		"  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n",
		"  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n",
		"  -s, --size=<size>      size of message to exchange (default 4096)\n",
		"  -m, --mtu=<size>       path MTU (default 1024)\n",
		"  -q, --num-qp=<num>     number of QPs to use (default 16)\n",
		"  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n",
		"  -n, --iters=<iters>    number of exchanges per QP(default 1000)\n",
		"  -l, --sl=<sl>          service level value\n",
		"  -e, --events           sleep on CQ events (default poll)\n",
		"  -g, --gid-idx=<gid index> local port gid index\n",
	};
	size_t k;

	fputs("Usage:\n", stdout);
	printf("  %s            start a server and wait for connection\n", argv0);
	printf("  %s <host>     connect to server at <host>\n", argv0);
	fputs("\n", stdout);
	fputs("Options:\n", stdout);

	for (k = 0; k < sizeof option_lines / sizeof option_lines[0]; k++)
		fputs(option_lines[k], stdout);
}
620
621int main(int argc, char *argv[])
622{
623	struct ibv_device      **dev_list;
624	struct ibv_device	*ib_dev;
625	struct ibv_wc		*wc;
626	struct pingpong_context *ctx;
627	struct pingpong_dest     my_dest[MAX_QP];
628	struct pingpong_dest    *rem_dest;
629	struct timeval           start, end;
630	char                    *ib_devname = NULL;
631	char                    *servername = NULL;
632	unsigned int             port = 18515;
633	int                      ib_port = 1;
634	unsigned int             size = 4096;
635	enum ibv_mtu		 mtu = IBV_MTU_1024;
636	unsigned int             num_qp = 16;
637	unsigned int             rx_depth = 500;
638	unsigned int             iters = 1000;
639	int                      use_event = 0;
640	int                      routs;
641	int                      rcnt, scnt;
642	int			 num_wc;
643	int                      i;
644	int                      num_cq_events = 0;
645	int                      sl = 0;
646	int			 gidx = -1;
647	char			 gid[33];
648
649	srand48(getpid() * time(NULL));
650
651	while (1) {
652		int c;
653
654		static struct option long_options[] = {
655			{ .name = "port",     .has_arg = 1, .val = 'p' },
656			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
657			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
658			{ .name = "size",     .has_arg = 1, .val = 's' },
659			{ .name = "mtu",      .has_arg = 1, .val = 'm' },
660			{ .name = "num-qp",   .has_arg = 1, .val = 'q' },
661			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
662			{ .name = "iters",    .has_arg = 1, .val = 'n' },
663			{ .name = "sl",       .has_arg = 1, .val = 'l' },
664			{ .name = "events",   .has_arg = 0, .val = 'e' },
665			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
666			{}
667		};
668
669		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:",
670							long_options, NULL);
671		if (c == -1)
672			break;
673
674		switch (c) {
675		case 'p':
676			port = strtoul(optarg, NULL, 0);
677			if (port > 65535) {
678				usage(argv[0]);
679				return 1;
680			}
681			break;
682
683		case 'd':
684			ib_devname = strdupa(optarg);
685			break;
686
687		case 'i':
688			ib_port = strtol(optarg, NULL, 0);
689			if (ib_port < 1) {
690				usage(argv[0]);
691				return 1;
692			}
693			break;
694
695		case 's':
696			size = strtoul(optarg, NULL, 0);
697			if (size < 1) {
698				usage(argv[0]);
699				return 1;
700			}
701			break;
702
703		case 'm':
704			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
705			if (mtu == 0) {
706				usage(argv[0]);
707				return 1;
708			}
709			break;
710
711		case 'q':
712			num_qp = strtoul(optarg, NULL, 0);
713			break;
714
715		case 'r':
716			rx_depth = strtoul(optarg, NULL, 0);
717			break;
718
719		case 'n':
720			iters = strtoul(optarg, NULL, 0);
721			break;
722
723		case 'l':
724			sl = strtol(optarg, NULL, 0);
725			break;
726
727		case 'e':
728			++use_event;
729			break;
730
731		case 'g':
732			gidx = strtol(optarg, NULL, 0);
733			break;
734
735		default:
736			usage(argv[0]);
737			return 1;
738		}
739	}
740
741	if (optind == argc - 1)
742		servername = strdupa(argv[optind]);
743	else if (optind < argc) {
744		usage(argv[0]);
745		return 1;
746	}
747
748	if (num_qp > rx_depth) {
749		fprintf(stderr, "rx_depth %d is too small for %d QPs -- "
750			"must have at least one receive per QP.\n",
751			rx_depth, num_qp);
752		return 1;
753	}
754
755	num_wc = num_qp + rx_depth;
756	wc     = alloca(num_wc * sizeof *wc);
757
758	page_size = sysconf(_SC_PAGESIZE);
759
760	dev_list = ibv_get_device_list(NULL);
761	if (!dev_list) {
762		perror("Failed to get IB devices list");
763		return 1;
764	}
765
766	if (!ib_devname) {
767		ib_dev = *dev_list;
768		if (!ib_dev) {
769			fprintf(stderr, "No IB devices found\n");
770			return 1;
771		}
772	} else {
773		for (i = 0; dev_list[i]; ++i)
774			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
775				break;
776		ib_dev = dev_list[i];
777		if (!ib_dev) {
778			fprintf(stderr, "IB device %s not found\n", ib_devname);
779			return 1;
780		}
781	}
782
783	ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event);
784	if (!ctx)
785		return 1;
786
787	routs = pp_post_recv(ctx, ctx->rx_depth);
788	if (routs < ctx->rx_depth) {
789		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
790		return 1;
791	}
792
793	if (use_event)
794		if (ibv_req_notify_cq(ctx->cq, 0)) {
795			fprintf(stderr, "Couldn't request CQ notification\n");
796			return 1;
797		}
798
799	memset(my_dest, 0, sizeof my_dest);
800
801	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
802		fprintf(stderr, "Couldn't get port info\n");
803		return 1;
804	}
805	for (i = 0; i < num_qp; ++i) {
806		my_dest[i].qpn = ctx->qp[i]->qp_num;
807		my_dest[i].psn = lrand48() & 0xffffff;
808		my_dest[i].lid = ctx->portinfo.lid;
809		if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET
810							&& !my_dest[i].lid) {
811			fprintf(stderr, "Couldn't get local LID\n");
812			return 1;
813		}
814
815		if (gidx >= 0) {
816			if (ibv_query_gid(ctx->context, ib_port, gidx,
817							&my_dest[i].gid)) {
818				fprintf(stderr, "Could not get local gid for "
819							"gid index %d\n", gidx);
820				return 1;
821			}
822		} else
823			memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid);
824
825		inet_ntop(AF_INET6, &my_dest[i].gid, gid, sizeof gid);
826		printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x, "
827			"GID %s\n", my_dest[i].lid, my_dest[i].qpn,
828			my_dest[i].psn, gid);
829	}
830
831	if (servername)
832		rem_dest = pp_client_exch_dest(servername, port, my_dest);
833	else
834		rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
835								my_dest, gidx);
836
837	if (!rem_dest)
838		return 1;
839
840	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
841
842	for (i = 0; i < num_qp; ++i) {
843		inet_ntop(AF_INET6, &rem_dest[i].gid, gid, sizeof gid);
844		printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, "
845			"GID %s\n", rem_dest[i].lid, rem_dest[i].qpn,
846			rem_dest[i].psn, gid);
847	}
848
849	if (servername)
850		if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
851									gidx))
852			return 1;
853
854	if (servername)
855		for (i = 0; i < num_qp; ++i) {
856			if (pp_post_send(ctx, i)) {
857				fprintf(stderr, "Couldn't post send\n");
858				return 1;
859			}
860			ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID;
861		}
862	else
863		for (i = 0; i < num_qp; ++i)
864			ctx->pending[i] = PINGPONG_RECV_WRID;
865
866	if (gettimeofday(&start, NULL)) {
867		perror("gettimeofday");
868		return 1;
869	}
870
871	rcnt = scnt = 0;
872	while (rcnt < iters || scnt < iters) {
873		if (use_event) {
874			struct ibv_cq *ev_cq;
875			void          *ev_ctx;
876
877			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
878				fprintf(stderr, "Failed to get cq_event\n");
879				return 1;
880			}
881
882			++num_cq_events;
883
884			if (ev_cq != ctx->cq) {
885				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
886				return 1;
887			}
888
889			if (ibv_req_notify_cq(ctx->cq, 0)) {
890				fprintf(stderr, "Couldn't request CQ notification\n");
891				return 1;
892			}
893		}
894
895		{
896			int ne, qp_ind;
897
898			do {
899				ne = ibv_poll_cq(ctx->cq, num_wc, wc);
900				if (ne < 0) {
901					fprintf(stderr, "poll CQ failed %d\n", ne);
902					return 1;
903				}
904			} while (!use_event && ne < 1);
905
906			for (i = 0; i < ne; ++i) {
907				if (wc[i].status != IBV_WC_SUCCESS) {
908					fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
909						ibv_wc_status_str(wc[i].status),
910						wc[i].status, (int) wc[i].wr_id);
911					return 1;
912				}
913
914				qp_ind = find_qp(wc[i].qp_num, ctx, num_qp);
915				if (qp_ind < 0) {
916					fprintf(stderr, "Couldn't find QPN %06x\n",
917						wc[i].qp_num);
918					return 1;
919				}
920
921				switch ((int) wc[i].wr_id) {
922				case PINGPONG_SEND_WRID:
923					++scnt;
924					break;
925
926				case PINGPONG_RECV_WRID:
927					if (--routs <= num_qp) {
928						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
929						if (routs < ctx->rx_depth) {
930							fprintf(stderr,
931								"Couldn't post receive (%d)\n",
932								routs);
933							return 1;
934						}
935					}
936
937					++rcnt;
938					break;
939
940				default:
941					fprintf(stderr, "Completion for unknown wr_id %d\n",
942						(int) wc[i].wr_id);
943					return 1;
944				}
945
946				ctx->pending[qp_ind] &= ~(int) wc[i].wr_id;
947				if (scnt < iters && !ctx->pending[qp_ind]) {
948					if (pp_post_send(ctx, qp_ind)) {
949						fprintf(stderr, "Couldn't post send\n");
950						return 1;
951					}
952					ctx->pending[qp_ind] = PINGPONG_RECV_WRID |
953							       PINGPONG_SEND_WRID;
954				}
955
956			}
957		}
958	}
959
960	if (gettimeofday(&end, NULL)) {
961		perror("gettimeofday");
962		return 1;
963	}
964
965	{
966		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
967			(end.tv_usec - start.tv_usec);
968		long long bytes = (long long) size * iters * 2;
969
970		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
971		       bytes, usec / 1000000., bytes * 8. / usec);
972		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
973		       iters, usec / 1000000., usec / iters);
974	}
975
976	ibv_ack_cq_events(ctx->cq, num_cq_events);
977
978	if (pp_close_ctx(ctx, num_qp))
979		return 1;
980
981	ibv_free_device_list(dev_list);
982	free(rem_dest);
983
984	return 0;
985}
986