1/*
2 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#define _GNU_SOURCE
33#include <config.h>
34
35#include <stdio.h>
36#include <stdlib.h>
37#include <unistd.h>
38#include <string.h>
39#include <sys/types.h>
40#include <sys/socket.h>
41#include <sys/time.h>
42#include <netdb.h>
43#include <stdlib.h>
44#include <getopt.h>
45#include <arpa/inet.h>
46#include <time.h>
47
48#include "pingpong.h"
49
/*
 * Work-request IDs.  The values are distinct bits because they are also
 * OR-ed together and masked off in pingpong_context.pending[].
 */
enum {
	PINGPONG_RECV_WRID = 1,
	PINGPONG_SEND_WRID = 2,
};

/* Capacity of the per-QP arrays and of the exchanged destination tables. */
enum {
	MAX_QP = 256,
};
56
/*
 * System page size: set in main() from sysconf(_SC_PAGESIZE) and used by
 * pp_init_ctx() as the alignment of the work buffer.
 */
static int page_size;
58
59struct pingpong_context {
60	struct ibv_context	*context;
61	struct ibv_comp_channel *channel;
62	struct ibv_pd		*pd;
63	struct ibv_mr		*mr;
64	struct ibv_cq		*cq;
65	struct ibv_srq		*srq;
66	struct ibv_qp		*qp[MAX_QP];
67	void			*buf;
68	int			 size;
69	int			 send_flags;
70	int			 num_qp;
71	int			 rx_depth;
72	int			 pending[MAX_QP];
73	struct ibv_port_attr	 portinfo;
74};
75
76struct pingpong_dest {
77	int lid;
78	int qpn;
79	int psn;
80	union ibv_gid gid;
81};
82
83static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
84			  int sl, const struct pingpong_dest *my_dest,
85			  const struct pingpong_dest *dest, int sgid_idx)
86{
87	int i;
88
89	for (i = 0; i < ctx->num_qp; ++i) {
90		struct ibv_qp_attr attr = {
91			.qp_state		= IBV_QPS_RTR,
92			.path_mtu		= mtu,
93			.dest_qp_num		= dest[i].qpn,
94			.rq_psn			= dest[i].psn,
95			.max_dest_rd_atomic	= 1,
96			.min_rnr_timer		= 12,
97			.ah_attr		= {
98				.is_global	= 0,
99				.dlid		= dest[i].lid,
100				.sl		= sl,
101				.src_path_bits	= 0,
102				.port_num	= port
103			}
104		};
105
106		if (dest->gid.global.interface_id) {
107			attr.ah_attr.is_global = 1;
108			attr.ah_attr.grh.hop_limit = 1;
109			attr.ah_attr.grh.dgid = dest->gid;
110			attr.ah_attr.grh.sgid_index = sgid_idx;
111		}
112		if (ibv_modify_qp(ctx->qp[i], &attr,
113				  IBV_QP_STATE              |
114				  IBV_QP_AV                 |
115				  IBV_QP_PATH_MTU           |
116				  IBV_QP_DEST_QPN           |
117				  IBV_QP_RQ_PSN             |
118				  IBV_QP_MAX_DEST_RD_ATOMIC |
119				  IBV_QP_MIN_RNR_TIMER)) {
120			fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i);
121			return 1;
122		}
123
124		attr.qp_state	    = IBV_QPS_RTS;
125		attr.timeout	    = 14;
126		attr.retry_cnt	    = 7;
127		attr.rnr_retry	    = 7;
128		attr.sq_psn	    = my_dest[i].psn;
129		attr.max_rd_atomic  = 1;
130		if (ibv_modify_qp(ctx->qp[i], &attr,
131				  IBV_QP_STATE              |
132				  IBV_QP_TIMEOUT            |
133				  IBV_QP_RETRY_CNT          |
134				  IBV_QP_RNR_RETRY          |
135				  IBV_QP_SQ_PSN             |
136				  IBV_QP_MAX_QP_RD_ATOMIC)) {
137			fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i);
138			return 1;
139		}
140	}
141
142	return 0;
143}
144
145static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
146						 const struct pingpong_dest *my_dest)
147{
148	struct addrinfo *res, *t;
149	struct addrinfo hints = {
150		.ai_family   = AF_UNSPEC,
151		.ai_socktype = SOCK_STREAM
152	};
153	char *service;
154	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
155	int n;
156	int r;
157	int i;
158	int sockfd = -1;
159	struct pingpong_dest *rem_dest = NULL;
160	char gid[33];
161
162	if (asprintf(&service, "%d", port) < 0)
163		return NULL;
164
165	n = getaddrinfo(servername, service, &hints, &res);
166
167	if (n < 0) {
168		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
169		free(service);
170		return NULL;
171	}
172
173	for (t = res; t; t = t->ai_next) {
174		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
175		if (sockfd >= 0) {
176			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
177				break;
178			close(sockfd);
179			sockfd = -1;
180		}
181	}
182
183	freeaddrinfo_null(res);
184	free(service);
185
186	if (sockfd < 0) {
187		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
188		return NULL;
189	}
190
191	for (i = 0; i < MAX_QP; ++i) {
192		gid_to_wire_gid(&my_dest[i].gid, gid);
193		sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
194					my_dest[i].qpn, my_dest[i].psn, gid);
195		if (write(sockfd, msg, sizeof msg) != sizeof msg) {
196			fprintf(stderr, "Couldn't send local address\n");
197			goto out;
198		}
199	}
200
201	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
202	if (!rem_dest)
203		goto out;
204
205	for (i = 0; i < MAX_QP; ++i) {
206		n = 0;
207		while (n < sizeof msg) {
208			r = read(sockfd, msg + n, sizeof msg - n);
209			if (r < 0) {
210				perror("client read");
211				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
212					n, (int) sizeof msg, i);
213				goto out;
214			}
215			n += r;
216		}
217
218		sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn,
219							&rem_dest[i].psn, gid);
220		wire_gid_to_gid(gid, &rem_dest[i].gid);
221	}
222
223	if (write(sockfd, "done", sizeof "done") != sizeof "done") {
224		perror("client write");
225		goto out;
226	}
227out:
228	close(sockfd);
229	return rem_dest;
230}
231
232static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
233						 int ib_port, enum ibv_mtu mtu,
234						 int port, int sl,
235						 const struct pingpong_dest *my_dest,
236						 int sgid_idx)
237{
238	struct addrinfo *res, *t;
239	struct addrinfo hints = {
240		.ai_flags    = AI_PASSIVE,
241		.ai_family   = AF_UNSPEC,
242		.ai_socktype = SOCK_STREAM
243	};
244	char *service;
245	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
246	int n;
247	int r;
248	int i;
249	int sockfd = -1, connfd;
250	struct pingpong_dest *rem_dest = NULL;
251	char gid[33];
252
253	if (asprintf(&service, "%d", port) < 0)
254		return NULL;
255
256	n = getaddrinfo(NULL, service, &hints, &res);
257
258	if (n < 0) {
259		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
260		free(service);
261		return NULL;
262	}
263
264	for (t = res; t; t = t->ai_next) {
265		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
266		if (sockfd >= 0) {
267			n = 1;
268
269			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
270
271			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
272				break;
273			close(sockfd);
274			sockfd = -1;
275		}
276	}
277
278	freeaddrinfo_null(res);
279	free(service);
280
281	if (sockfd < 0) {
282		fprintf(stderr, "Couldn't listen to port %d\n", port);
283		return NULL;
284	}
285
286	if (listen(sockfd, 1) < 0) {
287		perror("listen() failed");
288		close(sockfd);
289		return NULL;
290	}
291	connfd = accept(sockfd, NULL, NULL);
292	close(sockfd);
293	if (connfd < 0) {
294		fprintf(stderr, "accept() failed\n");
295		return NULL;
296	}
297
298	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
299	if (!rem_dest)
300		goto out;
301
302	for (i = 0; i < MAX_QP; ++i) {
303		n = 0;
304		while (n < sizeof msg) {
305			r = read(connfd, msg + n, sizeof msg - n);
306			if (r < 0) {
307				perror("server read");
308				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
309					n, (int) sizeof msg, i);
310				goto out;
311			}
312			n += r;
313		}
314
315		sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn,
316							&rem_dest[i].psn, gid);
317		wire_gid_to_gid(gid, &rem_dest[i].gid);
318	}
319
320	if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
321								sgid_idx)) {
322		fprintf(stderr, "Couldn't connect to remote QP\n");
323		free(rem_dest);
324		rem_dest = NULL;
325		goto out;
326	}
327
328	for (i = 0; i < MAX_QP; ++i) {
329		gid_to_wire_gid(&my_dest[i].gid, gid);
330		sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
331					my_dest[i].qpn, my_dest[i].psn, gid);
332		if (write(connfd, msg, sizeof msg) != sizeof msg) {
333			fprintf(stderr, "Couldn't send local address\n");
334			free(rem_dest);
335			rem_dest = NULL;
336			goto out;
337		}
338	}
339
340	if (read(connfd, msg, sizeof msg) != sizeof "done") {
341		perror("client write");
342		free(rem_dest);
343		rem_dest = NULL;
344		goto out;
345	}
346
347out:
348	close(connfd);
349	return rem_dest;
350}
351
352static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
353					    int num_qp, int rx_depth, int port,
354					    int use_event)
355{
356	struct pingpong_context *ctx;
357	int i;
358
359	ctx = calloc(1, sizeof *ctx);
360	if (!ctx)
361		return NULL;
362
363	ctx->size       = size;
364	ctx->send_flags = IBV_SEND_SIGNALED;
365	ctx->num_qp     = num_qp;
366	ctx->rx_depth   = rx_depth;
367
368	ctx->buf = memalign(page_size, size);
369	if (!ctx->buf) {
370		fprintf(stderr, "Couldn't allocate work buf.\n");
371		goto clean_ctx;
372	}
373
374	memset(ctx->buf, 0, size);
375
376	ctx->context = ibv_open_device(ib_dev);
377	if (!ctx->context) {
378		fprintf(stderr, "Couldn't get context for %s\n",
379			ibv_get_device_name(ib_dev));
380		goto clean_buffer;
381	}
382
383	if (use_event) {
384		ctx->channel = ibv_create_comp_channel(ctx->context);
385		if (!ctx->channel) {
386			fprintf(stderr, "Couldn't create completion channel\n");
387			goto clean_device;
388		}
389	} else
390		ctx->channel = NULL;
391
392	ctx->pd = ibv_alloc_pd(ctx->context);
393	if (!ctx->pd) {
394		fprintf(stderr, "Couldn't allocate PD\n");
395		goto clean_comp_channel;
396	}
397
398	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
399	if (!ctx->mr) {
400		fprintf(stderr, "Couldn't register MR\n");
401		goto clean_pd;
402	}
403
404	ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL,
405				ctx->channel, 0);
406	if (!ctx->cq) {
407		fprintf(stderr, "Couldn't create CQ\n");
408		goto clean_mr;
409	}
410
411	{
412		struct ibv_srq_init_attr attr = {
413			.attr = {
414				.max_wr  = rx_depth,
415				.max_sge = 1
416			}
417		};
418
419		ctx->srq = ibv_create_srq(ctx->pd, &attr);
420		if (!ctx->srq)  {
421			fprintf(stderr, "Couldn't create SRQ\n");
422			goto clean_cq;
423		}
424	}
425
426	for (i = 0; i < num_qp; ++i) {
427		struct ibv_qp_attr attr;
428		struct ibv_qp_init_attr init_attr = {
429			.send_cq = ctx->cq,
430			.recv_cq = ctx->cq,
431			.srq     = ctx->srq,
432			.cap     = {
433				.max_send_wr  = 1,
434				.max_send_sge = 1,
435			},
436			.qp_type = IBV_QPT_RC
437		};
438
439		ctx->qp[i] = ibv_create_qp(ctx->pd, &init_attr);
440		if (!ctx->qp[i])  {
441			fprintf(stderr, "Couldn't create QP[%d]\n", i);
442			goto clean_qps;
443		}
444		ibv_query_qp(ctx->qp[i], &attr, IBV_QP_CAP, &init_attr);
445		if (init_attr.cap.max_inline_data >= size) {
446			ctx->send_flags |= IBV_SEND_INLINE;
447		}
448	}
449
450	for (i = 0; i < num_qp; ++i) {
451		struct ibv_qp_attr attr = {
452			.qp_state        = IBV_QPS_INIT,
453			.pkey_index      = 0,
454			.port_num        = port,
455			.qp_access_flags = 0
456		};
457
458		if (ibv_modify_qp(ctx->qp[i], &attr,
459				  IBV_QP_STATE              |
460				  IBV_QP_PKEY_INDEX         |
461				  IBV_QP_PORT               |
462				  IBV_QP_ACCESS_FLAGS)) {
463			fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i);
464			goto clean_qps_full;
465		}
466	}
467
468	return ctx;
469
470clean_qps_full:
471	i = num_qp;
472
473clean_qps:
474	for (--i; i >= 0; --i)
475		ibv_destroy_qp(ctx->qp[i]);
476
477	ibv_destroy_srq(ctx->srq);
478
479clean_cq:
480	ibv_destroy_cq(ctx->cq);
481
482clean_mr:
483	ibv_dereg_mr(ctx->mr);
484
485clean_pd:
486	ibv_dealloc_pd(ctx->pd);
487
488clean_comp_channel:
489	if (ctx->channel)
490		ibv_destroy_comp_channel(ctx->channel);
491
492clean_device:
493	ibv_close_device(ctx->context);
494
495clean_buffer:
496	free(ctx->buf);
497
498clean_ctx:
499	free(ctx);
500
501	return NULL;
502}
503
504static int pp_close_ctx(struct pingpong_context *ctx, int num_qp)
505{
506	int i;
507
508	for (i = 0; i < num_qp; ++i) {
509		if (ibv_destroy_qp(ctx->qp[i])) {
510			fprintf(stderr, "Couldn't destroy QP[%d]\n", i);
511			return 1;
512		}
513	}
514
515	if (ibv_destroy_srq(ctx->srq)) {
516		fprintf(stderr, "Couldn't destroy SRQ\n");
517		return 1;
518	}
519
520	if (ibv_destroy_cq(ctx->cq)) {
521		fprintf(stderr, "Couldn't destroy CQ\n");
522		return 1;
523	}
524
525	if (ibv_dereg_mr(ctx->mr)) {
526		fprintf(stderr, "Couldn't deregister MR\n");
527		return 1;
528	}
529
530	if (ibv_dealloc_pd(ctx->pd)) {
531		fprintf(stderr, "Couldn't deallocate PD\n");
532		return 1;
533	}
534
535	if (ctx->channel) {
536		if (ibv_destroy_comp_channel(ctx->channel)) {
537			fprintf(stderr, "Couldn't destroy completion channel\n");
538			return 1;
539		}
540	}
541
542	if (ibv_close_device(ctx->context)) {
543		fprintf(stderr, "Couldn't release context\n");
544		return 1;
545	}
546
547	free(ctx->buf);
548	free(ctx);
549
550	return 0;
551}
552
553static int pp_post_recv(struct pingpong_context *ctx, int n)
554{
555	struct ibv_sge list = {
556		.addr	= (uintptr_t) ctx->buf,
557		.length = ctx->size,
558		.lkey	= ctx->mr->lkey
559	};
560	struct ibv_recv_wr wr = {
561		.wr_id	    = PINGPONG_RECV_WRID,
562		.sg_list    = &list,
563		.num_sge    = 1,
564	};
565	struct ibv_recv_wr *bad_wr;
566	int i;
567
568	for (i = 0; i < n; ++i)
569		if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr))
570			break;
571
572	return i;
573}
574
575static int pp_post_send(struct pingpong_context *ctx, int qp_index)
576{
577	struct ibv_sge list = {
578		.addr	= (uintptr_t) ctx->buf,
579		.length = ctx->size,
580		.lkey	= ctx->mr->lkey
581	};
582	struct ibv_send_wr wr = {
583		.wr_id	    = PINGPONG_SEND_WRID,
584		.sg_list    = &list,
585		.num_sge    = 1,
586		.opcode     = IBV_WR_SEND,
587		.send_flags = ctx->send_flags,
588	};
589	struct ibv_send_wr *bad_wr;
590
591	return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr);
592}
593
594static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp)
595{
596	int i;
597
598	for (i = 0; i < num_qp; ++i)
599		if (ctx->qp[i]->qp_num == qpn)
600			return i;
601
602	return -1;
603}
604
/* Print the command-line synopsis and option summary to stdout. */
static void usage(const char *argv0)
{
	/* The option list contains no conversions, so it is written verbatim. */
	static const char options_text[] =
		"\n"
		"Options:\n"
		"  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n"
		"  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n"
		"  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n"
		"  -s, --size=<size>      size of message to exchange (default 4096)\n"
		"  -m, --mtu=<size>       path MTU (default 1024)\n"
		"  -q, --num-qp=<num>     number of QPs to use (default 16)\n"
		"  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n"
		"  -n, --iters=<iters>    number of exchanges per QP(default 1000)\n"
		"  -l, --sl=<sl>          service level value\n"
		"  -e, --events           sleep on CQ events (default poll)\n"
		"  -g, --gid-idx=<gid index> local port gid index\n";

	printf("Usage:\n"
	       "  %s            start a server and wait for connection\n"
	       "  %s <host>     connect to server at <host>\n",
	       argv0, argv0);
	fputs(options_text, stdout);
}
624
625int main(int argc, char *argv[])
626{
627	struct ibv_device      **dev_list;
628	struct ibv_device	*ib_dev;
629	struct ibv_wc		*wc;
630	struct pingpong_context *ctx;
631	struct pingpong_dest     my_dest[MAX_QP];
632	struct pingpong_dest    *rem_dest;
633	struct timeval           start, end;
634	char                    *ib_devname = NULL;
635	char                    *servername = NULL;
636	unsigned int             port = 18515;
637	int                      ib_port = 1;
638	unsigned int             size = 4096;
639	enum ibv_mtu		 mtu = IBV_MTU_1024;
640	unsigned int             num_qp = 16;
641	unsigned int             rx_depth = 500;
642	unsigned int             iters = 1000;
643	int                      use_event = 0;
644	int                      routs;
645	int                      rcnt, scnt;
646	int			 num_wc;
647	int                      i;
648	int                      num_cq_events = 0;
649	int                      sl = 0;
650	int			 gidx = -1;
651	char			 gid[33];
652
653	srand48(getpid() * time(NULL));
654
655	while (1) {
656		int c;
657
658		static struct option long_options[] = {
659			{ .name = "port",     .has_arg = 1, .val = 'p' },
660			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
661			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
662			{ .name = "size",     .has_arg = 1, .val = 's' },
663			{ .name = "mtu",      .has_arg = 1, .val = 'm' },
664			{ .name = "num-qp",   .has_arg = 1, .val = 'q' },
665			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
666			{ .name = "iters",    .has_arg = 1, .val = 'n' },
667			{ .name = "sl",       .has_arg = 1, .val = 'l' },
668			{ .name = "events",   .has_arg = 0, .val = 'e' },
669			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
670			{}
671		};
672
673		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:",
674							long_options, NULL);
675		if (c == -1)
676			break;
677
678		switch (c) {
679		case 'p':
680			port = strtoul(optarg, NULL, 0);
681			if (port > 65535) {
682				usage(argv[0]);
683				return 1;
684			}
685			break;
686
687		case 'd':
688			ib_devname = strdupa(optarg);
689			break;
690
691		case 'i':
692			ib_port = strtol(optarg, NULL, 0);
693			if (ib_port < 1) {
694				usage(argv[0]);
695				return 1;
696			}
697			break;
698
699		case 's':
700			size = strtoul(optarg, NULL, 0);
701			if (size < 1) {
702				usage(argv[0]);
703				return 1;
704			}
705			break;
706
707		case 'm':
708			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
709			if (mtu == 0) {
710				usage(argv[0]);
711				return 1;
712			}
713			break;
714
715		case 'q':
716			num_qp = strtoul(optarg, NULL, 0);
717			break;
718
719		case 'r':
720			rx_depth = strtoul(optarg, NULL, 0);
721			break;
722
723		case 'n':
724			iters = strtoul(optarg, NULL, 0);
725			break;
726
727		case 'l':
728			sl = strtol(optarg, NULL, 0);
729			break;
730
731		case 'e':
732			++use_event;
733			break;
734
735		case 'g':
736			gidx = strtol(optarg, NULL, 0);
737			break;
738
739		default:
740			usage(argv[0]);
741			return 1;
742		}
743	}
744
745	if (optind == argc - 1)
746		servername = strdupa(argv[optind]);
747	else if (optind < argc) {
748		usage(argv[0]);
749		return 1;
750	}
751
752	if (num_qp > rx_depth) {
753		fprintf(stderr, "rx_depth %d is too small for %d QPs -- "
754			"must have at least one receive per QP.\n",
755			rx_depth, num_qp);
756		return 1;
757	}
758
759	num_wc = num_qp + rx_depth;
760	wc     = alloca(num_wc * sizeof *wc);
761
762	page_size = sysconf(_SC_PAGESIZE);
763
764	dev_list = ibv_get_device_list(NULL);
765	if (!dev_list) {
766		perror("Failed to get IB devices list");
767		return 1;
768	}
769
770	if (!ib_devname) {
771		ib_dev = *dev_list;
772		if (!ib_dev) {
773			fprintf(stderr, "No IB devices found\n");
774			return 1;
775		}
776	} else {
777		for (i = 0; dev_list[i]; ++i)
778			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
779				break;
780		ib_dev = dev_list[i];
781		if (!ib_dev) {
782			fprintf(stderr, "IB device %s not found\n", ib_devname);
783			return 1;
784		}
785	}
786
787	ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event);
788	if (!ctx)
789		return 1;
790
791	routs = pp_post_recv(ctx, ctx->rx_depth);
792	if (routs < ctx->rx_depth) {
793		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
794		return 1;
795	}
796
797	if (use_event)
798		if (ibv_req_notify_cq(ctx->cq, 0)) {
799			fprintf(stderr, "Couldn't request CQ notification\n");
800			return 1;
801		}
802
803	memset(my_dest, 0, sizeof my_dest);
804
805	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
806		fprintf(stderr, "Couldn't get port info\n");
807		return 1;
808	}
809	for (i = 0; i < num_qp; ++i) {
810		my_dest[i].qpn = ctx->qp[i]->qp_num;
811		my_dest[i].psn = lrand48() & 0xffffff;
812		my_dest[i].lid = ctx->portinfo.lid;
813		if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET
814							&& !my_dest[i].lid) {
815			fprintf(stderr, "Couldn't get local LID\n");
816			return 1;
817		}
818
819		if (gidx >= 0) {
820			if (ibv_query_gid(ctx->context, ib_port, gidx,
821							&my_dest[i].gid)) {
822				fprintf(stderr, "Could not get local gid for "
823							"gid index %d\n", gidx);
824				return 1;
825			}
826		} else
827			memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid);
828
829		inet_ntop(AF_INET6, &my_dest[i].gid, gid, sizeof gid);
830		printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x, "
831			"GID %s\n", my_dest[i].lid, my_dest[i].qpn,
832			my_dest[i].psn, gid);
833	}
834
835	if (servername)
836		rem_dest = pp_client_exch_dest(servername, port, my_dest);
837	else
838		rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
839								my_dest, gidx);
840
841	if (!rem_dest)
842		return 1;
843
844	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
845
846	for (i = 0; i < num_qp; ++i) {
847		inet_ntop(AF_INET6, &rem_dest[i].gid, gid, sizeof gid);
848		printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, "
849			"GID %s\n", rem_dest[i].lid, rem_dest[i].qpn,
850			rem_dest[i].psn, gid);
851	}
852
853	if (servername)
854		if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
855									gidx))
856			return 1;
857
858	if (servername)
859		for (i = 0; i < num_qp; ++i) {
860			if (pp_post_send(ctx, i)) {
861				fprintf(stderr, "Couldn't post send\n");
862				return 1;
863			}
864			ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID;
865		}
866	else
867		for (i = 0; i < num_qp; ++i)
868			ctx->pending[i] = PINGPONG_RECV_WRID;
869
870	if (gettimeofday(&start, NULL)) {
871		perror("gettimeofday");
872		return 1;
873	}
874
875	rcnt = scnt = 0;
876	while (rcnt < iters || scnt < iters) {
877		if (use_event) {
878			struct ibv_cq *ev_cq;
879			void          *ev_ctx;
880
881			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
882				fprintf(stderr, "Failed to get cq_event\n");
883				return 1;
884			}
885
886			++num_cq_events;
887
888			if (ev_cq != ctx->cq) {
889				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
890				return 1;
891			}
892
893			if (ibv_req_notify_cq(ctx->cq, 0)) {
894				fprintf(stderr, "Couldn't request CQ notification\n");
895				return 1;
896			}
897		}
898
899		{
900			int ne, qp_ind;
901
902			do {
903				ne = ibv_poll_cq(ctx->cq, num_wc, wc);
904				if (ne < 0) {
905					fprintf(stderr, "poll CQ failed %d\n", ne);
906					return 1;
907				}
908			} while (!use_event && ne < 1);
909
910			for (i = 0; i < ne; ++i) {
911				if (wc[i].status != IBV_WC_SUCCESS) {
912					fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
913						ibv_wc_status_str(wc[i].status),
914						wc[i].status, (int) wc[i].wr_id);
915					return 1;
916				}
917
918				qp_ind = find_qp(wc[i].qp_num, ctx, num_qp);
919				if (qp_ind < 0) {
920					fprintf(stderr, "Couldn't find QPN %06x\n",
921						wc[i].qp_num);
922					return 1;
923				}
924
925				switch ((int) wc[i].wr_id) {
926				case PINGPONG_SEND_WRID:
927					++scnt;
928					break;
929
930				case PINGPONG_RECV_WRID:
931					if (--routs <= num_qp) {
932						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
933						if (routs < ctx->rx_depth) {
934							fprintf(stderr,
935								"Couldn't post receive (%d)\n",
936								routs);
937							return 1;
938						}
939					}
940
941					++rcnt;
942					break;
943
944				default:
945					fprintf(stderr, "Completion for unknown wr_id %d\n",
946						(int) wc[i].wr_id);
947					return 1;
948				}
949
950				ctx->pending[qp_ind] &= ~(int) wc[i].wr_id;
951				if (scnt < iters && !ctx->pending[qp_ind]) {
952					if (pp_post_send(ctx, qp_ind)) {
953						fprintf(stderr, "Couldn't post send\n");
954						return 1;
955					}
956					ctx->pending[qp_ind] = PINGPONG_RECV_WRID |
957							       PINGPONG_SEND_WRID;
958				}
959
960			}
961		}
962	}
963
964	if (gettimeofday(&end, NULL)) {
965		perror("gettimeofday");
966		return 1;
967	}
968
969	{
970		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
971			(end.tv_usec - start.tv_usec);
972		long long bytes = (long long) size * iters * 2;
973
974		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
975		       bytes, usec / 1000000., bytes * 8. / usec);
976		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
977		       iters, usec / 1000000., usec / iters);
978	}
979
980	ibv_ack_cq_events(ctx->cq, num_cq_events);
981
982	if (pp_close_ctx(ctx, num_qp))
983		return 1;
984
985	ibv_free_device_list(dev_list);
986	free(rem_dest);
987
988	return 0;
989}
990