rc_pingpong.c revision 331769
1/*
2 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#define _GNU_SOURCE
33#include <config.h>
34
35#include <stdio.h>
36#include <stdlib.h>
37#include <unistd.h>
38#include <string.h>
39#include <sys/types.h>
40#include <sys/socket.h>
41#include <sys/time.h>
42#include <netdb.h>
43#include <stdlib.h>
44#include <getopt.h>
45#include <arpa/inet.h>
46#include <time.h>
47#include <inttypes.h>
48
49#include "pingpong.h"
50
51#include <sys/param.h>
52
/*
 * Work-request identifiers used for posted receives and sends.
 * Each value occupies its own bit, so the two can be OR-ed together and
 * cleared one at a time in pingpong_context.pending.
 */
enum {
	PINGPONG_RECV_WRID = 1 << 0,
	PINGPONG_SEND_WRID = 1 << 1,
};
57
/* Process-wide settings; main() fills them in before building a context. */
static int page_size;	/* sysconf(_SC_PAGESIZE): alignment of the work buffer */
static int use_odp;	/* set by -o/--odp: register the buffer on demand */
static int use_ts;	/* set by -t/--ts: use an extended CQ with timestamps */
61
62struct pingpong_context {
63	struct ibv_context	*context;
64	struct ibv_comp_channel *channel;
65	struct ibv_pd		*pd;
66	struct ibv_mr		*mr;
67	union {
68		struct ibv_cq		*cq;
69		struct ibv_cq_ex	*cq_ex;
70	} cq_s;
71	struct ibv_qp		*qp;
72	void			*buf;
73	int			 size;
74	int			 send_flags;
75	int			 rx_depth;
76	int			 pending;
77	struct ibv_port_attr     portinfo;
78	uint64_t		 completion_timestamp_mask;
79};
80
81static struct ibv_cq *pp_cq(struct pingpong_context *ctx)
82{
83	return use_ts ? ibv_cq_ex_to_cq(ctx->cq_s.cq_ex) :
84		ctx->cq_s.cq;
85}
86
87struct pingpong_dest {
88	int lid;
89	int qpn;
90	int psn;
91	union ibv_gid gid;
92};
93
94static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
95			  enum ibv_mtu mtu, int sl,
96			  struct pingpong_dest *dest, int sgid_idx)
97{
98	struct ibv_qp_attr attr = {
99		.qp_state		= IBV_QPS_RTR,
100		.path_mtu		= mtu,
101		.dest_qp_num		= dest->qpn,
102		.rq_psn			= dest->psn,
103		.max_dest_rd_atomic	= 1,
104		.min_rnr_timer		= 12,
105		.ah_attr		= {
106			.is_global	= 0,
107			.dlid		= dest->lid,
108			.sl		= sl,
109			.src_path_bits	= 0,
110			.port_num	= port
111		}
112	};
113
114	if (dest->gid.global.interface_id) {
115		attr.ah_attr.is_global = 1;
116		attr.ah_attr.grh.hop_limit = 1;
117		attr.ah_attr.grh.dgid = dest->gid;
118		attr.ah_attr.grh.sgid_index = sgid_idx;
119	}
120	if (ibv_modify_qp(ctx->qp, &attr,
121			  IBV_QP_STATE              |
122			  IBV_QP_AV                 |
123			  IBV_QP_PATH_MTU           |
124			  IBV_QP_DEST_QPN           |
125			  IBV_QP_RQ_PSN             |
126			  IBV_QP_MAX_DEST_RD_ATOMIC |
127			  IBV_QP_MIN_RNR_TIMER)) {
128		fprintf(stderr, "Failed to modify QP to RTR\n");
129		return 1;
130	}
131
132	attr.qp_state	    = IBV_QPS_RTS;
133	attr.timeout	    = 14;
134	attr.retry_cnt	    = 7;
135	attr.rnr_retry	    = 7;
136	attr.sq_psn	    = my_psn;
137	attr.max_rd_atomic  = 1;
138	if (ibv_modify_qp(ctx->qp, &attr,
139			  IBV_QP_STATE              |
140			  IBV_QP_TIMEOUT            |
141			  IBV_QP_RETRY_CNT          |
142			  IBV_QP_RNR_RETRY          |
143			  IBV_QP_SQ_PSN             |
144			  IBV_QP_MAX_QP_RD_ATOMIC)) {
145		fprintf(stderr, "Failed to modify QP to RTS\n");
146		return 1;
147	}
148
149	return 0;
150}
151
/*
 * Client side of the TCP address exchange.
 *
 * Connects to servername:port, sends the local LID/QPN/PSN/GID as a
 * fixed-size hex text message, reads the server's message in the same
 * format and acknowledges it with "done".
 *
 * servername - host to connect to
 * port       - TCP port of the server
 * my_dest    - local addressing information to send
 *
 * Returns a malloc()ed pingpong_dest describing the server (the caller
 * frees it), or NULL on any failure.
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res = NULL, *t;
	struct addrinfo hints = {
		.ai_family   = AF_INET,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33] = { 0 };
	unsigned int lid, qpn, psn;

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/* getaddrinfo() reports failure with any non-zero value, not only < 0. */
	if (n != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	/* Zero the buffer so no stale stack bytes go out on the wire. */
	memset(msg, 0, sizeof msg);
	gid_to_wire_gid(&my_dest->gid, gid);
	n = snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		     my_dest->qpn, my_dest->psn, gid);
	if (n < 0 || (size_t) n >= sizeof msg) {
		fprintf(stderr, "Couldn't format local address\n");
		goto out;
	}

	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
		fprintf(stderr, "Couldn't send local address\n");
		goto out;
	}

	if (read(sockfd, msg, sizeof msg) != sizeof msg ||
	    write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client read/write");
		fprintf(stderr, "Couldn't read/write remote address\n");
		goto out;
	}

	/* The peer is untrusted: terminate before parsing and bound the GID. */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &lid, &qpn, &psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	rem_dest->lid = lid;
	rem_dest->qpn = qpn;
	rem_dest->psn = psn;
	wire_gid_to_gid(gid, &rem_dest->gid);

out:
	close(sockfd);
	return rem_dest;
}
223
/*
 * Server side of the TCP address exchange.
 *
 * Listens on the given TCP port, accepts one connection, reads the
 * client's LID/QPN/PSN/GID message, connects the local QP to it with
 * pp_connect_ctx(), replies with the local addressing information and
 * waits for the client's "done" acknowledgement.
 *
 * ctx      - context whose QP is connected to the client
 * ib_port  - local IB port number
 * mtu      - path MTU
 * port     - TCP port to listen on
 * sl       - service level
 * my_dest  - local addressing information to send
 * sgid_idx - local GID index
 *
 * Returns a malloc()ed pingpong_dest describing the client (the caller
 * frees it), or NULL on any failure.
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, enum ibv_mtu mtu,
						 int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res = NULL, *t;
	struct addrinfo hints = {
		.ai_flags    = AI_PASSIVE,
		.ai_family   = AF_INET,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33] = { 0 };
	unsigned int lid, qpn, psn;

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(NULL, service, &hints, &res);

	/* getaddrinfo() reports failure with any non-zero value, not only < 0. */
	if (n != 0) {
		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			/* Best effort: lets the port be reused right after a run. */
			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1)) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		close(sockfd);
		return NULL;
	}

	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	n = read(connfd, msg, sizeof msg);
	if (n != sizeof msg) {
		perror("server read");
		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
		goto out;
	}

	/* The peer is untrusted: terminate before parsing and bound the GID. */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &lid, &qpn, &psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	rem_dest->lid = lid;
	rem_dest->qpn = qpn;
	rem_dest->psn = psn;
	wire_gid_to_gid(gid, &rem_dest->gid);

	if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest,
								sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

	/* Zero the buffer so no stale bytes from the client go back out. */
	memset(msg, 0, sizeof msg);
	gid_to_wire_gid(&my_dest->gid, gid);
	n = snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		     my_dest->qpn, my_dest->psn, gid);
	if (n < 0 || (size_t) n >= sizeof msg) {
		fprintf(stderr, "Couldn't format local address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

	if (write(connfd, msg, sizeof msg) != sizeof msg ||
	    read(connfd, msg, sizeof msg) != sizeof "done") {
		fprintf(stderr, "Couldn't send/recv local address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

out:
	close(connfd);
	return rem_dest;
}
324
325static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
326					    int rx_depth, int port,
327					    int use_event)
328{
329	struct pingpong_context *ctx;
330	int access_flags = IBV_ACCESS_LOCAL_WRITE;
331
332	ctx = calloc(1, sizeof *ctx);
333	if (!ctx)
334		return NULL;
335
336	ctx->size       = size;
337	ctx->send_flags = IBV_SEND_SIGNALED;
338	ctx->rx_depth   = rx_depth;
339
340	ctx->buf = memalign(page_size, size);
341	if (!ctx->buf) {
342		fprintf(stderr, "Couldn't allocate work buf.\n");
343		goto clean_ctx;
344	}
345
346	/* FIXME memset(ctx->buf, 0, size); */
347	memset(ctx->buf, 0x7b, size);
348
349	ctx->context = ibv_open_device(ib_dev);
350	if (!ctx->context) {
351		fprintf(stderr, "Couldn't get context for %s\n",
352			ibv_get_device_name(ib_dev));
353		goto clean_buffer;
354	}
355
356	if (use_event) {
357		ctx->channel = ibv_create_comp_channel(ctx->context);
358		if (!ctx->channel) {
359			fprintf(stderr, "Couldn't create completion channel\n");
360			goto clean_device;
361		}
362	} else
363		ctx->channel = NULL;
364
365	ctx->pd = ibv_alloc_pd(ctx->context);
366	if (!ctx->pd) {
367		fprintf(stderr, "Couldn't allocate PD\n");
368		goto clean_comp_channel;
369	}
370
371	if (use_odp || use_ts) {
372		const uint32_t rc_caps_mask = IBV_ODP_SUPPORT_SEND |
373					      IBV_ODP_SUPPORT_RECV;
374		struct ibv_device_attr_ex attrx;
375
376		if (ibv_query_device_ex(ctx->context, NULL, &attrx)) {
377			fprintf(stderr, "Couldn't query device for its features\n");
378			goto clean_comp_channel;
379		}
380
381		if (use_odp) {
382			if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) ||
383			    (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) {
384				fprintf(stderr, "The device isn't ODP capable or does not support RC send and receive with ODP\n");
385				goto clean_comp_channel;
386			}
387			access_flags |= IBV_ACCESS_ON_DEMAND;
388		}
389
390		if (use_ts) {
391			if (!attrx.completion_timestamp_mask) {
392				fprintf(stderr, "The device isn't completion timestamp capable\n");
393				goto clean_comp_channel;
394			}
395			ctx->completion_timestamp_mask = attrx.completion_timestamp_mask;
396		}
397	}
398	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, access_flags);
399
400	if (!ctx->mr) {
401		fprintf(stderr, "Couldn't register MR\n");
402		goto clean_pd;
403	}
404
405	if (use_ts) {
406		struct ibv_cq_init_attr_ex attr_ex = {
407			.cqe = rx_depth + 1,
408			.cq_context = NULL,
409			.channel = ctx->channel,
410			.comp_vector = 0,
411			.wc_flags = IBV_WC_EX_WITH_COMPLETION_TIMESTAMP
412		};
413
414		ctx->cq_s.cq_ex = ibv_create_cq_ex(ctx->context, &attr_ex);
415	} else {
416		ctx->cq_s.cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL,
417					     ctx->channel, 0);
418	}
419
420	if (!pp_cq(ctx)) {
421		fprintf(stderr, "Couldn't create CQ\n");
422		goto clean_mr;
423	}
424
425	{
426		struct ibv_qp_attr attr;
427		struct ibv_qp_init_attr init_attr = {
428			.send_cq = pp_cq(ctx),
429			.recv_cq = pp_cq(ctx),
430			.cap     = {
431				.max_send_wr  = 1,
432				.max_recv_wr  = rx_depth,
433				.max_send_sge = 1,
434				.max_recv_sge = 1
435			},
436			.qp_type = IBV_QPT_RC
437		};
438
439		ctx->qp = ibv_create_qp(ctx->pd, &init_attr);
440		if (!ctx->qp)  {
441			fprintf(stderr, "Couldn't create QP\n");
442			goto clean_cq;
443		}
444
445		ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr);
446		if (init_attr.cap.max_inline_data >= size) {
447			ctx->send_flags |= IBV_SEND_INLINE;
448		}
449	}
450
451	{
452		struct ibv_qp_attr attr = {
453			.qp_state        = IBV_QPS_INIT,
454			.pkey_index      = 0,
455			.port_num        = port,
456			.qp_access_flags = 0
457		};
458
459		if (ibv_modify_qp(ctx->qp, &attr,
460				  IBV_QP_STATE              |
461				  IBV_QP_PKEY_INDEX         |
462				  IBV_QP_PORT               |
463				  IBV_QP_ACCESS_FLAGS)) {
464			fprintf(stderr, "Failed to modify QP to INIT\n");
465			goto clean_qp;
466		}
467	}
468
469	return ctx;
470
471clean_qp:
472	ibv_destroy_qp(ctx->qp);
473
474clean_cq:
475	ibv_destroy_cq(pp_cq(ctx));
476
477clean_mr:
478	ibv_dereg_mr(ctx->mr);
479
480clean_pd:
481	ibv_dealloc_pd(ctx->pd);
482
483clean_comp_channel:
484	if (ctx->channel)
485		ibv_destroy_comp_channel(ctx->channel);
486
487clean_device:
488	ibv_close_device(ctx->context);
489
490clean_buffer:
491	free(ctx->buf);
492
493clean_ctx:
494	free(ctx);
495
496	return NULL;
497}
498
499static int pp_close_ctx(struct pingpong_context *ctx)
500{
501	if (ibv_destroy_qp(ctx->qp)) {
502		fprintf(stderr, "Couldn't destroy QP\n");
503		return 1;
504	}
505
506	if (ibv_destroy_cq(pp_cq(ctx))) {
507		fprintf(stderr, "Couldn't destroy CQ\n");
508		return 1;
509	}
510
511	if (ibv_dereg_mr(ctx->mr)) {
512		fprintf(stderr, "Couldn't deregister MR\n");
513		return 1;
514	}
515
516	if (ibv_dealloc_pd(ctx->pd)) {
517		fprintf(stderr, "Couldn't deallocate PD\n");
518		return 1;
519	}
520
521	if (ctx->channel) {
522		if (ibv_destroy_comp_channel(ctx->channel)) {
523			fprintf(stderr, "Couldn't destroy completion channel\n");
524			return 1;
525		}
526	}
527
528	if (ibv_close_device(ctx->context)) {
529		fprintf(stderr, "Couldn't release context\n");
530		return 1;
531	}
532
533	free(ctx->buf);
534	free(ctx);
535
536	return 0;
537}
538
539static int pp_post_recv(struct pingpong_context *ctx, int n)
540{
541	struct ibv_sge list = {
542		.addr	= (uintptr_t) ctx->buf,
543		.length = ctx->size,
544		.lkey	= ctx->mr->lkey
545	};
546	struct ibv_recv_wr wr = {
547		.wr_id	    = PINGPONG_RECV_WRID,
548		.sg_list    = &list,
549		.num_sge    = 1,
550	};
551	struct ibv_recv_wr *bad_wr;
552	int i;
553
554	for (i = 0; i < n; ++i)
555		if (ibv_post_recv(ctx->qp, &wr, &bad_wr))
556			break;
557
558	return i;
559}
560
561static int pp_post_send(struct pingpong_context *ctx)
562{
563	struct ibv_sge list = {
564		.addr	= (uintptr_t) ctx->buf,
565		.length = ctx->size,
566		.lkey	= ctx->mr->lkey
567	};
568	struct ibv_send_wr wr = {
569		.wr_id	    = PINGPONG_SEND_WRID,
570		.sg_list    = &list,
571		.num_sge    = 1,
572		.opcode     = IBV_WR_SEND,
573		.send_flags = ctx->send_flags,
574	};
575	struct ibv_send_wr *bad_wr;
576
577	return ibv_post_send(ctx->qp, &wr, &bad_wr);
578}
579
/*
 * Running statistics over the gaps between consecutive receive
 * completion timestamps, maintained by parse_single_wc().
 */
struct ts_params {
	/* Largest gap seen so far. */
	uint64_t		 comp_recv_max_time_delta;
	/* Smallest gap seen so far. */
	uint64_t		 comp_recv_min_time_delta;
	/* Sum of all gaps, for the average. */
	uint64_t		 comp_recv_total_time_delta;
	/* Timestamp of the previous receive completion. */
	uint64_t		 comp_recv_prev_time;
	/* Non-zero once comp_recv_prev_time holds a valid timestamp. */
	int			 last_comp_with_ts;
	/* Number of gaps accumulated. */
	unsigned int		 comp_with_time_iters;
};
588
589static inline int parse_single_wc(struct pingpong_context *ctx, int *scnt,
590				  int *rcnt, int *routs, int iters,
591				  uint64_t wr_id, enum ibv_wc_status status,
592				  uint64_t completion_timestamp,
593				  struct ts_params *ts)
594{
595	if (status != IBV_WC_SUCCESS) {
596		fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
597			ibv_wc_status_str(status),
598			status, (int)wr_id);
599		return 1;
600	}
601
602	switch ((int)wr_id) {
603	case PINGPONG_SEND_WRID:
604		++(*scnt);
605		break;
606
607	case PINGPONG_RECV_WRID:
608		if (--(*routs) <= 1) {
609			*routs += pp_post_recv(ctx, ctx->rx_depth - *routs);
610			if (*routs < ctx->rx_depth) {
611				fprintf(stderr,
612					"Couldn't post receive (%d)\n",
613					*routs);
614				return 1;
615			}
616		}
617
618		++(*rcnt);
619		if (use_ts) {
620			if (ts->last_comp_with_ts) {
621				uint64_t delta;
622
623				/* checking whether the clock was wrapped around */
624				if (completion_timestamp >= ts->comp_recv_prev_time)
625					delta = completion_timestamp - ts->comp_recv_prev_time;
626				else
627					delta = ctx->completion_timestamp_mask - ts->comp_recv_prev_time +
628						completion_timestamp + 1;
629
630				ts->comp_recv_max_time_delta = MAX(ts->comp_recv_max_time_delta, delta);
631				ts->comp_recv_min_time_delta = MIN(ts->comp_recv_min_time_delta, delta);
632				ts->comp_recv_total_time_delta += delta;
633				ts->comp_with_time_iters++;
634			}
635
636			ts->comp_recv_prev_time = completion_timestamp;
637			ts->last_comp_with_ts = 1;
638		} else {
639			ts->last_comp_with_ts = 0;
640		}
641
642		break;
643
644	default:
645		fprintf(stderr, "Completion for unknown wr_id %d\n",
646			(int)wr_id);
647		return 1;
648	}
649
650	ctx->pending &= ~(int)wr_id;
651	if (*scnt < iters && !ctx->pending) {
652		if (pp_post_send(ctx)) {
653			fprintf(stderr, "Couldn't post send\n");
654			return 1;
655		}
656		ctx->pending = PINGPONG_RECV_WRID |
657			PINGPONG_SEND_WRID;
658	}
659
660	return 0;
661}
662
/*
 * Print the command-line synopsis and option summary to stdout.
 * argv0 is the program name shown in the two usage lines.
 */
static void usage(const char *argv0)
{
	static const char options[] =
		"\n"
		"Options:\n"
		"  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n"
		"  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n"
		"  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n"
		"  -s, --size=<size>      size of message to exchange (default 4096)\n"
		"  -m, --mtu=<size>       path MTU (default 1024)\n"
		"  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n"
		"  -n, --iters=<iters>    number of exchanges (default 1000)\n"
		"  -l, --sl=<sl>          service level value\n"
		"  -e, --events           sleep on CQ events (default poll)\n"
		"  -g, --gid-idx=<gid index> local port gid index\n"
		"  -o, --odp		    use on demand paging\n"
		"  -t, --ts	            get CQE with timestamp\n";

	printf("Usage:\n"
	       "  %s            start a server and wait for connection\n"
	       "  %s <host>     connect to server at <host>\n",
	       argv0, argv0);
	fputs(options, stdout);
}
683
684int main(int argc, char *argv[])
685{
686	struct ibv_device      **dev_list;
687	struct ibv_device	*ib_dev;
688	struct pingpong_context *ctx;
689	struct pingpong_dest     my_dest;
690	struct pingpong_dest    *rem_dest;
691	struct timeval           start, end;
692	char                    *ib_devname = NULL;
693	char                    *servername = NULL;
694	unsigned int             port = 18515;
695	int                      ib_port = 1;
696	unsigned int             size = 4096;
697	enum ibv_mtu		 mtu = IBV_MTU_1024;
698	unsigned int             rx_depth = 500;
699	unsigned int             iters = 1000;
700	int                      use_event = 0;
701	int                      routs;
702	int                      rcnt, scnt;
703	int                      num_cq_events = 0;
704	int                      sl = 0;
705	int			 gidx = -1;
706	char			 gid[33];
707	struct ts_params	 ts;
708
709	srand48(getpid() * time(NULL));
710
711	while (1) {
712		int c;
713
714		static struct option long_options[] = {
715			{ .name = "port",     .has_arg = 1, .val = 'p' },
716			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
717			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
718			{ .name = "size",     .has_arg = 1, .val = 's' },
719			{ .name = "mtu",      .has_arg = 1, .val = 'm' },
720			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
721			{ .name = "iters",    .has_arg = 1, .val = 'n' },
722			{ .name = "sl",       .has_arg = 1, .val = 'l' },
723			{ .name = "events",   .has_arg = 0, .val = 'e' },
724			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
725			{ .name = "odp",      .has_arg = 0, .val = 'o' },
726			{ .name = "ts",       .has_arg = 0, .val = 't' },
727			{}
728		};
729
730		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:ot",
731				long_options, NULL);
732
733		if (c == -1)
734			break;
735
736		switch (c) {
737		case 'p':
738			port = strtoul(optarg, NULL, 0);
739			if (port > 65535) {
740				usage(argv[0]);
741				return 1;
742			}
743			break;
744
745		case 'd':
746			ib_devname = strdupa(optarg);
747			break;
748
749		case 'i':
750			ib_port = strtol(optarg, NULL, 0);
751			if (ib_port < 1) {
752				usage(argv[0]);
753				return 1;
754			}
755			break;
756
757		case 's':
758			size = strtoul(optarg, NULL, 0);
759			break;
760
761		case 'm':
762			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
763			if (mtu == 0) {
764				usage(argv[0]);
765				return 1;
766			}
767			break;
768
769		case 'r':
770			rx_depth = strtoul(optarg, NULL, 0);
771			break;
772
773		case 'n':
774			iters = strtoul(optarg, NULL, 0);
775			break;
776
777		case 'l':
778			sl = strtol(optarg, NULL, 0);
779			break;
780
781		case 'e':
782			++use_event;
783			break;
784
785		case 'g':
786			gidx = strtol(optarg, NULL, 0);
787			break;
788
789		case 'o':
790			use_odp = 1;
791			break;
792		case 't':
793			use_ts = 1;
794			break;
795
796		default:
797			usage(argv[0]);
798			return 1;
799		}
800	}
801
802	if (optind == argc - 1)
803		servername = strdupa(argv[optind]);
804	else if (optind < argc) {
805		usage(argv[0]);
806		return 1;
807	}
808
809	if (use_ts) {
810		ts.comp_recv_max_time_delta = 0;
811		ts.comp_recv_min_time_delta = 0xffffffff;
812		ts.comp_recv_total_time_delta = 0;
813		ts.comp_recv_prev_time = 0;
814		ts.last_comp_with_ts = 0;
815		ts.comp_with_time_iters = 0;
816	}
817
818	page_size = sysconf(_SC_PAGESIZE);
819
820	dev_list = ibv_get_device_list(NULL);
821	if (!dev_list) {
822		perror("Failed to get IB devices list");
823		return 1;
824	}
825
826	if (!ib_devname) {
827		ib_dev = *dev_list;
828		if (!ib_dev) {
829			fprintf(stderr, "No IB devices found\n");
830			return 1;
831		}
832	} else {
833		int i;
834		for (i = 0; dev_list[i]; ++i)
835			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
836				break;
837		ib_dev = dev_list[i];
838		if (!ib_dev) {
839			fprintf(stderr, "IB device %s not found\n", ib_devname);
840			return 1;
841		}
842	}
843
844	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
845	if (!ctx)
846		return 1;
847
848	routs = pp_post_recv(ctx, ctx->rx_depth);
849	if (routs < ctx->rx_depth) {
850		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
851		return 1;
852	}
853
854	if (use_event)
855		if (ibv_req_notify_cq(pp_cq(ctx), 0)) {
856			fprintf(stderr, "Couldn't request CQ notification\n");
857			return 1;
858		}
859
860
861	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
862		fprintf(stderr, "Couldn't get port info\n");
863		return 1;
864	}
865
866	my_dest.lid = ctx->portinfo.lid;
867	if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET &&
868							!my_dest.lid) {
869		fprintf(stderr, "Couldn't get local LID\n");
870		return 1;
871	}
872
873	if (gidx >= 0) {
874		if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) {
875			fprintf(stderr, "can't read sgid of index %d\n", gidx);
876			return 1;
877		}
878	} else
879		memset(&my_dest.gid, 0, sizeof my_dest.gid);
880
881	my_dest.qpn = ctx->qp->qp_num;
882	my_dest.psn = lrand48() & 0xffffff;
883	inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid);
884	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
885	       my_dest.lid, my_dest.qpn, my_dest.psn, gid);
886
887
888	if (servername)
889		rem_dest = pp_client_exch_dest(servername, port, &my_dest);
890	else
891		rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
892								&my_dest, gidx);
893
894	if (!rem_dest)
895		return 1;
896
897	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
898	printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
899	       rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid);
900
901	if (servername)
902		if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest,
903					gidx))
904			return 1;
905
906	ctx->pending = PINGPONG_RECV_WRID;
907
908	if (servername) {
909		if (pp_post_send(ctx)) {
910			fprintf(stderr, "Couldn't post send\n");
911			return 1;
912		}
913		ctx->pending |= PINGPONG_SEND_WRID;
914	}
915
916	if (gettimeofday(&start, NULL)) {
917		perror("gettimeofday");
918		return 1;
919	}
920
921	rcnt = scnt = 0;
922	while (rcnt < iters || scnt < iters) {
923		int ret;
924
925		if (use_event) {
926			struct ibv_cq *ev_cq;
927			void          *ev_ctx;
928
929			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
930				fprintf(stderr, "Failed to get cq_event\n");
931				return 1;
932			}
933
934			++num_cq_events;
935
936			if (ev_cq != pp_cq(ctx)) {
937				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
938				return 1;
939			}
940
941			if (ibv_req_notify_cq(pp_cq(ctx), 0)) {
942				fprintf(stderr, "Couldn't request CQ notification\n");
943				return 1;
944			}
945		}
946
947		if (use_ts) {
948			struct ibv_poll_cq_attr attr = {};
949
950			do {
951				ret = ibv_start_poll(ctx->cq_s.cq_ex, &attr);
952			} while (!use_event && ret == ENOENT);
953
954			if (ret) {
955				fprintf(stderr, "poll CQ failed %d\n", ret);
956				return ret;
957			}
958			ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
959					      iters,
960					      ctx->cq_s.cq_ex->wr_id,
961					      ctx->cq_s.cq_ex->status,
962					      ibv_wc_read_completion_ts(ctx->cq_s.cq_ex),
963					      &ts);
964			if (ret) {
965				ibv_end_poll(ctx->cq_s.cq_ex);
966				return ret;
967			}
968			ret = ibv_next_poll(ctx->cq_s.cq_ex);
969			if (!ret)
970				ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
971						      iters,
972						      ctx->cq_s.cq_ex->wr_id,
973						      ctx->cq_s.cq_ex->status,
974						      ibv_wc_read_completion_ts(ctx->cq_s.cq_ex),
975						      &ts);
976			ibv_end_poll(ctx->cq_s.cq_ex);
977			if (ret && ret != ENOENT) {
978				fprintf(stderr, "poll CQ failed %d\n", ret);
979				return ret;
980			}
981		} else {
982			int ne, i;
983			struct ibv_wc wc[2];
984
985			do {
986				ne = ibv_poll_cq(pp_cq(ctx), 2, wc);
987				if (ne < 0) {
988					fprintf(stderr, "poll CQ failed %d\n", ne);
989					return 1;
990				}
991			} while (!use_event && ne < 1);
992
993			for (i = 0; i < ne; ++i) {
994				ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
995						      iters,
996						      wc[i].wr_id,
997						      wc[i].status,
998						      0, &ts);
999				if (ret) {
1000					fprintf(stderr, "parse WC failed %d\n", ne);
1001					return 1;
1002				}
1003			}
1004		}
1005	}
1006
1007	if (gettimeofday(&end, NULL)) {
1008		perror("gettimeofday");
1009		return 1;
1010	}
1011
1012	{
1013		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
1014			(end.tv_usec - start.tv_usec);
1015		long long bytes = (long long) size * iters * 2;
1016
1017		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
1018		       bytes, usec / 1000000., bytes * 8. / usec);
1019		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
1020		       iters, usec / 1000000., usec / iters);
1021
1022		if (use_ts && ts.comp_with_time_iters) {
1023			printf("Max receive completion clock cycles = %" PRIu64 "\n",
1024			       ts.comp_recv_max_time_delta);
1025			printf("Min receive completion clock cycles = %" PRIu64 "\n",
1026			       ts.comp_recv_min_time_delta);
1027			printf("Average receive completion clock cycles = %f\n",
1028			       (double)ts.comp_recv_total_time_delta / ts.comp_with_time_iters);
1029		}
1030	}
1031
1032	ibv_ack_cq_events(pp_cq(ctx), num_cq_events);
1033
1034	if (pp_close_ctx(ctx))
1035		return 1;
1036
1037	ibv_free_device_list(dev_list);
1038	free(rem_dest);
1039
1040	return 0;
1041}
1042