1219820Sjeff/*
2219820Sjeff * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3219820Sjeff *
4219820Sjeff * This software is available to you under a choice of one of two
5219820Sjeff * licenses.  You may choose to be licensed under the terms of the GNU
6219820Sjeff * General Public License (GPL) Version 2, available from the file
7219820Sjeff * COPYING in the main directory of this source tree, or the
8219820Sjeff * OpenIB.org BSD license below:
9219820Sjeff *
10219820Sjeff *     Redistribution and use in source and binary forms, with or
11219820Sjeff *     without modification, are permitted provided that the following
12219820Sjeff *     conditions are met:
13219820Sjeff *
14219820Sjeff *      - Redistributions of source code must retain the above
15219820Sjeff *        copyright notice, this list of conditions and the following
16219820Sjeff *        disclaimer.
17219820Sjeff *
18219820Sjeff *      - Redistributions in binary form must reproduce the above
19219820Sjeff *        copyright notice, this list of conditions and the following
20219820Sjeff *        disclaimer in the documentation and/or other materials
21219820Sjeff *        provided with the distribution.
22219820Sjeff *
23219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30219820Sjeff * SOFTWARE.
31219820Sjeff */
32219820Sjeff
33219820Sjeff#if HAVE_CONFIG_H
34219820Sjeff#  include <config.h>
35219820Sjeff#endif /* HAVE_CONFIG_H */
36219820Sjeff
37219820Sjeff#include <stdio.h>
38219820Sjeff#include <stdlib.h>
39219820Sjeff#include <unistd.h>
40219820Sjeff#include <string.h>
41219820Sjeff#include <sys/types.h>
42219820Sjeff#include <sys/socket.h>
43219820Sjeff#include <sys/time.h>
44219820Sjeff#include <netdb.h>
45219820Sjeff#include <getopt.h>
46219820Sjeff#include <arpa/inet.h>
47219820Sjeff#include <time.h>
48219820Sjeff
49219820Sjeff#include "pingpong.h"
50219820Sjeff
/*
 * Work-request IDs and the compile-time cap on the number of queue pairs.
 * The two WRID values occupy distinct bits: main() ORs them together in
 * ctx->pending[] and masks them off again as completions arrive.
 */
enum {
	PINGPONG_RECV_WRID = 1 << 0,	/* wr_id placed on receive work requests */
	PINGPONG_SEND_WRID = 1 << 1,	/* wr_id placed on send work requests */

	MAX_QP             = 256,	/* dimension of the per-QP arrays */
};
57219820Sjeff
/*
 * System page size; main() fills it in from sysconf(_SC_PAGESIZE) and
 * pp_init_ctx() rounds the work-buffer allocation up to a multiple of it.
 */
static int page_size;
59219820Sjeff
60219820Sjeffstruct pingpong_context {
61219820Sjeff	struct ibv_context	*context;
62219820Sjeff	struct ibv_comp_channel *channel;
63219820Sjeff	struct ibv_pd		*pd;
64219820Sjeff	struct ibv_mr		*mr;
65219820Sjeff	struct ibv_cq		*cq;
66219820Sjeff	struct ibv_srq		*srq;
67219820Sjeff	struct ibv_qp		*qp[MAX_QP];
68219820Sjeff	void			*buf;
69219820Sjeff	int			 size;
70219820Sjeff	int			 num_qp;
71219820Sjeff	int			 rx_depth;
72219820Sjeff	int			 pending[MAX_QP];
73219820Sjeff	struct ibv_port_attr	 portinfo;
74219820Sjeff};
75219820Sjeff
76219820Sjeffstruct pingpong_dest {
77219820Sjeff	int lid;
78219820Sjeff	int qpn;
79219820Sjeff	int psn;
80219820Sjeff	union ibv_gid gid;
81219820Sjeff};
82219820Sjeff
83219820Sjeffstatic int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
84219820Sjeff			  int sl, const struct pingpong_dest *my_dest,
85219820Sjeff			  const struct pingpong_dest *dest, int sgid_idx)
86219820Sjeff{
87219820Sjeff	int i;
88219820Sjeff
89219820Sjeff	for (i = 0; i < ctx->num_qp; ++i) {
90219820Sjeff		struct ibv_qp_attr attr = {
91219820Sjeff			.qp_state		= IBV_QPS_RTR,
92219820Sjeff			.path_mtu		= mtu,
93219820Sjeff			.dest_qp_num		= dest[i].qpn,
94219820Sjeff			.rq_psn			= dest[i].psn,
95219820Sjeff			.max_dest_rd_atomic	= 1,
96219820Sjeff			.min_rnr_timer		= 12,
97219820Sjeff			.ah_attr		= {
98219820Sjeff				.is_global	= 0,
99219820Sjeff				.dlid		= dest[i].lid,
100219820Sjeff				.sl		= sl,
101219820Sjeff				.src_path_bits	= 0,
102219820Sjeff				.port_num	= port
103219820Sjeff			}
104219820Sjeff		};
105219820Sjeff
106219820Sjeff		if (dest->gid.global.interface_id) {
107219820Sjeff			attr.ah_attr.is_global = 1;
108219820Sjeff			attr.ah_attr.grh.hop_limit = 1;
109219820Sjeff			attr.ah_attr.grh.dgid = dest->gid;
110219820Sjeff			attr.ah_attr.grh.sgid_index = sgid_idx;
111219820Sjeff		}
112219820Sjeff		if (ibv_modify_qp(ctx->qp[i], &attr,
113219820Sjeff				  IBV_QP_STATE              |
114219820Sjeff				  IBV_QP_AV                 |
115219820Sjeff				  IBV_QP_PATH_MTU           |
116219820Sjeff				  IBV_QP_DEST_QPN           |
117219820Sjeff				  IBV_QP_RQ_PSN             |
118219820Sjeff				  IBV_QP_MAX_DEST_RD_ATOMIC |
119219820Sjeff				  IBV_QP_MIN_RNR_TIMER)) {
120219820Sjeff			fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i);
121219820Sjeff			return 1;
122219820Sjeff		}
123219820Sjeff
124219820Sjeff		attr.qp_state	    = IBV_QPS_RTS;
125219820Sjeff		attr.timeout	    = 14;
126219820Sjeff		attr.retry_cnt	    = 7;
127219820Sjeff		attr.rnr_retry	    = 7;
128219820Sjeff		attr.sq_psn	    = my_dest[i].psn;
129219820Sjeff		attr.max_rd_atomic  = 1;
130219820Sjeff		if (ibv_modify_qp(ctx->qp[i], &attr,
131219820Sjeff				  IBV_QP_STATE              |
132219820Sjeff				  IBV_QP_TIMEOUT            |
133219820Sjeff				  IBV_QP_RETRY_CNT          |
134219820Sjeff				  IBV_QP_RNR_RETRY          |
135219820Sjeff				  IBV_QP_SQ_PSN             |
136219820Sjeff				  IBV_QP_MAX_QP_RD_ATOMIC)) {
137219820Sjeff			fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i);
138219820Sjeff			return 1;
139219820Sjeff		}
140219820Sjeff	}
141219820Sjeff
142219820Sjeff	return 0;
143219820Sjeff}
144219820Sjeff
/*
 * Client side of the out-of-band address exchange.
 *
 * Connects to servername:port over TCP, sends all MAX_QP entries of
 * my_dest, then reads MAX_QP entries back from the server and finally
 * sends "done".
 *
 * Returns a malloc()ed array of MAX_QP remote endpoints that the caller
 * must free(), or NULL on any failure (a diagnostic is printed to stderr).
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family   = AF_INET,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int r;
	int i;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/* getaddrinfo() reports failure with any non-zero value. */
	if (n != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	/* Each record is sent as a fixed-size block, terminator included. */
	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s",
			 my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid);
		if (write(sockfd, msg, sizeof msg) != (ssize_t) sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			goto out;
		}
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	for (i = 0; i < MAX_QP; ++i) {
		n = 0;
		while (n < (int) sizeof msg) {
			r = read(sockfd, msg + n, sizeof msg - n);
			/* r == 0 is EOF: the peer went away mid-record. */
			if (r <= 0) {
				if (r < 0)
					perror("client read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					n, (int) sizeof msg, i);
				free(rem_dest);
				rem_dest = NULL;
				goto out;
			}
			n += r;
		}

		/* The data came off the network: terminate and bound the parse. */
		msg[sizeof msg - 1] = '\0';
		if (sscanf(msg, "%x:%x:%x:%32s",
			   &rem_dest[i].lid, &rem_dest[i].qpn, &rem_dest[i].psn, gid) != 4) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			free(rem_dest);
			rem_dest = NULL;
			goto out;
		}
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	/* Best effort: the addresses are already in hand, so only warn. */
	if (write(sockfd, "done", sizeof "done") != (ssize_t) sizeof "done")
		fprintf(stderr, "Couldn't send completion notice\n");

out:
	close(sockfd);
	return rem_dest;
}
228219820Sjeff
/*
 * Server side of the out-of-band address exchange.
 *
 * Listens on TCP port `port`, accepts one connection, reads MAX_QP remote
 * endpoints, connects the local QPs to them with pp_connect_ctx(), sends
 * all MAX_QP entries of my_dest back, and waits for the client's final
 * message.
 *
 * Returns a malloc()ed array of MAX_QP remote endpoints that the caller
 * must free(), or NULL on any failure (a diagnostic is printed to stderr).
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, enum ibv_mtu mtu,
						 int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_flags    = AI_PASSIVE,
		.ai_family   = AF_INET,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int r;
	int i;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(NULL, service, &hints, &res);

	/* getaddrinfo() reports failure with any non-zero value. */
	if (n != 0) {
		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			/* Best effort; bind() below reports any real problem. */
			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1) < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		close(sockfd);
		return NULL;
	}
	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	for (i = 0; i < MAX_QP; ++i) {
		n = 0;
		while (n < (int) sizeof msg) {
			r = read(connfd, msg + n, sizeof msg - n);
			/* r == 0 is EOF: the peer went away mid-record. */
			if (r <= 0) {
				if (r < 0)
					perror("server read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					n, (int) sizeof msg, i);
				free(rem_dest);
				rem_dest = NULL;
				goto out;
			}
			n += r;
		}

		/* The data came off the network: terminate and bound the parse. */
		msg[sizeof msg - 1] = '\0';
		if (sscanf(msg, "%x:%x:%x:%32s",
			   &rem_dest[i].lid, &rem_dest[i].qpn, &rem_dest[i].psn, gid) != 4) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			free(rem_dest);
			rem_dest = NULL;
			goto out;
		}
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s",
			 my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid);
		if (write(connfd, msg, sizeof msg) != (ssize_t) sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			free(rem_dest);
			rem_dest = NULL;
			goto out;
		}
	}

	/* Wait for the client's final message; its content is not checked. */
	if (read(connfd, msg, sizeof msg) < 0)
		perror("server read");

out:
	close(connfd);
	return rem_dest;
}
337219820Sjeff
338219820Sjeffstatic struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
339219820Sjeff					    int num_qp, int rx_depth, int port,
340219820Sjeff					    int use_event)
341219820Sjeff{
342219820Sjeff	struct pingpong_context *ctx;
343219820Sjeff	int i;
344219820Sjeff
345219820Sjeff	ctx = calloc(1, sizeof *ctx);
346219820Sjeff	if (!ctx)
347219820Sjeff		return NULL;
348219820Sjeff
349219820Sjeff	ctx->size     = size;
350219820Sjeff	ctx->num_qp   = num_qp;
351219820Sjeff	ctx->rx_depth = rx_depth;
352219820Sjeff
353219820Sjeff	ctx->buf = malloc(roundup(size, page_size));
354219820Sjeff	if (!ctx->buf) {
355219820Sjeff		fprintf(stderr, "Couldn't allocate work buf.\n");
356219820Sjeff		return NULL;
357219820Sjeff	}
358219820Sjeff
359219820Sjeff	memset(ctx->buf, 0, size);
360219820Sjeff
361219820Sjeff	ctx->context = ibv_open_device(ib_dev);
362219820Sjeff	if (!ctx->context) {
363219820Sjeff		fprintf(stderr, "Couldn't get context for %s\n",
364219820Sjeff			ibv_get_device_name(ib_dev));
365219820Sjeff		return NULL;
366219820Sjeff	}
367219820Sjeff
368219820Sjeff	if (use_event) {
369219820Sjeff		ctx->channel = ibv_create_comp_channel(ctx->context);
370219820Sjeff		if (!ctx->channel) {
371219820Sjeff			fprintf(stderr, "Couldn't create completion channel\n");
372219820Sjeff			return NULL;
373219820Sjeff		}
374219820Sjeff	} else
375219820Sjeff		ctx->channel = NULL;
376219820Sjeff
377219820Sjeff	ctx->pd = ibv_alloc_pd(ctx->context);
378219820Sjeff	if (!ctx->pd) {
379219820Sjeff		fprintf(stderr, "Couldn't allocate PD\n");
380219820Sjeff		return NULL;
381219820Sjeff	}
382219820Sjeff
383219820Sjeff	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
384219820Sjeff	if (!ctx->mr) {
385219820Sjeff		fprintf(stderr, "Couldn't register MR\n");
386219820Sjeff		return NULL;
387219820Sjeff	}
388219820Sjeff
389219820Sjeff	ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL,
390219820Sjeff				ctx->channel, 0);
391219820Sjeff	if (!ctx->cq) {
392219820Sjeff		fprintf(stderr, "Couldn't create CQ\n");
393219820Sjeff		return NULL;
394219820Sjeff	}
395219820Sjeff
396219820Sjeff	{
397219820Sjeff		struct ibv_srq_init_attr attr = {
398219820Sjeff			.attr = {
399219820Sjeff				.max_wr  = rx_depth,
400219820Sjeff				.max_sge = 1
401219820Sjeff			}
402219820Sjeff		};
403219820Sjeff
404219820Sjeff		ctx->srq = ibv_create_srq(ctx->pd, &attr);
405219820Sjeff		if (!ctx->srq)  {
406219820Sjeff			fprintf(stderr, "Couldn't create SRQ\n");
407219820Sjeff			return NULL;
408219820Sjeff		}
409219820Sjeff	}
410219820Sjeff
411219820Sjeff	for (i = 0; i < num_qp; ++i) {
412219820Sjeff		struct ibv_qp_init_attr attr = {
413219820Sjeff			.send_cq = ctx->cq,
414219820Sjeff			.recv_cq = ctx->cq,
415219820Sjeff			.srq     = ctx->srq,
416219820Sjeff			.cap     = {
417219820Sjeff				.max_send_wr  = 1,
418219820Sjeff				.max_send_sge = 1,
419219820Sjeff			},
420219820Sjeff			.qp_type = IBV_QPT_RC
421219820Sjeff		};
422219820Sjeff
423219820Sjeff		ctx->qp[i] = ibv_create_qp(ctx->pd, &attr);
424219820Sjeff		if (!ctx->qp[i])  {
425219820Sjeff			fprintf(stderr, "Couldn't create QP[%d]\n", i);
426219820Sjeff			return NULL;
427219820Sjeff		}
428219820Sjeff	}
429219820Sjeff
430219820Sjeff	for (i = 0; i < num_qp; ++i) {
431219820Sjeff		struct ibv_qp_attr attr = {
432219820Sjeff			.qp_state        = IBV_QPS_INIT,
433219820Sjeff			.pkey_index      = 0,
434219820Sjeff			.port_num        = port,
435219820Sjeff			.qp_access_flags = 0
436219820Sjeff		};
437219820Sjeff
438219820Sjeff		if (ibv_modify_qp(ctx->qp[i], &attr,
439219820Sjeff				  IBV_QP_STATE              |
440219820Sjeff				  IBV_QP_PKEY_INDEX         |
441219820Sjeff				  IBV_QP_PORT               |
442219820Sjeff				  IBV_QP_ACCESS_FLAGS)) {
443219820Sjeff			fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i);
444219820Sjeff			return NULL;
445219820Sjeff		}
446219820Sjeff	}
447219820Sjeff
448219820Sjeff	return ctx;
449219820Sjeff}
450219820Sjeff
451219820Sjeffint pp_close_ctx(struct pingpong_context *ctx, int num_qp)
452219820Sjeff{
453219820Sjeff	int i;
454219820Sjeff
455219820Sjeff	for (i = 0; i < num_qp; ++i) {
456219820Sjeff		if (ibv_destroy_qp(ctx->qp[i])) {
457219820Sjeff			fprintf(stderr, "Couldn't destroy QP[%d]\n", i);
458219820Sjeff			return 1;
459219820Sjeff		}
460219820Sjeff	}
461219820Sjeff
462219820Sjeff	if (ibv_destroy_srq(ctx->srq)) {
463219820Sjeff		fprintf(stderr, "Couldn't destroy SRQ\n");
464219820Sjeff		return 1;
465219820Sjeff	}
466219820Sjeff
467219820Sjeff	if (ibv_destroy_cq(ctx->cq)) {
468219820Sjeff		fprintf(stderr, "Couldn't destroy CQ\n");
469219820Sjeff		return 1;
470219820Sjeff	}
471219820Sjeff
472219820Sjeff	if (ibv_dereg_mr(ctx->mr)) {
473219820Sjeff		fprintf(stderr, "Couldn't deregister MR\n");
474219820Sjeff		return 1;
475219820Sjeff	}
476219820Sjeff
477219820Sjeff	if (ibv_dealloc_pd(ctx->pd)) {
478219820Sjeff		fprintf(stderr, "Couldn't deallocate PD\n");
479219820Sjeff		return 1;
480219820Sjeff	}
481219820Sjeff
482219820Sjeff	if (ctx->channel) {
483219820Sjeff		if (ibv_destroy_comp_channel(ctx->channel)) {
484219820Sjeff			fprintf(stderr, "Couldn't destroy completion channel\n");
485219820Sjeff			return 1;
486219820Sjeff		}
487219820Sjeff	}
488219820Sjeff
489219820Sjeff	if (ibv_close_device(ctx->context)) {
490219820Sjeff		fprintf(stderr, "Couldn't release context\n");
491219820Sjeff		return 1;
492219820Sjeff	}
493219820Sjeff
494219820Sjeff	free(ctx->buf);
495219820Sjeff	free(ctx);
496219820Sjeff
497219820Sjeff	return 0;
498219820Sjeff}
499219820Sjeff
500219820Sjeffstatic int pp_post_recv(struct pingpong_context *ctx, int n)
501219820Sjeff{
502219820Sjeff	struct ibv_sge list = {
503219820Sjeff		.addr	= (uintptr_t) ctx->buf,
504219820Sjeff		.length = ctx->size,
505219820Sjeff		.lkey	= ctx->mr->lkey
506219820Sjeff	};
507219820Sjeff	struct ibv_recv_wr wr = {
508219820Sjeff		.wr_id	    = PINGPONG_RECV_WRID,
509219820Sjeff		.sg_list    = &list,
510219820Sjeff		.num_sge    = 1,
511219820Sjeff	};
512219820Sjeff	struct ibv_recv_wr *bad_wr;
513219820Sjeff	int i;
514219820Sjeff
515219820Sjeff	for (i = 0; i < n; ++i)
516219820Sjeff		if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr))
517219820Sjeff			break;
518219820Sjeff
519219820Sjeff	return i;
520219820Sjeff}
521219820Sjeff
522219820Sjeffstatic int pp_post_send(struct pingpong_context *ctx, int qp_index)
523219820Sjeff{
524219820Sjeff	struct ibv_sge list = {
525219820Sjeff		.addr	= (uintptr_t) ctx->buf,
526219820Sjeff		.length = ctx->size,
527219820Sjeff		.lkey	= ctx->mr->lkey
528219820Sjeff	};
529219820Sjeff	struct ibv_send_wr wr = {
530219820Sjeff		.wr_id	    = PINGPONG_SEND_WRID,
531219820Sjeff		.sg_list    = &list,
532219820Sjeff		.num_sge    = 1,
533219820Sjeff		.opcode     = IBV_WR_SEND,
534219820Sjeff		.send_flags = IBV_SEND_SIGNALED,
535219820Sjeff	};
536219820Sjeff	struct ibv_send_wr *bad_wr;
537219820Sjeff
538219820Sjeff	return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr);
539219820Sjeff}
540219820Sjeff
541219820Sjeffstatic int find_qp(int qpn, struct pingpong_context *ctx, int num_qp)
542219820Sjeff{
543219820Sjeff	int i;
544219820Sjeff
545219820Sjeff	for (i = 0; i < num_qp; ++i)
546219820Sjeff		if (ctx->qp[i]->qp_num == qpn)
547219820Sjeff			return i;
548219820Sjeff
549219820Sjeff	return -1;
550219820Sjeff}
551219820Sjeff
/*
 * Print the command-line synopsis and option summary to stdout.
 * argv0 is the program name shown in the two synopsis lines.
 */
static void usage(const char *argv0)
{
	printf("Usage:\n"
	       "  %s            start a server and wait for connection\n"
	       "  %s <host>     connect to server at <host>\n"
	       "\n",
	       argv0, argv0);

	/* The option table contains no conversions, so emit it verbatim. */
	fputs("Options:\n"
	      "  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n"
	      "  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n"
	      "  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n"
	      "  -s, --size=<size>      size of message to exchange (default 4096)\n"
	      "  -m, --mtu=<size>       path MTU (default 1024)\n"
	      "  -q, --num-qp=<num>     number of QPs to use (default 16)\n"
	      "  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n"
	      "  -n, --iters=<iters>    number of exchanges per QP(default 1000)\n"
	      "  -l, --sl=<sl>          service level value\n"
	      "  -e, --events           sleep on CQ events (default poll)\n"
	      "  -g, --gid-idx=<gid index> local port gid index\n",
	      stdout);
}
571219820Sjeff
572219820Sjeffint main(int argc, char *argv[])
573219820Sjeff{
574219820Sjeff	struct ibv_device      **dev_list;
575219820Sjeff	struct ibv_device	*ib_dev;
576219820Sjeff	struct ibv_wc		*wc;
577219820Sjeff	struct pingpong_context *ctx;
578219820Sjeff	struct pingpong_dest     my_dest[MAX_QP];
579219820Sjeff	struct pingpong_dest    *rem_dest;
580219820Sjeff	struct timeval           start, end;
581219820Sjeff	char                    *ib_devname = NULL;
582219820Sjeff	char                    *servername = NULL;
583219820Sjeff	int                      port = 18515;
584219820Sjeff	int                      ib_port = 1;
585219820Sjeff	int                      size = 4096;
586219820Sjeff	enum ibv_mtu		 mtu = IBV_MTU_1024;
587219820Sjeff	int                      num_qp = 16;
588219820Sjeff	int                      rx_depth = 500;
589219820Sjeff	int                      iters = 1000;
590219820Sjeff	int                      use_event = 0;
591219820Sjeff	int                      routs;
592219820Sjeff	int                      rcnt, scnt;
593219820Sjeff	int			 num_wc;
594219820Sjeff	int                      i;
595219820Sjeff	int                      num_cq_events = 0;
596219820Sjeff	int                      sl = 0;
597219820Sjeff	int			 gidx = -1;
598219820Sjeff	char			 gid[33];
599219820Sjeff
600219820Sjeff	srand48(getpid() * time(NULL));
601219820Sjeff
602219820Sjeff	while (1) {
603219820Sjeff		int c;
604219820Sjeff
605219820Sjeff		static struct option long_options[] = {
606219820Sjeff			{ .name = "port",     .has_arg = 1, .val = 'p' },
607219820Sjeff			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
608219820Sjeff			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
609219820Sjeff			{ .name = "size",     .has_arg = 1, .val = 's' },
610219820Sjeff			{ .name = "mtu",      .has_arg = 1, .val = 'm' },
611219820Sjeff			{ .name = "num-qp",   .has_arg = 1, .val = 'q' },
612219820Sjeff			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
613219820Sjeff			{ .name = "iters",    .has_arg = 1, .val = 'n' },
614219820Sjeff			{ .name = "sl",       .has_arg = 1, .val = 'l' },
615219820Sjeff			{ .name = "events",   .has_arg = 0, .val = 'e' },
616219820Sjeff			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
617219820Sjeff			{ 0 }
618219820Sjeff		};
619219820Sjeff
620219820Sjeff		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:", long_options, NULL);
621219820Sjeff		if (c == -1)
622219820Sjeff			break;
623219820Sjeff
624219820Sjeff		switch (c) {
625219820Sjeff		case 'p':
626219820Sjeff			port = strtol(optarg, NULL, 0);
627219820Sjeff			if (port < 0 || port > 65535) {
628219820Sjeff				usage(argv[0]);
629219820Sjeff				return 1;
630219820Sjeff			}
631219820Sjeff			break;
632219820Sjeff
633219820Sjeff		case 'd':
634219820Sjeff			ib_devname = strdup(optarg);
635219820Sjeff			break;
636219820Sjeff
637219820Sjeff		case 'i':
638219820Sjeff			ib_port = strtol(optarg, NULL, 0);
639219820Sjeff			if (ib_port < 0) {
640219820Sjeff				usage(argv[0]);
641219820Sjeff				return 1;
642219820Sjeff			}
643219820Sjeff			break;
644219820Sjeff
645219820Sjeff		case 's':
646219820Sjeff			size = strtol(optarg, NULL, 0);
647219820Sjeff			break;
648219820Sjeff
649219820Sjeff		case 'm':
650219820Sjeff			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
651219820Sjeff			if (mtu < 0) {
652219820Sjeff				usage(argv[0]);
653219820Sjeff				return 1;
654219820Sjeff			}
655219820Sjeff			break;
656219820Sjeff
657219820Sjeff		case 'q':
658219820Sjeff			num_qp = strtol(optarg, NULL, 0);
659219820Sjeff			break;
660219820Sjeff
661219820Sjeff		case 'r':
662219820Sjeff			rx_depth = strtol(optarg, NULL, 0);
663219820Sjeff			break;
664219820Sjeff
665219820Sjeff		case 'n':
666219820Sjeff			iters = strtol(optarg, NULL, 0);
667219820Sjeff			break;
668219820Sjeff
669219820Sjeff		case 'l':
670219820Sjeff			sl = strtol(optarg, NULL, 0);
671219820Sjeff			break;
672219820Sjeff
673219820Sjeff		case 'e':
674219820Sjeff			++use_event;
675219820Sjeff			break;
676219820Sjeff
677219820Sjeff		case 'g':
678219820Sjeff			gidx = strtol(optarg, NULL, 0);
679219820Sjeff			break;
680219820Sjeff
681219820Sjeff		default:
682219820Sjeff			usage(argv[0]);
683219820Sjeff			return 1;
684219820Sjeff		}
685219820Sjeff	}
686219820Sjeff
687219820Sjeff	if (optind == argc - 1)
688219820Sjeff		servername = strdup(argv[optind]);
689219820Sjeff	else if (optind < argc) {
690219820Sjeff		usage(argv[0]);
691219820Sjeff		return 1;
692219820Sjeff	}
693219820Sjeff
694219820Sjeff	if (num_qp > rx_depth) {
695219820Sjeff		fprintf(stderr, "rx_depth %d is too small for %d QPs -- "
696219820Sjeff			"must have at least one receive per QP.\n",
697219820Sjeff			rx_depth, num_qp);
698219820Sjeff		return 1;
699219820Sjeff	}
700219820Sjeff
701219820Sjeff	num_wc = num_qp + rx_depth;
702219820Sjeff	wc     = alloca(num_wc * sizeof *wc);
703219820Sjeff
704219820Sjeff	page_size = sysconf(_SC_PAGESIZE);
705219820Sjeff
706219820Sjeff	dev_list = ibv_get_device_list(NULL);
707219820Sjeff	if (!dev_list) {
708219820Sjeff		perror("Failed to get IB devices list");
709219820Sjeff		return 1;
710219820Sjeff	}
711219820Sjeff
712219820Sjeff	if (!ib_devname) {
713219820Sjeff		ib_dev = *dev_list;
714219820Sjeff		if (!ib_dev) {
715219820Sjeff			fprintf(stderr, "No IB devices found\n");
716219820Sjeff			return 1;
717219820Sjeff		}
718219820Sjeff	} else {
719219820Sjeff		int i;
720219820Sjeff		for (i = 0; dev_list[i]; ++i)
721219820Sjeff			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
722219820Sjeff				break;
723219820Sjeff		ib_dev = dev_list[i];
724219820Sjeff		if (!ib_dev) {
725219820Sjeff			fprintf(stderr, "IB device %s not found\n", ib_devname);
726219820Sjeff			return 1;
727219820Sjeff		}
728219820Sjeff	}
729219820Sjeff
730219820Sjeff	ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event);
731219820Sjeff	if (!ctx)
732219820Sjeff		return 1;
733219820Sjeff
734219820Sjeff	routs = pp_post_recv(ctx, ctx->rx_depth);
735219820Sjeff	if (routs < ctx->rx_depth) {
736219820Sjeff		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
737219820Sjeff		return 1;
738219820Sjeff	}
739219820Sjeff
740219820Sjeff	if (use_event)
741219820Sjeff		if (ibv_req_notify_cq(ctx->cq, 0)) {
742219820Sjeff			fprintf(stderr, "Couldn't request CQ notification\n");
743219820Sjeff			return 1;
744219820Sjeff		}
745219820Sjeff
746219820Sjeff	memset(my_dest, 0, sizeof my_dest);
747219820Sjeff
748219820Sjeff	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
749219820Sjeff		fprintf(stderr, "Couldn't get port info\n");
750219820Sjeff		return 1;
751219820Sjeff	}
752219820Sjeff	for (i = 0; i < num_qp; ++i) {
753219820Sjeff		my_dest[i].qpn = ctx->qp[i]->qp_num;
754219820Sjeff		my_dest[i].psn = lrand48() & 0xffffff;
755219820Sjeff		my_dest[i].lid = ctx->portinfo.lid;
756219820Sjeff		if (ctx->portinfo.link_layer == IBV_LINK_LAYER_INFINIBAND && !my_dest[i].lid) {
757219820Sjeff			fprintf(stderr, "Couldn't get local LID\n");
758219820Sjeff			return 1;
759219820Sjeff		}
760219820Sjeff
761219820Sjeff		if (gidx >= 0) {
762219820Sjeff			if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest[i].gid)) {
763219820Sjeff				fprintf(stderr, "Could not get local gid for gid index %d\n", gidx);
764219820Sjeff				return 1;
765219820Sjeff			}
766219820Sjeff		} else
767219820Sjeff			memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid);
768219820Sjeff
769219820Sjeff		inet_ntop(AF_INET6, &my_dest[i].gid, gid, sizeof gid);
770219820Sjeff		printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
771219820Sjeff		       my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid);
772219820Sjeff	}
773219820Sjeff
774219820Sjeff	if (servername)
775219820Sjeff		rem_dest = pp_client_exch_dest(servername, port, my_dest);
776219820Sjeff	else
777219820Sjeff		rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, my_dest, gidx);
778219820Sjeff
779219820Sjeff	if (!rem_dest)
780219820Sjeff		return 1;
781219820Sjeff
782219820Sjeff	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
783219820Sjeff
784219820Sjeff	for (i = 0; i < num_qp; ++i) {
785219820Sjeff		inet_ntop(AF_INET6, &rem_dest[i].gid, gid, sizeof gid);
786219820Sjeff		printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
787219820Sjeff		       rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn, gid);
788219820Sjeff	}
789219820Sjeff
790219820Sjeff	if (servername)
791219820Sjeff		if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, gidx))
792219820Sjeff			return 1;
793219820Sjeff
794219820Sjeff	if (servername)
795219820Sjeff		for (i = 0; i < num_qp; ++i) {
796219820Sjeff			if (pp_post_send(ctx, i)) {
797219820Sjeff				fprintf(stderr, "Couldn't post send\n");
798219820Sjeff				return 1;
799219820Sjeff			}
800219820Sjeff			ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID;
801219820Sjeff		}
802219820Sjeff	else
803219820Sjeff		for (i = 0; i < num_qp; ++i)
804219820Sjeff			ctx->pending[i] = PINGPONG_RECV_WRID;
805219820Sjeff
806219820Sjeff	if (gettimeofday(&start, NULL)) {
807219820Sjeff		perror("gettimeofday");
808219820Sjeff		return 1;
809219820Sjeff	}
810219820Sjeff
811219820Sjeff	rcnt = scnt = 0;
812219820Sjeff	while (rcnt < iters || scnt < iters) {
813219820Sjeff		if (use_event) {
814219820Sjeff			struct ibv_cq *ev_cq;
815219820Sjeff			void          *ev_ctx;
816219820Sjeff
817219820Sjeff			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
818219820Sjeff				fprintf(stderr, "Failed to get cq_event\n");
819219820Sjeff				return 1;
820219820Sjeff			}
821219820Sjeff
822219820Sjeff			++num_cq_events;
823219820Sjeff
824219820Sjeff			if (ev_cq != ctx->cq) {
825219820Sjeff				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
826219820Sjeff				return 1;
827219820Sjeff			}
828219820Sjeff
829219820Sjeff			if (ibv_req_notify_cq(ctx->cq, 0)) {
830219820Sjeff				fprintf(stderr, "Couldn't request CQ notification\n");
831219820Sjeff				return 1;
832219820Sjeff			}
833219820Sjeff		}
834219820Sjeff
835219820Sjeff		{
836219820Sjeff			int ne, qp_ind;
837219820Sjeff
838219820Sjeff			do {
839219820Sjeff				ne = ibv_poll_cq(ctx->cq, num_wc, wc);
840219820Sjeff				if (ne < 0) {
841219820Sjeff					fprintf(stderr, "poll CQ failed %d\n", ne);
842219820Sjeff					return 1;
843219820Sjeff				}
844219820Sjeff			} while (!use_event && ne < 1);
845219820Sjeff
846219820Sjeff			for (i = 0; i < ne; ++i) {
847219820Sjeff				if (wc[i].status != IBV_WC_SUCCESS) {
848219820Sjeff					fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
849219820Sjeff						ibv_wc_status_str(wc[i].status),
850219820Sjeff						wc[i].status, (int) wc[i].wr_id);
851219820Sjeff					return 1;
852219820Sjeff				}
853219820Sjeff
854219820Sjeff				qp_ind = find_qp(wc[i].qp_num, ctx, num_qp);
855219820Sjeff				if (qp_ind < 0) {
856219820Sjeff					fprintf(stderr, "Couldn't find QPN %06x\n",
857219820Sjeff						wc[i].qp_num);
858219820Sjeff					return 1;
859219820Sjeff				}
860219820Sjeff
861219820Sjeff				switch ((int) wc[i].wr_id) {
862219820Sjeff				case PINGPONG_SEND_WRID:
863219820Sjeff					++scnt;
864219820Sjeff					break;
865219820Sjeff
866219820Sjeff				case PINGPONG_RECV_WRID:
867219820Sjeff					if (--routs <= num_qp) {
868219820Sjeff						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
869219820Sjeff						if (routs < ctx->rx_depth) {
870219820Sjeff							fprintf(stderr,
871219820Sjeff								"Couldn't post receive (%d)\n",
872219820Sjeff								routs);
873219820Sjeff							return 1;
874219820Sjeff						}
875219820Sjeff					}
876219820Sjeff
877219820Sjeff					++rcnt;
878219820Sjeff					break;
879219820Sjeff
880219820Sjeff				default:
881219820Sjeff					fprintf(stderr, "Completion for unknown wr_id %d\n",
882219820Sjeff						(int) wc[i].wr_id);
883219820Sjeff					return 1;
884219820Sjeff				}
885219820Sjeff
886219820Sjeff				ctx->pending[qp_ind] &= ~(int) wc[i].wr_id;
887219820Sjeff				if (scnt < iters && !ctx->pending[qp_ind]) {
888219820Sjeff					if (pp_post_send(ctx, qp_ind)) {
889219820Sjeff						fprintf(stderr, "Couldn't post send\n");
890219820Sjeff						return 1;
891219820Sjeff					}
892219820Sjeff					ctx->pending[qp_ind] = PINGPONG_RECV_WRID |
893219820Sjeff							       PINGPONG_SEND_WRID;
894219820Sjeff				}
895219820Sjeff
896219820Sjeff			}
897219820Sjeff		}
898219820Sjeff	}
899219820Sjeff
900219820Sjeff	if (gettimeofday(&end, NULL)) {
901219820Sjeff		perror("gettimeofday");
902219820Sjeff		return 1;
903219820Sjeff	}
904219820Sjeff
905219820Sjeff	{
906219820Sjeff		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
907219820Sjeff			(end.tv_usec - start.tv_usec);
908219820Sjeff		long long bytes = (long long) size * iters * 2;
909219820Sjeff
910219820Sjeff		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
911219820Sjeff		       bytes, usec / 1000000., bytes * 8. / usec);
912219820Sjeff		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
913219820Sjeff		       iters, usec / 1000000., usec / iters);
914219820Sjeff	}
915219820Sjeff
916219820Sjeff	ibv_ack_cq_events(ctx->cq, num_cq_events);
917219820Sjeff
918219820Sjeff	if (pp_close_ctx(ctx, num_qp))
919219820Sjeff		return 1;
920219820Sjeff
921219820Sjeff	ibv_free_device_list(dev_list);
922219820Sjeff	free(rem_dest);
923219820Sjeff
924219820Sjeff	return 0;
925219820Sjeff}
926