/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
 * Copyright (c) 2006 Cisco Systems, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <alloca.h>
#include <string.h>

#include "ibverbs.h"
#include <sys/param.h>

int ibv_cmd_get_context(struct ibv_context *context, struct ibv_get_context *cmd,
			size_t cmd_size, struct ibv_get_context_resp *resp,
			size_t resp_size)
{
	if (abi_ver < IB_USER_VERBS_MIN_ABI_VERSION)
		return ENOSYS;

	IBV_INIT_CMD_RESP(cmd, cmd_size, GET_CONTEXT, resp, resp_size);

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	context->async_fd         = resp->async_fd;
	context->num_comp_vectors = resp->num_comp_vectors;

	return 0;
}
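
/*
 * Usage note (illustrative sketch, not part of this file): a provider
 * library is expected to embed the generic command/response structs at
 * the start of its driver-specific versions and pass both through this
 * helper from its context-creation hook.  The "mydrv_" names below are
 * hypothetical.
 *
 *	struct mydrv_alloc_ucontext_resp {
 *		struct ibv_get_context_resp	ibv_resp;
 *		__u32				qp_tab_size;
 *	};
 *
 *	static int mydrv_init_context(struct ibv_context *ctx)
 *	{
 *		struct ibv_get_context			cmd;
 *		struct mydrv_alloc_ucontext_resp	resp;
 *
 *		return ibv_cmd_get_context(ctx, &cmd, sizeof cmd,
 *					   &resp.ibv_resp, sizeof resp);
 *	}
 */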

static void copy_query_dev_fields(struct ibv_device_attr *device_attr,
				  struct ibv_query_device_resp *resp,
				  uint64_t *raw_fw_ver)
{
	*raw_fw_ver				= resp->fw_ver;
	device_attr->node_guid			= resp->node_guid;
	device_attr->sys_image_guid		= resp->sys_image_guid;
	device_attr->max_mr_size		= resp->max_mr_size;
	device_attr->page_size_cap		= resp->page_size_cap;
	device_attr->vendor_id			= resp->vendor_id;
	device_attr->vendor_part_id		= resp->vendor_part_id;
	device_attr->hw_ver			= resp->hw_ver;
	device_attr->max_qp			= resp->max_qp;
	device_attr->max_qp_wr			= resp->max_qp_wr;
	device_attr->device_cap_flags		= resp->device_cap_flags;
	device_attr->max_sge			= resp->max_sge;
	device_attr->max_sge_rd			= resp->max_sge_rd;
	device_attr->max_cq			= resp->max_cq;
	device_attr->max_cqe			= resp->max_cqe;
	device_attr->max_mr			= resp->max_mr;
	device_attr->max_pd			= resp->max_pd;
	device_attr->max_qp_rd_atom		= resp->max_qp_rd_atom;
	device_attr->max_ee_rd_atom		= resp->max_ee_rd_atom;
	device_attr->max_res_rd_atom		= resp->max_res_rd_atom;
	device_attr->max_qp_init_rd_atom	= resp->max_qp_init_rd_atom;
	device_attr->max_ee_init_rd_atom	= resp->max_ee_init_rd_atom;
	device_attr->atomic_cap			= resp->atomic_cap;
	device_attr->max_ee			= resp->max_ee;
	device_attr->max_rdd			= resp->max_rdd;
	device_attr->max_mw			= resp->max_mw;
	device_attr->max_raw_ipv6_qp		= resp->max_raw_ipv6_qp;
	device_attr->max_raw_ethy_qp		= resp->max_raw_ethy_qp;
	device_attr->max_mcast_grp		= resp->max_mcast_grp;
	device_attr->max_mcast_qp_attach	= resp->max_mcast_qp_attach;
	device_attr->max_total_mcast_qp_attach	= resp->max_total_mcast_qp_attach;
	device_attr->max_ah			= resp->max_ah;
	device_attr->max_fmr			= resp->max_fmr;
	device_attr->max_map_per_fmr		= resp->max_map_per_fmr;
	device_attr->max_srq			= resp->max_srq;
	device_attr->max_srq_wr			= resp->max_srq_wr;
	device_attr->max_srq_sge		= resp->max_srq_sge;
	device_attr->max_pkeys			= resp->max_pkeys;
	device_attr->local_ca_ack_delay		= resp->local_ca_ack_delay;
	device_attr->phys_port_cnt		= resp->phys_port_cnt;
}

int ibv_cmd_query_device(struct ibv_context *context,
			 struct ibv_device_attr *device_attr,
			 uint64_t *raw_fw_ver,
			 struct ibv_query_device *cmd, size_t cmd_size)
{
	struct ibv_query_device_resp resp;

	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_DEVICE, &resp, sizeof resp);

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

	memset(device_attr->fw_ver, 0, sizeof device_attr->fw_ver);
	copy_query_dev_fields(device_attr, &resp, raw_fw_ver);

	return 0;
}

int ibv_cmd_query_device_ex(struct ibv_context *context,
			    const struct ibv_query_device_ex_input *input,
			    struct ibv_device_attr_ex *attr, size_t attr_size,
			    uint64_t *raw_fw_ver,
			    struct ibv_query_device_ex *cmd,
			    size_t cmd_core_size,
			    size_t cmd_size,
			    struct ibv_query_device_resp_ex *resp,
			    size_t resp_core_size,
			    size_t resp_size)
{
	int err;

	if (input && input->comp_mask)
		return EINVAL;

	if (attr_size < offsetof(struct ibv_device_attr_ex, comp_mask) +
			sizeof(attr->comp_mask))
		return EINVAL;

	if (resp_core_size < offsetof(struct ibv_query_device_resp_ex,
				      response_length) +
			     sizeof(resp->response_length))
		return EINVAL;

	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size,
			       QUERY_DEVICE_EX, resp, resp_core_size,
			       resp_size);
	cmd->comp_mask = 0;
	cmd->reserved = 0;
	memset(attr->orig_attr.fw_ver, 0, sizeof(attr->orig_attr.fw_ver));
	memset(&attr->comp_mask, 0, attr_size - sizeof(attr->orig_attr));
	err = write(context->cmd_fd, cmd, cmd_size);
	if (err != cmd_size)
		return errno;

	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
	copy_query_dev_fields(&attr->orig_attr, &resp->base, raw_fw_ver);
	/*
	 * Report back the supported comp_mask bits.  No comp_mask bits
	 * are defined yet, so mask them all off.
	 */
	attr->comp_mask = resp->comp_mask & 0;
	if (attr_size >= offsetof(struct ibv_device_attr_ex, odp_caps) +
			 sizeof(attr->odp_caps)) {
		if (resp->response_length >=
		    offsetof(struct ibv_query_device_resp_ex, odp_caps) +
		    sizeof(resp->odp_caps)) {
			attr->odp_caps.general_caps = resp->odp_caps.general_caps;
			attr->odp_caps.per_transport_caps.rc_odp_caps =
				resp->odp_caps.per_transport_caps.rc_odp_caps;
			attr->odp_caps.per_transport_caps.uc_odp_caps =
				resp->odp_caps.per_transport_caps.uc_odp_caps;
			attr->odp_caps.per_transport_caps.ud_odp_caps =
				resp->odp_caps.per_transport_caps.ud_odp_caps;
		}
	}

	if (attr_size >= offsetof(struct ibv_device_attr_ex,
				  completion_timestamp_mask) +
			 sizeof(attr->completion_timestamp_mask)) {
		if (resp->response_length >=
		    offsetof(struct ibv_query_device_resp_ex, timestamp_mask) +
		    sizeof(resp->timestamp_mask))
			attr->completion_timestamp_mask = resp->timestamp_mask;
	}

	if (attr_size >= offsetof(struct ibv_device_attr_ex, hca_core_clock) +
			 sizeof(attr->hca_core_clock)) {
		if (resp->response_length >=
		    offsetof(struct ibv_query_device_resp_ex, hca_core_clock) +
		    sizeof(resp->hca_core_clock))
			attr->hca_core_clock = resp->hca_core_clock;
	}

	if (attr_size >= offsetof(struct ibv_device_attr_ex, device_cap_flags_ex) +
			 sizeof(attr->device_cap_flags_ex)) {
		if (resp->response_length >=
		    offsetof(struct ibv_query_device_resp_ex, device_cap_flags_ex) +
		    sizeof(resp->device_cap_flags_ex))
			attr->device_cap_flags_ex = resp->device_cap_flags_ex;
	}

	if (attr_size >= offsetof(struct ibv_device_attr_ex, rss_caps) +
			 sizeof(attr->rss_caps)) {
		if (resp->response_length >=
		    offsetof(struct ibv_query_device_resp_ex, rss_caps) +
		    sizeof(resp->rss_caps)) {
			attr->rss_caps.supported_qpts = resp->rss_caps.supported_qpts;
			attr->rss_caps.max_rwq_indirection_tables = resp->rss_caps.max_rwq_indirection_tables;
			attr->rss_caps.max_rwq_indirection_table_size = resp->rss_caps.max_rwq_indirection_table_size;
		}
	}

	if (attr_size >= offsetof(struct ibv_device_attr_ex, max_wq_type_rq) +
			 sizeof(attr->max_wq_type_rq)) {
		if (resp->response_length >=
		    offsetof(struct ibv_query_device_resp_ex, max_wq_type_rq) +
		    sizeof(resp->max_wq_type_rq))
			attr->max_wq_type_rq = resp->max_wq_type_rq;
	}

	if (attr_size >= offsetof(struct ibv_device_attr_ex, raw_packet_caps) +
			 sizeof(attr->raw_packet_caps)) {
		if (resp->response_length >=
		    offsetof(struct ibv_query_device_resp_ex, raw_packet_caps) +
		    sizeof(resp->raw_packet_caps))
			attr->raw_packet_caps = resp->raw_packet_caps;
	}

	return 0;
}
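
/*
 * Usage note (illustrative sketch): the response_length checks above are
 * what lets ibv_query_device_ex() degrade gracefully against older
 * kernels; extended fields the kernel did not report stay zeroed by the
 * earlier memset().  A caller can therefore test capability bits
 * unconditionally, e.g.:
 *
 *	struct ibv_device_attr_ex attr;
 *
 *	if (!ibv_query_device_ex(ctx, NULL, &attr) &&
 *	    (attr.odp_caps.general_caps & IBV_ODP_SUPPORT))
 *		printf("on-demand paging supported\n");
 */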

int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
		       struct ibv_port_attr *port_attr,
		       struct ibv_query_port *cmd, size_t cmd_size)
{
	struct ibv_query_port_resp resp;

	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_PORT, &resp, sizeof resp);
	cmd->port_num = port_num;
	memset(cmd->reserved, 0, sizeof cmd->reserved);

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

	port_attr->state           = resp.state;
	port_attr->max_mtu         = resp.max_mtu;
	port_attr->active_mtu      = resp.active_mtu;
	port_attr->gid_tbl_len     = resp.gid_tbl_len;
	port_attr->port_cap_flags  = resp.port_cap_flags;
	port_attr->max_msg_sz      = resp.max_msg_sz;
	port_attr->bad_pkey_cntr   = resp.bad_pkey_cntr;
	port_attr->qkey_viol_cntr  = resp.qkey_viol_cntr;
	port_attr->pkey_tbl_len    = resp.pkey_tbl_len;
	port_attr->lid             = resp.lid;
	port_attr->sm_lid          = resp.sm_lid;
	port_attr->lmc             = resp.lmc;
	port_attr->max_vl_num      = resp.max_vl_num;
	port_attr->sm_sl           = resp.sm_sl;
	port_attr->subnet_timeout  = resp.subnet_timeout;
	port_attr->init_type_reply = resp.init_type_reply;
	port_attr->active_width    = resp.active_width;
	port_attr->active_speed    = resp.active_speed;
	port_attr->phys_state      = resp.phys_state;
	port_attr->link_layer      = resp.link_layer;

	return 0;
}

int ibv_cmd_alloc_pd(struct ibv_context *context, struct ibv_pd *pd,
		     struct ibv_alloc_pd *cmd, size_t cmd_size,
		     struct ibv_alloc_pd_resp *resp, size_t resp_size)
{
	IBV_INIT_CMD_RESP(cmd, cmd_size, ALLOC_PD, resp, resp_size);

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	pd->handle  = resp->pd_handle;
	pd->context = context;

	return 0;
}

int ibv_cmd_dealloc_pd(struct ibv_pd *pd)
{
	struct ibv_dealloc_pd cmd;

	IBV_INIT_CMD(&cmd, sizeof cmd, DEALLOC_PD);
	cmd.pd_handle = pd->handle;

	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	return 0;
}

int ibv_cmd_open_xrcd(struct ibv_context *context, struct verbs_xrcd *xrcd,
		      int vxrcd_size,
		      struct ibv_xrcd_init_attr *attr,
		      struct ibv_open_xrcd *cmd, size_t cmd_size,
		      struct ibv_open_xrcd_resp *resp, size_t resp_size)
{
	IBV_INIT_CMD_RESP(cmd, cmd_size, OPEN_XRCD, resp, resp_size);

	if (attr->comp_mask >= IBV_XRCD_INIT_ATTR_RESERVED)
		return ENOSYS;

	if (!(attr->comp_mask & IBV_XRCD_INIT_ATTR_FD) ||
	    !(attr->comp_mask & IBV_XRCD_INIT_ATTR_OFLAGS))
		return EINVAL;

	cmd->fd = attr->fd;
	cmd->oflags = attr->oflags;
	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	xrcd->xrcd.context = context;
	xrcd->comp_mask = 0;
	if (vext_field_avail(struct verbs_xrcd, handle, vxrcd_size)) {
		xrcd->comp_mask = VERBS_XRCD_HANDLE;
		xrcd->handle  = resp->xrcd_handle;
	}

	return 0;
}

int ibv_cmd_close_xrcd(struct verbs_xrcd *xrcd)
{
	struct ibv_close_xrcd cmd;

	IBV_INIT_CMD(&cmd, sizeof cmd, CLOSE_XRCD);
	cmd.xrcd_handle = xrcd->handle;

	if (write(xrcd->xrcd.context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	return 0;
}

int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
		   uint64_t hca_va, int access,
		   struct ibv_mr *mr, struct ibv_reg_mr *cmd,
		   size_t cmd_size,
		   struct ibv_reg_mr_resp *resp, size_t resp_size)
{
	IBV_INIT_CMD_RESP(cmd, cmd_size, REG_MR, resp, resp_size);

	cmd->start        = (uintptr_t) addr;
	cmd->length       = length;
	cmd->hca_va       = hca_va;
	cmd->pd_handle    = pd->handle;
	cmd->access_flags = access;

	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	mr->handle  = resp->mr_handle;
	mr->lkey    = resp->lkey;
	mr->rkey    = resp->rkey;
	mr->context = pd->context;

	return 0;
}
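
/*
 * Usage note (illustrative sketch): providers call this helper from
 * their ibv_reg_mr() implementation; the registration an application
 * performs ends up here, e.g.:
 *
 *	void *buf = malloc(len);
 *	struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
 *				       IBV_ACCESS_LOCAL_WRITE |
 *				       IBV_ACCESS_REMOTE_READ);
 */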

int ibv_cmd_rereg_mr(struct ibv_mr *mr, uint32_t flags, void *addr,
		     size_t length, uint64_t hca_va, int access,
		     struct ibv_pd *pd, struct ibv_rereg_mr *cmd,
		     size_t cmd_sz, struct ibv_rereg_mr_resp *resp,
		     size_t resp_sz)
{
	IBV_INIT_CMD_RESP(cmd, cmd_sz, REREG_MR, resp, resp_sz);

	cmd->mr_handle    = mr->handle;
	cmd->flags        = flags;
	cmd->start        = (uintptr_t)addr;
	cmd->length       = length;
	cmd->hca_va       = hca_va;
	cmd->pd_handle    = (flags & IBV_REREG_MR_CHANGE_PD) ? pd->handle : 0;
	cmd->access_flags = access;

	if (write(mr->context->cmd_fd, cmd, cmd_sz) != cmd_sz)
		return errno;

	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_sz);

	mr->lkey    = resp->lkey;
	mr->rkey    = resp->rkey;
	if (flags & IBV_REREG_MR_CHANGE_PD)
		mr->context = pd->context;

	return 0;
}

int ibv_cmd_dereg_mr(struct ibv_mr *mr)
{
	struct ibv_dereg_mr cmd;

	IBV_INIT_CMD(&cmd, sizeof cmd, DEREG_MR);
	cmd.mr_handle = mr->handle;

	if (write(mr->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	return 0;
}

int ibv_cmd_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type,
		     struct ibv_mw *mw, struct ibv_alloc_mw *cmd,
		     size_t cmd_size,
		     struct ibv_alloc_mw_resp *resp, size_t resp_size)
{
	IBV_INIT_CMD_RESP(cmd, cmd_size, ALLOC_MW, resp, resp_size);
	cmd->pd_handle = pd->handle;
	cmd->mw_type   = type;
	memset(cmd->reserved, 0, sizeof(cmd->reserved));

	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	mw->context = pd->context;
	mw->pd      = pd;
	mw->rkey    = resp->rkey;
	mw->handle  = resp->mw_handle;
	mw->type    = type;

	return 0;
}

int ibv_cmd_dealloc_mw(struct ibv_mw *mw,
		       struct ibv_dealloc_mw *cmd, size_t cmd_size)
{
	IBV_INIT_CMD(cmd, cmd_size, DEALLOC_MW);
	cmd->mw_handle = mw->handle;
	cmd->reserved = 0;

	if (write(mw->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	return 0;
}

int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
		      struct ibv_comp_channel *channel,
		      int comp_vector, struct ibv_cq *cq,
		      struct ibv_create_cq *cmd, size_t cmd_size,
		      struct ibv_create_cq_resp *resp, size_t resp_size)
{
	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_CQ, resp, resp_size);
	cmd->user_handle   = (uintptr_t) cq;
	cmd->cqe           = cqe;
	cmd->comp_vector   = comp_vector;
	cmd->comp_channel  = channel ? channel->fd : -1;
	cmd->reserved      = 0;

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	cq->handle  = resp->cq_handle;
	cq->cqe     = resp->cqe;
	cq->context = context;

	return 0;
}

int ibv_cmd_create_cq_ex(struct ibv_context *context,
			 struct ibv_cq_init_attr_ex *cq_attr,
			 struct ibv_cq_ex *cq,
			 struct ibv_create_cq_ex *cmd,
			 size_t cmd_core_size,
			 size_t cmd_size,
			 struct ibv_create_cq_resp_ex *resp,
			 size_t resp_core_size,
			 size_t resp_size)
{
	int err;

	memset(cmd, 0, cmd_core_size);
	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size, CREATE_CQ_EX, resp,
			       resp_core_size, resp_size);

	if (cq_attr->comp_mask & ~(IBV_CQ_INIT_ATTR_MASK_RESERVED - 1))
		return EINVAL;

	cmd->user_handle   = (uintptr_t)cq;
	cmd->cqe           = cq_attr->cqe;
	cmd->comp_vector   = cq_attr->comp_vector;
	cmd->comp_channel  = cq_attr->channel ? cq_attr->channel->fd : -1;
	cmd->comp_mask = 0;

	if (cmd_core_size >= offsetof(struct ibv_create_cq_ex, flags) +
	    sizeof(cmd->flags)) {
		if ((cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS) &&
		    (cq_attr->flags & ~(IBV_CREATE_CQ_ATTR_RESERVED - 1)))
			return EOPNOTSUPP;

		if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
			cmd->flags |= IBV_CREATE_CQ_EX_KERNEL_FLAG_COMPLETION_TIMESTAMP;
	}

	err = write(context->cmd_fd, cmd, cmd_size);
	if (err != cmd_size)
		return errno;

	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	cq->handle  = resp->base.cq_handle;
	cq->cqe     = resp->base.cqe;
	cq->context = context;

	return 0;
}

int ibv_cmd_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
	struct ibv_poll_cq       cmd;
	struct ibv_poll_cq_resp *resp;
	int                      i;
	int                      rsize;
	int                      ret;

	rsize = sizeof *resp + ne * sizeof(struct ibv_kern_wc);
	resp  = malloc(rsize);
	if (!resp)
		return -1;

	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, POLL_CQ, resp, rsize);
	cmd.cq_handle = ibcq->handle;
	cmd.ne        = ne;

	if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) {
		ret = -1;
		goto out;
	}

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, rsize);

	for (i = 0; i < resp->count; i++) {
		wc[i].wr_id          = resp->wc[i].wr_id;
		wc[i].status         = resp->wc[i].status;
		wc[i].opcode         = resp->wc[i].opcode;
		wc[i].vendor_err     = resp->wc[i].vendor_err;
		wc[i].byte_len       = resp->wc[i].byte_len;
		wc[i].imm_data       = resp->wc[i].imm_data;
		wc[i].qp_num         = resp->wc[i].qp_num;
		wc[i].src_qp         = resp->wc[i].src_qp;
		wc[i].wc_flags       = resp->wc[i].wc_flags;
		wc[i].pkey_index     = resp->wc[i].pkey_index;
		wc[i].slid           = resp->wc[i].slid;
		wc[i].sl             = resp->wc[i].sl;
		wc[i].dlid_path_bits = resp->wc[i].dlid_path_bits;
	}

	ret = resp->count;

out:
	free(resp);
	return ret;
}
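
/*
 * Usage note (illustrative sketch): this is the slow path, one kernel
 * round trip per poll; providers normally poll the CQ from user space
 * and only fall back to this helper.  Either way the application-level
 * loop is the same ("handle_error" is a hypothetical callback):
 *
 *	struct ibv_wc wc[16];
 *	int i, n = ibv_poll_cq(cq, 16, wc);
 *
 *	for (i = 0; i < n; ++i)
 *		if (wc[i].status != IBV_WC_SUCCESS)
 *			handle_error(&wc[i]);
 */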

int ibv_cmd_req_notify_cq(struct ibv_cq *ibcq, int solicited_only)
{
	struct ibv_req_notify_cq cmd;

	IBV_INIT_CMD(&cmd, sizeof cmd, REQ_NOTIFY_CQ);
	cmd.cq_handle = ibcq->handle;
	cmd.solicited = !!solicited_only;

	if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	return 0;
}

int ibv_cmd_resize_cq(struct ibv_cq *cq, int cqe,
		      struct ibv_resize_cq *cmd, size_t cmd_size,
		      struct ibv_resize_cq_resp *resp, size_t resp_size)
{
	IBV_INIT_CMD_RESP(cmd, cmd_size, RESIZE_CQ, resp, resp_size);
	cmd->cq_handle = cq->handle;
	cmd->cqe       = cqe;

	if (write(cq->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	cq->cqe = resp->cqe;

	return 0;
}

int ibv_cmd_destroy_cq(struct ibv_cq *cq)
{
	struct ibv_destroy_cq      cmd;
	struct ibv_destroy_cq_resp resp;

	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_CQ, &resp, sizeof resp);
	cmd.cq_handle = cq->handle;
	cmd.reserved  = 0;

	if (write(cq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

	pthread_mutex_lock(&cq->mutex);
	while (cq->comp_events_completed != resp.comp_events_reported ||
	       cq->async_events_completed != resp.async_events_reported)
		pthread_cond_wait(&cq->cond, &cq->mutex);
	pthread_mutex_unlock(&cq->mutex);

	return 0;
}
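
/*
 * Note on the wait loop above: comp_events_completed is advanced by
 * ibv_ack_cq_events(), so destroying a CQ with unacknowledged events
 * blocks here.  A sketch of the sequence an application is expected to
 * follow:
 *
 *	ibv_get_cq_event(channel, &ev_cq, &ev_ctx);
 *	...
 *	ibv_ack_cq_events(ev_cq, 1);
 *	ibv_destroy_cq(ev_cq);
 */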

int ibv_cmd_create_srq(struct ibv_pd *pd,
		       struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
		       struct ibv_create_srq *cmd, size_t cmd_size,
		       struct ibv_create_srq_resp *resp, size_t resp_size)
{
	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_SRQ, resp, resp_size);
	cmd->user_handle = (uintptr_t) srq;
	cmd->pd_handle   = pd->handle;
	cmd->max_wr      = attr->attr.max_wr;
	cmd->max_sge     = attr->attr.max_sge;
	cmd->srq_limit   = attr->attr.srq_limit;

	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	srq->handle  = resp->srq_handle;
	srq->context = pd->context;

	if (abi_ver > 5) {
		attr->attr.max_wr = resp->max_wr;
		attr->attr.max_sge = resp->max_sge;
	} else {
		struct ibv_create_srq_resp_v5 *resp_v5 =
			(struct ibv_create_srq_resp_v5 *) resp;

		memmove((void *) resp + sizeof *resp,
			(void *) resp_v5 + sizeof *resp_v5,
			resp_size - sizeof *resp);
	}

	return 0;
}

int ibv_cmd_create_srq_ex(struct ibv_context *context,
			  struct verbs_srq *srq, int vsrq_sz,
			  struct ibv_srq_init_attr_ex *attr_ex,
			  struct ibv_create_xsrq *cmd, size_t cmd_size,
			  struct ibv_create_srq_resp *resp, size_t resp_size)
{
	struct verbs_xrcd *vxrcd = NULL;

	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_XSRQ, resp, resp_size);

	if (attr_ex->comp_mask >= IBV_SRQ_INIT_ATTR_RESERVED)
		return ENOSYS;

	if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_PD))
		return EINVAL;

	cmd->user_handle = (uintptr_t) srq;
	cmd->pd_handle   = attr_ex->pd->handle;
	cmd->max_wr      = attr_ex->attr.max_wr;
	cmd->max_sge     = attr_ex->attr.max_sge;
	cmd->srq_limit   = attr_ex->attr.srq_limit;

	cmd->srq_type = (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ?
			attr_ex->srq_type : IBV_SRQT_BASIC;
	if (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_XRCD) {
		if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_CQ))
			return EINVAL;

		vxrcd = container_of(attr_ex->xrcd, struct verbs_xrcd, xrcd);
		cmd->xrcd_handle = vxrcd->handle;
		cmd->cq_handle   = attr_ex->cq->handle;
	}

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	srq->srq.handle           = resp->srq_handle;
	srq->srq.context          = context;
	srq->srq.srq_context      = attr_ex->srq_context;
	srq->srq.pd               = attr_ex->pd;
	srq->srq.events_completed = 0;
	pthread_mutex_init(&srq->srq.mutex, NULL);
	pthread_cond_init(&srq->srq.cond, NULL);

	/*
	 * Check that the last field is available.
	 * If it is, then all the others exist as well.
	 */
	if (vext_field_avail(struct verbs_srq, srq_num, vsrq_sz)) {
		srq->comp_mask = IBV_SRQ_INIT_ATTR_TYPE;
		srq->srq_type = (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ?
				attr_ex->srq_type : IBV_SRQT_BASIC;
		if (srq->srq_type == IBV_SRQT_XRC) {
			srq->comp_mask |= VERBS_SRQ_NUM;
			srq->srq_num = resp->srqn;
		}
		if (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_XRCD) {
			srq->comp_mask |= VERBS_SRQ_XRCD;
			srq->xrcd = vxrcd;
		}
		if (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_CQ) {
			srq->comp_mask |= VERBS_SRQ_CQ;
			srq->cq = attr_ex->cq;
		}
	}

	attr_ex->attr.max_wr = resp->max_wr;
	attr_ex->attr.max_sge = resp->max_sge;

	return 0;
}

static int ibv_cmd_modify_srq_v3(struct ibv_srq *srq,
				 struct ibv_srq_attr *srq_attr,
				 int srq_attr_mask,
				 struct ibv_modify_srq *new_cmd,
				 size_t new_cmd_size)
{
	struct ibv_modify_srq_v3 *cmd;
	size_t cmd_size;

	cmd_size = sizeof *cmd + new_cmd_size - sizeof *new_cmd;
	cmd      = alloca(cmd_size);
	memcpy(cmd->driver_data, new_cmd->driver_data, new_cmd_size - sizeof *new_cmd);

	IBV_INIT_CMD(cmd, cmd_size, MODIFY_SRQ);

	cmd->srq_handle = srq->handle;
	cmd->attr_mask  = srq_attr_mask;
	cmd->max_wr     = srq_attr->max_wr;
	cmd->srq_limit  = srq_attr->srq_limit;
	cmd->max_sge    = 0;
	cmd->reserved   = 0;

	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	return 0;
}

int ibv_cmd_modify_srq(struct ibv_srq *srq,
		       struct ibv_srq_attr *srq_attr,
		       int srq_attr_mask,
		       struct ibv_modify_srq *cmd, size_t cmd_size)
{
	if (abi_ver == 3)
		return ibv_cmd_modify_srq_v3(srq, srq_attr, srq_attr_mask,
					     cmd, cmd_size);

	IBV_INIT_CMD(cmd, cmd_size, MODIFY_SRQ);

	cmd->srq_handle = srq->handle;
	cmd->attr_mask  = srq_attr_mask;
	cmd->max_wr     = srq_attr->max_wr;
	cmd->srq_limit  = srq_attr->srq_limit;

	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	return 0;
}

int ibv_cmd_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
		      struct ibv_query_srq *cmd, size_t cmd_size)
{
	struct ibv_query_srq_resp resp;

	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_SRQ, &resp, sizeof resp);
	cmd->srq_handle = srq->handle;
	cmd->reserved   = 0;

	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

	srq_attr->max_wr    = resp.max_wr;
	srq_attr->max_sge   = resp.max_sge;
	srq_attr->srq_limit = resp.srq_limit;

	return 0;
}

int ibv_cmd_destroy_srq(struct ibv_srq *srq)
{
	struct ibv_destroy_srq      cmd;
	struct ibv_destroy_srq_resp resp;

	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_SRQ, &resp, sizeof resp);
	cmd.srq_handle = srq->handle;
	cmd.reserved   = 0;

	if (write(srq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

	pthread_mutex_lock(&srq->mutex);
	while (srq->events_completed != resp.events_reported)
		pthread_cond_wait(&srq->cond, &srq->mutex);
	pthread_mutex_unlock(&srq->mutex);

	return 0;
}

static int create_qp_ex_common(struct verbs_qp *qp,
			       struct ibv_qp_init_attr_ex *qp_attr,
			       struct verbs_xrcd *vxrcd,
			       struct ibv_create_qp_common *cmd)
{
	cmd->user_handle = (uintptr_t)qp;

	if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_XRCD) {
		vxrcd = container_of(qp_attr->xrcd, struct verbs_xrcd, xrcd);
		cmd->pd_handle = vxrcd->handle;
	} else {
		if (!(qp_attr->comp_mask & IBV_QP_INIT_ATTR_PD))
			return EINVAL;

		cmd->pd_handle = qp_attr->pd->handle;
		if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_IND_TABLE) {
			if (cmd->max_recv_wr || cmd->max_recv_sge ||
			    cmd->recv_cq_handle || qp_attr->srq)
				return EINVAL;

			/* send_cq is optional */
			if (qp_attr->cap.max_send_wr)
				cmd->send_cq_handle = qp_attr->send_cq->handle;
		} else {
			cmd->send_cq_handle = qp_attr->send_cq->handle;

			if (qp_attr->qp_type != IBV_QPT_XRC_SEND) {
				cmd->recv_cq_handle = qp_attr->recv_cq->handle;
				cmd->srq_handle = qp_attr->srq ? qp_attr->srq->handle :
								 0;
			}
		}
	}

	cmd->max_send_wr     = qp_attr->cap.max_send_wr;
	cmd->max_recv_wr     = qp_attr->cap.max_recv_wr;
	cmd->max_send_sge    = qp_attr->cap.max_send_sge;
	cmd->max_recv_sge    = qp_attr->cap.max_recv_sge;
	cmd->max_inline_data = qp_attr->cap.max_inline_data;
	cmd->sq_sig_all      = qp_attr->sq_sig_all;
	cmd->qp_type         = qp_attr->qp_type;
	cmd->is_srq          = !!qp_attr->srq;
	cmd->reserved        = 0;

	return 0;
}

static void create_qp_handle_resp_common(struct ibv_context *context,
					 struct verbs_qp *qp,
					 struct ibv_qp_init_attr_ex *qp_attr,
					 struct ibv_create_qp_resp *resp,
					 struct verbs_xrcd *vxrcd,
					 int vqp_sz)
{
	if (abi_ver > 3) {
		qp_attr->cap.max_recv_sge    = resp->max_recv_sge;
		qp_attr->cap.max_send_sge    = resp->max_send_sge;
		qp_attr->cap.max_recv_wr     = resp->max_recv_wr;
		qp_attr->cap.max_send_wr     = resp->max_send_wr;
		qp_attr->cap.max_inline_data = resp->max_inline_data;
	}

	qp->qp.handle           = resp->qp_handle;
	qp->qp.qp_num           = resp->qpn;
	qp->qp.context          = context;
	qp->qp.qp_context       = qp_attr->qp_context;
	qp->qp.pd               = qp_attr->pd;
	qp->qp.send_cq          = qp_attr->send_cq;
	qp->qp.recv_cq          = qp_attr->recv_cq;
	qp->qp.srq              = qp_attr->srq;
	qp->qp.qp_type          = qp_attr->qp_type;
	qp->qp.state            = IBV_QPS_RESET;
	qp->qp.events_completed = 0;
	pthread_mutex_init(&qp->qp.mutex, NULL);
	pthread_cond_init(&qp->qp.cond, NULL);

	qp->comp_mask = 0;
	if (vext_field_avail(struct verbs_qp, xrcd, vqp_sz) &&
	    (qp_attr->comp_mask & IBV_QP_INIT_ATTR_XRCD)) {
		qp->comp_mask |= VERBS_QP_XRCD;
		qp->xrcd = vxrcd;
	}
}

enum {
	CREATE_QP_EX2_SUP_CREATE_FLAGS = IBV_QP_CREATE_BLOCK_SELF_MCAST_LB |
					 IBV_QP_CREATE_SCATTER_FCS |
					 IBV_QP_CREATE_CVLAN_STRIPPING,
};

int ibv_cmd_create_qp_ex2(struct ibv_context *context,
			  struct verbs_qp *qp, int vqp_sz,
			  struct ibv_qp_init_attr_ex *qp_attr,
			  struct ibv_create_qp_ex *cmd,
			  size_t cmd_core_size,
			  size_t cmd_size,
			  struct ibv_create_qp_resp_ex *resp,
			  size_t resp_core_size,
			  size_t resp_size)
{
	struct verbs_xrcd *vxrcd = NULL;
	int err;

	if (qp_attr->comp_mask >= IBV_QP_INIT_ATTR_RESERVED)
		return EINVAL;

	if (resp_core_size <
	    offsetof(struct ibv_create_qp_resp_ex, response_length) +
	    sizeof(resp->response_length))
		return EINVAL;

	memset(cmd, 0, cmd_core_size);

	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size, CREATE_QP_EX, resp,
			       resp_core_size, resp_size);

	err = create_qp_ex_common(qp, qp_attr, vxrcd, &cmd->base);
	if (err)
		return err;

	if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_CREATE_FLAGS) {
		if (qp_attr->create_flags & ~CREATE_QP_EX2_SUP_CREATE_FLAGS)
			return EINVAL;
		if (cmd_core_size < offsetof(struct ibv_create_qp_ex, create_flags) +
				    sizeof(qp_attr->create_flags))
			return EINVAL;
		cmd->create_flags = qp_attr->create_flags;
	}

	if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_IND_TABLE) {
		if (cmd_core_size < offsetof(struct ibv_create_qp_ex, ind_tbl_handle) +
				    sizeof(cmd->ind_tbl_handle))
			return EINVAL;
		cmd->ind_tbl_handle = qp_attr->rwq_ind_tbl->ind_tbl_handle;
		cmd->comp_mask = IBV_CREATE_QP_EX_KERNEL_MASK_IND_TABLE;
	}

	err = write(context->cmd_fd, cmd, cmd_size);
	if (err != cmd_size)
		return errno;

	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	create_qp_handle_resp_common(context, qp, qp_attr, &resp->base, vxrcd,
				     vqp_sz);

	return 0;
}

int ibv_cmd_create_qp_ex(struct ibv_context *context,
			 struct verbs_qp *qp, int vqp_sz,
			 struct ibv_qp_init_attr_ex *attr_ex,
			 struct ibv_create_qp *cmd, size_t cmd_size,
			 struct ibv_create_qp_resp *resp, size_t resp_size)
{
	struct verbs_xrcd *vxrcd = NULL;
	int err;

	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, resp, resp_size);

	if (attr_ex->comp_mask > (IBV_QP_INIT_ATTR_XRCD | IBV_QP_INIT_ATTR_PD))
		return ENOSYS;

	err = create_qp_ex_common(qp, attr_ex, vxrcd,
				  (struct ibv_create_qp_common *)&cmd->user_handle);
	if (err)
		return err;

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	if (abi_ver == 4) {
		struct ibv_create_qp_resp_v4 *resp_v4 =
			(struct ibv_create_qp_resp_v4 *)resp;

		memmove((void *)resp + sizeof *resp,
			(void *)resp_v4 + sizeof *resp_v4,
			resp_size - sizeof *resp);
	} else if (abi_ver <= 3) {
		struct ibv_create_qp_resp_v3 *resp_v3 =
			(struct ibv_create_qp_resp_v3 *)resp;

		memmove((void *)resp + sizeof *resp,
			(void *)resp_v3 + sizeof *resp_v3,
			resp_size - sizeof *resp);
	}

	create_qp_handle_resp_common(context, qp, attr_ex, resp, vxrcd, vqp_sz);

	return 0;
}

int ibv_cmd_create_qp(struct ibv_pd *pd,
		      struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
		      struct ibv_create_qp *cmd, size_t cmd_size,
		      struct ibv_create_qp_resp *resp, size_t resp_size)
{
	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, resp, resp_size);

	cmd->user_handle     = (uintptr_t) qp;
	cmd->pd_handle       = pd->handle;
	cmd->send_cq_handle  = attr->send_cq->handle;
	cmd->recv_cq_handle  = attr->recv_cq->handle;
	cmd->srq_handle      = attr->srq ? attr->srq->handle : 0;
	cmd->max_send_wr     = attr->cap.max_send_wr;
	cmd->max_recv_wr     = attr->cap.max_recv_wr;
	cmd->max_send_sge    = attr->cap.max_send_sge;
	cmd->max_recv_sge    = attr->cap.max_recv_sge;
	cmd->max_inline_data = attr->cap.max_inline_data;
	cmd->sq_sig_all      = attr->sq_sig_all;
	cmd->qp_type         = attr->qp_type;
	cmd->is_srq          = !!attr->srq;
	cmd->reserved        = 0;

	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	qp->handle  = resp->qp_handle;
	qp->qp_num  = resp->qpn;
	qp->context = pd->context;

	if (abi_ver > 3) {
		attr->cap.max_recv_sge    = resp->max_recv_sge;
		attr->cap.max_send_sge    = resp->max_send_sge;
		attr->cap.max_recv_wr     = resp->max_recv_wr;
		attr->cap.max_send_wr     = resp->max_send_wr;
		attr->cap.max_inline_data = resp->max_inline_data;
	}

	if (abi_ver == 4) {
		struct ibv_create_qp_resp_v4 *resp_v4 =
			(struct ibv_create_qp_resp_v4 *) resp;

		memmove((void *) resp + sizeof *resp,
			(void *) resp_v4 + sizeof *resp_v4,
			resp_size - sizeof *resp);
	} else if (abi_ver <= 3) {
		struct ibv_create_qp_resp_v3 *resp_v3 =
			(struct ibv_create_qp_resp_v3 *) resp;

		memmove((void *) resp + sizeof *resp,
			(void *) resp_v3 + sizeof *resp_v3,
			resp_size - sizeof *resp);
	}

	return 0;
}
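
/*
 * Usage note (illustrative sketch): the fields copied into the command
 * above come from the init attributes an application fills in, e.g.:
 *
 *	struct ibv_qp_init_attr attr = {
 *		.send_cq = cq,
 *		.recv_cq = cq,
 *		.cap     = { .max_send_wr  = 16, .max_recv_wr  = 16,
 *			     .max_send_sge = 1,  .max_recv_sge = 1 },
 *		.qp_type = IBV_QPT_RC,
 *	};
 *	struct ibv_qp *qp = ibv_create_qp(pd, &attr);
 */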

int ibv_cmd_open_qp(struct ibv_context *context, struct verbs_qp *qp,
		    int vqp_sz,
		    struct ibv_qp_open_attr *attr,
		    struct ibv_open_qp *cmd, size_t cmd_size,
		    struct ibv_create_qp_resp *resp, size_t resp_size)
{
	struct verbs_xrcd *xrcd;

	IBV_INIT_CMD_RESP(cmd, cmd_size, OPEN_QP, resp, resp_size);

	if (attr->comp_mask >= IBV_QP_OPEN_ATTR_RESERVED)
		return ENOSYS;

	if (!(attr->comp_mask & IBV_QP_OPEN_ATTR_XRCD) ||
	    !(attr->comp_mask & IBV_QP_OPEN_ATTR_NUM) ||
	    !(attr->comp_mask & IBV_QP_OPEN_ATTR_TYPE))
		return EINVAL;

	xrcd = container_of(attr->xrcd, struct verbs_xrcd, xrcd);
	cmd->user_handle = (uintptr_t) qp;
	cmd->pd_handle   = xrcd->handle;
	cmd->qpn         = attr->qp_num;
	cmd->qp_type     = attr->qp_type;

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	qp->qp.handle     = resp->qp_handle;
	qp->qp.context    = context;
	qp->qp.qp_context = attr->qp_context;
	qp->qp.pd         = NULL;
	qp->qp.send_cq    = NULL;
	qp->qp.recv_cq    = NULL;
	qp->qp.srq        = NULL;
	qp->qp.qp_num     = attr->qp_num;
	qp->qp.qp_type    = attr->qp_type;
	qp->qp.state      = IBV_QPS_UNKNOWN;
	qp->qp.events_completed = 0;
	pthread_mutex_init(&qp->qp.mutex, NULL);
	pthread_cond_init(&qp->qp.cond, NULL);
	qp->comp_mask = 0;
	if (vext_field_avail(struct verbs_qp, xrcd, vqp_sz)) {
		qp->comp_mask = VERBS_QP_XRCD;
		qp->xrcd = xrcd;
	}

	return 0;
}

int ibv_cmd_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
		     int attr_mask,
		     struct ibv_qp_init_attr *init_attr,
		     struct ibv_query_qp *cmd, size_t cmd_size)
{
	struct ibv_query_qp_resp resp;

	/*
	 * Attribute mask bits above IBV_QP_DEST_QPN are not supported
	 * by this non-extended command.
	 */
	if (attr_mask & ~((IBV_QP_DEST_QPN << 1) - 1))
		return EOPNOTSUPP;

	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_QP, &resp, sizeof resp);
	cmd->qp_handle = qp->handle;
	cmd->attr_mask = attr_mask;

	if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

	attr->qkey                          = resp.qkey;
	attr->rq_psn                        = resp.rq_psn;
	attr->sq_psn                        = resp.sq_psn;
	attr->dest_qp_num                   = resp.dest_qp_num;
	attr->qp_access_flags               = resp.qp_access_flags;
	attr->pkey_index                    = resp.pkey_index;
	attr->alt_pkey_index                = resp.alt_pkey_index;
	attr->qp_state                      = resp.qp_state;
	attr->cur_qp_state                  = resp.cur_qp_state;
	attr->path_mtu                      = resp.path_mtu;
	attr->path_mig_state                = resp.path_mig_state;
	attr->sq_draining                   = resp.sq_draining;
	attr->max_rd_atomic                 = resp.max_rd_atomic;
	attr->max_dest_rd_atomic            = resp.max_dest_rd_atomic;
	attr->min_rnr_timer                 = resp.min_rnr_timer;
	attr->port_num                      = resp.port_num;
	attr->timeout                       = resp.timeout;
	attr->retry_cnt                     = resp.retry_cnt;
	attr->rnr_retry                     = resp.rnr_retry;
	attr->alt_port_num                  = resp.alt_port_num;
	attr->alt_timeout                   = resp.alt_timeout;
	attr->cap.max_send_wr               = resp.max_send_wr;
	attr->cap.max_recv_wr               = resp.max_recv_wr;
	attr->cap.max_send_sge              = resp.max_send_sge;
	attr->cap.max_recv_sge              = resp.max_recv_sge;
	attr->cap.max_inline_data           = resp.max_inline_data;

	memcpy(attr->ah_attr.grh.dgid.raw, resp.dest.dgid, 16);
	attr->ah_attr.grh.flow_label        = resp.dest.flow_label;
	attr->ah_attr.dlid                  = resp.dest.dlid;
	attr->ah_attr.grh.sgid_index        = resp.dest.sgid_index;
	attr->ah_attr.grh.hop_limit         = resp.dest.hop_limit;
	attr->ah_attr.grh.traffic_class     = resp.dest.traffic_class;
	attr->ah_attr.sl                    = resp.dest.sl;
	attr->ah_attr.src_path_bits         = resp.dest.src_path_bits;
	attr->ah_attr.static_rate           = resp.dest.static_rate;
	attr->ah_attr.is_global             = resp.dest.is_global;
	attr->ah_attr.port_num              = resp.dest.port_num;

	memcpy(attr->alt_ah_attr.grh.dgid.raw, resp.alt_dest.dgid, 16);
	attr->alt_ah_attr.grh.flow_label    = resp.alt_dest.flow_label;
	attr->alt_ah_attr.dlid              = resp.alt_dest.dlid;
	attr->alt_ah_attr.grh.sgid_index    = resp.alt_dest.sgid_index;
	attr->alt_ah_attr.grh.hop_limit     = resp.alt_dest.hop_limit;
	attr->alt_ah_attr.grh.traffic_class = resp.alt_dest.traffic_class;
	attr->alt_ah_attr.sl                = resp.alt_dest.sl;
	attr->alt_ah_attr.src_path_bits     = resp.alt_dest.src_path_bits;
	attr->alt_ah_attr.static_rate       = resp.alt_dest.static_rate;
	attr->alt_ah_attr.is_global         = resp.alt_dest.is_global;
	attr->alt_ah_attr.port_num          = resp.alt_dest.port_num;

	init_attr->qp_context               = qp->qp_context;
	init_attr->send_cq                  = qp->send_cq;
	init_attr->recv_cq                  = qp->recv_cq;
	init_attr->srq                      = qp->srq;
	init_attr->qp_type                  = qp->qp_type;
	init_attr->cap.max_send_wr          = resp.max_send_wr;
	init_attr->cap.max_recv_wr          = resp.max_recv_wr;
	init_attr->cap.max_send_sge         = resp.max_send_sge;
	init_attr->cap.max_recv_sge         = resp.max_recv_sge;
	init_attr->cap.max_inline_data      = resp.max_inline_data;
	init_attr->sq_sig_all               = resp.sq_sig_all;

	return 0;
}

static void copy_modify_qp_fields(struct ibv_qp *qp, struct ibv_qp_attr *attr,
				  int attr_mask,
				  struct ibv_modify_qp_common *cmd)
{
	cmd->qp_handle = qp->handle;
	cmd->attr_mask = attr_mask;

	if (attr_mask & IBV_QP_STATE)
		cmd->qp_state = attr->qp_state;
	if (attr_mask & IBV_QP_CUR_STATE)
		cmd->cur_qp_state = attr->cur_qp_state;
	if (attr_mask & IBV_QP_EN_SQD_ASYNC_NOTIFY)
		cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
	if (attr_mask & IBV_QP_ACCESS_FLAGS)
		cmd->qp_access_flags = attr->qp_access_flags;
	if (attr_mask & IBV_QP_PKEY_INDEX)
		cmd->pkey_index = attr->pkey_index;
	if (attr_mask & IBV_QP_PORT)
		cmd->port_num = attr->port_num;
	if (attr_mask & IBV_QP_QKEY)
		cmd->qkey = attr->qkey;

	if (attr_mask & IBV_QP_AV) {
		memcpy(cmd->dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
		cmd->dest.flow_label = attr->ah_attr.grh.flow_label;
		cmd->dest.dlid = attr->ah_attr.dlid;
		cmd->dest.reserved = 0;
		cmd->dest.sgid_index = attr->ah_attr.grh.sgid_index;
		cmd->dest.hop_limit = attr->ah_attr.grh.hop_limit;
		cmd->dest.traffic_class = attr->ah_attr.grh.traffic_class;
		cmd->dest.sl = attr->ah_attr.sl;
		cmd->dest.src_path_bits = attr->ah_attr.src_path_bits;
		cmd->dest.static_rate = attr->ah_attr.static_rate;
		cmd->dest.is_global = attr->ah_attr.is_global;
		cmd->dest.port_num = attr->ah_attr.port_num;
	}

	if (attr_mask & IBV_QP_PATH_MTU)
		cmd->path_mtu = attr->path_mtu;
	if (attr_mask & IBV_QP_TIMEOUT)
		cmd->timeout = attr->timeout;
	if (attr_mask & IBV_QP_RETRY_CNT)
		cmd->retry_cnt = attr->retry_cnt;
	if (attr_mask & IBV_QP_RNR_RETRY)
		cmd->rnr_retry = attr->rnr_retry;
	if (attr_mask & IBV_QP_RQ_PSN)
		cmd->rq_psn = attr->rq_psn;
	if (attr_mask & IBV_QP_MAX_QP_RD_ATOMIC)
		cmd->max_rd_atomic = attr->max_rd_atomic;

	if (attr_mask & IBV_QP_ALT_PATH) {
		cmd->alt_pkey_index = attr->alt_pkey_index;
		cmd->alt_port_num = attr->alt_port_num;
		cmd->alt_timeout = attr->alt_timeout;

		memcpy(cmd->alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
		cmd->alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
		cmd->alt_dest.dlid = attr->alt_ah_attr.dlid;
		cmd->alt_dest.reserved = 0;
		cmd->alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
		cmd->alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
		cmd->alt_dest.traffic_class =
		    attr->alt_ah_attr.grh.traffic_class;
		cmd->alt_dest.sl = attr->alt_ah_attr.sl;
		cmd->alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
		cmd->alt_dest.static_rate = attr->alt_ah_attr.static_rate;
		cmd->alt_dest.is_global = attr->alt_ah_attr.is_global;
		cmd->alt_dest.port_num = attr->alt_ah_attr.port_num;
	}

	if (attr_mask & IBV_QP_MIN_RNR_TIMER)
		cmd->min_rnr_timer = attr->min_rnr_timer;
	if (attr_mask & IBV_QP_SQ_PSN)
		cmd->sq_psn = attr->sq_psn;
	if (attr_mask & IBV_QP_MAX_DEST_RD_ATOMIC)
		cmd->max_dest_rd_atomic = attr->max_dest_rd_atomic;
	if (attr_mask & IBV_QP_PATH_MIG_STATE)
		cmd->path_mig_state = attr->path_mig_state;
	if (attr_mask & IBV_QP_DEST_QPN)
		cmd->dest_qp_num = attr->dest_qp_num;

	cmd->reserved[0] = cmd->reserved[1] = 0;
}

int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
		      int attr_mask,
		      struct ibv_modify_qp *cmd, size_t cmd_size)
{
	/*
	 * Attribute mask bits above IBV_QP_DEST_QPN are only supported
	 * by the extended ibv_cmd_modify_qp_ex().
	 */
	if (attr_mask & ~((IBV_QP_DEST_QPN << 1) - 1))
		return EOPNOTSUPP;

	IBV_INIT_CMD(cmd, cmd_size, MODIFY_QP);

	copy_modify_qp_fields(qp, attr, attr_mask, &cmd->base);

	if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	return 0;
}
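
/*
 * Usage note (illustrative sketch): the first transition every RC QP
 * makes, RESET to INIT, exercises this command with a typical mask:
 *
 *	struct ibv_qp_attr attr = {
 *		.qp_state        = IBV_QPS_INIT,
 *		.pkey_index      = 0,
 *		.port_num        = 1,
 *		.qp_access_flags = IBV_ACCESS_REMOTE_WRITE,
 *	};
 *
 *	ibv_modify_qp(qp, &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX |
 *			IBV_QP_PORT | IBV_QP_ACCESS_FLAGS);
 */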

int ibv_cmd_modify_qp_ex(struct ibv_qp *qp, struct ibv_qp_attr *attr,
			 int attr_mask, struct ibv_modify_qp_ex *cmd,
			 size_t cmd_core_size, size_t cmd_size,
			 struct ibv_modify_qp_resp_ex *resp,
			 size_t resp_core_size, size_t resp_size)
{
	if (resp_core_size < offsetof(struct ibv_modify_qp_resp_ex,
			     response_length) + sizeof(resp->response_length))
		return EINVAL;

	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size, MODIFY_QP_EX,
			       resp, resp_core_size, resp_size);

	copy_modify_qp_fields(qp, attr, attr_mask, &cmd->base);

	if (attr_mask & IBV_QP_RATE_LIMIT) {
		if (cmd_size >= offsetof(struct ibv_modify_qp_ex, rate_limit) +
		    sizeof(cmd->rate_limit))
			cmd->rate_limit = attr->rate_limit;
		else
			return EINVAL;
	}

	if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	return 0;
}

int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
		      struct ibv_send_wr **bad_wr)
{
	struct ibv_post_send     *cmd;
	struct ibv_post_send_resp resp;
	struct ibv_send_wr       *i;
	struct ibv_kern_send_wr  *n, *tmp;
	struct ibv_sge           *s;
	unsigned                  wr_count = 0;
	unsigned                  sge_count = 0;
	int                       cmd_size;
	int                       ret = 0;

	for (i = wr; i; i = i->next) {
		wr_count++;
		sge_count += i->num_sge;
	}

	cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
	cmd  = alloca(cmd_size);

	IBV_INIT_CMD_RESP(cmd, cmd_size, POST_SEND, &resp, sizeof resp);
	cmd->qp_handle = ibqp->handle;
	cmd->wr_count  = wr_count;
	cmd->sge_count = sge_count;
	cmd->wqe_size  = sizeof *n;

	n = (struct ibv_kern_send_wr *) ((void *) cmd + sizeof *cmd);
	s = (struct ibv_sge *) (n + wr_count);

	tmp = n;
	for (i = wr; i; i = i->next) {
		tmp->wr_id      = i->wr_id;
		tmp->num_sge    = i->num_sge;
		tmp->opcode     = i->opcode;
		tmp->send_flags = i->send_flags;
		tmp->imm_data   = i->imm_data;
		if (ibqp->qp_type == IBV_QPT_UD) {
			tmp->wr.ud.ah          = i->wr.ud.ah->handle;
			tmp->wr.ud.remote_qpn  = i->wr.ud.remote_qpn;
			tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey;
		} else {
			switch (i->opcode) {
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
			case IBV_WR_RDMA_READ:
				tmp->wr.rdma.remote_addr =
					i->wr.rdma.remote_addr;
				tmp->wr.rdma.rkey = i->wr.rdma.rkey;
				break;
			case IBV_WR_ATOMIC_CMP_AND_SWP:
			case IBV_WR_ATOMIC_FETCH_AND_ADD:
				tmp->wr.atomic.remote_addr =
					i->wr.atomic.remote_addr;
				tmp->wr.atomic.compare_add =
					i->wr.atomic.compare_add;
				tmp->wr.atomic.swap = i->wr.atomic.swap;
				tmp->wr.atomic.rkey = i->wr.atomic.rkey;
				break;
			default:
				break;
			}
		}

		if (tmp->num_sge) {
			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
			s += tmp->num_sge;
		}

		tmp++;
	}

	resp.bad_wr = 0;
	if (write(ibqp->context->cmd_fd, cmd, cmd_size) != cmd_size)
		ret = errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

	wr_count = resp.bad_wr;
	if (wr_count) {
		i = wr;
		while (--wr_count)
			i = i->next;
		*bad_wr = i;
	} else if (ret)
		*bad_wr = wr;

	return ret;
}
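
/*
 * Usage note (illustrative sketch): the chained work requests flattened
 * above start out as the usual application-side list, e.g. a single
 * signaled RDMA write ("buf", "len", "mr", "raddr" and "rkey" are
 * assumed to come from the surrounding application):
 *
 *	struct ibv_sge sge = { .addr   = (uintptr_t) buf,
 *			       .length = len,
 *			       .lkey   = mr->lkey };
 *	struct ibv_send_wr wr = {
 *		.opcode              = IBV_WR_RDMA_WRITE,
 *		.sg_list             = &sge,
 *		.num_sge             = 1,
 *		.send_flags          = IBV_SEND_SIGNALED,
 *		.wr.rdma.remote_addr = raddr,
 *		.wr.rdma.rkey        = rkey,
 *	}, *bad;
 *
 *	ibv_post_send(qp, &wr, &bad);
 */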
1459
1460int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
1461		      struct ibv_recv_wr **bad_wr)
1462{
1463	struct ibv_post_recv     *cmd;
1464	struct ibv_post_recv_resp resp;
1465	struct ibv_recv_wr       *i;
1466	struct ibv_kern_recv_wr  *n, *tmp;
1467	struct ibv_sge           *s;
1468	unsigned                  wr_count = 0;
1469	unsigned                  sge_count = 0;
1470	int                       cmd_size;
1471	int                       ret = 0;
1472
1473	for (i = wr; i; i = i->next) {
1474		wr_count++;
1475		sge_count += i->num_sge;
1476	}
1477
1478	cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
1479	cmd  = alloca(cmd_size);
1480
1481	IBV_INIT_CMD_RESP(cmd, cmd_size, POST_RECV, &resp, sizeof resp);
1482	cmd->qp_handle = ibqp->handle;
1483	cmd->wr_count  = wr_count;
1484	cmd->sge_count = sge_count;
1485	cmd->wqe_size  = sizeof *n;
1486
1487	n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
1488	s = (struct ibv_sge *) (n + wr_count);
1489
1490	tmp = n;
1491	for (i = wr; i; i = i->next) {
1492		tmp->wr_id   = i->wr_id;
1493		tmp->num_sge = i->num_sge;
1494
1495		if (tmp->num_sge) {
1496			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
1497			s += tmp->num_sge;
1498		}
1499
1500		tmp++;
1501	}
1502
1503	resp.bad_wr = 0;
1504	if (write(ibqp->context->cmd_fd, cmd, cmd_size) != cmd_size)
1505		ret = errno;
1506
1507	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
1508
1509	wr_count = resp.bad_wr;
1510	if (wr_count) {
1511		i = wr;
1512		while (--wr_count)
1513			i = i->next;
1514		*bad_wr = i;
1515	} else if (ret)
1516		*bad_wr = wr;
1517
1518	return ret;
1519}
1520
1521int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr,
1522		      struct ibv_recv_wr **bad_wr)
1523{
1524	struct ibv_post_srq_recv *cmd;
1525	struct ibv_post_srq_recv_resp resp;
1526	struct ibv_recv_wr       *i;
1527	struct ibv_kern_recv_wr  *n, *tmp;
1528	struct ibv_sge           *s;
1529	unsigned                  wr_count = 0;
1530	unsigned                  sge_count = 0;
1531	int                       cmd_size;
1532	int                       ret = 0;
1533
1534	for (i = wr; i; i = i->next) {
1535		wr_count++;
1536		sge_count += i->num_sge;
1537	}
1538
1539	cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
1540	cmd  = alloca(cmd_size);
1541
1542	IBV_INIT_CMD_RESP(cmd, cmd_size, POST_SRQ_RECV, &resp, sizeof resp);
1543	cmd->srq_handle = srq->handle;
1544	cmd->wr_count  = wr_count;
1545	cmd->sge_count = sge_count;
1546	cmd->wqe_size  = sizeof *n;
1547
1548	n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
1549	s = (struct ibv_sge *) (n + wr_count);
1550
1551	tmp = n;
1552	for (i = wr; i; i = i->next) {
1553		tmp->wr_id = i->wr_id;
1554		tmp->num_sge = i->num_sge;
1555
1556		if (tmp->num_sge) {
1557			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
1558			s += tmp->num_sge;
1559		}
1560
1561		tmp++;
1562	}
1563
1564	resp.bad_wr = 0;
1565	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
1566		ret = errno;
1567
1568	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
1569
1570	wr_count = resp.bad_wr;
1571	if (wr_count) {
1572		i = wr;
1573		while (--wr_count)
1574			i = i->next;
1575		*bad_wr = i;
1576	} else if (ret)
1577		*bad_wr = wr;
1578
1579	return ret;
1580}
1581
1582int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
1583		      struct ibv_ah_attr *attr,
1584		      struct ibv_create_ah_resp *resp,
1585		      size_t resp_size)
1586{
1587	struct ibv_create_ah      cmd;
1588
1589	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_AH, resp, resp_size);
1590	cmd.user_handle            = (uintptr_t) ah;
1591	cmd.pd_handle              = pd->handle;
1592	cmd.attr.dlid              = attr->dlid;
1593	cmd.attr.sl                = attr->sl;
1594	cmd.attr.src_path_bits     = attr->src_path_bits;
1595	cmd.attr.static_rate       = attr->static_rate;
1596	cmd.attr.is_global         = attr->is_global;
1597	cmd.attr.port_num          = attr->port_num;
1598	cmd.attr.grh.flow_label    = attr->grh.flow_label;
1599	cmd.attr.grh.sgid_index    = attr->grh.sgid_index;
1600	cmd.attr.grh.hop_limit     = attr->grh.hop_limit;
1601	cmd.attr.grh.traffic_class = attr->grh.traffic_class;
1602	memcpy(cmd.attr.grh.dgid, attr->grh.dgid.raw, 16);
1603
1604	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1605		return errno;
1606
1607	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
1608
1609	ah->handle  = resp->handle;
1610	ah->context = pd->context;
1611
1612	return 0;
1613}
1614
1615int ibv_cmd_destroy_ah(struct ibv_ah *ah)
1616{
1617	struct ibv_destroy_ah cmd;
1618
1619	IBV_INIT_CMD(&cmd, sizeof cmd, DESTROY_AH);
1620	cmd.ah_handle = ah->handle;
1621
1622	if (write(ah->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1623		return errno;
1624
1625	return 0;
1626}
1627
1628int ibv_cmd_destroy_qp(struct ibv_qp *qp)
1629{
1630	struct ibv_destroy_qp      cmd;
1631	struct ibv_destroy_qp_resp resp;
1632
1633	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_QP, &resp, sizeof resp);
1634	cmd.qp_handle = qp->handle;
1635	cmd.reserved  = 0;
1636
1637	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1638		return errno;
1639
1640	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
1641
1642	pthread_mutex_lock(&qp->mutex);
1643	while (qp->events_completed != resp.events_reported)
1644		pthread_cond_wait(&qp->cond, &qp->mutex);
1645	pthread_mutex_unlock(&qp->mutex);
1646
1647	return 0;
1648}
1649
1650int ibv_cmd_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
1651{
1652	struct ibv_attach_mcast cmd;
1653
1654	IBV_INIT_CMD(&cmd, sizeof cmd, ATTACH_MCAST);
1655	memcpy(cmd.gid, gid->raw, sizeof cmd.gid);
1656	cmd.qp_handle = qp->handle;
1657	cmd.mlid      = lid;
1658	cmd.reserved  = 0;
1659
1660	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1661		return errno;
1662
1663	return 0;
1664}
1665
1666int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
1667{
1668	struct ibv_detach_mcast cmd;
1669
1670	IBV_INIT_CMD(&cmd, sizeof cmd, DETACH_MCAST);
1671	memcpy(cmd.gid, gid->raw, sizeof cmd.gid);
1672	cmd.qp_handle = qp->handle;
1673	cmd.mlid      = lid;
1674	cmd.reserved  = 0;
1675
1676	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1677		return errno;
1678
1679	return 0;
1680}
1681
1682static int buffer_is_zero(char *addr, ssize_t size)
1683{
1684	return addr[0] == 0 && !memcmp(addr, addr + 1, size - 1);
1685}
1686
static int get_filters_size(struct ibv_flow_spec *ib_spec,
			    struct ibv_kern_spec *kern_spec,
			    int *ib_filter_size, int *kern_filter_size,
			    enum ibv_flow_spec_type type)
{
	void *ib_spec_filter_mask;
	int curr_kern_filter_size;
	int min_filter_size;

	*ib_filter_size = (ib_spec->hdr.size - sizeof(ib_spec->hdr)) / 2;

	switch (type) {
	case IBV_FLOW_SPEC_IPV4_EXT:
		min_filter_size =
			offsetof(struct ibv_kern_ipv4_ext_filter, flags) +
			sizeof(kern_spec->ipv4_ext.mask.flags);
		curr_kern_filter_size = min_filter_size;
		ib_spec_filter_mask = (void *)&ib_spec->ipv4_ext.val +
			*ib_filter_size;
		break;
	case IBV_FLOW_SPEC_IPV6:
		min_filter_size =
			offsetof(struct ibv_kern_ipv6_filter, hop_limit) +
			sizeof(kern_spec->ipv6.mask.hop_limit);
		curr_kern_filter_size = min_filter_size;
		ib_spec_filter_mask = (void *)&ib_spec->ipv6.val +
			*ib_filter_size;
		break;
	case IBV_FLOW_SPEC_VXLAN_TUNNEL:
		min_filter_size =
			offsetof(struct ibv_kern_tunnel_filter,
				 tunnel_id) +
			sizeof(kern_spec->tunnel.mask.tunnel_id);
		curr_kern_filter_size = min_filter_size;
		ib_spec_filter_mask = (void *)&ib_spec->tunnel.val +
			*ib_filter_size;
		break;
	default:
		return EINVAL;
	}

	if (*ib_filter_size < min_filter_size)
		return EINVAL;

	if (*ib_filter_size > curr_kern_filter_size &&
	    !buffer_is_zero(ib_spec_filter_mask + curr_kern_filter_size,
			    *ib_filter_size - curr_kern_filter_size))
		return EOPNOTSUPP;

	*kern_filter_size = min_t(int, curr_kern_filter_size, *ib_filter_size);

	return 0;
}

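/*
 * Translate one user-space flow spec into the kernel ABI layout.
 * Fixed-size specs are copied verbatim; variable-size specs (extended
 * IPv4, IPv6, VXLAN tunnel) go through get_filters_size() so shorter,
 * older user layouts are accepted and unsupported mask bits rejected.
 * An extended IPv4 spec is sent to the kernel as an IPV4 spec whose
 * size tells the kernel the extra fields are present.
 */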
static int ib_spec_to_kern_spec(struct ibv_flow_spec *ib_spec,
				struct ibv_kern_spec *kern_spec)
{
	int kern_filter_size;
	int ib_filter_size;
	int ret;

	kern_spec->hdr.type = ib_spec->hdr.type;

	switch (kern_spec->hdr.type) {
	case IBV_FLOW_SPEC_ETH:
	case IBV_FLOW_SPEC_ETH | IBV_FLOW_SPEC_INNER:
		kern_spec->eth.size = sizeof(struct ibv_kern_spec_eth);
		memcpy(&kern_spec->eth.val, &ib_spec->eth.val,
		       sizeof(struct ibv_flow_eth_filter));
		memcpy(&kern_spec->eth.mask, &ib_spec->eth.mask,
		       sizeof(struct ibv_flow_eth_filter));
		break;
	case IBV_FLOW_SPEC_IPV4:
	case IBV_FLOW_SPEC_IPV4 | IBV_FLOW_SPEC_INNER:
		kern_spec->ipv4.size = sizeof(struct ibv_kern_spec_ipv4);
		memcpy(&kern_spec->ipv4.val, &ib_spec->ipv4.val,
		       sizeof(struct ibv_flow_ipv4_filter));
		memcpy(&kern_spec->ipv4.mask, &ib_spec->ipv4.mask,
		       sizeof(struct ibv_flow_ipv4_filter));
		break;
	case IBV_FLOW_SPEC_IPV4_EXT:
	case IBV_FLOW_SPEC_IPV4_EXT | IBV_FLOW_SPEC_INNER:
		ret = get_filters_size(ib_spec, kern_spec,
				       &ib_filter_size, &kern_filter_size,
				       IBV_FLOW_SPEC_IPV4_EXT);
		if (ret)
			return ret;

		kern_spec->hdr.type = IBV_FLOW_SPEC_IPV4 |
				     (IBV_FLOW_SPEC_INNER & ib_spec->hdr.type);
		kern_spec->ipv4_ext.size = sizeof(struct
						  ibv_kern_spec_ipv4_ext);
		memcpy(&kern_spec->ipv4_ext.val, &ib_spec->ipv4_ext.val,
		       kern_filter_size);
		memcpy(&kern_spec->ipv4_ext.mask, (void *)&ib_spec->ipv4_ext.val
		       + ib_filter_size, kern_filter_size);
		break;
	case IBV_FLOW_SPEC_IPV6:
	case IBV_FLOW_SPEC_IPV6 | IBV_FLOW_SPEC_INNER:
		ret = get_filters_size(ib_spec, kern_spec,
				       &ib_filter_size, &kern_filter_size,
				       IBV_FLOW_SPEC_IPV6);
		if (ret)
			return ret;

		kern_spec->ipv6.size = sizeof(struct ibv_kern_spec_ipv6);
		memcpy(&kern_spec->ipv6.val, &ib_spec->ipv6.val,
		       kern_filter_size);
		memcpy(&kern_spec->ipv6.mask, (void *)&ib_spec->ipv6.val
		       + ib_filter_size, kern_filter_size);
		break;
	case IBV_FLOW_SPEC_TCP:
	case IBV_FLOW_SPEC_UDP:
	case IBV_FLOW_SPEC_TCP | IBV_FLOW_SPEC_INNER:
	case IBV_FLOW_SPEC_UDP | IBV_FLOW_SPEC_INNER:
		kern_spec->tcp_udp.size = sizeof(struct ibv_kern_spec_tcp_udp);
		memcpy(&kern_spec->tcp_udp.val, &ib_spec->tcp_udp.val,
		       sizeof(struct ibv_flow_tcp_udp_filter));
		memcpy(&kern_spec->tcp_udp.mask, &ib_spec->tcp_udp.mask,
		       sizeof(struct ibv_flow_tcp_udp_filter));
		break;
	case IBV_FLOW_SPEC_VXLAN_TUNNEL:
		ret = get_filters_size(ib_spec, kern_spec,
				       &ib_filter_size, &kern_filter_size,
				       IBV_FLOW_SPEC_VXLAN_TUNNEL);
		if (ret)
			return ret;

		kern_spec->tunnel.size = sizeof(struct ibv_kern_spec_tunnel);
		memcpy(&kern_spec->tunnel.val, &ib_spec->tunnel.val,
		       kern_filter_size);
		memcpy(&kern_spec->tunnel.mask, (void *)&ib_spec->tunnel.val
		       + ib_filter_size, kern_filter_size);
		break;
	case IBV_FLOW_SPEC_ACTION_TAG:
		kern_spec->flow_tag.size =
			sizeof(struct ibv_kern_spec_action_tag);
		kern_spec->flow_tag.tag_id = ib_spec->flow_tag.tag_id;
		break;
	case IBV_FLOW_SPEC_ACTION_DROP:
		kern_spec->drop.size = sizeof(struct ibv_kern_spec_action_drop);
		break;
	default:
		return EINVAL;
	}
	return 0;
}

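/*
 * Build a CREATE_FLOW command: the fixed header is followed by the
 * translated specs laid out back to back.  The stack buffer is sized
 * for the worst case (num_of_specs full-size kernel specs), but only
 * the bytes actually filled in are written to the kernel.
 */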
struct ibv_flow *ibv_cmd_create_flow(struct ibv_qp *qp,
				     struct ibv_flow_attr *flow_attr)
{
	struct ibv_create_flow *cmd;
	struct ibv_create_flow_resp resp;
	struct ibv_flow *flow_id;
	size_t cmd_size;
	size_t written_size;
	int i, err;
	void *kern_spec;
	void *ib_spec;

	cmd_size = sizeof(*cmd) + (flow_attr->num_of_specs *
				  sizeof(struct ibv_kern_spec));
	cmd = alloca(cmd_size);
	flow_id = malloc(sizeof(*flow_id));
	if (!flow_id)
		return NULL;
	memset(cmd, 0, cmd_size);

	cmd->qp_handle = qp->handle;

	cmd->flow_attr.type = flow_attr->type;
	cmd->flow_attr.priority = flow_attr->priority;
	cmd->flow_attr.num_of_specs = flow_attr->num_of_specs;
	cmd->flow_attr.port = flow_attr->port;
	cmd->flow_attr.flags = flow_attr->flags;

	kern_spec = cmd + 1;
	ib_spec = flow_attr + 1;
	for (i = 0; i < flow_attr->num_of_specs; i++) {
		err = ib_spec_to_kern_spec(ib_spec, kern_spec);
		if (err) {
			errno = err;
			goto err;
		}
		cmd->flow_attr.size +=
			((struct ibv_kern_spec *)kern_spec)->hdr.size;
		kern_spec += ((struct ibv_kern_spec *)kern_spec)->hdr.size;
		ib_spec += ((struct ibv_flow_spec *)ib_spec)->hdr.size;
	}

	written_size = sizeof(*cmd) + cmd->flow_attr.size;
	IBV_INIT_CMD_RESP_EX_VCMD(cmd, written_size, written_size, CREATE_FLOW,
				  &resp, sizeof(resp));
	if (write(qp->context->cmd_fd, cmd, written_size) != written_size)
		goto err;

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof(resp));

	flow_id->context = qp->context;
	flow_id->handle = resp.flow_handle;
	return flow_id;
err:
	free(flow_id);
	return NULL;
}

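/*
 * Destroy a flow rule.  The user-space flow object is freed whether or
 * not the kernel command succeeds; on failure the errno is returned.
 */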
int ibv_cmd_destroy_flow(struct ibv_flow *flow_id)
{
	struct ibv_destroy_flow cmd;
	int ret = 0;

	memset(&cmd, 0, sizeof(cmd));
	IBV_INIT_CMD_EX(&cmd, sizeof(cmd), DESTROY_FLOW);
	cmd.flow_handle = flow_id->handle;

	if (write(flow_id->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
		ret = errno;
	free(flow_id);
	return ret;
}

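/*
 * Create a work queue.  This follows the extended-command convention:
 * the caller passes both the core structure size and the full
 * (provider-extended) size, and optional fields such as create_flags
 * are used only when the core command is large enough to carry them.
 */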
int ibv_cmd_create_wq(struct ibv_context *context,
		      struct ibv_wq_init_attr *wq_init_attr,
		      struct ibv_wq *wq,
		      struct ibv_create_wq *cmd,
		      size_t cmd_core_size,
		      size_t cmd_size,
		      struct ibv_create_wq_resp *resp,
		      size_t resp_core_size,
		      size_t resp_size)
{
	if (wq_init_attr->comp_mask >= IBV_WQ_INIT_ATTR_RESERVED)
		return EINVAL;

	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size,
			       CREATE_WQ, resp,
			       resp_core_size, resp_size);

	cmd->user_handle = (uintptr_t)wq;
	cmd->pd_handle   = wq_init_attr->pd->handle;
	cmd->cq_handle   = wq_init_attr->cq->handle;
	cmd->wq_type     = wq_init_attr->wq_type;
	cmd->max_sge     = wq_init_attr->max_sge;
	cmd->max_wr      = wq_init_attr->max_wr;
	cmd->comp_mask   = 0;

	if (cmd_core_size >= offsetof(struct ibv_create_wq, create_flags) +
	    sizeof(cmd->create_flags)) {
		if (wq_init_attr->comp_mask & IBV_WQ_INIT_ATTR_FLAGS) {
			if (wq_init_attr->create_flags & ~(IBV_WQ_FLAGS_RESERVED - 1))
				return EOPNOTSUPP;
			cmd->create_flags = wq_init_attr->create_flags;
		}
	}

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	if (resp->response_length < resp_core_size)
		return EINVAL;

	wq->handle  = resp->wq_handle;
	wq_init_attr->max_wr = resp->max_wr;
	wq_init_attr->max_sge = resp->max_sge;
	wq->wq_num = resp->wqn;
	wq->context = context;
	wq->cq = wq_init_attr->cq;
	wq->pd = wq_init_attr->pd;
	wq->wq_type = wq_init_attr->wq_type;

	return 0;
}

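/*
 * Modify WQ state and, when the caller's command layout carries them,
 * the WQ flags.  The cached wq->state is updated only after the kernel
 * has accepted the new state.
 */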
int ibv_cmd_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr,
		      struct ibv_modify_wq *cmd, size_t cmd_core_size,
		      size_t cmd_size)
{
	if (attr->attr_mask >= IBV_WQ_ATTR_RESERVED)
		return EINVAL;

	memset(cmd, 0, cmd_core_size);
	IBV_INIT_CMD_EX(cmd, cmd_size, MODIFY_WQ);

	cmd->curr_wq_state = attr->curr_wq_state;
	cmd->wq_state = attr->wq_state;
	if (cmd_core_size >= offsetof(struct ibv_modify_wq, flags_mask) +
	    sizeof(cmd->flags_mask)) {
		if (attr->attr_mask & IBV_WQ_ATTR_FLAGS) {
			if (attr->flags_mask & ~(IBV_WQ_FLAGS_RESERVED - 1))
				return EOPNOTSUPP;
			cmd->flags = attr->flags;
			cmd->flags_mask = attr->flags_mask;
		}
	}
	cmd->wq_handle = wq->handle;
	cmd->attr_mask = attr->attr_mask;

	if (write(wq->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	if (attr->attr_mask & IBV_WQ_ATTR_STATE)
		wq->state = attr->wq_state;

	return 0;
}

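/*
 * Destroy a WQ and, as for QPs, wait until every async event the
 * kernel reported for it has been handed to the application before
 * returning.
 */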
int ibv_cmd_destroy_wq(struct ibv_wq *wq)
{
	struct ibv_destroy_wq cmd;
	struct ibv_destroy_wq_resp resp;

	memset(&cmd, 0, sizeof(cmd));
	memset(&resp, 0, sizeof(resp));

	IBV_INIT_CMD_RESP_EX(&cmd, sizeof(cmd), DESTROY_WQ, &resp, sizeof(resp));
	cmd.wq_handle = wq->handle;

	if (write(wq->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof(resp));

	if (resp.response_length < sizeof(resp))
		return EINVAL;

	pthread_mutex_lock(&wq->mutex);
	while (wq->events_completed != resp.events_reported)
		pthread_cond_wait(&wq->cond, &wq->mutex);
	pthread_mutex_unlock(&wq->mutex);

	return 0;
}

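/*
 * Create a receive WQ indirection table.  The caller allocates room
 * for the array of WQ handles directly after the command structure;
 * that region is padded to at least a u64 so the extended command
 * stays 8-byte aligned.
 */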
int ibv_cmd_create_rwq_ind_table(struct ibv_context *context,
				 struct ibv_rwq_ind_table_init_attr *init_attr,
				 struct ibv_rwq_ind_table *rwq_ind_table,
				 struct ibv_create_rwq_ind_table *cmd,
				 size_t cmd_core_size,
				 size_t cmd_size,
				 struct ibv_create_rwq_ind_table_resp *resp,
				 size_t resp_core_size,
				 size_t resp_size)
{
	uint32_t required_tbl_size, alloc_tbl_size;
	uint32_t *tbl_start;
	int num_tbl_entries;
	int i;

	if (init_attr->comp_mask >= IBV_CREATE_IND_TABLE_RESERVED)
		return EINVAL;

	alloc_tbl_size = cmd_core_size - sizeof(*cmd);
	num_tbl_entries = 1 << init_attr->log_ind_tbl_size;

	/* Data must be u64 aligned */
	required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ?
			sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t));

	if (alloc_tbl_size < required_tbl_size)
		return EINVAL;

	tbl_start = (uint32_t *)((uint8_t *)cmd + sizeof(*cmd));
	for (i = 0; i < num_tbl_entries; i++)
		tbl_start[i] = init_attr->ind_tbl[i]->handle;

	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size,
			       CREATE_RWQ_IND_TBL, resp,
			       resp_core_size, resp_size);
	cmd->log_ind_tbl_size = init_attr->log_ind_tbl_size;
	cmd->comp_mask = 0;

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	if (resp->response_length < resp_core_size)
		return EINVAL;

	rwq_ind_table->ind_tbl_handle = resp->ind_tbl_handle;
	rwq_ind_table->ind_tbl_num = resp->ind_tbl_num;
	rwq_ind_table->context = context;
	return 0;
}

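/*
 * Destroy an indirection table; returns 0 on success or the errno
 * from the failed write.
 */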
int ibv_cmd_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table)
{
	struct ibv_destroy_rwq_ind_table cmd;
	int ret = 0;

	memset(&cmd, 0, sizeof(cmd));
	IBV_INIT_CMD_EX(&cmd, sizeof(cmd), DESTROY_RWQ_IND_TBL);
	cmd.ind_tbl_handle = rwq_ind_table->ind_tbl_handle;

	if (write(rwq_ind_table->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
		ret = errno;

	return ret;
}
