1321936Shselasky/*
2321936Shselasky * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
3321936Shselasky *
4321936Shselasky * This software is available to you under a choice of one of two
5321936Shselasky * licenses.  You may choose to be licensed under the terms of the GNU
6321936Shselasky * General Public License (GPL) Version 2, available from the file
7321936Shselasky * COPYING in the main directory of this source tree, or the
8321936Shselasky * OpenIB.org BSD license below:
9321936Shselasky *
10321936Shselasky *     Redistribution and use in source and binary forms, with or
11321936Shselasky *     without modification, are permitted provided that the following
12321936Shselasky *     conditions are met:
13321936Shselasky *
14321936Shselasky *      - Redistributions of source code must retain the above
15321936Shselasky *        copyright notice, this list of conditions and the following
16321936Shselasky *        disclaimer.
17321936Shselasky *
18321936Shselasky *      - Redistributions in binary form must reproduce the above
19321936Shselasky *        copyright notice, this list of conditions and the following
20321936Shselasky *        disclaimer in the documentation and/or other materials
21321936Shselasky *        provided with the distribution.
22321936Shselasky *
23321936Shselasky * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24321936Shselasky * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25321936Shselasky * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26321936Shselasky * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27321936Shselasky * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28321936Shselasky * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29321936Shselasky * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30321936Shselasky * SOFTWARE.
31321936Shselasky */
32321936Shselasky
33321936Shselasky#include <config.h>
34321936Shselasky
35321936Shselasky#include <infiniband/endian.h>
36321936Shselasky#include <stdlib.h>
37321936Shselasky#include <stdio.h>
38321936Shselasky#include <string.h>
39321936Shselasky#include <pthread.h>
40321936Shselasky#include <errno.h>
41321936Shselasky
42321936Shselasky#include "mlx4.h"
43321936Shselasky#include "mlx4-abi.h"
44321936Shselasky#include "wqe.h"
45321936Shselasky
46321936Shselaskyint mlx4_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
47321936Shselasky{
48321936Shselasky	struct ibv_query_device cmd;
49321936Shselasky	uint64_t raw_fw_ver;
50321936Shselasky	unsigned major, minor, sub_minor;
51321936Shselasky	int ret;
52321936Shselasky
53321936Shselasky	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd);
54321936Shselasky	if (ret)
55321936Shselasky		return ret;
56321936Shselasky
57321936Shselasky	major     = (raw_fw_ver >> 32) & 0xffff;
58321936Shselasky	minor     = (raw_fw_ver >> 16) & 0xffff;
59321936Shselasky	sub_minor = raw_fw_ver & 0xffff;
60321936Shselasky
61321936Shselasky	snprintf(attr->fw_ver, sizeof attr->fw_ver,
62321936Shselasky		 "%d.%d.%03d", major, minor, sub_minor);
63321936Shselasky
64321936Shselasky	return 0;
65321936Shselasky}
66321936Shselasky
67321936Shselaskyint mlx4_query_device_ex(struct ibv_context *context,
68321936Shselasky			 const struct ibv_query_device_ex_input *input,
69321936Shselasky			 struct ibv_device_attr_ex *attr,
70321936Shselasky			 size_t attr_size)
71321936Shselasky{
72321936Shselasky	struct mlx4_context *mctx = to_mctx(context);
73321936Shselasky	struct mlx4_query_device_ex_resp resp = {};
74321936Shselasky	struct mlx4_query_device_ex cmd = {};
75321936Shselasky	uint64_t raw_fw_ver;
76321936Shselasky	unsigned sub_minor;
77321936Shselasky	unsigned major;
78321936Shselasky	unsigned minor;
79321936Shselasky	int err;
80321936Shselasky
81321936Shselasky	err = ibv_cmd_query_device_ex(context, input, attr, attr_size,
82321936Shselasky				      &raw_fw_ver,
83321936Shselasky				      &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd),
84321936Shselasky				      &resp.ibv_resp, sizeof(resp.ibv_resp),
85321936Shselasky				      sizeof(resp));
86321936Shselasky	if (err)
87321936Shselasky		return err;
88321936Shselasky
89321936Shselasky	if (resp.comp_mask & MLX4_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET) {
90321936Shselasky		mctx->core_clock.offset = resp.hca_core_clock_offset;
91321936Shselasky		mctx->core_clock.offset_valid = 1;
92321936Shselasky	}
93321936Shselasky
94321936Shselasky	major     = (raw_fw_ver >> 32) & 0xffff;
95321936Shselasky	minor     = (raw_fw_ver >> 16) & 0xffff;
96321936Shselasky	sub_minor = raw_fw_ver & 0xffff;
97321936Shselasky
98321936Shselasky	snprintf(attr->orig_attr.fw_ver, sizeof attr->orig_attr.fw_ver,
99321936Shselasky		 "%d.%d.%03d", major, minor, sub_minor);
100321936Shselasky
101321936Shselasky	return 0;
102321936Shselasky}
103321936Shselasky
104321936Shselasky#define READL(ptr) (*((uint32_t *)(ptr)))
105321936Shselaskystatic int mlx4_read_clock(struct ibv_context *context, uint64_t *cycles)
106321936Shselasky{
107321936Shselasky	unsigned int clockhi, clocklo, clockhi1;
108321936Shselasky	int i;
109321936Shselasky	struct mlx4_context *ctx = to_mctx(context);
110321936Shselasky
111321936Shselasky	if (!ctx->hca_core_clock)
112321936Shselasky		return -EOPNOTSUPP;
113321936Shselasky
114321936Shselasky	/* Handle wraparound */
115321936Shselasky	for (i = 0; i < 2; i++) {
116321936Shselasky		clockhi = be32toh(READL(ctx->hca_core_clock));
117321936Shselasky		clocklo = be32toh(READL(ctx->hca_core_clock + 4));
118321936Shselasky		clockhi1 = be32toh(READL(ctx->hca_core_clock));
119321936Shselasky		if (clockhi == clockhi1)
120321936Shselasky			break;
121321936Shselasky	}
122321936Shselasky
123321936Shselasky	*cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo;
124321936Shselasky
125321936Shselasky	return 0;
126321936Shselasky}
127321936Shselasky
128321936Shselaskyint mlx4_query_rt_values(struct ibv_context *context,
129321936Shselasky			 struct ibv_values_ex *values)
130321936Shselasky{
131321936Shselasky	uint32_t comp_mask = 0;
132321936Shselasky	int err = 0;
133321936Shselasky
134321936Shselasky	if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) {
135321936Shselasky		uint64_t cycles;
136321936Shselasky
137321936Shselasky		err = mlx4_read_clock(context, &cycles);
138321936Shselasky		if (!err) {
139321936Shselasky			values->raw_clock.tv_sec = 0;
140321936Shselasky			values->raw_clock.tv_nsec = cycles;
141321936Shselasky			comp_mask |= IBV_VALUES_MASK_RAW_CLOCK;
142321936Shselasky		}
143321936Shselasky	}
144321936Shselasky
145321936Shselasky	values->comp_mask = comp_mask;
146321936Shselasky
147321936Shselasky	return err;
148321936Shselasky}
149321936Shselasky
150321936Shselaskyint mlx4_query_port(struct ibv_context *context, uint8_t port,
151321936Shselasky		     struct ibv_port_attr *attr)
152321936Shselasky{
153321936Shselasky	struct ibv_query_port cmd;
154321936Shselasky	int err;
155321936Shselasky
156321936Shselasky	err = ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
157321936Shselasky	if (!err && port <= MLX4_PORTS_NUM && port > 0) {
158321936Shselasky		struct mlx4_context *mctx = to_mctx(context);
159321936Shselasky		if (!mctx->port_query_cache[port - 1].valid) {
160321936Shselasky			mctx->port_query_cache[port - 1].link_layer =
161321936Shselasky				attr->link_layer;
162321936Shselasky			mctx->port_query_cache[port - 1].caps =
163321936Shselasky				attr->port_cap_flags;
164321936Shselasky			mctx->port_query_cache[port - 1].valid = 1;
165321936Shselasky		}
166321936Shselasky	}
167321936Shselasky
168321936Shselasky	return err;
169321936Shselasky}
170321936Shselasky
171321936Shselasky/* Only the fields in the port cache will be valid */
172321936Shselaskystatic int query_port_cache(struct ibv_context *context, uint8_t port_num,
173321936Shselasky			    struct ibv_port_attr *port_attr)
174321936Shselasky{
175321936Shselasky	struct mlx4_context *mctx = to_mctx(context);
176321936Shselasky	if (port_num <= 0 || port_num > MLX4_PORTS_NUM)
177321936Shselasky		return -EINVAL;
178321936Shselasky	if (mctx->port_query_cache[port_num - 1].valid) {
179321936Shselasky		port_attr->link_layer =
180321936Shselasky			mctx->
181321936Shselasky			port_query_cache[port_num - 1].
182321936Shselasky			link_layer;
183321936Shselasky		port_attr->port_cap_flags =
184321936Shselasky			mctx->
185321936Shselasky			port_query_cache[port_num - 1].
186321936Shselasky			caps;
187321936Shselasky		return 0;
188321936Shselasky	}
189321936Shselasky	return mlx4_query_port(context, port_num,
190321936Shselasky			       (struct ibv_port_attr *)port_attr);
191321936Shselasky
192321936Shselasky}
193321936Shselasky
194321936Shselaskystruct ibv_pd *mlx4_alloc_pd(struct ibv_context *context)
195321936Shselasky{
196321936Shselasky	struct ibv_alloc_pd       cmd;
197321936Shselasky	struct mlx4_alloc_pd_resp resp;
198321936Shselasky	struct mlx4_pd		 *pd;
199321936Shselasky
200321936Shselasky	pd = malloc(sizeof *pd);
201321936Shselasky	if (!pd)
202321936Shselasky		return NULL;
203321936Shselasky
204321936Shselasky	if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd,
205321936Shselasky			     &resp.ibv_resp, sizeof resp)) {
206321936Shselasky		free(pd);
207321936Shselasky		return NULL;
208321936Shselasky	}
209321936Shselasky
210321936Shselasky	pd->pdn = resp.pdn;
211321936Shselasky
212321936Shselasky	return &pd->ibv_pd;
213321936Shselasky}
214321936Shselasky
/*
 * Release a protection domain.
 *
 * The driver object is freed only after the dealloc command succeeded.
 * Returns 0 on success or the error from ibv_cmd_dealloc_pd().
 */
int mlx4_free_pd(struct ibv_pd *pd)
{
	int rc = ibv_cmd_dealloc_pd(pd);

	if (rc == 0)
		free(to_mpd(pd));

	return rc;
}
226321936Shselasky
227321936Shselaskystruct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
228321936Shselasky				struct ibv_xrcd_init_attr *attr)
229321936Shselasky{
230321936Shselasky	struct ibv_open_xrcd cmd;
231321936Shselasky	struct ibv_open_xrcd_resp resp;
232321936Shselasky	struct verbs_xrcd *xrcd;
233321936Shselasky	int ret;
234321936Shselasky
235321936Shselasky	xrcd = calloc(1, sizeof *xrcd);
236321936Shselasky	if (!xrcd)
237321936Shselasky		return NULL;
238321936Shselasky
239321936Shselasky	ret = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), attr,
240321936Shselasky				&cmd, sizeof cmd, &resp, sizeof resp);
241321936Shselasky	if (ret)
242321936Shselasky		goto err;
243321936Shselasky
244321936Shselasky	return &xrcd->xrcd;
245321936Shselasky
246321936Shselaskyerr:
247321936Shselasky	free(xrcd);
248321936Shselasky	return NULL;
249321936Shselasky}
250321936Shselasky
251321936Shselaskyint mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
252321936Shselasky{
253321936Shselasky	struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
254321936Shselasky	int ret;
255321936Shselasky
256321936Shselasky	ret = ibv_cmd_close_xrcd(xrcd);
257321936Shselasky	if (!ret)
258321936Shselasky		free(xrcd);
259321936Shselasky
260321936Shselasky	return ret;
261321936Shselasky}
262321936Shselasky
263321936Shselaskystruct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
264321936Shselasky			   int access)
265321936Shselasky{
266321936Shselasky	struct ibv_mr *mr;
267321936Shselasky	struct ibv_reg_mr cmd;
268321936Shselasky	struct ibv_reg_mr_resp resp;
269321936Shselasky	int ret;
270321936Shselasky
271321936Shselasky	mr = malloc(sizeof *mr);
272321936Shselasky	if (!mr)
273321936Shselasky		return NULL;
274321936Shselasky
275321936Shselasky	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
276321936Shselasky			     access, mr, &cmd, sizeof cmd,
277321936Shselasky			     &resp, sizeof resp);
278321936Shselasky	if (ret) {
279321936Shselasky		free(mr);
280321936Shselasky		return NULL;
281321936Shselasky	}
282321936Shselasky
283321936Shselasky	return mr;
284321936Shselasky}
285321936Shselasky
286321936Shselaskyint mlx4_rereg_mr(struct ibv_mr *mr,
287321936Shselasky		  int flags,
288321936Shselasky		  struct ibv_pd *pd, void *addr,
289321936Shselasky		  size_t length, int access)
290321936Shselasky{
291321936Shselasky	struct ibv_rereg_mr cmd;
292321936Shselasky	struct ibv_rereg_mr_resp resp;
293321936Shselasky
294321936Shselasky	if (flags & IBV_REREG_MR_KEEP_VALID)
295321936Shselasky		return ENOTSUP;
296321936Shselasky
297321936Shselasky	return ibv_cmd_rereg_mr(mr, flags, addr, length,
298321936Shselasky				(uintptr_t)addr,
299321936Shselasky				access, pd,
300321936Shselasky				&cmd, sizeof(cmd),
301321936Shselasky				&resp, sizeof(resp));
302321936Shselasky}
303321936Shselasky
/*
 * Deregister a memory region.
 *
 * The ibv_mr is freed only after the dereg command succeeded.
 * Returns 0 on success or the error from ibv_cmd_dereg_mr().
 */
int mlx4_dereg_mr(struct ibv_mr *mr)
{
	int rc = ibv_cmd_dereg_mr(mr);

	if (rc == 0)
		free(mr);

	return rc;
}
315321936Shselasky
316321936Shselaskystruct ibv_mw *mlx4_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
317321936Shselasky{
318321936Shselasky	struct ibv_mw *mw;
319321936Shselasky	struct ibv_alloc_mw cmd;
320321936Shselasky	struct ibv_alloc_mw_resp resp;
321321936Shselasky	int ret;
322321936Shselasky
323321936Shselasky	mw = calloc(1, sizeof(*mw));
324321936Shselasky	if (!mw)
325321936Shselasky		return NULL;
326321936Shselasky
327321936Shselasky	ret = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd),
328321936Shselasky			     &resp, sizeof(resp));
329321936Shselasky
330321936Shselasky	if (ret) {
331321936Shselasky		free(mw);
332321936Shselasky		return NULL;
333321936Shselasky	}
334321936Shselasky
335321936Shselasky	return mw;
336321936Shselasky}
337321936Shselasky
338321936Shselaskyint mlx4_dealloc_mw(struct ibv_mw *mw)
339321936Shselasky{
340321936Shselasky	int ret;
341321936Shselasky	struct ibv_dealloc_mw cmd;
342321936Shselasky
343321936Shselasky	ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd));
344321936Shselasky	if (ret)
345321936Shselasky		return ret;
346321936Shselasky
347321936Shselasky	free(mw);
348321936Shselasky	return 0;
349321936Shselasky}
350321936Shselasky
351321936Shselaskyint mlx4_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
352321936Shselasky		 struct ibv_mw_bind *mw_bind)
353321936Shselasky{
354321936Shselasky	struct ibv_send_wr *bad_wr = NULL;
355321936Shselasky	struct ibv_send_wr wr = { };
356321936Shselasky	int ret;
357321936Shselasky
358321936Shselasky
359321936Shselasky	wr.opcode = IBV_WR_BIND_MW;
360321936Shselasky	wr.next = NULL;
361321936Shselasky
362321936Shselasky	wr.wr_id = mw_bind->wr_id;
363321936Shselasky	wr.send_flags = mw_bind->send_flags;
364321936Shselasky
365321936Shselasky	wr.bind_mw.mw = mw;
366321936Shselasky	wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);
367321936Shselasky	wr.bind_mw.bind_info = mw_bind->bind_info;
368321936Shselasky
369321936Shselasky	ret = mlx4_post_send(qp, &wr, &bad_wr);
370321936Shselasky
371321936Shselasky	if (ret)
372321936Shselasky		return ret;
373321936Shselasky
374321936Shselasky	/* updating the mw with the latest rkey. */
375321936Shselasky	mw->rkey = wr.bind_mw.rkey;
376321936Shselasky
377321936Shselasky	return 0;
378321936Shselasky}
379321936Shselasky
/*
 * Round @req up to the next power of two.
 *
 * Returns the smallest power of two that is >= req; any req <= 1
 * (including zero and negative values) yields 1.
 */
int align_queue_size(int req)
{
	int nent = 1;

	while (nent < req)
		nent <<= 1;

	return nent;
}
389321936Shselasky
390321936Shselaskyenum {
391321936Shselasky	CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS	|
392321936Shselasky				       IBV_WC_EX_WITH_COMPLETION_TIMESTAMP
393321936Shselasky};
394321936Shselasky
395321936Shselaskyenum {
396321936Shselasky	CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS
397321936Shselasky};
398321936Shselasky
399321936Shselaskyenum {
400321936Shselasky	CREATE_CQ_SUPPORTED_FLAGS = IBV_CREATE_CQ_ATTR_SINGLE_THREADED
401321936Shselasky};
402321936Shselasky
403321936Shselasky
404321936Shselaskystatic int mlx4_cmd_create_cq(struct ibv_context *context,
405321936Shselasky			      struct ibv_cq_init_attr_ex *cq_attr,
406321936Shselasky			      struct mlx4_cq *cq)
407321936Shselasky{
408321936Shselasky	struct mlx4_create_cq      cmd = {};
409321936Shselasky	struct mlx4_create_cq_resp resp = {};
410321936Shselasky	int ret;
411321936Shselasky
412321936Shselasky	cmd.buf_addr = (uintptr_t) cq->buf.buf;
413321936Shselasky	cmd.db_addr  = (uintptr_t) cq->set_ci_db;
414321936Shselasky
415321936Shselasky	ret = ibv_cmd_create_cq(context, cq_attr->cqe, cq_attr->channel,
416321936Shselasky				cq_attr->comp_vector,
417321936Shselasky				ibv_cq_ex_to_cq(&cq->ibv_cq),
418321936Shselasky				&cmd.ibv_cmd, sizeof(cmd),
419321936Shselasky				&resp.ibv_resp, sizeof(resp));
420321936Shselasky	if (!ret)
421321936Shselasky		cq->cqn = resp.cqn;
422321936Shselasky
423321936Shselasky	return ret;
424321936Shselasky
425321936Shselasky}
426321936Shselasky
427321936Shselaskystatic int mlx4_cmd_create_cq_ex(struct ibv_context *context,
428321936Shselasky				 struct ibv_cq_init_attr_ex *cq_attr,
429321936Shselasky				 struct mlx4_cq *cq)
430321936Shselasky{
431321936Shselasky	struct mlx4_create_cq_ex      cmd = {};
432321936Shselasky	struct mlx4_create_cq_resp_ex resp = {};
433321936Shselasky	int ret;
434321936Shselasky
435321936Shselasky	cmd.buf_addr = (uintptr_t) cq->buf.buf;
436321936Shselasky	cmd.db_addr  = (uintptr_t) cq->set_ci_db;
437321936Shselasky
438321936Shselasky	ret = ibv_cmd_create_cq_ex(context, cq_attr,
439321936Shselasky				   &cq->ibv_cq, &cmd.ibv_cmd,
440321936Shselasky				   sizeof(cmd.ibv_cmd),
441321936Shselasky				   sizeof(cmd),
442321936Shselasky				   &resp.ibv_resp,
443321936Shselasky				   sizeof(resp.ibv_resp),
444321936Shselasky				   sizeof(resp));
445321936Shselasky	if (!ret)
446321936Shselasky		cq->cqn = resp.cqn;
447321936Shselasky
448321936Shselasky	return ret;
449321936Shselasky}
450321936Shselasky
451321936Shselaskystatic struct ibv_cq_ex *create_cq(struct ibv_context *context,
452321936Shselasky				   struct ibv_cq_init_attr_ex *cq_attr,
453321936Shselasky				   int cq_alloc_flags)
454321936Shselasky{
455321936Shselasky	struct mlx4_cq      *cq;
456321936Shselasky	int                  ret;
457321936Shselasky	struct mlx4_context *mctx = to_mctx(context);
458321936Shselasky
459321936Shselasky	/* Sanity check CQ size before proceeding */
460321936Shselasky	if (cq_attr->cqe > 0x3fffff) {
461321936Shselasky		errno = EINVAL;
462321936Shselasky		return NULL;
463321936Shselasky	}
464321936Shselasky
465321936Shselasky	if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) {
466321936Shselasky		errno = ENOTSUP;
467321936Shselasky		return NULL;
468321936Shselasky	}
469321936Shselasky
470321936Shselasky	if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
471321936Shselasky	    cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) {
472321936Shselasky		errno = ENOTSUP;
473321936Shselasky		return NULL;
474321936Shselasky	}
475321936Shselasky
476321936Shselasky	if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS)
477321936Shselasky		return NULL;
478321936Shselasky
479321936Shselasky	/* mlx4 devices don't support slid and sl in cqe when completion
480321936Shselasky	 * timestamp is enabled in the CQ
481321936Shselasky	*/
482321936Shselasky	if ((cq_attr->wc_flags & (IBV_WC_EX_WITH_SLID | IBV_WC_EX_WITH_SL)) &&
483321936Shselasky	    (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)) {
484321936Shselasky		errno = ENOTSUP;
485321936Shselasky		return NULL;
486321936Shselasky	}
487321936Shselasky
488321936Shselasky	cq = malloc(sizeof *cq);
489321936Shselasky	if (!cq)
490321936Shselasky		return NULL;
491321936Shselasky
492321936Shselasky	cq->cons_index = 0;
493321936Shselasky
494321936Shselasky	if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE))
495321936Shselasky		goto err;
496321936Shselasky
497321936Shselasky	cq_attr->cqe = align_queue_size(cq_attr->cqe + 1);
498321936Shselasky
499321936Shselasky	if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cq_attr->cqe, mctx->cqe_size))
500321936Shselasky		goto err;
501321936Shselasky
502321936Shselasky	cq->cqe_size = mctx->cqe_size;
503321936Shselasky	cq->set_ci_db  = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ);
504321936Shselasky	if (!cq->set_ci_db)
505321936Shselasky		goto err_buf;
506321936Shselasky
507321936Shselasky	cq->arm_db     = cq->set_ci_db + 1;
508321936Shselasky	*cq->arm_db    = 0;
509321936Shselasky	cq->arm_sn     = 1;
510321936Shselasky	*cq->set_ci_db = 0;
511321936Shselasky	cq->flags = cq_alloc_flags;
512321936Shselasky
513321936Shselasky	if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
514321936Shselasky	    cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED)
515321936Shselasky		cq->flags |= MLX4_CQ_FLAGS_SINGLE_THREADED;
516321936Shselasky
517321936Shselasky	--cq_attr->cqe;
518321936Shselasky	if (cq_alloc_flags & MLX4_CQ_FLAGS_EXTENDED)
519321936Shselasky		ret = mlx4_cmd_create_cq_ex(context, cq_attr, cq);
520321936Shselasky	else
521321936Shselasky		ret = mlx4_cmd_create_cq(context, cq_attr, cq);
522321936Shselasky
523321936Shselasky	if (ret)
524321936Shselasky		goto err_db;
525321936Shselasky
526321936Shselasky
527321936Shselasky	if (cq_alloc_flags & MLX4_CQ_FLAGS_EXTENDED)
528321936Shselasky		mlx4_cq_fill_pfns(cq, cq_attr);
529321936Shselasky
530321936Shselasky	return &cq->ibv_cq;
531321936Shselasky
532321936Shselaskyerr_db:
533321936Shselasky	mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_CQ, cq->set_ci_db);
534321936Shselasky
535321936Shselaskyerr_buf:
536321936Shselasky	mlx4_free_buf(&cq->buf);
537321936Shselasky
538321936Shselaskyerr:
539321936Shselasky	free(cq);
540321936Shselasky
541321936Shselasky	return NULL;
542321936Shselasky}
543321936Shselasky
544321936Shselaskystruct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
545321936Shselasky			      struct ibv_comp_channel *channel,
546321936Shselasky			      int comp_vector)
547321936Shselasky{
548321936Shselasky	struct ibv_cq_ex *cq;
549321936Shselasky	struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel,
550321936Shselasky					      .comp_vector = comp_vector,
551321936Shselasky					      .wc_flags = IBV_WC_STANDARD_FLAGS};
552321936Shselasky
553321936Shselasky	cq = create_cq(context, &cq_attr, 0);
554321936Shselasky	return cq ? ibv_cq_ex_to_cq(cq) : NULL;
555321936Shselasky}
556321936Shselasky
557321936Shselaskystruct ibv_cq_ex *mlx4_create_cq_ex(struct ibv_context *context,
558321936Shselasky				    struct ibv_cq_init_attr_ex *cq_attr)
559321936Shselasky{
560321936Shselasky	/*
561321936Shselasky	 * Make local copy since some attributes might be adjusted
562321936Shselasky	 * for internal use.
563321936Shselasky	 */
564321936Shselasky	struct ibv_cq_init_attr_ex cq_attr_c = {.cqe = cq_attr->cqe,
565321936Shselasky						.channel = cq_attr->channel,
566321936Shselasky						.comp_vector = cq_attr->comp_vector,
567321936Shselasky						.wc_flags = cq_attr->wc_flags,
568321936Shselasky						.comp_mask = cq_attr->comp_mask,
569321936Shselasky						.flags = cq_attr->flags};
570321936Shselasky
571321936Shselasky	return create_cq(context, &cq_attr_c, MLX4_CQ_FLAGS_EXTENDED);
572321936Shselasky}
573321936Shselasky
574321936Shselaskyint mlx4_resize_cq(struct ibv_cq *ibcq, int cqe)
575321936Shselasky{
576321936Shselasky	struct mlx4_cq *cq = to_mcq(ibcq);
577321936Shselasky	struct mlx4_resize_cq cmd;
578321936Shselasky	struct ibv_resize_cq_resp resp;
579321936Shselasky	struct mlx4_buf buf;
580321936Shselasky	int old_cqe, outst_cqe, ret;
581321936Shselasky
582321936Shselasky	/* Sanity check CQ size before proceeding */
583321936Shselasky	if (cqe > 0x3fffff)
584321936Shselasky		return EINVAL;
585321936Shselasky
586321936Shselasky	pthread_spin_lock(&cq->lock);
587321936Shselasky
588321936Shselasky	cqe = align_queue_size(cqe + 1);
589321936Shselasky	if (cqe == ibcq->cqe + 1) {
590321936Shselasky		ret = 0;
591321936Shselasky		goto out;
592321936Shselasky	}
593321936Shselasky
594321936Shselasky	/* Can't be smaller then the number of outstanding CQEs */
595321936Shselasky	outst_cqe = mlx4_get_outstanding_cqes(cq);
596321936Shselasky	if (cqe < outst_cqe + 1) {
597321936Shselasky		ret = EINVAL;
598321936Shselasky		goto out;
599321936Shselasky	}
600321936Shselasky
601321936Shselasky	ret = mlx4_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe, cq->cqe_size);
602321936Shselasky	if (ret)
603321936Shselasky		goto out;
604321936Shselasky
605321936Shselasky	old_cqe = ibcq->cqe;
606321936Shselasky	cmd.buf_addr = (uintptr_t) buf.buf;
607321936Shselasky
608321936Shselasky	ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd,
609321936Shselasky				&resp, sizeof resp);
610321936Shselasky	if (ret) {
611321936Shselasky		mlx4_free_buf(&buf);
612321936Shselasky		goto out;
613321936Shselasky	}
614321936Shselasky
615321936Shselasky	mlx4_cq_resize_copy_cqes(cq, buf.buf, old_cqe);
616321936Shselasky
617321936Shselasky	mlx4_free_buf(&cq->buf);
618321936Shselasky	cq->buf = buf;
619321936Shselasky	mlx4_update_cons_index(cq);
620321936Shselasky
621321936Shselaskyout:
622321936Shselasky	pthread_spin_unlock(&cq->lock);
623321936Shselasky	return ret;
624321936Shselasky}
625321936Shselasky
626321936Shselaskyint mlx4_destroy_cq(struct ibv_cq *cq)
627321936Shselasky{
628321936Shselasky	int ret;
629321936Shselasky
630321936Shselasky	ret = ibv_cmd_destroy_cq(cq);
631321936Shselasky	if (ret)
632321936Shselasky		return ret;
633321936Shselasky
634321936Shselasky	mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->set_ci_db);
635321936Shselasky	mlx4_free_buf(&to_mcq(cq)->buf);
636321936Shselasky	free(to_mcq(cq));
637321936Shselasky
638321936Shselasky	return 0;
639321936Shselasky}
640321936Shselasky
641321936Shselaskystruct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
642321936Shselasky				struct ibv_srq_init_attr *attr)
643321936Shselasky{
644321936Shselasky	struct mlx4_create_srq      cmd;
645321936Shselasky	struct mlx4_create_srq_resp resp;
646321936Shselasky	struct mlx4_srq		   *srq;
647321936Shselasky	int			    ret;
648321936Shselasky
649321936Shselasky	/* Sanity check SRQ size before proceeding */
650321936Shselasky	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
651321936Shselasky		return NULL;
652321936Shselasky
653321936Shselasky	srq = malloc(sizeof *srq);
654321936Shselasky	if (!srq)
655321936Shselasky		return NULL;
656321936Shselasky
657321936Shselasky	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
658321936Shselasky		goto err;
659321936Shselasky
660321936Shselasky	srq->max     = align_queue_size(attr->attr.max_wr + 1);
661321936Shselasky	srq->max_gs  = attr->attr.max_sge;
662321936Shselasky	srq->counter = 0;
663321936Shselasky	srq->ext_srq = 0;
664321936Shselasky
665321936Shselasky	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
666321936Shselasky		goto err;
667321936Shselasky
668321936Shselasky	srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
669321936Shselasky	if (!srq->db)
670321936Shselasky		goto err_free;
671321936Shselasky
672321936Shselasky	*srq->db = 0;
673321936Shselasky
674321936Shselasky	cmd.buf_addr = (uintptr_t) srq->buf.buf;
675321936Shselasky	cmd.db_addr  = (uintptr_t) srq->db;
676321936Shselasky
677321936Shselasky	ret = ibv_cmd_create_srq(pd, &srq->verbs_srq.srq, attr,
678321936Shselasky				 &cmd.ibv_cmd, sizeof cmd,
679321936Shselasky				 &resp.ibv_resp, sizeof resp);
680321936Shselasky	if (ret)
681321936Shselasky		goto err_db;
682321936Shselasky
683321936Shselasky	return &srq->verbs_srq.srq;
684321936Shselasky
685321936Shselaskyerr_db:
686321936Shselasky	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
687321936Shselasky
688321936Shselaskyerr_free:
689321936Shselasky	free(srq->wrid);
690321936Shselasky	mlx4_free_buf(&srq->buf);
691321936Shselasky
692321936Shselaskyerr:
693321936Shselasky	free(srq);
694321936Shselasky
695321936Shselasky	return NULL;
696321936Shselasky}
697321936Shselasky
698321936Shselaskystruct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
699321936Shselasky				   struct ibv_srq_init_attr_ex *attr_ex)
700321936Shselasky{
701321936Shselasky	if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ||
702321936Shselasky	    (attr_ex->srq_type == IBV_SRQT_BASIC))
703321936Shselasky		return mlx4_create_srq(attr_ex->pd, (struct ibv_srq_init_attr *) attr_ex);
704321936Shselasky	else if (attr_ex->srq_type == IBV_SRQT_XRC)
705321936Shselasky		return mlx4_create_xrc_srq(context, attr_ex);
706321936Shselasky
707321936Shselasky	return NULL;
708321936Shselasky}
709321936Shselasky
710321936Shselaskyint mlx4_modify_srq(struct ibv_srq *srq,
711321936Shselasky		     struct ibv_srq_attr *attr,
712321936Shselasky		     int attr_mask)
713321936Shselasky{
714321936Shselasky	struct ibv_modify_srq cmd;
715321936Shselasky
716321936Shselasky	return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
717321936Shselasky}
718321936Shselasky
719321936Shselaskyint mlx4_query_srq(struct ibv_srq *srq,
720321936Shselasky		    struct ibv_srq_attr *attr)
721321936Shselasky{
722321936Shselasky	struct ibv_query_srq cmd;
723321936Shselasky
724321936Shselasky	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
725321936Shselasky}
726321936Shselasky
727321936Shselaskyint mlx4_destroy_srq(struct ibv_srq *srq)
728321936Shselasky{
729321936Shselasky	int ret;
730321936Shselasky
731321936Shselasky	if (to_msrq(srq)->ext_srq)
732321936Shselasky		return mlx4_destroy_xrc_srq(srq);
733321936Shselasky
734321936Shselasky	ret = ibv_cmd_destroy_srq(srq);
735321936Shselasky	if (ret)
736321936Shselasky		return ret;
737321936Shselasky
738321936Shselasky	mlx4_free_db(to_mctx(srq->context), MLX4_DB_TYPE_RQ, to_msrq(srq)->db);
739321936Shselasky	mlx4_free_buf(&to_msrq(srq)->buf);
740321936Shselasky	free(to_msrq(srq)->wrid);
741321936Shselasky	free(to_msrq(srq));
742321936Shselasky
743321936Shselasky	return 0;
744321936Shselasky}
745321936Shselasky
746321936Shselaskystatic int mlx4_cmd_create_qp_ex(struct ibv_context *context,
747321936Shselasky				 struct ibv_qp_init_attr_ex *attr,
748321936Shselasky				 struct mlx4_create_qp *cmd,
749321936Shselasky				 struct mlx4_qp *qp)
750321936Shselasky{
751321936Shselasky	struct mlx4_create_qp_ex cmd_ex;
752321936Shselasky	struct mlx4_create_qp_resp_ex resp;
753321936Shselasky	int ret;
754321936Shselasky
755321936Shselasky	memset(&cmd_ex, 0, sizeof(cmd_ex));
756321936Shselasky	memcpy(&cmd_ex.ibv_cmd.base, &cmd->ibv_cmd.user_handle,
757321936Shselasky	       offsetof(typeof(cmd->ibv_cmd), is_srq) +
758321936Shselasky	       sizeof(cmd->ibv_cmd.is_srq) -
759321936Shselasky	       offsetof(typeof(cmd->ibv_cmd), user_handle));
760321936Shselasky
761321936Shselasky	memcpy(&cmd_ex.drv_ex, &cmd->buf_addr,
762321936Shselasky	       offsetof(typeof(*cmd), sq_no_prefetch) +
763321936Shselasky	       sizeof(cmd->sq_no_prefetch) - sizeof(cmd->ibv_cmd));
764321936Shselasky
765321936Shselasky	ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp,
766321936Shselasky				    sizeof(qp->verbs_qp), attr,
767321936Shselasky				    &cmd_ex.ibv_cmd, sizeof(cmd_ex.ibv_cmd),
768321936Shselasky				    sizeof(cmd_ex), &resp.ibv_resp,
769321936Shselasky				    sizeof(resp.ibv_resp), sizeof(resp));
770321936Shselasky	return ret;
771321936Shselasky}
772321936Shselasky
773321936Shselaskyenum {
774321936Shselasky	MLX4_CREATE_QP_SUP_COMP_MASK = (IBV_QP_INIT_ATTR_PD |
775321936Shselasky					IBV_QP_INIT_ATTR_XRCD |
776321936Shselasky					IBV_QP_INIT_ATTR_CREATE_FLAGS),
777321936Shselasky};
778321936Shselasky
779321936Shselaskyenum {
780321936Shselasky	MLX4_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS),
781321936Shselasky};
782321936Shselasky
783321936Shselaskystruct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
784321936Shselasky				 struct ibv_qp_init_attr_ex *attr)
785321936Shselasky{
786321936Shselasky	struct mlx4_context *ctx = to_mctx(context);
787321936Shselasky	struct mlx4_create_qp     cmd;
788321936Shselasky	struct ibv_create_qp_resp resp;
789321936Shselasky	struct mlx4_qp		 *qp;
790321936Shselasky	int			  ret;
791321936Shselasky
792321936Shselasky	/* Sanity check QP size before proceeding */
793321936Shselasky	if (ctx->max_qp_wr) { /* mlx4_query_device succeeded */
794321936Shselasky		if (attr->cap.max_send_wr  > ctx->max_qp_wr ||
795321936Shselasky		    attr->cap.max_recv_wr  > ctx->max_qp_wr ||
796321936Shselasky		    attr->cap.max_send_sge > ctx->max_sge   ||
797321936Shselasky		    attr->cap.max_recv_sge > ctx->max_sge)
798321936Shselasky			return NULL;
799321936Shselasky	} else {
800321936Shselasky		if (attr->cap.max_send_wr  > 65536 ||
801321936Shselasky		    attr->cap.max_recv_wr  > 65536 ||
802321936Shselasky		    attr->cap.max_send_sge > 64    ||
803321936Shselasky		    attr->cap.max_recv_sge > 64)
804321936Shselasky			return NULL;
805321936Shselasky	}
806321936Shselasky	if (attr->cap.max_inline_data > 1024)
807321936Shselasky		return NULL;
808321936Shselasky
809321936Shselasky	if (attr->comp_mask & ~MLX4_CREATE_QP_SUP_COMP_MASK)
810321936Shselasky		return NULL;
811321936Shselasky
812321936Shselasky	qp = calloc(1, sizeof *qp);
813321936Shselasky	if (!qp)
814321936Shselasky		return NULL;
815321936Shselasky
816321936Shselasky	if (attr->qp_type == IBV_QPT_XRC_RECV) {
817321936Shselasky		attr->cap.max_send_wr = qp->sq.wqe_cnt = 0;
818321936Shselasky	} else {
819321936Shselasky		mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
820321936Shselasky		/*
821321936Shselasky		 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
822321936Shselasky		 * allow HW to prefetch.
823321936Shselasky		 */
824321936Shselasky		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
825321936Shselasky		qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
826321936Shselasky	}
827321936Shselasky
828321936Shselasky	if (attr->srq || attr->qp_type == IBV_QPT_XRC_SEND ||
829321936Shselasky	    attr->qp_type == IBV_QPT_XRC_RECV) {
830321936Shselasky		attr->cap.max_recv_wr = qp->rq.wqe_cnt = attr->cap.max_recv_sge = 0;
831321936Shselasky	} else {
832321936Shselasky		qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
833321936Shselasky		if (attr->cap.max_recv_sge < 1)
834321936Shselasky			attr->cap.max_recv_sge = 1;
835321936Shselasky		if (attr->cap.max_recv_wr < 1)
836321936Shselasky			attr->cap.max_recv_wr = 1;
837321936Shselasky	}
838321936Shselasky
839321936Shselasky	if (mlx4_alloc_qp_buf(context, &attr->cap, attr->qp_type, qp))
840321936Shselasky		goto err;
841321936Shselasky
842321936Shselasky	mlx4_init_qp_indices(qp);
843321936Shselasky
844321936Shselasky	if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) ||
845321936Shselasky	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
846321936Shselasky		goto err_free;
847321936Shselasky
848321936Shselasky	if (attr->cap.max_recv_sge) {
849321936Shselasky		qp->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
850321936Shselasky		if (!qp->db)
851321936Shselasky			goto err_free;
852321936Shselasky
853321936Shselasky		*qp->db = 0;
854321936Shselasky		cmd.db_addr = (uintptr_t) qp->db;
855321936Shselasky	} else {
856321936Shselasky		cmd.db_addr = 0;
857321936Shselasky	}
858321936Shselasky
859321936Shselasky	cmd.buf_addr	    = (uintptr_t) qp->buf.buf;
860321936Shselasky	cmd.log_sq_stride   = qp->sq.wqe_shift;
861321936Shselasky	for (cmd.log_sq_bb_count = 0;
862321936Shselasky	     qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count;
863321936Shselasky	     ++cmd.log_sq_bb_count)
864321936Shselasky		; /* nothing */
865321936Shselasky	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
866321936Shselasky	memset(cmd.reserved, 0, sizeof cmd.reserved);
867321936Shselasky	pthread_mutex_lock(&to_mctx(context)->qp_table_mutex);
868321936Shselasky
869321936Shselasky	if (attr->comp_mask & MLX4_CREATE_QP_EX2_COMP_MASK)
870321936Shselasky		ret = mlx4_cmd_create_qp_ex(context, attr, &cmd, qp);
871321936Shselasky	else
872321936Shselasky		ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp,
873321936Shselasky					   sizeof(qp->verbs_qp), attr,
874321936Shselasky					   &cmd.ibv_cmd, sizeof(cmd), &resp,
875321936Shselasky					   sizeof(resp));
876321936Shselasky	if (ret)
877321936Shselasky		goto err_rq_db;
878321936Shselasky
879321936Shselasky	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
880321936Shselasky		ret = mlx4_store_qp(to_mctx(context), qp->verbs_qp.qp.qp_num, qp);
881321936Shselasky		if (ret)
882321936Shselasky			goto err_destroy;
883321936Shselasky	}
884321936Shselasky	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
885321936Shselasky
886321936Shselasky	qp->rq.wqe_cnt = qp->rq.max_post = attr->cap.max_recv_wr;
887321936Shselasky	qp->rq.max_gs  = attr->cap.max_recv_sge;
888321936Shselasky	if (attr->qp_type != IBV_QPT_XRC_RECV)
889321936Shselasky		mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);
890321936Shselasky
891321936Shselasky	qp->doorbell_qpn    = htobe32(qp->verbs_qp.qp.qp_num << 8);
892321936Shselasky	if (attr->sq_sig_all)
893321936Shselasky		qp->sq_signal_bits = htobe32(MLX4_WQE_CTRL_CQ_UPDATE);
894321936Shselasky	else
895321936Shselasky		qp->sq_signal_bits = 0;
896321936Shselasky
897321936Shselasky	return &qp->verbs_qp.qp;
898321936Shselasky
899321936Shselaskyerr_destroy:
900321936Shselasky	ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
901321936Shselasky
902321936Shselaskyerr_rq_db:
903321936Shselasky	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
904321936Shselasky	if (attr->cap.max_recv_sge)
905321936Shselasky		mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, qp->db);
906321936Shselasky
907321936Shselaskyerr_free:
908321936Shselasky	free(qp->sq.wrid);
909321936Shselasky	if (qp->rq.wqe_cnt)
910321936Shselasky		free(qp->rq.wrid);
911321936Shselasky	mlx4_free_buf(&qp->buf);
912321936Shselasky
913321936Shselaskyerr:
914321936Shselasky	free(qp);
915321936Shselasky
916321936Shselasky	return NULL;
917321936Shselasky}
918321936Shselasky
919321936Shselaskystruct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
920321936Shselasky{
921321936Shselasky	struct ibv_qp_init_attr_ex attr_ex;
922321936Shselasky	struct ibv_qp *qp;
923321936Shselasky
924321936Shselasky	memcpy(&attr_ex, attr, sizeof *attr);
925321936Shselasky	attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
926321936Shselasky	attr_ex.pd = pd;
927321936Shselasky	qp = mlx4_create_qp_ex(pd->context, &attr_ex);
928321936Shselasky	if (qp)
929321936Shselasky		memcpy(attr, &attr_ex, sizeof *attr);
930321936Shselasky	return qp;
931321936Shselasky}
932321936Shselasky
933321936Shselaskystruct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr)
934321936Shselasky{
935321936Shselasky	struct ibv_open_qp cmd;
936321936Shselasky	struct ibv_create_qp_resp resp;
937321936Shselasky	struct mlx4_qp *qp;
938321936Shselasky	int ret;
939321936Shselasky
940321936Shselasky	qp = calloc(1, sizeof *qp);
941321936Shselasky	if (!qp)
942321936Shselasky		return NULL;
943321936Shselasky
944321936Shselasky	ret = ibv_cmd_open_qp(context, &qp->verbs_qp, sizeof(qp->verbs_qp), attr,
945321936Shselasky			      &cmd, sizeof cmd, &resp, sizeof resp);
946321936Shselasky	if (ret)
947321936Shselasky		goto err;
948321936Shselasky
949321936Shselasky	return &qp->verbs_qp.qp;
950321936Shselasky
951321936Shselaskyerr:
952321936Shselasky	free(qp);
953321936Shselasky	return NULL;
954321936Shselasky}
955321936Shselasky
956321936Shselaskyint mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
957321936Shselasky		   int attr_mask,
958321936Shselasky		   struct ibv_qp_init_attr *init_attr)
959321936Shselasky{
960321936Shselasky	struct ibv_query_qp cmd;
961321936Shselasky	struct mlx4_qp *qp = to_mqp(ibqp);
962321936Shselasky	int ret;
963321936Shselasky
964321936Shselasky	ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof cmd);
965321936Shselasky	if (ret)
966321936Shselasky		return ret;
967321936Shselasky
968321936Shselasky	init_attr->cap.max_send_wr     = qp->sq.max_post;
969321936Shselasky	init_attr->cap.max_send_sge    = qp->sq.max_gs;
970321936Shselasky	init_attr->cap.max_inline_data = qp->max_inline_data;
971321936Shselasky
972321936Shselasky	attr->cap = init_attr->cap;
973321936Shselasky
974321936Shselasky	return 0;
975321936Shselasky}
976321936Shselasky
977321936Shselaskyint mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
978321936Shselasky		    int attr_mask)
979321936Shselasky{
980321936Shselasky	struct ibv_modify_qp cmd = {};
981321936Shselasky	struct ibv_port_attr port_attr;
982321936Shselasky	struct mlx4_qp *mqp = to_mqp(qp);
983321936Shselasky	struct ibv_device_attr device_attr;
984321936Shselasky	int ret;
985321936Shselasky
986321936Shselasky	memset(&device_attr, 0, sizeof(device_attr));
987321936Shselasky	if (attr_mask & IBV_QP_PORT) {
988321936Shselasky		ret = ibv_query_port(qp->context, attr->port_num,
989321936Shselasky				     &port_attr);
990321936Shselasky		if (ret)
991321936Shselasky			return ret;
992321936Shselasky		mqp->link_layer = port_attr.link_layer;
993321936Shselasky
994321936Shselasky		ret = ibv_query_device(qp->context, &device_attr);
995321936Shselasky		if (ret)
996321936Shselasky			return ret;
997321936Shselasky
998321936Shselasky		switch(qp->qp_type) {
999321936Shselasky		case IBV_QPT_UD:
1000321936Shselasky			if ((mqp->link_layer == IBV_LINK_LAYER_INFINIBAND) &&
1001321936Shselasky			    (device_attr.device_cap_flags & IBV_DEVICE_UD_IP_CSUM))
1002321936Shselasky				mqp->qp_cap_cache |= MLX4_CSUM_SUPPORT_UD_OVER_IB |
1003321936Shselasky						MLX4_RX_CSUM_VALID;
1004321936Shselasky			break;
1005321936Shselasky		case IBV_QPT_RAW_PACKET:
1006321936Shselasky			if ((mqp->link_layer == IBV_LINK_LAYER_ETHERNET) &&
1007321936Shselasky			    (device_attr.device_cap_flags & IBV_DEVICE_RAW_IP_CSUM))
1008321936Shselasky				mqp->qp_cap_cache |= MLX4_CSUM_SUPPORT_RAW_OVER_ETH |
1009321936Shselasky						MLX4_RX_CSUM_VALID;
1010321936Shselasky			break;
1011321936Shselasky		default:
1012321936Shselasky			break;
1013321936Shselasky		}
1014321936Shselasky
1015321936Shselasky	}
1016321936Shselasky
1017321936Shselasky	if (qp->state == IBV_QPS_RESET &&
1018321936Shselasky	    attr_mask & IBV_QP_STATE   &&
1019321936Shselasky	    attr->qp_state == IBV_QPS_INIT) {
1020321936Shselasky		mlx4_qp_init_sq_ownership(to_mqp(qp));
1021321936Shselasky	}
1022321936Shselasky
1023321936Shselasky	ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof cmd);
1024321936Shselasky
1025321936Shselasky	if (!ret		       &&
1026321936Shselasky	    (attr_mask & IBV_QP_STATE) &&
1027321936Shselasky	    attr->qp_state == IBV_QPS_RESET) {
1028321936Shselasky		if (qp->recv_cq)
1029321936Shselasky			mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
1030321936Shselasky				      qp->srq ? to_msrq(qp->srq) : NULL);
1031321936Shselasky		if (qp->send_cq && qp->send_cq != qp->recv_cq)
1032321936Shselasky			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
1033321936Shselasky
1034321936Shselasky		mlx4_init_qp_indices(to_mqp(qp));
1035321936Shselasky		if (to_mqp(qp)->rq.wqe_cnt)
1036321936Shselasky			*to_mqp(qp)->db = 0;
1037321936Shselasky	}
1038321936Shselasky
1039321936Shselasky	return ret;
1040321936Shselasky}
1041321936Shselasky
1042321936Shselaskystatic void mlx4_lock_cqs(struct ibv_qp *qp)
1043321936Shselasky{
1044321936Shselasky	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
1045321936Shselasky	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
1046321936Shselasky
1047321936Shselasky	if (!qp->send_cq || !qp->recv_cq) {
1048321936Shselasky		if (qp->send_cq)
1049321936Shselasky			pthread_spin_lock(&send_cq->lock);
1050321936Shselasky		else if (qp->recv_cq)
1051321936Shselasky			pthread_spin_lock(&recv_cq->lock);
1052321936Shselasky	} else if (send_cq == recv_cq) {
1053321936Shselasky		pthread_spin_lock(&send_cq->lock);
1054321936Shselasky	} else if (send_cq->cqn < recv_cq->cqn) {
1055321936Shselasky		pthread_spin_lock(&send_cq->lock);
1056321936Shselasky		pthread_spin_lock(&recv_cq->lock);
1057321936Shselasky	} else {
1058321936Shselasky		pthread_spin_lock(&recv_cq->lock);
1059321936Shselasky		pthread_spin_lock(&send_cq->lock);
1060321936Shselasky	}
1061321936Shselasky}
1062321936Shselasky
1063321936Shselaskystatic void mlx4_unlock_cqs(struct ibv_qp *qp)
1064321936Shselasky{
1065321936Shselasky	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
1066321936Shselasky	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
1067321936Shselasky
1068321936Shselasky
1069321936Shselasky	if (!qp->send_cq || !qp->recv_cq) {
1070321936Shselasky		if (qp->send_cq)
1071321936Shselasky			pthread_spin_unlock(&send_cq->lock);
1072321936Shselasky		else if (qp->recv_cq)
1073321936Shselasky			pthread_spin_unlock(&recv_cq->lock);
1074321936Shselasky	} else if (send_cq == recv_cq) {
1075321936Shselasky		pthread_spin_unlock(&send_cq->lock);
1076321936Shselasky	} else if (send_cq->cqn < recv_cq->cqn) {
1077321936Shselasky		pthread_spin_unlock(&recv_cq->lock);
1078321936Shselasky		pthread_spin_unlock(&send_cq->lock);
1079321936Shselasky	} else {
1080321936Shselasky		pthread_spin_unlock(&send_cq->lock);
1081321936Shselasky		pthread_spin_unlock(&recv_cq->lock);
1082321936Shselasky	}
1083321936Shselasky}
1084321936Shselasky
1085321936Shselaskyint mlx4_destroy_qp(struct ibv_qp *ibqp)
1086321936Shselasky{
1087321936Shselasky	struct mlx4_qp *qp = to_mqp(ibqp);
1088321936Shselasky	int ret;
1089321936Shselasky
1090321936Shselasky	pthread_mutex_lock(&to_mctx(ibqp->context)->qp_table_mutex);
1091321936Shselasky	ret = ibv_cmd_destroy_qp(ibqp);
1092321936Shselasky	if (ret) {
1093321936Shselasky		pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
1094321936Shselasky		return ret;
1095321936Shselasky	}
1096321936Shselasky
1097321936Shselasky	mlx4_lock_cqs(ibqp);
1098321936Shselasky
1099321936Shselasky	if (ibqp->recv_cq)
1100321936Shselasky		__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
1101321936Shselasky				ibqp->srq ? to_msrq(ibqp->srq) : NULL);
1102321936Shselasky	if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
1103321936Shselasky		__mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);
1104321936Shselasky
1105321936Shselasky	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
1106321936Shselasky		mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
1107321936Shselasky
1108321936Shselasky	mlx4_unlock_cqs(ibqp);
1109321936Shselasky	pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
1110321936Shselasky
1111321936Shselasky	if (qp->rq.wqe_cnt) {
1112321936Shselasky		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
1113321936Shselasky		free(qp->rq.wrid);
1114321936Shselasky	}
1115321936Shselasky	if (qp->sq.wqe_cnt)
1116321936Shselasky		free(qp->sq.wrid);
1117321936Shselasky	mlx4_free_buf(&qp->buf);
1118321936Shselasky	free(qp);
1119321936Shselasky
1120321936Shselasky	return 0;
1121321936Shselasky}
1122321936Shselasky
1123321936Shselaskystatic int link_local_gid(const union ibv_gid *gid)
1124321936Shselasky{
1125321936Shselasky	uint32_t *tmp = (uint32_t *)gid->raw;
1126321936Shselasky	uint32_t hi = tmp[0];
1127321936Shselasky	uint32_t lo = tmp[1];
1128321936Shselasky
1129321936Shselasky	if (hi == htobe32(0xfe800000) && lo == 0)
1130321936Shselasky		return 1;
1131321936Shselasky
1132321936Shselasky	return 0;
1133321936Shselasky}
1134321936Shselasky
1135321936Shselaskystatic int is_multicast_gid(const union ibv_gid *gid)
1136321936Shselasky{
1137321936Shselasky	return gid->raw[0] == 0xff;
1138321936Shselasky}
1139321936Shselasky
1140321936Shselaskystatic uint16_t get_vlan_id(union ibv_gid *gid)
1141321936Shselasky{
1142321936Shselasky	uint16_t vid;
1143321936Shselasky	vid = gid->raw[11] << 8 | gid->raw[12];
1144321936Shselasky	return vid < 0x1000 ? vid : 0xffff;
1145321936Shselasky}
1146321936Shselasky
1147321936Shselaskystatic int mlx4_resolve_grh_to_l2(struct ibv_pd *pd, struct mlx4_ah *ah,
1148321936Shselasky				  struct ibv_ah_attr *attr)
1149321936Shselasky{
1150321936Shselasky	int err, i;
1151321936Shselasky	uint16_t vid;
1152321936Shselasky	union ibv_gid sgid;
1153321936Shselasky
1154321936Shselasky	if (link_local_gid(&attr->grh.dgid)) {
1155321936Shselasky		memcpy(ah->mac, &attr->grh.dgid.raw[8], 3);
1156321936Shselasky		memcpy(ah->mac + 3, &attr->grh.dgid.raw[13], 3);
1157321936Shselasky		ah->mac[0] ^= 2;
1158321936Shselasky
1159321936Shselasky		vid = get_vlan_id(&attr->grh.dgid);
1160321936Shselasky	} else if (is_multicast_gid(&attr->grh.dgid)) {
1161321936Shselasky		ah->mac[0] = 0x33;
1162321936Shselasky		ah->mac[1] = 0x33;
1163321936Shselasky		for (i = 2; i < 6; ++i)
1164321936Shselasky			ah->mac[i] = attr->grh.dgid.raw[i + 10];
1165321936Shselasky
1166321936Shselasky		err = ibv_query_gid(pd->context, attr->port_num,
1167321936Shselasky				    attr->grh.sgid_index, &sgid);
1168321936Shselasky		if (err)
1169321936Shselasky			return err;
1170321936Shselasky
1171321936Shselasky		ah->av.dlid = htobe16(0xc000);
1172321936Shselasky		ah->av.port_pd |= htobe32(1 << 31);
1173321936Shselasky
1174321936Shselasky		vid = get_vlan_id(&sgid);
1175321936Shselasky	} else
1176321936Shselasky		return 1;
1177321936Shselasky
1178321936Shselasky	if (vid != 0xffff) {
1179321936Shselasky		ah->av.port_pd |= htobe32(1 << 29);
1180321936Shselasky		ah->vlan = vid | ((attr->sl & 7) << 13);
1181321936Shselasky	}
1182321936Shselasky
1183321936Shselasky	return 0;
1184321936Shselasky}
1185321936Shselasky
1186321936Shselaskystruct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
1187321936Shselasky{
1188321936Shselasky	struct mlx4_ah *ah;
1189321936Shselasky	struct ibv_port_attr port_attr;
1190321936Shselasky
1191321936Shselasky	if (query_port_cache(pd->context, attr->port_num, &port_attr))
1192321936Shselasky		return NULL;
1193321936Shselasky
1194321936Shselasky	ah = malloc(sizeof *ah);
1195321936Shselasky	if (!ah)
1196321936Shselasky		return NULL;
1197321936Shselasky
1198321936Shselasky	memset(&ah->av, 0, sizeof ah->av);
1199321936Shselasky
1200321936Shselasky	ah->av.port_pd   = htobe32(to_mpd(pd)->pdn | (attr->port_num << 24));
1201321936Shselasky
1202321936Shselasky	if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
1203321936Shselasky		ah->av.g_slid = attr->src_path_bits;
1204321936Shselasky		ah->av.dlid   = htobe16(attr->dlid);
1205321936Shselasky		ah->av.sl_tclass_flowlabel = htobe32(attr->sl << 28);
1206321936Shselasky	} else
1207321936Shselasky		ah->av.sl_tclass_flowlabel = htobe32(attr->sl << 29);
1208321936Shselasky
1209321936Shselasky	if (attr->static_rate) {
1210321936Shselasky		ah->av.stat_rate = attr->static_rate + MLX4_STAT_RATE_OFFSET;
1211321936Shselasky		/* XXX check rate cap? */
1212321936Shselasky	}
1213321936Shselasky	if (attr->is_global) {
1214321936Shselasky		ah->av.g_slid   |= 0x80;
1215321936Shselasky		ah->av.gid_index = attr->grh.sgid_index;
1216321936Shselasky		ah->av.hop_limit = attr->grh.hop_limit;
1217321936Shselasky		ah->av.sl_tclass_flowlabel |=
1218321936Shselasky			htobe32((attr->grh.traffic_class << 20) |
1219321936Shselasky				    attr->grh.flow_label);
1220321936Shselasky		memcpy(ah->av.dgid, attr->grh.dgid.raw, 16);
1221321936Shselasky	}
1222321936Shselasky
1223321936Shselasky	if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
1224321936Shselasky		if (port_attr.port_cap_flags & IBV_PORT_IP_BASED_GIDS) {
1225321936Shselasky			uint16_t vid;
1226321936Shselasky
1227321936Shselasky			if (ibv_resolve_eth_l2_from_gid(pd->context, attr,
1228321936Shselasky							ah->mac, &vid)) {
1229321936Shselasky				free(ah);
1230321936Shselasky				return NULL;
1231321936Shselasky			}
1232321936Shselasky
1233321936Shselasky			if (vid <= 0xfff) {
1234321936Shselasky				ah->av.port_pd |= htobe32(1 << 29);
1235321936Shselasky				ah->vlan = vid |
1236321936Shselasky					((attr->sl & 7) << 13);
1237321936Shselasky			}
1238321936Shselasky
1239321936Shselasky		} else {
1240321936Shselasky			if (mlx4_resolve_grh_to_l2(pd, ah, attr)) {
1241321936Shselasky				free(ah);
1242321936Shselasky				return NULL;
1243321936Shselasky			}
1244321936Shselasky		}
1245321936Shselasky	}
1246321936Shselasky
1247321936Shselasky	return &ah->ibv_ah;
1248321936Shselasky}
1249321936Shselasky
/* Free the mlx4 address handle that contains @ah.  Always returns 0. */
int mlx4_destroy_ah(struct ibv_ah *ah)
{
	struct mlx4_ah *mah = to_mah(ah);

	free(mah);
	return 0;
}
1256