1/*-
2 * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
3 *
4 * Copyright (C) 2019 - 2023 Intel Corporation
5 *
6 * This software is available to you under a choice of one of two
7 * licenses.  You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenFabrics.org BSD license below:
11 *
12 *   Redistribution and use in source and binary forms, with or
13 *   without modification, are permitted provided that the following
14 *   conditions are met:
15 *
16 *    - Redistributions of source code must retain the above
17 *	copyright notice, this list of conditions and the following
18 *	disclaimer.
19 *
20 *    - Redistributions in binary form must reproduce the above
21 *	copyright notice, this list of conditions and the following
22 *	disclaimer in the documentation and/or other materials
23 *	provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#include <config.h>
36#include <stdlib.h>
37#include <stdio.h>
38#include <string.h>
39#include <unistd.h>
40#include <signal.h>
41#include <errno.h>
42#include <sys/param.h>
43#include <sys/mman.h>
44#include <netinet/in.h>
45#include <sys/stat.h>
46#include <fcntl.h>
47#include <stdbool.h>
48#include <infiniband/opcode.h>
49
50#include "irdma_umain.h"
51#include "abi.h"
52
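/**
 * print_fw_ver - format the firmware version as a "major.minor" string
 * @fw_ver: raw firmware version (major in bits 47:32, minor in bits 15:0)
 * @str: buffer to receive the formatted string
 * @len: size of the buffer
 *
 * Illustrative example: a fw_ver of 0x0000000200000007 prints "2.7".
 */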
53static inline void
54print_fw_ver(uint64_t fw_ver, char *str, size_t len)
55{
56	uint16_t major, minor;
57
58	major = fw_ver >> 32 & 0xffff;
59	minor = fw_ver & 0xffff;
60
61	snprintf(str, len, "%d.%d", major, minor);
62}
63
64/**
65 * irdma_uquery_device_ex - query device attributes including extended properties
66 * @context: user context for the device
67 * @input: extensible input struct for ibv_query_device_ex verb
68 * @attr: extended device attribute struct
69 * @attr_size: size of extended device attribute struct
70 **/
71int
72irdma_uquery_device_ex(struct ibv_context *context,
73		       const struct ibv_query_device_ex_input *input,
74		       struct ibv_device_attr_ex *attr, size_t attr_size)
75{
76	struct irdma_query_device_ex cmd = {};
77	struct irdma_query_device_ex_resp resp = {};
78	uint64_t fw_ver;
79	int ret;
80
81	ret = ibv_cmd_query_device_ex(context, input, attr, attr_size, &fw_ver,
82				      &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd),
83				      &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp));
84	if (ret)
85		return ret;
86
87	print_fw_ver(fw_ver, attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver));
88
89	return 0;
90}
91
/**
 * irdma_uquery_device - call driver to query device for max resources
 * @context: user context for the device
 * @attr: where to save all the max resources from the driver
 **/
97int
98irdma_uquery_device(struct ibv_context *context, struct ibv_device_attr *attr)
99{
100	struct ibv_query_device cmd;
101	uint64_t fw_ver;
102	int ret;
103
104	ret = ibv_cmd_query_device(context, attr, &fw_ver, &cmd, sizeof(cmd));
105	if (ret)
106		return ret;
107
108	print_fw_ver(fw_ver, attr->fw_ver, sizeof(attr->fw_ver));
109
110	return 0;
111}
112
/**
 * irdma_uquery_port - get port attributes (msg size, link, mtu...)
 * @context: user context of the device
 * @port: port for the attributes
 * @attr: to return port attributes
 **/
119int
120irdma_uquery_port(struct ibv_context *context, uint8_t port,
121		  struct ibv_port_attr *attr)
122{
123	struct ibv_query_port cmd;
124
125	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
126}
127
/**
 * irdma_ualloc_pd - allocate a protection domain and return the pd ptr
 * @context: user context of the device
 **/
132struct ibv_pd *
133irdma_ualloc_pd(struct ibv_context *context)
134{
135	struct ibv_alloc_pd cmd;
136	struct irdma_ualloc_pd_resp resp = {};
137	struct irdma_upd *iwupd;
138	int err;
139
140	iwupd = calloc(1, sizeof(*iwupd));
141	if (!iwupd)
142		return NULL;
143
144	err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd),
145			       &resp.ibv_resp, sizeof(resp));
146	if (err)
147		goto err_free;
148
149	iwupd->pd_id = resp.pd_id;
150
151	return &iwupd->ibv_pd;
152
153err_free:
154	free(iwupd);
155
156	errno = err;
157	return NULL;
158}
159
160/**
161 * irdma_ufree_pd - free pd resources
162 * @pd: pd to free resources
163 */
164int
165irdma_ufree_pd(struct ibv_pd *pd)
166{
167	struct irdma_upd *iwupd;
168	int ret;
169
170	iwupd = container_of(pd, struct irdma_upd, ibv_pd);
171	ret = ibv_cmd_dealloc_pd(pd);
172	if (ret)
173		return ret;
174
175	free(iwupd);
176
177	return 0;
178}
179
/**
 * irdma_ureg_mr - register user memory region
 * @pd: pd for the mr
 * @addr: user address of the memory region
 * @length: length of the memory
 * @access: access allowed on this mr
 */
188struct ibv_mr *
189irdma_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
190	      int access)
191{
192	struct verbs_mr *vmr;
193	struct irdma_ureg_mr cmd = {};
194	struct ibv_reg_mr_resp resp;
195	int err;
196
197	vmr = malloc(sizeof(*vmr));
198	if (!vmr)
199		return NULL;
200
201	cmd.reg_type = IRDMA_MEMREG_TYPE_MEM;
202	err = ibv_cmd_reg_mr(pd, addr, length,
203			     (uintptr_t)addr, access, &vmr->ibv_mr, &cmd.ibv_cmd,
204			     sizeof(cmd), &resp, sizeof(resp));
205	if (err) {
206		free(vmr);
207		errno = err;
208		return NULL;
209	}
210
211	return &vmr->ibv_mr;
212}
213
/**
 * irdma_urereg_mr - re-register memory region
 * @vmr: mr that was allocated
 * @flags: bit mask indicating which attributes of the MR are modified
 * @pd: pd of the mr
 * @addr: user address of the memory region
 * @length: length of the memory
 * @access: access allowed on this mr
 */
219int
220irdma_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
221		void *addr, size_t length, int access)
222{
223	struct irdma_urereg_mr cmd = {};
224	struct ibv_rereg_mr_resp resp;
225
226	cmd.reg_type = IRDMA_MEMREG_TYPE_MEM;
227	return ibv_cmd_rereg_mr(&vmr->ibv_mr, flags, addr, length, (uintptr_t)addr,
228				access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp,
229				sizeof(resp));
230}
231
/**
 * irdma_udereg_mr - deregister memory region
 * @mr: mr that was allocated
 */
236int
237irdma_udereg_mr(struct ibv_mr *mr)
238{
239	struct verbs_mr *vmr;
240	int ret;
241
242	vmr = container_of(mr, struct verbs_mr, ibv_mr);
243
	ret = ibv_cmd_dereg_mr(mr);
	if (ret)
		return ret;

	free(vmr);

	return 0;
249}
250
251/**
252 * irdma_ualloc_mw - allocate memory window
253 * @pd: protection domain
254 * @type: memory window type
255 */
256struct ibv_mw *
257irdma_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
258{
259	struct ibv_mw *mw;
260	struct ibv_alloc_mw cmd;
261	struct ibv_alloc_mw_resp resp;
262	int err;
263
264	mw = calloc(1, sizeof(*mw));
265	if (!mw)
266		return NULL;
267
268	err = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp,
269			       sizeof(resp));
270	if (err) {
271		printf("%s: Failed to alloc memory window\n",
272		       __func__);
273		free(mw);
274		errno = err;
275		return NULL;
276	}
277
278	return mw;
279}
280
281/**
282 * irdma_ubind_mw - bind a memory window
283 * @qp: qp to post WR
284 * @mw: memory window to bind
285 * @mw_bind: bind info
286 */
287int
288irdma_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
289	       struct ibv_mw_bind *mw_bind)
290{
291	struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
292	struct verbs_mr *vmr;
293
294	struct ibv_send_wr wr = {};
295	struct ibv_send_wr *bad_wr;
296	int err;
297
298	if (!bind_info->mr && (bind_info->addr || bind_info->length))
299		return EINVAL;
300
301	if (bind_info->mr) {
302		vmr = verbs_get_mr(bind_info->mr);
303		if (vmr->mr_type != IBV_MR_TYPE_MR)
304			return ENOTSUP;
305
306		if (vmr->access & IBV_ACCESS_ZERO_BASED)
307			return EINVAL;
308
309		if (mw->pd != bind_info->mr->pd)
310			return EPERM;
311	}
312
313	wr.opcode = IBV_WR_BIND_MW;
314	wr.bind_mw.bind_info = mw_bind->bind_info;
315	wr.bind_mw.mw = mw;
316	wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);
317
318	wr.wr_id = mw_bind->wr_id;
319	wr.send_flags = mw_bind->send_flags;
320
321	err = irdma_upost_send(qp, &wr, &bad_wr);
322	if (!err)
323		mw->rkey = wr.bind_mw.rkey;
324
325	return err;
326}
327
328/**
329 * irdma_udealloc_mw - deallocate memory window
330 * @mw: memory window to dealloc
331 */
332int
333irdma_udealloc_mw(struct ibv_mw *mw)
334{
335	int ret;
336	struct ibv_dealloc_mw cmd;
337
338	ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd));
339	if (ret)
340		return ret;
341	free(mw);
342
343	return 0;
344}
345
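/**
 * irdma_alloc_hw_buf - allocate a page-aligned buffer for HW access
 * @size: number of bytes to allocate
 *
 * The range is marked with ibv_dontfork_range() so it remains usable
 * by the HW across fork(). Returns NULL on failure.
 */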
346static void *
347irdma_alloc_hw_buf(size_t size)
348{
349	void *buf;
350
351	buf = memalign(IRDMA_HW_PAGE_SIZE, size);
352
353	if (!buf)
354		return NULL;
355	if (ibv_dontfork_range(buf, size)) {
356		free(buf);
357		return NULL;
358	}
359
360	return buf;
361}
362
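/**
 * irdma_free_hw_buf - free a buffer allocated with irdma_alloc_hw_buf
 * @buf: buffer to free
 * @size: size of the buffer
 */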
363static void
364irdma_free_hw_buf(void *buf, size_t size)
365{
366	ibv_dofork_range(buf, size);
367	free(buf);
368}
369
/**
 * get_cq_size - returns actual cqe needed by HW
 * @ncqe: minimum cqes requested by application
 * @hw_rev: HW generation
 */
376static inline int
377get_cq_size(int ncqe, u8 hw_rev)
378{
379	ncqe++;
380
381	/* Completions with immediate require 1 extra entry */
382	if (hw_rev > IRDMA_GEN_1)
383		ncqe *= 2;
384
385	if (ncqe < IRDMA_U_MINCQ_SIZE)
386		ncqe = IRDMA_U_MINCQ_SIZE;
387
388	return ncqe;
389}
390
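/**
 * get_cq_total_bytes - CQ buffer size rounded up to whole HW pages
 * @cq_size: number of cqes in the CQ
 */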
static inline size_t
get_cq_total_bytes(u32 cq_size)
{
	return roundup(cq_size * sizeof(struct irdma_cqe), IRDMA_HW_PAGE_SIZE);
}
394
395/**
396 * ucreate_cq - irdma util function to create a CQ
397 * @context: ibv context
398 * @attr_ex: CQ init attributes
399 * @ext_cq: flag to create an extendable or normal CQ
400 */
401static struct ibv_cq_ex *
402ucreate_cq(struct ibv_context *context,
403	   struct ibv_cq_init_attr_ex *attr_ex,
404	   bool ext_cq)
405{
406	struct irdma_cq_uk_init_info info = {};
407	struct irdma_ureg_mr reg_mr_cmd = {};
408	struct irdma_ucreate_cq_ex cmd = {};
409	struct irdma_ucreate_cq_ex_resp resp = {};
410	struct ibv_reg_mr_resp reg_mr_resp = {};
411	struct irdma_ureg_mr reg_mr_shadow_cmd = {};
412	struct ibv_reg_mr_resp reg_mr_shadow_resp = {};
413	struct irdma_uk_attrs *uk_attrs;
414	struct irdma_uvcontext *iwvctx;
415	struct irdma_ucq *iwucq;
416	size_t total_size;
417	u32 cq_pages;
418	int ret, ncqe;
419	u8 hw_rev;
420
421	iwvctx = container_of(context, struct irdma_uvcontext, ibv_ctx);
422	uk_attrs = &iwvctx->uk_attrs;
423	hw_rev = uk_attrs->hw_rev;
424
425	if (ext_cq) {
426		u32 supported_flags = IRDMA_STANDARD_WC_FLAGS_EX;
427
428		if (hw_rev == IRDMA_GEN_1 || attr_ex->wc_flags & ~supported_flags) {
429			errno = EOPNOTSUPP;
430			return NULL;
431		}
432	}
433
434	if (attr_ex->cqe < uk_attrs->min_hw_cq_size || attr_ex->cqe > uk_attrs->max_hw_cq_size - 1) {
435		errno = EINVAL;
436		return NULL;
437	}
438
439	/* save the cqe requested by application */
440	ncqe = attr_ex->cqe;
441
442	iwucq = calloc(1, sizeof(*iwucq));
443	if (!iwucq)
444		return NULL;
445
446	ret = pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE);
447	if (ret) {
448		free(iwucq);
449		errno = ret;
450		return NULL;
451	}
452
453	info.cq_size = get_cq_size(attr_ex->cqe, hw_rev);
454	total_size = get_cq_total_bytes(info.cq_size);
455	iwucq->comp_vector = attr_ex->comp_vector;
456	LIST_INIT(&iwucq->resize_list);
457	cq_pages = total_size >> IRDMA_HW_PAGE_SHIFT;
458
459	if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
460		total_size = (cq_pages << IRDMA_HW_PAGE_SHIFT) + IRDMA_DB_SHADOW_AREA_SIZE;
461
462	iwucq->buf_size = total_size;
463	info.cq_base = irdma_alloc_hw_buf(total_size);
464	if (!info.cq_base) {
465		ret = ENOMEM;
466		goto err_cq_base;
467	}
468
469	memset(info.cq_base, 0, total_size);
470	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
471	reg_mr_cmd.cq_pages = cq_pages;
472
473	ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base,
474			     total_size, (uintptr_t)info.cq_base,
475			     IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr.ibv_mr,
476			     &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
477			     &reg_mr_resp, sizeof(reg_mr_resp));
478	if (ret)
479		goto err_dereg_mr;
480
481	iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;
482
483	if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) {
484		info.shadow_area = irdma_alloc_hw_buf(IRDMA_DB_SHADOW_AREA_SIZE);
485		if (!info.shadow_area) {
486			ret = ENOMEM;
487			goto err_alloc_shadow;
488		}
489
490		memset(info.shadow_area, 0, IRDMA_DB_SHADOW_AREA_SIZE);
491		reg_mr_shadow_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
492		reg_mr_shadow_cmd.cq_pages = 1;
493
494		ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.shadow_area,
495				     IRDMA_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area,
496				     IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area.ibv_mr,
497				     &reg_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd),
498				     &reg_mr_shadow_resp, sizeof(reg_mr_shadow_resp));
499		if (ret) {
500			irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
501			goto err_alloc_shadow;
502		}
503
504		iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;
505
506	} else {
507		info.shadow_area = (__le64 *) ((u8 *)info.cq_base + (cq_pages << IRDMA_HW_PAGE_SHIFT));
508	}
509
510	attr_ex->cqe = info.cq_size;
511	cmd.user_cq_buf = (__u64) ((uintptr_t)info.cq_base);
512	cmd.user_shadow_area = (__u64) ((uintptr_t)info.shadow_area);
513
514	ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq.cq_ex,
515				   &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp,
516				   sizeof(resp.ibv_resp), sizeof(resp));
517	attr_ex->cqe = ncqe;
518	if (ret)
519		goto err_create_cq;
520
521	if (ext_cq)
522		irdma_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex);
523	info.cq_id = resp.cq_id;
	/* Do not report the CQEs reserved for immediate data and consumed by HW */
525	iwucq->verbs_cq.cq.cqe = ncqe;
526	info.cqe_alloc_db = (u32 *)((u8 *)iwvctx->db + IRDMA_DB_CQ_OFFSET);
527	irdma_uk_cq_init(&iwucq->cq, &info);
528	return &iwucq->verbs_cq.cq_ex;
529
530err_create_cq:
531	if (iwucq->vmr_shadow_area.ibv_mr.handle) {
532		ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr);
533		irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
534	}
535err_alloc_shadow:
536	ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr);
537err_dereg_mr:
538	irdma_free_hw_buf(info.cq_base, total_size);
539err_cq_base:
540	printf("%s: failed to initialize CQ\n", __func__);
541	pthread_spin_destroy(&iwucq->lock);
542
543	free(iwucq);
544
545	errno = ret;
546	return NULL;
547}
548
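/**
 * irdma_ucreate_cq - verbs API callback to create a CQ
 * @context: ibv context
 * @cqe: minimum number of cqes requested
 * @channel: completion channel
 * @comp_vector: completion vector
 */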
549struct ibv_cq *
550irdma_ucreate_cq(struct ibv_context *context, int cqe,
551		 struct ibv_comp_channel *channel,
552		 int comp_vector)
553{
554	struct ibv_cq_init_attr_ex attr_ex = {
555		.cqe = cqe,
556		.channel = channel,
557		.comp_vector = comp_vector,
558	};
559	struct ibv_cq_ex *ibvcq_ex;
560
561	ibvcq_ex = ucreate_cq(context, &attr_ex, false);
562
563	return ibvcq_ex ? ibv_cq_ex_to_cq(ibvcq_ex) : NULL;
564}
565
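/**
 * irdma_ucreate_cq_ex - verbs API callback to create an extended CQ
 * @context: ibv context
 * @attr_ex: CQ init attributes
 */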
566struct ibv_cq_ex *
567irdma_ucreate_cq_ex(struct ibv_context *context,
568		    struct ibv_cq_init_attr_ex *attr_ex)
569{
570	return ucreate_cq(context, attr_ex, true);
571}
572
573/**
574 * irdma_free_cq_buf - free memory for cq buffer
575 * @cq_buf: cq buf to free
576 */
577static void
578irdma_free_cq_buf(struct irdma_cq_buf *cq_buf)
579{
580	ibv_cmd_dereg_mr(&cq_buf->vmr.ibv_mr);
581	irdma_free_hw_buf(cq_buf->cq.cq_base, get_cq_total_bytes(cq_buf->cq.cq_size));
582	free(cq_buf);
583}
584
585/**
586 * irdma_process_resize_list - process the cq list to remove buffers
587 * @iwucq: cq which owns the list
588 * @lcqe_buf: cq buf where the last cqe is found
589 */
590static int
591irdma_process_resize_list(struct irdma_ucq *iwucq,
592			  struct irdma_cq_buf *lcqe_buf)
593{
594	struct irdma_cq_buf *cq_buf, *next;
595	int cq_cnt = 0;
596
597	LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) {
598		if (cq_buf == lcqe_buf)
599			return cq_cnt;
600
601		LIST_REMOVE(cq_buf, list);
602		irdma_free_cq_buf(cq_buf);
603		cq_cnt++;
604	}
605
606	return cq_cnt;
607}
608
609/**
610 * irdma_udestroy_cq - destroys cq
611 * @cq: ptr to cq to be destroyed
612 */
613int
614irdma_udestroy_cq(struct ibv_cq *cq)
615{
616	struct irdma_uk_attrs *uk_attrs;
617	struct irdma_uvcontext *iwvctx;
618	struct irdma_ucq *iwucq;
619	int ret;
620
621	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
622	iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx);
623	uk_attrs = &iwvctx->uk_attrs;
624
625	ret = pthread_spin_destroy(&iwucq->lock);
626	if (ret)
627		goto err;
628
629	irdma_process_resize_list(iwucq, NULL);
630	ret = ibv_cmd_destroy_cq(cq);
631	if (ret)
632		goto err;
633
634	ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr);
635	irdma_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size);
636
637	if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) {
638		ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr);
639		irdma_free_hw_buf(iwucq->cq.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
640	}
641	free(iwucq);
642	return 0;
643
644err:
645	return ret;
646}
647
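/**
 * irdma_flush_err_to_ib_wc_status - map a flush error opcode to ibv_wc_status
 * @opcode: flush opcode reported in the CQE
 */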
648static enum ibv_wc_status
649irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode)
650{
651	switch (opcode) {
652	case FLUSH_PROT_ERR:
653		return IBV_WC_LOC_PROT_ERR;
654	case FLUSH_REM_ACCESS_ERR:
655		return IBV_WC_REM_ACCESS_ERR;
656	case FLUSH_LOC_QP_OP_ERR:
657		return IBV_WC_LOC_QP_OP_ERR;
658	case FLUSH_REM_OP_ERR:
659		return IBV_WC_REM_OP_ERR;
660	case FLUSH_LOC_LEN_ERR:
661		return IBV_WC_LOC_LEN_ERR;
662	case FLUSH_GENERAL_ERR:
663		return IBV_WC_WR_FLUSH_ERR;
664	case FLUSH_MW_BIND_ERR:
665		return IBV_WC_MW_BIND_ERR;
666	case FLUSH_REM_INV_REQ_ERR:
667		return IBV_WC_REM_INV_REQ_ERR;
668	case FLUSH_RETRY_EXC_ERR:
669		return IBV_WC_RETRY_EXC_ERR;
670	case FLUSH_FATAL_ERR:
671	default:
672		return IBV_WC_FATAL_ERR;
673	}
674}
675
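/**
 * set_ib_wc_op_sq - set the ibv_wc opcode for an SQ completion
 * @cur_cqe: current cqe info
 * @entry: ibv_wc entry to fill in
 */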
676static inline void
677set_ib_wc_op_sq(struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry)
678{
679	switch (cur_cqe->op_type) {
680	case IRDMA_OP_TYPE_RDMA_WRITE:
681	case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
682		entry->opcode = IBV_WC_RDMA_WRITE;
683		break;
684	case IRDMA_OP_TYPE_RDMA_READ:
685		entry->opcode = IBV_WC_RDMA_READ;
686		break;
687	case IRDMA_OP_TYPE_SEND_SOL:
688	case IRDMA_OP_TYPE_SEND_SOL_INV:
689	case IRDMA_OP_TYPE_SEND_INV:
690	case IRDMA_OP_TYPE_SEND:
691		entry->opcode = IBV_WC_SEND;
692		break;
693	case IRDMA_OP_TYPE_BIND_MW:
694		entry->opcode = IBV_WC_BIND_MW;
695		break;
696	case IRDMA_OP_TYPE_INV_STAG:
697		entry->opcode = IBV_WC_LOCAL_INV;
698		break;
699	default:
700		entry->status = IBV_WC_GENERAL_ERR;
701		printf("%s: Invalid opcode = %d in CQE\n",
702		       __func__, cur_cqe->op_type);
703	}
704}
705
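/**
 * set_ib_wc_op_rq - set the ibv_wc opcode for an RQ completion
 * @cur_cqe: current cqe info
 * @entry: ibv_wc entry to fill in
 * @send_imm_support: true if the QP supports send with immediate
 */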
706static inline void
707set_ib_wc_op_rq(struct irdma_cq_poll_info *cur_cqe,
708		struct ibv_wc *entry, bool send_imm_support)
709{
710	if (!send_imm_support) {
711		entry->opcode = cur_cqe->imm_valid ? IBV_WC_RECV_RDMA_WITH_IMM :
712		    IBV_WC_RECV;
713		return;
714	}
715	switch (cur_cqe->op_type) {
716	case IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
717	case IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
718		entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
719		break;
720	default:
721		entry->opcode = IBV_WC_RECV;
722	}
723}
724
/**
 * irdma_process_cqe_ext - process current cqe for extended CQ
 * @cur_cqe: current cqe info
 */
729static void
730irdma_process_cqe_ext(struct irdma_cq_poll_info *cur_cqe)
731{
732	struct irdma_ucq *iwucq = container_of(cur_cqe, struct irdma_ucq, cur_cqe);
733	struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex;
734
735	ibvcq_ex->wr_id = cur_cqe->wr_id;
736	if (cur_cqe->error)
737		ibvcq_ex->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ?
738		    irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR;
739	else
740		ibvcq_ex->status = IBV_WC_SUCCESS;
741}
742
/**
 * irdma_process_cqe - process current cqe info
 * @entry: ibv_wc object to fill in for non-extended CQ
 * @cur_cqe: current cqe info
 */
748static void
749irdma_process_cqe(struct ibv_wc *entry, struct irdma_cq_poll_info *cur_cqe)
750{
751	struct irdma_qp_uk *qp;
752	struct ibv_qp *ib_qp;
753
754	entry->wc_flags = 0;
755	entry->wr_id = cur_cqe->wr_id;
756	entry->qp_num = cur_cqe->qp_id;
757	qp = cur_cqe->qp_handle;
758	ib_qp = qp->back_qp;
759
760	if (cur_cqe->error) {
761		entry->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ?
762		    irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR;
763		entry->vendor_err = cur_cqe->major_err << 16 |
764		    cur_cqe->minor_err;
765	} else {
766		entry->status = IBV_WC_SUCCESS;
767	}
768
769	if (cur_cqe->imm_valid) {
770		entry->imm_data = htonl(cur_cqe->imm_data);
771		entry->wc_flags |= IBV_WC_WITH_IMM;
772	}
773
774	if (cur_cqe->q_type == IRDMA_CQE_QTYPE_SQ) {
775		set_ib_wc_op_sq(cur_cqe, entry);
776	} else {
777		set_ib_wc_op_rq(cur_cqe, entry,
778				qp->qp_caps & IRDMA_SEND_WITH_IMM ?
779				true : false);
780		if (ib_qp->qp_type != IBV_QPT_UD &&
781		    cur_cqe->stag_invalid_set) {
782			entry->invalidated_rkey = cur_cqe->inv_stag;
783			entry->wc_flags |= IBV_WC_WITH_INV;
784		}
785	}
786
787	if (ib_qp->qp_type == IBV_QPT_UD) {
788		entry->src_qp = cur_cqe->ud_src_qpn;
789		entry->wc_flags |= IBV_WC_GRH;
790	} else {
791		entry->src_qp = cur_cqe->qp_id;
792	}
793	entry->byte_len = cur_cqe->bytes_xfered;
794}
795
796/**
797 * irdma_poll_one - poll one entry of the CQ
798 * @ukcq: ukcq to poll
799 * @cur_cqe: current CQE info to be filled in
800 * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ
801 *
802 * Returns the internal irdma device error code or 0 on success
803 */
804static int
805irdma_poll_one(struct irdma_cq_uk *ukcq, struct irdma_cq_poll_info *cur_cqe,
806	       struct ibv_wc *entry)
807{
808	int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe);
809
810	if (ret)
811		return ret;
812
813	if (!entry)
814		irdma_process_cqe_ext(cur_cqe);
815	else
816		irdma_process_cqe(entry, cur_cqe);
817
818	return 0;
819}
820
821/**
822 * __irdma_upoll_cq - irdma util function to poll device CQ
823 * @iwucq: irdma cq to poll
824 * @num_entries: max cq entries to poll
825 * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ
826 *
 * Returns a non-negative value equal to the number of completions
 * found, or EINVAL on failure.
829 */
830static int
831__irdma_upoll_cq(struct irdma_ucq *iwucq, int num_entries,
832		 struct ibv_wc *entry)
833{
834	struct irdma_cq_buf *cq_buf, *next;
835	struct irdma_cq_buf *last_buf = NULL;
836	struct irdma_cq_poll_info *cur_cqe = &iwucq->cur_cqe;
837	bool cq_new_cqe = false;
838	int resized_bufs = 0;
839	int npolled = 0;
840	int ret;
841
842	/* go through the list of previously resized CQ buffers */
843	LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) {
844		while (npolled < num_entries) {
845			ret = irdma_poll_one(&cq_buf->cq, cur_cqe,
846					     entry ? entry + npolled : NULL);
847			if (!ret) {
848				++npolled;
849				cq_new_cqe = true;
850				continue;
851			}
852			if (ret == ENOENT)
853				break;
854			/* QP using the CQ is destroyed. Skip reporting this CQE */
855			if (ret == EFAULT) {
856				cq_new_cqe = true;
857				continue;
858			}
859			goto error;
860		}
861
862		/* save the resized CQ buffer which received the last cqe */
863		if (cq_new_cqe)
864			last_buf = cq_buf;
865		cq_new_cqe = false;
866	}
867
868	/* check the current CQ for new cqes */
869	while (npolled < num_entries) {
870		ret = irdma_poll_one(&iwucq->cq, cur_cqe,
871				     entry ? entry + npolled : NULL);
872		if (!ret) {
873			++npolled;
874			cq_new_cqe = true;
875			continue;
876		}
877		if (ret == ENOENT)
878			break;
879		/* QP using the CQ is destroyed. Skip reporting this CQE */
880		if (ret == EFAULT) {
881			cq_new_cqe = true;
882			continue;
883		}
884		goto error;
885	}
886
887	if (cq_new_cqe)
888		/* all previous CQ resizes are complete */
889		resized_bufs = irdma_process_resize_list(iwucq, NULL);
890	else if (last_buf)
891		/* only CQ resizes up to the last_buf are complete */
892		resized_bufs = irdma_process_resize_list(iwucq, last_buf);
893	if (resized_bufs)
894		/* report to the HW the number of complete CQ resizes */
895		irdma_uk_cq_set_resized_cnt(&iwucq->cq, resized_bufs);
896
897	return npolled;
898
899error:
900	printf("%s: Error polling CQ, irdma_err: %d\n", __func__, ret);
901
902	return EINVAL;
903}
904
905/**
906 * irdma_upoll_cq - verb API callback to poll device CQ
907 * @cq: ibv_cq to poll
908 * @num_entries: max cq entries to poll
909 * @entry: pointer to array of ibv_wc objects to be filled in for each completion
910 *
 * Returns a non-negative value equal to the number of completions
 * found, or an error code on failure.
913 */
914int
915irdma_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry)
916{
917	struct irdma_ucq *iwucq;
918	int ret;
919
920	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
921	ret = pthread_spin_lock(&iwucq->lock);
922	if (ret)
923		return -ret;
924
925	ret = __irdma_upoll_cq(iwucq, num_entries, entry);
926
927	pthread_spin_unlock(&iwucq->lock);
928
929	return ret;
930}
931
932/**
 * irdma_start_poll - verb_ex API callback to poll a batch of WCs
 * @ibvcq_ex: ibv extended CQ
 * @attr: attributes (not used)
 *
 * Start polling a batch of work completions. Returns 0 on success, ENOENT when
 * no completions are available on the CQ, or an error code on other failures.
939 */
940static int
941irdma_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr)
942{
943	struct irdma_ucq *iwucq;
944	int ret;
945
946	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
947	ret = pthread_spin_lock(&iwucq->lock);
948	if (ret)
949		return ret;
950
951	ret = __irdma_upoll_cq(iwucq, 1, NULL);
952	if (ret == 1)
953		return 0;
954
955	/* No Completions on CQ */
956	if (!ret)
957		ret = ENOENT;
958
959	pthread_spin_unlock(&iwucq->lock);
960
961	return ret;
962}
963
964/**
965 * irdma_next_poll - verb_ex API callback to get next WC
966 * @ibvcq_ex: ibv extended CQ
967 *
 * Returns 0 on success, ENOENT when no completions are available on the CQ,
 * or an error code on other failures.
970 */
971static int
972irdma_next_poll(struct ibv_cq_ex *ibvcq_ex)
973{
974	struct irdma_ucq *iwucq;
975	int ret;
976
977	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
978	ret = __irdma_upoll_cq(iwucq, 1, NULL);
979	if (ret == 1)
980		return 0;
981
982	/* No Completions on CQ */
983	if (!ret)
984		ret = ENOENT;
985
986	return ret;
987}
988
989/**
 * irdma_end_poll - verb_ex API callback to end polling of WCs
991 * @ibvcq_ex: ibv extended CQ
992 */
993static void
994irdma_end_poll(struct ibv_cq_ex *ibvcq_ex)
995{
996	struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
997					       verbs_cq.cq_ex);
998
999	pthread_spin_unlock(&iwucq->lock);
1000}
1001
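/*
 * The irdma_wc_read_* helpers below are the extended-CQ read callbacks;
 * each one returns a field of the most recently polled CQE (cur_cqe).
 */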
1002static enum ibv_wc_opcode
1003irdma_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex)
1004{
1005	struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
1006					       verbs_cq.cq_ex);
1007
1008	switch (iwucq->cur_cqe.op_type) {
1009	case IRDMA_OP_TYPE_RDMA_WRITE:
1010	case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
1011		return IBV_WC_RDMA_WRITE;
1012	case IRDMA_OP_TYPE_RDMA_READ:
1013		return IBV_WC_RDMA_READ;
1014	case IRDMA_OP_TYPE_SEND_SOL:
1015	case IRDMA_OP_TYPE_SEND_SOL_INV:
1016	case IRDMA_OP_TYPE_SEND_INV:
1017	case IRDMA_OP_TYPE_SEND:
1018		return IBV_WC_SEND;
1019	case IRDMA_OP_TYPE_BIND_MW:
1020		return IBV_WC_BIND_MW;
1021	case IRDMA_OP_TYPE_REC:
1022		return IBV_WC_RECV;
1023	case IRDMA_OP_TYPE_REC_IMM:
1024		return IBV_WC_RECV_RDMA_WITH_IMM;
1025	case IRDMA_OP_TYPE_INV_STAG:
1026		return IBV_WC_LOCAL_INV;
1027	}
1028
1029	printf("%s: Invalid opcode = %d in CQE\n", __func__,
1030	       iwucq->cur_cqe.op_type);
1031
1032	return 0;
1033}
1034
static uint32_t
irdma_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex)
{
1036	struct irdma_cq_poll_info *cur_cqe;
1037	struct irdma_ucq *iwucq;
1038
1039	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
1040	cur_cqe = &iwucq->cur_cqe;
1041
1042	return cur_cqe->error ? cur_cqe->major_err << 16 | cur_cqe->minor_err : 0;
1043}
1044
1045static int
1046irdma_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex)
1047{
1048	struct irdma_cq_poll_info *cur_cqe;
1049	struct irdma_ucq *iwucq;
1050	struct irdma_qp_uk *qp;
1051	struct ibv_qp *ib_qp;
1052	int wc_flags = 0;
1053
1054	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
1055	cur_cqe = &iwucq->cur_cqe;
1056	qp = cur_cqe->qp_handle;
1057	ib_qp = qp->back_qp;
1058
1059	if (cur_cqe->imm_valid)
1060		wc_flags |= IBV_WC_WITH_IMM;
1061
1062	if (ib_qp->qp_type == IBV_QPT_UD) {
1063		wc_flags |= IBV_WC_GRH;
1064	} else {
1065		if (cur_cqe->stag_invalid_set) {
1066			switch (cur_cqe->op_type) {
1067			case IRDMA_OP_TYPE_REC:
1068				wc_flags |= IBV_WC_WITH_INV;
1069				break;
1070			case IRDMA_OP_TYPE_REC_IMM:
1071				wc_flags |= IBV_WC_WITH_INV;
1072				break;
1073			}
1074		}
1075	}
1076
1077	return wc_flags;
1078}
1079
static uint32_t
irdma_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex)
{
1081	struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
1082					       verbs_cq.cq_ex);
1083
1084	return iwucq->cur_cqe.bytes_xfered;
1085}
1086
static __be32
irdma_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex)
{
1088	struct irdma_cq_poll_info *cur_cqe;
1089	struct irdma_ucq *iwucq;
1090
1091	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
1092	cur_cqe = &iwucq->cur_cqe;
1093
1094	return cur_cqe->imm_valid ? htonl(cur_cqe->imm_data) : 0;
1095}
1096
static uint32_t
irdma_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex)
{
1098	struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
1099					       verbs_cq.cq_ex);
1100
1101	return iwucq->cur_cqe.qp_id;
1102}
1103
static uint32_t
irdma_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex)
{
1105	struct irdma_cq_poll_info *cur_cqe;
1106	struct irdma_ucq *iwucq;
1107	struct irdma_qp_uk *qp;
1108	struct ibv_qp *ib_qp;
1109
1110	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
1111	cur_cqe = &iwucq->cur_cqe;
1112	qp = cur_cqe->qp_handle;
1113	ib_qp = qp->back_qp;
1114
1115	return ib_qp->qp_type == IBV_QPT_UD ? cur_cqe->ud_src_qpn : cur_cqe->qp_id;
1116}
1117
static uint8_t
irdma_wc_read_sl(struct ibv_cq_ex *ibvcq_ex)
{
1119	return 0;
1120}
1121
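/**
 * irdma_ibvcq_ex_fill_priv_funcs - set up the extended CQ callbacks
 * @iwucq: cq to set up
 * @attr_ex: CQ init attributes carrying the requested wc_flags
 */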
1122void
1123irdma_ibvcq_ex_fill_priv_funcs(struct irdma_ucq *iwucq,
1124			       struct ibv_cq_init_attr_ex *attr_ex)
1125{
1126	struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex;
1127
1128	ibvcq_ex->start_poll = irdma_start_poll;
1129	ibvcq_ex->end_poll = irdma_end_poll;
1130	ibvcq_ex->next_poll = irdma_next_poll;
1131
1132	ibvcq_ex->read_opcode = irdma_wc_read_opcode;
1133	ibvcq_ex->read_vendor_err = irdma_wc_read_vendor_err;
1134	ibvcq_ex->read_wc_flags = irdma_wc_read_wc_flags;
1135
1136	if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
1137		ibvcq_ex->read_byte_len = irdma_wc_read_byte_len;
1138	if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM)
1139		ibvcq_ex->read_imm_data = irdma_wc_read_imm_data;
1140	if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM)
1141		ibvcq_ex->read_qp_num = irdma_wc_read_qp_num;
1142	if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP)
1143		ibvcq_ex->read_src_qp = irdma_wc_read_src_qp;
1144	if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL)
1145		ibvcq_ex->read_sl = irdma_wc_read_sl;
1146}
1147
/**
 * irdma_arm_cq - arm the cq
 * @iwucq: cq to arm
 * @cq_notify: notification type to request
 */
1153static void
1154irdma_arm_cq(struct irdma_ucq *iwucq,
1155	     enum irdma_cmpl_notify cq_notify)
1156{
1157	iwucq->is_armed = true;
1158	iwucq->arm_sol = true;
1159	iwucq->skip_arm = false;
1160	iwucq->skip_sol = true;
1161	irdma_uk_cq_request_notification(&iwucq->cq, cq_notify);
1162}
1163
/**
 * irdma_uarm_cq - callback to arm the cq
 * @cq: cq to arm
 * @solicited: non-zero to request notification for solicited completions only
 */
1169int
1170irdma_uarm_cq(struct ibv_cq *cq, int solicited)
1171{
1172	struct irdma_ucq *iwucq;
1173	enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT;
1174	int ret;
1175
1176	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
1177	if (solicited)
1178		cq_notify = IRDMA_CQ_COMPL_SOLICITED;
1179
1180	ret = pthread_spin_lock(&iwucq->lock);
1181	if (ret)
1182		return ret;
1183
1184	if (iwucq->is_armed) {
1185		if (iwucq->arm_sol && !solicited) {
1186			irdma_arm_cq(iwucq, cq_notify);
1187		} else {
1188			iwucq->skip_arm = true;
1189			iwucq->skip_sol = solicited ? true : false;
1190		}
1191	} else {
1192		irdma_arm_cq(iwucq, cq_notify);
1193	}
1194
1195	pthread_spin_unlock(&iwucq->lock);
1196
1197	return 0;
1198}
1199
/**
 * irdma_cq_event - handle a completion event on the cq
 * @cq: cq that received the event
 */
1204void
1205irdma_cq_event(struct ibv_cq *cq)
1206{
1207	struct irdma_ucq *iwucq;
1208
1209	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
1210	if (pthread_spin_lock(&iwucq->lock))
1211		return;
1212
1213	if (iwucq->skip_arm)
1214		irdma_arm_cq(iwucq, IRDMA_CQ_COMPL_EVENT);
1215	else
1216		iwucq->is_armed = false;
1217
1218	pthread_spin_unlock(&iwucq->lock);
1219}
1220
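/**
 * irdma_mmap - map a HW page into the process address space
 * @fd: command fd of the verbs context
 * @offset: mmap offset (key) supplied by the driver
 *
 * Returns MAP_FAILED on error.
 */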
1221void *
1222irdma_mmap(int fd, off_t offset)
1223{
1224	void *map;
1225
1226	map = mmap(NULL, IRDMA_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED,
1227		   fd, offset);
1228	if (map == MAP_FAILED)
1229		return map;
1230
1231	if (ibv_dontfork_range(map, IRDMA_HW_PAGE_SIZE)) {
1232		munmap(map, IRDMA_HW_PAGE_SIZE);
1233		return MAP_FAILED;
1234	}
1235
1236	return map;
1237}
1238
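/**
 * irdma_munmap - unmap a page mapped with irdma_mmap
 * @map: mapping to release
 */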
1239void
1240irdma_munmap(void *map)
1241{
1242	ibv_dofork_range(map, IRDMA_HW_PAGE_SIZE);
1243	munmap(map, IRDMA_HW_PAGE_SIZE);
1244}
1245
1246/**
1247 * irdma_destroy_vmapped_qp - destroy resources for qp
1248 * @iwuqp: qp struct for resources
1249 */
1250static int
1251irdma_destroy_vmapped_qp(struct irdma_uqp *iwuqp)
1252{
1253	int ret;
1254
1255	ret = ibv_cmd_destroy_qp(&iwuqp->ibv_qp);
1256	if (ret)
1257		return ret;
1258
1259	if (iwuqp->qp.push_db)
1260		irdma_munmap(iwuqp->qp.push_db);
1261	if (iwuqp->qp.push_wqe)
1262		irdma_munmap(iwuqp->qp.push_wqe);
1263
1264	ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr);
1265
1266	return 0;
1267}
1268
/**
 * irdma_vmapped_qp - create resources for qp
 * @iwuqp: qp struct for resources
 * @pd: pd for the qp
 * @attr: attributes of qp passed
 * @info: uk info for initializing user level qp
 * @legacy_mode: true if the driver is running in legacy mode
 */
1278static int
1279irdma_vmapped_qp(struct irdma_uqp *iwuqp, struct ibv_pd *pd,
1280		 struct ibv_qp_init_attr *attr,
1281		 struct irdma_qp_uk_init_info *info,
1282		 bool legacy_mode)
1283{
1284	struct irdma_ucreate_qp cmd = {};
1285	size_t sqsize, rqsize, totalqpsize;
1286	struct irdma_ucreate_qp_resp resp = {};
1287	struct irdma_ureg_mr reg_mr_cmd = {};
1288	struct ibv_reg_mr_resp reg_mr_resp = {};
1289	int ret;
1290
1291	sqsize = roundup(info->sq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
1292	rqsize = roundup(info->rq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
1293	totalqpsize = rqsize + sqsize + IRDMA_DB_SHADOW_AREA_SIZE;
1294	info->sq = irdma_alloc_hw_buf(totalqpsize);
1295	iwuqp->buf_size = totalqpsize;
1296
1297	if (!info->sq)
1298		return ENOMEM;
1299
1300	memset(info->sq, 0, totalqpsize);
1301	info->rq = &info->sq[sqsize / IRDMA_QP_WQE_MIN_SIZE];
1302	info->shadow_area = info->rq[rqsize / IRDMA_QP_WQE_MIN_SIZE].elem;
1303
1304	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_QP;
1305	reg_mr_cmd.sq_pages = sqsize >> IRDMA_HW_PAGE_SHIFT;
1306	reg_mr_cmd.rq_pages = rqsize >> IRDMA_HW_PAGE_SHIFT;
1307
1308	ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize,
1309			     (uintptr_t)info->sq, IBV_ACCESS_LOCAL_WRITE,
1310			     &iwuqp->vmr.ibv_mr, &reg_mr_cmd.ibv_cmd,
1311			     sizeof(reg_mr_cmd), &reg_mr_resp,
1312			     sizeof(reg_mr_resp));
1313	if (ret)
1314		goto err_dereg_mr;
1315
1316	cmd.user_wqe_bufs = (__u64) ((uintptr_t)info->sq);
1317	cmd.user_compl_ctx = (__u64) (uintptr_t)&iwuqp->qp;
1318	cmd.comp_mask |= IRDMA_CREATE_QP_USE_START_WQE_IDX;
1319
1320	ret = ibv_cmd_create_qp(pd, &iwuqp->ibv_qp, attr, &cmd.ibv_cmd,
1321				sizeof(cmd), &resp.ibv_resp,
1322				sizeof(struct irdma_ucreate_qp_resp));
1323	if (ret)
1324		goto err_qp;
1325
1326	info->sq_size = resp.actual_sq_size;
1327	info->rq_size = resp.actual_rq_size;
1328	info->first_sq_wq = legacy_mode ? 1 : resp.lsmm;
1329	if (resp.comp_mask & IRDMA_CREATE_QP_USE_START_WQE_IDX)
1330		info->start_wqe_idx = resp.start_wqe_idx;
1331	info->qp_caps = resp.qp_caps;
1332	info->qp_id = resp.qp_id;
1333	iwuqp->irdma_drv_opt = resp.irdma_drv_opt;
1334	iwuqp->ibv_qp.qp_num = resp.qp_id;
1335
1336	iwuqp->send_cq = container_of(attr->send_cq, struct irdma_ucq,
1337				      verbs_cq.cq);
1338	iwuqp->recv_cq = container_of(attr->recv_cq, struct irdma_ucq,
1339				      verbs_cq.cq);
1340	iwuqp->send_cq->uqp = iwuqp;
1341	iwuqp->recv_cq->uqp = iwuqp;
1342
1343	return 0;
1344err_qp:
1345	ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr);
1346err_dereg_mr:
1347	printf("%s: failed to create QP, status %d\n", __func__, ret);
1348	irdma_free_hw_buf(info->sq, iwuqp->buf_size);
1349	return ret;
1350}
1351
/**
 * irdma_ucreate_qp - create a qp for the user application
 * @pd: pd for the qp
 * @attr: attributes of the qp to be created (sizes, sge, cq)
 */
1357struct ibv_qp *
1358irdma_ucreate_qp(struct ibv_pd *pd,
1359		 struct ibv_qp_init_attr *attr)
1360{
1361	struct irdma_qp_uk_init_info info = {};
1362	struct irdma_uk_attrs *uk_attrs;
1363	struct irdma_uvcontext *iwvctx;
1364	struct irdma_uqp *iwuqp;
1365	int status;
1366
1367	if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) {
1368		printf("%s: failed to create QP, unsupported QP type: 0x%x\n",
1369		       __func__, attr->qp_type);
1370		errno = EOPNOTSUPP;
1371		return NULL;
1372	}
1373
1374	iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx);
1375	uk_attrs = &iwvctx->uk_attrs;
1376
1377	if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
1378	    attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags ||
1379	    attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta ||
1380	    attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta ||
1381	    attr->cap.max_inline_data > uk_attrs->max_hw_inline) {
1382		errno = EINVAL;
1383		return NULL;
1384	}
1385
1386	info.uk_attrs = uk_attrs;
1387	info.sq_size = attr->cap.max_send_wr;
1388	info.rq_size = attr->cap.max_recv_wr;
1389	info.max_sq_frag_cnt = attr->cap.max_send_sge;
1390	info.max_rq_frag_cnt = attr->cap.max_recv_sge;
1391	info.max_inline_data = attr->cap.max_inline_data;
1392	info.abi_ver = iwvctx->abi_ver;
1393
1394	status = irdma_uk_calc_depth_shift_sq(&info, &info.sq_depth, &info.sq_shift);
1395	if (status) {
1396		printf("%s: invalid SQ attributes, max_send_wr=%d max_send_sge=%d max_inline=%d\n",
1397		       __func__, attr->cap.max_send_wr, attr->cap.max_send_sge,
1398		       attr->cap.max_inline_data);
1399		errno = status;
1400		return NULL;
1401	}
1402
1403	status = irdma_uk_calc_depth_shift_rq(&info, &info.rq_depth, &info.rq_shift);
1404	if (status) {
1405		printf("%s: invalid RQ attributes, recv_wr=%d recv_sge=%d\n",
1406		       __func__, attr->cap.max_recv_wr, attr->cap.max_recv_sge);
1407		errno = status;
1408		return NULL;
1409	}
1410
1411	iwuqp = memalign(1024, sizeof(*iwuqp));
1412	if (!iwuqp)
1413		return NULL;
1414
1415	memset(iwuqp, 0, sizeof(*iwuqp));
1416
1417	status = pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE);
1418	if (status)
1419		goto err_free_qp;
1420
1421	info.sq_size = info.sq_depth >> info.sq_shift;
1422	info.rq_size = info.rq_depth >> info.rq_shift;
	/**
	 * Maintain backward compatibility with older ABI versions which pass
	 * the sq and rq depth (in quanta) in cap.max_send_wr and cap.max_recv_wr
	 */
1427	if (!iwvctx->use_raw_attrs) {
1428		attr->cap.max_send_wr = info.sq_size;
1429		attr->cap.max_recv_wr = info.rq_size;
1430	}
1431
1432	info.wqe_alloc_db = (u32 *)iwvctx->db;
1433	info.legacy_mode = iwvctx->legacy_mode;
1434	info.sq_wrtrk_array = calloc(info.sq_depth, sizeof(*info.sq_wrtrk_array));
1435	if (!info.sq_wrtrk_array) {
1436		status = errno;	/* preserve errno */
1437		goto err_destroy_lock;
1438	}
1439
1440	info.rq_wrid_array = calloc(info.rq_depth, sizeof(*info.rq_wrid_array));
1441	if (!info.rq_wrid_array) {
1442		status = errno;	/* preserve errno */
1443		goto err_free_sq_wrtrk;
1444	}
1445
1446	iwuqp->sq_sig_all = attr->sq_sig_all;
1447	iwuqp->qp_type = attr->qp_type;
1448	status = irdma_vmapped_qp(iwuqp, pd, attr, &info, iwvctx->legacy_mode);
1449	if (status)
1450		goto err_free_rq_wrid;
1451
1452	iwuqp->qp.back_qp = iwuqp;
1453	iwuqp->qp.lock = &iwuqp->lock;
1454
1455	status = irdma_uk_qp_init(&iwuqp->qp, &info);
1456	if (status)
1457		goto err_free_vmap_qp;
1458
1459	attr->cap.max_send_wr = (info.sq_depth - IRDMA_SQ_RSVD) >> info.sq_shift;
1460	attr->cap.max_recv_wr = (info.rq_depth - IRDMA_RQ_RSVD) >> info.rq_shift;
1461
1462	return &iwuqp->ibv_qp;
1463
1464err_free_vmap_qp:
1465	irdma_destroy_vmapped_qp(iwuqp);
1466	irdma_free_hw_buf(info.sq, iwuqp->buf_size);
1467err_free_rq_wrid:
1468	free(info.rq_wrid_array);
1469err_free_sq_wrtrk:
1470	free(info.sq_wrtrk_array);
1471err_destroy_lock:
1472	pthread_spin_destroy(&iwuqp->lock);
1473err_free_qp:
1474	printf("%s: failed to create QP\n", __func__);
1475	free(iwuqp);
1476
1477	errno = status;
1478	return NULL;
1479}
1480
/**
 * irdma_uquery_qp - query qp attributes
 * @qp: qp for the attributes query
 * @attr: to return the attributes
 * @attr_mask: mask of the attributes to query
 * @init_attr: initial attributes during create_qp
 */
1488int
1489irdma_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
1490		struct ibv_qp_init_attr *init_attr)
1491{
1492	struct ibv_query_qp cmd;
1493
1494	return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd,
1495				sizeof(cmd));
1496}
1497
/**
 * irdma_umodify_qp - send qp modify to driver
 * @qp: qp to modify
 * @attr: attributes to modify
 * @attr_mask: mask of the attributes being modified
 */
1504int
1505irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask)
1506{
1507	struct irdma_umodify_qp_resp resp = {};
1508	struct ibv_modify_qp cmd = {};
1509	struct irdma_modify_qp_cmd cmd_ex = {};
1510	struct irdma_uvcontext *iwvctx;
1511	struct irdma_uqp *iwuqp;
1512
1513	iwuqp = container_of(qp, struct irdma_uqp, ibv_qp);
1514	iwvctx = container_of(qp->context, struct irdma_uvcontext, ibv_ctx);
1515
1516	if (iwuqp->qp.qp_caps & IRDMA_PUSH_MODE && attr_mask & IBV_QP_STATE &&
1517	    iwvctx->uk_attrs.hw_rev > IRDMA_GEN_1) {
1518		u64 offset;
1519		void *map;
1520		int ret;
1521
1522		ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd,
1523					   sizeof(cmd_ex.ibv_cmd),
1524					   sizeof(cmd_ex), &resp.ibv_resp,
1525					   sizeof(resp.ibv_resp),
1526					   sizeof(resp));
1527		if (!ret)
1528			iwuqp->qp.rd_fence_rate = resp.rd_fence_rate;
1529		if (ret || !resp.push_valid)
1530			return ret;
1531
1532		if (iwuqp->qp.push_wqe)
1533			return ret;
1534
1535		offset = resp.push_wqe_mmap_key;
1536		map = irdma_mmap(qp->context->cmd_fd, offset);
1537		if (map == MAP_FAILED)
1538			return ret;
1539
1540		iwuqp->qp.push_wqe = map;
1541
1542		offset = resp.push_db_mmap_key;
1543		map = irdma_mmap(qp->context->cmd_fd, offset);
1544		if (map == MAP_FAILED) {
1545			irdma_munmap(iwuqp->qp.push_wqe);
1546			iwuqp->qp.push_wqe = NULL;
1547			printf("failed to map push page, errno %d\n", errno);
1548			return ret;
1549		}
1550		iwuqp->qp.push_wqe += resp.push_offset;
1551		iwuqp->qp.push_db = map + resp.push_offset;
1552
1553		return ret;
1554	} else {
1555		return ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd));
1556	}
1557}
1558
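/**
 * irdma_issue_flush - ask the driver to flush the SQ and/or RQ
 * @qp: qp to flush
 * @sq_flush: true to flush the SQ
 * @rq_flush: true to flush the RQ
 */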
1559static void
1560irdma_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush)
1561{
1562	struct irdma_umodify_qp_resp resp = {};
1563	struct irdma_modify_qp_cmd cmd_ex = {};
1564	struct ibv_qp_attr attr = {};
1565
1566	attr.qp_state = IBV_QPS_ERR;
1567	cmd_ex.sq_flush = sq_flush;
1568	cmd_ex.rq_flush = rq_flush;
1569
1570	ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE,
1571			     &cmd_ex.ibv_cmd,
1572			     sizeof(cmd_ex.ibv_cmd),
1573			     sizeof(cmd_ex), &resp.ibv_resp,
1574			     sizeof(resp.ibv_resp),
1575			     sizeof(resp));
1576}
1577
/**
 * irdma_clean_cqes - clean cq entries for qp
 * @qp: qp for which completions are cleaned
 * @iwucq: cq to be cleaned
 */
1583static void
1584irdma_clean_cqes(struct irdma_qp_uk *qp, struct irdma_ucq *iwucq)
1585{
1586	struct irdma_cq_uk *ukcq = &iwucq->cq;
1587	int ret;
1588
1589	ret = pthread_spin_lock(&iwucq->lock);
1590	if (ret)
1591		return;
1592
1593	irdma_uk_clean_cq(qp, ukcq);
1594	pthread_spin_unlock(&iwucq->lock);
1595}
1596
1597/**
1598 * irdma_udestroy_qp - destroy qp
1599 * @qp: qp to destroy
1600 */
1601int
1602irdma_udestroy_qp(struct ibv_qp *qp)
1603{
1604	struct irdma_uqp *iwuqp;
1605	int ret;
1606
1607	iwuqp = container_of(qp, struct irdma_uqp, ibv_qp);
1608	ret = pthread_spin_destroy(&iwuqp->lock);
1609	if (ret)
1610		goto err;
1611
1612	ret = irdma_destroy_vmapped_qp(iwuqp);
1613	if (ret)
1614		goto err;
1615
1616	/* Clean any pending completions from the cq(s) */
1617	if (iwuqp->send_cq)
1618		irdma_clean_cqes(&iwuqp->qp, iwuqp->send_cq);
1619
1620	if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq)
1621		irdma_clean_cqes(&iwuqp->qp, iwuqp->recv_cq);
1622
1623	if (iwuqp->qp.sq_wrtrk_array)
1624		free(iwuqp->qp.sq_wrtrk_array);
1625	if (iwuqp->qp.rq_wrid_array)
1626		free(iwuqp->qp.rq_wrid_array);
1627
1628	irdma_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size);
1629	free(iwuqp);
1630	return 0;
1631
1632err:
1633	printf("%s: failed to destroy QP, status %d\n",
1634	       __func__, ret);
1635	return ret;
1636}
1637
1638/**
1639 * calc_type2_mw_stag - calculate type 2 MW stag
1640 * @rkey: desired rkey of the MW
1641 * @mw_rkey: type2 memory window rkey
1642 *
 * Compute the type2 memory window stag by taking the lower 8 bits
 * of the desired rkey and leaving the upper 24 bits of mw_rkey unchanged.
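 *
 * Illustrative example: rkey 0x12345678 and mw_rkey 0xaabbcc01 give
 * (0x78) | (0xaabbcc00) = 0xaabbcc78.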
1645 */
static inline u32
calc_type2_mw_stag(u32 rkey, u32 mw_rkey)
{
1647	const u32 mask = 0xff;
1648
1649	return (rkey & mask) | (mw_rkey & ~mask);
1650}
1651
/**
 * irdma_upost_send - post send wr for user application
 * @ib_qp: qp to post wr
 * @ib_wr: work request ptr
 * @bad_wr: return of bad wr if err
 */
1658int
1659irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr,
1660		 struct ibv_send_wr **bad_wr)
1661{
1662	struct irdma_post_sq_info info;
1663	struct irdma_uvcontext *iwvctx;
1664	struct irdma_uk_attrs *uk_attrs;
1665	struct irdma_uqp *iwuqp;
1666	bool reflush = false;
1667	int err = 0;
1668
1669	iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp);
1670	iwvctx = container_of(ib_qp->context, struct irdma_uvcontext, ibv_ctx);
1671	uk_attrs = &iwvctx->uk_attrs;
1672
1673	err = pthread_spin_lock(&iwuqp->lock);
1674	if (err)
1675		return err;
1676
1677	if (!IRDMA_RING_MORE_WORK(iwuqp->qp.sq_ring) &&
1678	    ib_qp->state == IBV_QPS_ERR)
1679		reflush = true;
1680
1681	while (ib_wr) {
1682		memset(&info, 0, sizeof(info));
1683		info.wr_id = (u64)(ib_wr->wr_id);
1684		if ((ib_wr->send_flags & IBV_SEND_SIGNALED) ||
1685		    iwuqp->sq_sig_all)
1686			info.signaled = true;
1687		if (ib_wr->send_flags & IBV_SEND_FENCE)
1688			info.read_fence = true;
1689
1690		switch (ib_wr->opcode) {
1691		case IBV_WR_SEND_WITH_IMM:
1692			if (iwuqp->qp.qp_caps & IRDMA_SEND_WITH_IMM) {
1693				info.imm_data_valid = true;
1694				info.imm_data = ntohl(ib_wr->imm_data);
1695			} else {
1696				err = EINVAL;
1697				break;
1698			}
1699			/* fallthrough */
1700		case IBV_WR_SEND:
1701		case IBV_WR_SEND_WITH_INV:
1702			if (ib_wr->opcode == IBV_WR_SEND ||
1703			    ib_wr->opcode == IBV_WR_SEND_WITH_IMM) {
1704				if (ib_wr->send_flags & IBV_SEND_SOLICITED)
1705					info.op_type = IRDMA_OP_TYPE_SEND_SOL;
1706				else
1707					info.op_type = IRDMA_OP_TYPE_SEND;
1708			} else {
1709				if (ib_wr->send_flags & IBV_SEND_SOLICITED)
1710					info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV;
1711				else
1712					info.op_type = IRDMA_OP_TYPE_SEND_INV;
1713				info.stag_to_inv = ib_wr->imm_data;
1714			}
1715			info.op.send.num_sges = ib_wr->num_sge;
1716			info.op.send.sg_list = (struct ibv_sge *)ib_wr->sg_list;
1717			if (ib_qp->qp_type == IBV_QPT_UD) {
1718				struct irdma_uah *ah = container_of(ib_wr->wr.ud.ah,
1719								    struct irdma_uah, ibv_ah);
1720
1721				info.op.send.ah_id = ah->ah_id;
1722				info.op.send.qkey = ib_wr->wr.ud.remote_qkey;
1723				info.op.send.dest_qp = ib_wr->wr.ud.remote_qpn;
1724			}
1725
1726			if (ib_wr->send_flags & IBV_SEND_INLINE)
1727				err = irdma_uk_inline_send(&iwuqp->qp, &info, false);
1728			else
1729				err = irdma_uk_send(&iwuqp->qp, &info, false);
1730			break;
1731		case IBV_WR_RDMA_WRITE_WITH_IMM:
1732			if (iwuqp->qp.qp_caps & IRDMA_WRITE_WITH_IMM) {
1733				info.imm_data_valid = true;
1734				info.imm_data = ntohl(ib_wr->imm_data);
1735			} else {
1736				err = EINVAL;
1737				break;
1738			}
1739			/* fallthrough */
1740		case IBV_WR_RDMA_WRITE:
1741			if (ib_wr->send_flags & IBV_SEND_SOLICITED)
1742				info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL;
1743			else
1744				info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;
1745
1746			info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
1747			info.op.rdma_write.lo_sg_list = ib_wr->sg_list;
1748			info.op.rdma_write.rem_addr.addr = ib_wr->wr.rdma.remote_addr;
1749			info.op.rdma_write.rem_addr.lkey = ib_wr->wr.rdma.rkey;
1750			if (ib_wr->send_flags & IBV_SEND_INLINE)
1751				err = irdma_uk_inline_rdma_write(&iwuqp->qp, &info, false);
1752			else
1753				err = irdma_uk_rdma_write(&iwuqp->qp, &info, false);
1754			break;
1755		case IBV_WR_RDMA_READ:
1756			if (ib_wr->num_sge > uk_attrs->max_hw_read_sges) {
1757				err = EINVAL;
1758				break;
1759			}
1760			info.op_type = IRDMA_OP_TYPE_RDMA_READ;
1761			info.op.rdma_read.rem_addr.addr = ib_wr->wr.rdma.remote_addr;
1762			info.op.rdma_read.rem_addr.lkey = ib_wr->wr.rdma.rkey;
1763
1764			info.op.rdma_read.lo_sg_list = ib_wr->sg_list;
1765			info.op.rdma_read.num_lo_sges = ib_wr->num_sge;
1766			err = irdma_uk_rdma_read(&iwuqp->qp, &info, false, false);
1767			break;
1768		case IBV_WR_BIND_MW:
1769			if (ib_qp->qp_type != IBV_QPT_RC) {
1770				err = EINVAL;
1771				break;
1772			}
1773			info.op_type = IRDMA_OP_TYPE_BIND_MW;
1774			info.op.bind_window.mr_stag = ib_wr->bind_mw.bind_info.mr->rkey;
1775			if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) {
1776				info.op.bind_window.mem_window_type_1 = true;
1777				info.op.bind_window.mw_stag = ib_wr->bind_mw.rkey;
1778			} else {
1779				struct verbs_mr *vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr);
1780
1781				if (vmr->access & IBV_ACCESS_ZERO_BASED) {
1782					err = EINVAL;
1783					break;
1784				}
1785				info.op.bind_window.mw_stag =
1786				    calc_type2_mw_stag(ib_wr->bind_mw.rkey, ib_wr->bind_mw.mw->rkey);
1787				ib_wr->bind_mw.mw->rkey = info.op.bind_window.mw_stag;
1788
1789			}
1790
1791			if (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_ZERO_BASED) {
1792				info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_ZERO_BASED;
1793				info.op.bind_window.va = NULL;
1794			} else {
1795				info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_VA_BASED;
1796				info.op.bind_window.va = (void *)(uintptr_t)ib_wr->bind_mw.bind_info.addr;
1797			}
1798			info.op.bind_window.bind_len = ib_wr->bind_mw.bind_info.length;
1799			info.op.bind_window.ena_reads =
1800			    (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_READ) ? 1 : 0;
1801			info.op.bind_window.ena_writes =
1802			    (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_WRITE) ? 1 : 0;
1803
1804			err = irdma_uk_mw_bind(&iwuqp->qp, &info, false);
1805			break;
1806		case IBV_WR_LOCAL_INV:
1807			info.op_type = IRDMA_OP_TYPE_INV_STAG;
1808			info.op.inv_local_stag.target_stag = ib_wr->imm_data;
1809			err = irdma_uk_stag_local_invalidate(&iwuqp->qp, &info, true);
1810			break;
1811		default:
1812			/* error */
1813			err = EINVAL;
1814			printf("%s: post work request failed, invalid opcode: 0x%x\n",
1815			       __func__, ib_wr->opcode);
1816			break;
1817		}
1818		if (err)
1819			break;
1820
1821		ib_wr = ib_wr->next;
1822	}
1823
1824	if (err)
1825		*bad_wr = ib_wr;
1826
1827	irdma_uk_qp_post_wr(&iwuqp->qp);
1828	if (reflush)
1829		irdma_issue_flush(ib_qp, 1, 0);
1830
1831	pthread_spin_unlock(&iwuqp->lock);
1832
1833	return err;
1834}
1835
/**
 * irdma_upost_recv - post receive wr for user application
 * @ib_qp: qp to post wr
 * @ib_wr: work request for receive
 * @bad_wr: bad wr caused an error
 */
1841int
1842irdma_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr,
1843		 struct ibv_recv_wr **bad_wr)
1844{
1845	struct irdma_post_rq_info post_recv = {};
1846	struct irdma_uqp *iwuqp;
1847	bool reflush = false;
1848	int err = 0;
1849
1850	iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp);
1851	err = pthread_spin_lock(&iwuqp->lock);
1852	if (err)
1853		return err;
1854
1855	if (!IRDMA_RING_MORE_WORK(iwuqp->qp.rq_ring) &&
1856	    ib_qp->state == IBV_QPS_ERR)
1857		reflush = true;
1858
1859	while (ib_wr) {
1860		if (ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt) {
1861			*bad_wr = ib_wr;
1862			err = EINVAL;
1863			goto error;
1864		}
1865		post_recv.num_sges = ib_wr->num_sge;
1866		post_recv.wr_id = ib_wr->wr_id;
1867		post_recv.sg_list = ib_wr->sg_list;
1868		err = irdma_uk_post_receive(&iwuqp->qp, &post_recv);
1869		if (err) {
1870			*bad_wr = ib_wr;
1871			goto error;
1872		}
1873
1874		if (reflush)
1875			irdma_issue_flush(ib_qp, 0, 1);
1876
1877		ib_wr = ib_wr->next;
1878	}
1879error:
1880	pthread_spin_unlock(&iwuqp->lock);
1881
1882	return err;
1883}
1884
1885/**
1886 * irdma_ucreate_ah - create address handle associated with a pd
1887 * @ibpd: pd for the address handle
1888 * @attr: attributes of address handle
1889 */
1890struct ibv_ah *
1891irdma_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr)
1892{
1893	struct irdma_uah *ah;
1894	union ibv_gid sgid;
1895	struct irdma_ucreate_ah_resp resp = {};
1896	int err;
1897
1898	if (ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index,
1899			  &sgid)) {
1900		fprintf(stderr, "irdma: Error from ibv_query_gid.\n");
1901		errno = ENOENT;
1902		return NULL;
1903	}
1904
1905	ah = calloc(1, sizeof(*ah));
1906	if (!ah)
1907		return NULL;
1908
1909	err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp,
1910				sizeof(resp));
1911	if (err) {
1912		free(ah);
1913		errno = err;
1914		return NULL;
1915	}
1916
1917	ah->ah_id = resp.ah_id;
1918
1919	return &ah->ibv_ah;
1920}
1921
1922/**
1923 * irdma_udestroy_ah - destroy the address handle
1924 * @ibah: address handle
1925 */
1926int
1927irdma_udestroy_ah(struct ibv_ah *ibah)
1928{
1929	struct irdma_uah *ah;
1930	int ret;
1931
1932	ah = container_of(ibah, struct irdma_uah, ibv_ah);
1933
1934	ret = ibv_cmd_destroy_ah(ibah);
1935	if (ret)
1936		return ret;
1937
1938	free(ah);
1939
1940	return 0;
1941}
1942
/**
 * irdma_uattach_mcast - attach qp to multicast group
 * @qp: The queue pair
 * @gid: The Global ID for multicast group
 * @lid: The Local ID
 */
1949int
1950irdma_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
1951		    uint16_t lid)
1952{
1953	return ibv_cmd_attach_mcast(qp, gid, lid);
1954}
1955
/**
 * irdma_udetach_mcast - detach qp from multicast group
 * @qp: The queue pair
 * @gid: The Global ID for multicast group
 * @lid: The Local ID
 */
1962int
1963irdma_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
1964		    uint16_t lid)
1965{
1966	return ibv_cmd_detach_mcast(qp, gid, lid);
1967}
1968
1969/**
1970 * irdma_uresize_cq - resizes a cq
1971 * @cq: cq to resize
1972 * @cqe: the number of cqes of the new cq
1973 */
1974int
1975irdma_uresize_cq(struct ibv_cq *cq, int cqe)
1976{
1977	struct irdma_uvcontext *iwvctx;
1978	struct irdma_uk_attrs *uk_attrs;
1979	struct irdma_uresize_cq cmd = {};
1980	struct ibv_resize_cq_resp resp = {};
1981	struct irdma_ureg_mr reg_mr_cmd = {};
1982	struct ibv_reg_mr_resp reg_mr_resp = {};
1983	struct irdma_cq_buf *cq_buf = NULL;
1984	struct irdma_cqe *cq_base = NULL;
1985	struct verbs_mr new_mr = {};
1986	struct irdma_ucq *iwucq;
1987	size_t cq_size;
1988	u32 cq_pages;
1989	int cqe_needed;
1990	int ret = 0;
1991
1992	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
1993	iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx);
1994	uk_attrs = &iwvctx->uk_attrs;
1995
1996	if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
1997		return EOPNOTSUPP;
1998
1999	if (cqe < uk_attrs->min_hw_cq_size || cqe > uk_attrs->max_hw_cq_size - 1)
2000		return EINVAL;
2001
2002	cqe_needed = get_cq_size(cqe, uk_attrs->hw_rev);
2003	if (cqe_needed == iwucq->cq.cq_size)
2004		return 0;
2005
2006	cq_size = get_cq_total_bytes(cqe_needed);
2007	cq_pages = cq_size >> IRDMA_HW_PAGE_SHIFT;
2008	cq_base = irdma_alloc_hw_buf(cq_size);
2009	if (!cq_base)
2010		return ENOMEM;
2011
2012	memset(cq_base, 0, cq_size);
2013
2014	cq_buf = malloc(sizeof(*cq_buf));
2015	if (!cq_buf) {
2016		ret = ENOMEM;
2017		goto err_buf;
2018	}
2019
2020	new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd;
2021	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
2022	reg_mr_cmd.cq_pages = cq_pages;
2023
2024	ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size,
2025			     (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE,
2026			     &new_mr.ibv_mr, &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
2027			     &reg_mr_resp, sizeof(reg_mr_resp));
2028	if (ret)
2029		goto err_dereg_mr;
2030
2031	ret = pthread_spin_lock(&iwucq->lock);
2032	if (ret)
2033		goto err_lock;
2034
2035	cmd.user_cq_buffer = (__u64) ((uintptr_t)cq_base);
2036	ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd,
2037				sizeof(cmd), &resp, sizeof(resp));
2038	if (ret)
2039		goto err_resize;
2040
2041	memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq));
2042	cq_buf->vmr = iwucq->vmr;
2043	iwucq->vmr = new_mr;
2044	irdma_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed);
2045	iwucq->verbs_cq.cq.cqe = cqe;
2046	LIST_INSERT_HEAD(&iwucq->resize_list, cq_buf, list);
2047
2048	pthread_spin_unlock(&iwucq->lock);
2049
2050	return ret;
2051
2052err_resize:
2053	pthread_spin_unlock(&iwucq->lock);
2054err_lock:
2055	ibv_cmd_dereg_mr(&new_mr.ibv_mr);
2056err_dereg_mr:
2057	free(cq_buf);
2058err_buf:
2059	fprintf(stderr, "failed to resize CQ cq_id=%d ret=%d\n", iwucq->cq.cq_id, ret);
2060	irdma_free_hw_buf(cq_base, cq_size);
2061	return ret;
2062}
2063