1/* This file is part of the Emulex RoCE Device Driver for
2 * RoCE (RDMA over Converged Ethernet) adapters.
3 * Copyright (C) 2012-2015 Emulex. All rights reserved.
4 * EMULEX and SLI are trademarks of Emulex.
5 * www.emulex.com
6 *
7 * This software is available to you under a choice of one of two licenses.
8 * You may choose to be licensed under the terms of the GNU General Public
9 * License (GPL) Version 2, available from the file COPYING in the main
10 * directory of this source tree, or the BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * - Redistributions of source code must retain the above copyright notice,
17 *   this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above copyright
20 *   notice, this list of conditions and the following disclaimer in
21 *   the documentation and/or other materials provided with the distribution.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
33 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 * Contact Information:
36 * linux-drivers@emulex.com
37 *
38 * Emulex
39 * 3333 Susan Street
40 * Costa Mesa, CA 92626
41 */
42
43#include <linux/dma-mapping.h>
44#include <net/addrconf.h>
45#include <rdma/ib_verbs.h>
46#include <rdma/ib_user_verbs.h>
47#include <rdma/iw_cm.h>
48#include <rdma/ib_umem.h>
49#include <rdma/ib_addr.h>
50#include <rdma/ib_cache.h>
51#include <rdma/uverbs_ioctl.h>
52
53#include "ocrdma.h"
54#include "ocrdma_hw.h"
55#include "ocrdma_verbs.h"
56#include <rdma/ocrdma-abi.h>
57
58int ocrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
59{
60	if (index > 0)
61		return -EINVAL;
62
63	*pkey = 0xffff;
64	return 0;
65}
66
67int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
68			struct ib_udata *uhw)
69{
70	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
71
72	if (uhw->inlen || uhw->outlen)
73		return -EINVAL;
74
75	memset(attr, 0, sizeof *attr);
76	memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
77	       min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
78	addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
79			    dev->nic_info.mac_addr);
80	attr->max_mr_size = dev->attr.max_mr_size;
81	attr->page_size_cap = 0xffff000;
82	attr->vendor_id = dev->nic_info.pdev->vendor;
83	attr->vendor_part_id = dev->nic_info.pdev->device;
84	attr->hw_ver = dev->asic_id;
85	attr->max_qp = dev->attr.max_qp;
86	attr->max_ah = OCRDMA_MAX_AH;
87	attr->max_qp_wr = dev->attr.max_wqe;
88
89	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
90					IB_DEVICE_RC_RNR_NAK_GEN |
91					IB_DEVICE_SHUTDOWN_PORT |
92					IB_DEVICE_SYS_IMAGE_GUID |
93					IB_DEVICE_MEM_MGT_EXTENSIONS;
94	attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
95	attr->max_send_sge = dev->attr.max_send_sge;
96	attr->max_recv_sge = dev->attr.max_recv_sge;
97	attr->max_sge_rd = dev->attr.max_rdma_sge;
98	attr->max_cq = dev->attr.max_cq;
99	attr->max_cqe = dev->attr.max_cqe;
100	attr->max_mr = dev->attr.max_mr;
101	attr->max_mw = dev->attr.max_mw;
102	attr->max_pd = dev->attr.max_pd;
103	attr->atomic_cap = 0;
104	attr->max_qp_rd_atom =
105	    min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
106	attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
107	attr->max_srq = dev->attr.max_srq;
108	attr->max_srq_sge = dev->attr.max_srq_sge;
109	attr->max_srq_wr = dev->attr.max_rqe;
110	attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
111	attr->max_fast_reg_page_list_len = dev->attr.max_pages_per_frmr;
112	attr->max_pkeys = 1;
113	return 0;
114}
115
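/* Map the PHY link speed reported by the firmware mailbox query to the
 * closest IB speed/width pair; unknown or zero speeds fall back to SDR x1.
 */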
116static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
117					    u16 *ib_speed, u8 *ib_width)
118{
119	int status;
120	u8 speed;
121
122	status = ocrdma_mbx_get_link_speed(dev, &speed, NULL);
123	if (status)
124		speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
125
126	switch (speed) {
127	case OCRDMA_PHYS_LINK_SPEED_1GBPS:
128		*ib_speed = IB_SPEED_SDR;
129		*ib_width = IB_WIDTH_1X;
130		break;
131
132	case OCRDMA_PHYS_LINK_SPEED_10GBPS:
133		*ib_speed = IB_SPEED_QDR;
134		*ib_width = IB_WIDTH_1X;
135		break;
136
137	case OCRDMA_PHYS_LINK_SPEED_20GBPS:
138		*ib_speed = IB_SPEED_DDR;
139		*ib_width = IB_WIDTH_4X;
140		break;
141
142	case OCRDMA_PHYS_LINK_SPEED_40GBPS:
143		*ib_speed = IB_SPEED_QDR;
144		*ib_width = IB_WIDTH_4X;
145		break;
146
147	default:
148		/* Unsupported */
149		*ib_speed = IB_SPEED_SDR;
150		*ib_width = IB_WIDTH_1X;
151	}
152}
153
154int ocrdma_query_port(struct ib_device *ibdev,
155		      u32 port, struct ib_port_attr *props)
156{
157	enum ib_port_state port_state;
158	struct ocrdma_dev *dev;
159	struct net_device *netdev;
160
	/* props is zeroed by the caller; avoid zeroing it again here */
162	dev = get_ocrdma_dev(ibdev);
163	netdev = dev->nic_info.netdev;
164	if (netif_running(netdev) && netif_oper_up(netdev)) {
165		port_state = IB_PORT_ACTIVE;
166		props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
167	} else {
168		port_state = IB_PORT_DOWN;
169		props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
170	}
171	props->max_mtu = IB_MTU_4096;
172	props->active_mtu = iboe_get_mtu(netdev->mtu);
173	props->lid = 0;
174	props->lmc = 0;
175	props->sm_lid = 0;
176	props->sm_sl = 0;
177	props->state = port_state;
178	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
179				IB_PORT_DEVICE_MGMT_SUP |
180				IB_PORT_VENDOR_CLASS_SUP;
181	props->ip_gids = true;
182	props->gid_tbl_len = OCRDMA_MAX_SGID;
183	props->pkey_tbl_len = 1;
184	props->bad_pkey_cntr = 0;
185	props->qkey_viol_cntr = 0;
186	get_link_speed_and_width(dev, &props->active_speed,
187				 &props->active_width);
188	props->max_msg_sz = 0x80000000;
189	props->max_vl_num = 4;
190	return 0;
191}
192
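/* Record a physical address/length pair in the ucontext's mmap list so
 * that a later ocrdma_mmap() call can validate the requested range.
 */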
193static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
194			   unsigned long len)
195{
196	struct ocrdma_mm *mm;
197
198	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
199	if (mm == NULL)
200		return -ENOMEM;
201	mm->key.phy_addr = phy_addr;
202	mm->key.len = len;
203	INIT_LIST_HEAD(&mm->entry);
204
205	mutex_lock(&uctx->mm_list_lock);
206	list_add_tail(&mm->entry, &uctx->mm_head);
207	mutex_unlock(&uctx->mm_list_lock);
208	return 0;
209}
210
211static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
212			    unsigned long len)
213{
214	struct ocrdma_mm *mm, *tmp;
215
216	mutex_lock(&uctx->mm_list_lock);
217	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
218		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
219			continue;
220
221		list_del(&mm->entry);
222		kfree(mm);
223		break;
224	}
225	mutex_unlock(&uctx->mm_list_lock);
226}
227
228static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
229			      unsigned long len)
230{
231	bool found = false;
232	struct ocrdma_mm *mm;
233
234	mutex_lock(&uctx->mm_list_lock);
235	list_for_each_entry(mm, &uctx->mm_head, entry) {
236		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
237			continue;
238
239		found = true;
240		break;
241	}
242	mutex_unlock(&uctx->mm_list_lock);
243	return found;
244}
245
246
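/* Grab a free slot from the pre-allocated DPP or normal PD bitmap and
 * update the usage counters; callers serialize through dev->dev_lock
 * (see ocrdma_get_pd_num()).
 */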
247static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool)
248{
249	u16 pd_bitmap_idx = 0;
250	unsigned long *pd_bitmap;
251
252	if (dpp_pool) {
253		pd_bitmap = dev->pd_mgr->pd_dpp_bitmap;
254		pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
255						    dev->pd_mgr->max_dpp_pd);
256		__set_bit(pd_bitmap_idx, pd_bitmap);
257		dev->pd_mgr->pd_dpp_count++;
258		if (dev->pd_mgr->pd_dpp_count > dev->pd_mgr->pd_dpp_thrsh)
259			dev->pd_mgr->pd_dpp_thrsh = dev->pd_mgr->pd_dpp_count;
260	} else {
261		pd_bitmap = dev->pd_mgr->pd_norm_bitmap;
262		pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
263						    dev->pd_mgr->max_normal_pd);
264		__set_bit(pd_bitmap_idx, pd_bitmap);
265		dev->pd_mgr->pd_norm_count++;
266		if (dev->pd_mgr->pd_norm_count > dev->pd_mgr->pd_norm_thrsh)
267			dev->pd_mgr->pd_norm_thrsh = dev->pd_mgr->pd_norm_count;
268	}
269	return pd_bitmap_idx;
270}
271
272static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id,
273					bool dpp_pool)
274{
275	u16 pd_count;
276	u16 pd_bit_index;
277
278	pd_count = dpp_pool ? dev->pd_mgr->pd_dpp_count :
279			      dev->pd_mgr->pd_norm_count;
280	if (pd_count == 0)
281		return -EINVAL;
282
283	if (dpp_pool) {
284		pd_bit_index = pd_id - dev->pd_mgr->pd_dpp_start;
285		if (pd_bit_index >= dev->pd_mgr->max_dpp_pd) {
286			return -EINVAL;
287		} else {
288			__clear_bit(pd_bit_index, dev->pd_mgr->pd_dpp_bitmap);
289			dev->pd_mgr->pd_dpp_count--;
290		}
291	} else {
292		pd_bit_index = pd_id - dev->pd_mgr->pd_norm_start;
293		if (pd_bit_index >= dev->pd_mgr->max_normal_pd) {
294			return -EINVAL;
295		} else {
296			__clear_bit(pd_bit_index, dev->pd_mgr->pd_norm_bitmap);
297			dev->pd_mgr->pd_norm_count--;
298		}
299	}
300
301	return 0;
302}
303
304static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
305				   bool dpp_pool)
306{
307	int status;
308
309	mutex_lock(&dev->dev_lock);
310	status = _ocrdma_pd_mgr_put_bitmap(dev, pd_id, dpp_pool);
311	mutex_unlock(&dev->dev_lock);
312	return status;
313}
314
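/* Allocate a PD id from the pre-allocated pools: prefer a DPP PD when
 * requested, and fall back to the normal pool (clearing dpp_enabled)
 * once the DPP pool is exhausted.
 */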
315static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
316{
317	u16 pd_idx = 0;
318	int status = 0;
319
320	mutex_lock(&dev->dev_lock);
321	if (pd->dpp_enabled) {
322		/* try allocating DPP PD, if not available then normal PD */
323		if (dev->pd_mgr->pd_dpp_count < dev->pd_mgr->max_dpp_pd) {
324			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, true);
325			pd->id = dev->pd_mgr->pd_dpp_start + pd_idx;
326			pd->dpp_page = dev->pd_mgr->dpp_page_index + pd_idx;
327		} else if (dev->pd_mgr->pd_norm_count <
328			   dev->pd_mgr->max_normal_pd) {
329			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
330			pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
331			pd->dpp_enabled = false;
332		} else {
333			status = -EINVAL;
334		}
335	} else {
336		if (dev->pd_mgr->pd_norm_count < dev->pd_mgr->max_normal_pd) {
337			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
338			pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
339		} else {
340			status = -EINVAL;
341		}
342	}
343	mutex_unlock(&dev->dev_lock);
344	return status;
345}
346
/*
 * NOTE:
 *
 * ocrdma_ucontext must be used here because this function is also
 * called from ocrdma_alloc_ucontext, where ib_udata does not carry a
 * valid ib_ucontext pointer. ib_uverbs_get_context does not call the
 * uobj_{alloc|get_xxx} helpers that store the ib_ucontext in the
 * uverbs_attr_bundle wrapping the ib_udata, so a non-NULL ib_udata
 * does NOT imply a valid ib_ucontext here!
 */
357static int _ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
358			    struct ocrdma_ucontext *uctx,
359			    struct ib_udata *udata)
360{
361	int status;
362
363	if (udata && uctx && dev->attr.max_dpp_pds) {
364		pd->dpp_enabled =
365			ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
366		pd->num_dpp_qp =
367			pd->dpp_enabled ? (dev->nic_info.db_page_size /
368					   dev->attr.wqe_size) : 0;
369	}
370
371	if (dev->pd_mgr->pd_prealloc_valid)
372		return ocrdma_get_pd_num(dev, pd);
373
374retry:
375	status = ocrdma_mbx_alloc_pd(dev, pd);
376	if (status) {
377		if (pd->dpp_enabled) {
378			pd->dpp_enabled = false;
379			pd->num_dpp_qp = 0;
380			goto retry;
381		}
382		return status;
383	}
384
385	return 0;
386}
387
388static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
389				 struct ocrdma_pd *pd)
390{
391	return (uctx->cntxt_pd == pd);
392}
393
394static void _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
395			      struct ocrdma_pd *pd)
396{
397	if (dev->pd_mgr->pd_prealloc_valid)
398		ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
399	else
400		ocrdma_mbx_dealloc_pd(dev, pd);
401}
402
403static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
404				    struct ocrdma_ucontext *uctx,
405				    struct ib_udata *udata)
406{
407	struct ib_device *ibdev = &dev->ibdev;
408	struct ib_pd *pd;
409	int status;
410
411	pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
412	if (!pd)
413		return -ENOMEM;
414
415	pd->device  = ibdev;
416	uctx->cntxt_pd = get_ocrdma_pd(pd);
417
418	status = _ocrdma_alloc_pd(dev, uctx->cntxt_pd, uctx, udata);
419	if (status) {
420		kfree(uctx->cntxt_pd);
421		goto err;
422	}
423
424	uctx->cntxt_pd->uctx = uctx;
425	uctx->cntxt_pd->ibpd.device = &dev->ibdev;
426err:
427	return status;
428}
429
430static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
431{
432	struct ocrdma_pd *pd = uctx->cntxt_pd;
433	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
434
435	if (uctx->pd_in_use) {
436		pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
437		       __func__, dev->id, pd->id);
438	}
439	uctx->cntxt_pd = NULL;
440	_ocrdma_dealloc_pd(dev, pd);
441	kfree(pd);
442}
443
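/* Hand out the ucontext's pre-allocated PD at most once; pd_in_use
 * marks it taken until ocrdma_release_ucontext_pd() is called.
 */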
444static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
445{
446	struct ocrdma_pd *pd = NULL;
447
448	mutex_lock(&uctx->mm_list_lock);
449	if (!uctx->pd_in_use) {
450		uctx->pd_in_use = true;
451		pd = uctx->cntxt_pd;
452	}
453	mutex_unlock(&uctx->mm_list_lock);
454
455	return pd;
456}
457
458static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
459{
460	mutex_lock(&uctx->mm_list_lock);
461	uctx->pd_in_use = false;
462	mutex_unlock(&uctx->mm_list_lock);
463}
464
465int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
466{
467	struct ib_device *ibdev = uctx->device;
468	int status;
469	struct ocrdma_ucontext *ctx = get_ocrdma_ucontext(uctx);
470	struct ocrdma_alloc_ucontext_resp resp = {};
471	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
472	struct pci_dev *pdev = dev->nic_info.pdev;
473	u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
474
475	if (!udata)
476		return -EFAULT;
477	INIT_LIST_HEAD(&ctx->mm_head);
478	mutex_init(&ctx->mm_list_lock);
479
480	ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
481					    &ctx->ah_tbl.pa, GFP_KERNEL);
482	if (!ctx->ah_tbl.va)
483		return -ENOMEM;
484
485	ctx->ah_tbl.len = map_len;
486
487	resp.ah_tbl_len = ctx->ah_tbl.len;
488	resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
489
490	status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
491	if (status)
492		goto map_err;
493
494	status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
495	if (status)
496		goto pd_err;
497
498	resp.dev_id = dev->id;
499	resp.max_inline_data = dev->attr.max_inline_data;
500	resp.wqe_size = dev->attr.wqe_size;
501	resp.rqe_size = dev->attr.rqe_size;
502	resp.dpp_wqe_size = dev->attr.wqe_size;
503
504	memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
505	status = ib_copy_to_udata(udata, &resp, sizeof(resp));
506	if (status)
507		goto cpy_err;
508	return 0;
509
510cpy_err:
511	ocrdma_dealloc_ucontext_pd(ctx);
512pd_err:
513	ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
514map_err:
515	dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
516			  ctx->ah_tbl.pa);
517	return status;
518}
519
520void ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
521{
522	struct ocrdma_mm *mm, *tmp;
523	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
524	struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
525	struct pci_dev *pdev = dev->nic_info.pdev;
526
527	ocrdma_dealloc_ucontext_pd(uctx);
528
529	ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
530	dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
531			  uctx->ah_tbl.pa);
532
533	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
534		list_del(&mm->entry);
535		kfree(mm);
536	}
537}
538
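/* Map a range previously registered via ocrdma_add_mmap(): doorbell
 * pages are mapped uncached, DPP pages write-combined, and anything
 * else with the default page protection.
 */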
539int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
540{
541	struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
542	struct ocrdma_dev *dev = get_ocrdma_dev(context->device);
543	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
544	u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
545	unsigned long len = (vma->vm_end - vma->vm_start);
546	int status;
547	bool found;
548
549	if (vma->vm_start & (PAGE_SIZE - 1))
550		return -EINVAL;
551	found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
552	if (!found)
553		return -EINVAL;
554
555	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
556		dev->nic_info.db_total_size)) &&
557		(len <=	dev->nic_info.db_page_size)) {
558		if (vma->vm_flags & VM_READ)
559			return -EPERM;
560
561		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
562		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
563					    len, vma->vm_page_prot);
564	} else if (dev->nic_info.dpp_unmapped_len &&
565		(vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
566		(vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
567			dev->nic_info.dpp_unmapped_len)) &&
568		(len <= dev->nic_info.dpp_unmapped_len)) {
569		if (vma->vm_flags & VM_READ)
570			return -EPERM;
571
572		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
573		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
574					    len, vma->vm_page_prot);
575	} else {
576		status = remap_pfn_range(vma, vma->vm_start,
577					 vma->vm_pgoff, len, vma->vm_page_prot);
578	}
579	return status;
580}
581
582static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
583				struct ib_udata *udata)
584{
585	int status;
586	u64 db_page_addr;
587	u64 dpp_page_addr = 0;
588	u32 db_page_size;
589	struct ocrdma_alloc_pd_uresp rsp;
590	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
591		udata, struct ocrdma_ucontext, ibucontext);
592
593	memset(&rsp, 0, sizeof(rsp));
594	rsp.id = pd->id;
595	rsp.dpp_enabled = pd->dpp_enabled;
596	db_page_addr = ocrdma_get_db_addr(dev, pd->id);
597	db_page_size = dev->nic_info.db_page_size;
598
599	status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
600	if (status)
601		return status;
602
603	if (pd->dpp_enabled) {
604		dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
605				(pd->id * PAGE_SIZE);
606		status = ocrdma_add_mmap(uctx, dpp_page_addr,
607				 PAGE_SIZE);
608		if (status)
609			goto dpp_map_err;
610		rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
611		rsp.dpp_page_addr_lo = dpp_page_addr;
612	}
613
614	status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
615	if (status)
616		goto ucopy_err;
617
618	pd->uctx = uctx;
619	return 0;
620
621ucopy_err:
622	if (pd->dpp_enabled)
623		ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
624dpp_map_err:
625	ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
626	return status;
627}
628
629int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
630{
631	struct ib_device *ibdev = ibpd->device;
632	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
633	struct ocrdma_pd *pd;
634	int status;
635	u8 is_uctx_pd = false;
636	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
637		udata, struct ocrdma_ucontext, ibucontext);
638
639	if (udata) {
640		pd = ocrdma_get_ucontext_pd(uctx);
641		if (pd) {
642			is_uctx_pd = true;
643			goto pd_mapping;
644		}
645	}
646
647	pd = get_ocrdma_pd(ibpd);
648	status = _ocrdma_alloc_pd(dev, pd, uctx, udata);
649	if (status)
650		goto exit;
651
652pd_mapping:
653	if (udata) {
654		status = ocrdma_copy_pd_uresp(dev, pd, udata);
655		if (status)
656			goto err;
657	}
658	return 0;
659
660err:
661	if (is_uctx_pd)
662		ocrdma_release_ucontext_pd(uctx);
663	else
664		_ocrdma_dealloc_pd(dev, pd);
665exit:
666	return status;
667}
668
669int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
670{
671	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
672	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
673	struct ocrdma_ucontext *uctx = NULL;
674	u64 usr_db;
675
676	uctx = pd->uctx;
677	if (uctx) {
678		u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
679			(pd->id * PAGE_SIZE);
680		if (pd->dpp_enabled)
681			ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
682		usr_db = ocrdma_get_db_addr(dev, pd->id);
683		ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
684
685		if (is_ucontext_pd(uctx, pd)) {
686			ocrdma_release_ucontext_pd(uctx);
687			return 0;
688		}
689	}
690	_ocrdma_dealloc_pd(dev, pd);
691	return 0;
692}
693
694static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
695			    u32 pdid, int acc, u32 num_pbls, u32 addr_check)
696{
697	int status;
698
699	mr->hwmr.fr_mr = 0;
700	mr->hwmr.local_rd = 1;
701	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
702	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
703	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
704	mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
705	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
706	mr->hwmr.num_pbls = num_pbls;
707
708	status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check);
709	if (status)
710		return status;
711
712	mr->ibmr.lkey = mr->hwmr.lkey;
713	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
714		mr->ibmr.rkey = mr->hwmr.lkey;
715	return 0;
716}
717
718struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
719{
720	int status;
721	struct ocrdma_mr *mr;
722	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
723	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
724
725	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
726		pr_err("%s err, invalid access rights\n", __func__);
727		return ERR_PTR(-EINVAL);
728	}
729
730	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
731	if (!mr)
732		return ERR_PTR(-ENOMEM);
733
734	status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0,
735				   OCRDMA_ADDR_CHECK_DISABLE);
736	if (status) {
737		kfree(mr);
738		return ERR_PTR(status);
739	}
740
741	return &mr->ibmr;
742}
743
744static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
745				   struct ocrdma_hw_mr *mr)
746{
747	struct pci_dev *pdev = dev->nic_info.pdev;
748	int i = 0;
749
750	if (mr->pbl_table) {
751		for (i = 0; i < mr->num_pbls; i++) {
752			if (!mr->pbl_table[i].va)
753				continue;
754			dma_free_coherent(&pdev->dev, mr->pbl_size,
755					  mr->pbl_table[i].va,
756					  mr->pbl_table[i].pa);
757		}
758		kfree(mr->pbl_table);
759		mr->pbl_table = NULL;
760	}
761}
762
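/* Pick the smallest PBL size (a power-of-two multiple of
 * OCRDMA_MIN_HPAGE_SIZE) for which the number of PBLs needed to hold
 * num_pbes page addresses stays below dev->attr.max_num_mr_pbl.
 */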
763static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
764			      u32 num_pbes)
765{
766	u32 num_pbls = 0;
767	u32 idx = 0;
768	int status = 0;
769	u32 pbl_size;
770
771	do {
772		pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
773		if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
774			status = -EFAULT;
775			break;
776		}
777		num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
778		num_pbls = num_pbls / (pbl_size / sizeof(u64));
779		idx++;
780	} while (num_pbls >= dev->attr.max_num_mr_pbl);
781
782	mr->hwmr.num_pbes = num_pbes;
783	mr->hwmr.num_pbls = num_pbls;
784	mr->hwmr.pbl_size = pbl_size;
785	return status;
786}
787
788static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
789{
790	int status = 0;
791	int i;
792	u32 dma_len = mr->pbl_size;
793	struct pci_dev *pdev = dev->nic_info.pdev;
794	void *va;
795	dma_addr_t pa;
796
797	mr->pbl_table = kcalloc(mr->num_pbls, sizeof(struct ocrdma_pbl),
798				GFP_KERNEL);
799
800	if (!mr->pbl_table)
801		return -ENOMEM;
802
803	for (i = 0; i < mr->num_pbls; i++) {
804		va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
805		if (!va) {
806			ocrdma_free_mr_pbl_tbl(dev, mr);
807			status = -ENOMEM;
808			break;
809		}
810		mr->pbl_table[i].va = va;
811		mr->pbl_table[i].pa = pa;
812	}
813	return status;
814}
815
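/* Walk the user memory region in PAGE_SIZE blocks and write each DMA
 * address as a 64-bit PBE (little-endian lo/hi words), moving to the
 * next PBL page whenever the current one is full.
 */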
816static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr)
817{
818	struct ocrdma_pbe *pbe;
819	struct ib_block_iter biter;
820	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
821	int pbe_cnt;
822	u64 pg_addr;
823
824	if (!mr->hwmr.num_pbes)
825		return;
826
827	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
828	pbe_cnt = 0;
829
830	rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) {
831		/* store the page address in pbe */
832		pg_addr = rdma_block_iter_dma_address(&biter);
833		pbe->pa_lo = cpu_to_le32(pg_addr);
834		pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr));
835		pbe_cnt += 1;
836		pbe++;
837
		/* if the given pbl is full of pbes,
		 * move to the next pbl.
		 */
841		if (pbe_cnt == (mr->hwmr.pbl_size / sizeof(u64))) {
842			pbl_tbl++;
843			pbe = (struct ocrdma_pbe *)pbl_tbl->va;
844			pbe_cnt = 0;
845		}
846	}
847}
848
849struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
850				 u64 usr_addr, int acc, struct ib_udata *udata)
851{
852	int status = -ENOMEM;
853	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
854	struct ocrdma_mr *mr;
855	struct ocrdma_pd *pd;
856
857	pd = get_ocrdma_pd(ibpd);
858
859	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
860		return ERR_PTR(-EINVAL);
861
862	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
863	if (!mr)
864		return ERR_PTR(status);
865	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
866	if (IS_ERR(mr->umem)) {
867		status = -EFAULT;
868		goto umem_err;
869	}
870	status = ocrdma_get_pbl_info(
871		dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE));
872	if (status)
873		goto umem_err;
874
875	mr->hwmr.pbe_size = PAGE_SIZE;
876	mr->hwmr.va = usr_addr;
877	mr->hwmr.len = len;
878	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
879	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
880	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
881	mr->hwmr.local_rd = 1;
882	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
883	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
884	if (status)
885		goto umem_err;
886	build_user_pbes(dev, mr);
887	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
888	if (status)
889		goto mbx_err;
890	mr->ibmr.lkey = mr->hwmr.lkey;
891	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
892		mr->ibmr.rkey = mr->hwmr.lkey;
893
894	return &mr->ibmr;
895
896mbx_err:
897	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
898umem_err:
899	kfree(mr);
900	return ERR_PTR(status);
901}
902
903int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
904{
905	struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
906	struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
907
908	(void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
909
910	kfree(mr->pages);
911	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
912
913	/* it could be user registered memory. */
914	ib_umem_release(mr->umem);
915	kfree(mr);
916
917	/* Don't stop cleanup, in case FW is unresponsive */
918	if (dev->mqe_ctx.fw_error_state) {
919		pr_err("%s(%d) fw not responding.\n",
920		       __func__, dev->id);
921	}
922	return 0;
923}
924
925static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
926				struct ib_udata *udata)
927{
928	int status;
929	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
930		udata, struct ocrdma_ucontext, ibucontext);
931	struct ocrdma_create_cq_uresp uresp;
932
933	/* this must be user flow! */
934	if (!udata)
935		return -EINVAL;
936
937	memset(&uresp, 0, sizeof(uresp));
938	uresp.cq_id = cq->id;
939	uresp.page_size = PAGE_ALIGN(cq->len);
940	uresp.num_pages = 1;
941	uresp.max_hw_cqe = cq->max_hw_cqe;
942	uresp.page_addr[0] = virt_to_phys(cq->va);
943	uresp.db_page_addr =  ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
944	uresp.db_page_size = dev->nic_info.db_page_size;
945	uresp.phase_change = cq->phase_change ? 1 : 0;
946	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
947	if (status) {
948		pr_err("%s(%d) copy error cqid=0x%x.\n",
949		       __func__, dev->id, cq->id);
950		goto err;
951	}
952	status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
953	if (status)
954		goto err;
955	status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
956	if (status) {
957		ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
958		goto err;
959	}
960	cq->ucontext = uctx;
961err:
962	return status;
963}
964
965int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
966		     struct ib_udata *udata)
967{
968	struct ib_device *ibdev = ibcq->device;
969	int entries = attr->cqe;
970	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
971	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
972	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
973		udata, struct ocrdma_ucontext, ibucontext);
974	u16 pd_id = 0;
975	int status;
976	struct ocrdma_create_cq_ureq ureq;
977
978	if (attr->flags)
979		return -EOPNOTSUPP;
980
981	if (udata) {
982		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
983			return -EFAULT;
984	} else
985		ureq.dpp_cq = 0;
986
987	spin_lock_init(&cq->cq_lock);
988	spin_lock_init(&cq->comp_handler_lock);
989	INIT_LIST_HEAD(&cq->sq_head);
990	INIT_LIST_HEAD(&cq->rq_head);
991
992	if (udata)
993		pd_id = uctx->cntxt_pd->id;
994
995	status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
996	if (status)
997		return status;
998
999	if (udata) {
1000		status = ocrdma_copy_cq_uresp(dev, cq, udata);
1001		if (status)
1002			goto ctx_err;
1003	}
1004	cq->phase = OCRDMA_CQE_VALID;
1005	dev->cq_tbl[cq->id] = cq;
1006	return 0;
1007
1008ctx_err:
1009	ocrdma_mbx_destroy_cq(dev, cq);
1010	return status;
1011}
1012
1013int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
1014		     struct ib_udata *udata)
1015{
1016	int status = 0;
1017	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
1018
1019	if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
1020		status = -EINVAL;
1021		return status;
1022	}
1023	ibcq->cqe = new_cnt;
1024	return status;
1025}
1026
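/* Acknowledge any CQEs still marked valid by ringing the CQ doorbell
 * for them; called from ocrdma_destroy_cq() after the CQ's interrupt
 * has been synchronized.
 */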
1027static void ocrdma_flush_cq(struct ocrdma_cq *cq)
1028{
1029	int cqe_cnt;
1030	int valid_count = 0;
1031	unsigned long flags;
1032
1033	struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
1034	struct ocrdma_cqe *cqe = NULL;
1035
1036	cqe = cq->va;
1037	cqe_cnt = cq->cqe_cnt;
1038
	/* The last irq might have scheduled a polling thread;
	 * sync up with it before hard flushing.
	 */
1042	spin_lock_irqsave(&cq->cq_lock, flags);
1043	while (cqe_cnt) {
1044		if (is_cqe_valid(cq, cqe))
1045			valid_count++;
1046		cqe++;
1047		cqe_cnt--;
1048	}
1049	ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count);
1050	spin_unlock_irqrestore(&cq->cq_lock, flags);
1051}
1052
1053int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1054{
1055	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
1056	struct ocrdma_eq *eq = NULL;
1057	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
1058	int pdid = 0;
1059	u32 irq, indx;
1060
1061	dev->cq_tbl[cq->id] = NULL;
1062	indx = ocrdma_get_eq_table_index(dev, cq->eqn);
1063
1064	eq = &dev->eq_tbl[indx];
1065	irq = ocrdma_get_irq(dev, eq);
1066	synchronize_irq(irq);
1067	ocrdma_flush_cq(cq);
1068
1069	ocrdma_mbx_destroy_cq(dev, cq);
1070	if (cq->ucontext) {
1071		pdid = cq->ucontext->cntxt_pd->id;
1072		ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
1073				PAGE_ALIGN(cq->len));
1074		ocrdma_del_mmap(cq->ucontext,
1075				ocrdma_get_db_addr(dev, pdid),
1076				dev->nic_info.db_page_size);
1077	}
1078	return 0;
1079}
1080
1081static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
1082{
1083	int status = -EINVAL;
1084
1085	if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
1086		dev->qp_tbl[qp->id] = qp;
1087		status = 0;
1088	}
1089	return status;
1090}
1091
1092static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
1093{
1094	dev->qp_tbl[qp->id] = NULL;
1095}
1096
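/* Validate the requested QP type and capabilities against the device
 * limits; only one GSI QP may exist, it cannot be created from user
 * space, and consumer QPs must not share the GSI QP's CQs.
 */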
1097static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
1098				  struct ib_qp_init_attr *attrs,
1099				  struct ib_udata *udata)
1100{
1101	if ((attrs->qp_type != IB_QPT_GSI) &&
1102	    (attrs->qp_type != IB_QPT_RC) &&
1103	    (attrs->qp_type != IB_QPT_UC) &&
1104	    (attrs->qp_type != IB_QPT_UD)) {
1105		pr_err("%s(%d) unsupported qp type=0x%x requested\n",
1106		       __func__, dev->id, attrs->qp_type);
1107		return -EOPNOTSUPP;
1108	}
1109	/* Skip the check for QP1 to support CM size of 128 */
1110	if ((attrs->qp_type != IB_QPT_GSI) &&
1111	    (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
1112		pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
1113		       __func__, dev->id, attrs->cap.max_send_wr);
1114		pr_err("%s(%d) supported send_wr=0x%x\n",
1115		       __func__, dev->id, dev->attr.max_wqe);
1116		return -EINVAL;
1117	}
1118	if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
1119		pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
1120		       __func__, dev->id, attrs->cap.max_recv_wr);
1121		pr_err("%s(%d) supported recv_wr=0x%x\n",
1122		       __func__, dev->id, dev->attr.max_rqe);
1123		return -EINVAL;
1124	}
1125	if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
1126		pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
1127		       __func__, dev->id, attrs->cap.max_inline_data);
1128		pr_err("%s(%d) supported inline data size=0x%x\n",
1129		       __func__, dev->id, dev->attr.max_inline_data);
1130		return -EINVAL;
1131	}
1132	if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
1133		pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
1134		       __func__, dev->id, attrs->cap.max_send_sge);
1135		pr_err("%s(%d) supported send_sge=0x%x\n",
1136		       __func__, dev->id, dev->attr.max_send_sge);
1137		return -EINVAL;
1138	}
1139	if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
1140		pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
1141		       __func__, dev->id, attrs->cap.max_recv_sge);
1142		pr_err("%s(%d) supported recv_sge=0x%x\n",
1143		       __func__, dev->id, dev->attr.max_recv_sge);
1144		return -EINVAL;
1145	}
1146	/* unprivileged user space cannot create special QP */
1147	if (udata && attrs->qp_type == IB_QPT_GSI) {
		pr_err("%s(%d) Userspace can't create special QPs of type=0x%x\n",
		       __func__, dev->id, attrs->qp_type);
1151		return -EINVAL;
1152	}
1153	/* allow creating only one GSI type of QP */
1154	if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
1155		pr_err("%s(%d) GSI special QPs already created.\n",
1156		       __func__, dev->id);
1157		return -EINVAL;
1158	}
1159	/* verify consumer QPs are not trying to use GSI QP's CQ */
1160	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
1161		if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
1162			(dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
1163			pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
1164				__func__, dev->id);
1165			return -EINVAL;
1166		}
1167	}
1168	return 0;
1169}
1170
1171static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
1172				struct ib_udata *udata, int dpp_offset,
1173				int dpp_credit_lmt, int srq)
1174{
1175	int status;
1176	u64 usr_db;
1177	struct ocrdma_create_qp_uresp uresp;
1178	struct ocrdma_pd *pd = qp->pd;
1179	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
1180
1181	memset(&uresp, 0, sizeof(uresp));
1182	usr_db = dev->nic_info.unmapped_db +
1183			(pd->id * dev->nic_info.db_page_size);
1184	uresp.qp_id = qp->id;
1185	uresp.sq_dbid = qp->sq.dbid;
1186	uresp.num_sq_pages = 1;
1187	uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
1188	uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va);
1189	uresp.num_wqe_allocated = qp->sq.max_cnt;
1190	if (!srq) {
1191		uresp.rq_dbid = qp->rq.dbid;
1192		uresp.num_rq_pages = 1;
1193		uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
1194		uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va);
1195		uresp.num_rqe_allocated = qp->rq.max_cnt;
1196	}
1197	uresp.db_page_addr = usr_db;
1198	uresp.db_page_size = dev->nic_info.db_page_size;
1199	uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
1200	uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
1201	uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
1202
1203	if (qp->dpp_enabled) {
1204		uresp.dpp_credit = dpp_credit_lmt;
1205		uresp.dpp_offset = dpp_offset;
1206	}
1207	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1208	if (status) {
1209		pr_err("%s(%d) user copy error.\n", __func__, dev->id);
1210		goto err;
1211	}
1212	status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
1213				 uresp.sq_page_size);
1214	if (status)
1215		goto err;
1216
1217	if (!srq) {
1218		status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
1219					 uresp.rq_page_size);
1220		if (status)
1221			goto rq_map_err;
1222	}
1223	return status;
1224rq_map_err:
1225	ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
1226err:
1227	return status;
1228}
1229
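/* Compute the SQ/RQ doorbell addresses within the PD's doorbell page;
 * the register offsets differ between SKH-R (gen2) and earlier ASICs.
 */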
1230static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
1231			     struct ocrdma_pd *pd)
1232{
1233	if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
1234		qp->sq_db = dev->nic_info.db +
1235			(pd->id * dev->nic_info.db_page_size) +
1236			OCRDMA_DB_GEN2_SQ_OFFSET;
1237		qp->rq_db = dev->nic_info.db +
1238			(pd->id * dev->nic_info.db_page_size) +
1239			OCRDMA_DB_GEN2_RQ_OFFSET;
1240	} else {
1241		qp->sq_db = dev->nic_info.db +
1242			(pd->id * dev->nic_info.db_page_size) +
1243			OCRDMA_DB_SQ_OFFSET;
1244		qp->rq_db = dev->nic_info.db +
1245			(pd->id * dev->nic_info.db_page_size) +
1246			OCRDMA_DB_RQ_OFFSET;
1247	}
1248}
1249
1250static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
1251{
1252	qp->wqe_wr_id_tbl =
1253	    kcalloc(qp->sq.max_cnt, sizeof(*(qp->wqe_wr_id_tbl)),
1254		    GFP_KERNEL);
1255	if (qp->wqe_wr_id_tbl == NULL)
1256		return -ENOMEM;
1257	qp->rqe_wr_id_tbl =
1258	    kcalloc(qp->rq.max_cnt, sizeof(u64), GFP_KERNEL);
1259	if (qp->rqe_wr_id_tbl == NULL)
1260		return -ENOMEM;
1261
1262	return 0;
1263}
1264
1265static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
1266				      struct ocrdma_pd *pd,
1267				      struct ib_qp_init_attr *attrs)
1268{
1269	qp->pd = pd;
1270	spin_lock_init(&qp->q_lock);
1271	INIT_LIST_HEAD(&qp->sq_entry);
1272	INIT_LIST_HEAD(&qp->rq_entry);
1273
1274	qp->qp_type = attrs->qp_type;
1275	qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
1276	qp->max_inline_data = attrs->cap.max_inline_data;
1277	qp->sq.max_sges = attrs->cap.max_send_sge;
1278	qp->rq.max_sges = attrs->cap.max_recv_sge;
1279	qp->state = OCRDMA_QPS_RST;
1280	qp->signaled = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
1281}
1282
1283static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
1284				   struct ib_qp_init_attr *attrs)
1285{
1286	if (attrs->qp_type == IB_QPT_GSI) {
1287		dev->gsi_qp_created = 1;
1288		dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
1289		dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
1290	}
1291}
1292
1293int ocrdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
1294		     struct ib_udata *udata)
1295{
1296	int status;
1297	struct ib_pd *ibpd = ibqp->pd;
1298	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1299	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1300	struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
1301	struct ocrdma_create_qp_ureq ureq;
1302	u16 dpp_credit_lmt, dpp_offset;
1303
1304	if (attrs->create_flags)
1305		return -EOPNOTSUPP;
1306
1307	status = ocrdma_check_qp_params(ibpd, dev, attrs, udata);
1308	if (status)
1309		goto gen_err;
1310
1311	memset(&ureq, 0, sizeof(ureq));
1312	if (udata) {
1313		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
1314			return -EFAULT;
1315	}
1316	ocrdma_set_qp_init_params(qp, pd, attrs);
1317	if (udata == NULL)
1318		qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
1319					OCRDMA_QP_FAST_REG);
1320
1321	mutex_lock(&dev->dev_lock);
1322	status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
1323					ureq.dpp_cq_id,
1324					&dpp_offset, &dpp_credit_lmt);
1325	if (status)
1326		goto mbx_err;
1327
	/* user space QPs' wr_id tables are managed in the library */
1329	if (udata == NULL) {
1330		status = ocrdma_alloc_wr_id_tbl(qp);
1331		if (status)
1332			goto map_err;
1333	}
1334
1335	status = ocrdma_add_qpn_map(dev, qp);
1336	if (status)
1337		goto map_err;
1338	ocrdma_set_qp_db(dev, qp, pd);
1339	if (udata) {
1340		status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
1341					      dpp_credit_lmt,
1342					      (attrs->srq != NULL));
1343		if (status)
1344			goto cpy_err;
1345	}
1346	ocrdma_store_gsi_qp_cq(dev, attrs);
1347	qp->ibqp.qp_num = qp->id;
1348	mutex_unlock(&dev->dev_lock);
1349	return 0;
1350
1351cpy_err:
1352	ocrdma_del_qpn_map(dev, qp);
1353map_err:
1354	ocrdma_mbx_destroy_qp(dev, qp);
1355mbx_err:
1356	mutex_unlock(&dev->dev_lock);
1357	kfree(qp->wqe_wr_id_tbl);
1358	kfree(qp->rqe_wr_id_tbl);
1359	pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
1360gen_err:
1361	return status;
1362}
1363
1364int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1365		      int attr_mask)
1366{
1367	int status = 0;
1368	struct ocrdma_qp *qp;
1369	struct ocrdma_dev *dev;
1370	enum ib_qp_state old_qps;
1371
1372	qp = get_ocrdma_qp(ibqp);
1373	dev = get_ocrdma_dev(ibqp->device);
1374	if (attr_mask & IB_QP_STATE)
1375		status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps);
	/* if the new and previous states are the same, the hw doesn't
	 * need to be told about it.
	 */
1379	if (status < 0)
1380		return status;
1381	return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
1382}
1383
1384int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1385		     int attr_mask, struct ib_udata *udata)
1386{
1387	unsigned long flags;
1388	int status = -EINVAL;
1389	struct ocrdma_qp *qp;
1390	struct ocrdma_dev *dev;
1391	enum ib_qp_state old_qps, new_qps;
1392
1393	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
1394		return -EOPNOTSUPP;
1395
1396	qp = get_ocrdma_qp(ibqp);
1397	dev = get_ocrdma_dev(ibqp->device);
1398
	/* synchronize with multiple contexts trying to change/retrieve the qp state */
1400	mutex_lock(&dev->dev_lock);
	/* synchronize with wqe, rqe posting and cqe processing contexts */
1402	spin_lock_irqsave(&qp->q_lock, flags);
1403	old_qps = get_ibqp_state(qp->state);
1404	if (attr_mask & IB_QP_STATE)
1405		new_qps = attr->qp_state;
1406	else
1407		new_qps = old_qps;
1408	spin_unlock_irqrestore(&qp->q_lock, flags);
1409
1410	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
1411		pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
1412		       "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
1413		       __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
1414		       old_qps, new_qps);
1415		goto param_err;
1416	}
1417
1418	status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
1419	if (status > 0)
1420		status = 0;
1421param_err:
1422	mutex_unlock(&dev->dev_lock);
1423	return status;
1424}
1425
1426static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
1427{
1428	switch (mtu) {
1429	case 256:
1430		return IB_MTU_256;
1431	case 512:
1432		return IB_MTU_512;
1433	case 1024:
1434		return IB_MTU_1024;
1435	case 2048:
1436		return IB_MTU_2048;
1437	case 4096:
1438		return IB_MTU_4096;
1439	default:
1440		return IB_MTU_1024;
1441	}
1442}
1443
1444static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
1445{
1446	int ib_qp_acc_flags = 0;
1447
1448	if (qp_cap_flags & OCRDMA_QP_INB_WR)
1449		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
1450	if (qp_cap_flags & OCRDMA_QP_INB_RD)
1451		ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
1452	return ib_qp_acc_flags;
1453}
1454
1455int ocrdma_query_qp(struct ib_qp *ibqp,
1456		    struct ib_qp_attr *qp_attr,
1457		    int attr_mask, struct ib_qp_init_attr *qp_init_attr)
1458{
1459	int status;
1460	u32 qp_state;
1461	struct ocrdma_qp_params params;
1462	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1463	struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
1464
1465	memset(&params, 0, sizeof(params));
1466	mutex_lock(&dev->dev_lock);
1467	status = ocrdma_mbx_query_qp(dev, qp, &params);
1468	mutex_unlock(&dev->dev_lock);
1469	if (status)
1470		goto mbx_err;
1471	if (qp->qp_type == IB_QPT_UD)
1472		qp_attr->qkey = params.qkey;
1473	qp_attr->path_mtu =
1474		ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx &
1475				OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
1476				OCRDMA_QP_PARAMS_PATH_MTU_SHIFT;
1477	qp_attr->path_mig_state = IB_MIG_MIGRATED;
1478	qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
1479	qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
1480	qp_attr->dest_qp_num =
1481	    params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;
1482
1483	qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
1484	qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
1485	qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
1486	qp_attr->cap.max_send_sge = qp->sq.max_sges;
1487	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
1488	qp_attr->cap.max_inline_data = qp->max_inline_data;
1489	qp_init_attr->cap = qp_attr->cap;
1490	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
1491
1492	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
1493			params.rnt_rc_sl_fl &
1494			  OCRDMA_QP_PARAMS_FLOW_LABEL_MASK,
1495			qp->sgid_idx,
1496			(params.hop_lmt_rq_psn &
1497			 OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
1498			 OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
1499			(params.tclass_sq_psn &
1500			 OCRDMA_QP_PARAMS_TCLASS_MASK) >>
1501			 OCRDMA_QP_PARAMS_TCLASS_SHIFT);
1502	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid[0]);
1503
1504	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
1505	rdma_ah_set_sl(&qp_attr->ah_attr, (params.rnt_rc_sl_fl &
1506					   OCRDMA_QP_PARAMS_SL_MASK) >>
1507					   OCRDMA_QP_PARAMS_SL_SHIFT);
1508	qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
1509			    OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
1510				OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
1511	qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
1512			      OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
1513				OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
1514	qp_attr->retry_cnt =
1515	    (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
1516		OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
1517	qp_attr->min_rnr_timer = 0;
1518	qp_attr->pkey_index = 0;
1519	qp_attr->port_num = 1;
1520	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
1521	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
1522	qp_attr->alt_pkey_index = 0;
1523	qp_attr->alt_port_num = 0;
1524	qp_attr->alt_timeout = 0;
1525	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
1526	qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
1527		    OCRDMA_QP_PARAMS_STATE_SHIFT;
1528	qp_attr->qp_state = get_ibqp_state(qp_state);
1529	qp_attr->cur_qp_state = qp_attr->qp_state;
1530	qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
1531	qp_attr->max_dest_rd_atomic =
1532	    params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
1533	qp_attr->max_rd_atomic =
1534	    params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
1535	qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
1536				OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
1537	/* Sync driver QP state with FW */
1538	ocrdma_qp_state_change(qp, qp_attr->qp_state, NULL);
1539mbx_err:
1540	return status;
1541}
1542
1543static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, unsigned int idx)
1544{
1545	unsigned int i = idx / 32;
1546	u32 mask = (1U << (idx % 32));
1547
1548	srq->idx_bit_fields[i] ^= mask;
1549}
1550
1551static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
1552{
1553	return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
1554}
1555
1556static int is_hw_sq_empty(struct ocrdma_qp *qp)
1557{
1558	return (qp->sq.tail == qp->sq.head);
1559}
1560
1561static int is_hw_rq_empty(struct ocrdma_qp *qp)
1562{
1563	return (qp->rq.tail == qp->rq.head);
1564}
1565
1566static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
1567{
1568	return q->va + (q->head * q->entry_size);
1569}
1570
1571static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
1572				      u32 idx)
1573{
1574	return q->va + (idx * q->entry_size);
1575}
1576
1577static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
1578{
1579	q->head = (q->head + 1) & q->max_wqe_idx;
1580}
1581
1582static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
1583{
1584	q->tail = (q->tail + 1) & q->max_wqe_idx;
1585}
1586
1587/* discard the cqe for a given QP */
1588static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
1589{
1590	unsigned long cq_flags;
1591	unsigned long flags;
1592	u32 cur_getp, stop_getp;
1593	struct ocrdma_cqe *cqe;
1594	u32 qpn = 0, wqe_idx = 0;
1595
1596	spin_lock_irqsave(&cq->cq_lock, cq_flags);
1597
	/* Traverse the CQEs in the hw CQ, find those matching the given
	 * qp and mark them discarded by clearing the qpn. The doorbell
	 * is rung in poll_cq(), since CQEs are not completed out of
	 * order.
	 */
1604
1605	cur_getp = cq->getp;
	/* find up to where we reap the cq. */
1607	stop_getp = cur_getp;
1608	do {
1609		if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
1610			break;
1611
1612		cqe = cq->va + cur_getp;
		/* exit when (a) the whole hw cq has been reaped, or
		 * (b) the qp's sq/rq becomes empty.
		 */
1617		qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
1618		/* if previously discarded cqe found, skip that too. */
1619		/* check for matching qp */
1620		if (qpn == 0 || qpn != qp->id)
1621			goto skip_cqe;
1622
1623		if (is_cqe_for_sq(cqe)) {
1624			ocrdma_hwq_inc_tail(&qp->sq);
1625		} else {
1626			if (qp->srq) {
1627				wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
1628					OCRDMA_CQE_BUFTAG_SHIFT) &
1629					qp->srq->rq.max_wqe_idx;
1630				BUG_ON(wqe_idx < 1);
1631				spin_lock_irqsave(&qp->srq->q_lock, flags);
1632				ocrdma_hwq_inc_tail(&qp->srq->rq);
1633				ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
1634				spin_unlock_irqrestore(&qp->srq->q_lock, flags);
1635
1636			} else {
1637				ocrdma_hwq_inc_tail(&qp->rq);
1638			}
1639		}
1640		/* mark cqe discarded so that it is not picked up later
1641		 * in the poll_cq().
1642		 */
1643		cqe->cmn.qpn = 0;
1644skip_cqe:
1645		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
1646	} while (cur_getp != stop_getp);
1647	spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
1648}
1649
1650void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
1651{
1652	int found = false;
1653	unsigned long flags;
1654	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
1655	/* sync with any active CQ poll */
1656
1657	spin_lock_irqsave(&dev->flush_q_lock, flags);
1658	found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
1659	if (found)
1660		list_del(&qp->sq_entry);
1661	if (!qp->srq) {
1662		found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
1663		if (found)
1664			list_del(&qp->rq_entry);
1665	}
1666	spin_unlock_irqrestore(&dev->flush_q_lock, flags);
1667}
1668
1669int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
1670{
1671	struct ocrdma_pd *pd;
1672	struct ocrdma_qp *qp;
1673	struct ocrdma_dev *dev;
1674	struct ib_qp_attr attrs;
1675	int attr_mask;
1676	unsigned long flags;
1677
1678	qp = get_ocrdma_qp(ibqp);
1679	dev = get_ocrdma_dev(ibqp->device);
1680
1681	pd = qp->pd;
1682
1683	/* change the QP state to ERROR */
1684	if (qp->state != OCRDMA_QPS_RST) {
1685		attrs.qp_state = IB_QPS_ERR;
1686		attr_mask = IB_QP_STATE;
1687		_ocrdma_modify_qp(ibqp, &attrs, attr_mask);
1688	}
	/* Ensure that CQEs for a newly created QP (whose id may be the
	 * same as that of the QP just being destroyed) don't get
	 * discarded until the old CQEs are discarded.
	 */
1693	mutex_lock(&dev->dev_lock);
1694	(void) ocrdma_mbx_destroy_qp(dev, qp);
1695
	/*
	 * acquire the CQ lock while destroy is in progress, in order to
	 * protect against processing in-flight CQEs for this QP.
	 */
1700	spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
1701	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) {
1702		spin_lock(&qp->rq_cq->cq_lock);
1703		ocrdma_del_qpn_map(dev, qp);
1704		spin_unlock(&qp->rq_cq->cq_lock);
1705	} else {
1706		ocrdma_del_qpn_map(dev, qp);
1707	}
1708	spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
1709
1710	if (!pd->uctx) {
1711		ocrdma_discard_cqes(qp, qp->sq_cq);
1712		ocrdma_discard_cqes(qp, qp->rq_cq);
1713	}
1714	mutex_unlock(&dev->dev_lock);
1715
1716	if (pd->uctx) {
1717		ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
1718				PAGE_ALIGN(qp->sq.len));
1719		if (!qp->srq)
1720			ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
1721					PAGE_ALIGN(qp->rq.len));
1722	}
1723
1724	ocrdma_del_flush_qp(qp);
1725
1726	kfree(qp->wqe_wr_id_tbl);
1727	kfree(qp->rqe_wr_id_tbl);
1728	return 0;
1729}
1730
1731static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
1732				struct ib_udata *udata)
1733{
1734	int status;
1735	struct ocrdma_create_srq_uresp uresp;
1736
1737	memset(&uresp, 0, sizeof(uresp));
1738	uresp.rq_dbid = srq->rq.dbid;
1739	uresp.num_rq_pages = 1;
1740	uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va);
1741	uresp.rq_page_size = srq->rq.len;
1742	uresp.db_page_addr = dev->nic_info.unmapped_db +
1743	    (srq->pd->id * dev->nic_info.db_page_size);
1744	uresp.db_page_size = dev->nic_info.db_page_size;
1745	uresp.num_rqe_allocated = srq->rq.max_cnt;
1746	if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
1747		uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
1748		uresp.db_shift = 24;
1749	} else {
1750		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
1751		uresp.db_shift = 16;
1752	}
1753
1754	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1755	if (status)
1756		return status;
1757	status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
1758				 uresp.rq_page_size);
1759	if (status)
1760		return status;
1761	return status;
1762}
1763
1764int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1765		      struct ib_udata *udata)
1766{
1767	int status;
1768	struct ocrdma_pd *pd = get_ocrdma_pd(ibsrq->pd);
1769	struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
1770	struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq);
1771
1772	if (init_attr->srq_type != IB_SRQT_BASIC)
1773		return -EOPNOTSUPP;
1774
1775	if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
1776		return -EINVAL;
1777	if (init_attr->attr.max_wr > dev->attr.max_rqe)
1778		return -EINVAL;
1779
1780	spin_lock_init(&srq->q_lock);
1781	srq->pd = pd;
1782	srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
1783	status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
1784	if (status)
1785		return status;
1786
1787	if (!udata) {
1788		srq->rqe_wr_id_tbl = kcalloc(srq->rq.max_cnt, sizeof(u64),
1789					     GFP_KERNEL);
1790		if (!srq->rqe_wr_id_tbl) {
1791			status = -ENOMEM;
1792			goto arm_err;
1793		}
1794
1795		srq->bit_fields_len = (srq->rq.max_cnt / 32) +
1796		    (srq->rq.max_cnt % 32 ? 1 : 0);
1797		srq->idx_bit_fields =
1798		    kmalloc_array(srq->bit_fields_len, sizeof(u32),
1799				  GFP_KERNEL);
1800		if (!srq->idx_bit_fields) {
1801			status = -ENOMEM;
1802			goto arm_err;
1803		}
1804		memset(srq->idx_bit_fields, 0xff,
1805		       srq->bit_fields_len * sizeof(u32));
1806	}
1807
1808	if (init_attr->attr.srq_limit) {
1809		status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
1810		if (status)
1811			goto arm_err;
1812	}
1813
1814	if (udata) {
1815		status = ocrdma_copy_srq_uresp(dev, srq, udata);
1816		if (status)
1817			goto arm_err;
1818	}
1819
1820	return 0;
1821
1822arm_err:
1823	ocrdma_mbx_destroy_srq(dev, srq);
1824	kfree(srq->rqe_wr_id_tbl);
1825	kfree(srq->idx_bit_fields);
1826	return status;
1827}
1828
1829int ocrdma_modify_srq(struct ib_srq *ibsrq,
1830		      struct ib_srq_attr *srq_attr,
1831		      enum ib_srq_attr_mask srq_attr_mask,
1832		      struct ib_udata *udata)
1833{
1834	int status;
1835	struct ocrdma_srq *srq;
1836
1837	srq = get_ocrdma_srq(ibsrq);
1838	if (srq_attr_mask & IB_SRQ_MAX_WR)
1839		status = -EINVAL;
1840	else
1841		status = ocrdma_mbx_modify_srq(srq, srq_attr);
1842	return status;
1843}
1844
1845int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
1846{
1847	struct ocrdma_srq *srq;
1848
1849	srq = get_ocrdma_srq(ibsrq);
1850	return ocrdma_mbx_query_srq(srq, srq_attr);
1851}
1852
1853int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1854{
1855	struct ocrdma_srq *srq;
1856	struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
1857
1858	srq = get_ocrdma_srq(ibsrq);
1859
1860	ocrdma_mbx_destroy_srq(dev, srq);
1861
1862	if (srq->pd->uctx)
1863		ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
1864				PAGE_ALIGN(srq->rq.len));
1865
1866	kfree(srq->idx_bit_fields);
1867	kfree(srq->rqe_wr_id_tbl);
1868	return 0;
1869}
1870
1871/* unprivileged verbs and their support functions. */
1872static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
1873				struct ocrdma_hdr_wqe *hdr,
1874				const struct ib_send_wr *wr)
1875{
1876	struct ocrdma_ewqe_ud_hdr *ud_hdr =
1877		(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
1878	struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah);
1879
1880	ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn;
1881	if (qp->qp_type == IB_QPT_GSI)
1882		ud_hdr->qkey = qp->qkey;
1883	else
1884		ud_hdr->qkey = ud_wr(wr)->remote_qkey;
1885	ud_hdr->rsvd_ahid = ah->id;
1886	ud_hdr->hdr_type = ah->hdr_type;
1887	if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
1888		hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
1889}
1890
1891static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
1892			      struct ocrdma_sge *sge, int num_sge,
1893			      struct ib_sge *sg_list)
1894{
1895	int i;
1896
1897	for (i = 0; i < num_sge; i++) {
1898		sge[i].lrkey = sg_list[i].lkey;
1899		sge[i].addr_lo = sg_list[i].addr;
1900		sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
1901		sge[i].len = sg_list[i].length;
1902		hdr->total_len += sg_list[i].length;
1903	}
1904	if (num_sge == 0)
1905		memset(sge, 0, sizeof(*sge));
1906}
1907
1908static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
1909{
1910	uint32_t total_len = 0, i;
1911
1912	for (i = 0; i < num_sge; i++)
1913		total_len += sg_list[i].length;
1914	return total_len;
1915}
1917
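/* Build the data segment of a send WQE. For inline sends the payload is
 * copied directly into the WQE; otherwise a gather list of SGEs is built.
 * The resulting WQE size, in OCRDMA_WQE_STRIDE units, is encoded into the
 * control word.
 */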
1918static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
1919				    struct ocrdma_hdr_wqe *hdr,
1920				    struct ocrdma_sge *sge,
1921				    const struct ib_send_wr *wr, u32 wqe_size)
1922{
1923	int i;
1924	char *dpp_addr;
1925
1926	if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
1927		hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
1928		if (unlikely(hdr->total_len > qp->max_inline_data)) {
			pr_err("%s() unsupported inline data len=0x%x, supported len=0x%x\n",
			       __func__, hdr->total_len,
			       qp->max_inline_data);
1932			return -EINVAL;
1933		}
1934		dpp_addr = (char *)sge;
1935		for (i = 0; i < wr->num_sge; i++) {
1936			memcpy(dpp_addr,
1937			       (void *)(unsigned long)wr->sg_list[i].addr,
1938			       wr->sg_list[i].length);
1939			dpp_addr += wr->sg_list[i].length;
1940		}
1941
1942		wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
		if (hdr->total_len == 0)
1944			wqe_size += sizeof(struct ocrdma_sge);
1945		hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
1946	} else {
1947		ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
1948		if (wr->num_sge)
1949			wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
1950		else
1951			wqe_size += sizeof(struct ocrdma_sge);
1952		hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1953	}
1954	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
1955	return 0;
1956}
1957
1958static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1959			     const struct ib_send_wr *wr)
1960{
1961	struct ocrdma_sge *sge;
1962	u32 wqe_size = sizeof(*hdr);
1963
1964	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
1965		ocrdma_build_ud_hdr(qp, hdr, wr);
1966		sge = (struct ocrdma_sge *)(hdr + 2);
1967		wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
1968	} else {
1969		sge = (struct ocrdma_sge *)(hdr + 1);
1970	}
1971
1972	return ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1973}
1974
1975static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1976			      const struct ib_send_wr *wr)
1977{
1978	int status;
1979	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1980	struct ocrdma_sge *sge = ext_rw + 1;
1981	u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
1982
1983	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1984	if (status)
1985		return status;
1986	ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
1987	ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
1988	ext_rw->lrkey = rdma_wr(wr)->rkey;
1989	ext_rw->len = hdr->total_len;
1990	return 0;
1991}
1992
1993static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1994			      const struct ib_send_wr *wr)
1995{
1996	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1997	struct ocrdma_sge *sge = ext_rw + 1;
1998	u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
1999	    sizeof(struct ocrdma_hdr_wqe);
2000
2001	ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
2002	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2003	hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
2004	hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2005
2006	ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
2007	ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
2008	ext_rw->lrkey = rdma_wr(wr)->rkey;
2009	ext_rw->len = hdr->total_len;
2010}
2011
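/* Encode a fast-register page size as log2(pg_sz / 4096):
 * 4K -> 0, 8K -> 1, ..., 256M -> 16. An unsupported size yields 17.
 */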
2012static int get_encoded_page_size(int pg_sz)
2013{
	/* Max supported size is 256M (4096 << 16) */
	int i;

	for (i = 0; i < 17; i++)
2017		if (pg_sz == (4096 << i))
2018			break;
2019	return i;
2020}
2021
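/* Build a fast-register (IB_WR_REG_MR) WQE: encode the access flags, key,
 * length, virtual address and first-byte offset, and copy the MR's page
 * addresses into the pre-allocated PBL(s).
 */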
2022static int ocrdma_build_reg(struct ocrdma_qp *qp,
2023			    struct ocrdma_hdr_wqe *hdr,
2024			    const struct ib_reg_wr *wr)
2025{
2026	u64 fbo;
2027	struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
2028	struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr);
2029	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
2030	struct ocrdma_pbe *pbe;
2031	u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
2032	int num_pbes = 0, i;
2033
2034	wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
2035
2036	hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
2037	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2038
2039	if (wr->access & IB_ACCESS_LOCAL_WRITE)
2040		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
2041	if (wr->access & IB_ACCESS_REMOTE_WRITE)
2042		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
2043	if (wr->access & IB_ACCESS_REMOTE_READ)
2044		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
2045	hdr->lkey = wr->key;
2046	hdr->total_len = mr->ibmr.length;
2047
2048	fbo = mr->ibmr.iova - mr->pages[0];
2049
2050	fast_reg->va_hi = upper_32_bits(mr->ibmr.iova);
2051	fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff);
2052	fast_reg->fbo_hi = upper_32_bits(fbo);
2053	fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
2054	fast_reg->num_sges = mr->npages;
2055	fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size);
2056
2057	pbe = pbl_tbl->va;
2058	for (i = 0; i < mr->npages; i++) {
2059		u64 buf_addr = mr->pages[i];
2060
2061		pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
2062		pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
2063		num_pbes += 1;
2064		pbe++;
2065
		/* If the current PBL is full, move on to the next PBL. */
2069		if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) {
2070			pbl_tbl++;
2071			pbe = (struct ocrdma_pbe *)pbl_tbl->va;
2072		}
2073	}
2074
2075	return 0;
2076}
2077
2078static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
2079{
2080	u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
2081
2082	iowrite32(val, qp->sq_db);
2083}
2084
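/* Post a chain of send work requests. For each WR a WQE is built at the
 * SQ head, converted to little endian, made visible with a write barrier
 * and then the SQ doorbell is rung. Posting stops at the first WR that
 * fails, which is reported back through *bad_wr.
 */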
2085int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
2086		     const struct ib_send_wr **bad_wr)
2087{
2088	int status = 0;
2089	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
2090	struct ocrdma_hdr_wqe *hdr;
2091	unsigned long flags;
2092
2093	spin_lock_irqsave(&qp->q_lock, flags);
2094	if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
2095		spin_unlock_irqrestore(&qp->q_lock, flags);
2096		*bad_wr = wr;
2097		return -EINVAL;
2098	}
2099
2100	while (wr) {
2101		if (qp->qp_type == IB_QPT_UD &&
2102		    (wr->opcode != IB_WR_SEND &&
2103		     wr->opcode != IB_WR_SEND_WITH_IMM)) {
2104			*bad_wr = wr;
2105			status = -EINVAL;
2106			break;
2107		}
2108		if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
2109		    wr->num_sge > qp->sq.max_sges) {
2110			*bad_wr = wr;
2111			status = -ENOMEM;
2112			break;
2113		}
2114		hdr = ocrdma_hwq_head(&qp->sq);
2115		hdr->cw = 0;
2116		if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
2117			hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
2118		if (wr->send_flags & IB_SEND_FENCE)
2119			hdr->cw |=
2120			    (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
2121		if (wr->send_flags & IB_SEND_SOLICITED)
2122			hdr->cw |=
2123			    (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
2124		hdr->total_len = 0;
2125		switch (wr->opcode) {
2126		case IB_WR_SEND_WITH_IMM:
2127			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
2128			hdr->immdt = ntohl(wr->ex.imm_data);
2129			fallthrough;
2130		case IB_WR_SEND:
2131			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
			status = ocrdma_build_send(qp, hdr, wr);
2133			break;
2134		case IB_WR_SEND_WITH_INV:
2135			hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
2136			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
2137			hdr->lkey = wr->ex.invalidate_rkey;
2138			status = ocrdma_build_send(qp, hdr, wr);
2139			break;
2140		case IB_WR_RDMA_WRITE_WITH_IMM:
2141			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
2142			hdr->immdt = ntohl(wr->ex.imm_data);
2143			fallthrough;
2144		case IB_WR_RDMA_WRITE:
2145			hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
2146			status = ocrdma_build_write(qp, hdr, wr);
2147			break;
2148		case IB_WR_RDMA_READ:
2149			ocrdma_build_read(qp, hdr, wr);
2150			break;
2151		case IB_WR_LOCAL_INV:
2152			hdr->cw |=
2153			    (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
2154			hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) +
2155					sizeof(struct ocrdma_sge)) /
2156				OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
2157			hdr->lkey = wr->ex.invalidate_rkey;
2158			break;
2159		case IB_WR_REG_MR:
2160			status = ocrdma_build_reg(qp, hdr, reg_wr(wr));
2161			break;
2162		default:
2163			status = -EINVAL;
2164			break;
2165		}
2166		if (status) {
2167			*bad_wr = wr;
2168			break;
2169		}
2170		if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
2171			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
2172		else
2173			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
2174		qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
2175		ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
2176				   OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
2177		/* make sure wqe is written before adapter can access it */
2178		wmb();
2179		/* inform hw to start processing it */
2180		ocrdma_ring_sq_db(qp);
2181
2182		/* update pointer, counter for next wr */
2183		ocrdma_hwq_inc_head(&qp->sq);
2184		wr = wr->next;
2185	}
2186	spin_unlock_irqrestore(&qp->q_lock, flags);
2187	return status;
2188}
2189
2190static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
2191{
2192	u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
2193
2194	iowrite32(val, qp->rq_db);
2195}
2196
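/* Build a receive WQE from the work request's scatter list. The tag is 0
 * for an ordinary RQ and the shadow-table index for an SRQ.
 */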
2197static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe,
2198			     const struct ib_recv_wr *wr, u16 tag)
2199{
2200	u32 wqe_size = 0;
	struct ocrdma_sge *sge;

	if (wr->num_sge)
2203		wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
2204	else
2205		wqe_size = sizeof(*sge) + sizeof(*rqe);
2206
2207	rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
2208				OCRDMA_WQE_SIZE_SHIFT);
2209	rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
2210	rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2211	rqe->total_len = 0;
2212	rqe->rsvd_tag = tag;
2213	sge = (struct ocrdma_sge *)(rqe + 1);
2214	ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
2215	ocrdma_cpu_to_le32(rqe, wqe_size);
2216}
2217
2218int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
2219		     const struct ib_recv_wr **bad_wr)
2220{
2221	int status = 0;
2222	unsigned long flags;
2223	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
2224	struct ocrdma_hdr_wqe *rqe;
2225
2226	spin_lock_irqsave(&qp->q_lock, flags);
2227	if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
2228		spin_unlock_irqrestore(&qp->q_lock, flags);
2229		*bad_wr = wr;
2230		return -EINVAL;
2231	}
2232	while (wr) {
2233		if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
2234		    wr->num_sge > qp->rq.max_sges) {
2235			*bad_wr = wr;
2236			status = -ENOMEM;
2237			break;
2238		}
2239		rqe = ocrdma_hwq_head(&qp->rq);
2240		ocrdma_build_rqe(rqe, wr, 0);
2241
2242		qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
2243		/* make sure rqe is written before adapter can access it */
2244		wmb();
2245
2246		/* inform hw to start processing it */
2247		ocrdma_ring_rq_db(qp);
2248
2249		/* update pointer, counter for next wr */
2250		ocrdma_hwq_inc_head(&qp->rq);
2251		wr = wr->next;
2252	}
2253	spin_unlock_irqrestore(&qp->q_lock, flags);
2254	return status;
2255}
2256
/* CQEs for an SRQ's RQEs can arrive out of order. The returned index
 * selects the shadow-table entry in which the wr_id is stored; the same
 * tag/index comes back in the CQE so the completion can be matched to
 * its RQE.
 */
2262static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
2263{
2264	int row = 0;
2265	int indx = 0;
2266
2267	for (row = 0; row < srq->bit_fields_len; row++) {
2268		if (srq->idx_bit_fields[row]) {
2269			indx = ffs(srq->idx_bit_fields[row]);
2270			indx = (row * 32) + (indx - 1);
2271			BUG_ON(indx >= srq->rq.max_cnt);
2272			ocrdma_srq_toggle_bit(srq, indx);
2273			break;
2274		}
2275	}
2276
2277	BUG_ON(row == srq->bit_fields_len);
2278	return indx + 1; /* Use from index 1 */
2279}
2280
2281static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
2282{
2283	u32 val = srq->rq.dbid | (1 << 16);
2284
2285	iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
2286}
2287
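/* Post receive work requests to an SRQ. Each RQE is tagged with a free
 * shadow-table index so that the wr_id can be recovered when its
 * (possibly out-of-order) completion arrives.
 */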
2288int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
2289			 const struct ib_recv_wr **bad_wr)
2290{
2291	int status = 0;
2292	unsigned long flags;
2293	struct ocrdma_srq *srq;
2294	struct ocrdma_hdr_wqe *rqe;
2295	u16 tag;
2296
2297	srq = get_ocrdma_srq(ibsrq);
2298
2299	spin_lock_irqsave(&srq->q_lock, flags);
2300	while (wr) {
2301		if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
2302		    wr->num_sge > srq->rq.max_sges) {
2303			status = -ENOMEM;
2304			*bad_wr = wr;
2305			break;
2306		}
2307		tag = ocrdma_srq_get_idx(srq);
2308		rqe = ocrdma_hwq_head(&srq->rq);
2309		ocrdma_build_rqe(rqe, wr, tag);
2310
2311		srq->rqe_wr_id_tbl[tag] = wr->wr_id;
2312		/* make sure rqe is written before adapter can perform DMA */
2313		wmb();
2314		/* inform hw to start processing it */
2315		ocrdma_ring_srq_db(srq);
2316		/* update pointer, counter for next wr */
2317		ocrdma_hwq_inc_head(&srq->rq);
2318		wr = wr->next;
2319	}
2320	spin_unlock_irqrestore(&srq->q_lock, flags);
2321	return status;
2322}
2323
2324static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
2325{
2326	enum ib_wc_status ibwc_status;
2327
2328	switch (status) {
2329	case OCRDMA_CQE_GENERAL_ERR:
2330		ibwc_status = IB_WC_GENERAL_ERR;
2331		break;
2332	case OCRDMA_CQE_LOC_LEN_ERR:
2333		ibwc_status = IB_WC_LOC_LEN_ERR;
2334		break;
2335	case OCRDMA_CQE_LOC_QP_OP_ERR:
2336		ibwc_status = IB_WC_LOC_QP_OP_ERR;
2337		break;
2338	case OCRDMA_CQE_LOC_EEC_OP_ERR:
2339		ibwc_status = IB_WC_LOC_EEC_OP_ERR;
2340		break;
2341	case OCRDMA_CQE_LOC_PROT_ERR:
2342		ibwc_status = IB_WC_LOC_PROT_ERR;
2343		break;
2344	case OCRDMA_CQE_WR_FLUSH_ERR:
2345		ibwc_status = IB_WC_WR_FLUSH_ERR;
2346		break;
2347	case OCRDMA_CQE_MW_BIND_ERR:
2348		ibwc_status = IB_WC_MW_BIND_ERR;
2349		break;
2350	case OCRDMA_CQE_BAD_RESP_ERR:
2351		ibwc_status = IB_WC_BAD_RESP_ERR;
2352		break;
2353	case OCRDMA_CQE_LOC_ACCESS_ERR:
2354		ibwc_status = IB_WC_LOC_ACCESS_ERR;
2355		break;
2356	case OCRDMA_CQE_REM_INV_REQ_ERR:
2357		ibwc_status = IB_WC_REM_INV_REQ_ERR;
2358		break;
2359	case OCRDMA_CQE_REM_ACCESS_ERR:
2360		ibwc_status = IB_WC_REM_ACCESS_ERR;
2361		break;
2362	case OCRDMA_CQE_REM_OP_ERR:
2363		ibwc_status = IB_WC_REM_OP_ERR;
2364		break;
2365	case OCRDMA_CQE_RETRY_EXC_ERR:
2366		ibwc_status = IB_WC_RETRY_EXC_ERR;
2367		break;
2368	case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
2369		ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
2370		break;
2371	case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
2372		ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
2373		break;
2374	case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
2375		ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
2376		break;
2377	case OCRDMA_CQE_REM_ABORT_ERR:
2378		ibwc_status = IB_WC_REM_ABORT_ERR;
2379		break;
2380	case OCRDMA_CQE_INV_EECN_ERR:
2381		ibwc_status = IB_WC_INV_EECN_ERR;
2382		break;
2383	case OCRDMA_CQE_INV_EEC_STATE_ERR:
2384		ibwc_status = IB_WC_INV_EEC_STATE_ERR;
2385		break;
2386	case OCRDMA_CQE_FATAL_ERR:
2387		ibwc_status = IB_WC_FATAL_ERR;
2388		break;
2389	case OCRDMA_CQE_RESP_TIMEOUT_ERR:
2390		ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
2391		break;
2392	default:
2393		ibwc_status = IB_WC_GENERAL_ERR;
2394		break;
2395	}
2396	return ibwc_status;
2397}
2398
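/* Fill in the work completion for a send-queue entry: recover the wr_id
 * from the shadow table and translate the WQE opcode, read back from the
 * little-endian control word, into the corresponding ib_wc opcode.
 */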
2399static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2400		      u32 wqe_idx)
2401{
2402	struct ocrdma_hdr_wqe *hdr;
2403	struct ocrdma_sge *rw;
2404	int opcode;
2405
2406	hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
2407
2408	ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
2409	/* Undo the hdr->cw swap */
2410	opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
2411	switch (opcode) {
2412	case OCRDMA_WRITE:
2413		ibwc->opcode = IB_WC_RDMA_WRITE;
2414		break;
2415	case OCRDMA_READ:
2416		rw = (struct ocrdma_sge *)(hdr + 1);
2417		ibwc->opcode = IB_WC_RDMA_READ;
2418		ibwc->byte_len = rw->len;
2419		break;
2420	case OCRDMA_SEND:
2421		ibwc->opcode = IB_WC_SEND;
2422		break;
2423	case OCRDMA_FR_MR:
2424		ibwc->opcode = IB_WC_REG_MR;
2425		break;
2426	case OCRDMA_LKEY_INV:
2427		ibwc->opcode = IB_WC_LOCAL_INV;
2428		break;
2429	default:
2430		ibwc->status = IB_WC_GENERAL_ERR;
		pr_err("%s() invalid opcode received = 0x%x\n",
		       __func__, opcode);
2433		break;
2434	}
2435}
2436
2437static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
2438						struct ocrdma_cqe *cqe)
2439{
2440	if (is_cqe_for_sq(cqe)) {
2441		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2442				cqe->flags_status_srcqpn) &
2443					~OCRDMA_CQE_STATUS_MASK);
2444		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2445				cqe->flags_status_srcqpn) |
2446				(OCRDMA_CQE_WR_FLUSH_ERR <<
2447					OCRDMA_CQE_STATUS_SHIFT));
2448	} else {
2449		if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2450			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2451					cqe->flags_status_srcqpn) &
2452						~OCRDMA_CQE_UD_STATUS_MASK);
2453			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2454					cqe->flags_status_srcqpn) |
2455					(OCRDMA_CQE_WR_FLUSH_ERR <<
2456						OCRDMA_CQE_UD_STATUS_SHIFT));
2457		} else {
2458			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2459					cqe->flags_status_srcqpn) &
2460						~OCRDMA_CQE_STATUS_MASK);
2461			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2462					cqe->flags_status_srcqpn) |
2463					(OCRDMA_CQE_WR_FLUSH_ERR <<
2464						OCRDMA_CQE_STATUS_SHIFT));
2465		}
2466	}
2467}
2468
2469static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2470				  struct ocrdma_qp *qp, int status)
2471{
2472	bool expand = false;
2473
2474	ibwc->byte_len = 0;
2475	ibwc->qp = &qp->ibqp;
2476	ibwc->status = ocrdma_to_ibwc_err(status);
2477
2478	ocrdma_flush_qp(qp);
2479	ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL);
2480
	/* If WQEs/RQEs are still pending for which CQEs must be returned,
	 * trigger expanding this CQE into flush CQEs for them.
	 */
2484	if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
2485		expand = true;
2486		ocrdma_set_cqe_status_flushed(qp, cqe);
2487	}
2488	return expand;
2489}
2490
2491static int ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2492				  struct ocrdma_qp *qp, int status)
2493{
2494	ibwc->opcode = IB_WC_RECV;
2495	ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2496	ocrdma_hwq_inc_tail(&qp->rq);
2497
2498	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2499}
2500
2501static int ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2502				  struct ocrdma_qp *qp, int status)
2503{
2504	ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2505	ocrdma_hwq_inc_tail(&qp->sq);
2506
2507	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2508}
2510
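/* Handle an error CQE for the send queue. On return, *polled tells the
 * caller whether a work completion was produced, *stop tells it to stop
 * polling this CQ, and the return value ("expand") requests that the same
 * CQE be reused to flush further pending WQEs/RQEs.
 */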
2511static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
2512				 struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
2513				 bool *polled, bool *stop)
2514{
2515	bool expand;
2516	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2517	int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2518		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2519	if (status < OCRDMA_MAX_CQE_ERR)
2520		atomic_inc(&dev->cqe_err_stats[status]);
2521
	/* When the hardware SQ is empty but the RQ is not, keep the CQE
	 * so that the CQ event is raised again.
	 */
2525	if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
		/* When the RQ and SQ share the same CQ, it is safe to
		 * return flush CQEs for the RQEs.
		 */
2529		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2530			*polled = true;
2531			status = OCRDMA_CQE_WR_FLUSH_ERR;
2532			expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2533		} else {
			/* Stop processing further CQEs, as this CQE is needed
			 * to trigger the CQ event on the RQ's buddy CQ.
			 * When the QP is destroyed, this CQE is removed from
			 * the CQ's hardware queue.
			 */
2539			*polled = false;
2540			*stop = true;
2541			expand = false;
2542		}
2543	} else if (is_hw_sq_empty(qp)) {
2544		/* Do nothing */
2545		expand = false;
2546		*polled = false;
2547		*stop = false;
2548	} else {
2549		*polled = true;
2550		expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2551	}
2552	return expand;
2553}
2554
2555static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
2556				     struct ocrdma_cqe *cqe,
2557				     struct ib_wc *ibwc, bool *polled)
2558{
2559	bool expand = false;
2560	int tail = qp->sq.tail;
2561	u32 wqe_idx;
2562
2563	if (!qp->wqe_wr_id_tbl[tail].signaled) {
2564		*polled = false;    /* WC cannot be consumed yet */
2565	} else {
2566		ibwc->status = IB_WC_SUCCESS;
2567		ibwc->wc_flags = 0;
2568		ibwc->qp = &qp->ibqp;
2569		ocrdma_update_wc(qp, ibwc, tail);
2570		*polled = true;
2571	}
2572	wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
2573			OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
2574	if (tail != wqe_idx)
2575		expand = true; /* Coalesced CQE can't be consumed yet */
2576
2577	ocrdma_hwq_inc_tail(&qp->sq);
2578	return expand;
2579}
2580
2581static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2582			     struct ib_wc *ibwc, bool *polled, bool *stop)
2583{
2584	int status;
2585	bool expand;
2586
2587	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2588		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2589
2590	if (status == OCRDMA_CQE_SUCCESS)
2591		expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
2592	else
2593		expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
2594	return expand;
2595}
2596
2597static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc,
2598				 struct ocrdma_cqe *cqe)
2599{
2600	int status;
2601	u16 hdr_type = 0;
2602
2603	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2604		OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
2605	ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
2606						OCRDMA_CQE_SRCQP_MASK;
2607	ibwc->pkey_index = 0;
2608	ibwc->wc_flags = IB_WC_GRH;
2609	ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2610			  OCRDMA_CQE_UD_XFER_LEN_SHIFT) &
2611			  OCRDMA_CQE_UD_XFER_LEN_MASK;
2612
2613	if (ocrdma_is_udp_encap_supported(dev)) {
2614		hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2615			    OCRDMA_CQE_UD_L3TYPE_SHIFT) &
2616			    OCRDMA_CQE_UD_L3TYPE_MASK;
2617		ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
2618		ibwc->network_hdr_type = hdr_type;
2619	}
2620
2621	return status;
2622}
2623
2624static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
2625				       struct ocrdma_cqe *cqe,
2626				       struct ocrdma_qp *qp)
2627{
2628	unsigned long flags;
2629	struct ocrdma_srq *srq;
2630	u32 wqe_idx;
2631
2632	srq = get_ocrdma_srq(qp->ibqp.srq);
2633	wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
2634		OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
2635	BUG_ON(wqe_idx < 1);
2636
2637	ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
2638	spin_lock_irqsave(&srq->q_lock, flags);
2639	ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
2640	spin_unlock_irqrestore(&srq->q_lock, flags);
2641	ocrdma_hwq_inc_tail(&srq->rq);
2642}
2643
2644static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2645				struct ib_wc *ibwc, bool *polled, bool *stop,
2646				int status)
2647{
2648	bool expand;
2649	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2650
2651	if (status < OCRDMA_MAX_CQE_ERR)
2652		atomic_inc(&dev->cqe_err_stats[status]);
2653
	/* When the hardware RQ is empty but the SQ is not, keep the CQE
	 * so that the CQ event is raised again.
	 */
2657	if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
2658		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2659			*polled = true;
2660			status = OCRDMA_CQE_WR_FLUSH_ERR;
2661			expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2662		} else {
2663			*polled = false;
2664			*stop = true;
2665			expand = false;
2666		}
2667	} else if (is_hw_rq_empty(qp)) {
2668		/* Do nothing */
2669		expand = false;
2670		*polled = false;
2671		*stop = false;
2672	} else {
2673		*polled = true;
2674		expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2675	}
2676	return expand;
2677}
2678
2679static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
2680				     struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
2681{
2682	struct ocrdma_dev *dev;
2683
2684	dev = get_ocrdma_dev(qp->ibqp.device);
2685	ibwc->opcode = IB_WC_RECV;
2686	ibwc->qp = &qp->ibqp;
2687	ibwc->status = IB_WC_SUCCESS;
2688
2689	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2690		ocrdma_update_ud_rcqe(dev, ibwc, cqe);
2691	else
2692		ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
2693
2694	if (is_cqe_imm(cqe)) {
2695		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2696		ibwc->wc_flags |= IB_WC_WITH_IMM;
2697	} else if (is_cqe_wr_imm(cqe)) {
2698		ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
2699		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2700		ibwc->wc_flags |= IB_WC_WITH_IMM;
2701	} else if (is_cqe_invalidated(cqe)) {
2702		ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
2703		ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
2704	}
2705	if (qp->ibqp.srq) {
2706		ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
2707	} else {
2708		ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2709		ocrdma_hwq_inc_tail(&qp->rq);
2710	}
2711}
2712
2713static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2714			     struct ib_wc *ibwc, bool *polled, bool *stop)
2715{
2716	int status;
2717	bool expand = false;
2718
2719	ibwc->wc_flags = 0;
2720	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2721		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2722					OCRDMA_CQE_UD_STATUS_MASK) >>
2723					OCRDMA_CQE_UD_STATUS_SHIFT;
2724	} else {
2725		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2726			     OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2727	}
2728
2729	if (status == OCRDMA_CQE_SUCCESS) {
2730		*polled = true;
2731		ocrdma_poll_success_rcqe(qp, cqe, ibwc);
2732	} else {
2733		expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
2734					      status);
2735	}
2736	return expand;
2737}
2738
2739static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
2740				   u16 cur_getp)
2741{
2742	if (cq->phase_change) {
2743		if (cur_getp == 0)
2744			cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
2745	} else {
2746		/* clear valid bit */
2747		cqe->flags_status_srcqpn = 0;
2748	}
2749}
2750
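/* Poll up to num_entries completions from the hardware CQ. CQE validity
 * is tracked either by the phase bit or by clearing the CQE's valid
 * dword; CQEs with a zero QPN have already been discarded and are
 * skipped. The CQ doorbell is rung once at the end with the number of
 * hardware CQEs consumed.
 */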
2751static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
2752			    struct ib_wc *ibwc)
2753{
2754	u16 qpn = 0;
2755	int i = 0;
2756	bool expand = false;
2757	int polled_hw_cqes = 0;
2758	struct ocrdma_qp *qp = NULL;
2759	struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
2760	struct ocrdma_cqe *cqe;
	u16 cur_getp;
	bool polled = false;
	bool stop = false;
2762
2763	cur_getp = cq->getp;
2764	while (num_entries) {
2765		cqe = cq->va + cur_getp;
2766		/* check whether valid cqe or not */
2767		if (!is_cqe_valid(cq, cqe))
2768			break;
2769		qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
2770		/* ignore discarded cqe */
2771		if (qpn == 0)
2772			goto skip_cqe;
2773		qp = dev->qp_tbl[qpn];
2774		BUG_ON(qp == NULL);
2775
2776		if (is_cqe_for_sq(cqe)) {
2777			expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
2778						  &stop);
2779		} else {
2780			expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
2781						  &stop);
2782		}
2783		if (expand)
2784			goto expand_cqe;
2785		if (stop)
2786			goto stop_cqe;
2787		/* clear qpn to avoid duplicate processing by discard_cqe() */
2788		cqe->cmn.qpn = 0;
2789skip_cqe:
2790		polled_hw_cqes += 1;
2791		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
2792		ocrdma_change_cq_phase(cq, cqe, cur_getp);
2793expand_cqe:
2794		if (polled) {
2795			num_entries -= 1;
2796			i += 1;
2797			ibwc = ibwc + 1;
2798			polled = false;
2799		}
2800	}
2801stop_cqe:
2802	cq->getp = cur_getp;
2803
2804	if (polled_hw_cqes)
2805		ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes);
2806
2807	return i;
2808}
2809
/* Insert flush-error CQEs for a QP whose SQ's or RQ's CQ matches the CQ
 * under poll.
 */
2811static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
2812			      struct ocrdma_qp *qp, struct ib_wc *ibwc)
2813{
2814	int err_cqes = 0;
2815
2816	while (num_entries) {
2817		if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
2818			break;
2819		if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
2820			ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2821			ocrdma_hwq_inc_tail(&qp->sq);
2822		} else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
2823			ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2824			ocrdma_hwq_inc_tail(&qp->rq);
2825		} else {
2826			return err_cqes;
2827		}
2828		ibwc->byte_len = 0;
2829		ibwc->status = IB_WC_WR_FLUSH_ERR;
2830		ibwc = ibwc + 1;
2831		err_cqes += 1;
2832		num_entries -= 1;
2833	}
2834	return err_cqes;
2835}
2836
2837int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
2838{
2839	int cqes_to_poll = num_entries;
2840	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
2841	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
2842	int num_os_cqe = 0, err_cqes = 0;
2843	struct ocrdma_qp *qp;
2844	unsigned long flags;
2845
2846	/* poll cqes from adapter CQ */
2847	spin_lock_irqsave(&cq->cq_lock, flags);
2848	num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
2849	spin_unlock_irqrestore(&cq->cq_lock, flags);
2850	cqes_to_poll -= num_os_cqe;
2851
2852	if (cqes_to_poll) {
2853		wc = wc + num_os_cqe;
		/* The adapter returns a single error CQE when a QP moves to
		 * the error state, so synthesize flush-error CQEs
		 * (IB_WC_WR_FLUSH_ERR) for the WQEs and RQEs still pending
		 * on the QP's SQ and RQ that use this CQ.
		 */
2859		spin_lock_irqsave(&dev->flush_q_lock, flags);
2860		list_for_each_entry(qp, &cq->sq_head, sq_entry) {
2861			if (cqes_to_poll == 0)
2862				break;
2863			err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
2864			cqes_to_poll -= err_cqes;
2865			num_os_cqe += err_cqes;
2866			wc = wc + err_cqes;
2867		}
2868		spin_unlock_irqrestore(&dev->flush_q_lock, flags);
2869	}
2870	return num_os_cqe;
2871}
2872
2873int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
2874{
2875	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
2876	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
2877	u16 cq_id;
2878	unsigned long flags;
2879	bool arm_needed = false, sol_needed = false;
2880
2881	cq_id = cq->id;
2882
2883	spin_lock_irqsave(&cq->cq_lock, flags);
2884	if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
2885		arm_needed = true;
2886	if (cq_flags & IB_CQ_SOLICITED)
2887		sol_needed = true;
2888
2889	ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
2890	spin_unlock_irqrestore(&cq->cq_lock, flags);
2891
2892	return 0;
2893}
2894
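/* Allocate a fast-register memory region: reserve a page array large
 * enough for max_num_sg pages, build the PBL table and register the MR
 * with the adapter so that it can later be mapped through IB_WR_REG_MR
 * WQEs.
 */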
2895struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
2896			      u32 max_num_sg)
2897{
2898	int status;
2899	struct ocrdma_mr *mr;
2900	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
2901	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
2902
2903	if (mr_type != IB_MR_TYPE_MEM_REG)
2904		return ERR_PTR(-EINVAL);
2905
2906	if (max_num_sg > dev->attr.max_pages_per_frmr)
2907		return ERR_PTR(-EINVAL);
2908
2909	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2910	if (!mr)
2911		return ERR_PTR(-ENOMEM);
2912
2913	mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
2914	if (!mr->pages) {
2915		status = -ENOMEM;
2916		goto pl_err;
2917	}
2918
2919	status = ocrdma_get_pbl_info(dev, mr, max_num_sg);
2920	if (status)
2921		goto pbl_err;
2922	mr->hwmr.fr_mr = 1;
2923	mr->hwmr.remote_rd = 0;
2924	mr->hwmr.remote_wr = 0;
2925	mr->hwmr.local_rd = 0;
2926	mr->hwmr.local_wr = 0;
2927	mr->hwmr.mw_bind = 0;
2928	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
2929	if (status)
2930		goto pbl_err;
2931	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0);
2932	if (status)
2933		goto mbx_err;
2934	mr->ibmr.rkey = mr->hwmr.lkey;
2935	mr->ibmr.lkey = mr->hwmr.lkey;
2936	dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
2937		(unsigned long) mr;
2938	return &mr->ibmr;
2939mbx_err:
2940	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
2941pbl_err:
2942	kfree(mr->pages);
2943pl_err:
2944	kfree(mr);
	return ERR_PTR(status);
2946}
2947
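/* ib_sg_to_pages() callback used by ocrdma_map_mr_sg(): record one page
 * address in the MR's page array, failing once the array is full.
 */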
2948static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
2949{
2950	struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
2951
2952	if (unlikely(mr->npages == mr->hwmr.num_pbes))
2953		return -ENOMEM;
2954
2955	mr->pages[mr->npages++] = addr;
2956
2957	return 0;
2958}
2959
2960int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
2961		     unsigned int *sg_offset)
2962{
2963	struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
2964
2965	mr->npages = 0;
2966
2967	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ocrdma_set_page);
2968}
2969