// SPDX-License-Identifier: GPL-2.0

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

/* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */

#include <linux/vmalloc.h>
#include <net/addrconf.h>
#include <rdma/erdma-abi.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>

#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_verbs.h"

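/*
 * Pack a queue buffer's MTT into the CREATE_QP request: small tables are
 * copied inline into the command, larger ones are referenced through the
 * one-level MTT buffer.
 */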
static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg,
				      u64 *addr0, u64 *addr1)
{
	struct erdma_mtt *mtt = mem->mtt;

	if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) {
		*addr0 = mtt->buf_dma;
		*cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
				   ERDMA_MR_MTT_1LEVEL);
	} else {
		*addr0 = mtt->buf[0];
		memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1));
		*cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
				   ERDMA_MR_MTT_0LEVEL);
	}
}

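/* Build and post the CREATE_QP command, covering both kernel and user QPs. */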
static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
{
	struct erdma_dev *dev = to_edev(qp->ibqp.device);
	struct erdma_pd *pd = to_epd(qp->ibqp.pd);
	struct erdma_cmdq_create_qp_req req;
	struct erdma_uqp *user_qp;
	u64 resp0, resp1;
	int err;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_CREATE_QP);

	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
			      ilog2(qp->attrs.sq_size)) |
		   FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
			      ilog2(qp->attrs.rq_size)) |
		   FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);

	if (rdma_is_kernel_res(&qp->ibqp.res)) {
		u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;

		req.sq_cqn_mtt_cfg =
			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
				   pgsz_range) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
		req.rq_cqn_mtt_cfg =
			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
				   pgsz_range) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);

		req.sq_mtt_cfg =
			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
				   ERDMA_MR_MTT_0LEVEL);
		req.rq_mtt_cfg = req.sq_mtt_cfg;

		req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
		req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
		req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr +
					  (qp->attrs.sq_size << SQEBB_SHIFT);
		req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr +
					  (qp->attrs.rq_size << RQE_SHIFT);
	} else {
		user_qp = &qp->user_qp;
		req.sq_cqn_mtt_cfg = FIELD_PREP(
			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
			ilog2(user_qp->sq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
		req.sq_cqn_mtt_cfg |=
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);

		req.rq_cqn_mtt_cfg = FIELD_PREP(
			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
			ilog2(user_qp->rq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
		req.rq_cqn_mtt_cfg |=
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);

		req.sq_mtt_cfg = user_qp->sq_mem.page_offset;
		req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
					     user_qp->sq_mem.mtt_nents);

		req.rq_mtt_cfg = user_qp->rq_mem.page_offset;
		req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
					     user_qp->rq_mem.mtt_nents);

		assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg,
					  &req.sq_buf_addr, req.sq_mtt_entry);
		assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg,
					  &req.rq_buf_addr, req.rq_mtt_entry);

		req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
		req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;

		if (uctx->ext_db.enable) {
			req.sq_cqn_mtt_cfg |=
				FIELD_PREP(ERDMA_CMD_CREATE_QP_DB_CFG_MASK, 1);
			req.db_cfg =
				FIELD_PREP(ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK,
					   uctx->ext_db.sdb_off) |
				FIELD_PREP(ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK,
					   uctx->ext_db.rdb_off);
		}
	}

	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
				  &resp1);
	if (!err)
		qp->attrs.cookie =
			FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);

	return err;
}

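/*
 * Register a memory region with hardware via the REG_MR command. The MTT is
 * passed inline when it fits, otherwise through a one-level or multi-level
 * (scattered) table.
 */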
static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
{
	struct erdma_pd *pd = to_epd(mr->ibmr.pd);
	u32 mtt_level = ERDMA_MR_MTT_0LEVEL;
	struct erdma_cmdq_reg_mr_req req;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);

	if (mr->type == ERDMA_MR_TYPE_FRMR ||
	    mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) {
		if (mr->mem.mtt->continuous) {
			req.phy_addr[0] = mr->mem.mtt->buf_dma;
			mtt_level = ERDMA_MR_MTT_1LEVEL;
		} else {
			req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist);
			mtt_level = mr->mem.mtt->level;
		}
	} else if (mr->type != ERDMA_MR_TYPE_DMA) {
		memcpy(req.phy_addr, mr->mem.mtt->buf,
		       MTT_SIZE(mr->mem.page_cnt));
	}

	req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
		   FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
		   FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
	req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
		   FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
		   FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
	req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
			      ilog2(mr->mem.page_size)) |
		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) |
		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);

	if (mr->type == ERDMA_MR_TYPE_DMA)
		goto post_cmd;

	if (mr->type == ERDMA_MR_TYPE_NORMAL) {
		req.start_va = mr->mem.va;
		req.size = mr->mem.len;
	}

	if (!mr->mem.mtt->continuous && mr->mem.mtt->level > 1) {
		req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1);
		req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK,
				       PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT);
		req.size_h = upper_32_bits(mr->mem.len);
		req.mtt_cnt_h = mr->mem.page_cnt >> 20;
	}

post_cmd:
	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}

static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
{
	struct erdma_dev *dev = to_edev(cq->ibcq.device);
	struct erdma_cmdq_create_cq_req req;
	struct erdma_mem *mem;
	u32 page_size;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_CREATE_CQ);

	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
		   FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);

	if (rdma_is_kernel_res(&cq->ibcq.res)) {
		page_size = SZ_32M;
		req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
				       ilog2(page_size) - ERDMA_HW_PAGE_SHIFT);
		req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
		req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);

		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
			    FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
				       ERDMA_MR_MTT_0LEVEL);

		req.first_page_offset = 0;
		req.cq_db_info_addr =
			cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT);
	} else {
		mem = &cq->user_cq.qbuf_mem;
		req.cfg0 |=
			FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
				   ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT);
		if (mem->mtt_nents == 1) {
			req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]);
			req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]);
			req.cfg1 |=
				FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
					   ERDMA_MR_MTT_0LEVEL);
		} else {
			req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma);
			req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma);
			req.cfg1 |=
				FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
					   ERDMA_MR_MTT_1LEVEL);
		}
		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
				       mem->mtt_nents);

		req.first_page_offset = mem->page_offset;
		req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;

		if (uctx->ext_db.enable) {
			req.cfg1 |= FIELD_PREP(
				ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1);
			req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK,
					      uctx->ext_db.cdb_off);
		}
	}

	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}

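/*
 * Simple bitmap-based index allocator. Searching starts from the last
 * allocation point to spread indexes round-robin and wraps around once the
 * end of the bitmap is reached.
 */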
static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
{
	int idx;
	unsigned long flags;

	spin_lock_irqsave(&res_cb->lock, flags);
	idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
				 res_cb->next_alloc_idx);
	if (idx == res_cb->max_cap) {
		idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
		if (idx == res_cb->max_cap) {
			res_cb->next_alloc_idx = 1;
			spin_unlock_irqrestore(&res_cb->lock, flags);
			return -ENOSPC;
		}
	}

	set_bit(idx, res_cb->bitmap);
	res_cb->next_alloc_idx = idx + 1;
	spin_unlock_irqrestore(&res_cb->lock, flags);

	return idx;
}

static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
{
	unsigned long flags;
	u32 used;

	spin_lock_irqsave(&res_cb->lock, flags);
	used = __test_and_clear_bit(idx, res_cb->bitmap);
	spin_unlock_irqrestore(&res_cb->lock, flags);
	WARN_ON(!used);
}

static struct rdma_user_mmap_entry *
erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
			     u32 size, u8 mmap_flag, u64 *mmap_offset)
{
	struct erdma_user_mmap_entry *entry =
		kzalloc(sizeof(*entry), GFP_KERNEL);
	int ret;

	if (!entry)
		return NULL;

	entry->address = (u64)address;
	entry->mmap_flag = mmap_flag;

	size = PAGE_ALIGN(size);

	ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
					  size);
	if (ret) {
		kfree(entry);
		return NULL;
	}

	*mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);

	return &entry->rdma_entry;
}

int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
		       struct ib_udata *unused)
{
	struct erdma_dev *dev = to_edev(ibdev);

	memset(attr, 0, sizeof(*attr));

	attr->max_mr_size = dev->attrs.max_mr_size;
	attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
	attr->vendor_part_id = dev->pdev->device;
	attr->hw_ver = dev->pdev->revision;
	attr->max_qp = dev->attrs.max_qp - 1;
	attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
	attr->max_qp_rd_atom = dev->attrs.max_ord;
	attr->max_qp_init_rd_atom = dev->attrs.max_ird;
	attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
	attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
	attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
	ibdev->local_dma_lkey = dev->attrs.local_dma_key;
	attr->max_send_sge = dev->attrs.max_send_sge;
	attr->max_recv_sge = dev->attrs.max_recv_sge;
	attr->max_sge_rd = dev->attrs.max_sge_rd;
	attr->max_cq = dev->attrs.max_cq - 1;
	attr->max_cqe = dev->attrs.max_cqe;
	attr->max_mr = dev->attrs.max_mr;
	attr->max_pd = dev->attrs.max_pd;
	attr->max_mw = dev->attrs.max_mw;
	attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
	attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;

	if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
		attr->atomic_cap = IB_ATOMIC_GLOB;

	attr->fw_ver = dev->attrs.fw_version;

	if (dev->netdev)
		addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
				    dev->netdev->dev_addr);

	return 0;
}

int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
		    union ib_gid *gid)
{
	struct erdma_dev *dev = to_edev(ibdev);

	memset(gid, 0, sizeof(*gid));
	ether_addr_copy(gid->raw, dev->attrs.peer_addr);

	return 0;
}

int erdma_query_port(struct ib_device *ibdev, u32 port,
		     struct ib_port_attr *attr)
{
	struct erdma_dev *dev = to_edev(ibdev);
	struct net_device *ndev = dev->netdev;

	memset(attr, 0, sizeof(*attr));

	attr->gid_tbl_len = 1;
	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
	attr->max_msg_sz = -1;

	if (!ndev)
		goto out;

	ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
	attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
	attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
	if (netif_running(ndev) && netif_carrier_ok(ndev))
		dev->state = IB_PORT_ACTIVE;
	else
		dev->state = IB_PORT_DOWN;
	attr->state = dev->state;

out:
	if (dev->state == IB_PORT_ACTIVE)
		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	else
		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;

	return 0;
}

int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
			     struct ib_port_immutable *port_immutable)
{
	port_immutable->gid_tbl_len = 1;
	port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;

	return 0;
}

int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct erdma_pd *pd = to_epd(ibpd);
	struct erdma_dev *dev = to_edev(ibpd->device);
	int pdn;

	pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
	if (pdn < 0)
		return pdn;

	pd->pdn = pdn;

	return 0;
}

int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct erdma_pd *pd = to_epd(ibpd);
	struct erdma_dev *dev = to_edev(ibpd->device);

	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);

	return 0;
}

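/*
 * Delayed work that issues a REFLUSH command so hardware flushes the
 * outstanding WRs of a kernel QP, passing the current SQ/RQ producer
 * indexes.
 */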
static void erdma_flush_worker(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct erdma_qp *qp =
		container_of(dwork, struct erdma_qp, reflush_dwork);
	struct erdma_cmdq_reflush_req req;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_REFLUSH);
	req.qpn = QP_ID(qp);
	req.sq_pi = qp->kern_qp.sq_pi;
	req.rq_pi = qp->kern_qp.rq_pi;
	erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
}

static int erdma_qp_validate_cap(struct erdma_dev *dev,
				 struct ib_qp_init_attr *attrs)
{
	if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
	    (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
	    (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
	    (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
	    (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
	    !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
		return -EINVAL;
	}

	return 0;
}

static int erdma_qp_validate_attr(struct erdma_dev *dev,
				  struct ib_qp_init_attr *attrs)
{
	if (attrs->qp_type != IB_QPT_RC)
		return -EOPNOTSUPP;

	if (attrs->srq)
		return -EOPNOTSUPP;

	if (!attrs->send_cq || !attrs->recv_cq)
		return -EOPNOTSUPP;

	return 0;
}

static void free_kernel_qp(struct erdma_qp *qp)
{
	struct erdma_dev *dev = qp->dev;

	vfree(qp->kern_qp.swr_tbl);
	vfree(qp->kern_qp.rwr_tbl);

	if (qp->kern_qp.sq_buf)
		dma_free_coherent(
			&dev->pdev->dev,
			WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
			qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);

	if (qp->kern_qp.rq_buf)
		dma_free_coherent(
			&dev->pdev->dev,
			WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
			qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
}

static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
			  struct ib_qp_init_attr *attrs)
{
	struct erdma_kqp *kqp = &qp->kern_qp;
	int size;

	if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
		kqp->sig_all = 1;

	kqp->sq_pi = 0;
	kqp->sq_ci = 0;
	kqp->rq_pi = 0;
	kqp->rq_ci = 0;
	kqp->hw_sq_db =
		dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
	kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;

	kqp->swr_tbl = vmalloc_array(qp->attrs.sq_size, sizeof(u64));
	kqp->rwr_tbl = vmalloc_array(qp->attrs.rq_size, sizeof(u64));
	if (!kqp->swr_tbl || !kqp->rwr_tbl)
		goto err_out;

	size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
	kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
					 &kqp->sq_buf_dma_addr, GFP_KERNEL);
	if (!kqp->sq_buf)
		goto err_out;

	size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
	kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
					 &kqp->rq_buf_dma_addr, GFP_KERNEL);
	if (!kqp->rq_buf)
		goto err_out;

	kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT);
	kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT);

	return 0;

err_out:
	free_kernel_qp(qp);
	return -ENOMEM;
}

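/*
 * Walk the umem in page_size blocks and record each block's DMA address in
 * the lowest-level MTT.
 */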
static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem)
{
	struct erdma_mtt *mtt = mem->mtt;
	struct ib_block_iter biter;
	u32 idx = 0;

	while (mtt->low_level)
		mtt = mtt->low_level;

	rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size)
		mtt->buf[idx++] = rdma_block_iter_dma_address(&biter);
}

static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev,
					       size_t size)
{
	struct erdma_mtt *mtt;

	mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
	if (!mtt)
		return ERR_PTR(-ENOMEM);

	mtt->size = size;
	mtt->buf = kzalloc(mtt->size, GFP_KERNEL);
	if (!mtt->buf)
		goto err_free_mtt;

	mtt->continuous = true;
	mtt->buf_dma = dma_map_single(&dev->pdev->dev, mtt->buf, mtt->size,
				      DMA_TO_DEVICE);
	if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma))
		goto err_free_mtt_buf;

	return mtt;

err_free_mtt_buf:
	kfree(mtt->buf);

err_free_mtt:
	kfree(mtt);

	return ERR_PTR(-ENOMEM);
}

static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev,
				     struct erdma_mtt *mtt)
{
	dma_unmap_sg(&dev->pdev->dev, mtt->sglist, mtt->nsg, DMA_TO_DEVICE);
	vfree(mtt->sglist);
}

static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
				      struct erdma_mtt *mtt)
{
	erdma_destroy_mtt_buf_sg(dev, mtt);
	vfree(mtt->buf);
	kfree(mtt);
}

static void erdma_init_middle_mtt(struct erdma_mtt *mtt,
				  struct erdma_mtt *low_mtt)
{
	struct scatterlist *sg;
	u32 idx = 0, i;

	for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i)
		mtt->buf[idx++] = sg_dma_address(sg);
}

static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt)
{
	struct scatterlist *sglist;
	void *buf = mtt->buf;
	u32 npages, i, nsg;
	struct page *pg;

	/* Fail if the buffer is not page aligned. */
	if ((uintptr_t)buf & ~PAGE_MASK)
		return -EINVAL;

	npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE);
	sglist = vzalloc(npages * sizeof(*sglist));
	if (!sglist)
		return -ENOMEM;

	sg_init_table(sglist, npages);
	for (i = 0; i < npages; i++) {
		pg = vmalloc_to_page(buf);
		if (!pg)
			goto err;
		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
		buf += PAGE_SIZE;
	}

	nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE);
	if (!nsg)
		goto err;

	mtt->sglist = sglist;
	mtt->nsg = nsg;

	return 0;
err:
	vfree(sglist);

	return -ENOMEM;
}

static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
						  size_t size)
{
	struct erdma_mtt *mtt;
	int ret = -ENOMEM;

	mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
	if (!mtt)
		return ERR_PTR(-ENOMEM);

	mtt->size = ALIGN(size, PAGE_SIZE);
	mtt->buf = vzalloc(mtt->size);
	mtt->continuous = false;
	if (!mtt->buf)
		goto err_free_mtt;

	ret = erdma_create_mtt_buf_sg(dev, mtt);
	if (ret)
		goto err_free_mtt_buf;

	ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n",
		  mtt->size, mtt->nsg);

	return mtt;

err_free_mtt_buf:
	vfree(mtt->buf);

err_free_mtt:
	kfree(mtt);

	return ERR_PTR(ret);
}

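/*
 * Build the MTT for a memory region. A physically continuous table is used
 * when forced or when the device lacks MTT_VA support; otherwise a
 * multi-level (up to three levels) scatter MTT is built, where each upper
 * level holds the DMA addresses of the level below it.
 */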
static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
					  bool force_continuous)
{
	struct erdma_mtt *mtt, *tmp_mtt;
	int ret, level = 0;

	ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size,
		  force_continuous);

	if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_MTT_VA))
		force_continuous = true;

	if (force_continuous)
		return erdma_create_cont_mtt(dev, size);

	mtt = erdma_create_scatter_mtt(dev, size);
	if (IS_ERR(mtt))
		return mtt;
	level = 1;

	/*
	 * Collapse the MTT level by level until the top level fits in a
	 * single scatter entry.
	 */
	while (mtt->nsg != 1 && level <= 3) {
		tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg));
		if (IS_ERR(tmp_mtt)) {
			ret = PTR_ERR(tmp_mtt);
			goto err_free_mtt;
		}
		erdma_init_middle_mtt(tmp_mtt, mtt);
		tmp_mtt->low_level = mtt;
		mtt = tmp_mtt;
		level++;
	}

	if (level > 3) {
		ret = -ENOMEM;
		goto err_free_mtt;
	}

	mtt->level = level;
	ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n",
		  mtt->level, mtt->sglist[0].dma_address);

	return mtt;
err_free_mtt:
	while (mtt) {
		tmp_mtt = mtt->low_level;
		erdma_destroy_scatter_mtt(dev, mtt);
		mtt = tmp_mtt;
	}

	return ERR_PTR(ret);
}

static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt)
{
	struct erdma_mtt *tmp_mtt;

	if (mtt->continuous) {
		dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size,
				 DMA_TO_DEVICE);
		kfree(mtt->buf);
		kfree(mtt);
	} else {
		while (mtt) {
			tmp_mtt = mtt->low_level;
			erdma_destroy_scatter_mtt(dev, mtt);
			mtt = tmp_mtt;
		}
	}
}

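/*
 * Pin a userspace buffer, pick the best supported page size and populate
 * the bottom-level MTT with the resulting DMA addresses.
 */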
static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
			   u64 start, u64 len, int access, u64 virt,
			   unsigned long req_page_size, bool force_continuous)
{
	int ret = 0;

	mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
	if (IS_ERR(mem->umem)) {
		ret = PTR_ERR(mem->umem);
		mem->umem = NULL;
		return ret;
	}

	mem->va = virt;
	mem->len = len;
	mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
	mem->page_offset = start & (mem->page_size - 1);
	mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
	mem->page_cnt = mem->mtt_nents;
	mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt),
				    force_continuous);
	if (IS_ERR(mem->mtt)) {
		ret = PTR_ERR(mem->mtt);
		goto error_ret;
	}

	erdma_fill_bottom_mtt(dev, mem);

	return 0;

error_ret:
	if (mem->umem) {
		ib_umem_release(mem->umem);
		mem->umem = NULL;
	}

	return ret;
}

static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
{
	if (mem->mtt)
		erdma_destroy_mtt(dev, mem->mtt);

	if (mem->umem) {
		ib_umem_release(mem->umem);
		mem->umem = NULL;
	}
}

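/*
 * Doorbell record pages are shared between queues of the same ucontext:
 * look up an already pinned page covering dbrecords_va, or pin a new one,
 * and return the DMA address of the requested record. Pages are reference
 * counted and released in erdma_unmap_user_dbrecords().
 */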
static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
				    u64 dbrecords_va,
				    struct erdma_user_dbrecords_page **dbr_page,
				    dma_addr_t *dma_addr)
{
	struct erdma_user_dbrecords_page *page = NULL;
	int rv = 0;

	mutex_lock(&ctx->dbrecords_page_mutex);

	list_for_each_entry(page, &ctx->dbrecords_page_list, list)
		if (page->va == (dbrecords_va & PAGE_MASK))
			goto found;

	page = kmalloc(sizeof(*page), GFP_KERNEL);
	if (!page) {
		rv = -ENOMEM;
		goto out;
	}

	page->va = (dbrecords_va & PAGE_MASK);
	page->refcnt = 0;

	page->umem = ib_umem_get(ctx->ibucontext.device,
				 dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
	if (IS_ERR(page->umem)) {
		rv = PTR_ERR(page->umem);
		kfree(page);
		goto out;
	}

	list_add(&page->list, &ctx->dbrecords_page_list);

found:
	*dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
		    (dbrecords_va & ~PAGE_MASK);
	*dbr_page = page;
	page->refcnt++;

out:
	mutex_unlock(&ctx->dbrecords_page_mutex);
	return rv;
}

static void
erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
			   struct erdma_user_dbrecords_page **dbr_page)
{
	if (!ctx || !(*dbr_page))
		return;

	mutex_lock(&ctx->dbrecords_page_mutex);
	if (--(*dbr_page)->refcnt == 0) {
		list_del(&(*dbr_page)->list);
		ib_umem_release((*dbr_page)->umem);
		kfree(*dbr_page);
	}

	*dbr_page = NULL;
	mutex_unlock(&ctx->dbrecords_page_mutex);
}

static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
			u64 va, u32 len, u64 db_info_va)
{
	dma_addr_t db_info_dma_addr;
	u32 rq_offset;
	int ret;

	if (len < (ALIGN(qp->attrs.sq_size * SQEBB_SIZE, ERDMA_HW_PAGE_SIZE) +
		   qp->attrs.rq_size * RQE_SIZE))
		return -EINVAL;

	ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va,
			      qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
			      (SZ_1M - SZ_4K), true);
	if (ret)
		return ret;

	rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE);
	qp->user_qp.rq_offset = rq_offset;

	ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset,
			      qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
			      (SZ_1M - SZ_4K), true);
	if (ret)
		goto put_sq_mtt;

	ret = erdma_map_user_dbrecords(uctx, db_info_va,
				       &qp->user_qp.user_dbr_page,
				       &db_info_dma_addr);
	if (ret)
		goto put_rq_mtt;

	qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr;
	qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE;

	return 0;

put_rq_mtt:
	put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);

put_sq_mtt:
	put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);

	return ret;
}

static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
{
	put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
	put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
	erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
}

int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
		    struct ib_udata *udata)
{
	struct erdma_qp *qp = to_eqp(ibqp);
	struct erdma_dev *dev = to_edev(ibqp->device);
	struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);
	struct erdma_ureq_create_qp ureq;
	struct erdma_uresp_create_qp uresp;
	int ret;

	ret = erdma_qp_validate_cap(dev, attrs);
	if (ret)
		goto err_out;

	ret = erdma_qp_validate_attr(dev, attrs);
	if (ret)
		goto err_out;

	qp->scq = to_ecq(attrs->send_cq);
	qp->rcq = to_ecq(attrs->recv_cq);
	qp->dev = dev;
	qp->attrs.cc = dev->attrs.cc;

	init_rwsem(&qp->state_lock);
	kref_init(&qp->ref);
	init_completion(&qp->safe_free);

	ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
			      XA_LIMIT(1, dev->attrs.max_qp - 1),
			      &dev->next_alloc_qpn, GFP_KERNEL);
	if (ret < 0) {
		ret = -ENOMEM;
		goto err_out;
	}

	qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
					       ERDMA_MAX_WQEBB_PER_SQE);
	qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);

	if (uctx) {
		ret = ib_copy_from_udata(&ureq, udata,
					 min(sizeof(ureq), udata->inlen));
		if (ret)
			goto err_out_xa;

		ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
				   ureq.db_record_va);
		if (ret)
			goto err_out_xa;

		memset(&uresp, 0, sizeof(uresp));

		uresp.num_sqe = qp->attrs.sq_size;
		uresp.num_rqe = qp->attrs.rq_size;
		uresp.qp_id = QP_ID(qp);
		uresp.rq_offset = qp->user_qp.rq_offset;

		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
		if (ret)
			goto err_out_cmd;
	} else {
		init_kernel_qp(dev, qp, attrs);
	}

	qp->attrs.max_send_sge = attrs->cap.max_send_sge;
	qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
	qp->attrs.state = ERDMA_QP_STATE_IDLE;
	INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);

	ret = create_qp_cmd(uctx, qp);
	if (ret)
		goto err_out_cmd;

	spin_lock_init(&qp->lock);

	return 0;

err_out_cmd:
	if (uctx)
		free_user_qp(qp, uctx);
	else
		free_kernel_qp(qp);
err_out_xa:
	xa_erase(&dev->qp_xa, QP_ID(qp));
err_out:
	return ret;
}

static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
{
	int stag_idx;

	stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
	if (stag_idx < 0)
		return stag_idx;

	/* For now, we always let key field be zero. */
	*stag = (stag_idx << 8);

	return 0;
}

struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
{
	struct erdma_dev *dev = to_edev(ibpd->device);
	struct erdma_mr *mr;
	u32 stag;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	ret = erdma_create_stag(dev, &stag);
	if (ret)
		goto out_free;

	mr->type = ERDMA_MR_TYPE_DMA;

	mr->ibmr.lkey = stag;
	mr->ibmr.rkey = stag;
	mr->ibmr.pd = ibpd;
	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
	ret = regmr_cmd(dev, mr);
	if (ret)
		goto out_remove_stag;

	return &mr->ibmr;

out_remove_stag:
	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
		       mr->ibmr.lkey >> 8);

out_free:
	kfree(mr);

	return ERR_PTR(ret);
}

struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
				u32 max_num_sg)
{
	struct erdma_mr *mr;
	struct erdma_dev *dev = to_edev(ibpd->device);
	int ret;
	u32 stag;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EOPNOTSUPP);

	if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	ret = erdma_create_stag(dev, &stag);
	if (ret)
		goto out_free;

	mr->type = ERDMA_MR_TYPE_FRMR;

	mr->ibmr.lkey = stag;
	mr->ibmr.rkey = stag;
	mr->ibmr.pd = ibpd;
	/* update it in FRMR. */
	mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
		     ERDMA_MR_ACC_RW;

	mr->mem.page_size = PAGE_SIZE; /* update it later. */
	mr->mem.page_cnt = max_num_sg;
	mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true);
	if (IS_ERR(mr->mem.mtt)) {
		ret = PTR_ERR(mr->mem.mtt);
		goto out_remove_stag;
	}

	ret = regmr_cmd(dev, mr);
	if (ret)
		goto out_destroy_mtt;

	return &mr->ibmr;

out_destroy_mtt:
	erdma_destroy_mtt(dev, mr->mem.mtt);

out_remove_stag:
	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
		       mr->ibmr.lkey >> 8);

out_free:
	kfree(mr);

	return ERR_PTR(ret);
}

static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct erdma_mr *mr = to_emr(ibmr);

	if (mr->mem.mtt_nents >= mr->mem.page_cnt)
		return -1;

	mr->mem.mtt->buf[mr->mem.mtt_nents] = addr;
	mr->mem.mtt_nents++;

	return 0;
}

int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		    unsigned int *sg_offset)
{
	struct erdma_mr *mr = to_emr(ibmr);
	int num;

	mr->mem.mtt_nents = 0;

	num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
			     erdma_set_page);

	return num;
}

struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
				u64 virt, int access, struct ib_udata *udata)
{
	struct erdma_mr *mr = NULL;
	struct erdma_dev *dev = to_edev(ibpd->device);
	u32 stag;
	int ret;

	if (!len || len > dev->attrs.max_mr_size)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
			      SZ_2G - SZ_4K, false);
	if (ret)
		goto err_out_free;

	ret = erdma_create_stag(dev, &stag);
	if (ret)
		goto err_out_put_mtt;

	mr->ibmr.lkey = mr->ibmr.rkey = stag;
	mr->ibmr.pd = ibpd;
	mr->mem.va = virt;
	mr->mem.len = len;
	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
	mr->valid = 1;
	mr->type = ERDMA_MR_TYPE_NORMAL;

	ret = regmr_cmd(dev, mr);
	if (ret)
		goto err_out_mr;

	return &mr->ibmr;

err_out_mr:
	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
		       mr->ibmr.lkey >> 8);

err_out_put_mtt:
	put_mtt_entries(dev, &mr->mem);

err_out_free:
	kfree(mr);

	return ERR_PTR(ret);
}

int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct erdma_mr *mr;
	struct erdma_dev *dev = to_edev(ibmr->device);
	struct erdma_cmdq_dereg_mr_req req;
	int ret;

	mr = to_emr(ibmr);

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_DEREG_MR);

	req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
		  FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);

	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (ret)
		return ret;

	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);

	put_mtt_entries(dev, &mr->mem);

	kfree(mr);
	return 0;
}

int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct erdma_cq *cq = to_ecq(ibcq);
	struct erdma_dev *dev = to_edev(ibcq->device);
	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);
	int err;
	struct erdma_cmdq_destroy_cq_req req;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_DESTROY_CQ);
	req.cqn = cq->cqn;

	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (err)
		return err;

	if (rdma_is_kernel_res(&cq->ibcq.res)) {
		dma_free_coherent(&dev->pdev->dev,
				  WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
	} else {
		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
	}

	xa_erase(&dev->cq_xa, cq->cqn);

	return 0;
}

int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct erdma_qp *qp = to_eqp(ibqp);
	struct erdma_dev *dev = to_edev(ibqp->device);
	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);
	struct erdma_qp_attrs qp_attrs;
	int err;
	struct erdma_cmdq_destroy_qp_req req;

	down_write(&qp->state_lock);
	qp_attrs.state = ERDMA_QP_STATE_ERROR;
	erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
	up_write(&qp->state_lock);

	cancel_delayed_work_sync(&qp->reflush_dwork);

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_DESTROY_QP);
	req.qpn = QP_ID(qp);

	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (err)
		return err;

	erdma_qp_put(qp);
	wait_for_completion(&qp->safe_free);

	if (rdma_is_kernel_res(&qp->ibqp.res)) {
		vfree(qp->kern_qp.swr_tbl);
		vfree(qp->kern_qp.rwr_tbl);
		dma_free_coherent(
			&dev->pdev->dev,
			WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
			qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
		dma_free_coherent(
			&dev->pdev->dev,
			WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
			qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
	} else {
		put_mtt_entries(dev, &qp->user_qp.sq_mem);
		put_mtt_entries(dev, &qp->user_qp.rq_mem);
		erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
	}

	if (qp->cep)
		erdma_cep_put(qp->cep);
	xa_erase(&dev->qp_xa, QP_ID(qp));

	return 0;
}

void erdma_qp_get_ref(struct ib_qp *ibqp)
{
	erdma_qp_get(to_eqp(ibqp));
}

void erdma_qp_put_ref(struct ib_qp *ibqp)
{
	erdma_qp_put(to_eqp(ibqp));
}

int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct erdma_user_mmap_entry *entry;
	pgprot_t prot;
	int err;

	rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
	if (!rdma_entry)
		return -EINVAL;

	entry = to_emmap(rdma_entry);

	switch (entry->mmap_flag) {
	case ERDMA_MMAP_IO_NC:
		/* map doorbell. */
		prot = pgprot_device(vma->vm_page_prot);
		break;
	default:
		err = -EINVAL;
		goto put_entry;
	}

	err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
				prot, rdma_entry);

put_entry:
	rdma_user_mmap_entry_put(rdma_entry);
	return err;
}

void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct erdma_user_mmap_entry *entry = to_emmap(rdma_entry);

	kfree(entry);
}

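/*
 * Set up the doorbell addresses for a ucontext. With the extended doorbell
 * capability the offsets are allocated from hardware via ALLOC_DB;
 * otherwise the fixed BAR doorbell regions are used.
 */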
static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx,
			      bool ext_db_en)
{
	struct erdma_cmdq_ext_db_req req = {};
	u64 val0, val1;
	int ret;

	/*
	 * CAP_SYS_RAWIO is required if hardware does not support the extended
	 * doorbell mechanism.
	 */
	if (!ext_db_en && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	if (!ext_db_en) {
		ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET;
		ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
		ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
		return 0;
	}

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
				CMDQ_OPCODE_ALLOC_DB);

	req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
		  FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
		  FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);

	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1);
	if (ret)
		return ret;

	ctx->ext_db.enable = true;
	ctx->ext_db.sdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_SDB);
	ctx->ext_db.rdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_RDB);
	ctx->ext_db.cdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_CDB);

	ctx->sdb = dev->func_bar_addr + (ctx->ext_db.sdb_off << PAGE_SHIFT);
	ctx->rdb = dev->func_bar_addr + (ctx->ext_db.rdb_off << PAGE_SHIFT);
	ctx->cdb = dev->func_bar_addr + (ctx->ext_db.cdb_off << PAGE_SHIFT);

	return 0;
}

static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx)
{
	struct erdma_cmdq_ext_db_req req = {};
	int ret;

	if (!ctx->ext_db.enable)
		return;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
				CMDQ_OPCODE_FREE_DB);

	req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
		  FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
		  FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);

	req.sdb_off = ctx->ext_db.sdb_off;
	req.rdb_off = ctx->ext_db.rdb_off;
	req.cdb_off = ctx->ext_db.cdb_off;

	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (ret)
		ibdev_err_ratelimited(&dev->ibdev,
				      "free db resources failed %d", ret);
}

static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
{
	rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
	rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
	rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
}

int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
{
	struct erdma_ucontext *ctx = to_ectx(ibctx);
	struct erdma_dev *dev = to_edev(ibctx->device);
	int ret;
	struct erdma_uresp_alloc_ctx uresp = {};

	if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
		ret = -ENOMEM;
		goto err_out;
	}

	if (udata->outlen < sizeof(uresp)) {
		ret = -EINVAL;
		goto err_out;
	}

	INIT_LIST_HEAD(&ctx->dbrecords_page_list);
	mutex_init(&ctx->dbrecords_page_mutex);

	ret = alloc_db_resources(dev, ctx,
				 !!(dev->attrs.cap_flags &
				    ERDMA_DEV_CAP_FLAGS_EXTEND_DB));
	if (ret)
		goto err_out;

	ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
		ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
	if (!ctx->sq_db_mmap_entry) {
		ret = -ENOMEM;
		goto err_free_ext_db;
	}

	ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
		ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
	if (!ctx->rq_db_mmap_entry) {
		ret = -EINVAL;
		goto err_put_mmap_entries;
	}

	ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
		ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
	if (!ctx->cq_db_mmap_entry) {
		ret = -EINVAL;
		goto err_put_mmap_entries;
	}

	uresp.dev_id = dev->pdev->device;

	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (ret)
		goto err_put_mmap_entries;

	return 0;

err_put_mmap_entries:
	erdma_uctx_user_mmap_entries_remove(ctx);

err_free_ext_db:
	free_db_resources(dev, ctx);

err_out:
	atomic_dec(&dev->num_ctx);
	return ret;
}

void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
{
	struct erdma_dev *dev = to_edev(ibctx->device);
	struct erdma_ucontext *ctx = to_ectx(ibctx);

	erdma_uctx_user_mmap_entries_remove(ctx);
	free_db_resources(dev, ctx);
	atomic_dec(&dev->num_ctx);
}

static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
	[IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
	[IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
	[IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
	[IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
	[IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
	[IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
};

int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
		    struct ib_udata *udata)
{
	struct erdma_qp_attrs new_attrs;
	enum erdma_qp_attr_mask erdma_attr_mask = 0;
	struct erdma_qp *qp = to_eqp(ibqp);
	int ret = 0;

	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	memset(&new_attrs, 0, sizeof(new_attrs));

	if (attr_mask & IB_QP_STATE) {
		new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];

		erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
	}

	down_write(&qp->state_lock);

	ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);

	up_write(&qp->state_lock);

	return ret;
}

int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		   int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
	struct erdma_qp *qp;
	struct erdma_dev *dev;

	if (ibqp && qp_attr && qp_init_attr) {
		qp = to_eqp(ibqp);
		dev = to_edev(ibqp->device);
	} else {
		return -EINVAL;
	}

	qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
	qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;

	qp_attr->cap.max_send_wr = qp->attrs.sq_size;
	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
	qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
	qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;

	qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
	qp_attr->max_rd_atomic = qp->attrs.irq_size;
	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;

	qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ;

	qp_init_attr->cap = qp_attr->cap;

	return 0;
}

static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
			      struct erdma_ureq_create_cq *ureq)
{
	int ret;
	struct erdma_dev *dev = to_edev(cq->ibcq.device);

	ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va,
			      ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
			      true);
	if (ret)
		return ret;

	ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
				       &cq->user_cq.user_dbr_page,
				       &cq->user_cq.db_info_dma_addr);
	if (ret)
		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);

	return ret;
}

static int erdma_init_kernel_cq(struct erdma_cq *cq)
{
	struct erdma_dev *dev = to_edev(cq->ibcq.device);

	cq->kern_cq.qbuf =
		dma_alloc_coherent(&dev->pdev->dev,
				   WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
				   &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
	if (!cq->kern_cq.qbuf)
		return -ENOMEM;

	cq->kern_cq.db_record =
		(u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT));
	spin_lock_init(&cq->kern_cq.lock);
	/* use default cqdb addr */
	cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;

	return 0;
}

int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		    struct ib_udata *udata)
{
	struct erdma_cq *cq = to_ecq(ibcq);
	struct erdma_dev *dev = to_edev(ibcq->device);
	unsigned int depth = attr->cqe;
	int ret;
	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);

	if (depth > dev->attrs.max_cqe)
		return -EINVAL;

	depth = roundup_pow_of_two(depth);
	cq->ibcq.cqe = depth;
	cq->depth = depth;
	cq->assoc_eqn = attr->comp_vector + 1;

	ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
			      XA_LIMIT(1, dev->attrs.max_cq - 1),
			      &dev->next_alloc_cqn, GFP_KERNEL);
	if (ret < 0)
		return ret;

	if (!rdma_is_kernel_res(&ibcq->res)) {
		struct erdma_ureq_create_cq ureq;
		struct erdma_uresp_create_cq uresp;

		ret = ib_copy_from_udata(&ureq, udata,
					 min(udata->inlen, sizeof(ureq)));
		if (ret)
			goto err_out_xa;

		ret = erdma_init_user_cq(ctx, cq, &ureq);
		if (ret)
			goto err_out_xa;

		uresp.cq_id = cq->cqn;
		uresp.num_cqe = depth;

		ret = ib_copy_to_udata(udata, &uresp,
				       min(sizeof(uresp), udata->outlen));
		if (ret)
			goto err_free_res;
	} else {
		ret = erdma_init_kernel_cq(cq);
		if (ret)
			goto err_out_xa;
	}

	ret = create_cq_cmd(ctx, cq);
	if (ret)
		goto err_free_res;

	return 0;

err_free_res:
	if (!rdma_is_kernel_res(&ibcq->res)) {
		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
	} else {
		dma_free_coherent(&dev->pdev->dev,
				  WARPPED_BUFSIZE(depth << CQE_SHIFT),
				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
	}

err_out_xa:
	xa_erase(&dev->cq_xa, cq->cqn);

	return ret;
}

void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
{
	struct erdma_cmdq_config_mtu_req req;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
				CMDQ_OPCODE_CONF_MTU);
	req.mtu = mtu;

	erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}

void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
{
	struct ib_event event;

	event.device = &dev->ibdev;
	event.element.port_num = 1;
	event.event = reason;

	ib_dispatch_event(&event);
}

enum counters {
	ERDMA_STATS_TX_REQS_CNT,
	ERDMA_STATS_TX_PACKETS_CNT,
	ERDMA_STATS_TX_BYTES_CNT,
	ERDMA_STATS_TX_DISABLE_DROP_CNT,
	ERDMA_STATS_TX_BPS_METER_DROP_CNT,
	ERDMA_STATS_TX_PPS_METER_DROP_CNT,

	ERDMA_STATS_RX_PACKETS_CNT,
	ERDMA_STATS_RX_BYTES_CNT,
	ERDMA_STATS_RX_DISABLE_DROP_CNT,
	ERDMA_STATS_RX_BPS_METER_DROP_CNT,
	ERDMA_STATS_RX_PPS_METER_DROP_CNT,

	ERDMA_STATS_MAX
};

static const struct rdma_stat_desc erdma_descs[] = {
	[ERDMA_STATS_TX_REQS_CNT].name = "tx_reqs_cnt",
	[ERDMA_STATS_TX_PACKETS_CNT].name = "tx_packets_cnt",
	[ERDMA_STATS_TX_BYTES_CNT].name = "tx_bytes_cnt",
	[ERDMA_STATS_TX_DISABLE_DROP_CNT].name = "tx_disable_drop_cnt",
	[ERDMA_STATS_TX_BPS_METER_DROP_CNT].name = "tx_bps_limit_drop_cnt",
	[ERDMA_STATS_TX_PPS_METER_DROP_CNT].name = "tx_pps_limit_drop_cnt",
	[ERDMA_STATS_RX_PACKETS_CNT].name = "rx_packets_cnt",
	[ERDMA_STATS_RX_BYTES_CNT].name = "rx_bytes_cnt",
	[ERDMA_STATS_RX_DISABLE_DROP_CNT].name = "rx_disable_drop_cnt",
	[ERDMA_STATS_RX_BPS_METER_DROP_CNT].name = "rx_bps_limit_drop_cnt",
	[ERDMA_STATS_RX_PPS_METER_DROP_CNT].name = "rx_pps_limit_drop_cnt",
};

struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
						u32 port_num)
{
	return rdma_alloc_hw_stats_struct(erdma_descs, ERDMA_STATS_MAX,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

static int erdma_query_hw_stats(struct erdma_dev *dev,
				struct rdma_hw_stats *stats)
{
	struct erdma_cmdq_query_stats_resp *resp;
	struct erdma_cmdq_query_req req;
	dma_addr_t dma_addr;
	int err;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
				CMDQ_OPCODE_GET_STATS);

	resp = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &dma_addr);
	if (!resp)
		return -ENOMEM;

	req.target_addr = dma_addr;
	req.target_length = ERDMA_HW_RESP_SIZE;

	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (err)
		goto out;

	if (resp->hdr.magic != ERDMA_HW_RESP_MAGIC) {
		err = -EINVAL;
		goto out;
	}

	memcpy(&stats->value[0], &resp->tx_req_cnt,
	       sizeof(u64) * stats->num_counters);

out:
	dma_pool_free(dev->resp_pool, resp, dma_addr);

	return err;
}

int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
		       u32 port, int index)
{
	struct erdma_dev *dev = to_edev(ibdev);
	int ret;

	if (port == 0)
		return 0;

	ret = erdma_query_hw_stats(dev, stats);
	if (ret)
		return ret;

	return stats->num_counters;
}