1/*
2 * Copyright (c) 2016 Hisilicon Limited.
3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses.  You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 *     Redistribution and use in source and binary forms, with or
12 *     without modification, are permitted provided that the following
13 *     conditions are met:
14 *
15 *      - Redistributions of source code must retain the above
16 *        copyright notice, this list of conditions and the following
17 *        disclaimer.
18 *
19 *      - Redistributions in binary form must reproduce the above
20 *        copyright notice, this list of conditions and the following
21 *        disclaimer in the documentation and/or other materials
22 *        provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/pci.h>
35#include <rdma/ib_addr.h>
36#include <rdma/ib_umem.h>
37#include <rdma/uverbs_ioctl.h>
38#include "hns_roce_common.h"
39#include "hns_roce_device.h"
40#include "hns_roce_hem.h"
41
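/*
 * Deferred "flush CQE" handling: moving the QP to the error state goes
 * through a mailbox command that may sleep, so it is done from a workqueue.
 * init_flush_work() takes a QP reference that is dropped here, so that QP
 * destruction waits until the flush has finished.
 */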
42static void flush_work_handle(struct work_struct *work)
43{
44	struct hns_roce_work *flush_work = container_of(work,
45					struct hns_roce_work, work);
46	struct hns_roce_qp *hr_qp = container_of(flush_work,
47					struct hns_roce_qp, flush_work);
48	struct device *dev = flush_work->hr_dev->dev;
49	struct ib_qp_attr attr;
50	int attr_mask;
51	int ret;
52
53	attr_mask = IB_QP_STATE;
54	attr.qp_state = IB_QPS_ERR;
55
56	if (test_and_clear_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) {
57		ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL);
58		if (ret)
			dev_err(dev, "modify QP to error state failed (%d) during CQE flush\n",
60				ret);
61	}
62
	/*
	 * Make sure we signal the QP destroy path that the flush has
	 * completed, so that it can safely proceed and destroy the QP.
	 */
67	if (refcount_dec_and_test(&hr_qp->refcount))
68		complete(&hr_qp->free);
69}
70
71void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
72{
73	struct hns_roce_work *flush_work = &hr_qp->flush_work;
74
75	flush_work->hr_dev = hr_dev;
76	INIT_WORK(&flush_work->work, flush_work_handle);
77	refcount_inc(&hr_qp->refcount);
78	queue_work(hr_dev->irq_workq, &flush_work->work);
79}
80
81void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
82{
	/*
	 * HIP08 hardware cannot flush the WQEs in the SQ/RQ when the QP
	 * enters the error state, so the driver has to assist with the
	 * flush as a workaround for this hardware limitation. The flush
	 * conveys the QP state to the hardware via a mailbox command,
	 * which may sleep because the mailbox calls are serialized by a
	 * mutex. Hence, defer the flush to a workqueue for now.
	 */
92	if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
93		init_flush_work(dev, qp);
94}
95
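/*
 * Dispatch an asynchronous event reported by the hardware for a QP: fatal
 * WQ/access errors move the QP to the error state and trigger the deferred
 * flush before the event is forwarded via qp->event().
 */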
96void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
97{
98	struct device *dev = hr_dev->dev;
99	struct hns_roce_qp *qp;
100
101	xa_lock(&hr_dev->qp_table_xa);
102	qp = __hns_roce_qp_lookup(hr_dev, qpn);
103	if (qp)
104		refcount_inc(&qp->refcount);
105	xa_unlock(&hr_dev->qp_table_xa);
106
107	if (!qp) {
108		dev_warn(dev, "async event for bogus QP %08x\n", qpn);
109		return;
110	}
111
112	if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
113	    event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
114	    event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
115	    event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
116	    event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) {
117		qp->state = IB_QPS_ERR;
118
119		flush_cqe(hr_dev, qp);
120	}
121
122	qp->event(qp, (enum hns_roce_event)event_type);
123
124	if (refcount_dec_and_test(&qp->refcount))
125		complete(&qp->free);
126}
127
128static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
129				 enum hns_roce_event type)
130{
131	struct ib_qp *ibqp = &hr_qp->ibqp;
132	struct ib_event event;
133
134	if (ibqp->event_handler) {
135		event.device = ibqp->device;
136		event.element.qp = ibqp;
137		switch (type) {
138		case HNS_ROCE_EVENT_TYPE_PATH_MIG:
139			event.event = IB_EVENT_PATH_MIG;
140			break;
141		case HNS_ROCE_EVENT_TYPE_COMM_EST:
142			event.event = IB_EVENT_COMM_EST;
143			break;
144		case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
145			event.event = IB_EVENT_SQ_DRAINED;
146			break;
147		case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
148			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
149			break;
150		case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
151			event.event = IB_EVENT_QP_FATAL;
152			break;
153		case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
154			event.event = IB_EVENT_PATH_MIG_ERR;
155			break;
156		case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
157			event.event = IB_EVENT_QP_REQ_ERR;
158			break;
159		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
160		case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
161		case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
162			event.event = IB_EVENT_QP_ACCESS_ERR;
163			break;
164		default:
165			dev_dbg(ibqp->device->dev.parent, "roce_ib: Unexpected event type %d on QP %06lx\n",
166				type, hr_qp->qpn);
167			return;
168		}
169		ibqp->event_handler(&event, ibqp->qp_context);
170	}
171}
172
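/*
 * QPs are spread over banks selected by the low 3 bits of the QPN, and CQs
 * are banked by the low bits of the CQN. A QP bank's affine CQ bank is
 * derived from its bank ID (bankid >> 1, masked by CQ_BANKID_MASK), so the
 * allocator prefers QP banks whose affine CQ bank matches the bank of the
 * send CQ.
 */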
173static u8 get_affinity_cq_bank(u8 qp_bank)
174{
175	return (qp_bank >> 1) & CQ_BANKID_MASK;
176}
177
178static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr,
179					struct hns_roce_bank *bank)
180{
181#define INVALID_LOAD_QPNUM 0xFFFFFFFF
182	struct ib_cq *scq = init_attr->send_cq;
183	u32 least_load = INVALID_LOAD_QPNUM;
184	unsigned long cqn = 0;
185	u8 bankid = 0;
186	u32 bankcnt;
187	u8 i;
188
189	if (scq)
190		cqn = to_hr_cq(scq)->cqn;
191
192	for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
193		if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK)))
194			continue;
195
196		bankcnt = bank[i].inuse;
197		if (bankcnt < least_load) {
198			least_load = bankcnt;
199			bankid = i;
200		}
201	}
202
203	return bankid;
204}
205
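/*
 * Allocate an index within the chosen bank, starting the search at
 * bank->next so that QPNs keep increasing, and falling back to bank->min
 * once the upper part of the range is exhausted. The final QPN is the
 * bank-local index shifted left by 3 bits with the bank ID in the low bits.
 */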
206static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid,
207				 unsigned long *qpn)
208{
209	int id;
210
211	id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL);
212	if (id < 0) {
213		id = ida_alloc_range(&bank->ida, bank->min, bank->max,
214				     GFP_KERNEL);
215		if (id < 0)
216			return id;
217	}
218
	/* The QPN should keep increasing until the max value is reached. */
	bank->next = (id + 1) > bank->max ? bank->min : id + 1;

	/* The lower 3 bits of the QPN hold the bank ID. */
	*qpn = (id << 3) | bankid;
224
225	return 0;
}

static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
228		     struct ib_qp_init_attr *init_attr)
229{
230	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
231	unsigned long num = 0;
232	u8 bankid;
233	int ret;
234
235	if (hr_qp->ibqp.qp_type == IB_QPT_GSI) {
236		num = 1;
237	} else {
238		mutex_lock(&qp_table->bank_mutex);
239		bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank);
240
241		ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid,
242					    &num);
243		if (ret) {
244			ibdev_err(&hr_dev->ib_dev,
245				  "failed to alloc QPN, ret = %d\n", ret);
246			mutex_unlock(&qp_table->bank_mutex);
247			return ret;
248		}
249
250		qp_table->bank[bankid].inuse++;
251		mutex_unlock(&qp_table->bank_mutex);
252	}
253
254	hr_qp->qpn = num;
255
256	return 0;
257}
258
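/*
 * Track the QP on the per-device and per-CQ lists that are walked when
 * completions have to be generated in software; both CQ locks and
 * qp_list_lock are held so the lists stay consistent with CQ polling.
 */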
259static void add_qp_to_list(struct hns_roce_dev *hr_dev,
260			   struct hns_roce_qp *hr_qp,
261			   struct ib_cq *send_cq, struct ib_cq *recv_cq)
262{
263	struct hns_roce_cq *hr_send_cq, *hr_recv_cq;
264	unsigned long flags;
265
266	hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL;
267	hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL;
268
269	spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
270	hns_roce_lock_cqs(hr_send_cq, hr_recv_cq);
271
272	list_add_tail(&hr_qp->node, &hr_dev->qp_list);
273	if (hr_send_cq)
274		list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list);
275	if (hr_recv_cq)
276		list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list);
277
278	hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq);
279	spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
280}
281
282static int hns_roce_qp_store(struct hns_roce_dev *hr_dev,
283			     struct hns_roce_qp *hr_qp,
284			     struct ib_qp_init_attr *init_attr)
285{
286	struct xarray *xa = &hr_dev->qp_table_xa;
287	int ret;
288
289	if (!hr_qp->qpn)
290		return -EINVAL;
291
292	ret = xa_err(xa_store_irq(xa, hr_qp->qpn, hr_qp, GFP_KERNEL));
293	if (ret)
294		dev_err(hr_dev->dev, "failed to xa store for QPC\n");
295	else
		/* add QP to the device's QP list for software completions */
297		add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq,
298			       init_attr->recv_cq);
299
300	return ret;
301}
302
303static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
304{
305	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
306	struct device *dev = hr_dev->dev;
307	int ret;
308
309	if (!hr_qp->qpn)
310		return -EINVAL;
311
312	/* Alloc memory for QPC */
313	ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn);
314	if (ret) {
315		dev_err(dev, "failed to get QPC table\n");
316		goto err_out;
317	}
318
319	/* Alloc memory for IRRL */
320	ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
321	if (ret) {
322		dev_err(dev, "failed to get IRRL table\n");
323		goto err_put_qp;
324	}
325
326	if (hr_dev->caps.trrl_entry_sz) {
327		/* Alloc memory for TRRL */
328		ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table,
329					 hr_qp->qpn);
330		if (ret) {
331			dev_err(dev, "failed to get TRRL table\n");
332			goto err_put_irrl;
333		}
334	}
335
336	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
337		/* Alloc memory for SCC CTX */
338		ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
339					 hr_qp->qpn);
340		if (ret) {
341			dev_err(dev, "failed to get SCC CTX table\n");
342			goto err_put_trrl;
343		}
344	}
345
346	return 0;
347
348err_put_trrl:
349	if (hr_dev->caps.trrl_entry_sz)
350		hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
351
352err_put_irrl:
353	hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
354
355err_put_qp:
356	hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn);
357
358err_out:
359	return ret;
360}
361
362static void qp_user_mmap_entry_remove(struct hns_roce_qp *hr_qp)
363{
364	rdma_user_mmap_entry_remove(&hr_qp->dwqe_mmap_entry->rdma_entry);
365}
366
367void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
368{
369	struct xarray *xa = &hr_dev->qp_table_xa;
370	unsigned long flags;
371
372	list_del(&hr_qp->node);
373
374	if (hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
375		list_del(&hr_qp->sq_node);
376
377	if (hr_qp->ibqp.qp_type != IB_QPT_XRC_INI &&
378	    hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
379		list_del(&hr_qp->rq_node);
380
381	xa_lock_irqsave(xa, flags);
382	__xa_erase(xa, hr_qp->qpn);
383	xa_unlock_irqrestore(xa, flags);
384}
385
386static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
387{
388	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
389
390	if (hr_dev->caps.trrl_entry_sz)
391		hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
392	hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
393}
394
395static inline u8 get_qp_bankid(unsigned long qpn)
396{
397	/* The lower 3 bits of QPN are used to hash to different banks */
398	return (u8)(qpn & GENMASK(2, 0));
399}
400
401static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
402{
403	u8 bankid;
404
405	if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
406		return;
407
408	if (hr_qp->qpn < hr_dev->caps.reserved_qps)
409		return;
410
411	bankid = get_qp_bankid(hr_qp->qpn);
412
413	ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3);
414
415	mutex_lock(&hr_dev->qp_table.bank_mutex);
416	hr_dev->qp_table.bank[bankid].inuse--;
417	mutex_unlock(&hr_dev->qp_table.bank_mutex);
418}
419
420static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
421		       bool user)
422{
423	u32 max_sge = dev->caps.max_rq_sg;
424
425	if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
426		return max_sge;
427
	/* Reserve SGEs only for HIP08 in kernel space; the userspace driver
	 * already accounts for the reserved SGEs when allocating the WQE
	 * buffer, so there is no need to do it again in the kernel. However,
	 * the requested number may exceed the SGE capacity recorded in the
	 * firmware, so the kernel driver just adapts the value accordingly.
	 */
434	if (user)
435		max_sge = roundup_pow_of_two(max_sge + 1);
436	else
437		hr_qp->rq.rsv_sge = 1;
438
439	return max_sge;
440}
441
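/*
 * Size the receive queue from the caller's capabilities: the WQE count is
 * rounded up to a power of two within [min_wqes, max_wqes], max_gs is
 * rounded up including any reserved SGE, and the adjusted values are
 * written back into the ib_qp_cap.
 */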
442static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
443		       struct hns_roce_qp *hr_qp, int has_rq, bool user)
444{
445	u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
446	u32 cnt;
447
	/* If an SRQ exists, set the RQ-related fields to zero. */
449	if (!has_rq) {
450		hr_qp->rq.wqe_cnt = 0;
451		hr_qp->rq.max_gs = 0;
452		cap->max_recv_wr = 0;
453		cap->max_recv_sge = 0;
454
455		return 0;
456	}
457
458	/* Check the validity of QP support capacity */
459	if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
460	    cap->max_recv_sge > max_sge) {
461		ibdev_err(&hr_dev->ib_dev,
462			  "RQ config error, depth = %u, sge = %u\n",
463			  cap->max_recv_wr, cap->max_recv_sge);
464		return -EINVAL;
465	}
466
467	cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes));
468	if (cnt > hr_dev->caps.max_wqes) {
469		ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n",
470			  cap->max_recv_wr);
471		return -EINVAL;
472	}
473
474	hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
475					      hr_qp->rq.rsv_sge);
476
477	hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
478				    hr_qp->rq.max_gs);
479
480	hr_qp->rq.wqe_cnt = cnt;
481
482	cap->max_recv_wr = cnt;
483	cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
484
485	return 0;
486}
487
488static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
489			       struct ib_qp_cap *cap)
490{
491	if (cap->max_inline_data) {
492		cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data);
493		return min(cap->max_inline_data,
494			   hr_dev->caps.max_sq_inline);
495	}
496
497	return 0;
498}
499
500static void update_inline_data(struct hns_roce_qp *hr_qp,
501			       struct ib_qp_cap *cap)
502{
503	u32 sge_num = hr_qp->sq.ext_sge_cnt;
504
505	if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
506		if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI ||
507		      hr_qp->ibqp.qp_type == IB_QPT_UD))
508			sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
509
510		cap->max_inline_data = max(cap->max_inline_data,
511					   sge_num * HNS_ROCE_SGE_SIZE);
512	}
513
514	hr_qp->max_inline_data = cap->max_inline_data;
515}
516
517static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi,
518					 u32 max_send_sge)
519{
520	unsigned int std_sge_num;
521	unsigned int min_sge;
522
523	std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
524	min_sge = is_ud_or_gsi ? 1 : 0;
525	return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
526				min_sge;
527}
528
529static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
530						  u32 max_inline_data)
531{
532	unsigned int inline_sge;
533
534	inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
535
	/*
	 * If max_inline_data is no larger than
	 * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE, the inline data fits in
	 * the standard SGEs of the WQE, so there is no need to extend the
	 * SGEs except for UD/GSI QPs.
	 */
541	if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
542		inline_sge = 0;
543
544	return inline_sge;
545}
546
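/*
 * Work out how many extended SGEs the SQ needs. A WQE carries up to
 * HNS_ROCE_SGE_IN_WQE standard SGEs itself (none for UD/GSI QPs); any
 * further SGEs, including those used to hold inline data when
 * HNS_ROCE_EXSGE_FLAGS is set, live in a separate extended SGE buffer.
 */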
547static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
548			      struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
549{
550	bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
551				hr_qp->ibqp.qp_type == IB_QPT_UD);
552	unsigned int std_sge_num;
553	u32 inline_ext_sge = 0;
554	u32 ext_wqe_sge_cnt;
555	u32 total_sge_cnt;
556
557	cap->max_inline_data = get_max_inline_data(hr_dev, cap);
558
559	hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
560	std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
561	ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi,
562							cap->max_send_sge);
563
564	if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
565		inline_ext_sge = max(ext_wqe_sge_cnt,
566				     get_sge_num_from_max_inl_data(is_ud_or_gsi,
567							 cap->max_inline_data));
568		hr_qp->sq.ext_sge_cnt = inline_ext_sge ?
569					roundup_pow_of_two(inline_ext_sge) : 0;
570
571		hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
572		hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
573
574		ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
575	} else {
576		hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
577		hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
578		hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
579	}
580
	/* If the number of extended SGEs is not zero, they MUST occupy at
	 * least one HNS_HW_PAGE_SIZE of buffer space.
	 */
584	if (ext_wqe_sge_cnt) {
585		total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
586		hr_qp->sge.sge_cnt = max(total_sge_cnt,
587				(u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
588	}
589
590	update_inline_data(hr_qp, cap);
591}
592
593static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
594					struct ib_qp_cap *cap,
595					struct hns_roce_ib_create_qp *ucmd)
596{
597	u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
598	u8 max_sq_stride = ilog2(roundup_sq_stride);
599
600	/* Sanity check SQ size before proceeding */
601	if (ucmd->log_sq_stride > max_sq_stride ||
602	    ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
603		ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n");
604		return -EINVAL;
605	}
606
607	if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
608		ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n",
609			  cap->max_send_sge);
610		return -EINVAL;
611	}
612
613	return 0;
614}
615
616static int set_user_sq_size(struct hns_roce_dev *hr_dev,
617			    struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp,
618			    struct hns_roce_ib_create_qp *ucmd)
619{
620	struct ib_device *ibdev = &hr_dev->ib_dev;
621	u32 cnt = 0;
622	int ret;
623
624	if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) ||
625	    cnt > hr_dev->caps.max_wqes)
626		return -EINVAL;
627
628	ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
629	if (ret) {
630		ibdev_err(ibdev, "failed to check user SQ size, ret = %d.\n",
631			  ret);
632		return ret;
633	}
634
635	set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
636
637	hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
638	hr_qp->sq.wqe_cnt = cnt;
639	cap->max_send_sge = hr_qp->sq.max_gs;
640
641	return 0;
642}
643
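/*
 * Describe the QP work queue buffer to the MTR layer: SQ WQEs come first,
 * followed by the extended SGE area and then the RQ WQEs, each region
 * rounded to HEM entries and mapped with its own hop number.
 */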
644static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
645			    struct hns_roce_qp *hr_qp,
646			    struct hns_roce_buf_attr *buf_attr)
647{
648	int buf_size;
649	int idx = 0;
650
651	hr_qp->buff_size = 0;
652
653	/* SQ WQE */
654	hr_qp->sq.offset = 0;
655	buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt,
656					  hr_qp->sq.wqe_shift);
657	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
658		buf_attr->region[idx].size = buf_size;
659		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
660		idx++;
661		hr_qp->buff_size += buf_size;
662	}
663
664	/* extend SGE WQE in SQ */
665	hr_qp->sge.offset = hr_qp->buff_size;
666	buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt,
667					  hr_qp->sge.sge_shift);
668	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
669		buf_attr->region[idx].size = buf_size;
670		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num;
671		idx++;
672		hr_qp->buff_size += buf_size;
673	}
674
675	/* RQ WQE */
676	hr_qp->rq.offset = hr_qp->buff_size;
677	buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt,
678					  hr_qp->rq.wqe_shift);
679	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
680		buf_attr->region[idx].size = buf_size;
681		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
682		idx++;
683		hr_qp->buff_size += buf_size;
684	}
685
686	if (hr_qp->buff_size < 1)
687		return -EINVAL;
688
689	buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
690	buf_attr->region_count = idx;
691
692	return 0;
693}
694
695static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
696			      struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp)
697{
698	struct ib_device *ibdev = &hr_dev->ib_dev;
699	u32 cnt;
700
701	if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
702	    cap->max_send_sge > hr_dev->caps.max_sq_sg) {
703		ibdev_err(ibdev, "failed to check SQ WR or SGE num.\n");
704		return -EINVAL;
705	}
706
707	cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes));
708	if (cnt > hr_dev->caps.max_wqes) {
709		ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n",
710			  cnt);
711		return -EINVAL;
712	}
713
714	hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
715	hr_qp->sq.wqe_cnt = cnt;
716
717	set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
718
	/* Sync the actual kernel QP parameters back to the caller's caps. */
720	cap->max_send_wr = cnt;
721	cap->max_send_sge = hr_qp->sq.max_gs;
722
723	return 0;
724}
725
726static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
727{
728	if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr)
729		return 0;
730
731	return 1;
732}
733
734static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
735{
736	if (attr->qp_type == IB_QPT_XRC_INI ||
737	    attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
738	    !attr->cap.max_recv_wr)
739		return 0;
740
741	return 1;
742}
743
744static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
745			struct ib_qp_init_attr *init_attr,
746			struct ib_udata *udata, unsigned long addr)
747{
748	struct ib_device *ibdev = &hr_dev->ib_dev;
749	struct hns_roce_buf_attr buf_attr = {};
750	int ret;
751
752	ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr);
753	if (ret) {
754		ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
755		goto err_inline;
756	}
757	ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
758				  PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
759				  udata, addr);
760	if (ret) {
761		ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
762		goto err_inline;
763	}
764
765	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
766		hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE;
767
768	return 0;
769
err_inline:
	return ret;
773}
774
775static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
776{
777	hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
778}
779
780static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev,
781				   struct ib_qp_init_attr *init_attr,
782				   struct ib_udata *udata,
783				   struct hns_roce_ib_create_qp_resp *resp,
784				   struct hns_roce_ib_create_qp *ucmd)
785{
786	return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
787		udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
788		hns_roce_qp_has_sq(init_attr) &&
789		udata->inlen >= offsetofend(typeof(*ucmd), sdb_addr));
790}
791
792static inline bool user_qp_has_rdb(struct hns_roce_dev *hr_dev,
793				   struct ib_qp_init_attr *init_attr,
794				   struct ib_udata *udata,
795				   struct hns_roce_ib_create_qp_resp *resp)
796{
797	return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
798		udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
799		hns_roce_qp_has_rq(init_attr));
800}
801
802static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev,
803				     struct ib_qp_init_attr *init_attr)
804{
805	return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
806		hns_roce_qp_has_rq(init_attr));
807}
808
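/*
 * Expose the QP's direct WQE (DWQE) page to userspace through an
 * rdma_user_mmap entry; the returned key is reported back in the create
 * response so that the user library can mmap() it.
 */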
809static int qp_mmap_entry(struct hns_roce_qp *hr_qp,
810			 struct hns_roce_dev *hr_dev,
811			 struct ib_udata *udata,
812			 struct hns_roce_ib_create_qp_resp *resp)
813{
814	struct hns_roce_ucontext *uctx =
815		rdma_udata_to_drv_context(udata,
816			struct hns_roce_ucontext, ibucontext);
817	struct rdma_user_mmap_entry *rdma_entry;
818	u64 address;
819
820	address = hr_dev->dwqe_page + hr_qp->qpn * HNS_ROCE_DWQE_SIZE;
821
822	hr_qp->dwqe_mmap_entry =
823		hns_roce_user_mmap_entry_insert(&uctx->ibucontext, address,
824						HNS_ROCE_DWQE_SIZE,
825						HNS_ROCE_MMAP_TYPE_DWQE);
826
827	if (!hr_qp->dwqe_mmap_entry) {
828		ibdev_err(&hr_dev->ib_dev, "failed to get dwqe mmap entry.\n");
829		return -ENOMEM;
830	}
831
832	rdma_entry = &hr_qp->dwqe_mmap_entry->rdma_entry;
833	resp->dwqe_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
834
835	return 0;
836}
837
838static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
839			    struct hns_roce_qp *hr_qp,
840			    struct ib_qp_init_attr *init_attr,
841			    struct ib_udata *udata,
842			    struct hns_roce_ib_create_qp *ucmd,
843			    struct hns_roce_ib_create_qp_resp *resp)
844{
845	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
846		struct hns_roce_ucontext, ibucontext);
847	struct ib_device *ibdev = &hr_dev->ib_dev;
848	int ret;
849
850	if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
851		ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb);
852		if (ret) {
853			ibdev_err(ibdev,
854				  "failed to map user SQ doorbell, ret = %d.\n",
855				  ret);
856			goto err_out;
857		}
858		hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
859	}
860
861	if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
862		ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb);
863		if (ret) {
864			ibdev_err(ibdev,
865				  "failed to map user RQ doorbell, ret = %d.\n",
866				  ret);
867			goto err_sdb;
868		}
869		hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
870	}
871
872	return 0;
873
874err_sdb:
875	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
876		hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
877err_out:
878	return ret;
879}
880
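/*
 * Set up doorbells for a kernel QP: on HIP09 and later the SQ doorbell is
 * the per-QP DWQE region in device memory, otherwise the legacy UAR-based
 * doorbell registers are used. A record doorbell is also allocated for the
 * RQ when the hardware supports it.
 */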
881static int alloc_kernel_qp_db(struct hns_roce_dev *hr_dev,
882			      struct hns_roce_qp *hr_qp,
883			      struct ib_qp_init_attr *init_attr)
884{
885	struct ib_device *ibdev = &hr_dev->ib_dev;
886	int ret;
887
888	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
889		hr_qp->sq.db_reg = hr_dev->mem_base +
890				   HNS_ROCE_DWQE_SIZE * hr_qp->qpn;
891	else
892		hr_qp->sq.db_reg = hr_dev->reg_base + hr_dev->sdb_offset +
893				   DB_REG_OFFSET * hr_dev->priv_uar.index;
894
895	hr_qp->rq.db_reg = hr_dev->reg_base + hr_dev->odb_offset +
896			   DB_REG_OFFSET * hr_dev->priv_uar.index;
897
898	if (kernel_qp_has_rdb(hr_dev, init_attr)) {
899		ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
900		if (ret) {
901			ibdev_err(ibdev,
902				  "failed to alloc kernel RQ doorbell, ret = %d.\n",
903				  ret);
904			return ret;
905		}
906		*hr_qp->rdb.db_record = 0;
907		hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
908	}
909
910	return 0;
911}
912
913static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
914		       struct ib_qp_init_attr *init_attr,
915		       struct ib_udata *udata,
916		       struct hns_roce_ib_create_qp *ucmd,
917		       struct hns_roce_ib_create_qp_resp *resp)
918{
919	int ret;
920
921	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE)
922		hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB;
923
924	if (udata) {
925		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
926			ret = qp_mmap_entry(hr_qp, hr_dev, udata, resp);
927			if (ret)
928				return ret;
929		}
930
931		ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd,
932				       resp);
933		if (ret)
934			goto err_remove_qp;
935	} else {
936		ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr);
937		if (ret)
938			return ret;
939	}
940
941	return 0;
942
943err_remove_qp:
944	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
945		qp_user_mmap_entry_remove(hr_qp);
946
947	return ret;
948}
949
950static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
951		       struct ib_udata *udata)
952{
953	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
954		udata, struct hns_roce_ucontext, ibucontext);
955
956	if (udata) {
957		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
958			hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
959		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
960			hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
961		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
962			qp_user_mmap_entry_remove(hr_qp);
963	} else {
964		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
965			hns_roce_free_db(hr_dev, &hr_qp->rdb);
966	}
967}
968
969static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev,
970			     struct hns_roce_qp *hr_qp)
971{
972	struct ib_device *ibdev = &hr_dev->ib_dev;
973	u64 *sq_wrid = NULL;
974	u64 *rq_wrid = NULL;
975	int ret;
976
977	sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL);
978	if (ZERO_OR_NULL_PTR(sq_wrid)) {
979		ibdev_err(ibdev, "failed to alloc SQ wrid.\n");
980		return -ENOMEM;
981	}
982
983	if (hr_qp->rq.wqe_cnt) {
984		rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL);
985		if (ZERO_OR_NULL_PTR(rq_wrid)) {
986			ibdev_err(ibdev, "failed to alloc RQ wrid.\n");
987			ret = -ENOMEM;
988			goto err_sq;
989		}
990	}
991
992	hr_qp->sq.wrid = sq_wrid;
993	hr_qp->rq.wrid = rq_wrid;
994	return 0;
995err_sq:
996	kfree(sq_wrid);
997
998	return ret;
999}
1000
1001static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
1002{
1003	kfree(hr_qp->rq.wrid);
1004	kfree(hr_qp->sq.wrid);
1005}
1006
1007static void default_congest_type(struct hns_roce_dev *hr_dev,
1008				 struct hns_roce_qp *hr_qp)
1009{
1010	if (hr_qp->ibqp.qp_type == IB_QPT_UD ||
1011	    hr_qp->ibqp.qp_type == IB_QPT_GSI)
1012		hr_qp->cong_type = CONG_TYPE_DCQCN;
1013	else
1014		hr_qp->cong_type = hr_dev->caps.default_cong_type;
1015}
1016
1017static int set_congest_type(struct hns_roce_qp *hr_qp,
1018			    struct hns_roce_ib_create_qp *ucmd)
1019{
1020	struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
1021
1022	switch (ucmd->cong_type_flags) {
1023	case HNS_ROCE_CREATE_QP_FLAGS_DCQCN:
1024		hr_qp->cong_type = CONG_TYPE_DCQCN;
1025		break;
1026	case HNS_ROCE_CREATE_QP_FLAGS_LDCP:
1027		hr_qp->cong_type = CONG_TYPE_LDCP;
1028		break;
1029	case HNS_ROCE_CREATE_QP_FLAGS_HC3:
1030		hr_qp->cong_type = CONG_TYPE_HC3;
1031		break;
1032	case HNS_ROCE_CREATE_QP_FLAGS_DIP:
1033		hr_qp->cong_type = CONG_TYPE_DIP;
1034		break;
1035	default:
1036		return -EINVAL;
1037	}
1038
1039	if (!test_bit(hr_qp->cong_type, (unsigned long *)&hr_dev->caps.cong_cap))
1040		return -EOPNOTSUPP;
1041
1042	if (hr_qp->ibqp.qp_type == IB_QPT_UD &&
1043	    hr_qp->cong_type != CONG_TYPE_DCQCN)
1044		return -EOPNOTSUPP;
1045
1046	return 0;
1047}
1048
1049static int set_congest_param(struct hns_roce_dev *hr_dev,
1050			     struct hns_roce_qp *hr_qp,
1051			     struct hns_roce_ib_create_qp *ucmd)
1052{
1053	if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE)
1054		return set_congest_type(hr_qp, ucmd);
1055
1056	default_congest_type(hr_dev, hr_qp);
1057
1058	return 0;
1059}
1060
1061static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
1062			struct ib_qp_init_attr *init_attr,
1063			struct ib_udata *udata,
1064			struct hns_roce_ib_create_qp *ucmd)
1065{
1066	struct ib_device *ibdev = &hr_dev->ib_dev;
1067	struct hns_roce_ucontext *uctx;
1068	int ret;
1069
1070	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
1071		hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
1072	else
1073		hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
1074
1075	ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
1076			  hns_roce_qp_has_rq(init_attr), !!udata);
1077	if (ret) {
1078		ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
1079			  ret);
1080		return ret;
1081	}
1082
1083	if (udata) {
1084		ret = ib_copy_from_udata(ucmd, udata,
1085					 min(udata->inlen, sizeof(*ucmd)));
1086		if (ret) {
1087			ibdev_err(ibdev,
1088				  "failed to copy QP ucmd, ret = %d\n", ret);
1089			return ret;
1090		}
1091
1092		uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
1093						 ibucontext);
1094		hr_qp->config = uctx->config;
		ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
		if (ret) {
			ibdev_err(ibdev,
				  "failed to set user SQ size, ret = %d.\n",
				  ret);
			return ret;
		}
1100
1101		ret = set_congest_param(hr_dev, hr_qp, ucmd);
1102		if (ret)
1103			return ret;
1104	} else {
1105		if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
1106			hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
1107		ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
1108		if (ret)
1109			ibdev_err(ibdev,
1110				  "failed to set kernel SQ size, ret = %d.\n",
1111				  ret);
1112
1113		default_congest_type(hr_dev, hr_qp);
1114	}
1115
1116	return ret;
1117}
1118
1119static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
1120				     struct ib_pd *ib_pd,
1121				     struct ib_qp_init_attr *init_attr,
1122				     struct ib_udata *udata,
1123				     struct hns_roce_qp *hr_qp)
1124{
1125	struct hns_roce_ib_create_qp_resp resp = {};
1126	struct ib_device *ibdev = &hr_dev->ib_dev;
1127	struct hns_roce_ib_create_qp ucmd = {};
1128	int ret;
1129
1130	mutex_init(&hr_qp->mutex);
1131	spin_lock_init(&hr_qp->sq.lock);
1132	spin_lock_init(&hr_qp->rq.lock);
1133
1134	hr_qp->state = IB_QPS_RESET;
1135	hr_qp->flush_flag = 0;
1136
1137	if (init_attr->create_flags)
1138		return -EOPNOTSUPP;
1139
1140	ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd);
1141	if (ret) {
1142		ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret);
1143		return ret;
1144	}
1145
1146	if (!udata) {
1147		ret = alloc_kernel_wrid(hr_dev, hr_qp);
1148		if (ret) {
1149			ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n",
1150				  ret);
1151			return ret;
1152		}
1153	}
1154
1155	ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
1156	if (ret) {
1157		ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret);
1158		goto err_buf;
1159	}
1160
1161	ret = alloc_qpn(hr_dev, hr_qp, init_attr);
1162	if (ret) {
1163		ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret);
1164		goto err_qpn;
1165	}
1166
1167	ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp);
1168	if (ret) {
1169		ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n",
1170			  ret);
1171		goto err_db;
1172	}
1173
1174	ret = alloc_qpc(hr_dev, hr_qp);
1175	if (ret) {
1176		ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n",
1177			  ret);
1178		goto err_qpc;
1179	}
1180
1181	ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr);
1182	if (ret) {
1183		ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret);
1184		goto err_store;
1185	}
1186
1187	if (udata) {
1188		resp.cap_flags = hr_qp->en_flags;
1189		ret = ib_copy_to_udata(udata, &resp,
1190				       min(udata->outlen, sizeof(resp)));
1191		if (ret) {
			ibdev_err(ibdev,
				  "failed to copy QP resp, ret = %d.\n", ret);
			goto err_flow_ctrl;
1194		}
1195	}
1196
1197	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
1198		ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
1199		if (ret)
1200			goto err_flow_ctrl;
1201	}
1202
1203	hr_qp->ibqp.qp_num = hr_qp->qpn;
1204	hr_qp->event = hns_roce_ib_qp_event;
1205	refcount_set(&hr_qp->refcount, 1);
1206	init_completion(&hr_qp->free);
1207
1208	return 0;
1209
1210err_flow_ctrl:
1211	hns_roce_qp_remove(hr_dev, hr_qp);
1212err_store:
1213	free_qpc(hr_dev, hr_qp);
1214err_qpc:
1215	free_qp_db(hr_dev, hr_qp, udata);
1216err_db:
1217	free_qpn(hr_dev, hr_qp);
1218err_qpn:
1219	free_qp_buf(hr_dev, hr_qp);
1220err_buf:
1221	free_kernel_wrid(hr_qp);
1222	return ret;
1223}
1224
1225void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
1226			 struct ib_udata *udata)
1227{
1228	if (refcount_dec_and_test(&hr_qp->refcount))
1229		complete(&hr_qp->free);
1230	wait_for_completion(&hr_qp->free);
1231
1232	free_qpc(hr_dev, hr_qp);
1233	free_qpn(hr_dev, hr_qp);
1234	free_qp_buf(hr_dev, hr_qp);
1235	free_kernel_wrid(hr_qp);
1236	free_qp_db(hr_dev, hr_qp, udata);
1237}
1238
1239static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
1240			 bool is_user)
1241{
1242	switch (type) {
1243	case IB_QPT_XRC_INI:
1244	case IB_QPT_XRC_TGT:
1245		if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
1246			goto out;
1247		break;
1248	case IB_QPT_UD:
1249		if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 &&
1250		    is_user)
1251			goto out;
1252		break;
1253	case IB_QPT_RC:
1254	case IB_QPT_GSI:
1255		break;
1256	default:
1257		goto out;
1258	}
1259
1260	return 0;
1261
1262out:
	ibdev_err(&hr_dev->ib_dev, "QP type %d is not supported\n", type);
1264
1265	return -EOPNOTSUPP;
1266}
1267
1268int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
1269		       struct ib_udata *udata)
1270{
1271	struct ib_device *ibdev = qp->device;
1272	struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
1273	struct hns_roce_qp *hr_qp = to_hr_qp(qp);
1274	struct ib_pd *pd = qp->pd;
1275	int ret;
1276
1277	ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
1278	if (ret)
1279		goto err_out;
1280
1281	if (init_attr->qp_type == IB_QPT_XRC_TGT)
1282		hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
1283
1284	if (init_attr->qp_type == IB_QPT_GSI) {
1285		hr_qp->port = init_attr->port_num - 1;
1286		hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
1287	}
1288
1289	ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp);
1290	if (ret)
		ibdev_err(ibdev, "failed to create QP of type 0x%x, ret = %d.\n",
			  init_attr->qp_type, ret);
1293
1294err_out:
1295	if (ret)
1296		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_CREATE_ERR_CNT]);
1297
1298	return ret;
1299}
1300
1301int to_hr_qp_type(int qp_type)
1302{
1303	switch (qp_type) {
1304	case IB_QPT_RC:
1305		return SERV_TYPE_RC;
1306	case IB_QPT_UD:
1307	case IB_QPT_GSI:
1308		return SERV_TYPE_UD;
1309	case IB_QPT_XRC_INI:
1310	case IB_QPT_XRC_TGT:
1311		return SERV_TYPE_XRC;
1312	default:
1313		return -1;
1314	}
1315}
1316
1317static int check_mtu_validate(struct hns_roce_dev *hr_dev,
1318			      struct hns_roce_qp *hr_qp,
1319			      struct ib_qp_attr *attr, int attr_mask)
1320{
1321	enum ib_mtu active_mtu;
1322	int p;
1323
1324	p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
1325	active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
1326
1327	if ((hr_dev->caps.max_mtu >= IB_MTU_2048 &&
1328	    attr->path_mtu > hr_dev->caps.max_mtu) ||
1329	    attr->path_mtu < IB_MTU_256 || attr->path_mtu > active_mtu) {
		ibdev_err(&hr_dev->ib_dev,
			  "invalid attr path_mtu (%d) while modifying QP\n",
			  attr->path_mtu);
1333		return -EINVAL;
1334	}
1335
1336	return 0;
1337}
1338
1339static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1340				  int attr_mask)
1341{
1342	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
1343	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
1344	int p;
1345
1346	if ((attr_mask & IB_QP_PORT) &&
1347	    (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
1348		ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n",
1349			  attr->port_num);
1350		return -EINVAL;
1351	}
1352
1353	if (attr_mask & IB_QP_PKEY_INDEX) {
1354		p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
1355		if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
1356			ibdev_err(&hr_dev->ib_dev,
1357				  "invalid attr, pkey_index = %u.\n",
1358				  attr->pkey_index);
1359			return -EINVAL;
1360		}
1361	}
1362
1363	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1364	    attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
1365		ibdev_err(&hr_dev->ib_dev,
1366			  "invalid attr, max_rd_atomic = %u.\n",
1367			  attr->max_rd_atomic);
1368		return -EINVAL;
1369	}
1370
1371	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1372	    attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
1373		ibdev_err(&hr_dev->ib_dev,
1374			  "invalid attr, max_dest_rd_atomic = %u.\n",
1375			  attr->max_dest_rd_atomic);
1376		return -EINVAL;
1377	}
1378
1379	if (attr_mask & IB_QP_PATH_MTU)
1380		return check_mtu_validate(hr_dev, hr_qp, attr, attr_mask);
1381
1382	return 0;
1383}
1384
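/*
 * Verb entry point for modify_qp: validate the state transition and the
 * attributes, latch the SQ/RQ heads from the user doorbell records when a
 * userspace QP is moved to the error state (needed for flushing CQEs), and
 * then hand the transition to the hardware-specific modify_qp callback.
 */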
1385int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1386		       int attr_mask, struct ib_udata *udata)
1387{
1388	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
1389	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
1390	enum ib_qp_state cur_state, new_state;
1391	int ret = -EINVAL;
1392
1393	mutex_lock(&hr_qp->mutex);
1394
1395	if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state)
1396		goto out;
1397
1398	cur_state = hr_qp->state;
1399	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1400
1401	if (ibqp->uobject &&
1402	    (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
1403		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) {
1404			hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
1405
1406			if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
1407				hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
1408		} else {
1409			ibdev_warn(&hr_dev->ib_dev,
1410				  "flush cqe is not supported in userspace!\n");
1411			goto out;
1412		}
1413	}
1414
1415	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1416				attr_mask)) {
1417		ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n");
1418		goto out;
1419	}
1420
1421	ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask);
1422	if (ret)
1423		goto out;
1424
1425	if (cur_state == new_state && cur_state == IB_QPS_RESET)
1426		goto out;
1427
1428	ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
1429				    new_state, udata);
1430
1431out:
1432	mutex_unlock(&hr_qp->mutex);
1433	if (ret)
1434		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT]);
1435
1436	return ret;
1437}
1438
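/*
 * Lock the send and receive CQs of a QP together. When the two CQs differ
 * they are always taken in ascending CQN order so concurrent callers cannot
 * deadlock; the __acquire()/__release() annotations keep sparse happy in
 * the NULL and shared-CQ cases.
 */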
1439void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
1440		       __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
1441{
1442	if (unlikely(send_cq == NULL && recv_cq == NULL)) {
1443		__acquire(&send_cq->lock);
1444		__acquire(&recv_cq->lock);
1445	} else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
1446		spin_lock_irq(&send_cq->lock);
1447		__acquire(&recv_cq->lock);
1448	} else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
1449		spin_lock_irq(&recv_cq->lock);
1450		__acquire(&send_cq->lock);
1451	} else if (send_cq == recv_cq) {
1452		spin_lock_irq(&send_cq->lock);
1453		__acquire(&recv_cq->lock);
1454	} else if (send_cq->cqn < recv_cq->cqn) {
1455		spin_lock_irq(&send_cq->lock);
1456		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
1457	} else {
1458		spin_lock_irq(&recv_cq->lock);
1459		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
1460	}
1461}
1462
1463void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
1464			 struct hns_roce_cq *recv_cq) __releases(&send_cq->lock)
1465			 __releases(&recv_cq->lock)
1466{
1467	if (unlikely(send_cq == NULL && recv_cq == NULL)) {
1468		__release(&recv_cq->lock);
1469		__release(&send_cq->lock);
1470	} else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
1471		__release(&recv_cq->lock);
1472		spin_unlock(&send_cq->lock);
1473	} else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
1474		__release(&send_cq->lock);
1475		spin_unlock(&recv_cq->lock);
1476	} else if (send_cq == recv_cq) {
1477		__release(&recv_cq->lock);
1478		spin_unlock_irq(&send_cq->lock);
1479	} else if (send_cq->cqn < recv_cq->cqn) {
1480		spin_unlock(&recv_cq->lock);
1481		spin_unlock_irq(&send_cq->lock);
1482	} else {
1483		spin_unlock(&send_cq->lock);
1484		spin_unlock_irq(&recv_cq->lock);
1485	}
1486}
1487
1488static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset)
1489{
1490	return hns_roce_buf_offset(hr_qp->mtr.kmem, offset);
1491}
1492
1493void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
1494{
1495	return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
1496}
1497
1498void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
1499{
1500	return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
1501}
1502
1503void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n)
1504{
1505	return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift));
1506}
1507
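/*
 * Check whether posting nreq more WRs would overflow the work queue: do a
 * lockless check first, then re-read head/tail under the CQ lock before
 * declaring an overflow.
 */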
1508bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
1509			  struct ib_cq *ib_cq)
1510{
1511	struct hns_roce_cq *hr_cq;
1512	u32 cur;
1513
1514	cur = hr_wq->head - hr_wq->tail;
1515	if (likely(cur + nreq < hr_wq->wqe_cnt))
1516		return false;
1517
1518	hr_cq = to_hr_cq(ib_cq);
1519	spin_lock(&hr_cq->lock);
1520	cur = hr_wq->head - hr_wq->tail;
1521	spin_unlock(&hr_cq->lock);
1522
1523	return cur + nreq >= hr_wq->wqe_cnt;
1524}
1525
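/*
 * Initialize the QPN allocator: reserved QPNs at the bottom of the range
 * are accounted as in-use in their banks and excluded by raising each
 * bank's minimum, and every bank gets an IDA covering its share of the QPN
 * space.
 */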
1526int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
1527{
1528	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
1529	unsigned int reserved_from_bot;
1530	unsigned int i;
1531
1532	qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps,
1533					sizeof(u32), GFP_KERNEL);
1534	if (!qp_table->idx_table.spare_idx)
1535		return -ENOMEM;
1536
1537	mutex_init(&qp_table->scc_mutex);
1538	mutex_init(&qp_table->bank_mutex);
1539	xa_init(&hr_dev->qp_table_xa);
1540
1541	reserved_from_bot = hr_dev->caps.reserved_qps;
1542
1543	for (i = 0; i < reserved_from_bot; i++) {
1544		hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++;
1545		hr_dev->qp_table.bank[get_qp_bankid(i)].min++;
1546	}
1547
1548	for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
1549		ida_init(&hr_dev->qp_table.bank[i].ida);
1550		hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps /
1551					       HNS_ROCE_QP_BANK_NUM - 1;
1552		hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min;
1553	}
1554
1555	return 0;
1556}
1557
1558void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
1559{
1560	int i;
1561
1562	for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
1563		ida_destroy(&hr_dev->qp_table.bank[i].ida);
1564	kfree(hr_dev->qp_table.idx_table.spare_idx);
1565}
1566