// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include "erdma.h"

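/*
 * Ring the cmdq CQ doorbell with the ARM bit set so the device signals
 * the next completion through an event. The doorbell value is also
 * mirrored to cq.db_record in host memory (the address programmed into
 * ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG below), presumably so the device can
 * re-read the latest doorbell without an extra MMIO round trip.
 */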
static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}

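/*
 * Ring the cmdq SQ doorbell with the new producer index, telling the
 * device that freshly written WQEBBs are ready to fetch. As with the CQ
 * doorbell, the value is shadowed to sq.db_record in host memory.
 */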
static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

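/*
 * Grab a free per-command wait context from the pool. Every in-flight
 * command owns one slot, tracked by comp_wait_bitmap; the slot index
 * (ctx_id) doubles as the context cookie carried in the SQE header,
 * which lets the completion path find this context again.
 */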
static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}

static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}

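/*
 * Allocate the wait-context pool and its allocation bitmap: one context
 * per possible outstanding command, each with its own completion object.
 */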
static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}

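/*
 * Set up the cmdq submission queue: one SQE (sq->wqebb_cnt WQEBBs) per
 * possible outstanding command. WARPPED_BUFSIZE reserves room past the
 * queue itself; the doorbell shadow (db_record) lives at qbuf + buf_size,
 * and its DMA address is handed to the device through
 * ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG.
 */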
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;
	u32 buf_size;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	buf_size = sq->depth << SQEBB_SHIFT;

	sq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->db_record = (u64 *)(sq->qbuf + buf_size);

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
			  sq->qbuf_dma_addr + buf_size);

	return 0;
}

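/*
 * Set up the cmdq completion queue, sized to match the SQ so that a CQE
 * slot exists for every posted SQE. The layout mirrors the SQ: queue
 * buffer first, doorbell shadow at qbuf + buf_size.
 */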
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;
	u32 buf_size;

	cq->depth = cmdq->sq.depth;
	buf_size = cq->depth << CQE_SHIFT;

	cq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->db_record = (u64 *)(cq->qbuf + buf_size);

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
			  cq->qbuf_dma_addr + buf_size);

	return 0;
}

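/*
 * Set up the cmdq event queue. It only comes into play after the cmdq
 * switches to event mode; one EQE per possible outstanding command is
 * therefore sufficient.
 */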
static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;
	u32 buf_size;

	eq->depth = cmdq->max_outstandings;
	buf_size = eq->depth << EQE_SHIFT;

	eq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!eq->qbuf)
		return -ENOMEM;

	spin_lock_init(&eq->lock);
	atomic64_set(&eq->event_num, 0);

	eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG;
	eq->db_record = (u64 *)(eq->qbuf + buf_size);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
			  eq->qbuf_dma_addr + buf_size);

	return 0;
}

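/*
 * Bring up the command queue in polling mode. The caller is expected to
 * switch it to event mode via erdma_finish_cmdq_init() once interrupts
 * are available.
 */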
int erdma_cmdq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	int err;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* After device init succeeds, switch the cmdq to event mode. */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}

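/* Tear down the cmdq: mark it unusable, then free the EQ, SQ and CQ buffers. */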
void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
}

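/*
 * Return the CQE at the current consumer index if the device has already
 * written it, NULL otherwise. Validity is tracked with an owner bit that
 * flips on every queue wrap: (ci & depth) extracts the current wrap
 * parity, which assumes the queue depth is a power of two.
 */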
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      be32_to_cpu(READ_ONCE(*cqe)));

	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

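/*
 * Copy one request into the SQ at the producer index and ring the SQ
 * doorbell. The first 8 bytes of the WQE are rewritten afterwards so the
 * header also carries the post-increment producer index, the wait-context
 * cookie and the WQEBB count. The caller must hold sq.lock.
 */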
static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	__le64 *wqe;
	u64 hdr = *req;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}

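/*
 * Reap a single CQE if one is pending: locate the originating SQE via the
 * index reported in the CQE, recover the wait context from the cookie in
 * the SQE header, record the completion status and data, and wake the
 * waiter when in event mode. Returns -EAGAIN if no valid CQE is pending.
 */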
static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	dma_rmb();
	hdr0 = be32_to_cpu(*cqe);
	sqe_idx = be32_to_cpu(*(cqe + 1));

	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;
	/* Copy the 16 bytes of completion data that follow the CQE header. */
	be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4);

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}

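/* Drain all pending CQEs under cq.lock, re-arming the CQ in event mode. */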
static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* There can be at most max_outstandings commands in flight, so no
	 * more than that many completions can be pending at once.
	 */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

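/*
 * Event-mode completion path, called from the cmdq EQ interrupt handler:
 * consume the queued EQEs, bump the command sequence number, reap the
 * completions, and acknowledge the EQ via notify_eq().
 */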
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}

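/*
 * Polling-mode wait: repeatedly reap completions until this command
 * leaves the ISSUED state or the timeout expires.
 */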
static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		msleep(20);
	}

	return 0;
}

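/*
 * Event-mode wait: sleep on the per-command completion object. If the
 * command still has not finished when the wait returns, mark it timed out
 * under cq.lock so that a late completion will not be mistaken for a
 * valid one.
 */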
static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}

void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

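/*
 * Post a command and wait for its completion. The credits semaphore,
 * sized to max_outstandings, bounds the number of in-flight commands. On
 * timeout the cmdq is marked broken and the wait context is deliberately
 * not recycled, apparently so that a late device write cannot land in a
 * reused slot.
 */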
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}
	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);

	return ret;
}