/*-
 * Copyright (c) 2021-2022 NVIDIA corporation & affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The internal queue (IQ) code is more or less a stripped-down copy of
 * the existing SQ management code, with the following exceptions:
 *
 * - an optional single-segment memory buffer may be provided, which
 *   the hardware can read or write as a whole.
 *
 * - an optional completion callback may be provided for every
 *   transmit operation.
 *
 * - mbufs are not supported.
 */
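
/*
 * Rough usage sketch (illustrative only, not copied from a particular
 * consumer; names like "my_callback", "my_arg", "buffer", "size" and
 * "ds_cnt" are placeholders and the WQE layout depends on the caller):
 *
 *	mtx_lock(&iq->lock);
 *	pi = mlx5e_iq_get_producer_index(iq);
 *	if (pi < 0) {
 *		mtx_unlock(&iq->lock);
 *		return;		(queue full or not running)
 *	}
 *	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
 *	(build the control and data segments of "wqe" here)
 *	mlx5e_iq_load_memory_single(iq, pi, buffer, size,
 *	    &dma_address, BUS_DMASYNC_PREREAD);
 *	iq->data[pi].callback = my_callback;	(optional)
 *	iq->data[pi].arg = my_arg;
 *	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 *	iq->pc += iq->data[pi].num_wqebbs;
 *	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
 *	mlx5e_iq_notify_hw(iq);
 *	mtx_unlock(&iq->lock);
 */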

#include <dev/mlx5/mlx5_en/en.h>

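/*
 * Poll the IQ's completion queue and retire up to "budget" completed
 * WQEs: synchronize and unload any DMA map attached to a completed
 * slot, invoke the optional completion callback, drop the optional
 * reference count and advance the consumer counter.
 */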
static void
mlx5e_iq_poll(struct mlx5e_iq *iq, int budget)
{
	const struct mlx5_cqe64 *cqe;
	u16 ci;
	u16 iqcc;

	/*
	 * iq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	iqcc = iq->cc;

	while (budget-- > 0) {
		cqe = mlx5e_get_cqe(&iq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&iq->cq.wq);

		ci = iqcc & iq->wq.sz_m1;

		if (likely(iq->data[ci].dma_sync != 0)) {
			/* make sure data written by hardware is visible to CPU */
			bus_dmamap_sync(iq->dma_tag, iq->data[ci].dma_map,
			    iq->data[ci].dma_sync);
			bus_dmamap_unload(iq->dma_tag, iq->data[ci].dma_map);

			iq->data[ci].dma_sync = 0;
		}

		if (likely(iq->data[ci].callback != NULL)) {
			iq->data[ci].callback(iq->data[ci].arg);
			iq->data[ci].callback = NULL;
		}

		if (unlikely(iq->data[ci].p_refcount != NULL)) {
			atomic_add_int(iq->data[ci].p_refcount, -1);
			iq->data[ci].p_refcount = NULL;
		}
		iqcc += iq->data[ci].num_wqebbs;
	}

	mlx5_cqwq_update_db_record(&iq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	atomic_thread_fence_rel();

	iq->cc = iqcc;
}

static void
mlx5e_iq_completion(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
{
	struct mlx5e_iq *iq = container_of(mcq, struct mlx5e_iq, cq.mcq);

	mtx_lock(&iq->comp_lock);
	mlx5e_iq_poll(iq, MLX5E_BUDGET_MAX);
	mlx5e_cq_arm(&iq->cq, MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
	mtx_unlock(&iq->comp_lock);
}

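/*
 * Post a NOP control WQE covering "ds_cnt" data segments.  Used by
 * mlx5e_iq_get_producer_index() to pad the work queue so that a WQE
 * does not wrap around the end of the ring.  The IQ lock must be held.
 */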
void
mlx5e_iq_send_nop(struct mlx5e_iq *iq, u32 ds_cnt)
{
	u16 pi = iq->pc & iq->wq.sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);

	mtx_assert(&iq->lock, MA_OWNED);

	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) | MLX5_OPCODE_NOP);
	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;

	/* Copy data for doorbell */
	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));

	iq->data[pi].callback = NULL;
	iq->data[pi].arg = NULL;
	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	iq->data[pi].dma_sync = 0;
	iq->pc += iq->data[pi].num_wqebbs;
}

static void
mlx5e_iq_free_db(struct mlx5e_iq *iq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
	int x;

	for (x = 0; x != wq_sz; x++) {
		if (likely(iq->data[x].dma_sync != 0)) {
			bus_dmamap_unload(iq->dma_tag, iq->data[x].dma_map);
			iq->data[x].dma_sync = 0;
		}
		if (likely(iq->data[x].callback != NULL)) {
			iq->data[x].callback(iq->data[x].arg);
			iq->data[x].callback = NULL;
		}
		if (unlikely(iq->data[x].p_refcount != NULL)) {
			atomic_add_int(iq->data[x].p_refcount, -1);
			iq->data[x].p_refcount = NULL;
		}
		bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
	}
	free(iq->data, M_MLX5EN);
}

static int
mlx5e_iq_alloc_db(struct mlx5e_iq *iq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
	int err;
	int x;

	iq->data = malloc_domainset(wq_sz * sizeof(iq->data[0]), M_MLX5EN,
	    mlx5_dev_domainset(iq->priv->mdev), M_WAITOK | M_ZERO);

	/* Create DMA descriptor maps */
	for (x = 0; x != wq_sz; x++) {
		err = -bus_dmamap_create(iq->dma_tag, 0, &iq->data[x].dma_map);
		if (err != 0) {
			while (x--)
				bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
			free(iq->data, M_MLX5EN);
			return (err);
		}
	}
	return (0);
}

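/*
 * Allocate the software state for an IQ: the DMA tag used for single
 * segment buffers, the cyclic work queue and the per-WQE bookkeeping
 * array.
 */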
static int
mlx5e_iq_create(struct mlx5e_channel *c,
    struct mlx5e_sq_param *param,
    struct mlx5e_iq *iq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    PAGE_SIZE,			/* maxsize */
	    1,				/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &iq->dma_tag)))
		goto done;

	iq->mkey_be = cpu_to_be32(priv->mr.key);
	iq->priv = priv;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq,
	    &iq->wq, &iq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	iq->wq.db = &iq->wq.db[MLX5_SND_DBR];

	err = mlx5e_iq_alloc_db(iq);
	if (err)
		goto err_iq_wq_destroy;

	return (0);

err_iq_wq_destroy:
	mlx5_wq_destroy(&iq->wq_ctrl);

err_free_dma_tag:
	bus_dma_tag_destroy(iq->dma_tag);
done:
	return (err);
}

static void
mlx5e_iq_destroy(struct mlx5e_iq *iq)
{
	mlx5e_iq_free_db(iq);
	mlx5_wq_destroy(&iq->wq_ctrl);
	bus_dma_tag_destroy(iq->dma_tag);
}

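/*
 * Create the hardware send queue object backing this IQ.  The queue is
 * created in the RST state; mlx5e_iq_modify() moves it to RDY during
 * mlx5e_iq_open().
 */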
static int
mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param,
    const struct mlx5_sq_bfreg *bfreg, int tis_num)
{
	void *in;
	void *sqc;
	void *wq;
	int inlen;
	int err;
	u8 ts_format;

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
	    sizeof(u64) * iq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	iq->uar_map = bfreg->map;

	ts_format = mlx5_get_sq_default_ts(iq->priv->mdev);
	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, param->sqc, sizeof(param->sqc));

	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
	MLX5_SET(sqc, sqc, cqn, iq->cq.mcq.cqn);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, ts_format, ts_format);
	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
	MLX5_SET(sqc, sqc, allow_swp, 1);

	/* SQ remap support requires the reg_umr privilege level */
	if (MLX5_CAP_QOS(iq->priv->mdev, qos_remap_pp)) {
		MLX5_SET(sqc, sqc, qos_remap_en, 1);
		if (MLX5_CAP_ETH(iq->priv->mdev, reg_umr_sq))
			MLX5_SET(sqc, sqc, reg_umr, 1);
		else
			mlx5_en_err(iq->priv->ifp,
			    "No reg umr SQ capability, SQ remap disabled\n");
	}

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, bfreg->index);
	MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift -
	    MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, iq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&iq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(iq->priv->mdev, in, inlen, &iq->sqn);

	kvfree(in);

	return (err);
}

static int
mlx5e_iq_modify(struct mlx5e_iq *iq, int curr_state, int next_state)
{
	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, iq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
	MLX5_SET(sqc, sqc, state, next_state);

	err = mlx5_core_modify_sq(iq->priv->mdev, in, inlen);

	kvfree(in);

	return (err);
}

static void
mlx5e_iq_disable(struct mlx5e_iq *iq)
{
	mlx5_core_destroy_sq(iq->priv->mdev, iq->sqn);
}

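/*
 * Bring up an IQ: open its completion queue, allocate the software
 * state, create the hardware send queue and move it from the RST to
 * the RDY state.  On success the queue is marked running.
 */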
int
mlx5e_iq_open(struct mlx5e_channel *c,
    struct mlx5e_sq_param *sq_param,
    struct mlx5e_cq_param *cq_param,
    struct mlx5e_iq *iq)
{
	int err;

	err = mlx5e_open_cq(c->priv, cq_param, &iq->cq,
	    &mlx5e_iq_completion, c->ix);
	if (err)
		return (err);

	err = mlx5e_iq_create(c, sq_param, iq);
	if (err)
		goto err_close_cq;

	err = mlx5e_iq_enable(iq, sq_param, &c->bfreg, c->priv->tisn[0]);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_iq_modify(iq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	WRITE_ONCE(iq->running, 1);

	return (0);

err_disable_sq:
	mlx5e_iq_disable(iq);
err_destroy_sq:
	mlx5e_iq_destroy(iq);
err_close_cq:
	mlx5e_close_cq(&iq->cq);

	return (err);
}

static void
mlx5e_iq_drain(struct mlx5e_iq *iq)
{
	struct mlx5_core_dev *mdev = iq->priv->mdev;

	/*
	 * Check if already stopped.
	 *
	 * NOTE: Serialization of this function is managed by the
	 * caller ensuring the priv's state lock is locked or in case
	 * of rate limit support, a single thread manages drain and
	 * resume of SQs. The "running" variable can therefore safely
	 * be read without any locks.
	 */
	if (READ_ONCE(iq->running) == 0)
		return;

	/* don't put more packets into the SQ */
	WRITE_ONCE(iq->running, 0);

	/* wait till SQ is empty or link is down */
	mtx_lock(&iq->lock);
	while (iq->cc != iq->pc &&
	    (iq->priv->media_status_last & IFM_ACTIVE) != 0 &&
	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
	    pci_channel_offline(mdev->pdev) == 0) {
		mtx_unlock(&iq->lock);
		msleep(1);
		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
		mtx_lock(&iq->lock);
	}
	mtx_unlock(&iq->lock);

	/* error out remaining requests */
	(void) mlx5e_iq_modify(iq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);

	/* wait till SQ is empty */
	mtx_lock(&iq->lock);
	while (iq->cc != iq->pc &&
	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
	    pci_channel_offline(mdev->pdev) == 0) {
		mtx_unlock(&iq->lock);
		msleep(1);
		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
		mtx_lock(&iq->lock);
	}
	mtx_unlock(&iq->lock);
}

void
mlx5e_iq_close(struct mlx5e_iq *iq)
{
	mlx5e_iq_drain(iq);
	mlx5e_iq_disable(iq);
	mlx5e_iq_destroy(iq);
	mlx5e_close_cq(&iq->cq);
}

void
mlx5e_iq_static_init(struct mlx5e_iq *iq)
{
	mtx_init(&iq->lock, "mlx5iq",
	    MTX_NETWORK_LOCK " IQ", MTX_DEF);
	mtx_init(&iq->comp_lock, "mlx5iq_comp",
	    MTX_NETWORK_LOCK " IQ COMP", MTX_DEF);
}

void
mlx5e_iq_static_destroy(struct mlx5e_iq *iq)
{
	mtx_destroy(&iq->lock);
	mtx_destroy(&iq->comp_lock);
}

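/*
 * Write the doorbell record and ring the doorbell for any WQEs posted
 * since the last notification.  The doorbell data is staged in
 * iq->doorbell by the WQE producer and cleared here once written to
 * the UAR.  The IQ lock must be held.
 */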
void
mlx5e_iq_notify_hw(struct mlx5e_iq *iq)
{
	mtx_assert(&iq->lock, MA_OWNED);

	/* Check if we need to write the doorbell */
	if (unlikely(iq->db_inhibit != 0 || iq->doorbell.d64 == 0))
		return;

	/* Ensure wqe is visible to device before updating doorbell record */
	wmb();

	*iq->wq.db = cpu_to_be32(iq->pc);

	/*
	 * Ensure the doorbell record is visible to device before ringing
	 * the doorbell:
	 */
	wmb();

	mlx5_write64(iq->doorbell.d32, iq->uar_map,
	    MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));

	iq->doorbell.d64 = 0;
}

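/*
 * Return true if at least "n" WQEBBs are free in the work queue.
 * (sz_m1 & (cc - pc)) is the number of free WQEBBs modulo the queue
 * size; for example with a 64-entry queue (sz_m1 == 63), cc == 10 and
 * pc == 70, 60 WQEBBs are in flight and (63 & (10 - 70)) == 4 are
 * free.  When cc == pc the masked value is zero even though the queue
 * is completely empty, hence the extra check.
 */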
static inline bool
mlx5e_iq_has_room_for(struct mlx5e_iq *iq, u16 n)
{
	u16 cc = iq->cc;
	u16 pc = iq->pc;

	return ((iq->wq.sz_m1 & (cc - pc)) >= n || cc == pc);
}

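/*
 * Return the producer index to use for the next WQE, or -1 if the
 * queue is not running or lacks room for two maximum sized WQEs (one
 * of which may be needed as NOP padding).  If the space left before
 * the end of the ring is too small for a maximum sized WQE, it is
 * filled with one large NOP so that the returned index never wraps in
 * the middle of a WQE.  The IQ lock must be held; the caller is
 * responsible for advancing iq->pc once the WQE has been built.
 */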
int
mlx5e_iq_get_producer_index(struct mlx5e_iq *iq)
{
	u16 pi;

	mtx_assert(&iq->lock, MA_OWNED);

	if (unlikely(iq->running == 0))
		return (-1);
	if (unlikely(!mlx5e_iq_has_room_for(iq, 2 * MLX5_SEND_WQE_MAX_WQEBBS)))
		return (-1);

	/* Align IQ edge with NOPs to avoid WQE wrap around */
	pi = ((~iq->pc) & iq->wq.sz_m1);
	if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) {
		/* Send one multi NOP message instead of many */
		mlx5e_iq_send_nop(iq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
		pi = ((~iq->pc) & iq->wq.sz_m1);
		if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)))
			return (-1);
	}
	return (iq->pc & iq->wq.sz_m1);
}

static void
mlx5e_iq_load_memory_cb(void *arg, bus_dma_segment_t *segs,
    int nseg, int error)
{
	u64 *pdma_address = arg;

	if (unlikely(error || nseg != 1))
		panic("mlx5e_iq_load_memory_cb: error=%d nseg=%d", error, nseg);

	*pdma_address = segs[0].ds_addr;
}

CTASSERT(BUS_DMASYNC_POSTREAD != 0);
CTASSERT(BUS_DMASYNC_POSTWRITE != 0);

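/*
 * Load a single-segment buffer for DMA and store its bus address in
 * *pdma_address.  The matching BUS_DMASYNC_POST* operation is recorded
 * in the per-WQE state so that mlx5e_iq_poll() can synchronize and
 * unload the map when the hardware completes the WQE; a non-zero
 * dma_sync value is what marks the slot as loaded, hence the
 * CTASSERTs above.
 */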
void
mlx5e_iq_load_memory_single(struct mlx5e_iq *iq, u16 pi, void *buffer, size_t size,
    u64 *pdma_address, u32 dma_sync)
{
	int error;

	error = bus_dmamap_load(iq->dma_tag, iq->data[pi].dma_map, buffer, size,
	    &mlx5e_iq_load_memory_cb, pdma_address, BUS_DMA_NOWAIT);
	if (unlikely(error))
		panic("mlx5e_iq_load_memory_single: error=%d buffer=%p size=%zd",
		    error, buffer, size);

	switch (dma_sync) {
	case BUS_DMASYNC_PREREAD:
		iq->data[pi].dma_sync = BUS_DMASYNC_POSTREAD;
		break;
	case BUS_DMASYNC_PREWRITE:
		iq->data[pi].dma_sync = BUS_DMASYNC_POSTWRITE;
		break;
	default:
		panic("mlx5e_iq_load_memory_single: Invalid DMA sync operation(%d)",
		    dma_sync);
	}

	/* make sure data in buffer is visible to hardware */
	bus_dmamap_sync(iq->dma_tag, iq->data[pi].dma_map, dma_sync);
}