1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/* Copyright (c) 2020 Mellanox Technologies Ltd. */
3
4#include <linux/module.h>
5#include <linux/vdpa.h>
6#include <linux/vringh.h>
7#include <uapi/linux/virtio_net.h>
8#include <uapi/linux/virtio_ids.h>
9#include <uapi/linux/vdpa.h>
10#include <uapi/linux/vhost_types.h>
11#include <linux/virtio_config.h>
12#include <linux/auxiliary_bus.h>
13#include <linux/mlx5/cq.h>
14#include <linux/mlx5/qp.h>
15#include <linux/mlx5/device.h>
16#include <linux/mlx5/driver.h>
17#include <linux/mlx5/vport.h>
18#include <linux/mlx5/fs.h>
19#include <linux/mlx5/mlx5_ifc_vdpa.h>
20#include <linux/mlx5/mpfs.h>
21#include "mlx5_vdpa.h"
22#include "mlx5_vnet.h"
23
24MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
25MODULE_DESCRIPTION("Mellanox VDPA driver");
26MODULE_LICENSE("Dual BSD/GPL");
27
28#define VALID_FEATURES_MASK                                                                        \
29	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
30	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
31	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
32	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
34	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
35	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
36	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
37	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
38	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
39	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
40	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
41	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42
43#define VALID_STATUS_MASK                                                                          \
44	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
45	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46
47#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
48
49#define MLX5V_UNTAGGED 0x1000
50
51struct mlx5_vdpa_cq_buf {
52	struct mlx5_frag_buf_ctrl fbc;
53	struct mlx5_frag_buf frag_buf;
54	int cqe_size;
55	int nent;
56};
57
58struct mlx5_vdpa_cq {
59	struct mlx5_core_cq mcq;
60	struct mlx5_vdpa_cq_buf buf;
61	struct mlx5_db db;
62	int cqe;
63};
64
65struct mlx5_vdpa_umem {
66	struct mlx5_frag_buf_ctrl fbc;
67	struct mlx5_frag_buf frag_buf;
68	int size;
69	u32 id;
70};
71
72struct mlx5_vdpa_qp {
73	struct mlx5_core_qp mqp;
74	struct mlx5_frag_buf frag_buf;
75	struct mlx5_db db;
76	u16 head;
77	bool fw;
78};
79
80struct mlx5_vq_restore_info {
81	u32 num_ent;
82	u64 desc_addr;
83	u64 device_addr;
84	u64 driver_addr;
85	u16 avail_index;
86	u16 used_index;
87	struct msi_map map;
88	bool ready;
89	bool restore;
90};
91
92struct mlx5_vdpa_virtqueue {
93	bool ready;
94	u64 desc_addr;
95	u64 device_addr;
96	u64 driver_addr;
97	u32 num_ent;
98
99	/* Resources for implementing the notification channel from the device
100	 * to the driver. fwqp is the firmware end of an RC connection; the
101	 * other end is vqqp used by the driver. cq is where completions are
102	 * reported.
103	 */
104	struct mlx5_vdpa_cq cq;
105	struct mlx5_vdpa_qp fwqp;
106	struct mlx5_vdpa_qp vqqp;
107
	/* umem resources are required for the virtqueue operation. Their use
	 * is internal to the device, but they must be provided by the driver.
	 */
111	struct mlx5_vdpa_umem umem1;
112	struct mlx5_vdpa_umem umem2;
113	struct mlx5_vdpa_umem umem3;
114
115	u32 counter_set_id;
116	bool initialized;
117	int index;
118	u32 virtq_id;
119	struct mlx5_vdpa_net *ndev;
120	u16 avail_idx;
121	u16 used_idx;
122	int fw_state;
123
124	u64 modified_fields;
125
126	struct mlx5_vdpa_mr *vq_mr;
127	struct mlx5_vdpa_mr *desc_mr;
128
129	struct msi_map map;
130
131	/* keep last in the struct */
132	struct mlx5_vq_restore_info ri;
133};
134
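/* Data virtqueues come in RX/TX pairs. Without VIRTIO_NET_F_MQ there is a
 * single pair (indices 0 and 1), optionally followed by a control virtqueue
 * at index 2 when VIRTIO_NET_F_CTRL_VQ is negotiated. With MQ, any index up
 * to max_idx is valid.
 */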
135static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
136{
137	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
138		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
139			return idx < 2;
140		else
141			return idx < 3;
142	}
143
144	return idx <= mvdev->max_idx;
145}
146
147static void free_resources(struct mlx5_vdpa_net *ndev);
148static void init_mvqs(struct mlx5_vdpa_net *ndev);
149static int setup_driver(struct mlx5_vdpa_dev *mvdev);
150static void teardown_driver(struct mlx5_vdpa_net *ndev);
151
152static bool mlx5_vdpa_debug;
153
154#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
155	do {                                                                                       \
156		if (features & BIT_ULL(_feature))                                                  \
157			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
158	} while (0)
159
160#define MLX5_LOG_VIO_STAT(_status)                                                                 \
161	do {                                                                                       \
162		if (status & (_status))                                                            \
163			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
164	} while (0)
165
166/* TODO: cross-endian support */
167static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
168{
169	return virtio_legacy_is_little_endian() ||
170		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
171}
172
173static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
174{
175	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
176}
177
178static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
179{
180	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
181}
182
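/* The control virtqueue follows the data virtqueues: index 2 when
 * VIRTIO_NET_F_MQ is not negotiated, max_vqs otherwise.
 */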
183static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
184{
185	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
186		return 2;
187
188	return mvdev->max_vqs;
189}
190
191static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
192{
193	return idx == ctrl_vq_idx(mvdev);
194}
195
196static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
197{
198	if (status & ~VALID_STATUS_MASK)
199		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
200			       status & ~VALID_STATUS_MASK);
201
202	if (!mlx5_vdpa_debug)
203		return;
204
	mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
206	if (set && !status) {
207		mlx5_vdpa_info(mvdev, "driver resets the device\n");
208		return;
209	}
210
211	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
212	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
213	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
214	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
215	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
216	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
217}
218
219static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
220{
221	if (features & ~VALID_FEATURES_MASK)
222		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
223			       features & ~VALID_FEATURES_MASK);
224
225	if (!mlx5_vdpa_debug)
226		return;
227
228	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
229	if (!features)
230		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
231
232	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
233	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
234	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
235	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
236	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
237	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
238	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
239	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
240	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
241	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
242	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
243	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
244	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
245	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
246	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
247	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
248	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
249	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
250	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
251	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
252	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
253	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
254	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
255	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
256	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
257	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
258	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
259	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
260	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
261	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
262	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
263	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
264	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
265	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
266}
267
268static int create_tis(struct mlx5_vdpa_net *ndev)
269{
270	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
271	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
272	void *tisc;
273	int err;
274
275	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
276	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
277	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
278	if (err)
279		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
280
281	return err;
282}
283
284static void destroy_tis(struct mlx5_vdpa_net *ndev)
285{
286	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
287}
288
289#define MLX5_VDPA_CQE_SIZE 64
290#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
291
292static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
293{
294	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
295	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
296	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
297	int err;
298
299	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
300				       ndev->mvdev.mdev->priv.numa_node);
301	if (err)
302		return err;
303
304	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
305
306	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
307	buf->nent = nent;
308
309	return 0;
310}
311
312static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
313{
314	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
315
316	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
317					ndev->mvdev.mdev->priv.numa_node);
318}
319
320static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
321{
322	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
323}
324
325static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
326{
327	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
328}
329
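/* Mark all CQEs as invalid so that stale entries are not mistaken for
 * completions before the hardware writes them.
 */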
330static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
331{
332	struct mlx5_cqe64 *cqe64;
333	void *cqe;
334	int i;
335
336	for (i = 0; i < buf->nent; i++) {
337		cqe = get_cqe(vcq, i);
338		cqe64 = cqe;
339		cqe64->op_own = MLX5_CQE_INVALID << 4;
340	}
341}
342
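/* Return the CQE at index n if it is owned by software, i.e. its opcode is
 * valid and its ownership bit matches the current pass over the CQ; otherwise
 * return NULL.
 */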
343static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
344{
345	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
346
347	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
348	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
349		return cqe64;
350
351	return NULL;
352}
353
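/* Make n more receive WQEs available to the hardware by advancing the queue
 * head and updating the doorbell record.
 */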
354static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
355{
356	vqp->head += n;
357	vqp->db.db[0] = cpu_to_be32(vqp->head);
358}
359
360static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
361		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
362{
363	struct mlx5_vdpa_qp *vqp;
364	__be64 *pas;
365	void *qpc;
366
367	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
368	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
369	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
370	if (vqp->fw) {
		/* The firmware QP is allocated by the driver on behalf of the
		 * firmware, so most parameters can be skipped; they will be
		 * chosen by the firmware itself.
		 */
375		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
376		MLX5_SET(qpc, qpc, no_sq, 1);
377		return;
378	}
379
380	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
381	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
382	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
383	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
384	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
385	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
386	MLX5_SET(qpc, qpc, no_sq, 1);
387	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
388	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
389	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
390	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
391	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
392}
393
394static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
395{
396	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
397					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
398					ndev->mvdev.mdev->priv.numa_node);
399}
400
401static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
402{
403	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
404}
405
406static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
407		     struct mlx5_vdpa_qp *vqp)
408{
409	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
410	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
411	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
412	void *qpc;
413	void *in;
414	int err;
415
416	if (!vqp->fw) {
417		vqp = &mvq->vqqp;
418		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
419		if (err)
420			return err;
421
422		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
423		if (err)
424			goto err_db;
425		inlen += vqp->frag_buf.npages * sizeof(__be64);
426	}
427
428	in = kzalloc(inlen, GFP_KERNEL);
429	if (!in) {
430		err = -ENOMEM;
431		goto err_kzalloc;
432	}
433
434	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
435	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
436	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
437	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
438	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
439	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
440	if (!vqp->fw)
441		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
442	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
443	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
444	kfree(in);
445	if (err)
446		goto err_kzalloc;
447
448	vqp->mqp.uid = ndev->mvdev.res.uid;
449	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
450
451	if (!vqp->fw)
452		rx_post(vqp, mvq->num_ent);
453
454	return 0;
455
456err_kzalloc:
457	if (!vqp->fw)
458		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
459err_db:
460	if (!vqp->fw)
461		rq_buf_free(ndev, vqp);
462
463	return err;
464}
465
466static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
467{
468	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
469
470	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
471	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
472	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
473	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
474		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
475	if (!vqp->fw) {
476		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
477		rq_buf_free(ndev, vqp);
478	}
479}
480
481static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
482{
483	return get_sw_cqe(cq, cq->mcq.cons_index);
484}
485
486static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
487{
488	struct mlx5_cqe64 *cqe64;
489
490	cqe64 = next_cqe_sw(vcq);
491	if (!cqe64)
492		return -EAGAIN;
493
494	vcq->mcq.cons_index++;
495	return 0;
496}
497
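/* Acknowledge num completions: update the CQ consumer index, replenish the
 * receive queue and forward the event to the registered vdpa callback.
 */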
498static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
499{
500	struct mlx5_vdpa_net *ndev = mvq->ndev;
501	struct vdpa_callback *event_cb;
502
503	event_cb = &ndev->event_cbs[mvq->index];
504	mlx5_cq_set_ci(&mvq->cq.mcq);
505
	/* Make sure the CQ consumer index update is visible to the hardware
	 * before updating the RX doorbell record.
	 */
509	dma_wmb();
510	rx_post(&mvq->vqqp, num);
511	if (event_cb->callback)
512		event_cb->callback(event_cb->private);
513}
514
515static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
516{
517	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
518	struct mlx5_vdpa_net *ndev = mvq->ndev;
519	void __iomem *uar_page = ndev->mvdev.res.uar->map;
520	int num = 0;
521
522	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
523		num++;
524		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver in the guest.
			 */
531			mlx5_vdpa_handle_completions(mvq, num);
532			num = 0;
533		}
534	}
535
536	if (num)
537		mlx5_vdpa_handle_completions(mvq, num);
538
539	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
540}
541
542static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
543{
544	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
545	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
546	void __iomem *uar_page = ndev->mvdev.res.uar->map;
547	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
548	struct mlx5_vdpa_cq *vcq = &mvq->cq;
549	__be64 *pas;
550	int inlen;
551	void *cqc;
552	void *in;
553	int err;
554	int eqn;
555
556	err = mlx5_db_alloc(mdev, &vcq->db);
557	if (err)
558		return err;
559
560	vcq->mcq.set_ci_db = vcq->db.db;
561	vcq->mcq.arm_db = vcq->db.db + 1;
562	vcq->mcq.cqe_sz = 64;
563
564	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
565	if (err)
566		goto err_db;
567
568	cq_frag_buf_init(vcq, &vcq->buf);
569
570	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
571		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
572	in = kzalloc(inlen, GFP_KERNEL);
573	if (!in) {
574		err = -ENOMEM;
575		goto err_vzalloc;
576	}
577
578	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
579	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
580	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
581
582	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
583	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
584
585	/* Use vector 0 by default. Consider adding code to choose least used
586	 * vector.
587	 */
588	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
589	if (err)
590		goto err_vec;
591
592	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
593	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
594	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
595	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
596	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
597
598	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
599	if (err)
600		goto err_vec;
601
602	vcq->mcq.comp = mlx5_vdpa_cq_comp;
603	vcq->cqe = num_ent;
604	vcq->mcq.set_ci_db = vcq->db.db;
605	vcq->mcq.arm_db = vcq->db.db + 1;
606	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
607	kfree(in);
608	return 0;
609
610err_vec:
611	kfree(in);
612err_vzalloc:
613	cq_frag_buf_free(ndev, &vcq->buf);
614err_db:
615	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
616	return err;
617}
618
619static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
620{
621	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
622	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
623	struct mlx5_vdpa_cq *vcq = &mvq->cq;
624
625	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
626		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
627		return;
628	}
629	cq_frag_buf_free(ndev, &vcq->buf);
630	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
631}
632
633static int read_umem_params(struct mlx5_vdpa_net *ndev)
634{
635	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
636	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
637	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
638	int out_size;
639	void *caps;
640	void *out;
641	int err;
642
643	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
644	out = kzalloc(out_size, GFP_KERNEL);
645	if (!out)
646		return -ENOMEM;
647
648	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
649	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
650	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
651	if (err) {
652		mlx5_vdpa_warn(&ndev->mvdev,
653			"Failed reading vdpa umem capabilities with err %d\n", err);
654		goto out;
655	}
656
	caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
658
659	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
660	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
661
662	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
663	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
664
665	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
666	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
667
668out:
669	kfree(out);
670	return 0;
671}
672
673static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
674			  struct mlx5_vdpa_umem **umemp)
675{
676	u32 p_a;
677	u32 p_b;
678
679	switch (num) {
680	case 1:
681		p_a = ndev->umem_1_buffer_param_a;
682		p_b = ndev->umem_1_buffer_param_b;
683		*umemp = &mvq->umem1;
684		break;
685	case 2:
686		p_a = ndev->umem_2_buffer_param_a;
687		p_b = ndev->umem_2_buffer_param_b;
688		*umemp = &mvq->umem2;
689		break;
690	case 3:
691		p_a = ndev->umem_3_buffer_param_a;
692		p_b = ndev->umem_3_buffer_param_b;
693		*umemp = &mvq->umem3;
694		break;
695	}
696
697	(*umemp)->size = p_a * mvq->num_ent + p_b;
698}
699
700static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
701{
702	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
703}
704
705static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
706{
707	int inlen;
708	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
709	void *um;
710	void *in;
711	int err;
712	__be64 *pas;
713	struct mlx5_vdpa_umem *umem;
714
715	set_umem_size(ndev, mvq, num, &umem);
716	err = umem_frag_buf_alloc(ndev, umem, umem->size);
717	if (err)
718		return err;
719
720	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
721
722	in = kzalloc(inlen, GFP_KERNEL);
723	if (!in) {
724		err = -ENOMEM;
725		goto err_in;
726	}
727
728	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
729	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
730	um = MLX5_ADDR_OF(create_umem_in, in, umem);
731	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
732	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
733
734	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
735	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
736
737	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
738	if (err) {
739		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
740		goto err_cmd;
741	}
742
743	kfree(in);
744	umem->id = MLX5_GET(create_umem_out, out, umem_id);
745
746	return 0;
747
748err_cmd:
749	kfree(in);
750err_in:
751	umem_frag_buf_free(ndev, umem);
752	return err;
753}
754
755static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
756{
757	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
758	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
759	struct mlx5_vdpa_umem *umem;
760
761	switch (num) {
762	case 1:
763		umem = &mvq->umem1;
764		break;
765	case 2:
766		umem = &mvq->umem2;
767		break;
768	case 3:
769		umem = &mvq->umem3;
770		break;
771	}
772
773	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
774	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
775	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
776		return;
777
778	umem_frag_buf_free(ndev, umem);
779}
780
781static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
782{
783	int num;
784	int err;
785
786	for (num = 1; num <= 3; num++) {
787		err = create_umem(ndev, mvq, num);
788		if (err)
789			goto err_umem;
790	}
791	return 0;
792
793err_umem:
794	for (num--; num > 0; num--)
795		umem_destroy(ndev, mvq, num);
796
797	return err;
798}
799
800static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
801{
802	int num;
803
804	for (num = 3; num > 0; num--)
805		umem_destroy(ndev, mvq, num);
806}
807
808static int get_queue_type(struct mlx5_vdpa_net *ndev)
809{
810	u32 type_mask;
811
812	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
813
814	/* prefer split queue */
815	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
816		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
817
818	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
819
820	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
821}
822
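/* Data virtqueues alternate: even indices are RX queues, odd indices are TX. */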
823static bool vq_is_tx(u16 idx)
824{
825	return idx % 2;
826}
827
828enum {
829	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
830	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
831	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
832	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
833	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
834	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
835	MLX5_VIRTIO_NET_F_CSUM = 10,
836	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
837	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
838};
839
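/* Translate negotiated virtio-net feature bits into the bit positions used by
 * the virtio_net_q object's queue_feature_bit_mask fields.
 */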
840static u16 get_features(u64 features)
841{
842	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
843	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
844	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
845	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
846	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
847	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
848	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
849	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
850}
851
852static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
853{
854	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
855	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
856}
857
858static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
859{
860	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
861		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
862		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
863}
864
865static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
866{
867	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
868	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
869	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
870	struct mlx5_vdpa_mr *vq_mr;
871	struct mlx5_vdpa_mr *vq_desc_mr;
872	void *obj_context;
873	u16 mlx_features;
874	void *cmd_hdr;
875	void *vq_ctx;
876	void *in;
877	int err;
878
879	err = umems_create(ndev, mvq);
880	if (err)
881		return err;
882
883	in = kzalloc(inlen, GFP_KERNEL);
884	if (!in) {
885		err = -ENOMEM;
886		goto err_alloc;
887	}
888
889	mlx_features = get_features(ndev->mvdev.actual_features);
890	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
891
892	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
893	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
894	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
895
896	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
897	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
898	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
899	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
900		 mlx_features >> 3);
901	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
902		 mlx_features & 7);
903	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
904	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
905
906	if (vq_is_tx(mvq->index))
907		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
908
909	if (mvq->map.virq) {
910		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
911		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
912	} else {
913		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
914		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
915	}
916
917	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
918	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
919	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
920		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
921	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
922	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
923	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
924	vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
925	if (vq_mr)
926		MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
927
928	vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
929	if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
930		MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
931
932	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
933	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
934	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
935	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
936	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
937	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
938	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
939	if (counters_supported(&ndev->mvdev))
940		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
941
942	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
943	if (err)
944		goto err_cmd;
945
946	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
947	kfree(in);
948	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
949
950	mlx5_vdpa_get_mr(mvdev, vq_mr);
951	mvq->vq_mr = vq_mr;
952
953	if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
954		mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
955		mvq->desc_mr = vq_desc_mr;
956	}
957
958	return 0;
959
960err_cmd:
961	kfree(in);
962err_alloc:
963	umems_destroy(ndev, mvq);
964	return err;
965}
966
967static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
968{
969	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
970	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
971
972	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
973		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
974	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
975	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
976	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
977		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
978	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
979		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
980		return;
981	}
982	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
983	umems_destroy(ndev, mvq);
984
985	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
986	mvq->vq_mr = NULL;
987
988	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
989	mvq->desc_mr = NULL;
990}
991
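/* For the selected virtqueue QP, get_qpn returns its own QP number and
 * get_rqpn returns the QP number of the remote end of the RC connection.
 */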
992static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
993{
994	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
995}
996
997static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
998{
999	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
1000}
1001
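/* Allocate and fill the command input/output buffers for the requested QP
 * state transition. On allocation failure both *in and *out are set to NULL
 * so the caller can detect the error.
 */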
1002static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
1003			int *outlen, u32 qpn, u32 rqpn)
1004{
1005	void *qpc;
1006	void *pp;
1007
1008	switch (cmd) {
1009	case MLX5_CMD_OP_2RST_QP:
1010		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
1011		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
1012		*in = kzalloc(*inlen, GFP_KERNEL);
1013		*out = kzalloc(*outlen, GFP_KERNEL);
1014		if (!*in || !*out)
1015			goto outerr;
1016
1017		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
1018		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
1019		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
1020		break;
1021	case MLX5_CMD_OP_RST2INIT_QP:
1022		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
1023		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
1024		*in = kzalloc(*inlen, GFP_KERNEL);
1025		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1026		if (!*in || !*out)
1027			goto outerr;
1028
1029		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1030		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1031		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1032		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1033		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1034		MLX5_SET(qpc, qpc, rwe, 1);
1035		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1036		MLX5_SET(ads, pp, vhca_port_num, 1);
1037		break;
1038	case MLX5_CMD_OP_INIT2RTR_QP:
1039		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1040		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1041		*in = kzalloc(*inlen, GFP_KERNEL);
1042		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1043		if (!*in || !*out)
1044			goto outerr;
1045
1046		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1047		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1048		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1049		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1050		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1051		MLX5_SET(qpc, qpc, log_msg_max, 30);
1052		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1053		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1054		MLX5_SET(ads, pp, fl, 1);
1055		break;
1056	case MLX5_CMD_OP_RTR2RTS_QP:
1057		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1058		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1059		*in = kzalloc(*inlen, GFP_KERNEL);
1060		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1061		if (!*in || !*out)
1062			goto outerr;
1063
1064		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1065		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1066		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1067		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1068		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1069		MLX5_SET(ads, pp, ack_timeout, 14);
1070		MLX5_SET(qpc, qpc, retry_count, 7);
1071		MLX5_SET(qpc, qpc, rnr_retry, 7);
1072		break;
1073	default:
1074		goto outerr_nullify;
1075	}
1076
1077	return;
1078
1079outerr:
1080	kfree(*in);
1081	kfree(*out);
1082outerr_nullify:
1083	*in = NULL;
1084	*out = NULL;
1085}
1086
1087static void free_inout(void *in, void *out)
1088{
1089	kfree(in);
1090	kfree(out);
1091}
1092
/* Two QPs are used by each virtqueue: one by the driver and one by the
 * firmware. The fw argument indicates whether the QP being modified is the
 * one used by the firmware.
 */
1097static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1098{
1099	int outlen;
1100	int inlen;
1101	void *out;
1102	void *in;
1103	int err;
1104
1105	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1106	if (!in || !out)
1107		return -ENOMEM;
1108
1109	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1110	free_inout(in, out);
1111	return err;
1112}
1113
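/* Reset both QPs, then move them through INIT and RTR to establish the RC
 * connection; only the firmware QP is taken to RTS since the driver side QP
 * never posts sends.
 */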
1114static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1115{
1116	int err;
1117
1118	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1119	if (err)
1120		return err;
1121
1122	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1123	if (err)
1124		return err;
1125
1126	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1127	if (err)
1128		return err;
1129
1130	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1131	if (err)
1132		return err;
1133
1134	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1135	if (err)
1136		return err;
1137
1138	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1139	if (err)
1140		return err;
1141
1142	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1143}
1144
1145struct mlx5_virtq_attr {
1146	u8 state;
1147	u16 available_index;
1148	u16 used_index;
1149};
1150
1151static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1152			   struct mlx5_virtq_attr *attr)
1153{
1154	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1155	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1156	void *out;
1157	void *obj_context;
1158	void *cmd_hdr;
1159	int err;
1160
1161	out = kzalloc(outlen, GFP_KERNEL);
1162	if (!out)
1163		return -ENOMEM;
1164
1165	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1166
1167	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1168	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1169	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1170	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1171	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1172	if (err)
1173		goto err_cmd;
1174
1175	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1176	memset(attr, 0, sizeof(*attr));
1177	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1178	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1179	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1180	kfree(out);
1181	return 0;
1182
1183err_cmd:
1184	kfree(out);
1185	return err;
1186}
1187
1188static bool is_resumable(struct mlx5_vdpa_net *ndev)
1189{
1190	return ndev->mvdev.vdev.config->resume;
1191}
1192
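/* Virtqueue objects move INIT -> RDY -> SUSPEND; SUSPEND -> RDY is only
 * allowed when the device supports resume. No transition out of ERR is
 * allowed.
 */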
1193static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
1194{
1195	switch (oldstate) {
1196	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1197		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1198	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1199		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1200	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1201		return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
1202	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1203	default:
1204		return false;
1205	}
1206}
1207
1208static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
1209{
1210	/* Only state is always modifiable */
1211	if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
1212		return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
1213		       mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1214
1215	return true;
1216}
1217
1218static int modify_virtqueue(struct mlx5_vdpa_net *ndev,
1219			    struct mlx5_vdpa_virtqueue *mvq,
1220			    int state)
1221{
1222	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1223	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1224	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1225	struct mlx5_vdpa_mr *desc_mr = NULL;
1226	struct mlx5_vdpa_mr *vq_mr = NULL;
1227	bool state_change = false;
1228	void *obj_context;
1229	void *cmd_hdr;
1230	void *vq_ctx;
1231	void *in;
1232	int err;
1233
1234	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1235		return 0;
1236
1237	if (!modifiable_virtqueue_fields(mvq))
1238		return -EINVAL;
1239
1240	in = kzalloc(inlen, GFP_KERNEL);
1241	if (!in)
1242		return -ENOMEM;
1243
1244	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1245
1246	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1247	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1248	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1249	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1250
1251	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1252	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
1253
1254	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) {
1255		if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
1256			err = -EINVAL;
1257			goto done;
1258		}
1259
1260		MLX5_SET(virtio_net_q_object, obj_context, state, state);
1261		state_change = true;
1262	}
1263
1264	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
1265		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
1266		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
1267		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
1268	}
1269
1270	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
1271		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
1272
1273	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
1274		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
1275
1276	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1277		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
1278
1279		if (vq_mr)
1280			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
1281		else
1282			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
1283	}
1284
1285	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1286		desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
1287
1288		if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
1289			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
1290		else
1291			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
1292	}
1293
1294	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
1295	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1296	if (err)
1297		goto done;
1298
1299	if (state_change)
1300		mvq->fw_state = state;
1301
1302	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1303		mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
1304		mlx5_vdpa_get_mr(mvdev, vq_mr);
1305		mvq->vq_mr = vq_mr;
1306	}
1307
1308	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1309		mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
1310		mlx5_vdpa_get_mr(mvdev, desc_mr);
1311		mvq->desc_mr = desc_mr;
1312	}
1313
1314	mvq->modified_fields = 0;
1315
1316done:
1317	kfree(in);
1318	return err;
1319}
1320
1321static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev,
1322				  struct mlx5_vdpa_virtqueue *mvq,
1323				  unsigned int state)
1324{
1325	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
1326	return modify_virtqueue(ndev, mvq, state);
1327}
1328
1329static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1330{
1331	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1332	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1333	void *cmd_hdr;
1334	int err;
1335
1336	if (!counters_supported(&ndev->mvdev))
1337		return 0;
1338
1339	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1340
1341	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1342	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1343	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1344
1345	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1346	if (err)
1347		return err;
1348
1349	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1350
1351	return 0;
1352}
1353
1354static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1355{
1356	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1357	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1358
1359	if (!counters_supported(&ndev->mvdev))
1360		return;
1361
1362	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1363	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1364	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1365	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1366	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1367		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1368}
1369
1370static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1371{
1372	struct vdpa_callback *cb = priv;
1373
1374	if (cb->callback)
1375		return cb->callback(cb->private);
1376
1377	return IRQ_HANDLED;
1378}
1379
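/* Grab a free entry from the device's MSI-X vector pool and attach its IRQ to
 * this virtqueue. If no entry is available, mvq->map is left unset and the
 * virtqueue is created in QP event mode instead (see create_virtqueue).
 */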
1380static void alloc_vector(struct mlx5_vdpa_net *ndev,
1381			 struct mlx5_vdpa_virtqueue *mvq)
1382{
1383	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1384	struct mlx5_vdpa_irq_pool_entry *ent;
1385	int err;
1386	int i;
1387
1388	for (i = 0; i < irqp->num_ent; i++) {
1389		ent = &irqp->entries[i];
1390		if (!ent->used) {
1391			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1392				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1393			ent->dev_id = &ndev->event_cbs[mvq->index];
1394			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1395					  ent->name, ent->dev_id);
1396			if (err)
1397				return;
1398
1399			ent->used = true;
1400			mvq->map = ent->map;
1401			return;
1402		}
1403	}
1404}
1405
1406static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1407			   struct mlx5_vdpa_virtqueue *mvq)
1408{
1409	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1410	int i;
1411
1412	for (i = 0; i < irqp->num_ent; i++)
1413		if (mvq->map.virq == irqp->entries[i].map.virq) {
1414			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1415			irqp->entries[i].used = false;
1416			return;
1417		}
1418}
1419
1420static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1421{
1422	u16 idx = mvq->index;
1423	int err;
1424
1425	if (!mvq->num_ent)
1426		return 0;
1427
1428	if (mvq->initialized)
1429		return 0;
1430
1431	err = cq_create(ndev, idx, mvq->num_ent);
1432	if (err)
1433		return err;
1434
1435	err = qp_create(ndev, mvq, &mvq->fwqp);
1436	if (err)
1437		goto err_fwqp;
1438
1439	err = qp_create(ndev, mvq, &mvq->vqqp);
1440	if (err)
1441		goto err_vqqp;
1442
1443	err = connect_qps(ndev, mvq);
1444	if (err)
1445		goto err_connect;
1446
1447	err = counter_set_alloc(ndev, mvq);
1448	if (err)
1449		goto err_connect;
1450
1451	alloc_vector(ndev, mvq);
1452	err = create_virtqueue(ndev, mvq);
1453	if (err)
1454		goto err_vq;
1455
1456	if (mvq->ready) {
1457		err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1458		if (err) {
1459			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1460				       idx, err);
1461			goto err_modify;
1462		}
1463	}
1464
1465	mvq->initialized = true;
1466	return 0;
1467
1468err_modify:
1469	destroy_virtqueue(ndev, mvq);
1470err_vq:
1471	dealloc_vector(ndev, mvq);
1472	counter_set_dealloc(ndev, mvq);
1473err_connect:
1474	qp_destroy(ndev, &mvq->vqqp);
1475err_vqqp:
1476	qp_destroy(ndev, &mvq->fwqp);
1477err_fwqp:
1478	cq_destroy(ndev, idx);
1479	return err;
1480}
1481
1482static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1483{
1484	struct mlx5_virtq_attr attr;
1485
1486	if (!mvq->initialized)
1487		return;
1488
1489	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1490		return;
1491
1492	if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1493		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1494
1495	if (query_virtqueue(ndev, mvq, &attr)) {
1496		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1497		return;
1498	}
1499	mvq->avail_idx = attr.available_index;
1500	mvq->used_idx = attr.used_index;
1501}
1502
1503static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1504{
1505	int i;
1506
1507	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1508		suspend_vq(ndev, &ndev->vqs[i]);
1509}
1510
1511static void resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1512{
1513	if (!mvq->initialized || !is_resumable(ndev))
1514		return;
1515
1516	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)
1517		return;
1518
1519	if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY))
1520		mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u\n", mvq->index);
1521}
1522
1523static void resume_vqs(struct mlx5_vdpa_net *ndev)
1524{
1525	for (int i = 0; i < ndev->mvdev.max_vqs; i++)
1526		resume_vq(ndev, &ndev->vqs[i]);
1527}
1528
1529static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1530{
1531	if (!mvq->initialized)
1532		return;
1533
1534	suspend_vq(ndev, mvq);
1535	mvq->modified_fields = 0;
1536	destroy_virtqueue(ndev, mvq);
1537	dealloc_vector(ndev, mvq);
1538	counter_set_dealloc(ndev, mvq);
1539	qp_destroy(ndev, &mvq->vqqp);
1540	qp_destroy(ndev, &mvq->fwqp);
1541	cq_destroy(ndev, mvq->index);
1542	mvq->initialized = false;
1543}
1544
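/* Create the RQT referenced by the TIR. Only RX virtqueues (even indices) are
 * listed; the actual size is the number of RX queues rounded up to a power of
 * two, wrapping around the list if needed.
 */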
1545static int create_rqt(struct mlx5_vdpa_net *ndev)
1546{
1547	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1548	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1549	__be32 *list;
1550	void *rqtc;
1551	int inlen;
1552	void *in;
1553	int i, j;
1554	int err;
1555
1556	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1557	in = kzalloc(inlen, GFP_KERNEL);
1558	if (!in)
1559		return -ENOMEM;
1560
1561	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1562	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1563
1564	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1565	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1566	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1567	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1568		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1569
1570	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1571	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1572	kfree(in);
1573	if (err)
1574		return err;
1575
1576	return 0;
1577}
1578
1579#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1580
1581static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1582{
1583	int act_sz = roundup_pow_of_two(num / 2);
1584	__be32 *list;
1585	void *rqtc;
1586	int inlen;
1587	void *in;
1588	int i, j;
1589	int err;
1590
1591	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1592	in = kzalloc(inlen, GFP_KERNEL);
1593	if (!in)
1594		return -ENOMEM;
1595
1596	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1597	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1598	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1599	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1600
1601	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1602	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1603		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1604
1605	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1606	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1607	kfree(in);
1608	if (err)
1609		return err;
1610
1611	return 0;
1612}
1613
1614static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1615{
1616	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1617}
1618
1619static int create_tir(struct mlx5_vdpa_net *ndev)
1620{
1621#define HASH_IP_L4PORTS                                                                            \
1622	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1623	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1624	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1625						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1626						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1627						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1628						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1629	void *rss_key;
1630	void *outer;
1631	void *tirc;
1632	void *in;
1633	int err;
1634
1635	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1636	if (!in)
1637		return -ENOMEM;
1638
1639	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1640	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1641	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1642
1643	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1644	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1645	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1646	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1647
1648	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1649	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1650	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1651	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1652
1653	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1654	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1655
1656	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1657	kfree(in);
1658	if (err)
1659		return err;
1660
1661	mlx5_vdpa_add_tirn(ndev);
1662	return err;
1663}
1664
1665static void destroy_tir(struct mlx5_vdpa_net *ndev)
1666{
1667	mlx5_vdpa_remove_tirn(ndev);
1668	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1669}
1670
1671#define MAX_STEERING_ENT 0x8000
1672#define MAX_STEERING_GROUPS 2
1673
1674#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1675       #define NUM_DESTS 2
1676#else
1677       #define NUM_DESTS 1
1678#endif
1679
1680static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1681				 struct macvlan_node *node,
1682				 struct mlx5_flow_act *flow_act,
1683				 struct mlx5_flow_destination *dests)
1684{
1685#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1686	int err;
1687
1688	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1689	if (IS_ERR(node->ucast_counter.counter))
1690		return PTR_ERR(node->ucast_counter.counter);
1691
1692	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1693	if (IS_ERR(node->mcast_counter.counter)) {
1694		err = PTR_ERR(node->mcast_counter.counter);
1695		goto err_mcast_counter;
1696	}
1697
1698	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1699	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1700	return 0;
1701
1702err_mcast_counter:
1703	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1704	return err;
1705#else
1706	return 0;
1707#endif
1708}
1709
1710static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1711				     struct macvlan_node *node)
1712{
1713#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1714	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1715	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1716#endif
1717}
1718
1719static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1720					struct macvlan_node *node)
1721{
1722	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1723	struct mlx5_flow_act flow_act = {};
1724	struct mlx5_flow_spec *spec;
1725	void *headers_c;
1726	void *headers_v;
1727	u8 *dmac_c;
1728	u8 *dmac_v;
1729	int err;
1730	u16 vid;
1731
1732	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1733	if (!spec)
1734		return -ENOMEM;
1735
1736	vid = key2vid(node->macvlan);
1737	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1738	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1739	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1740	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1741	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1742	eth_broadcast_addr(dmac_c);
1743	ether_addr_copy(dmac_v, mac);
1744	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1745		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1746		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1747	}
1748	if (node->tagged) {
1749		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1750		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1751	}
1752	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1753	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1754	dests[0].tir_num = ndev->res.tirn;
1755	err = add_steering_counters(ndev, node, &flow_act, dests);
1756	if (err)
1757		goto out_free;
1758
1759#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1760	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1761#endif
1762	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1763	if (IS_ERR(node->ucast_rule)) {
1764		err = PTR_ERR(node->ucast_rule);
1765		goto err_ucast;
1766	}
1767
1768#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1769	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1770#endif
1771
1772	memset(dmac_c, 0, ETH_ALEN);
1773	memset(dmac_v, 0, ETH_ALEN);
1774	dmac_c[0] = 1;
1775	dmac_v[0] = 1;
1776	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1777	if (IS_ERR(node->mcast_rule)) {
1778		err = PTR_ERR(node->mcast_rule);
1779		goto err_mcast;
1780	}
1781	kvfree(spec);
1782	mlx5_vdpa_add_rx_counters(ndev, node);
1783	return 0;
1784
1785err_mcast:
1786	mlx5_del_flow_rules(node->ucast_rule);
1787err_ucast:
1788	remove_steering_counters(ndev, node);
1789out_free:
1790	kvfree(spec);
1791	return err;
1792}
1793
1794static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1795					 struct macvlan_node *node)
1796{
1797	mlx5_vdpa_remove_rx_counters(ndev, node);
1798	mlx5_del_flow_rules(node->ucast_rule);
1799	mlx5_del_flow_rules(node->mcast_rule);
1800}
1801
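/* Build the 64-bit hash key for a MAC/VLAN entry: the VLAN id (or
 * MLX5V_UNTAGGED for untagged entries) occupies bits 48-63 and the MAC
 * address occupies bits 0-47.
 */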
1802static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1803{
1804	u64 val;
1805
1806	if (!tagged)
1807		vlan = MLX5V_UNTAGGED;
1808
1809	val = (u64)vlan << 48 |
1810	      (u64)mac[0] << 40 |
1811	      (u64)mac[1] << 32 |
1812	      (u64)mac[2] << 24 |
1813	      (u64)mac[3] << 16 |
1814	      (u64)mac[4] << 8 |
1815	      (u64)mac[5];
1816
1817	return val;
1818}
1819
1820static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1821{
1822	struct macvlan_node *pos;
1823	u32 idx;
1824
	idx = hash_64(value, 8); /* 8-bit hash index into macvlan_hash */
1826	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1827		if (pos->macvlan == value)
1828			return pos;
1829	}
1830	return NULL;
1831}
1832
1833static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1834{
1835	struct macvlan_node *ptr;
1836	u64 val;
1837	u32 idx;
1838	int err;
1839
1840	val = search_val(mac, vid, tagged);
1841	if (mac_vlan_lookup(ndev, val))
1842		return -EEXIST;
1843
1844	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1845	if (!ptr)
1846		return -ENOMEM;
1847
1848	ptr->tagged = tagged;
1849	ptr->macvlan = val;
1850	ptr->ndev = ndev;
1851	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1852	if (err)
1853		goto err_add;
1854
1855	idx = hash_64(val, 8);
1856	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1857	return 0;
1858
1859err_add:
1860	kfree(ptr);
1861	return err;
1862}
1863
1864static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1865{
1866	struct macvlan_node *ptr;
1867
1868	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1869	if (!ptr)
1870		return;
1871
1872	hlist_del(&ptr->hlist);
1873	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1874	remove_steering_counters(ndev, ptr);
1875	kfree(ptr);
1876}
1877
1878static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1879{
1880	struct macvlan_node *pos;
1881	struct hlist_node *n;
1882	int i;
1883
1884	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1885		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1886			hlist_del(&pos->hlist);
1887			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1888			remove_steering_counters(ndev, pos);
1889			kfree(pos);
1890		}
1891	}
1892}
1893
1894static int setup_steering(struct mlx5_vdpa_net *ndev)
1895{
1896	struct mlx5_flow_table_attr ft_attr = {};
1897	struct mlx5_flow_namespace *ns;
1898	int err;
1899
1900	ft_attr.max_fte = MAX_STEERING_ENT;
1901	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1902
1903	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1904	if (!ns) {
1905		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1906		return -EOPNOTSUPP;
1907	}
1908
1909	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1910	if (IS_ERR(ndev->rxft)) {
1911		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1912		return PTR_ERR(ndev->rxft);
1913	}
1914	mlx5_vdpa_add_rx_flow_table(ndev);
1915
1916	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1917	if (err)
1918		goto err_add;
1919
1920	return 0;
1921
1922err_add:
1923	mlx5_vdpa_remove_rx_flow_table(ndev);
1924	mlx5_destroy_flow_table(ndev->rxft);
1925	return err;
1926}
1927
1928static void teardown_steering(struct mlx5_vdpa_net *ndev)
1929{
1930	clear_mac_vlan_table(ndev);
1931	mlx5_vdpa_remove_rx_flow_table(ndev);
1932	mlx5_destroy_flow_table(ndev->rxft);
1933}
1934
1935static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1936{
1937	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1938	struct mlx5_control_vq *cvq = &mvdev->cvq;
1939	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1940	struct mlx5_core_dev *pfmdev;
1941	size_t read;
1942	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1943
1944	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1945	switch (cmd) {
1946	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1947		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1948		if (read != ETH_ALEN)
1949			break;
1950
		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1952			status = VIRTIO_NET_OK;
1953			break;
1954		}
1955
1956		if (is_zero_ether_addr(mac))
1957			break;
1958
1959		if (!is_zero_ether_addr(ndev->config.mac)) {
1960			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1961				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1962					       ndev->config.mac);
1963				break;
1964			}
1965		}
1966
1967		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1968			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1969				       mac);
1970			break;
1971		}
1972
		/* Back up the original mac address so that it can be restored if
		 * adding the forwarding rules fails.
		 */
1976		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1977
1978		memcpy(ndev->config.mac, mac, ETH_ALEN);
1979
		/* Recreate the flow table entry so that packets sent to the new
		 * mac address are forwarded back to the device.
		 */
1982		mac_vlan_del(ndev, mac_back, 0, false);
1983
1984		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1985			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1986
			/* This path should rarely be taken, but double check anyway */
1988			if (is_zero_ether_addr(mac_back)) {
1989				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1990				break;
1991			}
1992
			/* Try to restore the original mac address in the MPFS table and
			 * to restore the forwarding rule entry.
			 */
1996			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1997				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1998					       ndev->config.mac);
1999			}
2000
2001			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
2002				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
2003					       mac_back);
2004			}
2005
2006			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
2007
2008			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
2009				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
2010
2011			break;
2012		}
2013
2014		status = VIRTIO_NET_OK;
2015		break;
2016
2017	default:
2018		break;
2019	}
2020
2021	return status;
2022}
2023
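/* Change the number of active queue pairs. When shrinking, the RQT is modified
 * first so no traffic is steered to the queues about to be torn down; when
 * growing, the additional queues are set up before the RQT is expanded. On
 * failure, any newly added queues are torn down again.
 */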
2024static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
2025{
2026	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2027	int cur_qps = ndev->cur_num_vqs / 2;
2028	int err;
2029	int i;
2030
2031	if (cur_qps > newqps) {
2032		err = modify_rqt(ndev, 2 * newqps);
2033		if (err)
2034			return err;
2035
2036		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
2037			teardown_vq(ndev, &ndev->vqs[i]);
2038
2039		ndev->cur_num_vqs = 2 * newqps;
2040	} else {
2041		ndev->cur_num_vqs = 2 * newqps;
2042		for (i = cur_qps * 2; i < 2 * newqps; i++) {
2043			err = setup_vq(ndev, &ndev->vqs[i]);
2044			if (err)
2045				goto clean_added;
2046		}
2047		err = modify_rqt(ndev, 2 * newqps);
2048		if (err)
2049			goto clean_added;
2050	}
2051	return 0;
2052
2053clean_added:
2054	for (--i; i >= 2 * cur_qps; --i)
2055		teardown_vq(ndev, &ndev->vqs[i]);
2056
2057	ndev->cur_num_vqs = 2 * cur_qps;
2058
2059	return err;
2060}
2061
2062static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2063{
2064	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2065	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2066	struct mlx5_control_vq *cvq = &mvdev->cvq;
2067	struct virtio_net_ctrl_mq mq;
2068	size_t read;
2069	u16 newqps;
2070
2071	switch (cmd) {
2072	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
		/* This mq feature check aligns with the pre-existing userspace
		 * implementation.
		 *
		 * Without it, an untrusted driver could fake a multiqueue config
		 * request down to a non-mq device, which may cause the kernel to
		 * panic due to uninitialized resources for the extra vqs. Even with
		 * a well-behaving guest driver, changing the number of vqs on a
		 * non-mq device is not something we expect to allow.
		 */
2082		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
2083			break;
2084
2085		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
2086		if (read != sizeof(mq))
2087			break;
2088
2089		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
2090		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2091		    newqps > ndev->rqt_size)
2092			break;
2093
2094		if (ndev->cur_num_vqs == 2 * newqps) {
2095			status = VIRTIO_NET_OK;
2096			break;
2097		}
2098
2099		if (!change_num_qps(mvdev, newqps))
2100			status = VIRTIO_NET_OK;
2101
2102		break;
2103	default:
2104		break;
2105	}
2106
2107	return status;
2108}
2109
2110static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2111{
2112	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2113	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2114	struct mlx5_control_vq *cvq = &mvdev->cvq;
2115	__virtio16 vlan;
2116	size_t read;
2117	u16 id;
2118
2119	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
2120		return status;
2121
2122	switch (cmd) {
2123	case VIRTIO_NET_CTRL_VLAN_ADD:
2124		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2125		if (read != sizeof(vlan))
2126			break;
2127
2128		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2129		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2130			break;
2131
2132		status = VIRTIO_NET_OK;
2133		break;
2134	case VIRTIO_NET_CTRL_VLAN_DEL:
2135		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2136		if (read != sizeof(vlan))
2137			break;
2138
2139		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2140		mac_vlan_del(ndev, ndev->config.mac, id, true);
2141		status = VIRTIO_NET_OK;
2142		break;
2143	default:
2144		break;
2145	}
2146
2147	return status;
2148}
2149
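/* Control VQ kick handler, running from the vdpa workqueue. It processes one
 * control command per invocation and requeues itself to handle any remaining
 * descriptors.
 */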
2150static void mlx5_cvq_kick_handler(struct work_struct *work)
2151{
2152	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2153	struct virtio_net_ctrl_hdr ctrl;
2154	struct mlx5_vdpa_wq_ent *wqent;
2155	struct mlx5_vdpa_dev *mvdev;
2156	struct mlx5_control_vq *cvq;
2157	struct mlx5_vdpa_net *ndev;
2158	size_t read, write;
2159	int err;
2160
2161	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2162	mvdev = wqent->mvdev;
2163	ndev = to_mlx5_vdpa_ndev(mvdev);
2164	cvq = &mvdev->cvq;
2165
2166	down_write(&ndev->reslock);
2167
2168	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2169		goto out;
2170
2171	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2172		goto out;
2173
2174	if (!cvq->ready)
2175		goto out;
2176
2177	while (true) {
2178		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2179					   GFP_ATOMIC);
2180		if (err <= 0)
2181			break;
2182
2183		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2184		if (read != sizeof(ctrl))
2185			break;
2186
2187		cvq->received_desc++;
2188		switch (ctrl.class) {
2189		case VIRTIO_NET_CTRL_MAC:
2190			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2191			break;
2192		case VIRTIO_NET_CTRL_MQ:
2193			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2194			break;
2195		case VIRTIO_NET_CTRL_VLAN:
2196			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2197			break;
2198		default:
2199			break;
2200		}
2201
2202		/* Make sure data is written before advancing index */
2203		smp_wmb();
2204
2205		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2206		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2207		vringh_kiov_cleanup(&cvq->riov);
2208		vringh_kiov_cleanup(&cvq->wiov);
2209
2210		if (vringh_need_notify_iotlb(&cvq->vring))
2211			vringh_notify(&cvq->vring);
2212
2213		cvq->completed_desc++;
2214		queue_work(mvdev->wq, &wqent->work);
2215		break;
2216	}
2217
2218out:
2219	up_write(&ndev->reslock);
2220}
2221
2222static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2223{
2224	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2225	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2226	struct mlx5_vdpa_virtqueue *mvq;
2227
2228	if (!is_index_valid(mvdev, idx))
2229		return;
2230
2231	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2232		if (!mvdev->wq || !mvdev->cvq.ready)
2233			return;
2234
2235		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2236		return;
2237	}
2238
2239	mvq = &ndev->vqs[idx];
2240	if (unlikely(!mvq->ready))
2241		return;
2242
2243	iowrite16(idx, ndev->mvdev.res.kick_addr);
2244}
2245
2246static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2247				    u64 driver_area, u64 device_area)
2248{
2249	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2250	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2251	struct mlx5_vdpa_virtqueue *mvq;
2252
2253	if (!is_index_valid(mvdev, idx))
2254		return -EINVAL;
2255
2256	if (is_ctrl_vq_idx(mvdev, idx)) {
2257		mvdev->cvq.desc_addr = desc_area;
2258		mvdev->cvq.device_addr = device_area;
2259		mvdev->cvq.driver_addr = driver_area;
2260		return 0;
2261	}
2262
2263	mvq = &ndev->vqs[idx];
2264	mvq->desc_addr = desc_area;
2265	mvq->device_addr = device_area;
2266	mvq->driver_addr = driver_area;
2267	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
2268	return 0;
2269}
2270
2271static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2272{
2273	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2274	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2275	struct mlx5_vdpa_virtqueue *mvq;
2276
2277	if (!is_index_valid(mvdev, idx))
2278		return;
2279
	if (is_ctrl_vq_idx(mvdev, idx)) {
		struct mlx5_control_vq *cvq = &mvdev->cvq;

		cvq->vring.vring.num = num;
		return;
	}
2286
2287	mvq = &ndev->vqs[idx];
2288	mvq->num_ent = num;
2289}
2290
2291static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2292{
2293	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2294	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2295
2296	ndev->event_cbs[idx] = *cb;
2297	if (is_ctrl_vq_idx(mvdev, idx))
2298		mvdev->cvq.event_cb = *cb;
2299}
2300
2301static void mlx5_cvq_notify(struct vringh *vring)
2302{
2303	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2304
2305	if (!cvq->event_cb.callback)
2306		return;
2307
2308	cvq->event_cb.callback(cvq->event_cb.private);
2309}
2310
2311static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2312{
2313	struct mlx5_control_vq *cvq = &mvdev->cvq;
2314
2315	cvq->ready = ready;
2316	if (!ready)
2317		return;
2318
2319	cvq->vring.notify = mlx5_cvq_notify;
2320}
2321
2322static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2323{
2324	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2325	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2326	struct mlx5_vdpa_virtqueue *mvq;
2327	int err;
2328
2329	if (!mvdev->actual_features)
2330		return;
2331
2332	if (!is_index_valid(mvdev, idx))
2333		return;
2334
2335	if (is_ctrl_vq_idx(mvdev, idx)) {
2336		set_cvq_ready(mvdev, ready);
2337		return;
2338	}
2339
2340	mvq = &ndev->vqs[idx];
2341	if (!ready) {
2342		suspend_vq(ndev, mvq);
2343	} else {
2344		err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2345		if (err) {
2346			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2347			ready = false;
2348		}
2349	}
2350
2352	mvq->ready = ready;
2353}
2354
2355static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2356{
2357	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2358	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2359
2360	if (!is_index_valid(mvdev, idx))
2361		return false;
2362
2363	if (is_ctrl_vq_idx(mvdev, idx))
2364		return mvdev->cvq.ready;
2365
2366	return ndev->vqs[idx].ready;
2367}
2368
2369static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2370				  const struct vdpa_vq_state *state)
2371{
2372	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2373	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2374	struct mlx5_vdpa_virtqueue *mvq;
2375
2376	if (!is_index_valid(mvdev, idx))
2377		return -EINVAL;
2378
2379	if (is_ctrl_vq_idx(mvdev, idx)) {
2380		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2381		return 0;
2382	}
2383
2384	mvq = &ndev->vqs[idx];
2385	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2386		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2387		return -EINVAL;
2388	}
2389
2390	mvq->used_idx = state->split.avail_index;
2391	mvq->avail_idx = state->split.avail_index;
2392	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
2393				MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
2394	return 0;
2395}
2396
2397static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2398{
2399	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2400	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2401	struct mlx5_vdpa_virtqueue *mvq;
2402	struct mlx5_virtq_attr attr;
2403	int err;
2404
2405	if (!is_index_valid(mvdev, idx))
2406		return -EINVAL;
2407
2408	if (is_ctrl_vq_idx(mvdev, idx)) {
2409		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2410		return 0;
2411	}
2412
2413	mvq = &ndev->vqs[idx];
	/* If the virtq object was destroyed, use the value saved at
	 * the last moment of suspend_vq(). This caters to userspace
	 * that needs to emulate the index after the vq has been stopped.
	 */
2418	if (!mvq->initialized) {
2419		/* Firmware returns a wrong value for the available index.
2420		 * Since both values should be identical, we take the value of
2421		 * used_idx which is reported correctly.
2422		 */
2423		state->split.avail_index = mvq->used_idx;
2424		return 0;
2425	}
2426
2427	err = query_virtqueue(ndev, mvq, &attr);
2428	if (err) {
2429		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2430		return err;
2431	}
2432	state->split.avail_index = attr.used_index;
2433	return 0;
2434}
2435
2436static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2437{
2438	return PAGE_SIZE;
2439}
2440
2441static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2442{
2443	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2444
2445	if (is_ctrl_vq_idx(mvdev, idx))
2446		return MLX5_VDPA_CVQ_GROUP;
2447
2448	return MLX5_VDPA_DATAVQ_GROUP;
2449}
2450
2451static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
2452{
2453	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2454
2455	if (is_ctrl_vq_idx(mvdev, idx))
2456		return MLX5_VDPA_CVQ_GROUP;
2457
2458	return MLX5_VDPA_DATAVQ_DESC_GROUP;
2459}
2460
static u64 mlx_to_virtio_features(u16 dev_features)
2462{
2463	u64 result = 0;
2464
2465	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2466		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2467	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2468		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2469	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2470		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2471	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2472		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2473	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2474		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2475	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2476		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2477	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2478		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2479	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2480		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2481	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2482		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2483
2484	return result;
2485}
2486
2487static u64 get_supported_features(struct mlx5_core_dev *mdev)
2488{
2489	u64 mlx_vdpa_features = 0;
2490	u16 dev_features;
2491
2492	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
	mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2494	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2495		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2496	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2497	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2498	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2499	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2500	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2501	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2502	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2503	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2504
2505	return mlx_vdpa_features;
2506}
2507
2508static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2509{
2510	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2511	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2512
2513	print_features(mvdev, ndev->mvdev.mlx_features, false);
2514	return ndev->mvdev.mlx_features;
2515}
2516
2517static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2518{
2519	/* Minimum features to expect */
2520	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2521		return -EOPNOTSUPP;
2522
	/* Double check the feature combination sent down by the driver.
	 * Fail feature sets where a required dependency is missing.
	 *
	 * Per the VIRTIO v1.1 specification, section 5.1.3.1 "Feature bit
	 * requirements": "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
	 * By failing invalid feature sets sent down by untrusted drivers,
	 * we ensure that the assumptions made by is_index_valid() and
	 * is_ctrl_vq_idx() are not compromised.
	 */
2532	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
	    BIT_ULL(VIRTIO_NET_F_MQ))
2534		return -EINVAL;
2535
2536	return 0;
2537}
2538
2539static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2540{
2541	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2542	int err;
2543	int i;
2544
2545	for (i = 0; i < mvdev->max_vqs; i++) {
2546		err = setup_vq(ndev, &ndev->vqs[i]);
2547		if (err)
2548			goto err_vq;
2549	}
2550
2551	return 0;
2552
2553err_vq:
2554	for (--i; i >= 0; i--)
2555		teardown_vq(ndev, &ndev->vqs[i]);
2556
2557	return err;
2558}
2559
2560static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2561{
2562	struct mlx5_vdpa_virtqueue *mvq;
2563	int i;
2564
2565	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2566		mvq = &ndev->vqs[i];
2567		if (!mvq->initialized)
2568			continue;
2569
2570		teardown_vq(ndev, mvq);
2571	}
2572}
2573
2574static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2575{
2576	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2577		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2578			/* MQ supported. CVQ index is right above the last data virtqueue's */
2579			mvdev->max_idx = mvdev->max_vqs;
2580		} else {
			/* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
			 * CVQ gets index 2.
			 */
2584			mvdev->max_idx = 2;
2585		}
2586	} else {
2587		/* Two data virtqueues only: one for rx and one for tx */
2588		mvdev->max_idx = 1;
2589	}
2590}
2591
2592static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2593{
2594	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2595	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2596	int err;
2597
2598	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2599	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2600	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2601	if (vport)
2602		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2603
2604	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2605	if (err)
2606		return 0;
2607
2608	return MLX5_GET(query_vport_state_out, out, state);
2609}
2610
2611static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2612{
2613	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2614	    VPORT_STATE_UP)
2615		return true;
2616
2617	return false;
2618}
2619
2620static void update_carrier(struct work_struct *work)
2621{
2622	struct mlx5_vdpa_wq_ent *wqent;
2623	struct mlx5_vdpa_dev *mvdev;
2624	struct mlx5_vdpa_net *ndev;
2625
2626	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2627	mvdev = wqent->mvdev;
2628	ndev = to_mlx5_vdpa_ndev(mvdev);
2629	if (get_link_state(mvdev))
2630		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2631	else
2632		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2633
2634	if (ndev->config_cb.callback)
2635		ndev->config_cb.callback(ndev->config_cb.private);
2636
2637	kfree(wqent);
2638}
2639
2640static int queue_link_work(struct mlx5_vdpa_net *ndev)
2641{
2642	struct mlx5_vdpa_wq_ent *wqent;
2643
2644	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2645	if (!wqent)
2646		return -ENOMEM;
2647
2648	wqent->mvdev = &ndev->mvdev;
2649	INIT_WORK(&wqent->work, update_carrier);
2650	queue_work(ndev->mvdev.wq, &wqent->work);
2651	return 0;
2652}
2653
2654static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2655{
2656	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2657	struct mlx5_eqe *eqe = param;
2658	int ret = NOTIFY_DONE;
2659
2660	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2661		switch (eqe->sub_type) {
2662		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2663		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2664			if (queue_link_work(ndev))
2665				return NOTIFY_DONE;
2666
2667			ret = NOTIFY_OK;
2668			break;
2669		default:
2670			return NOTIFY_DONE;
2671		}
2672		return ret;
2673	}
2674	return ret;
2675}
2676
2677static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2678{
2679	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2680		return;
2681
2682	ndev->nb.notifier_call = event_handler;
2683	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2684	ndev->nb_registered = true;
2685	queue_link_work(ndev);
2686}
2687
2688static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2689{
2690	if (!ndev->nb_registered)
2691		return;
2692
2693	ndev->nb_registered = false;
2694	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2695	if (ndev->mvdev.wq)
2696		flush_workqueue(ndev->mvdev.wq);
2697}
2698
2699static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
2700{
2701	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
2702}
2703
2704static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2705{
2706	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2707	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2708	int err;
2709
2710	print_features(mvdev, features, true);
2711
2712	err = verify_driver_features(mvdev, features);
2713	if (err)
2714		return err;
2715
2716	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2717	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2718		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2719	else
2720		ndev->rqt_size = 1;
2721
2722	/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2723	 * 5.1.6.5.5 "Device operation in multiqueue mode":
2724	 *
2725	 * Multiqueue is disabled by default.
2726	 * The driver enables multiqueue by sending a command using class
2727	 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2728	 * operation, as follows: ...
2729	 */
2730	ndev->cur_num_vqs = 2;
2731
2732	update_cvq_info(mvdev);
2733	return err;
2734}
2735
2736static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2737{
2738	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2739	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2740
2741	ndev->config_cb = *cb;
2742}
2743
2744#define MLX5_VDPA_MAX_VQ_ENTRIES 256
2745static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2746{
2747	return MLX5_VDPA_MAX_VQ_ENTRIES;
2748}
2749
2750static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2751{
2752	return VIRTIO_ID_NET;
2753}
2754
2755static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2756{
2757	return PCI_VENDOR_ID_MELLANOX;
2758}
2759
2760static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2761{
2762	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2763	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2764
2765	print_status(mvdev, ndev->mvdev.status, false);
2766	return ndev->mvdev.status;
2767}
2768
2769static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2770{
2771	struct mlx5_vq_restore_info *ri = &mvq->ri;
2772	struct mlx5_virtq_attr attr = {};
2773	int err;
2774
2775	if (mvq->initialized) {
2776		err = query_virtqueue(ndev, mvq, &attr);
2777		if (err)
2778			return err;
2779	}
2780
2781	ri->avail_index = attr.available_index;
2782	ri->used_index = attr.used_index;
2783	ri->ready = mvq->ready;
2784	ri->num_ent = mvq->num_ent;
2785	ri->desc_addr = mvq->desc_addr;
2786	ri->device_addr = mvq->device_addr;
2787	ri->driver_addr = mvq->driver_addr;
2788	ri->map = mvq->map;
2789	ri->restore = true;
2790	return 0;
2791}
2792
2793static int save_channels_info(struct mlx5_vdpa_net *ndev)
2794{
2795	int i;
2796
2797	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2798		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2799		save_channel_info(ndev, &ndev->vqs[i]);
2800	}
2801	return 0;
2802}
2803
2804static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2805{
2806	int i;
2807
2808	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2809		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2810}
2811
2812static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2813{
2814	struct mlx5_vdpa_virtqueue *mvq;
2815	struct mlx5_vq_restore_info *ri;
2816	int i;
2817
2818	mlx5_clear_vqs(ndev);
2819	init_mvqs(ndev);
2820	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2821		mvq = &ndev->vqs[i];
2822		ri = &mvq->ri;
2823		if (!ri->restore)
2824			continue;
2825
2826		mvq->avail_idx = ri->avail_index;
2827		mvq->used_idx = ri->used_index;
2828		mvq->ready = ri->ready;
2829		mvq->num_ent = ri->num_ent;
2830		mvq->desc_addr = ri->desc_addr;
2831		mvq->device_addr = ri->device_addr;
2832		mvq->driver_addr = ri->driver_addr;
2833		mvq->map = ri->map;
2834	}
2835}
2836
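/* Switch the data path over to a new memory region. If the device cannot be
 * resumed, the virtqueue state is saved and the queue objects are torn down and
 * recreated around the MR update; otherwise the queues are only suspended, have
 * their MKEY-related fields flagged for modification, and are resumed once the
 * update completes.
 */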
2837static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2838				struct mlx5_vdpa_mr *new_mr,
2839				unsigned int asid)
2840{
2841	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2842	bool teardown = !is_resumable(ndev);
2843	int err;
2844
2845	suspend_vqs(ndev);
2846	if (teardown) {
2847		err = save_channels_info(ndev);
2848		if (err)
2849			return err;
2850
2851		teardown_driver(ndev);
2852	}
2853
2854	mlx5_vdpa_update_mr(mvdev, new_mr, asid);
2855
2856	for (int i = 0; i < ndev->cur_num_vqs; i++)
2857		ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
2858						MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
2859
2860	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2861		return 0;
2862
2863	if (teardown) {
2864		restore_channels_info(ndev);
2865		err = setup_driver(mvdev);
2866		if (err)
2867			return err;
2868	}
2869
2870	resume_vqs(ndev);
2871
2872	return 0;
2873}
2874
2875/* reslock must be held for this function */
2876static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2877{
2878	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2879	int err;
2880
2881	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2882
2883	if (ndev->setup) {
2884		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2885		err = 0;
2886		goto out;
2887	}
2888	mlx5_vdpa_add_debugfs(ndev);
2889
2890	err = read_umem_params(ndev);
2891	if (err)
2892		goto err_setup;
2893
2894	err = setup_virtqueues(mvdev);
2895	if (err) {
2896		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2897		goto err_setup;
2898	}
2899
2900	err = create_rqt(ndev);
2901	if (err) {
2902		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2903		goto err_rqt;
2904	}
2905
2906	err = create_tir(ndev);
2907	if (err) {
2908		mlx5_vdpa_warn(mvdev, "create_tir\n");
2909		goto err_tir;
2910	}
2911
2912	err = setup_steering(ndev);
2913	if (err) {
2914		mlx5_vdpa_warn(mvdev, "setup_steering\n");
2915		goto err_fwd;
2916	}
2917	ndev->setup = true;
2918
2919	return 0;
2920
2921err_fwd:
2922	destroy_tir(ndev);
2923err_tir:
2924	destroy_rqt(ndev);
2925err_rqt:
2926	teardown_virtqueues(ndev);
2927err_setup:
2928	mlx5_vdpa_remove_debugfs(ndev);
2929out:
2930	return err;
2931}
2932
2933/* reslock must be held for this function */
2934static void teardown_driver(struct mlx5_vdpa_net *ndev)
2935{
2936
2937	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2938
2939	if (!ndev->setup)
2940		return;
2941
2942	mlx5_vdpa_remove_debugfs(ndev);
2943	teardown_steering(ndev);
2944	destroy_tir(ndev);
2945	destroy_rqt(ndev);
2946	teardown_virtqueues(ndev);
2947	ndev->setup = false;
2948}
2949
2950static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2951{
2952	int i;
2953
2954	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2955		ndev->vqs[i].ready = false;
2956		ndev->vqs[i].modified_fields = 0;
2957	}
2958
2959	ndev->mvdev.cvq.ready = false;
2960}
2961
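/* (Re)initialize the control VQ vringh from the addresses programmed by the
 * driver, preserving the last available/used index across the reinit.
 */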
2962static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2963{
2964	struct mlx5_control_vq *cvq = &mvdev->cvq;
2965	int err = 0;
2966
2967	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
2968		u16 idx = cvq->vring.last_avail_idx;
2969
2970		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2971					cvq->vring.vring.num, false,
2972					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2973					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2974					(struct vring_used *)(uintptr_t)cvq->device_addr);
2975
2976		if (!err)
2977			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
2978	}
2979	return err;
2980}
2981
2982static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2983{
2984	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2985	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2986	int err;
2987
2988	print_status(mvdev, status, true);
2989
2990	down_write(&ndev->reslock);
2991
2992	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2993		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2994			err = setup_cvq_vring(mvdev);
2995			if (err) {
2996				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2997				goto err_setup;
2998			}
2999			register_link_notifier(ndev);
3000			err = setup_driver(mvdev);
3001			if (err) {
3002				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
3003				goto err_driver;
3004			}
3005		} else {
3006			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
3007			goto err_clear;
3008		}
3009	}
3010
3011	ndev->mvdev.status = status;
3012	up_write(&ndev->reslock);
3013	return;
3014
3015err_driver:
3016	unregister_link_notifier(ndev);
3017err_setup:
3018	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3019	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
3020err_clear:
3021	up_write(&ndev->reslock);
3022}
3023
3024static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
3025{
3026	int i;
3027
	/* By default, all groups are mapped to ASID 0 */
3029	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
3030		mvdev->group2asid[i] = 0;
3031}
3032
3033static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
3034{
3035	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3036	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3037
3038	print_status(mvdev, 0, true);
3039	mlx5_vdpa_info(mvdev, "performing device reset\n");
3040
3041	down_write(&ndev->reslock);
3042	unregister_link_notifier(ndev);
3043	teardown_driver(ndev);
3044	clear_vqs_ready(ndev);
3045	if (flags & VDPA_RESET_F_CLEAN_MAP)
3046		mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3047	ndev->mvdev.status = 0;
3048	ndev->mvdev.suspended = false;
3049	ndev->cur_num_vqs = 0;
3050	ndev->mvdev.cvq.received_desc = 0;
3051	ndev->mvdev.cvq.completed_desc = 0;
3052	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
3053	ndev->mvdev.actual_features = 0;
3054	init_group_to_asid_map(mvdev);
3055	++mvdev->generation;
3056
3057	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
3058	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3059		if (mlx5_vdpa_create_dma_mr(mvdev))
3060			mlx5_vdpa_warn(mvdev, "create MR failed\n");
3061	}
3062	up_write(&ndev->reslock);
3063
3064	return 0;
3065}
3066
3067static int mlx5_vdpa_reset(struct vdpa_device *vdev)
3068{
3069	return mlx5_vdpa_compat_reset(vdev, 0);
3070}
3071
3072static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
3073{
3074	return sizeof(struct virtio_net_config);
3075}
3076
3077static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
3078				 unsigned int len)
3079{
3080	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3081	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3082
3083	if (offset + len <= sizeof(struct virtio_net_config))
3084		memcpy(buf, (u8 *)&ndev->config + offset, len);
3085}
3086
3087static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
3088				 unsigned int len)
3089{
3090	/* not supported */
3091}
3092
3093static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
3094{
3095	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3096
3097	return mvdev->generation;
3098}
3099
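/* Apply a new IOTLB to the given address space: create an MR describing the
 * mappings (NULL for an empty IOTLB), install it directly if no MR exists yet
 * for this ASID, or switch over via mlx5_vdpa_change_map(), and finally update
 * the control VQ IOTLB.
 */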
3100static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
3101			unsigned int asid)
3102{
3103	struct mlx5_vdpa_mr *new_mr;
3104	int err;
3105
3106	if (asid >= MLX5_VDPA_NUM_AS)
3107		return -EINVAL;
3108
3109	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
3110		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
3111		if (IS_ERR(new_mr)) {
3112			err = PTR_ERR(new_mr);
3113			mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err);
3114			return err;
3115		}
3116	} else {
3117		/* Empty iotlbs don't have an mr but will clear the previous mr. */
3118		new_mr = NULL;
3119	}
3120
3121	if (!mvdev->mr[asid]) {
3122		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3123	} else {
3124		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
3125		if (err) {
3126			mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err);
3127			goto out_err;
3128		}
3129	}
3130
3131	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
3132
3133out_err:
3134	mlx5_vdpa_put_mr(mvdev, new_mr);
3135	return err;
3136}
3137
3138static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
3139			     struct vhost_iotlb *iotlb)
3140{
3141	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3142	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;
3144
3145	down_write(&ndev->reslock);
3146	err = set_map_data(mvdev, iotlb, asid);
3147	up_write(&ndev->reslock);
3148	return err;
3149}
3150
3151static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
3152{
3153	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3154	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3155	int err;
3156
3157	down_write(&ndev->reslock);
3158	err = mlx5_vdpa_reset_mr(mvdev, asid);
3159	up_write(&ndev->reslock);
3160	return err;
3161}
3162
3163static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
3164{
3165	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3166
3167	if (is_ctrl_vq_idx(mvdev, idx))
3168		return &vdev->dev;
3169
3170	return mvdev->vdev.dma_dev;
3171}
3172
3173static void free_irqs(struct mlx5_vdpa_net *ndev)
3174{
3175	struct mlx5_vdpa_irq_pool_entry *ent;
3176	int i;
3177
3178	if (!msix_mode_supported(&ndev->mvdev))
3179		return;
3180
3181	if (!ndev->irqp.entries)
3182		return;
3183
3184	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
3185		ent = ndev->irqp.entries + i;
3186		if (ent->map.virq)
3187			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
3188	}
3189	kfree(ndev->irqp.entries);
3190}
3191
3192static void mlx5_vdpa_free(struct vdpa_device *vdev)
3193{
3194	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3195	struct mlx5_core_dev *pfmdev;
3196	struct mlx5_vdpa_net *ndev;
3197
3198	ndev = to_mlx5_vdpa_ndev(mvdev);
3199
3200	free_resources(ndev);
3201	mlx5_vdpa_destroy_mr_resources(mvdev);
3202	if (!is_zero_ether_addr(ndev->config.mac)) {
3203		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3204		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3205	}
3206	mlx5_vdpa_free_resources(&ndev->mvdev);
3207	free_irqs(ndev);
3208	kfree(ndev->event_cbs);
3209	kfree(ndev->vqs);
3210}
3211
3212static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3213{
3214	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3215	struct vdpa_notification_area ret = {};
3216	struct mlx5_vdpa_net *ndev;
3217	phys_addr_t addr;
3218
3219	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3220		return ret;
3221
	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
	 * notification, to avoid the risk of mapping pages that contain the BARs
	 * of more than one SF.
	 */
3226	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3227		return ret;
3228
3229	ndev = to_mlx5_vdpa_ndev(mvdev);
3230	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3231	ret.addr = addr;
3232	ret.size = PAGE_SIZE;
3233	return ret;
3234}
3235
3236static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3237{
3238	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3239	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3240	struct mlx5_vdpa_virtqueue *mvq;
3241
3242	if (!is_index_valid(mvdev, idx))
3243		return -EINVAL;
3244
3245	if (is_ctrl_vq_idx(mvdev, idx))
3246		return -EOPNOTSUPP;
3247
3248	mvq = &ndev->vqs[idx];
3249	if (!mvq->map.virq)
3250		return -EOPNOTSUPP;
3251
3252	return mvq->map.virq;
3253}
3254
3255static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3256{
3257	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3258
3259	return mvdev->actual_features;
3260}
3261
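/* Query the virtio queue counters object of a virtqueue and return the number
 * of received and completed descriptors reported by the hardware.
 */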
3262static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3263			     u64 *received_desc, u64 *completed_desc)
3264{
3265	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3266	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3267	void *cmd_hdr;
3268	void *ctx;
3269	int err;
3270
3271	if (!counters_supported(&ndev->mvdev))
3272		return -EOPNOTSUPP;
3273
3274	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3275		return -EAGAIN;
3276
3277	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3278
3279	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3280	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3281	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3282	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3283
3284	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3285	if (err)
3286		return err;
3287
3288	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3289	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3290	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3291	return 0;
3292}
3293
3294static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3295					 struct sk_buff *msg,
3296					 struct netlink_ext_ack *extack)
3297{
3298	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3299	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3300	struct mlx5_vdpa_virtqueue *mvq;
3301	struct mlx5_control_vq *cvq;
3302	u64 received_desc;
3303	u64 completed_desc;
3304	int err = 0;
3305
3306	down_read(&ndev->reslock);
3307	if (!is_index_valid(mvdev, idx)) {
3308		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3309		err = -EINVAL;
3310		goto out_err;
3311	}
3312
3313	if (idx == ctrl_vq_idx(mvdev)) {
3314		cvq = &mvdev->cvq;
3315		received_desc = cvq->received_desc;
3316		completed_desc = cvq->completed_desc;
3317		goto out;
3318	}
3319
3320	mvq = &ndev->vqs[idx];
3321	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3322	if (err) {
3323		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3324		goto out_err;
3325	}
3326
3327out:
3328	err = -EMSGSIZE;
3329	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3330		goto out_err;
3331
3332	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3333			      VDPA_ATTR_PAD))
3334		goto out_err;
3335
3336	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3337		goto out_err;
3338
3339	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3340			      VDPA_ATTR_PAD))
3341		goto out_err;
3342
3343	err = 0;
3344out_err:
3345	up_read(&ndev->reslock);
3346	return err;
3347}
3348
3349static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3350{
3351	struct mlx5_control_vq *cvq;
3352
3353	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3354		return;
3355
3356	cvq = &mvdev->cvq;
3357	cvq->ready = false;
3358}
3359
3360static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3361{
3362	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3363	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3364	struct mlx5_vdpa_virtqueue *mvq;
3365	int i;
3366
3367	mlx5_vdpa_info(mvdev, "suspending device\n");
3368
3369	down_write(&ndev->reslock);
3370	unregister_link_notifier(ndev);
3371	for (i = 0; i < ndev->cur_num_vqs; i++) {
3372		mvq = &ndev->vqs[i];
3373		suspend_vq(ndev, mvq);
3374	}
3375	mlx5_vdpa_cvq_suspend(mvdev);
3376	mvdev->suspended = true;
3377	up_write(&ndev->reslock);
3378	return 0;
3379}
3380
3381static int mlx5_vdpa_resume(struct vdpa_device *vdev)
3382{
3383	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3384	struct mlx5_vdpa_net *ndev;
3385
3386	ndev = to_mlx5_vdpa_ndev(mvdev);
3387
3388	mlx5_vdpa_info(mvdev, "resuming device\n");
3389
3390	down_write(&ndev->reslock);
3391	mvdev->suspended = false;
3392	resume_vqs(ndev);
3393	register_link_notifier(ndev);
3394	up_write(&ndev->reslock);
3395	return 0;
3396}
3397
3398static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3399			       unsigned int asid)
3400{
3401	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3402	int err = 0;
3403
3404	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3405		return -EINVAL;
3406
3407	mvdev->group2asid[group] = asid;
3408
3409	mutex_lock(&mvdev->mr_mtx);
3410	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid])
3411		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid);
3412	mutex_unlock(&mvdev->mr_mtx);
3413
3414	return err;
3415}
3416
3417static const struct vdpa_config_ops mlx5_vdpa_ops = {
3418	.set_vq_address = mlx5_vdpa_set_vq_address,
3419	.set_vq_num = mlx5_vdpa_set_vq_num,
3420	.kick_vq = mlx5_vdpa_kick_vq,
3421	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3422	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3423	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3424	.set_vq_state = mlx5_vdpa_set_vq_state,
3425	.get_vq_state = mlx5_vdpa_get_vq_state,
3426	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3427	.get_vq_notification = mlx5_get_vq_notification,
3428	.get_vq_irq = mlx5_get_vq_irq,
3429	.get_vq_align = mlx5_vdpa_get_vq_align,
3430	.get_vq_group = mlx5_vdpa_get_vq_group,
3431	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
3432	.get_device_features = mlx5_vdpa_get_device_features,
3433	.get_backend_features = mlx5_vdpa_get_backend_features,
3434	.set_driver_features = mlx5_vdpa_set_driver_features,
3435	.get_driver_features = mlx5_vdpa_get_driver_features,
3436	.set_config_cb = mlx5_vdpa_set_config_cb,
3437	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3438	.get_device_id = mlx5_vdpa_get_device_id,
3439	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3440	.get_status = mlx5_vdpa_get_status,
3441	.set_status = mlx5_vdpa_set_status,
3442	.reset = mlx5_vdpa_reset,
3443	.compat_reset = mlx5_vdpa_compat_reset,
3444	.get_config_size = mlx5_vdpa_get_config_size,
3445	.get_config = mlx5_vdpa_get_config,
3446	.set_config = mlx5_vdpa_set_config,
3447	.get_generation = mlx5_vdpa_get_generation,
3448	.set_map = mlx5_vdpa_set_map,
3449	.reset_map = mlx5_vdpa_reset_map,
3450	.set_group_asid = mlx5_set_group_asid,
3451	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
3452	.free = mlx5_vdpa_free,
3453	.suspend = mlx5_vdpa_suspend,
3454	.resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
3455};
3456
3457static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3458{
3459	u16 hw_mtu;
3460	int err;
3461
3462	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3463	if (err)
3464		return err;
3465
3466	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3467	return 0;
3468}
3469
3470static int alloc_resources(struct mlx5_vdpa_net *ndev)
3471{
3472	struct mlx5_vdpa_net_resources *res = &ndev->res;
3473	int err;
3474
3475	if (res->valid) {
3476		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3477		return -EEXIST;
3478	}
3479
3480	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3481	if (err)
3482		return err;
3483
3484	err = create_tis(ndev);
3485	if (err)
3486		goto err_tis;
3487
3488	res->valid = true;
3489
3490	return 0;
3491
3492err_tis:
3493	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3494	return err;
3495}
3496
3497static void free_resources(struct mlx5_vdpa_net *ndev)
3498{
3499	struct mlx5_vdpa_net_resources *res = &ndev->res;
3500
3501	if (!res->valid)
3502		return;
3503
3504	destroy_tis(ndev);
3505	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3506	res->valid = false;
3507}
3508
3509static void init_mvqs(struct mlx5_vdpa_net *ndev)
3510{
3511	struct mlx5_vdpa_virtqueue *mvq;
3512	int i;
3513
3514	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3515		mvq = &ndev->vqs[i];
3516		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3517		mvq->index = i;
3518		mvq->ndev = ndev;
3519		mvq->fwqp.fw = true;
3520		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3521	}
3522	for (; i < ndev->mvdev.max_vqs; i++) {
3523		mvq = &ndev->vqs[i];
3524		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3525		mvq->index = i;
3526		mvq->ndev = ndev;
3527	}
3528}
3529
3530struct mlx5_vdpa_mgmtdev {
3531	struct vdpa_mgmt_dev mgtdev;
3532	struct mlx5_adev *madev;
3533	struct mlx5_vdpa_net *ndev;
3534	struct vdpa_config_ops vdpa_ops;
3535};
3536
3537static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3538{
3539	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3540	void *in;
3541	int err;
3542
3543	in = kvzalloc(inlen, GFP_KERNEL);
3544	if (!in)
3545		return -ENOMEM;
3546
3547	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3548	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3549		 mtu + MLX5V_ETH_HARD_MTU);
3550	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3551		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3552
3553	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3554
3555	kvfree(in);
3556	return err;
3557}
3558
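/* Best-effort pre-allocation of a dedicated MSI-X vector per virtqueue.
 * Allocation stops at the first failure; irqp.num_ent records how many
 * vectors were actually obtained.
 */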
3559static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3560{
3561	struct mlx5_vdpa_irq_pool_entry *ent;
3562	int i;
3563
3564	if (!msix_mode_supported(&ndev->mvdev))
3565		return;
3566
3567	if (!ndev->mvdev.mdev->pdev)
3568		return;
3569
3570	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3571	if (!ndev->irqp.entries)
3572		return;
3573
3575	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3576		ent = ndev->irqp.entries + i;
3577		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3578			 dev_name(&ndev->mvdev.vdev.dev), i);
3579		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3580		if (!ent->map.virq)
3581			return;
3582
3583		ndev->irqp.num_ent++;
3584	}
3585}
3586
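/* Management device .dev_add callback: validate the requested provisioning
 * against the device capabilities, allocate and initialize the vdpa net
 * device along with its resources, and register it with the vdpa core.
 */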
3587static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3588			     const struct vdpa_dev_set_config *add_config)
3589{
3590	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3591	struct virtio_net_config *config;
3592	struct mlx5_core_dev *pfmdev;
3593	struct mlx5_vdpa_dev *mvdev;
3594	struct mlx5_vdpa_net *ndev;
3595	struct mlx5_core_dev *mdev;
3596	u64 device_features;
3597	u32 max_vqs;
3598	u16 mtu;
3599	int err;
3600
3601	if (mgtdev->ndev)
3602		return -ENOSPC;
3603
3604	mdev = mgtdev->madev->mdev;
3605	device_features = mgtdev->mgtdev.supported_features;
3606	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3607		if (add_config->device_features & ~device_features) {
3608			dev_warn(mdev->device,
3609				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3610				 add_config->device_features, device_features);
3611			return -EINVAL;
3612		}
3613		device_features &= add_config->device_features;
3614	} else {
3615		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3616	}
3617	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3618	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3619		dev_warn(mdev->device,
			 "Must provision minimum features 0x%llx for this device\n",
3621			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3622		return -EOPNOTSUPP;
3623	}
3624
3625	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3626	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3627		dev_warn(mdev->device, "missing support for split virtqueues\n");
3628		return -EOPNOTSUPP;
3629	}
3630
3631	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3632			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3633	if (max_vqs < 2) {
3634		dev_warn(mdev->device,
3635			 "%d virtqueues are supported. At least 2 are required\n",
3636			 max_vqs);
3637		return -EAGAIN;
3638	}
3639
3640	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3641		if (add_config->net.max_vq_pairs > max_vqs / 2)
3642			return -EINVAL;
3643		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3644	} else {
3645		max_vqs = 2;
3646	}
3647
3648	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
3649				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3650	if (IS_ERR(ndev))
3651		return PTR_ERR(ndev);
3652
3653	ndev->mvdev.max_vqs = max_vqs;
3654	mvdev = &ndev->mvdev;
3655	mvdev->mdev = mdev;
3656
3657	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3658	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3659	if (!ndev->vqs || !ndev->event_cbs) {
3660		err = -ENOMEM;
3661		goto err_alloc;
3662	}
3663
3664	init_mvqs(ndev);
3665	allocate_irqs(ndev);
3666	init_rwsem(&ndev->reslock);
3667	config = &ndev->config;
3668
3669	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3670		err = config_func_mtu(mdev, add_config->net.mtu);
3671		if (err)
3672			goto err_alloc;
3673	}
3674
3675	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3676		err = query_mtu(mdev, &mtu);
3677		if (err)
3678			goto err_alloc;
3679
3680		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3681	}
3682
3683	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3684		if (get_link_state(mvdev))
3685			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3686		else
3687			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3688	}
3689
	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
	/* Don't bother setting a mac address in config if _F_MAC is not going to be provisioned */
3693	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3694		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3695		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3696		if (err)
3697			goto err_alloc;
3698	}
3699
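	/* A non-zero mac is added to the physical function's MPFS L2 table so
	 * that traffic destined to it reaches this device.
	 */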
3700	if (!is_zero_ether_addr(config->mac)) {
3701		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3702		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3703		if (err)
3704			goto err_alloc;
3705	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
		/*
		 * We used to clear the _F_MAC feature bit when a zero mac
		 * address was seen and device features were not explicitly
		 * provisioned. Keep that behaviour so old scripts do not
		 * break.
		 */
3712		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3713	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
		/* Refuse to provision a zero mac address when _F_MAC is requested */
3715		mlx5_vdpa_warn(&ndev->mvdev,
3716			       "No mac address provisioned?\n");
3717		err = -EINVAL;
3718		goto err_alloc;
3719	}
3720
3721	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3722		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3723
3724	ndev->mvdev.mlx_features = device_features;
3725	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3726	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3727	if (err)
3728		goto err_mpfs;
3729
3730	INIT_LIST_HEAD(&mvdev->mr_list_head);
3731
3732	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3733		err = mlx5_vdpa_create_dma_mr(mvdev);
3734		if (err)
3735			goto err_res;
3736	}
3737
3738	err = alloc_resources(ndev);
3739	if (err)
3740		goto err_mr;
3741
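	/* Control VQ commands are handled in process context on a dedicated
	 * single threaded workqueue.
	 */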
3742	ndev->cvq_ent.mvdev = mvdev;
3743	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3744	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3745	if (!mvdev->wq) {
3746		err = -ENOMEM;
3747		goto err_res2;
3748	}
3749
3750	mvdev->vdev.mdev = &mgtdev->mgtdev;
3751	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3752	if (err)
3753		goto err_reg;
3754
3755	mgtdev->ndev = ndev;
3756	return 0;
3757
3758err_reg:
3759	destroy_workqueue(mvdev->wq);
3760err_res2:
3761	free_resources(ndev);
3762err_mr:
3763	mlx5_vdpa_destroy_mr_resources(mvdev);
3764err_res:
3765	mlx5_vdpa_free_resources(&ndev->mvdev);
3766err_mpfs:
3767	if (!is_zero_ether_addr(config->mac))
3768		mlx5_mpfs_del_mac(pfmdev, config->mac);
3769err_alloc:
3770	put_device(&mvdev->vdev.dev);
3771	return err;
3772}
3773
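/*
 * .dev_del() callback of the management device: stop link state
 * notifications, unregister the vdpa device and destroy the control VQ
 * workqueue.
 */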
3774static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3775{
3776	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3777	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3778	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3779	struct workqueue_struct *wq;
3780
3781	unregister_link_notifier(ndev);
3782	_vdpa_unregister_device(dev);
3783	wq = mvdev->wq;
3784	mvdev->wq = NULL;
3785	destroy_workqueue(wq);
3786	mgtdev->ndev = NULL;
3787}
3788
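/* Device add/delete callbacks exposed to the vdpa management core */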
3789static const struct vdpa_mgmtdev_ops mdev_ops = {
3790	.dev_add = mlx5_vdpa_dev_add,
3791	.dev_del = mlx5_vdpa_dev_del,
3792};
3793
3794static struct virtio_device_id id_table[] = {
3795	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3796	{ 0 },
3797};
3798
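/*
 * Auxiliary bus probe: allocate the management device, advertise the
 * supported config attributes and features, drop optional vdpa ops the
 * device cannot support and register with the vdpa subsystem.
 */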
static int mlx5v_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
3803	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3804	struct mlx5_core_dev *mdev = madev->mdev;
3805	struct mlx5_vdpa_mgmtdev *mgtdev;
3806	int err;
3807
3808	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3809	if (!mgtdev)
3810		return -ENOMEM;
3811
3812	mgtdev->mgtdev.ops = &mdev_ops;
3813	mgtdev->mgtdev.device = mdev->device;
3814	mgtdev->mgtdev.id_table = id_table;
3815	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3816					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3817					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3818					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
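	/* The extra queue accounts for the control VQ, matching the
	 * max_vqs + 1 used when the vdpa device is registered.
	 */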
3819	mgtdev->mgtdev.max_supported_vqs =
3820		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3821	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3822	mgtdev->madev = madev;
3823	mgtdev->vdpa_ops = mlx5_vdpa_ops;
3824
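	/* Clear optional vdpa ops that the device cannot back so that callers
	 * treat them as unsupported.
	 */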
3825	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
3826		mgtdev->vdpa_ops.get_vq_desc_group = NULL;
3827
3828	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
3829		mgtdev->vdpa_ops.resume = NULL;
3830
3831	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3832	if (err)
3833		goto reg_err;
3834
3835	auxiliary_set_drvdata(adev, mgtdev);
3836
3837	return 0;
3838
3839reg_err:
3840	kfree(mgtdev);
3841	return err;
3842}
3843
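/* Auxiliary bus remove: unregister the management device and free it */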
3844static void mlx5v_remove(struct auxiliary_device *adev)
3845{
3846	struct mlx5_vdpa_mgmtdev *mgtdev;
3847
3848	mgtdev = auxiliary_get_drvdata(adev);
3849	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3850	kfree(mgtdev);
3851}
3852
3853static const struct auxiliary_device_id mlx5v_id_table[] = {
3854	{ .name = MLX5_ADEV_NAME ".vnet", },
3855	{},
3856};
3857
3858MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3859
3860static struct auxiliary_driver mlx5v_driver = {
3861	.name = "vnet",
3862	.probe = mlx5v_probe,
3863	.remove = mlx5v_remove,
3864	.id_table = mlx5v_id_table,
3865};
3866
3867module_auxiliary_driver(mlx5v_driver);
3868