// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
 */

#include "mlx5_ib.h"
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "counters.h"
#include "ib_rep.h"
#include "qp.h"

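/*
 * Describes one exported counter: @name is the stat name shown to the
 * rdma core, @offset is the byte offset of the value in the relevant
 * query output layout, and @type (op counters only) selects the
 * matching MLX5_IB_OPCOUNTER_* flow counter slot.
 */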
struct mlx5_ib_counter {
	const char *name;
	size_t offset;
	u32 type;
};

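/*
 * INIT_Q_COUNTER() resolves the counter's byte offset inside the
 * query_q_counter_out layout at build time; INIT_VPORT_Q_COUNTER()
 * does the same but prefixes the exported name with "vport_" so the
 * per-vport stats are distinguishable in switchdev mode.
 */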
#define INIT_Q_COUNTER(_name)		\
	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}

#define INIT_VPORT_Q_COUNTER(_name)		\
	{ .name = "vport_" #_name, .offset =	\
		MLX5_BYTE_OFF(query_q_counter_out, _name)}

static const struct mlx5_ib_counter basic_q_cnts[] = {
	INIT_Q_COUNTER(rx_write_requests),
	INIT_Q_COUNTER(rx_read_requests),
	INIT_Q_COUNTER(rx_atomic_requests),
	INIT_Q_COUNTER(rx_dct_connect),
	INIT_Q_COUNTER(out_of_buffer),
};

static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
	INIT_Q_COUNTER(out_of_sequence),
};

static const struct mlx5_ib_counter retrans_q_cnts[] = {
	INIT_Q_COUNTER(duplicate_request),
	INIT_Q_COUNTER(rnr_nak_retry_err),
	INIT_Q_COUNTER(packet_seq_err),
	INIT_Q_COUNTER(implied_nak_seq_err),
	INIT_Q_COUNTER(local_ack_timeout_err),
};

static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
	INIT_VPORT_Q_COUNTER(rx_write_requests),
	INIT_VPORT_Q_COUNTER(rx_read_requests),
	INIT_VPORT_Q_COUNTER(rx_atomic_requests),
	INIT_VPORT_Q_COUNTER(rx_dct_connect),
	INIT_VPORT_Q_COUNTER(out_of_buffer),
};

static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
	INIT_VPORT_Q_COUNTER(out_of_sequence),
};

static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
	INIT_VPORT_Q_COUNTER(duplicate_request),
	INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
	INIT_VPORT_Q_COUNTER(packet_seq_err),
	INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
	INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
};

#define INIT_CONG_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}

static const struct mlx5_ib_counter cong_cnts[] = {
	INIT_CONG_COUNTER(rp_cnp_ignored),
	INIT_CONG_COUNTER(rp_cnp_handled),
	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
	INIT_CONG_COUNTER(np_cnp_sent),
};

static const struct mlx5_ib_counter extended_err_cnts[] = {
	INIT_Q_COUNTER(resp_local_length_error),
	INIT_Q_COUNTER(resp_cqe_error),
	INIT_Q_COUNTER(req_cqe_error),
	INIT_Q_COUNTER(req_remote_invalid_request),
	INIT_Q_COUNTER(req_remote_access_errors),
	INIT_Q_COUNTER(resp_remote_access_errors),
	INIT_Q_COUNTER(resp_cqe_flush_error),
	INIT_Q_COUNTER(req_cqe_flush_error),
};

static const struct mlx5_ib_counter roce_accl_cnts[] = {
	INIT_Q_COUNTER(roce_adp_retrans),
	INIT_Q_COUNTER(roce_adp_retrans_to),
	INIT_Q_COUNTER(roce_slow_restart),
	INIT_Q_COUNTER(roce_slow_restart_cnps),
	INIT_Q_COUNTER(roce_slow_restart_trans),
};

static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
	INIT_VPORT_Q_COUNTER(resp_local_length_error),
	INIT_VPORT_Q_COUNTER(resp_cqe_error),
	INIT_VPORT_Q_COUNTER(req_cqe_error),
	INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
	INIT_VPORT_Q_COUNTER(req_remote_access_errors),
	INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
	INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
	INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
};

static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
	INIT_VPORT_Q_COUNTER(roce_adp_retrans),
	INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
	INIT_VPORT_Q_COUNTER(roce_slow_restart),
	INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
	INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
};

#define INIT_EXT_PPCNT_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
	MLX5_BYTE_OFF(ppcnt_reg, \
		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}

static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};

#define INIT_OP_COUNTER(_name, _type)		\
	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}

static const struct mlx5_ib_counter basic_op_cnts[] = {
	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
};

static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
};

static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
};

static int mlx5_ib_read_counters(struct ib_counters *counters,
				 struct ib_counters_read_attr *read_attr,
				 struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
	struct mlx5_read_counters_attr mread_attr = {};
	struct mlx5_ib_flow_counters_desc *desc;
	int ret, i;

	mutex_lock(&mcounters->mcntrs_mutex);
	if (mcounters->cntrs_max_index > read_attr->ncounters) {
		ret = -EINVAL;
		goto err_bound;
	}

	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
				 GFP_KERNEL);
	if (!mread_attr.out) {
		ret = -ENOMEM;
		goto err_bound;
	}

	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
	mread_attr.flags = read_attr->flags;
	ret = mcounters->read_counters(counters->device, &mread_attr);
	if (ret)
		goto err_read;

	/* Walk the counters data array and scatter each value into the
	 * user buffer slot named by its description/index pair.
	 */
	desc = mcounters->counters_data;
	for (i = 0; i < mcounters->ncounters; i++)
		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];

err_read:
	kfree(mread_attr.out);
err_bound:
	mutex_unlock(&mcounters->mcntrs_mutex);
	return ret;
}

static int mlx5_ib_destroy_counters(struct ib_counters *counters)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

	mlx5_ib_counters_clear_description(counters);
	if (mcounters->hw_cntrs_hndl)
		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
				mcounters->hw_cntrs_hndl);
	return 0;
}

static int mlx5_ib_create_counters(struct ib_counters *counters,
				   struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

	mutex_init(&mcounters->mcntrs_mutex);
	return 0;
}

static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
{
	return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
	       MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
}

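/*
 * Map an IB port number to the counter set that serves it. Port 0 (the
 * whole-device stats) always uses port[0]. In switchdev mode port[0]
 * holds the device's own Q counters and port[1] holds the helper set
 * used to query other vports, so any port_num > 0 maps to port[1]
 * (unless vport Q counters are unsupported, in which case everything
 * falls back to port[0]). In non-switchdev mode the mapping is simply
 * port_num - 1.
 */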
static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
						   u32 port_num)
{
	if ((is_mdev_switchdev_mode(dev->mdev) &&
	     !vport_qcounters_supported(dev)) || !port_num)
		return &dev->port[0].cnts;

	return is_mdev_switchdev_mode(dev->mdev) ?
	       &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
}

/**
 * mlx5_ib_get_counters_id - Returns counters id to use for device+port
 * @dev:	Pointer to mlx5 IB device
 * @port_num:	Zero based port number
 *
 * Returns the counter set id to use for the given device and port
 * combination, in both switchdev and non-switchdev modes of the
 * parent device.
 */
u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
{
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);

	return cnts->set_id;
}

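/*
 * Allocate the rdma_hw_stats structure for a counter set. Optional
 * (operation) counters sit after the hardware counters and start out
 * disabled; they are only armed via mlx5_ib_modify_stat().
 */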
static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
{
	struct rdma_hw_stats *stats;
	u32 num_hw_counters;
	int i;

	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
			  cnts->num_ext_ppcnt_counters;
	stats = rdma_alloc_hw_stats_struct(cnts->descs,
					   num_hw_counters +
					   cnts->num_op_counters,
					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
	if (!stats)
		return NULL;

	for (i = 0; i < cnts->num_op_counters; i++)
		set_bit(num_hw_counters + i, stats->is_disabled);

	return stats;
}

static struct rdma_hw_stats *
mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;

	return do_alloc_stats(cnts);
}

static struct rdma_hw_stats *
mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);

	return do_alloc_stats(cnts);
}

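/*
 * Read one Q counter set via QUERY_Q_COUNTER. The returned values are
 * 32-bit on the wire, so they are widened to the u64 slots expected by
 * the rdma core.
 */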
static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
				    const struct mlx5_ib_counters *cnts,
				    struct rdma_hw_stats *stats,
				    u16 set_id)
{
	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
	__be32 val;
	int ret, i;

	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
	if (ret)
		return ret;

	for (i = 0; i < cnts->num_q_counters; i++) {
		val = *(__be32 *)((void *)out + cnts->offsets[i]);
		stats->value[i] = (u64)be32_to_cpu(val);
	}

	return 0;
}

static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
					    const struct mlx5_ib_counters *cnts,
					    struct rdma_hw_stats *stats)
{
	int offset = cnts->num_q_counters + cnts->num_cong_counters;
	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	int ret, i;
	void *out;

	out = kvzalloc(sz, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(ppcnt_reg, in, local_port, 1);
	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
				   0, 0);
	if (ret)
		goto free;

	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
		stats->value[i + offset] =
			be64_to_cpup((__be64 *)(out +
				    cnts->offsets[i + offset]));
free:
	kvfree(out);
	return ret;
}

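/*
 * Query the Q counters of the vport behind a representor port, using
 * the other_vport/aggregate flavour of QUERY_Q_COUNTER issued on the
 * eswitch core device. The uplink representor is skipped (returns
 * success without reading anything).
 */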
static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
					  u32 port_num,
					  const struct mlx5_ib_counters *cnts,
					  struct rdma_hw_stats *stats)
{
	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
	struct mlx5_core_dev *mdev;
	__be32 val;
	int ret, i;

	if (!dev->port[port_num].rep ||
	    dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
		return 0;

	mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
	if (!mdev)
		return -EOPNOTSUPP;

	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
	MLX5_SET(query_q_counter_in, in, other_vport, 1);
	MLX5_SET(query_q_counter_in, in, vport_number,
		 dev->port[port_num].rep->vport);
	MLX5_SET(query_q_counter_in, in, aggregate, 1);
	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
	if (ret)
		return ret;

	for (i = 0; i < cnts->num_q_counters; i++) {
		val = *(__be32 *)((void *)out + cnts->offsets[i]);
		stats->value[i] = (u64)be32_to_cpu(val);
	}

	return 0;
}

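/*
 * Refresh every hardware counter of the set: Q counters (per device or
 * per vport), then the extended PPCNT and congestion counters where
 * the relevant capabilities are present. Returns the number of
 * counters filled in, as the rdma core expects.
 */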
static int do_get_hw_stats(struct ib_device *ibdev,
			   struct rdma_hw_stats *stats,
			   u32 port_num, int index)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
	struct mlx5_core_dev *mdev;
	int ret, num_counters;

	if (!stats)
		return -EINVAL;

	num_counters = cnts->num_q_counters +
		       cnts->num_cong_counters +
		       cnts->num_ext_ppcnt_counters;

	if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
		ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
						     stats);
	else
		ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
					       cnts->set_id);
	if (ret)
		return ret;

	/* We don't expose device counters over vports */
	if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
		goto done;

	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
		if (ret)
			return ret;
	}

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		if (!port_num)
			port_num = 1;
		mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
		if (!mdev) {
			/* If the port is not yet affiliated, it is in the
			 * down state and its counters are all zero, so
			 * there is nothing to read from the HCA.
			 */
			goto done;
		}
		ret = mlx5_lag_query_cong_counters(dev->mdev,
						   stats->value +
						   cnts->num_q_counters,
						   cnts->num_cong_counters,
						   cnts->offsets +
						   cnts->num_q_counters);

		mlx5_ib_put_native_port_mdev(dev, port_num);
		if (ret)
			return ret;
	}

done:
	return num_counters;
}

static int do_get_op_stat(struct ib_device *ibdev,
			  struct rdma_hw_stats *stats,
			  u32 port_num, int index)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts;
	const struct mlx5_ib_op_fc *opfcs;
	u64 packets = 0, bytes;
	u32 type;
	int ret;

	cnts = get_counters(dev, port_num);

	opfcs = cnts->opfcs;
	type = *(u32 *)cnts->descs[index].priv;
	if (type >= MLX5_IB_OPCOUNTER_MAX)
		return -EINVAL;

	if (!opfcs[type].fc)
		goto out;

	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
			    &packets, &bytes);
	if (ret)
		return ret;

out:
	stats->value[index] = packets;
	return index;
}

static int do_get_op_stats(struct ib_device *ibdev,
			   struct rdma_hw_stats *stats,
			   u32 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts;
	int index, ret, num_hw_counters;

	cnts = get_counters(dev, port_num);
	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
			  cnts->num_ext_ppcnt_counters;
	for (index = num_hw_counters;
	     index < (num_hw_counters + cnts->num_op_counters); index++) {
		ret = do_get_op_stat(ibdev, stats, port_num, index);
		if (ret != index)
			return ret;
	}

	return cnts->num_op_counters;
}

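/*
 * rdma core entry point: a negative or too-large index is rejected; an
 * in-range non-zero index refreshes the single hardware or operation
 * counter it names, while index 0 falls through to a full refresh of
 * both counter groups.
 */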
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
				struct rdma_hw_stats *stats,
				u32 port_num, int index)
{
	int num_counters, num_hw_counters, num_op_counters;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts;

	cnts = get_counters(dev, port_num);
	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
		cnts->num_ext_ppcnt_counters;
	num_counters = num_hw_counters + cnts->num_op_counters;

	if (index < 0 || index > num_counters)
		return -EINVAL;
	else if (index > 0 && index < num_hw_counters)
		return do_get_hw_stats(ibdev, stats, port_num, index);
	else if (index >= num_hw_counters && index < num_counters)
		return do_get_op_stat(ibdev, stats, port_num, index);

	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
	if (num_hw_counters < 0)
		return num_hw_counters;

	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
	if (num_op_counters < 0)
		return num_op_counters;

	return num_hw_counters + num_op_counters;
}

static struct rdma_hw_stats *
mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);

	return do_alloc_stats(cnts);
}

static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);

	return mlx5_ib_query_q_counters(dev->mdev, cnts,
					counter->stats, counter->id);
}

static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};

	if (!counter->id)
		return 0;

	MLX5_SET(dealloc_q_counter_in, in, opcode,
		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
}

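/*
 * Bind a QP to an rdma counter. A counter set id is allocated lazily
 * on the first bind; if attaching the QP fails, the counter set is
 * released again and counter->id reset so the counter stays unbound.
 */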
static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
				   struct ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	int err;

	if (!counter->id) {
		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};

		MLX5_SET(alloc_q_counter_in, in, opcode,
			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
		if (err)
			return err;
		counter->id =
			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
	}

	err = mlx5_ib_qp_set_counter(qp, counter);
	if (err)
		goto fail_set_counter;

	return 0;

fail_set_counter:
	mlx5_ib_counter_dealloc(counter);
	counter->id = 0;

	return err;
}

static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
{
	return mlx5_ib_qp_set_counter(qp, NULL);
}

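/*
 * Populate the stat descriptors and query offsets for one counter set.
 * The array layout here must match the sizing logic in
 * __mlx5_ib_alloc_counters(): Q counters first, then congestion and
 * extended PPCNT counters (PF only), then the optional op counters.
 */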
static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
				  struct rdma_stat_desc *descs, size_t *offsets,
				  u32 port_num)
{
	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
			port_num != MLX5_VPORT_PF;
	const struct mlx5_ib_counter *names;
	int j = 0, i, size;

	names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
			  ARRAY_SIZE(basic_q_cnts);
	for (i = 0; i < size; i++, j++) {
		descs[j].name = names[i].name;
		offsets[j] = names[i].offset;
	}

	names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
			  ARRAY_SIZE(out_of_seq_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
			  ARRAY_SIZE(retrans_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
			  ARRAY_SIZE(extended_err_cnts);
	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
			  ARRAY_SIZE(roce_accl_cnts);
	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	if (is_vport)
		return;

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
			descs[j].name = cong_cnts[i].name;
			offsets[j] = cong_cnts[i].offset;
		}
	}

	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
			descs[j].name = ext_ppcnt_cnts[i].name;
			offsets[j] = ext_ppcnt_cnts[i].offset;
		}
	}

	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
		descs[j].name = basic_op_cnts[i].name;
		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
		descs[j].priv = &basic_op_cnts[i].type;
	}

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
			descs[j].name = rdmarx_cnp_op_cnts[i].name;
			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
		}
	}

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
			descs[j].name = rdmatx_cnp_op_cnts[i].name;
			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
		}
	}
}

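/*
 * Size and allocate the descs/offsets arrays for one counter set,
 * mirroring the capability checks done in mlx5_ib_fill_counters().
 * Vport counter sets carry Q counters only.
 */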
static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
				    struct mlx5_ib_counters *cnts, u32 port_num)
{
	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
			port_num != MLX5_VPORT_PF;
	u32 num_counters, num_op_counters = 0, size;

	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
			  ARRAY_SIZE(basic_q_cnts);
	num_counters = size;

	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
			  ARRAY_SIZE(out_of_seq_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
		num_counters += size;

	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
			  ARRAY_SIZE(retrans_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
		num_counters += size;

	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
			  ARRAY_SIZE(extended_err_cnts);
	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
		num_counters += size;

	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
			  ARRAY_SIZE(roce_accl_cnts);
	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
		num_counters += size;

	cnts->num_q_counters = num_counters;

	if (is_vport)
		goto skip_non_qcounters;

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
		num_counters += ARRAY_SIZE(cong_cnts);
	}
	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
	}

	num_op_counters = ARRAY_SIZE(basic_op_cnts);

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_receive_rdma.bth_opcode))
		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);

skip_non_qcounters:
	cnts->num_op_counters = num_op_counters;
	num_counters += num_op_counters;
	cnts->descs = kcalloc(num_counters,
			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
	if (!cnts->descs)
		return -ENOMEM;

	cnts->offsets = kcalloc(num_counters,
				sizeof(*cnts->offsets), GFP_KERNEL);
	if (!cnts->offsets)
		goto err;

	return 0;

err:
	kfree(cnts->descs);
	cnts->descs = NULL;
	return -ENOMEM;
}

static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
	int num_cnt_ports = dev->num_ports;
	int i, j;

	if (is_mdev_switchdev_mode(dev->mdev))
		num_cnt_ports = min(2, num_cnt_ports);

	MLX5_SET(dealloc_q_counter_in, in, opcode,
		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);

	for (i = 0; i < num_cnt_ports; i++) {
		if (dev->port[i].cnts.set_id) {
			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
				 dev->port[i].cnts.set_id);
			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
		}
		kfree(dev->port[i].cnts.descs);
		kfree(dev->port[i].cnts.offsets);

		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
			if (!dev->port[i].cnts.opfcs[j].fc)
				continue;

			if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
				mlx5_ib_fs_remove_op_fc(dev,
					&dev->port[i].cnts.opfcs[j], j);
			mlx5_fc_destroy(dev->mdev,
					dev->port[i].cnts.opfcs[j].fc);
			dev->port[i].cnts.opfcs[j].fc = NULL;
		}
	}
}

static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
	int num_cnt_ports = dev->num_ports;
	int err = 0;
	int i;
	bool is_shared;

	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;

	/*
	 * In switchdev mode we allocate two counter sets: the first holds
	 * the real Q counters of this device, while the second is used by
	 * the PF as a helper set for querying the Q counters of all other
	 * vports.
	 */
	if (is_mdev_switchdev_mode(dev->mdev))
		num_cnt_ports = min(2, num_cnt_ports);

	for (i = 0; i < num_cnt_ports; i++) {
		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
		if (err)
			goto err_alloc;

		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
				      dev->port[i].cnts.offsets, i);

		MLX5_SET(alloc_q_counter_in, in, uid,
			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);

		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
		if (err) {
			mlx5_ib_warn(dev,
				     "couldn't allocate queue counter for port %d, err %d\n",
				     i + 1, err);
			goto err_alloc;
		}

		dev->port[i].cnts.set_id =
			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
	}
	return 0;

err_alloc:
	mlx5_ib_dealloc_counters(dev);
	return err;
}

static int read_flow_counters(struct ib_device *ibdev,
			      struct mlx5_read_counters_attr *read_attr)
{
	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	return mlx5_fc_query(dev->mdev, fc,
			     &read_attr->out[IB_COUNTER_PACKETS],
			     &read_attr->out[IB_COUNTER_BYTES]);
}

/* flow counters currently expose two values: packets and bytes */
#define FLOW_COUNTERS_NUM 2
static int counters_set_description(
	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
	u32 cntrs_max_index = 0;
	int i;

	if (counters_type != MLX5_IB_COUNTERS_FLOW)
		return -EINVAL;

	/* init the fields for the object */
	mcounters->type = counters_type;
	mcounters->read_counters = read_flow_counters;
	mcounters->counters_num = FLOW_COUNTERS_NUM;
	mcounters->ncounters = ncounters;
	/* each counter entry has a description/index pair */
	for (i = 0; i < ncounters; i++) {
		if (desc_data[i].description > IB_COUNTER_BYTES)
			return -EINVAL;

		if (cntrs_max_index <= desc_data[i].index)
			cntrs_max_index = desc_data[i].index + 1;
	}

	mutex_lock(&mcounters->mcntrs_mutex);
	mcounters->counters_data = desc_data;
	mcounters->cntrs_max_index = cntrs_max_index;
	mutex_unlock(&mcounters->mcntrs_mutex);

	return 0;
}

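/*
 * Attach user-supplied description/index pairs (and a hardware flow
 * counter) to an ib_counters object before it is bound to a flow. When
 * called without ucmd data, the counters must already have been
 * described by a previous call.
 */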
#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
				   struct mlx5_ib_create_flow *ucmd)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
	bool hw_hndl = false;
	int ret = 0;

	if (ucmd && ucmd->ncounters_data != 0) {
		cntrs_data = ucmd->data;
		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
			return -EINVAL;

		desc_data = kcalloc(cntrs_data->ncounters,
				    sizeof(*desc_data),
				    GFP_KERNEL);
		if (!desc_data)
			return -ENOMEM;

		if (copy_from_user(desc_data,
				   u64_to_user_ptr(cntrs_data->counters_data),
				   sizeof(*desc_data) * cntrs_data->ncounters)) {
			ret = -EFAULT;
			goto free;
		}
	}

	if (!mcounters->hw_cntrs_hndl) {
		mcounters->hw_cntrs_hndl = mlx5_fc_create(
			to_mdev(ibcounters->device)->mdev, false);
		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
			goto free;
		}
		hw_hndl = true;
	}

	if (desc_data) {
		/* counters already bound to at least one flow */
		if (mcounters->cntrs_max_index) {
			ret = -EINVAL;
			goto free_hndl;
		}

		ret = counters_set_description(ibcounters,
					       MLX5_IB_COUNTERS_FLOW,
					       desc_data,
					       cntrs_data->ncounters);
		if (ret)
			goto free_hndl;

	} else if (!mcounters->cntrs_max_index) {
		/* counters not bound yet, must have udata passed */
		ret = -EINVAL;
		goto free_hndl;
	}

	return 0;

free_hndl:
	if (hw_hndl) {
		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
				mcounters->hw_cntrs_hndl);
		mcounters->hw_cntrs_hndl = NULL;
	}
free:
	kfree(desc_data);
	return ret;
}

void mlx5_ib_counters_clear_description(struct ib_counters *counters)
{
	struct mlx5_ib_mcounters *mcounters;

	if (!counters || atomic_read(&counters->usecnt) != 1)
		return;

	mcounters = to_mcounters(counters);

	mutex_lock(&mcounters->mcntrs_mutex);
	kfree(mcounters->counters_data);
	mcounters->counters_data = NULL;
	mcounters->cntrs_max_index = 0;
	mutex_unlock(&mcounters->mcntrs_mutex);
}

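/*
 * Enable or disable one optional (operation) counter. Enabling creates
 * a flow counter and wires it into the steering tables; disabling
 * tears both down again.
 */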
static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
			       unsigned int index, bool enable)
{
	struct mlx5_ib_dev *dev = to_mdev(device);
	struct mlx5_ib_counters *cnts;
	struct mlx5_ib_op_fc *opfc;
	u32 num_hw_counters, type;
	int ret;

	cnts = &dev->port[port - 1].cnts;
	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
		cnts->num_ext_ppcnt_counters;
	if (index < num_hw_counters ||
	    index >= (num_hw_counters + cnts->num_op_counters))
		return -EINVAL;

	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
		return -EINVAL;

	type = *(u32 *)cnts->descs[index].priv;
	if (type >= MLX5_IB_OPCOUNTER_MAX)
		return -EINVAL;

	opfc = &cnts->opfcs[type];

	if (enable) {
		if (opfc->fc)
			return -EEXIST;

		opfc->fc = mlx5_fc_create(dev->mdev, false);
		if (IS_ERR(opfc->fc))
			return PTR_ERR(opfc->fc);

		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
		if (ret) {
			mlx5_fc_destroy(dev->mdev, opfc->fc);
			opfc->fc = NULL;
		}
		return ret;
	}

	if (!opfc->fc)
		return -EINVAL;

	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
	mlx5_fc_destroy(dev->mdev, opfc->fc);
	opfc->fc = NULL;
	return 0;
}

static const struct ib_device_ops hw_stats_ops = {
	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
	.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
			  mlx5_ib_modify_stat : NULL,
};

static const struct ib_device_ops hw_switchdev_vport_op = {
	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
};

static const struct ib_device_ops hw_switchdev_stats_ops = {
	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
};

static const struct ib_device_ops counters_ops = {
	.create_counters = mlx5_ib_create_counters,
	.destroy_counters = mlx5_ib_destroy_counters,
	.read_counters = mlx5_ib_read_counters,

	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
};

int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
{
	ib_set_device_ops(&dev->ib_dev, &counters_ops);

	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
		return 0;

	if (is_mdev_switchdev_mode(dev->mdev)) {
		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
		if (vport_qcounters_supported(dev))
			ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
	} else {
		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
	}
	return mlx5_ib_alloc_counters(dev);
}

void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
{
	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
		return;

	mlx5_ib_dealloc_counters(dev);
}