1// SPDX-License-Identifier: GPL-2.0-only
2/****************************************************************************
3 * Driver for Solarflare network controllers and boards
4 * Copyright 2022 Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation, incorporated herein by reference.
9 */
10
11#include "tc_counters.h"
12#include "tc_encap_actions.h"
13#include "mae_counter_format.h"
14#include "mae.h"
15#include "rx_common.h"
16
17/* Counter-management hashtables */
18
19static const struct rhashtable_params efx_tc_counter_id_ht_params = {
20	.key_len	= offsetof(struct efx_tc_counter_index, linkage),
21	.key_offset	= 0,
22	.head_offset	= offsetof(struct efx_tc_counter_index, linkage),
23};
24
25static const struct rhashtable_params efx_tc_counter_ht_params = {
26	.key_len	= offsetof(struct efx_tc_counter, linkage),
27	.key_offset	= 0,
28	.head_offset	= offsetof(struct efx_tc_counter, linkage),
29};
30
31static void efx_tc_counter_free(void *ptr, void *__unused)
32{
33	struct efx_tc_counter *cnt = ptr;
34
35	WARN_ON(!list_empty(&cnt->users));
36	/* We'd like to synchronize_rcu() here, but unfortunately we aren't
37	 * removing the element from the hashtable (it's not clear that's a
38	 * safe thing to do in an rhashtable_free_and_destroy free_fn), so
39	 * threads could still be obtaining new pointers to *cnt if they can
40	 * race against this function at all.
41	 */
42	flush_work(&cnt->work);
43	EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
44	kfree(cnt);
45}
46
47static void efx_tc_counter_id_free(void *ptr, void *__unused)
48{
49	struct efx_tc_counter_index *ctr = ptr;
50
51	WARN_ON(refcount_read(&ctr->ref));
52	kfree(ctr);
53}
54
55int efx_tc_init_counters(struct efx_nic *efx)
56{
57	int rc;
58
59	rc = rhashtable_init(&efx->tc->counter_id_ht, &efx_tc_counter_id_ht_params);
60	if (rc < 0)
61		goto fail_counter_id_ht;
62	rc = rhashtable_init(&efx->tc->counter_ht, &efx_tc_counter_ht_params);
63	if (rc < 0)
64		goto fail_counter_ht;
65	return 0;
66fail_counter_ht:
67	rhashtable_destroy(&efx->tc->counter_id_ht);
68fail_counter_id_ht:
69	return rc;
70}
71
/* Destroy both counter hashtables without freeing any entries.
 *
 * Only call this in init failure teardown.
 * Normal exit should fini instead as there may be entries in the table,
 * and rhashtable_destroy() is used here with no per-entry free callback.
 */
void efx_tc_destroy_counters(struct efx_nic *efx)
{
	rhashtable_destroy(&efx->tc->counter_ht);
	rhashtable_destroy(&efx->tc->counter_id_ht);
}
80
/* Normal-exit teardown of the counter hashtables.  Every remaining entry
 * is passed to the matching free callback (efx_tc_counter_id_free() for
 * index entries, efx_tc_counter_free() for counters) before the table
 * itself is destroyed.
 */
void efx_tc_fini_counters(struct efx_nic *efx)
{
	rhashtable_free_and_destroy(&efx->tc->counter_id_ht, efx_tc_counter_id_free, NULL);
	rhashtable_free_and_destroy(&efx->tc->counter_ht, efx_tc_counter_free, NULL);
}
86
87static void efx_tc_counter_work(struct work_struct *work)
88{
89	struct efx_tc_counter *cnt = container_of(work, struct efx_tc_counter, work);
90	struct efx_tc_encap_action *encap;
91	struct efx_tc_action_set *act;
92	unsigned long touched;
93	struct neighbour *n;
94
95	spin_lock_bh(&cnt->lock);
96	touched = READ_ONCE(cnt->touched);
97
98	list_for_each_entry(act, &cnt->users, count_user) {
99		encap = act->encap_md;
100		if (!encap)
101			continue;
102		if (!encap->neigh) /* can't happen */
103			continue;
104		if (time_after_eq(encap->neigh->used, touched))
105			continue;
106		encap->neigh->used = touched;
107		/* We have passed traffic using this ARP entry, so
108		 * indicate to the ARP cache that it's still active
109		 */
110		if (encap->neigh->dst_ip)
111			n = neigh_lookup(&arp_tbl, &encap->neigh->dst_ip,
112					 encap->neigh->egdev);
113		else
114#if IS_ENABLED(CONFIG_IPV6)
115			n = neigh_lookup(ipv6_stub->nd_tbl,
116					 &encap->neigh->dst_ip6,
117					 encap->neigh->egdev);
118#else
119			n = NULL;
120#endif
121		if (!n)
122			continue;
123
124		neigh_event_send(n, NULL);
125		neigh_release(n);
126	}
127	spin_unlock_bh(&cnt->lock);
128}
129
130/* Counter allocation */
131
132struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx,
133						      int type)
134{
135	struct efx_tc_counter *cnt;
136	int rc, rc2;
137
138	cnt = kzalloc(sizeof(*cnt), GFP_USER);
139	if (!cnt)
140		return ERR_PTR(-ENOMEM);
141
142	spin_lock_init(&cnt->lock);
143	INIT_WORK(&cnt->work, efx_tc_counter_work);
144	cnt->touched = jiffies;
145	cnt->type = type;
146
147	rc = efx_mae_allocate_counter(efx, cnt);
148	if (rc)
149		goto fail1;
150	INIT_LIST_HEAD(&cnt->users);
151	rc = rhashtable_insert_fast(&efx->tc->counter_ht, &cnt->linkage,
152				    efx_tc_counter_ht_params);
153	if (rc)
154		goto fail2;
155	return cnt;
156fail2:
157	/* If we get here, it implies that we couldn't insert into the table,
158	 * which in turn probably means that the fw_id was already taken.
159	 * In that case, it's unclear whether we really 'own' the fw_id; but
160	 * the firmware seemed to think we did, so it's proper to free it.
161	 */
162	rc2 = efx_mae_free_counter(efx, cnt);
163	if (rc2)
164		netif_warn(efx, hw, efx->net_dev,
165			   "Failed to free MAE counter %u, rc %d\n",
166			   cnt->fw_id, rc2);
167fail1:
168	kfree(cnt);
169	return ERR_PTR(rc > 0 ? -EIO : rc);
170}
171
/* Tear down a counter obtained from efx_tc_flower_allocate_counter():
 * unlink it from the counter hashtable, free the MAE counter (a failure
 * there is only logged), then wait for RCU readers and any pending work
 * item before freeing the memory.  Warns if the counter still has users.
 */
void efx_tc_flower_release_counter(struct efx_nic *efx,
				   struct efx_tc_counter *cnt)
{
	int rc;

	/* Unlink first so that no new lookups can find the counter */
	rhashtable_remove_fast(&efx->tc->counter_ht, &cnt->linkage,
			       efx_tc_counter_ht_params);
	rc = efx_mae_free_counter(efx, cnt);
	if (rc)
		netif_warn(efx, hw, efx->net_dev,
			   "Failed to free MAE counter %u, rc %d\n",
			   cnt->fw_id, rc);
	WARN_ON(!list_empty(&cnt->users));
	/* This doesn't protect counter updates coming in arbitrarily long
	 * after we deleted the counter.  The RCU just ensures that we won't
	 * free the counter while another thread has a pointer to it.
	 * Ensuring we don't update the wrong counter if the ID gets re-used
	 * is handled by the generation count.
	 */
	synchronize_rcu();
	/* Wait for any work item queued on this counter to finish */
	flush_work(&cnt->work);
	EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
	kfree(cnt);
}
196
/* Look up an allocated counter by its (@type, @fw_id) pair.
 *
 * Returns whatever rhashtable_lookup_fast() yields; the visible caller
 * treats a NULL result as "no such counter" and holds rcu_read_lock()
 * across both the lookup and its use of the returned pointer.
 */
static struct efx_tc_counter *efx_tc_flower_find_counter_by_fw_id(
				struct efx_nic *efx, int type, u32 fw_id)
{
	/* Start from an all-zero template: the table hashes every byte of
	 * the struct that precedes 'linkage', so fields not set below must
	 * have a fixed value.
	 */
	struct efx_tc_counter key = {};

	key.fw_id = fw_id;
	key.type = type;

	return rhashtable_lookup_fast(&efx->tc->counter_ht, &key,
				      efx_tc_counter_ht_params);
}
208
209/* TC cookie to counter mapping */
210
211void efx_tc_flower_put_counter_index(struct efx_nic *efx,
212				     struct efx_tc_counter_index *ctr)
213{
214	if (!refcount_dec_and_test(&ctr->ref))
215		return; /* still in use */
216	rhashtable_remove_fast(&efx->tc->counter_id_ht, &ctr->linkage,
217			       efx_tc_counter_id_ht_params);
218	efx_tc_flower_release_counter(efx, ctr->cnt);
219	kfree(ctr);
220}
221
222struct efx_tc_counter_index *efx_tc_flower_get_counter_index(
223				struct efx_nic *efx, unsigned long cookie,
224				enum efx_tc_counter_type type)
225{
226	struct efx_tc_counter_index *ctr, *old;
227	struct efx_tc_counter *cnt;
228
229	ctr = kzalloc(sizeof(*ctr), GFP_USER);
230	if (!ctr)
231		return ERR_PTR(-ENOMEM);
232	ctr->cookie = cookie;
233	old = rhashtable_lookup_get_insert_fast(&efx->tc->counter_id_ht,
234						&ctr->linkage,
235						efx_tc_counter_id_ht_params);
236	if (old) {
237		/* don't need our new entry */
238		kfree(ctr);
239		if (IS_ERR(old)) /* oh dear, it's actually an error */
240			return ERR_CAST(old);
241		if (!refcount_inc_not_zero(&old->ref))
242			return ERR_PTR(-EAGAIN);
243		/* existing entry found */
244		ctr = old;
245	} else {
246		cnt = efx_tc_flower_allocate_counter(efx, type);
247		if (IS_ERR(cnt)) {
248			rhashtable_remove_fast(&efx->tc->counter_id_ht,
249					       &ctr->linkage,
250					       efx_tc_counter_id_ht_params);
251			kfree(ctr);
252			return (void *)cnt; /* it's an ERR_PTR */
253		}
254		ctr->cnt = cnt;
255		refcount_set(&ctr->ref, 1);
256	}
257	return ctr;
258}
259
/* Look up the counter index entry for TC @cookie without taking a
 * reference on it.  Returns the result of rhashtable_lookup_fast() on the
 * cookie table.
 */
struct efx_tc_counter_index *efx_tc_flower_find_counter_index(
				struct efx_nic *efx, unsigned long cookie)
{
	/* Zeroed template; only the bytes before 'linkage' form the key */
	struct efx_tc_counter_index key = {};

	key.cookie = cookie;
	return rhashtable_lookup_fast(&efx->tc->counter_id_ht, &key,
				      efx_tc_counter_id_ht_params);
}
269
270/* TC Channel.  Counter updates are delivered on this channel's RXQ. */
271
/* .handle_no_channel hook of efx_tc_channel_type: log a warning stating
 * what MAE counters require (MSI-X plus one extra interrupt vector).
 */
static void efx_tc_handle_no_channel(struct efx_nic *efx)
{
	netif_warn(efx, drv, efx->net_dev,
		   "MAE counters require MSI-X and 1 additional interrupt vector.\n");
}
277
278static int efx_tc_probe_channel(struct efx_channel *channel)
279{
280	struct efx_rx_queue *rx_queue = &channel->rx_queue;
281
282	channel->irq_moderation_us = 0;
283	rx_queue->core_index = 0;
284
285	INIT_WORK(&rx_queue->grant_work, efx_mae_counters_grant_credits);
286
287	return 0;
288}
289
290static int efx_tc_start_channel(struct efx_channel *channel)
291{
292	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
293	struct efx_nic *efx = channel->efx;
294
295	return efx_mae_start_counters(efx, rx_queue);
296}
297
/* .stop hook of efx_tc_channel_type: stop MAE counters on the channel's
 * RX queue.  A failure to stop is only logged; the credit-granting state
 * is shut down regardless.
 */
static void efx_tc_stop_channel(struct efx_channel *channel)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
	struct efx_nic *efx = channel->efx;
	int rc;

	rc = efx_mae_stop_counters(efx, rx_queue);
	if (rc)
		netif_warn(efx, drv, efx->net_dev,
			   "Failed to stop MAE counters streaming, rc=%d.\n",
			   rc);
	/* Clear the flag before flushing; presumably the work item checks
	 * it before granting further credits - TODO confirm against
	 * efx_mae_counters_grant_credits().
	 */
	rx_queue->grant_credits = false;
	flush_work(&rx_queue->grant_work);
}
312
/* .post_remove hook of efx_tc_channel_type: does nothing. */
static void efx_tc_remove_channel(struct efx_channel *channel)
{
}
316
/* .get_name hook of efx_tc_channel_type: write "<nic name>-mae" into @buf,
 * truncated to @len bytes by snprintf().
 */
static void efx_tc_get_channel_name(struct efx_channel *channel,
				    char *buf, size_t len)
{
	snprintf(buf, len, "%s-mae", channel->efx->name);
}
322
323static void efx_tc_counter_update(struct efx_nic *efx,
324				  enum efx_tc_counter_type counter_type,
325				  u32 counter_idx, u64 packets, u64 bytes,
326				  u32 mark)
327{
328	struct efx_tc_counter *cnt;
329
330	rcu_read_lock(); /* Protect against deletion of 'cnt' */
331	cnt = efx_tc_flower_find_counter_by_fw_id(efx, counter_type, counter_idx);
332	if (!cnt) {
333		/* This can legitimately happen when a counter is removed,
334		 * with updates for the counter still in-flight; however this
335		 * should be an infrequent occurrence.
336		 */
337		if (net_ratelimit())
338			netif_dbg(efx, drv, efx->net_dev,
339				  "Got update for unwanted MAE counter %u type %u\n",
340				  counter_idx, counter_type);
341		goto out;
342	}
343
344	spin_lock_bh(&cnt->lock);
345	if ((s32)mark - (s32)cnt->gen < 0) {
346		/* This counter update packet is from before the counter was
347		 * allocated; thus it must be for a previous counter with
348		 * the same ID that has since been freed, and it should be
349		 * ignored.
350		 */
351	} else {
352		/* Update latest seen generation count.  This ensures that
353		 * even a long-lived counter won't start getting ignored if
354		 * the generation count wraps around, unless it somehow
355		 * manages to go 1<<31 generations without an update.
356		 */
357		cnt->gen = mark;
358		/* update counter values */
359		cnt->packets += packets;
360		cnt->bytes += bytes;
361		cnt->touched = jiffies;
362	}
363	spin_unlock_bh(&cnt->lock);
364	schedule_work(&cnt->work);
365out:
366	rcu_read_unlock();
367}
368
369static void efx_tc_rx_version_1(struct efx_nic *efx, const u8 *data, u32 mark)
370{
371	u16 n_counters, i;
372
373	/* Header format:
374	 * + |   0    |   1    |   2    |   3    |
375	 * 0 |version |         reserved         |
376	 * 4 |    seq_index    |   n_counters    |
377	 */
378
379	n_counters = le16_to_cpu(*(const __le16 *)(data + 6));
380
381	/* Counter update entry format:
382	 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f |
383	 * |  counter_idx  |     packet_count      |      byte_count       |
384	 */
385	for (i = 0; i < n_counters; i++) {
386		const void *entry = data + 8 + 16 * i;
387		u64 packet_count, byte_count;
388		u32 counter_idx;
389
390		counter_idx = le32_to_cpu(*(const __le32 *)entry);
391		packet_count = le32_to_cpu(*(const __le32 *)(entry + 4)) |
392			       ((u64)le16_to_cpu(*(const __le16 *)(entry + 8)) << 32);
393		byte_count = le16_to_cpu(*(const __le16 *)(entry + 10)) |
394			     ((u64)le32_to_cpu(*(const __le32 *)(entry + 12)) << 16);
395		efx_tc_counter_update(efx, EFX_TC_COUNTER_TYPE_AR, counter_idx,
396				      packet_count, byte_count, mark);
397	}
398}
399
/* Accessors for version-2 counter packets.  Field positions come from the
 * ERF_SC_PACKETISER_* _LBN (bit offset) and _WIDTH constants; each macro
 * checks at build time that the field has the alignment/width it assumes.
 */

/* Pointer to a byte-aligned header field of @pkt */
#define TCV2_HDR_PTR(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 7),	\
	 (pkt) + ERF_SC_PACKETISER_HEADER_##field##_LBN / 8)
/* Value of an 8-bit header field of @pkt */
#define TCV2_HDR_BYTE(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 8),\
	 *TCV2_HDR_PTR(pkt, field))
/* Raw little-endian value of a 16-bit, 16-bit-aligned header field */
#define TCV2_HDR_WORD(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 16),\
	 (void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 15),	\
	 *(__force const __le16 *)TCV2_HDR_PTR(pkt, field))
/* Pointer to a byte-aligned field of payload entry @i, where the payload
 * starts @poff bytes into @pkt.  @poff and @i are parenthesised so that
 * expression arguments bind correctly.
 */
#define TCV2_PKT_PTR(pkt, poff, i, field)					\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_PAYLOAD_##field##_LBN & 7),	\
	 (pkt) + ERF_SC_PACKETISER_PAYLOAD_##field##_LBN/8 + (poff) +		\
	 (i) * ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE)
414
415/* Read a little-endian 48-bit field with 16-bit alignment */
416static u64 efx_tc_read48(const __le16 *field)
417{
418	u64 out = 0;
419	int i;
420
421	for (i = 0; i < 3; i++)
422		out |= (u64)le16_to_cpu(field[i]) << (i * 16);
423	return out;
424}
425
426static enum efx_tc_counter_type efx_tc_rx_version_2(struct efx_nic *efx,
427						    const u8 *data, u32 mark)
428{
429	u8 payload_offset, header_offset, ident;
430	enum efx_tc_counter_type type;
431	u16 n_counters, i;
432
433	ident = TCV2_HDR_BYTE(data, IDENTIFIER);
434	switch (ident) {
435	case ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR:
436		type = EFX_TC_COUNTER_TYPE_AR;
437		break;
438	case ERF_SC_PACKETISER_HEADER_IDENTIFIER_CT:
439		type = EFX_TC_COUNTER_TYPE_CT;
440		break;
441	case ERF_SC_PACKETISER_HEADER_IDENTIFIER_OR:
442		type = EFX_TC_COUNTER_TYPE_OR;
443		break;
444	default:
445		if (net_ratelimit())
446			netif_err(efx, drv, efx->net_dev,
447				  "ignored v2 MAE counter packet (bad identifier %u"
448				  "), counters may be inaccurate\n", ident);
449		return EFX_TC_COUNTER_TYPE_MAX;
450	}
451	header_offset = TCV2_HDR_BYTE(data, HEADER_OFFSET);
452	/* mae_counter_format.h implies that this offset is fixed, since it
453	 * carries on with SOP-based LBNs for the fields in this header
454	 */
455	if (header_offset != ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT) {
456		if (net_ratelimit())
457			netif_err(efx, drv, efx->net_dev,
458				  "choked on v2 MAE counter packet (bad header_offset %u"
459				  "), counters may be inaccurate\n", header_offset);
460		return EFX_TC_COUNTER_TYPE_MAX;
461	}
462	payload_offset = TCV2_HDR_BYTE(data, PAYLOAD_OFFSET);
463	n_counters = le16_to_cpu(TCV2_HDR_WORD(data, COUNT));
464
465	for (i = 0; i < n_counters; i++) {
466		const void *counter_idx_p, *packet_count_p, *byte_count_p;
467		u64 packet_count, byte_count;
468		u32 counter_idx;
469
470		/* 24-bit field with 32-bit alignment */
471		counter_idx_p = TCV2_PKT_PTR(data, payload_offset, i, COUNTER_INDEX);
472		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_WIDTH != 24);
473		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_LBN & 31);
474		counter_idx = le32_to_cpu(*(const __le32 *)counter_idx_p) & 0xffffff;
475		/* 48-bit field with 16-bit alignment */
476		packet_count_p = TCV2_PKT_PTR(data, payload_offset, i, PACKET_COUNT);
477		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_WIDTH != 48);
478		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LBN & 15);
479		packet_count = efx_tc_read48((const __le16 *)packet_count_p);
480		/* 48-bit field with 16-bit alignment */
481		byte_count_p = TCV2_PKT_PTR(data, payload_offset, i, BYTE_COUNT);
482		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_WIDTH != 48);
483		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LBN & 15);
484		byte_count = efx_tc_read48((const __le16 *)byte_count_p);
485
486		if (type == EFX_TC_COUNTER_TYPE_CT) {
487			/* CT counters are 1-bit saturating counters to update
488			 * the lastuse time in CT stats. A received CT counter
489			 * should have packet counter to 0 and only LSB bit on
490			 * in byte counter.
491			 */
492			if (packet_count || byte_count != 1)
493				netdev_warn_once(efx->net_dev,
494						 "CT counter with inconsistent state (%llu, %llu)\n",
495						 packet_count, byte_count);
496			/* Do not increment the driver's byte counter */
497			byte_count = 0;
498		}
499
500		efx_tc_counter_update(efx, type, counter_idx, packet_count,
501				      byte_count, mark);
502	}
503	return type;
504}
505
506/* We always swallow the packet, whether successful or not, since it's not
507 * a network packet and shouldn't ever be forwarded to the stack.
508 * @mark is the generation count for counter allocations.
509 */
510static bool efx_tc_rx(struct efx_rx_queue *rx_queue, u32 mark)
511{
512	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
513	struct efx_rx_buffer *rx_buf = efx_rx_buffer(rx_queue,
514						     channel->rx_pkt_index);
515	const u8 *data = efx_rx_buf_va(rx_buf);
516	struct efx_nic *efx = rx_queue->efx;
517	enum efx_tc_counter_type type;
518	u8 version;
519
520	/* version is always first byte of packet */
521	version = *data;
522	switch (version) {
523	case 1:
524		type = EFX_TC_COUNTER_TYPE_AR;
525		efx_tc_rx_version_1(efx, data, mark);
526		break;
527	case ERF_SC_PACKETISER_HEADER_VERSION_VALUE: // 2
528		type = efx_tc_rx_version_2(efx, data, mark);
529		break;
530	default:
531		if (net_ratelimit())
532			netif_err(efx, drv, efx->net_dev,
533				  "choked on MAE counter packet (bad version %u"
534				  "); counters may be inaccurate\n",
535				  version);
536		goto out;
537	}
538
539	if (type < EFX_TC_COUNTER_TYPE_MAX) {
540		/* Update seen_gen unconditionally, to avoid a missed wakeup if
541		 * we race with efx_mae_stop_counters().
542		 */
543		efx->tc->seen_gen[type] = mark;
544		if (efx->tc->flush_counters &&
545		    (s32)(efx->tc->flush_gen[type] - mark) <= 0)
546			wake_up(&efx->tc->flush_wq);
547	}
548out:
549	efx_free_rx_buffers(rx_queue, rx_buf, 1);
550	channel->rx_pkt_n_frags = 0;
551	return true;
552}
553
/* Channel type for the TC channel, on whose RXQ counter updates are
 * delivered.  Wires the hooks defined in this file into the channel
 * framework; received buffers are handed to efx_tc_rx() via .receive_raw.
 */
const struct efx_channel_type efx_tc_channel_type = {
	.handle_no_channel	= efx_tc_handle_no_channel,
	.pre_probe		= efx_tc_probe_channel,
	.start			= efx_tc_start_channel,
	.stop			= efx_tc_stop_channel,
	.post_remove		= efx_tc_remove_channel,
	.get_name		= efx_tc_get_channel_name,
	.receive_raw		= efx_tc_rx,
	.keep_eventq		= true,
};
564