1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# Test that packets are sampled when tc-sample is used and that reported
5# metadata is correct. Two sets of hosts (with and without LAG) are used, since
6# metadata extraction in mlxsw is a bit different when LAG is involved.
7#
8# +---------------------------------+       +---------------------------------+
9# | H1 (vrf)                        |       | H3 (vrf)                        |
10# |    + $h1                        |       |    + $h3_lag                    |
11# |    | 192.0.2.1/28               |       |    | 192.0.2.17/28              |
12# |    |                            |       |    |                            |
13# |    |  default via 192.0.2.2     |       |    |  default via 192.0.2.18    |
14# +----|----------------------------+       +----|----------------------------+
15#      |                                         |
16# +----|-----------------------------------------|----------------------------+
17# |    | 192.0.2.2/28                            | 192.0.2.18/28              |
18# |    + $rp1                                    + $rp3_lag                   |
19# |                                                                           |
20# |    + $rp2                                    + $rp4_lag                   |
21# |    | 198.51.100.2/28                         | 198.51.100.18/28           |
22# +----|-----------------------------------------|----------------------------+
23#      |                                         |
24# +----|----------------------------+       +----|----------------------------+
25# |    |  default via 198.51.100.2  |       |    |  default via 198.51.100.18 |
26# |    |                            |       |    |                            |
27# |    | 198.51.100.1/28            |       |    | 198.51.100.17/28           |
28# |    + $h2                        |       |    + $h4_lag                    |
29# | H2 (vrf)                        |       | H4 (vrf)                        |
30# +---------------------------------+       +---------------------------------+
31
# Location of the shared forwarding selftest libraries, relative to this
# script. Quoted so that a checkout path containing whitespace still works.
lib_dir="$(dirname "$0")/../../../net/forwarding"

# Names of the test functions defined in this file.
ALL_TESTS="
	tc_sample_rate_test
	tc_sample_max_rate_test
	tc_sample_conflict_test
	tc_sample_group_conflict_test
	tc_sample_md_iif_test
	tc_sample_md_lag_iif_test
	tc_sample_md_oif_test
	tc_sample_md_lag_oif_test
	tc_sample_md_out_tc_test
	tc_sample_md_out_tc_occ_test
	tc_sample_md_latency_test
	tc_sample_acl_group_conflict_test
	tc_sample_acl_rate_test
	tc_sample_acl_max_rate_test
"
# setup_prepare consumes NETIFS[p1] through NETIFS[p8].
NUM_NETIFS=8
# File that receives the output of the background psample process.
CAPTURE_FILE=$(mktemp)
source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"
source mlxsw_lib.sh

# Available at https://github.com/Mellanox/libpsample
require_command psample
58
# Configure host H1: hand $h1 and 192.0.2.1/28 to simple_if_init, then add a
# default route via 192.0.2.2 in VRF v$h1.
h1_create()
{
	local host_vrf=v${h1}

	simple_if_init ${h1} 192.0.2.1/28

	ip -4 route add default vrf ${host_vrf} \
		nexthop via 192.0.2.2
}
65
# Undo h1_create: remove the default route from VRF v$h1, then pass $h1 and
# its address to simple_if_fini.
h1_destroy()
{
	local host_vrf=v${h1}

	ip -4 route del default vrf ${host_vrf} \
		nexthop via 192.0.2.2

	simple_if_fini ${h1} 192.0.2.1/28
}
72
# Configure host H2: hand $h2 and 198.51.100.1/28 to simple_if_init, then add
# a default route via 198.51.100.2 in VRF v$h2.
h2_create()
{
	local host_vrf=v${h2}

	simple_if_init ${h2} 198.51.100.1/28

	ip -4 route add default vrf ${host_vrf} \
		nexthop via 198.51.100.2
}
79
# Undo h2_create: remove the default route from VRF v$h2, then pass $h2 and
# its address to simple_if_fini.
h2_destroy()
{
	local host_vrf=v${h2}

	ip -4 route del default vrf ${host_vrf} \
		nexthop via 198.51.100.2

	simple_if_fini ${h2} 198.51.100.1/28
}
86
# Configure host H3 behind a LAG: enslave $h3 to an 802.3ad bond named
# ${h3}_bond, hand the bond and 192.0.2.17/28 to simple_if_init and add a
# default route via 192.0.2.18 in the bond's VRF.
h3_create()
{
	local lag=${h3}_bond

	# The port is set down before it is enslaved to the bond.
	ip link set dev ${h3} down
	ip link add name ${lag} type bond mode 802.3ad
	ip link set dev ${h3} master ${lag}

	simple_if_init ${lag} 192.0.2.17/28

	ip -4 route add default vrf v${lag} nexthop via 192.0.2.18
}
97
# Undo h3_create: remove the default route, pass the bond to simple_if_fini,
# release $h3 from the bond and delete the bond device.
h3_destroy()
{
	local lag=${h3}_bond

	ip -4 route del default vrf v${lag} nexthop via 192.0.2.18

	simple_if_fini ${lag} 192.0.2.17/28

	ip link set dev ${h3} nomaster
	ip link del dev ${lag}
}
107
# Configure host H4 behind a LAG: enslave $h4 to an 802.3ad bond named
# ${h4}_bond, hand the bond and 198.51.100.17/28 to simple_if_init and add a
# default route via 198.51.100.18 in the bond's VRF.
h4_create()
{
	local lag=${h4}_bond

	# The port is set down before it is enslaved to the bond.
	ip link set dev ${h4} down
	ip link add name ${lag} type bond mode 802.3ad
	ip link set dev ${h4} master ${lag}

	simple_if_init ${lag} 198.51.100.17/28

	ip -4 route add default vrf v${lag} nexthop via 198.51.100.18
}
118
# Undo h4_create: remove the default route, pass the bond to simple_if_fini,
# release $h4 from the bond and delete the bond device.
h4_destroy()
{
	local lag=${h4}_bond

	ip -4 route del default vrf v${lag} nexthop via 198.51.100.18

	simple_if_fini ${lag} 198.51.100.17/28

	ip link set dev ${h4} nomaster
	ip link del dev ${lag}
}
128
# Configure the router side of the topology. $rp1 and $rp2 are used directly;
# $rp3 and $rp4 are each enslaved to an 802.3ad bond that carries the address.
# A clsact qdisc is attached to every physical router port.
router_create()
{
	local lag3=${rp3}_bond
	local lag4=${rp4}_bond

	# Plain router ports.
	ip link set dev ${rp1} up
	__addr_add_del ${rp1} add 192.0.2.2/28
	tc qdisc add dev ${rp1} clsact

	ip link set dev ${rp2} up
	__addr_add_del ${rp2} add 198.51.100.2/28
	tc qdisc add dev ${rp2} clsact

	# LAG router ports: the address goes on the bond, the qdisc on the
	# member port.
	ip link add name ${lag3} type bond mode 802.3ad
	ip link set dev ${rp3} master ${lag3}
	__addr_add_del ${lag3} add 192.0.2.18/28
	tc qdisc add dev ${rp3} clsact
	ip link set dev ${lag3} up

	ip link add name ${lag4} type bond mode 802.3ad
	ip link set dev ${rp4} master ${lag4}
	__addr_add_del ${lag4} add 198.51.100.18/28
	tc qdisc add dev ${rp4} clsact
	ip link set dev ${lag4} up
}
151
# Undo router_create, handling the ports in the opposite order ($rp4 first,
# $rp1 last): remove qdiscs and addresses, dissolve the bonds and set the
# plain ports down.
router_destroy()
{
	local lag3=${rp3}_bond
	local lag4=${rp4}_bond

	# LAG router ports.
	ip link set dev ${lag4} down
	tc qdisc del dev ${rp4} clsact
	__addr_add_del ${lag4} del 198.51.100.18/28
	ip link set dev ${rp4} nomaster
	ip link del dev ${lag4}

	ip link set dev ${lag3} down
	tc qdisc del dev ${rp3} clsact
	__addr_add_del ${lag3} del 192.0.2.18/28
	ip link set dev ${rp3} nomaster
	ip link del dev ${lag3}

	# Plain router ports.
	tc qdisc del dev ${rp2} clsact
	__addr_add_del ${rp2} del 198.51.100.2/28
	ip link set dev ${rp2} down

	tc qdisc del dev ${rp1} clsact
	__addr_add_del ${rp1} del 192.0.2.2/28
	ip link set dev ${rp1} down
}
174
# Assign the eight NETIFS entries to the global port variables used throughout
# this file, then build the topology: VRF preparation, the four hosts and
# finally the router.
setup_prepare()
{
	# H1 -- rp1 / rp2 -- H2 (no LAG).
	h1=${NETIFS[p1]}
	h2=${NETIFS[p4]}
	rp1=${NETIFS[p2]}
	rp2=${NETIFS[p3]}

	# H3 -- rp3 / rp4 -- H4 (LAG).
	h3=${NETIFS[p5]}
	h4=${NETIFS[p7]}
	rp3=${NETIFS[p6]}
	rp4=${NETIFS[p8]}

	vrf_prepare

	h1_create
	h2_create
	h3_create
	h4_create

	router_create
}
194
# Tear the topology down: after pre_cleanup, delete the capture file, destroy
# the router and the hosts in the reverse order of setup_prepare and finish
# with vrf_cleanup.
cleanup()
{
	pre_cleanup

	rm -f ${CAPTURE_FILE}

	router_destroy

	h4_destroy
	h3_destroy
	h2_destroy
	h1_destroy

	vrf_cleanup
}
209
# Start a fresh capture: remove any previous capture file, launch psample in
# the background with stdout and stderr redirected to $CAPTURE_FILE, and pause
# for one second before returning.
psample_capture_start()
{
	rm -f ${CAPTURE_FILE}

	psample > ${CAPTURE_FILE} 2>&1 &

	sleep 1
}
218
# Stop the capture: signal the shell's current background job (%%) and, if
# the signal was delivered, wait for that job to exit. Anything written to
# stderr while doing so is discarded.
psample_capture_stop()
{
	{
		kill %% && wait %%
	} 2> /dev/null
}
223
# Install a 1:32 matchall sampling rule on ingress of $rp1, send 320000
# packets from H1 to the given destination and check that the number of
# sampled packets in group 1 is within +-25% of 10000.
#
# $1 - description appended to the test name
# $2 - destination IP address of the generated traffic
__tc_sample_rate_test()
{
	local desc=$1
	local dip=$2
	local -a rule=(dev ${rp1} ingress protocol all pref 1 handle 101
		       matchall)
	local sampled deviation

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate 32 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	ip vrf exec v${h1} ${MZ} ${h1} -c 320000 -d 100usec -p 64 \
		-A 192.0.2.1 -B ${dip} -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	# Deviation from the expected 10000 samples, in whole percent.
	sampled=$(grep -e "group 1 " ${CAPTURE_FILE} | wc -l)
	deviation=$((100 * (sampled - 10000) / 10000))
	(( deviation >= -25 && deviation <= 25 ))
	check_err $? "Expected 10000 packets, got $sampled packets, which is $deviation% off. Required accuracy is +-25%"

	log_test "tc sample rate ($desc)"

	tc filter del "${rule[@]}"
}
252
# Run the sampling rate check twice: once with traffic destined to
# 198.51.100.1 ("forward") and once with traffic destined to 192.0.2.2
# ("local receive").
tc_sample_rate_test()
{
	__tc_sample_rate_test 'forward' 198.51.100.1

	__tc_sample_rate_test 'local receive' 192.0.2.2
}
258
# Check the sampling rate limit of matchall rules: a rate of 35 * 10^8 must be
# accepted on ingress of $rp1, while that rate plus one must be rejected.
tc_sample_max_rate_test()
{
	local rc

	RET=0

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate $((35 * 10 ** 8)) group 1
	check_err $? "Failed to configure sampling rule with max rate"

	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate $((35 * 10 ** 8 + 1)) \
		group 1 &> /dev/null
	rc=$?
	check_fail $rc "Managed to configure sampling rate above maximum"

	log_test "tc sample maximum rate"

	# If the over-limit rule was unexpectedly accepted, remove it. Left in
	# place it would occupy pref 1 / handle 101 and make the tests that
	# follow fail for an unrelated reason.
	if (( rc == 0 )); then
		tc filter del dev $rp1 ingress protocol all pref 1 handle 101 \
			matchall
	fi
}
276
# Check that a second matchall sampling rule is refused on ingress of $rp1
# while a first one is installed, and that it is accepted once the first rule
# has been deleted.
tc_sample_conflict_test()
{
	local -a first=(dev ${rp1} ingress protocol all pref 1 handle 101
			matchall)
	local -a second=(dev ${rp1} ingress protocol all pref 2 handle 102
			 matchall)
	local -a sample=(skip_sw action sample rate 1024 group 1)

	RET=0

	# Test that two sampling rules cannot be configured on the same port,
	# even when they share the same parameters.

	tc filter add "${first[@]}" "${sample[@]}"
	check_err $? "Failed to configure sampling rule"

	tc filter add "${second[@]}" "${sample[@]}" &> /dev/null
	check_fail $? "Managed to configure second sampling rule"

	# Delete the first rule and make sure the second rule can now be
	# configured.

	tc filter del "${first[@]}"

	tc filter add "${second[@]}" "${sample[@]}"
	check_err $? "Failed to configure sampling rule after deletion"

	log_test "tc sample conflict test"

	tc filter del "${second[@]}"
}
305
# Check that, with a matchall sampling rule for group 1 installed on ingress
# of $rp1, a second rule using group 2 is refused.
tc_sample_group_conflict_test()
{
	local rc

	RET=0

	# Test that two sampling rules cannot be configured on the same port
	# with different groups.

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
		skip_sw action sample rate 1024 group 2 &> /dev/null
	rc=$?
	check_fail $rc "Managed to configure sampling rule with conflicting group"

	log_test "tc sample group conflict test"

	# If the conflicting rule was unexpectedly accepted, remove it so that
	# it does not leak into the tests that follow.
	if (( rc == 0 )); then
		tc filter del dev $rp1 ingress protocol all pref 2 handle 102 \
			matchall
	fi

	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
}
325
# Sample traffic on ingress of $rp1 and check that the capture contains an
# "in-ifindex" field equal to the ifindex of $rp1.
tc_sample_md_iif_test()
{
	local -a rule=(dev ${rp1} ingress protocol all pref 1 handle 101
		       matchall)
	local want_ifindex

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	ip vrf exec v${h1} ${MZ} ${h1} -c 3200 -d 1msec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	want_ifindex=$(ip -j -p link show dev ${rp1} | jq '.[]["ifindex"]')
	grep -q -e "in-ifindex ${want_ifindex} " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected in-ifindex"

	log_test "tc sample iif"

	tc filter del "${rule[@]}"
}
351
# LAG variant of the in-ifindex check: sample traffic on ingress of the bond
# member $rp3 and check that the capture contains an "in-ifindex" field equal
# to the ifindex of $rp3.
tc_sample_md_lag_iif_test()
{
	local -a rule=(dev ${rp3} ingress protocol all pref 1 handle 101
		       matchall)
	local want_ifindex

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	ip vrf exec v${h3}_bond ${MZ} ${h3}_bond -c 3200 -d 1msec -p 64 \
		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	want_ifindex=$(ip -j -p link show dev ${rp3} | jq '.[]["ifindex"]')
	grep -q -e "in-ifindex ${want_ifindex} " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected in-ifindex"

	log_test "tc sample lag iif"

	tc filter del "${rule[@]}"
}
377
# Sample traffic on ingress of $rp1 and check that the capture contains an
# "out-ifindex" field equal to the ifindex of $rp2.
tc_sample_md_oif_test()
{
	local -a rule=(dev ${rp1} ingress protocol all pref 1 handle 101
		       matchall)
	local want_ifindex

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	ip vrf exec v${h1} ${MZ} ${h1} -c 3200 -d 1msec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	want_ifindex=$(ip -j -p link show dev ${rp2} | jq '.[]["ifindex"]')
	grep -q -e "out-ifindex ${want_ifindex} " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected out-ifindex"

	log_test "tc sample oif"

	tc filter del "${rule[@]}"
}
403
# LAG variant of the out-ifindex check: sample traffic on ingress of the bond
# member $rp3 and check that the capture contains an "out-ifindex" field equal
# to the ifindex of $rp4.
tc_sample_md_lag_oif_test()
{
	local -a rule=(dev ${rp3} ingress protocol all pref 1 handle 101
		       matchall)
	local want_ifindex

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	ip vrf exec v${h3}_bond ${MZ} ${h3}_bond -c 3200 -d 1msec -p 64 \
		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	want_ifindex=$(ip -j -p link show dev ${rp4} | jq '.[]["ifindex"]')
	grep -q -e "out-ifindex ${want_ifindex} " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected out-ifindex"

	log_test "tc sample lag oif"

	tc filter del "${rule[@]}"
}
429
# Sample traffic on ingress of $rp1 and check the reported "out-tc" field:
# first with the default configuration (expecting 0), then with a prio qdisc
# on $rp2 whose priomap is all zeroes (expecting 7). Returns early when
# mlxsw_only_on_spectrum 2+ fails.
tc_sample_md_out_tc_test()
{
	local -a rule=(dev ${rp1} ingress protocol all pref 1 handle 101
		       matchall)
	local -a mz_args=(-c 3200 -d 1msec -p 64 -A 192.0.2.1 -B 198.51.100.1
			  -t udp dp=52768,sp=42768 -q)

	RET=0

	# Output traffic class is not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	# By default, all the packets should go to the same traffic class (0).

	psample_capture_start
	ip vrf exec v${h1} ${MZ} ${h1} "${mz_args[@]}"
	psample_capture_stop

	grep -q -e "out-tc 0 " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected out-tc (0)"

	# Map all priorities to highest traffic class (7) and check reported
	# out-tc.
	tc qdisc replace dev ${rp2} root handle 1: \
		prio bands 3 priomap 0 0 0 0 0 0 0 0

	psample_capture_start
	ip vrf exec v${h1} ${MZ} ${h1} "${mz_args[@]}"
	psample_capture_stop

	grep -q -e "out-tc 7 " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected out-tc (7)"

	log_test "tc sample out-tc"

	tc qdisc del dev ${rp2} root handle 1:
	tc filter del "${rule[@]}"
}
473
# Check that the egress traffic class occupancy reported in sampled packets
# ("out-tc-occ") is within +-1% of the backlog that tc reports for $rp2 while
# the port is congested. Returns early when mlxsw_only_on_spectrum 2+ fails.
tc_sample_md_out_tc_occ_test()
{
	local backlog pct occ

	RET=0

	# Output traffic class occupancy is not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	# Configure a shaper on egress to create congestion.
	tc qdisc replace dev $rp2 root handle 1: \
		tbf rate 1Mbit burst 256k limit 1M

	psample_capture_start

	ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q &

	# Allow congestion to reach steady state.
	sleep 10

	backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')

	# Kill mausezahn.
	{ kill %% && wait %%; } 2>/dev/null

	psample_capture_stop

	# Record last congestion sample.
	occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
		cut -d ' ' -f 16)

	# Without a positive numeric backlog the relative error below cannot be
	# computed: the division would fail, leave pct empty and let the range
	# check succeed. Report that case as an error instead.
	if [[ $backlog =~ ^[0-9]+$ ]] && (( backlog > 0 )); then
		pct=$((100 * (occ - backlog) / backlog))
		(( -1 <= pct && pct <= 1))
		check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-1%"
	else
		check_err 1 "Failed to record a non-zero backlog on $rp2 (got '$backlog')"
	fi

	log_test "tc sample out-tc-occ"

	tc qdisc del dev $rp2 root handle 1:
	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
}
519
# Sample traffic on egress of $rp2 and check that the capture contains a
# "latency" field. Returns early when mlxsw_only_on_spectrum 2+ fails.
tc_sample_md_latency_test()
{
	local -a rule=(dev ${rp2} egress protocol all pref 1 handle 101
		       matchall)

	RET=0

	# Egress sampling not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	ip vrf exec v${h1} ${MZ} ${h1} -c 3200 -d 1msec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	grep -q -e "latency " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have latency attribute"

	log_test "tc sample latency"

	tc filter del "${rule[@]}"
}
545
# Check that several flower sampling rules can share group 1 on ingress of
# $rp1, while a rule using group 2 is refused. Returns early when
# mlxsw_only_on_spectrum 2+ fails.
tc_sample_acl_group_conflict_test()
{
	local rc

	RET=0

	# Test that two flower sampling rules cannot be configured on the same
	# port with different groups.

	# Policy-based sampling is not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule with same group"

	tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \
		skip_sw action sample rate 1024 group 2 &> /dev/null
	rc=$?
	check_fail $rc "Managed to configure sampling rule with conflicting group"

	log_test "tc sample (w/ flower) group conflict test"

	# If the conflicting rule was unexpectedly accepted, remove it so that
	# it does not leak into the tests that follow.
	if (( rc == 0 )); then
		tc filter del dev $rp1 ingress protocol ip pref 3 handle 103 \
			flower
	fi

	tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower
	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
}
573
# Flower-based sampling rate check. With a 1:32 rule matching dst_ip
# 198.51.100.1, 320000 packets must yield 10000 samples +-25% in group 1. The
# rule is then replaced by one matching dst_ip 198.51.100.10, for which no
# packet may be sampled. Returns early when mlxsw_only_on_spectrum 2+ fails.
#
# $1 - bind point passed to tc ("ingress" or "egress" in this file)
# $2 - port the rule is installed on
__tc_sample_acl_rate_test()
{
	local bind=$1
	local port=$2
	local -a rule=(dev ${port} ${bind} protocol ip pref 1 handle 101 flower)
	local sampled deviation

	RET=0

	# Policy-based sampling is not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add "${rule[@]}" \
		skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	ip vrf exec v${h1} ${MZ} ${h1} -c 320000 -d 100usec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	# Deviation from the expected 10000 samples, in whole percent.
	sampled=$(grep -e "group 1 " ${CAPTURE_FILE} | wc -l)
	deviation=$((100 * (sampled - 10000) / 10000))
	(( deviation >= -25 && deviation <= 25 ))
	check_err $? "Expected 10000 packets, got $sampled packets, which is $deviation% off. Required accuracy is +-25%"

	# Setup a filter that should not match any packet and make sure packets
	# are not sampled.
	tc filter del "${rule[@]}"

	tc filter add "${rule[@]}" \
		skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	ip vrf exec v${h1} ${MZ} ${h1} -c 3200 -d 1msec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	grep -q -e "group 1 " ${CAPTURE_FILE}
	check_fail $? "Sampled packets when should not"

	log_test "tc sample (w/ flower) rate ($bind)"

	tc filter del "${rule[@]}"
}
623
# Run the flower-based sampling rate check on ingress of $rp1 and on egress
# of $rp2.
tc_sample_acl_rate_test()
{
	__tc_sample_acl_rate_test ingress ${rp1}

	__tc_sample_acl_rate_test egress ${rp2}
}
629
# Check the sampling rate limit of flower rules: a rate of 2^24 - 1 must be
# accepted on ingress of $rp1, while a rate of 2^24 must be rejected. Returns
# early when mlxsw_only_on_spectrum 2+ fails.
tc_sample_acl_max_rate_test()
{
	local rc

	RET=0

	# Policy-based sampling is not supported on Spectrum-1.
	mlxsw_only_on_spectrum 2+ || return

	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
		skip_sw action sample rate $((2 ** 24 - 1)) group 1
	check_err $? "Failed to configure sampling rule with max rate"

	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower

	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
		skip_sw action sample rate $((2 ** 24)) \
		group 1 &> /dev/null
	rc=$?
	check_fail $rc "Managed to configure sampling rate above maximum"

	log_test "tc sample (w/ flower) maximum rate"

	# If the over-limit rule was unexpectedly accepted, remove it so that
	# it does not stay installed on the port after the test.
	if (( rc == 0 )); then
		tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 \
			flower
	fi
}
650
# Run cleanup whenever the script exits.
trap 'cleanup' EXIT

# Build the topology, then call setup_wait before any test runs.
setup_prepare
setup_wait

tests_run

# EXIT_STATUS is not assigned anywhere in this file.
exit ${EXIT_STATUS}
659