1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# Setup/topology:
5#
6#    NS1             NS2             NS3
7#   veth1 <---> veth2   veth3 <---> veth4 (the top route)
8#   veth5 <---> veth6   veth7 <---> veth8 (the bottom route)
9#
10#   each vethN gets IPv[4|6]_N address
11#
12#   IPv*_SRC = IPv*_1
13#   IPv*_DST = IPv*_4
14#
15#   all tests test pings from IPv*_SRC to IPv*_DST
16#
17#   by default, routes are configured to allow packets to go
18#   IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route)
19#
20#   a GRE device is installed in NS3 with IPv*_GRE, and
21#   NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8
22#   (the bottom route)
23#
24# Tests:
25#
26#   1. routes NS2->IPv*_DST are brought down, so the only way a ping
27#      from IP*_SRC to IP*_DST can work is via IPv*_GRE
28#
29#   2a. in an egress test, a bpf LWT_XMIT program is installed on veth1
30#       that encaps the packets with an IP/GRE header to route to IPv*_GRE
31#
32#       ping: SRC->[encap at veth1:egress]->GRE:decap->DST
33#       ping replies go DST->SRC directly
34#
35#   2b. in an ingress test, a bpf LWT_IN program is installed on veth2
36#       that encaps the packets with an IP/GRE header to route to IPv*_GRE
37#
38#       ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
39#       ping replies go DST->SRC directly
40
# BPF object file passed to "ip route ... encap bpf ... obj" by the tests below.
BPF_FILE="test_lwt_ip_encap.bpf.o"

# The script creates network namespaces and devices, so refuse to run
# without root privileges.
[[ ${EUID} -eq 0 ]] || {
	echo "This script must be run as root"
	echo "FAIL"
	exit 1
}
47
# Namespace names get a random suffix so that they do not collide with
# namespaces that already exist on the machine.
declare -r NS1="ns1-$(mktemp -u XXXXXX)"
declare -r NS2="ns2-$(mktemp -u XXXXXX)"
declare -r NS3="ns3-$(mktemp -u XXXXXX)"

# IPv4 address of each vethN, plus the address of the GRE device in NS3.
declare -r IPv4_1="172.16.1.100"
declare -r IPv4_2="172.16.2.100"
declare -r IPv4_3="172.16.3.100"
declare -r IPv4_4="172.16.4.100"
declare -r IPv4_5="172.16.5.100"
declare -r IPv4_6="172.16.6.100"
declare -r IPv4_7="172.16.7.100"
declare -r IPv4_8="172.16.8.100"
declare -r IPv4_GRE="172.16.16.100"

# Pings go from veth1 (NS1) to veth4 (NS3).
declare -r IPv4_SRC="${IPv4_1}"
declare -r IPv4_DST="${IPv4_4}"

# IPv6 address of each vethN, plus the address of the GRE device in NS3.
declare -r IPv6_1="fb01::1"
declare -r IPv6_2="fb02::1"
declare -r IPv6_3="fb03::1"
declare -r IPv6_4="fb04::1"
declare -r IPv6_5="fb05::1"
declare -r IPv6_6="fb06::1"
declare -r IPv6_7="fb07::1"
declare -r IPv6_8="fb08::1"
declare -r IPv6_GRE="fb10::1"

declare -r IPv6_SRC="${IPv6_1}"
declare -r IPv6_DST="${IPv6_4}"

# Status of the test that is currently running (0 = no failure seen so far),
# and the running totals printed at the end of the script.
TEST_STATUS=0
TESTS_SUCCEEDED=0
TESTS_FAILED=0

# Capture file used by the GSO test; created by setup().
TMPFILE=""
83
# Report the outcome of the test that just finished and update the totals.
# Globals: TEST_STATUS (read), TESTS_SUCCEEDED / TESTS_FAILED (incremented)
# Outputs: "PASS" or "FAIL" on stdout
process_test_results()
{
	if [[ "${TEST_STATUS}" -eq 0 ]] ; then
		echo "PASS"
		TESTS_SUCCEEDED=$(( TESTS_SUCCEEDED + 1 ))
		return
	fi

	echo "FAIL"
	TESTS_FAILED=$(( TESTS_FAILED + 1 ))
}
94
# Print the pass/fail totals and terminate the script.
# Globals: TESTS_SUCCEEDED, TESTS_FAILED (read)
# Exits:   0 when no test failed, 1 otherwise
print_test_summary_and_exit()
{
	echo "passed tests: ${TESTS_SUCCEEDED}"
	echo "failed tests: ${TESTS_FAILED}"

	[ "${TESTS_FAILED}" -eq "0" ] && exit 0
	exit 1
}
105
# Build the three-namespace topology described at the top of the file:
# namespaces, veth pairs, optional VRFs, addresses, routes and GRE devices.
# Runs under "set -e", so any failing command aborts the script; "set -e"
# is switched off again before returning.
# Globals: NS1..NS3, IPv4_*, IPv6_*, VRF (read); TEST_STATUS, TMPFILE (written)
setup()
{
	# owner[N] is the namespace that vethN lives in (index 0 is unused)
	local -a owner=("" "${NS1}" "${NS2}" "${NS2}" "${NS3}" \
			   "${NS1}" "${NS2}" "${NS2}" "${NS3}")
	# VRF is either empty or a string like "vrf red"; split it into words
	# once so it can be appended to the route commands below
	local -a vrf_args=(${VRF})
	local ns key group i v4 v6

	set -e  # exit on error
	TEST_STATUS=0

	# create the namespaces
	for ns in "${NS1}" "${NS2}" "${NS3}" ; do
		ip netns add "${ns}"
	done

	# rp_filter gets confused by what these tests are doing, and IPv6 DAD
	# sometimes takes too long and fails tests, so disable both everywhere
	for key in net.ipv4.conf.all.rp_filter \
		   net.ipv4.conf.default.rp_filter \
		   net.ipv6.conf.all.accept_dad \
		   net.ipv6.conf.default.accept_dad ; do
		for ns in "${NS1}" "${NS2}" "${NS3}" ; do
			ip netns exec "${ns}" sysctl -wq "${key}=0"
		done
	done

	# create the veth pairs: veth1/veth2, veth3/veth4, veth5/veth6, veth7/veth8
	for i in 1 3 5 7 ; do
		ip link add "veth${i}" type veth peer name "veth$(( i + 1 ))"
	done

	# NS2 sits between NS1 and NS3 and has to forward packets
	ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1
	ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1

	# move every veth into its namespace
	for i in 1 2 3 4 5 6 7 8 ; do
		ip link set "veth${i}" netns "${owner[i]}"
	done

	# in VRF mode, create VRF "red" in NS1 and NS2 and enslave their veths
	if [ -n "${VRF}" ] ; then
		for ns in "${NS1}" "${NS2}" ; do
			ip -netns "${ns}" link add red type vrf table 1001
			ip -netns "${ns}" link set red up
			ip -netns "${ns}" route add table 1001 unreachable default metric 8192
			ip -netns "${ns}" -6 route add table 1001 unreachable default metric 8192
		done
		for i in 1 5 2 3 6 7 ; do
			ip -netns "${owner[i]}" link set "veth${i}" vrf red
		done
	fi

	# configure addresses: first the top route (1-2-3-4), then the bottom
	# route (5-6-7-8); within each route IPv4 first, then IPv6.
	# ${group} is left unquoted on purpose so it splits into indices.
	for group in "1 2 3 4" "5 6 7 8" ; do
		for i in ${group} ; do
			v4="IPv4_${i}"
			ip -netns "${owner[i]}" addr add "${!v4}/24" dev "veth${i}"
		done
		for i in ${group} ; do
			v6="IPv6_${i}"
			ip -netns "${owner[i]}" -6 addr add "${!v6}/128" nodad dev "veth${i}"
		done
	done

	# bring all veths up
	for i in 1 2 3 4 5 6 7 8 ; do
		ip -netns "${owner[i]}" link set dev "veth${i}" up
	done

	# configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default;
	# the bottom route to specific bottom addresses

	# NS1
	# top route (default route goes via the top)
	ip -netns "${NS1}" route add "${IPv4_2}/32" dev veth1 "${vrf_args[@]}"
	ip -netns "${NS1}" route add default dev veth1 via "${IPv4_2}" "${vrf_args[@]}"
	ip -netns "${NS1}" -6 route add "${IPv6_2}/128" dev veth1 "${vrf_args[@]}"
	ip -netns "${NS1}" -6 route add default dev veth1 via "${IPv6_2}" "${vrf_args[@]}"
	# bottom route
	ip -netns "${NS1}" route add "${IPv4_6}/32" dev veth5 "${vrf_args[@]}"
	ip -netns "${NS1}" route add "${IPv4_7}/32" dev veth5 via "${IPv4_6}" "${vrf_args[@]}"
	ip -netns "${NS1}" route add "${IPv4_8}/32" dev veth5 via "${IPv4_6}" "${vrf_args[@]}"
	ip -netns "${NS1}" -6 route add "${IPv6_6}/128" dev veth5 "${vrf_args[@]}"
	ip -netns "${NS1}" -6 route add "${IPv6_7}/128" dev veth5 via "${IPv6_6}" "${vrf_args[@]}"
	ip -netns "${NS1}" -6 route add "${IPv6_8}/128" dev veth5 via "${IPv6_6}" "${vrf_args[@]}"

	# NS2
	# top route
	ip -netns "${NS2}" route add "${IPv4_1}/32" dev veth2 "${vrf_args[@]}"
	ip -netns "${NS2}" route add "${IPv4_4}/32" dev veth3 "${vrf_args[@]}"
	ip -netns "${NS2}" -6 route add "${IPv6_1}/128" dev veth2 "${vrf_args[@]}"
	ip -netns "${NS2}" -6 route add "${IPv6_4}/128" dev veth3 "${vrf_args[@]}"
	# bottom route
	ip -netns "${NS2}" route add "${IPv4_5}/32" dev veth6 "${vrf_args[@]}"
	ip -netns "${NS2}" route add "${IPv4_8}/32" dev veth7 "${vrf_args[@]}"
	ip -netns "${NS2}" -6 route add "${IPv6_5}/128" dev veth6 "${vrf_args[@]}"
	ip -netns "${NS2}" -6 route add "${IPv6_8}/128" dev veth7 "${vrf_args[@]}"

	# NS3 (never uses a VRF)
	# top route
	ip -netns "${NS3}" route add "${IPv4_3}/32" dev veth4
	ip -netns "${NS3}" route add "${IPv4_1}/32" dev veth4 via "${IPv4_3}"
	ip -netns "${NS3}" route add "${IPv4_2}/32" dev veth4 via "${IPv4_3}"
	ip -netns "${NS3}" -6 route add "${IPv6_3}/128" dev veth4
	ip -netns "${NS3}" -6 route add "${IPv6_1}/128" dev veth4 via "${IPv6_3}"
	ip -netns "${NS3}" -6 route add "${IPv6_2}/128" dev veth4 via "${IPv6_3}"
	# bottom route
	ip -netns "${NS3}" route add "${IPv4_7}/32" dev veth8
	ip -netns "${NS3}" route add "${IPv4_5}/32" dev veth8 via "${IPv4_7}"
	ip -netns "${NS3}" route add "${IPv4_6}/32" dev veth8 via "${IPv4_7}"
	ip -netns "${NS3}" -6 route add "${IPv6_7}/128" dev veth8
	ip -netns "${NS3}" -6 route add "${IPv6_5}/128" dev veth8 via "${IPv6_7}"
	ip -netns "${NS3}" -6 route add "${IPv6_6}/128" dev veth8 via "${IPv6_7}"

	# configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
	ip -netns "${NS3}" tunnel add gre_dev mode gre remote "${IPv4_1}" local "${IPv4_GRE}" ttl 255
	ip -netns "${NS3}" link set gre_dev up
	ip -netns "${NS3}" addr add "${IPv4_GRE}" dev gre_dev
	ip -netns "${NS1}" route add "${IPv4_GRE}/32" dev veth5 via "${IPv4_6}" "${vrf_args[@]}"
	ip -netns "${NS2}" route add "${IPv4_GRE}/32" dev veth7 via "${IPv4_8}" "${vrf_args[@]}"

	# configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
	ip -netns "${NS3}" -6 tunnel add name gre6_dev mode ip6gre remote "${IPv6_1}" local "${IPv6_GRE}" ttl 255
	ip -netns "${NS3}" link set gre6_dev up
	ip -netns "${NS3}" -6 addr add "${IPv6_GRE}" nodad dev gre6_dev
	ip -netns "${NS1}" -6 route add "${IPv6_GRE}/128" dev veth5 via "${IPv6_6}" "${vrf_args[@]}"
	ip -netns "${NS2}" -6 route add "${IPv6_GRE}/128" dev veth7 via "${IPv6_8}" "${vrf_args[@]}"

	# capture file for the GSO test
	TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)

	sleep 1  # reduce flakiness
	set +e
}
261
# Remove the capture file and the three test namespaces.
# Safe to call more than once and before setup() has run: it is invoked
# explicitly at the end of every test and again from the EXIT trap.
# Globals: TMPFILE, NS1, NS2, NS3 (read)
cleanup()
{
	# TMPFILE is empty until setup() has created the capture file; quoting
	# keeps the test from collapsing to "[ -f ]" (which is true) and then
	# running rm without an operand.
	if [ -f "${TMPFILE}" ] ; then
		rm -f -- "${TMPFILE}"
	fi

	# best effort: the namespaces may not exist (yet, or any more)
	ip netns del "${NS1}" 2> /dev/null
	ip netns del "${NS2}" 2> /dev/null
	ip netns del "${NS3}" 2> /dev/null
}

# make sure nothing is left behind, whichever way the script terminates
trap cleanup EXIT
274
# Delete the IPv4 and IPv6 routes to the GRE addresses from NS1 and NS2.
# Globals: NS1, NS2, IPv4_GRE, IPv6_GRE, VRF (read)
remove_routes_to_gredev()
{
	# VRF is empty or a string like "vrf red": split it into separate words
	local -a vrf_args=(${VRF})

	ip -netns "${NS1}" route del "${IPv4_GRE}" dev veth5 "${vrf_args[@]}"
	ip -netns "${NS2}" route del "${IPv4_GRE}" dev veth7 "${vrf_args[@]}"
	ip -netns "${NS1}" -6 route del "${IPv6_GRE}/128" dev veth5 "${vrf_args[@]}"
	ip -netns "${NS2}" -6 route del "${IPv6_GRE}/128" dev veth7 "${vrf_args[@]}"
}
282
# Install "unreachable" routes for the GRE addresses in NS1 and NS2,
# IPv4 first and then IPv6.
# Globals: NS1, NS2, IPv4_GRE, IPv6_GRE, VRF (read)
add_unreachable_routes_to_gredev()
{
	# VRF is empty or a string like "vrf red": split it into separate words
	local -a vrf_args=(${VRF})
	local ns

	for ns in "${NS1}" "${NS2}" ; do
		ip -netns "${ns}" route add unreachable "${IPv4_GRE}/32" "${vrf_args[@]}"
	done
	for ns in "${NS1}" "${NS2}" ; do
		ip -netns "${ns}" -6 route add unreachable "${IPv6_GRE}/128" "${vrf_args[@]}"
	done
}
290
# Send one ping from NS1 (bound to veth1) to the test destination and
# compare the outcome with what the caller expects.
# Arguments: $1 - "IPv4" or "IPv6"
#            $2 - expected result: 0 = ping must succeed, 1 = ping must fail
# Globals:   NS1, IPv4_DST, IPv6_DST (read); TEST_STATUS (set to 1 on mismatch)
# Outputs:   a diagnostic line on stdout when the expectation is not met
test_ping()
{
	# "local -r" makes these read-only; "local readonly X=..." would only
	# create an extra local variable called "readonly"
	local -r PROTO=$1
	local -r EXPECTED=$2
	local RET=0

	# "> /dev/null 2>&1" (in this order) silences both stdout and stderr
	if [ "${PROTO}" == "IPv4" ] ; then
		ip netns exec "${NS1}" ping  -c 1 -W 1 -I veth1 "${IPv4_DST}" > /dev/null 2>&1
		RET=$?
	elif [ "${PROTO}" == "IPv6" ] ; then
		ip netns exec "${NS1}" ping6 -c 1 -W 1 -I veth1 "${IPv6_DST}" > /dev/null 2>&1
		RET=$?
	else
		echo "    test_ping: unknown PROTO: ${PROTO}"
		TEST_STATUS=1
		# nothing was pinged, so there is no result to compare
		return 0
	fi

	# normalize every failure exit code to 1
	if [ "0" != "${RET}" ]; then
		RET=1
	fi

	if [ "${EXPECTED}" != "${RET}" ] ; then
		echo "    test_ping failed: expected: ${EXPECTED}; got ${RET}"
		TEST_STATUS=1
	fi
}
317
# Send a TCP payload larger than the MTU from NS1 to a nc listener in NS3
# and check that all bytes arrive in the capture file.
# Arguments: $1 - "IPv4" or "IPv6"
# Globals:   NS1, NS3, IPv4_DST, IPv6_DST, TMPFILE (read);
#            TEST_STATUS (set to 1 on failure)
# Outputs:   a diagnostic line when the test fails or is skipped
test_gso()
{
	# "local -r" makes these read-only; "local readonly X=..." would only
	# create an extra local variable called "readonly"
	local -r PROTO=$1
	local -r PKT_SZ=5000
	local IP_DST=""
	local NC_FAMILY=""
	local SZ=""

	: > "${TMPFILE}"  # trim the capture file

	# check that nc is present
	command -v nc >/dev/null 2>&1 || \
		{ echo >&2 "nc is not available: skipping TSO tests"; return; }

	case "${PROTO}" in
	IPv4)
		IP_DST=${IPv4_DST}
		NC_FAMILY="-4"
		;;
	IPv6)
		IP_DST=${IPv6_DST}
		NC_FAMILY="-6"
		;;
	*)
		echo "    test_gso: unknown PROTO: ${PROTO}"
		TEST_STATUS=1
		# without a destination there is nothing to send
		return 0
		;;
	esac

	# listen on port 9000, capture TCP into $TMPFILE
	ip netns exec "${NS3}" bash -c \
		"nc ${NC_FAMILY} -l -p 9000 > ${TMPFILE} &"
	sleep 1  # let nc start listening

	# send a packet larger than MTU
	ip netns exec "${NS1}" bash -c \
		"dd if=/dev/zero bs=$PKT_SZ count=1 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
	sleep 2 # let the packet get delivered

	# verify we received all expected bytes
	SZ=$(stat -c %s "${TMPFILE}")
	if [ "$SZ" != "$PKT_SZ" ] ; then
		echo "    test_gso failed: ${PROTO}"
		TEST_STATUS=1
	fi
}
357
# Egress test: a bpf LWT xmit program on veth1 (NS1) encapsulates packets
# for the test destination so that they reach it via the GRE device.
# Arguments: $1 - encapsulation to use: "IPv4" (section encap_gre) or
#                 "IPv6" (section encap_gre6)
# Globals:   NS1, NS2, IPv4_DST, IPv6_DST, BPF_FILE, VRF (read);
#            TEST_STATUS (written, also by the helpers called here)
test_egress()
{
	# "local -r" makes ENCAP read-only; "local readonly ENCAP=..." would only
	# create an extra local variable called "readonly"
	local -r ENCAP=$1
	local SEC=""

	echo "starting egress ${ENCAP} encap test ${VRF}"
	setup

	# by default, pings work
	test_ping IPv4 0
	test_ping IPv6 0

	# remove NS2->DST routes, ping fails
	# (${VRF} is unquoted on purpose: it is empty or expands to two words)
	ip -netns ${NS2}    route del ${IPv4_DST}/32  dev veth3 ${VRF}
	ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
	test_ping IPv4 1
	test_ping IPv6 1

	# install replacement routes (LWT/eBPF), pings succeed;
	# the two encap flavours differ only in the section that gets loaded
	case "${ENCAP}" in
	IPv4) SEC="encap_gre" ;;
	IPv6) SEC="encap_gre6" ;;
	esac
	if [ -n "${SEC}" ] ; then
		ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
			${BPF_FILE} sec ${SEC} dev veth1 ${VRF}
		ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
			${BPF_FILE} sec ${SEC} dev veth1 ${VRF}
	else
		echo "    unknown encap ${ENCAP}"
		TEST_STATUS=1
	fi
	test_ping IPv4 0
	test_ping IPv6 0

	# skip GSO tests with VRF: VRF routing needs properly assigned
	# source IP/device, which is easy to do with ping and hard with dd/nc.
	if [ -z "${VRF}" ] ; then
		test_gso IPv4
		test_gso IPv6
	fi

	# a negative test: remove routes to GRE devices: ping fails
	remove_routes_to_gredev
	test_ping IPv4 1
	test_ping IPv6 1

	# another negative test
	add_unreachable_routes_to_gredev
	test_ping IPv4 1
	test_ping IPv6 1

	cleanup
	process_test_results
}
412
# Ingress test: a bpf LWT in program on veth2 (NS2) encapsulates packets
# for the test destination so that they reach it via the GRE device.
# Arguments: $1 - encapsulation to use: "IPv4" (section encap_gre) or
#                 "IPv6" (section encap_gre6)
# Globals:   NS2, IPv4_DST, IPv6_DST, BPF_FILE, VRF (read);
#            TEST_STATUS (written, also by the helpers called here)
test_ingress()
{
	# "local -r" makes ENCAP read-only; "local readonly ENCAP=..." would only
	# create an extra local variable called "readonly"
	local -r ENCAP=$1
	local SEC=""

	echo "starting ingress ${ENCAP} encap test ${VRF}"
	setup

	# by default, pings work
	test_ping IPv4 0
	test_ping IPv6 0

	# remove NS2->DST routes, pings fail
	# (${VRF} is unquoted on purpose: it is empty or expands to two words)
	ip -netns ${NS2}    route del ${IPv4_DST}/32  dev veth3 ${VRF}
	ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
	test_ping IPv4 1
	test_ping IPv6 1

	# install replacement routes (LWT/eBPF), pings succeed;
	# the two encap flavours differ only in the section that gets loaded
	case "${ENCAP}" in
	IPv4) SEC="encap_gre" ;;
	IPv6) SEC="encap_gre6" ;;
	esac
	if [ -n "${SEC}" ] ; then
		ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
			${BPF_FILE} sec ${SEC} dev veth2 ${VRF}
		ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
			${BPF_FILE} sec ${SEC} dev veth2 ${VRF}
	else
		echo "FAIL: unknown encap ${ENCAP}"
		TEST_STATUS=1
	fi
	test_ping IPv4 0
	test_ping IPv6 0

	# a negative test: remove routes to GRE devices: ping fails
	remove_routes_to_gredev
	test_ping IPv4 1
	test_ping IPv6 1

	# another negative test
	add_unreachable_routes_to_gredev
	test_ping IPv4 1
	test_ping IPv6 1

	cleanup
	process_test_results
}
463
# Run the four egress/ingress tests twice: first without a VRF, then with
# the veth devices of NS1 and NS2 enslaved to VRF "red".
for VRF in "" "vrf red" ; do
	test_egress IPv4
	test_egress IPv6
	test_ingress IPv4
	test_ingress IPv6
done

print_test_summary_and_exit
477