1#!/bin/bash
2
3# This script demonstrates interaction of conntrack and vrf.
4# The vrf driver calls the netfilter hooks again, with oif/iif
5# pointing at the VRF device.
6#
7# For ingress, this means first iteration has iifname of lower/real
# device.  In this script, that's veth0.
9# Second iteration is iifname set to vrf device, tvrf in this script.
10#
11# For egress, this is reversed: first iteration has the vrf device,
12# second iteration is done with the lower/real/veth0 device.
13#
# test_ct_zone_in demonstrates an unexpected change of nftables
# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
# connection on VRF rcv".
17#
18# It was possible to assign conntrack zone to a packet (or mark it for
19# `notracking`) in the prerouting chain before conntrack, based on real iif.
20#
# After the change, the zone assignment is lost. Instead, the zone is
# assigned based on `iif` matching the VRF master interface (in case such
# a rule exists).
# Thus it is impossible to distinguish packets based on the original
# interface.
26#
# test_masquerade_vrf and test_masquerade_veth demonstrate the problem
28# that was supposed to be fixed by the commit mentioned above to make sure
29# that any fix to test case 1 won't break masquerade again.
30
31source lib.sh
32
# Addresses configured on the two veth0 endpoints: IP0 in ns0 (the end that
# is enslaved to the VRF), IP1 in ns1.  PFXL is the prefix length used for
# both.  These never change while the script runs, so make them read-only.
readonly IP0=172.30.30.1
readonly IP1=172.30.30.2
readonly PFXL=30

# Exit status of the script; stays 0 unless a test records a failure.
ret=0
37
# Kill every process still running inside the two test namespaces (such as
# the background socat listener), then hand over to cleanup_all_ns from
# lib.sh for the namespace teardown.
#
# Globals: ns0, ns1 (read)
cleanup()
{
	# -r: do not invoke kill at all when a namespace has no pids left,
	# instead of running a bare "kill" that only prints a usage error.
	ip netns pids "$ns0" | xargs -r kill 2>/dev/null
	ip netns pids "$ns1" | xargs -r kill 2>/dev/null

	cleanup_all_ns
}
45
# Required userspace tools.  checktool comes from lib.sh; presumably it
# skips the test when the probe command fails -- confirm there.
checktool "nft --version" "run test without nft"
checktool "conntrack --version" "run test without conntrack"
checktool "socat -h" "run test without socat"

# From here on, tear everything down again on any exit path.
trap cleanup EXIT

setup_ns ns0 ns1

# Turn off reverse-path filtering in ns0, both for devices created later
# ("default") and globally ("all").
ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0

# veth pair connecting the two namespaces; both ends are named veth0.
if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
	echo "SKIP: Could not add veth device"
	exit "$ksft_skip"
fi

# VRF device in ns0, bound to routing table 9876.
if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
	echo "SKIP: Could not add vrf device"
	exit "$ksft_skip"
fi

# Enslave the ns0 end of the veth pair to the VRF, then bring all links up.
ip -net "$ns0" li set veth0 master tvrf
ip -net "$ns0" li set tvrf up
ip -net "$ns0" li set veth0 up
ip -net "$ns1" li set veth0 up

ip -net "$ns0" addr add "$IP0/$PFXL" dev veth0
ip -net "$ns1" addr add "$IP1/$PFXL" dev veth0
75
# Succeeds once a listening TCP socket with source port 55555 is reported
# by ss for the given network namespace.
#
# Arguments: $1 - name of the network namespace to inspect
# Returns:   0 if "55555" appears in the ss output, non-zero otherwise
listener_ready()
{
	local netns="$1"
	local sockets

	sockets=$(ss -N "$netns" -l -n -t -o "sport = :55555")
	grep -q "55555" <<< "$sockets"
}
82
83ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null &
84busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1"
85
# Test vrf ingress handling.
#
# Loads a ruleset whose raw-priority prerouting and output chains assign
# conntrack zone 1 to packets matching veth0 and zone 2 to packets matching
# tvrf, then sends a single ICMP echo request from ns1 to ns0.
# The incoming connection should be placed in conntrack zone 1,
# as decided by the first iteration of the ruleset.
#
# Globals: ns0, ns1, IP0, IP1 (read); ret (set to 1 on failure)
test_ct_zone_in()
{
	local count

	ip netns exec "$ns0" nft -f - <<EOF
table testct {
	chain rawpre {
		type filter hook prerouting priority raw;

		iif { veth0, tvrf } counter meta nftrace set 1
		iif veth0 counter ct zone set 1 counter return
		iif tvrf counter ct zone set 2 counter return
		ip protocol icmp counter
		notrack counter
	}

	chain rawout {
		type filter hook output priority raw;

		oif veth0 counter ct zone set 1 counter return
		oif tvrf counter ct zone set 2 counter return
		notrack counter
	}
}
EOF
	ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null

	# should be in zone 1, not zone 2
	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 1 2>/dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "PASS: entry found in conntrack zone 1"
	else
		echo "FAIL: entry not found in conntrack zone 1"
		# Record the failure so that the script exits non-zero, like
		# the masquerade tests do.
		ret=1

		# Extra diagnostics: did the entry land in the VRF's zone?
		count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 2 2> /dev/null | wc -l)
		if [ "$count" -eq 1 ]; then
			echo "FAIL: entry found in zone 2 instead"
		else
			echo "FAIL: entry not in zone 1 or 2, dumping table"
			ip netns exec "$ns0" conntrack -L
			ip netns exec "$ns0" nft list ruleset
		fi
	fi
}
130
# add masq rule that gets evaluated w. outif set to vrf device.
# This tests the first iteration of the packet through conntrack,
# oifname is the vrf device.
#
# Arguments: $1 - qdisc to install as root qdisc on tvrf while the test
#                 runs, or "default" to leave the device untouched
# Globals:   ns0, IP1 (read); ret (set to 1 on failure)
test_masquerade_vrf()
{
	local qdisc=$1
	local nat_table

	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc add dev tvrf root "$qdisc"
	fi

	ip netns exec "$ns0" conntrack -F 2>/dev/null

	ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain rawout {
		type filter hook output priority raw;

		oif tvrf ct state untracked counter
	}
	chain postrouting2 {
		type filter hook postrouting priority mangle;

		oif tvrf ct state untracked counter
	}
	chain postrouting {
		type nat hook postrouting priority 0;
		# NB: masquerade should always be combined with 'oif(name) bla',
		# lack of this is intentional here, we want to exercise double-snat.
		ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null; then
		echo "FAIL: connect failure with masquerade + sport rewrite on vrf device"
		ret=1
	else
		# must also check that nat table was evaluated on second (lower device) iteration.
		nat_table=$(ip netns exec "$ns0" nft list table ip nat)
		if grep -q 'counter packets 1' <<< "$nat_table" &&
		   grep -q 'untracked counter packets [1-9]' <<< "$nat_table"; then
			echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
		else
			echo "FAIL: vrf rules have unexpected counter value"
			ret=1
		fi
	fi

	# Always undo the qdisc change, also after a connect failure, so the
	# tests that follow see the device in its original state.
	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc del dev tvrf root
	fi
}
184
# Masquerade test for the lower device: the masq rule only matches when the
# output interface is veth0, so it is hit on the 2nd pass of the packet
# through conntrack (the first pass sees the vrf device as oif).
#
# Globals: ns0, IP1 (read); ret (set to 1 on failure)
test_masquerade_veth()
{
	# Begin with an empty conntrack table and a ruleset holding nothing
	# but the masquerade rule under test.
	ip netns exec "$ns0" conntrack -F 2>/dev/null
	ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain postrouting {
		type nat hook postrouting priority 0;
		meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF

	# Open one TCP connection from within the VRF to the listener in ns1.
	ip netns exec "$ns0" ip vrf exec tvrf \
		socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null || {
		echo "FAIL: connect failure with masquerade + sport rewrite on veth device"
		ret=1
		return
	}

	# The nat rule must have been evaluated on the lower device pass,
	# which shows up in its packet counter.
	if ! ip netns exec "$ns0" nft list table ip nat | grep -q 'counter packets 1'; then
		echo "FAIL: vrf masq rule has unexpected counter value"
		ret=1
		return
	fi

	echo "PASS: connect with masquerade + sport rewrite on veth device"
}
214
215test_ct_zone_in
216test_masquerade_vrf "default"
217test_masquerade_vrf "pfifo"
218test_masquerade_veth
219
220exit $ret
221