1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _INET_ECN_H_
3#define _INET_ECN_H_
4
5#include <linux/ip.h>
6#include <linux/skbuff.h>
7#include <linux/if_vlan.h>
8
9#include <net/inet_sock.h>
10#include <net/dsfield.h>
11#include <net/checksum.h>
12
/*
 * Codepoints of the two-bit ECN field, i.e. the bits selected by
 * INET_ECN_MASK in a DS field / traffic class byte.
 */
enum {
	INET_ECN_NOT_ECT = 0,	/* 00: Not ECN-Capable Transport */
	INET_ECN_ECT_1 = 1,	/* 01: ECT(1) */
	INET_ECN_ECT_0 = 2,	/* 10: ECT(0) */
	INET_ECN_CE = 3,	/* 11: Congestion Experienced */
	INET_ECN_MASK = 3,	/* mask covering both ECN bits */
};
20
/*
 * Defined outside this header and not referenced by any code in it.
 * NOTE(review): presumably the sysctl switch that enables logging of ECN
 * anomalies found during tunnel decapsulation -- confirm at the definition.
 */
extern int sysctl_tunnel_ecn_log;
22
23static inline int INET_ECN_is_ce(__u8 dsfield)
24{
25	return (dsfield & INET_ECN_MASK) == INET_ECN_CE;
26}
27
28static inline int INET_ECN_is_not_ect(__u8 dsfield)
29{
30	return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT;
31}
32
33static inline int INET_ECN_is_capable(__u8 dsfield)
34{
35	return dsfield & INET_ECN_ECT_0;
36}
37
38/*
39 * RFC 3168 9.1.1
40 *  The full-functionality option for ECN encapsulation is to copy the
41 *  ECN codepoint of the inside header to the outside header on
42 *  encapsulation if the inside header is not-ECT or ECT, and to set the
43 *  ECN codepoint of the outside header to ECT(0) if the ECN codepoint of
44 *  the inside header is CE.
45 */
46static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
47{
48	outer &= ~INET_ECN_MASK;
49	outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) :
50					  INET_ECN_ECT_0;
51	return outer;
52}
53
54static inline void INET_ECN_xmit(struct sock *sk)
55{
56	inet_sk(sk)->tos |= INET_ECN_ECT_0;
57	if (inet6_sk(sk) != NULL)
58		inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
59}
60
61static inline void INET_ECN_dontxmit(struct sock *sk)
62{
63	inet_sk(sk)->tos &= ~INET_ECN_MASK;
64	if (inet6_sk(sk) != NULL)
65		inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
66}
67
/*
 * Clears, in place, the bits of @label that correspond to
 * INET_ECN_MASK << 20 after conversion with htonl().  @label must be an
 * lvalue; it is evaluated once.
 * NOTE(review): presumably @label is the big-endian first 32-bit word of
 * an IPv6 header, where those bits are the ECN part of the traffic class
 * -- confirm with callers.
 */
#define IP6_ECN_flow_init(label) do {		\
      (label) &= ~htonl(INET_ECN_MASK << 20);	\
    } while (0)
71
/*
 * If the tclass of the socket's IPv6 state passes INET_ECN_is_capable(),
 * ORs htonl(INET_ECN_ECT_0 << 20) into @label in place; otherwise @label
 * is left unchanged.  @label must be an lvalue.
 * inet6_sk(sk) is dereferenced without a NULL check here.
 */
#define	IP6_ECN_flow_xmit(sk, label) do {				\
	if (INET_ECN_is_capable(inet6_sk(sk)->tclass))			\
		(label) |= htonl(INET_ECN_ECT_0 << 20);			\
    } while (0)
76
/*
 * Try to mark an IPv4 header as Congestion Experienced.
 *
 * Returns 0 if the header is Not-ECT (nothing is modified) and 1 if it
 * already carried CE or if CE has just been set.  When the ECN bits are
 * changed, iph->check is adjusted in the same call.
 */
static inline int IP_ECN_set_ce(struct iphdr *iph)
{
	u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
	__be16 check_add;

	/*
	 * After the last operation we have (in binary):
	 * INET_ECN_NOT_ECT => 01
	 * INET_ECN_ECT_1   => 10
	 * INET_ECN_ECT_0   => 11
	 * INET_ECN_CE      => 00
	 */
	/*
	 * Bit 1 clear means ecn is 01 (Not-ECT: return 0, leave the header
	 * alone) or 00 (already CE: return 1, nothing left to do).
	 */
	if (!(ecn & 2))
		return !ecn;

	/*
	 * The following gives us:
	 * INET_ECN_ECT_1 => check += htons(0xFFFD)
	 * INET_ECN_ECT_0 => check += htons(0xFFFE)
	 */
	/*
	 * Setting CE raises tos by 2 (from ECT(1)) or by 1 (from ECT(0)), so
	 * the checksum has to drop by the same amount; 0xFFFD and 0xFFFE are
	 * -2 and -1 in 16-bit one's-complement arithmetic.
	 * NOTE(review): assumes csum16_add() is a one's-complement 16-bit
	 * add -- confirm in net/checksum.h.
	 */
	check_add = (__force __be16)((__force u16)htons(0xFFFB) +
				     (__force u16)htons(ecn));

	iph->check = csum16_add(iph->check, check_add);
	iph->tos |= INET_ECN_CE;
	return 1;
}
104
105static inline int IP_ECN_set_ect1(struct iphdr *iph)
106{
107	if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
108		return 0;
109
110	iph->check = csum16_add(iph->check, htons(0x1));
111	iph->tos ^= INET_ECN_MASK;
112	return 1;
113}
114
115static inline void IP_ECN_clear(struct iphdr *iph)
116{
117	iph->tos &= ~INET_ECN_MASK;
118}
119
120static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner)
121{
122	dscp &= ~INET_ECN_MASK;
123	ipv4_change_dsfield(inner, INET_ECN_MASK, dscp);
124}
125
126struct ipv6hdr;
127
/* Note:
 * IP_ECN_set_ce() has to tweak the IPv4 header checksum when setting CE,
 * meaning both changes together have no effect on skb->csum if/when
 * CHECKSUM_COMPLETE.
 * In the IPv6 case, no checksum compensates the change in the IPv6 header,
 * so we have to update skb->csum.
 */
134static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
135{
136	__be32 from, to;
137
138	if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
139		return 0;
140
141	from = *(__be32 *)iph;
142	to = from | htonl(INET_ECN_CE << 20);
143	*(__be32 *)iph = to;
144	if (skb->ip_summed == CHECKSUM_COMPLETE)
145		skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
146				     (__force __wsum)to);
147	return 1;
148}
149
150static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
151{
152	__be32 from, to;
153
154	if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
155		return 0;
156
157	from = *(__be32 *)iph;
158	to = from ^ htonl(INET_ECN_MASK << 20);
159	*(__be32 *)iph = to;
160	if (skb->ip_summed == CHECKSUM_COMPLETE)
161		skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
162				     (__force __wsum)to);
163	return 1;
164}
165
166static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
167{
168	dscp &= ~INET_ECN_MASK;
169	ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);
170}
171
172static inline int INET_ECN_set_ce(struct sk_buff *skb)
173{
174	switch (skb_protocol(skb, true)) {
175	case cpu_to_be16(ETH_P_IP):
176		if (skb_network_header(skb) + sizeof(struct iphdr) <=
177		    skb_tail_pointer(skb))
178			return IP_ECN_set_ce(ip_hdr(skb));
179		break;
180
181	case cpu_to_be16(ETH_P_IPV6):
182		if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
183		    skb_tail_pointer(skb))
184			return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
185		break;
186	}
187
188	return 0;
189}
190
191static inline int skb_get_dsfield(struct sk_buff *skb)
192{
193	switch (skb_protocol(skb, true)) {
194	case cpu_to_be16(ETH_P_IP):
195		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
196			break;
197		return ipv4_get_dsfield(ip_hdr(skb));
198
199	case cpu_to_be16(ETH_P_IPV6):
200		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
201			break;
202		return ipv6_get_dsfield(ipv6_hdr(skb));
203	}
204
205	return -1;
206}
207
208static inline int INET_ECN_set_ect1(struct sk_buff *skb)
209{
210	switch (skb_protocol(skb, true)) {
211	case cpu_to_be16(ETH_P_IP):
212		if (skb_network_header(skb) + sizeof(struct iphdr) <=
213		    skb_tail_pointer(skb))
214			return IP_ECN_set_ect1(ip_hdr(skb));
215		break;
216
217	case cpu_to_be16(ETH_P_IPV6):
218		if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
219		    skb_tail_pointer(skb))
220			return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
221		break;
222	}
223
224	return 0;
225}
226
227/*
228 * RFC 6040 4.2
229 *  To decapsulate the inner header at the tunnel egress, a compliant
230 *  tunnel egress MUST set the outgoing ECN field to the codepoint at the
231 *  intersection of the appropriate arriving inner header (row) and outer
232 *  header (column) in Figure 4
233 *
234 *      +---------+------------------------------------------------+
235 *      |Arriving |            Arriving Outer Header               |
236 *      |   Inner +---------+------------+------------+------------+
237 *      |  Header | Not-ECT | ECT(0)     | ECT(1)     |     CE     |
238 *      +---------+---------+------------+------------+------------+
239 *      | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)|
240 *      |  ECT(0) |  ECT(0) | ECT(0)     | ECT(1)     |     CE     |
241 *      |  ECT(1) |  ECT(1) | ECT(1) (!) | ECT(1)     |     CE     |
242 *      |    CE   |      CE |     CE     |     CE(!!!)|     CE     |
243 *      +---------+---------+------------+------------+------------+
244 *
245 *             Figure 4: New IP in IP Decapsulation Behaviour
246 *
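 *  returns 0 on success
 *          1 if the inner header is Not-ECT but the outer header is
 *            ECT(0) or ECT(1); this is broken and should be logged
 *            (Not-ECT row above).  The inner CE / outer ECT(1) cell,
 *            also marked (!!!), is not reported and returns 0.
 *          2 if packet should be dropped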
250 */
251static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
252{
253	if (INET_ECN_is_not_ect(inner)) {
254		switch (outer & INET_ECN_MASK) {
255		case INET_ECN_NOT_ECT:
256			return 0;
257		case INET_ECN_ECT_0:
258		case INET_ECN_ECT_1:
259			return 1;
260		case INET_ECN_CE:
261			return 2;
262		}
263	}
264
265	*set_ce = INET_ECN_is_ce(outer);
266	return 0;
267}
268
269static inline int INET_ECN_decapsulate(struct sk_buff *skb,
270				       __u8 outer, __u8 inner)
271{
272	bool set_ce = false;
273	int rc;
274
275	rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
276	if (!rc) {
277		if (set_ce)
278			INET_ECN_set_ce(skb);
279		else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
280			INET_ECN_set_ect1(skb);
281	}
282
283	return rc;
284}
285
286static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
287				     struct sk_buff *skb)
288{
289	__u8 inner;
290
291	switch (skb_protocol(skb, true)) {
292	case htons(ETH_P_IP):
293		inner = ip_hdr(skb)->tos;
294		break;
295	case htons(ETH_P_IPV6):
296		inner = ipv6_get_dsfield(ipv6_hdr(skb));
297		break;
298	default:
299		return 0;
300	}
301
302	return INET_ECN_decapsulate(skb, oiph->tos, inner);
303}
304
305static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
306				      struct sk_buff *skb)
307{
308	__u8 inner;
309
310	switch (skb_protocol(skb, true)) {
311	case htons(ETH_P_IP):
312		inner = ip_hdr(skb)->tos;
313		break;
314	case htons(ETH_P_IPV6):
315		inner = ipv6_get_dsfield(ipv6_hdr(skb));
316		break;
317	default:
318		return 0;
319	}
320
321	return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
322}
323#endif
324