1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
3 *
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Heiko W.Rupp <hwr@pilhuhn.de>
10 *
11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD$");
39
40#include "opt_inet.h"
41#include "opt_inet6.h"
42
43#include <sys/param.h>
44#include <sys/jail.h>
45#include <sys/systm.h>
46#include <sys/socket.h>
47#include <sys/socketvar.h>
48#include <sys/sockio.h>
49#include <sys/mbuf.h>
50#include <sys/errno.h>
51#include <sys/kernel.h>
52#include <sys/sysctl.h>
53#include <sys/malloc.h>
54#include <sys/proc.h>
55
56#include <net/if.h>
57#include <net/if_var.h>
58#include <net/vnet.h>
59
60#include <netinet/in.h>
61#include <netinet/in_var.h>
62#include <netinet/in_pcb.h>
63#include <netinet/ip.h>
64#include <netinet/ip_encap.h>
65#include <netinet/ip_var.h>
66#include <netinet/udp.h>
67#include <netinet/udp_var.h>
68
69#ifdef INET6
70#include <netinet/ip6.h>
71#endif
72
73#include <net/if_gre.h>
74#include <machine/in_cksum.h>
75
/* Compile-time default for the per-VNET ip_gre_ttl variable below. */
#define	GRE_TTL			30
/* Per-VNET TTL; in_gre_output() copies it into the outer header's ip_ttl. */
VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
#define	V_ip_gre_ttl		VNET(ip_gre_ttl)
/* Read-write "grettl" OID under _net_inet_ip backed by ip_gre_ttl. */
SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ip_gre_ttl), 0, "Default TTL value for encapsulated packets");
81
/*
 * IPv4 wrapper around the generic gre_socket.  Code in this file converts
 * a struct gre_socket pointer back to this type with __containerof() in
 * order to reach the address.
 */
struct in_gre_socket {
	struct gre_socket		base;	/* generic socket record */
	in_addr_t			addr;	/* local address the socket is bound to */
};
/*
 * Per-VNET hash tables.  All of them start out as NULL and are allocated
 * by in_gre_ioctl() on the first SIOCSIFPHYADDR request; readers test
 * V_ipv4_hashtbl against NULL before touching any table.
 */
VNET_DEFINE_STATIC(struct gre_sockets *, ipv4_sockets) = NULL;
VNET_DEFINE_STATIC(struct gre_list *, ipv4_hashtbl) = NULL;
VNET_DEFINE_STATIC(struct gre_list *, ipv4_srchashtbl) = NULL;
#define	V_ipv4_sockets		VNET(ipv4_sockets)
#define	V_ipv4_hashtbl		VNET(ipv4_hashtbl)
#define	V_ipv4_srchashtbl	VNET(ipv4_srchashtbl)
/* Bucket of V_ipv4_hashtbl selected by the (src, dst) address pair. */
#define	GRE_HASH(src, dst)	(V_ipv4_hashtbl[\
    in_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)])
/* Bucket of V_ipv4_srchashtbl selected by source address; src must be an lvalue. */
#define	GRE_SRCHASH(src)	(V_ipv4_srchashtbl[\
    fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
/* Bucket of V_ipv4_sockets selected by local address; src must be an lvalue. */
#define	GRE_SOCKHASH(src)	(V_ipv4_sockets[\
    fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
/* GRE_HASH() bucket for the addresses stored in softc's outer IP header. */
#define	GRE_HASH_SC(sc)		GRE_HASH((sc)->gre_oip.ip_src.s_addr,\
    (sc)->gre_oip.ip_dst.s_addr)
100
101static uint32_t
102in_gre_hashval(in_addr_t src, in_addr_t dst)
103{
104	uint32_t ret;
105
106	ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
107	return (fnv_32_buf(&dst, sizeof(dst), ret));
108}
109
110static struct gre_socket*
111in_gre_lookup_socket(in_addr_t addr)
112{
113	struct gre_socket *gs;
114	struct in_gre_socket *s;
115
116	CK_LIST_FOREACH(gs, &GRE_SOCKHASH(addr), chain) {
117		s = __containerof(gs, struct in_gre_socket, base);
118		if (s->addr == addr)
119			break;
120	}
121	return (gs);
122}
123
124static int
125in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst,
126    uint32_t opts)
127{
128	struct gre_list *head;
129	struct gre_softc *tmp;
130	struct gre_socket *gs;
131
132	if (sc->gre_family == AF_INET &&
133	    sc->gre_oip.ip_src.s_addr == src &&
134	    sc->gre_oip.ip_dst.s_addr == dst &&
135	    (sc->gre_options & GRE_UDPENCAP) == (opts & GRE_UDPENCAP))
136		return (EEXIST);
137
138	if (opts & GRE_UDPENCAP) {
139		gs = in_gre_lookup_socket(src);
140		if (gs == NULL)
141			return (0);
142		head = &gs->list;
143	} else
144		head = &GRE_HASH(src, dst);
145
146	CK_LIST_FOREACH(tmp, head, chain) {
147		if (tmp == sc)
148			continue;
149		if (tmp->gre_oip.ip_src.s_addr == src &&
150		    tmp->gre_oip.ip_dst.s_addr == dst)
151			return (EADDRNOTAVAIL);
152	}
153	return (0);
154}
155
156static int
157in_gre_lookup(const struct mbuf *m, int off, int proto, void **arg)
158{
159	const struct ip *ip;
160	struct gre_softc *sc;
161
162	if (V_ipv4_hashtbl == NULL)
163		return (0);
164
165	NET_EPOCH_ASSERT();
166	ip = mtod(m, const struct ip *);
167	CK_LIST_FOREACH(sc, &GRE_HASH(ip->ip_dst.s_addr,
168	    ip->ip_src.s_addr), chain) {
169		/*
170		 * This is an inbound packet, its ip_dst is source address
171		 * in softc.
172		 */
173		if (sc->gre_oip.ip_src.s_addr == ip->ip_dst.s_addr &&
174		    sc->gre_oip.ip_dst.s_addr == ip->ip_src.s_addr) {
175			if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
176				return (0);
177			*arg = sc;
178			return (ENCAP_DRV_LOOKUP);
179		}
180	}
181	return (0);
182}
183
184/*
185 * Check that ingress address belongs to local host.
186 */
187static void
188in_gre_set_running(struct gre_softc *sc)
189{
190
191	if (in_localip(sc->gre_oip.ip_src))
192		GRE2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
193	else
194		GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
195}
196
197/*
198 * ifaddr_event handler.
199 * Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent
200 * source address spoofing.
201 */
202static void
203in_gre_srcaddr(void *arg __unused, const struct sockaddr *sa,
204    int event __unused)
205{
206	const struct sockaddr_in *sin;
207	struct gre_softc *sc;
208
209	/* Check that VNET is ready */
210	if (V_ipv4_hashtbl == NULL)
211		return;
212
213	NET_EPOCH_ASSERT();
214	sin = (const struct sockaddr_in *)sa;
215	CK_LIST_FOREACH(sc, &GRE_SRCHASH(sin->sin_addr.s_addr), srchash) {
216		if (sc->gre_oip.ip_src.s_addr != sin->sin_addr.s_addr)
217			continue;
218		in_gre_set_running(sc);
219	}
220}
221
222static void
223in_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp,
224    const struct sockaddr *sa, void *ctx)
225{
226	struct epoch_tracker et;
227	struct gre_socket *gs;
228	struct gre_softc *sc;
229	in_addr_t dst;
230
231	NET_EPOCH_ENTER(et);
232	/*
233	 * udp_append() holds reference to inp, it is safe to check
234	 * inp_flags2 without INP_RLOCK().
235	 * If socket was closed before we have entered NET_EPOCH section,
236	 * INP_FREED flag should be set. Otherwise it should be safe to
237	 * make access to ctx data, because gre_so will be freed by
238	 * gre_sofree() via NET_EPOCH_CALL().
239	 */
240	if (__predict_false(inp->inp_flags2 & INP_FREED)) {
241		NET_EPOCH_EXIT(et);
242		m_freem(m);
243		return;
244	}
245
246	gs = (struct gre_socket *)ctx;
247	dst = ((const struct sockaddr_in *)sa)->sin_addr.s_addr;
248	CK_LIST_FOREACH(sc, &gs->list, chain) {
249		if (sc->gre_oip.ip_dst.s_addr == dst)
250			break;
251	}
252	if (sc != NULL && (GRE2IFP(sc)->if_flags & IFF_UP) != 0){
253		gre_input(m, off + sizeof(struct udphdr), IPPROTO_UDP, sc);
254		NET_EPOCH_EXIT(et);
255		return;
256	}
257	m_freem(m);
258	NET_EPOCH_EXIT(et);
259}
260
/*
 * Attach sc to the UDP socket bound to its tunnel source address,
 * creating and binding that socket (port GRE_UDPPORT) when none exists.
 *
 * On success sc is linked into the socket's list, sc->gre_so points at
 * the socket record and 0 is returned.  On failure the error from
 * socreate(), udp_set_kernel_tunneling(), sosetopt() or sobind() is
 * returned and any socket created here is closed and freed.
 */
static int
in_gre_setup_socket(struct gre_softc *sc)
{
	struct sockopt sopt;
	struct sockaddr_in sin;
	struct in_gre_socket *s;
	struct gre_socket *gs;
	in_addr_t addr;
	int error, value;

	/*
	 * NOTE: we are protected with gre_ioctl_sx lock.
	 *
	 * First check whether a socket is already configured.
	 * If so, check that the source address was not changed.
	 * If the address is different, close the socket when no other
	 * tunnels use it, and forget it in either case.
	 */
	addr = sc->gre_oip.ip_src.s_addr;
	gs = sc->gre_so;
	if (gs != NULL) {
		s = __containerof(gs, struct in_gre_socket, base);
		if (s->addr != addr) {
			if (CK_LIST_EMPTY(&gs->list)) {
				/* Unlink now; the record itself is freed via NET_EPOCH_CALL(). */
				CK_LIST_REMOVE(gs, chain);
				soclose(gs->so);
				NET_EPOCH_CALL(gre_sofree, &gs->epoch_ctx);
			}
			gs = sc->gre_so = NULL;
		}
	}

	if (gs == NULL) {
		/*
		 * Check that socket for given address is already
		 * configured.
		 */
		gs = in_gre_lookup_socket(addr);
		if (gs == NULL) {
			s = malloc(sizeof(*s), M_GRE, M_WAITOK | M_ZERO);
			s->addr = addr;
			gs = &s->base;

			error = socreate(sc->gre_family, &gs->so,
			    SOCK_DGRAM, IPPROTO_UDP, curthread->td_ucred,
			    curthread);
			if (error != 0) {
				if_printf(GRE2IFP(sc),
				    "cannot create socket: %d\n", error);
				/* No socket to close yet, so do not use the fail label. */
				free(s, M_GRE);
				return (error);
			}

			/* Deliver datagrams to in_gre_udp_input() with gs as its ctx. */
			error = udp_set_kernel_tunneling(gs->so,
			    in_gre_udp_input, NULL, gs);
			if (error != 0) {
				if_printf(GRE2IFP(sc),
				    "cannot set UDP tunneling: %d\n", error);
				goto fail;
			}

			/*
			 * Enable IP_BINDANY before binding.
			 * NOTE(review): presumably so that sobind() succeeds even
			 * when addr is not currently configured locally -- confirm.
			 */
			memset(&sopt, 0, sizeof(sopt));
			sopt.sopt_dir = SOPT_SET;
			sopt.sopt_level = IPPROTO_IP;
			sopt.sopt_name = IP_BINDANY;
			sopt.sopt_val = &value;
			sopt.sopt_valsize = sizeof(value);
			value = 1;
			error = sosetopt(gs->so, &sopt);
			if (error != 0) {
				if_printf(GRE2IFP(sc),
				    "cannot set IP_BINDANY opt: %d\n", error);
				goto fail;
			}

			memset(&sin, 0, sizeof(sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof(sin);
			sin.sin_addr.s_addr = addr;
			sin.sin_port = htons(GRE_UDPPORT);
			error = sobind(gs->so, (struct sockaddr *)&sin,
			    curthread);
			if (error != 0) {
				if_printf(GRE2IFP(sc),
				    "cannot bind socket: %d\n", error);
				goto fail;
			}
			/* Add socket to the chain */
			CK_LIST_INSERT_HEAD(&GRE_SOCKHASH(addr), gs, chain);
		}
	}

	/* Add softc to the socket's list */
	CK_LIST_INSERT_HEAD(&gs->list, sc, chain);
	sc->gre_so = gs;
	return (0);
fail:
	/*
	 * Only reached for a socket created above that has not yet been
	 * inserted into the socket hash, so it can be released directly.
	 */
	soclose(gs->so);
	free(s, M_GRE);
	return (error);
}
362
363static int
364in_gre_attach(struct gre_softc *sc)
365{
366	struct grehdr *gh;
367	int error;
368
369	if (sc->gre_options & GRE_UDPENCAP) {
370		sc->gre_csumflags = CSUM_UDP;
371		sc->gre_hlen = sizeof(struct greudp);
372		sc->gre_oip.ip_p = IPPROTO_UDP;
373		gh = &sc->gre_udphdr->gi_gre;
374		gre_update_udphdr(sc, &sc->gre_udp,
375		    in_pseudo(sc->gre_oip.ip_src.s_addr,
376		    sc->gre_oip.ip_dst.s_addr, 0));
377	} else {
378		sc->gre_hlen = sizeof(struct greip);
379		sc->gre_oip.ip_p = IPPROTO_GRE;
380		gh = &sc->gre_iphdr->gi_gre;
381	}
382	sc->gre_oip.ip_v = IPVERSION;
383	sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
384	gre_update_hdr(sc, gh);
385
386	/*
387	 * If we return error, this means that sc is not linked,
388	 * and caller should reset gre_family and free(sc->gre_hdr).
389	 */
390	if (sc->gre_options & GRE_UDPENCAP) {
391		error = in_gre_setup_socket(sc);
392		if (error != 0)
393			return (error);
394	} else
395		CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain);
396	CK_LIST_INSERT_HEAD(&GRE_SRCHASH(sc->gre_oip.ip_src.s_addr),
397	    sc, srchash);
398
399	/* Set IFF_DRV_RUNNING if interface is ready */
400	in_gre_set_running(sc);
401	return (0);
402}
403
404int
405in_gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t value)
406{
407	int error;
408
409	/* NOTE: we are protected with gre_ioctl_sx lock */
410	MPASS(cmd == GRESKEY || cmd == GRESOPTS || cmd == GRESPORT);
411	MPASS(sc->gre_family == AF_INET);
412
413	/*
414	 * If we are going to change encapsulation protocol, do check
415	 * for duplicate tunnels. Return EEXIST here to do not confuse
416	 * user.
417	 */
418	if (cmd == GRESOPTS &&
419	    (sc->gre_options & GRE_UDPENCAP) != (value & GRE_UDPENCAP) &&
420	    in_gre_checkdup(sc, sc->gre_oip.ip_src.s_addr,
421		sc->gre_oip.ip_dst.s_addr, value) == EADDRNOTAVAIL)
422		return (EEXIST);
423
424	CK_LIST_REMOVE(sc, chain);
425	CK_LIST_REMOVE(sc, srchash);
426	GRE_WAIT();
427	switch (cmd) {
428	case GRESKEY:
429		sc->gre_key = value;
430		break;
431	case GRESOPTS:
432		sc->gre_options = value;
433		break;
434	case GRESPORT:
435		sc->gre_port = value;
436		break;
437	}
438	error = in_gre_attach(sc);
439	if (error != 0) {
440		sc->gre_family = 0;
441		free(sc->gre_hdr, M_GRE);
442	}
443	return (error);
444}
445
446int
447in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data)
448{
449	struct ifreq *ifr = (struct ifreq *)data;
450	struct sockaddr_in *dst, *src;
451	struct ip *ip;
452	int error;
453
454	/* NOTE: we are protected with gre_ioctl_sx lock */
455	error = EINVAL;
456	switch (cmd) {
457	case SIOCSIFPHYADDR:
458		src = &((struct in_aliasreq *)data)->ifra_addr;
459		dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
460
461		/* sanity checks */
462		if (src->sin_family != dst->sin_family ||
463		    src->sin_family != AF_INET ||
464		    src->sin_len != dst->sin_len ||
465		    src->sin_len != sizeof(*src))
466			break;
467		if (src->sin_addr.s_addr == INADDR_ANY ||
468		    dst->sin_addr.s_addr == INADDR_ANY) {
469			error = EADDRNOTAVAIL;
470			break;
471		}
472		if (V_ipv4_hashtbl == NULL) {
473			V_ipv4_hashtbl = gre_hashinit();
474			V_ipv4_srchashtbl = gre_hashinit();
475			V_ipv4_sockets = (struct gre_sockets *)gre_hashinit();
476		}
477		error = in_gre_checkdup(sc, src->sin_addr.s_addr,
478		    dst->sin_addr.s_addr, sc->gre_options);
479		if (error == EADDRNOTAVAIL)
480			break;
481		if (error == EEXIST) {
482			/* Addresses are the same. Just return. */
483			error = 0;
484			break;
485		}
486		ip = malloc(sizeof(struct greudp) + 3 * sizeof(uint32_t),
487		    M_GRE, M_WAITOK | M_ZERO);
488		ip->ip_src.s_addr = src->sin_addr.s_addr;
489		ip->ip_dst.s_addr = dst->sin_addr.s_addr;
490		if (sc->gre_family != 0) {
491			/* Detach existing tunnel first */
492			CK_LIST_REMOVE(sc, chain);
493			CK_LIST_REMOVE(sc, srchash);
494			GRE_WAIT();
495			free(sc->gre_hdr, M_GRE);
496			/* XXX: should we notify about link state change? */
497		}
498		sc->gre_family = AF_INET;
499		sc->gre_hdr = ip;
500		sc->gre_oseq = 0;
501		sc->gre_iseq = UINT32_MAX;
502		error = in_gre_attach(sc);
503		if (error != 0) {
504			sc->gre_family = 0;
505			free(sc->gre_hdr, M_GRE);
506		}
507		break;
508	case SIOCGIFPSRCADDR:
509	case SIOCGIFPDSTADDR:
510		if (sc->gre_family != AF_INET) {
511			error = EADDRNOTAVAIL;
512			break;
513		}
514		src = (struct sockaddr_in *)&ifr->ifr_addr;
515		memset(src, 0, sizeof(*src));
516		src->sin_family = AF_INET;
517		src->sin_len = sizeof(*src);
518		src->sin_addr = (cmd == SIOCGIFPSRCADDR) ?
519		    sc->gre_oip.ip_src: sc->gre_oip.ip_dst;
520		error = prison_if(curthread->td_ucred, (struct sockaddr *)src);
521		if (error != 0)
522			memset(src, 0, sizeof(*src));
523		break;
524	}
525	return (error);
526}
527
528int
529in_gre_output(struct mbuf *m, int af, int hlen)
530{
531	struct greip *gi;
532
533	gi = mtod(m, struct greip *);
534	switch (af) {
535	case AF_INET:
536		/*
537		 * gre_transmit() has used M_PREPEND() that doesn't guarantee
538		 * m_data is contiguous more than hlen bytes. Use m_copydata()
539		 * here to avoid m_pullup().
540		 */
541		m_copydata(m, hlen + offsetof(struct ip, ip_tos),
542		    sizeof(u_char), &gi->gi_ip.ip_tos);
543		m_copydata(m, hlen + offsetof(struct ip, ip_id),
544		    sizeof(u_short), (caddr_t)&gi->gi_ip.ip_id);
545		break;
546#ifdef INET6
547	case AF_INET6:
548		gi->gi_ip.ip_tos = 0; /* XXX */
549		ip_fillid(&gi->gi_ip);
550		break;
551#endif
552	}
553	gi->gi_ip.ip_ttl = V_ip_gre_ttl;
554	gi->gi_ip.ip_len = htons(m->m_pkthdr.len);
555	return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL));
556}
557
/* Handle returned by ip_encap_register_srcaddr() in in_gre_init(). */
static const struct srcaddrtab *ipv4_srcaddrtab = NULL;
/* Handle returned by ip_encap_attach() in in_gre_init(). */
static const struct encaptab *ecookie = NULL;
/*
 * Encap registration for IPPROTO_GRE packets: in_gre_lookup() selects
 * the tunnel and gre_input() receives the packet.
 */
static const struct encap_config ipv4_encap_cfg = {
	.proto = IPPROTO_GRE,
	/* Outer IP + GRE header followed by at least one more IP header. */
	.min_length = sizeof(struct greip) + sizeof(struct ip),
	/* Same value in_gre_lookup() returns on a match. */
	.exact_match = ENCAP_DRV_LOOKUP,
	.lookup = in_gre_lookup,
	.input = gre_input
};
567
568void
569in_gre_init(void)
570{
571
572	if (!IS_DEFAULT_VNET(curvnet))
573		return;
574	ipv4_srcaddrtab = ip_encap_register_srcaddr(in_gre_srcaddr,
575	    NULL, M_WAITOK);
576	ecookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK);
577}
578
579void
580in_gre_uninit(void)
581{
582
583	if (IS_DEFAULT_VNET(curvnet)) {
584		ip_encap_detach(ecookie);
585		ip_encap_unregister_srcaddr(ipv4_srcaddrtab);
586	}
587	if (V_ipv4_hashtbl != NULL) {
588		gre_hashdestroy(V_ipv4_hashtbl);
589		V_ipv4_hashtbl = NULL;
590		GRE_WAIT();
591		gre_hashdestroy(V_ipv4_srchashtbl);
592		gre_hashdestroy((struct gre_list *)V_ipv4_sockets);
593	}
594}
595