Deleted Added
full compact
if_gre.c (248324) if_gre.c (249925)
1/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
1/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
2/* $FreeBSD: head/sys/net/if_gre.c 248324 2013-03-15 12:55:30Z glebius $ */
2/* $FreeBSD: head/sys/net/if_gre.c 249925 2013-04-26 12:50:32Z glebius $ */
3
4/*-
5 * Copyright (c) 1998 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Heiko W.Rupp <hwr@pilhuhn.de>
10 *
11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35/*
36 * Encapsulate L3 protocols into IP
37 * See RFC 2784 (successor of RFC 1701 and 1702) for more details.
38 * If_gre is compatible with Cisco GRE tunnels, so you can
39 * have a NetBSD box as the other end of a tunnel interface of a Cisco
40 * router. See gre(4) for more details.
41 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
42 */
43
44#include "opt_atalk.h"
45#include "opt_inet.h"
46#include "opt_inet6.h"
47
48#include <sys/param.h>
49#include <sys/jail.h>
50#include <sys/kernel.h>
51#include <sys/libkern.h>
52#include <sys/malloc.h>
53#include <sys/module.h>
54#include <sys/mbuf.h>
55#include <sys/priv.h>
56#include <sys/proc.h>
57#include <sys/protosw.h>
58#include <sys/socket.h>
59#include <sys/sockio.h>
60#include <sys/sysctl.h>
61#include <sys/systm.h>
62
63#include <net/ethernet.h>
64#include <net/if.h>
65#include <net/if_clone.h>
66#include <net/if_types.h>
67#include <net/route.h>
68#include <net/vnet.h>
69
70#ifdef INET
71#include <netinet/in.h>
72#include <netinet/in_systm.h>
73#include <netinet/in_var.h>
74#include <netinet/ip.h>
75#include <netinet/ip_gre.h>
76#include <netinet/ip_var.h>
77#include <netinet/ip_encap.h>
78#else
79#error "Huh? if_gre without inet?"
80#endif
81
82#include <net/bpf.h>
83
84#include <net/if_gre.h>
85
86/*
87 * It is not easy to calculate the right value for a GRE MTU.
88 * We leave this task to the admin and use the same default that
89 * other vendors use.
90 */
91#define GREMTU 1476
92
93#define MTAG_COOKIE_GRE 1307983903
94#define MTAG_GRE_NESTING 1
/*
 * Payload of the MTAG_GRE_NESTING mbuf tag that gre_output() attaches to a
 * packet in order to bound and detect recursion through gre interfaces.
 */
95struct mtag_gre_nesting {
96 uint16_t count; /* number of times gre_output() has seen this packet */
97 uint16_t max; /* nesting limit captured when the tag was allocated */
98 struct ifnet *ifp[]; /* interfaces traversed so far; allocated with max slots */
99};
100
101/*
102 * gre_mtx protects all global variables in if_gre.c.
103 * XXX: gre_softc data not protected yet.
104 */
105struct mtx gre_mtx;
106static const char grename[] = "gre";
107static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
108
109struct gre_softc_head gre_softc_list;
110
111static int gre_clone_create(struct if_clone *, int, caddr_t);
112static void gre_clone_destroy(struct ifnet *);
113static struct if_clone *gre_cloner;
114
115static int gre_ioctl(struct ifnet *, u_long, caddr_t);
3
4/*-
5 * Copyright (c) 1998 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Heiko W.Rupp <hwr@pilhuhn.de>
10 *
11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35/*
36 * Encapsulate L3 protocols into IP
37 * See RFC 2784 (successor of RFC 1701 and 1702) for more details.
38 * If_gre is compatible with Cisco GRE tunnels, so you can
39 * have a NetBSD box as the other end of a tunnel interface of a Cisco
40 * router. See gre(4) for more details.
41 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
42 */
43
44#include "opt_atalk.h"
45#include "opt_inet.h"
46#include "opt_inet6.h"
47
48#include <sys/param.h>
49#include <sys/jail.h>
50#include <sys/kernel.h>
51#include <sys/libkern.h>
52#include <sys/malloc.h>
53#include <sys/module.h>
54#include <sys/mbuf.h>
55#include <sys/priv.h>
56#include <sys/proc.h>
57#include <sys/protosw.h>
58#include <sys/socket.h>
59#include <sys/sockio.h>
60#include <sys/sysctl.h>
61#include <sys/systm.h>
62
63#include <net/ethernet.h>
64#include <net/if.h>
65#include <net/if_clone.h>
66#include <net/if_types.h>
67#include <net/route.h>
68#include <net/vnet.h>
69
70#ifdef INET
71#include <netinet/in.h>
72#include <netinet/in_systm.h>
73#include <netinet/in_var.h>
74#include <netinet/ip.h>
75#include <netinet/ip_gre.h>
76#include <netinet/ip_var.h>
77#include <netinet/ip_encap.h>
78#else
79#error "Huh? if_gre without inet?"
80#endif
81
82#include <net/bpf.h>
83
84#include <net/if_gre.h>
85
86/*
87 * It is not easy to calculate the right value for a GRE MTU.
88 * We leave this task to the admin and use the same default that
89 * other vendors use.
90 */
91#define GREMTU 1476
92
93#define MTAG_COOKIE_GRE 1307983903
94#define MTAG_GRE_NESTING 1
/*
 * Payload of the MTAG_GRE_NESTING mbuf tag that gre_output() attaches to a
 * packet in order to bound and detect recursion through gre interfaces.
 */
95struct mtag_gre_nesting {
96 uint16_t count; /* number of times gre_output() has seen this packet */
97 uint16_t max; /* nesting limit captured when the tag was allocated */
98 struct ifnet *ifp[]; /* interfaces traversed so far; allocated with max slots */
99};
100
101/*
102 * gre_mtx protects all global variables in if_gre.c.
103 * XXX: gre_softc data not protected yet.
104 */
105struct mtx gre_mtx;
106static const char grename[] = "gre";
107static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
108
109struct gre_softc_head gre_softc_list;
110
111static int gre_clone_create(struct if_clone *, int, caddr_t);
112static void gre_clone_destroy(struct ifnet *);
113static struct if_clone *gre_cloner;
114
115static int gre_ioctl(struct ifnet *, u_long, caddr_t);
116static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
117 struct route *ro);
116static int gre_output(struct ifnet *, struct mbuf *,
117 const struct sockaddr *, struct route *);
118
119static int gre_compute_route(struct gre_softc *sc);
120
121static void greattach(void);
122
123#ifdef INET
124extern struct domain inetdomain;
/*
 * Protocol switch entry handed to encap_attach() when the tunnel runs in
 * GRE mode: inbound packets go to gre_input(); every other handler is the
 * raw-IP (rip_*) one.
 */
125static const struct protosw in_gre_protosw = {
126 .pr_type = SOCK_RAW,
127 .pr_domain = &inetdomain,
128 .pr_protocol = IPPROTO_GRE,
129 .pr_flags = PR_ATOMIC|PR_ADDR,
130 .pr_input = gre_input,
131 .pr_output = (pr_output_t *)rip_output,
132 .pr_ctlinput = rip_ctlinput,
133 .pr_ctloutput = rip_ctloutput,
134 .pr_usrreqs = &rip_usrreqs
135};
/*
 * Protocol switch entry handed to encap_attach() when the tunnel runs in
 * IPPROTO_MOBILE mode: inbound packets go to gre_mobile_input(); every
 * other handler is the raw-IP (rip_*) one.
 */
136static const struct protosw in_mobile_protosw = {
137 .pr_type = SOCK_RAW,
138 .pr_domain = &inetdomain,
139 .pr_protocol = IPPROTO_MOBILE,
140 .pr_flags = PR_ATOMIC|PR_ADDR,
141 .pr_input = gre_mobile_input,
142 .pr_output = (pr_output_t *)rip_output,
143 .pr_ctlinput = rip_ctlinput,
144 .pr_ctloutput = rip_ctloutput,
145 .pr_usrreqs = &rip_usrreqs
146};
147#endif
148
149SYSCTL_DECL(_net_link);
150static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
151 "Generic Routing Encapsulation");
152#ifndef MAX_GRE_NEST
153/*
154 * This macro controls the default upper limit on nesting of gre tunnels.
155 * Since setting a large value for this macro with a careless configuration
156 * may crash the system, we don't allow any nesting by default.
157 * If you need to configure nested gre tunnels, you can define this macro
158 * in your kernel configuration file. However, if you do so, please be
159 * careful to configure the tunnels so that they do not form a loop.
160 */
161#define MAX_GRE_NEST 1
162#endif
163static int max_gre_nesting = MAX_GRE_NEST;
164SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW,
165 &max_gre_nesting, 0, "Max nested tunnels");
166
167/* ARGSUSED */
168static void
169greattach(void)
170{
171
172 mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF);
173 LIST_INIT(&gre_softc_list);
174 gre_cloner = if_clone_simple(grename, gre_clone_create,
175 gre_clone_destroy, 0);
176}
177
178static int
179gre_clone_create(ifc, unit, params)
180 struct if_clone *ifc;
181 int unit;
182 caddr_t params;
183{
184 struct gre_softc *sc;
185
186 sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
187
188 GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
189 if (GRE2IFP(sc) == NULL) {
190 free(sc, M_GRE);
191 return (ENOSPC);
192 }
193
194 GRE2IFP(sc)->if_softc = sc;
195 if_initname(GRE2IFP(sc), grename, unit);
196
197 GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
198 GRE2IFP(sc)->if_addrlen = 0;
199 GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */
200 GRE2IFP(sc)->if_mtu = GREMTU;
201 GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
202 GRE2IFP(sc)->if_output = gre_output;
203 GRE2IFP(sc)->if_ioctl = gre_ioctl;
204 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
205 sc->g_proto = IPPROTO_GRE;
206 GRE2IFP(sc)->if_flags |= IFF_LINK0;
207 sc->encap = NULL;
208 sc->gre_fibnum = curthread->td_proc->p_fibnum;
209 sc->wccp_ver = WCCP_V1;
210 sc->key = 0;
211 if_attach(GRE2IFP(sc));
212 bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
213 mtx_lock(&gre_mtx);
214 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
215 mtx_unlock(&gre_mtx);
216 return (0);
217}
218
219static void
220gre_clone_destroy(ifp)
221 struct ifnet *ifp;
222{
223 struct gre_softc *sc = ifp->if_softc;
224
225 mtx_lock(&gre_mtx);
226 LIST_REMOVE(sc, sc_list);
227 mtx_unlock(&gre_mtx);
228
229#ifdef INET
230 if (sc->encap != NULL)
231 encap_detach(sc->encap);
232#endif
233 bpfdetach(ifp);
234 if_detach(ifp);
235 if_free(ifp);
236 free(sc, M_GRE);
237}
238
239/*
240 * The output routine. Takes a packet and encapsulates it in the protocol
241 * given by sc->g_proto. See also RFC 1701 and RFC 2004
242 */
243static int
118
119static int gre_compute_route(struct gre_softc *sc);
120
121static void greattach(void);
122
123#ifdef INET
124extern struct domain inetdomain;
/*
 * Protocol switch entry handed to encap_attach() when the tunnel runs in
 * GRE mode: inbound packets go to gre_input(); every other handler is the
 * raw-IP (rip_*) one.
 */
125static const struct protosw in_gre_protosw = {
126 .pr_type = SOCK_RAW,
127 .pr_domain = &inetdomain,
128 .pr_protocol = IPPROTO_GRE,
129 .pr_flags = PR_ATOMIC|PR_ADDR,
130 .pr_input = gre_input,
131 .pr_output = (pr_output_t *)rip_output,
132 .pr_ctlinput = rip_ctlinput,
133 .pr_ctloutput = rip_ctloutput,
134 .pr_usrreqs = &rip_usrreqs
135};
/*
 * Protocol switch entry handed to encap_attach() when the tunnel runs in
 * IPPROTO_MOBILE mode: inbound packets go to gre_mobile_input(); every
 * other handler is the raw-IP (rip_*) one.
 */
136static const struct protosw in_mobile_protosw = {
137 .pr_type = SOCK_RAW,
138 .pr_domain = &inetdomain,
139 .pr_protocol = IPPROTO_MOBILE,
140 .pr_flags = PR_ATOMIC|PR_ADDR,
141 .pr_input = gre_mobile_input,
142 .pr_output = (pr_output_t *)rip_output,
143 .pr_ctlinput = rip_ctlinput,
144 .pr_ctloutput = rip_ctloutput,
145 .pr_usrreqs = &rip_usrreqs
146};
147#endif
148
149SYSCTL_DECL(_net_link);
150static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
151 "Generic Routing Encapsulation");
152#ifndef MAX_GRE_NEST
153/*
154 * This macro controls the default upper limit on nesting of gre tunnels.
155 * Since setting a large value for this macro with a careless configuration
156 * may crash the system, we don't allow any nesting by default.
157 * If you need to configure nested gre tunnels, you can define this macro
158 * in your kernel configuration file. However, if you do so, please be
159 * careful to configure the tunnels so that they do not form a loop.
160 */
161#define MAX_GRE_NEST 1
162#endif
163static int max_gre_nesting = MAX_GRE_NEST;
164SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW,
165 &max_gre_nesting, 0, "Max nested tunnels");
166
167/* ARGSUSED */
168static void
169greattach(void)
170{
171
172 mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF);
173 LIST_INIT(&gre_softc_list);
174 gre_cloner = if_clone_simple(grename, gre_clone_create,
175 gre_clone_destroy, 0);
176}
177
178static int
179gre_clone_create(ifc, unit, params)
180 struct if_clone *ifc;
181 int unit;
182 caddr_t params;
183{
184 struct gre_softc *sc;
185
186 sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
187
188 GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
189 if (GRE2IFP(sc) == NULL) {
190 free(sc, M_GRE);
191 return (ENOSPC);
192 }
193
194 GRE2IFP(sc)->if_softc = sc;
195 if_initname(GRE2IFP(sc), grename, unit);
196
197 GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
198 GRE2IFP(sc)->if_addrlen = 0;
199 GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */
200 GRE2IFP(sc)->if_mtu = GREMTU;
201 GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
202 GRE2IFP(sc)->if_output = gre_output;
203 GRE2IFP(sc)->if_ioctl = gre_ioctl;
204 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
205 sc->g_proto = IPPROTO_GRE;
206 GRE2IFP(sc)->if_flags |= IFF_LINK0;
207 sc->encap = NULL;
208 sc->gre_fibnum = curthread->td_proc->p_fibnum;
209 sc->wccp_ver = WCCP_V1;
210 sc->key = 0;
211 if_attach(GRE2IFP(sc));
212 bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
213 mtx_lock(&gre_mtx);
214 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
215 mtx_unlock(&gre_mtx);
216 return (0);
217}
218
219static void
220gre_clone_destroy(ifp)
221 struct ifnet *ifp;
222{
223 struct gre_softc *sc = ifp->if_softc;
224
225 mtx_lock(&gre_mtx);
226 LIST_REMOVE(sc, sc_list);
227 mtx_unlock(&gre_mtx);
228
229#ifdef INET
230 if (sc->encap != NULL)
231 encap_detach(sc->encap);
232#endif
233 bpfdetach(ifp);
234 if_detach(ifp);
235 if_free(ifp);
236 free(sc, M_GRE);
237}
238
239/*
240 * The output routine. Takes a packet and encapsulates it in the protocol
241 * given by sc->g_proto. See also RFC 1701 and RFC 2004
242 */
243static int
244gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
244gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
245 struct route *ro)
246{
247 int error = 0;
248 struct gre_softc *sc = ifp->if_softc;
249 struct greip *gh;
250 struct ip *ip;
251 struct m_tag *mtag;
252 struct mtag_gre_nesting *gt;
253 size_t len;
254 u_short gre_ip_id = 0;
255 uint8_t gre_ip_tos = 0;
256 u_int16_t etype = 0;
257 struct mobile_h mob_h;
258 u_int32_t af;
259 int extra = 0, max;
260
261 /*
262 * gre may cause infinite recursion calls when misconfigured. High
263 * nesting level may cause stack exhaustion. We'll prevent this by
264 * detecting loops and by introducing upper limit.
265 */
266 mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL);
267 if (mtag != NULL) {
268 struct ifnet **ifp2;
269
270 gt = (struct mtag_gre_nesting *)(mtag + 1);
271 gt->count++;
272 if (gt->count > min(gt->max,max_gre_nesting)) {
273 printf("%s: hit maximum recursion limit %u on %s\n",
274 __func__, gt->count - 1, ifp->if_xname);
275 m_freem(m);
276 error = EIO; /* is there better errno? */
277 goto end;
278 }
279
280 ifp2 = gt->ifp;
281 for (max = gt->count - 1; max > 0; max--) {
282 if (*ifp2 == ifp)
283 break;
284 ifp2++;
285 }
286 if (*ifp2 == ifp) {
287 printf("%s: detected loop with nexting %u on %s\n",
288 __func__, gt->count-1, ifp->if_xname);
289 m_freem(m);
290 error = EIO; /* is there better errno? */
291 goto end;
292 }
293 *ifp2 = ifp;
294
295 } else {
296 /*
297 * Given that people should NOT increase max_gre_nesting beyond
298 * their real needs, we allocate once per packet rather than
299 * allocating an mtag once per passing through gre.
300 *
301 * Note: the sysctl does not actually check for saneness, so we
302 * limit the maximum numbers of possible recursions here.
303 */
304 max = imin(max_gre_nesting, 256);
305 /* If someone sets the sysctl <= 0, we want at least 1. */
306 max = imax(max, 1);
307 len = sizeof(struct mtag_gre_nesting) +
308 max * sizeof(struct ifnet *);
309 mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len,
310 M_NOWAIT);
311 if (mtag == NULL) {
312 m_freem(m);
313 error = ENOMEM;
314 goto end;
315 }
316 gt = (struct mtag_gre_nesting *)(mtag + 1);
317 bzero(gt, len);
318 gt->count = 1;
319 gt->max = max;
320 *gt->ifp = ifp;
321 m_tag_prepend(m, mtag);
322 }
323
324 if (!((ifp->if_flags & IFF_UP) &&
325 (ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
326 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
327 m_freem(m);
328 error = ENETDOWN;
329 goto end;
330 }
331
332 gh = NULL;
333 ip = NULL;
334
335 /* BPF writes need to be handled specially. */
245 struct route *ro)
246{
247 int error = 0;
248 struct gre_softc *sc = ifp->if_softc;
249 struct greip *gh;
250 struct ip *ip;
251 struct m_tag *mtag;
252 struct mtag_gre_nesting *gt;
253 size_t len;
254 u_short gre_ip_id = 0;
255 uint8_t gre_ip_tos = 0;
256 u_int16_t etype = 0;
257 struct mobile_h mob_h;
258 u_int32_t af;
259 int extra = 0, max;
260
261 /*
262 * gre may cause infinite recursion calls when misconfigured. High
263 * nesting level may cause stack exhaustion. We'll prevent this by
264 * detecting loops and by introducing upper limit.
265 */
266 mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL);
267 if (mtag != NULL) {
268 struct ifnet **ifp2;
269
270 gt = (struct mtag_gre_nesting *)(mtag + 1);
271 gt->count++;
272 if (gt->count > min(gt->max,max_gre_nesting)) {
273 printf("%s: hit maximum recursion limit %u on %s\n",
274 __func__, gt->count - 1, ifp->if_xname);
275 m_freem(m);
276 error = EIO; /* is there better errno? */
277 goto end;
278 }
279
280 ifp2 = gt->ifp;
281 for (max = gt->count - 1; max > 0; max--) {
282 if (*ifp2 == ifp)
283 break;
284 ifp2++;
285 }
286 if (*ifp2 == ifp) {
287 printf("%s: detected loop with nexting %u on %s\n",
288 __func__, gt->count-1, ifp->if_xname);
289 m_freem(m);
290 error = EIO; /* is there better errno? */
291 goto end;
292 }
293 *ifp2 = ifp;
294
295 } else {
296 /*
297 * Given that people should NOT increase max_gre_nesting beyond
298 * their real needs, we allocate once per packet rather than
299 * allocating an mtag once per passing through gre.
300 *
301 * Note: the sysctl does not actually check for saneness, so we
302 * limit the maximum numbers of possible recursions here.
303 */
304 max = imin(max_gre_nesting, 256);
305 /* If someone sets the sysctl <= 0, we want at least 1. */
306 max = imax(max, 1);
307 len = sizeof(struct mtag_gre_nesting) +
308 max * sizeof(struct ifnet *);
309 mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len,
310 M_NOWAIT);
311 if (mtag == NULL) {
312 m_freem(m);
313 error = ENOMEM;
314 goto end;
315 }
316 gt = (struct mtag_gre_nesting *)(mtag + 1);
317 bzero(gt, len);
318 gt->count = 1;
319 gt->max = max;
320 *gt->ifp = ifp;
321 m_tag_prepend(m, mtag);
322 }
323
324 if (!((ifp->if_flags & IFF_UP) &&
325 (ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
326 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
327 m_freem(m);
328 error = ENETDOWN;
329 goto end;
330 }
331
332 gh = NULL;
333 ip = NULL;
334
335 /* BPF writes need to be handled specially. */
336 if (dst->sa_family == AF_UNSPEC) {
336 if (dst->sa_family == AF_UNSPEC)
337 bcopy(dst->sa_data, &af, sizeof(af));
337 bcopy(dst->sa_data, &af, sizeof(af));
338 dst->sa_family = af;
339 }
340
341 if (bpf_peers_present(ifp->if_bpf)) {
338 else
342 af = dst->sa_family;
339 af = dst->sa_family;
340
341 if (bpf_peers_present(ifp->if_bpf))
343 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
342 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
344 }
345
346 m->m_flags &= ~(M_BCAST|M_MCAST);
347
348 if (sc->g_proto == IPPROTO_MOBILE) {
343
344 m->m_flags &= ~(M_BCAST|M_MCAST);
345
346 if (sc->g_proto == IPPROTO_MOBILE) {
349 if (dst->sa_family == AF_INET) {
347 if (af == AF_INET) {
350 struct mbuf *m0;
351 int msiz;
352
353 ip = mtod(m, struct ip *);
354
355 /*
356 * RFC2004 specifies that fragmented diagrams shouldn't
357 * be encapsulated.
358 */
359 if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
360 _IF_DROP(&ifp->if_snd);
361 m_freem(m);
362 error = EINVAL; /* is there better errno? */
363 goto end;
364 }
365 memset(&mob_h, 0, MOB_H_SIZ_L);
366 mob_h.proto = (ip->ip_p) << 8;
367 mob_h.odst = ip->ip_dst.s_addr;
368 ip->ip_dst.s_addr = sc->g_dst.s_addr;
369
370 /*
371 * If the packet comes from our host, we only change
372 * the destination address in the IP header.
373 * Else we also need to save and change the source
374 */
375 if (in_hosteq(ip->ip_src, sc->g_src)) {
376 msiz = MOB_H_SIZ_S;
377 } else {
378 mob_h.proto |= MOB_H_SBIT;
379 mob_h.osrc = ip->ip_src.s_addr;
380 ip->ip_src.s_addr = sc->g_src.s_addr;
381 msiz = MOB_H_SIZ_L;
382 }
383 mob_h.proto = htons(mob_h.proto);
384 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
385
386 if ((m->m_data - msiz) < m->m_pktdat) {
387 m0 = m_gethdr(M_NOWAIT, MT_DATA);
388 if (m0 == NULL) {
389 _IF_DROP(&ifp->if_snd);
390 m_freem(m);
391 error = ENOBUFS;
392 goto end;
393 }
394 m0->m_next = m;
395 m->m_data += sizeof(struct ip);
396 m->m_len -= sizeof(struct ip);
397 m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
398 m0->m_len = msiz + sizeof(struct ip);
399 m0->m_data += max_linkhdr;
400 memcpy(mtod(m0, caddr_t), (caddr_t)ip,
401 sizeof(struct ip));
402 m = m0;
403 } else { /* we have some space left in the old one */
404 m->m_data -= msiz;
405 m->m_len += msiz;
406 m->m_pkthdr.len += msiz;
407 bcopy(ip, mtod(m, caddr_t),
408 sizeof(struct ip));
409 }
410 ip = mtod(m, struct ip *);
411 memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
412 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
413 } else { /* AF_INET */
414 _IF_DROP(&ifp->if_snd);
415 m_freem(m);
416 error = EINVAL;
417 goto end;
418 }
419 } else if (sc->g_proto == IPPROTO_GRE) {
348 struct mbuf *m0;
349 int msiz;
350
351 ip = mtod(m, struct ip *);
352
353 /*
354 * RFC2004 specifies that fragmented diagrams shouldn't
355 * be encapsulated.
356 */
357 if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
358 _IF_DROP(&ifp->if_snd);
359 m_freem(m);
360 error = EINVAL; /* is there better errno? */
361 goto end;
362 }
363 memset(&mob_h, 0, MOB_H_SIZ_L);
364 mob_h.proto = (ip->ip_p) << 8;
365 mob_h.odst = ip->ip_dst.s_addr;
366 ip->ip_dst.s_addr = sc->g_dst.s_addr;
367
368 /*
369 * If the packet comes from our host, we only change
370 * the destination address in the IP header.
371 * Else we also need to save and change the source
372 */
373 if (in_hosteq(ip->ip_src, sc->g_src)) {
374 msiz = MOB_H_SIZ_S;
375 } else {
376 mob_h.proto |= MOB_H_SBIT;
377 mob_h.osrc = ip->ip_src.s_addr;
378 ip->ip_src.s_addr = sc->g_src.s_addr;
379 msiz = MOB_H_SIZ_L;
380 }
381 mob_h.proto = htons(mob_h.proto);
382 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
383
384 if ((m->m_data - msiz) < m->m_pktdat) {
385 m0 = m_gethdr(M_NOWAIT, MT_DATA);
386 if (m0 == NULL) {
387 _IF_DROP(&ifp->if_snd);
388 m_freem(m);
389 error = ENOBUFS;
390 goto end;
391 }
392 m0->m_next = m;
393 m->m_data += sizeof(struct ip);
394 m->m_len -= sizeof(struct ip);
395 m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
396 m0->m_len = msiz + sizeof(struct ip);
397 m0->m_data += max_linkhdr;
398 memcpy(mtod(m0, caddr_t), (caddr_t)ip,
399 sizeof(struct ip));
400 m = m0;
401 } else { /* we have some space left in the old one */
402 m->m_data -= msiz;
403 m->m_len += msiz;
404 m->m_pkthdr.len += msiz;
405 bcopy(ip, mtod(m, caddr_t),
406 sizeof(struct ip));
407 }
408 ip = mtod(m, struct ip *);
409 memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
410 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
411 } else { /* AF_INET */
412 _IF_DROP(&ifp->if_snd);
413 m_freem(m);
414 error = EINVAL;
415 goto end;
416 }
417 } else if (sc->g_proto == IPPROTO_GRE) {
420 switch (dst->sa_family) {
418 switch (af) {
421 case AF_INET:
422 ip = mtod(m, struct ip *);
423 gre_ip_tos = ip->ip_tos;
424 gre_ip_id = ip->ip_id;
425 if (sc->wccp_ver == WCCP_V2) {
426 extra = sizeof(uint32_t);
427 etype = WCCP_PROTOCOL_TYPE;
428 } else {
429 etype = ETHERTYPE_IP;
430 }
431 break;
432#ifdef INET6
433 case AF_INET6:
434 gre_ip_id = ip_newid();
435 etype = ETHERTYPE_IPV6;
436 break;
437#endif
438#ifdef NETATALK
439 case AF_APPLETALK:
440 etype = ETHERTYPE_ATALK;
441 break;
442#endif
443 default:
444 _IF_DROP(&ifp->if_snd);
445 m_freem(m);
446 error = EAFNOSUPPORT;
447 goto end;
448 }
449
450 /* Reserve space for GRE header + optional GRE key */
451 int hdrlen = sizeof(struct greip) + extra;
452 if (sc->key)
453 hdrlen += sizeof(uint32_t);
454 M_PREPEND(m, hdrlen, M_NOWAIT);
455 } else {
456 _IF_DROP(&ifp->if_snd);
457 m_freem(m);
458 error = EINVAL;
459 goto end;
460 }
461
462 if (m == NULL) { /* mbuf allocation failed */
463 _IF_DROP(&ifp->if_snd);
464 error = ENOBUFS;
465 goto end;
466 }
467
468 M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
469
470 gh = mtod(m, struct greip *);
471 if (sc->g_proto == IPPROTO_GRE) {
472 uint32_t *options = gh->gi_options;
473
474 memset((void *)gh, 0, sizeof(struct greip) + extra);
475 gh->gi_ptype = htons(etype);
476 gh->gi_flags = 0;
477
478 /* Add key option */
479 if (sc->key)
480 {
481 gh->gi_flags |= htons(GRE_KP);
482 *(options++) = htonl(sc->key);
483 }
484 }
485
486 gh->gi_pr = sc->g_proto;
487 if (sc->g_proto != IPPROTO_MOBILE) {
488 gh->gi_src = sc->g_src;
489 gh->gi_dst = sc->g_dst;
490 ((struct ip*)gh)->ip_v = IPPROTO_IPV4;
491 ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
492 ((struct ip*)gh)->ip_ttl = GRE_TTL;
493 ((struct ip*)gh)->ip_tos = gre_ip_tos;
494 ((struct ip*)gh)->ip_id = gre_ip_id;
495 gh->gi_len = htons(m->m_pkthdr.len);
496 }
497
498 ifp->if_opackets++;
499 ifp->if_obytes += m->m_pkthdr.len;
500 /*
501 * Send it off and with IP_FORWARD flag to prevent it from
502 * overwriting the ip_id again. ip_id is already set to the
503 * ip_id of the encapsulated packet.
504 */
505 error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
506 (struct ip_moptions *)NULL, (struct inpcb *)NULL);
507 end:
508 if (error)
509 ifp->if_oerrors++;
510 return (error);
511}
512
513static int
514gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
515{
516 struct ifreq *ifr = (struct ifreq *)data;
517 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
518 struct in_aliasreq *aifr = (struct in_aliasreq *)data;
519 struct gre_softc *sc = ifp->if_softc;
520 struct sockaddr_in si;
521 struct sockaddr *sa = NULL;
522 int error, adj;
523 struct sockaddr_in sp, sm, dp, dm;
524 uint32_t key;
525
526 error = 0;
527 adj = 0;
528
529 switch (cmd) {
530 case SIOCSIFADDR:
531 ifp->if_flags |= IFF_UP;
532 break;
533 case SIOCSIFDSTADDR:
534 break;
535 case SIOCSIFFLAGS:
536 /*
537 * XXXRW: Isn't this priv_check() redundant to the ifnet
538 * layer check?
539 */
540 if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
541 break;
542 if ((ifr->ifr_flags & IFF_LINK0) != 0)
543 sc->g_proto = IPPROTO_GRE;
544 else
545 sc->g_proto = IPPROTO_MOBILE;
546 if ((ifr->ifr_flags & IFF_LINK2) != 0)
547 sc->wccp_ver = WCCP_V2;
548 else
549 sc->wccp_ver = WCCP_V1;
550 goto recompute;
551 case SIOCSIFMTU:
552 /*
553 * XXXRW: Isn't this priv_check() redundant to the ifnet
554 * layer check?
555 */
556 if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
557 break;
558 if (ifr->ifr_mtu < 576) {
559 error = EINVAL;
560 break;
561 }
562 ifp->if_mtu = ifr->ifr_mtu;
563 break;
564 case SIOCGIFMTU:
565 ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
566 break;
567 case SIOCADDMULTI:
568 /*
569 * XXXRW: Isn't this priv_checkr() redundant to the ifnet
570 * layer check?
571 */
572 if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
573 break;
574 if (ifr == 0) {
575 error = EAFNOSUPPORT;
576 break;
577 }
578 switch (ifr->ifr_addr.sa_family) {
579#ifdef INET
580 case AF_INET:
581 break;
582#endif
583#ifdef INET6
584 case AF_INET6:
585 break;
586#endif
587 default:
588 error = EAFNOSUPPORT;
589 break;
590 }
591 break;
592 case SIOCDELMULTI:
593 /*
594 * XXXRW: Isn't this priv_check() redundant to the ifnet
595 * layer check?
596 */
597 if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
598 break;
599 if (ifr == 0) {
600 error = EAFNOSUPPORT;
601 break;
602 }
603 switch (ifr->ifr_addr.sa_family) {
604#ifdef INET
605 case AF_INET:
606 break;
607#endif
608#ifdef INET6
609 case AF_INET6:
610 break;
611#endif
612 default:
613 error = EAFNOSUPPORT;
614 break;
615 }
616 break;
617 case GRESPROTO:
618 /*
619 * XXXRW: Isn't this priv_check() redundant to the ifnet
620 * layer check?
621 */
622 if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
623 break;
624 sc->g_proto = ifr->ifr_flags;
625 switch (sc->g_proto) {
626 case IPPROTO_GRE:
627 ifp->if_flags |= IFF_LINK0;
628 break;
629 case IPPROTO_MOBILE:
630 ifp->if_flags &= ~IFF_LINK0;
631 break;
632 default:
633 error = EPROTONOSUPPORT;
634 break;
635 }
636 goto recompute;
637 case GREGPROTO:
638 ifr->ifr_flags = sc->g_proto;
639 break;
640 case GRESADDRS:
641 case GRESADDRD:
642 error = priv_check(curthread, PRIV_NET_GRE);
643 if (error)
644 return (error);
645 /*
646 * set tunnel endpoints, compute a less specific route
647 * to the remote end and mark if as up
648 */
649 sa = &ifr->ifr_addr;
650 if (cmd == GRESADDRS)
651 sc->g_src = (satosin(sa))->sin_addr;
652 if (cmd == GRESADDRD)
653 sc->g_dst = (satosin(sa))->sin_addr;
654 recompute:
655#ifdef INET
656 if (sc->encap != NULL) {
657 encap_detach(sc->encap);
658 sc->encap = NULL;
659 }
660#endif
661 if ((sc->g_src.s_addr != INADDR_ANY) &&
662 (sc->g_dst.s_addr != INADDR_ANY)) {
663 bzero(&sp, sizeof(sp));
664 bzero(&sm, sizeof(sm));
665 bzero(&dp, sizeof(dp));
666 bzero(&dm, sizeof(dm));
667 sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len =
668 sizeof(struct sockaddr_in);
669 sp.sin_family = sm.sin_family = dp.sin_family =
670 dm.sin_family = AF_INET;
671 sp.sin_addr = sc->g_src;
672 dp.sin_addr = sc->g_dst;
673 sm.sin_addr.s_addr = dm.sin_addr.s_addr =
674 INADDR_BROADCAST;
675#ifdef INET
676 sc->encap = encap_attach(AF_INET, sc->g_proto,
677 sintosa(&sp), sintosa(&sm), sintosa(&dp),
678 sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ?
679 &in_gre_protosw : &in_mobile_protosw, sc);
680 if (sc->encap == NULL)
681 printf("%s: unable to attach encap\n",
682 if_name(GRE2IFP(sc)));
683#endif
684 if (sc->route.ro_rt != 0) /* free old route */
685 RTFREE(sc->route.ro_rt);
686 if (gre_compute_route(sc) == 0)
687 ifp->if_drv_flags |= IFF_DRV_RUNNING;
688 else
689 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
690 }
691 break;
692 case GREGADDRS:
693 memset(&si, 0, sizeof(si));
694 si.sin_family = AF_INET;
695 si.sin_len = sizeof(struct sockaddr_in);
696 si.sin_addr.s_addr = sc->g_src.s_addr;
697 sa = sintosa(&si);
698 error = prison_if(curthread->td_ucred, sa);
699 if (error != 0)
700 break;
701 ifr->ifr_addr = *sa;
702 break;
703 case GREGADDRD:
704 memset(&si, 0, sizeof(si));
705 si.sin_family = AF_INET;
706 si.sin_len = sizeof(struct sockaddr_in);
707 si.sin_addr.s_addr = sc->g_dst.s_addr;
708 sa = sintosa(&si);
709 error = prison_if(curthread->td_ucred, sa);
710 if (error != 0)
711 break;
712 ifr->ifr_addr = *sa;
713 break;
714 case SIOCSIFPHYADDR:
715 /*
716 * XXXRW: Isn't this priv_check() redundant to the ifnet
717 * layer check?
718 */
719 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
720 break;
721 if (aifr->ifra_addr.sin_family != AF_INET ||
722 aifr->ifra_dstaddr.sin_family != AF_INET) {
723 error = EAFNOSUPPORT;
724 break;
725 }
726 if (aifr->ifra_addr.sin_len != sizeof(si) ||
727 aifr->ifra_dstaddr.sin_len != sizeof(si)) {
728 error = EINVAL;
729 break;
730 }
731 sc->g_src = aifr->ifra_addr.sin_addr;
732 sc->g_dst = aifr->ifra_dstaddr.sin_addr;
733 goto recompute;
734 case SIOCSLIFPHYADDR:
735 /*
736 * XXXRW: Isn't this priv_check() redundant to the ifnet
737 * layer check?
738 */
739 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
740 break;
741 if (lifr->addr.ss_family != AF_INET ||
742 lifr->dstaddr.ss_family != AF_INET) {
743 error = EAFNOSUPPORT;
744 break;
745 }
746 if (lifr->addr.ss_len != sizeof(si) ||
747 lifr->dstaddr.ss_len != sizeof(si)) {
748 error = EINVAL;
749 break;
750 }
751 sc->g_src = (satosin(&lifr->addr))->sin_addr;
752 sc->g_dst =
753 (satosin(&lifr->dstaddr))->sin_addr;
754 goto recompute;
755 case SIOCDIFPHYADDR:
756 /*
757 * XXXRW: Isn't this priv_check() redundant to the ifnet
758 * layer check?
759 */
760 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
761 break;
762 sc->g_src.s_addr = INADDR_ANY;
763 sc->g_dst.s_addr = INADDR_ANY;
764 goto recompute;
765 case SIOCGLIFPHYADDR:
766 if (sc->g_src.s_addr == INADDR_ANY ||
767 sc->g_dst.s_addr == INADDR_ANY) {
768 error = EADDRNOTAVAIL;
769 break;
770 }
771 memset(&si, 0, sizeof(si));
772 si.sin_family = AF_INET;
773 si.sin_len = sizeof(struct sockaddr_in);
774 si.sin_addr.s_addr = sc->g_src.s_addr;
775 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
776 if (error != 0)
777 break;
778 memcpy(&lifr->addr, &si, sizeof(si));
779 si.sin_addr.s_addr = sc->g_dst.s_addr;
780 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
781 if (error != 0)
782 break;
783 memcpy(&lifr->dstaddr, &si, sizeof(si));
784 break;
785 case SIOCGIFPSRCADDR:
786#ifdef INET6
787 case SIOCGIFPSRCADDR_IN6:
788#endif
789 if (sc->g_src.s_addr == INADDR_ANY) {
790 error = EADDRNOTAVAIL;
791 break;
792 }
793 memset(&si, 0, sizeof(si));
794 si.sin_family = AF_INET;
795 si.sin_len = sizeof(struct sockaddr_in);
796 si.sin_addr.s_addr = sc->g_src.s_addr;
797 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
798 if (error != 0)
799 break;
800 bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
801 break;
802 case SIOCGIFPDSTADDR:
803#ifdef INET6
804 case SIOCGIFPDSTADDR_IN6:
805#endif
806 if (sc->g_dst.s_addr == INADDR_ANY) {
807 error = EADDRNOTAVAIL;
808 break;
809 }
810 memset(&si, 0, sizeof(si));
811 si.sin_family = AF_INET;
812 si.sin_len = sizeof(struct sockaddr_in);
813 si.sin_addr.s_addr = sc->g_dst.s_addr;
814 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
815 if (error != 0)
816 break;
817 bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
818 break;
819 case GRESKEY:
820 error = priv_check(curthread, PRIV_NET_GRE);
821 if (error)
822 break;
823 error = copyin(ifr->ifr_data, &key, sizeof(key));
824 if (error)
825 break;
826 /* adjust MTU for option header */
827 if (key == 0 && sc->key != 0) /* clear */
828 adj += sizeof(key);
829 else if (key != 0 && sc->key == 0) /* set */
830 adj -= sizeof(key);
831
832 if (ifp->if_mtu + adj < 576) {
833 error = EINVAL;
834 break;
835 }
836 ifp->if_mtu += adj;
837 sc->key = key;
838 break;
839 case GREGKEY:
840 error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key));
841 break;
842
843 default:
844 error = EINVAL;
845 break;
846 }
847
848 return (error);
849}
850
851/*
852 * computes a route to our destination that is not the one
853 * which would be taken by ip_output(), as this one will loop back to
854 * us. If the interface is p2p as a--->b, then a routing entry exists
855 * If we now send a packet to b (e.g. ping b), this will come down here
856 * gets src=a, dst=b tacked on and would from ip_output() sent back to
857 * if_gre.
858 * Goal here is to compute a route to b that is less specific than
859 * a-->b. We know that this one exists as in normal operation we have
860 * at least a default route which matches.
861 */
862static int
863gre_compute_route(struct gre_softc *sc)
864{
865 struct route *ro;
866
867 ro = &sc->route;
868
869 memset(ro, 0, sizeof(struct route));
870 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
871 ro->ro_dst.sa_family = AF_INET;
872 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
873
874 /*
875 * toggle last bit, so our interface is not found, but a less
876 * specific route. I'd rather like to specify a shorter mask,
877 * but this is not possible. Should work though. XXX
878 * XXX MRT Use a different FIB for the tunnel to solve this problem.
879 */
880 if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
881 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
882 htonl(0x01);
883 }
884
885#ifdef DIAGNOSTIC
886 printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)),
887 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
888#endif
889
890 rtalloc_fib(ro, sc->gre_fibnum);
891
892 /*
893 * check if this returned a route at all and this route is no
894 * recursion to ourself
895 */
896 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
897#ifdef DIAGNOSTIC
898 if (ro->ro_rt == NULL)
899 printf(" - no route found!\n");
900 else
901 printf(" - route loops back to ourself!\n");
902#endif
903 return EADDRNOTAVAIL;
904 }
905
906 /*
907 * now change it back - else ip_output will just drop
908 * the route and search one to this interface ...
909 */
910 if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0)
911 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
912
913#ifdef DIAGNOSTIC
914 printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp),
915 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
916 printf("\n");
917#endif
918
919 return 0;
920}
921
/*
 * Compute the 16-bit ones' complement checksum of a flat buffer, much
 * like in_cksum() does for mbuf chains.
 *
 * p	start of the buffer, read as native-endian 16-bit words
 * len	length of the buffer in bytes; an odd trailing byte is treated
 *	as a final word padded with a zero byte
 *
 * Returns the complement of the folded sum.
 *
 * The sum is accumulated in 64 bits so that carries are never lost,
 * whatever the buffer length; a 32-bit accumulator overflows once the
 * sum of the words exceeds 2^32.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	uint64_t sum = 0;
	u_int nwords = len >> 1;

	while (nwords-- != 0)
		sum += *p++;

	if (len & 1) {
		/* Pad the odd trailing byte to a full word. */
		union {
			u_short w;
			u_char c[2];
		} u;
		u.c[0] = *(u_char *)p;
		u.c[1] = 0;
		sum += u.w;
	}

	/* end-around-carry: fold until the sum fits in 16 bits */
	while ((sum >> 16) != 0)
		sum = (sum >> 16) + (sum & 0xffff);

	return ((u_int16_t)~sum);
}
950
951static int
952gremodevent(module_t mod, int type, void *data)
953{
954
955 switch (type) {
956 case MOD_LOAD:
957 greattach();
958 break;
959 case MOD_UNLOAD:
960 if_clone_detach(gre_cloner);
961 mtx_destroy(&gre_mtx);
962 break;
963 default:
964 return EOPNOTSUPP;
965 }
966 return 0;
967}
968
969static moduledata_t gre_mod = {
970 "if_gre",
971 gremodevent,
972 0
973};
974
975DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
976MODULE_VERSION(if_gre, 1);
419 case AF_INET:
420 ip = mtod(m, struct ip *);
421 gre_ip_tos = ip->ip_tos;
422 gre_ip_id = ip->ip_id;
423 if (sc->wccp_ver == WCCP_V2) {
424 extra = sizeof(uint32_t);
425 etype = WCCP_PROTOCOL_TYPE;
426 } else {
427 etype = ETHERTYPE_IP;
428 }
429 break;
430#ifdef INET6
431 case AF_INET6:
432 gre_ip_id = ip_newid();
433 etype = ETHERTYPE_IPV6;
434 break;
435#endif
436#ifdef NETATALK
437 case AF_APPLETALK:
438 etype = ETHERTYPE_ATALK;
439 break;
440#endif
441 default:
442 _IF_DROP(&ifp->if_snd);
443 m_freem(m);
444 error = EAFNOSUPPORT;
445 goto end;
446 }
447
448 /* Reserve space for GRE header + optional GRE key */
449 int hdrlen = sizeof(struct greip) + extra;
450 if (sc->key)
451 hdrlen += sizeof(uint32_t);
452 M_PREPEND(m, hdrlen, M_NOWAIT);
453 } else {
454 _IF_DROP(&ifp->if_snd);
455 m_freem(m);
456 error = EINVAL;
457 goto end;
458 }
459
460 if (m == NULL) { /* mbuf allocation failed */
461 _IF_DROP(&ifp->if_snd);
462 error = ENOBUFS;
463 goto end;
464 }
465
466 M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
467
468 gh = mtod(m, struct greip *);
469 if (sc->g_proto == IPPROTO_GRE) {
470 uint32_t *options = gh->gi_options;
471
472 memset((void *)gh, 0, sizeof(struct greip) + extra);
473 gh->gi_ptype = htons(etype);
474 gh->gi_flags = 0;
475
476 /* Add key option */
477 if (sc->key)
478 {
479 gh->gi_flags |= htons(GRE_KP);
480 *(options++) = htonl(sc->key);
481 }
482 }
483
484 gh->gi_pr = sc->g_proto;
485 if (sc->g_proto != IPPROTO_MOBILE) {
486 gh->gi_src = sc->g_src;
487 gh->gi_dst = sc->g_dst;
488 ((struct ip*)gh)->ip_v = IPPROTO_IPV4;
489 ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
490 ((struct ip*)gh)->ip_ttl = GRE_TTL;
491 ((struct ip*)gh)->ip_tos = gre_ip_tos;
492 ((struct ip*)gh)->ip_id = gre_ip_id;
493 gh->gi_len = htons(m->m_pkthdr.len);
494 }
495
496 ifp->if_opackets++;
497 ifp->if_obytes += m->m_pkthdr.len;
498 /*
499 * Send it off and with IP_FORWARD flag to prevent it from
500 * overwriting the ip_id again. ip_id is already set to the
501 * ip_id of the encapsulated packet.
502 */
503 error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
504 (struct ip_moptions *)NULL, (struct inpcb *)NULL);
505 end:
506 if (error)
507 ifp->if_oerrors++;
508 return (error);
509}
510
511static int
512gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
513{
514 struct ifreq *ifr = (struct ifreq *)data;
515 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
516 struct in_aliasreq *aifr = (struct in_aliasreq *)data;
517 struct gre_softc *sc = ifp->if_softc;
518 struct sockaddr_in si;
519 struct sockaddr *sa = NULL;
520 int error, adj;
521 struct sockaddr_in sp, sm, dp, dm;
522 uint32_t key;
523
524 error = 0;
525 adj = 0;
526
527 switch (cmd) {
528 case SIOCSIFADDR:
529 ifp->if_flags |= IFF_UP;
530 break;
531 case SIOCSIFDSTADDR:
532 break;
533 case SIOCSIFFLAGS:
534 /*
535 * XXXRW: Isn't this priv_check() redundant to the ifnet
536 * layer check?
537 */
538 if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
539 break;
540 if ((ifr->ifr_flags & IFF_LINK0) != 0)
541 sc->g_proto = IPPROTO_GRE;
542 else
543 sc->g_proto = IPPROTO_MOBILE;
544 if ((ifr->ifr_flags & IFF_LINK2) != 0)
545 sc->wccp_ver = WCCP_V2;
546 else
547 sc->wccp_ver = WCCP_V1;
548 goto recompute;
549 case SIOCSIFMTU:
550 /*
551 * XXXRW: Isn't this priv_check() redundant to the ifnet
552 * layer check?
553 */
554 if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
555 break;
556 if (ifr->ifr_mtu < 576) {
557 error = EINVAL;
558 break;
559 }
560 ifp->if_mtu = ifr->ifr_mtu;
561 break;
562 case SIOCGIFMTU:
563 ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
564 break;
565 case SIOCADDMULTI:
566 /*
567 * XXXRW: Isn't this priv_checkr() redundant to the ifnet
568 * layer check?
569 */
570 if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
571 break;
572 if (ifr == 0) {
573 error = EAFNOSUPPORT;
574 break;
575 }
576 switch (ifr->ifr_addr.sa_family) {
577#ifdef INET
578 case AF_INET:
579 break;
580#endif
581#ifdef INET6
582 case AF_INET6:
583 break;
584#endif
585 default:
586 error = EAFNOSUPPORT;
587 break;
588 }
589 break;
590 case SIOCDELMULTI:
591 /*
592 * XXXRW: Isn't this priv_check() redundant to the ifnet
593 * layer check?
594 */
595 if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
596 break;
597 if (ifr == 0) {
598 error = EAFNOSUPPORT;
599 break;
600 }
601 switch (ifr->ifr_addr.sa_family) {
602#ifdef INET
603 case AF_INET:
604 break;
605#endif
606#ifdef INET6
607 case AF_INET6:
608 break;
609#endif
610 default:
611 error = EAFNOSUPPORT;
612 break;
613 }
614 break;
615 case GRESPROTO:
616 /*
617 * XXXRW: Isn't this priv_check() redundant to the ifnet
618 * layer check?
619 */
620 if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
621 break;
622 sc->g_proto = ifr->ifr_flags;
623 switch (sc->g_proto) {
624 case IPPROTO_GRE:
625 ifp->if_flags |= IFF_LINK0;
626 break;
627 case IPPROTO_MOBILE:
628 ifp->if_flags &= ~IFF_LINK0;
629 break;
630 default:
631 error = EPROTONOSUPPORT;
632 break;
633 }
634 goto recompute;
635 case GREGPROTO:
636 ifr->ifr_flags = sc->g_proto;
637 break;
638 case GRESADDRS:
639 case GRESADDRD:
640 error = priv_check(curthread, PRIV_NET_GRE);
641 if (error)
642 return (error);
643 /*
644 * set tunnel endpoints, compute a less specific route
645 * to the remote end and mark if as up
646 */
647 sa = &ifr->ifr_addr;
648 if (cmd == GRESADDRS)
649 sc->g_src = (satosin(sa))->sin_addr;
650 if (cmd == GRESADDRD)
651 sc->g_dst = (satosin(sa))->sin_addr;
652 recompute:
653#ifdef INET
654 if (sc->encap != NULL) {
655 encap_detach(sc->encap);
656 sc->encap = NULL;
657 }
658#endif
659 if ((sc->g_src.s_addr != INADDR_ANY) &&
660 (sc->g_dst.s_addr != INADDR_ANY)) {
661 bzero(&sp, sizeof(sp));
662 bzero(&sm, sizeof(sm));
663 bzero(&dp, sizeof(dp));
664 bzero(&dm, sizeof(dm));
665 sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len =
666 sizeof(struct sockaddr_in);
667 sp.sin_family = sm.sin_family = dp.sin_family =
668 dm.sin_family = AF_INET;
669 sp.sin_addr = sc->g_src;
670 dp.sin_addr = sc->g_dst;
671 sm.sin_addr.s_addr = dm.sin_addr.s_addr =
672 INADDR_BROADCAST;
673#ifdef INET
674 sc->encap = encap_attach(AF_INET, sc->g_proto,
675 sintosa(&sp), sintosa(&sm), sintosa(&dp),
676 sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ?
677 &in_gre_protosw : &in_mobile_protosw, sc);
678 if (sc->encap == NULL)
679 printf("%s: unable to attach encap\n",
680 if_name(GRE2IFP(sc)));
681#endif
682 if (sc->route.ro_rt != 0) /* free old route */
683 RTFREE(sc->route.ro_rt);
684 if (gre_compute_route(sc) == 0)
685 ifp->if_drv_flags |= IFF_DRV_RUNNING;
686 else
687 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
688 }
689 break;
690 case GREGADDRS:
691 memset(&si, 0, sizeof(si));
692 si.sin_family = AF_INET;
693 si.sin_len = sizeof(struct sockaddr_in);
694 si.sin_addr.s_addr = sc->g_src.s_addr;
695 sa = sintosa(&si);
696 error = prison_if(curthread->td_ucred, sa);
697 if (error != 0)
698 break;
699 ifr->ifr_addr = *sa;
700 break;
701 case GREGADDRD:
702 memset(&si, 0, sizeof(si));
703 si.sin_family = AF_INET;
704 si.sin_len = sizeof(struct sockaddr_in);
705 si.sin_addr.s_addr = sc->g_dst.s_addr;
706 sa = sintosa(&si);
707 error = prison_if(curthread->td_ucred, sa);
708 if (error != 0)
709 break;
710 ifr->ifr_addr = *sa;
711 break;
712 case SIOCSIFPHYADDR:
713 /*
714 * XXXRW: Isn't this priv_check() redundant to the ifnet
715 * layer check?
716 */
717 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
718 break;
719 if (aifr->ifra_addr.sin_family != AF_INET ||
720 aifr->ifra_dstaddr.sin_family != AF_INET) {
721 error = EAFNOSUPPORT;
722 break;
723 }
724 if (aifr->ifra_addr.sin_len != sizeof(si) ||
725 aifr->ifra_dstaddr.sin_len != sizeof(si)) {
726 error = EINVAL;
727 break;
728 }
729 sc->g_src = aifr->ifra_addr.sin_addr;
730 sc->g_dst = aifr->ifra_dstaddr.sin_addr;
731 goto recompute;
732 case SIOCSLIFPHYADDR:
733 /*
734 * XXXRW: Isn't this priv_check() redundant to the ifnet
735 * layer check?
736 */
737 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
738 break;
739 if (lifr->addr.ss_family != AF_INET ||
740 lifr->dstaddr.ss_family != AF_INET) {
741 error = EAFNOSUPPORT;
742 break;
743 }
744 if (lifr->addr.ss_len != sizeof(si) ||
745 lifr->dstaddr.ss_len != sizeof(si)) {
746 error = EINVAL;
747 break;
748 }
749 sc->g_src = (satosin(&lifr->addr))->sin_addr;
750 sc->g_dst =
751 (satosin(&lifr->dstaddr))->sin_addr;
752 goto recompute;
753 case SIOCDIFPHYADDR:
754 /*
755 * XXXRW: Isn't this priv_check() redundant to the ifnet
756 * layer check?
757 */
758 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
759 break;
760 sc->g_src.s_addr = INADDR_ANY;
761 sc->g_dst.s_addr = INADDR_ANY;
762 goto recompute;
763 case SIOCGLIFPHYADDR:
764 if (sc->g_src.s_addr == INADDR_ANY ||
765 sc->g_dst.s_addr == INADDR_ANY) {
766 error = EADDRNOTAVAIL;
767 break;
768 }
769 memset(&si, 0, sizeof(si));
770 si.sin_family = AF_INET;
771 si.sin_len = sizeof(struct sockaddr_in);
772 si.sin_addr.s_addr = sc->g_src.s_addr;
773 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
774 if (error != 0)
775 break;
776 memcpy(&lifr->addr, &si, sizeof(si));
777 si.sin_addr.s_addr = sc->g_dst.s_addr;
778 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
779 if (error != 0)
780 break;
781 memcpy(&lifr->dstaddr, &si, sizeof(si));
782 break;
783 case SIOCGIFPSRCADDR:
784#ifdef INET6
785 case SIOCGIFPSRCADDR_IN6:
786#endif
787 if (sc->g_src.s_addr == INADDR_ANY) {
788 error = EADDRNOTAVAIL;
789 break;
790 }
791 memset(&si, 0, sizeof(si));
792 si.sin_family = AF_INET;
793 si.sin_len = sizeof(struct sockaddr_in);
794 si.sin_addr.s_addr = sc->g_src.s_addr;
795 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
796 if (error != 0)
797 break;
798 bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
799 break;
800 case SIOCGIFPDSTADDR:
801#ifdef INET6
802 case SIOCGIFPDSTADDR_IN6:
803#endif
804 if (sc->g_dst.s_addr == INADDR_ANY) {
805 error = EADDRNOTAVAIL;
806 break;
807 }
808 memset(&si, 0, sizeof(si));
809 si.sin_family = AF_INET;
810 si.sin_len = sizeof(struct sockaddr_in);
811 si.sin_addr.s_addr = sc->g_dst.s_addr;
812 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
813 if (error != 0)
814 break;
815 bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
816 break;
817 case GRESKEY:
818 error = priv_check(curthread, PRIV_NET_GRE);
819 if (error)
820 break;
821 error = copyin(ifr->ifr_data, &key, sizeof(key));
822 if (error)
823 break;
824 /* adjust MTU for option header */
825 if (key == 0 && sc->key != 0) /* clear */
826 adj += sizeof(key);
827 else if (key != 0 && sc->key == 0) /* set */
828 adj -= sizeof(key);
829
830 if (ifp->if_mtu + adj < 576) {
831 error = EINVAL;
832 break;
833 }
834 ifp->if_mtu += adj;
835 sc->key = key;
836 break;
837 case GREGKEY:
838 error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key));
839 break;
840
841 default:
842 error = EINVAL;
843 break;
844 }
845
846 return (error);
847}
848
849/*
850 * computes a route to our destination that is not the one
851 * which would be taken by ip_output(), as this one will loop back to
852 * us. If the interface is p2p as a--->b, then a routing entry exists
853 * If we now send a packet to b (e.g. ping b), this will come down here
854 * gets src=a, dst=b tacked on and would from ip_output() sent back to
855 * if_gre.
856 * Goal here is to compute a route to b that is less specific than
857 * a-->b. We know that this one exists as in normal operation we have
858 * at least a default route which matches.
859 */
860static int
861gre_compute_route(struct gre_softc *sc)
862{
863 struct route *ro;
864
865 ro = &sc->route;
866
867 memset(ro, 0, sizeof(struct route));
868 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
869 ro->ro_dst.sa_family = AF_INET;
870 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
871
872 /*
873 * toggle last bit, so our interface is not found, but a less
874 * specific route. I'd rather like to specify a shorter mask,
875 * but this is not possible. Should work though. XXX
876 * XXX MRT Use a different FIB for the tunnel to solve this problem.
877 */
878 if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
879 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
880 htonl(0x01);
881 }
882
883#ifdef DIAGNOSTIC
884 printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)),
885 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
886#endif
887
888 rtalloc_fib(ro, sc->gre_fibnum);
889
890 /*
891 * check if this returned a route at all and this route is no
892 * recursion to ourself
893 */
894 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
895#ifdef DIAGNOSTIC
896 if (ro->ro_rt == NULL)
897 printf(" - no route found!\n");
898 else
899 printf(" - route loops back to ourself!\n");
900#endif
901 return EADDRNOTAVAIL;
902 }
903
904 /*
905 * now change it back - else ip_output will just drop
906 * the route and search one to this interface ...
907 */
908 if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0)
909 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
910
911#ifdef DIAGNOSTIC
912 printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp),
913 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
914 printf("\n");
915#endif
916
917 return 0;
918}
919
/*
 * Compute the 16-bit ones' complement checksum of a flat buffer, much
 * like in_cksum() does for mbuf chains.
 *
 * p	start of the buffer, read as native-endian 16-bit words
 * len	length of the buffer in bytes; an odd trailing byte is treated
 *	as a final word padded with a zero byte
 *
 * Returns the complement of the folded sum.
 *
 * The sum is accumulated in 64 bits so that carries are never lost,
 * whatever the buffer length; a 32-bit accumulator overflows once the
 * sum of the words exceeds 2^32.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	uint64_t sum = 0;
	u_int nwords = len >> 1;

	while (nwords-- != 0)
		sum += *p++;

	if (len & 1) {
		/* Pad the odd trailing byte to a full word. */
		union {
			u_short w;
			u_char c[2];
		} u;
		u.c[0] = *(u_char *)p;
		u.c[1] = 0;
		sum += u.w;
	}

	/* end-around-carry: fold until the sum fits in 16 bits */
	while ((sum >> 16) != 0)
		sum = (sum >> 16) + (sum & 0xffff);

	return ((u_int16_t)~sum);
}
948
949static int
950gremodevent(module_t mod, int type, void *data)
951{
952
953 switch (type) {
954 case MOD_LOAD:
955 greattach();
956 break;
957 case MOD_UNLOAD:
958 if_clone_detach(gre_cloner);
959 mtx_destroy(&gre_mtx);
960 break;
961 default:
962 return EOPNOTSUPP;
963 }
964 return 0;
965}
966
967static moduledata_t gre_mod = {
968 "if_gre",
969 gremodevent,
970 0
971};
972
973DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
974MODULE_VERSION(if_gre, 1);