if_gre.c revision 250523
1162562Sjhb/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ 2162562Sjhb/* $FreeBSD: head/sys/net/if_gre.c 250523 2013-05-11 19:05:38Z hrs $ */ 3162562Sjhb 4162562Sjhb/*- 5162562Sjhb * Copyright (c) 1998 The NetBSD Foundation, Inc. 6162562Sjhb * All rights reserved. 7162562Sjhb * 8162562Sjhb * This code is derived from software contributed to The NetBSD Foundation 9162562Sjhb * by Heiko W.Rupp <hwr@pilhuhn.de> 10162562Sjhb * 11162562Sjhb * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de> 12162562Sjhb * 13162562Sjhb * Redistribution and use in source and binary forms, with or without 14162562Sjhb * modification, are permitted provided that the following conditions 15162562Sjhb * are met: 16162562Sjhb * 1. Redistributions of source code must retain the above copyright 17162562Sjhb * notice, this list of conditions and the following disclaimer. 18162562Sjhb * 2. Redistributions in binary form must reproduce the above copyright 19162562Sjhb * notice, this list of conditions and the following disclaimer in the 20162562Sjhb * documentation and/or other materials provided with the distribution. 21162562Sjhb * 22162562Sjhb * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23162562Sjhb * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24162562Sjhb * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25162562Sjhb * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26162562Sjhb * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27162562Sjhb * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28162562Sjhb * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29162562Sjhb * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30162562Sjhb * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31162562Sjhb * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32162562Sjhb * POSSIBILITY OF SUCH DAMAGE. 33162562Sjhb */ 34162562Sjhb 35162562Sjhb/* 36162562Sjhb * Encapsulate L3 protocols into IP 37162562Sjhb * See RFC 2784 (successor of RFC 1701 and 1702) for more details. 38162562Sjhb * If_gre is compatible with Cisco GRE tunnels, so you can 39162562Sjhb * have a NetBSD box as the other end of a tunnel interface of a Cisco 40162562Sjhb * router. See gre(4) for more details. 41162562Sjhb * Also supported: IP in IP encaps (proto 55) as of RFC 2004 42162562Sjhb */ 43162562Sjhb 44162562Sjhb#include "opt_atalk.h" 45162562Sjhb#include "opt_inet.h" 46162562Sjhb#include "opt_inet6.h" 47162562Sjhb 48162562Sjhb#include <sys/param.h> 49162562Sjhb#include <sys/jail.h> 50162562Sjhb#include <sys/kernel.h> 51162562Sjhb#include <sys/libkern.h> 52162562Sjhb#include <sys/malloc.h> 53162562Sjhb#include <sys/module.h> 54162562Sjhb#include <sys/mbuf.h> 55162562Sjhb#include <sys/priv.h> 56162562Sjhb#include <sys/proc.h> 57162562Sjhb#include <sys/protosw.h> 58162562Sjhb#include <sys/socket.h> 59162562Sjhb#include <sys/sockio.h> 60162562Sjhb#include <sys/sysctl.h> 61162562Sjhb#include <sys/systm.h> 62162562Sjhb 63162562Sjhb#include <net/ethernet.h> 64162562Sjhb#include <net/if.h> 65162562Sjhb#include <net/if_clone.h> 66162562Sjhb#include <net/if_types.h> 67162562Sjhb#include <net/route.h> 68162562Sjhb#include <net/vnet.h> 69162562Sjhb 70162562Sjhb#ifdef INET 71162562Sjhb#include <netinet/in.h> 72162562Sjhb#include <netinet/in_systm.h> 73162562Sjhb#include <netinet/in_var.h> 74162562Sjhb#include <netinet/ip.h> 75162562Sjhb#include <netinet/ip_gre.h> 76162562Sjhb#include <netinet/ip_var.h> 77162562Sjhb#include <netinet/ip_encap.h> 78162562Sjhb#else 79162562Sjhb#error "Huh? if_gre without inet?" 80162562Sjhb#endif 81162562Sjhb 82162562Sjhb#include <net/bpf.h> 83162562Sjhb 84162562Sjhb#include <net/if_gre.h> 85162562Sjhb 86162562Sjhb/* 87162562Sjhb * It is not easy to calculate the right value for a GRE MTU. 88162562Sjhb * We leave this task to the admin and use the same default that 89162562Sjhb * other vendors use. 90162562Sjhb */ 91162562Sjhb#define GREMTU 1476 92162562Sjhb 93162562Sjhb#define MTAG_COOKIE_GRE 1307983903 94162562Sjhb#define MTAG_GRE_NESTING 1 95162562Sjhbstruct mtag_gre_nesting { 96162562Sjhb uint16_t count; 97162562Sjhb uint16_t max; 98162562Sjhb struct ifnet *ifp[]; 99162562Sjhb}; 100162562Sjhb 101162562Sjhb/* 102162562Sjhb * gre_mtx protects all global variables in if_gre.c. 103162562Sjhb * XXX: gre_softc data not protected yet. 104162562Sjhb */ 105162562Sjhbstruct mtx gre_mtx; 106162562Sjhbstatic const char grename[] = "gre"; 107162562Sjhbstatic MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); 108162562Sjhb 109162562Sjhbstruct gre_softc_head gre_softc_list; 110162562Sjhb 111162562Sjhbstatic int gre_clone_create(struct if_clone *, int, caddr_t); 112162562Sjhbstatic void gre_clone_destroy(struct ifnet *); 113162562Sjhbstatic struct if_clone *gre_cloner; 114162562Sjhb 115162562Sjhbstatic int gre_ioctl(struct ifnet *, u_long, caddr_t); 116162562Sjhbstatic int gre_output(struct ifnet *, struct mbuf *, 117162562Sjhb const struct sockaddr *, struct route *); 118162562Sjhb 119162562Sjhbstatic int gre_compute_route(struct gre_softc *sc); 120162562Sjhb 121162562Sjhbstatic void greattach(void); 122162562Sjhb 123162562Sjhb#ifdef INET 124162562Sjhbextern struct domain inetdomain; 125162562Sjhbstatic const struct protosw in_gre_protosw = { 126162562Sjhb .pr_type = SOCK_RAW, 127162562Sjhb .pr_domain = &inetdomain, 128162562Sjhb .pr_protocol = IPPROTO_GRE, 129162562Sjhb .pr_flags = PR_ATOMIC|PR_ADDR, 130162562Sjhb .pr_input = gre_input, 131162562Sjhb .pr_output = (pr_output_t *)rip_output, 132162562Sjhb .pr_ctlinput = rip_ctlinput, 133162562Sjhb .pr_ctloutput = rip_ctloutput, 134162562Sjhb .pr_usrreqs = &rip_usrreqs 135162562Sjhb}; 136162562Sjhbstatic const struct protosw in_mobile_protosw = { 137162562Sjhb .pr_type = SOCK_RAW, 138162562Sjhb .pr_domain = &inetdomain, 139162562Sjhb .pr_protocol = IPPROTO_MOBILE, 140162562Sjhb .pr_flags = PR_ATOMIC|PR_ADDR, 141162562Sjhb .pr_input = gre_mobile_input, 142162562Sjhb .pr_output = (pr_output_t *)rip_output, 143162562Sjhb .pr_ctlinput = rip_ctlinput, 144162562Sjhb .pr_ctloutput = rip_ctloutput, 145162562Sjhb .pr_usrreqs = &rip_usrreqs 146162562Sjhb}; 147162562Sjhb#endif 148162562Sjhb 149162562SjhbSYSCTL_DECL(_net_link); 150162562Sjhbstatic SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, 151162562Sjhb "Generic Routing Encapsulation"); 152162562Sjhb#ifndef MAX_GRE_NEST 153162562Sjhb/* 154162562Sjhb * This macro controls the default upper limitation on nesting of gre tunnels. 155162562Sjhb * Since, setting a large value to this macro with a careless configuration 156162562Sjhb * may introduce system crash, we don't allow any nestings by default. 157162562Sjhb * If you need to configure nested gre tunnels, you can define this macro 158162562Sjhb * in your kernel configuration file. However, if you do so, please be 159162562Sjhb * careful to configure the tunnels so that it won't make a loop. 160162562Sjhb */ 161162562Sjhb#define MAX_GRE_NEST 1 162162562Sjhb#endif 163162562Sjhbstatic int max_gre_nesting = MAX_GRE_NEST; 164162562SjhbSYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW, 165162562Sjhb &max_gre_nesting, 0, "Max nested tunnels"); 166162562Sjhb 167162562Sjhb/* ARGSUSED */ 168162562Sjhbstatic void 169162562Sjhbgreattach(void) 170162562Sjhb{ 171162562Sjhb 172182321Sjhb mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF); 173162562Sjhb LIST_INIT(&gre_softc_list); 174162562Sjhb gre_cloner = if_clone_simple(grename, gre_clone_create, 175162562Sjhb gre_clone_destroy, 0); 176162562Sjhb} 177162562Sjhb 178162562Sjhbstatic int 179162562Sjhbgre_clone_create(ifc, unit, params) 180162562Sjhb struct if_clone *ifc; 181162562Sjhb int unit; 182162562Sjhb caddr_t params; 183162562Sjhb{ 184162562Sjhb struct gre_softc *sc; 185162562Sjhb 186162562Sjhb sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); 187276065Sjhb 188276065Sjhb GRE2IFP(sc) = if_alloc(IFT_TUNNEL); 189162562Sjhb if (GRE2IFP(sc) == NULL) { 190162562Sjhb free(sc, M_GRE); 191162562Sjhb return (ENOSPC); 192162562Sjhb } 193162562Sjhb 194162562Sjhb GRE2IFP(sc)->if_softc = sc; 195162562Sjhb if_initname(GRE2IFP(sc), grename, unit); 196162562Sjhb 197162562Sjhb GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen; 198276065Sjhb GRE2IFP(sc)->if_addrlen = 0; 199276065Sjhb GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */ 200276065Sjhb GRE2IFP(sc)->if_mtu = GREMTU; 201162562Sjhb GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; 202162562Sjhb GRE2IFP(sc)->if_output = gre_output; 203162562Sjhb GRE2IFP(sc)->if_ioctl = gre_ioctl; 204162562Sjhb sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY; 205162562Sjhb sc->g_proto = IPPROTO_GRE; 206162562Sjhb GRE2IFP(sc)->if_flags |= IFF_LINK0; 207162562Sjhb sc->encap = NULL; 208162562Sjhb sc->gre_fibnum = curthread->td_proc->p_fibnum; 209162562Sjhb sc->wccp_ver = WCCP_V1; 210162562Sjhb sc->key = 0; 211162562Sjhb if_attach(GRE2IFP(sc)); 212162562Sjhb bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 213162562Sjhb mtx_lock(&gre_mtx); 214162562Sjhb LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list); 215162562Sjhb mtx_unlock(&gre_mtx); 216162562Sjhb return (0); 217162562Sjhb} 218162562Sjhb 219162562Sjhbstatic void 220162562Sjhbgre_clone_destroy(ifp) 221162562Sjhb struct ifnet *ifp; 222162562Sjhb{ 223162562Sjhb struct gre_softc *sc = ifp->if_softc; 224162562Sjhb 225162562Sjhb mtx_lock(&gre_mtx); 226162562Sjhb LIST_REMOVE(sc, sc_list); 227162562Sjhb mtx_unlock(&gre_mtx); 228162562Sjhb 229162562Sjhb#ifdef INET 230276065Sjhb if (sc->encap != NULL) 231276065Sjhb encap_detach(sc->encap); 232162562Sjhb#endif 233162562Sjhb bpfdetach(ifp); 234162562Sjhb if_detach(ifp); 235162562Sjhb if_free(ifp); 236162562Sjhb free(sc, M_GRE); 237162562Sjhb} 238162562Sjhb 239162562Sjhb/* 240162562Sjhb * The output routine. Takes a packet and encapsulates it in the protocol 241162562Sjhb * given by sc->g_proto. See also RFC 1701 and RFC 2004 242162562Sjhb */ 243162562Sjhbstatic int 244162562Sjhbgre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 245162562Sjhb struct route *ro) 246162562Sjhb{ 247162562Sjhb int error = 0; 248162562Sjhb struct gre_softc *sc = ifp->if_softc; 249162562Sjhb struct greip *gh; 250162562Sjhb struct ip *ip; 251162562Sjhb struct m_tag *mtag; 252162562Sjhb struct mtag_gre_nesting *gt; 253162562Sjhb size_t len; 254276065Sjhb u_short gre_ip_id = 0; 255276065Sjhb uint8_t gre_ip_tos = 0; 256162562Sjhb u_int16_t etype = 0; 257162562Sjhb struct mobile_h mob_h; 258162562Sjhb u_int32_t af; 259162562Sjhb int extra = 0, max; 260162562Sjhb 261162562Sjhb /* 262162562Sjhb * gre may cause infinite recursion calls when misconfigured. High 263162562Sjhb * nesting level may cause stack exhaustion. We'll prevent this by 264162562Sjhb * detecting loops and by introducing upper limit. 265162562Sjhb */ 266162562Sjhb mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL); 267162562Sjhb if (mtag != NULL) { 268162562Sjhb struct ifnet **ifp2; 269162562Sjhb 270162562Sjhb gt = (struct mtag_gre_nesting *)(mtag + 1); 271162562Sjhb gt->count++; 272162562Sjhb if (gt->count > min(gt->max,max_gre_nesting)) { 273162562Sjhb printf("%s: hit maximum recursion limit %u on %s\n", 274162562Sjhb __func__, gt->count - 1, ifp->if_xname); 275162562Sjhb m_freem(m); 276162562Sjhb error = EIO; /* is there better errno? */ 277162562Sjhb goto end; 278162562Sjhb } 279162562Sjhb 280162562Sjhb ifp2 = gt->ifp; 281162562Sjhb for (max = gt->count - 1; max > 0; max--) { 282162562Sjhb if (*ifp2 == ifp) 283162562Sjhb break; 284162562Sjhb ifp2++; 285162562Sjhb } 286276065Sjhb if (*ifp2 == ifp) { 287276065Sjhb printf("%s: detected loop with nexting %u on %s\n", 288162562Sjhb __func__, gt->count-1, ifp->if_xname); 289162562Sjhb m_freem(m); 290162562Sjhb error = EIO; /* is there better errno? */ 291162562Sjhb goto end; 292162562Sjhb } 293162562Sjhb *ifp2 = ifp; 294162562Sjhb 295162562Sjhb } else { 296162562Sjhb /* 297162562Sjhb * Given that people should NOT increase max_gre_nesting beyond 298162562Sjhb * their real needs, we allocate once per packet rather than 299162562Sjhb * allocating an mtag once per passing through gre. 300162562Sjhb * 301162562Sjhb * Note: the sysctl does not actually check for saneness, so we 302276065Sjhb * limit the maximum numbers of possible recursions here. 303276065Sjhb */ 304162562Sjhb max = imin(max_gre_nesting, 256); 305162562Sjhb /* If someone sets the sysctl <= 0, we want at least 1. */ 306162562Sjhb max = imax(max, 1); 307162562Sjhb len = sizeof(struct mtag_gre_nesting) + 308162562Sjhb max * sizeof(struct ifnet *); 309162562Sjhb mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len, 310162562Sjhb M_NOWAIT); 311162562Sjhb if (mtag == NULL) { 312162562Sjhb m_freem(m); 313162562Sjhb error = ENOMEM; 314162562Sjhb goto end; 315162562Sjhb } 316162562Sjhb gt = (struct mtag_gre_nesting *)(mtag + 1); 317162562Sjhb bzero(gt, len); 318162562Sjhb gt->count = 1; 319162562Sjhb gt->max = max; 320162562Sjhb *gt->ifp = ifp; 321162562Sjhb m_tag_prepend(m, mtag); 322162562Sjhb } 323162562Sjhb 324278321Sjhb if (!((ifp->if_flags & IFF_UP) && 325278321Sjhb (ifp->if_drv_flags & IFF_DRV_RUNNING)) || 326162562Sjhb sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) { 327162562Sjhb m_freem(m); 328162562Sjhb error = ENETDOWN; 329162562Sjhb goto end; 330162562Sjhb } 331162562Sjhb 332162562Sjhb gh = NULL; 333162562Sjhb ip = NULL; 334162562Sjhb 335162562Sjhb /* BPF writes need to be handled specially. */ 336162562Sjhb if (dst->sa_family == AF_UNSPEC) 337162562Sjhb bcopy(dst->sa_data, &af, sizeof(af)); 338162562Sjhb else 339162562Sjhb af = dst->sa_family; 340162562Sjhb 341162562Sjhb if (bpf_peers_present(ifp->if_bpf)) 342162562Sjhb bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); 343162562Sjhb 344162562Sjhb if ((ifp->if_flags & IFF_MONITOR) != 0) { 345162562Sjhb m_freem(m); 346162562Sjhb error = ENETDOWN; 347162562Sjhb goto end; 348162562Sjhb } 349162562Sjhb 350162562Sjhb m->m_flags &= ~(M_BCAST|M_MCAST); 351162562Sjhb 352162562Sjhb if (sc->g_proto == IPPROTO_MOBILE) { 353162562Sjhb if (af == AF_INET) { 354162562Sjhb struct mbuf *m0; 355162562Sjhb int msiz; 356162562Sjhb 357162562Sjhb ip = mtod(m, struct ip *); 358162562Sjhb 359162562Sjhb /* 360162562Sjhb * RFC2004 specifies that fragmented diagrams shouldn't 361162562Sjhb * be encapsulated. 362162562Sjhb */ 363162562Sjhb if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 364162562Sjhb _IF_DROP(&ifp->if_snd); 365162562Sjhb m_freem(m); 366162562Sjhb error = EINVAL; /* is there better errno? */ 367162562Sjhb goto end; 368162562Sjhb } 369162562Sjhb memset(&mob_h, 0, MOB_H_SIZ_L); 370162562Sjhb mob_h.proto = (ip->ip_p) << 8; 371162562Sjhb mob_h.odst = ip->ip_dst.s_addr; 372162562Sjhb ip->ip_dst.s_addr = sc->g_dst.s_addr; 373162562Sjhb 374162562Sjhb /* 375162562Sjhb * If the packet comes from our host, we only change 376162562Sjhb * the destination address in the IP header. 377162562Sjhb * Else we also need to save and change the source 378162562Sjhb */ 379162562Sjhb if (in_hosteq(ip->ip_src, sc->g_src)) { 380162562Sjhb msiz = MOB_H_SIZ_S; 381162562Sjhb } else { 382162562Sjhb mob_h.proto |= MOB_H_SBIT; 383162562Sjhb mob_h.osrc = ip->ip_src.s_addr; 384162562Sjhb ip->ip_src.s_addr = sc->g_src.s_addr; 385162562Sjhb msiz = MOB_H_SIZ_L; 386162562Sjhb } 387162562Sjhb mob_h.proto = htons(mob_h.proto); 388162562Sjhb mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz); 389162562Sjhb 390162562Sjhb if ((m->m_data - msiz) < m->m_pktdat) { 391162562Sjhb m0 = m_gethdr(M_NOWAIT, MT_DATA); 392162562Sjhb if (m0 == NULL) { 393162562Sjhb _IF_DROP(&ifp->if_snd); 394162562Sjhb m_freem(m); 395162562Sjhb error = ENOBUFS; 396162562Sjhb goto end; 397162562Sjhb } 398162562Sjhb m0->m_next = m; 399162562Sjhb m->m_data += sizeof(struct ip); 400162562Sjhb m->m_len -= sizeof(struct ip); 401162562Sjhb m0->m_pkthdr.len = m->m_pkthdr.len + msiz; 402162562Sjhb m0->m_len = msiz + sizeof(struct ip); 403162562Sjhb m0->m_data += max_linkhdr; 404162562Sjhb memcpy(mtod(m0, caddr_t), (caddr_t)ip, 405162562Sjhb sizeof(struct ip)); 406162562Sjhb m = m0; 407162562Sjhb } else { /* we have some space left in the old one */ 408162562Sjhb m->m_data -= msiz; 409162562Sjhb m->m_len += msiz; 410162562Sjhb m->m_pkthdr.len += msiz; 411162562Sjhb bcopy(ip, mtod(m, caddr_t), 412162562Sjhb sizeof(struct ip)); 413162562Sjhb } 414162562Sjhb ip = mtod(m, struct ip *); 415162562Sjhb memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz); 416162562Sjhb ip->ip_len = htons(ntohs(ip->ip_len) + msiz); 417162562Sjhb } else { /* AF_INET */ 418162562Sjhb _IF_DROP(&ifp->if_snd); 419162562Sjhb m_freem(m); 420162562Sjhb error = EINVAL; 421162562Sjhb goto end; 422162562Sjhb } 423162562Sjhb } else if (sc->g_proto == IPPROTO_GRE) { 424162562Sjhb switch (af) { 425162562Sjhb case AF_INET: 426162562Sjhb ip = mtod(m, struct ip *); 427162562Sjhb gre_ip_tos = ip->ip_tos; 428162562Sjhb gre_ip_id = ip->ip_id; 429162562Sjhb if (sc->wccp_ver == WCCP_V2) { 430162562Sjhb extra = sizeof(uint32_t); 431162562Sjhb etype = WCCP_PROTOCOL_TYPE; 432162562Sjhb } else { 433162562Sjhb etype = ETHERTYPE_IP; 434162562Sjhb } 435162562Sjhb break; 436162562Sjhb#ifdef INET6 437162562Sjhb case AF_INET6: 438162562Sjhb gre_ip_id = ip_newid(); 439162562Sjhb etype = ETHERTYPE_IPV6; 440162562Sjhb break; 441162562Sjhb#endif 442162562Sjhb#ifdef NETATALK 443162562Sjhb case AF_APPLETALK: 444162562Sjhb etype = ETHERTYPE_ATALK; 445162562Sjhb break; 446162562Sjhb#endif 447162562Sjhb default: 448162562Sjhb _IF_DROP(&ifp->if_snd); 449278321Sjhb m_freem(m); 450162562Sjhb error = EAFNOSUPPORT; 451162562Sjhb goto end; 452162562Sjhb } 453162562Sjhb 454162562Sjhb /* Reserve space for GRE header + optional GRE key */ 455162562Sjhb int hdrlen = sizeof(struct greip) + extra; 456162562Sjhb if (sc->key) 457162562Sjhb hdrlen += sizeof(uint32_t); 458162562Sjhb M_PREPEND(m, hdrlen, M_NOWAIT); 459162562Sjhb } else { 460162562Sjhb _IF_DROP(&ifp->if_snd); 461162562Sjhb m_freem(m); 462162562Sjhb error = EINVAL; 463278321Sjhb goto end; 464162562Sjhb } 465162562Sjhb 466162562Sjhb if (m == NULL) { /* mbuf allocation failed */ 467162562Sjhb _IF_DROP(&ifp->if_snd); 468162562Sjhb error = ENOBUFS; 469162562Sjhb goto end; 470162562Sjhb } 471162562Sjhb 472162562Sjhb M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */ 473162562Sjhb 474162562Sjhb gh = mtod(m, struct greip *); 475162562Sjhb if (sc->g_proto == IPPROTO_GRE) { 476248705Smelifaro uint32_t *options = gh->gi_options; 477162562Sjhb 478162562Sjhb memset((void *)gh, 0, sizeof(struct greip) + extra); 479162562Sjhb gh->gi_ptype = htons(etype); 480162562Sjhb gh->gi_flags = 0; 481162562Sjhb 482162562Sjhb /* Add key option */ 483162562Sjhb if (sc->key) 484248705Smelifaro { 485162562Sjhb gh->gi_flags |= htons(GRE_KP); 486162562Sjhb *(options++) = htonl(sc->key); 487162562Sjhb } 488172836Sjulian } 489162562Sjhb 490162562Sjhb gh->gi_pr = sc->g_proto; 491162562Sjhb if (sc->g_proto != IPPROTO_MOBILE) { 492162562Sjhb gh->gi_src = sc->g_src; 493162562Sjhb gh->gi_dst = sc->g_dst; 494162562Sjhb ((struct ip*)gh)->ip_v = IPPROTO_IPV4; 495172836Sjulian ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2; 496162562Sjhb ((struct ip*)gh)->ip_ttl = GRE_TTL; 497162562Sjhb ((struct ip*)gh)->ip_tos = gre_ip_tos; 498162562Sjhb ((struct ip*)gh)->ip_id = gre_ip_id; 499278321Sjhb gh->gi_len = htons(m->m_pkthdr.len); 500278321Sjhb } 501278321Sjhb 502278321Sjhb ifp->if_opackets++; 503278321Sjhb ifp->if_obytes += m->m_pkthdr.len; 504278321Sjhb /* 505278321Sjhb * Send it off and with IP_FORWARD flag to prevent it from 506278321Sjhb * overwriting the ip_id again. ip_id is already set to the 507278321Sjhb * ip_id of the encapsulated packet. 508278321Sjhb */ 509278321Sjhb error = ip_output(m, NULL, &sc->route, IP_FORWARDING, 510278321Sjhb (struct ip_moptions *)NULL, (struct inpcb *)NULL); 511278321Sjhb end: 512278321Sjhb if (error) 513278321Sjhb ifp->if_oerrors++; 514162562Sjhb return (error); 515162562Sjhb} 516162562Sjhb 517162562Sjhbstatic int 518162562Sjhbgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 519162562Sjhb{ 520162562Sjhb struct ifreq *ifr = (struct ifreq *)data; 521162562Sjhb struct if_laddrreq *lifr = (struct if_laddrreq *)data; 522278321Sjhb struct in_aliasreq *aifr = (struct in_aliasreq *)data; 523162562Sjhb struct gre_softc *sc = ifp->if_softc; 524162562Sjhb struct sockaddr_in si; 525162562Sjhb struct sockaddr *sa = NULL; 526162562Sjhb int error, adj; 527162562Sjhb struct sockaddr_in sp, sm, dp, dm; 528162562Sjhb uint32_t key; 529162562Sjhb 530162562Sjhb error = 0; 531162562Sjhb adj = 0; 532162562Sjhb 533162562Sjhb switch (cmd) { 534162562Sjhb case SIOCSIFADDR: 535162562Sjhb ifp->if_flags |= IFF_UP; 536162562Sjhb break; 537162562Sjhb case SIOCSIFDSTADDR: 538162562Sjhb break; 539162562Sjhb case SIOCSIFFLAGS: 540162562Sjhb /* 541162562Sjhb * XXXRW: Isn't this priv_check() redundant to the ifnet 542162562Sjhb * layer check? 543162562Sjhb */ 544162562Sjhb if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0) 545162562Sjhb break; 546162562Sjhb if ((ifr->ifr_flags & IFF_LINK0) != 0) 547162562Sjhb sc->g_proto = IPPROTO_GRE; 548162562Sjhb else 549162562Sjhb sc->g_proto = IPPROTO_MOBILE; 550162562Sjhb if ((ifr->ifr_flags & IFF_LINK2) != 0) 551162562Sjhb sc->wccp_ver = WCCP_V2; 552162562Sjhb else 553162562Sjhb sc->wccp_ver = WCCP_V1; 554162562Sjhb goto recompute; 555162562Sjhb case SIOCSIFMTU: 556162562Sjhb /* 557162562Sjhb * XXXRW: Isn't this priv_check() redundant to the ifnet 558162562Sjhb * layer check? 559162562Sjhb */ 560162562Sjhb if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0) 561162562Sjhb break; 562162562Sjhb if (ifr->ifr_mtu < 576) { 563162562Sjhb error = EINVAL; 564162562Sjhb break; 565162562Sjhb } 566162562Sjhb ifp->if_mtu = ifr->ifr_mtu; 567162562Sjhb break; 568162562Sjhb case SIOCGIFMTU: 569162562Sjhb ifr->ifr_mtu = GRE2IFP(sc)->if_mtu; 570162562Sjhb break; 571162562Sjhb case SIOCADDMULTI: 572162562Sjhb /* 573162562Sjhb * XXXRW: Isn't this priv_checkr() redundant to the ifnet 574162562Sjhb * layer check? 575162562Sjhb */ 576162562Sjhb if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0) 577162562Sjhb break; 578162562Sjhb if (ifr == 0) { 579162562Sjhb error = EAFNOSUPPORT; 580162562Sjhb break; 581162562Sjhb } 582162562Sjhb switch (ifr->ifr_addr.sa_family) { 583162562Sjhb#ifdef INET 584162562Sjhb case AF_INET: 585162562Sjhb break; 586162562Sjhb#endif 587162562Sjhb#ifdef INET6 588162562Sjhb case AF_INET6: 589162562Sjhb break; 590162562Sjhb#endif 591162562Sjhb default: 592162562Sjhb error = EAFNOSUPPORT; 593182321Sjhb break; 594162562Sjhb } 595162562Sjhb break; 596162562Sjhb case SIOCDELMULTI: 597162562Sjhb /* 598162562Sjhb * XXXRW: Isn't this priv_check() redundant to the ifnet 599162562Sjhb * layer check? 600162562Sjhb */ 601162562Sjhb if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0) 602162562Sjhb break; 603162562Sjhb if (ifr == 0) { 604162562Sjhb error = EAFNOSUPPORT; 605162562Sjhb break; 606162562Sjhb } 607162562Sjhb switch (ifr->ifr_addr.sa_family) { 608162562Sjhb#ifdef INET 609162562Sjhb case AF_INET: 610162562Sjhb break; 611162562Sjhb#endif 612162562Sjhb#ifdef INET6 613162562Sjhb case AF_INET6: 614162562Sjhb break; 615162562Sjhb#endif 616162562Sjhb default: 617162562Sjhb error = EAFNOSUPPORT; 618162562Sjhb break; 619162562Sjhb } 620162562Sjhb break; 621162562Sjhb case GRESPROTO: 622162562Sjhb /* 623162562Sjhb * XXXRW: Isn't this priv_check() redundant to the ifnet 624162562Sjhb * layer check? 625162562Sjhb */ 626162562Sjhb if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) 627162562Sjhb break; 628162562Sjhb sc->g_proto = ifr->ifr_flags; 629162562Sjhb switch (sc->g_proto) { 630162562Sjhb case IPPROTO_GRE: 631162562Sjhb ifp->if_flags |= IFF_LINK0; 632162562Sjhb break; 633162562Sjhb case IPPROTO_MOBILE: 634162562Sjhb ifp->if_flags &= ~IFF_LINK0; 635182321Sjhb break; 636162562Sjhb default: 637162562Sjhb error = EPROTONOSUPPORT; 638162562Sjhb break; 639162562Sjhb } 640162562Sjhb goto recompute; 641162562Sjhb case GREGPROTO: 642162562Sjhb ifr->ifr_flags = sc->g_proto; 643 break; 644 case GRESADDRS: 645 case GRESADDRD: 646 error = priv_check(curthread, PRIV_NET_GRE); 647 if (error) 648 return (error); 649 /* 650 * set tunnel endpoints, compute a less specific route 651 * to the remote end and mark if as up 652 */ 653 sa = &ifr->ifr_addr; 654 if (cmd == GRESADDRS) 655 sc->g_src = (satosin(sa))->sin_addr; 656 if (cmd == GRESADDRD) 657 sc->g_dst = (satosin(sa))->sin_addr; 658 recompute: 659#ifdef INET 660 if (sc->encap != NULL) { 661 encap_detach(sc->encap); 662 sc->encap = NULL; 663 } 664#endif 665 if ((sc->g_src.s_addr != INADDR_ANY) && 666 (sc->g_dst.s_addr != INADDR_ANY)) { 667 bzero(&sp, sizeof(sp)); 668 bzero(&sm, sizeof(sm)); 669 bzero(&dp, sizeof(dp)); 670 bzero(&dm, sizeof(dm)); 671 sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len = 672 sizeof(struct sockaddr_in); 673 sp.sin_family = sm.sin_family = dp.sin_family = 674 dm.sin_family = AF_INET; 675 sp.sin_addr = sc->g_src; 676 dp.sin_addr = sc->g_dst; 677 sm.sin_addr.s_addr = dm.sin_addr.s_addr = 678 INADDR_BROADCAST; 679#ifdef INET 680 sc->encap = encap_attach(AF_INET, sc->g_proto, 681 sintosa(&sp), sintosa(&sm), sintosa(&dp), 682 sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ? 683 &in_gre_protosw : &in_mobile_protosw, sc); 684 if (sc->encap == NULL) 685 printf("%s: unable to attach encap\n", 686 if_name(GRE2IFP(sc))); 687#endif 688 if (sc->route.ro_rt != 0) /* free old route */ 689 RTFREE(sc->route.ro_rt); 690 if (gre_compute_route(sc) == 0) 691 ifp->if_drv_flags |= IFF_DRV_RUNNING; 692 else 693 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 694 } 695 break; 696 case GREGADDRS: 697 memset(&si, 0, sizeof(si)); 698 si.sin_family = AF_INET; 699 si.sin_len = sizeof(struct sockaddr_in); 700 si.sin_addr.s_addr = sc->g_src.s_addr; 701 sa = sintosa(&si); 702 error = prison_if(curthread->td_ucred, sa); 703 if (error != 0) 704 break; 705 ifr->ifr_addr = *sa; 706 break; 707 case GREGADDRD: 708 memset(&si, 0, sizeof(si)); 709 si.sin_family = AF_INET; 710 si.sin_len = sizeof(struct sockaddr_in); 711 si.sin_addr.s_addr = sc->g_dst.s_addr; 712 sa = sintosa(&si); 713 error = prison_if(curthread->td_ucred, sa); 714 if (error != 0) 715 break; 716 ifr->ifr_addr = *sa; 717 break; 718 case SIOCSIFPHYADDR: 719 /* 720 * XXXRW: Isn't this priv_check() redundant to the ifnet 721 * layer check? 722 */ 723 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) 724 break; 725 if (aifr->ifra_addr.sin_family != AF_INET || 726 aifr->ifra_dstaddr.sin_family != AF_INET) { 727 error = EAFNOSUPPORT; 728 break; 729 } 730 if (aifr->ifra_addr.sin_len != sizeof(si) || 731 aifr->ifra_dstaddr.sin_len != sizeof(si)) { 732 error = EINVAL; 733 break; 734 } 735 sc->g_src = aifr->ifra_addr.sin_addr; 736 sc->g_dst = aifr->ifra_dstaddr.sin_addr; 737 goto recompute; 738 case SIOCSLIFPHYADDR: 739 /* 740 * XXXRW: Isn't this priv_check() redundant to the ifnet 741 * layer check? 742 */ 743 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) 744 break; 745 if (lifr->addr.ss_family != AF_INET || 746 lifr->dstaddr.ss_family != AF_INET) { 747 error = EAFNOSUPPORT; 748 break; 749 } 750 if (lifr->addr.ss_len != sizeof(si) || 751 lifr->dstaddr.ss_len != sizeof(si)) { 752 error = EINVAL; 753 break; 754 } 755 sc->g_src = (satosin(&lifr->addr))->sin_addr; 756 sc->g_dst = 757 (satosin(&lifr->dstaddr))->sin_addr; 758 goto recompute; 759 case SIOCDIFPHYADDR: 760 /* 761 * XXXRW: Isn't this priv_check() redundant to the ifnet 762 * layer check? 763 */ 764 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) 765 break; 766 sc->g_src.s_addr = INADDR_ANY; 767 sc->g_dst.s_addr = INADDR_ANY; 768 goto recompute; 769 case SIOCGLIFPHYADDR: 770 if (sc->g_src.s_addr == INADDR_ANY || 771 sc->g_dst.s_addr == INADDR_ANY) { 772 error = EADDRNOTAVAIL; 773 break; 774 } 775 memset(&si, 0, sizeof(si)); 776 si.sin_family = AF_INET; 777 si.sin_len = sizeof(struct sockaddr_in); 778 si.sin_addr.s_addr = sc->g_src.s_addr; 779 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); 780 if (error != 0) 781 break; 782 memcpy(&lifr->addr, &si, sizeof(si)); 783 si.sin_addr.s_addr = sc->g_dst.s_addr; 784 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); 785 if (error != 0) 786 break; 787 memcpy(&lifr->dstaddr, &si, sizeof(si)); 788 break; 789 case SIOCGIFPSRCADDR: 790#ifdef INET6 791 case SIOCGIFPSRCADDR_IN6: 792#endif 793 if (sc->g_src.s_addr == INADDR_ANY) { 794 error = EADDRNOTAVAIL; 795 break; 796 } 797 memset(&si, 0, sizeof(si)); 798 si.sin_family = AF_INET; 799 si.sin_len = sizeof(struct sockaddr_in); 800 si.sin_addr.s_addr = sc->g_src.s_addr; 801 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); 802 if (error != 0) 803 break; 804 bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); 805 break; 806 case SIOCGIFPDSTADDR: 807#ifdef INET6 808 case SIOCGIFPDSTADDR_IN6: 809#endif 810 if (sc->g_dst.s_addr == INADDR_ANY) { 811 error = EADDRNOTAVAIL; 812 break; 813 } 814 memset(&si, 0, sizeof(si)); 815 si.sin_family = AF_INET; 816 si.sin_len = sizeof(struct sockaddr_in); 817 si.sin_addr.s_addr = sc->g_dst.s_addr; 818 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); 819 if (error != 0) 820 break; 821 bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); 822 break; 823 case GRESKEY: 824 error = priv_check(curthread, PRIV_NET_GRE); 825 if (error) 826 break; 827 error = copyin(ifr->ifr_data, &key, sizeof(key)); 828 if (error) 829 break; 830 /* adjust MTU for option header */ 831 if (key == 0 && sc->key != 0) /* clear */ 832 adj += sizeof(key); 833 else if (key != 0 && sc->key == 0) /* set */ 834 adj -= sizeof(key); 835 836 if (ifp->if_mtu + adj < 576) { 837 error = EINVAL; 838 break; 839 } 840 ifp->if_mtu += adj; 841 sc->key = key; 842 break; 843 case GREGKEY: 844 error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key)); 845 break; 846 847 default: 848 error = EINVAL; 849 break; 850 } 851 852 return (error); 853} 854 855/* 856 * computes a route to our destination that is not the one 857 * which would be taken by ip_output(), as this one will loop back to 858 * us. If the interface is p2p as a--->b, then a routing entry exists 859 * If we now send a packet to b (e.g. ping b), this will come down here 860 * gets src=a, dst=b tacked on and would from ip_output() sent back to 861 * if_gre. 862 * Goal here is to compute a route to b that is less specific than 863 * a-->b. We know that this one exists as in normal operation we have 864 * at least a default route which matches. 865 */ 866static int 867gre_compute_route(struct gre_softc *sc) 868{ 869 struct route *ro; 870 871 ro = &sc->route; 872 873 memset(ro, 0, sizeof(struct route)); 874 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; 875 ro->ro_dst.sa_family = AF_INET; 876 ro->ro_dst.sa_len = sizeof(ro->ro_dst); 877 878 /* 879 * toggle last bit, so our interface is not found, but a less 880 * specific route. I'd rather like to specify a shorter mask, 881 * but this is not possible. Should work though. XXX 882 * XXX MRT Use a different FIB for the tunnel to solve this problem. 883 */ 884 if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) { 885 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^= 886 htonl(0x01); 887 } 888 889#ifdef DIAGNOSTIC 890 printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)), 891 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr)); 892#endif 893 894 rtalloc_fib(ro, sc->gre_fibnum); 895 896 /* 897 * check if this returned a route at all and this route is no 898 * recursion to ourself 899 */ 900 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) { 901#ifdef DIAGNOSTIC 902 if (ro->ro_rt == NULL) 903 printf(" - no route found!\n"); 904 else 905 printf(" - route loops back to ourself!\n"); 906#endif 907 return EADDRNOTAVAIL; 908 } 909 910 /* 911 * now change it back - else ip_output will just drop 912 * the route and search one to this interface ... 913 */ 914 if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) 915 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; 916 917#ifdef DIAGNOSTIC 918 printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp), 919 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr)); 920 printf("\n"); 921#endif 922 923 return 0; 924} 925 926/* 927 * do a checksum of a buffer - much like in_cksum, which operates on 928 * mbufs. 929 */ 930u_int16_t 931gre_in_cksum(u_int16_t *p, u_int len) 932{ 933 u_int32_t sum = 0; 934 int nwords = len >> 1; 935 936 while (nwords-- != 0) 937 sum += *p++; 938 939 if (len & 1) { 940 union { 941 u_short w; 942 u_char c[2]; 943 } u; 944 u.c[0] = *(u_char *)p; 945 u.c[1] = 0; 946 sum += u.w; 947 } 948 949 /* end-around-carry */ 950 sum = (sum >> 16) + (sum & 0xffff); 951 sum += (sum >> 16); 952 return (~sum); 953} 954 955static int 956gremodevent(module_t mod, int type, void *data) 957{ 958 959 switch (type) { 960 case MOD_LOAD: 961 greattach(); 962 break; 963 case MOD_UNLOAD: 964 if_clone_detach(gre_cloner); 965 mtx_destroy(&gre_mtx); 966 break; 967 default: 968 return EOPNOTSUPP; 969 } 970 return 0; 971} 972 973static moduledata_t gre_mod = { 974 "if_gre", 975 gremodevent, 976 0 977}; 978 979DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 980MODULE_VERSION(if_gre, 1); 981