1/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ 2/* $FreeBSD$ */ 3 4/*- 5 * Copyright (c) 1998 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Heiko W.Rupp <hwr@pilhuhn.de> 10 * 11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de> 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35/* 36 * Encapsulate L3 protocols into IP 37 * See RFC 2784 (successor of RFC 1701 and 1702) for more details. 38 * If_gre is compatible with Cisco GRE tunnels, so you can 39 * have a NetBSD box as the other end of a tunnel interface of a Cisco 40 * router. See gre(4) for more details. 41 * Also supported: IP in IP encaps (proto 55) as of RFC 2004 42 */ 43 44#include "opt_atalk.h" 45#include "opt_inet.h" 46#include "opt_inet6.h" 47 48#include <sys/param.h> 49#include <sys/jail.h> 50#include <sys/kernel.h> 51#include <sys/libkern.h> 52#include <sys/malloc.h> 53#include <sys/module.h> 54#include <sys/mbuf.h> 55#include <sys/priv.h> 56#include <sys/proc.h> 57#include <sys/protosw.h> 58#include <sys/socket.h> 59#include <sys/sockio.h> 60#include <sys/sysctl.h> 61#include <sys/systm.h> 62 63#include <net/ethernet.h> 64#include <net/if.h> 65#include <net/if_clone.h> 66#include <net/if_types.h> 67#include <net/route.h> 68#include <net/vnet.h> 69 70#ifdef INET 71#include <netinet/in.h> 72#include <netinet/in_systm.h> 73#include <netinet/in_var.h> 74#include <netinet/ip.h> 75#include <netinet/ip_gre.h> 76#include <netinet/ip_var.h> 77#include <netinet/ip_encap.h> 78#else 79#error "Huh? if_gre without inet?" 80#endif 81 82#include <net/bpf.h> 83 84#include <net/if_gre.h> 85 86/* 87 * It is not easy to calculate the right value for a GRE MTU. 88 * We leave this task to the admin and use the same default that 89 * other vendors use. 90 */ 91#define GREMTU 1476 92 93#define GRENAME "gre" 94 95#define MTAG_COOKIE_GRE 1307983903 96#define MTAG_GRE_NESTING 1 97struct mtag_gre_nesting { 98 uint16_t count; 99 uint16_t max; 100 struct ifnet *ifp[]; 101}; 102 103/* 104 * gre_mtx protects all global variables in if_gre.c. 105 * XXX: gre_softc data not protected yet. 106 */ 107struct mtx gre_mtx; 108static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation"); 109 110struct gre_softc_head gre_softc_list; 111 112static int gre_clone_create(struct if_clone *, int, caddr_t); 113static void gre_clone_destroy(struct ifnet *); 114static int gre_ioctl(struct ifnet *, u_long, caddr_t); 115static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *, 116 struct route *ro); 117 118IFC_SIMPLE_DECLARE(gre, 0); 119 120static int gre_compute_route(struct gre_softc *sc); 121 122static void greattach(void); 123 124#ifdef INET 125extern struct domain inetdomain; 126static const struct protosw in_gre_protosw = { 127 .pr_type = SOCK_RAW, 128 .pr_domain = &inetdomain, 129 .pr_protocol = IPPROTO_GRE, 130 .pr_flags = PR_ATOMIC|PR_ADDR, 131 .pr_input = gre_input, 132 .pr_output = (pr_output_t *)rip_output, 133 .pr_ctlinput = rip_ctlinput, 134 .pr_ctloutput = rip_ctloutput, 135 .pr_usrreqs = &rip_usrreqs 136}; 137static const struct protosw in_mobile_protosw = { 138 .pr_type = SOCK_RAW, 139 .pr_domain = &inetdomain, 140 .pr_protocol = IPPROTO_MOBILE, 141 .pr_flags = PR_ATOMIC|PR_ADDR, 142 .pr_input = gre_mobile_input, 143 .pr_output = (pr_output_t *)rip_output, 144 .pr_ctlinput = rip_ctlinput, 145 .pr_ctloutput = rip_ctloutput, 146 .pr_usrreqs = &rip_usrreqs 147}; 148#endif 149 150SYSCTL_DECL(_net_link); 151static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, 152 "Generic Routing Encapsulation"); 153#ifndef MAX_GRE_NEST 154/* 155 * This macro controls the default upper limitation on nesting of gre tunnels. 156 * Since, setting a large value to this macro with a careless configuration 157 * may introduce system crash, we don't allow any nestings by default. 158 * If you need to configure nested gre tunnels, you can define this macro 159 * in your kernel configuration file. However, if you do so, please be 160 * careful to configure the tunnels so that it won't make a loop. 161 */ 162#define MAX_GRE_NEST 1 163#endif 164static int max_gre_nesting = MAX_GRE_NEST; 165SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW, 166 &max_gre_nesting, 0, "Max nested tunnels"); 167 168/* ARGSUSED */ 169static void 170greattach(void) 171{ 172 173 mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF); 174 LIST_INIT(&gre_softc_list); 175 if_clone_attach(&gre_cloner); 176} 177 178static int 179gre_clone_create(ifc, unit, params) 180 struct if_clone *ifc; 181 int unit; 182 caddr_t params; 183{ 184 struct gre_softc *sc; 185 186 sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); 187 188 GRE2IFP(sc) = if_alloc(IFT_TUNNEL); 189 if (GRE2IFP(sc) == NULL) { 190 free(sc, M_GRE); 191 return (ENOSPC); 192 } 193 194 GRE2IFP(sc)->if_softc = sc; 195 if_initname(GRE2IFP(sc), ifc->ifc_name, unit); 196 197 GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen; 198 GRE2IFP(sc)->if_addrlen = 0; 199 GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */ 200 GRE2IFP(sc)->if_mtu = GREMTU; 201 GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; 202 GRE2IFP(sc)->if_output = gre_output; 203 GRE2IFP(sc)->if_ioctl = gre_ioctl; 204 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY; 205 sc->g_proto = IPPROTO_GRE; 206 GRE2IFP(sc)->if_flags |= IFF_LINK0; 207 sc->encap = NULL; 208 sc->gre_fibnum = curthread->td_proc->p_fibnum; 209 sc->wccp_ver = WCCP_V1; 210 sc->key = 0; 211 if_attach(GRE2IFP(sc)); 212 bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 213 mtx_lock(&gre_mtx); 214 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list); 215 mtx_unlock(&gre_mtx); 216 return (0); 217} 218 219static void 220gre_clone_destroy(ifp) 221 struct ifnet *ifp; 222{ 223 struct gre_softc *sc = ifp->if_softc; 224 225 mtx_lock(&gre_mtx); 226 LIST_REMOVE(sc, sc_list); 227 mtx_unlock(&gre_mtx); 228 229#ifdef INET 230 if (sc->encap != NULL) 231 encap_detach(sc->encap); 232#endif 233 bpfdetach(ifp); 234 if_detach(ifp); 235 if_free(ifp); 236 free(sc, M_GRE); 237} 238 239/* 240 * The output routine. Takes a packet and encapsulates it in the protocol 241 * given by sc->g_proto. See also RFC 1701 and RFC 2004 242 */ 243static int 244gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 245 struct route *ro) 246{ 247 int error = 0; 248 struct gre_softc *sc = ifp->if_softc; 249 struct greip *gh; 250 struct ip *ip; 251 struct m_tag *mtag; 252 struct mtag_gre_nesting *gt; 253 size_t len; 254 u_short gre_ip_id = 0; 255 uint8_t gre_ip_tos = 0; 256 u_int16_t etype = 0; 257 struct mobile_h mob_h; 258 u_int32_t af; 259 int extra = 0, max; 260 261 /* 262 * gre may cause infinite recursion calls when misconfigured. High 263 * nesting level may cause stack exhaustion. We'll prevent this by 264 * detecting loops and by introducing upper limit. 265 */ 266 mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL); 267 if (mtag != NULL) { 268 struct ifnet **ifp2; 269 270 gt = (struct mtag_gre_nesting *)(mtag + 1); 271 gt->count++; 272 if (gt->count > min(gt->max,max_gre_nesting)) { 273 printf("%s: hit maximum recursion limit %u on %s\n", 274 __func__, gt->count - 1, ifp->if_xname); 275 m_freem(m); 276 error = EIO; /* is there better errno? */ 277 goto end; 278 } 279 280 ifp2 = gt->ifp; 281 for (max = gt->count - 1; max > 0; max--) { 282 if (*ifp2 == ifp) 283 break; 284 ifp2++; 285 } 286 if (*ifp2 == ifp) { 287 printf("%s: detected loop with nexting %u on %s\n", 288 __func__, gt->count-1, ifp->if_xname); 289 m_freem(m); 290 error = EIO; /* is there better errno? */ 291 goto end; 292 } 293 *ifp2 = ifp; 294 295 } else { 296 /* 297 * Given that people should NOT increase max_gre_nesting beyond 298 * their real needs, we allocate once per packet rather than 299 * allocating an mtag once per passing through gre. 300 * 301 * Note: the sysctl does not actually check for saneness, so we 302 * limit the maximum numbers of possible recursions here. 303 */ 304 max = imin(max_gre_nesting, 256); 305 /* If someone sets the sysctl <= 0, we want at least 1. */ 306 max = imax(max, 1); 307 len = sizeof(struct mtag_gre_nesting) + 308 max * sizeof(struct ifnet *); 309 mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len, 310 M_NOWAIT); 311 if (mtag == NULL) { 312 m_freem(m); 313 error = ENOMEM; 314 goto end; 315 } 316 gt = (struct mtag_gre_nesting *)(mtag + 1); 317 bzero(gt, len); 318 gt->count = 1; 319 gt->max = max; 320 *gt->ifp = ifp; 321 m_tag_prepend(m, mtag); 322 } 323 324 if (!((ifp->if_flags & IFF_UP) && 325 (ifp->if_drv_flags & IFF_DRV_RUNNING)) || 326 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) { 327 m_freem(m); 328 error = ENETDOWN; 329 goto end; 330 } 331 332 gh = NULL; 333 ip = NULL; 334 335 /* BPF writes need to be handled specially. */ 336 if (dst->sa_family == AF_UNSPEC) { 337 bcopy(dst->sa_data, &af, sizeof(af)); 338 dst->sa_family = af; 339 } 340 341 if (bpf_peers_present(ifp->if_bpf)) { 342 af = dst->sa_family; 343 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); 344 } 345 346 if ((ifp->if_flags & IFF_MONITOR) != 0) { 347 m_freem(m); 348 error = ENETDOWN; 349 goto end; 350 } 351 352 m->m_flags &= ~(M_BCAST|M_MCAST); 353 354 if (sc->g_proto == IPPROTO_MOBILE) { 355 if (dst->sa_family == AF_INET) { 356 struct mbuf *m0; 357 int msiz; 358 359 ip = mtod(m, struct ip *); 360 361 /* 362 * RFC2004 specifies that fragmented diagrams shouldn't 363 * be encapsulated. 364 */ 365 if (ip->ip_off & (IP_MF | IP_OFFMASK)) { 366 _IF_DROP(&ifp->if_snd); 367 m_freem(m); 368 error = EINVAL; /* is there better errno? */ 369 goto end; 370 } 371 memset(&mob_h, 0, MOB_H_SIZ_L); 372 mob_h.proto = (ip->ip_p) << 8; 373 mob_h.odst = ip->ip_dst.s_addr; 374 ip->ip_dst.s_addr = sc->g_dst.s_addr; 375 376 /* 377 * If the packet comes from our host, we only change 378 * the destination address in the IP header. 379 * Else we also need to save and change the source 380 */ 381 if (in_hosteq(ip->ip_src, sc->g_src)) { 382 msiz = MOB_H_SIZ_S; 383 } else { 384 mob_h.proto |= MOB_H_SBIT; 385 mob_h.osrc = ip->ip_src.s_addr; 386 ip->ip_src.s_addr = sc->g_src.s_addr; 387 msiz = MOB_H_SIZ_L; 388 } 389 mob_h.proto = htons(mob_h.proto); 390 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz); 391 392 if ((m->m_data - msiz) < m->m_pktdat) { 393 /* need new mbuf */ 394 MGETHDR(m0, M_DONTWAIT, MT_DATA); 395 if (m0 == NULL) { 396 _IF_DROP(&ifp->if_snd); 397 m_freem(m); 398 error = ENOBUFS; 399 goto end; 400 } 401 m0->m_next = m; 402 m->m_data += sizeof(struct ip); 403 m->m_len -= sizeof(struct ip); 404 m0->m_pkthdr.len = m->m_pkthdr.len + msiz; 405 m0->m_len = msiz + sizeof(struct ip); 406 m0->m_data += max_linkhdr; 407 memcpy(mtod(m0, caddr_t), (caddr_t)ip, 408 sizeof(struct ip)); 409 m = m0; 410 } else { /* we have some space left in the old one */ 411 m->m_data -= msiz; 412 m->m_len += msiz; 413 m->m_pkthdr.len += msiz; 414 bcopy(ip, mtod(m, caddr_t), 415 sizeof(struct ip)); 416 } 417 ip = mtod(m, struct ip *); 418 memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz); 419 ip->ip_len = ntohs(ip->ip_len) + msiz; 420 } else { /* AF_INET */ 421 _IF_DROP(&ifp->if_snd); 422 m_freem(m); 423 error = EINVAL; 424 goto end; 425 } 426 } else if (sc->g_proto == IPPROTO_GRE) { 427 switch (dst->sa_family) { 428 case AF_INET: 429 ip = mtod(m, struct ip *); 430 gre_ip_tos = ip->ip_tos; 431 gre_ip_id = ip->ip_id; 432 if (sc->wccp_ver == WCCP_V2) { 433 extra = sizeof(uint32_t); 434 etype = WCCP_PROTOCOL_TYPE; 435 } else { 436 etype = ETHERTYPE_IP; 437 } 438 break; 439#ifdef INET6 440 case AF_INET6: 441 gre_ip_id = ip_newid(); 442 etype = ETHERTYPE_IPV6; 443 break; 444#endif 445#ifdef NETATALK 446 case AF_APPLETALK: 447 etype = ETHERTYPE_ATALK; 448 break; 449#endif 450 default: 451 _IF_DROP(&ifp->if_snd); 452 m_freem(m); 453 error = EAFNOSUPPORT; 454 goto end; 455 } 456 457 /* Reserve space for GRE header + optional GRE key */ 458 int hdrlen = sizeof(struct greip) + extra; 459 if (sc->key) 460 hdrlen += sizeof(uint32_t); 461 M_PREPEND(m, hdrlen, M_DONTWAIT); 462 } else { 463 _IF_DROP(&ifp->if_snd); 464 m_freem(m); 465 error = EINVAL; 466 goto end; 467 } 468 469 if (m == NULL) { /* mbuf allocation failed */ 470 _IF_DROP(&ifp->if_snd); 471 error = ENOBUFS; 472 goto end; 473 } 474 475 M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */ 476 477 gh = mtod(m, struct greip *); 478 if (sc->g_proto == IPPROTO_GRE) { 479 uint32_t *options = gh->gi_options; 480 481 memset((void *)gh, 0, sizeof(struct greip) + extra); 482 gh->gi_ptype = htons(etype); 483 gh->gi_flags = 0; 484 485 /* Add key option */ 486 if (sc->key) 487 { 488 gh->gi_flags |= htons(GRE_KP); 489 *(options++) = htonl(sc->key); 490 } 491 } 492 493 gh->gi_pr = sc->g_proto; 494 if (sc->g_proto != IPPROTO_MOBILE) { 495 gh->gi_src = sc->g_src; 496 gh->gi_dst = sc->g_dst; 497 ((struct ip*)gh)->ip_v = IPPROTO_IPV4; 498 ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2; 499 ((struct ip*)gh)->ip_ttl = GRE_TTL; 500 ((struct ip*)gh)->ip_tos = gre_ip_tos; 501 ((struct ip*)gh)->ip_id = gre_ip_id; 502 gh->gi_len = m->m_pkthdr.len; 503 } 504 505 ifp->if_opackets++; 506 ifp->if_obytes += m->m_pkthdr.len; 507 /* 508 * Send it off and with IP_FORWARD flag to prevent it from 509 * overwriting the ip_id again. ip_id is already set to the 510 * ip_id of the encapsulated packet. 511 */ 512 error = ip_output(m, NULL, &sc->route, IP_FORWARDING, 513 (struct ip_moptions *)NULL, (struct inpcb *)NULL); 514 end: 515 if (error) 516 ifp->if_oerrors++; 517 return (error); 518} 519 520static int 521gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 522{ 523 struct ifreq *ifr = (struct ifreq *)data; 524 struct if_laddrreq *lifr = (struct if_laddrreq *)data; 525 struct in_aliasreq *aifr = (struct in_aliasreq *)data; 526 struct gre_softc *sc = ifp->if_softc; 527 int s; 528 struct sockaddr_in si; 529 struct sockaddr *sa = NULL; 530 int error, adj; 531 struct sockaddr_in sp, sm, dp, dm; 532 uint32_t key; 533 534 error = 0; 535 adj = 0; 536 537 s = splnet(); 538 switch (cmd) { 539 case SIOCSIFADDR: 540 ifp->if_flags |= IFF_UP; 541 break; 542 case SIOCSIFDSTADDR: 543 break; 544 case SIOCSIFFLAGS: 545 /* 546 * XXXRW: Isn't this priv_check() redundant to the ifnet 547 * layer check? 548 */ 549 if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0) 550 break; 551 if ((ifr->ifr_flags & IFF_LINK0) != 0) 552 sc->g_proto = IPPROTO_GRE; 553 else 554 sc->g_proto = IPPROTO_MOBILE; 555 if ((ifr->ifr_flags & IFF_LINK2) != 0) 556 sc->wccp_ver = WCCP_V2; 557 else 558 sc->wccp_ver = WCCP_V1; 559 goto recompute; 560 case SIOCSIFMTU: 561 /* 562 * XXXRW: Isn't this priv_check() redundant to the ifnet 563 * layer check? 564 */ 565 if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0) 566 break; 567 if (ifr->ifr_mtu < 576) { 568 error = EINVAL; 569 break; 570 } 571 ifp->if_mtu = ifr->ifr_mtu; 572 break; 573 case SIOCGIFMTU: 574 ifr->ifr_mtu = GRE2IFP(sc)->if_mtu; 575 break; 576 case SIOCADDMULTI: 577 /* 578 * XXXRW: Isn't this priv_checkr() redundant to the ifnet 579 * layer check? 580 */ 581 if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0) 582 break; 583 if (ifr == 0) { 584 error = EAFNOSUPPORT; 585 break; 586 } 587 switch (ifr->ifr_addr.sa_family) { 588#ifdef INET 589 case AF_INET: 590 break; 591#endif 592#ifdef INET6 593 case AF_INET6: 594 break; 595#endif 596 default: 597 error = EAFNOSUPPORT; 598 break; 599 } 600 break; 601 case SIOCDELMULTI: 602 /* 603 * XXXRW: Isn't this priv_check() redundant to the ifnet 604 * layer check? 605 */ 606 if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0) 607 break; 608 if (ifr == 0) { 609 error = EAFNOSUPPORT; 610 break; 611 } 612 switch (ifr->ifr_addr.sa_family) { 613#ifdef INET 614 case AF_INET: 615 break; 616#endif 617#ifdef INET6 618 case AF_INET6: 619 break; 620#endif 621 default: 622 error = EAFNOSUPPORT; 623 break; 624 } 625 break; 626 case GRESPROTO: 627 /* 628 * XXXRW: Isn't this priv_check() redundant to the ifnet 629 * layer check? 630 */ 631 if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) 632 break; 633 sc->g_proto = ifr->ifr_flags; 634 switch (sc->g_proto) { 635 case IPPROTO_GRE: 636 ifp->if_flags |= IFF_LINK0; 637 break; 638 case IPPROTO_MOBILE: 639 ifp->if_flags &= ~IFF_LINK0; 640 break; 641 default: 642 error = EPROTONOSUPPORT; 643 break; 644 } 645 goto recompute; 646 case GREGPROTO: 647 ifr->ifr_flags = sc->g_proto; 648 break; 649 case GRESADDRS: 650 case GRESADDRD: 651 error = priv_check(curthread, PRIV_NET_GRE); 652 if (error) 653 return (error); 654 /* 655 * set tunnel endpoints, compute a less specific route 656 * to the remote end and mark if as up 657 */ 658 sa = &ifr->ifr_addr; 659 if (cmd == GRESADDRS) 660 sc->g_src = (satosin(sa))->sin_addr; 661 if (cmd == GRESADDRD) 662 sc->g_dst = (satosin(sa))->sin_addr; 663 recompute: 664#ifdef INET 665 if (sc->encap != NULL) { 666 encap_detach(sc->encap); 667 sc->encap = NULL; 668 } 669#endif 670 if ((sc->g_src.s_addr != INADDR_ANY) && 671 (sc->g_dst.s_addr != INADDR_ANY)) { 672 bzero(&sp, sizeof(sp)); 673 bzero(&sm, sizeof(sm)); 674 bzero(&dp, sizeof(dp)); 675 bzero(&dm, sizeof(dm)); 676 sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len = 677 sizeof(struct sockaddr_in); 678 sp.sin_family = sm.sin_family = dp.sin_family = 679 dm.sin_family = AF_INET; 680 sp.sin_addr = sc->g_src; 681 dp.sin_addr = sc->g_dst; 682 sm.sin_addr.s_addr = dm.sin_addr.s_addr = 683 INADDR_BROADCAST; 684#ifdef INET 685 sc->encap = encap_attach(AF_INET, sc->g_proto, 686 sintosa(&sp), sintosa(&sm), sintosa(&dp), 687 sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ? 688 &in_gre_protosw : &in_mobile_protosw, sc); 689 if (sc->encap == NULL) 690 printf("%s: unable to attach encap\n", 691 if_name(GRE2IFP(sc))); 692#endif 693 if (sc->route.ro_rt != 0) /* free old route */ 694 RTFREE(sc->route.ro_rt); 695 if (gre_compute_route(sc) == 0) 696 ifp->if_drv_flags |= IFF_DRV_RUNNING; 697 else 698 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 699 } 700 break; 701 case GREGADDRS: 702 memset(&si, 0, sizeof(si)); 703 si.sin_family = AF_INET; 704 si.sin_len = sizeof(struct sockaddr_in); 705 si.sin_addr.s_addr = sc->g_src.s_addr; 706 sa = sintosa(&si); 707 error = prison_if(curthread->td_ucred, sa); 708 if (error != 0) 709 break; 710 ifr->ifr_addr = *sa; 711 break; 712 case GREGADDRD: 713 memset(&si, 0, sizeof(si)); 714 si.sin_family = AF_INET; 715 si.sin_len = sizeof(struct sockaddr_in); 716 si.sin_addr.s_addr = sc->g_dst.s_addr; 717 sa = sintosa(&si); 718 error = prison_if(curthread->td_ucred, sa); 719 if (error != 0) 720 break; 721 ifr->ifr_addr = *sa; 722 break; 723 case SIOCSIFPHYADDR: 724 /* 725 * XXXRW: Isn't this priv_check() redundant to the ifnet 726 * layer check? 727 */ 728 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) 729 break; 730 if (aifr->ifra_addr.sin_family != AF_INET || 731 aifr->ifra_dstaddr.sin_family != AF_INET) { 732 error = EAFNOSUPPORT; 733 break; 734 } 735 if (aifr->ifra_addr.sin_len != sizeof(si) || 736 aifr->ifra_dstaddr.sin_len != sizeof(si)) { 737 error = EINVAL; 738 break; 739 } 740 sc->g_src = aifr->ifra_addr.sin_addr; 741 sc->g_dst = aifr->ifra_dstaddr.sin_addr; 742 goto recompute; 743 case SIOCSLIFPHYADDR: 744 /* 745 * XXXRW: Isn't this priv_check() redundant to the ifnet 746 * layer check? 747 */ 748 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) 749 break; 750 if (lifr->addr.ss_family != AF_INET || 751 lifr->dstaddr.ss_family != AF_INET) { 752 error = EAFNOSUPPORT; 753 break; 754 } 755 if (lifr->addr.ss_len != sizeof(si) || 756 lifr->dstaddr.ss_len != sizeof(si)) { 757 error = EINVAL; 758 break; 759 } 760 sc->g_src = (satosin(&lifr->addr))->sin_addr; 761 sc->g_dst = 762 (satosin(&lifr->dstaddr))->sin_addr; 763 goto recompute; 764 case SIOCDIFPHYADDR: 765 /* 766 * XXXRW: Isn't this priv_check() redundant to the ifnet 767 * layer check? 768 */ 769 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) 770 break; 771 sc->g_src.s_addr = INADDR_ANY; 772 sc->g_dst.s_addr = INADDR_ANY; 773 goto recompute; 774 case SIOCGLIFPHYADDR: 775 if (sc->g_src.s_addr == INADDR_ANY || 776 sc->g_dst.s_addr == INADDR_ANY) { 777 error = EADDRNOTAVAIL; 778 break; 779 } 780 memset(&si, 0, sizeof(si)); 781 si.sin_family = AF_INET; 782 si.sin_len = sizeof(struct sockaddr_in); 783 si.sin_addr.s_addr = sc->g_src.s_addr; 784 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); 785 if (error != 0) 786 break; 787 memcpy(&lifr->addr, &si, sizeof(si)); 788 si.sin_addr.s_addr = sc->g_dst.s_addr; 789 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); 790 if (error != 0) 791 break; 792 memcpy(&lifr->dstaddr, &si, sizeof(si)); 793 break; 794 case SIOCGIFPSRCADDR: 795#ifdef INET6 796 case SIOCGIFPSRCADDR_IN6: 797#endif 798 if (sc->g_src.s_addr == INADDR_ANY) { 799 error = EADDRNOTAVAIL; 800 break; 801 } 802 memset(&si, 0, sizeof(si)); 803 si.sin_family = AF_INET; 804 si.sin_len = sizeof(struct sockaddr_in); 805 si.sin_addr.s_addr = sc->g_src.s_addr; 806 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); 807 if (error != 0) 808 break; 809 bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); 810 break; 811 case SIOCGIFPDSTADDR: 812#ifdef INET6 813 case SIOCGIFPDSTADDR_IN6: 814#endif 815 if (sc->g_dst.s_addr == INADDR_ANY) { 816 error = EADDRNOTAVAIL; 817 break; 818 } 819 memset(&si, 0, sizeof(si)); 820 si.sin_family = AF_INET; 821 si.sin_len = sizeof(struct sockaddr_in); 822 si.sin_addr.s_addr = sc->g_dst.s_addr; 823 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); 824 if (error != 0) 825 break; 826 bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); 827 break; 828 case GRESKEY: 829 error = priv_check(curthread, PRIV_NET_GRE); 830 if (error) 831 break; 832 error = copyin(ifr->ifr_data, &key, sizeof(key)); 833 if (error) 834 break; 835 /* adjust MTU for option header */ 836 if (key == 0 && sc->key != 0) /* clear */ 837 adj += sizeof(key); 838 else if (key != 0 && sc->key == 0) /* set */ 839 adj -= sizeof(key); 840 841 if (ifp->if_mtu + adj < 576) { 842 error = EINVAL; 843 break; 844 } 845 ifp->if_mtu += adj; 846 sc->key = key; 847 break; 848 case GREGKEY: 849 error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key)); 850 break; 851 852 default: 853 error = EINVAL; 854 break; 855 } 856 857 splx(s); 858 return (error); 859} 860 861/* 862 * computes a route to our destination that is not the one 863 * which would be taken by ip_output(), as this one will loop back to 864 * us. If the interface is p2p as a--->b, then a routing entry exists 865 * If we now send a packet to b (e.g. ping b), this will come down here 866 * gets src=a, dst=b tacked on and would from ip_output() sent back to 867 * if_gre. 868 * Goal here is to compute a route to b that is less specific than 869 * a-->b. We know that this one exists as in normal operation we have 870 * at least a default route which matches. 871 */ 872static int 873gre_compute_route(struct gre_softc *sc) 874{ 875 struct route *ro; 876 877 ro = &sc->route; 878 879 memset(ro, 0, sizeof(struct route)); 880 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; 881 ro->ro_dst.sa_family = AF_INET; 882 ro->ro_dst.sa_len = sizeof(ro->ro_dst); 883 884 /* 885 * toggle last bit, so our interface is not found, but a less 886 * specific route. I'd rather like to specify a shorter mask, 887 * but this is not possible. Should work though. XXX 888 * XXX MRT Use a different FIB for the tunnel to solve this problem. 889 */ 890 if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) { 891 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^= 892 htonl(0x01); 893 } 894 895#ifdef DIAGNOSTIC 896 printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)), 897 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr)); 898#endif 899 900 rtalloc_fib(ro, sc->gre_fibnum); 901 902 /* 903 * check if this returned a route at all and this route is no 904 * recursion to ourself 905 */ 906 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) { 907#ifdef DIAGNOSTIC 908 if (ro->ro_rt == NULL) 909 printf(" - no route found!\n"); 910 else 911 printf(" - route loops back to ourself!\n"); 912#endif 913 return EADDRNOTAVAIL; 914 } 915 916 /* 917 * now change it back - else ip_output will just drop 918 * the route and search one to this interface ... 919 */ 920 if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) 921 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; 922 923#ifdef DIAGNOSTIC 924 printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp), 925 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr)); 926 printf("\n"); 927#endif 928 929 return 0; 930} 931 932/* 933 * do a checksum of a buffer - much like in_cksum, which operates on 934 * mbufs. 935 */ 936u_int16_t 937gre_in_cksum(u_int16_t *p, u_int len) 938{ 939 u_int32_t sum = 0; 940 int nwords = len >> 1; 941 942 while (nwords-- != 0) 943 sum += *p++; 944 945 if (len & 1) { 946 union { 947 u_short w; 948 u_char c[2]; 949 } u; 950 u.c[0] = *(u_char *)p; 951 u.c[1] = 0; 952 sum += u.w; 953 } 954 955 /* end-around-carry */ 956 sum = (sum >> 16) + (sum & 0xffff); 957 sum += (sum >> 16); 958 return (~sum); 959} 960 961static int 962gremodevent(module_t mod, int type, void *data) 963{ 964 965 switch (type) { 966 case MOD_LOAD: 967 greattach(); 968 break; 969 case MOD_UNLOAD: 970 if_clone_detach(&gre_cloner); 971 mtx_destroy(&gre_mtx); 972 break; 973 default: 974 return EOPNOTSUPP; 975 } 976 return 0; 977} 978 979static moduledata_t gre_mod = { 980 "if_gre", 981 gremodevent, 982 0 983}; 984 985DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 986MODULE_VERSION(if_gre, 1); 987