1/* $NetBSD: if_mpls.c,v 1.8 2011/07/03 18:46:12 kefren Exp $ */ 2 3/* 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Mihai Chelaru <kefren@NetBSD.org> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.8 2011/07/03 18:46:12 kefren Exp $"); 34 35#include "opt_inet.h" 36#include "opt_mpls.h" 37 38#include <sys/param.h> 39 40#include <sys/errno.h> 41#include <sys/malloc.h> 42#include <sys/mbuf.h> 43#include <sys/sysctl.h> 44 45#include <net/bpf.h> 46#include <net/if.h> 47#include <net/if_types.h> 48#include <net/netisr.h> 49#include <net/route.h> 50 51#ifdef INET 52#include <netinet/in.h> 53#include <netinet/in_systm.h> 54#include <netinet/in_var.h> 55#include <netinet/ip.h> 56#endif 57 58#ifdef INET6 59#include <netinet/ip6.h> 60#include <netinet6/in6_var.h> 61#include <netinet6/ip6_var.h> 62#endif 63 64#include <netmpls/mpls.h> 65#include <netmpls/mpls_var.h> 66 67#include "if_mpls.h" 68 69void ifmplsattach(int); 70 71static int mpls_clone_create(struct if_clone *, int); 72static int mpls_clone_destroy(struct ifnet *); 73 74static struct if_clone mpls_if_cloner = 75 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy); 76 77 78static void mpls_input(struct ifnet *, struct mbuf *); 79static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *, 80 struct rtentry *); 81static int mpls_ioctl(struct ifnet *, u_long, void *); 82static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *); 83static int mpls_lse(struct mbuf *); 84 85#ifdef INET 86static int mpls_unlabel_inet(struct mbuf *); 87static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint); 88#endif 89 90#ifdef INET6 91static int mpls_unlabel_inet6(struct mbuf *); 92static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint); 93#endif 94 95static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *); 96 97extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond, 98 mpls_forwarding, mpls_accept, mpls_mapprec_inet, mpls_mapclass_inet6; 99 100/* ARGSUSED */ 101void 102ifmplsattach(int count) 103{ 104 if_clone_attach(&mpls_if_cloner); 105} 106 107static int 108mpls_clone_create(struct if_clone *ifc, int unit) 109{ 110 struct mpls_softc *sc; 111 112 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); 113 114 if_initname(&sc->sc_if, ifc->ifc_name, unit); 115 sc->sc_if.if_softc = sc; 116 sc->sc_if.if_type = IFT_MPLS; 117 sc->sc_if.if_addrlen = 0; 118 sc->sc_if.if_hdrlen = sizeof(union mpls_shim); 119 sc->sc_if.if_dlt = DLT_NULL; 120 sc->sc_if.if_mtu = 1500; 121 sc->sc_if.if_flags = 0; 122 sc->sc_if.if_input = mpls_input; 123 sc->sc_if.if_output = mpls_output; 124 sc->sc_if.if_ioctl = mpls_ioctl; 125 126 if_attach(&sc->sc_if); 127 if_alloc_sadl(&sc->sc_if); 128 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t)); 129 return 0; 130} 131 132static int 133mpls_clone_destroy(struct ifnet *ifp) 134{ 135 int s; 136 137 bpf_detach(ifp); 138 139 s = splnet(); 140 if_detach(ifp); 141 splx(s); 142 143 free(ifp->if_softc, M_DEVBUF); 144 return 0; 145} 146 147static void 148mpls_input(struct ifnet *ifp, struct mbuf *m) 149{ 150#if 0 151 /* 152 * TODO - kefren 153 * I'd love to unshim the packet, guess family 154 * and pass it to bpf 155 */ 156 bpf_mtap_af(ifp, AF_MPLS, m); 157#endif 158 159 mpls_lse(m); 160} 161 162void 163mplsintr(void) 164{ 165 struct mbuf *m; 166 int s; 167 168 while (!IF_IS_EMPTY(&mplsintrq)) { 169 s = splnet(); 170 IF_DEQUEUE(&mplsintrq, m); 171 splx(s); 172 173 if (!m) 174 return; 175 176 if (((m->m_flags & M_PKTHDR) == 0) || 177 (m->m_pkthdr.rcvif == 0)) 178 panic("mplsintr(): no pkthdr or rcvif"); 179 180#ifdef MBUFTRACE 181 m_claimm(m, &mpls_owner); 182#endif 183 mpls_input(m->m_pkthdr.rcvif, m); 184 } 185} 186 187/* 188 * prepend shim and deliver 189 */ 190static int 191mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt) 192{ 193 union mpls_shim mh, *pms; 194 struct rtentry *rt1; 195 int err; 196 uint psize = sizeof(struct sockaddr_mpls); 197 198 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { 199 m_freem(m); 200 return ENETDOWN; 201 } 202 203 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) { 204 m_freem(m); 205 return EINVAL; 206 } 207 208 bpf_mtap_af(ifp, dst->sa_family, m); 209 210 memset(&mh, 0, sizeof(mh)); 211 mh.s_addr = MPLS_GETSADDR(rt); 212 mh.shim.bos = 1; 213 mh.shim.exp = 0; 214 mh.shim.ttl = mpls_defttl; 215 216 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 217 218 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) { 219 pms++; 220 if (mh.shim.label != MPLS_LABEL_IMPLNULL && 221 ((m = mpls_prepend_shim(m, &mh)) == NULL)) 222 return ENOBUFS; 223 memset(&mh, 0, sizeof(mh)); 224 mh.s_addr = ntohl(pms->s_addr); 225 mh.shim.bos = mh.shim.exp = 0; 226 mh.shim.ttl = mpls_defttl; 227 psize += sizeof(mh); 228 } 229 230 switch(dst->sa_family) { 231#ifdef INET 232 case AF_INET: 233 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls)); 234 break; 235#endif 236#ifdef INET6 237 case AF_INET6: 238 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls)); 239 break; 240#endif 241 default: 242 m = mpls_prepend_shim(m, &mh); 243 break; 244 } 245 246 if (m == NULL) { 247 IF_DROP(&ifp->if_snd); 248 ifp->if_oerrors++; 249 return ENOBUFS; 250 } 251 252 ifp->if_opackets++; 253 ifp->if_obytes += m->m_pkthdr.len; 254 255 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) { 256 m_freem(m); 257 return EHOSTUNREACH; 258 } 259 260 err = mpls_send_frame(m, rt1->rt_ifp, rt); 261 RTFREE(rt1); 262 return err; 263} 264 265static int 266mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data) 267{ 268 int error = 0, s = splnet(); 269 struct ifreq *ifr = data; 270 271 switch(cmd) { 272 case SIOCINITIFADDR: 273 ifp->if_flags |= IFF_UP | IFF_RUNNING; 274 break; 275 case SIOCSIFMTU: 276 if (ifr != NULL && ifr->ifr_mtu < 576) { 277 error = EINVAL; 278 break; 279 } 280 /* FALLTHROUGH */ 281 case SIOCGIFMTU: 282 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 283 error = 0; 284 break; 285 case SIOCSIFFLAGS: 286 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 287 break; 288 if (ifp->if_flags & IFF_UP) 289 ifp->if_flags |= IFF_RUNNING; 290 break; 291 default: 292 error = ifioctl_common(ifp, cmd, data); 293 break; 294 } 295 splx(s); 296 return error; 297} 298 299/* 300 * MPLS Label Switch Engine 301 */ 302static int 303mpls_lse(struct mbuf *m) 304{ 305 struct sockaddr_mpls dst; 306 union mpls_shim tshim, *htag; 307 struct rtentry *rt = NULL; 308 int error = ENOBUFS; 309 uint psize = sizeof(struct sockaddr_mpls); 310 311 if (m->m_len < sizeof(union mpls_shim) && 312 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) 313 goto done; 314 315 dst.smpls_len = sizeof(struct sockaddr_mpls); 316 dst.smpls_family = AF_MPLS; 317 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 318 319 /* Check if we're accepting MPLS Frames */ 320 error = EINVAL; 321 if (!mpls_accept) 322 goto done; 323 324 /* TTL decrement */ 325 if ((m = mpls_ttl_dec(m)) == NULL) 326 goto done; 327 328 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) { 329 /* Don't swap reserved labels */ 330 switch (dst.smpls_addr.shim.label) { 331#ifdef INET 332 case MPLS_LABEL_IPV4NULL: 333 /* Pop shim and push mbuf to IP stack */ 334 if (dst.smpls_addr.shim.bos) 335 error = mpls_unlabel_inet(m); 336 break; 337#endif 338#ifdef INET6 339 case MPLS_LABEL_IPV6NULL: 340 /* Pop shim and push mbuf to IPv6 stack */ 341 if (dst.smpls_addr.shim.bos) 342 error = mpls_unlabel_inet6(m); 343 break; 344#endif 345 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */ 346 case MPLS_LABEL_IMPLNULL: /* This is logical only */ 347 default: /* Rest are not allowed */ 348 break; 349 } 350 goto done; 351 } 352 353 /* Check if we should do MPLS forwarding */ 354 error = EHOSTUNREACH; 355 if (!mpls_forwarding) 356 goto done; 357 358 /* Get a route to dst */ 359 dst.smpls_addr.shim.ttl = 360 dst.smpls_addr.shim.bos = 361 dst.smpls_addr.shim.exp = 0; 362 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr); 363 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL) 364 goto done; 365 366 /* MPLS packet with no MPLS tagged route ? */ 367 if ((rt->rt_flags & RTF_GATEWAY) == 0 || 368 rt_gettag(rt) == NULL || 369 rt_gettag(rt)->sa_family != AF_MPLS) 370 goto done; 371 372 tshim.s_addr = MPLS_GETSADDR(rt); 373 374 /* Swap labels */ 375 if ((m->m_len < sizeof(union mpls_shim)) && 376 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) { 377 error = ENOBUFS; 378 goto done; 379 } 380 381 /* Replace only the label */ 382 htag = mtod(m, union mpls_shim *); 383 htag->s_addr = ntohl(htag->s_addr); 384 htag->shim.label = tshim.shim.label; 385 htag->s_addr = htonl(htag->s_addr); 386 387 /* check if there is anything more to prepend */ 388 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 389 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) { 390 htag++; 391 memset(&tshim, 0, sizeof(tshim)); 392 tshim.s_addr = ntohl(htag->s_addr); 393 tshim.shim.bos = tshim.shim.exp = 0; 394 tshim.shim.ttl = mpls_defttl; 395 if (tshim.shim.label != MPLS_LABEL_IMPLNULL && 396 ((m = mpls_prepend_shim(m, &tshim)) == NULL)) 397 return ENOBUFS; 398 psize += sizeof(tshim); 399 } 400 401 error = mpls_send_frame(m, rt->rt_ifp, rt); 402 403done: 404 if (error != 0 && m != NULL) 405 m_freem(m); 406 if (rt != NULL) 407 RTFREE(rt); 408 409 return error; 410} 411 412static int 413mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt) 414{ 415 union mpls_shim msh; 416 417 if ((rt->rt_flags & RTF_GATEWAY) == 0) 418 return EHOSTUNREACH; 419 420 rt->rt_use++; 421 422 msh.s_addr = MPLS_GETSADDR(rt); 423 if (msh.shim.label == MPLS_LABEL_IMPLNULL || 424 (m->m_flags & (M_MCAST | M_BCAST))) { 425 m_adj(m, sizeof(union mpls_shim)); 426 m->m_pkthdr.csum_flags = 0; 427 } 428 429 switch(ifp->if_type) { 430 /* only these are supported for now */ 431 case IFT_ETHER: 432 case IFT_TUNNEL: 433 case IFT_LOOP: 434 return (*ifp->if_output)(ifp, m, rt->rt_gateway, rt); 435 break; 436 default: 437 return ENETUNREACH; 438 } 439 return 0; 440} 441 442 443 444#ifdef INET 445static int 446mpls_unlabel_inet(struct mbuf *m) 447{ 448 int s, iphlen; 449 struct ip *iph; 450 union mpls_shim *ms; 451 struct ifqueue *inq; 452 453 if (mpls_mapttl_inet || mpls_mapprec_inet) { 454 455 /* get shim info */ 456 ms = mtod(m, union mpls_shim *); 457 ms->s_addr = ntohl(ms->s_addr); 458 459 /* and get rid of it */ 460 m_adj(m, sizeof(union mpls_shim)); 461 462 /* get ip header */ 463 if (m->m_len < sizeof (struct ip) && 464 (m = m_pullup(m, sizeof(struct ip))) == NULL) 465 return ENOBUFS; 466 iph = mtod(m, struct ip *); 467 iphlen = iph->ip_hl << 2; 468 469 /* get it all */ 470 if (m->m_len < iphlen) { 471 if ((m = m_pullup(m, iphlen)) == NULL) 472 return ENOBUFS; 473 iph = mtod(m, struct ip *); 474 } 475 476 /* check ipsum */ 477 if (in_cksum(m, iphlen) != 0) { 478 m_freem(m); 479 return EINVAL; 480 } 481 482 /* set IP ttl from MPLS ttl */ 483 if (mpls_mapttl_inet) 484 iph->ip_ttl = ms->shim.ttl; 485 486 /* set IP Precedence from MPLS Exp */ 487 if (mpls_mapprec_inet) { 488 iph->ip_tos = (iph->ip_tos << 3) >> 3; 489 iph->ip_tos |= ms->shim.exp << 5; 490 } 491 492 /* reset ipsum because we modified TTL and TOS */ 493 iph->ip_sum = 0; 494 iph->ip_sum = in_cksum(m, iphlen); 495 } else 496 m_adj(m, sizeof(union mpls_shim)); 497 498 /* Put it on IP queue */ 499 inq = &ipintrq; 500 s = splnet(); 501 if (IF_QFULL(inq)) { 502 IF_DROP(inq); 503 splx(s); 504 m_freem(m); 505 return ENOBUFS; 506 } 507 IF_ENQUEUE(inq, m); 508 splx(s); 509 schednetisr(NETISR_IP); 510 511 return 0; 512} 513 514/* 515 * Prepend MPLS label 516 */ 517static struct mbuf * 518mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset) 519{ 520 struct ip iphdr; 521 522 if (mpls_mapttl_inet || mpls_mapprec_inet) { 523 if ((m->m_len < sizeof(struct ip)) && 524 (m = m_pullup(m, offset + sizeof(struct ip))) == 0) 525 return NULL; /* XXX */ 526 m_copydata(m, offset, sizeof(struct ip), &iphdr); 527 528 /* Map TTL */ 529 if (mpls_mapttl_inet) 530 ms->shim.ttl = iphdr.ip_ttl; 531 532 /* Copy IP precedence to EXP */ 533 if (mpls_mapprec_inet) 534 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5; 535 } 536 537 if ((m = mpls_prepend_shim(m, ms)) == NULL) 538 return NULL; 539 540 return m; 541} 542 543#endif /* INET */ 544 545#ifdef INET6 546 547static int 548mpls_unlabel_inet6(struct mbuf *m) 549{ 550 struct ip6_hdr *ip6hdr; 551 union mpls_shim ms; 552 struct ifqueue *inq; 553 int s; 554 555 /* TODO: mapclass */ 556 if (mpls_mapttl_inet6) { 557 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 558 m_adj(m, sizeof(union mpls_shim)); 559 560 if (m->m_len < sizeof (struct ip6_hdr) && 561 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) 562 return ENOBUFS; 563 ip6hdr = mtod(m, struct ip6_hdr *); 564 565 /* Because we just decremented this in mpls_lse */ 566 ip6hdr->ip6_hlim = ms.shim.ttl + 1; 567 } else 568 m_adj(m, sizeof(union mpls_shim)); 569 570 /* Put it back on IPv6 stack */ 571 schednetisr(NETISR_IPV6); 572 inq = &ip6intrq; 573 s = splnet(); 574 if (IF_QFULL(inq)) { 575 IF_DROP(inq); 576 splx(s); 577 m_freem(m); 578 return ENOBUFS; 579 } 580 581 IF_ENQUEUE(inq, m); 582 splx(s); 583 584 return 0; 585} 586 587static struct mbuf * 588mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset) 589{ 590 struct ip6_hdr ip6h; 591 592 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) { 593 if (m->m_len < sizeof(struct ip6_hdr) && 594 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0) 595 return NULL; 596 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h); 597 598 if (mpls_mapttl_inet6) 599 ms->shim.ttl = ip6h.ip6_hlim; 600 601 if (mpls_mapclass_inet6) 602 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5; 603 } 604 605 if ((m = mpls_prepend_shim(m, ms)) == NULL) 606 return NULL; 607 608 return m; 609} 610 611#endif /* INET6 */ 612 613static struct mbuf * 614mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms) 615{ 616 union mpls_shim *shim; 617 618 M_PREPEND(m, sizeof(*ms), M_DONTWAIT); 619 if (m == NULL) 620 return NULL; 621 622 if (m->m_len < sizeof(union mpls_shim) && 623 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) 624 return NULL; 625 626 shim = mtod(m, union mpls_shim *); 627 628 memcpy(shim, ms, sizeof(*shim)); 629 shim->s_addr = htonl(shim->s_addr); 630 631 return m; 632} 633