ip_output.c revision 125952
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 34 * $FreeBSD: head/sys/netinet/ip_output.c 125952 2004-02-18 00:04:52Z mlaier $ 35 */ 36 37#include "opt_ipfw.h" 38#include "opt_ipdn.h" 39#include "opt_ipdivert.h" 40#include "opt_ipfilter.h" 41#include "opt_ipsec.h" 42#include "opt_mac.h" 43#include "opt_pfil_hooks.h" 44#include "opt_random_ip_id.h" 45#include "opt_mbuf_stress_test.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/mac.h> 51#include <sys/malloc.h> 52#include <sys/mbuf.h> 53#include <sys/protosw.h> 54#include <sys/socket.h> 55#include <sys/socketvar.h> 56#include <sys/sysctl.h> 57 58#include <net/if.h> 59#include <net/route.h> 60 61#include <netinet/in.h> 62#include <netinet/in_systm.h> 63#include <netinet/ip.h> 64#include <netinet/in_pcb.h> 65#include <netinet/in_var.h> 66#include <netinet/ip_var.h> 67 68#ifdef PFIL_HOOKS 69#include <net/pfil.h> 70#endif 71 72#include <machine/in_cksum.h> 73 74static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 75 76#ifdef IPSEC 77#include <netinet6/ipsec.h> 78#include <netkey/key.h> 79#ifdef IPSEC_DEBUG 80#include <netkey/key_debug.h> 81#else 82#define KEYDEBUG(lev,arg) 83#endif 84#endif /*IPSEC*/ 85 86#ifdef FAST_IPSEC 87#include <netipsec/ipsec.h> 88#include <netipsec/xform.h> 89#include <netipsec/key.h> 90#endif /*FAST_IPSEC*/ 91 92#include <netinet/ip_fw.h> 93#include <netinet/ip_dummynet.h> 94 95#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 96 x, (ntohl(a.s_addr)>>24)&0xFF,\ 97 (ntohl(a.s_addr)>>16)&0xFF,\ 98 (ntohl(a.s_addr)>>8)&0xFF,\ 99 (ntohl(a.s_addr))&0xFF, y); 100 101u_short ip_id; 102 103#ifdef MBUF_STRESS_TEST 104int mbuf_frag_size = 0; 105SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 106 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 107#endif 108 109static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 110static struct ifnet *ip_multicast_if(struct in_addr *, int *); 111static void ip_mloopback 112 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 113static int ip_getmoptions 114 (struct sockopt *, struct ip_moptions *); 115static int ip_pcbopts(int, struct mbuf **, struct mbuf *); 116static int ip_setmoptions 117 (struct sockopt *, struct ip_moptions **); 118 119int ip_optcopy(struct ip *, struct ip *); 120 121 122extern struct protosw inetsw[]; 123 124/* 125 * IP output. The packet in mbuf chain m contains a skeletal IP 126 * header (with len, off, ttl, proto, tos, src, dst). 127 * The mbuf chain containing the packet will be freed. 128 * The mbuf opt, if present, will not be freed. 129 * In the IP forwarding case, the packet will arrive with options already 130 * inserted, so must have a NULL opt pointer. 131 */ 132int 133ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, 134 int flags, struct ip_moptions *imo, struct inpcb *inp) 135{ 136 struct ip *ip; 137 struct ifnet *ifp = NULL; /* keep compiler happy */ 138 struct mbuf *m; 139 int hlen = sizeof (struct ip); 140 int len, off, error = 0; 141 struct sockaddr_in *dst = NULL; /* keep compiler happy */ 142 struct in_ifaddr *ia = NULL; 143 int isbroadcast, sw_csum; 144 struct in_addr pkt_dst; 145 struct route iproute; 146#ifdef IPSEC 147 struct secpolicy *sp = NULL; 148#endif 149#ifdef FAST_IPSEC 150 struct m_tag *mtag; 151 struct secpolicy *sp = NULL; 152 struct tdb_ident *tdbi; 153 int s; 154#endif /* FAST_IPSEC */ 155 struct ip_fw_args args; 156 int src_was_INADDR_ANY = 0; /* as the name says... */ 157 158 args.eh = NULL; 159 args.rule = NULL; 160 args.next_hop = NULL; 161 args.divert_rule = 0; /* divert cookie */ 162 163 /* Grab info from MT_TAG mbufs prepended to the chain. */ 164 for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) { 165 switch(m0->_m_tag_id) { 166 default: 167 printf("ip_output: unrecognised MT_TAG tag %d\n", 168 m0->_m_tag_id); 169 break; 170 171 case PACKET_TAG_DUMMYNET: 172 /* 173 * the packet was already tagged, so part of the 174 * processing was already done, and we need to go down. 175 * Get parameters from the header. 176 */ 177 args.rule = ((struct dn_pkt *)m0)->rule; 178 opt = NULL ; 179 ro = & ( ((struct dn_pkt *)m0)->ro ) ; 180 imo = NULL ; 181 dst = ((struct dn_pkt *)m0)->dn_dst ; 182 ifp = ((struct dn_pkt *)m0)->ifp ; 183 flags = ((struct dn_pkt *)m0)->flags ; 184 break; 185 186 case PACKET_TAG_DIVERT: 187 args.divert_rule = (intptr_t)m0->m_data & 0xffff; 188 break; 189 190 case PACKET_TAG_IPFORWARD: 191 args.next_hop = (struct sockaddr_in *)m0->m_data; 192 break; 193 } 194 } 195 m = m0; 196 197 M_ASSERTPKTHDR(m); 198 199 if (ro == NULL) { 200 ro = &iproute; 201 bzero(ro, sizeof (*ro)); 202 } 203 204 if (inp != NULL) 205 INP_LOCK_ASSERT(inp); 206 207 if (args.rule != NULL) { /* dummynet already saw us */ 208 ip = mtod(m, struct ip *); 209 hlen = ip->ip_hl << 2 ; 210 if (ro->ro_rt) 211 ia = ifatoia(ro->ro_rt->rt_ifa); 212 goto sendit; 213 } 214 215 if (opt) { 216 len = 0; 217 m = ip_insertoptions(m, opt, &len); 218 if (len != 0) 219 hlen = len; 220 } 221 ip = mtod(m, struct ip *); 222 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; 223 224 /* 225 * Fill in IP header. If we are not allowing fragmentation, 226 * then the ip_id field is meaningless, but we don't set it 227 * to zero. Doing so causes various problems when devices along 228 * the path (routers, load balancers, firewalls, etc.) illegally 229 * disable DF on our packet. Note that a 16-bit counter 230 * will wrap around in less than 10 seconds at 100 Mbit/s on a 231 * medium with MTU 1500. See Steven M. Bellovin, "A Technique 232 * for Counting NATted Hosts", Proc. IMW'02, available at 233 * <http://www.research.att.com/~smb/papers/fnat.pdf>. 234 */ 235 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 236 ip->ip_v = IPVERSION; 237 ip->ip_hl = hlen >> 2; 238#ifdef RANDOM_IP_ID 239 ip->ip_id = ip_randomid(); 240#else 241 ip->ip_id = htons(ip_id++); 242#endif 243 ipstat.ips_localout++; 244 } else { 245 hlen = ip->ip_hl << 2; 246 } 247 248 dst = (struct sockaddr_in *)&ro->ro_dst; 249 /* 250 * If there is a cached route, 251 * check that it is to the same destination 252 * and is still up. If not, free it and try again. 253 * The address family should also be checked in case of sharing the 254 * cache with IPv6. 255 */ 256 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 257 dst->sin_family != AF_INET || 258 dst->sin_addr.s_addr != pkt_dst.s_addr)) { 259 RTFREE(ro->ro_rt); 260 ro->ro_rt = (struct rtentry *)0; 261 } 262 if (ro->ro_rt == 0) { 263 bzero(dst, sizeof(*dst)); 264 dst->sin_family = AF_INET; 265 dst->sin_len = sizeof(*dst); 266 dst->sin_addr = pkt_dst; 267 } 268 /* 269 * If routing to interface only, 270 * short circuit routing lookup. 271 */ 272 if (flags & IP_ROUTETOIF) { 273 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 274 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 275 ipstat.ips_noroute++; 276 error = ENETUNREACH; 277 goto bad; 278 } 279 ifp = ia->ia_ifp; 280 ip->ip_ttl = 1; 281 isbroadcast = in_broadcast(dst->sin_addr, ifp); 282 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 283 imo != NULL && imo->imo_multicast_ifp != NULL) { 284 /* 285 * Bypass the normal routing lookup for multicast 286 * packets if the interface is specified. 287 */ 288 ifp = imo->imo_multicast_ifp; 289 IFP_TO_IA(ifp, ia); 290 isbroadcast = 0; /* fool gcc */ 291 } else { 292 /* 293 * We want to do any cloning requested by the link layer, 294 * as this is probably required in all cases for correct 295 * operation (as it is for ARP). 296 */ 297 if (ro->ro_rt == 0) 298 rtalloc(ro); 299 if (ro->ro_rt == 0) { 300 ipstat.ips_noroute++; 301 error = EHOSTUNREACH; 302 goto bad; 303 } 304 ia = ifatoia(ro->ro_rt->rt_ifa); 305 ifp = ro->ro_rt->rt_ifp; 306 ro->ro_rt->rt_rmx.rmx_pksent++; 307 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 308 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 309 if (ro->ro_rt->rt_flags & RTF_HOST) 310 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 311 else 312 isbroadcast = in_broadcast(dst->sin_addr, ifp); 313 } 314 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { 315 struct in_multi *inm; 316 317 m->m_flags |= M_MCAST; 318 /* 319 * IP destination address is multicast. Make sure "dst" 320 * still points to the address in "ro". (It may have been 321 * changed to point to a gateway address, above.) 322 */ 323 dst = (struct sockaddr_in *)&ro->ro_dst; 324 /* 325 * See if the caller provided any multicast options 326 */ 327 if (imo != NULL) { 328 ip->ip_ttl = imo->imo_multicast_ttl; 329 if (imo->imo_multicast_vif != -1) 330 ip->ip_src.s_addr = 331 ip_mcast_src ? 332 ip_mcast_src(imo->imo_multicast_vif) : 333 INADDR_ANY; 334 } else 335 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 336 /* 337 * Confirm that the outgoing interface supports multicast. 338 */ 339 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 340 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 341 ipstat.ips_noroute++; 342 error = ENETUNREACH; 343 goto bad; 344 } 345 } 346 /* 347 * If source address not specified yet, use address 348 * of outgoing interface. 349 */ 350 if (ip->ip_src.s_addr == INADDR_ANY) { 351 /* Interface may have no addresses. */ 352 if (ia != NULL) 353 ip->ip_src = IA_SIN(ia)->sin_addr; 354 } 355 356 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 357 /* 358 * XXX 359 * delayed checksums are not currently 360 * compatible with IP multicast routing 361 */ 362 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 363 in_delayed_cksum(m); 364 m->m_pkthdr.csum_flags &= 365 ~CSUM_DELAY_DATA; 366 } 367 } 368 IN_LOOKUP_MULTI(pkt_dst, ifp, inm); 369 if (inm != NULL && 370 (imo == NULL || imo->imo_multicast_loop)) { 371 /* 372 * If we belong to the destination multicast group 373 * on the outgoing interface, and the caller did not 374 * forbid loopback, loop back a copy. 375 */ 376 ip_mloopback(ifp, m, dst, hlen); 377 } 378 else { 379 /* 380 * If we are acting as a multicast router, perform 381 * multicast forwarding as if the packet had just 382 * arrived on the interface to which we are about 383 * to send. The multicast forwarding function 384 * recursively calls this function, using the 385 * IP_FORWARDING flag to prevent infinite recursion. 386 * 387 * Multicasts that are looped back by ip_mloopback(), 388 * above, will be forwarded by the ip_input() routine, 389 * if necessary. 390 */ 391 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 392 /* 393 * If rsvp daemon is not running, do not 394 * set ip_moptions. This ensures that the packet 395 * is multicast and not just sent down one link 396 * as prescribed by rsvpd. 397 */ 398 if (!rsvp_on) 399 imo = NULL; 400 if (ip_mforward && 401 ip_mforward(ip, ifp, m, imo) != 0) { 402 m_freem(m); 403 goto done; 404 } 405 } 406 } 407 408 /* 409 * Multicasts with a time-to-live of zero may be looped- 410 * back, above, but must not be transmitted on a network. 411 * Also, multicasts addressed to the loopback interface 412 * are not sent -- the above call to ip_mloopback() will 413 * loop back a copy if this host actually belongs to the 414 * destination group on the loopback interface. 415 */ 416 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 417 m_freem(m); 418 goto done; 419 } 420 421 goto sendit; 422 } 423#ifndef notdef 424 /* 425 * If the source address is not specified yet, use the address 426 * of the outoing interface. In case, keep note we did that, so 427 * if the the firewall changes the next-hop causing the output 428 * interface to change, we can fix that. 429 */ 430 if (ip->ip_src.s_addr == INADDR_ANY) { 431 /* Interface may have no addresses. */ 432 if (ia != NULL) { 433 ip->ip_src = IA_SIN(ia)->sin_addr; 434 src_was_INADDR_ANY = 1; 435 } 436 } 437#endif /* notdef */ 438 /* 439 * Verify that we have any chance at all of being able to queue 440 * the packet or packet fragments 441 */ 442 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 443 ifp->if_snd.ifq_maxlen) { 444 error = ENOBUFS; 445 ipstat.ips_odropped++; 446 goto bad; 447 } 448 449 /* 450 * Look for broadcast address and 451 * verify user is allowed to send 452 * such a packet. 453 */ 454 if (isbroadcast) { 455 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 456 error = EADDRNOTAVAIL; 457 goto bad; 458 } 459 if ((flags & IP_ALLOWBROADCAST) == 0) { 460 error = EACCES; 461 goto bad; 462 } 463 /* don't allow broadcast messages to be fragmented */ 464 if (ip->ip_len > ifp->if_mtu) { 465 error = EMSGSIZE; 466 goto bad; 467 } 468 if (flags & IP_SENDONES) 469 ip->ip_dst.s_addr = INADDR_BROADCAST; 470 m->m_flags |= M_BCAST; 471 } else { 472 m->m_flags &= ~M_BCAST; 473 } 474 475sendit: 476#ifdef IPSEC 477 /* get SP for this packet */ 478 if (inp == NULL) 479 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 480 flags, &error); 481 else 482 sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error); 483 484 if (sp == NULL) { 485 ipsecstat.out_inval++; 486 goto bad; 487 } 488 489 error = 0; 490 491 /* check policy */ 492 switch (sp->policy) { 493 case IPSEC_POLICY_DISCARD: 494 /* 495 * This packet is just discarded. 496 */ 497 ipsecstat.out_polvio++; 498 goto bad; 499 500 case IPSEC_POLICY_BYPASS: 501 case IPSEC_POLICY_NONE: 502 case IPSEC_POLICY_TCP: 503 /* no need to do IPsec. */ 504 goto skip_ipsec; 505 506 case IPSEC_POLICY_IPSEC: 507 if (sp->req == NULL) { 508 /* acquire a policy */ 509 error = key_spdacquire(sp); 510 goto bad; 511 } 512 break; 513 514 case IPSEC_POLICY_ENTRUST: 515 default: 516 printf("ip_output: Invalid policy found. %d\n", sp->policy); 517 } 518 { 519 struct ipsec_output_state state; 520 bzero(&state, sizeof(state)); 521 state.m = m; 522 if (flags & IP_ROUTETOIF) { 523 state.ro = &iproute; 524 bzero(&iproute, sizeof(iproute)); 525 } else 526 state.ro = ro; 527 state.dst = (struct sockaddr *)dst; 528 529 ip->ip_sum = 0; 530 531 /* 532 * XXX 533 * delayed checksums are not currently compatible with IPsec 534 */ 535 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 536 in_delayed_cksum(m); 537 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 538 } 539 540 ip->ip_len = htons(ip->ip_len); 541 ip->ip_off = htons(ip->ip_off); 542 543 error = ipsec4_output(&state, sp, flags); 544 545 m = state.m; 546 if (flags & IP_ROUTETOIF) { 547 /* 548 * if we have tunnel mode SA, we may need to ignore 549 * IP_ROUTETOIF. 550 */ 551 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 552 flags &= ~IP_ROUTETOIF; 553 ro = state.ro; 554 } 555 } else 556 ro = state.ro; 557 dst = (struct sockaddr_in *)state.dst; 558 if (error) { 559 /* mbuf is already reclaimed in ipsec4_output. */ 560 m0 = NULL; 561 switch (error) { 562 case EHOSTUNREACH: 563 case ENETUNREACH: 564 case EMSGSIZE: 565 case ENOBUFS: 566 case ENOMEM: 567 break; 568 default: 569 printf("ip4_output (ipsec): error code %d\n", error); 570 /*fall through*/ 571 case ENOENT: 572 /* don't show these error codes to the user */ 573 error = 0; 574 break; 575 } 576 goto bad; 577 } 578 579 /* be sure to update variables that are affected by ipsec4_output() */ 580 ip = mtod(m, struct ip *); 581 hlen = ip->ip_hl << 2; 582 if (ro->ro_rt == NULL) { 583 if ((flags & IP_ROUTETOIF) == 0) { 584 printf("ip_output: " 585 "can't update route after IPsec processing\n"); 586 error = EHOSTUNREACH; /*XXX*/ 587 goto bad; 588 } 589 } else { 590 if (state.encap) { 591 ia = ifatoia(ro->ro_rt->rt_ifa); 592 ifp = ro->ro_rt->rt_ifp; 593 } 594 } 595 } 596 597 /* make it flipped, again. */ 598 ip->ip_len = ntohs(ip->ip_len); 599 ip->ip_off = ntohs(ip->ip_off); 600skip_ipsec: 601#endif /*IPSEC*/ 602#ifdef FAST_IPSEC 603 /* 604 * Check the security policy (SP) for the packet and, if 605 * required, do IPsec-related processing. There are two 606 * cases here; the first time a packet is sent through 607 * it will be untagged and handled by ipsec4_checkpolicy. 608 * If the packet is resubmitted to ip_output (e.g. after 609 * AH, ESP, etc. processing), there will be a tag to bypass 610 * the lookup and related policy checking. 611 */ 612 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 613 s = splnet(); 614 if (mtag != NULL) { 615 tdbi = (struct tdb_ident *)(mtag + 1); 616 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); 617 if (sp == NULL) 618 error = -EINVAL; /* force silent drop */ 619 m_tag_delete(m, mtag); 620 } else { 621 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags, 622 &error, inp); 623 } 624 /* 625 * There are four return cases: 626 * sp != NULL apply IPsec policy 627 * sp == NULL, error == 0 no IPsec handling needed 628 * sp == NULL, error == -EINVAL discard packet w/o error 629 * sp == NULL, error != 0 discard packet, report error 630 */ 631 if (sp != NULL) { 632 /* Loop detection, check if ipsec processing already done */ 633 KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); 634 for (mtag = m_tag_first(m); mtag != NULL; 635 mtag = m_tag_next(m, mtag)) { 636 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) 637 continue; 638 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 639 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 640 continue; 641 /* 642 * Check if policy has an SA associated with it. 643 * This can happen when an SP has yet to acquire 644 * an SA; e.g. on first reference. If it occurs, 645 * then we let ipsec4_process_packet do its thing. 646 */ 647 if (sp->req->sav == NULL) 648 break; 649 tdbi = (struct tdb_ident *)(mtag + 1); 650 if (tdbi->spi == sp->req->sav->spi && 651 tdbi->proto == sp->req->sav->sah->saidx.proto && 652 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, 653 sizeof (union sockaddr_union)) == 0) { 654 /* 655 * No IPsec processing is needed, free 656 * reference to SP. 657 * 658 * NB: null pointer to avoid free at 659 * done: below. 660 */ 661 KEY_FREESP(&sp), sp = NULL; 662 splx(s); 663 goto spd_done; 664 } 665 } 666 667 /* 668 * Do delayed checksums now because we send before 669 * this is done in the normal processing path. 670 */ 671 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 672 in_delayed_cksum(m); 673 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 674 } 675 676 ip->ip_len = htons(ip->ip_len); 677 ip->ip_off = htons(ip->ip_off); 678 679 /* NB: callee frees mbuf */ 680 error = ipsec4_process_packet(m, sp->req, flags, 0); 681 /* 682 * Preserve KAME behaviour: ENOENT can be returned 683 * when an SA acquire is in progress. Don't propagate 684 * this to user-level; it confuses applications. 685 * 686 * XXX this will go away when the SADB is redone. 687 */ 688 if (error == ENOENT) 689 error = 0; 690 splx(s); 691 goto done; 692 } else { 693 splx(s); 694 695 if (error != 0) { 696 /* 697 * Hack: -EINVAL is used to signal that a packet 698 * should be silently discarded. This is typically 699 * because we asked key management for an SA and 700 * it was delayed (e.g. kicked up to IKE). 701 */ 702 if (error == -EINVAL) 703 error = 0; 704 goto bad; 705 } else { 706 /* No IPsec processing for this packet. */ 707 } 708#ifdef notyet 709 /* 710 * If deferred crypto processing is needed, check that 711 * the interface supports it. 712 */ 713 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL); 714 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) { 715 /* notify IPsec to do its own crypto */ 716 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 717 error = EHOSTUNREACH; 718 goto bad; 719 } 720#endif 721 } 722spd_done: 723#endif /* FAST_IPSEC */ 724 725 /* 726 * IpHack's section. 727 * - Xlate: translate packet's addr/port (NAT). 728 * - Firewall: deny/allow/etc. 729 * - Wrap: fake packet's addr/port <unimpl.> 730 * - Encapsulate: put it in another IP and send out. <unimp.> 731 */ 732#ifdef PFIL_HOOKS 733 /* 734 * Run through list of hooks for output packets. 735 */ 736 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT); 737 if (error != 0 || m == NULL) 738 goto done; 739 ip = mtod(m, struct ip *); 740#endif /* PFIL_HOOKS */ 741 742 /* 743 * Check with the firewall... 744 * but not if we are already being fwd'd from a firewall. 745 */ 746 if (fw_enable && IPFW_LOADED && !args.next_hop) { 747 struct sockaddr_in *old = dst; 748 749 args.m = m; 750 args.next_hop = dst; 751 args.oif = ifp; 752 off = ip_fw_chk_ptr(&args); 753 m = args.m; 754 dst = args.next_hop; 755 756 /* 757 * On return we must do the following: 758 * m == NULL -> drop the pkt (old interface, deprecated) 759 * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface) 760 * 1<=off<= 0xffff -> DIVERT 761 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe 762 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet 763 * dst != old -> IPFIREWALL_FORWARD 764 * off==0, dst==old -> accept 765 * If some of the above modules are not compiled in, then 766 * we should't have to check the corresponding condition 767 * (because the ipfw control socket should not accept 768 * unsupported rules), but better play safe and drop 769 * packets in case of doubt. 770 */ 771 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { 772 if (m) 773 m_freem(m); 774 error = EACCES; 775 goto done; 776 } 777 ip = mtod(m, struct ip *); 778 if (off == 0 && dst == old) /* common case */ 779 goto pass; 780 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { 781 /* 782 * pass the pkt to dummynet. Need to include 783 * pipe number, m, ifp, ro, dst because these are 784 * not recomputed in the next pass. 785 * All other parameters have been already used and 786 * so they are not needed anymore. 787 * XXX note: if the ifp or ro entry are deleted 788 * while a pkt is in dummynet, we are in trouble! 789 */ 790 args.ro = ro; 791 args.dst = dst; 792 args.flags = flags; 793 794 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, 795 &args); 796 goto done; 797 } 798#ifdef IPDIVERT 799 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 800 struct mbuf *clone = NULL; 801 802 /* Clone packet if we're doing a 'tee' */ 803 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 804 clone = m_dup(m, M_DONTWAIT); 805 806 /* 807 * XXX 808 * delayed checksums are not currently compatible 809 * with divert sockets. 810 */ 811 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 812 in_delayed_cksum(m); 813 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 814 } 815 816 /* Restore packet header fields to original values */ 817 ip->ip_len = htons(ip->ip_len); 818 ip->ip_off = htons(ip->ip_off); 819 820 /* Deliver packet to divert input routine */ 821 divert_packet(m, 0, off & 0xffff, args.divert_rule); 822 823 /* If 'tee', continue with original packet */ 824 if (clone != NULL) { 825 m = clone; 826 ip = mtod(m, struct ip *); 827 goto pass; 828 } 829 goto done; 830 } 831#endif 832 833 /* IPFIREWALL_FORWARD */ 834 /* 835 * Check dst to make sure it is directly reachable on the 836 * interface we previously thought it was. 837 * If it isn't (which may be likely in some situations) we have 838 * to re-route it (ie, find a route for the next-hop and the 839 * associated interface) and set them here. This is nested 840 * forwarding which in most cases is undesirable, except where 841 * such control is nigh impossible. So we do it here. 842 * And I'm babbling. 843 */ 844 if (off == 0 && old != dst) { /* FORWARD, dst has changed */ 845#if 0 846 /* 847 * XXX To improve readability, this block should be 848 * changed into a function call as below: 849 */ 850 error = ip_ipforward(&m, &dst, &ifp); 851 if (error) 852 goto bad; 853 if (m == NULL) /* ip_input consumed the mbuf */ 854 goto done; 855#else 856 struct in_ifaddr *ia; 857 858 /* 859 * XXX sro_fwd below is static, and a pointer 860 * to it gets passed to routines downstream. 861 * This could have surprisingly bad results in 862 * practice, because its content is overwritten 863 * by subsequent packets. 864 */ 865 /* There must be a better way to do this next line... */ 866 static struct route sro_fwd; 867 struct route *ro_fwd = &sro_fwd; 868 869#if 0 870 print_ip("IPFIREWALL_FORWARD: New dst ip: ", 871 dst->sin_addr, "\n"); 872#endif 873 874 /* 875 * We need to figure out if we have been forwarded 876 * to a local socket. If so, then we should somehow 877 * "loop back" to ip_input, and get directed to the 878 * PCB as if we had received this packet. This is 879 * because it may be dificult to identify the packets 880 * you want to forward until they are being output 881 * and have selected an interface. (e.g. locally 882 * initiated packets) If we used the loopback inteface, 883 * we would not be able to control what happens 884 * as the packet runs through ip_input() as 885 * it is done through an ISR. 886 */ 887 LIST_FOREACH(ia, 888 INADDR_HASH(dst->sin_addr.s_addr), ia_hash) { 889 /* 890 * If the addr to forward to is one 891 * of ours, we pretend to 892 * be the destination for this packet. 893 */ 894 if (IA_SIN(ia)->sin_addr.s_addr == 895 dst->sin_addr.s_addr) 896 break; 897 } 898 if (ia) { /* tell ip_input "dont filter" */ 899 struct m_hdr tag; 900 901 tag.mh_type = MT_TAG; 902 tag.mh_flags = PACKET_TAG_IPFORWARD; 903 tag.mh_data = (caddr_t)args.next_hop; 904 tag.mh_next = m; 905 tag.mh_nextpkt = NULL; 906 907 if (m->m_pkthdr.rcvif == NULL) 908 m->m_pkthdr.rcvif = ifunit("lo0"); 909 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 910 m->m_pkthdr.csum_flags |= 911 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 912 m0->m_pkthdr.csum_data = 0xffff; 913 } 914 m->m_pkthdr.csum_flags |= 915 CSUM_IP_CHECKED | CSUM_IP_VALID; 916 ip->ip_len = htons(ip->ip_len); 917 ip->ip_off = htons(ip->ip_off); 918 ip_input((struct mbuf *)&tag); 919 goto done; 920 } 921 /* 922 * Some of the logic for this was 923 * nicked from above. 924 */ 925 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 926 927 ro_fwd->ro_rt = 0; 928 rtalloc_ign(ro_fwd, RTF_CLONING); 929 930 if (ro_fwd->ro_rt == 0) { 931 ipstat.ips_noroute++; 932 error = EHOSTUNREACH; 933 goto bad; 934 } 935 936 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 937 ifp = ro_fwd->ro_rt->rt_ifp; 938 ro_fwd->ro_rt->rt_rmx.rmx_pksent++; 939 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 940 dst = (struct sockaddr_in *) 941 ro_fwd->ro_rt->rt_gateway; 942 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 943 isbroadcast = 944 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 945 else 946 isbroadcast = in_broadcast(dst->sin_addr, ifp); 947 if (ro->ro_rt) 948 RTFREE(ro->ro_rt); 949 ro->ro_rt = ro_fwd->ro_rt; 950 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 951 952#endif /* ... block to be put into a function */ 953 /* 954 * If we added a default src ip earlier, 955 * which would have been gotten from the-then 956 * interface, do it again, from the new one. 957 */ 958 if (src_was_INADDR_ANY) 959 ip->ip_src = IA_SIN(ia)->sin_addr; 960 goto pass ; 961 } 962 963 /* 964 * if we get here, none of the above matches, and 965 * we have to drop the pkt 966 */ 967 m_freem(m); 968 error = EACCES; /* not sure this is the right error msg */ 969 goto done; 970 } 971 972pass: 973 /* 127/8 must not appear on wire - RFC1122. */ 974 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 975 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 976 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 977 ipstat.ips_badaddr++; 978 error = EADDRNOTAVAIL; 979 goto bad; 980 } 981 } 982 983 m->m_pkthdr.csum_flags |= CSUM_IP; 984 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 985 if (sw_csum & CSUM_DELAY_DATA) { 986 in_delayed_cksum(m); 987 sw_csum &= ~CSUM_DELAY_DATA; 988 } 989 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 990 991 /* 992 * If small enough for interface, or the interface will take 993 * care of the fragmentation for us, can just send directly. 994 */ 995 if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT && 996 ((ip->ip_off & IP_DF) == 0))) { 997 ip->ip_len = htons(ip->ip_len); 998 ip->ip_off = htons(ip->ip_off); 999 ip->ip_sum = 0; 1000 if (sw_csum & CSUM_DELAY_IP) 1001 ip->ip_sum = in_cksum(m, hlen); 1002 1003 /* Record statistics for this interface address. */ 1004 if (!(flags & IP_FORWARDING) && ia) { 1005 ia->ia_ifa.if_opackets++; 1006 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1007 } 1008 1009#ifdef IPSEC 1010 /* clean ipsec history once it goes out of the node */ 1011 ipsec_delaux(m); 1012#endif 1013 1014#ifdef MBUF_STRESS_TEST 1015 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 1016 m = m_fragment(m, M_DONTWAIT, mbuf_frag_size); 1017#endif 1018 error = (*ifp->if_output)(ifp, m, 1019 (struct sockaddr *)dst, ro->ro_rt); 1020 goto done; 1021 } 1022 1023 if (ip->ip_off & IP_DF) { 1024 error = EMSGSIZE; 1025 /* 1026 * This case can happen if the user changed the MTU 1027 * of an interface after enabling IP on it. Because 1028 * most netifs don't keep track of routes pointing to 1029 * them, there is no way for one to update all its 1030 * routes when the MTU is changed. 1031 */ 1032 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && 1033 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 1034 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 1035 } 1036 ipstat.ips_cantfrag++; 1037 goto bad; 1038 } 1039 1040 /* 1041 * Too large for interface; fragment if possible. If successful, 1042 * on return, m will point to a list of packets to be sent. 1043 */ 1044 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); 1045 if (error) 1046 goto bad; 1047 for (; m; m = m0) { 1048 m0 = m->m_nextpkt; 1049 m->m_nextpkt = 0; 1050#ifdef IPSEC 1051 /* clean ipsec history once it goes out of the node */ 1052 ipsec_delaux(m); 1053#endif 1054 if (error == 0) { 1055 /* Record statistics for this interface address. */ 1056 if (ia != NULL) { 1057 ia->ia_ifa.if_opackets++; 1058 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1059 } 1060 1061 error = (*ifp->if_output)(ifp, m, 1062 (struct sockaddr *)dst, ro->ro_rt); 1063 } else 1064 m_freem(m); 1065 } 1066 1067 if (error == 0) 1068 ipstat.ips_fragmented++; 1069 1070done: 1071 if (ro == &iproute && ro->ro_rt) { 1072 RTFREE(ro->ro_rt); 1073 ro->ro_rt = NULL; 1074 } 1075#ifdef IPSEC 1076 if (sp != NULL) { 1077 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 1078 printf("DP ip_output call free SP:%p\n", sp)); 1079 key_freesp(sp); 1080 } 1081#endif 1082#ifdef FAST_IPSEC 1083 if (sp != NULL) 1084 KEY_FREESP(&sp); 1085#endif 1086 return (error); 1087bad: 1088 m_freem(m); 1089 goto done; 1090} 1091 1092/* 1093 * Create a chain of fragments which fit the given mtu. m_frag points to the 1094 * mbuf to be fragmented; on return it points to the chain with the fragments. 1095 * Return 0 if no error. If error, m_frag may contain a partially built 1096 * chain of fragments that should be freed by the caller. 1097 * 1098 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 1099 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). 1100 */ 1101int 1102ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 1103 u_long if_hwassist_flags, int sw_csum) 1104{ 1105 int error = 0; 1106 int hlen = ip->ip_hl << 2; 1107 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 1108 int off; 1109 struct mbuf *m0 = *m_frag; /* the original packet */ 1110 int firstlen; 1111 struct mbuf **mnext; 1112 int nfrags; 1113 1114 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ 1115 ipstat.ips_cantfrag++; 1116 return EMSGSIZE; 1117 } 1118 1119 /* 1120 * Must be able to put at least 8 bytes per fragment. 1121 */ 1122 if (len < 8) 1123 return EMSGSIZE; 1124 1125 /* 1126 * If the interface will not calculate checksums on 1127 * fragmented packets, then do it here. 1128 */ 1129 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 1130 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 1131 in_delayed_cksum(m0); 1132 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1133 } 1134 1135 if (len > PAGE_SIZE) { 1136 /* 1137 * Fragment large datagrams such that each segment 1138 * contains a multiple of PAGE_SIZE amount of data, 1139 * plus headers. This enables a receiver to perform 1140 * page-flipping zero-copy optimizations. 1141 * 1142 * XXX When does this help given that sender and receiver 1143 * could have different page sizes, and also mtu could 1144 * be less than the receiver's page size ? 1145 */ 1146 int newlen; 1147 struct mbuf *m; 1148 1149 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) 1150 off += m->m_len; 1151 1152 /* 1153 * firstlen (off - hlen) must be aligned on an 1154 * 8-byte boundary 1155 */ 1156 if (off < hlen) 1157 goto smart_frag_failure; 1158 off = ((off - hlen) & ~7) + hlen; 1159 newlen = (~PAGE_MASK) & mtu; 1160 if ((newlen + sizeof (struct ip)) > mtu) { 1161 /* we failed, go back the default */ 1162smart_frag_failure: 1163 newlen = len; 1164 off = hlen + len; 1165 } 1166 len = newlen; 1167 1168 } else { 1169 off = hlen + len; 1170 } 1171 1172 firstlen = off - hlen; 1173 mnext = &m0->m_nextpkt; /* pointer to next packet */ 1174 1175 /* 1176 * Loop through length of segment after first fragment, 1177 * make new header and copy data of each part and link onto chain. 1178 * Here, m0 is the original packet, m is the fragment being created. 1179 * The fragments are linked off the m_nextpkt of the original 1180 * packet, which after processing serves as the first fragment. 1181 */ 1182 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { 1183 struct ip *mhip; /* ip header on the fragment */ 1184 struct mbuf *m; 1185 int mhlen = sizeof (struct ip); 1186 1187 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1188 if (m == 0) { 1189 error = ENOBUFS; 1190 ipstat.ips_odropped++; 1191 goto done; 1192 } 1193 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 1194 /* 1195 * In the first mbuf, leave room for the link header, then 1196 * copy the original IP header including options. The payload 1197 * goes into an additional mbuf chain returned by m_copy(). 1198 */ 1199 m->m_data += max_linkhdr; 1200 mhip = mtod(m, struct ip *); 1201 *mhip = *ip; 1202 if (hlen > sizeof (struct ip)) { 1203 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 1204 mhip->ip_v = IPVERSION; 1205 mhip->ip_hl = mhlen >> 2; 1206 } 1207 m->m_len = mhlen; 1208 /* XXX do we need to add ip->ip_off below ? */ 1209 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 1210 if (off + len >= ip->ip_len) { /* last fragment */ 1211 len = ip->ip_len - off; 1212 m->m_flags |= M_LASTFRAG; 1213 } else 1214 mhip->ip_off |= IP_MF; 1215 mhip->ip_len = htons((u_short)(len + mhlen)); 1216 m->m_next = m_copy(m0, off, len); 1217 if (m->m_next == 0) { /* copy failed */ 1218 m_free(m); 1219 error = ENOBUFS; /* ??? */ 1220 ipstat.ips_odropped++; 1221 goto done; 1222 } 1223 m->m_pkthdr.len = mhlen + len; 1224 m->m_pkthdr.rcvif = (struct ifnet *)0; 1225#ifdef MAC 1226 mac_create_fragment(m0, m); 1227#endif 1228 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 1229 mhip->ip_off = htons(mhip->ip_off); 1230 mhip->ip_sum = 0; 1231 if (sw_csum & CSUM_DELAY_IP) 1232 mhip->ip_sum = in_cksum(m, mhlen); 1233 *mnext = m; 1234 mnext = &m->m_nextpkt; 1235 } 1236 ipstat.ips_ofragments += nfrags; 1237 1238 /* set first marker for fragment chain */ 1239 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 1240 m0->m_pkthdr.csum_data = nfrags; 1241 1242 /* 1243 * Update first fragment by trimming what's been copied out 1244 * and updating header. 1245 */ 1246 m_adj(m0, hlen + firstlen - ip->ip_len); 1247 m0->m_pkthdr.len = hlen + firstlen; 1248 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 1249 ip->ip_off |= IP_MF; 1250 ip->ip_off = htons(ip->ip_off); 1251 ip->ip_sum = 0; 1252 if (sw_csum & CSUM_DELAY_IP) 1253 ip->ip_sum = in_cksum(m0, hlen); 1254 1255done: 1256 *m_frag = m0; 1257 return error; 1258} 1259 1260void 1261in_delayed_cksum(struct mbuf *m) 1262{ 1263 struct ip *ip; 1264 u_short csum, offset; 1265 1266 ip = mtod(m, struct ip *); 1267 offset = ip->ip_hl << 2 ; 1268 csum = in_cksum_skip(m, ip->ip_len, offset); 1269 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 1270 csum = 0xffff; 1271 offset += m->m_pkthdr.csum_data; /* checksum offset */ 1272 1273 if (offset + sizeof(u_short) > m->m_len) { 1274 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 1275 m->m_len, offset, ip->ip_p); 1276 /* 1277 * XXX 1278 * this shouldn't happen, but if it does, the 1279 * correct behavior may be to insert the checksum 1280 * in the existing chain instead of rearranging it. 1281 */ 1282 m = m_pullup(m, offset + sizeof(u_short)); 1283 } 1284 *(u_short *)(m->m_data + offset) = csum; 1285} 1286 1287/* 1288 * Insert IP options into preformed packet. 1289 * Adjust IP destination as required for IP source routing, 1290 * as indicated by a non-zero in_addr at the start of the options. 1291 * 1292 * XXX This routine assumes that the packet has no options in place. 1293 */ 1294static struct mbuf * 1295ip_insertoptions(m, opt, phlen) 1296 register struct mbuf *m; 1297 struct mbuf *opt; 1298 int *phlen; 1299{ 1300 register struct ipoption *p = mtod(opt, struct ipoption *); 1301 struct mbuf *n; 1302 register struct ip *ip = mtod(m, struct ip *); 1303 unsigned optlen; 1304 1305 optlen = opt->m_len - sizeof(p->ipopt_dst); 1306 if (optlen + ip->ip_len > IP_MAXPACKET) { 1307 *phlen = 0; 1308 return (m); /* XXX should fail */ 1309 } 1310 if (p->ipopt_dst.s_addr) 1311 ip->ip_dst = p->ipopt_dst; 1312 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1313 MGETHDR(n, M_DONTWAIT, MT_HEADER); 1314 if (n == 0) { 1315 *phlen = 0; 1316 return (m); 1317 } 1318 n->m_pkthdr.rcvif = (struct ifnet *)0; 1319#ifdef MAC 1320 mac_create_mbuf_from_mbuf(m, n); 1321#endif 1322 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1323 m->m_len -= sizeof(struct ip); 1324 m->m_data += sizeof(struct ip); 1325 n->m_next = m; 1326 m = n; 1327 m->m_len = optlen + sizeof(struct ip); 1328 m->m_data += max_linkhdr; 1329 bcopy(ip, mtod(m, void *), sizeof(struct ip)); 1330 } else { 1331 m->m_data -= optlen; 1332 m->m_len += optlen; 1333 m->m_pkthdr.len += optlen; 1334 bcopy(ip, mtod(m, void *), sizeof(struct ip)); 1335 } 1336 ip = mtod(m, struct ip *); 1337 bcopy(p->ipopt_list, ip + 1, optlen); 1338 *phlen = sizeof(struct ip) + optlen; 1339 ip->ip_v = IPVERSION; 1340 ip->ip_hl = *phlen >> 2; 1341 ip->ip_len += optlen; 1342 return (m); 1343} 1344 1345/* 1346 * Copy options from ip to jp, 1347 * omitting those not copied during fragmentation. 1348 */ 1349int 1350ip_optcopy(ip, jp) 1351 struct ip *ip, *jp; 1352{ 1353 register u_char *cp, *dp; 1354 int opt, optlen, cnt; 1355 1356 cp = (u_char *)(ip + 1); 1357 dp = (u_char *)(jp + 1); 1358 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 1359 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1360 opt = cp[0]; 1361 if (opt == IPOPT_EOL) 1362 break; 1363 if (opt == IPOPT_NOP) { 1364 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1365 *dp++ = IPOPT_NOP; 1366 optlen = 1; 1367 continue; 1368 } 1369 1370 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp), 1371 ("ip_optcopy: malformed ipv4 option")); 1372 optlen = cp[IPOPT_OLEN]; 1373 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt, 1374 ("ip_optcopy: malformed ipv4 option")); 1375 1376 /* bogus lengths should have been caught by ip_dooptions */ 1377 if (optlen > cnt) 1378 optlen = cnt; 1379 if (IPOPT_COPIED(opt)) { 1380 bcopy(cp, dp, optlen); 1381 dp += optlen; 1382 } 1383 } 1384 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1385 *dp++ = IPOPT_EOL; 1386 return (optlen); 1387} 1388 1389/* 1390 * IP socket option processing. 1391 */ 1392int 1393ip_ctloutput(so, sopt) 1394 struct socket *so; 1395 struct sockopt *sopt; 1396{ 1397 struct inpcb *inp = sotoinpcb(so); 1398 int error, optval; 1399 1400 error = optval = 0; 1401 if (sopt->sopt_level != IPPROTO_IP) { 1402 return (EINVAL); 1403 } 1404 1405 switch (sopt->sopt_dir) { 1406 case SOPT_SET: 1407 switch (sopt->sopt_name) { 1408 case IP_OPTIONS: 1409#ifdef notyet 1410 case IP_RETOPTS: 1411#endif 1412 { 1413 struct mbuf *m; 1414 if (sopt->sopt_valsize > MLEN) { 1415 error = EMSGSIZE; 1416 break; 1417 } 1418 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); 1419 if (m == 0) { 1420 error = ENOBUFS; 1421 break; 1422 } 1423 m->m_len = sopt->sopt_valsize; 1424 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1425 m->m_len); 1426 1427 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1428 m)); 1429 } 1430 1431 case IP_TOS: 1432 case IP_TTL: 1433 case IP_RECVOPTS: 1434 case IP_RECVRETOPTS: 1435 case IP_RECVDSTADDR: 1436 case IP_RECVTTL: 1437 case IP_RECVIF: 1438 case IP_FAITH: 1439 case IP_ONESBCAST: 1440 error = sooptcopyin(sopt, &optval, sizeof optval, 1441 sizeof optval); 1442 if (error) 1443 break; 1444 1445 switch (sopt->sopt_name) { 1446 case IP_TOS: 1447 inp->inp_ip_tos = optval; 1448 break; 1449 1450 case IP_TTL: 1451 inp->inp_ip_ttl = optval; 1452 break; 1453#define OPTSET(bit) \ 1454 if (optval) \ 1455 inp->inp_flags |= bit; \ 1456 else \ 1457 inp->inp_flags &= ~bit; 1458 1459 case IP_RECVOPTS: 1460 OPTSET(INP_RECVOPTS); 1461 break; 1462 1463 case IP_RECVRETOPTS: 1464 OPTSET(INP_RECVRETOPTS); 1465 break; 1466 1467 case IP_RECVDSTADDR: 1468 OPTSET(INP_RECVDSTADDR); 1469 break; 1470 1471 case IP_RECVTTL: 1472 OPTSET(INP_RECVTTL); 1473 break; 1474 1475 case IP_RECVIF: 1476 OPTSET(INP_RECVIF); 1477 break; 1478 1479 case IP_FAITH: 1480 OPTSET(INP_FAITH); 1481 break; 1482 1483 case IP_ONESBCAST: 1484 OPTSET(INP_ONESBCAST); 1485 break; 1486 } 1487 break; 1488#undef OPTSET 1489 1490 case IP_MULTICAST_IF: 1491 case IP_MULTICAST_VIF: 1492 case IP_MULTICAST_TTL: 1493 case IP_MULTICAST_LOOP: 1494 case IP_ADD_MEMBERSHIP: 1495 case IP_DROP_MEMBERSHIP: 1496 error = ip_setmoptions(sopt, &inp->inp_moptions); 1497 break; 1498 1499 case IP_PORTRANGE: 1500 error = sooptcopyin(sopt, &optval, sizeof optval, 1501 sizeof optval); 1502 if (error) 1503 break; 1504 1505 switch (optval) { 1506 case IP_PORTRANGE_DEFAULT: 1507 inp->inp_flags &= ~(INP_LOWPORT); 1508 inp->inp_flags &= ~(INP_HIGHPORT); 1509 break; 1510 1511 case IP_PORTRANGE_HIGH: 1512 inp->inp_flags &= ~(INP_LOWPORT); 1513 inp->inp_flags |= INP_HIGHPORT; 1514 break; 1515 1516 case IP_PORTRANGE_LOW: 1517 inp->inp_flags &= ~(INP_HIGHPORT); 1518 inp->inp_flags |= INP_LOWPORT; 1519 break; 1520 1521 default: 1522 error = EINVAL; 1523 break; 1524 } 1525 break; 1526 1527#if defined(IPSEC) || defined(FAST_IPSEC) 1528 case IP_IPSEC_POLICY: 1529 { 1530 caddr_t req; 1531 size_t len = 0; 1532 int priv; 1533 struct mbuf *m; 1534 int optname; 1535 1536 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1537 break; 1538 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1539 break; 1540 priv = (sopt->sopt_td != NULL && 1541 suser(sopt->sopt_td) != 0) ? 0 : 1; 1542 req = mtod(m, caddr_t); 1543 len = m->m_len; 1544 optname = sopt->sopt_name; 1545 error = ipsec4_set_policy(inp, optname, req, len, priv); 1546 m_freem(m); 1547 break; 1548 } 1549#endif /*IPSEC*/ 1550 1551 default: 1552 error = ENOPROTOOPT; 1553 break; 1554 } 1555 break; 1556 1557 case SOPT_GET: 1558 switch (sopt->sopt_name) { 1559 case IP_OPTIONS: 1560 case IP_RETOPTS: 1561 if (inp->inp_options) 1562 error = sooptcopyout(sopt, 1563 mtod(inp->inp_options, 1564 char *), 1565 inp->inp_options->m_len); 1566 else 1567 sopt->sopt_valsize = 0; 1568 break; 1569 1570 case IP_TOS: 1571 case IP_TTL: 1572 case IP_RECVOPTS: 1573 case IP_RECVRETOPTS: 1574 case IP_RECVDSTADDR: 1575 case IP_RECVTTL: 1576 case IP_RECVIF: 1577 case IP_PORTRANGE: 1578 case IP_FAITH: 1579 case IP_ONESBCAST: 1580 switch (sopt->sopt_name) { 1581 1582 case IP_TOS: 1583 optval = inp->inp_ip_tos; 1584 break; 1585 1586 case IP_TTL: 1587 optval = inp->inp_ip_ttl; 1588 break; 1589 1590#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1591 1592 case IP_RECVOPTS: 1593 optval = OPTBIT(INP_RECVOPTS); 1594 break; 1595 1596 case IP_RECVRETOPTS: 1597 optval = OPTBIT(INP_RECVRETOPTS); 1598 break; 1599 1600 case IP_RECVDSTADDR: 1601 optval = OPTBIT(INP_RECVDSTADDR); 1602 break; 1603 1604 case IP_RECVTTL: 1605 optval = OPTBIT(INP_RECVTTL); 1606 break; 1607 1608 case IP_RECVIF: 1609 optval = OPTBIT(INP_RECVIF); 1610 break; 1611 1612 case IP_PORTRANGE: 1613 if (inp->inp_flags & INP_HIGHPORT) 1614 optval = IP_PORTRANGE_HIGH; 1615 else if (inp->inp_flags & INP_LOWPORT) 1616 optval = IP_PORTRANGE_LOW; 1617 else 1618 optval = 0; 1619 break; 1620 1621 case IP_FAITH: 1622 optval = OPTBIT(INP_FAITH); 1623 break; 1624 1625 case IP_ONESBCAST: 1626 optval = OPTBIT(INP_ONESBCAST); 1627 break; 1628 } 1629 error = sooptcopyout(sopt, &optval, sizeof optval); 1630 break; 1631 1632 case IP_MULTICAST_IF: 1633 case IP_MULTICAST_VIF: 1634 case IP_MULTICAST_TTL: 1635 case IP_MULTICAST_LOOP: 1636 case IP_ADD_MEMBERSHIP: 1637 case IP_DROP_MEMBERSHIP: 1638 error = ip_getmoptions(sopt, inp->inp_moptions); 1639 break; 1640 1641#if defined(IPSEC) || defined(FAST_IPSEC) 1642 case IP_IPSEC_POLICY: 1643 { 1644 struct mbuf *m = NULL; 1645 caddr_t req = NULL; 1646 size_t len = 0; 1647 1648 if (m != 0) { 1649 req = mtod(m, caddr_t); 1650 len = m->m_len; 1651 } 1652 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1653 if (error == 0) 1654 error = soopt_mcopyout(sopt, m); /* XXX */ 1655 if (error == 0) 1656 m_freem(m); 1657 break; 1658 } 1659#endif /*IPSEC*/ 1660 1661 default: 1662 error = ENOPROTOOPT; 1663 break; 1664 } 1665 break; 1666 } 1667 return (error); 1668} 1669 1670/* 1671 * Set up IP options in pcb for insertion in output packets. 1672 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1673 * with destination address if source routed. 1674 */ 1675static int 1676ip_pcbopts(optname, pcbopt, m) 1677 int optname; 1678 struct mbuf **pcbopt; 1679 register struct mbuf *m; 1680{ 1681 register int cnt, optlen; 1682 register u_char *cp; 1683 u_char opt; 1684 1685 /* turn off any old options */ 1686 if (*pcbopt) 1687 (void)m_free(*pcbopt); 1688 *pcbopt = 0; 1689 if (m == (struct mbuf *)0 || m->m_len == 0) { 1690 /* 1691 * Only turning off any previous options. 1692 */ 1693 if (m) 1694 (void)m_free(m); 1695 return (0); 1696 } 1697 1698 if (m->m_len % sizeof(int32_t)) 1699 goto bad; 1700 /* 1701 * IP first-hop destination address will be stored before 1702 * actual options; move other options back 1703 * and clear it when none present. 1704 */ 1705 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1706 goto bad; 1707 cnt = m->m_len; 1708 m->m_len += sizeof(struct in_addr); 1709 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1710 bcopy(mtod(m, void *), cp, (unsigned)cnt); 1711 bzero(mtod(m, void *), sizeof(struct in_addr)); 1712 1713 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1714 opt = cp[IPOPT_OPTVAL]; 1715 if (opt == IPOPT_EOL) 1716 break; 1717 if (opt == IPOPT_NOP) 1718 optlen = 1; 1719 else { 1720 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1721 goto bad; 1722 optlen = cp[IPOPT_OLEN]; 1723 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1724 goto bad; 1725 } 1726 switch (opt) { 1727 1728 default: 1729 break; 1730 1731 case IPOPT_LSRR: 1732 case IPOPT_SSRR: 1733 /* 1734 * user process specifies route as: 1735 * ->A->B->C->D 1736 * D must be our final destination (but we can't 1737 * check that since we may not have connected yet). 1738 * A is first hop destination, which doesn't appear in 1739 * actual IP option, but is stored before the options. 1740 */ 1741 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1742 goto bad; 1743 m->m_len -= sizeof(struct in_addr); 1744 cnt -= sizeof(struct in_addr); 1745 optlen -= sizeof(struct in_addr); 1746 cp[IPOPT_OLEN] = optlen; 1747 /* 1748 * Move first hop before start of options. 1749 */ 1750 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1751 sizeof(struct in_addr)); 1752 /* 1753 * Then copy rest of options back 1754 * to close up the deleted entry. 1755 */ 1756 bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)), 1757 &cp[IPOPT_OFFSET+1], 1758 (unsigned)cnt + sizeof(struct in_addr)); 1759 break; 1760 } 1761 } 1762 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1763 goto bad; 1764 *pcbopt = m; 1765 return (0); 1766 1767bad: 1768 (void)m_free(m); 1769 return (EINVAL); 1770} 1771 1772/* 1773 * XXX 1774 * The whole multicast option thing needs to be re-thought. 1775 * Several of these options are equally applicable to non-multicast 1776 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1777 * standard option (IP_TTL). 1778 */ 1779 1780/* 1781 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1782 */ 1783static struct ifnet * 1784ip_multicast_if(a, ifindexp) 1785 struct in_addr *a; 1786 int *ifindexp; 1787{ 1788 int ifindex; 1789 struct ifnet *ifp; 1790 1791 if (ifindexp) 1792 *ifindexp = 0; 1793 if (ntohl(a->s_addr) >> 24 == 0) { 1794 ifindex = ntohl(a->s_addr) & 0xffffff; 1795 if (ifindex < 0 || if_index < ifindex) 1796 return NULL; 1797 ifp = ifnet_byindex(ifindex); 1798 if (ifindexp) 1799 *ifindexp = ifindex; 1800 } else { 1801 INADDR_TO_IFP(*a, ifp); 1802 } 1803 return ifp; 1804} 1805 1806/* 1807 * Set the IP multicast options in response to user setsockopt(). 1808 */ 1809static int 1810ip_setmoptions(sopt, imop) 1811 struct sockopt *sopt; 1812 struct ip_moptions **imop; 1813{ 1814 int error = 0; 1815 int i; 1816 struct in_addr addr; 1817 struct ip_mreq mreq; 1818 struct ifnet *ifp; 1819 struct ip_moptions *imo = *imop; 1820 struct route ro; 1821 struct sockaddr_in *dst; 1822 int ifindex; 1823 int s; 1824 1825 if (imo == NULL) { 1826 /* 1827 * No multicast option buffer attached to the pcb; 1828 * allocate one and initialize to default values. 1829 */ 1830 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, 1831 M_WAITOK); 1832 1833 if (imo == NULL) 1834 return (ENOBUFS); 1835 *imop = imo; 1836 imo->imo_multicast_ifp = NULL; 1837 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1838 imo->imo_multicast_vif = -1; 1839 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1840 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1841 imo->imo_num_memberships = 0; 1842 } 1843 1844 switch (sopt->sopt_name) { 1845 /* store an index number for the vif you wanna use in the send */ 1846 case IP_MULTICAST_VIF: 1847 if (legal_vif_num == 0) { 1848 error = EOPNOTSUPP; 1849 break; 1850 } 1851 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1852 if (error) 1853 break; 1854 if (!legal_vif_num(i) && (i != -1)) { 1855 error = EINVAL; 1856 break; 1857 } 1858 imo->imo_multicast_vif = i; 1859 break; 1860 1861 case IP_MULTICAST_IF: 1862 /* 1863 * Select the interface for outgoing multicast packets. 1864 */ 1865 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1866 if (error) 1867 break; 1868 /* 1869 * INADDR_ANY is used to remove a previous selection. 1870 * When no interface is selected, a default one is 1871 * chosen every time a multicast packet is sent. 1872 */ 1873 if (addr.s_addr == INADDR_ANY) { 1874 imo->imo_multicast_ifp = NULL; 1875 break; 1876 } 1877 /* 1878 * The selected interface is identified by its local 1879 * IP address. Find the interface and confirm that 1880 * it supports multicasting. 1881 */ 1882 s = splimp(); 1883 ifp = ip_multicast_if(&addr, &ifindex); 1884 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1885 splx(s); 1886 error = EADDRNOTAVAIL; 1887 break; 1888 } 1889 imo->imo_multicast_ifp = ifp; 1890 if (ifindex) 1891 imo->imo_multicast_addr = addr; 1892 else 1893 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1894 splx(s); 1895 break; 1896 1897 case IP_MULTICAST_TTL: 1898 /* 1899 * Set the IP time-to-live for outgoing multicast packets. 1900 * The original multicast API required a char argument, 1901 * which is inconsistent with the rest of the socket API. 1902 * We allow either a char or an int. 1903 */ 1904 if (sopt->sopt_valsize == 1) { 1905 u_char ttl; 1906 error = sooptcopyin(sopt, &ttl, 1, 1); 1907 if (error) 1908 break; 1909 imo->imo_multicast_ttl = ttl; 1910 } else { 1911 u_int ttl; 1912 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1913 sizeof ttl); 1914 if (error) 1915 break; 1916 if (ttl > 255) 1917 error = EINVAL; 1918 else 1919 imo->imo_multicast_ttl = ttl; 1920 } 1921 break; 1922 1923 case IP_MULTICAST_LOOP: 1924 /* 1925 * Set the loopback flag for outgoing multicast packets. 1926 * Must be zero or one. The original multicast API required a 1927 * char argument, which is inconsistent with the rest 1928 * of the socket API. We allow either a char or an int. 1929 */ 1930 if (sopt->sopt_valsize == 1) { 1931 u_char loop; 1932 error = sooptcopyin(sopt, &loop, 1, 1); 1933 if (error) 1934 break; 1935 imo->imo_multicast_loop = !!loop; 1936 } else { 1937 u_int loop; 1938 error = sooptcopyin(sopt, &loop, sizeof loop, 1939 sizeof loop); 1940 if (error) 1941 break; 1942 imo->imo_multicast_loop = !!loop; 1943 } 1944 break; 1945 1946 case IP_ADD_MEMBERSHIP: 1947 /* 1948 * Add a multicast group membership. 1949 * Group must be a valid IP multicast address. 1950 */ 1951 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1952 if (error) 1953 break; 1954 1955 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1956 error = EINVAL; 1957 break; 1958 } 1959 s = splimp(); 1960 /* 1961 * If no interface address was provided, use the interface of 1962 * the route to the given multicast address. 1963 */ 1964 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1965 bzero((caddr_t)&ro, sizeof(ro)); 1966 dst = (struct sockaddr_in *)&ro.ro_dst; 1967 dst->sin_len = sizeof(*dst); 1968 dst->sin_family = AF_INET; 1969 dst->sin_addr = mreq.imr_multiaddr; 1970 rtalloc_ign(&ro, RTF_CLONING); 1971 if (ro.ro_rt == NULL) { 1972 error = EADDRNOTAVAIL; 1973 splx(s); 1974 break; 1975 } 1976 ifp = ro.ro_rt->rt_ifp; 1977 RTFREE(ro.ro_rt); 1978 } 1979 else { 1980 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1981 } 1982 1983 /* 1984 * See if we found an interface, and confirm that it 1985 * supports multicast. 1986 */ 1987 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1988 error = EADDRNOTAVAIL; 1989 splx(s); 1990 break; 1991 } 1992 /* 1993 * See if the membership already exists or if all the 1994 * membership slots are full. 1995 */ 1996 for (i = 0; i < imo->imo_num_memberships; ++i) { 1997 if (imo->imo_membership[i]->inm_ifp == ifp && 1998 imo->imo_membership[i]->inm_addr.s_addr 1999 == mreq.imr_multiaddr.s_addr) 2000 break; 2001 } 2002 if (i < imo->imo_num_memberships) { 2003 error = EADDRINUSE; 2004 splx(s); 2005 break; 2006 } 2007 if (i == IP_MAX_MEMBERSHIPS) { 2008 error = ETOOMANYREFS; 2009 splx(s); 2010 break; 2011 } 2012 /* 2013 * Everything looks good; add a new record to the multicast 2014 * address list for the given interface. 2015 */ 2016 if ((imo->imo_membership[i] = 2017 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 2018 error = ENOBUFS; 2019 splx(s); 2020 break; 2021 } 2022 ++imo->imo_num_memberships; 2023 splx(s); 2024 break; 2025 2026 case IP_DROP_MEMBERSHIP: 2027 /* 2028 * Drop a multicast group membership. 2029 * Group must be a valid IP multicast address. 2030 */ 2031 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 2032 if (error) 2033 break; 2034 2035 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 2036 error = EINVAL; 2037 break; 2038 } 2039 2040 s = splimp(); 2041 /* 2042 * If an interface address was specified, get a pointer 2043 * to its ifnet structure. 2044 */ 2045 if (mreq.imr_interface.s_addr == INADDR_ANY) 2046 ifp = NULL; 2047 else { 2048 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 2049 if (ifp == NULL) { 2050 error = EADDRNOTAVAIL; 2051 splx(s); 2052 break; 2053 } 2054 } 2055 /* 2056 * Find the membership in the membership array. 2057 */ 2058 for (i = 0; i < imo->imo_num_memberships; ++i) { 2059 if ((ifp == NULL || 2060 imo->imo_membership[i]->inm_ifp == ifp) && 2061 imo->imo_membership[i]->inm_addr.s_addr == 2062 mreq.imr_multiaddr.s_addr) 2063 break; 2064 } 2065 if (i == imo->imo_num_memberships) { 2066 error = EADDRNOTAVAIL; 2067 splx(s); 2068 break; 2069 } 2070 /* 2071 * Give up the multicast address record to which the 2072 * membership points. 2073 */ 2074 in_delmulti(imo->imo_membership[i]); 2075 /* 2076 * Remove the gap in the membership array. 2077 */ 2078 for (++i; i < imo->imo_num_memberships; ++i) 2079 imo->imo_membership[i-1] = imo->imo_membership[i]; 2080 --imo->imo_num_memberships; 2081 splx(s); 2082 break; 2083 2084 default: 2085 error = EOPNOTSUPP; 2086 break; 2087 } 2088 2089 /* 2090 * If all options have default values, no need to keep the mbuf. 2091 */ 2092 if (imo->imo_multicast_ifp == NULL && 2093 imo->imo_multicast_vif == -1 && 2094 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 2095 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 2096 imo->imo_num_memberships == 0) { 2097 free(*imop, M_IPMOPTS); 2098 *imop = NULL; 2099 } 2100 2101 return (error); 2102} 2103 2104/* 2105 * Return the IP multicast options in response to user getsockopt(). 2106 */ 2107static int 2108ip_getmoptions(sopt, imo) 2109 struct sockopt *sopt; 2110 register struct ip_moptions *imo; 2111{ 2112 struct in_addr addr; 2113 struct in_ifaddr *ia; 2114 int error, optval; 2115 u_char coptval; 2116 2117 error = 0; 2118 switch (sopt->sopt_name) { 2119 case IP_MULTICAST_VIF: 2120 if (imo != NULL) 2121 optval = imo->imo_multicast_vif; 2122 else 2123 optval = -1; 2124 error = sooptcopyout(sopt, &optval, sizeof optval); 2125 break; 2126 2127 case IP_MULTICAST_IF: 2128 if (imo == NULL || imo->imo_multicast_ifp == NULL) 2129 addr.s_addr = INADDR_ANY; 2130 else if (imo->imo_multicast_addr.s_addr) { 2131 /* return the value user has set */ 2132 addr = imo->imo_multicast_addr; 2133 } else { 2134 IFP_TO_IA(imo->imo_multicast_ifp, ia); 2135 addr.s_addr = (ia == NULL) ? INADDR_ANY 2136 : IA_SIN(ia)->sin_addr.s_addr; 2137 } 2138 error = sooptcopyout(sopt, &addr, sizeof addr); 2139 break; 2140 2141 case IP_MULTICAST_TTL: 2142 if (imo == 0) 2143 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 2144 else 2145 optval = coptval = imo->imo_multicast_ttl; 2146 if (sopt->sopt_valsize == 1) 2147 error = sooptcopyout(sopt, &coptval, 1); 2148 else 2149 error = sooptcopyout(sopt, &optval, sizeof optval); 2150 break; 2151 2152 case IP_MULTICAST_LOOP: 2153 if (imo == 0) 2154 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 2155 else 2156 optval = coptval = imo->imo_multicast_loop; 2157 if (sopt->sopt_valsize == 1) 2158 error = sooptcopyout(sopt, &coptval, 1); 2159 else 2160 error = sooptcopyout(sopt, &optval, sizeof optval); 2161 break; 2162 2163 default: 2164 error = ENOPROTOOPT; 2165 break; 2166 } 2167 return (error); 2168} 2169 2170/* 2171 * Discard the IP multicast options. 2172 */ 2173void 2174ip_freemoptions(imo) 2175 register struct ip_moptions *imo; 2176{ 2177 register int i; 2178 2179 if (imo != NULL) { 2180 for (i = 0; i < imo->imo_num_memberships; ++i) 2181 in_delmulti(imo->imo_membership[i]); 2182 free(imo, M_IPMOPTS); 2183 } 2184} 2185 2186/* 2187 * Routine called from ip_output() to loop back a copy of an IP multicast 2188 * packet to the input queue of a specified interface. Note that this 2189 * calls the output routine of the loopback "driver", but with an interface 2190 * pointer that might NOT be a loopback interface -- evil, but easier than 2191 * replicating that code here. 2192 */ 2193static void 2194ip_mloopback(ifp, m, dst, hlen) 2195 struct ifnet *ifp; 2196 register struct mbuf *m; 2197 register struct sockaddr_in *dst; 2198 int hlen; 2199{ 2200 register struct ip *ip; 2201 struct mbuf *copym; 2202 2203 copym = m_copy(m, 0, M_COPYALL); 2204 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 2205 copym = m_pullup(copym, hlen); 2206 if (copym != NULL) { 2207 /* 2208 * We don't bother to fragment if the IP length is greater 2209 * than the interface's MTU. Can this possibly matter? 2210 */ 2211 ip = mtod(copym, struct ip *); 2212 ip->ip_len = htons(ip->ip_len); 2213 ip->ip_off = htons(ip->ip_off); 2214 ip->ip_sum = 0; 2215 ip->ip_sum = in_cksum(copym, hlen); 2216 /* 2217 * NB: 2218 * It's not clear whether there are any lingering 2219 * reentrancy problems in other areas which might 2220 * be exposed by using ip_input directly (in 2221 * particular, everything which modifies the packet 2222 * in-place). Yet another option is using the 2223 * protosw directly to deliver the looped back 2224 * packet. For the moment, we'll err on the side 2225 * of safety by using if_simloop(). 2226 */ 2227#if 1 /* XXX */ 2228 if (dst->sin_family != AF_INET) { 2229 printf("ip_mloopback: bad address family %d\n", 2230 dst->sin_family); 2231 dst->sin_family = AF_INET; 2232 } 2233#endif 2234 2235#ifdef notdef 2236 copym->m_pkthdr.rcvif = ifp; 2237 ip_input(copym); 2238#else 2239 /* if the checksum hasn't been computed, mark it as valid */ 2240 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2241 copym->m_pkthdr.csum_flags |= 2242 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 2243 copym->m_pkthdr.csum_data = 0xffff; 2244 } 2245 if_simloop(ifp, copym, dst->sin_family, 0); 2246#endif 2247 } 2248} 2249