ip_output.c revision 115471
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 34 * $FreeBSD: head/sys/netinet/ip_output.c 115471 2003-05-31 17:55:21Z wollman $ 35 */ 36 37#include "opt_ipfw.h" 38#include "opt_ipdn.h" 39#include "opt_ipdivert.h" 40#include "opt_ipfilter.h" 41#include "opt_ipsec.h" 42#include "opt_mac.h" 43#include "opt_pfil_hooks.h" 44#include "opt_random_ip_id.h" 45#include "opt_mbuf_stress_test.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/mac.h> 51#include <sys/malloc.h> 52#include <sys/mbuf.h> 53#include <sys/protosw.h> 54#include <sys/socket.h> 55#include <sys/socketvar.h> 56#include <sys/sysctl.h> 57 58#include <net/if.h> 59#include <net/route.h> 60 61#include <netinet/in.h> 62#include <netinet/in_systm.h> 63#include <netinet/ip.h> 64#include <netinet/in_pcb.h> 65#include <netinet/in_var.h> 66#include <netinet/ip_var.h> 67 68#include <machine/in_cksum.h> 69 70static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 71 72#ifdef IPSEC 73#include <netinet6/ipsec.h> 74#include <netkey/key.h> 75#ifdef IPSEC_DEBUG 76#include <netkey/key_debug.h> 77#else 78#define KEYDEBUG(lev,arg) 79#endif 80#endif /*IPSEC*/ 81 82#ifdef FAST_IPSEC 83#include <netipsec/ipsec.h> 84#include <netipsec/xform.h> 85#include <netipsec/key.h> 86#endif /*FAST_IPSEC*/ 87 88#include <netinet/ip_fw.h> 89#include <netinet/ip_dummynet.h> 90 91#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 92 x, (ntohl(a.s_addr)>>24)&0xFF,\ 93 (ntohl(a.s_addr)>>16)&0xFF,\ 94 (ntohl(a.s_addr)>>8)&0xFF,\ 95 (ntohl(a.s_addr))&0xFF, y); 96 97u_short ip_id; 98 99#ifdef MBUF_STRESS_TEST 100int mbuf_frag_size = 0; 101SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 102 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 103#endif 104 105static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 106static struct ifnet *ip_multicast_if(struct in_addr *, int *); 107static void ip_mloopback 108 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 109static int ip_getmoptions 110 (struct sockopt *, struct ip_moptions *); 111static int ip_pcbopts(int, struct mbuf **, struct mbuf *); 112static int ip_setmoptions 113 (struct sockopt *, struct ip_moptions **); 114 115int ip_optcopy(struct ip *, struct ip *); 116 117 118extern struct protosw inetsw[]; 119 120/* 121 * IP output. The packet in mbuf chain m contains a skeletal IP 122 * header (with len, off, ttl, proto, tos, src, dst). 123 * The mbuf chain containing the packet will be freed. 124 * The mbuf opt, if present, will not be freed. 125 */ 126int 127ip_output(m0, opt, ro, flags, imo, inp) 128 struct mbuf *m0; 129 struct mbuf *opt; 130 struct route *ro; 131 int flags; 132 struct ip_moptions *imo; 133 struct inpcb *inp; 134{ 135 struct ip *ip, *mhip; 136 struct ifnet *ifp = NULL; /* keep compiler happy */ 137 struct mbuf *m; 138 int hlen = sizeof (struct ip); 139 int len, off, error = 0; 140 struct sockaddr_in *dst = NULL; /* keep compiler happy */ 141 struct in_ifaddr *ia = NULL; 142 int isbroadcast, sw_csum; 143 struct in_addr pkt_dst; 144#ifdef IPSEC 145 struct route iproute; 146 struct secpolicy *sp = NULL; 147#endif 148#ifdef FAST_IPSEC 149 struct route iproute; 150 struct m_tag *mtag; 151 struct secpolicy *sp = NULL; 152 struct tdb_ident *tdbi; 153 int s; 154#endif /* FAST_IPSEC */ 155 struct ip_fw_args args; 156 int src_was_INADDR_ANY = 0; /* as the name says... */ 157#ifdef PFIL_HOOKS 158 struct packet_filter_hook *pfh; 159 struct mbuf *m1; 160 int rv; 161#endif /* PFIL_HOOKS */ 162 163 args.eh = NULL; 164 args.rule = NULL; 165 args.next_hop = NULL; 166 args.divert_rule = 0; /* divert cookie */ 167 168 /* Grab info from MT_TAG mbufs prepended to the chain. */ 169 for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) { 170 switch(m0->_m_tag_id) { 171 default: 172 printf("ip_output: unrecognised MT_TAG tag %d\n", 173 m0->_m_tag_id); 174 break; 175 176 case PACKET_TAG_DUMMYNET: 177 /* 178 * the packet was already tagged, so part of the 179 * processing was already done, and we need to go down. 180 * Get parameters from the header. 181 */ 182 args.rule = ((struct dn_pkt *)m0)->rule; 183 opt = NULL ; 184 ro = & ( ((struct dn_pkt *)m0)->ro ) ; 185 imo = NULL ; 186 dst = ((struct dn_pkt *)m0)->dn_dst ; 187 ifp = ((struct dn_pkt *)m0)->ifp ; 188 flags = ((struct dn_pkt *)m0)->flags ; 189 break; 190 191 case PACKET_TAG_DIVERT: 192 args.divert_rule = (intptr_t)m0->m_data & 0xffff; 193 break; 194 195 case PACKET_TAG_IPFORWARD: 196 args.next_hop = (struct sockaddr_in *)m0->m_data; 197 break; 198 } 199 } 200 m = m0; 201 202 M_ASSERTPKTHDR(m); 203#ifndef FAST_IPSEC 204 KASSERT(ro != NULL, ("ip_output: no route, proto %d", 205 mtod(m, struct ip *)->ip_p)); 206#endif 207 208 if (args.rule != NULL) { /* dummynet already saw us */ 209 ip = mtod(m, struct ip *); 210 hlen = ip->ip_hl << 2 ; 211 if (ro->ro_rt) 212 ia = ifatoia(ro->ro_rt->rt_ifa); 213 goto sendit; 214 } 215 216 if (opt) { 217 len = 0; 218 m = ip_insertoptions(m, opt, &len); 219 if (len != 0) 220 hlen = len; 221 } 222 ip = mtod(m, struct ip *); 223 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; 224 225 /* 226 * Fill in IP header. If we are not allowing fragmentation, 227 * then the ip_id field is meaningless, so send it as zero 228 * to reduce information leakage. Otherwise, if we are not 229 * randomizing ip_id, then don't bother to convert it to network 230 * byte order -- it's just a nonce. Note that a 16-bit counter 231 * will wrap around in less than 10 seconds at 100 Mbit/s on a 232 * medium with MTU 1500. See Steven M. Bellovin, "A Technique 233 * for Counting NATted Hosts", Proc. IMW'02, available at 234 * <http://www.research.att.com/~smb/papers/fnat.pdf>. 235 */ 236 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 237 ip->ip_v = IPVERSION; 238 ip->ip_hl = hlen >> 2; 239 if ((ip->ip_off & IP_DF) == 0) { 240 ip->ip_off = 0; 241#ifdef RANDOM_IP_ID 242 ip->ip_id = ip_randomid(); 243#else 244 ip->ip_id = ip_id++; 245#endif 246 } else { 247 ip->ip_off = IP_DF; 248 ip->ip_id = 0; 249 } 250 ipstat.ips_localout++; 251 } else { 252 hlen = ip->ip_hl << 2; 253 } 254 255#ifdef FAST_IPSEC 256 if (ro == NULL) { 257 ro = &iproute; 258 bzero(ro, sizeof (*ro)); 259 } 260#endif /* FAST_IPSEC */ 261 dst = (struct sockaddr_in *)&ro->ro_dst; 262 /* 263 * If there is a cached route, 264 * check that it is to the same destination 265 * and is still up. If not, free it and try again. 266 * The address family should also be checked in case of sharing the 267 * cache with IPv6. 268 */ 269 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 270 dst->sin_family != AF_INET || 271 dst->sin_addr.s_addr != pkt_dst.s_addr)) { 272 RTFREE(ro->ro_rt); 273 ro->ro_rt = (struct rtentry *)0; 274 } 275 if (ro->ro_rt == 0) { 276 bzero(dst, sizeof(*dst)); 277 dst->sin_family = AF_INET; 278 dst->sin_len = sizeof(*dst); 279 dst->sin_addr = pkt_dst; 280 } 281 /* 282 * If routing to interface only, 283 * short circuit routing lookup. 284 */ 285 if (flags & IP_ROUTETOIF) { 286 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 287 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 288 ipstat.ips_noroute++; 289 error = ENETUNREACH; 290 goto bad; 291 } 292 ifp = ia->ia_ifp; 293 ip->ip_ttl = 1; 294 isbroadcast = in_broadcast(dst->sin_addr, ifp); 295 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 296 imo != NULL && imo->imo_multicast_ifp != NULL) { 297 /* 298 * Bypass the normal routing lookup for multicast 299 * packets if the interface is specified. 300 */ 301 ifp = imo->imo_multicast_ifp; 302 IFP_TO_IA(ifp, ia); 303 isbroadcast = 0; /* fool gcc */ 304 } else { 305 /* 306 * If this is the case, we probably don't want to allocate 307 * a protocol-cloned route since we didn't get one from the 308 * ULP. This lets TCP do its thing, while not burdening 309 * forwarding or ICMP with the overhead of cloning a route. 310 * Of course, we still want to do any cloning requested by 311 * the link layer, as this is probably required in all cases 312 * for correct operation (as it is for ARP). 313 */ 314 if (ro->ro_rt == 0) 315 rtalloc_ign(ro, RTF_PRCLONING); 316 if (ro->ro_rt == 0) { 317 ipstat.ips_noroute++; 318 error = EHOSTUNREACH; 319 goto bad; 320 } 321 ia = ifatoia(ro->ro_rt->rt_ifa); 322 ifp = ro->ro_rt->rt_ifp; 323 ro->ro_rt->rt_use++; 324 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 325 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 326 if (ro->ro_rt->rt_flags & RTF_HOST) 327 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 328 else 329 isbroadcast = in_broadcast(dst->sin_addr, ifp); 330 } 331 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { 332 struct in_multi *inm; 333 334 m->m_flags |= M_MCAST; 335 /* 336 * IP destination address is multicast. Make sure "dst" 337 * still points to the address in "ro". (It may have been 338 * changed to point to a gateway address, above.) 339 */ 340 dst = (struct sockaddr_in *)&ro->ro_dst; 341 /* 342 * See if the caller provided any multicast options 343 */ 344 if (imo != NULL) { 345 ip->ip_ttl = imo->imo_multicast_ttl; 346 if (imo->imo_multicast_vif != -1) 347 ip->ip_src.s_addr = 348 ip_mcast_src ? 349 ip_mcast_src(imo->imo_multicast_vif) : 350 INADDR_ANY; 351 } else 352 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 353 /* 354 * Confirm that the outgoing interface supports multicast. 355 */ 356 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 357 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 358 ipstat.ips_noroute++; 359 error = ENETUNREACH; 360 goto bad; 361 } 362 } 363 /* 364 * If source address not specified yet, use address 365 * of outgoing interface. 366 */ 367 if (ip->ip_src.s_addr == INADDR_ANY) { 368 /* Interface may have no addresses. */ 369 if (ia != NULL) 370 ip->ip_src = IA_SIN(ia)->sin_addr; 371 } 372 373 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 374 /* 375 * XXX 376 * delayed checksums are not currently 377 * compatible with IP multicast routing 378 */ 379 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 380 in_delayed_cksum(m); 381 m->m_pkthdr.csum_flags &= 382 ~CSUM_DELAY_DATA; 383 } 384 } 385 IN_LOOKUP_MULTI(pkt_dst, ifp, inm); 386 if (inm != NULL && 387 (imo == NULL || imo->imo_multicast_loop)) { 388 /* 389 * If we belong to the destination multicast group 390 * on the outgoing interface, and the caller did not 391 * forbid loopback, loop back a copy. 392 */ 393 ip_mloopback(ifp, m, dst, hlen); 394 } 395 else { 396 /* 397 * If we are acting as a multicast router, perform 398 * multicast forwarding as if the packet had just 399 * arrived on the interface to which we are about 400 * to send. The multicast forwarding function 401 * recursively calls this function, using the 402 * IP_FORWARDING flag to prevent infinite recursion. 403 * 404 * Multicasts that are looped back by ip_mloopback(), 405 * above, will be forwarded by the ip_input() routine, 406 * if necessary. 407 */ 408 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 409 /* 410 * If rsvp daemon is not running, do not 411 * set ip_moptions. This ensures that the packet 412 * is multicast and not just sent down one link 413 * as prescribed by rsvpd. 414 */ 415 if (!rsvp_on) 416 imo = NULL; 417 if (ip_mforward && 418 ip_mforward(ip, ifp, m, imo) != 0) { 419 m_freem(m); 420 goto done; 421 } 422 } 423 } 424 425 /* 426 * Multicasts with a time-to-live of zero may be looped- 427 * back, above, but must not be transmitted on a network. 428 * Also, multicasts addressed to the loopback interface 429 * are not sent -- the above call to ip_mloopback() will 430 * loop back a copy if this host actually belongs to the 431 * destination group on the loopback interface. 432 */ 433 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 434 m_freem(m); 435 goto done; 436 } 437 438 goto sendit; 439 } 440#ifndef notdef 441 /* 442 * If the source address is not specified yet, use the address 443 * of the outoing interface. In case, keep note we did that, so 444 * if the the firewall changes the next-hop causing the output 445 * interface to change, we can fix that. 446 */ 447 if (ip->ip_src.s_addr == INADDR_ANY) { 448 /* Interface may have no addresses. */ 449 if (ia != NULL) { 450 ip->ip_src = IA_SIN(ia)->sin_addr; 451 src_was_INADDR_ANY = 1; 452 } 453 } 454#endif /* notdef */ 455 /* 456 * Verify that we have any chance at all of being able to queue 457 * the packet or packet fragments 458 */ 459 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 460 ifp->if_snd.ifq_maxlen) { 461 error = ENOBUFS; 462 ipstat.ips_odropped++; 463 goto bad; 464 } 465 466 /* 467 * Look for broadcast address and 468 * verify user is allowed to send 469 * such a packet. 470 */ 471 if (isbroadcast) { 472 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 473 error = EADDRNOTAVAIL; 474 goto bad; 475 } 476 if ((flags & IP_ALLOWBROADCAST) == 0) { 477 error = EACCES; 478 goto bad; 479 } 480 /* don't allow broadcast messages to be fragmented */ 481 if ((u_short)ip->ip_len > ifp->if_mtu) { 482 error = EMSGSIZE; 483 goto bad; 484 } 485 m->m_flags |= M_BCAST; 486 } else { 487 m->m_flags &= ~M_BCAST; 488 } 489 490sendit: 491#ifdef IPSEC 492 /* get SP for this packet */ 493 if (inp == NULL) 494 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 495 else 496 sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error); 497 498 if (sp == NULL) { 499 ipsecstat.out_inval++; 500 goto bad; 501 } 502 503 error = 0; 504 505 /* check policy */ 506 switch (sp->policy) { 507 case IPSEC_POLICY_DISCARD: 508 /* 509 * This packet is just discarded. 510 */ 511 ipsecstat.out_polvio++; 512 goto bad; 513 514 case IPSEC_POLICY_BYPASS: 515 case IPSEC_POLICY_NONE: 516 /* no need to do IPsec. */ 517 goto skip_ipsec; 518 519 case IPSEC_POLICY_IPSEC: 520 if (sp->req == NULL) { 521 /* acquire a policy */ 522 error = key_spdacquire(sp); 523 goto bad; 524 } 525 break; 526 527 case IPSEC_POLICY_ENTRUST: 528 default: 529 printf("ip_output: Invalid policy found. %d\n", sp->policy); 530 } 531 { 532 struct ipsec_output_state state; 533 bzero(&state, sizeof(state)); 534 state.m = m; 535 if (flags & IP_ROUTETOIF) { 536 state.ro = &iproute; 537 bzero(&iproute, sizeof(iproute)); 538 } else 539 state.ro = ro; 540 state.dst = (struct sockaddr *)dst; 541 542 ip->ip_sum = 0; 543 544 /* 545 * XXX 546 * delayed checksums are not currently compatible with IPsec 547 */ 548 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 549 in_delayed_cksum(m); 550 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 551 } 552 553 ip->ip_len = htons(ip->ip_len); 554 ip->ip_off = htons(ip->ip_off); 555 556 error = ipsec4_output(&state, sp, flags); 557 558 m = state.m; 559 if (flags & IP_ROUTETOIF) { 560 /* 561 * if we have tunnel mode SA, we may need to ignore 562 * IP_ROUTETOIF. 563 */ 564 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 565 flags &= ~IP_ROUTETOIF; 566 ro = state.ro; 567 } 568 } else 569 ro = state.ro; 570 dst = (struct sockaddr_in *)state.dst; 571 if (error) { 572 /* mbuf is already reclaimed in ipsec4_output. */ 573 m0 = NULL; 574 switch (error) { 575 case EHOSTUNREACH: 576 case ENETUNREACH: 577 case EMSGSIZE: 578 case ENOBUFS: 579 case ENOMEM: 580 break; 581 default: 582 printf("ip4_output (ipsec): error code %d\n", error); 583 /*fall through*/ 584 case ENOENT: 585 /* don't show these error codes to the user */ 586 error = 0; 587 break; 588 } 589 goto bad; 590 } 591 } 592 593 /* be sure to update variables that are affected by ipsec4_output() */ 594 ip = mtod(m, struct ip *); 595 hlen = ip->ip_hl << 2; 596 if (ro->ro_rt == NULL) { 597 if ((flags & IP_ROUTETOIF) == 0) { 598 printf("ip_output: " 599 "can't update route after IPsec processing\n"); 600 error = EHOSTUNREACH; /*XXX*/ 601 goto bad; 602 } 603 } else { 604 ia = ifatoia(ro->ro_rt->rt_ifa); 605 ifp = ro->ro_rt->rt_ifp; 606 } 607 608 /* make it flipped, again. */ 609 ip->ip_len = ntohs(ip->ip_len); 610 ip->ip_off = ntohs(ip->ip_off); 611skip_ipsec: 612#endif /*IPSEC*/ 613#ifdef FAST_IPSEC 614 /* 615 * Check the security policy (SP) for the packet and, if 616 * required, do IPsec-related processing. There are two 617 * cases here; the first time a packet is sent through 618 * it will be untagged and handled by ipsec4_checkpolicy. 619 * If the packet is resubmitted to ip_output (e.g. after 620 * AH, ESP, etc. processing), there will be a tag to bypass 621 * the lookup and related policy checking. 622 */ 623 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 624 s = splnet(); 625 if (mtag != NULL) { 626 tdbi = (struct tdb_ident *)(mtag + 1); 627 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); 628 if (sp == NULL) 629 error = -EINVAL; /* force silent drop */ 630 m_tag_delete(m, mtag); 631 } else { 632 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags, 633 &error, inp); 634 } 635 /* 636 * There are four return cases: 637 * sp != NULL apply IPsec policy 638 * sp == NULL, error == 0 no IPsec handling needed 639 * sp == NULL, error == -EINVAL discard packet w/o error 640 * sp == NULL, error != 0 discard packet, report error 641 */ 642 if (sp != NULL) { 643 /* Loop detection, check if ipsec processing already done */ 644 KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); 645 for (mtag = m_tag_first(m); mtag != NULL; 646 mtag = m_tag_next(m, mtag)) { 647 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) 648 continue; 649 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 650 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 651 continue; 652 /* 653 * Check if policy has an SA associated with it. 654 * This can happen when an SP has yet to acquire 655 * an SA; e.g. on first reference. If it occurs, 656 * then we let ipsec4_process_packet do its thing. 657 */ 658 if (sp->req->sav == NULL) 659 break; 660 tdbi = (struct tdb_ident *)(mtag + 1); 661 if (tdbi->spi == sp->req->sav->spi && 662 tdbi->proto == sp->req->sav->sah->saidx.proto && 663 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, 664 sizeof (union sockaddr_union)) == 0) { 665 /* 666 * No IPsec processing is needed, free 667 * reference to SP. 668 * 669 * NB: null pointer to avoid free at 670 * done: below. 671 */ 672 KEY_FREESP(&sp), sp = NULL; 673 splx(s); 674 goto spd_done; 675 } 676 } 677 678 /* 679 * Do delayed checksums now because we send before 680 * this is done in the normal processing path. 681 */ 682 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 683 in_delayed_cksum(m); 684 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 685 } 686 687 ip->ip_len = htons(ip->ip_len); 688 ip->ip_off = htons(ip->ip_off); 689 690 /* NB: callee frees mbuf */ 691 error = ipsec4_process_packet(m, sp->req, flags, 0); 692 /* 693 * Preserve KAME behaviour: ENOENT can be returned 694 * when an SA acquire is in progress. Don't propagate 695 * this to user-level; it confuses applications. 696 * 697 * XXX this will go away when the SADB is redone. 698 */ 699 if (error == ENOENT) 700 error = 0; 701 splx(s); 702 goto done; 703 } else { 704 splx(s); 705 706 if (error != 0) { 707 /* 708 * Hack: -EINVAL is used to signal that a packet 709 * should be silently discarded. This is typically 710 * because we asked key management for an SA and 711 * it was delayed (e.g. kicked up to IKE). 712 */ 713 if (error == -EINVAL) 714 error = 0; 715 goto bad; 716 } else { 717 /* No IPsec processing for this packet. */ 718 } 719#ifdef notyet 720 /* 721 * If deferred crypto processing is needed, check that 722 * the interface supports it. 723 */ 724 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL); 725 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) { 726 /* notify IPsec to do its own crypto */ 727 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 728 error = EHOSTUNREACH; 729 goto bad; 730 } 731#endif 732 } 733spd_done: 734#endif /* FAST_IPSEC */ 735 736 /* 737 * IpHack's section. 738 * - Xlate: translate packet's addr/port (NAT). 739 * - Firewall: deny/allow/etc. 740 * - Wrap: fake packet's addr/port <unimpl.> 741 * - Encapsulate: put it in another IP and send out. <unimp.> 742 */ 743#ifdef PFIL_HOOKS 744 /* 745 * Run through list of hooks for output packets. 746 */ 747 m1 = m; 748 pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh); 749 for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link)) 750 if (pfh->pfil_func) { 751 rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1); 752 if (rv) { 753 error = EHOSTUNREACH; 754 goto done; 755 } 756 m = m1; 757 if (m == NULL) 758 goto done; 759 ip = mtod(m, struct ip *); 760 } 761#endif /* PFIL_HOOKS */ 762 763 /* 764 * Check with the firewall... 765 * but not if we are already being fwd'd from a firewall. 766 */ 767 if (fw_enable && IPFW_LOADED && !args.next_hop) { 768 struct sockaddr_in *old = dst; 769 770 args.m = m; 771 args.next_hop = dst; 772 args.oif = ifp; 773 off = ip_fw_chk_ptr(&args); 774 m = args.m; 775 dst = args.next_hop; 776 777 /* 778 * On return we must do the following: 779 * m == NULL -> drop the pkt (old interface, deprecated) 780 * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface) 781 * 1<=off<= 0xffff -> DIVERT 782 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe 783 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet 784 * dst != old -> IPFIREWALL_FORWARD 785 * off==0, dst==old -> accept 786 * If some of the above modules are not compiled in, then 787 * we should't have to check the corresponding condition 788 * (because the ipfw control socket should not accept 789 * unsupported rules), but better play safe and drop 790 * packets in case of doubt. 791 */ 792 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { 793 if (m) 794 m_freem(m); 795 error = EACCES; 796 goto done; 797 } 798 ip = mtod(m, struct ip *); 799 if (off == 0 && dst == old) /* common case */ 800 goto pass; 801 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { 802 /* 803 * pass the pkt to dummynet. Need to include 804 * pipe number, m, ifp, ro, dst because these are 805 * not recomputed in the next pass. 806 * All other parameters have been already used and 807 * so they are not needed anymore. 808 * XXX note: if the ifp or ro entry are deleted 809 * while a pkt is in dummynet, we are in trouble! 810 */ 811 args.ro = ro; 812 args.dst = dst; 813 args.flags = flags; 814 815 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, 816 &args); 817 goto done; 818 } 819#ifdef IPDIVERT 820 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 821 struct mbuf *clone = NULL; 822 823 /* Clone packet if we're doing a 'tee' */ 824 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 825 clone = m_dup(m, M_DONTWAIT); 826 827 /* 828 * XXX 829 * delayed checksums are not currently compatible 830 * with divert sockets. 831 */ 832 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 833 in_delayed_cksum(m); 834 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 835 } 836 837 /* Restore packet header fields to original values */ 838 ip->ip_len = htons(ip->ip_len); 839 ip->ip_off = htons(ip->ip_off); 840 841 /* Deliver packet to divert input routine */ 842 divert_packet(m, 0, off & 0xffff, args.divert_rule); 843 844 /* If 'tee', continue with original packet */ 845 if (clone != NULL) { 846 m = clone; 847 ip = mtod(m, struct ip *); 848 goto pass; 849 } 850 goto done; 851 } 852#endif 853 854 /* IPFIREWALL_FORWARD */ 855 /* 856 * Check dst to make sure it is directly reachable on the 857 * interface we previously thought it was. 858 * If it isn't (which may be likely in some situations) we have 859 * to re-route it (ie, find a route for the next-hop and the 860 * associated interface) and set them here. This is nested 861 * forwarding which in most cases is undesirable, except where 862 * such control is nigh impossible. So we do it here. 863 * And I'm babbling. 864 */ 865 if (off == 0 && old != dst) { /* FORWARD, dst has changed */ 866#if 0 867 /* 868 * XXX To improve readability, this block should be 869 * changed into a function call as below: 870 */ 871 error = ip_ipforward(&m, &dst, &ifp); 872 if (error) 873 goto bad; 874 if (m == NULL) /* ip_input consumed the mbuf */ 875 goto done; 876#else 877 struct in_ifaddr *ia; 878 879 /* 880 * XXX sro_fwd below is static, and a pointer 881 * to it gets passed to routines downstream. 882 * This could have surprisingly bad results in 883 * practice, because its content is overwritten 884 * by subsequent packets. 885 */ 886 /* There must be a better way to do this next line... */ 887 static struct route sro_fwd; 888 struct route *ro_fwd = &sro_fwd; 889 890#if 0 891 print_ip("IPFIREWALL_FORWARD: New dst ip: ", 892 dst->sin_addr, "\n"); 893#endif 894 895 /* 896 * We need to figure out if we have been forwarded 897 * to a local socket. If so, then we should somehow 898 * "loop back" to ip_input, and get directed to the 899 * PCB as if we had received this packet. This is 900 * because it may be dificult to identify the packets 901 * you want to forward until they are being output 902 * and have selected an interface. (e.g. locally 903 * initiated packets) If we used the loopback inteface, 904 * we would not be able to control what happens 905 * as the packet runs through ip_input() as 906 * it is done through an ISR. 907 */ 908 LIST_FOREACH(ia, 909 INADDR_HASH(dst->sin_addr.s_addr), ia_hash) { 910 /* 911 * If the addr to forward to is one 912 * of ours, we pretend to 913 * be the destination for this packet. 914 */ 915 if (IA_SIN(ia)->sin_addr.s_addr == 916 dst->sin_addr.s_addr) 917 break; 918 } 919 if (ia) { /* tell ip_input "dont filter" */ 920 struct m_hdr tag; 921 922 tag.mh_type = MT_TAG; 923 tag.mh_flags = PACKET_TAG_IPFORWARD; 924 tag.mh_data = (caddr_t)args.next_hop; 925 tag.mh_next = m; 926 927 if (m->m_pkthdr.rcvif == NULL) 928 m->m_pkthdr.rcvif = ifunit("lo0"); 929 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 930 m->m_pkthdr.csum_flags |= 931 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 932 m0->m_pkthdr.csum_data = 0xffff; 933 } 934 m->m_pkthdr.csum_flags |= 935 CSUM_IP_CHECKED | CSUM_IP_VALID; 936 ip->ip_len = htons(ip->ip_len); 937 ip->ip_off = htons(ip->ip_off); 938 ip_input((struct mbuf *)&tag); 939 goto done; 940 } 941 /* Some of the logic for this was 942 * nicked from above. 943 * 944 * This rewrites the cached route in a local PCB. 945 * Is this what we want to do? 946 */ 947 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 948 949 ro_fwd->ro_rt = 0; 950 rtalloc_ign(ro_fwd, RTF_PRCLONING); 951 952 if (ro_fwd->ro_rt == 0) { 953 ipstat.ips_noroute++; 954 error = EHOSTUNREACH; 955 goto bad; 956 } 957 958 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 959 ifp = ro_fwd->ro_rt->rt_ifp; 960 ro_fwd->ro_rt->rt_use++; 961 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 962 dst = (struct sockaddr_in *) 963 ro_fwd->ro_rt->rt_gateway; 964 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 965 isbroadcast = 966 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 967 else 968 isbroadcast = in_broadcast(dst->sin_addr, ifp); 969 if (ro->ro_rt) 970 RTFREE(ro->ro_rt); 971 ro->ro_rt = ro_fwd->ro_rt; 972 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 973 974#endif /* ... block to be put into a function */ 975 /* 976 * If we added a default src ip earlier, 977 * which would have been gotten from the-then 978 * interface, do it again, from the new one. 979 */ 980 if (src_was_INADDR_ANY) 981 ip->ip_src = IA_SIN(ia)->sin_addr; 982 goto pass ; 983 } 984 985 /* 986 * if we get here, none of the above matches, and 987 * we have to drop the pkt 988 */ 989 m_freem(m); 990 error = EACCES; /* not sure this is the right error msg */ 991 goto done; 992 } 993 994pass: 995 /* 127/8 must not appear on wire - RFC1122. */ 996 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 997 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 998 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 999 ipstat.ips_badaddr++; 1000 error = EADDRNOTAVAIL; 1001 goto bad; 1002 } 1003 } 1004 1005 m->m_pkthdr.csum_flags |= CSUM_IP; 1006 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 1007 if (sw_csum & CSUM_DELAY_DATA) { 1008 in_delayed_cksum(m); 1009 sw_csum &= ~CSUM_DELAY_DATA; 1010 } 1011 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 1012 1013 /* 1014 * If small enough for interface, or the interface will take 1015 * care of the fragmentation for us, can just send directly. 1016 */ 1017 if ((u_short)ip->ip_len <= ifp->if_mtu || 1018 ifp->if_hwassist & CSUM_FRAGMENT) { 1019 ip->ip_len = htons(ip->ip_len); 1020 ip->ip_off = htons(ip->ip_off); 1021 ip->ip_sum = 0; 1022 if (sw_csum & CSUM_DELAY_IP) 1023 ip->ip_sum = in_cksum(m, hlen); 1024 1025 /* Record statistics for this interface address. */ 1026 if (!(flags & IP_FORWARDING) && ia) { 1027 ia->ia_ifa.if_opackets++; 1028 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1029 } 1030 1031#ifdef IPSEC 1032 /* clean ipsec history once it goes out of the node */ 1033 ipsec_delaux(m); 1034#endif 1035 1036#ifdef MBUF_STRESS_TEST 1037 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) { 1038 struct mbuf *m1, *m2; 1039 int length, tmp; 1040 1041 tmp = length = m->m_pkthdr.len; 1042 1043 while ((length -= mbuf_frag_size) >= 1) { 1044 m1 = m_split(m, length, M_DONTWAIT); 1045 if (m1 == NULL) 1046 break; 1047 m1->m_flags &= ~M_PKTHDR; 1048 m2 = m; 1049 while (m2->m_next != NULL) 1050 m2 = m2->m_next; 1051 m2->m_next = m1; 1052 } 1053 m->m_pkthdr.len = tmp; 1054 } 1055#endif 1056 error = (*ifp->if_output)(ifp, m, 1057 (struct sockaddr *)dst, ro->ro_rt); 1058 goto done; 1059 } 1060 /* 1061 * Too large for interface; fragment if possible. 1062 * Must be able to put at least 8 bytes per fragment. 1063 */ 1064 if (ip->ip_off & IP_DF) { 1065 error = EMSGSIZE; 1066 /* 1067 * This case can happen if the user changed the MTU 1068 * of an interface after enabling IP on it. Because 1069 * most netifs don't keep track of routes pointing to 1070 * them, there is no way for one to update all its 1071 * routes when the MTU is changed. 1072 */ 1073 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) 1074 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) 1075 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 1076 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 1077 } 1078 ipstat.ips_cantfrag++; 1079 goto bad; 1080 } 1081 len = (ifp->if_mtu - hlen) &~ 7; 1082 if (len < 8) { 1083 error = EMSGSIZE; 1084 goto bad; 1085 } 1086 1087 /* 1088 * if the interface will not calculate checksums on 1089 * fragmented packets, then do it here. 1090 */ 1091 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 1092 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { 1093 in_delayed_cksum(m); 1094 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1095 } 1096 1097 if (len > PAGE_SIZE) { 1098 /* 1099 * Fragement large datagrams such that each segment 1100 * contains a multiple of PAGE_SIZE amount of data, 1101 * plus headers. This enables a receiver to perform 1102 * page-flipping zero-copy optimizations. 1103 */ 1104 1105 int newlen; 1106 struct mbuf *mtmp; 1107 1108 for (mtmp = m, off = 0; 1109 mtmp && ((off + mtmp->m_len) <= ifp->if_mtu); 1110 mtmp = mtmp->m_next) { 1111 off += mtmp->m_len; 1112 } 1113 /* 1114 * firstlen (off - hlen) must be aligned on an 1115 * 8-byte boundary 1116 */ 1117 if (off < hlen) 1118 goto smart_frag_failure; 1119 off = ((off - hlen) & ~7) + hlen; 1120 newlen = (~PAGE_MASK) & ifp->if_mtu; 1121 if ((newlen + sizeof (struct ip)) > ifp->if_mtu) { 1122 /* we failed, go back the default */ 1123smart_frag_failure: 1124 newlen = len; 1125 off = hlen + len; 1126 } 1127 1128/* printf("ipfrag: len = %d, hlen = %d, mhlen = %d, newlen = %d, off = %d\n", 1129 len, hlen, sizeof (struct ip), newlen, off);*/ 1130 1131 len = newlen; 1132 1133 } else { 1134 off = hlen + len; 1135 } 1136 1137 1138 1139 { 1140 int mhlen, firstlen = off - hlen; 1141 struct mbuf **mnext = &m->m_nextpkt; 1142 int nfrags = 1; 1143 1144 /* 1145 * Loop through length of segment after first fragment, 1146 * make new header and copy data of each part and link onto chain. 1147 */ 1148 m0 = m; 1149 mhlen = sizeof (struct ip); 1150 for (; off < (u_short)ip->ip_len; off += len) { 1151 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1152 if (m == 0) { 1153 error = ENOBUFS; 1154 ipstat.ips_odropped++; 1155 goto sendorfree; 1156 } 1157 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 1158 m->m_data += max_linkhdr; 1159 mhip = mtod(m, struct ip *); 1160 *mhip = *ip; 1161 if (hlen > sizeof (struct ip)) { 1162 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 1163 mhip->ip_v = IPVERSION; 1164 mhip->ip_hl = mhlen >> 2; 1165 } 1166 m->m_len = mhlen; 1167 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 1168 if (off + len >= (u_short)ip->ip_len) 1169 len = (u_short)ip->ip_len - off; 1170 else 1171 mhip->ip_off |= IP_MF; 1172 mhip->ip_len = htons((u_short)(len + mhlen)); 1173 m->m_next = m_copy(m0, off, len); 1174 if (m->m_next == 0) { 1175 (void) m_free(m); 1176 error = ENOBUFS; /* ??? */ 1177 ipstat.ips_odropped++; 1178 goto sendorfree; 1179 } 1180 m->m_pkthdr.len = mhlen + len; 1181 m->m_pkthdr.rcvif = (struct ifnet *)0; 1182#ifdef MAC 1183 mac_create_fragment(m0, m); 1184#endif 1185 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 1186 mhip->ip_off = htons(mhip->ip_off); 1187 mhip->ip_sum = 0; 1188 if (sw_csum & CSUM_DELAY_IP) 1189 mhip->ip_sum = in_cksum(m, mhlen); 1190 *mnext = m; 1191 mnext = &m->m_nextpkt; 1192 nfrags++; 1193 } 1194 ipstat.ips_ofragments += nfrags; 1195 1196 /* set first/last markers for fragment chain */ 1197 m->m_flags |= M_LASTFRAG; 1198 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 1199 m0->m_pkthdr.csum_data = nfrags; 1200 1201 /* 1202 * Update first fragment by trimming what's been copied out 1203 * and updating header, then send each fragment (in order). 1204 */ 1205 m = m0; 1206 m_adj(m, hlen + firstlen - (u_short)ip->ip_len); 1207 m->m_pkthdr.len = hlen + firstlen; 1208 ip->ip_len = htons((u_short)m->m_pkthdr.len); 1209 ip->ip_off |= IP_MF; 1210 ip->ip_off = htons(ip->ip_off); 1211 ip->ip_sum = 0; 1212 if (sw_csum & CSUM_DELAY_IP) 1213 ip->ip_sum = in_cksum(m, hlen); 1214sendorfree: 1215 for (m = m0; m; m = m0) { 1216 m0 = m->m_nextpkt; 1217 m->m_nextpkt = 0; 1218#ifdef IPSEC 1219 /* clean ipsec history once it goes out of the node */ 1220 ipsec_delaux(m); 1221#endif 1222 if (error == 0) { 1223 /* Record statistics for this interface address. */ 1224 if (ia != NULL) { 1225 ia->ia_ifa.if_opackets++; 1226 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1227 } 1228 1229 error = (*ifp->if_output)(ifp, m, 1230 (struct sockaddr *)dst, ro->ro_rt); 1231 } else 1232 m_freem(m); 1233 } 1234 1235 if (error == 0) 1236 ipstat.ips_fragmented++; 1237 } 1238done: 1239#ifdef IPSEC 1240 if (ro == &iproute && ro->ro_rt) { 1241 RTFREE(ro->ro_rt); 1242 ro->ro_rt = NULL; 1243 } 1244 if (sp != NULL) { 1245 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 1246 printf("DP ip_output call free SP:%p\n", sp)); 1247 key_freesp(sp); 1248 } 1249#endif /* IPSEC */ 1250#ifdef FAST_IPSEC 1251 if (ro == &iproute && ro->ro_rt) { 1252 RTFREE(ro->ro_rt); 1253 ro->ro_rt = NULL; 1254 } 1255 if (sp != NULL) 1256 KEY_FREESP(&sp); 1257#endif /* FAST_IPSEC */ 1258 return (error); 1259bad: 1260 m_freem(m); 1261 goto done; 1262} 1263 1264void 1265in_delayed_cksum(struct mbuf *m) 1266{ 1267 struct ip *ip; 1268 u_short csum, offset; 1269 1270 ip = mtod(m, struct ip *); 1271 offset = ip->ip_hl << 2 ; 1272 csum = in_cksum_skip(m, ip->ip_len, offset); 1273 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 1274 csum = 0xffff; 1275 offset += m->m_pkthdr.csum_data; /* checksum offset */ 1276 1277 if (offset + sizeof(u_short) > m->m_len) { 1278 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 1279 m->m_len, offset, ip->ip_p); 1280 /* 1281 * XXX 1282 * this shouldn't happen, but if it does, the 1283 * correct behavior may be to insert the checksum 1284 * in the existing chain instead of rearranging it. 1285 */ 1286 m = m_pullup(m, offset + sizeof(u_short)); 1287 } 1288 *(u_short *)(m->m_data + offset) = csum; 1289} 1290 1291/* 1292 * Insert IP options into preformed packet. 1293 * Adjust IP destination as required for IP source routing, 1294 * as indicated by a non-zero in_addr at the start of the options. 1295 * 1296 * XXX This routine assumes that the packet has no options in place. 1297 */ 1298static struct mbuf * 1299ip_insertoptions(m, opt, phlen) 1300 register struct mbuf *m; 1301 struct mbuf *opt; 1302 int *phlen; 1303{ 1304 register struct ipoption *p = mtod(opt, struct ipoption *); 1305 struct mbuf *n; 1306 register struct ip *ip = mtod(m, struct ip *); 1307 unsigned optlen; 1308 1309 optlen = opt->m_len - sizeof(p->ipopt_dst); 1310 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) { 1311 *phlen = 0; 1312 return (m); /* XXX should fail */ 1313 } 1314 if (p->ipopt_dst.s_addr) 1315 ip->ip_dst = p->ipopt_dst; 1316 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1317 MGETHDR(n, M_DONTWAIT, MT_HEADER); 1318 if (n == 0) { 1319 *phlen = 0; 1320 return (m); 1321 } 1322 n->m_pkthdr.rcvif = (struct ifnet *)0; 1323#ifdef MAC 1324 mac_create_mbuf_from_mbuf(m, n); 1325#endif 1326 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1327 m->m_len -= sizeof(struct ip); 1328 m->m_data += sizeof(struct ip); 1329 n->m_next = m; 1330 m = n; 1331 m->m_len = optlen + sizeof(struct ip); 1332 m->m_data += max_linkhdr; 1333 bcopy(ip, mtod(m, void *), sizeof(struct ip)); 1334 } else { 1335 m->m_data -= optlen; 1336 m->m_len += optlen; 1337 m->m_pkthdr.len += optlen; 1338 bcopy(ip, mtod(m, void *), sizeof(struct ip)); 1339 } 1340 ip = mtod(m, struct ip *); 1341 bcopy(p->ipopt_list, ip + 1, optlen); 1342 *phlen = sizeof(struct ip) + optlen; 1343 ip->ip_v = IPVERSION; 1344 ip->ip_hl = *phlen >> 2; 1345 ip->ip_len += optlen; 1346 return (m); 1347} 1348 1349/* 1350 * Copy options from ip to jp, 1351 * omitting those not copied during fragmentation. 1352 */ 1353int 1354ip_optcopy(ip, jp) 1355 struct ip *ip, *jp; 1356{ 1357 register u_char *cp, *dp; 1358 int opt, optlen, cnt; 1359 1360 cp = (u_char *)(ip + 1); 1361 dp = (u_char *)(jp + 1); 1362 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 1363 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1364 opt = cp[0]; 1365 if (opt == IPOPT_EOL) 1366 break; 1367 if (opt == IPOPT_NOP) { 1368 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1369 *dp++ = IPOPT_NOP; 1370 optlen = 1; 1371 continue; 1372 } 1373 1374 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp), 1375 ("ip_optcopy: malformed ipv4 option")); 1376 optlen = cp[IPOPT_OLEN]; 1377 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt, 1378 ("ip_optcopy: malformed ipv4 option")); 1379 1380 /* bogus lengths should have been caught by ip_dooptions */ 1381 if (optlen > cnt) 1382 optlen = cnt; 1383 if (IPOPT_COPIED(opt)) { 1384 bcopy(cp, dp, optlen); 1385 dp += optlen; 1386 } 1387 } 1388 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1389 *dp++ = IPOPT_EOL; 1390 return (optlen); 1391} 1392 1393/* 1394 * IP socket option processing. 1395 */ 1396int 1397ip_ctloutput(so, sopt) 1398 struct socket *so; 1399 struct sockopt *sopt; 1400{ 1401 struct inpcb *inp = sotoinpcb(so); 1402 int error, optval; 1403 1404 error = optval = 0; 1405 if (sopt->sopt_level != IPPROTO_IP) { 1406 return (EINVAL); 1407 } 1408 1409 switch (sopt->sopt_dir) { 1410 case SOPT_SET: 1411 switch (sopt->sopt_name) { 1412 case IP_OPTIONS: 1413#ifdef notyet 1414 case IP_RETOPTS: 1415#endif 1416 { 1417 struct mbuf *m; 1418 if (sopt->sopt_valsize > MLEN) { 1419 error = EMSGSIZE; 1420 break; 1421 } 1422 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); 1423 if (m == 0) { 1424 error = ENOBUFS; 1425 break; 1426 } 1427 m->m_len = sopt->sopt_valsize; 1428 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1429 m->m_len); 1430 1431 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1432 m)); 1433 } 1434 1435 case IP_TOS: 1436 case IP_TTL: 1437 case IP_RECVOPTS: 1438 case IP_RECVRETOPTS: 1439 case IP_RECVDSTADDR: 1440 case IP_RECVTTL: 1441 case IP_RECVIF: 1442 case IP_FAITH: 1443 error = sooptcopyin(sopt, &optval, sizeof optval, 1444 sizeof optval); 1445 if (error) 1446 break; 1447 1448 switch (sopt->sopt_name) { 1449 case IP_TOS: 1450 inp->inp_ip_tos = optval; 1451 break; 1452 1453 case IP_TTL: 1454 inp->inp_ip_ttl = optval; 1455 break; 1456#define OPTSET(bit) \ 1457 if (optval) \ 1458 inp->inp_flags |= bit; \ 1459 else \ 1460 inp->inp_flags &= ~bit; 1461 1462 case IP_RECVOPTS: 1463 OPTSET(INP_RECVOPTS); 1464 break; 1465 1466 case IP_RECVRETOPTS: 1467 OPTSET(INP_RECVRETOPTS); 1468 break; 1469 1470 case IP_RECVDSTADDR: 1471 OPTSET(INP_RECVDSTADDR); 1472 break; 1473 1474 case IP_RECVTTL: 1475 OPTSET(INP_RECVTTL); 1476 break; 1477 1478 case IP_RECVIF: 1479 OPTSET(INP_RECVIF); 1480 break; 1481 1482 case IP_FAITH: 1483 OPTSET(INP_FAITH); 1484 break; 1485 } 1486 break; 1487#undef OPTSET 1488 1489 case IP_MULTICAST_IF: 1490 case IP_MULTICAST_VIF: 1491 case IP_MULTICAST_TTL: 1492 case IP_MULTICAST_LOOP: 1493 case IP_ADD_MEMBERSHIP: 1494 case IP_DROP_MEMBERSHIP: 1495 error = ip_setmoptions(sopt, &inp->inp_moptions); 1496 break; 1497 1498 case IP_PORTRANGE: 1499 error = sooptcopyin(sopt, &optval, sizeof optval, 1500 sizeof optval); 1501 if (error) 1502 break; 1503 1504 switch (optval) { 1505 case IP_PORTRANGE_DEFAULT: 1506 inp->inp_flags &= ~(INP_LOWPORT); 1507 inp->inp_flags &= ~(INP_HIGHPORT); 1508 break; 1509 1510 case IP_PORTRANGE_HIGH: 1511 inp->inp_flags &= ~(INP_LOWPORT); 1512 inp->inp_flags |= INP_HIGHPORT; 1513 break; 1514 1515 case IP_PORTRANGE_LOW: 1516 inp->inp_flags &= ~(INP_HIGHPORT); 1517 inp->inp_flags |= INP_LOWPORT; 1518 break; 1519 1520 default: 1521 error = EINVAL; 1522 break; 1523 } 1524 break; 1525 1526#if defined(IPSEC) || defined(FAST_IPSEC) 1527 case IP_IPSEC_POLICY: 1528 { 1529 caddr_t req; 1530 size_t len = 0; 1531 int priv; 1532 struct mbuf *m; 1533 int optname; 1534 1535 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1536 break; 1537 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1538 break; 1539 priv = (sopt->sopt_td != NULL && 1540 suser(sopt->sopt_td) != 0) ? 0 : 1; 1541 req = mtod(m, caddr_t); 1542 len = m->m_len; 1543 optname = sopt->sopt_name; 1544 error = ipsec4_set_policy(inp, optname, req, len, priv); 1545 m_freem(m); 1546 break; 1547 } 1548#endif /*IPSEC*/ 1549 1550 default: 1551 error = ENOPROTOOPT; 1552 break; 1553 } 1554 break; 1555 1556 case SOPT_GET: 1557 switch (sopt->sopt_name) { 1558 case IP_OPTIONS: 1559 case IP_RETOPTS: 1560 if (inp->inp_options) 1561 error = sooptcopyout(sopt, 1562 mtod(inp->inp_options, 1563 char *), 1564 inp->inp_options->m_len); 1565 else 1566 sopt->sopt_valsize = 0; 1567 break; 1568 1569 case IP_TOS: 1570 case IP_TTL: 1571 case IP_RECVOPTS: 1572 case IP_RECVRETOPTS: 1573 case IP_RECVDSTADDR: 1574 case IP_RECVTTL: 1575 case IP_RECVIF: 1576 case IP_PORTRANGE: 1577 case IP_FAITH: 1578 switch (sopt->sopt_name) { 1579 1580 case IP_TOS: 1581 optval = inp->inp_ip_tos; 1582 break; 1583 1584 case IP_TTL: 1585 optval = inp->inp_ip_ttl; 1586 break; 1587 1588#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1589 1590 case IP_RECVOPTS: 1591 optval = OPTBIT(INP_RECVOPTS); 1592 break; 1593 1594 case IP_RECVRETOPTS: 1595 optval = OPTBIT(INP_RECVRETOPTS); 1596 break; 1597 1598 case IP_RECVDSTADDR: 1599 optval = OPTBIT(INP_RECVDSTADDR); 1600 break; 1601 1602 case IP_RECVTTL: 1603 optval = OPTBIT(INP_RECVTTL); 1604 break; 1605 1606 case IP_RECVIF: 1607 optval = OPTBIT(INP_RECVIF); 1608 break; 1609 1610 case IP_PORTRANGE: 1611 if (inp->inp_flags & INP_HIGHPORT) 1612 optval = IP_PORTRANGE_HIGH; 1613 else if (inp->inp_flags & INP_LOWPORT) 1614 optval = IP_PORTRANGE_LOW; 1615 else 1616 optval = 0; 1617 break; 1618 1619 case IP_FAITH: 1620 optval = OPTBIT(INP_FAITH); 1621 break; 1622 } 1623 error = sooptcopyout(sopt, &optval, sizeof optval); 1624 break; 1625 1626 case IP_MULTICAST_IF: 1627 case IP_MULTICAST_VIF: 1628 case IP_MULTICAST_TTL: 1629 case IP_MULTICAST_LOOP: 1630 case IP_ADD_MEMBERSHIP: 1631 case IP_DROP_MEMBERSHIP: 1632 error = ip_getmoptions(sopt, inp->inp_moptions); 1633 break; 1634 1635#if defined(IPSEC) || defined(FAST_IPSEC) 1636 case IP_IPSEC_POLICY: 1637 { 1638 struct mbuf *m = NULL; 1639 caddr_t req = NULL; 1640 size_t len = 0; 1641 1642 if (m != 0) { 1643 req = mtod(m, caddr_t); 1644 len = m->m_len; 1645 } 1646 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1647 if (error == 0) 1648 error = soopt_mcopyout(sopt, m); /* XXX */ 1649 if (error == 0) 1650 m_freem(m); 1651 break; 1652 } 1653#endif /*IPSEC*/ 1654 1655 default: 1656 error = ENOPROTOOPT; 1657 break; 1658 } 1659 break; 1660 } 1661 return (error); 1662} 1663 1664/* 1665 * Set up IP options in pcb for insertion in output packets. 1666 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1667 * with destination address if source routed. 1668 */ 1669static int 1670ip_pcbopts(optname, pcbopt, m) 1671 int optname; 1672 struct mbuf **pcbopt; 1673 register struct mbuf *m; 1674{ 1675 register int cnt, optlen; 1676 register u_char *cp; 1677 u_char opt; 1678 1679 /* turn off any old options */ 1680 if (*pcbopt) 1681 (void)m_free(*pcbopt); 1682 *pcbopt = 0; 1683 if (m == (struct mbuf *)0 || m->m_len == 0) { 1684 /* 1685 * Only turning off any previous options. 1686 */ 1687 if (m) 1688 (void)m_free(m); 1689 return (0); 1690 } 1691 1692 if (m->m_len % sizeof(int32_t)) 1693 goto bad; 1694 /* 1695 * IP first-hop destination address will be stored before 1696 * actual options; move other options back 1697 * and clear it when none present. 1698 */ 1699 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1700 goto bad; 1701 cnt = m->m_len; 1702 m->m_len += sizeof(struct in_addr); 1703 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1704 bcopy(mtod(m, void *), cp, (unsigned)cnt); 1705 bzero(mtod(m, void *), sizeof(struct in_addr)); 1706 1707 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1708 opt = cp[IPOPT_OPTVAL]; 1709 if (opt == IPOPT_EOL) 1710 break; 1711 if (opt == IPOPT_NOP) 1712 optlen = 1; 1713 else { 1714 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1715 goto bad; 1716 optlen = cp[IPOPT_OLEN]; 1717 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1718 goto bad; 1719 } 1720 switch (opt) { 1721 1722 default: 1723 break; 1724 1725 case IPOPT_LSRR: 1726 case IPOPT_SSRR: 1727 /* 1728 * user process specifies route as: 1729 * ->A->B->C->D 1730 * D must be our final destination (but we can't 1731 * check that since we may not have connected yet). 1732 * A is first hop destination, which doesn't appear in 1733 * actual IP option, but is stored before the options. 1734 */ 1735 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1736 goto bad; 1737 m->m_len -= sizeof(struct in_addr); 1738 cnt -= sizeof(struct in_addr); 1739 optlen -= sizeof(struct in_addr); 1740 cp[IPOPT_OLEN] = optlen; 1741 /* 1742 * Move first hop before start of options. 1743 */ 1744 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1745 sizeof(struct in_addr)); 1746 /* 1747 * Then copy rest of options back 1748 * to close up the deleted entry. 1749 */ 1750 bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)), 1751 &cp[IPOPT_OFFSET+1], 1752 (unsigned)cnt + sizeof(struct in_addr)); 1753 break; 1754 } 1755 } 1756 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1757 goto bad; 1758 *pcbopt = m; 1759 return (0); 1760 1761bad: 1762 (void)m_free(m); 1763 return (EINVAL); 1764} 1765 1766/* 1767 * XXX 1768 * The whole multicast option thing needs to be re-thought. 1769 * Several of these options are equally applicable to non-multicast 1770 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1771 * standard option (IP_TTL). 1772 */ 1773 1774/* 1775 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1776 */ 1777static struct ifnet * 1778ip_multicast_if(a, ifindexp) 1779 struct in_addr *a; 1780 int *ifindexp; 1781{ 1782 int ifindex; 1783 struct ifnet *ifp; 1784 1785 if (ifindexp) 1786 *ifindexp = 0; 1787 if (ntohl(a->s_addr) >> 24 == 0) { 1788 ifindex = ntohl(a->s_addr) & 0xffffff; 1789 if (ifindex < 0 || if_index < ifindex) 1790 return NULL; 1791 ifp = ifnet_byindex(ifindex); 1792 if (ifindexp) 1793 *ifindexp = ifindex; 1794 } else { 1795 INADDR_TO_IFP(*a, ifp); 1796 } 1797 return ifp; 1798} 1799 1800/* 1801 * Set the IP multicast options in response to user setsockopt(). 1802 */ 1803static int 1804ip_setmoptions(sopt, imop) 1805 struct sockopt *sopt; 1806 struct ip_moptions **imop; 1807{ 1808 int error = 0; 1809 int i; 1810 struct in_addr addr; 1811 struct ip_mreq mreq; 1812 struct ifnet *ifp; 1813 struct ip_moptions *imo = *imop; 1814 struct route ro; 1815 struct sockaddr_in *dst; 1816 int ifindex; 1817 int s; 1818 1819 if (imo == NULL) { 1820 /* 1821 * No multicast option buffer attached to the pcb; 1822 * allocate one and initialize to default values. 1823 */ 1824 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, 1825 M_WAITOK); 1826 1827 if (imo == NULL) 1828 return (ENOBUFS); 1829 *imop = imo; 1830 imo->imo_multicast_ifp = NULL; 1831 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1832 imo->imo_multicast_vif = -1; 1833 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1834 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1835 imo->imo_num_memberships = 0; 1836 } 1837 1838 switch (sopt->sopt_name) { 1839 /* store an index number for the vif you wanna use in the send */ 1840 case IP_MULTICAST_VIF: 1841 if (legal_vif_num == 0) { 1842 error = EOPNOTSUPP; 1843 break; 1844 } 1845 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1846 if (error) 1847 break; 1848 if (!legal_vif_num(i) && (i != -1)) { 1849 error = EINVAL; 1850 break; 1851 } 1852 imo->imo_multicast_vif = i; 1853 break; 1854 1855 case IP_MULTICAST_IF: 1856 /* 1857 * Select the interface for outgoing multicast packets. 1858 */ 1859 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1860 if (error) 1861 break; 1862 /* 1863 * INADDR_ANY is used to remove a previous selection. 1864 * When no interface is selected, a default one is 1865 * chosen every time a multicast packet is sent. 1866 */ 1867 if (addr.s_addr == INADDR_ANY) { 1868 imo->imo_multicast_ifp = NULL; 1869 break; 1870 } 1871 /* 1872 * The selected interface is identified by its local 1873 * IP address. Find the interface and confirm that 1874 * it supports multicasting. 1875 */ 1876 s = splimp(); 1877 ifp = ip_multicast_if(&addr, &ifindex); 1878 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1879 splx(s); 1880 error = EADDRNOTAVAIL; 1881 break; 1882 } 1883 imo->imo_multicast_ifp = ifp; 1884 if (ifindex) 1885 imo->imo_multicast_addr = addr; 1886 else 1887 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1888 splx(s); 1889 break; 1890 1891 case IP_MULTICAST_TTL: 1892 /* 1893 * Set the IP time-to-live for outgoing multicast packets. 1894 * The original multicast API required a char argument, 1895 * which is inconsistent with the rest of the socket API. 1896 * We allow either a char or an int. 1897 */ 1898 if (sopt->sopt_valsize == 1) { 1899 u_char ttl; 1900 error = sooptcopyin(sopt, &ttl, 1, 1); 1901 if (error) 1902 break; 1903 imo->imo_multicast_ttl = ttl; 1904 } else { 1905 u_int ttl; 1906 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1907 sizeof ttl); 1908 if (error) 1909 break; 1910 if (ttl > 255) 1911 error = EINVAL; 1912 else 1913 imo->imo_multicast_ttl = ttl; 1914 } 1915 break; 1916 1917 case IP_MULTICAST_LOOP: 1918 /* 1919 * Set the loopback flag for outgoing multicast packets. 1920 * Must be zero or one. The original multicast API required a 1921 * char argument, which is inconsistent with the rest 1922 * of the socket API. We allow either a char or an int. 1923 */ 1924 if (sopt->sopt_valsize == 1) { 1925 u_char loop; 1926 error = sooptcopyin(sopt, &loop, 1, 1); 1927 if (error) 1928 break; 1929 imo->imo_multicast_loop = !!loop; 1930 } else { 1931 u_int loop; 1932 error = sooptcopyin(sopt, &loop, sizeof loop, 1933 sizeof loop); 1934 if (error) 1935 break; 1936 imo->imo_multicast_loop = !!loop; 1937 } 1938 break; 1939 1940 case IP_ADD_MEMBERSHIP: 1941 /* 1942 * Add a multicast group membership. 1943 * Group must be a valid IP multicast address. 1944 */ 1945 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1946 if (error) 1947 break; 1948 1949 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1950 error = EINVAL; 1951 break; 1952 } 1953 s = splimp(); 1954 /* 1955 * If no interface address was provided, use the interface of 1956 * the route to the given multicast address. 1957 */ 1958 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1959 bzero((caddr_t)&ro, sizeof(ro)); 1960 dst = (struct sockaddr_in *)&ro.ro_dst; 1961 dst->sin_len = sizeof(*dst); 1962 dst->sin_family = AF_INET; 1963 dst->sin_addr = mreq.imr_multiaddr; 1964 rtalloc(&ro); 1965 if (ro.ro_rt == NULL) { 1966 error = EADDRNOTAVAIL; 1967 splx(s); 1968 break; 1969 } 1970 ifp = ro.ro_rt->rt_ifp; 1971 rtfree(ro.ro_rt); 1972 } 1973 else { 1974 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1975 } 1976 1977 /* 1978 * See if we found an interface, and confirm that it 1979 * supports multicast. 1980 */ 1981 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1982 error = EADDRNOTAVAIL; 1983 splx(s); 1984 break; 1985 } 1986 /* 1987 * See if the membership already exists or if all the 1988 * membership slots are full. 1989 */ 1990 for (i = 0; i < imo->imo_num_memberships; ++i) { 1991 if (imo->imo_membership[i]->inm_ifp == ifp && 1992 imo->imo_membership[i]->inm_addr.s_addr 1993 == mreq.imr_multiaddr.s_addr) 1994 break; 1995 } 1996 if (i < imo->imo_num_memberships) { 1997 error = EADDRINUSE; 1998 splx(s); 1999 break; 2000 } 2001 if (i == IP_MAX_MEMBERSHIPS) { 2002 error = ETOOMANYREFS; 2003 splx(s); 2004 break; 2005 } 2006 /* 2007 * Everything looks good; add a new record to the multicast 2008 * address list for the given interface. 2009 */ 2010 if ((imo->imo_membership[i] = 2011 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 2012 error = ENOBUFS; 2013 splx(s); 2014 break; 2015 } 2016 ++imo->imo_num_memberships; 2017 splx(s); 2018 break; 2019 2020 case IP_DROP_MEMBERSHIP: 2021 /* 2022 * Drop a multicast group membership. 2023 * Group must be a valid IP multicast address. 2024 */ 2025 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 2026 if (error) 2027 break; 2028 2029 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 2030 error = EINVAL; 2031 break; 2032 } 2033 2034 s = splimp(); 2035 /* 2036 * If an interface address was specified, get a pointer 2037 * to its ifnet structure. 2038 */ 2039 if (mreq.imr_interface.s_addr == INADDR_ANY) 2040 ifp = NULL; 2041 else { 2042 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 2043 if (ifp == NULL) { 2044 error = EADDRNOTAVAIL; 2045 splx(s); 2046 break; 2047 } 2048 } 2049 /* 2050 * Find the membership in the membership array. 2051 */ 2052 for (i = 0; i < imo->imo_num_memberships; ++i) { 2053 if ((ifp == NULL || 2054 imo->imo_membership[i]->inm_ifp == ifp) && 2055 imo->imo_membership[i]->inm_addr.s_addr == 2056 mreq.imr_multiaddr.s_addr) 2057 break; 2058 } 2059 if (i == imo->imo_num_memberships) { 2060 error = EADDRNOTAVAIL; 2061 splx(s); 2062 break; 2063 } 2064 /* 2065 * Give up the multicast address record to which the 2066 * membership points. 2067 */ 2068 in_delmulti(imo->imo_membership[i]); 2069 /* 2070 * Remove the gap in the membership array. 2071 */ 2072 for (++i; i < imo->imo_num_memberships; ++i) 2073 imo->imo_membership[i-1] = imo->imo_membership[i]; 2074 --imo->imo_num_memberships; 2075 splx(s); 2076 break; 2077 2078 default: 2079 error = EOPNOTSUPP; 2080 break; 2081 } 2082 2083 /* 2084 * If all options have default values, no need to keep the mbuf. 2085 */ 2086 if (imo->imo_multicast_ifp == NULL && 2087 imo->imo_multicast_vif == -1 && 2088 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 2089 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 2090 imo->imo_num_memberships == 0) { 2091 free(*imop, M_IPMOPTS); 2092 *imop = NULL; 2093 } 2094 2095 return (error); 2096} 2097 2098/* 2099 * Return the IP multicast options in response to user getsockopt(). 2100 */ 2101static int 2102ip_getmoptions(sopt, imo) 2103 struct sockopt *sopt; 2104 register struct ip_moptions *imo; 2105{ 2106 struct in_addr addr; 2107 struct in_ifaddr *ia; 2108 int error, optval; 2109 u_char coptval; 2110 2111 error = 0; 2112 switch (sopt->sopt_name) { 2113 case IP_MULTICAST_VIF: 2114 if (imo != NULL) 2115 optval = imo->imo_multicast_vif; 2116 else 2117 optval = -1; 2118 error = sooptcopyout(sopt, &optval, sizeof optval); 2119 break; 2120 2121 case IP_MULTICAST_IF: 2122 if (imo == NULL || imo->imo_multicast_ifp == NULL) 2123 addr.s_addr = INADDR_ANY; 2124 else if (imo->imo_multicast_addr.s_addr) { 2125 /* return the value user has set */ 2126 addr = imo->imo_multicast_addr; 2127 } else { 2128 IFP_TO_IA(imo->imo_multicast_ifp, ia); 2129 addr.s_addr = (ia == NULL) ? INADDR_ANY 2130 : IA_SIN(ia)->sin_addr.s_addr; 2131 } 2132 error = sooptcopyout(sopt, &addr, sizeof addr); 2133 break; 2134 2135 case IP_MULTICAST_TTL: 2136 if (imo == 0) 2137 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 2138 else 2139 optval = coptval = imo->imo_multicast_ttl; 2140 if (sopt->sopt_valsize == 1) 2141 error = sooptcopyout(sopt, &coptval, 1); 2142 else 2143 error = sooptcopyout(sopt, &optval, sizeof optval); 2144 break; 2145 2146 case IP_MULTICAST_LOOP: 2147 if (imo == 0) 2148 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 2149 else 2150 optval = coptval = imo->imo_multicast_loop; 2151 if (sopt->sopt_valsize == 1) 2152 error = sooptcopyout(sopt, &coptval, 1); 2153 else 2154 error = sooptcopyout(sopt, &optval, sizeof optval); 2155 break; 2156 2157 default: 2158 error = ENOPROTOOPT; 2159 break; 2160 } 2161 return (error); 2162} 2163 2164/* 2165 * Discard the IP multicast options. 2166 */ 2167void 2168ip_freemoptions(imo) 2169 register struct ip_moptions *imo; 2170{ 2171 register int i; 2172 2173 if (imo != NULL) { 2174 for (i = 0; i < imo->imo_num_memberships; ++i) 2175 in_delmulti(imo->imo_membership[i]); 2176 free(imo, M_IPMOPTS); 2177 } 2178} 2179 2180/* 2181 * Routine called from ip_output() to loop back a copy of an IP multicast 2182 * packet to the input queue of a specified interface. Note that this 2183 * calls the output routine of the loopback "driver", but with an interface 2184 * pointer that might NOT be a loopback interface -- evil, but easier than 2185 * replicating that code here. 2186 */ 2187static void 2188ip_mloopback(ifp, m, dst, hlen) 2189 struct ifnet *ifp; 2190 register struct mbuf *m; 2191 register struct sockaddr_in *dst; 2192 int hlen; 2193{ 2194 register struct ip *ip; 2195 struct mbuf *copym; 2196 2197 copym = m_copy(m, 0, M_COPYALL); 2198 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 2199 copym = m_pullup(copym, hlen); 2200 if (copym != NULL) { 2201 /* 2202 * We don't bother to fragment if the IP length is greater 2203 * than the interface's MTU. Can this possibly matter? 2204 */ 2205 ip = mtod(copym, struct ip *); 2206 ip->ip_len = htons(ip->ip_len); 2207 ip->ip_off = htons(ip->ip_off); 2208 ip->ip_sum = 0; 2209 ip->ip_sum = in_cksum(copym, hlen); 2210 /* 2211 * NB: 2212 * It's not clear whether there are any lingering 2213 * reentrancy problems in other areas which might 2214 * be exposed by using ip_input directly (in 2215 * particular, everything which modifies the packet 2216 * in-place). Yet another option is using the 2217 * protosw directly to deliver the looped back 2218 * packet. For the moment, we'll err on the side 2219 * of safety by using if_simloop(). 2220 */ 2221#if 1 /* XXX */ 2222 if (dst->sin_family != AF_INET) { 2223 printf("ip_mloopback: bad address family %d\n", 2224 dst->sin_family); 2225 dst->sin_family = AF_INET; 2226 } 2227#endif 2228 2229#ifdef notdef 2230 copym->m_pkthdr.rcvif = ifp; 2231 ip_input(copym); 2232#else 2233 /* if the checksum hasn't been computed, mark it as valid */ 2234 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2235 copym->m_pkthdr.csum_flags |= 2236 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 2237 copym->m_pkthdr.csum_data = 0xffff; 2238 } 2239 if_simloop(ifp, copym, dst->sin_family, 0); 2240#endif 2241 } 2242} 2243