ip_output.c revision 105586
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 34 * $FreeBSD: head/sys/netinet/ip_output.c 105586 2002-10-20 22:52:07Z phk $ 35 */ 36 37#include "opt_ipfw.h" 38#include "opt_ipdn.h" 39#include "opt_ipdivert.h" 40#include "opt_ipfilter.h" 41#include "opt_ipsec.h" 42#include "opt_mac.h" 43#include "opt_pfil_hooks.h" 44#include "opt_random_ip_id.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/mac.h> 50#include <sys/malloc.h> 51#include <sys/mbuf.h> 52#include <sys/protosw.h> 53#include <sys/socket.h> 54#include <sys/socketvar.h> 55 56#include <net/if.h> 57#include <net/route.h> 58 59#include <netinet/in.h> 60#include <netinet/in_systm.h> 61#include <netinet/ip.h> 62#include <netinet/in_pcb.h> 63#include <netinet/in_var.h> 64#include <netinet/ip_var.h> 65 66#include <machine/in_cksum.h> 67 68static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 69 70#ifdef IPSEC 71#include <netinet6/ipsec.h> 72#include <netkey/key.h> 73#ifdef IPSEC_DEBUG 74#include <netkey/key_debug.h> 75#else 76#define KEYDEBUG(lev,arg) 77#endif 78#endif /*IPSEC*/ 79 80#ifdef FAST_IPSEC 81#include <netipsec/ipsec.h> 82#include <netipsec/xform.h> 83#include <netipsec/key.h> 84#endif /*FAST_IPSEC*/ 85 86#include <netinet/ip_fw.h> 87#include <netinet/ip_dummynet.h> 88 89#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 90 x, (ntohl(a.s_addr)>>24)&0xFF,\ 91 (ntohl(a.s_addr)>>16)&0xFF,\ 92 (ntohl(a.s_addr)>>8)&0xFF,\ 93 (ntohl(a.s_addr))&0xFF, y); 94 95u_short ip_id; 96 97static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 98static struct ifnet *ip_multicast_if(struct in_addr *, int *); 99static void ip_mloopback 100 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 101static int ip_getmoptions 102 (struct sockopt *, struct ip_moptions *); 103static int ip_pcbopts(int, struct mbuf **, struct mbuf *); 104static int ip_setmoptions 105 (struct sockopt *, struct ip_moptions **); 106 107int ip_optcopy(struct ip *, struct ip *); 108 109 110extern struct protosw inetsw[]; 111 112/* 113 * IP output. The packet in mbuf chain m contains a skeletal IP 114 * header (with len, off, ttl, proto, tos, src, dst). 115 * The mbuf chain containing the packet will be freed. 116 * The mbuf opt, if present, will not be freed. 117 */ 118int 119ip_output(m0, opt, ro, flags, imo, inp) 120 struct mbuf *m0; 121 struct mbuf *opt; 122 struct route *ro; 123 int flags; 124 struct ip_moptions *imo; 125 struct inpcb *inp; 126{ 127 struct ip *ip, *mhip; 128 struct ifnet *ifp = NULL; /* keep compiler happy */ 129 struct mbuf *m; 130 int hlen = sizeof (struct ip); 131 int len, off, error = 0; 132 struct sockaddr_in *dst = NULL; /* keep compiler happy */ 133 struct in_ifaddr *ia = NULL; 134 int isbroadcast, sw_csum; 135 struct in_addr pkt_dst; 136#ifdef IPSEC 137 struct route iproute; 138 struct secpolicy *sp = NULL; 139 struct socket *so = inp ? inp->inp_socket : NULL; 140#endif 141#ifdef FAST_IPSEC 142 struct route iproute; 143 struct m_tag *mtag; 144 struct secpolicy *sp = NULL; 145 struct tdb_ident *tdbi; 146 int s; 147#endif /* FAST_IPSEC */ 148 struct ip_fw_args args; 149 int src_was_INADDR_ANY = 0; /* as the name says... */ 150#ifdef PFIL_HOOKS 151 struct packet_filter_hook *pfh; 152 struct mbuf *m1; 153 int rv; 154#endif /* PFIL_HOOKS */ 155 156 args.eh = NULL; 157 args.rule = NULL; 158 args.next_hop = NULL; 159 args.divert_rule = 0; /* divert cookie */ 160 161 /* Grab info from MT_TAG mbufs prepended to the chain. */ 162 for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) { 163 switch(m0->_m_tag_id) { 164 default: 165 printf("ip_output: unrecognised MT_TAG tag %d\n", 166 m0->_m_tag_id); 167 break; 168 169 case PACKET_TAG_DUMMYNET: 170 /* 171 * the packet was already tagged, so part of the 172 * processing was already done, and we need to go down. 173 * Get parameters from the header. 174 */ 175 args.rule = ((struct dn_pkt *)m0)->rule; 176 opt = NULL ; 177 ro = & ( ((struct dn_pkt *)m0)->ro ) ; 178 imo = NULL ; 179 dst = ((struct dn_pkt *)m0)->dn_dst ; 180 ifp = ((struct dn_pkt *)m0)->ifp ; 181 flags = ((struct dn_pkt *)m0)->flags ; 182 break; 183 184 case PACKET_TAG_DIVERT: 185 args.divert_rule = (intptr_t)m0->m_data & 0xffff; 186 break; 187 188 case PACKET_TAG_IPFORWARD: 189 args.next_hop = (struct sockaddr_in *)m0->m_data; 190 break; 191 } 192 } 193 m = m0; 194 195 KASSERT(!m || (m->m_flags & M_PKTHDR) != 0, ("ip_output: no HDR")); 196#ifndef FAST_IPSEC 197 KASSERT(ro != NULL, ("ip_output: no route, proto %d", 198 mtod(m, struct ip *)->ip_p)); 199#endif 200 201 if (args.rule != NULL) { /* dummynet already saw us */ 202 ip = mtod(m, struct ip *); 203 hlen = ip->ip_hl << 2 ; 204 if (ro->ro_rt) 205 ia = ifatoia(ro->ro_rt->rt_ifa); 206 goto sendit; 207 } 208 209 if (opt) { 210 len = 0; 211 m = ip_insertoptions(m, opt, &len); 212 if (len != 0) 213 hlen = len; 214 } 215 ip = mtod(m, struct ip *); 216 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; 217 218 /* 219 * Fill in IP header. 220 */ 221 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 222 ip->ip_v = IPVERSION; 223 ip->ip_hl = hlen >> 2; 224 ip->ip_off &= IP_DF; 225#ifdef RANDOM_IP_ID 226 ip->ip_id = ip_randomid(); 227#else 228 ip->ip_id = htons(ip_id++); 229#endif 230 ipstat.ips_localout++; 231 } else { 232 hlen = ip->ip_hl << 2; 233 } 234 235#ifdef FAST_IPSEC 236 if (ro == NULL) { 237 ro = &iproute; 238 bzero(ro, sizeof (*ro)); 239 } 240#endif /* FAST_IPSEC */ 241 dst = (struct sockaddr_in *)&ro->ro_dst; 242 /* 243 * If there is a cached route, 244 * check that it is to the same destination 245 * and is still up. If not, free it and try again. 246 * The address family should also be checked in case of sharing the 247 * cache with IPv6. 248 */ 249 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 250 dst->sin_family != AF_INET || 251 dst->sin_addr.s_addr != pkt_dst.s_addr)) { 252 RTFREE(ro->ro_rt); 253 ro->ro_rt = (struct rtentry *)0; 254 } 255 if (ro->ro_rt == 0) { 256 bzero(dst, sizeof(*dst)); 257 dst->sin_family = AF_INET; 258 dst->sin_len = sizeof(*dst); 259 dst->sin_addr = pkt_dst; 260 } 261 /* 262 * If routing to interface only, 263 * short circuit routing lookup. 264 */ 265 if (flags & IP_ROUTETOIF) { 266 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 267 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 268 ipstat.ips_noroute++; 269 error = ENETUNREACH; 270 goto bad; 271 } 272 ifp = ia->ia_ifp; 273 ip->ip_ttl = 1; 274 isbroadcast = in_broadcast(dst->sin_addr, ifp); 275 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 276 imo != NULL && imo->imo_multicast_ifp != NULL) { 277 /* 278 * Bypass the normal routing lookup for multicast 279 * packets if the interface is specified. 280 */ 281 ifp = imo->imo_multicast_ifp; 282 IFP_TO_IA(ifp, ia); 283 isbroadcast = 0; /* fool gcc */ 284 } else { 285 /* 286 * If this is the case, we probably don't want to allocate 287 * a protocol-cloned route since we didn't get one from the 288 * ULP. This lets TCP do its thing, while not burdening 289 * forwarding or ICMP with the overhead of cloning a route. 290 * Of course, we still want to do any cloning requested by 291 * the link layer, as this is probably required in all cases 292 * for correct operation (as it is for ARP). 293 */ 294 if (ro->ro_rt == 0) 295 rtalloc_ign(ro, RTF_PRCLONING); 296 if (ro->ro_rt == 0) { 297 ipstat.ips_noroute++; 298 error = EHOSTUNREACH; 299 goto bad; 300 } 301 ia = ifatoia(ro->ro_rt->rt_ifa); 302 ifp = ro->ro_rt->rt_ifp; 303 ro->ro_rt->rt_use++; 304 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 305 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 306 if (ro->ro_rt->rt_flags & RTF_HOST) 307 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 308 else 309 isbroadcast = in_broadcast(dst->sin_addr, ifp); 310 } 311 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { 312 struct in_multi *inm; 313 314 m->m_flags |= M_MCAST; 315 /* 316 * IP destination address is multicast. Make sure "dst" 317 * still points to the address in "ro". (It may have been 318 * changed to point to a gateway address, above.) 319 */ 320 dst = (struct sockaddr_in *)&ro->ro_dst; 321 /* 322 * See if the caller provided any multicast options 323 */ 324 if (imo != NULL) { 325 ip->ip_ttl = imo->imo_multicast_ttl; 326 if (imo->imo_multicast_vif != -1) 327 ip->ip_src.s_addr = 328 ip_mcast_src(imo->imo_multicast_vif); 329 } else 330 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 331 /* 332 * Confirm that the outgoing interface supports multicast. 333 */ 334 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 335 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 336 ipstat.ips_noroute++; 337 error = ENETUNREACH; 338 goto bad; 339 } 340 } 341 /* 342 * If source address not specified yet, use address 343 * of outgoing interface. 344 */ 345 if (ip->ip_src.s_addr == INADDR_ANY) { 346 /* Interface may have no addresses. */ 347 if (ia != NULL) 348 ip->ip_src = IA_SIN(ia)->sin_addr; 349 } 350 351 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 352 /* 353 * XXX 354 * delayed checksums are not currently 355 * compatible with IP multicast routing 356 */ 357 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 358 in_delayed_cksum(m); 359 m->m_pkthdr.csum_flags &= 360 ~CSUM_DELAY_DATA; 361 } 362 } 363 IN_LOOKUP_MULTI(pkt_dst, ifp, inm); 364 if (inm != NULL && 365 (imo == NULL || imo->imo_multicast_loop)) { 366 /* 367 * If we belong to the destination multicast group 368 * on the outgoing interface, and the caller did not 369 * forbid loopback, loop back a copy. 370 */ 371 ip_mloopback(ifp, m, dst, hlen); 372 } 373 else { 374 /* 375 * If we are acting as a multicast router, perform 376 * multicast forwarding as if the packet had just 377 * arrived on the interface to which we are about 378 * to send. The multicast forwarding function 379 * recursively calls this function, using the 380 * IP_FORWARDING flag to prevent infinite recursion. 381 * 382 * Multicasts that are looped back by ip_mloopback(), 383 * above, will be forwarded by the ip_input() routine, 384 * if necessary. 385 */ 386 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 387 /* 388 * Check if rsvp daemon is running. If not, don't 389 * set ip_moptions. This ensures that the packet 390 * is multicast and not just sent down one link 391 * as prescribed by rsvpd. 392 */ 393 if (!rsvp_on) 394 imo = NULL; 395 if (ip_mforward(ip, ifp, m, imo) != 0) { 396 m_freem(m); 397 goto done; 398 } 399 } 400 } 401 402 /* 403 * Multicasts with a time-to-live of zero may be looped- 404 * back, above, but must not be transmitted on a network. 405 * Also, multicasts addressed to the loopback interface 406 * are not sent -- the above call to ip_mloopback() will 407 * loop back a copy if this host actually belongs to the 408 * destination group on the loopback interface. 409 */ 410 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 411 m_freem(m); 412 goto done; 413 } 414 415 goto sendit; 416 } 417#ifndef notdef 418 /* 419 * If the source address is not specified yet, use the address 420 * of the outoing interface. In case, keep note we did that, so 421 * if the the firewall changes the next-hop causing the output 422 * interface to change, we can fix that. 423 */ 424 if (ip->ip_src.s_addr == INADDR_ANY) { 425 /* Interface may have no addresses. */ 426 if (ia != NULL) { 427 ip->ip_src = IA_SIN(ia)->sin_addr; 428 src_was_INADDR_ANY = 1; 429 } 430 } 431#endif /* notdef */ 432 /* 433 * Verify that we have any chance at all of being able to queue 434 * the packet or packet fragments 435 */ 436 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 437 ifp->if_snd.ifq_maxlen) { 438 error = ENOBUFS; 439 ipstat.ips_odropped++; 440 goto bad; 441 } 442 443 /* 444 * Look for broadcast address and 445 * verify user is allowed to send 446 * such a packet. 447 */ 448 if (isbroadcast) { 449 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 450 error = EADDRNOTAVAIL; 451 goto bad; 452 } 453 if ((flags & IP_ALLOWBROADCAST) == 0) { 454 error = EACCES; 455 goto bad; 456 } 457 /* don't allow broadcast messages to be fragmented */ 458 if ((u_short)ip->ip_len > ifp->if_mtu) { 459 error = EMSGSIZE; 460 goto bad; 461 } 462 m->m_flags |= M_BCAST; 463 } else { 464 m->m_flags &= ~M_BCAST; 465 } 466 467sendit: 468#ifdef IPSEC 469 /* get SP for this packet */ 470 if (so == NULL) 471 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 472 else 473 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 474 475 if (sp == NULL) { 476 ipsecstat.out_inval++; 477 goto bad; 478 } 479 480 error = 0; 481 482 /* check policy */ 483 switch (sp->policy) { 484 case IPSEC_POLICY_DISCARD: 485 /* 486 * This packet is just discarded. 487 */ 488 ipsecstat.out_polvio++; 489 goto bad; 490 491 case IPSEC_POLICY_BYPASS: 492 case IPSEC_POLICY_NONE: 493 /* no need to do IPsec. */ 494 goto skip_ipsec; 495 496 case IPSEC_POLICY_IPSEC: 497 if (sp->req == NULL) { 498 /* acquire a policy */ 499 error = key_spdacquire(sp); 500 goto bad; 501 } 502 break; 503 504 case IPSEC_POLICY_ENTRUST: 505 default: 506 printf("ip_output: Invalid policy found. %d\n", sp->policy); 507 } 508 { 509 struct ipsec_output_state state; 510 bzero(&state, sizeof(state)); 511 state.m = m; 512 if (flags & IP_ROUTETOIF) { 513 state.ro = &iproute; 514 bzero(&iproute, sizeof(iproute)); 515 } else 516 state.ro = ro; 517 state.dst = (struct sockaddr *)dst; 518 519 ip->ip_sum = 0; 520 521 /* 522 * XXX 523 * delayed checksums are not currently compatible with IPsec 524 */ 525 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 526 in_delayed_cksum(m); 527 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 528 } 529 530 ip->ip_len = htons(ip->ip_len); 531 ip->ip_off = htons(ip->ip_off); 532 533 error = ipsec4_output(&state, sp, flags); 534 535 m = state.m; 536 if (flags & IP_ROUTETOIF) { 537 /* 538 * if we have tunnel mode SA, we may need to ignore 539 * IP_ROUTETOIF. 540 */ 541 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 542 flags &= ~IP_ROUTETOIF; 543 ro = state.ro; 544 } 545 } else 546 ro = state.ro; 547 dst = (struct sockaddr_in *)state.dst; 548 if (error) { 549 /* mbuf is already reclaimed in ipsec4_output. */ 550 m0 = NULL; 551 switch (error) { 552 case EHOSTUNREACH: 553 case ENETUNREACH: 554 case EMSGSIZE: 555 case ENOBUFS: 556 case ENOMEM: 557 break; 558 default: 559 printf("ip4_output (ipsec): error code %d\n", error); 560 /*fall through*/ 561 case ENOENT: 562 /* don't show these error codes to the user */ 563 error = 0; 564 break; 565 } 566 goto bad; 567 } 568 } 569 570 /* be sure to update variables that are affected by ipsec4_output() */ 571 ip = mtod(m, struct ip *); 572 hlen = ip->ip_hl << 2; 573 if (ro->ro_rt == NULL) { 574 if ((flags & IP_ROUTETOIF) == 0) { 575 printf("ip_output: " 576 "can't update route after IPsec processing\n"); 577 error = EHOSTUNREACH; /*XXX*/ 578 goto bad; 579 } 580 } else { 581 ia = ifatoia(ro->ro_rt->rt_ifa); 582 ifp = ro->ro_rt->rt_ifp; 583 } 584 585 /* make it flipped, again. */ 586 ip->ip_len = ntohs(ip->ip_len); 587 ip->ip_off = ntohs(ip->ip_off); 588skip_ipsec: 589#endif /*IPSEC*/ 590#ifdef FAST_IPSEC 591 /* 592 * Check the security policy (SP) for the packet and, if 593 * required, do IPsec-related processing. There are two 594 * cases here; the first time a packet is sent through 595 * it will be untagged and handled by ipsec4_checkpolicy. 596 * If the packet is resubmitted to ip_output (e.g. after 597 * AH, ESP, etc. processing), there will be a tag to bypass 598 * the lookup and related policy checking. 599 */ 600 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 601 s = splnet(); 602 if (mtag != NULL) { 603 tdbi = (struct tdb_ident *)(mtag + 1); 604 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); 605 if (sp == NULL) 606 error = -EINVAL; /* force silent drop */ 607 m_tag_delete(m, mtag); 608 } else { 609 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags, 610 &error, inp); 611 } 612 /* 613 * There are four return cases: 614 * sp != NULL apply IPsec policy 615 * sp == NULL, error == 0 no IPsec handling needed 616 * sp == NULL, error == -EINVAL discard packet w/o error 617 * sp == NULL, error != 0 discard packet, report error 618 */ 619 if (sp != NULL) { 620 /* Loop detection, check if ipsec processing already done */ 621 KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); 622 for (mtag = m_tag_first(m); mtag != NULL; 623 mtag = m_tag_next(m, mtag)) { 624 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) 625 continue; 626 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 627 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 628 continue; 629 /* 630 * Check if policy has an SA associated with it. 631 * This can happen when an SP has yet to acquire 632 * an SA; e.g. on first reference. If it occurs, 633 * then we let ipsec4_process_packet do its thing. 634 */ 635 if (sp->req->sav == NULL) 636 break; 637 tdbi = (struct tdb_ident *)(mtag + 1); 638 if (tdbi->spi == sp->req->sav->spi && 639 tdbi->proto == sp->req->sav->sah->saidx.proto && 640 bcmp(&tdbi->dst, &sp->spidx.dst, 641 sizeof (union sockaddr_union)) == 0) { 642 /* 643 * No IPsec processing is needed, free 644 * reference to SP. 645 * 646 * NB: null pointer to avoid free at 647 * done: below. 648 */ 649 KEY_FREESP(&sp), sp = NULL; 650 splx(s); 651 goto spd_done; 652 } 653 } 654 655 /* 656 * Do delayed checksums now because we send before 657 * this is done in the normal processing path. 658 */ 659 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 660 in_delayed_cksum(m); 661 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 662 } 663 664 ip->ip_len = htons(ip->ip_len); 665 ip->ip_off = htons(ip->ip_off); 666 667 /* NB: callee frees mbuf */ 668 error = ipsec4_process_packet(m, sp->req, flags, 0); 669 splx(s); 670 goto done; 671 } else { 672 splx(s); 673 674 if (error != 0) { 675 /* 676 * Hack: -EINVAL is used to signal that a packet 677 * should be silently discarded. This is typically 678 * because we asked key management for an SA and 679 * it was delayed (e.g. kicked up to IKE). 680 */ 681 if (error == -EINVAL) 682 error = 0; 683 goto bad; 684 } else { 685 /* No IPsec processing for this packet. */ 686 } 687#ifdef notyet 688 /* 689 * If deferred crypto processing is needed, check that 690 * the interface supports it. 691 */ 692 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL); 693 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) { 694 /* notify IPsec to do its own crypto */ 695 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 696 error = EHOSTUNREACH; 697 goto bad; 698 } 699#endif 700 } 701spd_done: 702#endif /* FAST_IPSEC */ 703 704 /* 705 * IpHack's section. 706 * - Xlate: translate packet's addr/port (NAT). 707 * - Firewall: deny/allow/etc. 708 * - Wrap: fake packet's addr/port <unimpl.> 709 * - Encapsulate: put it in another IP and send out. <unimp.> 710 */ 711#ifdef PFIL_HOOKS 712 /* 713 * Run through list of hooks for output packets. 714 */ 715 m1 = m; 716 pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh); 717 for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link)) 718 if (pfh->pfil_func) { 719 rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1); 720 if (rv) { 721 error = EHOSTUNREACH; 722 goto done; 723 } 724 m = m1; 725 if (m == NULL) 726 goto done; 727 ip = mtod(m, struct ip *); 728 } 729#endif /* PFIL_HOOKS */ 730 731 /* 732 * Check with the firewall... 733 * but not if we are already being fwd'd from a firewall. 734 */ 735 if (fw_enable && IPFW_LOADED && !args.next_hop) { 736 struct sockaddr_in *old = dst; 737 738 args.m = m; 739 args.next_hop = dst; 740 args.oif = ifp; 741 off = ip_fw_chk_ptr(&args); 742 m = args.m; 743 dst = args.next_hop; 744 745 /* 746 * On return we must do the following: 747 * m == NULL -> drop the pkt (old interface, deprecated) 748 * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface) 749 * 1<=off<= 0xffff -> DIVERT 750 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe 751 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet 752 * dst != old -> IPFIREWALL_FORWARD 753 * off==0, dst==old -> accept 754 * If some of the above modules are not compiled in, then 755 * we should't have to check the corresponding condition 756 * (because the ipfw control socket should not accept 757 * unsupported rules), but better play safe and drop 758 * packets in case of doubt. 759 */ 760 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { 761 if (m) 762 m_freem(m); 763 error = EACCES; 764 goto done; 765 } 766 ip = mtod(m, struct ip *); 767 if (off == 0 && dst == old) /* common case */ 768 goto pass; 769 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { 770 /* 771 * pass the pkt to dummynet. Need to include 772 * pipe number, m, ifp, ro, dst because these are 773 * not recomputed in the next pass. 774 * All other parameters have been already used and 775 * so they are not needed anymore. 776 * XXX note: if the ifp or ro entry are deleted 777 * while a pkt is in dummynet, we are in trouble! 778 */ 779 args.ro = ro; 780 args.dst = dst; 781 args.flags = flags; 782 783 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, 784 &args); 785 goto done; 786 } 787#ifdef IPDIVERT 788 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 789 struct mbuf *clone = NULL; 790 791 /* Clone packet if we're doing a 'tee' */ 792 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 793 clone = m_dup(m, M_DONTWAIT); 794 795 /* 796 * XXX 797 * delayed checksums are not currently compatible 798 * with divert sockets. 799 */ 800 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 801 in_delayed_cksum(m); 802 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 803 } 804 805 /* Restore packet header fields to original values */ 806 ip->ip_len = htons(ip->ip_len); 807 ip->ip_off = htons(ip->ip_off); 808 809 /* Deliver packet to divert input routine */ 810 divert_packet(m, 0, off & 0xffff, args.divert_rule); 811 812 /* If 'tee', continue with original packet */ 813 if (clone != NULL) { 814 m = clone; 815 ip = mtod(m, struct ip *); 816 goto pass; 817 } 818 goto done; 819 } 820#endif 821 822 /* IPFIREWALL_FORWARD */ 823 /* 824 * Check dst to make sure it is directly reachable on the 825 * interface we previously thought it was. 826 * If it isn't (which may be likely in some situations) we have 827 * to re-route it (ie, find a route for the next-hop and the 828 * associated interface) and set them here. This is nested 829 * forwarding which in most cases is undesirable, except where 830 * such control is nigh impossible. So we do it here. 831 * And I'm babbling. 832 */ 833 if (off == 0 && old != dst) { /* FORWARD, dst has changed */ 834#if 0 835 /* 836 * XXX To improve readability, this block should be 837 * changed into a function call as below: 838 */ 839 error = ip_ipforward(&m, &dst, &ifp); 840 if (error) 841 goto bad; 842 if (m == NULL) /* ip_input consumed the mbuf */ 843 goto done; 844#else 845 struct in_ifaddr *ia; 846 847 /* 848 * XXX sro_fwd below is static, and a pointer 849 * to it gets passed to routines downstream. 850 * This could have surprisingly bad results in 851 * practice, because its content is overwritten 852 * by subsequent packets. 853 */ 854 /* There must be a better way to do this next line... */ 855 static struct route sro_fwd; 856 struct route *ro_fwd = &sro_fwd; 857 858#if 0 859 print_ip("IPFIREWALL_FORWARD: New dst ip: ", 860 dst->sin_addr, "\n"); 861#endif 862 863 /* 864 * We need to figure out if we have been forwarded 865 * to a local socket. If so, then we should somehow 866 * "loop back" to ip_input, and get directed to the 867 * PCB as if we had received this packet. This is 868 * because it may be dificult to identify the packets 869 * you want to forward until they are being output 870 * and have selected an interface. (e.g. locally 871 * initiated packets) If we used the loopback inteface, 872 * we would not be able to control what happens 873 * as the packet runs through ip_input() as 874 * it is done through a ISR. 875 */ 876 LIST_FOREACH(ia, 877 INADDR_HASH(dst->sin_addr.s_addr), ia_hash) { 878 /* 879 * If the addr to forward to is one 880 * of ours, we pretend to 881 * be the destination for this packet. 882 */ 883 if (IA_SIN(ia)->sin_addr.s_addr == 884 dst->sin_addr.s_addr) 885 break; 886 } 887 if (ia) { /* tell ip_input "dont filter" */ 888 struct m_hdr tag; 889 890 tag.mh_type = MT_TAG; 891 tag.mh_flags = PACKET_TAG_IPFORWARD; 892 tag.mh_data = (caddr_t)args.next_hop; 893 tag.mh_next = m; 894 895 if (m->m_pkthdr.rcvif == NULL) 896 m->m_pkthdr.rcvif = ifunit("lo0"); 897 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 898 m->m_pkthdr.csum_flags |= 899 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 900 m0->m_pkthdr.csum_data = 0xffff; 901 } 902 m->m_pkthdr.csum_flags |= 903 CSUM_IP_CHECKED | CSUM_IP_VALID; 904 ip->ip_len = htons(ip->ip_len); 905 ip->ip_off = htons(ip->ip_off); 906 ip_input((struct mbuf *)&tag); 907 goto done; 908 } 909 /* Some of the logic for this was 910 * nicked from above. 911 * 912 * This rewrites the cached route in a local PCB. 913 * Is this what we want to do? 914 */ 915 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 916 917 ro_fwd->ro_rt = 0; 918 rtalloc_ign(ro_fwd, RTF_PRCLONING); 919 920 if (ro_fwd->ro_rt == 0) { 921 ipstat.ips_noroute++; 922 error = EHOSTUNREACH; 923 goto bad; 924 } 925 926 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 927 ifp = ro_fwd->ro_rt->rt_ifp; 928 ro_fwd->ro_rt->rt_use++; 929 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 930 dst = (struct sockaddr_in *) 931 ro_fwd->ro_rt->rt_gateway; 932 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 933 isbroadcast = 934 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 935 else 936 isbroadcast = in_broadcast(dst->sin_addr, ifp); 937 if (ro->ro_rt) 938 RTFREE(ro->ro_rt); 939 ro->ro_rt = ro_fwd->ro_rt; 940 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 941 942#endif /* ... block to be put into a function */ 943 /* 944 * If we added a default src ip earlier, 945 * which would have been gotten from the-then 946 * interface, do it again, from the new one. 947 */ 948 if (src_was_INADDR_ANY) 949 ip->ip_src = IA_SIN(ia)->sin_addr; 950 goto pass ; 951 } 952 953 /* 954 * if we get here, none of the above matches, and 955 * we have to drop the pkt 956 */ 957 m_freem(m); 958 error = EACCES; /* not sure this is the right error msg */ 959 goto done; 960 } 961 962pass: 963 /* 127/8 must not appear on wire - RFC1122. */ 964 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 965 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 966 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 967 ipstat.ips_badaddr++; 968 error = EADDRNOTAVAIL; 969 goto bad; 970 } 971 } 972 973 m->m_pkthdr.csum_flags |= CSUM_IP; 974 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 975 if (sw_csum & CSUM_DELAY_DATA) { 976 in_delayed_cksum(m); 977 sw_csum &= ~CSUM_DELAY_DATA; 978 } 979 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 980 981 /* 982 * If small enough for interface, or the interface will take 983 * care of the fragmentation for us, can just send directly. 984 */ 985 if ((u_short)ip->ip_len <= ifp->if_mtu || 986 ifp->if_hwassist & CSUM_FRAGMENT) { 987 ip->ip_len = htons(ip->ip_len); 988 ip->ip_off = htons(ip->ip_off); 989 ip->ip_sum = 0; 990 if (sw_csum & CSUM_DELAY_IP) 991 ip->ip_sum = in_cksum(m, hlen); 992 993 /* Record statistics for this interface address. */ 994 if (!(flags & IP_FORWARDING) && ia) { 995 ia->ia_ifa.if_opackets++; 996 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 997 } 998 999#ifdef IPSEC 1000 /* clean ipsec history once it goes out of the node */ 1001 ipsec_delaux(m); 1002#endif 1003 1004 error = (*ifp->if_output)(ifp, m, 1005 (struct sockaddr *)dst, ro->ro_rt); 1006 goto done; 1007 } 1008 /* 1009 * Too large for interface; fragment if possible. 1010 * Must be able to put at least 8 bytes per fragment. 1011 */ 1012 if (ip->ip_off & IP_DF) { 1013 error = EMSGSIZE; 1014 /* 1015 * This case can happen if the user changed the MTU 1016 * of an interface after enabling IP on it. Because 1017 * most netifs don't keep track of routes pointing to 1018 * them, there is no way for one to update all its 1019 * routes when the MTU is changed. 1020 */ 1021 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) 1022 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) 1023 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 1024 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 1025 } 1026 ipstat.ips_cantfrag++; 1027 goto bad; 1028 } 1029 len = (ifp->if_mtu - hlen) &~ 7; 1030 if (len < 8) { 1031 error = EMSGSIZE; 1032 goto bad; 1033 } 1034 1035 /* 1036 * if the interface will not calculate checksums on 1037 * fragmented packets, then do it here. 1038 */ 1039 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 1040 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { 1041 in_delayed_cksum(m); 1042 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1043 } 1044 1045 if (len > PAGE_SIZE) { 1046 /* 1047 * Fragement large datagrams such that each segment 1048 * contains a multiple of PAGE_SIZE amount of data, 1049 * plus headers. This enables a receiver to perform 1050 * page-flipping zero-copy optimizations. 1051 */ 1052 1053 int newlen; 1054 struct mbuf *mtmp; 1055 1056 for (mtmp = m, off = 0; 1057 mtmp && ((off + mtmp->m_len) <= ifp->if_mtu); 1058 mtmp = mtmp->m_next) { 1059 off += mtmp->m_len; 1060 } 1061 /* 1062 * firstlen (off - hlen) must be aligned on an 1063 * 8-byte boundary 1064 */ 1065 if (off < hlen) 1066 goto smart_frag_failure; 1067 off = ((off - hlen) & ~7) + hlen; 1068 newlen = (~PAGE_MASK) & ifp->if_mtu; 1069 if ((newlen + sizeof (struct ip)) > ifp->if_mtu) { 1070 /* we failed, go back the default */ 1071smart_frag_failure: 1072 newlen = len; 1073 off = hlen + len; 1074 } 1075 1076/* printf("ipfrag: len = %d, hlen = %d, mhlen = %d, newlen = %d, off = %d\n", 1077 len, hlen, sizeof (struct ip), newlen, off);*/ 1078 1079 len = newlen; 1080 1081 } else { 1082 off = hlen + len; 1083 } 1084 1085 1086 1087 { 1088 int mhlen, firstlen = off - hlen; 1089 struct mbuf **mnext = &m->m_nextpkt; 1090 int nfrags = 1; 1091 1092 /* 1093 * Loop through length of segment after first fragment, 1094 * make new header and copy data of each part and link onto chain. 1095 */ 1096 m0 = m; 1097 mhlen = sizeof (struct ip); 1098 for (; off < (u_short)ip->ip_len; off += len) { 1099 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1100 if (m == 0) { 1101 error = ENOBUFS; 1102 ipstat.ips_odropped++; 1103 goto sendorfree; 1104 } 1105 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 1106 m->m_data += max_linkhdr; 1107 mhip = mtod(m, struct ip *); 1108 *mhip = *ip; 1109 if (hlen > sizeof (struct ip)) { 1110 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 1111 mhip->ip_v = IPVERSION; 1112 mhip->ip_hl = mhlen >> 2; 1113 } 1114 m->m_len = mhlen; 1115 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 1116 if (off + len >= (u_short)ip->ip_len) 1117 len = (u_short)ip->ip_len - off; 1118 else 1119 mhip->ip_off |= IP_MF; 1120 mhip->ip_len = htons((u_short)(len + mhlen)); 1121 m->m_next = m_copy(m0, off, len); 1122 if (m->m_next == 0) { 1123 (void) m_free(m); 1124 error = ENOBUFS; /* ??? */ 1125 ipstat.ips_odropped++; 1126 goto sendorfree; 1127 } 1128 m->m_pkthdr.len = mhlen + len; 1129 m->m_pkthdr.rcvif = (struct ifnet *)0; 1130#ifdef MAC 1131 mac_create_fragment(m0, m); 1132#endif 1133 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 1134 mhip->ip_off = htons(mhip->ip_off); 1135 mhip->ip_sum = 0; 1136 if (sw_csum & CSUM_DELAY_IP) 1137 mhip->ip_sum = in_cksum(m, mhlen); 1138 *mnext = m; 1139 mnext = &m->m_nextpkt; 1140 nfrags++; 1141 } 1142 ipstat.ips_ofragments += nfrags; 1143 1144 /* set first/last markers for fragment chain */ 1145 m->m_flags |= M_LASTFRAG; 1146 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 1147 m0->m_pkthdr.csum_data = nfrags; 1148 1149 /* 1150 * Update first fragment by trimming what's been copied out 1151 * and updating header, then send each fragment (in order). 1152 */ 1153 m = m0; 1154 m_adj(m, hlen + firstlen - (u_short)ip->ip_len); 1155 m->m_pkthdr.len = hlen + firstlen; 1156 ip->ip_len = htons((u_short)m->m_pkthdr.len); 1157 ip->ip_off |= IP_MF; 1158 ip->ip_off = htons(ip->ip_off); 1159 ip->ip_sum = 0; 1160 if (sw_csum & CSUM_DELAY_IP) 1161 ip->ip_sum = in_cksum(m, hlen); 1162sendorfree: 1163 for (m = m0; m; m = m0) { 1164 m0 = m->m_nextpkt; 1165 m->m_nextpkt = 0; 1166#ifdef IPSEC 1167 /* clean ipsec history once it goes out of the node */ 1168 ipsec_delaux(m); 1169#endif 1170 if (error == 0) { 1171 /* Record statistics for this interface address. */ 1172 if (ia != NULL) { 1173 ia->ia_ifa.if_opackets++; 1174 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1175 } 1176 1177 error = (*ifp->if_output)(ifp, m, 1178 (struct sockaddr *)dst, ro->ro_rt); 1179 } else 1180 m_freem(m); 1181 } 1182 1183 if (error == 0) 1184 ipstat.ips_fragmented++; 1185 } 1186done: 1187#ifdef IPSEC 1188 if (ro == &iproute && ro->ro_rt) { 1189 RTFREE(ro->ro_rt); 1190 ro->ro_rt = NULL; 1191 } 1192 if (sp != NULL) { 1193 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 1194 printf("DP ip_output call free SP:%p\n", sp)); 1195 key_freesp(sp); 1196 } 1197#endif /* IPSEC */ 1198#ifdef FAST_IPSEC 1199 if (ro == &iproute && ro->ro_rt) { 1200 RTFREE(ro->ro_rt); 1201 ro->ro_rt = NULL; 1202 } 1203 if (sp != NULL) 1204 KEY_FREESP(&sp); 1205#endif /* FAST_IPSEC */ 1206 return (error); 1207bad: 1208 m_freem(m); 1209 goto done; 1210} 1211 1212void 1213in_delayed_cksum(struct mbuf *m) 1214{ 1215 struct ip *ip; 1216 u_short csum, offset; 1217 1218 ip = mtod(m, struct ip *); 1219 offset = ip->ip_hl << 2 ; 1220 csum = in_cksum_skip(m, ip->ip_len, offset); 1221 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 1222 csum = 0xffff; 1223 offset += m->m_pkthdr.csum_data; /* checksum offset */ 1224 1225 if (offset + sizeof(u_short) > m->m_len) { 1226 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 1227 m->m_len, offset, ip->ip_p); 1228 /* 1229 * XXX 1230 * this shouldn't happen, but if it does, the 1231 * correct behavior may be to insert the checksum 1232 * in the existing chain instead of rearranging it. 1233 */ 1234 m = m_pullup(m, offset + sizeof(u_short)); 1235 } 1236 *(u_short *)(m->m_data + offset) = csum; 1237} 1238 1239/* 1240 * Insert IP options into preformed packet. 1241 * Adjust IP destination as required for IP source routing, 1242 * as indicated by a non-zero in_addr at the start of the options. 1243 * 1244 * XXX This routine assumes that the packet has no options in place. 1245 */ 1246static struct mbuf * 1247ip_insertoptions(m, opt, phlen) 1248 register struct mbuf *m; 1249 struct mbuf *opt; 1250 int *phlen; 1251{ 1252 register struct ipoption *p = mtod(opt, struct ipoption *); 1253 struct mbuf *n; 1254 register struct ip *ip = mtod(m, struct ip *); 1255 unsigned optlen; 1256 1257 optlen = opt->m_len - sizeof(p->ipopt_dst); 1258 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) { 1259 *phlen = 0; 1260 return (m); /* XXX should fail */ 1261 } 1262 if (p->ipopt_dst.s_addr) 1263 ip->ip_dst = p->ipopt_dst; 1264 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1265 MGETHDR(n, M_DONTWAIT, MT_HEADER); 1266 if (n == 0) { 1267 *phlen = 0; 1268 return (m); 1269 } 1270 n->m_pkthdr.rcvif = (struct ifnet *)0; 1271#ifdef MAC 1272 mac_create_mbuf_from_mbuf(m, n); 1273#endif 1274 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1275 m->m_len -= sizeof(struct ip); 1276 m->m_data += sizeof(struct ip); 1277 n->m_next = m; 1278 m = n; 1279 m->m_len = optlen + sizeof(struct ip); 1280 m->m_data += max_linkhdr; 1281 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip)); 1282 } else { 1283 m->m_data -= optlen; 1284 m->m_len += optlen; 1285 m->m_pkthdr.len += optlen; 1286 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 1287 } 1288 ip = mtod(m, struct ip *); 1289 bcopy(p->ipopt_list, ip + 1, optlen); 1290 *phlen = sizeof(struct ip) + optlen; 1291 ip->ip_v = IPVERSION; 1292 ip->ip_hl = *phlen >> 2; 1293 ip->ip_len += optlen; 1294 return (m); 1295} 1296 1297/* 1298 * Copy options from ip to jp, 1299 * omitting those not copied during fragmentation. 1300 */ 1301int 1302ip_optcopy(ip, jp) 1303 struct ip *ip, *jp; 1304{ 1305 register u_char *cp, *dp; 1306 int opt, optlen, cnt; 1307 1308 cp = (u_char *)(ip + 1); 1309 dp = (u_char *)(jp + 1); 1310 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 1311 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1312 opt = cp[0]; 1313 if (opt == IPOPT_EOL) 1314 break; 1315 if (opt == IPOPT_NOP) { 1316 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1317 *dp++ = IPOPT_NOP; 1318 optlen = 1; 1319 continue; 1320 } 1321 1322 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp), 1323 ("ip_optcopy: malformed ipv4 option")); 1324 optlen = cp[IPOPT_OLEN]; 1325 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt, 1326 ("ip_optcopy: malformed ipv4 option")); 1327 1328 /* bogus lengths should have been caught by ip_dooptions */ 1329 if (optlen > cnt) 1330 optlen = cnt; 1331 if (IPOPT_COPIED(opt)) { 1332 bcopy(cp, dp, optlen); 1333 dp += optlen; 1334 } 1335 } 1336 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1337 *dp++ = IPOPT_EOL; 1338 return (optlen); 1339} 1340 1341/* 1342 * IP socket option processing. 1343 */ 1344int 1345ip_ctloutput(so, sopt) 1346 struct socket *so; 1347 struct sockopt *sopt; 1348{ 1349 struct inpcb *inp = sotoinpcb(so); 1350 int error, optval; 1351 1352 error = optval = 0; 1353 if (sopt->sopt_level != IPPROTO_IP) { 1354 return (EINVAL); 1355 } 1356 1357 switch (sopt->sopt_dir) { 1358 case SOPT_SET: 1359 switch (sopt->sopt_name) { 1360 case IP_OPTIONS: 1361#ifdef notyet 1362 case IP_RETOPTS: 1363#endif 1364 { 1365 struct mbuf *m; 1366 if (sopt->sopt_valsize > MLEN) { 1367 error = EMSGSIZE; 1368 break; 1369 } 1370 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); 1371 if (m == 0) { 1372 error = ENOBUFS; 1373 break; 1374 } 1375 m->m_len = sopt->sopt_valsize; 1376 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1377 m->m_len); 1378 1379 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1380 m)); 1381 } 1382 1383 case IP_TOS: 1384 case IP_TTL: 1385 case IP_RECVOPTS: 1386 case IP_RECVRETOPTS: 1387 case IP_RECVDSTADDR: 1388 case IP_RECVIF: 1389 case IP_FAITH: 1390 error = sooptcopyin(sopt, &optval, sizeof optval, 1391 sizeof optval); 1392 if (error) 1393 break; 1394 1395 switch (sopt->sopt_name) { 1396 case IP_TOS: 1397 inp->inp_ip_tos = optval; 1398 break; 1399 1400 case IP_TTL: 1401 inp->inp_ip_ttl = optval; 1402 break; 1403#define OPTSET(bit) \ 1404 if (optval) \ 1405 inp->inp_flags |= bit; \ 1406 else \ 1407 inp->inp_flags &= ~bit; 1408 1409 case IP_RECVOPTS: 1410 OPTSET(INP_RECVOPTS); 1411 break; 1412 1413 case IP_RECVRETOPTS: 1414 OPTSET(INP_RECVRETOPTS); 1415 break; 1416 1417 case IP_RECVDSTADDR: 1418 OPTSET(INP_RECVDSTADDR); 1419 break; 1420 1421 case IP_RECVIF: 1422 OPTSET(INP_RECVIF); 1423 break; 1424 1425 case IP_FAITH: 1426 OPTSET(INP_FAITH); 1427 break; 1428 } 1429 break; 1430#undef OPTSET 1431 1432 case IP_MULTICAST_IF: 1433 case IP_MULTICAST_VIF: 1434 case IP_MULTICAST_TTL: 1435 case IP_MULTICAST_LOOP: 1436 case IP_ADD_MEMBERSHIP: 1437 case IP_DROP_MEMBERSHIP: 1438 error = ip_setmoptions(sopt, &inp->inp_moptions); 1439 break; 1440 1441 case IP_PORTRANGE: 1442 error = sooptcopyin(sopt, &optval, sizeof optval, 1443 sizeof optval); 1444 if (error) 1445 break; 1446 1447 switch (optval) { 1448 case IP_PORTRANGE_DEFAULT: 1449 inp->inp_flags &= ~(INP_LOWPORT); 1450 inp->inp_flags &= ~(INP_HIGHPORT); 1451 break; 1452 1453 case IP_PORTRANGE_HIGH: 1454 inp->inp_flags &= ~(INP_LOWPORT); 1455 inp->inp_flags |= INP_HIGHPORT; 1456 break; 1457 1458 case IP_PORTRANGE_LOW: 1459 inp->inp_flags &= ~(INP_HIGHPORT); 1460 inp->inp_flags |= INP_LOWPORT; 1461 break; 1462 1463 default: 1464 error = EINVAL; 1465 break; 1466 } 1467 break; 1468 1469#if defined(IPSEC) || defined(FAST_IPSEC) 1470 case IP_IPSEC_POLICY: 1471 { 1472 caddr_t req; 1473 size_t len = 0; 1474 int priv; 1475 struct mbuf *m; 1476 int optname; 1477 1478 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1479 break; 1480 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1481 break; 1482 priv = (sopt->sopt_td != NULL && 1483 suser(sopt->sopt_td) != 0) ? 0 : 1; 1484 req = mtod(m, caddr_t); 1485 len = m->m_len; 1486 optname = sopt->sopt_name; 1487 error = ipsec4_set_policy(inp, optname, req, len, priv); 1488 m_freem(m); 1489 break; 1490 } 1491#endif /*IPSEC*/ 1492 1493 default: 1494 error = ENOPROTOOPT; 1495 break; 1496 } 1497 break; 1498 1499 case SOPT_GET: 1500 switch (sopt->sopt_name) { 1501 case IP_OPTIONS: 1502 case IP_RETOPTS: 1503 if (inp->inp_options) 1504 error = sooptcopyout(sopt, 1505 mtod(inp->inp_options, 1506 char *), 1507 inp->inp_options->m_len); 1508 else 1509 sopt->sopt_valsize = 0; 1510 break; 1511 1512 case IP_TOS: 1513 case IP_TTL: 1514 case IP_RECVOPTS: 1515 case IP_RECVRETOPTS: 1516 case IP_RECVDSTADDR: 1517 case IP_RECVIF: 1518 case IP_PORTRANGE: 1519 case IP_FAITH: 1520 switch (sopt->sopt_name) { 1521 1522 case IP_TOS: 1523 optval = inp->inp_ip_tos; 1524 break; 1525 1526 case IP_TTL: 1527 optval = inp->inp_ip_ttl; 1528 break; 1529 1530#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1531 1532 case IP_RECVOPTS: 1533 optval = OPTBIT(INP_RECVOPTS); 1534 break; 1535 1536 case IP_RECVRETOPTS: 1537 optval = OPTBIT(INP_RECVRETOPTS); 1538 break; 1539 1540 case IP_RECVDSTADDR: 1541 optval = OPTBIT(INP_RECVDSTADDR); 1542 break; 1543 1544 case IP_RECVIF: 1545 optval = OPTBIT(INP_RECVIF); 1546 break; 1547 1548 case IP_PORTRANGE: 1549 if (inp->inp_flags & INP_HIGHPORT) 1550 optval = IP_PORTRANGE_HIGH; 1551 else if (inp->inp_flags & INP_LOWPORT) 1552 optval = IP_PORTRANGE_LOW; 1553 else 1554 optval = 0; 1555 break; 1556 1557 case IP_FAITH: 1558 optval = OPTBIT(INP_FAITH); 1559 break; 1560 } 1561 error = sooptcopyout(sopt, &optval, sizeof optval); 1562 break; 1563 1564 case IP_MULTICAST_IF: 1565 case IP_MULTICAST_VIF: 1566 case IP_MULTICAST_TTL: 1567 case IP_MULTICAST_LOOP: 1568 case IP_ADD_MEMBERSHIP: 1569 case IP_DROP_MEMBERSHIP: 1570 error = ip_getmoptions(sopt, inp->inp_moptions); 1571 break; 1572 1573#if defined(IPSEC) || defined(FAST_IPSEC) 1574 case IP_IPSEC_POLICY: 1575 { 1576 struct mbuf *m = NULL; 1577 caddr_t req = NULL; 1578 size_t len = 0; 1579 1580 if (m != 0) { 1581 req = mtod(m, caddr_t); 1582 len = m->m_len; 1583 } 1584 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1585 if (error == 0) 1586 error = soopt_mcopyout(sopt, m); /* XXX */ 1587 if (error == 0) 1588 m_freem(m); 1589 break; 1590 } 1591#endif /*IPSEC*/ 1592 1593 default: 1594 error = ENOPROTOOPT; 1595 break; 1596 } 1597 break; 1598 } 1599 return (error); 1600} 1601 1602/* 1603 * Set up IP options in pcb for insertion in output packets. 1604 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1605 * with destination address if source routed. 1606 */ 1607static int 1608ip_pcbopts(optname, pcbopt, m) 1609 int optname; 1610 struct mbuf **pcbopt; 1611 register struct mbuf *m; 1612{ 1613 register int cnt, optlen; 1614 register u_char *cp; 1615 u_char opt; 1616 1617 /* turn off any old options */ 1618 if (*pcbopt) 1619 (void)m_free(*pcbopt); 1620 *pcbopt = 0; 1621 if (m == (struct mbuf *)0 || m->m_len == 0) { 1622 /* 1623 * Only turning off any previous options. 1624 */ 1625 if (m) 1626 (void)m_free(m); 1627 return (0); 1628 } 1629 1630 if (m->m_len % sizeof(int32_t)) 1631 goto bad; 1632 /* 1633 * IP first-hop destination address will be stored before 1634 * actual options; move other options back 1635 * and clear it when none present. 1636 */ 1637 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1638 goto bad; 1639 cnt = m->m_len; 1640 m->m_len += sizeof(struct in_addr); 1641 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1642 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1643 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1644 1645 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1646 opt = cp[IPOPT_OPTVAL]; 1647 if (opt == IPOPT_EOL) 1648 break; 1649 if (opt == IPOPT_NOP) 1650 optlen = 1; 1651 else { 1652 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1653 goto bad; 1654 optlen = cp[IPOPT_OLEN]; 1655 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1656 goto bad; 1657 } 1658 switch (opt) { 1659 1660 default: 1661 break; 1662 1663 case IPOPT_LSRR: 1664 case IPOPT_SSRR: 1665 /* 1666 * user process specifies route as: 1667 * ->A->B->C->D 1668 * D must be our final destination (but we can't 1669 * check that since we may not have connected yet). 1670 * A is first hop destination, which doesn't appear in 1671 * actual IP option, but is stored before the options. 1672 */ 1673 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1674 goto bad; 1675 m->m_len -= sizeof(struct in_addr); 1676 cnt -= sizeof(struct in_addr); 1677 optlen -= sizeof(struct in_addr); 1678 cp[IPOPT_OLEN] = optlen; 1679 /* 1680 * Move first hop before start of options. 1681 */ 1682 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1683 sizeof(struct in_addr)); 1684 /* 1685 * Then copy rest of options back 1686 * to close up the deleted entry. 1687 */ 1688 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1689 sizeof(struct in_addr)), 1690 (caddr_t)&cp[IPOPT_OFFSET+1], 1691 (unsigned)cnt + sizeof(struct in_addr)); 1692 break; 1693 } 1694 } 1695 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1696 goto bad; 1697 *pcbopt = m; 1698 return (0); 1699 1700bad: 1701 (void)m_free(m); 1702 return (EINVAL); 1703} 1704 1705/* 1706 * XXX 1707 * The whole multicast option thing needs to be re-thought. 1708 * Several of these options are equally applicable to non-multicast 1709 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1710 * standard option (IP_TTL). 1711 */ 1712 1713/* 1714 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1715 */ 1716static struct ifnet * 1717ip_multicast_if(a, ifindexp) 1718 struct in_addr *a; 1719 int *ifindexp; 1720{ 1721 int ifindex; 1722 struct ifnet *ifp; 1723 1724 if (ifindexp) 1725 *ifindexp = 0; 1726 if (ntohl(a->s_addr) >> 24 == 0) { 1727 ifindex = ntohl(a->s_addr) & 0xffffff; 1728 if (ifindex < 0 || if_index < ifindex) 1729 return NULL; 1730 ifp = ifnet_byindex(ifindex); 1731 if (ifindexp) 1732 *ifindexp = ifindex; 1733 } else { 1734 INADDR_TO_IFP(*a, ifp); 1735 } 1736 return ifp; 1737} 1738 1739/* 1740 * Set the IP multicast options in response to user setsockopt(). 1741 */ 1742static int 1743ip_setmoptions(sopt, imop) 1744 struct sockopt *sopt; 1745 struct ip_moptions **imop; 1746{ 1747 int error = 0; 1748 int i; 1749 struct in_addr addr; 1750 struct ip_mreq mreq; 1751 struct ifnet *ifp; 1752 struct ip_moptions *imo = *imop; 1753 struct route ro; 1754 struct sockaddr_in *dst; 1755 int ifindex; 1756 int s; 1757 1758 if (imo == NULL) { 1759 /* 1760 * No multicast option buffer attached to the pcb; 1761 * allocate one and initialize to default values. 1762 */ 1763 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, 1764 M_WAITOK); 1765 1766 if (imo == NULL) 1767 return (ENOBUFS); 1768 *imop = imo; 1769 imo->imo_multicast_ifp = NULL; 1770 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1771 imo->imo_multicast_vif = -1; 1772 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1773 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1774 imo->imo_num_memberships = 0; 1775 } 1776 1777 switch (sopt->sopt_name) { 1778 /* store an index number for the vif you wanna use in the send */ 1779 case IP_MULTICAST_VIF: 1780 if (legal_vif_num == 0) { 1781 error = EOPNOTSUPP; 1782 break; 1783 } 1784 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1785 if (error) 1786 break; 1787 if (!legal_vif_num(i) && (i != -1)) { 1788 error = EINVAL; 1789 break; 1790 } 1791 imo->imo_multicast_vif = i; 1792 break; 1793 1794 case IP_MULTICAST_IF: 1795 /* 1796 * Select the interface for outgoing multicast packets. 1797 */ 1798 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1799 if (error) 1800 break; 1801 /* 1802 * INADDR_ANY is used to remove a previous selection. 1803 * When no interface is selected, a default one is 1804 * chosen every time a multicast packet is sent. 1805 */ 1806 if (addr.s_addr == INADDR_ANY) { 1807 imo->imo_multicast_ifp = NULL; 1808 break; 1809 } 1810 /* 1811 * The selected interface is identified by its local 1812 * IP address. Find the interface and confirm that 1813 * it supports multicasting. 1814 */ 1815 s = splimp(); 1816 ifp = ip_multicast_if(&addr, &ifindex); 1817 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1818 splx(s); 1819 error = EADDRNOTAVAIL; 1820 break; 1821 } 1822 imo->imo_multicast_ifp = ifp; 1823 if (ifindex) 1824 imo->imo_multicast_addr = addr; 1825 else 1826 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1827 splx(s); 1828 break; 1829 1830 case IP_MULTICAST_TTL: 1831 /* 1832 * Set the IP time-to-live for outgoing multicast packets. 1833 * The original multicast API required a char argument, 1834 * which is inconsistent with the rest of the socket API. 1835 * We allow either a char or an int. 1836 */ 1837 if (sopt->sopt_valsize == 1) { 1838 u_char ttl; 1839 error = sooptcopyin(sopt, &ttl, 1, 1); 1840 if (error) 1841 break; 1842 imo->imo_multicast_ttl = ttl; 1843 } else { 1844 u_int ttl; 1845 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1846 sizeof ttl); 1847 if (error) 1848 break; 1849 if (ttl > 255) 1850 error = EINVAL; 1851 else 1852 imo->imo_multicast_ttl = ttl; 1853 } 1854 break; 1855 1856 case IP_MULTICAST_LOOP: 1857 /* 1858 * Set the loopback flag for outgoing multicast packets. 1859 * Must be zero or one. The original multicast API required a 1860 * char argument, which is inconsistent with the rest 1861 * of the socket API. We allow either a char or an int. 1862 */ 1863 if (sopt->sopt_valsize == 1) { 1864 u_char loop; 1865 error = sooptcopyin(sopt, &loop, 1, 1); 1866 if (error) 1867 break; 1868 imo->imo_multicast_loop = !!loop; 1869 } else { 1870 u_int loop; 1871 error = sooptcopyin(sopt, &loop, sizeof loop, 1872 sizeof loop); 1873 if (error) 1874 break; 1875 imo->imo_multicast_loop = !!loop; 1876 } 1877 break; 1878 1879 case IP_ADD_MEMBERSHIP: 1880 /* 1881 * Add a multicast group membership. 1882 * Group must be a valid IP multicast address. 1883 */ 1884 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1885 if (error) 1886 break; 1887 1888 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1889 error = EINVAL; 1890 break; 1891 } 1892 s = splimp(); 1893 /* 1894 * If no interface address was provided, use the interface of 1895 * the route to the given multicast address. 1896 */ 1897 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1898 bzero((caddr_t)&ro, sizeof(ro)); 1899 dst = (struct sockaddr_in *)&ro.ro_dst; 1900 dst->sin_len = sizeof(*dst); 1901 dst->sin_family = AF_INET; 1902 dst->sin_addr = mreq.imr_multiaddr; 1903 rtalloc(&ro); 1904 if (ro.ro_rt == NULL) { 1905 error = EADDRNOTAVAIL; 1906 splx(s); 1907 break; 1908 } 1909 ifp = ro.ro_rt->rt_ifp; 1910 rtfree(ro.ro_rt); 1911 } 1912 else { 1913 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1914 } 1915 1916 /* 1917 * See if we found an interface, and confirm that it 1918 * supports multicast. 1919 */ 1920 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1921 error = EADDRNOTAVAIL; 1922 splx(s); 1923 break; 1924 } 1925 /* 1926 * See if the membership already exists or if all the 1927 * membership slots are full. 1928 */ 1929 for (i = 0; i < imo->imo_num_memberships; ++i) { 1930 if (imo->imo_membership[i]->inm_ifp == ifp && 1931 imo->imo_membership[i]->inm_addr.s_addr 1932 == mreq.imr_multiaddr.s_addr) 1933 break; 1934 } 1935 if (i < imo->imo_num_memberships) { 1936 error = EADDRINUSE; 1937 splx(s); 1938 break; 1939 } 1940 if (i == IP_MAX_MEMBERSHIPS) { 1941 error = ETOOMANYREFS; 1942 splx(s); 1943 break; 1944 } 1945 /* 1946 * Everything looks good; add a new record to the multicast 1947 * address list for the given interface. 1948 */ 1949 if ((imo->imo_membership[i] = 1950 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 1951 error = ENOBUFS; 1952 splx(s); 1953 break; 1954 } 1955 ++imo->imo_num_memberships; 1956 splx(s); 1957 break; 1958 1959 case IP_DROP_MEMBERSHIP: 1960 /* 1961 * Drop a multicast group membership. 1962 * Group must be a valid IP multicast address. 1963 */ 1964 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1965 if (error) 1966 break; 1967 1968 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1969 error = EINVAL; 1970 break; 1971 } 1972 1973 s = splimp(); 1974 /* 1975 * If an interface address was specified, get a pointer 1976 * to its ifnet structure. 1977 */ 1978 if (mreq.imr_interface.s_addr == INADDR_ANY) 1979 ifp = NULL; 1980 else { 1981 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1982 if (ifp == NULL) { 1983 error = EADDRNOTAVAIL; 1984 splx(s); 1985 break; 1986 } 1987 } 1988 /* 1989 * Find the membership in the membership array. 1990 */ 1991 for (i = 0; i < imo->imo_num_memberships; ++i) { 1992 if ((ifp == NULL || 1993 imo->imo_membership[i]->inm_ifp == ifp) && 1994 imo->imo_membership[i]->inm_addr.s_addr == 1995 mreq.imr_multiaddr.s_addr) 1996 break; 1997 } 1998 if (i == imo->imo_num_memberships) { 1999 error = EADDRNOTAVAIL; 2000 splx(s); 2001 break; 2002 } 2003 /* 2004 * Give up the multicast address record to which the 2005 * membership points. 2006 */ 2007 in_delmulti(imo->imo_membership[i]); 2008 /* 2009 * Remove the gap in the membership array. 2010 */ 2011 for (++i; i < imo->imo_num_memberships; ++i) 2012 imo->imo_membership[i-1] = imo->imo_membership[i]; 2013 --imo->imo_num_memberships; 2014 splx(s); 2015 break; 2016 2017 default: 2018 error = EOPNOTSUPP; 2019 break; 2020 } 2021 2022 /* 2023 * If all options have default values, no need to keep the mbuf. 2024 */ 2025 if (imo->imo_multicast_ifp == NULL && 2026 imo->imo_multicast_vif == -1 && 2027 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 2028 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 2029 imo->imo_num_memberships == 0) { 2030 free(*imop, M_IPMOPTS); 2031 *imop = NULL; 2032 } 2033 2034 return (error); 2035} 2036 2037/* 2038 * Return the IP multicast options in response to user getsockopt(). 2039 */ 2040static int 2041ip_getmoptions(sopt, imo) 2042 struct sockopt *sopt; 2043 register struct ip_moptions *imo; 2044{ 2045 struct in_addr addr; 2046 struct in_ifaddr *ia; 2047 int error, optval; 2048 u_char coptval; 2049 2050 error = 0; 2051 switch (sopt->sopt_name) { 2052 case IP_MULTICAST_VIF: 2053 if (imo != NULL) 2054 optval = imo->imo_multicast_vif; 2055 else 2056 optval = -1; 2057 error = sooptcopyout(sopt, &optval, sizeof optval); 2058 break; 2059 2060 case IP_MULTICAST_IF: 2061 if (imo == NULL || imo->imo_multicast_ifp == NULL) 2062 addr.s_addr = INADDR_ANY; 2063 else if (imo->imo_multicast_addr.s_addr) { 2064 /* return the value user has set */ 2065 addr = imo->imo_multicast_addr; 2066 } else { 2067 IFP_TO_IA(imo->imo_multicast_ifp, ia); 2068 addr.s_addr = (ia == NULL) ? INADDR_ANY 2069 : IA_SIN(ia)->sin_addr.s_addr; 2070 } 2071 error = sooptcopyout(sopt, &addr, sizeof addr); 2072 break; 2073 2074 case IP_MULTICAST_TTL: 2075 if (imo == 0) 2076 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 2077 else 2078 optval = coptval = imo->imo_multicast_ttl; 2079 if (sopt->sopt_valsize == 1) 2080 error = sooptcopyout(sopt, &coptval, 1); 2081 else 2082 error = sooptcopyout(sopt, &optval, sizeof optval); 2083 break; 2084 2085 case IP_MULTICAST_LOOP: 2086 if (imo == 0) 2087 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 2088 else 2089 optval = coptval = imo->imo_multicast_loop; 2090 if (sopt->sopt_valsize == 1) 2091 error = sooptcopyout(sopt, &coptval, 1); 2092 else 2093 error = sooptcopyout(sopt, &optval, sizeof optval); 2094 break; 2095 2096 default: 2097 error = ENOPROTOOPT; 2098 break; 2099 } 2100 return (error); 2101} 2102 2103/* 2104 * Discard the IP multicast options. 2105 */ 2106void 2107ip_freemoptions(imo) 2108 register struct ip_moptions *imo; 2109{ 2110 register int i; 2111 2112 if (imo != NULL) { 2113 for (i = 0; i < imo->imo_num_memberships; ++i) 2114 in_delmulti(imo->imo_membership[i]); 2115 free(imo, M_IPMOPTS); 2116 } 2117} 2118 2119/* 2120 * Routine called from ip_output() to loop back a copy of an IP multicast 2121 * packet to the input queue of a specified interface. Note that this 2122 * calls the output routine of the loopback "driver", but with an interface 2123 * pointer that might NOT be a loopback interface -- evil, but easier than 2124 * replicating that code here. 2125 */ 2126static void 2127ip_mloopback(ifp, m, dst, hlen) 2128 struct ifnet *ifp; 2129 register struct mbuf *m; 2130 register struct sockaddr_in *dst; 2131 int hlen; 2132{ 2133 register struct ip *ip; 2134 struct mbuf *copym; 2135 2136 copym = m_copy(m, 0, M_COPYALL); 2137 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 2138 copym = m_pullup(copym, hlen); 2139 if (copym != NULL) { 2140 /* 2141 * We don't bother to fragment if the IP length is greater 2142 * than the interface's MTU. Can this possibly matter? 2143 */ 2144 ip = mtod(copym, struct ip *); 2145 ip->ip_len = htons(ip->ip_len); 2146 ip->ip_off = htons(ip->ip_off); 2147 ip->ip_sum = 0; 2148 ip->ip_sum = in_cksum(copym, hlen); 2149 /* 2150 * NB: 2151 * It's not clear whether there are any lingering 2152 * reentrancy problems in other areas which might 2153 * be exposed by using ip_input directly (in 2154 * particular, everything which modifies the packet 2155 * in-place). Yet another option is using the 2156 * protosw directly to deliver the looped back 2157 * packet. For the moment, we'll err on the side 2158 * of safety by using if_simloop(). 2159 */ 2160#if 1 /* XXX */ 2161 if (dst->sin_family != AF_INET) { 2162 printf("ip_mloopback: bad address family %d\n", 2163 dst->sin_family); 2164 dst->sin_family = AF_INET; 2165 } 2166#endif 2167 2168#ifdef notdef 2169 copym->m_pkthdr.rcvif = ifp; 2170 ip_input(copym); 2171#else 2172 /* if the checksum hasn't been computed, mark it as valid */ 2173 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2174 copym->m_pkthdr.csum_flags |= 2175 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 2176 copym->m_pkthdr.csum_data = 0xffff; 2177 } 2178 if_simloop(ifp, copym, dst->sin_family, 0); 2179#endif 2180 } 2181} 2182