ip_output.c revision 79934
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 34 * $FreeBSD: head/sys/netinet/ip_output.c 79934 2001-07-19 07:10:30Z ru $ 35 */ 36 37#define _IP_VHL 38 39#include "opt_ipfw.h" 40#include "opt_ipdn.h" 41#include "opt_ipdivert.h" 42#include "opt_ipfilter.h" 43#include "opt_ipsec.h" 44#include "opt_pfil_hooks.h" 45#include "opt_random_ip_id.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/malloc.h> 51#include <sys/mbuf.h> 52#include <sys/protosw.h> 53#include <sys/socket.h> 54#include <sys/socketvar.h> 55 56#include <net/if.h> 57#include <net/route.h> 58 59#include <netinet/in.h> 60#include <netinet/in_systm.h> 61#include <netinet/ip.h> 62#include <netinet/in_pcb.h> 63#include <netinet/in_var.h> 64#include <netinet/ip_var.h> 65 66#include "faith.h" 67 68#include <machine/in_cksum.h> 69 70static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 71 72#ifdef IPSEC 73#include <netinet6/ipsec.h> 74#include <netkey/key.h> 75#ifdef IPSEC_DEBUG 76#include <netkey/key_debug.h> 77#else 78#define KEYDEBUG(lev,arg) 79#endif 80#endif /*IPSEC*/ 81 82#include <netinet/ip_fw.h> 83 84#ifdef DUMMYNET 85#include <netinet/ip_dummynet.h> 86#endif 87 88#ifdef IPFIREWALL_FORWARD_DEBUG 89#define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\ 90 (ntohl(a.s_addr)>>16)&0xFF,\ 91 (ntohl(a.s_addr)>>8)&0xFF,\ 92 (ntohl(a.s_addr))&0xFF); 93#endif 94 95u_short ip_id; 96 97static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); 98static struct ifnet *ip_multicast_if __P((struct in_addr *, int *)); 99static void ip_mloopback 100 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int)); 101static int ip_getmoptions 102 __P((struct sockopt *, struct ip_moptions *)); 103static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *)); 104static int ip_setmoptions 105 __P((struct sockopt *, struct ip_moptions **)); 106 107int ip_optcopy __P((struct ip *, struct ip *)); 108 109 110extern struct protosw inetsw[]; 111 112/* 113 * IP output. The packet in mbuf chain m contains a skeletal IP 114 * header (with len, off, ttl, proto, tos, src, dst). 115 * The mbuf chain containing the packet will be freed. 116 * The mbuf opt, if present, will not be freed. 117 */ 118int 119ip_output(m0, opt, ro, flags, imo) 120 struct mbuf *m0; 121 struct mbuf *opt; 122 struct route *ro; 123 int flags; 124 struct ip_moptions *imo; 125{ 126 struct ip *ip, *mhip; 127 struct ifnet *ifp; 128 struct mbuf *m = m0; 129 int hlen = sizeof (struct ip); 130 int len, off, error = 0; 131 struct sockaddr_in *dst; 132 struct in_ifaddr *ia; 133 int isbroadcast, sw_csum; 134#ifdef IPSEC 135 struct route iproute; 136 struct socket *so = NULL; 137 struct secpolicy *sp = NULL; 138#endif 139 u_int16_t divert_cookie; /* firewall cookie */ 140#ifdef PFIL_HOOKS 141 struct packet_filter_hook *pfh; 142 struct mbuf *m1; 143 int rv; 144#endif /* PFIL_HOOKS */ 145#ifdef IPFIREWALL_FORWARD 146 int fwd_rewrite_src = 0; 147#endif 148 struct ip_fw_chain *rule = NULL; 149 150#ifdef IPDIVERT 151 /* Get and reset firewall cookie */ 152 divert_cookie = ip_divert_cookie; 153 ip_divert_cookie = 0; 154#else 155 divert_cookie = 0; 156#endif 157 158#if defined(IPFIREWALL) && defined(DUMMYNET) 159 /* 160 * dummynet packet are prepended a vestigial mbuf with 161 * m_type = MT_DUMMYNET and m_data pointing to the matching 162 * rule. 163 */ 164 if (m->m_type == MT_DUMMYNET) { 165 /* 166 * the packet was already tagged, so part of the 167 * processing was already done, and we need to go down. 168 * Get parameters from the header. 169 */ 170 rule = (struct ip_fw_chain *)(m->m_data) ; 171 opt = NULL ; 172 ro = & ( ((struct dn_pkt *)m)->ro ) ; 173 imo = NULL ; 174 dst = ((struct dn_pkt *)m)->dn_dst ; 175 ifp = ((struct dn_pkt *)m)->ifp ; 176 flags = ((struct dn_pkt *)m)->flags ; 177 178 m0 = m = m->m_next ; 179#ifdef IPSEC 180 so = ipsec_getsocket(m); 181 (void)ipsec_setsocket(m, NULL); 182#endif 183 ip = mtod(m, struct ip *); 184 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; 185 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa; 186 goto sendit; 187 } else 188 rule = NULL ; 189#endif 190#ifdef IPSEC 191 so = ipsec_getsocket(m); 192 (void)ipsec_setsocket(m, NULL); 193#endif 194 195#ifdef DIAGNOSTIC 196 if ((m->m_flags & M_PKTHDR) == 0) 197 panic("ip_output no HDR"); 198 if (!ro) 199 panic("ip_output no route, proto = %d", 200 mtod(m, struct ip *)->ip_p); 201#endif 202 if (opt) { 203 m = ip_insertoptions(m, opt, &len); 204 hlen = len; 205 } 206 ip = mtod(m, struct ip *); 207 /* 208 * Fill in IP header. 209 */ 210 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 211 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); 212 ip->ip_off &= IP_DF; 213#ifdef RANDOM_IP_ID 214 ip->ip_id = ip_randomid(); 215#else 216 ip->ip_id = htons(ip_id++); 217#endif 218 ipstat.ips_localout++; 219 } else { 220 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 221 } 222 223 dst = (struct sockaddr_in *)&ro->ro_dst; 224 /* 225 * If there is a cached route, 226 * check that it is to the same destination 227 * and is still up. If not, free it and try again. 228 */ 229 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 230 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 231 RTFREE(ro->ro_rt); 232 ro->ro_rt = (struct rtentry *)0; 233 } 234 if (ro->ro_rt == 0) { 235 dst->sin_family = AF_INET; 236 dst->sin_len = sizeof(*dst); 237 dst->sin_addr = ip->ip_dst; 238 } 239 /* 240 * If routing to interface only, 241 * short circuit routing lookup. 242 */ 243#define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) 244#define sintosa(sin) ((struct sockaddr *)(sin)) 245 if (flags & IP_ROUTETOIF) { 246 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 247 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 248 ipstat.ips_noroute++; 249 error = ENETUNREACH; 250 goto bad; 251 } 252 ifp = ia->ia_ifp; 253 ip->ip_ttl = 1; 254 isbroadcast = in_broadcast(dst->sin_addr, ifp); 255 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 256 (imo != NULL) && 257 (imo->imo_multicast_ifp != NULL)) { 258 /* 259 * bypass the normal routing lookup for 260 * multicast packets if the interface is 261 * specified 262 */ 263 /* No Operation */ 264 } else { 265 /* 266 * If this is the case, we probably don't want to allocate 267 * a protocol-cloned route since we didn't get one from the 268 * ULP. This lets TCP do its thing, while not burdening 269 * forwarding or ICMP with the overhead of cloning a route. 270 * Of course, we still want to do any cloning requested by 271 * the link layer, as this is probably required in all cases 272 * for correct operation (as it is for ARP). 273 */ 274 if (ro->ro_rt == 0) 275 rtalloc_ign(ro, RTF_PRCLONING); 276 if (ro->ro_rt == 0) { 277 ipstat.ips_noroute++; 278 error = EHOSTUNREACH; 279 goto bad; 280 } 281 ia = ifatoia(ro->ro_rt->rt_ifa); 282 ifp = ro->ro_rt->rt_ifp; 283 ro->ro_rt->rt_use++; 284 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 285 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 286 if (ro->ro_rt->rt_flags & RTF_HOST) 287 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 288 else 289 isbroadcast = in_broadcast(dst->sin_addr, ifp); 290 } 291 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 292 struct in_multi *inm; 293 294 m->m_flags |= M_MCAST; 295 /* 296 * IP destination address is multicast. Make sure "dst" 297 * still points to the address in "ro". (It may have been 298 * changed to point to a gateway address, above.) 299 */ 300 dst = (struct sockaddr_in *)&ro->ro_dst; 301 /* 302 * See if the caller provided any multicast options 303 */ 304 if (imo != NULL) { 305 ip->ip_ttl = imo->imo_multicast_ttl; 306 if (imo->imo_multicast_ifp != NULL) 307 ifp = imo->imo_multicast_ifp; 308 if (imo->imo_multicast_vif != -1) 309 ip->ip_src.s_addr = 310 ip_mcast_src(imo->imo_multicast_vif); 311 } else 312 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 313 /* 314 * Confirm that the outgoing interface supports multicast. 315 */ 316 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 317 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 318 ipstat.ips_noroute++; 319 error = ENETUNREACH; 320 goto bad; 321 } 322 } 323 /* 324 * If source address not specified yet, use address 325 * of outgoing interface. 326 */ 327 if (ip->ip_src.s_addr == INADDR_ANY) { 328 register struct in_ifaddr *ia1; 329 330 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) 331 if (ia1->ia_ifp == ifp) { 332 ip->ip_src = IA_SIN(ia1)->sin_addr; 333 break; 334 } 335 } 336 337 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); 338 if (inm != NULL && 339 (imo == NULL || imo->imo_multicast_loop)) { 340 /* 341 * If we belong to the destination multicast group 342 * on the outgoing interface, and the caller did not 343 * forbid loopback, loop back a copy. 344 */ 345 ip_mloopback(ifp, m, dst, hlen); 346 } 347 else { 348 /* 349 * If we are acting as a multicast router, perform 350 * multicast forwarding as if the packet had just 351 * arrived on the interface to which we are about 352 * to send. The multicast forwarding function 353 * recursively calls this function, using the 354 * IP_FORWARDING flag to prevent infinite recursion. 355 * 356 * Multicasts that are looped back by ip_mloopback(), 357 * above, will be forwarded by the ip_input() routine, 358 * if necessary. 359 */ 360 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 361 /* 362 * Check if rsvp daemon is running. If not, don't 363 * set ip_moptions. This ensures that the packet 364 * is multicast and not just sent down one link 365 * as prescribed by rsvpd. 366 */ 367 if (!rsvp_on) 368 imo = NULL; 369 if (ip_mforward(ip, ifp, m, imo) != 0) { 370 m_freem(m); 371 goto done; 372 } 373 } 374 } 375 376 /* 377 * Multicasts with a time-to-live of zero may be looped- 378 * back, above, but must not be transmitted on a network. 379 * Also, multicasts addressed to the loopback interface 380 * are not sent -- the above call to ip_mloopback() will 381 * loop back a copy if this host actually belongs to the 382 * destination group on the loopback interface. 383 */ 384 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 385 m_freem(m); 386 goto done; 387 } 388 389 goto sendit; 390 } 391#ifndef notdef 392 /* 393 * If source address not specified yet, use address 394 * of outgoing interface. 395 */ 396 if (ip->ip_src.s_addr == INADDR_ANY) { 397 ip->ip_src = IA_SIN(ia)->sin_addr; 398#ifdef IPFIREWALL_FORWARD 399 /* Keep note that we did this - if the firewall changes 400 * the next-hop, our interface may change, changing the 401 * default source IP. It's a shame so much effort happens 402 * twice. Oh well. 403 */ 404 fwd_rewrite_src++; 405#endif /* IPFIREWALL_FORWARD */ 406 } 407#endif /* notdef */ 408 /* 409 * Verify that we have any chance at all of being able to queue 410 * the packet or packet fragments 411 */ 412 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 413 ifp->if_snd.ifq_maxlen) { 414 error = ENOBUFS; 415 goto bad; 416 } 417 418 /* 419 * Look for broadcast address and 420 * and verify user is allowed to send 421 * such a packet. 422 */ 423 if (isbroadcast) { 424 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 425 error = EADDRNOTAVAIL; 426 goto bad; 427 } 428 if ((flags & IP_ALLOWBROADCAST) == 0) { 429 error = EACCES; 430 goto bad; 431 } 432 /* don't allow broadcast messages to be fragmented */ 433 if ((u_short)ip->ip_len > ifp->if_mtu) { 434 error = EMSGSIZE; 435 goto bad; 436 } 437 m->m_flags |= M_BCAST; 438 } else { 439 m->m_flags &= ~M_BCAST; 440 } 441 442sendit: 443#ifdef IPSEC 444 /* get SP for this packet */ 445 if (so == NULL) 446 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 447 else 448 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 449 450 if (sp == NULL) { 451 ipsecstat.out_inval++; 452 goto bad; 453 } 454 455 error = 0; 456 457 /* check policy */ 458 switch (sp->policy) { 459 case IPSEC_POLICY_DISCARD: 460 /* 461 * This packet is just discarded. 462 */ 463 ipsecstat.out_polvio++; 464 goto bad; 465 466 case IPSEC_POLICY_BYPASS: 467 case IPSEC_POLICY_NONE: 468 /* no need to do IPsec. */ 469 goto skip_ipsec; 470 471 case IPSEC_POLICY_IPSEC: 472 if (sp->req == NULL) { 473 /* acquire a policy */ 474 error = key_spdacquire(sp); 475 goto bad; 476 } 477 break; 478 479 case IPSEC_POLICY_ENTRUST: 480 default: 481 printf("ip_output: Invalid policy found. %d\n", sp->policy); 482 } 483 { 484 struct ipsec_output_state state; 485 bzero(&state, sizeof(state)); 486 state.m = m; 487 if (flags & IP_ROUTETOIF) { 488 state.ro = &iproute; 489 bzero(&iproute, sizeof(iproute)); 490 } else 491 state.ro = ro; 492 state.dst = (struct sockaddr *)dst; 493 494 ip->ip_sum = 0; 495 496 /* 497 * XXX 498 * delayed checksums are not currently compatible with IPsec 499 */ 500 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 501 in_delayed_cksum(m); 502 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 503 } 504 505 HTONS(ip->ip_len); 506 HTONS(ip->ip_off); 507 508 error = ipsec4_output(&state, sp, flags); 509 510 m = state.m; 511 if (flags & IP_ROUTETOIF) { 512 /* 513 * if we have tunnel mode SA, we may need to ignore 514 * IP_ROUTETOIF. 515 */ 516 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 517 flags &= ~IP_ROUTETOIF; 518 ro = state.ro; 519 } 520 } else 521 ro = state.ro; 522 dst = (struct sockaddr_in *)state.dst; 523 if (error) { 524 /* mbuf is already reclaimed in ipsec4_output. */ 525 m0 = NULL; 526 switch (error) { 527 case EHOSTUNREACH: 528 case ENETUNREACH: 529 case EMSGSIZE: 530 case ENOBUFS: 531 case ENOMEM: 532 break; 533 default: 534 printf("ip4_output (ipsec): error code %d\n", error); 535 /*fall through*/ 536 case ENOENT: 537 /* don't show these error codes to the user */ 538 error = 0; 539 break; 540 } 541 goto bad; 542 } 543 } 544 545 /* be sure to update variables that are affected by ipsec4_output() */ 546 ip = mtod(m, struct ip *); 547#ifdef _IP_VHL 548 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 549#else 550 hlen = ip->ip_hl << 2; 551#endif 552 if (ro->ro_rt == NULL) { 553 if ((flags & IP_ROUTETOIF) == 0) { 554 printf("ip_output: " 555 "can't update route after IPsec processing\n"); 556 error = EHOSTUNREACH; /*XXX*/ 557 goto bad; 558 } 559 } else { 560 ia = ifatoia(ro->ro_rt->rt_ifa); 561 ifp = ro->ro_rt->rt_ifp; 562 } 563 564 /* make it flipped, again. */ 565 NTOHS(ip->ip_len); 566 NTOHS(ip->ip_off); 567skip_ipsec: 568#endif /*IPSEC*/ 569 570 /* 571 * IpHack's section. 572 * - Xlate: translate packet's addr/port (NAT). 573 * - Firewall: deny/allow/etc. 574 * - Wrap: fake packet's addr/port <unimpl.> 575 * - Encapsulate: put it in another IP and send out. <unimp.> 576 */ 577#ifdef PFIL_HOOKS 578 /* 579 * Run through list of hooks for output packets. 580 */ 581 m1 = m; 582 pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh); 583 for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link)) 584 if (pfh->pfil_func) { 585 rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1); 586 if (rv) { 587 error = EHOSTUNREACH; 588 goto done; 589 } 590 m = m1; 591 if (m == NULL) 592 goto done; 593 ip = mtod(m, struct ip *); 594 } 595#endif /* PFIL_HOOKS */ 596 597 /* 598 * Check with the firewall... 599 */ 600 if (fw_enable && ip_fw_chk_ptr) { 601 struct sockaddr_in *old = dst; 602 603 off = (*ip_fw_chk_ptr)(&ip, 604 hlen, ifp, &divert_cookie, &m, &rule, &dst); 605 /* 606 * On return we must do the following: 607 * m == NULL -> drop the pkt (old interface, deprecated) 608 * (off & 0x40000) -> drop the pkt (new interface) 609 * 1<=off<= 0xffff -> DIVERT 610 * (off & 0x10000) -> send to a DUMMYNET pipe 611 * (off & 0x20000) -> TEE the packet 612 * dst != old -> IPFIREWALL_FORWARD 613 * off==0, dst==old -> accept 614 * If some of the above modules is not compiled in, then 615 * we should't have to check the corresponding condition 616 * (because the ipfw control socket should not accept 617 * unsupported rules), but better play safe and drop 618 * packets in case of doubt. 619 */ 620 if (off & IP_FW_PORT_DENY_FLAG) { /* XXX new interface-denied */ 621 if (m) 622 m_freem(m); 623 error = EACCES ; 624 goto done; 625 } 626 if (!m) { /* firewall said to reject */ 627 static int __debug=10; 628 if (__debug >0) { 629 printf("firewall returns NULL, please update!\n"); 630 __debug-- ; 631 } 632 error = EACCES; 633 goto done; 634 } 635 if (off == 0 && dst == old) /* common case */ 636 goto pass ; 637#ifdef DUMMYNET 638 if ((off & IP_FW_PORT_DYNT_FLAG) != 0) { 639 /* 640 * pass the pkt to dummynet. Need to include 641 * pipe number, m, ifp, ro, dst because these are 642 * not recomputed in the next pass. 643 * All other parameters have been already used and 644 * so they are not needed anymore. 645 * XXX note: if the ifp or ro entry are deleted 646 * while a pkt is in dummynet, we are in trouble! 647 */ 648 error = dummynet_io(off & 0xffff, DN_TO_IP_OUT, m, 649 ifp,ro,dst,rule, flags); 650 goto done; 651 } 652#endif 653#ifdef IPDIVERT 654 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 655 struct mbuf *clone = NULL; 656 657 /* Clone packet if we're doing a 'tee' */ 658 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 659 clone = m_dup(m, M_DONTWAIT); 660 661 /* 662 * XXX 663 * delayed checksums are not currently compatible 664 * with divert sockets. 665 */ 666 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 667 in_delayed_cksum(m); 668 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 669 } 670 671 /* Restore packet header fields to original values */ 672 HTONS(ip->ip_len); 673 HTONS(ip->ip_off); 674 675 /* Deliver packet to divert input routine */ 676 ip_divert_cookie = divert_cookie; 677 divert_packet(m, 0, off & 0xffff); 678 679 /* If 'tee', continue with original packet */ 680 if (clone != NULL) { 681 m = clone; 682 ip = mtod(m, struct ip *); 683 goto pass; 684 } 685 goto done; 686 } 687#endif 688 689#ifdef IPFIREWALL_FORWARD 690 /* Here we check dst to make sure it's directly reachable on the 691 * interface we previously thought it was. 692 * If it isn't (which may be likely in some situations) we have 693 * to re-route it (ie, find a route for the next-hop and the 694 * associated interface) and set them here. This is nested 695 * forwarding which in most cases is undesirable, except where 696 * such control is nigh impossible. So we do it here. 697 * And I'm babbling. 698 */ 699 if (off == 0 && old != dst) { 700 struct in_ifaddr *ia; 701 702 /* It's changed... */ 703 /* There must be a better way to do this next line... */ 704 static struct route sro_fwd, *ro_fwd = &sro_fwd; 705#ifdef IPFIREWALL_FORWARD_DEBUG 706 printf("IPFIREWALL_FORWARD: New dst ip: "); 707 print_ip(dst->sin_addr); 708 printf("\n"); 709#endif 710 /* 711 * We need to figure out if we have been forwarded 712 * to a local socket. If so then we should somehow 713 * "loop back" to ip_input, and get directed to the 714 * PCB as if we had received this packet. This is 715 * because it may be dificult to identify the packets 716 * you want to forward until they are being output 717 * and have selected an interface. (e.g. locally 718 * initiated packets) If we used the loopback inteface, 719 * we would not be able to control what happens 720 * as the packet runs through ip_input() as 721 * it is done through a ISR. 722 */ 723 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 724 /* 725 * If the addr to forward to is one 726 * of ours, we pretend to 727 * be the destination for this packet. 728 */ 729 if (IA_SIN(ia)->sin_addr.s_addr == 730 dst->sin_addr.s_addr) 731 break; 732 } 733 if (ia) { 734 /* tell ip_input "dont filter" */ 735 ip_fw_fwd_addr = dst; 736 if (m->m_pkthdr.rcvif == NULL) 737 m->m_pkthdr.rcvif = ifunit("lo0"); 738 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 739 m->m_pkthdr.csum_flags |= 740 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 741 m0->m_pkthdr.csum_data = 0xffff; 742 } 743 m->m_pkthdr.csum_flags |= 744 CSUM_IP_CHECKED | CSUM_IP_VALID; 745 HTONS(ip->ip_len); 746 HTONS(ip->ip_off); 747 ip_input(m); 748 goto done; 749 } 750 /* Some of the logic for this was 751 * nicked from above. 752 * 753 * This rewrites the cached route in a local PCB. 754 * Is this what we want to do? 755 */ 756 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 757 758 ro_fwd->ro_rt = 0; 759 rtalloc_ign(ro_fwd, RTF_PRCLONING); 760 761 if (ro_fwd->ro_rt == 0) { 762 ipstat.ips_noroute++; 763 error = EHOSTUNREACH; 764 goto bad; 765 } 766 767 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 768 ifp = ro_fwd->ro_rt->rt_ifp; 769 ro_fwd->ro_rt->rt_use++; 770 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 771 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway; 772 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 773 isbroadcast = 774 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 775 else 776 isbroadcast = in_broadcast(dst->sin_addr, ifp); 777 RTFREE(ro->ro_rt); 778 ro->ro_rt = ro_fwd->ro_rt; 779 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 780 781 /* 782 * If we added a default src ip earlier, 783 * which would have been gotten from the-then 784 * interface, do it again, from the new one. 785 */ 786 if (fwd_rewrite_src) 787 ip->ip_src = IA_SIN(ia)->sin_addr; 788 goto pass ; 789 } 790#endif /* IPFIREWALL_FORWARD */ 791 /* 792 * if we get here, none of the above matches, and 793 * we have to drop the pkt 794 */ 795 m_freem(m); 796 error = EACCES; /* not sure this is the right error msg */ 797 goto done; 798 } 799 800pass: 801 m->m_pkthdr.csum_flags |= CSUM_IP; 802 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 803 if (sw_csum & CSUM_DELAY_DATA) { 804 in_delayed_cksum(m); 805 sw_csum &= ~CSUM_DELAY_DATA; 806 } 807 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 808 809 /* 810 * If small enough for interface, or the interface will take 811 * care of the fragmentation for us, can just send directly. 812 */ 813 if ((u_short)ip->ip_len <= ifp->if_mtu || 814 ifp->if_hwassist & CSUM_FRAGMENT) { 815 HTONS(ip->ip_len); 816 HTONS(ip->ip_off); 817 ip->ip_sum = 0; 818 if (sw_csum & CSUM_DELAY_IP) { 819 if (ip->ip_vhl == IP_VHL_BORING) { 820 ip->ip_sum = in_cksum_hdr(ip); 821 } else { 822 ip->ip_sum = in_cksum(m, hlen); 823 } 824 } 825 826 /* Record statistics for this interface address. */ 827 if (!(flags & IP_FORWARDING)) { 828 ia->ia_ifa.if_opackets++; 829 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 830 } 831 832#ifdef IPSEC 833 /* clean ipsec history once it goes out of the node */ 834 ipsec_delaux(m); 835#endif 836 837 error = (*ifp->if_output)(ifp, m, 838 (struct sockaddr *)dst, ro->ro_rt); 839 goto done; 840 } 841 /* 842 * Too large for interface; fragment if possible. 843 * Must be able to put at least 8 bytes per fragment. 844 */ 845 if (ip->ip_off & IP_DF) { 846 error = EMSGSIZE; 847 /* 848 * This case can happen if the user changed the MTU 849 * of an interface after enabling IP on it. Because 850 * most netifs don't keep track of routes pointing to 851 * them, there is no way for one to update all its 852 * routes when the MTU is changed. 853 */ 854 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) 855 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) 856 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 857 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 858 } 859 ipstat.ips_cantfrag++; 860 goto bad; 861 } 862 len = (ifp->if_mtu - hlen) &~ 7; 863 if (len < 8) { 864 error = EMSGSIZE; 865 goto bad; 866 } 867 868 /* 869 * if the interface will not calculate checksums on 870 * fragmented packets, then do it here. 871 */ 872 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 873 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { 874 in_delayed_cksum(m); 875 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 876 } 877 878 { 879 int mhlen, firstlen = len; 880 struct mbuf **mnext = &m->m_nextpkt; 881 int nfrags = 1; 882 883 /* 884 * Loop through length of segment after first fragment, 885 * make new header and copy data of each part and link onto chain. 886 */ 887 m0 = m; 888 mhlen = sizeof (struct ip); 889 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) { 890 MGETHDR(m, M_DONTWAIT, MT_HEADER); 891 if (m == 0) { 892 error = ENOBUFS; 893 ipstat.ips_odropped++; 894 goto sendorfree; 895 } 896 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 897 m->m_data += max_linkhdr; 898 mhip = mtod(m, struct ip *); 899 *mhip = *ip; 900 if (hlen > sizeof (struct ip)) { 901 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 902 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2); 903 } 904 m->m_len = mhlen; 905 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 906 if (off + len >= (u_short)ip->ip_len) 907 len = (u_short)ip->ip_len - off; 908 else 909 mhip->ip_off |= IP_MF; 910 mhip->ip_len = htons((u_short)(len + mhlen)); 911 m->m_next = m_copy(m0, off, len); 912 if (m->m_next == 0) { 913 (void) m_free(m); 914 error = ENOBUFS; /* ??? */ 915 ipstat.ips_odropped++; 916 goto sendorfree; 917 } 918 m->m_pkthdr.len = mhlen + len; 919 m->m_pkthdr.rcvif = (struct ifnet *)0; 920 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 921 HTONS(mhip->ip_off); 922 mhip->ip_sum = 0; 923 if (sw_csum & CSUM_DELAY_IP) { 924 if (mhip->ip_vhl == IP_VHL_BORING) { 925 mhip->ip_sum = in_cksum_hdr(mhip); 926 } else { 927 mhip->ip_sum = in_cksum(m, mhlen); 928 } 929 } 930 *mnext = m; 931 mnext = &m->m_nextpkt; 932 nfrags++; 933 } 934 ipstat.ips_ofragments += nfrags; 935 936 /* set first/last markers for fragment chain */ 937 m->m_flags |= M_LASTFRAG; 938 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 939 m0->m_pkthdr.csum_data = nfrags; 940 941 /* 942 * Update first fragment by trimming what's been copied out 943 * and updating header, then send each fragment (in order). 944 */ 945 m = m0; 946 m_adj(m, hlen + firstlen - (u_short)ip->ip_len); 947 m->m_pkthdr.len = hlen + firstlen; 948 ip->ip_len = htons((u_short)m->m_pkthdr.len); 949 ip->ip_off |= IP_MF; 950 HTONS(ip->ip_off); 951 ip->ip_sum = 0; 952 if (sw_csum & CSUM_DELAY_IP) { 953 if (ip->ip_vhl == IP_VHL_BORING) { 954 ip->ip_sum = in_cksum_hdr(ip); 955 } else { 956 ip->ip_sum = in_cksum(m, hlen); 957 } 958 } 959sendorfree: 960 for (m = m0; m; m = m0) { 961 m0 = m->m_nextpkt; 962 m->m_nextpkt = 0; 963#ifdef IPSEC 964 /* clean ipsec history once it goes out of the node */ 965 ipsec_delaux(m); 966#endif 967 if (error == 0) { 968 /* Record statistics for this interface address. */ 969 ia->ia_ifa.if_opackets++; 970 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 971 972 error = (*ifp->if_output)(ifp, m, 973 (struct sockaddr *)dst, ro->ro_rt); 974 } else 975 m_freem(m); 976 } 977 978 if (error == 0) 979 ipstat.ips_fragmented++; 980 } 981done: 982#ifdef IPSEC 983 if (ro == &iproute && ro->ro_rt) { 984 RTFREE(ro->ro_rt); 985 ro->ro_rt = NULL; 986 } 987 if (sp != NULL) { 988 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 989 printf("DP ip_output call free SP:%p\n", sp)); 990 key_freesp(sp); 991 } 992#endif /* IPSEC */ 993 return (error); 994bad: 995 m_freem(m0); 996 goto done; 997} 998 999void 1000in_delayed_cksum(struct mbuf *m) 1001{ 1002 struct ip *ip; 1003 u_short csum, offset; 1004 1005 ip = mtod(m, struct ip *); 1006 offset = IP_VHL_HL(ip->ip_vhl) << 2 ; 1007 csum = in_cksum_skip(m, ip->ip_len, offset); 1008 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 1009 csum = 0xffff; 1010 offset += m->m_pkthdr.csum_data; /* checksum offset */ 1011 1012 if (offset + sizeof(u_short) > m->m_len) { 1013 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 1014 m->m_len, offset, ip->ip_p); 1015 /* 1016 * XXX 1017 * this shouldn't happen, but if it does, the 1018 * correct behavior may be to insert the checksum 1019 * in the existing chain instead of rearranging it. 1020 */ 1021 m = m_pullup(m, offset + sizeof(u_short)); 1022 } 1023 *(u_short *)(m->m_data + offset) = csum; 1024} 1025 1026/* 1027 * Insert IP options into preformed packet. 1028 * Adjust IP destination as required for IP source routing, 1029 * as indicated by a non-zero in_addr at the start of the options. 1030 * 1031 * XXX This routine assumes that the packet has no options in place. 1032 */ 1033static struct mbuf * 1034ip_insertoptions(m, opt, phlen) 1035 register struct mbuf *m; 1036 struct mbuf *opt; 1037 int *phlen; 1038{ 1039 register struct ipoption *p = mtod(opt, struct ipoption *); 1040 struct mbuf *n; 1041 register struct ip *ip = mtod(m, struct ip *); 1042 unsigned optlen; 1043 1044 optlen = opt->m_len - sizeof(p->ipopt_dst); 1045 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) 1046 return (m); /* XXX should fail */ 1047 if (p->ipopt_dst.s_addr) 1048 ip->ip_dst = p->ipopt_dst; 1049 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1050 MGETHDR(n, M_DONTWAIT, MT_HEADER); 1051 if (n == 0) 1052 return (m); 1053 n->m_pkthdr.rcvif = (struct ifnet *)0; 1054 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1055 m->m_len -= sizeof(struct ip); 1056 m->m_data += sizeof(struct ip); 1057 n->m_next = m; 1058 m = n; 1059 m->m_len = optlen + sizeof(struct ip); 1060 m->m_data += max_linkhdr; 1061 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip)); 1062 } else { 1063 m->m_data -= optlen; 1064 m->m_len += optlen; 1065 m->m_pkthdr.len += optlen; 1066 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 1067 } 1068 ip = mtod(m, struct ip *); 1069 bcopy(p->ipopt_list, ip + 1, optlen); 1070 *phlen = sizeof(struct ip) + optlen; 1071 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2); 1072 ip->ip_len += optlen; 1073 return (m); 1074} 1075 1076/* 1077 * Copy options from ip to jp, 1078 * omitting those not copied during fragmentation. 1079 */ 1080int 1081ip_optcopy(ip, jp) 1082 struct ip *ip, *jp; 1083{ 1084 register u_char *cp, *dp; 1085 int opt, optlen, cnt; 1086 1087 cp = (u_char *)(ip + 1); 1088 dp = (u_char *)(jp + 1); 1089 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip); 1090 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1091 opt = cp[0]; 1092 if (opt == IPOPT_EOL) 1093 break; 1094 if (opt == IPOPT_NOP) { 1095 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1096 *dp++ = IPOPT_NOP; 1097 optlen = 1; 1098 continue; 1099 } 1100#ifdef DIAGNOSTIC 1101 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1102 panic("malformed IPv4 option passed to ip_optcopy"); 1103#endif 1104 optlen = cp[IPOPT_OLEN]; 1105#ifdef DIAGNOSTIC 1106 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1107 panic("malformed IPv4 option passed to ip_optcopy"); 1108#endif 1109 /* bogus lengths should have been caught by ip_dooptions */ 1110 if (optlen > cnt) 1111 optlen = cnt; 1112 if (IPOPT_COPIED(opt)) { 1113 bcopy(cp, dp, optlen); 1114 dp += optlen; 1115 } 1116 } 1117 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1118 *dp++ = IPOPT_EOL; 1119 return (optlen); 1120} 1121 1122/* 1123 * IP socket option processing. 1124 */ 1125int 1126ip_ctloutput(so, sopt) 1127 struct socket *so; 1128 struct sockopt *sopt; 1129{ 1130 struct inpcb *inp = sotoinpcb(so); 1131 int error, optval; 1132 1133 error = optval = 0; 1134 if (sopt->sopt_level != IPPROTO_IP) { 1135 return (EINVAL); 1136 } 1137 1138 switch (sopt->sopt_dir) { 1139 case SOPT_SET: 1140 switch (sopt->sopt_name) { 1141 case IP_OPTIONS: 1142#ifdef notyet 1143 case IP_RETOPTS: 1144#endif 1145 { 1146 struct mbuf *m; 1147 if (sopt->sopt_valsize > MLEN) { 1148 error = EMSGSIZE; 1149 break; 1150 } 1151 MGET(m, sopt->sopt_p ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); 1152 if (m == 0) { 1153 error = ENOBUFS; 1154 break; 1155 } 1156 m->m_len = sopt->sopt_valsize; 1157 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1158 m->m_len); 1159 1160 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1161 m)); 1162 } 1163 1164 case IP_TOS: 1165 case IP_TTL: 1166 case IP_RECVOPTS: 1167 case IP_RECVRETOPTS: 1168 case IP_RECVDSTADDR: 1169 case IP_RECVIF: 1170#if defined(NFAITH) && NFAITH > 0 1171 case IP_FAITH: 1172#endif 1173 error = sooptcopyin(sopt, &optval, sizeof optval, 1174 sizeof optval); 1175 if (error) 1176 break; 1177 1178 switch (sopt->sopt_name) { 1179 case IP_TOS: 1180 inp->inp_ip_tos = optval; 1181 break; 1182 1183 case IP_TTL: 1184 inp->inp_ip_ttl = optval; 1185 break; 1186#define OPTSET(bit) \ 1187 if (optval) \ 1188 inp->inp_flags |= bit; \ 1189 else \ 1190 inp->inp_flags &= ~bit; 1191 1192 case IP_RECVOPTS: 1193 OPTSET(INP_RECVOPTS); 1194 break; 1195 1196 case IP_RECVRETOPTS: 1197 OPTSET(INP_RECVRETOPTS); 1198 break; 1199 1200 case IP_RECVDSTADDR: 1201 OPTSET(INP_RECVDSTADDR); 1202 break; 1203 1204 case IP_RECVIF: 1205 OPTSET(INP_RECVIF); 1206 break; 1207 1208#if defined(NFAITH) && NFAITH > 0 1209 case IP_FAITH: 1210 OPTSET(INP_FAITH); 1211 break; 1212#endif 1213 } 1214 break; 1215#undef OPTSET 1216 1217 case IP_MULTICAST_IF: 1218 case IP_MULTICAST_VIF: 1219 case IP_MULTICAST_TTL: 1220 case IP_MULTICAST_LOOP: 1221 case IP_ADD_MEMBERSHIP: 1222 case IP_DROP_MEMBERSHIP: 1223 error = ip_setmoptions(sopt, &inp->inp_moptions); 1224 break; 1225 1226 case IP_PORTRANGE: 1227 error = sooptcopyin(sopt, &optval, sizeof optval, 1228 sizeof optval); 1229 if (error) 1230 break; 1231 1232 switch (optval) { 1233 case IP_PORTRANGE_DEFAULT: 1234 inp->inp_flags &= ~(INP_LOWPORT); 1235 inp->inp_flags &= ~(INP_HIGHPORT); 1236 break; 1237 1238 case IP_PORTRANGE_HIGH: 1239 inp->inp_flags &= ~(INP_LOWPORT); 1240 inp->inp_flags |= INP_HIGHPORT; 1241 break; 1242 1243 case IP_PORTRANGE_LOW: 1244 inp->inp_flags &= ~(INP_HIGHPORT); 1245 inp->inp_flags |= INP_LOWPORT; 1246 break; 1247 1248 default: 1249 error = EINVAL; 1250 break; 1251 } 1252 break; 1253 1254#ifdef IPSEC 1255 case IP_IPSEC_POLICY: 1256 { 1257 caddr_t req; 1258 size_t len = 0; 1259 int priv; 1260 struct mbuf *m; 1261 int optname; 1262 1263 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1264 break; 1265 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1266 break; 1267 priv = (sopt->sopt_p != NULL && 1268 suser(sopt->sopt_p) != 0) ? 0 : 1; 1269 req = mtod(m, caddr_t); 1270 len = m->m_len; 1271 optname = sopt->sopt_name; 1272 error = ipsec4_set_policy(inp, optname, req, len, priv); 1273 m_freem(m); 1274 break; 1275 } 1276#endif /*IPSEC*/ 1277 1278 default: 1279 error = ENOPROTOOPT; 1280 break; 1281 } 1282 break; 1283 1284 case SOPT_GET: 1285 switch (sopt->sopt_name) { 1286 case IP_OPTIONS: 1287 case IP_RETOPTS: 1288 if (inp->inp_options) 1289 error = sooptcopyout(sopt, 1290 mtod(inp->inp_options, 1291 char *), 1292 inp->inp_options->m_len); 1293 else 1294 sopt->sopt_valsize = 0; 1295 break; 1296 1297 case IP_TOS: 1298 case IP_TTL: 1299 case IP_RECVOPTS: 1300 case IP_RECVRETOPTS: 1301 case IP_RECVDSTADDR: 1302 case IP_RECVIF: 1303 case IP_PORTRANGE: 1304#if defined(NFAITH) && NFAITH > 0 1305 case IP_FAITH: 1306#endif 1307 switch (sopt->sopt_name) { 1308 1309 case IP_TOS: 1310 optval = inp->inp_ip_tos; 1311 break; 1312 1313 case IP_TTL: 1314 optval = inp->inp_ip_ttl; 1315 break; 1316 1317#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1318 1319 case IP_RECVOPTS: 1320 optval = OPTBIT(INP_RECVOPTS); 1321 break; 1322 1323 case IP_RECVRETOPTS: 1324 optval = OPTBIT(INP_RECVRETOPTS); 1325 break; 1326 1327 case IP_RECVDSTADDR: 1328 optval = OPTBIT(INP_RECVDSTADDR); 1329 break; 1330 1331 case IP_RECVIF: 1332 optval = OPTBIT(INP_RECVIF); 1333 break; 1334 1335 case IP_PORTRANGE: 1336 if (inp->inp_flags & INP_HIGHPORT) 1337 optval = IP_PORTRANGE_HIGH; 1338 else if (inp->inp_flags & INP_LOWPORT) 1339 optval = IP_PORTRANGE_LOW; 1340 else 1341 optval = 0; 1342 break; 1343 1344#if defined(NFAITH) && NFAITH > 0 1345 case IP_FAITH: 1346 optval = OPTBIT(INP_FAITH); 1347 break; 1348#endif 1349 } 1350 error = sooptcopyout(sopt, &optval, sizeof optval); 1351 break; 1352 1353 case IP_MULTICAST_IF: 1354 case IP_MULTICAST_VIF: 1355 case IP_MULTICAST_TTL: 1356 case IP_MULTICAST_LOOP: 1357 case IP_ADD_MEMBERSHIP: 1358 case IP_DROP_MEMBERSHIP: 1359 error = ip_getmoptions(sopt, inp->inp_moptions); 1360 break; 1361 1362#ifdef IPSEC 1363 case IP_IPSEC_POLICY: 1364 { 1365 struct mbuf *m = NULL; 1366 caddr_t req = NULL; 1367 size_t len = 0; 1368 1369 if (m != 0) { 1370 req = mtod(m, caddr_t); 1371 len = m->m_len; 1372 } 1373 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1374 if (error == 0) 1375 error = soopt_mcopyout(sopt, m); /* XXX */ 1376 if (error == 0) 1377 m_freem(m); 1378 break; 1379 } 1380#endif /*IPSEC*/ 1381 1382 default: 1383 error = ENOPROTOOPT; 1384 break; 1385 } 1386 break; 1387 } 1388 return (error); 1389} 1390 1391/* 1392 * Set up IP options in pcb for insertion in output packets. 1393 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1394 * with destination address if source routed. 1395 */ 1396static int 1397ip_pcbopts(optname, pcbopt, m) 1398 int optname; 1399 struct mbuf **pcbopt; 1400 register struct mbuf *m; 1401{ 1402 register int cnt, optlen; 1403 register u_char *cp; 1404 u_char opt; 1405 1406 /* turn off any old options */ 1407 if (*pcbopt) 1408 (void)m_free(*pcbopt); 1409 *pcbopt = 0; 1410 if (m == (struct mbuf *)0 || m->m_len == 0) { 1411 /* 1412 * Only turning off any previous options. 1413 */ 1414 if (m) 1415 (void)m_free(m); 1416 return (0); 1417 } 1418 1419 if (m->m_len % sizeof(int32_t)) 1420 goto bad; 1421 /* 1422 * IP first-hop destination address will be stored before 1423 * actual options; move other options back 1424 * and clear it when none present. 1425 */ 1426 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1427 goto bad; 1428 cnt = m->m_len; 1429 m->m_len += sizeof(struct in_addr); 1430 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1431 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1432 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1433 1434 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1435 opt = cp[IPOPT_OPTVAL]; 1436 if (opt == IPOPT_EOL) 1437 break; 1438 if (opt == IPOPT_NOP) 1439 optlen = 1; 1440 else { 1441 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1442 goto bad; 1443 optlen = cp[IPOPT_OLEN]; 1444 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1445 goto bad; 1446 } 1447 switch (opt) { 1448 1449 default: 1450 break; 1451 1452 case IPOPT_LSRR: 1453 case IPOPT_SSRR: 1454 /* 1455 * user process specifies route as: 1456 * ->A->B->C->D 1457 * D must be our final destination (but we can't 1458 * check that since we may not have connected yet). 1459 * A is first hop destination, which doesn't appear in 1460 * actual IP option, but is stored before the options. 1461 */ 1462 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1463 goto bad; 1464 m->m_len -= sizeof(struct in_addr); 1465 cnt -= sizeof(struct in_addr); 1466 optlen -= sizeof(struct in_addr); 1467 cp[IPOPT_OLEN] = optlen; 1468 /* 1469 * Move first hop before start of options. 1470 */ 1471 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1472 sizeof(struct in_addr)); 1473 /* 1474 * Then copy rest of options back 1475 * to close up the deleted entry. 1476 */ 1477 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1478 sizeof(struct in_addr)), 1479 (caddr_t)&cp[IPOPT_OFFSET+1], 1480 (unsigned)cnt + sizeof(struct in_addr)); 1481 break; 1482 } 1483 } 1484 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1485 goto bad; 1486 *pcbopt = m; 1487 return (0); 1488 1489bad: 1490 (void)m_free(m); 1491 return (EINVAL); 1492} 1493 1494/* 1495 * XXX 1496 * The whole multicast option thing needs to be re-thought. 1497 * Several of these options are equally applicable to non-multicast 1498 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1499 * standard option (IP_TTL). 1500 */ 1501 1502/* 1503 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1504 */ 1505static struct ifnet * 1506ip_multicast_if(a, ifindexp) 1507 struct in_addr *a; 1508 int *ifindexp; 1509{ 1510 int ifindex; 1511 struct ifnet *ifp; 1512 1513 if (ifindexp) 1514 *ifindexp = 0; 1515 if (ntohl(a->s_addr) >> 24 == 0) { 1516 ifindex = ntohl(a->s_addr) & 0xffffff; 1517 if (ifindex < 0 || if_index < ifindex) 1518 return NULL; 1519 ifp = ifindex2ifnet[ifindex]; 1520 if (ifindexp) 1521 *ifindexp = ifindex; 1522 } else { 1523 INADDR_TO_IFP(*a, ifp); 1524 } 1525 return ifp; 1526} 1527 1528/* 1529 * Set the IP multicast options in response to user setsockopt(). 1530 */ 1531static int 1532ip_setmoptions(sopt, imop) 1533 struct sockopt *sopt; 1534 struct ip_moptions **imop; 1535{ 1536 int error = 0; 1537 int i; 1538 struct in_addr addr; 1539 struct ip_mreq mreq; 1540 struct ifnet *ifp; 1541 struct ip_moptions *imo = *imop; 1542 struct route ro; 1543 struct sockaddr_in *dst; 1544 int ifindex; 1545 int s; 1546 1547 if (imo == NULL) { 1548 /* 1549 * No multicast option buffer attached to the pcb; 1550 * allocate one and initialize to default values. 1551 */ 1552 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, 1553 M_WAITOK); 1554 1555 if (imo == NULL) 1556 return (ENOBUFS); 1557 *imop = imo; 1558 imo->imo_multicast_ifp = NULL; 1559 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1560 imo->imo_multicast_vif = -1; 1561 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1562 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1563 imo->imo_num_memberships = 0; 1564 } 1565 1566 switch (sopt->sopt_name) { 1567 /* store an index number for the vif you wanna use in the send */ 1568 case IP_MULTICAST_VIF: 1569 if (legal_vif_num == 0) { 1570 error = EOPNOTSUPP; 1571 break; 1572 } 1573 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1574 if (error) 1575 break; 1576 if (!legal_vif_num(i) && (i != -1)) { 1577 error = EINVAL; 1578 break; 1579 } 1580 imo->imo_multicast_vif = i; 1581 break; 1582 1583 case IP_MULTICAST_IF: 1584 /* 1585 * Select the interface for outgoing multicast packets. 1586 */ 1587 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1588 if (error) 1589 break; 1590 /* 1591 * INADDR_ANY is used to remove a previous selection. 1592 * When no interface is selected, a default one is 1593 * chosen every time a multicast packet is sent. 1594 */ 1595 if (addr.s_addr == INADDR_ANY) { 1596 imo->imo_multicast_ifp = NULL; 1597 break; 1598 } 1599 /* 1600 * The selected interface is identified by its local 1601 * IP address. Find the interface and confirm that 1602 * it supports multicasting. 1603 */ 1604 s = splimp(); 1605 ifp = ip_multicast_if(&addr, &ifindex); 1606 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1607 splx(s); 1608 error = EADDRNOTAVAIL; 1609 break; 1610 } 1611 imo->imo_multicast_ifp = ifp; 1612 if (ifindex) 1613 imo->imo_multicast_addr = addr; 1614 else 1615 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1616 splx(s); 1617 break; 1618 1619 case IP_MULTICAST_TTL: 1620 /* 1621 * Set the IP time-to-live for outgoing multicast packets. 1622 * The original multicast API required a char argument, 1623 * which is inconsistent with the rest of the socket API. 1624 * We allow either a char or an int. 1625 */ 1626 if (sopt->sopt_valsize == 1) { 1627 u_char ttl; 1628 error = sooptcopyin(sopt, &ttl, 1, 1); 1629 if (error) 1630 break; 1631 imo->imo_multicast_ttl = ttl; 1632 } else { 1633 u_int ttl; 1634 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1635 sizeof ttl); 1636 if (error) 1637 break; 1638 if (ttl > 255) 1639 error = EINVAL; 1640 else 1641 imo->imo_multicast_ttl = ttl; 1642 } 1643 break; 1644 1645 case IP_MULTICAST_LOOP: 1646 /* 1647 * Set the loopback flag for outgoing multicast packets. 1648 * Must be zero or one. The original multicast API required a 1649 * char argument, which is inconsistent with the rest 1650 * of the socket API. We allow either a char or an int. 1651 */ 1652 if (sopt->sopt_valsize == 1) { 1653 u_char loop; 1654 error = sooptcopyin(sopt, &loop, 1, 1); 1655 if (error) 1656 break; 1657 imo->imo_multicast_loop = !!loop; 1658 } else { 1659 u_int loop; 1660 error = sooptcopyin(sopt, &loop, sizeof loop, 1661 sizeof loop); 1662 if (error) 1663 break; 1664 imo->imo_multicast_loop = !!loop; 1665 } 1666 break; 1667 1668 case IP_ADD_MEMBERSHIP: 1669 /* 1670 * Add a multicast group membership. 1671 * Group must be a valid IP multicast address. 1672 */ 1673 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1674 if (error) 1675 break; 1676 1677 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1678 error = EINVAL; 1679 break; 1680 } 1681 s = splimp(); 1682 /* 1683 * If no interface address was provided, use the interface of 1684 * the route to the given multicast address. 1685 */ 1686 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1687 bzero((caddr_t)&ro, sizeof(ro)); 1688 dst = (struct sockaddr_in *)&ro.ro_dst; 1689 dst->sin_len = sizeof(*dst); 1690 dst->sin_family = AF_INET; 1691 dst->sin_addr = mreq.imr_multiaddr; 1692 rtalloc(&ro); 1693 if (ro.ro_rt == NULL) { 1694 error = EADDRNOTAVAIL; 1695 splx(s); 1696 break; 1697 } 1698 ifp = ro.ro_rt->rt_ifp; 1699 rtfree(ro.ro_rt); 1700 } 1701 else { 1702 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1703 } 1704 1705 /* 1706 * See if we found an interface, and confirm that it 1707 * supports multicast. 1708 */ 1709 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1710 error = EADDRNOTAVAIL; 1711 splx(s); 1712 break; 1713 } 1714 /* 1715 * See if the membership already exists or if all the 1716 * membership slots are full. 1717 */ 1718 for (i = 0; i < imo->imo_num_memberships; ++i) { 1719 if (imo->imo_membership[i]->inm_ifp == ifp && 1720 imo->imo_membership[i]->inm_addr.s_addr 1721 == mreq.imr_multiaddr.s_addr) 1722 break; 1723 } 1724 if (i < imo->imo_num_memberships) { 1725 error = EADDRINUSE; 1726 splx(s); 1727 break; 1728 } 1729 if (i == IP_MAX_MEMBERSHIPS) { 1730 error = ETOOMANYREFS; 1731 splx(s); 1732 break; 1733 } 1734 /* 1735 * Everything looks good; add a new record to the multicast 1736 * address list for the given interface. 1737 */ 1738 if ((imo->imo_membership[i] = 1739 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 1740 error = ENOBUFS; 1741 splx(s); 1742 break; 1743 } 1744 ++imo->imo_num_memberships; 1745 splx(s); 1746 break; 1747 1748 case IP_DROP_MEMBERSHIP: 1749 /* 1750 * Drop a multicast group membership. 1751 * Group must be a valid IP multicast address. 1752 */ 1753 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1754 if (error) 1755 break; 1756 1757 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1758 error = EINVAL; 1759 break; 1760 } 1761 1762 s = splimp(); 1763 /* 1764 * If an interface address was specified, get a pointer 1765 * to its ifnet structure. 1766 */ 1767 if (mreq.imr_interface.s_addr == INADDR_ANY) 1768 ifp = NULL; 1769 else { 1770 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1771 if (ifp == NULL) { 1772 error = EADDRNOTAVAIL; 1773 splx(s); 1774 break; 1775 } 1776 } 1777 /* 1778 * Find the membership in the membership array. 1779 */ 1780 for (i = 0; i < imo->imo_num_memberships; ++i) { 1781 if ((ifp == NULL || 1782 imo->imo_membership[i]->inm_ifp == ifp) && 1783 imo->imo_membership[i]->inm_addr.s_addr == 1784 mreq.imr_multiaddr.s_addr) 1785 break; 1786 } 1787 if (i == imo->imo_num_memberships) { 1788 error = EADDRNOTAVAIL; 1789 splx(s); 1790 break; 1791 } 1792 /* 1793 * Give up the multicast address record to which the 1794 * membership points. 1795 */ 1796 in_delmulti(imo->imo_membership[i]); 1797 /* 1798 * Remove the gap in the membership array. 1799 */ 1800 for (++i; i < imo->imo_num_memberships; ++i) 1801 imo->imo_membership[i-1] = imo->imo_membership[i]; 1802 --imo->imo_num_memberships; 1803 splx(s); 1804 break; 1805 1806 default: 1807 error = EOPNOTSUPP; 1808 break; 1809 } 1810 1811 /* 1812 * If all options have default values, no need to keep the mbuf. 1813 */ 1814 if (imo->imo_multicast_ifp == NULL && 1815 imo->imo_multicast_vif == -1 && 1816 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1817 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1818 imo->imo_num_memberships == 0) { 1819 free(*imop, M_IPMOPTS); 1820 *imop = NULL; 1821 } 1822 1823 return (error); 1824} 1825 1826/* 1827 * Return the IP multicast options in response to user getsockopt(). 1828 */ 1829static int 1830ip_getmoptions(sopt, imo) 1831 struct sockopt *sopt; 1832 register struct ip_moptions *imo; 1833{ 1834 struct in_addr addr; 1835 struct in_ifaddr *ia; 1836 int error, optval; 1837 u_char coptval; 1838 1839 error = 0; 1840 switch (sopt->sopt_name) { 1841 case IP_MULTICAST_VIF: 1842 if (imo != NULL) 1843 optval = imo->imo_multicast_vif; 1844 else 1845 optval = -1; 1846 error = sooptcopyout(sopt, &optval, sizeof optval); 1847 break; 1848 1849 case IP_MULTICAST_IF: 1850 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1851 addr.s_addr = INADDR_ANY; 1852 else if (imo->imo_multicast_addr.s_addr) { 1853 /* return the value user has set */ 1854 addr = imo->imo_multicast_addr; 1855 } else { 1856 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1857 addr.s_addr = (ia == NULL) ? INADDR_ANY 1858 : IA_SIN(ia)->sin_addr.s_addr; 1859 } 1860 error = sooptcopyout(sopt, &addr, sizeof addr); 1861 break; 1862 1863 case IP_MULTICAST_TTL: 1864 if (imo == 0) 1865 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1866 else 1867 optval = coptval = imo->imo_multicast_ttl; 1868 if (sopt->sopt_valsize == 1) 1869 error = sooptcopyout(sopt, &coptval, 1); 1870 else 1871 error = sooptcopyout(sopt, &optval, sizeof optval); 1872 break; 1873 1874 case IP_MULTICAST_LOOP: 1875 if (imo == 0) 1876 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1877 else 1878 optval = coptval = imo->imo_multicast_loop; 1879 if (sopt->sopt_valsize == 1) 1880 error = sooptcopyout(sopt, &coptval, 1); 1881 else 1882 error = sooptcopyout(sopt, &optval, sizeof optval); 1883 break; 1884 1885 default: 1886 error = ENOPROTOOPT; 1887 break; 1888 } 1889 return (error); 1890} 1891 1892/* 1893 * Discard the IP multicast options. 1894 */ 1895void 1896ip_freemoptions(imo) 1897 register struct ip_moptions *imo; 1898{ 1899 register int i; 1900 1901 if (imo != NULL) { 1902 for (i = 0; i < imo->imo_num_memberships; ++i) 1903 in_delmulti(imo->imo_membership[i]); 1904 free(imo, M_IPMOPTS); 1905 } 1906} 1907 1908/* 1909 * Routine called from ip_output() to loop back a copy of an IP multicast 1910 * packet to the input queue of a specified interface. Note that this 1911 * calls the output routine of the loopback "driver", but with an interface 1912 * pointer that might NOT be a loopback interface -- evil, but easier than 1913 * replicating that code here. 1914 */ 1915static void 1916ip_mloopback(ifp, m, dst, hlen) 1917 struct ifnet *ifp; 1918 register struct mbuf *m; 1919 register struct sockaddr_in *dst; 1920 int hlen; 1921{ 1922 register struct ip *ip; 1923 struct mbuf *copym; 1924 1925 copym = m_copy(m, 0, M_COPYALL); 1926 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 1927 copym = m_pullup(copym, hlen); 1928 if (copym != NULL) { 1929 /* 1930 * We don't bother to fragment if the IP length is greater 1931 * than the interface's MTU. Can this possibly matter? 1932 */ 1933 ip = mtod(copym, struct ip *); 1934 HTONS(ip->ip_len); 1935 HTONS(ip->ip_off); 1936 ip->ip_sum = 0; 1937 if (ip->ip_vhl == IP_VHL_BORING) { 1938 ip->ip_sum = in_cksum_hdr(ip); 1939 } else { 1940 ip->ip_sum = in_cksum(copym, hlen); 1941 } 1942 /* 1943 * NB: 1944 * It's not clear whether there are any lingering 1945 * reentrancy problems in other areas which might 1946 * be exposed by using ip_input directly (in 1947 * particular, everything which modifies the packet 1948 * in-place). Yet another option is using the 1949 * protosw directly to deliver the looped back 1950 * packet. For the moment, we'll err on the side 1951 * of safety by using if_simloop(). 1952 */ 1953#if 1 /* XXX */ 1954 if (dst->sin_family != AF_INET) { 1955 printf("ip_mloopback: bad address family %d\n", 1956 dst->sin_family); 1957 dst->sin_family = AF_INET; 1958 } 1959#endif 1960 1961#ifdef notdef 1962 copym->m_pkthdr.rcvif = ifp; 1963 ip_input(copym); 1964#else 1965 /* if the checksum hasn't been computed, mark it as valid */ 1966 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1967 copym->m_pkthdr.csum_flags |= 1968 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1969 copym->m_pkthdr.csum_data = 0xffff; 1970 } 1971 if_simloop(ifp, copym, dst->sin_family, 0); 1972#endif 1973 } 1974} 1975