ip_output.c revision 77574
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 34 * $FreeBSD: head/sys/netinet/ip_output.c 77574 2001-06-01 10:02:28Z kris $ 35 */ 36 37#define _IP_VHL 38 39#include "opt_ipfw.h" 40#include "opt_ipdn.h" 41#include "opt_ipdivert.h" 42#include "opt_ipfilter.h" 43#include "opt_ipsec.h" 44#include "opt_pfil_hooks.h" 45#include "opt_random_ip_id.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/malloc.h> 51#include <sys/mbuf.h> 52#include <sys/protosw.h> 53#include <sys/socket.h> 54#include <sys/socketvar.h> 55 56#include <net/if.h> 57#include <net/route.h> 58 59#include <netinet/in.h> 60#include <netinet/in_systm.h> 61#include <netinet/ip.h> 62#include <netinet/in_pcb.h> 63#include <netinet/in_var.h> 64#include <netinet/ip_var.h> 65 66#include "faith.h" 67 68#include <machine/in_cksum.h> 69 70static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 71 72#ifdef IPSEC 73#include <netinet6/ipsec.h> 74#include <netkey/key.h> 75#ifdef IPSEC_DEBUG 76#include <netkey/key_debug.h> 77#else 78#define KEYDEBUG(lev,arg) 79#endif 80#endif /*IPSEC*/ 81 82#include <netinet/ip_fw.h> 83 84#ifdef DUMMYNET 85#include <netinet/ip_dummynet.h> 86#endif 87 88#ifdef IPFIREWALL_FORWARD_DEBUG 89#define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\ 90 (ntohl(a.s_addr)>>16)&0xFF,\ 91 (ntohl(a.s_addr)>>8)&0xFF,\ 92 (ntohl(a.s_addr))&0xFF); 93#endif 94 95u_short ip_id; 96 97static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); 98static void ip_mloopback 99 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int)); 100static int ip_getmoptions 101 __P((struct sockopt *, struct ip_moptions *)); 102static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *)); 103static int ip_setmoptions 104 __P((struct sockopt *, struct ip_moptions **)); 105 106int ip_optcopy __P((struct ip *, struct ip *)); 107 108 109extern struct protosw inetsw[]; 110 111/* 112 * IP output. The packet in mbuf chain m contains a skeletal IP 113 * header (with len, off, ttl, proto, tos, src, dst). 114 * The mbuf chain containing the packet will be freed. 115 * The mbuf opt, if present, will not be freed. 116 */ 117int 118ip_output(m0, opt, ro, flags, imo) 119 struct mbuf *m0; 120 struct mbuf *opt; 121 struct route *ro; 122 int flags; 123 struct ip_moptions *imo; 124{ 125 struct ip *ip, *mhip; 126 struct ifnet *ifp; 127 struct mbuf *m = m0; 128 int hlen = sizeof (struct ip); 129 int len, off, error = 0; 130 struct sockaddr_in *dst; 131 struct in_ifaddr *ia; 132 int isbroadcast, sw_csum; 133#ifdef IPSEC 134 struct route iproute; 135 struct socket *so = NULL; 136 struct secpolicy *sp = NULL; 137#endif 138 u_int16_t divert_cookie; /* firewall cookie */ 139#ifdef PFIL_HOOKS 140 struct packet_filter_hook *pfh; 141 struct mbuf *m1; 142 int rv; 143#endif /* PFIL_HOOKS */ 144#ifdef IPFIREWALL_FORWARD 145 int fwd_rewrite_src = 0; 146#endif 147 struct ip_fw_chain *rule = NULL; 148 149#ifdef IPDIVERT 150 /* Get and reset firewall cookie */ 151 divert_cookie = ip_divert_cookie; 152 ip_divert_cookie = 0; 153#else 154 divert_cookie = 0; 155#endif 156 157#if defined(IPFIREWALL) && defined(DUMMYNET) 158 /* 159 * dummynet packet are prepended a vestigial mbuf with 160 * m_type = MT_DUMMYNET and m_data pointing to the matching 161 * rule. 162 */ 163 if (m->m_type == MT_DUMMYNET) { 164 /* 165 * the packet was already tagged, so part of the 166 * processing was already done, and we need to go down. 167 * Get parameters from the header. 168 */ 169 rule = (struct ip_fw_chain *)(m->m_data) ; 170 opt = NULL ; 171 ro = & ( ((struct dn_pkt *)m)->ro ) ; 172 imo = NULL ; 173 dst = ((struct dn_pkt *)m)->dn_dst ; 174 ifp = ((struct dn_pkt *)m)->ifp ; 175 flags = ((struct dn_pkt *)m)->flags ; 176 177 m0 = m = m->m_next ; 178#ifdef IPSEC 179 so = ipsec_getsocket(m); 180 ipsec_setsocket(m, NULL); 181#endif 182 ip = mtod(m, struct ip *); 183 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; 184 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa; 185 goto sendit; 186 } else 187 rule = NULL ; 188#endif 189#ifdef IPSEC 190 so = ipsec_getsocket(m); 191 ipsec_setsocket(m, NULL); 192#endif 193 194#ifdef DIAGNOSTIC 195 if ((m->m_flags & M_PKTHDR) == 0) 196 panic("ip_output no HDR"); 197 if (!ro) 198 panic("ip_output no route, proto = %d", 199 mtod(m, struct ip *)->ip_p); 200#endif 201 if (opt) { 202 m = ip_insertoptions(m, opt, &len); 203 hlen = len; 204 } 205 ip = mtod(m, struct ip *); 206 /* 207 * Fill in IP header. 208 */ 209 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 210 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); 211 ip->ip_off &= IP_DF; 212#ifdef RANDOM_IP_ID 213 ip->ip_id = ip_randomid(); 214#else 215 ip->ip_id = htons(ip_id++); 216#endif 217 ipstat.ips_localout++; 218 } else { 219 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 220 } 221 222 dst = (struct sockaddr_in *)&ro->ro_dst; 223 /* 224 * If there is a cached route, 225 * check that it is to the same destination 226 * and is still up. If not, free it and try again. 227 */ 228 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 229 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 230 RTFREE(ro->ro_rt); 231 ro->ro_rt = (struct rtentry *)0; 232 } 233 if (ro->ro_rt == 0) { 234 dst->sin_family = AF_INET; 235 dst->sin_len = sizeof(*dst); 236 dst->sin_addr = ip->ip_dst; 237 } 238 /* 239 * If routing to interface only, 240 * short circuit routing lookup. 241 */ 242#define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) 243#define sintosa(sin) ((struct sockaddr *)(sin)) 244 if (flags & IP_ROUTETOIF) { 245 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 246 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 247 ipstat.ips_noroute++; 248 error = ENETUNREACH; 249 goto bad; 250 } 251 ifp = ia->ia_ifp; 252 ip->ip_ttl = 1; 253 isbroadcast = in_broadcast(dst->sin_addr, ifp); 254 } else { 255 /* 256 * If this is the case, we probably don't want to allocate 257 * a protocol-cloned route since we didn't get one from the 258 * ULP. This lets TCP do its thing, while not burdening 259 * forwarding or ICMP with the overhead of cloning a route. 260 * Of course, we still want to do any cloning requested by 261 * the link layer, as this is probably required in all cases 262 * for correct operation (as it is for ARP). 263 */ 264 if (ro->ro_rt == 0) 265 rtalloc_ign(ro, RTF_PRCLONING); 266 if (ro->ro_rt == 0) { 267 ipstat.ips_noroute++; 268 error = EHOSTUNREACH; 269 goto bad; 270 } 271 ia = ifatoia(ro->ro_rt->rt_ifa); 272 ifp = ro->ro_rt->rt_ifp; 273 ro->ro_rt->rt_use++; 274 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 275 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 276 if (ro->ro_rt->rt_flags & RTF_HOST) 277 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 278 else 279 isbroadcast = in_broadcast(dst->sin_addr, ifp); 280 } 281 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 282 struct in_multi *inm; 283 284 m->m_flags |= M_MCAST; 285 /* 286 * IP destination address is multicast. Make sure "dst" 287 * still points to the address in "ro". (It may have been 288 * changed to point to a gateway address, above.) 289 */ 290 dst = (struct sockaddr_in *)&ro->ro_dst; 291 /* 292 * See if the caller provided any multicast options 293 */ 294 if (imo != NULL) { 295 ip->ip_ttl = imo->imo_multicast_ttl; 296 if (imo->imo_multicast_ifp != NULL) 297 ifp = imo->imo_multicast_ifp; 298 if (imo->imo_multicast_vif != -1) 299 ip->ip_src.s_addr = 300 ip_mcast_src(imo->imo_multicast_vif); 301 } else 302 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 303 /* 304 * Confirm that the outgoing interface supports multicast. 305 */ 306 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 307 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 308 ipstat.ips_noroute++; 309 error = ENETUNREACH; 310 goto bad; 311 } 312 } 313 /* 314 * If source address not specified yet, use address 315 * of outgoing interface. 316 */ 317 if (ip->ip_src.s_addr == INADDR_ANY) { 318 register struct in_ifaddr *ia1; 319 320 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) 321 if (ia1->ia_ifp == ifp) { 322 ip->ip_src = IA_SIN(ia1)->sin_addr; 323 break; 324 } 325 } 326 327 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); 328 if (inm != NULL && 329 (imo == NULL || imo->imo_multicast_loop)) { 330 /* 331 * If we belong to the destination multicast group 332 * on the outgoing interface, and the caller did not 333 * forbid loopback, loop back a copy. 334 */ 335 ip_mloopback(ifp, m, dst, hlen); 336 } 337 else { 338 /* 339 * If we are acting as a multicast router, perform 340 * multicast forwarding as if the packet had just 341 * arrived on the interface to which we are about 342 * to send. The multicast forwarding function 343 * recursively calls this function, using the 344 * IP_FORWARDING flag to prevent infinite recursion. 345 * 346 * Multicasts that are looped back by ip_mloopback(), 347 * above, will be forwarded by the ip_input() routine, 348 * if necessary. 349 */ 350 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 351 /* 352 * Check if rsvp daemon is running. If not, don't 353 * set ip_moptions. This ensures that the packet 354 * is multicast and not just sent down one link 355 * as prescribed by rsvpd. 356 */ 357 if (!rsvp_on) 358 imo = NULL; 359 if (ip_mforward(ip, ifp, m, imo) != 0) { 360 m_freem(m); 361 goto done; 362 } 363 } 364 } 365 366 /* 367 * Multicasts with a time-to-live of zero may be looped- 368 * back, above, but must not be transmitted on a network. 369 * Also, multicasts addressed to the loopback interface 370 * are not sent -- the above call to ip_mloopback() will 371 * loop back a copy if this host actually belongs to the 372 * destination group on the loopback interface. 373 */ 374 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 375 m_freem(m); 376 goto done; 377 } 378 379 goto sendit; 380 } 381#ifndef notdef 382 /* 383 * If source address not specified yet, use address 384 * of outgoing interface. 385 */ 386 if (ip->ip_src.s_addr == INADDR_ANY) { 387 ip->ip_src = IA_SIN(ia)->sin_addr; 388#ifdef IPFIREWALL_FORWARD 389 /* Keep note that we did this - if the firewall changes 390 * the next-hop, our interface may change, changing the 391 * default source IP. It's a shame so much effort happens 392 * twice. Oh well. 393 */ 394 fwd_rewrite_src++; 395#endif /* IPFIREWALL_FORWARD */ 396 } 397#endif /* notdef */ 398 /* 399 * Verify that we have any chance at all of being able to queue 400 * the packet or packet fragments 401 */ 402 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 403 ifp->if_snd.ifq_maxlen) { 404 error = ENOBUFS; 405 goto bad; 406 } 407 408 /* 409 * Look for broadcast address and 410 * and verify user is allowed to send 411 * such a packet. 412 */ 413 if (isbroadcast) { 414 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 415 error = EADDRNOTAVAIL; 416 goto bad; 417 } 418 if ((flags & IP_ALLOWBROADCAST) == 0) { 419 error = EACCES; 420 goto bad; 421 } 422 /* don't allow broadcast messages to be fragmented */ 423 if ((u_short)ip->ip_len > ifp->if_mtu) { 424 error = EMSGSIZE; 425 goto bad; 426 } 427 m->m_flags |= M_BCAST; 428 } else { 429 m->m_flags &= ~M_BCAST; 430 } 431 432sendit: 433 /* 434 * IpHack's section. 435 * - Xlate: translate packet's addr/port (NAT). 436 * - Firewall: deny/allow/etc. 437 * - Wrap: fake packet's addr/port <unimpl.> 438 * - Encapsulate: put it in another IP and send out. <unimp.> 439 */ 440#ifdef PFIL_HOOKS 441 /* 442 * Run through list of hooks for output packets. 443 */ 444 m1 = m; 445 pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh); 446 for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link)) 447 if (pfh->pfil_func) { 448 rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1); 449 if (rv) { 450 error = EHOSTUNREACH; 451 goto done; 452 } 453 m = m1; 454 if (m == NULL) 455 goto done; 456 ip = mtod(m, struct ip *); 457 } 458#endif /* PFIL_HOOKS */ 459 460 /* 461 * Check with the firewall... 462 */ 463 if (fw_enable && ip_fw_chk_ptr) { 464 struct sockaddr_in *old = dst; 465 466 off = (*ip_fw_chk_ptr)(&ip, 467 hlen, ifp, &divert_cookie, &m, &rule, &dst); 468 /* 469 * On return we must do the following: 470 * m == NULL -> drop the pkt (old interface, deprecated) 471 * (off & 0x40000) -> drop the pkt (new interface) 472 * 1<=off<= 0xffff -> DIVERT 473 * (off & 0x10000) -> send to a DUMMYNET pipe 474 * (off & 0x20000) -> TEE the packet 475 * dst != old -> IPFIREWALL_FORWARD 476 * off==0, dst==old -> accept 477 * If some of the above modules is not compiled in, then 478 * we should't have to check the corresponding condition 479 * (because the ipfw control socket should not accept 480 * unsupported rules), but better play safe and drop 481 * packets in case of doubt. 482 */ 483 if (off & IP_FW_PORT_DENY_FLAG) { /* XXX new interface-denied */ 484 if (m) 485 m_freem(m); 486 error = EACCES ; 487 goto done; 488 } 489 if (!m) { /* firewall said to reject */ 490 static int __debug=10; 491 if (__debug >0) { 492 printf("firewall returns NULL, please update!\n"); 493 __debug-- ; 494 } 495 error = EACCES; 496 goto done; 497 } 498 if (off == 0 && dst == old) /* common case */ 499 goto pass ; 500#ifdef DUMMYNET 501 if ((off & IP_FW_PORT_DYNT_FLAG) != 0) { 502 /* 503 * pass the pkt to dummynet. Need to include 504 * pipe number, m, ifp, ro, dst because these are 505 * not recomputed in the next pass. 506 * All other parameters have been already used and 507 * so they are not needed anymore. 508 * XXX note: if the ifp or ro entry are deleted 509 * while a pkt is in dummynet, we are in trouble! 510 */ 511 error = dummynet_io(off & 0xffff, DN_TO_IP_OUT, m, 512 ifp,ro,dst,rule, flags); 513 goto done; 514 } 515#endif 516#ifdef IPDIVERT 517 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 518 struct mbuf *clone = NULL; 519 520 /* Clone packet if we're doing a 'tee' */ 521 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 522 clone = m_dup(m, M_DONTWAIT); 523 524 /* 525 * XXX 526 * delayed checksums are not currently compatible 527 * with divert sockets. 528 */ 529 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 530 in_delayed_cksum(m); 531 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 532 } 533 534 /* Restore packet header fields to original values */ 535 HTONS(ip->ip_len); 536 HTONS(ip->ip_off); 537 538 /* Deliver packet to divert input routine */ 539 ip_divert_cookie = divert_cookie; 540 divert_packet(m, 0, off & 0xffff); 541 542 /* If 'tee', continue with original packet */ 543 if (clone != NULL) { 544 m = clone; 545 ip = mtod(m, struct ip *); 546 goto pass; 547 } 548 goto done; 549 } 550#endif 551 552#ifdef IPFIREWALL_FORWARD 553 /* Here we check dst to make sure it's directly reachable on the 554 * interface we previously thought it was. 555 * If it isn't (which may be likely in some situations) we have 556 * to re-route it (ie, find a route for the next-hop and the 557 * associated interface) and set them here. This is nested 558 * forwarding which in most cases is undesirable, except where 559 * such control is nigh impossible. So we do it here. 560 * And I'm babbling. 561 */ 562 if (off == 0 && old != dst) { 563 struct in_ifaddr *ia; 564 565 /* It's changed... */ 566 /* There must be a better way to do this next line... */ 567 static struct route sro_fwd, *ro_fwd = &sro_fwd; 568#ifdef IPFIREWALL_FORWARD_DEBUG 569 printf("IPFIREWALL_FORWARD: New dst ip: "); 570 print_ip(dst->sin_addr); 571 printf("\n"); 572#endif 573 /* 574 * We need to figure out if we have been forwarded 575 * to a local socket. If so then we should somehow 576 * "loop back" to ip_input, and get directed to the 577 * PCB as if we had received this packet. This is 578 * because it may be dificult to identify the packets 579 * you want to forward until they are being output 580 * and have selected an interface. (e.g. locally 581 * initiated packets) If we used the loopback inteface, 582 * we would not be able to control what happens 583 * as the packet runs through ip_input() as 584 * it is done through a ISR. 585 */ 586 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 587 /* 588 * If the addr to forward to is one 589 * of ours, we pretend to 590 * be the destination for this packet. 591 */ 592 if (IA_SIN(ia)->sin_addr.s_addr == 593 dst->sin_addr.s_addr) 594 break; 595 } 596 if (ia) { 597 /* tell ip_input "dont filter" */ 598 ip_fw_fwd_addr = dst; 599 if (m->m_pkthdr.rcvif == NULL) 600 m->m_pkthdr.rcvif = ifunit("lo0"); 601 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 602 m->m_pkthdr.csum_flags |= 603 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 604 m0->m_pkthdr.csum_data = 0xffff; 605 } 606 m->m_pkthdr.csum_flags |= 607 CSUM_IP_CHECKED | CSUM_IP_VALID; 608 HTONS(ip->ip_len); 609 HTONS(ip->ip_off); 610 ip_input(m); 611 goto done; 612 } 613 /* Some of the logic for this was 614 * nicked from above. 615 * 616 * This rewrites the cached route in a local PCB. 617 * Is this what we want to do? 618 */ 619 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 620 621 ro_fwd->ro_rt = 0; 622 rtalloc_ign(ro_fwd, RTF_PRCLONING); 623 624 if (ro_fwd->ro_rt == 0) { 625 ipstat.ips_noroute++; 626 error = EHOSTUNREACH; 627 goto bad; 628 } 629 630 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 631 ifp = ro_fwd->ro_rt->rt_ifp; 632 ro_fwd->ro_rt->rt_use++; 633 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 634 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway; 635 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 636 isbroadcast = 637 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 638 else 639 isbroadcast = in_broadcast(dst->sin_addr, ifp); 640 RTFREE(ro->ro_rt); 641 ro->ro_rt = ro_fwd->ro_rt; 642 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 643 644 /* 645 * If we added a default src ip earlier, 646 * which would have been gotten from the-then 647 * interface, do it again, from the new one. 648 */ 649 if (fwd_rewrite_src) 650 ip->ip_src = IA_SIN(ia)->sin_addr; 651 goto pass ; 652 } 653#endif /* IPFIREWALL_FORWARD */ 654 /* 655 * if we get here, none of the above matches, and 656 * we have to drop the pkt 657 */ 658 m_freem(m); 659 error = EACCES; /* not sure this is the right error msg */ 660 goto done; 661 } 662 663pass: 664#ifdef IPSEC 665 /* get SP for this packet */ 666 if (so == NULL) 667 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 668 else 669 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 670 671 if (sp == NULL) { 672 ipsecstat.out_inval++; 673 goto bad; 674 } 675 676 error = 0; 677 678 /* check policy */ 679 switch (sp->policy) { 680 case IPSEC_POLICY_DISCARD: 681 /* 682 * This packet is just discarded. 683 */ 684 ipsecstat.out_polvio++; 685 goto bad; 686 687 case IPSEC_POLICY_BYPASS: 688 case IPSEC_POLICY_NONE: 689 /* no need to do IPsec. */ 690 goto skip_ipsec; 691 692 case IPSEC_POLICY_IPSEC: 693 if (sp->req == NULL) { 694 /* XXX should be panic ? */ 695 printf("ip_output: No IPsec request specified.\n"); 696 error = EINVAL; 697 goto bad; 698 } 699 break; 700 701 case IPSEC_POLICY_ENTRUST: 702 default: 703 printf("ip_output: Invalid policy found. %d\n", sp->policy); 704 } 705 { 706 struct ipsec_output_state state; 707 bzero(&state, sizeof(state)); 708 state.m = m; 709 if (flags & IP_ROUTETOIF) { 710 state.ro = &iproute; 711 bzero(&iproute, sizeof(iproute)); 712 } else 713 state.ro = ro; 714 state.dst = (struct sockaddr *)dst; 715 716 ip->ip_sum = 0; 717 718 /* 719 * XXX 720 * delayed checksums are not currently compatible with IPsec 721 */ 722 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 723 in_delayed_cksum(m); 724 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 725 } 726 727 HTONS(ip->ip_len); 728 HTONS(ip->ip_off); 729 730 error = ipsec4_output(&state, sp, flags); 731 732 m = state.m; 733 if (flags & IP_ROUTETOIF) { 734 /* 735 * if we have tunnel mode SA, we may need to ignore 736 * IP_ROUTETOIF. 737 */ 738 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 739 flags &= ~IP_ROUTETOIF; 740 ro = state.ro; 741 } 742 } else 743 ro = state.ro; 744 dst = (struct sockaddr_in *)state.dst; 745 if (error) { 746 /* mbuf is already reclaimed in ipsec4_output. */ 747 m0 = NULL; 748 switch (error) { 749 case EHOSTUNREACH: 750 case ENETUNREACH: 751 case EMSGSIZE: 752 case ENOBUFS: 753 case ENOMEM: 754 break; 755 default: 756 printf("ip4_output (ipsec): error code %d\n", error); 757 /*fall through*/ 758 case ENOENT: 759 /* don't show these error codes to the user */ 760 error = 0; 761 break; 762 } 763 goto bad; 764 } 765 } 766 767 /* be sure to update variables that are affected by ipsec4_output() */ 768 ip = mtod(m, struct ip *); 769#ifdef _IP_VHL 770 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 771#else 772 hlen = ip->ip_hl << 2; 773#endif 774 if (ro->ro_rt == NULL) { 775 if ((flags & IP_ROUTETOIF) == 0) { 776 printf("ip_output: " 777 "can't update route after IPsec processing\n"); 778 error = EHOSTUNREACH; /*XXX*/ 779 goto bad; 780 } 781 } else { 782 ia = ifatoia(ro->ro_rt->rt_ifa); 783 ifp = ro->ro_rt->rt_ifp; 784 } 785 786 /* make it flipped, again. */ 787 NTOHS(ip->ip_len); 788 NTOHS(ip->ip_off); 789skip_ipsec: 790#endif /*IPSEC*/ 791 792 m->m_pkthdr.csum_flags |= CSUM_IP; 793 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 794 if (sw_csum & CSUM_DELAY_DATA) { 795 in_delayed_cksum(m); 796 sw_csum &= ~CSUM_DELAY_DATA; 797 } 798 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 799 800 /* 801 * If small enough for interface, or the interface will take 802 * care of the fragmentation for us, can just send directly. 803 */ 804 if ((u_short)ip->ip_len <= ifp->if_mtu || 805 ifp->if_hwassist & CSUM_FRAGMENT) { 806 HTONS(ip->ip_len); 807 HTONS(ip->ip_off); 808 ip->ip_sum = 0; 809 if (sw_csum & CSUM_DELAY_IP) { 810 if (ip->ip_vhl == IP_VHL_BORING) { 811 ip->ip_sum = in_cksum_hdr(ip); 812 } else { 813 ip->ip_sum = in_cksum(m, hlen); 814 } 815 } 816 817 /* Record statistics for this interface address. */ 818 if (!(flags & IP_FORWARDING)) { 819 ia->ia_ifa.if_opackets++; 820 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 821 } 822 823 error = (*ifp->if_output)(ifp, m, 824 (struct sockaddr *)dst, ro->ro_rt); 825 goto done; 826 } 827 /* 828 * Too large for interface; fragment if possible. 829 * Must be able to put at least 8 bytes per fragment. 830 */ 831 if (ip->ip_off & IP_DF) { 832 error = EMSGSIZE; 833 /* 834 * This case can happen if the user changed the MTU 835 * of an interface after enabling IP on it. Because 836 * most netifs don't keep track of routes pointing to 837 * them, there is no way for one to update all its 838 * routes when the MTU is changed. 839 */ 840 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) 841 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) 842 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 843 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 844 } 845 ipstat.ips_cantfrag++; 846 goto bad; 847 } 848 len = (ifp->if_mtu - hlen) &~ 7; 849 if (len < 8) { 850 error = EMSGSIZE; 851 goto bad; 852 } 853 854 /* 855 * if the interface will not calculate checksums on 856 * fragmented packets, then do it here. 857 */ 858 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 859 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { 860 in_delayed_cksum(m); 861 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 862 } 863 864 { 865 int mhlen, firstlen = len; 866 struct mbuf **mnext = &m->m_nextpkt; 867 int nfrags = 1; 868 869 /* 870 * Loop through length of segment after first fragment, 871 * make new header and copy data of each part and link onto chain. 872 */ 873 m0 = m; 874 mhlen = sizeof (struct ip); 875 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) { 876 MGETHDR(m, M_DONTWAIT, MT_HEADER); 877 if (m == 0) { 878 error = ENOBUFS; 879 ipstat.ips_odropped++; 880 goto sendorfree; 881 } 882 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 883 m->m_data += max_linkhdr; 884 mhip = mtod(m, struct ip *); 885 *mhip = *ip; 886 if (hlen > sizeof (struct ip)) { 887 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 888 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2); 889 } 890 m->m_len = mhlen; 891 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 892 if (off + len >= (u_short)ip->ip_len) 893 len = (u_short)ip->ip_len - off; 894 else 895 mhip->ip_off |= IP_MF; 896 mhip->ip_len = htons((u_short)(len + mhlen)); 897 m->m_next = m_copy(m0, off, len); 898 if (m->m_next == 0) { 899 (void) m_free(m); 900 error = ENOBUFS; /* ??? */ 901 ipstat.ips_odropped++; 902 goto sendorfree; 903 } 904 m->m_pkthdr.len = mhlen + len; 905 m->m_pkthdr.rcvif = (struct ifnet *)0; 906 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 907 HTONS(mhip->ip_off); 908 mhip->ip_sum = 0; 909 if (sw_csum & CSUM_DELAY_IP) { 910 if (mhip->ip_vhl == IP_VHL_BORING) { 911 mhip->ip_sum = in_cksum_hdr(mhip); 912 } else { 913 mhip->ip_sum = in_cksum(m, mhlen); 914 } 915 } 916 *mnext = m; 917 mnext = &m->m_nextpkt; 918 nfrags++; 919 } 920 ipstat.ips_ofragments += nfrags; 921 922 /* set first/last markers for fragment chain */ 923 m->m_flags |= M_LASTFRAG; 924 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 925 m0->m_pkthdr.csum_data = nfrags; 926 927 /* 928 * Update first fragment by trimming what's been copied out 929 * and updating header, then send each fragment (in order). 930 */ 931 m = m0; 932 m_adj(m, hlen + firstlen - (u_short)ip->ip_len); 933 m->m_pkthdr.len = hlen + firstlen; 934 ip->ip_len = htons((u_short)m->m_pkthdr.len); 935 ip->ip_off |= IP_MF; 936 HTONS(ip->ip_off); 937 ip->ip_sum = 0; 938 if (sw_csum & CSUM_DELAY_IP) { 939 if (ip->ip_vhl == IP_VHL_BORING) { 940 ip->ip_sum = in_cksum_hdr(ip); 941 } else { 942 ip->ip_sum = in_cksum(m, hlen); 943 } 944 } 945sendorfree: 946 for (m = m0; m; m = m0) { 947 m0 = m->m_nextpkt; 948 m->m_nextpkt = 0; 949 if (error == 0) { 950 /* Record statistics for this interface address. */ 951 ia->ia_ifa.if_opackets++; 952 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 953 954 error = (*ifp->if_output)(ifp, m, 955 (struct sockaddr *)dst, ro->ro_rt); 956 } else 957 m_freem(m); 958 } 959 960 if (error == 0) 961 ipstat.ips_fragmented++; 962 } 963done: 964#ifdef IPSEC 965 if (ro == &iproute && ro->ro_rt) { 966 RTFREE(ro->ro_rt); 967 ro->ro_rt = NULL; 968 } 969 if (sp != NULL) { 970 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 971 printf("DP ip_output call free SP:%p\n", sp)); 972 key_freesp(sp); 973 } 974#endif /* IPSEC */ 975 return (error); 976bad: 977 m_freem(m0); 978 goto done; 979} 980 981void 982in_delayed_cksum(struct mbuf *m) 983{ 984 struct ip *ip; 985 u_short csum, offset; 986 987 ip = mtod(m, struct ip *); 988 offset = IP_VHL_HL(ip->ip_vhl) << 2 ; 989 csum = in_cksum_skip(m, ip->ip_len, offset); 990 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 991 csum = 0xffff; 992 offset += m->m_pkthdr.csum_data; /* checksum offset */ 993 994 if (offset + sizeof(u_short) > m->m_len) { 995 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 996 m->m_len, offset, ip->ip_p); 997 /* 998 * XXX 999 * this shouldn't happen, but if it does, the 1000 * correct behavior may be to insert the checksum 1001 * in the existing chain instead of rearranging it. 1002 */ 1003 m = m_pullup(m, offset + sizeof(u_short)); 1004 } 1005 *(u_short *)(m->m_data + offset) = csum; 1006} 1007 1008/* 1009 * Insert IP options into preformed packet. 1010 * Adjust IP destination as required for IP source routing, 1011 * as indicated by a non-zero in_addr at the start of the options. 1012 * 1013 * XXX This routine assumes that the packet has no options in place. 1014 */ 1015static struct mbuf * 1016ip_insertoptions(m, opt, phlen) 1017 register struct mbuf *m; 1018 struct mbuf *opt; 1019 int *phlen; 1020{ 1021 register struct ipoption *p = mtod(opt, struct ipoption *); 1022 struct mbuf *n; 1023 register struct ip *ip = mtod(m, struct ip *); 1024 unsigned optlen; 1025 1026 optlen = opt->m_len - sizeof(p->ipopt_dst); 1027 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) 1028 return (m); /* XXX should fail */ 1029 if (p->ipopt_dst.s_addr) 1030 ip->ip_dst = p->ipopt_dst; 1031 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1032 MGETHDR(n, M_DONTWAIT, MT_HEADER); 1033 if (n == 0) 1034 return (m); 1035 n->m_pkthdr.rcvif = (struct ifnet *)0; 1036 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1037 m->m_len -= sizeof(struct ip); 1038 m->m_data += sizeof(struct ip); 1039 n->m_next = m; 1040 m = n; 1041 m->m_len = optlen + sizeof(struct ip); 1042 m->m_data += max_linkhdr; 1043 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip)); 1044 } else { 1045 m->m_data -= optlen; 1046 m->m_len += optlen; 1047 m->m_pkthdr.len += optlen; 1048 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 1049 } 1050 ip = mtod(m, struct ip *); 1051 bcopy(p->ipopt_list, ip + 1, optlen); 1052 *phlen = sizeof(struct ip) + optlen; 1053 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2); 1054 ip->ip_len += optlen; 1055 return (m); 1056} 1057 1058/* 1059 * Copy options from ip to jp, 1060 * omitting those not copied during fragmentation. 1061 */ 1062int 1063ip_optcopy(ip, jp) 1064 struct ip *ip, *jp; 1065{ 1066 register u_char *cp, *dp; 1067 int opt, optlen, cnt; 1068 1069 cp = (u_char *)(ip + 1); 1070 dp = (u_char *)(jp + 1); 1071 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip); 1072 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1073 opt = cp[0]; 1074 if (opt == IPOPT_EOL) 1075 break; 1076 if (opt == IPOPT_NOP) { 1077 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1078 *dp++ = IPOPT_NOP; 1079 optlen = 1; 1080 continue; 1081 } 1082#ifdef DIAGNOSTIC 1083 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1084 panic("malformed IPv4 option passed to ip_optcopy"); 1085#endif 1086 optlen = cp[IPOPT_OLEN]; 1087#ifdef DIAGNOSTIC 1088 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1089 panic("malformed IPv4 option passed to ip_optcopy"); 1090#endif 1091 /* bogus lengths should have been caught by ip_dooptions */ 1092 if (optlen > cnt) 1093 optlen = cnt; 1094 if (IPOPT_COPIED(opt)) { 1095 bcopy(cp, dp, optlen); 1096 dp += optlen; 1097 } 1098 } 1099 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1100 *dp++ = IPOPT_EOL; 1101 return (optlen); 1102} 1103 1104/* 1105 * IP socket option processing. 1106 */ 1107int 1108ip_ctloutput(so, sopt) 1109 struct socket *so; 1110 struct sockopt *sopt; 1111{ 1112 struct inpcb *inp = sotoinpcb(so); 1113 int error, optval; 1114 1115 error = optval = 0; 1116 if (sopt->sopt_level != IPPROTO_IP) { 1117 return (EINVAL); 1118 } 1119 1120 switch (sopt->sopt_dir) { 1121 case SOPT_SET: 1122 switch (sopt->sopt_name) { 1123 case IP_OPTIONS: 1124#ifdef notyet 1125 case IP_RETOPTS: 1126#endif 1127 { 1128 struct mbuf *m; 1129 if (sopt->sopt_valsize > MLEN) { 1130 error = EMSGSIZE; 1131 break; 1132 } 1133 MGET(m, sopt->sopt_p ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); 1134 if (m == 0) { 1135 error = ENOBUFS; 1136 break; 1137 } 1138 m->m_len = sopt->sopt_valsize; 1139 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1140 m->m_len); 1141 1142 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1143 m)); 1144 } 1145 1146 case IP_TOS: 1147 case IP_TTL: 1148 case IP_RECVOPTS: 1149 case IP_RECVRETOPTS: 1150 case IP_RECVDSTADDR: 1151 case IP_RECVIF: 1152#if defined(NFAITH) && NFAITH > 0 1153 case IP_FAITH: 1154#endif 1155 error = sooptcopyin(sopt, &optval, sizeof optval, 1156 sizeof optval); 1157 if (error) 1158 break; 1159 1160 switch (sopt->sopt_name) { 1161 case IP_TOS: 1162 inp->inp_ip_tos = optval; 1163 break; 1164 1165 case IP_TTL: 1166 inp->inp_ip_ttl = optval; 1167 break; 1168#define OPTSET(bit) \ 1169 if (optval) \ 1170 inp->inp_flags |= bit; \ 1171 else \ 1172 inp->inp_flags &= ~bit; 1173 1174 case IP_RECVOPTS: 1175 OPTSET(INP_RECVOPTS); 1176 break; 1177 1178 case IP_RECVRETOPTS: 1179 OPTSET(INP_RECVRETOPTS); 1180 break; 1181 1182 case IP_RECVDSTADDR: 1183 OPTSET(INP_RECVDSTADDR); 1184 break; 1185 1186 case IP_RECVIF: 1187 OPTSET(INP_RECVIF); 1188 break; 1189 1190#if defined(NFAITH) && NFAITH > 0 1191 case IP_FAITH: 1192 OPTSET(INP_FAITH); 1193 break; 1194#endif 1195 } 1196 break; 1197#undef OPTSET 1198 1199 case IP_MULTICAST_IF: 1200 case IP_MULTICAST_VIF: 1201 case IP_MULTICAST_TTL: 1202 case IP_MULTICAST_LOOP: 1203 case IP_ADD_MEMBERSHIP: 1204 case IP_DROP_MEMBERSHIP: 1205 error = ip_setmoptions(sopt, &inp->inp_moptions); 1206 break; 1207 1208 case IP_PORTRANGE: 1209 error = sooptcopyin(sopt, &optval, sizeof optval, 1210 sizeof optval); 1211 if (error) 1212 break; 1213 1214 switch (optval) { 1215 case IP_PORTRANGE_DEFAULT: 1216 inp->inp_flags &= ~(INP_LOWPORT); 1217 inp->inp_flags &= ~(INP_HIGHPORT); 1218 break; 1219 1220 case IP_PORTRANGE_HIGH: 1221 inp->inp_flags &= ~(INP_LOWPORT); 1222 inp->inp_flags |= INP_HIGHPORT; 1223 break; 1224 1225 case IP_PORTRANGE_LOW: 1226 inp->inp_flags &= ~(INP_HIGHPORT); 1227 inp->inp_flags |= INP_LOWPORT; 1228 break; 1229 1230 default: 1231 error = EINVAL; 1232 break; 1233 } 1234 break; 1235 1236#ifdef IPSEC 1237 case IP_IPSEC_POLICY: 1238 { 1239 caddr_t req; 1240 size_t len = 0; 1241 int priv; 1242 struct mbuf *m; 1243 int optname; 1244 1245 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1246 break; 1247 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1248 break; 1249 priv = (sopt->sopt_p != NULL && 1250 suser(sopt->sopt_p) != 0) ? 0 : 1; 1251 req = mtod(m, caddr_t); 1252 len = m->m_len; 1253 optname = sopt->sopt_name; 1254 error = ipsec4_set_policy(inp, optname, req, len, priv); 1255 m_freem(m); 1256 break; 1257 } 1258#endif /*IPSEC*/ 1259 1260 default: 1261 error = ENOPROTOOPT; 1262 break; 1263 } 1264 break; 1265 1266 case SOPT_GET: 1267 switch (sopt->sopt_name) { 1268 case IP_OPTIONS: 1269 case IP_RETOPTS: 1270 if (inp->inp_options) 1271 error = sooptcopyout(sopt, 1272 mtod(inp->inp_options, 1273 char *), 1274 inp->inp_options->m_len); 1275 else 1276 sopt->sopt_valsize = 0; 1277 break; 1278 1279 case IP_TOS: 1280 case IP_TTL: 1281 case IP_RECVOPTS: 1282 case IP_RECVRETOPTS: 1283 case IP_RECVDSTADDR: 1284 case IP_RECVIF: 1285 case IP_PORTRANGE: 1286#if defined(NFAITH) && NFAITH > 0 1287 case IP_FAITH: 1288#endif 1289 switch (sopt->sopt_name) { 1290 1291 case IP_TOS: 1292 optval = inp->inp_ip_tos; 1293 break; 1294 1295 case IP_TTL: 1296 optval = inp->inp_ip_ttl; 1297 break; 1298 1299#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1300 1301 case IP_RECVOPTS: 1302 optval = OPTBIT(INP_RECVOPTS); 1303 break; 1304 1305 case IP_RECVRETOPTS: 1306 optval = OPTBIT(INP_RECVRETOPTS); 1307 break; 1308 1309 case IP_RECVDSTADDR: 1310 optval = OPTBIT(INP_RECVDSTADDR); 1311 break; 1312 1313 case IP_RECVIF: 1314 optval = OPTBIT(INP_RECVIF); 1315 break; 1316 1317 case IP_PORTRANGE: 1318 if (inp->inp_flags & INP_HIGHPORT) 1319 optval = IP_PORTRANGE_HIGH; 1320 else if (inp->inp_flags & INP_LOWPORT) 1321 optval = IP_PORTRANGE_LOW; 1322 else 1323 optval = 0; 1324 break; 1325 1326#if defined(NFAITH) && NFAITH > 0 1327 case IP_FAITH: 1328 optval = OPTBIT(INP_FAITH); 1329 break; 1330#endif 1331 } 1332 error = sooptcopyout(sopt, &optval, sizeof optval); 1333 break; 1334 1335 case IP_MULTICAST_IF: 1336 case IP_MULTICAST_VIF: 1337 case IP_MULTICAST_TTL: 1338 case IP_MULTICAST_LOOP: 1339 case IP_ADD_MEMBERSHIP: 1340 case IP_DROP_MEMBERSHIP: 1341 error = ip_getmoptions(sopt, inp->inp_moptions); 1342 break; 1343 1344#ifdef IPSEC 1345 case IP_IPSEC_POLICY: 1346 { 1347 struct mbuf *m = NULL; 1348 caddr_t req = NULL; 1349 size_t len = 0; 1350 1351 if (m != 0) { 1352 req = mtod(m, caddr_t); 1353 len = m->m_len; 1354 } 1355 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1356 if (error == 0) 1357 error = soopt_mcopyout(sopt, m); /* XXX */ 1358 if (error == 0) 1359 m_freem(m); 1360 break; 1361 } 1362#endif /*IPSEC*/ 1363 1364 default: 1365 error = ENOPROTOOPT; 1366 break; 1367 } 1368 break; 1369 } 1370 return (error); 1371} 1372 1373/* 1374 * Set up IP options in pcb for insertion in output packets. 1375 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1376 * with destination address if source routed. 1377 */ 1378static int 1379ip_pcbopts(optname, pcbopt, m) 1380 int optname; 1381 struct mbuf **pcbopt; 1382 register struct mbuf *m; 1383{ 1384 register int cnt, optlen; 1385 register u_char *cp; 1386 u_char opt; 1387 1388 /* turn off any old options */ 1389 if (*pcbopt) 1390 (void)m_free(*pcbopt); 1391 *pcbopt = 0; 1392 if (m == (struct mbuf *)0 || m->m_len == 0) { 1393 /* 1394 * Only turning off any previous options. 1395 */ 1396 if (m) 1397 (void)m_free(m); 1398 return (0); 1399 } 1400 1401 if (m->m_len % sizeof(int32_t)) 1402 goto bad; 1403 /* 1404 * IP first-hop destination address will be stored before 1405 * actual options; move other options back 1406 * and clear it when none present. 1407 */ 1408 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1409 goto bad; 1410 cnt = m->m_len; 1411 m->m_len += sizeof(struct in_addr); 1412 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1413 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1414 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1415 1416 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1417 opt = cp[IPOPT_OPTVAL]; 1418 if (opt == IPOPT_EOL) 1419 break; 1420 if (opt == IPOPT_NOP) 1421 optlen = 1; 1422 else { 1423 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1424 goto bad; 1425 optlen = cp[IPOPT_OLEN]; 1426 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1427 goto bad; 1428 } 1429 switch (opt) { 1430 1431 default: 1432 break; 1433 1434 case IPOPT_LSRR: 1435 case IPOPT_SSRR: 1436 /* 1437 * user process specifies route as: 1438 * ->A->B->C->D 1439 * D must be our final destination (but we can't 1440 * check that since we may not have connected yet). 1441 * A is first hop destination, which doesn't appear in 1442 * actual IP option, but is stored before the options. 1443 */ 1444 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1445 goto bad; 1446 m->m_len -= sizeof(struct in_addr); 1447 cnt -= sizeof(struct in_addr); 1448 optlen -= sizeof(struct in_addr); 1449 cp[IPOPT_OLEN] = optlen; 1450 /* 1451 * Move first hop before start of options. 1452 */ 1453 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1454 sizeof(struct in_addr)); 1455 /* 1456 * Then copy rest of options back 1457 * to close up the deleted entry. 1458 */ 1459 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1460 sizeof(struct in_addr)), 1461 (caddr_t)&cp[IPOPT_OFFSET+1], 1462 (unsigned)cnt + sizeof(struct in_addr)); 1463 break; 1464 } 1465 } 1466 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1467 goto bad; 1468 *pcbopt = m; 1469 return (0); 1470 1471bad: 1472 (void)m_free(m); 1473 return (EINVAL); 1474} 1475 1476/* 1477 * XXX 1478 * The whole multicast option thing needs to be re-thought. 1479 * Several of these options are equally applicable to non-multicast 1480 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1481 * standard option (IP_TTL). 1482 */ 1483/* 1484 * Set the IP multicast options in response to user setsockopt(). 1485 */ 1486static int 1487ip_setmoptions(sopt, imop) 1488 struct sockopt *sopt; 1489 struct ip_moptions **imop; 1490{ 1491 int error = 0; 1492 int i; 1493 struct in_addr addr; 1494 struct ip_mreq mreq; 1495 struct ifnet *ifp; 1496 struct ip_moptions *imo = *imop; 1497 struct route ro; 1498 struct sockaddr_in *dst; 1499 int s; 1500 1501 if (imo == NULL) { 1502 /* 1503 * No multicast option buffer attached to the pcb; 1504 * allocate one and initialize to default values. 1505 */ 1506 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, 1507 M_WAITOK); 1508 1509 if (imo == NULL) 1510 return (ENOBUFS); 1511 *imop = imo; 1512 imo->imo_multicast_ifp = NULL; 1513 imo->imo_multicast_vif = -1; 1514 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1515 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1516 imo->imo_num_memberships = 0; 1517 } 1518 1519 switch (sopt->sopt_name) { 1520 /* store an index number for the vif you wanna use in the send */ 1521 case IP_MULTICAST_VIF: 1522 if (legal_vif_num == 0) { 1523 error = EOPNOTSUPP; 1524 break; 1525 } 1526 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1527 if (error) 1528 break; 1529 if (!legal_vif_num(i) && (i != -1)) { 1530 error = EINVAL; 1531 break; 1532 } 1533 imo->imo_multicast_vif = i; 1534 break; 1535 1536 case IP_MULTICAST_IF: 1537 /* 1538 * Select the interface for outgoing multicast packets. 1539 */ 1540 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1541 if (error) 1542 break; 1543 /* 1544 * INADDR_ANY is used to remove a previous selection. 1545 * When no interface is selected, a default one is 1546 * chosen every time a multicast packet is sent. 1547 */ 1548 if (addr.s_addr == INADDR_ANY) { 1549 imo->imo_multicast_ifp = NULL; 1550 break; 1551 } 1552 /* 1553 * The selected interface is identified by its local 1554 * IP address. Find the interface and confirm that 1555 * it supports multicasting. 1556 */ 1557 s = splimp(); 1558 INADDR_TO_IFP(addr, ifp); 1559 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1560 splx(s); 1561 error = EADDRNOTAVAIL; 1562 break; 1563 } 1564 imo->imo_multicast_ifp = ifp; 1565 splx(s); 1566 break; 1567 1568 case IP_MULTICAST_TTL: 1569 /* 1570 * Set the IP time-to-live for outgoing multicast packets. 1571 * The original multicast API required a char argument, 1572 * which is inconsistent with the rest of the socket API. 1573 * We allow either a char or an int. 1574 */ 1575 if (sopt->sopt_valsize == 1) { 1576 u_char ttl; 1577 error = sooptcopyin(sopt, &ttl, 1, 1); 1578 if (error) 1579 break; 1580 imo->imo_multicast_ttl = ttl; 1581 } else { 1582 u_int ttl; 1583 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1584 sizeof ttl); 1585 if (error) 1586 break; 1587 if (ttl > 255) 1588 error = EINVAL; 1589 else 1590 imo->imo_multicast_ttl = ttl; 1591 } 1592 break; 1593 1594 case IP_MULTICAST_LOOP: 1595 /* 1596 * Set the loopback flag for outgoing multicast packets. 1597 * Must be zero or one. The original multicast API required a 1598 * char argument, which is inconsistent with the rest 1599 * of the socket API. We allow either a char or an int. 1600 */ 1601 if (sopt->sopt_valsize == 1) { 1602 u_char loop; 1603 error = sooptcopyin(sopt, &loop, 1, 1); 1604 if (error) 1605 break; 1606 imo->imo_multicast_loop = !!loop; 1607 } else { 1608 u_int loop; 1609 error = sooptcopyin(sopt, &loop, sizeof loop, 1610 sizeof loop); 1611 if (error) 1612 break; 1613 imo->imo_multicast_loop = !!loop; 1614 } 1615 break; 1616 1617 case IP_ADD_MEMBERSHIP: 1618 /* 1619 * Add a multicast group membership. 1620 * Group must be a valid IP multicast address. 1621 */ 1622 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1623 if (error) 1624 break; 1625 1626 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1627 error = EINVAL; 1628 break; 1629 } 1630 s = splimp(); 1631 /* 1632 * If no interface address was provided, use the interface of 1633 * the route to the given multicast address. 1634 */ 1635 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1636 bzero((caddr_t)&ro, sizeof(ro)); 1637 dst = (struct sockaddr_in *)&ro.ro_dst; 1638 dst->sin_len = sizeof(*dst); 1639 dst->sin_family = AF_INET; 1640 dst->sin_addr = mreq.imr_multiaddr; 1641 rtalloc(&ro); 1642 if (ro.ro_rt == NULL) { 1643 error = EADDRNOTAVAIL; 1644 splx(s); 1645 break; 1646 } 1647 ifp = ro.ro_rt->rt_ifp; 1648 rtfree(ro.ro_rt); 1649 } 1650 else { 1651 INADDR_TO_IFP(mreq.imr_interface, ifp); 1652 } 1653 1654 /* 1655 * See if we found an interface, and confirm that it 1656 * supports multicast. 1657 */ 1658 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1659 error = EADDRNOTAVAIL; 1660 splx(s); 1661 break; 1662 } 1663 /* 1664 * See if the membership already exists or if all the 1665 * membership slots are full. 1666 */ 1667 for (i = 0; i < imo->imo_num_memberships; ++i) { 1668 if (imo->imo_membership[i]->inm_ifp == ifp && 1669 imo->imo_membership[i]->inm_addr.s_addr 1670 == mreq.imr_multiaddr.s_addr) 1671 break; 1672 } 1673 if (i < imo->imo_num_memberships) { 1674 error = EADDRINUSE; 1675 splx(s); 1676 break; 1677 } 1678 if (i == IP_MAX_MEMBERSHIPS) { 1679 error = ETOOMANYREFS; 1680 splx(s); 1681 break; 1682 } 1683 /* 1684 * Everything looks good; add a new record to the multicast 1685 * address list for the given interface. 1686 */ 1687 if ((imo->imo_membership[i] = 1688 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 1689 error = ENOBUFS; 1690 splx(s); 1691 break; 1692 } 1693 ++imo->imo_num_memberships; 1694 splx(s); 1695 break; 1696 1697 case IP_DROP_MEMBERSHIP: 1698 /* 1699 * Drop a multicast group membership. 1700 * Group must be a valid IP multicast address. 1701 */ 1702 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1703 if (error) 1704 break; 1705 1706 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1707 error = EINVAL; 1708 break; 1709 } 1710 1711 s = splimp(); 1712 /* 1713 * If an interface address was specified, get a pointer 1714 * to its ifnet structure. 1715 */ 1716 if (mreq.imr_interface.s_addr == INADDR_ANY) 1717 ifp = NULL; 1718 else { 1719 INADDR_TO_IFP(mreq.imr_interface, ifp); 1720 if (ifp == NULL) { 1721 error = EADDRNOTAVAIL; 1722 splx(s); 1723 break; 1724 } 1725 } 1726 /* 1727 * Find the membership in the membership array. 1728 */ 1729 for (i = 0; i < imo->imo_num_memberships; ++i) { 1730 if ((ifp == NULL || 1731 imo->imo_membership[i]->inm_ifp == ifp) && 1732 imo->imo_membership[i]->inm_addr.s_addr == 1733 mreq.imr_multiaddr.s_addr) 1734 break; 1735 } 1736 if (i == imo->imo_num_memberships) { 1737 error = EADDRNOTAVAIL; 1738 splx(s); 1739 break; 1740 } 1741 /* 1742 * Give up the multicast address record to which the 1743 * membership points. 1744 */ 1745 in_delmulti(imo->imo_membership[i]); 1746 /* 1747 * Remove the gap in the membership array. 1748 */ 1749 for (++i; i < imo->imo_num_memberships; ++i) 1750 imo->imo_membership[i-1] = imo->imo_membership[i]; 1751 --imo->imo_num_memberships; 1752 splx(s); 1753 break; 1754 1755 default: 1756 error = EOPNOTSUPP; 1757 break; 1758 } 1759 1760 /* 1761 * If all options have default values, no need to keep the mbuf. 1762 */ 1763 if (imo->imo_multicast_ifp == NULL && 1764 imo->imo_multicast_vif == -1 && 1765 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1766 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1767 imo->imo_num_memberships == 0) { 1768 free(*imop, M_IPMOPTS); 1769 *imop = NULL; 1770 } 1771 1772 return (error); 1773} 1774 1775/* 1776 * Return the IP multicast options in response to user getsockopt(). 1777 */ 1778static int 1779ip_getmoptions(sopt, imo) 1780 struct sockopt *sopt; 1781 register struct ip_moptions *imo; 1782{ 1783 struct in_addr addr; 1784 struct in_ifaddr *ia; 1785 int error, optval; 1786 u_char coptval; 1787 1788 error = 0; 1789 switch (sopt->sopt_name) { 1790 case IP_MULTICAST_VIF: 1791 if (imo != NULL) 1792 optval = imo->imo_multicast_vif; 1793 else 1794 optval = -1; 1795 error = sooptcopyout(sopt, &optval, sizeof optval); 1796 break; 1797 1798 case IP_MULTICAST_IF: 1799 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1800 addr.s_addr = INADDR_ANY; 1801 else { 1802 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1803 addr.s_addr = (ia == NULL) ? INADDR_ANY 1804 : IA_SIN(ia)->sin_addr.s_addr; 1805 } 1806 error = sooptcopyout(sopt, &addr, sizeof addr); 1807 break; 1808 1809 case IP_MULTICAST_TTL: 1810 if (imo == 0) 1811 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1812 else 1813 optval = coptval = imo->imo_multicast_ttl; 1814 if (sopt->sopt_valsize == 1) 1815 error = sooptcopyout(sopt, &coptval, 1); 1816 else 1817 error = sooptcopyout(sopt, &optval, sizeof optval); 1818 break; 1819 1820 case IP_MULTICAST_LOOP: 1821 if (imo == 0) 1822 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1823 else 1824 optval = coptval = imo->imo_multicast_loop; 1825 if (sopt->sopt_valsize == 1) 1826 error = sooptcopyout(sopt, &coptval, 1); 1827 else 1828 error = sooptcopyout(sopt, &optval, sizeof optval); 1829 break; 1830 1831 default: 1832 error = ENOPROTOOPT; 1833 break; 1834 } 1835 return (error); 1836} 1837 1838/* 1839 * Discard the IP multicast options. 1840 */ 1841void 1842ip_freemoptions(imo) 1843 register struct ip_moptions *imo; 1844{ 1845 register int i; 1846 1847 if (imo != NULL) { 1848 for (i = 0; i < imo->imo_num_memberships; ++i) 1849 in_delmulti(imo->imo_membership[i]); 1850 free(imo, M_IPMOPTS); 1851 } 1852} 1853 1854/* 1855 * Routine called from ip_output() to loop back a copy of an IP multicast 1856 * packet to the input queue of a specified interface. Note that this 1857 * calls the output routine of the loopback "driver", but with an interface 1858 * pointer that might NOT be a loopback interface -- evil, but easier than 1859 * replicating that code here. 1860 */ 1861static void 1862ip_mloopback(ifp, m, dst, hlen) 1863 struct ifnet *ifp; 1864 register struct mbuf *m; 1865 register struct sockaddr_in *dst; 1866 int hlen; 1867{ 1868 register struct ip *ip; 1869 struct mbuf *copym; 1870 1871 copym = m_copy(m, 0, M_COPYALL); 1872 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 1873 copym = m_pullup(copym, hlen); 1874 if (copym != NULL) { 1875 /* 1876 * We don't bother to fragment if the IP length is greater 1877 * than the interface's MTU. Can this possibly matter? 1878 */ 1879 ip = mtod(copym, struct ip *); 1880 HTONS(ip->ip_len); 1881 HTONS(ip->ip_off); 1882 ip->ip_sum = 0; 1883 if (ip->ip_vhl == IP_VHL_BORING) { 1884 ip->ip_sum = in_cksum_hdr(ip); 1885 } else { 1886 ip->ip_sum = in_cksum(copym, hlen); 1887 } 1888 /* 1889 * NB: 1890 * It's not clear whether there are any lingering 1891 * reentrancy problems in other areas which might 1892 * be exposed by using ip_input directly (in 1893 * particular, everything which modifies the packet 1894 * in-place). Yet another option is using the 1895 * protosw directly to deliver the looped back 1896 * packet. For the moment, we'll err on the side 1897 * of safety by using if_simloop(). 1898 */ 1899#if 1 /* XXX */ 1900 if (dst->sin_family != AF_INET) { 1901 printf("ip_mloopback: bad address family %d\n", 1902 dst->sin_family); 1903 dst->sin_family = AF_INET; 1904 } 1905#endif 1906 1907#ifdef notdef 1908 copym->m_pkthdr.rcvif = ifp; 1909 ip_input(copym); 1910#else 1911 /* if the checksum hasn't been computed, mark it as valid */ 1912 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1913 copym->m_pkthdr.csum_flags |= 1914 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1915 copym->m_pkthdr.csum_data = 0xffff; 1916 } 1917 if_simloop(ifp, copym, dst->sin_family, 0); 1918#endif 1919 } 1920} 1921