ip_output.c revision 98613
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 34 * $FreeBSD: head/sys/netinet/ip_output.c 98613 2002-06-22 11:51:02Z luigi $ 35 */ 36 37#define _IP_VHL 38 39#include "opt_ipfw.h" 40#include "opt_ipdn.h" 41#include "opt_ipdivert.h" 42#include "opt_ipfilter.h" 43#include "opt_ipsec.h" 44#include "opt_pfil_hooks.h" 45#include "opt_random_ip_id.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/malloc.h> 51#include <sys/mbuf.h> 52#include <sys/protosw.h> 53#include <sys/socket.h> 54#include <sys/socketvar.h> 55 56#include <net/if.h> 57#include <net/route.h> 58 59#include <netinet/in.h> 60#include <netinet/in_systm.h> 61#include <netinet/ip.h> 62#include <netinet/in_pcb.h> 63#include <netinet/in_var.h> 64#include <netinet/ip_var.h> 65 66#include <machine/in_cksum.h> 67 68static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 69 70#ifdef IPSEC 71#include <netinet6/ipsec.h> 72#include <netkey/key.h> 73#ifdef IPSEC_DEBUG 74#include <netkey/key_debug.h> 75#else 76#define KEYDEBUG(lev,arg) 77#endif 78#endif /*IPSEC*/ 79 80#include <netinet/ip_fw.h> 81#include <netinet/ip_dummynet.h> 82 83#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 84 x, (ntohl(a.s_addr)>>24)&0xFF,\ 85 (ntohl(a.s_addr)>>16)&0xFF,\ 86 (ntohl(a.s_addr)>>8)&0xFF,\ 87 (ntohl(a.s_addr))&0xFF, y); 88 89u_short ip_id; 90 91static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 92static struct ifnet *ip_multicast_if(struct in_addr *, int *); 93static void ip_mloopback 94 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 95static int ip_getmoptions 96 (struct sockopt *, struct ip_moptions *); 97static int ip_pcbopts(int, struct mbuf **, struct mbuf *); 98static int ip_setmoptions 99 (struct sockopt *, struct ip_moptions **); 100 101int ip_optcopy(struct ip *, struct ip *); 102 103 104extern struct protosw inetsw[]; 105 106/* 107 * IP output. The packet in mbuf chain m contains a skeletal IP 108 * header (with len, off, ttl, proto, tos, src, dst). 109 * The mbuf chain containing the packet will be freed. 110 * The mbuf opt, if present, will not be freed. 111 */ 112int 113ip_output(m0, opt, ro, flags, imo) 114 struct mbuf *m0; 115 struct mbuf *opt; 116 struct route *ro; 117 int flags; 118 struct ip_moptions *imo; 119{ 120 struct ip *ip, *mhip; 121 struct ifnet *ifp = NULL; /* keep compiler happy */ 122 struct mbuf *m; 123 int hlen = sizeof (struct ip); 124 int len, off, error = 0; 125 struct sockaddr_in *dst = NULL; /* keep compiler happy */ 126 struct in_ifaddr *ia; 127 int isbroadcast, sw_csum; 128 struct in_addr pkt_dst; 129#ifdef IPSEC 130 struct route iproute; 131 struct socket *so = NULL; 132 struct secpolicy *sp = NULL; 133#endif 134 struct ip_fw_args args; 135 int src_was_INADDR_ANY = 0; /* as the name says... */ 136#ifdef PFIL_HOOKS 137 struct packet_filter_hook *pfh; 138 struct mbuf *m1; 139 int rv; 140#endif /* PFIL_HOOKS */ 141 142 args.eh = NULL; 143 args.rule = NULL; 144 args.next_hop = NULL; 145 args.divert_rule = 0; /* divert cookie */ 146 147 /* Grab info from MT_TAG mbufs prepended to the chain. */ 148 for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) { 149 switch(m0->m_tag_id) { 150 default: 151 printf("ip_output: unrecognised MT_TAG tag %d\n", 152 m0->m_tag_id); 153 break; 154 155 case PACKET_TAG_DUMMYNET: 156 /* 157 * the packet was already tagged, so part of the 158 * processing was already done, and we need to go down. 159 * Get parameters from the header. 160 */ 161 args.rule = ((struct dn_pkt *)m0)->rule; 162 opt = NULL ; 163 ro = & ( ((struct dn_pkt *)m0)->ro ) ; 164 imo = NULL ; 165 dst = ((struct dn_pkt *)m0)->dn_dst ; 166 ifp = ((struct dn_pkt *)m0)->ifp ; 167 flags = ((struct dn_pkt *)m0)->flags ; 168 break; 169 170 case PACKET_TAG_DIVERT: 171 args.divert_rule = (int)m0->m_data & 0xffff; 172 break; 173 174 case PACKET_TAG_IPFORWARD: 175 args.next_hop = (struct sockaddr_in *)m0->m_data; 176 break; 177 } 178 } 179 m = m0; 180 181 KASSERT(!m || (m->m_flags & M_PKTHDR) != 0, ("ip_output: no HDR")); 182 183 KASSERT(ro != NULL, ("ip_output: no route, proto %d", 184 mtod(m, struct ip *)->ip_p)); 185 186#ifdef IPSEC 187 so = ipsec_getsocket(m); 188 (void)ipsec_setsocket(m, NULL); 189#endif 190 if (args.rule != NULL) { /* dummynet already saw us */ 191 ip = mtod(m, struct ip *); 192 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; 193 ia = ifatoia(ro->ro_rt->rt_ifa); 194 goto sendit; 195 } 196 197 if (opt) { 198 m = ip_insertoptions(m, opt, &len); 199 hlen = len; 200 } 201 ip = mtod(m, struct ip *); 202 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; 203 204 /* 205 * Fill in IP header. 206 */ 207 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 208 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); 209 ip->ip_off &= IP_DF; 210#ifdef RANDOM_IP_ID 211 ip->ip_id = ip_randomid(); 212#else 213 ip->ip_id = htons(ip_id++); 214#endif 215 ipstat.ips_localout++; 216 } else { 217 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 218 } 219 220 dst = (struct sockaddr_in *)&ro->ro_dst; 221 /* 222 * If there is a cached route, 223 * check that it is to the same destination 224 * and is still up. If not, free it and try again. 225 * The address family should also be checked in case of sharing the 226 * cache with IPv6. 227 */ 228 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 229 dst->sin_family != AF_INET || 230 dst->sin_addr.s_addr != pkt_dst.s_addr)) { 231 RTFREE(ro->ro_rt); 232 ro->ro_rt = (struct rtentry *)0; 233 } 234 if (ro->ro_rt == 0) { 235 bzero(dst, sizeof(*dst)); 236 dst->sin_family = AF_INET; 237 dst->sin_len = sizeof(*dst); 238 dst->sin_addr = pkt_dst; 239 } 240 /* 241 * If routing to interface only, 242 * short circuit routing lookup. 243 */ 244 if (flags & IP_ROUTETOIF) { 245 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 246 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 247 ipstat.ips_noroute++; 248 error = ENETUNREACH; 249 goto bad; 250 } 251 ifp = ia->ia_ifp; 252 ip->ip_ttl = 1; 253 isbroadcast = in_broadcast(dst->sin_addr, ifp); 254 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 255 imo != NULL && imo->imo_multicast_ifp != NULL) { 256 /* 257 * Bypass the normal routing lookup for multicast 258 * packets if the interface is specified. 259 */ 260 ifp = imo->imo_multicast_ifp; 261 IFP_TO_IA(ifp, ia); 262 isbroadcast = 0; /* fool gcc */ 263 } else { 264 /* 265 * If this is the case, we probably don't want to allocate 266 * a protocol-cloned route since we didn't get one from the 267 * ULP. This lets TCP do its thing, while not burdening 268 * forwarding or ICMP with the overhead of cloning a route. 269 * Of course, we still want to do any cloning requested by 270 * the link layer, as this is probably required in all cases 271 * for correct operation (as it is for ARP). 272 */ 273 if (ro->ro_rt == 0) 274 rtalloc_ign(ro, RTF_PRCLONING); 275 if (ro->ro_rt == 0) { 276 ipstat.ips_noroute++; 277 error = EHOSTUNREACH; 278 goto bad; 279 } 280 ia = ifatoia(ro->ro_rt->rt_ifa); 281 ifp = ro->ro_rt->rt_ifp; 282 ro->ro_rt->rt_use++; 283 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 284 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 285 if (ro->ro_rt->rt_flags & RTF_HOST) 286 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 287 else 288 isbroadcast = in_broadcast(dst->sin_addr, ifp); 289 } 290 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { 291 struct in_multi *inm; 292 293 m->m_flags |= M_MCAST; 294 /* 295 * IP destination address is multicast. Make sure "dst" 296 * still points to the address in "ro". (It may have been 297 * changed to point to a gateway address, above.) 298 */ 299 dst = (struct sockaddr_in *)&ro->ro_dst; 300 /* 301 * See if the caller provided any multicast options 302 */ 303 if (imo != NULL) { 304 ip->ip_ttl = imo->imo_multicast_ttl; 305 if (imo->imo_multicast_vif != -1) 306 ip->ip_src.s_addr = 307 ip_mcast_src(imo->imo_multicast_vif); 308 } else 309 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 310 /* 311 * Confirm that the outgoing interface supports multicast. 312 */ 313 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 314 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 315 ipstat.ips_noroute++; 316 error = ENETUNREACH; 317 goto bad; 318 } 319 } 320 /* 321 * If source address not specified yet, use address 322 * of outgoing interface. 323 */ 324 if (ip->ip_src.s_addr == INADDR_ANY) { 325 /* Interface may have no addresses. */ 326 if (ia != NULL) 327 ip->ip_src = IA_SIN(ia)->sin_addr; 328 } 329 330 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 331 /* 332 * XXX 333 * delayed checksums are not currently 334 * compatible with IP multicast routing 335 */ 336 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 337 in_delayed_cksum(m); 338 m->m_pkthdr.csum_flags &= 339 ~CSUM_DELAY_DATA; 340 } 341 } 342 IN_LOOKUP_MULTI(pkt_dst, ifp, inm); 343 if (inm != NULL && 344 (imo == NULL || imo->imo_multicast_loop)) { 345 /* 346 * If we belong to the destination multicast group 347 * on the outgoing interface, and the caller did not 348 * forbid loopback, loop back a copy. 349 */ 350 ip_mloopback(ifp, m, dst, hlen); 351 } 352 else { 353 /* 354 * If we are acting as a multicast router, perform 355 * multicast forwarding as if the packet had just 356 * arrived on the interface to which we are about 357 * to send. The multicast forwarding function 358 * recursively calls this function, using the 359 * IP_FORWARDING flag to prevent infinite recursion. 360 * 361 * Multicasts that are looped back by ip_mloopback(), 362 * above, will be forwarded by the ip_input() routine, 363 * if necessary. 364 */ 365 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 366 /* 367 * Check if rsvp daemon is running. If not, don't 368 * set ip_moptions. This ensures that the packet 369 * is multicast and not just sent down one link 370 * as prescribed by rsvpd. 371 */ 372 if (!rsvp_on) 373 imo = NULL; 374 if (ip_mforward(ip, ifp, m, imo) != 0) { 375 m_freem(m); 376 goto done; 377 } 378 } 379 } 380 381 /* 382 * Multicasts with a time-to-live of zero may be looped- 383 * back, above, but must not be transmitted on a network. 384 * Also, multicasts addressed to the loopback interface 385 * are not sent -- the above call to ip_mloopback() will 386 * loop back a copy if this host actually belongs to the 387 * destination group on the loopback interface. 388 */ 389 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 390 m_freem(m); 391 goto done; 392 } 393 394 goto sendit; 395 } 396#ifndef notdef 397 /* 398 * If the source address is not specified yet, use the address 399 * of the outoing interface. In case, keep note we did that, so 400 * if the the firewall changes the next-hop causing the output 401 * interface to change, we can fix that. 402 */ 403 if (ip->ip_src.s_addr == INADDR_ANY) { 404 /* Interface may have no addresses. */ 405 if (ia != NULL) { 406 ip->ip_src = IA_SIN(ia)->sin_addr; 407 src_was_INADDR_ANY = 1; 408 } 409 } 410#endif /* notdef */ 411 /* 412 * Verify that we have any chance at all of being able to queue 413 * the packet or packet fragments 414 */ 415 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 416 ifp->if_snd.ifq_maxlen) { 417 error = ENOBUFS; 418 ipstat.ips_odropped++; 419 goto bad; 420 } 421 422 /* 423 * Look for broadcast address and 424 * verify user is allowed to send 425 * such a packet. 426 */ 427 if (isbroadcast) { 428 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 429 error = EADDRNOTAVAIL; 430 goto bad; 431 } 432 if ((flags & IP_ALLOWBROADCAST) == 0) { 433 error = EACCES; 434 goto bad; 435 } 436 /* don't allow broadcast messages to be fragmented */ 437 if ((u_short)ip->ip_len > ifp->if_mtu) { 438 error = EMSGSIZE; 439 goto bad; 440 } 441 m->m_flags |= M_BCAST; 442 } else { 443 m->m_flags &= ~M_BCAST; 444 } 445 446sendit: 447#ifdef IPSEC 448 /* get SP for this packet */ 449 if (so == NULL) 450 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 451 else 452 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 453 454 if (sp == NULL) { 455 ipsecstat.out_inval++; 456 goto bad; 457 } 458 459 error = 0; 460 461 /* check policy */ 462 switch (sp->policy) { 463 case IPSEC_POLICY_DISCARD: 464 /* 465 * This packet is just discarded. 466 */ 467 ipsecstat.out_polvio++; 468 goto bad; 469 470 case IPSEC_POLICY_BYPASS: 471 case IPSEC_POLICY_NONE: 472 /* no need to do IPsec. */ 473 goto skip_ipsec; 474 475 case IPSEC_POLICY_IPSEC: 476 if (sp->req == NULL) { 477 /* acquire a policy */ 478 error = key_spdacquire(sp); 479 goto bad; 480 } 481 break; 482 483 case IPSEC_POLICY_ENTRUST: 484 default: 485 printf("ip_output: Invalid policy found. %d\n", sp->policy); 486 } 487 { 488 struct ipsec_output_state state; 489 bzero(&state, sizeof(state)); 490 state.m = m; 491 if (flags & IP_ROUTETOIF) { 492 state.ro = &iproute; 493 bzero(&iproute, sizeof(iproute)); 494 } else 495 state.ro = ro; 496 state.dst = (struct sockaddr *)dst; 497 498 ip->ip_sum = 0; 499 500 /* 501 * XXX 502 * delayed checksums are not currently compatible with IPsec 503 */ 504 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 505 in_delayed_cksum(m); 506 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 507 } 508 509 ip->ip_len = htons(ip->ip_len); 510 ip->ip_off = htons(ip->ip_off); 511 512 error = ipsec4_output(&state, sp, flags); 513 514 m = state.m; 515 if (flags & IP_ROUTETOIF) { 516 /* 517 * if we have tunnel mode SA, we may need to ignore 518 * IP_ROUTETOIF. 519 */ 520 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 521 flags &= ~IP_ROUTETOIF; 522 ro = state.ro; 523 } 524 } else 525 ro = state.ro; 526 dst = (struct sockaddr_in *)state.dst; 527 if (error) { 528 /* mbuf is already reclaimed in ipsec4_output. */ 529 m0 = NULL; 530 switch (error) { 531 case EHOSTUNREACH: 532 case ENETUNREACH: 533 case EMSGSIZE: 534 case ENOBUFS: 535 case ENOMEM: 536 break; 537 default: 538 printf("ip4_output (ipsec): error code %d\n", error); 539 /*fall through*/ 540 case ENOENT: 541 /* don't show these error codes to the user */ 542 error = 0; 543 break; 544 } 545 goto bad; 546 } 547 } 548 549 /* be sure to update variables that are affected by ipsec4_output() */ 550 ip = mtod(m, struct ip *); 551#ifdef _IP_VHL 552 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 553#else 554 hlen = ip->ip_hl << 2; 555#endif 556 if (ro->ro_rt == NULL) { 557 if ((flags & IP_ROUTETOIF) == 0) { 558 printf("ip_output: " 559 "can't update route after IPsec processing\n"); 560 error = EHOSTUNREACH; /*XXX*/ 561 goto bad; 562 } 563 } else { 564 ia = ifatoia(ro->ro_rt->rt_ifa); 565 ifp = ro->ro_rt->rt_ifp; 566 } 567 568 /* make it flipped, again. */ 569 ip->ip_len = ntohs(ip->ip_len); 570 ip->ip_off = ntohs(ip->ip_off); 571skip_ipsec: 572#endif /*IPSEC*/ 573 574 /* 575 * IpHack's section. 576 * - Xlate: translate packet's addr/port (NAT). 577 * - Firewall: deny/allow/etc. 578 * - Wrap: fake packet's addr/port <unimpl.> 579 * - Encapsulate: put it in another IP and send out. <unimp.> 580 */ 581#ifdef PFIL_HOOKS 582 /* 583 * Run through list of hooks for output packets. 584 */ 585 m1 = m; 586 pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh); 587 for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link)) 588 if (pfh->pfil_func) { 589 rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1); 590 if (rv) { 591 error = EHOSTUNREACH; 592 goto done; 593 } 594 m = m1; 595 if (m == NULL) 596 goto done; 597 ip = mtod(m, struct ip *); 598 } 599#endif /* PFIL_HOOKS */ 600 601 /* 602 * Check with the firewall... 603 * but not if we are already being fwd'd from a firewall. 604 */ 605 if (fw_enable && IPFW_LOADED && !args.next_hop) { 606 struct sockaddr_in *old = dst; 607 608 args.m = m; 609 args.next_hop = dst; 610 args.oif = ifp; 611 off = ip_fw_chk_ptr(&args); 612 m = args.m; 613 dst = args.next_hop; 614 615 /* 616 * On return we must do the following: 617 * m == NULL -> drop the pkt (old interface, deprecated) 618 * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface) 619 * 1<=off<= 0xffff -> DIVERT 620 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe 621 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet 622 * dst != old -> IPFIREWALL_FORWARD 623 * off==0, dst==old -> accept 624 * If some of the above modules are not compiled in, then 625 * we should't have to check the corresponding condition 626 * (because the ipfw control socket should not accept 627 * unsupported rules), but better play safe and drop 628 * packets in case of doubt. 629 */ 630 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { 631 if (m) 632 m_freem(m); 633 error = EACCES; 634 goto done; 635 } 636 ip = mtod(m, struct ip *); 637 if (off == 0 && dst == old) /* common case */ 638 goto pass; 639 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { 640 /* 641 * pass the pkt to dummynet. Need to include 642 * pipe number, m, ifp, ro, dst because these are 643 * not recomputed in the next pass. 644 * All other parameters have been already used and 645 * so they are not needed anymore. 646 * XXX note: if the ifp or ro entry are deleted 647 * while a pkt is in dummynet, we are in trouble! 648 */ 649 args.ro = ro; 650 args.dst = dst; 651 args.flags = flags; 652 653 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, 654 &args); 655 goto done; 656 } 657#ifdef IPDIVERT 658 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 659 struct mbuf *clone = NULL; 660 661 /* Clone packet if we're doing a 'tee' */ 662 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 663 clone = m_dup(m, M_DONTWAIT); 664 665 /* 666 * XXX 667 * delayed checksums are not currently compatible 668 * with divert sockets. 669 */ 670 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 671 in_delayed_cksum(m); 672 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 673 } 674 675 /* Restore packet header fields to original values */ 676 ip->ip_len = htons(ip->ip_len); 677 ip->ip_off = htons(ip->ip_off); 678 679 /* Deliver packet to divert input routine */ 680 divert_packet(m, 0, off & 0xffff, args.divert_rule); 681 682 /* If 'tee', continue with original packet */ 683 if (clone != NULL) { 684 m = clone; 685 ip = mtod(m, struct ip *); 686 goto pass; 687 } 688 goto done; 689 } 690#endif 691 692 /* IPFIREWALL_FORWARD */ 693 /* 694 * Check dst to make sure it is directly reachable on the 695 * interface we previously thought it was. 696 * If it isn't (which may be likely in some situations) we have 697 * to re-route it (ie, find a route for the next-hop and the 698 * associated interface) and set them here. This is nested 699 * forwarding which in most cases is undesirable, except where 700 * such control is nigh impossible. So we do it here. 701 * And I'm babbling. 702 */ 703 if (off == 0 && old != dst) { /* FORWARD, dst has changed */ 704#if 0 705 /* 706 * XXX To improve readability, this block should be 707 * changed into a function call as below: 708 */ 709 error = ip_ipforward(&m, &dst, &ifp); 710 if (error) 711 goto bad; 712 if (m == NULL) /* ip_input consumed the mbuf */ 713 goto done; 714#else 715 struct in_ifaddr *ia; 716 717 /* 718 * XXX sro_fwd below is static, and a pointer 719 * to it gets passed to routines downstream. 720 * This could have surprisingly bad results in 721 * practice, because its content is overwritten 722 * by subsequent packets. 723 */ 724 /* There must be a better way to do this next line... */ 725 static struct route sro_fwd; 726 struct route *ro_fwd = &sro_fwd; 727 728#if 0 729 print_ip("IPFIREWALL_FORWARD: New dst ip: ", 730 dst->sin_addr, "\n"); 731#endif 732 733 /* 734 * We need to figure out if we have been forwarded 735 * to a local socket. If so, then we should somehow 736 * "loop back" to ip_input, and get directed to the 737 * PCB as if we had received this packet. This is 738 * because it may be dificult to identify the packets 739 * you want to forward until they are being output 740 * and have selected an interface. (e.g. locally 741 * initiated packets) If we used the loopback inteface, 742 * we would not be able to control what happens 743 * as the packet runs through ip_input() as 744 * it is done through a ISR. 745 */ 746 LIST_FOREACH(ia, 747 INADDR_HASH(dst->sin_addr.s_addr), ia_hash) { 748 /* 749 * If the addr to forward to is one 750 * of ours, we pretend to 751 * be the destination for this packet. 752 */ 753 if (IA_SIN(ia)->sin_addr.s_addr == 754 dst->sin_addr.s_addr) 755 break; 756 } 757 if (ia) { /* tell ip_input "dont filter" */ 758 struct m_hdr tag; 759 760 tag.mh_type = MT_TAG; 761 tag.mh_flags = PACKET_TAG_IPFORWARD; 762 tag.mh_data = (caddr_t)args.next_hop; 763 tag.mh_next = m; 764 765 if (m->m_pkthdr.rcvif == NULL) 766 m->m_pkthdr.rcvif = ifunit("lo0"); 767 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 768 m->m_pkthdr.csum_flags |= 769 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 770 m0->m_pkthdr.csum_data = 0xffff; 771 } 772 m->m_pkthdr.csum_flags |= 773 CSUM_IP_CHECKED | CSUM_IP_VALID; 774 ip->ip_len = htons(ip->ip_len); 775 ip->ip_off = htons(ip->ip_off); 776 ip_input((struct mbuf *)&tag); 777 goto done; 778 } 779 /* Some of the logic for this was 780 * nicked from above. 781 * 782 * This rewrites the cached route in a local PCB. 783 * Is this what we want to do? 784 */ 785 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 786 787 ro_fwd->ro_rt = 0; 788 rtalloc_ign(ro_fwd, RTF_PRCLONING); 789 790 if (ro_fwd->ro_rt == 0) { 791 ipstat.ips_noroute++; 792 error = EHOSTUNREACH; 793 goto bad; 794 } 795 796 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 797 ifp = ro_fwd->ro_rt->rt_ifp; 798 ro_fwd->ro_rt->rt_use++; 799 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 800 dst = (struct sockaddr_in *) 801 ro_fwd->ro_rt->rt_gateway; 802 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 803 isbroadcast = 804 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 805 else 806 isbroadcast = in_broadcast(dst->sin_addr, ifp); 807 if (ro->ro_rt) 808 RTFREE(ro->ro_rt); 809 ro->ro_rt = ro_fwd->ro_rt; 810 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 811 812#endif /* ... block to be put into a function */ 813 /* 814 * If we added a default src ip earlier, 815 * which would have been gotten from the-then 816 * interface, do it again, from the new one. 817 */ 818 if (src_was_INADDR_ANY) 819 ip->ip_src = IA_SIN(ia)->sin_addr; 820 goto pass ; 821 } 822 823 /* 824 * if we get here, none of the above matches, and 825 * we have to drop the pkt 826 */ 827 m_freem(m); 828 error = EACCES; /* not sure this is the right error msg */ 829 goto done; 830 } 831 832pass: 833 /* 127/8 must not appear on wire - RFC1122. */ 834 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 835 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 836 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 837 ipstat.ips_badaddr++; 838 error = EADDRNOTAVAIL; 839 goto bad; 840 } 841 } 842 843 m->m_pkthdr.csum_flags |= CSUM_IP; 844 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 845 if (sw_csum & CSUM_DELAY_DATA) { 846 in_delayed_cksum(m); 847 sw_csum &= ~CSUM_DELAY_DATA; 848 } 849 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 850 851 /* 852 * If small enough for interface, or the interface will take 853 * care of the fragmentation for us, can just send directly. 854 */ 855 if ((u_short)ip->ip_len <= ifp->if_mtu || 856 ifp->if_hwassist & CSUM_FRAGMENT) { 857 ip->ip_len = htons(ip->ip_len); 858 ip->ip_off = htons(ip->ip_off); 859 ip->ip_sum = 0; 860 if (sw_csum & CSUM_DELAY_IP) { 861 if (ip->ip_vhl == IP_VHL_BORING) { 862 ip->ip_sum = in_cksum_hdr(ip); 863 } else { 864 ip->ip_sum = in_cksum(m, hlen); 865 } 866 } 867 868 /* Record statistics for this interface address. */ 869 if (!(flags & IP_FORWARDING) && ia) { 870 ia->ia_ifa.if_opackets++; 871 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 872 } 873 874#ifdef IPSEC 875 /* clean ipsec history once it goes out of the node */ 876 ipsec_delaux(m); 877#endif 878 879 error = (*ifp->if_output)(ifp, m, 880 (struct sockaddr *)dst, ro->ro_rt); 881 goto done; 882 } 883 /* 884 * Too large for interface; fragment if possible. 885 * Must be able to put at least 8 bytes per fragment. 886 */ 887 if (ip->ip_off & IP_DF) { 888 error = EMSGSIZE; 889 /* 890 * This case can happen if the user changed the MTU 891 * of an interface after enabling IP on it. Because 892 * most netifs don't keep track of routes pointing to 893 * them, there is no way for one to update all its 894 * routes when the MTU is changed. 895 */ 896 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) 897 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) 898 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 899 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 900 } 901 ipstat.ips_cantfrag++; 902 goto bad; 903 } 904 len = (ifp->if_mtu - hlen) &~ 7; 905 if (len < 8) { 906 error = EMSGSIZE; 907 goto bad; 908 } 909 910 /* 911 * if the interface will not calculate checksums on 912 * fragmented packets, then do it here. 913 */ 914 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 915 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { 916 in_delayed_cksum(m); 917 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 918 } 919 920 { 921 int mhlen, firstlen = len; 922 struct mbuf **mnext = &m->m_nextpkt; 923 int nfrags = 1; 924 925 /* 926 * Loop through length of segment after first fragment, 927 * make new header and copy data of each part and link onto chain. 928 */ 929 m0 = m; 930 mhlen = sizeof (struct ip); 931 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) { 932 MGETHDR(m, M_DONTWAIT, MT_HEADER); 933 if (m == 0) { 934 error = ENOBUFS; 935 ipstat.ips_odropped++; 936 goto sendorfree; 937 } 938 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 939 m->m_data += max_linkhdr; 940 mhip = mtod(m, struct ip *); 941 *mhip = *ip; 942 if (hlen > sizeof (struct ip)) { 943 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 944 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2); 945 } 946 m->m_len = mhlen; 947 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 948 if (off + len >= (u_short)ip->ip_len) 949 len = (u_short)ip->ip_len - off; 950 else 951 mhip->ip_off |= IP_MF; 952 mhip->ip_len = htons((u_short)(len + mhlen)); 953 m->m_next = m_copy(m0, off, len); 954 if (m->m_next == 0) { 955 (void) m_free(m); 956 error = ENOBUFS; /* ??? */ 957 ipstat.ips_odropped++; 958 goto sendorfree; 959 } 960 m->m_pkthdr.len = mhlen + len; 961 m->m_pkthdr.rcvif = (struct ifnet *)0; 962 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 963 mhip->ip_off = htons(mhip->ip_off); 964 mhip->ip_sum = 0; 965 if (sw_csum & CSUM_DELAY_IP) { 966 if (mhip->ip_vhl == IP_VHL_BORING) { 967 mhip->ip_sum = in_cksum_hdr(mhip); 968 } else { 969 mhip->ip_sum = in_cksum(m, mhlen); 970 } 971 } 972 *mnext = m; 973 mnext = &m->m_nextpkt; 974 nfrags++; 975 } 976 ipstat.ips_ofragments += nfrags; 977 978 /* set first/last markers for fragment chain */ 979 m->m_flags |= M_LASTFRAG; 980 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 981 m0->m_pkthdr.csum_data = nfrags; 982 983 /* 984 * Update first fragment by trimming what's been copied out 985 * and updating header, then send each fragment (in order). 986 */ 987 m = m0; 988 m_adj(m, hlen + firstlen - (u_short)ip->ip_len); 989 m->m_pkthdr.len = hlen + firstlen; 990 ip->ip_len = htons((u_short)m->m_pkthdr.len); 991 ip->ip_off |= IP_MF; 992 ip->ip_off = htons(ip->ip_off); 993 ip->ip_sum = 0; 994 if (sw_csum & CSUM_DELAY_IP) { 995 if (ip->ip_vhl == IP_VHL_BORING) { 996 ip->ip_sum = in_cksum_hdr(ip); 997 } else { 998 ip->ip_sum = in_cksum(m, hlen); 999 } 1000 } 1001sendorfree: 1002 for (m = m0; m; m = m0) { 1003 m0 = m->m_nextpkt; 1004 m->m_nextpkt = 0; 1005#ifdef IPSEC 1006 /* clean ipsec history once it goes out of the node */ 1007 ipsec_delaux(m); 1008#endif 1009 if (error == 0) { 1010 /* Record statistics for this interface address. */ 1011 if (ia != NULL) { 1012 ia->ia_ifa.if_opackets++; 1013 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1014 } 1015 1016 error = (*ifp->if_output)(ifp, m, 1017 (struct sockaddr *)dst, ro->ro_rt); 1018 } else 1019 m_freem(m); 1020 } 1021 1022 if (error == 0) 1023 ipstat.ips_fragmented++; 1024 } 1025done: 1026#ifdef IPSEC 1027 if (ro == &iproute && ro->ro_rt) { 1028 RTFREE(ro->ro_rt); 1029 ro->ro_rt = NULL; 1030 } 1031 if (sp != NULL) { 1032 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 1033 printf("DP ip_output call free SP:%p\n", sp)); 1034 key_freesp(sp); 1035 } 1036#endif /* IPSEC */ 1037 return (error); 1038bad: 1039 m_freem(m); 1040 goto done; 1041} 1042 1043void 1044in_delayed_cksum(struct mbuf *m) 1045{ 1046 struct ip *ip; 1047 u_short csum, offset; 1048 1049 ip = mtod(m, struct ip *); 1050 offset = IP_VHL_HL(ip->ip_vhl) << 2 ; 1051 csum = in_cksum_skip(m, ip->ip_len, offset); 1052 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 1053 csum = 0xffff; 1054 offset += m->m_pkthdr.csum_data; /* checksum offset */ 1055 1056 if (offset + sizeof(u_short) > m->m_len) { 1057 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 1058 m->m_len, offset, ip->ip_p); 1059 /* 1060 * XXX 1061 * this shouldn't happen, but if it does, the 1062 * correct behavior may be to insert the checksum 1063 * in the existing chain instead of rearranging it. 1064 */ 1065 m = m_pullup(m, offset + sizeof(u_short)); 1066 } 1067 *(u_short *)(m->m_data + offset) = csum; 1068} 1069 1070/* 1071 * Insert IP options into preformed packet. 1072 * Adjust IP destination as required for IP source routing, 1073 * as indicated by a non-zero in_addr at the start of the options. 1074 * 1075 * XXX This routine assumes that the packet has no options in place. 1076 */ 1077static struct mbuf * 1078ip_insertoptions(m, opt, phlen) 1079 register struct mbuf *m; 1080 struct mbuf *opt; 1081 int *phlen; 1082{ 1083 register struct ipoption *p = mtod(opt, struct ipoption *); 1084 struct mbuf *n; 1085 register struct ip *ip = mtod(m, struct ip *); 1086 unsigned optlen; 1087 1088 optlen = opt->m_len - sizeof(p->ipopt_dst); 1089 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) 1090 return (m); /* XXX should fail */ 1091 if (p->ipopt_dst.s_addr) 1092 ip->ip_dst = p->ipopt_dst; 1093 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1094 MGETHDR(n, M_DONTWAIT, MT_HEADER); 1095 if (n == 0) 1096 return (m); 1097 n->m_pkthdr.rcvif = (struct ifnet *)0; 1098 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1099 m->m_len -= sizeof(struct ip); 1100 m->m_data += sizeof(struct ip); 1101 n->m_next = m; 1102 m = n; 1103 m->m_len = optlen + sizeof(struct ip); 1104 m->m_data += max_linkhdr; 1105 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip)); 1106 } else { 1107 m->m_data -= optlen; 1108 m->m_len += optlen; 1109 m->m_pkthdr.len += optlen; 1110 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 1111 } 1112 ip = mtod(m, struct ip *); 1113 bcopy(p->ipopt_list, ip + 1, optlen); 1114 *phlen = sizeof(struct ip) + optlen; 1115 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2); 1116 ip->ip_len += optlen; 1117 return (m); 1118} 1119 1120/* 1121 * Copy options from ip to jp, 1122 * omitting those not copied during fragmentation. 1123 */ 1124int 1125ip_optcopy(ip, jp) 1126 struct ip *ip, *jp; 1127{ 1128 register u_char *cp, *dp; 1129 int opt, optlen, cnt; 1130 1131 cp = (u_char *)(ip + 1); 1132 dp = (u_char *)(jp + 1); 1133 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip); 1134 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1135 opt = cp[0]; 1136 if (opt == IPOPT_EOL) 1137 break; 1138 if (opt == IPOPT_NOP) { 1139 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1140 *dp++ = IPOPT_NOP; 1141 optlen = 1; 1142 continue; 1143 } 1144 1145 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp), 1146 ("ip_optcopy: malformed ipv4 option")); 1147 optlen = cp[IPOPT_OLEN]; 1148 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt, 1149 ("ip_optcopy: malformed ipv4 option")); 1150 1151 /* bogus lengths should have been caught by ip_dooptions */ 1152 if (optlen > cnt) 1153 optlen = cnt; 1154 if (IPOPT_COPIED(opt)) { 1155 bcopy(cp, dp, optlen); 1156 dp += optlen; 1157 } 1158 } 1159 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1160 *dp++ = IPOPT_EOL; 1161 return (optlen); 1162} 1163 1164/* 1165 * IP socket option processing. 1166 */ 1167int 1168ip_ctloutput(so, sopt) 1169 struct socket *so; 1170 struct sockopt *sopt; 1171{ 1172 struct inpcb *inp = sotoinpcb(so); 1173 int error, optval; 1174 1175 error = optval = 0; 1176 if (sopt->sopt_level != IPPROTO_IP) { 1177 return (EINVAL); 1178 } 1179 1180 switch (sopt->sopt_dir) { 1181 case SOPT_SET: 1182 switch (sopt->sopt_name) { 1183 case IP_OPTIONS: 1184#ifdef notyet 1185 case IP_RETOPTS: 1186#endif 1187 { 1188 struct mbuf *m; 1189 if (sopt->sopt_valsize > MLEN) { 1190 error = EMSGSIZE; 1191 break; 1192 } 1193 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); 1194 if (m == 0) { 1195 error = ENOBUFS; 1196 break; 1197 } 1198 m->m_len = sopt->sopt_valsize; 1199 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1200 m->m_len); 1201 1202 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1203 m)); 1204 } 1205 1206 case IP_TOS: 1207 case IP_TTL: 1208 case IP_RECVOPTS: 1209 case IP_RECVRETOPTS: 1210 case IP_RECVDSTADDR: 1211 case IP_RECVIF: 1212 case IP_FAITH: 1213 error = sooptcopyin(sopt, &optval, sizeof optval, 1214 sizeof optval); 1215 if (error) 1216 break; 1217 1218 switch (sopt->sopt_name) { 1219 case IP_TOS: 1220 inp->inp_ip_tos = optval; 1221 break; 1222 1223 case IP_TTL: 1224 inp->inp_ip_ttl = optval; 1225 break; 1226#define OPTSET(bit) \ 1227 if (optval) \ 1228 inp->inp_flags |= bit; \ 1229 else \ 1230 inp->inp_flags &= ~bit; 1231 1232 case IP_RECVOPTS: 1233 OPTSET(INP_RECVOPTS); 1234 break; 1235 1236 case IP_RECVRETOPTS: 1237 OPTSET(INP_RECVRETOPTS); 1238 break; 1239 1240 case IP_RECVDSTADDR: 1241 OPTSET(INP_RECVDSTADDR); 1242 break; 1243 1244 case IP_RECVIF: 1245 OPTSET(INP_RECVIF); 1246 break; 1247 1248 case IP_FAITH: 1249 OPTSET(INP_FAITH); 1250 break; 1251 } 1252 break; 1253#undef OPTSET 1254 1255 case IP_MULTICAST_IF: 1256 case IP_MULTICAST_VIF: 1257 case IP_MULTICAST_TTL: 1258 case IP_MULTICAST_LOOP: 1259 case IP_ADD_MEMBERSHIP: 1260 case IP_DROP_MEMBERSHIP: 1261 error = ip_setmoptions(sopt, &inp->inp_moptions); 1262 break; 1263 1264 case IP_PORTRANGE: 1265 error = sooptcopyin(sopt, &optval, sizeof optval, 1266 sizeof optval); 1267 if (error) 1268 break; 1269 1270 switch (optval) { 1271 case IP_PORTRANGE_DEFAULT: 1272 inp->inp_flags &= ~(INP_LOWPORT); 1273 inp->inp_flags &= ~(INP_HIGHPORT); 1274 break; 1275 1276 case IP_PORTRANGE_HIGH: 1277 inp->inp_flags &= ~(INP_LOWPORT); 1278 inp->inp_flags |= INP_HIGHPORT; 1279 break; 1280 1281 case IP_PORTRANGE_LOW: 1282 inp->inp_flags &= ~(INP_HIGHPORT); 1283 inp->inp_flags |= INP_LOWPORT; 1284 break; 1285 1286 default: 1287 error = EINVAL; 1288 break; 1289 } 1290 break; 1291 1292#ifdef IPSEC 1293 case IP_IPSEC_POLICY: 1294 { 1295 caddr_t req; 1296 size_t len = 0; 1297 int priv; 1298 struct mbuf *m; 1299 int optname; 1300 1301 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1302 break; 1303 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1304 break; 1305 priv = (sopt->sopt_td != NULL && 1306 suser(sopt->sopt_td) != 0) ? 0 : 1; 1307 req = mtod(m, caddr_t); 1308 len = m->m_len; 1309 optname = sopt->sopt_name; 1310 error = ipsec4_set_policy(inp, optname, req, len, priv); 1311 m_freem(m); 1312 break; 1313 } 1314#endif /*IPSEC*/ 1315 1316 default: 1317 error = ENOPROTOOPT; 1318 break; 1319 } 1320 break; 1321 1322 case SOPT_GET: 1323 switch (sopt->sopt_name) { 1324 case IP_OPTIONS: 1325 case IP_RETOPTS: 1326 if (inp->inp_options) 1327 error = sooptcopyout(sopt, 1328 mtod(inp->inp_options, 1329 char *), 1330 inp->inp_options->m_len); 1331 else 1332 sopt->sopt_valsize = 0; 1333 break; 1334 1335 case IP_TOS: 1336 case IP_TTL: 1337 case IP_RECVOPTS: 1338 case IP_RECVRETOPTS: 1339 case IP_RECVDSTADDR: 1340 case IP_RECVIF: 1341 case IP_PORTRANGE: 1342 case IP_FAITH: 1343 switch (sopt->sopt_name) { 1344 1345 case IP_TOS: 1346 optval = inp->inp_ip_tos; 1347 break; 1348 1349 case IP_TTL: 1350 optval = inp->inp_ip_ttl; 1351 break; 1352 1353#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1354 1355 case IP_RECVOPTS: 1356 optval = OPTBIT(INP_RECVOPTS); 1357 break; 1358 1359 case IP_RECVRETOPTS: 1360 optval = OPTBIT(INP_RECVRETOPTS); 1361 break; 1362 1363 case IP_RECVDSTADDR: 1364 optval = OPTBIT(INP_RECVDSTADDR); 1365 break; 1366 1367 case IP_RECVIF: 1368 optval = OPTBIT(INP_RECVIF); 1369 break; 1370 1371 case IP_PORTRANGE: 1372 if (inp->inp_flags & INP_HIGHPORT) 1373 optval = IP_PORTRANGE_HIGH; 1374 else if (inp->inp_flags & INP_LOWPORT) 1375 optval = IP_PORTRANGE_LOW; 1376 else 1377 optval = 0; 1378 break; 1379 1380 case IP_FAITH: 1381 optval = OPTBIT(INP_FAITH); 1382 break; 1383 } 1384 error = sooptcopyout(sopt, &optval, sizeof optval); 1385 break; 1386 1387 case IP_MULTICAST_IF: 1388 case IP_MULTICAST_VIF: 1389 case IP_MULTICAST_TTL: 1390 case IP_MULTICAST_LOOP: 1391 case IP_ADD_MEMBERSHIP: 1392 case IP_DROP_MEMBERSHIP: 1393 error = ip_getmoptions(sopt, inp->inp_moptions); 1394 break; 1395 1396#ifdef IPSEC 1397 case IP_IPSEC_POLICY: 1398 { 1399 struct mbuf *m = NULL; 1400 caddr_t req = NULL; 1401 size_t len = 0; 1402 1403 if (m != 0) { 1404 req = mtod(m, caddr_t); 1405 len = m->m_len; 1406 } 1407 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1408 if (error == 0) 1409 error = soopt_mcopyout(sopt, m); /* XXX */ 1410 if (error == 0) 1411 m_freem(m); 1412 break; 1413 } 1414#endif /*IPSEC*/ 1415 1416 default: 1417 error = ENOPROTOOPT; 1418 break; 1419 } 1420 break; 1421 } 1422 return (error); 1423} 1424 1425/* 1426 * Set up IP options in pcb for insertion in output packets. 1427 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1428 * with destination address if source routed. 1429 */ 1430static int 1431ip_pcbopts(optname, pcbopt, m) 1432 int optname; 1433 struct mbuf **pcbopt; 1434 register struct mbuf *m; 1435{ 1436 register int cnt, optlen; 1437 register u_char *cp; 1438 u_char opt; 1439 1440 /* turn off any old options */ 1441 if (*pcbopt) 1442 (void)m_free(*pcbopt); 1443 *pcbopt = 0; 1444 if (m == (struct mbuf *)0 || m->m_len == 0) { 1445 /* 1446 * Only turning off any previous options. 1447 */ 1448 if (m) 1449 (void)m_free(m); 1450 return (0); 1451 } 1452 1453 if (m->m_len % sizeof(int32_t)) 1454 goto bad; 1455 /* 1456 * IP first-hop destination address will be stored before 1457 * actual options; move other options back 1458 * and clear it when none present. 1459 */ 1460 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1461 goto bad; 1462 cnt = m->m_len; 1463 m->m_len += sizeof(struct in_addr); 1464 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1465 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1466 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1467 1468 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1469 opt = cp[IPOPT_OPTVAL]; 1470 if (opt == IPOPT_EOL) 1471 break; 1472 if (opt == IPOPT_NOP) 1473 optlen = 1; 1474 else { 1475 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1476 goto bad; 1477 optlen = cp[IPOPT_OLEN]; 1478 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1479 goto bad; 1480 } 1481 switch (opt) { 1482 1483 default: 1484 break; 1485 1486 case IPOPT_LSRR: 1487 case IPOPT_SSRR: 1488 /* 1489 * user process specifies route as: 1490 * ->A->B->C->D 1491 * D must be our final destination (but we can't 1492 * check that since we may not have connected yet). 1493 * A is first hop destination, which doesn't appear in 1494 * actual IP option, but is stored before the options. 1495 */ 1496 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1497 goto bad; 1498 m->m_len -= sizeof(struct in_addr); 1499 cnt -= sizeof(struct in_addr); 1500 optlen -= sizeof(struct in_addr); 1501 cp[IPOPT_OLEN] = optlen; 1502 /* 1503 * Move first hop before start of options. 1504 */ 1505 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1506 sizeof(struct in_addr)); 1507 /* 1508 * Then copy rest of options back 1509 * to close up the deleted entry. 1510 */ 1511 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1512 sizeof(struct in_addr)), 1513 (caddr_t)&cp[IPOPT_OFFSET+1], 1514 (unsigned)cnt + sizeof(struct in_addr)); 1515 break; 1516 } 1517 } 1518 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1519 goto bad; 1520 *pcbopt = m; 1521 return (0); 1522 1523bad: 1524 (void)m_free(m); 1525 return (EINVAL); 1526} 1527 1528/* 1529 * XXX 1530 * The whole multicast option thing needs to be re-thought. 1531 * Several of these options are equally applicable to non-multicast 1532 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1533 * standard option (IP_TTL). 1534 */ 1535 1536/* 1537 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1538 */ 1539static struct ifnet * 1540ip_multicast_if(a, ifindexp) 1541 struct in_addr *a; 1542 int *ifindexp; 1543{ 1544 int ifindex; 1545 struct ifnet *ifp; 1546 1547 if (ifindexp) 1548 *ifindexp = 0; 1549 if (ntohl(a->s_addr) >> 24 == 0) { 1550 ifindex = ntohl(a->s_addr) & 0xffffff; 1551 if (ifindex < 0 || if_index < ifindex) 1552 return NULL; 1553 ifp = ifnet_byindex(ifindex); 1554 if (ifindexp) 1555 *ifindexp = ifindex; 1556 } else { 1557 INADDR_TO_IFP(*a, ifp); 1558 } 1559 return ifp; 1560} 1561 1562/* 1563 * Set the IP multicast options in response to user setsockopt(). 1564 */ 1565static int 1566ip_setmoptions(sopt, imop) 1567 struct sockopt *sopt; 1568 struct ip_moptions **imop; 1569{ 1570 int error = 0; 1571 int i; 1572 struct in_addr addr; 1573 struct ip_mreq mreq; 1574 struct ifnet *ifp; 1575 struct ip_moptions *imo = *imop; 1576 struct route ro; 1577 struct sockaddr_in *dst; 1578 int ifindex; 1579 int s; 1580 1581 if (imo == NULL) { 1582 /* 1583 * No multicast option buffer attached to the pcb; 1584 * allocate one and initialize to default values. 1585 */ 1586 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, 1587 M_WAITOK); 1588 1589 if (imo == NULL) 1590 return (ENOBUFS); 1591 *imop = imo; 1592 imo->imo_multicast_ifp = NULL; 1593 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1594 imo->imo_multicast_vif = -1; 1595 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1596 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1597 imo->imo_num_memberships = 0; 1598 } 1599 1600 switch (sopt->sopt_name) { 1601 /* store an index number for the vif you wanna use in the send */ 1602 case IP_MULTICAST_VIF: 1603 if (legal_vif_num == 0) { 1604 error = EOPNOTSUPP; 1605 break; 1606 } 1607 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1608 if (error) 1609 break; 1610 if (!legal_vif_num(i) && (i != -1)) { 1611 error = EINVAL; 1612 break; 1613 } 1614 imo->imo_multicast_vif = i; 1615 break; 1616 1617 case IP_MULTICAST_IF: 1618 /* 1619 * Select the interface for outgoing multicast packets. 1620 */ 1621 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1622 if (error) 1623 break; 1624 /* 1625 * INADDR_ANY is used to remove a previous selection. 1626 * When no interface is selected, a default one is 1627 * chosen every time a multicast packet is sent. 1628 */ 1629 if (addr.s_addr == INADDR_ANY) { 1630 imo->imo_multicast_ifp = NULL; 1631 break; 1632 } 1633 /* 1634 * The selected interface is identified by its local 1635 * IP address. Find the interface and confirm that 1636 * it supports multicasting. 1637 */ 1638 s = splimp(); 1639 ifp = ip_multicast_if(&addr, &ifindex); 1640 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1641 splx(s); 1642 error = EADDRNOTAVAIL; 1643 break; 1644 } 1645 imo->imo_multicast_ifp = ifp; 1646 if (ifindex) 1647 imo->imo_multicast_addr = addr; 1648 else 1649 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1650 splx(s); 1651 break; 1652 1653 case IP_MULTICAST_TTL: 1654 /* 1655 * Set the IP time-to-live for outgoing multicast packets. 1656 * The original multicast API required a char argument, 1657 * which is inconsistent with the rest of the socket API. 1658 * We allow either a char or an int. 1659 */ 1660 if (sopt->sopt_valsize == 1) { 1661 u_char ttl; 1662 error = sooptcopyin(sopt, &ttl, 1, 1); 1663 if (error) 1664 break; 1665 imo->imo_multicast_ttl = ttl; 1666 } else { 1667 u_int ttl; 1668 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1669 sizeof ttl); 1670 if (error) 1671 break; 1672 if (ttl > 255) 1673 error = EINVAL; 1674 else 1675 imo->imo_multicast_ttl = ttl; 1676 } 1677 break; 1678 1679 case IP_MULTICAST_LOOP: 1680 /* 1681 * Set the loopback flag for outgoing multicast packets. 1682 * Must be zero or one. The original multicast API required a 1683 * char argument, which is inconsistent with the rest 1684 * of the socket API. We allow either a char or an int. 1685 */ 1686 if (sopt->sopt_valsize == 1) { 1687 u_char loop; 1688 error = sooptcopyin(sopt, &loop, 1, 1); 1689 if (error) 1690 break; 1691 imo->imo_multicast_loop = !!loop; 1692 } else { 1693 u_int loop; 1694 error = sooptcopyin(sopt, &loop, sizeof loop, 1695 sizeof loop); 1696 if (error) 1697 break; 1698 imo->imo_multicast_loop = !!loop; 1699 } 1700 break; 1701 1702 case IP_ADD_MEMBERSHIP: 1703 /* 1704 * Add a multicast group membership. 1705 * Group must be a valid IP multicast address. 1706 */ 1707 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1708 if (error) 1709 break; 1710 1711 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1712 error = EINVAL; 1713 break; 1714 } 1715 s = splimp(); 1716 /* 1717 * If no interface address was provided, use the interface of 1718 * the route to the given multicast address. 1719 */ 1720 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1721 bzero((caddr_t)&ro, sizeof(ro)); 1722 dst = (struct sockaddr_in *)&ro.ro_dst; 1723 dst->sin_len = sizeof(*dst); 1724 dst->sin_family = AF_INET; 1725 dst->sin_addr = mreq.imr_multiaddr; 1726 rtalloc(&ro); 1727 if (ro.ro_rt == NULL) { 1728 error = EADDRNOTAVAIL; 1729 splx(s); 1730 break; 1731 } 1732 ifp = ro.ro_rt->rt_ifp; 1733 rtfree(ro.ro_rt); 1734 } 1735 else { 1736 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1737 } 1738 1739 /* 1740 * See if we found an interface, and confirm that it 1741 * supports multicast. 1742 */ 1743 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1744 error = EADDRNOTAVAIL; 1745 splx(s); 1746 break; 1747 } 1748 /* 1749 * See if the membership already exists or if all the 1750 * membership slots are full. 1751 */ 1752 for (i = 0; i < imo->imo_num_memberships; ++i) { 1753 if (imo->imo_membership[i]->inm_ifp == ifp && 1754 imo->imo_membership[i]->inm_addr.s_addr 1755 == mreq.imr_multiaddr.s_addr) 1756 break; 1757 } 1758 if (i < imo->imo_num_memberships) { 1759 error = EADDRINUSE; 1760 splx(s); 1761 break; 1762 } 1763 if (i == IP_MAX_MEMBERSHIPS) { 1764 error = ETOOMANYREFS; 1765 splx(s); 1766 break; 1767 } 1768 /* 1769 * Everything looks good; add a new record to the multicast 1770 * address list for the given interface. 1771 */ 1772 if ((imo->imo_membership[i] = 1773 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 1774 error = ENOBUFS; 1775 splx(s); 1776 break; 1777 } 1778 ++imo->imo_num_memberships; 1779 splx(s); 1780 break; 1781 1782 case IP_DROP_MEMBERSHIP: 1783 /* 1784 * Drop a multicast group membership. 1785 * Group must be a valid IP multicast address. 1786 */ 1787 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1788 if (error) 1789 break; 1790 1791 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1792 error = EINVAL; 1793 break; 1794 } 1795 1796 s = splimp(); 1797 /* 1798 * If an interface address was specified, get a pointer 1799 * to its ifnet structure. 1800 */ 1801 if (mreq.imr_interface.s_addr == INADDR_ANY) 1802 ifp = NULL; 1803 else { 1804 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1805 if (ifp == NULL) { 1806 error = EADDRNOTAVAIL; 1807 splx(s); 1808 break; 1809 } 1810 } 1811 /* 1812 * Find the membership in the membership array. 1813 */ 1814 for (i = 0; i < imo->imo_num_memberships; ++i) { 1815 if ((ifp == NULL || 1816 imo->imo_membership[i]->inm_ifp == ifp) && 1817 imo->imo_membership[i]->inm_addr.s_addr == 1818 mreq.imr_multiaddr.s_addr) 1819 break; 1820 } 1821 if (i == imo->imo_num_memberships) { 1822 error = EADDRNOTAVAIL; 1823 splx(s); 1824 break; 1825 } 1826 /* 1827 * Give up the multicast address record to which the 1828 * membership points. 1829 */ 1830 in_delmulti(imo->imo_membership[i]); 1831 /* 1832 * Remove the gap in the membership array. 1833 */ 1834 for (++i; i < imo->imo_num_memberships; ++i) 1835 imo->imo_membership[i-1] = imo->imo_membership[i]; 1836 --imo->imo_num_memberships; 1837 splx(s); 1838 break; 1839 1840 default: 1841 error = EOPNOTSUPP; 1842 break; 1843 } 1844 1845 /* 1846 * If all options have default values, no need to keep the mbuf. 1847 */ 1848 if (imo->imo_multicast_ifp == NULL && 1849 imo->imo_multicast_vif == -1 && 1850 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1851 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1852 imo->imo_num_memberships == 0) { 1853 free(*imop, M_IPMOPTS); 1854 *imop = NULL; 1855 } 1856 1857 return (error); 1858} 1859 1860/* 1861 * Return the IP multicast options in response to user getsockopt(). 1862 */ 1863static int 1864ip_getmoptions(sopt, imo) 1865 struct sockopt *sopt; 1866 register struct ip_moptions *imo; 1867{ 1868 struct in_addr addr; 1869 struct in_ifaddr *ia; 1870 int error, optval; 1871 u_char coptval; 1872 1873 error = 0; 1874 switch (sopt->sopt_name) { 1875 case IP_MULTICAST_VIF: 1876 if (imo != NULL) 1877 optval = imo->imo_multicast_vif; 1878 else 1879 optval = -1; 1880 error = sooptcopyout(sopt, &optval, sizeof optval); 1881 break; 1882 1883 case IP_MULTICAST_IF: 1884 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1885 addr.s_addr = INADDR_ANY; 1886 else if (imo->imo_multicast_addr.s_addr) { 1887 /* return the value user has set */ 1888 addr = imo->imo_multicast_addr; 1889 } else { 1890 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1891 addr.s_addr = (ia == NULL) ? INADDR_ANY 1892 : IA_SIN(ia)->sin_addr.s_addr; 1893 } 1894 error = sooptcopyout(sopt, &addr, sizeof addr); 1895 break; 1896 1897 case IP_MULTICAST_TTL: 1898 if (imo == 0) 1899 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1900 else 1901 optval = coptval = imo->imo_multicast_ttl; 1902 if (sopt->sopt_valsize == 1) 1903 error = sooptcopyout(sopt, &coptval, 1); 1904 else 1905 error = sooptcopyout(sopt, &optval, sizeof optval); 1906 break; 1907 1908 case IP_MULTICAST_LOOP: 1909 if (imo == 0) 1910 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1911 else 1912 optval = coptval = imo->imo_multicast_loop; 1913 if (sopt->sopt_valsize == 1) 1914 error = sooptcopyout(sopt, &coptval, 1); 1915 else 1916 error = sooptcopyout(sopt, &optval, sizeof optval); 1917 break; 1918 1919 default: 1920 error = ENOPROTOOPT; 1921 break; 1922 } 1923 return (error); 1924} 1925 1926/* 1927 * Discard the IP multicast options. 1928 */ 1929void 1930ip_freemoptions(imo) 1931 register struct ip_moptions *imo; 1932{ 1933 register int i; 1934 1935 if (imo != NULL) { 1936 for (i = 0; i < imo->imo_num_memberships; ++i) 1937 in_delmulti(imo->imo_membership[i]); 1938 free(imo, M_IPMOPTS); 1939 } 1940} 1941 1942/* 1943 * Routine called from ip_output() to loop back a copy of an IP multicast 1944 * packet to the input queue of a specified interface. Note that this 1945 * calls the output routine of the loopback "driver", but with an interface 1946 * pointer that might NOT be a loopback interface -- evil, but easier than 1947 * replicating that code here. 1948 */ 1949static void 1950ip_mloopback(ifp, m, dst, hlen) 1951 struct ifnet *ifp; 1952 register struct mbuf *m; 1953 register struct sockaddr_in *dst; 1954 int hlen; 1955{ 1956 register struct ip *ip; 1957 struct mbuf *copym; 1958 1959 copym = m_copy(m, 0, M_COPYALL); 1960 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 1961 copym = m_pullup(copym, hlen); 1962 if (copym != NULL) { 1963 /* 1964 * We don't bother to fragment if the IP length is greater 1965 * than the interface's MTU. Can this possibly matter? 1966 */ 1967 ip = mtod(copym, struct ip *); 1968 ip->ip_len = htons(ip->ip_len); 1969 ip->ip_off = htons(ip->ip_off); 1970 ip->ip_sum = 0; 1971 if (ip->ip_vhl == IP_VHL_BORING) { 1972 ip->ip_sum = in_cksum_hdr(ip); 1973 } else { 1974 ip->ip_sum = in_cksum(copym, hlen); 1975 } 1976 /* 1977 * NB: 1978 * It's not clear whether there are any lingering 1979 * reentrancy problems in other areas which might 1980 * be exposed by using ip_input directly (in 1981 * particular, everything which modifies the packet 1982 * in-place). Yet another option is using the 1983 * protosw directly to deliver the looped back 1984 * packet. For the moment, we'll err on the side 1985 * of safety by using if_simloop(). 1986 */ 1987#if 1 /* XXX */ 1988 if (dst->sin_family != AF_INET) { 1989 printf("ip_mloopback: bad address family %d\n", 1990 dst->sin_family); 1991 dst->sin_family = AF_INET; 1992 } 1993#endif 1994 1995#ifdef notdef 1996 copym->m_pkthdr.rcvif = ifp; 1997 ip_input(copym); 1998#else 1999 /* if the checksum hasn't been computed, mark it as valid */ 2000 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2001 copym->m_pkthdr.csum_flags |= 2002 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 2003 copym->m_pkthdr.csum_data = 0xffff; 2004 } 2005 if_simloop(ifp, copym, dst->sin_family, 0); 2006#endif 2007 } 2008} 2009