1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1988, 1990, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $ 62 */ 63/* 64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 65 * support for mandatory and extensible security protections. This notice 66 * is included in support of clause 2.2 (b) of the Apple Public License, 67 * Version 2.0. 68 */ 69 70#define _IP_VHL 71 72#include <sys/param.h> 73#include <sys/systm.h> 74#include <sys/kernel.h> 75#include <sys/malloc.h> 76#include <sys/mbuf.h> 77#include <sys/protosw.h> 78#include <sys/socket.h> 79#include <sys/socketvar.h> 80#include <kern/locks.h> 81#include <sys/sysctl.h> 82#include <sys/mcache.h> 83 84#include <machine/endian.h> 85#include <pexpert/pexpert.h> 86 87#include <net/if.h> 88#include <net/if_dl.h> 89#include <net/if_types.h> 90#include <net/route.h> 91#include <net/ntstat.h> 92#include <net/net_osdep.h> 93 94#include <netinet/in.h> 95#include <netinet/in_systm.h> 96#include <netinet/ip.h> 97#include <netinet/in_pcb.h> 98#include <netinet/in_var.h> 99#include <netinet/ip_var.h> 100 101#include <netinet/kpi_ipfilter_var.h> 102 103#if CONFIG_MACF_NET 104#include <security/mac_framework.h> 105#endif 106 107#include <net/dlil.h> 108#include <sys/kdebug.h> 109#include <libkern/OSAtomic.h> 110 111#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1) 112#define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3) 113#define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1) 114#define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1) 115 116#define SWAP16(v) ((((v) & 0xff) << 8) | ((v) >> 8)) 117 118#if IPSEC 119#include <netinet6/ipsec.h> 120#include <netkey/key.h> 121#if IPSEC_DEBUG 122#include <netkey/key_debug.h> 123#else 124#define KEYDEBUG(lev,arg) 125#endif 126#endif /*IPSEC*/ 127 128#include <netinet/ip_fw.h> 129#include <netinet/ip_divert.h> 130#include <mach/sdt.h> 131 132#if DUMMYNET 133#include <netinet/ip_dummynet.h> 134#endif 135 136#if PF 137#include <net/pfvar.h> 138#endif /* PF */ 139 140#if IPFIREWALL_FORWARD_DEBUG 141#define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\ 142 (ntohl(a.s_addr)>>16)&0xFF,\ 143 (ntohl(a.s_addr)>>8)&0xFF,\ 144 (ntohl(a.s_addr))&0xFF); 145#endif 146 147u_short ip_id; 148 149static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 150static void ip_mloopback(struct ifnet *, struct mbuf *, 151 struct sockaddr_in *, int); 152static int ip_pcbopts(int, struct mbuf **, struct mbuf *); 153static void imo_trace(struct ip_moptions *, int); 154 155static void ip_out_cksum_stats(int, u_int32_t); 156static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int); 157 158int ip_optcopy(struct ip *, struct ip *); 159void in_delayed_cksum_offset(struct mbuf *, int ); 160void in_cksum_offset(struct mbuf* , size_t ); 161 162extern struct protosw inetsw[]; 163 164extern struct ip_linklocal_stat ip_linklocal_stat; 165extern lck_mtx_t *ip_mutex; 166 167/* temporary: for testing */ 168#if IPSEC 169extern int ipsec_bypass; 170#endif 171 172static int ip_maxchainsent = 0; 173SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW | CTLFLAG_LOCKED, 174 &ip_maxchainsent, 0, "use dlil_output_list"); 175#if DEBUG 176static int forge_ce = 0; 177SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW | CTLFLAG_LOCKED, 178 &forge_ce, 0, "Forge ECN CE"); 179#endif /* DEBUG */ 180 181static int ip_select_srcif_debug = 0; 182SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW | CTLFLAG_LOCKED, 183 &ip_select_srcif_debug, 0, "log source interface selection debug info"); 184 185#define IMO_TRACE_HIST_SIZE 32 /* size of trace history */ 186 187/* For gdb */ 188__private_extern__ unsigned int imo_trace_hist_size = IMO_TRACE_HIST_SIZE; 189 190struct ip_moptions_dbg { 191 struct ip_moptions imo; /* ip_moptions */ 192 u_int16_t imo_refhold_cnt; /* # of IMO_ADDREF */ 193 u_int16_t imo_refrele_cnt; /* # of IMO_REMREF */ 194 /* 195 * Alloc and free callers. 196 */ 197 ctrace_t imo_alloc; 198 ctrace_t imo_free; 199 /* 200 * Circular lists of IMO_ADDREF and IMO_REMREF callers. 201 */ 202 ctrace_t imo_refhold[IMO_TRACE_HIST_SIZE]; 203 ctrace_t imo_refrele[IMO_TRACE_HIST_SIZE]; 204}; 205 206#if DEBUG 207static unsigned int imo_debug = 1; /* debugging (enabled) */ 208#else 209static unsigned int imo_debug; /* debugging (disabled) */ 210#endif /* !DEBUG */ 211static unsigned int imo_size; /* size of zone element */ 212static struct zone *imo_zone; /* zone for ip_moptions */ 213 214#define IMO_ZONE_MAX 64 /* maximum elements in zone */ 215#define IMO_ZONE_NAME "ip_moptions" /* zone name */ 216 217/* 218 * IP output. The packet in mbuf chain m contains a skeletal IP 219 * header (with len, off, ttl, proto, tos, src, dst). 220 * The mbuf chain containing the packet will be freed. 221 * The mbuf opt, if present, will not be freed. 222 */ 223int 224ip_output( 225 struct mbuf *m0, 226 struct mbuf *opt, 227 struct route *ro, 228 int flags, 229 struct ip_moptions *imo, 230 struct ip_out_args *ipoa) 231{ 232 int error; 233 error = ip_output_list(m0, 0, opt, ro, flags, imo, ipoa); 234 return error; 235} 236 237/* 238 * Returns: 0 Success 239 * ENOMEM 240 * EADDRNOTAVAIL 241 * ENETUNREACH 242 * EHOSTUNREACH 243 * EACCES 244 * EMSGSIZE 245 * ENOBUFS 246 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified] 247 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified] 248 * key_spdacquire:??? [IPSEC] 249 * ipsec4_output:??? [IPSEC] 250 * ip_dn_io_ptr:??? [dummynet] 251 * dlil_output:??? [DLIL] 252 * dlil_output_list:??? [DLIL] 253 * 254 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are 255 * only used as the error return from this function where one of 256 * these functions fails to return a policy. 257 */ 258int 259ip_output_list( 260 struct mbuf *m0, 261 int packetchain, 262 struct mbuf *opt, 263 struct route *ro, 264 int flags, 265 struct ip_moptions *imo, 266 struct ip_out_args *ipoa) 267{ 268 struct ip *ip; 269 struct ifnet *ifp = NULL; 270 struct mbuf *m = m0, *prevnxt = NULL, **mppn = &prevnxt; 271 int hlen = sizeof (struct ip); 272 int len = 0, error = 0; 273 struct sockaddr_in *dst = NULL; 274 struct in_ifaddr *ia = NULL, *src_ia = NULL; 275 int isbroadcast, sw_csum; 276 struct in_addr pkt_dst; 277 struct ipf_pktopts *ippo = NULL, ipf_pktopts; 278#if IPSEC 279 struct ipsec_output_state ipsec_state; 280 struct route *ipsec_saved_route = NULL; 281 struct socket *so = NULL; 282 struct secpolicy *sp = NULL; 283#endif 284#if IPFIREWALL_FORWARD 285 int fwd_rewrite_src = 0; 286#endif 287#if IPFIREWALL 288 int off; 289 struct sockaddr_in *next_hop_from_ipfwd_tag = NULL; 290#endif 291#if IPFIREWALL || DUMMYNET 292 struct ip_fw_args args; 293 struct m_tag *tag; 294#endif 295 int didfilter = 0; 296 ipfilter_t inject_filter_ref = 0; 297#if DUMMYNET 298 struct route saved_route; 299 struct ip_out_args saved_ipoa; 300 struct sockaddr_in dst_buf; 301#endif /* DUMMYNET */ 302 struct mbuf * packetlist; 303 int pktcnt = 0, tso = 0; 304 u_int32_t bytecnt = 0; 305 unsigned int ifscope = IFSCOPE_NONE; 306 unsigned int nocell = 0; 307 boolean_t select_srcif, srcbound; 308 struct flowadv *adv = NULL; 309 310 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); 311 312#if IPSEC 313 bzero(&ipsec_state, sizeof(ipsec_state)); 314#endif /* IPSEC */ 315 316 packetlist = m0; 317#if IPFIREWALL || DUMMYNET 318 bzero(&args, sizeof(struct ip_fw_args)); 319 320 if (SLIST_EMPTY(&m0->m_pkthdr.tags)) 321 goto ipfw_tags_done; 322 323 /* Grab info from mtags prepended to the chain */ 324#if DUMMYNET 325 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, 326 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { 327 struct dn_pkt_tag *dn_tag; 328 329 dn_tag = (struct dn_pkt_tag *)(tag+1); 330 args.fwa_ipfw_rule = dn_tag->dn_ipfw_rule; 331 args.fwa_pf_rule = dn_tag->dn_pf_rule; 332 opt = NULL; 333 saved_route = dn_tag->dn_ro; 334 ro = &saved_route; 335 336 imo = NULL; 337 bcopy(&dn_tag->dn_dst, &dst_buf, sizeof(dst_buf)); 338 dst = &dst_buf; 339 ifp = dn_tag->dn_ifp; 340 flags = dn_tag->dn_flags; 341 if ((dn_tag->dn_flags & IP_OUTARGS)) { 342 saved_ipoa = dn_tag->dn_ipoa; 343 ipoa = &saved_ipoa; 344 } 345 346 m_tag_delete(m0, tag); 347 } 348#endif /* DUMMYNET */ 349 350#if IPDIVERT 351 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, 352 KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) { 353 struct divert_tag *div_tag; 354 355 div_tag = (struct divert_tag *)(tag+1); 356 args.fwa_divert_rule = div_tag->cookie; 357 358 m_tag_delete(m0, tag); 359 } 360#endif /* IPDIVERT */ 361 362#if IPFIREWALL 363 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, 364 KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) { 365 struct ip_fwd_tag *ipfwd_tag; 366 367 ipfwd_tag = (struct ip_fwd_tag *)(tag+1); 368 next_hop_from_ipfwd_tag = ipfwd_tag->next_hop; 369 370 m_tag_delete(m0, tag); 371 } 372#endif /* IPFIREWALL */ 373 374ipfw_tags_done: 375#endif /* IPFIREWALL || DUMMYNET */ 376 377 m = m0; 378 379#if DIAGNOSTIC 380 if ( !m || (m->m_flags & M_PKTHDR) != 0) 381 panic("ip_output no HDR"); 382 if (!ro) 383 panic("ip_output no route, proto = %d", 384 mtod(m, struct ip *)->ip_p); 385#endif 386 387 bzero(&ipf_pktopts, sizeof(struct ipf_pktopts)); 388 ippo = &ipf_pktopts; 389 390 if (ip_doscopedroute && (flags & IP_OUTARGS)) { 391 /* 392 * In the forwarding case, only the ifscope value is used, 393 * as source interface selection doesn't take place. 394 */ 395 if ((select_srcif = (!(flags & IP_FORWARDING) && 396 (ipoa->ipoa_flags & IPOAF_SELECT_SRCIF)))) { 397 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF; 398 } 399 400 if ((ipoa->ipoa_flags & IPOAF_BOUND_IF) && 401 ipoa->ipoa_boundif != IFSCOPE_NONE) { 402 ifscope = ipoa->ipoa_boundif; 403 ipf_pktopts.ippo_flags |= 404 (IPPOF_BOUND_IF | (ifscope << IPPOF_SHIFT_IFSCOPE)); 405 } 406 407 if ((srcbound = (ipoa->ipoa_flags & IPOAF_BOUND_SRCADDR))) 408 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR; 409 } else { 410 select_srcif = FALSE; 411 srcbound = FALSE; 412 ifscope = IFSCOPE_NONE; 413 } 414 415 if ((flags & IP_OUTARGS) && (ipoa->ipoa_flags & IPOAF_NO_CELLULAR)) { 416 nocell = 1; 417 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR; 418 } 419 420 if (flags & IP_OUTARGS) { 421 adv = &ipoa->ipoa_flowadv; 422 adv->code = FADV_SUCCESS; 423 } 424 425#if DUMMYNET 426 if (args.fwa_ipfw_rule != NULL || args.fwa_pf_rule != NULL) { 427 /* dummynet already saw us */ 428 ip = mtod(m, struct ip *); 429 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 430 pkt_dst = ip->ip_dst; 431 if (ro->ro_rt != NULL) { 432 RT_LOCK_SPIN(ro->ro_rt); 433 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa; 434 if (ia) { 435 /* Become a regular mutex */ 436 RT_CONVERT_LOCK(ro->ro_rt); 437 IFA_ADDREF(&ia->ia_ifa); 438 } 439 RT_UNLOCK(ro->ro_rt); 440 } 441#if IPSEC 442 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { 443 so = ipsec_getsocket(m); 444 (void)ipsec_setsocket(m, NULL); 445 } 446#endif /* IPSEC */ 447#if IPFIREWALL 448 if (args.fwa_ipfw_rule != NULL) 449 goto skip_ipsec; 450#endif /* #if IPFIREWALL */ 451 if (args.fwa_pf_rule != NULL) 452 goto sendit; 453 } 454#endif /* DUMMYNET */ 455 456#if IPSEC 457 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { 458 so = ipsec_getsocket(m); 459 (void)ipsec_setsocket(m, NULL); 460 } 461#endif 462loopit: 463 /* 464 * No need to proccess packet twice if we've 465 * already seen it 466 */ 467 if (!SLIST_EMPTY(&m->m_pkthdr.tags)) 468 inject_filter_ref = ipf_get_inject_filter(m); 469 else 470 inject_filter_ref = 0; 471 472 if (opt) { 473 m = ip_insertoptions(m, opt, &len); 474 hlen = len; 475 /* Update the chain */ 476 if (m != m0) { 477 if (m0 == packetlist) 478 packetlist = m; 479 m0 = m; 480 } 481 } 482 ip = mtod(m, struct ip *); 483#if IPFIREWALL 484 /* 485 * rdar://8542331 486 * 487 * When dealing with a packet chain, we need to reset "next_hop" because 488 * "dst" may have been changed to the gateway address below for the previous 489 * packet of the chain. This could cause the route to be inavertandly changed 490 * to the route to the gateway address (instead of the route to the destination). 491 */ 492 args.fwa_next_hop = next_hop_from_ipfwd_tag; 493 pkt_dst = args.fwa_next_hop ? args.fwa_next_hop->sin_addr : ip->ip_dst; 494#else 495 pkt_dst = ip->ip_dst; 496#endif 497 498 /* 499 * We must not send if the packet is destined to network zero. 500 * RFC1122 3.2.1.3 (a) and (b). 501 */ 502 if (IN_ZERONET(ntohl(pkt_dst.s_addr))) { 503 error = EHOSTUNREACH; 504 goto bad; 505 } 506 507 /* 508 * Fill in IP header. 509 */ 510 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 511 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); 512 ip->ip_off &= IP_DF; 513#if RANDOM_IP_ID 514 ip->ip_id = ip_randomid(); 515#else 516 ip->ip_id = htons(ip_id++); 517#endif 518 OSAddAtomic(1, &ipstat.ips_localout); 519 } else { 520 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 521 } 522 523#if DEBUG 524 /* For debugging, we let the stack forge congestion */ 525 if (forge_ce != 0 && 526 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 || 527 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) { 528 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE; 529 forge_ce--; 530 } 531#endif /* DEBUG */ 532 533 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, 534 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); 535 536 dst = (struct sockaddr_in *)(void *)&ro->ro_dst; 537 538 /* 539 * If there is a cached route, 540 * check that it is to the same destination 541 * and is still up. If not, free it and try again. 542 * The address family should also be checked in case of sharing the 543 * cache with IPv6. 544 */ 545 546 if (ro->ro_rt != NULL) { 547 if (ro->ro_rt->generation_id != route_generation && 548 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && 549 (ip->ip_src.s_addr != INADDR_ANY)) { 550 src_ia = ifa_foraddr(ip->ip_src.s_addr); 551 if (src_ia == NULL) { 552 error = EADDRNOTAVAIL; 553 goto bad; 554 } 555 IFA_REMREF(&src_ia->ia_ifa); 556 } 557 /* 558 * Test rt_flags without holding rt_lock for performance 559 * reasons; if the route is down it will hopefully be 560 * caught by the layer below (since it uses this route 561 * as a hint) or during the next transmit. 562 */ 563 if ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 564 dst->sin_family != AF_INET || 565 dst->sin_addr.s_addr != pkt_dst.s_addr) { 566 rtfree(ro->ro_rt); 567 ro->ro_rt = NULL; 568 } 569 /* 570 * If we're doing source interface selection, we may not 571 * want to use this route; only synch up the generation 572 * count otherwise. 573 */ 574 if (!select_srcif && ro->ro_rt != NULL && 575 ro->ro_rt->generation_id != route_generation) 576 ro->ro_rt->generation_id = route_generation; 577 } 578 if (ro->ro_rt == NULL) { 579 bzero(dst, sizeof(*dst)); 580 dst->sin_family = AF_INET; 581 dst->sin_len = sizeof(*dst); 582 dst->sin_addr = pkt_dst; 583 } 584 /* 585 * If routing to interface only, 586 * short circuit routing lookup. 587 */ 588 if (flags & IP_ROUTETOIF) { 589 if (ia) 590 IFA_REMREF(&ia->ia_ifa); 591 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) { 592 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 593 OSAddAtomic(1, &ipstat.ips_noroute); 594 error = ENETUNREACH; 595 goto bad; 596 } 597 } 598 ifp = ia->ia_ifp; 599 ip->ip_ttl = 1; 600 isbroadcast = in_broadcast(dst->sin_addr, ifp); 601 } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) && 602 imo != NULL && (ifp = imo->imo_multicast_ifp) != NULL) { 603 /* 604 * Bypass the normal routing lookup for multicast 605 * packets if the interface is specified. 606 */ 607 isbroadcast = 0; 608 if (ia != NULL) 609 IFA_REMREF(&ia->ia_ifa); 610 611 /* Macro takes reference on ia */ 612 IFP_TO_IA(ifp, ia); 613 } else { 614 boolean_t cloneok = FALSE; 615 /* 616 * Perform source interface selection; the source IP address 617 * must belong to one of the addresses of the interface used 618 * by the route. For performance reasons, do this only if 619 * there is no route, or if the routing table has changed, 620 * or if we haven't done source interface selection on this 621 * route (for this PCB instance) before. 622 */ 623 if (select_srcif && ip->ip_src.s_addr != INADDR_ANY && 624 (ro->ro_rt == NULL || !(ro->ro_rt->rt_flags & RTF_UP) || 625 ro->ro_rt->generation_id != route_generation || 626 !(ro->ro_flags & ROF_SRCIF_SELECTED))) { 627 struct ifaddr *ifa; 628 629 /* Find the source interface */ 630 ifa = in_selectsrcif(ip, ro, ifscope); 631 632 /* 633 * If the source address belongs to a cellular interface 634 * and the caller forbids our using interfaces of such 635 * type, pretend that there is no source address. 636 */ 637 if (nocell && ifa != NULL && 638 ifa->ifa_ifp->if_type == IFT_CELLULAR) { 639 IFA_REMREF(ifa); 640 error = EADDRNOTAVAIL; 641 goto bad; 642 } 643 644 /* 645 * If the source address is spoofed (in the case of 646 * IP_RAWOUTPUT on an unbounded socket), or if this 647 * is destined for local/loopback, just let it go out 648 * using the interface of the route. Otherwise, 649 * there's no interface having such an address, 650 * so bail out. 651 */ 652 if (ifa == NULL && (!(flags & IP_RAWOUTPUT) || 653 srcbound) && ifscope != lo_ifp->if_index) { 654 error = EADDRNOTAVAIL; 655 goto bad; 656 } 657 658 /* 659 * If the caller didn't explicitly specify the scope, 660 * pick it up from the source interface. If the cached 661 * route was wrong and was blown away as part of source 662 * interface selection, don't mask out RTF_PRCLONING 663 * since that route may have been allocated by the ULP, 664 * unless the IP header was created by the caller or 665 * the destination is IPv4 LLA. The check for the 666 * latter is needed because IPv4 LLAs are never scoped 667 * in the current implementation, and we don't want to 668 * replace the resolved IPv4 LLA route with one whose 669 * gateway points to that of the default gateway on 670 * the primary interface of the system. 671 */ 672 if (ifa != NULL) { 673 if (ifscope == IFSCOPE_NONE) 674 ifscope = ifa->ifa_ifp->if_index; 675 IFA_REMREF(ifa); 676 cloneok = (!(flags & IP_RAWOUTPUT) && 677 !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)))); 678 } 679 } 680 681 /* 682 * If this is the case, we probably don't want to allocate 683 * a protocol-cloned route since we didn't get one from the 684 * ULP. This lets TCP do its thing, while not burdening 685 * forwarding or ICMP with the overhead of cloning a route. 686 * Of course, we still want to do any cloning requested by 687 * the link layer, as this is probably required in all cases 688 * for correct operation (as it is for ARP). 689 */ 690 if (ro->ro_rt == NULL) { 691 unsigned long ign = RTF_PRCLONING; 692 /* 693 * We make an exception here: if the destination 694 * address is INADDR_BROADCAST, allocate a protocol- 695 * cloned host route so that we end up with a route 696 * marked with the RTF_BROADCAST flag. Otherwise, 697 * we would end up referring to the default route, 698 * instead of creating a cloned host route entry. 699 * That would introduce inconsistencies between ULPs 700 * that allocate a route and those that don't. The 701 * RTF_BROADCAST route is important since we'd want 702 * to send out undirected IP broadcast packets using 703 * link-level broadcast address. Another exception 704 * is for ULP-created routes that got blown away by 705 * source interface selection (see above). 706 * 707 * These exceptions will no longer be necessary when 708 * the RTF_PRCLONING scheme is no longer present. 709 */ 710 if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST) 711 ign &= ~RTF_PRCLONING; 712 713 /* 714 * Loosen the route lookup criteria if the ifscope 715 * corresponds to the loopback interface; this is 716 * needed to support Application Layer Gateways 717 * listening on loopback, in conjunction with packet 718 * filter redirection rules. The final source IP 719 * address will be rewritten by the packet filter 720 * prior to the RFC1122 loopback check below. 721 */ 722 if (ifscope == lo_ifp->if_index) 723 rtalloc_ign(ro, ign); 724 else 725 rtalloc_scoped_ign(ro, ign, ifscope); 726 727 /* 728 * If the route points to a cellular interface and the 729 * caller forbids our using interfaces of such type, 730 * pretend that there is no route. 731 */ 732 if (nocell && ro->ro_rt != NULL) { 733 RT_LOCK_SPIN(ro->ro_rt); 734 if (ro->ro_rt->rt_ifp->if_type == 735 IFT_CELLULAR) { 736 RT_UNLOCK(ro->ro_rt); 737 rtfree(ro->ro_rt); 738 ro->ro_rt = NULL; 739 } else { 740 RT_UNLOCK(ro->ro_rt); 741 } 742 } 743 } 744 745 if (ro->ro_rt == NULL) { 746 OSAddAtomic(1, &ipstat.ips_noroute); 747 error = EHOSTUNREACH; 748 goto bad; 749 } 750 751 if (ia) 752 IFA_REMREF(&ia->ia_ifa); 753 RT_LOCK_SPIN(ro->ro_rt); 754 ia = ifatoia(ro->ro_rt->rt_ifa); 755 if (ia) { 756 /* Become a regular mutex */ 757 RT_CONVERT_LOCK(ro->ro_rt); 758 IFA_ADDREF(&ia->ia_ifa); 759 } 760 ifp = ro->ro_rt->rt_ifp; 761 ro->ro_rt->rt_use++; 762 if (ro->ro_rt->rt_flags & RTF_GATEWAY) { 763 dst = (struct sockaddr_in *)(void *) 764 ro->ro_rt->rt_gateway; 765 } 766 if (ro->ro_rt->rt_flags & RTF_HOST) { 767 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 768 } else { 769 /* Become a regular mutex */ 770 RT_CONVERT_LOCK(ro->ro_rt); 771 isbroadcast = in_broadcast(dst->sin_addr, ifp); 772 } 773 RT_UNLOCK(ro->ro_rt); 774 } 775 776 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { 777 struct in_multi *inm; 778 u_int32_t vif; 779 u_int8_t ttl = IP_DEFAULT_MULTICAST_TTL; 780 u_int8_t loop = IP_DEFAULT_MULTICAST_LOOP; 781 782 m->m_flags |= M_MCAST; 783 /* 784 * IP destination address is multicast. Make sure "dst" 785 * still points to the address in "ro". (It may have been 786 * changed to point to a gateway address, above.) 787 */ 788 dst = (struct sockaddr_in *)(void *)&ro->ro_dst; 789 /* 790 * See if the caller provided any multicast options 791 */ 792 if (imo != NULL) { 793 IMO_LOCK(imo); 794 vif = imo->imo_multicast_vif; 795 ttl = imo->imo_multicast_ttl; 796 loop = imo->imo_multicast_loop; 797 if ((flags & IP_RAWOUTPUT) == 0) 798 ip->ip_ttl = ttl; 799 if (imo->imo_multicast_ifp != NULL) 800 ifp = imo->imo_multicast_ifp; 801 IMO_UNLOCK(imo); 802#if MROUTING 803 if (vif != -1 && ((flags & IP_RAWOUTPUT) == 0 || 804 ip->ip_src.s_addr == INADDR_ANY)) 805 ip->ip_src.s_addr = ip_mcast_src(vif); 806#endif /* MROUTING */ 807 } else if ((flags & IP_RAWOUTPUT) == 0) { 808 vif = -1; 809 ip->ip_ttl = ttl; 810 } 811 /* 812 * Confirm that the outgoing interface supports multicast. 813 */ 814 if (imo == NULL || vif == -1) { 815 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 816 OSAddAtomic(1, &ipstat.ips_noroute); 817 error = ENETUNREACH; 818 goto bad; 819 } 820 } 821 /* 822 * If source address not specified yet, use address 823 * of outgoing interface. 824 */ 825 if (ip->ip_src.s_addr == INADDR_ANY) { 826 struct in_ifaddr *ia1; 827 lck_rw_lock_shared(in_ifaddr_rwlock); 828 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) { 829 IFA_LOCK_SPIN(&ia1->ia_ifa); 830 if (ia1->ia_ifp == ifp) { 831 ip->ip_src = IA_SIN(ia1)->sin_addr; 832 IFA_UNLOCK(&ia1->ia_ifa); 833 break; 834 } 835 IFA_UNLOCK(&ia1->ia_ifa); 836 } 837 lck_rw_done(in_ifaddr_rwlock); 838 if (ip->ip_src.s_addr == INADDR_ANY) { 839 error = ENETUNREACH; 840 goto bad; 841 } 842 } 843 844 in_multihead_lock_shared(); 845 IN_LOOKUP_MULTI(&pkt_dst, ifp, inm); 846 in_multihead_lock_done(); 847 if (inm != NULL && (imo == NULL || loop)) { 848 /* 849 * If we belong to the destination multicast group 850 * on the outgoing interface, and the caller did not 851 * forbid loopback, loop back a copy. 852 */ 853 if (!TAILQ_EMPTY(&ipv4_filters)) { 854 struct ipfilter *filter; 855 int seen = (inject_filter_ref == 0); 856 857 if (imo != NULL) { 858 ipf_pktopts.ippo_flags |= IPPOF_MCAST_OPTS; 859 ipf_pktopts.ippo_mcast_ifnet = ifp; 860 ipf_pktopts.ippo_mcast_ttl = ttl; 861 ipf_pktopts.ippo_mcast_loop = loop; 862 } 863 864 ipf_ref(); 865 866 /* 4135317 - always pass network byte order to filter */ 867 868#if BYTE_ORDER != BIG_ENDIAN 869 HTONS(ip->ip_len); 870 HTONS(ip->ip_off); 871#endif 872 873 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { 874 if (seen == 0) { 875 if ((struct ipfilter *)inject_filter_ref == filter) 876 seen = 1; 877 } else if (filter->ipf_filter.ipf_output) { 878 errno_t result; 879 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); 880 if (result == EJUSTRETURN) { 881 ipf_unref(); 882 INM_REMREF(inm); 883 goto done; 884 } 885 if (result != 0) { 886 ipf_unref(); 887 INM_REMREF(inm); 888 goto bad; 889 } 890 } 891 } 892 893 /* set back to host byte order */ 894 ip = mtod(m, struct ip *); 895 896#if BYTE_ORDER != BIG_ENDIAN 897 NTOHS(ip->ip_len); 898 NTOHS(ip->ip_off); 899#endif 900 901 ipf_unref(); 902 didfilter = 1; 903 } 904 ip_mloopback(ifp, m, dst, hlen); 905 } 906#if MROUTING 907 else { 908 /* 909 * If we are acting as a multicast router, perform 910 * multicast forwarding as if the packet had just 911 * arrived on the interface to which we are about 912 * to send. The multicast forwarding function 913 * recursively calls this function, using the 914 * IP_FORWARDING flag to prevent infinite recursion. 915 * 916 * Multicasts that are looped back by ip_mloopback(), 917 * above, will be forwarded by the ip_input() routine, 918 * if necessary. 919 */ 920 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 921 /* 922 * Check if rsvp daemon is running. If not, don't 923 * set ip_moptions. This ensures that the packet 924 * is multicast and not just sent down one link 925 * as prescribed by rsvpd. 926 */ 927 if (!rsvp_on) 928 imo = NULL; 929 if (ip_mforward(ip, ifp, m, imo) != 0) { 930 m_freem(m); 931 if (inm != NULL) 932 INM_REMREF(inm); 933 OSAddAtomic(1, &ipstat.ips_cantforward); 934 goto done; 935 } 936 } 937 } 938#endif /* MROUTING */ 939 if (inm != NULL) 940 INM_REMREF(inm); 941 /* 942 * Multicasts with a time-to-live of zero may be looped- 943 * back, above, but must not be transmitted on a network. 944 * Also, multicasts addressed to the loopback interface 945 * are not sent -- the above call to ip_mloopback() will 946 * loop back a copy if this host actually belongs to the 947 * destination group on the loopback interface. 948 */ 949 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 950 m_freem(m); 951 goto done; 952 } 953 954 goto sendit; 955 } 956 /* 957 * If source address not specified yet, use address 958 * of outgoing interface. 959 */ 960 if (ip->ip_src.s_addr == INADDR_ANY) { 961 IFA_LOCK_SPIN(&ia->ia_ifa); 962 ip->ip_src = IA_SIN(ia)->sin_addr; 963 IFA_UNLOCK(&ia->ia_ifa); 964#if IPFIREWALL_FORWARD 965 /* Keep note that we did this - if the firewall changes 966 * the next-hop, our interface may change, changing the 967 * default source IP. It's a shame so much effort happens 968 * twice. Oh well. 969 */ 970 fwd_rewrite_src++; 971#endif /* IPFIREWALL_FORWARD */ 972 } 973 974 /* 975 * Look for broadcast address and 976 * and verify user is allowed to send 977 * such a packet. 978 */ 979 if (isbroadcast) { 980 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 981 error = EADDRNOTAVAIL; 982 goto bad; 983 } 984 if ((flags & IP_ALLOWBROADCAST) == 0) { 985 error = EACCES; 986 goto bad; 987 } 988 /* don't allow broadcast messages to be fragmented */ 989 if ((u_short)ip->ip_len > ifp->if_mtu) { 990 error = EMSGSIZE; 991 goto bad; 992 } 993 m->m_flags |= M_BCAST; 994 } else { 995 m->m_flags &= ~M_BCAST; 996 } 997 998sendit: 999#if PF 1000 /* Invoke outbound packet filter */ 1001 if (PF_IS_ENABLED) { 1002 int rc; 1003 1004 m0 = m; /* Save for later */ 1005#if DUMMYNET 1006 args.fwa_m = m; 1007 args.fwa_next_hop = dst; 1008 args.fwa_oif = ifp; 1009 args.fwa_ro = ro; 1010 args.fwa_dst = dst; 1011 args.fwa_oflags = flags; 1012 if (flags & IP_OUTARGS) 1013 args.fwa_ipoa = ipoa; 1014 rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, &args); 1015#else /* DUMMYNET */ 1016 rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, NULL); 1017#endif /* DUMMYNET */ 1018 if (rc != 0 || m == NULL) { 1019 /* Move to the next packet */ 1020 m = *mppn; 1021 1022 /* Skip ahead if first packet in list got dropped */ 1023 if (packetlist == m0) 1024 packetlist = m; 1025 1026 if (m != NULL) { 1027 m0 = m; 1028 /* Next packet in the chain */ 1029 goto loopit; 1030 } else if (packetlist != NULL) { 1031 /* No more packet; send down the chain */ 1032 goto sendchain; 1033 } 1034 /* Nothing left; we're done */ 1035 goto done; 1036 } 1037 m0 = m; 1038 ip = mtod(m, struct ip *); 1039 pkt_dst = ip->ip_dst; 1040 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 1041 } 1042#endif /* PF */ 1043 /* 1044 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt 1045 */ 1046 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 1047 ip_linklocal_stat.iplls_out_total++; 1048 if (ip->ip_ttl != MAXTTL) { 1049 ip_linklocal_stat.iplls_out_badttl++; 1050 ip->ip_ttl = MAXTTL; 1051 } 1052 } 1053 1054 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) { 1055 struct ipfilter *filter; 1056 int seen = (inject_filter_ref == 0); 1057 ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS; 1058 1059 /* Check that a TSO frame isn't passed to a filter. 1060 * This could happen if a filter is inserted while 1061 * TCP is sending the TSO packet. 1062 */ 1063 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) { 1064 error = EMSGSIZE; 1065 goto bad; 1066 } 1067 1068 ipf_ref(); 1069 1070 /* 4135317 - always pass network byte order to filter */ 1071 1072#if BYTE_ORDER != BIG_ENDIAN 1073 HTONS(ip->ip_len); 1074 HTONS(ip->ip_off); 1075#endif 1076 1077 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { 1078 if (seen == 0) { 1079 if ((struct ipfilter *)inject_filter_ref == filter) 1080 seen = 1; 1081 } else if (filter->ipf_filter.ipf_output) { 1082 errno_t result; 1083 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); 1084 if (result == EJUSTRETURN) { 1085 ipf_unref(); 1086 goto done; 1087 } 1088 if (result != 0) { 1089 ipf_unref(); 1090 goto bad; 1091 } 1092 } 1093 } 1094 1095 /* set back to host byte order */ 1096 ip = mtod(m, struct ip *); 1097 1098#if BYTE_ORDER != BIG_ENDIAN 1099 NTOHS(ip->ip_len); 1100 NTOHS(ip->ip_off); 1101#endif 1102 1103 ipf_unref(); 1104 } 1105 1106#if IPSEC 1107 /* temporary for testing only: bypass ipsec alltogether */ 1108 1109 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0) 1110 goto skip_ipsec; 1111 1112 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); 1113 1114 1115 /* get SP for this packet */ 1116 if (so == NULL) 1117 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 1118 else 1119 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 1120 1121 if (sp == NULL) { 1122 IPSEC_STAT_INCREMENT(ipsecstat.out_inval); 1123 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); 1124 goto bad; 1125 } 1126 1127 error = 0; 1128 1129 /* check policy */ 1130 switch (sp->policy) { 1131 case IPSEC_POLICY_DISCARD: 1132 case IPSEC_POLICY_GENERATE: 1133 /* 1134 * This packet is just discarded. 1135 */ 1136 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio); 1137 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0); 1138 goto bad; 1139 1140 case IPSEC_POLICY_BYPASS: 1141 case IPSEC_POLICY_NONE: 1142 /* no need to do IPsec. */ 1143 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0); 1144 goto skip_ipsec; 1145 1146 case IPSEC_POLICY_IPSEC: 1147 if (sp->req == NULL) { 1148 /* acquire a policy */ 1149 error = key_spdacquire(sp); 1150 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0); 1151 goto bad; 1152 } 1153 break; 1154 1155 case IPSEC_POLICY_ENTRUST: 1156 default: 1157 printf("ip_output: Invalid policy found. %d\n", sp->policy); 1158 } 1159 { 1160 ipsec_state.m = m; 1161 if (flags & IP_ROUTETOIF) { 1162 bzero(&ipsec_state.ro, sizeof(ipsec_state.ro)); 1163 } else 1164 route_copyout(&ipsec_state.ro, ro, sizeof(ipsec_state.ro)); 1165 ipsec_state.dst = (struct sockaddr *)dst; 1166 1167 ip->ip_sum = 0; 1168 1169 /* 1170 * XXX 1171 * delayed checksums are not currently compatible with IPsec 1172 */ 1173 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1174 in_delayed_cksum(m); 1175 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1176 } 1177 1178 1179#if BYTE_ORDER != BIG_ENDIAN 1180 HTONS(ip->ip_len); 1181 HTONS(ip->ip_off); 1182#endif 1183 1184 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL, 1185 struct ip *, ip, struct ifnet *, ifp, 1186 struct ip *, ip, struct ip6_hdr *, NULL); 1187 1188 error = ipsec4_output(&ipsec_state, sp, flags); 1189 1190 m0 = m = ipsec_state.m; 1191 1192 if (flags & IP_ROUTETOIF) { 1193 /* 1194 * if we have tunnel mode SA, we may need to ignore 1195 * IP_ROUTETOIF. 1196 */ 1197 if (ipsec_state.tunneled) { 1198 flags &= ~IP_ROUTETOIF; 1199 ipsec_saved_route = ro; 1200 ro = &ipsec_state.ro; 1201 } 1202 } else { 1203 ipsec_saved_route = ro; 1204 ro = &ipsec_state.ro; 1205 } 1206 dst = (struct sockaddr_in *)(void *)ipsec_state.dst; 1207 if (error) { 1208 /* mbuf is already reclaimed in ipsec4_output. */ 1209 m0 = NULL; 1210 switch (error) { 1211 case EHOSTUNREACH: 1212 case ENETUNREACH: 1213 case EMSGSIZE: 1214 case ENOBUFS: 1215 case ENOMEM: 1216 break; 1217 default: 1218 printf("ip4_output (ipsec): error code %d\n", error); 1219 /*fall through*/ 1220 case ENOENT: 1221 /* don't show these error codes to the user */ 1222 error = 0; 1223 break; 1224 } 1225 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0); 1226 goto bad; 1227 } 1228 } 1229 1230 /* be sure to update variables that are affected by ipsec4_output() */ 1231 ip = mtod(m, struct ip *); 1232 1233#ifdef _IP_VHL 1234 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 1235#else 1236 hlen = ip->ip_hl << 2; 1237#endif 1238 /* Check that there wasn't a route change and src is still valid */ 1239 if (ro->ro_rt != NULL && ro->ro_rt->generation_id != route_generation) { 1240 if ((src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL && 1241 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) { 1242 error = EADDRNOTAVAIL; 1243 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1244 5,0,0,0,0); 1245 goto bad; 1246 } 1247 rtfree(ro->ro_rt); 1248 ro->ro_rt = NULL; 1249 if (src_ia != NULL) 1250 IFA_REMREF(&src_ia->ia_ifa); 1251 } 1252 1253 if (ro->ro_rt == NULL) { 1254 if ((flags & IP_ROUTETOIF) == 0) { 1255 printf("ip_output: can't update route after " 1256 "IPsec processing\n"); 1257 error = EHOSTUNREACH; /*XXX*/ 1258 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1259 6,0,0,0,0); 1260 goto bad; 1261 } 1262 } else { 1263 if (ia) 1264 IFA_REMREF(&ia->ia_ifa); 1265 RT_LOCK_SPIN(ro->ro_rt); 1266 ia = ifatoia(ro->ro_rt->rt_ifa); 1267 if (ia) { 1268 /* Become a regular mutex */ 1269 RT_CONVERT_LOCK(ro->ro_rt); 1270 IFA_ADDREF(&ia->ia_ifa); 1271 } 1272 ifp = ro->ro_rt->rt_ifp; 1273 RT_UNLOCK(ro->ro_rt); 1274 } 1275 1276 /* make it flipped, again. */ 1277 1278#if BYTE_ORDER != BIG_ENDIAN 1279 NTOHS(ip->ip_len); 1280 NTOHS(ip->ip_off); 1281#endif 1282 1283 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff); 1284 1285 /* Pass to filters again */ 1286 if (!TAILQ_EMPTY(&ipv4_filters)) { 1287 struct ipfilter *filter; 1288 1289 ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS; 1290 1291 /* Check that a TSO frame isn't passed to a filter. 1292 * This could happen if a filter is inserted while 1293 * TCP is sending the TSO packet. 1294 */ 1295 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) { 1296 error = EMSGSIZE; 1297 goto bad; 1298 } 1299 1300 ipf_ref(); 1301 1302 /* 4135317 - always pass network byte order to filter */ 1303 1304#if BYTE_ORDER != BIG_ENDIAN 1305 HTONS(ip->ip_len); 1306 HTONS(ip->ip_off); 1307#endif 1308 1309 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { 1310 if (filter->ipf_filter.ipf_output) { 1311 errno_t result; 1312 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); 1313 if (result == EJUSTRETURN) { 1314 ipf_unref(); 1315 goto done; 1316 } 1317 if (result != 0) { 1318 ipf_unref(); 1319 goto bad; 1320 } 1321 } 1322 } 1323 1324 /* set back to host byte order */ 1325 ip = mtod(m, struct ip *); 1326 1327#if BYTE_ORDER != BIG_ENDIAN 1328 NTOHS(ip->ip_len); 1329 NTOHS(ip->ip_off); 1330#endif 1331 1332 ipf_unref(); 1333 } 1334skip_ipsec: 1335#endif /*IPSEC*/ 1336 1337#if IPFIREWALL 1338 /* 1339 * Check with the firewall... 1340 * but not if we are already being fwd'd from a firewall. 1341 */ 1342 if (fw_enable && IPFW_LOADED && !args.fwa_next_hop) { 1343 struct sockaddr_in *old = dst; 1344 1345 args.fwa_m = m; 1346 args.fwa_next_hop = dst; 1347 args.fwa_oif = ifp; 1348 off = ip_fw_chk_ptr(&args); 1349 m = args.fwa_m; 1350 dst = args.fwa_next_hop; 1351 1352 /* 1353 * On return we must do the following: 1354 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new) 1355 * 1<=off<= 0xffff -> DIVERT 1356 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe 1357 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet 1358 * dst != old -> IPFIREWALL_FORWARD 1359 * off==0, dst==old -> accept 1360 * If some of the above modules is not compiled in, then 1361 * we should't have to check the corresponding condition 1362 * (because the ipfw control socket should not accept 1363 * unsupported rules), but better play safe and drop 1364 * packets in case of doubt. 1365 */ 1366 m0 = m; 1367 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { 1368 if (m) 1369 m_freem(m); 1370 error = EACCES ; 1371 goto done ; 1372 } 1373 ip = mtod(m, struct ip *); 1374 1375 if (off == 0 && dst == old) {/* common case */ 1376 goto pass ; 1377 } 1378#if DUMMYNET 1379 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { 1380 /* 1381 * pass the pkt to dummynet. Need to include 1382 * pipe number, m, ifp, ro, dst because these are 1383 * not recomputed in the next pass. 1384 * All other parameters have been already used and 1385 * so they are not needed anymore. 1386 * XXX note: if the ifp or ro entry are deleted 1387 * while a pkt is in dummynet, we are in trouble! 1388 */ 1389 args.fwa_ro = ro; 1390 args.fwa_dst = dst; 1391 args.fwa_oflags = flags; 1392 if (flags & IP_OUTARGS) 1393 args.fwa_ipoa = ipoa; 1394 1395 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, 1396 &args, DN_CLIENT_IPFW); 1397 goto done; 1398 } 1399#endif /* DUMMYNET */ 1400#if IPDIVERT 1401 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 1402 struct mbuf *clone = NULL; 1403 1404 /* Clone packet if we're doing a 'tee' */ 1405 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 1406 clone = m_dup(m, M_DONTWAIT); 1407 /* 1408 * XXX 1409 * delayed checksums are not currently compatible 1410 * with divert sockets. 1411 */ 1412 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1413 in_delayed_cksum(m); 1414 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1415 } 1416 1417 /* Restore packet header fields to original values */ 1418 1419#if BYTE_ORDER != BIG_ENDIAN 1420 HTONS(ip->ip_len); 1421 HTONS(ip->ip_off); 1422#endif 1423 1424 /* Deliver packet to divert input routine */ 1425 divert_packet(m, 0, off & 0xffff, args.fwa_divert_rule); 1426 1427 /* If 'tee', continue with original packet */ 1428 if (clone != NULL) { 1429 m0 = m = clone; 1430 ip = mtod(m, struct ip *); 1431 goto pass; 1432 } 1433 goto done; 1434 } 1435#endif 1436 1437#if IPFIREWALL_FORWARD 1438 /* Here we check dst to make sure it's directly reachable on the 1439 * interface we previously thought it was. 1440 * If it isn't (which may be likely in some situations) we have 1441 * to re-route it (ie, find a route for the next-hop and the 1442 * associated interface) and set them here. This is nested 1443 * forwarding which in most cases is undesirable, except where 1444 * such control is nigh impossible. So we do it here. 1445 * And I'm babbling. 1446 */ 1447 if (off == 0 && old != dst) { 1448 struct in_ifaddr *ia_fw; 1449 1450 /* It's changed... */ 1451 /* There must be a better way to do this next line... */ 1452 static struct route sro_fwd, *ro_fwd = &sro_fwd; 1453#if IPFIREWALL_FORWARD_DEBUG 1454 printf("IPFIREWALL_FORWARD: New dst ip: "); 1455 print_ip(dst->sin_addr); 1456 printf("\n"); 1457#endif 1458 /* 1459 * We need to figure out if we have been forwarded 1460 * to a local socket. If so then we should somehow 1461 * "loop back" to ip_input, and get directed to the 1462 * PCB as if we had received this packet. This is 1463 * because it may be dificult to identify the packets 1464 * you want to forward until they are being output 1465 * and have selected an interface. (e.g. locally 1466 * initiated packets) If we used the loopback inteface, 1467 * we would not be able to control what happens 1468 * as the packet runs through ip_input() as 1469 * it is done through a ISR. 1470 */ 1471 lck_rw_lock_shared(in_ifaddr_rwlock); 1472 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) { 1473 /* 1474 * If the addr to forward to is one 1475 * of ours, we pretend to 1476 * be the destination for this packet. 1477 */ 1478 IFA_LOCK_SPIN(&ia_fw->ia_ifa); 1479 if (IA_SIN(ia_fw)->sin_addr.s_addr == 1480 dst->sin_addr.s_addr) { 1481 IFA_UNLOCK(&ia_fw->ia_ifa); 1482 break; 1483 } 1484 IFA_UNLOCK(&ia_fw->ia_ifa); 1485 } 1486 lck_rw_done(in_ifaddr_rwlock); 1487 if (ia_fw) { 1488 /* tell ip_input "dont filter" */ 1489 struct m_tag *fwd_tag; 1490 struct ip_fwd_tag *ipfwd_tag; 1491 1492 fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID, 1493 KERNEL_TAG_TYPE_IPFORWARD, 1494 sizeof (*ipfwd_tag), M_NOWAIT, m); 1495 if (fwd_tag == NULL) { 1496 error = ENOBUFS; 1497 goto bad; 1498 } 1499 1500 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1); 1501 ipfwd_tag->next_hop = args.fwa_next_hop; 1502 1503 m_tag_prepend(m, fwd_tag); 1504 1505 if (m->m_pkthdr.rcvif == NULL) 1506 m->m_pkthdr.rcvif = lo_ifp; 1507 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) & 1508 m->m_pkthdr.csum_flags) == 0) { 1509 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1510 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1511 m->m_pkthdr.csum_flags |= 1512 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1513 m->m_pkthdr.csum_data = 0xffff; 1514 } 1515 m->m_pkthdr.csum_flags |= 1516 CSUM_IP_CHECKED | CSUM_IP_VALID; 1517 } 1518 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1519 in_delayed_cksum(m); 1520 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1521 ip->ip_sum = in_cksum(m, hlen); 1522 } 1523 1524#if BYTE_ORDER != BIG_ENDIAN 1525 HTONS(ip->ip_len); 1526 HTONS(ip->ip_off); 1527#endif 1528 1529 /* we need to call dlil_output to run filters 1530 * and resync to avoid recursion loops. 1531 */ 1532 if (lo_ifp) { 1533 dlil_output(lo_ifp, PF_INET, m, 0, 1534 (struct sockaddr *)dst, 0, adv); 1535 } 1536 else { 1537 printf("ip_output: no loopback ifp for forwarding!!!\n"); 1538 } 1539 goto done; 1540 } 1541 /* Some of the logic for this was 1542 * nicked from above. 1543 * 1544 * This rewrites the cached route in a local PCB. 1545 * Is this what we want to do? 1546 */ 1547 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 1548 1549 ro_fwd->ro_rt = NULL; 1550 rtalloc_ign(ro_fwd, RTF_PRCLONING); 1551 1552 if (ro_fwd->ro_rt == NULL) { 1553 OSAddAtomic(1, &ipstat.ips_noroute); 1554 error = EHOSTUNREACH; 1555 goto bad; 1556 } 1557 1558 RT_LOCK_SPIN(ro_fwd->ro_rt); 1559 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa); 1560 if (ia_fw != NULL) { 1561 /* Become a regular mutex */ 1562 RT_CONVERT_LOCK(ro_fwd->ro_rt); 1563 IFA_ADDREF(&ia_fw->ia_ifa); 1564 } 1565 ifp = ro_fwd->ro_rt->rt_ifp; 1566 ro_fwd->ro_rt->rt_use++; 1567 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 1568 dst = (struct sockaddr_in *)(void *)ro_fwd->ro_rt->rt_gateway; 1569 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) { 1570 isbroadcast = 1571 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 1572 } else { 1573 /* Become a regular mutex */ 1574 RT_CONVERT_LOCK(ro_fwd->ro_rt); 1575 isbroadcast = in_broadcast(dst->sin_addr, ifp); 1576 } 1577 RT_UNLOCK(ro_fwd->ro_rt); 1578 rtfree(ro->ro_rt); 1579 ro->ro_rt = ro_fwd->ro_rt; 1580 dst = (struct sockaddr_in *)(void *)&ro_fwd->ro_dst; 1581 1582 /* 1583 * If we added a default src ip earlier, 1584 * which would have been gotten from the-then 1585 * interface, do it again, from the new one. 1586 */ 1587 if (ia_fw != NULL) { 1588 if (fwd_rewrite_src) { 1589 IFA_LOCK_SPIN(&ia_fw->ia_ifa); 1590 ip->ip_src = IA_SIN(ia_fw)->sin_addr; 1591 IFA_UNLOCK(&ia_fw->ia_ifa); 1592 } 1593 IFA_REMREF(&ia_fw->ia_ifa); 1594 } 1595 goto pass ; 1596 } 1597#endif /* IPFIREWALL_FORWARD */ 1598 /* 1599 * if we get here, none of the above matches, and 1600 * we have to drop the pkt 1601 */ 1602 m_freem(m); 1603 error = EACCES; /* not sure this is the right error msg */ 1604 goto done; 1605 } 1606 1607pass: 1608#endif /* IPFIREWALL */ 1609#if __APPLE__ 1610 /* Do not allow loopback address to wind up on a wire */ 1611 if ((ifp->if_flags & IFF_LOOPBACK) == 0 && 1612 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 1613 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { 1614 OSAddAtomic(1, &ipstat.ips_badaddr); 1615 m_freem(m); 1616 /* 1617 * Do not simply drop the packet just like a firewall -- we want the 1618 * the application to feel the pain. 1619 * Return ENETUNREACH like ip6_output does in some similar cases. 1620 * This can startle the otherwise clueless process that specifies 1621 * loopback as the source address. 1622 */ 1623 error = ENETUNREACH; 1624 goto done; 1625 } 1626#endif 1627 m->m_pkthdr.csum_flags |= CSUM_IP; 1628 tso = (ifp->if_hwassist & IFNET_TSO_IPV4) && (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4); 1629 1630 sw_csum = m->m_pkthdr.csum_flags 1631 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); 1632 1633 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) { 1634 /* 1635 * Special case code for GMACE 1636 * frames that can be checksumed by GMACE SUM16 HW: 1637 * frame >64, no fragments, no UDP 1638 */ 1639 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP) 1640 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) { 1641 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */ 1642 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */ 1643 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF; 1644 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */ 1645 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ; 1646 m->m_pkthdr.csum_data += offset; 1647 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */ 1648 } else { 1649 /* let the software handle any UDP or TCP checksums */ 1650 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags); 1651 } 1652 } else if (apple_hwcksum_tx == 0) { 1653 sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) & 1654 m->m_pkthdr.csum_flags; 1655 } 1656 1657 if (sw_csum & CSUM_DELAY_DATA) { 1658 in_delayed_cksum(m); 1659 sw_csum &= ~CSUM_DELAY_DATA; 1660 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1661 } 1662 1663 if (apple_hwcksum_tx != 0) { 1664 m->m_pkthdr.csum_flags &= 1665 IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); 1666 } else { 1667 m->m_pkthdr.csum_flags = 0; 1668 } 1669 1670 /* 1671 * If small enough for interface, or the interface will take 1672 * care of the fragmentation for us, can just send directly. 1673 */ 1674 if ((u_short)ip->ip_len <= ifp->if_mtu || tso || 1675 ifp->if_hwassist & CSUM_FRAGMENT) { 1676 if (tso) 1677 m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4; 1678 1679 1680#if BYTE_ORDER != BIG_ENDIAN 1681 HTONS(ip->ip_len); 1682 HTONS(ip->ip_off); 1683#endif 1684 1685 ip->ip_sum = 0; 1686 if (sw_csum & CSUM_DELAY_IP) { 1687 ip->ip_sum = in_cksum(m, hlen); 1688 } 1689 1690#ifndef __APPLE__ 1691 /* Record statistics for this interface address. */ 1692 if (!(flags & IP_FORWARDING) && ia != NULL) { 1693 ia->ia_ifa.if_opackets++; 1694 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1695 } 1696#endif 1697 1698#if IPSEC 1699 /* clean ipsec history once it goes out of the node */ 1700 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) 1701 ipsec_delaux(m); 1702#endif 1703 if (packetchain == 0) { 1704 if (ro->ro_rt && nstat_collect) 1705 nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0); 1706 error = dlil_output(ifp, PF_INET, m, ro->ro_rt, 1707 (struct sockaddr *)dst, 0, adv); 1708 goto done; 1709 } 1710 else { /* packet chaining allows us to reuse the route for all packets */ 1711 bytecnt += m->m_pkthdr.len; 1712 mppn = &m->m_nextpkt; 1713 m = m->m_nextpkt; 1714 if (m == NULL) { 1715#if PF 1716sendchain: 1717#endif /* PF */ 1718 if (pktcnt > ip_maxchainsent) 1719 ip_maxchainsent = pktcnt; 1720 if (ro->ro_rt && nstat_collect) 1721 nstat_route_tx(ro->ro_rt, pktcnt, bytecnt, 0); 1722 //send 1723 error = dlil_output(ifp, PF_INET, packetlist, 1724 ro->ro_rt, (struct sockaddr *)dst, 0, adv); 1725 pktcnt = 0; 1726 bytecnt = 0; 1727 goto done; 1728 1729 } 1730 m0 = m; 1731 pktcnt++; 1732 goto loopit; 1733 } 1734 } 1735 /* 1736 * Too large for interface; fragment if possible. 1737 * Must be able to put at least 8 bytes per fragment. 1738 */ 1739 1740 if (ip->ip_off & IP_DF || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) || 1741 pktcnt > 0) { 1742 error = EMSGSIZE; 1743 /* 1744 * This case can happen if the user changed the MTU 1745 * of an interface after enabling IP on it. Because 1746 * most netifs don't keep track of routes pointing to 1747 * them, there is no way for one to update all its 1748 * routes when the MTU is changed. 1749 */ 1750 if (ro->ro_rt) { 1751 RT_LOCK_SPIN(ro->ro_rt); 1752 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) 1753 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) 1754 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 1755 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 1756 } 1757 RT_UNLOCK(ro->ro_rt); 1758 } 1759 if (pktcnt > 0) { 1760 m0 = packetlist; 1761 } 1762 OSAddAtomic(1, &ipstat.ips_cantfrag); 1763 goto bad; 1764 } 1765 1766 error = ip_fragment(m, ifp, ifp->if_mtu, sw_csum); 1767 if (error != 0) { 1768 m0 = m = NULL; 1769 goto bad; 1770 } 1771 1772 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, 1773 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); 1774 1775 for (m = m0; m; m = m0) { 1776 m0 = m->m_nextpkt; 1777 m->m_nextpkt = 0; 1778#if IPSEC 1779 /* clean ipsec history once it goes out of the node */ 1780 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) 1781 ipsec_delaux(m); 1782#endif 1783 if (error == 0) { 1784#ifndef __APPLE__ 1785 /* Record statistics for this interface address. */ 1786 if (ia != NULL) { 1787 ia->ia_ifa.if_opackets++; 1788 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1789 } 1790#endif 1791 if ((packetchain != 0) && (pktcnt > 0)) 1792 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist); 1793 if (ro->ro_rt && nstat_collect) 1794 nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0); 1795 error = dlil_output(ifp, PF_INET, m, ro->ro_rt, 1796 (struct sockaddr *)dst, 0, adv); 1797 } else 1798 m_freem(m); 1799 } 1800 1801 if (error == 0) 1802 OSAddAtomic(1, &ipstat.ips_fragmented); 1803 1804done: 1805 if (ia) { 1806 IFA_REMREF(&ia->ia_ifa); 1807 ia = NULL; 1808 } 1809#if IPSEC 1810 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { 1811 if (ipsec_state.ro.ro_rt) 1812 rtfree(ipsec_state.ro.ro_rt); 1813 if (sp != NULL) { 1814 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 1815 printf("DP ip_output call free SP:%x\n", sp)); 1816 key_freesp(sp, KEY_SADB_UNLOCKED); 1817 } 1818 } 1819#endif /* IPSEC */ 1820 1821 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0); 1822 return (error); 1823bad: 1824 m_freem(m0); 1825 goto done; 1826} 1827 1828int 1829ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum) 1830{ 1831 struct ip *ip, *mhip; 1832 int len, hlen, mhlen, firstlen, off, error = 0; 1833 struct mbuf **mnext = &m->m_nextpkt, *m0; 1834 int nfrags = 1; 1835 1836 ip = mtod(m, struct ip *); 1837#ifdef _IP_VHL 1838 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 1839#else 1840 hlen = ip->ip_hl << 2; 1841#endif 1842 1843 firstlen = len = (mtu - hlen) &~ 7; 1844 if (len < 8) { 1845 m_freem(m); 1846 return (EMSGSIZE); 1847 } 1848 1849 /* 1850 * if the interface will not calculate checksums on 1851 * fragmented packets, then do it here. 1852 */ 1853 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 1854 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { 1855 in_delayed_cksum(m); 1856 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1857 } 1858 1859 /* 1860 * Loop through length of segment after first fragment, 1861 * make new header and copy data of each part and link onto chain. 1862 */ 1863 m0 = m; 1864 mhlen = sizeof (struct ip); 1865 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) { 1866 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */ 1867 if (m == 0) { 1868 error = ENOBUFS; 1869 OSAddAtomic(1, &ipstat.ips_odropped); 1870 goto sendorfree; 1871 } 1872 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 1873 m->m_data += max_linkhdr; 1874 mhip = mtod(m, struct ip *); 1875 *mhip = *ip; 1876 if (hlen > sizeof (struct ip)) { 1877 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 1878 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2); 1879 } 1880 m->m_len = mhlen; 1881 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); 1882 if (ip->ip_off & IP_MF) 1883 mhip->ip_off |= IP_MF; 1884 if (off + len >= (u_short)ip->ip_len) 1885 len = (u_short)ip->ip_len - off; 1886 else 1887 mhip->ip_off |= IP_MF; 1888 mhip->ip_len = htons((u_short)(len + mhlen)); 1889 m->m_next = m_copy(m0, off, len); 1890 if (m->m_next == 0) { 1891 (void) m_free(m); 1892 error = ENOBUFS; /* ??? */ 1893 OSAddAtomic(1, &ipstat.ips_odropped); 1894 goto sendorfree; 1895 } 1896 m->m_pkthdr.len = mhlen + len; 1897 m->m_pkthdr.rcvif = 0; 1898 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 1899 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id; 1900 1901 M_COPY_PFTAG(m, m0); 1902 m_set_service_class(m, m0->m_pkthdr.svc); 1903 1904#if CONFIG_MACF_NET 1905 mac_netinet_fragment(m0, m); 1906#endif 1907 1908#if BYTE_ORDER != BIG_ENDIAN 1909 HTONS(mhip->ip_off); 1910#endif 1911 1912 mhip->ip_sum = 0; 1913 if (sw_csum & CSUM_DELAY_IP) { 1914 mhip->ip_sum = in_cksum(m, mhlen); 1915 } 1916 *mnext = m; 1917 mnext = &m->m_nextpkt; 1918 nfrags++; 1919 } 1920 OSAddAtomic(nfrags, &ipstat.ips_ofragments); 1921 1922 /* set first/last markers for fragment chain */ 1923 m->m_flags |= M_LASTFRAG; 1924 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 1925 m0->m_pkthdr.csum_data = nfrags; 1926 1927 /* 1928 * Update first fragment by trimming what's been copied out 1929 * and updating header, then send each fragment (in order). 1930 */ 1931 m = m0; 1932 m_adj(m, hlen + firstlen - (u_short)ip->ip_len); 1933 m->m_pkthdr.len = hlen + firstlen; 1934 ip->ip_len = htons((u_short)m->m_pkthdr.len); 1935 ip->ip_off |= IP_MF; 1936 1937#if BYTE_ORDER != BIG_ENDIAN 1938 HTONS(ip->ip_off); 1939#endif 1940 1941 ip->ip_sum = 0; 1942 if (sw_csum & CSUM_DELAY_IP) { 1943 ip->ip_sum = in_cksum(m, hlen); 1944 } 1945sendorfree: 1946 if (error) 1947 m_freem_list(m0); 1948 1949 return (error); 1950} 1951 1952static void 1953ip_out_cksum_stats(int proto, u_int32_t len) 1954{ 1955 switch (proto) { 1956 case IPPROTO_TCP: 1957 tcp_out_cksum_stats(len); 1958 break; 1959 case IPPROTO_UDP: 1960 udp_out_cksum_stats(len); 1961 break; 1962 default: 1963 /* keep only TCP or UDP stats for now */ 1964 break; 1965 } 1966} 1967 1968void 1969in_delayed_cksum_offset(struct mbuf *m0, int ip_offset) 1970{ 1971 struct ip *ip; 1972 unsigned char buf[sizeof(struct ip)]; 1973 u_short csum, offset, ip_len; 1974 1975 /* Save copy of first mbuf pointer and the ip_offset before modifying */ 1976 struct mbuf *m = m0; 1977 int ip_offset_copy = ip_offset; 1978 1979 while (ip_offset >= m->m_len) { 1980 ip_offset -= m->m_len; 1981 m = m->m_next; 1982 if (m == NULL) { 1983 printf("in_delayed_cksum_withoffset failed - " 1984 "ip_offset wasn't in the packet\n"); 1985 return; 1986 } 1987 } 1988 1989 /* 1990 * In case the IP header is not contiguous, or not 32-bit 1991 * aligned, copy it to a local buffer. 1992 */ 1993 if ((ip_offset + sizeof(struct ip) > m->m_len) || 1994 !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) { 1995#if DEBUG 1996 printf("delayed m_pullup, m->len: %d off: %d\n", 1997 m->m_len, ip_offset); 1998#endif 1999 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf); 2000 2001 ip = (struct ip *)(void *)buf; 2002 } else { 2003 ip = (struct ip*)(void *)(m->m_data + ip_offset); 2004 } 2005 2006 /* Gross */ 2007 if (ip_offset) { 2008 m->m_len -= ip_offset; 2009 m->m_data += ip_offset; 2010 } 2011 2012 offset = IP_VHL_HL(ip->ip_vhl) << 2 ; 2013 2014 /* 2015 * We could be in the context of an IP or interface filter; in the 2016 * former case, ip_len would be in host (correct) order while for 2017 * the latter it would be in network order. Because of this, we 2018 * attempt to interpret the length field by comparing it against 2019 * the actual packet length. If the comparison fails, byte swap 2020 * the length and check again. If it still fails, then the packet 2021 * is bogus and we give up. 2022 */ 2023 ip_len = ip->ip_len; 2024 if (ip_len != (m0->m_pkthdr.len - ip_offset_copy)) { 2025 ip_len = SWAP16(ip_len); 2026 if (ip_len != (m0->m_pkthdr.len - ip_offset_copy)) { 2027 printf("in_delayed_cksum_offset: ip_len %d (%d) " 2028 "doesn't match actual length %d\n", ip->ip_len, 2029 ip_len, (m0->m_pkthdr.len - ip_offset_copy)); 2030 return; 2031 } 2032 } 2033 2034 csum = in_cksum_skip(m, ip_len, offset); 2035 2036 /* Update stats */ 2037 ip_out_cksum_stats(ip->ip_p, ip_len - offset); 2038 2039 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 2040 csum = 0xffff; 2041 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */ 2042 2043 /* Gross */ 2044 if (ip_offset) { 2045 if (M_LEADINGSPACE(m) < ip_offset) 2046 panic("in_delayed_cksum_offset - chain modified!\n"); 2047 m->m_len += ip_offset; 2048 m->m_data -= ip_offset; 2049 } 2050 2051 if (offset > ip_len) /* bogus offset */ 2052 return; 2053 2054 /* Insert the checksum in the existing chain */ 2055 if (offset + ip_offset + sizeof(u_short) > m->m_len) { 2056 char tmp[2]; 2057 2058#if DEBUG 2059 printf("delayed m_copyback, m->len: %d off: %d p: %d\n", 2060 m->m_len, offset + ip_offset, ip->ip_p); 2061#endif 2062 *(u_short *)(void *)tmp = csum; 2063 m_copyback(m, offset + ip_offset, 2, tmp); 2064 } else if (IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) { 2065 *(u_short *)(void *)(m->m_data + offset + ip_offset) = csum; 2066 } else { 2067 bcopy(&csum, (m->m_data + offset + ip_offset), sizeof (csum)); 2068 } 2069} 2070 2071void 2072in_delayed_cksum(struct mbuf *m) 2073{ 2074 in_delayed_cksum_offset(m, 0); 2075} 2076 2077void 2078in_cksum_offset(struct mbuf* m, size_t ip_offset) 2079{ 2080 struct ip* ip = NULL; 2081 int hlen = 0; 2082 unsigned char buf[sizeof(struct ip)]; 2083 int swapped = 0; 2084 2085 /* Save copy of first mbuf pointer and the ip_offset before modifying */ 2086 struct mbuf* m0 = m; 2087 size_t ip_offset_copy = ip_offset; 2088 2089 while (ip_offset >= m->m_len) { 2090 ip_offset -= m->m_len; 2091 m = m->m_next; 2092 if (m == NULL) { 2093 printf("in_cksum_offset failed - ip_offset wasn't " 2094 "in the packet\n"); 2095 return; 2096 } 2097 } 2098 2099 /* 2100 * In case the IP header is not contiguous, or not 32-bit 2101 * aligned, copy it to a local buffer. 2102 */ 2103 if ((ip_offset + sizeof(struct ip) > m->m_len) || 2104 !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) { 2105#if DEBUG 2106 printf("in_cksum_offset - delayed m_pullup, m->len: %d " 2107 "off: %lu\n", m->m_len, ip_offset); 2108#endif 2109 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf); 2110 2111 ip = (struct ip *)(void *)buf; 2112 ip->ip_sum = 0; 2113 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, 2114 (caddr_t)&ip->ip_sum); 2115 } else { 2116 ip = (struct ip*)(void *)(m->m_data + ip_offset); 2117 ip->ip_sum = 0; 2118 } 2119 2120 /* Gross */ 2121 if (ip_offset) { 2122 m->m_len -= ip_offset; 2123 m->m_data += ip_offset; 2124 } 2125 2126#ifdef _IP_VHL 2127 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 2128#else 2129 hlen = ip->ip_hl << 2; 2130#endif 2131 /* 2132 * We could be in the context of an IP or interface filter; in the 2133 * former case, ip_len would be in host order while for the latter 2134 * it would be in network (correct) order. Because of this, we 2135 * attempt to interpret the length field by comparing it against 2136 * the actual packet length. If the comparison fails, byte swap 2137 * the length and check again. If it still fails, then the packet 2138 * is bogus and we give up. 2139 */ 2140 if (ntohs(ip->ip_len) != (m0->m_pkthdr.len - ip_offset_copy)) { 2141 ip->ip_len = SWAP16(ip->ip_len); 2142 swapped = 1; 2143 if (ntohs(ip->ip_len) != (m0->m_pkthdr.len - ip_offset_copy)) { 2144 ip->ip_len = SWAP16(ip->ip_len); 2145 printf("in_cksum_offset: ip_len %d (%d) " 2146 "doesn't match actual length %lu\n", 2147 ip->ip_len, SWAP16(ip->ip_len), 2148 (m0->m_pkthdr.len - ip_offset_copy)); 2149 return; 2150 } 2151 } 2152 2153 ip->ip_sum = 0; 2154 ip->ip_sum = in_cksum(m, hlen); 2155 if (swapped) 2156 ip->ip_len = SWAP16(ip->ip_len); 2157 2158 /* Gross */ 2159 if (ip_offset) { 2160 if (M_LEADINGSPACE(m) < ip_offset) 2161 panic("in_cksum_offset - chain modified!\n"); 2162 m->m_len += ip_offset; 2163 m->m_data -= ip_offset; 2164 } 2165 2166 /* 2167 * Insert the checksum in the existing chain if IP header not 2168 * contiguous, or if it's not 32-bit aligned, i.e. all the cases 2169 * where it was copied to a local buffer. 2170 */ 2171 if (ip_offset + sizeof(struct ip) > m->m_len) { 2172 char tmp[2]; 2173 2174#if DEBUG 2175 printf("in_cksum_offset m_copyback, m->len: %u off: %lu " 2176 "p: %d\n", m->m_len, 2177 ip_offset + offsetof(struct ip, ip_sum), ip->ip_p); 2178#endif 2179 *(u_short *)(void *)tmp = ip->ip_sum; 2180 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp); 2181 } else if (!IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) { 2182 bcopy(&ip->ip_sum, 2183 (m->m_data + ip_offset + offsetof(struct ip, ip_sum)), 2184 sizeof (u_short)); 2185 } 2186} 2187 2188/* 2189 * Insert IP options into preformed packet. 2190 * Adjust IP destination as required for IP source routing, 2191 * as indicated by a non-zero in_addr at the start of the options. 2192 * 2193 * XXX This routine assumes that the packet has no options in place. 2194 */ 2195static struct mbuf * 2196ip_insertoptions(m, opt, phlen) 2197 register struct mbuf *m; 2198 struct mbuf *opt; 2199 int *phlen; 2200{ 2201 register struct ipoption *p = mtod(opt, struct ipoption *); 2202 struct mbuf *n; 2203 register struct ip *ip = mtod(m, struct ip *); 2204 unsigned optlen; 2205 2206 optlen = opt->m_len - sizeof(p->ipopt_dst); 2207 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) 2208 return (m); /* XXX should fail */ 2209 if (p->ipopt_dst.s_addr) 2210 ip->ip_dst = p->ipopt_dst; 2211 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 2212 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */ 2213 if (n == 0) 2214 return (m); 2215 n->m_pkthdr.rcvif = 0; 2216#if CONFIG_MACF_NET 2217 mac_mbuf_label_copy(m, n); 2218#endif 2219 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 2220 m->m_len -= sizeof(struct ip); 2221 m->m_data += sizeof(struct ip); 2222 n->m_next = m; 2223 m = n; 2224 m->m_len = optlen + sizeof(struct ip); 2225 m->m_data += max_linkhdr; 2226 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip)); 2227 } else { 2228 m->m_data -= optlen; 2229 m->m_len += optlen; 2230 m->m_pkthdr.len += optlen; 2231 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 2232 } 2233 ip = mtod(m, struct ip *); 2234 bcopy(p->ipopt_list, ip + 1, optlen); 2235 *phlen = sizeof(struct ip) + optlen; 2236 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2); 2237 ip->ip_len += optlen; 2238 return (m); 2239} 2240 2241/* 2242 * Copy options from ip to jp, 2243 * omitting those not copied during fragmentation. 2244 */ 2245int 2246ip_optcopy(ip, jp) 2247 struct ip *ip, *jp; 2248{ 2249 register u_char *cp, *dp; 2250 int opt, optlen, cnt; 2251 2252 cp = (u_char *)(ip + 1); 2253 dp = (u_char *)(jp + 1); 2254 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip); 2255 for (; cnt > 0; cnt -= optlen, cp += optlen) { 2256 opt = cp[0]; 2257 if (opt == IPOPT_EOL) 2258 break; 2259 if (opt == IPOPT_NOP) { 2260 /* Preserve for IP mcast tunnel's LSRR alignment. */ 2261 *dp++ = IPOPT_NOP; 2262 optlen = 1; 2263 continue; 2264 } 2265#if DIAGNOSTIC 2266 if (cnt < IPOPT_OLEN + sizeof(*cp)) 2267 panic("malformed IPv4 option passed to ip_optcopy"); 2268#endif 2269 optlen = cp[IPOPT_OLEN]; 2270#if DIAGNOSTIC 2271 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 2272 panic("malformed IPv4 option passed to ip_optcopy"); 2273#endif 2274 /* bogus lengths should have been caught by ip_dooptions */ 2275 if (optlen > cnt) 2276 optlen = cnt; 2277 if (IPOPT_COPIED(opt)) { 2278 bcopy(cp, dp, optlen); 2279 dp += optlen; 2280 } 2281 } 2282 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 2283 *dp++ = IPOPT_EOL; 2284 return (optlen); 2285} 2286 2287/* 2288 * IP socket option processing. 2289 */ 2290int 2291ip_ctloutput(so, sopt) 2292 struct socket *so; 2293 struct sockopt *sopt; 2294{ 2295 struct inpcb *inp = sotoinpcb(so); 2296 int error, optval; 2297 2298 error = optval = 0; 2299 if (sopt->sopt_level != IPPROTO_IP) { 2300 return (EINVAL); 2301 } 2302 2303 switch (sopt->sopt_dir) { 2304 case SOPT_SET: 2305 switch (sopt->sopt_name) { 2306 case IP_OPTIONS: 2307#ifdef notyet 2308 case IP_RETOPTS: 2309#endif 2310 { 2311 struct mbuf *m; 2312 if (sopt->sopt_valsize > MLEN) { 2313 error = EMSGSIZE; 2314 break; 2315 } 2316 MGET(m, sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT, 2317 MT_HEADER); 2318 if (m == 0) { 2319 error = ENOBUFS; 2320 break; 2321 } 2322 m->m_len = sopt->sopt_valsize; 2323 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 2324 m->m_len); 2325 if (error) 2326 break; 2327 2328 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 2329 m)); 2330 } 2331 2332 case IP_TOS: 2333 case IP_TTL: 2334 case IP_RECVOPTS: 2335 case IP_RECVRETOPTS: 2336 case IP_RECVDSTADDR: 2337 case IP_RECVIF: 2338 case IP_RECVTTL: 2339 case IP_RECVPKTINFO: 2340 error = sooptcopyin(sopt, &optval, sizeof optval, 2341 sizeof optval); 2342 if (error) 2343 break; 2344 2345 switch (sopt->sopt_name) { 2346 case IP_TOS: 2347 inp->inp_ip_tos = optval; 2348 break; 2349 2350 case IP_TTL: 2351 inp->inp_ip_ttl = optval; 2352 break; 2353#define OPTSET(bit) \ 2354 if (optval) \ 2355 inp->inp_flags |= bit; \ 2356 else \ 2357 inp->inp_flags &= ~bit; 2358 2359 case IP_RECVOPTS: 2360 OPTSET(INP_RECVOPTS); 2361 break; 2362 2363 case IP_RECVRETOPTS: 2364 OPTSET(INP_RECVRETOPTS); 2365 break; 2366 2367 case IP_RECVDSTADDR: 2368 OPTSET(INP_RECVDSTADDR); 2369 break; 2370 2371 case IP_RECVIF: 2372 OPTSET(INP_RECVIF); 2373 break; 2374 2375 case IP_RECVTTL: 2376 OPTSET(INP_RECVTTL); 2377 break; 2378 2379 case IP_RECVPKTINFO: 2380 OPTSET(INP_PKTINFO); 2381 break; 2382 } 2383 break; 2384#undef OPTSET 2385 2386#if CONFIG_FORCE_OUT_IFP 2387 /* 2388 * Apple private interface, similar to IP_BOUND_IF, except 2389 * that the parameter is a NULL-terminated string containing 2390 * the name of the network interface; an emptry string means 2391 * unbind. Applications are encouraged to use IP_BOUND_IF 2392 * instead, as that is the current "official" API. 2393 */ 2394 case IP_FORCE_OUT_IFP: { 2395 char ifname[IFNAMSIZ]; 2396 unsigned int ifscope; 2397 2398 /* This option is settable only for IPv4 */ 2399 if (!(inp->inp_vflag & INP_IPV4)) { 2400 error = EINVAL; 2401 break; 2402 } 2403 2404 /* Verify interface name parameter is sane */ 2405 if (sopt->sopt_valsize > sizeof(ifname)) { 2406 error = EINVAL; 2407 break; 2408 } 2409 2410 /* Copy the interface name */ 2411 if (sopt->sopt_valsize != 0) { 2412 error = sooptcopyin(sopt, ifname, 2413 sizeof (ifname), sopt->sopt_valsize); 2414 if (error) 2415 break; 2416 } 2417 2418 if (sopt->sopt_valsize == 0 || ifname[0] == '\0') { 2419 /* Unbind this socket from any interface */ 2420 ifscope = IFSCOPE_NONE; 2421 } else { 2422 ifnet_t ifp; 2423 2424 /* Verify name is NULL terminated */ 2425 if (ifname[sopt->sopt_valsize - 1] != '\0') { 2426 error = EINVAL; 2427 break; 2428 } 2429 2430 /* Bail out if given bogus interface name */ 2431 if (ifnet_find_by_name(ifname, &ifp) != 0) { 2432 error = ENXIO; 2433 break; 2434 } 2435 2436 /* Bind this socket to this interface */ 2437 ifscope = ifp->if_index; 2438 2439 /* 2440 * Won't actually free; since we don't release 2441 * this later, we should do it now. 2442 */ 2443 ifnet_release(ifp); 2444 } 2445 error = inp_bindif(inp, ifscope); 2446 } 2447 break; 2448#endif 2449 /* 2450 * Multicast socket options are processed by the in_mcast 2451 * module. 2452 */ 2453 case IP_MULTICAST_IF: 2454 case IP_MULTICAST_IFINDEX: 2455 case IP_MULTICAST_VIF: 2456 case IP_MULTICAST_TTL: 2457 case IP_MULTICAST_LOOP: 2458 case IP_ADD_MEMBERSHIP: 2459 case IP_DROP_MEMBERSHIP: 2460 case IP_ADD_SOURCE_MEMBERSHIP: 2461 case IP_DROP_SOURCE_MEMBERSHIP: 2462 case IP_BLOCK_SOURCE: 2463 case IP_UNBLOCK_SOURCE: 2464 case IP_MSFILTER: 2465 case MCAST_JOIN_GROUP: 2466 case MCAST_LEAVE_GROUP: 2467 case MCAST_JOIN_SOURCE_GROUP: 2468 case MCAST_LEAVE_SOURCE_GROUP: 2469 case MCAST_BLOCK_SOURCE: 2470 case MCAST_UNBLOCK_SOURCE: 2471 error = inp_setmoptions(inp, sopt); 2472 break; 2473 2474 case IP_PORTRANGE: 2475 error = sooptcopyin(sopt, &optval, sizeof optval, 2476 sizeof optval); 2477 if (error) 2478 break; 2479 2480 switch (optval) { 2481 case IP_PORTRANGE_DEFAULT: 2482 inp->inp_flags &= ~(INP_LOWPORT); 2483 inp->inp_flags &= ~(INP_HIGHPORT); 2484 break; 2485 2486 case IP_PORTRANGE_HIGH: 2487 inp->inp_flags &= ~(INP_LOWPORT); 2488 inp->inp_flags |= INP_HIGHPORT; 2489 break; 2490 2491 case IP_PORTRANGE_LOW: 2492 inp->inp_flags &= ~(INP_HIGHPORT); 2493 inp->inp_flags |= INP_LOWPORT; 2494 break; 2495 2496 default: 2497 error = EINVAL; 2498 break; 2499 } 2500 break; 2501 2502#if IPSEC 2503 case IP_IPSEC_POLICY: 2504 { 2505 caddr_t req = NULL; 2506 size_t len = 0; 2507 int priv; 2508 struct mbuf *m; 2509 int optname; 2510 2511 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 2512 break; 2513 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 2514 break; 2515 priv = (proc_suser(sopt->sopt_p) == 0); 2516 if (m) { 2517 req = mtod(m, caddr_t); 2518 len = m->m_len; 2519 } 2520 optname = sopt->sopt_name; 2521 error = ipsec4_set_policy(inp, optname, req, len, priv); 2522 m_freem(m); 2523 break; 2524 } 2525#endif /*IPSEC*/ 2526 2527#if TRAFFIC_MGT 2528 case IP_TRAFFIC_MGT_BACKGROUND: 2529 { 2530 unsigned background = 0; 2531 error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background)); 2532 if (error) 2533 break; 2534 2535 if (background) { 2536 socket_set_traffic_mgt_flags_locked(so, 2537 TRAFFIC_MGT_SO_BACKGROUND); 2538 } else { 2539 socket_clear_traffic_mgt_flags_locked(so, 2540 TRAFFIC_MGT_SO_BACKGROUND); 2541 } 2542 2543 break; 2544 } 2545#endif /* TRAFFIC_MGT */ 2546 2547 /* 2548 * On a multihomed system, scoped routing can be used to 2549 * restrict the source interface used for sending packets. 2550 * The socket option IP_BOUND_IF binds a particular AF_INET 2551 * socket to an interface such that data sent on the socket 2552 * is restricted to that interface. This is unlike the 2553 * SO_DONTROUTE option where the routing table is bypassed; 2554 * therefore it allows for a greater flexibility and control 2555 * over the system behavior, and does not place any restriction 2556 * on the destination address type (e.g. unicast, multicast, 2557 * or broadcast if applicable) or whether or not the host is 2558 * directly reachable. Note that in the multicast transmit 2559 * case, IP_MULTICAST_{IF,IFINDEX} takes precedence over 2560 * IP_BOUND_IF, since the former practically bypasses the 2561 * routing table; in this case, IP_BOUND_IF sets the default 2562 * interface used for sending multicast packets in the absence 2563 * of an explicit multicast transmit interface. 2564 */ 2565 case IP_BOUND_IF: 2566 /* This option is settable only for IPv4 */ 2567 if (!(inp->inp_vflag & INP_IPV4)) { 2568 error = EINVAL; 2569 break; 2570 } 2571 2572 error = sooptcopyin(sopt, &optval, sizeof (optval), 2573 sizeof (optval)); 2574 2575 if (error) 2576 break; 2577 2578 error = inp_bindif(inp, optval); 2579 break; 2580 2581 case IP_NO_IFT_CELLULAR: 2582 /* This option is settable only for IPv4 */ 2583 if (!(inp->inp_vflag & INP_IPV4)) { 2584 error = EINVAL; 2585 break; 2586 } 2587 2588 error = sooptcopyin(sopt, &optval, sizeof (optval), 2589 sizeof (optval)); 2590 2591 if (error) 2592 break; 2593 2594 error = inp_nocellular(inp, optval); 2595 break; 2596 2597 case IP_OUT_IF: 2598 /* This option is not settable */ 2599 error = EINVAL; 2600 break; 2601 2602 default: 2603 error = ENOPROTOOPT; 2604 break; 2605 } 2606 break; 2607 2608 case SOPT_GET: 2609 switch (sopt->sopt_name) { 2610 case IP_OPTIONS: 2611 case IP_RETOPTS: 2612 if (inp->inp_options) 2613 error = sooptcopyout(sopt, 2614 mtod(inp->inp_options, 2615 char *), 2616 inp->inp_options->m_len); 2617 else 2618 sopt->sopt_valsize = 0; 2619 break; 2620 2621 case IP_TOS: 2622 case IP_TTL: 2623 case IP_RECVOPTS: 2624 case IP_RECVRETOPTS: 2625 case IP_RECVDSTADDR: 2626 case IP_RECVIF: 2627 case IP_RECVTTL: 2628 case IP_PORTRANGE: 2629 case IP_RECVPKTINFO: 2630 switch (sopt->sopt_name) { 2631 2632 case IP_TOS: 2633 optval = inp->inp_ip_tos; 2634 break; 2635 2636 case IP_TTL: 2637 optval = inp->inp_ip_ttl; 2638 break; 2639 2640#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 2641 2642 case IP_RECVOPTS: 2643 optval = OPTBIT(INP_RECVOPTS); 2644 break; 2645 2646 case IP_RECVRETOPTS: 2647 optval = OPTBIT(INP_RECVRETOPTS); 2648 break; 2649 2650 case IP_RECVDSTADDR: 2651 optval = OPTBIT(INP_RECVDSTADDR); 2652 break; 2653 2654 case IP_RECVIF: 2655 optval = OPTBIT(INP_RECVIF); 2656 break; 2657 2658 case IP_RECVTTL: 2659 optval = OPTBIT(INP_RECVTTL); 2660 break; 2661 2662 case IP_PORTRANGE: 2663 if (inp->inp_flags & INP_HIGHPORT) 2664 optval = IP_PORTRANGE_HIGH; 2665 else if (inp->inp_flags & INP_LOWPORT) 2666 optval = IP_PORTRANGE_LOW; 2667 else 2668 optval = 0; 2669 break; 2670 2671 case IP_RECVPKTINFO: 2672 optval = OPTBIT(INP_PKTINFO); 2673 break; 2674 } 2675 error = sooptcopyout(sopt, &optval, sizeof optval); 2676 break; 2677 2678 case IP_MULTICAST_IF: 2679 case IP_MULTICAST_IFINDEX: 2680 case IP_MULTICAST_VIF: 2681 case IP_MULTICAST_TTL: 2682 case IP_MULTICAST_LOOP: 2683 case IP_MSFILTER: 2684 error = inp_getmoptions(inp, sopt); 2685 break; 2686 2687#if IPSEC 2688 case IP_IPSEC_POLICY: 2689 { 2690 struct mbuf *m = NULL; 2691 caddr_t req = NULL; 2692 size_t len = 0; 2693 2694 if (m != 0) { 2695 req = mtod(m, caddr_t); 2696 len = m->m_len; 2697 } 2698 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 2699 if (error == 0) 2700 error = soopt_mcopyout(sopt, m); /* XXX */ 2701 if (error == 0) 2702 m_freem(m); 2703 break; 2704 } 2705#endif /*IPSEC*/ 2706 2707#if TRAFFIC_MGT 2708 case IP_TRAFFIC_MGT_BACKGROUND: 2709 { 2710 unsigned background = (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND); 2711 return (sooptcopyout(sopt, &background, sizeof(background))); 2712 break; 2713 } 2714#endif /* TRAFFIC_MGT */ 2715 2716 case IP_BOUND_IF: 2717 if (inp->inp_flags & INP_BOUND_IF) 2718 optval = inp->inp_boundifp->if_index; 2719 error = sooptcopyout(sopt, &optval, sizeof (optval)); 2720 break; 2721 2722 case IP_NO_IFT_CELLULAR: 2723 optval = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0; 2724 error = sooptcopyout(sopt, &optval, sizeof (optval)); 2725 break; 2726 2727 case IP_OUT_IF: 2728 optval = (inp->inp_last_outifp != NULL) ? 2729 inp->inp_last_outifp->if_index : 0; 2730 error = sooptcopyout(sopt, &optval, sizeof (optval)); 2731 break; 2732 2733 default: 2734 error = ENOPROTOOPT; 2735 break; 2736 } 2737 break; 2738 } 2739 return (error); 2740} 2741 2742/* 2743 * Set up IP options in pcb for insertion in output packets. 2744 * Store in mbuf with pointer in pcbopt, adding pseudo-option 2745 * with destination address if source routed. 2746 */ 2747static int 2748ip_pcbopts( 2749 __unused int optname, 2750 struct mbuf **pcbopt, 2751 register struct mbuf *m) 2752{ 2753 register int cnt, optlen; 2754 register u_char *cp; 2755 u_char opt; 2756 2757 /* turn off any old options */ 2758 if (*pcbopt) 2759 (void)m_free(*pcbopt); 2760 *pcbopt = 0; 2761 if (m == (struct mbuf *)0 || m->m_len == 0) { 2762 /* 2763 * Only turning off any previous options. 2764 */ 2765 if (m) 2766 (void)m_free(m); 2767 return (0); 2768 } 2769 2770#ifndef vax 2771 if (m->m_len % sizeof(int32_t)) 2772 goto bad; 2773#endif 2774 /* 2775 * IP first-hop destination address will be stored before 2776 * actual options; move other options back 2777 * and clear it when none present. 2778 */ 2779 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 2780 goto bad; 2781 cnt = m->m_len; 2782 m->m_len += sizeof(struct in_addr); 2783 cp = mtod(m, u_char *) + sizeof(struct in_addr); 2784 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 2785 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 2786 2787 for (; cnt > 0; cnt -= optlen, cp += optlen) { 2788 opt = cp[IPOPT_OPTVAL]; 2789 if (opt == IPOPT_EOL) 2790 break; 2791 if (opt == IPOPT_NOP) 2792 optlen = 1; 2793 else { 2794 if (cnt < IPOPT_OLEN + sizeof(*cp)) 2795 goto bad; 2796 optlen = cp[IPOPT_OLEN]; 2797 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 2798 goto bad; 2799 } 2800 switch (opt) { 2801 2802 default: 2803 break; 2804 2805 case IPOPT_LSRR: 2806 case IPOPT_SSRR: 2807 /* 2808 * user process specifies route as: 2809 * ->A->B->C->D 2810 * D must be our final destination (but we can't 2811 * check that since we may not have connected yet). 2812 * A is first hop destination, which doesn't appear in 2813 * actual IP option, but is stored before the options. 2814 */ 2815 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 2816 goto bad; 2817 m->m_len -= sizeof(struct in_addr); 2818 cnt -= sizeof(struct in_addr); 2819 optlen -= sizeof(struct in_addr); 2820 cp[IPOPT_OLEN] = optlen; 2821 /* 2822 * Move first hop before start of options. 2823 */ 2824 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 2825 sizeof(struct in_addr)); 2826 /* 2827 * Then copy rest of options back 2828 * to close up the deleted entry. 2829 */ 2830 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 2831 sizeof(struct in_addr)), 2832 (caddr_t)&cp[IPOPT_OFFSET+1], 2833 (unsigned)cnt + sizeof(struct in_addr)); 2834 break; 2835 } 2836 } 2837 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 2838 goto bad; 2839 *pcbopt = m; 2840 return (0); 2841 2842bad: 2843 (void)m_free(m); 2844 return (EINVAL); 2845} 2846 2847void 2848ip_moptions_init(void) 2849{ 2850 PE_parse_boot_argn("ifa_debug", &imo_debug, sizeof (imo_debug)); 2851 2852 imo_size = (imo_debug == 0) ? sizeof (struct ip_moptions) : 2853 sizeof (struct ip_moptions_dbg); 2854 2855 imo_zone = zinit(imo_size, IMO_ZONE_MAX * imo_size, 0, 2856 IMO_ZONE_NAME); 2857 if (imo_zone == NULL) { 2858 panic("%s: failed allocating %s", __func__, IMO_ZONE_NAME); 2859 /* NOTREACHED */ 2860 } 2861 zone_change(imo_zone, Z_EXPAND, TRUE); 2862} 2863 2864void 2865imo_addref(struct ip_moptions *imo, int locked) 2866{ 2867 if (!locked) 2868 IMO_LOCK(imo); 2869 else 2870 IMO_LOCK_ASSERT_HELD(imo); 2871 2872 if (++imo->imo_refcnt == 0) { 2873 panic("%s: imo %p wraparound refcnt\n", __func__, imo); 2874 /* NOTREACHED */ 2875 } else if (imo->imo_trace != NULL) { 2876 (*imo->imo_trace)(imo, TRUE); 2877 } 2878 2879 if (!locked) 2880 IMO_UNLOCK(imo); 2881} 2882 2883void 2884imo_remref(struct ip_moptions *imo) 2885{ 2886 int i; 2887 2888 IMO_LOCK(imo); 2889 if (imo->imo_refcnt == 0) { 2890 panic("%s: imo %p negative refcnt", __func__, imo); 2891 /* NOTREACHED */ 2892 } else if (imo->imo_trace != NULL) { 2893 (*imo->imo_trace)(imo, FALSE); 2894 } 2895 2896 --imo->imo_refcnt; 2897 if (imo->imo_refcnt > 0) { 2898 IMO_UNLOCK(imo); 2899 return; 2900 } 2901 2902 for (i = 0; i < imo->imo_num_memberships; ++i) { 2903 struct in_mfilter *imf; 2904 2905 imf = imo->imo_mfilters ? &imo->imo_mfilters[i] : NULL; 2906 if (imf != NULL) 2907 imf_leave(imf); 2908 2909 (void) in_leavegroup(imo->imo_membership[i], imf); 2910 2911 if (imf != NULL) 2912 imf_purge(imf); 2913 2914 INM_REMREF(imo->imo_membership[i]); 2915 imo->imo_membership[i] = NULL; 2916 } 2917 imo->imo_num_memberships = 0; 2918 if (imo->imo_mfilters != NULL) { 2919 FREE(imo->imo_mfilters, M_INMFILTER); 2920 imo->imo_mfilters = NULL; 2921 } 2922 if (imo->imo_membership != NULL) { 2923 FREE(imo->imo_membership, M_IPMOPTS); 2924 imo->imo_membership = NULL; 2925 } 2926 IMO_UNLOCK(imo); 2927 2928 lck_mtx_destroy(&imo->imo_lock, ifa_mtx_grp); 2929 2930 if (!(imo->imo_debug & IFD_ALLOC)) { 2931 panic("%s: imo %p cannot be freed", __func__, imo); 2932 /* NOTREACHED */ 2933 } 2934 zfree(imo_zone, imo); 2935} 2936 2937static void 2938imo_trace(struct ip_moptions *imo, int refhold) 2939{ 2940 struct ip_moptions_dbg *imo_dbg = (struct ip_moptions_dbg *)imo; 2941 ctrace_t *tr; 2942 u_int32_t idx; 2943 u_int16_t *cnt; 2944 2945 if (!(imo->imo_debug & IFD_DEBUG)) { 2946 panic("%s: imo %p has no debug structure", __func__, imo); 2947 /* NOTREACHED */ 2948 } 2949 if (refhold) { 2950 cnt = &imo_dbg->imo_refhold_cnt; 2951 tr = imo_dbg->imo_refhold; 2952 } else { 2953 cnt = &imo_dbg->imo_refrele_cnt; 2954 tr = imo_dbg->imo_refrele; 2955 } 2956 2957 idx = atomic_add_16_ov(cnt, 1) % IMO_TRACE_HIST_SIZE; 2958 ctrace_record(&tr[idx]); 2959} 2960 2961struct ip_moptions * 2962ip_allocmoptions(int how) 2963{ 2964 struct ip_moptions *imo; 2965 2966 imo = (how == M_WAITOK) ? zalloc(imo_zone) : zalloc_noblock(imo_zone); 2967 if (imo != NULL) { 2968 bzero(imo, imo_size); 2969 lck_mtx_init(&imo->imo_lock, ifa_mtx_grp, ifa_mtx_attr); 2970 imo->imo_debug |= IFD_ALLOC; 2971 if (imo_debug != 0) { 2972 imo->imo_debug |= IFD_DEBUG; 2973 imo->imo_trace = imo_trace; 2974 } 2975 IMO_ADDREF(imo); 2976 } 2977 2978 return (imo); 2979} 2980 2981/* 2982 * Routine called from ip_output() to loop back a copy of an IP multicast 2983 * packet to the input queue of a specified interface. Note that this 2984 * calls the output routine of the loopback "driver", but with an interface 2985 * pointer that might NOT be a loopback interface -- evil, but easier than 2986 * replicating that code here. 2987 */ 2988static void 2989ip_mloopback(ifp, m, dst, hlen) 2990 struct ifnet *ifp; 2991 register struct mbuf *m; 2992 register struct sockaddr_in *dst; 2993 int hlen; 2994{ 2995 register struct ip *ip; 2996 struct mbuf *copym; 2997 int sw_csum = (apple_hwcksum_tx == 0); 2998 2999 copym = m_copy(m, 0, M_COPYALL); 3000 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 3001 copym = m_pullup(copym, hlen); 3002 3003 if (copym == NULL) 3004 return; 3005 3006 /* 3007 * We don't bother to fragment if the IP length is greater 3008 * than the interface's MTU. Can this possibly matter? 3009 */ 3010 ip = mtod(copym, struct ip *); 3011 3012#if BYTE_ORDER != BIG_ENDIAN 3013 HTONS(ip->ip_len); 3014 HTONS(ip->ip_off); 3015#endif 3016 3017 ip->ip_sum = 0; 3018 ip->ip_sum = in_cksum(copym, hlen); 3019 /* 3020 * NB: 3021 * It's not clear whether there are any lingering 3022 * reentrancy problems in other areas which might 3023 * be exposed by using ip_input directly (in 3024 * particular, everything which modifies the packet 3025 * in-place). Yet another option is using the 3026 * protosw directly to deliver the looped back 3027 * packet. For the moment, we'll err on the side 3028 * of safety by using if_simloop(). 3029 */ 3030#if 1 /* XXX */ 3031 if (dst->sin_family != AF_INET) { 3032 printf("ip_mloopback: bad address family %d\n", 3033 dst->sin_family); 3034 dst->sin_family = AF_INET; 3035 } 3036#endif 3037 3038 /* 3039 * Mark checksum as valid or calculate checksum for loopback. 3040 * 3041 * This is done this way because we have to embed the ifp of 3042 * the interface we will send the original copy of the packet 3043 * out on in the mbuf. ip_input will check if_hwassist of the 3044 * embedded ifp and ignore all csum_flags if if_hwassist is 0. 3045 * The UDP checksum has not been calculated yet. 3046 */ 3047 if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) { 3048 if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) { 3049 copym->m_pkthdr.csum_flags |= 3050 CSUM_DATA_VALID | CSUM_PSEUDO_HDR | 3051 CSUM_IP_CHECKED | CSUM_IP_VALID; 3052 copym->m_pkthdr.csum_data = 0xffff; 3053 } else { 3054 3055#if BYTE_ORDER != BIG_ENDIAN 3056 NTOHS(ip->ip_len); 3057#endif 3058 3059 in_delayed_cksum(copym); 3060 3061#if BYTE_ORDER != BIG_ENDIAN 3062 HTONS(ip->ip_len); 3063#endif 3064 3065 } 3066 } 3067 3068 /* 3069 * TedW: 3070 * We need to send all loopback traffic down to dlil in case 3071 * a filter has tapped-in. 3072 */ 3073 3074 /* 3075 * Stuff the 'real' ifp into the pkthdr, to be used in matching 3076 * in ip_input(); we need the loopback ifp/dl_tag passed as args 3077 * to make the loopback driver compliant with the data link 3078 * requirements. 3079 */ 3080 if (lo_ifp) { 3081 copym->m_pkthdr.rcvif = ifp; 3082 dlil_output(lo_ifp, PF_INET, copym, 0, 3083 (struct sockaddr *) dst, 0, NULL); 3084 } else { 3085 printf("Warning: ip_output call to dlil_find_dltag failed!\n"); 3086 m_freem(copym); 3087 } 3088} 3089 3090/* 3091 * Given a source IP address (and route, if available), determine the best 3092 * interface to send the packet from. Checking for (and updating) the 3093 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done 3094 * without any locks based on the assumption that ip_output() is single- 3095 * threaded per-pcb, i.e. for any given pcb there can only be one thread 3096 * performing output at the IP layer. 3097 * 3098 * This routine is analogous to in6_selectroute() for IPv6. 3099 */ 3100static struct ifaddr * 3101in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) 3102{ 3103 struct ifaddr *ifa = NULL; 3104 struct in_addr src = ip->ip_src; 3105 struct in_addr dst = ip->ip_dst; 3106 struct ifnet *rt_ifp; 3107 char s_src[MAX_IPv4_STR_LEN], s_dst[MAX_IPv4_STR_LEN]; 3108 3109 if (ip_select_srcif_debug) { 3110 (void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src)); 3111 (void) inet_ntop(AF_INET, &dst.s_addr, s_dst, sizeof (s_dst)); 3112 } 3113 3114 if (ro->ro_rt != NULL) 3115 RT_LOCK(ro->ro_rt); 3116 3117 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL; 3118 3119 /* 3120 * Given the source IP address, find a suitable source interface 3121 * to use for transmission; if the caller has specified a scope, 3122 * optimize the search by looking at the addresses only for that 3123 * interface. This is still suboptimal, however, as we need to 3124 * traverse the per-interface list. 3125 */ 3126 if (ifscope != IFSCOPE_NONE || ro->ro_rt != NULL) { 3127 unsigned int scope = ifscope; 3128 3129 /* 3130 * If no scope is specified and the route is stale (pointing 3131 * to a defunct interface) use the current primary interface; 3132 * this happens when switching between interfaces configured 3133 * with the same IP address. Otherwise pick up the scope 3134 * information from the route; the ULP may have looked up a 3135 * correct route and we just need to verify it here and mark 3136 * it with the ROF_SRCIF_SELECTED flag below. 3137 */ 3138 if (scope == IFSCOPE_NONE) { 3139 scope = rt_ifp->if_index; 3140 if (scope != get_primary_ifscope(AF_INET) && 3141 ro->ro_rt->generation_id != route_generation) 3142 scope = get_primary_ifscope(AF_INET); 3143 } 3144 3145 ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope); 3146 3147 if (ifa == NULL && ip->ip_p != IPPROTO_UDP && 3148 ip->ip_p != IPPROTO_TCP && ipforwarding) { 3149 /* 3150 * If forwarding is enabled, and if the packet isn't 3151 * TCP or UDP, check if the source address belongs 3152 * to one of our own interfaces; if so, demote the 3153 * interface scope and do a route lookup right below. 3154 */ 3155 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr); 3156 if (ifa != NULL) { 3157 IFA_REMREF(ifa); 3158 ifa = NULL; 3159 ifscope = IFSCOPE_NONE; 3160 } 3161 } 3162 3163 if (ip_select_srcif_debug && ifa != NULL) { 3164 if (ro->ro_rt != NULL) { 3165 printf("%s->%s ifscope %d->%d ifa_if %s " 3166 "ro_if %s\n", s_src, s_dst, ifscope, 3167 scope, if_name(ifa->ifa_ifp), 3168 if_name(rt_ifp)); 3169 } else { 3170 printf("%s->%s ifscope %d->%d ifa_if %s\n", 3171 s_src, s_dst, ifscope, scope, 3172 if_name(ifa->ifa_ifp)); 3173 } 3174 } 3175 } 3176 3177 /* 3178 * Slow path; search for an interface having the corresponding source 3179 * IP address if the scope was not specified by the caller, and: 3180 * 3181 * 1) There currently isn't any route, or, 3182 * 2) The interface used by the route does not own that source 3183 * IP address; in this case, the route will get blown away 3184 * and we'll do a more specific scoped search using the newly 3185 * found interface. 3186 */ 3187 if (ifa == NULL && ifscope == IFSCOPE_NONE) { 3188 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr); 3189 3190 /* 3191 * If we have the IP address, but not the route, we don't 3192 * really know whether or not it belongs to the correct 3193 * interface (it could be shared across multiple interfaces.) 3194 * The only way to find out is to do a route lookup. 3195 */ 3196 if (ifa != NULL && ro->ro_rt == NULL) { 3197 struct rtentry *rt; 3198 struct sockaddr_in sin; 3199 struct ifaddr *oifa = NULL; 3200 3201 bzero(&sin, sizeof (sin)); 3202 sin.sin_family = AF_INET; 3203 sin.sin_len = sizeof (sin); 3204 sin.sin_addr = dst; 3205 3206 lck_mtx_lock(rnh_lock); 3207 if ((rt = rt_lookup(TRUE, (struct sockaddr *)&sin, NULL, 3208 rt_tables[AF_INET], IFSCOPE_NONE)) != NULL) { 3209 RT_LOCK(rt); 3210 /* 3211 * If the route uses a different interface, 3212 * use that one instead. The IP address of 3213 * the ifaddr that we pick up here is not 3214 * relevant. 3215 */ 3216 if (ifa->ifa_ifp != rt->rt_ifp) { 3217 oifa = ifa; 3218 ifa = rt->rt_ifa; 3219 IFA_ADDREF(ifa); 3220 RT_UNLOCK(rt); 3221 } else { 3222 RT_UNLOCK(rt); 3223 } 3224 rtfree_locked(rt); 3225 } 3226 lck_mtx_unlock(rnh_lock); 3227 3228 if (oifa != NULL) { 3229 struct ifaddr *iifa; 3230 3231 /* 3232 * See if the interface pointed to by the 3233 * route is configured with the source IP 3234 * address of the packet. 3235 */ 3236 iifa = (struct ifaddr *)ifa_foraddr_scoped( 3237 src.s_addr, ifa->ifa_ifp->if_index); 3238 3239 if (iifa != NULL) { 3240 /* 3241 * Found it; drop the original one 3242 * as well as the route interface 3243 * address, and use this instead. 3244 */ 3245 IFA_REMREF(oifa); 3246 IFA_REMREF(ifa); 3247 ifa = iifa; 3248 } else if (!ipforwarding || 3249 (rt->rt_flags & RTF_GATEWAY)) { 3250 /* 3251 * This interface doesn't have that 3252 * source IP address; drop the route 3253 * interface address and just use the 3254 * original one, and let the caller 3255 * do a scoped route lookup. 3256 */ 3257 IFA_REMREF(ifa); 3258 ifa = oifa; 3259 } else { 3260 /* 3261 * Forwarding is enabled and the source 3262 * address belongs to one of our own 3263 * interfaces which isn't the outgoing 3264 * interface, and we have a route, and 3265 * the destination is on a network that 3266 * is directly attached (onlink); drop 3267 * the original one and use the route 3268 * interface address instead. 3269 */ 3270 IFA_REMREF(oifa); 3271 } 3272 } 3273 } else if (ifa != NULL && ro->ro_rt != NULL && 3274 !(ro->ro_rt->rt_flags & RTF_GATEWAY) && 3275 ifa->ifa_ifp != ro->ro_rt->rt_ifp && ipforwarding) { 3276 /* 3277 * Forwarding is enabled and the source address belongs 3278 * to one of our own interfaces which isn't the same 3279 * as the interface used by the known route; drop the 3280 * original one and use the route interface address. 3281 */ 3282 IFA_REMREF(ifa); 3283 ifa = ro->ro_rt->rt_ifa; 3284 IFA_ADDREF(ifa); 3285 } 3286 3287 if (ip_select_srcif_debug && ifa != NULL) { 3288 printf("%s->%s ifscope %d ifa_if %s\n", 3289 s_src, s_dst, ifscope, if_name(ifa->ifa_ifp)); 3290 } 3291 } 3292 3293 if (ro->ro_rt != NULL) 3294 RT_LOCK_ASSERT_HELD(ro->ro_rt); 3295 /* 3296 * If there is a non-loopback route with the wrong interface, or if 3297 * there is no interface configured with such an address, blow it 3298 * away. Except for local/loopback, we look for one with a matching 3299 * interface scope/index. 3300 */ 3301 if (ro->ro_rt != NULL && 3302 (ifa == NULL || (ifa->ifa_ifp != rt_ifp && rt_ifp != lo_ifp) || 3303 !(ro->ro_rt->rt_flags & RTF_UP))) { 3304 if (ip_select_srcif_debug) { 3305 if (ifa != NULL) { 3306 printf("%s->%s ifscope %d ro_if %s != " 3307 "ifa_if %s (cached route cleared)\n", 3308 s_src, s_dst, ifscope, if_name(rt_ifp), 3309 if_name(ifa->ifa_ifp)); 3310 } else { 3311 printf("%s->%s ifscope %d ro_if %s " 3312 "(no ifa_if found)\n", 3313 s_src, s_dst, ifscope, if_name(rt_ifp)); 3314 } 3315 } 3316 3317 RT_UNLOCK(ro->ro_rt); 3318 rtfree(ro->ro_rt); 3319 ro->ro_rt = NULL; 3320 ro->ro_flags &= ~ROF_SRCIF_SELECTED; 3321 3322 /* 3323 * If the destination is IPv4 LLA and the route's interface 3324 * doesn't match the source interface, then the source IP 3325 * address is wrong; it most likely belongs to the primary 3326 * interface associated with the IPv4 LL subnet. Drop the 3327 * packet rather than letting it go out and return an error 3328 * to the ULP. This actually applies not only to IPv4 LL 3329 * but other shared subnets; for now we explicitly test only 3330 * for the former case and save the latter for future. 3331 */ 3332 if (IN_LINKLOCAL(ntohl(dst.s_addr)) && 3333 !IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) { 3334 IFA_REMREF(ifa); 3335 ifa = NULL; 3336 } 3337 } 3338 3339 if (ip_select_srcif_debug && ifa == NULL) { 3340 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n", 3341 s_src, s_dst, ifscope); 3342 } 3343 3344 /* 3345 * If there is a route, mark it accordingly. If there isn't one, 3346 * we'll get here again during the next transmit (possibly with a 3347 * route) and the flag will get set at that point. For IPv4 LLA 3348 * destination, mark it only if the route has been fully resolved; 3349 * otherwise we want to come back here again when the route points 3350 * to the interface over which the ARP reply arrives on. 3351 */ 3352 if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(dst.s_addr)) || 3353 (ro->ro_rt->rt_gateway->sa_family == AF_LINK && 3354 SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) { 3355 ro->ro_flags |= ROF_SRCIF_SELECTED; 3356 ro->ro_rt->generation_id = route_generation; 3357 } 3358 3359 if (ro->ro_rt != NULL) 3360 RT_UNLOCK(ro->ro_rt); 3361 3362 return (ifa); 3363} 3364