1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29/* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.43 2002/10/31 19:45:48 ume Exp $ */ 30/* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ 31 32/* 33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the project nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61/* 62 * Copyright (c) 1982, 1986, 1988, 1990, 1993 63 * The Regents of the University of California. All rights reserved. 64 * 65 * Redistribution and use in source and binary forms, with or without 66 * modification, are permitted provided that the following conditions 67 * are met: 68 * 1. Redistributions of source code must retain the above copyright 69 * notice, this list of conditions and the following disclaimer. 70 * 2. Redistributions in binary form must reproduce the above copyright 71 * notice, this list of conditions and the following disclaimer in the 72 * documentation and/or other materials provided with the distribution. 73 * 3. All advertising materials mentioning features or use of this software 74 * must display the following acknowledgement: 75 * This product includes software developed by the University of 76 * California, Berkeley and its contributors. 77 * 4. Neither the name of the University nor the names of its contributors 78 * may be used to endorse or promote products derived from this software 79 * without specific prior written permission. 80 * 81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 91 * SUCH DAMAGE. 92 * 93 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 94 */ 95/* 96 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 97 * support for mandatory and extensible security protections. This notice 98 * is included in support of clause 2.2 (b) of the Apple Public License, 99 * Version 2.0. 100 */ 101 102#include <sys/param.h> 103#include <sys/malloc.h> 104#include <sys/mbuf.h> 105#include <sys/errno.h> 106#include <sys/protosw.h> 107#include <sys/socket.h> 108#include <sys/socketvar.h> 109#include <sys/systm.h> 110#include <sys/kernel.h> 111#include <sys/proc.h> 112#include <sys/kauth.h> 113#include <sys/mcache.h> 114#include <sys/sysctl.h> 115#include <kern/zalloc.h> 116 117#include <pexpert/pexpert.h> 118 119#include <net/if.h> 120#include <net/route.h> 121#include <net/dlil.h> 122 123#include <netinet/in.h> 124#include <netinet/in_var.h> 125#include <netinet/ip_var.h> 126#include <netinet6/in6_var.h> 127#include <netinet/ip6.h> 128#include <netinet6/ip6protosw.h> 129#include <netinet/icmp6.h> 130#include <netinet6/ip6_var.h> 131#include <netinet/in_pcb.h> 132#include <netinet6/nd6.h> 133#include <netinet6/scope6_var.h> 134#include <mach/sdt.h> 135 136#if IPSEC 137#include <netinet6/ipsec.h> 138#if INET6 139#include <netinet6/ipsec6.h> 140#endif 141#include <netkey/key.h> 142extern int ipsec_bypass; 143#endif /* IPSEC */ 144 145#if CONFIG_MACF_NET 146#include <security/mac.h> 147#endif /* MAC_NET */ 148 149#include <netinet6/ip6_fw.h> 150 151#if DUMMYNET 152#include <netinet/ip_fw.h> 153#include <netinet/ip_dummynet.h> 154#endif /* DUMMYNET */ 155 156#include <net/net_osdep.h> 157 158#include <netinet/kpi_ipfilter_var.h> 159 160#if PF 161#include <net/pfvar.h> 162#endif /* PF */ 163 164#ifndef __APPLE__ 165static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options"); 166#endif 167 168int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt); 169static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, 170 struct socket *, struct sockopt *sopt); 171static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto); 172static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt); 173static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, int sticky, int cmsg, int uproto); 174static void im6o_trace(struct ip6_moptions *, int); 175static int ip6_copyexthdr(struct mbuf **, caddr_t, int); 176static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, 177 struct ip6_frag **); 178static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); 179static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); 180static int ip6_getpmtu (struct route_in6 *, struct route_in6 *, 181 struct ifnet *, struct in6_addr *, u_int32_t *, int *); 182 183#define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */ 184 185/* For gdb */ 186__private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE; 187 188struct ip6_moptions_dbg { 189 struct ip6_moptions im6o; /* ip6_moptions */ 190 u_int16_t im6o_refhold_cnt; /* # of IM6O_ADDREF */ 191 u_int16_t im6o_refrele_cnt; /* # of IM6O_REMREF */ 192 /* 193 * Alloc and free callers. 194 */ 195 ctrace_t im6o_alloc; 196 ctrace_t im6o_free; 197 /* 198 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers. 199 */ 200 ctrace_t im6o_refhold[IM6O_TRACE_HIST_SIZE]; 201 ctrace_t im6o_refrele[IM6O_TRACE_HIST_SIZE]; 202}; 203 204#if DEBUG 205static unsigned int im6o_debug = 1; /* debugging (enabled) */ 206#else 207static unsigned int im6o_debug; /* debugging (disabled) */ 208#endif /* !DEBUG */ 209 210static unsigned int im6o_size; /* size of zone element */ 211static struct zone *im6o_zone; /* zone for ip6_moptions */ 212 213#define IM6O_ZONE_MAX 64 /* maximum elements in zone */ 214#define IM6O_ZONE_NAME "ip6_moptions" /* zone name */ 215 216SYSCTL_DECL(_net_inet6_ip6); 217 218static int ip6_maxchainsent = 0; 219SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent, CTLFLAG_RW | CTLFLAG_LOCKED, 220 &ip6_maxchainsent, 0, "use dlil_output_list"); 221 222/* 223 * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only 224 * walks through the packet chain and sends each mbuf separately. 225 */ 226int 227ip6_output_list( 228 struct mbuf *m0, 229 int packetlist, 230 struct ip6_pktopts *opt, 231 struct route_in6 *ro, 232 int flags, 233 struct ip6_moptions *im6o, 234 struct ifnet **ifpp, /* XXX: just for statistics */ 235 struct ip6_out_args *ip6oap) 236{ 237#pragma unused(packetlist) 238 struct mbuf *m = m0, *nextpkt; 239 int error = 0; 240 241 while (m) { 242 /* 243 * Break the chain before calling ip6_output() and free the 244 * mbufs if there was an error. 245 */ 246 nextpkt = m->m_nextpkt; 247 m->m_nextpkt = NULL; 248 error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oap); 249 if (error) { 250 if (nextpkt) 251 m_freem_list(nextpkt); 252 return (error); 253 } 254 m = nextpkt; 255 } 256 257 return (error); 258} 259 260/* 261 * IP6 output. The packet in mbuf chain m contains a skeletal IP6 262 * header (with pri, len, nxt, hlim, src, dst). 263 * This function may modify ver and hlim only. 264 * The mbuf chain containing the packet will be freed. 265 * The mbuf opt, if present, will not be freed. 266 * 267 * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and 268 * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one, 269 * which is rt_rmx.rmx_mtu. 270 */ 271int 272ip6_output( 273 struct mbuf *m0, 274 struct ip6_pktopts *opt, 275 struct route_in6 *ro, 276 int flags, 277 struct ip6_moptions *im6o, 278 struct ifnet **ifpp, /* XXX: just for statistics */ 279 struct ip6_out_args *ip6oap) 280{ 281 struct ip6_hdr *ip6, *mhip6; 282 struct ifnet *ifp = NULL, *origifp = NULL; 283 struct mbuf *m = m0; 284 int hlen, tlen, len, off; 285 struct route_in6 ip6route; 286 struct rtentry *rt = NULL; 287 struct sockaddr_in6 *dst, src_sa, dst_sa; 288 int error = 0; 289 struct in6_ifaddr *ia = NULL; 290 u_int32_t mtu; 291 int alwaysfrag = 0, dontfrag = 0; 292 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; 293 struct ip6_exthdrs exthdrs; 294 struct in6_addr finaldst, src0, dst0; 295 u_int32_t zone; 296 struct route_in6 *ro_pmtu = NULL; 297 int hdrsplit = 0; 298 int needipsec = 0; 299 ipfilter_t inject_filter_ref; 300 int tso; 301 boolean_t select_srcif; 302 struct ipf_pktopts *ippo = NULL, ipf_pktopts; 303 struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, 0 }; 304 struct flowadv *adv = NULL; 305 u_int32_t ifmtu; 306#if DUMMYNET 307 struct m_tag *tag; 308 struct route_in6 saved_route; 309 struct route_in6 saved_ro_pmtu; 310 struct ip_fw_args args; 311 struct sockaddr_in6 dst_buf; 312 313 bzero(&args, sizeof(struct ip_fw_args)); 314#endif /* DUMMYNET */ 315 316 if ((flags & IPV6_OUTARGS) && ip6oap != NULL) { 317 ip6oa = *ip6oap; 318 adv = &ip6oap->ip6oa_flowadv; 319 adv->code = FADV_SUCCESS; 320 } 321 322#if IPSEC 323 int needipsectun = 0; 324 struct socket *so = NULL; 325 struct secpolicy *sp = NULL; 326 struct route_in6 *ipsec_saved_route = NULL; 327 struct ipsec_output_state ipsec_state; 328 329 bzero(&ipsec_state, sizeof(ipsec_state)); 330 331 /* for AH processing. stupid to have "socket" variable in IP layer... */ 332 if (ipsec_bypass == 0) 333 { 334 so = ipsec_getsocket(m); 335 (void)ipsec_setsocket(m, NULL); 336 } 337#endif /* IPSEC */ 338 339 bzero(&ipf_pktopts, sizeof(struct ipf_pktopts)); 340 ippo = &ipf_pktopts; 341 342 ip6 = mtod(m, struct ip6_hdr *); 343 inject_filter_ref = ipf_get_inject_filter(m); 344 345 /* Grab info from mtags prepended to the chain */ 346#if DUMMYNET 347 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, 348 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { 349 struct dn_pkt_tag *dn_tag; 350 351 dn_tag = (struct dn_pkt_tag *)(tag+1); 352 args.fwa_pf_rule = dn_tag->dn_pf_rule; 353 354 bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof(dst_buf)); 355 dst = &dst_buf; 356 ifp = dn_tag->dn_ifp; 357 if (ifp) 358 ifnet_reference(ifp); 359 flags = dn_tag->dn_flags; 360 if (dn_tag->dn_flags & IPV6_OUTARGS) 361 ip6oa = dn_tag->dn_ip6oa; 362 363 saved_route = dn_tag->dn_ro6; 364 ro = &saved_route; 365 saved_ro_pmtu = dn_tag->dn_ro6_pmtu; 366 ro_pmtu = &saved_ro_pmtu; 367 origifp = dn_tag->dn_origifp; 368 if (origifp) 369 ifnet_reference(origifp); 370 mtu = dn_tag->dn_mtu; 371 alwaysfrag = dn_tag->dn_alwaysfrag; 372 unfragpartlen = dn_tag->dn_unfragpartlen; 373 374 bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof(exthdrs)); 375 376 m_tag_delete(m0, tag); 377 } 378#endif /* DUMMYNET */ 379 380 finaldst = ip6->ip6_dst; 381 382 if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) { 383 if ((select_srcif = (!(flags & (IPV6_FORWARDING | 384 IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) && 385 (ip6oa.ip6oa_flags & IP6OAF_SELECT_SRCIF)))) 386 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF; 387 388 if ((ip6oa.ip6oa_flags & IP6OAF_BOUND_IF) && 389 ip6oa.ip6oa_boundif != IFSCOPE_NONE) { 390 ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF | 391 (ip6oa.ip6oa_boundif << IPPOF_SHIFT_IFSCOPE)); 392 } 393 394 if (ip6oa.ip6oa_flags & IP6OAF_BOUND_SRCADDR) 395 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR; 396 } else { 397 select_srcif = FALSE; 398 ip6oa.ip6oa_boundif = IFSCOPE_NONE; 399 ip6oa.ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_IF | 400 IP6OAF_BOUND_SRCADDR); 401 } 402 403 if ((flags & IPV6_OUTARGS) && (ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR)) 404 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR; 405 406#if DUMMYNET 407 if (args.fwa_pf_rule) { 408 ip6 = mtod(m, struct ip6_hdr *); 409 410 goto check_with_pf; 411 } 412#endif /* DUMMYNET */ 413 414#define MAKE_EXTHDR(hp, mp) \ 415 do { \ 416 if (hp) { \ 417 struct ip6_ext *eh = (struct ip6_ext *)(hp); \ 418 error = ip6_copyexthdr((mp), (caddr_t)(hp), \ 419 ((eh)->ip6e_len + 1) << 3); \ 420 if (error) \ 421 goto freehdrs; \ 422 } \ 423 } while (0) 424 425 bzero(&exthdrs, sizeof(exthdrs)); 426 427 if (opt) { 428 /* Hop-by-Hop options header */ 429 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); 430 /* Destination options header(1st part) */ 431 if (opt->ip6po_rthdr) { 432 /* 433 * Destination options header(1st part) 434 * This only makes sense with a routing header. 435 * See Section 9.2 of RFC 3542. 436 * Disabling this part just for MIP6 convenience is 437 * a bad idea. We need to think carefully about a 438 * way to make the advanced API coexist with MIP6 439 * options, which might automatically be inserted in 440 * the kernel. 441 */ 442 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); 443 } 444 /* Routing header */ 445 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); 446 /* Destination options header(2nd part) */ 447 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); 448 } 449 450#if IPSEC 451 if (ipsec_bypass != 0) 452 goto skip_ipsec; 453 454 /* get a security policy for this packet */ 455 if (so == NULL) 456 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); 457 else 458 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 459 460 if (sp == NULL) { 461 IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); 462 goto freehdrs; 463 } 464 465 error = 0; 466 467 /* check policy */ 468 switch (sp->policy) { 469 case IPSEC_POLICY_DISCARD: 470 case IPSEC_POLICY_GENERATE: 471 /* 472 * This packet is just discarded. 473 */ 474 IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio); 475 goto freehdrs; 476 477 case IPSEC_POLICY_BYPASS: 478 case IPSEC_POLICY_NONE: 479 /* no need to do IPsec. */ 480 needipsec = 0; 481 break; 482 483 case IPSEC_POLICY_IPSEC: 484 if (sp->req == NULL) { 485 /* acquire a policy */ 486 error = key_spdacquire(sp); 487 goto freehdrs; 488 } 489 needipsec = 1; 490 break; 491 492 case IPSEC_POLICY_ENTRUST: 493 default: 494 printf("ip6_output: Invalid policy found. %d\n", sp->policy); 495 } 496 skip_ipsec: 497#endif /* IPSEC */ 498 499 /* 500 * Calculate the total length of the extension header chain. 501 * Keep the length of the unfragmentable part for fragmentation. 502 */ 503 optlen = 0; 504 if (exthdrs.ip6e_hbh) 505 optlen += exthdrs.ip6e_hbh->m_len; 506 if (exthdrs.ip6e_dest1) 507 optlen += exthdrs.ip6e_dest1->m_len; 508 if (exthdrs.ip6e_rthdr) 509 optlen += exthdrs.ip6e_rthdr->m_len; 510 unfragpartlen = optlen + sizeof(struct ip6_hdr); 511 512 /* NOTE: we don't add AH/ESP length here. do that later. */ 513 if (exthdrs.ip6e_dest2) 514 optlen += exthdrs.ip6e_dest2->m_len; 515 516 517 if (needipsec && 518 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) { 519 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen); 520 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA; 521 } 522 523 /* 524 * If we need IPsec, or there is at least one extension header, 525 * separate IP6 header from the payload. 526 */ 527 if ((needipsec || optlen) && !hdrsplit) { 528 if ((error = ip6_splithdr(m, &exthdrs)) != 0) { 529 m = NULL; 530 goto freehdrs; 531 } 532 m = exthdrs.ip6e_ip6; 533 hdrsplit++; 534 } 535 536 /* adjust pointer */ 537 ip6 = mtod(m, struct ip6_hdr *); 538 539 /* adjust mbuf packet header length */ 540 m->m_pkthdr.len += optlen; 541 plen = m->m_pkthdr.len - sizeof(*ip6); 542 543 /* If this is a jumbo payload, insert a jumbo payload option. */ 544 if (plen > IPV6_MAXPACKET) { 545 if (!hdrsplit) { 546 if ((error = ip6_splithdr(m, &exthdrs)) != 0) { 547 m = NULL; 548 goto freehdrs; 549 } 550 m = exthdrs.ip6e_ip6; 551 hdrsplit++; 552 } 553 /* adjust pointer */ 554 ip6 = mtod(m, struct ip6_hdr *); 555 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) 556 goto freehdrs; 557 ip6->ip6_plen = 0; 558 } else 559 ip6->ip6_plen = htons(plen); 560 561 /* 562 * Concatenate headers and fill in next header fields. 563 * Here we have, on "m" 564 * IPv6 payload 565 * and we insert headers accordingly. Finally, we should be getting: 566 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] 567 * 568 * during the header composing process, "m" points to IPv6 header. 569 * "mprev" points to an extension header prior to esp. 570 */ 571 { 572 u_char *nexthdrp = &ip6->ip6_nxt; 573 struct mbuf *mprev = m; 574 575 /* 576 * we treat dest2 specially. this makes IPsec processing 577 * much easier. the goal here is to make mprev point the 578 * mbuf prior to dest2. 579 * 580 * result: IPv6 dest2 payload 581 * m and mprev will point to IPv6 header. 582 */ 583 if (exthdrs.ip6e_dest2) { 584 if (!hdrsplit) 585 panic("assumption failed: hdr not split"); 586 exthdrs.ip6e_dest2->m_next = m->m_next; 587 m->m_next = exthdrs.ip6e_dest2; 588 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; 589 ip6->ip6_nxt = IPPROTO_DSTOPTS; 590 } 591 592#define MAKE_CHAIN(m, mp, p, i)\ 593 do {\ 594 if (m) {\ 595 if (!hdrsplit) \ 596 panic("assumption failed: hdr not split"); \ 597 *mtod((m), u_char *) = *(p);\ 598 *(p) = (i);\ 599 p = mtod((m), u_char *);\ 600 (m)->m_next = (mp)->m_next;\ 601 (mp)->m_next = (m);\ 602 (mp) = (m);\ 603 }\ 604 } while (0) 605 /* 606 * result: IPv6 hbh dest1 rthdr dest2 payload 607 * m will point to IPv6 header. mprev will point to the 608 * extension header prior to dest2 (rthdr in the above case). 609 */ 610 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, 611 nexthdrp, IPPROTO_HOPOPTS); 612 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, 613 nexthdrp, IPPROTO_DSTOPTS); 614 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, 615 nexthdrp, IPPROTO_ROUTING); 616 617 if (!TAILQ_EMPTY(&ipv6_filters)) { 618 struct ipfilter *filter; 619 int seen = (inject_filter_ref == 0); 620 int fixscope = 0; 621 622 if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 623 ippo->ippo_flags |= IPPOF_MCAST_OPTS; 624 IM6O_LOCK(im6o); 625 ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp; 626 ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim; 627 ippo->ippo_mcast_loop = im6o->im6o_multicast_loop; 628 IM6O_UNLOCK(im6o); 629 } 630 631 /* Hack: embed the scope_id in the destination */ 632 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) && 633 (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) { 634 fixscope = 1; 635 ip6->ip6_dst.s6_addr16[1] = htons(ro->ro_dst.sin6_scope_id); 636 } 637 { 638 ipf_ref(); 639 TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) { 640 /* 641 * No need to proccess packet twice if we've 642 * already seen it 643 */ 644 if (seen == 0) { 645 if ((struct ipfilter *)inject_filter_ref == filter) 646 seen = 1; 647 } else if (filter->ipf_filter.ipf_output) { 648 errno_t result; 649 650 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); 651 if (result == EJUSTRETURN) { 652 ipf_unref(); 653 goto done; 654 } 655 if (result != 0) { 656 ipf_unref(); 657 goto bad; 658 } 659 } 660 } 661 ipf_unref(); 662 } 663 ip6 = mtod(m, struct ip6_hdr *); 664 /* Hack: cleanup embedded scope_id if we put it there */ 665 if (fixscope) 666 ip6->ip6_dst.s6_addr16[1] = 0; 667 } 668 669#if IPSEC 670 if (!needipsec) 671 goto skip_ipsec2; 672 673 /* 674 * pointers after IPsec headers are not valid any more. 675 * other pointers need a great care too. 676 * (IPsec routines should not mangle mbufs prior to AH/ESP) 677 */ 678 exthdrs.ip6e_dest2 = NULL; 679 680 { 681 struct ip6_rthdr *rh = NULL; 682 int segleft_org = 0; 683 684 if (exthdrs.ip6e_rthdr) { 685 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *); 686 segleft_org = rh->ip6r_segleft; 687 rh->ip6r_segleft = 0; 688 } 689 690 ipsec_state.m = m; 691 error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev, sp, flags, 692 &needipsectun); 693 m = ipsec_state.m; 694 if (error) { 695 /* mbuf is already reclaimed in ipsec6_output_trans. */ 696 m = NULL; 697 switch (error) { 698 case EHOSTUNREACH: 699 case ENETUNREACH: 700 case EMSGSIZE: 701 case ENOBUFS: 702 case ENOMEM: 703 break; 704 default: 705 printf("ip6_output (ipsec): error code %d\n", error); 706 /* fall through */ 707 case ENOENT: 708 /* don't show these error codes to the user */ 709 error = 0; 710 break; 711 } 712 goto bad; 713 } 714 if (exthdrs.ip6e_rthdr) { 715 /* ah6_output doesn't modify mbuf chain */ 716 rh->ip6r_segleft = segleft_org; 717 } 718 } 719 } 720skip_ipsec2: 721#endif 722 723 /* 724 * If there is a routing header, replace the destination address field 725 * with the first hop of the routing header. 726 */ 727 if (exthdrs.ip6e_rthdr) { 728 struct ip6_rthdr *rh = 729 (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr, 730 struct ip6_rthdr *)); 731 struct ip6_rthdr0 *rh0; 732 struct in6_addr *addr; 733 struct sockaddr_in6 sa; 734 735 switch (rh->ip6r_type) { 736 case IPV6_RTHDR_TYPE_0: 737 rh0 = (struct ip6_rthdr0 *)rh; 738 addr = (struct in6_addr *)(void *)(rh0 + 1); 739 740 /* 741 * construct a sockaddr_in6 form of 742 * the first hop. 743 * 744 * XXX: we may not have enough 745 * information about its scope zone; 746 * there is no standard API to pass 747 * the information from the 748 * application. 749 */ 750 bzero(&sa, sizeof(sa)); 751 sa.sin6_family = AF_INET6; 752 sa.sin6_len = sizeof(sa); 753 sa.sin6_addr = addr[0]; 754 if ((error = sa6_embedscope(&sa, 755 ip6_use_defzone)) != 0) { 756 goto bad; 757 } 758 ip6->ip6_dst = sa.sin6_addr; 759 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr) 760 * (rh0->ip6r0_segleft - 1)); 761 addr[rh0->ip6r0_segleft - 1] = finaldst; 762 /* XXX */ 763 in6_clearscope(addr + rh0->ip6r0_segleft - 1); 764 break; 765 default: /* is it possible? */ 766 error = EINVAL; 767 goto bad; 768 } 769 } 770 771 /* Source address validation */ 772 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && 773 (flags & IPV6_UNSPECSRC) == 0) { 774 error = EOPNOTSUPP; 775 ip6stat.ip6s_badscope++; 776 goto bad; 777 } 778 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { 779 error = EOPNOTSUPP; 780 ip6stat.ip6s_badscope++; 781 goto bad; 782 } 783 784 ip6stat.ip6s_localout++; 785 786 /* 787 * Route packet. 788 */ 789 if (ro == 0) { 790 ro = &ip6route; 791 bzero((caddr_t)ro, sizeof(*ro)); 792 } 793 ro_pmtu = ro; 794 if (opt && opt->ip6po_rthdr) 795 ro = &opt->ip6po_route; 796 dst = (struct sockaddr_in6 *)&ro->ro_dst; 797 798 if (ro && ro->ro_rt) 799 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt); 800 /* 801 * if specified, try to fill in the traffic class field. 802 * do not override if a non-zero value is already set. 803 * we check the diffserv field and the ecn field separately. 804 */ 805 if (opt && opt->ip6po_tclass >= 0) { 806 int mask = 0; 807 808 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) 809 mask |= 0xfc; 810 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) 811 mask |= 0x03; 812 if (mask != 0) 813 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); 814 } 815 816 /* fill in or override the hop limit field, if necessary. */ 817 if (opt && opt->ip6po_hlim != -1) 818 ip6->ip6_hlim = opt->ip6po_hlim & 0xff; 819 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 820 if (im6o != NULL) { 821 IM6O_LOCK(im6o); 822 ip6->ip6_hlim = im6o->im6o_multicast_hlim; 823 IM6O_UNLOCK(im6o); 824 } else { 825 ip6->ip6_hlim = ip6_defmcasthlim; 826 } 827 } 828 829 /* 830 * If there is a cached route, check that it is to the same 831 * destination and is still up. If not, free it and try again. 832 * Test rt_flags without holding rt_lock for performance reasons; 833 * if the route is down it will hopefully be caught by the layer 834 * below (since it uses this route as a hint) or during the 835 * next transmit. 836 */ 837 if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) || 838 dst->sin6_family != AF_INET6 || 839 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst) || 840 ro->ro_rt->generation_id != route_generation)) { 841 rtfree(ro->ro_rt); 842 ro->ro_rt = NULL; 843 } 844 if (ro->ro_rt == NULL) { 845 bzero(dst, sizeof(*dst)); 846 dst->sin6_family = AF_INET6; 847 dst->sin6_len = sizeof(struct sockaddr_in6); 848 dst->sin6_addr = ip6->ip6_dst; 849 } 850#if IPSEC 851 if (needipsec && needipsectun) { 852#if CONFIG_DTRACE 853 struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL; 854#endif 855 /* 856 * All the extension headers will become inaccessible 857 * (since they can be encrypted). 858 * Don't panic, we need no more updates to extension headers 859 * on inner IPv6 packet (since they are now encapsulated). 860 * 861 * IPv6 [ESP|AH] IPv6 [extension headers] payload 862 */ 863 bzero(&exthdrs, sizeof(exthdrs)); 864 exthdrs.ip6e_ip6 = m; 865 866 ipsec_state.m = m; 867 route_copyout(&ipsec_state.ro, (struct route *)ro, sizeof(ipsec_state.ro)); 868 ipsec_state.dst = (struct sockaddr *)dst; 869 870 /* Added a trace here so that we can see packets inside a tunnel */ 871 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL, 872 struct ip6_hdr *, ip6, struct ifnet *, trace_ifp, 873 struct ip *, NULL, struct ip6_hdr *, ip6); 874 875 error = ipsec6_output_tunnel(&ipsec_state, sp, flags); 876 if (ipsec_state.tunneled == 4) /* tunneled in IPv4 - packet is gone */ 877 goto done; 878 m = ipsec_state.m; 879 ipsec_saved_route = ro; 880 ro = (struct route_in6 *)&ipsec_state.ro; 881 dst = (struct sockaddr_in6 *)(void *)ipsec_state.dst; 882 if (error) { 883 /* mbuf is already reclaimed in ipsec6_output_tunnel. */ 884 m0 = m = NULL; 885 m = NULL; 886 switch (error) { 887 case EHOSTUNREACH: 888 case ENETUNREACH: 889 case EMSGSIZE: 890 case ENOBUFS: 891 case ENOMEM: 892 break; 893 default: 894 printf("ip6_output (ipsec): error code %d\n", error); 895 /* fall through */ 896 case ENOENT: 897 /* don't show these error codes to the user */ 898 error = 0; 899 break; 900 } 901 goto bad; 902 } 903 /* 904 * The packet has been encapsulated so the ifscope is no longer valid 905 * since it does not apply to the outer address: ignore the ifscope. 906 */ 907 ip6oa.ip6oa_boundif = IFSCOPE_NONE; 908 ip6oa.ip6oa_flags &= ~IP6OAF_BOUND_IF; 909 if (opt != NULL && opt->ip6po_pktinfo != NULL) { 910 if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE) 911 opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE; 912 } 913 exthdrs.ip6e_ip6 = m; 914 } 915#endif /* IPSEC */ 916 917 /* for safety */ 918 if (ifp != NULL) { 919 ifnet_release(ifp); 920 ifp = NULL; 921 } 922 923 /* adjust pointer */ 924 ip6 = mtod(m, struct ip6_hdr *); 925 926 if (select_srcif) { 927 bzero(&src_sa, sizeof(src_sa)); 928 src_sa.sin6_family = AF_INET6; 929 src_sa.sin6_len = sizeof(src_sa); 930 src_sa.sin6_addr = ip6->ip6_src; 931 } 932 bzero(&dst_sa, sizeof(dst_sa)); 933 dst_sa.sin6_family = AF_INET6; 934 dst_sa.sin6_len = sizeof(dst_sa); 935 dst_sa.sin6_addr = ip6->ip6_dst; 936 937 /* 938 * in6_selectroute() might return an ifp with its reference held 939 * even in the error case, so make sure to release its reference. 940 */ 941 if ((error = in6_selectroute(select_srcif ? &src_sa : NULL, 942 &dst_sa, opt, im6o, ro, &ifp, &rt, 0, &ip6oa)) != 0) { 943 switch (error) { 944 case EHOSTUNREACH: 945 ip6stat.ip6s_noroute++; 946 break; 947 case EADDRNOTAVAIL: 948 default: 949 break; /* XXX statistics? */ 950 } 951 if (ifp != NULL) 952 in6_ifstat_inc(ifp, ifs6_out_discard); 953 /* ifp (if non-NULL) will be released at the end */ 954 goto bad; 955 } 956 if (rt == NULL) { 957 /* 958 * If in6_selectroute() does not return a route entry, 959 * dst may not have been updated. 960 */ 961 *dst = dst_sa; /* XXX */ 962 } 963 964 /* 965 * then rt (for unicast) and ifp must be non-NULL valid values. 966 */ 967 if ((flags & IPV6_FORWARDING) == 0) { 968 /* XXX: the FORWARDING flag can be set for mrouting. */ 969 in6_ifstat_inc(ifp, ifs6_out_request); 970 } 971 if (rt != NULL) { 972 RT_LOCK(rt); 973 ia = (struct in6_ifaddr *)(rt->rt_ifa); 974 if (ia != NULL) 975 IFA_ADDREF(&ia->ia_ifa); 976 rt->rt_use++; 977 RT_UNLOCK(rt); 978 } 979 980 /* 981 * The outgoing interface must be in the zone of source and 982 * destination addresses. We should use ia_ifp to support the 983 * case of sending packets to an address of our own. 984 */ 985 if (ia != NULL && ia->ia_ifp) { 986 ifnet_reference(ia->ia_ifp); /* for origifp */ 987 if (origifp != NULL) 988 ifnet_release(origifp); 989 origifp = ia->ia_ifp; 990 } else { 991 if (ifp != NULL) 992 ifnet_reference(ifp); /* for origifp */ 993 if (origifp != NULL) 994 ifnet_release(origifp); 995 origifp = ifp; 996 } 997 src0 = ip6->ip6_src; 998 if (in6_setscope(&src0, origifp, &zone)) 999 goto badscope; 1000 bzero(&src_sa, sizeof(src_sa)); 1001 src_sa.sin6_family = AF_INET6; 1002 src_sa.sin6_len = sizeof(src_sa); 1003 src_sa.sin6_addr = ip6->ip6_src; 1004 if (sa6_recoverscope(&src_sa, TRUE) || zone != src_sa.sin6_scope_id) 1005 goto badscope; 1006 1007 dst0 = ip6->ip6_dst; 1008 if (in6_setscope(&dst0, origifp, &zone)) 1009 goto badscope; 1010 /* re-initialize to be sure */ 1011 bzero(&dst_sa, sizeof(dst_sa)); 1012 dst_sa.sin6_family = AF_INET6; 1013 dst_sa.sin6_len = sizeof(dst_sa); 1014 dst_sa.sin6_addr = ip6->ip6_dst; 1015 if (sa6_recoverscope(&dst_sa, TRUE) || zone != dst_sa.sin6_scope_id) { 1016 goto badscope; 1017 } 1018 1019 /* scope check is done. */ 1020 goto routefound; 1021 1022 badscope: 1023 ip6stat.ip6s_badscope++; 1024 in6_ifstat_inc(origifp, ifs6_out_discard); 1025 if (error == 0) 1026 error = EHOSTUNREACH; /* XXX */ 1027 goto bad; 1028 1029 routefound: 1030 if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 1031 if (opt && opt->ip6po_nextroute.ro_rt) { 1032 /* 1033 * The nexthop is explicitly specified by the 1034 * application. We assume the next hop is an IPv6 1035 * address. 1036 */ 1037 dst = (struct sockaddr_in6 *)(void *)opt->ip6po_nexthop; 1038 } 1039 else if ((rt->rt_flags & RTF_GATEWAY)) 1040 dst = (struct sockaddr_in6 *)(void *)rt->rt_gateway; 1041 } 1042 1043 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 1044 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ 1045 } else { 1046 struct in6_multi *in6m; 1047 1048 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; 1049 1050 in6_ifstat_inc(ifp, ifs6_out_mcast); 1051 1052 /* 1053 * Confirm that the outgoing interface supports multicast. 1054 */ 1055 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1056 ip6stat.ip6s_noroute++; 1057 in6_ifstat_inc(ifp, ifs6_out_discard); 1058 error = ENETUNREACH; 1059 goto bad; 1060 } 1061 in6_multihead_lock_shared(); 1062 IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m); 1063 in6_multihead_lock_done(); 1064 if (im6o != NULL) 1065 IM6O_LOCK(im6o); 1066 if (in6m != NULL && 1067 (im6o == NULL || im6o->im6o_multicast_loop)) { 1068 if (im6o != NULL) 1069 IM6O_UNLOCK(im6o); 1070 /* 1071 * If we belong to the destination multicast group 1072 * on the outgoing interface, and the caller did not 1073 * forbid loopback, loop back a copy. 1074 */ 1075 ip6_mloopback(ifp, m, dst); 1076 } else { 1077 if (im6o != NULL) 1078 IM6O_UNLOCK(im6o); 1079 /* 1080 * If we are acting as a multicast router, perform 1081 * multicast forwarding as if the packet had just 1082 * arrived on the interface to which we are about 1083 * to send. The multicast forwarding function 1084 * recursively calls this function, using the 1085 * IPV6_FORWARDING flag to prevent infinite recursion. 1086 * 1087 * Multicasts that are looped back by ip6_mloopback(), 1088 * above, will be forwarded by the ip6_input() routine, 1089 * if necessary. 1090 */ 1091#if MROUTING 1092 if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { 1093 /* 1094 * XXX: ip6_mforward expects that rcvif is NULL 1095 * when it is called from the originating path. 1096 * However, it is not always the case, since 1097 * some versions of MGETHDR() does not 1098 * initialize the field. 1099 */ 1100 m->m_pkthdr.rcvif = NULL; 1101 if (ip6_mforward(ip6, ifp, m) != 0) { 1102 m_freem(m); 1103 if (in6m != NULL) 1104 IN6M_REMREF(in6m); 1105 goto done; 1106 } 1107 } 1108#endif 1109 } 1110 if (in6m != NULL) 1111 IN6M_REMREF(in6m); 1112 /* 1113 * Multicasts with a hoplimit of zero may be looped back, 1114 * above, but must not be transmitted on a network. 1115 * Also, multicasts addressed to the loopback interface 1116 * are not sent -- the above call to ip6_mloopback() will 1117 * loop back a copy if this host actually belongs to the 1118 * destination group on the loopback interface. 1119 */ 1120 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || 1121 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { 1122 m_freem(m); 1123 goto done; 1124 } 1125 } 1126 1127 /* 1128 * Fill the outgoing inteface to tell the upper layer 1129 * to increment per-interface statistics. 1130 */ 1131 if (ifpp != NULL) { 1132 ifnet_reference(ifp); /* for caller */ 1133 if (*ifpp != NULL) 1134 ifnet_release(*ifpp); 1135 *ifpp = ifp; 1136 } 1137 1138 /* Determine path MTU. */ 1139 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, 1140 &alwaysfrag)) != 0) 1141 goto bad; 1142 1143 /* 1144 * The caller of this function may specify to use the minimum MTU 1145 * in some cases. 1146 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU 1147 * setting. The logic is a bit complicated; by default, unicast 1148 * packets will follow path MTU while multicast packets will be sent at 1149 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets 1150 * including unicast ones will be sent at the minimum MTU. Multicast 1151 * packets will always be sent at the minimum MTU unless 1152 * IP6PO_MINMTU_DISABLE is explicitly specified. 1153 * See RFC 3542 for more details. 1154 */ 1155 if (mtu > IPV6_MMTU) { 1156 if ((flags & IPV6_MINMTU)) 1157 mtu = IPV6_MMTU; 1158 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) 1159 mtu = IPV6_MMTU; 1160 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && 1161 (opt == NULL || 1162 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { 1163 mtu = IPV6_MMTU; 1164 } 1165 } 1166 1167 /* 1168 * clear embedded scope identifiers if necessary. 1169 * in6_clearscope will touch the addresses only when necessary. 1170 */ 1171 in6_clearscope(&ip6->ip6_src); 1172 in6_clearscope(&ip6->ip6_dst); 1173 1174#if IPFW2 1175 /* 1176 * Check with the firewall... 1177 */ 1178 if (ip6_fw_enable && ip6_fw_chk_ptr) { 1179 u_short port = 0; 1180 m->m_pkthdr.rcvif = NULL; /* XXX */ 1181 /* If ipfw says divert, we have to just drop packet */ 1182 if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) { 1183 m_freem(m); 1184 goto done; 1185 } 1186 if (!m) { 1187 error = EACCES; 1188 goto done; 1189 } 1190 } 1191#endif 1192 1193 /* 1194 * If the outgoing packet contains a hop-by-hop options header, 1195 * it must be examined and processed even by the source node. 1196 * (RFC 2460, section 4.) 1197 */ 1198 if (exthdrs.ip6e_hbh) { 1199 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); 1200 u_int32_t dummy; /* XXX unused */ 1201 1202#if DIAGNOSTIC 1203 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) 1204 panic("ip6e_hbh is not continuous"); 1205#endif 1206 /* 1207 * XXX: if we have to send an ICMPv6 error to the sender, 1208 * we need the M_LOOP flag since icmp6_error() expects 1209 * the IPv6 and the hop-by-hop options header are 1210 * continuous unless the flag is set. 1211 */ 1212 m->m_flags |= M_LOOP; 1213 m->m_pkthdr.rcvif = ifp; 1214 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), 1215 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), 1216 &dummy, &plen) < 0) { 1217 /* m was already freed at this point */ 1218 error = EINVAL;/* better error? */ 1219 goto done; 1220 } 1221 m->m_flags &= ~M_LOOP; /* XXX */ 1222 m->m_pkthdr.rcvif = NULL; 1223 } 1224 1225#if DUMMYNET 1226check_with_pf: 1227#endif 1228#if PF 1229 if (PF_IS_ENABLED) { 1230#if DUMMYNET 1231 /* 1232 * TBD: Need to save opt->ip6po_flags for reinjection rdar://10434993 1233 */ 1234 args.fwa_m = m; 1235 args.fwa_oif = ifp; 1236 args.fwa_oflags = flags; 1237 if ((flags & IPV6_OUTARGS)) 1238 args.fwa_ip6oa = &ip6oa; 1239 args.fwa_ro6 = ro; 1240 args.fwa_dst6 = dst; 1241 args.fwa_ro6_pmtu = ro_pmtu; 1242 args.fwa_origifp = origifp; 1243 args.fwa_mtu = mtu; 1244 args.fwa_alwaysfrag = alwaysfrag; 1245 args.fwa_unfragpartlen = unfragpartlen; 1246 args.fwa_exthdrs = &exthdrs; 1247 /* Invoke outbound packet filter */ 1248 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args); 1249#else 1250 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL); 1251#endif /* DUMMYNET */ 1252 1253 if (error != 0 || m == NULL) { 1254 /* 1255 * Note that if we ever handle packet chain, we will 1256 * have to restore the linkage from the previous 1257 * packet to the next like in ip_outout_list() 1258 */ 1259 if (m != NULL) { 1260 panic("%s: unexpected packet %p\n", __func__, m); 1261 /* NOTREACHED */ 1262 } 1263 /* Already freed by callee */ 1264 goto done; 1265 } 1266 ip6 = mtod(m, struct ip6_hdr *); 1267 } 1268#endif /* PF */ 1269 1270 /* 1271 * Send the packet to the outgoing interface. 1272 * If necessary, do IPv6 fragmentation before sending. 1273 * 1274 * the logic here is rather complex: 1275 * 1: normal case (dontfrag == 0, alwaysfrag == 0) 1276 * 1-a: send as is if tlen <= path mtu 1277 * 1-b: fragment if tlen > path mtu 1278 * 1279 * 2: if user asks us not to fragment (dontfrag == 1) 1280 * 2-a: send as is if tlen <= interface mtu 1281 * 2-b: error if tlen > interface mtu 1282 * 1283 * 3: if we always need to attach fragment header (alwaysfrag == 1) 1284 * always fragment 1285 * 1286 * 4: if dontfrag == 1 && alwaysfrag == 1 1287 * error, as we cannot handle this conflicting request 1288 */ 1289 tlen = m->m_pkthdr.len; 1290 1291 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) 1292 dontfrag = 1; 1293 else 1294 dontfrag = 0; 1295 if (dontfrag && alwaysfrag) { /* case 4 */ 1296 /* conflicting request - can't transmit */ 1297 error = EMSGSIZE; 1298 goto bad; 1299 } 1300 1301 lck_rw_lock_shared(nd_if_rwlock); 1302 /* Access without acquiring nd_ifinfo lock for performance */ 1303 ifmtu = IN6_LINKMTU(ifp); 1304 lck_rw_done(nd_if_rwlock); 1305 1306 if (dontfrag && tlen > ifmtu) { /* case 2-b */ 1307 /* 1308 * Even if the DONTFRAG option is specified, we cannot send the 1309 * packet when the data length is larger than the MTU of the 1310 * outgoing interface. 1311 * Notify the error by sending IPV6_PATHMTU ancillary data as 1312 * well as returning an error code (the latter is not described 1313 * in the API spec.) 1314 */ 1315 u_int32_t mtu32; 1316 struct ip6ctlparam ip6cp; 1317 1318 mtu32 = (u_int32_t)mtu; 1319 bzero(&ip6cp, sizeof(ip6cp)); 1320 ip6cp.ip6c_cmdarg = (void *)&mtu32; 1321 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst, 1322 (void *)&ip6cp); 1323 1324 error = EMSGSIZE; 1325 goto bad; 1326 } 1327 1328 /* 1329 * transmit packet without fragmentation 1330 */ 1331 tso = (ifp->if_hwassist & IFNET_TSO_IPV6) && 1332 (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6); 1333 if (dontfrag || (!alwaysfrag && /* case 1-a and 2-a */ 1334 (tlen <= mtu || tso || (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) { 1335 int sw_csum; 1336 1337 ip6 = mtod(m, struct ip6_hdr *); 1338#ifdef IPSEC 1339 /* clean ipsec history once it goes out of the node */ 1340 ipsec_delaux(m); 1341#endif 1342 1343 if (apple_hwcksum_tx == 0) /* Do not let HW handle cksum */ 1344 sw_csum = m->m_pkthdr.csum_flags; 1345 else 1346 sw_csum = m->m_pkthdr.csum_flags & 1347 ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); 1348 1349 if ((sw_csum & CSUM_DELAY_IPV6_DATA) != 0) { 1350 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen); 1351 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA; 1352 } 1353 if (ro->ro_rt) 1354 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt); 1355 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv); 1356 goto done; 1357 } 1358 1359 /* 1360 * try to fragment the packet. case 1-b and 3 1361 */ 1362 if (mtu < IPV6_MMTU) { 1363 /* path MTU cannot be less than IPV6_MMTU */ 1364 error = EMSGSIZE; 1365 in6_ifstat_inc(ifp, ifs6_out_fragfail); 1366 goto bad; 1367 } else if (ip6->ip6_plen == 0) { 1368 /* jumbo payload cannot be fragmented */ 1369 error = EMSGSIZE; 1370 in6_ifstat_inc(ifp, ifs6_out_fragfail); 1371 goto bad; 1372 } else { 1373 struct mbuf **mnext, *m_frgpart; 1374 struct ip6_frag *ip6f; 1375 u_int32_t id = htonl(ip6_randomid()); 1376 u_char nextproto; 1377 1378 /* 1379 * Too large for the destination or interface; 1380 * fragment if possible. 1381 * Must be able to put at least 8 bytes per fragment. 1382 */ 1383 hlen = unfragpartlen; 1384 if (mtu > IPV6_MAXPACKET) 1385 mtu = IPV6_MAXPACKET; 1386 1387 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; 1388 if (len < 8) { 1389 error = EMSGSIZE; 1390 in6_ifstat_inc(ifp, ifs6_out_fragfail); 1391 goto bad; 1392 } 1393 1394 mnext = &m->m_nextpkt; 1395 1396 /* 1397 * Change the next header field of the last header in the 1398 * unfragmentable part. 1399 */ 1400 if (exthdrs.ip6e_rthdr) { 1401 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); 1402 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; 1403 } else if (exthdrs.ip6e_dest1) { 1404 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); 1405 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; 1406 } else if (exthdrs.ip6e_hbh) { 1407 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); 1408 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; 1409 } else { 1410 nextproto = ip6->ip6_nxt; 1411 ip6->ip6_nxt = IPPROTO_FRAGMENT; 1412 } 1413 1414 if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) { 1415 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen); 1416 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA; 1417 } 1418 1419 /* 1420 * Loop through length of segment after first fragment, 1421 * make new header and copy data of each part and link onto 1422 * chain. 1423 */ 1424 m0 = m; 1425 for (off = hlen; off < tlen; off += len) { 1426 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */ 1427 if (!m) { 1428 error = ENOBUFS; 1429 ip6stat.ip6s_odropped++; 1430 goto sendorfree; 1431 } 1432 m->m_pkthdr.rcvif = NULL; 1433 m->m_flags = m0->m_flags & M_COPYFLAGS; 1434 *mnext = m; 1435 mnext = &m->m_nextpkt; 1436 m->m_data += max_linkhdr; 1437 mhip6 = mtod(m, struct ip6_hdr *); 1438 *mhip6 = *ip6; 1439 m->m_len = sizeof(*mhip6); 1440 error = ip6_insertfraghdr(m0, m, hlen, &ip6f); 1441 if (error) { 1442 ip6stat.ip6s_odropped++; 1443 goto sendorfree; 1444 } 1445 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); 1446 if (off + len >= tlen) 1447 len = tlen - off; 1448 else 1449 ip6f->ip6f_offlg |= IP6F_MORE_FRAG; 1450 mhip6->ip6_plen = htons((u_short)(len + hlen + 1451 sizeof(*ip6f) - 1452 sizeof(struct ip6_hdr))); 1453 if ((m_frgpart = m_copy(m0, off, len)) == 0) { 1454 error = ENOBUFS; 1455 ip6stat.ip6s_odropped++; 1456 goto sendorfree; 1457 } 1458 m_cat(m, m_frgpart); 1459 m->m_pkthdr.len = len + hlen + sizeof(*ip6f); 1460 m->m_pkthdr.rcvif = 0; 1461 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id; 1462 1463 M_COPY_PFTAG(m, m0); 1464 m_set_service_class(m, m0->m_pkthdr.svc); 1465 1466#ifdef __darwin8_notyet 1467#if CONFIG_MACF_NET 1468 mac_create_fragment(m0, m); 1469#endif 1470#endif 1471 ip6f->ip6f_reserved = 0; 1472 ip6f->ip6f_ident = id; 1473 ip6f->ip6f_nxt = nextproto; 1474 ip6stat.ip6s_ofragments++; 1475 in6_ifstat_inc(ifp, ifs6_out_fragcreat); 1476 } 1477 1478 in6_ifstat_inc(ifp, ifs6_out_fragok); 1479 } 1480 1481 /* 1482 * Remove leading garbages. 1483 */ 1484sendorfree: 1485 m = m0->m_nextpkt; 1486 m0->m_nextpkt = 0; 1487 m_freem(m0); 1488 for (m0 = m; m; m = m0) { 1489 m0 = m->m_nextpkt; 1490 m->m_nextpkt = 0; 1491 if (error == 0) { 1492 /* Record statistics for this interface address. */ 1493 if (ia) { 1494#ifndef __APPLE__ 1495 ia->ia_ifa.if_opackets++; 1496 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1497#endif 1498 } 1499#if IPSEC 1500 /* clean ipsec history once it goes out of the node */ 1501 ipsec_delaux(m); 1502#endif 1503 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, 1504 adv); 1505 1506 } else 1507 m_freem(m); 1508 } 1509 1510 if (error == 0) 1511 ip6stat.ip6s_fragmented++; 1512 1513done: 1514#if IPSEC 1515 if (ipsec_saved_route) { 1516 ro = ipsec_saved_route; 1517 if (ipsec_state.ro.ro_rt) { 1518 rtfree(ipsec_state.ro.ro_rt); 1519 } 1520 } 1521#endif /* IPSEC */ 1522 if (ro == &ip6route && ro->ro_rt) { /* brace necessary for rtfree */ 1523 rtfree(ro->ro_rt); 1524 } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { 1525 rtfree(ro_pmtu->ro_rt); 1526 } 1527 1528#if IPSEC 1529 if (sp != NULL) 1530 key_freesp(sp, KEY_SADB_UNLOCKED); 1531#endif /* IPSEC */ 1532 1533 if (ia != NULL) 1534 IFA_REMREF(&ia->ia_ifa); 1535 if (ifp != NULL) 1536 ifnet_release(ifp); 1537 if (origifp != NULL) 1538 ifnet_release(origifp); 1539 return (error); 1540 1541freehdrs: 1542 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ 1543 m_freem(exthdrs.ip6e_dest1); 1544 m_freem(exthdrs.ip6e_rthdr); 1545 m_freem(exthdrs.ip6e_dest2); 1546 /* fall through */ 1547bad: 1548 m_freem(m); 1549 goto done; 1550} 1551 1552static int 1553ip6_copyexthdr(mp, hdr, hlen) 1554 struct mbuf **mp; 1555 caddr_t hdr; 1556 int hlen; 1557{ 1558 struct mbuf *m; 1559 1560 if (hlen > MCLBYTES) 1561 return(ENOBUFS); /* XXX */ 1562 1563 MGET(m, M_DONTWAIT, MT_DATA); 1564 if (!m) 1565 return(ENOBUFS); 1566 1567 if (hlen > MLEN) { 1568 MCLGET(m, M_DONTWAIT); 1569 if ((m->m_flags & M_EXT) == 0) { 1570 m_free(m); 1571 return (ENOBUFS); 1572 } 1573 } 1574 m->m_len = hlen; 1575 if (hdr) 1576 bcopy(hdr, mtod(m, caddr_t), hlen); 1577 1578 *mp = m; 1579 return (0); 1580} 1581 1582/* 1583 * Process a delayed payload checksum calculation. 1584 */ 1585void 1586in6_delayed_cksum(struct mbuf *m, uint16_t offset) 1587{ 1588 uint16_t csum; 1589 1590 csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1591 if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6) != 0) { 1592 csum = 0xffff; 1593 } 1594 1595 offset += (m->m_pkthdr.csum_data & 0xffff); 1596 if ((offset + sizeof(csum)) > m->m_len) { 1597 m_copyback(m, offset, sizeof(csum), &csum); 1598 } else if (IP6_HDR_ALIGNED_P(mtod(m, char *))) { 1599 *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum; 1600 } else { 1601 bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum)); 1602 } 1603} 1604/* 1605 * Insert jumbo payload option. 1606 */ 1607static int 1608ip6_insert_jumboopt(exthdrs, plen) 1609 struct ip6_exthdrs *exthdrs; 1610 u_int32_t plen; 1611{ 1612 struct mbuf *mopt; 1613 u_char *optbuf; 1614 u_int32_t v; 1615 1616#define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ 1617 1618 /* 1619 * If there is no hop-by-hop options header, allocate new one. 1620 * If there is one but it doesn't have enough space to store the 1621 * jumbo payload option, allocate a cluster to store the whole options. 1622 * Otherwise, use it to store the options. 1623 */ 1624 if (exthdrs->ip6e_hbh == 0) { 1625 MGET(mopt, M_DONTWAIT, MT_DATA); 1626 if (mopt == 0) 1627 return (ENOBUFS); 1628 mopt->m_len = JUMBOOPTLEN; 1629 optbuf = mtod(mopt, u_char *); 1630 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ 1631 exthdrs->ip6e_hbh = mopt; 1632 } else { 1633 struct ip6_hbh *hbh; 1634 1635 mopt = exthdrs->ip6e_hbh; 1636 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { 1637 /* 1638 * XXX assumption: 1639 * - exthdrs->ip6e_hbh is not referenced from places 1640 * other than exthdrs. 1641 * - exthdrs->ip6e_hbh is not an mbuf chain. 1642 */ 1643 u_int32_t oldoptlen = mopt->m_len; 1644 struct mbuf *n; 1645 1646 /* 1647 * XXX: give up if the whole (new) hbh header does 1648 * not fit even in an mbuf cluster. 1649 */ 1650 if (oldoptlen + JUMBOOPTLEN > MCLBYTES) 1651 return (ENOBUFS); 1652 1653 /* 1654 * As a consequence, we must always prepare a cluster 1655 * at this point. 1656 */ 1657 MGET(n, M_DONTWAIT, MT_DATA); 1658 if (n) { 1659 MCLGET(n, M_DONTWAIT); 1660 if ((n->m_flags & M_EXT) == 0) { 1661 m_freem(n); 1662 n = NULL; 1663 } 1664 } 1665 if (!n) 1666 return (ENOBUFS); 1667 n->m_len = oldoptlen + JUMBOOPTLEN; 1668 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), 1669 oldoptlen); 1670 optbuf = mtod(n, u_char *) + oldoptlen; 1671 m_freem(mopt); 1672 mopt = exthdrs->ip6e_hbh = n; 1673 } else { 1674 optbuf = mtod(mopt, u_char *) + mopt->m_len; 1675 mopt->m_len += JUMBOOPTLEN; 1676 } 1677 optbuf[0] = IP6OPT_PADN; 1678 optbuf[1] = 1; 1679 1680 /* 1681 * Adjust the header length according to the pad and 1682 * the jumbo payload option. 1683 */ 1684 hbh = mtod(mopt, struct ip6_hbh *); 1685 hbh->ip6h_len += (JUMBOOPTLEN >> 3); 1686 } 1687 1688 /* fill in the option. */ 1689 optbuf[2] = IP6OPT_JUMBO; 1690 optbuf[3] = 4; 1691 v = (u_int32_t)htonl(plen + JUMBOOPTLEN); 1692 bcopy(&v, &optbuf[4], sizeof(u_int32_t)); 1693 1694 /* finally, adjust the packet header length */ 1695 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; 1696 1697 return (0); 1698#undef JUMBOOPTLEN 1699} 1700 1701/* 1702 * Insert fragment header and copy unfragmentable header portions. 1703 */ 1704static int 1705ip6_insertfraghdr(m0, m, hlen, frghdrp) 1706 struct mbuf *m0, *m; 1707 int hlen; 1708 struct ip6_frag **frghdrp; 1709{ 1710 struct mbuf *n, *mlast; 1711 1712 if (hlen > sizeof(struct ip6_hdr)) { 1713 n = m_copym(m0, sizeof(struct ip6_hdr), 1714 hlen - sizeof(struct ip6_hdr), M_DONTWAIT); 1715 if (n == 0) 1716 return (ENOBUFS); 1717 m->m_next = n; 1718 } else 1719 n = m; 1720 1721 /* Search for the last mbuf of unfragmentable part. */ 1722 for (mlast = n; mlast->m_next; mlast = mlast->m_next) 1723 ; 1724 1725 if ((mlast->m_flags & M_EXT) == 0 && 1726 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { 1727 /* use the trailing space of the last mbuf for the fragment hdr */ 1728 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + 1729 mlast->m_len); 1730 mlast->m_len += sizeof(struct ip6_frag); 1731 m->m_pkthdr.len += sizeof(struct ip6_frag); 1732 } else { 1733 /* allocate a new mbuf for the fragment header */ 1734 struct mbuf *mfrg; 1735 1736 MGET(mfrg, M_DONTWAIT, MT_DATA); 1737 if (mfrg == 0) 1738 return (ENOBUFS); 1739 mfrg->m_len = sizeof(struct ip6_frag); 1740 *frghdrp = mtod(mfrg, struct ip6_frag *); 1741 mlast->m_next = mfrg; 1742 } 1743 1744 return (0); 1745} 1746 1747extern int load_ipfw(void); 1748static int 1749ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, 1750 struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup, 1751 int *alwaysfragp) 1752{ 1753 u_int32_t mtu = 0; 1754 int alwaysfrag = 0; 1755 int error = 0; 1756 1757 if (ro_pmtu != ro) { 1758 /* The first hop and the final destination may differ. */ 1759 struct sockaddr_in6 *sa6_dst = 1760 (struct sockaddr_in6 *)&ro_pmtu->ro_dst; 1761 if (ro_pmtu->ro_rt && 1762 ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || 1763 ro_pmtu->ro_rt->generation_id != route_generation || 1764 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { 1765 rtfree(ro_pmtu->ro_rt); 1766 ro_pmtu->ro_rt = (struct rtentry *)NULL; 1767 } 1768 if (ro_pmtu->ro_rt == NULL) { 1769 bzero(sa6_dst, sizeof(*sa6_dst)); 1770 sa6_dst->sin6_family = AF_INET6; 1771 sa6_dst->sin6_len = sizeof(struct sockaddr_in6); 1772 sa6_dst->sin6_addr = *dst; 1773 1774 rtalloc_scoped((struct route *)ro_pmtu, 1775 ifp != NULL ? ifp->if_index : IFSCOPE_NONE); 1776 } 1777 } 1778 1779 1780 if (ro_pmtu->ro_rt != NULL) { 1781 u_int32_t ifmtu; 1782 1783 lck_rw_lock_shared(nd_if_rwlock); 1784 /* Access without acquiring nd_ifinfo lock for performance */ 1785 ifmtu = IN6_LINKMTU(ifp); 1786 lck_rw_done(nd_if_rwlock); 1787 1788 RT_LOCK_SPIN(ro_pmtu->ro_rt); 1789 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; 1790 if (mtu > ifmtu || mtu == 0) { 1791 /* 1792 * The MTU on the route is larger than the MTU on 1793 * the interface! This shouldn't happen, unless the 1794 * MTU of the interface has been changed after the 1795 * interface was brought up. Change the MTU in the 1796 * route to match the interface MTU (as long as the 1797 * field isn't locked). 1798 * 1799 * if MTU on the route is 0, we need to fix the MTU. 1800 * this case happens with path MTU discovery timeouts. 1801 */ 1802 mtu = ifmtu; 1803 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) 1804 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */ 1805 } 1806 else if (mtu < IPV6_MMTU) { 1807 /* 1808 * RFC2460 section 5, last paragraph: 1809 * if we record ICMPv6 too big message with 1810 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU 1811 * or smaller, with framgent header attached. 1812 * (fragment header is needed regardless from the 1813 * packet size, for translators to identify packets) 1814 */ 1815 alwaysfrag = 1; 1816 mtu = IPV6_MMTU; 1817 } 1818 RT_UNLOCK(ro_pmtu->ro_rt); 1819 } else { 1820 if (ifp) { 1821 lck_rw_lock_shared(nd_if_rwlock); 1822 /* Don't hold nd_ifinfo lock for performance */ 1823 mtu = IN6_LINKMTU(ifp); 1824 lck_rw_done(nd_if_rwlock); 1825 } else 1826 error = EHOSTUNREACH; /* XXX */ 1827 } 1828 1829 *mtup = mtu; 1830 if (alwaysfragp) 1831 *alwaysfragp = alwaysfrag; 1832 return (error); 1833} 1834 1835/* 1836 * IP6 socket option processing. 1837 */ 1838int 1839ip6_ctloutput(so, sopt) 1840 struct socket *so; 1841 struct sockopt *sopt; 1842{ 1843 int optdatalen, uproto; 1844 void *optdata; 1845 int privileged; 1846 struct inpcb *in6p = sotoinpcb(so); 1847 int error = 0, optval = 0; 1848 int level, op = -1, optname = 0; 1849 int optlen = 0; 1850 struct proc *p; 1851 1852 if (sopt == NULL) { 1853 panic("ip6_ctloutput: arg soopt is NULL"); 1854 /* NOTREACHED */ 1855 } 1856 level = sopt->sopt_level; 1857 op = sopt->sopt_dir; 1858 optname = sopt->sopt_name; 1859 optlen = sopt->sopt_valsize; 1860 p = sopt->sopt_p; 1861 uproto = (int)so->so_proto->pr_protocol; 1862 1863 privileged = (proc_suser(p) == 0); 1864 1865 if (level == IPPROTO_IPV6) { 1866 switch (op) { 1867 1868 case SOPT_SET: 1869 switch (optname) { 1870 case IPV6_2292PKTOPTIONS: 1871 { 1872 struct mbuf *m; 1873 1874 error = soopt_getm(sopt, &m); /* XXX */ 1875 if (error != 0) 1876 break; 1877 error = soopt_mcopyin(sopt, m); /* XXX */ 1878 if (error != 0) 1879 break; 1880 error = ip6_pcbopts(&in6p->in6p_outputopts, 1881 m, so, sopt); 1882 m_freem(m); /* XXX */ 1883 break; 1884 } 1885 1886 /* 1887 * Use of some Hop-by-Hop options or some 1888 * Destination options, might require special 1889 * privilege. That is, normal applications 1890 * (without special privilege) might be forbidden 1891 * from setting certain options in outgoing packets, 1892 * and might never see certain options in received 1893 * packets. [RFC 2292 Section 6] 1894 * KAME specific note: 1895 * KAME prevents non-privileged users from sending or 1896 * receiving ANY hbh/dst options in order to avoid 1897 * overhead of parsing options in the kernel. 1898 */ 1899 case IPV6_RECVHOPOPTS: 1900 case IPV6_RECVDSTOPTS: 1901 case IPV6_RECVRTHDRDSTOPTS: 1902 if (!privileged) 1903 break; 1904 /* FALLTHROUGH */ 1905 case IPV6_UNICAST_HOPS: 1906 case IPV6_HOPLIMIT: 1907 1908 case IPV6_RECVPKTINFO: 1909 case IPV6_RECVHOPLIMIT: 1910 case IPV6_RECVRTHDR: 1911 case IPV6_RECVPATHMTU: 1912 case IPV6_RECVTCLASS: 1913 case IPV6_V6ONLY: 1914 case IPV6_AUTOFLOWLABEL: 1915 if (optlen != sizeof(int)) { 1916 error = EINVAL; 1917 break; 1918 } 1919 error = sooptcopyin(sopt, &optval, 1920 sizeof optval, sizeof optval); 1921 if (error) 1922 break; 1923 switch (optname) { 1924 1925 case IPV6_UNICAST_HOPS: 1926 if (optval < -1 || optval >= 256) 1927 error = EINVAL; 1928 else { 1929 /* -1 = kernel default */ 1930 in6p->in6p_hops = optval; 1931 if ((in6p->inp_vflag & 1932 INP_IPV4) != 0) 1933 in6p->inp_ip_ttl = optval; 1934 } 1935 break; 1936#define OPTSET(bit) \ 1937do { \ 1938 if (optval) \ 1939 in6p->inp_flags |= (bit); \ 1940 else \ 1941 in6p->inp_flags &= ~(bit); \ 1942} while (/*CONSTCOND*/ 0) 1943#define OPTSET2292(bit) \ 1944do { \ 1945 in6p->inp_flags |= IN6P_RFC2292; \ 1946 if (optval) \ 1947 in6p->inp_flags |= (bit); \ 1948 else \ 1949 in6p->inp_flags &= ~(bit); \ 1950} while (/*CONSTCOND*/ 0) 1951#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) 1952 1953 case IPV6_RECVPKTINFO: 1954 /* cannot mix with RFC2292 */ 1955 if (OPTBIT(IN6P_RFC2292)) { 1956 error = EINVAL; 1957 break; 1958 } 1959 OPTSET(IN6P_PKTINFO); 1960 break; 1961 1962 case IPV6_HOPLIMIT: 1963 { 1964 struct ip6_pktopts **optp; 1965 1966 /* cannot mix with RFC2292 */ 1967 if (OPTBIT(IN6P_RFC2292)) { 1968 error = EINVAL; 1969 break; 1970 } 1971 optp = &in6p->in6p_outputopts; 1972 error = ip6_pcbopt(IPV6_HOPLIMIT, 1973 (u_char *)&optval, sizeof(optval), 1974 optp, uproto); 1975 break; 1976 } 1977 1978 case IPV6_RECVHOPLIMIT: 1979 /* cannot mix with RFC2292 */ 1980 if (OPTBIT(IN6P_RFC2292)) { 1981 error = EINVAL; 1982 break; 1983 } 1984 OPTSET(IN6P_HOPLIMIT); 1985 break; 1986 1987 case IPV6_RECVHOPOPTS: 1988 /* cannot mix with RFC2292 */ 1989 if (OPTBIT(IN6P_RFC2292)) { 1990 error = EINVAL; 1991 break; 1992 } 1993 OPTSET(IN6P_HOPOPTS); 1994 break; 1995 1996 case IPV6_RECVDSTOPTS: 1997 /* cannot mix with RFC2292 */ 1998 if (OPTBIT(IN6P_RFC2292)) { 1999 error = EINVAL; 2000 break; 2001 } 2002 OPTSET(IN6P_DSTOPTS); 2003 break; 2004 2005 case IPV6_RECVRTHDRDSTOPTS: 2006 /* cannot mix with RFC2292 */ 2007 if (OPTBIT(IN6P_RFC2292)) { 2008 error = EINVAL; 2009 break; 2010 } 2011 OPTSET(IN6P_RTHDRDSTOPTS); 2012 break; 2013 2014 case IPV6_RECVRTHDR: 2015 /* cannot mix with RFC2292 */ 2016 if (OPTBIT(IN6P_RFC2292)) { 2017 error = EINVAL; 2018 break; 2019 } 2020 OPTSET(IN6P_RTHDR); 2021 break; 2022 2023 case IPV6_RECVPATHMTU: 2024 /* 2025 * We ignore this option for TCP 2026 * sockets. 2027 * (RFC3542 leaves this case 2028 * unspecified.) 2029 */ 2030 if (uproto != IPPROTO_TCP) 2031 OPTSET(IN6P_MTU); 2032 break; 2033 2034 case IPV6_V6ONLY: 2035 /* 2036 * make setsockopt(IPV6_V6ONLY) 2037 * available only prior to bind(2). 2038 * see ipng mailing list, Jun 22 2001. 2039 */ 2040 if (in6p->inp_lport || 2041 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { 2042 error = EINVAL; 2043 break; 2044 } 2045 OPTSET(IN6P_IPV6_V6ONLY); 2046 if (optval) 2047 in6p->inp_vflag &= ~INP_IPV4; 2048 else 2049 in6p->inp_vflag |= INP_IPV4; 2050 break; 2051 case IPV6_RECVTCLASS: 2052 /* we can mix with RFC2292 */ 2053 OPTSET(IN6P_TCLASS); 2054 break; 2055 case IPV6_AUTOFLOWLABEL: 2056 OPTSET(IN6P_AUTOFLOWLABEL); 2057 break; 2058 2059 } 2060 break; 2061 2062 case IPV6_TCLASS: 2063 case IPV6_DONTFRAG: 2064 case IPV6_USE_MIN_MTU: 2065 case IPV6_PREFER_TEMPADDR: 2066 if (optlen != sizeof(optval)) { 2067 error = EINVAL; 2068 break; 2069 } 2070 error = sooptcopyin(sopt, &optval, 2071 sizeof optval, sizeof optval); 2072 if (error) 2073 break; 2074 { 2075 struct ip6_pktopts **optp; 2076 optp = &in6p->in6p_outputopts; 2077 error = ip6_pcbopt(optname, 2078 (u_char *)&optval, sizeof(optval), 2079 optp, uproto); 2080 break; 2081 } 2082 2083 case IPV6_2292PKTINFO: 2084 case IPV6_2292HOPLIMIT: 2085 case IPV6_2292HOPOPTS: 2086 case IPV6_2292DSTOPTS: 2087 case IPV6_2292RTHDR: 2088 /* RFC 2292 */ 2089 if (optlen != sizeof(int)) { 2090 error = EINVAL; 2091 break; 2092 } 2093 error = sooptcopyin(sopt, &optval, 2094 sizeof optval, sizeof optval); 2095 if (error) 2096 break; 2097 switch (optname) { 2098 case IPV6_2292PKTINFO: 2099 OPTSET2292(IN6P_PKTINFO); 2100 break; 2101 case IPV6_2292HOPLIMIT: 2102 OPTSET2292(IN6P_HOPLIMIT); 2103 break; 2104 case IPV6_2292HOPOPTS: 2105 /* 2106 * Check super-user privilege. 2107 * See comments for IPV6_RECVHOPOPTS. 2108 */ 2109 if (!privileged) 2110 return(EPERM); 2111 OPTSET2292(IN6P_HOPOPTS); 2112 break; 2113 case IPV6_2292DSTOPTS: 2114 if (!privileged) 2115 return(EPERM); 2116 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ 2117 break; 2118 case IPV6_2292RTHDR: 2119 OPTSET2292(IN6P_RTHDR); 2120 break; 2121 } 2122 break; 2123 case IPV6_3542PKTINFO: 2124 case IPV6_3542HOPOPTS: 2125 case IPV6_3542RTHDR: 2126 case IPV6_3542DSTOPTS: 2127 case IPV6_RTHDRDSTOPTS: 2128 case IPV6_3542NEXTHOP: 2129 { 2130 struct ip6_pktopts **optp; 2131 /* new advanced API (RFC3542) */ 2132 struct mbuf *m; 2133 2134 /* cannot mix with RFC2292 */ 2135 if (OPTBIT(IN6P_RFC2292)) { 2136 error = EINVAL; 2137 break; 2138 } 2139 error = soopt_getm(sopt, &m); 2140 if (error != 0) 2141 break; 2142 error = soopt_mcopyin(sopt, m); 2143 if (error) { 2144 m_freem(m); 2145 break; 2146 } 2147 optp = &in6p->in6p_outputopts; 2148 error = ip6_pcbopt(optname, mtod(m, u_char *), 2149 m->m_len, optp, uproto); 2150 m_freem(m); 2151 break; 2152 } 2153#undef OPTSET 2154 2155 case IPV6_MULTICAST_IF: 2156 case IPV6_MULTICAST_HOPS: 2157 case IPV6_MULTICAST_LOOP: 2158 case IPV6_JOIN_GROUP: 2159 case IPV6_LEAVE_GROUP: 2160 case IPV6_MSFILTER: 2161 case MCAST_BLOCK_SOURCE: 2162 case MCAST_UNBLOCK_SOURCE: 2163 case MCAST_JOIN_GROUP: 2164 case MCAST_LEAVE_GROUP: 2165 case MCAST_JOIN_SOURCE_GROUP: 2166 case MCAST_LEAVE_SOURCE_GROUP: 2167 error = ip6_setmoptions(in6p, sopt); 2168 break; 2169 2170 case IPV6_PORTRANGE: 2171 error = sooptcopyin(sopt, &optval, 2172 sizeof optval, sizeof optval); 2173 if (error) 2174 break; 2175 2176 switch (optval) { 2177 case IPV6_PORTRANGE_DEFAULT: 2178 in6p->inp_flags &= ~(INP_LOWPORT); 2179 in6p->inp_flags &= ~(INP_HIGHPORT); 2180 break; 2181 2182 case IPV6_PORTRANGE_HIGH: 2183 in6p->inp_flags &= ~(INP_LOWPORT); 2184 in6p->inp_flags |= INP_HIGHPORT; 2185 break; 2186 2187 case IPV6_PORTRANGE_LOW: 2188 in6p->inp_flags &= ~(INP_HIGHPORT); 2189 in6p->inp_flags |= INP_LOWPORT; 2190 break; 2191 2192 default: 2193 error = EINVAL; 2194 break; 2195 } 2196 break; 2197 2198#if IPSEC 2199 case IPV6_IPSEC_POLICY: 2200 { 2201 caddr_t req = NULL; 2202 size_t len = 0; 2203 struct mbuf *m; 2204 2205 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 2206 break; 2207 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 2208 break; 2209 if (m) { 2210 req = mtod(m, caddr_t); 2211 len = m->m_len; 2212 } 2213 error = ipsec6_set_policy(in6p, optname, req, 2214 len, privileged); 2215 m_freem(m); 2216 } 2217 break; 2218#endif /* KAME IPSEC */ 2219 2220#if IPFIREWALL 2221 case IPV6_FW_ADD: 2222 case IPV6_FW_DEL: 2223 case IPV6_FW_FLUSH: 2224 case IPV6_FW_ZERO: 2225 { 2226 if (ip6_fw_ctl_ptr == NULL) 2227 load_ip6fw(); 2228 if (ip6_fw_ctl_ptr != NULL) 2229 error = (*ip6_fw_ctl_ptr)(sopt); 2230 else 2231 return ENOPROTOOPT; 2232 } 2233 break; 2234#endif /* IPFIREWALL */ 2235 2236 /* 2237 * IPv6 variant of IP_BOUND_IF; for details see 2238 * comments on IP_BOUND_IF in ip_ctloutput(). 2239 */ 2240 case IPV6_BOUND_IF: 2241 /* This option is settable only on IPv6 */ 2242 if (!(in6p->inp_vflag & INP_IPV6)) { 2243 error = EINVAL; 2244 break; 2245 } 2246 2247 error = sooptcopyin(sopt, &optval, 2248 sizeof (optval), sizeof (optval)); 2249 2250 if (error) 2251 break; 2252 2253 error = inp_bindif(in6p, optval); 2254 break; 2255 2256 case IPV6_NO_IFT_CELLULAR: 2257 /* This option is settable only for IPv6 */ 2258 if (!(in6p->inp_vflag & INP_IPV6)) { 2259 error = EINVAL; 2260 break; 2261 } 2262 2263 error = sooptcopyin(sopt, &optval, 2264 sizeof (optval), sizeof (optval)); 2265 2266 if (error) 2267 break; 2268 2269 error = inp_nocellular(in6p, optval); 2270 break; 2271 2272 case IPV6_OUT_IF: 2273 /* This option is not settable */ 2274 error = EINVAL; 2275 break; 2276 2277 default: 2278 error = ENOPROTOOPT; 2279 break; 2280 } 2281 break; 2282 2283 case SOPT_GET: 2284 switch (optname) { 2285 2286 case IPV6_2292PKTOPTIONS: 2287 /* 2288 * RFC3542 (effectively) deprecated the 2289 * semantics of the 2292-style pktoptions. 2290 * Since it was not reliable in nature (i.e., 2291 * applications had to expect the lack of some 2292 * information after all), it would make sense 2293 * to simplify this part by always returning 2294 * empty data. 2295 */ 2296 sopt->sopt_valsize = 0; 2297 break; 2298 2299 case IPV6_RECVHOPOPTS: 2300 case IPV6_RECVDSTOPTS: 2301 case IPV6_RECVRTHDRDSTOPTS: 2302 case IPV6_UNICAST_HOPS: 2303 case IPV6_RECVPKTINFO: 2304 case IPV6_RECVHOPLIMIT: 2305 case IPV6_RECVRTHDR: 2306 case IPV6_RECVPATHMTU: 2307 2308 case IPV6_V6ONLY: 2309 case IPV6_PORTRANGE: 2310 case IPV6_RECVTCLASS: 2311 case IPV6_AUTOFLOWLABEL: 2312 switch (optname) { 2313 2314 case IPV6_RECVHOPOPTS: 2315 optval = OPTBIT(IN6P_HOPOPTS); 2316 break; 2317 2318 case IPV6_RECVDSTOPTS: 2319 optval = OPTBIT(IN6P_DSTOPTS); 2320 break; 2321 2322 case IPV6_RECVRTHDRDSTOPTS: 2323 optval = OPTBIT(IN6P_RTHDRDSTOPTS); 2324 break; 2325 2326 case IPV6_UNICAST_HOPS: 2327 optval = in6p->in6p_hops; 2328 break; 2329 2330 case IPV6_RECVPKTINFO: 2331 optval = OPTBIT(IN6P_PKTINFO); 2332 break; 2333 2334 case IPV6_RECVHOPLIMIT: 2335 optval = OPTBIT(IN6P_HOPLIMIT); 2336 break; 2337 2338 case IPV6_RECVRTHDR: 2339 optval = OPTBIT(IN6P_RTHDR); 2340 break; 2341 2342 case IPV6_RECVPATHMTU: 2343 optval = OPTBIT(IN6P_MTU); 2344 break; 2345 2346 case IPV6_V6ONLY: 2347 optval = OPTBIT(IN6P_IPV6_V6ONLY); 2348 break; 2349 2350 case IPV6_PORTRANGE: 2351 { 2352 int flags; 2353 flags = in6p->inp_flags; 2354 if (flags & INP_HIGHPORT) 2355 optval = IPV6_PORTRANGE_HIGH; 2356 else if (flags & INP_LOWPORT) 2357 optval = IPV6_PORTRANGE_LOW; 2358 else 2359 optval = 0; 2360 break; 2361 } 2362 case IPV6_RECVTCLASS: 2363 optval = OPTBIT(IN6P_TCLASS); 2364 break; 2365 2366 case IPV6_AUTOFLOWLABEL: 2367 optval = OPTBIT(IN6P_AUTOFLOWLABEL); 2368 break; 2369 } 2370 if (error) 2371 break; 2372 error = sooptcopyout(sopt, &optval, 2373 sizeof optval); 2374 break; 2375 2376 case IPV6_PATHMTU: 2377 { 2378 u_int32_t pmtu = 0; 2379 struct ip6_mtuinfo mtuinfo; 2380 struct route_in6 sro; 2381 2382 bzero(&sro, sizeof(sro)); 2383 2384 if (!(so->so_state & SS_ISCONNECTED)) 2385 return (ENOTCONN); 2386 /* 2387 * XXX: we dot not consider the case of source 2388 * routing, or optional information to specify 2389 * the outgoing interface. 2390 */ 2391 error = ip6_getpmtu(&sro, NULL, NULL, 2392 &in6p->in6p_faddr, &pmtu, NULL); 2393 if (sro.ro_rt) 2394 rtfree(sro.ro_rt); 2395 if (error) 2396 break; 2397 if (pmtu > IPV6_MAXPACKET) 2398 pmtu = IPV6_MAXPACKET; 2399 2400 bzero(&mtuinfo, sizeof(mtuinfo)); 2401 mtuinfo.ip6m_mtu = (u_int32_t)pmtu; 2402 optdata = (void *)&mtuinfo; 2403 optdatalen = sizeof(mtuinfo); 2404 error = sooptcopyout(sopt, optdata, 2405 optdatalen); 2406 break; 2407 } 2408 2409 case IPV6_2292PKTINFO: 2410 case IPV6_2292HOPLIMIT: 2411 case IPV6_2292HOPOPTS: 2412 case IPV6_2292RTHDR: 2413 case IPV6_2292DSTOPTS: 2414 switch (optname) { 2415 case IPV6_2292PKTINFO: 2416 optval = OPTBIT(IN6P_PKTINFO); 2417 break; 2418 case IPV6_2292HOPLIMIT: 2419 optval = OPTBIT(IN6P_HOPLIMIT); 2420 break; 2421 case IPV6_2292HOPOPTS: 2422 optval = OPTBIT(IN6P_HOPOPTS); 2423 break; 2424 case IPV6_2292RTHDR: 2425 optval = OPTBIT(IN6P_RTHDR); 2426 break; 2427 case IPV6_2292DSTOPTS: 2428 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); 2429 break; 2430 } 2431 error = sooptcopyout(sopt, &optval, 2432 sizeof optval); 2433 break; 2434 case IPV6_PKTINFO: 2435 case IPV6_HOPOPTS: 2436 case IPV6_RTHDR: 2437 case IPV6_DSTOPTS: 2438 case IPV6_RTHDRDSTOPTS: 2439 case IPV6_NEXTHOP: 2440 case IPV6_TCLASS: 2441 case IPV6_DONTFRAG: 2442 case IPV6_USE_MIN_MTU: 2443 case IPV6_PREFER_TEMPADDR: 2444 error = ip6_getpcbopt(in6p->in6p_outputopts, 2445 optname, sopt); 2446 break; 2447 2448 case IPV6_MULTICAST_IF: 2449 case IPV6_MULTICAST_HOPS: 2450 case IPV6_MULTICAST_LOOP: 2451 case IPV6_MSFILTER: 2452 error = ip6_getmoptions(in6p, sopt); 2453 break; 2454 2455#if IPSEC 2456 case IPV6_IPSEC_POLICY: 2457 { 2458 caddr_t req = NULL; 2459 size_t len = 0; 2460 struct mbuf *m = NULL; 2461 struct mbuf **mp = &m; 2462 2463 error = soopt_getm(sopt, &m); /* XXX */ 2464 if (error != 0) 2465 break; 2466 error = soopt_mcopyin(sopt, m); /* XXX */ 2467 if (error != 0) 2468 break; 2469 if (m) { 2470 req = mtod(m, caddr_t); 2471 len = m->m_len; 2472 } 2473 error = ipsec6_get_policy(in6p, req, len, mp); 2474 if (error == 0) 2475 error = soopt_mcopyout(sopt, m); /*XXX*/ 2476 if (error == 0 && m) 2477 m_freem(m); 2478 break; 2479 } 2480#endif /* KAME IPSEC */ 2481 2482#if IPFIREWALL 2483 case IPV6_FW_GET: 2484 { 2485 if (ip6_fw_ctl_ptr == NULL) 2486 load_ip6fw(); 2487 if (ip6_fw_ctl_ptr != NULL) 2488 error = (*ip6_fw_ctl_ptr)(sopt); 2489 else 2490 return ENOPROTOOPT; 2491 } 2492 break; 2493#endif /* IPFIREWALL */ 2494 2495 case IPV6_BOUND_IF: 2496 if (in6p->inp_flags & INP_BOUND_IF) 2497 optval = in6p->inp_boundifp->if_index; 2498 error = sooptcopyout(sopt, &optval, 2499 sizeof (optval)); 2500 break; 2501 2502 case IPV6_NO_IFT_CELLULAR: 2503 optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR) 2504 ? 1 : 0; 2505 error = sooptcopyout(sopt, &optval, 2506 sizeof (optval)); 2507 break; 2508 2509 case IPV6_OUT_IF: 2510 optval = (in6p->in6p_last_outifp != NULL) ? 2511 in6p->in6p_last_outifp->if_index : 0; 2512 error = sooptcopyout(sopt, &optval, 2513 sizeof (optval)); 2514 break; 2515 2516 default: 2517 error = ENOPROTOOPT; 2518 break; 2519 } 2520 break; 2521 } 2522 } else { 2523 error = EINVAL; 2524 } 2525 return(error); 2526} 2527 2528int 2529ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) 2530{ 2531 int error = 0, optval, optlen; 2532 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); 2533 struct inpcb *in6p = sotoinpcb(so); 2534 int level, op, optname; 2535 2536 level = sopt->sopt_level; 2537 op = sopt->sopt_dir; 2538 optname = sopt->sopt_name; 2539 optlen = sopt->sopt_valsize; 2540 2541 if (level != IPPROTO_IPV6) { 2542 return (EINVAL); 2543 } 2544 2545 switch (optname) { 2546 case IPV6_CHECKSUM: 2547 /* 2548 * For ICMPv6 sockets, no modification allowed for checksum 2549 * offset, permit "no change" values to help existing apps. 2550 * 2551 * RFC3542 says: "An attempt to set IPV6_CHECKSUM 2552 * for an ICMPv6 socket will fail." 2553 * The current behavior does not meet RFC3542. 2554 */ 2555 switch (op) { 2556 case SOPT_SET: 2557 if (optlen != sizeof(int)) { 2558 error = EINVAL; 2559 break; 2560 } 2561 error = sooptcopyin(sopt, &optval, sizeof(optval), 2562 sizeof(optval)); 2563 if (error) 2564 break; 2565 if ((optval % 2) != 0) { 2566 /* the API assumes even offset values */ 2567 error = EINVAL; 2568 } else if (so->so_proto->pr_protocol == 2569 IPPROTO_ICMPV6) { 2570 if (optval != icmp6off) 2571 error = EINVAL; 2572 } else 2573 in6p->in6p_cksum = optval; 2574 break; 2575 2576 case SOPT_GET: 2577 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) 2578 optval = icmp6off; 2579 else 2580 optval = in6p->in6p_cksum; 2581 2582 error = sooptcopyout(sopt, &optval, sizeof(optval)); 2583 break; 2584 2585 default: 2586 error = EINVAL; 2587 break; 2588 } 2589 break; 2590 2591 default: 2592 error = ENOPROTOOPT; 2593 break; 2594 } 2595 2596 return (error); 2597} 2598 2599/* 2600 * Set up IP6 options in pcb for insertion in output packets or 2601 * specifying behavior of outgoing packets. 2602 */ 2603static int 2604ip6_pcbopts( 2605 struct ip6_pktopts **pktopt, 2606 struct mbuf *m, 2607 __unused struct socket *so, 2608 __unused struct sockopt *sopt) 2609{ 2610 struct ip6_pktopts *opt = *pktopt; 2611 int error = 0; 2612 2613 /* turn off any old options. */ 2614 if (opt) { 2615#if DIAGNOSTIC 2616 if (opt->ip6po_pktinfo || opt->ip6po_nexthop || 2617 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || 2618 opt->ip6po_rhinfo.ip6po_rhi_rthdr) 2619 printf("ip6_pcbopts: all specified options are cleared.\n"); 2620#endif 2621 ip6_clearpktopts(opt, -1); 2622 } else { 2623 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK); 2624 if (opt == NULL) 2625 return ENOBUFS; 2626 } 2627 *pktopt = NULL; 2628 2629 if (!m || m->m_len == 0) { 2630 /* 2631 * Only turning off any previous options, regardless of 2632 * whether the opt is just created or given. 2633 */ 2634 if (opt) 2635 FREE(opt, M_IP6OPT); 2636 return(0); 2637 } 2638 2639 /* set options specified by user. */ 2640 if ((error = ip6_setpktopts(m, opt, NULL, so->so_proto->pr_protocol)) != 0) { 2641 ip6_clearpktopts(opt, -1); /* XXX: discard all options */ 2642 FREE(opt, M_IP6OPT); 2643 return(error); 2644 } 2645 *pktopt = opt; 2646 return(0); 2647} 2648 2649/* 2650 * initialize ip6_pktopts. beware that there are non-zero default values in 2651 * the struct. 2652 */ 2653void 2654ip6_initpktopts(struct ip6_pktopts *opt) 2655{ 2656 2657 bzero(opt, sizeof(*opt)); 2658 opt->ip6po_hlim = -1; /* -1 means default hop limit */ 2659 opt->ip6po_tclass = -1; /* -1 means default traffic class */ 2660 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; 2661 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; 2662} 2663 2664static int 2665ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, 2666 int uproto) 2667{ 2668 struct ip6_pktopts *opt; 2669 2670 opt = *pktopt; 2671 if (opt == NULL) { 2672 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK); 2673 if (opt == NULL) 2674 return(ENOBUFS); 2675 ip6_initpktopts(opt); 2676 *pktopt = opt; 2677 } 2678 2679 return (ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto)); 2680} 2681 2682static int 2683ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) 2684{ 2685 void *optdata = NULL; 2686 int optdatalen = 0; 2687 struct ip6_ext *ip6e; 2688 int error = 0; 2689 struct in6_pktinfo null_pktinfo; 2690 int deftclass = 0, on; 2691 int defminmtu = IP6PO_MINMTU_MCASTONLY; 2692 int defpreftemp = IP6PO_TEMPADDR_SYSTEM; 2693 2694 2695 switch (optname) { 2696 case IPV6_PKTINFO: 2697 if (pktopt && pktopt->ip6po_pktinfo) 2698 optdata = (void *)pktopt->ip6po_pktinfo; 2699 else { 2700 /* XXX: we don't have to do this every time... */ 2701 bzero(&null_pktinfo, sizeof(null_pktinfo)); 2702 optdata = (void *)&null_pktinfo; 2703 } 2704 optdatalen = sizeof(struct in6_pktinfo); 2705 break; 2706 case IPV6_TCLASS: 2707 if (pktopt && pktopt->ip6po_tclass >= 0) 2708 optdata = (void *)&pktopt->ip6po_tclass; 2709 else 2710 optdata = (void *)&deftclass; 2711 optdatalen = sizeof(int); 2712 break; 2713 case IPV6_HOPOPTS: 2714 if (pktopt && pktopt->ip6po_hbh) { 2715 optdata = (void *)pktopt->ip6po_hbh; 2716 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; 2717 optdatalen = (ip6e->ip6e_len + 1) << 3; 2718 } 2719 break; 2720 case IPV6_RTHDR: 2721 if (pktopt && pktopt->ip6po_rthdr) { 2722 optdata = (void *)pktopt->ip6po_rthdr; 2723 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; 2724 optdatalen = (ip6e->ip6e_len + 1) << 3; 2725 } 2726 break; 2727 case IPV6_RTHDRDSTOPTS: 2728 if (pktopt && pktopt->ip6po_dest1) { 2729 optdata = (void *)pktopt->ip6po_dest1; 2730 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; 2731 optdatalen = (ip6e->ip6e_len + 1) << 3; 2732 } 2733 break; 2734 case IPV6_DSTOPTS: 2735 if (pktopt && pktopt->ip6po_dest2) { 2736 optdata = (void *)pktopt->ip6po_dest2; 2737 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; 2738 optdatalen = (ip6e->ip6e_len + 1) << 3; 2739 } 2740 break; 2741 case IPV6_NEXTHOP: 2742 if (pktopt && pktopt->ip6po_nexthop) { 2743 optdata = (void *)pktopt->ip6po_nexthop; 2744 optdatalen = pktopt->ip6po_nexthop->sa_len; 2745 } 2746 break; 2747 case IPV6_USE_MIN_MTU: 2748 if (pktopt) 2749 optdata = (void *)&pktopt->ip6po_minmtu; 2750 else 2751 optdata = (void *)&defminmtu; 2752 optdatalen = sizeof(int); 2753 break; 2754 case IPV6_DONTFRAG: 2755 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) 2756 on = 1; 2757 else 2758 on = 0; 2759 optdata = (void *)&on; 2760 optdatalen = sizeof(on); 2761 break; 2762 case IPV6_PREFER_TEMPADDR: 2763 if (pktopt) 2764 optdata = (void *)&pktopt->ip6po_prefer_tempaddr; 2765 else 2766 optdata = (void *)&defpreftemp; 2767 optdatalen = sizeof(int); 2768 break; 2769 default: /* should not happen */ 2770#ifdef DIAGNOSTIC 2771 panic("ip6_getpcbopt: unexpected option\n"); 2772#endif 2773 return (ENOPROTOOPT); 2774 } 2775 2776 error = sooptcopyout(sopt, optdata, optdatalen); 2777 2778 return (error); 2779} 2780 2781void 2782ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) 2783{ 2784 if (pktopt == NULL) 2785 return; 2786 2787 if (optname == -1 || optname == IPV6_PKTINFO) { 2788 if (pktopt->ip6po_pktinfo) 2789 FREE(pktopt->ip6po_pktinfo, M_IP6OPT); 2790 pktopt->ip6po_pktinfo = NULL; 2791 } 2792 if (optname == -1 || optname == IPV6_HOPLIMIT) 2793 pktopt->ip6po_hlim = -1; 2794 if (optname == -1 || optname == IPV6_TCLASS) 2795 pktopt->ip6po_tclass = -1; 2796 if (optname == -1 || optname == IPV6_NEXTHOP) { 2797 if (pktopt->ip6po_nextroute.ro_rt) { 2798 rtfree(pktopt->ip6po_nextroute.ro_rt); 2799 pktopt->ip6po_nextroute.ro_rt = NULL; 2800 } 2801 if (pktopt->ip6po_nexthop) 2802 FREE(pktopt->ip6po_nexthop, M_IP6OPT); 2803 pktopt->ip6po_nexthop = NULL; 2804 } 2805 if (optname == -1 || optname == IPV6_HOPOPTS) { 2806 if (pktopt->ip6po_hbh) 2807 FREE(pktopt->ip6po_hbh, M_IP6OPT); 2808 pktopt->ip6po_hbh = NULL; 2809 } 2810 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { 2811 if (pktopt->ip6po_dest1) 2812 FREE(pktopt->ip6po_dest1, M_IP6OPT); 2813 pktopt->ip6po_dest1 = NULL; 2814 } 2815 if (optname == -1 || optname == IPV6_RTHDR) { 2816 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) 2817 FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); 2818 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; 2819 if (pktopt->ip6po_route.ro_rt) { 2820 rtfree(pktopt->ip6po_route.ro_rt); 2821 pktopt->ip6po_route.ro_rt = NULL; 2822 } 2823 } 2824 if (optname == -1 || optname == IPV6_DSTOPTS) { 2825 if (pktopt->ip6po_dest2) 2826 FREE(pktopt->ip6po_dest2, M_IP6OPT); 2827 pktopt->ip6po_dest2 = NULL; 2828 } 2829} 2830 2831#define PKTOPT_EXTHDRCPY(type) \ 2832do {\ 2833 if (src->type) {\ 2834 int hlen =\ 2835 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ 2836 dst->type = _MALLOC(hlen, M_IP6OPT, canwait);\ 2837 if (dst->type == NULL && canwait == M_NOWAIT)\ 2838 goto bad;\ 2839 bcopy(src->type, dst->type, hlen);\ 2840 }\ 2841} while (0) 2842 2843static int 2844copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait) 2845{ 2846 if (dst == NULL || src == NULL) { 2847 printf("copypktopts: invalid argument\n"); 2848 return (EINVAL); 2849 } 2850 2851 dst->ip6po_hlim = src->ip6po_hlim; 2852 dst->ip6po_tclass = src->ip6po_tclass; 2853 dst->ip6po_flags = src->ip6po_flags; 2854 if (src->ip6po_pktinfo) { 2855 dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo), 2856 M_IP6OPT, canwait); 2857 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT) 2858 goto bad; 2859 *dst->ip6po_pktinfo = *src->ip6po_pktinfo; 2860 } 2861 if (src->ip6po_nexthop) { 2862 dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len, 2863 M_IP6OPT, canwait); 2864 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT) 2865 goto bad; 2866 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, 2867 src->ip6po_nexthop->sa_len); 2868 } 2869 PKTOPT_EXTHDRCPY(ip6po_hbh); 2870 PKTOPT_EXTHDRCPY(ip6po_dest1); 2871 PKTOPT_EXTHDRCPY(ip6po_dest2); 2872 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ 2873 return (0); 2874 2875 bad: 2876 ip6_clearpktopts(dst, -1); 2877 return (ENOBUFS); 2878} 2879#undef PKTOPT_EXTHDRCPY 2880 2881struct ip6_pktopts * 2882ip6_copypktopts(struct ip6_pktopts *src, int canwait) 2883{ 2884 int error; 2885 struct ip6_pktopts *dst; 2886 2887 dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait); 2888 if (dst == NULL) 2889 return (NULL); 2890 ip6_initpktopts(dst); 2891 2892 if ((error = copypktopts(dst, src, canwait)) != 0) { 2893 FREE(dst, M_IP6OPT); 2894 return (NULL); 2895 } 2896 2897 return (dst); 2898} 2899 2900void 2901ip6_freepcbopts(struct ip6_pktopts *pktopt) 2902{ 2903 if (pktopt == NULL) 2904 return; 2905 2906 ip6_clearpktopts(pktopt, -1); 2907 2908 FREE(pktopt, M_IP6OPT); 2909} 2910 2911void 2912ip6_moptions_init(void) 2913{ 2914 PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof (im6o_debug)); 2915 2916 im6o_size = (im6o_debug == 0) ? sizeof (struct ip6_moptions) : 2917 sizeof (struct ip6_moptions_dbg); 2918 2919 im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0, 2920 IM6O_ZONE_NAME); 2921 if (im6o_zone == NULL) { 2922 panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME); 2923 /* NOTREACHED */ 2924 } 2925 zone_change(im6o_zone, Z_EXPAND, TRUE); 2926} 2927 2928void 2929im6o_addref(struct ip6_moptions *im6o, int locked) 2930{ 2931 if (!locked) 2932 IM6O_LOCK(im6o); 2933 else 2934 IM6O_LOCK_ASSERT_HELD(im6o); 2935 2936 if (++im6o->im6o_refcnt == 0) { 2937 panic("%s: im6o %p wraparound refcnt\n", __func__, im6o); 2938 /* NOTREACHED */ 2939 } else if (im6o->im6o_trace != NULL) { 2940 (*im6o->im6o_trace)(im6o, TRUE); 2941 } 2942 2943 if (!locked) 2944 IM6O_UNLOCK(im6o); 2945} 2946 2947void 2948im6o_remref(struct ip6_moptions *im6o) 2949{ 2950 int i; 2951 2952 IM6O_LOCK(im6o); 2953 if (im6o->im6o_refcnt == 0) { 2954 panic("%s: im6o %p negative refcnt", __func__, im6o); 2955 /* NOTREACHED */ 2956 } else if (im6o->im6o_trace != NULL) { 2957 (*im6o->im6o_trace)(im6o, FALSE); 2958 } 2959 2960 --im6o->im6o_refcnt; 2961 if (im6o->im6o_refcnt > 0) { 2962 IM6O_UNLOCK(im6o); 2963 return; 2964 } 2965 2966 for (i = 0; i < im6o->im6o_num_memberships; ++i) { 2967 struct in6_mfilter *imf; 2968 2969 imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL; 2970 if (imf != NULL) 2971 im6f_leave(imf); 2972 2973 (void) in6_mc_leave(im6o->im6o_membership[i], imf); 2974 2975 if (imf != NULL) 2976 im6f_purge(imf); 2977 2978 IN6M_REMREF(im6o->im6o_membership[i]); 2979 im6o->im6o_membership[i] = NULL; 2980 } 2981 im6o->im6o_num_memberships = 0; 2982 if (im6o->im6o_mfilters != NULL) { 2983 FREE(im6o->im6o_mfilters, M_IN6MFILTER); 2984 im6o->im6o_mfilters = NULL; 2985 } 2986 if (im6o->im6o_membership != NULL) { 2987 FREE(im6o->im6o_membership, M_IP6MOPTS); 2988 im6o->im6o_membership = NULL; 2989 } 2990 IM6O_UNLOCK(im6o); 2991 2992 lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp); 2993 2994 if (!(im6o->im6o_debug & IFD_ALLOC)) { 2995 panic("%s: im6o %p cannot be freed", __func__, im6o); 2996 /* NOTREACHED */ 2997 } 2998 zfree(im6o_zone, im6o); 2999} 3000 3001static void 3002im6o_trace(struct ip6_moptions *im6o, int refhold) 3003{ 3004 struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o; 3005 ctrace_t *tr; 3006 u_int32_t idx; 3007 u_int16_t *cnt; 3008 3009 if (!(im6o->im6o_debug & IFD_DEBUG)) { 3010 panic("%s: im6o %p has no debug structure", __func__, im6o); 3011 /* NOTREACHED */ 3012 } 3013 if (refhold) { 3014 cnt = &im6o_dbg->im6o_refhold_cnt; 3015 tr = im6o_dbg->im6o_refhold; 3016 } else { 3017 cnt = &im6o_dbg->im6o_refrele_cnt; 3018 tr = im6o_dbg->im6o_refrele; 3019 } 3020 3021 idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE; 3022 ctrace_record(&tr[idx]); 3023} 3024 3025struct ip6_moptions * 3026ip6_allocmoptions(int how) 3027{ 3028 struct ip6_moptions *im6o; 3029 3030 im6o = (how == M_WAITOK) ? 3031 zalloc(im6o_zone) : zalloc_noblock(im6o_zone); 3032 if (im6o != NULL) { 3033 bzero(im6o, im6o_size); 3034 lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr); 3035 im6o->im6o_debug |= IFD_ALLOC; 3036 if (im6o_debug != 0) { 3037 im6o->im6o_debug |= IFD_DEBUG; 3038 im6o->im6o_trace = im6o_trace; 3039 } 3040 IM6O_ADDREF(im6o); 3041 } 3042 3043 return (im6o); 3044} 3045 3046/* 3047 * Set IPv6 outgoing packet options based on advanced API. 3048 */ 3049int 3050ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, 3051 struct ip6_pktopts *stickyopt, int uproto) 3052{ 3053 struct cmsghdr *cm = 0; 3054 3055 if (control == NULL || opt == NULL) 3056 return (EINVAL); 3057 3058 ip6_initpktopts(opt); 3059 if (stickyopt) { 3060 int error; 3061 3062 /* 3063 * If stickyopt is provided, make a local copy of the options 3064 * for this particular packet, then override them by ancillary 3065 * objects. 3066 * XXX: copypktopts() does not copy the cached route to a next 3067 * hop (if any). This is not very good in terms of efficiency, 3068 * but we can allow this since this option should be rarely 3069 * used. 3070 */ 3071 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) 3072 return (error); 3073 } 3074 3075 /* 3076 * XXX: Currently, we assume all the optional information is stored 3077 * in a single mbuf. 3078 */ 3079 if (control->m_next) 3080 return (EINVAL); 3081 3082 if (control->m_len < CMSG_LEN(0)) 3083 return (EINVAL); 3084 3085 for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) { 3086 int error; 3087 3088 if (cm->cmsg_len < sizeof(struct cmsghdr) || cm->cmsg_len > control->m_len) 3089 return (EINVAL); 3090 if (cm->cmsg_level != IPPROTO_IPV6) 3091 continue; 3092 3093 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), 3094 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto); 3095 if (error) 3096 return (error); 3097 } 3098 3099 return (0); 3100} 3101/* 3102 * Set a particular packet option, as a sticky option or an ancillary data 3103 * item. "len" can be 0 only when it's a sticky option. 3104 * We have 4 cases of combination of "sticky" and "cmsg": 3105 * "sticky=0, cmsg=0": impossible 3106 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data 3107 * "sticky=1, cmsg=0": RFC3542 socket option 3108 * "sticky=1, cmsg=1": RFC2292 socket option 3109 */ 3110static int 3111ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, 3112 int sticky, int cmsg, int uproto) 3113{ 3114 int minmtupolicy, preftemp; 3115 int error; 3116 3117 if (!sticky && !cmsg) { 3118#ifdef DIAGNOSTIC 3119 printf("ip6_setpktopt: impossible case\n"); 3120#endif 3121 return (EINVAL); 3122 } 3123 3124 /* 3125 * Caller must have ensured that the buffer is at least 3126 * aligned on 32-bit boundary. 3127 */ 3128 VERIFY(IS_P2ALIGNED(buf, sizeof (u_int32_t))); 3129 3130 /* 3131 * IPV6_2292xxx is for backward compatibility to RFC2292, and should 3132 * not be specified in the context of RFC3542. Conversely, 3133 * RFC3542 types should not be specified in the context of RFC2292. 3134 */ 3135 if (!cmsg) { 3136 switch (optname) { 3137 case IPV6_2292PKTINFO: 3138 case IPV6_2292HOPLIMIT: 3139 case IPV6_2292NEXTHOP: 3140 case IPV6_2292HOPOPTS: 3141 case IPV6_2292DSTOPTS: 3142 case IPV6_2292RTHDR: 3143 case IPV6_2292PKTOPTIONS: 3144 return (ENOPROTOOPT); 3145 } 3146 } 3147 if (sticky && cmsg) { 3148 switch (optname) { 3149 case IPV6_PKTINFO: 3150 case IPV6_HOPLIMIT: 3151 case IPV6_NEXTHOP: 3152 case IPV6_HOPOPTS: 3153 case IPV6_DSTOPTS: 3154 case IPV6_RTHDRDSTOPTS: 3155 case IPV6_RTHDR: 3156 case IPV6_USE_MIN_MTU: 3157 case IPV6_DONTFRAG: 3158 case IPV6_TCLASS: 3159 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ 3160 return (ENOPROTOOPT); 3161 } 3162 } 3163 3164 switch (optname) { 3165 case IPV6_2292PKTINFO: 3166 case IPV6_PKTINFO: 3167 { 3168 struct ifnet *ifp = NULL; 3169 struct in6_pktinfo *pktinfo; 3170 3171 if (len != sizeof(struct in6_pktinfo)) 3172 return (EINVAL); 3173 3174 pktinfo = (struct in6_pktinfo *)(void *)buf; 3175 3176 /* 3177 * An application can clear any sticky IPV6_PKTINFO option by 3178 * doing a "regular" setsockopt with ipi6_addr being 3179 * in6addr_any and ipi6_ifindex being zero. 3180 * [RFC 3542, Section 6] 3181 */ 3182 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && 3183 pktinfo->ipi6_ifindex == 0 && 3184 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { 3185 ip6_clearpktopts(opt, optname); 3186 break; 3187 } 3188 3189 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && 3190 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { 3191 return (EINVAL); 3192 } 3193 3194 /* validate the interface index if specified. */ 3195 ifnet_head_lock_shared(); 3196 3197 if (pktinfo->ipi6_ifindex > if_index) { 3198 ifnet_head_done(); 3199 return (ENXIO); 3200 } 3201 3202 if (pktinfo->ipi6_ifindex) { 3203 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex]; 3204 if (ifp == NULL) { 3205 ifnet_head_done(); 3206 return (ENXIO); 3207 } 3208 } 3209 3210 ifnet_head_done(); 3211 3212 /* 3213 * We store the address anyway, and let in6_selectsrc() 3214 * validate the specified address. This is because ipi6_addr 3215 * may not have enough information about its scope zone, and 3216 * we may need additional information (such as outgoing 3217 * interface or the scope zone of a destination address) to 3218 * disambiguate the scope. 3219 * XXX: the delay of the validation may confuse the 3220 * application when it is used as a sticky option. 3221 */ 3222 if (opt->ip6po_pktinfo == NULL) { 3223 opt->ip6po_pktinfo = _MALLOC(sizeof(*pktinfo), 3224 M_IP6OPT, M_NOWAIT); 3225 if (opt->ip6po_pktinfo == NULL) 3226 return (ENOBUFS); 3227 } 3228 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); 3229 break; 3230 } 3231 3232 case IPV6_2292HOPLIMIT: 3233 case IPV6_HOPLIMIT: 3234 { 3235 int *hlimp; 3236 3237 /* 3238 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT 3239 * to simplify the ordering among hoplimit options. 3240 */ 3241 if (optname == IPV6_HOPLIMIT && sticky) 3242 return (ENOPROTOOPT); 3243 3244 if (len != sizeof(int)) 3245 return (EINVAL); 3246 hlimp = (int *)(void *)buf; 3247 if (*hlimp < -1 || *hlimp > 255) 3248 return (EINVAL); 3249 3250 opt->ip6po_hlim = *hlimp; 3251 break; 3252 } 3253 3254 case IPV6_TCLASS: 3255 { 3256 int tclass; 3257 3258 if (len != sizeof(int)) 3259 return (EINVAL); 3260 tclass = *(int *)(void *)buf; 3261 if (tclass < -1 || tclass > 255) 3262 return (EINVAL); 3263 3264 opt->ip6po_tclass = tclass; 3265 break; 3266 } 3267 3268 case IPV6_2292NEXTHOP: 3269 case IPV6_NEXTHOP: 3270 error = suser(kauth_cred_get(), 0); 3271 if (error) 3272 return (EACCES); 3273 3274 if (len == 0) { /* just remove the option */ 3275 ip6_clearpktopts(opt, IPV6_NEXTHOP); 3276 break; 3277 } 3278 3279 /* check if cmsg_len is large enough for sa_len */ 3280 if (len < sizeof(struct sockaddr) || len < *buf) 3281 return (EINVAL); 3282 3283 switch (((struct sockaddr *)buf)->sa_family) { 3284 case AF_INET6: 3285 { 3286 struct sockaddr_in6 *sa6 = 3287 (struct sockaddr_in6 *)(void *)buf; 3288 3289 if (sa6->sin6_len != sizeof(struct sockaddr_in6)) 3290 return (EINVAL); 3291 3292 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || 3293 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { 3294 return (EINVAL); 3295 } 3296 if ((error = sa6_embedscope(sa6, ip6_use_defzone)) 3297 != 0) { 3298 return (error); 3299 } 3300 break; 3301 } 3302 case AF_LINK: /* should eventually be supported */ 3303 default: 3304 return (EAFNOSUPPORT); 3305 } 3306 3307 /* turn off the previous option, then set the new option. */ 3308 ip6_clearpktopts(opt, IPV6_NEXTHOP); 3309 opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT); 3310 if (opt->ip6po_nexthop == NULL) 3311 return (ENOBUFS); 3312 bcopy(buf, opt->ip6po_nexthop, *buf); 3313 break; 3314 3315 case IPV6_2292HOPOPTS: 3316 case IPV6_HOPOPTS: 3317 { 3318 struct ip6_hbh *hbh; 3319 int hbhlen; 3320 3321 /* 3322 * XXX: We don't allow a non-privileged user to set ANY HbH 3323 * options, since per-option restriction has too much 3324 * overhead. 3325 */ 3326 error = suser(kauth_cred_get(), 0); 3327 if (error) 3328 return (EACCES); 3329 3330 if (len == 0) { 3331 ip6_clearpktopts(opt, IPV6_HOPOPTS); 3332 break; /* just remove the option */ 3333 } 3334 3335 /* message length validation */ 3336 if (len < sizeof(struct ip6_hbh)) 3337 return (EINVAL); 3338 hbh = (struct ip6_hbh *)(void *)buf; 3339 hbhlen = (hbh->ip6h_len + 1) << 3; 3340 if (len != hbhlen) 3341 return (EINVAL); 3342 3343 /* turn off the previous option, then set the new option. */ 3344 ip6_clearpktopts(opt, IPV6_HOPOPTS); 3345 opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT); 3346 if (opt->ip6po_hbh == NULL) 3347 return (ENOBUFS); 3348 bcopy(hbh, opt->ip6po_hbh, hbhlen); 3349 3350 break; 3351 } 3352 3353 case IPV6_2292DSTOPTS: 3354 case IPV6_DSTOPTS: 3355 case IPV6_RTHDRDSTOPTS: 3356 { 3357 struct ip6_dest *dest, **newdest = NULL; 3358 int destlen; 3359 3360 error = suser(kauth_cred_get(), 0); 3361 if (error) 3362 return (EACCES); 3363 3364 if (len == 0) { 3365 ip6_clearpktopts(opt, optname); 3366 break; /* just remove the option */ 3367 } 3368 3369 /* message length validation */ 3370 if (len < sizeof(struct ip6_dest)) 3371 return (EINVAL); 3372 dest = (struct ip6_dest *)(void *)buf; 3373 destlen = (dest->ip6d_len + 1) << 3; 3374 if (len != destlen) 3375 return (EINVAL); 3376 3377 /* 3378 * Determine the position that the destination options header 3379 * should be inserted; before or after the routing header. 3380 */ 3381 switch (optname) { 3382 case IPV6_2292DSTOPTS: 3383 /* 3384 * The old advacned API is ambiguous on this point. 3385 * Our approach is to determine the position based 3386 * according to the existence of a routing header. 3387 * Note, however, that this depends on the order of the 3388 * extension headers in the ancillary data; the 1st 3389 * part of the destination options header must appear 3390 * before the routing header in the ancillary data, 3391 * too. 3392 * RFC3542 solved the ambiguity by introducing 3393 * separate ancillary data or option types. 3394 */ 3395 if (opt->ip6po_rthdr == NULL) 3396 newdest = &opt->ip6po_dest1; 3397 else 3398 newdest = &opt->ip6po_dest2; 3399 break; 3400 case IPV6_RTHDRDSTOPTS: 3401 newdest = &opt->ip6po_dest1; 3402 break; 3403 case IPV6_DSTOPTS: 3404 newdest = &opt->ip6po_dest2; 3405 break; 3406 } 3407 3408 /* turn off the previous option, then set the new option. */ 3409 ip6_clearpktopts(opt, optname); 3410 *newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT); 3411 if (*newdest == NULL) 3412 return (ENOBUFS); 3413 bcopy(dest, *newdest, destlen); 3414 3415 break; 3416 } 3417 3418 case IPV6_2292RTHDR: 3419 case IPV6_RTHDR: 3420 { 3421 struct ip6_rthdr *rth; 3422 int rthlen; 3423 3424 if (len == 0) { 3425 ip6_clearpktopts(opt, IPV6_RTHDR); 3426 break; /* just remove the option */ 3427 } 3428 3429 /* message length validation */ 3430 if (len < sizeof(struct ip6_rthdr)) 3431 return (EINVAL); 3432 rth = (struct ip6_rthdr *)(void *)buf; 3433 rthlen = (rth->ip6r_len + 1) << 3; 3434 if (len != rthlen) 3435 return (EINVAL); 3436 3437 switch (rth->ip6r_type) { 3438 case IPV6_RTHDR_TYPE_0: 3439 if (rth->ip6r_len == 0) /* must contain one addr */ 3440 return (EINVAL); 3441 if (rth->ip6r_len % 2) /* length must be even */ 3442 return (EINVAL); 3443 if (rth->ip6r_len / 2 != rth->ip6r_segleft) 3444 return (EINVAL); 3445 break; 3446 default: 3447 return (EINVAL); /* not supported */ 3448 } 3449 3450 /* turn off the previous option */ 3451 ip6_clearpktopts(opt, IPV6_RTHDR); 3452 opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT); 3453 if (opt->ip6po_rthdr == NULL) 3454 return (ENOBUFS); 3455 bcopy(rth, opt->ip6po_rthdr, rthlen); 3456 3457 break; 3458 } 3459 3460 case IPV6_USE_MIN_MTU: 3461 if (len != sizeof(int)) 3462 return (EINVAL); 3463 minmtupolicy = *(int *)(void *)buf; 3464 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && 3465 minmtupolicy != IP6PO_MINMTU_DISABLE && 3466 minmtupolicy != IP6PO_MINMTU_ALL) { 3467 return (EINVAL); 3468 } 3469 opt->ip6po_minmtu = minmtupolicy; 3470 break; 3471 3472 case IPV6_DONTFRAG: 3473 if (len != sizeof(int)) 3474 return (EINVAL); 3475 3476 if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) { 3477 /* 3478 * we ignore this option for TCP sockets. 3479 * (RFC3542 leaves this case unspecified.) 3480 */ 3481 opt->ip6po_flags &= ~IP6PO_DONTFRAG; 3482 } else 3483 opt->ip6po_flags |= IP6PO_DONTFRAG; 3484 break; 3485 3486 case IPV6_PREFER_TEMPADDR: 3487 if (len != sizeof(int)) 3488 return (EINVAL); 3489 preftemp = *(int *)(void *)buf; 3490 if (preftemp != IP6PO_TEMPADDR_SYSTEM && 3491 preftemp != IP6PO_TEMPADDR_NOTPREFER && 3492 preftemp != IP6PO_TEMPADDR_PREFER) { 3493 return (EINVAL); 3494 } 3495 opt->ip6po_prefer_tempaddr = preftemp; 3496 break; 3497 3498 default: 3499 return (ENOPROTOOPT); 3500 } /* end of switch */ 3501 3502 return (0); 3503} 3504 3505/* 3506 * Routine called from ip6_output() to loop back a copy of an IP6 multicast 3507 * packet to the input queue of a specified interface. Note that this 3508 * calls the output routine of the loopback "driver", but with an interface 3509 * pointer that might NOT be &loif -- easier than replicating that code here. 3510 */ 3511void 3512ip6_mloopback( 3513 struct ifnet *ifp, 3514 struct mbuf *m, 3515 struct sockaddr_in6 *dst) 3516{ 3517 struct mbuf *copym; 3518 struct ip6_hdr *ip6; 3519 3520 copym = m_copy(m, 0, M_COPYALL); 3521 if (copym == NULL) 3522 return; 3523 3524 /* 3525 * Make sure to deep-copy IPv6 header portion in case the data 3526 * is in an mbuf cluster, so that we can safely override the IPv6 3527 * header portion later. 3528 */ 3529 if ((copym->m_flags & M_EXT) != 0 || 3530 copym->m_len < sizeof(struct ip6_hdr)) { 3531 copym = m_pullup(copym, sizeof(struct ip6_hdr)); 3532 if (copym == NULL) 3533 return; 3534 } 3535 3536#if DIAGNOSTIC 3537 if (copym->m_len < sizeof(*ip6)) { 3538 m_freem(copym); 3539 return; 3540 } 3541#endif 3542 3543 ip6 = mtod(copym, struct ip6_hdr *); 3544 /* 3545 * clear embedded scope identifiers if necessary. 3546 * in6_clearscope will touch the addresses only when necessary. 3547 */ 3548 in6_clearscope(&ip6->ip6_src); 3549 in6_clearscope(&ip6->ip6_dst); 3550 3551#ifdef __APPLE__ 3552 3553 /* Makes sure the HW checksum flags are cleaned before sending the packet */ 3554 3555 if ((copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) { 3556 in6_delayed_cksum(copym, sizeof(struct ip6_hdr)); 3557 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA; 3558 } 3559 copym->m_pkthdr.rcvif = 0; 3560 copym->m_pkthdr.csum_data = 0; 3561 copym->m_pkthdr.csum_flags = 0; 3562 3563 if (lo_ifp) { 3564 copym->m_pkthdr.rcvif = ifp; 3565 dlil_output(lo_ifp, PF_INET6, copym, 0, 3566 (struct sockaddr *)dst, 0, NULL); 3567 } else 3568 m_free(copym); 3569#else 3570 (void)if_simloop(ifp, copym, dst->sin6_family, NULL); 3571#endif 3572} 3573 3574/* 3575 * Chop IPv6 header off from the payload. 3576 */ 3577static int 3578ip6_splithdr(m, exthdrs) 3579 struct mbuf *m; 3580 struct ip6_exthdrs *exthdrs; 3581{ 3582 struct mbuf *mh; 3583 struct ip6_hdr *ip6; 3584 3585 ip6 = mtod(m, struct ip6_hdr *); 3586 if (m->m_len > sizeof(*ip6)) { 3587 MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */ 3588 if (mh == 0) { 3589 m_freem(m); 3590 return ENOBUFS; 3591 } 3592 M_COPY_PKTHDR(mh, m); 3593 MH_ALIGN(mh, sizeof(*ip6)); 3594 m->m_flags &= ~M_PKTHDR; 3595 m->m_len -= sizeof(*ip6); 3596 m->m_data += sizeof(*ip6); 3597 mh->m_next = m; 3598 m = mh; 3599 m->m_len = sizeof(*ip6); 3600 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); 3601 } 3602 exthdrs->ip6e_ip6 = m; 3603 return 0; 3604} 3605 3606/* 3607 * Compute IPv6 extension header length. 3608 */ 3609int 3610ip6_optlen(in6p) 3611 struct in6pcb *in6p; 3612{ 3613 int len; 3614 3615 if (!in6p->in6p_outputopts) 3616 return 0; 3617 3618 len = 0; 3619#define elen(x) \ 3620 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) 3621 3622 len += elen(in6p->in6p_outputopts->ip6po_hbh); 3623 if (in6p->in6p_outputopts->ip6po_rthdr) 3624 /* dest1 is valid with rthdr only */ 3625 len += elen(in6p->in6p_outputopts->ip6po_dest1); 3626 len += elen(in6p->in6p_outputopts->ip6po_rthdr); 3627 len += elen(in6p->in6p_outputopts->ip6po_dest2); 3628 return len; 3629#undef elen 3630} 3631