1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1988, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 61 */ 62/* 63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 64 * support for mandatory and extensible security protections. This notice 65 * is included in support of clause 2.2 (b) of the Apple Public License, 66 * Version 2.0. 67 */ 68 69#include <sys/param.h> 70#include <sys/systm.h> 71#include <sys/kernel.h> 72#include <sys/malloc.h> 73#include <sys/mbuf.h> 74#include <sys/mcache.h> 75#include <sys/proc.h> 76#include <sys/domain.h> 77#include <sys/protosw.h> 78#include <sys/socket.h> 79#include <sys/socketvar.h> 80#include <sys/sysctl.h> 81#include <libkern/OSAtomic.h> 82#include <kern/zalloc.h> 83 84#include <pexpert/pexpert.h> 85 86#include <net/if.h> 87#include <net/route.h> 88 89#define _IP_VHL 90#include <netinet/in.h> 91#include <netinet/in_systm.h> 92#include <netinet/ip.h> 93#include <netinet/in_pcb.h> 94#include <netinet/in_var.h> 95#include <netinet/ip_var.h> 96#include <netinet/ip_mroute.h> 97 98#if INET6 99#include <netinet6/in6_pcb.h> 100#endif /* INET6 */ 101 102#include <netinet/ip_fw.h> 103 104#if IPSEC 105#include <netinet6/ipsec.h> 106#endif /*IPSEC*/ 107 108#if DUMMYNET 109#include <netinet/ip_dummynet.h> 110#endif 111 112#if CONFIG_MACF_NET 113#include <security/mac_framework.h> 114#endif /* MAC_NET */ 115 116int load_ipfw(void); 117int rip_detach(struct socket *); 118int rip_abort(struct socket *); 119int rip_disconnect(struct socket *); 120int rip_bind(struct socket *, struct sockaddr *, struct proc *); 121int rip_connect(struct socket *, struct sockaddr *, struct proc *); 122int rip_shutdown(struct socket *); 123 124#if IPSEC 125extern int ipsec_bypass; 126#endif 127 128struct inpcbhead ripcb; 129struct inpcbinfo ripcbinfo; 130 131/* control hooks for ipfw and dummynet */ 132#if IPFIREWALL 133ip_fw_ctl_t *ip_fw_ctl_ptr; 134#endif /* IPFIREWALL */ 135#if DUMMYNET 136ip_dn_ctl_t *ip_dn_ctl_ptr; 137#endif /* DUMMYNET */ 138 139/* 140 * Nominal space allocated to a raw ip socket. 141 */ 142#define RIPSNDQ 8192 143#define RIPRCVQ 8192 144 145/* 146 * Raw interface to IP protocol. 147 */ 148 149/* 150 * Initialize raw connection block q. 151 */ 152void 153rip_init() 154{ 155 struct inpcbinfo *pcbinfo; 156 157 LIST_INIT(&ripcb); 158 ripcbinfo.listhead = &ripcb; 159 /* 160 * XXX We don't use the hash list for raw IP, but it's easier 161 * to allocate a one entry hash list than it is to check all 162 * over the place for hashbase == NULL. 163 */ 164 ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask); 165 ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask); 166 167 ripcbinfo.ipi_zone = (void *) zinit(sizeof(struct inpcb), 168 (4096 * sizeof(struct inpcb)), 169 4096, "ripzone"); 170 171 pcbinfo = &ripcbinfo; 172 /* 173 * allocate lock group attribute and group for udp pcb mutexes 174 */ 175 pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); 176 177 pcbinfo->mtx_grp = lck_grp_alloc_init("ripcb", pcbinfo->mtx_grp_attr); 178 179 /* 180 * allocate the lock attribute for udp pcb mutexes 181 */ 182 pcbinfo->mtx_attr = lck_attr_alloc_init(); 183 184 if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) 185 return; /* pretty much dead if this fails... */ 186 187} 188 189static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET , 0, {0}, {0,0,0,0,0,0,0,0,} }; 190/* 191 * Setup generic address and protocol structures 192 * for raw_input routine, then pass them along with 193 * mbuf chain. 194 */ 195void 196rip_input(m, iphlen) 197 struct mbuf *m; 198 int iphlen; 199{ 200 register struct ip *ip = mtod(m, struct ip *); 201 register struct inpcb *inp; 202 struct inpcb *last = 0; 203 struct mbuf *opts = 0; 204 int skipit = 0, ret = 0; 205 206 /* Expect 32-bit aligned data pointer on strict-align platforms */ 207 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); 208 209 ripsrc.sin_addr = ip->ip_src; 210 lck_rw_lock_shared(ripcbinfo.mtx); 211 LIST_FOREACH(inp, &ripcb, inp_list) { 212#if INET6 213 if ((inp->inp_vflag & INP_IPV4) == 0) 214 continue; 215#endif 216 if (inp->inp_ip_p && (inp->inp_ip_p != ip->ip_p)) 217 continue; 218 if (inp->inp_laddr.s_addr && 219 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 220 continue; 221 if (inp->inp_faddr.s_addr && 222 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 223 continue; 224 if (last) { 225 struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); 226 227 skipit = 0; 228#if IPSEC 229 /* check AH/ESP integrity. */ 230 if (ipsec_bypass == 0 && n) { 231 if (ipsec4_in_reject_so(n, last->inp_socket)) { 232 m_freem(n); 233 IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); 234 /* do not inject data to pcb */ 235 skipit = 1; 236 } 237 } 238#endif /*IPSEC*/ 239#if CONFIG_MACF_NET 240 if (n && skipit == 0) { 241 if (mac_inpcb_check_deliver(last, n, AF_INET, 242 SOCK_RAW) != 0) { 243 m_freem(n); 244 skipit = 1; 245 } 246 } 247#endif 248 if (n && skipit == 0) { 249 int error = 0; 250 if ((last->inp_flags & INP_CONTROLOPTS) != 0 || 251 (last->inp_socket->so_options & SO_TIMESTAMP) != 0 || 252 (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { 253 ret = ip_savecontrol(last, &opts, ip, n); 254 if (ret != 0) { 255 m_freem(n); 256 m_freem(opts); 257 last = inp; 258 continue; 259 } 260 } 261 if (last->inp_flags & INP_STRIPHDR) { 262 n->m_len -= iphlen; 263 n->m_pkthdr.len -= iphlen; 264 n->m_data += iphlen; 265 } 266 so_recv_data_stat(last->inp_socket, m, 0); 267 if (sbappendaddr(&last->inp_socket->so_rcv, 268 (struct sockaddr *)&ripsrc, n, 269 opts, &error) != 0) { 270 sorwakeup(last->inp_socket); 271 } else { 272 if (error) { 273 /* should notify about lost packet */ 274 kprintf("rip_input can't append to socket\n"); 275 } 276 } 277 opts = 0; 278 } 279 } 280 last = inp; 281 } 282 283 skipit = 0; 284#if IPSEC 285 /* check AH/ESP integrity. */ 286 if (ipsec_bypass == 0 && last) { 287 if (ipsec4_in_reject_so(m, last->inp_socket)) { 288 m_freem(m); 289 IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); 290 OSAddAtomic(1, &ipstat.ips_delivered); 291 /* do not inject data to pcb */ 292 skipit = 1; 293 } 294 } 295#endif /*IPSEC*/ 296#if CONFIG_MACF_NET 297 if (last && skipit == 0) { 298 if (mac_inpcb_check_deliver(last, m, AF_INET, SOCK_RAW) != 0) { 299 skipit = 1; 300 m_freem(m); 301 } 302 } 303#endif 304 if (skipit == 0) { 305 if (last) { 306 if ((last->inp_flags & INP_CONTROLOPTS) != 0 || 307 (last->inp_socket->so_options & SO_TIMESTAMP) != 0 || 308 (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { 309 ret = ip_savecontrol(last, &opts, ip, m); 310 if (ret != 0) { 311 m_freem(m); 312 m_freem(opts); 313 goto unlock; 314 } 315 } 316 if (last->inp_flags & INP_STRIPHDR) { 317 m->m_len -= iphlen; 318 m->m_pkthdr.len -= iphlen; 319 m->m_data += iphlen; 320 } 321 so_recv_data_stat(last->inp_socket, m, 0); 322 if (sbappendaddr(&last->inp_socket->so_rcv, 323 (struct sockaddr *)&ripsrc, m, opts, NULL) != 0) { 324 sorwakeup(last->inp_socket); 325 } else { 326 kprintf("rip_input(2) can't append to socket\n"); 327 } 328 } else { 329 m_freem(m); 330 OSAddAtomic(1, &ipstat.ips_noproto); 331 OSAddAtomic(-1, &ipstat.ips_delivered); 332 } 333 } 334unlock: 335 /* 336 * Keep the list locked because socket filter may force the socket lock 337 * to be released when calling sbappendaddr() -- see rdar://7627704 338 */ 339 lck_rw_done(ripcbinfo.mtx); 340} 341 342/* 343 * Generate IP header and pass packet to ip_output. 344 * Tack on options user may have setup with control call. 345 */ 346int 347rip_output( 348 struct mbuf *m, 349 struct socket *so, 350 u_int32_t dst, 351 struct mbuf *control) 352{ 353 register struct ip *ip; 354 register struct inpcb *inp = sotoinpcb(so); 355 int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST; 356 struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF }; 357 struct ip_moptions *imo; 358 int error = 0; 359 mbuf_svc_class_t msc = MBUF_SC_UNSPEC; 360 361 if (control != NULL) { 362 msc = mbuf_service_class_from_control(control); 363 364 m_freem(control); 365 } 366 367 flags |= IP_OUTARGS; 368 /* If socket was bound to an ifindex, tell ip_output about it */ 369 if (inp->inp_flags & INP_BOUND_IF) { 370 ipoa.ipoa_boundif = inp->inp_boundifp->if_index; 371 ipoa.ipoa_flags |= IPOAF_BOUND_IF; 372 } 373 if (inp->inp_flags & INP_NO_IFT_CELLULAR) 374 ipoa.ipoa_flags |= IPOAF_NO_CELLULAR; 375 376 if (inp->inp_flowhash == 0) 377 inp->inp_flowhash = inp_calc_flowhash(inp); 378 379 /* 380 * If the user handed us a complete IP packet, use it. 381 * Otherwise, allocate an mbuf for a header and fill it in. 382 */ 383 if ((inp->inp_flags & INP_HDRINCL) == 0) { 384 if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { 385 m_freem(m); 386 return(EMSGSIZE); 387 } 388 M_PREPEND(m, sizeof(struct ip), M_WAIT); 389 if (m == NULL) 390 return ENOBUFS; 391 ip = mtod(m, struct ip *); 392 ip->ip_tos = inp->inp_ip_tos; 393 ip->ip_off = 0; 394 ip->ip_p = inp->inp_ip_p; 395 ip->ip_len = m->m_pkthdr.len; 396 ip->ip_src = inp->inp_laddr; 397 ip->ip_dst.s_addr = dst; 398 ip->ip_ttl = inp->inp_ip_ttl; 399 } else { 400 if (m->m_pkthdr.len > IP_MAXPACKET) { 401 m_freem(m); 402 return(EMSGSIZE); 403 } 404 ip = mtod(m, struct ip *); 405 /* don't allow both user specified and setsockopt options, 406 and don't allow packet length sizes that will crash */ 407 if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2)) 408 && inp->inp_options) 409 || (ip->ip_len > m->m_pkthdr.len) 410 || (ip->ip_len < (IP_VHL_HL(ip->ip_vhl) << 2))) { 411 m_freem(m); 412 return EINVAL; 413 } 414 if (ip->ip_id == 0) 415#if RANDOM_IP_ID 416 ip->ip_id = ip_randomid(); 417#else 418 ip->ip_id = htons(ip_id++); 419#endif 420 /* XXX prevent ip_output from overwriting header fields */ 421 flags |= IP_RAWOUTPUT; 422 OSAddAtomic(1, &ipstat.ips_rawout); 423 } 424 425 if (inp->inp_laddr.s_addr != INADDR_ANY) 426 ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR; 427 428#if IPSEC 429 if (ipsec_bypass == 0 && ipsec_setsocket(m, so) != 0) { 430 m_freem(m); 431 return ENOBUFS; 432 } 433#endif /*IPSEC*/ 434 435 if (inp->inp_route.ro_rt != NULL && 436 inp->inp_route.ro_rt->generation_id != route_generation) { 437 rtfree(inp->inp_route.ro_rt); 438 inp->inp_route.ro_rt = NULL; 439 } 440 441 set_packet_service_class(m, so, msc, 0); 442 m->m_pkthdr.m_flowhash = inp->inp_flowhash; 443 m->m_pkthdr.m_fhflags |= PF_TAG_FLOWHASH; 444 445#if CONFIG_MACF_NET 446 mac_mbuf_label_associate_inpcb(inp, m); 447#endif 448 449 imo = inp->inp_moptions; 450 if (imo != NULL) 451 IMO_ADDREF(imo); 452 /* 453 * The domain lock is held across ip_output, so it is okay 454 * to pass the PCB cached route pointer directly to IP and 455 * the modules beneath it. 456 */ 457 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 458 imo, &ipoa); 459 460 if (imo != NULL) 461 IMO_REMREF(imo); 462 463 if (inp->inp_route.ro_rt != NULL) { 464 struct rtentry *rt = inp->inp_route.ro_rt; 465 struct ifnet *outif; 466 467 if ((rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST)) || 468 inp->inp_socket == NULL || 469 !(inp->inp_socket->so_state & SS_ISCONNECTED)) { 470 rt = NULL; /* unusable */ 471 } 472 /* 473 * Always discard the cached route for unconnected 474 * socket or if it is a multicast route. 475 */ 476 if (rt == NULL) { 477 rtfree(inp->inp_route.ro_rt); 478 inp->inp_route.ro_rt = NULL; 479 } 480 /* 481 * If this is a connected socket and the destination 482 * route is unicast, update outif with that of the 483 * route interface used by IP. 484 */ 485 if (rt != NULL && (outif = rt->rt_ifp) != inp->inp_last_outifp) 486 inp->inp_last_outifp = outif; 487 } 488 489 return (error); 490} 491 492#if IPFIREWALL 493int 494load_ipfw(void) 495{ 496 kern_return_t err; 497 498 ipfw_init(); 499 500#if DUMMYNET 501 if (!DUMMYNET_LOADED) 502 ip_dn_init(); 503#endif /* DUMMYNET */ 504 err = 0; 505 506 return err == 0 && ip_fw_ctl_ptr == NULL ? -1 : err; 507} 508#endif /* IPFIREWALL */ 509 510/* 511 * Raw IP socket option processing. 512 */ 513int 514rip_ctloutput(so, sopt) 515 struct socket *so; 516 struct sockopt *sopt; 517{ 518 struct inpcb *inp = sotoinpcb(so); 519 int error, optval; 520 521 /* Allow <SOL_SOCKET,SO_FLUSH> at this level */ 522 if (sopt->sopt_level != IPPROTO_IP && 523 !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH)) 524 return (EINVAL); 525 526 error = 0; 527 528 switch (sopt->sopt_dir) { 529 case SOPT_GET: 530 switch (sopt->sopt_name) { 531 case IP_HDRINCL: 532 optval = inp->inp_flags & INP_HDRINCL; 533 error = sooptcopyout(sopt, &optval, sizeof optval); 534 break; 535 536 case IP_STRIPHDR: 537 optval = inp->inp_flags & INP_STRIPHDR; 538 error = sooptcopyout(sopt, &optval, sizeof optval); 539 break; 540 541#if IPFIREWALL 542 case IP_FW_ADD: 543 case IP_FW_GET: 544 case IP_OLD_FW_ADD: 545 case IP_OLD_FW_GET: 546 if (ip_fw_ctl_ptr == 0) 547 error = load_ipfw(); 548 if (ip_fw_ctl_ptr && error == 0) 549 error = ip_fw_ctl_ptr(sopt); 550 else 551 error = ENOPROTOOPT; 552 break; 553#endif /* IPFIREWALL */ 554 555#if DUMMYNET 556 case IP_DUMMYNET_GET: 557 if (!DUMMYNET_LOADED) 558 ip_dn_init(); 559 if (DUMMYNET_LOADED) 560 error = ip_dn_ctl_ptr(sopt); 561 else 562 error = ENOPROTOOPT; 563 break ; 564#endif /* DUMMYNET */ 565 566#if MROUTING 567 case MRT_INIT: 568 case MRT_DONE: 569 case MRT_ADD_VIF: 570 case MRT_DEL_VIF: 571 case MRT_ADD_MFC: 572 case MRT_DEL_MFC: 573 case MRT_VERSION: 574 case MRT_ASSERT: 575 error = ip_mrouter_get(so, sopt); 576 break; 577#endif /* MROUTING */ 578 579 default: 580 error = ip_ctloutput(so, sopt); 581 break; 582 } 583 break; 584 585 case SOPT_SET: 586 switch (sopt->sopt_name) { 587 case IP_HDRINCL: 588 error = sooptcopyin(sopt, &optval, sizeof optval, 589 sizeof optval); 590 if (error) 591 break; 592 if (optval) 593 inp->inp_flags |= INP_HDRINCL; 594 else 595 inp->inp_flags &= ~INP_HDRINCL; 596 break; 597 598 case IP_STRIPHDR: 599 error = sooptcopyin(sopt, &optval, sizeof optval, 600 sizeof optval); 601 if (error) 602 break; 603 if (optval) 604 inp->inp_flags |= INP_STRIPHDR; 605 else 606 inp->inp_flags &= ~INP_STRIPHDR; 607 break; 608 609#if IPFIREWALL 610 case IP_FW_ADD: 611 case IP_FW_DEL: 612 case IP_FW_FLUSH: 613 case IP_FW_ZERO: 614 case IP_FW_RESETLOG: 615 case IP_OLD_FW_ADD: 616 case IP_OLD_FW_DEL: 617 case IP_OLD_FW_FLUSH: 618 case IP_OLD_FW_ZERO: 619 case IP_OLD_FW_RESETLOG: 620 if (ip_fw_ctl_ptr == 0) 621 error = load_ipfw(); 622 if (ip_fw_ctl_ptr && error == 0) 623 error = ip_fw_ctl_ptr(sopt); 624 else 625 error = ENOPROTOOPT; 626 break; 627#endif /* IPFIREWALL */ 628 629#if DUMMYNET 630 case IP_DUMMYNET_CONFIGURE: 631 case IP_DUMMYNET_DEL: 632 case IP_DUMMYNET_FLUSH: 633 if (!DUMMYNET_LOADED) 634 ip_dn_init(); 635 if (DUMMYNET_LOADED) 636 error = ip_dn_ctl_ptr(sopt); 637 else 638 error = ENOPROTOOPT ; 639 break ; 640#endif 641 642#if MROUTING 643 case IP_RSVP_ON: 644 error = ip_rsvp_init(so); 645 break; 646 647 case IP_RSVP_OFF: 648 error = ip_rsvp_done(); 649 break; 650 651 /* XXX - should be combined */ 652 case IP_RSVP_VIF_ON: 653 error = ip_rsvp_vif_init(so, sopt); 654 break; 655 656 case IP_RSVP_VIF_OFF: 657 error = ip_rsvp_vif_done(so, sopt); 658 break; 659 660 case MRT_INIT: 661 case MRT_DONE: 662 case MRT_ADD_VIF: 663 case MRT_DEL_VIF: 664 case MRT_ADD_MFC: 665 case MRT_DEL_MFC: 666 case MRT_VERSION: 667 case MRT_ASSERT: 668 error = ip_mrouter_set(so, sopt); 669 break; 670#endif /* MROUTING */ 671 672 case SO_FLUSH: 673 if ((error = sooptcopyin(sopt, &optval, sizeof (optval), 674 sizeof (optval))) != 0) 675 break; 676 677 error = inp_flush(inp, optval); 678 break; 679 680 default: 681 error = ip_ctloutput(so, sopt); 682 break; 683 } 684 break; 685 } 686 687 return (error); 688} 689 690/* 691 * This function exists solely to receive the PRC_IFDOWN messages which 692 * are sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, 693 * and calls in_ifadown() to remove all routes corresponding to that address. 694 * It also receives the PRC_IFUP messages from if_up() and reinstalls the 695 * interface routes. 696 */ 697void 698rip_ctlinput( 699 int cmd, 700 struct sockaddr *sa, 701 __unused void *vip) 702{ 703 struct in_ifaddr *ia; 704 struct ifnet *ifp; 705 int err; 706 int flags, done = 0; 707 708 switch (cmd) { 709 case PRC_IFDOWN: 710 lck_rw_lock_shared(in_ifaddr_rwlock); 711 for (ia = in_ifaddrhead.tqh_first; ia; 712 ia = ia->ia_link.tqe_next) { 713 IFA_LOCK(&ia->ia_ifa); 714 if (ia->ia_ifa.ifa_addr == sa && 715 (ia->ia_flags & IFA_ROUTE)) { 716 done = 1; 717 IFA_ADDREF_LOCKED(&ia->ia_ifa); 718 IFA_UNLOCK(&ia->ia_ifa); 719 lck_rw_done(in_ifaddr_rwlock); 720 lck_mtx_lock(rnh_lock); 721 /* 722 * in_ifscrub kills the interface route. 723 */ 724 in_ifscrub(ia->ia_ifp, ia, 1); 725 /* 726 * in_ifadown gets rid of all the rest of 727 * the routes. This is not quite the right 728 * thing to do, but at least if we are running 729 * a routing process they will come back. 730 */ 731 in_ifadown(&ia->ia_ifa, 1); 732 lck_mtx_unlock(rnh_lock); 733 IFA_REMREF(&ia->ia_ifa); 734 break; 735 } 736 IFA_UNLOCK(&ia->ia_ifa); 737 } 738 if (!done) 739 lck_rw_done(in_ifaddr_rwlock); 740 break; 741 742 case PRC_IFUP: 743 lck_rw_lock_shared(in_ifaddr_rwlock); 744 for (ia = in_ifaddrhead.tqh_first; ia; 745 ia = ia->ia_link.tqe_next) { 746 IFA_LOCK(&ia->ia_ifa); 747 if (ia->ia_ifa.ifa_addr == sa) { 748 /* keep it locked */ 749 break; 750 } 751 IFA_UNLOCK(&ia->ia_ifa); 752 } 753 if (ia == NULL || (ia->ia_flags & IFA_ROUTE) || 754 (ia->ia_ifa.ifa_debug & IFD_NOTREADY)) { 755 if (ia != NULL) 756 IFA_UNLOCK(&ia->ia_ifa); 757 lck_rw_done(in_ifaddr_rwlock); 758 return; 759 } 760 IFA_ADDREF_LOCKED(&ia->ia_ifa); 761 IFA_UNLOCK(&ia->ia_ifa); 762 lck_rw_done(in_ifaddr_rwlock); 763 764 flags = RTF_UP; 765 ifp = ia->ia_ifa.ifa_ifp; 766 767 if ((ifp->if_flags & IFF_LOOPBACK) 768 || (ifp->if_flags & IFF_POINTOPOINT)) 769 flags |= RTF_HOST; 770 771 err = rtinit(&ia->ia_ifa, RTM_ADD, flags); 772 if (err == 0) { 773 IFA_LOCK_SPIN(&ia->ia_ifa); 774 ia->ia_flags |= IFA_ROUTE; 775 IFA_UNLOCK(&ia->ia_ifa); 776 } 777 IFA_REMREF(&ia->ia_ifa); 778 break; 779 } 780} 781 782u_int32_t rip_sendspace = RIPSNDQ; 783u_int32_t rip_recvspace = RIPRCVQ; 784 785SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED, 786 &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); 787SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED, 788 &rip_recvspace, 0, "Maximum incoming raw IP datagram size"); 789 790static int 791rip_attach(struct socket *so, int proto, struct proc *p) 792{ 793 struct inpcb *inp; 794 int error; 795 796 inp = sotoinpcb(so); 797 if (inp) 798 panic("rip_attach"); 799 if ((so->so_state & SS_PRIV) == 0) 800 return (EPERM); 801 802 error = soreserve(so, rip_sendspace, rip_recvspace); 803 if (error) 804 return error; 805 error = in_pcballoc(so, &ripcbinfo, p); 806 if (error) 807 return error; 808 inp = (struct inpcb *)so->so_pcb; 809 inp->inp_vflag |= INP_IPV4; 810 inp->inp_ip_p = proto; 811 inp->inp_ip_ttl = ip_defttl; 812 return 0; 813} 814 815__private_extern__ int 816rip_detach(struct socket *so) 817{ 818 struct inpcb *inp; 819 820 inp = sotoinpcb(so); 821 if (inp == 0) 822 panic("rip_detach"); 823#if MROUTING 824 if (so == ip_mrouter) 825 ip_mrouter_done(); 826 ip_rsvp_force_done(so); 827 if (so == ip_rsvpd) 828 ip_rsvp_done(); 829#endif /* MROUTING */ 830 in_pcbdetach(inp); 831 return 0; 832} 833 834__private_extern__ int 835rip_abort(struct socket *so) 836{ 837 soisdisconnected(so); 838 return rip_detach(so); 839} 840 841__private_extern__ int 842rip_disconnect(struct socket *so) 843{ 844 if ((so->so_state & SS_ISCONNECTED) == 0) 845 return ENOTCONN; 846 return rip_abort(so); 847} 848 849__private_extern__ int 850rip_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p) 851{ 852 struct inpcb *inp = sotoinpcb(so); 853 struct sockaddr_in *addr = (struct sockaddr_in *)(void *)nam; 854 struct ifaddr *ifa = NULL; 855 struct ifnet *outif = NULL; 856 857 if (nam->sa_len != sizeof(*addr)) 858 return EINVAL; 859 860 if (TAILQ_EMPTY(&ifnet_head) || ((addr->sin_family != AF_INET) && 861 (addr->sin_family != AF_IMPLINK)) || 862 (addr->sin_addr.s_addr && 863 (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == 0)) { 864 return EADDRNOTAVAIL; 865 } 866 else if (ifa) { 867 IFA_LOCK(ifa); 868 outif = ifa->ifa_ifp; 869 IFA_UNLOCK(ifa); 870 IFA_REMREF(ifa); 871 } 872 inp->inp_laddr = addr->sin_addr; 873 inp->inp_last_outifp = outif; 874 return 0; 875} 876 877__private_extern__ int 878rip_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) 879{ 880 struct inpcb *inp = sotoinpcb(so); 881 struct sockaddr_in *addr = (struct sockaddr_in *)(void *)nam; 882 883 if (nam->sa_len != sizeof(*addr)) 884 return EINVAL; 885 if (TAILQ_EMPTY(&ifnet_head)) 886 return EADDRNOTAVAIL; 887 if ((addr->sin_family != AF_INET) && 888 (addr->sin_family != AF_IMPLINK)) 889 return EAFNOSUPPORT; 890 inp->inp_faddr = addr->sin_addr; 891 soisconnected(so); 892 893 return 0; 894} 895 896__private_extern__ int 897rip_shutdown(struct socket *so) 898{ 899 socantsendmore(so); 900 return 0; 901} 902 903__private_extern__ int 904rip_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr *nam, 905 struct mbuf *control, __unused struct proc *p) 906{ 907 struct inpcb *inp = sotoinpcb(so); 908 register u_int32_t dst; 909 910 if (so->so_state & SS_ISCONNECTED) { 911 if (nam) { 912 m_freem(m); 913 return EISCONN; 914 } 915 dst = inp->inp_faddr.s_addr; 916 } else { 917 if (nam == NULL) { 918 m_freem(m); 919 return ENOTCONN; 920 } 921 dst = ((struct sockaddr_in *)(void *)nam)->sin_addr.s_addr; 922 } 923 return rip_output(m, so, dst, control); 924} 925 926/* note: rip_unlock is called from different protos instead of the generic socket_unlock, 927 * it will handle the socket dealloc on last reference 928 * */ 929int 930rip_unlock(struct socket *so, int refcount, void *debug) 931{ 932 void *lr_saved; 933 struct inpcb *inp = sotoinpcb(so); 934 935 if (debug == NULL) 936 lr_saved = __builtin_return_address(0); 937 else 938 lr_saved = debug; 939 940 if (refcount) { 941 if (so->so_usecount <= 0) { 942 panic("rip_unlock: bad refoucnt so=%p val=%x lrh= %s\n", 943 so, so->so_usecount, solockhistory_nr(so)); 944 /* NOTREACHED */ 945 } 946 so->so_usecount--; 947 if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { 948 /* cleanup after last reference */ 949 lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); 950 lck_rw_lock_exclusive(ripcbinfo.mtx); 951 if (inp->inp_state != INPCB_STATE_DEAD) { 952#if INET6 953 if (INP_CHECK_SOCKAF(so, AF_INET6)) 954 in6_pcbdetach(inp); 955 else 956#endif /* INET6 */ 957 in_pcbdetach(inp); 958 } 959 in_pcbdispose(inp); 960 lck_rw_done(ripcbinfo.mtx); 961 return(0); 962 } 963 } 964 so->unlock_lr[so->next_unlock_lr] = lr_saved; 965 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; 966 lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); 967 return(0); 968} 969 970static int 971rip_pcblist SYSCTL_HANDLER_ARGS 972{ 973#pragma unused(oidp, arg1, arg2) 974 int error, i, n; 975 struct inpcb *inp, **inp_list; 976 inp_gen_t gencnt; 977 struct xinpgen xig; 978 979 /* 980 * The process of preparing the TCB list is too time-consuming and 981 * resource-intensive to repeat twice on every request. 982 */ 983 lck_rw_lock_exclusive(ripcbinfo.mtx); 984 if (req->oldptr == USER_ADDR_NULL) { 985 n = ripcbinfo.ipi_count; 986 req->oldidx = 2 * (sizeof xig) 987 + (n + n/8) * sizeof(struct xinpcb); 988 lck_rw_done(ripcbinfo.mtx); 989 return 0; 990 } 991 992 if (req->newptr != USER_ADDR_NULL) { 993 lck_rw_done(ripcbinfo.mtx); 994 return EPERM; 995 } 996 997 /* 998 * OK, now we're committed to doing something. 999 */ 1000 gencnt = ripcbinfo.ipi_gencnt; 1001 n = ripcbinfo.ipi_count; 1002 1003 bzero(&xig, sizeof(xig)); 1004 xig.xig_len = sizeof xig; 1005 xig.xig_count = n; 1006 xig.xig_gen = gencnt; 1007 xig.xig_sogen = so_gencnt; 1008 error = SYSCTL_OUT(req, &xig, sizeof xig); 1009 if (error) { 1010 lck_rw_done(ripcbinfo.mtx); 1011 return error; 1012 } 1013 /* 1014 * We are done if there is no pcb 1015 */ 1016 if (n == 0) { 1017 lck_rw_done(ripcbinfo.mtx); 1018 return 0; 1019 } 1020 1021 inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); 1022 if (inp_list == 0) { 1023 lck_rw_done(ripcbinfo.mtx); 1024 return ENOMEM; 1025 } 1026 1027 for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n; 1028 inp = inp->inp_list.le_next) { 1029 if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) 1030 inp_list[i++] = inp; 1031 } 1032 n = i; 1033 1034 error = 0; 1035 for (i = 0; i < n; i++) { 1036 inp = inp_list[i]; 1037 if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { 1038 struct xinpcb xi; 1039 1040 bzero(&xi, sizeof(xi)); 1041 xi.xi_len = sizeof xi; 1042 /* XXX should avoid extra copy */ 1043 inpcb_to_compat(inp, &xi.xi_inp); 1044 if (inp->inp_socket) 1045 sotoxsocket(inp->inp_socket, &xi.xi_socket); 1046 error = SYSCTL_OUT(req, &xi, sizeof xi); 1047 } 1048 } 1049 if (!error) { 1050 /* 1051 * Give the user an updated idea of our state. 1052 * If the generation differs from what we told 1053 * her before, she knows that something happened 1054 * while we were processing this request, and it 1055 * might be necessary to retry. 1056 */ 1057 bzero(&xig, sizeof(xig)); 1058 xig.xig_len = sizeof xig; 1059 xig.xig_gen = ripcbinfo.ipi_gencnt; 1060 xig.xig_sogen = so_gencnt; 1061 xig.xig_count = ripcbinfo.ipi_count; 1062 error = SYSCTL_OUT(req, &xig, sizeof xig); 1063 } 1064 FREE(inp_list, M_TEMP); 1065 lck_rw_done(ripcbinfo.mtx); 1066 return error; 1067} 1068 1069SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, 1070 rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); 1071 1072#if !CONFIG_EMBEDDED 1073 1074static int 1075rip_pcblist64 SYSCTL_HANDLER_ARGS 1076{ 1077#pragma unused(oidp, arg1, arg2) 1078 int error, i, n; 1079 struct inpcb *inp, **inp_list; 1080 inp_gen_t gencnt; 1081 struct xinpgen xig; 1082 1083 /* 1084 * The process of preparing the TCB list is too time-consuming and 1085 * resource-intensive to repeat twice on every request. 1086 */ 1087 lck_rw_lock_exclusive(ripcbinfo.mtx); 1088 if (req->oldptr == USER_ADDR_NULL) { 1089 n = ripcbinfo.ipi_count; 1090 req->oldidx = 2 * (sizeof xig) 1091 + (n + n/8) * sizeof(struct xinpcb64); 1092 lck_rw_done(ripcbinfo.mtx); 1093 return 0; 1094 } 1095 1096 if (req->newptr != USER_ADDR_NULL) { 1097 lck_rw_done(ripcbinfo.mtx); 1098 return EPERM; 1099 } 1100 1101 /* 1102 * OK, now we're committed to doing something. 1103 */ 1104 gencnt = ripcbinfo.ipi_gencnt; 1105 n = ripcbinfo.ipi_count; 1106 1107 bzero(&xig, sizeof(xig)); 1108 xig.xig_len = sizeof xig; 1109 xig.xig_count = n; 1110 xig.xig_gen = gencnt; 1111 xig.xig_sogen = so_gencnt; 1112 error = SYSCTL_OUT(req, &xig, sizeof xig); 1113 if (error) { 1114 lck_rw_done(ripcbinfo.mtx); 1115 return error; 1116 } 1117 /* 1118 * We are done if there is no pcb 1119 */ 1120 if (n == 0) { 1121 lck_rw_done(ripcbinfo.mtx); 1122 return 0; 1123 } 1124 1125 inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); 1126 if (inp_list == 0) { 1127 lck_rw_done(ripcbinfo.mtx); 1128 return ENOMEM; 1129 } 1130 1131 for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n; 1132 inp = inp->inp_list.le_next) { 1133 if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) 1134 inp_list[i++] = inp; 1135 } 1136 n = i; 1137 1138 error = 0; 1139 for (i = 0; i < n; i++) { 1140 inp = inp_list[i]; 1141 if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { 1142 struct xinpcb64 xi; 1143 1144 bzero(&xi, sizeof(xi)); 1145 xi.xi_len = sizeof xi; 1146 inpcb_to_xinpcb64(inp, &xi); 1147 if (inp->inp_socket) 1148 sotoxsocket64(inp->inp_socket, &xi.xi_socket); 1149 error = SYSCTL_OUT(req, &xi, sizeof xi); 1150 } 1151 } 1152 if (!error) { 1153 /* 1154 * Give the user an updated idea of our state. 1155 * If the generation differs from what we told 1156 * her before, she knows that something happened 1157 * while we were processing this request, and it 1158 * might be necessary to retry. 1159 */ 1160 bzero(&xig, sizeof(xig)); 1161 xig.xig_len = sizeof xig; 1162 xig.xig_gen = ripcbinfo.ipi_gencnt; 1163 xig.xig_sogen = so_gencnt; 1164 xig.xig_count = ripcbinfo.ipi_count; 1165 error = SYSCTL_OUT(req, &xig, sizeof xig); 1166 } 1167 FREE(inp_list, M_TEMP); 1168 lck_rw_done(ripcbinfo.mtx); 1169 return error; 1170} 1171 1172SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, 1173 rip_pcblist64, "S,xinpcb64", "List of active raw IP sockets"); 1174 1175#endif /* !CONFIG_EMBEDDED */ 1176 1177 1178static int 1179rip_pcblist_n SYSCTL_HANDLER_ARGS 1180{ 1181#pragma unused(oidp, arg1, arg2) 1182 int error = 0; 1183 1184 error = get_pcblist_n(IPPROTO_IP, req, &ripcbinfo); 1185 1186 return error; 1187} 1188 1189SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, 1190 rip_pcblist_n, "S,xinpcb_n", "List of active raw IP sockets"); 1191 1192struct pr_usrreqs rip_usrreqs = { 1193 rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect, 1194 pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, 1195 pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, 1196 pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown, 1197 in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp 1198}; 1199/* DSEP Review Done pl-20051213-v02 @3253 */ 1200