raw_ip.c revision 128019
1/* 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 30 * $FreeBSD: head/sys/netinet/raw_ip.c 128019 2004-04-07 20:46:16Z imp $ 31 */ 32 33#include "opt_inet6.h" 34#include "opt_ipsec.h" 35#include "opt_mac.h" 36#include "opt_random_ip_id.h" 37 38#include <sys/param.h> 39#include <sys/kernel.h> 40#include <sys/lock.h> 41#include <sys/mac.h> 42#include <sys/malloc.h> 43#include <sys/mbuf.h> 44#include <sys/proc.h> 45#include <sys/protosw.h> 46#include <sys/signalvar.h> 47#include <sys/socket.h> 48#include <sys/socketvar.h> 49#include <sys/sx.h> 50#include <sys/sysctl.h> 51#include <sys/systm.h> 52 53#include <vm/uma.h> 54 55#include <net/if.h> 56#include <net/route.h> 57 58#include <netinet/in.h> 59#include <netinet/in_systm.h> 60#include <netinet/in_pcb.h> 61#include <netinet/in_var.h> 62#include <netinet/ip.h> 63#include <netinet/ip_var.h> 64#include <netinet/ip_mroute.h> 65 66#include <netinet/ip_fw.h> 67#include <netinet/ip_dummynet.h> 68 69#ifdef FAST_IPSEC 70#include <netipsec/ipsec.h> 71#endif /*FAST_IPSEC*/ 72 73#ifdef IPSEC 74#include <netinet6/ipsec.h> 75#endif /*IPSEC*/ 76 77struct inpcbhead ripcb; 78struct inpcbinfo ripcbinfo; 79 80/* control hooks for ipfw and dummynet */ 81ip_fw_ctl_t *ip_fw_ctl_ptr; 82ip_dn_ctl_t *ip_dn_ctl_ptr; 83 84/* 85 * hooks for multicast routing. They all default to NULL, 86 * so leave them not initialized and rely on BSS being set to 0. 87 */ 88 89/* The socket used to communicate with the multicast routing daemon. */ 90struct socket *ip_mrouter; 91 92/* The various mrouter and rsvp functions */ 93int (*ip_mrouter_set)(struct socket *, struct sockopt *); 94int (*ip_mrouter_get)(struct socket *, struct sockopt *); 95int (*ip_mrouter_done)(void); 96int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 97 struct ip_moptions *); 98int (*mrt_ioctl)(int, caddr_t); 99int (*legal_vif_num)(int); 100u_long (*ip_mcast_src)(int); 101 102void (*rsvp_input_p)(struct mbuf *m, int off); 103int (*ip_rsvp_vif)(struct socket *, struct sockopt *); 104void (*ip_rsvp_force_done)(struct socket *); 105 106/* 107 * Nominal space allocated to a raw ip socket. 108 */ 109#define RIPSNDQ 8192 110#define RIPRCVQ 8192 111 112/* 113 * Raw interface to IP protocol. 114 */ 115 116/* 117 * Initialize raw connection block q. 118 */ 119void 120rip_init() 121{ 122 INP_INFO_LOCK_INIT(&ripcbinfo, "rip"); 123 LIST_INIT(&ripcb); 124 ripcbinfo.listhead = &ripcb; 125 /* 126 * XXX We don't use the hash list for raw IP, but it's easier 127 * to allocate a one entry hash list than it is to check all 128 * over the place for hashbase == NULL. 129 */ 130 ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask); 131 ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask); 132 ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb), 133 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 134 uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets); 135} 136 137static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 138 139static int 140raw_append(struct inpcb *last, struct ip *ip, struct mbuf *n) 141{ 142 int policyfail = 0; 143 144#if defined(IPSEC) || defined(FAST_IPSEC) 145 /* check AH/ESP integrity. */ 146 if (ipsec4_in_reject(n, last)) { 147 policyfail = 1; 148#ifdef IPSEC 149 ipsecstat.in_polvio++; 150#endif /*IPSEC*/ 151 /* do not inject data to pcb */ 152 } 153#endif /*IPSEC || FAST_IPSEC*/ 154#ifdef MAC 155 if (!policyfail && mac_check_inpcb_deliver(last, n) != 0) 156 policyfail = 1; 157#endif 158 if (!policyfail) { 159 struct mbuf *opts = NULL; 160 161 if ((last->inp_flags & INP_CONTROLOPTS) || 162 (last->inp_socket->so_options & SO_TIMESTAMP)) 163 ip_savecontrol(last, &opts, ip, n); 164 if (sbappendaddr(&last->inp_socket->so_rcv, 165 (struct sockaddr *)&ripsrc, n, opts) == 0) { 166 /* should notify about lost packet */ 167 m_freem(n); 168 if (opts) 169 m_freem(opts); 170 } else 171 sorwakeup(last->inp_socket); 172 } else 173 m_freem(n); 174 return policyfail; 175} 176 177/* 178 * Setup generic address and protocol structures 179 * for raw_input routine, then pass them along with 180 * mbuf chain. 181 */ 182void 183rip_input(struct mbuf *m, int off) 184{ 185 struct ip *ip = mtod(m, struct ip *); 186 int proto = ip->ip_p; 187 struct inpcb *inp, *last; 188 189 INP_INFO_RLOCK(&ripcbinfo); 190 ripsrc.sin_addr = ip->ip_src; 191 last = NULL; 192 LIST_FOREACH(inp, &ripcb, inp_list) { 193 INP_LOCK(inp); 194 if (inp->inp_ip_p && inp->inp_ip_p != proto) { 195 docontinue: 196 INP_UNLOCK(inp); 197 continue; 198 } 199#ifdef INET6 200 if ((inp->inp_vflag & INP_IPV4) == 0) 201 goto docontinue; 202#endif 203 if (inp->inp_laddr.s_addr && 204 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 205 goto docontinue; 206 if (inp->inp_faddr.s_addr && 207 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 208 goto docontinue; 209 if (last) { 210 struct mbuf *n; 211 212 n = m_copy(m, 0, (int)M_COPYALL); 213 if (n != NULL) 214 (void) raw_append(last, ip, n); 215 /* XXX count dropped packet */ 216 INP_UNLOCK(last); 217 } 218 last = inp; 219 } 220 if (last != NULL) { 221 if (raw_append(last, ip, m) != 0) 222 ipstat.ips_delivered--; 223 INP_UNLOCK(last); 224 } else { 225 m_freem(m); 226 ipstat.ips_noproto++; 227 ipstat.ips_delivered--; 228 } 229 INP_INFO_RUNLOCK(&ripcbinfo); 230} 231 232/* 233 * Generate IP header and pass packet to ip_output. 234 * Tack on options user may have setup with control call. 235 */ 236int 237rip_output(struct mbuf *m, struct socket *so, u_long dst) 238{ 239 struct ip *ip; 240 struct inpcb *inp = sotoinpcb(so); 241 int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST; 242 243#ifdef MAC 244 mac_create_mbuf_from_socket(so, m); 245#endif 246 247 /* 248 * If the user handed us a complete IP packet, use it. 249 * Otherwise, allocate an mbuf for a header and fill it in. 250 */ 251 if ((inp->inp_flags & INP_HDRINCL) == 0) { 252 if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { 253 m_freem(m); 254 return(EMSGSIZE); 255 } 256 M_PREPEND(m, sizeof(struct ip), M_TRYWAIT); 257 if (m == NULL) 258 return(ENOBUFS); 259 ip = mtod(m, struct ip *); 260 ip->ip_tos = inp->inp_ip_tos; 261 ip->ip_off = 0; 262 ip->ip_p = inp->inp_ip_p; 263 ip->ip_len = m->m_pkthdr.len; 264 ip->ip_src = inp->inp_laddr; 265 ip->ip_dst.s_addr = dst; 266 ip->ip_ttl = inp->inp_ip_ttl; 267 } else { 268 if (m->m_pkthdr.len > IP_MAXPACKET) { 269 m_freem(m); 270 return(EMSGSIZE); 271 } 272 ip = mtod(m, struct ip *); 273 /* don't allow both user specified and setsockopt options, 274 and don't allow packet length sizes that will crash */ 275 if (((ip->ip_hl != (sizeof (*ip) >> 2)) 276 && inp->inp_options) 277 || (ip->ip_len > m->m_pkthdr.len) 278 || (ip->ip_len < (ip->ip_hl << 2))) { 279 m_freem(m); 280 return EINVAL; 281 } 282 if (ip->ip_id == 0) 283#ifdef RANDOM_IP_ID 284 ip->ip_id = ip_randomid(); 285#else 286 ip->ip_id = htons(ip_id++); 287#endif 288 /* XXX prevent ip_output from overwriting header fields */ 289 flags |= IP_RAWOUTPUT; 290 ipstat.ips_rawout++; 291 } 292 293 if (inp->inp_flags & INP_ONESBCAST) 294 flags |= IP_SENDONES; 295 296 return (ip_output(m, inp->inp_options, NULL, flags, 297 inp->inp_moptions, inp)); 298} 299 300/* 301 * Raw IP socket option processing. 302 * 303 * Note that access to all of the IP administrative functions here is 304 * implicitly protected by suser() as gaining access to a raw socket 305 * requires either that the thread pass a suser() check, or that it be 306 * passed a raw socket by another thread that has passed a suser() check. 307 * If FreeBSD moves to a more fine-grained access control mechanism, 308 * additional checks will need to be placed here if the raw IP attachment 309 * check is not equivilent the the check required for these 310 * administrative operations; in some cases, these checks are already 311 * present. 312 */ 313int 314rip_ctloutput(struct socket *so, struct sockopt *sopt) 315{ 316 struct inpcb *inp = sotoinpcb(so); 317 int error, optval; 318 319 if (sopt->sopt_level != IPPROTO_IP) 320 return (EINVAL); 321 322 error = 0; 323 324 switch (sopt->sopt_dir) { 325 case SOPT_GET: 326 switch (sopt->sopt_name) { 327 case IP_HDRINCL: 328 optval = inp->inp_flags & INP_HDRINCL; 329 error = sooptcopyout(sopt, &optval, sizeof optval); 330 break; 331 332 case IP_FW_ADD: /* ADD actually returns the body... */ 333 case IP_FW_GET: 334 if (IPFW_LOADED) 335 error = ip_fw_ctl_ptr(sopt); 336 else 337 error = ENOPROTOOPT; 338 break; 339 340 case IP_DUMMYNET_GET: 341 if (DUMMYNET_LOADED) 342 error = ip_dn_ctl_ptr(sopt); 343 else 344 error = ENOPROTOOPT; 345 break ; 346 347 case MRT_INIT: 348 case MRT_DONE: 349 case MRT_ADD_VIF: 350 case MRT_DEL_VIF: 351 case MRT_ADD_MFC: 352 case MRT_DEL_MFC: 353 case MRT_VERSION: 354 case MRT_ASSERT: 355 case MRT_API_SUPPORT: 356 case MRT_API_CONFIG: 357 case MRT_ADD_BW_UPCALL: 358 case MRT_DEL_BW_UPCALL: 359 error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : 360 EOPNOTSUPP; 361 break; 362 363 default: 364 error = ip_ctloutput(so, sopt); 365 break; 366 } 367 break; 368 369 case SOPT_SET: 370 switch (sopt->sopt_name) { 371 case IP_HDRINCL: 372 error = sooptcopyin(sopt, &optval, sizeof optval, 373 sizeof optval); 374 if (error) 375 break; 376 if (optval) 377 inp->inp_flags |= INP_HDRINCL; 378 else 379 inp->inp_flags &= ~INP_HDRINCL; 380 break; 381 382 case IP_FW_ADD: 383 case IP_FW_DEL: 384 case IP_FW_FLUSH: 385 case IP_FW_ZERO: 386 case IP_FW_RESETLOG: 387 if (IPFW_LOADED) 388 error = ip_fw_ctl_ptr(sopt); 389 else 390 error = ENOPROTOOPT; 391 break; 392 393 case IP_DUMMYNET_CONFIGURE: 394 case IP_DUMMYNET_DEL: 395 case IP_DUMMYNET_FLUSH: 396 if (DUMMYNET_LOADED) 397 error = ip_dn_ctl_ptr(sopt); 398 else 399 error = ENOPROTOOPT ; 400 break ; 401 402 case IP_RSVP_ON: 403 error = ip_rsvp_init(so); 404 break; 405 406 case IP_RSVP_OFF: 407 error = ip_rsvp_done(); 408 break; 409 410 case IP_RSVP_VIF_ON: 411 case IP_RSVP_VIF_OFF: 412 error = ip_rsvp_vif ? 413 ip_rsvp_vif(so, sopt) : EINVAL; 414 break; 415 416 case MRT_INIT: 417 case MRT_DONE: 418 case MRT_ADD_VIF: 419 case MRT_DEL_VIF: 420 case MRT_ADD_MFC: 421 case MRT_DEL_MFC: 422 case MRT_VERSION: 423 case MRT_ASSERT: 424 case MRT_API_SUPPORT: 425 case MRT_API_CONFIG: 426 case MRT_ADD_BW_UPCALL: 427 case MRT_DEL_BW_UPCALL: 428 error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : 429 EOPNOTSUPP; 430 break; 431 432 default: 433 error = ip_ctloutput(so, sopt); 434 break; 435 } 436 break; 437 } 438 439 return (error); 440} 441 442/* 443 * This function exists solely to receive the PRC_IFDOWN messages which 444 * are sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, 445 * and calls in_ifadown() to remove all routes corresponding to that address. 446 * It also receives the PRC_IFUP messages from if_up() and reinstalls the 447 * interface routes. 448 */ 449void 450rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) 451{ 452 struct in_ifaddr *ia; 453 struct ifnet *ifp; 454 int err; 455 int flags; 456 457 switch (cmd) { 458 case PRC_IFDOWN: 459 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 460 if (ia->ia_ifa.ifa_addr == sa 461 && (ia->ia_flags & IFA_ROUTE)) { 462 /* 463 * in_ifscrub kills the interface route. 464 */ 465 in_ifscrub(ia->ia_ifp, ia); 466 /* 467 * in_ifadown gets rid of all the rest of 468 * the routes. This is not quite the right 469 * thing to do, but at least if we are running 470 * a routing process they will come back. 471 */ 472 in_ifadown(&ia->ia_ifa, 0); 473 break; 474 } 475 } 476 break; 477 478 case PRC_IFUP: 479 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 480 if (ia->ia_ifa.ifa_addr == sa) 481 break; 482 } 483 if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) 484 return; 485 flags = RTF_UP; 486 ifp = ia->ia_ifa.ifa_ifp; 487 488 if ((ifp->if_flags & IFF_LOOPBACK) 489 || (ifp->if_flags & IFF_POINTOPOINT)) 490 flags |= RTF_HOST; 491 492 err = rtinit(&ia->ia_ifa, RTM_ADD, flags); 493 if (err == 0) 494 ia->ia_flags |= IFA_ROUTE; 495 break; 496 } 497} 498 499u_long rip_sendspace = RIPSNDQ; 500u_long rip_recvspace = RIPRCVQ; 501 502SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, 503 &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); 504SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, 505 &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); 506 507static int 508rip_attach(struct socket *so, int proto, struct thread *td) 509{ 510 struct inpcb *inp; 511 int error; 512 513 /* XXX why not lower? */ 514 INP_INFO_WLOCK(&ripcbinfo); 515 inp = sotoinpcb(so); 516 if (inp) { 517 /* XXX counter, printf */ 518 INP_INFO_WUNLOCK(&ripcbinfo); 519 return EINVAL; 520 } 521 if (td && (error = suser(td)) != 0) { 522 INP_INFO_WUNLOCK(&ripcbinfo); 523 return error; 524 } 525 if (proto >= IPPROTO_MAX || proto < 0) { 526 INP_INFO_WUNLOCK(&ripcbinfo); 527 return EPROTONOSUPPORT; 528 } 529 530 error = soreserve(so, rip_sendspace, rip_recvspace); 531 if (error) { 532 INP_INFO_WUNLOCK(&ripcbinfo); 533 return error; 534 } 535 error = in_pcballoc(so, &ripcbinfo, "rawinp"); 536 if (error) { 537 INP_INFO_WUNLOCK(&ripcbinfo); 538 return error; 539 } 540 inp = (struct inpcb *)so->so_pcb; 541 INP_LOCK(inp); 542 INP_INFO_WUNLOCK(&ripcbinfo); 543 inp->inp_vflag |= INP_IPV4; 544 inp->inp_ip_p = proto; 545 inp->inp_ip_ttl = ip_defttl; 546 INP_UNLOCK(inp); 547 return 0; 548} 549 550static void 551rip_pcbdetach(struct socket *so, struct inpcb *inp) 552{ 553 INP_INFO_WLOCK_ASSERT(&ripcbinfo); 554 INP_LOCK_ASSERT(inp); 555 556 if (so == ip_mrouter && ip_mrouter_done) 557 ip_mrouter_done(); 558 if (ip_rsvp_force_done) 559 ip_rsvp_force_done(so); 560 if (so == ip_rsvpd) 561 ip_rsvp_done(); 562 in_pcbdetach(inp); 563} 564 565static int 566rip_detach(struct socket *so) 567{ 568 struct inpcb *inp; 569 570 INP_INFO_WLOCK(&ripcbinfo); 571 inp = sotoinpcb(so); 572 if (inp == 0) { 573 /* XXX counter, printf */ 574 INP_INFO_WUNLOCK(&ripcbinfo); 575 return EINVAL; 576 } 577 INP_LOCK(inp); 578 rip_pcbdetach(so, inp); 579 INP_INFO_WUNLOCK(&ripcbinfo); 580 return 0; 581} 582 583static int 584rip_abort(struct socket *so) 585{ 586 struct inpcb *inp; 587 588 INP_INFO_WLOCK(&ripcbinfo); 589 inp = sotoinpcb(so); 590 if (inp == 0) { 591 INP_INFO_WUNLOCK(&ripcbinfo); 592 return EINVAL; /* ??? possible? panic instead? */ 593 } 594 INP_LOCK(inp); 595 soisdisconnected(so); 596 if (so->so_state & SS_NOFDREF) 597 rip_pcbdetach(so, inp); 598 else 599 INP_UNLOCK(inp); 600 INP_INFO_WUNLOCK(&ripcbinfo); 601 return 0; 602} 603 604static int 605rip_disconnect(struct socket *so) 606{ 607 if ((so->so_state & SS_ISCONNECTED) == 0) 608 return ENOTCONN; 609 return rip_abort(so); 610} 611 612static int 613rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 614{ 615 struct sockaddr_in *addr = (struct sockaddr_in *)nam; 616 struct inpcb *inp; 617 618 if (nam->sa_len != sizeof(*addr)) 619 return EINVAL; 620 621 if (TAILQ_EMPTY(&ifnet) || 622 (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || 623 (addr->sin_addr.s_addr && 624 ifa_ifwithaddr((struct sockaddr *)addr) == 0)) 625 return EADDRNOTAVAIL; 626 627 INP_INFO_WLOCK(&ripcbinfo); 628 inp = sotoinpcb(so); 629 if (inp == 0) { 630 INP_INFO_WUNLOCK(&ripcbinfo); 631 return EINVAL; 632 } 633 INP_LOCK(inp); 634 inp->inp_laddr = addr->sin_addr; 635 INP_UNLOCK(inp); 636 INP_INFO_WUNLOCK(&ripcbinfo); 637 return 0; 638} 639 640static int 641rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 642{ 643 struct sockaddr_in *addr = (struct sockaddr_in *)nam; 644 struct inpcb *inp; 645 646 if (nam->sa_len != sizeof(*addr)) 647 return EINVAL; 648 if (TAILQ_EMPTY(&ifnet)) 649 return EADDRNOTAVAIL; 650 if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) 651 return EAFNOSUPPORT; 652 653 INP_INFO_WLOCK(&ripcbinfo); 654 inp = sotoinpcb(so); 655 if (inp == 0) { 656 INP_INFO_WUNLOCK(&ripcbinfo); 657 return EINVAL; 658 } 659 INP_LOCK(inp); 660 inp->inp_faddr = addr->sin_addr; 661 soisconnected(so); 662 INP_UNLOCK(inp); 663 INP_INFO_WUNLOCK(&ripcbinfo); 664 return 0; 665} 666 667static int 668rip_shutdown(struct socket *so) 669{ 670 struct inpcb *inp; 671 672 INP_INFO_RLOCK(&ripcbinfo); 673 inp = sotoinpcb(so); 674 if (inp == 0) { 675 INP_INFO_RUNLOCK(&ripcbinfo); 676 return EINVAL; 677 } 678 INP_LOCK(inp); 679 INP_INFO_RUNLOCK(&ripcbinfo); 680 socantsendmore(so); 681 INP_UNLOCK(inp); 682 return 0; 683} 684 685static int 686rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 687 struct mbuf *control, struct thread *td) 688{ 689 struct inpcb *inp; 690 u_long dst; 691 int ret; 692 693 INP_INFO_WLOCK(&ripcbinfo); 694 inp = sotoinpcb(so); 695 if (so->so_state & SS_ISCONNECTED) { 696 if (nam) { 697 INP_INFO_WUNLOCK(&ripcbinfo); 698 m_freem(m); 699 return EISCONN; 700 } 701 dst = inp->inp_faddr.s_addr; 702 } else { 703 if (nam == NULL) { 704 INP_INFO_WUNLOCK(&ripcbinfo); 705 m_freem(m); 706 return ENOTCONN; 707 } 708 dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; 709 } 710 INP_LOCK(inp); 711 ret = rip_output(m, so, dst); 712 INP_UNLOCK(inp); 713 INP_INFO_WUNLOCK(&ripcbinfo); 714 return ret; 715} 716 717static int 718rip_pcblist(SYSCTL_HANDLER_ARGS) 719{ 720 int error, i, n; 721 struct inpcb *inp, **inp_list; 722 inp_gen_t gencnt; 723 struct xinpgen xig; 724 725 /* 726 * The process of preparing the TCB list is too time-consuming and 727 * resource-intensive to repeat twice on every request. 728 */ 729 if (req->oldptr == 0) { 730 n = ripcbinfo.ipi_count; 731 req->oldidx = 2 * (sizeof xig) 732 + (n + n/8) * sizeof(struct xinpcb); 733 return 0; 734 } 735 736 if (req->newptr != 0) 737 return EPERM; 738 739 /* 740 * OK, now we're committed to doing something. 741 */ 742 INP_INFO_RLOCK(&ripcbinfo); 743 gencnt = ripcbinfo.ipi_gencnt; 744 n = ripcbinfo.ipi_count; 745 INP_INFO_RUNLOCK(&ripcbinfo); 746 747 xig.xig_len = sizeof xig; 748 xig.xig_count = n; 749 xig.xig_gen = gencnt; 750 xig.xig_sogen = so_gencnt; 751 error = SYSCTL_OUT(req, &xig, sizeof xig); 752 if (error) 753 return error; 754 755 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 756 if (inp_list == 0) 757 return ENOMEM; 758 759 INP_INFO_RLOCK(&ripcbinfo); 760 for (inp = LIST_FIRST(ripcbinfo.listhead), i = 0; inp && i < n; 761 inp = LIST_NEXT(inp, inp_list)) { 762 INP_LOCK(inp); 763 if (inp->inp_gencnt <= gencnt && 764 cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) { 765 /* XXX held references? */ 766 inp_list[i++] = inp; 767 } 768 INP_UNLOCK(inp); 769 } 770 INP_INFO_RUNLOCK(&ripcbinfo); 771 n = i; 772 773 error = 0; 774 for (i = 0; i < n; i++) { 775 inp = inp_list[i]; 776 if (inp->inp_gencnt <= gencnt) { 777 struct xinpcb xi; 778 xi.xi_len = sizeof xi; 779 /* XXX should avoid extra copy */ 780 bcopy(inp, &xi.xi_inp, sizeof *inp); 781 if (inp->inp_socket) 782 sotoxsocket(inp->inp_socket, &xi.xi_socket); 783 error = SYSCTL_OUT(req, &xi, sizeof xi); 784 } 785 } 786 if (!error) { 787 /* 788 * Give the user an updated idea of our state. 789 * If the generation differs from what we told 790 * her before, she knows that something happened 791 * while we were processing this request, and it 792 * might be necessary to retry. 793 */ 794 INP_INFO_RLOCK(&ripcbinfo); 795 xig.xig_gen = ripcbinfo.ipi_gencnt; 796 xig.xig_sogen = so_gencnt; 797 xig.xig_count = ripcbinfo.ipi_count; 798 INP_INFO_RUNLOCK(&ripcbinfo); 799 error = SYSCTL_OUT(req, &xig, sizeof xig); 800 } 801 free(inp_list, M_TEMP); 802 return error; 803} 804 805/* 806 * This is the wrapper function for in_setsockaddr. We just pass down 807 * the pcbinfo for in_setpeeraddr to lock. 808 */ 809static int 810rip_sockaddr(struct socket *so, struct sockaddr **nam) 811{ 812 return (in_setsockaddr(so, nam, &ripcbinfo)); 813} 814 815/* 816 * This is the wrapper function for in_setpeeraddr. We just pass down 817 * the pcbinfo for in_setpeeraddr to lock. 818 */ 819static int 820rip_peeraddr(struct socket *so, struct sockaddr **nam) 821{ 822 return (in_setpeeraddr(so, nam, &ripcbinfo)); 823} 824 825 826SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0, 827 rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); 828 829struct pr_usrreqs rip_usrreqs = { 830 rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect, 831 pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, 832 pru_listen_notsupp, rip_peeraddr, pru_rcvd_notsupp, 833 pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown, 834 rip_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel 835}; 836