/* raw_ip.c revision 125941 */
1/* 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 34 * $FreeBSD: head/sys/netinet/raw_ip.c 125941 2004-02-17 14:02:37Z ume $ 35 */ 36 37#include "opt_inet6.h" 38#include "opt_ipsec.h" 39#include "opt_mac.h" 40#include "opt_random_ip_id.h" 41 42#include <sys/param.h> 43#include <sys/kernel.h> 44#include <sys/lock.h> 45#include <sys/mac.h> 46#include <sys/malloc.h> 47#include <sys/mbuf.h> 48#include <sys/proc.h> 49#include <sys/protosw.h> 50#include <sys/signalvar.h> 51#include <sys/socket.h> 52#include <sys/socketvar.h> 53#include <sys/sx.h> 54#include <sys/sysctl.h> 55#include <sys/systm.h> 56 57#include <vm/uma.h> 58 59#include <net/if.h> 60#include <net/route.h> 61 62#include <netinet/in.h> 63#include <netinet/in_systm.h> 64#include <netinet/in_pcb.h> 65#include <netinet/in_var.h> 66#include <netinet/ip.h> 67#include <netinet/ip_var.h> 68#include <netinet/ip_mroute.h> 69 70#include <netinet/ip_fw.h> 71#include <netinet/ip_dummynet.h> 72 73#ifdef FAST_IPSEC 74#include <netipsec/ipsec.h> 75#endif /*FAST_IPSEC*/ 76 77#ifdef IPSEC 78#include <netinet6/ipsec.h> 79#endif /*IPSEC*/ 80 81struct inpcbhead ripcb; 82struct inpcbinfo ripcbinfo; 83 84/* control hooks for ipfw and dummynet */ 85ip_fw_ctl_t *ip_fw_ctl_ptr; 86ip_dn_ctl_t *ip_dn_ctl_ptr; 87 88/* 89 * hooks for multicast routing. They all default to NULL, 90 * so leave them not initialized and rely on BSS being set to 0. 91 */ 92 93/* The socket used to communicate with the multicast routing daemon. 
*/ 94struct socket *ip_mrouter; 95 96/* The various mrouter and rsvp functions */ 97int (*ip_mrouter_set)(struct socket *, struct sockopt *); 98int (*ip_mrouter_get)(struct socket *, struct sockopt *); 99int (*ip_mrouter_done)(void); 100int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 101 struct ip_moptions *); 102int (*mrt_ioctl)(int, caddr_t); 103int (*legal_vif_num)(int); 104u_long (*ip_mcast_src)(int); 105 106void (*rsvp_input_p)(struct mbuf *m, int off); 107int (*ip_rsvp_vif)(struct socket *, struct sockopt *); 108void (*ip_rsvp_force_done)(struct socket *); 109 110/* 111 * Nominal space allocated to a raw ip socket. 112 */ 113#define RIPSNDQ 8192 114#define RIPRCVQ 8192 115 116/* 117 * Raw interface to IP protocol. 118 */ 119 120/* 121 * Initialize raw connection block q. 122 */ 123void 124rip_init() 125{ 126 INP_INFO_LOCK_INIT(&ripcbinfo, "rip"); 127 LIST_INIT(&ripcb); 128 ripcbinfo.listhead = &ripcb; 129 /* 130 * XXX We don't use the hash list for raw IP, but it's easier 131 * to allocate a one entry hash list than it is to check all 132 * over the place for hashbase == NULL. 133 */ 134 ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask); 135 ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask); 136 ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb), 137 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 138 uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets); 139} 140 141static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 142 143static int 144raw_append(struct inpcb *last, struct ip *ip, struct mbuf *n) 145{ 146 int policyfail = 0; 147 148#if defined(IPSEC) || defined(FAST_IPSEC) 149 /* check AH/ESP integrity. 
*/ 150 if (ipsec4_in_reject(n, last)) { 151 policyfail = 1; 152#ifdef IPSEC 153 ipsecstat.in_polvio++; 154#endif /*IPSEC*/ 155 /* do not inject data to pcb */ 156 } 157#endif /*IPSEC || FAST_IPSEC*/ 158#ifdef MAC 159 if (!policyfail && mac_check_inpcb_deliver(last, n) != 0) 160 policyfail = 1; 161#endif 162 if (!policyfail) { 163 struct mbuf *opts = NULL; 164 165 if ((last->inp_flags & INP_CONTROLOPTS) || 166 (last->inp_socket->so_options & SO_TIMESTAMP)) 167 ip_savecontrol(last, &opts, ip, n); 168 if (sbappendaddr(&last->inp_socket->so_rcv, 169 (struct sockaddr *)&ripsrc, n, opts) == 0) { 170 /* should notify about lost packet */ 171 m_freem(n); 172 if (opts) 173 m_freem(opts); 174 } else 175 sorwakeup(last->inp_socket); 176 } else 177 m_freem(n); 178 return policyfail; 179} 180 181/* 182 * Setup generic address and protocol structures 183 * for raw_input routine, then pass them along with 184 * mbuf chain. 185 */ 186void 187rip_input(struct mbuf *m, int off) 188{ 189 struct ip *ip = mtod(m, struct ip *); 190 int proto = ip->ip_p; 191 struct inpcb *inp, *last; 192 193 INP_INFO_RLOCK(&ripcbinfo); 194 ripsrc.sin_addr = ip->ip_src; 195 last = NULL; 196 LIST_FOREACH(inp, &ripcb, inp_list) { 197 INP_LOCK(inp); 198 if (inp->inp_ip_p && inp->inp_ip_p != proto) { 199 docontinue: 200 INP_UNLOCK(inp); 201 continue; 202 } 203#ifdef INET6 204 if ((inp->inp_vflag & INP_IPV4) == 0) 205 goto docontinue; 206#endif 207 if (inp->inp_laddr.s_addr && 208 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 209 goto docontinue; 210 if (inp->inp_faddr.s_addr && 211 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 212 goto docontinue; 213 if (last) { 214 struct mbuf *n; 215 216 n = m_copy(m, 0, (int)M_COPYALL); 217 if (n != NULL) 218 (void) raw_append(last, ip, n); 219 /* XXX count dropped packet */ 220 INP_UNLOCK(last); 221 } 222 last = inp; 223 } 224 if (last != NULL) { 225 if (raw_append(last, ip, m) != 0) 226 ipstat.ips_delivered--; 227 INP_UNLOCK(last); 228 } else { 229 m_freem(m); 230 
ipstat.ips_noproto++; 231 ipstat.ips_delivered--; 232 } 233 INP_INFO_RUNLOCK(&ripcbinfo); 234} 235 236/* 237 * Generate IP header and pass packet to ip_output. 238 * Tack on options user may have setup with control call. 239 */ 240int 241rip_output(struct mbuf *m, struct socket *so, u_long dst) 242{ 243 struct ip *ip; 244 struct inpcb *inp = sotoinpcb(so); 245 int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST; 246 247#ifdef MAC 248 mac_create_mbuf_from_socket(so, m); 249#endif 250 251 /* 252 * If the user handed us a complete IP packet, use it. 253 * Otherwise, allocate an mbuf for a header and fill it in. 254 */ 255 if ((inp->inp_flags & INP_HDRINCL) == 0) { 256 if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { 257 m_freem(m); 258 return(EMSGSIZE); 259 } 260 M_PREPEND(m, sizeof(struct ip), M_TRYWAIT); 261 if (m == NULL) 262 return(ENOBUFS); 263 ip = mtod(m, struct ip *); 264 ip->ip_tos = inp->inp_ip_tos; 265 ip->ip_off = 0; 266 ip->ip_p = inp->inp_ip_p; 267 ip->ip_len = m->m_pkthdr.len; 268 ip->ip_src = inp->inp_laddr; 269 ip->ip_dst.s_addr = dst; 270 ip->ip_ttl = inp->inp_ip_ttl; 271 } else { 272 if (m->m_pkthdr.len > IP_MAXPACKET) { 273 m_freem(m); 274 return(EMSGSIZE); 275 } 276 ip = mtod(m, struct ip *); 277 /* don't allow both user specified and setsockopt options, 278 and don't allow packet length sizes that will crash */ 279 if (((ip->ip_hl != (sizeof (*ip) >> 2)) 280 && inp->inp_options) 281 || (ip->ip_len > m->m_pkthdr.len) 282 || (ip->ip_len < (ip->ip_hl << 2))) { 283 m_freem(m); 284 return EINVAL; 285 } 286 if (ip->ip_id == 0) 287#ifdef RANDOM_IP_ID 288 ip->ip_id = ip_randomid(); 289#else 290 ip->ip_id = htons(ip_id++); 291#endif 292 /* XXX prevent ip_output from overwriting header fields */ 293 flags |= IP_RAWOUTPUT; 294 ipstat.ips_rawout++; 295 } 296 297 if (inp->inp_flags & INP_ONESBCAST) 298 flags |= IP_SENDONES; 299 300 return (ip_output(m, inp->inp_options, NULL, flags, 301 inp->inp_moptions, inp)); 302} 303 304/* 305 * Raw 
IP socket option processing. 306 * 307 * Note that access to all of the IP administrative functions here is 308 * implicitly protected by suser() as gaining access to a raw socket 309 * requires either that the thread pass a suser() check, or that it be 310 * passed a raw socket by another thread that has passed a suser() check. 311 * If FreeBSD moves to a more fine-grained access control mechanism, 312 * additional checks will need to be placed here if the raw IP attachment 313 * check is not equivilent the the check required for these 314 * administrative operations; in some cases, these checks are already 315 * present. 316 */ 317int 318rip_ctloutput(struct socket *so, struct sockopt *sopt) 319{ 320 struct inpcb *inp = sotoinpcb(so); 321 int error, optval; 322 323 if (sopt->sopt_level != IPPROTO_IP) 324 return (EINVAL); 325 326 error = 0; 327 328 switch (sopt->sopt_dir) { 329 case SOPT_GET: 330 switch (sopt->sopt_name) { 331 case IP_HDRINCL: 332 optval = inp->inp_flags & INP_HDRINCL; 333 error = sooptcopyout(sopt, &optval, sizeof optval); 334 break; 335 336 case IP_FW_ADD: /* ADD actually returns the body... */ 337 case IP_FW_GET: 338 if (IPFW_LOADED) 339 error = ip_fw_ctl_ptr(sopt); 340 else 341 error = ENOPROTOOPT; 342 break; 343 344 case IP_DUMMYNET_GET: 345 if (DUMMYNET_LOADED) 346 error = ip_dn_ctl_ptr(sopt); 347 else 348 error = ENOPROTOOPT; 349 break ; 350 351 case MRT_INIT: 352 case MRT_DONE: 353 case MRT_ADD_VIF: 354 case MRT_DEL_VIF: 355 case MRT_ADD_MFC: 356 case MRT_DEL_MFC: 357 case MRT_VERSION: 358 case MRT_ASSERT: 359 case MRT_API_SUPPORT: 360 case MRT_API_CONFIG: 361 case MRT_ADD_BW_UPCALL: 362 case MRT_DEL_BW_UPCALL: 363 error = ip_mrouter_get ? 
ip_mrouter_get(so, sopt) : 364 EOPNOTSUPP; 365 break; 366 367 default: 368 error = ip_ctloutput(so, sopt); 369 break; 370 } 371 break; 372 373 case SOPT_SET: 374 switch (sopt->sopt_name) { 375 case IP_HDRINCL: 376 error = sooptcopyin(sopt, &optval, sizeof optval, 377 sizeof optval); 378 if (error) 379 break; 380 if (optval) 381 inp->inp_flags |= INP_HDRINCL; 382 else 383 inp->inp_flags &= ~INP_HDRINCL; 384 break; 385 386 case IP_FW_ADD: 387 case IP_FW_DEL: 388 case IP_FW_FLUSH: 389 case IP_FW_ZERO: 390 case IP_FW_RESETLOG: 391 if (IPFW_LOADED) 392 error = ip_fw_ctl_ptr(sopt); 393 else 394 error = ENOPROTOOPT; 395 break; 396 397 case IP_DUMMYNET_CONFIGURE: 398 case IP_DUMMYNET_DEL: 399 case IP_DUMMYNET_FLUSH: 400 if (DUMMYNET_LOADED) 401 error = ip_dn_ctl_ptr(sopt); 402 else 403 error = ENOPROTOOPT ; 404 break ; 405 406 case IP_RSVP_ON: 407 error = ip_rsvp_init(so); 408 break; 409 410 case IP_RSVP_OFF: 411 error = ip_rsvp_done(); 412 break; 413 414 case IP_RSVP_VIF_ON: 415 case IP_RSVP_VIF_OFF: 416 error = ip_rsvp_vif ? 417 ip_rsvp_vif(so, sopt) : EINVAL; 418 break; 419 420 case MRT_INIT: 421 case MRT_DONE: 422 case MRT_ADD_VIF: 423 case MRT_DEL_VIF: 424 case MRT_ADD_MFC: 425 case MRT_DEL_MFC: 426 case MRT_VERSION: 427 case MRT_ASSERT: 428 case MRT_API_SUPPORT: 429 case MRT_API_CONFIG: 430 case MRT_ADD_BW_UPCALL: 431 case MRT_DEL_BW_UPCALL: 432 error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : 433 EOPNOTSUPP; 434 break; 435 436 default: 437 error = ip_ctloutput(so, sopt); 438 break; 439 } 440 break; 441 } 442 443 return (error); 444} 445 446/* 447 * This function exists solely to receive the PRC_IFDOWN messages which 448 * are sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, 449 * and calls in_ifadown() to remove all routes corresponding to that address. 450 * It also receives the PRC_IFUP messages from if_up() and reinstalls the 451 * interface routes. 
452 */ 453void 454rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) 455{ 456 struct in_ifaddr *ia; 457 struct ifnet *ifp; 458 int err; 459 int flags; 460 461 switch (cmd) { 462 case PRC_IFDOWN: 463 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 464 if (ia->ia_ifa.ifa_addr == sa 465 && (ia->ia_flags & IFA_ROUTE)) { 466 /* 467 * in_ifscrub kills the interface route. 468 */ 469 in_ifscrub(ia->ia_ifp, ia); 470 /* 471 * in_ifadown gets rid of all the rest of 472 * the routes. This is not quite the right 473 * thing to do, but at least if we are running 474 * a routing process they will come back. 475 */ 476 in_ifadown(&ia->ia_ifa, 0); 477 break; 478 } 479 } 480 break; 481 482 case PRC_IFUP: 483 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 484 if (ia->ia_ifa.ifa_addr == sa) 485 break; 486 } 487 if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) 488 return; 489 flags = RTF_UP; 490 ifp = ia->ia_ifa.ifa_ifp; 491 492 if ((ifp->if_flags & IFF_LOOPBACK) 493 || (ifp->if_flags & IFF_POINTOPOINT)) 494 flags |= RTF_HOST; 495 496 err = rtinit(&ia->ia_ifa, RTM_ADD, flags); 497 if (err == 0) 498 ia->ia_flags |= IFA_ROUTE; 499 break; 500 } 501} 502 503u_long rip_sendspace = RIPSNDQ; 504u_long rip_recvspace = RIPRCVQ; 505 506SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, 507 &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); 508SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, 509 &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); 510 511static int 512rip_attach(struct socket *so, int proto, struct thread *td) 513{ 514 struct inpcb *inp; 515 int error; 516 517 /* XXX why not lower? 
*/ 518 INP_INFO_WLOCK(&ripcbinfo); 519 inp = sotoinpcb(so); 520 if (inp) { 521 /* XXX counter, printf */ 522 INP_INFO_WUNLOCK(&ripcbinfo); 523 return EINVAL; 524 } 525 if (td && (error = suser(td)) != 0) { 526 INP_INFO_WUNLOCK(&ripcbinfo); 527 return error; 528 } 529 if (proto >= IPPROTO_MAX || proto < 0) { 530 INP_INFO_WUNLOCK(&ripcbinfo); 531 return EPROTONOSUPPORT; 532 } 533 534 error = soreserve(so, rip_sendspace, rip_recvspace); 535 if (error) { 536 INP_INFO_WUNLOCK(&ripcbinfo); 537 return error; 538 } 539 error = in_pcballoc(so, &ripcbinfo, td, "rawinp"); 540 if (error) { 541 INP_INFO_WUNLOCK(&ripcbinfo); 542 return error; 543 } 544 inp = (struct inpcb *)so->so_pcb; 545 INP_LOCK(inp); 546 INP_INFO_WUNLOCK(&ripcbinfo); 547 inp->inp_vflag |= INP_IPV4; 548 inp->inp_ip_p = proto; 549 inp->inp_ip_ttl = ip_defttl; 550 INP_UNLOCK(inp); 551 return 0; 552} 553 554static void 555rip_pcbdetach(struct socket *so, struct inpcb *inp) 556{ 557 INP_INFO_WLOCK_ASSERT(&ripcbinfo); 558 INP_LOCK_ASSERT(inp); 559 560 if (so == ip_mrouter && ip_mrouter_done) 561 ip_mrouter_done(); 562 if (ip_rsvp_force_done) 563 ip_rsvp_force_done(so); 564 if (so == ip_rsvpd) 565 ip_rsvp_done(); 566 in_pcbdetach(inp); 567} 568 569static int 570rip_detach(struct socket *so) 571{ 572 struct inpcb *inp; 573 574 INP_INFO_WLOCK(&ripcbinfo); 575 inp = sotoinpcb(so); 576 if (inp == 0) { 577 /* XXX counter, printf */ 578 INP_INFO_WUNLOCK(&ripcbinfo); 579 return EINVAL; 580 } 581 INP_LOCK(inp); 582 rip_pcbdetach(so, inp); 583 INP_INFO_WUNLOCK(&ripcbinfo); 584 return 0; 585} 586 587static int 588rip_abort(struct socket *so) 589{ 590 struct inpcb *inp; 591 592 INP_INFO_WLOCK(&ripcbinfo); 593 inp = sotoinpcb(so); 594 if (inp == 0) { 595 INP_INFO_WUNLOCK(&ripcbinfo); 596 return EINVAL; /* ??? possible? panic instead? 
*/ 597 } 598 INP_LOCK(inp); 599 soisdisconnected(so); 600 if (so->so_state & SS_NOFDREF) 601 rip_pcbdetach(so, inp); 602 else 603 INP_UNLOCK(inp); 604 INP_INFO_WUNLOCK(&ripcbinfo); 605 return 0; 606} 607 608static int 609rip_disconnect(struct socket *so) 610{ 611 if ((so->so_state & SS_ISCONNECTED) == 0) 612 return ENOTCONN; 613 return rip_abort(so); 614} 615 616static int 617rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 618{ 619 struct sockaddr_in *addr = (struct sockaddr_in *)nam; 620 struct inpcb *inp; 621 622 if (nam->sa_len != sizeof(*addr)) 623 return EINVAL; 624 625 if (TAILQ_EMPTY(&ifnet) || 626 (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || 627 (addr->sin_addr.s_addr && 628 ifa_ifwithaddr((struct sockaddr *)addr) == 0)) 629 return EADDRNOTAVAIL; 630 631 INP_INFO_WLOCK(&ripcbinfo); 632 inp = sotoinpcb(so); 633 if (inp == 0) { 634 INP_INFO_WUNLOCK(&ripcbinfo); 635 return EINVAL; 636 } 637 INP_LOCK(inp); 638 inp->inp_laddr = addr->sin_addr; 639 INP_UNLOCK(inp); 640 INP_INFO_WUNLOCK(&ripcbinfo); 641 return 0; 642} 643 644static int 645rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 646{ 647 struct sockaddr_in *addr = (struct sockaddr_in *)nam; 648 struct inpcb *inp; 649 650 if (nam->sa_len != sizeof(*addr)) 651 return EINVAL; 652 if (TAILQ_EMPTY(&ifnet)) 653 return EADDRNOTAVAIL; 654 if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) 655 return EAFNOSUPPORT; 656 657 INP_INFO_WLOCK(&ripcbinfo); 658 inp = sotoinpcb(so); 659 if (inp == 0) { 660 INP_INFO_WUNLOCK(&ripcbinfo); 661 return EINVAL; 662 } 663 INP_LOCK(inp); 664 inp->inp_faddr = addr->sin_addr; 665 soisconnected(so); 666 INP_UNLOCK(inp); 667 INP_INFO_WUNLOCK(&ripcbinfo); 668 return 0; 669} 670 671static int 672rip_shutdown(struct socket *so) 673{ 674 struct inpcb *inp; 675 676 INP_INFO_RLOCK(&ripcbinfo); 677 inp = sotoinpcb(so); 678 if (inp == 0) { 679 INP_INFO_RUNLOCK(&ripcbinfo); 680 return EINVAL; 681 } 682 
INP_LOCK(inp); 683 INP_INFO_RUNLOCK(&ripcbinfo); 684 socantsendmore(so); 685 INP_UNLOCK(inp); 686 return 0; 687} 688 689static int 690rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 691 struct mbuf *control, struct thread *td) 692{ 693 struct inpcb *inp; 694 u_long dst; 695 int ret; 696 697 INP_INFO_WLOCK(&ripcbinfo); 698 inp = sotoinpcb(so); 699 if (so->so_state & SS_ISCONNECTED) { 700 if (nam) { 701 INP_INFO_WUNLOCK(&ripcbinfo); 702 m_freem(m); 703 return EISCONN; 704 } 705 dst = inp->inp_faddr.s_addr; 706 } else { 707 if (nam == NULL) { 708 INP_INFO_WUNLOCK(&ripcbinfo); 709 m_freem(m); 710 return ENOTCONN; 711 } 712 dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; 713 } 714 INP_LOCK(inp); 715 ret = rip_output(m, so, dst); 716 INP_UNLOCK(inp); 717 INP_INFO_WUNLOCK(&ripcbinfo); 718 return ret; 719} 720 721static int 722rip_pcblist(SYSCTL_HANDLER_ARGS) 723{ 724 int error, i, n; 725 struct inpcb *inp, **inp_list; 726 inp_gen_t gencnt; 727 struct xinpgen xig; 728 729 /* 730 * The process of preparing the TCB list is too time-consuming and 731 * resource-intensive to repeat twice on every request. 732 */ 733 if (req->oldptr == 0) { 734 n = ripcbinfo.ipi_count; 735 req->oldidx = 2 * (sizeof xig) 736 + (n + n/8) * sizeof(struct xinpcb); 737 return 0; 738 } 739 740 if (req->newptr != 0) 741 return EPERM; 742 743 /* 744 * OK, now we're committed to doing something. 
745 */ 746 INP_INFO_RLOCK(&ripcbinfo); 747 gencnt = ripcbinfo.ipi_gencnt; 748 n = ripcbinfo.ipi_count; 749 INP_INFO_RUNLOCK(&ripcbinfo); 750 751 xig.xig_len = sizeof xig; 752 xig.xig_count = n; 753 xig.xig_gen = gencnt; 754 xig.xig_sogen = so_gencnt; 755 error = SYSCTL_OUT(req, &xig, sizeof xig); 756 if (error) 757 return error; 758 759 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 760 if (inp_list == 0) 761 return ENOMEM; 762 763 INP_INFO_RLOCK(&ripcbinfo); 764 for (inp = LIST_FIRST(ripcbinfo.listhead), i = 0; inp && i < n; 765 inp = LIST_NEXT(inp, inp_list)) { 766 INP_LOCK(inp); 767 if (inp->inp_gencnt <= gencnt && 768 cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) { 769 /* XXX held references? */ 770 inp_list[i++] = inp; 771 } 772 INP_UNLOCK(inp); 773 } 774 INP_INFO_RUNLOCK(&ripcbinfo); 775 n = i; 776 777 error = 0; 778 for (i = 0; i < n; i++) { 779 inp = inp_list[i]; 780 if (inp->inp_gencnt <= gencnt) { 781 struct xinpcb xi; 782 xi.xi_len = sizeof xi; 783 /* XXX should avoid extra copy */ 784 bcopy(inp, &xi.xi_inp, sizeof *inp); 785 if (inp->inp_socket) 786 sotoxsocket(inp->inp_socket, &xi.xi_socket); 787 error = SYSCTL_OUT(req, &xi, sizeof xi); 788 } 789 } 790 if (!error) { 791 /* 792 * Give the user an updated idea of our state. 793 * If the generation differs from what we told 794 * her before, she knows that something happened 795 * while we were processing this request, and it 796 * might be necessary to retry. 797 */ 798 INP_INFO_RLOCK(&ripcbinfo); 799 xig.xig_gen = ripcbinfo.ipi_gencnt; 800 xig.xig_sogen = so_gencnt; 801 xig.xig_count = ripcbinfo.ipi_count; 802 INP_INFO_RUNLOCK(&ripcbinfo); 803 error = SYSCTL_OUT(req, &xig, sizeof xig); 804 } 805 free(inp_list, M_TEMP); 806 return error; 807} 808 809/* 810 * This is the wrapper function for in_setsockaddr. We just pass down 811 * the pcbinfo for in_setpeeraddr to lock. 
812 */ 813static int 814rip_sockaddr(struct socket *so, struct sockaddr **nam) 815{ 816 return (in_setsockaddr(so, nam, &ripcbinfo)); 817} 818 819/* 820 * This is the wrapper function for in_setpeeraddr. We just pass down 821 * the pcbinfo for in_setpeeraddr to lock. 822 */ 823static int 824rip_peeraddr(struct socket *so, struct sockaddr **nam) 825{ 826 return (in_setpeeraddr(so, nam, &ripcbinfo)); 827} 828 829 830SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0, 831 rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); 832 833struct pr_usrreqs rip_usrreqs = { 834 rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect, 835 pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, 836 pru_listen_notsupp, rip_peeraddr, pru_rcvd_notsupp, 837 pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown, 838 rip_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel 839}; 840