85 86#ifdef IPSEC 87#include <netipsec/ipsec.h> 88#endif 89 90#include <machine/in_cksum.h> 91 92#include <security/mac/mac_framework.h> 93 94/* 95 * UDP protocol implementation. 96 * Per RFC 768, August, 1980. 97 */ 98 99#ifdef VIMAGE_GLOBALS 100int udp_blackhole; 101#endif 102 103/* 104 * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums 105 * removes the only data integrity mechanism for packets and malformed 106 * packets that would otherwise be discarded due to bad checksums, and may 107 * cause problems (especially for NFS data blocks). 108 */ 109static int udp_cksum = 1; 110SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udp_cksum, 111 0, "compute udp checksum"); 112 113int udp_log_in_vain = 0; 114SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 115 &udp_log_in_vain, 0, "Log all incoming UDP packets"); 116 117SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_udp, OID_AUTO, blackhole, 118 CTLFLAG_RW, udp_blackhole, 0, 119 "Do not send port unreachables for refused connects"); 120 121u_long udp_sendspace = 9216; /* really max datagram size */ 122 /* 40 1K datagrams */ 123SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 124 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 125 126u_long udp_recvspace = 40 * (1024 + 127#ifdef INET6 128 sizeof(struct sockaddr_in6) 129#else 130 sizeof(struct sockaddr_in) 131#endif 132 ); 133 134SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 135 &udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); 136 137#ifdef VIMAGE_GLOBALS 138struct inpcbhead udb; /* from udp_var.h */ 139struct inpcbinfo udbinfo; 140struct udpstat udpstat; /* from udp_var.h */ 141#endif 142 143#ifndef UDBHASHSIZE 144#define UDBHASHSIZE 128 145#endif 146 147SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_udp, UDPCTL_STATS, stats, 148 CTLFLAG_RW, udpstat, udpstat, 149 "UDP statistics (struct udpstat, netinet/udp_var.h)"); 150 151static void udp_detach(struct socket *so); 152static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, 153 struct mbuf *, struct thread *); 154 155static void 156udp_zone_change(void *tag) 157{ 158 159 uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets); 160} 161 162static int 163udp_inpcb_init(void *mem, int size, int flags) 164{ 165 struct inpcb *inp; 166 167 inp = mem; 168 INP_LOCK_INIT(inp, "inp", "udpinp"); 169 return (0); 170} 171 172void 173udp_init(void) 174{ 175 INIT_VNET_INET(curvnet); 176 177 V_udp_blackhole = 0; 178 179 INP_INFO_LOCK_INIT(&V_udbinfo, "udp"); 180 LIST_INIT(&V_udb); 181 V_udbinfo.ipi_listhead = &V_udb; 182 V_udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB, 183 &V_udbinfo.ipi_hashmask); 184 V_udbinfo.ipi_porthashbase = hashinit(UDBHASHSIZE, M_PCB, 185 &V_udbinfo.ipi_porthashmask); 186 V_udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL, 187 NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 188 uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets); 189 EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL, 190 EVENTHANDLER_PRI_ANY); 191} 192 193/* 194 * Subroutine of udp_input(), which appends the provided mbuf chain to the 195 * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that 196 * contains the source address. If the socket ends up being an IPv6 socket, 197 * udp_append() will convert to a sockaddr_in6 before passing the address 198 * into the socket code. 199 */ 200static void 201udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, 202 struct sockaddr_in *udp_in) 203{ 204 struct sockaddr *append_sa; 205 struct socket *so; 206 struct mbuf *opts = 0; 207#ifdef INET6 208 struct sockaddr_in6 udp_in6; 209#endif 210 211 INP_RLOCK_ASSERT(inp); 212 213#ifdef IPSEC 214 /* Check AH/ESP integrity. */ 215 if (ipsec4_in_reject(n, inp)) { 216 INIT_VNET_IPSEC(curvnet); 217 m_freem(n); 218 V_ipsec4stat.in_polvio++; 219 return; 220 } 221#endif /* IPSEC */ 222#ifdef MAC 223 if (mac_inpcb_check_deliver(inp, n) != 0) { 224 m_freem(n); 225 return; 226 } 227#endif 228 if (inp->inp_flags & INP_CONTROLOPTS || 229 inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) { 230#ifdef INET6 231 if (inp->inp_vflag & INP_IPV6) 232 (void)ip6_savecontrol_v4(inp, n, &opts, NULL); 233 else 234#endif 235 ip_savecontrol(inp, &opts, ip, n); 236 } 237#ifdef INET6 238 if (inp->inp_vflag & INP_IPV6) { 239 bzero(&udp_in6, sizeof(udp_in6)); 240 udp_in6.sin6_len = sizeof(udp_in6); 241 udp_in6.sin6_family = AF_INET6; 242 in6_sin_2_v4mapsin6(udp_in, &udp_in6); 243 append_sa = (struct sockaddr *)&udp_in6; 244 } else 245#endif 246 append_sa = (struct sockaddr *)udp_in; 247 m_adj(n, off); 248 249 so = inp->inp_socket; 250 SOCKBUF_LOCK(&so->so_rcv); 251 if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) { 252 INIT_VNET_INET(so->so_vnet); 253 SOCKBUF_UNLOCK(&so->so_rcv); 254 m_freem(n); 255 if (opts) 256 m_freem(opts); 257 V_udpstat.udps_fullsock++; 258 } else 259 sorwakeup_locked(so); 260} 261 262void 263udp_input(struct mbuf *m, int off) 264{ 265 INIT_VNET_INET(curvnet); 266 int iphlen = off; 267 struct ip *ip; 268 struct udphdr *uh; 269 struct ifnet *ifp; 270 struct inpcb *inp; 271 int len; 272 struct ip save_ip; 273 struct sockaddr_in udp_in; 274#ifdef IPFIREWALL_FORWARD 275 struct m_tag *fwd_tag; 276#endif 277 278 ifp = m->m_pkthdr.rcvif; 279 V_udpstat.udps_ipackets++; 280 281 /* 282 * Strip IP options, if any; should skip this, make available to 283 * user, and use on returned packets, but we don't yet have a way to 284 * check the checksum with options still present. 285 */ 286 if (iphlen > sizeof (struct ip)) { 287 ip_stripoptions(m, (struct mbuf *)0); 288 iphlen = sizeof(struct ip); 289 } 290 291 /* 292 * Get IP and UDP header together in first mbuf. 293 */ 294 ip = mtod(m, struct ip *); 295 if (m->m_len < iphlen + sizeof(struct udphdr)) { 296 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) { 297 V_udpstat.udps_hdrops++; 298 return; 299 } 300 ip = mtod(m, struct ip *); 301 } 302 uh = (struct udphdr *)((caddr_t)ip + iphlen); 303 304 /* 305 * Destination port of 0 is illegal, based on RFC768. 306 */ 307 if (uh->uh_dport == 0) 308 goto badunlocked; 309 310 /* 311 * Construct sockaddr format source address. Stuff source address 312 * and datagram in user buffer. 313 */ 314 bzero(&udp_in, sizeof(udp_in)); 315 udp_in.sin_len = sizeof(udp_in); 316 udp_in.sin_family = AF_INET; 317 udp_in.sin_port = uh->uh_sport; 318 udp_in.sin_addr = ip->ip_src; 319 320 /* 321 * Make mbuf data length reflect UDP length. If not enough data to 322 * reflect UDP length, drop. 323 */ 324 len = ntohs((u_short)uh->uh_ulen); 325 if (ip->ip_len != len) { 326 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 327 V_udpstat.udps_badlen++; 328 goto badunlocked; 329 } 330 m_adj(m, len - ip->ip_len); 331 /* ip->ip_len = len; */ 332 } 333 334 /* 335 * Save a copy of the IP header in case we want restore it for 336 * sending an ICMP error message in response. 337 */ 338 if (!V_udp_blackhole) 339 save_ip = *ip; 340 else 341 memset(&save_ip, 0, sizeof(save_ip)); 342 343 /* 344 * Checksum extended UDP header and data. 345 */ 346 if (uh->uh_sum) { 347 u_short uh_sum; 348 349 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 350 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 351 uh_sum = m->m_pkthdr.csum_data; 352 else 353 uh_sum = in_pseudo(ip->ip_src.s_addr, 354 ip->ip_dst.s_addr, htonl((u_short)len + 355 m->m_pkthdr.csum_data + IPPROTO_UDP)); 356 uh_sum ^= 0xffff; 357 } else { 358 char b[9]; 359 360 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 361 bzero(((struct ipovly *)ip)->ih_x1, 9); 362 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 363 uh_sum = in_cksum(m, len + sizeof (struct ip)); 364 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 365 } 366 if (uh_sum) { 367 V_udpstat.udps_badsum++; 368 m_freem(m); 369 return; 370 } 371 } else 372 V_udpstat.udps_nosum++; 373 374#ifdef IPFIREWALL_FORWARD 375 /* 376 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. 377 */ 378 fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); 379 if (fwd_tag != NULL) { 380 struct sockaddr_in *next_hop; 381 382 /* 383 * Do the hack. 384 */ 385 next_hop = (struct sockaddr_in *)(fwd_tag + 1); 386 ip->ip_dst = next_hop->sin_addr; 387 uh->uh_dport = ntohs(next_hop->sin_port); 388 389 /* 390 * Remove the tag from the packet. We don't need it anymore. 391 */ 392 m_tag_delete(m, fwd_tag); 393 } 394#endif 395 396 INP_INFO_RLOCK(&V_udbinfo); 397 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 398 in_broadcast(ip->ip_dst, ifp)) { 399 struct inpcb *last; 400 struct ip_moptions *imo; 401 402 last = NULL; 403 LIST_FOREACH(inp, &V_udb, inp_list) { 404 if (inp->inp_lport != uh->uh_dport) 405 continue; 406#ifdef INET6 407 if ((inp->inp_vflag & INP_IPV4) == 0) 408 continue; 409#endif 410 if (inp->inp_laddr.s_addr != INADDR_ANY && 411 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 412 continue; 413 if (inp->inp_faddr.s_addr != INADDR_ANY && 414 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 415 continue; 416 /* 417 * XXX: Do not check source port of incoming datagram 418 * unless inp_connect() has been called to bind the 419 * fport part of the 4-tuple; the source could be 420 * trying to talk to us with an ephemeral port. 421 */ 422 if (inp->inp_fport != 0 && 423 inp->inp_fport != uh->uh_sport) 424 continue; 425 426 INP_RLOCK(inp); 427 428 /* 429 * Handle socket delivery policy for any-source 430 * and source-specific multicast. [RFC3678] 431 */ 432 imo = inp->inp_moptions; 433 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 434 imo != NULL) { 435 struct sockaddr_in sin; 436 struct in_msource *ims; 437 int blocked, mode; 438 size_t idx; 439 440 bzero(&sin, sizeof(struct sockaddr_in)); 441 sin.sin_len = sizeof(struct sockaddr_in); 442 sin.sin_family = AF_INET; 443 sin.sin_addr = ip->ip_dst; 444 445 blocked = 0; 446 idx = imo_match_group(imo, ifp, 447 (struct sockaddr *)&sin); 448 if (idx == -1) { 449 /* 450 * No group membership for this socket. 451 * Do not bump udps_noportbcast, as 452 * this will happen further down. 453 */ 454 blocked++; 455 } else { 456 /* 457 * Check for a multicast source filter 458 * entry on this socket for this group. 459 * MCAST_EXCLUDE is the default 460 * behaviour. It means default accept; 461 * entries, if present, denote sources 462 * to be excluded from delivery. 463 */ 464 ims = imo_match_source(imo, idx, 465 (struct sockaddr *)&udp_in); 466 mode = imo->imo_mfilters[idx].imf_fmode; 467 if ((ims != NULL && 468 mode == MCAST_EXCLUDE) || 469 (ims == NULL && 470 mode == MCAST_INCLUDE)) { 471#ifdef DIAGNOSTIC 472 if (bootverbose) { 473 printf("%s: blocked by" 474 " source filter\n", 475 __func__); 476 } 477#endif 478 V_udpstat.udps_filtermcast++; 479 blocked++; 480 } 481 } 482 if (blocked != 0) { 483 INP_RUNLOCK(inp); 484 continue; 485 } 486 } 487 if (last != NULL) { 488 struct mbuf *n; 489 490 n = m_copy(m, 0, M_COPYALL); 491 if (n != NULL) 492 udp_append(last, ip, n, iphlen + 493 sizeof(struct udphdr), &udp_in); 494 INP_RUNLOCK(last); 495 } 496 last = inp; 497 /* 498 * Don't look for additional matches if this one does 499 * not have either the SO_REUSEPORT or SO_REUSEADDR 500 * socket options set. This heuristic avoids 501 * searching through all pcbs in the common case of a 502 * non-shared port. It assumes that an application 503 * will never clear these options after setting them. 504 */ 505 if ((last->inp_socket->so_options & 506 (SO_REUSEPORT|SO_REUSEADDR)) == 0) 507 break; 508 } 509 510 if (last == NULL) { 511 /* 512 * No matching pcb found; discard datagram. (No need 513 * to send an ICMP Port Unreachable for a broadcast 514 * or multicast datgram.) 515 */ 516 V_udpstat.udps_noportbcast++; 517 goto badheadlocked; 518 } 519 udp_append(last, ip, m, iphlen + sizeof(struct udphdr), 520 &udp_in); 521 INP_RUNLOCK(last); 522 INP_INFO_RUNLOCK(&V_udbinfo); 523 return; 524 } 525 526 /* 527 * Locate pcb for datagram. 528 */ 529 inp = in_pcblookup_hash(&V_udbinfo, ip->ip_src, uh->uh_sport, 530 ip->ip_dst, uh->uh_dport, 1, ifp); 531 if (inp == NULL) { 532 if (udp_log_in_vain) { 533 char buf[4*sizeof "123"]; 534 535 strcpy(buf, inet_ntoa(ip->ip_dst)); 536 log(LOG_INFO, 537 "Connection attempt to UDP %s:%d from %s:%d\n", 538 buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), 539 ntohs(uh->uh_sport)); 540 } 541 V_udpstat.udps_noport++; 542 if (m->m_flags & (M_BCAST | M_MCAST)) { 543 V_udpstat.udps_noportbcast++; 544 goto badheadlocked; 545 } 546 if (V_udp_blackhole) 547 goto badheadlocked; 548 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 549 goto badheadlocked; 550 *ip = save_ip; 551 ip->ip_len += iphlen; 552 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 553 INP_INFO_RUNLOCK(&V_udbinfo); 554 return; 555 } 556 557 /* 558 * Check the minimum TTL for socket. 559 */ 560 INP_RLOCK(inp); 561 INP_INFO_RUNLOCK(&V_udbinfo); 562 if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) { 563 INP_RUNLOCK(inp); 564 goto badunlocked; 565 } 566 udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in); 567 INP_RUNLOCK(inp); 568 return; 569 570badheadlocked: 571 if (inp) 572 INP_RUNLOCK(inp); 573 INP_INFO_RUNLOCK(&V_udbinfo); 574badunlocked: 575 m_freem(m); 576} 577 578/* 579 * Notify a udp user of an asynchronous error; just wake up so that they can 580 * collect error status. 581 */ 582struct inpcb * 583udp_notify(struct inpcb *inp, int errno) 584{ 585 586 /* 587 * While udp_ctlinput() always calls udp_notify() with a read lock 588 * when invoking it directly, in_pcbnotifyall() currently uses write 589 * locks due to sharing code with TCP. For now, accept either a read 590 * or a write lock, but a read lock is sufficient. 591 */ 592 INP_LOCK_ASSERT(inp); 593 594 inp->inp_socket->so_error = errno; 595 sorwakeup(inp->inp_socket); 596 sowwakeup(inp->inp_socket); 597 return (inp); 598} 599 600void 601udp_ctlinput(int cmd, struct sockaddr *sa, void *vip) 602{ 603 INIT_VNET_INET(curvnet); 604 struct ip *ip = vip; 605 struct udphdr *uh; 606 struct in_addr faddr; 607 struct inpcb *inp; 608 609 faddr = ((struct sockaddr_in *)sa)->sin_addr; 610 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 611 return; 612 613 /* 614 * Redirects don't need to be handled up here. 615 */ 616 if (PRC_IS_REDIRECT(cmd)) 617 return; 618 619 /* 620 * Hostdead is ugly because it goes linearly through all PCBs. 621 * 622 * XXX: We never get this from ICMP, otherwise it makes an excellent 623 * DoS attack on machines with many connections. 624 */ 625 if (cmd == PRC_HOSTDEAD) 626 ip = NULL; 627 else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) 628 return; 629 if (ip != NULL) { 630 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 631 INP_INFO_RLOCK(&V_udbinfo); 632 inp = in_pcblookup_hash(&V_udbinfo, faddr, uh->uh_dport, 633 ip->ip_src, uh->uh_sport, 0, NULL); 634 if (inp != NULL) { 635 INP_RLOCK(inp); 636 if (inp->inp_socket != NULL) { 637 udp_notify(inp, inetctlerrmap[cmd]); 638 } 639 INP_RUNLOCK(inp); 640 } 641 INP_INFO_RUNLOCK(&V_udbinfo); 642 } else 643 in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd], 644 udp_notify); 645} 646 647static int 648udp_pcblist(SYSCTL_HANDLER_ARGS) 649{ 650 INIT_VNET_INET(curvnet); 651 int error, i, n; 652 struct inpcb *inp, **inp_list; 653 inp_gen_t gencnt; 654 struct xinpgen xig; 655 656 /* 657 * The process of preparing the PCB list is too time-consuming and 658 * resource-intensive to repeat twice on every request. 659 */ 660 if (req->oldptr == 0) { 661 n = V_udbinfo.ipi_count; 662 req->oldidx = 2 * (sizeof xig) 663 + (n + n/8) * sizeof(struct xinpcb); 664 return (0); 665 } 666 667 if (req->newptr != 0) 668 return (EPERM); 669 670 /* 671 * OK, now we're committed to doing something. 672 */ 673 INP_INFO_RLOCK(&V_udbinfo); 674 gencnt = V_udbinfo.ipi_gencnt; 675 n = V_udbinfo.ipi_count; 676 INP_INFO_RUNLOCK(&V_udbinfo); 677 678 error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) 679 + n * sizeof(struct xinpcb)); 680 if (error != 0) 681 return (error); 682 683 xig.xig_len = sizeof xig; 684 xig.xig_count = n; 685 xig.xig_gen = gencnt; 686 xig.xig_sogen = so_gencnt; 687 error = SYSCTL_OUT(req, &xig, sizeof xig); 688 if (error) 689 return (error); 690 691 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 692 if (inp_list == 0) 693 return (ENOMEM); 694 695 INP_INFO_RLOCK(&V_udbinfo); 696 for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; 697 inp = LIST_NEXT(inp, inp_list)) { 698 INP_RLOCK(inp); 699 if (inp->inp_gencnt <= gencnt && 700 cr_canseeinpcb(req->td->td_ucred, inp) == 0) 701 inp_list[i++] = inp; 702 INP_RUNLOCK(inp); 703 } 704 INP_INFO_RUNLOCK(&V_udbinfo); 705 n = i; 706 707 error = 0; 708 for (i = 0; i < n; i++) { 709 inp = inp_list[i]; 710 INP_RLOCK(inp); 711 if (inp->inp_gencnt <= gencnt) { 712 struct xinpcb xi; 713 bzero(&xi, sizeof(xi)); 714 xi.xi_len = sizeof xi; 715 /* XXX should avoid extra copy */ 716 bcopy(inp, &xi.xi_inp, sizeof *inp); 717 if (inp->inp_socket) 718 sotoxsocket(inp->inp_socket, &xi.xi_socket); 719 xi.xi_inp.inp_gencnt = inp->inp_gencnt; 720 INP_RUNLOCK(inp); 721 error = SYSCTL_OUT(req, &xi, sizeof xi); 722 } else 723 INP_RUNLOCK(inp); 724 } 725 if (!error) { 726 /* 727 * Give the user an updated idea of our state. If the 728 * generation differs from what we told her before, she knows 729 * that something happened while we were processing this 730 * request, and it might be necessary to retry. 731 */ 732 INP_INFO_RLOCK(&V_udbinfo); 733 xig.xig_gen = V_udbinfo.ipi_gencnt; 734 xig.xig_sogen = so_gencnt; 735 xig.xig_count = V_udbinfo.ipi_count; 736 INP_INFO_RUNLOCK(&V_udbinfo); 737 error = SYSCTL_OUT(req, &xig, sizeof xig); 738 } 739 free(inp_list, M_TEMP); 740 return (error); 741} 742 743SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, 744 udp_pcblist, "S,xinpcb", "List of active UDP sockets"); 745 746static int 747udp_getcred(SYSCTL_HANDLER_ARGS) 748{ 749 INIT_VNET_INET(curvnet); 750 struct xucred xuc; 751 struct sockaddr_in addrs[2]; 752 struct inpcb *inp; 753 int error; 754 755 error = priv_check(req->td, PRIV_NETINET_GETCRED); 756 if (error) 757 return (error); 758 error = SYSCTL_IN(req, addrs, sizeof(addrs)); 759 if (error) 760 return (error); 761 INP_INFO_RLOCK(&V_udbinfo); 762 inp = in_pcblookup_hash(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port, 763 addrs[0].sin_addr, addrs[0].sin_port, 1, NULL); 764 if (inp != NULL) { 765 INP_RLOCK(inp); 766 INP_INFO_RUNLOCK(&V_udbinfo); 767 if (inp->inp_socket == NULL) 768 error = ENOENT; 769 if (error == 0) 770 error = cr_canseeinpcb(req->td->td_ucred, inp); 771 if (error == 0) 772 cru2x(inp->inp_cred, &xuc); 773 INP_RUNLOCK(inp); 774 } else { 775 INP_INFO_RUNLOCK(&V_udbinfo); 776 error = ENOENT; 777 } 778 if (error == 0) 779 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); 780 return (error); 781} 782 783SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, 784 CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, 785 udp_getcred, "S,xucred", "Get the xucred of a UDP connection"); 786 787static int 788udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, 789 struct mbuf *control, struct thread *td) 790{ 791 INIT_VNET_INET(inp->inp_vnet); 792 struct udpiphdr *ui; 793 int len = m->m_pkthdr.len; 794 struct in_addr faddr, laddr; 795 struct cmsghdr *cm; 796 struct sockaddr_in *sin, src; 797 int error = 0; 798 int ipflags; 799 u_short fport, lport; 800 int unlock_udbinfo; 801 802 /* 803 * udp_output() may need to temporarily bind or connect the current 804 * inpcb. As such, we don't know up front whether we will need the 805 * pcbinfo lock or not. Do any work to decide what is needed up 806 * front before acquiring any locks. 807 */ 808 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 809 if (control) 810 m_freem(control); 811 m_freem(m); 812 return (EMSGSIZE); 813 } 814 815 src.sin_family = 0; 816 if (control != NULL) { 817 /* 818 * XXX: Currently, we assume all the optional information is 819 * stored in a single mbuf. 820 */ 821 if (control->m_next) { 822 m_freem(control); 823 m_freem(m); 824 return (EINVAL); 825 } 826 for (; control->m_len > 0; 827 control->m_data += CMSG_ALIGN(cm->cmsg_len), 828 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { 829 cm = mtod(control, struct cmsghdr *); 830 if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 831 || cm->cmsg_len > control->m_len) { 832 error = EINVAL; 833 break; 834 } 835 if (cm->cmsg_level != IPPROTO_IP) 836 continue; 837 838 switch (cm->cmsg_type) { 839 case IP_SENDSRCADDR: 840 if (cm->cmsg_len != 841 CMSG_LEN(sizeof(struct in_addr))) { 842 error = EINVAL; 843 break; 844 } 845 bzero(&src, sizeof(src)); 846 src.sin_family = AF_INET; 847 src.sin_len = sizeof(src); 848 src.sin_port = inp->inp_lport; 849 src.sin_addr = 850 *(struct in_addr *)CMSG_DATA(cm); 851 break; 852 853 default: 854 error = ENOPROTOOPT; 855 break; 856 } 857 if (error) 858 break; 859 } 860 m_freem(control); 861 } 862 if (error) { 863 m_freem(m); 864 return (error); 865 } 866 867 /* 868 * Depending on whether or not the application has bound or connected 869 * the socket, we may have to do varying levels of work. The optimal 870 * case is for a connected UDP socket, as a global lock isn't 871 * required at all. 872 * 873 * In order to decide which we need, we require stability of the 874 * inpcb binding, which we ensure by acquiring a read lock on the 875 * inpcb. This doesn't strictly follow the lock order, so we play 876 * the trylock and retry game; note that we may end up with more 877 * conservative locks than required the second time around, so later 878 * assertions have to accept that. Further analysis of the number of 879 * misses under contention is required. 880 */ 881 sin = (struct sockaddr_in *)addr; 882 INP_RLOCK(inp); 883 if (sin != NULL && 884 (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { 885 INP_RUNLOCK(inp); 886 INP_INFO_WLOCK(&V_udbinfo); 887 INP_WLOCK(inp); 888 unlock_udbinfo = 2; 889 } else if ((sin != NULL && ( 890 (sin->sin_addr.s_addr == INADDR_ANY) || 891 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 892 (inp->inp_laddr.s_addr == INADDR_ANY) || 893 (inp->inp_lport == 0))) || 894 (src.sin_family == AF_INET)) { 895 if (!INP_INFO_TRY_RLOCK(&V_udbinfo)) { 896 INP_RUNLOCK(inp); 897 INP_INFO_RLOCK(&V_udbinfo); 898 INP_RLOCK(inp); 899 } 900 unlock_udbinfo = 1; 901 } else 902 unlock_udbinfo = 0; 903 904 /* 905 * If the IP_SENDSRCADDR control message was specified, override the 906 * source address for this datagram. Its use is invalidated if the 907 * address thus specified is incomplete or clobbers other inpcbs. 908 */ 909 laddr = inp->inp_laddr; 910 lport = inp->inp_lport; 911 if (src.sin_family == AF_INET) { 912 INP_INFO_LOCK_ASSERT(&V_udbinfo); 913 if ((lport == 0) || 914 (laddr.s_addr == INADDR_ANY && 915 src.sin_addr.s_addr == INADDR_ANY)) { 916 error = EINVAL; 917 goto release; 918 } 919 error = in_pcbbind_setup(inp, (struct sockaddr *)&src, 920 &laddr.s_addr, &lport, td->td_ucred); 921 if (error) 922 goto release; 923 } 924 925 /* 926 * If a UDP socket has been connected, then a local address/port will 927 * have been selected and bound. 928 * 929 * If a UDP socket has not been connected to, then an explicit 930 * destination address must be used, in which case a local 931 * address/port may not have been selected and bound. 932 */ 933 if (sin != NULL) { 934 INP_LOCK_ASSERT(inp); 935 if (inp->inp_faddr.s_addr != INADDR_ANY) { 936 error = EISCONN; 937 goto release; 938 } 939 940 /* 941 * Jail may rewrite the destination address, so let it do 942 * that before we use it. 943 */ 944 if (prison_remote_ip4(td->td_ucred, &sin->sin_addr) != 0) { 945 error = EINVAL; 946 goto release; 947 } 948 949 /* 950 * If a local address or port hasn't yet been selected, or if 951 * the destination address needs to be rewritten due to using 952 * a special INADDR_ constant, invoke in_pcbconnect_setup() 953 * to do the heavy lifting. Once a port is selected, we 954 * commit the binding back to the socket; we also commit the 955 * binding of the address if in jail. 956 * 957 * If we already have a valid binding and we're not 958 * requesting a destination address rewrite, use a fast path. 959 */ 960 if (inp->inp_laddr.s_addr == INADDR_ANY || 961 inp->inp_lport == 0 || 962 sin->sin_addr.s_addr == INADDR_ANY || 963 sin->sin_addr.s_addr == INADDR_BROADCAST) { 964 INP_INFO_LOCK_ASSERT(&V_udbinfo); 965 error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, 966 &lport, &faddr.s_addr, &fport, NULL, 967 td->td_ucred); 968 if (error) 969 goto release; 970 971 /* 972 * XXXRW: Why not commit the port if the address is 973 * !INADDR_ANY? 974 */ 975 /* Commit the local port if newly assigned. */ 976 if (inp->inp_laddr.s_addr == INADDR_ANY && 977 inp->inp_lport == 0) { 978 INP_INFO_WLOCK_ASSERT(&V_udbinfo); 979 INP_WLOCK_ASSERT(inp); 980 /* 981 * Remember addr if jailed, to prevent 982 * rebinding. 983 */ 984 if (jailed(td->td_ucred)) 985 inp->inp_laddr = laddr; 986 inp->inp_lport = lport; 987 if (in_pcbinshash(inp) != 0) { 988 inp->inp_lport = 0; 989 error = EAGAIN; 990 goto release; 991 } 992 inp->inp_flags |= INP_ANONPORT; 993 } 994 } else { 995 faddr = sin->sin_addr; 996 fport = sin->sin_port; 997 } 998 } else { 999 INP_LOCK_ASSERT(inp); 1000 faddr = inp->inp_faddr; 1001 fport = inp->inp_fport; 1002 if (faddr.s_addr == INADDR_ANY) { 1003 error = ENOTCONN; 1004 goto release; 1005 } 1006 } 1007 1008 /* 1009 * Calculate data length and get a mbuf for UDP, IP, and possible 1010 * link-layer headers. Immediate slide the data pointer back forward 1011 * since we won't use that space at this layer. 1012 */ 1013 M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT); 1014 if (m == NULL) { 1015 error = ENOBUFS; 1016 goto release; 1017 } 1018 m->m_data += max_linkhdr; 1019 m->m_len -= max_linkhdr; 1020 m->m_pkthdr.len -= max_linkhdr; 1021 1022 /* 1023 * Fill in mbuf with extended UDP header and addresses and length put 1024 * into network format. 1025 */ 1026 ui = mtod(m, struct udpiphdr *); 1027 bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */ 1028 ui->ui_pr = IPPROTO_UDP; 1029 ui->ui_src = laddr; 1030 ui->ui_dst = faddr; 1031 ui->ui_sport = lport; 1032 ui->ui_dport = fport; 1033 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1034 1035 /* 1036 * Set the Don't Fragment bit in the IP header. 1037 */ 1038 if (inp->inp_flags & INP_DONTFRAG) { 1039 struct ip *ip; 1040 1041 ip = (struct ip *)&ui->ui_i; 1042 ip->ip_off |= IP_DF; 1043 } 1044 1045 ipflags = 0; 1046 if (inp->inp_socket->so_options & SO_DONTROUTE) 1047 ipflags |= IP_ROUTETOIF; 1048 if (inp->inp_socket->so_options & SO_BROADCAST) 1049 ipflags |= IP_ALLOWBROADCAST; 1050 if (inp->inp_flags & INP_ONESBCAST) 1051 ipflags |= IP_SENDONES; 1052 1053#ifdef MAC 1054 mac_inpcb_create_mbuf(inp, m); 1055#endif 1056 1057 /* 1058 * Set up checksum and output datagram. 1059 */ 1060 if (udp_cksum) { 1061 if (inp->inp_flags & INP_ONESBCAST) 1062 faddr.s_addr = INADDR_BROADCAST; 1063 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr, 1064 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1065 m->m_pkthdr.csum_flags = CSUM_UDP; 1066 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1067 } else 1068 ui->ui_sum = 0; 1069 ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len; 1070 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1071 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1072 V_udpstat.udps_opackets++; 1073 1074 if (unlock_udbinfo == 2) 1075 INP_INFO_WUNLOCK(&V_udbinfo); 1076 else if (unlock_udbinfo == 1) 1077 INP_INFO_RUNLOCK(&V_udbinfo); 1078 error = ip_output(m, inp->inp_options, NULL, ipflags, 1079 inp->inp_moptions, inp); 1080 if (unlock_udbinfo == 2) 1081 INP_WUNLOCK(inp); 1082 else 1083 INP_RUNLOCK(inp); 1084 return (error); 1085 1086release: 1087 if (unlock_udbinfo == 2) { 1088 INP_WUNLOCK(inp); 1089 INP_INFO_WUNLOCK(&V_udbinfo); 1090 } else if (unlock_udbinfo == 1) { 1091 INP_RUNLOCK(inp); 1092 INP_INFO_RUNLOCK(&V_udbinfo); 1093 } else 1094 INP_RUNLOCK(inp); 1095 m_freem(m); 1096 return (error); 1097} 1098 1099static void 1100udp_abort(struct socket *so) 1101{ 1102 INIT_VNET_INET(so->so_vnet); 1103 struct inpcb *inp; 1104 1105 inp = sotoinpcb(so); 1106 KASSERT(inp != NULL, ("udp_abort: inp == NULL")); 1107 INP_INFO_WLOCK(&V_udbinfo); 1108 INP_WLOCK(inp); 1109 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1110 in_pcbdisconnect(inp); 1111 inp->inp_laddr.s_addr = INADDR_ANY; 1112 soisdisconnected(so); 1113 } 1114 INP_WUNLOCK(inp); 1115 INP_INFO_WUNLOCK(&V_udbinfo); 1116} 1117 1118static int 1119udp_attach(struct socket *so, int proto, struct thread *td) 1120{ 1121 INIT_VNET_INET(so->so_vnet); 1122 struct inpcb *inp; 1123 int error; 1124 1125 inp = sotoinpcb(so); 1126 KASSERT(inp == NULL, ("udp_attach: inp != NULL")); 1127 error = soreserve(so, udp_sendspace, udp_recvspace); 1128 if (error) 1129 return (error); 1130 INP_INFO_WLOCK(&V_udbinfo); 1131 error = in_pcballoc(so, &V_udbinfo); 1132 if (error) { 1133 INP_INFO_WUNLOCK(&V_udbinfo); 1134 return (error); 1135 } 1136 1137 inp = (struct inpcb *)so->so_pcb; 1138 INP_INFO_WUNLOCK(&V_udbinfo); 1139 inp->inp_vflag |= INP_IPV4; 1140 inp->inp_ip_ttl = V_ip_defttl; 1141 INP_WUNLOCK(inp); 1142 return (0); 1143} 1144 1145static int 1146udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 1147{ 1148 INIT_VNET_INET(so->so_vnet); 1149 struct inpcb *inp; 1150 int error; 1151 1152 inp = sotoinpcb(so); 1153 KASSERT(inp != NULL, ("udp_bind: inp == NULL")); 1154 INP_INFO_WLOCK(&V_udbinfo); 1155 INP_WLOCK(inp); 1156 error = in_pcbbind(inp, nam, td->td_ucred); 1157 INP_WUNLOCK(inp); 1158 INP_INFO_WUNLOCK(&V_udbinfo); 1159 return (error); 1160} 1161 1162static void 1163udp_close(struct socket *so) 1164{ 1165 INIT_VNET_INET(so->so_vnet); 1166 struct inpcb *inp; 1167 1168 inp = sotoinpcb(so); 1169 KASSERT(inp != NULL, ("udp_close: inp == NULL")); 1170 INP_INFO_WLOCK(&V_udbinfo); 1171 INP_WLOCK(inp); 1172 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1173 in_pcbdisconnect(inp); 1174 inp->inp_laddr.s_addr = INADDR_ANY; 1175 soisdisconnected(so); 1176 } 1177 INP_WUNLOCK(inp); 1178 INP_INFO_WUNLOCK(&V_udbinfo); 1179} 1180 1181static int 1182udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 1183{ 1184 INIT_VNET_INET(so->so_vnet); 1185 struct inpcb *inp; 1186 int error; 1187 struct sockaddr_in *sin; 1188 1189 inp = sotoinpcb(so); 1190 KASSERT(inp != NULL, ("udp_connect: inp == NULL")); 1191 INP_INFO_WLOCK(&V_udbinfo); 1192 INP_WLOCK(inp); 1193 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1194 INP_WUNLOCK(inp); 1195 INP_INFO_WUNLOCK(&V_udbinfo); 1196 return (EISCONN); 1197 } 1198 sin = (struct sockaddr_in *)nam; 1199 if (prison_remote_ip4(td->td_ucred, &sin->sin_addr) != 0) { 1200 INP_WUNLOCK(inp); 1201 INP_INFO_WUNLOCK(&udbinfo); 1202 return (EAFNOSUPPORT); 1203 } 1204 error = in_pcbconnect(inp, nam, td->td_ucred); 1205 if (error == 0) 1206 soisconnected(so); 1207 INP_WUNLOCK(inp); 1208 INP_INFO_WUNLOCK(&V_udbinfo); 1209 return (error); 1210} 1211 1212static void 1213udp_detach(struct socket *so) 1214{ 1215 INIT_VNET_INET(so->so_vnet); 1216 struct inpcb *inp; 1217 1218 inp = sotoinpcb(so); 1219 KASSERT(inp != NULL, ("udp_detach: inp == NULL")); 1220 KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, 1221 ("udp_detach: not disconnected")); 1222 INP_INFO_WLOCK(&V_udbinfo); 1223 INP_WLOCK(inp); 1224 in_pcbdetach(inp); 1225 in_pcbfree(inp); 1226 INP_INFO_WUNLOCK(&V_udbinfo); 1227} 1228 1229static int 1230udp_disconnect(struct socket *so) 1231{ 1232 INIT_VNET_INET(so->so_vnet); 1233 struct inpcb *inp; 1234 1235 inp = sotoinpcb(so); 1236 KASSERT(inp != NULL, ("udp_disconnect: inp == NULL")); 1237 INP_INFO_WLOCK(&V_udbinfo); 1238 INP_WLOCK(inp); 1239 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1240 INP_WUNLOCK(inp); 1241 INP_INFO_WUNLOCK(&V_udbinfo); 1242 return (ENOTCONN); 1243 } 1244 1245 in_pcbdisconnect(inp); 1246 inp->inp_laddr.s_addr = INADDR_ANY; 1247 SOCK_LOCK(so); 1248 so->so_state &= ~SS_ISCONNECTED; /* XXX */ 1249 SOCK_UNLOCK(so); 1250 INP_WUNLOCK(inp); 1251 INP_INFO_WUNLOCK(&V_udbinfo); 1252 return (0); 1253} 1254 1255static int 1256udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, 1257 struct mbuf *control, struct thread *td) 1258{ 1259 struct inpcb *inp; 1260 1261 inp = sotoinpcb(so); 1262 KASSERT(inp != NULL, ("udp_send: inp == NULL")); 1263 return (udp_output(inp, m, addr, control, td)); 1264} 1265 1266int 1267udp_shutdown(struct socket *so) 1268{ 1269 struct inpcb *inp; 1270 1271 inp = sotoinpcb(so); 1272 KASSERT(inp != NULL, ("udp_shutdown: inp == NULL")); 1273 INP_WLOCK(inp); 1274 socantsendmore(so); 1275 INP_WUNLOCK(inp); 1276 return (0); 1277} 1278 1279struct pr_usrreqs udp_usrreqs = { 1280 .pru_abort = udp_abort, 1281 .pru_attach = udp_attach, 1282 .pru_bind = udp_bind, 1283 .pru_connect = udp_connect, 1284 .pru_control = in_control, 1285 .pru_detach = udp_detach, 1286 .pru_disconnect = udp_disconnect, 1287 .pru_peeraddr = in_getpeeraddr, 1288 .pru_send = udp_send, 1289 .pru_soreceive = soreceive_dgram, 1290 .pru_sosend = sosend_dgram, 1291 .pru_shutdown = udp_shutdown, 1292 .pru_sockaddr = in_getsockaddr, 1293 .pru_sosetlabel = in_pcbsosetlabel, 1294 .pru_close = udp_close, 1295};
|