84 85#ifdef IPSEC 86#include <netipsec/ipsec.h> 87#endif 88 89#include <machine/in_cksum.h> 90 91#include <security/mac/mac_framework.h> 92 93/* 94 * UDP protocol implementation. 95 * Per RFC 768, August, 1980. 96 */ 97 98#ifdef VIMAGE_GLOBALS 99int udp_blackhole; 100#endif 101 102/* 103 * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums 104 * removes the only data integrity mechanism for packets and malformed 105 * packets that would otherwise be discarded due to bad checksums, and may 106 * cause problems (especially for NFS data blocks). 107 */ 108static int udp_cksum = 1; 109SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udp_cksum, 110 0, "compute udp checksum"); 111 112int udp_log_in_vain = 0; 113SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 114 &udp_log_in_vain, 0, "Log all incoming UDP packets"); 115 116SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_udp, OID_AUTO, blackhole, 117 CTLFLAG_RW, udp_blackhole, 0, 118 "Do not send port unreachables for refused connects"); 119 120u_long udp_sendspace = 9216; /* really max datagram size */ 121 /* 40 1K datagrams */ 122SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 123 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 124 125u_long udp_recvspace = 40 * (1024 + 126#ifdef INET6 127 sizeof(struct sockaddr_in6) 128#else 129 sizeof(struct sockaddr_in) 130#endif 131 ); 132 133SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 134 &udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); 135 136#ifdef VIMAGE_GLOBALS 137struct inpcbhead udb; /* from udp_var.h */ 138struct inpcbinfo udbinfo; 139struct udpstat udpstat; /* from udp_var.h */ 140#endif 141 142#ifndef UDBHASHSIZE 143#define UDBHASHSIZE 128 144#endif 145 146SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_udp, UDPCTL_STATS, stats, 147 CTLFLAG_RW, udpstat, udpstat, 148 "UDP statistics (struct udpstat, netinet/udp_var.h)"); 149 150static void udp_detach(struct socket *so); 151static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, 152 struct mbuf *, struct thread *); 153 154static void 155udp_zone_change(void *tag) 156{ 157 158 uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets); 159} 160 161static int 162udp_inpcb_init(void *mem, int size, int flags) 163{ 164 struct inpcb *inp; 165 166 inp = mem; 167 INP_LOCK_INIT(inp, "inp", "udpinp"); 168 return (0); 169} 170 171void 172udp_init(void) 173{ 174 INIT_VNET_INET(curvnet); 175 176 V_udp_blackhole = 0; 177 178 INP_INFO_LOCK_INIT(&V_udbinfo, "udp"); 179 LIST_INIT(&V_udb); 180 V_udbinfo.ipi_listhead = &V_udb; 181 V_udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB, 182 &V_udbinfo.ipi_hashmask); 183 V_udbinfo.ipi_porthashbase = hashinit(UDBHASHSIZE, M_PCB, 184 &V_udbinfo.ipi_porthashmask); 185 V_udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL, 186 NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 187 uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets); 188 EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL, 189 EVENTHANDLER_PRI_ANY); 190} 191 192/* 193 * Subroutine of udp_input(), which appends the provided mbuf chain to the 194 * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that 195 * contains the source address. If the socket ends up being an IPv6 socket, 196 * udp_append() will convert to a sockaddr_in6 before passing the address 197 * into the socket code. 198 */ 199static void 200udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, 201 struct sockaddr_in *udp_in) 202{ 203 struct sockaddr *append_sa; 204 struct socket *so; 205 struct mbuf *opts = 0; 206#ifdef INET6 207 struct sockaddr_in6 udp_in6; 208#endif 209 210 INP_RLOCK_ASSERT(inp); 211 212#ifdef IPSEC 213 /* Check AH/ESP integrity. */ 214 if (ipsec4_in_reject(n, inp)) { 215 INIT_VNET_IPSEC(curvnet); 216 m_freem(n); 217 V_ipsec4stat.in_polvio++; 218 return; 219 } 220#endif /* IPSEC */ 221#ifdef MAC 222 if (mac_inpcb_check_deliver(inp, n) != 0) { 223 m_freem(n); 224 return; 225 } 226#endif 227 if (inp->inp_flags & INP_CONTROLOPTS || 228 inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) { 229#ifdef INET6 230 if (inp->inp_vflag & INP_IPV6) 231 (void)ip6_savecontrol_v4(inp, n, &opts, NULL); 232 else 233#endif 234 ip_savecontrol(inp, &opts, ip, n); 235 } 236#ifdef INET6 237 if (inp->inp_vflag & INP_IPV6) { 238 bzero(&udp_in6, sizeof(udp_in6)); 239 udp_in6.sin6_len = sizeof(udp_in6); 240 udp_in6.sin6_family = AF_INET6; 241 in6_sin_2_v4mapsin6(udp_in, &udp_in6); 242 append_sa = (struct sockaddr *)&udp_in6; 243 } else 244#endif 245 append_sa = (struct sockaddr *)udp_in; 246 m_adj(n, off); 247 248 so = inp->inp_socket; 249 SOCKBUF_LOCK(&so->so_rcv); 250 if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) { 251 INIT_VNET_INET(so->so_vnet); 252 SOCKBUF_UNLOCK(&so->so_rcv); 253 m_freem(n); 254 if (opts) 255 m_freem(opts); 256 V_udpstat.udps_fullsock++; 257 } else 258 sorwakeup_locked(so); 259} 260 261void 262udp_input(struct mbuf *m, int off) 263{ 264 INIT_VNET_INET(curvnet); 265 int iphlen = off; 266 struct ip *ip; 267 struct udphdr *uh; 268 struct ifnet *ifp; 269 struct inpcb *inp; 270 int len; 271 struct ip save_ip; 272 struct sockaddr_in udp_in; 273#ifdef IPFIREWALL_FORWARD 274 struct m_tag *fwd_tag; 275#endif 276 277 ifp = m->m_pkthdr.rcvif; 278 V_udpstat.udps_ipackets++; 279 280 /* 281 * Strip IP options, if any; should skip this, make available to 282 * user, and use on returned packets, but we don't yet have a way to 283 * check the checksum with options still present. 284 */ 285 if (iphlen > sizeof (struct ip)) { 286 ip_stripoptions(m, (struct mbuf *)0); 287 iphlen = sizeof(struct ip); 288 } 289 290 /* 291 * Get IP and UDP header together in first mbuf. 292 */ 293 ip = mtod(m, struct ip *); 294 if (m->m_len < iphlen + sizeof(struct udphdr)) { 295 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) { 296 V_udpstat.udps_hdrops++; 297 return; 298 } 299 ip = mtod(m, struct ip *); 300 } 301 uh = (struct udphdr *)((caddr_t)ip + iphlen); 302 303 /* 304 * Destination port of 0 is illegal, based on RFC768. 305 */ 306 if (uh->uh_dport == 0) 307 goto badunlocked; 308 309 /* 310 * Construct sockaddr format source address. Stuff source address 311 * and datagram in user buffer. 312 */ 313 bzero(&udp_in, sizeof(udp_in)); 314 udp_in.sin_len = sizeof(udp_in); 315 udp_in.sin_family = AF_INET; 316 udp_in.sin_port = uh->uh_sport; 317 udp_in.sin_addr = ip->ip_src; 318 319 /* 320 * Make mbuf data length reflect UDP length. If not enough data to 321 * reflect UDP length, drop. 322 */ 323 len = ntohs((u_short)uh->uh_ulen); 324 if (ip->ip_len != len) { 325 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 326 V_udpstat.udps_badlen++; 327 goto badunlocked; 328 } 329 m_adj(m, len - ip->ip_len); 330 /* ip->ip_len = len; */ 331 } 332 333 /* 334 * Save a copy of the IP header in case we want restore it for 335 * sending an ICMP error message in response. 336 */ 337 if (!V_udp_blackhole) 338 save_ip = *ip; 339 else 340 memset(&save_ip, 0, sizeof(save_ip)); 341 342 /* 343 * Checksum extended UDP header and data. 344 */ 345 if (uh->uh_sum) { 346 u_short uh_sum; 347 348 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 349 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 350 uh_sum = m->m_pkthdr.csum_data; 351 else 352 uh_sum = in_pseudo(ip->ip_src.s_addr, 353 ip->ip_dst.s_addr, htonl((u_short)len + 354 m->m_pkthdr.csum_data + IPPROTO_UDP)); 355 uh_sum ^= 0xffff; 356 } else { 357 char b[9]; 358 359 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 360 bzero(((struct ipovly *)ip)->ih_x1, 9); 361 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 362 uh_sum = in_cksum(m, len + sizeof (struct ip)); 363 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 364 } 365 if (uh_sum) { 366 V_udpstat.udps_badsum++; 367 m_freem(m); 368 return; 369 } 370 } else 371 V_udpstat.udps_nosum++; 372 373#ifdef IPFIREWALL_FORWARD 374 /* 375 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. 376 */ 377 fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); 378 if (fwd_tag != NULL) { 379 struct sockaddr_in *next_hop; 380 381 /* 382 * Do the hack. 383 */ 384 next_hop = (struct sockaddr_in *)(fwd_tag + 1); 385 ip->ip_dst = next_hop->sin_addr; 386 uh->uh_dport = ntohs(next_hop->sin_port); 387 388 /* 389 * Remove the tag from the packet. We don't need it anymore. 390 */ 391 m_tag_delete(m, fwd_tag); 392 } 393#endif 394 395 INP_INFO_RLOCK(&V_udbinfo); 396 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 397 in_broadcast(ip->ip_dst, ifp)) { 398 struct inpcb *last; 399 struct ip_moptions *imo; 400 401 last = NULL; 402 LIST_FOREACH(inp, &V_udb, inp_list) { 403 if (inp->inp_lport != uh->uh_dport) 404 continue; 405#ifdef INET6 406 if ((inp->inp_vflag & INP_IPV4) == 0) 407 continue; 408#endif 409 if (inp->inp_laddr.s_addr != INADDR_ANY && 410 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 411 continue; 412 if (inp->inp_faddr.s_addr != INADDR_ANY && 413 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 414 continue; 415 /* 416 * XXX: Do not check source port of incoming datagram 417 * unless inp_connect() has been called to bind the 418 * fport part of the 4-tuple; the source could be 419 * trying to talk to us with an ephemeral port. 420 */ 421 if (inp->inp_fport != 0 && 422 inp->inp_fport != uh->uh_sport) 423 continue; 424 425 INP_RLOCK(inp); 426 427 /* 428 * Handle socket delivery policy for any-source 429 * and source-specific multicast. [RFC3678] 430 */ 431 imo = inp->inp_moptions; 432 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 433 imo != NULL) { 434 struct sockaddr_in sin; 435 struct in_msource *ims; 436 int blocked, mode; 437 size_t idx; 438 439 bzero(&sin, sizeof(struct sockaddr_in)); 440 sin.sin_len = sizeof(struct sockaddr_in); 441 sin.sin_family = AF_INET; 442 sin.sin_addr = ip->ip_dst; 443 444 blocked = 0; 445 idx = imo_match_group(imo, ifp, 446 (struct sockaddr *)&sin); 447 if (idx == -1) { 448 /* 449 * No group membership for this socket. 450 * Do not bump udps_noportbcast, as 451 * this will happen further down. 452 */ 453 blocked++; 454 } else { 455 /* 456 * Check for a multicast source filter 457 * entry on this socket for this group. 458 * MCAST_EXCLUDE is the default 459 * behaviour. It means default accept; 460 * entries, if present, denote sources 461 * to be excluded from delivery. 462 */ 463 ims = imo_match_source(imo, idx, 464 (struct sockaddr *)&udp_in); 465 mode = imo->imo_mfilters[idx].imf_fmode; 466 if ((ims != NULL && 467 mode == MCAST_EXCLUDE) || 468 (ims == NULL && 469 mode == MCAST_INCLUDE)) { 470#ifdef DIAGNOSTIC 471 if (bootverbose) { 472 printf("%s: blocked by" 473 " source filter\n", 474 __func__); 475 } 476#endif 477 V_udpstat.udps_filtermcast++; 478 blocked++; 479 } 480 } 481 if (blocked != 0) { 482 INP_RUNLOCK(inp); 483 continue; 484 } 485 } 486 if (last != NULL) { 487 struct mbuf *n; 488 489 n = m_copy(m, 0, M_COPYALL); 490 if (n != NULL) 491 udp_append(last, ip, n, iphlen + 492 sizeof(struct udphdr), &udp_in); 493 INP_RUNLOCK(last); 494 } 495 last = inp; 496 /* 497 * Don't look for additional matches if this one does 498 * not have either the SO_REUSEPORT or SO_REUSEADDR 499 * socket options set. This heuristic avoids 500 * searching through all pcbs in the common case of a 501 * non-shared port. It assumes that an application 502 * will never clear these options after setting them. 503 */ 504 if ((last->inp_socket->so_options & 505 (SO_REUSEPORT|SO_REUSEADDR)) == 0) 506 break; 507 } 508 509 if (last == NULL) { 510 /* 511 * No matching pcb found; discard datagram. (No need 512 * to send an ICMP Port Unreachable for a broadcast 513 * or multicast datgram.) 514 */ 515 V_udpstat.udps_noportbcast++; 516 goto badheadlocked; 517 } 518 udp_append(last, ip, m, iphlen + sizeof(struct udphdr), 519 &udp_in); 520 INP_RUNLOCK(last); 521 INP_INFO_RUNLOCK(&V_udbinfo); 522 return; 523 } 524 525 /* 526 * Locate pcb for datagram. 527 */ 528 inp = in_pcblookup_hash(&V_udbinfo, ip->ip_src, uh->uh_sport, 529 ip->ip_dst, uh->uh_dport, 1, ifp); 530 if (inp == NULL) { 531 if (udp_log_in_vain) { 532 char buf[4*sizeof "123"]; 533 534 strcpy(buf, inet_ntoa(ip->ip_dst)); 535 log(LOG_INFO, 536 "Connection attempt to UDP %s:%d from %s:%d\n", 537 buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), 538 ntohs(uh->uh_sport)); 539 } 540 V_udpstat.udps_noport++; 541 if (m->m_flags & (M_BCAST | M_MCAST)) { 542 V_udpstat.udps_noportbcast++; 543 goto badheadlocked; 544 } 545 if (V_udp_blackhole) 546 goto badheadlocked; 547 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 548 goto badheadlocked; 549 *ip = save_ip; 550 ip->ip_len += iphlen; 551 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 552 INP_INFO_RUNLOCK(&V_udbinfo); 553 return; 554 } 555 556 /* 557 * Check the minimum TTL for socket. 558 */ 559 INP_RLOCK(inp); 560 INP_INFO_RUNLOCK(&V_udbinfo); 561 if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) { 562 INP_RUNLOCK(inp); 563 goto badunlocked; 564 } 565 udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in); 566 INP_RUNLOCK(inp); 567 return; 568 569badheadlocked: 570 if (inp) 571 INP_RUNLOCK(inp); 572 INP_INFO_RUNLOCK(&V_udbinfo); 573badunlocked: 574 m_freem(m); 575} 576 577/* 578 * Notify a udp user of an asynchronous error; just wake up so that they can 579 * collect error status. 580 */ 581struct inpcb * 582udp_notify(struct inpcb *inp, int errno) 583{ 584 585 /* 586 * While udp_ctlinput() always calls udp_notify() with a read lock 587 * when invoking it directly, in_pcbnotifyall() currently uses write 588 * locks due to sharing code with TCP. For now, accept either a read 589 * or a write lock, but a read lock is sufficient. 590 */ 591 INP_LOCK_ASSERT(inp); 592 593 inp->inp_socket->so_error = errno; 594 sorwakeup(inp->inp_socket); 595 sowwakeup(inp->inp_socket); 596 return (inp); 597} 598 599void 600udp_ctlinput(int cmd, struct sockaddr *sa, void *vip) 601{ 602 INIT_VNET_INET(curvnet); 603 struct ip *ip = vip; 604 struct udphdr *uh; 605 struct in_addr faddr; 606 struct inpcb *inp; 607 608 faddr = ((struct sockaddr_in *)sa)->sin_addr; 609 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 610 return; 611 612 /* 613 * Redirects don't need to be handled up here. 614 */ 615 if (PRC_IS_REDIRECT(cmd)) 616 return; 617 618 /* 619 * Hostdead is ugly because it goes linearly through all PCBs. 620 * 621 * XXX: We never get this from ICMP, otherwise it makes an excellent 622 * DoS attack on machines with many connections. 623 */ 624 if (cmd == PRC_HOSTDEAD) 625 ip = NULL; 626 else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) 627 return; 628 if (ip != NULL) { 629 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 630 INP_INFO_RLOCK(&V_udbinfo); 631 inp = in_pcblookup_hash(&V_udbinfo, faddr, uh->uh_dport, 632 ip->ip_src, uh->uh_sport, 0, NULL); 633 if (inp != NULL) { 634 INP_RLOCK(inp); 635 if (inp->inp_socket != NULL) { 636 udp_notify(inp, inetctlerrmap[cmd]); 637 } 638 INP_RUNLOCK(inp); 639 } 640 INP_INFO_RUNLOCK(&V_udbinfo); 641 } else 642 in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd], 643 udp_notify); 644} 645 646static int 647udp_pcblist(SYSCTL_HANDLER_ARGS) 648{ 649 INIT_VNET_INET(curvnet); 650 int error, i, n; 651 struct inpcb *inp, **inp_list; 652 inp_gen_t gencnt; 653 struct xinpgen xig; 654 655 /* 656 * The process of preparing the PCB list is too time-consuming and 657 * resource-intensive to repeat twice on every request. 658 */ 659 if (req->oldptr == 0) { 660 n = V_udbinfo.ipi_count; 661 req->oldidx = 2 * (sizeof xig) 662 + (n + n/8) * sizeof(struct xinpcb); 663 return (0); 664 } 665 666 if (req->newptr != 0) 667 return (EPERM); 668 669 /* 670 * OK, now we're committed to doing something. 671 */ 672 INP_INFO_RLOCK(&V_udbinfo); 673 gencnt = V_udbinfo.ipi_gencnt; 674 n = V_udbinfo.ipi_count; 675 INP_INFO_RUNLOCK(&V_udbinfo); 676 677 error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) 678 + n * sizeof(struct xinpcb)); 679 if (error != 0) 680 return (error); 681 682 xig.xig_len = sizeof xig; 683 xig.xig_count = n; 684 xig.xig_gen = gencnt; 685 xig.xig_sogen = so_gencnt; 686 error = SYSCTL_OUT(req, &xig, sizeof xig); 687 if (error) 688 return (error); 689 690 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 691 if (inp_list == 0) 692 return (ENOMEM); 693 694 INP_INFO_RLOCK(&V_udbinfo); 695 for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; 696 inp = LIST_NEXT(inp, inp_list)) { 697 INP_RLOCK(inp); 698 if (inp->inp_gencnt <= gencnt && 699 cr_canseeinpcb(req->td->td_ucred, inp) == 0) 700 inp_list[i++] = inp; 701 INP_RUNLOCK(inp); 702 } 703 INP_INFO_RUNLOCK(&V_udbinfo); 704 n = i; 705 706 error = 0; 707 for (i = 0; i < n; i++) { 708 inp = inp_list[i]; 709 INP_RLOCK(inp); 710 if (inp->inp_gencnt <= gencnt) { 711 struct xinpcb xi; 712 bzero(&xi, sizeof(xi)); 713 xi.xi_len = sizeof xi; 714 /* XXX should avoid extra copy */ 715 bcopy(inp, &xi.xi_inp, sizeof *inp); 716 if (inp->inp_socket) 717 sotoxsocket(inp->inp_socket, &xi.xi_socket); 718 xi.xi_inp.inp_gencnt = inp->inp_gencnt; 719 INP_RUNLOCK(inp); 720 error = SYSCTL_OUT(req, &xi, sizeof xi); 721 } else 722 INP_RUNLOCK(inp); 723 } 724 if (!error) { 725 /* 726 * Give the user an updated idea of our state. If the 727 * generation differs from what we told her before, she knows 728 * that something happened while we were processing this 729 * request, and it might be necessary to retry. 730 */ 731 INP_INFO_RLOCK(&V_udbinfo); 732 xig.xig_gen = V_udbinfo.ipi_gencnt; 733 xig.xig_sogen = so_gencnt; 734 xig.xig_count = V_udbinfo.ipi_count; 735 INP_INFO_RUNLOCK(&V_udbinfo); 736 error = SYSCTL_OUT(req, &xig, sizeof xig); 737 } 738 free(inp_list, M_TEMP); 739 return (error); 740} 741 742SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, 743 udp_pcblist, "S,xinpcb", "List of active UDP sockets"); 744 745static int 746udp_getcred(SYSCTL_HANDLER_ARGS) 747{ 748 INIT_VNET_INET(curvnet); 749 struct xucred xuc; 750 struct sockaddr_in addrs[2]; 751 struct inpcb *inp; 752 int error; 753 754 error = priv_check(req->td, PRIV_NETINET_GETCRED); 755 if (error) 756 return (error); 757 error = SYSCTL_IN(req, addrs, sizeof(addrs)); 758 if (error) 759 return (error); 760 INP_INFO_RLOCK(&V_udbinfo); 761 inp = in_pcblookup_hash(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port, 762 addrs[0].sin_addr, addrs[0].sin_port, 1, NULL); 763 if (inp != NULL) { 764 INP_RLOCK(inp); 765 INP_INFO_RUNLOCK(&V_udbinfo); 766 if (inp->inp_socket == NULL) 767 error = ENOENT; 768 if (error == 0) 769 error = cr_canseeinpcb(req->td->td_ucred, inp); 770 if (error == 0) 771 cru2x(inp->inp_cred, &xuc); 772 INP_RUNLOCK(inp); 773 } else { 774 INP_INFO_RUNLOCK(&V_udbinfo); 775 error = ENOENT; 776 } 777 if (error == 0) 778 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); 779 return (error); 780} 781 782SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, 783 CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, 784 udp_getcred, "S,xucred", "Get the xucred of a UDP connection"); 785 786static int 787udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, 788 struct mbuf *control, struct thread *td) 789{ 790 INIT_VNET_INET(inp->inp_vnet); 791 struct udpiphdr *ui; 792 int len = m->m_pkthdr.len; 793 struct in_addr faddr, laddr; 794 struct cmsghdr *cm; 795 struct sockaddr_in *sin, src; 796 int error = 0; 797 int ipflags; 798 u_short fport, lport; 799 int unlock_udbinfo; 800 801 /* 802 * udp_output() may need to temporarily bind or connect the current 803 * inpcb. As such, we don't know up front whether we will need the 804 * pcbinfo lock or not. Do any work to decide what is needed up 805 * front before acquiring any locks. 806 */ 807 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 808 if (control) 809 m_freem(control); 810 m_freem(m); 811 return (EMSGSIZE); 812 } 813 814 src.sin_family = 0; 815 if (control != NULL) { 816 /* 817 * XXX: Currently, we assume all the optional information is 818 * stored in a single mbuf. 819 */ 820 if (control->m_next) { 821 m_freem(control); 822 m_freem(m); 823 return (EINVAL); 824 } 825 for (; control->m_len > 0; 826 control->m_data += CMSG_ALIGN(cm->cmsg_len), 827 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { 828 cm = mtod(control, struct cmsghdr *); 829 if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 830 || cm->cmsg_len > control->m_len) { 831 error = EINVAL; 832 break; 833 } 834 if (cm->cmsg_level != IPPROTO_IP) 835 continue; 836 837 switch (cm->cmsg_type) { 838 case IP_SENDSRCADDR: 839 if (cm->cmsg_len != 840 CMSG_LEN(sizeof(struct in_addr))) { 841 error = EINVAL; 842 break; 843 } 844 bzero(&src, sizeof(src)); 845 src.sin_family = AF_INET; 846 src.sin_len = sizeof(src); 847 src.sin_port = inp->inp_lport; 848 src.sin_addr = 849 *(struct in_addr *)CMSG_DATA(cm); 850 break; 851 852 default: 853 error = ENOPROTOOPT; 854 break; 855 } 856 if (error) 857 break; 858 } 859 m_freem(control); 860 } 861 if (error) { 862 m_freem(m); 863 return (error); 864 } 865 866 /* 867 * Depending on whether or not the application has bound or connected 868 * the socket, we may have to do varying levels of work. The optimal 869 * case is for a connected UDP socket, as a global lock isn't 870 * required at all. 871 * 872 * In order to decide which we need, we require stability of the 873 * inpcb binding, which we ensure by acquiring a read lock on the 874 * inpcb. This doesn't strictly follow the lock order, so we play 875 * the trylock and retry game; note that we may end up with more 876 * conservative locks than required the second time around, so later 877 * assertions have to accept that. Further analysis of the number of 878 * misses under contention is required. 879 */ 880 sin = (struct sockaddr_in *)addr; 881 INP_RLOCK(inp); 882 if (sin != NULL && 883 (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { 884 INP_RUNLOCK(inp); 885 INP_INFO_WLOCK(&V_udbinfo); 886 INP_WLOCK(inp); 887 unlock_udbinfo = 2; 888 } else if ((sin != NULL && ( 889 (sin->sin_addr.s_addr == INADDR_ANY) || 890 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 891 (inp->inp_laddr.s_addr == INADDR_ANY) || 892 (inp->inp_lport == 0))) || 893 (src.sin_family == AF_INET)) { 894 if (!INP_INFO_TRY_RLOCK(&V_udbinfo)) { 895 INP_RUNLOCK(inp); 896 INP_INFO_RLOCK(&V_udbinfo); 897 INP_RLOCK(inp); 898 } 899 unlock_udbinfo = 1; 900 } else 901 unlock_udbinfo = 0; 902 903 /* 904 * If the IP_SENDSRCADDR control message was specified, override the 905 * source address for this datagram. Its use is invalidated if the 906 * address thus specified is incomplete or clobbers other inpcbs. 907 */ 908 laddr = inp->inp_laddr; 909 lport = inp->inp_lport; 910 if (src.sin_family == AF_INET) { 911 INP_INFO_LOCK_ASSERT(&V_udbinfo); 912 if ((lport == 0) || 913 (laddr.s_addr == INADDR_ANY && 914 src.sin_addr.s_addr == INADDR_ANY)) { 915 error = EINVAL; 916 goto release; 917 } 918 error = in_pcbbind_setup(inp, (struct sockaddr *)&src, 919 &laddr.s_addr, &lport, td->td_ucred); 920 if (error) 921 goto release; 922 } 923 924 /* 925 * If a UDP socket has been connected, then a local address/port will 926 * have been selected and bound. 927 * 928 * If a UDP socket has not been connected to, then an explicit 929 * destination address must be used, in which case a local 930 * address/port may not have been selected and bound. 931 */ 932 if (sin != NULL) { 933 INP_LOCK_ASSERT(inp); 934 if (inp->inp_faddr.s_addr != INADDR_ANY) { 935 error = EISCONN; 936 goto release; 937 } 938 939 /* 940 * Jail may rewrite the destination address, so let it do 941 * that before we use it. 942 */ 943 if (prison_remote_ip4(td->td_ucred, &sin->sin_addr) != 0) { 944 error = EINVAL; 945 goto release; 946 } 947 948 /* 949 * If a local address or port hasn't yet been selected, or if 950 * the destination address needs to be rewritten due to using 951 * a special INADDR_ constant, invoke in_pcbconnect_setup() 952 * to do the heavy lifting. Once a port is selected, we 953 * commit the binding back to the socket; we also commit the 954 * binding of the address if in jail. 955 * 956 * If we already have a valid binding and we're not 957 * requesting a destination address rewrite, use a fast path. 958 */ 959 if (inp->inp_laddr.s_addr == INADDR_ANY || 960 inp->inp_lport == 0 || 961 sin->sin_addr.s_addr == INADDR_ANY || 962 sin->sin_addr.s_addr == INADDR_BROADCAST) { 963 INP_INFO_LOCK_ASSERT(&V_udbinfo); 964 error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, 965 &lport, &faddr.s_addr, &fport, NULL, 966 td->td_ucred); 967 if (error) 968 goto release; 969 970 /* 971 * XXXRW: Why not commit the port if the address is 972 * !INADDR_ANY? 973 */ 974 /* Commit the local port if newly assigned. */ 975 if (inp->inp_laddr.s_addr == INADDR_ANY && 976 inp->inp_lport == 0) { 977 INP_INFO_WLOCK_ASSERT(&V_udbinfo); 978 INP_WLOCK_ASSERT(inp); 979 /* 980 * Remember addr if jailed, to prevent 981 * rebinding. 982 */ 983 if (jailed(td->td_ucred)) 984 inp->inp_laddr = laddr; 985 inp->inp_lport = lport; 986 if (in_pcbinshash(inp) != 0) { 987 inp->inp_lport = 0; 988 error = EAGAIN; 989 goto release; 990 } 991 inp->inp_flags |= INP_ANONPORT; 992 } 993 } else { 994 faddr = sin->sin_addr; 995 fport = sin->sin_port; 996 } 997 } else { 998 INP_LOCK_ASSERT(inp); 999 faddr = inp->inp_faddr; 1000 fport = inp->inp_fport; 1001 if (faddr.s_addr == INADDR_ANY) { 1002 error = ENOTCONN; 1003 goto release; 1004 } 1005 } 1006 1007 /* 1008 * Calculate data length and get a mbuf for UDP, IP, and possible 1009 * link-layer headers. Immediate slide the data pointer back forward 1010 * since we won't use that space at this layer. 1011 */ 1012 M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT); 1013 if (m == NULL) { 1014 error = ENOBUFS; 1015 goto release; 1016 } 1017 m->m_data += max_linkhdr; 1018 m->m_len -= max_linkhdr; 1019 m->m_pkthdr.len -= max_linkhdr; 1020 1021 /* 1022 * Fill in mbuf with extended UDP header and addresses and length put 1023 * into network format. 1024 */ 1025 ui = mtod(m, struct udpiphdr *); 1026 bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */ 1027 ui->ui_pr = IPPROTO_UDP; 1028 ui->ui_src = laddr; 1029 ui->ui_dst = faddr; 1030 ui->ui_sport = lport; 1031 ui->ui_dport = fport; 1032 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1033 1034 /* 1035 * Set the Don't Fragment bit in the IP header. 1036 */ 1037 if (inp->inp_flags & INP_DONTFRAG) { 1038 struct ip *ip; 1039 1040 ip = (struct ip *)&ui->ui_i; 1041 ip->ip_off |= IP_DF; 1042 } 1043 1044 ipflags = 0; 1045 if (inp->inp_socket->so_options & SO_DONTROUTE) 1046 ipflags |= IP_ROUTETOIF; 1047 if (inp->inp_socket->so_options & SO_BROADCAST) 1048 ipflags |= IP_ALLOWBROADCAST; 1049 if (inp->inp_flags & INP_ONESBCAST) 1050 ipflags |= IP_SENDONES; 1051 1052#ifdef MAC 1053 mac_inpcb_create_mbuf(inp, m); 1054#endif 1055 1056 /* 1057 * Set up checksum and output datagram. 1058 */ 1059 if (udp_cksum) { 1060 if (inp->inp_flags & INP_ONESBCAST) 1061 faddr.s_addr = INADDR_BROADCAST; 1062 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr, 1063 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1064 m->m_pkthdr.csum_flags = CSUM_UDP; 1065 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1066 } else 1067 ui->ui_sum = 0; 1068 ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len; 1069 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1070 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1071 V_udpstat.udps_opackets++; 1072 1073 if (unlock_udbinfo == 2) 1074 INP_INFO_WUNLOCK(&V_udbinfo); 1075 else if (unlock_udbinfo == 1) 1076 INP_INFO_RUNLOCK(&V_udbinfo); 1077 error = ip_output(m, inp->inp_options, NULL, ipflags, 1078 inp->inp_moptions, inp); 1079 if (unlock_udbinfo == 2) 1080 INP_WUNLOCK(inp); 1081 else 1082 INP_RUNLOCK(inp); 1083 return (error); 1084 1085release: 1086 if (unlock_udbinfo == 2) { 1087 INP_WUNLOCK(inp); 1088 INP_INFO_WUNLOCK(&V_udbinfo); 1089 } else if (unlock_udbinfo == 1) { 1090 INP_RUNLOCK(inp); 1091 INP_INFO_RUNLOCK(&V_udbinfo); 1092 } else 1093 INP_RUNLOCK(inp); 1094 m_freem(m); 1095 return (error); 1096} 1097 1098static void 1099udp_abort(struct socket *so) 1100{ 1101 INIT_VNET_INET(so->so_vnet); 1102 struct inpcb *inp; 1103 1104 inp = sotoinpcb(so); 1105 KASSERT(inp != NULL, ("udp_abort: inp == NULL")); 1106 INP_INFO_WLOCK(&V_udbinfo); 1107 INP_WLOCK(inp); 1108 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1109 in_pcbdisconnect(inp); 1110 inp->inp_laddr.s_addr = INADDR_ANY; 1111 soisdisconnected(so); 1112 } 1113 INP_WUNLOCK(inp); 1114 INP_INFO_WUNLOCK(&V_udbinfo); 1115} 1116 1117static int 1118udp_attach(struct socket *so, int proto, struct thread *td) 1119{ 1120 INIT_VNET_INET(so->so_vnet); 1121 struct inpcb *inp; 1122 int error; 1123 1124 inp = sotoinpcb(so); 1125 KASSERT(inp == NULL, ("udp_attach: inp != NULL")); 1126 error = soreserve(so, udp_sendspace, udp_recvspace); 1127 if (error) 1128 return (error); 1129 INP_INFO_WLOCK(&V_udbinfo); 1130 error = in_pcballoc(so, &V_udbinfo); 1131 if (error) { 1132 INP_INFO_WUNLOCK(&V_udbinfo); 1133 return (error); 1134 } 1135 1136 inp = (struct inpcb *)so->so_pcb; 1137 INP_INFO_WUNLOCK(&V_udbinfo); 1138 inp->inp_vflag |= INP_IPV4; 1139 inp->inp_ip_ttl = V_ip_defttl; 1140 INP_WUNLOCK(inp); 1141 return (0); 1142} 1143 1144static int 1145udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 1146{ 1147 INIT_VNET_INET(so->so_vnet); 1148 struct inpcb *inp; 1149 int error; 1150 1151 inp = sotoinpcb(so); 1152 KASSERT(inp != NULL, ("udp_bind: inp == NULL")); 1153 INP_INFO_WLOCK(&V_udbinfo); 1154 INP_WLOCK(inp); 1155 error = in_pcbbind(inp, nam, td->td_ucred); 1156 INP_WUNLOCK(inp); 1157 INP_INFO_WUNLOCK(&V_udbinfo); 1158 return (error); 1159} 1160 1161static void 1162udp_close(struct socket *so) 1163{ 1164 INIT_VNET_INET(so->so_vnet); 1165 struct inpcb *inp; 1166 1167 inp = sotoinpcb(so); 1168 KASSERT(inp != NULL, ("udp_close: inp == NULL")); 1169 INP_INFO_WLOCK(&V_udbinfo); 1170 INP_WLOCK(inp); 1171 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1172 in_pcbdisconnect(inp); 1173 inp->inp_laddr.s_addr = INADDR_ANY; 1174 soisdisconnected(so); 1175 } 1176 INP_WUNLOCK(inp); 1177 INP_INFO_WUNLOCK(&V_udbinfo); 1178} 1179 1180static int 1181udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 1182{ 1183 INIT_VNET_INET(so->so_vnet); 1184 struct inpcb *inp; 1185 int error; 1186 struct sockaddr_in *sin; 1187 1188 inp = sotoinpcb(so); 1189 KASSERT(inp != NULL, ("udp_connect: inp == NULL")); 1190 INP_INFO_WLOCK(&V_udbinfo); 1191 INP_WLOCK(inp); 1192 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1193 INP_WUNLOCK(inp); 1194 INP_INFO_WUNLOCK(&V_udbinfo); 1195 return (EISCONN); 1196 } 1197 sin = (struct sockaddr_in *)nam; 1198 if (prison_remote_ip4(td->td_ucred, &sin->sin_addr) != 0) { 1199 INP_WUNLOCK(inp); 1200 INP_INFO_WUNLOCK(&udbinfo); 1201 return (EAFNOSUPPORT); 1202 } 1203 error = in_pcbconnect(inp, nam, td->td_ucred); 1204 if (error == 0) 1205 soisconnected(so); 1206 INP_WUNLOCK(inp); 1207 INP_INFO_WUNLOCK(&V_udbinfo); 1208 return (error); 1209} 1210 1211static void 1212udp_detach(struct socket *so) 1213{ 1214 INIT_VNET_INET(so->so_vnet); 1215 struct inpcb *inp; 1216 1217 inp = sotoinpcb(so); 1218 KASSERT(inp != NULL, ("udp_detach: inp == NULL")); 1219 KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, 1220 ("udp_detach: not disconnected")); 1221 INP_INFO_WLOCK(&V_udbinfo); 1222 INP_WLOCK(inp); 1223 in_pcbdetach(inp); 1224 in_pcbfree(inp); 1225 INP_INFO_WUNLOCK(&V_udbinfo); 1226} 1227 1228static int 1229udp_disconnect(struct socket *so) 1230{ 1231 INIT_VNET_INET(so->so_vnet); 1232 struct inpcb *inp; 1233 1234 inp = sotoinpcb(so); 1235 KASSERT(inp != NULL, ("udp_disconnect: inp == NULL")); 1236 INP_INFO_WLOCK(&V_udbinfo); 1237 INP_WLOCK(inp); 1238 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1239 INP_WUNLOCK(inp); 1240 INP_INFO_WUNLOCK(&V_udbinfo); 1241 return (ENOTCONN); 1242 } 1243 1244 in_pcbdisconnect(inp); 1245 inp->inp_laddr.s_addr = INADDR_ANY; 1246 SOCK_LOCK(so); 1247 so->so_state &= ~SS_ISCONNECTED; /* XXX */ 1248 SOCK_UNLOCK(so); 1249 INP_WUNLOCK(inp); 1250 INP_INFO_WUNLOCK(&V_udbinfo); 1251 return (0); 1252} 1253 1254static int 1255udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, 1256 struct mbuf *control, struct thread *td) 1257{ 1258 struct inpcb *inp; 1259 1260 inp = sotoinpcb(so); 1261 KASSERT(inp != NULL, ("udp_send: inp == NULL")); 1262 return (udp_output(inp, m, addr, control, td)); 1263} 1264 1265int 1266udp_shutdown(struct socket *so) 1267{ 1268 struct inpcb *inp; 1269 1270 inp = sotoinpcb(so); 1271 KASSERT(inp != NULL, ("udp_shutdown: inp == NULL")); 1272 INP_WLOCK(inp); 1273 socantsendmore(so); 1274 INP_WUNLOCK(inp); 1275 return (0); 1276} 1277 1278struct pr_usrreqs udp_usrreqs = { 1279 .pru_abort = udp_abort, 1280 .pru_attach = udp_attach, 1281 .pru_bind = udp_bind, 1282 .pru_connect = udp_connect, 1283 .pru_control = in_control, 1284 .pru_detach = udp_detach, 1285 .pru_disconnect = udp_disconnect, 1286 .pru_peeraddr = in_getpeeraddr, 1287 .pru_send = udp_send, 1288 .pru_soreceive = soreceive_dgram, 1289 .pru_sosend = sosend_dgram, 1290 .pru_shutdown = udp_shutdown, 1291 .pru_sockaddr = in_getsockaddr, 1292 .pru_sosetlabel = in_pcbsosetlabel, 1293 .pru_close = udp_close, 1294};
| 85 86#ifdef IPSEC 87#include <netipsec/ipsec.h> 88#endif 89 90#include <machine/in_cksum.h> 91 92#include <security/mac/mac_framework.h> 93 94/* 95 * UDP protocol implementation. 96 * Per RFC 768, August, 1980. 97 */ 98 99#ifdef VIMAGE_GLOBALS 100int udp_blackhole; 101#endif 102 103/* 104 * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums 105 * removes the only data integrity mechanism for packets and malformed 106 * packets that would otherwise be discarded due to bad checksums, and may 107 * cause problems (especially for NFS data blocks). 108 */ 109static int udp_cksum = 1; 110SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udp_cksum, 111 0, "compute udp checksum"); 112 113int udp_log_in_vain = 0; 114SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 115 &udp_log_in_vain, 0, "Log all incoming UDP packets"); 116 117SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_udp, OID_AUTO, blackhole, 118 CTLFLAG_RW, udp_blackhole, 0, 119 "Do not send port unreachables for refused connects"); 120 121u_long udp_sendspace = 9216; /* really max datagram size */ 122 /* 40 1K datagrams */ 123SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 124 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 125 126u_long udp_recvspace = 40 * (1024 + 127#ifdef INET6 128 sizeof(struct sockaddr_in6) 129#else 130 sizeof(struct sockaddr_in) 131#endif 132 ); 133 134SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 135 &udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); 136 137#ifdef VIMAGE_GLOBALS 138struct inpcbhead udb; /* from udp_var.h */ 139struct inpcbinfo udbinfo; 140struct udpstat udpstat; /* from udp_var.h */ 141#endif 142 143#ifndef UDBHASHSIZE 144#define UDBHASHSIZE 128 145#endif 146 147SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_udp, UDPCTL_STATS, stats, 148 CTLFLAG_RW, udpstat, udpstat, 149 "UDP statistics (struct udpstat, netinet/udp_var.h)"); 150 151static void udp_detach(struct socket *so); 152static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, 153 struct mbuf *, struct thread *); 154 155static void 156udp_zone_change(void *tag) 157{ 158 159 uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets); 160} 161 162static int 163udp_inpcb_init(void *mem, int size, int flags) 164{ 165 struct inpcb *inp; 166 167 inp = mem; 168 INP_LOCK_INIT(inp, "inp", "udpinp"); 169 return (0); 170} 171 172void 173udp_init(void) 174{ 175 INIT_VNET_INET(curvnet); 176 177 V_udp_blackhole = 0; 178 179 INP_INFO_LOCK_INIT(&V_udbinfo, "udp"); 180 LIST_INIT(&V_udb); 181 V_udbinfo.ipi_listhead = &V_udb; 182 V_udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB, 183 &V_udbinfo.ipi_hashmask); 184 V_udbinfo.ipi_porthashbase = hashinit(UDBHASHSIZE, M_PCB, 185 &V_udbinfo.ipi_porthashmask); 186 V_udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL, 187 NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 188 uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets); 189 EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL, 190 EVENTHANDLER_PRI_ANY); 191} 192 193/* 194 * Subroutine of udp_input(), which appends the provided mbuf chain to the 195 * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that 196 * contains the source address. If the socket ends up being an IPv6 socket, 197 * udp_append() will convert to a sockaddr_in6 before passing the address 198 * into the socket code. 199 */ 200static void 201udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, 202 struct sockaddr_in *udp_in) 203{ 204 struct sockaddr *append_sa; 205 struct socket *so; 206 struct mbuf *opts = 0; 207#ifdef INET6 208 struct sockaddr_in6 udp_in6; 209#endif 210 211 INP_RLOCK_ASSERT(inp); 212 213#ifdef IPSEC 214 /* Check AH/ESP integrity. */ 215 if (ipsec4_in_reject(n, inp)) { 216 INIT_VNET_IPSEC(curvnet); 217 m_freem(n); 218 V_ipsec4stat.in_polvio++; 219 return; 220 } 221#endif /* IPSEC */ 222#ifdef MAC 223 if (mac_inpcb_check_deliver(inp, n) != 0) { 224 m_freem(n); 225 return; 226 } 227#endif 228 if (inp->inp_flags & INP_CONTROLOPTS || 229 inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) { 230#ifdef INET6 231 if (inp->inp_vflag & INP_IPV6) 232 (void)ip6_savecontrol_v4(inp, n, &opts, NULL); 233 else 234#endif 235 ip_savecontrol(inp, &opts, ip, n); 236 } 237#ifdef INET6 238 if (inp->inp_vflag & INP_IPV6) { 239 bzero(&udp_in6, sizeof(udp_in6)); 240 udp_in6.sin6_len = sizeof(udp_in6); 241 udp_in6.sin6_family = AF_INET6; 242 in6_sin_2_v4mapsin6(udp_in, &udp_in6); 243 append_sa = (struct sockaddr *)&udp_in6; 244 } else 245#endif 246 append_sa = (struct sockaddr *)udp_in; 247 m_adj(n, off); 248 249 so = inp->inp_socket; 250 SOCKBUF_LOCK(&so->so_rcv); 251 if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) { 252 INIT_VNET_INET(so->so_vnet); 253 SOCKBUF_UNLOCK(&so->so_rcv); 254 m_freem(n); 255 if (opts) 256 m_freem(opts); 257 V_udpstat.udps_fullsock++; 258 } else 259 sorwakeup_locked(so); 260} 261 262void 263udp_input(struct mbuf *m, int off) 264{ 265 INIT_VNET_INET(curvnet); 266 int iphlen = off; 267 struct ip *ip; 268 struct udphdr *uh; 269 struct ifnet *ifp; 270 struct inpcb *inp; 271 int len; 272 struct ip save_ip; 273 struct sockaddr_in udp_in; 274#ifdef IPFIREWALL_FORWARD 275 struct m_tag *fwd_tag; 276#endif 277 278 ifp = m->m_pkthdr.rcvif; 279 V_udpstat.udps_ipackets++; 280 281 /* 282 * Strip IP options, if any; should skip this, make available to 283 * user, and use on returned packets, but we don't yet have a way to 284 * check the checksum with options still present. 285 */ 286 if (iphlen > sizeof (struct ip)) { 287 ip_stripoptions(m, (struct mbuf *)0); 288 iphlen = sizeof(struct ip); 289 } 290 291 /* 292 * Get IP and UDP header together in first mbuf. 293 */ 294 ip = mtod(m, struct ip *); 295 if (m->m_len < iphlen + sizeof(struct udphdr)) { 296 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) { 297 V_udpstat.udps_hdrops++; 298 return; 299 } 300 ip = mtod(m, struct ip *); 301 } 302 uh = (struct udphdr *)((caddr_t)ip + iphlen); 303 304 /* 305 * Destination port of 0 is illegal, based on RFC768. 306 */ 307 if (uh->uh_dport == 0) 308 goto badunlocked; 309 310 /* 311 * Construct sockaddr format source address. Stuff source address 312 * and datagram in user buffer. 313 */ 314 bzero(&udp_in, sizeof(udp_in)); 315 udp_in.sin_len = sizeof(udp_in); 316 udp_in.sin_family = AF_INET; 317 udp_in.sin_port = uh->uh_sport; 318 udp_in.sin_addr = ip->ip_src; 319 320 /* 321 * Make mbuf data length reflect UDP length. If not enough data to 322 * reflect UDP length, drop. 323 */ 324 len = ntohs((u_short)uh->uh_ulen); 325 if (ip->ip_len != len) { 326 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 327 V_udpstat.udps_badlen++; 328 goto badunlocked; 329 } 330 m_adj(m, len - ip->ip_len); 331 /* ip->ip_len = len; */ 332 } 333 334 /* 335 * Save a copy of the IP header in case we want restore it for 336 * sending an ICMP error message in response. 337 */ 338 if (!V_udp_blackhole) 339 save_ip = *ip; 340 else 341 memset(&save_ip, 0, sizeof(save_ip)); 342 343 /* 344 * Checksum extended UDP header and data. 345 */ 346 if (uh->uh_sum) { 347 u_short uh_sum; 348 349 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 350 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 351 uh_sum = m->m_pkthdr.csum_data; 352 else 353 uh_sum = in_pseudo(ip->ip_src.s_addr, 354 ip->ip_dst.s_addr, htonl((u_short)len + 355 m->m_pkthdr.csum_data + IPPROTO_UDP)); 356 uh_sum ^= 0xffff; 357 } else { 358 char b[9]; 359 360 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 361 bzero(((struct ipovly *)ip)->ih_x1, 9); 362 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 363 uh_sum = in_cksum(m, len + sizeof (struct ip)); 364 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 365 } 366 if (uh_sum) { 367 V_udpstat.udps_badsum++; 368 m_freem(m); 369 return; 370 } 371 } else 372 V_udpstat.udps_nosum++; 373 374#ifdef IPFIREWALL_FORWARD 375 /* 376 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. 377 */ 378 fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); 379 if (fwd_tag != NULL) { 380 struct sockaddr_in *next_hop; 381 382 /* 383 * Do the hack. 384 */ 385 next_hop = (struct sockaddr_in *)(fwd_tag + 1); 386 ip->ip_dst = next_hop->sin_addr; 387 uh->uh_dport = ntohs(next_hop->sin_port); 388 389 /* 390 * Remove the tag from the packet. We don't need it anymore. 391 */ 392 m_tag_delete(m, fwd_tag); 393 } 394#endif 395 396 INP_INFO_RLOCK(&V_udbinfo); 397 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 398 in_broadcast(ip->ip_dst, ifp)) { 399 struct inpcb *last; 400 struct ip_moptions *imo; 401 402 last = NULL; 403 LIST_FOREACH(inp, &V_udb, inp_list) { 404 if (inp->inp_lport != uh->uh_dport) 405 continue; 406#ifdef INET6 407 if ((inp->inp_vflag & INP_IPV4) == 0) 408 continue; 409#endif 410 if (inp->inp_laddr.s_addr != INADDR_ANY && 411 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 412 continue; 413 if (inp->inp_faddr.s_addr != INADDR_ANY && 414 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 415 continue; 416 /* 417 * XXX: Do not check source port of incoming datagram 418 * unless inp_connect() has been called to bind the 419 * fport part of the 4-tuple; the source could be 420 * trying to talk to us with an ephemeral port. 421 */ 422 if (inp->inp_fport != 0 && 423 inp->inp_fport != uh->uh_sport) 424 continue; 425 426 INP_RLOCK(inp); 427 428 /* 429 * Handle socket delivery policy for any-source 430 * and source-specific multicast. [RFC3678] 431 */ 432 imo = inp->inp_moptions; 433 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 434 imo != NULL) { 435 struct sockaddr_in sin; 436 struct in_msource *ims; 437 int blocked, mode; 438 size_t idx; 439 440 bzero(&sin, sizeof(struct sockaddr_in)); 441 sin.sin_len = sizeof(struct sockaddr_in); 442 sin.sin_family = AF_INET; 443 sin.sin_addr = ip->ip_dst; 444 445 blocked = 0; 446 idx = imo_match_group(imo, ifp, 447 (struct sockaddr *)&sin); 448 if (idx == -1) { 449 /* 450 * No group membership for this socket. 451 * Do not bump udps_noportbcast, as 452 * this will happen further down. 453 */ 454 blocked++; 455 } else { 456 /* 457 * Check for a multicast source filter 458 * entry on this socket for this group. 459 * MCAST_EXCLUDE is the default 460 * behaviour. It means default accept; 461 * entries, if present, denote sources 462 * to be excluded from delivery. 463 */ 464 ims = imo_match_source(imo, idx, 465 (struct sockaddr *)&udp_in); 466 mode = imo->imo_mfilters[idx].imf_fmode; 467 if ((ims != NULL && 468 mode == MCAST_EXCLUDE) || 469 (ims == NULL && 470 mode == MCAST_INCLUDE)) { 471#ifdef DIAGNOSTIC 472 if (bootverbose) { 473 printf("%s: blocked by" 474 " source filter\n", 475 __func__); 476 } 477#endif 478 V_udpstat.udps_filtermcast++; 479 blocked++; 480 } 481 } 482 if (blocked != 0) { 483 INP_RUNLOCK(inp); 484 continue; 485 } 486 } 487 if (last != NULL) { 488 struct mbuf *n; 489 490 n = m_copy(m, 0, M_COPYALL); 491 if (n != NULL) 492 udp_append(last, ip, n, iphlen + 493 sizeof(struct udphdr), &udp_in); 494 INP_RUNLOCK(last); 495 } 496 last = inp; 497 /* 498 * Don't look for additional matches if this one does 499 * not have either the SO_REUSEPORT or SO_REUSEADDR 500 * socket options set. This heuristic avoids 501 * searching through all pcbs in the common case of a 502 * non-shared port. It assumes that an application 503 * will never clear these options after setting them. 504 */ 505 if ((last->inp_socket->so_options & 506 (SO_REUSEPORT|SO_REUSEADDR)) == 0) 507 break; 508 } 509 510 if (last == NULL) { 511 /* 512 * No matching pcb found; discard datagram. (No need 513 * to send an ICMP Port Unreachable for a broadcast 514 * or multicast datgram.) 515 */ 516 V_udpstat.udps_noportbcast++; 517 goto badheadlocked; 518 } 519 udp_append(last, ip, m, iphlen + sizeof(struct udphdr), 520 &udp_in); 521 INP_RUNLOCK(last); 522 INP_INFO_RUNLOCK(&V_udbinfo); 523 return; 524 } 525 526 /* 527 * Locate pcb for datagram. 528 */ 529 inp = in_pcblookup_hash(&V_udbinfo, ip->ip_src, uh->uh_sport, 530 ip->ip_dst, uh->uh_dport, 1, ifp); 531 if (inp == NULL) { 532 if (udp_log_in_vain) { 533 char buf[4*sizeof "123"]; 534 535 strcpy(buf, inet_ntoa(ip->ip_dst)); 536 log(LOG_INFO, 537 "Connection attempt to UDP %s:%d from %s:%d\n", 538 buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), 539 ntohs(uh->uh_sport)); 540 } 541 V_udpstat.udps_noport++; 542 if (m->m_flags & (M_BCAST | M_MCAST)) { 543 V_udpstat.udps_noportbcast++; 544 goto badheadlocked; 545 } 546 if (V_udp_blackhole) 547 goto badheadlocked; 548 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 549 goto badheadlocked; 550 *ip = save_ip; 551 ip->ip_len += iphlen; 552 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 553 INP_INFO_RUNLOCK(&V_udbinfo); 554 return; 555 } 556 557 /* 558 * Check the minimum TTL for socket. 559 */ 560 INP_RLOCK(inp); 561 INP_INFO_RUNLOCK(&V_udbinfo); 562 if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) { 563 INP_RUNLOCK(inp); 564 goto badunlocked; 565 } 566 udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in); 567 INP_RUNLOCK(inp); 568 return; 569 570badheadlocked: 571 if (inp) 572 INP_RUNLOCK(inp); 573 INP_INFO_RUNLOCK(&V_udbinfo); 574badunlocked: 575 m_freem(m); 576} 577 578/* 579 * Notify a udp user of an asynchronous error; just wake up so that they can 580 * collect error status. 581 */ 582struct inpcb * 583udp_notify(struct inpcb *inp, int errno) 584{ 585 586 /* 587 * While udp_ctlinput() always calls udp_notify() with a read lock 588 * when invoking it directly, in_pcbnotifyall() currently uses write 589 * locks due to sharing code with TCP. For now, accept either a read 590 * or a write lock, but a read lock is sufficient. 591 */ 592 INP_LOCK_ASSERT(inp); 593 594 inp->inp_socket->so_error = errno; 595 sorwakeup(inp->inp_socket); 596 sowwakeup(inp->inp_socket); 597 return (inp); 598} 599 600void 601udp_ctlinput(int cmd, struct sockaddr *sa, void *vip) 602{ 603 INIT_VNET_INET(curvnet); 604 struct ip *ip = vip; 605 struct udphdr *uh; 606 struct in_addr faddr; 607 struct inpcb *inp; 608 609 faddr = ((struct sockaddr_in *)sa)->sin_addr; 610 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 611 return; 612 613 /* 614 * Redirects don't need to be handled up here. 615 */ 616 if (PRC_IS_REDIRECT(cmd)) 617 return; 618 619 /* 620 * Hostdead is ugly because it goes linearly through all PCBs. 621 * 622 * XXX: We never get this from ICMP, otherwise it makes an excellent 623 * DoS attack on machines with many connections. 624 */ 625 if (cmd == PRC_HOSTDEAD) 626 ip = NULL; 627 else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) 628 return; 629 if (ip != NULL) { 630 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 631 INP_INFO_RLOCK(&V_udbinfo); 632 inp = in_pcblookup_hash(&V_udbinfo, faddr, uh->uh_dport, 633 ip->ip_src, uh->uh_sport, 0, NULL); 634 if (inp != NULL) { 635 INP_RLOCK(inp); 636 if (inp->inp_socket != NULL) { 637 udp_notify(inp, inetctlerrmap[cmd]); 638 } 639 INP_RUNLOCK(inp); 640 } 641 INP_INFO_RUNLOCK(&V_udbinfo); 642 } else 643 in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd], 644 udp_notify); 645} 646 647static int 648udp_pcblist(SYSCTL_HANDLER_ARGS) 649{ 650 INIT_VNET_INET(curvnet); 651 int error, i, n; 652 struct inpcb *inp, **inp_list; 653 inp_gen_t gencnt; 654 struct xinpgen xig; 655 656 /* 657 * The process of preparing the PCB list is too time-consuming and 658 * resource-intensive to repeat twice on every request. 659 */ 660 if (req->oldptr == 0) { 661 n = V_udbinfo.ipi_count; 662 req->oldidx = 2 * (sizeof xig) 663 + (n + n/8) * sizeof(struct xinpcb); 664 return (0); 665 } 666 667 if (req->newptr != 0) 668 return (EPERM); 669 670 /* 671 * OK, now we're committed to doing something. 672 */ 673 INP_INFO_RLOCK(&V_udbinfo); 674 gencnt = V_udbinfo.ipi_gencnt; 675 n = V_udbinfo.ipi_count; 676 INP_INFO_RUNLOCK(&V_udbinfo); 677 678 error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) 679 + n * sizeof(struct xinpcb)); 680 if (error != 0) 681 return (error); 682 683 xig.xig_len = sizeof xig; 684 xig.xig_count = n; 685 xig.xig_gen = gencnt; 686 xig.xig_sogen = so_gencnt; 687 error = SYSCTL_OUT(req, &xig, sizeof xig); 688 if (error) 689 return (error); 690 691 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 692 if (inp_list == 0) 693 return (ENOMEM); 694 695 INP_INFO_RLOCK(&V_udbinfo); 696 for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; 697 inp = LIST_NEXT(inp, inp_list)) { 698 INP_RLOCK(inp); 699 if (inp->inp_gencnt <= gencnt && 700 cr_canseeinpcb(req->td->td_ucred, inp) == 0) 701 inp_list[i++] = inp; 702 INP_RUNLOCK(inp); 703 } 704 INP_INFO_RUNLOCK(&V_udbinfo); 705 n = i; 706 707 error = 0; 708 for (i = 0; i < n; i++) { 709 inp = inp_list[i]; 710 INP_RLOCK(inp); 711 if (inp->inp_gencnt <= gencnt) { 712 struct xinpcb xi; 713 bzero(&xi, sizeof(xi)); 714 xi.xi_len = sizeof xi; 715 /* XXX should avoid extra copy */ 716 bcopy(inp, &xi.xi_inp, sizeof *inp); 717 if (inp->inp_socket) 718 sotoxsocket(inp->inp_socket, &xi.xi_socket); 719 xi.xi_inp.inp_gencnt = inp->inp_gencnt; 720 INP_RUNLOCK(inp); 721 error = SYSCTL_OUT(req, &xi, sizeof xi); 722 } else 723 INP_RUNLOCK(inp); 724 } 725 if (!error) { 726 /* 727 * Give the user an updated idea of our state. If the 728 * generation differs from what we told her before, she knows 729 * that something happened while we were processing this 730 * request, and it might be necessary to retry. 731 */ 732 INP_INFO_RLOCK(&V_udbinfo); 733 xig.xig_gen = V_udbinfo.ipi_gencnt; 734 xig.xig_sogen = so_gencnt; 735 xig.xig_count = V_udbinfo.ipi_count; 736 INP_INFO_RUNLOCK(&V_udbinfo); 737 error = SYSCTL_OUT(req, &xig, sizeof xig); 738 } 739 free(inp_list, M_TEMP); 740 return (error); 741} 742 743SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, 744 udp_pcblist, "S,xinpcb", "List of active UDP sockets"); 745 746static int 747udp_getcred(SYSCTL_HANDLER_ARGS) 748{ 749 INIT_VNET_INET(curvnet); 750 struct xucred xuc; 751 struct sockaddr_in addrs[2]; 752 struct inpcb *inp; 753 int error; 754 755 error = priv_check(req->td, PRIV_NETINET_GETCRED); 756 if (error) 757 return (error); 758 error = SYSCTL_IN(req, addrs, sizeof(addrs)); 759 if (error) 760 return (error); 761 INP_INFO_RLOCK(&V_udbinfo); 762 inp = in_pcblookup_hash(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port, 763 addrs[0].sin_addr, addrs[0].sin_port, 1, NULL); 764 if (inp != NULL) { 765 INP_RLOCK(inp); 766 INP_INFO_RUNLOCK(&V_udbinfo); 767 if (inp->inp_socket == NULL) 768 error = ENOENT; 769 if (error == 0) 770 error = cr_canseeinpcb(req->td->td_ucred, inp); 771 if (error == 0) 772 cru2x(inp->inp_cred, &xuc); 773 INP_RUNLOCK(inp); 774 } else { 775 INP_INFO_RUNLOCK(&V_udbinfo); 776 error = ENOENT; 777 } 778 if (error == 0) 779 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); 780 return (error); 781} 782 783SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, 784 CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, 785 udp_getcred, "S,xucred", "Get the xucred of a UDP connection"); 786 787static int 788udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, 789 struct mbuf *control, struct thread *td) 790{ 791 INIT_VNET_INET(inp->inp_vnet); 792 struct udpiphdr *ui; 793 int len = m->m_pkthdr.len; 794 struct in_addr faddr, laddr; 795 struct cmsghdr *cm; 796 struct sockaddr_in *sin, src; 797 int error = 0; 798 int ipflags; 799 u_short fport, lport; 800 int unlock_udbinfo; 801 802 /* 803 * udp_output() may need to temporarily bind or connect the current 804 * inpcb. As such, we don't know up front whether we will need the 805 * pcbinfo lock or not. Do any work to decide what is needed up 806 * front before acquiring any locks. 807 */ 808 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 809 if (control) 810 m_freem(control); 811 m_freem(m); 812 return (EMSGSIZE); 813 } 814 815 src.sin_family = 0; 816 if (control != NULL) { 817 /* 818 * XXX: Currently, we assume all the optional information is 819 * stored in a single mbuf. 820 */ 821 if (control->m_next) { 822 m_freem(control); 823 m_freem(m); 824 return (EINVAL); 825 } 826 for (; control->m_len > 0; 827 control->m_data += CMSG_ALIGN(cm->cmsg_len), 828 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { 829 cm = mtod(control, struct cmsghdr *); 830 if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 831 || cm->cmsg_len > control->m_len) { 832 error = EINVAL; 833 break; 834 } 835 if (cm->cmsg_level != IPPROTO_IP) 836 continue; 837 838 switch (cm->cmsg_type) { 839 case IP_SENDSRCADDR: 840 if (cm->cmsg_len != 841 CMSG_LEN(sizeof(struct in_addr))) { 842 error = EINVAL; 843 break; 844 } 845 bzero(&src, sizeof(src)); 846 src.sin_family = AF_INET; 847 src.sin_len = sizeof(src); 848 src.sin_port = inp->inp_lport; 849 src.sin_addr = 850 *(struct in_addr *)CMSG_DATA(cm); 851 break; 852 853 default: 854 error = ENOPROTOOPT; 855 break; 856 } 857 if (error) 858 break; 859 } 860 m_freem(control); 861 } 862 if (error) { 863 m_freem(m); 864 return (error); 865 } 866 867 /* 868 * Depending on whether or not the application has bound or connected 869 * the socket, we may have to do varying levels of work. The optimal 870 * case is for a connected UDP socket, as a global lock isn't 871 * required at all. 872 * 873 * In order to decide which we need, we require stability of the 874 * inpcb binding, which we ensure by acquiring a read lock on the 875 * inpcb. This doesn't strictly follow the lock order, so we play 876 * the trylock and retry game; note that we may end up with more 877 * conservative locks than required the second time around, so later 878 * assertions have to accept that. Further analysis of the number of 879 * misses under contention is required. 880 */ 881 sin = (struct sockaddr_in *)addr; 882 INP_RLOCK(inp); 883 if (sin != NULL && 884 (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { 885 INP_RUNLOCK(inp); 886 INP_INFO_WLOCK(&V_udbinfo); 887 INP_WLOCK(inp); 888 unlock_udbinfo = 2; 889 } else if ((sin != NULL && ( 890 (sin->sin_addr.s_addr == INADDR_ANY) || 891 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 892 (inp->inp_laddr.s_addr == INADDR_ANY) || 893 (inp->inp_lport == 0))) || 894 (src.sin_family == AF_INET)) { 895 if (!INP_INFO_TRY_RLOCK(&V_udbinfo)) { 896 INP_RUNLOCK(inp); 897 INP_INFO_RLOCK(&V_udbinfo); 898 INP_RLOCK(inp); 899 } 900 unlock_udbinfo = 1; 901 } else 902 unlock_udbinfo = 0; 903 904 /* 905 * If the IP_SENDSRCADDR control message was specified, override the 906 * source address for this datagram. Its use is invalidated if the 907 * address thus specified is incomplete or clobbers other inpcbs. 908 */ 909 laddr = inp->inp_laddr; 910 lport = inp->inp_lport; 911 if (src.sin_family == AF_INET) { 912 INP_INFO_LOCK_ASSERT(&V_udbinfo); 913 if ((lport == 0) || 914 (laddr.s_addr == INADDR_ANY && 915 src.sin_addr.s_addr == INADDR_ANY)) { 916 error = EINVAL; 917 goto release; 918 } 919 error = in_pcbbind_setup(inp, (struct sockaddr *)&src, 920 &laddr.s_addr, &lport, td->td_ucred); 921 if (error) 922 goto release; 923 } 924 925 /* 926 * If a UDP socket has been connected, then a local address/port will 927 * have been selected and bound. 928 * 929 * If a UDP socket has not been connected to, then an explicit 930 * destination address must be used, in which case a local 931 * address/port may not have been selected and bound. 932 */ 933 if (sin != NULL) { 934 INP_LOCK_ASSERT(inp); 935 if (inp->inp_faddr.s_addr != INADDR_ANY) { 936 error = EISCONN; 937 goto release; 938 } 939 940 /* 941 * Jail may rewrite the destination address, so let it do 942 * that before we use it. 943 */ 944 if (prison_remote_ip4(td->td_ucred, &sin->sin_addr) != 0) { 945 error = EINVAL; 946 goto release; 947 } 948 949 /* 950 * If a local address or port hasn't yet been selected, or if 951 * the destination address needs to be rewritten due to using 952 * a special INADDR_ constant, invoke in_pcbconnect_setup() 953 * to do the heavy lifting. Once a port is selected, we 954 * commit the binding back to the socket; we also commit the 955 * binding of the address if in jail. 956 * 957 * If we already have a valid binding and we're not 958 * requesting a destination address rewrite, use a fast path. 959 */ 960 if (inp->inp_laddr.s_addr == INADDR_ANY || 961 inp->inp_lport == 0 || 962 sin->sin_addr.s_addr == INADDR_ANY || 963 sin->sin_addr.s_addr == INADDR_BROADCAST) { 964 INP_INFO_LOCK_ASSERT(&V_udbinfo); 965 error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, 966 &lport, &faddr.s_addr, &fport, NULL, 967 td->td_ucred); 968 if (error) 969 goto release; 970 971 /* 972 * XXXRW: Why not commit the port if the address is 973 * !INADDR_ANY? 974 */ 975 /* Commit the local port if newly assigned. */ 976 if (inp->inp_laddr.s_addr == INADDR_ANY && 977 inp->inp_lport == 0) { 978 INP_INFO_WLOCK_ASSERT(&V_udbinfo); 979 INP_WLOCK_ASSERT(inp); 980 /* 981 * Remember addr if jailed, to prevent 982 * rebinding. 983 */ 984 if (jailed(td->td_ucred)) 985 inp->inp_laddr = laddr; 986 inp->inp_lport = lport; 987 if (in_pcbinshash(inp) != 0) { 988 inp->inp_lport = 0; 989 error = EAGAIN; 990 goto release; 991 } 992 inp->inp_flags |= INP_ANONPORT; 993 } 994 } else { 995 faddr = sin->sin_addr; 996 fport = sin->sin_port; 997 } 998 } else { 999 INP_LOCK_ASSERT(inp); 1000 faddr = inp->inp_faddr; 1001 fport = inp->inp_fport; 1002 if (faddr.s_addr == INADDR_ANY) { 1003 error = ENOTCONN; 1004 goto release; 1005 } 1006 } 1007 1008 /* 1009 * Calculate data length and get a mbuf for UDP, IP, and possible 1010 * link-layer headers. Immediate slide the data pointer back forward 1011 * since we won't use that space at this layer. 1012 */ 1013 M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT); 1014 if (m == NULL) { 1015 error = ENOBUFS; 1016 goto release; 1017 } 1018 m->m_data += max_linkhdr; 1019 m->m_len -= max_linkhdr; 1020 m->m_pkthdr.len -= max_linkhdr; 1021 1022 /* 1023 * Fill in mbuf with extended UDP header and addresses and length put 1024 * into network format. 1025 */ 1026 ui = mtod(m, struct udpiphdr *); 1027 bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */ 1028 ui->ui_pr = IPPROTO_UDP; 1029 ui->ui_src = laddr; 1030 ui->ui_dst = faddr; 1031 ui->ui_sport = lport; 1032 ui->ui_dport = fport; 1033 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1034 1035 /* 1036 * Set the Don't Fragment bit in the IP header. 1037 */ 1038 if (inp->inp_flags & INP_DONTFRAG) { 1039 struct ip *ip; 1040 1041 ip = (struct ip *)&ui->ui_i; 1042 ip->ip_off |= IP_DF; 1043 } 1044 1045 ipflags = 0; 1046 if (inp->inp_socket->so_options & SO_DONTROUTE) 1047 ipflags |= IP_ROUTETOIF; 1048 if (inp->inp_socket->so_options & SO_BROADCAST) 1049 ipflags |= IP_ALLOWBROADCAST; 1050 if (inp->inp_flags & INP_ONESBCAST) 1051 ipflags |= IP_SENDONES; 1052 1053#ifdef MAC 1054 mac_inpcb_create_mbuf(inp, m); 1055#endif 1056 1057 /* 1058 * Set up checksum and output datagram. 1059 */ 1060 if (udp_cksum) { 1061 if (inp->inp_flags & INP_ONESBCAST) 1062 faddr.s_addr = INADDR_BROADCAST; 1063 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr, 1064 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1065 m->m_pkthdr.csum_flags = CSUM_UDP; 1066 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1067 } else 1068 ui->ui_sum = 0; 1069 ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len; 1070 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1071 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1072 V_udpstat.udps_opackets++; 1073 1074 if (unlock_udbinfo == 2) 1075 INP_INFO_WUNLOCK(&V_udbinfo); 1076 else if (unlock_udbinfo == 1) 1077 INP_INFO_RUNLOCK(&V_udbinfo); 1078 error = ip_output(m, inp->inp_options, NULL, ipflags, 1079 inp->inp_moptions, inp); 1080 if (unlock_udbinfo == 2) 1081 INP_WUNLOCK(inp); 1082 else 1083 INP_RUNLOCK(inp); 1084 return (error); 1085 1086release: 1087 if (unlock_udbinfo == 2) { 1088 INP_WUNLOCK(inp); 1089 INP_INFO_WUNLOCK(&V_udbinfo); 1090 } else if (unlock_udbinfo == 1) { 1091 INP_RUNLOCK(inp); 1092 INP_INFO_RUNLOCK(&V_udbinfo); 1093 } else 1094 INP_RUNLOCK(inp); 1095 m_freem(m); 1096 return (error); 1097} 1098 1099static void 1100udp_abort(struct socket *so) 1101{ 1102 INIT_VNET_INET(so->so_vnet); 1103 struct inpcb *inp; 1104 1105 inp = sotoinpcb(so); 1106 KASSERT(inp != NULL, ("udp_abort: inp == NULL")); 1107 INP_INFO_WLOCK(&V_udbinfo); 1108 INP_WLOCK(inp); 1109 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1110 in_pcbdisconnect(inp); 1111 inp->inp_laddr.s_addr = INADDR_ANY; 1112 soisdisconnected(so); 1113 } 1114 INP_WUNLOCK(inp); 1115 INP_INFO_WUNLOCK(&V_udbinfo); 1116} 1117 1118static int 1119udp_attach(struct socket *so, int proto, struct thread *td) 1120{ 1121 INIT_VNET_INET(so->so_vnet); 1122 struct inpcb *inp; 1123 int error; 1124 1125 inp = sotoinpcb(so); 1126 KASSERT(inp == NULL, ("udp_attach: inp != NULL")); 1127 error = soreserve(so, udp_sendspace, udp_recvspace); 1128 if (error) 1129 return (error); 1130 INP_INFO_WLOCK(&V_udbinfo); 1131 error = in_pcballoc(so, &V_udbinfo); 1132 if (error) { 1133 INP_INFO_WUNLOCK(&V_udbinfo); 1134 return (error); 1135 } 1136 1137 inp = (struct inpcb *)so->so_pcb; 1138 INP_INFO_WUNLOCK(&V_udbinfo); 1139 inp->inp_vflag |= INP_IPV4; 1140 inp->inp_ip_ttl = V_ip_defttl; 1141 INP_WUNLOCK(inp); 1142 return (0); 1143} 1144 1145static int 1146udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 1147{ 1148 INIT_VNET_INET(so->so_vnet); 1149 struct inpcb *inp; 1150 int error; 1151 1152 inp = sotoinpcb(so); 1153 KASSERT(inp != NULL, ("udp_bind: inp == NULL")); 1154 INP_INFO_WLOCK(&V_udbinfo); 1155 INP_WLOCK(inp); 1156 error = in_pcbbind(inp, nam, td->td_ucred); 1157 INP_WUNLOCK(inp); 1158 INP_INFO_WUNLOCK(&V_udbinfo); 1159 return (error); 1160} 1161 1162static void 1163udp_close(struct socket *so) 1164{ 1165 INIT_VNET_INET(so->so_vnet); 1166 struct inpcb *inp; 1167 1168 inp = sotoinpcb(so); 1169 KASSERT(inp != NULL, ("udp_close: inp == NULL")); 1170 INP_INFO_WLOCK(&V_udbinfo); 1171 INP_WLOCK(inp); 1172 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1173 in_pcbdisconnect(inp); 1174 inp->inp_laddr.s_addr = INADDR_ANY; 1175 soisdisconnected(so); 1176 } 1177 INP_WUNLOCK(inp); 1178 INP_INFO_WUNLOCK(&V_udbinfo); 1179} 1180 1181static int 1182udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 1183{ 1184 INIT_VNET_INET(so->so_vnet); 1185 struct inpcb *inp; 1186 int error; 1187 struct sockaddr_in *sin; 1188 1189 inp = sotoinpcb(so); 1190 KASSERT(inp != NULL, ("udp_connect: inp == NULL")); 1191 INP_INFO_WLOCK(&V_udbinfo); 1192 INP_WLOCK(inp); 1193 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1194 INP_WUNLOCK(inp); 1195 INP_INFO_WUNLOCK(&V_udbinfo); 1196 return (EISCONN); 1197 } 1198 sin = (struct sockaddr_in *)nam; 1199 if (prison_remote_ip4(td->td_ucred, &sin->sin_addr) != 0) { 1200 INP_WUNLOCK(inp); 1201 INP_INFO_WUNLOCK(&udbinfo); 1202 return (EAFNOSUPPORT); 1203 } 1204 error = in_pcbconnect(inp, nam, td->td_ucred); 1205 if (error == 0) 1206 soisconnected(so); 1207 INP_WUNLOCK(inp); 1208 INP_INFO_WUNLOCK(&V_udbinfo); 1209 return (error); 1210} 1211 1212static void 1213udp_detach(struct socket *so) 1214{ 1215 INIT_VNET_INET(so->so_vnet); 1216 struct inpcb *inp; 1217 1218 inp = sotoinpcb(so); 1219 KASSERT(inp != NULL, ("udp_detach: inp == NULL")); 1220 KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, 1221 ("udp_detach: not disconnected")); 1222 INP_INFO_WLOCK(&V_udbinfo); 1223 INP_WLOCK(inp); 1224 in_pcbdetach(inp); 1225 in_pcbfree(inp); 1226 INP_INFO_WUNLOCK(&V_udbinfo); 1227} 1228 1229static int 1230udp_disconnect(struct socket *so) 1231{ 1232 INIT_VNET_INET(so->so_vnet); 1233 struct inpcb *inp; 1234 1235 inp = sotoinpcb(so); 1236 KASSERT(inp != NULL, ("udp_disconnect: inp == NULL")); 1237 INP_INFO_WLOCK(&V_udbinfo); 1238 INP_WLOCK(inp); 1239 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1240 INP_WUNLOCK(inp); 1241 INP_INFO_WUNLOCK(&V_udbinfo); 1242 return (ENOTCONN); 1243 } 1244 1245 in_pcbdisconnect(inp); 1246 inp->inp_laddr.s_addr = INADDR_ANY; 1247 SOCK_LOCK(so); 1248 so->so_state &= ~SS_ISCONNECTED; /* XXX */ 1249 SOCK_UNLOCK(so); 1250 INP_WUNLOCK(inp); 1251 INP_INFO_WUNLOCK(&V_udbinfo); 1252 return (0); 1253} 1254 1255static int 1256udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, 1257 struct mbuf *control, struct thread *td) 1258{ 1259 struct inpcb *inp; 1260 1261 inp = sotoinpcb(so); 1262 KASSERT(inp != NULL, ("udp_send: inp == NULL")); 1263 return (udp_output(inp, m, addr, control, td)); 1264} 1265 1266int 1267udp_shutdown(struct socket *so) 1268{ 1269 struct inpcb *inp; 1270 1271 inp = sotoinpcb(so); 1272 KASSERT(inp != NULL, ("udp_shutdown: inp == NULL")); 1273 INP_WLOCK(inp); 1274 socantsendmore(so); 1275 INP_WUNLOCK(inp); 1276 return (0); 1277} 1278 1279struct pr_usrreqs udp_usrreqs = { 1280 .pru_abort = udp_abort, 1281 .pru_attach = udp_attach, 1282 .pru_bind = udp_bind, 1283 .pru_connect = udp_connect, 1284 .pru_control = in_control, 1285 .pru_detach = udp_detach, 1286 .pru_disconnect = udp_disconnect, 1287 .pru_peeraddr = in_getpeeraddr, 1288 .pru_send = udp_send, 1289 .pru_soreceive = soreceive_dgram, 1290 .pru_sosend = sosend_dgram, 1291 .pru_shutdown = udp_shutdown, 1292 .pru_sockaddr = in_getsockaddr, 1293 .pru_sosetlabel = in_pcbsosetlabel, 1294 .pru_close = udp_close, 1295};
|