1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1988, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 
59 * 60 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 61 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $ 62 */ 63 64 65#include <sys/param.h> 66#include <sys/systm.h> 67#include <sys/kernel.h> 68#include <sys/sysctl.h> 69#include <sys/mbuf.h> 70#if INET6 71#include <sys/domain.h> 72#endif /* INET6 */ 73#include <sys/socket.h> 74#include <sys/socketvar.h> 75#include <sys/protosw.h> 76 77#include <net/if.h> 78#include <net/route.h> 79#include <net/ntstat.h> 80 81#include <netinet/in.h> 82#include <netinet/in_systm.h> 83#if INET6 84#include <netinet/ip6.h> 85#endif 86#include <netinet/in_pcb.h> 87#if INET6 88#include <netinet6/in6_pcb.h> 89#endif 90#include <netinet/in_var.h> 91#include <netinet/ip_var.h> 92#if INET6 93#include <netinet6/ip6_var.h> 94#endif 95#include <netinet/tcp.h> 96#include <netinet/tcp_fsm.h> 97#include <netinet/tcp_seq.h> 98#include <netinet/tcp_timer.h> 99#include <netinet/tcp_var.h> 100#include <netinet/tcpip.h> 101#if TCPDEBUG 102#include <netinet/tcp_debug.h> 103#endif 104 105#if IPSEC 106#include <netinet6/ipsec.h> 107#endif /*IPSEC*/ 108 109void tcp_fill_info(struct tcpcb *, struct tcp_info *); 110errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *); 111 112int tcp_sysctl_info(struct sysctl_oid *, void *, int , struct sysctl_req *); 113 114/* 115 * TCP protocol interface to socket abstraction. 116 */ 117extern char *tcpstates[]; /* XXX ??? 
*/ 118 119static int tcp_attach(struct socket *, struct proc *); 120static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *); 121#if INET6 122static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *); 123#endif /* INET6 */ 124static struct tcpcb * 125 tcp_disconnect(struct tcpcb *); 126static struct tcpcb * 127 tcp_usrclosed(struct tcpcb *); 128 129static u_int32_t tcps_in_sw_cksum; 130SYSCTL_UINT(_net_inet_tcp, OID_AUTO, in_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED, 131 &tcps_in_sw_cksum, 0, 132 "Number of received packets checksummed in software"); 133 134static u_int64_t tcps_in_sw_cksum_bytes; 135SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, in_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, 136 &tcps_in_sw_cksum_bytes, 137 "Amount of received data checksummed in software"); 138 139static u_int32_t tcps_out_sw_cksum; 140SYSCTL_UINT(_net_inet_tcp, OID_AUTO, out_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED, 141 &tcps_out_sw_cksum, 0, 142 "Number of transmitted packets checksummed in software"); 143 144static u_int64_t tcps_out_sw_cksum_bytes; 145SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, 146 &tcps_out_sw_cksum_bytes, 147 "Amount of transmitted data checksummed in software"); 148 149extern uint32_t tcp_autorcvbuf_max; 150 151extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb); 152 153#if TCPDEBUG 154#define TCPDEBUG0 int ostate = 0 155#define TCPDEBUG1() ostate = tp ? tp->t_state : 0 156#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 157 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 158#else 159#define TCPDEBUG0 160#define TCPDEBUG1() 161#define TCPDEBUG2(req) 162#endif 163 164SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 165 0 , 0, tcp_sysctl_info, "S", "TCP info per tuple"); 166 167/* 168 * TCP attaches to socket via pru_attach(), reserving space, 169 * and an internet control block. 
170 * 171 * Returns: 0 Success 172 * EISCONN 173 * tcp_attach:ENOBUFS 174 * tcp_attach:ENOMEM 175 * tcp_attach:??? [IPSEC specific] 176 */ 177static int 178tcp_usr_attach(struct socket *so, __unused int proto, struct proc *p) 179{ 180 int error; 181 struct inpcb *inp = sotoinpcb(so); 182 struct tcpcb *tp = 0; 183 TCPDEBUG0; 184 185 TCPDEBUG1(); 186 if (inp) { 187 error = EISCONN; 188 goto out; 189 } 190 191 error = tcp_attach(so, p); 192 if (error) 193 goto out; 194 195 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 196 so->so_linger = TCP_LINGERTIME * hz; 197 tp = sototcpcb(so); 198out: 199 TCPDEBUG2(PRU_ATTACH); 200 return error; 201} 202 203/* 204 * pru_detach() detaches the TCP protocol from the socket. 205 * If the protocol state is non-embryonic, then can't 206 * do this directly: have to initiate a pru_disconnect(), 207 * which may finish later; embryonic TCB's can just 208 * be discarded here. 209 */ 210static int 211tcp_usr_detach(struct socket *so) 212{ 213 int error = 0; 214 struct inpcb *inp = sotoinpcb(so); 215 struct tcpcb *tp; 216 TCPDEBUG0; 217 218 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { 219 return EINVAL; /* XXX */ 220 } 221 lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); 222 tp = intotcpcb(inp); 223 /* In case we got disconnected from the peer */ 224 if (tp == 0) 225 goto out; 226 TCPDEBUG1(); 227 228 calculate_tcp_clock(); 229 230 tp = tcp_disconnect(tp); 231out: 232 TCPDEBUG2(PRU_DETACH); 233 return error; 234} 235 236#define COMMON_START() TCPDEBUG0; \ 237 do { \ 238 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { \ 239 return EINVAL; \ 240 } \ 241 tp = intotcpcb(inp); \ 242 TCPDEBUG1(); \ 243 calculate_tcp_clock(); \ 244 } while(0) 245 246#define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out 247 248 249/* 250 * Give the socket an address. 
251 * 252 * Returns: 0 Success 253 * EINVAL Invalid argument [COMMON_START] 254 * EAFNOSUPPORT Address family not supported 255 * in_pcbbind:EADDRNOTAVAIL Address not available. 256 * in_pcbbind:EINVAL Invalid argument 257 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef] 258 * in_pcbbind:EACCES Permission denied 259 * in_pcbbind:EADDRINUSE Address in use 260 * in_pcbbind:EAGAIN Resource unavailable, try again 261 * in_pcbbind:EPERM Operation not permitted 262 */ 263static int 264tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 265{ 266 int error = 0; 267 struct inpcb *inp = sotoinpcb(so); 268 struct tcpcb *tp; 269 struct sockaddr_in *sinp; 270 271 COMMON_START(); 272 273 if (nam->sa_family != 0 && nam->sa_family != AF_INET) { 274 error = EAFNOSUPPORT; 275 goto out; 276 } 277 278 /* 279 * Must check for multicast addresses and disallow binding 280 * to them. 281 */ 282 sinp = (struct sockaddr_in *)(void *)nam; 283 if (sinp->sin_family == AF_INET && 284 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 285 error = EAFNOSUPPORT; 286 goto out; 287 } 288 error = in_pcbbind(inp, nam, p); 289 if (error) 290 goto out; 291 COMMON_END(PRU_BIND); 292 293} 294 295#if INET6 296static int 297tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 298{ 299 int error = 0; 300 struct inpcb *inp = sotoinpcb(so); 301 struct tcpcb *tp; 302 struct sockaddr_in6 *sin6p; 303 304 COMMON_START(); 305 306 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) { 307 error = EAFNOSUPPORT; 308 goto out; 309 } 310 311 /* 312 * Must check for multicast addresses and disallow binding 313 * to them. 
314 */ 315 sin6p = (struct sockaddr_in6 *)(void *)nam; 316 if (sin6p->sin6_family == AF_INET6 && 317 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 318 error = EAFNOSUPPORT; 319 goto out; 320 } 321 inp->inp_vflag &= ~INP_IPV4; 322 inp->inp_vflag |= INP_IPV6; 323 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 324 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 325 inp->inp_vflag |= INP_IPV4; 326 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 327 struct sockaddr_in sin; 328 329 in6_sin6_2_sin(&sin, sin6p); 330 inp->inp_vflag |= INP_IPV4; 331 inp->inp_vflag &= ~INP_IPV6; 332 error = in_pcbbind(inp, (struct sockaddr *)&sin, p); 333 goto out; 334 } 335 } 336 error = in6_pcbbind(inp, nam, p); 337 if (error) 338 goto out; 339 COMMON_END(PRU_BIND); 340} 341#endif /* INET6 */ 342 343/* 344 * Prepare to accept connections. 345 * 346 * Returns: 0 Success 347 * EINVAL [COMMON_START] 348 * in_pcbbind:EADDRNOTAVAIL Address not available. 349 * in_pcbbind:EINVAL Invalid argument 350 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef] 351 * in_pcbbind:EACCES Permission denied 352 * in_pcbbind:EADDRINUSE Address in use 353 * in_pcbbind:EAGAIN Resource unavailable, try again 354 * in_pcbbind:EPERM Operation not permitted 355 */ 356static int 357tcp_usr_listen(struct socket *so, struct proc *p) 358{ 359 int error = 0; 360 struct inpcb *inp = sotoinpcb(so); 361 struct tcpcb *tp; 362 363 COMMON_START(); 364 if (inp->inp_lport == 0) 365 error = in_pcbbind(inp, (struct sockaddr *)0, p); 366 if (error == 0) 367 tp->t_state = TCPS_LISTEN; 368 COMMON_END(PRU_LISTEN); 369} 370 371#if INET6 372static int 373tcp6_usr_listen(struct socket *so, struct proc *p) 374{ 375 int error = 0; 376 struct inpcb *inp = sotoinpcb(so); 377 struct tcpcb *tp; 378 379 COMMON_START(); 380 if (inp->inp_lport == 0) { 381 inp->inp_vflag &= ~INP_IPV4; 382 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 383 inp->inp_vflag |= INP_IPV4; 384 error = in6_pcbbind(inp, (struct sockaddr *)0, p); 385 } 386 if 
(error == 0) 387 tp->t_state = TCPS_LISTEN; 388 COMMON_END(PRU_LISTEN); 389} 390#endif /* INET6 */ 391 392/* 393 * Initiate connection to peer. 394 * Create a template for use in transmissions on this connection. 395 * Enter SYN_SENT state, and mark socket as connecting. 396 * Start keep-alive timer, and seed output sequence space. 397 * Send initial segment on connection. 398 */ 399static int 400tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 401{ 402 int error = 0; 403 struct inpcb *inp = sotoinpcb(so); 404 struct tcpcb *tp; 405 struct sockaddr_in *sinp; 406 407 TCPDEBUG0; 408 if (inp == 0) 409 return EINVAL; 410 else if (inp->inp_state == INPCB_STATE_DEAD) { 411 if (so->so_error) { 412 error = so->so_error; 413 so->so_error = 0; 414 return error; 415 } else 416 return EINVAL; 417 } 418 tp = intotcpcb(inp); 419 TCPDEBUG1(); 420 421 calculate_tcp_clock(); 422 423 if (nam->sa_family != 0 && nam->sa_family != AF_INET) { 424 error = EAFNOSUPPORT; 425 goto out; 426 } 427 /* 428 * Must disallow TCP ``connections'' to multicast addresses. 429 */ 430 sinp = (struct sockaddr_in *)(void *)nam; 431 if (sinp->sin_family == AF_INET 432 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 433 error = EAFNOSUPPORT; 434 goto out; 435 } 436 437 438 if ((error = tcp_connect(tp, nam, p)) != 0) 439 goto out; 440 error = tcp_output(tp); 441 COMMON_END(PRU_CONNECT); 442} 443 444#if INET6 445static int 446tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 447{ 448 int error = 0; 449 struct inpcb *inp = sotoinpcb(so); 450 struct tcpcb *tp; 451 struct sockaddr_in6 *sin6p; 452 453 COMMON_START(); 454 455 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) { 456 error = EAFNOSUPPORT; 457 goto out; 458 } 459 460 /* 461 * Must disallow TCP ``connections'' to multicast addresses. 
462 */ 463 sin6p = (struct sockaddr_in6 *)(void *)nam; 464 if (sin6p->sin6_family == AF_INET6 465 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 466 error = EAFNOSUPPORT; 467 goto out; 468 } 469 470 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 471 struct sockaddr_in sin; 472 473 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 474 return (EINVAL); 475 476 in6_sin6_2_sin(&sin, sin6p); 477 inp->inp_vflag |= INP_IPV4; 478 inp->inp_vflag &= ~INP_IPV6; 479 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) 480 goto out; 481 error = tcp_output(tp); 482 goto out; 483 } 484 inp->inp_vflag &= ~INP_IPV4; 485 inp->inp_vflag |= INP_IPV6; 486 if ((error = tcp6_connect(tp, nam, p)) != 0) 487 goto out; 488 error = tcp_output(tp); 489 if (error) 490 goto out; 491 COMMON_END(PRU_CONNECT); 492} 493#endif /* INET6 */ 494 495/* 496 * Initiate disconnect from peer. 497 * If connection never passed embryonic stage, just drop; 498 * else if don't need to let data drain, then can just drop anyways, 499 * else have to begin TCP shutdown process: mark socket disconnecting, 500 * drain unread data, state switch to reflect user close, and 501 * send segment (e.g. FIN) to peer. Socket will be really disconnected 502 * when peer sends FIN and acks ours. 503 * 504 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 505 */ 506static int 507tcp_usr_disconnect(struct socket *so) 508{ 509 int error = 0; 510 struct inpcb *inp = sotoinpcb(so); 511 struct tcpcb *tp; 512 513 lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); 514 COMMON_START(); 515 /* In case we got disconnected from the peer */ 516 if (tp == 0) 517 goto out; 518 tp = tcp_disconnect(tp); 519 COMMON_END(PRU_DISCONNECT); 520} 521 522/* 523 * Accept a connection. Essentially all the work is 524 * done at higher levels; just return the address 525 * of the peer, storing through addr. 
526 */ 527static int 528tcp_usr_accept(struct socket *so, struct sockaddr **nam) 529{ 530 int error = 0; 531 struct inpcb *inp = sotoinpcb(so); 532 struct tcpcb *tp = NULL; 533 TCPDEBUG0; 534 535 in_setpeeraddr(so, nam); 536 537 if (so->so_state & SS_ISDISCONNECTED) { 538 error = ECONNABORTED; 539 goto out; 540 } 541 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { 542 return (EINVAL); 543 } 544 tp = intotcpcb(inp); 545 TCPDEBUG1(); 546 547 calculate_tcp_clock(); 548 549 COMMON_END(PRU_ACCEPT); 550} 551 552#if INET6 553static int 554tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 555{ 556 int error = 0; 557 struct inpcb *inp = sotoinpcb(so); 558 struct tcpcb *tp = NULL; 559 TCPDEBUG0; 560 561 if (so->so_state & SS_ISDISCONNECTED) { 562 error = ECONNABORTED; 563 goto out; 564 } 565 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { 566 return (EINVAL); 567 } 568 tp = intotcpcb(inp); 569 TCPDEBUG1(); 570 571 calculate_tcp_clock(); 572 573 in6_mapped_peeraddr(so, nam); 574 COMMON_END(PRU_ACCEPT); 575} 576#endif /* INET6 */ 577 578/* 579 * Mark the connection as being incapable of further output. 580 * 581 * Returns: 0 Success 582 * EINVAL [COMMON_START] 583 * tcp_output:EADDRNOTAVAIL 584 * tcp_output:ENOBUFS 585 * tcp_output:EMSGSIZE 586 * tcp_output:EHOSTUNREACH 587 * tcp_output:ENETUNREACH 588 * tcp_output:ENETDOWN 589 * tcp_output:ENOMEM 590 * tcp_output:EACCES 591 * tcp_output:EMSGSIZE 592 * tcp_output:ENOBUFS 593 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL] 594 */ 595static int 596tcp_usr_shutdown(struct socket *so) 597{ 598 int error = 0; 599 struct inpcb *inp = sotoinpcb(so); 600 struct tcpcb *tp; 601 602 COMMON_START(); 603 socantsendmore(so); 604 /* In case we got disconnected from the peer */ 605 if (tp == 0) 606 goto out; 607 tp = tcp_usrclosed(tp); 608 if (tp) 609 error = tcp_output(tp); 610 COMMON_END(PRU_SHUTDOWN); 611} 612 613/* 614 * After a receive, possibly send window update to peer. 
 */
/*
 * Trims the receive socket buffer through tcp_sbrcv_trim() and then
 * calls tcp_output().  The result of tcp_output() is deliberately not
 * propagated: the function returns 0 unless COMMON_START() rejects the
 * PCB with EINVAL.
 */
static int
tcp_usr_rcvd(struct socket *so, __unused int flags)
{
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	/* In case we got disconnected from the peer */
	if (tp == 0)
		goto out;
	tcp_sbrcv_trim(tp, &so->so_rcv);

	tcp_output(tp);
	COMMON_END(PRU_RCVD);
}

/*
 * Do a send by putting data in output queue and updating urgent
 * marker if URG set.  Possibly send more data.  Unlike the other
 * pru_*() routines, the mbuf chains are our responsibility.  We
 * must either enqueue them or free them.  The other pru_* routines
 * generally are caller-frees.
 *
 * Parameters:	so		socket to send on
 *		flags		PRUS_OOB, PRUS_EOF, PRUS_MORETOCOME
 *		m		data chain; appended to so_snd or freed
 *		nam		optional peer address for implied connect
 *		control		optional control chain; always freed here
 *		p		calling process, passed on to connect
 *
 * Returns:	0			Success
 *		ECONNRESET
 *		EINVAL
 *		ENOBUFS
 *	tcp_connect:EADDRINUSE		Address in use
 *	tcp_connect:EADDRNOTAVAIL	Address not available.
 *	tcp_connect:EINVAL		Invalid argument
 *	tcp_connect:EAFNOSUPPORT	Address family not supported [notdef]
 *	tcp_connect:EACCES		Permission denied
 *	tcp_connect:EAGAIN		Resource unavailable, try again
 *	tcp_connect:EPERM		Operation not permitted
 *	tcp_output:EADDRNOTAVAIL
 *	tcp_output:ENOBUFS
 *	tcp_output:EMSGSIZE
 *	tcp_output:EHOSTUNREACH
 *	tcp_output:ENETUNREACH
 *	tcp_output:ENETDOWN
 *	tcp_output:ENOMEM
 *	tcp_output:EACCES
 *	tcp_output:EMSGSIZE
 *	tcp_output:ENOBUFS
 *	tcp_output:???			[ignorable: mostly IPSEC/firewall/DLIL]
 *	tcp6_connect:???		[IPV6 only]
 */
static int
tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
	     struct sockaddr *nam, struct mbuf *control, struct proc *p)
{
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
#if INET6
	int isipv6;
#endif
	TCPDEBUG0;

	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		/*
		 * OOPS! we lost a race, the TCP session got reset after
		 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
		 * network interrupt in the non-splnet() section of sosend().
		 */
		if (m)
			m_freem(m);
		if (control)
			m_freem(control);
		error = ECONNRESET;	/* XXX EPIPE? */
		tp = NULL;
		TCPDEBUG1();
		goto out;
	}
#if INET6
	/* Selects tcp6_connect() vs tcp_connect() for an implied connect. */
	isipv6 = nam && nam->sa_family == AF_INET6;
#endif /* INET6 */
	tp = intotcpcb(inp);
	TCPDEBUG1();

	calculate_tcp_clock();

	if (control) {
		/* TCP doesn't do control messages (rights, creds, etc) */
		if (control->m_len) {
			m_freem(control);
			if (m)
				m_freem(m);
			error = EINVAL;
			goto out;
		}
		m_freem(control);	/* empty control, just free it */
	}
	if(!(flags & PRUS_OOB)) {
		/* From here on the data chain belongs to the send buffer. */
		sbappendstream(&so->so_snd, m);
		if (nam && tp->t_state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
#if INET6
			if (isipv6)
				error = tcp6_connect(tp, nam, p);
			else
#endif /* INET6 */
			error = tcp_connect(tp, nam, p);
			/* On failure the data stays queued in so_snd. */
			if (error)
				goto out;
			tp->snd_wnd = TTCP_CLIENT_SND_WND;
			tcp_mss(tp, -1, IFSCOPE_NONE);
		}

		if (flags & PRUS_EOF) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.
			 */
			socantsendmore(so);
			tp = tcp_usrclosed(tp);
		}
		/* tp may have been replaced by NULL in tcp_usrclosed(). */
		if (tp != NULL) {
			/* TF_MORETOCOME is set only around this one output call. */
			if (flags & PRUS_MORETOCOME)
				tp->t_flags |= TF_MORETOCOME;
			error = tcp_output(tp);
			if (flags & PRUS_MORETOCOME)
				tp->t_flags &= ~TF_MORETOCOME;
		}
	} else {
		if (sbspace(&so->so_snd) == 0) {
			/* if no space is left in sockbuf,
			 * do not try to squeeze in OOB traffic */
			m_freem(m);
			error = ENOBUFS;
			goto out;
		}
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 */
		sbappendstream(&so->so_snd, m);
		if (nam && tp->t_state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
#if INET6
			if (isipv6)
				error = tcp6_connect(tp, nam, p);
			else
#endif /* INET6 */
			error = tcp_connect(tp, nam, p);
			if (error)
				goto out;
			tp->snd_wnd = TTCP_CLIENT_SND_WND;
			tcp_mss(tp, -1, IFSCOPE_NONE);
		}
		/* Urgent pointer covers everything currently queued. */
		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
		/* t_force is raised only for the duration of this output call. */
		tp->t_force = 1;
		error = tcp_output(tp);
		tp->t_force = 0;
	}
	COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
}

/*
 * Abort the TCP.
 *
 * Drops the connection with ECONNABORTED through tcp_drop() and then
 * decrements the socket's use count.  Returns EINVAL when the PCB is
 * missing or dead [COMMON_START], 0 otherwise.
 */
static int
tcp_usr_abort(struct socket *so)
{
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	/* In case we got disconnected from the peer */
	if (tp == 0)
		goto out;
	tp = tcp_drop(tp, ECONNABORTED);
	/* NOTE(review): presumably balances a reference taken by the caller - confirm. */
	so->so_usecount--;
	COMMON_END(PRU_ABORT);
}

/*
 * Receive out-of-band data.
811 * 812 * Returns: 0 Success 813 * EINVAL [COMMON_START] 814 * EINVAL 815 * EWOULDBLOCK 816 */ 817static int 818tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 819{ 820 int error = 0; 821 struct inpcb *inp = sotoinpcb(so); 822 struct tcpcb *tp; 823 824 COMMON_START(); 825 if ((so->so_oobmark == 0 && 826 (so->so_state & SS_RCVATMARK) == 0) || 827 so->so_options & SO_OOBINLINE || 828 tp->t_oobflags & TCPOOB_HADDATA) { 829 error = EINVAL; 830 goto out; 831 } 832 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 833 error = EWOULDBLOCK; 834 goto out; 835 } 836 m->m_len = 1; 837 *mtod(m, caddr_t) = tp->t_iobc; 838 if ((flags & MSG_PEEK) == 0) 839 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 840 COMMON_END(PRU_RCVOOB); 841} 842 843/* xxx - should be const */ 844struct pr_usrreqs tcp_usrreqs = { 845 tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind, 846 tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach, 847 tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd, 848 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, 849 in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp 850}; 851 852#if INET6 853struct pr_usrreqs tcp6_usrreqs = { 854 tcp_usr_abort, tcp6_usr_accept, tcp_usr_attach, tcp6_usr_bind, 855 tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach, 856 tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd, 857 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, 858 in6_mapped_sockaddr, sosend, soreceive, pru_sopoll_notsupp 859}; 860#endif /* INET6 */ 861 862/* 863 * Common subroutine to open a TCP connection to remote host specified 864 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 865 * port number if needed. Call in_pcbladdr to do the routing and to choose 866 * a local host address (interface). 
 * If there is an existing incarnation
 * of the same connection in TIME-WAIT state and if the remote host was
 * sending CC options and if the connection duration was < MSL, then
 * truncate the previous TIME-WAIT state and proceed.
 * Initialize connection parameters and enter SYN-SENT state.
 *
 * Returns:	0			Success
 *		EADDRINUSE
 *		EINVAL
 *	in_pcbbind:EADDRNOTAVAIL	Address not available.
 *	in_pcbbind:EINVAL		Invalid argument
 *	in_pcbbind:EAFNOSUPPORT		Address family not supported [notdef]
 *	in_pcbbind:EACCES		Permission denied
 *	in_pcbbind:EADDRINUSE		Address in use
 *	in_pcbbind:EAGAIN		Resource unavailable, try again
 *	in_pcbbind:EPERM		Operation not permitted
 *	in_pcbladdr:EINVAL		Invalid argument
 *	in_pcbladdr:EAFNOSUPPORT	Address family not supported
 *	in_pcbladdr:EADDRNOTAVAIL	Address not available
 */
static int
tcp_connect(tp, nam, p)
	register struct tcpcb *tp;
	struct sockaddr *nam;
	struct proc *p;
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *otp;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct sockaddr_in ifaddr;
	struct rmxp_tao *taop;
	struct rmxp_tao tao_noncached;
	int error;
	/*
	 * NOTE(review): outif is never released on any path in this
	 * function, whereas tcp6_connect() releases its outif at `done:'.
	 * Confirm whether in_pcbladdr() hands back a held reference.
	 */
	struct ifnet *outif = NULL;

	/* No local port yet: bind one before choosing the local address. */
	if (inp->inp_lport == 0) {
		error = in_pcbbind(inp, (struct sockaddr *)0, p);
		if (error)
			return error;
	}

	/*
	 * Cannot simply call in_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 */
	error = in_pcbladdr(inp, nam, &ifaddr, &outif);
	if (error)
		return error;

	/* Our socket lock is dropped across the PCB hash lookup. */
	tcp_unlock(inp->inp_socket, 0, 0);
	oinp = in_pcblookup_hash(inp->inp_pcbinfo,
	    sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
						: ifaddr.sin_addr,
	    inp->inp_lport,  0, NULL);

	tcp_lock(inp->inp_socket, 0, 0);
	if (oinp) {
		if (oinp != inp) /* 4143933: avoid deadlock if inp == oinp */
			tcp_lock(oinp->inp_socket, 1, 0);
		/* A PCB that is being torn down is treated as absent. */
		if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			if (oinp != inp)
				tcp_unlock(oinp->inp_socket, 1, 0);
			goto skip_oinp;
		}

		/*
		 * Only a different PCB in TIME_WAIT, younger than tcp_msl
		 * and that received CC options, may be closed to make room;
		 * any other match is an address conflict.
		 */
		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
		    otp->t_state == TCPS_TIME_WAIT &&
		    ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
		    (otp->t_flags & TF_RCVD_CC))
			otp = tcp_close(otp);
		else {
			printf("tcp_connect: inp=%p err=EADDRINUSE\n", inp);
			if (oinp != inp)
				tcp_unlock(oinp->inp_socket, 1, 0);
			return EADDRINUSE;
		}
		if (oinp != inp)
			tcp_unlock(oinp->inp_socket, 1, 0);
	}
skip_oinp:
	/* Refuse a connection whose remote address/port equal the local ones. */
	if ((inp->inp_laddr.s_addr == INADDR_ANY ? ifaddr.sin_addr.s_addr :
		 inp->inp_laddr.s_addr) == sin->sin_addr.s_addr &&
	    inp->inp_lport == sin->sin_port)
			return EINVAL;
	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
		/*lock inversion issue, mostly with udp multicast packets */
		socket_unlock(inp->inp_socket, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
		socket_lock(inp->inp_socket, 0);
	}
	/* Adopt the chosen local address only if none was bound. */
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		inp->inp_laddr = ifaddr.sin_addr;
		inp->inp_last_outifp = outif;
	}
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	/* The 4-tuple changed, so the PCB is rehashed under the pcbinfo lock. */
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->mtx);

	if (inp->inp_flowhash == 0)
		inp->inp_flowhash = inp_calc_flowhash(inp);

	tcp_set_max_rwinscale(tp, so);

	soisconnecting(so);
	tcpstat.tcps_connattempt++;
	tp->t_state = TCPS_SYN_SENT;
	/* Keep timer uses the per-connection t_keepinit when it is non-zero. */
	tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
		tp->t_keepinit ? tp->t_keepinit : tcp_keepinit);
	tp->iss = tcp_new_isn(tp);
	tcp_sendseqinit(tp);
	if (nstat_collect)
		nstat_route_connect_attempt(inp->inp_route.ro_rt);

	/*
	 * Generate a CC value for this connection and
	 * check whether CC or CCnew should be used.
	 */
	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
		/* No cache entry: work on a zeroed scratch copy instead. */
		taop = &tao_noncached;
		bzero(taop, sizeof(*taop));
	}

	tp->cc_send = CC_INC(tcp_ccgen);
	if (taop->tao_ccsent != 0 &&
	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
		taop->tao_ccsent = tp->cc_send;
	} else {
		taop->tao_ccsent = 0;
		tp->t_flags |= TF_SENDCCNEW;
	}

	return 0;
}

#if INET6
/*
 * IPv6 counterpart of tcp_connect(): binds a local port if needed,
 * selects the local address through in6_pcbladdr(), checks for an
 * existing PCB on the same 4-tuple, records the foreign address and
 * port, and moves the TCB to TCPS_SYN_SENT.
 *
 * Every exit goes through `done:' so that a non-NULL outif is released.
 * Unlike tcp_connect(), no lock is taken on a matching PCB's socket and
 * there is no check that the remote address/port differ from the local
 * ones.
 *
 * Returns 0 on success, EADDRINUSE on a conflicting PCB, or the error
 * from in6_pcbbind() / in6_pcbladdr().
 */
static int
tcp6_connect(tp, nam, p)
	register struct tcpcb *tp;
	struct sockaddr *nam;
	struct proc *p;
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *otp;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam;
	struct in6_addr addr6;
	struct rmxp_tao *taop;
	struct rmxp_tao tao_noncached;
	int error = 0;
	struct ifnet *outif = NULL;

	if (inp->inp_lport == 0) {
		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
		if (error)
			goto done;
	}

	/*
	 * Cannot simply call in_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 *
	 * in6_pcbladdr() might return an ifp with its reference held
	 * even in the error case, so make sure that it's released
	 * whenever it's non-NULL.
	 */
	error = in6_pcbladdr(inp, nam, &addr6, &outif);
	if (error)
		goto done;
	/* Our socket lock is dropped across the PCB hash lookup. */
	tcp_unlock(inp->inp_socket, 0, 0);
	oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
				  &sin6->sin6_addr, sin6->sin6_port,
				  IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
				  ? &addr6
				  : &inp->in6p_laddr,
				  inp->inp_lport,  0, NULL);
	tcp_lock(inp->inp_socket, 0, 0);
	if (oinp) {
		/* Same TIME_WAIT recycling rule as in tcp_connect(). */
		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
		    otp->t_state == TCPS_TIME_WAIT &&
		    ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
		    (otp->t_flags & TF_RCVD_CC)) {
			otp = tcp_close(otp);
		} else {
			error = EADDRINUSE;
			goto done;
		}
	}
	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
		/*lock inversion issue, mostly with udp multicast packets */
		socket_unlock(inp->inp_socket, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
		socket_lock(inp->inp_socket, 0);
	}
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		inp->in6p_laddr = addr6;
		inp->in6p_last_outifp = outif;	/* no reference needed */
	}
	inp->in6p_faddr = sin6->sin6_addr;
	inp->inp_fport = sin6->sin6_port;
	/* Flow info is copied only when the caller supplied a non-zero value. */
	if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0)
		inp->in6p_flowinfo = sin6->sin6_flowinfo;
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->mtx);

	if (inp->inp_flowhash == 0)
		inp->inp_flowhash = inp_calc_flowhash(inp);

	tcp_set_max_rwinscale(tp, so);

	soisconnecting(so);
	tcpstat.tcps_connattempt++;
	tp->t_state = TCPS_SYN_SENT;
	tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
		tp->t_keepinit ? tp->t_keepinit : tcp_keepinit);
	tp->iss = tcp_new_isn(tp);
	tcp_sendseqinit(tp);
	if (nstat_collect)
		nstat_route_connect_attempt(inp->inp_route.ro_rt);

	/*
	 * Generate a CC value for this connection and
	 * check whether CC or CCnew should be used.
	 */
	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
		taop = &tao_noncached;
		bzero(taop, sizeof(*taop));
	}

	tp->cc_send = CC_INC(tcp_ccgen);
	if (taop->tao_ccsent != 0 &&
	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
		taop->tao_ccsent = tp->cc_send;
	} else {
		taop->tao_ccsent = 0;
		tp->t_flags |= TF_SENDCCNEW;
	}

done:
	/* Reached on success as well as on every error path. */
	if (outif != NULL)
		ifnet_release(outif);

	return (error);
}
#endif /* INET6 */

/*
 * Export TCP internal state information via a struct tcp_info
 *
 * *ti is zeroed first and tcpi_state is always filled in; every other
 * field is populated only when the connection state is beyond
 * TCPS_LISTEN.
 */
__private_extern__ void
tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
{
	struct inpcb *inp = tp->t_inpcb;

	bzero(ti, sizeof(*ti));

	ti->tcpi_state = tp->t_state;

	if (tp->t_state > TCPS_LISTEN) {
		/* An option is reported only if both requested and received. */
		if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
			ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
		if (tp->t_flags & TF_SACK_PERMIT)
			ti->tcpi_options |= TCPI_OPT_SACK;
		if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
			ti->tcpi_options |= TCPI_OPT_WSCALE;
			ti->tcpi_snd_wscale = tp->snd_scale;
			ti->tcpi_rcv_wscale = tp->rcv_scale;
		}

		/* Are we in a retransmission episode */
		if (tp->snd_max != tp->snd_nxt)
			ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY;
		else
			/* No effect in practice: *ti was zeroed above. */
			ti->tcpi_flags &= ~TCPI_FLAG_LOSSRECOVERY;

		/* RTO is reported as 0 when the retransmit timer is not armed. */
		ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? tp->t_rxtcur : 0;
		/* Both MSS fields are filled from the same t_maxseg value. */
		ti->tcpi_snd_mss = tp->t_maxseg;
		ti->tcpi_rcv_mss = tp->t_maxseg;

		ti->tcpi_rttcur = tp->t_rttcur;
		/* NOTE(review): shifts presumably strip fixed-point scaling - confirm. */
		ti->tcpi_srtt = tp->t_srtt >> TCP_RTT_SHIFT;
		ti->tcpi_rttvar = tp->t_rttvar >> TCP_RTTVAR_SHIFT;

		ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
		ti->tcpi_snd_cwnd = tp->snd_cwnd;
		ti->tcpi_snd_sbbytes = tp->t_inpcb->inp_socket->so_snd.sb_cc;

		ti->tcpi_rcv_space = tp->rcv_wnd;

		ti->tcpi_snd_wnd = tp->snd_wnd;
		ti->tcpi_snd_nxt = tp->snd_nxt;
		ti->tcpi_rcv_nxt = tp->rcv_nxt;

		/* convert bytes/msec to bits/sec */
		/*
		 * NOTE(review): the multiplication is done in the type of
		 * bw_sndbw; confirm it cannot overflow before being stored.
		 */
		if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
			tp->t_bwmeas != NULL) {
			ti->tcpi_snd_bw	= (tp->t_bwmeas->bw_sndbw * 8000);
		}

		/* Interface index of the last output interface, 0 if none. */
		ti->tcpi_last_outif = (tp->t_inpcb->inp_last_outifp == NULL) ? 0 :
		    tp->t_inpcb->inp_last_outifp->if_index;

		//atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes);
		ti->tcpi_txbytes = inp->inp_stat->txbytes;
		ti->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
		ti->tcpi_txunacked = tp->snd_max - tp->snd_una;

		//atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes);
		ti->tcpi_rxbytes = inp->inp_stat->rxbytes;
		ti->tcpi_rxduplicatebytes = tp->t_stat.rxduplicatebytes;
	}
}

__private_extern__ errno_t
tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti)
{
	struct inpcbinfo *pcbinfo = NULL;
	struct inpcb *inp = NULL;
	struct socket *so;
	struct tcpcb *tp;

	if (itpl->itpl_proto == IPPROTO_TCP)
		pcbinfo = &tcbinfo;
	else
		return EINVAL;

	if (itpl->itpl_local_sa.sa_family == AF_INET &&
		itpl->itpl_remote_sa.sa_family == AF_INET) {
		inp = in_pcblookup_hash(pcbinfo,
			itpl->itpl_remote_sin.sin_addr,
			itpl->itpl_remote_sin.sin_port,
			itpl->itpl_local_sin.sin_addr,
			itpl->itpl_local_sin.sin_port,
			0, NULL);
	} else if (itpl->itpl_local_sa.sa_family ==
AF_INET6 && 1205 itpl->itpl_remote_sa.sa_family == AF_INET6) { 1206 struct in6_addr ina6_local; 1207 struct in6_addr ina6_remote; 1208 1209 ina6_local = itpl->itpl_local_sin6.sin6_addr; 1210 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) && itpl->itpl_local_sin6.sin6_scope_id) 1211 ina6_local.s6_addr16[1] = htons(itpl->itpl_local_sin6.sin6_scope_id); 1212 1213 ina6_remote = itpl->itpl_remote_sin6.sin6_addr; 1214 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) && itpl->itpl_remote_sin6.sin6_scope_id) 1215 ina6_remote.s6_addr16[1] = htons(itpl->itpl_remote_sin6.sin6_scope_id); 1216 1217 inp = in6_pcblookup_hash(pcbinfo, 1218 &ina6_remote, 1219 itpl->itpl_remote_sin6.sin6_port, 1220 &ina6_local, 1221 itpl->itpl_local_sin6.sin6_port, 1222 0, NULL); 1223 } else 1224 return EINVAL; 1225 if (inp == NULL || (so = inp->inp_socket) == NULL) 1226 return ENOENT; 1227 1228 socket_lock(so, 0); 1229 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { 1230 socket_unlock(so, 0); 1231 return ENOENT; 1232 } 1233 tp = intotcpcb(inp); 1234 1235 tcp_fill_info(tp, ti); 1236 socket_unlock(so, 0); 1237 1238 return 0; 1239} 1240 1241 1242__private_extern__ int 1243tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) 1244{ 1245 int error; 1246 struct tcp_info ti; 1247 struct info_tuple itpl; 1248 1249 if (req->newptr == USER_ADDR_NULL) { 1250 return EINVAL; 1251 } 1252 if (req->newlen < sizeof(struct info_tuple)) { 1253 return EINVAL; 1254 } 1255 error = SYSCTL_IN(req, &itpl, sizeof(struct info_tuple)); 1256 if (error != 0) { 1257 return error; 1258 } 1259 error = tcp_fill_info_for_info_tuple(&itpl, &ti); 1260 if (error != 0) { 1261 return error; 1262 } 1263 error = SYSCTL_OUT(req, &ti, sizeof(struct tcp_info)); 1264 if (error != 0) { 1265 return error; 1266 } 1267 1268 return 0; 1269} 1270 1271static int 1272tcp_lookup_peer_pid_locked(struct socket *so, pid_t *out_pid) 1273{ 1274 int error = EHOSTUNREACH; 1275 *out_pid = -1; 1276 if 
((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN; 1277 1278 struct inpcb *inp = (struct inpcb*)so->so_pcb; 1279 uint16_t lport = inp->inp_lport; 1280 uint16_t fport = inp->inp_fport; 1281 struct inpcb *finp = NULL; 1282 1283 if (inp->inp_vflag & INP_IPV6) { 1284 struct in6_addr laddr6 = inp->in6p_laddr; 1285 struct in6_addr faddr6 = inp->in6p_faddr; 1286 socket_unlock(so, 0); 1287 finp = in6_pcblookup_hash(&tcbinfo, &laddr6, lport, &faddr6, fport, 0, NULL); 1288 socket_lock(so, 0); 1289 } else if (inp->inp_vflag & INP_IPV4) { 1290 struct in_addr laddr4 = inp->inp_laddr; 1291 struct in_addr faddr4 = inp->inp_faddr; 1292 socket_unlock(so, 0); 1293 finp = in_pcblookup_hash(&tcbinfo, laddr4, lport, faddr4, fport, 0, NULL); 1294 socket_lock(so, 0); 1295 } 1296 1297 if (finp) { 1298 *out_pid = finp->inp_socket->last_pid; 1299 error = 0; 1300 in_pcb_checkstate(finp, WNT_RELEASE, 0); 1301 } 1302 1303 return error; 1304} 1305 1306/* 1307 * The new sockopt interface makes it possible for us to block in the 1308 * copyin/out step (if we take a page fault). Taking a page fault at 1309 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 1310 * use TSM, there probably isn't any need for this function to run at 1311 * splnet() any more. This needs more examination.) 
1312 */ 1313int 1314tcp_ctloutput(so, sopt) 1315 struct socket *so; 1316 struct sockopt *sopt; 1317{ 1318 int error, opt, optval; 1319 struct inpcb *inp; 1320 struct tcpcb *tp; 1321 1322 error = 0; 1323 inp = sotoinpcb(so); 1324 if (inp == NULL) { 1325 return (ECONNRESET); 1326 } 1327 /* Allow <SOL_SOCKET,SO_FLUSH> at this level */ 1328 if (sopt->sopt_level != IPPROTO_TCP && 1329 !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH)) { 1330#if INET6 1331 if (INP_CHECK_SOCKAF(so, AF_INET6)) 1332 error = ip6_ctloutput(so, sopt); 1333 else 1334#endif /* INET6 */ 1335 error = ip_ctloutput(so, sopt); 1336 return (error); 1337 } 1338 tp = intotcpcb(inp); 1339 if (tp == NULL) { 1340 return (ECONNRESET); 1341 } 1342 1343 calculate_tcp_clock(); 1344 1345 switch (sopt->sopt_dir) { 1346 case SOPT_SET: 1347 switch (sopt->sopt_name) { 1348 case TCP_NODELAY: 1349 case TCP_NOOPT: 1350 case TCP_NOPUSH: 1351 error = sooptcopyin(sopt, &optval, sizeof optval, 1352 sizeof optval); 1353 if (error) 1354 break; 1355 1356 switch (sopt->sopt_name) { 1357 case TCP_NODELAY: 1358 opt = TF_NODELAY; 1359 break; 1360 case TCP_NOOPT: 1361 opt = TF_NOOPT; 1362 break; 1363 case TCP_NOPUSH: 1364 opt = TF_NOPUSH; 1365 break; 1366 default: 1367 opt = 0; /* dead code to fool gcc */ 1368 break; 1369 } 1370 1371 if (optval) 1372 tp->t_flags |= opt; 1373 else 1374 tp->t_flags &= ~opt; 1375 break; 1376 case TCP_RXT_FINDROP: 1377 error = sooptcopyin(sopt, &optval, sizeof optval, 1378 sizeof optval); 1379 if (error) 1380 break; 1381 opt = TF_RXTFINDROP; 1382 if (optval) 1383 tp->t_flagsext |= opt; 1384 else 1385 tp->t_flagsext &= ~opt; 1386 break; 1387 case TCP_MEASURE_SND_BW: 1388 error = sooptcopyin(sopt, &optval, sizeof optval, 1389 sizeof optval); 1390 if (error) 1391 break; 1392 opt = TF_MEASURESNDBW; 1393 if (optval) { 1394 if (tp->t_bwmeas == NULL) { 1395 tp->t_bwmeas = tcp_bwmeas_alloc(tp); 1396 if (tp->t_bwmeas == NULL) { 1397 error = ENOMEM; 1398 break; 1399 } 1400 } 1401 tp->t_flagsext 
|= opt; 1402 } else { 1403 tp->t_flagsext &= ~opt; 1404 /* Reset snd bw measurement state */ 1405 tp->t_flagsext &= ~(TF_BWMEAS_INPROGRESS); 1406 if (tp->t_bwmeas != NULL) { 1407 tcp_bwmeas_free(tp); 1408 } 1409 } 1410 break; 1411 case TCP_MEASURE_BW_BURST: { 1412 struct tcp_measure_bw_burst in; 1413 uint32_t minpkts, maxpkts; 1414 bzero(&in, sizeof(in)); 1415 1416 error = sooptcopyin(sopt, &in, sizeof(in), 1417 sizeof(in)); 1418 if (error) 1419 break; 1420 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 || 1421 tp->t_bwmeas == NULL) { 1422 error = EINVAL; 1423 break; 1424 } 1425 minpkts = (in.min_burst_size != 0) ? in.min_burst_size : 1426 tp->t_bwmeas->bw_minsizepkts; 1427 maxpkts = (in.max_burst_size != 0) ? in.max_burst_size : 1428 tp->t_bwmeas->bw_maxsizepkts; 1429 if (minpkts > maxpkts) { 1430 error = EINVAL; 1431 break; 1432 } 1433 tp->t_bwmeas->bw_minsizepkts = minpkts; 1434 tp->t_bwmeas->bw_maxsizepkts = maxpkts; 1435 tp->t_bwmeas->bw_minsize = (minpkts * tp->t_maxseg); 1436 tp->t_bwmeas->bw_maxsize = (maxpkts * tp->t_maxseg); 1437 break; 1438 } 1439 case TCP_MAXSEG: 1440 error = sooptcopyin(sopt, &optval, sizeof optval, 1441 sizeof optval); 1442 if (error) 1443 break; 1444 1445 if (optval > 0 && optval <= tp->t_maxseg && 1446 optval + 40 >= tcp_minmss) 1447 tp->t_maxseg = optval; 1448 else 1449 error = EINVAL; 1450 break; 1451 1452 case TCP_KEEPALIVE: 1453 error = sooptcopyin(sopt, &optval, sizeof optval, 1454 sizeof optval); 1455 if (error) 1456 break; 1457 if (optval < 0) 1458 error = EINVAL; 1459 else { 1460 tp->t_keepidle = optval * TCP_RETRANSHZ; 1461 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 1462 TCP_KEEPIDLE(tp)); /* reset the timer to new value */ 1463 tcp_check_timer_state(tp); 1464 } 1465 break; 1466 1467 case TCP_CONNECTIONTIMEOUT: 1468 error = sooptcopyin(sopt, &optval, sizeof optval, 1469 sizeof optval); 1470 if (error) 1471 break; 1472 if (optval < 0) 1473 error = EINVAL; 1474 else 1475 tp->t_keepinit = optval * TCP_RETRANSHZ; 1476 break; 
1477 1478 case PERSIST_TIMEOUT: 1479 error = sooptcopyin(sopt, &optval, sizeof optval, 1480 sizeof optval); 1481 if (error) 1482 break; 1483 if (optval < 0) 1484 error = EINVAL; 1485 else 1486 tp->t_persist_timeout = optval * TCP_RETRANSHZ; 1487 break; 1488 case TCP_RXT_CONNDROPTIME: 1489 error = sooptcopyin(sopt, &optval, sizeof(optval), 1490 sizeof(optval)); 1491 if (error) 1492 break; 1493 if (optval < 0) 1494 error = EINVAL; 1495 else 1496 tp->rxt_conndroptime = optval * TCP_RETRANSHZ; 1497 break; 1498 case TCP_NOTSENT_LOWAT: 1499 error = sooptcopyin(sopt, &optval, sizeof(optval), 1500 sizeof(optval)); 1501 if (error) 1502 break; 1503 if (optval < 0) { 1504 error = EINVAL; 1505 break; 1506 } else { 1507 if (optval == 0) { 1508 so->so_flags &= ~(SOF_NOTSENT_LOWAT); 1509 tp->t_notsent_lowat = 0; 1510 } else { 1511 so->so_flags |= SOF_NOTSENT_LOWAT; 1512 tp->t_notsent_lowat = optval; 1513 } 1514 } 1515 break; 1516 1517 case SO_FLUSH: 1518 if ((error = sooptcopyin(sopt, &optval, sizeof (optval), 1519 sizeof (optval))) != 0) 1520 break; 1521 1522 error = inp_flush(inp, optval); 1523 break; 1524 1525 default: 1526 error = ENOPROTOOPT; 1527 break; 1528 } 1529 break; 1530 1531 case SOPT_GET: 1532 switch (sopt->sopt_name) { 1533 case TCP_NODELAY: 1534 optval = tp->t_flags & TF_NODELAY; 1535 break; 1536 case TCP_MAXSEG: 1537 optval = tp->t_maxseg; 1538 break; 1539 case TCP_KEEPALIVE: 1540 optval = tp->t_keepidle / TCP_RETRANSHZ; 1541 break; 1542 case TCP_NOOPT: 1543 optval = tp->t_flags & TF_NOOPT; 1544 break; 1545 case TCP_NOPUSH: 1546 optval = tp->t_flags & TF_NOPUSH; 1547 break; 1548 case TCP_CONNECTIONTIMEOUT: 1549 optval = tp->t_keepinit / TCP_RETRANSHZ; 1550 break; 1551 case PERSIST_TIMEOUT: 1552 optval = tp->t_persist_timeout / TCP_RETRANSHZ; 1553 break; 1554 case TCP_RXT_CONNDROPTIME: 1555 optval = tp->rxt_conndroptime / TCP_RETRANSHZ; 1556 break; 1557 case TCP_RXT_FINDROP: 1558 optval = tp->t_flagsext & TF_RXTFINDROP; 1559 break; 1560 case TCP_MEASURE_SND_BW: 
1561 optval = tp->t_flagsext & TF_MEASURESNDBW; 1562 break; 1563 case TCP_INFO: { 1564 struct tcp_info ti; 1565 1566 tcp_fill_info(tp, &ti); 1567 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info)); 1568 goto done; 1569 /* NOT REACHED */ 1570 } 1571 case TCP_MEASURE_BW_BURST: { 1572 struct tcp_measure_bw_burst out; 1573 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 || 1574 tp->t_bwmeas == NULL) { 1575 error = EINVAL; 1576 break; 1577 } 1578 out.min_burst_size = tp->t_bwmeas->bw_minsizepkts; 1579 out.max_burst_size = tp->t_bwmeas->bw_maxsizepkts; 1580 error = sooptcopyout(sopt, &out, sizeof(out)); 1581 goto done; 1582 } 1583 case TCP_NOTSENT_LOWAT: 1584 if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) { 1585 optval = tp->t_notsent_lowat; 1586 } else { 1587 optval = 0; 1588 } 1589 break; 1590 case TCP_PEER_PID: { 1591 pid_t pid; 1592 error = tcp_lookup_peer_pid_locked(so, &pid); 1593 if (error == 0) 1594 error = sooptcopyout(sopt, &pid, sizeof(pid)); 1595 goto done; 1596 } 1597 default: 1598 error = ENOPROTOOPT; 1599 break; 1600 } 1601 if (error == 0) 1602 error = sooptcopyout(sopt, &optval, sizeof optval); 1603 break; 1604 } 1605done: 1606 return (error); 1607} 1608 1609/* 1610 * tcp_sendspace and tcp_recvspace are the default send and receive window 1611 * sizes, respectively. These are obsolescent (this information should 1612 * be set by the route). 1613 */ 1614u_int32_t tcp_sendspace = 1448*256; 1615u_int32_t tcp_recvspace = 1448*384; 1616 1617/* During attach, the size of socket buffer allocated is limited to 1618 * sb_max in sbreserve. 
Disallow setting the tcp send and recv space 1619 * to be more than sb_max because that will cause tcp_attach to fail 1620 * (see radar 5713060) 1621 */ 1622static int 1623sysctl_tcp_sospace(struct sysctl_oid *oidp, __unused void *arg1, 1624 __unused int arg2, struct sysctl_req *req) { 1625 u_int32_t new_value = 0, *space_p = NULL; 1626 int changed = 0, error = 0; 1627 u_quad_t sb_effective_max = (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES; 1628 1629 switch (oidp->oid_number) { 1630 case TCPCTL_SENDSPACE: 1631 space_p = &tcp_sendspace; 1632 break; 1633 case TCPCTL_RECVSPACE: 1634 space_p = &tcp_recvspace; 1635 break; 1636 default: 1637 return EINVAL; 1638 } 1639 error = sysctl_io_number(req, *space_p, sizeof(u_int32_t), 1640 &new_value, &changed); 1641 if (changed) { 1642 if (new_value > 0 && new_value <= sb_effective_max) { 1643 *space_p = new_value; 1644 } else { 1645 error = ERANGE; 1646 } 1647 } 1648 return error; 1649} 1650 1651SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 1652 &tcp_sendspace , 0, &sysctl_tcp_sospace, "IU", "Maximum outgoing TCP datagram size"); 1653SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 1654 &tcp_recvspace , 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size"); 1655 1656 1657/* 1658 * Attach TCP protocol to socket, allocating 1659 * internet protocol control block, tcp control block, 1660 * bufer space, and entering LISTEN state if to accept connections. 1661 * 1662 * Returns: 0 Success 1663 * in_pcballoc:ENOBUFS 1664 * in_pcballoc:ENOMEM 1665 * in_pcballoc:??? 
[IPSEC specific] 1666 * soreserve:ENOBUFS 1667 */ 1668static int 1669tcp_attach(so, p) 1670 struct socket *so; 1671 struct proc *p; 1672{ 1673 register struct tcpcb *tp; 1674 struct inpcb *inp; 1675 int error; 1676#if INET6 1677 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 1678#endif 1679 1680 error = in_pcballoc(so, &tcbinfo, p); 1681 if (error) 1682 return (error); 1683 1684 inp = sotoinpcb(so); 1685 1686 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1687 error = soreserve(so, tcp_sendspace, tcp_recvspace); 1688 if (error) 1689 return (error); 1690 } 1691 if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0) 1692 so->so_rcv.sb_flags |= SB_AUTOSIZE; 1693 if ((so->so_snd.sb_flags & SB_USRSIZE) == 0) 1694 so->so_snd.sb_flags |= SB_AUTOSIZE; 1695 1696#if INET6 1697 if (isipv6) { 1698 inp->inp_vflag |= INP_IPV6; 1699 inp->in6p_hops = -1; /* use kernel default */ 1700 } 1701 else 1702#endif /* INET6 */ 1703 inp->inp_vflag |= INP_IPV4; 1704 tp = tcp_newtcpcb(inp); 1705 if (tp == 0) { 1706 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 1707 1708 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 1709#if INET6 1710 if (isipv6) 1711 in6_pcbdetach(inp); 1712 else 1713#endif /* INET6 */ 1714 in_pcbdetach(inp); 1715 so->so_state |= nofd; 1716 return (ENOBUFS); 1717 } 1718 if (nstat_collect) { 1719 nstat_tcp_new_pcb(inp); 1720 } 1721 tp->t_state = TCPS_CLOSED; 1722 return (0); 1723} 1724 1725/* 1726 * Initiate (or continue) disconnect. 1727 * If embryonic state, just send reset (once). 1728 * If in ``let data drain'' option and linger null, just drop. 1729 * Otherwise (hard), mark socket disconnecting and drop 1730 * current input data; switch states based on user close, and 1731 * send segment to peer (with FIN). 
1732 */ 1733static struct tcpcb * 1734tcp_disconnect(tp) 1735 register struct tcpcb *tp; 1736{ 1737 struct socket *so = tp->t_inpcb->inp_socket; 1738 1739 if (tp->t_state < TCPS_ESTABLISHED) 1740 tp = tcp_close(tp); 1741 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 1742 tp = tcp_drop(tp, 0); 1743 else { 1744 soisdisconnecting(so); 1745 sbflush(&so->so_rcv); 1746 tp = tcp_usrclosed(tp); 1747 if (tp) 1748 (void) tcp_output(tp); 1749 } 1750 return (tp); 1751} 1752 1753/* 1754 * User issued close, and wish to trail through shutdown states: 1755 * if never received SYN, just forget it. If got a SYN from peer, 1756 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1757 * If already got a FIN from peer, then almost done; go to LAST_ACK 1758 * state. In all other cases, have already sent FIN to peer (e.g. 1759 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1760 * for peer to send FIN or not respond to keep-alives, etc. 1761 * We can let the user exit from the close as soon as the FIN is acked. 1762 */ 1763static struct tcpcb * 1764tcp_usrclosed(tp) 1765 register struct tcpcb *tp; 1766{ 1767 1768 switch (tp->t_state) { 1769 1770 case TCPS_CLOSED: 1771 case TCPS_LISTEN: 1772 tp->t_state = TCPS_CLOSED; 1773 tp = tcp_close(tp); 1774 break; 1775 1776 case TCPS_SYN_SENT: 1777 case TCPS_SYN_RECEIVED: 1778 tp->t_flags |= TF_NEEDFIN; 1779 break; 1780 1781 case TCPS_ESTABLISHED: 1782 tp->t_state = TCPS_FIN_WAIT_1; 1783 break; 1784 1785 case TCPS_CLOSE_WAIT: 1786 tp->t_state = TCPS_LAST_ACK; 1787 break; 1788 } 1789 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1790 soisdisconnected(tp->t_inpcb->inp_socket); 1791 /* To prevent the connection hanging in FIN_WAIT_2 forever. 
*/ 1792 if (tp->t_state == TCPS_FIN_WAIT_2) 1793 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, tcp_maxidle); 1794 } 1795 return (tp); 1796} 1797 1798void 1799tcp_in_cksum_stats(u_int32_t len) 1800{ 1801 tcps_in_sw_cksum++; 1802 tcps_in_sw_cksum_bytes += len; 1803} 1804 1805void 1806tcp_out_cksum_stats(u_int32_t len) 1807{ 1808 tcps_out_sw_cksum++; 1809 tcps_out_sw_cksum_bytes += len; 1810} 1811