tcp_usrreq.c revision 130480
1/* 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 30 * $FreeBSD: head/sys/netinet/tcp_usrreq.c 130480 2004-06-14 18:16:22Z rwatson $ 31 */ 32 33#include "opt_ipsec.h" 34#include "opt_inet.h" 35#include "opt_inet6.h" 36#include "opt_tcpdebug.h" 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/malloc.h> 41#include <sys/kernel.h> 42#include <sys/sysctl.h> 43#include <sys/mbuf.h> 44#ifdef INET6 45#include <sys/domain.h> 46#endif /* INET6 */ 47#include <sys/socket.h> 48#include <sys/socketvar.h> 49#include <sys/protosw.h> 50#include <sys/proc.h> 51#include <sys/jail.h> 52 53#include <net/if.h> 54#include <net/route.h> 55 56#include <netinet/in.h> 57#include <netinet/in_systm.h> 58#ifdef INET6 59#include <netinet/ip6.h> 60#endif 61#include <netinet/in_pcb.h> 62#ifdef INET6 63#include <netinet6/in6_pcb.h> 64#endif 65#include <netinet/in_var.h> 66#include <netinet/ip_var.h> 67#ifdef INET6 68#include <netinet6/ip6_var.h> 69#endif 70#include <netinet/tcp.h> 71#include <netinet/tcp_fsm.h> 72#include <netinet/tcp_seq.h> 73#include <netinet/tcp_timer.h> 74#include <netinet/tcp_var.h> 75#include <netinet/tcpip.h> 76#ifdef TCPDEBUG 77#include <netinet/tcp_debug.h> 78#endif 79 80#ifdef IPSEC 81#include <netinet6/ipsec.h> 82#endif /*IPSEC*/ 83 84/* 85 * TCP protocol interface to socket abstraction. 86 */ 87extern char *tcpstates[]; /* XXX ??? */ 88 89static int tcp_attach(struct socket *); 90static int tcp_connect(struct tcpcb *, struct sockaddr *, 91 struct thread *td); 92#ifdef INET6 93static int tcp6_connect(struct tcpcb *, struct sockaddr *, 94 struct thread *td); 95#endif /* INET6 */ 96static struct tcpcb * 97 tcp_disconnect(struct tcpcb *); 98static struct tcpcb * 99 tcp_usrclosed(struct tcpcb *); 100 101#ifdef TCPDEBUG 102#define TCPDEBUG0 int ostate = 0 103#define TCPDEBUG1() ostate = tp ? tp->t_state : 0 104#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 105 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 106#else 107#define TCPDEBUG0 108#define TCPDEBUG1() 109#define TCPDEBUG2(req) 110#endif 111 112/* 113 * TCP attaches to socket via pru_attach(), reserving space, 114 * and an internet control block. 115 */ 116static int 117tcp_usr_attach(struct socket *so, int proto, struct thread *td) 118{ 119 int s = splnet(); 120 int error; 121 struct inpcb *inp; 122 struct tcpcb *tp = 0; 123 TCPDEBUG0; 124 125 INP_INFO_WLOCK(&tcbinfo); 126 TCPDEBUG1(); 127 inp = sotoinpcb(so); 128 if (inp) { 129 error = EISCONN; 130 goto out; 131 } 132 133 error = tcp_attach(so); 134 if (error) 135 goto out; 136 137 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 138 so->so_linger = TCP_LINGERTIME; 139 140 inp = sotoinpcb(so); 141 tp = intotcpcb(inp); 142out: 143 TCPDEBUG2(PRU_ATTACH); 144 INP_INFO_WUNLOCK(&tcbinfo); 145 splx(s); 146 return error; 147} 148 149/* 150 * pru_detach() detaches the TCP protocol from the socket. 151 * If the protocol state is non-embryonic, then can't 152 * do this directly: have to initiate a pru_disconnect(), 153 * which may finish later; embryonic TCB's can just 154 * be discarded here. 155 */ 156static int 157tcp_usr_detach(struct socket *so) 158{ 159 int s = splnet(); 160 int error = 0; 161 struct inpcb *inp; 162 struct tcpcb *tp; 163 TCPDEBUG0; 164 165 INP_INFO_WLOCK(&tcbinfo); 166 inp = sotoinpcb(so); 167 if (inp == 0) { 168 INP_INFO_WUNLOCK(&tcbinfo); 169 splx(s); 170 return EINVAL; /* XXX */ 171 } 172 INP_LOCK(inp); 173 tp = intotcpcb(inp); 174 TCPDEBUG1(); 175 tp = tcp_disconnect(tp); 176 177 TCPDEBUG2(PRU_DETACH); 178 if (tp) 179 INP_UNLOCK(inp); 180 INP_INFO_WUNLOCK(&tcbinfo); 181 splx(s); 182 return error; 183} 184 185#define INI_NOLOCK 0 186#define INI_READ 1 187#define INI_WRITE 2 188 189#define COMMON_START() \ 190 TCPDEBUG0; \ 191 do { \ 192 if (inirw == INI_READ) \ 193 INP_INFO_RLOCK(&tcbinfo); \ 194 else if (inirw == INI_WRITE) \ 195 INP_INFO_WLOCK(&tcbinfo); \ 196 inp = sotoinpcb(so); \ 197 if (inp == 0) { \ 198 if (inirw == INI_READ) \ 199 INP_INFO_RUNLOCK(&tcbinfo); \ 200 else if (inirw == INI_WRITE) \ 201 INP_INFO_WUNLOCK(&tcbinfo); \ 202 splx(s); \ 203 return EINVAL; \ 204 } \ 205 INP_LOCK(inp); \ 206 if (inirw == INI_READ) \ 207 INP_INFO_RUNLOCK(&tcbinfo); \ 208 tp = intotcpcb(inp); \ 209 TCPDEBUG1(); \ 210} while(0) 211 212#define COMMON_END(req) \ 213out: TCPDEBUG2(req); \ 214 do { \ 215 if (tp) \ 216 INP_UNLOCK(inp); \ 217 if (inirw == INI_WRITE) \ 218 INP_INFO_WUNLOCK(&tcbinfo); \ 219 splx(s); \ 220 return error; \ 221 goto out; \ 222} while(0) 223 224/* 225 * Give the socket an address. 226 */ 227static int 228tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 229{ 230 int s = splnet(); 231 int error = 0; 232 struct inpcb *inp; 233 struct tcpcb *tp; 234 struct sockaddr_in *sinp; 235 const int inirw = INI_WRITE; 236 237 sinp = (struct sockaddr_in *)nam; 238 if (nam->sa_len != sizeof (*sinp)) 239 return (EINVAL); 240 /* 241 * Must check for multicast addresses and disallow binding 242 * to them. 243 */ 244 if (sinp->sin_family == AF_INET && 245 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 246 return (EAFNOSUPPORT); 247 248 COMMON_START(); 249 error = in_pcbbind(inp, nam, td->td_ucred); 250 if (error) 251 goto out; 252 COMMON_END(PRU_BIND); 253} 254 255#ifdef INET6 256static int 257tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 258{ 259 int s = splnet(); 260 int error = 0; 261 struct inpcb *inp; 262 struct tcpcb *tp; 263 struct sockaddr_in6 *sin6p; 264 const int inirw = INI_WRITE; 265 266 sin6p = (struct sockaddr_in6 *)nam; 267 if (nam->sa_len != sizeof (*sin6p)) 268 return (EINVAL); 269 /* 270 * Must check for multicast addresses and disallow binding 271 * to them. 272 */ 273 if (sin6p->sin6_family == AF_INET6 && 274 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 275 return (EAFNOSUPPORT); 276 277 COMMON_START(); 278 inp->inp_vflag &= ~INP_IPV4; 279 inp->inp_vflag |= INP_IPV6; 280 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 281 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 282 inp->inp_vflag |= INP_IPV4; 283 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 284 struct sockaddr_in sin; 285 286 in6_sin6_2_sin(&sin, sin6p); 287 inp->inp_vflag |= INP_IPV4; 288 inp->inp_vflag &= ~INP_IPV6; 289 error = in_pcbbind(inp, (struct sockaddr *)&sin, 290 td->td_ucred); 291 goto out; 292 } 293 } 294 error = in6_pcbbind(inp, nam, td->td_ucred); 295 if (error) 296 goto out; 297 COMMON_END(PRU_BIND); 298} 299#endif /* INET6 */ 300 301/* 302 * Prepare to accept connections. 303 */ 304static int 305tcp_usr_listen(struct socket *so, struct thread *td) 306{ 307 int s = splnet(); 308 int error = 0; 309 struct inpcb *inp; 310 struct tcpcb *tp; 311 const int inirw = INI_WRITE; 312 313 COMMON_START(); 314 if (inp->inp_lport == 0) 315 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 316 if (error == 0) 317 tp->t_state = TCPS_LISTEN; 318 COMMON_END(PRU_LISTEN); 319} 320 321#ifdef INET6 322static int 323tcp6_usr_listen(struct socket *so, struct thread *td) 324{ 325 int s = splnet(); 326 int error = 0; 327 struct inpcb *inp; 328 struct tcpcb *tp; 329 const int inirw = INI_WRITE; 330 331 COMMON_START(); 332 if (inp->inp_lport == 0) { 333 inp->inp_vflag &= ~INP_IPV4; 334 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 335 inp->inp_vflag |= INP_IPV4; 336 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 337 } 338 if (error == 0) 339 tp->t_state = TCPS_LISTEN; 340 COMMON_END(PRU_LISTEN); 341} 342#endif /* INET6 */ 343 344/* 345 * Initiate connection to peer. 346 * Create a template for use in transmissions on this connection. 347 * Enter SYN_SENT state, and mark socket as connecting. 348 * Start keep-alive timer, and seed output sequence space. 349 * Send initial segment on connection. 350 */ 351static int 352tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 353{ 354 int s = splnet(); 355 int error = 0; 356 struct inpcb *inp; 357 struct tcpcb *tp; 358 struct sockaddr_in *sinp; 359 const int inirw = INI_WRITE; 360 361 sinp = (struct sockaddr_in *)nam; 362 if (nam->sa_len != sizeof (*sinp)) 363 return (EINVAL); 364 /* 365 * Must disallow TCP ``connections'' to multicast addresses. 366 */ 367 if (sinp->sin_family == AF_INET 368 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 369 return (EAFNOSUPPORT); 370 if (td && jailed(td->td_ucred)) 371 prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); 372 373 COMMON_START(); 374 if ((error = tcp_connect(tp, nam, td)) != 0) 375 goto out; 376 error = tcp_output(tp); 377 COMMON_END(PRU_CONNECT); 378} 379 380#ifdef INET6 381static int 382tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 383{ 384 int s = splnet(); 385 int error = 0; 386 struct inpcb *inp; 387 struct tcpcb *tp; 388 struct sockaddr_in6 *sin6p; 389 const int inirw = INI_WRITE; 390 391 sin6p = (struct sockaddr_in6 *)nam; 392 if (nam->sa_len != sizeof (*sin6p)) 393 return (EINVAL); 394 /* 395 * Must disallow TCP ``connections'' to multicast addresses. 396 */ 397 if (sin6p->sin6_family == AF_INET6 398 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 399 return (EAFNOSUPPORT); 400 401 COMMON_START(); 402 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 403 struct sockaddr_in sin; 404 405 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 406 error = EINVAL; 407 goto out; 408 } 409 410 in6_sin6_2_sin(&sin, sin6p); 411 inp->inp_vflag |= INP_IPV4; 412 inp->inp_vflag &= ~INP_IPV6; 413 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) 414 goto out; 415 error = tcp_output(tp); 416 goto out; 417 } 418 inp->inp_vflag &= ~INP_IPV4; 419 inp->inp_vflag |= INP_IPV6; 420 inp->inp_inc.inc_isipv6 = 1; 421 if ((error = tcp6_connect(tp, nam, td)) != 0) 422 goto out; 423 error = tcp_output(tp); 424 COMMON_END(PRU_CONNECT); 425} 426#endif /* INET6 */ 427 428/* 429 * Initiate disconnect from peer. 430 * If connection never passed embryonic stage, just drop; 431 * else if don't need to let data drain, then can just drop anyways, 432 * else have to begin TCP shutdown process: mark socket disconnecting, 433 * drain unread data, state switch to reflect user close, and 434 * send segment (e.g. FIN) to peer. Socket will be really disconnected 435 * when peer sends FIN and acks ours. 436 * 437 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 438 */ 439static int 440tcp_usr_disconnect(struct socket *so) 441{ 442 int s = splnet(); 443 int error = 0; 444 struct inpcb *inp; 445 struct tcpcb *tp; 446 const int inirw = INI_WRITE; 447 448 COMMON_START(); 449 tp = tcp_disconnect(tp); 450 COMMON_END(PRU_DISCONNECT); 451} 452 453/* 454 * Accept a connection. Essentially all the work is 455 * done at higher levels; just return the address 456 * of the peer, storing through addr. 457 */ 458static int 459tcp_usr_accept(struct socket *so, struct sockaddr **nam) 460{ 461 int s; 462 int error = 0; 463 struct inpcb *inp = NULL; 464 struct tcpcb *tp = NULL; 465 struct in_addr addr; 466 in_port_t port = 0; 467 TCPDEBUG0; 468 469 if (so->so_state & SS_ISDISCONNECTED) { 470 error = ECONNABORTED; 471 goto out; 472 } 473 474 s = splnet(); 475 INP_INFO_RLOCK(&tcbinfo); 476 inp = sotoinpcb(so); 477 if (!inp) { 478 INP_INFO_RUNLOCK(&tcbinfo); 479 splx(s); 480 return (EINVAL); 481 } 482 INP_LOCK(inp); 483 INP_INFO_RUNLOCK(&tcbinfo); 484 tp = intotcpcb(inp); 485 TCPDEBUG1(); 486 487 /* 488 * We inline in_setpeeraddr and COMMON_END here, so that we can 489 * copy the data of interest and defer the malloc until after we 490 * release the lock. 491 */ 492 port = inp->inp_fport; 493 addr = inp->inp_faddr; 494 495out: TCPDEBUG2(PRU_ACCEPT); 496 if (tp) 497 INP_UNLOCK(inp); 498 splx(s); 499 if (error == 0) 500 *nam = in_sockaddr(port, &addr); 501 return error; 502} 503 504#ifdef INET6 505static int 506tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 507{ 508 int s; 509 struct inpcb *inp = NULL; 510 int error = 0; 511 struct tcpcb *tp = NULL; 512 struct in_addr addr; 513 struct in6_addr addr6; 514 in_port_t port = 0; 515 int v4 = 0; 516 TCPDEBUG0; 517 518 if (so->so_state & SS_ISDISCONNECTED) { 519 error = ECONNABORTED; 520 goto out; 521 } 522 523 s = splnet(); 524 INP_INFO_RLOCK(&tcbinfo); 525 inp = sotoinpcb(so); 526 if (inp == 0) { 527 INP_INFO_RUNLOCK(&tcbinfo); 528 splx(s); 529 return (EINVAL); 530 } 531 INP_LOCK(inp); 532 INP_INFO_RUNLOCK(&tcbinfo); 533 tp = intotcpcb(inp); 534 TCPDEBUG1(); 535 /* 536 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can 537 * copy the data of interest and defer the malloc until after we 538 * release the lock. 539 */ 540 if (inp->inp_vflag & INP_IPV4) { 541 v4 = 1; 542 port = inp->inp_fport; 543 addr = inp->inp_faddr; 544 } else { 545 port = inp->inp_fport; 546 addr6 = inp->in6p_faddr; 547 } 548 549out: TCPDEBUG2(PRU_ACCEPT); 550 if (tp) 551 INP_UNLOCK(inp); 552 splx(s); 553 if (error == 0) { 554 if (v4) 555 *nam = in6_v4mapsin6_sockaddr(port, &addr); 556 else 557 *nam = in6_sockaddr(port, &addr6); 558 } 559 return error; 560} 561#endif /* INET6 */ 562 563/* 564 * This is the wrapper function for in_setsockaddr. We just pass down 565 * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking 566 * here because in_setsockaddr will call malloc and can block. 567 */ 568static int 569tcp_sockaddr(struct socket *so, struct sockaddr **nam) 570{ 571 return (in_setsockaddr(so, nam, &tcbinfo)); 572} 573 574/* 575 * This is the wrapper function for in_setpeeraddr. We just pass down 576 * the pcbinfo for in_setpeeraddr to lock. 577 */ 578static int 579tcp_peeraddr(struct socket *so, struct sockaddr **nam) 580{ 581 return (in_setpeeraddr(so, nam, &tcbinfo)); 582} 583 584/* 585 * Mark the connection as being incapable of further output. 586 */ 587static int 588tcp_usr_shutdown(struct socket *so) 589{ 590 int s = splnet(); 591 int error = 0; 592 struct inpcb *inp; 593 struct tcpcb *tp; 594 const int inirw = INI_WRITE; 595 596 COMMON_START(); 597 socantsendmore(so); 598 tp = tcp_usrclosed(tp); 599 if (tp) 600 error = tcp_output(tp); 601 COMMON_END(PRU_SHUTDOWN); 602} 603 604/* 605 * After a receive, possibly send window update to peer. 606 */ 607static int 608tcp_usr_rcvd(struct socket *so, int flags) 609{ 610 int s = splnet(); 611 int error = 0; 612 struct inpcb *inp; 613 struct tcpcb *tp; 614 const int inirw = INI_READ; 615 616 COMMON_START(); 617 tcp_output(tp); 618 COMMON_END(PRU_RCVD); 619} 620 621/* 622 * Do a send by putting data in output queue and updating urgent 623 * marker if URG set. Possibly send more data. Unlike the other 624 * pru_*() routines, the mbuf chains are our responsibility. We 625 * must either enqueue them or free them. The other pru_* routines 626 * generally are caller-frees. 627 */ 628static int 629tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 630 struct sockaddr *nam, struct mbuf *control, struct thread *td) 631{ 632 int s = splnet(); 633 int error = 0; 634 struct inpcb *inp; 635 struct tcpcb *tp; 636 const int inirw = INI_WRITE; 637#ifdef INET6 638 int isipv6; 639#endif 640 TCPDEBUG0; 641 642 /* 643 * Need write lock here because this function might call 644 * tcp_connect or tcp_usrclosed. 645 * We really want to have to this function upgrade from read lock 646 * to write lock. XXX 647 */ 648 INP_INFO_WLOCK(&tcbinfo); 649 inp = sotoinpcb(so); 650 if (inp == NULL) { 651 /* 652 * OOPS! we lost a race, the TCP session got reset after 653 * we checked SBS_CANTSENDMORE, eg: while doing uiomove or a 654 * network interrupt in the non-splnet() section of sosend(). 655 */ 656 if (m) 657 m_freem(m); 658 if (control) 659 m_freem(control); 660 error = ECONNRESET; /* XXX EPIPE? */ 661 tp = NULL; 662 TCPDEBUG1(); 663 goto out; 664 } 665 INP_LOCK(inp); 666#ifdef INET6 667 isipv6 = nam && nam->sa_family == AF_INET6; 668#endif /* INET6 */ 669 tp = intotcpcb(inp); 670 TCPDEBUG1(); 671 if (control) { 672 /* TCP doesn't do control messages (rights, creds, etc) */ 673 if (control->m_len) { 674 m_freem(control); 675 if (m) 676 m_freem(m); 677 error = EINVAL; 678 goto out; 679 } 680 m_freem(control); /* empty control, just free it */ 681 } 682 if (!(flags & PRUS_OOB)) { 683 sbappendstream(&so->so_snd, m); 684 if (nam && tp->t_state < TCPS_SYN_SENT) { 685 /* 686 * Do implied connect if not yet connected, 687 * initialize window to default value, and 688 * initialize maxseg/maxopd using peer's cached 689 * MSS. 690 */ 691#ifdef INET6 692 if (isipv6) 693 error = tcp6_connect(tp, nam, td); 694 else 695#endif /* INET6 */ 696 error = tcp_connect(tp, nam, td); 697 if (error) 698 goto out; 699 tp->snd_wnd = TTCP_CLIENT_SND_WND; 700 tcp_mss(tp, -1); 701 } 702 703 if (flags & PRUS_EOF) { 704 /* 705 * Close the send side of the connection after 706 * the data is sent. 707 */ 708 socantsendmore(so); 709 tp = tcp_usrclosed(tp); 710 } 711 if (tp != NULL) { 712 if (flags & PRUS_MORETOCOME) 713 tp->t_flags |= TF_MORETOCOME; 714 error = tcp_output(tp); 715 if (flags & PRUS_MORETOCOME) 716 tp->t_flags &= ~TF_MORETOCOME; 717 } 718 } else { 719 if (sbspace(&so->so_snd) < -512) { 720 m_freem(m); 721 error = ENOBUFS; 722 goto out; 723 } 724 /* 725 * According to RFC961 (Assigned Protocols), 726 * the urgent pointer points to the last octet 727 * of urgent data. We continue, however, 728 * to consider it to indicate the first octet 729 * of data past the urgent section. 730 * Otherwise, snd_up should be one lower. 731 */ 732 sbappendstream(&so->so_snd, m); 733 if (nam && tp->t_state < TCPS_SYN_SENT) { 734 /* 735 * Do implied connect if not yet connected, 736 * initialize window to default value, and 737 * initialize maxseg/maxopd using peer's cached 738 * MSS. 739 */ 740#ifdef INET6 741 if (isipv6) 742 error = tcp6_connect(tp, nam, td); 743 else 744#endif /* INET6 */ 745 error = tcp_connect(tp, nam, td); 746 if (error) 747 goto out; 748 tp->snd_wnd = TTCP_CLIENT_SND_WND; 749 tcp_mss(tp, -1); 750 } 751 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 752 tp->t_force = 1; 753 error = tcp_output(tp); 754 tp->t_force = 0; 755 } 756 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 757 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 758} 759 760/* 761 * Abort the TCP. 762 */ 763static int 764tcp_usr_abort(struct socket *so) 765{ 766 int s = splnet(); 767 int error = 0; 768 struct inpcb *inp; 769 struct tcpcb *tp; 770 const int inirw = INI_WRITE; 771 772 COMMON_START(); 773 tp = tcp_drop(tp, ECONNABORTED); 774 COMMON_END(PRU_ABORT); 775} 776 777/* 778 * Receive out-of-band data. 779 */ 780static int 781tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 782{ 783 int s = splnet(); 784 int error = 0; 785 struct inpcb *inp; 786 struct tcpcb *tp; 787 const int inirw = INI_READ; 788 789 COMMON_START(); 790 if ((so->so_oobmark == 0 && 791 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || 792 so->so_options & SO_OOBINLINE || 793 tp->t_oobflags & TCPOOB_HADDATA) { 794 error = EINVAL; 795 goto out; 796 } 797 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 798 error = EWOULDBLOCK; 799 goto out; 800 } 801 m->m_len = 1; 802 *mtod(m, caddr_t) = tp->t_iobc; 803 if ((flags & MSG_PEEK) == 0) 804 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 805 COMMON_END(PRU_RCVOOB); 806} 807 808/* xxx - should be const */ 809struct pr_usrreqs tcp_usrreqs = { 810 tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind, 811 tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach, 812 tcp_usr_disconnect, tcp_usr_listen, tcp_peeraddr, tcp_usr_rcvd, 813 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, 814 tcp_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel 815}; 816 817#ifdef INET6 818struct pr_usrreqs tcp6_usrreqs = { 819 tcp_usr_abort, tcp6_usr_accept, tcp_usr_attach, tcp6_usr_bind, 820 tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach, 821 tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd, 822 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, 823 in6_mapped_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel 824}; 825#endif /* INET6 */ 826 827/* 828 * Common subroutine to open a TCP connection to remote host specified 829 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 830 * port number if needed. Call in_pcbconnect_setup to do the routing and 831 * to choose a local host address (interface). If there is an existing 832 * incarnation of the same connection in TIME-WAIT state and if the remote 833 * host was sending CC options and if the connection duration was < MSL, then 834 * truncate the previous TIME-WAIT state and proceed. 835 * Initialize connection parameters and enter SYN-SENT state. 836 */ 837static int 838tcp_connect(tp, nam, td) 839 register struct tcpcb *tp; 840 struct sockaddr *nam; 841 struct thread *td; 842{ 843 struct inpcb *inp = tp->t_inpcb, *oinp; 844 struct socket *so = inp->inp_socket; 845 struct tcptw *otw; 846 struct rmxp_tao tao; 847 struct in_addr laddr; 848 u_short lport; 849 int error; 850 851 bzero(&tao, sizeof(tao)); 852 853 if (inp->inp_lport == 0) { 854 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 855 if (error) 856 return error; 857 } 858 859 /* 860 * Cannot simply call in_pcbconnect, because there might be an 861 * earlier incarnation of this same connection still in 862 * TIME_WAIT state, creating an ADDRINUSE error. 863 */ 864 laddr = inp->inp_laddr; 865 lport = inp->inp_lport; 866 error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, 867 &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); 868 if (error && oinp == NULL) 869 return error; 870 if (oinp) { 871 if (oinp != inp && 872 (oinp->inp_vflag & INP_TIMEWAIT) && 873 (ticks - (otw = intotw(oinp))->t_starttime) < tcp_msl && 874 otw->cc_recv != 0) { 875 inp->inp_faddr = oinp->inp_faddr; 876 inp->inp_fport = oinp->inp_fport; 877 (void) tcp_twclose(otw, 0); 878 } else 879 return EADDRINUSE; 880 } 881 inp->inp_laddr = laddr; 882 in_pcbrehash(inp); 883 884 /* Compute window scaling to request. */ 885 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 886 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 887 tp->request_r_scale++; 888 889 soisconnecting(so); 890 tcpstat.tcps_connattempt++; 891 tp->t_state = TCPS_SYN_SENT; 892 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 893 tp->iss = tcp_new_isn(tp); 894 tp->t_bw_rtseq = tp->iss; 895 tcp_sendseqinit(tp); 896 897 /* 898 * Generate a CC value for this connection and 899 * check whether CC or CCnew should be used. 900 */ 901 if (tcp_do_rfc1644) 902 tcp_hc_gettao(&inp->inp_inc, &tao); 903 904 tp->cc_send = CC_INC(tcp_ccgen); 905 if (tao.tao_ccsent != 0 && 906 CC_GEQ(tp->cc_send, tao.tao_ccsent)) { 907 tao.tao_ccsent = tp->cc_send; 908 } else { 909 tao.tao_ccsent = 0; 910 tp->t_flags |= TF_SENDCCNEW; 911 } 912 913 if (tcp_do_rfc1644) 914 tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT, 915 tao.tao_ccsent, 0); 916 917 return 0; 918} 919 920#ifdef INET6 921static int 922tcp6_connect(tp, nam, td) 923 register struct tcpcb *tp; 924 struct sockaddr *nam; 925 struct thread *td; 926{ 927 struct inpcb *inp = tp->t_inpcb, *oinp; 928 struct socket *so = inp->inp_socket; 929 struct tcptw *otw; 930 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 931 struct in6_addr *addr6; 932 struct rmxp_tao tao; 933 int error; 934 935 bzero(&tao, sizeof(tao)); 936 937 if (inp->inp_lport == 0) { 938 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 939 if (error) 940 return error; 941 } 942 943 /* 944 * Cannot simply call in_pcbconnect, because there might be an 945 * earlier incarnation of this same connection still in 946 * TIME_WAIT state, creating an ADDRINUSE error. 947 */ 948 error = in6_pcbladdr(inp, nam, &addr6); 949 if (error) 950 return error; 951 oinp = in6_pcblookup_hash(inp->inp_pcbinfo, 952 &sin6->sin6_addr, sin6->sin6_port, 953 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 954 ? addr6 955 : &inp->in6p_laddr, 956 inp->inp_lport, 0, NULL); 957 if (oinp) { 958 if (oinp != inp && 959 (oinp->inp_vflag & INP_TIMEWAIT) && 960 (ticks - (otw = intotw(oinp))->t_starttime) < tcp_msl && 961 otw->cc_recv != 0) { 962 inp->inp_faddr = oinp->inp_faddr; 963 inp->inp_fport = oinp->inp_fport; 964 (void) tcp_twclose(otw, 0); 965 } else 966 return EADDRINUSE; 967 } 968 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 969 inp->in6p_laddr = *addr6; 970 inp->in6p_faddr = sin6->sin6_addr; 971 inp->inp_fport = sin6->sin6_port; 972 if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) 973 inp->in6p_flowinfo = sin6->sin6_flowinfo; 974 in_pcbrehash(inp); 975 976 /* Compute window scaling to request. */ 977 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 978 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 979 tp->request_r_scale++; 980 981 soisconnecting(so); 982 tcpstat.tcps_connattempt++; 983 tp->t_state = TCPS_SYN_SENT; 984 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 985 tp->iss = tcp_new_isn(tp); 986 tp->t_bw_rtseq = tp->iss; 987 tcp_sendseqinit(tp); 988 989 /* 990 * Generate a CC value for this connection and 991 * check whether CC or CCnew should be used. 992 */ 993 if (tcp_do_rfc1644) 994 tcp_hc_gettao(&inp->inp_inc, &tao); 995 996 tp->cc_send = CC_INC(tcp_ccgen); 997 if (tao.tao_ccsent != 0 && 998 CC_GEQ(tp->cc_send, tao.tao_ccsent)) { 999 tao.tao_ccsent = tp->cc_send; 1000 } else { 1001 tao.tao_ccsent = 0; 1002 tp->t_flags |= TF_SENDCCNEW; 1003 } 1004 if (tcp_do_rfc1644) 1005 tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT, 1006 tao.tao_ccsent, 0); 1007 1008 return 0; 1009} 1010#endif /* INET6 */ 1011 1012/* 1013 * The new sockopt interface makes it possible for us to block in the 1014 * copyin/out step (if we take a page fault). Taking a page fault at 1015 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 1016 * use TSM, there probably isn't any need for this function to run at 1017 * splnet() any more. This needs more examination.) 1018 */ 1019int 1020tcp_ctloutput(so, sopt) 1021 struct socket *so; 1022 struct sockopt *sopt; 1023{ 1024 int error, opt, optval, s; 1025 struct inpcb *inp; 1026 struct tcpcb *tp; 1027 1028 error = 0; 1029 s = splnet(); /* XXX */ 1030 INP_INFO_RLOCK(&tcbinfo); 1031 inp = sotoinpcb(so); 1032 if (inp == NULL) { 1033 INP_INFO_RUNLOCK(&tcbinfo); 1034 splx(s); 1035 return (ECONNRESET); 1036 } 1037 INP_LOCK(inp); 1038 INP_INFO_RUNLOCK(&tcbinfo); 1039 if (sopt->sopt_level != IPPROTO_TCP) { 1040#ifdef INET6 1041 if (INP_CHECK_SOCKAF(so, AF_INET6)) 1042 error = ip6_ctloutput(so, sopt); 1043 else 1044#endif /* INET6 */ 1045 error = ip_ctloutput(so, sopt); 1046 INP_UNLOCK(inp); 1047 splx(s); 1048 return (error); 1049 } 1050 tp = intotcpcb(inp); 1051 1052 switch (sopt->sopt_dir) { 1053 case SOPT_SET: 1054 switch (sopt->sopt_name) { 1055#ifdef TCP_SIGNATURE 1056 case TCP_MD5SIG: 1057 error = sooptcopyin(sopt, &optval, sizeof optval, 1058 sizeof optval); 1059 if (error) 1060 break; 1061 1062 if (optval > 0) 1063 tp->t_flags |= TF_SIGNATURE; 1064 else 1065 tp->t_flags &= ~TF_SIGNATURE; 1066 break; 1067#endif /* TCP_SIGNATURE */ 1068 case TCP_NODELAY: 1069 case TCP_NOOPT: 1070 error = sooptcopyin(sopt, &optval, sizeof optval, 1071 sizeof optval); 1072 if (error) 1073 break; 1074 1075 switch (sopt->sopt_name) { 1076 case TCP_NODELAY: 1077 opt = TF_NODELAY; 1078 break; 1079 case TCP_NOOPT: 1080 opt = TF_NOOPT; 1081 break; 1082 default: 1083 opt = 0; /* dead code to fool gcc */ 1084 break; 1085 } 1086 1087 if (optval) 1088 tp->t_flags |= opt; 1089 else 1090 tp->t_flags &= ~opt; 1091 break; 1092 1093 case TCP_NOPUSH: 1094 error = sooptcopyin(sopt, &optval, sizeof optval, 1095 sizeof optval); 1096 if (error) 1097 break; 1098 1099 if (optval) 1100 tp->t_flags |= TF_NOPUSH; 1101 else { 1102 tp->t_flags &= ~TF_NOPUSH; 1103 error = tcp_output(tp); 1104 } 1105 break; 1106 1107 case TCP_MAXSEG: 1108 error = sooptcopyin(sopt, &optval, sizeof optval, 1109 sizeof optval); 1110 if (error) 1111 break; 1112 1113 if (optval > 0 && optval <= tp->t_maxseg && 1114 optval + 40 >= tcp_minmss) 1115 tp->t_maxseg = optval; 1116 else 1117 error = EINVAL; 1118 break; 1119 1120 default: 1121 error = ENOPROTOOPT; 1122 break; 1123 } 1124 break; 1125 1126 case SOPT_GET: 1127 switch (sopt->sopt_name) { 1128#ifdef TCP_SIGNATURE 1129 case TCP_MD5SIG: 1130 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 1131 break; 1132#endif 1133 case TCP_NODELAY: 1134 optval = tp->t_flags & TF_NODELAY; 1135 break; 1136 case TCP_MAXSEG: 1137 optval = tp->t_maxseg; 1138 break; 1139 case TCP_NOOPT: 1140 optval = tp->t_flags & TF_NOOPT; 1141 break; 1142 case TCP_NOPUSH: 1143 optval = tp->t_flags & TF_NOPUSH; 1144 break; 1145 default: 1146 error = ENOPROTOOPT; 1147 break; 1148 } 1149 if (error == 0) 1150 error = sooptcopyout(sopt, &optval, sizeof optval); 1151 break; 1152 } 1153 INP_UNLOCK(inp); 1154 splx(s); 1155 return (error); 1156} 1157 1158/* 1159 * tcp_sendspace and tcp_recvspace are the default send and receive window 1160 * sizes, respectively. These are obsolescent (this information should 1161 * be set by the route). 1162 */ 1163u_long tcp_sendspace = 1024*32; 1164SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, 1165 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); 1166u_long tcp_recvspace = 1024*64; 1167SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1168 &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); 1169 1170/* 1171 * Attach TCP protocol to socket, allocating 1172 * internet protocol control block, tcp control block, 1173 * bufer space, and entering LISTEN state if to accept connections. 1174 */ 1175static int 1176tcp_attach(so) 1177 struct socket *so; 1178{ 1179 register struct tcpcb *tp; 1180 struct inpcb *inp; 1181 int error; 1182#ifdef INET6 1183 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 1184#endif 1185 1186 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1187 error = soreserve(so, tcp_sendspace, tcp_recvspace); 1188 if (error) 1189 return (error); 1190 } 1191 error = in_pcballoc(so, &tcbinfo, "tcpinp"); 1192 if (error) 1193 return (error); 1194 inp = sotoinpcb(so); 1195#ifdef INET6 1196 if (isipv6) { 1197 inp->inp_vflag |= INP_IPV6; 1198 inp->in6p_hops = -1; /* use kernel default */ 1199 } 1200 else 1201#endif 1202 inp->inp_vflag |= INP_IPV4; 1203 tp = tcp_newtcpcb(inp); 1204 if (tp == 0) { 1205 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 1206 1207 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 1208#ifdef INET6 1209 if (isipv6) 1210 in6_pcbdetach(inp); 1211 else 1212#endif 1213 in_pcbdetach(inp); 1214 so->so_state |= nofd; 1215 return (ENOBUFS); 1216 } 1217 tp->t_state = TCPS_CLOSED; 1218 return (0); 1219} 1220 1221/* 1222 * Initiate (or continue) disconnect. 1223 * If embryonic state, just send reset (once). 1224 * If in ``let data drain'' option and linger null, just drop. 1225 * Otherwise (hard), mark socket disconnecting and drop 1226 * current input data; switch states based on user close, and 1227 * send segment to peer (with FIN). 1228 */ 1229static struct tcpcb * 1230tcp_disconnect(tp) 1231 register struct tcpcb *tp; 1232{ 1233 struct socket *so = tp->t_inpcb->inp_socket; 1234 1235 if (tp->t_state < TCPS_ESTABLISHED) 1236 tp = tcp_close(tp); 1237 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 1238 tp = tcp_drop(tp, 0); 1239 else { 1240 soisdisconnecting(so); 1241 sbflush(&so->so_rcv); 1242 tp = tcp_usrclosed(tp); 1243 if (tp) 1244 (void) tcp_output(tp); 1245 } 1246 return (tp); 1247} 1248 1249/* 1250 * User issued close, and wish to trail through shutdown states: 1251 * if never received SYN, just forget it. If got a SYN from peer, 1252 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1253 * If already got a FIN from peer, then almost done; go to LAST_ACK 1254 * state. In all other cases, have already sent FIN to peer (e.g. 1255 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1256 * for peer to send FIN or not respond to keep-alives, etc. 1257 * We can let the user exit from the close as soon as the FIN is acked. 1258 */ 1259static struct tcpcb * 1260tcp_usrclosed(tp) 1261 register struct tcpcb *tp; 1262{ 1263 1264 switch (tp->t_state) { 1265 1266 case TCPS_CLOSED: 1267 case TCPS_LISTEN: 1268 tp->t_state = TCPS_CLOSED; 1269 tp = tcp_close(tp); 1270 break; 1271 1272 case TCPS_SYN_SENT: 1273 case TCPS_SYN_RECEIVED: 1274 tp->t_flags |= TF_NEEDFIN; 1275 break; 1276 1277 case TCPS_ESTABLISHED: 1278 tp->t_state = TCPS_FIN_WAIT_1; 1279 break; 1280 1281 case TCPS_CLOSE_WAIT: 1282 tp->t_state = TCPS_LAST_ACK; 1283 break; 1284 } 1285 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1286 soisdisconnected(tp->t_inpcb->inp_socket); 1287 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 1288 if (tp->t_state == TCPS_FIN_WAIT_2) 1289 callout_reset(tp->tt_2msl, tcp_maxidle, 1290 tcp_timer_2msl, tp); 1291 } 1292 return (tp); 1293} 1294 1295