/* tcp_usrreq.c revision 78101 */
/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
32 * 33 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 34 * $FreeBSD: head/sys/netinet/tcp_usrreq.c 78064 2001-06-11 12:39:29Z ume $ 35 */ 36 37#include "opt_ipsec.h" 38#include "opt_inet6.h" 39#include "opt_tcpdebug.h" 40 41#include <sys/param.h> 42#include <sys/systm.h> 43#include <sys/kernel.h> 44#include <sys/sysctl.h> 45#include <sys/mbuf.h> 46#ifdef INET6 47#include <sys/domain.h> 48#endif /* INET6 */ 49#include <sys/socket.h> 50#include <sys/socketvar.h> 51#include <sys/protosw.h> 52#include <sys/proc.h> 53#include <sys/jail.h> 54 55#include <net/if.h> 56#include <net/route.h> 57 58#include <netinet/in.h> 59#include <netinet/in_systm.h> 60#ifdef INET6 61#include <netinet/ip6.h> 62#endif 63#include <netinet/in_pcb.h> 64#ifdef INET6 65#include <netinet6/in6_pcb.h> 66#endif 67#include <netinet/in_var.h> 68#include <netinet/ip_var.h> 69#ifdef INET6 70#include <netinet6/ip6_var.h> 71#endif 72#include <netinet/tcp.h> 73#include <netinet/tcp_fsm.h> 74#include <netinet/tcp_seq.h> 75#include <netinet/tcp_timer.h> 76#include <netinet/tcp_var.h> 77#include <netinet/tcpip.h> 78#ifdef TCPDEBUG 79#include <netinet/tcp_debug.h> 80#endif 81 82#ifdef IPSEC 83#include <netinet6/ipsec.h> 84#endif /*IPSEC*/ 85 86/* 87 * TCP protocol interface to socket abstraction. 88 */ 89extern char *tcpstates[]; /* XXX ??? */ 90 91static int tcp_attach __P((struct socket *, struct proc *)); 92static int tcp_connect __P((struct tcpcb *, struct sockaddr *, 93 struct proc *)); 94#ifdef INET6 95static int tcp6_connect __P((struct tcpcb *, struct sockaddr *, 96 struct proc *)); 97#endif /* INET6 */ 98static struct tcpcb * 99 tcp_disconnect __P((struct tcpcb *)); 100static struct tcpcb * 101 tcp_usrclosed __P((struct tcpcb *)); 102 103#ifdef TCPDEBUG 104#define TCPDEBUG0 int ostate = 0 105#define TCPDEBUG1() ostate = tp ? 
tp->t_state : 0 106#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 107 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 108#else 109#define TCPDEBUG0 110#define TCPDEBUG1() 111#define TCPDEBUG2(req) 112#endif 113 114/* 115 * TCP attaches to socket via pru_attach(), reserving space, 116 * and an internet control block. 117 */ 118static int 119tcp_usr_attach(struct socket *so, int proto, struct proc *p) 120{ 121 int s = splnet(); 122 int error; 123 struct inpcb *inp = sotoinpcb(so); 124 struct tcpcb *tp = 0; 125 TCPDEBUG0; 126 127 TCPDEBUG1(); 128 if (inp) { 129 error = EISCONN; 130 goto out; 131 } 132 133 error = tcp_attach(so, p); 134 if (error) 135 goto out; 136 137 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 138 so->so_linger = TCP_LINGERTIME; 139 tp = sototcpcb(so); 140out: 141 TCPDEBUG2(PRU_ATTACH); 142 splx(s); 143 return error; 144} 145 146/* 147 * pru_detach() detaches the TCP protocol from the socket. 148 * If the protocol state is non-embryonic, then can't 149 * do this directly: have to initiate a pru_disconnect(), 150 * which may finish later; embryonic TCB's can just 151 * be discarded here. 152 */ 153static int 154tcp_usr_detach(struct socket *so) 155{ 156 int s = splnet(); 157 int error = 0; 158 struct inpcb *inp = sotoinpcb(so); 159 struct tcpcb *tp; 160 TCPDEBUG0; 161 162 if (inp == 0) { 163 splx(s); 164 return EINVAL; /* XXX */ 165 } 166 tp = intotcpcb(inp); 167 TCPDEBUG1(); 168 tp = tcp_disconnect(tp); 169 170 TCPDEBUG2(PRU_DETACH); 171 splx(s); 172 return error; 173} 174 175#define COMMON_START() TCPDEBUG0; \ 176 do { \ 177 if (inp == 0) { \ 178 splx(s); \ 179 return EINVAL; \ 180 } \ 181 tp = intotcpcb(inp); \ 182 TCPDEBUG1(); \ 183 } while(0) 184 185#define COMMON_END(req) out: TCPDEBUG2(req); splx(s); return error; goto out 186 187 188/* 189 * Give the socket an address. 
190 */ 191static int 192tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 193{ 194 int s = splnet(); 195 int error = 0; 196 struct inpcb *inp = sotoinpcb(so); 197 struct tcpcb *tp; 198 struct sockaddr_in *sinp; 199 200 COMMON_START(); 201 202 /* 203 * Must check for multicast addresses and disallow binding 204 * to them. 205 */ 206 sinp = (struct sockaddr_in *)nam; 207 if (sinp->sin_family == AF_INET && 208 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 209 error = EAFNOSUPPORT; 210 goto out; 211 } 212 error = in_pcbbind(inp, nam, p); 213 if (error) 214 goto out; 215 COMMON_END(PRU_BIND); 216 217} 218 219#ifdef INET6 220static int 221tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 222{ 223 int s = splnet(); 224 int error = 0; 225 struct inpcb *inp = sotoinpcb(so); 226 struct tcpcb *tp; 227 struct sockaddr_in6 *sin6p; 228 229 COMMON_START(); 230 231 /* 232 * Must check for multicast addresses and disallow binding 233 * to them. 234 */ 235 sin6p = (struct sockaddr_in6 *)nam; 236 if (sin6p->sin6_family == AF_INET6 && 237 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 238 error = EAFNOSUPPORT; 239 goto out; 240 } 241 inp->inp_vflag &= ~INP_IPV4; 242 inp->inp_vflag |= INP_IPV6; 243 if (ip6_mapped_addr_on && (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 244 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 245 inp->inp_vflag |= INP_IPV4; 246 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 247 struct sockaddr_in sin; 248 249 in6_sin6_2_sin(&sin, sin6p); 250 inp->inp_vflag |= INP_IPV4; 251 inp->inp_vflag &= ~INP_IPV6; 252 error = in_pcbbind(inp, (struct sockaddr *)&sin, p); 253 goto out; 254 } 255 } 256 error = in6_pcbbind(inp, nam, p); 257 if (error) 258 goto out; 259 COMMON_END(PRU_BIND); 260} 261#endif /* INET6 */ 262 263/* 264 * Prepare to accept connections. 
265 */ 266static int 267tcp_usr_listen(struct socket *so, struct proc *p) 268{ 269 int s = splnet(); 270 int error = 0; 271 struct inpcb *inp = sotoinpcb(so); 272 struct tcpcb *tp; 273 274 COMMON_START(); 275 if (inp->inp_lport == 0) 276 error = in_pcbbind(inp, (struct sockaddr *)0, p); 277 if (error == 0) 278 tp->t_state = TCPS_LISTEN; 279 COMMON_END(PRU_LISTEN); 280} 281 282#ifdef INET6 283static int 284tcp6_usr_listen(struct socket *so, struct proc *p) 285{ 286 int s = splnet(); 287 int error = 0; 288 struct inpcb *inp = sotoinpcb(so); 289 struct tcpcb *tp; 290 291 COMMON_START(); 292 if (inp->inp_lport == 0) { 293 inp->inp_vflag &= ~INP_IPV4; 294 if (ip6_mapped_addr_on && 295 (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 296 inp->inp_vflag |= INP_IPV4; 297 error = in6_pcbbind(inp, (struct sockaddr *)0, p); 298 } 299 if (error == 0) 300 tp->t_state = TCPS_LISTEN; 301 COMMON_END(PRU_LISTEN); 302} 303#endif /* INET6 */ 304 305/* 306 * Initiate connection to peer. 307 * Create a template for use in transmissions on this connection. 308 * Enter SYN_SENT state, and mark socket as connecting. 309 * Start keep-alive timer, and seed output sequence space. 310 * Send initial segment on connection. 311 */ 312static int 313tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 314{ 315 int s = splnet(); 316 int error = 0; 317 struct inpcb *inp = sotoinpcb(so); 318 struct tcpcb *tp; 319 struct sockaddr_in *sinp; 320 321 COMMON_START(); 322 323 /* 324 * Must disallow TCP ``connections'' to multicast addresses. 
325 */ 326 sinp = (struct sockaddr_in *)nam; 327 if (sinp->sin_family == AF_INET 328 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 329 error = EAFNOSUPPORT; 330 goto out; 331 } 332 333 if (p && jailed(p->p_ucred)) 334 prison_remote_ip(p->p_ucred, 0, &sinp->sin_addr.s_addr); 335 336 if ((error = tcp_connect(tp, nam, p)) != 0) 337 goto out; 338 error = tcp_output(tp); 339 COMMON_END(PRU_CONNECT); 340} 341 342#ifdef INET6 343static int 344tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 345{ 346 int s = splnet(); 347 int error = 0; 348 struct inpcb *inp = sotoinpcb(so); 349 struct tcpcb *tp; 350 struct sockaddr_in6 *sin6p; 351 352 COMMON_START(); 353 354 /* 355 * Must disallow TCP ``connections'' to multicast addresses. 356 */ 357 sin6p = (struct sockaddr_in6 *)nam; 358 if (sin6p->sin6_family == AF_INET6 359 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 360 error = EAFNOSUPPORT; 361 goto out; 362 } 363 364 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 365 struct sockaddr_in sin; 366 367 if (!ip6_mapped_addr_on || 368 (inp->inp_flags & IN6P_IPV6_V6ONLY)) 369 return(EINVAL); 370 371 in6_sin6_2_sin(&sin, sin6p); 372 inp->inp_vflag |= INP_IPV4; 373 inp->inp_vflag &= ~INP_IPV6; 374 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) 375 goto out; 376 error = tcp_output(tp); 377 goto out; 378 } 379 inp->inp_vflag &= ~INP_IPV4; 380 inp->inp_vflag |= INP_IPV6; 381 if ((error = tcp6_connect(tp, nam, p)) != 0) 382 goto out; 383 error = tcp_output(tp); 384 COMMON_END(PRU_CONNECT); 385} 386#endif /* INET6 */ 387 388/* 389 * Initiate disconnect from peer. 390 * If connection never passed embryonic stage, just drop; 391 * else if don't need to let data drain, then can just drop anyways, 392 * else have to begin TCP shutdown process: mark socket disconnecting, 393 * drain unread data, state switch to reflect user close, and 394 * send segment (e.g. FIN) to peer. Socket will be really disconnected 395 * when peer sends FIN and acks ours. 
396 * 397 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 398 */ 399static int 400tcp_usr_disconnect(struct socket *so) 401{ 402 int s = splnet(); 403 int error = 0; 404 struct inpcb *inp = sotoinpcb(so); 405 struct tcpcb *tp; 406 407 COMMON_START(); 408 tp = tcp_disconnect(tp); 409 COMMON_END(PRU_DISCONNECT); 410} 411 412/* 413 * Accept a connection. Essentially all the work is 414 * done at higher levels; just return the address 415 * of the peer, storing through addr. 416 */ 417static int 418tcp_usr_accept(struct socket *so, struct sockaddr **nam) 419{ 420 int s = splnet(); 421 int error = 0; 422 struct inpcb *inp = sotoinpcb(so); 423 struct tcpcb *tp = NULL; 424 TCPDEBUG0; 425 426 if (so->so_state & SS_ISDISCONNECTED) { 427 error = ECONNABORTED; 428 goto out; 429 } 430 if (inp == 0) { 431 splx(s); 432 return (EINVAL); 433 } 434 tp = intotcpcb(inp); 435 TCPDEBUG1(); 436 in_setpeeraddr(so, nam); 437 COMMON_END(PRU_ACCEPT); 438} 439 440#ifdef INET6 441static int 442tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 443{ 444 int s = splnet(); 445 int error = 0; 446 struct inpcb *inp = sotoinpcb(so); 447 struct tcpcb *tp = NULL; 448 TCPDEBUG0; 449 450 if (so->so_state & SS_ISDISCONNECTED) { 451 error = ECONNABORTED; 452 goto out; 453 } 454 if (inp == 0) { 455 splx(s); 456 return (EINVAL); 457 } 458 tp = intotcpcb(inp); 459 TCPDEBUG1(); 460 in6_mapped_peeraddr(so, nam); 461 COMMON_END(PRU_ACCEPT); 462} 463#endif /* INET6 */ 464/* 465 * Mark the connection as being incapable of further output. 466 */ 467static int 468tcp_usr_shutdown(struct socket *so) 469{ 470 int s = splnet(); 471 int error = 0; 472 struct inpcb *inp = sotoinpcb(so); 473 struct tcpcb *tp; 474 475 COMMON_START(); 476 socantsendmore(so); 477 tp = tcp_usrclosed(tp); 478 if (tp) 479 error = tcp_output(tp); 480 COMMON_END(PRU_SHUTDOWN); 481} 482 483/* 484 * After a receive, possibly send window update to peer. 
485 */ 486static int 487tcp_usr_rcvd(struct socket *so, int flags) 488{ 489 int s = splnet(); 490 int error = 0; 491 struct inpcb *inp = sotoinpcb(so); 492 struct tcpcb *tp; 493 494 COMMON_START(); 495 tcp_output(tp); 496 COMMON_END(PRU_RCVD); 497} 498 499/* 500 * Do a send by putting data in output queue and updating urgent 501 * marker if URG set. Possibly send more data. Unlike the other 502 * pru_*() routines, the mbuf chains are our responsibility. We 503 * must either enqueue them or free them. The other pru_* routines 504 * generally are caller-frees. 505 */ 506static int 507tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 508 struct sockaddr *nam, struct mbuf *control, struct proc *p) 509{ 510 int s = splnet(); 511 int error = 0; 512 struct inpcb *inp = sotoinpcb(so); 513 struct tcpcb *tp; 514#ifdef INET6 515 int isipv6; 516#endif 517 TCPDEBUG0; 518 519 if (inp == NULL) { 520 /* 521 * OOPS! we lost a race, the TCP session got reset after 522 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a 523 * network interrupt in the non-splnet() section of sosend(). 524 */ 525 if (m) 526 m_freem(m); 527 if (control) 528 m_freem(control); 529 error = ECONNRESET; /* XXX EPIPE? */ 530 tp = NULL; 531 TCPDEBUG1(); 532 goto out; 533 } 534#ifdef INET6 535 isipv6 = nam && nam->sa_family == AF_INET6; 536#endif /* INET6 */ 537 tp = intotcpcb(inp); 538 TCPDEBUG1(); 539 if (control) { 540 /* TCP doesn't do control messages (rights, creds, etc) */ 541 if (control->m_len) { 542 m_freem(control); 543 if (m) 544 m_freem(m); 545 error = EINVAL; 546 goto out; 547 } 548 m_freem(control); /* empty control, just free it */ 549 } 550 if(!(flags & PRUS_OOB)) { 551 sbappend(&so->so_snd, m); 552 if (nam && tp->t_state < TCPS_SYN_SENT) { 553 /* 554 * Do implied connect if not yet connected, 555 * initialize window to default value, and 556 * initialize maxseg/maxopd using peer's cached 557 * MSS. 
558 */ 559#ifdef INET6 560 if (isipv6) 561 error = tcp6_connect(tp, nam, p); 562 else 563#endif /* INET6 */ 564 error = tcp_connect(tp, nam, p); 565 if (error) 566 goto out; 567 tp->snd_wnd = TTCP_CLIENT_SND_WND; 568 tcp_mss(tp, -1); 569 } 570 571 if (flags & PRUS_EOF) { 572 /* 573 * Close the send side of the connection after 574 * the data is sent. 575 */ 576 socantsendmore(so); 577 tp = tcp_usrclosed(tp); 578 } 579 if (tp != NULL) { 580 if (flags & PRUS_MORETOCOME) 581 tp->t_flags |= TF_MORETOCOME; 582 error = tcp_output(tp); 583 if (flags & PRUS_MORETOCOME) 584 tp->t_flags &= ~TF_MORETOCOME; 585 } 586 } else { 587 if (sbspace(&so->so_snd) < -512) { 588 m_freem(m); 589 error = ENOBUFS; 590 goto out; 591 } 592 /* 593 * According to RFC961 (Assigned Protocols), 594 * the urgent pointer points to the last octet 595 * of urgent data. We continue, however, 596 * to consider it to indicate the first octet 597 * of data past the urgent section. 598 * Otherwise, snd_up should be one lower. 599 */ 600 sbappend(&so->so_snd, m); 601 if (nam && tp->t_state < TCPS_SYN_SENT) { 602 /* 603 * Do implied connect if not yet connected, 604 * initialize window to default value, and 605 * initialize maxseg/maxopd using peer's cached 606 * MSS. 607 */ 608#ifdef INET6 609 if (isipv6) 610 error = tcp6_connect(tp, nam, p); 611 else 612#endif /* INET6 */ 613 error = tcp_connect(tp, nam, p); 614 if (error) 615 goto out; 616 tp->snd_wnd = TTCP_CLIENT_SND_WND; 617 tcp_mss(tp, -1); 618 } 619 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 620 tp->t_force = 1; 621 error = tcp_output(tp); 622 tp->t_force = 0; 623 } 624 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 625 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 626} 627 628/* 629 * Abort the TCP. 
630 */ 631static int 632tcp_usr_abort(struct socket *so) 633{ 634 int s = splnet(); 635 int error = 0; 636 struct inpcb *inp = sotoinpcb(so); 637 struct tcpcb *tp; 638 639 COMMON_START(); 640 tp = tcp_drop(tp, ECONNABORTED); 641 COMMON_END(PRU_ABORT); 642} 643 644/* 645 * Receive out-of-band data. 646 */ 647static int 648tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 649{ 650 int s = splnet(); 651 int error = 0; 652 struct inpcb *inp = sotoinpcb(so); 653 struct tcpcb *tp; 654 655 COMMON_START(); 656 if ((so->so_oobmark == 0 && 657 (so->so_state & SS_RCVATMARK) == 0) || 658 so->so_options & SO_OOBINLINE || 659 tp->t_oobflags & TCPOOB_HADDATA) { 660 error = EINVAL; 661 goto out; 662 } 663 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 664 error = EWOULDBLOCK; 665 goto out; 666 } 667 m->m_len = 1; 668 *mtod(m, caddr_t) = tp->t_iobc; 669 if ((flags & MSG_PEEK) == 0) 670 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 671 COMMON_END(PRU_RCVOOB); 672} 673 674/* xxx - should be const */ 675struct pr_usrreqs tcp_usrreqs = { 676 tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind, 677 tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach, 678 tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd, 679 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, 680 in_setsockaddr, sosend, soreceive, sopoll 681}; 682 683#ifdef INET6 684struct pr_usrreqs tcp6_usrreqs = { 685 tcp_usr_abort, tcp6_usr_accept, tcp_usr_attach, tcp6_usr_bind, 686 tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach, 687 tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd, 688 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, 689 in6_mapped_sockaddr, sosend, soreceive, sopoll 690}; 691#endif /* INET6 */ 692 693/* 694 * Common subroutine to open a TCP connection to remote host specified 695 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 696 * port number if needed. 
Call in_pcbladdr to do the routing and to choose 697 * a local host address (interface). If there is an existing incarnation 698 * of the same connection in TIME-WAIT state and if the remote host was 699 * sending CC options and if the connection duration was < MSL, then 700 * truncate the previous TIME-WAIT state and proceed. 701 * Initialize connection parameters and enter SYN-SENT state. 702 */ 703static int 704tcp_connect(tp, nam, p) 705 register struct tcpcb *tp; 706 struct sockaddr *nam; 707 struct proc *p; 708{ 709 struct inpcb *inp = tp->t_inpcb, *oinp; 710 struct socket *so = inp->inp_socket; 711 struct tcpcb *otp; 712 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 713 struct sockaddr_in *ifaddr; 714 struct rmxp_tao *taop; 715 struct rmxp_tao tao_noncached; 716 int error; 717 718 if (inp->inp_lport == 0) { 719 error = in_pcbbind(inp, (struct sockaddr *)0, p); 720 if (error) 721 return error; 722 } 723 724 /* 725 * Cannot simply call in_pcbconnect, because there might be an 726 * earlier incarnation of this same connection still in 727 * TIME_WAIT state, creating an ADDRINUSE error. 728 */ 729 error = in_pcbladdr(inp, nam, &ifaddr); 730 if (error) 731 return error; 732 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 733 sin->sin_addr, sin->sin_port, 734 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr 735 : ifaddr->sin_addr, 736 inp->inp_lport, 0, NULL); 737 if (oinp) { 738 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && 739 otp->t_state == TCPS_TIME_WAIT && 740 (ticks - otp->t_starttime) < tcp_msl && 741 (otp->t_flags & TF_RCVD_CC)) 742 otp = tcp_close(otp); 743 else 744 return EADDRINUSE; 745 } 746 if (inp->inp_laddr.s_addr == INADDR_ANY) 747 inp->inp_laddr = ifaddr->sin_addr; 748 inp->inp_faddr = sin->sin_addr; 749 inp->inp_fport = sin->sin_port; 750 in_pcbrehash(inp); 751 752 tp->t_template = tcp_template(tp); 753 if (tp->t_template == 0) { 754 in_pcbdisconnect(inp); 755 return ENOBUFS; 756 } 757 758 /* Compute window scaling to request. 
*/ 759 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 760 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 761 tp->request_r_scale++; 762 763 soisconnecting(so); 764 tcpstat.tcps_connattempt++; 765 tp->t_state = TCPS_SYN_SENT; 766 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 767 tp->iss = tcp_rndiss_next(); 768 tcp_sendseqinit(tp); 769 770 /* 771 * Generate a CC value for this connection and 772 * check whether CC or CCnew should be used. 773 */ 774 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { 775 taop = &tao_noncached; 776 bzero(taop, sizeof(*taop)); 777 } 778 779 tp->cc_send = CC_INC(tcp_ccgen); 780 if (taop->tao_ccsent != 0 && 781 CC_GEQ(tp->cc_send, taop->tao_ccsent)) { 782 taop->tao_ccsent = tp->cc_send; 783 } else { 784 taop->tao_ccsent = 0; 785 tp->t_flags |= TF_SENDCCNEW; 786 } 787 788 return 0; 789} 790 791#ifdef INET6 792static int 793tcp6_connect(tp, nam, p) 794 register struct tcpcb *tp; 795 struct sockaddr *nam; 796 struct proc *p; 797{ 798 struct inpcb *inp = tp->t_inpcb, *oinp; 799 struct socket *so = inp->inp_socket; 800 struct tcpcb *otp; 801 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 802 struct in6_addr *addr6; 803 struct rmxp_tao *taop; 804 struct rmxp_tao tao_noncached; 805 int error; 806 807 if (inp->inp_lport == 0) { 808 error = in6_pcbbind(inp, (struct sockaddr *)0, p); 809 if (error) 810 return error; 811 } 812 813 /* 814 * Cannot simply call in_pcbconnect, because there might be an 815 * earlier incarnation of this same connection still in 816 * TIME_WAIT state, creating an ADDRINUSE error. 817 */ 818 error = in6_pcbladdr(inp, nam, &addr6); 819 if (error) 820 return error; 821 oinp = in6_pcblookup_hash(inp->inp_pcbinfo, 822 &sin6->sin6_addr, sin6->sin6_port, 823 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 824 ? 
addr6 825 : &inp->in6p_laddr, 826 inp->inp_lport, 0, NULL); 827 if (oinp) { 828 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && 829 otp->t_state == TCPS_TIME_WAIT && 830 (ticks - otp->t_starttime) < tcp_msl && 831 (otp->t_flags & TF_RCVD_CC)) 832 otp = tcp_close(otp); 833 else 834 return EADDRINUSE; 835 } 836 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 837 inp->in6p_laddr = *addr6; 838 inp->in6p_faddr = sin6->sin6_addr; 839 inp->inp_fport = sin6->sin6_port; 840 if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != NULL) 841 inp->in6p_flowinfo = sin6->sin6_flowinfo; 842 in_pcbrehash(inp); 843 844 tp->t_template = tcp_template(tp); 845 if (tp->t_template == 0) { 846 in6_pcbdisconnect(inp); 847 return ENOBUFS; 848 } 849 850 /* Compute window scaling to request. */ 851 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 852 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 853 tp->request_r_scale++; 854 855 soisconnecting(so); 856 tcpstat.tcps_connattempt++; 857 tp->t_state = TCPS_SYN_SENT; 858 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 859 tp->iss = tcp_rndiss_next(); 860 tcp_sendseqinit(tp); 861 862 /* 863 * Generate a CC value for this connection and 864 * check whether CC or CCnew should be used. 865 */ 866 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { 867 taop = &tao_noncached; 868 bzero(taop, sizeof(*taop)); 869 } 870 871 tp->cc_send = CC_INC(tcp_ccgen); 872 if (taop->tao_ccsent != 0 && 873 CC_GEQ(tp->cc_send, taop->tao_ccsent)) { 874 taop->tao_ccsent = tp->cc_send; 875 } else { 876 taop->tao_ccsent = 0; 877 tp->t_flags |= TF_SENDCCNEW; 878 } 879 880 return 0; 881} 882#endif /* INET6 */ 883 884/* 885 * The new sockopt interface makes it possible for us to block in the 886 * copyin/out step (if we take a page fault). Taking a page fault at 887 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 888 * use TSM, there probably isn't any need for this function to run at 889 * splnet() any more. 
This needs more examination.) 890 */ 891int 892tcp_ctloutput(so, sopt) 893 struct socket *so; 894 struct sockopt *sopt; 895{ 896 int error, opt, optval, s; 897 struct inpcb *inp; 898 struct tcpcb *tp; 899 900 error = 0; 901 s = splnet(); /* XXX */ 902 inp = sotoinpcb(so); 903 if (inp == NULL) { 904 splx(s); 905 return (ECONNRESET); 906 } 907 if (sopt->sopt_level != IPPROTO_TCP) { 908#ifdef INET6 909 if (INP_CHECK_SOCKAF(so, AF_INET6)) 910 error = ip6_ctloutput(so, sopt); 911 else 912#endif /* INET6 */ 913 error = ip_ctloutput(so, sopt); 914 splx(s); 915 return (error); 916 } 917 tp = intotcpcb(inp); 918 919 switch (sopt->sopt_dir) { 920 case SOPT_SET: 921 switch (sopt->sopt_name) { 922 case TCP_NODELAY: 923 case TCP_NOOPT: 924 error = sooptcopyin(sopt, &optval, sizeof optval, 925 sizeof optval); 926 if (error) 927 break; 928 929 switch (sopt->sopt_name) { 930 case TCP_NODELAY: 931 opt = TF_NODELAY; 932 break; 933 case TCP_NOOPT: 934 opt = TF_NOOPT; 935 break; 936 default: 937 opt = 0; /* dead code to fool gcc */ 938 break; 939 } 940 941 if (optval) 942 tp->t_flags |= opt; 943 else 944 tp->t_flags &= ~opt; 945 break; 946 947 case TCP_NOPUSH: 948 error = sooptcopyin(sopt, &optval, sizeof optval, 949 sizeof optval); 950 if (error) 951 break; 952 953 if (optval) 954 tp->t_flags |= TF_NOPUSH; 955 else { 956 tp->t_flags &= ~TF_NOPUSH; 957 error = tcp_output(tp); 958 } 959 break; 960 961 case TCP_MAXSEG: 962 error = sooptcopyin(sopt, &optval, sizeof optval, 963 sizeof optval); 964 if (error) 965 break; 966 967 if (optval > 0 && optval <= tp->t_maxseg) 968 tp->t_maxseg = optval; 969 else 970 error = EINVAL; 971 break; 972 973 default: 974 error = ENOPROTOOPT; 975 break; 976 } 977 break; 978 979 case SOPT_GET: 980 switch (sopt->sopt_name) { 981 case TCP_NODELAY: 982 optval = tp->t_flags & TF_NODELAY; 983 break; 984 case TCP_MAXSEG: 985 optval = tp->t_maxseg; 986 break; 987 case TCP_NOOPT: 988 optval = tp->t_flags & TF_NOOPT; 989 break; 990 case TCP_NOPUSH: 991 optval = 
tp->t_flags & TF_NOPUSH; 992 break; 993 default: 994 error = ENOPROTOOPT; 995 break; 996 } 997 if (error == 0) 998 error = sooptcopyout(sopt, &optval, sizeof optval); 999 break; 1000 } 1001 splx(s); 1002 return (error); 1003} 1004 1005/* 1006 * tcp_sendspace and tcp_recvspace are the default send and receive window 1007 * sizes, respectively. These are obsolescent (this information should 1008 * be set by the route). 1009 */ 1010u_long tcp_sendspace = 1024*16; 1011SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, 1012 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); 1013u_long tcp_recvspace = 1024*16; 1014SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1015 &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); 1016 1017/* 1018 * Attach TCP protocol to socket, allocating 1019 * internet protocol control block, tcp control block, 1020 * bufer space, and entering LISTEN state if to accept connections. 1021 */ 1022static int 1023tcp_attach(so, p) 1024 struct socket *so; 1025 struct proc *p; 1026{ 1027 register struct tcpcb *tp; 1028 struct inpcb *inp; 1029 int error; 1030#ifdef INET6 1031 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != NULL; 1032#endif 1033 1034 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1035 error = soreserve(so, tcp_sendspace, tcp_recvspace); 1036 if (error) 1037 return (error); 1038 } 1039 error = in_pcballoc(so, &tcbinfo, p); 1040 if (error) 1041 return (error); 1042 inp = sotoinpcb(so); 1043#ifdef IPSEC 1044 error = ipsec_init_policy(so, &inp->inp_sp); 1045 if (error) { 1046#ifdef INET6 1047 if (isipv6) 1048 in6_pcbdetach(inp); 1049 else 1050#endif 1051 in_pcbdetach(inp); 1052 return (error); 1053 } 1054#endif /*IPSEC*/ 1055#ifdef INET6 1056 if (isipv6) { 1057 inp->inp_vflag |= INP_IPV6; 1058 inp->in6p_hops = -1; /* use kernel default */ 1059 } 1060 else 1061#endif 1062 inp->inp_vflag |= INP_IPV4; 1063 tp = tcp_newtcpcb(inp); 1064 if (tp == 0) { 1065 int nofd = so->so_state & 
SS_NOFDREF; /* XXX */ 1066 1067 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 1068#ifdef INET6 1069 if (isipv6) 1070 in6_pcbdetach(inp); 1071 else 1072#endif 1073 in_pcbdetach(inp); 1074 so->so_state |= nofd; 1075 return (ENOBUFS); 1076 } 1077 tp->t_state = TCPS_CLOSED; 1078 return (0); 1079} 1080 1081/* 1082 * Initiate (or continue) disconnect. 1083 * If embryonic state, just send reset (once). 1084 * If in ``let data drain'' option and linger null, just drop. 1085 * Otherwise (hard), mark socket disconnecting and drop 1086 * current input data; switch states based on user close, and 1087 * send segment to peer (with FIN). 1088 */ 1089static struct tcpcb * 1090tcp_disconnect(tp) 1091 register struct tcpcb *tp; 1092{ 1093 struct socket *so = tp->t_inpcb->inp_socket; 1094 1095 if (tp->t_state < TCPS_ESTABLISHED) 1096 tp = tcp_close(tp); 1097 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 1098 tp = tcp_drop(tp, 0); 1099 else { 1100 soisdisconnecting(so); 1101 sbflush(&so->so_rcv); 1102 tp = tcp_usrclosed(tp); 1103 if (tp) 1104 (void) tcp_output(tp); 1105 } 1106 return (tp); 1107} 1108 1109/* 1110 * User issued close, and wish to trail through shutdown states: 1111 * if never received SYN, just forget it. If got a SYN from peer, 1112 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1113 * If already got a FIN from peer, then almost done; go to LAST_ACK 1114 * state. In all other cases, have already sent FIN to peer (e.g. 1115 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1116 * for peer to send FIN or not respond to keep-alives, etc. 1117 * We can let the user exit from the close as soon as the FIN is acked. 
1118 */ 1119static struct tcpcb * 1120tcp_usrclosed(tp) 1121 register struct tcpcb *tp; 1122{ 1123 1124 switch (tp->t_state) { 1125 1126 case TCPS_CLOSED: 1127 case TCPS_LISTEN: 1128 tp->t_state = TCPS_CLOSED; 1129 tp = tcp_close(tp); 1130 break; 1131 1132 case TCPS_SYN_SENT: 1133 case TCPS_SYN_RECEIVED: 1134 tp->t_flags |= TF_NEEDFIN; 1135 break; 1136 1137 case TCPS_ESTABLISHED: 1138 tp->t_state = TCPS_FIN_WAIT_1; 1139 break; 1140 1141 case TCPS_CLOSE_WAIT: 1142 tp->t_state = TCPS_LAST_ACK; 1143 break; 1144 } 1145 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1146 soisdisconnected(tp->t_inpcb->inp_socket); 1147 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 1148 if (tp->t_state == TCPS_FIN_WAIT_2) 1149 callout_reset(tp->tt_2msl, tcp_maxidle, 1150 tcp_timer_2msl, tp); 1151 } 1152 return (tp); 1153} 1154 1155