tcp_usrreq.c revision 157410
1/*- 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2006 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 32 * $FreeBSD: head/sys/netinet/tcp_usrreq.c 157410 2006-04-02 16:42:51Z rwatson $ 33 */ 34 35#include "opt_inet.h" 36#include "opt_inet6.h" 37#include "opt_tcpdebug.h" 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/malloc.h> 42#include <sys/kernel.h> 43#include <sys/sysctl.h> 44#include <sys/mbuf.h> 45#ifdef INET6 46#include <sys/domain.h> 47#endif /* INET6 */ 48#include <sys/socket.h> 49#include <sys/socketvar.h> 50#include <sys/protosw.h> 51#include <sys/proc.h> 52#include <sys/jail.h> 53 54#include <net/if.h> 55#include <net/route.h> 56 57#include <netinet/in.h> 58#include <netinet/in_systm.h> 59#ifdef INET6 60#include <netinet/ip6.h> 61#endif 62#include <netinet/in_pcb.h> 63#ifdef INET6 64#include <netinet6/in6_pcb.h> 65#endif 66#include <netinet/in_var.h> 67#include <netinet/ip_var.h> 68#ifdef INET6 69#include <netinet6/ip6_var.h> 70#include <netinet6/scope6_var.h> 71#endif 72#include <netinet/tcp.h> 73#include <netinet/tcp_fsm.h> 74#include <netinet/tcp_seq.h> 75#include <netinet/tcp_timer.h> 76#include <netinet/tcp_var.h> 77#include <netinet/tcpip.h> 78#ifdef TCPDEBUG 79#include <netinet/tcp_debug.h> 80#endif 81 82/* 83 * TCP protocol interface to socket abstraction. 84 */ 85extern char *tcpstates[]; /* XXX ??? */ 86 87static int tcp_attach(struct socket *); 88static int tcp_connect(struct tcpcb *, struct sockaddr *, 89 struct thread *td); 90#ifdef INET6 91static int tcp6_connect(struct tcpcb *, struct sockaddr *, 92 struct thread *td); 93#endif /* INET6 */ 94static void tcp_disconnect(struct tcpcb *); 95static void tcp_usrclosed(struct tcpcb *); 96static void tcp_fill_info(struct tcpcb *, struct tcp_info *); 97 98#ifdef TCPDEBUG 99#define TCPDEBUG0 int ostate = 0 100#define TCPDEBUG1() ostate = tp ? tp->t_state : 0 101#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 102 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 103#else 104#define TCPDEBUG0 105#define TCPDEBUG1() 106#define TCPDEBUG2(req) 107#endif 108 109/* 110 * TCP attaches to socket via pru_attach(), reserving space, 111 * and an internet control block. 112 */ 113static int 114tcp_usr_attach(struct socket *so, int proto, struct thread *td) 115{ 116 struct inpcb *inp; 117 struct tcpcb *tp = NULL; 118 int error; 119 TCPDEBUG0; 120 121 inp = sotoinpcb(so); 122 KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); 123 INP_INFO_WLOCK(&tcbinfo); 124 TCPDEBUG1(); 125 126 error = tcp_attach(so); 127 if (error) 128 goto out; 129 130 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 131 so->so_linger = TCP_LINGERTIME; 132 133 inp = sotoinpcb(so); 134 tp = intotcpcb(inp); 135out: 136 TCPDEBUG2(PRU_ATTACH); 137 INP_INFO_WUNLOCK(&tcbinfo); 138 return error; 139} 140 141/* 142 * pru_detach() detaches the TCP protocol from the socket. 143 * If the protocol state is non-embryonic, then can't 144 * do this directly: have to initiate a pru_disconnect(), 145 * which may finish later; embryonic TCB's can just 146 * be discarded here. 147 */ 148static void 149tcp_usr_detach(struct socket *so) 150{ 151 struct inpcb *inp; 152 struct tcpcb *tp; 153#ifdef INET6 154 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 155#endif 156 TCPDEBUG0; 157 158 inp = sotoinpcb(so); 159 KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); 160 INP_INFO_WLOCK(&tcbinfo); 161 INP_LOCK(inp); 162 KASSERT(inp->inp_socket != NULL, 163 ("tcp_usr_detach: inp_socket == NULL")); 164 TCPDEBUG1(); 165 166 /* 167 * First, if we still have full TCP state, and we're not dropped, 168 * initiate a disconnect. 169 */ 170 if (!(inp->inp_vflag & INP_TIMEWAIT) && 171 !(inp->inp_vflag & INP_DROPPED)) { 172 tp = intotcpcb(inp); 173 tcp_disconnect(tp); 174 } 175 176 /* 177 * Second, release any protocol state that we can reasonably release. 178 * Note that the call to tcp_disconnect() may actually have changed 179 * the TCP state, so we have to re-evaluate INP_TIMEWAIT and 180 * INP_DROPPED. 181 */ 182 if (inp->inp_vflag & INP_TIMEWAIT) { 183 if (inp->inp_vflag & INP_DROPPED) { 184 /* 185 * Connection was in time wait and has been dropped; 186 * the calling path is either via tcp_twclose(), or 187 * as a result of an eventual soclose() after 188 * tcp_twclose() has been called. In either case, 189 * tcp_twclose() has detached the tcptw from the 190 * inpcb, so we just detach and free the inpcb. 191 * 192 * XXXRW: Would it be cleaner to free the tcptw 193 * here? 194 */ 195#ifdef INET6 196 if (isipv6) { 197 in6_pcbdetach(inp); 198 in6_pcbfree(inp); 199 } else { 200#endif 201 in_pcbdetach(inp); 202 in_pcbfree(inp); 203#ifdef INET6 204 } 205#endif 206 } else { 207 /* 208 * Connection is in time wait and has not yet been 209 * dropped; allow the socket to be discarded, but 210 * need to keep inpcb until end of time wait. 211 */ 212#ifdef INET6 213 if (isipv6) 214 in6_pcbdetach(inp); 215 else 216#endif 217 in_pcbdetach(inp); 218 INP_UNLOCK(inp); 219 } 220 } else { 221 tp = intotcpcb(inp); 222 if (inp->inp_vflag & INP_DROPPED || 223 tp->t_state < TCPS_SYN_SENT) { 224 /* 225 * Connection has been dropped or is a listen socket, 226 * tear down all pcb state and allow socket to be 227 * freed. 228 */ 229 tcp_discardcb(tp); 230#ifdef INET6 231 if (isipv6) { 232 in_pcbdetach(inp); 233 in_pcbfree(inp); 234 } else { 235#endif 236 in_pcbdetach(inp); 237 in_pcbfree(inp); 238#ifdef INET6 239 } 240#endif 241 } else { 242 SOCK_LOCK(so); 243 so->so_state |= SS_PROTOREF; 244 SOCK_UNLOCK(so); 245 inp->inp_vflag |= INP_SOCKREF; 246 INP_UNLOCK(inp); 247 } 248 } 249 tp = NULL; 250 TCPDEBUG2(PRU_DETACH); 251 INP_INFO_WUNLOCK(&tcbinfo); 252} 253 254/* 255 * Give the socket an address. 256 */ 257static int 258tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 259{ 260 int error = 0; 261 struct inpcb *inp; 262 struct tcpcb *tp = NULL; 263 struct sockaddr_in *sinp; 264 265 sinp = (struct sockaddr_in *)nam; 266 if (nam->sa_len != sizeof (*sinp)) 267 return (EINVAL); 268 /* 269 * Must check for multicast addresses and disallow binding 270 * to them. 271 */ 272 if (sinp->sin_family == AF_INET && 273 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 274 return (EAFNOSUPPORT); 275 276 TCPDEBUG0; 277 INP_INFO_WLOCK(&tcbinfo); 278 inp = sotoinpcb(so); 279 KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); 280 INP_LOCK(inp); 281 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 282 error = EINVAL; 283 goto out; 284 } 285 tp = intotcpcb(inp); 286 TCPDEBUG1(); 287 error = in_pcbbind(inp, nam, td->td_ucred); 288out: 289 TCPDEBUG2(PRU_BIND); 290 INP_UNLOCK(inp); 291 INP_INFO_WUNLOCK(&tcbinfo); 292 293 return (error); 294} 295 296#ifdef INET6 297static int 298tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 299{ 300 int error = 0; 301 struct inpcb *inp; 302 struct tcpcb *tp = NULL; 303 struct sockaddr_in6 *sin6p; 304 305 sin6p = (struct sockaddr_in6 *)nam; 306 if (nam->sa_len != sizeof (*sin6p)) 307 return (EINVAL); 308 /* 309 * Must check for multicast addresses and disallow binding 310 * to them. 311 */ 312 if (sin6p->sin6_family == AF_INET6 && 313 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 314 return (EAFNOSUPPORT); 315 316 TCPDEBUG0; 317 INP_INFO_WLOCK(&tcbinfo); 318 inp = sotoinpcb(so); 319 KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); 320 INP_LOCK(inp); 321 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 322 error = EINVAL; 323 goto out; 324 } 325 tp = intotcpcb(inp); 326 TCPDEBUG1(); 327 inp->inp_vflag &= ~INP_IPV4; 328 inp->inp_vflag |= INP_IPV6; 329 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 330 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 331 inp->inp_vflag |= INP_IPV4; 332 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 333 struct sockaddr_in sin; 334 335 in6_sin6_2_sin(&sin, sin6p); 336 inp->inp_vflag |= INP_IPV4; 337 inp->inp_vflag &= ~INP_IPV6; 338 error = in_pcbbind(inp, (struct sockaddr *)&sin, 339 td->td_ucred); 340 goto out; 341 } 342 } 343 error = in6_pcbbind(inp, nam, td->td_ucred); 344out: 345 TCPDEBUG2(PRU_BIND); 346 INP_UNLOCK(inp); 347 INP_INFO_WUNLOCK(&tcbinfo); 348 return (error); 349} 350#endif /* INET6 */ 351 352/* 353 * Prepare to accept connections. 354 */ 355static int 356tcp_usr_listen(struct socket *so, int backlog, struct thread *td) 357{ 358 int error = 0; 359 struct inpcb *inp; 360 struct tcpcb *tp = NULL; 361 362 TCPDEBUG0; 363 INP_INFO_WLOCK(&tcbinfo); 364 inp = sotoinpcb(so); 365 KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); 366 INP_LOCK(inp); 367 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 368 error = EINVAL; 369 goto out; 370 } 371 tp = intotcpcb(inp); 372 TCPDEBUG1(); 373 SOCK_LOCK(so); 374 error = solisten_proto_check(so); 375 if (error == 0 && inp->inp_lport == 0) 376 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 377 if (error == 0) { 378 tp->t_state = TCPS_LISTEN; 379 solisten_proto(so, backlog); 380 } 381 SOCK_UNLOCK(so); 382 383out: 384 TCPDEBUG2(PRU_LISTEN); 385 INP_UNLOCK(inp); 386 INP_INFO_WUNLOCK(&tcbinfo); 387 return (error); 388} 389 390#ifdef INET6 391static int 392tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) 393{ 394 int error = 0; 395 struct inpcb *inp; 396 struct tcpcb *tp = NULL; 397 398 TCPDEBUG0; 399 INP_INFO_WLOCK(&tcbinfo); 400 inp = sotoinpcb(so); 401 KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); 402 INP_LOCK(inp); 403 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 404 error = EINVAL; 405 goto out; 406 } 407 tp = intotcpcb(inp); 408 TCPDEBUG1(); 409 SOCK_LOCK(so); 410 error = solisten_proto_check(so); 411 if (error == 0 && inp->inp_lport == 0) { 412 inp->inp_vflag &= ~INP_IPV4; 413 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 414 inp->inp_vflag |= INP_IPV4; 415 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 416 } 417 if (error == 0) { 418 tp->t_state = TCPS_LISTEN; 419 solisten_proto(so, backlog); 420 } 421 SOCK_UNLOCK(so); 422 423out: 424 TCPDEBUG2(PRU_LISTEN); 425 INP_UNLOCK(inp); 426 INP_INFO_WUNLOCK(&tcbinfo); 427 return (error); 428} 429#endif /* INET6 */ 430 431/* 432 * Initiate connection to peer. 433 * Create a template for use in transmissions on this connection. 434 * Enter SYN_SENT state, and mark socket as connecting. 435 * Start keep-alive timer, and seed output sequence space. 436 * Send initial segment on connection. 437 */ 438static int 439tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 440{ 441 int error = 0; 442 struct inpcb *inp; 443 struct tcpcb *tp = NULL; 444 struct sockaddr_in *sinp; 445 446 sinp = (struct sockaddr_in *)nam; 447 if (nam->sa_len != sizeof (*sinp)) 448 return (EINVAL); 449 /* 450 * Must disallow TCP ``connections'' to multicast addresses. 451 */ 452 if (sinp->sin_family == AF_INET 453 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 454 return (EAFNOSUPPORT); 455 if (jailed(td->td_ucred)) 456 prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); 457 458 TCPDEBUG0; 459 INP_INFO_WLOCK(&tcbinfo); 460 inp = sotoinpcb(so); 461 KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); 462 INP_LOCK(inp); 463 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 464 error = EINVAL; 465 goto out; 466 } 467 tp = intotcpcb(inp); 468 TCPDEBUG1(); 469 if ((error = tcp_connect(tp, nam, td)) != 0) 470 goto out; 471 error = tcp_output(tp); 472out: 473 TCPDEBUG2(PRU_CONNECT); 474 INP_UNLOCK(inp); 475 INP_INFO_WUNLOCK(&tcbinfo); 476 return (error); 477} 478 479#ifdef INET6 480static int 481tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 482{ 483 int error = 0; 484 struct inpcb *inp; 485 struct tcpcb *tp = NULL; 486 struct sockaddr_in6 *sin6p; 487 488 TCPDEBUG0; 489 490 sin6p = (struct sockaddr_in6 *)nam; 491 if (nam->sa_len != sizeof (*sin6p)) 492 return (EINVAL); 493 /* 494 * Must disallow TCP ``connections'' to multicast addresses. 495 */ 496 if (sin6p->sin6_family == AF_INET6 497 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 498 return (EAFNOSUPPORT); 499 500 INP_INFO_WLOCK(&tcbinfo); 501 inp = sotoinpcb(so); 502 KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); 503 INP_LOCK(inp); 504 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 505 error = EINVAL; 506 goto out; 507 } 508 tp = intotcpcb(inp); 509 TCPDEBUG1(); 510 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 511 struct sockaddr_in sin; 512 513 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 514 error = EINVAL; 515 goto out; 516 } 517 518 in6_sin6_2_sin(&sin, sin6p); 519 inp->inp_vflag |= INP_IPV4; 520 inp->inp_vflag &= ~INP_IPV6; 521 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) 522 goto out; 523 error = tcp_output(tp); 524 goto out; 525 } 526 inp->inp_vflag &= ~INP_IPV4; 527 inp->inp_vflag |= INP_IPV6; 528 inp->inp_inc.inc_isipv6 = 1; 529 if ((error = tcp6_connect(tp, nam, td)) != 0) 530 goto out; 531 error = tcp_output(tp); 532 533out: 534 TCPDEBUG2(PRU_CONNECT); 535 INP_UNLOCK(inp); 536 INP_INFO_WUNLOCK(&tcbinfo); 537 return (error); 538} 539#endif /* INET6 */ 540 541/* 542 * Initiate disconnect from peer. 543 * If connection never passed embryonic stage, just drop; 544 * else if don't need to let data drain, then can just drop anyways, 545 * else have to begin TCP shutdown process: mark socket disconnecting, 546 * drain unread data, state switch to reflect user close, and 547 * send segment (e.g. FIN) to peer. Socket will be really disconnected 548 * when peer sends FIN and acks ours. 549 * 550 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 551 */ 552static int 553tcp_usr_disconnect(struct socket *so) 554{ 555 struct inpcb *inp; 556 struct tcpcb *tp = NULL; 557 int error = 0; 558 559 TCPDEBUG0; 560 INP_INFO_WLOCK(&tcbinfo); 561 inp = sotoinpcb(so); 562 KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); 563 INP_LOCK(inp); 564 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 565 error = EINVAL; 566 goto out; 567 } 568 tp = intotcpcb(inp); 569 TCPDEBUG1(); 570 tcp_disconnect(tp); 571out: 572 TCPDEBUG2(PRU_DISCONNECT); 573 INP_UNLOCK(inp); 574 INP_INFO_WUNLOCK(&tcbinfo); 575 return (error); 576} 577 578/* 579 * Accept a connection. Essentially all the work is 580 * done at higher levels; just return the address 581 * of the peer, storing through addr. 582 */ 583static int 584tcp_usr_accept(struct socket *so, struct sockaddr **nam) 585{ 586 int error = 0; 587 struct inpcb *inp = NULL; 588 struct tcpcb *tp = NULL; 589 struct in_addr addr; 590 in_port_t port = 0; 591 TCPDEBUG0; 592 593 if (so->so_state & SS_ISDISCONNECTED) { 594 error = ECONNABORTED; 595 goto out; 596 } 597 598 inp = sotoinpcb(so); 599 KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); 600 INP_LOCK(inp); 601 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 602 error = EINVAL; 603 goto out; 604 } 605 tp = intotcpcb(inp); 606 TCPDEBUG1(); 607 608 /* 609 * We inline in_setpeeraddr and COMMON_END here, so that we can 610 * copy the data of interest and defer the malloc until after we 611 * release the lock. 612 */ 613 port = inp->inp_fport; 614 addr = inp->inp_faddr; 615 616out: 617 TCPDEBUG2(PRU_ACCEPT); 618 INP_UNLOCK(inp); 619 if (error == 0) 620 *nam = in_sockaddr(port, &addr); 621 return error; 622} 623 624#ifdef INET6 625static int 626tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 627{ 628 struct inpcb *inp = NULL; 629 int error = 0; 630 struct tcpcb *tp = NULL; 631 struct in_addr addr; 632 struct in6_addr addr6; 633 in_port_t port = 0; 634 int v4 = 0; 635 TCPDEBUG0; 636 637 if (so->so_state & SS_ISDISCONNECTED) { 638 error = ECONNABORTED; 639 goto out; 640 } 641 642 inp = sotoinpcb(so); 643 KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); 644 INP_LOCK(inp); 645 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 646 error = EINVAL; 647 goto out; 648 } 649 tp = intotcpcb(inp); 650 TCPDEBUG1(); 651 652 /* 653 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can 654 * copy the data of interest and defer the malloc until after we 655 * release the lock. 656 */ 657 if (inp->inp_vflag & INP_IPV4) { 658 v4 = 1; 659 port = inp->inp_fport; 660 addr = inp->inp_faddr; 661 } else { 662 port = inp->inp_fport; 663 addr6 = inp->in6p_faddr; 664 } 665 666out: 667 TCPDEBUG2(PRU_ACCEPT); 668 INP_UNLOCK(inp); 669 if (error == 0) { 670 if (v4) 671 *nam = in6_v4mapsin6_sockaddr(port, &addr); 672 else 673 *nam = in6_sockaddr(port, &addr6); 674 } 675 return error; 676} 677#endif /* INET6 */ 678 679/* 680 * This is the wrapper function for in_setsockaddr. We just pass down 681 * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking 682 * here because in_setsockaddr will call malloc and can block. 683 */ 684static int 685tcp_sockaddr(struct socket *so, struct sockaddr **nam) 686{ 687 return (in_setsockaddr(so, nam, &tcbinfo)); 688} 689 690/* 691 * This is the wrapper function for in_setpeeraddr. We just pass down 692 * the pcbinfo for in_setpeeraddr to lock. 693 */ 694static int 695tcp_peeraddr(struct socket *so, struct sockaddr **nam) 696{ 697 return (in_setpeeraddr(so, nam, &tcbinfo)); 698} 699 700/* 701 * Mark the connection as being incapable of further output. 702 */ 703static int 704tcp_usr_shutdown(struct socket *so) 705{ 706 int error = 0; 707 struct inpcb *inp; 708 struct tcpcb *tp = NULL; 709 710 TCPDEBUG0; 711 INP_INFO_WLOCK(&tcbinfo); 712 inp = sotoinpcb(so); 713 KASSERT(inp != NULL, ("inp == NULL")); 714 INP_LOCK(inp); 715 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 716 error = EINVAL; 717 goto out; 718 } 719 tp = intotcpcb(inp); 720 TCPDEBUG1(); 721 socantsendmore(so); 722 tcp_usrclosed(tp); 723 error = tcp_output(tp); 724 725out: 726 TCPDEBUG2(PRU_SHUTDOWN); 727 INP_UNLOCK(inp); 728 INP_INFO_WUNLOCK(&tcbinfo); 729 730 return (error); 731} 732 733/* 734 * After a receive, possibly send window update to peer. 735 */ 736static int 737tcp_usr_rcvd(struct socket *so, int flags) 738{ 739 struct inpcb *inp; 740 struct tcpcb *tp = NULL; 741 int error = 0; 742 743 TCPDEBUG0; 744 inp = sotoinpcb(so); 745 KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); 746 INP_LOCK(inp); 747 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 748 error = EINVAL; 749 goto out; 750 } 751 tp = intotcpcb(inp); 752 TCPDEBUG1(); 753 tcp_output(tp); 754 755out: 756 TCPDEBUG2(PRU_RCVD); 757 INP_UNLOCK(inp); 758 return (error); 759} 760 761/* 762 * Do a send by putting data in output queue and updating urgent 763 * marker if URG set. Possibly send more data. Unlike the other 764 * pru_*() routines, the mbuf chains are our responsibility. We 765 * must either enqueue them or free them. The other pru_* routines 766 * generally are caller-frees. 767 */ 768static int 769tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 770 struct sockaddr *nam, struct mbuf *control, struct thread *td) 771{ 772 int error = 0; 773 struct inpcb *inp; 774 struct tcpcb *tp = NULL; 775 int headlocked = 0; 776#ifdef INET6 777 int isipv6; 778#endif 779 TCPDEBUG0; 780 781 /* 782 * We require the pcbinfo lock in two cases: 783 * 784 * (1) An implied connect is taking place, which can result in 785 * binding IPs and ports and hence modification of the pcb hash 786 * chains. 787 * 788 * (2) PRUS_EOF is set, resulting in explicit close on the send. 789 */ 790 if ((nam != NULL) || (flags & PRUS_EOF)) { 791 INP_INFO_WLOCK(&tcbinfo); 792 headlocked = 1; 793 } 794 inp = sotoinpcb(so); 795 KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); 796 INP_LOCK(inp); 797 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 798 error = EINVAL; 799 goto out; 800 } 801#ifdef INET6 802 isipv6 = nam && nam->sa_family == AF_INET6; 803#endif /* INET6 */ 804 tp = intotcpcb(inp); 805 TCPDEBUG1(); 806 if (control) { 807 /* TCP doesn't do control messages (rights, creds, etc) */ 808 if (control->m_len) { 809 m_freem(control); 810 if (m) 811 m_freem(m); 812 error = EINVAL; 813 goto out; 814 } 815 m_freem(control); /* empty control, just free it */ 816 } 817 if (!(flags & PRUS_OOB)) { 818 sbappendstream(&so->so_snd, m); 819 if (nam && tp->t_state < TCPS_SYN_SENT) { 820 /* 821 * Do implied connect if not yet connected, 822 * initialize window to default value, and 823 * initialize maxseg/maxopd using peer's cached 824 * MSS. 825 */ 826 INP_INFO_WLOCK_ASSERT(&tcbinfo); 827#ifdef INET6 828 if (isipv6) 829 error = tcp6_connect(tp, nam, td); 830 else 831#endif /* INET6 */ 832 error = tcp_connect(tp, nam, td); 833 if (error) 834 goto out; 835 tp->snd_wnd = TTCP_CLIENT_SND_WND; 836 tcp_mss(tp, -1); 837 } 838 if (flags & PRUS_EOF) { 839 /* 840 * Close the send side of the connection after 841 * the data is sent. 842 */ 843 INP_INFO_WLOCK_ASSERT(&tcbinfo); 844 socantsendmore(so); 845 tcp_usrclosed(tp); 846 } 847 if (headlocked) { 848 INP_INFO_WUNLOCK(&tcbinfo); 849 headlocked = 0; 850 } 851 if (tp != NULL) { 852 if (flags & PRUS_MORETOCOME) 853 tp->t_flags |= TF_MORETOCOME; 854 error = tcp_output(tp); 855 if (flags & PRUS_MORETOCOME) 856 tp->t_flags &= ~TF_MORETOCOME; 857 } 858 } else { 859 /* 860 * XXXRW: PRUS_EOF not implemented with PRUS_OOB? 861 */ 862 SOCKBUF_LOCK(&so->so_snd); 863 if (sbspace(&so->so_snd) < -512) { 864 SOCKBUF_UNLOCK(&so->so_snd); 865 m_freem(m); 866 error = ENOBUFS; 867 goto out; 868 } 869 /* 870 * According to RFC961 (Assigned Protocols), 871 * the urgent pointer points to the last octet 872 * of urgent data. We continue, however, 873 * to consider it to indicate the first octet 874 * of data past the urgent section. 875 * Otherwise, snd_up should be one lower. 876 */ 877 sbappendstream_locked(&so->so_snd, m); 878 SOCKBUF_UNLOCK(&so->so_snd); 879 if (nam && tp->t_state < TCPS_SYN_SENT) { 880 /* 881 * Do implied connect if not yet connected, 882 * initialize window to default value, and 883 * initialize maxseg/maxopd using peer's cached 884 * MSS. 885 */ 886 INP_INFO_WLOCK_ASSERT(&tcbinfo); 887#ifdef INET6 888 if (isipv6) 889 error = tcp6_connect(tp, nam, td); 890 else 891#endif /* INET6 */ 892 error = tcp_connect(tp, nam, td); 893 if (error) 894 goto out; 895 tp->snd_wnd = TTCP_CLIENT_SND_WND; 896 tcp_mss(tp, -1); 897 INP_INFO_WUNLOCK(&tcbinfo); 898 headlocked = 0; 899 } else if (nam) { 900 INP_INFO_WUNLOCK(&tcbinfo); 901 headlocked = 0; 902 } 903 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 904 tp->t_flags |= TF_FORCEDATA; 905 error = tcp_output(tp); 906 tp->t_flags &= ~TF_FORCEDATA; 907 } 908out: 909 TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : 910 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 911 INP_UNLOCK(inp); 912 if (headlocked) 913 INP_INFO_WUNLOCK(&tcbinfo); 914 return (error); 915} 916 917/* 918 * Abort the TCP. 919 */ 920static void 921tcp_usr_abort(struct socket *so) 922{ 923#if 0 924 struct inpcb *inp; 925 struct tcpcb *tp; 926#endif 927 928 /* 929 * XXXRW: This is not really quite the same, as we want to tcp_drop() 930 * rather than tcp_disconnect(), I think, but for now I'll avoid 931 * replicating all the tear-down logic here. 932 */ 933 tcp_usr_detach(so); 934 935#if 0 936 TCPDEBUG0; 937 INP_INFO_WLOCK(&tcbinfo); 938 inp = sotoinpcb(so); 939 INP_LOCK(inp); 940 /* 941 * Do we need to handle timewait here? Aborted connections should 942 * never generate a FIN? 943 */ 944 KASSERT((inp->inp_vflag & INP_TIMEWAIT) == 0, 945 ("tcp_usr_abort: timewait")); 946 tp = intotcpcb(inp); 947 TCPDEBUG1(); 948 tp = tcp_drop(tp, ECONNABORTED); 949 TCPDEBUG2(PRU_ABORT); 950 if (tp != NULL) 951 INP_UNLOCK(inp); 952 INP_INFO_WUNLOCK(&tcbinfo); 953#endif 954} 955 956/* 957 * Receive out-of-band data. 958 */ 959static int 960tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 961{ 962 int error = 0; 963 struct inpcb *inp; 964 struct tcpcb *tp = NULL; 965 966 TCPDEBUG0; 967 inp = sotoinpcb(so); 968 KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); 969 INP_LOCK(inp); 970 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 971 error = EINVAL; 972 goto out; 973 } 974 tp = intotcpcb(inp); 975 TCPDEBUG1(); 976 if ((so->so_oobmark == 0 && 977 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || 978 so->so_options & SO_OOBINLINE || 979 tp->t_oobflags & TCPOOB_HADDATA) { 980 error = EINVAL; 981 goto out; 982 } 983 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 984 error = EWOULDBLOCK; 985 goto out; 986 } 987 m->m_len = 1; 988 *mtod(m, caddr_t) = tp->t_iobc; 989 if ((flags & MSG_PEEK) == 0) 990 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 991 992out: 993 TCPDEBUG2(PRU_RCVOOB); 994 INP_UNLOCK(inp); 995 return (error); 996} 997 998struct pr_usrreqs tcp_usrreqs = { 999 .pru_abort = tcp_usr_abort, 1000 .pru_accept = tcp_usr_accept, 1001 .pru_attach = tcp_usr_attach, 1002 .pru_bind = tcp_usr_bind, 1003 .pru_connect = tcp_usr_connect, 1004 .pru_control = in_control, 1005 .pru_detach = tcp_usr_detach, 1006 .pru_disconnect = tcp_usr_disconnect, 1007 .pru_listen = tcp_usr_listen, 1008 .pru_peeraddr = tcp_peeraddr, 1009 .pru_rcvd = tcp_usr_rcvd, 1010 .pru_rcvoob = tcp_usr_rcvoob, 1011 .pru_send = tcp_usr_send, 1012 .pru_shutdown = tcp_usr_shutdown, 1013 .pru_sockaddr = tcp_sockaddr, 1014 .pru_sosetlabel = in_pcbsosetlabel 1015}; 1016 1017#ifdef INET6 1018struct pr_usrreqs tcp6_usrreqs = { 1019 .pru_abort = tcp_usr_abort, 1020 .pru_accept = tcp6_usr_accept, 1021 .pru_attach = tcp_usr_attach, 1022 .pru_bind = tcp6_usr_bind, 1023 .pru_connect = tcp6_usr_connect, 1024 .pru_control = in6_control, 1025 .pru_detach = tcp_usr_detach, 1026 .pru_disconnect = tcp_usr_disconnect, 1027 .pru_listen = tcp6_usr_listen, 1028 .pru_peeraddr = in6_mapped_peeraddr, 1029 .pru_rcvd = tcp_usr_rcvd, 1030 .pru_rcvoob = tcp_usr_rcvoob, 1031 .pru_send = tcp_usr_send, 1032 .pru_shutdown = tcp_usr_shutdown, 1033 .pru_sockaddr = in6_mapped_sockaddr, 1034 .pru_sosetlabel = in_pcbsosetlabel 1035}; 1036#endif /* INET6 */ 1037 1038/* 1039 * Common subroutine to open a TCP connection to remote host specified 1040 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1041 * port number if needed. Call in_pcbconnect_setup to do the routing and 1042 * to choose a local host address (interface). If there is an existing 1043 * incarnation of the same connection in TIME-WAIT state and if the remote 1044 * host was sending CC options and if the connection duration was < MSL, then 1045 * truncate the previous TIME-WAIT state and proceed. 1046 * Initialize connection parameters and enter SYN-SENT state. 1047 */ 1048static int 1049tcp_connect(tp, nam, td) 1050 register struct tcpcb *tp; 1051 struct sockaddr *nam; 1052 struct thread *td; 1053{ 1054 struct inpcb *inp = tp->t_inpcb, *oinp; 1055 struct socket *so = inp->inp_socket; 1056 struct in_addr laddr; 1057 u_short lport; 1058 int error; 1059 1060 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1061 INP_LOCK_ASSERT(inp); 1062 1063 if (inp->inp_lport == 0) { 1064 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1065 if (error) 1066 return error; 1067 } 1068 1069 /* 1070 * Cannot simply call in_pcbconnect, because there might be an 1071 * earlier incarnation of this same connection still in 1072 * TIME_WAIT state, creating an ADDRINUSE error. 1073 */ 1074 laddr = inp->inp_laddr; 1075 lport = inp->inp_lport; 1076 error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, 1077 &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); 1078 if (error && oinp == NULL) 1079 return error; 1080 if (oinp) 1081 return EADDRINUSE; 1082 inp->inp_laddr = laddr; 1083 in_pcbrehash(inp); 1084 1085 /* Compute window scaling to request. */ 1086 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1087 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 1088 tp->request_r_scale++; 1089 1090 soisconnecting(so); 1091 tcpstat.tcps_connattempt++; 1092 tp->t_state = TCPS_SYN_SENT; 1093 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 1094 tp->iss = tcp_new_isn(tp); 1095 tp->t_bw_rtseq = tp->iss; 1096 tcp_sendseqinit(tp); 1097 1098 return 0; 1099} 1100 1101#ifdef INET6 1102static int 1103tcp6_connect(tp, nam, td) 1104 register struct tcpcb *tp; 1105 struct sockaddr *nam; 1106 struct thread *td; 1107{ 1108 struct inpcb *inp = tp->t_inpcb, *oinp; 1109 struct socket *so = inp->inp_socket; 1110 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 1111 struct in6_addr *addr6; 1112 int error; 1113 1114 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1115 INP_LOCK_ASSERT(inp); 1116 1117 if (inp->inp_lport == 0) { 1118 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1119 if (error) 1120 return error; 1121 } 1122 1123 /* 1124 * Cannot simply call in_pcbconnect, because there might be an 1125 * earlier incarnation of this same connection still in 1126 * TIME_WAIT state, creating an ADDRINUSE error. 1127 * in6_pcbladdr() also handles scope zone IDs. 1128 */ 1129 error = in6_pcbladdr(inp, nam, &addr6); 1130 if (error) 1131 return error; 1132 oinp = in6_pcblookup_hash(inp->inp_pcbinfo, 1133 &sin6->sin6_addr, sin6->sin6_port, 1134 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 1135 ? addr6 1136 : &inp->in6p_laddr, 1137 inp->inp_lport, 0, NULL); 1138 if (oinp) 1139 return EADDRINUSE; 1140 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 1141 inp->in6p_laddr = *addr6; 1142 inp->in6p_faddr = sin6->sin6_addr; 1143 inp->inp_fport = sin6->sin6_port; 1144 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 1145 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 1146 if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) 1147 inp->in6p_flowinfo |= 1148 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 1149 in_pcbrehash(inp); 1150 1151 /* Compute window scaling to request. */ 1152 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1153 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 1154 tp->request_r_scale++; 1155 1156 soisconnecting(so); 1157 tcpstat.tcps_connattempt++; 1158 tp->t_state = TCPS_SYN_SENT; 1159 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 1160 tp->iss = tcp_new_isn(tp); 1161 tp->t_bw_rtseq = tp->iss; 1162 tcp_sendseqinit(tp); 1163 1164 return 0; 1165} 1166#endif /* INET6 */ 1167 1168/* 1169 * Export TCP internal state information via a struct tcp_info, based on the 1170 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently 1171 * (TCP state machine, etc). We export all information using FreeBSD-native 1172 * constants -- for example, the numeric values for tcpi_state will differ 1173 * from Linux. 1174 */ 1175static void 1176tcp_fill_info(tp, ti) 1177 struct tcpcb *tp; 1178 struct tcp_info *ti; 1179{ 1180 1181 INP_LOCK_ASSERT(tp->t_inpcb); 1182 bzero(ti, sizeof(*ti)); 1183 1184 ti->tcpi_state = tp->t_state; 1185 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 1186 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1187 if (tp->sack_enable) 1188 ti->tcpi_options |= TCPI_OPT_SACK; 1189 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 1190 ti->tcpi_options |= TCPI_OPT_WSCALE; 1191 ti->tcpi_snd_wscale = tp->snd_scale; 1192 ti->tcpi_rcv_wscale = tp->rcv_scale; 1193 } 1194 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 1195 ti->tcpi_snd_cwnd = tp->snd_cwnd; 1196 1197 /* 1198 * FreeBSD-specific extension fields for tcp_info. 1199 */ 1200 ti->tcpi_rcv_space = tp->rcv_wnd; 1201 ti->tcpi_snd_wnd = tp->snd_wnd; 1202 ti->tcpi_snd_bwnd = tp->snd_bwnd; 1203} 1204 1205/* 1206 * The new sockopt interface makes it possible for us to block in the 1207 * copyin/out step (if we take a page fault). Taking a page fault at 1208 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 1209 * use TSM, there probably isn't any need for this function to run at 1210 * splnet() any more. This needs more examination.) 1211 * 1212 * XXXRW: The locking here is wrong; we may take a page fault while holding 1213 * the inpcb lock. 1214 */ 1215int 1216tcp_ctloutput(so, sopt) 1217 struct socket *so; 1218 struct sockopt *sopt; 1219{ 1220 int error, opt, optval; 1221 struct inpcb *inp; 1222 struct tcpcb *tp; 1223 struct tcp_info ti; 1224 1225 error = 0; 1226 inp = sotoinpcb(so); 1227 KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); 1228 INP_LOCK(inp); 1229 if (sopt->sopt_level != IPPROTO_TCP) { 1230 INP_UNLOCK(inp); 1231#ifdef INET6 1232 if (INP_CHECK_SOCKAF(so, AF_INET6)) 1233 error = ip6_ctloutput(so, sopt); 1234 else 1235#endif /* INET6 */ 1236 error = ip_ctloutput(so, sopt); 1237 return (error); 1238 } 1239 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 1240 error = ECONNRESET; 1241 goto out; 1242 } 1243 tp = intotcpcb(inp); 1244 1245 switch (sopt->sopt_dir) { 1246 case SOPT_SET: 1247 switch (sopt->sopt_name) { 1248#ifdef TCP_SIGNATURE 1249 case TCP_MD5SIG: 1250 error = sooptcopyin(sopt, &optval, sizeof optval, 1251 sizeof optval); 1252 if (error) 1253 break; 1254 1255 if (optval > 0) 1256 tp->t_flags |= TF_SIGNATURE; 1257 else 1258 tp->t_flags &= ~TF_SIGNATURE; 1259 break; 1260#endif /* TCP_SIGNATURE */ 1261 case TCP_NODELAY: 1262 case TCP_NOOPT: 1263 error = sooptcopyin(sopt, &optval, sizeof optval, 1264 sizeof optval); 1265 if (error) 1266 break; 1267 1268 switch (sopt->sopt_name) { 1269 case TCP_NODELAY: 1270 opt = TF_NODELAY; 1271 break; 1272 case TCP_NOOPT: 1273 opt = TF_NOOPT; 1274 break; 1275 default: 1276 opt = 0; /* dead code to fool gcc */ 1277 break; 1278 } 1279 1280 if (optval) 1281 tp->t_flags |= opt; 1282 else 1283 tp->t_flags &= ~opt; 1284 break; 1285 1286 case TCP_NOPUSH: 1287 error = sooptcopyin(sopt, &optval, sizeof optval, 1288 sizeof optval); 1289 if (error) 1290 break; 1291 1292 if (optval) 1293 tp->t_flags |= TF_NOPUSH; 1294 else { 1295 tp->t_flags &= ~TF_NOPUSH; 1296 error = tcp_output(tp); 1297 } 1298 break; 1299 1300 case TCP_MAXSEG: 1301 error = sooptcopyin(sopt, &optval, sizeof optval, 1302 sizeof optval); 1303 if (error) 1304 break; 1305 1306 if (optval > 0 && optval <= tp->t_maxseg && 1307 optval + 40 >= tcp_minmss) 1308 tp->t_maxseg = optval; 1309 else 1310 error = EINVAL; 1311 break; 1312 1313 case TCP_INFO: 1314 error = EINVAL; 1315 break; 1316 1317 default: 1318 error = ENOPROTOOPT; 1319 break; 1320 } 1321 break; 1322 1323 case SOPT_GET: 1324 switch (sopt->sopt_name) { 1325#ifdef TCP_SIGNATURE 1326 case TCP_MD5SIG: 1327 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 1328 error = sooptcopyout(sopt, &optval, sizeof optval); 1329 break; 1330#endif 1331 case TCP_NODELAY: 1332 optval = tp->t_flags & TF_NODELAY; 1333 error = sooptcopyout(sopt, &optval, sizeof optval); 1334 break; 1335 case TCP_MAXSEG: 1336 optval = tp->t_maxseg; 1337 error = sooptcopyout(sopt, &optval, sizeof optval); 1338 break; 1339 case TCP_NOOPT: 1340 optval = tp->t_flags & TF_NOOPT; 1341 error = sooptcopyout(sopt, &optval, sizeof optval); 1342 break; 1343 case TCP_NOPUSH: 1344 optval = tp->t_flags & TF_NOPUSH; 1345 error = sooptcopyout(sopt, &optval, sizeof optval); 1346 break; 1347 case TCP_INFO: 1348 tcp_fill_info(tp, &ti); 1349 error = sooptcopyout(sopt, &ti, sizeof ti); 1350 break; 1351 default: 1352 error = ENOPROTOOPT; 1353 break; 1354 } 1355 break; 1356 } 1357out: 1358 INP_UNLOCK(inp); 1359 return (error); 1360} 1361 1362/* 1363 * tcp_sendspace and tcp_recvspace are the default send and receive window 1364 * sizes, respectively. These are obsolescent (this information should 1365 * be set by the route). 1366 */ 1367u_long tcp_sendspace = 1024*32; 1368SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, 1369 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); 1370u_long tcp_recvspace = 1024*64; 1371SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1372 &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); 1373 1374/* 1375 * Attach TCP protocol to socket, allocating 1376 * internet protocol control block, tcp control block, 1377 * bufer space, and entering LISTEN state if to accept connections. 1378 */ 1379static int 1380tcp_attach(so) 1381 struct socket *so; 1382{ 1383 register struct tcpcb *tp; 1384 struct inpcb *inp; 1385 int error; 1386#ifdef INET6 1387 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 1388#endif 1389 1390 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1391 1392 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1393 error = soreserve(so, tcp_sendspace, tcp_recvspace); 1394 if (error) 1395 return (error); 1396 } 1397 error = in_pcballoc(so, &tcbinfo, "tcpinp"); 1398 if (error) 1399 return (error); 1400 inp = sotoinpcb(so); 1401#ifdef INET6 1402 if (isipv6) { 1403 inp->inp_vflag |= INP_IPV6; 1404 inp->in6p_hops = -1; /* use kernel default */ 1405 } 1406 else 1407#endif 1408 inp->inp_vflag |= INP_IPV4; 1409 tp = tcp_newtcpcb(inp); 1410 if (tp == NULL) { 1411 INP_LOCK(inp); 1412#ifdef INET6 1413 if (isipv6) { 1414 in6_pcbdetach(inp); 1415 in6_pcbfree(inp); 1416 } else { 1417#endif 1418 in_pcbdetach(inp); 1419 in_pcbfree(inp); 1420#ifdef INET6 1421 } 1422#endif 1423 return (ENOBUFS); 1424 } 1425 tp->t_state = TCPS_CLOSED; 1426 return (0); 1427} 1428 1429/* 1430 * Initiate (or continue) disconnect. 1431 * If embryonic state, just send reset (once). 1432 * If in ``let data drain'' option and linger null, just drop. 1433 * Otherwise (hard), mark socket disconnecting and drop 1434 * current input data; switch states based on user close, and 1435 * send segment to peer (with FIN). 1436 */ 1437static void 1438tcp_disconnect(tp) 1439 register struct tcpcb *tp; 1440{ 1441 struct inpcb *inp = tp->t_inpcb; 1442 struct socket *so = inp->inp_socket; 1443 1444 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1445 INP_LOCK_ASSERT(inp); 1446 1447 /* 1448 * Neither tcp_close() nor tcp_drop() should return NULL, as the 1449 * socket is still open. 1450 */ 1451 if (tp->t_state < TCPS_ESTABLISHED) { 1452 tp = tcp_close(tp); 1453 KASSERT(tp != NULL, 1454 ("tcp_disconnect: tcp_close() returned NULL")); 1455 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 1456 tp = tcp_drop(tp, 0); 1457 KASSERT(tp != NULL, 1458 ("tcp_disconnect: tcp_drop() returned NULL")); 1459 } else { 1460 soisdisconnecting(so); 1461 sbflush(&so->so_rcv); 1462 tcp_usrclosed(tp); 1463 if (!(inp->inp_vflag & INP_DROPPED)) 1464 tcp_output(tp); 1465 } 1466} 1467 1468/* 1469 * User issued close, and wish to trail through shutdown states: 1470 * if never received SYN, just forget it. If got a SYN from peer, 1471 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1472 * If already got a FIN from peer, then almost done; go to LAST_ACK 1473 * state. In all other cases, have already sent FIN to peer (e.g. 1474 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1475 * for peer to send FIN or not respond to keep-alives, etc. 1476 * We can let the user exit from the close as soon as the FIN is acked. 1477 */ 1478static void 1479tcp_usrclosed(tp) 1480 register struct tcpcb *tp; 1481{ 1482 1483 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1484 INP_LOCK_ASSERT(tp->t_inpcb); 1485 1486 switch (tp->t_state) { 1487 1488 case TCPS_CLOSED: 1489 case TCPS_LISTEN: 1490 tp->t_state = TCPS_CLOSED; 1491 tp = tcp_close(tp); 1492 /* 1493 * tcp_close() should never return NULL here as the socket is 1494 * still open. 1495 */ 1496 KASSERT(tp != NULL, 1497 ("tcp_usrclosed: tcp_close() returned NULL")); 1498 break; 1499 1500 case TCPS_SYN_SENT: 1501 case TCPS_SYN_RECEIVED: 1502 tp->t_flags |= TF_NEEDFIN; 1503 break; 1504 1505 case TCPS_ESTABLISHED: 1506 tp->t_state = TCPS_FIN_WAIT_1; 1507 break; 1508 1509 case TCPS_CLOSE_WAIT: 1510 tp->t_state = TCPS_LAST_ACK; 1511 break; 1512 } 1513 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1514 soisdisconnected(tp->t_inpcb->inp_socket); 1515 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 1516 if (tp->t_state == TCPS_FIN_WAIT_2) 1517 callout_reset(tp->tt_2msl, tcp_maxidle, 1518 tcp_timer_2msl, tp); 1519 } 1520} 1521