/* tcp_usrreq.c revision 157424 */
1/*- 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2006 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 * 31 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 32 * $FreeBSD: head/sys/netinet/tcp_usrreq.c 157424 2006-04-03 09:52:55Z rwatson $ 33 */ 34 35#include "opt_inet.h" 36#include "opt_inet6.h" 37#include "opt_tcpdebug.h" 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/malloc.h> 42#include <sys/kernel.h> 43#include <sys/sysctl.h> 44#include <sys/mbuf.h> 45#ifdef INET6 46#include <sys/domain.h> 47#endif /* INET6 */ 48#include <sys/socket.h> 49#include <sys/socketvar.h> 50#include <sys/protosw.h> 51#include <sys/proc.h> 52#include <sys/jail.h> 53 54#include <net/if.h> 55#include <net/route.h> 56 57#include <netinet/in.h> 58#include <netinet/in_systm.h> 59#ifdef INET6 60#include <netinet/ip6.h> 61#endif 62#include <netinet/in_pcb.h> 63#ifdef INET6 64#include <netinet6/in6_pcb.h> 65#endif 66#include <netinet/in_var.h> 67#include <netinet/ip_var.h> 68#ifdef INET6 69#include <netinet6/ip6_var.h> 70#include <netinet6/scope6_var.h> 71#endif 72#include <netinet/tcp.h> 73#include <netinet/tcp_fsm.h> 74#include <netinet/tcp_seq.h> 75#include <netinet/tcp_timer.h> 76#include <netinet/tcp_var.h> 77#include <netinet/tcpip.h> 78#ifdef TCPDEBUG 79#include <netinet/tcp_debug.h> 80#endif 81 82/* 83 * TCP protocol interface to socket abstraction. 84 */ 85extern char *tcpstates[]; /* XXX ??? */ 86 87static int tcp_attach(struct socket *); 88static int tcp_connect(struct tcpcb *, struct sockaddr *, 89 struct thread *td); 90#ifdef INET6 91static int tcp6_connect(struct tcpcb *, struct sockaddr *, 92 struct thread *td); 93#endif /* INET6 */ 94static void tcp_disconnect(struct tcpcb *); 95static void tcp_usrclosed(struct tcpcb *); 96static void tcp_fill_info(struct tcpcb *, struct tcp_info *); 97 98#ifdef TCPDEBUG 99#define TCPDEBUG0 int ostate = 0 100#define TCPDEBUG1() ostate = tp ? 
tp->t_state : 0 101#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 102 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 103#else 104#define TCPDEBUG0 105#define TCPDEBUG1() 106#define TCPDEBUG2(req) 107#endif 108 109/* 110 * TCP attaches to socket via pru_attach(), reserving space, 111 * and an internet control block. 112 */ 113static int 114tcp_usr_attach(struct socket *so, int proto, struct thread *td) 115{ 116 struct inpcb *inp; 117 struct tcpcb *tp = NULL; 118 int error; 119 TCPDEBUG0; 120 121 inp = sotoinpcb(so); 122 KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); 123 INP_INFO_WLOCK(&tcbinfo); 124 TCPDEBUG1(); 125 126 error = tcp_attach(so); 127 if (error) 128 goto out; 129 130 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 131 so->so_linger = TCP_LINGERTIME; 132 133 inp = sotoinpcb(so); 134 tp = intotcpcb(inp); 135out: 136 TCPDEBUG2(PRU_ATTACH); 137 INP_INFO_WUNLOCK(&tcbinfo); 138 return error; 139} 140 141/* 142 * pru_detach() detaches the TCP protocol from the socket. 143 * If the protocol state is non-embryonic, then can't 144 * do this directly: have to initiate a pru_disconnect(), 145 * which may finish later; embryonic TCB's can just 146 * be discarded here. 147 */ 148static void 149tcp_usr_detach(struct socket *so) 150{ 151 struct inpcb *inp; 152 struct tcpcb *tp; 153#ifdef INET6 154 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 155#endif 156 TCPDEBUG0; 157 158 inp = sotoinpcb(so); 159 KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); 160 INP_INFO_WLOCK(&tcbinfo); 161 INP_LOCK(inp); 162 KASSERT(inp->inp_socket != NULL, 163 ("tcp_usr_detach: inp_socket == NULL")); 164 TCPDEBUG1(); 165 166 /* 167 * First, if we still have full TCP state, and we're not dropped, 168 * initiate a disconnect. 169 */ 170 if (!(inp->inp_vflag & INP_TIMEWAIT) && 171 !(inp->inp_vflag & INP_DROPPED)) { 172 tp = intotcpcb(inp); 173 tcp_disconnect(tp); 174 } 175 176 /* 177 * Second, release any protocol state that we can reasonably release. 
178 * Note that the call to tcp_disconnect() may actually have changed 179 * the TCP state, so we have to re-evaluate INP_TIMEWAIT and 180 * INP_DROPPED. 181 */ 182 if (inp->inp_vflag & INP_TIMEWAIT) { 183 if (inp->inp_vflag & INP_DROPPED) { 184 /* 185 * Connection was in time wait and has been dropped; 186 * the calling path is either via tcp_twclose(), or 187 * as a result of an eventual soclose() after 188 * tcp_twclose() has been called. In either case, 189 * tcp_twclose() has detached the tcptw from the 190 * inpcb, so we just detach and free the inpcb. 191 * 192 * XXXRW: Would it be cleaner to free the tcptw 193 * here? 194 */ 195#ifdef INET6 196 if (isipv6) { 197 in6_pcbdetach(inp); 198 in6_pcbfree(inp); 199 } else { 200#endif 201 in_pcbdetach(inp); 202 in_pcbfree(inp); 203#ifdef INET6 204 } 205#endif 206 } else { 207 /* 208 * Connection is in time wait and has not yet been 209 * dropped; allow the socket to be discarded, but 210 * need to keep inpcb until end of time wait. 211 */ 212#ifdef INET6 213 if (isipv6) 214 in6_pcbdetach(inp); 215 else 216#endif 217 in_pcbdetach(inp); 218 INP_UNLOCK(inp); 219 } 220 } else { 221 tp = intotcpcb(inp); 222 if (inp->inp_vflag & INP_DROPPED || 223 tp->t_state < TCPS_SYN_SENT) { 224 /* 225 * Connection has been dropped or is a listen socket, 226 * tear down all pcb state and allow socket to be 227 * freed. 228 */ 229 tcp_discardcb(tp); 230#ifdef INET6 231 if (isipv6) { 232 in_pcbdetach(inp); 233 in_pcbfree(inp); 234 } else { 235#endif 236 in_pcbdetach(inp); 237 in_pcbfree(inp); 238#ifdef INET6 239 } 240#endif 241 } else { 242 SOCK_LOCK(so); 243 so->so_state |= SS_PROTOREF; 244 SOCK_UNLOCK(so); 245 inp->inp_vflag |= INP_SOCKREF; 246 INP_UNLOCK(inp); 247 } 248 } 249 tp = NULL; 250 TCPDEBUG2(PRU_DETACH); 251 INP_INFO_WUNLOCK(&tcbinfo); 252} 253 254/* 255 * Give the socket an address. 
256 */ 257static int 258tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 259{ 260 int error = 0; 261 struct inpcb *inp; 262 struct tcpcb *tp = NULL; 263 struct sockaddr_in *sinp; 264 265 sinp = (struct sockaddr_in *)nam; 266 if (nam->sa_len != sizeof (*sinp)) 267 return (EINVAL); 268 /* 269 * Must check for multicast addresses and disallow binding 270 * to them. 271 */ 272 if (sinp->sin_family == AF_INET && 273 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 274 return (EAFNOSUPPORT); 275 276 TCPDEBUG0; 277 INP_INFO_WLOCK(&tcbinfo); 278 inp = sotoinpcb(so); 279 KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); 280 INP_LOCK(inp); 281 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 282 error = EINVAL; 283 goto out; 284 } 285 tp = intotcpcb(inp); 286 TCPDEBUG1(); 287 error = in_pcbbind(inp, nam, td->td_ucred); 288out: 289 TCPDEBUG2(PRU_BIND); 290 INP_UNLOCK(inp); 291 INP_INFO_WUNLOCK(&tcbinfo); 292 293 return (error); 294} 295 296#ifdef INET6 297static int 298tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 299{ 300 int error = 0; 301 struct inpcb *inp; 302 struct tcpcb *tp = NULL; 303 struct sockaddr_in6 *sin6p; 304 305 sin6p = (struct sockaddr_in6 *)nam; 306 if (nam->sa_len != sizeof (*sin6p)) 307 return (EINVAL); 308 /* 309 * Must check for multicast addresses and disallow binding 310 * to them. 
311 */ 312 if (sin6p->sin6_family == AF_INET6 && 313 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 314 return (EAFNOSUPPORT); 315 316 TCPDEBUG0; 317 INP_INFO_WLOCK(&tcbinfo); 318 inp = sotoinpcb(so); 319 KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); 320 INP_LOCK(inp); 321 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 322 error = EINVAL; 323 goto out; 324 } 325 tp = intotcpcb(inp); 326 TCPDEBUG1(); 327 inp->inp_vflag &= ~INP_IPV4; 328 inp->inp_vflag |= INP_IPV6; 329 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 330 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 331 inp->inp_vflag |= INP_IPV4; 332 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 333 struct sockaddr_in sin; 334 335 in6_sin6_2_sin(&sin, sin6p); 336 inp->inp_vflag |= INP_IPV4; 337 inp->inp_vflag &= ~INP_IPV6; 338 error = in_pcbbind(inp, (struct sockaddr *)&sin, 339 td->td_ucred); 340 goto out; 341 } 342 } 343 error = in6_pcbbind(inp, nam, td->td_ucred); 344out: 345 TCPDEBUG2(PRU_BIND); 346 INP_UNLOCK(inp); 347 INP_INFO_WUNLOCK(&tcbinfo); 348 return (error); 349} 350#endif /* INET6 */ 351 352/* 353 * Prepare to accept connections. 
354 */ 355static int 356tcp_usr_listen(struct socket *so, int backlog, struct thread *td) 357{ 358 int error = 0; 359 struct inpcb *inp; 360 struct tcpcb *tp = NULL; 361 362 TCPDEBUG0; 363 INP_INFO_WLOCK(&tcbinfo); 364 inp = sotoinpcb(so); 365 KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); 366 INP_LOCK(inp); 367 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 368 error = EINVAL; 369 goto out; 370 } 371 tp = intotcpcb(inp); 372 TCPDEBUG1(); 373 SOCK_LOCK(so); 374 error = solisten_proto_check(so); 375 if (error == 0 && inp->inp_lport == 0) 376 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 377 if (error == 0) { 378 tp->t_state = TCPS_LISTEN; 379 solisten_proto(so, backlog); 380 } 381 SOCK_UNLOCK(so); 382 383out: 384 TCPDEBUG2(PRU_LISTEN); 385 INP_UNLOCK(inp); 386 INP_INFO_WUNLOCK(&tcbinfo); 387 return (error); 388} 389 390#ifdef INET6 391static int 392tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) 393{ 394 int error = 0; 395 struct inpcb *inp; 396 struct tcpcb *tp = NULL; 397 398 TCPDEBUG0; 399 INP_INFO_WLOCK(&tcbinfo); 400 inp = sotoinpcb(so); 401 KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); 402 INP_LOCK(inp); 403 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 404 error = EINVAL; 405 goto out; 406 } 407 tp = intotcpcb(inp); 408 TCPDEBUG1(); 409 SOCK_LOCK(so); 410 error = solisten_proto_check(so); 411 if (error == 0 && inp->inp_lport == 0) { 412 inp->inp_vflag &= ~INP_IPV4; 413 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 414 inp->inp_vflag |= INP_IPV4; 415 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 416 } 417 if (error == 0) { 418 tp->t_state = TCPS_LISTEN; 419 solisten_proto(so, backlog); 420 } 421 SOCK_UNLOCK(so); 422 423out: 424 TCPDEBUG2(PRU_LISTEN); 425 INP_UNLOCK(inp); 426 INP_INFO_WUNLOCK(&tcbinfo); 427 return (error); 428} 429#endif /* INET6 */ 430 431/* 432 * Initiate connection to peer. 433 * Create a template for use in transmissions on this connection. 
434 * Enter SYN_SENT state, and mark socket as connecting. 435 * Start keep-alive timer, and seed output sequence space. 436 * Send initial segment on connection. 437 */ 438static int 439tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 440{ 441 int error = 0; 442 struct inpcb *inp; 443 struct tcpcb *tp = NULL; 444 struct sockaddr_in *sinp; 445 446 sinp = (struct sockaddr_in *)nam; 447 if (nam->sa_len != sizeof (*sinp)) 448 return (EINVAL); 449 /* 450 * Must disallow TCP ``connections'' to multicast addresses. 451 */ 452 if (sinp->sin_family == AF_INET 453 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 454 return (EAFNOSUPPORT); 455 if (jailed(td->td_ucred)) 456 prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); 457 458 TCPDEBUG0; 459 INP_INFO_WLOCK(&tcbinfo); 460 inp = sotoinpcb(so); 461 KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); 462 INP_LOCK(inp); 463 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 464 error = EINVAL; 465 goto out; 466 } 467 tp = intotcpcb(inp); 468 TCPDEBUG1(); 469 if ((error = tcp_connect(tp, nam, td)) != 0) 470 goto out; 471 error = tcp_output(tp); 472out: 473 TCPDEBUG2(PRU_CONNECT); 474 INP_UNLOCK(inp); 475 INP_INFO_WUNLOCK(&tcbinfo); 476 return (error); 477} 478 479#ifdef INET6 480static int 481tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 482{ 483 int error = 0; 484 struct inpcb *inp; 485 struct tcpcb *tp = NULL; 486 struct sockaddr_in6 *sin6p; 487 488 TCPDEBUG0; 489 490 sin6p = (struct sockaddr_in6 *)nam; 491 if (nam->sa_len != sizeof (*sin6p)) 492 return (EINVAL); 493 /* 494 * Must disallow TCP ``connections'' to multicast addresses. 
495 */ 496 if (sin6p->sin6_family == AF_INET6 497 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 498 return (EAFNOSUPPORT); 499 500 INP_INFO_WLOCK(&tcbinfo); 501 inp = sotoinpcb(so); 502 KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); 503 INP_LOCK(inp); 504 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 505 error = EINVAL; 506 goto out; 507 } 508 tp = intotcpcb(inp); 509 TCPDEBUG1(); 510 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 511 struct sockaddr_in sin; 512 513 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 514 error = EINVAL; 515 goto out; 516 } 517 518 in6_sin6_2_sin(&sin, sin6p); 519 inp->inp_vflag |= INP_IPV4; 520 inp->inp_vflag &= ~INP_IPV6; 521 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) 522 goto out; 523 error = tcp_output(tp); 524 goto out; 525 } 526 inp->inp_vflag &= ~INP_IPV4; 527 inp->inp_vflag |= INP_IPV6; 528 inp->inp_inc.inc_isipv6 = 1; 529 if ((error = tcp6_connect(tp, nam, td)) != 0) 530 goto out; 531 error = tcp_output(tp); 532 533out: 534 TCPDEBUG2(PRU_CONNECT); 535 INP_UNLOCK(inp); 536 INP_INFO_WUNLOCK(&tcbinfo); 537 return (error); 538} 539#endif /* INET6 */ 540 541/* 542 * Initiate disconnect from peer. 543 * If connection never passed embryonic stage, just drop; 544 * else if don't need to let data drain, then can just drop anyways, 545 * else have to begin TCP shutdown process: mark socket disconnecting, 546 * drain unread data, state switch to reflect user close, and 547 * send segment (e.g. FIN) to peer. Socket will be really disconnected 548 * when peer sends FIN and acks ours. 549 * 550 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 
551 */ 552static int 553tcp_usr_disconnect(struct socket *so) 554{ 555 struct inpcb *inp; 556 struct tcpcb *tp = NULL; 557 int error = 0; 558 559 TCPDEBUG0; 560 INP_INFO_WLOCK(&tcbinfo); 561 inp = sotoinpcb(so); 562 KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); 563 INP_LOCK(inp); 564 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 565 error = EINVAL; 566 goto out; 567 } 568 tp = intotcpcb(inp); 569 TCPDEBUG1(); 570 tcp_disconnect(tp); 571out: 572 TCPDEBUG2(PRU_DISCONNECT); 573 INP_UNLOCK(inp); 574 INP_INFO_WUNLOCK(&tcbinfo); 575 return (error); 576} 577 578/* 579 * Accept a connection. Essentially all the work is 580 * done at higher levels; just return the address 581 * of the peer, storing through addr. 582 */ 583static int 584tcp_usr_accept(struct socket *so, struct sockaddr **nam) 585{ 586 int error = 0; 587 struct inpcb *inp = NULL; 588 struct tcpcb *tp = NULL; 589 struct in_addr addr; 590 in_port_t port = 0; 591 TCPDEBUG0; 592 593 if (so->so_state & SS_ISDISCONNECTED) 594 return (ECONNABORTED); 595 596 inp = sotoinpcb(so); 597 KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); 598 INP_LOCK(inp); 599 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 600 error = ECONNABORTED; 601 goto out; 602 } 603 tp = intotcpcb(inp); 604 TCPDEBUG1(); 605 606 /* 607 * We inline in_setpeeraddr and COMMON_END here, so that we can 608 * copy the data of interest and defer the malloc until after we 609 * release the lock. 
610 */ 611 port = inp->inp_fport; 612 addr = inp->inp_faddr; 613 614out: 615 TCPDEBUG2(PRU_ACCEPT); 616 INP_UNLOCK(inp); 617 if (error == 0) 618 *nam = in_sockaddr(port, &addr); 619 return error; 620} 621 622#ifdef INET6 623static int 624tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 625{ 626 struct inpcb *inp = NULL; 627 int error = 0; 628 struct tcpcb *tp = NULL; 629 struct in_addr addr; 630 struct in6_addr addr6; 631 in_port_t port = 0; 632 int v4 = 0; 633 TCPDEBUG0; 634 635 if (so->so_state & SS_ISDISCONNECTED) { 636 error = ECONNABORTED; 637 goto out; 638 } 639 640 inp = sotoinpcb(so); 641 KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); 642 INP_LOCK(inp); 643 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 644 error = EINVAL; 645 goto out; 646 } 647 tp = intotcpcb(inp); 648 TCPDEBUG1(); 649 650 /* 651 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can 652 * copy the data of interest and defer the malloc until after we 653 * release the lock. 654 */ 655 if (inp->inp_vflag & INP_IPV4) { 656 v4 = 1; 657 port = inp->inp_fport; 658 addr = inp->inp_faddr; 659 } else { 660 port = inp->inp_fport; 661 addr6 = inp->in6p_faddr; 662 } 663 664out: 665 TCPDEBUG2(PRU_ACCEPT); 666 INP_UNLOCK(inp); 667 if (error == 0) { 668 if (v4) 669 *nam = in6_v4mapsin6_sockaddr(port, &addr); 670 else 671 *nam = in6_sockaddr(port, &addr6); 672 } 673 return error; 674} 675#endif /* INET6 */ 676 677/* 678 * This is the wrapper function for in_setsockaddr. We just pass down 679 * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking 680 * here because in_setsockaddr will call malloc and can block. 681 */ 682static int 683tcp_sockaddr(struct socket *so, struct sockaddr **nam) 684{ 685 return (in_setsockaddr(so, nam, &tcbinfo)); 686} 687 688/* 689 * This is the wrapper function for in_setpeeraddr. We just pass down 690 * the pcbinfo for in_setpeeraddr to lock. 
691 */ 692static int 693tcp_peeraddr(struct socket *so, struct sockaddr **nam) 694{ 695 return (in_setpeeraddr(so, nam, &tcbinfo)); 696} 697 698/* 699 * Mark the connection as being incapable of further output. 700 */ 701static int 702tcp_usr_shutdown(struct socket *so) 703{ 704 int error = 0; 705 struct inpcb *inp; 706 struct tcpcb *tp = NULL; 707 708 TCPDEBUG0; 709 INP_INFO_WLOCK(&tcbinfo); 710 inp = sotoinpcb(so); 711 KASSERT(inp != NULL, ("inp == NULL")); 712 INP_LOCK(inp); 713 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 714 error = EINVAL; 715 goto out; 716 } 717 tp = intotcpcb(inp); 718 TCPDEBUG1(); 719 socantsendmore(so); 720 tcp_usrclosed(tp); 721 error = tcp_output(tp); 722 723out: 724 TCPDEBUG2(PRU_SHUTDOWN); 725 INP_UNLOCK(inp); 726 INP_INFO_WUNLOCK(&tcbinfo); 727 728 return (error); 729} 730 731/* 732 * After a receive, possibly send window update to peer. 733 */ 734static int 735tcp_usr_rcvd(struct socket *so, int flags) 736{ 737 struct inpcb *inp; 738 struct tcpcb *tp = NULL; 739 int error = 0; 740 741 TCPDEBUG0; 742 inp = sotoinpcb(so); 743 KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); 744 INP_LOCK(inp); 745 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 746 error = EINVAL; 747 goto out; 748 } 749 tp = intotcpcb(inp); 750 TCPDEBUG1(); 751 tcp_output(tp); 752 753out: 754 TCPDEBUG2(PRU_RCVD); 755 INP_UNLOCK(inp); 756 return (error); 757} 758 759/* 760 * Do a send by putting data in output queue and updating urgent 761 * marker if URG set. Possibly send more data. Unlike the other 762 * pru_*() routines, the mbuf chains are our responsibility. We 763 * must either enqueue them or free them. The other pru_* routines 764 * generally are caller-frees. 
765 */ 766static int 767tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 768 struct sockaddr *nam, struct mbuf *control, struct thread *td) 769{ 770 int error = 0; 771 struct inpcb *inp; 772 struct tcpcb *tp = NULL; 773 int headlocked = 0; 774#ifdef INET6 775 int isipv6; 776#endif 777 TCPDEBUG0; 778 779 /* 780 * We require the pcbinfo lock in two cases: 781 * 782 * (1) An implied connect is taking place, which can result in 783 * binding IPs and ports and hence modification of the pcb hash 784 * chains. 785 * 786 * (2) PRUS_EOF is set, resulting in explicit close on the send. 787 */ 788 if ((nam != NULL) || (flags & PRUS_EOF)) { 789 INP_INFO_WLOCK(&tcbinfo); 790 headlocked = 1; 791 } 792 inp = sotoinpcb(so); 793 KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); 794 INP_LOCK(inp); 795 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 796 error = EINVAL; 797 goto out; 798 } 799#ifdef INET6 800 isipv6 = nam && nam->sa_family == AF_INET6; 801#endif /* INET6 */ 802 tp = intotcpcb(inp); 803 TCPDEBUG1(); 804 if (control) { 805 /* TCP doesn't do control messages (rights, creds, etc) */ 806 if (control->m_len) { 807 m_freem(control); 808 if (m) 809 m_freem(m); 810 error = EINVAL; 811 goto out; 812 } 813 m_freem(control); /* empty control, just free it */ 814 } 815 if (!(flags & PRUS_OOB)) { 816 sbappendstream(&so->so_snd, m); 817 if (nam && tp->t_state < TCPS_SYN_SENT) { 818 /* 819 * Do implied connect if not yet connected, 820 * initialize window to default value, and 821 * initialize maxseg/maxopd using peer's cached 822 * MSS. 823 */ 824 INP_INFO_WLOCK_ASSERT(&tcbinfo); 825#ifdef INET6 826 if (isipv6) 827 error = tcp6_connect(tp, nam, td); 828 else 829#endif /* INET6 */ 830 error = tcp_connect(tp, nam, td); 831 if (error) 832 goto out; 833 tp->snd_wnd = TTCP_CLIENT_SND_WND; 834 tcp_mss(tp, -1); 835 } 836 if (flags & PRUS_EOF) { 837 /* 838 * Close the send side of the connection after 839 * the data is sent. 
840 */ 841 INP_INFO_WLOCK_ASSERT(&tcbinfo); 842 socantsendmore(so); 843 tcp_usrclosed(tp); 844 } 845 if (headlocked) { 846 INP_INFO_WUNLOCK(&tcbinfo); 847 headlocked = 0; 848 } 849 if (tp != NULL) { 850 if (flags & PRUS_MORETOCOME) 851 tp->t_flags |= TF_MORETOCOME; 852 error = tcp_output(tp); 853 if (flags & PRUS_MORETOCOME) 854 tp->t_flags &= ~TF_MORETOCOME; 855 } 856 } else { 857 /* 858 * XXXRW: PRUS_EOF not implemented with PRUS_OOB? 859 */ 860 SOCKBUF_LOCK(&so->so_snd); 861 if (sbspace(&so->so_snd) < -512) { 862 SOCKBUF_UNLOCK(&so->so_snd); 863 m_freem(m); 864 error = ENOBUFS; 865 goto out; 866 } 867 /* 868 * According to RFC961 (Assigned Protocols), 869 * the urgent pointer points to the last octet 870 * of urgent data. We continue, however, 871 * to consider it to indicate the first octet 872 * of data past the urgent section. 873 * Otherwise, snd_up should be one lower. 874 */ 875 sbappendstream_locked(&so->so_snd, m); 876 SOCKBUF_UNLOCK(&so->so_snd); 877 if (nam && tp->t_state < TCPS_SYN_SENT) { 878 /* 879 * Do implied connect if not yet connected, 880 * initialize window to default value, and 881 * initialize maxseg/maxopd using peer's cached 882 * MSS. 883 */ 884 INP_INFO_WLOCK_ASSERT(&tcbinfo); 885#ifdef INET6 886 if (isipv6) 887 error = tcp6_connect(tp, nam, td); 888 else 889#endif /* INET6 */ 890 error = tcp_connect(tp, nam, td); 891 if (error) 892 goto out; 893 tp->snd_wnd = TTCP_CLIENT_SND_WND; 894 tcp_mss(tp, -1); 895 INP_INFO_WUNLOCK(&tcbinfo); 896 headlocked = 0; 897 } else if (nam) { 898 INP_INFO_WUNLOCK(&tcbinfo); 899 headlocked = 0; 900 } 901 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 902 tp->t_flags |= TF_FORCEDATA; 903 error = tcp_output(tp); 904 tp->t_flags &= ~TF_FORCEDATA; 905 } 906out: 907 TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : 908 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 909 INP_UNLOCK(inp); 910 if (headlocked) 911 INP_INFO_WUNLOCK(&tcbinfo); 912 return (error); 913} 914 915/* 916 * Abort the TCP. 
917 */ 918static void 919tcp_usr_abort(struct socket *so) 920{ 921#if 0 922 struct inpcb *inp; 923 struct tcpcb *tp; 924#endif 925 926 /* 927 * XXXRW: This is not really quite the same, as we want to tcp_drop() 928 * rather than tcp_disconnect(), I think, but for now I'll avoid 929 * replicating all the tear-down logic here. 930 */ 931 tcp_usr_detach(so); 932 933#if 0 934 TCPDEBUG0; 935 INP_INFO_WLOCK(&tcbinfo); 936 inp = sotoinpcb(so); 937 INP_LOCK(inp); 938 /* 939 * Do we need to handle timewait here? Aborted connections should 940 * never generate a FIN? 941 */ 942 KASSERT((inp->inp_vflag & INP_TIMEWAIT) == 0, 943 ("tcp_usr_abort: timewait")); 944 tp = intotcpcb(inp); 945 TCPDEBUG1(); 946 tp = tcp_drop(tp, ECONNABORTED); 947 TCPDEBUG2(PRU_ABORT); 948 if (tp != NULL) 949 INP_UNLOCK(inp); 950 INP_INFO_WUNLOCK(&tcbinfo); 951#endif 952} 953 954/* 955 * Receive out-of-band data. 956 */ 957static int 958tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 959{ 960 int error = 0; 961 struct inpcb *inp; 962 struct tcpcb *tp = NULL; 963 964 TCPDEBUG0; 965 inp = sotoinpcb(so); 966 KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); 967 INP_LOCK(inp); 968 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 969 error = EINVAL; 970 goto out; 971 } 972 tp = intotcpcb(inp); 973 TCPDEBUG1(); 974 if ((so->so_oobmark == 0 && 975 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || 976 so->so_options & SO_OOBINLINE || 977 tp->t_oobflags & TCPOOB_HADDATA) { 978 error = EINVAL; 979 goto out; 980 } 981 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 982 error = EWOULDBLOCK; 983 goto out; 984 } 985 m->m_len = 1; 986 *mtod(m, caddr_t) = tp->t_iobc; 987 if ((flags & MSG_PEEK) == 0) 988 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 989 990out: 991 TCPDEBUG2(PRU_RCVOOB); 992 INP_UNLOCK(inp); 993 return (error); 994} 995 996struct pr_usrreqs tcp_usrreqs = { 997 .pru_abort = tcp_usr_abort, 998 .pru_accept = tcp_usr_accept, 999 .pru_attach = tcp_usr_attach, 1000 .pru_bind = 
tcp_usr_bind, 1001 .pru_connect = tcp_usr_connect, 1002 .pru_control = in_control, 1003 .pru_detach = tcp_usr_detach, 1004 .pru_disconnect = tcp_usr_disconnect, 1005 .pru_listen = tcp_usr_listen, 1006 .pru_peeraddr = tcp_peeraddr, 1007 .pru_rcvd = tcp_usr_rcvd, 1008 .pru_rcvoob = tcp_usr_rcvoob, 1009 .pru_send = tcp_usr_send, 1010 .pru_shutdown = tcp_usr_shutdown, 1011 .pru_sockaddr = tcp_sockaddr, 1012 .pru_sosetlabel = in_pcbsosetlabel 1013}; 1014 1015#ifdef INET6 1016struct pr_usrreqs tcp6_usrreqs = { 1017 .pru_abort = tcp_usr_abort, 1018 .pru_accept = tcp6_usr_accept, 1019 .pru_attach = tcp_usr_attach, 1020 .pru_bind = tcp6_usr_bind, 1021 .pru_connect = tcp6_usr_connect, 1022 .pru_control = in6_control, 1023 .pru_detach = tcp_usr_detach, 1024 .pru_disconnect = tcp_usr_disconnect, 1025 .pru_listen = tcp6_usr_listen, 1026 .pru_peeraddr = in6_mapped_peeraddr, 1027 .pru_rcvd = tcp_usr_rcvd, 1028 .pru_rcvoob = tcp_usr_rcvoob, 1029 .pru_send = tcp_usr_send, 1030 .pru_shutdown = tcp_usr_shutdown, 1031 .pru_sockaddr = in6_mapped_sockaddr, 1032 .pru_sosetlabel = in_pcbsosetlabel 1033}; 1034#endif /* INET6 */ 1035 1036/* 1037 * Common subroutine to open a TCP connection to remote host specified 1038 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1039 * port number if needed. Call in_pcbconnect_setup to do the routing and 1040 * to choose a local host address (interface). If there is an existing 1041 * incarnation of the same connection in TIME-WAIT state and if the remote 1042 * host was sending CC options and if the connection duration was < MSL, then 1043 * truncate the previous TIME-WAIT state and proceed. 1044 * Initialize connection parameters and enter SYN-SENT state. 
1045 */ 1046static int 1047tcp_connect(tp, nam, td) 1048 register struct tcpcb *tp; 1049 struct sockaddr *nam; 1050 struct thread *td; 1051{ 1052 struct inpcb *inp = tp->t_inpcb, *oinp; 1053 struct socket *so = inp->inp_socket; 1054 struct in_addr laddr; 1055 u_short lport; 1056 int error; 1057 1058 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1059 INP_LOCK_ASSERT(inp); 1060 1061 if (inp->inp_lport == 0) { 1062 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1063 if (error) 1064 return error; 1065 } 1066 1067 /* 1068 * Cannot simply call in_pcbconnect, because there might be an 1069 * earlier incarnation of this same connection still in 1070 * TIME_WAIT state, creating an ADDRINUSE error. 1071 */ 1072 laddr = inp->inp_laddr; 1073 lport = inp->inp_lport; 1074 error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, 1075 &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); 1076 if (error && oinp == NULL) 1077 return error; 1078 if (oinp) 1079 return EADDRINUSE; 1080 inp->inp_laddr = laddr; 1081 in_pcbrehash(inp); 1082 1083 /* Compute window scaling to request. 
*/ 1084 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1085 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 1086 tp->request_r_scale++; 1087 1088 soisconnecting(so); 1089 tcpstat.tcps_connattempt++; 1090 tp->t_state = TCPS_SYN_SENT; 1091 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 1092 tp->iss = tcp_new_isn(tp); 1093 tp->t_bw_rtseq = tp->iss; 1094 tcp_sendseqinit(tp); 1095 1096 return 0; 1097} 1098 1099#ifdef INET6 1100static int 1101tcp6_connect(tp, nam, td) 1102 register struct tcpcb *tp; 1103 struct sockaddr *nam; 1104 struct thread *td; 1105{ 1106 struct inpcb *inp = tp->t_inpcb, *oinp; 1107 struct socket *so = inp->inp_socket; 1108 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 1109 struct in6_addr *addr6; 1110 int error; 1111 1112 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1113 INP_LOCK_ASSERT(inp); 1114 1115 if (inp->inp_lport == 0) { 1116 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1117 if (error) 1118 return error; 1119 } 1120 1121 /* 1122 * Cannot simply call in_pcbconnect, because there might be an 1123 * earlier incarnation of this same connection still in 1124 * TIME_WAIT state, creating an ADDRINUSE error. 1125 * in6_pcbladdr() also handles scope zone IDs. 1126 */ 1127 error = in6_pcbladdr(inp, nam, &addr6); 1128 if (error) 1129 return error; 1130 oinp = in6_pcblookup_hash(inp->inp_pcbinfo, 1131 &sin6->sin6_addr, sin6->sin6_port, 1132 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 1133 ? 
addr6 1134 : &inp->in6p_laddr, 1135 inp->inp_lport, 0, NULL); 1136 if (oinp) 1137 return EADDRINUSE; 1138 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 1139 inp->in6p_laddr = *addr6; 1140 inp->in6p_faddr = sin6->sin6_addr; 1141 inp->inp_fport = sin6->sin6_port; 1142 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 1143 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 1144 if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) 1145 inp->in6p_flowinfo |= 1146 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 1147 in_pcbrehash(inp); 1148 1149 /* Compute window scaling to request. */ 1150 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1151 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 1152 tp->request_r_scale++; 1153 1154 soisconnecting(so); 1155 tcpstat.tcps_connattempt++; 1156 tp->t_state = TCPS_SYN_SENT; 1157 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 1158 tp->iss = tcp_new_isn(tp); 1159 tp->t_bw_rtseq = tp->iss; 1160 tcp_sendseqinit(tp); 1161 1162 return 0; 1163} 1164#endif /* INET6 */ 1165 1166/* 1167 * Export TCP internal state information via a struct tcp_info, based on the 1168 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently 1169 * (TCP state machine, etc). We export all information using FreeBSD-native 1170 * constants -- for example, the numeric values for tcpi_state will differ 1171 * from Linux. 
1172 */ 1173static void 1174tcp_fill_info(tp, ti) 1175 struct tcpcb *tp; 1176 struct tcp_info *ti; 1177{ 1178 1179 INP_LOCK_ASSERT(tp->t_inpcb); 1180 bzero(ti, sizeof(*ti)); 1181 1182 ti->tcpi_state = tp->t_state; 1183 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 1184 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1185 if (tp->sack_enable) 1186 ti->tcpi_options |= TCPI_OPT_SACK; 1187 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 1188 ti->tcpi_options |= TCPI_OPT_WSCALE; 1189 ti->tcpi_snd_wscale = tp->snd_scale; 1190 ti->tcpi_rcv_wscale = tp->rcv_scale; 1191 } 1192 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 1193 ti->tcpi_snd_cwnd = tp->snd_cwnd; 1194 1195 /* 1196 * FreeBSD-specific extension fields for tcp_info. 1197 */ 1198 ti->tcpi_rcv_space = tp->rcv_wnd; 1199 ti->tcpi_snd_wnd = tp->snd_wnd; 1200 ti->tcpi_snd_bwnd = tp->snd_bwnd; 1201} 1202 1203/* 1204 * The new sockopt interface makes it possible for us to block in the 1205 * copyin/out step (if we take a page fault). Taking a page fault at 1206 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 1207 * use TSM, there probably isn't any need for this function to run at 1208 * splnet() any more. This needs more examination.) 1209 * 1210 * XXXRW: The locking here is wrong; we may take a page fault while holding 1211 * the inpcb lock. 
1212 */ 1213int 1214tcp_ctloutput(so, sopt) 1215 struct socket *so; 1216 struct sockopt *sopt; 1217{ 1218 int error, opt, optval; 1219 struct inpcb *inp; 1220 struct tcpcb *tp; 1221 struct tcp_info ti; 1222 1223 error = 0; 1224 inp = sotoinpcb(so); 1225 KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); 1226 INP_LOCK(inp); 1227 if (sopt->sopt_level != IPPROTO_TCP) { 1228 INP_UNLOCK(inp); 1229#ifdef INET6 1230 if (INP_CHECK_SOCKAF(so, AF_INET6)) 1231 error = ip6_ctloutput(so, sopt); 1232 else 1233#endif /* INET6 */ 1234 error = ip_ctloutput(so, sopt); 1235 return (error); 1236 } 1237 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 1238 error = ECONNRESET; 1239 goto out; 1240 } 1241 tp = intotcpcb(inp); 1242 1243 switch (sopt->sopt_dir) { 1244 case SOPT_SET: 1245 switch (sopt->sopt_name) { 1246#ifdef TCP_SIGNATURE 1247 case TCP_MD5SIG: 1248 error = sooptcopyin(sopt, &optval, sizeof optval, 1249 sizeof optval); 1250 if (error) 1251 break; 1252 1253 if (optval > 0) 1254 tp->t_flags |= TF_SIGNATURE; 1255 else 1256 tp->t_flags &= ~TF_SIGNATURE; 1257 break; 1258#endif /* TCP_SIGNATURE */ 1259 case TCP_NODELAY: 1260 case TCP_NOOPT: 1261 error = sooptcopyin(sopt, &optval, sizeof optval, 1262 sizeof optval); 1263 if (error) 1264 break; 1265 1266 switch (sopt->sopt_name) { 1267 case TCP_NODELAY: 1268 opt = TF_NODELAY; 1269 break; 1270 case TCP_NOOPT: 1271 opt = TF_NOOPT; 1272 break; 1273 default: 1274 opt = 0; /* dead code to fool gcc */ 1275 break; 1276 } 1277 1278 if (optval) 1279 tp->t_flags |= opt; 1280 else 1281 tp->t_flags &= ~opt; 1282 break; 1283 1284 case TCP_NOPUSH: 1285 error = sooptcopyin(sopt, &optval, sizeof optval, 1286 sizeof optval); 1287 if (error) 1288 break; 1289 1290 if (optval) 1291 tp->t_flags |= TF_NOPUSH; 1292 else { 1293 tp->t_flags &= ~TF_NOPUSH; 1294 error = tcp_output(tp); 1295 } 1296 break; 1297 1298 case TCP_MAXSEG: 1299 error = sooptcopyin(sopt, &optval, sizeof optval, 1300 sizeof optval); 1301 if (error) 1302 break; 1303 1304 if 
(optval > 0 && optval <= tp->t_maxseg && 1305 optval + 40 >= tcp_minmss) 1306 tp->t_maxseg = optval; 1307 else 1308 error = EINVAL; 1309 break; 1310 1311 case TCP_INFO: 1312 error = EINVAL; 1313 break; 1314 1315 default: 1316 error = ENOPROTOOPT; 1317 break; 1318 } 1319 break; 1320 1321 case SOPT_GET: 1322 switch (sopt->sopt_name) { 1323#ifdef TCP_SIGNATURE 1324 case TCP_MD5SIG: 1325 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 1326 error = sooptcopyout(sopt, &optval, sizeof optval); 1327 break; 1328#endif 1329 case TCP_NODELAY: 1330 optval = tp->t_flags & TF_NODELAY; 1331 error = sooptcopyout(sopt, &optval, sizeof optval); 1332 break; 1333 case TCP_MAXSEG: 1334 optval = tp->t_maxseg; 1335 error = sooptcopyout(sopt, &optval, sizeof optval); 1336 break; 1337 case TCP_NOOPT: 1338 optval = tp->t_flags & TF_NOOPT; 1339 error = sooptcopyout(sopt, &optval, sizeof optval); 1340 break; 1341 case TCP_NOPUSH: 1342 optval = tp->t_flags & TF_NOPUSH; 1343 error = sooptcopyout(sopt, &optval, sizeof optval); 1344 break; 1345 case TCP_INFO: 1346 tcp_fill_info(tp, &ti); 1347 error = sooptcopyout(sopt, &ti, sizeof ti); 1348 break; 1349 default: 1350 error = ENOPROTOOPT; 1351 break; 1352 } 1353 break; 1354 } 1355out: 1356 INP_UNLOCK(inp); 1357 return (error); 1358} 1359 1360/* 1361 * tcp_sendspace and tcp_recvspace are the default send and receive window 1362 * sizes, respectively. These are obsolescent (this information should 1363 * be set by the route). 
1364 */ 1365u_long tcp_sendspace = 1024*32; 1366SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, 1367 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); 1368u_long tcp_recvspace = 1024*64; 1369SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1370 &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); 1371 1372/* 1373 * Attach TCP protocol to socket, allocating 1374 * internet protocol control block, tcp control block, 1375 * bufer space, and entering LISTEN state if to accept connections. 1376 */ 1377static int 1378tcp_attach(so) 1379 struct socket *so; 1380{ 1381 register struct tcpcb *tp; 1382 struct inpcb *inp; 1383 int error; 1384#ifdef INET6 1385 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 1386#endif 1387 1388 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1389 1390 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1391 error = soreserve(so, tcp_sendspace, tcp_recvspace); 1392 if (error) 1393 return (error); 1394 } 1395 error = in_pcballoc(so, &tcbinfo, "tcpinp"); 1396 if (error) 1397 return (error); 1398 inp = sotoinpcb(so); 1399#ifdef INET6 1400 if (isipv6) { 1401 inp->inp_vflag |= INP_IPV6; 1402 inp->in6p_hops = -1; /* use kernel default */ 1403 } 1404 else 1405#endif 1406 inp->inp_vflag |= INP_IPV4; 1407 tp = tcp_newtcpcb(inp); 1408 if (tp == NULL) { 1409 INP_LOCK(inp); 1410#ifdef INET6 1411 if (isipv6) { 1412 in6_pcbdetach(inp); 1413 in6_pcbfree(inp); 1414 } else { 1415#endif 1416 in_pcbdetach(inp); 1417 in_pcbfree(inp); 1418#ifdef INET6 1419 } 1420#endif 1421 return (ENOBUFS); 1422 } 1423 tp->t_state = TCPS_CLOSED; 1424 return (0); 1425} 1426 1427/* 1428 * Initiate (or continue) disconnect. 1429 * If embryonic state, just send reset (once). 1430 * If in ``let data drain'' option and linger null, just drop. 1431 * Otherwise (hard), mark socket disconnecting and drop 1432 * current input data; switch states based on user close, and 1433 * send segment to peer (with FIN). 
1434 */ 1435static void 1436tcp_disconnect(tp) 1437 register struct tcpcb *tp; 1438{ 1439 struct inpcb *inp = tp->t_inpcb; 1440 struct socket *so = inp->inp_socket; 1441 1442 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1443 INP_LOCK_ASSERT(inp); 1444 1445 /* 1446 * Neither tcp_close() nor tcp_drop() should return NULL, as the 1447 * socket is still open. 1448 */ 1449 if (tp->t_state < TCPS_ESTABLISHED) { 1450 tp = tcp_close(tp); 1451 KASSERT(tp != NULL, 1452 ("tcp_disconnect: tcp_close() returned NULL")); 1453 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 1454 tp = tcp_drop(tp, 0); 1455 KASSERT(tp != NULL, 1456 ("tcp_disconnect: tcp_drop() returned NULL")); 1457 } else { 1458 soisdisconnecting(so); 1459 sbflush(&so->so_rcv); 1460 tcp_usrclosed(tp); 1461 if (!(inp->inp_vflag & INP_DROPPED)) 1462 tcp_output(tp); 1463 } 1464} 1465 1466/* 1467 * User issued close, and wish to trail through shutdown states: 1468 * if never received SYN, just forget it. If got a SYN from peer, 1469 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1470 * If already got a FIN from peer, then almost done; go to LAST_ACK 1471 * state. In all other cases, have already sent FIN to peer (e.g. 1472 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1473 * for peer to send FIN or not respond to keep-alives, etc. 1474 * We can let the user exit from the close as soon as the FIN is acked. 1475 */ 1476static void 1477tcp_usrclosed(tp) 1478 register struct tcpcb *tp; 1479{ 1480 1481 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1482 INP_LOCK_ASSERT(tp->t_inpcb); 1483 1484 switch (tp->t_state) { 1485 1486 case TCPS_CLOSED: 1487 case TCPS_LISTEN: 1488 tp->t_state = TCPS_CLOSED; 1489 tp = tcp_close(tp); 1490 /* 1491 * tcp_close() should never return NULL here as the socket is 1492 * still open. 
1493 */ 1494 KASSERT(tp != NULL, 1495 ("tcp_usrclosed: tcp_close() returned NULL")); 1496 break; 1497 1498 case TCPS_SYN_SENT: 1499 case TCPS_SYN_RECEIVED: 1500 tp->t_flags |= TF_NEEDFIN; 1501 break; 1502 1503 case TCPS_ESTABLISHED: 1504 tp->t_state = TCPS_FIN_WAIT_1; 1505 break; 1506 1507 case TCPS_CLOSE_WAIT: 1508 tp->t_state = TCPS_LAST_ACK; 1509 break; 1510 } 1511 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1512 soisdisconnected(tp->t_inpcb->inp_socket); 1513 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 1514 if (tp->t_state == TCPS_FIN_WAIT_2) 1515 callout_reset(tp->tt_2msl, tcp_maxidle, 1516 tcp_timer_2msl, tp); 1517 } 1518} 1519