tcp_usrreq.c revision 157386
1/*- 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2006 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 32 * $FreeBSD: head/sys/netinet/tcp_usrreq.c 157386 2006-04-01 23:53:25Z rwatson $ 33 */ 34 35#include "opt_inet.h" 36#include "opt_inet6.h" 37#include "opt_tcpdebug.h" 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/malloc.h> 42#include <sys/kernel.h> 43#include <sys/sysctl.h> 44#include <sys/mbuf.h> 45#ifdef INET6 46#include <sys/domain.h> 47#endif /* INET6 */ 48#include <sys/socket.h> 49#include <sys/socketvar.h> 50#include <sys/protosw.h> 51#include <sys/proc.h> 52#include <sys/jail.h> 53 54#include <net/if.h> 55#include <net/route.h> 56 57#include <netinet/in.h> 58#include <netinet/in_systm.h> 59#ifdef INET6 60#include <netinet/ip6.h> 61#endif 62#include <netinet/in_pcb.h> 63#ifdef INET6 64#include <netinet6/in6_pcb.h> 65#endif 66#include <netinet/in_var.h> 67#include <netinet/ip_var.h> 68#ifdef INET6 69#include <netinet6/ip6_var.h> 70#include <netinet6/scope6_var.h> 71#endif 72#include <netinet/tcp.h> 73#include <netinet/tcp_fsm.h> 74#include <netinet/tcp_seq.h> 75#include <netinet/tcp_timer.h> 76#include <netinet/tcp_var.h> 77#include <netinet/tcpip.h> 78#ifdef TCPDEBUG 79#include <netinet/tcp_debug.h> 80#endif 81 82/* 83 * TCP protocol interface to socket abstraction. 84 */ 85extern char *tcpstates[]; /* XXX ??? */ 86 87static int tcp_attach(struct socket *); 88static int tcp_connect(struct tcpcb *, struct sockaddr *, 89 struct thread *td); 90#ifdef INET6 91static int tcp6_connect(struct tcpcb *, struct sockaddr *, 92 struct thread *td); 93#endif /* INET6 */ 94static void tcp_disconnect(struct tcpcb *); 95static void tcp_usrclosed(struct tcpcb *); 96static void tcp_fill_info(struct tcpcb *, struct tcp_info *); 97 98#ifdef TCPDEBUG 99#define TCPDEBUG0 int ostate = 0 100#define TCPDEBUG1() ostate = tp ? tp->t_state : 0 101#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 102 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 103#else 104#define TCPDEBUG0 105#define TCPDEBUG1() 106#define TCPDEBUG2(req) 107#endif 108 109/* 110 * TCP attaches to socket via pru_attach(), reserving space, 111 * and an internet control block. 112 */ 113static int 114tcp_usr_attach(struct socket *so, int proto, struct thread *td) 115{ 116 struct inpcb *inp; 117 struct tcpcb *tp = NULL; 118 int error; 119 TCPDEBUG0; 120 121 inp = sotoinpcb(so); 122 KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); 123 INP_INFO_WLOCK(&tcbinfo); 124 TCPDEBUG1(); 125 126 error = tcp_attach(so); 127 if (error) 128 goto out; 129 130 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 131 so->so_linger = TCP_LINGERTIME; 132 133 inp = sotoinpcb(so); 134 tp = intotcpcb(inp); 135out: 136 TCPDEBUG2(PRU_ATTACH); 137 INP_INFO_WUNLOCK(&tcbinfo); 138 return error; 139} 140 141/* 142 * pru_detach() detaches the TCP protocol from the socket. 143 * If the protocol state is non-embryonic, then can't 144 * do this directly: have to initiate a pru_disconnect(), 145 * which may finish later; embryonic TCB's can just 146 * be discarded here. 147 */ 148static void 149tcp_usr_detach(struct socket *so) 150{ 151 struct inpcb *inp; 152 struct tcpcb *tp; 153#ifdef INET6 154 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 155#endif 156 TCPDEBUG0; 157 158 inp = sotoinpcb(so); 159 KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); 160 INP_INFO_WLOCK(&tcbinfo); 161 INP_LOCK(inp); 162 KASSERT(inp->inp_socket != NULL, 163 ("tcp_usr_detach: inp_socket == NULL")); 164 165 TCPDEBUG1(); 166 tp = intotcpcb(inp); 167 168 if (inp->inp_vflag & INP_TIMEWAIT) { 169 if (inp->inp_vflag & INP_DROPPED) { 170 /* 171 * Connection was in time wait and has been dropped; 172 * the calling path is either via tcp_twclose(), or 173 * as a result of an eventual soclose() after 174 * tcp_twclose() has been called. In either case, 175 * tcp_twclose() has detached the tcptw from the 176 * inpcb, so we just detach and free the inpcb. 177 * 178 * XXXRW: Would it be cleaner to free the tcptw 179 * here? 180 */ 181#ifdef INET6 182 if (isipv6) { 183 in6_pcbdetach(inp); 184 in6_pcbfree(inp); 185 } else { 186#endif 187 in_pcbdetach(inp); 188 in_pcbfree(inp); 189#ifdef INET6 190 } 191#endif 192 } else { 193 /* 194 * Connection is in time wait and has not yet been 195 * dropped; allow the socket to be discarded, but 196 * need to keep inpcb until end of time wait. 197 */ 198#ifdef INET6 199 if (isipv6) 200 in6_pcbdetach(inp); 201 else 202#endif 203 in_pcbdetach(inp); 204 INP_UNLOCK(inp); 205 } 206 } else { 207 tp = intotcpcb(inp); 208 if (inp->inp_vflag & INP_DROPPED || 209 tp->t_state < TCPS_SYN_SENT) { 210 /* 211 * Connection has been dropped or is a listen socket, 212 * tear down all pcb state and allow socket to be 213 * freed. 214 */ 215 tcp_discardcb(tp); 216#ifdef INET6 217 if (isipv6) { 218 in_pcbdetach(inp); 219 in_pcbfree(inp); 220 } else { 221#endif 222 in_pcbdetach(inp); 223 in_pcbfree(inp); 224#ifdef INET6 225 } 226#endif 227 } else { 228 /* 229 * Connection state still required, as is socket, so 230 * mark socket for TCP to free later. 231 */ 232 SOCK_LOCK(so); 233 so->so_state |= SS_PROTOREF; 234 SOCK_UNLOCK(so); 235 inp->inp_vflag |= INP_SOCKREF; 236 INP_UNLOCK(inp); 237 } 238 } 239 tp = NULL; 240 TCPDEBUG2(PRU_DETACH); 241 INP_INFO_WUNLOCK(&tcbinfo); 242} 243 244/* 245 * Give the socket an address. 246 */ 247static int 248tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 249{ 250 int error = 0; 251 struct inpcb *inp; 252 struct tcpcb *tp = NULL; 253 struct sockaddr_in *sinp; 254 255 sinp = (struct sockaddr_in *)nam; 256 if (nam->sa_len != sizeof (*sinp)) 257 return (EINVAL); 258 /* 259 * Must check for multicast addresses and disallow binding 260 * to them. 261 */ 262 if (sinp->sin_family == AF_INET && 263 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 264 return (EAFNOSUPPORT); 265 266 TCPDEBUG0; 267 INP_INFO_WLOCK(&tcbinfo); 268 inp = sotoinpcb(so); 269 KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); 270 INP_LOCK(inp); 271 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 272 error = EINVAL; 273 goto out; 274 } 275 tp = intotcpcb(inp); 276 TCPDEBUG1(); 277 error = in_pcbbind(inp, nam, td->td_ucred); 278out: 279 TCPDEBUG2(PRU_BIND); 280 INP_UNLOCK(inp); 281 INP_INFO_WUNLOCK(&tcbinfo); 282 283 return (error); 284} 285 286#ifdef INET6 287static int 288tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 289{ 290 int error = 0; 291 struct inpcb *inp; 292 struct tcpcb *tp = NULL; 293 struct sockaddr_in6 *sin6p; 294 295 sin6p = (struct sockaddr_in6 *)nam; 296 if (nam->sa_len != sizeof (*sin6p)) 297 return (EINVAL); 298 /* 299 * Must check for multicast addresses and disallow binding 300 * to them. 301 */ 302 if (sin6p->sin6_family == AF_INET6 && 303 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 304 return (EAFNOSUPPORT); 305 306 TCPDEBUG0; 307 INP_INFO_WLOCK(&tcbinfo); 308 inp = sotoinpcb(so); 309 KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); 310 INP_LOCK(inp); 311 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 312 error = EINVAL; 313 goto out; 314 } 315 tp = intotcpcb(inp); 316 TCPDEBUG1(); 317 inp->inp_vflag &= ~INP_IPV4; 318 inp->inp_vflag |= INP_IPV6; 319 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 320 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 321 inp->inp_vflag |= INP_IPV4; 322 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 323 struct sockaddr_in sin; 324 325 in6_sin6_2_sin(&sin, sin6p); 326 inp->inp_vflag |= INP_IPV4; 327 inp->inp_vflag &= ~INP_IPV6; 328 error = in_pcbbind(inp, (struct sockaddr *)&sin, 329 td->td_ucred); 330 goto out; 331 } 332 } 333 error = in6_pcbbind(inp, nam, td->td_ucred); 334out: 335 TCPDEBUG2(PRU_BIND); 336 INP_UNLOCK(inp); 337 INP_INFO_WUNLOCK(&tcbinfo); 338 return (error); 339} 340#endif /* INET6 */ 341 342/* 343 * Prepare to accept connections. 344 */ 345static int 346tcp_usr_listen(struct socket *so, int backlog, struct thread *td) 347{ 348 int error = 0; 349 struct inpcb *inp; 350 struct tcpcb *tp = NULL; 351 352 TCPDEBUG0; 353 INP_INFO_WLOCK(&tcbinfo); 354 inp = sotoinpcb(so); 355 KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); 356 INP_LOCK(inp); 357 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 358 error = EINVAL; 359 goto out; 360 } 361 tp = intotcpcb(inp); 362 TCPDEBUG1(); 363 SOCK_LOCK(so); 364 error = solisten_proto_check(so); 365 if (error == 0 && inp->inp_lport == 0) 366 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 367 if (error == 0) { 368 tp->t_state = TCPS_LISTEN; 369 solisten_proto(so, backlog); 370 } 371 SOCK_UNLOCK(so); 372 373out: 374 TCPDEBUG2(PRU_LISTEN); 375 INP_UNLOCK(inp); 376 INP_INFO_WUNLOCK(&tcbinfo); 377 return (error); 378} 379 380#ifdef INET6 381static int 382tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) 383{ 384 int error = 0; 385 struct inpcb *inp; 386 struct tcpcb *tp = NULL; 387 388 TCPDEBUG0; 389 INP_INFO_WLOCK(&tcbinfo); 390 inp = sotoinpcb(so); 391 KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); 392 INP_LOCK(inp); 393 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 394 error = EINVAL; 395 goto out; 396 } 397 tp = intotcpcb(inp); 398 TCPDEBUG1(); 399 SOCK_LOCK(so); 400 error = solisten_proto_check(so); 401 if (error == 0 && inp->inp_lport == 0) { 402 inp->inp_vflag &= ~INP_IPV4; 403 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 404 inp->inp_vflag |= INP_IPV4; 405 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 406 } 407 if (error == 0) { 408 tp->t_state = TCPS_LISTEN; 409 solisten_proto(so, backlog); 410 } 411 SOCK_UNLOCK(so); 412 413out: 414 TCPDEBUG2(PRU_LISTEN); 415 INP_UNLOCK(inp); 416 INP_INFO_WUNLOCK(&tcbinfo); 417 return (error); 418} 419#endif /* INET6 */ 420 421/* 422 * Initiate connection to peer. 423 * Create a template for use in transmissions on this connection. 424 * Enter SYN_SENT state, and mark socket as connecting. 425 * Start keep-alive timer, and seed output sequence space. 426 * Send initial segment on connection. 427 */ 428static int 429tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 430{ 431 int error = 0; 432 struct inpcb *inp; 433 struct tcpcb *tp = NULL; 434 struct sockaddr_in *sinp; 435 436 sinp = (struct sockaddr_in *)nam; 437 if (nam->sa_len != sizeof (*sinp)) 438 return (EINVAL); 439 /* 440 * Must disallow TCP ``connections'' to multicast addresses. 441 */ 442 if (sinp->sin_family == AF_INET 443 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 444 return (EAFNOSUPPORT); 445 if (jailed(td->td_ucred)) 446 prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); 447 448 TCPDEBUG0; 449 INP_INFO_WLOCK(&tcbinfo); 450 inp = sotoinpcb(so); 451 KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); 452 INP_LOCK(inp); 453 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 454 error = EINVAL; 455 goto out; 456 } 457 tp = intotcpcb(inp); 458 TCPDEBUG1(); 459 if ((error = tcp_connect(tp, nam, td)) != 0) 460 goto out; 461 error = tcp_output(tp); 462out: 463 TCPDEBUG2(PRU_CONNECT); 464 INP_UNLOCK(inp); 465 INP_INFO_WUNLOCK(&tcbinfo); 466 return (error); 467} 468 469#ifdef INET6 470static int 471tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 472{ 473 int error = 0; 474 struct inpcb *inp; 475 struct tcpcb *tp = NULL; 476 struct sockaddr_in6 *sin6p; 477 478 TCPDEBUG0; 479 480 sin6p = (struct sockaddr_in6 *)nam; 481 if (nam->sa_len != sizeof (*sin6p)) 482 return (EINVAL); 483 /* 484 * Must disallow TCP ``connections'' to multicast addresses. 485 */ 486 if (sin6p->sin6_family == AF_INET6 487 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 488 return (EAFNOSUPPORT); 489 490 INP_INFO_WLOCK(&tcbinfo); 491 inp = sotoinpcb(so); 492 KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); 493 INP_LOCK(inp); 494 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 495 error = EINVAL; 496 goto out; 497 } 498 tp = intotcpcb(inp); 499 TCPDEBUG1(); 500 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 501 struct sockaddr_in sin; 502 503 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 504 error = EINVAL; 505 goto out; 506 } 507 508 in6_sin6_2_sin(&sin, sin6p); 509 inp->inp_vflag |= INP_IPV4; 510 inp->inp_vflag &= ~INP_IPV6; 511 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) 512 goto out; 513 error = tcp_output(tp); 514 goto out; 515 } 516 inp->inp_vflag &= ~INP_IPV4; 517 inp->inp_vflag |= INP_IPV6; 518 inp->inp_inc.inc_isipv6 = 1; 519 if ((error = tcp6_connect(tp, nam, td)) != 0) 520 goto out; 521 error = tcp_output(tp); 522 523out: 524 TCPDEBUG2(PRU_CONNECT); 525 INP_UNLOCK(inp); 526 INP_INFO_WUNLOCK(&tcbinfo); 527 return (error); 528} 529#endif /* INET6 */ 530 531/* 532 * Initiate disconnect from peer. 533 * If connection never passed embryonic stage, just drop; 534 * else if don't need to let data drain, then can just drop anyways, 535 * else have to begin TCP shutdown process: mark socket disconnecting, 536 * drain unread data, state switch to reflect user close, and 537 * send segment (e.g. FIN) to peer. Socket will be really disconnected 538 * when peer sends FIN and acks ours. 539 * 540 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 541 */ 542static int 543tcp_usr_disconnect(struct socket *so) 544{ 545 struct inpcb *inp; 546 struct tcpcb *tp = NULL; 547 int error = 0; 548 549 TCPDEBUG0; 550 INP_INFO_WLOCK(&tcbinfo); 551 inp = sotoinpcb(so); 552 KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); 553 INP_LOCK(inp); 554 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 555 error = EINVAL; 556 goto out; 557 } 558 tp = intotcpcb(inp); 559 TCPDEBUG1(); 560 tcp_disconnect(tp); 561out: 562 TCPDEBUG2(PRU_DISCONNECT); 563 INP_UNLOCK(inp); 564 INP_INFO_WUNLOCK(&tcbinfo); 565 return (error); 566} 567 568/* 569 * Accept a connection. Essentially all the work is 570 * done at higher levels; just return the address 571 * of the peer, storing through addr. 572 */ 573static int 574tcp_usr_accept(struct socket *so, struct sockaddr **nam) 575{ 576 int error = 0; 577 struct inpcb *inp = NULL; 578 struct tcpcb *tp = NULL; 579 struct in_addr addr; 580 in_port_t port = 0; 581 TCPDEBUG0; 582 583 if (so->so_state & SS_ISDISCONNECTED) { 584 error = ECONNABORTED; 585 goto out; 586 } 587 588 inp = sotoinpcb(so); 589 KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); 590 INP_LOCK(inp); 591 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 592 error = EINVAL; 593 goto out; 594 } 595 tp = intotcpcb(inp); 596 TCPDEBUG1(); 597 598 /* 599 * We inline in_setpeeraddr and COMMON_END here, so that we can 600 * copy the data of interest and defer the malloc until after we 601 * release the lock. 602 */ 603 port = inp->inp_fport; 604 addr = inp->inp_faddr; 605 606out: 607 TCPDEBUG2(PRU_ACCEPT); 608 INP_UNLOCK(inp); 609 if (error == 0) 610 *nam = in_sockaddr(port, &addr); 611 return error; 612} 613 614#ifdef INET6 615static int 616tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 617{ 618 struct inpcb *inp = NULL; 619 int error = 0; 620 struct tcpcb *tp = NULL; 621 struct in_addr addr; 622 struct in6_addr addr6; 623 in_port_t port = 0; 624 int v4 = 0; 625 TCPDEBUG0; 626 627 if (so->so_state & SS_ISDISCONNECTED) { 628 error = ECONNABORTED; 629 goto out; 630 } 631 632 inp = sotoinpcb(so); 633 KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); 634 INP_LOCK(inp); 635 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 636 error = EINVAL; 637 goto out; 638 } 639 tp = intotcpcb(inp); 640 TCPDEBUG1(); 641 642 /* 643 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can 644 * copy the data of interest and defer the malloc until after we 645 * release the lock. 646 */ 647 if (inp->inp_vflag & INP_IPV4) { 648 v4 = 1; 649 port = inp->inp_fport; 650 addr = inp->inp_faddr; 651 } else { 652 port = inp->inp_fport; 653 addr6 = inp->in6p_faddr; 654 } 655 656out: 657 TCPDEBUG2(PRU_ACCEPT); 658 INP_UNLOCK(inp); 659 if (error == 0) { 660 if (v4) 661 *nam = in6_v4mapsin6_sockaddr(port, &addr); 662 else 663 *nam = in6_sockaddr(port, &addr6); 664 } 665 return error; 666} 667#endif /* INET6 */ 668 669/* 670 * This is the wrapper function for in_setsockaddr. We just pass down 671 * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking 672 * here because in_setsockaddr will call malloc and can block. 673 */ 674static int 675tcp_sockaddr(struct socket *so, struct sockaddr **nam) 676{ 677 return (in_setsockaddr(so, nam, &tcbinfo)); 678} 679 680/* 681 * This is the wrapper function for in_setpeeraddr. We just pass down 682 * the pcbinfo for in_setpeeraddr to lock. 683 */ 684static int 685tcp_peeraddr(struct socket *so, struct sockaddr **nam) 686{ 687 return (in_setpeeraddr(so, nam, &tcbinfo)); 688} 689 690/* 691 * Mark the connection as being incapable of further output. 692 */ 693static int 694tcp_usr_shutdown(struct socket *so) 695{ 696 int error = 0; 697 struct inpcb *inp; 698 struct tcpcb *tp = NULL; 699 700 TCPDEBUG0; 701 INP_INFO_WLOCK(&tcbinfo); 702 inp = sotoinpcb(so); 703 KASSERT(inp != NULL, ("inp == NULL")); 704 INP_LOCK(inp); 705 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 706 error = EINVAL; 707 goto out; 708 } 709 tp = intotcpcb(inp); 710 TCPDEBUG1(); 711 socantsendmore(so); 712 tcp_usrclosed(tp); 713 error = tcp_output(tp); 714 715out: 716 TCPDEBUG2(PRU_SHUTDOWN); 717 INP_UNLOCK(inp); 718 INP_INFO_WUNLOCK(&tcbinfo); 719 720 return (error); 721} 722 723/* 724 * After a receive, possibly send window update to peer. 725 */ 726static int 727tcp_usr_rcvd(struct socket *so, int flags) 728{ 729 struct inpcb *inp; 730 struct tcpcb *tp = NULL; 731 int error = 0; 732 733 TCPDEBUG0; 734 inp = sotoinpcb(so); 735 KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); 736 INP_LOCK(inp); 737 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 738 error = EINVAL; 739 goto out; 740 } 741 tp = intotcpcb(inp); 742 TCPDEBUG1(); 743 tcp_output(tp); 744 745out: 746 TCPDEBUG2(PRU_RCVD); 747 INP_UNLOCK(inp); 748 return (error); 749} 750 751/* 752 * Do a send by putting data in output queue and updating urgent 753 * marker if URG set. Possibly send more data. Unlike the other 754 * pru_*() routines, the mbuf chains are our responsibility. We 755 * must either enqueue them or free them. The other pru_* routines 756 * generally are caller-frees. 757 */ 758static int 759tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 760 struct sockaddr *nam, struct mbuf *control, struct thread *td) 761{ 762 int error = 0; 763 struct inpcb *inp; 764 struct tcpcb *tp = NULL; 765 int headlocked = 0; 766#ifdef INET6 767 int isipv6; 768#endif 769 TCPDEBUG0; 770 771 /* 772 * We require the pcbinfo lock in two cases: 773 * 774 * (1) An implied connect is taking place, which can result in 775 * binding IPs and ports and hence modification of the pcb hash 776 * chains. 777 * 778 * (2) PRUS_EOF is set, resulting in explicit close on the send. 779 */ 780 if ((nam != NULL) || (flags & PRUS_EOF)) { 781 INP_INFO_WLOCK(&tcbinfo); 782 headlocked = 1; 783 } 784 inp = sotoinpcb(so); 785 KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); 786 INP_LOCK(inp); 787 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 788 error = EINVAL; 789 goto out; 790 } 791#ifdef INET6 792 isipv6 = nam && nam->sa_family == AF_INET6; 793#endif /* INET6 */ 794 tp = intotcpcb(inp); 795 TCPDEBUG1(); 796 if (control) { 797 /* TCP doesn't do control messages (rights, creds, etc) */ 798 if (control->m_len) { 799 m_freem(control); 800 if (m) 801 m_freem(m); 802 error = EINVAL; 803 goto out; 804 } 805 m_freem(control); /* empty control, just free it */ 806 } 807 if (!(flags & PRUS_OOB)) { 808 sbappendstream(&so->so_snd, m); 809 if (nam && tp->t_state < TCPS_SYN_SENT) { 810 /* 811 * Do implied connect if not yet connected, 812 * initialize window to default value, and 813 * initialize maxseg/maxopd using peer's cached 814 * MSS. 815 */ 816 INP_INFO_WLOCK_ASSERT(&tcbinfo); 817#ifdef INET6 818 if (isipv6) 819 error = tcp6_connect(tp, nam, td); 820 else 821#endif /* INET6 */ 822 error = tcp_connect(tp, nam, td); 823 if (error) 824 goto out; 825 tp->snd_wnd = TTCP_CLIENT_SND_WND; 826 tcp_mss(tp, -1); 827 } 828 if (flags & PRUS_EOF) { 829 /* 830 * Close the send side of the connection after 831 * the data is sent. 832 */ 833 INP_INFO_WLOCK_ASSERT(&tcbinfo); 834 socantsendmore(so); 835 tcp_usrclosed(tp); 836 } 837 if (headlocked) { 838 INP_INFO_WUNLOCK(&tcbinfo); 839 headlocked = 0; 840 } 841 if (tp != NULL) { 842 if (flags & PRUS_MORETOCOME) 843 tp->t_flags |= TF_MORETOCOME; 844 error = tcp_output(tp); 845 if (flags & PRUS_MORETOCOME) 846 tp->t_flags &= ~TF_MORETOCOME; 847 } 848 } else { 849 /* 850 * XXXRW: PRUS_EOF not implemented with PRUS_OOB? 851 */ 852 SOCKBUF_LOCK(&so->so_snd); 853 if (sbspace(&so->so_snd) < -512) { 854 SOCKBUF_UNLOCK(&so->so_snd); 855 m_freem(m); 856 error = ENOBUFS; 857 goto out; 858 } 859 /* 860 * According to RFC961 (Assigned Protocols), 861 * the urgent pointer points to the last octet 862 * of urgent data. We continue, however, 863 * to consider it to indicate the first octet 864 * of data past the urgent section. 865 * Otherwise, snd_up should be one lower. 866 */ 867 sbappendstream_locked(&so->so_snd, m); 868 SOCKBUF_UNLOCK(&so->so_snd); 869 if (nam && tp->t_state < TCPS_SYN_SENT) { 870 /* 871 * Do implied connect if not yet connected, 872 * initialize window to default value, and 873 * initialize maxseg/maxopd using peer's cached 874 * MSS. 875 */ 876 INP_INFO_WLOCK_ASSERT(&tcbinfo); 877#ifdef INET6 878 if (isipv6) 879 error = tcp6_connect(tp, nam, td); 880 else 881#endif /* INET6 */ 882 error = tcp_connect(tp, nam, td); 883 if (error) 884 goto out; 885 tp->snd_wnd = TTCP_CLIENT_SND_WND; 886 tcp_mss(tp, -1); 887 INP_INFO_WUNLOCK(&tcbinfo); 888 headlocked = 0; 889 } else if (nam) { 890 INP_INFO_WUNLOCK(&tcbinfo); 891 headlocked = 0; 892 } 893 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 894 tp->t_flags |= TF_FORCEDATA; 895 error = tcp_output(tp); 896 tp->t_flags &= ~TF_FORCEDATA; 897 } 898out: 899 TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : 900 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 901 INP_UNLOCK(inp); 902 if (headlocked) 903 INP_INFO_WUNLOCK(&tcbinfo); 904 return (error); 905} 906 907/* 908 * Abort the TCP. 909 */ 910static void 911tcp_usr_abort(struct socket *so) 912{ 913#if 0 914 struct inpcb *inp; 915 struct tcpcb *tp; 916#endif 917 918 /* 919 * XXXRW: This is not really quite the same, as we want to tcp_drop() 920 * rather than tcp_disconnect(), I think, but for now I'll avoid 921 * replicating all the tear-down logic here. 922 */ 923 tcp_usr_detach(so); 924 925#if 0 926 TCPDEBUG0; 927 INP_INFO_WLOCK(&tcbinfo); 928 inp = sotoinpcb(so); 929 INP_LOCK(inp); 930 /* 931 * Do we need to handle timewait here? Aborted connections should 932 * never generate a FIN? 933 */ 934 KASSERT((inp->inp_vflag & INP_TIMEWAIT) == 0, 935 ("tcp_usr_abort: timewait")); 936 tp = intotcpcb(inp); 937 TCPDEBUG1(); 938 tp = tcp_drop(tp, ECONNABORTED); 939 TCPDEBUG2(PRU_ABORT); 940 if (tp != NULL) 941 INP_UNLOCK(inp); 942 INP_INFO_WUNLOCK(&tcbinfo); 943#endif 944} 945 946/* 947 * Receive out-of-band data. 948 */ 949static int 950tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 951{ 952 int error = 0; 953 struct inpcb *inp; 954 struct tcpcb *tp = NULL; 955 956 TCPDEBUG0; 957 inp = sotoinpcb(so); 958 KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); 959 INP_LOCK(inp); 960 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 961 error = EINVAL; 962 goto out; 963 } 964 tp = intotcpcb(inp); 965 TCPDEBUG1(); 966 if ((so->so_oobmark == 0 && 967 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || 968 so->so_options & SO_OOBINLINE || 969 tp->t_oobflags & TCPOOB_HADDATA) { 970 error = EINVAL; 971 goto out; 972 } 973 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 974 error = EWOULDBLOCK; 975 goto out; 976 } 977 m->m_len = 1; 978 *mtod(m, caddr_t) = tp->t_iobc; 979 if ((flags & MSG_PEEK) == 0) 980 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 981 982out: 983 TCPDEBUG2(PRU_RCVOOB); 984 INP_UNLOCK(inp); 985 return (error); 986} 987 988struct pr_usrreqs tcp_usrreqs = { 989 .pru_abort = tcp_usr_abort, 990 .pru_accept = tcp_usr_accept, 991 .pru_attach = tcp_usr_attach, 992 .pru_bind = tcp_usr_bind, 993 .pru_connect = tcp_usr_connect, 994 .pru_control = in_control, 995 .pru_detach = tcp_usr_detach, 996 .pru_disconnect = tcp_usr_disconnect, 997 .pru_listen = tcp_usr_listen, 998 .pru_peeraddr = tcp_peeraddr, 999 .pru_rcvd = tcp_usr_rcvd, 1000 .pru_rcvoob = tcp_usr_rcvoob, 1001 .pru_send = tcp_usr_send, 1002 .pru_shutdown = tcp_usr_shutdown, 1003 .pru_sockaddr = tcp_sockaddr, 1004 .pru_sosetlabel = in_pcbsosetlabel 1005}; 1006 1007#ifdef INET6 1008struct pr_usrreqs tcp6_usrreqs = { 1009 .pru_abort = tcp_usr_abort, 1010 .pru_accept = tcp6_usr_accept, 1011 .pru_attach = tcp_usr_attach, 1012 .pru_bind = tcp6_usr_bind, 1013 .pru_connect = tcp6_usr_connect, 1014 .pru_control = in6_control, 1015 .pru_detach = tcp_usr_detach, 1016 .pru_disconnect = tcp_usr_disconnect, 1017 .pru_listen = tcp6_usr_listen, 1018 .pru_peeraddr = in6_mapped_peeraddr, 1019 .pru_rcvd = tcp_usr_rcvd, 1020 .pru_rcvoob = tcp_usr_rcvoob, 1021 .pru_send = tcp_usr_send, 1022 .pru_shutdown = tcp_usr_shutdown, 1023 .pru_sockaddr = in6_mapped_sockaddr, 1024 .pru_sosetlabel = in_pcbsosetlabel 1025}; 1026#endif /* INET6 */ 1027 1028/* 1029 * Common subroutine to open a TCP connection to remote host specified 1030 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1031 * port number if needed. Call in_pcbconnect_setup to do the routing and 1032 * to choose a local host address (interface). If there is an existing 1033 * incarnation of the same connection in TIME-WAIT state and if the remote 1034 * host was sending CC options and if the connection duration was < MSL, then 1035 * truncate the previous TIME-WAIT state and proceed. 1036 * Initialize connection parameters and enter SYN-SENT state. 1037 */ 1038static int 1039tcp_connect(tp, nam, td) 1040 register struct tcpcb *tp; 1041 struct sockaddr *nam; 1042 struct thread *td; 1043{ 1044 struct inpcb *inp = tp->t_inpcb, *oinp; 1045 struct socket *so = inp->inp_socket; 1046 struct in_addr laddr; 1047 u_short lport; 1048 int error; 1049 1050 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1051 INP_LOCK_ASSERT(inp); 1052 1053 if (inp->inp_lport == 0) { 1054 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1055 if (error) 1056 return error; 1057 } 1058 1059 /* 1060 * Cannot simply call in_pcbconnect, because there might be an 1061 * earlier incarnation of this same connection still in 1062 * TIME_WAIT state, creating an ADDRINUSE error. 1063 */ 1064 laddr = inp->inp_laddr; 1065 lport = inp->inp_lport; 1066 error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, 1067 &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); 1068 if (error && oinp == NULL) 1069 return error; 1070 if (oinp) 1071 return EADDRINUSE; 1072 inp->inp_laddr = laddr; 1073 in_pcbrehash(inp); 1074 1075 /* Compute window scaling to request. */ 1076 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1077 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 1078 tp->request_r_scale++; 1079 1080 soisconnecting(so); 1081 tcpstat.tcps_connattempt++; 1082 tp->t_state = TCPS_SYN_SENT; 1083 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 1084 tp->iss = tcp_new_isn(tp); 1085 tp->t_bw_rtseq = tp->iss; 1086 tcp_sendseqinit(tp); 1087 1088 return 0; 1089} 1090 1091#ifdef INET6 1092static int 1093tcp6_connect(tp, nam, td) 1094 register struct tcpcb *tp; 1095 struct sockaddr *nam; 1096 struct thread *td; 1097{ 1098 struct inpcb *inp = tp->t_inpcb, *oinp; 1099 struct socket *so = inp->inp_socket; 1100 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 1101 struct in6_addr *addr6; 1102 int error; 1103 1104 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1105 INP_LOCK_ASSERT(inp); 1106 1107 if (inp->inp_lport == 0) { 1108 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1109 if (error) 1110 return error; 1111 } 1112 1113 /* 1114 * Cannot simply call in_pcbconnect, because there might be an 1115 * earlier incarnation of this same connection still in 1116 * TIME_WAIT state, creating an ADDRINUSE error. 1117 * in6_pcbladdr() also handles scope zone IDs. 1118 */ 1119 error = in6_pcbladdr(inp, nam, &addr6); 1120 if (error) 1121 return error; 1122 oinp = in6_pcblookup_hash(inp->inp_pcbinfo, 1123 &sin6->sin6_addr, sin6->sin6_port, 1124 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 1125 ? addr6 1126 : &inp->in6p_laddr, 1127 inp->inp_lport, 0, NULL); 1128 if (oinp) 1129 return EADDRINUSE; 1130 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 1131 inp->in6p_laddr = *addr6; 1132 inp->in6p_faddr = sin6->sin6_addr; 1133 inp->inp_fport = sin6->sin6_port; 1134 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 1135 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 1136 if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) 1137 inp->in6p_flowinfo |= 1138 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 1139 in_pcbrehash(inp); 1140 1141 /* Compute window scaling to request. */ 1142 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1143 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 1144 tp->request_r_scale++; 1145 1146 soisconnecting(so); 1147 tcpstat.tcps_connattempt++; 1148 tp->t_state = TCPS_SYN_SENT; 1149 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 1150 tp->iss = tcp_new_isn(tp); 1151 tp->t_bw_rtseq = tp->iss; 1152 tcp_sendseqinit(tp); 1153 1154 return 0; 1155} 1156#endif /* INET6 */ 1157 1158/* 1159 * Export TCP internal state information via a struct tcp_info, based on the 1160 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently 1161 * (TCP state machine, etc). We export all information using FreeBSD-native 1162 * constants -- for example, the numeric values for tcpi_state will differ 1163 * from Linux. 1164 */ 1165static void 1166tcp_fill_info(tp, ti) 1167 struct tcpcb *tp; 1168 struct tcp_info *ti; 1169{ 1170 1171 INP_LOCK_ASSERT(tp->t_inpcb); 1172 bzero(ti, sizeof(*ti)); 1173 1174 ti->tcpi_state = tp->t_state; 1175 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 1176 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1177 if (tp->sack_enable) 1178 ti->tcpi_options |= TCPI_OPT_SACK; 1179 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 1180 ti->tcpi_options |= TCPI_OPT_WSCALE; 1181 ti->tcpi_snd_wscale = tp->snd_scale; 1182 ti->tcpi_rcv_wscale = tp->rcv_scale; 1183 } 1184 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 1185 ti->tcpi_snd_cwnd = tp->snd_cwnd; 1186 1187 /* 1188 * FreeBSD-specific extension fields for tcp_info. 1189 */ 1190 ti->tcpi_rcv_space = tp->rcv_wnd; 1191 ti->tcpi_snd_wnd = tp->snd_wnd; 1192 ti->tcpi_snd_bwnd = tp->snd_bwnd; 1193} 1194 1195/* 1196 * The new sockopt interface makes it possible for us to block in the 1197 * copyin/out step (if we take a page fault). Taking a page fault at 1198 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 1199 * use TSM, there probably isn't any need for this function to run at 1200 * splnet() any more. This needs more examination.) 1201 * 1202 * XXXRW: The locking here is wrong; we may take a page fault while holding 1203 * the inpcb lock. 1204 */ 1205int 1206tcp_ctloutput(so, sopt) 1207 struct socket *so; 1208 struct sockopt *sopt; 1209{ 1210 int error, opt, optval; 1211 struct inpcb *inp; 1212 struct tcpcb *tp; 1213 struct tcp_info ti; 1214 1215 error = 0; 1216 inp = sotoinpcb(so); 1217 KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); 1218 INP_LOCK(inp); 1219 if (sopt->sopt_level != IPPROTO_TCP) { 1220 INP_UNLOCK(inp); 1221#ifdef INET6 1222 if (INP_CHECK_SOCKAF(so, AF_INET6)) 1223 error = ip6_ctloutput(so, sopt); 1224 else 1225#endif /* INET6 */ 1226 error = ip_ctloutput(so, sopt); 1227 return (error); 1228 } 1229 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 1230 error = ECONNRESET; 1231 goto out; 1232 } 1233 tp = intotcpcb(inp); 1234 1235 switch (sopt->sopt_dir) { 1236 case SOPT_SET: 1237 switch (sopt->sopt_name) { 1238#ifdef TCP_SIGNATURE 1239 case TCP_MD5SIG: 1240 error = sooptcopyin(sopt, &optval, sizeof optval, 1241 sizeof optval); 1242 if (error) 1243 break; 1244 1245 if (optval > 0) 1246 tp->t_flags |= TF_SIGNATURE; 1247 else 1248 tp->t_flags &= ~TF_SIGNATURE; 1249 break; 1250#endif /* TCP_SIGNATURE */ 1251 case TCP_NODELAY: 1252 case TCP_NOOPT: 1253 error = sooptcopyin(sopt, &optval, sizeof optval, 1254 sizeof optval); 1255 if (error) 1256 break; 1257 1258 switch (sopt->sopt_name) { 1259 case TCP_NODELAY: 1260 opt = TF_NODELAY; 1261 break; 1262 case TCP_NOOPT: 1263 opt = TF_NOOPT; 1264 break; 1265 default: 1266 opt = 0; /* dead code to fool gcc */ 1267 break; 1268 } 1269 1270 if (optval) 1271 tp->t_flags |= opt; 1272 else 1273 tp->t_flags &= ~opt; 1274 break; 1275 1276 case TCP_NOPUSH: 1277 error = sooptcopyin(sopt, &optval, sizeof optval, 1278 sizeof optval); 1279 if (error) 1280 break; 1281 1282 if (optval) 1283 tp->t_flags |= TF_NOPUSH; 1284 else { 1285 tp->t_flags &= ~TF_NOPUSH; 1286 error = tcp_output(tp); 1287 } 1288 break; 1289 1290 case TCP_MAXSEG: 1291 error = sooptcopyin(sopt, &optval, sizeof optval, 1292 sizeof optval); 1293 if (error) 1294 break; 1295 1296 if (optval > 0 && optval <= tp->t_maxseg && 1297 optval + 40 >= tcp_minmss) 1298 tp->t_maxseg = optval; 1299 else 1300 error = EINVAL; 1301 break; 1302 1303 case TCP_INFO: 1304 error = EINVAL; 1305 break; 1306 1307 default: 1308 error = ENOPROTOOPT; 1309 break; 1310 } 1311 break; 1312 1313 case SOPT_GET: 1314 switch (sopt->sopt_name) { 1315#ifdef TCP_SIGNATURE 1316 case TCP_MD5SIG: 1317 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 1318 error = sooptcopyout(sopt, &optval, sizeof optval); 1319 break; 1320#endif 1321 case TCP_NODELAY: 1322 optval = tp->t_flags & TF_NODELAY; 1323 error = sooptcopyout(sopt, &optval, sizeof optval); 1324 break; 1325 case TCP_MAXSEG: 1326 optval = tp->t_maxseg; 1327 error = sooptcopyout(sopt, &optval, sizeof optval); 1328 break; 1329 case TCP_NOOPT: 1330 optval = tp->t_flags & TF_NOOPT; 1331 error = sooptcopyout(sopt, &optval, sizeof optval); 1332 break; 1333 case TCP_NOPUSH: 1334 optval = tp->t_flags & TF_NOPUSH; 1335 error = sooptcopyout(sopt, &optval, sizeof optval); 1336 break; 1337 case TCP_INFO: 1338 tcp_fill_info(tp, &ti); 1339 error = sooptcopyout(sopt, &ti, sizeof ti); 1340 break; 1341 default: 1342 error = ENOPROTOOPT; 1343 break; 1344 } 1345 break; 1346 } 1347out: 1348 INP_UNLOCK(inp); 1349 return (error); 1350} 1351 1352/* 1353 * tcp_sendspace and tcp_recvspace are the default send and receive window 1354 * sizes, respectively. These are obsolescent (this information should 1355 * be set by the route). 1356 */ 1357u_long tcp_sendspace = 1024*32; 1358SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, 1359 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); 1360u_long tcp_recvspace = 1024*64; 1361SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1362 &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); 1363 1364/* 1365 * Attach TCP protocol to socket, allocating 1366 * internet protocol control block, tcp control block, 1367 * bufer space, and entering LISTEN state if to accept connections. 1368 */ 1369static int 1370tcp_attach(so) 1371 struct socket *so; 1372{ 1373 register struct tcpcb *tp; 1374 struct inpcb *inp; 1375 int error; 1376#ifdef INET6 1377 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 1378#endif 1379 1380 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1381 1382 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1383 error = soreserve(so, tcp_sendspace, tcp_recvspace); 1384 if (error) 1385 return (error); 1386 } 1387 error = in_pcballoc(so, &tcbinfo, "tcpinp"); 1388 if (error) 1389 return (error); 1390 inp = sotoinpcb(so); 1391#ifdef INET6 1392 if (isipv6) { 1393 inp->inp_vflag |= INP_IPV6; 1394 inp->in6p_hops = -1; /* use kernel default */ 1395 } 1396 else 1397#endif 1398 inp->inp_vflag |= INP_IPV4; 1399 tp = tcp_newtcpcb(inp); 1400 if (tp == NULL) { 1401 INP_LOCK(inp); 1402#ifdef INET6 1403 if (isipv6) { 1404 in6_pcbdetach(inp); 1405 in6_pcbfree(inp); 1406 } else { 1407#endif 1408 in_pcbdetach(inp); 1409 in_pcbfree(inp); 1410#ifdef INET6 1411 } 1412#endif 1413 return (ENOBUFS); 1414 } 1415 tp->t_state = TCPS_CLOSED; 1416 return (0); 1417} 1418 1419/* 1420 * Initiate (or continue) disconnect. 1421 * If embryonic state, just send reset (once). 1422 * If in ``let data drain'' option and linger null, just drop. 1423 * Otherwise (hard), mark socket disconnecting and drop 1424 * current input data; switch states based on user close, and 1425 * send segment to peer (with FIN). 1426 */ 1427static void 1428tcp_disconnect(tp) 1429 register struct tcpcb *tp; 1430{ 1431 struct inpcb *inp = tp->t_inpcb; 1432 struct socket *so = inp->inp_socket; 1433 1434 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1435 INP_LOCK_ASSERT(inp); 1436 1437 /* 1438 * Neither tcp_close() nor tcp_drop() should return NULL, as the 1439 * socket is still open. 1440 */ 1441 if (tp->t_state < TCPS_ESTABLISHED) { 1442 tp = tcp_close(tp); 1443 KASSERT(tp != NULL, 1444 ("tcp_disconnect: tcp_close() returned NULL")); 1445 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 1446 tp = tcp_drop(tp, 0); 1447 KASSERT(tp != NULL, 1448 ("tcp_disconnect: tcp_drop() returned NULL")); 1449 } else { 1450 soisdisconnecting(so); 1451 sbflush(&so->so_rcv); 1452 tcp_usrclosed(tp); 1453 tcp_output(tp); 1454 } 1455} 1456 1457/* 1458 * User issued close, and wish to trail through shutdown states: 1459 * if never received SYN, just forget it. If got a SYN from peer, 1460 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1461 * If already got a FIN from peer, then almost done; go to LAST_ACK 1462 * state. In all other cases, have already sent FIN to peer (e.g. 1463 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1464 * for peer to send FIN or not respond to keep-alives, etc. 1465 * We can let the user exit from the close as soon as the FIN is acked. 1466 */ 1467static void 1468tcp_usrclosed(tp) 1469 register struct tcpcb *tp; 1470{ 1471 1472 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1473 INP_LOCK_ASSERT(tp->t_inpcb); 1474 1475 switch (tp->t_state) { 1476 1477 case TCPS_CLOSED: 1478 case TCPS_LISTEN: 1479 tp->t_state = TCPS_CLOSED; 1480 tp = tcp_close(tp); 1481 /* 1482 * tcp_close() should never return NULL here as the socket is 1483 * still open. 1484 */ 1485 KASSERT(tp != NULL, 1486 ("tcp_usrclosed: tcp_close() returned NULL")); 1487 break; 1488 1489 case TCPS_SYN_SENT: 1490 case TCPS_SYN_RECEIVED: 1491 tp->t_flags |= TF_NEEDFIN; 1492 break; 1493 1494 case TCPS_ESTABLISHED: 1495 tp->t_state = TCPS_FIN_WAIT_1; 1496 break; 1497 1498 case TCPS_CLOSE_WAIT: 1499 tp->t_state = TCPS_LAST_ACK; 1500 break; 1501 } 1502 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1503 soisdisconnected(tp->t_inpcb->inp_socket); 1504 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 1505 if (tp->t_state == TCPS_FIN_WAIT_2) 1506 callout_reset(tp->tt_2msl, tcp_maxidle, 1507 tcp_timer_2msl, tp); 1508 } 1509} 1510