tcp_usrreq.c revision 157376
/*-
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2006 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23121232Sgshapiro * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24121232Sgshapiro * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25121232Sgshapiro * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26121232Sgshapiro * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 32 * $FreeBSD: head/sys/netinet/tcp_usrreq.c 157376 2006-04-01 16:36:36Z rwatson $ 33 */ 34 35#include "opt_inet.h" 36#include "opt_inet6.h" 37#include "opt_tcpdebug.h" 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/malloc.h> 42#include <sys/kernel.h> 43#include <sys/sysctl.h> 44#include <sys/mbuf.h> 45#ifdef INET6 46#include <sys/domain.h> 47#endif /* INET6 */ 48#include <sys/socket.h> 49#include <sys/socketvar.h> 50#include <sys/protosw.h> 51#include <sys/proc.h> 52#include <sys/jail.h> 53 54#include <net/if.h> 55#include <net/route.h> 56 57#include <netinet/in.h> 58#include <netinet/in_systm.h> 59#ifdef INET6 60#include <netinet/ip6.h> 61#endif 62#include <netinet/in_pcb.h> 63#ifdef INET6 64#include <netinet6/in6_pcb.h> 65#endif 66#include <netinet/in_var.h> 67#include <netinet/ip_var.h> 68#ifdef INET6 69#include <netinet6/ip6_var.h> 70#include <netinet6/scope6_var.h> 71#endif 72#include <netinet/tcp.h> 73#include <netinet/tcp_fsm.h> 74#include <netinet/tcp_seq.h> 75#include <netinet/tcp_timer.h> 76#include <netinet/tcp_var.h> 77#include <netinet/tcpip.h> 78#ifdef TCPDEBUG 79#include <netinet/tcp_debug.h> 80#endif 81 82/* 83 * TCP protocol interface to socket abstraction. 84 */ 85extern char *tcpstates[]; /* XXX ??? 
*/ 86 87static int tcp_attach(struct socket *); 88static int tcp_connect(struct tcpcb *, struct sockaddr *, 89 struct thread *td); 90#ifdef INET6 91static int tcp6_connect(struct tcpcb *, struct sockaddr *, 92 struct thread *td); 93#endif /* INET6 */ 94static void tcp_disconnect(struct tcpcb *); 95static void tcp_usrclosed(struct tcpcb *); 96static void tcp_fill_info(struct tcpcb *, struct tcp_info *); 97 98#ifdef TCPDEBUG 99#define TCPDEBUG0 int ostate = 0 100#define TCPDEBUG1() ostate = tp ? tp->t_state : 0 101#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 102 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 103#else 104#define TCPDEBUG0 105#define TCPDEBUG1() 106#define TCPDEBUG2(req) 107#endif 108 109/* 110 * TCP attaches to socket via pru_attach(), reserving space, 111 * and an internet control block. 112 */ 113static int 114tcp_usr_attach(struct socket *so, int proto, struct thread *td) 115{ 116 struct inpcb *inp; 117 struct tcpcb *tp = NULL; 118 int error; 119 TCPDEBUG0; 120 121 inp = sotoinpcb(so); 122 KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); 123 INP_INFO_WLOCK(&tcbinfo); 124 TCPDEBUG1(); 125 126 error = tcp_attach(so); 127 if (error) 128 goto out; 129 130 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 131 so->so_linger = TCP_LINGERTIME; 132 133 inp = sotoinpcb(so); 134 tp = intotcpcb(inp); 135out: 136 TCPDEBUG2(PRU_ATTACH); 137 INP_INFO_WUNLOCK(&tcbinfo); 138 return error; 139} 140 141/* 142 * pru_detach() detaches the TCP protocol from the socket. 143 * If the protocol state is non-embryonic, then can't 144 * do this directly: have to initiate a pru_disconnect(), 145 * which may finish later; embryonic TCB's can just 146 * be discarded here. 
147 */ 148static void 149tcp_usr_detach(struct socket *so) 150{ 151 struct inpcb *inp; 152 struct tcpcb *tp; 153#ifdef INET6 154 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 155#endif 156 TCPDEBUG0; 157 158 inp = sotoinpcb(so); 159 KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); 160 INP_INFO_WLOCK(&tcbinfo); 161 INP_LOCK(inp); 162 KASSERT(inp->inp_socket != NULL, 163 ("tcp_usr_detach: inp_socket == NULL")); 164 165 TCPDEBUG1(); 166 tp = intotcpcb(inp); 167 168 if (inp->inp_vflag & INP_TIMEWAIT) { 169 if (inp->inp_vflag & INP_DROPPED) { 170 /* 171 * Connection was in time wait and has been dropped; 172 * the calling path is via tcp_twclose(), which will 173 * free the tcptw, so we can discard the remainder. 174 * 175 * XXXRW: Would it be cleaner to free the tcptw 176 * here? 177 */ 178#ifdef INET6 179 if (isipv6) { 180 in6_pcbdetach(inp); 181 in6_pcbfree(inp); 182 } else { 183#endif 184 in_pcbdetach(inp); 185 in_pcbfree(inp); 186#ifdef INET6 187 } 188#endif 189 } else { 190 /* 191 * Connection is in time wait and has not yet been 192 * dropped; allow the socket to be discarded, but 193 * need to keep inpcb until end of time wait. 194 */ 195#ifdef INET6 196 if (isipv6) 197 in6_pcbdetach(inp); 198 else 199#endif 200 in_pcbdetach(inp); 201 INP_UNLOCK(inp); 202 } 203 } else { 204 tp = intotcpcb(inp); 205 if (inp->inp_vflag & INP_DROPPED || 206 tp->t_state < TCPS_SYN_SENT) { 207 /* 208 * Connection has been dropped or is a listen socket, 209 * tear down all pcb state and allow socket to be 210 * freed. 211 */ 212 tcp_discardcb(tp); 213#ifdef INET6 214 if (isipv6) { 215 in_pcbdetach(inp); 216 in_pcbfree(inp); 217 } else { 218#endif 219 in_pcbdetach(inp); 220 in_pcbfree(inp); 221#ifdef INET6 222 } 223#endif 224 } else { 225 /* 226 * Connection state still required, as is socket, so 227 * mark socket for TCP to free later. 
228 */ 229 SOCK_LOCK(so); 230 so->so_state |= SS_PROTOREF; 231 SOCK_UNLOCK(so); 232 inp->inp_vflag |= INP_SOCKREF; 233 INP_UNLOCK(inp); 234 } 235 } 236 tp = NULL; 237 TCPDEBUG2(PRU_DETACH); 238 INP_INFO_WUNLOCK(&tcbinfo); 239} 240 241/* 242 * Give the socket an address. 243 */ 244static int 245tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 246{ 247 int error = 0; 248 struct inpcb *inp; 249 struct tcpcb *tp = NULL; 250 struct sockaddr_in *sinp; 251 252 sinp = (struct sockaddr_in *)nam; 253 if (nam->sa_len != sizeof (*sinp)) 254 return (EINVAL); 255 /* 256 * Must check for multicast addresses and disallow binding 257 * to them. 258 */ 259 if (sinp->sin_family == AF_INET && 260 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 261 return (EAFNOSUPPORT); 262 263 TCPDEBUG0; 264 INP_INFO_WLOCK(&tcbinfo); 265 inp = sotoinpcb(so); 266 KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); 267 INP_LOCK(inp); 268 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 269 error = EINVAL; 270 goto out; 271 } 272 tp = intotcpcb(inp); 273 TCPDEBUG1(); 274 error = in_pcbbind(inp, nam, td->td_ucred); 275out: 276 TCPDEBUG2(PRU_BIND); 277 INP_UNLOCK(inp); 278 INP_INFO_WUNLOCK(&tcbinfo); 279 280 return (error); 281} 282 283#ifdef INET6 284static int 285tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 286{ 287 int error = 0; 288 struct inpcb *inp; 289 struct tcpcb *tp = NULL; 290 struct sockaddr_in6 *sin6p; 291 292 sin6p = (struct sockaddr_in6 *)nam; 293 if (nam->sa_len != sizeof (*sin6p)) 294 return (EINVAL); 295 /* 296 * Must check for multicast addresses and disallow binding 297 * to them. 
298 */ 299 if (sin6p->sin6_family == AF_INET6 && 300 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 301 return (EAFNOSUPPORT); 302 303 TCPDEBUG0; 304 INP_INFO_WLOCK(&tcbinfo); 305 inp = sotoinpcb(so); 306 KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); 307 INP_LOCK(inp); 308 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 309 error = EINVAL; 310 goto out; 311 } 312 tp = intotcpcb(inp); 313 TCPDEBUG1(); 314 inp->inp_vflag &= ~INP_IPV4; 315 inp->inp_vflag |= INP_IPV6; 316 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 317 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 318 inp->inp_vflag |= INP_IPV4; 319 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 320 struct sockaddr_in sin; 321 322 in6_sin6_2_sin(&sin, sin6p); 323 inp->inp_vflag |= INP_IPV4; 324 inp->inp_vflag &= ~INP_IPV6; 325 error = in_pcbbind(inp, (struct sockaddr *)&sin, 326 td->td_ucred); 327 goto out; 328 } 329 } 330 error = in6_pcbbind(inp, nam, td->td_ucred); 331out: 332 TCPDEBUG2(PRU_BIND); 333 INP_UNLOCK(inp); 334 INP_INFO_WUNLOCK(&tcbinfo); 335 return (error); 336} 337#endif /* INET6 */ 338 339/* 340 * Prepare to accept connections. 
341 */ 342static int 343tcp_usr_listen(struct socket *so, int backlog, struct thread *td) 344{ 345 int error = 0; 346 struct inpcb *inp; 347 struct tcpcb *tp = NULL; 348 349 TCPDEBUG0; 350 INP_INFO_WLOCK(&tcbinfo); 351 inp = sotoinpcb(so); 352 KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); 353 INP_LOCK(inp); 354 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 355 error = EINVAL; 356 goto out; 357 } 358 tp = intotcpcb(inp); 359 TCPDEBUG1(); 360 SOCK_LOCK(so); 361 error = solisten_proto_check(so); 362 if (error == 0 && inp->inp_lport == 0) 363 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 364 if (error == 0) { 365 tp->t_state = TCPS_LISTEN; 366 solisten_proto(so, backlog); 367 } 368 SOCK_UNLOCK(so); 369 370out: 371 TCPDEBUG2(PRU_LISTEN); 372 INP_UNLOCK(inp); 373 INP_INFO_WUNLOCK(&tcbinfo); 374 return (error); 375} 376 377#ifdef INET6 378static int 379tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) 380{ 381 int error = 0; 382 struct inpcb *inp; 383 struct tcpcb *tp = NULL; 384 385 TCPDEBUG0; 386 INP_INFO_WLOCK(&tcbinfo); 387 inp = sotoinpcb(so); 388 KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); 389 INP_LOCK(inp); 390 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 391 error = EINVAL; 392 goto out; 393 } 394 tp = intotcpcb(inp); 395 TCPDEBUG1(); 396 SOCK_LOCK(so); 397 error = solisten_proto_check(so); 398 if (error == 0 && inp->inp_lport == 0) { 399 inp->inp_vflag &= ~INP_IPV4; 400 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 401 inp->inp_vflag |= INP_IPV4; 402 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 403 } 404 if (error == 0) { 405 tp->t_state = TCPS_LISTEN; 406 solisten_proto(so, backlog); 407 } 408 SOCK_UNLOCK(so); 409 410out: 411 TCPDEBUG2(PRU_LISTEN); 412 INP_UNLOCK(inp); 413 INP_INFO_WUNLOCK(&tcbinfo); 414 return (error); 415} 416#endif /* INET6 */ 417 418/* 419 * Initiate connection to peer. 420 * Create a template for use in transmissions on this connection. 
421 * Enter SYN_SENT state, and mark socket as connecting. 422 * Start keep-alive timer, and seed output sequence space. 423 * Send initial segment on connection. 424 */ 425static int 426tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 427{ 428 int error = 0; 429 struct inpcb *inp; 430 struct tcpcb *tp = NULL; 431 struct sockaddr_in *sinp; 432 433 sinp = (struct sockaddr_in *)nam; 434 if (nam->sa_len != sizeof (*sinp)) 435 return (EINVAL); 436 /* 437 * Must disallow TCP ``connections'' to multicast addresses. 438 */ 439 if (sinp->sin_family == AF_INET 440 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 441 return (EAFNOSUPPORT); 442 if (jailed(td->td_ucred)) 443 prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); 444 445 TCPDEBUG0; 446 INP_INFO_WLOCK(&tcbinfo); 447 inp = sotoinpcb(so); 448 KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); 449 INP_LOCK(inp); 450 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 451 error = EINVAL; 452 goto out; 453 } 454 tp = intotcpcb(inp); 455 TCPDEBUG1(); 456 if ((error = tcp_connect(tp, nam, td)) != 0) 457 goto out; 458 error = tcp_output(tp); 459out: 460 TCPDEBUG2(PRU_CONNECT); 461 INP_UNLOCK(inp); 462 INP_INFO_WUNLOCK(&tcbinfo); 463 return (error); 464} 465 466#ifdef INET6 467static int 468tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 469{ 470 int error = 0; 471 struct inpcb *inp; 472 struct tcpcb *tp = NULL; 473 struct sockaddr_in6 *sin6p; 474 475 TCPDEBUG0; 476 477 sin6p = (struct sockaddr_in6 *)nam; 478 if (nam->sa_len != sizeof (*sin6p)) 479 return (EINVAL); 480 /* 481 * Must disallow TCP ``connections'' to multicast addresses. 
482 */ 483 if (sin6p->sin6_family == AF_INET6 484 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 485 return (EAFNOSUPPORT); 486 487 INP_INFO_WLOCK(&tcbinfo); 488 inp = sotoinpcb(so); 489 KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); 490 INP_LOCK(inp); 491 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 492 error = EINVAL; 493 goto out; 494 } 495 tp = intotcpcb(inp); 496 TCPDEBUG1(); 497 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 498 struct sockaddr_in sin; 499 500 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 501 error = EINVAL; 502 goto out; 503 } 504 505 in6_sin6_2_sin(&sin, sin6p); 506 inp->inp_vflag |= INP_IPV4; 507 inp->inp_vflag &= ~INP_IPV6; 508 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) 509 goto out; 510 error = tcp_output(tp); 511 goto out; 512 } 513 inp->inp_vflag &= ~INP_IPV4; 514 inp->inp_vflag |= INP_IPV6; 515 inp->inp_inc.inc_isipv6 = 1; 516 if ((error = tcp6_connect(tp, nam, td)) != 0) 517 goto out; 518 error = tcp_output(tp); 519 520out: 521 TCPDEBUG2(PRU_CONNECT); 522 INP_UNLOCK(inp); 523 INP_INFO_WUNLOCK(&tcbinfo); 524 return (error); 525} 526#endif /* INET6 */ 527 528/* 529 * Initiate disconnect from peer. 530 * If connection never passed embryonic stage, just drop; 531 * else if don't need to let data drain, then can just drop anyways, 532 * else have to begin TCP shutdown process: mark socket disconnecting, 533 * drain unread data, state switch to reflect user close, and 534 * send segment (e.g. FIN) to peer. Socket will be really disconnected 535 * when peer sends FIN and acks ours. 536 * 537 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 
538 */ 539static int 540tcp_usr_disconnect(struct socket *so) 541{ 542 struct inpcb *inp; 543 struct tcpcb *tp = NULL; 544 int error = 0; 545 546 TCPDEBUG0; 547 INP_INFO_WLOCK(&tcbinfo); 548 inp = sotoinpcb(so); 549 KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); 550 INP_LOCK(inp); 551 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 552 error = EINVAL; 553 goto out; 554 } 555 tp = intotcpcb(inp); 556 TCPDEBUG1(); 557 tcp_disconnect(tp); 558out: 559 TCPDEBUG2(PRU_DISCONNECT); 560 INP_UNLOCK(inp); 561 INP_INFO_WUNLOCK(&tcbinfo); 562 return (error); 563} 564 565/* 566 * Accept a connection. Essentially all the work is 567 * done at higher levels; just return the address 568 * of the peer, storing through addr. 569 */ 570static int 571tcp_usr_accept(struct socket *so, struct sockaddr **nam) 572{ 573 int error = 0; 574 struct inpcb *inp = NULL; 575 struct tcpcb *tp = NULL; 576 struct in_addr addr; 577 in_port_t port = 0; 578 TCPDEBUG0; 579 580 if (so->so_state & SS_ISDISCONNECTED) { 581 error = ECONNABORTED; 582 goto out; 583 } 584 585 inp = sotoinpcb(so); 586 KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); 587 INP_LOCK(inp); 588 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 589 error = EINVAL; 590 goto out; 591 } 592 tp = intotcpcb(inp); 593 TCPDEBUG1(); 594 595 /* 596 * We inline in_setpeeraddr and COMMON_END here, so that we can 597 * copy the data of interest and defer the malloc until after we 598 * release the lock. 
599 */ 600 port = inp->inp_fport; 601 addr = inp->inp_faddr; 602 603out: 604 TCPDEBUG2(PRU_ACCEPT); 605 INP_UNLOCK(inp); 606 if (error == 0) 607 *nam = in_sockaddr(port, &addr); 608 return error; 609} 610 611#ifdef INET6 612static int 613tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 614{ 615 struct inpcb *inp = NULL; 616 int error = 0; 617 struct tcpcb *tp = NULL; 618 struct in_addr addr; 619 struct in6_addr addr6; 620 in_port_t port = 0; 621 int v4 = 0; 622 TCPDEBUG0; 623 624 if (so->so_state & SS_ISDISCONNECTED) { 625 error = ECONNABORTED; 626 goto out; 627 } 628 629 inp = sotoinpcb(so); 630 KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); 631 INP_LOCK(inp); 632 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 633 error = EINVAL; 634 goto out; 635 } 636 tp = intotcpcb(inp); 637 TCPDEBUG1(); 638 639 /* 640 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can 641 * copy the data of interest and defer the malloc until after we 642 * release the lock. 643 */ 644 if (inp->inp_vflag & INP_IPV4) { 645 v4 = 1; 646 port = inp->inp_fport; 647 addr = inp->inp_faddr; 648 } else { 649 port = inp->inp_fport; 650 addr6 = inp->in6p_faddr; 651 } 652 653out: 654 TCPDEBUG2(PRU_ACCEPT); 655 INP_UNLOCK(inp); 656 if (error == 0) { 657 if (v4) 658 *nam = in6_v4mapsin6_sockaddr(port, &addr); 659 else 660 *nam = in6_sockaddr(port, &addr6); 661 } 662 return error; 663} 664#endif /* INET6 */ 665 666/* 667 * This is the wrapper function for in_setsockaddr. We just pass down 668 * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking 669 * here because in_setsockaddr will call malloc and can block. 670 */ 671static int 672tcp_sockaddr(struct socket *so, struct sockaddr **nam) 673{ 674 return (in_setsockaddr(so, nam, &tcbinfo)); 675} 676 677/* 678 * This is the wrapper function for in_setpeeraddr. We just pass down 679 * the pcbinfo for in_setpeeraddr to lock. 
680 */ 681static int 682tcp_peeraddr(struct socket *so, struct sockaddr **nam) 683{ 684 return (in_setpeeraddr(so, nam, &tcbinfo)); 685} 686 687/* 688 * Mark the connection as being incapable of further output. 689 */ 690static int 691tcp_usr_shutdown(struct socket *so) 692{ 693 int error = 0; 694 struct inpcb *inp; 695 struct tcpcb *tp = NULL; 696 697 TCPDEBUG0; 698 INP_INFO_WLOCK(&tcbinfo); 699 inp = sotoinpcb(so); 700 KASSERT(inp != NULL, ("inp == NULL")); 701 INP_LOCK(inp); 702 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 703 error = EINVAL; 704 goto out; 705 } 706 tp = intotcpcb(inp); 707 TCPDEBUG1(); 708 socantsendmore(so); 709 tcp_usrclosed(tp); 710 error = tcp_output(tp); 711 712out: 713 TCPDEBUG2(PRU_SHUTDOWN); 714 INP_UNLOCK(inp); 715 INP_INFO_WUNLOCK(&tcbinfo); 716 717 return (error); 718} 719 720/* 721 * After a receive, possibly send window update to peer. 722 */ 723static int 724tcp_usr_rcvd(struct socket *so, int flags) 725{ 726 struct inpcb *inp; 727 struct tcpcb *tp = NULL; 728 int error = 0; 729 730 TCPDEBUG0; 731 inp = sotoinpcb(so); 732 KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); 733 INP_LOCK(inp); 734 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 735 error = EINVAL; 736 goto out; 737 } 738 tp = intotcpcb(inp); 739 TCPDEBUG1(); 740 tcp_output(tp); 741 742out: 743 TCPDEBUG2(PRU_RCVD); 744 INP_UNLOCK(inp); 745 return (error); 746} 747 748/* 749 * Do a send by putting data in output queue and updating urgent 750 * marker if URG set. Possibly send more data. Unlike the other 751 * pru_*() routines, the mbuf chains are our responsibility. We 752 * must either enqueue them or free them. The other pru_* routines 753 * generally are caller-frees. 
754 */ 755static int 756tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 757 struct sockaddr *nam, struct mbuf *control, struct thread *td) 758{ 759 int error = 0; 760 struct inpcb *inp; 761 struct tcpcb *tp = NULL; 762 int headlocked = 0; 763#ifdef INET6 764 int isipv6; 765#endif 766 TCPDEBUG0; 767 768 /* 769 * We require the pcbinfo lock in two cases: 770 * 771 * (1) An implied connect is taking place, which can result in 772 * binding IPs and ports and hence modification of the pcb hash 773 * chains. 774 * 775 * (2) PRUS_EOF is set, resulting in explicit close on the send. 776 */ 777 if ((nam != NULL) || (flags & PRUS_EOF)) { 778 INP_INFO_WLOCK(&tcbinfo); 779 headlocked = 1; 780 } 781 inp = sotoinpcb(so); 782 KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); 783 INP_LOCK(inp); 784 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 785 error = EINVAL; 786 goto out; 787 } 788#ifdef INET6 789 isipv6 = nam && nam->sa_family == AF_INET6; 790#endif /* INET6 */ 791 tp = intotcpcb(inp); 792 TCPDEBUG1(); 793 if (control) { 794 /* TCP doesn't do control messages (rights, creds, etc) */ 795 if (control->m_len) { 796 m_freem(control); 797 if (m) 798 m_freem(m); 799 error = EINVAL; 800 goto out; 801 } 802 m_freem(control); /* empty control, just free it */ 803 } 804 if (!(flags & PRUS_OOB)) { 805 sbappendstream(&so->so_snd, m); 806 if (nam && tp->t_state < TCPS_SYN_SENT) { 807 /* 808 * Do implied connect if not yet connected, 809 * initialize window to default value, and 810 * initialize maxseg/maxopd using peer's cached 811 * MSS. 812 */ 813 INP_INFO_WLOCK_ASSERT(&tcbinfo); 814#ifdef INET6 815 if (isipv6) 816 error = tcp6_connect(tp, nam, td); 817 else 818#endif /* INET6 */ 819 error = tcp_connect(tp, nam, td); 820 if (error) 821 goto out; 822 tp->snd_wnd = TTCP_CLIENT_SND_WND; 823 tcp_mss(tp, -1); 824 } 825 if (flags & PRUS_EOF) { 826 /* 827 * Close the send side of the connection after 828 * the data is sent. 
829 */ 830 INP_INFO_WLOCK_ASSERT(&tcbinfo); 831 socantsendmore(so); 832 tcp_usrclosed(tp); 833 } 834 if (headlocked) { 835 INP_INFO_WUNLOCK(&tcbinfo); 836 headlocked = 0; 837 } 838 if (tp != NULL) { 839 if (flags & PRUS_MORETOCOME) 840 tp->t_flags |= TF_MORETOCOME; 841 error = tcp_output(tp); 842 if (flags & PRUS_MORETOCOME) 843 tp->t_flags &= ~TF_MORETOCOME; 844 } 845 } else { 846 /* 847 * XXXRW: PRUS_EOF not implemented with PRUS_OOB? 848 */ 849 SOCKBUF_LOCK(&so->so_snd); 850 if (sbspace(&so->so_snd) < -512) { 851 SOCKBUF_UNLOCK(&so->so_snd); 852 m_freem(m); 853 error = ENOBUFS; 854 goto out; 855 } 856 /* 857 * According to RFC961 (Assigned Protocols), 858 * the urgent pointer points to the last octet 859 * of urgent data. We continue, however, 860 * to consider it to indicate the first octet 861 * of data past the urgent section. 862 * Otherwise, snd_up should be one lower. 863 */ 864 sbappendstream_locked(&so->so_snd, m); 865 SOCKBUF_UNLOCK(&so->so_snd); 866 if (nam && tp->t_state < TCPS_SYN_SENT) { 867 /* 868 * Do implied connect if not yet connected, 869 * initialize window to default value, and 870 * initialize maxseg/maxopd using peer's cached 871 * MSS. 872 */ 873 INP_INFO_WLOCK_ASSERT(&tcbinfo); 874#ifdef INET6 875 if (isipv6) 876 error = tcp6_connect(tp, nam, td); 877 else 878#endif /* INET6 */ 879 error = tcp_connect(tp, nam, td); 880 if (error) 881 goto out; 882 tp->snd_wnd = TTCP_CLIENT_SND_WND; 883 tcp_mss(tp, -1); 884 INP_INFO_WUNLOCK(&tcbinfo); 885 headlocked = 0; 886 } else if (nam) { 887 INP_INFO_WUNLOCK(&tcbinfo); 888 headlocked = 0; 889 } 890 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 891 tp->t_flags |= TF_FORCEDATA; 892 error = tcp_output(tp); 893 tp->t_flags &= ~TF_FORCEDATA; 894 } 895out: 896 TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : 897 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 898 INP_UNLOCK(inp); 899 if (headlocked) 900 INP_INFO_WUNLOCK(&tcbinfo); 901 return (error); 902} 903 904/* 905 * Abort the TCP. 
906 */ 907static void 908tcp_usr_abort(struct socket *so) 909{ 910#if 0 911 struct inpcb *inp; 912 struct tcpcb *tp; 913#endif 914 915 /* 916 * XXXRW: This is not really quite the same, as we want to tcp_drop() 917 * rather than tcp_disconnect(), I think, but for now I'll avoid 918 * replicating all the tear-down logic here. 919 */ 920 tcp_usr_detach(so); 921 922#if 0 923 TCPDEBUG0; 924 INP_INFO_WLOCK(&tcbinfo); 925 inp = sotoinpcb(so); 926 INP_LOCK(inp); 927 /* 928 * Do we need to handle timewait here? Aborted connections should 929 * never generate a FIN? 930 */ 931 KASSERT((inp->inp_vflag & INP_TIMEWAIT) == 0, 932 ("tcp_usr_abort: timewait")); 933 tp = intotcpcb(inp); 934 TCPDEBUG1(); 935 tp = tcp_drop(tp, ECONNABORTED); 936 TCPDEBUG2(PRU_ABORT); 937 if (tp != NULL) 938 INP_UNLOCK(inp); 939 INP_INFO_WUNLOCK(&tcbinfo); 940#endif 941} 942 943/* 944 * Receive out-of-band data. 945 */ 946static int 947tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 948{ 949 int error = 0; 950 struct inpcb *inp; 951 struct tcpcb *tp = NULL; 952 953 TCPDEBUG0; 954 inp = sotoinpcb(so); 955 KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); 956 INP_LOCK(inp); 957 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 958 error = EINVAL; 959 goto out; 960 } 961 tp = intotcpcb(inp); 962 TCPDEBUG1(); 963 if ((so->so_oobmark == 0 && 964 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || 965 so->so_options & SO_OOBINLINE || 966 tp->t_oobflags & TCPOOB_HADDATA) { 967 error = EINVAL; 968 goto out; 969 } 970 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 971 error = EWOULDBLOCK; 972 goto out; 973 } 974 m->m_len = 1; 975 *mtod(m, caddr_t) = tp->t_iobc; 976 if ((flags & MSG_PEEK) == 0) 977 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 978 979out: 980 TCPDEBUG2(PRU_RCVOOB); 981 INP_UNLOCK(inp); 982 return (error); 983} 984 985struct pr_usrreqs tcp_usrreqs = { 986 .pru_abort = tcp_usr_abort, 987 .pru_accept = tcp_usr_accept, 988 .pru_attach = tcp_usr_attach, 989 .pru_bind = 
tcp_usr_bind, 990 .pru_connect = tcp_usr_connect, 991 .pru_control = in_control, 992 .pru_detach = tcp_usr_detach, 993 .pru_disconnect = tcp_usr_disconnect, 994 .pru_listen = tcp_usr_listen, 995 .pru_peeraddr = tcp_peeraddr, 996 .pru_rcvd = tcp_usr_rcvd, 997 .pru_rcvoob = tcp_usr_rcvoob, 998 .pru_send = tcp_usr_send, 999 .pru_shutdown = tcp_usr_shutdown, 1000 .pru_sockaddr = tcp_sockaddr, 1001 .pru_sosetlabel = in_pcbsosetlabel 1002}; 1003 1004#ifdef INET6 1005struct pr_usrreqs tcp6_usrreqs = { 1006 .pru_abort = tcp_usr_abort, 1007 .pru_accept = tcp6_usr_accept, 1008 .pru_attach = tcp_usr_attach, 1009 .pru_bind = tcp6_usr_bind, 1010 .pru_connect = tcp6_usr_connect, 1011 .pru_control = in6_control, 1012 .pru_detach = tcp_usr_detach, 1013 .pru_disconnect = tcp_usr_disconnect, 1014 .pru_listen = tcp6_usr_listen, 1015 .pru_peeraddr = in6_mapped_peeraddr, 1016 .pru_rcvd = tcp_usr_rcvd, 1017 .pru_rcvoob = tcp_usr_rcvoob, 1018 .pru_send = tcp_usr_send, 1019 .pru_shutdown = tcp_usr_shutdown, 1020 .pru_sockaddr = in6_mapped_sockaddr, 1021 .pru_sosetlabel = in_pcbsosetlabel 1022}; 1023#endif /* INET6 */ 1024 1025/* 1026 * Common subroutine to open a TCP connection to remote host specified 1027 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1028 * port number if needed. Call in_pcbconnect_setup to do the routing and 1029 * to choose a local host address (interface). If there is an existing 1030 * incarnation of the same connection in TIME-WAIT state and if the remote 1031 * host was sending CC options and if the connection duration was < MSL, then 1032 * truncate the previous TIME-WAIT state and proceed. 1033 * Initialize connection parameters and enter SYN-SENT state. 
1034 */ 1035static int 1036tcp_connect(tp, nam, td) 1037 register struct tcpcb *tp; 1038 struct sockaddr *nam; 1039 struct thread *td; 1040{ 1041 struct inpcb *inp = tp->t_inpcb, *oinp; 1042 struct socket *so = inp->inp_socket; 1043 struct in_addr laddr; 1044 u_short lport; 1045 int error; 1046 1047 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1048 INP_LOCK_ASSERT(inp); 1049 1050 if (inp->inp_lport == 0) { 1051 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1052 if (error) 1053 return error; 1054 } 1055 1056 /* 1057 * Cannot simply call in_pcbconnect, because there might be an 1058 * earlier incarnation of this same connection still in 1059 * TIME_WAIT state, creating an ADDRINUSE error. 1060 */ 1061 laddr = inp->inp_laddr; 1062 lport = inp->inp_lport; 1063 error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, 1064 &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); 1065 if (error && oinp == NULL) 1066 return error; 1067 if (oinp) 1068 return EADDRINUSE; 1069 inp->inp_laddr = laddr; 1070 in_pcbrehash(inp); 1071 1072 /* Compute window scaling to request. 
*/ 1073 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1074 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 1075 tp->request_r_scale++; 1076 1077 soisconnecting(so); 1078 tcpstat.tcps_connattempt++; 1079 tp->t_state = TCPS_SYN_SENT; 1080 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 1081 tp->iss = tcp_new_isn(tp); 1082 tp->t_bw_rtseq = tp->iss; 1083 tcp_sendseqinit(tp); 1084 1085 return 0; 1086} 1087 1088#ifdef INET6 1089static int 1090tcp6_connect(tp, nam, td) 1091 register struct tcpcb *tp; 1092 struct sockaddr *nam; 1093 struct thread *td; 1094{ 1095 struct inpcb *inp = tp->t_inpcb, *oinp; 1096 struct socket *so = inp->inp_socket; 1097 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 1098 struct in6_addr *addr6; 1099 int error; 1100 1101 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1102 INP_LOCK_ASSERT(inp); 1103 1104 if (inp->inp_lport == 0) { 1105 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1106 if (error) 1107 return error; 1108 } 1109 1110 /* 1111 * Cannot simply call in_pcbconnect, because there might be an 1112 * earlier incarnation of this same connection still in 1113 * TIME_WAIT state, creating an ADDRINUSE error. 1114 * in6_pcbladdr() also handles scope zone IDs. 1115 */ 1116 error = in6_pcbladdr(inp, nam, &addr6); 1117 if (error) 1118 return error; 1119 oinp = in6_pcblookup_hash(inp->inp_pcbinfo, 1120 &sin6->sin6_addr, sin6->sin6_port, 1121 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 1122 ? 
addr6 1123 : &inp->in6p_laddr, 1124 inp->inp_lport, 0, NULL); 1125 if (oinp) 1126 return EADDRINUSE; 1127 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 1128 inp->in6p_laddr = *addr6; 1129 inp->in6p_faddr = sin6->sin6_addr; 1130 inp->inp_fport = sin6->sin6_port; 1131 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 1132 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 1133 if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) 1134 inp->in6p_flowinfo |= 1135 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 1136 in_pcbrehash(inp); 1137 1138 /* Compute window scaling to request. */ 1139 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1140 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 1141 tp->request_r_scale++; 1142 1143 soisconnecting(so); 1144 tcpstat.tcps_connattempt++; 1145 tp->t_state = TCPS_SYN_SENT; 1146 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 1147 tp->iss = tcp_new_isn(tp); 1148 tp->t_bw_rtseq = tp->iss; 1149 tcp_sendseqinit(tp); 1150 1151 return 0; 1152} 1153#endif /* INET6 */ 1154 1155/* 1156 * Export TCP internal state information via a struct tcp_info, based on the 1157 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently 1158 * (TCP state machine, etc). We export all information using FreeBSD-native 1159 * constants -- for example, the numeric values for tcpi_state will differ 1160 * from Linux. 
1161 */ 1162static void 1163tcp_fill_info(tp, ti) 1164 struct tcpcb *tp; 1165 struct tcp_info *ti; 1166{ 1167 1168 INP_LOCK_ASSERT(tp->t_inpcb); 1169 bzero(ti, sizeof(*ti)); 1170 1171 ti->tcpi_state = tp->t_state; 1172 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 1173 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1174 if (tp->sack_enable) 1175 ti->tcpi_options |= TCPI_OPT_SACK; 1176 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 1177 ti->tcpi_options |= TCPI_OPT_WSCALE; 1178 ti->tcpi_snd_wscale = tp->snd_scale; 1179 ti->tcpi_rcv_wscale = tp->rcv_scale; 1180 } 1181 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 1182 ti->tcpi_snd_cwnd = tp->snd_cwnd; 1183 1184 /* 1185 * FreeBSD-specific extension fields for tcp_info. 1186 */ 1187 ti->tcpi_rcv_space = tp->rcv_wnd; 1188 ti->tcpi_snd_wnd = tp->snd_wnd; 1189 ti->tcpi_snd_bwnd = tp->snd_bwnd; 1190} 1191 1192/* 1193 * The new sockopt interface makes it possible for us to block in the 1194 * copyin/out step (if we take a page fault). Taking a page fault at 1195 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 1196 * use TSM, there probably isn't any need for this function to run at 1197 * splnet() any more. This needs more examination.) 1198 * 1199 * XXXRW: The locking here is wrong; we may take a page fault while holding 1200 * the inpcb lock. 
1201 */ 1202int 1203tcp_ctloutput(so, sopt) 1204 struct socket *so; 1205 struct sockopt *sopt; 1206{ 1207 int error, opt, optval; 1208 struct inpcb *inp; 1209 struct tcpcb *tp; 1210 struct tcp_info ti; 1211 1212 error = 0; 1213 inp = sotoinpcb(so); 1214 KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); 1215 INP_LOCK(inp); 1216 if (sopt->sopt_level != IPPROTO_TCP) { 1217 INP_UNLOCK(inp); 1218#ifdef INET6 1219 if (INP_CHECK_SOCKAF(so, AF_INET6)) 1220 error = ip6_ctloutput(so, sopt); 1221 else 1222#endif /* INET6 */ 1223 error = ip_ctloutput(so, sopt); 1224 return (error); 1225 } 1226 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 1227 error = ECONNRESET; 1228 goto out; 1229 } 1230 tp = intotcpcb(inp); 1231 1232 switch (sopt->sopt_dir) { 1233 case SOPT_SET: 1234 switch (sopt->sopt_name) { 1235#ifdef TCP_SIGNATURE 1236 case TCP_MD5SIG: 1237 error = sooptcopyin(sopt, &optval, sizeof optval, 1238 sizeof optval); 1239 if (error) 1240 break; 1241 1242 if (optval > 0) 1243 tp->t_flags |= TF_SIGNATURE; 1244 else 1245 tp->t_flags &= ~TF_SIGNATURE; 1246 break; 1247#endif /* TCP_SIGNATURE */ 1248 case TCP_NODELAY: 1249 case TCP_NOOPT: 1250 error = sooptcopyin(sopt, &optval, sizeof optval, 1251 sizeof optval); 1252 if (error) 1253 break; 1254 1255 switch (sopt->sopt_name) { 1256 case TCP_NODELAY: 1257 opt = TF_NODELAY; 1258 break; 1259 case TCP_NOOPT: 1260 opt = TF_NOOPT; 1261 break; 1262 default: 1263 opt = 0; /* dead code to fool gcc */ 1264 break; 1265 } 1266 1267 if (optval) 1268 tp->t_flags |= opt; 1269 else 1270 tp->t_flags &= ~opt; 1271 break; 1272 1273 case TCP_NOPUSH: 1274 error = sooptcopyin(sopt, &optval, sizeof optval, 1275 sizeof optval); 1276 if (error) 1277 break; 1278 1279 if (optval) 1280 tp->t_flags |= TF_NOPUSH; 1281 else { 1282 tp->t_flags &= ~TF_NOPUSH; 1283 error = tcp_output(tp); 1284 } 1285 break; 1286 1287 case TCP_MAXSEG: 1288 error = sooptcopyin(sopt, &optval, sizeof optval, 1289 sizeof optval); 1290 if (error) 1291 break; 1292 1293 if 
(optval > 0 && optval <= tp->t_maxseg && 1294 optval + 40 >= tcp_minmss) 1295 tp->t_maxseg = optval; 1296 else 1297 error = EINVAL; 1298 break; 1299 1300 case TCP_INFO: 1301 error = EINVAL; 1302 break; 1303 1304 default: 1305 error = ENOPROTOOPT; 1306 break; 1307 } 1308 break; 1309 1310 case SOPT_GET: 1311 switch (sopt->sopt_name) { 1312#ifdef TCP_SIGNATURE 1313 case TCP_MD5SIG: 1314 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 1315 error = sooptcopyout(sopt, &optval, sizeof optval); 1316 break; 1317#endif 1318 case TCP_NODELAY: 1319 optval = tp->t_flags & TF_NODELAY; 1320 error = sooptcopyout(sopt, &optval, sizeof optval); 1321 break; 1322 case TCP_MAXSEG: 1323 optval = tp->t_maxseg; 1324 error = sooptcopyout(sopt, &optval, sizeof optval); 1325 break; 1326 case TCP_NOOPT: 1327 optval = tp->t_flags & TF_NOOPT; 1328 error = sooptcopyout(sopt, &optval, sizeof optval); 1329 break; 1330 case TCP_NOPUSH: 1331 optval = tp->t_flags & TF_NOPUSH; 1332 error = sooptcopyout(sopt, &optval, sizeof optval); 1333 break; 1334 case TCP_INFO: 1335 tcp_fill_info(tp, &ti); 1336 error = sooptcopyout(sopt, &ti, sizeof ti); 1337 break; 1338 default: 1339 error = ENOPROTOOPT; 1340 break; 1341 } 1342 break; 1343 } 1344out: 1345 INP_UNLOCK(inp); 1346 return (error); 1347} 1348 1349/* 1350 * tcp_sendspace and tcp_recvspace are the default send and receive window 1351 * sizes, respectively. These are obsolescent (this information should 1352 * be set by the route). 
1353 */ 1354u_long tcp_sendspace = 1024*32; 1355SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, 1356 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); 1357u_long tcp_recvspace = 1024*64; 1358SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1359 &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); 1360 1361/* 1362 * Attach TCP protocol to socket, allocating 1363 * internet protocol control block, tcp control block, 1364 * bufer space, and entering LISTEN state if to accept connections. 1365 */ 1366static int 1367tcp_attach(so) 1368 struct socket *so; 1369{ 1370 register struct tcpcb *tp; 1371 struct inpcb *inp; 1372 int error; 1373#ifdef INET6 1374 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 1375#endif 1376 1377 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1378 1379 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1380 error = soreserve(so, tcp_sendspace, tcp_recvspace); 1381 if (error) 1382 return (error); 1383 } 1384 error = in_pcballoc(so, &tcbinfo, "tcpinp"); 1385 if (error) 1386 return (error); 1387 inp = sotoinpcb(so); 1388#ifdef INET6 1389 if (isipv6) { 1390 inp->inp_vflag |= INP_IPV6; 1391 inp->in6p_hops = -1; /* use kernel default */ 1392 } 1393 else 1394#endif 1395 inp->inp_vflag |= INP_IPV4; 1396 tp = tcp_newtcpcb(inp); 1397 if (tp == NULL) { 1398 INP_LOCK(inp); 1399#ifdef INET6 1400 if (isipv6) { 1401 in6_pcbdetach(inp); 1402 in6_pcbfree(inp); 1403 } else { 1404#endif 1405 in_pcbdetach(inp); 1406 in_pcbfree(inp); 1407#ifdef INET6 1408 } 1409#endif 1410 return (ENOBUFS); 1411 } 1412 tp->t_state = TCPS_CLOSED; 1413 return (0); 1414} 1415 1416/* 1417 * Initiate (or continue) disconnect. 1418 * If embryonic state, just send reset (once). 1419 * If in ``let data drain'' option and linger null, just drop. 1420 * Otherwise (hard), mark socket disconnecting and drop 1421 * current input data; switch states based on user close, and 1422 * send segment to peer (with FIN). 
1423 */ 1424static void 1425tcp_disconnect(tp) 1426 register struct tcpcb *tp; 1427{ 1428 struct inpcb *inp = tp->t_inpcb; 1429 struct socket *so = inp->inp_socket; 1430 1431 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1432 INP_LOCK_ASSERT(inp); 1433 1434 /* 1435 * Neither tcp_close() nor tcp_drop() should return NULL, as the 1436 * socket is still open. 1437 */ 1438 if (tp->t_state < TCPS_ESTABLISHED) { 1439 tp = tcp_close(tp); 1440 KASSERT(tp != NULL, 1441 ("tcp_disconnect: tcp_close() returned NULL")); 1442 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 1443 tp = tcp_drop(tp, 0); 1444 KASSERT(tp != NULL, 1445 ("tcp_disconnect: tcp_drop() returned NULL")); 1446 } else { 1447 soisdisconnecting(so); 1448 sbflush(&so->so_rcv); 1449 tcp_usrclosed(tp); 1450 tcp_output(tp); 1451 } 1452} 1453 1454/* 1455 * User issued close, and wish to trail through shutdown states: 1456 * if never received SYN, just forget it. If got a SYN from peer, 1457 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1458 * If already got a FIN from peer, then almost done; go to LAST_ACK 1459 * state. In all other cases, have already sent FIN to peer (e.g. 1460 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1461 * for peer to send FIN or not respond to keep-alives, etc. 1462 * We can let the user exit from the close as soon as the FIN is acked. 1463 */ 1464static void 1465tcp_usrclosed(tp) 1466 register struct tcpcb *tp; 1467{ 1468 1469 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1470 INP_LOCK_ASSERT(tp->t_inpcb); 1471 1472 switch (tp->t_state) { 1473 1474 case TCPS_CLOSED: 1475 case TCPS_LISTEN: 1476 tp->t_state = TCPS_CLOSED; 1477 tp = tcp_close(tp); 1478 /* 1479 * tcp_close() should never return NULL here as the socket is 1480 * still open. 
1481 */ 1482 KASSERT(tp != NULL, 1483 ("tcp_usrclosed: tcp_close() returned NULL")); 1484 break; 1485 1486 case TCPS_SYN_SENT: 1487 case TCPS_SYN_RECEIVED: 1488 tp->t_flags |= TF_NEEDFIN; 1489 break; 1490 1491 case TCPS_ESTABLISHED: 1492 tp->t_state = TCPS_FIN_WAIT_1; 1493 break; 1494 1495 case TCPS_CLOSE_WAIT: 1496 tp->t_state = TCPS_LAST_ACK; 1497 break; 1498 } 1499 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1500 soisdisconnected(tp->t_inpcb->inp_socket); 1501 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 1502 if (tp->t_state == TCPS_FIN_WAIT_2) 1503 callout_reset(tp->tt_2msl, tcp_maxidle, 1504 tcp_timer_2msl, tp); 1505 } 1506} 1507