tcp_usrreq.c revision 241132
1/*-
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.
4 * Copyright (c) 2006-2007 Robert N. M. Watson
5 * Copyright (c) 2010-2011 Juniper Networks, Inc.
6 * All rights reserved.
7 *
8 * Portions of this software were developed by Robert N. M. Watson under
9 * contract to Juniper Networks, Inc.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
36 */
37
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_usrreq.c 241132 2012-10-02 12:57:47Z glebius $");
40
41#include "opt_ddb.h"
42#include "opt_inet.h"
43#include "opt_inet6.h"
44#include "opt_tcpdebug.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/limits.h>
49#include <sys/malloc.h>
50#include <sys/kernel.h>
51#include <sys/sysctl.h>
52#include <sys/mbuf.h>
53#ifdef INET6
54#include <sys/domain.h>
55#endif /* INET6 */
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/protosw.h>
59#include <sys/proc.h>
60#include <sys/jail.h>
61
62#ifdef DDB
63#include <ddb/ddb.h>
64#endif
65
66#include <net/if.h>
67#include <net/route.h>
68#include <net/vnet.h>
69
70#include <netinet/cc.h>
71#include <netinet/in.h>
72#include <netinet/in_pcb.h>
73#include <netinet/in_systm.h>
74#include <netinet/in_var.h>
75#include <netinet/ip_var.h>
76#ifdef INET6
77#include <netinet/ip6.h>
78#include <netinet6/in6_pcb.h>
79#include <netinet6/ip6_var.h>
80#include <netinet6/scope6_var.h>
81#endif
82#include <netinet/tcp_fsm.h>
83#include <netinet/tcp_seq.h>
84#include <netinet/tcp_timer.h>
85#include <netinet/tcp_var.h>
86#include <netinet/tcpip.h>
87#ifdef TCPDEBUG
88#include <netinet/tcp_debug.h>
89#endif
90#include <netinet/tcp_offload.h>
91
92/*
93 * TCP protocol interface to socket abstraction.
94 */
/* Socket attach helper. */
static int	tcp_attach(struct socket *so);

/* Active-open helpers, one per address family. */
#ifdef INET
static int	tcp_connect(struct tcpcb *tp, struct sockaddr *nam,
		    struct thread *td);
#endif /* INET */
#ifdef INET6
static int	tcp6_connect(struct tcpcb *tp, struct sockaddr *nam,
		    struct thread *td);
#endif /* INET6 */

/* Teardown helpers and tcp_info export. */
static void	tcp_disconnect(struct tcpcb *tp);
static void	tcp_usrclosed(struct tcpcb *tp);
static void	tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti);
107
/*
 * Tracing helpers used by the pru_*() handlers below.  They rely on the
 * caller having locals named "so" and "tp":
 *
 *	TCPDEBUG0	declares the saved-state variable "ostate";
 *	TCPDEBUG1()	records the current t_state (0 if tp is NULL);
 *	TCPDEBUG2(req)	calls tcp_trace() if tp is set and the socket has
 *			SO_DEBUG enabled.
 *
 * TCPDEBUG2() is wrapped in do { } while (0) so that it behaves as a single
 * statement; a bare "if" would capture a following "else" when the macro is
 * used as the body of an unbraced if.  Without TCPDEBUG all three expand to
 * nothing.
 */
#ifdef TCPDEBUG
#define	TCPDEBUG0	int ostate = 0
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	do {						\
	if (tp && (so->so_options & SO_DEBUG))				\
		tcp_trace(TA_USER, ostate, tp, 0, 0, req);		\
} while (0)
#else
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#endif
118
119/*
120 * TCP attaches to socket via pru_attach(), reserving space,
121 * and an internet control block.
122 */
123static int
124tcp_usr_attach(struct socket *so, int proto, struct thread *td)
125{
126	struct inpcb *inp;
127	struct tcpcb *tp = NULL;
128	int error;
129	TCPDEBUG0;
130
131	inp = sotoinpcb(so);
132	KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
133	TCPDEBUG1();
134
135	error = tcp_attach(so);
136	if (error)
137		goto out;
138
139	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
140		so->so_linger = TCP_LINGERTIME;
141
142	inp = sotoinpcb(so);
143	tp = intotcpcb(inp);
144out:
145	TCPDEBUG2(PRU_ATTACH);
146	return error;
147}
148
149/*
150 * tcp_detach is called when the socket layer loses its final reference
151 * to the socket, be it a file descriptor reference, a reference from TCP,
152 * etc.  At this point, there is only one case in which we will keep around
153 * inpcb state: time wait.
154 *
155 * This function can probably be re-absorbed back into tcp_usr_detach() now
156 * that there is a single detach path.
157 */
158static void
159tcp_detach(struct socket *so, struct inpcb *inp)
160{
161	struct tcpcb *tp;
162
163	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
164	INP_WLOCK_ASSERT(inp);
165
166	KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
167	KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
168
169	tp = intotcpcb(inp);
170
171	if (inp->inp_flags & INP_TIMEWAIT) {
172		/*
173		 * There are two cases to handle: one in which the time wait
174		 * state is being discarded (INP_DROPPED), and one in which
175		 * this connection will remain in timewait.  In the former,
176		 * it is time to discard all state (except tcptw, which has
177		 * already been discarded by the timewait close code, which
178		 * should be further up the call stack somewhere).  In the
179		 * latter case, we detach from the socket, but leave the pcb
180		 * present until timewait ends.
181		 *
182		 * XXXRW: Would it be cleaner to free the tcptw here?
183		 */
184		if (inp->inp_flags & INP_DROPPED) {
185			KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
186			    "INP_DROPPED && tp != NULL"));
187			in_pcbdetach(inp);
188			in_pcbfree(inp);
189		} else {
190			in_pcbdetach(inp);
191			INP_WUNLOCK(inp);
192		}
193	} else {
194		/*
195		 * If the connection is not in timewait, we consider two
196		 * two conditions: one in which no further processing is
197		 * necessary (dropped || embryonic), and one in which TCP is
198		 * not yet done, but no longer requires the socket, so the
199		 * pcb will persist for the time being.
200		 *
201		 * XXXRW: Does the second case still occur?
202		 */
203		if (inp->inp_flags & INP_DROPPED ||
204		    tp->t_state < TCPS_SYN_SENT) {
205			tcp_discardcb(tp);
206			in_pcbdetach(inp);
207			in_pcbfree(inp);
208		} else {
209			in_pcbdetach(inp);
210			INP_WUNLOCK(inp);
211		}
212	}
213}
214
215/*
216 * pru_detach() detaches the TCP protocol from the socket.
217 * If the protocol state is non-embryonic, then can't
218 * do this directly: have to initiate a pru_disconnect(),
219 * which may finish later; embryonic TCB's can just
220 * be discarded here.
221 */
222static void
223tcp_usr_detach(struct socket *so)
224{
225	struct inpcb *inp;
226
227	inp = sotoinpcb(so);
228	KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
229	INP_INFO_WLOCK(&V_tcbinfo);
230	INP_WLOCK(inp);
231	KASSERT(inp->inp_socket != NULL,
232	    ("tcp_usr_detach: inp_socket == NULL"));
233	tcp_detach(so, inp);
234	INP_INFO_WUNLOCK(&V_tcbinfo);
235}
236
237#ifdef INET
238/*
239 * Give the socket an address.
240 */
241static int
242tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
243{
244	int error = 0;
245	struct inpcb *inp;
246	struct tcpcb *tp = NULL;
247	struct sockaddr_in *sinp;
248
249	sinp = (struct sockaddr_in *)nam;
250	if (nam->sa_len != sizeof (*sinp))
251		return (EINVAL);
252	/*
253	 * Must check for multicast addresses and disallow binding
254	 * to them.
255	 */
256	if (sinp->sin_family == AF_INET &&
257	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
258		return (EAFNOSUPPORT);
259
260	TCPDEBUG0;
261	inp = sotoinpcb(so);
262	KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
263	INP_WLOCK(inp);
264	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
265		error = EINVAL;
266		goto out;
267	}
268	tp = intotcpcb(inp);
269	TCPDEBUG1();
270	INP_HASH_WLOCK(&V_tcbinfo);
271	error = in_pcbbind(inp, nam, td->td_ucred);
272	INP_HASH_WUNLOCK(&V_tcbinfo);
273out:
274	TCPDEBUG2(PRU_BIND);
275	INP_WUNLOCK(inp);
276
277	return (error);
278}
279#endif /* INET */
280
281#ifdef INET6
282static int
283tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
284{
285	int error = 0;
286	struct inpcb *inp;
287	struct tcpcb *tp = NULL;
288	struct sockaddr_in6 *sin6p;
289
290	sin6p = (struct sockaddr_in6 *)nam;
291	if (nam->sa_len != sizeof (*sin6p))
292		return (EINVAL);
293	/*
294	 * Must check for multicast addresses and disallow binding
295	 * to them.
296	 */
297	if (sin6p->sin6_family == AF_INET6 &&
298	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
299		return (EAFNOSUPPORT);
300
301	TCPDEBUG0;
302	inp = sotoinpcb(so);
303	KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
304	INP_WLOCK(inp);
305	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
306		error = EINVAL;
307		goto out;
308	}
309	tp = intotcpcb(inp);
310	TCPDEBUG1();
311	INP_HASH_WLOCK(&V_tcbinfo);
312	inp->inp_vflag &= ~INP_IPV4;
313	inp->inp_vflag |= INP_IPV6;
314#ifdef INET
315	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
316		if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
317			inp->inp_vflag |= INP_IPV4;
318		else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
319			struct sockaddr_in sin;
320
321			in6_sin6_2_sin(&sin, sin6p);
322			inp->inp_vflag |= INP_IPV4;
323			inp->inp_vflag &= ~INP_IPV6;
324			error = in_pcbbind(inp, (struct sockaddr *)&sin,
325			    td->td_ucred);
326			INP_HASH_WUNLOCK(&V_tcbinfo);
327			goto out;
328		}
329	}
330#endif
331	error = in6_pcbbind(inp, nam, td->td_ucred);
332	INP_HASH_WUNLOCK(&V_tcbinfo);
333out:
334	TCPDEBUG2(PRU_BIND);
335	INP_WUNLOCK(inp);
336	return (error);
337}
338#endif /* INET6 */
339
340#ifdef INET
341/*
342 * Prepare to accept connections.
343 */
344static int
345tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
346{
347	int error = 0;
348	struct inpcb *inp;
349	struct tcpcb *tp = NULL;
350
351	TCPDEBUG0;
352	inp = sotoinpcb(so);
353	KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
354	INP_WLOCK(inp);
355	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
356		error = EINVAL;
357		goto out;
358	}
359	tp = intotcpcb(inp);
360	TCPDEBUG1();
361	SOCK_LOCK(so);
362	error = solisten_proto_check(so);
363	INP_HASH_WLOCK(&V_tcbinfo);
364	if (error == 0 && inp->inp_lport == 0)
365		error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
366	INP_HASH_WUNLOCK(&V_tcbinfo);
367	if (error == 0) {
368		tp->t_state = TCPS_LISTEN;
369		solisten_proto(so, backlog);
370		tcp_offload_listen_open(tp);
371	}
372	SOCK_UNLOCK(so);
373
374out:
375	TCPDEBUG2(PRU_LISTEN);
376	INP_WUNLOCK(inp);
377	return (error);
378}
379#endif /* INET */
380
381#ifdef INET6
382static int
383tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
384{
385	int error = 0;
386	struct inpcb *inp;
387	struct tcpcb *tp = NULL;
388
389	TCPDEBUG0;
390	inp = sotoinpcb(so);
391	KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
392	INP_WLOCK(inp);
393	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
394		error = EINVAL;
395		goto out;
396	}
397	tp = intotcpcb(inp);
398	TCPDEBUG1();
399	SOCK_LOCK(so);
400	error = solisten_proto_check(so);
401	INP_HASH_WLOCK(&V_tcbinfo);
402	if (error == 0 && inp->inp_lport == 0) {
403		inp->inp_vflag &= ~INP_IPV4;
404		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
405			inp->inp_vflag |= INP_IPV4;
406		error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
407	}
408	INP_HASH_WUNLOCK(&V_tcbinfo);
409	if (error == 0) {
410		tp->t_state = TCPS_LISTEN;
411		solisten_proto(so, backlog);
412	}
413	SOCK_UNLOCK(so);
414
415out:
416	TCPDEBUG2(PRU_LISTEN);
417	INP_WUNLOCK(inp);
418	return (error);
419}
420#endif /* INET6 */
421
422#ifdef INET
423/*
424 * Initiate connection to peer.
425 * Create a template for use in transmissions on this connection.
426 * Enter SYN_SENT state, and mark socket as connecting.
427 * Start keep-alive timer, and seed output sequence space.
428 * Send initial segment on connection.
429 */
430static int
431tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
432{
433	int error = 0;
434	struct inpcb *inp;
435	struct tcpcb *tp = NULL;
436	struct sockaddr_in *sinp;
437
438	sinp = (struct sockaddr_in *)nam;
439	if (nam->sa_len != sizeof (*sinp))
440		return (EINVAL);
441	/*
442	 * Must disallow TCP ``connections'' to multicast addresses.
443	 */
444	if (sinp->sin_family == AF_INET
445	    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
446		return (EAFNOSUPPORT);
447	if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
448		return (error);
449
450	TCPDEBUG0;
451	inp = sotoinpcb(so);
452	KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
453	INP_WLOCK(inp);
454	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
455		error = EINVAL;
456		goto out;
457	}
458	tp = intotcpcb(inp);
459	TCPDEBUG1();
460	if ((error = tcp_connect(tp, nam, td)) != 0)
461		goto out;
462	error = tcp_output_connect(so, nam);
463out:
464	TCPDEBUG2(PRU_CONNECT);
465	INP_WUNLOCK(inp);
466	return (error);
467}
468#endif /* INET */
469
470#ifdef INET6
471static int
472tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
473{
474	int error = 0;
475	struct inpcb *inp;
476	struct tcpcb *tp = NULL;
477	struct sockaddr_in6 *sin6p;
478
479	TCPDEBUG0;
480
481	sin6p = (struct sockaddr_in6 *)nam;
482	if (nam->sa_len != sizeof (*sin6p))
483		return (EINVAL);
484	/*
485	 * Must disallow TCP ``connections'' to multicast addresses.
486	 */
487	if (sin6p->sin6_family == AF_INET6
488	    && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
489		return (EAFNOSUPPORT);
490
491	inp = sotoinpcb(so);
492	KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
493	INP_WLOCK(inp);
494	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
495		error = EINVAL;
496		goto out;
497	}
498	tp = intotcpcb(inp);
499	TCPDEBUG1();
500#ifdef INET
501	/*
502	 * XXXRW: Some confusion: V4/V6 flags relate to binding, and
503	 * therefore probably require the hash lock, which isn't held here.
504	 * Is this a significant problem?
505	 */
506	if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
507		struct sockaddr_in sin;
508
509		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
510			error = EINVAL;
511			goto out;
512		}
513
514		in6_sin6_2_sin(&sin, sin6p);
515		inp->inp_vflag |= INP_IPV4;
516		inp->inp_vflag &= ~INP_IPV6;
517		if ((error = prison_remote_ip4(td->td_ucred,
518		    &sin.sin_addr)) != 0)
519			goto out;
520		if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
521			goto out;
522		error = tcp_output_connect(so, nam);
523		goto out;
524	}
525#endif
526	inp->inp_vflag &= ~INP_IPV4;
527	inp->inp_vflag |= INP_IPV6;
528	inp->inp_inc.inc_flags |= INC_ISIPV6;
529	if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
530		goto out;
531	if ((error = tcp6_connect(tp, nam, td)) != 0)
532		goto out;
533	error = tcp_output_connect(so, nam);
534
535out:
536	TCPDEBUG2(PRU_CONNECT);
537	INP_WUNLOCK(inp);
538	return (error);
539}
540#endif /* INET6 */
541
542/*
543 * Initiate disconnect from peer.
544 * If connection never passed embryonic stage, just drop;
545 * else if don't need to let data drain, then can just drop anyways,
546 * else have to begin TCP shutdown process: mark socket disconnecting,
547 * drain unread data, state switch to reflect user close, and
548 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
549 * when peer sends FIN and acks ours.
550 *
551 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
552 */
553static int
554tcp_usr_disconnect(struct socket *so)
555{
556	struct inpcb *inp;
557	struct tcpcb *tp = NULL;
558	int error = 0;
559
560	TCPDEBUG0;
561	INP_INFO_WLOCK(&V_tcbinfo);
562	inp = sotoinpcb(so);
563	KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
564	INP_WLOCK(inp);
565	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
566		error = ECONNRESET;
567		goto out;
568	}
569	tp = intotcpcb(inp);
570	TCPDEBUG1();
571	tcp_disconnect(tp);
572out:
573	TCPDEBUG2(PRU_DISCONNECT);
574	INP_WUNLOCK(inp);
575	INP_INFO_WUNLOCK(&V_tcbinfo);
576	return (error);
577}
578
579#ifdef INET
580/*
581 * Accept a connection.  Essentially all the work is done at higher levels;
582 * just return the address of the peer, storing through addr.
583 *
584 * The rationale for acquiring the tcbinfo lock here is somewhat complicated,
585 * and is described in detail in the commit log entry for r175612.  Acquiring
586 * it delays an accept(2) racing with sonewconn(), which inserts the socket
587 * before the inpcb address/port fields are initialized.  A better fix would
588 * prevent the socket from being placed in the listen queue until all fields
589 * are fully initialized.
590 */
591static int
592tcp_usr_accept(struct socket *so, struct sockaddr **nam)
593{
594	int error = 0;
595	struct inpcb *inp = NULL;
596	struct tcpcb *tp = NULL;
597	struct in_addr addr;
598	in_port_t port = 0;
599	TCPDEBUG0;
600
601	if (so->so_state & SS_ISDISCONNECTED)
602		return (ECONNABORTED);
603
604	inp = sotoinpcb(so);
605	KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
606	INP_INFO_RLOCK(&V_tcbinfo);
607	INP_WLOCK(inp);
608	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
609		error = ECONNABORTED;
610		goto out;
611	}
612	tp = intotcpcb(inp);
613	TCPDEBUG1();
614
615	/*
616	 * We inline in_getpeeraddr and COMMON_END here, so that we can
617	 * copy the data of interest and defer the malloc until after we
618	 * release the lock.
619	 */
620	port = inp->inp_fport;
621	addr = inp->inp_faddr;
622
623out:
624	TCPDEBUG2(PRU_ACCEPT);
625	INP_WUNLOCK(inp);
626	INP_INFO_RUNLOCK(&V_tcbinfo);
627	if (error == 0)
628		*nam = in_sockaddr(port, &addr);
629	return error;
630}
631#endif /* INET */
632
633#ifdef INET6
634static int
635tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
636{
637	struct inpcb *inp = NULL;
638	int error = 0;
639	struct tcpcb *tp = NULL;
640	struct in_addr addr;
641	struct in6_addr addr6;
642	in_port_t port = 0;
643	int v4 = 0;
644	TCPDEBUG0;
645
646	if (so->so_state & SS_ISDISCONNECTED)
647		return (ECONNABORTED);
648
649	inp = sotoinpcb(so);
650	KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
651	INP_INFO_RLOCK(&V_tcbinfo);
652	INP_WLOCK(inp);
653	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
654		error = ECONNABORTED;
655		goto out;
656	}
657	tp = intotcpcb(inp);
658	TCPDEBUG1();
659
660	/*
661	 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can
662	 * copy the data of interest and defer the malloc until after we
663	 * release the lock.
664	 */
665	if (inp->inp_vflag & INP_IPV4) {
666		v4 = 1;
667		port = inp->inp_fport;
668		addr = inp->inp_faddr;
669	} else {
670		port = inp->inp_fport;
671		addr6 = inp->in6p_faddr;
672	}
673
674out:
675	TCPDEBUG2(PRU_ACCEPT);
676	INP_WUNLOCK(inp);
677	INP_INFO_RUNLOCK(&V_tcbinfo);
678	if (error == 0) {
679		if (v4)
680			*nam = in6_v4mapsin6_sockaddr(port, &addr);
681		else
682			*nam = in6_sockaddr(port, &addr6);
683	}
684	return error;
685}
686#endif /* INET6 */
687
688/*
689 * Mark the connection as being incapable of further output.
690 */
691static int
692tcp_usr_shutdown(struct socket *so)
693{
694	int error = 0;
695	struct inpcb *inp;
696	struct tcpcb *tp = NULL;
697
698	TCPDEBUG0;
699	INP_INFO_WLOCK(&V_tcbinfo);
700	inp = sotoinpcb(so);
701	KASSERT(inp != NULL, ("inp == NULL"));
702	INP_WLOCK(inp);
703	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
704		error = ECONNRESET;
705		goto out;
706	}
707	tp = intotcpcb(inp);
708	TCPDEBUG1();
709	socantsendmore(so);
710	tcp_usrclosed(tp);
711	if (!(inp->inp_flags & INP_DROPPED))
712		error = tcp_output_disconnect(tp);
713
714out:
715	TCPDEBUG2(PRU_SHUTDOWN);
716	INP_WUNLOCK(inp);
717	INP_INFO_WUNLOCK(&V_tcbinfo);
718
719	return (error);
720}
721
722/*
723 * After a receive, possibly send window update to peer.
724 */
725static int
726tcp_usr_rcvd(struct socket *so, int flags)
727{
728	struct inpcb *inp;
729	struct tcpcb *tp = NULL;
730	int error = 0;
731
732	TCPDEBUG0;
733	inp = sotoinpcb(so);
734	KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
735	INP_WLOCK(inp);
736	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
737		error = ECONNRESET;
738		goto out;
739	}
740	tp = intotcpcb(inp);
741	TCPDEBUG1();
742	tcp_output_rcvd(tp);
743
744out:
745	TCPDEBUG2(PRU_RCVD);
746	INP_WUNLOCK(inp);
747	return (error);
748}
749
750/*
751 * Do a send by putting data in output queue and updating urgent
752 * marker if URG set.  Possibly send more data.  Unlike the other
753 * pru_*() routines, the mbuf chains are our responsibility.  We
754 * must either enqueue them or free them.  The other pru_* routines
755 * generally are caller-frees.
756 */
757static int
758tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
759    struct sockaddr *nam, struct mbuf *control, struct thread *td)
760{
761	int error = 0;
762	struct inpcb *inp;
763	struct tcpcb *tp = NULL;
764#ifdef INET6
765	int isipv6;
766#endif
767	TCPDEBUG0;
768
769	/*
770	 * We require the pcbinfo lock if we will close the socket as part of
771	 * this call.
772	 */
773	if (flags & PRUS_EOF)
774		INP_INFO_WLOCK(&V_tcbinfo);
775	inp = sotoinpcb(so);
776	KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
777	INP_WLOCK(inp);
778	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
779		if (control)
780			m_freem(control);
781		if (m)
782			m_freem(m);
783		error = ECONNRESET;
784		goto out;
785	}
786#ifdef INET6
787	isipv6 = nam && nam->sa_family == AF_INET6;
788#endif /* INET6 */
789	tp = intotcpcb(inp);
790	TCPDEBUG1();
791	if (control) {
792		/* TCP doesn't do control messages (rights, creds, etc) */
793		if (control->m_len) {
794			m_freem(control);
795			if (m)
796				m_freem(m);
797			error = EINVAL;
798			goto out;
799		}
800		m_freem(control);	/* empty control, just free it */
801	}
802	if (!(flags & PRUS_OOB)) {
803		sbappendstream(&so->so_snd, m);
804		if (nam && tp->t_state < TCPS_SYN_SENT) {
805			/*
806			 * Do implied connect if not yet connected,
807			 * initialize window to default value, and
808			 * initialize maxseg/maxopd using peer's cached
809			 * MSS.
810			 */
811#ifdef INET6
812			if (isipv6)
813				error = tcp6_connect(tp, nam, td);
814#endif /* INET6 */
815#if defined(INET6) && defined(INET)
816			else
817#endif
818#ifdef INET
819				error = tcp_connect(tp, nam, td);
820#endif
821			if (error)
822				goto out;
823			tp->snd_wnd = TTCP_CLIENT_SND_WND;
824			tcp_mss(tp, -1);
825		}
826		if (flags & PRUS_EOF) {
827			/*
828			 * Close the send side of the connection after
829			 * the data is sent.
830			 */
831			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
832			socantsendmore(so);
833			tcp_usrclosed(tp);
834		}
835		if (!(inp->inp_flags & INP_DROPPED)) {
836			if (flags & PRUS_MORETOCOME)
837				tp->t_flags |= TF_MORETOCOME;
838			error = tcp_output_send(tp);
839			if (flags & PRUS_MORETOCOME)
840				tp->t_flags &= ~TF_MORETOCOME;
841		}
842	} else {
843		/*
844		 * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
845		 */
846		SOCKBUF_LOCK(&so->so_snd);
847		if (sbspace(&so->so_snd) < -512) {
848			SOCKBUF_UNLOCK(&so->so_snd);
849			m_freem(m);
850			error = ENOBUFS;
851			goto out;
852		}
853		/*
854		 * According to RFC961 (Assigned Protocols),
855		 * the urgent pointer points to the last octet
856		 * of urgent data.  We continue, however,
857		 * to consider it to indicate the first octet
858		 * of data past the urgent section.
859		 * Otherwise, snd_up should be one lower.
860		 */
861		sbappendstream_locked(&so->so_snd, m);
862		SOCKBUF_UNLOCK(&so->so_snd);
863		if (nam && tp->t_state < TCPS_SYN_SENT) {
864			/*
865			 * Do implied connect if not yet connected,
866			 * initialize window to default value, and
867			 * initialize maxseg/maxopd using peer's cached
868			 * MSS.
869			 */
870#ifdef INET6
871			if (isipv6)
872				error = tcp6_connect(tp, nam, td);
873#endif /* INET6 */
874#if defined(INET6) && defined(INET)
875			else
876#endif
877#ifdef INET
878				error = tcp_connect(tp, nam, td);
879#endif
880			if (error)
881				goto out;
882			tp->snd_wnd = TTCP_CLIENT_SND_WND;
883			tcp_mss(tp, -1);
884		}
885		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
886		tp->t_flags |= TF_FORCEDATA;
887		error = tcp_output_send(tp);
888		tp->t_flags &= ~TF_FORCEDATA;
889	}
890out:
891	TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
892		  ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
893	INP_WUNLOCK(inp);
894	if (flags & PRUS_EOF)
895		INP_INFO_WUNLOCK(&V_tcbinfo);
896	return (error);
897}
898
899/*
900 * Abort the TCP.  Drop the connection abruptly.
901 */
902static void
903tcp_usr_abort(struct socket *so)
904{
905	struct inpcb *inp;
906	struct tcpcb *tp = NULL;
907	TCPDEBUG0;
908
909	inp = sotoinpcb(so);
910	KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
911
912	INP_INFO_WLOCK(&V_tcbinfo);
913	INP_WLOCK(inp);
914	KASSERT(inp->inp_socket != NULL,
915	    ("tcp_usr_abort: inp_socket == NULL"));
916
917	/*
918	 * If we still have full TCP state, and we're not dropped, drop.
919	 */
920	if (!(inp->inp_flags & INP_TIMEWAIT) &&
921	    !(inp->inp_flags & INP_DROPPED)) {
922		tp = intotcpcb(inp);
923		TCPDEBUG1();
924		tcp_drop(tp, ECONNABORTED);
925		TCPDEBUG2(PRU_ABORT);
926	}
927	if (!(inp->inp_flags & INP_DROPPED)) {
928		SOCK_LOCK(so);
929		so->so_state |= SS_PROTOREF;
930		SOCK_UNLOCK(so);
931		inp->inp_flags |= INP_SOCKREF;
932	}
933	INP_WUNLOCK(inp);
934	INP_INFO_WUNLOCK(&V_tcbinfo);
935}
936
937/*
938 * TCP socket is closed.  Start friendly disconnect.
939 */
940static void
941tcp_usr_close(struct socket *so)
942{
943	struct inpcb *inp;
944	struct tcpcb *tp = NULL;
945	TCPDEBUG0;
946
947	inp = sotoinpcb(so);
948	KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
949
950	INP_INFO_WLOCK(&V_tcbinfo);
951	INP_WLOCK(inp);
952	KASSERT(inp->inp_socket != NULL,
953	    ("tcp_usr_close: inp_socket == NULL"));
954
955	/*
956	 * If we still have full TCP state, and we're not dropped, initiate
957	 * a disconnect.
958	 */
959	if (!(inp->inp_flags & INP_TIMEWAIT) &&
960	    !(inp->inp_flags & INP_DROPPED)) {
961		tp = intotcpcb(inp);
962		TCPDEBUG1();
963		tcp_disconnect(tp);
964		TCPDEBUG2(PRU_CLOSE);
965	}
966	if (!(inp->inp_flags & INP_DROPPED)) {
967		SOCK_LOCK(so);
968		so->so_state |= SS_PROTOREF;
969		SOCK_UNLOCK(so);
970		inp->inp_flags |= INP_SOCKREF;
971	}
972	INP_WUNLOCK(inp);
973	INP_INFO_WUNLOCK(&V_tcbinfo);
974}
975
976/*
977 * Receive out-of-band data.
978 */
979static int
980tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
981{
982	int error = 0;
983	struct inpcb *inp;
984	struct tcpcb *tp = NULL;
985
986	TCPDEBUG0;
987	inp = sotoinpcb(so);
988	KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
989	INP_WLOCK(inp);
990	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
991		error = ECONNRESET;
992		goto out;
993	}
994	tp = intotcpcb(inp);
995	TCPDEBUG1();
996	if ((so->so_oobmark == 0 &&
997	     (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
998	    so->so_options & SO_OOBINLINE ||
999	    tp->t_oobflags & TCPOOB_HADDATA) {
1000		error = EINVAL;
1001		goto out;
1002	}
1003	if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1004		error = EWOULDBLOCK;
1005		goto out;
1006	}
1007	m->m_len = 1;
1008	*mtod(m, caddr_t) = tp->t_iobc;
1009	if ((flags & MSG_PEEK) == 0)
1010		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1011
1012out:
1013	TCPDEBUG2(PRU_RCVOOB);
1014	INP_WUNLOCK(inp);
1015	return (error);
1016}
1017
1018#ifdef INET
1019struct pr_usrreqs tcp_usrreqs = {
1020	.pru_abort =		tcp_usr_abort,
1021	.pru_accept =		tcp_usr_accept,
1022	.pru_attach =		tcp_usr_attach,
1023	.pru_bind =		tcp_usr_bind,
1024	.pru_connect =		tcp_usr_connect,
1025	.pru_control =		in_control,
1026	.pru_detach =		tcp_usr_detach,
1027	.pru_disconnect =	tcp_usr_disconnect,
1028	.pru_listen =		tcp_usr_listen,
1029	.pru_peeraddr =		in_getpeeraddr,
1030	.pru_rcvd =		tcp_usr_rcvd,
1031	.pru_rcvoob =		tcp_usr_rcvoob,
1032	.pru_send =		tcp_usr_send,
1033	.pru_shutdown =		tcp_usr_shutdown,
1034	.pru_sockaddr =		in_getsockaddr,
1035	.pru_sosetlabel =	in_pcbsosetlabel,
1036	.pru_close =		tcp_usr_close,
1037};
1038#endif /* INET */
1039
1040#ifdef INET6
1041struct pr_usrreqs tcp6_usrreqs = {
1042	.pru_abort =		tcp_usr_abort,
1043	.pru_accept =		tcp6_usr_accept,
1044	.pru_attach =		tcp_usr_attach,
1045	.pru_bind =		tcp6_usr_bind,
1046	.pru_connect =		tcp6_usr_connect,
1047	.pru_control =		in6_control,
1048	.pru_detach =		tcp_usr_detach,
1049	.pru_disconnect =	tcp_usr_disconnect,
1050	.pru_listen =		tcp6_usr_listen,
1051	.pru_peeraddr =		in6_mapped_peeraddr,
1052	.pru_rcvd =		tcp_usr_rcvd,
1053	.pru_rcvoob =		tcp_usr_rcvoob,
1054	.pru_send =		tcp_usr_send,
1055	.pru_shutdown =		tcp_usr_shutdown,
1056	.pru_sockaddr =		in6_mapped_sockaddr,
1057	.pru_sosetlabel =	in_pcbsosetlabel,
1058	.pru_close =		tcp_usr_close,
1059};
1060#endif /* INET6 */
1061
1062#ifdef INET
1063/*
1064 * Common subroutine to open a TCP connection to remote host specified
1065 * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
1066 * port number if needed.  Call in_pcbconnect_setup to do the routing and
1067 * to choose a local host address (interface).  If there is an existing
1068 * incarnation of the same connection in TIME-WAIT state and if the remote
1069 * host was sending CC options and if the connection duration was < MSL, then
1070 * truncate the previous TIME-WAIT state and proceed.
1071 * Initialize connection parameters and enter SYN-SENT state.
1072 */
1073static int
1074tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
1075{
1076	struct inpcb *inp = tp->t_inpcb, *oinp;
1077	struct socket *so = inp->inp_socket;
1078	struct in_addr laddr;
1079	u_short lport;
1080	int error;
1081
1082	INP_WLOCK_ASSERT(inp);
1083	INP_HASH_WLOCK(&V_tcbinfo);
1084
1085	if (inp->inp_lport == 0) {
1086		error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
1087		if (error)
1088			goto out;
1089	}
1090
1091	/*
1092	 * Cannot simply call in_pcbconnect, because there might be an
1093	 * earlier incarnation of this same connection still in
1094	 * TIME_WAIT state, creating an ADDRINUSE error.
1095	 */
1096	laddr = inp->inp_laddr;
1097	lport = inp->inp_lport;
1098	error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
1099	    &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
1100	if (error && oinp == NULL)
1101		goto out;
1102	if (oinp) {
1103		error = EADDRINUSE;
1104		goto out;
1105	}
1106	inp->inp_laddr = laddr;
1107	in_pcbrehash(inp);
1108	INP_HASH_WUNLOCK(&V_tcbinfo);
1109
1110	/*
1111	 * Compute window scaling to request:
1112	 * Scale to fit into sweet spot.  See tcp_syncache.c.
1113	 * XXX: This should move to tcp_output().
1114	 */
1115	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
1116	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
1117		tp->request_r_scale++;
1118
1119	soisconnecting(so);
1120	TCPSTAT_INC(tcps_connattempt);
1121	tp->t_state = TCPS_SYN_SENT;
1122	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
1123	tp->iss = tcp_new_isn(tp);
1124	tcp_sendseqinit(tp);
1125
1126	return 0;
1127
1128out:
1129	INP_HASH_WUNLOCK(&V_tcbinfo);
1130	return (error);
1131}
1132#endif /* INET */
1133
1134#ifdef INET6
1135static int
1136tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
1137{
1138	struct inpcb *inp = tp->t_inpcb, *oinp;
1139	struct socket *so = inp->inp_socket;
1140	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
1141	struct in6_addr addr6;
1142	int error;
1143
1144	INP_WLOCK_ASSERT(inp);
1145	INP_HASH_WLOCK(&V_tcbinfo);
1146
1147	if (inp->inp_lport == 0) {
1148		error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
1149		if (error)
1150			goto out;
1151	}
1152
1153	/*
1154	 * Cannot simply call in_pcbconnect, because there might be an
1155	 * earlier incarnation of this same connection still in
1156	 * TIME_WAIT state, creating an ADDRINUSE error.
1157	 * in6_pcbladdr() also handles scope zone IDs.
1158	 *
1159	 * XXXRW: We wouldn't need to expose in6_pcblookup_hash_locked()
1160	 * outside of in6_pcb.c if there were an in6_pcbconnect_setup().
1161	 */
1162	error = in6_pcbladdr(inp, nam, &addr6);
1163	if (error)
1164		goto out;
1165	oinp = in6_pcblookup_hash_locked(inp->inp_pcbinfo,
1166				  &sin6->sin6_addr, sin6->sin6_port,
1167				  IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
1168				  ? &addr6
1169				  : &inp->in6p_laddr,
1170				  inp->inp_lport,  0, NULL);
1171	if (oinp) {
1172		error = EADDRINUSE;
1173		goto out;
1174	}
1175	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
1176		inp->in6p_laddr = addr6;
1177	inp->in6p_faddr = sin6->sin6_addr;
1178	inp->inp_fport = sin6->sin6_port;
1179	/* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
1180	inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
1181	if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
1182		inp->inp_flow |=
1183		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
1184	in_pcbrehash(inp);
1185	INP_HASH_WUNLOCK(&V_tcbinfo);
1186
1187	/* Compute window scaling to request.  */
1188	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
1189	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
1190		tp->request_r_scale++;
1191
1192	soisconnecting(so);
1193	TCPSTAT_INC(tcps_connattempt);
1194	tp->t_state = TCPS_SYN_SENT;
1195	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
1196	tp->iss = tcp_new_isn(tp);
1197	tcp_sendseqinit(tp);
1198
1199	return 0;
1200
1201out:
1202	INP_HASH_WUNLOCK(&V_tcbinfo);
1203	return error;
1204}
1205#endif /* INET6 */
1206
1207/*
1208 * Export TCP internal state information via a struct tcp_info, based on the
1209 * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
1210 * (TCP state machine, etc).  We export all information using FreeBSD-native
1211 * constants -- for example, the numeric values for tcpi_state will differ
1212 * from Linux.
1213 */
1214static void
1215tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
1216{
1217
1218	INP_WLOCK_ASSERT(tp->t_inpcb);
1219	bzero(ti, sizeof(*ti));
1220
1221	ti->tcpi_state = tp->t_state;
1222	if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
1223		ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1224	if (tp->t_flags & TF_SACK_PERMIT)
1225		ti->tcpi_options |= TCPI_OPT_SACK;
1226	if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
1227		ti->tcpi_options |= TCPI_OPT_WSCALE;
1228		ti->tcpi_snd_wscale = tp->snd_scale;
1229		ti->tcpi_rcv_wscale = tp->rcv_scale;
1230	}
1231
1232	ti->tcpi_rto = tp->t_rxtcur * tick;
1233	ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick;
1234	ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
1235	ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
1236
1237	ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1238	ti->tcpi_snd_cwnd = tp->snd_cwnd;
1239
1240	/*
1241	 * FreeBSD-specific extension fields for tcp_info.
1242	 */
1243	ti->tcpi_rcv_space = tp->rcv_wnd;
1244	ti->tcpi_rcv_nxt = tp->rcv_nxt;
1245	ti->tcpi_snd_wnd = tp->snd_wnd;
1246	ti->tcpi_snd_bwnd = 0;		/* Unused, kept for compat. */
1247	ti->tcpi_snd_nxt = tp->snd_nxt;
1248	ti->tcpi_snd_mss = tp->t_maxseg;
1249	ti->tcpi_rcv_mss = tp->t_maxseg;
1250	if (tp->t_flags & TF_TOE)
1251		ti->tcpi_options |= TCPI_OPT_TOE;
1252	ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
1253	ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
1254	ti->tcpi_snd_zerowin = tp->t_sndzerowin;
1255}
1256
1257/*
1258 * tcp_ctloutput() must drop the inpcb lock before performing copyin on
1259 * socket option arguments.  When it re-acquires the lock after the copy, it
1260 * has to revalidate that the connection is still valid for the socket
1261 * option.
1262 */
1263#define INP_WLOCK_RECHECK(inp) do {					\
1264	INP_WLOCK(inp);							\
1265	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {		\
1266		INP_WUNLOCK(inp);					\
1267		return (ECONNRESET);					\
1268	}								\
1269	tp = intotcpcb(inp);						\
1270} while(0)
1271
1272int
1273tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1274{
1275	int	error, opt, optval;
1276	u_int	ui;
1277	struct	inpcb *inp;
1278	struct	tcpcb *tp;
1279	struct	tcp_info ti;
1280	char buf[TCP_CA_NAME_MAX];
1281	struct cc_algo *algo;
1282
1283	error = 0;
1284	inp = sotoinpcb(so);
1285	KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
1286	INP_WLOCK(inp);
1287	if (sopt->sopt_level != IPPROTO_TCP) {
1288#ifdef INET6
1289		if (inp->inp_vflag & INP_IPV6PROTO) {
1290			INP_WUNLOCK(inp);
1291			error = ip6_ctloutput(so, sopt);
1292		}
1293#endif /* INET6 */
1294#if defined(INET6) && defined(INET)
1295		else
1296#endif
1297#ifdef INET
1298		{
1299			INP_WUNLOCK(inp);
1300			error = ip_ctloutput(so, sopt);
1301		}
1302#endif
1303		return (error);
1304	}
1305	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
1306		INP_WUNLOCK(inp);
1307		return (ECONNRESET);
1308	}
1309
1310	switch (sopt->sopt_dir) {
1311	case SOPT_SET:
1312		switch (sopt->sopt_name) {
1313#ifdef TCP_SIGNATURE
1314		case TCP_MD5SIG:
1315			INP_WUNLOCK(inp);
1316			error = sooptcopyin(sopt, &optval, sizeof optval,
1317			    sizeof optval);
1318			if (error)
1319				return (error);
1320
1321			INP_WLOCK_RECHECK(inp);
1322			if (optval > 0)
1323				tp->t_flags |= TF_SIGNATURE;
1324			else
1325				tp->t_flags &= ~TF_SIGNATURE;
1326			INP_WUNLOCK(inp);
1327			break;
1328#endif /* TCP_SIGNATURE */
1329		case TCP_NODELAY:
1330		case TCP_NOOPT:
1331			INP_WUNLOCK(inp);
1332			error = sooptcopyin(sopt, &optval, sizeof optval,
1333			    sizeof optval);
1334			if (error)
1335				return (error);
1336
1337			INP_WLOCK_RECHECK(inp);
1338			switch (sopt->sopt_name) {
1339			case TCP_NODELAY:
1340				opt = TF_NODELAY;
1341				break;
1342			case TCP_NOOPT:
1343				opt = TF_NOOPT;
1344				break;
1345			default:
1346				opt = 0; /* dead code to fool gcc */
1347				break;
1348			}
1349
1350			if (optval)
1351				tp->t_flags |= opt;
1352			else
1353				tp->t_flags &= ~opt;
1354			INP_WUNLOCK(inp);
1355			break;
1356
1357		case TCP_NOPUSH:
1358			INP_WUNLOCK(inp);
1359			error = sooptcopyin(sopt, &optval, sizeof optval,
1360			    sizeof optval);
1361			if (error)
1362				return (error);
1363
1364			INP_WLOCK_RECHECK(inp);
1365			if (optval)
1366				tp->t_flags |= TF_NOPUSH;
1367			else if (tp->t_flags & TF_NOPUSH) {
1368				tp->t_flags &= ~TF_NOPUSH;
1369				if (TCPS_HAVEESTABLISHED(tp->t_state))
1370					error = tcp_output(tp);
1371			}
1372			INP_WUNLOCK(inp);
1373			break;
1374
1375		case TCP_MAXSEG:
1376			INP_WUNLOCK(inp);
1377			error = sooptcopyin(sopt, &optval, sizeof optval,
1378			    sizeof optval);
1379			if (error)
1380				return (error);
1381
1382			INP_WLOCK_RECHECK(inp);
1383			if (optval > 0 && optval <= tp->t_maxseg &&
1384			    optval + 40 >= V_tcp_minmss)
1385				tp->t_maxseg = optval;
1386			else
1387				error = EINVAL;
1388			INP_WUNLOCK(inp);
1389			break;
1390
1391		case TCP_INFO:
1392			INP_WUNLOCK(inp);
1393			error = EINVAL;
1394			break;
1395
1396		case TCP_CONGESTION:
1397			INP_WUNLOCK(inp);
1398			bzero(buf, sizeof(buf));
1399			error = sooptcopyin(sopt, &buf, sizeof(buf), 1);
1400			if (error)
1401				break;
1402			INP_WLOCK_RECHECK(inp);
1403			/*
1404			 * Return EINVAL if we can't find the requested cc algo.
1405			 */
1406			error = EINVAL;
1407			CC_LIST_RLOCK();
1408			STAILQ_FOREACH(algo, &cc_list, entries) {
1409				if (strncmp(buf, algo->name, TCP_CA_NAME_MAX)
1410				    == 0) {
1411					/* We've found the requested algo. */
1412					error = 0;
1413					/*
1414					 * We hold a write lock over the tcb
1415					 * so it's safe to do these things
1416					 * without ordering concerns.
1417					 */
1418					if (CC_ALGO(tp)->cb_destroy != NULL)
1419						CC_ALGO(tp)->cb_destroy(tp->ccv);
1420					CC_ALGO(tp) = algo;
1421					/*
1422					 * If something goes pear shaped
1423					 * initialising the new algo,
1424					 * fall back to newreno (which
1425					 * does not require initialisation).
1426					 */
1427					if (algo->cb_init != NULL)
1428						if (algo->cb_init(tp->ccv) > 0) {
1429							CC_ALGO(tp) = &newreno_cc_algo;
1430							/*
1431							 * The only reason init
1432							 * should fail is
1433							 * because of malloc.
1434							 */
1435							error = ENOMEM;
1436						}
1437					break; /* Break the STAILQ_FOREACH. */
1438				}
1439			}
1440			CC_LIST_RUNLOCK();
1441			INP_WUNLOCK(inp);
1442			break;
1443
1444		case TCP_KEEPIDLE:
1445		case TCP_KEEPINTVL:
1446		case TCP_KEEPINIT:
1447			INP_WUNLOCK(inp);
1448			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
1449			if (error)
1450				return (error);
1451
1452			if (ui > (UINT_MAX / hz)) {
1453				error = EINVAL;
1454				break;
1455			}
1456			ui *= hz;
1457
1458			INP_WLOCK_RECHECK(inp);
1459			switch (sopt->sopt_name) {
1460			case TCP_KEEPIDLE:
1461				tp->t_keepidle = ui;
1462				/*
1463				 * XXX: better check current remaining
1464				 * timeout and "merge" it with new value.
1465				 */
1466				if ((tp->t_state > TCPS_LISTEN) &&
1467				    (tp->t_state <= TCPS_CLOSING))
1468					tcp_timer_activate(tp, TT_KEEP,
1469					    TP_KEEPIDLE(tp));
1470				break;
1471			case TCP_KEEPINTVL:
1472				tp->t_keepintvl = ui;
1473				if ((tp->t_state == TCPS_FIN_WAIT_2) &&
1474				    (TP_MAXIDLE(tp) > 0))
1475					tcp_timer_activate(tp, TT_2MSL,
1476					    TP_MAXIDLE(tp));
1477				break;
1478			case TCP_KEEPINIT:
1479				tp->t_keepinit = ui;
1480				if (tp->t_state == TCPS_SYN_RECEIVED ||
1481				    tp->t_state == TCPS_SYN_SENT)
1482					tcp_timer_activate(tp, TT_KEEP,
1483					    TP_KEEPINIT(tp));
1484				break;
1485			}
1486			INP_WUNLOCK(inp);
1487			break;
1488
1489		case TCP_KEEPCNT:
1490			INP_WUNLOCK(inp);
1491			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
1492			if (error)
1493				return (error);
1494
1495			INP_WLOCK_RECHECK(inp);
1496			tp->t_keepcnt = ui;
1497			if ((tp->t_state == TCPS_FIN_WAIT_2) &&
1498			    (TP_MAXIDLE(tp) > 0))
1499				tcp_timer_activate(tp, TT_2MSL,
1500				    TP_MAXIDLE(tp));
1501			INP_WUNLOCK(inp);
1502			break;
1503
1504		default:
1505			INP_WUNLOCK(inp);
1506			error = ENOPROTOOPT;
1507			break;
1508		}
1509		break;
1510
1511	case SOPT_GET:
1512		tp = intotcpcb(inp);
1513		switch (sopt->sopt_name) {
1514#ifdef TCP_SIGNATURE
1515		case TCP_MD5SIG:
1516			optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
1517			INP_WUNLOCK(inp);
1518			error = sooptcopyout(sopt, &optval, sizeof optval);
1519			break;
1520#endif
1521
1522		case TCP_NODELAY:
1523			optval = tp->t_flags & TF_NODELAY;
1524			INP_WUNLOCK(inp);
1525			error = sooptcopyout(sopt, &optval, sizeof optval);
1526			break;
1527		case TCP_MAXSEG:
1528			optval = tp->t_maxseg;
1529			INP_WUNLOCK(inp);
1530			error = sooptcopyout(sopt, &optval, sizeof optval);
1531			break;
1532		case TCP_NOOPT:
1533			optval = tp->t_flags & TF_NOOPT;
1534			INP_WUNLOCK(inp);
1535			error = sooptcopyout(sopt, &optval, sizeof optval);
1536			break;
1537		case TCP_NOPUSH:
1538			optval = tp->t_flags & TF_NOPUSH;
1539			INP_WUNLOCK(inp);
1540			error = sooptcopyout(sopt, &optval, sizeof optval);
1541			break;
1542		case TCP_INFO:
1543			tcp_fill_info(tp, &ti);
1544			INP_WUNLOCK(inp);
1545			error = sooptcopyout(sopt, &ti, sizeof ti);
1546			break;
1547		case TCP_CONGESTION:
1548			bzero(buf, sizeof(buf));
1549			strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
1550			INP_WUNLOCK(inp);
1551			error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX);
1552			break;
1553		default:
1554			INP_WUNLOCK(inp);
1555			error = ENOPROTOOPT;
1556			break;
1557		}
1558		break;
1559	}
1560	return (error);
1561}
1562#undef INP_WLOCK_RECHECK
1563
1564/*
1565 * tcp_sendspace and tcp_recvspace are the default send and receive window
1566 * sizes, respectively.  These are obsolescent (this information should
1567 * be set by the route).
1568 */
1569u_long	tcp_sendspace = 1024*32;
1570SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
1571    &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
1572u_long	tcp_recvspace = 1024*64;
1573SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
1574    &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
1575
1576/*
1577 * Attach TCP protocol to socket, allocating
1578 * internet protocol control block, tcp control block,
1579 * bufer space, and entering LISTEN state if to accept connections.
1580 */
1581static int
1582tcp_attach(struct socket *so)
1583{
1584	struct tcpcb *tp;
1585	struct inpcb *inp;
1586	int error;
1587
1588	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1589		error = soreserve(so, tcp_sendspace, tcp_recvspace);
1590		if (error)
1591			return (error);
1592	}
1593	so->so_rcv.sb_flags |= SB_AUTOSIZE;
1594	so->so_snd.sb_flags |= SB_AUTOSIZE;
1595	INP_INFO_WLOCK(&V_tcbinfo);
1596	error = in_pcballoc(so, &V_tcbinfo);
1597	if (error) {
1598		INP_INFO_WUNLOCK(&V_tcbinfo);
1599		return (error);
1600	}
1601	inp = sotoinpcb(so);
1602#ifdef INET6
1603	if (inp->inp_vflag & INP_IPV6PROTO) {
1604		inp->inp_vflag |= INP_IPV6;
1605		inp->in6p_hops = -1;	/* use kernel default */
1606	}
1607	else
1608#endif
1609	inp->inp_vflag |= INP_IPV4;
1610	tp = tcp_newtcpcb(inp);
1611	if (tp == NULL) {
1612		in_pcbdetach(inp);
1613		in_pcbfree(inp);
1614		INP_INFO_WUNLOCK(&V_tcbinfo);
1615		return (ENOBUFS);
1616	}
1617	tp->t_state = TCPS_CLOSED;
1618	INP_WUNLOCK(inp);
1619	INP_INFO_WUNLOCK(&V_tcbinfo);
1620	return (0);
1621}
1622
1623/*
1624 * Initiate (or continue) disconnect.
1625 * If embryonic state, just send reset (once).
1626 * If in ``let data drain'' option and linger null, just drop.
1627 * Otherwise (hard), mark socket disconnecting and drop
1628 * current input data; switch states based on user close, and
1629 * send segment to peer (with FIN).
1630 */
1631static void
1632tcp_disconnect(struct tcpcb *tp)
1633{
1634	struct inpcb *inp = tp->t_inpcb;
1635	struct socket *so = inp->inp_socket;
1636
1637	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
1638	INP_WLOCK_ASSERT(inp);
1639
1640	/*
1641	 * Neither tcp_close() nor tcp_drop() should return NULL, as the
1642	 * socket is still open.
1643	 */
1644	if (tp->t_state < TCPS_ESTABLISHED) {
1645		tp = tcp_close(tp);
1646		KASSERT(tp != NULL,
1647		    ("tcp_disconnect: tcp_close() returned NULL"));
1648	} else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
1649		tp = tcp_drop(tp, 0);
1650		KASSERT(tp != NULL,
1651		    ("tcp_disconnect: tcp_drop() returned NULL"));
1652	} else {
1653		soisdisconnecting(so);
1654		sbflush(&so->so_rcv);
1655		tcp_usrclosed(tp);
1656		if (!(inp->inp_flags & INP_DROPPED))
1657			tcp_output_disconnect(tp);
1658	}
1659}
1660
1661/*
1662 * User issued close, and wish to trail through shutdown states:
1663 * if never received SYN, just forget it.  If got a SYN from peer,
1664 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
1665 * If already got a FIN from peer, then almost done; go to LAST_ACK
1666 * state.  In all other cases, have already sent FIN to peer (e.g.
1667 * after PRU_SHUTDOWN), and just have to play tedious game waiting
1668 * for peer to send FIN or not respond to keep-alives, etc.
1669 * We can let the user exit from the close as soon as the FIN is acked.
1670 */
1671static void
1672tcp_usrclosed(struct tcpcb *tp)
1673{
1674
1675	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
1676	INP_WLOCK_ASSERT(tp->t_inpcb);
1677
1678	switch (tp->t_state) {
1679	case TCPS_LISTEN:
1680		tcp_offload_listen_close(tp);
1681		/* FALLTHROUGH */
1682	case TCPS_CLOSED:
1683		tp->t_state = TCPS_CLOSED;
1684		tp = tcp_close(tp);
1685		/*
1686		 * tcp_close() should never return NULL here as the socket is
1687		 * still open.
1688		 */
1689		KASSERT(tp != NULL,
1690		    ("tcp_usrclosed: tcp_close() returned NULL"));
1691		break;
1692
1693	case TCPS_SYN_SENT:
1694	case TCPS_SYN_RECEIVED:
1695		tp->t_flags |= TF_NEEDFIN;
1696		break;
1697
1698	case TCPS_ESTABLISHED:
1699		tp->t_state = TCPS_FIN_WAIT_1;
1700		break;
1701
1702	case TCPS_CLOSE_WAIT:
1703		tp->t_state = TCPS_LAST_ACK;
1704		break;
1705	}
1706	if (tp->t_state >= TCPS_FIN_WAIT_2) {
1707		soisdisconnected(tp->t_inpcb->inp_socket);
1708		/* Prevent the connection hanging in FIN_WAIT_2 forever. */
1709		if (tp->t_state == TCPS_FIN_WAIT_2) {
1710			int timeout;
1711
1712			timeout = (tcp_fast_finwait2_recycle) ?
1713			    tcp_finwait2_timeout : TP_MAXIDLE(tp);
1714			tcp_timer_activate(tp, TT_2MSL, timeout);
1715		}
1716	}
1717}
1718
1719#ifdef DDB
/*
 * Emit "indent" spaces on the debugger console; a zero or negative count
 * prints nothing.
 */
static void
db_print_indent(int indent)
{

	while (indent-- > 0)
		db_printf(" ");
}
1728
1729static void
1730db_print_tstate(int t_state)
1731{
1732
1733	switch (t_state) {
1734	case TCPS_CLOSED:
1735		db_printf("TCPS_CLOSED");
1736		return;
1737
1738	case TCPS_LISTEN:
1739		db_printf("TCPS_LISTEN");
1740		return;
1741
1742	case TCPS_SYN_SENT:
1743		db_printf("TCPS_SYN_SENT");
1744		return;
1745
1746	case TCPS_SYN_RECEIVED:
1747		db_printf("TCPS_SYN_RECEIVED");
1748		return;
1749
1750	case TCPS_ESTABLISHED:
1751		db_printf("TCPS_ESTABLISHED");
1752		return;
1753
1754	case TCPS_CLOSE_WAIT:
1755		db_printf("TCPS_CLOSE_WAIT");
1756		return;
1757
1758	case TCPS_FIN_WAIT_1:
1759		db_printf("TCPS_FIN_WAIT_1");
1760		return;
1761
1762	case TCPS_CLOSING:
1763		db_printf("TCPS_CLOSING");
1764		return;
1765
1766	case TCPS_LAST_ACK:
1767		db_printf("TCPS_LAST_ACK");
1768		return;
1769
1770	case TCPS_FIN_WAIT_2:
1771		db_printf("TCPS_FIN_WAIT_2");
1772		return;
1773
1774	case TCPS_TIME_WAIT:
1775		db_printf("TCPS_TIME_WAIT");
1776		return;
1777
1778	default:
1779		db_printf("unknown");
1780		return;
1781	}
1782}
1783
1784static void
1785db_print_tflags(u_int t_flags)
1786{
1787	int comma;
1788
1789	comma = 0;
1790	if (t_flags & TF_ACKNOW) {
1791		db_printf("%sTF_ACKNOW", comma ? ", " : "");
1792		comma = 1;
1793	}
1794	if (t_flags & TF_DELACK) {
1795		db_printf("%sTF_DELACK", comma ? ", " : "");
1796		comma = 1;
1797	}
1798	if (t_flags & TF_NODELAY) {
1799		db_printf("%sTF_NODELAY", comma ? ", " : "");
1800		comma = 1;
1801	}
1802	if (t_flags & TF_NOOPT) {
1803		db_printf("%sTF_NOOPT", comma ? ", " : "");
1804		comma = 1;
1805	}
1806	if (t_flags & TF_SENTFIN) {
1807		db_printf("%sTF_SENTFIN", comma ? ", " : "");
1808		comma = 1;
1809	}
1810	if (t_flags & TF_REQ_SCALE) {
1811		db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
1812		comma = 1;
1813	}
1814	if (t_flags & TF_RCVD_SCALE) {
1815		db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");
1816		comma = 1;
1817	}
1818	if (t_flags & TF_REQ_TSTMP) {
1819		db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
1820		comma = 1;
1821	}
1822	if (t_flags & TF_RCVD_TSTMP) {
1823		db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
1824		comma = 1;
1825	}
1826	if (t_flags & TF_SACK_PERMIT) {
1827		db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
1828		comma = 1;
1829	}
1830	if (t_flags & TF_NEEDSYN) {
1831		db_printf("%sTF_NEEDSYN", comma ? ", " : "");
1832		comma = 1;
1833	}
1834	if (t_flags & TF_NEEDFIN) {
1835		db_printf("%sTF_NEEDFIN", comma ? ", " : "");
1836		comma = 1;
1837	}
1838	if (t_flags & TF_NOPUSH) {
1839		db_printf("%sTF_NOPUSH", comma ? ", " : "");
1840		comma = 1;
1841	}
1842	if (t_flags & TF_MORETOCOME) {
1843		db_printf("%sTF_MORETOCOME", comma ? ", " : "");
1844		comma = 1;
1845	}
1846	if (t_flags & TF_LQ_OVERFLOW) {
1847		db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
1848		comma = 1;
1849	}
1850	if (t_flags & TF_LASTIDLE) {
1851		db_printf("%sTF_LASTIDLE", comma ? ", " : "");
1852		comma = 1;
1853	}
1854	if (t_flags & TF_RXWIN0SENT) {
1855		db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
1856		comma = 1;
1857	}
1858	if (t_flags & TF_FASTRECOVERY) {
1859		db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
1860		comma = 1;
1861	}
1862	if (t_flags & TF_CONGRECOVERY) {
1863		db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
1864		comma = 1;
1865	}
1866	if (t_flags & TF_WASFRECOVERY) {
1867		db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
1868		comma = 1;
1869	}
1870	if (t_flags & TF_SIGNATURE) {
1871		db_printf("%sTF_SIGNATURE", comma ? ", " : "");
1872		comma = 1;
1873	}
1874	if (t_flags & TF_FORCEDATA) {
1875		db_printf("%sTF_FORCEDATA", comma ? ", " : "");
1876		comma = 1;
1877	}
1878	if (t_flags & TF_TSO) {
1879		db_printf("%sTF_TSO", comma ? ", " : "");
1880		comma = 1;
1881	}
1882	if (t_flags & TF_ECN_PERMIT) {
1883		db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
1884		comma = 1;
1885	}
1886}
1887
1888static void
1889db_print_toobflags(char t_oobflags)
1890{
1891	int comma;
1892
1893	comma = 0;
1894	if (t_oobflags & TCPOOB_HAVEDATA) {
1895		db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
1896		comma = 1;
1897	}
1898	if (t_oobflags & TCPOOB_HADDATA) {
1899		db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
1900		comma = 1;
1901	}
1902}
1903
1904static void
1905db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
1906{
1907
1908	db_print_indent(indent);
1909	db_printf("%s at %p\n", name, tp);
1910
1911	indent += 2;
1912
1913	db_print_indent(indent);
1914	db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
1915	   LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
1916
1917	db_print_indent(indent);
1918	db_printf("tt_rexmt: %p   tt_persist: %p   tt_keep: %p\n",
1919	    &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
1920
1921	db_print_indent(indent);
1922	db_printf("tt_2msl: %p   tt_delack: %p   t_inpcb: %p\n", &tp->t_timers->tt_2msl,
1923	    &tp->t_timers->tt_delack, tp->t_inpcb);
1924
1925	db_print_indent(indent);
1926	db_printf("t_state: %d (", tp->t_state);
1927	db_print_tstate(tp->t_state);
1928	db_printf(")\n");
1929
1930	db_print_indent(indent);
1931	db_printf("t_flags: 0x%x (", tp->t_flags);
1932	db_print_tflags(tp->t_flags);
1933	db_printf(")\n");
1934
1935	db_print_indent(indent);
1936	db_printf("snd_una: 0x%08x   snd_max: 0x%08x   snd_nxt: x0%08x\n",
1937	    tp->snd_una, tp->snd_max, tp->snd_nxt);
1938
1939	db_print_indent(indent);
1940	db_printf("snd_up: 0x%08x   snd_wl1: 0x%08x   snd_wl2: 0x%08x\n",
1941	   tp->snd_up, tp->snd_wl1, tp->snd_wl2);
1942
1943	db_print_indent(indent);
1944	db_printf("iss: 0x%08x   irs: 0x%08x   rcv_nxt: 0x%08x\n",
1945	    tp->iss, tp->irs, tp->rcv_nxt);
1946
1947	db_print_indent(indent);
1948	db_printf("rcv_adv: 0x%08x   rcv_wnd: %lu   rcv_up: 0x%08x\n",
1949	    tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
1950
1951	db_print_indent(indent);
1952	db_printf("snd_wnd: %lu   snd_cwnd: %lu\n",
1953	   tp->snd_wnd, tp->snd_cwnd);
1954
1955	db_print_indent(indent);
1956	db_printf("snd_ssthresh: %lu   snd_recover: "
1957	    "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
1958
1959	db_print_indent(indent);
1960	db_printf("t_maxopd: %u   t_rcvtime: %u   t_startime: %u\n",
1961	    tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
1962
1963	db_print_indent(indent);
1964	db_printf("t_rttime: %u   t_rtsq: 0x%08x\n",
1965	    tp->t_rtttime, tp->t_rtseq);
1966
1967	db_print_indent(indent);
1968	db_printf("t_rxtcur: %d   t_maxseg: %u   t_srtt: %d\n",
1969	    tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
1970
1971	db_print_indent(indent);
1972	db_printf("t_rttvar: %d   t_rxtshift: %d   t_rttmin: %u   "
1973	    "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
1974	    tp->t_rttbest);
1975
1976	db_print_indent(indent);
1977	db_printf("t_rttupdated: %lu   max_sndwnd: %lu   t_softerror: %d\n",
1978	    tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
1979
1980	db_print_indent(indent);
1981	db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
1982	db_print_toobflags(tp->t_oobflags);
1983	db_printf(")   t_iobc: 0x%02x\n", tp->t_iobc);
1984
1985	db_print_indent(indent);
1986	db_printf("snd_scale: %u   rcv_scale: %u   request_r_scale: %u\n",
1987	    tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
1988
1989	db_print_indent(indent);
1990	db_printf("ts_recent: %u   ts_recent_age: %u\n",
1991	    tp->ts_recent, tp->ts_recent_age);
1992
1993	db_print_indent(indent);
1994	db_printf("ts_offset: %u   last_ack_sent: 0x%08x   snd_cwnd_prev: "
1995	    "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
1996
1997	db_print_indent(indent);
1998	db_printf("snd_ssthresh_prev: %lu   snd_recover_prev: 0x%08x   "
1999	    "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
2000	    tp->snd_recover_prev, tp->t_badrxtwin);
2001
2002	db_print_indent(indent);
2003	db_printf("snd_numholes: %d  snd_holes first: %p\n",
2004	    tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
2005
2006	db_print_indent(indent);
2007	db_printf("snd_fack: 0x%08x   rcv_numsacks: %d   sack_newdata: "
2008	    "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata);
2009
2010	/* Skip sackblks, sackhint. */
2011
2012	db_print_indent(indent);
2013	db_printf("t_rttlow: %d   rfbuf_ts: %u   rfbuf_cnt: %d\n",
2014	    tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
2015}
2016
2017DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
2018{
2019	struct tcpcb *tp;
2020
2021	if (!have_addr) {
2022		db_printf("usage: show tcpcb <addr>\n");
2023		return;
2024	}
2025	tp = (struct tcpcb *)addr;
2026
2027	db_print_tcpcb(tp, "tcpcb", 0);
2028}
2029#endif
2030