1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1993, 1994, 1995
30 *	The Regents of the University of California.  All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 *    must display the following acknowledgement:
42 *	This product includes software developed by the University of
43 *	California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)tcp_var.h	8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.56.2.8 2001/08/22 00:59:13 silby Exp $
62 */
63
64#ifndef _NETINET_TCP_VAR_H_
65#define _NETINET_TCP_VAR_H_
66#include <sys/appleapiopts.h>
67#include <sys/queue.h>
68#include <netinet/in_pcb.h>
69#include <netinet/tcp_timer.h>
70
/*
 * Helpers used by the tcpcb layout that is declared when KERNEL_PRIVATE
 * is not defined.
 *
 * On LP64 builds pointer-typed members are declared as 32-bit
 * placeholders (presumably so the exported structure keeps its 32-bit
 * layout -- confirm against the sysctl consumers); otherwise the real
 * pointer type / LIST_HEAD is used.
 *
 * Neither variant of _TCPCB_LIST_HEAD supplies the terminating
 * semicolon; the user writes it, exactly as with LIST_HEAD.
 */
#if defined(__LP64__)
#define _TCPCB_PTR(x)			u_int32_t
#define _TCPCB_LIST_HEAD(name, type)	\
struct name {				\
	u_int32_t	lh_first;	\
}
#else
#define _TCPCB_PTR(x)			x
#define _TCPCB_LIST_HEAD(name, type)	LIST_HEAD(name, type)
#endif
81
#define TCP_RETRANSHZ	1000	/* granularity of TCP timestamps, 1ms */
#define TCP_TIMERHZ	100	/* frequency of TCP fast timer, 100 ms */

/*
 * Minimum time quantum within which the timers are coalesced.
 * The slow quantum is parenthesized so that it stays a single operand
 * when it is used inside a larger expression (e.g. x / QUANTUM).
 */
#define TCP_FASTTIMER_QUANTUM	TCP_TIMERHZ	/* fast mode, once every 100ms */
#define TCP_SLOWTIMER_QUANTUM	(TCP_RETRANSHZ / PR_SLOWHZ)	/* slow mode, once every 500ms */

#define TCP_RETRANSHZ_TO_USEC	1000
90
91#ifdef KERNEL_PRIVATE
#define N_TIME_WAIT_SLOTS	128	/* must be power of 2 */

/*
 * Base RTT is stored for N_RTT_BASE slots. This is used to estimate the
 * expected minimum RTT for delay based congestion control algorithms.
 */
#define N_RTT_BASE	5

/*
 * Always allow at least 4 packets worth of recv window when adjusting
 * the recv window using inter-packet arrival jitter (IAJ).
 */
#define MIN_IAJ_WIN	4

/*
 * A variation in delay of this many milliseconds is tolerable. This
 * limit has to be low but greater than zero. We also use the standard
 * deviation of the jitter to adjust this limit for different link and
 * connection types.
 */
#define ALLOWED_IAJ	5

/*
 * Ignore the first few packets on a connection until the ACK clock
 * gets going.
 */
#define IAJ_IGNORE_PKTCNT	40

/*
 * Let the accumulated IAJ value increase by this threshold at most.
 * This limit controls how many ALLOWED_IAJ measurements a receiver has
 * to see before opening the receive window.
 */
#define ACC_IAJ_HIGH_THRESH	100

/*
 * When the accumulated IAJ reaches this value, the receiver starts to
 * react by closing the window.
 */
#define ACC_IAJ_REACT_LIMIT	200

/*
 * If the number of small packets (smaller than the IAJ packet size)
 * seen on a connection is more than this threshold, reset the size and
 * learn it again. This is needed because the sender might send smaller
 * segments after PMTU discovery and the receiver has to learn the new
 * size.
 */
#define RESET_IAJ_SIZE_THRESH	20
132
133/*
134 * Kernel variables for tcp.
135 */
136
/*
 * TCP segment queue entry: one element of a list headed by
 * struct tsegqe_head.
 */
struct tseg_qent {
	LIST_ENTRY(tseg_qent) tqe_q;	/* list linkage */
	int	tqe_len;		/* TCP segment data length */
	struct	tcphdr *tqe_th;		/* a pointer to tcp header */
	struct	mbuf *tqe_m;		/* mbuf contains packet */
};
/* Head of a list of struct tseg_qent entries. */
LIST_HEAD(tsegqe_head, tseg_qent);

/* Defined outside this header; names suggest reassembly queue limits. */
extern int	tcp_reass_maxseg;
extern int	tcp_reass_qsize;

/* Only declared when the including file provides MALLOC_DECLARE. */
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_TSEGQ);
#endif
150
151struct sackblk {
152	tcp_seq start;		/* start seq no. of sack block */
153	tcp_seq end;		/* end seq no. */
154};
155
156struct sackhole {
157	tcp_seq start;		/* start seq no. of hole */
158	tcp_seq end;		/* end seq no. */
159	tcp_seq rxmit;		/* next seq. no in hole to be retransmitted */
160	TAILQ_ENTRY(sackhole) scblink;	/* scoreboard linkage */
161};
162
/*
 * SACK scoreboard hint kept in the tcpcb.
 * NOTE(review): field meanings below are inferred from the names only;
 * confirm against the SACK implementation.
 */
struct sackhint {
	struct sackhole	*nexthole;	/* presumably the next hole to work on */
	int	sack_bytes_rexmit;	/* presumably bytes retransmitted via SACK */
};
167
168struct tcptemp {
169	u_char	tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */
170	struct	tcphdr tt_t;
171};
172
173struct bwmeas {
174	tcp_seq bw_start;		/* start of bw measurement */
175	uint32_t bw_ts;		/* timestamp when bw measurement started */
176	uint32_t bw_size;		/* burst size in bytes for this bw measurement */
177	uint32_t bw_minsizepkts;	/* Min burst size as segments */
178	uint32_t bw_maxsizepkts;	/* Max burst size as segments */
179	uint32_t bw_minsize;	/* Min size in bytes */
180	uint32_t bw_maxsize;	/* Max size in bytes */
181	uint32_t bw_sndbw;		/* Measured send bw */
182};
183
184#define tcp6cb		tcpcb  /* for KAME src sync over BSD*'s */
185
186/*
187 * Tcp control block, one per tcp; fields:
188 * Organized for 16 byte cacheline efficiency.
189 */
190struct tcpcb {
191	struct	tsegqe_head t_segq;
192	int	t_dupacks;		/* consecutive dup acks recd */
193	uint32_t t_timer[TCPT_NTIMERS];	/* tcp timers */
194	struct tcptimerentry tentry;	/* entry in timer list */
195
196	struct	inpcb *t_inpcb;		/* back pointer to internet pcb */
197	int	t_state;		/* state of this connection */
198	u_int	t_flags;
199#define	TF_ACKNOW	0x00001		/* ack peer immediately */
200#define	TF_DELACK	0x00002		/* ack, but try to delay it */
201#define	TF_NODELAY	0x00004		/* don't delay packets to coalesce */
202#define	TF_NOOPT	0x00008		/* don't use tcp options */
203#define	TF_SENTFIN	0x00010		/* have sent FIN */
204#define	TF_REQ_SCALE	0x00020		/* have/will request window scaling */
205#define	TF_RCVD_SCALE	0x00040		/* other side has requested scaling */
206#define	TF_REQ_TSTMP	0x00080		/* have/will request timestamps */
207#define	TF_RCVD_TSTMP	0x00100		/* a timestamp was received in SYN */
208#define	TF_SACK_PERMIT	0x00200		/* other side said I could SACK */
209#define	TF_NEEDSYN	0x00400		/* send SYN (implicit state) */
210#define	TF_NEEDFIN	0x00800		/* send FIN (implicit state) */
211#define	TF_NOPUSH	0x01000		/* don't push */
212#define	TF_REQ_CC	0x02000		/* have/will request CC */
213#define	TF_RCVD_CC	0x04000		/* a CC was received in SYN */
214#define	TF_SENDCCNEW	0x08000		/* send CCnew instead of CC in SYN */
215#define	TF_MORETOCOME	0x10000		/* More data to be appended to sock */
216#define	TF_LOCAL	0x20000		/* connection to a host on local link */
217#define	TF_RXWIN0SENT	0x40000		/* sent a receiver win 0 in response */
218#define	TF_SLOWLINK	0x80000		/* route is a on a modem speed link */
219#define	TF_LASTIDLE	0x100000	/* connection was previously idle */
220#define	TF_FASTRECOVERY	0x200000	/* in NewReno Fast Recovery */
221#define	TF_WASFRECOVERY	0x400000	/* was in NewReno Fast Recovery */
222#define	TF_SIGNATURE	0x800000	/* require MD5 digests (RFC2385) */
223#define	TF_MAXSEGSNT	0x1000000	/* last segment sent was a full segment */
224#define TF_PMTUD	0x4000000	/* Perform Path MTU Discovery for this connection */
225#define	TF_CLOSING	0x8000000	/* pending tcp close */
226#define TF_TSO		0x10000000	/* TCP Segment Offloading is enable on this connection */
227#define TF_BLACKHOLE	0x20000000	/* Path MTU Discovery Black Hole detection */
228#define TF_TIMER_ONLIST 0x40000000	/* pcb is on tcp_timer_list */
229#define TF_STRETCHACK	0x80000000	/* receiver is going to delay acks */
230
231	int	t_force;		/* 1 if forcing out a byte */
232
233	tcp_seq	snd_una;		/* send unacknowledged */
234	tcp_seq	snd_max;		/* highest sequence number sent;
235					 * used to recognize retransmits
236					 */
237	tcp_seq	snd_nxt;		/* send next */
238	tcp_seq	snd_up;			/* send urgent pointer */
239
240	tcp_seq	snd_wl1;		/* window update seg seq number */
241	tcp_seq	snd_wl2;		/* window update seg ack number */
242	tcp_seq	iss;			/* initial send sequence number */
243	tcp_seq	irs;			/* initial receive sequence number */
244
245	tcp_seq	rcv_nxt;		/* receive next */
246	tcp_seq	rcv_adv;		/* advertised window */
247	u_int32_t	rcv_wnd;		/* receive window */
248	tcp_seq	rcv_up;			/* receive urgent pointer */
249
250	u_int32_t	snd_wnd;		/* send window */
251	u_int32_t	snd_cwnd;		/* congestion-controlled window */
252	u_int32_t	snd_ssthresh;		/* snd_cwnd size threshold for
253					 * for slow start exponential to
254					 * linear switch
255					 */
256	tcp_seq	snd_recover;		/* for use in NewReno Fast Recovery */
257
258	u_int	t_maxopd;		/* mss plus options */
259
260	u_int32_t	t_rcvtime;	/* time at which a packet was received */
261	u_int32_t	t_starttime;	/* time connection was established */
262	int	t_rtttime;		/* tcp clock when rtt calculation was started */
263	tcp_seq	t_rtseq;		/* sequence number being timed */
264
265	u_int32_t rfbuf_ts;		/* recv buffer autoscaling timestamp */
266	u_int32_t rfbuf_cnt;		/* recv buffer autoscaling byte count */
267
268	int	t_rxtcur;		/* current retransmit value (ticks) */
269	u_int	t_maxseg;		/* maximum segment size */
270	int	t_srtt;			/* smoothed round-trip time */
271	int	t_rttvar;		/* variance in round-trip time */
272
273	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
274	u_int	t_rttmin;		/* minimum rtt allowed */
275	u_int	t_rttbest;		/* best rtt we've seen */
276	u_int	t_rttcur;		/* most recent value of rtt */
277	u_int32_t	t_rttupdated;		/* number of times rtt sampled */
278	u_int32_t	rxt_conndroptime;	/* retxmt conn gets dropped after this time, when set */
279	u_int32_t	rxt_start;		/* time at a connection starts retransmitting */
280	u_int32_t	max_sndwnd;		/* largest window peer has offered */
281
282	int	t_softerror;		/* possible error not yet reported */
283/* out-of-band data */
284	char	t_oobflags;		/* have some */
285	char	t_iobc;			/* input character */
286#define	TCPOOB_HAVEDATA	0x01
287#define	TCPOOB_HADDATA	0x02
288/* RFC 1323 variables */
289	u_char	snd_scale;		/* window scaling for send window */
290	u_char	rcv_scale;		/* window scaling for recv window */
291	u_char	request_r_scale;	/* pending window scaling */
292	u_char	requested_s_scale;
293	u_int16_t	tcp_cc_index;	/* index of congestion control algorithm */
294	u_int32_t	ts_recent;		/* timestamp echo data */
295
296	u_int32_t	ts_recent_age;		/* when last updated */
297	tcp_seq	last_ack_sent;
298/* RFC 1644 variables */
299	tcp_cc	cc_send;		/* send connection count */
300	tcp_cc	cc_recv;		/* receive connection count */
301/* RFC 3465 variables */
302	u_int32_t	t_bytes_acked;		/* ABC "bytes_acked" parameter */
303/* experimental */
304	u_int32_t	snd_cwnd_prev;		/* cwnd prior to retransmit */
305	u_int32_t	snd_ssthresh_prev;	/* ssthresh prior to retransmit */
306	u_int32_t	t_badrxtwin;		/* window for retransmit recovery */
307
308	int     t_keepidle;		/* keepalive idle timer (override global if > 0) */
309	int	t_lastchain;		/* amount of packets chained last time around */
310	int	t_unacksegs;		/* received but unacked segments: used for delaying acks */
311	u_int32_t	t_persist_timeout;	/* ZWP persistence limit as set by PERSIST_TIMEOUT */
312	u_int32_t	t_persist_stop;		/* persistence limit deadline if triggered by ZWP */
313	u_int32_t	t_notsent_lowat;	/* Low water for not sent data */
314
315/* 3529618 MSS overload prevention */
316	u_int32_t	rcv_reset;
317	u_int32_t	rcv_pps;
318	u_int32_t	rcv_byps;
319	u_int32_t	rcv_maxbyps;
320
321/* Receiver state for stretch-ack algorithm */
322	u_int32_t	rcv_unackwin;	/* to measure win for stretching acks */
323	u_int32_t	rcv_by_unackwin; /* bytes seen during the last ack-stretching win */
324	u_int16_t	rcv_waitforss;	/* wait for packets during slow-start */
325	u_int16_t		ecn_flags;
326#define TE_SETUPSENT		0x01	/* Indicate we have sent ECN-SETUP SYN or SYN-ACK */
327#define TE_SETUPRECEIVED	0x02	/* Indicate we have received ECN-SETUP SYN or SYN-ACK */
328#define TE_SENDIPECT		0x04	/* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */
329#define TE_SENDCWR		0x08	/* Indicate that the next non-retransmit should have the TCP CWR flag set */
330#define TE_SENDECE		0x10	/* Indicate that the next packet should have the TCP ECE flag set */
331#define TE_ECN_ON		(TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection) */
332
333	tcp_seq	snd_recover_prev;	/* snd_recover prior to retransmit */
334/* anti DoS counters */
335	u_int32_t	rcv_second;		/* start of interval second */
336
337/* SACK related state */
338	int	sack_enable;		/* enable SACK for this connection */
339	int	snd_numholes;		/* number of holes seen by sender */
340	TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
341						/* SACK scoreboard (sorted) */
342	tcp_seq	snd_fack;		/* last seq number(+1) sack'd by rcv'r*/
343	int	rcv_numsacks;		/* # distinct sack blks present */
344	struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
345	tcp_seq sack_newdata;		/* New data xmitted in this recovery
346   					   episode starts at this seq number */
347	struct sackhint	sackhint;	/* SACK scoreboard hint */
348
349	u_int32_t	t_pktlist_sentlen; /* total bytes in transmit chain */
350	struct mbuf	*t_pktlist_head; /* First packet in transmit chain */
351	struct mbuf	*t_pktlist_tail; /* Last packet in transmit chain */
352
353	int		t_keepinit; /* connection timeout, i.e. idle time in SYN_SENT or SYN_RECV state */
354	u_int32_t	tso_max_segment_size;	/* TCP Segment Offloading maximum segment unit for NIC */
355	u_int 		t_pmtud_saved_maxopd;	/* MSS saved before performing PMTU-D BlackHole detection */
356
357	struct
358	{
359		u_int32_t	rxduplicatebytes;
360		u_int32_t	rxoutoforderbytes;
361		u_int32_t	txretransmitbytes;
362		u_int32_t	unused_pad_to_8;
363	} t_stat;
364
365	/* Background congestion related state */
366	uint32_t	rtt_hist[N_RTT_BASE];	/* history of minimum RTT */
367	uint32_t	rtt_count;		/* Number of RTT samples in recent base history */
368	uint32_t	bg_ssthresh;		/* Slow start threshold until delay increases */
369	uint32_t	t_flagsext;		/* Another field to accommodate more flags */
370#define TF_RXTFINDROP	0x1			/* Drop conn after retransmitting FIN 3 times */
371#define TF_RCVUNACK_WAITSS	0x2		/* set when the receiver should not stretch acks */
372#define TF_BWMEAS_INPROGRESS	0x4		/* Indicate BW meas is happening */
373#define TF_MEASURESNDBW		0x8		/* Measure send bw on this connection */
374#define TF_LRO_OFFLOADED	0x10		/* Connection LRO offloaded */
375#if TRAFFIC_MGT
376	/* Inter-arrival jitter related state */
377	uint32_t 	iaj_rcv_ts;		/* tcp clock when the first packet was received */
378	uint16_t	iaj_size;		/* Size of packet for iaj measurement */
379	uint16_t	iaj_small_pkt;		/* Count of packets smaller than iaj_size */
380	uint16_t	iaj_pktcnt;		/* packet count, to avoid throttling initially */
381	uint16_t	acc_iaj;		/* Accumulated iaj */
382	tcp_seq 	iaj_rwintop;		/* recent max advertised window */
383	uint32_t	avg_iaj;		/* Mean */
384	uint32_t	std_dev_iaj;		/* Standard deviation */
385#endif /* TRAFFIC_MGT */
386	struct bwmeas	*t_bwmeas;		/* State for bandwidth measurement */
387	uint32_t	t_lropktlen;		/* Bytes in a LRO frame */
388	tcp_seq		t_idleat;		/* rcv_nxt at idle time */
389};
390
/*
 * Nonzero when TF_FASTRECOVERY is set in tp->t_flags. The argument is
 * parenthesized so that any pointer expression can be passed.
 */
#define IN_FASTRECOVERY(tp)	((tp)->t_flags & TF_FASTRECOVERY)
392
/*
 * Mark the connection as being in fast recovery.
 *
 * If the connection is in a throttled state due to advisory feedback
 * from the interface output queue, that state is reset as well. We do
 * this in favor of entering recovery because the data transfer during
 * recovery should be just a trickle and it will help to improve
 * performance. We also do not want to back off twice in the same RTT.
 *
 * Note: _tp_ is evaluated more than once; pass an expression without
 * side effects.
 */
#define ENTER_FASTRECOVERY(_tp_) do {				\
	(_tp_)->t_flags |= TF_FASTRECOVERY;			\
	if (INP_IS_FLOW_CONTROLLED((_tp_)->t_inpcb))		\
		inp_reset_fc_state((_tp_)->t_inpcb);		\
} while (0)
405
/*
 * Clear TF_FASTRECOVERY in tp->t_flags. Both the argument and the whole
 * expansion are parenthesized so the macro behaves as one expression.
 */
#define EXIT_FASTRECOVERY(tp)	((tp)->t_flags &= ~TF_FASTRECOVERY)
407
408#if CONFIG_DTRACE
/*
 * Congestion-control event identifiers (only declared when
 * CONFIG_DTRACE is enabled). Values are implicit and start at 0, so the
 * order of the enumerators must not change.
 */
enum tcp_cc_event {
	TCP_CC_CWND_INIT,
	TCP_CC_INSEQ_ACK_RCVD,
	TCP_CC_ACK_RCVD,
	TCP_CC_ENTER_FASTRECOVERY,
	TCP_CC_IN_FASTRECOVERY,
	TCP_CC_EXIT_FASTRECOVERY,
	TCP_CC_PARTIAL_ACK,
	TCP_CC_IDLE_TIMEOUT,
	TCP_CC_REXMT_TIMEOUT,
	TCP_CC_ECN_RCVD,
	TCP_CC_BAD_REXMT_RECOVERY,
	TCP_CC_OUTPUT_ERROR,
	TCP_CC_CHANGE_ALGO,
	TCP_CC_FLOW_CONTROL,
	TCP_CC_SUSPEND
};
426#endif /* CONFIG_DTRACE */
427
428/*
429 * Structure to hold TCP options that are only used during segment
430 * processing (in tcp_input), but not held in the tcpcb.
431 * It's basically used to reduce the number of parameters
432 * to tcp_dooptions.
433 */
434struct tcpopt {
435	u_int32_t	to_flags;		/* which options are present */
436#define TOF_TS		0x0001		/* timestamp */
437#define	TOF_MSS		0x0010
438#define	TOF_SCALE	0x0020
439#define	TOF_SIGNATURE	0x0040	/* signature option present */
440#define	TOF_SIGLEN	0x0080	/* signature length valid (RFC2385) */
441#define	TOF_SACK	0x0100		/* Peer sent SACK option */
442	u_int32_t		to_tsval;
443	u_int32_t		to_tsecr;
444	u_int16_t	to_mss;
445	u_int8_t	to_requested_s_scale;
446	u_int8_t	to_nsacks;	/* number of SACK blocks */
447	u_char		*to_sacks;	/* pointer to the first SACK blocks */
448};
449
450/*
451 * The TAO cache entry which is stored in the protocol family specific
452 * portion of the route metrics.
453 */
454struct rmxp_tao {
455	tcp_cc	tao_cc;			/* latest CC in valid SYN */
456	tcp_cc	tao_ccsent;		/* latest CC sent to peer */
457	u_short	tao_mssopt;		/* peer's cached MSS */
458#ifdef notyet
459	u_short	tao_flags;		/* cache status flags */
460#define	TAOF_DONT	0x0001		/* peer doesn't understand rfc1644 */
461#define	TAOF_OK		0x0002		/* peer does understand rfc1644 */
462#define	TAOF_UNDEF	0		/* we don't know yet */
463#endif /* notyet */
464};
/* View the rmx_filler member of route metrics r as a TAO cache entry. */
#define rmx_taop(r)	((struct rmxp_tao *)(r).rmx_filler)

/* Get the tcpcb from an inpcb (its inp_ppcb) or from a socket. */
#define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)
#define	sototcpcb(so)	(intotcpcb(sotoinpcb(so)))
469
470/*
471 * The rtt measured is in milliseconds as the timestamp granularity is
472 * a millisecond. The smoothed round-trip time and estimated variance
473 * are stored as fixed point numbers scaled by the values below.
474 * For convenience, these scales are also used in smoothing the average
475 * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
476 * With these scales, srtt has 5 bits to the right of the binary point,
477 * and thus an "ALPHA" of 0.875.  rttvar has 4 bits to the right of the
478 * binary point, and is smoothed with an ALPHA of 0.75.
479 */
/* Each *_SCALE equals 1 << the matching *_SHIFT. */
#define	TCP_RTT_SCALE		32	/* multiplier for srtt; 5 bits frac. */
#define	TCP_RTT_SHIFT		5	/* shift for srtt; 5 bits frac. */
#define	TCP_RTTVAR_SCALE	16	/* multiplier for rttvar; 4 bits */
#define	TCP_RTTVAR_SHIFT	4	/* shift for rttvar; 4 bits */
#define	TCP_DELTA_SHIFT		2	/* see tcp_input.c */
485
486/*
487 * The initial retransmission should happen at rtt + 4 * rttvar.
488 * Because of the way we do the smoothing, srtt and rttvar
489 * will each average +1/2 tick of bias.  When we compute
490 * the retransmit timer, we want 1/2 tick of rounding and
491 * 1 extra tick because of +-1/2 tick uncertainty in the
492 * firing of the timer.  The bias will give us exactly the
493 * 1.5 tick we need.  But, because the bias is
494 * statistical, we have to test that we don't drop below
495 * the minimum feasible timer (which is 2 ticks).
496 * This version of the macro adapted from a paper by Lawrence
497 * Brakmo and Larry Peterson which outlines a problem caused
498 * by insufficient precision in the original implementation,
499 * which results in inappropriately large RTO values for very
500 * fast networks.
501 */
/*
 * Evaluates to the larger of tp->t_rttmin and
 * ((t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) + t_rttvar)
 *	>> TCP_DELTA_SHIFT.
 * tp is evaluated more than once.
 */
#define	TCP_REXMTVAL(tp)						\
	max((tp)->t_rttmin,						\
	    (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT))	\
	      + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
505
506/*
507 * Jaguar compatible TCP control block, for xtcpcb
508 * Does not have the old fields
509 */
510struct otcpcb {
511#else
512struct tseg_qent;
513_TCPCB_LIST_HEAD(tsegqe_head, tseg_qent);
514
515struct tcpcb {
516#endif /* KERNEL_PRIVATE */
517#if defined(KERNEL_PRIVATE)
518	u_int32_t t_segq;
519#else
520	struct	tsegqe_head t_segq;
521#endif /* KERNEL_PRIVATE */
522	int	t_dupacks;		/* consecutive dup acks recd */
523	u_int32_t unused;		/* unused now: was t_template */
524
525	int	t_timer[TCPT_NTIMERS_EXT];	/* tcp timers */
526
527	_TCPCB_PTR(struct inpcb *) t_inpcb;	/* back pointer to internet pcb */
528	int	t_state;		/* state of this connection */
529	u_int	t_flags;
530#define	TF_ACKNOW	0x00001		/* ack peer immediately */
531#define	TF_DELACK	0x00002		/* ack, but try to delay it */
532#define	TF_NODELAY	0x00004		/* don't delay packets to coalesce */
533#define	TF_NOOPT	0x00008		/* don't use tcp options */
534#define	TF_SENTFIN	0x00010		/* have sent FIN */
535#define	TF_REQ_SCALE	0x00020		/* have/will request window scaling */
536#define	TF_RCVD_SCALE	0x00040		/* other side has requested scaling */
537#define	TF_REQ_TSTMP	0x00080		/* have/will request timestamps */
538#define	TF_RCVD_TSTMP	0x00100		/* a timestamp was received in SYN */
539#define	TF_SACK_PERMIT	0x00200		/* other side said I could SACK */
540#define	TF_NEEDSYN	0x00400		/* send SYN (implicit state) */
541#define	TF_NEEDFIN	0x00800		/* send FIN (implicit state) */
542#define	TF_NOPUSH	0x01000		/* don't push */
543#define	TF_REQ_CC	0x02000		/* have/will request CC */
544#define	TF_RCVD_CC	0x04000		/* a CC was received in SYN */
545#define	TF_SENDCCNEW	0x08000		/* send CCnew instead of CC in SYN */
546#define	TF_MORETOCOME	0x10000		/* More data to be appended to sock */
547#define	TF_LQ_OVERFLOW	0x20000		/* listen queue overflow */
548#define	TF_RXWIN0SENT	0x40000		/* sent a receiver win 0 in response */
549#define	TF_SLOWLINK	0x80000		/* route is a on a modem speed link */
550
551	int	t_force;		/* 1 if forcing out a byte */
552
553	tcp_seq	snd_una;		/* send unacknowledged */
554	tcp_seq	snd_max;		/* highest sequence number sent;
555					 * used to recognize retransmits
556					 */
557	tcp_seq	snd_nxt;		/* send next */
558	tcp_seq	snd_up;			/* send urgent pointer */
559
560	tcp_seq	snd_wl1;		/* window update seg seq number */
561	tcp_seq	snd_wl2;		/* window update seg ack number */
562	tcp_seq	iss;			/* initial send sequence number */
563	tcp_seq	irs;			/* initial receive sequence number */
564
565	tcp_seq	rcv_nxt;		/* receive next */
566	tcp_seq	rcv_adv;		/* advertised window */
567	u_int32_t rcv_wnd;		/* receive window */
568	tcp_seq	rcv_up;			/* receive urgent pointer */
569
570	u_int32_t snd_wnd;		/* send window */
571	u_int32_t snd_cwnd;		/* congestion-controlled window */
572	u_int32_t snd_ssthresh;		/* snd_cwnd size threshold for
573					 * for slow start exponential to
574					 * linear switch
575					 */
576	u_int	t_maxopd;		/* mss plus options */
577
578	u_int32_t t_rcvtime;		/* time at which a packet was received */
579	u_int32_t t_starttime;		/* time connection was established */
580	int	t_rtttime;		/* round trip time */
581	tcp_seq	t_rtseq;		/* sequence number being timed */
582
583	int	t_rxtcur;		/* current retransmit value (ticks) */
584	u_int	t_maxseg;		/* maximum segment size */
585	int	t_srtt;			/* smoothed round-trip time */
586	int	t_rttvar;		/* variance in round-trip time */
587
588	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
589	u_int	t_rttmin;		/* minimum rtt allowed */
590	u_int32_t t_rttupdated;		/* number of times rtt sampled */
591	u_int32_t max_sndwnd;		/* largest window peer has offered */
592
593	int	t_softerror;		/* possible error not yet reported */
594/* out-of-band data */
595	char	t_oobflags;		/* have some */
596	char	t_iobc;			/* input character */
597#define	TCPOOB_HAVEDATA	0x01
598#define	TCPOOB_HADDATA	0x02
599/* RFC 1323 variables */
600	u_char	snd_scale;		/* window scaling for send window */
601	u_char	rcv_scale;		/* window scaling for recv window */
602	u_char	request_r_scale;	/* pending window scaling */
603	u_char	requested_s_scale;
604	u_int32_t ts_recent;		/* timestamp echo data */
605
606	u_int32_t ts_recent_age;	/* when last updated */
607	tcp_seq	last_ack_sent;
608/* RFC 1644 variables */
609	tcp_cc	cc_send;		/* send connection count */
610	tcp_cc	cc_recv;		/* receive connection count */
611	tcp_seq	snd_recover;		/* for use in fast recovery */
612/* experimental */
613	u_int32_t snd_cwnd_prev;	/* cwnd prior to retransmit */
614	u_int32_t snd_ssthresh_prev;	/* ssthresh prior to retransmit */
615	u_int32_t t_badrxtwin;		/* window for retransmit recovery */
616};
617
618
/*
 * TCP statistics.
 * Many of these should be kept per connection,
 * but that's inconvenient at the moment.
 *
 * Every member is a u_int32_t counter; members must keep their order
 * because the structure is consumed as a flat block of counters.
 */
struct	tcpstat {
	u_int32_t	tcps_connattempt;	/* connections initiated */
	u_int32_t	tcps_accepts;		/* connections accepted */
	u_int32_t	tcps_connects;		/* connections established */
	u_int32_t	tcps_drops;		/* connections dropped */
	u_int32_t	tcps_conndrops;		/* embryonic connections dropped */
	u_int32_t	tcps_closed;		/* conn. closed (includes drops) */
	u_int32_t	tcps_segstimed;		/* segs where we tried to get rtt */
	u_int32_t	tcps_rttupdated;	/* times we succeeded */
	u_int32_t	tcps_delack;		/* delayed acks sent */
	u_int32_t	tcps_timeoutdrop;	/* conn. dropped in rxmt timeout */
	u_int32_t	tcps_rexmttimeo;	/* retransmit timeouts */
	u_int32_t	tcps_persisttimeo;	/* persist timeouts */
	u_int32_t	tcps_keeptimeo;		/* keepalive timeouts */
	u_int32_t	tcps_keepprobe;		/* keepalive probes sent */
	u_int32_t	tcps_keepdrops;		/* connections dropped in keepalive */

	u_int32_t	tcps_sndtotal;		/* total packets sent */
	u_int32_t	tcps_sndpack;		/* data packets sent */
	u_int32_t	tcps_sndbyte;		/* data bytes sent */
	u_int32_t	tcps_sndrexmitpack;	/* data packets retransmitted */
	u_int32_t	tcps_sndrexmitbyte;	/* data bytes retransmitted */
	u_int32_t	tcps_sndacks;		/* ack-only packets sent */
	u_int32_t	tcps_sndprobe;		/* window probes sent */
	u_int32_t	tcps_sndurg;		/* packets sent with URG only */
	u_int32_t	tcps_sndwinup;		/* window update-only packets sent */
	u_int32_t	tcps_sndctrl;		/* control (SYN|FIN|RST) packets sent */

	u_int32_t	tcps_rcvtotal;		/* total packets received */
	u_int32_t	tcps_rcvpack;		/* packets received in sequence */
	u_int32_t	tcps_rcvbyte;		/* bytes received in sequence */
	u_int32_t	tcps_rcvbadsum;		/* packets received with checksum errs */
	u_int32_t	tcps_rcvbadoff;		/* packets received with bad offset */
	u_int32_t	tcps_rcvmemdrop;	/* packets dropped for lack of memory */
	u_int32_t	tcps_rcvshort;		/* packets received too short */
	u_int32_t	tcps_rcvduppack;	/* duplicate-only packets received */
	u_int32_t	tcps_rcvdupbyte;	/* duplicate-only bytes received */
	u_int32_t	tcps_rcvpartduppack;	/* packets with some duplicate data */
	u_int32_t	tcps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
	u_int32_t	tcps_rcvoopack;		/* out-of-order packets received */
	u_int32_t	tcps_rcvoobyte;		/* out-of-order bytes received */
	u_int32_t	tcps_rcvpackafterwin;	/* packets with data after window */
	u_int32_t	tcps_rcvbyteafterwin;	/* bytes rcvd after window */
	u_int32_t	tcps_rcvafterclose;	/* packets rcvd after "close" */
	u_int32_t	tcps_rcvwinprobe;	/* rcvd window probe packets */
	u_int32_t	tcps_rcvdupack;		/* rcvd duplicate acks */
	u_int32_t	tcps_rcvacktoomuch;	/* rcvd acks for unsent data */
	u_int32_t	tcps_rcvackpack;	/* rcvd ack packets */
	u_int32_t	tcps_rcvackbyte;	/* bytes acked by rcvd acks */
	u_int32_t	tcps_rcvwinupd;		/* rcvd window update packets */
	u_int32_t	tcps_pawsdrop;		/* segments dropped due to PAWS */
	u_int32_t	tcps_predack;		/* times hdr predict ok for acks */
	u_int32_t	tcps_preddat;		/* times hdr predict ok for data pkts */
	u_int32_t	tcps_pcbcachemiss;
	u_int32_t	tcps_cachedrtt;		/* times cached RTT in route updated */
	u_int32_t	tcps_cachedrttvar;	/* times cached rttvar updated */
	u_int32_t	tcps_cachedssthresh;	/* times cached ssthresh updated */
	u_int32_t	tcps_usedrtt;		/* times RTT initialized from route */
	u_int32_t	tcps_usedrttvar;	/* times RTTVAR initialized from rt */
	u_int32_t	tcps_usedssthresh;	/* times ssthresh initialized from rt */
	u_int32_t	tcps_persistdrop;	/* timeout in persist state */
	u_int32_t	tcps_badsyn;		/* bogus SYN, e.g. premature ACK */
	u_int32_t	tcps_mturesent;		/* resends due to MTU discovery */
	u_int32_t	tcps_listendrop;	/* listen queue overflows */

	/* new stats from FreeBSD 5.4 sync up */
	u_int32_t	tcps_minmssdrops;	/* average minmss too low drops */
	u_int32_t	tcps_sndrexmitbad;	/* unnecessary packet retransmissions */
	u_int32_t	tcps_badrst;		/* ignored RSTs in the window */

	u_int32_t	tcps_sc_added;		/* entry added to syncache */
	u_int32_t	tcps_sc_retransmitted;	/* syncache entry was retransmitted */
	u_int32_t	tcps_sc_dupsyn;		/* duplicate SYN packet */
	u_int32_t	tcps_sc_dropped;	/* could not reply to packet */
	u_int32_t	tcps_sc_completed;	/* successful extraction of entry */
	u_int32_t	tcps_sc_bucketoverflow;	/* syncache per-bucket limit hit */
	u_int32_t	tcps_sc_cacheoverflow;	/* syncache cache limit hit */
	u_int32_t	tcps_sc_reset;		/* RST removed entry from syncache */
	u_int32_t	tcps_sc_stale;		/* timed out or listen socket gone */
	u_int32_t	tcps_sc_aborted;	/* syncache entry aborted */
	u_int32_t	tcps_sc_badack;		/* removed due to bad ACK */
	u_int32_t	tcps_sc_unreach;	/* ICMP unreachable received */
	u_int32_t	tcps_sc_zonefail;	/* zalloc() failed */
	u_int32_t	tcps_sc_sendcookie;	/* SYN cookie sent */
	u_int32_t	tcps_sc_recvcookie;	/* SYN cookie received */

	u_int32_t	tcps_hc_added;		/* entry added to hostcache */
	u_int32_t	tcps_hc_bucketoverflow;	/* hostcache per bucket limit hit */

	/* SACK related stats */
	u_int32_t	tcps_sack_recovery_episode; /* SACK recovery episodes */
	u_int32_t 	tcps_sack_rexmits;	    /* SACK rexmit segments   */
	u_int32_t 	tcps_sack_rexmit_bytes;	    /* SACK rexmit bytes      */
	u_int32_t 	tcps_sack_rcv_blocks;	    /* SACK blocks (options) received */
	u_int32_t 	tcps_sack_send_blocks;	    /* SACK blocks (options) sent     */
	u_int32_t 	tcps_sack_sboverflow;	    /* SACK sendblock overflow   */

	u_int32_t	tcps_bg_rcvtotal;	/* total background packets received */
	u_int32_t	tcps_rxtfindrop;	/* drop conn after retransmitting FIN */
	u_int32_t	tcps_fcholdpacket;	/* packets withheld because of flow control */

	/* LRO related stats */
	u_int32_t	tcps_coalesced_pack;	/* number of coalesced packets */
	u_int32_t	tcps_flowtbl_full;	/* times flow table was full */
	u_int32_t	tcps_flowtbl_collision;	/* collisions in flow tbl */
	u_int32_t	tcps_lro_twopack;	/* 2 packets coalesced */
	u_int32_t	tcps_lro_multpack;	/* 3 or 4 pkts coalesced */
	u_int32_t	tcps_lro_largepack;	/* 5 or more pkts coalesced */
};
733
/*
 * A set of fourteen 64-bit event counters.
 *
 * NOTE(review): this header does not show where the counters are
 * incremented or how they are reported; the member names suggest they
 * classify reasons for rejecting or cleaning up input -- confirm against
 * the code that updates them before relying on that reading.
 */
struct tcpstat_local {
	u_int64_t	badformat;
	u_int64_t	unspecv6;
	u_int64_t	synfin;
	u_int64_t	badformatipsec;
	u_int64_t	noconnnolist;
	u_int64_t	noconnlist;
	u_int64_t	listbadsyn;
	u_int64_t	icmp6unreach;
	u_int64_t	deprecate6;
	u_int64_t	ooopacket;
	u_int64_t	rstinsynrcv;
	u_int64_t	dospacket;
	u_int64_t	cleanup;
	u_int64_t	synwindow;
};
750
751#pragma pack(4)
752
753/*
754 * TCB structure exported to user-land via sysctl(3).
755 * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been
756 * included.  Not all of our clients do.
757 */
758
759struct  xtcpcb {
760        u_int32_t       xt_len;
761#ifdef KERNEL_PRIVATE
762        struct  inpcb_compat    xt_inp;
763#else
764        struct  inpcb   xt_inp;
765#endif
766#ifdef KERNEL_PRIVATE
767        struct  otcpcb  xt_tp;
768#else
769        struct  tcpcb   xt_tp;
770#endif
771        struct  xsocket xt_socket;
772        u_quad_t        xt_alignment_hack;
773};
774
775#if !CONFIG_EMBEDDED
776
777struct  xtcpcb64 {
778        u_int32_t      		xt_len;
779        struct xinpcb64		xt_inpcb;
780
781        u_int64_t t_segq;
782        int     t_dupacks;              /* consecutive dup acks recd */
783
784        int t_timer[TCPT_NTIMERS_EXT];  /* tcp timers */
785
786        int     t_state;                /* state of this connection */
787        u_int   t_flags;
788
789        int     t_force;                /* 1 if forcing out a byte */
790
791        tcp_seq snd_una;                /* send unacknowledged */
792        tcp_seq snd_max;                /* highest sequence number sent;
793                                         * used to recognize retransmits
794                                         */
795        tcp_seq snd_nxt;                /* send next */
796        tcp_seq snd_up;                 /* send urgent pointer */
797
798        tcp_seq snd_wl1;                /* window update seg seq number */
799        tcp_seq snd_wl2;                /* window update seg ack number */
800        tcp_seq iss;                    /* initial send sequence number */
801        tcp_seq irs;                    /* initial receive sequence number */
802
803        tcp_seq rcv_nxt;                /* receive next */
804        tcp_seq rcv_adv;                /* advertised window */
805        u_int32_t rcv_wnd;              /* receive window */
806        tcp_seq rcv_up;                 /* receive urgent pointer */
807
808        u_int32_t snd_wnd;              /* send window */
809        u_int32_t snd_cwnd;             /* congestion-controlled window */
810        u_int32_t snd_ssthresh;         /* snd_cwnd size threshold for
811                                         * for slow start exponential to
812                                         * linear switch
813                                         */
814        u_int   t_maxopd;               /* mss plus options */
815
816        u_int32_t t_rcvtime;            /* time at which a packet was received */
817        u_int32_t t_starttime;          /* time connection was established */
818        int     t_rtttime;              /* round trip time */
819        tcp_seq t_rtseq;                /* sequence number being timed */
820
821        int     t_rxtcur;               /* current retransmit value (ticks) */
822        u_int   t_maxseg;               /* maximum segment size */
823        int     t_srtt;                 /* smoothed round-trip time */
824        int     t_rttvar;               /* variance in round-trip time */
825
826        int     t_rxtshift;             /* log(2) of rexmt exp. backoff */
827        u_int   t_rttmin;               /* minimum rtt allowed */
828        u_int32_t t_rttupdated;         /* number of times rtt sampled */
829        u_int32_t max_sndwnd;           /* largest window peer has offered */
830
831        int     t_softerror;            /* possible error not yet reported */
832/* out-of-band data */
833        char    t_oobflags;             /* have some */
834        char    t_iobc;                 /* input character */
835/* RFC 1323 variables */
836        u_char  snd_scale;              /* window scaling for send window */
837        u_char  rcv_scale;              /* window scaling for recv window */
838        u_char  request_r_scale;        /* pending window scaling */
839        u_char  requested_s_scale;
840        u_int32_t ts_recent;            /* timestamp echo data */
841
842        u_int32_t ts_recent_age;        /* when last updated */
843        tcp_seq last_ack_sent;
844/* RFC 1644 variables */
845        tcp_cc  cc_send;                /* send connection count */
846        tcp_cc  cc_recv;                /* receive connection count */
847        tcp_seq snd_recover;            /* for use in fast recovery */
848/* experimental */
849        u_int32_t snd_cwnd_prev;        /* cwnd prior to retransmit */
850        u_int32_t snd_ssthresh_prev;    /* ssthresh prior to retransmit */
851        u_int32_t t_badrxtwin;          /* window for retransmit recovery */
852
853        u_quad_t		xt_alignment_hack;
854};
855
856#endif /* !CONFIG_EMBEDDED */
857
#ifdef PRIVATE

/*
 * TCP control block record that starts with a length and a kind tag and
 * is followed by individual TCP state fields.  Declared only when
 * PRIVATE is defined.
 *
 * This is the last structure in the #pragma pack(4) region; the
 * #pragma pack() after it restores the default packing.  Member order and
 * types define the record layout and must not be rearranged.
 */
struct xtcpcb_n {
	u_int32_t	xt_len;
	u_int32_t	xt_kind;	/* XSO_TCPCB */

	u_int64_t	t_segq;
	int		t_dupacks;	/* consecutive dup acks received */

	int		t_timer[TCPT_NTIMERS_EXT];	/* tcp timers */

	int		t_state;	/* state of this connection */
	u_int		t_flags;

	int		t_force;	/* 1 if forcing out a byte */

	tcp_seq		snd_una;	/* send unacknowledged */
	tcp_seq		snd_max;	/* highest sequence number sent;
					 * used to recognize retransmits
					 */
	tcp_seq		snd_nxt;	/* send next */
	tcp_seq		snd_up;		/* send urgent pointer */

	tcp_seq		snd_wl1;	/* window update seg seq number */
	tcp_seq		snd_wl2;	/* window update seg ack number */
	tcp_seq		iss;		/* initial send sequence number */
	tcp_seq		irs;		/* initial receive sequence number */

	tcp_seq		rcv_nxt;	/* receive next */
	tcp_seq		rcv_adv;	/* advertised window */
	u_int32_t	rcv_wnd;	/* receive window */
	tcp_seq		rcv_up;		/* receive urgent pointer */

	u_int32_t	snd_wnd;	/* send window */
	u_int32_t	snd_cwnd;	/* congestion-controlled window */
	u_int32_t	snd_ssthresh;	/* snd_cwnd size threshold for
					 * slow start exponential to
					 * linear switch
					 */
	u_int		t_maxopd;	/* mss plus options */

	u_int32_t	t_rcvtime;	/* time at which a packet was received */
	u_int32_t	t_starttime;	/* time connection was established */
	int		t_rtttime;	/* round trip time */
	tcp_seq		t_rtseq;	/* sequence number being timed */

	int		t_rxtcur;	/* current retransmit value (ticks) */
	u_int		t_maxseg;	/* maximum segment size */
	int		t_srtt;		/* smoothed round-trip time */
	int		t_rttvar;	/* variance in round-trip time */

	int		t_rxtshift;	/* log(2) of rexmt exp. backoff */
	u_int		t_rttmin;	/* minimum rtt allowed */
	u_int32_t	t_rttupdated;	/* number of times rtt sampled */
	u_int32_t	max_sndwnd;	/* largest window peer has offered */

	int		t_softerror;	/* possible error not yet reported */
	/* out-of-band data */
	char		t_oobflags;	/* have some */
	char		t_iobc;		/* input character */
	/* RFC 1323 variables */
	u_char		snd_scale;	/* window scaling for send window */
	u_char		rcv_scale;	/* window scaling for recv window */
	u_char		request_r_scale;	/* pending window scaling */
	u_char		requested_s_scale;
	u_int32_t	ts_recent;	/* timestamp echo data */

	u_int32_t	ts_recent_age;	/* when last updated */
	tcp_seq		last_ack_sent;
	/* RFC 1644 variables */
	tcp_cc		cc_send;	/* send connection count */
	tcp_cc		cc_recv;	/* receive connection count */
	tcp_seq		snd_recover;	/* for use in fast recovery */
	/* experimental */
	u_int32_t	snd_cwnd_prev;	/* cwnd prior to retransmit */
	u_int32_t	snd_ssthresh_prev;	/* ssthresh prior to retransmit */
	u_int32_t	t_badrxtwin;	/* window for retransmit recovery */
};

#endif /* PRIVATE */

#pragma pack()
940
/*
 * Numeric identifiers of the TCP sysctl objects.
 * TCPCTL_MAXID is one greater than the highest identifier in use.
 */
#define	TCPCTL_DO_RFC1323	1	/* use RFC-1323 extensions */
#define	TCPCTL_DO_RFC1644	2	/* use RFC-1644 extensions */
#define	TCPCTL_MSSDFLT		3	/* MSS default */
#define	TCPCTL_STATS		4	/* statistics (read-only) */
#define	TCPCTL_RTTDFLT		5	/* default RTT estimate */
#define	TCPCTL_KEEPIDLE		6	/* keepalive idle timer */
#define	TCPCTL_KEEPINTVL	7	/* interval to send keepalives */
#define	TCPCTL_SENDSPACE	8	/* send buffer space */
#define	TCPCTL_RECVSPACE	9	/* receive buffer space */
#define	TCPCTL_KEEPINIT		10	/* timeout for establishing syn */
#define	TCPCTL_PCBLIST		11	/* list of all outstanding PCBs */
#define	TCPCTL_DELACKTIME	12	/* time before sending delayed ACK */
#define	TCPCTL_V6MSSDFLT	13	/* MSS default for IPv6 */
#define	TCPCTL_MAXID		14
958
959#ifdef KERNEL_PRIVATE
/*
 * Reset the packet-list bookkeeping reachable through `tp': both list
 * pointers are set to NULL and the last-chain / sent-length counters are
 * set to zero.
 *
 * The body is wrapped in do { } while (0) so that
 * "TCP_PKTLIST_CLEAR(tp);" is exactly one statement and stays correct as
 * the unbraced branch of an if/else.  `tp' is expanded four times, so
 * the argument must not have side effects.
 */
#define	TCP_PKTLIST_CLEAR(tp) do {					\
	(tp)->t_pktlist_head = (tp)->t_pktlist_tail = NULL;		\
	(tp)->t_lastchain = (tp)->t_pktlist_sentlen = 0;		\
} while (0)
964
/*
 * Brace-enclosed initializer for a table of { name, type } pairs, one
 * row per TCP sysctl identifier.  Row order follows the TCPCTL_* values,
 * so row 0 is an empty placeholder and the table has TCPCTL_MAXID rows.
 */
#define	TCPCTL_NAMES {							\
	{ 0, 0 },			/* 0: unused */			\
	{ "rfc1323", CTLTYPE_INT },	/* TCPCTL_DO_RFC1323 */		\
	{ "rfc1644", CTLTYPE_INT },	/* TCPCTL_DO_RFC1644 */		\
	{ "mssdflt", CTLTYPE_INT },	/* TCPCTL_MSSDFLT */		\
	{ "stats", CTLTYPE_STRUCT },	/* TCPCTL_STATS */		\
	{ "rttdflt", CTLTYPE_INT },	/* TCPCTL_RTTDFLT */		\
	{ "keepidle", CTLTYPE_INT },	/* TCPCTL_KEEPIDLE */		\
	{ "keepintvl", CTLTYPE_INT },	/* TCPCTL_KEEPINTVL */		\
	{ "sendspace", CTLTYPE_INT },	/* TCPCTL_SENDSPACE */		\
	{ "recvspace", CTLTYPE_INT },	/* TCPCTL_RECVSPACE */		\
	{ "keepinit", CTLTYPE_INT },	/* TCPCTL_KEEPINIT */		\
	{ "pcblist", CTLTYPE_STRUCT },	/* TCPCTL_PCBLIST */		\
	{ "delacktime", CTLTYPE_INT },	/* TCPCTL_DELACKTIME */		\
	{ "v6mssdflt", CTLTYPE_INT },	/* TCPCTL_V6MSSDFLT */		\
}
981
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_tcp);
#endif /* SYSCTL_DECL */

/*
 * TCP globals that are defined elsewhere; this header only declares
 * them.  The __APPLE__ and CONFIG_IFEF_NOWINDOWSCALE groups exist only
 * in builds where those symbols are set.
 */
extern struct inpcbhead	tcb;		/* head of queue of active tcpcb's */
extern struct inpcbinfo	tcbinfo;
extern struct tcpstat	tcpstat;	/* tcp statistics */
extern int		tcp_mssdflt;	/* XXX */
extern int		tcp_minmss;
extern int		tcp_minmssoverload;
extern int		ss_fltsz;
extern int		ss_fltsz_local;
extern int		tcp_do_rfc3390;	/* calculate ss_fltsz according to RFC 3390 */
#ifdef __APPLE__
extern u_int32_t	tcp_now;	/* for RFC 1323 timestamps */
extern struct timeval	tcp_uptime;
extern lck_spin_t	*tcp_uptime_lock;

extern int		tcp_delack_enabled;
#endif /* __APPLE__ */

extern int		tcp_do_sack;	/* SACK enabled/disabled */

#if CONFIG_IFEF_NOWINDOWSCALE
extern int		tcp_obey_ifef_nowindowscale;
#endif
1008
1009void	 tcp_canceltimers(struct tcpcb *);
1010struct tcpcb *
1011	 tcp_close(struct tcpcb *);
1012void	 tcp_ctlinput(int, struct sockaddr *, void *);
1013int	 tcp_ctloutput(struct socket *, struct sockopt *);
1014struct tcpcb *
1015	 tcp_drop(struct tcpcb *, int);
1016void	 tcp_drain(void);
1017void	 tcp_getrt_rtt(struct tcpcb *tp, struct rtentry *rt);
1018struct rmxp_tao *
1019	 tcp_gettaocache(struct inpcb *);
1020void	 tcp_init(void) __attribute__((section("__TEXT, initcode")));
1021void	 tcp_input(struct mbuf *, int);
1022void	 tcp_mss(struct tcpcb *, int, unsigned int);
1023int	 tcp_mssopt(struct tcpcb *);
1024void	 tcp_drop_syn_sent(struct inpcb *, int);
1025void	 tcp_mtudisc(struct inpcb *, int);
1026struct tcpcb *
1027	 tcp_newtcpcb(struct inpcb *);
1028int	 tcp_output(struct tcpcb *);
1029void	 tcp_respond(struct tcpcb *, void *,
1030	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int,
1031	    unsigned int, unsigned int);
1032struct rtentry *tcp_rtlookup(struct inpcb *, unsigned int);
1033void	 tcp_setpersist(struct tcpcb *);
1034void	 tcp_slowtimo(void);
1035void 	 tcp_check_timer_state(struct tcpcb *tp);
1036void	 tcp_run_timerlist(void *arg1, void *arg2);
1037
1038struct tcptemp *
1039	 tcp_maketemplate(struct tcpcb *);
1040void	 tcp_fillheaders(struct tcpcb *, void *, void *);
1041struct tcpcb *
1042	 tcp_timers(struct tcpcb *, int);
1043void	 tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int);
1044
1045void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
1046void	 tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
1047void	 tcp_clean_sackreport(struct tcpcb *tp);
1048void	 tcp_sack_adjust(struct tcpcb *tp);
1049struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
1050void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
1051void	 tcp_free_sackholes(struct tcpcb *tp);
1052int32_t	 tcp_sbspace(struct tcpcb *tp);
1053void	 tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp);
1054void	 tcp_reset_stretch_ack(struct tcpcb *tp);
1055void	 tcp_get_ports_used(unsigned int , uint8_t *);
1056uint32_t tcp_count_opportunistic(unsigned int ifindex, u_int32_t flags);
1057void	 tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so);
1058u_int8_t tcp_cansbgrow(struct sockbuf *sb);
1059struct bwmeas* tcp_bwmeas_alloc(struct tcpcb *tp);
1060void tcp_bwmeas_free(struct tcpcb *tp);
1061
/* Routines declared here and implemented elsewhere; each takes the socket to act on. */
extern void	tcp_set_background_cc(struct socket *);
extern void	tcp_set_foreground_cc(struct socket *);
extern void	tcp_set_recv_bg(struct socket *);
extern void	tcp_clear_recv_bg(struct socket *);

/*
 * Non-zero when the TRAFFIC_MGT_TCP_RECVBG bit is set in the socket's
 * so_traffic_mgt_flags, zero otherwise.  The result is the masked flag
 * value, not a normalized 0/1.
 */
#define	IS_TCP_RECV_BG(_so)	\
	((_so)->so_traffic_mgt_flags & TRAFFIC_MGT_TCP_RECVBG)
1068
1069#if TRAFFIC_MGT
/*
 * Zero the iaj_rcv_ts member reachable through `_tp_'.  The expansion is
 * fully parenthesized so the assignment cannot re-associate with
 * operators around the macro invocation; its value is the assigned zero.
 */
#define	CLEAR_IAJ_STATE(_tp_)	((_tp_)->iaj_rcv_ts = 0)
void	 reset_acc_iaj(struct tcpcb *tp);
1072#endif /* TRAFFIC_MGT */
1073
1074int	 tcp_lock (struct socket *, int, void *);
1075int	 tcp_unlock (struct socket *, int, void *);
1076void	 calculate_tcp_clock(void);
1077
1078#ifdef _KERN_LOCKS_H_
1079lck_mtx_t *	 tcp_getlock (struct socket *, int);
1080#else
1081void *	 tcp_getlock (struct socket *, int);
1082#endif
1083
1084
1085extern	struct pr_usrreqs tcp_usrreqs;
1086extern	u_int32_t tcp_sendspace;
1087extern	u_int32_t tcp_recvspace;
1088tcp_seq tcp_new_isn(struct tcpcb *);
1089
#endif /* KERNEL_PRIVATE */
1091
1092#endif /* _NETINET_TCP_VAR_H_ */
1093