1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1993, 1994, 1995 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 61 * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.56.2.8 2001/08/22 00:59:13 silby Exp $ 62 */ 63 64#ifndef _NETINET_TCP_VAR_H_ 65#define _NETINET_TCP_VAR_H_ 66#include <sys/appleapiopts.h> 67#include <sys/queue.h> 68#include <netinet/in_pcb.h> 69#include <netinet/tcp_timer.h> 70 71#if defined(__LP64__) 72#define _TCPCB_PTR(x) u_int32_t 73#define _TCPCB_LIST_HEAD(name, type) \ 74struct name { \ 75 u_int32_t lh_first; \ 76}; 77#else 78#define _TCPCB_PTR(x) x 79#define _TCPCB_LIST_HEAD(name, type) LIST_HEAD(name, type) 80#endif 81 82#define TCP_RETRANSHZ 1000 /* granularity of TCP timestamps, 1ms */ 83#define TCP_TIMERHZ 100 /* frequency of TCP fast timer, 100 ms */ 84 85/* Minimum time quantum within which the timers are coalesced */ 86#define TCP_FASTTIMER_QUANTUM TCP_TIMERHZ /* fast mode, once every 100ms */ 87#define TCP_SLOWTIMER_QUANTUM (TCP_RETRANSHZ/2) /* slow mode, once every 500ms */ 88 89#define TCP_RETRANSHZ_TO_USEC 1000 90 91#ifdef KERNEL_PRIVATE 92#define N_TIME_WAIT_SLOTS 128 /* must be power of 2 */ 93 94/* Base RTT is stored for N_MIN_RTT_HISTORY slots. This is used to 95 * estimate expected minimum RTT for delay based congestion control 96 * algorithms. 97 */ 98#define N_RTT_BASE 5 99 100/* Always allow at least 4 packets worth of recv window when adjusting 101 * recv window using inter-packet arrival jitter. 102 */ 103#define MIN_IAJ_WIN 4 104 105/* A variation in delay of this many milliseconds is tolerable. This limit has to 106 * be low but greater than zero. We also use standard deviation on jitter to adjust 107 * this limit for different link and connection types. 108 */ 109#define ALLOWED_IAJ 5 110 111/* Ignore the first few packets on a connection until the ACK clock gets going 112 */ 113#define IAJ_IGNORE_PKTCNT 40 114 115/* Let the accumulated IAJ value increase by this threshold at most. This limit 116 * will control how many ALLOWED_IAJ measurements a receiver will have to see 117 * before opening the receive window 118 */ 119#define ACC_IAJ_HIGH_THRESH 100 120 121/* When accumulated IAJ reaches this value, the receiver starts to react by 122 * closing the window 123 */ 124#define ACC_IAJ_REACT_LIMIT 200 125 126/* If the number of small packets (smaller than IAJ packet size) seen on a 127 * connection is more than this threshold, reset the size and learn it again. 128 * This is needed because the sender might send smaller segments after PMTU 129 * discovery and the receiver has to learn the new size. 130 */ 131#define RESET_IAJ_SIZE_THRESH 20 132 133/* 134 * Adaptive timeout is a read/write timeout specified by the application to 135 * get a socket event when the transport layer detects a stall in data 136 * transfer. The value specified is the number of probes that can be sent 137 * to the peer before generating an event. Since it is not specified as 138 * a time value, the timeout will adjust based on the RTT seen on the link. 139 * The timeout will start only when there is an indication that the read/write 140 * operation is not making progress. 141 * 142 * If a write operation stalls, the probe will be retransmission of data. 143 * If a read operation stalls, the probe will be a keep-alive packet. 144 * 145 * The maximum value of adaptive timeout is set to 10 which will allow 146 * transmission of enough number of probes to the peer. 147 */ 148#define TCP_ADAPTIVE_TIMEOUT_MAX 10 149 150/* 151 * Kernel variables for tcp. 152 */ 153 154/* TCP segment queue entry */ 155struct tseg_qent { 156 LIST_ENTRY(tseg_qent) tqe_q; 157 int tqe_len; /* TCP segment data length */ 158 struct tcphdr *tqe_th; /* a pointer to tcp header */ 159 struct mbuf *tqe_m; /* mbuf contains packet */ 160}; 161LIST_HEAD(tsegqe_head, tseg_qent); 162extern int tcp_reass_maxseg; 163extern int tcp_reass_qsize; 164#ifdef MALLOC_DECLARE 165MALLOC_DECLARE(M_TSEGQ); 166#endif 167 168struct sackblk { 169 tcp_seq start; /* start seq no. of sack block */ 170 tcp_seq end; /* end seq no. */ 171}; 172 173struct sackhole { 174 tcp_seq start; /* start seq no. of hole */ 175 tcp_seq end; /* end seq no. */ 176 tcp_seq rxmit; /* next seq. no in hole to be retransmitted */ 177 TAILQ_ENTRY(sackhole) scblink; /* scoreboard linkage */ 178}; 179 180struct sackhint { 181 struct sackhole *nexthole; 182 int sack_bytes_rexmit; 183}; 184 185struct tcptemp { 186 u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ 187 struct tcphdr tt_t; 188}; 189 190struct bwmeas { 191 tcp_seq bw_start; /* start of bw measurement */ 192 uint32_t bw_ts; /* timestamp when bw measurement started */ 193 uint32_t bw_size; /* burst size in bytes for this bw measurement */ 194 uint32_t bw_minsizepkts; /* Min burst size as segments */ 195 uint32_t bw_maxsizepkts; /* Max burst size as segments */ 196 uint32_t bw_minsize; /* Min size in bytes */ 197 uint32_t bw_maxsize; /* Max size in bytes */ 198 uint32_t bw_sndbw; /* Measured send bw */ 199}; 200 201/* MPTCP Data sequence map entry */ 202struct mpt_dsn_map { 203 uint64_t mpt_dsn; /* data seq num recvd */ 204 uint32_t mpt_sseq; /* relative subflow # */ 205 uint16_t mpt_len; /* length of mapping */ 206 uint16_t mpt_csum; /* checksum value if on */ 207}; 208#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */ 209 210/* 211 * Tcp control block, one per tcp; fields: 212 * Organized for 16 byte cacheline efficiency. 213 */ 214struct tcpcb { 215 struct tsegqe_head t_segq; 216 int t_dupacks; /* consecutive dup acks recd */ 217 uint32_t t_timer[TCPT_NTIMERS]; /* tcp timers */ 218 struct tcptimerentry tentry; /* entry in timer list */ 219 220 struct inpcb *t_inpcb; /* back pointer to internet pcb */ 221 int t_state; /* state of this connection */ 222 uint32_t t_flags; 223#define TF_ACKNOW 0x00001 /* ack peer immediately */ 224#define TF_DELACK 0x00002 /* ack, but try to delay it */ 225#define TF_NODELAY 0x00004 /* don't delay packets to coalesce */ 226#define TF_NOOPT 0x00008 /* don't use tcp options */ 227#define TF_SENTFIN 0x00010 /* have sent FIN */ 228#define TF_REQ_SCALE 0x00020 /* have/will request window scaling */ 229#define TF_RCVD_SCALE 0x00040 /* other side has requested scaling */ 230#define TF_REQ_TSTMP 0x00080 /* have/will request timestamps */ 231#define TF_RCVD_TSTMP 0x00100 /* a timestamp was received in SYN */ 232#define TF_SACK_PERMIT 0x00200 /* other side said I could SACK */ 233#define TF_NEEDSYN 0x00400 /* send SYN (implicit state) */ 234#define TF_NEEDFIN 0x00800 /* send FIN (implicit state) */ 235#define TF_NOPUSH 0x01000 /* don't push */ 236#define TF_REQ_CC 0x02000 /* have/will request CC */ 237#define TF_RCVD_CC 0x04000 /* a CC was received in SYN */ 238#define TF_SENDCCNEW 0x08000 /* send CCnew instead of CC in SYN */ 239#define TF_MORETOCOME 0x10000 /* More data to be appended to sock */ 240#define TF_LOCAL 0x20000 /* connection to a host on local link */ 241#define TF_RXWIN0SENT 0x40000 /* sent a receiver win 0 in response */ 242#define TF_SLOWLINK 0x80000 /* route is a on a modem speed link */ 243#define TF_LASTIDLE 0x100000 /* connection was previously idle */ 244#define TF_FASTRECOVERY 0x200000 /* in NewReno Fast Recovery */ 245#define TF_WASFRECOVERY 0x400000 /* was in NewReno Fast Recovery */ 246#define TF_SIGNATURE 0x800000 /* require MD5 digests (RFC2385) */ 247#define TF_MAXSEGSNT 0x1000000 /* last segment sent was a full segment */ 248#define TF_PMTUD 0x4000000 /* Perform Path MTU Discovery for this connection */ 249#define TF_CLOSING 0x8000000 /* pending tcp close */ 250#define TF_TSO 0x10000000 /* TCP Segment Offloading is enable on this connection */ 251#define TF_BLACKHOLE 0x20000000 /* Path MTU Discovery Black Hole detection */ 252#define TF_TIMER_ONLIST 0x40000000 /* pcb is on tcp_timer_list */ 253#define TF_STRETCHACK 0x80000000 /* receiver is going to delay acks */ 254 255 int t_force; /* 1 if forcing out a byte */ 256 257 tcp_seq snd_una; /* send unacknowledged */ 258 tcp_seq snd_max; /* highest sequence number sent; 259 * used to recognize retransmits 260 */ 261 tcp_seq snd_nxt; /* send next */ 262 tcp_seq snd_up; /* send urgent pointer */ 263 264 tcp_seq snd_wl1; /* window update seg seq number */ 265 tcp_seq snd_wl2; /* window update seg ack number */ 266 tcp_seq iss; /* initial send sequence number */ 267 tcp_seq irs; /* initial receive sequence number */ 268 269 tcp_seq rcv_nxt; /* receive next */ 270 tcp_seq rcv_adv; /* advertised window */ 271 u_int32_t rcv_wnd; /* receive window */ 272 tcp_seq rcv_up; /* receive urgent pointer */ 273 274 u_int32_t snd_wnd; /* send window */ 275 u_int32_t snd_cwnd; /* congestion-controlled window */ 276 u_int32_t snd_ssthresh; /* snd_cwnd size threshold for 277 * for slow start exponential to 278 * linear switch 279 */ 280 tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ 281 282 u_int t_maxopd; /* mss plus options */ 283 284 u_int32_t t_rcvtime; /* time at which a packet was received */ 285 u_int32_t t_starttime; /* time connection was established */ 286 int t_rtttime; /* tcp clock when rtt calculation was started */ 287 tcp_seq t_rtseq; /* sequence number being timed */ 288 289 u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ 290 u_int32_t rfbuf_cnt; /* recv buffer autoscaling byte count */ 291 292 int t_rxtcur; /* current retransmit value (ticks) */ 293 u_int t_maxseg; /* maximum segment size */ 294 int t_srtt; /* smoothed round-trip time */ 295 int t_rttvar; /* variance in round-trip time */ 296 297 int t_rxtshift; /* log(2) of rexmt exp. backoff */ 298 u_int t_rttmin; /* minimum rtt allowed */ 299 u_int t_rttbest; /* best rtt we've seen */ 300 u_int t_rttcur; /* most recent value of rtt */ 301 u_int32_t t_rttupdated; /* number of times rtt sampled */ 302 u_int32_t t_rxt_conndroptime; /* retxmt conn gets dropped after this time, when set */ 303 u_int32_t t_rxtstart; /* time at which retransmission started */ 304 u_int32_t max_sndwnd; /* largest window peer has offered */ 305 306 int t_softerror; /* possible error not yet reported */ 307/* out-of-band data */ 308 char t_oobflags; /* have some */ 309 char t_iobc; /* input character */ 310#define TCPOOB_HAVEDATA 0x01 311#define TCPOOB_HADDATA 0x02 312/* RFC 1323 variables */ 313 u_int8_t snd_scale; /* window scaling for send window */ 314 u_int8_t rcv_scale; /* window scaling for recv window */ 315 u_int8_t request_r_scale; /* pending window scaling */ 316 u_int8_t requested_s_scale; 317 u_int8_t tcp_cc_index; /* index of congestion control algorithm */ 318 u_int8_t t_adaptive_rtimo; /* Read timeout used as a multiple of RTT */ 319 u_int8_t t_adaptive_wtimo; /* Write timeout used as a multiple of RTT */ 320 u_int32_t ts_recent; /* timestamp echo data */ 321 322 u_int32_t ts_recent_age; /* when last updated */ 323 tcp_seq last_ack_sent; 324/* RFC 1644 variables */ 325 tcp_cc cc_send; /* send connection count */ 326 tcp_cc cc_recv; /* receive connection count */ 327/* RFC 3465 variables */ 328 u_int32_t t_bytes_acked; /* ABC "bytes_acked" parameter */ 329 330 int t_lastchain; /* amount of packets chained last time around */ 331 u_int16_t t_unacksegs; /* received but unacked segments for delaying acks */ 332 u_int8_t t_rexmtthresh; /* duplicate ack threshold for entering fast recovery */ 333 u_int8_t t_rtimo_probes; /* number of adaptive rtimo probes sent */ 334 u_int32_t t_persist_timeout; /* ZWP persistence limit as set by PERSIST_TIMEOUT */ 335 u_int32_t t_persist_stop; /* persistence limit deadline if triggered by ZWP */ 336 u_int32_t t_notsent_lowat; /* Low water for not sent data */ 337 338/* Receiver state for stretch-ack algorithm */ 339 u_int32_t rcv_unackwin; /* to measure win for stretching acks */ 340 u_int32_t rcv_by_unackwin; /* bytes seen during the last ack-stretching win */ 341 u_int16_t rcv_waitforss; /* wait for packets during slow-start */ 342 u_int16_t ecn_flags; 343#define TE_SETUPSENT 0x01 /* Indicate we have sent ECN-SETUP SYN or SYN-ACK */ 344#define TE_SETUPRECEIVED 0x02 /* Indicate we have received ECN-SETUP SYN or SYN-ACK */ 345#define TE_SENDIPECT 0x04 /* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */ 346#define TE_SENDCWR 0x08 /* Indicate that the next non-retransmit should have the TCP CWR flag set */ 347#define TE_SENDECE 0x10 /* Indicate that the next packet should have the TCP ECE flag set */ 348#define TE_ECN_ON (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection) */ 349 350/* state for bad retransmit recovery */ 351 u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ 352 u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ 353 tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ 354 int t_srtt_prev; /* srtt prior to retransmit */ 355 int t_rttvar_prev; /* rttvar prior to retransmit */ 356 u_int32_t t_badrexmt_time; /* bad rexmt detection time */ 357 358/* state to limit the number of early retransmits */ 359 u_int32_t t_early_rexmt_win; /* window for limiting early retransmits */ 360 u_int16_t t_early_rexmt_count; /* number of early rexmts seen in past window */ 361 362/* SACK related state */ 363 int16_t snd_numholes; /* number of holes seen by sender */ 364 TAILQ_HEAD(sackhole_head, sackhole) snd_holes; 365 /* SACK scoreboard (sorted) */ 366 tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/ 367 int rcv_numsacks; /* # distinct sack blks present */ 368 struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */ 369 tcp_seq sack_newdata; /* New data xmitted in this recovery 370 episode starts at this seq number */ 371 struct sackhint sackhint; /* SACK scoreboard hint */ 372 373 u_int32_t t_pktlist_sentlen; /* total bytes in transmit chain */ 374 struct mbuf *t_pktlist_head; /* First packet in transmit chain */ 375 struct mbuf *t_pktlist_tail; /* Last packet in transmit chain */ 376 377 u_int32_t t_keepidle; /* keepalive idle timer (override global if > 0) */ 378 u_int32_t t_keepinit; /* connection timeout, i.e. idle time 379 in SYN_SENT or SYN_RECV state */ 380 u_int32_t t_keepintvl; /* interval between keepalives */ 381 u_int32_t t_keepcnt; /* number of keepalives before close */ 382 383 u_int32_t tso_max_segment_size; /* TCP Segment Offloading maximum segment unit for NIC */ 384 u_int t_pmtud_saved_maxopd; /* MSS saved before performing PMTU-D BlackHole detection */ 385 386 struct 387 { 388 u_int32_t rxduplicatebytes; 389 u_int32_t rxoutoforderbytes; 390 u_int32_t txretransmitbytes; 391 u_int8_t synrxtshift; 392 u_int8_t unused; 393 u_int16_t unused_pad_to_8; 394 } t_stat; 395 396 /* Background congestion related state */ 397 uint32_t rtt_hist[N_RTT_BASE]; /* history of minimum RTT */ 398 uint32_t rtt_count; /* Number of RTT samples in recent base history */ 399 uint32_t bg_ssthresh; /* Slow start threshold until delay increases */ 400 uint32_t t_flagsext; /* Another field to accommodate more flags */ 401#define TF_RXTFINDROP 0x1 /* Drop conn after retransmitting FIN 3 times */ 402#define TF_RCVUNACK_WAITSS 0x2 /* set when the receiver should not stretch acks */ 403#define TF_BWMEAS_INPROGRESS 0x4 /* Indicate BW meas is happening */ 404#define TF_MEASURESNDBW 0x8 /* Measure send bw on this connection */ 405#define TF_LRO_OFFLOADED 0x10 /* Connection LRO offloaded */ 406#define TF_SACK_ENABLE 0x20 /* SACK is enabled */ 407#define TF_RECOMPUTE_RTT 0x40 /* recompute RTT after spurious retransmit */ 408#define TF_DETECT_READSTALL 0x80 /* Used to detect a stall during read operation */ 409#define TF_RECV_THROTTLE 0x100 /* Input throttling active */ 410#define TF_NOSTRETCHACK 0x200 /* ack every other packet */ 411#define TF_STREAMEOW 0x400 /* Last packet was small indicating end of write */ 412#if TRAFFIC_MGT 413 /* Inter-arrival jitter related state */ 414 uint32_t iaj_rcv_ts; /* tcp clock when the first packet was received */ 415 uint16_t iaj_size; /* Size of packet for iaj measurement */ 416 uint16_t iaj_small_pkt; /* Count of packets smaller than iaj_size */ 417 uint16_t iaj_pktcnt; /* packet count, to avoid throttling initially */ 418 uint16_t acc_iaj; /* Accumulated iaj */ 419 tcp_seq iaj_rwintop; /* recent max advertised window */ 420 uint32_t avg_iaj; /* Mean */ 421 uint32_t std_dev_iaj; /* Standard deviation */ 422#endif /* TRAFFIC_MGT */ 423 struct bwmeas *t_bwmeas; /* State for bandwidth measurement */ 424 uint32_t t_lropktlen; /* Bytes in a LRO frame */ 425 tcp_seq t_idleat; /* rcv_nxt at idle time */ 426 TAILQ_ENTRY(tcpcb) t_twentry; /* link for time wait queue */ 427#if MPTCP 428 u_int32_t t_mpflags; /* flags for multipath TCP */ 429 430#define TMPF_PREESTABLISHED 0x00000001 /* conn in pre-established state */ 431#define TMPF_SENT_KEYS 0x00000002 /* indicates that keys were sent */ 432#define TMPF_MPTCP_TRUE 0x00000004 /* negotiated MPTCP successfully */ 433#define TMPF_MPTCP_RCVD_KEY 0x00000008 /* state for 3-way handshake */ 434#define TMPF_SND_MPPRIO 0x00000010 /* send priority of subflow */ 435#define TMPF_SND_REM_ADDR 0x00000020 /* initiate address removal */ 436#define TMPF_UNUSED 0x00000040 /* address addition acked by peer */ 437#define TMPF_JOINED_FLOW 0x00000080 /* Indicates additional flow */ 438#define TMPF_BACKUP_PATH 0x00000100 /* Indicates backup path */ 439#define TMPF_MPTCP_ACKNOW 0x00000200 /* Send Data ACK */ 440#define TMPF_SEND_DSN 0x00000400 /* Send DSN mapping */ 441#define TMPF_SEND_DFIN 0x00000800 /* Send Data FIN */ 442#define TMPF_RECV_DFIN 0x00001000 /* Recv Data FIN */ 443#define TMPF_SENT_JOIN 0x00002000 /* Sent Join */ 444#define TMPF_RECVD_JOIN 0x00004000 /* Received Join */ 445#define TMPF_RESET 0x00008000 /* Send RST */ 446#define TMPF_TCP_FALLBACK 0x00010000 /* Fallback to TCP */ 447#define TMPF_FASTCLOSE 0x00020000 /* Send Fastclose option */ 448#define TMPF_EMBED_DSN 0x00040000 /* tp has DSN mapping */ 449#define TMPF_MPTCP_READY 0x00080000 /* Can send DSS options on data */ 450#define TMPF_INFIN_SENT 0x00100000 /* Sent infinite mapping */ 451#define TMPF_SND_MPFAIL 0x00200000 /* Received mapping csum failure */ 452 void *t_mptcb; /* pointer to MPTCP TCB */ 453 tcp_seq t_mpuna; /* unacknowledged sequence */ 454 struct mpt_dsn_map t_rcv_map; /* Receive mapping list */ 455 u_int8_t t_local_aid; /* Addr Id for authentication */ 456 u_int8_t t_rem_aid; /* Addr ID of another subflow */ 457 u_int8_t t_mprxtshift; /* join retransmission */ 458#endif /* MPTCP */ 459}; 460 461#define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY) 462#define SACK_ENABLED(tp) (tp->t_flagsext & TF_SACK_ENABLE) 463 464/* 465 * If the connection is in a throttled state due to advisory feedback from 466 * the interface output queue, reset that state. We do this in favor 467 * of entering recovery because the data transfer during recovery 468 * should be just a trickle and it will help to improve performance. 469 * We also do not want to back off twice in the same RTT. 470 */ 471#define ENTER_FASTRECOVERY(_tp_) do { \ 472 (_tp_)->t_flags |= TF_FASTRECOVERY; \ 473 if (INP_IS_FLOW_CONTROLLED((_tp_)->t_inpcb)) \ 474 inp_reset_fc_state((_tp_)->t_inpcb); \ 475} while(0) 476 477#define EXIT_FASTRECOVERY(_tp_) do { \ 478 (_tp_)->t_flags &= ~TF_FASTRECOVERY; \ 479 (_tp_)->t_dupacks = 0; \ 480 (_tp_)->t_rexmtthresh = tcprexmtthresh; \ 481 (_tp_)->t_bytes_acked = 0; \ 482} while(0) 483 484/* 485 * When the number of duplicate acks received is less than 486 * the retransmit threshold, use Limited Transmit algorithm 487 */ 488extern int tcprexmtthresh; 489#define ALLOW_LIMITED_TRANSMIT(_tp_) \ 490 ((_tp_)->t_dupacks > 0 && \ 491 (_tp_)->t_dupacks < (_tp_)->t_rexmtthresh) 492 493/* 494 * This condition is true is timestamp option is supported 495 * on a connection. 496 */ 497#define TSTMP_SUPPORTED(_tp_) \ 498 (((_tp_)->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) == \ 499 (TF_REQ_TSTMP|TF_RCVD_TSTMP)) 500 501/* 502 * Gives number of bytes acked by this ack 503 */ 504#define BYTES_ACKED(_th_, _tp_) \ 505 ((_th_)->th_ack - (_tp_)->snd_una) 506 507#if CONFIG_DTRACE 508enum tcp_cc_event { 509 TCP_CC_CWND_INIT, 510 TCP_CC_INSEQ_ACK_RCVD, 511 TCP_CC_ACK_RCVD, 512 TCP_CC_ENTER_FASTRECOVERY, 513 TCP_CC_IN_FASTRECOVERY, 514 TCP_CC_EXIT_FASTRECOVERY, 515 TCP_CC_PARTIAL_ACK, 516 TCP_CC_IDLE_TIMEOUT, 517 TCP_CC_REXMT_TIMEOUT, 518 TCP_CC_ECN_RCVD, 519 TCP_CC_BAD_REXMT_RECOVERY, 520 TCP_CC_OUTPUT_ERROR, 521 TCP_CC_CHANGE_ALGO, 522 TCP_CC_FLOW_CONTROL, 523 TCP_CC_SUSPEND, 524 TCP_CC_LIMITED_TRANSMIT, 525 TCP_CC_EARLY_RETRANSMIT 526}; 527#endif /* CONFIG_DTRACE */ 528 529/* 530 * Structure to hold TCP options that are only used during segment 531 * processing (in tcp_input), but not held in the tcpcb. 532 * It's basically used to reduce the number of parameters 533 * to tcp_dooptions. 534 */ 535struct tcpopt { 536 u_int32_t to_flags; /* which options are present */ 537#define TOF_TS 0x0001 /* timestamp */ 538#define TOF_MSS 0x0010 539#define TOF_SCALE 0x0020 540#define TOF_SIGNATURE 0x0040 /* signature option present */ 541#define TOF_SIGLEN 0x0080 /* signature length valid (RFC2385) */ 542#define TOF_SACK 0x0100 /* Peer sent SACK option */ 543#define TOF_MPTCP 0x0200 /* MPTCP options to be dropped */ 544 u_int32_t to_tsval; 545 u_int32_t to_tsecr; 546 u_int16_t to_mss; 547 u_int8_t to_requested_s_scale; 548 u_int8_t to_nsacks; /* number of SACK blocks */ 549 u_char *to_sacks; /* pointer to the first SACK blocks */ 550}; 551 552/* 553 * The TAO cache entry which is stored in the protocol family specific 554 * portion of the route metrics. 555 */ 556struct rmxp_tao { 557 tcp_cc tao_cc; /* latest CC in valid SYN */ 558 tcp_cc tao_ccsent; /* latest CC sent to peer */ 559 u_short tao_mssopt; /* peer's cached MSS */ 560#ifdef notyet 561 u_short tao_flags; /* cache status flags */ 562#define TAOF_DONT 0x0001 /* peer doesn't understand rfc1644 */ 563#define TAOF_OK 0x0002 /* peer does understand rfc1644 */ 564#define TAOF_UNDEF 0 /* we don't know yet */ 565#endif /* notyet */ 566}; 567#define rmx_taop(r) ((struct rmxp_tao *)(r).rmx_filler) 568 569#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) 570#define sototcpcb(so) (intotcpcb(sotoinpcb(so))) 571 572/* 573 * The rtt measured is in milliseconds as the timestamp granularity is 574 * a millisecond. The smoothed round-trip time and estimated variance 575 * are stored as fixed point numbers scaled by the values below. 576 * For convenience, these scales are also used in smoothing the average 577 * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). 578 * With these scales, srtt has 5 bits to the right of the binary point, 579 * and thus an "ALPHA" of 0.875. rttvar has 4 bits to the right of the 580 * binary point, and is smoothed with an ALPHA of 0.75. 581 */ 582#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */ 583#define TCP_RTT_SHIFT 5 /* shift for srtt; 5 bits frac. */ 584#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 4 bits */ 585#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 4 bits */ 586#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */ 587 588/* 589 * The initial retransmission should happen at rtt + 4 * rttvar. 590 * Because of the way we do the smoothing, srtt and rttvar 591 * will each average +1/2 tick of bias. When we compute 592 * the retransmit timer, we want 1/2 tick of rounding and 593 * 1 extra tick because of +-1/2 tick uncertainty in the 594 * firing of the timer. The bias will give us exactly the 595 * 1.5 tick we need. But, because the bias is 596 * statistical, we have to test that we don't drop below 597 * the minimum feasible timer (which is 2 ticks). 598 * This version of the macro adapted from a paper by Lawrence 599 * Brakmo and Larry Peterson which outlines a problem caused 600 * by insufficient precision in the original implementation, 601 * which results in inappropriately large RTO values for very 602 * fast networks. 603 */ 604#define TCP_REXMTVAL(tp) \ 605 max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \ 606 + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) 607 608/* 609 * Jaguar compatible TCP control block, for xtcpcb 610 * Does not have the old fields 611 */ 612struct otcpcb { 613#else 614struct tseg_qent; 615_TCPCB_LIST_HEAD(tsegqe_head, tseg_qent); 616 617struct tcpcb { 618#endif /* KERNEL_PRIVATE */ 619#if defined(KERNEL_PRIVATE) 620 u_int32_t t_segq; 621#else 622 struct tsegqe_head t_segq; 623#endif /* KERNEL_PRIVATE */ 624 int t_dupacks; /* consecutive dup acks recd */ 625 u_int32_t unused; /* unused now: was t_template */ 626 627 int t_timer[TCPT_NTIMERS_EXT]; /* tcp timers */ 628 629 _TCPCB_PTR(struct inpcb *) t_inpcb; /* back pointer to internet pcb */ 630 int t_state; /* state of this connection */ 631 u_int t_flags; 632#define TF_ACKNOW 0x00001 /* ack peer immediately */ 633#define TF_DELACK 0x00002 /* ack, but try to delay it */ 634#define TF_NODELAY 0x00004 /* don't delay packets to coalesce */ 635#define TF_NOOPT 0x00008 /* don't use tcp options */ 636#define TF_SENTFIN 0x00010 /* have sent FIN */ 637#define TF_REQ_SCALE 0x00020 /* have/will request window scaling */ 638#define TF_RCVD_SCALE 0x00040 /* other side has requested scaling */ 639#define TF_REQ_TSTMP 0x00080 /* have/will request timestamps */ 640#define TF_RCVD_TSTMP 0x00100 /* a timestamp was received in SYN */ 641#define TF_SACK_PERMIT 0x00200 /* other side said I could SACK */ 642#define TF_NEEDSYN 0x00400 /* send SYN (implicit state) */ 643#define TF_NEEDFIN 0x00800 /* send FIN (implicit state) */ 644#define TF_NOPUSH 0x01000 /* don't push */ 645#define TF_REQ_CC 0x02000 /* have/will request CC */ 646#define TF_RCVD_CC 0x04000 /* a CC was received in SYN */ 647#define TF_SENDCCNEW 0x08000 /* send CCnew instead of CC in SYN */ 648#define TF_MORETOCOME 0x10000 /* More data to be appended to sock */ 649#define TF_LQ_OVERFLOW 0x20000 /* listen queue overflow */ 650#define TF_RXWIN0SENT 0x40000 /* sent a receiver win 0 in response */ 651#define TF_SLOWLINK 0x80000 /* route is a on a modem speed link */ 652 653 int t_force; /* 1 if forcing out a byte */ 654 655 tcp_seq snd_una; /* send unacknowledged */ 656 tcp_seq snd_max; /* highest sequence number sent; 657 * used to recognize retransmits 658 */ 659 tcp_seq snd_nxt; /* send next */ 660 tcp_seq snd_up; /* send urgent pointer */ 661 662 tcp_seq snd_wl1; /* window update seg seq number */ 663 tcp_seq snd_wl2; /* window update seg ack number */ 664 tcp_seq iss; /* initial send sequence number */ 665 tcp_seq irs; /* initial receive sequence number */ 666 667 tcp_seq rcv_nxt; /* receive next */ 668 tcp_seq rcv_adv; /* advertised window */ 669 u_int32_t rcv_wnd; /* receive window */ 670 tcp_seq rcv_up; /* receive urgent pointer */ 671 672 u_int32_t snd_wnd; /* send window */ 673 u_int32_t snd_cwnd; /* congestion-controlled window */ 674 u_int32_t snd_ssthresh; /* snd_cwnd size threshold for 675 * for slow start exponential to 676 * linear switch 677 */ 678 u_int t_maxopd; /* mss plus options */ 679 680 u_int32_t t_rcvtime; /* time at which a packet was received */ 681 u_int32_t t_starttime; /* time connection was established */ 682 int t_rtttime; /* round trip time */ 683 tcp_seq t_rtseq; /* sequence number being timed */ 684 685 int t_rxtcur; /* current retransmit value (ticks) */ 686 u_int t_maxseg; /* maximum segment size */ 687 int t_srtt; /* smoothed round-trip time */ 688 int t_rttvar; /* variance in round-trip time */ 689 690 int t_rxtshift; /* log(2) of rexmt exp. backoff */ 691 u_int t_rttmin; /* minimum rtt allowed */ 692 u_int32_t t_rttupdated; /* number of times rtt sampled */ 693 u_int32_t max_sndwnd; /* largest window peer has offered */ 694 695 int t_softerror; /* possible error not yet reported */ 696/* out-of-band data */ 697 char t_oobflags; /* have some */ 698 char t_iobc; /* input character */ 699#define TCPOOB_HAVEDATA 0x01 700#define TCPOOB_HADDATA 0x02 701/* RFC 1323 variables */ 702 u_char snd_scale; /* window scaling for send window */ 703 u_char rcv_scale; /* window scaling for recv window */ 704 u_char request_r_scale; /* pending window scaling */ 705 u_char requested_s_scale; 706 u_int32_t ts_recent; /* timestamp echo data */ 707 708 u_int32_t ts_recent_age; /* when last updated */ 709 tcp_seq last_ack_sent; 710/* RFC 1644 variables */ 711 tcp_cc cc_send; /* send connection count */ 712 tcp_cc cc_recv; /* receive connection count */ 713 tcp_seq snd_recover; /* for use in fast recovery */ 714/* experimental */ 715 u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ 716 u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ 717 u_int32_t t_badrxtwin; /* window for retransmit recovery */ 718}; 719 720 721/* 722 * TCP statistics. 723 * Many of these should be kept per connection, 724 * but that's inconvenient at the moment. 725 */ 726struct tcpstat { 727 u_int32_t tcps_connattempt; /* connections initiated */ 728 u_int32_t tcps_accepts; /* connections accepted */ 729 u_int32_t tcps_connects; /* connections established */ 730 u_int32_t tcps_drops; /* connections dropped */ 731 u_int32_t tcps_conndrops; /* embryonic connections dropped */ 732 u_int32_t tcps_closed; /* conn. closed (includes drops) */ 733 u_int32_t tcps_segstimed; /* segs where we tried to get rtt */ 734 u_int32_t tcps_rttupdated; /* times we succeeded */ 735 u_int32_t tcps_delack; /* delayed acks sent */ 736 u_int32_t tcps_timeoutdrop; /* conn. dropped in rxmt timeout */ 737 u_int32_t tcps_rexmttimeo; /* retransmit timeouts */ 738 u_int32_t tcps_persisttimeo; /* persist timeouts */ 739 u_int32_t tcps_keeptimeo; /* keepalive timeouts */ 740 u_int32_t tcps_keepprobe; /* keepalive probes sent */ 741 u_int32_t tcps_keepdrops; /* connections dropped in keepalive */ 742 743 u_int32_t tcps_sndtotal; /* total packets sent */ 744 u_int32_t tcps_sndpack; /* data packets sent */ 745 u_int32_t tcps_sndbyte; /* data bytes sent */ 746 u_int32_t tcps_sndrexmitpack; /* data packets retransmitted */ 747 u_int32_t tcps_sndrexmitbyte; /* data bytes retransmitted */ 748 u_int32_t tcps_sndacks; /* ack-only packets sent */ 749 u_int32_t tcps_sndprobe; /* window probes sent */ 750 u_int32_t tcps_sndurg; /* packets sent with URG only */ 751 u_int32_t tcps_sndwinup; /* window update-only packets sent */ 752 u_int32_t tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ 753 754 u_int32_t tcps_rcvtotal; /* total packets received */ 755 u_int32_t tcps_rcvpack; /* packets received in sequence */ 756 u_int32_t tcps_rcvbyte; /* bytes received in sequence */ 757 u_int32_t tcps_rcvbadsum; /* packets received with ccksum errs */ 758 u_int32_t tcps_rcvbadoff; /* packets received with bad offset */ 759 u_int32_t tcps_rcvmemdrop; /* packets dropped for lack of memory */ 760 u_int32_t tcps_rcvshort; /* packets received too short */ 761 u_int32_t tcps_rcvduppack; /* duplicate-only packets received */ 762 u_int32_t tcps_rcvdupbyte; /* duplicate-only bytes received */ 763 u_int32_t tcps_rcvpartduppack; /* packets with some duplicate data */ 764 u_int32_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */ 765 u_int32_t tcps_rcvoopack; /* out-of-order packets received */ 766 u_int32_t tcps_rcvoobyte; /* out-of-order bytes received */ 767 u_int32_t tcps_rcvpackafterwin; /* packets with data after window */ 768 u_int32_t tcps_rcvbyteafterwin; /* bytes rcvd after window */ 769 u_int32_t tcps_rcvafterclose; /* packets rcvd after "close" */ 770 u_int32_t tcps_rcvwinprobe; /* rcvd window probe packets */ 771 u_int32_t tcps_rcvdupack; /* rcvd duplicate acks */ 772 u_int32_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */ 773 u_int32_t tcps_rcvackpack; /* rcvd ack packets */ 774 u_int32_t tcps_rcvackbyte; /* bytes acked by rcvd acks */ 775 u_int32_t tcps_rcvwinupd; /* rcvd window update packets */ 776 u_int32_t tcps_pawsdrop; /* segments dropped due to PAWS */ 777 u_int32_t tcps_predack; /* times hdr predict ok for acks */ 778 u_int32_t tcps_preddat; /* times hdr predict ok for data pkts */ 779 u_int32_t tcps_pcbcachemiss; 780 u_int32_t tcps_cachedrtt; /* times cached RTT in route updated */ 781 u_int32_t tcps_cachedrttvar; /* times cached rttvar updated */ 782 u_int32_t tcps_cachedssthresh; /* times cached ssthresh updated */ 783 u_int32_t tcps_usedrtt; /* times RTT initialized from route */ 784 u_int32_t tcps_usedrttvar; /* times RTTVAR initialized from rt */ 785 u_int32_t tcps_usedssthresh; /* times ssthresh initialized from rt*/ 786 u_int32_t tcps_persistdrop; /* timeout in persist state */ 787 u_int32_t tcps_badsyn; /* bogus SYN, e.g. premature ACK */ 788 u_int32_t tcps_mturesent; /* resends due to MTU discovery */ 789 u_int32_t tcps_listendrop; /* listen queue overflows */ 790 791 /* new stats from FreeBSD 5.4 sync up */ 792 u_int32_t tcps_minmssdrops; /* average minmss too low drops */ 793 u_int32_t tcps_sndrexmitbad; /* unnecessary packet retransmissions */ 794 u_int32_t tcps_badrst; /* ignored RSTs in the window */ 795 796 u_int32_t tcps_sc_added; /* entry added to syncache */ 797 u_int32_t tcps_sc_retransmitted; /* syncache entry was retransmitted */ 798 u_int32_t tcps_sc_dupsyn; /* duplicate SYN packet */ 799 u_int32_t tcps_sc_dropped; /* could not reply to packet */ 800 u_int32_t tcps_sc_completed; /* successful extraction of entry */ 801 u_int32_t tcps_sc_bucketoverflow; /* syncache per-bucket limit hit */ 802 u_int32_t tcps_sc_cacheoverflow; /* syncache cache limit hit */ 803 u_int32_t tcps_sc_reset; /* RST removed entry from syncache */ 804 u_int32_t tcps_sc_stale; /* timed out or listen socket gone */ 805 u_int32_t tcps_sc_aborted; /* syncache entry aborted */ 806 u_int32_t tcps_sc_badack; /* removed due to bad ACK */ 807 u_int32_t tcps_sc_unreach; /* ICMP unreachable received */ 808 u_int32_t tcps_sc_zonefail; /* zalloc() failed */ 809 u_int32_t tcps_sc_sendcookie; /* SYN cookie sent */ 810 u_int32_t tcps_sc_recvcookie; /* SYN cookie received */ 811 812 u_int32_t tcps_hc_added; /* entry added to hostcache */ 813 u_int32_t tcps_hc_bucketoverflow; /* hostcache per bucket limit hit */ 814 815 /* SACK related stats */ 816 u_int32_t tcps_sack_recovery_episode; /* SACK recovery episodes */ 817 u_int32_t tcps_sack_rexmits; /* SACK rexmit segments */ 818 u_int32_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */ 819 u_int32_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */ 820 u_int32_t tcps_sack_send_blocks; /* SACK blocks (options) sent */ 821 u_int32_t tcps_sack_sboverflow; /* SACK sendblock overflow */ 822 823 u_int32_t tcps_bg_rcvtotal; /* total background packets received */ 824 u_int32_t tcps_rxtfindrop; /* drop conn after retransmitting FIN */ 825 u_int32_t tcps_fcholdpacket; /* packets withheld because of flow control */ 826 827 /* LRO related stats */ 828 u_int32_t tcps_coalesced_pack; /* number of coalesced packets */ 829 u_int32_t tcps_flowtbl_full; /* times flow table was full */ 830 u_int32_t tcps_flowtbl_collision; /* collisions in flow tbl */ 831 u_int32_t tcps_lro_twopack; /* 2 packets coalesced */ 832 u_int32_t tcps_lro_multpack; /* 3 or 4 pkts coalesced */ 833 u_int32_t tcps_lro_largepack; /* 5 or more pkts coalesced */ 834 835 u_int32_t tcps_limited_txt; /* Limited transmit used */ 836 u_int32_t tcps_early_rexmt; /* Early retransmit used */ 837 u_int32_t tcps_sack_ackadv; /* Cumulative ack advanced along with sack */ 838 839 /* Checksum related stats */ 840 u_int32_t tcps_rcv_swcsum; /* tcp swcksum (inbound), packets */ 841 u_int32_t tcps_rcv_swcsum_bytes; /* tcp swcksum (inbound), bytes */ 842 u_int32_t tcps_rcv6_swcsum; /* tcp6 swcksum (inbound), packets */ 843 u_int32_t tcps_rcv6_swcsum_bytes; /* tcp6 swcksum (inbound), bytes */ 844 u_int32_t tcps_snd_swcsum; /* tcp swcksum (outbound), packets */ 845 u_int32_t tcps_snd_swcsum_bytes; /* tcp swcksum (outbound), bytes */ 846 u_int32_t tcps_snd6_swcsum; /* tcp6 swcksum (outbound), packets */ 847 u_int32_t tcps_snd6_swcsum_bytes; /* tcp6 swcksum (outbound), bytes */ 848 u_int32_t tcps_msg_unopkts; /* unordered packet on TCP msg stream */ 849 u_int32_t tcps_msg_unoappendfail; /* failed to append unordered pkt */ 850 u_int32_t tcps_msg_sndwaithipri; /* send is waiting for high priority data */ 851 852 /* MPTCP Related stats */ 853 u_int32_t tcps_invalid_mpcap; /* Invalid MPTCP capable opts */ 854 u_int32_t tcps_invalid_joins; /* Invalid MPTCP joins */ 855 u_int32_t tcps_mpcap_fallback; /* TCP fallback in primary */ 856 u_int32_t tcps_join_fallback; /* No MPTCP in secondary */ 857 u_int32_t tcps_estab_fallback; /* DSS option dropped */ 858 u_int32_t tcps_invalid_opt; /* Catchall error stat */ 859 u_int32_t tcps_mp_outofwin; /* Packet lies outside the 860 shared recv window */ 861 u_int32_t tcps_mp_reducedwin; /* Reduced subflow window */ 862 u_int32_t tcps_mp_badcsum; /* Bad DSS csum */ 863 u_int32_t tcps_mp_oodata; /* Out of order data */ 864 u_int32_t tcps_mp_switches; /* number of subflow switch */ 865 u_int32_t tcps_mp_rcvtotal; /* number of rcvd packets */ 866 u_int32_t tcps_mp_rcvbytes; /* number of bytes received */ 867 u_int32_t tcps_mp_sndpacks; /* number of data packs sent */ 868 u_int32_t tcps_mp_sndbytes; /* number of bytes sent */ 869 u_int32_t tcps_join_rxmts; /* join ack retransmits */ 870}; 871 872struct tcpstat_local { 873 u_int64_t badformat; 874 u_int64_t unspecv6; 875 u_int64_t synfin; 876 u_int64_t badformatipsec; 877 u_int64_t noconnnolist; 878 u_int64_t noconnlist; 879 u_int64_t listbadsyn; 880 u_int64_t icmp6unreach; 881 u_int64_t deprecate6; 882 u_int64_t ooopacket; 883 u_int64_t rstinsynrcv; 884 u_int64_t dospacket; 885 u_int64_t cleanup; 886 u_int64_t synwindow; 887}; 888 889#pragma pack(4) 890 891/* 892 * TCB structure exported to user-land via sysctl(3). 893 * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been 894 * included. Not all of our clients do. 895 */ 896 897struct xtcpcb { 898 u_int32_t xt_len; 899#ifdef KERNEL_PRIVATE 900 struct inpcb_compat xt_inp; 901#else 902 struct inpcb xt_inp; 903#endif 904#ifdef KERNEL_PRIVATE 905 struct otcpcb xt_tp; 906#else 907 struct tcpcb xt_tp; 908#endif 909 struct xsocket xt_socket; 910 u_quad_t xt_alignment_hack; 911}; 912 913 914struct xtcpcb64 { 915 u_int32_t xt_len; 916 struct xinpcb64 xt_inpcb; 917 918 u_int64_t t_segq; 919 int t_dupacks; /* consecutive dup acks recd */ 920 921 int t_timer[TCPT_NTIMERS_EXT]; /* tcp timers */ 922 923 int t_state; /* state of this connection */ 924 u_int t_flags; 925 926 int t_force; /* 1 if forcing out a byte */ 927 928 tcp_seq snd_una; /* send unacknowledged */ 929 tcp_seq snd_max; /* highest sequence number sent; 930 * used to recognize retransmits 931 */ 932 tcp_seq snd_nxt; /* send next */ 933 tcp_seq snd_up; /* send urgent pointer */ 934 935 tcp_seq snd_wl1; /* window update seg seq number */ 936 tcp_seq snd_wl2; /* window update seg ack number */ 937 tcp_seq iss; /* initial send sequence number */ 938 tcp_seq irs; /* initial receive sequence number */ 939 940 tcp_seq rcv_nxt; /* receive next */ 941 tcp_seq rcv_adv; /* advertised window */ 942 u_int32_t rcv_wnd; /* receive window */ 943 tcp_seq rcv_up; /* receive urgent pointer */ 944 945 u_int32_t snd_wnd; /* send window */ 946 u_int32_t snd_cwnd; /* congestion-controlled window */ 947 u_int32_t snd_ssthresh; /* snd_cwnd size threshold for 948 * for slow start exponential to 949 * linear switch 950 */ 951 u_int t_maxopd; /* mss plus options */ 952 953 u_int32_t t_rcvtime; /* time at which a packet was received */ 954 u_int32_t t_starttime; /* time connection was established */ 955 int t_rtttime; /* round trip time */ 956 tcp_seq t_rtseq; /* sequence number being timed */ 957 958 int t_rxtcur; /* current retransmit value (ticks) */ 959 u_int t_maxseg; /* maximum segment size */ 960 int t_srtt; /* smoothed round-trip time */ 961 int t_rttvar; /* variance in round-trip time */ 962 963 int t_rxtshift; /* log(2) of rexmt exp. backoff */ 964 u_int t_rttmin; /* minimum rtt allowed */ 965 u_int32_t t_rttupdated; /* number of times rtt sampled */ 966 u_int32_t max_sndwnd; /* largest window peer has offered */ 967 968 int t_softerror; /* possible error not yet reported */ 969/* out-of-band data */ 970 char t_oobflags; /* have some */ 971 char t_iobc; /* input character */ 972/* RFC 1323 variables */ 973 u_char snd_scale; /* window scaling for send window */ 974 u_char rcv_scale; /* window scaling for recv window */ 975 u_char request_r_scale; /* pending window scaling */ 976 u_char requested_s_scale; 977 u_int32_t ts_recent; /* timestamp echo data */ 978 979 u_int32_t ts_recent_age; /* when last updated */ 980 tcp_seq last_ack_sent; 981/* RFC 1644 variables */ 982 tcp_cc cc_send; /* send connection count */ 983 tcp_cc cc_recv; /* receive connection count */ 984 tcp_seq snd_recover; /* for use in fast recovery */ 985/* experimental */ 986 u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ 987 u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ 988 u_int32_t t_badrxtwin; /* window for retransmit recovery */ 989 990 u_quad_t xt_alignment_hack; 991}; 992 993 994#ifdef PRIVATE 995 996struct xtcpcb_n { 997 u_int32_t xt_len; 998 u_int32_t xt_kind; /* XSO_TCPCB */ 999 1000 u_int64_t t_segq; 1001 int t_dupacks; /* consecutive dup acks recd */ 1002 1003 int t_timer[TCPT_NTIMERS_EXT]; /* tcp timers */ 1004 1005 int t_state; /* state of this connection */ 1006 u_int t_flags; 1007 1008 int t_force; /* 1 if forcing out a byte */ 1009 1010 tcp_seq snd_una; /* send unacknowledged */ 1011 tcp_seq snd_max; /* highest sequence number sent; 1012 * used to recognize retransmits 1013 */ 1014 tcp_seq snd_nxt; /* send next */ 1015 tcp_seq snd_up; /* send urgent pointer */ 1016 1017 tcp_seq snd_wl1; /* window update seg seq number */ 1018 tcp_seq snd_wl2; /* window update seg ack number */ 1019 tcp_seq iss; /* initial send sequence number */ 1020 tcp_seq irs; /* initial receive sequence number */ 1021 1022 tcp_seq rcv_nxt; /* receive next */ 1023 tcp_seq rcv_adv; /* advertised window */ 1024 u_int32_t rcv_wnd; /* receive window */ 1025 tcp_seq rcv_up; /* receive urgent pointer */ 1026 1027 u_int32_t snd_wnd; /* send window */ 1028 u_int32_t snd_cwnd; /* congestion-controlled window */ 1029 u_int32_t snd_ssthresh; /* snd_cwnd size threshold for 1030 * for slow start exponential to 1031 * linear switch 1032 */ 1033 u_int t_maxopd; /* mss plus options */ 1034 1035 u_int32_t t_rcvtime; /* time at which a packet was received */ 1036 u_int32_t t_starttime; /* time connection was established */ 1037 int t_rtttime; /* round trip time */ 1038 tcp_seq t_rtseq; /* sequence number being timed */ 1039 1040 int t_rxtcur; /* current retransmit value (ticks) */ 1041 u_int t_maxseg; /* maximum segment size */ 1042 int t_srtt; /* smoothed round-trip time */ 1043 int t_rttvar; /* variance in round-trip time */ 1044 1045 int t_rxtshift; /* log(2) of rexmt exp. backoff */ 1046 u_int t_rttmin; /* minimum rtt allowed */ 1047 u_int32_t t_rttupdated; /* number of times rtt sampled */ 1048 u_int32_t max_sndwnd; /* largest window peer has offered */ 1049 1050 int t_softerror; /* possible error not yet reported */ 1051 /* out-of-band data */ 1052 char t_oobflags; /* have some */ 1053 char t_iobc; /* input character */ 1054 /* RFC 1323 variables */ 1055 u_char snd_scale; /* window scaling for send window */ 1056 u_char rcv_scale; /* window scaling for recv window */ 1057 u_char request_r_scale; /* pending window scaling */ 1058 u_char requested_s_scale; 1059 u_int32_t ts_recent; /* timestamp echo data */ 1060 1061 u_int32_t ts_recent_age; /* when last updated */ 1062 tcp_seq last_ack_sent; 1063 /* RFC 1644 variables */ 1064 tcp_cc cc_send; /* send connection count */ 1065 tcp_cc cc_recv; /* receive connection count */ 1066 tcp_seq snd_recover; /* for use in fast recovery */ 1067 /* experimental */ 1068 u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ 1069 u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ 1070}; 1071 1072#endif /* PRIVATE */ 1073 1074#pragma pack() 1075 1076/* 1077 * Names for TCP sysctl objects 1078 */ 1079#define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */ 1080#define TCPCTL_DO_RFC1644 2 /* use RFC-1644 extensions */ 1081#define TCPCTL_MSSDFLT 3 /* MSS default */ 1082#define TCPCTL_STATS 4 /* statistics (read-only) */ 1083#define TCPCTL_RTTDFLT 5 /* default RTT estimate */ 1084#define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */ 1085#define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */ 1086#define TCPCTL_SENDSPACE 8 /* send buffer space */ 1087#define TCPCTL_RECVSPACE 9 /* receive buffer space */ 1088#define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */ 1089#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */ 1090#define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */ 1091#define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */ 1092#define TCPCTL_MAXID 14 1093 1094#ifdef BSD_KERNEL_PRIVATE 1095#include <sys/bitstring.h> 1096 1097#define TCP_PKTLIST_CLEAR(tp) { \ 1098 (tp)->t_pktlist_head = (tp)->t_pktlist_tail = NULL; \ 1099 (tp)->t_lastchain = (tp)->t_pktlist_sentlen = 0; \ 1100} 1101 1102#define TCPCTL_NAMES { \ 1103 { 0, 0 }, \ 1104 { "rfc1323", CTLTYPE_INT }, \ 1105 { "rfc1644", CTLTYPE_INT }, \ 1106 { "mssdflt", CTLTYPE_INT }, \ 1107 { "stats", CTLTYPE_STRUCT }, \ 1108 { "rttdflt", CTLTYPE_INT }, \ 1109 { "keepidle", CTLTYPE_INT }, \ 1110 { "keepintvl", CTLTYPE_INT }, \ 1111 { "sendspace", CTLTYPE_INT }, \ 1112 { "recvspace", CTLTYPE_INT }, \ 1113 { "keepinit", CTLTYPE_INT }, \ 1114 { "pcblist", CTLTYPE_STRUCT }, \ 1115 { "delacktime", CTLTYPE_INT }, \ 1116 { "v6mssdflt", CTLTYPE_INT }, \ 1117} 1118 1119#ifdef SYSCTL_DECL 1120SYSCTL_DECL(_net_inet_tcp); 1121#endif /* SYSCTL_DECL */ 1122 1123/* 1124 * Flags for TCP's connectx(2) user-protocol request routine. 1125 */ 1126#if MPTCP 1127#define TCP_CONNREQF_MPTCP 0x1 /* called internally by MPTCP */ 1128#endif /* MPTCP */ 1129 1130extern struct inpcbhead tcb; /* head of queue of active tcpcb's */ 1131extern struct inpcbinfo tcbinfo; 1132extern struct tcpstat tcpstat; /* tcp statistics */ 1133extern int tcp_mssdflt; /* XXX */ 1134extern int tcp_minmss; 1135extern int ss_fltsz; 1136extern int ss_fltsz_local; 1137extern int tcp_do_rfc3390; /* Calculate ss_fltsz according to RFC 3390 */ 1138extern int target_qdelay; 1139#ifdef __APPLE__ 1140extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ 1141extern struct timeval tcp_uptime; 1142extern lck_spin_t *tcp_uptime_lock; 1143 1144extern int tcp_delack_enabled; 1145#endif /* __APPLE__ */ 1146 1147extern int tcp_do_sack; /* SACK enabled/disabled */ 1148 1149#if CONFIG_IFEF_NOWINDOWSCALE 1150extern int tcp_obey_ifef_nowindowscale; 1151#endif 1152 1153struct protosw; 1154struct domain; 1155 1156void tcp_canceltimers(struct tcpcb *); 1157struct tcpcb * 1158 tcp_close(struct tcpcb *); 1159void tcp_ctlinput(int, struct sockaddr *, void *); 1160int tcp_ctloutput(struct socket *, struct sockopt *); 1161struct tcpcb * 1162 tcp_drop(struct tcpcb *, int); 1163void tcp_drain(void); 1164void tcp_getrt_rtt(struct tcpcb *tp, struct rtentry *rt); 1165struct rmxp_tao * 1166 tcp_gettaocache(struct inpcb *); 1167void tcp_init(struct protosw *, struct domain *); 1168void tcp_input(struct mbuf *, int); 1169void tcp_mss(struct tcpcb *, int, unsigned int); 1170int tcp_mssopt(struct tcpcb *); 1171void tcp_drop_syn_sent(struct inpcb *, int); 1172void tcp_mtudisc(struct inpcb *, int); 1173struct tcpcb * 1174 tcp_newtcpcb(struct inpcb *); 1175int tcp_output(struct tcpcb *); 1176void tcp_respond(struct tcpcb *, void *, 1177 struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int, 1178 unsigned int, unsigned int); 1179struct rtentry *tcp_rtlookup(struct inpcb *, unsigned int); 1180void tcp_setpersist(struct tcpcb *); 1181void tcp_gc(struct inpcbinfo *); 1182void tcp_check_timer_state(struct tcpcb *tp); 1183void tcp_run_timerlist(void *arg1, void *arg2); 1184 1185struct tcptemp * 1186 tcp_maketemplate(struct tcpcb *); 1187void tcp_fillheaders(struct tcpcb *, void *, void *); 1188struct tcpcb * 1189 tcp_timers(struct tcpcb *, int); 1190void tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int); 1191 1192void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq, u_int32_t *); 1193void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); 1194void tcp_clean_sackreport(struct tcpcb *tp); 1195void tcp_sack_adjust(struct tcpcb *tp); 1196struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); 1197void tcp_sack_partialack(struct tcpcb *, struct tcphdr *); 1198void tcp_free_sackholes(struct tcpcb *tp); 1199int32_t tcp_sbspace(struct tcpcb *tp); 1200void tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp); 1201void tcp_reset_stretch_ack(struct tcpcb *tp); 1202extern void tcp_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *); 1203uint32_t tcp_count_opportunistic(unsigned int ifindex, u_int32_t flags); 1204uint32_t tcp_find_anypcb_byaddr(struct ifaddr *ifa); 1205void tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so); 1206struct bwmeas* tcp_bwmeas_alloc(struct tcpcb *tp); 1207void tcp_bwmeas_free(struct tcpcb *tp); 1208 1209extern void tcp_set_background_cc(struct socket *); 1210extern void tcp_set_foreground_cc(struct socket *); 1211extern void tcp_set_recv_bg(struct socket *); 1212extern void tcp_clear_recv_bg(struct socket *); 1213#define IS_TCP_RECV_BG(_so) \ 1214 ((_so)->so_traffic_mgt_flags & TRAFFIC_MGT_TCP_RECVBG) 1215 1216#if TRAFFIC_MGT 1217#define CLEAR_IAJ_STATE(_tp_) (_tp_)->iaj_rcv_ts = 0 1218void reset_acc_iaj(struct tcpcb *tp); 1219#endif /* TRAFFIC_MGT */ 1220 1221int tcp_lock (struct socket *, int, void *); 1222int tcp_unlock (struct socket *, int, void *); 1223void calculate_tcp_clock(void); 1224 1225extern void mptcp_insert_rmap(struct tcpcb *, struct mbuf *); 1226extern void tcp_keepalive_reset(struct tcpcb *); 1227 1228#ifdef _KERN_LOCKS_H_ 1229lck_mtx_t * tcp_getlock (struct socket *, int); 1230#else 1231void * tcp_getlock (struct socket *, int); 1232#endif 1233 1234 1235extern struct pr_usrreqs tcp_usrreqs; 1236extern u_int32_t tcp_sendspace; 1237extern u_int32_t tcp_recvspace; 1238tcp_seq tcp_new_isn(struct tcpcb *); 1239 1240extern int tcp_input_checksum(int, struct mbuf *, struct tcphdr *, int, int); 1241extern void tcp_getconninfo(struct socket *, struct conninfo_tcp *); 1242#if MPTCP 1243extern uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, int); 1244extern void mptcp_output_csum(struct tcpcb *, struct mbuf *, int32_t, unsigned, 1245 u_int64_t, u_int32_t *); 1246extern int mptcp_adj_mss(struct tcpcb *, boolean_t); 1247#endif 1248#endif /* BSD_KERNEL_RPIVATE */ 1249 1250#endif /* _NETINET_TCP_VAR_H_ */ 1251