1217806Slstewart/*- 2217806Slstewart * Copyright (c) 2009-2010 3217806Slstewart * Swinburne University of Technology, Melbourne, Australia 4217806Slstewart * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org> 5217806Slstewart * Copyright (c) 2010-2011 The FreeBSD Foundation 6217806Slstewart * All rights reserved. 7217806Slstewart * 8217806Slstewart * This software was developed at the Centre for Advanced Internet 9220560Slstewart * Architectures, Swinburne University of Technology, by David Hayes, made 10220560Slstewart * possible in part by a grant from the Cisco University Research Program Fund 11220560Slstewart * at Community Foundation Silicon Valley. 12217806Slstewart * 13217806Slstewart * Portions of this software were developed at the Centre for Advanced 14217806Slstewart * Internet Architectures, Swinburne University of Technology, Melbourne, 15217806Slstewart * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 16217806Slstewart * 17217806Slstewart * Redistribution and use in source and binary forms, with or without 18217806Slstewart * modification, are permitted provided that the following conditions 19217806Slstewart * are met: 20217806Slstewart * 1. Redistributions of source code must retain the above copyright 21217806Slstewart * notice, this list of conditions and the following disclaimer. 22217806Slstewart * 2. Redistributions in binary form must reproduce the above copyright 23217806Slstewart * notice, this list of conditions and the following disclaimer in the 24217806Slstewart * documentation and/or other materials provided with the distribution. 25217806Slstewart * 26217806Slstewart * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27217806Slstewart * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28217806Slstewart * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29217806Slstewart * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30217806Slstewart * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31217806Slstewart * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32217806Slstewart * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33217806Slstewart * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34217806Slstewart * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35217806Slstewart * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36217806Slstewart * SUCH DAMAGE. 37217806Slstewart */ 38217806Slstewart 39217806Slstewart#include <sys/cdefs.h> 40217806Slstewart__FBSDID("$FreeBSD$"); 41217806Slstewart 42217806Slstewart#include <sys/param.h> 43217806Slstewart#include <sys/kernel.h> 44217806Slstewart#include <sys/mbuf.h> 45217806Slstewart#include <sys/module.h> 46217806Slstewart#include <sys/hhook.h> 47217806Slstewart#include <sys/khelp.h> 48217806Slstewart#include <sys/module_khelp.h> 49217806Slstewart#include <sys/socket.h> 50217806Slstewart#include <sys/sockopt.h> 51217806Slstewart 52217806Slstewart#include <net/vnet.h> 53217806Slstewart 54217806Slstewart#include <netinet/in.h> 55217806Slstewart#include <netinet/in_pcb.h> 56217806Slstewart#include <netinet/tcp_seq.h> 57217806Slstewart#include <netinet/tcp_var.h> 58217806Slstewart 59217806Slstewart#include <netinet/khelp/h_ertt.h> 60217806Slstewart 61217806Slstewart#include <vm/uma.h> 62217806Slstewart 63217806Slstewartuma_zone_t txseginfo_zone; 64217806Slstewart 65217806Slstewart/* Smoothing factor for delayed ack guess. */ 66217806Slstewart#define DLYACK_SMOOTH 5 67217806Slstewart 68217806Slstewart/* Max number of time stamp errors allowed in a session. */ 69217806Slstewart#define MAX_TS_ERR 10 70217806Slstewart 71217806Slstewartstatic int ertt_packet_measurement_hook(int hhook_type, int hhook_id, 72217806Slstewart void *udata, void *ctx_data, void *hdata, struct osd *hosd); 73217806Slstewartstatic int ertt_add_tx_segment_info_hook(int hhook_type, int hhook_id, 74217806Slstewart void *udata, void *ctx_data, void *hdata, struct osd *hosd); 75217806Slstewartstatic int ertt_mod_init(void); 76217806Slstewartstatic int ertt_mod_destroy(void); 77217806Slstewartstatic int ertt_uma_ctor(void *mem, int size, void *arg, int flags); 78217806Slstewartstatic void ertt_uma_dtor(void *mem, int size, void *arg); 79217806Slstewart 80217806Slstewart/* 81217806Slstewart * Contains information about the sent segment for comparison with the 82217806Slstewart * corresponding ack. 83217806Slstewart */ 84217806Slstewartstruct txseginfo { 85217806Slstewart /* Segment length. */ 86217806Slstewart long len; 87217806Slstewart /* Segment sequence number. */ 88217806Slstewart tcp_seq seq; 89217806Slstewart /* Time stamp indicating when the packet was sent. */ 90217806Slstewart uint32_t tx_ts; 91217806Slstewart /* Last received receiver ts (if the TCP option is used). */ 92217806Slstewart uint32_t rx_ts; 93217806Slstewart uint32_t flags; 94217806Slstewart TAILQ_ENTRY (txseginfo) txsegi_lnk; 95217806Slstewart}; 96217806Slstewart 97217806Slstewart/* Flags for struct txseginfo. */ 98217806Slstewart#define TXSI_TSO 0x01 /* TSO was used for this entry. */ 99217806Slstewart#define TXSI_RTT_MEASURE_START 0x02 /* Start a per RTT measurement. */ 100217806Slstewart#define TXSI_RX_MEASURE_END 0x04 /* Measure the rx rate until this txsi. */ 101217806Slstewart 102217806Slstewartstruct helper ertt_helper = { 103217806Slstewart .mod_init = ertt_mod_init, 104217806Slstewart .mod_destroy = ertt_mod_destroy, 105217806Slstewart .h_flags = HELPER_NEEDS_OSD, 106217806Slstewart .h_classes = HELPER_CLASS_TCP 107217806Slstewart}; 108217806Slstewart 109217806Slstewart/* Define the helper hook info required by ERTT. */ 110217806Slstewartstruct hookinfo ertt_hooks[] = { 111217806Slstewart { 112217806Slstewart .hook_type = HHOOK_TYPE_TCP, 113217806Slstewart .hook_id = HHOOK_TCP_EST_IN, 114217806Slstewart .hook_udata = NULL, 115217806Slstewart .hook_func = &ertt_packet_measurement_hook 116217806Slstewart }, 117217806Slstewart { 118217806Slstewart .hook_type = HHOOK_TYPE_TCP, 119217806Slstewart .hook_id = HHOOK_TCP_EST_OUT, 120217806Slstewart .hook_udata = NULL, 121217806Slstewart .hook_func = &ertt_add_tx_segment_info_hook 122217806Slstewart } 123217806Slstewart}; 124217806Slstewart 125217806Slstewart/* Flags to indicate how marked_packet_rtt should handle this txsi. */ 126217806Slstewart#define MULTI_ACK 0x01 /* More than this txsi is acked. */ 127217806Slstewart#define OLD_TXSI 0x02 /* TXSI is old according to timestamps. */ 128217806Slstewart#define CORRECT_ACK 0X04 /* Acks this TXSI. */ 129217806Slstewart#define FORCED_MEASUREMENT 0X08 /* Force an RTT measurement. */ 130217806Slstewart 131217806Slstewart/* 132217806Slstewart * This fuction measures the RTT of a particular segment/ack pair, or the next 133217806Slstewart * closest if this will yield an inaccurate result due to delayed acking or 134217806Slstewart * other issues. 135217806Slstewart */ 136217806Slstewartstatic void inline 137217806Slstewartmarked_packet_rtt(struct txseginfo *txsi, struct ertt *e_t, struct tcpcb *tp, 138217806Slstewart uint32_t *pmeasurenext, int *pmeasurenext_len, int *prtt_bytes_adjust, 139217806Slstewart int mflag) 140217806Slstewart{ 141217806Slstewart 142217806Slstewart /* 143217806Slstewart * If we can't measure this one properly due to delayed acking adjust 144217806Slstewart * byte counters and flag to measure next txsi. Note that since the 145217806Slstewart * marked packet's transmitted bytes are measured we need to subtract the 146217806Slstewart * transmitted bytes. Then pretend the next txsi was marked. 147217806Slstewart */ 148217806Slstewart if (mflag & (MULTI_ACK|OLD_TXSI)) { 149217806Slstewart *pmeasurenext = txsi->tx_ts; 150217806Slstewart *pmeasurenext_len = txsi->len; 151217806Slstewart *prtt_bytes_adjust += *pmeasurenext_len; 152217806Slstewart } else { 153217806Slstewart if (mflag & FORCED_MEASUREMENT) { 154239474Slstewart e_t->markedpkt_rtt = tcp_ts_getticks() - 155239474Slstewart *pmeasurenext + 1; 156217806Slstewart e_t->bytes_tx_in_marked_rtt = e_t->bytes_tx_in_rtt + 157217806Slstewart *pmeasurenext_len - *prtt_bytes_adjust; 158217806Slstewart } else { 159239474Slstewart e_t->markedpkt_rtt = tcp_ts_getticks() - 160239474Slstewart txsi->tx_ts + 1; 161217806Slstewart e_t->bytes_tx_in_marked_rtt = e_t->bytes_tx_in_rtt - 162217806Slstewart *prtt_bytes_adjust; 163217806Slstewart } 164217806Slstewart e_t->marked_snd_cwnd = tp->snd_cwnd; 165217806Slstewart 166217806Slstewart /* 167217806Slstewart * Reset the ERTT_MEASUREMENT_IN_PROGRESS flag to indicate to 168217806Slstewart * add_tx_segment_info that a new measurement should be started. 169217806Slstewart */ 170217806Slstewart e_t->flags &= ~ERTT_MEASUREMENT_IN_PROGRESS; 171217806Slstewart /* 172217806Slstewart * Set ERTT_NEW_MEASUREMENT to tell the congestion control 173217806Slstewart * algorithm that a new marked RTT measurement has has been made 174217806Slstewart * and is available for use. 175217806Slstewart */ 176217806Slstewart e_t->flags |= ERTT_NEW_MEASUREMENT; 177217806Slstewart 178217806Slstewart if (tp->t_flags & TF_TSO) { 179217806Slstewart /* Temporarily disable TSO to aid a new measurment. */ 180217806Slstewart tp->t_flags &= ~TF_TSO; 181217806Slstewart /* Keep track that we've disabled it. */ 182217806Slstewart e_t->flags |= ERTT_TSO_DISABLED; 183217806Slstewart } 184217806Slstewart } 185217806Slstewart} 186217806Slstewart 187217806Slstewart/* 188217806Slstewart * Ertt_packet_measurements uses a small amount of state kept on each packet 189217806Slstewart * sent to match incoming acknowledgements. This enables more accurate and 190217806Slstewart * secure round trip time measurements. The resulting measurement is used for 191217806Slstewart * congestion control algorithms which require a more accurate time. 192217806Slstewart * Ertt_packet_measurements is called via the helper hook in tcp_input.c 193217806Slstewart */ 194217806Slstewartstatic int 195217806Slstewartertt_packet_measurement_hook(int hhook_type, int hhook_id, void *udata, 196217806Slstewart void *ctx_data, void *hdata, struct osd *hosd) 197217806Slstewart{ 198217806Slstewart struct ertt *e_t; 199217806Slstewart struct tcpcb *tp; 200217806Slstewart struct tcphdr *th; 201217806Slstewart struct tcpopt *to; 202217806Slstewart struct tcp_hhook_data *thdp; 203217806Slstewart struct txseginfo *txsi; 204217806Slstewart int acked, measurenext_len, multiack, new_sacked_bytes, rtt_bytes_adjust; 205217806Slstewart uint32_t measurenext, rts; 206217806Slstewart tcp_seq ack; 207217806Slstewart 208217806Slstewart KASSERT(ctx_data != NULL, ("%s: ctx_data is NULL!", __func__)); 209217806Slstewart KASSERT(hdata != NULL, ("%s: hdata is NULL!", __func__)); 210217806Slstewart 211217806Slstewart e_t = (struct ertt *)hdata; 212217806Slstewart thdp = ctx_data; 213217806Slstewart tp = thdp->tp; 214217806Slstewart th = thdp->th; 215217806Slstewart to = thdp->to; 216217806Slstewart new_sacked_bytes = (tp->sackhint.last_sack_ack != 0); 217217806Slstewart measurenext = measurenext_len = multiack = rts = rtt_bytes_adjust = 0; 218217806Slstewart acked = th->th_ack - tp->snd_una; 219217806Slstewart 220217806Slstewart INP_WLOCK_ASSERT(tp->t_inpcb); 221217806Slstewart 222217806Slstewart /* Packet has provided new acknowledgements. */ 223217806Slstewart if (acked > 0 || new_sacked_bytes) { 224217806Slstewart if (acked == 0 && new_sacked_bytes) { 225217806Slstewart /* Use last sacked data. */ 226217806Slstewart ack = tp->sackhint.last_sack_ack; 227217806Slstewart } else 228217806Slstewart ack = th->th_ack; 229217806Slstewart 230217806Slstewart txsi = TAILQ_FIRST(&e_t->txsegi_q); 231217806Slstewart while (txsi != NULL) { 232217806Slstewart rts = 0; 233217806Slstewart 234217806Slstewart /* Acknowledgement is acking more than this txsi. */ 235217806Slstewart if (SEQ_GT(ack, txsi->seq + txsi->len)) { 236217806Slstewart if (txsi->flags & TXSI_RTT_MEASURE_START || 237217806Slstewart measurenext) { 238217806Slstewart marked_packet_rtt(txsi, e_t, tp, 239217806Slstewart &measurenext, &measurenext_len, 240217806Slstewart &rtt_bytes_adjust, MULTI_ACK); 241217806Slstewart } 242217806Slstewart TAILQ_REMOVE(&e_t->txsegi_q, txsi, txsegi_lnk); 243217806Slstewart uma_zfree(txseginfo_zone, txsi); 244217806Slstewart txsi = TAILQ_FIRST(&e_t->txsegi_q); 245217806Slstewart continue; 246217806Slstewart } 247217806Slstewart 248217806Slstewart /* 249217806Slstewart * Guess if delayed acks are being used by the receiver. 250217806Slstewart * 251217806Slstewart * XXXDH: A simple heuristic that could be improved 252217806Slstewart */ 253217806Slstewart if (!new_sacked_bytes) { 254217806Slstewart if (acked > tp->t_maxseg) { 255217806Slstewart e_t->dlyack_rx += 256217806Slstewart (e_t->dlyack_rx < DLYACK_SMOOTH) ? 257217806Slstewart 1 : 0; 258217806Slstewart multiack = 1; 259217806Slstewart } else if (acked > txsi->len) { 260217806Slstewart multiack = 1; 261217806Slstewart e_t->dlyack_rx += 262217806Slstewart (e_t->dlyack_rx < DLYACK_SMOOTH) ? 263217806Slstewart 1 : 0; 264217806Slstewart } else if (acked == tp->t_maxseg || 265217806Slstewart acked == txsi->len) { 266217806Slstewart e_t->dlyack_rx -= 267217806Slstewart (e_t->dlyack_rx > 0) ? 1 : 0; 268217806Slstewart } 269217806Slstewart /* Otherwise leave dlyack_rx the way it was. */ 270217806Slstewart } 271217806Slstewart 272217806Slstewart /* 273217806Slstewart * Time stamps are only to help match the txsi with the 274217806Slstewart * received acknowledgements. 275217806Slstewart */ 276217806Slstewart if (e_t->timestamp_errors < MAX_TS_ERR && 277217806Slstewart (to->to_flags & TOF_TS) != 0 && to->to_tsecr) { 278217806Slstewart /* 279217806Slstewart * Note: All packets sent with the offload will 280217806Slstewart * have the same time stamp. If we are sending 281217806Slstewart * on a fast interface and the t_maxseg is much 282217806Slstewart * smaller than one tick, this will be fine. The 283217806Slstewart * time stamp would be the same whether we were 284217806Slstewart * using tso or not. However, if the interface 285217806Slstewart * is slow, this will cause problems with the 286217806Slstewart * calculations. If the interface is slow, there 287217806Slstewart * is not reason to be using tso, and it should 288217806Slstewart * be turned off. 289217806Slstewart */ 290217806Slstewart /* 291217806Slstewart * If there are too many time stamp errors, time 292217806Slstewart * stamps won't be trusted 293217806Slstewart */ 294217806Slstewart rts = to->to_tsecr; 295217806Slstewart /* Before this packet. */ 296217806Slstewart if (!e_t->dlyack_rx && TSTMP_LT(rts, txsi->tx_ts)) 297217806Slstewart /* When delayed acking is used, the 298217806Slstewart * reflected time stamp is of the first 299217806Slstewart * packet and thus may be before 300217806Slstewart * txsi->tx_ts. 301217806Slstewart */ 302217806Slstewart break; 303217806Slstewart if (TSTMP_GT(rts, txsi->tx_ts)) { 304217806Slstewart /* 305217806Slstewart * If reflected time stamp is later than 306217806Slstewart * tx_tsi, then this txsi is old. 307217806Slstewart */ 308217806Slstewart if (txsi->flags & TXSI_RTT_MEASURE_START 309217806Slstewart || measurenext) { 310217806Slstewart marked_packet_rtt(txsi, e_t, tp, 311217806Slstewart &measurenext, &measurenext_len, 312217806Slstewart &rtt_bytes_adjust, OLD_TXSI); 313217806Slstewart } 314217806Slstewart TAILQ_REMOVE(&e_t->txsegi_q, txsi, 315217806Slstewart txsegi_lnk); 316217806Slstewart uma_zfree(txseginfo_zone, txsi); 317217806Slstewart txsi = TAILQ_FIRST(&e_t->txsegi_q); 318217806Slstewart continue; 319217806Slstewart } 320217806Slstewart if (rts == txsi->tx_ts && 321217806Slstewart TSTMP_LT(to->to_tsval, txsi->rx_ts)) { 322217806Slstewart /* 323217806Slstewart * Segment received before sent! 324217806Slstewart * Something is wrong with the received 325217806Slstewart * timestamps so increment errors. If 326217806Slstewart * this keeps up we will ignore 327217806Slstewart * timestamps. 328217806Slstewart */ 329217806Slstewart e_t->timestamp_errors++; 330217806Slstewart } 331217806Slstewart } 332217806Slstewart /* 333217806Slstewart * Acknowledging a sequence number before this txsi. 334217806Slstewart * If it is an old txsi that may have had the same seq 335217806Slstewart * numbers, it should have been removed if time stamps 336217806Slstewart * are being used. 337217806Slstewart */ 338217806Slstewart if (SEQ_LEQ(ack, txsi->seq)) 339217806Slstewart break; /* Before first packet in txsi. */ 340217806Slstewart 341217806Slstewart /* 342217806Slstewart * Only ack > txsi->seq and ack <= txsi->seq+txsi->len 343217806Slstewart * past this point. 344217806Slstewart * 345217806Slstewart * If delayed acks are being used, an acknowledgement 346217806Slstewart * for a single segment will have been delayed by the 347217806Slstewart * receiver and will yield an inaccurate measurement. In 348217806Slstewart * this case, we only make the measurement if more than 349217806Slstewart * one segment is being acknowledged or sack is 350217806Slstewart * currently being used. 351217806Slstewart */ 352217806Slstewart if (!e_t->dlyack_rx || multiack || new_sacked_bytes) { 353217806Slstewart /* Make an accurate new measurement. */ 354239474Slstewart e_t->rtt = tcp_ts_getticks() - txsi->tx_ts + 1; 355217806Slstewart 356217806Slstewart if (e_t->rtt < e_t->minrtt || e_t->minrtt == 0) 357217806Slstewart e_t->minrtt = e_t->rtt; 358217806Slstewart 359217806Slstewart if (e_t->rtt > e_t->maxrtt || e_t->maxrtt == 0) 360217806Slstewart e_t->maxrtt = e_t->rtt; 361217806Slstewart } 362217806Slstewart 363217806Slstewart if (txsi->flags & TXSI_RTT_MEASURE_START || measurenext) 364217806Slstewart marked_packet_rtt(txsi, e_t, tp, 365217806Slstewart &measurenext, &measurenext_len, 366217806Slstewart &rtt_bytes_adjust, CORRECT_ACK); 367217806Slstewart 368217806Slstewart if (txsi->flags & TXSI_TSO) { 369217806Slstewart txsi->len -= acked; 370217806Slstewart if (txsi->len > 0) { 371217806Slstewart /* 372217806Slstewart * This presumes ack for first bytes in 373217806Slstewart * txsi, this may not be true but it 374217806Slstewart * shouldn't cause problems for the 375217806Slstewart * timing. 376217806Slstewart * 377217806Slstewart * We remeasure RTT even though we only 378217806Slstewart * have a single txsi. The rationale 379217806Slstewart * behind this is that it is better to 380217806Slstewart * have a slightly inaccurate 381217806Slstewart * measurement than no additional 382217806Slstewart * measurement for the rest of the bulk 383217806Slstewart * transfer. Since TSO is only used on 384217806Slstewart * high speed interface cards, so the 385217806Slstewart * packets should be transmitted at line 386217806Slstewart * rate back to back with little 387217806Slstewart * difference in transmission times (in 388217806Slstewart * ticks). 389217806Slstewart */ 390217806Slstewart txsi->seq += acked; 391217806Slstewart /* 392217806Slstewart * Reset txsi measure flag so we don't 393217806Slstewart * use it for another RTT measurement. 394217806Slstewart */ 395217806Slstewart txsi->flags &= ~TXSI_RTT_MEASURE_START; 396217806Slstewart /* 397217806Slstewart * There is still more data to be acked 398217806Slstewart * from tso bulk transmission, so we 399217806Slstewart * won't remove it from the TAILQ yet. 400217806Slstewart */ 401217806Slstewart break; 402217806Slstewart } 403217806Slstewart } 404217806Slstewart 405217806Slstewart TAILQ_REMOVE(&e_t->txsegi_q, txsi, txsegi_lnk); 406217806Slstewart uma_zfree(txseginfo_zone, txsi); 407217806Slstewart break; 408217806Slstewart } 409217806Slstewart 410217806Slstewart if (measurenext) { 411217806Slstewart /* 412217806Slstewart * We need to do a RTT measurement. It won't be the best 413217806Slstewart * if we do it here. 414217806Slstewart */ 415217806Slstewart marked_packet_rtt(txsi, e_t, tp, 416217806Slstewart &measurenext, &measurenext_len, 417217806Slstewart &rtt_bytes_adjust, FORCED_MEASUREMENT); 418217806Slstewart } 419217806Slstewart } 420217806Slstewart 421217806Slstewart return (0); 422217806Slstewart} 423217806Slstewart 424217806Slstewart/* 425217806Slstewart * Add information about a transmitted segment to a list. 426217806Slstewart * This is called via the helper hook in tcp_output.c 427217806Slstewart */ 428217806Slstewartstatic int 429217806Slstewartertt_add_tx_segment_info_hook(int hhook_type, int hhook_id, void *udata, 430217806Slstewart void *ctx_data, void *hdata, struct osd *hosd) 431217806Slstewart{ 432217806Slstewart struct ertt *e_t; 433217806Slstewart struct tcpcb *tp; 434217806Slstewart struct tcphdr *th; 435217806Slstewart struct tcpopt *to; 436217806Slstewart struct tcp_hhook_data *thdp; 437217806Slstewart struct txseginfo *txsi; 438217806Slstewart long len; 439217806Slstewart int tso; 440217806Slstewart 441217806Slstewart KASSERT(ctx_data != NULL, ("%s: ctx_data is NULL!", __func__)); 442217806Slstewart KASSERT(hdata != NULL, ("%s: hdata is NULL!", __func__)); 443217806Slstewart 444217806Slstewart e_t = (struct ertt *)hdata; 445217806Slstewart thdp = ctx_data; 446217806Slstewart tp = thdp->tp; 447217806Slstewart th = thdp->th; 448217806Slstewart to = thdp->to; 449217806Slstewart len = thdp->len; 450217806Slstewart tso = thdp->tso; 451217806Slstewart 452217806Slstewart INP_WLOCK_ASSERT(tp->t_inpcb); 453217806Slstewart 454217806Slstewart if (len > 0) { 455217806Slstewart txsi = uma_zalloc(txseginfo_zone, M_NOWAIT); 456217806Slstewart if (txsi != NULL) { 457217806Slstewart /* Construct txsi setting the necessary flags. */ 458217806Slstewart txsi->flags = 0; /* Needs to be initialised. */ 459217806Slstewart txsi->seq = ntohl(th->th_seq); 460217806Slstewart txsi->len = len; 461217806Slstewart if (tso) 462217806Slstewart txsi->flags |= TXSI_TSO; 463217806Slstewart else if (e_t->flags & ERTT_TSO_DISABLED) { 464217806Slstewart tp->t_flags |= TF_TSO; 465217806Slstewart e_t->flags &= ~ERTT_TSO_DISABLED; 466217806Slstewart } 467217806Slstewart 468217806Slstewart if (e_t->flags & ERTT_MEASUREMENT_IN_PROGRESS) { 469217806Slstewart e_t->bytes_tx_in_rtt += len; 470217806Slstewart } else { 471217806Slstewart txsi->flags |= TXSI_RTT_MEASURE_START; 472217806Slstewart e_t->flags |= ERTT_MEASUREMENT_IN_PROGRESS; 473217806Slstewart e_t->bytes_tx_in_rtt = len; 474217806Slstewart } 475217806Slstewart 476217806Slstewart if (((tp->t_flags & TF_NOOPT) == 0) && 477217806Slstewart (to->to_flags & TOF_TS)) { 478217806Slstewart txsi->tx_ts = ntohl(to->to_tsval) - 479217806Slstewart tp->ts_offset; 480217806Slstewart txsi->rx_ts = ntohl(to->to_tsecr); 481217806Slstewart } else { 482239474Slstewart txsi->tx_ts = tcp_ts_getticks(); 483217806Slstewart txsi->rx_ts = 0; /* No received time stamp. */ 484217806Slstewart } 485217806Slstewart TAILQ_INSERT_TAIL(&e_t->txsegi_q, txsi, txsegi_lnk); 486217806Slstewart } 487217806Slstewart } 488217806Slstewart 489217806Slstewart return (0); 490217806Slstewart} 491217806Slstewart 492217806Slstewartstatic int 493217806Slstewartertt_mod_init(void) 494217806Slstewart{ 495217806Slstewart 496217806Slstewart txseginfo_zone = uma_zcreate("ertt_txseginfo", sizeof(struct txseginfo), 497217806Slstewart NULL, NULL, NULL, NULL, 0, 0); 498217806Slstewart 499217806Slstewart return (0); 500217806Slstewart} 501217806Slstewart 502217806Slstewartstatic int 503217806Slstewartertt_mod_destroy(void) 504217806Slstewart{ 505217806Slstewart 506217806Slstewart uma_zdestroy(txseginfo_zone); 507217806Slstewart 508217806Slstewart return (0); 509217806Slstewart} 510217806Slstewart 511217806Slstewartstatic int 512217806Slstewartertt_uma_ctor(void *mem, int size, void *arg, int flags) 513217806Slstewart{ 514217806Slstewart struct ertt *e_t; 515217806Slstewart 516217806Slstewart e_t = mem; 517217806Slstewart 518217806Slstewart TAILQ_INIT(&e_t->txsegi_q); 519217806Slstewart e_t->timestamp_errors = 0; 520217806Slstewart e_t->minrtt = 0; 521217806Slstewart e_t->maxrtt = 0; 522217806Slstewart e_t->rtt = 0; 523217806Slstewart e_t->flags = 0; 524217806Slstewart e_t->dlyack_rx = 0; 525217806Slstewart e_t->bytes_tx_in_rtt = 0; 526217806Slstewart e_t->markedpkt_rtt = 0; 527217806Slstewart 528217806Slstewart return (0); 529217806Slstewart} 530217806Slstewart 531217806Slstewartstatic void 532217806Slstewartertt_uma_dtor(void *mem, int size, void *arg) 533217806Slstewart{ 534217806Slstewart struct ertt *e_t; 535217806Slstewart struct txseginfo *n_txsi, *txsi; 536217806Slstewart 537217806Slstewart e_t = mem; 538217806Slstewart txsi = TAILQ_FIRST(&e_t->txsegi_q); 539217806Slstewart while (txsi != NULL) { 540217806Slstewart n_txsi = TAILQ_NEXT(txsi, txsegi_lnk); 541217806Slstewart uma_zfree(txseginfo_zone, txsi); 542217806Slstewart txsi = n_txsi; 543217806Slstewart } 544217806Slstewart} 545217806Slstewart 546217806SlstewartKHELP_DECLARE_MOD_UMA(ertt, &ertt_helper, ertt_hooks, 1, sizeof(struct ertt), 547217806Slstewart ertt_uma_ctor, ertt_uma_dtor); 548