1209662Slstewart/*- 2215153Slstewart * Copyright (c) 2007-2009 3215153Slstewart * Swinburne University of Technology, Melbourne, Australia. 4209662Slstewart * Copyright (c) 2009-2010, The FreeBSD Foundation 5209662Slstewart * All rights reserved. 6209662Slstewart * 7209662Slstewart * Portions of this software were developed at the Centre for Advanced 8209662Slstewart * Internet Architectures, Swinburne University of Technology, Melbourne, 9209662Slstewart * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation. 10209662Slstewart * 11209662Slstewart * Redistribution and use in source and binary forms, with or without 12209662Slstewart * modification, are permitted provided that the following conditions 13209662Slstewart * are met: 14209662Slstewart * 1. Redistributions of source code must retain the above copyright 15209662Slstewart * notice, this list of conditions and the following disclaimer. 16209662Slstewart * 2. Redistributions in binary form must reproduce the above copyright 17209662Slstewart * notice, this list of conditions and the following disclaimer in the 18209662Slstewart * documentation and/or other materials provided with the distribution. 19209662Slstewart * 20209662Slstewart * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 21209662Slstewart * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22209662Slstewart * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23209662Slstewart * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 24209662Slstewart * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25209662Slstewart * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26209662Slstewart * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27209662Slstewart * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28209662Slstewart * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29209662Slstewart * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30209662Slstewart * SUCH DAMAGE. 31209662Slstewart */ 32209662Slstewart 33209662Slstewart/****************************************************** 34209662Slstewart * Statistical Information For TCP Research (SIFTR) 35209662Slstewart * 36209662Slstewart * A FreeBSD kernel module that adds very basic instrumentation to the 37209662Slstewart * TCP stack, allowing internal stats to be recorded to a log file 38209662Slstewart * for experimental, debugging and performance analysis purposes. 39209662Slstewart * 40209662Slstewart * SIFTR was first released in 2007 by James Healy and Lawrence Stewart whilst 41220560Slstewart * working on the NewTCP research project at Swinburne University of 42220560Slstewart * Technology's Centre for Advanced Internet Architectures, Melbourne, 43220560Slstewart * Australia, which was made possible in part by a grant from the Cisco 44220560Slstewart * University Research Program Fund at Community Foundation Silicon Valley. 45220560Slstewart * More details are available at: 46209662Slstewart * http://caia.swin.edu.au/urp/newtcp/ 47209662Slstewart * 48209662Slstewart * Work on SIFTR v1.2.x was sponsored by the FreeBSD Foundation as part of 49209662Slstewart * the "Enhancing the FreeBSD TCP Implementation" project 2008-2009. 
50209662Slstewart * More details are available at: 51209662Slstewart * http://www.freebsdfoundation.org/ 52209662Slstewart * http://caia.swin.edu.au/freebsd/etcp09/ 53209662Slstewart * 54209662Slstewart * Lawrence Stewart is the current maintainer, and all contact regarding 55209662Slstewart * SIFTR should be directed to him via email: lastewart@swin.edu.au 56209662Slstewart * 57209662Slstewart * Initial release date: June 2007 58213162Slstewart * Most recent update: September 2010 59209662Slstewart ******************************************************/ 60209662Slstewart 61209662Slstewart#include <sys/cdefs.h> 62209662Slstewart__FBSDID("$FreeBSD: stable/11/sys/netinet/siftr.c 343273 2019-01-21 19:33:05Z brooks $"); 63209662Slstewart 64209662Slstewart#include <sys/param.h> 65209662Slstewart#include <sys/alq.h> 66209662Slstewart#include <sys/errno.h> 67257241Sglebius#include <sys/eventhandler.h> 68209662Slstewart#include <sys/hash.h> 69209662Slstewart#include <sys/kernel.h> 70209662Slstewart#include <sys/kthread.h> 71209662Slstewart#include <sys/lock.h> 72209662Slstewart#include <sys/mbuf.h> 73209662Slstewart#include <sys/module.h> 74209662Slstewart#include <sys/mutex.h> 75209662Slstewart#include <sys/pcpu.h> 76209662Slstewart#include <sys/proc.h> 77209662Slstewart#include <sys/sbuf.h> 78282240Sgnn#include <sys/sdt.h> 79209662Slstewart#include <sys/smp.h> 80209662Slstewart#include <sys/socket.h> 81209662Slstewart#include <sys/socketvar.h> 82209662Slstewart#include <sys/sysctl.h> 83209662Slstewart#include <sys/unistd.h> 84209662Slstewart 85209662Slstewart#include <net/if.h> 86257241Sglebius#include <net/if_var.h> 87209662Slstewart#include <net/pfil.h> 88209662Slstewart 89209662Slstewart#include <netinet/in.h> 90282240Sgnn#include <netinet/in_kdtrace.h> 91209662Slstewart#include <netinet/in_pcb.h> 92209662Slstewart#include <netinet/in_systm.h> 93209662Slstewart#include <netinet/in_var.h> 94209662Slstewart#include <netinet/ip.h> 95209662Slstewart#include 
<netinet/tcp_var.h> 96209662Slstewart 97209662Slstewart#ifdef SIFTR_IPV6 98209662Slstewart#include <netinet/ip6.h> 99209662Slstewart#include <netinet6/in6_pcb.h> 100209662Slstewart#endif /* SIFTR_IPV6 */ 101209662Slstewart 102209662Slstewart#include <machine/in_cksum.h> 103209662Slstewart 104209662Slstewart/* 105209662Slstewart * Three digit version number refers to X.Y.Z where: 106209662Slstewart * X is the major version number 107209662Slstewart * Y is bumped to mark backwards incompatible changes 108209662Slstewart * Z is bumped to mark backwards compatible changes 109209662Slstewart */ 110209662Slstewart#define V_MAJOR 1 111209662Slstewart#define V_BACKBREAK 2 112213162Slstewart#define V_BACKCOMPAT 4 113209662Slstewart#define MODVERSION __CONCAT(V_MAJOR, __CONCAT(V_BACKBREAK, V_BACKCOMPAT)) 114209662Slstewart#define MODVERSION_STR __XSTRING(V_MAJOR) "." __XSTRING(V_BACKBREAK) "." \ 115209662Slstewart __XSTRING(V_BACKCOMPAT) 116209662Slstewart 117209662Slstewart#define HOOK 0 118209662Slstewart#define UNHOOK 1 119209662Slstewart#define SIFTR_EXPECTED_MAX_TCP_FLOWS 65536 120209662Slstewart#define SYS_NAME "FreeBSD" 121209662Slstewart#define PACKET_TAG_SIFTR 100 122209662Slstewart#define PACKET_COOKIE_SIFTR 21749576 123209662Slstewart#define SIFTR_LOG_FILE_MODE 0644 124209662Slstewart#define SIFTR_DISABLE 0 125209662Slstewart#define SIFTR_ENABLE 1 126209662Slstewart 127209662Slstewart/* 128209662Slstewart * Hard upper limit on the length of log messages. Bump this up if you add new 129209662Slstewart * data fields such that the line length could exceed the below value. 130209662Slstewart */ 131209662Slstewart#define MAX_LOG_MSG_LEN 200 132209662Slstewart/* XXX: Make this a sysctl tunable. 
*/ 133209662Slstewart#define SIFTR_ALQ_BUFLEN (1000*MAX_LOG_MSG_LEN) 134209662Slstewart 135209662Slstewart/* 136209662Slstewart * 1 byte for IP version 137209662Slstewart * IPv4: src/dst IP (4+4) + src/dst port (2+2) = 12 bytes 138209662Slstewart * IPv6: src/dst IP (16+16) + src/dst port (2+2) = 36 bytes 139209662Slstewart */ 140209662Slstewart#ifdef SIFTR_IPV6 141209662Slstewart#define FLOW_KEY_LEN 37 142209662Slstewart#else 143209662Slstewart#define FLOW_KEY_LEN 13 144209662Slstewart#endif 145209662Slstewart 146209662Slstewart#ifdef SIFTR_IPV6 147209662Slstewart#define SIFTR_IPMODE 6 148209662Slstewart#else 149209662Slstewart#define SIFTR_IPMODE 4 150209662Slstewart#endif 151209662Slstewart 152209662Slstewart/* useful macros */ 153209662Slstewart#define UPPER_SHORT(X) (((X) & 0xFFFF0000) >> 16) 154209662Slstewart#define LOWER_SHORT(X) ((X) & 0x0000FFFF) 155209662Slstewart 156209662Slstewart#define FIRST_OCTET(X) (((X) & 0xFF000000) >> 24) 157209662Slstewart#define SECOND_OCTET(X) (((X) & 0x00FF0000) >> 16) 158209662Slstewart#define THIRD_OCTET(X) (((X) & 0x0000FF00) >> 8) 159209662Slstewart#define FOURTH_OCTET(X) ((X) & 0x000000FF) 160209662Slstewart 161220592Spluknetstatic MALLOC_DEFINE(M_SIFTR, "siftr", "dynamic memory used by SIFTR"); 162220592Spluknetstatic MALLOC_DEFINE(M_SIFTR_PKTNODE, "siftr_pktnode", 163220592Spluknet "SIFTR pkt_node struct"); 164220592Spluknetstatic MALLOC_DEFINE(M_SIFTR_HASHNODE, "siftr_hashnode", 165220592Spluknet "SIFTR flow_hash_node struct"); 166209662Slstewart 167209662Slstewart/* Used as links in the pkt manager queue. */ 168209662Slstewartstruct pkt_node { 169209662Slstewart /* Timestamp of pkt as noted in the pfil hook. */ 170209662Slstewart struct timeval tval; 171209662Slstewart /* Direction pkt is travelling; either PFIL_IN or PFIL_OUT. */ 172209662Slstewart uint8_t direction; 173209662Slstewart /* IP version pkt_node relates to; either INP_IPV4 or INP_IPV6. 
*/ 174209662Slstewart uint8_t ipver; 175209662Slstewart /* Hash of the pkt which triggered the log message. */ 176209662Slstewart uint32_t hash; 177209662Slstewart /* Local/foreign IP address. */ 178209662Slstewart#ifdef SIFTR_IPV6 179209662Slstewart uint32_t ip_laddr[4]; 180209662Slstewart uint32_t ip_faddr[4]; 181209662Slstewart#else 182209662Slstewart uint8_t ip_laddr[4]; 183209662Slstewart uint8_t ip_faddr[4]; 184209662Slstewart#endif 185209662Slstewart /* Local TCP port. */ 186209662Slstewart uint16_t tcp_localport; 187209662Slstewart /* Foreign TCP port. */ 188209662Slstewart uint16_t tcp_foreignport; 189209662Slstewart /* Congestion Window (bytes). */ 190209662Slstewart u_long snd_cwnd; 191209662Slstewart /* Sending Window (bytes). */ 192209662Slstewart u_long snd_wnd; 193209662Slstewart /* Receive Window (bytes). */ 194209662Slstewart u_long rcv_wnd; 195212765Sandre /* Unused (was: Bandwidth Controlled Window (bytes)). */ 196209662Slstewart u_long snd_bwnd; 197209662Slstewart /* Slow Start Threshold (bytes). */ 198209662Slstewart u_long snd_ssthresh; 199209662Slstewart /* Current state of the TCP FSM. */ 200209662Slstewart int conn_state; 201209662Slstewart /* Max Segment Size (bytes). */ 202209662Slstewart u_int max_seg_size; 203209662Slstewart /* 204209662Slstewart * Smoothed RTT stored as found in the TCP control block 205209662Slstewart * in units of (TCP_RTT_SCALE*hz). 206209662Slstewart */ 207209662Slstewart int smoothed_rtt; 208209662Slstewart /* Is SACK enabled? */ 209209662Slstewart u_char sack_enabled; 210209662Slstewart /* Window scaling for snd window. */ 211209662Slstewart u_char snd_scale; 212209662Slstewart /* Window scaling for recv window. */ 213209662Slstewart u_char rcv_scale; 214209662Slstewart /* TCP control block flags. */ 215209662Slstewart u_int flags; 216209662Slstewart /* Retransmit timeout length. */ 217209662Slstewart int rxt_length; 218209662Slstewart /* Size of the TCP send buffer in bytes. 
*/ 219209662Slstewart u_int snd_buf_hiwater; 220209662Slstewart /* Current num bytes in the send socket buffer. */ 221209662Slstewart u_int snd_buf_cc; 222209662Slstewart /* Size of the TCP receive buffer in bytes. */ 223209662Slstewart u_int rcv_buf_hiwater; 224209662Slstewart /* Current num bytes in the receive socket buffer. */ 225209662Slstewart u_int rcv_buf_cc; 226209662Slstewart /* Number of bytes inflight that we are waiting on ACKs for. */ 227209662Slstewart u_int sent_inflight_bytes; 228213162Slstewart /* Number of segments currently in the reassembly queue. */ 229213162Slstewart int t_segqlen; 230280233Shiren /* Flowid for the connection. */ 231280233Shiren u_int flowid; 232280237Shiren /* Flow type for the connection. */ 233280237Shiren u_int flowtype; 234209662Slstewart /* Link to next pkt_node in the list. */ 235209662Slstewart STAILQ_ENTRY(pkt_node) nodes; 236209662Slstewart}; 237209662Slstewart 238209662Slstewartstruct flow_hash_node 239209662Slstewart{ 240209662Slstewart uint16_t counter; 241209662Slstewart uint8_t key[FLOW_KEY_LEN]; 242209662Slstewart LIST_ENTRY(flow_hash_node) nodes; 243209662Slstewart}; 244209662Slstewart 245209662Slstewartstruct siftr_stats 246209662Slstewart{ 247209662Slstewart /* # TCP pkts seen by the SIFTR PFIL hooks, including any skipped. */ 248209662Slstewart uint64_t n_in; 249209662Slstewart uint64_t n_out; 250209662Slstewart /* # pkts skipped due to failed malloc calls. */ 251209662Slstewart uint32_t nskip_in_malloc; 252209662Slstewart uint32_t nskip_out_malloc; 253209662Slstewart /* # pkts skipped due to failed mtx acquisition. */ 254209662Slstewart uint32_t nskip_in_mtx; 255209662Slstewart uint32_t nskip_out_mtx; 256209662Slstewart /* # pkts skipped due to failed inpcb lookups. */ 257209662Slstewart uint32_t nskip_in_inpcb; 258209662Slstewart uint32_t nskip_out_inpcb; 259209662Slstewart /* # pkts skipped due to failed tcpcb lookups. 
*/ 260209662Slstewart uint32_t nskip_in_tcpcb; 261209662Slstewart uint32_t nskip_out_tcpcb; 262209662Slstewart /* # pkts skipped due to stack reinjection. */ 263209662Slstewart uint32_t nskip_in_dejavu; 264209662Slstewart uint32_t nskip_out_dejavu; 265209662Slstewart}; 266209662Slstewart 267215701Sdimstatic DPCPU_DEFINE(struct siftr_stats, ss); 268209662Slstewart 269209662Slstewartstatic volatile unsigned int siftr_exit_pkt_manager_thread = 0; 270209662Slstewartstatic unsigned int siftr_enabled = 0; 271209662Slstewartstatic unsigned int siftr_pkts_per_log = 1; 272209662Slstewartstatic unsigned int siftr_generate_hashes = 0; 273209662Slstewart/* static unsigned int siftr_binary_log = 0; */ 274209662Slstewartstatic char siftr_logfile[PATH_MAX] = "/var/log/siftr.log"; 275273773Shselaskystatic char siftr_logfile_shadow[PATH_MAX] = "/var/log/siftr.log"; 276209662Slstewartstatic u_long siftr_hashmask; 277209662SlstewartSTAILQ_HEAD(pkthead, pkt_node) pkt_queue = STAILQ_HEAD_INITIALIZER(pkt_queue); 278209662SlstewartLIST_HEAD(listhead, flow_hash_node) *counter_hash; 279209662Slstewartstatic int wait_for_pkt; 280209662Slstewartstatic struct alq *siftr_alq = NULL; 281209662Slstewartstatic struct mtx siftr_pkt_queue_mtx; 282209662Slstewartstatic struct mtx siftr_pkt_mgr_mtx; 283209662Slstewartstatic struct thread *siftr_pkt_manager_thr = NULL; 284209662Slstewart/* 285209662Slstewart * pfil.h defines PFIL_IN as 1 and PFIL_OUT as 2, 286209662Slstewart * which we use as an index into this array. 287209662Slstewart */ 288209662Slstewartstatic char direction[3] = {'\0', 'i','o'}; 289209662Slstewart 290209662Slstewart/* Required function prototypes. */ 291209662Slstewartstatic int siftr_sysctl_enabled_handler(SYSCTL_HANDLER_ARGS); 292209662Slstewartstatic int siftr_sysctl_logfile_name_handler(SYSCTL_HANDLER_ARGS); 293209662Slstewart 294209662Slstewart 295209662Slstewart/* Declare the net.inet.siftr sysctl tree and populate it. 
*/ 296209662Slstewart 297209662SlstewartSYSCTL_DECL(_net_inet_siftr); 298209662Slstewart 299209662SlstewartSYSCTL_NODE(_net_inet, OID_AUTO, siftr, CTLFLAG_RW, NULL, 300209662Slstewart "siftr related settings"); 301209662Slstewart 302209662SlstewartSYSCTL_PROC(_net_inet_siftr, OID_AUTO, enabled, CTLTYPE_UINT|CTLFLAG_RW, 303209662Slstewart &siftr_enabled, 0, &siftr_sysctl_enabled_handler, "IU", 304209662Slstewart "switch siftr module operations on/off"); 305209662Slstewart 306209662SlstewartSYSCTL_PROC(_net_inet_siftr, OID_AUTO, logfile, CTLTYPE_STRING|CTLFLAG_RW, 307273773Shselasky &siftr_logfile_shadow, sizeof(siftr_logfile_shadow), &siftr_sysctl_logfile_name_handler, 308209662Slstewart "A", "file to save siftr log messages to"); 309209662Slstewart 310209662SlstewartSYSCTL_UINT(_net_inet_siftr, OID_AUTO, ppl, CTLFLAG_RW, 311209662Slstewart &siftr_pkts_per_log, 1, 312209662Slstewart "number of packets between generating a log message"); 313209662Slstewart 314209662SlstewartSYSCTL_UINT(_net_inet_siftr, OID_AUTO, genhashes, CTLFLAG_RW, 315209662Slstewart &siftr_generate_hashes, 0, 316209662Slstewart "enable packet hash generation"); 317209662Slstewart 318209662Slstewart/* XXX: TODO 319209662SlstewartSYSCTL_UINT(_net_inet_siftr, OID_AUTO, binary, CTLFLAG_RW, 320209662Slstewart &siftr_binary_log, 0, 321209662Slstewart "write log files in binary instead of ascii"); 322209662Slstewart*/ 323209662Slstewart 324209662Slstewart 325209662Slstewart/* Begin functions. 
*/ 326209662Slstewart 327209662Slstewartstatic void 328209662Slstewartsiftr_process_pkt(struct pkt_node * pkt_node) 329209662Slstewart{ 330209662Slstewart struct flow_hash_node *hash_node; 331209662Slstewart struct listhead *counter_list; 332209662Slstewart struct siftr_stats *ss; 333209662Slstewart struct ale *log_buf; 334209662Slstewart uint8_t key[FLOW_KEY_LEN]; 335209662Slstewart uint8_t found_match, key_offset; 336209662Slstewart 337209662Slstewart hash_node = NULL; 338209662Slstewart ss = DPCPU_PTR(ss); 339209662Slstewart found_match = 0; 340209662Slstewart key_offset = 1; 341209662Slstewart 342209662Slstewart /* 343209662Slstewart * Create the key that will be used to create a hash index 344209662Slstewart * into our hash table. Our key consists of: 345209662Slstewart * ipversion, localip, localport, foreignip, foreignport 346209662Slstewart */ 347209662Slstewart key[0] = pkt_node->ipver; 348209662Slstewart memcpy(key + key_offset, &pkt_node->ip_laddr, 349209662Slstewart sizeof(pkt_node->ip_laddr)); 350209662Slstewart key_offset += sizeof(pkt_node->ip_laddr); 351209662Slstewart memcpy(key + key_offset, &pkt_node->tcp_localport, 352209662Slstewart sizeof(pkt_node->tcp_localport)); 353209662Slstewart key_offset += sizeof(pkt_node->tcp_localport); 354209662Slstewart memcpy(key + key_offset, &pkt_node->ip_faddr, 355209662Slstewart sizeof(pkt_node->ip_faddr)); 356209662Slstewart key_offset += sizeof(pkt_node->ip_faddr); 357209662Slstewart memcpy(key + key_offset, &pkt_node->tcp_foreignport, 358209662Slstewart sizeof(pkt_node->tcp_foreignport)); 359209662Slstewart 360209662Slstewart counter_list = counter_hash + 361209662Slstewart (hash32_buf(key, sizeof(key), 0) & siftr_hashmask); 362209662Slstewart 363209662Slstewart /* 364209662Slstewart * If the list is not empty i.e. the hash index has 365209662Slstewart * been used by another flow previously. 
366209662Slstewart */ 367209662Slstewart if (LIST_FIRST(counter_list) != NULL) { 368209662Slstewart /* 369209662Slstewart * Loop through the hash nodes in the list. 370209662Slstewart * There should normally only be 1 hash node in the list, 371209662Slstewart * except if there have been collisions at the hash index 372209662Slstewart * computed by hash32_buf(). 373209662Slstewart */ 374209662Slstewart LIST_FOREACH(hash_node, counter_list, nodes) { 375209662Slstewart /* 376209662Slstewart * Check if the key for the pkt we are currently 377209662Slstewart * processing is the same as the key stored in the 378209662Slstewart * hash node we are currently processing. 379209662Slstewart * If they are the same, then we've found the 380209662Slstewart * hash node that stores the counter for the flow 381209662Slstewart * the pkt belongs to. 382209662Slstewart */ 383209662Slstewart if (memcmp(hash_node->key, key, sizeof(key)) == 0) { 384209662Slstewart found_match = 1; 385209662Slstewart break; 386209662Slstewart } 387209662Slstewart } 388209662Slstewart } 389209662Slstewart 390209662Slstewart /* If this flow hash hasn't been seen before or we have a collision. */ 391209662Slstewart if (hash_node == NULL || !found_match) { 392209662Slstewart /* Create a new hash node to store the flow's counter. */ 393209662Slstewart hash_node = malloc(sizeof(struct flow_hash_node), 394209662Slstewart M_SIFTR_HASHNODE, M_WAITOK); 395209662Slstewart 396209662Slstewart if (hash_node != NULL) { 397209662Slstewart /* Initialise our new hash node list entry. */ 398209662Slstewart hash_node->counter = 0; 399209662Slstewart memcpy(hash_node->key, key, sizeof(key)); 400209662Slstewart LIST_INSERT_HEAD(counter_list, hash_node, nodes); 401209662Slstewart } else { 402209662Slstewart /* Malloc failed. 
*/ 403209662Slstewart if (pkt_node->direction == PFIL_IN) 404209662Slstewart ss->nskip_in_malloc++; 405209662Slstewart else 406209662Slstewart ss->nskip_out_malloc++; 407209662Slstewart 408209662Slstewart return; 409209662Slstewart } 410209662Slstewart } else if (siftr_pkts_per_log > 1) { 411209662Slstewart /* 412209662Slstewart * Taking the remainder of the counter divided 413209662Slstewart * by the current value of siftr_pkts_per_log 414209662Slstewart * and storing that in counter provides a neat 415209662Slstewart * way to modulate the frequency of log 416209662Slstewart * messages being written to the log file. 417209662Slstewart */ 418209662Slstewart hash_node->counter = (hash_node->counter + 1) % 419209662Slstewart siftr_pkts_per_log; 420209662Slstewart 421209662Slstewart /* 422209662Slstewart * If we have not seen enough packets since the last time 423209662Slstewart * we wrote a log message for this connection, return. 424209662Slstewart */ 425209662Slstewart if (hash_node->counter > 0) 426209662Slstewart return; 427209662Slstewart } 428209662Slstewart 429209662Slstewart log_buf = alq_getn(siftr_alq, MAX_LOG_MSG_LEN, ALQ_WAITOK); 430209662Slstewart 431209662Slstewart if (log_buf == NULL) 432209662Slstewart return; /* Should only happen if the ALQ is shutting down. 
*/ 433209662Slstewart 434209662Slstewart#ifdef SIFTR_IPV6 435209662Slstewart pkt_node->ip_laddr[3] = ntohl(pkt_node->ip_laddr[3]); 436209662Slstewart pkt_node->ip_faddr[3] = ntohl(pkt_node->ip_faddr[3]); 437209662Slstewart 438209662Slstewart if (pkt_node->ipver == INP_IPV6) { /* IPv6 packet */ 439209662Slstewart pkt_node->ip_laddr[0] = ntohl(pkt_node->ip_laddr[0]); 440209662Slstewart pkt_node->ip_laddr[1] = ntohl(pkt_node->ip_laddr[1]); 441209662Slstewart pkt_node->ip_laddr[2] = ntohl(pkt_node->ip_laddr[2]); 442209662Slstewart pkt_node->ip_faddr[0] = ntohl(pkt_node->ip_faddr[0]); 443209662Slstewart pkt_node->ip_faddr[1] = ntohl(pkt_node->ip_faddr[1]); 444209662Slstewart pkt_node->ip_faddr[2] = ntohl(pkt_node->ip_faddr[2]); 445209662Slstewart 446209662Slstewart /* Construct an IPv6 log message. */ 447209662Slstewart log_buf->ae_bytesused = snprintf(log_buf->ae_data, 448209662Slstewart MAX_LOG_MSG_LEN, 449209662Slstewart "%c,0x%08x,%zd.%06ld,%x:%x:%x:%x:%x:%x:%x:%x,%u,%x:%x:%x:" 450209662Slstewart "%x:%x:%x:%x:%x,%u,%ld,%ld,%ld,%ld,%ld,%u,%u,%u,%u,%u,%u," 451280441Slstewart "%u,%d,%u,%u,%u,%u,%u,%u,%u,%u\n", 452209662Slstewart direction[pkt_node->direction], 453209662Slstewart pkt_node->hash, 454209662Slstewart pkt_node->tval.tv_sec, 455209662Slstewart pkt_node->tval.tv_usec, 456209662Slstewart UPPER_SHORT(pkt_node->ip_laddr[0]), 457209662Slstewart LOWER_SHORT(pkt_node->ip_laddr[0]), 458209662Slstewart UPPER_SHORT(pkt_node->ip_laddr[1]), 459209662Slstewart LOWER_SHORT(pkt_node->ip_laddr[1]), 460209662Slstewart UPPER_SHORT(pkt_node->ip_laddr[2]), 461209662Slstewart LOWER_SHORT(pkt_node->ip_laddr[2]), 462209662Slstewart UPPER_SHORT(pkt_node->ip_laddr[3]), 463209662Slstewart LOWER_SHORT(pkt_node->ip_laddr[3]), 464209662Slstewart ntohs(pkt_node->tcp_localport), 465209662Slstewart UPPER_SHORT(pkt_node->ip_faddr[0]), 466209662Slstewart LOWER_SHORT(pkt_node->ip_faddr[0]), 467209662Slstewart UPPER_SHORT(pkt_node->ip_faddr[1]), 468209662Slstewart 
LOWER_SHORT(pkt_node->ip_faddr[1]), 469209662Slstewart UPPER_SHORT(pkt_node->ip_faddr[2]), 470209662Slstewart LOWER_SHORT(pkt_node->ip_faddr[2]), 471209662Slstewart UPPER_SHORT(pkt_node->ip_faddr[3]), 472209662Slstewart LOWER_SHORT(pkt_node->ip_faddr[3]), 473209662Slstewart ntohs(pkt_node->tcp_foreignport), 474209662Slstewart pkt_node->snd_ssthresh, 475209662Slstewart pkt_node->snd_cwnd, 476209662Slstewart pkt_node->snd_bwnd, 477209662Slstewart pkt_node->snd_wnd, 478209662Slstewart pkt_node->rcv_wnd, 479209662Slstewart pkt_node->snd_scale, 480209662Slstewart pkt_node->rcv_scale, 481209662Slstewart pkt_node->conn_state, 482209662Slstewart pkt_node->max_seg_size, 483209662Slstewart pkt_node->smoothed_rtt, 484209662Slstewart pkt_node->sack_enabled, 485209662Slstewart pkt_node->flags, 486209662Slstewart pkt_node->rxt_length, 487209662Slstewart pkt_node->snd_buf_hiwater, 488209662Slstewart pkt_node->snd_buf_cc, 489209662Slstewart pkt_node->rcv_buf_hiwater, 490209662Slstewart pkt_node->rcv_buf_cc, 491213162Slstewart pkt_node->sent_inflight_bytes, 492280233Shiren pkt_node->t_segqlen, 493280237Shiren pkt_node->flowid, 494280237Shiren pkt_node->flowtype); 495209662Slstewart } else { /* IPv4 packet */ 496209662Slstewart pkt_node->ip_laddr[0] = FIRST_OCTET(pkt_node->ip_laddr[3]); 497209662Slstewart pkt_node->ip_laddr[1] = SECOND_OCTET(pkt_node->ip_laddr[3]); 498209662Slstewart pkt_node->ip_laddr[2] = THIRD_OCTET(pkt_node->ip_laddr[3]); 499209662Slstewart pkt_node->ip_laddr[3] = FOURTH_OCTET(pkt_node->ip_laddr[3]); 500209662Slstewart pkt_node->ip_faddr[0] = FIRST_OCTET(pkt_node->ip_faddr[3]); 501209662Slstewart pkt_node->ip_faddr[1] = SECOND_OCTET(pkt_node->ip_faddr[3]); 502209662Slstewart pkt_node->ip_faddr[2] = THIRD_OCTET(pkt_node->ip_faddr[3]); 503209662Slstewart pkt_node->ip_faddr[3] = FOURTH_OCTET(pkt_node->ip_faddr[3]); 504209662Slstewart#endif /* SIFTR_IPV6 */ 505209662Slstewart 506209662Slstewart /* Construct an IPv4 log message. 
*/ 507209662Slstewart log_buf->ae_bytesused = snprintf(log_buf->ae_data, 508209662Slstewart MAX_LOG_MSG_LEN, 509209662Slstewart "%c,0x%08x,%jd.%06ld,%u.%u.%u.%u,%u,%u.%u.%u.%u,%u,%ld,%ld," 510280237Shiren "%ld,%ld,%ld,%u,%u,%u,%u,%u,%u,%u,%d,%u,%u,%u,%u,%u,%u,%u,%u\n", 511209662Slstewart direction[pkt_node->direction], 512209662Slstewart pkt_node->hash, 513209662Slstewart (intmax_t)pkt_node->tval.tv_sec, 514209662Slstewart pkt_node->tval.tv_usec, 515209662Slstewart pkt_node->ip_laddr[0], 516209662Slstewart pkt_node->ip_laddr[1], 517209662Slstewart pkt_node->ip_laddr[2], 518209662Slstewart pkt_node->ip_laddr[3], 519209662Slstewart ntohs(pkt_node->tcp_localport), 520209662Slstewart pkt_node->ip_faddr[0], 521209662Slstewart pkt_node->ip_faddr[1], 522209662Slstewart pkt_node->ip_faddr[2], 523209662Slstewart pkt_node->ip_faddr[3], 524209662Slstewart ntohs(pkt_node->tcp_foreignport), 525209662Slstewart pkt_node->snd_ssthresh, 526209662Slstewart pkt_node->snd_cwnd, 527209662Slstewart pkt_node->snd_bwnd, 528209662Slstewart pkt_node->snd_wnd, 529209662Slstewart pkt_node->rcv_wnd, 530209662Slstewart pkt_node->snd_scale, 531209662Slstewart pkt_node->rcv_scale, 532209662Slstewart pkt_node->conn_state, 533209662Slstewart pkt_node->max_seg_size, 534209662Slstewart pkt_node->smoothed_rtt, 535209662Slstewart pkt_node->sack_enabled, 536209662Slstewart pkt_node->flags, 537209662Slstewart pkt_node->rxt_length, 538209662Slstewart pkt_node->snd_buf_hiwater, 539209662Slstewart pkt_node->snd_buf_cc, 540209662Slstewart pkt_node->rcv_buf_hiwater, 541209662Slstewart pkt_node->rcv_buf_cc, 542213162Slstewart pkt_node->sent_inflight_bytes, 543280233Shiren pkt_node->t_segqlen, 544280237Shiren pkt_node->flowid, 545280237Shiren pkt_node->flowtype); 546209662Slstewart#ifdef SIFTR_IPV6 547209662Slstewart } 548209662Slstewart#endif 549209662Slstewart 550209662Slstewart alq_post_flags(siftr_alq, log_buf, 0); 551209662Slstewart} 552209662Slstewart 553209662Slstewart 554209662Slstewartstatic void 
555209662Slstewartsiftr_pkt_manager_thread(void *arg) 556209662Slstewart{ 557209662Slstewart STAILQ_HEAD(pkthead, pkt_node) tmp_pkt_queue = 558209662Slstewart STAILQ_HEAD_INITIALIZER(tmp_pkt_queue); 559209662Slstewart struct pkt_node *pkt_node, *pkt_node_temp; 560209662Slstewart uint8_t draining; 561209662Slstewart 562209662Slstewart draining = 2; 563209662Slstewart 564209662Slstewart mtx_lock(&siftr_pkt_mgr_mtx); 565209662Slstewart 566209662Slstewart /* draining == 0 when queue has been flushed and it's safe to exit. */ 567209662Slstewart while (draining) { 568209662Slstewart /* 569209662Slstewart * Sleep until we are signalled to wake because thread has 570209662Slstewart * been told to exit or until 1 tick has passed. 571209662Slstewart */ 572209662Slstewart mtx_sleep(&wait_for_pkt, &siftr_pkt_mgr_mtx, PWAIT, "pktwait", 573209662Slstewart 1); 574209662Slstewart 575209662Slstewart /* Gain exclusive access to the pkt_node queue. */ 576209662Slstewart mtx_lock(&siftr_pkt_queue_mtx); 577209662Slstewart 578209662Slstewart /* 579209662Slstewart * Move pkt_queue to tmp_pkt_queue, which leaves 580209662Slstewart * pkt_queue empty and ready to receive more pkt_nodes. 581209662Slstewart */ 582209662Slstewart STAILQ_CONCAT(&tmp_pkt_queue, &pkt_queue); 583209662Slstewart 584209662Slstewart /* 585209662Slstewart * We've finished making changes to the list. Unlock it 586209662Slstewart * so the pfil hooks can continue queuing pkt_nodes. 587209662Slstewart */ 588209662Slstewart mtx_unlock(&siftr_pkt_queue_mtx); 589209662Slstewart 590209662Slstewart /* 591209662Slstewart * We can't hold a mutex whilst calling siftr_process_pkt 592209662Slstewart * because ALQ might sleep waiting for buffer space. 593209662Slstewart */ 594209662Slstewart mtx_unlock(&siftr_pkt_mgr_mtx); 595209662Slstewart 596209662Slstewart /* Flush all pkt_nodes to the log file. 
*/ 597209662Slstewart STAILQ_FOREACH_SAFE(pkt_node, &tmp_pkt_queue, nodes, 598209662Slstewart pkt_node_temp) { 599209662Slstewart siftr_process_pkt(pkt_node); 600209662Slstewart STAILQ_REMOVE_HEAD(&tmp_pkt_queue, nodes); 601209662Slstewart free(pkt_node, M_SIFTR_PKTNODE); 602209662Slstewart } 603209662Slstewart 604209662Slstewart KASSERT(STAILQ_EMPTY(&tmp_pkt_queue), 605209662Slstewart ("SIFTR tmp_pkt_queue not empty after flush")); 606209662Slstewart 607209662Slstewart mtx_lock(&siftr_pkt_mgr_mtx); 608209662Slstewart 609209662Slstewart /* 610209662Slstewart * If siftr_exit_pkt_manager_thread gets set during the window 611209662Slstewart * where we are draining the tmp_pkt_queue above, there might 612209662Slstewart * still be pkts in pkt_queue that need to be drained. 613209662Slstewart * Allow one further iteration to occur after 614209662Slstewart * siftr_exit_pkt_manager_thread has been set to ensure 615209662Slstewart * pkt_queue is completely empty before we kill the thread. 616209662Slstewart * 617209662Slstewart * siftr_exit_pkt_manager_thread is set only after the pfil 618209662Slstewart * hooks have been removed, so only 1 extra iteration 619209662Slstewart * is needed to drain the queue. 620209662Slstewart */ 621209662Slstewart if (siftr_exit_pkt_manager_thread) 622209662Slstewart draining--; 623209662Slstewart } 624209662Slstewart 625209662Slstewart mtx_unlock(&siftr_pkt_mgr_mtx); 626209662Slstewart 627209662Slstewart /* Calls wakeup on this thread's struct thread ptr. 
*/ 628209662Slstewart kthread_exit(); 629209662Slstewart} 630209662Slstewart 631209662Slstewart 632209662Slstewartstatic uint32_t 633209662Slstewarthash_pkt(struct mbuf *m, uint32_t offset) 634209662Slstewart{ 635209662Slstewart uint32_t hash; 636209662Slstewart 637209662Slstewart hash = 0; 638209662Slstewart 639209662Slstewart while (m != NULL && offset > m->m_len) { 640209662Slstewart /* 641209662Slstewart * The IP packet payload does not start in this mbuf, so 642209662Slstewart * need to figure out which mbuf it starts in and what offset 643209662Slstewart * into the mbuf's data region the payload starts at. 644209662Slstewart */ 645209662Slstewart offset -= m->m_len; 646209662Slstewart m = m->m_next; 647209662Slstewart } 648209662Slstewart 649209662Slstewart while (m != NULL) { 650209662Slstewart /* Ensure there is data in the mbuf */ 651209662Slstewart if ((m->m_len - offset) > 0) 652209662Slstewart hash = hash32_buf(m->m_data + offset, 653209662Slstewart m->m_len - offset, hash); 654209662Slstewart 655209662Slstewart m = m->m_next; 656209662Slstewart offset = 0; 657209662Slstewart } 658209662Slstewart 659209662Slstewart return (hash); 660209662Slstewart} 661209662Slstewart 662209662Slstewart 663209662Slstewart/* 664209662Slstewart * Check if a given mbuf has the SIFTR mbuf tag. If it does, log the fact that 665209662Slstewart * it's a reinjected packet and return. If it doesn't, tag the mbuf and return. 666209662Slstewart * Return value >0 means the caller should skip processing this mbuf. 
667209662Slstewart */ 668209662Slstewartstatic inline int 669209662Slstewartsiftr_chkreinject(struct mbuf *m, int dir, struct siftr_stats *ss) 670209662Slstewart{ 671209662Slstewart if (m_tag_locate(m, PACKET_COOKIE_SIFTR, PACKET_TAG_SIFTR, NULL) 672209662Slstewart != NULL) { 673209662Slstewart if (dir == PFIL_IN) 674209662Slstewart ss->nskip_in_dejavu++; 675209662Slstewart else 676209662Slstewart ss->nskip_out_dejavu++; 677209662Slstewart 678209662Slstewart return (1); 679209662Slstewart } else { 680209662Slstewart struct m_tag *tag = m_tag_alloc(PACKET_COOKIE_SIFTR, 681209662Slstewart PACKET_TAG_SIFTR, 0, M_NOWAIT); 682209662Slstewart if (tag == NULL) { 683209662Slstewart if (dir == PFIL_IN) 684209662Slstewart ss->nskip_in_malloc++; 685209662Slstewart else 686209662Slstewart ss->nskip_out_malloc++; 687209662Slstewart 688209662Slstewart return (1); 689209662Slstewart } 690209662Slstewart 691209662Slstewart m_tag_prepend(m, tag); 692209662Slstewart } 693209662Slstewart 694209662Slstewart return (0); 695209662Slstewart} 696209662Slstewart 697209662Slstewart 698209662Slstewart/* 699209662Slstewart * Look up an inpcb for a packet. Return the inpcb pointer if found, or NULL 700209662Slstewart * otherwise. 701209662Slstewart */ 702209662Slstewartstatic inline struct inpcb * 703209662Slstewartsiftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport, 704209662Slstewart uint16_t dport, int dir, struct siftr_stats *ss) 705209662Slstewart{ 706209662Slstewart struct inpcb *inp; 707209662Slstewart 708209662Slstewart /* We need the tcbinfo lock. */ 709209662Slstewart INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); 710209662Slstewart 711209662Slstewart if (dir == PFIL_IN) 712209662Slstewart inp = (ipver == INP_IPV4 ? 
713222488Srwatson in_pcblookup(&V_tcbinfo, ip->ip_src, sport, ip->ip_dst, 714222488Srwatson dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif) 715209662Slstewart : 716209662Slstewart#ifdef SIFTR_IPV6 717222488Srwatson in6_pcblookup(&V_tcbinfo, 718209662Slstewart &((struct ip6_hdr *)ip)->ip6_src, sport, 719222488Srwatson &((struct ip6_hdr *)ip)->ip6_dst, dport, INPLOOKUP_RLOCKPCB, 720209662Slstewart m->m_pkthdr.rcvif) 721209662Slstewart#else 722209662Slstewart NULL 723209662Slstewart#endif 724209662Slstewart ); 725209662Slstewart 726209662Slstewart else 727209662Slstewart inp = (ipver == INP_IPV4 ? 728222488Srwatson in_pcblookup(&V_tcbinfo, ip->ip_dst, dport, ip->ip_src, 729222488Srwatson sport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif) 730209662Slstewart : 731209662Slstewart#ifdef SIFTR_IPV6 732222488Srwatson in6_pcblookup(&V_tcbinfo, 733209662Slstewart &((struct ip6_hdr *)ip)->ip6_dst, dport, 734222488Srwatson &((struct ip6_hdr *)ip)->ip6_src, sport, INPLOOKUP_RLOCKPCB, 735209662Slstewart m->m_pkthdr.rcvif) 736209662Slstewart#else 737209662Slstewart NULL 738209662Slstewart#endif 739209662Slstewart ); 740209662Slstewart 741209662Slstewart /* If we can't find the inpcb, bail. 
*/ 742209662Slstewart if (inp == NULL) { 743209662Slstewart if (dir == PFIL_IN) 744209662Slstewart ss->nskip_in_inpcb++; 745209662Slstewart else 746209662Slstewart ss->nskip_out_inpcb++; 747209662Slstewart } 748209662Slstewart 749209662Slstewart return (inp); 750209662Slstewart} 751209662Slstewart 752209662Slstewart 753210203Slstewartstatic inline void 754210203Slstewartsiftr_siftdata(struct pkt_node *pn, struct inpcb *inp, struct tcpcb *tp, 755210203Slstewart int ipver, int dir, int inp_locally_locked) 756210203Slstewart{ 757210203Slstewart#ifdef SIFTR_IPV6 758210203Slstewart if (ipver == INP_IPV4) { 759210203Slstewart pn->ip_laddr[3] = inp->inp_laddr.s_addr; 760210203Slstewart pn->ip_faddr[3] = inp->inp_faddr.s_addr; 761210203Slstewart#else 762210203Slstewart *((uint32_t *)pn->ip_laddr) = inp->inp_laddr.s_addr; 763210203Slstewart *((uint32_t *)pn->ip_faddr) = inp->inp_faddr.s_addr; 764210203Slstewart#endif 765210203Slstewart#ifdef SIFTR_IPV6 766210203Slstewart } else { 767210203Slstewart pn->ip_laddr[0] = inp->in6p_laddr.s6_addr32[0]; 768210203Slstewart pn->ip_laddr[1] = inp->in6p_laddr.s6_addr32[1]; 769210203Slstewart pn->ip_laddr[2] = inp->in6p_laddr.s6_addr32[2]; 770210203Slstewart pn->ip_laddr[3] = inp->in6p_laddr.s6_addr32[3]; 771210203Slstewart pn->ip_faddr[0] = inp->in6p_faddr.s6_addr32[0]; 772210203Slstewart pn->ip_faddr[1] = inp->in6p_faddr.s6_addr32[1]; 773210203Slstewart pn->ip_faddr[2] = inp->in6p_faddr.s6_addr32[2]; 774210203Slstewart pn->ip_faddr[3] = inp->in6p_faddr.s6_addr32[3]; 775210203Slstewart } 776210203Slstewart#endif 777210203Slstewart pn->tcp_localport = inp->inp_lport; 778210203Slstewart pn->tcp_foreignport = inp->inp_fport; 779210203Slstewart pn->snd_cwnd = tp->snd_cwnd; 780210203Slstewart pn->snd_wnd = tp->snd_wnd; 781210203Slstewart pn->rcv_wnd = tp->rcv_wnd; 782212765Sandre pn->snd_bwnd = 0; /* Unused, kept for compat. 
*/ 783210203Slstewart pn->snd_ssthresh = tp->snd_ssthresh; 784210203Slstewart pn->snd_scale = tp->snd_scale; 785210203Slstewart pn->rcv_scale = tp->rcv_scale; 786210203Slstewart pn->conn_state = tp->t_state; 787210203Slstewart pn->max_seg_size = tp->t_maxseg; 788210203Slstewart pn->smoothed_rtt = tp->t_srtt; 789210203Slstewart pn->sack_enabled = (tp->t_flags & TF_SACK_PERMIT) != 0; 790210203Slstewart pn->flags = tp->t_flags; 791210203Slstewart pn->rxt_length = tp->t_rxtcur; 792210203Slstewart pn->snd_buf_hiwater = inp->inp_socket->so_snd.sb_hiwat; 793274421Sglebius pn->snd_buf_cc = sbused(&inp->inp_socket->so_snd); 794210203Slstewart pn->rcv_buf_hiwater = inp->inp_socket->so_rcv.sb_hiwat; 795274421Sglebius pn->rcv_buf_cc = sbused(&inp->inp_socket->so_rcv); 796210203Slstewart pn->sent_inflight_bytes = tp->snd_max - tp->snd_una; 797213162Slstewart pn->t_segqlen = tp->t_segqlen; 798280233Shiren pn->flowid = inp->inp_flowid; 799280237Shiren pn->flowtype = inp->inp_flowtype; 800210203Slstewart 801210203Slstewart /* We've finished accessing the tcb so release the lock. */ 802210203Slstewart if (inp_locally_locked) 803210203Slstewart INP_RUNLOCK(inp); 804210203Slstewart 805210203Slstewart pn->ipver = ipver; 806210203Slstewart pn->direction = dir; 807210203Slstewart 808210203Slstewart /* 809210203Slstewart * Significantly more accurate than using getmicrotime(), but slower! 810210203Slstewart * Gives true microsecond resolution at the expense of a hit to 811210203Slstewart * maximum pps throughput processing when SIFTR is loaded and enabled. 812210203Slstewart */ 813210203Slstewart microtime(&pn->tval); 814282276Sgnn TCP_PROBE1(siftr, &pn); 815282276Sgnn 816210203Slstewart} 817210203Slstewart 818210203Slstewart 819209662Slstewart/* 820209662Slstewart * pfil hook that is called for each IPv4 packet making its way through the 821209662Slstewart * stack in either direction. 
822209662Slstewart * The pfil subsystem holds a non-sleepable mutex somewhere when 823209662Slstewart * calling our hook function, so we can't sleep at all. 824209662Slstewart * It's very important to use the M_NOWAIT flag with all function calls 825209662Slstewart * that support it so that they won't sleep, otherwise you get a panic. 826209662Slstewart */ 827209662Slstewartstatic int 828209662Slstewartsiftr_chkpkt(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, 829209662Slstewart struct inpcb *inp) 830209662Slstewart{ 831210203Slstewart struct pkt_node *pn; 832209662Slstewart struct ip *ip; 833209662Slstewart struct tcphdr *th; 834209662Slstewart struct tcpcb *tp; 835209662Slstewart struct siftr_stats *ss; 836209662Slstewart unsigned int ip_hl; 837210203Slstewart int inp_locally_locked; 838209662Slstewart 839209662Slstewart inp_locally_locked = 0; 840209662Slstewart ss = DPCPU_PTR(ss); 841209662Slstewart 842209662Slstewart /* 843209662Slstewart * m_pullup is not required here because ip_{input|output} 844209662Slstewart * already do the heavy lifting for us. 845209662Slstewart */ 846209662Slstewart 847209662Slstewart ip = mtod(*m, struct ip *); 848209662Slstewart 849209662Slstewart /* Only continue processing if the packet is TCP. */ 850209662Slstewart if (ip->ip_p != IPPROTO_TCP) 851209662Slstewart goto ret; 852209662Slstewart 853209662Slstewart /* 854209662Slstewart * If a kernel subsystem reinjects packets into the stack, our pfil 855209662Slstewart * hook will be called multiple times for the same packet. 856209662Slstewart * Make sure we only process unique packets. 
857209662Slstewart */ 858209662Slstewart if (siftr_chkreinject(*m, dir, ss)) 859209662Slstewart goto ret; 860209662Slstewart 861209662Slstewart if (dir == PFIL_IN) 862209662Slstewart ss->n_in++; 863209662Slstewart else 864209662Slstewart ss->n_out++; 865209662Slstewart 866209662Slstewart /* 867209662Slstewart * Create a tcphdr struct starting at the correct offset 868209662Slstewart * in the IP packet. ip->ip_hl gives the ip header length 869209662Slstewart * in 4-byte words, so multiply it to get the size in bytes. 870209662Slstewart */ 871209662Slstewart ip_hl = (ip->ip_hl << 2); 872209662Slstewart th = (struct tcphdr *)((caddr_t)ip + ip_hl); 873209662Slstewart 874209662Slstewart /* 875209662Slstewart * If the pfil hooks don't provide a pointer to the 876209662Slstewart * inpcb, we need to find it ourselves and lock it. 877209662Slstewart */ 878209662Slstewart if (!inp) { 879209662Slstewart /* Find the corresponding inpcb for this pkt. */ 880209662Slstewart inp = siftr_findinpcb(INP_IPV4, ip, *m, th->th_sport, 881209662Slstewart th->th_dport, dir, ss); 882209662Slstewart 883209662Slstewart if (inp == NULL) 884209662Slstewart goto ret; 885209662Slstewart else 886209662Slstewart inp_locally_locked = 1; 887209662Slstewart } 888209662Slstewart 889209662Slstewart INP_LOCK_ASSERT(inp); 890209662Slstewart 891209662Slstewart /* Find the TCP control block that corresponds with this packet */ 892209662Slstewart tp = intotcpcb(inp); 893209662Slstewart 894209662Slstewart /* 895209662Slstewart * If we can't find the TCP control block (happens occasionaly for a 896209662Slstewart * packet sent during the shutdown phase of a TCP connection), 897209662Slstewart * or we're in the timewait state, bail 898209662Slstewart */ 899209662Slstewart if (tp == NULL || inp->inp_flags & INP_TIMEWAIT) { 900209662Slstewart if (dir == PFIL_IN) 901209662Slstewart ss->nskip_in_tcpcb++; 902209662Slstewart else 903209662Slstewart ss->nskip_out_tcpcb++; 904209662Slstewart 905209662Slstewart goto 
inp_unlock; 906209662Slstewart } 907209662Slstewart 908210203Slstewart pn = malloc(sizeof(struct pkt_node), M_SIFTR_PKTNODE, M_NOWAIT|M_ZERO); 909209662Slstewart 910210203Slstewart if (pn == NULL) { 911210203Slstewart if (dir == PFIL_IN) 912210203Slstewart ss->nskip_in_malloc++; 913210203Slstewart else 914210203Slstewart ss->nskip_out_malloc++; 915209662Slstewart 916210203Slstewart goto inp_unlock; 917210203Slstewart } 918209662Slstewart 919210203Slstewart siftr_siftdata(pn, inp, tp, INP_IPV4, dir, inp_locally_locked); 920209662Slstewart 921209662Slstewart if (siftr_generate_hashes) { 922209662Slstewart if ((*m)->m_pkthdr.csum_flags & CSUM_TCP) { 923209662Slstewart /* 924209662Slstewart * For outbound packets, the TCP checksum isn't 925209662Slstewart * calculated yet. This is a problem for our packet 926209662Slstewart * hashing as the receiver will calc a different hash 927209662Slstewart * to ours if we don't include the correct TCP checksum 928209662Slstewart * in the bytes being hashed. To work around this 929209662Slstewart * problem, we manually calc the TCP checksum here in 930209662Slstewart * software. We unset the CSUM_TCP flag so the lower 931209662Slstewart * layers don't recalc it. 932209662Slstewart */ 933209662Slstewart (*m)->m_pkthdr.csum_flags &= ~CSUM_TCP; 934209662Slstewart 935209662Slstewart /* 936209662Slstewart * Calculate the TCP checksum in software and assign 937209662Slstewart * to correct TCP header field, which will follow the 938209662Slstewart * packet mbuf down the stack. The trick here is that 939209662Slstewart * tcp_output() sets th->th_sum to the checksum of the 940209662Slstewart * pseudo header for us already. Because of the nature 941209662Slstewart * of the checksumming algorithm, we can sum over the 942209662Slstewart * entire IP payload (i.e. TCP header and data), which 943209662Slstewart * will include the already calculated pseduo header 944209662Slstewart * checksum, thus giving us the complete TCP checksum. 
945209662Slstewart * 946209662Slstewart * To put it in simple terms, if checksum(1,2,3,4)=10, 947209662Slstewart * then checksum(1,2,3,4,5) == checksum(10,5). 948209662Slstewart * This property is what allows us to "cheat" and 949209662Slstewart * checksum only the IP payload which has the TCP 950209662Slstewart * th_sum field populated with the pseudo header's 951209662Slstewart * checksum, and not need to futz around checksumming 952209662Slstewart * pseudo header bytes and TCP header/data in one hit. 953209662Slstewart * Refer to RFC 1071 for more info. 954209662Slstewart * 955209662Slstewart * NB: in_cksum_skip(struct mbuf *m, int len, int skip) 956209662Slstewart * in_cksum_skip 2nd argument is NOT the number of 957209662Slstewart * bytes to read from the mbuf at "skip" bytes offset 958209662Slstewart * from the start of the mbuf (very counter intuitive!). 959209662Slstewart * The number of bytes to read is calculated internally 960209662Slstewart * by the function as len-skip i.e. to sum over the IP 961209662Slstewart * payload (TCP header + data) bytes, it is INCORRECT 962209662Slstewart * to call the function like this: 963209662Slstewart * in_cksum_skip(at, ip->ip_len - offset, offset) 964209662Slstewart * Rather, it should be called like this: 965209662Slstewart * in_cksum_skip(at, ip->ip_len, offset) 966209662Slstewart * which means read "ip->ip_len - offset" bytes from 967209662Slstewart * the mbuf cluster "at" at offset "offset" bytes from 968209662Slstewart * the beginning of the "at" mbuf's data pointer. 969209662Slstewart */ 970241913Sglebius th->th_sum = in_cksum_skip(*m, ntohs(ip->ip_len), 971241913Sglebius ip_hl); 972209662Slstewart } 973209662Slstewart 974209662Slstewart /* 975209662Slstewart * XXX: Having to calculate the checksum in software and then 976209662Slstewart * hash over all bytes is really inefficient. 
Would be nice to 977209662Slstewart * find a way to create the hash and checksum in the same pass 978209662Slstewart * over the bytes. 979209662Slstewart */ 980210203Slstewart pn->hash = hash_pkt(*m, ip_hl); 981209662Slstewart } 982209662Slstewart 983209662Slstewart mtx_lock(&siftr_pkt_queue_mtx); 984210203Slstewart STAILQ_INSERT_TAIL(&pkt_queue, pn, nodes); 985209662Slstewart mtx_unlock(&siftr_pkt_queue_mtx); 986209662Slstewart goto ret; 987209662Slstewart 988209662Slstewartinp_unlock: 989209662Slstewart if (inp_locally_locked) 990209662Slstewart INP_RUNLOCK(inp); 991209662Slstewart 992209662Slstewartret: 993209662Slstewart /* Returning 0 ensures pfil will not discard the pkt */ 994209662Slstewart return (0); 995209662Slstewart} 996209662Slstewart 997209662Slstewart 998209662Slstewart#ifdef SIFTR_IPV6 999209662Slstewartstatic int 1000209662Slstewartsiftr_chkpkt6(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, 1001209662Slstewart struct inpcb *inp) 1002209662Slstewart{ 1003210203Slstewart struct pkt_node *pn; 1004209662Slstewart struct ip6_hdr *ip6; 1005209662Slstewart struct tcphdr *th; 1006209662Slstewart struct tcpcb *tp; 1007209662Slstewart struct siftr_stats *ss; 1008209662Slstewart unsigned int ip6_hl; 1009210203Slstewart int inp_locally_locked; 1010209662Slstewart 1011209662Slstewart inp_locally_locked = 0; 1012209662Slstewart ss = DPCPU_PTR(ss); 1013209662Slstewart 1014209662Slstewart /* 1015209662Slstewart * m_pullup is not required here because ip6_{input|output} 1016209662Slstewart * already do the heavy lifting for us. 1017209662Slstewart */ 1018209662Slstewart 1019209662Slstewart ip6 = mtod(*m, struct ip6_hdr *); 1020209662Slstewart 1021209662Slstewart /* 1022209662Slstewart * Only continue processing if the packet is TCP 1023209662Slstewart * XXX: We should follow the next header fields 1024209662Slstewart * as shown on Pg 6 RFC 2460, but right now we'll 1025209662Slstewart * only check pkts that have no extension headers. 
1026209662Slstewart */ 1027209662Slstewart if (ip6->ip6_nxt != IPPROTO_TCP) 1028209662Slstewart goto ret6; 1029209662Slstewart 1030209662Slstewart /* 1031209662Slstewart * If a kernel subsystem reinjects packets into the stack, our pfil 1032209662Slstewart * hook will be called multiple times for the same packet. 1033209662Slstewart * Make sure we only process unique packets. 1034209662Slstewart */ 1035209662Slstewart if (siftr_chkreinject(*m, dir, ss)) 1036209662Slstewart goto ret6; 1037209662Slstewart 1038209662Slstewart if (dir == PFIL_IN) 1039209662Slstewart ss->n_in++; 1040209662Slstewart else 1041209662Slstewart ss->n_out++; 1042209662Slstewart 1043209662Slstewart ip6_hl = sizeof(struct ip6_hdr); 1044209662Slstewart 1045209662Slstewart /* 1046209662Slstewart * Create a tcphdr struct starting at the correct offset 1047209662Slstewart * in the ipv6 packet. ip->ip_hl gives the ip header length 1048209662Slstewart * in 4-byte words, so multiply it to get the size in bytes. 1049209662Slstewart */ 1050209662Slstewart th = (struct tcphdr *)((caddr_t)ip6 + ip6_hl); 1051209662Slstewart 1052209662Slstewart /* 1053209662Slstewart * For inbound packets, the pfil hooks don't provide a pointer to the 1054209662Slstewart * inpcb, so we need to find it ourselves and lock it. 1055209662Slstewart */ 1056209662Slstewart if (!inp) { 1057209662Slstewart /* Find the corresponding inpcb for this pkt. */ 1058209662Slstewart inp = siftr_findinpcb(INP_IPV6, (struct ip *)ip6, *m, 1059209662Slstewart th->th_sport, th->th_dport, dir, ss); 1060209662Slstewart 1061209662Slstewart if (inp == NULL) 1062209662Slstewart goto ret6; 1063209662Slstewart else 1064209662Slstewart inp_locally_locked = 1; 1065209662Slstewart } 1066209662Slstewart 1067209662Slstewart /* Find the TCP control block that corresponds with this packet. 
*/ 1068209662Slstewart tp = intotcpcb(inp); 1069209662Slstewart 1070209662Slstewart /* 1071209662Slstewart * If we can't find the TCP control block (happens occasionaly for a 1072209662Slstewart * packet sent during the shutdown phase of a TCP connection), 1073209662Slstewart * or we're in the timewait state, bail. 1074209662Slstewart */ 1075209662Slstewart if (tp == NULL || inp->inp_flags & INP_TIMEWAIT) { 1076209662Slstewart if (dir == PFIL_IN) 1077209662Slstewart ss->nskip_in_tcpcb++; 1078209662Slstewart else 1079209662Slstewart ss->nskip_out_tcpcb++; 1080209662Slstewart 1081209662Slstewart goto inp_unlock6; 1082209662Slstewart } 1083209662Slstewart 1084210203Slstewart pn = malloc(sizeof(struct pkt_node), M_SIFTR_PKTNODE, M_NOWAIT|M_ZERO); 1085209662Slstewart 1086210203Slstewart if (pn == NULL) { 1087210203Slstewart if (dir == PFIL_IN) 1088210203Slstewart ss->nskip_in_malloc++; 1089210203Slstewart else 1090210203Slstewart ss->nskip_out_malloc++; 1091209662Slstewart 1092210203Slstewart goto inp_unlock6; 1093210203Slstewart } 1094209662Slstewart 1095210203Slstewart siftr_siftdata(pn, inp, tp, INP_IPV6, dir, inp_locally_locked); 1096209662Slstewart 1097210203Slstewart /* XXX: Figure out how to generate hashes for IPv6 packets. */ 1098209662Slstewart 1099209662Slstewart mtx_lock(&siftr_pkt_queue_mtx); 1100210203Slstewart STAILQ_INSERT_TAIL(&pkt_queue, pn, nodes); 1101209662Slstewart mtx_unlock(&siftr_pkt_queue_mtx); 1102209662Slstewart goto ret6; 1103209662Slstewart 1104209662Slstewartinp_unlock6: 1105209662Slstewart if (inp_locally_locked) 1106209662Slstewart INP_RUNLOCK(inp); 1107209662Slstewart 1108209662Slstewartret6: 1109209662Slstewart /* Returning 0 ensures pfil will not discard the pkt. 
*/ 1110209662Slstewart return (0); 1111209662Slstewart} 1112209662Slstewart#endif /* #ifdef SIFTR_IPV6 */ 1113209662Slstewart 1114209662Slstewart 1115209662Slstewartstatic int 1116209662Slstewartsiftr_pfil(int action) 1117209662Slstewart{ 1118215552Slstewart struct pfil_head *pfh_inet; 1119209662Slstewart#ifdef SIFTR_IPV6 1120215552Slstewart struct pfil_head *pfh_inet6; 1121209662Slstewart#endif 1122215552Slstewart VNET_ITERATOR_DECL(vnet_iter); 1123209662Slstewart 1124215552Slstewart VNET_LIST_RLOCK(); 1125215552Slstewart VNET_FOREACH(vnet_iter) { 1126215552Slstewart CURVNET_SET(vnet_iter); 1127215552Slstewart pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); 1128209662Slstewart#ifdef SIFTR_IPV6 1129215552Slstewart pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); 1130209662Slstewart#endif 1131215552Slstewart 1132215552Slstewart if (action == HOOK) { 1133215552Slstewart pfil_add_hook(siftr_chkpkt, NULL, 1134215552Slstewart PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet); 1135209662Slstewart#ifdef SIFTR_IPV6 1136215552Slstewart pfil_add_hook(siftr_chkpkt6, NULL, 1137215552Slstewart PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet6); 1138209662Slstewart#endif 1139215552Slstewart } else if (action == UNHOOK) { 1140215552Slstewart pfil_remove_hook(siftr_chkpkt, NULL, 1141215552Slstewart PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet); 1142215552Slstewart#ifdef SIFTR_IPV6 1143215552Slstewart pfil_remove_hook(siftr_chkpkt6, NULL, 1144215552Slstewart PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet6); 1145215552Slstewart#endif 1146215552Slstewart } 1147215552Slstewart CURVNET_RESTORE(); 1148209662Slstewart } 1149215552Slstewart VNET_LIST_RUNLOCK(); 1150209662Slstewart 1151209662Slstewart return (0); 1152209662Slstewart} 1153209662Slstewart 1154209662Slstewart 1155209662Slstewartstatic int 1156209662Slstewartsiftr_sysctl_logfile_name_handler(SYSCTL_HANDLER_ARGS) 1157209662Slstewart{ 1158209662Slstewart struct alq *new_alq; 1159209662Slstewart int error; 1160209662Slstewart 
1161273773Shselasky error = sysctl_handle_string(oidp, arg1, arg2, req); 1162209662Slstewart 1163273773Shselasky /* Check for error or same filename */ 1164273773Shselasky if (error != 0 || req->newptr == NULL || 1165273773Shselasky strncmp(siftr_logfile, arg1, arg2) == 0) 1166273773Shselasky goto done; 1167209662Slstewart 1168273773Shselasky /* Filname changed */ 1169273773Shselasky error = alq_open(&new_alq, arg1, curthread->td_ucred, 1170273773Shselasky SIFTR_LOG_FILE_MODE, SIFTR_ALQ_BUFLEN, 0); 1171273773Shselasky if (error != 0) 1172273773Shselasky goto done; 1173209662Slstewart 1174273773Shselasky /* 1175273773Shselasky * If disabled, siftr_alq == NULL so we simply close 1176273773Shselasky * the alq as we've proved it can be opened. 1177273773Shselasky * If enabled, close the existing alq and switch the old 1178273773Shselasky * for the new. 1179273773Shselasky */ 1180273773Shselasky if (siftr_alq == NULL) { 1181273773Shselasky alq_close(new_alq); 1182273773Shselasky } else { 1183273773Shselasky alq_close(siftr_alq); 1184273773Shselasky siftr_alq = new_alq; 1185209662Slstewart } 1186209662Slstewart 1187273773Shselasky /* Update filename upon success */ 1188273773Shselasky strlcpy(siftr_logfile, arg1, arg2); 1189273773Shselaskydone: 1190273773Shselasky return (error); 1191209662Slstewart} 1192209662Slstewart 1193209662Slstewartstatic int 1194209662Slstewartsiftr_manage_ops(uint8_t action) 1195209662Slstewart{ 1196209662Slstewart struct siftr_stats totalss; 1197209662Slstewart struct timeval tval; 1198209662Slstewart struct flow_hash_node *counter, *tmp_counter; 1199209662Slstewart struct sbuf *s; 1200209662Slstewart int i, key_index, ret, error; 1201209662Slstewart uint32_t bytes_to_write, total_skipped_pkts; 1202209662Slstewart uint16_t lport, fport; 1203209662Slstewart uint8_t *key, ipver; 1204209662Slstewart 1205209662Slstewart#ifdef SIFTR_IPV6 1206209662Slstewart uint32_t laddr[4]; 1207209662Slstewart uint32_t faddr[4]; 1208209662Slstewart#else 
1209209662Slstewart uint8_t laddr[4]; 1210209662Slstewart uint8_t faddr[4]; 1211209662Slstewart#endif 1212209662Slstewart 1213209662Slstewart error = 0; 1214209662Slstewart total_skipped_pkts = 0; 1215209662Slstewart 1216209662Slstewart /* Init an autosizing sbuf that initially holds 200 chars. */ 1217209662Slstewart if ((s = sbuf_new(NULL, NULL, 200, SBUF_AUTOEXTEND)) == NULL) 1218209662Slstewart return (-1); 1219209662Slstewart 1220343273Sbrooks if (action == SIFTR_ENABLE && siftr_pkt_manager_thr == NULL) { 1221209662Slstewart /* 1222209662Slstewart * Create our alq 1223209662Slstewart * XXX: We should abort if alq_open fails! 1224209662Slstewart */ 1225209662Slstewart alq_open(&siftr_alq, siftr_logfile, curthread->td_ucred, 1226209662Slstewart SIFTR_LOG_FILE_MODE, SIFTR_ALQ_BUFLEN, 0); 1227209662Slstewart 1228209662Slstewart STAILQ_INIT(&pkt_queue); 1229209662Slstewart 1230209982Slstewart DPCPU_ZERO(ss); 1231209982Slstewart 1232209662Slstewart siftr_exit_pkt_manager_thread = 0; 1233209662Slstewart 1234209662Slstewart ret = kthread_add(&siftr_pkt_manager_thread, NULL, NULL, 1235209662Slstewart &siftr_pkt_manager_thr, RFNOWAIT, 0, 1236209662Slstewart "siftr_pkt_manager_thr"); 1237209662Slstewart 1238209662Slstewart siftr_pfil(HOOK); 1239209662Slstewart 1240209662Slstewart microtime(&tval); 1241209662Slstewart 1242209662Slstewart sbuf_printf(s, 1243209662Slstewart "enable_time_secs=%jd\tenable_time_usecs=%06ld\t" 1244209662Slstewart "siftrver=%s\thz=%u\ttcp_rtt_scale=%u\tsysname=%s\t" 1245209662Slstewart "sysver=%u\tipmode=%u\n", 1246209662Slstewart (intmax_t)tval.tv_sec, tval.tv_usec, MODVERSION_STR, hz, 1247209662Slstewart TCP_RTT_SCALE, SYS_NAME, __FreeBSD_version, SIFTR_IPMODE); 1248209662Slstewart 1249209662Slstewart sbuf_finish(s); 1250209662Slstewart alq_writen(siftr_alq, sbuf_data(s), sbuf_len(s), ALQ_WAITOK); 1251209662Slstewart 1252209662Slstewart } else if (action == SIFTR_DISABLE && siftr_pkt_manager_thr != NULL) { 1253209662Slstewart /* 
1254209662Slstewart * Remove the pfil hook functions. All threads currently in 1255209662Slstewart * the hook functions are allowed to exit before siftr_pfil() 1256209662Slstewart * returns. 1257209662Slstewart */ 1258209662Slstewart siftr_pfil(UNHOOK); 1259209662Slstewart 1260209662Slstewart /* This will block until the pkt manager thread unlocks it. */ 1261209662Slstewart mtx_lock(&siftr_pkt_mgr_mtx); 1262209662Slstewart 1263209662Slstewart /* Tell the pkt manager thread that it should exit now. */ 1264209662Slstewart siftr_exit_pkt_manager_thread = 1; 1265209662Slstewart 1266209662Slstewart /* 1267209662Slstewart * Wake the pkt_manager thread so it realises that 1268209662Slstewart * siftr_exit_pkt_manager_thread == 1 and exits gracefully. 1269209662Slstewart * The wakeup won't be delivered until we unlock 1270209662Slstewart * siftr_pkt_mgr_mtx so this isn't racy. 1271209662Slstewart */ 1272209662Slstewart wakeup(&wait_for_pkt); 1273209662Slstewart 1274209662Slstewart /* Wait for the pkt_manager thread to exit. 
*/ 1275209662Slstewart mtx_sleep(siftr_pkt_manager_thr, &siftr_pkt_mgr_mtx, PWAIT, 1276209662Slstewart "thrwait", 0); 1277209662Slstewart 1278209662Slstewart siftr_pkt_manager_thr = NULL; 1279209662Slstewart mtx_unlock(&siftr_pkt_mgr_mtx); 1280209662Slstewart 1281209980Slstewart totalss.n_in = DPCPU_VARSUM(ss, n_in); 1282209980Slstewart totalss.n_out = DPCPU_VARSUM(ss, n_out); 1283209980Slstewart totalss.nskip_in_malloc = DPCPU_VARSUM(ss, nskip_in_malloc); 1284209980Slstewart totalss.nskip_out_malloc = DPCPU_VARSUM(ss, nskip_out_malloc); 1285209980Slstewart totalss.nskip_in_mtx = DPCPU_VARSUM(ss, nskip_in_mtx); 1286209980Slstewart totalss.nskip_out_mtx = DPCPU_VARSUM(ss, nskip_out_mtx); 1287209980Slstewart totalss.nskip_in_tcpcb = DPCPU_VARSUM(ss, nskip_in_tcpcb); 1288209980Slstewart totalss.nskip_out_tcpcb = DPCPU_VARSUM(ss, nskip_out_tcpcb); 1289209980Slstewart totalss.nskip_in_inpcb = DPCPU_VARSUM(ss, nskip_in_inpcb); 1290209980Slstewart totalss.nskip_out_inpcb = DPCPU_VARSUM(ss, nskip_out_inpcb); 1291209662Slstewart 1292209662Slstewart total_skipped_pkts = totalss.nskip_in_malloc + 1293209662Slstewart totalss.nskip_out_malloc + totalss.nskip_in_mtx + 1294209662Slstewart totalss.nskip_out_mtx + totalss.nskip_in_tcpcb + 1295209662Slstewart totalss.nskip_out_tcpcb + totalss.nskip_in_inpcb + 1296209662Slstewart totalss.nskip_out_inpcb; 1297209662Slstewart 1298209662Slstewart microtime(&tval); 1299209662Slstewart 1300209662Slstewart sbuf_printf(s, 1301209662Slstewart "disable_time_secs=%jd\tdisable_time_usecs=%06ld\t" 1302209662Slstewart "num_inbound_tcp_pkts=%ju\tnum_outbound_tcp_pkts=%ju\t" 1303209662Slstewart "total_tcp_pkts=%ju\tnum_inbound_skipped_pkts_malloc=%u\t" 1304209662Slstewart "num_outbound_skipped_pkts_malloc=%u\t" 1305209662Slstewart "num_inbound_skipped_pkts_mtx=%u\t" 1306209662Slstewart "num_outbound_skipped_pkts_mtx=%u\t" 1307209662Slstewart "num_inbound_skipped_pkts_tcpcb=%u\t" 1308209662Slstewart "num_outbound_skipped_pkts_tcpcb=%u\t" 
1309209662Slstewart "num_inbound_skipped_pkts_inpcb=%u\t" 1310209662Slstewart "num_outbound_skipped_pkts_inpcb=%u\t" 1311209662Slstewart "total_skipped_tcp_pkts=%u\tflow_list=", 1312209662Slstewart (intmax_t)tval.tv_sec, 1313209662Slstewart tval.tv_usec, 1314209662Slstewart (uintmax_t)totalss.n_in, 1315209662Slstewart (uintmax_t)totalss.n_out, 1316209662Slstewart (uintmax_t)(totalss.n_in + totalss.n_out), 1317209662Slstewart totalss.nskip_in_malloc, 1318209662Slstewart totalss.nskip_out_malloc, 1319209662Slstewart totalss.nskip_in_mtx, 1320209662Slstewart totalss.nskip_out_mtx, 1321209662Slstewart totalss.nskip_in_tcpcb, 1322209662Slstewart totalss.nskip_out_tcpcb, 1323209662Slstewart totalss.nskip_in_inpcb, 1324209662Slstewart totalss.nskip_out_inpcb, 1325209662Slstewart total_skipped_pkts); 1326209662Slstewart 1327209662Slstewart /* 1328209662Slstewart * Iterate over the flow hash, printing a summary of each 1329209662Slstewart * flow seen and freeing any malloc'd memory. 1330209662Slstewart * The hash consists of an array of LISTs (man 3 queue). 
1331209662Slstewart */ 1332247906Slstewart for (i = 0; i <= siftr_hashmask; i++) { 1333209662Slstewart LIST_FOREACH_SAFE(counter, counter_hash + i, nodes, 1334209662Slstewart tmp_counter) { 1335209662Slstewart key = counter->key; 1336209662Slstewart key_index = 1; 1337209662Slstewart 1338209662Slstewart ipver = key[0]; 1339209662Slstewart 1340209662Slstewart memcpy(laddr, key + key_index, sizeof(laddr)); 1341209662Slstewart key_index += sizeof(laddr); 1342209662Slstewart memcpy(&lport, key + key_index, sizeof(lport)); 1343209662Slstewart key_index += sizeof(lport); 1344209662Slstewart memcpy(faddr, key + key_index, sizeof(faddr)); 1345209662Slstewart key_index += sizeof(faddr); 1346209662Slstewart memcpy(&fport, key + key_index, sizeof(fport)); 1347209662Slstewart 1348209662Slstewart#ifdef SIFTR_IPV6 1349209662Slstewart laddr[3] = ntohl(laddr[3]); 1350209662Slstewart faddr[3] = ntohl(faddr[3]); 1351209662Slstewart 1352209662Slstewart if (ipver == INP_IPV6) { 1353209662Slstewart laddr[0] = ntohl(laddr[0]); 1354209662Slstewart laddr[1] = ntohl(laddr[1]); 1355209662Slstewart laddr[2] = ntohl(laddr[2]); 1356209662Slstewart faddr[0] = ntohl(faddr[0]); 1357209662Slstewart faddr[1] = ntohl(faddr[1]); 1358209662Slstewart faddr[2] = ntohl(faddr[2]); 1359209662Slstewart 1360209662Slstewart sbuf_printf(s, 1361209662Slstewart "%x:%x:%x:%x:%x:%x:%x:%x;%u-" 1362209662Slstewart "%x:%x:%x:%x:%x:%x:%x:%x;%u,", 1363209662Slstewart UPPER_SHORT(laddr[0]), 1364209662Slstewart LOWER_SHORT(laddr[0]), 1365209662Slstewart UPPER_SHORT(laddr[1]), 1366209662Slstewart LOWER_SHORT(laddr[1]), 1367209662Slstewart UPPER_SHORT(laddr[2]), 1368209662Slstewart LOWER_SHORT(laddr[2]), 1369209662Slstewart UPPER_SHORT(laddr[3]), 1370209662Slstewart LOWER_SHORT(laddr[3]), 1371209662Slstewart ntohs(lport), 1372209662Slstewart UPPER_SHORT(faddr[0]), 1373209662Slstewart LOWER_SHORT(faddr[0]), 1374209662Slstewart UPPER_SHORT(faddr[1]), 1375209662Slstewart LOWER_SHORT(faddr[1]), 1376209662Slstewart 
UPPER_SHORT(faddr[2]), 1377209662Slstewart LOWER_SHORT(faddr[2]), 1378209662Slstewart UPPER_SHORT(faddr[3]), 1379209662Slstewart LOWER_SHORT(faddr[3]), 1380209662Slstewart ntohs(fport)); 1381209662Slstewart } else { 1382209662Slstewart laddr[0] = FIRST_OCTET(laddr[3]); 1383209662Slstewart laddr[1] = SECOND_OCTET(laddr[3]); 1384209662Slstewart laddr[2] = THIRD_OCTET(laddr[3]); 1385209662Slstewart laddr[3] = FOURTH_OCTET(laddr[3]); 1386209662Slstewart faddr[0] = FIRST_OCTET(faddr[3]); 1387209662Slstewart faddr[1] = SECOND_OCTET(faddr[3]); 1388209662Slstewart faddr[2] = THIRD_OCTET(faddr[3]); 1389209662Slstewart faddr[3] = FOURTH_OCTET(faddr[3]); 1390209662Slstewart#endif 1391209662Slstewart sbuf_printf(s, 1392209662Slstewart "%u.%u.%u.%u;%u-%u.%u.%u.%u;%u,", 1393209662Slstewart laddr[0], 1394209662Slstewart laddr[1], 1395209662Slstewart laddr[2], 1396209662Slstewart laddr[3], 1397209662Slstewart ntohs(lport), 1398209662Slstewart faddr[0], 1399209662Slstewart faddr[1], 1400209662Slstewart faddr[2], 1401209662Slstewart faddr[3], 1402209662Slstewart ntohs(fport)); 1403209662Slstewart#ifdef SIFTR_IPV6 1404209662Slstewart } 1405209662Slstewart#endif 1406209662Slstewart 1407209662Slstewart free(counter, M_SIFTR_HASHNODE); 1408209662Slstewart } 1409209662Slstewart 1410209662Slstewart LIST_INIT(counter_hash + i); 1411209662Slstewart } 1412209662Slstewart 1413209662Slstewart sbuf_printf(s, "\n"); 1414209662Slstewart sbuf_finish(s); 1415209662Slstewart 1416209662Slstewart i = 0; 1417209662Slstewart do { 1418209662Slstewart bytes_to_write = min(SIFTR_ALQ_BUFLEN, sbuf_len(s)-i); 1419209662Slstewart alq_writen(siftr_alq, sbuf_data(s)+i, bytes_to_write, ALQ_WAITOK); 1420209662Slstewart i += bytes_to_write; 1421209662Slstewart } while (i < sbuf_len(s)); 1422209662Slstewart 1423209662Slstewart alq_close(siftr_alq); 1424209662Slstewart siftr_alq = NULL; 1425343273Sbrooks } else 1426343273Sbrooks error = EINVAL; 1427209662Slstewart 1428209662Slstewart sbuf_delete(s); 
1429209662Slstewart 1430209662Slstewart /* 1431209662Slstewart * XXX: Should be using ret to check if any functions fail 1432209662Slstewart * and set error appropriately 1433209662Slstewart */ 1434209662Slstewart 1435209662Slstewart return (error); 1436209662Slstewart} 1437209662Slstewart 1438209662Slstewart 1439209662Slstewartstatic int 1440209662Slstewartsiftr_sysctl_enabled_handler(SYSCTL_HANDLER_ARGS) 1441209662Slstewart{ 1442342189Sbrooks int error; 1443342189Sbrooks uint32_t new; 1444209662Slstewart 1445342189Sbrooks new = siftr_enabled; 1446342189Sbrooks error = sysctl_handle_int(oidp, &new, 0, req); 1447343273Sbrooks if (error == 0 && req->newptr != NULL) { 1448342189Sbrooks if (new > 1) 1449342189Sbrooks return (EINVAL); 1450342189Sbrooks else if (new != siftr_enabled) { 1451343273Sbrooks if ((error = siftr_manage_ops(new)) == 0) { 1452343273Sbrooks siftr_enabled = new; 1453343273Sbrooks } else { 1454342189Sbrooks siftr_manage_ops(SIFTR_DISABLE); 1455343273Sbrooks } 1456209662Slstewart } 1457342189Sbrooks } 1458209662Slstewart 1459342189Sbrooks return (error); 1460209662Slstewart} 1461209662Slstewart 1462209662Slstewart 1463209662Slstewartstatic void 1464209662Slstewartsiftr_shutdown_handler(void *arg) 1465209662Slstewart{ 1466343273Sbrooks if (siftr_enabled == 1) { 1467343273Sbrooks siftr_manage_ops(SIFTR_DISABLE); 1468343273Sbrooks } 1469209662Slstewart} 1470209662Slstewart 1471209662Slstewart 1472209662Slstewart/* 1473209662Slstewart * Module is being unloaded or machine is shutting down. Take care of cleanup. 1474209662Slstewart */ 1475209662Slstewartstatic int 1476209662Slstewartdeinit_siftr(void) 1477209662Slstewart{ 1478209662Slstewart /* Cleanup. 
*/ 1479209662Slstewart siftr_manage_ops(SIFTR_DISABLE); 1480209662Slstewart hashdestroy(counter_hash, M_SIFTR, siftr_hashmask); 1481209662Slstewart mtx_destroy(&siftr_pkt_queue_mtx); 1482209662Slstewart mtx_destroy(&siftr_pkt_mgr_mtx); 1483209662Slstewart 1484209662Slstewart return (0); 1485209662Slstewart} 1486209662Slstewart 1487209662Slstewart 1488209662Slstewart/* 1489209662Slstewart * Module has just been loaded into the kernel. 1490209662Slstewart */ 1491209662Slstewartstatic int 1492209662Slstewartinit_siftr(void) 1493209662Slstewart{ 1494209662Slstewart EVENTHANDLER_REGISTER(shutdown_pre_sync, siftr_shutdown_handler, NULL, 1495209662Slstewart SHUTDOWN_PRI_FIRST); 1496209662Slstewart 1497209662Slstewart /* Initialise our flow counter hash table. */ 1498209662Slstewart counter_hash = hashinit(SIFTR_EXPECTED_MAX_TCP_FLOWS, M_SIFTR, 1499209662Slstewart &siftr_hashmask); 1500209662Slstewart 1501209662Slstewart mtx_init(&siftr_pkt_queue_mtx, "siftr_pkt_queue_mtx", NULL, MTX_DEF); 1502209662Slstewart mtx_init(&siftr_pkt_mgr_mtx, "siftr_pkt_mgr_mtx", NULL, MTX_DEF); 1503209662Slstewart 1504209662Slstewart /* Print message to the user's current terminal. */ 1505209662Slstewart uprintf("\nStatistical Information For TCP Research (SIFTR) %s\n" 1506209662Slstewart " http://caia.swin.edu.au/urp/newtcp\n\n", 1507209662Slstewart MODVERSION_STR); 1508209662Slstewart 1509209662Slstewart return (0); 1510209662Slstewart} 1511209662Slstewart 1512209662Slstewart 1513209662Slstewart/* 1514209662Slstewart * This is the function that is called to load and unload the module. 1515209662Slstewart * When the module is loaded, this function is called once with 1516209662Slstewart * "what" == MOD_LOAD 1517209662Slstewart * When the module is unloaded, this function is called twice with 1518209662Slstewart * "what" = MOD_QUIESCE first, followed by "what" = MOD_UNLOAD second 1519209662Slstewart * When the system is shut down e.g. 
CTRL-ALT-DEL or using the shutdown command, 1520209662Slstewart * this function is called once with "what" = MOD_SHUTDOWN 1521209662Slstewart * When the system is shut down, the handler isn't called until the very end 1522209662Slstewart * of the shutdown sequence i.e. after the disks have been synced. 1523209662Slstewart */ 1524209662Slstewartstatic int 1525209662Slstewartsiftr_load_handler(module_t mod, int what, void *arg) 1526209662Slstewart{ 1527209662Slstewart int ret; 1528209662Slstewart 1529209662Slstewart switch (what) { 1530209662Slstewart case MOD_LOAD: 1531209662Slstewart ret = init_siftr(); 1532209662Slstewart break; 1533209662Slstewart 1534209662Slstewart case MOD_QUIESCE: 1535209662Slstewart case MOD_SHUTDOWN: 1536209662Slstewart ret = deinit_siftr(); 1537209662Slstewart break; 1538209662Slstewart 1539209662Slstewart case MOD_UNLOAD: 1540209662Slstewart ret = 0; 1541209662Slstewart break; 1542209662Slstewart 1543209662Slstewart default: 1544209662Slstewart ret = EINVAL; 1545209662Slstewart break; 1546209662Slstewart } 1547209662Slstewart 1548209662Slstewart return (ret); 1549209662Slstewart} 1550209662Slstewart 1551209662Slstewart 1552209662Slstewartstatic moduledata_t siftr_mod = { 1553209662Slstewart .name = "siftr", 1554209662Slstewart .evhand = siftr_load_handler, 1555209662Slstewart}; 1556209662Slstewart 1557209662Slstewart/* 1558209662Slstewart * Param 1: name of the kernel module 1559209662Slstewart * Param 2: moduledata_t struct containing info about the kernel module 1560209662Slstewart * and the execution entry point for the module 1561209662Slstewart * Param 3: From sysinit_sub_id enumeration in /usr/include/sys/kernel.h 1562209662Slstewart * Defines the module initialisation order 1563209662Slstewart * Param 4: From sysinit_elem_order enumeration in /usr/include/sys/kernel.h 1564209662Slstewart * Defines the initialisation order of this kld relative to others 1565209662Slstewart * within the same subsystem as defined by param 3 
1566209662Slstewart */ 1567296688SjhbDECLARE_MODULE(siftr, siftr_mod, SI_SUB_LAST, SI_ORDER_ANY); 1568209662SlstewartMODULE_DEPEND(siftr, alq, 1, 1, 1); 1569209662SlstewartMODULE_VERSION(siftr, MODVERSION); 1570