tcp_pcap.c revision 289276
1289276Shiren/*- 2289276Shiren * Copyright (c) 2015 3289276Shiren * Jonathan Looney. All rights reserved. 4289276Shiren * 5289276Shiren * Redistribution and use in source and binary forms, with or without 6289276Shiren * modification, are permitted provided that the following conditions 7289276Shiren * are met: 8289276Shiren * 1. Redistributions of source code must retain the above copyright 9289276Shiren * notice, this list of conditions and the following disclaimer. 10289276Shiren * 2. Redistributions in binary form must reproduce the above copyright 11289276Shiren * notice, this list of conditions and the following disclaimer in the 12289276Shiren * documentation and/or other materials provided with the distribution. 13289276Shiren * 14289276Shiren * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15289276Shiren * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16289276Shiren * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17289276Shiren * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18289276Shiren * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19289276Shiren * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20289276Shiren * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21289276Shiren * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22289276Shiren * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23289276Shiren * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24289276Shiren * SUCH DAMAGE. 25289276Shiren * 26289276Shiren * $FreeBSD: head/sys/netinet/tcp_pcap.c 289276 2015-10-14 00:35:37Z hiren $ 27289276Shiren */ 28289276Shiren 29289276Shiren#include <sys/queue.h> 30289276Shiren#include <sys/param.h> 31289276Shiren#include <sys/types.h> 32289276Shiren#include <sys/socket.h> 33289276Shiren#include <sys/socketvar.h> 34289276Shiren#include <sys/sysctl.h> 35289276Shiren#include <sys/systm.h> 36289276Shiren#include <sys/mbuf.h> 37289276Shiren#include <sys/eventhandler.h> 38289276Shiren#include <machine/atomic.h> 39289276Shiren#include <netinet/tcp_var.h> 40289276Shiren#include <netinet/tcp_pcap.h> 41289276Shiren 42289276Shiren#define M_LEADINGSPACE_NOWRITE(m) \ 43289276Shiren ((m)->m_data - M_START(m)) 44289276Shiren 45289276Shirenstatic int tcp_pcap_clusters_referenced_cur = 0; 46289276Shirenstatic int tcp_pcap_clusters_referenced_max = 0; 47289276Shiren 48289276ShirenSYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_cur, 49289276Shiren CTLFLAG_RD, &tcp_pcap_clusters_referenced_cur, 0, 50289276Shiren "Number of clusters currently referenced on TCP PCAP queues"); 51289276ShirenSYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_max, 52289276Shiren CTLFLAG_RW, &tcp_pcap_clusters_referenced_max, 0, 53289276Shiren "Maximum number of clusters allowed to be referenced on TCP PCAP " 54289276Shiren "queues"); 55289276Shiren 56289276Shirenstatic int tcp_pcap_alloc_reuse_ext = 0; 57289276Shirenstatic int tcp_pcap_alloc_reuse_mbuf = 0; 58289276Shirenstatic int tcp_pcap_alloc_new_mbuf = 0; 59289276ShirenSYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_ext, 60289276Shiren CTLFLAG_RD, &tcp_pcap_alloc_reuse_ext, 0, 61289276Shiren "Number of mbufs with external storage reused for the TCP PCAP " 62289276Shiren "functionality"); 63289276ShirenSYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_mbuf, 64289276Shiren CTLFLAG_RD, &tcp_pcap_alloc_reuse_mbuf, 0, 65289276Shiren "Number of mbufs with internal storage reused for the TCP PCAP " 66289276Shiren "functionality"); 67289276ShirenSYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_new_mbuf, 68289276Shiren CTLFLAG_RD, &tcp_pcap_alloc_new_mbuf, 0, 69289276Shiren "Number of new mbufs allocated for the TCP PCAP functionality"); 70289276Shiren 71289276ShirenVNET_DEFINE(int, tcp_pcap_packets) = 0; 72289276Shiren#define V_tcp_pcap_packets VNET(tcp_pcap_packets) 73289276ShirenSYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_packets, CTLFLAG_RW, 74289276Shiren &V_tcp_pcap_packets, 0, "Default number of packets saved per direction " 75289276Shiren "per TCPCB"); 76289276Shiren 77289276Shiren/* Initialize the values. */ 78289276Shirenstatic void 79289276Shirentcp_pcap_max_set() { 80289276Shiren tcp_pcap_clusters_referenced_max = nmbclusters / 4; 81289276Shiren} 82289276Shiren 83289276Shirenvoid 84289276Shirentcp_pcap_init() { 85289276Shiren tcp_pcap_max_set(); 86289276Shiren EVENTHANDLER_REGISTER(nmbclusters_change, tcp_pcap_max_set, 87289276Shiren NULL, EVENTHANDLER_PRI_ANY); 88289276Shiren} 89289276Shiren 90289276Shiren/* 91289276Shiren * If we are below the maximum allowed cluster references, 92289276Shiren * increment the reference count and return TRUE. Otherwise, 93289276Shiren * leave the reference count alone and return FALSE. 94289276Shiren */ 95289276Shirenstatic __inline bool 96289276Shirentcp_pcap_take_cluster_reference(void) 97289276Shiren{ 98289276Shiren if (atomic_fetchadd_int(&tcp_pcap_clusters_referenced_cur, 1) >= 99289276Shiren tcp_pcap_clusters_referenced_max) { 100289276Shiren atomic_add_int(&tcp_pcap_clusters_referenced_cur, -1); 101289276Shiren return FALSE; 102289276Shiren } 103289276Shiren return TRUE; 104289276Shiren} 105289276Shiren 106289276Shiren/* 107289276Shiren * For all the external entries in m, apply the given adjustment. 108289276Shiren * This can be used to adjust the counter when an mbuf chain is 109289276Shiren * copied or freed. 110289276Shiren */ 111289276Shirenstatic __inline void 112289276Shirentcp_pcap_adj_cluster_reference(struct mbuf *m, int adj) 113289276Shiren{ 114289276Shiren while (m) { 115289276Shiren if (m->m_flags & M_EXT) 116289276Shiren atomic_add_int(&tcp_pcap_clusters_referenced_cur, adj); 117289276Shiren 118289276Shiren m = m->m_next; 119289276Shiren } 120289276Shiren} 121289276Shiren 122289276Shiren/* 123289276Shiren * Free all mbufs in a chain, decrementing the reference count as 124289276Shiren * necessary. 125289276Shiren * 126289276Shiren * Functions in this file should use this instead of m_freem() when 127289276Shiren * they are freeing mbuf chains that may contain clusters that were 128289276Shiren * already included in tcp_pcap_clusters_referenced_cur. 129289276Shiren */ 130289276Shirenstatic void 131289276Shirentcp_pcap_m_freem(struct mbuf *mb) 132289276Shiren{ 133289276Shiren while (mb != NULL) { 134289276Shiren if (mb->m_flags & M_EXT) 135289276Shiren atomic_subtract_int(&tcp_pcap_clusters_referenced_cur, 136289276Shiren 1); 137289276Shiren mb = m_free(mb); 138289276Shiren } 139289276Shiren} 140289276Shiren 141289276Shiren/* 142289276Shiren * Copy data from m to n, where n cannot fit all the data we might 143289276Shiren * want from m. 144289276Shiren * 145289276Shiren * Prioritize data like this: 146289276Shiren * 1. TCP header 147289276Shiren * 2. IP header 148289276Shiren * 3. Data 149289276Shiren */ 150289276Shirenstatic void 151289276Shirentcp_pcap_copy_bestfit(struct tcphdr *th, struct mbuf *m, struct mbuf *n) 152289276Shiren{ 153289276Shiren struct mbuf *m_cur = m; 154289276Shiren int bytes_to_copy=0, trailing_data, skip=0, tcp_off; 155289276Shiren 156289276Shiren /* Below, we assume these will be non-NULL. */ 157289276Shiren KASSERT(th, ("%s: called with th == NULL", __func__)); 158289276Shiren KASSERT(m, ("%s: called with m == NULL", __func__)); 159289276Shiren KASSERT(n, ("%s: called with n == NULL", __func__)); 160289276Shiren 161289276Shiren /* We assume this initialization occurred elsewhere. */ 162289276Shiren KASSERT(n->m_len == 0, ("%s: called with n->m_len=%d (expected 0)", 163289276Shiren __func__, n->m_len)); 164289276Shiren KASSERT(n->m_data == M_START(n), 165289276Shiren ("%s: called with n->m_data != M_START(n)", __func__)); 166289276Shiren 167289276Shiren /* 168289276Shiren * Calculate the size of the TCP header. We use this often 169289276Shiren * enough that it is worth just calculating at the start. 170289276Shiren */ 171289276Shiren tcp_off = th->th_off << 2; 172289276Shiren 173289276Shiren /* Trim off leading empty mbufs. */ 174289276Shiren while (m && m->m_len == 0) 175289276Shiren m = m->m_next; 176289276Shiren 177289276Shiren if (m) { 178289276Shiren m_cur = m; 179289276Shiren } 180289276Shiren else { 181289276Shiren /* 182289276Shiren * No data? Highly unusual. We would expect to at 183289276Shiren * least see a TCP header in the mbuf. 184289276Shiren * As we have a pointer to the TCP header, I guess 185289276Shiren * we should just copy that. (???) 186289276Shiren */ 187289276Shirenfallback: 188289276Shiren bytes_to_copy = tcp_off; 189289276Shiren if (bytes_to_copy > M_SIZE(n)) 190289276Shiren bytes_to_copy = M_SIZE(n); 191289276Shiren bcopy(th, n->m_data, bytes_to_copy); 192289276Shiren n->m_len = bytes_to_copy; 193289276Shiren return; 194289276Shiren } 195289276Shiren 196289276Shiren /* 197289276Shiren * Find TCP header. Record the total number of bytes up to, 198289276Shiren * and including, the TCP header. 199289276Shiren */ 200289276Shiren while (m_cur) { 201289276Shiren if ((caddr_t) th >= (caddr_t) m_cur->m_data && 202289276Shiren (caddr_t) th < (caddr_t) (m_cur->m_data + m_cur->m_len)) 203289276Shiren break; 204289276Shiren bytes_to_copy += m_cur->m_len; 205289276Shiren m_cur = m_cur->m_next; 206289276Shiren } 207289276Shiren if (m_cur) 208289276Shiren bytes_to_copy += (caddr_t) th - (caddr_t) m_cur->m_data; 209289276Shiren else 210289276Shiren goto fallback; 211289276Shiren bytes_to_copy += tcp_off; 212289276Shiren 213289276Shiren /* 214289276Shiren * If we already want to copy more bytes than we can hold 215289276Shiren * in the destination mbuf, skip leading bytes and copy 216289276Shiren * what we can. 217289276Shiren * 218289276Shiren * Otherwise, consider trailing data. 219289276Shiren */ 220289276Shiren if (bytes_to_copy > M_SIZE(n)) { 221289276Shiren skip = bytes_to_copy - M_SIZE(n); 222289276Shiren bytes_to_copy = M_SIZE(n); 223289276Shiren } 224289276Shiren else { 225289276Shiren /* 226289276Shiren * Determine how much trailing data is in the chain. 227289276Shiren * We start with the length of this mbuf (the one 228289276Shiren * containing th) and subtract the size of the TCP 229289276Shiren * header (tcp_off) and the size of the data prior 230289276Shiren * to th (th - m_cur->m_data). 231289276Shiren * 232289276Shiren * This *should not* be negative, as the TCP code 233289276Shiren * should put the whole TCP header in a single 234289276Shiren * mbuf. But, it isn't a problem if it is. We will 235289276Shiren * simple work off our negative balance as we look 236289276Shiren * at subsequent mbufs. 237289276Shiren */ 238289276Shiren trailing_data = m_cur->m_len - tcp_off; 239289276Shiren trailing_data -= (caddr_t) th - (caddr_t) m_cur->m_data; 240289276Shiren m_cur = m_cur->m_next; 241289276Shiren while (m_cur) { 242289276Shiren trailing_data += m_cur->m_len; 243289276Shiren m_cur = m_cur->m_next; 244289276Shiren } 245289276Shiren if ((bytes_to_copy + trailing_data) > M_SIZE(n)) 246289276Shiren bytes_to_copy = M_SIZE(n); 247289276Shiren else 248289276Shiren bytes_to_copy += trailing_data; 249289276Shiren } 250289276Shiren 251289276Shiren m_copydata(m, skip, bytes_to_copy, n->m_data); 252289276Shiren n->m_len = bytes_to_copy; 253289276Shiren} 254289276Shiren 255289276Shirenvoid 256289276Shirentcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue) 257289276Shiren{ 258289276Shiren struct mbuf *n = NULL, *mhead; 259289276Shiren 260289276Shiren KASSERT(th, ("%s: called with th == NULL", __func__)); 261289276Shiren KASSERT(m, ("%s: called with m == NULL", __func__)); 262289276Shiren KASSERT(queue, ("%s: called with queue == NULL", __func__)); 263289276Shiren 264289276Shiren /* We only care about data packets. */ 265289276Shiren while (m && m->m_type != MT_DATA) 266289276Shiren m = m->m_next; 267289276Shiren 268289276Shiren /* We only need to do something if we still have an mbuf. */ 269289276Shiren if (!m) 270289276Shiren return; 271289276Shiren 272289276Shiren /* If we are not saving mbufs, return now. */ 273289276Shiren if (queue->mq_maxlen == 0) 274289276Shiren return; 275289276Shiren 276289276Shiren /* 277289276Shiren * Check to see if we will need to recycle mbufs. 278289276Shiren * 279289276Shiren * If we need to get rid of mbufs to stay below 280289276Shiren * our packet count, try to reuse the mbuf. Once 281289276Shiren * we already have a new mbuf (n), then we can 282289276Shiren * simply free subsequent mbufs. 283289276Shiren * 284289276Shiren * Note that most of the logic in here is to deal 285289276Shiren * with the reuse. If we are fine with constant 286289276Shiren * mbuf allocs/deallocs, we could ditch this logic. 287289276Shiren * But, it only seems to make sense to reuse 288289276Shiren * mbufs we already have. 289289276Shiren */ 290289276Shiren while (mbufq_full(queue)) { 291289276Shiren mhead = mbufq_dequeue(queue); 292289276Shiren 293289276Shiren if (n) { 294289276Shiren tcp_pcap_m_freem(mhead); 295289276Shiren } 296289276Shiren else { 297289276Shiren /* 298289276Shiren * If this held an external cluster, try to 299289276Shiren * detach the cluster. But, if we held the 300289276Shiren * last reference, go through the normal 301289276Shiren * free-ing process. 302289276Shiren */ 303289276Shiren if (mhead->m_flags & M_EXT) { 304289276Shiren switch (mhead->m_ext.ext_type) { 305289276Shiren case EXT_SFBUF: 306289276Shiren /* Don't mess around with these. */ 307289276Shiren tcp_pcap_m_freem(mhead); 308289276Shiren continue; 309289276Shiren default: 310289276Shiren if (atomic_fetchadd_int( 311289276Shiren mhead->m_ext.ext_cnt, -1) == 1) 312289276Shiren { 313289276Shiren /* 314289276Shiren * We held the last reference 315289276Shiren * on this cluster. Restore 316289276Shiren * the reference count and put 317289276Shiren * it back in the pool. 318289276Shiren */ 319289276Shiren *(mhead->m_ext.ext_cnt) = 1; 320289276Shiren tcp_pcap_m_freem(mhead); 321289276Shiren continue; 322289276Shiren } 323289276Shiren /* 324289276Shiren * We were able to cleanly free the 325289276Shiren * reference. 326289276Shiren */ 327289276Shiren atomic_subtract_int( 328289276Shiren &tcp_pcap_clusters_referenced_cur, 329289276Shiren 1); 330289276Shiren tcp_pcap_alloc_reuse_ext++; 331289276Shiren break; 332289276Shiren } 333289276Shiren } 334289276Shiren else { 335289276Shiren tcp_pcap_alloc_reuse_mbuf++; 336289276Shiren } 337289276Shiren 338289276Shiren n = mhead; 339289276Shiren tcp_pcap_m_freem(n->m_next); 340289276Shiren m_init(n, NULL, 0, M_NOWAIT, MT_DATA, 0); 341289276Shiren } 342289276Shiren } 343289276Shiren 344289276Shiren /* Check to see if we need to get a new mbuf. */ 345289276Shiren if (!n) { 346289276Shiren if (!(n = m_get(M_NOWAIT, MT_DATA))) 347289276Shiren return; 348289276Shiren tcp_pcap_alloc_new_mbuf++; 349289276Shiren } 350289276Shiren 351289276Shiren /* 352289276Shiren * What are we dealing with? If a cluster, attach it. Otherwise, 353289276Shiren * try to copy the data from the beginning of the mbuf to the 354289276Shiren * end of data. (There may be data between the start of the data 355289276Shiren * area and the current data pointer. We want to get this, because 356289276Shiren * it may contain header information that is useful.) 357289276Shiren * In cases where that isn't possible, settle for what we can 358289276Shiren * get. 359289276Shiren */ 360289276Shiren if ((m->m_flags & M_EXT) && tcp_pcap_take_cluster_reference()) { 361289276Shiren n->m_data = m->m_data; 362289276Shiren n->m_len = m->m_len; 363289276Shiren mb_dupcl(n, m); 364289276Shiren } 365289276Shiren else if (((m->m_data + m->m_len) - M_START(m)) <= M_SIZE(n)) { 366289276Shiren /* 367289276Shiren * At this point, n is guaranteed to be a normal mbuf 368289276Shiren * with no cluster and no packet header. Because the 369289276Shiren * logic in this code block requires this, the assert 370289276Shiren * is here to catch any instances where someone 371289276Shiren * changes the logic to invalidate that assumption. 372289276Shiren */ 373289276Shiren KASSERT((n->m_flags & (M_EXT | M_PKTHDR)) == 0, 374289276Shiren ("%s: Unexpected flags (%#x) for mbuf", 375289276Shiren __func__, n->m_flags)); 376289276Shiren n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m); 377289276Shiren n->m_len = m->m_len; 378289276Shiren bcopy(M_START(m), n->m_dat, 379289276Shiren m->m_len + M_LEADINGSPACE_NOWRITE(m)); 380289276Shiren } 381289276Shiren else { 382289276Shiren /* 383289276Shiren * This is the case where we need to "settle for what 384289276Shiren * we can get". The most probable way to this code 385289276Shiren * path is that we've already taken references to the 386289276Shiren * maximum number of mbuf clusters we can, and the data 387289276Shiren * is too long to fit in an mbuf's internal storage. 388289276Shiren * Try for a "best fit". 389289276Shiren */ 390289276Shiren tcp_pcap_copy_bestfit(th, m, n); 391289276Shiren 392289276Shiren /* Don't try to get additional data. */ 393289276Shiren goto add_to_queue; 394289276Shiren } 395289276Shiren 396289276Shiren if (m->m_next) { 397289276Shiren n->m_next = m_copym(m->m_next, 0, M_COPYALL, M_NOWAIT); 398289276Shiren tcp_pcap_adj_cluster_reference(n->m_next, 1); 399289276Shiren } 400289276Shiren 401289276Shirenadd_to_queue: 402289276Shiren /* Add the new mbuf to the list. */ 403289276Shiren if (mbufq_enqueue(queue, n)) { 404289276Shiren /* This shouldn't happen. If INVARIANTS is defined, panic. */ 405289276Shiren KASSERT(0, ("%s: mbufq was unexpectedly full!", __func__)); 406289276Shiren tcp_pcap_m_freem(n); 407289276Shiren } 408289276Shiren} 409289276Shiren 410289276Shirenvoid 411289276Shirentcp_pcap_drain(struct mbufq *queue) 412289276Shiren{ 413289276Shiren struct mbuf *m; 414289276Shiren while ((m = mbufq_dequeue(queue))) 415289276Shiren tcp_pcap_m_freem(m); 416289276Shiren} 417289276Shiren 418289276Shirenvoid 419289276Shirentcp_pcap_tcpcb_init(struct tcpcb *tp) 420289276Shiren{ 421289276Shiren mbufq_init(&(tp->t_inpkts), V_tcp_pcap_packets); 422289276Shiren mbufq_init(&(tp->t_outpkts), V_tcp_pcap_packets); 423289276Shiren} 424289276Shiren 425289276Shirenvoid 426289276Shirentcp_pcap_set_sock_max(struct mbufq *queue, int newval) 427289276Shiren{ 428289276Shiren queue->mq_maxlen = newval; 429289276Shiren while (queue->mq_len > queue->mq_maxlen) 430289276Shiren tcp_pcap_m_freem(mbufq_dequeue(queue)); 431289276Shiren} 432289276Shiren 433289276Shirenint 434289276Shirentcp_pcap_get_sock_max(struct mbufq *queue) 435289276Shiren{ 436289276Shiren return queue->mq_maxlen; 437289276Shiren} 438