1135332Sglebius/*- 2219182Sglebius * Copyright (c) 2010-2011 Alexander V. Chernikov <melifaro@ipfw.ru> 3143923Sglebius * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org> 4135332Sglebius * Copyright (c) 2001-2003 Roman V. Palagin <romanp@unshadow.net> 5135332Sglebius * All rights reserved. 6135332Sglebius * 7135332Sglebius * Redistribution and use in source and binary forms, with or without 8135332Sglebius * modification, are permitted provided that the following conditions 9135332Sglebius * are met: 10135332Sglebius * 1. Redistributions of source code must retain the above copyright 11135332Sglebius * notice, this list of conditions and the following disclaimer. 12135332Sglebius * 2. Redistributions in binary form must reproduce the above copyright 13135332Sglebius * notice, this list of conditions and the following disclaimer in the 14135332Sglebius * documentation and/or other materials provided with the distribution. 15135332Sglebius * 16135332Sglebius * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17135332Sglebius * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18135332Sglebius * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19135332Sglebius * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20135332Sglebius * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21135332Sglebius * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22135332Sglebius * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23135332Sglebius * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24135332Sglebius * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25135332Sglebius * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26135332Sglebius * SUCH DAMAGE. 
 *
 * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet6.h"
#include "opt_route.h"
#include <sys/param.h>
#include <sys/bitstring.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/mbuf.h>
#include <sys/syslog.h>
#include <sys/socket.h>
#include <vm/uma.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/ethernet.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <netgraph/ng_message.h>
#include <netgraph/netgraph.h>

#include <netgraph/netflow/netflow.h>
#include <netgraph/netflow/netflow_v9.h>
#include <netgraph/netflow/ng_netflow.h>

/* Number of hash buckets in the flow cache; must be a power of 2. */
#define	NBUCKETS	(65536)		/* must be power of 2 */

/*
 * This hash is for TCP or UDP packets: mixes both addresses and both
 * ports, then masks down to a bucket index.
 */
#define FULL_HASH(addr1, addr2, port1, port2)	\
	(((addr1 ^ (addr1 >> 16) ^		\
	htons(addr2 ^ (addr2 >> 16))) ^		\
	port1 ^ htons(port2)) &			\
	(NBUCKETS - 1))

/* This hash is for all other IP packets.
 */
#define ADDR_HASH(addr1, addr2)			\
	((addr1 ^ (addr1 >> 16) ^		\
	htons(addr2 ^ (addr2 >> 16))) &		\
	(NBUCKETS - 1))

/* Macros to shorten logical constructions */
/* XXX: priv must exist in namespace */
/* Flow has been idle longer than the inactive timeout. */
#define	INACTIVE(fle)	(time_uptime - fle->f.last > priv->nfinfo_inact_t)
/* Flow has existed longer than the active timeout. */
#define	AGED(fle)	(time_uptime - fle->f.first > priv->nfinfo_act_t)
#define	ISFREE(fle)	(fle->f.packets == 0)

/*
 * 4 is a magical number: statistically number of 4-packet flows is
 * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP
 * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case
 * of reachable host and 4-packet otherwise.
 */
#define	SMALL(fle)	(fle->f.packets <= 4)

MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash");

static int export_add(item_p, struct flow_entry *);
static int export_send(priv_p, fib_export_p, item_p, int);

static int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *,
    int, uint8_t, uint8_t);
#ifdef INET6
static int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *,
    int, uint8_t, uint8_t);
#endif

static void expire_flow(priv_p, fib_export_p, struct flow_entry *, int);

/*
 * Generate hash for a given flow record.
112219182Sglebius * 113219182Sglebius * FIB is not used here, because: 114219182Sglebius * most VRFS will carry public IPv4 addresses which are unique even 115219182Sglebius * without FIB private addresses can overlap, but this is worked out 116219182Sglebius * via flow_rec bcmp() containing fib id. In IPv6 world addresses are 117219182Sglebius * all globally unique (it's not fully true, there is FC00::/7 for example, 118219182Sglebius * but chances of address overlap are MUCH smaller) 119219182Sglebius */ 120248724Sglebiusstatic inline uint32_t 121135332Sglebiusip_hash(struct flow_rec *r) 122135332Sglebius{ 123248724Sglebius 124135332Sglebius switch (r->r_ip_p) { 125135332Sglebius case IPPROTO_TCP: 126135332Sglebius case IPPROTO_UDP: 127135332Sglebius return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr, 128135332Sglebius r->r_sport, r->r_dport); 129135332Sglebius default: 130135332Sglebius return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr); 131135332Sglebius } 132135332Sglebius} 133135332Sglebius 134219182Sglebius#ifdef INET6 135219182Sglebius/* Generate hash for a given flow6 record. 
Use lower 4 octets from v6 addresses */ 136248724Sglebiusstatic inline uint32_t 137219182Sglebiusip6_hash(struct flow6_rec *r) 138219182Sglebius{ 139248724Sglebius 140219182Sglebius switch (r->r_ip_p) { 141219182Sglebius case IPPROTO_TCP: 142219182Sglebius case IPPROTO_UDP: 143219182Sglebius return FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], 144219182Sglebius r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport, 145219182Sglebius r->r_dport); 146219182Sglebius default: 147219182Sglebius return ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], 148219182Sglebius r->dst.r_dst6.__u6_addr.__u6_addr32[3]); 149219182Sglebius } 150219182Sglebius} 151300771Sjkim 152300771Sjkimstatic inline int 153300771Sjkimip6_masklen(struct in6_addr *saddr, struct rt_addrinfo *info) 154300771Sjkim{ 155300771Sjkim const int nbits = sizeof(*saddr) * NBBY; 156300771Sjkim int mlen; 157300771Sjkim 158300771Sjkim if (info->rti_addrs & RTA_NETMASK) 159300771Sjkim bit_count((bitstr_t *)saddr, 0, nbits, &mlen); 160300771Sjkim else 161300771Sjkim mlen = nbits; 162300771Sjkim return (mlen); 163300771Sjkim} 164219182Sglebius#endif 165219182Sglebius 166146092Sglebius/* 167146092Sglebius * Detach export datagram from priv, if there is any. 168146092Sglebius * If there is no, allocate a new one. 
169146092Sglebius */ 170146092Sglebiusstatic item_p 171219182Sglebiusget_export_dgram(priv_p priv, fib_export_p fe) 172146092Sglebius{ 173146092Sglebius item_p item = NULL; 174146092Sglebius 175219182Sglebius mtx_lock(&fe->export_mtx); 176219182Sglebius if (fe->exp.item != NULL) { 177219182Sglebius item = fe->exp.item; 178219182Sglebius fe->exp.item = NULL; 179135332Sglebius } 180219182Sglebius mtx_unlock(&fe->export_mtx); 181135332Sglebius 182146092Sglebius if (item == NULL) { 183146092Sglebius struct netflow_v5_export_dgram *dgram; 184146092Sglebius struct mbuf *m; 185135332Sglebius 186243882Sglebius m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 187146092Sglebius if (m == NULL) 188146092Sglebius return (NULL); 189146285Sglebius item = ng_package_data(m, NG_NOFLAGS); 190146092Sglebius if (item == NULL) 191146092Sglebius return (NULL); 192146092Sglebius dgram = mtod(m, struct netflow_v5_export_dgram *); 193146092Sglebius dgram->header.count = 0; 194146092Sglebius dgram->header.version = htons(NETFLOW_V5); 195210500Sglebius dgram->header.pad = 0; 196146092Sglebius } 197135332Sglebius 198146092Sglebius return (item); 199135332Sglebius} 200135332Sglebius 201146092Sglebius/* 202146092Sglebius * Re-attach incomplete datagram back to priv. 203146092Sglebius * If there is already another one, then send incomplete. */ 204146092Sglebiusstatic void 205219182Sglebiusreturn_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags) 206135332Sglebius{ 207248724Sglebius 208146092Sglebius /* 209146092Sglebius * It may happen on SMP, that some thread has already 210146092Sglebius * put its item there, in this case we bail out and 211146092Sglebius * send what we have to collector. 
212146092Sglebius */ 213219182Sglebius mtx_lock(&fe->export_mtx); 214219182Sglebius if (fe->exp.item == NULL) { 215219182Sglebius fe->exp.item = item; 216219182Sglebius mtx_unlock(&fe->export_mtx); 217146092Sglebius } else { 218219182Sglebius mtx_unlock(&fe->export_mtx); 219219182Sglebius export_send(priv, fe, item, flags); 220146092Sglebius } 221135332Sglebius} 222135332Sglebius 223146092Sglebius/* 224146092Sglebius * The flow is over. Call export_add() and free it. If datagram is 225146092Sglebius * full, then call export_send(). 226146092Sglebius */ 227248724Sglebiusstatic void 228219182Sglebiusexpire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags) 229135332Sglebius{ 230219182Sglebius struct netflow_export_item exp; 231219182Sglebius uint16_t version = fle->f.version; 232219182Sglebius 233219182Sglebius if ((priv->export != NULL) && (version == IPVERSION)) { 234219182Sglebius exp.item = get_export_dgram(priv, fe); 235219182Sglebius if (exp.item == NULL) { 236260169Sglebius priv->nfinfo_export_failed++; 237219182Sglebius if (priv->export9 != NULL) 238260169Sglebius priv->nfinfo_export9_failed++; 239248724Sglebius /* fle definitely contains IPv4 flow. 
*/ 240219182Sglebius uma_zfree_arg(priv->zone, fle, priv); 241219182Sglebius return; 242219182Sglebius } 243219182Sglebius 244219182Sglebius if (export_add(exp.item, fle) > 0) 245219182Sglebius export_send(priv, fe, exp.item, flags); 246219182Sglebius else 247219182Sglebius return_export_dgram(priv, fe, exp.item, NG_QUEUE); 248146092Sglebius } 249219182Sglebius 250219182Sglebius if (priv->export9 != NULL) { 251219182Sglebius exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt); 252219182Sglebius if (exp.item9 == NULL) { 253260169Sglebius priv->nfinfo_export9_failed++; 254219182Sglebius if (version == IPVERSION) 255219182Sglebius uma_zfree_arg(priv->zone, fle, priv); 256219229Sbz#ifdef INET6 257219182Sglebius else if (version == IP6VERSION) 258219182Sglebius uma_zfree_arg(priv->zone6, fle, priv); 259219229Sbz#endif 260219182Sglebius else 261248724Sglebius panic("ng_netflow: Unknown IP proto: %d", 262248724Sglebius version); 263219182Sglebius return; 264219182Sglebius } 265219182Sglebius 266219182Sglebius if (export9_add(exp.item9, exp.item9_opt, fle) > 0) 267219182Sglebius export9_send(priv, fe, exp.item9, exp.item9_opt, flags); 268219182Sglebius else 269248724Sglebius return_export9_dgram(priv, fe, exp.item9, 270248724Sglebius exp.item9_opt, NG_QUEUE); 271146092Sglebius } 272219182Sglebius 273219182Sglebius if (version == IPVERSION) 274219182Sglebius uma_zfree_arg(priv->zone, fle, priv); 275219229Sbz#ifdef INET6 276219182Sglebius else if (version == IP6VERSION) 277219182Sglebius uma_zfree_arg(priv->zone6, fle, priv); 278219229Sbz#endif 279135332Sglebius} 280135332Sglebius 281135332Sglebius/* Get a snapshot of node statistics */ 282135332Sglebiusvoid 283135332Sglebiusng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i) 284135332Sglebius{ 285248724Sglebius 286260169Sglebius i->nfinfo_bytes = counter_u64_fetch(priv->nfinfo_bytes); 287260169Sglebius i->nfinfo_packets = counter_u64_fetch(priv->nfinfo_packets); 288260169Sglebius i->nfinfo_bytes6 = 
counter_u64_fetch(priv->nfinfo_bytes6); 289260169Sglebius i->nfinfo_packets6 = counter_u64_fetch(priv->nfinfo_packets6); 290260169Sglebius i->nfinfo_sbytes = counter_u64_fetch(priv->nfinfo_sbytes); 291260169Sglebius i->nfinfo_spackets = counter_u64_fetch(priv->nfinfo_spackets); 292260169Sglebius i->nfinfo_sbytes6 = counter_u64_fetch(priv->nfinfo_sbytes6); 293260169Sglebius i->nfinfo_spackets6 = counter_u64_fetch(priv->nfinfo_spackets6); 294260169Sglebius i->nfinfo_act_exp = counter_u64_fetch(priv->nfinfo_act_exp); 295260169Sglebius i->nfinfo_inact_exp = counter_u64_fetch(priv->nfinfo_inact_exp); 296260169Sglebius 297260169Sglebius i->nfinfo_used = uma_zone_get_cur(priv->zone); 298260186Sdelphij#ifdef INET6 299260169Sglebius i->nfinfo_used6 = uma_zone_get_cur(priv->zone6); 300260186Sdelphij#endif 301260169Sglebius 302260169Sglebius i->nfinfo_alloc_failed = priv->nfinfo_alloc_failed; 303260169Sglebius i->nfinfo_export_failed = priv->nfinfo_export_failed; 304260169Sglebius i->nfinfo_export9_failed = priv->nfinfo_export9_failed; 305260169Sglebius i->nfinfo_realloc_mbuf = priv->nfinfo_realloc_mbuf; 306260169Sglebius i->nfinfo_alloc_fibs = priv->nfinfo_alloc_fibs; 307260169Sglebius i->nfinfo_inact_t = priv->nfinfo_inact_t; 308260169Sglebius i->nfinfo_act_t = priv->nfinfo_act_t; 309135332Sglebius} 310135332Sglebius 311135332Sglebius/* 312135332Sglebius * Insert a record into defined slot. 313135332Sglebius * 314135332Sglebius * First we get for us a free flow entry, then fill in all 315146092Sglebius * possible fields in it. 316146092Sglebius * 317146092Sglebius * TODO: consider dropping hash mutex while filling in datagram, 318146092Sglebius * as this was done in previous version. Need to test & profile 319146092Sglebius * to be sure. 
 */
static int
hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r,
    int plen, uint8_t flags, uint8_t tcp_flags)
{
	struct flow_entry *fle;
	struct sockaddr_in sin, sin_mask;
	struct sockaddr_dl rt_gateway;
	struct rt_addrinfo info;

	/* Caller must hold the bucket lock. */
	mtx_assert(&hsh->mtx, MA_OWNED);

	fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT);
	if (fle == NULL) {
		priv->nfinfo_alloc_failed++;
		return (ENOMEM);
	}

	/*
	 * Now fle is totally ours. It is detached from all lists,
	 * we can safely edit it.
	 */
	fle->f.version = IPVERSION;
	bcopy(r, &fle->f.r, sizeof(struct flow_rec));
	fle->f.bytes = plen;
	fle->f.packets = 1;
	fle->f.tcp_flags = tcp_flags;

	fle->f.first = fle->f.last = time_uptime;

	/*
	 * First we do route table lookup on destination address. So we can
	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
	 */
	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
		bzero(&sin, sizeof(sin));
		sin.sin_len = sizeof(struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr = fle->f.r.r_dst;

		rt_gateway.sdl_len = sizeof(rt_gateway);
		sin_mask.sin_len = sizeof(struct sockaddr_in);
		bzero(&info, sizeof(info));

		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin_mask;

		/* NHR_REF keeps a reference; released via rib_free_info(). */
		if (rib_lookup_info(r->fib, (struct sockaddr *)&sin, NHR_REF, 0,
		    &info) == 0) {
			fle->f.fle_o_ifx = info.rti_ifp->if_index;

			if (info.rti_flags & RTF_GATEWAY &&
			    rt_gateway.sdl_family == AF_INET)
				fle->f.next_hop =
				    ((struct sockaddr_in *)&rt_gateway)->sin_addr;

			if (info.rti_addrs & RTA_NETMASK)
				fle->f.dst_mask =
				    bitcount32(sin_mask.sin_addr.s_addr);
			else if (info.rti_flags & RTF_HOST)
				/* Give up. We can't determine mask :( */
				fle->f.dst_mask = 32;

			rib_free_info(&info);
		}
	}

	/* Do route lookup on source address, to fill in src_mask. */
	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
		bzero(&sin, sizeof(sin));
		sin.sin_len = sizeof(struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr = fle->f.r.r_src;

		sin_mask.sin_len = sizeof(struct sockaddr_in);
		bzero(&info, sizeof(info));

		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin_mask;

		if (rib_lookup_info(r->fib, (struct sockaddr *)&sin, 0, 0,
		    &info) == 0) {
			if (info.rti_addrs & RTA_NETMASK)
				fle->f.src_mask =
				    bitcount32(sin_mask.sin_addr.s_addr);
			else if (info.rti_flags & RTF_HOST)
				/* Give up. We can't determine mask :( */
				fle->f.src_mask = 32;
		}
	}

	/* Push new flow at the end of hash. */
	TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);

	return (0);
}

#ifdef INET6
/* IPv6 counterpart of hash_insert(); same locking contract. */
static int
hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r,
    int plen, uint8_t flags, uint8_t tcp_flags)
{
	struct flow6_entry *fle6;
	struct sockaddr_in6 sin6, sin6_mask;
	struct sockaddr_dl rt_gateway;
	struct rt_addrinfo info;

	mtx_assert(&hsh6->mtx, MA_OWNED);

	fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT);
	if (fle6 == NULL) {
		priv->nfinfo_alloc_failed++;
		return (ENOMEM);
	}

	/*
	 * Now fle6 is totally ours. It is detached from all lists,
	 * we can safely edit it.
	 */

	fle6->f.version = IP6VERSION;
	bcopy(r, &fle6->f.r, sizeof(struct flow6_rec));
	fle6->f.bytes = plen;
	fle6->f.packets = 1;
	fle6->f.tcp_flags = tcp_flags;

	fle6->f.first = fle6->f.last = time_uptime;

	/*
	 * First we do route table lookup on destination address. So we can
	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
	 */
	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
		bzero(&sin6, sizeof(struct sockaddr_in6));
		sin6.sin6_len = sizeof(struct sockaddr_in6);
		sin6.sin6_family = AF_INET6;
		sin6.sin6_addr = r->dst.r_dst6;

		rt_gateway.sdl_len = sizeof(rt_gateway);
		sin6_mask.sin6_len = sizeof(struct sockaddr_in6);
		bzero(&info, sizeof(info));

		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin6_mask;

		if (rib_lookup_info(r->fib, (struct sockaddr *)&sin6, NHR_REF,
		    0, &info) == 0) {
			fle6->f.fle_o_ifx = info.rti_ifp->if_index;

			if (info.rti_flags & RTF_GATEWAY &&
			    rt_gateway.sdl_family == AF_INET6)
				fle6->f.n.next_hop6 =
				    ((struct sockaddr_in6 *)&rt_gateway)->sin6_addr;

			fle6->f.dst_mask =
			    ip6_masklen(&sin6_mask.sin6_addr, &info);

			rib_free_info(&info);
		}
	}

	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
		/* Do route lookup on source address, to fill in src_mask. */
		bzero(&sin6, sizeof(struct sockaddr_in6));
		sin6.sin6_len = sizeof(struct sockaddr_in6);
		sin6.sin6_family = AF_INET6;
		sin6.sin6_addr = r->src.r_src6;

		sin6_mask.sin6_len = sizeof(struct sockaddr_in6);
		bzero(&info, sizeof(info));

		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin6_mask;

		if (rib_lookup_info(r->fib, (struct sockaddr *)&sin6, 0, 0,
		    &info) == 0)
			fle6->f.src_mask =
			    ip6_masklen(&sin6_mask.sin6_addr, &info);
	}

	/* Push new flow at the end of hash. */
	TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash);

	return (0);
}
#endif


/*
 * Non-static functions called from ng_netflow.c
 */

/* Allocate memory and set up flow cache */
void
ng_netflow_cache_init(priv_p priv)
{
	struct flow_hash_entry *hsh;
	int i;

	/* Initialize cache UMA zone. */
	priv->zone = uma_zcreate("NetFlow IPv4 cache",
	    sizeof(struct flow_entry), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_CACHE, 0);
	uma_zone_set_max(priv->zone, CACHESIZE);
#ifdef INET6
	priv->zone6 = uma_zcreate("NetFlow IPv6 cache",
	    sizeof(struct flow6_entry), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_CACHE, 0);
	uma_zone_set_max(priv->zone6, CACHESIZE);
#endif

	/* Allocate hash. */
	priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);

	/* Initialize hash: per-bucket mutex and an empty flow list. */
	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) {
		mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
		TAILQ_INIT(&hsh->head);
	}

#ifdef INET6
	/* Allocate hash. */
	priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);

	/* Initialize hash. */
	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) {
		mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
		TAILQ_INIT(&hsh->head);
	}
#endif

	/* Per-CPU statistics counters; freed in ng_netflow_cache_flush(). */
	priv->nfinfo_bytes = counter_u64_alloc(M_WAITOK);
	priv->nfinfo_packets = counter_u64_alloc(M_WAITOK);
	priv->nfinfo_bytes6 = counter_u64_alloc(M_WAITOK);
	priv->nfinfo_packets6 = counter_u64_alloc(M_WAITOK);
	priv->nfinfo_sbytes = counter_u64_alloc(M_WAITOK);
	priv->nfinfo_spackets = counter_u64_alloc(M_WAITOK);
	priv->nfinfo_sbytes6 = counter_u64_alloc(M_WAITOK);
	priv->nfinfo_spackets6 = counter_u64_alloc(M_WAITOK);
	priv->nfinfo_act_exp = counter_u64_alloc(M_WAITOK);
	priv->nfinfo_inact_exp = counter_u64_alloc(M_WAITOK);

	ng_netflow_v9_cache_init(priv);
	CTR0(KTR_NET, "ng_netflow startup()");
}

/* Initialize new FIB table for v5 and v9 */
int
ng_netflow_fib_init(priv_p priv, int fib)
{
	fib_export_p fe = priv_to_fib(priv, fib);

	CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib);

	/* Already initialized by an earlier caller. */
	if (fe != NULL)
		return (0);

	if ((fe = malloc(sizeof(struct fib_export), M_NETGRAPH,
	    M_NOWAIT | M_ZERO)) == NULL)
		return (ENOMEM);

	mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF);
	mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF);
	fe->fib = fib;
	fe->domain_id = fib;

	/* Publish atomically; concurrent initializers may race here. */
	if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib],
	    (uintptr_t)NULL, (uintptr_t)fe) == 0) {
		/* FIB already set up by other ISR */
		CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p",
		    fib, fe, priv_to_fib(priv, fib));
		mtx_destroy(&fe->export_mtx);
		mtx_destroy(&fe->export9_mtx);
		free(fe, M_NETGRAPH);
	} else {
		/* Increase counter for statistics */
		CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)",
		    fib, fe, priv_to_fib(priv, fib));
		priv->nfinfo_alloc_fibs++;
	}

	return (0);
}

/* Free all flow cache memory. Called from node close method.
 */
void
ng_netflow_cache_flush(priv_p priv)
{
	struct flow_entry *fle, *fle1;
	struct flow_hash_entry *hsh;
	struct netflow_export_item exp;
	fib_export_p fe;
	int i;

	bzero(&exp, sizeof(exp));

	/*
	 * We are going to free probably billable data.
	 * Expire everything before freeing it.
	 * No locking is required since callout is already drained.
	 */
	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++)
		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
			fe = priv_to_fib(priv, fle->f.r.fib);
			expire_flow(priv, fe, fle, NG_QUEUE);
		}
#ifdef INET6
	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++)
		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
			fe = priv_to_fib(priv, fle->f.r.fib);
			expire_flow(priv, fe, fle, NG_QUEUE);
		}
#endif

	uma_zdestroy(priv->zone);
	/* Destroy hash mutexes. */
	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++)
		mtx_destroy(&hsh->mtx);

	/* Free hash memory. */
	if (priv->hash != NULL)
		free(priv->hash, M_NETFLOW_HASH);
#ifdef INET6
	uma_zdestroy(priv->zone6);
	/* Destroy hash mutexes. */
	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++)
		mtx_destroy(&hsh->mtx);

	/* Free hash memory. */
	if (priv->hash6 != NULL)
		free(priv->hash6, M_NETFLOW_HASH);
#endif

	/* Flush pending export datagrams and tear down per-FIB state. */
	for (i = 0; i < priv->maxfibs; i++) {
		if ((fe = priv_to_fib(priv, i)) == NULL)
			continue;

		if (fe->exp.item != NULL)
			export_send(priv, fe, fe->exp.item, NG_QUEUE);

		if (fe->exp.item9 != NULL)
			export9_send(priv, fe, fe->exp.item9,
			    fe->exp.item9_opt, NG_QUEUE);

		mtx_destroy(&fe->export_mtx);
		mtx_destroy(&fe->export9_mtx);
		free(fe, M_NETGRAPH);
	}

	counter_u64_free(priv->nfinfo_bytes);
	counter_u64_free(priv->nfinfo_packets);
	counter_u64_free(priv->nfinfo_bytes6);
	counter_u64_free(priv->nfinfo_packets6);
	counter_u64_free(priv->nfinfo_sbytes);
	counter_u64_free(priv->nfinfo_spackets);
	counter_u64_free(priv->nfinfo_sbytes6);
	counter_u64_free(priv->nfinfo_spackets6);
	counter_u64_free(priv->nfinfo_act_exp);
	counter_u64_free(priv->nfinfo_inact_exp);

	ng_netflow_v9_cache_flush(priv);
}

/* Insert packet from into flow cache.
 */
int
ng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip,
    caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
    unsigned int src_if_index)
{
	struct flow_entry *fle, *fle1;
	struct flow_hash_entry *hsh;
	struct flow_rec r;
	int hlen, plen;
	int error = 0;
	uint16_t eproto;
	uint8_t tcp_flags = 0;

	bzero(&r, sizeof(r));

	if (ip->ip_v != IPVERSION)
		return (EINVAL);

	hlen = ip->ip_hl << 2;
	if (hlen < sizeof(struct ip))
		return (EINVAL);

	eproto = ETHERTYPE_IP;
	/* Assume L4 template by default */
	r.flow_type = NETFLOW_V9_FLOW_V4_L4;

	r.r_src = ip->ip_src;
	r.r_dst = ip->ip_dst;
	r.fib = fe->fib;

	plen = ntohs(ip->ip_len);

	r.r_ip_p = ip->ip_p;
	r.r_tos = ip->ip_tos;

	r.r_i_ifx = src_if_index;

	/*
	 * XXX NOTE: only first fragment of fragmented TCP, UDP and
	 * ICMP packet will be recorded with proper s_port and d_port.
	 * Following fragments will be recorded simply as IP packet with
	 * ip_proto = ip->ip_p and s_port, d_port set to zero.
	 * I know, it looks like bug. But I don't want to re-implement
	 * ip packet assembling here. Anyway, (in)famous trafd works this way -
	 * and nobody complains yet :)
	 */
	if ((ip->ip_off & htons(IP_OFFMASK)) == 0)
		switch(r.r_ip_p) {
		case IPPROTO_TCP:
		{
			struct tcphdr *tcp;

			tcp = (struct tcphdr *)((caddr_t )ip + hlen);
			r.r_sport = tcp->th_sport;
			r.r_dport = tcp->th_dport;
			tcp_flags = tcp->th_flags;
			break;
		}
		case IPPROTO_UDP:
			r.r_ports = *(uint32_t *)((caddr_t )ip + hlen);
			break;
		}

	counter_u64_add(priv->nfinfo_packets, 1);
	counter_u64_add(priv->nfinfo_bytes, plen);

	/* Find hash slot. */
	hsh = &priv->hash[ip_hash(&r)];

	mtx_lock(&hsh->mtx);

	/*
	 * Go through hash and find our entry. If we encounter an
	 * entry, that should be expired, purge it. We do a reverse
	 * search since most active entries are first, and most
	 * searches are done on most active entries.
	 */
	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
		if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0)
			break;
		if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) {
			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
			    fle, NG_QUEUE);
			counter_u64_add(priv->nfinfo_act_exp, 1);
		}
	}

	if (fle) {			/* An existent entry. */

		fle->f.bytes += plen;
		fle->f.packets ++;
		fle->f.tcp_flags |= tcp_flags;
		fle->f.last = time_uptime;

		/*
		 * We have the following reasons to expire flow in active way:
		 * - it hit active timeout
		 * - a TCP connection closed
		 * - it is going to overflow counter
		 */
		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) ||
		    (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
			    fle, NG_QUEUE);
			counter_u64_add(priv->nfinfo_act_exp, 1);
		} else {
			/*
			 * It is the newest, move it to the tail,
			 * if it isn't there already. Next search will
			 * locate it quicker.
			 */
			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
			}
		}
	} else /* A new flow entry. */
		error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags);

	mtx_unlock(&hsh->mtx);

	return (error);
}

#ifdef INET6
/* Insert IPv6 packet from into flow cache.
 */
/*
 * IPv6 counterpart of ng_netflow_flow_add(): locate (or create) the
 * flow6 entry for this packet under the bucket mutex, account it and
 * possibly trigger active expiry.
 *
 * Unlike the IPv4 path, the caller parses the extension header chain and
 * hands us the upper-layer protocol (upper_proto), a pointer to the
 * upper-layer header (upper_ptr), and NG_NETFLOW_IS_FRAG in flags if the
 * packet is a fragment.
 *
 * Returns 0 on success, EINVAL on a non-IPv6 header, or the error from
 * hash6_insert().
 */
int
ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6,
	caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
	unsigned int src_if_index)
{
	struct flow_entry *fle = NULL, *fle1;
	struct flow6_entry *fle6;
	struct flow_hash_entry *hsh;
	struct flow6_rec r;
	int plen;
	int error = 0;
	uint8_t tcp_flags = 0;

	/* check version */
	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
		return (EINVAL);

	bzero(&r, sizeof(r));

	r.src.r_src6 = ip6->ip6_src;
	r.dst.r_dst6 = ip6->ip6_dst;
	r.fib = fe->fib;

	/* Assume L4 template by default */
	r.flow_type = NETFLOW_V9_FLOW_V6_L4;

	/* ip6_plen excludes the fixed header, so add it back for accounting. */
	plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);

#if 0
	/* XXX: set DSCP/CoS value */
	r.r_tos = ip->ip_tos;
#endif
	/*
	 * Ports are taken only from unfragmented packets.
	 * NOTE(review): the 32-bit read of source+destination port assumes
	 * upper_ptr points at >= 4 valid bytes -- presumably guaranteed by
	 * the caller's extension-header walk; verify against the caller.
	 */
	if ((flags & NG_NETFLOW_IS_FRAG) == 0) {
		switch(upper_proto) {
		case IPPROTO_TCP:
		{
			struct tcphdr *tcp;

			tcp = (struct tcphdr *)upper_ptr;
			r.r_ports = *(uint32_t *)upper_ptr;
			tcp_flags = tcp->th_flags;
			break;
		}
		case IPPROTO_UDP:
		case IPPROTO_SCTP:
			r.r_ports = *(uint32_t *)upper_ptr;
			break;
		}
	}

	r.r_ip_p = upper_proto;
	r.r_i_ifx = src_if_index;

	counter_u64_add(priv->nfinfo_packets6, 1);
	counter_u64_add(priv->nfinfo_bytes6, plen);

	/* Find hash slot. */
	hsh = &priv->hash6[ip6_hash(&r)];

	mtx_lock(&hsh->mtx);

	/*
	 * Go through hash and find our entry. If we encounter an
	 * entry, that should be expired, purge it. We do a reverse
	 * search since most active entries are first, and most
	 * searches are done on most active entries.
	 */
	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
		/* Bucket lists may mix versions; skip non-IPv6 entries. */
		if (fle->f.version != IP6VERSION)
			continue;
		fle6 = (struct flow6_entry *)fle;
		if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0)
			break;
		if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) {
			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
			    NG_QUEUE);
			counter_u64_add(priv->nfinfo_act_exp, 1);
		}
	}

	if (fle != NULL) {			/* An existent entry. */
		fle6 = (struct flow6_entry *)fle;

		fle6->f.bytes += plen;
		fle6->f.packets ++;
		fle6->f.tcp_flags |= tcp_flags;
		fle6->f.last = time_uptime;

		/*
		 * We have the following reasons to expire flow in active way:
		 * - it hit active timeout
		 * - a TCP connection closed
		 * - it is going to overflow counter
		 */
		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) ||
		    (fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
			    NG_QUEUE);
			counter_u64_add(priv->nfinfo_act_exp, 1);
		} else {
			/*
			 * It is the newest, move it to the tail,
			 * if it isn't there already. Next search will
			 * locate it quicker.
			 */
			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
			}
		}
	} else				/* A new flow entry. */
		error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags);

	mtx_unlock(&hsh->mtx);

	return (error);
}
#endif

/*
 * Return records from cache to userland.
 *
 * TODO: matching particular IP should be done in kernel, here.
 */
/*
 * Copy a batch of flow records into the response message buffer.
 *
 * req carries the resume cursor (hash_id = bucket index, list_id =
 * 1-based position within that bucket's list) from the previous call;
 * resp is filled with up to `max' records followed by an updated cursor
 * that userland must echo back to continue iteration.  A zeroed cursor
 * in resp means the walk is complete.
 *
 * Records of struct flow_entry_data (v4) or flow6_entry_data (v6) are
 * written immediately after the resp header (resp + 1); the caller is
 * responsible for sizing that buffer for `max' records.
 *
 * Returns 0, or EINVAL on a bad hash_id / unknown version.
 */
int
ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req,
    struct ngnf_show_header *resp)
{
	struct flow_hash_entry *hsh;
	struct flow_entry *fle;
	struct flow_entry_data *data = (struct flow_entry_data *)(resp + 1);
#ifdef INET6
	struct flow6_entry_data *data6 = (struct flow6_entry_data *)(resp + 1);
#endif
	int i, max;

	i = req->hash_id;
	if (i > NBUCKETS-1)
		return (EINVAL);

	/* Select the v4 or v6 cache and per-message record limit. */
#ifdef INET6
	if (req->version == 6) {
		resp->version = 6;
		hsh = priv->hash6 + i;
		max = NREC6_AT_ONCE;
	} else
#endif
	if (req->version == 4) {
		resp->version = 4;
		hsh = priv->hash + i;
		max = NREC_AT_ONCE;
	} else
		return (EINVAL);

	/*
	 * We will transfer not more than NREC_AT_ONCE. More data
	 * will come in next message.
	 * We send current hash index and current record number in list
	 * to userland, and userland should return it back to us.
	 * Then, we will restart with new entry.
	 *
	 * The resulting cache snapshot can be inaccurate if flow expiration
	 * is taking place on hash item between userland data requests for
	 * this hash item id.
	 */
	resp->nentries = 0;
	for (; i < NBUCKETS; hsh++, i++) {
		int list_id;

		if (mtx_trylock(&hsh->mtx) == 0) {
			/*
			 * Requested hash index is not available,
			 * relay decision to skip or re-request data
			 * to userland.
			 */
			resp->hash_id = i;
			resp->list_id = 0;
			return (0);
		}

		list_id = 0;
		TAILQ_FOREACH(fle, &hsh->head, fle_hash) {
			/* Yield the bucket if the datapath is waiting. */
			if (hsh->mtx.mtx_lock & MTX_CONTESTED) {
				resp->hash_id = i;
				resp->list_id = list_id;
				mtx_unlock(&hsh->mtx);
				return (0);
			}

			list_id++;
			/* Search for particular record in list. */
			if (req->list_id > 0) {
				if (list_id < req->list_id)
					continue;

				/* Requested list position found. */
				req->list_id = 0;
			}
#ifdef INET6
			if (req->version == 6) {
				struct flow6_entry *fle6;

				fle6 = (struct flow6_entry *)fle;
				bcopy(&fle6->f, data6 + resp->nentries,
				    sizeof(fle6->f));
			} else
#endif
				bcopy(&fle->f, data + resp->nentries,
				    sizeof(fle->f));
			resp->nentries++;
			if (resp->nentries == max) {
				resp->hash_id = i;
				/*
				 * If it was the last item in list
				 * we simply skip to next hash_id.
				 */
				resp->list_id = list_id + 1;
				mtx_unlock(&hsh->mtx);
				return (0);
			}
		}
		mtx_unlock(&hsh->mtx);
	}

	/* Whole cache walked; signal completion with a zeroed cursor. */
	resp->hash_id = resp->list_id = 0;

	return (0);
}

/*
 * We have full datagram in privdata. Send it to export hook.
 *
 * Finalizes the NetFlow v5 header (uptime, wall clock, engine id,
 * sequence number) in the mbuf carried by `item' and forwards the item
 * to the export hook, or frees it if no export hook is connected.
 * Ownership of `item' is always consumed here.
 */
static int
export_send(priv_p priv, fib_export_p fe, item_p item, int flags)
{
	struct mbuf *m = NGI_M(item);
	struct netflow_v5_export_dgram *dgram = mtod(m,
					struct netflow_v5_export_dgram *);
	struct netflow_v5_header *header = &dgram->header;
	struct timespec ts;
	int error = 0;

	/* Fill mbuf header. */
	m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v5_record) *
	   header->count + sizeof(struct netflow_v5_header);

	/* Fill export header. */
	header->sys_uptime = htonl(MILLIUPTIME(time_uptime));
	getnanotime(&ts);
	header->unix_secs  = htonl(ts.tv_sec);
	header->unix_nsecs = htonl(ts.tv_nsec);
	header->engine_type = 0;
	header->engine_id = fe->domain_id;
	header->pad = 0;
	/* Sequence is per-fib; atomically reserve `count' numbers. */
	header->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq,
	    header->count));
	/* count was kept in host order while the datagram was built. */
	header->count = htons(header->count);

	if (priv->export != NULL)
		NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags);
	else
		NG_FREE_ITEM(item);

	return (error);
}

/*
 * Add export record to dgram.
 *
 * Appends one NetFlow v5 record for `fle' to the datagram under
 * construction in `item'.  Returns 1 when the datagram is full (caller
 * should send it), 0 otherwise.
 */
static int
export_add(item_p item, struct flow_entry *fle)
{
	struct netflow_v5_export_dgram *dgram = mtod(NGI_M(item),
					struct netflow_v5_export_dgram *);
	struct netflow_v5_header *header = &dgram->header;
	struct netflow_v5_record *rec;

	rec = &dgram->r[header->count];
	header->count ++;

	KASSERT(header->count <= NETFLOW_V5_MAX_RECORDS,
	    ("ng_netflow: export too big"));

	/* Fill in export record. */
	rec->src_addr = fle->f.r.r_src.s_addr;
	rec->dst_addr = fle->f.r.r_dst.s_addr;
	rec->next_hop = fle->f.next_hop.s_addr;
	rec->i_ifx    = htons(fle->f.fle_i_ifx);
	rec->o_ifx    = htons(fle->f.fle_o_ifx);
	rec->packets  = htonl(fle->f.packets);
	rec->octets   = htonl(fle->f.bytes);
	rec->first    = htonl(MILLIUPTIME(fle->f.first));
	rec->last     = htonl(MILLIUPTIME(fle->f.last));
	rec->s_port   = fle->f.r.r_sport;
	rec->d_port   = fle->f.r.r_dport;
	rec->flags    = fle->f.tcp_flags;
	rec->prot     = fle->f.r.r_ip_p;
	rec->tos      = fle->f.r.r_tos;
	rec->dst_mask = fle->f.dst_mask;
	rec->src_mask = fle->f.src_mask;
	rec->pad1     = 0;
	rec->pad2     = 0;

	/* Not supported fields. */
	rec->src_as = rec->dst_as = 0;

	if (header->count == NETFLOW_V5_MAX_RECORDS)
		return (1); /* end of datagram */
	else
		return (0);
}

/*
 * Periodic flow expiry run.
 *
 * Callout handler: sweeps both caches, expiring flows that timed out
 * (and, under memory pressure -- zone usage above 2*NBUCKETS -- any
 * inactive flow), then reschedules itself for one second later.
 * Buckets locked by the datapath are skipped rather than waited on.
 */
void
ng_netflow_expire(void *arg)
{
	struct flow_entry *fle, *fle1;
	struct flow_hash_entry *hsh;
	priv_p priv = (priv_p )arg;
	int used, i;

	/*
	 * Going through all the cache.
	 */
	used = uma_zone_get_cur(priv->zone);
	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) {
		/*
		 * Skip entries, that are already being worked on.
		 */
		if (mtx_trylock(&hsh->mtx) == 0)
			continue;

		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
			/*
			 * Interrupt thread wants this entry!
			 * Quick! Quick! Bail out!
			 */
			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
				break;

			/*
			 * Don't expire aggressively while hash collision
			 * ratio is predicted small.
			 */
			if (used <= (NBUCKETS*2) && !INACTIVE(fle))
				break;

			if ((INACTIVE(fle) && (SMALL(fle) ||
			    (used > (NBUCKETS*2)))) || AGED(fle)) {
				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
				expire_flow(priv, priv_to_fib(priv,
				    fle->f.r.fib), fle, NG_NOFLAGS);
				used--;
				counter_u64_add(priv->nfinfo_inact_exp, 1);
			}
		}
		mtx_unlock(&hsh->mtx);
	}

#ifdef INET6
	/* Same sweep for the IPv6 cache. */
	used = uma_zone_get_cur(priv->zone6);
	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) {
		struct flow6_entry *fle6;

		/*
		 * Skip entries, that are already being worked on.
		 */
		if (mtx_trylock(&hsh->mtx) == 0)
			continue;

		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
			fle6 = (struct flow6_entry *)fle;
			/*
			 * Interrupt thread wants this entry!
			 * Quick! Quick! Bail out!
			 */
			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
				break;

			/*
			 * Don't expire aggressively while hash collision
			 * ratio is predicted small.
			 */
			if (used <= (NBUCKETS*2) && !INACTIVE(fle6))
				break;

			if ((INACTIVE(fle6) && (SMALL(fle6) ||
			    (used > (NBUCKETS*2)))) || AGED(fle6)) {
				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
				expire_flow(priv, priv_to_fib(priv,
				    fle->f.r.fib), fle, NG_NOFLAGS);
				used--;
				counter_u64_add(priv->nfinfo_inact_exp, 1);
			}
		}
		mtx_unlock(&hsh->mtx);
	}
#endif

	/* Schedule next expire. */
	callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire,
	    (void *)priv);
}