1135332Sglebius/*-
2219182Sglebius * Copyright (c) 2010-2011 Alexander V. Chernikov <melifaro@ipfw.ru>
3143923Sglebius * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org>
4135332Sglebius * Copyright (c) 2001-2003 Roman V. Palagin <romanp@unshadow.net>
5135332Sglebius * All rights reserved.
6135332Sglebius *
7135332Sglebius * Redistribution and use in source and binary forms, with or without
8135332Sglebius * modification, are permitted provided that the following conditions
9135332Sglebius * are met:
10135332Sglebius * 1. Redistributions of source code must retain the above copyright
11135332Sglebius *    notice, this list of conditions and the following disclaimer.
12135332Sglebius * 2. Redistributions in binary form must reproduce the above copyright
13135332Sglebius *    notice, this list of conditions and the following disclaimer in the
14135332Sglebius *    documentation and/or other materials provided with the distribution.
15135332Sglebius *
16135332Sglebius * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17135332Sglebius * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18135332Sglebius * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19135332Sglebius * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20135332Sglebius * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21135332Sglebius * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22135332Sglebius * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23135332Sglebius * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24135332Sglebius * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25135332Sglebius * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26135332Sglebius * SUCH DAMAGE.
27135332Sglebius *
28135332Sglebius * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $
29135332Sglebius */
30135332Sglebius
31260048Sdim#include <sys/cdefs.h>
32260048Sdim__FBSDID("$FreeBSD$");
33135332Sglebius
34219182Sglebius#include "opt_inet6.h"
35219182Sglebius#include "opt_route.h"
36135332Sglebius#include <sys/param.h>
37300771Sjkim#include <sys/bitstring.h>
38260169Sglebius#include <sys/systm.h>
39260169Sglebius#include <sys/counter.h>
40135332Sglebius#include <sys/kernel.h>
41293470Smelifaro#include <sys/ktr.h>
42135332Sglebius#include <sys/limits.h>
43135332Sglebius#include <sys/mbuf.h>
44140511Sglebius#include <sys/syslog.h>
45135332Sglebius#include <sys/socket.h>
46295126Sglebius#include <vm/uma.h>
47135332Sglebius
48135332Sglebius#include <net/if.h>
49293914Smelifaro#include <net/if_dl.h>
50257176Sglebius#include <net/if_var.h>
51135332Sglebius#include <net/route.h>
52219182Sglebius#include <net/ethernet.h>
53135332Sglebius#include <netinet/in.h>
54135332Sglebius#include <netinet/in_systm.h>
55135332Sglebius#include <netinet/ip.h>
56219182Sglebius#include <netinet/ip6.h>
57135332Sglebius#include <netinet/tcp.h>
58135332Sglebius#include <netinet/udp.h>
59135332Sglebius
60135332Sglebius#include <netgraph/ng_message.h>
61135332Sglebius#include <netgraph/netgraph.h>
62135332Sglebius
63135332Sglebius#include <netgraph/netflow/netflow.h>
64219182Sglebius#include <netgraph/netflow/netflow_v9.h>
65135332Sglebius#include <netgraph/netflow/ng_netflow.h>
66135332Sglebius
#define	NBUCKETS	(65536)		/* must be power of 2 */

/*
 * Bucket index for TCP or UDP packets: folds both addresses and both
 * ports into the range [0, NBUCKETS).
 *
 * Arguments are fully parenthesized so that compound expressions keep
 * their meaning.  addr1 and addr2 are evaluated twice, so do not pass
 * expressions with side effects.
 */
#define FULL_HASH(addr1, addr2, port1, port2)		\
	((((addr1) ^ ((addr1) >> 16) ^			\
	htons((addr2) ^ ((addr2) >> 16))) ^		\
	(port1) ^ htons(port2)) &			\
	(NBUCKETS - 1))

/*
 * Bucket index for all other IP packets: same folding as FULL_HASH(),
 * but over the two addresses only.  addr1 and addr2 are evaluated twice.
 */
#define ADDR_HASH(addr1, addr2)				\
	((((addr1) ^ ((addr1) >> 16) ^			\
	htons((addr2) ^ ((addr2) >> 16)))) &		\
	(NBUCKETS - 1))

/* Macros to shorten logical constructions */
/* XXX: priv must exist in namespace */
/* True when the flow saw no packet for longer than the inactive timeout. */
#define	INACTIVE(fle)	(time_uptime - (fle)->f.last > priv->nfinfo_inact_t)
/* True when the flow has existed for longer than the active timeout. */
#define	AGED(fle)	(time_uptime - (fle)->f.first > priv->nfinfo_act_t)
/* True when the entry has not accounted any packet. */
#define	ISFREE(fle)	((fle)->f.packets == 0)

/*
 * 4 is a magical number: statistically number of 4-packet flows is
 * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP
 * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case
 * of reachable host and 4-packet otherwise.
 */
#define	SMALL(fle)	((fle)->f.packets <= 4)
95143103Sglebius
96151897SrwatsonMALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash");
97135332Sglebius
98146092Sglebiusstatic int export_add(item_p, struct flow_entry *);
99219182Sglebiusstatic int export_send(priv_p, fib_export_p, item_p, int);
100135332Sglebius
101248724Sglebiusstatic int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *,
102248724Sglebius    int, uint8_t, uint8_t);
103219229Sbz#ifdef INET6
104248724Sglebiusstatic int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *,
105248724Sglebius    int, uint8_t, uint8_t);
106219229Sbz#endif
107219182Sglebius
108248724Sglebiusstatic void expire_flow(priv_p, fib_export_p, struct flow_entry *, int);
109219182Sglebius
110219182Sglebius/*
111219182Sglebius * Generate hash for a given flow record.
112219182Sglebius *
113219182Sglebius * FIB is not used here, because:
114219182Sglebius * most VRFS will carry public IPv4 addresses which are unique even
115219182Sglebius * without FIB private addresses can overlap, but this is worked out
116219182Sglebius * via flow_rec bcmp() containing fib id. In IPv6 world addresses are
117219182Sglebius * all globally unique (it's not fully true, there is FC00::/7 for example,
118219182Sglebius * but chances of address overlap are MUCH smaller)
119219182Sglebius */
120248724Sglebiusstatic inline uint32_t
121135332Sglebiusip_hash(struct flow_rec *r)
122135332Sglebius{
123248724Sglebius
124135332Sglebius	switch (r->r_ip_p) {
125135332Sglebius	case IPPROTO_TCP:
126135332Sglebius	case IPPROTO_UDP:
127135332Sglebius		return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr,
128135332Sglebius		    r->r_sport, r->r_dport);
129135332Sglebius	default:
130135332Sglebius		return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr);
131135332Sglebius	}
132135332Sglebius}
133135332Sglebius
134219182Sglebius#ifdef INET6
135219182Sglebius/* Generate hash for a given flow6 record. Use lower 4 octets from v6 addresses */
136248724Sglebiusstatic inline uint32_t
137219182Sglebiusip6_hash(struct flow6_rec *r)
138219182Sglebius{
139248724Sglebius
140219182Sglebius	switch (r->r_ip_p) {
141219182Sglebius	case IPPROTO_TCP:
142219182Sglebius	case IPPROTO_UDP:
143219182Sglebius		return FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3],
144219182Sglebius		    r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport,
145219182Sglebius		    r->r_dport);
146219182Sglebius	default:
147219182Sglebius		return ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3],
148219182Sglebius		    r->dst.r_dst6.__u6_addr.__u6_addr32[3]);
149219182Sglebius 	}
150219182Sglebius}
151300771Sjkim
152300771Sjkimstatic inline int
153300771Sjkimip6_masklen(struct in6_addr *saddr, struct rt_addrinfo *info)
154300771Sjkim{
155300771Sjkim	const int nbits = sizeof(*saddr) * NBBY;
156300771Sjkim	int mlen;
157300771Sjkim
158300771Sjkim	if (info->rti_addrs & RTA_NETMASK)
159300771Sjkim		bit_count((bitstr_t *)saddr, 0, nbits, &mlen);
160300771Sjkim	else
161300771Sjkim		mlen = nbits;
162300771Sjkim	return (mlen);
163300771Sjkim}
164219182Sglebius#endif
165219182Sglebius
166146092Sglebius/*
167146092Sglebius * Detach export datagram from priv, if there is any.
168146092Sglebius * If there is no, allocate a new one.
169146092Sglebius */
170146092Sglebiusstatic item_p
171219182Sglebiusget_export_dgram(priv_p priv, fib_export_p fe)
172146092Sglebius{
173146092Sglebius	item_p	item = NULL;
174146092Sglebius
175219182Sglebius	mtx_lock(&fe->export_mtx);
176219182Sglebius	if (fe->exp.item != NULL) {
177219182Sglebius		item = fe->exp.item;
178219182Sglebius		fe->exp.item = NULL;
179135332Sglebius	}
180219182Sglebius	mtx_unlock(&fe->export_mtx);
181135332Sglebius
182146092Sglebius	if (item == NULL) {
183146092Sglebius		struct netflow_v5_export_dgram *dgram;
184146092Sglebius		struct mbuf *m;
185135332Sglebius
186243882Sglebius		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
187146092Sglebius		if (m == NULL)
188146092Sglebius			return (NULL);
189146285Sglebius		item = ng_package_data(m, NG_NOFLAGS);
190146092Sglebius		if (item == NULL)
191146092Sglebius			return (NULL);
192146092Sglebius		dgram = mtod(m, struct netflow_v5_export_dgram *);
193146092Sglebius		dgram->header.count = 0;
194146092Sglebius		dgram->header.version = htons(NETFLOW_V5);
195210500Sglebius		dgram->header.pad = 0;
196146092Sglebius	}
197135332Sglebius
198146092Sglebius	return (item);
199135332Sglebius}
200135332Sglebius
201146092Sglebius/*
202146092Sglebius * Re-attach incomplete datagram back to priv.
203146092Sglebius * If there is already another one, then send incomplete. */
204146092Sglebiusstatic void
205219182Sglebiusreturn_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags)
206135332Sglebius{
207248724Sglebius
208146092Sglebius	/*
209146092Sglebius	 * It may happen on SMP, that some thread has already
210146092Sglebius	 * put its item there, in this case we bail out and
211146092Sglebius	 * send what we have to collector.
212146092Sglebius	 */
213219182Sglebius	mtx_lock(&fe->export_mtx);
214219182Sglebius	if (fe->exp.item == NULL) {
215219182Sglebius		fe->exp.item = item;
216219182Sglebius		mtx_unlock(&fe->export_mtx);
217146092Sglebius	} else {
218219182Sglebius		mtx_unlock(&fe->export_mtx);
219219182Sglebius		export_send(priv, fe, item, flags);
220146092Sglebius	}
221135332Sglebius}
222135332Sglebius
223146092Sglebius/*
224146092Sglebius * The flow is over. Call export_add() and free it. If datagram is
225146092Sglebius * full, then call export_send().
226146092Sglebius */
227248724Sglebiusstatic void
228219182Sglebiusexpire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags)
229135332Sglebius{
230219182Sglebius	struct netflow_export_item exp;
231219182Sglebius	uint16_t version = fle->f.version;
232219182Sglebius
233219182Sglebius	if ((priv->export != NULL) && (version == IPVERSION)) {
234219182Sglebius		exp.item = get_export_dgram(priv, fe);
235219182Sglebius		if (exp.item == NULL) {
236260169Sglebius			priv->nfinfo_export_failed++;
237219182Sglebius			if (priv->export9 != NULL)
238260169Sglebius				priv->nfinfo_export9_failed++;
239248724Sglebius			/* fle definitely contains IPv4 flow. */
240219182Sglebius			uma_zfree_arg(priv->zone, fle, priv);
241219182Sglebius			return;
242219182Sglebius		}
243219182Sglebius
244219182Sglebius		if (export_add(exp.item, fle) > 0)
245219182Sglebius			export_send(priv, fe, exp.item, flags);
246219182Sglebius		else
247219182Sglebius			return_export_dgram(priv, fe, exp.item, NG_QUEUE);
248146092Sglebius	}
249219182Sglebius
250219182Sglebius	if (priv->export9 != NULL) {
251219182Sglebius		exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt);
252219182Sglebius		if (exp.item9 == NULL) {
253260169Sglebius			priv->nfinfo_export9_failed++;
254219182Sglebius			if (version == IPVERSION)
255219182Sglebius				uma_zfree_arg(priv->zone, fle, priv);
256219229Sbz#ifdef INET6
257219182Sglebius			else if (version == IP6VERSION)
258219182Sglebius				uma_zfree_arg(priv->zone6, fle, priv);
259219229Sbz#endif
260219182Sglebius			else
261248724Sglebius				panic("ng_netflow: Unknown IP proto: %d",
262248724Sglebius				    version);
263219182Sglebius			return;
264219182Sglebius		}
265219182Sglebius
266219182Sglebius		if (export9_add(exp.item9, exp.item9_opt, fle) > 0)
267219182Sglebius			export9_send(priv, fe, exp.item9, exp.item9_opt, flags);
268219182Sglebius		else
269248724Sglebius			return_export9_dgram(priv, fe, exp.item9,
270248724Sglebius			    exp.item9_opt, NG_QUEUE);
271146092Sglebius	}
272219182Sglebius
273219182Sglebius	if (version == IPVERSION)
274219182Sglebius		uma_zfree_arg(priv->zone, fle, priv);
275219229Sbz#ifdef INET6
276219182Sglebius	else if (version == IP6VERSION)
277219182Sglebius		uma_zfree_arg(priv->zone6, fle, priv);
278219229Sbz#endif
279135332Sglebius}
280135332Sglebius
281135332Sglebius/* Get a snapshot of node statistics */
282135332Sglebiusvoid
283135332Sglebiusng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i)
284135332Sglebius{
285248724Sglebius
286260169Sglebius	i->nfinfo_bytes = counter_u64_fetch(priv->nfinfo_bytes);
287260169Sglebius	i->nfinfo_packets = counter_u64_fetch(priv->nfinfo_packets);
288260169Sglebius	i->nfinfo_bytes6 = counter_u64_fetch(priv->nfinfo_bytes6);
289260169Sglebius	i->nfinfo_packets6 = counter_u64_fetch(priv->nfinfo_packets6);
290260169Sglebius	i->nfinfo_sbytes = counter_u64_fetch(priv->nfinfo_sbytes);
291260169Sglebius	i->nfinfo_spackets = counter_u64_fetch(priv->nfinfo_spackets);
292260169Sglebius	i->nfinfo_sbytes6 = counter_u64_fetch(priv->nfinfo_sbytes6);
293260169Sglebius	i->nfinfo_spackets6 = counter_u64_fetch(priv->nfinfo_spackets6);
294260169Sglebius	i->nfinfo_act_exp = counter_u64_fetch(priv->nfinfo_act_exp);
295260169Sglebius	i->nfinfo_inact_exp = counter_u64_fetch(priv->nfinfo_inact_exp);
296260169Sglebius
297260169Sglebius	i->nfinfo_used = uma_zone_get_cur(priv->zone);
298260186Sdelphij#ifdef INET6
299260169Sglebius	i->nfinfo_used6 = uma_zone_get_cur(priv->zone6);
300260186Sdelphij#endif
301260169Sglebius
302260169Sglebius	i->nfinfo_alloc_failed = priv->nfinfo_alloc_failed;
303260169Sglebius	i->nfinfo_export_failed = priv->nfinfo_export_failed;
304260169Sglebius	i->nfinfo_export9_failed = priv->nfinfo_export9_failed;
305260169Sglebius	i->nfinfo_realloc_mbuf = priv->nfinfo_realloc_mbuf;
306260169Sglebius	i->nfinfo_alloc_fibs = priv->nfinfo_alloc_fibs;
307260169Sglebius	i->nfinfo_inact_t = priv->nfinfo_inact_t;
308260169Sglebius	i->nfinfo_act_t = priv->nfinfo_act_t;
309135332Sglebius}
310135332Sglebius
311135332Sglebius/*
312135332Sglebius * Insert a record into defined slot.
313135332Sglebius *
314135332Sglebius * First we get for us a free flow entry, then fill in all
315146092Sglebius * possible fields in it.
316146092Sglebius *
317146092Sglebius * TODO: consider dropping hash mutex while filling in datagram,
318146092Sglebius * as this was done in previous version. Need to test & profile
319146092Sglebius * to be sure.
320135332Sglebius */
321237227Smelifarostatic int
322219182Sglebiushash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r,
323237227Smelifaro	int plen, uint8_t flags, uint8_t tcp_flags)
324135332Sglebius{
325176085Sglebius	struct flow_entry *fle;
326293914Smelifaro	struct sockaddr_in sin, sin_mask;
327293914Smelifaro	struct sockaddr_dl rt_gateway;
328293914Smelifaro	struct rt_addrinfo info;
329135332Sglebius
330146092Sglebius	mtx_assert(&hsh->mtx, MA_OWNED);
331146092Sglebius
332146092Sglebius	fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT);
333146092Sglebius	if (fle == NULL) {
334260169Sglebius		priv->nfinfo_alloc_failed++;
335135332Sglebius		return (ENOMEM);
336146092Sglebius	}
337135332Sglebius
338135332Sglebius	/*
339135332Sglebius	 * Now fle is totally ours. It is detached from all lists,
340135332Sglebius	 * we can safely edit it.
341135332Sglebius	 */
342219182Sglebius	fle->f.version = IPVERSION;
343135332Sglebius	bcopy(r, &fle->f.r, sizeof(struct flow_rec));
344135332Sglebius	fle->f.bytes = plen;
345135332Sglebius	fle->f.packets = 1;
346143890Sglebius	fle->f.tcp_flags = tcp_flags;
347135332Sglebius
348135332Sglebius	fle->f.first = fle->f.last = time_uptime;
349135332Sglebius
350135332Sglebius	/*
351135332Sglebius	 * First we do route table lookup on destination address. So we can
352135332Sglebius	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
353135332Sglebius	 */
354237227Smelifaro	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
355237227Smelifaro		bzero(&sin, sizeof(sin));
356237227Smelifaro		sin.sin_len = sizeof(struct sockaddr_in);
357237227Smelifaro		sin.sin_family = AF_INET;
358237227Smelifaro		sin.sin_addr = fle->f.r.r_dst;
359135332Sglebius
360293914Smelifaro		rt_gateway.sdl_len = sizeof(rt_gateway);
361293914Smelifaro		sin_mask.sin_len = sizeof(struct sockaddr_in);
362293914Smelifaro		bzero(&info, sizeof(info));
363293914Smelifaro
364293914Smelifaro		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
365293914Smelifaro		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin_mask;
366293914Smelifaro
367293914Smelifaro		if (rib_lookup_info(r->fib, (struct sockaddr *)&sin, NHR_REF, 0,
368293914Smelifaro		    &info) == 0) {
369293914Smelifaro			fle->f.fle_o_ifx = info.rti_ifp->if_index;
370293914Smelifaro
371293914Smelifaro			if (info.rti_flags & RTF_GATEWAY &&
372293914Smelifaro			    rt_gateway.sdl_family == AF_INET)
373237227Smelifaro				fle->f.next_hop =
374293914Smelifaro				    ((struct sockaddr_in *)&rt_gateway)->sin_addr;
375135332Sglebius
376293914Smelifaro			if (info.rti_addrs & RTA_NETMASK)
377293914Smelifaro				fle->f.dst_mask = bitcount32(sin_mask.sin_addr.s_addr);
378293914Smelifaro			else if (info.rti_flags & RTF_HOST)
379237227Smelifaro				/* Give up. We can't determine mask :( */
380237227Smelifaro				fle->f.dst_mask = 32;
381135332Sglebius
382293914Smelifaro			rib_free_info(&info);
383237227Smelifaro		}
384135332Sglebius	}
385135332Sglebius
386135332Sglebius	/* Do route lookup on source address, to fill in src_mask. */
387237227Smelifaro	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
388237227Smelifaro		bzero(&sin, sizeof(sin));
389237227Smelifaro		sin.sin_len = sizeof(struct sockaddr_in);
390237227Smelifaro		sin.sin_family = AF_INET;
391237227Smelifaro		sin.sin_addr = fle->f.r.r_src;
392293914Smelifaro
393293914Smelifaro		sin_mask.sin_len = sizeof(struct sockaddr_in);
394293914Smelifaro		bzero(&info, sizeof(info));
395293914Smelifaro
396293914Smelifaro		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin_mask;
397293914Smelifaro
398293914Smelifaro		if (rib_lookup_info(r->fib, (struct sockaddr *)&sin, 0, 0,
399293914Smelifaro		    &info) == 0) {
400293914Smelifaro			if (info.rti_addrs & RTA_NETMASK)
401248724Sglebius				fle->f.src_mask =
402293914Smelifaro				    bitcount32(sin_mask.sin_addr.s_addr);
403293914Smelifaro			else if (info.rti_flags & RTF_HOST)
404237227Smelifaro				/* Give up. We can't determine mask :( */
405237227Smelifaro				fle->f.src_mask = 32;
406237227Smelifaro		}
407135332Sglebius	}
408135332Sglebius
409146092Sglebius	/* Push new flow at the and of hash. */
410146092Sglebius	TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
411135332Sglebius
412135332Sglebius	return (0);
413135332Sglebius}
414135332Sglebius
415219182Sglebius#ifdef INET6
416237227Smelifarostatic int
417223787Sglebiushash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r,
418237227Smelifaro	int plen, uint8_t flags, uint8_t tcp_flags)
419219182Sglebius{
420219182Sglebius	struct flow6_entry *fle6;
421293914Smelifaro	struct sockaddr_in6 sin6, sin6_mask;
422293914Smelifaro	struct sockaddr_dl rt_gateway;
423293914Smelifaro	struct rt_addrinfo info;
424135332Sglebius
425219182Sglebius	mtx_assert(&hsh6->mtx, MA_OWNED);
426219182Sglebius
427219182Sglebius	fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT);
428219182Sglebius	if (fle6 == NULL) {
429260169Sglebius		priv->nfinfo_alloc_failed++;
430219182Sglebius		return (ENOMEM);
431219182Sglebius	}
432219182Sglebius
433219182Sglebius	/*
434219182Sglebius	 * Now fle is totally ours. It is detached from all lists,
435219182Sglebius	 * we can safely edit it.
436219182Sglebius	 */
437219182Sglebius
438219182Sglebius	fle6->f.version = IP6VERSION;
439219182Sglebius	bcopy(r, &fle6->f.r, sizeof(struct flow6_rec));
440219182Sglebius	fle6->f.bytes = plen;
441219182Sglebius	fle6->f.packets = 1;
442219182Sglebius	fle6->f.tcp_flags = tcp_flags;
443219182Sglebius
444219182Sglebius	fle6->f.first = fle6->f.last = time_uptime;
445219182Sglebius
446219182Sglebius	/*
447219182Sglebius	 * First we do route table lookup on destination address. So we can
448219182Sglebius	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
449219182Sglebius	 */
450248724Sglebius	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
451293315Smelifaro		bzero(&sin6, sizeof(struct sockaddr_in6));
452293315Smelifaro		sin6.sin6_len = sizeof(struct sockaddr_in6);
453293315Smelifaro		sin6.sin6_family = AF_INET6;
454293315Smelifaro		sin6.sin6_addr = r->dst.r_dst6;
455219182Sglebius
456293914Smelifaro		rt_gateway.sdl_len = sizeof(rt_gateway);
457293914Smelifaro		sin6_mask.sin6_len = sizeof(struct sockaddr_in6);
458293914Smelifaro		bzero(&info, sizeof(info));
459219182Sglebius
460293914Smelifaro		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
461293914Smelifaro		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin6_mask;
462219182Sglebius
463293914Smelifaro		if (rib_lookup_info(r->fib, (struct sockaddr *)&sin6, NHR_REF,
464293914Smelifaro		    0, &info) == 0) {
465293914Smelifaro			fle6->f.fle_o_ifx = info.rti_ifp->if_index;
466293914Smelifaro
467293914Smelifaro			if (info.rti_flags & RTF_GATEWAY &&
468293914Smelifaro			    rt_gateway.sdl_family == AF_INET6)
469237227Smelifaro				fle6->f.n.next_hop6 =
470293914Smelifaro				    ((struct sockaddr_in6 *)&rt_gateway)->sin6_addr;
471219182Sglebius
472300772Sjkim			fle6->f.dst_mask =
473300772Sjkim			    ip6_masklen(&sin6_mask.sin6_addr, &info);
474219182Sglebius
475293914Smelifaro			rib_free_info(&info);
476237227Smelifaro		}
477219182Sglebius	}
478219182Sglebius
479293167Smelifaro	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
480237227Smelifaro		/* Do route lookup on source address, to fill in src_mask. */
481293315Smelifaro		bzero(&sin6, sizeof(struct sockaddr_in6));
482293315Smelifaro		sin6.sin6_len = sizeof(struct sockaddr_in6);
483293315Smelifaro		sin6.sin6_family = AF_INET6;
484293315Smelifaro		sin6.sin6_addr = r->src.r_src6;
485219182Sglebius
486293914Smelifaro		sin6_mask.sin6_len = sizeof(struct sockaddr_in6);
487293914Smelifaro		bzero(&info, sizeof(info));
488219182Sglebius
489293914Smelifaro		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin6_mask;
490293914Smelifaro
491293914Smelifaro		if (rib_lookup_info(r->fib, (struct sockaddr *)&sin6, 0, 0,
492300771Sjkim		    &info) == 0)
493300772Sjkim			fle6->f.src_mask =
494300772Sjkim			    ip6_masklen(&sin6_mask.sin6_addr, &info);
495219182Sglebius	}
496219182Sglebius
497219182Sglebius	/* Push new flow at the and of hash. */
498223787Sglebius	TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash);
499219182Sglebius
500219182Sglebius	return (0);
501219182Sglebius}
502219182Sglebius#endif
503219182Sglebius
504219182Sglebius
505135332Sglebius/*
506135332Sglebius * Non-static functions called from ng_netflow.c
507135332Sglebius */
508135332Sglebius
509135332Sglebius/* Allocate memory and set up flow cache */
510220769Sglebiusvoid
511135332Sglebiusng_netflow_cache_init(priv_p priv)
512135332Sglebius{
513219182Sglebius	struct flow_hash_entry *hsh;
514135332Sglebius	int i;
515135332Sglebius
516146092Sglebius	/* Initialize cache UMA zone. */
517248724Sglebius	priv->zone = uma_zcreate("NetFlow IPv4 cache",
518260169Sglebius	    sizeof(struct flow_entry), NULL, NULL, NULL, NULL,
519260169Sglebius	    UMA_ALIGN_CACHE, 0);
520146092Sglebius	uma_zone_set_max(priv->zone, CACHESIZE);
521219182Sglebius#ifdef INET6
522248724Sglebius	priv->zone6 = uma_zcreate("NetFlow IPv6 cache",
523260169Sglebius	    sizeof(struct flow6_entry), NULL, NULL, NULL, NULL,
524260169Sglebius	    UMA_ALIGN_CACHE, 0);
525219182Sglebius	uma_zone_set_max(priv->zone6, CACHESIZE);
526219182Sglebius#endif
527135332Sglebius
528146092Sglebius	/* Allocate hash. */
529184214Sdes	priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
530146092Sglebius	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);
531135332Sglebius
532146092Sglebius	/* Initialize hash. */
533146092Sglebius	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) {
534146092Sglebius		mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
535146092Sglebius		TAILQ_INIT(&hsh->head);
536146092Sglebius	}
537135332Sglebius
538219182Sglebius#ifdef INET6
539219182Sglebius	/* Allocate hash. */
540223787Sglebius	priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
541219182Sglebius	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);
542135332Sglebius
543219182Sglebius	/* Initialize hash. */
544223787Sglebius	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) {
545223787Sglebius		mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
546223787Sglebius		TAILQ_INIT(&hsh->head);
547219182Sglebius	}
548219182Sglebius#endif
549219182Sglebius
550260169Sglebius	priv->nfinfo_bytes = counter_u64_alloc(M_WAITOK);
551260169Sglebius	priv->nfinfo_packets = counter_u64_alloc(M_WAITOK);
552260169Sglebius	priv->nfinfo_bytes6 = counter_u64_alloc(M_WAITOK);
553260169Sglebius	priv->nfinfo_packets6 = counter_u64_alloc(M_WAITOK);
554260169Sglebius	priv->nfinfo_sbytes = counter_u64_alloc(M_WAITOK);
555260169Sglebius	priv->nfinfo_spackets = counter_u64_alloc(M_WAITOK);
556260169Sglebius	priv->nfinfo_sbytes6 = counter_u64_alloc(M_WAITOK);
557260169Sglebius	priv->nfinfo_spackets6 = counter_u64_alloc(M_WAITOK);
558260169Sglebius	priv->nfinfo_act_exp = counter_u64_alloc(M_WAITOK);
559260169Sglebius	priv->nfinfo_inact_exp = counter_u64_alloc(M_WAITOK);
560260169Sglebius
561219182Sglebius	ng_netflow_v9_cache_init(priv);
562219182Sglebius	CTR0(KTR_NET, "ng_netflow startup()");
563135332Sglebius}
564135332Sglebius
565219182Sglebius/* Initialize new FIB table for v5 and v9 */
566219182Sglebiusint
567219182Sglebiusng_netflow_fib_init(priv_p priv, int fib)
568219182Sglebius{
569219182Sglebius	fib_export_p	fe = priv_to_fib(priv, fib);
570219182Sglebius
571219182Sglebius	CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib);
572219182Sglebius
573219182Sglebius	if (fe != NULL)
574219182Sglebius		return (0);
575219182Sglebius
576248724Sglebius	if ((fe = malloc(sizeof(struct fib_export), M_NETGRAPH,
577248724Sglebius	    M_NOWAIT | M_ZERO)) == NULL)
578248725Sglebius		return (ENOMEM);
579219182Sglebius
580219182Sglebius	mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF);
581219182Sglebius	mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF);
582219182Sglebius	fe->fib = fib;
583219182Sglebius	fe->domain_id = fib;
584219182Sglebius
585248724Sglebius	if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib],
586248724Sglebius	    (uintptr_t)NULL, (uintptr_t)fe) == 0) {
587219182Sglebius		/* FIB already set up by other ISR */
588248724Sglebius		CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p",
589248724Sglebius		    fib, fe, priv_to_fib(priv, fib));
590219182Sglebius		mtx_destroy(&fe->export_mtx);
591219182Sglebius		mtx_destroy(&fe->export9_mtx);
592219182Sglebius		free(fe, M_NETGRAPH);
593219182Sglebius	} else {
594219182Sglebius		/* Increase counter for statistics */
595248724Sglebius		CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)",
596248724Sglebius		    fib, fe, priv_to_fib(priv, fib));
597260169Sglebius		priv->nfinfo_alloc_fibs++;
598219182Sglebius	}
599219182Sglebius
600219182Sglebius	return (0);
601219182Sglebius}
602219182Sglebius
603135332Sglebius/* Free all flow cache memory. Called from node close method. */
604135332Sglebiusvoid
605135332Sglebiusng_netflow_cache_flush(priv_p priv)
606135332Sglebius{
607146092Sglebius	struct flow_entry	*fle, *fle1;
608146092Sglebius	struct flow_hash_entry	*hsh;
609219182Sglebius	struct netflow_export_item exp;
610219182Sglebius	fib_export_p fe;
611135332Sglebius	int i;
612135332Sglebius
613219182Sglebius	bzero(&exp, sizeof(exp));
614219182Sglebius
615135332Sglebius	/*
616135332Sglebius	 * We are going to free probably billable data.
617135332Sglebius	 * Expire everything before freeing it.
618135332Sglebius	 * No locking is required since callout is already drained.
619135332Sglebius	 */
620146092Sglebius	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++)
621146092Sglebius		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
622146092Sglebius			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
623219182Sglebius			fe = priv_to_fib(priv, fle->f.r.fib);
624219182Sglebius			expire_flow(priv, fe, fle, NG_QUEUE);
625146092Sglebius		}
626219182Sglebius#ifdef INET6
627223787Sglebius	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++)
628223787Sglebius		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
629223787Sglebius			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
630223787Sglebius			fe = priv_to_fib(priv, fle->f.r.fib);
631223787Sglebius			expire_flow(priv, fe, fle, NG_QUEUE);
632219182Sglebius		}
633219182Sglebius#endif
634135332Sglebius
635146092Sglebius	uma_zdestroy(priv->zone);
636146092Sglebius	/* Destroy hash mutexes. */
637146092Sglebius	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++)
638146092Sglebius		mtx_destroy(&hsh->mtx);
639146092Sglebius
640146092Sglebius	/* Free hash memory. */
641219182Sglebius	if (priv->hash != NULL)
642184205Sdes		free(priv->hash, M_NETFLOW_HASH);
643219182Sglebius#ifdef INET6
644219182Sglebius	uma_zdestroy(priv->zone6);
645219182Sglebius	/* Destroy hash mutexes. */
646223787Sglebius	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++)
647223787Sglebius		mtx_destroy(&hsh->mtx);
648135332Sglebius
649219182Sglebius	/* Free hash memory. */
650219182Sglebius	if (priv->hash6 != NULL)
651219182Sglebius		free(priv->hash6, M_NETFLOW_HASH);
652219182Sglebius#endif
653219182Sglebius
654232921Smelifaro	for (i = 0; i < priv->maxfibs; i++) {
655219182Sglebius		if ((fe = priv_to_fib(priv, i)) == NULL)
656219182Sglebius			continue;
657219182Sglebius
658219182Sglebius		if (fe->exp.item != NULL)
659219182Sglebius			export_send(priv, fe, fe->exp.item, NG_QUEUE);
660219182Sglebius
661219182Sglebius		if (fe->exp.item9 != NULL)
662248724Sglebius			export9_send(priv, fe, fe->exp.item9,
663248724Sglebius			    fe->exp.item9_opt, NG_QUEUE);
664219182Sglebius
665219182Sglebius		mtx_destroy(&fe->export_mtx);
666219182Sglebius		mtx_destroy(&fe->export9_mtx);
667219182Sglebius		free(fe, M_NETGRAPH);
668219182Sglebius	}
669219182Sglebius
670260169Sglebius	counter_u64_free(priv->nfinfo_bytes);
671260169Sglebius	counter_u64_free(priv->nfinfo_packets);
672260169Sglebius	counter_u64_free(priv->nfinfo_bytes6);
673260169Sglebius	counter_u64_free(priv->nfinfo_packets6);
674260169Sglebius	counter_u64_free(priv->nfinfo_sbytes);
675260169Sglebius	counter_u64_free(priv->nfinfo_spackets);
676260169Sglebius	counter_u64_free(priv->nfinfo_sbytes6);
677260169Sglebius	counter_u64_free(priv->nfinfo_spackets6);
678260169Sglebius	counter_u64_free(priv->nfinfo_act_exp);
679260169Sglebius	counter_u64_free(priv->nfinfo_inact_exp);
680260169Sglebius
681219182Sglebius	ng_netflow_v9_cache_flush(priv);
682135332Sglebius}
683135332Sglebius
684146092Sglebius/* Insert packet from into flow cache. */
685135332Sglebiusint
686248724Sglebiusng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip,
687248724Sglebius    caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
688248724Sglebius    unsigned int src_if_index)
689135332Sglebius{
690248724Sglebius	struct flow_entry	*fle, *fle1;
691219182Sglebius	struct flow_hash_entry	*hsh;
692135332Sglebius	struct flow_rec		r;
693143923Sglebius	int			hlen, plen;
694146092Sglebius	int			error = 0;
695248724Sglebius	uint16_t		eproto;
696135332Sglebius	uint8_t			tcp_flags = 0;
697135332Sglebius
698135332Sglebius	bzero(&r, sizeof(r));
699248724Sglebius
700143923Sglebius	if (ip->ip_v != IPVERSION)
701143923Sglebius		return (EINVAL);
702135332Sglebius
703143923Sglebius	hlen = ip->ip_hl << 2;
704143923Sglebius	if (hlen < sizeof(struct ip))
705143923Sglebius		return (EINVAL);
706143923Sglebius
707219182Sglebius	eproto = ETHERTYPE_IP;
708219182Sglebius	/* Assume L4 template by default */
709219182Sglebius	r.flow_type = NETFLOW_V9_FLOW_V4_L4;
710219182Sglebius
711143923Sglebius	r.r_src = ip->ip_src;
712143923Sglebius	r.r_dst = ip->ip_dst;
713219182Sglebius	r.fib = fe->fib;
714143923Sglebius
715143923Sglebius	plen = ntohs(ip->ip_len);
716143923Sglebius
717143923Sglebius	r.r_ip_p = ip->ip_p;
718143923Sglebius	r.r_tos = ip->ip_tos;
719143923Sglebius
720183693Smav	r.r_i_ifx = src_if_index;
721143923Sglebius
722143923Sglebius	/*
723143923Sglebius	 * XXX NOTE: only first fragment of fragmented TCP, UDP and
724143923Sglebius	 * ICMP packet will be recorded with proper s_port and d_port.
725143923Sglebius	 * Following fragments will be recorded simply as IP packet with
726143923Sglebius	 * ip_proto = ip->ip_p and s_port, d_port set to zero.
727143923Sglebius	 * I know, it looks like bug. But I don't want to re-implement
728143923Sglebius	 * ip packet assebmling here. Anyway, (in)famous trafd works this way -
729143923Sglebius	 * and nobody complains yet :)
730143923Sglebius	 */
731144901Sglebius	if ((ip->ip_off & htons(IP_OFFMASK)) == 0)
732144901Sglebius		switch(r.r_ip_p) {
733144901Sglebius		case IPPROTO_TCP:
734248724Sglebius		    {
735248724Sglebius			struct tcphdr *tcp;
736143923Sglebius
737144901Sglebius			tcp = (struct tcphdr *)((caddr_t )ip + hlen);
738144901Sglebius			r.r_sport = tcp->th_sport;
739144901Sglebius			r.r_dport = tcp->th_dport;
740144901Sglebius			tcp_flags = tcp->th_flags;
741144901Sglebius			break;
742248724Sglebius		    }
743248724Sglebius		case IPPROTO_UDP:
744144901Sglebius			r.r_ports = *(uint32_t *)((caddr_t )ip + hlen);
745144901Sglebius			break;
746144901Sglebius		}
747143923Sglebius
748260169Sglebius	counter_u64_add(priv->nfinfo_packets, 1);
749260169Sglebius	counter_u64_add(priv->nfinfo_bytes, plen);
750139374Sglebius
751146092Sglebius	/* Find hash slot. */
752146092Sglebius	hsh = &priv->hash[ip_hash(&r)];
753135332Sglebius
754146092Sglebius	mtx_lock(&hsh->mtx);
755135332Sglebius
756146092Sglebius	/*
757146092Sglebius	 * Go through hash and find our entry. If we encounter an
758146092Sglebius	 * entry, that should be expired, purge it. We do a reverse
759146092Sglebius	 * search since most active entries are first, and most
760146092Sglebius	 * searches are done on most active entries.
761146092Sglebius	 */
762146092Sglebius	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
763146092Sglebius		if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0)
764146092Sglebius			break;
765146092Sglebius		if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) {
766146092Sglebius			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
767248724Sglebius			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
768248724Sglebius			    fle, NG_QUEUE);
769260169Sglebius			counter_u64_add(priv->nfinfo_act_exp, 1);
770146092Sglebius		}
771146092Sglebius	}
772135332Sglebius
773146092Sglebius	if (fle) {			/* An existent entry. */
774146092Sglebius
775135332Sglebius		fle->f.bytes += plen;
776135332Sglebius		fle->f.packets ++;
777135332Sglebius		fle->f.tcp_flags |= tcp_flags;
778135332Sglebius		fle->f.last = time_uptime;
779135332Sglebius
780135332Sglebius		/*
781135332Sglebius		 * We have the following reasons to expire flow in active way:
782135332Sglebius		 * - it hit active timeout
783135332Sglebius		 * - a TCP connection closed
784135332Sglebius		 * - it is going to overflow counter
785135332Sglebius		 */
786135332Sglebius		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) ||
787219182Sglebius		    (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
788146092Sglebius			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
789248724Sglebius			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
790248724Sglebius			    fle, NG_QUEUE);
791260169Sglebius			counter_u64_add(priv->nfinfo_act_exp, 1);
792146092Sglebius		} else {
793146092Sglebius			/*
794146092Sglebius			 * It is the newest, move it to the tail,
795146092Sglebius			 * if it isn't there already. Next search will
796146092Sglebius			 * locate it quicker.
797146092Sglebius			 */
798146092Sglebius			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
799146092Sglebius				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
800146092Sglebius				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
801146092Sglebius			}
802146092Sglebius		}
803146092Sglebius	} else				/* A new flow entry. */
804237227Smelifaro		error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags);
805135332Sglebius
806146092Sglebius	mtx_unlock(&hsh->mtx);
807135332Sglebius
808219182Sglebius	return (error);
809219182Sglebius}
810135332Sglebius
#ifdef INET6
/*
 * Insert an IPv6 packet into the flow cache.
 *
 * The lookup key (struct flow6_rec) is built from the IPv6 header, the
 * upper-layer protocol supplied by the caller and, for packets not flagged
 * NG_NETFLOW_IS_FRAG, the first 32 bits at upper_ptr (the port pair) for
 * TCP, UDP and SCTP. The matching entry is updated, or hash6_insert() is
 * asked to create a new one.
 *
 * priv		- node private data (hash6 table and counters).
 * fe		- export context; only fe->fib is read here.
 * ip6		- IPv6 header.
 * upper_ptr	- start of the upper-layer header.
 * upper_proto	- upper-layer protocol number, stored in the key.
 * flags	- NG_NETFLOW_IS_FRAG is tested here; the value is also
 *		  passed to hash6_insert().
 * src_if_index	- input interface index, stored in the key.
 *
 * Returns EINVAL if the version field is not IPv6; otherwise 0 or the
 * result of hash6_insert().
 */
int
ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6,
    caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
    unsigned int src_if_index)
{
	struct flow_entry	*cur = NULL, *nxt;
	struct flow6_entry	*cur6;
	struct flow_hash_entry	*bkt;
	struct flow6_rec	rec6;
	int			pktlen;
	int			ret;
	uint8_t			thflags = 0;

	/* Refuse anything that does not carry the IPv6 version. */
	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
		return (EINVAL);

	/* The key is compared with bcmp(), so it starts out zeroed. */
	bzero(&rec6, sizeof(rec6));

	rec6.src.r_src6 = ip6->ip6_src;
	rec6.dst.r_dst6 = ip6->ip6_dst;
	rec6.fib = fe->fib;

	/* The L4 template is assumed unless something says otherwise. */
	rec6.flow_type = NETFLOW_V9_FLOW_V6_L4;

	/* ip6_plen does not count the fixed header; add it back. */
	pktlen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);

	/* XXX: DSCP/CoS value is not recorded; r_tos is left at zero. */

	if ((flags & NG_NETFLOW_IS_FRAG) == 0) {
		if (upper_proto == IPPROTO_TCP) {
			rec6.r_ports = *(uint32_t *)upper_ptr;
			thflags = ((struct tcphdr *)upper_ptr)->th_flags;
		} else if (upper_proto == IPPROTO_UDP ||
		    upper_proto == IPPROTO_SCTP) {
			rec6.r_ports = *(uint32_t *)upper_ptr;
		}
	}

	rec6.r_ip_p = upper_proto;
	rec6.r_i_ifx = src_if_index;

	counter_u64_add(priv->nfinfo_packets6, 1);
	counter_u64_add(priv->nfinfo_bytes6, pktlen);

	/* Pick the bucket for this key and take its lock. */
	bkt = &priv->hash6[ip6_hash(&rec6)];
	mtx_lock(&bkt->mtx);

	/*
	 * Walk the bucket from the tail looking for our key. Entries that
	 * are due for expiry and are met on the way are purged.
	 */
	TAILQ_FOREACH_REVERSE_SAFE(cur, &bkt->head, fhead, fle_hash, nxt) {
		if (cur->f.version != IP6VERSION)
			continue;
		cur6 = (struct flow6_entry *)cur;
		if (bcmp(&rec6, &cur6->f.r, sizeof(struct flow6_rec)) == 0)
			break;
		if (AGED(cur6) || (INACTIVE(cur6) && SMALL(cur6))) {
			TAILQ_REMOVE(&bkt->head, cur, fle_hash);
			expire_flow(priv, priv_to_fib(priv, cur->f.r.fib), cur,
			    NG_QUEUE);
			counter_u64_add(priv->nfinfo_act_exp, 1);
		}
	}

	/* Nothing matched: create a new entry and we are done. */
	if (cur == NULL) {
		ret = hash6_insert(priv, bkt, &rec6, pktlen, flags, thflags);
		mtx_unlock(&bkt->mtx);
		return (ret);
	}

	/* Account this packet in the existing entry. */
	cur6 = (struct flow6_entry *)cur;
	cur6->f.bytes += pktlen;
	cur6->f.packets++;
	cur6->f.tcp_flags |= thflags;
	cur6->f.last = time_uptime;

	/*
	 * Active expiry happens when the TCP connection is closing
	 * (FIN or RST seen), the active timeout was hit, or the byte
	 * counter is about to overflow.
	 */
	if ((thflags & (TH_FIN | TH_RST)) != 0 || AGED(cur6) ||
	    cur6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) {
		TAILQ_REMOVE(&bkt->head, cur, fle_hash);
		expire_flow(priv, priv_to_fib(priv, cur->f.r.fib), cur,
		    NG_QUEUE);
		counter_u64_add(priv->nfinfo_act_exp, 1);
	} else if (cur != TAILQ_LAST(&bkt->head, fhead)) {
		/*
		 * Freshest entry goes to the tail so that the next
		 * reverse search reaches it sooner.
		 */
		TAILQ_REMOVE(&bkt->head, cur, fle_hash);
		TAILQ_INSERT_TAIL(&bkt->head, cur, fle_hash);
	}

	mtx_unlock(&bkt->mtx);

	return (0);
}
#endif
933135332Sglebius
934135332Sglebius/*
935146092Sglebius * Return records from cache to userland.
936135332Sglebius *
937135332Sglebius * TODO: matching particular IP should be done in kernel, here.
938135332Sglebius */
939135332Sglebiusint
940223787Sglebiusng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req,
941223787Sglebiusstruct ngnf_show_header *resp)
942135332Sglebius{
943219182Sglebius	struct flow_hash_entry	*hsh;
944219182Sglebius	struct flow_entry	*fle;
945223787Sglebius	struct flow_entry_data	*data = (struct flow_entry_data *)(resp + 1);
946223787Sglebius#ifdef INET6
947223787Sglebius	struct flow6_entry_data	*data6 = (struct flow6_entry_data *)(resp + 1);
948223787Sglebius#endif
949223787Sglebius	int	i, max;
950135332Sglebius
951223787Sglebius	i = req->hash_id;
952223787Sglebius	if (i > NBUCKETS-1)
953223787Sglebius		return (EINVAL);
954135332Sglebius
955223787Sglebius#ifdef INET6
956223787Sglebius	if (req->version == 6) {
957223787Sglebius		resp->version = 6;
958223787Sglebius		hsh = priv->hash6 + i;
959223787Sglebius		max = NREC6_AT_ONCE;
960223787Sglebius	} else
961223787Sglebius#endif
962223787Sglebius	if (req->version == 4) {
963223787Sglebius		resp->version = 4;
964223787Sglebius		hsh = priv->hash + i;
965223787Sglebius		max = NREC_AT_ONCE;
966223787Sglebius	} else
967223787Sglebius		return (EINVAL);
968135332Sglebius
969135332Sglebius	/*
970135332Sglebius	 * We will transfer not more than NREC_AT_ONCE. More data
971135332Sglebius	 * will come in next message.
972223787Sglebius	 * We send current hash index and current record number in list
973223787Sglebius	 * to userland, and userland should return it back to us.
974223787Sglebius	 * Then, we will restart with new entry.
975146092Sglebius	 *
976223787Sglebius	 * The resulting cache snapshot can be inaccurate if flow expiration
977223787Sglebius	 * is taking place on hash item between userland data requests for
978223787Sglebius	 * this hash item id.
979135332Sglebius	 */
980223787Sglebius	resp->nentries = 0;
981146092Sglebius	for (; i < NBUCKETS; hsh++, i++) {
982223787Sglebius		int list_id;
983146092Sglebius
984223787Sglebius		if (mtx_trylock(&hsh->mtx) == 0) {
985223787Sglebius			/*
986223787Sglebius			 * Requested hash index is not available,
987223787Sglebius			 * relay decision to skip or re-request data
988223787Sglebius			 * to userland.
989223787Sglebius			 */
990223787Sglebius			resp->hash_id = i;
991223787Sglebius			resp->list_id = 0;
992223787Sglebius			return (0);
993223787Sglebius		}
994223787Sglebius
995223787Sglebius		list_id = 0;
996146092Sglebius		TAILQ_FOREACH(fle, &hsh->head, fle_hash) {
997223787Sglebius			if (hsh->mtx.mtx_lock & MTX_CONTESTED) {
998223787Sglebius				resp->hash_id = i;
999223787Sglebius				resp->list_id = list_id;
1000223822Sglebius				mtx_unlock(&hsh->mtx);
1001223787Sglebius				return (0);
1002223787Sglebius			}
1003146092Sglebius
1004223787Sglebius			list_id++;
1005223787Sglebius			/* Search for particular record in list. */
1006223787Sglebius			if (req->list_id > 0) {
1007223787Sglebius				if (list_id < req->list_id)
1008223787Sglebius					continue;
1009223787Sglebius
1010223787Sglebius				/* Requested list position found. */
1011223787Sglebius				req->list_id = 0;
1012223787Sglebius			}
1013223787Sglebius#ifdef INET6
1014223787Sglebius			if (req->version == 6) {
1015223787Sglebius				struct flow6_entry *fle6;
1016223787Sglebius
1017223787Sglebius				fle6 = (struct flow6_entry *)fle;
1018223787Sglebius				bcopy(&fle6->f, data6 + resp->nentries,
1019223787Sglebius				    sizeof(fle6->f));
1020223787Sglebius			} else
1021223787Sglebius#endif
1022223787Sglebius				bcopy(&fle->f, data + resp->nentries,
1023223787Sglebius				    sizeof(fle->f));
1024223787Sglebius			resp->nentries++;
1025223787Sglebius			if (resp->nentries == max) {
1026223787Sglebius				resp->hash_id = i;
1027223787Sglebius				/*
1028223787Sglebius				 * If it was the last item in list
1029223787Sglebius				 * we simply skip to next hash_id.
1030223787Sglebius				 */
1031223787Sglebius				resp->list_id = list_id + 1;
1032223822Sglebius				mtx_unlock(&hsh->mtx);
1033146092Sglebius				return (0);
1034146092Sglebius			}
1035135332Sglebius		}
1036146092Sglebius		mtx_unlock(&hsh->mtx);
1037146092Sglebius	}
1038135332Sglebius
1039223787Sglebius	resp->hash_id = resp->list_id = 0;
1040223787Sglebius
1041135332Sglebius	return (0);
1042135332Sglebius}
1043135332Sglebius
1044135332Sglebius/* We have full datagram in privdata. Send it to export hook. */
1045135332Sglebiusstatic int
1046219182Sglebiusexport_send(priv_p priv, fib_export_p fe, item_p item, int flags)
1047135332Sglebius{
1048146092Sglebius	struct mbuf *m = NGI_M(item);
1049146092Sglebius	struct netflow_v5_export_dgram *dgram = mtod(m,
1050146092Sglebius					struct netflow_v5_export_dgram *);
1051146092Sglebius	struct netflow_v5_header *header = &dgram->header;
1052135332Sglebius	struct timespec ts;
1053135332Sglebius	int error = 0;
1054135332Sglebius
1055146092Sglebius	/* Fill mbuf header. */
1056146092Sglebius	m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v5_record) *
1057146092Sglebius	   header->count + sizeof(struct netflow_v5_header);
1058146092Sglebius
1059146092Sglebius	/* Fill export header. */
1060143103Sglebius	header->sys_uptime = htonl(MILLIUPTIME(time_uptime));
1061135332Sglebius	getnanotime(&ts);
1062135332Sglebius	header->unix_secs  = htonl(ts.tv_sec);
1063135332Sglebius	header->unix_nsecs = htonl(ts.tv_nsec);
1064158027Smaxim	header->engine_type = 0;
1065219182Sglebius	header->engine_id = fe->domain_id;
1066158027Smaxim	header->pad = 0;
1067219182Sglebius	header->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq,
1068152847Sglebius	    header->count));
1069135332Sglebius	header->count = htons(header->count);
1070135332Sglebius
1071146092Sglebius	if (priv->export != NULL)
1072154277Sglebius		NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags);
1073175718Smav	else
1074175718Smav		NG_FREE_ITEM(item);
1075135332Sglebius
1076135332Sglebius	return (error);
1077135332Sglebius}
1078135332Sglebius
1079135332Sglebius
1080146092Sglebius/* Add export record to dgram. */
1081135332Sglebiusstatic int
1082146092Sglebiusexport_add(item_p item, struct flow_entry *fle)
1083135332Sglebius{
1084146092Sglebius	struct netflow_v5_export_dgram *dgram = mtod(NGI_M(item),
1085146092Sglebius					struct netflow_v5_export_dgram *);
1086146092Sglebius	struct netflow_v5_header *header = &dgram->header;
1087135332Sglebius	struct netflow_v5_record *rec;
1088135332Sglebius
1089175717Smav	rec = &dgram->r[header->count];
1090175717Smav	header->count ++;
1091135332Sglebius
1092146092Sglebius	KASSERT(header->count <= NETFLOW_V5_MAX_RECORDS,
1093146092Sglebius	    ("ng_netflow: export too big"));
1094146092Sglebius
1095146092Sglebius	/* Fill in export record. */
1096135332Sglebius	rec->src_addr = fle->f.r.r_src.s_addr;
1097135332Sglebius	rec->dst_addr = fle->f.r.r_dst.s_addr;
1098135332Sglebius	rec->next_hop = fle->f.next_hop.s_addr;
1099135332Sglebius	rec->i_ifx    = htons(fle->f.fle_i_ifx);
1100135332Sglebius	rec->o_ifx    = htons(fle->f.fle_o_ifx);
1101135332Sglebius	rec->packets  = htonl(fle->f.packets);
1102135332Sglebius	rec->octets   = htonl(fle->f.bytes);
1103143103Sglebius	rec->first    = htonl(MILLIUPTIME(fle->f.first));
1104143103Sglebius	rec->last     = htonl(MILLIUPTIME(fle->f.last));
1105135332Sglebius	rec->s_port   = fle->f.r.r_sport;
1106135332Sglebius	rec->d_port   = fle->f.r.r_dport;
1107135332Sglebius	rec->flags    = fle->f.tcp_flags;
1108135332Sglebius	rec->prot     = fle->f.r.r_ip_p;
1109135332Sglebius	rec->tos      = fle->f.r.r_tos;
1110135332Sglebius	rec->dst_mask = fle->f.dst_mask;
1111135332Sglebius	rec->src_mask = fle->f.src_mask;
1112210500Sglebius	rec->pad1     = 0;
1113210500Sglebius	rec->pad2     = 0;
1114135332Sglebius
1115146092Sglebius	/* Not supported fields. */
1116146092Sglebius	rec->src_as = rec->dst_as = 0;
1117135332Sglebius
1118146092Sglebius	if (header->count == NETFLOW_V5_MAX_RECORDS)
1119146092Sglebius		return (1); /* end of datagram */
1120146092Sglebius	else
1121146092Sglebius		return (0);
1122135332Sglebius}
1123135332Sglebius
1124135332Sglebius/* Periodic flow expiry run. */
1125135332Sglebiusvoid
1126135332Sglebiusng_netflow_expire(void *arg)
1127135332Sglebius{
1128146092Sglebius	struct flow_entry	*fle, *fle1;
1129146092Sglebius	struct flow_hash_entry	*hsh;
1130146092Sglebius	priv_p			priv = (priv_p )arg;
1131260169Sglebius	int			used, i;
1132135332Sglebius
1133146092Sglebius	/*
1134146092Sglebius	 * Going through all the cache.
1135146092Sglebius	 */
1136260169Sglebius	used = uma_zone_get_cur(priv->zone);
1137146092Sglebius	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) {
1138135332Sglebius		/*
1139146092Sglebius		 * Skip entries, that are already being worked on.
1140135332Sglebius		 */
1141146092Sglebius		if (mtx_trylock(&hsh->mtx) == 0)
1142146092Sglebius			continue;
1143135332Sglebius
1144146092Sglebius		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
1145146092Sglebius			/*
1146146092Sglebius			 * Interrupt thread wants this entry!
1147146092Sglebius			 * Quick! Quick! Bail out!
1148146092Sglebius			 */
1149146092Sglebius			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
1150146092Sglebius				break;
1151135332Sglebius
1152135332Sglebius			/*
1153146092Sglebius			 * Don't expire aggressively while hash collision
1154146092Sglebius			 * ratio is predicted small.
1155135332Sglebius			 */
1156146092Sglebius			if (used <= (NBUCKETS*2) && !INACTIVE(fle))
1157146092Sglebius				break;
1158135332Sglebius
1159163239Sglebius			if ((INACTIVE(fle) && (SMALL(fle) ||
1160163240Sglebius			    (used > (NBUCKETS*2)))) || AGED(fle)) {
1161146092Sglebius				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
1162248724Sglebius				expire_flow(priv, priv_to_fib(priv,
1163248724Sglebius				    fle->f.r.fib), fle, NG_NOFLAGS);
1164146092Sglebius				used--;
1165260169Sglebius				counter_u64_add(priv->nfinfo_inact_exp, 1);
1166146092Sglebius			}
1167135332Sglebius		}
1168146092Sglebius		mtx_unlock(&hsh->mtx);
1169146092Sglebius	}
1170135332Sglebius
1171219182Sglebius#ifdef INET6
1172260169Sglebius	used = uma_zone_get_cur(priv->zone6);
1173223787Sglebius	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) {
1174223787Sglebius		struct flow6_entry	*fle6;
1175223787Sglebius
1176219182Sglebius		/*
1177219182Sglebius		 * Skip entries, that are already being worked on.
1178219182Sglebius		 */
1179223787Sglebius		if (mtx_trylock(&hsh->mtx) == 0)
1180219182Sglebius			continue;
1181135332Sglebius
1182223787Sglebius		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
1183223787Sglebius			fle6 = (struct flow6_entry *)fle;
1184219182Sglebius			/*
1185219182Sglebius			 * Interrupt thread wants this entry!
1186219182Sglebius			 * Quick! Quick! Bail out!
1187219182Sglebius			 */
1188223787Sglebius			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
1189219182Sglebius				break;
1190219182Sglebius
1191219182Sglebius			/*
1192219182Sglebius			 * Don't expire aggressively while hash collision
1193219182Sglebius			 * ratio is predicted small.
1194219182Sglebius			 */
1195219182Sglebius			if (used <= (NBUCKETS*2) && !INACTIVE(fle6))
1196219182Sglebius				break;
1197219182Sglebius
1198219182Sglebius			if ((INACTIVE(fle6) && (SMALL(fle6) ||
1199219182Sglebius			    (used > (NBUCKETS*2)))) || AGED(fle6)) {
1200223787Sglebius				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
1201223787Sglebius				expire_flow(priv, priv_to_fib(priv,
1202223787Sglebius				    fle->f.r.fib), fle, NG_NOFLAGS);
1203219182Sglebius				used--;
1204260169Sglebius				counter_u64_add(priv->nfinfo_inact_exp, 1);
1205219182Sglebius			}
1206219182Sglebius		}
1207223787Sglebius		mtx_unlock(&hsh->mtx);
1208219182Sglebius	}
1209219182Sglebius#endif
1210219182Sglebius
1211146092Sglebius	/* Schedule next expire. */
1212135332Sglebius	callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire,
1213135332Sglebius	    (void *)priv);
1214135332Sglebius}
1215