1/*	$OpenBSD: if_pflow.c,v 1.100 2023/11/09 08:53:20 mvs Exp $	*/
2
3/*
4 * Copyright (c) 2023 Rubicon Communications, LLC (Netgate)
5 * Copyright (c) 2011 Florian Obser <florian@narrans.de>
6 * Copyright (c) 2011 Sebastian Benoit <benoit-lists@fb12.de>
7 * Copyright (c) 2008 Henning Brauer <henning@openbsd.org>
8 * Copyright (c) 2008 Joerg Goltermann <jg@osn.de>
9 *
10 * Permission to use, copy, modify, and distribute this software for any
11 * purpose with or without fee is hereby granted, provided that the above
12 * copyright notice and this permission notice appear in all copies.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
15 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
16 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
17 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
19 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
20 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 */
22
23#include <sys/cdefs.h>
24#include <sys/param.h>
25#include <sys/bus.h>
26#include <sys/callout.h>
27#include <sys/endian.h>
28#include <sys/interrupt.h>
29#include <sys/kernel.h>
30#include <sys/malloc.h>
31#include <sys/module.h>
32#include <sys/mbuf.h>
33#include <sys/socket.h>
34#include <sys/socketvar.h>
35#include <sys/sockio.h>
36#include <sys/sysctl.h>
37#include <sys/systm.h>
38#include <sys/priv.h>
39
40#include <net/if.h>
41#include <net/if_types.h>
42#include <net/bpf.h>
43#include <net/route.h>
44#include <netinet/in.h>
45#include <netinet/if_ether.h>
46#include <netinet/tcp.h>
47
48#include <netinet/ip.h>
49#include <netinet/ip_icmp.h>
50#include <netinet/ip_var.h>
51#include <netinet/udp.h>
52#include <netinet/udp_var.h>
53#include <netinet/in_pcb.h>
54
55#include <netlink/netlink.h>
56#include <netlink/netlink_ctl.h>
57#include <netlink/netlink_generic.h>
58#include <netlink/netlink_message_writer.h>
59
60#include <net/pfvar.h>
61#include <net/pflow.h>
62#include "net/if_var.h"
63
/*
 * Smallest MTU that still fits one NetFlow v5 header plus a single
 * v5 flow record.
 */
#define PFLOW_MINMTU	\
    (sizeof(struct pflow_header) + sizeof(struct pflow_flow))

#ifdef PFLOWDEBUG
#define DPRINTF(x)	do { printf x ; } while (0)
#else
#define DPRINTF(x)
#endif

/* Record families used to select which pending IPFIX set to flush. */
enum pflow_family_t {
	PFLOW_INET,	/* IPv4 flow records */
	PFLOW_INET6,	/* IPv6 flow records */
	PFLOW_NAT4,	/* NAT44 session (create/delete) records */
};
78
79static void	pflow_output_process(void *);
80static int	pflow_create(int);
81static int	pflow_destroy(int, bool);
82static int	pflow_calc_mtu(struct pflow_softc *, int, int);
83static void	pflow_setmtu(struct pflow_softc *, int);
84static int	pflowvalidsockaddr(const struct sockaddr *, int);
85
86static struct mbuf	*pflow_get_mbuf(struct pflow_softc *, u_int16_t);
87static void	pflow_flush(struct pflow_softc *);
88static int	pflow_sendout_v5(struct pflow_softc *);
89static int	pflow_sendout_ipfix(struct pflow_softc *, enum pflow_family_t);
90static int	pflow_sendout_ipfix_tmpl(struct pflow_softc *);
91static int	pflow_sendout_mbuf(struct pflow_softc *, struct mbuf *);
92static int	sysctl_pflowstats(SYSCTL_HANDLER_ARGS);
93static void	pflow_timeout(void *);
94static void	pflow_timeout6(void *);
95static void	pflow_timeout_tmpl(void *);
96static void	pflow_timeout_nat4(void *);
97static void	copy_flow_data(struct pflow_flow *, struct pflow_flow *,
98	const struct pf_kstate *, struct pf_state_key *, int, int);
99static void	copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *,
100	struct pflow_ipfix_flow4 *, const struct pf_kstate *, struct pf_state_key *,
101	struct pflow_softc *, int, int);
102static void	copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *,
103	struct pflow_ipfix_flow6 *, const struct pf_kstate *, struct pf_state_key *,
104	struct pflow_softc *, int, int);
105static int	pflow_pack_flow(const struct pf_kstate *, struct pf_state_key *,
106	struct pflow_softc *);
107static int	pflow_pack_flow_ipfix(const struct pf_kstate *, struct pf_state_key *,
108	struct pflow_softc *);
109static void	export_pflow(const struct pf_kstate *);
110static int	export_pflow_if(const struct pf_kstate*, struct pf_state_key *,
111	struct pflow_softc *);
112static int	copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc);
113static int	copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow,
114	struct pflow_softc *sc);
115static int	copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow,
116	struct pflow_softc *sc);
117static int	copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *,
118	const struct pf_kstate *, struct pflow_softc *,
119	uint8_t, uint64_t);
120
static const char pflowname[] = "pflow";

/* Indices into the per-VNET statistics counter array. */
enum pflowstat_counters {
	pflow_flows,	/* flow records queued for export */
	pflow_packets,	/* export packets (counted at send time) */
	pflow_onomem,	/* mbuf/cluster allocation failures */
	pflow_oerrors,	/* output errors */
	pflow_ncounters,
};
/* Per-CPU counters, exported via the net.pflow.stats sysctl. */
struct pflowstats_ctr {
	counter_u64_t	c[pflow_ncounters];
};
133
134/**
135 * Locking concept
136 *
137 * The list of pflow devices (V_pflowif_list) is managed through epoch.
138 * It is safe to read the list without locking (while in NET_EPOCH).
139 * There may only be one simultaneous modifier, hence we need V_pflow_list_mtx
140 * on every add/delete.
141 *
142 * Each pflow interface protects its own data with the sc_lock mutex.
143 *
144 * We do not require any pf locks, and in fact expect to be called without
145 * hashrow locks held.
146 **/
147
148VNET_DEFINE(struct unrhdr *,	pflow_unr);
149#define	V_pflow_unr	VNET(pflow_unr)
150VNET_DEFINE(CK_LIST_HEAD(, pflow_softc), pflowif_list);
151#define	V_pflowif_list	VNET(pflowif_list)
152VNET_DEFINE(struct mtx, pflowif_list_mtx);
153#define	V_pflowif_list_mtx	VNET(pflowif_list_mtx)
154VNET_DEFINE(struct pflowstats_ctr,	 pflowstat);
155#define	V_pflowstats	VNET(pflowstat)
156
157#define	PFLOW_LOCK(_sc)		mtx_lock(&(_sc)->sc_lock)
158#define	PFLOW_UNLOCK(_sc)	mtx_unlock(&(_sc)->sc_lock)
159#define	PFLOW_ASSERT(_sc)	mtx_assert(&(_sc)->sc_lock, MA_OWNED)
160
161SYSCTL_NODE(_net, OID_AUTO, pflow, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
162    "PFLOW");
163SYSCTL_PROC(_net_pflow, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
164    0, 0, sysctl_pflowstats, "S,pflowstats",
165    "PFLOW statistics (struct pflowstats, net/if_pflow.h)");
166
/* Bump one of this VNET's pflow statistics counters by one. */
static inline void
pflowstat_inc(enum pflowstat_counters c)
{
	counter_u64_add(V_pflowstats.c[c], 1);
}
172
/*
 * Per-VNET initialization: empty interface list, its mutex, the
 * unit-number allocator and the statistics counters.
 */
static void
vnet_pflowattach(void)
{
	CK_LIST_INIT(&V_pflowif_list);
	mtx_init(&V_pflowif_list_mtx, "pflow interface list mtx", NULL, MTX_DEF);

	/* Unit numbers are allocated/recycled under the list mutex. */
	V_pflow_unr = new_unrhdr(0, PFLOW_MAX_ENTRIES - 1, &V_pflowif_list_mtx);

	for (int i = 0; i < pflow_ncounters; i++)
		V_pflowstats.c[i] = counter_u64_alloc(M_WAITOK);
}
184VNET_SYSINIT(vnet_pflowattach, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
185    vnet_pflowattach, NULL);
186
/*
 * Per-VNET teardown: destroy every remaining pflow interface, then
 * release the unit allocator, list mutex and counters.
 */
static void
vnet_pflowdetach(void)
{
	struct pflow_softc	*sc;

	/*
	 * pflow_destroy() unlinks each sc but defers freeing the memory
	 * to an epoch callback (pflow_free_cb), so advancing the
	 * iterator from a just-removed element still reads valid memory.
	 * NOTE(review): drain=false defers the wait for readers to
	 * vnet_pflow_finalise(); confirm the SYSUNINIT ordering keeps
	 * that callback running after this one.
	 */
	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
		pflow_destroy(sc->sc_id, false);
	}

	MPASS(CK_LIST_EMPTY(&V_pflowif_list));
	delete_unrhdr(V_pflow_unr);
	mtx_destroy(&V_pflowif_list_mtx);

	for (int i = 0; i < pflow_ncounters; i++)
		counter_u64_free(V_pflowstats.c[i]);
}
203VNET_SYSUNINIT(vnet_pflowdetach, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
204    vnet_pflowdetach, NULL);
205
/*
 * Final per-VNET cleanup step: wait for outstanding epoch callbacks
 * (pflow_free_cb) so no softc memory outlives the VNET.
 */
static void
vnet_pflow_finalise(void)
{
	/*
	 * Ensure we've freed all interfaces, and do not have pending
	 * epoch cleanup calls.
	 */
	NET_EPOCH_DRAIN_CALLBACKS();
}
215VNET_SYSUNINIT(vnet_pflow_finalise, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
216    vnet_pflow_finalise, NULL);
217
218static void
219pflow_output_process(void *arg)
220{
221	struct mbufq ml;
222	struct pflow_softc *sc = arg;
223	struct mbuf *m;
224
225	mbufq_init(&ml, 0);
226
227	PFLOW_LOCK(sc);
228	mbufq_concat(&ml, &sc->sc_outputqueue);
229	PFLOW_UNLOCK(sc);
230
231	CURVNET_SET(sc->sc_vnet);
232	while ((m = mbufq_dequeue(&ml)) != NULL) {
233		pflow_sendout_mbuf(sc, m);
234	}
235	CURVNET_RESTORE();
236}
237
238static int
239pflow_create(int unit)
240{
241	struct pflow_softc	*pflowif;
242	int			 error;
243
244	pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
245	mtx_init(&pflowif->sc_lock, "pflowlk", NULL, MTX_DEF);
246	pflowif->sc_version = PFLOW_PROTO_DEFAULT;
247	pflowif->sc_observation_dom = PFLOW_ENGINE_TYPE;
248
249	/* ipfix template init */
250	bzero(&pflowif->sc_tmpl_ipfix,sizeof(pflowif->sc_tmpl_ipfix));
251	pflowif->sc_tmpl_ipfix.set_header.set_id =
252	    htons(PFLOW_IPFIX_TMPL_SET_ID);
253	pflowif->sc_tmpl_ipfix.set_header.set_length =
254	    htons(sizeof(struct pflow_ipfix_tmpl));
255
256	/* ipfix IPv4 template */
257	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.tmpl_id =
258	    htons(PFLOW_IPFIX_TMPL_IPV4_ID);
259	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.field_count
260	    = htons(PFLOW_IPFIX_TMPL_IPV4_FIELD_COUNT);
261	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.field_id =
262	    htons(PFIX_IE_sourceIPv4Address);
263	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.len = htons(4);
264	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.field_id =
265	    htons(PFIX_IE_destinationIPv4Address);
266	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.len = htons(4);
267	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.field_id =
268	    htons(PFIX_IE_ingressInterface);
269	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.len = htons(4);
270	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.field_id =
271	    htons(PFIX_IE_egressInterface);
272	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.len = htons(4);
273	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.field_id =
274	    htons(PFIX_IE_packetDeltaCount);
275	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.len = htons(8);
276	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.field_id =
277	    htons(PFIX_IE_octetDeltaCount);
278	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.len = htons(8);
279	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.field_id =
280	    htons(PFIX_IE_flowStartMilliseconds);
281	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.len = htons(8);
282	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.field_id =
283	    htons(PFIX_IE_flowEndMilliseconds);
284	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.len = htons(8);
285	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.field_id =
286	    htons(PFIX_IE_sourceTransportPort);
287	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.len = htons(2);
288	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.field_id =
289	    htons(PFIX_IE_destinationTransportPort);
290	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.len = htons(2);
291	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.field_id =
292	    htons(PFIX_IE_ipClassOfService);
293	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.len = htons(1);
294	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.field_id =
295	    htons(PFIX_IE_protocolIdentifier);
296	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.len = htons(1);
297
298	/* ipfix IPv6 template */
299	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.tmpl_id =
300	    htons(PFLOW_IPFIX_TMPL_IPV6_ID);
301	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.field_count =
302	    htons(PFLOW_IPFIX_TMPL_IPV6_FIELD_COUNT);
303	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.field_id =
304	    htons(PFIX_IE_sourceIPv6Address);
305	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.len = htons(16);
306	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.field_id =
307	    htons(PFIX_IE_destinationIPv6Address);
308	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.len = htons(16);
309	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.field_id =
310	    htons(PFIX_IE_ingressInterface);
311	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.len = htons(4);
312	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.field_id =
313	    htons(PFIX_IE_egressInterface);
314	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.len = htons(4);
315	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.field_id =
316	    htons(PFIX_IE_packetDeltaCount);
317	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.len = htons(8);
318	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.field_id =
319	    htons(PFIX_IE_octetDeltaCount);
320	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.len = htons(8);
321	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.field_id =
322	    htons(PFIX_IE_flowStartMilliseconds);
323	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.len = htons(8);
324	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.field_id =
325	    htons(PFIX_IE_flowEndMilliseconds);
326	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.len = htons(8);
327	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.field_id =
328	    htons(PFIX_IE_sourceTransportPort);
329	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.len = htons(2);
330	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.field_id =
331	    htons(PFIX_IE_destinationTransportPort);
332	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.len = htons(2);
333	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.field_id =
334	    htons(PFIX_IE_ipClassOfService);
335	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.len = htons(1);
336	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.field_id =
337	    htons(PFIX_IE_protocolIdentifier);
338	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.len = htons(1);
339
340	/* NAT44 create template */
341	pflowif->sc_tmpl_ipfix.nat44_tmpl.h.tmpl_id =
342	    htons(PFLOW_IPFIX_TMPL_NAT44_ID);
343	pflowif->sc_tmpl_ipfix.nat44_tmpl.h.field_count =
344	    htons(PFLOW_IPFIX_TMPL_NAT44_FIELD_COUNT);
345	pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.field_id =
346	    htons(PFIX_IE_timeStamp);
347	pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.len =
348	    htons(8);
349	pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.field_id =
350	    htons(PFIX_IE_natEvent);
351	pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.len =
352	    htons(1);
353	pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.field_id =
354	    htons(PFIX_IE_protocolIdentifier);
355	pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.len = htons(1);
356	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.field_id =
357	    htons(PFIX_IE_sourceIPv4Address);
358	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.len =
359	    htons(4);
360	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.field_id =
361	    htons(PFIX_IE_sourceTransportPort);
362	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.len = htons(2);
363	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.field_id =
364	    htons(PFIX_IE_postNATSourceIPv4Address);
365	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.len =
366	    htons(4);
367	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.field_id =
368	    htons(PFIX_IE_postNAPTSourceTransportPort);
369	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.len =
370	    htons(2);
371	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.field_id =
372	    htons(PFIX_IE_destinationIPv4Address);
373	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.len =
374	    htons(4);
375	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.field_id =
376	    htons(PFIX_IE_destinationTransportPort);
377	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.len = htons(2);
378	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.field_id =
379	    htons(PFIX_IE_postNATDestinationIPv4Address);
380	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.len =
381	    htons(4);
382	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.field_id =
383	    htons(PFIX_IE_postNAPTDestinationTransportPort);
384	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.len =
385	    htons(2);
386
387	pflowif->sc_id = unit;
388	pflowif->sc_vnet = curvnet;
389
390	mbufq_init(&pflowif->sc_outputqueue, 8192);
391	pflow_setmtu(pflowif, ETHERMTU);
392
393	callout_init_mtx(&pflowif->sc_tmo, &pflowif->sc_lock, 0);
394	callout_init_mtx(&pflowif->sc_tmo6, &pflowif->sc_lock, 0);
395	callout_init_mtx(&pflowif->sc_tmo_nat4, &pflowif->sc_lock, 0);
396	callout_init_mtx(&pflowif->sc_tmo_tmpl, &pflowif->sc_lock, 0);
397
398	error = swi_add(&pflowif->sc_swi_ie, pflowname, pflow_output_process,
399	    pflowif, SWI_NET, INTR_MPSAFE, &pflowif->sc_swi_cookie);
400	if (error) {
401		free(pflowif, M_DEVBUF);
402		return (error);
403	}
404
405	/* Insert into list of pflows */
406	mtx_lock(&V_pflowif_list_mtx);
407	CK_LIST_INSERT_HEAD(&V_pflowif_list, pflowif, sc_next);
408	mtx_unlock(&V_pflowif_list_mtx);
409
410	V_pflow_export_state_ptr = export_pflow;
411
412	return (0);
413}
414
415static void
416pflow_free_cb(struct epoch_context *ctx)
417{
418	struct pflow_softc *sc;
419
420	sc = __containerof(ctx, struct pflow_softc, sc_epoch_ctx);
421
422	free(sc, M_DEVBUF);
423}
424
/*
 * Tear down the pflow interface with the given unit number.  With
 * 'drain' set, wait out concurrent epoch readers before releasing
 * resources.  Returns ENOENT when no such unit exists.
 */
static int
pflow_destroy(int unit, bool drain)
{
	struct pflow_softc	*sc;
	int			 error __diagused;

	/* Unlink from the interface list under the list mutex. */
	mtx_lock(&V_pflowif_list_mtx);
	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
		if (sc->sc_id == unit)
			break;
	}
	if (sc == NULL) {
		mtx_unlock(&V_pflowif_list_mtx);
		return (ENOENT);
	}
	CK_LIST_REMOVE(sc, sc_next);
	/* Last interface gone: stop pf from calling export_pflow(). */
	if (CK_LIST_EMPTY(&V_pflowif_list))
		V_pflow_export_state_ptr = NULL;
	mtx_unlock(&V_pflowif_list_mtx);

	sc->sc_dying = 1;

	if (drain) {
		/* Let's be sure no one is using this interface any more. */
		NET_EPOCH_DRAIN_CALLBACKS();
	}

	/* Stop the output SWI, then all pending flush timeouts. */
	error = swi_remove(sc->sc_swi_cookie);
	MPASS(error == 0);
	error = intr_event_destroy(sc->sc_swi_ie);
	MPASS(error == 0);

	callout_drain(&sc->sc_tmo);
	callout_drain(&sc->sc_tmo6);
	callout_drain(&sc->sc_tmo_nat4);
	callout_drain(&sc->sc_tmo_tmpl);

	/* m_freem(NULL) is a no-op, so never-opened sets are fine. */
	m_freem(sc->sc_mbuf);
	m_freem(sc->sc_mbuf6);
	m_freem(sc->sc_mbuf_nat4);

	PFLOW_LOCK(sc);
	mbufq_drain(&sc->sc_outputqueue);
	if (sc->so != NULL) {
		soclose(sc->so);
		sc->so = NULL;
	}
	if (sc->sc_flowdst != NULL)
		free(sc->sc_flowdst, M_DEVBUF);
	if (sc->sc_flowsrc != NULL)
		free(sc->sc_flowsrc, M_DEVBUF);
	PFLOW_UNLOCK(sc);

	mtx_destroy(&sc->sc_lock);

	free_unr(V_pflow_unr, unit);

	/* Free the softc itself only after a full epoch has passed. */
	NET_EPOCH_CALL(pflow_free_cb, &sc->sc_epoch_ctx);

	return (0);
}
486
/*
 * Return non-zero when 'sa' names a usable flow endpoint: a non-NULL
 * AF_INET/AF_INET6 address that is not the unspecified address and,
 * unless 'ignore_port' is set, carries a non-zero port.
 */
static int
pflowvalidsockaddr(const struct sockaddr *sa, int ignore_port)
{
	if (sa == NULL)
		return (0);

	switch (sa->sa_family) {
	case AF_INET: {
		const struct sockaddr_in *in4 =
		    (const struct sockaddr_in *)sa;

		if (in4->sin_addr.s_addr == INADDR_ANY)
			return (0);
		return (ignore_port || in4->sin_port != 0);
	}
	case AF_INET6: {
		const struct sockaddr_in6 *in6 =
		    (const struct sockaddr_in6 *)sa;

		if (IN6_IS_ADDR_UNSPECIFIED(&in6->sin6_addr))
			return (0);
		return (ignore_port || in6->sin6_port != 0);
	}
	default:
		return (0);
	}
}
508
509int
510pflow_calc_mtu(struct pflow_softc *sc, int mtu, int hdrsz)
511{
512	size_t min;
513
514	sc->sc_maxcount4 = (mtu - hdrsz -
515	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow4);
516	sc->sc_maxcount6 = (mtu - hdrsz -
517	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow6);
518	sc->sc_maxcount_nat4 = (mtu - hdrsz -
519	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_nat4);
520	if (sc->sc_maxcount4 > PFLOW_MAXFLOWS)
521		sc->sc_maxcount4 = PFLOW_MAXFLOWS;
522	if (sc->sc_maxcount6 > PFLOW_MAXFLOWS)
523		sc->sc_maxcount6 = PFLOW_MAXFLOWS;
524	if (sc->sc_maxcount_nat4 > PFLOW_MAXFLOWS)
525		sc->sc_maxcount_nat4 = PFLOW_MAXFLOWS;
526
527	min = MIN(sc->sc_maxcount4 * sizeof(struct pflow_ipfix_flow4),
528	    sc->sc_maxcount6 * sizeof(struct pflow_ipfix_flow6));
529	min = MIN(min, sc->sc_maxcount_nat4 * sizeof(struct pflow_ipfix_nat4));
530
531	return (hdrsz + sizeof(struct udpiphdr) + min);
532}
533
534static void
535pflow_setmtu(struct pflow_softc *sc, int mtu_req)
536{
537	int	mtu;
538
539	mtu = mtu_req;
540
541	switch (sc->sc_version) {
542	case PFLOW_PROTO_5:
543		sc->sc_maxcount = (mtu - sizeof(struct pflow_header) -
544		    sizeof(struct udpiphdr)) / sizeof(struct pflow_flow);
545		if (sc->sc_maxcount > PFLOW_MAXFLOWS)
546		    sc->sc_maxcount = PFLOW_MAXFLOWS;
547		break;
548	case PFLOW_PROTO_10:
549		pflow_calc_mtu(sc, mtu, sizeof(struct pflow_v10_header));
550		break;
551	default: /* NOTREACHED */
552		break;
553	}
554}
555
/*
 * Allocate a cluster-backed mbuf for a new export packet.  With a
 * NULL 'sc' only an empty mbuf is returned; otherwise the packet is
 * primed with a NetFlow v5 header (PFLOW_PROTO_5) or an IPFIX set
 * header for 'set_id' (PFLOW_PROTO_10).  Returns NULL on allocation
 * failure.
 */
static struct mbuf *
pflow_get_mbuf(struct pflow_softc *sc, u_int16_t set_id)
{
	struct pflow_set_header	 set_hdr;
	struct pflow_header	 h;
	struct mbuf		*m;

	MGETHDR(m, M_NOWAIT, MT_DATA);
	if (m == NULL) {
		pflowstat_inc(pflow_onomem);
		return (NULL);
	}

	MCLGET(m, M_NOWAIT);
	if ((m->m_flags & M_EXT) == 0) {
		m_free(m);
		pflowstat_inc(pflow_onomem);
		return (NULL);
	}

	m->m_len = m->m_pkthdr.len = 0;

	if (sc == NULL)		/* get only a new empty mbuf */
		return (m);

	switch (sc->sc_version) {
	case PFLOW_PROTO_5:
		/* populate pflow_header */
		h.reserved1 = 0;
		h.reserved2 = 0;
		h.count = 0;	/* final record count patched at send time */
		h.version = htons(PFLOW_PROTO_5);
		h.flow_sequence = htonl(sc->sc_gcounter);
		h.engine_type = PFLOW_ENGINE_TYPE;
		h.engine_id = PFLOW_ENGINE_ID;
		/*
		 * NOTE(review): the timestamp fields of 'h' are copied out
		 * uninitialized here — presumably overwritten when the
		 * packet is sent; confirm in pflow_sendout_v5().
		 */
		m_copyback(m, 0, PFLOW_HDRLEN, (caddr_t)&h);

		sc->sc_count = 0;
		/* Ensure a partially filled packet goes out eventually. */
		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
		    pflow_timeout, sc);
		break;
	case PFLOW_PROTO_10:
		/* populate pflow_set_header */
		set_hdr.set_length = 0;	/* patched when the set is flushed */
		set_hdr.set_id = htons(set_id);
		m_copyback(m, 0, PFLOW_SET_HDRLEN, (caddr_t)&set_hdr);
		break;
	default: /* NOTREACHED */
		break;
	}

	return (m);
}
609
610static void
611copy_flow_data(struct pflow_flow *flow1, struct pflow_flow *flow2,
612    const struct pf_kstate *st, struct pf_state_key *sk, int src, int dst)
613{
614	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
615	flow1->src_port = flow2->dest_port = sk->port[src];
616	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
617	flow1->dest_port = flow2->src_port = sk->port[dst];
618
619	flow1->dest_as = flow2->src_as =
620	    flow1->src_as = flow2->dest_as = 0;
621	flow1->if_index_in = htons(st->if_index_in);
622	flow1->if_index_out = htons(st->if_index_out);
623	flow2->if_index_in = htons(st->if_index_out);
624	flow2->if_index_out = htons(st->if_index_in);
625	flow1->dest_mask = flow2->src_mask =
626	    flow1->src_mask = flow2->dest_mask = 0;
627
628	flow1->flow_packets = htonl(st->packets[0]);
629	flow2->flow_packets = htonl(st->packets[1]);
630	flow1->flow_octets = htonl(st->bytes[0]);
631	flow2->flow_octets = htonl(st->bytes[1]);
632
633	/*
634	 * Pretend the flow was created or expired when the machine came up
635	 * when creation is in the future of the last time a package was seen
636	 * or was created / expired before this machine came up due to pfsync.
637	 */
638	flow1->flow_start = flow2->flow_start = st->creation < 0 ||
639	    st->creation > st->expire ? htonl(0) : htonl(st->creation);
640	flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? htonl(0) :
641	    htonl(st->expire);
642	flow1->tcp_flags = flow2->tcp_flags = 0;
643	flow1->protocol = flow2->protocol = sk->proto;
644	flow1->tos = flow2->tos = st->rule.ptr->tos;
645}
646
/*
 * Fill two IPFIX IPv4 flow records from one pf state: flow1 carries
 * the src->dst direction, flow2 the reverse.  The template declares
 * flowStart/flowEndMilliseconds, so timestamps are wall-clock values
 * derived from pf's uptime-based stamps.  ('sc' is unused here.)
 */
static void
copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *flow1,
    struct pflow_ipfix_flow4 *flow2, const struct pf_kstate *st,
    struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
{
	/* Endpoints: flow2 is flow1 with the endpoints swapped. */
	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
	flow1->src_port = flow2->dest_port = sk->port[src];
	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
	flow1->dest_port = flow2->src_port = sk->port[dst];

	flow1->if_index_in = htonl(st->if_index_in);
	flow1->if_index_out = htonl(st->if_index_out);
	flow2->if_index_in = htonl(st->if_index_out);
	flow2->if_index_out = htonl(st->if_index_in);

	flow1->flow_packets = htobe64(st->packets[0]);
	flow2->flow_packets = htobe64(st->packets[1]);
	flow1->flow_octets = htobe64(st->bytes[0]);
	flow2->flow_octets = htobe64(st->bytes[1]);

	/*
	 * Pretend the flow was created when the machine came up when creation
	 * is in the future of the last time a package was seen due to pfsync.
	 */
	if (st->creation > st->expire)
		flow1->flow_start = flow2->flow_start = htobe64((time_second -
		    time_uptime)*1000);
	else
		flow1->flow_start = flow2->flow_start = htobe64((pf_get_time() -
		    (pf_get_uptime() - st->creation)));
	flow1->flow_finish = flow2->flow_finish = htobe64((pf_get_time() -
	    (pf_get_uptime() - st->expire)));

	flow1->protocol = flow2->protocol = sk->proto;
	flow1->tos = flow2->tos = st->rule.ptr->tos;
}
683
/*
 * Fill two IPFIX IPv6 flow records from one pf state: flow1 carries
 * the src->dst direction, flow2 the reverse.  Same timestamp
 * handling as the IPv4 variant.  ('sc' is unused here.)
 */
static void
copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *flow1,
    struct pflow_ipfix_flow6 *flow2, const struct pf_kstate *st,
    struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
{
	/* IPv6 addresses are copied; flow2 is flow1 endpoint-swapped. */
	bcopy(&sk->addr[src].v6, &flow1->src_ip, sizeof(flow1->src_ip));
	bcopy(&sk->addr[src].v6, &flow2->dest_ip, sizeof(flow2->dest_ip));
	flow1->src_port = flow2->dest_port = sk->port[src];
	bcopy(&sk->addr[dst].v6, &flow1->dest_ip, sizeof(flow1->dest_ip));
	bcopy(&sk->addr[dst].v6, &flow2->src_ip, sizeof(flow2->src_ip));
	flow1->dest_port = flow2->src_port = sk->port[dst];

	flow1->if_index_in = htonl(st->if_index_in);
	flow1->if_index_out = htonl(st->if_index_out);
	flow2->if_index_in = htonl(st->if_index_out);
	flow2->if_index_out = htonl(st->if_index_in);

	flow1->flow_packets = htobe64(st->packets[0]);
	flow2->flow_packets = htobe64(st->packets[1]);
	flow1->flow_octets = htobe64(st->bytes[0]);
	flow2->flow_octets = htobe64(st->bytes[1]);

	/*
	 * Pretend the flow was created when the machine came up when creation
	 * is in the future of the last time a package was seen due to pfsync.
	 */
	if (st->creation > st->expire)
		flow1->flow_start = flow2->flow_start = htobe64((time_second -
		    time_uptime)*1000);
	else
		flow1->flow_start = flow2->flow_start = htobe64((pf_get_time() -
		    (pf_get_uptime() - st->creation)));
	flow1->flow_finish = flow2->flow_finish = htobe64((pf_get_time() -
	    (pf_get_uptime() - st->expire)));

	flow1->protocol = flow2->protocol = sk->proto;
	flow1->tos = flow2->tos = st->rule.ptr->tos;
}
722
723static void
724copy_nat_ipfix_4_data(struct pflow_ipfix_nat4 *nat1,
725    struct pflow_ipfix_nat4 *nat2, const struct pf_kstate *st,
726    struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
727{
728	nat1->src_ip = nat2->dest_ip = st->key[PF_SK_STACK]->addr[src].v4.s_addr;
729	nat1->src_port = nat2->dest_port = st->key[PF_SK_STACK]->port[src];
730	nat1->dest_ip = nat2->src_ip = st->key[PF_SK_STACK]->addr[dst].v4.s_addr;
731	nat1->dest_port = nat2->src_port = st->key[PF_SK_STACK]->port[dst];
732	nat1->postnat_src_ip = nat2->postnat_dest_ip = st->key[PF_SK_WIRE]->addr[src].v4.s_addr;
733	nat1->postnat_src_port = nat2->postnat_dest_port = st->key[PF_SK_WIRE]->port[src];
734	nat1->postnat_dest_ip = nat2->postnat_src_ip = st->key[PF_SK_WIRE]->addr[dst].v4.s_addr;
735	nat1->postnat_dest_port = nat2->postnat_src_port = st->key[PF_SK_WIRE]->port[dst];
736	nat1->protocol = nat2->protocol = sk->proto;
737
738	/*
739	 * Because we have to generate a create and delete event we'll fill out the
740	 * timestamp and nat_event fields when we transmit. As opposed to doing this
741	 * work a second time.
742	*/
743}
744
/*
 * pf hook (installed as V_pflow_export_state_ptr): offer a finished
 * state to every pflow interface that can carry its address family.
 * Runs inside NET_EPOCH so the interface list can be walked unlocked;
 * each interface's own sc_lock is taken in turn.
 */
static void
export_pflow(const struct pf_kstate *st)
{
	struct pflow_softc	*sc = NULL;
	struct pf_state_key	*sk;

	NET_EPOCH_ASSERT();

	/* e.g. if pf_state_key_attach() fails. */
	if (st->key[PF_SK_STACK] == NULL || st->key[PF_SK_WIRE] == NULL)
		return;

	/* Wire-side key for inbound states, stack-side for outbound. */
	sk = st->key[st->direction == PF_IN ? PF_SK_WIRE : PF_SK_STACK];

	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
		PFLOW_LOCK(sc);
		switch (sc->sc_version) {
		case PFLOW_PROTO_5:
			/* NetFlow v5 only carries IPv4. */
			if (sk->af == AF_INET)
				export_pflow_if(st, sk, sc);
			break;
		case PFLOW_PROTO_10:
			if (sk->af == AF_INET || sk->af == AF_INET6)
				export_pflow_if(st, sk, sc);
			break;
		default: /* NOTREACHED */
			break;
		}
		PFLOW_UNLOCK(sc);
	}
}
776
777static int
778export_pflow_if(const struct pf_kstate *st, struct pf_state_key *sk,
779    struct pflow_softc *sc)
780{
781	struct pf_kstate	 pfs_copy;
782	u_int64_t		 bytes[2];
783	int			 ret = 0;
784
785	if (sc->sc_version == PFLOW_PROTO_10)
786		return (pflow_pack_flow_ipfix(st, sk, sc));
787
788	/* PFLOW_PROTO_5 */
789	if ((st->bytes[0] < (u_int64_t)PFLOW_MAXBYTES)
790	    && (st->bytes[1] < (u_int64_t)PFLOW_MAXBYTES))
791		return (pflow_pack_flow(st, sk, sc));
792
793	/* flow > PFLOW_MAXBYTES need special handling */
794	bcopy(st, &pfs_copy, sizeof(pfs_copy));
795	bytes[0] = pfs_copy.bytes[0];
796	bytes[1] = pfs_copy.bytes[1];
797
798	while (bytes[0] > PFLOW_MAXBYTES) {
799		pfs_copy.bytes[0] = PFLOW_MAXBYTES;
800		pfs_copy.bytes[1] = 0;
801
802		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
803			return (ret);
804		if ((bytes[0] - PFLOW_MAXBYTES) > 0)
805			bytes[0] -= PFLOW_MAXBYTES;
806	}
807
808	while (bytes[1] > (u_int64_t)PFLOW_MAXBYTES) {
809		pfs_copy.bytes[1] = PFLOW_MAXBYTES;
810		pfs_copy.bytes[0] = 0;
811
812		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
813			return (ret);
814		if ((bytes[1] - PFLOW_MAXBYTES) > 0)
815			bytes[1] -= PFLOW_MAXBYTES;
816	}
817
818	pfs_copy.bytes[0] = bytes[0];
819	pfs_copy.bytes[1] = bytes[1];
820
821	return (pflow_pack_flow(&pfs_copy, sk, sc));
822}
823
824static int
825copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc)
826{
827	int		ret = 0;
828
829	PFLOW_ASSERT(sc);
830
831	if (sc->sc_mbuf == NULL) {
832		if ((sc->sc_mbuf = pflow_get_mbuf(sc, 0)) == NULL)
833			return (ENOBUFS);
834	}
835	m_copyback(sc->sc_mbuf, PFLOW_HDRLEN +
836	    (sc->sc_count * sizeof(struct pflow_flow)),
837	    sizeof(struct pflow_flow), (caddr_t)flow);
838
839	pflowstat_inc(pflow_flows);
840	sc->sc_gcounter++;
841	sc->sc_count++;
842
843	if (sc->sc_count >= sc->sc_maxcount)
844		ret = pflow_sendout_v5(sc);
845
846	return(ret);
847}
848
849static int
850copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow, struct pflow_softc *sc)
851{
852	int		ret = 0;
853
854	PFLOW_ASSERT(sc);
855
856	if (sc->sc_mbuf == NULL) {
857		if ((sc->sc_mbuf =
858		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV4_ID)) == NULL) {
859			return (ENOBUFS);
860		}
861		sc->sc_count4 = 0;
862		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
863		    pflow_timeout, sc);
864	}
865	m_copyback(sc->sc_mbuf, PFLOW_SET_HDRLEN +
866	    (sc->sc_count4 * sizeof(struct pflow_ipfix_flow4)),
867	    sizeof(struct pflow_ipfix_flow4), (caddr_t)flow);
868
869	pflowstat_inc(pflow_flows);
870	sc->sc_gcounter++;
871	sc->sc_count4++;
872
873	if (sc->sc_count4 >= sc->sc_maxcount4)
874		ret = pflow_sendout_ipfix(sc, PFLOW_INET);
875	return(ret);
876}
877
878static int
879copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow, struct pflow_softc *sc)
880{
881	int		ret = 0;
882
883	PFLOW_ASSERT(sc);
884
885	if (sc->sc_mbuf6 == NULL) {
886		if ((sc->sc_mbuf6 =
887		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV6_ID)) == NULL) {
888			return (ENOBUFS);
889		}
890		sc->sc_count6 = 0;
891		callout_reset(&sc->sc_tmo6, PFLOW_TIMEOUT * hz,
892		    pflow_timeout6, sc);
893	}
894	m_copyback(sc->sc_mbuf6, PFLOW_SET_HDRLEN +
895	    (sc->sc_count6 * sizeof(struct pflow_ipfix_flow6)),
896	    sizeof(struct pflow_ipfix_flow6), (caddr_t)flow);
897
898	pflowstat_inc(pflow_flows);
899	sc->sc_gcounter++;
900	sc->sc_count6++;
901
902	if (sc->sc_count6 >= sc->sc_maxcount6)
903		ret = pflow_sendout_ipfix(sc, PFLOW_INET6);
904
905	return(ret);
906}
907
908int
909copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *nat, const struct pf_kstate *st,
910    struct pflow_softc *sc, uint8_t event, uint64_t timestamp)
911{
912	int		ret = 0;
913
914	PFLOW_ASSERT(sc);
915
916	if (sc->sc_mbuf_nat4 == NULL) {
917		if ((sc->sc_mbuf_nat4 =
918		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_NAT44_ID)) == NULL) {
919			return (ENOBUFS);
920		}
921		sc->sc_count_nat4 = 0;
922		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
923		    pflow_timeout_nat4, sc);
924	}
925
926	nat->nat_event = event;
927	nat->timestamp = htobe64(pf_get_time() - (pf_get_uptime() - timestamp));
928	m_copyback(sc->sc_mbuf_nat4, PFLOW_SET_HDRLEN +
929	    (sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4)),
930	    sizeof(struct pflow_ipfix_nat4), (caddr_t)nat);
931	sc->sc_count_nat4++;
932
933	pflowstat_inc(pflow_flows);
934	sc->sc_gcounter++;
935
936	if (sc->sc_count_nat4 >= sc->sc_maxcount_nat4)
937		ret = pflow_sendout_ipfix(sc, PFLOW_NAT4);
938
939	return (ret);
940}
941
942static int
943pflow_pack_flow(const struct pf_kstate *st, struct pf_state_key *sk,
944    struct pflow_softc *sc)
945{
946	struct pflow_flow	 flow1;
947	struct pflow_flow	 flow2;
948	int			 ret = 0;
949
950	bzero(&flow1, sizeof(flow1));
951	bzero(&flow2, sizeof(flow2));
952
953	if (st->direction == PF_OUT)
954		copy_flow_data(&flow1, &flow2, st, sk, 1, 0);
955	else
956		copy_flow_data(&flow1, &flow2, st, sk, 0, 1);
957
958	if (st->bytes[0] != 0) /* first flow from state */
959		ret = copy_flow_to_m(&flow1, sc);
960
961	if (st->bytes[1] != 0) /* second flow from state */
962		ret = copy_flow_to_m(&flow2, sc);
963
964	return (ret);
965}
966
967static bool
968pflow_is_natd(const struct pf_kstate *st)
969{
970	/* If ports or addresses are different we've been NAT-ed. */
971	return (memcmp(st->key[PF_SK_WIRE], st->key[PF_SK_STACK],
972	    sizeof(struct pf_addr) * 2 + sizeof(uint16_t) * 2) != 0);
973}
974
/*
 * Build and queue the IPFIX records for a pf state: per-direction flow
 * records for IPv4 or IPv6, plus NAT44 session create/delete events when
 * the state was translated.  Returns the last record-queue error (0 if
 * everything was queued, or if the address family is unsupported).
 */
static int
pflow_pack_flow_ipfix(const struct pf_kstate *st, struct pf_state_key *sk,
    struct pflow_softc *sc)
{
	struct pflow_ipfix_flow4	 flow4_1, flow4_2;
	struct pflow_ipfix_nat4		 nat4_1, nat4_2;
	struct pflow_ipfix_flow6	 flow6_1, flow6_2;
	int				 ret = 0;
	bool				 nat = false;

	if (sk->af == AF_INET) {
		bzero(&flow4_1, sizeof(flow4_1));
		bzero(&flow4_2, sizeof(flow4_2));

		nat = pflow_is_natd(st);

		/* Direction decides which record is the forward one. */
		if (st->direction == PF_OUT)
			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
			    1, 0);
		else
			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
			    0, 1);

		if (nat)
			copy_nat_ipfix_4_data(&nat4_1, &nat4_2, st, sk, sc, 1, 0);

		if (st->bytes[0] != 0) /* first flow from state */ {
			ret = copy_flow_ipfix_4_to_m(&flow4_1, sc);

			/*
			 * Export the NAT session as a create + delete pair;
			 * errors from both calls are OR-ed together.
			 */
			if (ret == 0 && nat) {
				ret = copy_nat_ipfix_4_to_m(&nat4_1, st, sc,
				    PFIX_NAT_EVENT_SESSION_CREATE, st->creation);
				ret |= copy_nat_ipfix_4_to_m(&nat4_1, st, sc,
				    PFIX_NAT_EVENT_SESSION_DELETE, st->expire);
			}
		}

		if (st->bytes[1] != 0) /* second flow from state */ {
			ret = copy_flow_ipfix_4_to_m(&flow4_2, sc);

			if (ret == 0 && nat) {
				ret = copy_nat_ipfix_4_to_m(&nat4_2, st, sc,
				    PFIX_NAT_EVENT_SESSION_CREATE, st->creation);
				ret |= copy_nat_ipfix_4_to_m(&nat4_2, st, sc,
				    PFIX_NAT_EVENT_SESSION_DELETE, st->expire);
			}
		}
	} else if (sk->af == AF_INET6) {
		bzero(&flow6_1, sizeof(flow6_1));
		bzero(&flow6_2, sizeof(flow6_2));

		if (st->direction == PF_OUT)
			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
			    1, 0);
		else
			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
			    0, 1);

		if (st->bytes[0] != 0) /* first flow from state */
			ret = copy_flow_ipfix_6_to_m(&flow6_1, sc);

		if (st->bytes[1] != 0) /* second flow from state */
			ret = copy_flow_ipfix_6_to_m(&flow6_2, sc);
	}
	return (ret);
}
1041
1042static void
1043pflow_timeout(void *v)
1044{
1045	struct pflow_softc	*sc = v;
1046
1047	PFLOW_ASSERT(sc);
1048	CURVNET_SET(sc->sc_vnet);
1049
1050	switch (sc->sc_version) {
1051	case PFLOW_PROTO_5:
1052		pflow_sendout_v5(sc);
1053		break;
1054	case PFLOW_PROTO_10:
1055		pflow_sendout_ipfix(sc, PFLOW_INET);
1056		break;
1057	default: /* NOTREACHED */
1058		panic("Unsupported version %d", sc->sc_version);
1059		break;
1060	}
1061
1062	CURVNET_RESTORE();
1063}
1064
1065static void
1066pflow_timeout6(void *v)
1067{
1068	struct pflow_softc	*sc = v;
1069
1070	PFLOW_ASSERT(sc);
1071
1072	if (sc->sc_version != PFLOW_PROTO_10)
1073		return;
1074
1075	CURVNET_SET(sc->sc_vnet);
1076	pflow_sendout_ipfix(sc, PFLOW_INET6);
1077	CURVNET_RESTORE();
1078}
1079
1080static void
1081pflow_timeout_tmpl(void *v)
1082{
1083	struct pflow_softc	*sc = v;
1084
1085	PFLOW_ASSERT(sc);
1086
1087	if (sc->sc_version != PFLOW_PROTO_10)
1088		return;
1089
1090	CURVNET_SET(sc->sc_vnet);
1091	pflow_sendout_ipfix_tmpl(sc);
1092	CURVNET_RESTORE();
1093}
1094
1095static void
1096pflow_timeout_nat4(void *v)
1097{
1098	struct pflow_softc	*sc = v;
1099
1100	PFLOW_ASSERT(sc);
1101
1102	if (sc->sc_version != PFLOW_PROTO_10)
1103		return;
1104
1105	CURVNET_SET(sc->sc_vnet);
1106	pflow_sendout_ipfix(sc, PFLOW_NAT4);
1107	CURVNET_RESTORE();
1108}
1109
1110static void
1111pflow_flush(struct pflow_softc *sc)
1112{
1113	PFLOW_ASSERT(sc);
1114
1115	switch (sc->sc_version) {
1116	case PFLOW_PROTO_5:
1117		pflow_sendout_v5(sc);
1118		break;
1119	case PFLOW_PROTO_10:
1120		pflow_sendout_ipfix(sc, PFLOW_INET);
1121		pflow_sendout_ipfix(sc, PFLOW_INET6);
1122		pflow_sendout_ipfix(sc, PFLOW_NAT4);
1123		break;
1124	default: /* NOTREACHED */
1125		break;
1126	}
1127}
1128
/*
 * Finalize the pending NetFlow v5 packet (fill in the header's record
 * count and timestamps) and hand it to the software-interrupt output
 * path.  Returns 0; a NULL pending mbuf is not an error.
 */
static int
pflow_sendout_v5(struct pflow_softc *sc)
{
	struct mbuf		*m = sc->sc_mbuf;
	struct pflow_header	*h;
	struct timespec		tv;

	PFLOW_ASSERT(sc);

	if (m == NULL)
		return (0);

	/* Detach the packet so new records start a fresh mbuf. */
	sc->sc_mbuf = NULL;

	pflowstat_inc(pflow_packets);
	h = mtod(m, struct pflow_header *);
	h->count = htons(sc->sc_count);

	/* populate pflow_header */
	h->uptime_ms = htonl(time_uptime * 1000);

	getnanotime(&tv);
	h->time_sec = htonl(tv.tv_sec);			/* XXX 2038 */
	h->time_nanosec = htonl(tv.tv_nsec);
	/* Queue for transmission from the swi handler, not inline. */
	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
		swi_sched(sc->sc_swi_cookie, 0);

	return (0);
}
1158
/*
 * Finalize the pending IPFIX packet for the given record family
 * (IPv4 flows, IPv6 flows, or NAT44 events): stop that family's flush
 * timer, fill in the set and message headers, and queue the packet for
 * the software-interrupt output path.  Returns 0 (a NULL pending mbuf
 * is not an error) or ENOBUFS if prepending the message header fails.
 */
static int
pflow_sendout_ipfix(struct pflow_softc *sc, enum pflow_family_t af)
{
	struct mbuf			*m;
	struct pflow_v10_header		*h10;
	struct pflow_set_header		*set_hdr;
	u_int32_t			 count;
	int				 set_length;

	PFLOW_ASSERT(sc);

	/*
	 * Each family keeps its own pending mbuf, record count and
	 * callout; detach the mbuf so new records start a fresh packet.
	 */
	switch (af) {
	case PFLOW_INET:
		m = sc->sc_mbuf;
		callout_stop(&sc->sc_tmo);
		if (m == NULL)
			return (0);
		sc->sc_mbuf = NULL;
		count = sc->sc_count4;
		set_length = sizeof(struct pflow_set_header)
		    + sc->sc_count4 * sizeof(struct pflow_ipfix_flow4);
		break;
	case PFLOW_INET6:
		m = sc->sc_mbuf6;
		callout_stop(&sc->sc_tmo6);
		if (m == NULL)
			return (0);
		sc->sc_mbuf6 = NULL;
		count = sc->sc_count6;
		set_length = sizeof(struct pflow_set_header)
		    + sc->sc_count6 * sizeof(struct pflow_ipfix_flow6);
		break;
	case PFLOW_NAT4:
		m = sc->sc_mbuf_nat4;
		callout_stop(&sc->sc_tmo_nat4);
		if (m == NULL)
			return (0);
		sc->sc_mbuf_nat4 = NULL;
		count = sc->sc_count_nat4;
		set_length = sizeof(struct pflow_set_header)
		    + sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4);
		break;
	default:
		panic("Unsupported AF %d", af);
	}

	pflowstat_inc(pflow_packets);

	/* The set header was reserved at the front by pflow_get_mbuf(). */
	set_hdr = mtod(m, struct pflow_set_header *);
	set_hdr->set_length = htons(set_length);

	/* populate pflow_header */
	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
	if (m == NULL) {
		/* M_PREPEND freed the mbuf on failure. */
		pflowstat_inc(pflow_onomem);
		return (ENOBUFS);
	}
	h10 = mtod(m, struct pflow_v10_header *);
	h10->version = htons(PFLOW_PROTO_10);
	h10->length = htons(PFLOW_IPFIX_HDRLEN + set_length);
	h10->time_sec = htonl(time_second);		/* XXX 2038 */
	h10->flow_sequence = htonl(sc->sc_sequence);
	/* IPFIX sequence counts exported records, not packets. */
	sc->sc_sequence += count;
	h10->observation_dom = htonl(sc->sc_observation_dom);
	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
		swi_sched(sc->sc_swi_cookie, 0);

	return (0);
}
1228
/*
 * Send the IPFIX template set describing our record formats and re-arm
 * the periodic template retransmission timer.  Returns 0 (including
 * when no mbuf could be obtained) or ENOBUFS if prepending the message
 * header fails.
 */
static int
pflow_sendout_ipfix_tmpl(struct pflow_softc *sc)
{
	struct mbuf			*m;
	struct pflow_v10_header		*h10;

	PFLOW_ASSERT(sc);

	m = pflow_get_mbuf(sc, 0);
	if (m == NULL)
		return (0);
	m_copyback(m, 0, sizeof(struct pflow_ipfix_tmpl),
	    (caddr_t)&sc->sc_tmpl_ipfix);

	pflowstat_inc(pflow_packets);

	/* populate pflow_header */
	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
	if (m == NULL) {
		/* M_PREPEND freed the mbuf on failure. */
		pflowstat_inc(pflow_onomem);
		return (ENOBUFS);
	}
	h10 = mtod(m, struct pflow_v10_header *);
	h10->version = htons(PFLOW_PROTO_10);
	h10->length = htons(PFLOW_IPFIX_HDRLEN + sizeof(struct
	    pflow_ipfix_tmpl));
	h10->time_sec = htonl(time_second);		/* XXX 2038 */
	/* Templates carry no records: sequence is sent but not advanced. */
	h10->flow_sequence = htonl(sc->sc_sequence);
	h10->observation_dom = htonl(sc->sc_observation_dom);

	callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
	    pflow_timeout_tmpl, sc);
	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
		swi_sched(sc->sc_swi_cookie, 0);

	return (0);
}
1266
/*
 * Transmit one finished export packet to the configured destination.
 * Consumes the mbuf in all cases (sosend() takes ownership; we free it
 * ourselves when no socket is configured).
 */
static int
pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
{
	if (sc->so == NULL) {
		m_freem(m);
		return (EINVAL);
	}
	return (sosend(sc->so, sc->sc_flowdst, NULL, m, NULL, 0, curthread));
}
1276
1277static int
1278sysctl_pflowstats(SYSCTL_HANDLER_ARGS)
1279{
1280	struct pflowstats pflowstats;
1281
1282	pflowstats.pflow_flows =
1283	    counter_u64_fetch(V_pflowstats.c[pflow_flows]);
1284	pflowstats.pflow_packets =
1285	    counter_u64_fetch(V_pflowstats.c[pflow_packets]);
1286	pflowstats.pflow_onomem =
1287	    counter_u64_fetch(V_pflowstats.c[pflow_onomem]);
1288	pflowstats.pflow_oerrors =
1289	    counter_u64_fetch(V_pflowstats.c[pflow_oerrors]);
1290
1291	return (sysctl_handle_opaque(oidp, &pflowstats, sizeof(pflowstats), req));
1292}
1293
1294static int
1295pflow_nl_list(struct nlmsghdr *hdr, struct nl_pstate *npt)
1296{
1297	struct epoch_tracker	 et;
1298	struct pflow_softc	*sc = NULL;
1299	struct nl_writer	 *nw = npt->nw;
1300	int			 error = 0;
1301
1302	hdr->nlmsg_flags |= NLM_F_MULTI;
1303
1304	NET_EPOCH_ENTER(et);
1305	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1306		if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1307			error = ENOMEM;
1308			goto out;
1309		}
1310
1311		struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1312		ghdr_new->cmd = PFLOWNL_CMD_LIST;
1313		ghdr_new->version = 0;
1314		ghdr_new->reserved = 0;
1315
1316		nlattr_add_u32(nw, PFLOWNL_L_ID, sc->sc_id);
1317
1318		if (! nlmsg_end(nw)) {
1319			error = ENOMEM;
1320			goto out;
1321		}
1322	}
1323
1324out:
1325	NET_EPOCH_EXIT(et);
1326
1327	if (error != 0)
1328		nlmsg_abort(nw);
1329
1330	return (error);
1331}
1332
1333static int
1334pflow_nl_create(struct nlmsghdr *hdr, struct nl_pstate *npt)
1335{
1336	struct nl_writer	 *nw = npt->nw;
1337	int			 error = 0;
1338	int			 unit;
1339
1340	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1341		return (ENOMEM);
1342	}
1343
1344	struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1345	ghdr_new->cmd = PFLOWNL_CMD_CREATE;
1346	ghdr_new->version = 0;
1347	ghdr_new->reserved = 0;
1348
1349	unit = alloc_unr(V_pflow_unr);
1350	if (unit == -1) {
1351		nlmsg_abort(nw);
1352		return (ENOMEM);
1353	}
1354
1355	error = pflow_create(unit);
1356	if (error != 0) {
1357		free_unr(V_pflow_unr, unit);
1358		nlmsg_abort(nw);
1359		return (error);
1360	}
1361
1362	nlattr_add_s32(nw, PFLOWNL_CREATE_ID, unit);
1363
1364	if (! nlmsg_end(nw)) {
1365		pflow_destroy(unit, true);
1366		return (ENOMEM);
1367	}
1368
1369	return (0);
1370}
1371
/* Parsed form of a PFLOWNL_CMD_DEL request: just the target id. */
struct pflow_parsed_del {
	int id;
};
/* _IN is kept for symmetry with the other parsers; unused here. */
#define	_IN(_field)	offsetof(struct genlmsghdr, _field)
#define	_OUT(_field)	offsetof(struct pflow_parsed_del, _field)
static const struct nlattr_parser nla_p_del[] = {
	{ .type = PFLOWNL_DEL_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
};
/* No fixed-header fields to parse for DEL. */
static const struct nlfield_parser nlf_p_del[] = {};
#undef _IN
#undef _OUT
NL_DECLARE_PARSER(del_parser, struct genlmsghdr, nlf_p_del, nla_p_del);
1384
1385static int
1386pflow_nl_del(struct nlmsghdr *hdr, struct nl_pstate *npt)
1387{
1388	struct pflow_parsed_del d = {};
1389	int error;
1390
1391	error = nl_parse_nlmsg(hdr, &del_parser, npt, &d);
1392	if (error != 0)
1393		return (error);
1394
1395	error = pflow_destroy(d.id, true);
1396
1397	return (error);
1398}
1399
/* Parsed form of a PFLOWNL_CMD_GET request: just the target id. */
struct pflow_parsed_get {
	int id;
};
/* _IN is kept for symmetry with the other parsers; unused here. */
#define	_IN(_field)	offsetof(struct genlmsghdr, _field)
#define	_OUT(_field)	offsetof(struct pflow_parsed_get, _field)
static const struct nlattr_parser nla_p_get[] = {
	{ .type = PFLOWNL_GET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
};
/* No fixed-header fields to parse for GET. */
static const struct nlfield_parser nlf_p_get[] = {};
#undef _IN
#undef _OUT
NL_DECLARE_PARSER(get_parser, struct genlmsghdr, nlf_p_get, nla_p_get);
1412
1413static bool
1414nlattr_add_sockaddr(struct nl_writer *nw, int attr, const struct sockaddr *s)
1415{
1416	int off = nlattr_add_nested(nw, attr);
1417	if (off == 0)
1418		return (false);
1419
1420	nlattr_add_u8(nw, PFLOWNL_ADDR_FAMILY, s->sa_family);
1421
1422	switch (s->sa_family) {
1423	case AF_INET: {
1424		const struct sockaddr_in *in = (const struct sockaddr_in *)s;
1425		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in->sin_port);
1426		nlattr_add_in_addr(nw, PFLOWNL_ADDR_IP, &in->sin_addr);
1427		break;
1428	}
1429	case AF_INET6: {
1430		const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)s;
1431		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in6->sin6_port);
1432		nlattr_add_in6_addr(nw, PFLOWNL_ADDR_IP6, &in6->sin6_addr);
1433		break;
1434	}
1435	default:
1436		panic("Unknown address family %d", s->sa_family);
1437	}
1438
1439	nlattr_set_len(nw, off);
1440	return (true);
1441}
1442
/*
 * Netlink GET handler: look up the pflow instance by id and reply with
 * its full configuration (version, source/destination addresses,
 * observation domain, socket status).  Returns ENOENT for an unknown
 * id or ENOMEM on writer exhaustion.
 */
static int
pflow_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
{
	struct epoch_tracker et;
	struct pflow_parsed_get g = {};
	struct pflow_softc *sc = NULL;
	struct nl_writer *nw = npt->nw;
	struct genlmsghdr *ghdr_new;
	int error;

	error = nl_parse_nlmsg(hdr, &get_parser, npt, &g);
	if (error != 0)
		return (error);

	/* The interface list is epoch-protected. */
	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
		if (sc->sc_id == g.id)
			break;
	}
	if (sc == NULL) {
		error = ENOENT;
		goto out;
	}

	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
		nlmsg_abort(nw);
		error = ENOMEM;
		goto out;
	}

	ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
	if (ghdr_new == NULL) {
		nlmsg_abort(nw);
		error = ENOMEM;
		goto out;
	}

	ghdr_new->cmd = PFLOWNL_CMD_GET;
	ghdr_new->version = 0;
	ghdr_new->reserved = 0;

	/* Addresses are optional: only emitted when configured. */
	nlattr_add_u32(nw, PFLOWNL_GET_ID, sc->sc_id);
	nlattr_add_u16(nw, PFLOWNL_GET_VERSION, sc->sc_version);
	if (sc->sc_flowsrc)
		nlattr_add_sockaddr(nw, PFLOWNL_GET_SRC, sc->sc_flowsrc);
	if (sc->sc_flowdst)
		nlattr_add_sockaddr(nw, PFLOWNL_GET_DST, sc->sc_flowdst);
	nlattr_add_u32(nw, PFLOWNL_GET_OBSERVATION_DOMAIN,
	    sc->sc_observation_dom);
	nlattr_add_u8(nw, PFLOWNL_GET_SOCKET_STATUS, sc->so != NULL);

	if (! nlmsg_end(nw)) {
		nlmsg_abort(nw);
		error = ENOMEM;
	}

out:
	NET_EPOCH_EXIT(et);

	return (error);
}
1504
1505struct pflow_sockaddr {
1506	union {
1507		struct sockaddr_in in;
1508		struct sockaddr_in6 in6;
1509		struct sockaddr_storage storage;
1510	};
1511};
1512static bool
1513pflow_postparse_sockaddr(void *parsed_args, struct nl_pstate *npt __unused)
1514{
1515	struct pflow_sockaddr *s = (struct pflow_sockaddr *)parsed_args;
1516
1517	if (s->storage.ss_family == AF_INET)
1518		s->storage.ss_len = sizeof(struct sockaddr_in);
1519	else if (s->storage.ss_family == AF_INET6)
1520		s->storage.ss_len = sizeof(struct sockaddr_in6);
1521	else
1522		return (false);
1523
1524	return (true);
1525}
1526
/*
 * Nested-attribute parser for addresses.  IPv4 and IPv6 callbacks
 * write into the overlaid union; family/port share offsets between
 * sockaddr_in and sockaddr_in6.
 */
#define	_OUT(_field)	offsetof(struct pflow_sockaddr, _field)
static struct nlattr_parser nla_p_sockaddr[] = {
	{ .type = PFLOWNL_ADDR_FAMILY, .off = _OUT(in.sin_family), .cb = nlattr_get_uint8 },
	{ .type = PFLOWNL_ADDR_PORT, .off = _OUT(in.sin_port), .cb = nlattr_get_uint16 },
	{ .type = PFLOWNL_ADDR_IP, .off = _OUT(in.sin_addr), .cb = nlattr_get_in_addr },
	{ .type = PFLOWNL_ADDR_IP6, .off = _OUT(in6.sin6_addr), .cb = nlattr_get_in6_addr },
};
NL_DECLARE_ATTR_PARSER_EXT(addr_parser, nla_p_sockaddr, pflow_postparse_sockaddr);
#undef _OUT
1536
/*
 * Parsed form of a PFLOWNL_CMD_SET request.  Zero/empty fields mean
 * "leave unchanged" (see pflow_set()).
 */
struct pflow_parsed_set {
	int id;
	uint16_t version;
	struct sockaddr_storage src;
	struct sockaddr_storage dst;
	uint32_t observation_dom;
};
/* _IN is kept for symmetry with the other parsers; unused here. */
#define	_IN(_field)	offsetof(struct genlmsghdr, _field)
#define	_OUT(_field)	offsetof(struct pflow_parsed_set, _field)
static const struct nlattr_parser nla_p_set[] = {
	{ .type = PFLOWNL_SET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
	{ .type = PFLOWNL_SET_VERSION, .off = _OUT(version), .cb = nlattr_get_uint16 },
	{ .type = PFLOWNL_SET_SRC, .off = _OUT(src), .arg = &addr_parser, .cb = nlattr_get_nested },
	{ .type = PFLOWNL_SET_DST, .off = _OUT(dst), .arg = &addr_parser, .cb = nlattr_get_nested },
	{ .type = PFLOWNL_SET_OBSERVATION_DOMAIN, .off = _OUT(observation_dom), .cb = nlattr_get_uint32 },
};
/* No fixed-header fields to parse for SET. */
static const struct nlfield_parser nlf_p_set[] = {};
#undef _IN
#undef _OUT
NL_DECLARE_PARSER(set_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
1557
/*
 * Apply a parsed SET request to a pflow instance: optionally change
 * protocol version, flow source/destination addresses and observation
 * domain, (re)creating the export socket as needed.  Fields left at
 * zero/empty in the request are not touched.  Called with the instance
 * lock held.  Returns 0, EINVAL for a bad version, ENOMEM, or a socket
 * creation/bind error.
 */
static int
pflow_set(struct pflow_softc *sc, const struct pflow_parsed_set *pflowr, struct ucred *cred)
{
	struct thread		*td;
	struct socket		*so;
	int			 error = 0;

	td = curthread;

	PFLOW_ASSERT(sc);

	/* Validate the requested version before changing any state. */
	if (pflowr->version != 0) {
		switch(pflowr->version) {
		case PFLOW_PROTO_5:
		case PFLOW_PROTO_10:
			break;
		default:
			return(EINVAL);
		}
	}

	/* Push out pending packets under the old configuration first. */
	pflow_flush(sc);

	if (pflowr->dst.ss_len != 0) {
		/*
		 * A family change invalidates both the stored address
		 * and the socket; reallocate below.
		 */
		if (sc->sc_flowdst != NULL &&
		    sc->sc_flowdst->sa_family != pflowr->dst.ss_family) {
			free(sc->sc_flowdst, M_DEVBUF);
			sc->sc_flowdst = NULL;
			if (sc->so != NULL) {
				soclose(sc->so);
				sc->so = NULL;
			}
		}

		switch (pflowr->dst.ss_family) {
		case AF_INET:
			if (sc->sc_flowdst == NULL) {
				if ((sc->sc_flowdst = malloc(
				    sizeof(struct sockaddr_in),
				    M_DEVBUF,  M_NOWAIT)) == NULL)
					return (ENOMEM);
			}
			memcpy(sc->sc_flowdst, &pflowr->dst,
			    sizeof(struct sockaddr_in));
			sc->sc_flowdst->sa_len = sizeof(struct
			    sockaddr_in);
			break;
		case AF_INET6:
			if (sc->sc_flowdst == NULL) {
				if ((sc->sc_flowdst = malloc(
				    sizeof(struct sockaddr_in6),
				    M_DEVBUF, M_NOWAIT)) == NULL)
					return (ENOMEM);
			}
			memcpy(sc->sc_flowdst, &pflowr->dst,
			    sizeof(struct sockaddr_in6));
			sc->sc_flowdst->sa_len = sizeof(struct
			    sockaddr_in6);
			break;
		default:
			/* Unsupported family: leave destination as-is. */
			break;
		}
	}

	if (pflowr->src.ss_len != 0) {
		/* A new source always forces a socket rebuild (rebind). */
		if (sc->sc_flowsrc != NULL)
			free(sc->sc_flowsrc, M_DEVBUF);
		sc->sc_flowsrc = NULL;
		if (sc->so != NULL) {
			soclose(sc->so);
			sc->so = NULL;
		}
		switch(pflowr->src.ss_family) {
		case AF_INET:
			if ((sc->sc_flowsrc = malloc(
			    sizeof(struct sockaddr_in),
			    M_DEVBUF, M_NOWAIT)) == NULL)
				return (ENOMEM);
			memcpy(sc->sc_flowsrc, &pflowr->src,
			    sizeof(struct sockaddr_in));
			sc->sc_flowsrc->sa_len = sizeof(struct
			    sockaddr_in);
			break;
		case AF_INET6:
			if ((sc->sc_flowsrc = malloc(
			    sizeof(struct sockaddr_in6),
			    M_DEVBUF, M_NOWAIT)) == NULL)
				return (ENOMEM);
			memcpy(sc->sc_flowsrc, &pflowr->src,
			    sizeof(struct sockaddr_in6));
			sc->sc_flowsrc->sa_len = sizeof(struct
			    sockaddr_in6);
			break;
		default:
			/* Unsupported family: leave source unset. */
			break;
		}
	}

	/*
	 * (Re)create the UDP export socket when a valid destination is
	 * configured, binding to the source address if one is set; drop
	 * the socket if the destination became invalid.
	 */
	if (sc->so == NULL) {
		if (pflowvalidsockaddr(sc->sc_flowdst, 0)) {
			error = socreate(sc->sc_flowdst->sa_family,
			    &so, SOCK_DGRAM, IPPROTO_UDP, cred, td);
			if (error)
				return (error);
			if (pflowvalidsockaddr(sc->sc_flowsrc, 1)) {
				error = sobind(so, sc->sc_flowsrc, td);
				if (error) {
					soclose(so);
					return (error);
				}
			}
			sc->so = so;
		}
	} else if (!pflowvalidsockaddr(sc->sc_flowdst, 0)) {
		soclose(sc->so);
		sc->so = NULL;
	}

	if (pflowr->observation_dom != 0)
		sc->sc_observation_dom = pflowr->observation_dom;

	/* error check is above */
	if (pflowr->version != 0)
		sc->sc_version = pflowr->version;

	pflow_setmtu(sc, ETHERMTU);

	/* Adjust the timers to the (possibly new) protocol version. */
	switch (sc->sc_version) {
	case PFLOW_PROTO_5:
		callout_stop(&sc->sc_tmo6);
		callout_stop(&sc->sc_tmo_tmpl);
		break;
	case PFLOW_PROTO_10:
		callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
		    pflow_timeout_tmpl, sc);
		break;
	default: /* NOTREACHED */
		break;
	}

	return (0);
}
1700
1701static int
1702pflow_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
1703{
1704	struct epoch_tracker et;
1705	struct pflow_parsed_set s = {};
1706	struct pflow_softc *sc = NULL;
1707	int error;
1708
1709	error = nl_parse_nlmsg(hdr, &set_parser, npt, &s);
1710	if (error != 0)
1711		return (error);
1712
1713	NET_EPOCH_ENTER(et);
1714	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1715		if (sc->sc_id == s.id)
1716			break;
1717	}
1718	if (sc == NULL) {
1719		error = ENOENT;
1720		goto out;
1721	}
1722
1723	PFLOW_LOCK(sc);
1724	error = pflow_set(sc, &s, nlp_get_cred(npt->nlp));
1725	PFLOW_UNLOCK(sc);
1726
1727out:
1728	NET_EPOCH_EXIT(et);
1729	return (error);
1730}
1731
/*
 * Generic-netlink command table for the pflow family.  All commands
 * require PRIV_NETINET_PF.  NOTE(review): GET lacks GENL_CMD_CAP_DO
 * while every other command has it -- looks intentional (GET is
 * dump-style) but worth confirming.
 */
static const struct genl_cmd pflow_cmds[] = {
	{
		.cmd_num = PFLOWNL_CMD_LIST,
		.cmd_name = "LIST",
		.cmd_cb = pflow_nl_list,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFLOWNL_CMD_CREATE,
		.cmd_name = "CREATE",
		.cmd_cb = pflow_nl_create,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFLOWNL_CMD_DEL,
		.cmd_name = "DEL",
		.cmd_cb = pflow_nl_del,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFLOWNL_CMD_GET,
		.cmd_name = "GET",
		.cmd_cb = pflow_nl_get,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFLOWNL_CMD_SET,
		.cmd_name = "SET",
		.cmd_cb = pflow_nl_set,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
};
1769
/* Every parser declared above, for compile-time layout verification. */
static const struct nlhdr_parser *all_parsers[] = {
	&del_parser,
	&get_parser,
	&set_parser,
};
1775
1776static int
1777pflow_init(void)
1778{
1779	bool ret;
1780	int family_id __diagused;
1781
1782	NL_VERIFY_PARSERS(all_parsers);
1783
1784	family_id = genl_register_family(PFLOWNL_FAMILY_NAME, 0, 2, PFLOWNL_CMD_MAX);
1785	MPASS(family_id != 0);
1786	ret = genl_register_cmds(PFLOWNL_FAMILY_NAME, pflow_cmds, NL_ARRAY_LEN(pflow_cmds));
1787
1788	return (ret ? 0 : ENODEV);
1789}
1790
/* Module teardown: drop the pflow generic-netlink family registration. */
static void
pflow_uninit(void)
{
	genl_unregister_family(PFLOWNL_FAMILY_NAME);
}
1796
1797static int
1798pflow_modevent(module_t mod, int type, void *data)
1799{
1800	int error = 0;
1801
1802	switch (type) {
1803	case MOD_LOAD:
1804		error = pflow_init();
1805		break;
1806	case MOD_UNLOAD:
1807		pflow_uninit();
1808		break;
1809	default:
1810		error = EINVAL;
1811		break;
1812	}
1813
1814	return (error);
1815}
1816
1817static moduledata_t pflow_mod = {
1818	pflowname,
1819	pflow_modevent,
1820	0
1821};
1822
1823DECLARE_MODULE(pflow, pflow_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
1824MODULE_VERSION(pflow, 1);
1825MODULE_DEPEND(pflow, pf, PF_MODVER, PF_MODVER, PF_MODVER);
1826