pf.c revision 230868
1/*	$OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */
2
3/*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#ifdef __FreeBSD__
39#include "opt_inet.h"
40#include "opt_inet6.h"
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: stable/9/sys/contrib/pf/net/pf.c 230868 2012-02-01 15:57:49Z glebius $");
44#endif
45
46#ifdef __FreeBSD__
47#include "opt_bpf.h"
48#include "opt_pf.h"
49
50#define	NPFSYNC		1
51
52#ifdef DEV_PFLOW
53#define	NPFLOW		DEV_PFLOW
54#else
55#define	NPFLOW		0
56#endif
57
58#else
59#include "bpfilter.h"
60#include "pflog.h"
61#include "pfsync.h"
62#include "pflow.h"
63#endif
64
65#include <sys/param.h>
66#include <sys/systm.h>
67#include <sys/mbuf.h>
68#include <sys/filio.h>
69#include <sys/socket.h>
70#include <sys/socketvar.h>
71#include <sys/kernel.h>
72#include <sys/time.h>
73#ifdef __FreeBSD__
74#include <sys/random.h>
75#include <sys/sysctl.h>
76#include <sys/endian.h>
77#define	betoh64		be64toh
78#else
79#include <sys/pool.h>
80#endif
81#include <sys/proc.h>
82#ifdef __FreeBSD__
83#include <sys/kthread.h>
84#include <sys/lock.h>
85#include <sys/sx.h>
86#else
87#include <sys/rwlock.h>
88#endif
89
90#ifdef __FreeBSD__
91#include <sys/md5.h>
92#else
93#include <crypto/md5.h>
94#endif
95
96#include <net/if.h>
97#include <net/if_types.h>
98#include <net/bpf.h>
99#include <net/route.h>
100#ifdef __FreeBSD__
101#ifdef RADIX_MPATH
102#include <net/radix_mpath.h>
103#endif
104#else
105#include <net/radix_mpath.h>
106#endif
107
108#include <netinet/in.h>
109#include <netinet/in_var.h>
110#include <netinet/in_systm.h>
111#include <netinet/ip.h>
112#include <netinet/ip_var.h>
113#include <netinet/tcp.h>
114#include <netinet/tcp_seq.h>
115#include <netinet/udp.h>
116#include <netinet/ip_icmp.h>
117#include <netinet/in_pcb.h>
118#include <netinet/tcp_timer.h>
119#include <netinet/tcp_var.h>
120#include <netinet/udp_var.h>
121#include <netinet/icmp_var.h>
122#include <netinet/if_ether.h>
123#ifdef __FreeBSD__
124#include <netinet/ip_fw.h>
125#include <netinet/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */
126#endif
127
128#ifndef __FreeBSD__
129#include <dev/rndvar.h>
130#endif
131#include <net/pfvar.h>
132#include <net/if_pflog.h>
133#include <net/if_pflow.h>
134#include <net/if_pfsync.h>
135
136#ifdef INET6
137#include <netinet/ip6.h>
138#include <netinet/in_pcb.h>
139#include <netinet/icmp6.h>
140#include <netinet6/nd6.h>
141#ifdef __FreeBSD__
142#include <netinet6/ip6_var.h>
143#include <netinet6/in6_pcb.h>
144#endif
145#endif /* INET6 */
146
147#ifdef __FreeBSD__
148#include <machine/in_cksum.h>
149#include <sys/limits.h>
150#include <sys/ucred.h>
151#include <security/mac/mac_framework.h>
152
153extern int ip_optcopy(struct ip *, struct ip *);
154#endif
155
/*
 * DPFPRINTF(n, x): call printf with the parenthesized argument list `x`
 * when the configured pf debug level is at least `n`, e.g.
 *
 *	DPFPRINTF(PF_DEBUG_MISC, ("pf: value %d\n", v));
 *
 * The expansion is wrapped in do { } while (0) so that it is exactly one
 * statement: used as the body of an unbraced `if`, a following `else`
 * binds to the caller's `if` and not to the test hidden in the macro.
 * Callers must terminate the invocation with a semicolon.
 */
#ifdef __FreeBSD__
#define	DPFPRINTF(n, x)							\
	do {								\
		if (V_pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
#else
#define	DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
#endif
161
/*
 * Global variables
 *
 * Under __FreeBSD__ every object below is declared through VNET_DEFINE();
 * the V_-prefixed accessors defined next to some of them expand to
 * VNET(name).  Other platforms declare the same objects as plain globals.
 */

/* state tables */
#ifdef __FreeBSD__
VNET_DEFINE(struct pf_state_tree,	 pf_statetbl);

/* two ALTQ queue lists plus pointers naming the active/inactive one */
VNET_DEFINE(struct pf_altqqueue,	 pf_altqs[2]);
VNET_DEFINE(struct pf_palist,		 pf_pabuf);
VNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_active);
VNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_inactive);
VNET_DEFINE(struct pf_status,		 pf_status);

VNET_DEFINE(u_int32_t,			 ticket_altqs_active);
VNET_DEFINE(u_int32_t,			 ticket_altqs_inactive);
VNET_DEFINE(int,			 altqs_inactive_open);
VNET_DEFINE(u_int32_t,			 ticket_pabuf);

/* MD5 context, 16-byte secret and bookkeeping, presumably for pf_tcp_iss() */
VNET_DEFINE(MD5_CTX,			 pf_tcp_secret_ctx);
#define	V_pf_tcp_secret_ctx		 VNET(pf_tcp_secret_ctx)
VNET_DEFINE(u_char,			 pf_tcp_secret[16]);
#define	V_pf_tcp_secret			 VNET(pf_tcp_secret)
VNET_DEFINE(int,			 pf_tcp_secret_init);
#define	V_pf_tcp_secret_init		 VNET(pf_tcp_secret_init)
VNET_DEFINE(int,			 pf_tcp_iss_off);
#define	V_pf_tcp_iss_off		 VNET(pf_tcp_iss_off)

/* one frame of the fixed 64-entry anchor stack declared right below */
struct pf_anchor_stackframe {
	struct pf_ruleset		*rs;
	struct pf_rule			*r;
	struct pf_anchor_node		*parent;
	struct pf_anchor		*child;
};
VNET_DEFINE(struct pf_anchor_stackframe, pf_anchor_stack[64]);
#define	V_pf_anchor_stack		 VNET(pf_anchor_stack)

/* allocation zones (uma_zone_t here, struct pool in the other branch) */
VNET_DEFINE(uma_zone_t,	 pf_src_tree_pl);
VNET_DEFINE(uma_zone_t,	 pf_rule_pl);
VNET_DEFINE(uma_zone_t,	 pf_pooladdr_pl);
VNET_DEFINE(uma_zone_t,	 pf_state_pl);
VNET_DEFINE(uma_zone_t,	 pf_state_key_pl);
VNET_DEFINE(uma_zone_t,	 pf_state_item_pl);
VNET_DEFINE(uma_zone_t,	 pf_altq_pl);
#else
struct pf_state_tree	 pf_statetbl;

/* two ALTQ queue lists plus pointers naming the active/inactive one */
struct pf_altqqueue	 pf_altqs[2];
struct pf_palist	 pf_pabuf;
struct pf_altqqueue	*pf_altqs_active;
struct pf_altqqueue	*pf_altqs_inactive;
struct pf_status	 pf_status;

u_int32_t		 ticket_altqs_active;
u_int32_t		 ticket_altqs_inactive;
int			 altqs_inactive_open;
u_int32_t		 ticket_pabuf;

/* MD5 context, 16-byte secret and bookkeeping, presumably for pf_tcp_iss() */
MD5_CTX			 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

/* fixed 64-entry anchor stack */
struct pf_anchor_stackframe {
	struct pf_ruleset			*rs;
	struct pf_rule				*r;
	struct pf_anchor_node			*parent;
	struct pf_anchor			*child;
} pf_anchor_stack[64];

/* allocation pools (struct pool here, uma_zone_t in the FreeBSD branch) */
struct pool		 pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_altq_pl;
#endif
236
/*
 * Forward declarations.  Several prototypes differ between the FreeBSD and
 * non-FreeBSD builds; the differences are called out where they occur.
 */

/* threshold (rate accounting) helpers, defined further down in this file */
void			 pf_init_threshold(struct pf_threshold *, u_int32_t,
			    u_int32_t);
void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);

void			 pf_change_ap(struct pf_addr *, u_int16_t *,
			    u_int16_t *, u_int16_t *, struct pf_addr *,
			    u_int16_t, u_int8_t, sa_family_t);
int			 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
			    struct tcphdr *, struct pf_state_peer *);
#ifdef INET6
void			 pf_change_a6(struct pf_addr *, u_int16_t *,
			    struct pf_addr *, u_int8_t);
#endif /* INET6 */
void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
			    struct pf_addr *, struct pf_addr *, u_int16_t,
			    u_int16_t *, u_int16_t *, u_int16_t *,
			    u_int16_t *, u_int8_t, sa_family_t);
/* the FreeBSD variant of pf_send_tcp() takes a leading struct mbuf * */
#ifdef __FreeBSD__
void			 pf_send_tcp(struct mbuf *,
			    const struct pf_rule *, sa_family_t,
#else
void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
#endif
			    const struct pf_addr *, const struct pf_addr *,
			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
			    u_int16_t, struct ether_header *, struct ifnet *);
static void		 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
			    sa_family_t, struct pf_rule *);
void			 pf_detach_state(struct pf_state *);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
/* the FreeBSD variant of pf_test_rule() takes a trailing struct inpcb * */
int			 pf_test_rule(struct pf_rule **, struct pf_state **,
			    int, struct pfi_kif *, struct mbuf *, int,
			    void *, struct pf_pdesc *, struct pf_rule **,
#ifdef __FreeBSD__
			    struct pf_ruleset **, struct ifqueue *,
			    struct inpcb *);
#else
			    struct pf_ruleset **, struct ifqueue *);
#endif
static __inline int	 pf_create_state(struct pf_rule *, struct pf_rule *,
			    struct pf_rule *, struct pf_pdesc *,
			    struct pf_src_node *, struct pf_state_key *,
			    struct pf_state_key *, struct pf_state_key *,
			    struct pf_state_key *, struct mbuf *, int,
			    u_int16_t, u_int16_t, int *, struct pfi_kif *,
			    struct pf_state **, int, u_int16_t, u_int16_t,
			    int);
int			 pf_test_fragment(struct pf_rule **, int,
			    struct pfi_kif *, struct mbuf *, void *,
			    struct pf_pdesc *, struct pf_rule **,
			    struct pf_ruleset **);
int			 pf_tcp_track_full(struct pf_state_peer *,
			    struct pf_state_peer *, struct pf_state **,
			    struct pfi_kif *, struct mbuf *, int,
			    struct pf_pdesc *, u_short *, int *);
int			pf_tcp_track_sloppy(struct pf_state_peer *,
			    struct pf_state_peer *, struct pf_state **,
			    struct pf_pdesc *, u_short *);
int			 pf_test_state_tcp(struct pf_state **, int,
			    struct pfi_kif *, struct mbuf *, int,
			    void *, struct pf_pdesc *, u_short *);
int			 pf_test_state_udp(struct pf_state **, int,
			    struct pfi_kif *, struct mbuf *, int,
			    void *, struct pf_pdesc *);
int			 pf_test_state_icmp(struct pf_state **, int,
			    struct pfi_kif *, struct mbuf *, int,
			    void *, struct pf_pdesc *, u_short *);
int			 pf_test_state_other(struct pf_state **, int,
			    struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
void			 pf_route(struct mbuf **, struct pf_rule *, int,
			    struct ifnet *, struct pf_state *,
			    struct pf_pdesc *);
void			 pf_route6(struct mbuf **, struct pf_rule *, int,
			    struct ifnet *, struct pf_state *,
			    struct pf_pdesc *);
/* pf_socket_lookup() is only declared here for non-FreeBSD builds */
#ifndef __FreeBSD__
int			 pf_socket_lookup(int, struct pf_pdesc *);
#endif
u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
			    sa_family_t);
u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
			    sa_family_t);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
				u_int16_t);
void			 pf_set_rt_ifp(struct pf_state *,
			    struct pf_addr *);
int			 pf_check_proto_cksum(struct mbuf *, int, int,
			    u_int8_t, sa_family_t);
/* pf_get_divert() is only declared for non-FreeBSD builds */
#ifndef __FreeBSD__
struct pf_divert	*pf_get_divert(struct mbuf *);
#endif
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
/* the FreeBSD variant of pf_find_state() takes a trailing struct pf_mtag * */
#ifdef __FreeBSD__
struct pf_state		*pf_find_state(struct pfi_kif *,
			    struct pf_state_key_cmp *, u_int, struct mbuf *,
			    struct pf_mtag *);
#else
struct pf_state		*pf_find_state(struct pfi_kif *,
			    struct pf_state_key_cmp *, u_int, struct mbuf *);
#endif
int			 pf_src_connlimit(struct pf_state **);
int			 pf_check_congestion(struct ifqueue *);
347
/*
 * Pool limit table, indexed up to PF_LIMIT_MAX.  The FreeBSD build only
 * defines the (per-vnet) array here without an initializer; the other
 * build initializes each slot with a pool pointer and its high-water mark.
 */
#ifdef __FreeBSD__
int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);

VNET_DECLARE(int, pf_end_threads);

VNET_DEFINE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]);
#else
/* table pools are defined in another translation unit */
extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl, PFSTATE_HIWAT },
	{ &pf_src_tree_pl, PFSNODE_HIWAT },
	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT }
};
#endif
366
/*
 * STATE_LOOKUP(): look up the state for key `k` on interface `i` in
 * direction `d` and store it in `s`.
 *
 * NOTE: this macro contains `return` statements and therefore may only be
 * used inside a function returning a PF_* verdict:
 *   - returns PF_DROP when no state is found or the state is PFTM_PURGE;
 *   - (FreeBSD only) returns PF_PASS when the packet is flagged
 *     PF_PACKET_LOOPED;
 *   - returns PF_PASS for an outbound packet whose state was created by an
 *     outbound route-to rule or an inbound reply-to rule and whose rt_kif
 *     is set and differs from `i`.
 * Otherwise execution continues after the macro with `s` set.
 *
 * Every argument is parenthesized in the expansion so that expressions
 * such as `*state` or `a ? b : c` can be passed safely.  Arguments are
 * evaluated more than once; do not pass expressions with side effects.
 */
#ifdef __FreeBSD__
/* looped-packet test for callers with a `struct pf_pdesc *pd` in scope */
#define	PPACKET_LOOPED()						\
	(pd->pf_mtag->flags & PF_PACKET_LOOPED)

/* looped-packet test for callers with a `struct pf_pdesc pd` in scope */
#define	PACKET_LOOPED()							\
	(pd.pf_mtag->flags & PF_PACKET_LOOPED)

#define	STATE_LOOKUP(i, k, d, s, m, pt)					\
	do {								\
		(s) = pf_find_state((i), (k), (d), (m), (pt));		\
		if ((s) == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if (PPACKET_LOOPED())					\
			return (PF_PASS);				\
		if ((d) == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != (i))					\
			return (PF_PASS);				\
	} while (0)
#else
#define	STATE_LOOKUP(i, k, d, s, m)					\
	do {								\
		(s) = pf_find_state((i), (k), (d), (m));		\
		if ((s) == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if ((d) == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != (i))					\
			return (PF_PASS);				\
	} while (0)
#endif
406
/*
 * BOUND_IFACE(r, k): interface a state created by rule `r` is bound to.
 * Yields `k` when the rule has PFRULE_IFBOUND set, otherwise the
 * all-interfaces kif (V_pfi_all on FreeBSD, pfi_all elsewhere).
 *
 * The whole conditional is parenthesized so the macro can be embedded in a
 * larger expression without the ?: operator absorbing neighbouring
 * operands.
 */
#ifdef __FreeBSD__
#define	BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all)
#else
#define	BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
#endif
414
/*
 * STATE_INC_COUNTERS(s): bump states_cur and states_tot on the rule that
 * created state `s` and, when present, on its anchor and NAT rules.
 *
 * `s` is parenthesized in the expansion so any pointer-valued expression
 * (e.g. `*statep`) may be passed.  It is evaluated several times, so it
 * must not have side effects.
 */
#define	STATE_INC_COUNTERS(s)				\
	do {						\
		(s)->rule.ptr->states_cur++;		\
		(s)->rule.ptr->states_tot++;		\
		if ((s)->anchor.ptr != NULL) {		\
			(s)->anchor.ptr->states_cur++;	\
			(s)->anchor.ptr->states_tot++;	\
		}					\
		if ((s)->nat_rule.ptr != NULL) {	\
			(s)->nat_rule.ptr->states_cur++;\
			(s)->nat_rule.ptr->states_tot++;\
		}					\
	} while (0)
428
/*
 * STATE_DEC_COUNTERS(s): drop states_cur on the NAT rule and anchor of
 * state `s` (when present) and on the rule that created it.  The
 * cumulative states_tot counters are left untouched.
 *
 * `s` is parenthesized in the expansion so any pointer-valued expression
 * may be passed; it is evaluated several times.
 */
#define	STATE_DEC_COUNTERS(s)				\
	do {						\
		if ((s)->nat_rule.ptr != NULL)		\
			(s)->nat_rule.ptr->states_cur--;\
		if ((s)->anchor.ptr != NULL)		\
			(s)->anchor.ptr->states_cur--;	\
		(s)->rule.ptr->states_cur--;		\
	} while (0)
437
/* comparators passed to the RB_GENERATE() instantiations below */
static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);

/* tree heads and the state list (per-vnet on FreeBSD, plain globals else) */
#ifdef __FreeBSD__
VNET_DEFINE(struct pf_src_tree,	 	 tree_src_tracking);

VNET_DEFINE(struct pf_state_tree_id,	 tree_id);
VNET_DEFINE(struct pf_state_queue,	 state_list);
#else
struct pf_src_tree tree_src_tracking;

struct pf_state_tree_id tree_id;
struct pf_state_queue state_list;
#endif

/*
 * Red-black tree code: source nodes ordered by pf_src_compare(), state keys
 * by pf_state_compare_key(), and states by pf_state_compare_id().
 */
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);
460
461static __inline int
462pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
463{
464	int	diff;
465
466	if (a->rule.ptr > b->rule.ptr)
467		return (1);
468	if (a->rule.ptr < b->rule.ptr)
469		return (-1);
470	if ((diff = a->af - b->af) != 0)
471		return (diff);
472	switch (a->af) {
473#ifdef INET
474	case AF_INET:
475		if (a->addr.addr32[0] > b->addr.addr32[0])
476			return (1);
477		if (a->addr.addr32[0] < b->addr.addr32[0])
478			return (-1);
479		break;
480#endif /* INET */
481#ifdef INET6
482	case AF_INET6:
483		if (a->addr.addr32[3] > b->addr.addr32[3])
484			return (1);
485		if (a->addr.addr32[3] < b->addr.addr32[3])
486			return (-1);
487		if (a->addr.addr32[2] > b->addr.addr32[2])
488			return (1);
489		if (a->addr.addr32[2] < b->addr.addr32[2])
490			return (-1);
491		if (a->addr.addr32[1] > b->addr.addr32[1])
492			return (1);
493		if (a->addr.addr32[1] < b->addr.addr32[1])
494			return (-1);
495		if (a->addr.addr32[0] > b->addr.addr32[0])
496			return (1);
497		if (a->addr.addr32[0] < b->addr.addr32[0])
498			return (-1);
499		break;
500#endif /* INET6 */
501	}
502	return (0);
503}
504
#ifdef INET6
/*
 * Copy an address of family `af` from `src` to `dst`: one 32-bit word for
 * AF_INET (when INET is configured), four words for AF_INET6.  For any
 * other family `dst` is left untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
#ifdef INET
	if (af == AF_INET) {
		dst->addr32[0] = src->addr32[0];
		return;
	}
#endif /* INET */
	if (af == AF_INET6) {
		int	w;

		for (w = 0; w < 4; w++)
			dst->addr32[w] = src->addr32[w];
	}
}
#endif /* INET6 */
524
525void
526pf_init_threshold(struct pf_threshold *threshold,
527    u_int32_t limit, u_int32_t seconds)
528{
529	threshold->limit = limit * PF_THRESHOLD_MULT;
530	threshold->seconds = seconds;
531	threshold->count = 0;
532	threshold->last = time_second;
533}
534
535void
536pf_add_threshold(struct pf_threshold *threshold)
537{
538	u_int32_t t = time_second, diff = t - threshold->last;
539
540	if (diff >= threshold->seconds)
541		threshold->count = 0;
542	else
543		threshold->count -= threshold->count * diff /
544		    threshold->seconds;
545	threshold->count += PF_THRESHOLD_MULT;
546	threshold->last = t;
547}
548
549int
550pf_check_threshold(struct pf_threshold *threshold)
551{
552	return (threshold->count > threshold->limit);
553}
554
555int
556pf_src_connlimit(struct pf_state **state)
557{
558	int bad = 0;
559
560	(*state)->src_node->conn++;
561	(*state)->src.tcp_est = 1;
562	pf_add_threshold(&(*state)->src_node->conn_rate);
563
564	if ((*state)->rule.ptr->max_src_conn &&
565	    (*state)->rule.ptr->max_src_conn <
566	    (*state)->src_node->conn) {
567#ifdef __FreeBSD__
568		V_pf_status.lcounters[LCNT_SRCCONN]++;
569#else
570		pf_status.lcounters[LCNT_SRCCONN]++;
571#endif
572		bad++;
573	}
574
575	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
576	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
577#ifdef __FreeBSD__
578		V_pf_status.lcounters[LCNT_SRCCONNRATE]++;
579#else
580		pf_status.lcounters[LCNT_SRCCONNRATE]++;
581#endif
582		bad++;
583	}
584
585	if (!bad)
586		return (0);
587
588	if ((*state)->rule.ptr->overload_tbl) {
589		struct pfr_addr p;
590		u_int32_t	killed = 0;
591
592#ifdef __FreeBSD__
593		V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
594		if (V_pf_status.debug >= PF_DEBUG_MISC) {
595#else
596		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
597		if (pf_status.debug >= PF_DEBUG_MISC) {
598#endif
599			printf("pf_src_connlimit: blocking address ");
600			pf_print_host(&(*state)->src_node->addr, 0,
601			    (*state)->key[PF_SK_WIRE]->af);
602		}
603
604		bzero(&p, sizeof(p));
605		p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
606		switch ((*state)->key[PF_SK_WIRE]->af) {
607#ifdef INET
608		case AF_INET:
609			p.pfra_net = 32;
610			p.pfra_ip4addr = (*state)->src_node->addr.v4;
611			break;
612#endif /* INET */
613#ifdef INET6
614		case AF_INET6:
615			p.pfra_net = 128;
616			p.pfra_ip6addr = (*state)->src_node->addr.v6;
617			break;
618#endif /* INET6 */
619		}
620
621		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
622		    &p, time_second);
623
624		/* kill existing states if that's required. */
625		if ((*state)->rule.ptr->flush) {
626			struct pf_state_key *sk;
627			struct pf_state *st;
628
629#ifdef __FreeBSD__
630			V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
631			RB_FOREACH(st, pf_state_tree_id, &V_tree_id) {
632#else
633			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
634			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
635#endif
636				sk = st->key[PF_SK_WIRE];
637				/*
638				 * Kill states from this source.  (Only those
639				 * from the same rule if PF_FLUSH_GLOBAL is not
640				 * set)
641				 */
642				if (sk->af ==
643				    (*state)->key[PF_SK_WIRE]->af &&
644				    (((*state)->direction == PF_OUT &&
645				    PF_AEQ(&(*state)->src_node->addr,
646					&sk->addr[0], sk->af)) ||
647				    ((*state)->direction == PF_IN &&
648				    PF_AEQ(&(*state)->src_node->addr,
649					&sk->addr[1], sk->af))) &&
650				    ((*state)->rule.ptr->flush &
651				    PF_FLUSH_GLOBAL ||
652				    (*state)->rule.ptr == st->rule.ptr)) {
653					st->timeout = PFTM_PURGE;
654					st->src.state = st->dst.state =
655					    TCPS_CLOSED;
656					killed++;
657				}
658			}
659#ifdef __FreeBSD__
660			if (V_pf_status.debug >= PF_DEBUG_MISC)
661#else
662			if (pf_status.debug >= PF_DEBUG_MISC)
663#endif
664				printf(", %u states killed", killed);
665		}
666#ifdef __FreeBSD__
667		if (V_pf_status.debug >= PF_DEBUG_MISC)
668#else
669		if (pf_status.debug >= PF_DEBUG_MISC)
670#endif
671			printf("\n");
672	}
673
674	/* kill this state */
675	(*state)->timeout = PFTM_PURGE;
676	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
677	return (1);
678}
679
680int
681pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
682    struct pf_addr *src, sa_family_t af)
683{
684	struct pf_src_node	k;
685
686	if (*sn == NULL) {
687		k.af = af;
688		PF_ACPY(&k.addr, src, af);
689		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
690		    rule->rpool.opts & PF_POOL_STICKYADDR)
691			k.rule.ptr = rule;
692		else
693			k.rule.ptr = NULL;
694#ifdef __FreeBSD__
695		V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
696		*sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k);
697#else
698		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
699		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
700#endif
701	}
702	if (*sn == NULL) {
703		if (!rule->max_src_nodes ||
704		    rule->src_nodes < rule->max_src_nodes)
705#ifdef __FreeBSD__
706			(*sn) = pool_get(&V_pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
707#else
708			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
709#endif
710		else
711#ifdef __FreeBSD__
712			V_pf_status.lcounters[LCNT_SRCNODES]++;
713#else
714			pf_status.lcounters[LCNT_SRCNODES]++;
715#endif
716		if ((*sn) == NULL)
717			return (-1);
718
719		pf_init_threshold(&(*sn)->conn_rate,
720		    rule->max_src_conn_rate.limit,
721		    rule->max_src_conn_rate.seconds);
722
723		(*sn)->af = af;
724		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
725		    rule->rpool.opts & PF_POOL_STICKYADDR)
726			(*sn)->rule.ptr = rule;
727		else
728			(*sn)->rule.ptr = NULL;
729		PF_ACPY(&(*sn)->addr, src, af);
730		if (RB_INSERT(pf_src_tree,
731#ifdef __FreeBSD__
732		    &V_tree_src_tracking, *sn) != NULL) {
733			if (V_pf_status.debug >= PF_DEBUG_MISC) {
734#else
735		    &tree_src_tracking, *sn) != NULL) {
736			if (pf_status.debug >= PF_DEBUG_MISC) {
737#endif
738				printf("pf: src_tree insert failed: ");
739				pf_print_host(&(*sn)->addr, 0, af);
740				printf("\n");
741			}
742#ifdef __FreeBSD__
743			pool_put(&V_pf_src_tree_pl, *sn);
744#else
745			pool_put(&pf_src_tree_pl, *sn);
746#endif
747			return (-1);
748		}
749		(*sn)->creation = time_second;
750		(*sn)->ruletype = rule->action;
751		if ((*sn)->rule.ptr != NULL)
752			(*sn)->rule.ptr->src_nodes++;
753#ifdef __FreeBSD__
754		V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
755		V_pf_status.src_nodes++;
756#else
757		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
758		pf_status.src_nodes++;
759#endif
760	} else {
761		if (rule->max_src_states &&
762		    (*sn)->states >= rule->max_src_states) {
763#ifdef __FreeBSD__
764			V_pf_status.lcounters[LCNT_SRCSTATES]++;
765#else
766			pf_status.lcounters[LCNT_SRCSTATES]++;
767#endif
768			return (-1);
769		}
770	}
771	return (0);
772}
773
774/* state table stuff */
775
776static __inline int
777pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
778{
779	int	diff;
780
781	if ((diff = a->proto - b->proto) != 0)
782		return (diff);
783	if ((diff = a->af - b->af) != 0)
784		return (diff);
785	switch (a->af) {
786#ifdef INET
787	case AF_INET:
788		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
789			return (1);
790		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
791			return (-1);
792		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
793			return (1);
794		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
795			return (-1);
796		break;
797#endif /* INET */
798#ifdef INET6
799	case AF_INET6:
800		if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
801			return (1);
802		if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
803			return (-1);
804		if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
805			return (1);
806		if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
807			return (-1);
808		if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
809			return (1);
810		if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
811			return (-1);
812		if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
813			return (1);
814		if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
815			return (-1);
816		if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
817			return (1);
818		if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
819			return (-1);
820		if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
821			return (1);
822		if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
823			return (-1);
824		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
825			return (1);
826		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
827			return (-1);
828		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
829			return (1);
830		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
831			return (-1);
832		break;
833#endif /* INET6 */
834	}
835
836	if ((diff = a->port[0] - b->port[0]) != 0)
837		return (diff);
838	if ((diff = a->port[1] - b->port[1]) != 0)
839		return (diff);
840
841	return (0);
842}
843
844static __inline int
845pf_state_compare_id(struct pf_state *a, struct pf_state *b)
846{
847	if (a->id > b->id)
848		return (1);
849	if (a->id < b->id)
850		return (-1);
851	if (a->creatorid > b->creatorid)
852		return (1);
853	if (a->creatorid < b->creatorid)
854		return (-1);
855
856	return (0);
857}
858
859int
860pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
861{
862	struct pf_state_item	*si;
863	struct pf_state_key	*cur;
864	struct pf_state		*olds = NULL;
865
866#ifdef __FreeBSD__
867	KASSERT(s->key[idx] == NULL, ("%s: key is null!", __FUNCTION__));
868#else
869	KASSERT(s->key[idx] == NULL);	/* XXX handle this? */
870#endif
871
872#ifdef __FreeBSD__
873	if ((cur = RB_INSERT(pf_state_tree, &V_pf_statetbl, sk)) != NULL) {
874#else
875	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
876#endif
877		/* key exists. check for same kif, if none, add to key */
878		TAILQ_FOREACH(si, &cur->states, entry)
879			if (si->s->kif == s->kif &&
880			    si->s->direction == s->direction) {
881				if (sk->proto == IPPROTO_TCP &&
882				    si->s->src.state >= TCPS_FIN_WAIT_2 &&
883				    si->s->dst.state >= TCPS_FIN_WAIT_2) {
884					si->s->src.state = si->s->dst.state =
885					    TCPS_CLOSED;
886					/* unlink late or sks can go away */
887					olds = si->s;
888				} else {
889#ifdef __FreeBSD__
890					if (V_pf_status.debug >= PF_DEBUG_MISC) {
891#else
892					if (pf_status.debug >= PF_DEBUG_MISC) {
893#endif
894						printf("pf: %s key attach "
895						    "failed on %s: ",
896						    (idx == PF_SK_WIRE) ?
897						    "wire" : "stack",
898						    s->kif->pfik_name);
899						pf_print_state_parts(s,
900						    (idx == PF_SK_WIRE) ?
901						    sk : NULL,
902						    (idx == PF_SK_STACK) ?
903						    sk : NULL);
904						printf(", existing: ");
905						pf_print_state_parts(si->s,
906						    (idx == PF_SK_WIRE) ?
907						    sk : NULL,
908						    (idx == PF_SK_STACK) ?
909						    sk : NULL);
910						printf("\n");
911					}
912#ifdef __FreeBSD__
913					pool_put(&V_pf_state_key_pl, sk);
914#else
915					pool_put(&pf_state_key_pl, sk);
916#endif
917					return (-1);	/* collision! */
918				}
919			}
920#ifdef __FreeBSD__
921		pool_put(&V_pf_state_key_pl, sk);
922#else
923		pool_put(&pf_state_key_pl, sk);
924#endif
925		s->key[idx] = cur;
926	} else
927		s->key[idx] = sk;
928
929#ifdef __FreeBSD__
930	if ((si = pool_get(&V_pf_state_item_pl, PR_NOWAIT)) == NULL) {
931#else
932	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
933#endif
934		pf_state_key_detach(s, idx);
935		return (-1);
936	}
937	si->s = s;
938
939	/* list is sorted, if-bound states before floating */
940#ifdef __FreeBSD__
941	if (s->kif == V_pfi_all)
942#else
943	if (s->kif == pfi_all)
944#endif
945		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
946	else
947		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);
948
949	if (olds)
950		pf_unlink_state(olds);
951
952	return (0);
953}
954
955void
956pf_detach_state(struct pf_state *s)
957{
958	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
959		s->key[PF_SK_WIRE] = NULL;
960
961	if (s->key[PF_SK_STACK] != NULL)
962		pf_state_key_detach(s, PF_SK_STACK);
963
964	if (s->key[PF_SK_WIRE] != NULL)
965		pf_state_key_detach(s, PF_SK_WIRE);
966}
967
968void
969pf_state_key_detach(struct pf_state *s, int idx)
970{
971	struct pf_state_item	*si;
972
973	si = TAILQ_FIRST(&s->key[idx]->states);
974	while (si && si->s != s)
975	    si = TAILQ_NEXT(si, entry);
976
977	if (si) {
978		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
979#ifdef __FreeBSD__
980		pool_put(&V_pf_state_item_pl, si);
981#else
982		pool_put(&pf_state_item_pl, si);
983#endif
984	}
985
986	if (TAILQ_EMPTY(&s->key[idx]->states)) {
987#ifdef __FreeBSD__
988		RB_REMOVE(pf_state_tree, &V_pf_statetbl, s->key[idx]);
989#else
990		RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]);
991#endif
992		if (s->key[idx]->reverse)
993			s->key[idx]->reverse->reverse = NULL;
994#ifdef __FreeBSD__
995	/* XXX: implement this */
996#else
997		if (s->key[idx]->inp)
998			s->key[idx]->inp->inp_pf_sk = NULL;
999#endif
1000#ifdef __FreeBSD__
1001		pool_put(&V_pf_state_key_pl, s->key[idx]);
1002#else
1003		pool_put(&pf_state_key_pl, s->key[idx]);
1004#endif
1005	}
1006	s->key[idx] = NULL;
1007}
1008
1009struct pf_state_key *
1010pf_alloc_state_key(int pool_flags)
1011{
1012	struct pf_state_key	*sk;
1013
1014#ifdef __FreeBSD__
1015	if ((sk = pool_get(&V_pf_state_key_pl, pool_flags)) == NULL)
1016#else
1017	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
1018#endif
1019		return (NULL);
1020	TAILQ_INIT(&sk->states);
1021
1022	return (sk);
1023}
1024
1025int
1026pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr,
1027	struct pf_state_key **skw, struct pf_state_key **sks,
1028	struct pf_state_key **skp, struct pf_state_key **nkp,
1029	struct pf_addr *saddr, struct pf_addr *daddr,
1030	u_int16_t sport, u_int16_t dport)
1031{
1032#ifdef __FreeBSD__
1033	KASSERT((*skp == NULL && *nkp == NULL),
1034		("%s: skp == NULL && nkp == NULL", __FUNCTION__));
1035#else
1036	KASSERT((*skp == NULL && *nkp == NULL));
1037#endif
1038
1039	if ((*skp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
1040		return (ENOMEM);
1041
1042	PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af);
1043	PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af);
1044	(*skp)->port[pd->sidx] = sport;
1045	(*skp)->port[pd->didx] = dport;
1046	(*skp)->proto = pd->proto;
1047	(*skp)->af = pd->af;
1048
1049	if (nr != NULL) {
1050		if ((*nkp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
1051			return (ENOMEM); /* caller must handle cleanup */
1052
1053		/* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */
1054		PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af);
1055		PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af);
1056		(*nkp)->port[0] = (*skp)->port[0];
1057		(*nkp)->port[1] = (*skp)->port[1];
1058		(*nkp)->proto = pd->proto;
1059		(*nkp)->af = pd->af;
1060	} else
1061		*nkp = *skp;
1062
1063	if (pd->dir == PF_IN) {
1064		*skw = *skp;
1065		*sks = *nkp;
1066	} else {
1067		*sks = *skp;
1068		*skw = *nkp;
1069	}
1070	return (0);
1071}
1072
1073
1074int
1075pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
1076    struct pf_state_key *sks, struct pf_state *s)
1077{
1078#ifndef __FreeBSD__
1079	splassert(IPL_SOFTNET);
1080#endif
1081
1082	s->kif = kif;
1083
1084	if (skw == sks) {
1085		if (pf_state_key_attach(skw, s, PF_SK_WIRE))
1086			return (-1);
1087		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
1088	} else {
1089		if (pf_state_key_attach(skw, s, PF_SK_WIRE)) {
1090#ifdef __FreeBSD__
1091			pool_put(&V_pf_state_key_pl, sks);
1092#else
1093			pool_put(&pf_state_key_pl, sks);
1094#endif
1095			return (-1);
1096		}
1097		if (pf_state_key_attach(sks, s, PF_SK_STACK)) {
1098			pf_state_key_detach(s, PF_SK_WIRE);
1099			return (-1);
1100		}
1101	}
1102
1103	if (s->id == 0 && s->creatorid == 0) {
1104#ifdef __FreeBSD__
1105		s->id = htobe64(V_pf_status.stateid++);
1106		s->creatorid = V_pf_status.hostid;
1107#else
1108		s->id = htobe64(pf_status.stateid++);
1109		s->creatorid = pf_status.hostid;
1110#endif
1111	}
1112#ifdef __FreeBSD__
1113	if (RB_INSERT(pf_state_tree_id, &V_tree_id, s) != NULL) {
1114		if (V_pf_status.debug >= PF_DEBUG_MISC) {
1115#else
1116	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
1117		if (pf_status.debug >= PF_DEBUG_MISC) {
1118#endif
1119			printf("pf: state insert failed: "
1120			    "id: %016llx creatorid: %08x",
1121#ifdef __FreeBSD__
1122			    (unsigned long long)betoh64(s->id), ntohl(s->creatorid));
1123#else
1124			    betoh64(s->id), ntohl(s->creatorid));
1125#endif
1126			printf("\n");
1127		}
1128		pf_detach_state(s);
1129		return (-1);
1130	}
1131#ifdef __FreeBSD__
1132	TAILQ_INSERT_TAIL(&V_state_list, s, entry_list);
1133	V_pf_status.fcounters[FCNT_STATE_INSERT]++;
1134	V_pf_status.states++;
1135#else
1136	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
1137	pf_status.fcounters[FCNT_STATE_INSERT]++;
1138	pf_status.states++;
1139#endif
1140	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
1141#if NPFSYNC > 0
1142#ifdef __FreeBSD__
1143	if (pfsync_insert_state_ptr != NULL)
1144		pfsync_insert_state_ptr(s);
1145#else
1146	pfsync_insert_state(s);
1147#endif
1148#endif
1149	return (0);
1150}
1151
1152struct pf_state *
1153pf_find_state_byid(struct pf_state_cmp *key)
1154{
1155#ifdef __FreeBSD__
1156	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
1157
1158	return (RB_FIND(pf_state_tree_id, &V_tree_id, (struct pf_state *)key));
1159#else
1160	pf_status.fcounters[FCNT_STATE_SEARCH]++;
1161
1162	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
1163#endif
1164}
1165
1166/* XXX debug function, intended to be removed one day */
1167int
1168pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
1169    struct pfi_kif *kif, u_int dir)
1170{
1171	/* a (from hdr) and b (new) must be exact opposites of each other */
1172	if (a->af == b->af && a->proto == b->proto &&
1173	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
1174	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
1175	    a->port[0] == b->port[1] &&
1176	    a->port[1] == b->port[0])
1177		return (0);
1178	else {
1179		/* mismatch. must not happen. */
1180		printf("pf: state key linking mismatch! dir=%s, "
1181		    "if=%s, stored af=%u, a0: ",
1182		    dir == PF_OUT ? "OUT" : "IN", kif->pfik_name, a->af);
1183		pf_print_host(&a->addr[0], a->port[0], a->af);
1184		printf(", a1: ");
1185		pf_print_host(&a->addr[1], a->port[1], a->af);
1186		printf(", proto=%u", a->proto);
1187		printf(", found af=%u, a0: ", b->af);
1188		pf_print_host(&b->addr[0], b->port[0], b->af);
1189		printf(", a1: ");
1190		pf_print_host(&b->addr[1], b->port[1], b->af);
1191		printf(", proto=%u", b->proto);
1192		printf(".\n");
1193		return (-1);
1194	}
1195}
1196
1197struct pf_state *
1198#ifdef __FreeBSD__
1199pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
1200    struct mbuf *m, struct pf_mtag *pftag)
1201#else
1202pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
1203    struct mbuf *m)
1204#endif
1205{
1206	struct pf_state_key	*sk;
1207	struct pf_state_item	*si;
1208
1209#ifdef __FreeBSD__
1210	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
1211#else
1212	pf_status.fcounters[FCNT_STATE_SEARCH]++;
1213#endif
1214
1215#ifdef __FreeBSD__
1216	if (dir == PF_OUT && pftag->statekey &&
1217	    ((struct pf_state_key *)pftag->statekey)->reverse)
1218		sk = ((struct pf_state_key *)pftag->statekey)->reverse;
1219	else {
1220#ifdef __FreeBSD__
1221		if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl,
1222#else
1223		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
1224#endif
1225		    (struct pf_state_key *)key)) == NULL)
1226			return (NULL);
1227		if (dir == PF_OUT && pftag->statekey &&
1228		    pf_compare_state_keys(pftag->statekey, sk,
1229		    kif, dir) == 0) {
1230			((struct pf_state_key *)
1231			    pftag->statekey)->reverse = sk;
1232			sk->reverse = pftag->statekey;
1233		}
1234	}
1235#else
1236	if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
1237	    ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse)
1238		sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse;
1239	else {
1240#ifdef __FreeBSD__
1241		if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl,
1242#else
1243		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
1244#endif
1245		    (struct pf_state_key *)key)) == NULL)
1246			return (NULL);
1247		if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
1248		    pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk,
1249		    kif, dir) == 0) {
1250			((struct pf_state_key *)
1251			    m->m_pkthdr.pf.statekey)->reverse = sk;
1252			sk->reverse = m->m_pkthdr.pf.statekey;
1253		}
1254	}
1255#endif
1256
1257	if (dir == PF_OUT)
1258#ifdef __FreeBSD__
1259		pftag->statekey = NULL;
1260#else
1261		m->m_pkthdr.pf.statekey = NULL;
1262#endif
1263
1264	/* list is sorted, if-bound states before floating ones */
1265	TAILQ_FOREACH(si, &sk->states, entry)
1266#ifdef __FreeBSD__
1267		if ((si->s->kif == V_pfi_all || si->s->kif == kif) &&
1268#else
1269		if ((si->s->kif == pfi_all || si->s->kif == kif) &&
1270#endif
1271		    sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
1272		    si->s->key[PF_SK_STACK]))
1273			return (si->s);
1274
1275	return (NULL);
1276}
1277
1278struct pf_state *
1279pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
1280{
1281	struct pf_state_key	*sk;
1282	struct pf_state_item	*si, *ret = NULL;
1283
1284#ifdef __FreeBSD__
1285	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
1286#else
1287	pf_status.fcounters[FCNT_STATE_SEARCH]++;
1288#endif
1289
1290#ifdef __FreeBSD__
1291	sk = RB_FIND(pf_state_tree, &V_pf_statetbl, (struct pf_state_key *)key);
1292#else
1293	sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);
1294#endif
1295	if (sk != NULL) {
1296		TAILQ_FOREACH(si, &sk->states, entry)
1297			if (dir == PF_INOUT ||
1298			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
1299			    si->s->key[PF_SK_STACK]))) {
1300				if (more == NULL)
1301					return (si->s);
1302
1303				if (ret)
1304					(*more)++;
1305				else
1306					ret = si;
1307			}
1308	}
1309	return (ret ? ret->s : NULL);
1310}
1311
1312/* END state table stuff */
1313
1314
1315void
1316pf_purge_thread(void *v)
1317{
1318	int nloops = 0, s;
1319#ifdef __FreeBSD__
1320	int locked;
1321#endif
1322
1323	CURVNET_SET((struct vnet *)v);
1324
1325	for (;;) {
1326		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
1327
1328#ifdef __FreeBSD__
1329		sx_slock(&V_pf_consistency_lock);
1330		PF_LOCK();
1331		locked = 0;
1332
1333		if (V_pf_end_threads) {
1334			PF_UNLOCK();
1335			sx_sunlock(&V_pf_consistency_lock);
1336			sx_xlock(&V_pf_consistency_lock);
1337			PF_LOCK();
1338
1339			pf_purge_expired_states(V_pf_status.states, 1);
1340			pf_purge_expired_fragments();
1341			pf_purge_expired_src_nodes(1);
1342			V_pf_end_threads++;
1343
1344			sx_xunlock(&V_pf_consistency_lock);
1345			PF_UNLOCK();
1346			wakeup(pf_purge_thread);
1347			kproc_exit(0);
1348		}
1349#endif
1350		s = splsoftnet();
1351
1352		/* process a fraction of the state table every second */
1353#ifdef __FreeBSD__
1354		if (!pf_purge_expired_states(1 + (V_pf_status.states /
1355		    V_pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
1356			PF_UNLOCK();
1357			sx_sunlock(&V_pf_consistency_lock);
1358			sx_xlock(&V_pf_consistency_lock);
1359			PF_LOCK();
1360			locked = 1;
1361
1362			pf_purge_expired_states(1 + (V_pf_status.states /
1363			    V_pf_default_rule.timeout[PFTM_INTERVAL]), 1);
1364		}
1365#else
1366		pf_purge_expired_states(1 + (pf_status.states
1367		    / pf_default_rule.timeout[PFTM_INTERVAL]));
1368#endif
1369
1370		/* purge other expired types every PFTM_INTERVAL seconds */
1371#ifdef __FreeBSD__
1372		if (++nloops >= V_pf_default_rule.timeout[PFTM_INTERVAL]) {
1373#else
1374		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
1375#endif
1376			pf_purge_expired_fragments();
1377			pf_purge_expired_src_nodes(0);
1378			nloops = 0;
1379		}
1380
1381		splx(s);
1382#ifdef __FreeBSD__
1383		PF_UNLOCK();
1384		if (locked)
1385			sx_xunlock(&V_pf_consistency_lock);
1386		else
1387			sx_sunlock(&V_pf_consistency_lock);
1388#endif
1389	}
1390	CURVNET_RESTORE();
1391}
1392
1393u_int32_t
1394pf_state_expires(const struct pf_state *state)
1395{
1396	u_int32_t	timeout;
1397	u_int32_t	start;
1398	u_int32_t	end;
1399	u_int32_t	states;
1400
1401	/* handle all PFTM_* > PFTM_MAX here */
1402	if (state->timeout == PFTM_PURGE)
1403		return (time_second);
1404	if (state->timeout == PFTM_UNTIL_PACKET)
1405		return (0);
1406#ifdef __FreeBSD__
1407	KASSERT(state->timeout != PFTM_UNLINKED,
1408	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
1409	KASSERT((state->timeout < PFTM_MAX),
1410	    ("pf_state_expires: timeout > PFTM_MAX"));
1411#else
1412	KASSERT(state->timeout != PFTM_UNLINKED);
1413	KASSERT(state->timeout < PFTM_MAX);
1414#endif
1415	timeout = state->rule.ptr->timeout[state->timeout];
1416	if (!timeout)
1417#ifdef __FreeBSD__
1418		timeout = V_pf_default_rule.timeout[state->timeout];
1419#else
1420		timeout = pf_default_rule.timeout[state->timeout];
1421#endif
1422	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1423	if (start) {
1424		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1425		states = state->rule.ptr->states_cur;
1426	} else {
1427#ifdef __FreeBSD__
1428		start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1429		end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1430		states = V_pf_status.states;
1431#else
1432		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1433		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1434		states = pf_status.states;
1435#endif
1436	}
1437	if (end && states > start && start < end) {
1438		if (states < end)
1439			return (state->expire + timeout * (end - states) /
1440			    (end - start));
1441		else
1442			return (time_second);
1443	}
1444	return (state->expire + timeout);
1445}
1446
1447#ifdef __FreeBSD__
1448int
1449pf_purge_expired_src_nodes(int waslocked)
1450#else
1451void
1452pf_purge_expired_src_nodes(int waslocked)
1453#endif
1454{
1455	struct pf_src_node		*cur, *next;
1456	int				 locked = waslocked;
1457
1458#ifdef __FreeBSD__
1459	for (cur = RB_MIN(pf_src_tree, &V_tree_src_tracking); cur; cur = next) {
1460	next = RB_NEXT(pf_src_tree, &V_tree_src_tracking, cur);
1461#else
1462	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
1463	next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
1464#endif
1465
1466		if (cur->states <= 0 && cur->expire <= time_second) {
1467			if (! locked) {
1468#ifdef __FreeBSD__
1469				if (!sx_try_upgrade(&V_pf_consistency_lock))
1470					return (0);
1471#else
1472				rw_enter_write(&pf_consistency_lock);
1473#endif
1474				next = RB_NEXT(pf_src_tree,
1475#ifdef __FreeBSD__
1476				    &V_tree_src_tracking, cur);
1477#else
1478				    &tree_src_tracking, cur);
1479#endif
1480				locked = 1;
1481			}
1482			if (cur->rule.ptr != NULL) {
1483				cur->rule.ptr->src_nodes--;
1484				if (cur->rule.ptr->states_cur <= 0 &&
1485				    cur->rule.ptr->max_src_nodes <= 0)
1486					pf_rm_rule(NULL, cur->rule.ptr);
1487			}
1488#ifdef __FreeBSD__
1489			RB_REMOVE(pf_src_tree, &V_tree_src_tracking, cur);
1490			V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1491			V_pf_status.src_nodes--;
1492			pool_put(&V_pf_src_tree_pl, cur);
1493#else
1494			RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
1495			pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1496			pf_status.src_nodes--;
1497			pool_put(&pf_src_tree_pl, cur);
1498#endif
1499		}
1500	}
1501
1502	if (locked && !waslocked)
1503#ifdef __FreeBSD__
1504	{
1505		sx_downgrade(&V_pf_consistency_lock);
1506	}
1507	return (1);
1508#else
1509		rw_exit_write(&pf_consistency_lock);
1510#endif
1511}
1512
1513void
1514pf_src_tree_remove_state(struct pf_state *s)
1515{
1516	u_int32_t timeout;
1517
1518	if (s->src_node != NULL) {
1519		if (s->src.tcp_est)
1520			--s->src_node->conn;
1521		if (--s->src_node->states <= 0) {
1522			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1523			if (!timeout)
1524				timeout =
1525#ifdef __FreeBSD__
1526				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
1527#else
1528				    pf_default_rule.timeout[PFTM_SRC_NODE];
1529#endif
1530			s->src_node->expire = time_second + timeout;
1531		}
1532	}
1533	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1534		if (--s->nat_src_node->states <= 0) {
1535			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1536			if (!timeout)
1537				timeout =
1538#ifdef __FreeBSD__
1539				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
1540#else
1541				    pf_default_rule.timeout[PFTM_SRC_NODE];
1542#endif
1543			s->nat_src_node->expire = time_second + timeout;
1544		}
1545	}
1546	s->src_node = s->nat_src_node = NULL;
1547}
1548
1549/* callers should be at splsoftnet */
1550void
1551pf_unlink_state(struct pf_state *cur)
1552{
1553#ifdef __FreeBSD__
1554	if (cur->local_flags & PFSTATE_EXPIRING)
1555		return;
1556	cur->local_flags |= PFSTATE_EXPIRING;
1557#else
1558	splassert(IPL_SOFTNET);
1559#endif
1560
1561	if (cur->src.state == PF_TCPS_PROXY_DST) {
1562		/* XXX wire key the right one? */
1563#ifdef __FreeBSD__
1564		pf_send_tcp(NULL, cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
1565#else
1566		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
1567#endif
1568		    &cur->key[PF_SK_WIRE]->addr[1],
1569		    &cur->key[PF_SK_WIRE]->addr[0],
1570		    cur->key[PF_SK_WIRE]->port[1],
1571		    cur->key[PF_SK_WIRE]->port[0],
1572		    cur->src.seqhi, cur->src.seqlo + 1,
1573		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1574	}
1575#ifdef __FreeBSD__
1576	RB_REMOVE(pf_state_tree_id, &V_tree_id, cur);
1577#else
1578	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
1579#endif
1580#if NPFLOW > 0
1581	if (cur->state_flags & PFSTATE_PFLOW)
1582#ifdef __FreeBSD__
1583		if (export_pflow_ptr != NULL)
1584			export_pflow_ptr(cur);
1585#else
1586		export_pflow(cur);
1587#endif
1588#endif
1589#if NPFSYNC > 0
1590#ifdef __FreeBSD__
1591	if (pfsync_delete_state_ptr != NULL)
1592		pfsync_delete_state_ptr(cur);
1593#else
1594	pfsync_delete_state(cur);
1595#endif
1596#endif
1597	cur->timeout = PFTM_UNLINKED;
1598	pf_src_tree_remove_state(cur);
1599	pf_detach_state(cur);
1600}
1601
1602/* callers should be at splsoftnet and hold the
1603 * write_lock on pf_consistency_lock */
1604void
1605pf_free_state(struct pf_state *cur)
1606{
1607#ifndef __FreeBSD__
1608	splassert(IPL_SOFTNET);
1609#endif
1610
1611#if NPFSYNC > 0
1612#ifdef __FreeBSD__
1613	if (pfsync_state_in_use_ptr != NULL &&
1614		pfsync_state_in_use_ptr(cur))
1615#else
1616	if (pfsync_state_in_use(cur))
1617#endif
1618		return;
1619#endif
1620#ifdef __FreeBSD__
1621	KASSERT(cur->timeout == PFTM_UNLINKED,
1622	    ("pf_free_state: cur->timeout != PFTM_UNLINKED"));
1623#else
1624	KASSERT(cur->timeout == PFTM_UNLINKED);
1625#endif
1626	if (--cur->rule.ptr->states_cur <= 0 &&
1627	    cur->rule.ptr->src_nodes <= 0)
1628		pf_rm_rule(NULL, cur->rule.ptr);
1629	if (cur->nat_rule.ptr != NULL)
1630		if (--cur->nat_rule.ptr->states_cur <= 0 &&
1631			cur->nat_rule.ptr->src_nodes <= 0)
1632			pf_rm_rule(NULL, cur->nat_rule.ptr);
1633	if (cur->anchor.ptr != NULL)
1634		if (--cur->anchor.ptr->states_cur <= 0)
1635			pf_rm_rule(NULL, cur->anchor.ptr);
1636	pf_normalize_tcp_cleanup(cur);
1637	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
1638#ifdef __FreeBSD__
1639	TAILQ_REMOVE(&V_state_list, cur, entry_list);
1640#else
1641	TAILQ_REMOVE(&state_list, cur, entry_list);
1642#endif
1643	if (cur->tag)
1644		pf_tag_unref(cur->tag);
1645#ifdef __FreeBSD__
1646	pool_put(&V_pf_state_pl, cur);
1647	V_pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1648	V_pf_status.states--;
1649#else
1650	pool_put(&pf_state_pl, cur);
1651	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1652	pf_status.states--;
1653#endif
1654}
1655
1656#ifdef __FreeBSD__
1657int
1658pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
1659#else
1660void
1661pf_purge_expired_states(u_int32_t maxcheck)
1662#endif
1663{
1664	static struct pf_state	*cur = NULL;
1665	struct pf_state		*next;
1666#ifdef __FreeBSD__
1667	int			 locked = waslocked;
1668#else
1669	int			 locked = 0;
1670#endif
1671
1672	while (maxcheck--) {
1673		/* wrap to start of list when we hit the end */
1674		if (cur == NULL) {
1675#ifdef __FreeBSD__
1676			cur = TAILQ_FIRST(&V_state_list);
1677#else
1678			cur = TAILQ_FIRST(&state_list);
1679#endif
1680			if (cur == NULL)
1681				break;	/* list empty */
1682		}
1683
1684		/* get next state, as cur may get deleted */
1685		next = TAILQ_NEXT(cur, entry_list);
1686
1687		if (cur->timeout == PFTM_UNLINKED) {
1688			/* free unlinked state */
1689			if (! locked) {
1690#ifdef __FreeBSD__
1691				if (!sx_try_upgrade(&V_pf_consistency_lock))
1692					return (0);
1693#else
1694				rw_enter_write(&pf_consistency_lock);
1695#endif
1696				locked = 1;
1697			}
1698			pf_free_state(cur);
1699		} else if (pf_state_expires(cur) <= time_second) {
1700			/* unlink and free expired state */
1701			pf_unlink_state(cur);
1702			if (! locked) {
1703#ifdef __FreeBSD__
1704				if (!sx_try_upgrade(&V_pf_consistency_lock))
1705					return (0);
1706#else
1707				rw_enter_write(&pf_consistency_lock);
1708#endif
1709				locked = 1;
1710			}
1711			pf_free_state(cur);
1712		}
1713		cur = next;
1714	}
1715
1716#ifdef __FreeBSD__
1717	if (!waslocked && locked)
1718		sx_downgrade(&V_pf_consistency_lock);
1719
1720	return (1);
1721#else
1722	if (locked)
1723		rw_exit_write(&pf_consistency_lock);
1724#endif
1725}
1726
1727int
1728pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1729{
1730	if (aw->type != PF_ADDR_TABLE)
1731		return (0);
1732	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
1733		return (1);
1734	return (0);
1735}
1736
1737void
1738pf_tbladdr_remove(struct pf_addr_wrap *aw)
1739{
1740	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1741		return;
1742	pfr_detach_table(aw->p.tbl);
1743	aw->p.tbl = NULL;
1744}
1745
1746void
1747pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1748{
1749	struct pfr_ktable *kt = aw->p.tbl;
1750
1751	if (aw->type != PF_ADDR_TABLE || kt == NULL)
1752		return;
1753	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1754		kt = kt->pfrkt_root;
1755	aw->p.tbl = NULL;
1756	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1757		kt->pfrkt_cnt : -1;
1758}
1759
1760void
1761pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1762{
1763	switch (af) {
1764#ifdef INET
1765	case AF_INET: {
1766		u_int32_t a = ntohl(addr->addr32[0]);
1767		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
1768		    (a>>8)&255, a&255);
1769		if (p) {
1770			p = ntohs(p);
1771			printf(":%u", p);
1772		}
1773		break;
1774	}
1775#endif /* INET */
1776#ifdef INET6
1777	case AF_INET6: {
1778		u_int16_t b;
1779		u_int8_t i, curstart, curend, maxstart, maxend;
1780		curstart = curend = maxstart = maxend = 255;
1781		for (i = 0; i < 8; i++) {
1782			if (!addr->addr16[i]) {
1783				if (curstart == 255)
1784					curstart = i;
1785				curend = i;
1786			} else {
1787				if ((curend - curstart) >
1788				    (maxend - maxstart)) {
1789					maxstart = curstart;
1790					maxend = curend;
1791				}
1792				curstart = curend = 255;
1793			}
1794		}
1795		if ((curend - curstart) >
1796		    (maxend - maxstart)) {
1797			maxstart = curstart;
1798			maxend = curend;
1799		}
1800		for (i = 0; i < 8; i++) {
1801			if (i >= maxstart && i <= maxend) {
1802				if (i == 0)
1803					printf(":");
1804				if (i == maxend)
1805					printf(":");
1806			} else {
1807				b = ntohs(addr->addr16[i]);
1808				printf("%x", b);
1809				if (i < 7)
1810					printf(":");
1811			}
1812		}
1813		if (p) {
1814			p = ntohs(p);
1815			printf("[%u]", p);
1816		}
1817		break;
1818	}
1819#endif /* INET6 */
1820	}
1821}
1822
1823void
1824pf_print_state(struct pf_state *s)
1825{
1826	pf_print_state_parts(s, NULL, NULL);
1827}
1828
1829void
1830pf_print_state_parts(struct pf_state *s,
1831    struct pf_state_key *skwp, struct pf_state_key *sksp)
1832{
1833	struct pf_state_key *skw, *sks;
1834	u_int8_t proto, dir;
1835
1836	/* Do our best to fill these, but they're skipped if NULL */
1837	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
1838	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
1839	proto = skw ? skw->proto : (sks ? sks->proto : 0);
1840	dir = s ? s->direction : 0;
1841
1842	switch (proto) {
1843	case IPPROTO_IPV4:
1844		printf("IPv4");
1845		break;
1846	case IPPROTO_IPV6:
1847		printf("IPv6");
1848		break;
1849	case IPPROTO_TCP:
1850		printf("TCP");
1851		break;
1852	case IPPROTO_UDP:
1853		printf("UDP");
1854		break;
1855	case IPPROTO_ICMP:
1856		printf("ICMP");
1857		break;
1858	case IPPROTO_ICMPV6:
1859		printf("ICMPv6");
1860		break;
1861	default:
1862		printf("%u", skw->proto);
1863		break;
1864	}
1865	switch (dir) {
1866	case PF_IN:
1867		printf(" in");
1868		break;
1869	case PF_OUT:
1870		printf(" out");
1871		break;
1872	}
1873	if (skw) {
1874		printf(" wire: ");
1875		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
1876		printf(" ");
1877		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
1878	}
1879	if (sks) {
1880		printf(" stack: ");
1881		if (sks != skw) {
1882			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
1883			printf(" ");
1884			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
1885		} else
1886			printf("-");
1887	}
1888	if (s) {
1889		if (proto == IPPROTO_TCP) {
1890			printf(" [lo=%u high=%u win=%u modulator=%u",
1891			    s->src.seqlo, s->src.seqhi,
1892			    s->src.max_win, s->src.seqdiff);
1893			if (s->src.wscale && s->dst.wscale)
1894				printf(" wscale=%u",
1895				    s->src.wscale & PF_WSCALE_MASK);
1896			printf("]");
1897			printf(" [lo=%u high=%u win=%u modulator=%u",
1898			    s->dst.seqlo, s->dst.seqhi,
1899			    s->dst.max_win, s->dst.seqdiff);
1900			if (s->src.wscale && s->dst.wscale)
1901				printf(" wscale=%u",
1902				s->dst.wscale & PF_WSCALE_MASK);
1903			printf("]");
1904		}
1905		printf(" %u:%u", s->src.state, s->dst.state);
1906	}
1907}
1908
1909void
1910pf_print_flags(u_int8_t f)
1911{
1912	if (f)
1913		printf(" ");
1914	if (f & TH_FIN)
1915		printf("F");
1916	if (f & TH_SYN)
1917		printf("S");
1918	if (f & TH_RST)
1919		printf("R");
1920	if (f & TH_PUSH)
1921		printf("P");
1922	if (f & TH_ACK)
1923		printf("A");
1924	if (f & TH_URG)
1925		printf("U");
1926	if (f & TH_ECE)
1927		printf("E");
1928	if (f & TH_CWR)
1929		printf("W");
1930}
1931
/*
 * Point skip[i] of every rule from head[i] up to, but not including, cur at
 * cur, and advance head[i] to cur.  Uses the variables `head' and `cur' of
 * the scope it is expanded in.
 */
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		for (; head[i] != cur;				\
		    head[i] = TAILQ_NEXT(head[i], entries))	\
			head[i]->skip[i].ptr = cur;		\
	} while (0)
1939
1940void
1941pf_calc_skip_steps(struct pf_rulequeue *rules)
1942{
1943	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1944	int i;
1945
1946	cur = TAILQ_FIRST(rules);
1947	prev = cur;
1948	for (i = 0; i < PF_SKIP_COUNT; ++i)
1949		head[i] = cur;
1950	while (cur != NULL) {
1951
1952		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1953			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1954		if (cur->direction != prev->direction)
1955			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1956		if (cur->af != prev->af)
1957			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1958		if (cur->proto != prev->proto)
1959			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1960		if (cur->src.neg != prev->src.neg ||
1961		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1962			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1963		if (cur->src.port[0] != prev->src.port[0] ||
1964		    cur->src.port[1] != prev->src.port[1] ||
1965		    cur->src.port_op != prev->src.port_op)
1966			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1967		if (cur->dst.neg != prev->dst.neg ||
1968		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1969			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1970		if (cur->dst.port[0] != prev->dst.port[0] ||
1971		    cur->dst.port[1] != prev->dst.port[1] ||
1972		    cur->dst.port_op != prev->dst.port_op)
1973			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1974
1975		prev = cur;
1976		cur = TAILQ_NEXT(cur, entries);
1977	}
1978	for (i = 0; i < PF_SKIP_COUNT; ++i)
1979		PF_SET_SKIP_STEPS(i);
1980}
1981
1982int
1983pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1984{
1985	if (aw1->type != aw2->type)
1986		return (1);
1987	switch (aw1->type) {
1988	case PF_ADDR_ADDRMASK:
1989	case PF_ADDR_RANGE:
1990		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1991			return (1);
1992		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1993			return (1);
1994		return (0);
1995	case PF_ADDR_DYNIFTL:
1996		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1997	case PF_ADDR_NOROUTE:
1998	case PF_ADDR_URPFFAILED:
1999		return (0);
2000	case PF_ADDR_TABLE:
2001		return (aw1->p.tbl != aw2->p.tbl);
2002	case PF_ADDR_RTLABEL:
2003		return (aw1->v.rtlabel != aw2->v.rtlabel);
2004	default:
2005		printf("invalid address type: %d\n", aw1->type);
2006		return (1);
2007	}
2008}
2009
/*
 * Incrementally adjust a 16-bit checksum field for one 16-bit word of the
 * covered data changing from old to new.
 *
 * When udp is non-zero, a checksum of zero is returned unchanged, and a
 * result that would be zero is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum, folded;

	if (udp && cksum == 0)
		return (0x0000);

	sum = cksum + old - new;
	/* fold the upper half into the lower 16 bits */
	folded = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && folded == 0)
		return (0xFFFF);
	return (folded);
}
2024
2025void
2026pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
2027    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
2028{
2029	struct pf_addr	ao;
2030	u_int16_t	po = *p;
2031
2032	PF_ACPY(&ao, a, af);
2033	PF_ACPY(a, an, af);
2034
2035	*p = pn;
2036
2037	switch (af) {
2038#ifdef INET
2039	case AF_INET:
2040		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
2041		    ao.addr16[0], an->addr16[0], 0),
2042		    ao.addr16[1], an->addr16[1], 0);
2043		*p = pn;
2044		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
2045		    ao.addr16[0], an->addr16[0], u),
2046		    ao.addr16[1], an->addr16[1], u),
2047		    po, pn, u);
2048		break;
2049#endif /* INET */
2050#ifdef INET6
2051	case AF_INET6:
2052		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2053		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2054		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
2055		    ao.addr16[0], an->addr16[0], u),
2056		    ao.addr16[1], an->addr16[1], u),
2057		    ao.addr16[2], an->addr16[2], u),
2058		    ao.addr16[3], an->addr16[3], u),
2059		    ao.addr16[4], an->addr16[4], u),
2060		    ao.addr16[5], an->addr16[5], u),
2061		    ao.addr16[6], an->addr16[6], u),
2062		    ao.addr16[7], an->addr16[7], u),
2063		    po, pn, u);
2064		break;
2065#endif /* INET6 */
2066	}
2067}
2068
2069
/*
 * Changes a u_int32_t.  Uses a void * so there are no align restrictions.
 * The checksum at c is adjusted for the change from the old value to an,
 * upper 16 bits first, then lower 16 bits; u is passed to pf_cksum_fixup()
 * as its udp argument.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	ao;
	u_int16_t	sum;

	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));

	sum = pf_cksum_fixup(*c, ao >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, ao & 0xffff, an & 0xffff, u);
}
2081
#ifdef INET6
/*
 * Replace IPv6 address *a by *an and adjust the checksum at c for each of
 * the eight 16-bit words of the address; u is passed to pf_cksum_fixup() as
 * its udp argument.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	ao;
	u_int16_t	sum;
	int		w;

	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (w = 0; w < 8; w++)
		sum = pf_cksum_fixup(sum, ao.addr16[w], an->addr16[w], u);
	*c = sum;
}
#endif /* INET6 */
2104
2105void
2106pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
2107    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
2108    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
2109{
2110	struct pf_addr	oia, ooa;
2111
2112	PF_ACPY(&oia, ia, af);
2113	if (oa)
2114		PF_ACPY(&ooa, oa, af);
2115
2116	/* Change inner protocol port, fix inner protocol checksum. */
2117	if (ip != NULL) {
2118		u_int16_t	oip = *ip;
2119		u_int32_t	opc;
2120
2121		if (pc != NULL)
2122			opc = *pc;
2123		*ip = np;
2124		if (pc != NULL)
2125			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
2126		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
2127		if (pc != NULL)
2128			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
2129	}
2130	/* Change inner ip address, fix inner ip and icmp checksums. */
2131	PF_ACPY(ia, na, af);
2132	switch (af) {
2133#ifdef INET
2134	case AF_INET: {
2135		u_int32_t	 oh2c = *h2c;
2136
2137		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
2138		    oia.addr16[0], ia->addr16[0], 0),
2139		    oia.addr16[1], ia->addr16[1], 0);
2140		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
2141		    oia.addr16[0], ia->addr16[0], 0),
2142		    oia.addr16[1], ia->addr16[1], 0);
2143		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
2144		break;
2145	}
2146#endif /* INET */
2147#ifdef INET6
2148	case AF_INET6:
2149		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2150		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2151		    pf_cksum_fixup(pf_cksum_fixup(*ic,
2152		    oia.addr16[0], ia->addr16[0], u),
2153		    oia.addr16[1], ia->addr16[1], u),
2154		    oia.addr16[2], ia->addr16[2], u),
2155		    oia.addr16[3], ia->addr16[3], u),
2156		    oia.addr16[4], ia->addr16[4], u),
2157		    oia.addr16[5], ia->addr16[5], u),
2158		    oia.addr16[6], ia->addr16[6], u),
2159		    oia.addr16[7], ia->addr16[7], u);
2160		break;
2161#endif /* INET6 */
2162	}
2163	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
2164	if (oa) {
2165		PF_ACPY(oa, na, af);
2166		switch (af) {
2167#ifdef INET
2168		case AF_INET:
2169			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
2170			    ooa.addr16[0], oa->addr16[0], 0),
2171			    ooa.addr16[1], oa->addr16[1], 0);
2172			break;
2173#endif /* INET */
2174#ifdef INET6
2175		case AF_INET6:
2176			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2177			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2178			    pf_cksum_fixup(pf_cksum_fixup(*ic,
2179			    ooa.addr16[0], oa->addr16[0], u),
2180			    ooa.addr16[1], oa->addr16[1], u),
2181			    ooa.addr16[2], oa->addr16[2], u),
2182			    ooa.addr16[3], oa->addr16[3], u),
2183			    ooa.addr16[4], oa->addr16[4], u),
2184			    ooa.addr16[5], oa->addr16[5], u),
2185			    ooa.addr16[6], oa->addr16[6], u),
2186			    ooa.addr16[7], oa->addr16[7], u);
2187			break;
2188#endif /* INET6 */
2189		}
2190	}
2191}
2192
2193
2194/*
2195 * Need to modulate the sequence numbers in the TCP SACK option
2196 * (credits to Krzysztof Pfaff for report and patch)
2197 */
2198int
2199pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
2200    struct tcphdr *th, struct pf_state_peer *dst)
2201{
2202	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
2203#ifdef __FreeBSD__
2204	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
2205#else
2206	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
2207#endif
2208	int copyback = 0, i, olen;
2209	struct sackblk sack;
2210
2211#define	TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
2212	if (hlen < TCPOLEN_SACKLEN ||
2213	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
2214		return 0;
2215
2216	while (hlen >= TCPOLEN_SACKLEN) {
2217		olen = opt[1];
2218		switch (*opt) {
2219		case TCPOPT_EOL:	/* FALLTHROUGH */
2220		case TCPOPT_NOP:
2221			opt++;
2222			hlen--;
2223			break;
2224		case TCPOPT_SACK:
2225			if (olen > hlen)
2226				olen = hlen;
2227			if (olen >= TCPOLEN_SACKLEN) {
2228				for (i = 2; i + TCPOLEN_SACK <= olen;
2229				    i += TCPOLEN_SACK) {
2230					memcpy(&sack, &opt[i], sizeof(sack));
2231					pf_change_a(&sack.start, &th->th_sum,
2232					    htonl(ntohl(sack.start) -
2233					    dst->seqdiff), 0);
2234					pf_change_a(&sack.end, &th->th_sum,
2235					    htonl(ntohl(sack.end) -
2236					    dst->seqdiff), 0);
2237					memcpy(&opt[i], &sack, sizeof(sack));
2238				}
2239				copyback = 1;
2240			}
2241			/* FALLTHROUGH */
2242		default:
2243			if (olen < 2)
2244				olen = 2;
2245			hlen -= olen;
2246			opt += olen;
2247		}
2248	}
2249
2250	if (copyback)
2251#ifdef __FreeBSD__
2252		m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
2253#else
2254		m_copyback(m, off + sizeof(*th), thoptlen, opts);
2255#endif
2256	return (copyback);
2257}
2258
/*
 * Build a stand-alone TCP segment (header only, plus an optional 4-byte
 * MSS option) and hand it to ip_output()/ip6_output().
 *
 *   replyto     (FreeBSD only) not referenced in this function body
 *   r           rule supplying rtableid and, with ALTQ, qid; may be NULL
 *   af          AF_INET or AF_INET6
 *   saddr/daddr source and destination addresses copied into the IP header
 *   sport/dport stored into the TCP header unchanged
 *   seq/ack/win converted with htonl()/htons() before being stored
 *   flags       TCP flags byte
 *   mss         when non-zero, emitted as a MAXSEG option
 *   ttl         IPv4 TTL; 0 selects the system default
 *   tag         when non-zero the mbuf is flagged as firewall-generated
 *   rtag        value stored as the packet's pf tag
 *   eh/ifp      when eh is non-NULL (IPv4 only) the packet is sent through
 *               a hand-built route carrying the swapped Ethernet addresses;
 *               ifp must then be non-NULL or the packet is dropped
 *
 * On FreeBSD the pf lock is released around the output call and
 * re-acquired afterwards.
 */
void
#ifdef __FreeBSD__
pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
#else
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
#endif
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
    u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
{
	struct mbuf	*m;
	int		 len, tlen;
#ifdef INET
	struct ip	*h;
#endif /* INET */
#ifdef INET6
	struct ip6_hdr	*h6;
#endif /* INET6 */
	struct tcphdr	*th;
	char		*opt;
#ifdef __FreeBSD__
	struct pf_mtag  *pf_mtag;

	/* Only address families compiled into the kernel are acceptable. */
	KASSERT(
#ifdef INET
	    af == AF_INET
#else
	    0
#endif
	    ||
#ifdef INET6
	    af == AF_INET6
#else
	    0
#endif
	    , ("Unsupported AF %d", af));
	/* Give the locals defined values; the switches below have no default. */
	len = 0;
	th = NULL;
#ifdef INET
	h = NULL;
#endif
#ifdef INET6
	h6 = NULL;
#endif
#endif /* __FreeBSD__ */

	/* maximum segment size tcp option */
	tlen = sizeof(struct tcphdr);
	if (mss)
		tlen += 4;

	/* Total packet length: network header plus TCP header and option. */
	switch (af) {
#ifdef INET
	case AF_INET:
		len = sizeof(struct ip) + tlen;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		len = sizeof(struct ip6_hdr) + tlen;
		break;
#endif /* INET6 */
	}

	/* create outgoing mbuf */
	m = m_gethdr(M_DONTWAIT, MT_HEADER);
	if (m == NULL)
		return;
#ifdef __FreeBSD__
#ifdef MAC
	mac_netinet_firewall_send(m);
#endif
	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
		m_freem(m);
		return;
	}
#endif
	/*
	 * Note: the "if (tag)" governs only the flag assignment directly
	 * below it; storing rtag as the packet tag is unconditional.
	 */
	if (tag)
#ifdef __FreeBSD__
		m->m_flags |= M_SKIP_FIREWALL;
	pf_mtag->tag = rtag;
#else
		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	m->m_pkthdr.pf.tag = rtag;
#endif

	/* Propagate the rule's routing table, if it names one. */
	if (r != NULL && r->rtableid >= 0)
#ifdef __FreeBSD__
	{
		M_SETFIB(m, r->rtableid);
		pf_mtag->rtableid = r->rtableid;
#else
		m->m_pkthdr.pf.rtableid = r->rtableid;
#endif
#ifdef __FreeBSD__
	}
#endif

#ifdef ALTQ
	/*
	 * NOTE(review): the header hint is taken from mtod() before m_data
	 * is advanced by max_linkhdr below, so it does not point at the IP
	 * header that is subsequently built - confirm whether this matters
	 * to the ALTQ consumers.
	 */
	if (r != NULL && r->qid) {
#ifdef __FreeBSD__
		pf_mtag->qid = r->qid;

		/* add hints for ecn */
		pf_mtag->hdr = mtod(m, struct ip *);
#else
		m->m_pkthdr.pf.qid = r->qid;
		/* add hints for ecn */
		m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
#endif
	}
#endif /* ALTQ */
	/* Leave room for a link-layer header, then start from zeroed data. */
	m->m_data += max_linkhdr;
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	bzero(m->m_data, len);
	/*
	 * First pass over the network header: fill in only the fields that
	 * take part in the TCP checksum (the length field temporarily holds
	 * the TCP length), and locate the TCP header behind it.
	 */
	switch (af) {
#ifdef INET
	case AF_INET:
		h = mtod(m, struct ip *);

		/* IP header fields included in the TCP checksum */
		h->ip_p = IPPROTO_TCP;
		h->ip_len = htons(tlen);
		h->ip_src.s_addr = saddr->v4.s_addr;
		h->ip_dst.s_addr = daddr->v4.s_addr;

		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		h6 = mtod(m, struct ip6_hdr *);

		/* IP header fields included in the TCP checksum */
		h6->ip6_nxt = IPPROTO_TCP;
		h6->ip6_plen = htons(tlen);
		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));

		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
		break;
#endif /* INET6 */
	}

	/* TCP header */
	th->th_sport = sport;
	th->th_dport = dport;
	th->th_seq = htonl(seq);
	th->th_ack = htonl(ack);
	th->th_off = tlen >> 2;
	th->th_flags = flags;
	th->th_win = htons(win);

	/* Optional MSS option: kind, length 4, 16-bit value in network order. */
	if (mss) {
		opt = (char *)(th + 1);
		opt[0] = TCPOPT_MAXSEG;
		opt[1] = 4;
		HTONS(mss);
		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
	}

	/* Second pass: checksum, finish the network header, transmit. */
	switch (af) {
#ifdef INET
	case AF_INET:
		/* TCP checksum */
		th->th_sum = in_cksum(m, len);

		/* Finish the IP header */
		h->ip_v = 4;
		h->ip_hl = sizeof(*h) >> 2;
		h->ip_tos = IPTOS_LOWDELAY;
#ifdef __FreeBSD__
		/* The FreeBSD branch stores ip_off/ip_len without htons(). */
		h->ip_off = V_path_mtu_discovery ? IP_DF : 0;
		h->ip_len = len;
		h->ip_ttl = ttl ? ttl : V_ip_defttl;
#else
		h->ip_len = htons(len);
		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
		h->ip_ttl = ttl ? ttl : ip_defttl;
#endif
		h->ip_sum = 0;
		if (eh == NULL) {
#ifdef __FreeBSD__
		PF_UNLOCK();
		ip_output(m, (void *)NULL, (void *)NULL, 0,
		    (void *)NULL, (void *)NULL);
		PF_LOCK();
#else /* ! __FreeBSD__ */
			ip_output(m, (void *)NULL, (void *)NULL, 0,
			    (void *)NULL, (void *)NULL);
#endif
		} else {
			/*
			 * NOTE(review): ro and rt live on the stack and only
			 * the members assigned below are initialised -
			 * confirm ip_output() reads nothing else from them.
			 */
			struct route		 ro;
			struct rtentry		 rt;
			struct ether_header	*e = (void *)ro.ro_dst.sa_data;

			if (ifp == NULL) {
				m_freem(m);
				return;
			}
			rt.rt_ifp = ifp;
			ro.ro_rt = &rt;
			ro.ro_dst.sa_len = sizeof(ro.ro_dst);
			ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
			/* Reply goes back the way it came: swap the MACs. */
			bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
			bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
			e->ether_type = eh->ether_type;
#ifdef __FreeBSD__
			PF_UNLOCK();
			/* XXX_IMPORT: later */
			ip_output(m, (void *)NULL, &ro, 0,
			    (void *)NULL, (void *)NULL);
			PF_LOCK();
#else /* ! __FreeBSD__ */
			ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER,
			    (void *)NULL, (void *)NULL);
#endif
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/* TCP checksum */
		th->th_sum = in6_cksum(m, IPPROTO_TCP,
		    sizeof(struct ip6_hdr), tlen);

		h6->ip6_vfc |= IPV6_VERSION;
		h6->ip6_hlim = IPV6_DEFHLIM;

#ifdef __FreeBSD__
		PF_UNLOCK();
		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
		PF_LOCK();
#else
		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
#endif
		break;
#endif /* INET6 */
	}
}
2501
2502static void
2503pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
2504    struct pf_rule *r)
2505{
2506	struct mbuf	*m0;
2507#ifdef __FreeBSD__
2508#ifdef INET
2509	struct ip *ip;
2510#endif
2511	struct pf_mtag *pf_mtag;
2512#endif
2513
2514#ifdef __FreeBSD__
2515	m0 = m_copypacket(m, M_DONTWAIT);
2516	if (m0 == NULL)
2517		return;
2518#else
2519	if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL)
2520		return;
2521#endif
2522
2523#ifdef __FreeBSD__
2524	if ((pf_mtag = pf_get_mtag(m0)) == NULL)
2525		return;
2526	/* XXX: revisit */
2527	m0->m_flags |= M_SKIP_FIREWALL;
2528#else
2529	m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
2530#endif
2531
2532	if (r->rtableid >= 0)
2533#ifdef __FreeBSD__
2534	{
2535		M_SETFIB(m0, r->rtableid);
2536		pf_mtag->rtableid = r->rtableid;
2537#else
2538		m0->m_pkthdr.pf.rtableid = r->rtableid;
2539#endif
2540#ifdef __FreeBSD__
2541	}
2542#endif
2543
2544#ifdef ALTQ
2545	if (r->qid) {
2546#ifdef __FreeBSD__
2547		pf_mtag->qid = r->qid;
2548		/* add hints for ecn */
2549		pf_mtag->hdr = mtod(m0, struct ip *);
2550#else
2551		m0->m_pkthdr.pf.qid = r->qid;
2552		/* add hints for ecn */
2553		m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
2554#endif
2555	}
2556#endif /* ALTQ */
2557
2558	switch (af) {
2559#ifdef INET
2560	case AF_INET:
2561#ifdef __FreeBSD__
2562		/* icmp_error() expects host byte ordering */
2563		ip = mtod(m0, struct ip *);
2564		NTOHS(ip->ip_len);
2565		NTOHS(ip->ip_off);
2566		PF_UNLOCK();
2567		icmp_error(m0, type, code, 0, 0);
2568		PF_LOCK();
2569#else
2570		icmp_error(m0, type, code, 0, 0);
2571#endif
2572		break;
2573#endif /* INET */
2574#ifdef INET6
2575	case AF_INET6:
2576#ifdef __FreeBSD__
2577		PF_UNLOCK();
2578#endif
2579		icmp6_error(m0, type, code, 0);
2580#ifdef __FreeBSD__
2581		PF_LOCK();
2582#endif
2583		break;
2584#endif /* INET6 */
2585	}
2586}
2587
/*
 * Compare addresses a and b under mask m for address family af.
 *
 * With n == 0 the result is 1 when the masked addresses are equal and 0
 * otherwise; a non-zero n inverts that result.  An address family that is
 * not handled counts as "not equal".
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		/* Bits that differ between a and b must all lie outside m. */
		equal = ((a->addr32[0] ^ b->addr32[0]) & m->addr32[0]) == 0;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	w;

		/* Stop at the first 32-bit word that differs under the mask. */
		for (w = 0; w < 4; w++)
			if (((a->addr32[w] ^ b->addr32[w]) &
			    m->addr32[w]) != 0)
				break;
		equal = (w == 4);
		break;
	}
#endif /* INET6 */
	}

	return (n ? !equal : equal);
}
2633
/*
 * Return 1 if b <= a <= e, otherwise return 0.
 *
 * The address words are kept in network byte order (see pf_addr_inc(),
 * which round-trips them through ntohl()/htonl()), so each word is
 * converted to host order before it is compared; ordering comparisons on
 * the raw words give wrong answers on little-endian machines.
 *
 * An address family that is not handled yields 1.
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
			return (0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/*
		 * Lexicographic comparison, most significant word first:
		 * the first word that differs decides.
		 */

		/* check a >= b */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
				return (0);
		/* check a <= e */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
				return (0);
		break;
	}
#endif /* INET6 */
	}
	return (1);
}
2671
2672int
2673pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2674{
2675	switch (op) {
2676	case PF_OP_IRG:
2677		return ((p > a1) && (p < a2));
2678	case PF_OP_XRG:
2679		return ((p < a1) || (p > a2));
2680	case PF_OP_RRG:
2681		return ((p >= a1) && (p <= a2));
2682	case PF_OP_EQ:
2683		return (p == a1);
2684	case PF_OP_NE:
2685		return (p != a1);
2686	case PF_OP_LT:
2687		return (p < a1);
2688	case PF_OP_LE:
2689		return (p <= a1);
2690	case PF_OP_GT:
2691		return (p > a1);
2692	case PF_OP_GE:
2693		return (p >= a1);
2694	}
2695	return (0); /* never reached */
2696}
2697
/*
 * Port flavour of pf_match(): the bounds a1/a2 and the port p are
 * converted with ntohs() before being compared.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	u_int16_t	lo = ntohs(a1);
	u_int16_t	hi = ntohs(a2);
	u_int16_t	port = ntohs(p);

	return (pf_match(op, lo, hi, port));
}
2706
2707int
2708pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2709{
2710	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2711		return (0);
2712	return (pf_match(op, a1, a2, u));
2713}
2714
2715int
2716pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2717{
2718	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2719		return (0);
2720	return (pf_match(op, a1, a2, g));
2721}
2722
2723int
2724#ifdef __FreeBSD__
2725pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag,
2726    struct pf_mtag *pf_mtag)
2727#else
2728pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
2729#endif
2730{
2731	if (*tag == -1)
2732#ifdef __FreeBSD__
2733		*tag = pf_mtag->tag;
2734#else
2735		*tag = m->m_pkthdr.pf.tag;
2736#endif
2737
2738	return ((!r->match_tag_not && r->match_tag == *tag) ||
2739	    (r->match_tag_not && r->match_tag != *tag));
2740}
2741
2742int
2743#ifdef __FreeBSD__
2744pf_tag_packet(struct mbuf *m, int tag, int rtableid,
2745    struct pf_mtag *pf_mtag)
2746#else
2747pf_tag_packet(struct mbuf *m, int tag, int rtableid)
2748#endif
2749{
2750	if (tag <= 0 && rtableid < 0)
2751		return (0);
2752
2753	if (tag > 0)
2754#ifdef __FreeBSD__
2755		pf_mtag->tag = tag;
2756#else
2757		m->m_pkthdr.pf.tag = tag;
2758#endif
2759	if (rtableid >= 0)
2760#ifdef __FreeBSD__
2761	{
2762		M_SETFIB(m, rtableid);
2763	}
2764#else
2765		m->m_pkthdr.pf.rtableid = rtableid;
2766#endif
2767
2768	return (0);
2769}
2770
2771void
2772pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
2773    struct pf_rule **r, struct pf_rule **a, int *match)
2774{
2775	struct pf_anchor_stackframe	*f;
2776
2777	(*r)->anchor->match = 0;
2778	if (match)
2779		*match = 0;
2780#ifdef __FreeBSD__
2781	if (*depth >= sizeof(V_pf_anchor_stack) /
2782	    sizeof(V_pf_anchor_stack[0])) {
2783#else
2784	if (*depth >= sizeof(pf_anchor_stack) /
2785	    sizeof(pf_anchor_stack[0])) {
2786#endif
2787		printf("pf_step_into_anchor: stack overflow\n");
2788		*r = TAILQ_NEXT(*r, entries);
2789		return;
2790	} else if (*depth == 0 && a != NULL)
2791		*a = *r;
2792#ifdef __FreeBSD__
2793	f = V_pf_anchor_stack + (*depth)++;
2794#else
2795	f = pf_anchor_stack + (*depth)++;
2796#endif
2797	f->rs = *rs;
2798	f->r = *r;
2799	if ((*r)->anchor_wildcard) {
2800		f->parent = &(*r)->anchor->children;
2801		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
2802		    NULL) {
2803			*r = NULL;
2804			return;
2805		}
2806		*rs = &f->child->ruleset;
2807	} else {
2808		f->parent = NULL;
2809		f->child = NULL;
2810		*rs = &(*r)->anchor->ruleset;
2811	}
2812	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2813}
2814
2815int
2816pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2817    struct pf_rule **r, struct pf_rule **a, int *match)
2818{
2819	struct pf_anchor_stackframe	*f;
2820	int quick = 0;
2821
2822	do {
2823		if (*depth <= 0)
2824			break;
2825#ifdef __FreeBSD__
2826		f = V_pf_anchor_stack + *depth - 1;
2827#else
2828		f = pf_anchor_stack + *depth - 1;
2829#endif
2830		if (f->parent != NULL && f->child != NULL) {
2831			if (f->child->match ||
2832			    (match != NULL && *match)) {
2833				f->r->anchor->match = 1;
2834				*match = 0;
2835			}
2836			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2837			if (f->child != NULL) {
2838				*rs = &f->child->ruleset;
2839				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2840				if (*r == NULL)
2841					continue;
2842				else
2843					break;
2844			}
2845		}
2846		(*depth)--;
2847		if (*depth == 0 && a != NULL)
2848			*a = NULL;
2849		*rs = f->rs;
2850		if (f->r->anchor->match || (match != NULL && *match))
2851			quick = f->r->quick;
2852		*r = TAILQ_NEXT(f->r, entries);
2853	} while (*r == NULL);
2854
2855	return (quick);
2856}
2857
2858#ifdef INET6
2859void
2860pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2861    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2862{
2863	switch (af) {
2864#ifdef INET
2865	case AF_INET:
2866		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2867		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2868		break;
2869#endif /* INET */
2870	case AF_INET6:
2871		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2872		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2873		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2874		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2875		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2876		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2877		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2878		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2879		break;
2880	}
2881}
2882
2883void
2884pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2885{
2886	switch (af) {
2887#ifdef INET
2888	case AF_INET:
2889		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2890		break;
2891#endif /* INET */
2892	case AF_INET6:
2893		if (addr->addr32[3] == 0xffffffff) {
2894			addr->addr32[3] = 0;
2895			if (addr->addr32[2] == 0xffffffff) {
2896				addr->addr32[2] = 0;
2897				if (addr->addr32[1] == 0xffffffff) {
2898					addr->addr32[1] = 0;
2899					addr->addr32[0] =
2900					    htonl(ntohl(addr->addr32[0]) + 1);
2901				} else
2902					addr->addr32[1] =
2903					    htonl(ntohl(addr->addr32[1]) + 1);
2904			} else
2905				addr->addr32[2] =
2906				    htonl(ntohl(addr->addr32[2]) + 1);
2907		} else
2908			addr->addr32[3] =
2909			    htonl(ntohl(addr->addr32[3]) + 1);
2910		break;
2911	}
2912}
2913#endif /* INET6 */
2914
/*
 * Look up the local socket a TCP or UDP packet belongs to and record the
 * credentials of its owner in pd->lookup.
 *
 * pd->lookup.uid/gid/pid are first reset to UID_MAX/GID_MAX/NO_PID.  On
 * success uid and gid are filled in (pid only in the non-FreeBSD branch)
 * and 1 is returned.  -1 is returned when pd is NULL, the protocol is
 * neither TCP nor UDP, the protocol header pointer is missing, the
 * address family is not handled, or no matching PCB is found.
 *
 * On FreeBSD a non-NULL inp_arg short-circuits the lookup: its
 * credentials are used directly.
 */
int
#ifdef __FreeBSD__
pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg)
#else
pf_socket_lookup(int direction, struct pf_pdesc *pd)
#endif
{
	struct pf_addr		*saddr, *daddr;
	u_int16_t		 sport, dport;
#ifdef __FreeBSD__
	struct inpcbinfo	*pi;
#else
	struct inpcbtable	*tb;
#endif
	struct inpcb		*inp;

	if (pd == NULL)
		return (-1);
	/* Start from "unknown" so a failed lookup leaves sane values. */
	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;

#ifdef __FreeBSD__
	/* Caller already holds the PCB: take the credentials from it. */
	if (inp_arg != NULL) {
		INP_LOCK_ASSERT(inp_arg);
		pd->lookup.uid = inp_arg->inp_cred->cr_uid;
		pd->lookup.gid = inp_arg->inp_cred->cr_groups[0];
		return (1);
	}
#endif

	/* Pick the ports and the PCB table matching the protocol. */
	switch (pd->proto) {
	case IPPROTO_TCP:
		if (pd->hdr.tcp == NULL)
			return (-1);
		sport = pd->hdr.tcp->th_sport;
		dport = pd->hdr.tcp->th_dport;
#ifdef __FreeBSD__
		pi = &V_tcbinfo;
#else
		tb = &tcbtable;
#endif
		break;
	case IPPROTO_UDP:
		if (pd->hdr.udp == NULL)
			return (-1);
		sport = pd->hdr.udp->uh_sport;
		dport = pd->hdr.udp->uh_dport;
#ifdef __FreeBSD__
		pi = &V_udbinfo;
#else
		tb = &udbtable;
#endif
		break;
	default:
		return (-1);
	}
	/*
	 * For any direction other than PF_IN the endpoints are swapped
	 * before the lookup (addresses and ports alike).
	 */
	if (direction == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		u_int16_t	p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	/*
	 * Try an exact lookup first; if that finds nothing, retry with a
	 * wildcard (FreeBSD) or listening-socket (otherwise) lookup.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET:
#ifdef __FreeBSD__
		/*
		 * XXXRW: would be nice if we had an mbuf here so that we
		 * could use in_pcblookup_mbuf().
		 */
		inp = in_pcblookup(pi, saddr->v4, sport, daddr->v4,
			dport, INPLOOKUP_RLOCKPCB, NULL);
		if (inp == NULL) {
			inp = in_pcblookup(pi, saddr->v4, sport,
			   daddr->v4, dport, INPLOOKUP_WILDCARD |
			   INPLOOKUP_RLOCKPCB, NULL);
			if (inp == NULL)
				return (-1);
		}
#else
		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
		if (inp == NULL) {
			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0,
			    NULL);
			if (inp == NULL)
				return (-1);
		}
#endif
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
#ifdef __FreeBSD__
		/*
		 * XXXRW: would be nice if we had an mbuf here so that we
		 * could use in6_pcblookup_mbuf().
		 */
		inp = in6_pcblookup(pi, &saddr->v6, sport,
			&daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL);
		if (inp == NULL) {
			inp = in6_pcblookup(pi, &saddr->v6, sport,
			    &daddr->v6, dport, INPLOOKUP_WILDCARD |
			    INPLOOKUP_RLOCKPCB, NULL);
			if (inp == NULL)
				return (-1);
		}
#else
		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
		    dport);
		if (inp == NULL) {
			inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0,
			    NULL);
			if (inp == NULL)
				return (-1);
		}
#endif
		break;
#endif /* INET6 */

	default:
		return (-1);
	}
#ifdef __FreeBSD__
	/*
	 * The lookups above asked for the PCB read lock
	 * (INPLOOKUP_RLOCKPCB); release it once the credentials are copied.
	 */
	INP_RLOCK_ASSERT(inp);
	pd->lookup.uid = inp->inp_cred->cr_uid;
	pd->lookup.gid = inp->inp_cred->cr_groups[0];
	INP_RUNLOCK(inp);
#else
	pd->lookup.uid = inp->inp_socket->so_euid;
	pd->lookup.gid = inp->inp_socket->so_egid;
	pd->lookup.pid = inp->inp_socket->so_cpid;
#endif
	return (1);
}
3056
3057u_int8_t
3058pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3059{
3060	int		 hlen;
3061	u_int8_t	 hdr[60];
3062	u_int8_t	*opt, optlen;
3063	u_int8_t	 wscale = 0;
3064
3065	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
3066	if (hlen <= sizeof(struct tcphdr))
3067		return (0);
3068	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3069		return (0);
3070	opt = hdr + sizeof(struct tcphdr);
3071	hlen -= sizeof(struct tcphdr);
3072	while (hlen >= 3) {
3073		switch (*opt) {
3074		case TCPOPT_EOL:
3075		case TCPOPT_NOP:
3076			++opt;
3077			--hlen;
3078			break;
3079		case TCPOPT_WINDOW:
3080			wscale = opt[2];
3081			if (wscale > TCP_MAX_WINSHIFT)
3082				wscale = TCP_MAX_WINSHIFT;
3083			wscale |= PF_WSCALE_FLAG;
3084			/* FALLTHROUGH */
3085		default:
3086			optlen = opt[1];
3087			if (optlen < 2)
3088				optlen = 2;
3089			hlen -= optlen;
3090			opt += optlen;
3091			break;
3092		}
3093	}
3094	return (wscale);
3095}
3096
3097u_int16_t
3098pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3099{
3100	int		 hlen;
3101	u_int8_t	 hdr[60];
3102	u_int8_t	*opt, optlen;
3103#ifdef __FreeBSD__
3104	u_int16_t	 mss = V_tcp_mssdflt;
3105#else
3106	u_int16_t	 mss = tcp_mssdflt;
3107#endif
3108
3109	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
3110	if (hlen <= sizeof(struct tcphdr))
3111		return (0);
3112	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3113		return (0);
3114	opt = hdr + sizeof(struct tcphdr);
3115	hlen -= sizeof(struct tcphdr);
3116	while (hlen >= TCPOLEN_MAXSEG) {
3117		switch (*opt) {
3118		case TCPOPT_EOL:
3119		case TCPOPT_NOP:
3120			++opt;
3121			--hlen;
3122			break;
3123		case TCPOPT_MAXSEG:
3124			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
3125			NTOHS(mss);
3126			/* FALLTHROUGH */
3127		default:
3128			optlen = opt[1];
3129			if (optlen < 2)
3130				optlen = 2;
3131			hlen -= optlen;
3132			opt += optlen;
3133			break;
3134		}
3135	}
3136	return (mss);
3137}
3138
/*
 * Work out the MSS to use towards addr.
 *
 * A route to addr is looked up; if it names an interface, the MSS is
 * derived from that interface's MTU minus the network and TCP header
 * sizes, but never below the system default MSS.  Without such a route
 * the system default MSS is used.  The result is then limited to the
 * peer's offer and finally raised to at least 64.
 */
u_int16_t
pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
{
#ifdef INET
	struct sockaddr_in	*dst;
	struct route		 ro;
#endif /* INET */
#ifdef INET6
	struct sockaddr_in6	*dst6;
	struct route_in6	 ro6;
#endif /* INET6 */
	struct rtentry		*rt = NULL;
#ifdef __FreeBSD__
	int			 hlen = 0;
	u_int16_t		 mss = V_tcp_mssdflt;
#else
	int			 hlen;
	u_int16_t		 mss = tcp_mssdflt;
#endif

	/* Build a destination sockaddr in a zeroed route and resolve it. */
	switch (af) {
#ifdef INET
	case AF_INET:
		hlen = sizeof(struct ip);
		bzero(&ro, sizeof(ro));
		dst = (struct sockaddr_in *)&ro.ro_dst;
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
#ifdef __FreeBSD__
#ifdef RTF_PRCLONING
		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
#else /* !RTF_PRCLONING */
		in_rtalloc_ign(&ro, 0, 0);
#endif
#else /* ! __FreeBSD__ */
		rtalloc_noclone(&ro, NO_CLONING);
#endif
		rt = ro.ro_rt;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		bzero(&ro6, sizeof(ro6));
		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
#ifdef __FreeBSD__
#ifdef RTF_PRCLONING
		rtalloc_ign((struct route *)&ro6,
		    (RTF_CLONING | RTF_PRCLONING));
#else /* !RTF_PRCLONING */
		rtalloc_ign((struct route *)&ro6, 0);
#endif
#else /* ! __FreeBSD__ */
		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
#endif
		rt = ro6.ro_rt;
		break;
#endif /* INET6 */
	}

	/* Interface MTU minus headers, floored at the default MSS. */
	if (rt && rt->rt_ifp) {
		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
#ifdef __FreeBSD__
		mss = max(V_tcp_mssdflt, mss);
#else
		mss = max(tcp_mssdflt, mss);
#endif
		/* Done with the route obtained above. */
		RTFREE(rt);
	}
	mss = min(mss, offer);
	mss = max(mss, 64);		/* sanity - at least max opt space */
	return (mss);
}
3216
3217void
3218pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
3219{
3220	struct pf_rule *r = s->rule.ptr;
3221	struct pf_src_node *sn = NULL;
3222
3223	s->rt_kif = NULL;
3224	if (!r->rt || r->rt == PF_FASTROUTE)
3225		return;
3226	switch (s->key[PF_SK_WIRE]->af) {
3227#ifdef INET
3228	case AF_INET:
3229		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn);
3230		s->rt_kif = r->rpool.cur->kif;
3231		break;
3232#endif /* INET */
3233#ifdef INET6
3234	case AF_INET6:
3235		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn);
3236		s->rt_kif = r->rpool.cur->kif;
3237		break;
3238#endif /* INET6 */
3239	}
3240}
3241
3242u_int32_t
3243pf_tcp_iss(struct pf_pdesc *pd)
3244{
3245	MD5_CTX ctx;
3246	u_int32_t digest[4];
3247
3248#ifdef __FreeBSD__
3249	if (V_pf_tcp_secret_init == 0) {
3250		read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
3251		MD5Init(&V_pf_tcp_secret_ctx);
3252		MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
3253		    sizeof(V_pf_tcp_secret));
3254		V_pf_tcp_secret_init = 1;
3255	}
3256
3257	ctx = V_pf_tcp_secret_ctx;
3258#else
3259	if (pf_tcp_secret_init == 0) {
3260		arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret));
3261		MD5Init(&pf_tcp_secret_ctx);
3262		MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
3263		    sizeof(pf_tcp_secret));
3264		pf_tcp_secret_init = 1;
3265	}
3266
3267	ctx = pf_tcp_secret_ctx;
3268#endif
3269
3270	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
3271	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
3272	if (pd->af == AF_INET6) {
3273		MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
3274		MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
3275	} else {
3276		MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
3277		MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
3278	}
3279	MD5Final((u_char *)digest, &ctx);
3280#ifdef __FreeBSD__
3281	V_pf_tcp_iss_off += 4096;
3282#define	ISN_RANDOM_INCREMENT (4096 - 1)
3283	return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
3284	    V_pf_tcp_iss_off);
3285#undef	ISN_RANDOM_INCREMENT
3286#else
3287	pf_tcp_iss_off += 4096;
3288	return (digest[0] + tcp_iss + pf_tcp_iss_off);
3289#endif
3290}
3291
3292int
3293pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
3294    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3295    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3296#ifdef __FreeBSD__
3297    struct ifqueue *ifq, struct inpcb *inp)
3298#else
3299    struct ifqueue *ifq)
3300#endif
3301{
3302	struct pf_rule		*nr = NULL;
3303	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3304	sa_family_t		 af = pd->af;
3305	struct pf_rule		*r, *a = NULL;
3306	struct pf_ruleset	*ruleset = NULL;
3307	struct pf_src_node	*nsn = NULL;
3308	struct tcphdr		*th = pd->hdr.tcp;
3309	struct pf_state_key	*skw = NULL, *sks = NULL;
3310	struct pf_state_key	*sk = NULL, *nk = NULL;
3311	u_short			 reason;
3312	int			 rewrite = 0, hdrlen = 0;
3313	int			 tag = -1, rtableid = -1;
3314	int			 asd = 0;
3315	int			 match = 0;
3316	int			 state_icmp = 0;
3317#ifdef __FreeBSD__
3318	u_int16_t		 sport = 0, dport = 0;
3319	u_int16_t		 bproto_sum = 0, bip_sum = 0;
3320#else
3321	u_int16_t		 sport, dport;
3322	u_int16_t		 bproto_sum = 0, bip_sum;
3323#endif
3324	u_int8_t		 icmptype = 0, icmpcode = 0;
3325
3326
3327	if (direction == PF_IN && pf_check_congestion(ifq)) {
3328		REASON_SET(&reason, PFRES_CONGEST);
3329		return (PF_DROP);
3330	}
3331
3332#ifdef __FreeBSD__
3333	if (inp != NULL)
3334		pd->lookup.done = pf_socket_lookup(direction, pd, inp);
3335	else if (V_debug_pfugidhack) {
3336		PF_UNLOCK();
3337		DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
3338		    pd->lookup.done = pf_socket_lookup(direction, pd, inp);
3339		PF_LOCK();
3340	}
3341#endif
3342
3343	switch (pd->proto) {
3344	case IPPROTO_TCP:
3345		sport = th->th_sport;
3346		dport = th->th_dport;
3347		hdrlen = sizeof(*th);
3348		break;
3349	case IPPROTO_UDP:
3350		sport = pd->hdr.udp->uh_sport;
3351		dport = pd->hdr.udp->uh_dport;
3352		hdrlen = sizeof(*pd->hdr.udp);
3353		break;
3354#ifdef INET
3355	case IPPROTO_ICMP:
3356		if (pd->af != AF_INET)
3357			break;
3358		sport = dport = pd->hdr.icmp->icmp_id;
3359		hdrlen = sizeof(*pd->hdr.icmp);
3360		icmptype = pd->hdr.icmp->icmp_type;
3361		icmpcode = pd->hdr.icmp->icmp_code;
3362
3363		if (icmptype == ICMP_UNREACH ||
3364		    icmptype == ICMP_SOURCEQUENCH ||
3365		    icmptype == ICMP_REDIRECT ||
3366		    icmptype == ICMP_TIMXCEED ||
3367		    icmptype == ICMP_PARAMPROB)
3368			state_icmp++;
3369		break;
3370#endif /* INET */
3371#ifdef INET6
3372	case IPPROTO_ICMPV6:
3373		if (af != AF_INET6)
3374			break;
3375		sport = dport = pd->hdr.icmp6->icmp6_id;
3376		hdrlen = sizeof(*pd->hdr.icmp6);
3377		icmptype = pd->hdr.icmp6->icmp6_type;
3378		icmpcode = pd->hdr.icmp6->icmp6_code;
3379
3380		if (icmptype == ICMP6_DST_UNREACH ||
3381		    icmptype == ICMP6_PACKET_TOO_BIG ||
3382		    icmptype == ICMP6_TIME_EXCEEDED ||
3383		    icmptype == ICMP6_PARAM_PROB)
3384			state_icmp++;
3385		break;
3386#endif /* INET6 */
3387	default:
3388		sport = dport = hdrlen = 0;
3389		break;
3390	}
3391
3392	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3393
3394	/* check packet for BINAT/NAT/RDR */
3395	if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn,
3396	    &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) {
3397		if (nk == NULL || sk == NULL) {
3398			REASON_SET(&reason, PFRES_MEMORY);
3399			goto cleanup;
3400		}
3401
3402		if (pd->ip_sum)
3403			bip_sum = *pd->ip_sum;
3404
3405		switch (pd->proto) {
3406		case IPPROTO_TCP:
3407			bproto_sum = th->th_sum;
3408			pd->proto_sum = &th->th_sum;
3409
3410			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3411			    nk->port[pd->sidx] != sport) {
3412				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3413				    &th->th_sum, &nk->addr[pd->sidx],
3414				    nk->port[pd->sidx], 0, af);
3415				pd->sport = &th->th_sport;
3416				sport = th->th_sport;
3417			}
3418
3419			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3420			    nk->port[pd->didx] != dport) {
3421				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3422				    &th->th_sum, &nk->addr[pd->didx],
3423				    nk->port[pd->didx], 0, af);
3424				dport = th->th_dport;
3425				pd->dport = &th->th_dport;
3426			}
3427			rewrite++;
3428			break;
3429		case IPPROTO_UDP:
3430			bproto_sum = pd->hdr.udp->uh_sum;
3431			pd->proto_sum = &pd->hdr.udp->uh_sum;
3432
3433			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3434			    nk->port[pd->sidx] != sport) {
3435				pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
3436				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3437				    &nk->addr[pd->sidx],
3438				    nk->port[pd->sidx], 1, af);
3439				sport = pd->hdr.udp->uh_sport;
3440				pd->sport = &pd->hdr.udp->uh_sport;
3441			}
3442
3443			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3444			    nk->port[pd->didx] != dport) {
3445				pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
3446				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3447				    &nk->addr[pd->didx],
3448				    nk->port[pd->didx], 1, af);
3449				dport = pd->hdr.udp->uh_dport;
3450				pd->dport = &pd->hdr.udp->uh_dport;
3451			}
3452			rewrite++;
3453			break;
3454#ifdef INET
3455		case IPPROTO_ICMP:
3456			nk->port[0] = nk->port[1];
3457			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
3458				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3459				    nk->addr[pd->sidx].v4.s_addr, 0);
3460
3461			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
3462				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3463				    nk->addr[pd->didx].v4.s_addr, 0);
3464
3465			if (nk->port[1] != pd->hdr.icmp->icmp_id) {
3466				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
3467				    pd->hdr.icmp->icmp_cksum, sport,
3468				    nk->port[1], 0);
3469				pd->hdr.icmp->icmp_id = nk->port[1];
3470				pd->sport = &pd->hdr.icmp->icmp_id;
3471			}
3472			m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
3473			break;
3474#endif /* INET */
3475#ifdef INET6
3476		case IPPROTO_ICMPV6:
3477			nk->port[0] = nk->port[1];
3478			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
3479				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3480				    &nk->addr[pd->sidx], 0);
3481
3482			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
3483				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3484				    &nk->addr[pd->didx], 0);
3485			rewrite++;
3486			break;
3487#endif /* INET */
3488		default:
3489			switch (af) {
3490#ifdef INET
3491			case AF_INET:
3492				if (PF_ANEQ(saddr,
3493				    &nk->addr[pd->sidx], AF_INET))
3494					pf_change_a(&saddr->v4.s_addr,
3495					    pd->ip_sum,
3496					    nk->addr[pd->sidx].v4.s_addr, 0);
3497
3498				if (PF_ANEQ(daddr,
3499				    &nk->addr[pd->didx], AF_INET))
3500					pf_change_a(&daddr->v4.s_addr,
3501					    pd->ip_sum,
3502					    nk->addr[pd->didx].v4.s_addr, 0);
3503				break;
3504#endif /* INET */
3505#ifdef INET6
3506			case AF_INET6:
3507				if (PF_ANEQ(saddr,
3508				    &nk->addr[pd->sidx], AF_INET6))
3509					PF_ACPY(saddr, &nk->addr[pd->sidx], af);
3510
3511				if (PF_ANEQ(daddr,
3512				    &nk->addr[pd->didx], AF_INET6))
3513					PF_ACPY(saddr, &nk->addr[pd->didx], af);
3514				break;
3515#endif /* INET */
3516			}
3517			break;
3518		}
3519		if (nr->natpass)
3520			r = NULL;
3521		pd->nat_rule = nr;
3522	}
3523
3524	while (r != NULL) {
3525		r->evaluations++;
3526		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3527			r = r->skip[PF_SKIP_IFP].ptr;
3528		else if (r->direction && r->direction != direction)
3529			r = r->skip[PF_SKIP_DIR].ptr;
3530		else if (r->af && r->af != af)
3531			r = r->skip[PF_SKIP_AF].ptr;
3532		else if (r->proto && r->proto != pd->proto)
3533			r = r->skip[PF_SKIP_PROTO].ptr;
3534		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3535		    r->src.neg, kif))
3536			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3537		/* tcp/udp only. port_op always 0 in other cases */
3538		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3539		    r->src.port[0], r->src.port[1], sport))
3540			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3541		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3542		    r->dst.neg, NULL))
3543			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3544		/* tcp/udp only. port_op always 0 in other cases */
3545		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3546		    r->dst.port[0], r->dst.port[1], dport))
3547			r = r->skip[PF_SKIP_DST_PORT].ptr;
3548		/* icmp only. type always 0 in other cases */
3549		else if (r->type && r->type != icmptype + 1)
3550			r = TAILQ_NEXT(r, entries);
3551		/* icmp only. type always 0 in other cases */
3552		else if (r->code && r->code != icmpcode + 1)
3553			r = TAILQ_NEXT(r, entries);
3554		else if (r->tos && !(r->tos == pd->tos))
3555			r = TAILQ_NEXT(r, entries);
3556		else if (r->rule_flag & PFRULE_FRAGMENT)
3557			r = TAILQ_NEXT(r, entries);
3558		else if (pd->proto == IPPROTO_TCP &&
3559		    (r->flagset & th->th_flags) != r->flags)
3560			r = TAILQ_NEXT(r, entries);
3561		/* tcp/udp only. uid.op always 0 in other cases */
3562		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3563#ifdef __FreeBSD__
3564		    pf_socket_lookup(direction, pd, inp), 1)) &&
3565#else
3566		    pf_socket_lookup(direction, pd), 1)) &&
3567#endif
3568		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3569		    pd->lookup.uid))
3570			r = TAILQ_NEXT(r, entries);
3571		/* tcp/udp only. gid.op always 0 in other cases */
3572		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3573#ifdef __FreeBSD__
3574		    pf_socket_lookup(direction, pd, inp), 1)) &&
3575#else
3576		    pf_socket_lookup(direction, pd), 1)) &&
3577#endif
3578		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3579		    pd->lookup.gid))
3580			r = TAILQ_NEXT(r, entries);
3581		else if (r->prob &&
3582#ifdef __FreeBSD__
3583		    r->prob <= arc4random())
3584#else
3585		    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1)
3586#endif
3587			r = TAILQ_NEXT(r, entries);
3588#ifdef __FreeBSD__
3589		else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
3590#else
3591		else if (r->match_tag && !pf_match_tag(m, r, &tag))
3592#endif
3593			r = TAILQ_NEXT(r, entries);
3594		else if (r->os_fingerprint != PF_OSFP_ANY &&
3595		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3596		    pf_osfp_fingerprint(pd, m, off, th),
3597		    r->os_fingerprint)))
3598			r = TAILQ_NEXT(r, entries);
3599		else {
3600			if (r->tag)
3601				tag = r->tag;
3602			if (r->rtableid >= 0)
3603				rtableid = r->rtableid;
3604			if (r->anchor == NULL) {
3605				match = 1;
3606				*rm = r;
3607				*am = a;
3608				*rsm = ruleset;
3609				if ((*rm)->quick)
3610					break;
3611				r = TAILQ_NEXT(r, entries);
3612			} else
3613				pf_step_into_anchor(&asd, &ruleset,
3614				    PF_RULESET_FILTER, &r, &a, &match);
3615		}
3616		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3617		    PF_RULESET_FILTER, &r, &a, &match))
3618			break;
3619	}
3620	r = *rm;
3621	a = *am;
3622	ruleset = *rsm;
3623
3624	REASON_SET(&reason, PFRES_MATCH);
3625
3626	if (r->log || (nr != NULL && nr->log)) {
3627		if (rewrite)
3628			m_copyback(m, off, hdrlen, pd->hdr.any);
3629		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3630		    a, ruleset, pd);
3631	}
3632
3633	if ((r->action == PF_DROP) &&
3634	    ((r->rule_flag & PFRULE_RETURNRST) ||
3635	    (r->rule_flag & PFRULE_RETURNICMP) ||
3636	    (r->rule_flag & PFRULE_RETURN))) {
3637		/* undo NAT changes, if they have taken place */
3638		if (nr != NULL) {
3639			PF_ACPY(saddr, &sk->addr[pd->sidx], af);
3640			PF_ACPY(daddr, &sk->addr[pd->didx], af);
3641			if (pd->sport)
3642				*pd->sport = sk->port[pd->sidx];
3643			if (pd->dport)
3644				*pd->dport = sk->port[pd->didx];
3645			if (pd->proto_sum)
3646				*pd->proto_sum = bproto_sum;
3647			if (pd->ip_sum)
3648				*pd->ip_sum = bip_sum;
3649			m_copyback(m, off, hdrlen, pd->hdr.any);
3650		}
3651		if (pd->proto == IPPROTO_TCP &&
3652		    ((r->rule_flag & PFRULE_RETURNRST) ||
3653		    (r->rule_flag & PFRULE_RETURN)) &&
3654		    !(th->th_flags & TH_RST)) {
3655			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
3656			int		 len = 0;
3657#ifdef INET
3658			struct ip	*h4;
3659#endif
3660#ifdef INET6
3661			struct ip6_hdr	*h6;
3662#endif
3663
3664			switch (af) {
3665#ifdef INET
3666			case AF_INET:
3667				h4 = mtod(m, struct ip *);
3668				len = ntohs(h4->ip_len) - off;
3669				break;
3670#endif
3671#ifdef INET6
3672			case AF_INET6:
3673				h6 = mtod(m, struct ip6_hdr *);
3674				len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
3675				break;
3676#endif
3677			}
3678
3679			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
3680				REASON_SET(&reason, PFRES_PROTCKSUM);
3681			else {
3682				if (th->th_flags & TH_SYN)
3683					ack++;
3684				if (th->th_flags & TH_FIN)
3685					ack++;
3686#ifdef __FreeBSD__
3687				pf_send_tcp(m, r, af, pd->dst,
3688#else
3689				pf_send_tcp(r, af, pd->dst,
3690#endif
3691				    pd->src, th->th_dport, th->th_sport,
3692				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3693				    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
3694			}
3695		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
3696		    r->return_icmp)
3697			pf_send_icmp(m, r->return_icmp >> 8,
3698			    r->return_icmp & 255, af, r);
3699		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
3700		    r->return_icmp6)
3701			pf_send_icmp(m, r->return_icmp6 >> 8,
3702			    r->return_icmp6 & 255, af, r);
3703	}
3704
3705	if (r->action == PF_DROP)
3706		goto cleanup;
3707
3708#ifdef __FreeBSD__
3709	if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) {
3710#else
3711	if (pf_tag_packet(m, tag, rtableid)) {
3712#endif
3713		REASON_SET(&reason, PFRES_MEMORY);
3714		goto cleanup;
3715	}
3716
3717	if (!state_icmp && (r->keep_state || nr != NULL ||
3718	    (pd->flags & PFDESC_TCP_NORM))) {
3719		int action;
3720		action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m,
3721		    off, sport, dport, &rewrite, kif, sm, tag, bproto_sum,
3722		    bip_sum, hdrlen);
3723		if (action != PF_PASS)
3724			return (action);
3725	} else {
3726#ifdef __FreeBSD__
3727		if (sk != NULL)
3728			pool_put(&V_pf_state_key_pl, sk);
3729		if (nk != NULL)
3730			pool_put(&V_pf_state_key_pl, nk);
3731#else
3732		if (sk != NULL)
3733			pool_put(&pf_state_key_pl, sk);
3734		if (nk != NULL)
3735			pool_put(&pf_state_key_pl, nk);
3736#endif
3737	}
3738
3739	/* copy back packet headers if we performed NAT operations */
3740	if (rewrite)
3741		m_copyback(m, off, hdrlen, pd->hdr.any);
3742
3743#if NPFSYNC > 0
3744	if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
3745#ifdef __FreeBSD__
3746	    direction == PF_OUT && pfsync_up_ptr != NULL && pfsync_up_ptr()) {
3747#else
3748	    direction == PF_OUT && pfsync_up()) {
3749#endif
3750		/*
3751		 * We want the state created, but we dont
3752		 * want to send this in case a partner
3753		 * firewall has to know about it to allow
3754		 * replies through it.
3755		 */
3756#ifdef __FreeBSD__
3757		if (pfsync_defer_ptr != NULL &&
3758			pfsync_defer_ptr(*sm, m))
3759#else
3760		if (pfsync_defer(*sm, m))
3761#endif
3762			return (PF_DEFER);
3763	}
3764#endif
3765
3766	return (PF_PASS);
3767
3768cleanup:
3769#ifdef __FreeBSD__
3770	if (sk != NULL)
3771		pool_put(&V_pf_state_key_pl, sk);
3772	if (nk != NULL)
3773		pool_put(&V_pf_state_key_pl, nk);
3774#else
3775	if (sk != NULL)
3776		pool_put(&pf_state_key_pl, sk);
3777	if (nk != NULL)
3778		pool_put(&pf_state_key_pl, nk);
3779#endif
3780	return (PF_DROP);
3781}
3782
3783static __inline int
3784pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
3785    struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw,
3786    struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk,
3787    struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite,
3788    struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum,
3789    u_int16_t bip_sum, int hdrlen)
3790{
3791	struct pf_state		*s = NULL;
3792	struct pf_src_node	*sn = NULL;
3793	struct tcphdr		*th = pd->hdr.tcp;
3794#ifdef __FreeBSD__
3795	u_int16_t		 mss = V_tcp_mssdflt;
3796#else
3797	u_int16_t		 mss = tcp_mssdflt;
3798#endif
3799	u_short			 reason;
3800
3801	/* check maximums */
3802	if (r->max_states && (r->states_cur >= r->max_states)) {
3803#ifdef __FreeBSD__
3804		V_pf_status.lcounters[LCNT_STATES]++;
3805#else
3806		pf_status.lcounters[LCNT_STATES]++;
3807#endif
3808		REASON_SET(&reason, PFRES_MAXSTATES);
3809		return (PF_DROP);
3810	}
3811	/* src node for filter rule */
3812	if ((r->rule_flag & PFRULE_SRCTRACK ||
3813	    r->rpool.opts & PF_POOL_STICKYADDR) &&
3814	    pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
3815		REASON_SET(&reason, PFRES_SRCLIMIT);
3816		goto csfailed;
3817	}
3818	/* src node for translation rule */
3819	if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3820	    pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
3821		REASON_SET(&reason, PFRES_SRCLIMIT);
3822		goto csfailed;
3823	}
3824#ifdef __FreeBSD__
3825	s = pool_get(&V_pf_state_pl, PR_NOWAIT | PR_ZERO);
3826#else
3827	s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
3828#endif
3829	if (s == NULL) {
3830		REASON_SET(&reason, PFRES_MEMORY);
3831		goto csfailed;
3832	}
3833	s->rule.ptr = r;
3834	s->nat_rule.ptr = nr;
3835	s->anchor.ptr = a;
3836	STATE_INC_COUNTERS(s);
3837	if (r->allow_opts)
3838		s->state_flags |= PFSTATE_ALLOWOPTS;
3839	if (r->rule_flag & PFRULE_STATESLOPPY)
3840		s->state_flags |= PFSTATE_SLOPPY;
3841	if (r->rule_flag & PFRULE_PFLOW)
3842		s->state_flags |= PFSTATE_PFLOW;
3843	s->log = r->log & PF_LOG_ALL;
3844	s->sync_state = PFSYNC_S_NONE;
3845	if (nr != NULL)
3846		s->log |= nr->log & PF_LOG_ALL;
3847	switch (pd->proto) {
3848	case IPPROTO_TCP:
3849		s->src.seqlo = ntohl(th->th_seq);
3850		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
3851		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3852		    r->keep_state == PF_STATE_MODULATE) {
3853			/* Generate sequence number modulator */
3854			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
3855			    0)
3856				s->src.seqdiff = 1;
3857			pf_change_a(&th->th_seq, &th->th_sum,
3858			    htonl(s->src.seqlo + s->src.seqdiff), 0);
3859			*rewrite = 1;
3860		} else
3861			s->src.seqdiff = 0;
3862		if (th->th_flags & TH_SYN) {
3863			s->src.seqhi++;
3864			s->src.wscale = pf_get_wscale(m, off,
3865			    th->th_off, pd->af);
3866		}
3867		s->src.max_win = MAX(ntohs(th->th_win), 1);
3868		if (s->src.wscale & PF_WSCALE_MASK) {
3869			/* Remove scale factor from initial window */
3870			int win = s->src.max_win;
3871			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3872			s->src.max_win = (win - 1) >>
3873			    (s->src.wscale & PF_WSCALE_MASK);
3874		}
3875		if (th->th_flags & TH_FIN)
3876			s->src.seqhi++;
3877		s->dst.seqhi = 1;
3878		s->dst.max_win = 1;
3879		s->src.state = TCPS_SYN_SENT;
3880		s->dst.state = TCPS_CLOSED;
3881		s->timeout = PFTM_TCP_FIRST_PACKET;
3882		break;
3883	case IPPROTO_UDP:
3884		s->src.state = PFUDPS_SINGLE;
3885		s->dst.state = PFUDPS_NO_TRAFFIC;
3886		s->timeout = PFTM_UDP_FIRST_PACKET;
3887		break;
3888	case IPPROTO_ICMP:
3889#ifdef INET6
3890	case IPPROTO_ICMPV6:
3891#endif
3892		s->timeout = PFTM_ICMP_FIRST_PACKET;
3893		break;
3894	default:
3895		s->src.state = PFOTHERS_SINGLE;
3896		s->dst.state = PFOTHERS_NO_TRAFFIC;
3897		s->timeout = PFTM_OTHER_FIRST_PACKET;
3898	}
3899
3900	s->creation = time_second;
3901	s->expire = time_second;
3902
3903	if (sn != NULL) {
3904		s->src_node = sn;
3905		s->src_node->states++;
3906	}
3907	if (nsn != NULL) {
3908		/* XXX We only modify one side for now. */
3909		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
3910		s->nat_src_node = nsn;
3911		s->nat_src_node->states++;
3912	}
3913	if (pd->proto == IPPROTO_TCP) {
3914		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
3915		    off, pd, th, &s->src, &s->dst)) {
3916			REASON_SET(&reason, PFRES_MEMORY);
3917			pf_src_tree_remove_state(s);
3918			STATE_DEC_COUNTERS(s);
3919#ifdef __FreeBSD__
3920			pool_put(&V_pf_state_pl, s);
3921#else
3922			pool_put(&pf_state_pl, s);
3923#endif
3924			return (PF_DROP);
3925		}
3926		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3927		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
3928		    &s->src, &s->dst, rewrite)) {
3929			/* This really shouldn't happen!!! */
3930			DPFPRINTF(PF_DEBUG_URGENT,
3931			    ("pf_normalize_tcp_stateful failed on first pkt"));
3932			pf_normalize_tcp_cleanup(s);
3933			pf_src_tree_remove_state(s);
3934			STATE_DEC_COUNTERS(s);
3935#ifdef __FreeBSD__
3936			pool_put(&V_pf_state_pl, s);
3937#else
3938			pool_put(&pf_state_pl, s);
3939#endif
3940			return (PF_DROP);
3941		}
3942	}
3943	s->direction = pd->dir;
3944
3945	if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk,
3946	    pd->src, pd->dst, sport, dport))
3947		goto csfailed;
3948
3949	if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) {
3950		if (pd->proto == IPPROTO_TCP)
3951			pf_normalize_tcp_cleanup(s);
3952		REASON_SET(&reason, PFRES_STATEINS);
3953		pf_src_tree_remove_state(s);
3954		STATE_DEC_COUNTERS(s);
3955#ifdef __FreeBSD__
3956		pool_put(&V_pf_state_pl, s);
3957#else
3958		pool_put(&pf_state_pl, s);
3959#endif
3960		return (PF_DROP);
3961	} else
3962		*sm = s;
3963
3964	pf_set_rt_ifp(s, pd->src);	/* needs s->state_key set */
3965	if (tag > 0) {
3966		pf_tag_ref(tag);
3967		s->tag = tag;
3968	}
3969	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
3970	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
3971		s->src.state = PF_TCPS_PROXY_SRC;
3972		/* undo NAT changes, if they have taken place */
3973		if (nr != NULL) {
3974			struct pf_state_key *skt = s->key[PF_SK_WIRE];
3975			if (pd->dir == PF_OUT)
3976				skt = s->key[PF_SK_STACK];
3977			PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
3978			PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
3979			if (pd->sport)
3980				*pd->sport = skt->port[pd->sidx];
3981			if (pd->dport)
3982				*pd->dport = skt->port[pd->didx];
3983			if (pd->proto_sum)
3984				*pd->proto_sum = bproto_sum;
3985			if (pd->ip_sum)
3986				*pd->ip_sum = bip_sum;
3987			m_copyback(m, off, hdrlen, pd->hdr.any);
3988		}
3989		s->src.seqhi = htonl(arc4random());
3990		/* Find mss option */
3991		mss = pf_get_mss(m, off, th->th_off, pd->af);
3992		mss = pf_calc_mss(pd->src, pd->af, mss);
3993		mss = pf_calc_mss(pd->dst, pd->af, mss);
3994		s->src.mss = mss;
3995#ifdef __FreeBSD__
3996		pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
3997#else
3998		pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
3999#endif
4000		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
4001		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
4002		REASON_SET(&reason, PFRES_SYNPROXY);
4003		return (PF_SYNPROXY_DROP);
4004	}
4005
4006	return (PF_PASS);
4007
4008csfailed:
4009#ifdef __FreeBSD__
4010	if (sk != NULL)
4011		pool_put(&V_pf_state_key_pl, sk);
4012	if (nk != NULL)
4013		pool_put(&V_pf_state_key_pl, nk);
4014#else
4015	if (sk != NULL)
4016		pool_put(&pf_state_key_pl, sk);
4017	if (nk != NULL)
4018		pool_put(&pf_state_key_pl, nk);
4019#endif
4020
4021	if (sn != NULL && sn->states == 0 && sn->expire == 0) {
4022#ifdef __FreeBSD__
4023		RB_REMOVE(pf_src_tree, &V_tree_src_tracking, sn);
4024		V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4025		V_pf_status.src_nodes--;
4026		pool_put(&V_pf_src_tree_pl, sn);
4027#else
4028		RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
4029		pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4030		pf_status.src_nodes--;
4031		pool_put(&pf_src_tree_pl, sn);
4032#endif
4033	}
4034	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
4035#ifdef __FreeBSD__
4036		RB_REMOVE(pf_src_tree, &V_tree_src_tracking, nsn);
4037		V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4038		V_pf_status.src_nodes--;
4039		pool_put(&V_pf_src_tree_pl, nsn);
4040#else
4041		RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
4042		pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4043		pf_status.src_nodes--;
4044		pool_put(&pf_src_tree_pl, nsn);
4045#endif
4046	}
4047	return (PF_DROP);
4048}
4049
4050int
4051pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
4052    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
4053    struct pf_ruleset **rsm)
4054{
4055	struct pf_rule		*r, *a = NULL;
4056	struct pf_ruleset	*ruleset = NULL;
4057	sa_family_t		 af = pd->af;
4058	u_short			 reason;
4059	int			 tag = -1;
4060	int			 asd = 0;
4061	int			 match = 0;
4062
4063	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4064	while (r != NULL) {
4065		r->evaluations++;
4066		if (pfi_kif_match(r->kif, kif) == r->ifnot)
4067			r = r->skip[PF_SKIP_IFP].ptr;
4068		else if (r->direction && r->direction != direction)
4069			r = r->skip[PF_SKIP_DIR].ptr;
4070		else if (r->af && r->af != af)
4071			r = r->skip[PF_SKIP_AF].ptr;
4072		else if (r->proto && r->proto != pd->proto)
4073			r = r->skip[PF_SKIP_PROTO].ptr;
4074		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
4075		    r->src.neg, kif))
4076			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4077		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
4078		    r->dst.neg, NULL))
4079			r = r->skip[PF_SKIP_DST_ADDR].ptr;
4080		else if (r->tos && !(r->tos == pd->tos))
4081			r = TAILQ_NEXT(r, entries);
4082		else if (r->os_fingerprint != PF_OSFP_ANY)
4083			r = TAILQ_NEXT(r, entries);
4084		else if (pd->proto == IPPROTO_UDP &&
4085		    (r->src.port_op || r->dst.port_op))
4086			r = TAILQ_NEXT(r, entries);
4087		else if (pd->proto == IPPROTO_TCP &&
4088		    (r->src.port_op || r->dst.port_op || r->flagset))
4089			r = TAILQ_NEXT(r, entries);
4090		else if ((pd->proto == IPPROTO_ICMP ||
4091		    pd->proto == IPPROTO_ICMPV6) &&
4092		    (r->type || r->code))
4093			r = TAILQ_NEXT(r, entries);
4094		else if (r->prob && r->prob <=
4095		    (arc4random() % (UINT_MAX - 1) + 1))
4096			r = TAILQ_NEXT(r, entries);
4097#ifdef __FreeBSD__
4098		else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
4099#else
4100		else if (r->match_tag && !pf_match_tag(m, r, &tag))
4101#endif
4102			r = TAILQ_NEXT(r, entries);
4103		else {
4104			if (r->anchor == NULL) {
4105				match = 1;
4106				*rm = r;
4107				*am = a;
4108				*rsm = ruleset;
4109				if ((*rm)->quick)
4110					break;
4111				r = TAILQ_NEXT(r, entries);
4112			} else
4113				pf_step_into_anchor(&asd, &ruleset,
4114				    PF_RULESET_FILTER, &r, &a, &match);
4115		}
4116		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
4117		    PF_RULESET_FILTER, &r, &a, &match))
4118			break;
4119	}
4120	r = *rm;
4121	a = *am;
4122	ruleset = *rsm;
4123
4124	REASON_SET(&reason, PFRES_MATCH);
4125
4126	if (r->log)
4127		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
4128		    pd);
4129
4130	if (r->action != PF_PASS)
4131		return (PF_DROP);
4132
4133#ifdef __FreeBSD__
4134	if (pf_tag_packet(m, tag, -1, pd->pf_mtag)) {
4135#else
4136	if (pf_tag_packet(m, tag, -1)) {
4137#endif
4138		REASON_SET(&reason, PFRES_MEMORY);
4139		return (PF_DROP);
4140	}
4141
4142	return (PF_PASS);
4143}
4144
4145int
4146pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
4147	struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
4148	struct pf_pdesc *pd, u_short *reason, int *copyback)
4149{
4150	struct tcphdr		*th = pd->hdr.tcp;
4151	u_int16_t		 win = ntohs(th->th_win);
4152	u_int32_t		 ack, end, seq, orig_seq;
4153	u_int8_t		 sws, dws;
4154	int			 ackskew;
4155
4156	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4157		sws = src->wscale & PF_WSCALE_MASK;
4158		dws = dst->wscale & PF_WSCALE_MASK;
4159	} else
4160		sws = dws = 0;
4161
4162	/*
4163	 * Sequence tracking algorithm from Guido van Rooij's paper:
4164	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
4165	 *	tcp_filtering.ps
4166	 */
4167
4168	orig_seq = seq = ntohl(th->th_seq);
4169	if (src->seqlo == 0) {
4170		/* First packet from this end. Set its state */
4171
4172		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4173		    src->scrub == NULL) {
4174			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4175				REASON_SET(reason, PFRES_MEMORY);
4176				return (PF_DROP);
4177			}
4178		}
4179
4180		/* Deferred generation of sequence number modulator */
4181		if (dst->seqdiff && !src->seqdiff) {
4182			/* use random iss for the TCP server */
4183			while ((src->seqdiff = arc4random() - seq) == 0)
4184				;
4185			ack = ntohl(th->th_ack) - dst->seqdiff;
4186			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4187			    src->seqdiff), 0);
4188			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4189			*copyback = 1;
4190		} else {
4191			ack = ntohl(th->th_ack);
4192		}
4193
4194		end = seq + pd->p_len;
4195		if (th->th_flags & TH_SYN) {
4196			end++;
4197			if (dst->wscale & PF_WSCALE_FLAG) {
4198				src->wscale = pf_get_wscale(m, off, th->th_off,
4199				    pd->af);
4200				if (src->wscale & PF_WSCALE_FLAG) {
4201					/* Remove scale factor from initial
4202					 * window */
4203					sws = src->wscale & PF_WSCALE_MASK;
4204					win = ((u_int32_t)win + (1 << sws) - 1)
4205					    >> sws;
4206					dws = dst->wscale & PF_WSCALE_MASK;
4207				} else {
4208					/* fixup other window */
4209					dst->max_win <<= dst->wscale &
4210					    PF_WSCALE_MASK;
4211					/* in case of a retrans SYN|ACK */
4212					dst->wscale = 0;
4213				}
4214			}
4215		}
4216		if (th->th_flags & TH_FIN)
4217			end++;
4218
4219		src->seqlo = seq;
4220		if (src->state < TCPS_SYN_SENT)
4221			src->state = TCPS_SYN_SENT;
4222
4223		/*
4224		 * May need to slide the window (seqhi may have been set by
4225		 * the crappy stack check or if we picked up the connection
4226		 * after establishment)
4227		 */
4228		if (src->seqhi == 1 ||
4229		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4230			src->seqhi = end + MAX(1, dst->max_win << dws);
4231		if (win > src->max_win)
4232			src->max_win = win;
4233
4234	} else {
4235		ack = ntohl(th->th_ack) - dst->seqdiff;
4236		if (src->seqdiff) {
4237			/* Modulate sequence numbers */
4238			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4239			    src->seqdiff), 0);
4240			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4241			*copyback = 1;
4242		}
4243		end = seq + pd->p_len;
4244		if (th->th_flags & TH_SYN)
4245			end++;
4246		if (th->th_flags & TH_FIN)
4247			end++;
4248	}
4249
4250	if ((th->th_flags & TH_ACK) == 0) {
4251		/* Let it pass through the ack skew check */
4252		ack = dst->seqlo;
4253	} else if ((ack == 0 &&
4254	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4255	    /* broken tcp stacks do not set ack */
4256	    (dst->state < TCPS_SYN_SENT)) {
4257		/*
4258		 * Many stacks (ours included) will set the ACK number in an
4259		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4260		 */
4261		ack = dst->seqlo;
4262	}
4263
4264	if (seq == end) {
4265		/* Ease sequencing restrictions on no data packets */
4266		seq = src->seqlo;
4267		end = seq;
4268	}
4269
4270	ackskew = dst->seqlo - ack;
4271
4272
4273	/*
4274	 * Need to demodulate the sequence numbers in any TCP SACK options
4275	 * (Selective ACK). We could optionally validate the SACK values
4276	 * against the current ACK window, either forwards or backwards, but
4277	 * I'm not confident that SACK has been implemented properly
4278	 * everywhere. It wouldn't surprise me if several stacks accidently
4279	 * SACK too far backwards of previously ACKed data. There really aren't
4280	 * any security implications of bad SACKing unless the target stack
4281	 * doesn't validate the option length correctly. Someone trying to
4282	 * spoof into a TCP connection won't bother blindly sending SACK
4283	 * options anyway.
4284	 */
4285	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4286		if (pf_modulate_sack(m, off, pd, th, dst))
4287			*copyback = 1;
4288	}
4289
4290
4291#define	MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4292	if (SEQ_GEQ(src->seqhi, end) &&
4293	    /* Last octet inside other's window space */
4294	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4295	    /* Retrans: not more than one window back */
4296	    (ackskew >= -MAXACKWINDOW) &&
4297	    /* Acking not more than one reassembled fragment backwards */
4298	    (ackskew <= (MAXACKWINDOW << sws)) &&
4299	    /* Acking not more than one window forward */
4300	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4301	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
4302	    (pd->flags & PFDESC_IP_REAS) == 0)) {
4303	    /* Require an exact/+1 sequence match on resets when possible */
4304
4305		if (dst->scrub || src->scrub) {
4306			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4307			    *state, src, dst, copyback))
4308				return (PF_DROP);
4309		}
4310
4311		/* update max window */
4312		if (src->max_win < win)
4313			src->max_win = win;
4314		/* synchronize sequencing */
4315		if (SEQ_GT(end, src->seqlo))
4316			src->seqlo = end;
4317		/* slide the window of what the other end can send */
4318		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4319			dst->seqhi = ack + MAX((win << sws), 1);
4320
4321
4322		/* update states */
4323		if (th->th_flags & TH_SYN)
4324			if (src->state < TCPS_SYN_SENT)
4325				src->state = TCPS_SYN_SENT;
4326		if (th->th_flags & TH_FIN)
4327			if (src->state < TCPS_CLOSING)
4328				src->state = TCPS_CLOSING;
4329		if (th->th_flags & TH_ACK) {
4330			if (dst->state == TCPS_SYN_SENT) {
4331				dst->state = TCPS_ESTABLISHED;
4332				if (src->state == TCPS_ESTABLISHED &&
4333				    (*state)->src_node != NULL &&
4334				    pf_src_connlimit(state)) {
4335					REASON_SET(reason, PFRES_SRCLIMIT);
4336					return (PF_DROP);
4337				}
4338			} else if (dst->state == TCPS_CLOSING)
4339				dst->state = TCPS_FIN_WAIT_2;
4340		}
4341		if (th->th_flags & TH_RST)
4342			src->state = dst->state = TCPS_TIME_WAIT;
4343
4344		/* update expire time */
4345		(*state)->expire = time_second;
4346		if (src->state >= TCPS_FIN_WAIT_2 &&
4347		    dst->state >= TCPS_FIN_WAIT_2)
4348			(*state)->timeout = PFTM_TCP_CLOSED;
4349		else if (src->state >= TCPS_CLOSING &&
4350		    dst->state >= TCPS_CLOSING)
4351			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4352		else if (src->state < TCPS_ESTABLISHED ||
4353		    dst->state < TCPS_ESTABLISHED)
4354			(*state)->timeout = PFTM_TCP_OPENING;
4355		else if (src->state >= TCPS_CLOSING ||
4356		    dst->state >= TCPS_CLOSING)
4357			(*state)->timeout = PFTM_TCP_CLOSING;
4358		else
4359			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4360
4361		/* Fall through to PASS packet */
4362
4363	} else if ((dst->state < TCPS_SYN_SENT ||
4364		dst->state >= TCPS_FIN_WAIT_2 ||
4365		src->state >= TCPS_FIN_WAIT_2) &&
4366	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4367	    /* Within a window forward of the originating packet */
4368	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4369	    /* Within a window backward of the originating packet */
4370
4371		/*
4372		 * This currently handles three situations:
4373		 *  1) Stupid stacks will shotgun SYNs before their peer
4374		 *     replies.
4375		 *  2) When PF catches an already established stream (the
4376		 *     firewall rebooted, the state table was flushed, routes
4377		 *     changed...)
4378		 *  3) Packets get funky immediately after the connection
4379		 *     closes (this should catch Solaris spurious ACK|FINs
4380		 *     that web servers like to spew after a close)
4381		 *
4382		 * This must be a little more careful than the above code
4383		 * since packet floods will also be caught here. We don't
4384		 * update the TTL here to mitigate the damage of a packet
4385		 * flood and so the same code can handle awkward establishment
4386		 * and a loosened connection close.
4387		 * In the establishment case, a correct peer response will
4388		 * validate the connection, go through the normal state code
4389		 * and keep updating the state TTL.
4390		 */
4391
4392#ifdef __FreeBSD__
4393		if (V_pf_status.debug >= PF_DEBUG_MISC) {
4394#else
4395		if (pf_status.debug >= PF_DEBUG_MISC) {
4396#endif
4397			printf("pf: loose state match: ");
4398			pf_print_state(*state);
4399			pf_print_flags(th->th_flags);
4400			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4401			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
4402#ifdef __FreeBSD__
4403			    pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
4404			    (unsigned long long)(*state)->packets[1],
4405#else
4406			    pd->p_len, ackskew, (*state)->packets[0],
4407			    (*state)->packets[1],
4408#endif
4409			    pd->dir == PF_IN ? "in" : "out",
4410			    pd->dir == (*state)->direction ? "fwd" : "rev");
4411		}
4412
4413		if (dst->scrub || src->scrub) {
4414			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4415			    *state, src, dst, copyback))
4416				return (PF_DROP);
4417		}
4418
4419		/* update max window */
4420		if (src->max_win < win)
4421			src->max_win = win;
4422		/* synchronize sequencing */
4423		if (SEQ_GT(end, src->seqlo))
4424			src->seqlo = end;
4425		/* slide the window of what the other end can send */
4426		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4427			dst->seqhi = ack + MAX((win << sws), 1);
4428
4429		/*
4430		 * Cannot set dst->seqhi here since this could be a shotgunned
4431		 * SYN and not an already established connection.
4432		 */
4433
4434		if (th->th_flags & TH_FIN)
4435			if (src->state < TCPS_CLOSING)
4436				src->state = TCPS_CLOSING;
4437		if (th->th_flags & TH_RST)
4438			src->state = dst->state = TCPS_TIME_WAIT;
4439
4440		/* Fall through to PASS packet */
4441
4442	} else {
4443		if ((*state)->dst.state == TCPS_SYN_SENT &&
4444		    (*state)->src.state == TCPS_SYN_SENT) {
4445			/* Send RST for state mismatches during handshake */
4446			if (!(th->th_flags & TH_RST))
4447#ifdef __FreeBSD__
4448				pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4449#else
4450				pf_send_tcp((*state)->rule.ptr, pd->af,
4451#endif
4452				    pd->dst, pd->src, th->th_dport,
4453				    th->th_sport, ntohl(th->th_ack), 0,
4454				    TH_RST, 0, 0,
4455				    (*state)->rule.ptr->return_ttl, 1, 0,
4456				    pd->eh, kif->pfik_ifp);
4457			src->seqlo = 0;
4458			src->seqhi = 1;
4459			src->max_win = 1;
4460#ifdef __FreeBSD__
4461		} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
4462#else
4463		} else if (pf_status.debug >= PF_DEBUG_MISC) {
4464#endif
4465			printf("pf: BAD state: ");
4466			pf_print_state(*state);
4467			pf_print_flags(th->th_flags);
4468			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4469			    "pkts=%llu:%llu dir=%s,%s\n",
4470			    seq, orig_seq, ack, pd->p_len, ackskew,
4471#ifdef __FreeBSD__
4472			    (unsigned long long)(*state)->packets[0],
4473			    (unsigned long long)(*state)->packets[1],
4474#else
4475			    (*state)->packets[0], (*state)->packets[1],
4476#endif
4477			    pd->dir == PF_IN ? "in" : "out",
4478			    pd->dir == (*state)->direction ? "fwd" : "rev");
4479			printf("pf: State failure on: %c %c %c %c | %c %c\n",
4480			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4481			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4482			    ' ': '2',
4483			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4484			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4485			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4486			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4487		}
4488		REASON_SET(reason, PFRES_BADSTATE);
4489		return (PF_DROP);
4490	}
4491
4492	return (PF_PASS);
4493}
4494
4495int
4496pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
4497	struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
4498{
4499	struct tcphdr		*th = pd->hdr.tcp;
4500
4501	if (th->th_flags & TH_SYN)
4502		if (src->state < TCPS_SYN_SENT)
4503			src->state = TCPS_SYN_SENT;
4504	if (th->th_flags & TH_FIN)
4505		if (src->state < TCPS_CLOSING)
4506			src->state = TCPS_CLOSING;
4507	if (th->th_flags & TH_ACK) {
4508		if (dst->state == TCPS_SYN_SENT) {
4509			dst->state = TCPS_ESTABLISHED;
4510			if (src->state == TCPS_ESTABLISHED &&
4511			    (*state)->src_node != NULL &&
4512			    pf_src_connlimit(state)) {
4513				REASON_SET(reason, PFRES_SRCLIMIT);
4514				return (PF_DROP);
4515			}
4516		} else if (dst->state == TCPS_CLOSING) {
4517			dst->state = TCPS_FIN_WAIT_2;
4518		} else if (src->state == TCPS_SYN_SENT &&
4519		    dst->state < TCPS_SYN_SENT) {
4520			/*
4521			 * Handle a special sloppy case where we only see one
4522			 * half of the connection. If there is a ACK after
4523			 * the initial SYN without ever seeing a packet from
4524			 * the destination, set the connection to established.
4525			 */
4526			dst->state = src->state = TCPS_ESTABLISHED;
4527			if ((*state)->src_node != NULL &&
4528			    pf_src_connlimit(state)) {
4529				REASON_SET(reason, PFRES_SRCLIMIT);
4530				return (PF_DROP);
4531			}
4532		} else if (src->state == TCPS_CLOSING &&
4533		    dst->state == TCPS_ESTABLISHED &&
4534		    dst->seqlo == 0) {
4535			/*
4536			 * Handle the closing of half connections where we
4537			 * don't see the full bidirectional FIN/ACK+ACK
4538			 * handshake.
4539			 */
4540			dst->state = TCPS_CLOSING;
4541		}
4542	}
4543	if (th->th_flags & TH_RST)
4544		src->state = dst->state = TCPS_TIME_WAIT;
4545
4546	/* update expire time */
4547	(*state)->expire = time_second;
4548	if (src->state >= TCPS_FIN_WAIT_2 &&
4549	    dst->state >= TCPS_FIN_WAIT_2)
4550		(*state)->timeout = PFTM_TCP_CLOSED;
4551	else if (src->state >= TCPS_CLOSING &&
4552	    dst->state >= TCPS_CLOSING)
4553		(*state)->timeout = PFTM_TCP_FIN_WAIT;
4554	else if (src->state < TCPS_ESTABLISHED ||
4555	    dst->state < TCPS_ESTABLISHED)
4556		(*state)->timeout = PFTM_TCP_OPENING;
4557	else if (src->state >= TCPS_CLOSING ||
4558	    dst->state >= TCPS_CLOSING)
4559		(*state)->timeout = PFTM_TCP_CLOSING;
4560	else
4561		(*state)->timeout = PFTM_TCP_ESTABLISHED;
4562
4563	return (PF_PASS);
4564}
4565
4566int
4567pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
4568    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4569    u_short *reason)
4570{
4571	struct pf_state_key_cmp	 key;
4572	struct tcphdr		*th = pd->hdr.tcp;
4573	int			 copyback = 0;
4574	struct pf_state_peer	*src, *dst;
4575	struct pf_state_key	*sk;
4576
4577	key.af = pd->af;
4578	key.proto = IPPROTO_TCP;
4579	if (direction == PF_IN)	{	/* wire side, straight */
4580		PF_ACPY(&key.addr[0], pd->src, key.af);
4581		PF_ACPY(&key.addr[1], pd->dst, key.af);
4582		key.port[0] = th->th_sport;
4583		key.port[1] = th->th_dport;
4584	} else {			/* stack side, reverse */
4585		PF_ACPY(&key.addr[1], pd->src, key.af);
4586		PF_ACPY(&key.addr[0], pd->dst, key.af);
4587		key.port[1] = th->th_sport;
4588		key.port[0] = th->th_dport;
4589	}
4590
4591#ifdef __FreeBSD__
4592	STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
4593#else
4594	STATE_LOOKUP(kif, &key, direction, *state, m);
4595#endif
4596
4597	if (direction == (*state)->direction) {
4598		src = &(*state)->src;
4599		dst = &(*state)->dst;
4600	} else {
4601		src = &(*state)->dst;
4602		dst = &(*state)->src;
4603	}
4604
4605	sk = (*state)->key[pd->didx];
4606
4607	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
4608		if (direction != (*state)->direction) {
4609			REASON_SET(reason, PFRES_SYNPROXY);
4610			return (PF_SYNPROXY_DROP);
4611		}
4612		if (th->th_flags & TH_SYN) {
4613			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
4614				REASON_SET(reason, PFRES_SYNPROXY);
4615				return (PF_DROP);
4616			}
4617#ifdef __FreeBSD__
4618			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
4619#else
4620			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4621#endif
4622			    pd->src, th->th_dport, th->th_sport,
4623			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
4624			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
4625			    0, NULL, NULL);
4626			REASON_SET(reason, PFRES_SYNPROXY);
4627			return (PF_SYNPROXY_DROP);
4628		} else if (!(th->th_flags & TH_ACK) ||
4629		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4630		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4631			REASON_SET(reason, PFRES_SYNPROXY);
4632			return (PF_DROP);
4633		} else if ((*state)->src_node != NULL &&
4634		    pf_src_connlimit(state)) {
4635			REASON_SET(reason, PFRES_SRCLIMIT);
4636			return (PF_DROP);
4637		} else
4638			(*state)->src.state = PF_TCPS_PROXY_DST;
4639	}
4640	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
4641		if (direction == (*state)->direction) {
4642			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
4643			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4644			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4645				REASON_SET(reason, PFRES_SYNPROXY);
4646				return (PF_DROP);
4647			}
4648			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
4649			if ((*state)->dst.seqhi == 1)
4650				(*state)->dst.seqhi = htonl(arc4random());
4651#ifdef __FreeBSD__
4652			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4653#else
4654			pf_send_tcp((*state)->rule.ptr, pd->af,
4655#endif
4656			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
4657			    sk->port[pd->sidx], sk->port[pd->didx],
4658			    (*state)->dst.seqhi, 0, TH_SYN, 0,
4659			    (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
4660			REASON_SET(reason, PFRES_SYNPROXY);
4661			return (PF_SYNPROXY_DROP);
4662		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
4663		    (TH_SYN|TH_ACK)) ||
4664		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
4665			REASON_SET(reason, PFRES_SYNPROXY);
4666			return (PF_DROP);
4667		} else {
4668			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4669			(*state)->dst.seqlo = ntohl(th->th_seq);
4670#ifdef __FreeBSD__
4671			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
4672#else
4673			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4674#endif
4675			    pd->src, th->th_dport, th->th_sport,
4676			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4677			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
4678			    (*state)->tag, NULL, NULL);
4679#ifdef __FreeBSD__
4680			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4681#else
4682			pf_send_tcp((*state)->rule.ptr, pd->af,
4683#endif
4684			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
4685			    sk->port[pd->sidx], sk->port[pd->didx],
4686			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4687			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
4688			    0, NULL, NULL);
4689			(*state)->src.seqdiff = (*state)->dst.seqhi -
4690			    (*state)->src.seqlo;
4691			(*state)->dst.seqdiff = (*state)->src.seqhi -
4692			    (*state)->dst.seqlo;
4693			(*state)->src.seqhi = (*state)->src.seqlo +
4694			    (*state)->dst.max_win;
4695			(*state)->dst.seqhi = (*state)->dst.seqlo +
4696			    (*state)->src.max_win;
4697			(*state)->src.wscale = (*state)->dst.wscale = 0;
4698			(*state)->src.state = (*state)->dst.state =
4699			    TCPS_ESTABLISHED;
4700			REASON_SET(reason, PFRES_SYNPROXY);
4701			return (PF_SYNPROXY_DROP);
4702		}
4703	}
4704
4705	if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
4706	    dst->state >= TCPS_FIN_WAIT_2 &&
4707	    src->state >= TCPS_FIN_WAIT_2) {
4708#ifdef __FreeBSD__
4709		if (V_pf_status.debug >= PF_DEBUG_MISC) {
4710#else
4711		if (pf_status.debug >= PF_DEBUG_MISC) {
4712#endif
4713			printf("pf: state reuse ");
4714			pf_print_state(*state);
4715			pf_print_flags(th->th_flags);
4716			printf("\n");
4717		}
4718		/* XXX make sure it's the same direction ?? */
4719		(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
4720		pf_unlink_state(*state);
4721		*state = NULL;
4722		return (PF_DROP);
4723	}
4724
4725	if ((*state)->state_flags & PFSTATE_SLOPPY) {
4726		if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
4727			return (PF_DROP);
4728	} else {
4729		if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
4730		    &copyback) == PF_DROP)
4731			return (PF_DROP);
4732	}
4733
4734	/* translate source/destination address, if necessary */
4735	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4736		struct pf_state_key *nk = (*state)->key[pd->didx];
4737
4738		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
4739		    nk->port[pd->sidx] != th->th_sport)
4740			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4741			    &th->th_sum, &nk->addr[pd->sidx],
4742			    nk->port[pd->sidx], 0, pd->af);
4743
4744		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
4745		    nk->port[pd->didx] != th->th_dport)
4746			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4747			    &th->th_sum, &nk->addr[pd->didx],
4748			    nk->port[pd->didx], 0, pd->af);
4749		copyback = 1;
4750	}
4751
4752	/* Copyback sequence modulation or stateful scrub changes if needed */
4753	if (copyback)
4754#ifdef __FreeBSD__
4755		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4756#else
4757		m_copyback(m, off, sizeof(*th), th);
4758#endif
4759
4760	return (PF_PASS);
4761}
4762
4763int
4764pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4765    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4766{
4767	struct pf_state_peer	*src, *dst;
4768	struct pf_state_key_cmp	 key;
4769	struct udphdr		*uh = pd->hdr.udp;
4770
4771	key.af = pd->af;
4772	key.proto = IPPROTO_UDP;
4773	if (direction == PF_IN)	{	/* wire side, straight */
4774		PF_ACPY(&key.addr[0], pd->src, key.af);
4775		PF_ACPY(&key.addr[1], pd->dst, key.af);
4776		key.port[0] = uh->uh_sport;
4777		key.port[1] = uh->uh_dport;
4778	} else {			/* stack side, reverse */
4779		PF_ACPY(&key.addr[1], pd->src, key.af);
4780		PF_ACPY(&key.addr[0], pd->dst, key.af);
4781		key.port[1] = uh->uh_sport;
4782		key.port[0] = uh->uh_dport;
4783	}
4784
4785#ifdef __FreeBSD__
4786	STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
4787#else
4788	STATE_LOOKUP(kif, &key, direction, *state, m);
4789#endif
4790
4791	if (direction == (*state)->direction) {
4792		src = &(*state)->src;
4793		dst = &(*state)->dst;
4794	} else {
4795		src = &(*state)->dst;
4796		dst = &(*state)->src;
4797	}
4798
4799	/* update states */
4800	if (src->state < PFUDPS_SINGLE)
4801		src->state = PFUDPS_SINGLE;
4802	if (dst->state == PFUDPS_SINGLE)
4803		dst->state = PFUDPS_MULTIPLE;
4804
4805	/* update expire time */
4806	(*state)->expire = time_second;
4807	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4808		(*state)->timeout = PFTM_UDP_MULTIPLE;
4809	else
4810		(*state)->timeout = PFTM_UDP_SINGLE;
4811
4812	/* translate source/destination address, if necessary */
4813	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4814		struct pf_state_key *nk = (*state)->key[pd->didx];
4815
4816		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
4817		    nk->port[pd->sidx] != uh->uh_sport)
4818			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4819			    &uh->uh_sum, &nk->addr[pd->sidx],
4820			    nk->port[pd->sidx], 1, pd->af);
4821
4822		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
4823		    nk->port[pd->didx] != uh->uh_dport)
4824			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4825			    &uh->uh_sum, &nk->addr[pd->didx],
4826			    nk->port[pd->didx], 1, pd->af);
4827#ifdef __FreeBSD__
4828		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4829#else
4830		m_copyback(m, off, sizeof(*uh), uh);
4831#endif
4832	}
4833
4834	return (PF_PASS);
4835}
4836
4837int
4838pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4839    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
4840{
4841	struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
4842#ifdef __FreeBSD__
4843	u_int16_t	 icmpid = 0, *icmpsum;
4844#else
4845	u_int16_t	 icmpid, *icmpsum;
4846#endif
4847	u_int8_t	 icmptype;
4848	int		 state_icmp = 0;
4849	struct pf_state_key_cmp key;
4850
4851	switch (pd->proto) {
4852#ifdef INET
4853	case IPPROTO_ICMP:
4854		icmptype = pd->hdr.icmp->icmp_type;
4855		icmpid = pd->hdr.icmp->icmp_id;
4856		icmpsum = &pd->hdr.icmp->icmp_cksum;
4857
4858		if (icmptype == ICMP_UNREACH ||
4859		    icmptype == ICMP_SOURCEQUENCH ||
4860		    icmptype == ICMP_REDIRECT ||
4861		    icmptype == ICMP_TIMXCEED ||
4862		    icmptype == ICMP_PARAMPROB)
4863			state_icmp++;
4864		break;
4865#endif /* INET */
4866#ifdef INET6
4867	case IPPROTO_ICMPV6:
4868		icmptype = pd->hdr.icmp6->icmp6_type;
4869		icmpid = pd->hdr.icmp6->icmp6_id;
4870		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4871
4872		if (icmptype == ICMP6_DST_UNREACH ||
4873		    icmptype == ICMP6_PACKET_TOO_BIG ||
4874		    icmptype == ICMP6_TIME_EXCEEDED ||
4875		    icmptype == ICMP6_PARAM_PROB)
4876			state_icmp++;
4877		break;
4878#endif /* INET6 */
4879	}
4880
4881	if (!state_icmp) {
4882
4883		/*
4884		 * ICMP query/reply message not related to a TCP/UDP packet.
4885		 * Search for an ICMP state.
4886		 */
4887		key.af = pd->af;
4888		key.proto = pd->proto;
4889		key.port[0] = key.port[1] = icmpid;
4890		if (direction == PF_IN)	{	/* wire side, straight */
4891			PF_ACPY(&key.addr[0], pd->src, key.af);
4892			PF_ACPY(&key.addr[1], pd->dst, key.af);
4893		} else {			/* stack side, reverse */
4894			PF_ACPY(&key.addr[1], pd->src, key.af);
4895			PF_ACPY(&key.addr[0], pd->dst, key.af);
4896		}
4897
4898#ifdef __FreeBSD__
4899		STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
4900#else
4901		STATE_LOOKUP(kif, &key, direction, *state, m);
4902#endif
4903
4904		(*state)->expire = time_second;
4905		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4906
4907		/* translate source/destination address, if necessary */
4908		if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4909			struct pf_state_key *nk = (*state)->key[pd->didx];
4910
4911			switch (pd->af) {
4912#ifdef INET
4913			case AF_INET:
4914				if (PF_ANEQ(pd->src,
4915				    &nk->addr[pd->sidx], AF_INET))
4916					pf_change_a(&saddr->v4.s_addr,
4917					    pd->ip_sum,
4918					    nk->addr[pd->sidx].v4.s_addr, 0);
4919
4920				if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
4921				    AF_INET))
4922					pf_change_a(&daddr->v4.s_addr,
4923					    pd->ip_sum,
4924					    nk->addr[pd->didx].v4.s_addr, 0);
4925
4926				if (nk->port[0] !=
4927				    pd->hdr.icmp->icmp_id) {
4928					pd->hdr.icmp->icmp_cksum =
4929					    pf_cksum_fixup(
4930					    pd->hdr.icmp->icmp_cksum, icmpid,
4931					    nk->port[pd->sidx], 0);
4932					pd->hdr.icmp->icmp_id =
4933					    nk->port[pd->sidx];
4934				}
4935
4936				m_copyback(m, off, ICMP_MINLEN,
4937#ifdef __FreeBSD__
4938				    (caddr_t)
4939#endif
4940				    pd->hdr.icmp);
4941				break;
4942#endif /* INET */
4943#ifdef INET6
4944			case AF_INET6:
4945				if (PF_ANEQ(pd->src,
4946				    &nk->addr[pd->sidx], AF_INET6))
4947					pf_change_a6(saddr,
4948					    &pd->hdr.icmp6->icmp6_cksum,
4949					    &nk->addr[pd->sidx], 0);
4950
4951				if (PF_ANEQ(pd->dst,
4952				    &nk->addr[pd->didx], AF_INET6))
4953					pf_change_a6(daddr,
4954					    &pd->hdr.icmp6->icmp6_cksum,
4955					    &nk->addr[pd->didx], 0);
4956
4957				m_copyback(m, off,
4958				    sizeof(struct icmp6_hdr),
4959#ifdef __FreeBSD__
4960				    (caddr_t)
4961#endif
4962				    pd->hdr.icmp6);
4963				break;
4964#endif /* INET6 */
4965			}
4966		}
4967		return (PF_PASS);
4968
4969	} else {
4970		/*
4971		 * ICMP error message in response to a TCP/UDP packet.
4972		 * Extract the inner TCP/UDP header and search for that state.
4973		 */
4974
4975		struct pf_pdesc	pd2;
4976#ifdef __FreeBSD__
4977		bzero(&pd2, sizeof pd2);
4978#endif
4979#ifdef INET
4980		struct ip	h2;
4981#endif /* INET */
4982#ifdef INET6
4983		struct ip6_hdr	h2_6;
4984		int		terminal = 0;
4985#endif /* INET6 */
4986#ifdef __FreeBSD__
4987		int		ipoff2 = 0;
4988		int		off2 = 0;
4989#else
4990		int		ipoff2;
4991		int		off2;
4992#endif
4993
4994		pd2.af = pd->af;
4995		/* Payload packet is from the opposite direction. */
4996		pd2.sidx = (direction == PF_IN) ? 1 : 0;
4997		pd2.didx = (direction == PF_IN) ? 0 : 1;
4998		switch (pd->af) {
4999#ifdef INET
5000		case AF_INET:
5001			/* offset of h2 in mbuf chain */
5002			ipoff2 = off + ICMP_MINLEN;
5003
5004			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
5005			    NULL, reason, pd2.af)) {
5006				DPFPRINTF(PF_DEBUG_MISC,
5007				    ("pf: ICMP error message too short "
5008				    "(ip)\n"));
5009				return (PF_DROP);
5010			}
5011			/*
5012			 * ICMP error messages don't refer to non-first
5013			 * fragments
5014			 */
5015			if (h2.ip_off & htons(IP_OFFMASK)) {
5016				REASON_SET(reason, PFRES_FRAG);
5017				return (PF_DROP);
5018			}
5019
5020			/* offset of protocol header that follows h2 */
5021			off2 = ipoff2 + (h2.ip_hl << 2);
5022
5023			pd2.proto = h2.ip_p;
5024			pd2.src = (struct pf_addr *)&h2.ip_src;
5025			pd2.dst = (struct pf_addr *)&h2.ip_dst;
5026			pd2.ip_sum = &h2.ip_sum;
5027			break;
5028#endif /* INET */
5029#ifdef INET6
5030		case AF_INET6:
5031			ipoff2 = off + sizeof(struct icmp6_hdr);
5032
5033			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
5034			    NULL, reason, pd2.af)) {
5035				DPFPRINTF(PF_DEBUG_MISC,
5036				    ("pf: ICMP error message too short "
5037				    "(ip6)\n"));
5038				return (PF_DROP);
5039			}
5040			pd2.proto = h2_6.ip6_nxt;
5041			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
5042			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
5043			pd2.ip_sum = NULL;
5044			off2 = ipoff2 + sizeof(h2_6);
5045			do {
5046				switch (pd2.proto) {
5047				case IPPROTO_FRAGMENT:
5048					/*
5049					 * ICMPv6 error messages for
5050					 * non-first fragments
5051					 */
5052					REASON_SET(reason, PFRES_FRAG);
5053					return (PF_DROP);
5054				case IPPROTO_AH:
5055				case IPPROTO_HOPOPTS:
5056				case IPPROTO_ROUTING:
5057				case IPPROTO_DSTOPTS: {
5058					/* get next header and header length */
5059					struct ip6_ext opt6;
5060
5061					if (!pf_pull_hdr(m, off2, &opt6,
5062					    sizeof(opt6), NULL, reason,
5063					    pd2.af)) {
5064						DPFPRINTF(PF_DEBUG_MISC,
5065						    ("pf: ICMPv6 short opt\n"));
5066						return (PF_DROP);
5067					}
5068					if (pd2.proto == IPPROTO_AH)
5069						off2 += (opt6.ip6e_len + 2) * 4;
5070					else
5071						off2 += (opt6.ip6e_len + 1) * 8;
5072					pd2.proto = opt6.ip6e_nxt;
5073					/* goto the next header */
5074					break;
5075				}
5076				default:
5077					terminal++;
5078					break;
5079				}
5080			} while (!terminal);
5081			break;
5082#endif /* INET6 */
5083		}
5084
5085		switch (pd2.proto) {
5086		case IPPROTO_TCP: {
5087			struct tcphdr		 th;
5088			u_int32_t		 seq;
5089			struct pf_state_peer	*src, *dst;
5090			u_int8_t		 dws;
5091			int			 copyback = 0;
5092
5093			/*
5094			 * Only the first 8 bytes of the TCP header can be
5095			 * expected. Don't access any TCP header fields after
5096			 * th_seq, an ackskew test is not possible.
5097			 */
5098			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
5099			    pd2.af)) {
5100				DPFPRINTF(PF_DEBUG_MISC,
5101				    ("pf: ICMP error message too short "
5102				    "(tcp)\n"));
5103				return (PF_DROP);
5104			}
5105
5106			key.af = pd2.af;
5107			key.proto = IPPROTO_TCP;
5108			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5109			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5110			key.port[pd2.sidx] = th.th_sport;
5111			key.port[pd2.didx] = th.th_dport;
5112
5113#ifdef __FreeBSD__
5114			STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5115#else
5116			STATE_LOOKUP(kif, &key, direction, *state, m);
5117#endif
5118
5119			if (direction == (*state)->direction) {
5120				src = &(*state)->dst;
5121				dst = &(*state)->src;
5122			} else {
5123				src = &(*state)->src;
5124				dst = &(*state)->dst;
5125			}
5126
5127			if (src->wscale && dst->wscale)
5128				dws = dst->wscale & PF_WSCALE_MASK;
5129			else
5130				dws = 0;
5131
5132			/* Demodulate sequence number */
5133			seq = ntohl(th.th_seq) - src->seqdiff;
5134			if (src->seqdiff) {
5135				pf_change_a(&th.th_seq, icmpsum,
5136				    htonl(seq), 0);
5137				copyback = 1;
5138			}
5139
5140			if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
5141			    (!SEQ_GEQ(src->seqhi, seq) ||
5142			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
5143#ifdef __FreeBSD__
5144				if (V_pf_status.debug >= PF_DEBUG_MISC) {
5145#else
5146				if (pf_status.debug >= PF_DEBUG_MISC) {
5147#endif
5148					printf("pf: BAD ICMP %d:%d ",
5149					    icmptype, pd->hdr.icmp->icmp_code);
5150					pf_print_host(pd->src, 0, pd->af);
5151					printf(" -> ");
5152					pf_print_host(pd->dst, 0, pd->af);
5153					printf(" state: ");
5154					pf_print_state(*state);
5155					printf(" seq=%u\n", seq);
5156				}
5157				REASON_SET(reason, PFRES_BADSTATE);
5158				return (PF_DROP);
5159			} else {
5160#ifdef __FreeBSD__
5161				if (V_pf_status.debug >= PF_DEBUG_MISC) {
5162#else
5163				if (pf_status.debug >= PF_DEBUG_MISC) {
5164#endif
5165					printf("pf: OK ICMP %d:%d ",
5166					    icmptype, pd->hdr.icmp->icmp_code);
5167					pf_print_host(pd->src, 0, pd->af);
5168					printf(" -> ");
5169					pf_print_host(pd->dst, 0, pd->af);
5170					printf(" state: ");
5171					pf_print_state(*state);
5172					printf(" seq=%u\n", seq);
5173				}
5174			}
5175
5176			/* translate source/destination address, if necessary */
5177			if ((*state)->key[PF_SK_WIRE] !=
5178			    (*state)->key[PF_SK_STACK]) {
5179				struct pf_state_key *nk =
5180				    (*state)->key[pd->didx];
5181
5182				if (PF_ANEQ(pd2.src,
5183				    &nk->addr[pd2.sidx], pd2.af) ||
5184				    nk->port[pd2.sidx] != th.th_sport)
5185					pf_change_icmp(pd2.src, &th.th_sport,
5186					    daddr, &nk->addr[pd2.sidx],
5187					    nk->port[pd2.sidx], NULL,
5188					    pd2.ip_sum, icmpsum,
5189					    pd->ip_sum, 0, pd2.af);
5190
5191				if (PF_ANEQ(pd2.dst,
5192				    &nk->addr[pd2.didx], pd2.af) ||
5193				    nk->port[pd2.didx] != th.th_dport)
5194					pf_change_icmp(pd2.dst, &th.th_dport,
5195					    NULL, /* XXX Inbound NAT? */
5196					    &nk->addr[pd2.didx],
5197					    nk->port[pd2.didx], NULL,
5198					    pd2.ip_sum, icmpsum,
5199					    pd->ip_sum, 0, pd2.af);
5200				copyback = 1;
5201			}
5202
5203			if (copyback) {
5204				switch (pd2.af) {
5205#ifdef INET
5206				case AF_INET:
5207					m_copyback(m, off, ICMP_MINLEN,
5208#ifdef __FreeBSD__
5209					    (caddr_t)
5210#endif
5211					    pd->hdr.icmp);
5212					m_copyback(m, ipoff2, sizeof(h2),
5213#ifdef __FreeBSD__
5214					    (caddr_t)
5215#endif
5216					    &h2);
5217					break;
5218#endif /* INET */
5219#ifdef INET6
5220				case AF_INET6:
5221					m_copyback(m, off,
5222					    sizeof(struct icmp6_hdr),
5223#ifdef __FreeBSD__
5224					    (caddr_t)
5225#endif
5226					    pd->hdr.icmp6);
5227					m_copyback(m, ipoff2, sizeof(h2_6),
5228#ifdef __FreeBSD__
5229					    (caddr_t)
5230#endif
5231					    &h2_6);
5232					break;
5233#endif /* INET6 */
5234				}
5235#ifdef __FreeBSD__
5236				m_copyback(m, off2, 8, (caddr_t)&th);
5237#else
5238				m_copyback(m, off2, 8, &th);
5239#endif
5240			}
5241
5242			return (PF_PASS);
5243			break;
5244		}
5245		case IPPROTO_UDP: {
5246			struct udphdr		uh;
5247
5248			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
5249			    NULL, reason, pd2.af)) {
5250				DPFPRINTF(PF_DEBUG_MISC,
5251				    ("pf: ICMP error message too short "
5252				    "(udp)\n"));
5253				return (PF_DROP);
5254			}
5255
5256			key.af = pd2.af;
5257			key.proto = IPPROTO_UDP;
5258			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5259			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5260			key.port[pd2.sidx] = uh.uh_sport;
5261			key.port[pd2.didx] = uh.uh_dport;
5262
5263#ifdef __FreeBSD__
5264			STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5265#else
5266			STATE_LOOKUP(kif, &key, direction, *state, m);
5267#endif
5268
5269			/* translate source/destination address, if necessary */
5270			if ((*state)->key[PF_SK_WIRE] !=
5271			    (*state)->key[PF_SK_STACK]) {
5272				struct pf_state_key *nk =
5273				    (*state)->key[pd->didx];
5274
5275				if (PF_ANEQ(pd2.src,
5276				    &nk->addr[pd2.sidx], pd2.af) ||
5277				    nk->port[pd2.sidx] != uh.uh_sport)
5278					pf_change_icmp(pd2.src, &uh.uh_sport,
5279					    daddr, &nk->addr[pd2.sidx],
5280					    nk->port[pd2.sidx], &uh.uh_sum,
5281					    pd2.ip_sum, icmpsum,
5282					    pd->ip_sum, 1, pd2.af);
5283
5284				if (PF_ANEQ(pd2.dst,
5285				    &nk->addr[pd2.didx], pd2.af) ||
5286				    nk->port[pd2.didx] != uh.uh_dport)
5287					pf_change_icmp(pd2.dst, &uh.uh_dport,
5288					    NULL, /* XXX Inbound NAT? */
5289					    &nk->addr[pd2.didx],
5290					    nk->port[pd2.didx], &uh.uh_sum,
5291					    pd2.ip_sum, icmpsum,
5292					    pd->ip_sum, 1, pd2.af);
5293
5294				switch (pd2.af) {
5295#ifdef INET
5296				case AF_INET:
5297					m_copyback(m, off, ICMP_MINLEN,
5298#ifdef __FreeBSD__
5299					    (caddr_t)
5300#endif
5301					    pd->hdr.icmp);
5302#ifdef __FreeBSD__
5303					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5304#else
5305					m_copyback(m, ipoff2, sizeof(h2), &h2);
5306#endif
5307					break;
5308#endif /* INET */
5309#ifdef INET6
5310				case AF_INET6:
5311					m_copyback(m, off,
5312					    sizeof(struct icmp6_hdr),
5313#ifdef __FreeBSD__
5314					    (caddr_t)
5315#endif
5316					    pd->hdr.icmp6);
5317					m_copyback(m, ipoff2, sizeof(h2_6),
5318#ifdef __FreeBSD__
5319					    (caddr_t)
5320#endif
5321					    &h2_6);
5322					break;
5323#endif /* INET6 */
5324				}
5325#ifdef __FreeBSD__
5326				m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
5327#else
5328				m_copyback(m, off2, sizeof(uh), &uh);
5329#endif
5330			}
5331			return (PF_PASS);
5332			break;
5333		}
5334#ifdef INET
5335		case IPPROTO_ICMP: {
5336			struct icmp		iih;
5337
5338			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
5339			    NULL, reason, pd2.af)) {
5340				DPFPRINTF(PF_DEBUG_MISC,
5341				    ("pf: ICMP error message too short i"
5342				    "(icmp)\n"));
5343				return (PF_DROP);
5344			}
5345
5346			key.af = pd2.af;
5347			key.proto = IPPROTO_ICMP;
5348			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5349			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5350			key.port[0] = key.port[1] = iih.icmp_id;
5351
5352#ifdef __FreeBSD__
5353			STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5354#else
5355			STATE_LOOKUP(kif, &key, direction, *state, m);
5356#endif
5357
5358			/* translate source/destination address, if necessary */
5359			if ((*state)->key[PF_SK_WIRE] !=
5360			    (*state)->key[PF_SK_STACK]) {
5361				struct pf_state_key *nk =
5362				    (*state)->key[pd->didx];
5363
5364				if (PF_ANEQ(pd2.src,
5365				    &nk->addr[pd2.sidx], pd2.af) ||
5366				    nk->port[pd2.sidx] != iih.icmp_id)
5367					pf_change_icmp(pd2.src, &iih.icmp_id,
5368					    daddr, &nk->addr[pd2.sidx],
5369					    nk->port[pd2.sidx], NULL,
5370					    pd2.ip_sum, icmpsum,
5371					    pd->ip_sum, 0, AF_INET);
5372
5373				if (PF_ANEQ(pd2.dst,
5374				    &nk->addr[pd2.didx], pd2.af) ||
5375				    nk->port[pd2.didx] != iih.icmp_id)
5376					pf_change_icmp(pd2.dst, &iih.icmp_id,
5377					    NULL, /* XXX Inbound NAT? */
5378					    &nk->addr[pd2.didx],
5379					    nk->port[pd2.didx], NULL,
5380					    pd2.ip_sum, icmpsum,
5381					    pd->ip_sum, 0, AF_INET);
5382
5383#ifdef __FreeBSD__
5384				m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
5385				m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5386				m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
5387#else
5388				m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
5389				m_copyback(m, ipoff2, sizeof(h2), &h2);
5390				m_copyback(m, off2, ICMP_MINLEN, &iih);
5391#endif
5392			}
5393			return (PF_PASS);
5394			break;
5395		}
5396#endif /* INET */
5397#ifdef INET6
5398		case IPPROTO_ICMPV6: {
5399			struct icmp6_hdr	iih;
5400
5401			if (!pf_pull_hdr(m, off2, &iih,
5402			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
5403				DPFPRINTF(PF_DEBUG_MISC,
5404				    ("pf: ICMP error message too short "
5405				    "(icmp6)\n"));
5406				return (PF_DROP);
5407			}
5408
5409			key.af = pd2.af;
5410			key.proto = IPPROTO_ICMPV6;
5411			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5412			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5413			key.port[0] = key.port[1] = iih.icmp6_id;
5414
5415#ifdef __FreeBSD__
5416			STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5417#else
5418			STATE_LOOKUP(kif, &key, direction, *state, m);
5419#endif
5420
5421			/* translate source/destination address, if necessary */
5422			if ((*state)->key[PF_SK_WIRE] !=
5423			    (*state)->key[PF_SK_STACK]) {
5424				struct pf_state_key *nk =
5425				    (*state)->key[pd->didx];
5426
5427				if (PF_ANEQ(pd2.src,
5428				    &nk->addr[pd2.sidx], pd2.af) ||
5429				    nk->port[pd2.sidx] != iih.icmp6_id)
5430					pf_change_icmp(pd2.src, &iih.icmp6_id,
5431					    daddr, &nk->addr[pd2.sidx],
5432					    nk->port[pd2.sidx], NULL,
5433					    pd2.ip_sum, icmpsum,
5434					    pd->ip_sum, 0, AF_INET6);
5435
5436				if (PF_ANEQ(pd2.dst,
5437				    &nk->addr[pd2.didx], pd2.af) ||
5438				    nk->port[pd2.didx] != iih.icmp6_id)
5439					pf_change_icmp(pd2.dst, &iih.icmp6_id,
5440					    NULL, /* XXX Inbound NAT? */
5441					    &nk->addr[pd2.didx],
5442					    nk->port[pd2.didx], NULL,
5443					    pd2.ip_sum, icmpsum,
5444					    pd->ip_sum, 0, AF_INET6);
5445
5446#ifdef __FreeBSD__
5447				m_copyback(m, off, sizeof(struct icmp6_hdr),
5448				    (caddr_t)pd->hdr.icmp6);
5449				m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
5450				m_copyback(m, off2, sizeof(struct icmp6_hdr),
5451				    (caddr_t)&iih);
5452#else
5453				m_copyback(m, off, sizeof(struct icmp6_hdr),
5454				    pd->hdr.icmp6);
5455				m_copyback(m, ipoff2, sizeof(h2_6), &h2_6);
5456				m_copyback(m, off2, sizeof(struct icmp6_hdr),
5457				    &iih);
5458#endif
5459			}
5460			return (PF_PASS);
5461			break;
5462		}
5463#endif /* INET6 */
5464		default: {
5465			key.af = pd2.af;
5466			key.proto = pd2.proto;
5467			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5468			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5469			key.port[0] = key.port[1] = 0;
5470
5471#ifdef __FreeBSD__
5472			STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5473#else
5474			STATE_LOOKUP(kif, &key, direction, *state, m);
5475#endif
5476
5477			/* translate source/destination address, if necessary */
5478			if ((*state)->key[PF_SK_WIRE] !=
5479			    (*state)->key[PF_SK_STACK]) {
5480				struct pf_state_key *nk =
5481				    (*state)->key[pd->didx];
5482
5483				if (PF_ANEQ(pd2.src,
5484				    &nk->addr[pd2.sidx], pd2.af))
5485					pf_change_icmp(pd2.src, NULL, daddr,
5486					    &nk->addr[pd2.sidx], 0, NULL,
5487					    pd2.ip_sum, icmpsum,
5488					    pd->ip_sum, 0, pd2.af);
5489
5490				if (PF_ANEQ(pd2.dst,
5491				    &nk->addr[pd2.didx], pd2.af))
5492					pf_change_icmp(pd2.src, NULL,
5493					    NULL, /* XXX Inbound NAT? */
5494					    &nk->addr[pd2.didx], 0, NULL,
5495					    pd2.ip_sum, icmpsum,
5496					    pd->ip_sum, 0, pd2.af);
5497
5498				switch (pd2.af) {
5499#ifdef INET
5500				case AF_INET:
5501#ifdef __FreeBSD__
5502					m_copyback(m, off, ICMP_MINLEN,
5503					    (caddr_t)pd->hdr.icmp);
5504					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5505#else
5506					m_copyback(m, off, ICMP_MINLEN,
5507					    pd->hdr.icmp);
5508					m_copyback(m, ipoff2, sizeof(h2), &h2);
5509#endif
5510					break;
5511#endif /* INET */
5512#ifdef INET6
5513				case AF_INET6:
5514					m_copyback(m, off,
5515					    sizeof(struct icmp6_hdr),
5516#ifdef __FreeBSD__
5517					    (caddr_t)
5518#endif
5519					    pd->hdr.icmp6);
5520					m_copyback(m, ipoff2, sizeof(h2_6),
5521#ifdef __FreeBSD__
5522					    (caddr_t)
5523#endif
5524					    &h2_6);
5525					break;
5526#endif /* INET6 */
5527				}
5528			}
5529			return (PF_PASS);
5530			break;
5531		}
5532		}
5533	}
5534}
5535
5536int
5537pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
5538    struct mbuf *m, struct pf_pdesc *pd)
5539{
5540	struct pf_state_peer	*src, *dst;
5541	struct pf_state_key_cmp	 key;
5542
5543	key.af = pd->af;
5544	key.proto = pd->proto;
5545	if (direction == PF_IN)	{
5546		PF_ACPY(&key.addr[0], pd->src, key.af);
5547		PF_ACPY(&key.addr[1], pd->dst, key.af);
5548		key.port[0] = key.port[1] = 0;
5549	} else {
5550		PF_ACPY(&key.addr[1], pd->src, key.af);
5551		PF_ACPY(&key.addr[0], pd->dst, key.af);
5552		key.port[1] = key.port[0] = 0;
5553	}
5554
5555#ifdef __FreeBSD__
5556	STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
5557#else
5558	STATE_LOOKUP(kif, &key, direction, *state, m);
5559#endif
5560
5561	if (direction == (*state)->direction) {
5562		src = &(*state)->src;
5563		dst = &(*state)->dst;
5564	} else {
5565		src = &(*state)->dst;
5566		dst = &(*state)->src;
5567	}
5568
5569	/* update states */
5570	if (src->state < PFOTHERS_SINGLE)
5571		src->state = PFOTHERS_SINGLE;
5572	if (dst->state == PFOTHERS_SINGLE)
5573		dst->state = PFOTHERS_MULTIPLE;
5574
5575	/* update expire time */
5576	(*state)->expire = time_second;
5577	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
5578		(*state)->timeout = PFTM_OTHER_MULTIPLE;
5579	else
5580		(*state)->timeout = PFTM_OTHER_SINGLE;
5581
5582	/* translate source/destination address, if necessary */
5583	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
5584		struct pf_state_key *nk = (*state)->key[pd->didx];
5585
5586#ifdef __FreeBSD__
5587		KASSERT(nk, ("%s: nk is null", __FUNCTION__));
5588		KASSERT(pd, ("%s: pd is null", __FUNCTION__));
5589		KASSERT(pd->src, ("%s: pd->src is null", __FUNCTION__));
5590		KASSERT(pd->dst, ("%s: pd->dst is null", __FUNCTION__));
5591#else
5592		KASSERT(nk);
5593		KASSERT(pd);
5594		KASSERT(pd->src);
5595		KASSERT(pd->dst);
5596#endif
5597		switch (pd->af) {
5598#ifdef INET
5599		case AF_INET:
5600			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
5601				pf_change_a(&pd->src->v4.s_addr,
5602				    pd->ip_sum,
5603				    nk->addr[pd->sidx].v4.s_addr,
5604				    0);
5605
5606
5607			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
5608				pf_change_a(&pd->dst->v4.s_addr,
5609				    pd->ip_sum,
5610				    nk->addr[pd->didx].v4.s_addr,
5611				    0);
5612
5613				break;
5614#endif /* INET */
5615#ifdef INET6
5616		case AF_INET6:
5617			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
5618				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
5619
5620			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
5621				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
5622#endif /* INET6 */
5623		}
5624	}
5625	return (PF_PASS);
5626}
5627
5628/*
5629 * ipoff and off are measured from the start of the mbuf chain.
5630 * h must be at "ipoff" on the mbuf chain.
5631 */
5632void *
5633pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5634    u_short *actionp, u_short *reasonp, sa_family_t af)
5635{
5636	switch (af) {
5637#ifdef INET
5638	case AF_INET: {
5639		struct ip	*h = mtod(m, struct ip *);
5640		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5641
5642		if (fragoff) {
5643			if (fragoff >= len)
5644				ACTION_SET(actionp, PF_PASS);
5645			else {
5646				ACTION_SET(actionp, PF_DROP);
5647				REASON_SET(reasonp, PFRES_FRAG);
5648			}
5649			return (NULL);
5650		}
5651		if (m->m_pkthdr.len < off + len ||
5652		    ntohs(h->ip_len) < off + len) {
5653			ACTION_SET(actionp, PF_DROP);
5654			REASON_SET(reasonp, PFRES_SHORT);
5655			return (NULL);
5656		}
5657		break;
5658	}
5659#endif /* INET */
5660#ifdef INET6
5661	case AF_INET6: {
5662		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5663
5664		if (m->m_pkthdr.len < off + len ||
5665		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5666		    (unsigned)(off + len)) {
5667			ACTION_SET(actionp, PF_DROP);
5668			REASON_SET(reasonp, PFRES_SHORT);
5669			return (NULL);
5670		}
5671		break;
5672	}
5673#endif /* INET6 */
5674	}
5675	m_copydata(m, off, len, p);
5676	return (p);
5677}
5678
5679int
5680pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
5681{
5682#ifdef __FreeBSD__
5683#ifdef RADIX_MPATH
5684	struct radix_node_head	*rnh;
5685#endif
5686#endif
5687	struct sockaddr_in	*dst;
5688	int			 ret = 1;
5689	int			 check_mpath;
5690#ifndef __FreeBSD__
5691	extern int		 ipmultipath;
5692#endif
5693#ifdef INET6
5694#ifndef __FreeBSD__
5695	extern int		 ip6_multipath;
5696#endif
5697	struct sockaddr_in6	*dst6;
5698	struct route_in6	 ro;
5699#else
5700	struct route		 ro;
5701#endif
5702	struct radix_node	*rn;
5703	struct rtentry		*rt;
5704	struct ifnet		*ifp;
5705
5706	check_mpath = 0;
5707#ifdef __FreeBSD__
5708#ifdef RADIX_MPATH
5709	/* XXX: stick to table 0 for now */
5710	rnh = rt_tables_get_rnh(0, af);
5711	if (rnh != NULL && rn_mpath_capable(rnh))
5712		check_mpath = 1;
5713#endif
5714#endif
5715	bzero(&ro, sizeof(ro));
5716	switch (af) {
5717	case AF_INET:
5718		dst = satosin(&ro.ro_dst);
5719		dst->sin_family = AF_INET;
5720		dst->sin_len = sizeof(*dst);
5721		dst->sin_addr = addr->v4;
5722#ifndef __FreeBSD__
5723		if (ipmultipath)
5724			check_mpath = 1;
5725#endif
5726		break;
5727#ifdef INET6
5728	case AF_INET6:
5729		/*
5730		 * Skip check for addresses with embedded interface scope,
5731		 * as they would always match anyway.
5732		 */
5733		if (IN6_IS_SCOPE_EMBED(&addr->v6))
5734			goto out;
5735		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5736		dst6->sin6_family = AF_INET6;
5737		dst6->sin6_len = sizeof(*dst6);
5738		dst6->sin6_addr = addr->v6;
5739#ifndef __FreeBSD__
5740		if (ip6_multipath)
5741			check_mpath = 1;
5742#endif
5743		break;
5744#endif /* INET6 */
5745	default:
5746		return (0);
5747	}
5748
5749	/* Skip checks for ipsec interfaces */
5750	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
5751		goto out;
5752
5753#ifdef __FreeBSD__
5754/* XXX MRT not always INET */ /* stick with table 0 though */
5755#ifdef INET
5756	if (af == AF_INET)
5757		in_rtalloc_ign((struct route *)&ro, 0, 0);
5758	else
5759#endif
5760		rtalloc_ign((struct route *)&ro, 0);
5761#else /* ! __FreeBSD__ */
5762	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5763#endif
5764
5765	if (ro.ro_rt != NULL) {
5766		/* No interface given, this is a no-route check */
5767		if (kif == NULL)
5768			goto out;
5769
5770		if (kif->pfik_ifp == NULL) {
5771			ret = 0;
5772			goto out;
5773		}
5774
5775		/* Perform uRPF check if passed input interface */
5776		ret = 0;
5777		rn = (struct radix_node *)ro.ro_rt;
5778		do {
5779			rt = (struct rtentry *)rn;
5780#ifndef __FreeBSD__ /* CARPDEV */
5781			if (rt->rt_ifp->if_type == IFT_CARP)
5782				ifp = rt->rt_ifp->if_carpdev;
5783			else
5784#endif
5785				ifp = rt->rt_ifp;
5786
5787			if (kif->pfik_ifp == ifp)
5788				ret = 1;
5789#ifdef __FreeBSD__
5790#ifdef RADIX_MPATH
5791			rn = rn_mpath_next(rn);
5792#endif
5793#else
5794			rn = rn_mpath_next(rn, 0);
5795#endif
5796		} while (check_mpath == 1 && rn != NULL && ret == 0);
5797	} else
5798		ret = 0;
5799out:
5800	if (ro.ro_rt != NULL)
5801		RTFREE(ro.ro_rt);
5802	return (ret);
5803}
5804
5805int
5806pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
5807{
5808	struct sockaddr_in	*dst;
5809#ifdef INET6
5810	struct sockaddr_in6	*dst6;
5811	struct route_in6	 ro;
5812#else
5813	struct route		 ro;
5814#endif
5815	int			 ret = 0;
5816
5817	bzero(&ro, sizeof(ro));
5818	switch (af) {
5819	case AF_INET:
5820		dst = satosin(&ro.ro_dst);
5821		dst->sin_family = AF_INET;
5822		dst->sin_len = sizeof(*dst);
5823		dst->sin_addr = addr->v4;
5824		break;
5825#ifdef INET6
5826	case AF_INET6:
5827		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5828		dst6->sin6_family = AF_INET6;
5829		dst6->sin6_len = sizeof(*dst6);
5830		dst6->sin6_addr = addr->v6;
5831		break;
5832#endif /* INET6 */
5833	default:
5834		return (0);
5835	}
5836
5837#ifdef __FreeBSD__
5838# ifdef RTF_PRCLONING
5839	rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING));
5840# else /* !RTF_PRCLONING */
5841#ifdef INET
5842	if (af == AF_INET)
5843		in_rtalloc_ign((struct route *)&ro, 0, 0);
5844	else
5845#endif
5846		rtalloc_ign((struct route *)&ro, 0);
5847# endif
5848#else /* ! __FreeBSD__ */
5849	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5850#endif
5851
5852	if (ro.ro_rt != NULL) {
5853#ifdef __FreeBSD__
5854		/* XXX_IMPORT: later */
5855#else
5856		if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
5857			ret = 1;
5858#endif
5859		RTFREE(ro.ro_rt);
5860	}
5861
5862	return (ret);
5863}
5864
5865#ifdef INET
/*
 * Send an IPv4 packet out according to the routing option of rule "r"
 * (r->rt: PF_FASTROUTE, PF_DUPTO, PF_REPLYTO or another pool-based option).
 *
 *   m     in/out packet; on the "done" path *m is set to NULL unless the
 *         rule is PF_DUPTO (the two early returns leave *m untouched)
 *   r     rule that carries the routing option and address pool
 *   dir   PF_IN or PF_OUT
 *   oifp  interface the packet was seen on
 *   s     matching state or NULL; when set, its rt_addr/rt_kif are used
 *         instead of the rule's pool
 *   pd    packet descriptor (its pf_mtag is used for the loop counter in
 *         the __FreeBSD__ build)
 *
 * Panics on invalid parameters.  Nothing is returned; failures free the
 * working mbuf (m0) via the "bad" label.
 */
void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0, *m1;
	struct route		 iproute;
	struct route		*ro = NULL;
	struct sockaddr_in	*dst;
	struct ip		*ip;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;
#ifdef __FreeBSD__
	int sw_csum;
#endif
#ifdef IPSEC
	struct m_tag		*mtag;
#endif /* IPSEC */

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route: invalid parameters");

	/* Loop guard: discard a packet once its "routed" counter exceeds 3. */
#ifdef __FreeBSD__
	if (pd->pf_mtag->routed++ > 3) {
#else
	if ((*m)->m_pkthdr.pf.routed++ > 3) {
#endif
		m0 = *m;
		*m = NULL;
		goto bad;
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to works on a copy; the original stays with the caller. */
#ifdef __FreeBSD__
		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
#else
		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
#endif
			return;
	} else {
		/*
		 * reply-to acts on packets in the direction opposite to the
		 * rule's, every other option on packets in the rule's own
		 * direction; in the remaining cases there is nothing to do.
		 */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	/* Default next hop: the packet's own destination address. */
	ro = &iproute;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = satosin(&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* fastroute: take interface and gateway from the routing table. */
#ifdef __FreeBSD__
		in_rtalloc(ro, 0);
#else
		rtalloc(ro);
#endif
		if (ro->ro_rt == 0) {
#ifdef __FreeBSD__
			KMOD_IPSTAT_INC(ips_noroute);
#else
			ipstat.ips_noroute++;
#endif
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
			dst = satosin(ro->ro_rt->rt_gateway);
	} else {
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s == NULL) {
			/* Stateless: pick next hop and interface from the pool. */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET))
				dst->sin_addr.s_addr = naddr.v4.s_addr;
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			/* Stateful: reuse the choice recorded in the state. */
			if (!PF_AZERO(&s->rt_addr, AF_INET))
				dst->sin_addr.s_addr =
				    s->rt_addr.v4.s_addr;
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL)
		goto bad;

	/*
	 * The packet leaves through a different interface than it was seen
	 * on: run it through pf_test() as outbound on the new interface.
	 * pf_test() may replace or consume m0, hence the re-checks below.
	 */
	if (oifp != ifp) {
#ifdef __FreeBSD__
		PF_UNLOCK();
		if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			PF_LOCK();
			goto bad;
		} else if (m0 == NULL) {
			PF_LOCK();
			goto done;
		}
		PF_LOCK();
#else
		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
			goto bad;
		else if (m0 == NULL)
			goto done;
#endif
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

#ifdef __FreeBSD__
	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
	m0->m_pkthdr.csum_flags |= CSUM_IP;
	/* sw_csum: checksum work the chosen interface cannot offload. */
	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
	if (sw_csum & CSUM_DELAY_DATA) {
		/*
		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
		 */
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);	/* XXX: needed? */
		in_delayed_cksum(m0);
		HTONS(ip->ip_len);
		HTONS(ip->ip_off);
		sw_csum &= ~CSUM_DELAY_DATA;
	}
	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;

	/* Fits the MTU (or the interface fragments in hardware): send as is. */
	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
	    (ifp->if_hwassist & CSUM_FRAGMENT &&
	    ((ip->ip_off & htons(IP_DF)) == 0))) {
		/*
		 * ip->ip_len = htons(ip->ip_len);
		 * ip->ip_off = htons(ip->ip_off);
		 */
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			/* From KAME */
			if (ip->ip_v == IPVERSION &&
			    (ip->ip_hl << 2) == sizeof(*ip)) {
				ip->ip_sum = in_cksum_hdr(ip);
			} else {
				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
			}
		}
		/* PF_UNLOCK()/PF_LOCK() bracket the call into the driver. */
		PF_UNLOCK();
		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro);
		PF_LOCK();
		goto done;
	}
#else
	/* Copied from ip_output. */
#ifdef IPSEC
	/*
	 * If deferred crypto processing is needed, check that the
	 * interface supports it.
	 */
	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
		/* Notify IPsec to do its own crypto. */
		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
		goto bad;
	}
#endif /* IPSEC */

	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
	if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) {
		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
		    ifp->if_bridge != NULL) {
			in_delayed_cksum(m0);
			m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clr */
		}
	} else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) {
		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
		    ifp->if_bridge != NULL) {
			in_delayed_cksum(m0);
			m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clr */
		}
	}

	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
		ip->ip_sum = 0;
		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
		    ifp->if_bridge == NULL) {
			m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
#ifdef __FreeBSD__
			KMOD_IPSTAT_INC(ips_outhwcsum);
#else
			ipstat.ips_outhwcsum++;
#endif
		} else
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
		/* Update relevant hardware checksum stats for TCP/UDP */
		if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT)
			KMOD_TCPSTAT_INC(tcps_outhwcsum);
		else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT)
			KMOD_UDPSTAT_INC(udps_outhwcsum);
		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
		goto done;
	}
#endif

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ip->ip_off & htons(IP_DF)) {
#ifdef __FreeBSD__
		KMOD_IPSTAT_INC(ips_cantfrag);
#else
		ipstat.ips_cantfrag++;
#endif
		/* No "fragmentation needed" error is generated for a dup-to copy. */
		if (r->rt != PF_DUPTO) {
#ifdef __FreeBSD__
			/* icmp_error() expects host byte ordering */
			NTOHS(ip->ip_len);
			NTOHS(ip->ip_off);
			PF_UNLOCK();
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    ifp->if_mtu);
			PF_LOCK();
#else
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    ifp->if_mtu);
#endif
			goto done;
		} else
			goto bad;
	}

	/* Remember the head; the loop below walks the chain from m1. */
	m1 = m0;
#ifdef __FreeBSD__
	/*
	 * XXX: is cheaper + less error prone than own function
	 */
	NTOHS(ip->ip_len);
	NTOHS(ip->ip_off);
	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
#else
	error = ip_fragment(m0, ifp, ifp->if_mtu);
#endif
	if (error) {
#ifndef __FreeBSD__    /* ip_fragment does not do m_freem() on FreeBSD */
		m0 = NULL;
#endif
		goto bad;
	}

	/*
	 * Send the fragments linked through m_nextpkt one by one; after the
	 * first output error the remaining fragments are freed instead.
	 */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
#ifdef __FreeBSD__
		if (error == 0) {
			PF_UNLOCK();
			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
			    NULL);
			PF_LOCK();
		} else
#else
		if (error == 0)
			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
			    NULL);
		else
#endif
			m_freem(m0);
	}

	if (error == 0)
#ifdef __FreeBSD__
		KMOD_IPSTAT_INC(ips_fragmented);
#else
		ipstat.ips_fragmented++;
#endif

done:
	/* Except for dup-to, the caller's packet is gone after this point. */
	if (r->rt != PF_DUPTO)
		*m = NULL;
	/* ro stays NULL when the loop guard above bailed out early. */
	if (ro == &iproute && ro->ro_rt)
		RTFREE(ro->ro_rt);
	return;

bad:
	m_freem(m0);
	goto done;
}
6170#endif /* INET */
6171
6172#ifdef INET6
6173void
6174pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
6175    struct pf_state *s, struct pf_pdesc *pd)
6176{
6177	struct mbuf		*m0;
6178	struct route_in6	 ip6route;
6179	struct route_in6	*ro;
6180	struct sockaddr_in6	*dst;
6181	struct ip6_hdr		*ip6;
6182	struct ifnet		*ifp = NULL;
6183	struct pf_addr		 naddr;
6184	struct pf_src_node	*sn = NULL;
6185
6186	if (m == NULL || *m == NULL || r == NULL ||
6187	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
6188		panic("pf_route6: invalid parameters");
6189
6190#ifdef __FreeBSD__
6191	if (pd->pf_mtag->routed++ > 3) {
6192#else
6193	if ((*m)->m_pkthdr.pf.routed++ > 3) {
6194#endif
6195		m0 = *m;
6196		*m = NULL;
6197		goto bad;
6198	}
6199
6200	if (r->rt == PF_DUPTO) {
6201#ifdef __FreeBSD__
6202		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
6203#else
6204		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
6205#endif
6206			return;
6207	} else {
6208		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
6209			return;
6210		m0 = *m;
6211	}
6212
6213	if (m0->m_len < sizeof(struct ip6_hdr)) {
6214		DPFPRINTF(PF_DEBUG_URGENT,
6215		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
6216		goto bad;
6217	}
6218	ip6 = mtod(m0, struct ip6_hdr *);
6219
6220	ro = &ip6route;
6221	bzero((caddr_t)ro, sizeof(*ro));
6222	dst = (struct sockaddr_in6 *)&ro->ro_dst;
6223	dst->sin6_family = AF_INET6;
6224	dst->sin6_len = sizeof(*dst);
6225	dst->sin6_addr = ip6->ip6_dst;
6226
6227	/* Cheat. XXX why only in the v6 case??? */
6228	if (r->rt == PF_FASTROUTE) {
6229#ifdef __FreeBSD__
6230		m0->m_flags |= M_SKIP_FIREWALL;
6231		PF_UNLOCK();
6232		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
6233#else
6234		m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
6235		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
6236#endif
6237		return;
6238	}
6239
6240	if (TAILQ_EMPTY(&r->rpool.list)) {
6241		DPFPRINTF(PF_DEBUG_URGENT,
6242		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
6243		goto bad;
6244	}
6245	if (s == NULL) {
6246		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
6247		    &naddr, NULL, &sn);
6248		if (!PF_AZERO(&naddr, AF_INET6))
6249			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
6250			    &naddr, AF_INET6);
6251		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
6252	} else {
6253		if (!PF_AZERO(&s->rt_addr, AF_INET6))
6254			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
6255			    &s->rt_addr, AF_INET6);
6256		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
6257	}
6258	if (ifp == NULL)
6259		goto bad;
6260
6261	if (oifp != ifp) {
6262#ifdef __FreeBSD__
6263		PF_UNLOCK();
6264		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
6265			PF_LOCK();
6266			goto bad;
6267		} else if (m0 == NULL) {
6268			PF_LOCK();
6269			goto done;
6270		}
6271		PF_LOCK();
6272#else
6273		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
6274			goto bad;
6275		else if (m0 == NULL)
6276			goto done;
6277#endif
6278		if (m0->m_len < sizeof(struct ip6_hdr)) {
6279			DPFPRINTF(PF_DEBUG_URGENT,
6280			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
6281			goto bad;
6282		}
6283		ip6 = mtod(m0, struct ip6_hdr *);
6284	}
6285
6286	/*
6287	 * If the packet is too large for the outgoing interface,
6288	 * send back an icmp6 error.
6289	 */
6290	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
6291		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
6292	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
6293#ifdef __FreeBSD__
6294		PF_UNLOCK();
6295#endif
6296		nd6_output(ifp, ifp, m0, dst, NULL);
6297#ifdef __FreeBSD__
6298		PF_LOCK();
6299#endif
6300	} else {
6301		in6_ifstat_inc(ifp, ifs6_in_toobig);
6302#ifdef __FreeBSD__
6303		if (r->rt != PF_DUPTO) {
6304			PF_UNLOCK();
6305			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
6306			PF_LOCK();
6307		} else
6308#else
6309		if (r->rt != PF_DUPTO)
6310			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
6311		else
6312#endif
6313			goto bad;
6314	}
6315
6316done:
6317	if (r->rt != PF_DUPTO)
6318		*m = NULL;
6319	return;
6320
6321bad:
6322	m_freem(m0);
6323	goto done;
6324}
6325#endif /* INET6 */
6326
6327#ifdef __FreeBSD__
6328/*
6329 * FreeBSD supports cksum offloads for the following drivers.
6330 *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
6331 *   ti(4), txp(4), xl(4)
6332 *
6333 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
6334 *  network driver performed cksum including pseudo header, need to verify
6335 *   csum_data
6336 * CSUM_DATA_VALID :
6337 *  network driver performed cksum, needs to additional pseudo header
6338 *  cksum computation with partial csum_data(i.e. lack of H/W support for
6339 *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
6340 *
6341 * After validating the cksum of packet, set both flag CSUM_DATA_VALID and
6342 * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
6343 * TCP/UDP layer.
6344 * Also, set csum_data to 0xffff to force cksum validation.
6345 */
6346int
6347pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
6348{
6349	u_int16_t sum = 0;
6350	int hw_assist = 0;
6351	struct ip *ip;
6352
6353	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6354		return (1);
6355	if (m->m_pkthdr.len < off + len)
6356		return (1);
6357
6358	switch (p) {
6359	case IPPROTO_TCP:
6360		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6361			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6362				sum = m->m_pkthdr.csum_data;
6363			} else {
6364				ip = mtod(m, struct ip *);
6365				sum = in_pseudo(ip->ip_src.s_addr,
6366				ip->ip_dst.s_addr, htonl((u_short)len +
6367				m->m_pkthdr.csum_data + IPPROTO_TCP));
6368			}
6369			sum ^= 0xffff;
6370			++hw_assist;
6371		}
6372		break;
6373	case IPPROTO_UDP:
6374		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6375			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6376				sum = m->m_pkthdr.csum_data;
6377			} else {
6378				ip = mtod(m, struct ip *);
6379				sum = in_pseudo(ip->ip_src.s_addr,
6380				ip->ip_dst.s_addr, htonl((u_short)len +
6381				m->m_pkthdr.csum_data + IPPROTO_UDP));
6382			}
6383			sum ^= 0xffff;
6384			++hw_assist;
6385		}
6386		break;
6387	case IPPROTO_ICMP:
6388#ifdef INET6
6389	case IPPROTO_ICMPV6:
6390#endif /* INET6 */
6391		break;
6392	default:
6393		return (1);
6394	}
6395
6396	if (!hw_assist) {
6397		switch (af) {
6398		case AF_INET:
6399			if (p == IPPROTO_ICMP) {
6400				if (m->m_len < off)
6401					return (1);
6402				m->m_data += off;
6403				m->m_len -= off;
6404				sum = in_cksum(m, len);
6405				m->m_data -= off;
6406				m->m_len += off;
6407			} else {
6408				if (m->m_len < sizeof(struct ip))
6409					return (1);
6410				sum = in4_cksum(m, p, off, len);
6411			}
6412			break;
6413#ifdef INET6
6414		case AF_INET6:
6415			if (m->m_len < sizeof(struct ip6_hdr))
6416				return (1);
6417			sum = in6_cksum(m, p, off, len);
6418			break;
6419#endif /* INET6 */
6420		default:
6421			return (1);
6422		}
6423	}
6424	if (sum) {
6425		switch (p) {
6426		case IPPROTO_TCP:
6427		    {
6428			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
6429			break;
6430		    }
6431		case IPPROTO_UDP:
6432		    {
6433			KMOD_UDPSTAT_INC(udps_badsum);
6434			break;
6435		    }
6436#ifdef INET
6437		case IPPROTO_ICMP:
6438		    {
6439			KMOD_ICMPSTAT_INC(icps_checksum);
6440			break;
6441		    }
6442#endif
6443#ifdef INET6
6444		case IPPROTO_ICMPV6:
6445		    {
6446			KMOD_ICMP6STAT_INC(icp6s_checksum);
6447			break;
6448		    }
6449#endif /* INET6 */
6450		}
6451		return (1);
6452	} else {
6453		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
6454			m->m_pkthdr.csum_flags |=
6455			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
6456			m->m_pkthdr.csum_data = 0xffff;
6457		}
6458	}
6459	return (0);
6460}
6461#else /* !__FreeBSD__ */
6462
6463/*
6464 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
6465 *   off is the offset where the protocol header starts
6466 *   len is the total length of protocol header plus payload
6467 * returns 0 when the checksum is valid, otherwise returns 1.
6468 */
6469int
6470pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
6471    sa_family_t af)
6472{
6473	u_int16_t flag_ok, flag_bad;
6474	u_int16_t sum;
6475
6476	switch (p) {
6477	case IPPROTO_TCP:
6478		flag_ok = M_TCP_CSUM_IN_OK;
6479		flag_bad = M_TCP_CSUM_IN_BAD;
6480		break;
6481	case IPPROTO_UDP:
6482		flag_ok = M_UDP_CSUM_IN_OK;
6483		flag_bad = M_UDP_CSUM_IN_BAD;
6484		break;
6485	case IPPROTO_ICMP:
6486#ifdef INET6
6487	case IPPROTO_ICMPV6:
6488#endif /* INET6 */
6489		flag_ok = flag_bad = 0;
6490		break;
6491	default:
6492		return (1);
6493	}
6494	if (m->m_pkthdr.csum_flags & flag_ok)
6495		return (0);
6496	if (m->m_pkthdr.csum_flags & flag_bad)
6497		return (1);
6498	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6499		return (1);
6500	if (m->m_pkthdr.len < off + len)
6501		return (1);
6502	switch (af) {
6503#ifdef INET
6504	case AF_INET:
6505		if (p == IPPROTO_ICMP) {
6506			if (m->m_len < off)
6507				return (1);
6508			m->m_data += off;
6509			m->m_len -= off;
6510			sum = in_cksum(m, len);
6511			m->m_data -= off;
6512			m->m_len += off;
6513		} else {
6514			if (m->m_len < sizeof(struct ip))
6515				return (1);
6516			sum = in4_cksum(m, p, off, len);
6517		}
6518		break;
6519#endif /* INET */
6520#ifdef INET6
6521	case AF_INET6:
6522		if (m->m_len < sizeof(struct ip6_hdr))
6523			return (1);
6524		sum = in6_cksum(m, p, off, len);
6525		break;
6526#endif /* INET6 */
6527	default:
6528		return (1);
6529	}
6530	if (sum) {
6531		m->m_pkthdr.csum_flags |= flag_bad;
6532		switch (p) {
6533		case IPPROTO_TCP:
6534			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
6535			break;
6536		case IPPROTO_UDP:
6537			KMOD_UDPSTAT_INC(udps_badsum);
6538			break;
6539#ifdef INET
6540		case IPPROTO_ICMP:
6541			KMOD_ICMPSTAT_INC(icps_checksum);
6542			break;
6543#endif
6544#ifdef INET6
6545		case IPPROTO_ICMPV6:
6546			KMOD_ICMP6STAT_INC(icp6s_checksum);
6547			break;
6548#endif /* INET6 */
6549		}
6550		return (1);
6551	}
6552	m->m_pkthdr.csum_flags |= flag_ok;
6553	return (0);
6554}
6555#endif
6556
6557#ifndef __FreeBSD__
6558struct pf_divert *
6559pf_find_divert(struct mbuf *m)
6560{
6561	struct m_tag    *mtag;
6562
6563	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
6564		return (NULL);
6565
6566	return ((struct pf_divert *)(mtag + 1));
6567}
6568
6569struct pf_divert *
6570pf_get_divert(struct mbuf *m)
6571{
6572	struct m_tag    *mtag;
6573
6574	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
6575		mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
6576		    M_NOWAIT);
6577		if (mtag == NULL)
6578			return (NULL);
6579		bzero(mtag + 1, sizeof(struct pf_divert));
6580		m_tag_prepend(m, mtag);
6581	}
6582
6583	return ((struct pf_divert *)(mtag + 1));
6584}
6585#endif
6586
6587#ifdef INET
/*
 * pf_test: run one IPv4 packet through the packet filter.
 *
 *   dir  PF_IN or PF_OUT.
 *   ifp  interface the packet is passing through; its if_pf_kif supplies
 *        the pfi_kif used for rule evaluation and counters.
 *   m0   in/out packet pointer.  pf_normalize_ip() may replace *m0, and
 *        *m0 is set to NULL when the packet was handed to divert, when the
 *        action was PF_SYNPROXY_DROP or PF_DEFER, or (per the comment near
 *        the end) when pf_route() freed it.
 *   eh   stored in pd.eh for the rest of the evaluation.
 *   inp  (FreeBSD only) passed through to pf_test_rule().
 *
 * Processing order: early pass/drop exits, IP normalization, per-protocol
 * state lookup with a fall back to pf_test_rule() when no state was found,
 * then at "done": IP-options check, tagging, ALTQ queue selection, divert,
 * logging, counters and finally routing.
 *
 * Returns the resulting PF_* action; PF_SYNPROXY_DROP and PF_DEFER are
 * reported to the caller as PF_PASS with *m0 == NULL.
 *
 * On FreeBSD the function takes PF_LOCK() on entry and every return path
 * below releases it again.
 */
6588int
6589#ifdef __FreeBSD__
6590pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
6591    struct ether_header *eh, struct inpcb *inp)
6592#else
6593pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
6594    struct ether_header *eh)
6595#endif
6596{
6597	struct pfi_kif		*kif;
6598	u_short			 action, reason = 0, log = 0;
6599	struct mbuf		*m = *m0;
6600#ifdef __FreeBSD__
6601	struct ip		*h = NULL;
6602	struct m_tag		*ipfwtag;
6603	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
6604#else
6605	struct ip		*h;
6606	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
6607#endif
6608	struct pf_state		*s = NULL;
6609	struct pf_ruleset	*ruleset = NULL;
6610	struct pf_pdesc		 pd;
6611	int			 off, dirndx, pqid = 0;
6612
6613#ifdef __FreeBSD__
	/* Held until one of the PF_UNLOCK() calls on the return paths. */
6614	PF_LOCK();
6615	if (!V_pf_status.running)
6616	{
6617		PF_UNLOCK();
6618		return (PF_PASS);
6619	}
6620#else
6621	if (!pf_status.running)
6622		return (PF_PASS);
6623#endif
6624
6625	memset(&pd, 0, sizeof(pd));
6626#ifdef __FreeBSD__
6627	if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
6628		PF_UNLOCK();
6629		DPFPRINTF(PF_DEBUG_URGENT,
6630		    ("pf_test: pf_get_mtag returned NULL\n"));
6631		return (PF_DROP);
6632	}
6633#endif
6634#ifndef __FreeBSD__
6635	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
6636		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
6637	else
6638#endif
6639		kif = (struct pfi_kif *)ifp->if_pf_kif;
6640
6641	if (kif == NULL) {
6642#ifdef __FreeBSD__
6643		PF_UNLOCK();
6644#endif
6645		DPFPRINTF(PF_DEBUG_URGENT,
6646		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
6647		return (PF_DROP);
6648	}
	/* Interfaces flagged PFI_IFLAG_SKIP are passed without filtering. */
6649	if (kif->pfik_flags & PFI_IFLAG_SKIP)
6650#ifdef __FreeBSD__
6651	{
6652		PF_UNLOCK();
6653#endif
6654		return (PF_PASS);
6655#ifdef __FreeBSD__
6656	}
6657#endif
6658
6659#ifdef __FreeBSD__
6660	M_ASSERTPKTHDR(m);
6661#else
6662#ifdef DIAGNOSTIC
6663	if ((m->m_flags & M_PKTHDR) == 0)
6664		panic("non-M_PKTHDR is passed to pf_test");
6665#endif /* DIAGNOSTIC */
6666#endif
6667
	/* Packet is too short to hold an IPv4 header: drop and log. */
6668	if (m->m_pkthdr.len < (int)sizeof(*h)) {
6669		action = PF_DROP;
6670		REASON_SET(&reason, PFRES_SHORT);
6671		log = 1;
6672		goto done;
6673	}
6674
6675#ifdef __FreeBSD__
6676	if (m->m_flags & M_SKIP_FIREWALL) {
6677		PF_UNLOCK();
6678		return (PF_PASS);
6679	}
6680#else
6681	if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
6682		return (PF_PASS);
6683#endif
6684
6685#ifdef __FreeBSD__
	/*
	 * A packet that carries an MTAG_IPFW_RULE tag (while divert is
	 * available) takes this branch and therefore skips the
	 * pf_normalize_ip() call in the else branch below.  A tag marked
	 * IPFW_IS_DIVERT with rulenum 0 flags the packet PF_PACKET_LOOPED
	 * and is removed; a saved PF_FASTFWD_OURS_PRESENT flag is turned
	 * back into M_FASTFWD_OURS.
	 */
6686	if (ip_divert_ptr != NULL &&
6687	    ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
6688		struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
6689		if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
6690			pd.pf_mtag->flags |= PF_PACKET_LOOPED;
6691			m_tag_delete(m, ipfwtag);
6692		}
6693		if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
6694			m->m_flags |= M_FASTFWD_OURS;
6695			pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT;
6696		}
6697	} else
6698#endif
6699	/* We do IP header normalization and packet reassembly here */
6700	if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
6701		action = PF_DROP;
6702		goto done;
6703	}
6704	m = *m0;	/* pf_normalize messes with m0 */
6705	h = mtod(m, struct ip *);
6706
	/* ip_hl counts 32-bit words; off is the IP header length in bytes. */
6707	off = h->ip_hl << 2;
6708	if (off < (int)sizeof(*h)) {
6709		action = PF_DROP;
6710		REASON_SET(&reason, PFRES_SHORT);
6711		log = 1;
6712		goto done;
6713	}
6714
	/* Fill in the packet descriptor from the IP header. */
6715	pd.src = (struct pf_addr *)&h->ip_src;
6716	pd.dst = (struct pf_addr *)&h->ip_dst;
6717	pd.sport = pd.dport = NULL;
6718	pd.ip_sum = &h->ip_sum;
6719	pd.proto_sum = NULL;
6720	pd.proto = h->ip_p;
6721	pd.dir = dir;
6722	pd.sidx = (dir == PF_IN) ? 0 : 1;
6723	pd.didx = (dir == PF_IN) ? 1 : 0;
6724	pd.af = AF_INET;
6725	pd.tos = h->ip_tos;
6726	pd.tot_len = ntohs(h->ip_len);
6727	pd.eh = eh;
6728
6729	/* handle fragments that didn't get reassembled by normalization */
6730	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
6731		action = pf_test_fragment(&r, dir, kif, m, h,
6732		    &pd, &a, &ruleset);
6733		goto done;
6734	}
6735
	/*
	 * Per-protocol handling.  Each case first tries the state table; on
	 * PF_PASS the rule, anchor and log setting are taken from the state,
	 * and when no state was found (s == NULL) the ruleset is evaluated
	 * with pf_test_rule().
	 */
6736	switch (h->ip_p) {
6737
6738	case IPPROTO_TCP: {
6739		struct tcphdr	th;
6740
6741		pd.hdr.tcp = &th;
6742		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6743		    &action, &reason, AF_INET)) {
6744			log = action != PF_PASS;
6745			goto done;
6746		}
6747		pd.p_len = pd.tot_len - off - (th.th_off << 2);
		/* ACK without payload: use the rule's pqid in the ALTQ code below. */
6748		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
6749			pqid = 1;
6750		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6751		if (action == PF_DROP)
6752			goto done;
6753		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6754		    &reason);
6755		if (action == PF_PASS) {
6756#if NPFSYNC > 0
6757#ifdef __FreeBSD__
6758			if (pfsync_update_state_ptr != NULL)
6759				pfsync_update_state_ptr(s);
6760#else
6761			pfsync_update_state(s);
6762#endif
6763#endif /* NPFSYNC */
6764			r = s->rule.ptr;
6765			a = s->anchor.ptr;
6766			log = s->log;
6767		} else if (s == NULL)
6768#ifdef __FreeBSD__
6769			action = pf_test_rule(&r, &s, dir, kif,
6770			    m, off, h, &pd, &a, &ruleset, NULL, inp);
6771#else
6772			action = pf_test_rule(&r, &s, dir, kif,
6773			    m, off, h, &pd, &a, &ruleset, &ipintrq);
6774#endif
6775		break;
6776	}
6777
6778	case IPPROTO_UDP: {
6779		struct udphdr	uh;
6780
6781		pd.hdr.udp = &uh;
6782		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6783		    &action, &reason, AF_INET)) {
6784			log = action != PF_PASS;
6785			goto done;
6786		}
		/*
		 * Drop destination port 0 and UDP lengths that exceed the
		 * rest of the packet or are smaller than a UDP header.
		 */
6787		if (uh.uh_dport == 0 ||
6788		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6789		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6790			action = PF_DROP;
6791			REASON_SET(&reason, PFRES_SHORT);
6792			goto done;
6793		}
6794		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6795		if (action == PF_PASS) {
6796#if NPFSYNC > 0
6797#ifdef __FreeBSD__
6798			if (pfsync_update_state_ptr != NULL)
6799				pfsync_update_state_ptr(s);
6800#else
6801			pfsync_update_state(s);
6802#endif
6803#endif /* NPFSYNC */
6804			r = s->rule.ptr;
6805			a = s->anchor.ptr;
6806			log = s->log;
6807		} else if (s == NULL)
6808#ifdef __FreeBSD__
6809			action = pf_test_rule(&r, &s, dir, kif,
6810			    m, off, h, &pd, &a, &ruleset, NULL, inp);
6811#else
6812			action = pf_test_rule(&r, &s, dir, kif,
6813			    m, off, h, &pd, &a, &ruleset, &ipintrq);
6814#endif
6815		break;
6816	}
6817
6818	case IPPROTO_ICMP: {
6819		struct icmp	ih;
6820
6821		pd.hdr.icmp = &ih;
6822		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
6823		    &action, &reason, AF_INET)) {
6824			log = action != PF_PASS;
6825			goto done;
6826		}
6827		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
6828		    &reason);
6829		if (action == PF_PASS) {
6830#if NPFSYNC > 0
6831#ifdef __FreeBSD__
6832			if (pfsync_update_state_ptr != NULL)
6833				pfsync_update_state_ptr(s);
6834#else
6835			pfsync_update_state(s);
6836#endif
6837#endif /* NPFSYNC */
6838			r = s->rule.ptr;
6839			a = s->anchor.ptr;
6840			log = s->log;
6841		} else if (s == NULL)
6842#ifdef __FreeBSD__
6843			action = pf_test_rule(&r, &s, dir, kif,
6844			    m, off, h, &pd, &a, &ruleset, NULL, inp);
6845#else
6846			action = pf_test_rule(&r, &s, dir, kif,
6847			    m, off, h, &pd, &a, &ruleset, &ipintrq);
6848#endif
6849		break;
6850	}
6851
6852#ifdef INET6
6853	case IPPROTO_ICMPV6: {
6854		action = PF_DROP;
6855		DPFPRINTF(PF_DEBUG_MISC,
6856		    ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
6857		goto done;
6858	}
6859#endif
6860
6861	default:
6862		action = pf_test_state_other(&s, dir, kif, m, &pd);
6863		if (action == PF_PASS) {
6864#if NPFSYNC > 0
6865#ifdef __FreeBSD__
6866			if (pfsync_update_state_ptr != NULL)
6867				pfsync_update_state_ptr(s);
6868#else
6869			pfsync_update_state(s);
6870#endif
6871#endif /* NPFSYNC */
6872			r = s->rule.ptr;
6873			a = s->anchor.ptr;
6874			log = s->log;
6875		} else if (s == NULL)
6876#ifdef __FreeBSD__
6877			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
6878			    &pd, &a, &ruleset, NULL, inp);
6879#else
6880			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
6881			    &pd, &a, &ruleset, &ipintrq);
6882#endif
6883		break;
6884	}
6885
6886done:
	/*
	 * A passed packet whose header is longer than 5 words (i.e. carries
	 * IP options) is dropped unless the state or the rule allows options.
	 * h is only dereferenced when action == PF_PASS.
	 */
6887	if (action == PF_PASS && h->ip_hl > 5 &&
6888	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
6889		action = PF_DROP;
6890		REASON_SET(&reason, PFRES_IPOPTIONS);
6891		log = 1;
6892		DPFPRINTF(PF_DEBUG_MISC,
6893		    ("pf: dropping packet with ip options\n"));
6894	}
6895
6896	if ((s && s->tag) || r->rtableid)
6897#ifdef __FreeBSD__
6898		pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag);
6899#else
6900		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
6901#endif
6902
	/* Remember the stack-side state key on inbound packets. */
6903	if (dir == PF_IN && s && s->key[PF_SK_STACK])
6904#ifdef __FreeBSD__
6905		pd.pf_mtag->statekey = s->key[PF_SK_STACK];
6906#else
6907		m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
6908#endif
6909
6910#ifdef ALTQ
	/* Queue selection: pqid for empty ACKs and low-delay TOS, else qid. */
6911	if (action == PF_PASS && r->qid) {
6912#ifdef __FreeBSD__
6913		if (pqid || (pd.tos & IPTOS_LOWDELAY))
6914			pd.pf_mtag->qid = r->pqid;
6915		else
6916			pd.pf_mtag->qid = r->qid;
6917		/* add hints for ecn */
6918		pd.pf_mtag->hdr = h;
6919
6920#else
6921		if (pqid || (pd.tos & IPTOS_LOWDELAY))
6922			m->m_pkthdr.pf.qid = r->pqid;
6923		else
6924			m->m_pkthdr.pf.qid = r->qid;
6925		/* add hints for ecn */
6926		m->m_pkthdr.pf.hdr = h;
6927#endif
6928	}
6929#endif /* ALTQ */
6930
6931	/*
6932	 * connections redirected to loopback should not match sockets
6933	 * bound specifically to loopback due to security implications,
6934	 * see tcp_input() and in_pcblookup_listen().
6935	 */
6936	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6937	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6938	    (s->nat_rule.ptr->action == PF_RDR ||
6939	    s->nat_rule.ptr->action == PF_BINAT) &&
6940	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
6941#ifdef __FreeBSD__
6942		m->m_flags |= M_SKIP_FIREWALL;
6943#else
6944		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
6945#endif
6946
6947#ifdef __FreeBSD__
	/*
	 * FreeBSD divert: tag the packet with an ipfw rule reference that
	 * carries the divert port and direction, then hand it to
	 * ip_divert_ptr().  If the tag cannot be allocated the packet is
	 * dropped instead.
	 */
6948	if (action == PF_PASS && r->divert.port &&
6949	    ip_divert_ptr != NULL && !PACKET_LOOPED()) {
6950
6951		ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
6952				sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
6953		if (ipfwtag != NULL) {
6954			((struct ipfw_rule_ref *)(ipfwtag+1))->info =
6955			    ntohs(r->divert.port);
6956			((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir;
6957
6958			m_tag_prepend(m, ipfwtag);
6959
			/*
			 * The lock is released here; this path returns
			 * directly and never reaches the final PF_UNLOCK().
			 */
6960			PF_UNLOCK();
6961
			/* Park M_FASTFWD_OURS in the pf tag; restored on re-entry above. */
6962			if (m->m_flags & M_FASTFWD_OURS) {
6963				pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT;
6964				m->m_flags &= ~M_FASTFWD_OURS;
6965			}
6966
6967			ip_divert_ptr(*m0,
6968				dir ==  PF_IN ? DIR_IN : DIR_OUT);
6969			*m0 = NULL;
6970			return (action);
6971		} else {
6972			/* XXX: ipfw has the same behaviour! */
6973			action = PF_DROP;
6974			REASON_SET(&reason, PFRES_MEMORY);
6975			log = 1;
6976			DPFPRINTF(PF_DEBUG_MISC,
6977			    ("pf: failed to allocate divert tag\n"));
6978		}
6979	}
6980#else
6981	if (dir == PF_IN && action == PF_PASS && r->divert.port) {
6982		struct pf_divert *divert;
6983
6984		if ((divert = pf_get_divert(m))) {
6985			m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
6986			divert->port = r->divert.port;
6987			divert->addr.ipv4 = r->divert.addr.v4;
6988		}
6989	}
6990#endif
6991
6992	if (log) {
6993		struct pf_rule *lr;
6994
		/* Log against the NAT rule when it asks for PF_LOG_ALL. */
6995		if (s != NULL && s->nat_rule.ptr != NULL &&
6996		    s->nat_rule.ptr->log & PF_LOG_ALL)
6997			lr = s->nat_rule.ptr;
6998		else
6999			lr = r;
7000		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
7001		    &pd);
7002	}
7003
	/*
	 * Interface counters, indexed [0][outbound?][not passed?]; the
	 * first index is 0 here and 1 in pf_test6().
	 */
7004	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
7005	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
7006
	/* Rule, anchor, state, source-node and table statistics. */
7007	if (action == PF_PASS || r->action == PF_DROP) {
7008		dirndx = (dir == PF_OUT);
7009		r->packets[dirndx]++;
7010		r->bytes[dirndx] += pd.tot_len;
7011		if (a != NULL) {
7012			a->packets[dirndx]++;
7013			a->bytes[dirndx] += pd.tot_len;
7014		}
7015		if (s != NULL) {
7016			if (s->nat_rule.ptr != NULL) {
7017				s->nat_rule.ptr->packets[dirndx]++;
7018				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
7019			}
7020			if (s->src_node != NULL) {
7021				s->src_node->packets[dirndx]++;
7022				s->src_node->bytes[dirndx] += pd.tot_len;
7023			}
7024			if (s->nat_src_node != NULL) {
7025				s->nat_src_node->packets[dirndx]++;
7026				s->nat_src_node->bytes[dirndx] += pd.tot_len;
7027			}
			/* State counters are relative to the state's own direction. */
7028			dirndx = (dir == s->direction) ? 0 : 1;
7029			s->packets[dirndx]++;
7030			s->bytes[dirndx] += pd.tot_len;
7031		}
7032		tr = r;
7033		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		/* Only the default rule matched: use the NAT rule's tables. */
7034#ifdef __FreeBSD__
7035		if (nr != NULL && r == &V_pf_default_rule)
7036#else
7037		if (nr != NULL && r == &pf_default_rule)
7038#endif
7039			tr = nr;
7040		if (tr->src.addr.type == PF_ADDR_TABLE)
7041			pfr_update_stats(tr->src.addr.p.tbl,
7042			    (s == NULL) ? pd.src :
7043			    &s->key[(s->direction == PF_IN)]->
7044				addr[(s->direction == PF_OUT)],
7045			    pd.af, pd.tot_len, dir == PF_OUT,
7046			    r->action == PF_PASS, tr->src.neg);
7047		if (tr->dst.addr.type == PF_ADDR_TABLE)
7048			pfr_update_stats(tr->dst.addr.p.tbl,
7049			    (s == NULL) ? pd.dst :
7050			    &s->key[(s->direction == PF_IN)]->
7051				addr[(s->direction == PF_IN)],
7052			    pd.af, pd.tot_len, dir == PF_OUT,
7053			    r->action == PF_PASS, tr->dst.neg);
7054	}
7055
7056	switch (action) {
7057	case PF_SYNPROXY_DROP:
7058		m_freem(*m0);
		/* FALLTHROUGH */
7059	case PF_DEFER:
		/* Packet is no longer the caller's; report PF_PASS with *m0 NULL. */
7060		*m0 = NULL;
7061		action = PF_PASS;
7062		break;
7063	default:
7064		/* pf_route can free the mbuf causing *m0 to become NULL */
7065		if (r->rt)
7066			pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
7067		break;
7068	}
7069#ifdef __FreeBSD__
7070	PF_UNLOCK();
7071#endif
7072	return (action);
7073}
7074#endif /* INET */
7075
7076#ifdef INET6
/*
 * pf_test6: run one IPv6 packet through the packet filter.
 *
 *   dir  PF_IN or PF_OUT.
 *   ifp  interface the packet is passing through; its if_pf_kif supplies
 *        the pfi_kif used for rule evaluation and counters.
 *   m0   in/out packet pointer.  pf_normalize_ip6() may replace *m0, and
 *        *m0 is set to NULL when the action was PF_SYNPROXY_DROP or
 *        PF_DEFER, or (per the comment near the end) when pf_route6()
 *        freed it.
 *   eh   stored in pd.eh for the rest of the evaluation.
 *   inp  (FreeBSD only) passed through to pf_test_rule().
 *
 * After normalization the extension header chain is walked: a fragment
 * header sends the packet to pf_test_fragment(); a second routing header,
 * a truncated one or a type 0 routing header drops the packet; AH,
 * hop-by-hop and destination option headers are skipped.  The upper-layer
 * protocol is then handled like in pf_test(): state lookup first,
 * pf_test_rule() when no state was found.
 *
 * Returns the resulting PF_* action; PF_SYNPROXY_DROP and PF_DEFER are
 * reported to the caller as PF_PASS with *m0 == NULL.
 *
 * On FreeBSD the function takes PF_LOCK() on entry and every return path
 * below releases it again.
 */
7077int
7078#ifdef __FreeBSD__
7079pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
7080    struct ether_header *eh, struct inpcb *inp)
7081#else
7082pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
7083    struct ether_header *eh)
7084#endif
7085{
7086	struct pfi_kif		*kif;
7087	u_short			 action, reason = 0, log = 0;
7088	struct mbuf		*m = *m0, *n = NULL;
7089#ifdef __FreeBSD__
7090	struct ip6_hdr		*h = NULL;
7091	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
7092#else
7093	struct ip6_hdr		*h;
7094	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
7095#endif
7096	struct pf_state		*s = NULL;
7097	struct pf_ruleset	*ruleset = NULL;
7098	struct pf_pdesc		 pd;
7099	int			 off, terminal = 0, dirndx, rh_cnt = 0;
7100
7101#ifdef __FreeBSD__
	/* Held until one of the PF_UNLOCK() calls on the return paths. */
7102	PF_LOCK();
7103	if (!V_pf_status.running) {
7104		PF_UNLOCK();
7105		return (PF_PASS);
7106	}
7107#else
7108	if (!pf_status.running)
7109		return (PF_PASS);
7110#endif
7111
7112	memset(&pd, 0, sizeof(pd));
7113#ifdef __FreeBSD__
	/*
	 * NOTE(review): the message below says "pf_test" although this is
	 * pf_test6 -- looks like a copy/paste leftover; confirm before
	 * changing the string.
	 */
7114	if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
7115		PF_UNLOCK();
7116		DPFPRINTF(PF_DEBUG_URGENT,
7117		    ("pf_test: pf_get_mtag returned NULL\n"));
7118		return (PF_DROP);
7119	}
7120#endif
7121#ifndef __FreeBSD__
7122	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
7123		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
7124	else
7125#endif
7126		kif = (struct pfi_kif *)ifp->if_pf_kif;
7127
7128	if (kif == NULL) {
7129#ifdef __FreeBSD__
7130		PF_UNLOCK();
7131#endif
7132		DPFPRINTF(PF_DEBUG_URGENT,
7133		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
7134		return (PF_DROP);
7135	}
	/* Interfaces flagged PFI_IFLAG_SKIP are passed without filtering. */
7136	if (kif->pfik_flags & PFI_IFLAG_SKIP)
7137#ifdef __FreeBSD__
7138	{
7139		PF_UNLOCK();
7140#endif
7141		return (PF_PASS);
7142#ifdef __FreeBSD__
7143	}
7144#endif
7145
7146#ifdef __FreeBSD__
7147	M_ASSERTPKTHDR(m);
7148#else
7149#ifdef DIAGNOSTIC
7150	if ((m->m_flags & M_PKTHDR) == 0)
7151		panic("non-M_PKTHDR is passed to pf_test6");
7152#endif /* DIAGNOSTIC */
7153#endif
7154
	/*
	 * Packet is too short to hold an IPv6 header: drop and log.
	 * NOTE(review): n is still NULL on this early exit, so the cleanup at
	 * "done" calls m_freem(NULL); presumably harmless -- confirm.
	 */
7155	if (m->m_pkthdr.len < (int)sizeof(*h)) {
7156		action = PF_DROP;
7157		REASON_SET(&reason, PFRES_SHORT);
7158		log = 1;
7159		goto done;
7160	}
7161
	/*
	 * NOTE(review): the FreeBSD half tests PF_TAG_GENERATED in the pf
	 * tag here, while pf_test() tests M_SKIP_FIREWALL in m_flags at the
	 * same point -- confirm the difference is intended.
	 */
7162#ifdef __FreeBSD__
7163	if (pd.pf_mtag->flags & PF_TAG_GENERATED) {
7164		PF_UNLOCK();
7165#else
7166	if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
7167#endif
7168		return (PF_PASS);
7169#ifdef __FreeBSD__
7170	}
7171#endif
7172
7173	/* We do IP header normalization and packet reassembly here */
7174	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
7175		action = PF_DROP;
7176		goto done;
7177	}
7178	m = *m0;	/* pf_normalize messes with m0 */
7179	h = mtod(m, struct ip6_hdr *);
7180
7181#if 1
7182	/*
7183	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
7184	 * will do something bad, so drop the packet for now.
7185	 */
	/* htons() rather than ntohs(); irrelevant for a comparison with 0. */
7186	if (htons(h->ip6_plen) == 0) {
7187		action = PF_DROP;
7188		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
7189		goto done;
7190	}
7191#endif
7192
	/* Fill in the packet descriptor from the IPv6 header. */
7193	pd.src = (struct pf_addr *)&h->ip6_src;
7194	pd.dst = (struct pf_addr *)&h->ip6_dst;
7195	pd.sport = pd.dport = NULL;
7196	pd.ip_sum = NULL;
7197	pd.proto_sum = NULL;
7198	pd.dir = dir;
7199	pd.sidx = (dir == PF_IN) ? 0 : 1;
7200	pd.didx = (dir == PF_IN) ? 1 : 0;
7201	pd.af = AF_INET6;
7202	pd.tos = 0;
7203	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
7204	pd.eh = eh;
7205
	/*
	 * Walk the extension header chain.  off ends up at the first header
	 * that is not handled below and pd.proto holds that header's type.
	 */
7206	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
7207	pd.proto = h->ip6_nxt;
7208	do {
7209		switch (pd.proto) {
7210		case IPPROTO_FRAGMENT:
7211			action = pf_test_fragment(&r, dir, kif, m, h,
7212			    &pd, &a, &ruleset);
7213			if (action == PF_DROP)
7214				REASON_SET(&reason, PFRES_FRAG);
7215			goto done;
7216		case IPPROTO_ROUTING: {
7217			struct ip6_rthdr rthdr;
7218
			/* rh_cnt is also checked after "done" (allow-opts test). */
7219			if (rh_cnt++) {
7220				DPFPRINTF(PF_DEBUG_MISC,
7221				    ("pf: IPv6 more than one rthdr\n"));
7222				action = PF_DROP;
7223				REASON_SET(&reason, PFRES_IPOPTIONS);
7224				log = 1;
7225				goto done;
7226			}
7227			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
7228			    &reason, pd.af)) {
7229				DPFPRINTF(PF_DEBUG_MISC,
7230				    ("pf: IPv6 short rthdr\n"));
7231				action = PF_DROP;
7232				REASON_SET(&reason, PFRES_SHORT);
7233				log = 1;
7234				goto done;
7235			}
7236			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
7237				DPFPRINTF(PF_DEBUG_MISC,
7238				    ("pf: IPv6 rthdr0\n"));
7239				action = PF_DROP;
7240				REASON_SET(&reason, PFRES_IPOPTIONS);
7241				log = 1;
7242				goto done;
7243			}
7244			/* FALLTHROUGH */
7245		}
7246		case IPPROTO_AH:
7247		case IPPROTO_HOPOPTS:
7248		case IPPROTO_DSTOPTS: {
7249			/* get next header and header length */
7250			struct ip6_ext	opt6;
7251
7252			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
7253			    NULL, &reason, pd.af)) {
7254				DPFPRINTF(PF_DEBUG_MISC,
7255				    ("pf: IPv6 short opt\n"));
7256				action = PF_DROP;
7257				log = 1;
7258				goto done;
7259			}
			/*
			 * AH: (len + 2) units of 4 bytes; the other extension
			 * headers: (len + 1) units of 8 bytes.
			 */
7260			if (pd.proto == IPPROTO_AH)
7261				off += (opt6.ip6e_len + 2) * 4;
7262			else
7263				off += (opt6.ip6e_len + 1) * 8;
7264			pd.proto = opt6.ip6e_nxt;
7265			/* goto the next header */
7266			break;
7267		}
7268		default:
7269			terminal++;
7270			break;
7271		}
7272	} while (!terminal);
7273
7274	/* if there's no routing header, use unmodified mbuf for checksumming */
	/* NOTE(review): n is never set to anything but m in this function. */
7275	if (!n)
7276		n = m;
7277
	/*
	 * Per-protocol handling.  Each case first tries the state table; on
	 * PF_PASS the rule, anchor and log setting are taken from the state,
	 * and when no state was found (s == NULL) the ruleset is evaluated
	 * with pf_test_rule().
	 */
7278	switch (pd.proto) {
7279
7280	case IPPROTO_TCP: {
7281		struct tcphdr	th;
7282
7283		pd.hdr.tcp = &th;
7284		if (!pf_pull_hdr(m, off, &th, sizeof(th),
7285		    &action, &reason, AF_INET6)) {
7286			log = action != PF_PASS;
7287			goto done;
7288		}
7289		pd.p_len = pd.tot_len - off - (th.th_off << 2);
7290		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
7291		if (action == PF_DROP)
7292			goto done;
7293		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
7294		    &reason);
7295		if (action == PF_PASS) {
7296#if NPFSYNC > 0
7297#ifdef __FreeBSD__
7298			if (pfsync_update_state_ptr != NULL)
7299				pfsync_update_state_ptr(s);
7300#else
7301			pfsync_update_state(s);
7302#endif
7303#endif /* NPFSYNC */
7304			r = s->rule.ptr;
7305			a = s->anchor.ptr;
7306			log = s->log;
7307		} else if (s == NULL)
7308#ifdef __FreeBSD__
7309			action = pf_test_rule(&r, &s, dir, kif,
7310			    m, off, h, &pd, &a, &ruleset, NULL, inp);
7311#else
7312			action = pf_test_rule(&r, &s, dir, kif,
7313			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
7314#endif
7315		break;
7316	}
7317
7318	case IPPROTO_UDP: {
7319		struct udphdr	uh;
7320
7321		pd.hdr.udp = &uh;
7322		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
7323		    &action, &reason, AF_INET6)) {
7324			log = action != PF_PASS;
7325			goto done;
7326		}
		/*
		 * Drop destination port 0 and UDP lengths that exceed the
		 * rest of the packet or are smaller than a UDP header.
		 */
7327		if (uh.uh_dport == 0 ||
7328		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
7329		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
7330			action = PF_DROP;
7331			REASON_SET(&reason, PFRES_SHORT);
7332			goto done;
7333		}
7334		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
7335		if (action == PF_PASS) {
7336#if NPFSYNC > 0
7337#ifdef __FreeBSD__
7338			if (pfsync_update_state_ptr != NULL)
7339				pfsync_update_state_ptr(s);
7340#else
7341			pfsync_update_state(s);
7342#endif
7343#endif /* NPFSYNC */
7344			r = s->rule.ptr;
7345			a = s->anchor.ptr;
7346			log = s->log;
7347		} else if (s == NULL)
7348#ifdef __FreeBSD__
7349			action = pf_test_rule(&r, &s, dir, kif,
7350			    m, off, h, &pd, &a, &ruleset, NULL, inp);
7351#else
7352			action = pf_test_rule(&r, &s, dir, kif,
7353			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
7354#endif
7355		break;
7356	}
7357
7358	case IPPROTO_ICMP: {
7359		action = PF_DROP;
7360		DPFPRINTF(PF_DEBUG_MISC,
7361		    ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
7362		goto done;
7363	}
7364
7365	case IPPROTO_ICMPV6: {
7366		struct icmp6_hdr	ih;
7367
7368		pd.hdr.icmp6 = &ih;
7369		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
7370		    &action, &reason, AF_INET6)) {
7371			log = action != PF_PASS;
7372			goto done;
7373		}
7374		action = pf_test_state_icmp(&s, dir, kif,
7375		    m, off, h, &pd, &reason);
7376		if (action == PF_PASS) {
7377#if NPFSYNC > 0
7378#ifdef __FreeBSD__
7379			if (pfsync_update_state_ptr != NULL)
7380				pfsync_update_state_ptr(s);
7381#else
7382			pfsync_update_state(s);
7383#endif
7384#endif /* NPFSYNC */
7385			r = s->rule.ptr;
7386			a = s->anchor.ptr;
7387			log = s->log;
7388		} else if (s == NULL)
7389#ifdef __FreeBSD__
7390			action = pf_test_rule(&r, &s, dir, kif,
7391			    m, off, h, &pd, &a, &ruleset, NULL, inp);
7392#else
7393			action = pf_test_rule(&r, &s, dir, kif,
7394			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
7395#endif
7396		break;
7397	}
7398
7399	default:
7400		action = pf_test_state_other(&s, dir, kif, m, &pd);
7401		if (action == PF_PASS) {
7402#if NPFSYNC > 0
7403#ifdef __FreeBSD__
7404			if (pfsync_update_state_ptr != NULL)
7405				pfsync_update_state_ptr(s);
7406#else
7407			pfsync_update_state(s);
7408#endif
7409#endif /* NPFSYNC */
7410			r = s->rule.ptr;
7411			a = s->anchor.ptr;
7412			log = s->log;
7413		} else if (s == NULL)
7414#ifdef __FreeBSD__
7415			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
7416			    &pd, &a, &ruleset, NULL, inp);
7417#else
7418			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
7419			    &pd, &a, &ruleset, &ip6intrq);
7420#endif
7421		break;
7422	}
7423
7424done:
	/* Release n when it differs from m (see the review notes above). */
7425	if (n != m) {
7426		m_freem(n);
7427		n = NULL;
7428	}
7429
7430	/* handle dangerous IPv6 extension headers. */
	/* "Dangerous" here means a routing header was seen (rh_cnt != 0). */
7431	if (action == PF_PASS && rh_cnt &&
7432	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
7433		action = PF_DROP;
7434		REASON_SET(&reason, PFRES_IPOPTIONS);
7435		log = 1;
7436		DPFPRINTF(PF_DEBUG_MISC,
7437		    ("pf: dropping packet with dangerous v6 headers\n"));
7438	}
7439
7440	if ((s && s->tag) || r->rtableid)
7441#ifdef __FreeBSD__
7442		pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag);
7443#else
7444		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
7445#endif
7446
	/* Remember the stack-side state key on inbound packets. */
7447	if (dir == PF_IN && s && s->key[PF_SK_STACK])
7448#ifdef __FreeBSD__
7449		pd.pf_mtag->statekey = s->key[PF_SK_STACK];
7450#else
7451		m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
7452#endif
7453
7454#ifdef ALTQ
	/* pd.tos was set to 0 above, so this picks r->qid unless pd.tos changed since. */
7455	if (action == PF_PASS && r->qid) {
7456#ifdef __FreeBSD__
7457		if (pd.tos & IPTOS_LOWDELAY)
7458			pd.pf_mtag->qid = r->pqid;
7459		else
7460			pd.pf_mtag->qid = r->qid;
7461		/* add hints for ecn */
7462		pd.pf_mtag->hdr = h;
7463#else
7464		if (pd.tos & IPTOS_LOWDELAY)
7465			m->m_pkthdr.pf.qid = r->pqid;
7466		else
7467			m->m_pkthdr.pf.qid = r->qid;
7468		/* add hints for ecn */
7469		m->m_pkthdr.pf.hdr = h;
7470#endif
7471	}
7472#endif /* ALTQ */
7473
	/*
	 * Inbound TCP/UDP redirected (RDR/BINAT) to the IPv6 loopback
	 * address gets flagged, mirroring the IPv4 handling in pf_test().
	 */
7474	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
7475	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
7476	    (s->nat_rule.ptr->action == PF_RDR ||
7477	    s->nat_rule.ptr->action == PF_BINAT) &&
7478	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
7479#ifdef __FreeBSD__
7480		m->m_flags |= M_SKIP_FIREWALL;
7481#else
7482		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
7483#endif
7484
7485#ifdef __FreeBSD__
7486	/* XXX: Anybody working on it?! */
	/* The message is printed for every packet whose rule has a divert port. */
7487	if (r->divert.port)
7488		printf("pf: divert(9) is not supported for IPv6\n");
7489#else
7490	if (dir == PF_IN && action == PF_PASS && r->divert.port) {
7491		struct pf_divert *divert;
7492
7493		if ((divert = pf_get_divert(m))) {
7494			m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
7495			divert->port = r->divert.port;
7496			divert->addr.ipv6 = r->divert.addr.v6;
7497		}
7498	}
7499#endif
7500
7501	if (log) {
7502		struct pf_rule *lr;
7503
		/* Log against the NAT rule when it asks for PF_LOG_ALL. */
7504		if (s != NULL && s->nat_rule.ptr != NULL &&
7505		    s->nat_rule.ptr->log & PF_LOG_ALL)
7506			lr = s->nat_rule.ptr;
7507		else
7508			lr = r;
7509		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
7510		    &pd);
7511	}
7512
	/*
	 * Interface counters, indexed [1][outbound?][not passed?]; the
	 * first index is 1 here and 0 in pf_test().
	 */
7513	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
7514	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
7515
	/* Rule, anchor, state, source-node and table statistics. */
7516	if (action == PF_PASS || r->action == PF_DROP) {
7517		dirndx = (dir == PF_OUT);
7518		r->packets[dirndx]++;
7519		r->bytes[dirndx] += pd.tot_len;
7520		if (a != NULL) {
7521			a->packets[dirndx]++;
7522			a->bytes[dirndx] += pd.tot_len;
7523		}
7524		if (s != NULL) {
7525			if (s->nat_rule.ptr != NULL) {
7526				s->nat_rule.ptr->packets[dirndx]++;
7527				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
7528			}
7529			if (s->src_node != NULL) {
7530				s->src_node->packets[dirndx]++;
7531				s->src_node->bytes[dirndx] += pd.tot_len;
7532			}
7533			if (s->nat_src_node != NULL) {
7534				s->nat_src_node->packets[dirndx]++;
7535				s->nat_src_node->bytes[dirndx] += pd.tot_len;
7536			}
			/* State counters are relative to the state's own direction. */
7537			dirndx = (dir == s->direction) ? 0 : 1;
7538			s->packets[dirndx]++;
7539			s->bytes[dirndx] += pd.tot_len;
7540		}
7541		tr = r;
7542		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		/* Only the default rule matched: use the NAT rule's tables. */
7543#ifdef __FreeBSD__
7544		if (nr != NULL && r == &V_pf_default_rule)
7545#else
7546		if (nr != NULL && r == &pf_default_rule)
7547#endif
7548			tr = nr;
		/*
		 * NOTE(review): pf_test() indexes the state key address with
		 * addr[(s->direction == PF_OUT)] / addr[(s->direction == PF_IN)]
		 * where this function uses fixed addr[0] / addr[1] -- confirm
		 * which of the two is intended.
		 */
7549		if (tr->src.addr.type == PF_ADDR_TABLE)
7550			pfr_update_stats(tr->src.addr.p.tbl,
7551			    (s == NULL) ? pd.src :
7552			    &s->key[(s->direction == PF_IN)]->addr[0],
7553			    pd.af, pd.tot_len, dir == PF_OUT,
7554			    r->action == PF_PASS, tr->src.neg);
7555		if (tr->dst.addr.type == PF_ADDR_TABLE)
7556			pfr_update_stats(tr->dst.addr.p.tbl,
7557			    (s == NULL) ? pd.dst :
7558			    &s->key[(s->direction == PF_IN)]->addr[1],
7559			    pd.af, pd.tot_len, dir == PF_OUT,
7560			    r->action == PF_PASS, tr->dst.neg);
7561	}
7562
7563	switch (action) {
7564	case PF_SYNPROXY_DROP:
7565		m_freem(*m0);
		/* FALLTHROUGH */
7566	case PF_DEFER:
		/* Packet is no longer the caller's; report PF_PASS with *m0 NULL. */
7567		*m0 = NULL;
7568		action = PF_PASS;
7569		break;
7570	default:
7571		/* pf_route6 can free the mbuf causing *m0 to become NULL */
7572		if (r->rt)
7573			pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
7574		break;
7575	}
7576
7577#ifdef __FreeBSD__
7578	PF_UNLOCK();
7579#endif
7580	return (action);
7581}
7582#endif /* INET6 */
7583
7584int
7585pf_check_congestion(struct ifqueue *ifq)
7586{
7587#ifdef __FreeBSD__
7588	/* XXX_IMPORT: later */
7589	return (0);
7590#else
7591	if (ifq->ifq_congestion)
7592		return (1);
7593	else
7594		return (0);
7595#endif
7596}
7597
7598/*
7599 * must be called whenever any addressing information such as
7600 * address, port, protocol has changed
7601 */
7602void
7603pf_pkt_addr_changed(struct mbuf *m)
7604{
7605#ifdef __FreeBSD__
7606	struct pf_mtag	*pf_tag;
7607
7608	if ((pf_tag = pf_find_mtag(m)) != NULL)
7609		pf_tag->statekey = NULL;
7610#else
7611	m->m_pkthdr.pf.statekey = NULL;
7612#endif
7613}
7614