pf.c revision 145873
1/*	$FreeBSD: head/sys/contrib/pf/net/pf.c 145873 2005-05-04 15:29:28Z mlaier $	*/
2/*	$OpenBSD: pf.c,v 1.483 2005/03/15 17:38:43 dhartmei Exp $ */
3
4/*
5 * Copyright (c) 2001 Daniel Hartmeier
6 * Copyright (c) 2002,2003 Henning Brauer
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 *    - Redistributions of source code must retain the above copyright
14 *      notice, this list of conditions and the following disclaimer.
15 *    - Redistributions in binary form must reproduce the above
16 *      copyright notice, this list of conditions and the following
17 *      disclaimer in the documentation and/or other materials provided
18 *      with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Effort sponsored in part by the Defense Advanced Research Projects
34 * Agency (DARPA) and Air Force Research Laboratory, Air Force
35 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
36 *
37 */
38
39#ifdef __FreeBSD__
40#include "opt_inet.h"
41#include "opt_inet6.h"
42#endif
43
44#ifdef __FreeBSD__
45#include "opt_bpf.h"
46#include "opt_pf.h"
47#define	NBPFILTER	DEV_BPF
48#define	NPFLOG		DEV_PFLOG
49#define	NPFSYNC		DEV_PFSYNC
50#else
51#include "bpfilter.h"
52#include "pflog.h"
53#include "pfsync.h"
54#endif
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/mbuf.h>
59#include <sys/filio.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/kernel.h>
63#include <sys/time.h>
64#ifdef __FreeBSD__
65#include <sys/sysctl.h>
66#include <sys/endian.h>
67#else
68#include <sys/pool.h>
69#endif
70
71#include <net/if.h>
72#include <net/if_types.h>
73#include <net/bpf.h>
74#include <net/route.h>
75
76#include <netinet/in.h>
77#include <netinet/in_var.h>
78#include <netinet/in_systm.h>
79#include <netinet/ip.h>
80#include <netinet/ip_var.h>
81#include <netinet/tcp.h>
82#include <netinet/tcp_seq.h>
83#include <netinet/udp.h>
84#include <netinet/ip_icmp.h>
85#include <netinet/in_pcb.h>
86#include <netinet/tcp_timer.h>
87#include <netinet/tcp_var.h>
88#include <netinet/udp_var.h>
89#include <netinet/icmp_var.h>
90#include <netinet/if_ether.h>
91
92#ifndef __FreeBSD__
93#include <dev/rndvar.h>
94#endif
95#include <net/pfvar.h>
96#include <net/if_pflog.h>
97
98#if NPFSYNC > 0
99#include <net/if_pfsync.h>
100#endif /* NPFSYNC > 0 */
101
102#ifdef INET6
103#include <netinet/ip6.h>
104#include <netinet/in_pcb.h>
105#include <netinet/icmp6.h>
106#include <netinet6/nd6.h>
107#ifdef __FreeBSD__
108#include <netinet6/ip6_var.h>
109#include <netinet6/in6_pcb.h>
110#endif
111#endif /* INET6 */
112
113#ifdef __FreeBSD__
114#include <machine/in_cksum.h>
115#include <sys/limits.h>
116#include <sys/ucred.h>
117
118extern int ip_optcopy(struct ip *, struct ip *);
119#endif
120
/*
 * Debug printf: emit the parenthesized printf argument list `x' only when
 * pf_status.debug is at least level `n'.  Usage:
 *	DPFPRINTF(PF_DEBUG_MISC, ("fmt %d\n", value));
 * The expansion is wrapped in do { } while (0) so that it is exactly one
 * statement and cannot capture an `else' that follows the invocation.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
122
123/*
124 * Global variables
125 */
126
/* Head of the pf_anchor_global red-black tree (see RB_GENERATE below). */
struct pf_anchor_global	 pf_anchors;
struct pf_ruleset	 pf_main_ruleset;
/*
 * Two ALTQ queue lists plus active/inactive pointers.
 * NOTE(review): presumably the pointers select between the two array
 * slots; the code that assigns them is outside this view -- confirm.
 */
struct pf_altqqueue	 pf_altqs[2];
struct pf_palist	 pf_pabuf;
struct pf_altqqueue	*pf_altqs_active;
struct pf_altqqueue	*pf_altqs_inactive;
/*
 * Global status block: debug level (tested by DPFPRINTF) and the
 * fcounters/scounters/lcounters statistics updated throughout this file.
 */
struct pf_status	 pf_status;

u_int32_t		 ticket_altqs_active;
u_int32_t		 ticket_altqs_inactive;
int			 altqs_inactive_open;
u_int32_t		 ticket_pabuf;

/* Timer handle type differs per platform: callout on FreeBSD, timeout else. */
#ifdef __FreeBSD__
struct callout	 	 pf_expire_to;			/* expire timeout */
#else
struct timeout		 pf_expire_to;			/* expire timeout */
#endif
145
/*
 * One frame of the fixed-size (64 entry) pf_anchor_stack array.
 * NOTE(review): presumably a frame records the position in the enclosing
 * ruleset while rule evaluation descends into an anchor; the code that
 * pushes and pops frames is outside this view -- confirm.
 */
struct pf_anchor_stackframe {
	struct pf_ruleset			*rs;
	struct pf_rule				*r;
	struct pf_anchor_node			*parent;
	struct pf_anchor			*child;
} pf_anchor_stack[64];
152
/*
 * Allocator handles for pf objects: uma_zone_t on FreeBSD, struct pool
 * otherwise.  pf_src_tree_pl backs the struct pf_src_node allocations made
 * by pf_insert_src_node() and released by pf_purge_expired_src_nodes().
 */
#ifdef __FreeBSD__
uma_zone_t		 pf_src_tree_pl, pf_rule_pl;
uma_zone_t		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
#else
struct pool		 pf_src_tree_pl, pf_rule_pl;
struct pool		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
#endif
160
161void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
162
163void			 pf_init_threshold(struct pf_threshold *, u_int32_t,
164			    u_int32_t);
165void			 pf_add_threshold(struct pf_threshold *);
166int			 pf_check_threshold(struct pf_threshold *);
167
168void			 pf_change_ap(struct pf_addr *, u_int16_t *,
169			    u_int16_t *, u_int16_t *, struct pf_addr *,
170			    u_int16_t, u_int8_t, sa_family_t);
171#ifdef INET6
172void			 pf_change_a6(struct pf_addr *, u_int16_t *,
173			    struct pf_addr *, u_int8_t);
174#endif /* INET6 */
175void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
176			    struct pf_addr *, struct pf_addr *, u_int16_t,
177			    u_int16_t *, u_int16_t *, u_int16_t *,
178			    u_int16_t *, u_int8_t, sa_family_t);
179void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
180			    const struct pf_addr *, const struct pf_addr *,
181			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
182			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
183			    struct ether_header *, struct ifnet *);
184void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
185			    sa_family_t, struct pf_rule *);
186struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
187			    int, int, struct pfi_kif *,
188			    struct pf_addr *, u_int16_t, struct pf_addr *,
189			    u_int16_t, int);
190struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
191			    int, int, struct pfi_kif *, struct pf_src_node **,
192			    struct pf_addr *, u_int16_t,
193			    struct pf_addr *, u_int16_t,
194			    struct pf_addr *, u_int16_t *);
195int			 pf_test_tcp(struct pf_rule **, struct pf_state **,
196			    int, struct pfi_kif *, struct mbuf *, int,
197			    void *, struct pf_pdesc *, struct pf_rule **,
198#ifdef __FreeBSD__
199			    struct pf_ruleset **, struct ifqueue *,
200			    struct inpcb *);
201#else
202			    struct pf_ruleset **, struct ifqueue *);
203#endif
204int			 pf_test_udp(struct pf_rule **, struct pf_state **,
205			    int, struct pfi_kif *, struct mbuf *, int,
206			    void *, struct pf_pdesc *, struct pf_rule **,
207#ifdef __FreeBSD__
208			    struct pf_ruleset **, struct ifqueue *,
209			    struct inpcb *);
210#else
211			    struct pf_ruleset **, struct ifqueue *);
212#endif
213int			 pf_test_icmp(struct pf_rule **, struct pf_state **,
214			    int, struct pfi_kif *, struct mbuf *, int,
215			    void *, struct pf_pdesc *, struct pf_rule **,
216			    struct pf_ruleset **, struct ifqueue *);
217int			 pf_test_other(struct pf_rule **, struct pf_state **,
218			    int, struct pfi_kif *, struct mbuf *, int, void *,
219			    struct pf_pdesc *, struct pf_rule **,
220			    struct pf_ruleset **, struct ifqueue *);
221int			 pf_test_fragment(struct pf_rule **, int,
222			    struct pfi_kif *, struct mbuf *, void *,
223			    struct pf_pdesc *, struct pf_rule **,
224			    struct pf_ruleset **);
225int			 pf_test_state_tcp(struct pf_state **, int,
226			    struct pfi_kif *, struct mbuf *, int,
227			    void *, struct pf_pdesc *, u_short *);
228int			 pf_test_state_udp(struct pf_state **, int,
229			    struct pfi_kif *, struct mbuf *, int,
230			    void *, struct pf_pdesc *);
231int			 pf_test_state_icmp(struct pf_state **, int,
232			    struct pfi_kif *, struct mbuf *, int,
233			    void *, struct pf_pdesc *, u_short *);
234int			 pf_test_state_other(struct pf_state **, int,
235			    struct pfi_kif *, struct pf_pdesc *);
236struct pf_tag		*pf_get_tag(struct mbuf *);
237int			 pf_match_tag(struct mbuf *, struct pf_rule *,
238			     struct pf_tag **, int *);
239void			 pf_hash(struct pf_addr *, struct pf_addr *,
240			    struct pf_poolhashkey *, sa_family_t);
241int			 pf_map_addr(u_int8_t, struct pf_rule *,
242			    struct pf_addr *, struct pf_addr *,
243			    struct pf_addr *, struct pf_src_node **);
244int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
245			    struct pf_addr *, struct pf_addr *, u_int16_t,
246			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
247			    struct pf_src_node **);
248void			 pf_route(struct mbuf **, struct pf_rule *, int,
249			    struct ifnet *, struct pf_state *);
250void			 pf_route6(struct mbuf **, struct pf_rule *, int,
251			    struct ifnet *, struct pf_state *);
252#ifdef __FreeBSD__
253int			 pf_socket_lookup(uid_t *, gid_t *,
254			    int, struct pf_pdesc *, struct inpcb *);
255#else
256int			 pf_socket_lookup(uid_t *, gid_t *,
257			    int, struct pf_pdesc *);
258#endif
259u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
260			    sa_family_t);
261u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
262			    sa_family_t);
263u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
264				u_int16_t);
265void			 pf_set_rt_ifp(struct pf_state *,
266			    struct pf_addr *);
267int			 pf_check_proto_cksum(struct mbuf *, int, int,
268			    u_int8_t, sa_family_t);
269int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
270			    struct pf_addr_wrap *);
271static int		 pf_add_mbuf_tag(struct mbuf *, u_int);
272struct pf_state		*pf_find_state_recurse(struct pfi_kif *,
273			    struct pf_state *, u_int8_t);
274int			 pf_src_connlimit(struct pf_state **);
275int			 pf_check_congestion(struct ifqueue *);
276
#ifdef __FreeBSD__
int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);

/*
 * On FreeBSD the limit table has no static initializer.
 * NOTE(review): presumably it is filled in at run time once the zones
 * exist; that code is outside this view -- confirm.
 */
struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];

#else
/*
 * Pairs each limited pool (states, source nodes, fragment entries) with
 * its compile-time *_HIWAT constant.
 */
struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl, PFSTATE_HIWAT },
	{ &pf_src_tree_pl, PFSNODE_HIWAT },
	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT }
};
#endif
289
/*
 * Locate the state for the current packet and store it in *state.
 *
 * Expands inside a function that has `direction', `kif', `key' and `state'
 * in scope, and may RETURN from that function:
 *   - inbound packets are searched in the ext/gwy tree, all others in the
 *     lan/ext tree;
 *   - PF_DROP is returned when nothing is found or the state is already
 *     marked PFTM_PURGE;
 *   - PF_PASS is returned for an outbound packet whose state was created by
 *     an outbound route-to rule or an inbound reply-to rule and is bound to
 *     a routing interface (rt_kif) other than `kif'.
 */
#define STATE_LOOKUP()							\
	do {								\
		*state = pf_find_state_recurse(kif, &key,		\
		    (direction == PF_IN) ? PF_EXT_GWY : PF_LAN_EXT);	\
		if (*state == NULL)					\
			return (PF_DROP);				\
		if ((*state)->timeout == PFTM_PURGE)			\
			return (PF_DROP);				\
		if (direction == PF_OUT &&				\
		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
		      (*state)->rule.ptr->direction == PF_OUT) ||	\
		     ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
		      (*state)->rule.ptr->direction == PF_IN)) &&	\
		    (*state)->rt_kif != NULL &&				\
		    (*state)->rt_kif != kif)				\
			return (PF_PASS);				\
	} while (0)
309
/*
 * Evaluates to non-zero when state `s' (a pointer) carries an address or
 * port translation, i.e. its lan and gwy endpoints differ.  Word 0 of the
 * address is always compared; words 1-3 only for AF_INET6 states.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand under `!', `&&' and similar operators.  `s' is evaluated several
 * times, so it must not have side effects.
 */
#define	STATE_TRANSLATE(s) \
	((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
	((s)->af == AF_INET6 && \
	((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
	(s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
	(s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
	(s)->lan.port != (s)->gwy.port)
317
/*
 * Select the interface object a state created by rule `r' is attached to,
 * starting from kif `k': `k' itself when PFRULE_IFBOUND is set, its parent
 * when PFRULE_GRBOUND is set, otherwise its grandparent.
 */
#define BOUND_IFACE(r, k)						\
	((((r)->rule_flag & PFRULE_IFBOUND) != 0) ? (k) :		\
	 ((((r)->rule_flag & PFRULE_GRBOUND) != 0) ?			\
	  (k)->pfik_parent : (k)->pfik_parent->pfik_parent))
321
/*
 * Account for a new state `s' (a pointer): bump the `states' counter of its
 * rule and, when present, of its anchor rule and NAT rule.
 *
 * `s' is parenthesized at every use so that expressions such as `*statep'
 * can be passed safely; it is evaluated several times and must not have
 * side effects.
 */
#define STATE_INC_COUNTERS(s)				\
	do {						\
		(s)->rule.ptr->states++;		\
		if ((s)->anchor.ptr != NULL)		\
			(s)->anchor.ptr->states++;	\
		if ((s)->nat_rule.ptr != NULL)		\
			(s)->nat_rule.ptr->states++;	\
	} while (0)
330
/*
 * Undo STATE_INC_COUNTERS for state `s' (a pointer): decrement the `states'
 * counter of its NAT rule and anchor rule when present, then of its rule.
 *
 * `s' is parenthesized at every use so that expressions such as `*statep'
 * can be passed safely; it is evaluated several times and must not have
 * side effects.
 */
#define STATE_DEC_COUNTERS(s)				\
	do {						\
		if ((s)->nat_rule.ptr != NULL)		\
			(s)->nat_rule.ptr->states--;	\
		if ((s)->anchor.ptr != NULL)		\
			(s)->anchor.ptr->states--;	\
		(s)->rule.ptr->states--;		\
	} while (0)
339
340#ifndef __FreeBSD__
341static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
342static __inline int pf_state_compare_lan_ext(struct pf_state *,
343	struct pf_state *);
344static __inline int pf_state_compare_ext_gwy(struct pf_state *,
345	struct pf_state *);
346static __inline int pf_state_compare_id(struct pf_state *,
347	struct pf_state *);
348static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *);
349#else
350static int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
351static int pf_state_compare_lan_ext(struct pf_state *,
352	struct pf_state *);
353static int pf_state_compare_ext_gwy(struct pf_state *,
354	struct pf_state *);
355static int pf_state_compare_id(struct pf_state *,
356	struct pf_state *);
357static int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *);
358#endif
359
/* Root of the source-tracking tree, ordered by pf_src_compare(). */
struct pf_src_tree tree_src_tracking;

/* Root of the tree of all states keyed by (id, creatorid). */
struct pf_state_tree_id tree_id;
/* Queue of states; pf_insert_state() adds each new state at its head. */
struct pf_state_queue state_updates;

/*
 * Instantiate the red-black tree operations for each tree type, binding
 * the element type, its embedded linkage field and its ordering function.
 */
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree_lan_ext, pf_state,
    u.s.entry_lan_ext, pf_state_compare_lan_ext);
RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
    u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
RB_GENERATE(pf_state_tree_id, pf_state,
    u.s.entry_id, pf_state_compare_id);
RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare);
RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare);
374
375#ifdef __FreeBSD__
376static int
377#else
378static __inline int
379#endif
380pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
381{
382	int	diff;
383
384	if (a->rule.ptr > b->rule.ptr)
385		return (1);
386	if (a->rule.ptr < b->rule.ptr)
387		return (-1);
388	if ((diff = a->af - b->af) != 0)
389		return (diff);
390	switch (a->af) {
391#ifdef INET
392	case AF_INET:
393		if (a->addr.addr32[0] > b->addr.addr32[0])
394			return (1);
395		if (a->addr.addr32[0] < b->addr.addr32[0])
396			return (-1);
397		break;
398#endif /* INET */
399#ifdef INET6
400	case AF_INET6:
401		if (a->addr.addr32[3] > b->addr.addr32[3])
402			return (1);
403		if (a->addr.addr32[3] < b->addr.addr32[3])
404			return (-1);
405		if (a->addr.addr32[2] > b->addr.addr32[2])
406			return (1);
407		if (a->addr.addr32[2] < b->addr.addr32[2])
408			return (-1);
409		if (a->addr.addr32[1] > b->addr.addr32[1])
410			return (1);
411		if (a->addr.addr32[1] < b->addr.addr32[1])
412			return (-1);
413		if (a->addr.addr32[0] > b->addr.addr32[0])
414			return (1);
415		if (a->addr.addr32[0] < b->addr.addr32[0])
416			return (-1);
417		break;
418#endif /* INET6 */
419	}
420	return (0);
421}
422
423#ifdef __FreeBSD__
424static int
425#else
426static __inline int
427#endif
428pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
429{
430	int	diff;
431
432	if ((diff = a->proto - b->proto) != 0)
433		return (diff);
434	if ((diff = a->af - b->af) != 0)
435		return (diff);
436	switch (a->af) {
437#ifdef INET
438	case AF_INET:
439		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
440			return (1);
441		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
442			return (-1);
443		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
444			return (1);
445		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
446			return (-1);
447		break;
448#endif /* INET */
449#ifdef INET6
450	case AF_INET6:
451		if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
452			return (1);
453		if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
454			return (-1);
455		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
456			return (1);
457		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
458			return (-1);
459		if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
460			return (1);
461		if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
462			return (-1);
463		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
464			return (1);
465		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
466			return (-1);
467		if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
468			return (1);
469		if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
470			return (-1);
471		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
472			return (1);
473		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
474			return (-1);
475		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
476			return (1);
477		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
478			return (-1);
479		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
480			return (1);
481		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
482			return (-1);
483		break;
484#endif /* INET6 */
485	}
486
487	if ((diff = a->lan.port - b->lan.port) != 0)
488		return (diff);
489	if ((diff = a->ext.port - b->ext.port) != 0)
490		return (diff);
491
492	return (0);
493}
494
495#ifdef __FreeBSD__
496static int
497#else
498static __inline int
499#endif
500pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
501{
502	int	diff;
503
504	if ((diff = a->proto - b->proto) != 0)
505		return (diff);
506	if ((diff = a->af - b->af) != 0)
507		return (diff);
508	switch (a->af) {
509#ifdef INET
510	case AF_INET:
511		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
512			return (1);
513		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
514			return (-1);
515		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
516			return (1);
517		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
518			return (-1);
519		break;
520#endif /* INET */
521#ifdef INET6
522	case AF_INET6:
523		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
524			return (1);
525		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
526			return (-1);
527		if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
528			return (1);
529		if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
530			return (-1);
531		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
532			return (1);
533		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
534			return (-1);
535		if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
536			return (1);
537		if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
538			return (-1);
539		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
540			return (1);
541		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
542			return (-1);
543		if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
544			return (1);
545		if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
546			return (-1);
547		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
548			return (1);
549		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
550			return (-1);
551		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
552			return (1);
553		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
554			return (-1);
555		break;
556#endif /* INET6 */
557	}
558
559	if ((diff = a->ext.port - b->ext.port) != 0)
560		return (diff);
561	if ((diff = a->gwy.port - b->gwy.port) != 0)
562		return (diff);
563
564	return (0);
565}
566
567#ifdef __FreeBSD__
568static int
569#else
570static __inline int
571#endif
572pf_state_compare_id(struct pf_state *a, struct pf_state *b)
573{
574	if (a->id > b->id)
575		return (1);
576	if (a->id < b->id)
577		return (-1);
578	if (a->creatorid > b->creatorid)
579		return (1);
580	if (a->creatorid < b->creatorid)
581		return (-1);
582
583	return (0);
584}
585
586#ifdef __FreeBSD__
587static int
588#else
589static __inline int
590#endif
591pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b)
592{
593	int c = strcmp(a->path, b->path);
594
595	return (c ? (c < 0 ? -1 : 1) : 0);
596}
597
#ifdef INET6
/*
 * Copy an address of family `af' from `src' to `dst': one 32-bit word for
 * AF_INET, all four words for AF_INET6.  Any other family leaves `dst'
 * untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#endif /* INET */
	case AF_INET6: {
		int	w;

		for (w = 0; w < 4; w++)
			dst->addr32[w] = src->addr32[w];
		break;
	}
	}
}
#endif /* INET6 */
617
618struct pf_state *
619pf_find_state_byid(struct pf_state *key)
620{
621	pf_status.fcounters[FCNT_STATE_SEARCH]++;
622	return (RB_FIND(pf_state_tree_id, &tree_id, key));
623}
624
625struct pf_state *
626pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree)
627{
628	struct pf_state *s;
629
630	pf_status.fcounters[FCNT_STATE_SEARCH]++;
631
632	switch (tree) {
633	case PF_LAN_EXT:
634		for (; kif != NULL; kif = kif->pfik_parent) {
635			s = RB_FIND(pf_state_tree_lan_ext,
636			    &kif->pfik_lan_ext, key);
637			if (s != NULL)
638				return (s);
639		}
640		return (NULL);
641	case PF_EXT_GWY:
642		for (; kif != NULL; kif = kif->pfik_parent) {
643			s = RB_FIND(pf_state_tree_ext_gwy,
644			    &kif->pfik_ext_gwy, key);
645			if (s != NULL)
646				return (s);
647		}
648		return (NULL);
649	default:
650		panic("pf_find_state_recurse");
651	}
652}
653
654struct pf_state *
655pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more)
656{
657	struct pf_state *s, *ss = NULL;
658	struct pfi_kif	*kif;
659
660	pf_status.fcounters[FCNT_STATE_SEARCH]++;
661
662	switch (tree) {
663	case PF_LAN_EXT:
664		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
665			s = RB_FIND(pf_state_tree_lan_ext,
666			    &kif->pfik_lan_ext, key);
667			if (s == NULL)
668				continue;
669			if (more == NULL)
670				return (s);
671			ss = s;
672			(*more)++;
673		}
674		return (ss);
675	case PF_EXT_GWY:
676		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
677			s = RB_FIND(pf_state_tree_ext_gwy,
678			    &kif->pfik_ext_gwy, key);
679			if (s == NULL)
680				continue;
681			if (more == NULL)
682				return (s);
683			ss = s;
684			(*more)++;
685		}
686		return (ss);
687	default:
688		panic("pf_find_state_all");
689	}
690}
691
692void
693pf_init_threshold(struct pf_threshold *threshold,
694    u_int32_t limit, u_int32_t seconds)
695{
696	threshold->limit = limit * PF_THRESHOLD_MULT;
697	threshold->seconds = seconds;
698	threshold->count = 0;
699	threshold->last = time_second;
700}
701
702void
703pf_add_threshold(struct pf_threshold *threshold)
704{
705	u_int32_t t = time_second, diff = t - threshold->last;
706
707	if (diff >= threshold->seconds)
708		threshold->count = 0;
709	else
710		threshold->count -= threshold->count * diff /
711		    threshold->seconds;
712	threshold->count += PF_THRESHOLD_MULT;
713	threshold->last = t;
714}
715
716int
717pf_check_threshold(struct pf_threshold *threshold)
718{
719	return (threshold->count > threshold->limit);
720}
721
722int
723pf_src_connlimit(struct pf_state **state)
724{
725	struct pf_state	*s;
726	int bad = 0;
727
728	(*state)->src_node->conn++;
729	pf_add_threshold(&(*state)->src_node->conn_rate);
730
731	if ((*state)->rule.ptr->max_src_conn &&
732	    (*state)->rule.ptr->max_src_conn <
733	    (*state)->src_node->conn) {
734		pf_status.lcounters[LCNT_SRCCONN]++;
735		bad++;
736	}
737
738	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
739	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
740		pf_status.lcounters[LCNT_SRCCONNRATE]++;
741		bad++;
742	}
743
744	if (!bad)
745		return (0);
746
747	if ((*state)->rule.ptr->overload_tbl) {
748		struct pfr_addr p;
749		u_int32_t	killed = 0;
750
751		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
752		if (pf_status.debug >= PF_DEBUG_MISC) {
753			printf("pf_src_connlimit: blocking address ");
754			pf_print_host(&(*state)->src_node->addr, 0,
755			    (*state)->af);
756		}
757
758		bzero(&p, sizeof(p));
759		p.pfra_af = (*state)->af;
760		switch ((*state)->af) {
761#ifdef INET
762		case AF_INET:
763			p.pfra_net = 32;
764			p.pfra_ip4addr = (*state)->src_node->addr.v4;
765			break;
766#endif /* INET */
767#ifdef INET6
768		case AF_INET6:
769			p.pfra_net = 128;
770			p.pfra_ip6addr = (*state)->src_node->addr.v6;
771			break;
772#endif /* INET6 */
773		}
774
775		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
776		    &p, time_second);
777
778		/* kill existing states if that's required. */
779		if ((*state)->rule.ptr->flush) {
780			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
781
782			RB_FOREACH(s, pf_state_tree_id, &tree_id) {
783				/*
784				 * Kill states from this source.  (Only those
785				 * from the same rule if PF_FLUSH_GLOBAL is not
786				 * set)
787				 */
788				if (s->af == (*state)->af &&
789				    (((*state)->direction == PF_OUT &&
790				    PF_AEQ(&(*state)->src_node->addr,
791				    &s->lan.addr, s->af)) ||
792				    ((*state)->direction == PF_IN &&
793				    PF_AEQ(&(*state)->src_node->addr,
794				    &s->ext.addr, s->af))) &&
795				    ((*state)->rule.ptr->flush &
796				    PF_FLUSH_GLOBAL ||
797				    (*state)->rule.ptr == s->rule.ptr)) {
798					s->timeout = PFTM_PURGE;
799					s->src.state = s->dst.state =
800					    TCPS_CLOSED;
801					killed++;
802				}
803			}
804			if (pf_status.debug >= PF_DEBUG_MISC)
805				printf(", %u states killed", killed);
806		}
807		if (pf_status.debug >= PF_DEBUG_MISC)
808			printf("\n");
809	}
810
811	/* kill this state */
812	(*state)->timeout = PFTM_PURGE;
813	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
814	return (1);
815}
816
817int
818pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
819    struct pf_addr *src, sa_family_t af)
820{
821	struct pf_src_node	k;
822
823	if (*sn == NULL) {
824		k.af = af;
825		PF_ACPY(&k.addr, src, af);
826		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
827		    rule->rpool.opts & PF_POOL_STICKYADDR)
828			k.rule.ptr = rule;
829		else
830			k.rule.ptr = NULL;
831		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
832		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
833	}
834	if (*sn == NULL) {
835		if (!rule->max_src_nodes ||
836		    rule->src_nodes < rule->max_src_nodes)
837			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
838		else
839			pf_status.lcounters[LCNT_SRCNODES]++;
840		if ((*sn) == NULL)
841			return (-1);
842		bzero(*sn, sizeof(struct pf_src_node));
843
844		pf_init_threshold(&(*sn)->conn_rate,
845		    rule->max_src_conn_rate.limit,
846		    rule->max_src_conn_rate.seconds);
847
848		(*sn)->af = af;
849		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
850		    rule->rpool.opts & PF_POOL_STICKYADDR)
851			(*sn)->rule.ptr = rule;
852		else
853			(*sn)->rule.ptr = NULL;
854		PF_ACPY(&(*sn)->addr, src, af);
855		if (RB_INSERT(pf_src_tree,
856		    &tree_src_tracking, *sn) != NULL) {
857			if (pf_status.debug >= PF_DEBUG_MISC) {
858				printf("pf: src_tree insert failed: ");
859				pf_print_host(&(*sn)->addr, 0, af);
860				printf("\n");
861			}
862			pool_put(&pf_src_tree_pl, *sn);
863			return (-1);
864		}
865		(*sn)->creation = time_second;
866		(*sn)->ruletype = rule->action;
867		if ((*sn)->rule.ptr != NULL)
868			(*sn)->rule.ptr->src_nodes++;
869		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
870		pf_status.src_nodes++;
871	} else {
872		if (rule->max_src_states &&
873		    (*sn)->states >= rule->max_src_states) {
874			pf_status.lcounters[LCNT_SRCSTATES]++;
875			return (-1);
876		}
877	}
878	return (0);
879}
880
881int
882pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
883{
884	/* Thou MUST NOT insert multiple duplicate keys */
885	state->u.s.kif = kif;
886	if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
887		if (pf_status.debug >= PF_DEBUG_MISC) {
888			printf("pf: state insert failed: tree_lan_ext");
889			printf(" lan: ");
890			pf_print_host(&state->lan.addr, state->lan.port,
891			    state->af);
892			printf(" gwy: ");
893			pf_print_host(&state->gwy.addr, state->gwy.port,
894			    state->af);
895			printf(" ext: ");
896			pf_print_host(&state->ext.addr, state->ext.port,
897			    state->af);
898			if (state->sync_flags & PFSTATE_FROMSYNC)
899				printf(" (from sync)");
900			printf("\n");
901		}
902		return (-1);
903	}
904
905	if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
906		if (pf_status.debug >= PF_DEBUG_MISC) {
907			printf("pf: state insert failed: tree_ext_gwy");
908			printf(" lan: ");
909			pf_print_host(&state->lan.addr, state->lan.port,
910			    state->af);
911			printf(" gwy: ");
912			pf_print_host(&state->gwy.addr, state->gwy.port,
913			    state->af);
914			printf(" ext: ");
915			pf_print_host(&state->ext.addr, state->ext.port,
916			    state->af);
917			if (state->sync_flags & PFSTATE_FROMSYNC)
918				printf(" (from sync)");
919			printf("\n");
920		}
921		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
922		return (-1);
923	}
924
925	if (state->id == 0 && state->creatorid == 0) {
926		state->id = htobe64(pf_status.stateid++);
927		state->creatorid = pf_status.hostid;
928	}
929	if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
930		if (pf_status.debug >= PF_DEBUG_MISC) {
931#ifdef __FreeBSD__
932			printf("pf: state insert failed: "
933			    "id: %016llx creatorid: %08x",
934			    (long long)be64toh(state->id),
935			    ntohl(state->creatorid));
936#else
937			printf("pf: state insert failed: "
938			    "id: %016llx creatorid: %08x",
939			    betoh64(state->id), ntohl(state->creatorid));
940#endif
941			if (state->sync_flags & PFSTATE_FROMSYNC)
942				printf(" (from sync)");
943			printf("\n");
944		}
945		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
946		RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
947		return (-1);
948	}
949	TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates);
950
951	pf_status.fcounters[FCNT_STATE_INSERT]++;
952	pf_status.states++;
953	pfi_attach_state(kif);
954#if NPFSYNC
955	pfsync_insert_state(state);
956#endif
957	return (0);
958}
959
960void
961pf_purge_timeout(void *arg)
962{
963#ifdef __FreeBSD__
964	struct callout  *to = arg;
965#else
966	struct timeout	*to = arg;
967#endif
968	int		 s;
969
970#ifdef __FreeBSD__
971	PF_LOCK();
972#endif
973	s = splsoftnet();
974	pf_purge_expired_states();
975	pf_purge_expired_fragments();
976	pf_purge_expired_src_nodes();
977	splx(s);
978#ifdef __FreeBSD__
979	PF_UNLOCK();
980#endif
981
982#ifdef __FreeBSD__
983	callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz,
984	    pf_purge_timeout, to);
985#else
986	timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz);
987#endif
988}
989
990u_int32_t
991pf_state_expires(const struct pf_state *state)
992{
993	u_int32_t	timeout;
994	u_int32_t	start;
995	u_int32_t	end;
996	u_int32_t	states;
997
998	/* handle all PFTM_* > PFTM_MAX here */
999	if (state->timeout == PFTM_PURGE)
1000		return (time_second);
1001	if (state->timeout == PFTM_UNTIL_PACKET)
1002		return (0);
1003#ifdef __FreeBSD__
1004	KASSERT((state->timeout < PFTM_MAX),
1005	    ("pf_state_expires: timeout > PFTM_MAX"));
1006#else
1007	KASSERT(state->timeout < PFTM_MAX);
1008#endif
1009	timeout = state->rule.ptr->timeout[state->timeout];
1010	if (!timeout)
1011		timeout = pf_default_rule.timeout[state->timeout];
1012	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1013	if (start) {
1014		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1015		states = state->rule.ptr->states;
1016	} else {
1017		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1018		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1019		states = pf_status.states;
1020	}
1021	if (end && states > start && start < end) {
1022		if (states < end)
1023			return (state->expire + timeout * (end - states) /
1024			    (end - start));
1025		else
1026			return (time_second);
1027	}
1028	return (state->expire + timeout);
1029}
1030
1031void
1032pf_purge_expired_src_nodes(void)
1033{
1034	 struct pf_src_node		*cur, *next;
1035
1036	 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
1037		 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
1038
1039		 if (cur->states <= 0 && cur->expire <= time_second) {
1040			 if (cur->rule.ptr != NULL) {
1041				 cur->rule.ptr->src_nodes--;
1042				 if (cur->rule.ptr->states <= 0 &&
1043				     cur->rule.ptr->max_src_nodes <= 0)
1044					 pf_rm_rule(NULL, cur->rule.ptr);
1045			 }
1046			 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
1047			 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1048			 pf_status.src_nodes--;
1049			 pool_put(&pf_src_tree_pl, cur);
1050		 }
1051	 }
1052}
1053
1054void
1055pf_src_tree_remove_state(struct pf_state *s)
1056{
1057	u_int32_t timeout;
1058
1059	if (s->src_node != NULL) {
1060		if (s->proto == IPPROTO_TCP) {
1061			if (s->src.state == PF_TCPS_PROXY_DST ||
1062			    s->timeout >= PFTM_TCP_ESTABLISHED)
1063				--s->src_node->conn;
1064		}
1065		if (--s->src_node->states <= 0) {
1066			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1067			if (!timeout)
1068				timeout =
1069				    pf_default_rule.timeout[PFTM_SRC_NODE];
1070			s->src_node->expire = time_second + timeout;
1071		}
1072	}
1073	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1074		if (--s->nat_src_node->states <= 0) {
1075			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1076			if (!timeout)
1077				timeout =
1078				    pf_default_rule.timeout[PFTM_SRC_NODE];
1079			s->nat_src_node->expire = time_second + timeout;
1080		}
1081	}
1082	s->src_node = s->nat_src_node = NULL;
1083}
1084
1085void
1086pf_purge_expired_state(struct pf_state *cur)
1087{
1088	if (cur->src.state == PF_TCPS_PROXY_DST)
1089		pf_send_tcp(cur->rule.ptr, cur->af,
1090		    &cur->ext.addr, &cur->lan.addr,
1091		    cur->ext.port, cur->lan.port,
1092		    cur->src.seqhi, cur->src.seqlo + 1,
1093		    TH_RST|TH_ACK, 0, 0, 0, 1, NULL, NULL);
1094	RB_REMOVE(pf_state_tree_ext_gwy,
1095	    &cur->u.s.kif->pfik_ext_gwy, cur);
1096	RB_REMOVE(pf_state_tree_lan_ext,
1097	    &cur->u.s.kif->pfik_lan_ext, cur);
1098	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
1099#if NPFSYNC
1100	pfsync_delete_state(cur);
1101#endif
1102	pf_src_tree_remove_state(cur);
1103	if (--cur->rule.ptr->states <= 0 &&
1104	    cur->rule.ptr->src_nodes <= 0)
1105		pf_rm_rule(NULL, cur->rule.ptr);
1106	if (cur->nat_rule.ptr != NULL)
1107		if (--cur->nat_rule.ptr->states <= 0 &&
1108			cur->nat_rule.ptr->src_nodes <= 0)
1109			pf_rm_rule(NULL, cur->nat_rule.ptr);
1110	if (cur->anchor.ptr != NULL)
1111		if (--cur->anchor.ptr->states <= 0)
1112			pf_rm_rule(NULL, cur->anchor.ptr);
1113	pf_normalize_tcp_cleanup(cur);
1114	pfi_detach_state(cur->u.s.kif);
1115	TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
1116	if (cur->tag)
1117		pf_tag_unref(cur->tag);
1118	pool_put(&pf_state_pl, cur);
1119	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1120	pf_status.states--;
1121}
1122
1123void
1124pf_purge_expired_states(void)
1125{
1126	struct pf_state		*cur, *next;
1127
1128	for (cur = RB_MIN(pf_state_tree_id, &tree_id);
1129	    cur; cur = next) {
1130		next = RB_NEXT(pf_state_tree_id, &tree_id, cur);
1131		if (pf_state_expires(cur) <= time_second)
1132			pf_purge_expired_state(cur);
1133	}
1134}
1135
1136int
1137pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1138{
1139	if (aw->type != PF_ADDR_TABLE)
1140		return (0);
1141	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1142		return (1);
1143	return (0);
1144}
1145
1146void
1147pf_tbladdr_remove(struct pf_addr_wrap *aw)
1148{
1149	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1150		return;
1151	pfr_detach_table(aw->p.tbl);
1152	aw->p.tbl = NULL;
1153}
1154
1155void
1156pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1157{
1158	struct pfr_ktable *kt = aw->p.tbl;
1159
1160	if (aw->type != PF_ADDR_TABLE || kt == NULL)
1161		return;
1162	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1163		kt = kt->pfrkt_root;
1164	aw->p.tbl = NULL;
1165	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1166		kt->pfrkt_cnt : -1;
1167}
1168
/*
 * Print an address and, when p is non-zero, a port.
 *
 * addr	address to print; addr32[0] is used for AF_INET, addr16[0..7] for
 *	AF_INET6.  The words are converted with ntohl()/ntohs() before
 *	printing.
 * p	port, converted with ntohs() before printing; 0 suppresses it.
 * af	address family; other families print nothing.
 *
 * IPv4 is printed as dotted quad with ":port", IPv6 as colon-separated hex
 * groups with "[port]".  For IPv6 the first longest run of two or more
 * all-zero groups is collapsed to "::".
 *
 * The previous zero-run search started with maxstart = maxend = 0, so group
 * 0 was always treated as part of a run and printed as ":" even when it was
 * non-zero; it also ignored a run that extended to the last group.
 */
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			printf(":%u", p);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		int i, runstart = 0, runlen = 0;
		int maxstart = -1, maxlen = 0, maxend;

		/* find the first longest run of zero groups */
		for (i = 0; i < 8; i++) {
			if (addr->addr16[i] != 0) {
				runlen = 0;
				continue;
			}
			if (runlen++ == 0)
				runstart = i;
			if (runlen > maxlen) {
				maxstart = runstart;
				maxlen = runlen;
			}
		}
		/* a single zero group is printed as "0", not collapsed */
		if (maxlen < 2)
			maxstart = -1;
		maxend = maxstart + maxlen - 1;

		for (i = 0; i < 8; i++) {
			if (maxstart >= 0 && i >= maxstart && i <= maxend) {
				/*
				 * Every printed group supplies its own
				 * trailing ':'.  The collapsed run adds one
				 * more at its end, plus a leading one when
				 * it starts the address.
				 */
				if (i == 0)
					printf(":");
				if (i == maxend)
					printf(":");
			} else {
				b = ntohs(addr->addr16[i]);
				printf("%x", b);
				if (i < 7)
					printf(":");
			}
		}
		if (p) {
			p = ntohs(p);
			printf("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
1232
1233void
1234pf_print_state(struct pf_state *s)
1235{
1236	switch (s->proto) {
1237	case IPPROTO_TCP:
1238		printf("TCP ");
1239		break;
1240	case IPPROTO_UDP:
1241		printf("UDP ");
1242		break;
1243	case IPPROTO_ICMP:
1244		printf("ICMP ");
1245		break;
1246	case IPPROTO_ICMPV6:
1247		printf("ICMPV6 ");
1248		break;
1249	default:
1250		printf("%u ", s->proto);
1251		break;
1252	}
1253	pf_print_host(&s->lan.addr, s->lan.port, s->af);
1254	printf(" ");
1255	pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
1256	printf(" ");
1257	pf_print_host(&s->ext.addr, s->ext.port, s->af);
1258	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
1259	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
1260	if (s->src.wscale && s->dst.wscale)
1261		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1262	printf("]");
1263	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
1264	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1265	if (s->src.wscale && s->dst.wscale)
1266		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1267	printf("]");
1268	printf(" %u:%u", s->src.state, s->dst.state);
1269}
1270
1271void
1272pf_print_flags(u_int8_t f)
1273{
1274	if (f)
1275		printf(" ");
1276	if (f & TH_FIN)
1277		printf("F");
1278	if (f & TH_SYN)
1279		printf("S");
1280	if (f & TH_RST)
1281		printf("R");
1282	if (f & TH_PUSH)
1283		printf("P");
1284	if (f & TH_ACK)
1285		printf("A");
1286	if (f & TH_URG)
1287		printf("U");
1288	if (f & TH_ECE)
1289		printf("E");
1290	if (f & TH_CWR)
1291		printf("W");
1292}
1293
1294#define	PF_SET_SKIP_STEPS(i)					\
1295	do {							\
1296		while (head[i] != cur) {			\
1297			head[i]->skip[i].ptr = cur;		\
1298			head[i] = TAILQ_NEXT(head[i], entries);	\
1299		}						\
1300	} while (0)
1301
1302void
1303pf_calc_skip_steps(struct pf_rulequeue *rules)
1304{
1305	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1306	int i;
1307
1308	cur = TAILQ_FIRST(rules);
1309	prev = cur;
1310	for (i = 0; i < PF_SKIP_COUNT; ++i)
1311		head[i] = cur;
1312	while (cur != NULL) {
1313
1314		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1315			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1316		if (cur->direction != prev->direction)
1317			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1318		if (cur->af != prev->af)
1319			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1320		if (cur->proto != prev->proto)
1321			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1322		if (cur->src.neg != prev->src.neg ||
1323		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1324			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1325		if (cur->src.port[0] != prev->src.port[0] ||
1326		    cur->src.port[1] != prev->src.port[1] ||
1327		    cur->src.port_op != prev->src.port_op)
1328			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1329		if (cur->dst.neg != prev->dst.neg ||
1330		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1331			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1332		if (cur->dst.port[0] != prev->dst.port[0] ||
1333		    cur->dst.port[1] != prev->dst.port[1] ||
1334		    cur->dst.port_op != prev->dst.port_op)
1335			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1336
1337		prev = cur;
1338		cur = TAILQ_NEXT(cur, entries);
1339	}
1340	for (i = 0; i < PF_SKIP_COUNT; ++i)
1341		PF_SET_SKIP_STEPS(i);
1342}
1343
1344int
1345pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1346{
1347	if (aw1->type != aw2->type)
1348		return (1);
1349	switch (aw1->type) {
1350	case PF_ADDR_ADDRMASK:
1351		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1352			return (1);
1353		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1354			return (1);
1355		return (0);
1356	case PF_ADDR_DYNIFTL:
1357		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1358	case PF_ADDR_NOROUTE:
1359		return (0);
1360	case PF_ADDR_TABLE:
1361		return (aw1->p.tbl != aw2->p.tbl);
1362	default:
1363		printf("invalid address type: %d\n", aw1->type);
1364		return (1);
1365	}
1366}
1367
/*
 * Incrementally adjust a 16-bit checksum after one 16-bit word of the
 * covered data changed from "old" to "new".
 *
 * When udp is non-zero, a checksum of 0 is returned unchanged as 0, and a
 * computed result of 0 is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	sum = cksum + old - new;
	/* fold the carry/borrow in the upper half back into 16 bits */
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
1382
1383void
1384pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1385    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1386{
1387	struct pf_addr	ao;
1388	u_int16_t	po = *p;
1389
1390	PF_ACPY(&ao, a, af);
1391	PF_ACPY(a, an, af);
1392
1393	*p = pn;
1394
1395	switch (af) {
1396#ifdef INET
1397	case AF_INET:
1398		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1399		    ao.addr16[0], an->addr16[0], 0),
1400		    ao.addr16[1], an->addr16[1], 0);
1401		*p = pn;
1402		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1403		    ao.addr16[0], an->addr16[0], u),
1404		    ao.addr16[1], an->addr16[1], u),
1405		    po, pn, u);
1406		break;
1407#endif /* INET */
1408#ifdef INET6
1409	case AF_INET6:
1410		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1411		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1412		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1413		    ao.addr16[0], an->addr16[0], u),
1414		    ao.addr16[1], an->addr16[1], u),
1415		    ao.addr16[2], an->addr16[2], u),
1416		    ao.addr16[3], an->addr16[3], u),
1417		    ao.addr16[4], an->addr16[4], u),
1418		    ao.addr16[5], an->addr16[5], u),
1419		    ao.addr16[6], an->addr16[6], u),
1420		    ao.addr16[7], an->addr16[7], u),
1421		    po, pn, u);
1422		break;
1423#endif /* INET6 */
1424	}
1425}
1426
1427
/*
 * Replace the 32-bit value at "a" with "an" and adjust checksum *c for the
 * change.  "a" is a void pointer accessed through memcpy(), so it carries no
 * alignment requirement.  u is passed through to pf_cksum_fixup().
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	ao;
	u_int16_t	sum;

	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));

	/* fold in the upper half first, then the lower half */
	sum = pf_cksum_fixup(*c, ao >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, ao & 0xffff, an & 0xffff, u);
}
1439
1440#ifdef INET6
1441void
1442pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
1443{
1444	struct pf_addr	ao;
1445
1446	PF_ACPY(&ao, a, AF_INET6);
1447	PF_ACPY(a, an, AF_INET6);
1448
1449	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1450	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1451	    pf_cksum_fixup(pf_cksum_fixup(*c,
1452	    ao.addr16[0], an->addr16[0], u),
1453	    ao.addr16[1], an->addr16[1], u),
1454	    ao.addr16[2], an->addr16[2], u),
1455	    ao.addr16[3], an->addr16[3], u),
1456	    ao.addr16[4], an->addr16[4], u),
1457	    ao.addr16[5], an->addr16[5], u),
1458	    ao.addr16[6], an->addr16[6], u),
1459	    ao.addr16[7], an->addr16[7], u);
1460}
1461#endif /* INET6 */
1462
1463void
1464pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1465    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1466    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1467{
1468	struct pf_addr	oia, ooa;
1469
1470	PF_ACPY(&oia, ia, af);
1471	PF_ACPY(&ooa, oa, af);
1472
1473	/* Change inner protocol port, fix inner protocol checksum. */
1474	if (ip != NULL) {
1475		u_int16_t	oip = *ip;
1476		u_int32_t	opc = 0;	/* make the compiler happy */
1477
1478		if (pc != NULL)
1479			opc = *pc;
1480		*ip = np;
1481		if (pc != NULL)
1482			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1483		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1484		if (pc != NULL)
1485			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1486	}
1487	/* Change inner ip address, fix inner ip and icmp checksums. */
1488	PF_ACPY(ia, na, af);
1489	switch (af) {
1490#ifdef INET
1491	case AF_INET: {
1492		u_int32_t	 oh2c = *h2c;
1493
1494		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1495		    oia.addr16[0], ia->addr16[0], 0),
1496		    oia.addr16[1], ia->addr16[1], 0);
1497		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1498		    oia.addr16[0], ia->addr16[0], 0),
1499		    oia.addr16[1], ia->addr16[1], 0);
1500		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1501		break;
1502	}
1503#endif /* INET */
1504#ifdef INET6
1505	case AF_INET6:
1506		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1507		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1508		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1509		    oia.addr16[0], ia->addr16[0], u),
1510		    oia.addr16[1], ia->addr16[1], u),
1511		    oia.addr16[2], ia->addr16[2], u),
1512		    oia.addr16[3], ia->addr16[3], u),
1513		    oia.addr16[4], ia->addr16[4], u),
1514		    oia.addr16[5], ia->addr16[5], u),
1515		    oia.addr16[6], ia->addr16[6], u),
1516		    oia.addr16[7], ia->addr16[7], u);
1517		break;
1518#endif /* INET6 */
1519	}
1520	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
1521	PF_ACPY(oa, na, af);
1522	switch (af) {
1523#ifdef INET
1524	case AF_INET:
1525		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1526		    ooa.addr16[0], oa->addr16[0], 0),
1527		    ooa.addr16[1], oa->addr16[1], 0);
1528		break;
1529#endif /* INET */
1530#ifdef INET6
1531	case AF_INET6:
1532		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1533		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1534		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1535		    ooa.addr16[0], oa->addr16[0], u),
1536		    ooa.addr16[1], oa->addr16[1], u),
1537		    ooa.addr16[2], oa->addr16[2], u),
1538		    ooa.addr16[3], oa->addr16[3], u),
1539		    ooa.addr16[4], oa->addr16[4], u),
1540		    ooa.addr16[5], oa->addr16[5], u),
1541		    ooa.addr16[6], oa->addr16[6], u),
1542		    ooa.addr16[7], oa->addr16[7], u);
1543		break;
1544#endif /* INET6 */
1545	}
1546}
1547
1548void
1549pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1550    const struct pf_addr *saddr, const struct pf_addr *daddr,
1551    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
1552    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
1553    struct ether_header *eh, struct ifnet *ifp)
1554{
1555	struct mbuf	*m;
1556	int		 len = 0, tlen;		/* make the compiler happy */
1557#ifdef INET
1558	struct ip	*h = NULL;		/* make the compiler happy */
1559#endif /* INET */
1560#ifdef INET6
1561	struct ip6_hdr	*h6 = NULL;		/* make the compiler happy */
1562#endif /* INET6 */
1563	struct tcphdr	*th = NULL;		/* make the compiler happy */
1564	char *opt;
1565
1566	/* maximum segment size tcp option */
1567	tlen = sizeof(struct tcphdr);
1568	if (mss)
1569		tlen += 4;
1570
1571	switch (af) {
1572#ifdef INET
1573	case AF_INET:
1574		len = sizeof(struct ip) + tlen;
1575		break;
1576#endif /* INET */
1577#ifdef INET6
1578	case AF_INET6:
1579		len = sizeof(struct ip6_hdr) + tlen;
1580		break;
1581#endif /* INET6 */
1582	}
1583
1584	/* create outgoing mbuf */
1585	m = m_gethdr(M_DONTWAIT, MT_HEADER);
1586	if (m == NULL)
1587		return;
1588	if (tag) {
1589#ifdef __FreeBSD__
1590		m->m_flags |= M_SKIP_FIREWALL;
1591#else
1592		struct m_tag	*mtag;
1593
1594		mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1595		if (mtag == NULL) {
1596			m_freem(m);
1597			return;
1598		}
1599		m_tag_prepend(m, mtag);
1600#endif
1601	}
1602#ifdef ALTQ
1603	if (r != NULL && r->qid) {
1604		struct m_tag	*mtag;
1605		struct altq_tag *atag;
1606
1607		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
1608		if (mtag != NULL) {
1609			atag = (struct altq_tag *)(mtag + 1);
1610			atag->qid = r->qid;
1611			/* add hints for ecn */
1612			atag->af = af;
1613			atag->hdr = mtod(m, struct ip *);
1614			m_tag_prepend(m, mtag);
1615		}
1616	}
1617#endif /* ALTQ */
1618	m->m_data += max_linkhdr;
1619	m->m_pkthdr.len = m->m_len = len;
1620	m->m_pkthdr.rcvif = NULL;
1621	bzero(m->m_data, len);
1622	switch (af) {
1623#ifdef INET
1624	case AF_INET:
1625		h = mtod(m, struct ip *);
1626
1627		/* IP header fields included in the TCP checksum */
1628		h->ip_p = IPPROTO_TCP;
1629		h->ip_len = htons(tlen);
1630		h->ip_src.s_addr = saddr->v4.s_addr;
1631		h->ip_dst.s_addr = daddr->v4.s_addr;
1632
1633		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1634		break;
1635#endif /* INET */
1636#ifdef INET6
1637	case AF_INET6:
1638		h6 = mtod(m, struct ip6_hdr *);
1639
1640		/* IP header fields included in the TCP checksum */
1641		h6->ip6_nxt = IPPROTO_TCP;
1642		h6->ip6_plen = htons(tlen);
1643		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1644		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1645
1646		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1647		break;
1648#endif /* INET6 */
1649	}
1650
1651	/* TCP header */
1652	th->th_sport = sport;
1653	th->th_dport = dport;
1654	th->th_seq = htonl(seq);
1655	th->th_ack = htonl(ack);
1656	th->th_off = tlen >> 2;
1657	th->th_flags = flags;
1658	th->th_win = htons(win);
1659
1660	if (mss) {
1661		opt = (char *)(th + 1);
1662		opt[0] = TCPOPT_MAXSEG;
1663		opt[1] = 4;
1664		HTONS(mss);
1665		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1666	}
1667
1668	switch (af) {
1669#ifdef INET
1670	case AF_INET:
1671		/* TCP checksum */
1672		th->th_sum = in_cksum(m, len);
1673
1674		/* Finish the IP header */
1675		h->ip_v = 4;
1676		h->ip_hl = sizeof(*h) >> 2;
1677		h->ip_tos = IPTOS_LOWDELAY;
1678#ifdef __FreeBSD__
1679		h->ip_off = path_mtu_discovery ? IP_DF : 0;
1680		h->ip_len = len;
1681#else
1682		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
1683		h->ip_len = htons(len);
1684#endif
1685		h->ip_ttl = ttl ? ttl : ip_defttl;
1686		h->ip_sum = 0;
1687		if (eh == NULL) {
1688#ifdef __FreeBSD__
1689			PF_UNLOCK();
1690			ip_output(m, (void *)NULL, (void *)NULL, 0,
1691			    (void *)NULL, (void *)NULL);
1692			PF_LOCK();
1693#else /* ! __FreeBSD__ */
1694			ip_output(m, (void *)NULL, (void *)NULL, 0,
1695			    (void *)NULL, (void *)NULL);
1696#endif
1697		} else {
1698			struct route		 ro;
1699			struct rtentry		 rt;
1700			struct ether_header	*e = (void *)ro.ro_dst.sa_data;
1701
1702			if (ifp == NULL) {
1703				m_freem(m);
1704				return;
1705			}
1706			rt.rt_ifp = ifp;
1707			ro.ro_rt = &rt;
1708			ro.ro_dst.sa_len = sizeof(ro.ro_dst);
1709			ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
1710			bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
1711			bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
1712			e->ether_type = eh->ether_type;
1713#ifdef __FreeBSD__
1714			PF_UNLOCK();
1715			/* XXX_IMPORT: later */
1716			ip_output(m, (void *)NULL, &ro, 0,
1717			    (void *)NULL, (void *)NULL);
1718			PF_LOCK();
1719#else /* ! __FreeBSD__ */
1720			ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER,
1721			    (void *)NULL, (void *)NULL);
1722#endif
1723		}
1724		break;
1725#endif /* INET */
1726#ifdef INET6
1727	case AF_INET6:
1728		/* TCP checksum */
1729		th->th_sum = in6_cksum(m, IPPROTO_TCP,
1730		    sizeof(struct ip6_hdr), tlen);
1731
1732		h6->ip6_vfc |= IPV6_VERSION;
1733		h6->ip6_hlim = IPV6_DEFHLIM;
1734
1735#ifdef __FreeBSD__
1736		PF_UNLOCK();
1737		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1738		PF_LOCK();
1739#else
1740		ip6_output(m, NULL, NULL, 0, NULL, NULL);
1741#endif
1742		break;
1743#endif /* INET6 */
1744	}
1745}
1746
1747void
1748pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1749    struct pf_rule *r)
1750{
1751#ifdef ALTQ
1752	struct m_tag	*mtag;
1753#endif
1754	struct mbuf	*m0;
1755#ifdef __FreeBSD__
1756	struct ip *ip;
1757#endif
1758
1759#ifdef __FreeBSD__
1760	m0 = m_copypacket(m, M_DONTWAIT);
1761	if (m0 == NULL)
1762		return;
1763	m0->m_flags |= M_SKIP_FIREWALL;
1764#else
1765	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1766	if (mtag == NULL)
1767		return;
1768	m0 = m_copy(m, 0, M_COPYALL);
1769	if (m0 == NULL) {
1770		m_tag_free(mtag);
1771		return;
1772	}
1773	m_tag_prepend(m0, mtag);
1774#endif
1775
1776#ifdef ALTQ
1777	if (r->qid) {
1778		struct altq_tag *atag;
1779
1780		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
1781		if (mtag != NULL) {
1782			atag = (struct altq_tag *)(mtag + 1);
1783			atag->qid = r->qid;
1784			/* add hints for ecn */
1785			atag->af = af;
1786			atag->hdr = mtod(m0, struct ip *);
1787			m_tag_prepend(m0, mtag);
1788		}
1789	}
1790#endif /* ALTQ */
1791
1792	switch (af) {
1793#ifdef INET
1794	case AF_INET:
1795#ifdef __FreeBSD__
1796		/* icmp_error() expects host byte ordering */
1797		ip = mtod(m0, struct ip *);
1798		NTOHS(ip->ip_len);
1799		NTOHS(ip->ip_off);
1800		PF_UNLOCK();
1801		icmp_error(m0, type, code, 0, 0);
1802		PF_LOCK();
1803#endif
1804		break;
1805#endif /* INET */
1806#ifdef INET6
1807	case AF_INET6:
1808#ifdef __FreeBSD__
1809		PF_UNLOCK();
1810#endif
1811		icmp6_error(m0, type, code, 0);
1812#ifdef __FreeBSD__
1813		PF_LOCK();
1814#endif
1815		break;
1816#endif /* INET6 */
1817	}
1818}
1819
1820/*
1821 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
1822 * If n is 0, they match if they are equal. If n is != 0, they match if they
1823 * are different.
1824 */
1825int
1826pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
1827    struct pf_addr *b, sa_family_t af)
1828{
1829	int	match = 0;
1830
1831	switch (af) {
1832#ifdef INET
1833	case AF_INET:
1834		if ((a->addr32[0] & m->addr32[0]) ==
1835		    (b->addr32[0] & m->addr32[0]))
1836			match++;
1837		break;
1838#endif /* INET */
1839#ifdef INET6
1840	case AF_INET6:
1841		if (((a->addr32[0] & m->addr32[0]) ==
1842		     (b->addr32[0] & m->addr32[0])) &&
1843		    ((a->addr32[1] & m->addr32[1]) ==
1844		     (b->addr32[1] & m->addr32[1])) &&
1845		    ((a->addr32[2] & m->addr32[2]) ==
1846		     (b->addr32[2] & m->addr32[2])) &&
1847		    ((a->addr32[3] & m->addr32[3]) ==
1848		     (b->addr32[3] & m->addr32[3])))
1849			match++;
1850		break;
1851#endif /* INET6 */
1852	}
1853	if (match) {
1854		if (n)
1855			return (0);
1856		else
1857			return (1);
1858	} else {
1859		if (n)
1860			return (1);
1861		else
1862			return (0);
1863	}
1864}
1865
1866int
1867pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1868{
1869	switch (op) {
1870	case PF_OP_IRG:
1871		return ((p > a1) && (p < a2));
1872	case PF_OP_XRG:
1873		return ((p < a1) || (p > a2));
1874	case PF_OP_RRG:
1875		return ((p >= a1) && (p <= a2));
1876	case PF_OP_EQ:
1877		return (p == a1);
1878	case PF_OP_NE:
1879		return (p != a1);
1880	case PF_OP_LT:
1881		return (p < a1);
1882	case PF_OP_LE:
1883		return (p <= a1);
1884	case PF_OP_GT:
1885		return (p > a1);
1886	case PF_OP_GE:
1887		return (p >= a1);
1888	}
1889	return (0); /* never reached */
1890}
1891
/*
 * Port variant of pf_match(): all three port values are converted from
 * network to host byte order before they are compared.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	a1 = ntohs(a1);
	a2 = ntohs(a2);
	p = ntohs(p);
	return (pf_match(op, a1, a2, p));
}
1900
1901int
1902pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1903{
1904	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1905		return (0);
1906	return (pf_match(op, a1, a2, u));
1907}
1908
1909int
1910pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1911{
1912	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1913		return (0);
1914	return (pf_match(op, a1, a2, g));
1915}
1916
1917struct pf_tag *
1918pf_get_tag(struct mbuf *m)
1919{
1920	struct m_tag	*mtag;
1921
1922	if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL)
1923		return ((struct pf_tag *)(mtag + 1));
1924	else
1925		return (NULL);
1926}
1927
1928int
1929pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_tag **pftag, int *tag)
1930{
1931	if (*tag == -1) {	/* find mbuf tag */
1932		*pftag = pf_get_tag(m);
1933		if (*pftag != NULL)
1934			*tag = (*pftag)->tag;
1935		else
1936			*tag = 0;
1937	}
1938
1939	return ((!r->match_tag_not && r->match_tag == *tag) ||
1940	    (r->match_tag_not && r->match_tag != *tag));
1941}
1942
1943int
1944pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag)
1945{
1946	struct m_tag	*mtag;
1947
1948	if (tag <= 0)
1949		return (0);
1950
1951	if (pftag == NULL) {
1952		mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT);
1953		if (mtag == NULL)
1954			return (1);
1955		((struct pf_tag *)(mtag + 1))->tag = tag;
1956		m_tag_prepend(m, mtag);
1957	} else
1958		pftag->tag = tag;
1959
1960	return (0);
1961}
1962
1963static void
1964pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
1965    struct pf_rule **r, struct pf_rule **a)
1966{
1967	struct pf_anchor_stackframe	*f;
1968
1969	if (*depth >= sizeof(pf_anchor_stack) /
1970	    sizeof(pf_anchor_stack[0])) {
1971		printf("pf_step_into_anchor: stack overflow\n");
1972		*r = TAILQ_NEXT(*r, entries);
1973		return;
1974	} else if (*depth == 0 && a != NULL)
1975		*a = *r;
1976	f = pf_anchor_stack + (*depth)++;
1977	f->rs = *rs;
1978	f->r = *r;
1979	if ((*r)->anchor_wildcard) {
1980		f->parent = &(*r)->anchor->children;
1981		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
1982		    NULL) {
1983			*r = NULL;
1984			return;
1985		}
1986		*rs = &f->child->ruleset;
1987	} else {
1988		f->parent = NULL;
1989		f->child = NULL;
1990		*rs = &(*r)->anchor->ruleset;
1991	}
1992	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
1993}
1994
1995static void
1996pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
1997    struct pf_rule **r, struct pf_rule **a)
1998{
1999	struct pf_anchor_stackframe	*f;
2000
2001	do {
2002		if (*depth <= 0)
2003			break;
2004		f = pf_anchor_stack + *depth - 1;
2005		if (f->parent != NULL && f->child != NULL) {
2006			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2007			if (f->child != NULL) {
2008				*rs = &f->child->ruleset;
2009				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2010				if (*r == NULL)
2011					continue;
2012				else
2013					break;
2014			}
2015		}
2016		(*depth)--;
2017		if (*depth == 0 && a != NULL)
2018			*a = NULL;
2019		*rs = f->rs;
2020		*r = TAILQ_NEXT(f->r, entries);
2021	} while (*r == NULL);
2022}
2023
2024#ifdef INET6
2025void
2026pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2027    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2028{
2029	switch (af) {
2030#ifdef INET
2031	case AF_INET:
2032		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2033		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2034		break;
2035#endif /* INET */
2036	case AF_INET6:
2037		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2038		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2039		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2040		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2041		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2042		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2043		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2044		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2045		break;
2046	}
2047}
2048
2049void
2050pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2051{
2052	switch (af) {
2053#ifdef INET
2054	case AF_INET:
2055		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2056		break;
2057#endif /* INET */
2058	case AF_INET6:
2059		if (addr->addr32[3] == 0xffffffff) {
2060			addr->addr32[3] = 0;
2061			if (addr->addr32[2] == 0xffffffff) {
2062				addr->addr32[2] = 0;
2063				if (addr->addr32[1] == 0xffffffff) {
2064					addr->addr32[1] = 0;
2065					addr->addr32[0] =
2066					    htonl(ntohl(addr->addr32[0]) + 1);
2067				} else
2068					addr->addr32[1] =
2069					    htonl(ntohl(addr->addr32[1]) + 1);
2070			} else
2071				addr->addr32[2] =
2072				    htonl(ntohl(addr->addr32[2]) + 1);
2073		} else
2074			addr->addr32[3] =
2075			    htonl(ntohl(addr->addr32[3]) + 1);
2076		break;
2077	}
2078}
2079#endif /* INET6 */
2080
2081#define mix(a,b,c) \
2082	do {					\
2083		a -= b; a -= c; a ^= (c >> 13);	\
2084		b -= c; b -= a; b ^= (a << 8);	\
2085		c -= a; c -= b; c ^= (b >> 13);	\
2086		a -= b; a -= c; a ^= (c >> 12);	\
2087		b -= c; b -= a; b ^= (a << 16);	\
2088		c -= a; c -= b; c ^= (b >> 5);	\
2089		a -= b; a -= c; a ^= (c >> 3);	\
2090		b -= c; b -= a; b ^= (a << 10);	\
2091		c -= a; c -= b; c ^= (b >> 15);	\
2092	} while (0)
2093
2094/*
2095 * hash function based on bridge_hash in if_bridge.c
2096 */
2097void
2098pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
2099    struct pf_poolhashkey *key, sa_family_t af)
2100{
2101	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2102
2103	switch (af) {
2104#ifdef INET
2105	case AF_INET:
2106		a += inaddr->addr32[0];
2107		b += key->key32[1];
2108		mix(a, b, c);
2109		hash->addr32[0] = c + key->key32[2];
2110		break;
2111#endif /* INET */
2112#ifdef INET6
2113	case AF_INET6:
2114		a += inaddr->addr32[0];
2115		b += inaddr->addr32[2];
2116		mix(a, b, c);
2117		hash->addr32[0] = c;
2118		a += inaddr->addr32[1];
2119		b += inaddr->addr32[3];
2120		c += key->key32[1];
2121		mix(a, b, c);
2122		hash->addr32[1] = c;
2123		a += inaddr->addr32[2];
2124		b += inaddr->addr32[1];
2125		c += key->key32[2];
2126		mix(a, b, c);
2127		hash->addr32[2] = c;
2128		a += inaddr->addr32[3];
2129		b += inaddr->addr32[0];
2130		c += key->key32[3];
2131		mix(a, b, c);
2132		hash->addr32[3] = c;
2133		break;
2134#endif /* INET6 */
2135	}
2136}
2137
2138int
2139pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
2140    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
2141{
2142	unsigned char		 hash[16];
2143	struct pf_pool		*rpool = &r->rpool;
2144	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
2145	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
2146	struct pf_pooladdr	*acur = rpool->cur;
2147	struct pf_src_node	 k;
2148
2149	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
2150	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2151		k.af = af;
2152		PF_ACPY(&k.addr, saddr, af);
2153		if (r->rule_flag & PFRULE_RULESRCTRACK ||
2154		    r->rpool.opts & PF_POOL_STICKYADDR)
2155			k.rule.ptr = r;
2156		else
2157			k.rule.ptr = NULL;
2158		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
2159		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
2160		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
2161			PF_ACPY(naddr, &(*sn)->raddr, af);
2162			if (pf_status.debug >= PF_DEBUG_MISC) {
2163				printf("pf_map_addr: src tracking maps ");
2164				pf_print_host(&k.addr, 0, af);
2165				printf(" to ");
2166				pf_print_host(naddr, 0, af);
2167				printf("\n");
2168			}
2169			return (0);
2170		}
2171	}
2172
2173	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
2174		return (1);
2175	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2176		switch (af) {
2177#ifdef INET
2178		case AF_INET:
2179			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
2180			    (rpool->opts & PF_POOL_TYPEMASK) !=
2181			    PF_POOL_ROUNDROBIN)
2182				return (1);
2183			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
2184			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
2185			break;
2186#endif /* INET */
2187#ifdef INET6
2188		case AF_INET6:
2189			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
2190			    (rpool->opts & PF_POOL_TYPEMASK) !=
2191			    PF_POOL_ROUNDROBIN)
2192				return (1);
2193			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
2194			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
2195			break;
2196#endif /* INET6 */
2197		}
2198	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2199		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
2200			return (1); /* unsupported */
2201	} else {
2202		raddr = &rpool->cur->addr.v.a.addr;
2203		rmask = &rpool->cur->addr.v.a.mask;
2204	}
2205
2206	switch (rpool->opts & PF_POOL_TYPEMASK) {
2207	case PF_POOL_NONE:
2208		PF_ACPY(naddr, raddr, af);
2209		break;
2210	case PF_POOL_BITMASK:
2211		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
2212		break;
2213	case PF_POOL_RANDOM:
2214		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
2215			switch (af) {
2216#ifdef INET
2217			case AF_INET:
2218				rpool->counter.addr32[0] = htonl(arc4random());
2219				break;
2220#endif /* INET */
2221#ifdef INET6
2222			case AF_INET6:
2223				if (rmask->addr32[3] != 0xffffffff)
2224					rpool->counter.addr32[3] =
2225					    htonl(arc4random());
2226				else
2227					break;
2228				if (rmask->addr32[2] != 0xffffffff)
2229					rpool->counter.addr32[2] =
2230					    htonl(arc4random());
2231				else
2232					break;
2233				if (rmask->addr32[1] != 0xffffffff)
2234					rpool->counter.addr32[1] =
2235					    htonl(arc4random());
2236				else
2237					break;
2238				if (rmask->addr32[0] != 0xffffffff)
2239					rpool->counter.addr32[0] =
2240					    htonl(arc4random());
2241				break;
2242#endif /* INET6 */
2243			}
2244			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2245			PF_ACPY(init_addr, naddr, af);
2246
2247		} else {
2248			PF_AINC(&rpool->counter, af);
2249			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2250		}
2251		break;
2252	case PF_POOL_SRCHASH:
2253		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2254		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2255		break;
2256	case PF_POOL_ROUNDROBIN:
2257		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2258			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
2259			    &rpool->tblidx, &rpool->counter,
2260			    &raddr, &rmask, af))
2261				goto get_addr;
2262		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2263			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2264			    &rpool->tblidx, &rpool->counter,
2265			    &raddr, &rmask, af))
2266				goto get_addr;
2267		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
2268			goto get_addr;
2269
2270	try_next:
2271		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
2272			rpool->cur = TAILQ_FIRST(&rpool->list);
2273		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2274			rpool->tblidx = -1;
2275			if (pfr_pool_get(rpool->cur->addr.p.tbl,
2276			    &rpool->tblidx, &rpool->counter,
2277			    &raddr, &rmask, af)) {
2278				/* table contains no address of type 'af' */
2279				if (rpool->cur != acur)
2280					goto try_next;
2281				return (1);
2282			}
2283		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2284			rpool->tblidx = -1;
2285			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2286			    &rpool->tblidx, &rpool->counter,
2287			    &raddr, &rmask, af)) {
2288				/* table contains no address of type 'af' */
2289				if (rpool->cur != acur)
2290					goto try_next;
2291				return (1);
2292			}
2293		} else {
2294			raddr = &rpool->cur->addr.v.a.addr;
2295			rmask = &rpool->cur->addr.v.a.mask;
2296			PF_ACPY(&rpool->counter, raddr, af);
2297		}
2298
2299	get_addr:
2300		PF_ACPY(naddr, &rpool->counter, af);
2301		if (init_addr != NULL && PF_AZERO(init_addr, af))
2302			PF_ACPY(init_addr, naddr, af);
2303		PF_AINC(&rpool->counter, af);
2304		break;
2305	}
2306	if (*sn != NULL)
2307		PF_ACPY(&(*sn)->raddr, naddr, af);
2308
2309	if (pf_status.debug >= PF_DEBUG_MISC &&
2310	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2311		printf("pf_map_addr: selected address ");
2312		pf_print_host(naddr, 0, af);
2313		printf("\n");
2314	}
2315
2316	return (0);
2317}
2318
2319int
2320pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2321    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2322    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2323    struct pf_src_node **sn)
2324{
2325	struct pf_state		key;
2326	struct pf_addr		init_addr;
2327	u_int16_t		cut;
2328
2329	bzero(&init_addr, sizeof(init_addr));
2330	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2331		return (1);
2332
2333	do {
2334		key.af = af;
2335		key.proto = proto;
2336		PF_ACPY(&key.ext.addr, daddr, key.af);
2337		PF_ACPY(&key.gwy.addr, naddr, key.af);
2338		key.ext.port = dport;
2339
2340		/*
2341		 * port search; start random, step;
2342		 * similar 2 portloop in in_pcbbind
2343		 */
2344		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
2345			key.gwy.port = dport;
2346			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2347				return (0);
2348		} else if (low == 0 && high == 0) {
2349			key.gwy.port = *nport;
2350			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2351				return (0);
2352		} else if (low == high) {
2353			key.gwy.port = htons(low);
2354			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2355				*nport = htons(low);
2356				return (0);
2357			}
2358		} else {
2359			u_int16_t tmp;
2360
2361			if (low > high) {
2362				tmp = low;
2363				low = high;
2364				high = tmp;
2365			}
2366			/* low < high */
2367			cut = htonl(arc4random()) % (1 + high - low) + low;
2368			/* low <= cut <= high */
2369			for (tmp = cut; tmp <= high; ++(tmp)) {
2370				key.gwy.port = htons(tmp);
2371				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2372				    NULL) {
2373					*nport = htons(tmp);
2374					return (0);
2375				}
2376			}
2377			for (tmp = cut - 1; tmp >= low; --(tmp)) {
2378				key.gwy.port = htons(tmp);
2379				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2380				    NULL) {
2381					*nport = htons(tmp);
2382					return (0);
2383				}
2384			}
2385		}
2386
2387		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2388		case PF_POOL_RANDOM:
2389		case PF_POOL_ROUNDROBIN:
2390			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2391				return (1);
2392			break;
2393		case PF_POOL_NONE:
2394		case PF_POOL_SRCHASH:
2395		case PF_POOL_BITMASK:
2396		default:
2397			return (1);
2398		}
2399	} while (! PF_AEQ(&init_addr, naddr, af) );
2400
2401	return (1);					/* none available */
2402}
2403
2404struct pf_rule *
2405pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2406    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2407    struct pf_addr *daddr, u_int16_t dport, int rs_num)
2408{
2409	struct pf_rule		*r, *rm = NULL;
2410	struct pf_ruleset	*ruleset = NULL;
2411	struct pf_tag		*pftag = NULL;
2412	int			 tag = -1;
2413	int			 asd = 0;
2414
2415	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2416	while (r && rm == NULL) {
2417		struct pf_rule_addr	*src = NULL, *dst = NULL;
2418		struct pf_addr_wrap	*xdst = NULL;
2419
2420		if (r->action == PF_BINAT && direction == PF_IN) {
2421			src = &r->dst;
2422			if (r->rpool.cur != NULL)
2423				xdst = &r->rpool.cur->addr;
2424		} else {
2425			src = &r->src;
2426			dst = &r->dst;
2427		}
2428
2429		r->evaluations++;
2430		if (r->kif != NULL &&
2431		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
2432			r = r->skip[PF_SKIP_IFP].ptr;
2433		else if (r->direction && r->direction != direction)
2434			r = r->skip[PF_SKIP_DIR].ptr;
2435		else if (r->af && r->af != pd->af)
2436			r = r->skip[PF_SKIP_AF].ptr;
2437		else if (r->proto && r->proto != pd->proto)
2438			r = r->skip[PF_SKIP_PROTO].ptr;
2439		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->neg))
2440			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2441			    PF_SKIP_DST_ADDR].ptr;
2442		else if (src->port_op && !pf_match_port(src->port_op,
2443		    src->port[0], src->port[1], sport))
2444			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2445			    PF_SKIP_DST_PORT].ptr;
2446		else if (dst != NULL &&
2447		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg))
2448			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2449		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0))
2450			r = TAILQ_NEXT(r, entries);
2451		else if (dst != NULL && dst->port_op &&
2452		    !pf_match_port(dst->port_op, dst->port[0],
2453		    dst->port[1], dport))
2454			r = r->skip[PF_SKIP_DST_PORT].ptr;
2455		else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag))
2456			r = TAILQ_NEXT(r, entries);
2457		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2458		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2459		    off, pd->hdr.tcp), r->os_fingerprint)))
2460			r = TAILQ_NEXT(r, entries);
2461		else {
2462			if (r->tag)
2463				tag = r->tag;
2464			if (r->anchor == NULL) {
2465				rm = r;
2466			} else
2467				pf_step_into_anchor(&asd, &ruleset, rs_num, &r, NULL);
2468		}
2469		if (r == NULL)
2470			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, NULL);
2471	}
2472	if (pf_tag_packet(m, pftag, tag))
2473		return (NULL);
2474	if (rm != NULL && (rm->action == PF_NONAT ||
2475	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2476		return (NULL);
2477	return (rm);
2478}
2479
2480struct pf_rule *
2481pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2482    struct pfi_kif *kif, struct pf_src_node **sn,
2483    struct pf_addr *saddr, u_int16_t sport,
2484    struct pf_addr *daddr, u_int16_t dport,
2485    struct pf_addr *naddr, u_int16_t *nport)
2486{
2487	struct pf_rule	*r = NULL;
2488
2489	if (direction == PF_OUT) {
2490		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2491		    sport, daddr, dport, PF_RULESET_BINAT);
2492		if (r == NULL)
2493			r = pf_match_translation(pd, m, off, direction, kif,
2494			    saddr, sport, daddr, dport, PF_RULESET_NAT);
2495	} else {
2496		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2497		    sport, daddr, dport, PF_RULESET_RDR);
2498		if (r == NULL)
2499			r = pf_match_translation(pd, m, off, direction, kif,
2500			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
2501	}
2502
2503	if (r != NULL) {
2504		switch (r->action) {
2505		case PF_NONAT:
2506		case PF_NOBINAT:
2507		case PF_NORDR:
2508			return (NULL);
2509		case PF_NAT:
2510			if (pf_get_sport(pd->af, pd->proto, r, saddr,
2511			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2512			    r->rpool.proxy_port[1], sn)) {
2513				DPFPRINTF(PF_DEBUG_MISC,
2514				    ("pf: NAT proxy port allocation "
2515				    "(%u-%u) failed\n",
2516				    r->rpool.proxy_port[0],
2517				    r->rpool.proxy_port[1]));
2518				return (NULL);
2519			}
2520			break;
2521		case PF_BINAT:
2522			switch (direction) {
2523			case PF_OUT:
2524				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2525					switch (pd->af) {
2526#ifdef INET
2527					case AF_INET:
2528						if (r->rpool.cur->addr.p.dyn->
2529						    pfid_acnt4 < 1)
2530							return (NULL);
2531						PF_POOLMASK(naddr,
2532						    &r->rpool.cur->addr.p.dyn->
2533						    pfid_addr4,
2534						    &r->rpool.cur->addr.p.dyn->
2535						    pfid_mask4,
2536						    saddr, AF_INET);
2537						break;
2538#endif /* INET */
2539#ifdef INET6
2540					case AF_INET6:
2541						if (r->rpool.cur->addr.p.dyn->
2542						    pfid_acnt6 < 1)
2543							return (NULL);
2544						PF_POOLMASK(naddr,
2545						    &r->rpool.cur->addr.p.dyn->
2546						    pfid_addr6,
2547						    &r->rpool.cur->addr.p.dyn->
2548						    pfid_mask6,
2549						    saddr, AF_INET6);
2550						break;
2551#endif /* INET6 */
2552					}
2553				} else
2554					PF_POOLMASK(naddr,
2555					    &r->rpool.cur->addr.v.a.addr,
2556					    &r->rpool.cur->addr.v.a.mask,
2557					    saddr, pd->af);
2558				break;
2559			case PF_IN:
2560				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
2561					switch (pd->af) {
2562#ifdef INET
2563					case AF_INET:
2564						if (r->src.addr.p.dyn->
2565						    pfid_acnt4 < 1)
2566							return (NULL);
2567						PF_POOLMASK(naddr,
2568						    &r->src.addr.p.dyn->
2569						    pfid_addr4,
2570						    &r->src.addr.p.dyn->
2571						    pfid_mask4,
2572						    daddr, AF_INET);
2573						break;
2574#endif /* INET */
2575#ifdef INET6
2576					case AF_INET6:
2577						if (r->src.addr.p.dyn->
2578						    pfid_acnt6 < 1)
2579							return (NULL);
2580						PF_POOLMASK(naddr,
2581						    &r->src.addr.p.dyn->
2582						    pfid_addr6,
2583						    &r->src.addr.p.dyn->
2584						    pfid_mask6,
2585						    daddr, AF_INET6);
2586						break;
2587#endif /* INET6 */
2588					}
2589				} else
2590					PF_POOLMASK(naddr,
2591					    &r->src.addr.v.a.addr,
2592					    &r->src.addr.v.a.mask, daddr,
2593					    pd->af);
2594				break;
2595			}
2596			break;
2597		case PF_RDR: {
2598			if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
2599				return (NULL);
2600
2601			if (r->rpool.proxy_port[1]) {
2602				u_int32_t	tmp_nport;
2603
2604				tmp_nport = ((ntohs(dport) -
2605				    ntohs(r->dst.port[0])) %
2606				    (r->rpool.proxy_port[1] -
2607				    r->rpool.proxy_port[0] + 1)) +
2608				    r->rpool.proxy_port[0];
2609
2610				/* wrap around if necessary */
2611				if (tmp_nport > 65535)
2612					tmp_nport -= 65535;
2613				*nport = htons((u_int16_t)tmp_nport);
2614			} else if (r->rpool.proxy_port[0])
2615				*nport = htons(r->rpool.proxy_port[0]);
2616			break;
2617		}
2618		default:
2619			return (NULL);
2620		}
2621	}
2622
2623	return (r);
2624}
2625
2626int
2627#ifdef __FreeBSD__
2628pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd,
2629    struct inpcb *inp_arg)
2630#else
2631pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd)
2632#endif
2633{
2634	struct pf_addr		*saddr, *daddr;
2635	u_int16_t		 sport, dport;
2636#ifdef __FreeBSD__
2637	struct inpcbinfo	*pi;
2638#else
2639	struct inpcbtable	*tb;
2640#endif
2641	struct inpcb		*inp;
2642
2643	*uid = UID_MAX;
2644	*gid = GID_MAX;
2645#ifdef __FreeBSD__
2646	if (inp_arg != NULL) {
2647		INP_LOCK_ASSERT(inp_arg);
2648		if (inp_arg->inp_socket) {
2649			*uid = inp_arg->inp_socket->so_cred->cr_uid;
2650			*gid = inp_arg->inp_socket->so_cred->cr_groups[0];
2651			return (1);
2652		} else
2653			return (0);
2654	}
2655#endif
2656	switch (pd->proto) {
2657	case IPPROTO_TCP:
2658		sport = pd->hdr.tcp->th_sport;
2659		dport = pd->hdr.tcp->th_dport;
2660#ifdef __FreeBSD__
2661		pi = &tcbinfo;
2662#else
2663		tb = &tcbtable;
2664#endif
2665		break;
2666	case IPPROTO_UDP:
2667		sport = pd->hdr.udp->uh_sport;
2668		dport = pd->hdr.udp->uh_dport;
2669#ifdef __FreeBSD__
2670		pi = &udbinfo;
2671#else
2672		tb = &udbtable;
2673#endif
2674		break;
2675	default:
2676		return (0);
2677	}
2678	if (direction == PF_IN) {
2679		saddr = pd->src;
2680		daddr = pd->dst;
2681	} else {
2682		u_int16_t	p;
2683
2684		p = sport;
2685		sport = dport;
2686		dport = p;
2687		saddr = pd->dst;
2688		daddr = pd->src;
2689	}
2690	switch (pd->af) {
2691#ifdef INET
2692	case AF_INET:
2693#ifdef __FreeBSD__
2694		INP_INFO_RLOCK(pi);	/* XXX LOR */
2695		inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2696			dport, 0, NULL);
2697		if (inp == NULL) {
2698			inp = in_pcblookup_hash(pi, saddr->v4, sport,
2699			   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
2700			if(inp == NULL) {
2701				INP_INFO_RUNLOCK(pi);
2702				return (0);
2703			}
2704		}
2705#else
2706		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
2707		if (inp == NULL) {
2708			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
2709			if (inp == NULL)
2710				return (0);
2711		}
2712#endif
2713		break;
2714#endif /* INET */
2715#ifdef INET6
2716	case AF_INET6:
2717#ifdef __FreeBSD__
2718		INP_INFO_RLOCK(pi);
2719		inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2720			&daddr->v6, dport, 0, NULL);
2721		if (inp == NULL) {
2722			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2723			&daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2724			if (inp == NULL) {
2725				INP_INFO_RUNLOCK(pi);
2726				return (0);
2727			}
2728		}
2729#else
2730		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
2731		    dport);
2732		if (inp == NULL) {
2733			inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
2734			if (inp == NULL)
2735				return (0);
2736		}
2737#endif
2738		break;
2739#endif /* INET6 */
2740
2741	default:
2742		return (0);
2743	}
2744#ifdef __FreeBSD__
2745	INP_LOCK(inp);
2746	if ((inp->inp_socket == NULL) || (inp->inp_socket->so_cred == NULL)) {
2747		INP_UNLOCK(inp);
2748		INP_INFO_RUNLOCK(pi);
2749		return (0);
2750	}
2751	*uid = inp->inp_socket->so_cred->cr_uid;
2752	*gid = inp->inp_socket->so_cred->cr_groups[0];
2753	INP_UNLOCK(inp);
2754	INP_INFO_RUNLOCK(pi);
2755#else
2756	*uid = inp->inp_socket->so_euid;
2757	*gid = inp->inp_socket->so_egid;
2758#endif
2759	return (1);
2760}
2761
2762u_int8_t
2763pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2764{
2765	int		 hlen;
2766	u_int8_t	 hdr[60];
2767	u_int8_t	*opt, optlen;
2768	u_int8_t	 wscale = 0;
2769
2770	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2771	if (hlen <= sizeof(struct tcphdr))
2772		return (0);
2773	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2774		return (0);
2775	opt = hdr + sizeof(struct tcphdr);
2776	hlen -= sizeof(struct tcphdr);
2777	while (hlen >= 3) {
2778		switch (*opt) {
2779		case TCPOPT_EOL:
2780		case TCPOPT_NOP:
2781			++opt;
2782			--hlen;
2783			break;
2784		case TCPOPT_WINDOW:
2785			wscale = opt[2];
2786			if (wscale > TCP_MAX_WINSHIFT)
2787				wscale = TCP_MAX_WINSHIFT;
2788			wscale |= PF_WSCALE_FLAG;
2789			/* FALLTHROUGH */
2790		default:
2791			optlen = opt[1];
2792			if (optlen < 2)
2793				optlen = 2;
2794			hlen -= optlen;
2795			opt += optlen;
2796			break;
2797		}
2798	}
2799	return (wscale);
2800}
2801
2802u_int16_t
2803pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2804{
2805	int		 hlen;
2806	u_int8_t	 hdr[60];
2807	u_int8_t	*opt, optlen;
2808	u_int16_t	 mss = tcp_mssdflt;
2809
2810	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2811	if (hlen <= sizeof(struct tcphdr))
2812		return (0);
2813	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2814		return (0);
2815	opt = hdr + sizeof(struct tcphdr);
2816	hlen -= sizeof(struct tcphdr);
2817	while (hlen >= TCPOLEN_MAXSEG) {
2818		switch (*opt) {
2819		case TCPOPT_EOL:
2820		case TCPOPT_NOP:
2821			++opt;
2822			--hlen;
2823			break;
2824		case TCPOPT_MAXSEG:
2825			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2826			NTOHS(mss);
2827			/* FALLTHROUGH */
2828		default:
2829			optlen = opt[1];
2830			if (optlen < 2)
2831				optlen = 2;
2832			hlen -= optlen;
2833			opt += optlen;
2834			break;
2835		}
2836	}
2837	return (mss);
2838}
2839
2840u_int16_t
2841pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2842{
2843#ifdef INET
2844	struct sockaddr_in	*dst;
2845	struct route		 ro;
2846#endif /* INET */
2847#ifdef INET6
2848	struct sockaddr_in6	*dst6;
2849	struct route_in6	 ro6;
2850#endif /* INET6 */
2851	struct rtentry		*rt = NULL;
2852	int			 hlen = 0;	/* make the compiler happy */
2853	u_int16_t		 mss = tcp_mssdflt;
2854
2855	switch (af) {
2856#ifdef INET
2857	case AF_INET:
2858		hlen = sizeof(struct ip);
2859		bzero(&ro, sizeof(ro));
2860		dst = (struct sockaddr_in *)&ro.ro_dst;
2861		dst->sin_family = AF_INET;
2862		dst->sin_len = sizeof(*dst);
2863		dst->sin_addr = addr->v4;
2864#ifdef __FreeBSD__
2865#ifdef RTF_PRCLONING
2866		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2867#else /* !RTF_PRCLONING */
2868		rtalloc_ign(&ro, RTF_CLONING);
2869#endif
2870#else /* ! __FreeBSD__ */
2871		rtalloc_noclone(&ro, NO_CLONING);
2872#endif
2873		rt = ro.ro_rt;
2874		break;
2875#endif /* INET */
2876#ifdef INET6
2877	case AF_INET6:
2878		hlen = sizeof(struct ip6_hdr);
2879		bzero(&ro6, sizeof(ro6));
2880		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2881		dst6->sin6_family = AF_INET6;
2882		dst6->sin6_len = sizeof(*dst6);
2883		dst6->sin6_addr = addr->v6;
2884#ifdef __FreeBSD__
2885#ifdef RTF_PRCLONING
2886		rtalloc_ign((struct route *)&ro6,
2887		    (RTF_CLONING | RTF_PRCLONING));
2888#else /* !RTF_PRCLONING */
2889		rtalloc_ign((struct route *)&ro6, RTF_CLONING);
2890#endif
2891#else /* ! __FreeBSD__ */
2892		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
2893#endif
2894		rt = ro6.ro_rt;
2895		break;
2896#endif /* INET6 */
2897	}
2898
2899	if (rt && rt->rt_ifp) {
2900		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2901		mss = max(tcp_mssdflt, mss);
2902		RTFREE(rt);
2903	}
2904	mss = min(mss, offer);
2905	mss = max(mss, 64);		/* sanity - at least max opt space */
2906	return (mss);
2907}
2908
2909void
2910pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2911{
2912	struct pf_rule *r = s->rule.ptr;
2913
2914	s->rt_kif = NULL;
2915	if (!r->rt || r->rt == PF_FASTROUTE)
2916		return;
2917	switch (s->af) {
2918#ifdef INET
2919	case AF_INET:
2920		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
2921		    &s->nat_src_node);
2922		s->rt_kif = r->rpool.cur->kif;
2923		break;
2924#endif /* INET */
2925#ifdef INET6
2926	case AF_INET6:
2927		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
2928		    &s->nat_src_node);
2929		s->rt_kif = r->rpool.cur->kif;
2930		break;
2931#endif /* INET6 */
2932	}
2933}
2934
2935int
2936pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
2937    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
2938#ifdef __FreeBSD__
2939    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
2940    struct ifqueue *ifq, struct inpcb *inp)
2941#else
2942    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
2943    struct ifqueue *ifq)
2944#endif
2945{
2946	struct pf_rule		*nr = NULL;
2947	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
2948	struct tcphdr		*th = pd->hdr.tcp;
2949	u_int16_t		 bport, nport = 0;
2950	sa_family_t		 af = pd->af;
2951	int			 lookup = -1;
2952	uid_t			 uid;
2953	gid_t			 gid;
2954	struct pf_rule		*r, *a = NULL;
2955	struct pf_ruleset	*ruleset = NULL;
2956	struct pf_src_node	*nsn = NULL;
2957	u_short			 reason;
2958	int			 rewrite = 0;
2959	struct pf_tag		*pftag = NULL;
2960	int			 tag = -1;
2961	u_int16_t		 mss = tcp_mssdflt;
2962	int			 asd = 0;
2963
2964	if (pf_check_congestion(ifq)) {
2965		REASON_SET(&reason, PFRES_CONGEST);
2966		return (PF_DROP);
2967	}
2968
2969	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2970
2971	if (direction == PF_OUT) {
2972		bport = nport = th->th_sport;
2973		/* check outgoing packet for BINAT/NAT */
2974		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
2975		    saddr, th->th_sport, daddr, th->th_dport,
2976		    &pd->naddr, &nport)) != NULL) {
2977			PF_ACPY(&pd->baddr, saddr, af);
2978			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2979			    &th->th_sum, &pd->naddr, nport, 0, af);
2980			rewrite++;
2981			if (nr->natpass)
2982				r = NULL;
2983			pd->nat_rule = nr;
2984		}
2985	} else {
2986		bport = nport = th->th_dport;
2987		/* check incoming packet for BINAT/RDR */
2988		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
2989		    saddr, th->th_sport, daddr, th->th_dport,
2990		    &pd->naddr, &nport)) != NULL) {
2991			PF_ACPY(&pd->baddr, daddr, af);
2992			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2993			    &th->th_sum, &pd->naddr, nport, 0, af);
2994			rewrite++;
2995			if (nr->natpass)
2996				r = NULL;
2997			pd->nat_rule = nr;
2998		}
2999	}
3000
3001	while (r != NULL) {
3002		r->evaluations++;
3003		if (r->kif != NULL &&
3004		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3005			r = r->skip[PF_SKIP_IFP].ptr;
3006		else if (r->direction && r->direction != direction)
3007			r = r->skip[PF_SKIP_DIR].ptr;
3008		else if (r->af && r->af != af)
3009			r = r->skip[PF_SKIP_AF].ptr;
3010		else if (r->proto && r->proto != IPPROTO_TCP)
3011			r = r->skip[PF_SKIP_PROTO].ptr;
3012		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg))
3013			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3014		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3015		    r->src.port[0], r->src.port[1], th->th_sport))
3016			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3017		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg))
3018			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3019		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3020		    r->dst.port[0], r->dst.port[1], th->th_dport))
3021			r = r->skip[PF_SKIP_DST_PORT].ptr;
3022		else if (r->tos && !(r->tos & pd->tos))
3023			r = TAILQ_NEXT(r, entries);
3024		else if (r->rule_flag & PFRULE_FRAGMENT)
3025			r = TAILQ_NEXT(r, entries);
3026		else if ((r->flagset & th->th_flags) != r->flags)
3027			r = TAILQ_NEXT(r, entries);
3028		else if (r->uid.op && (lookup != -1 || (lookup =
3029#ifdef __FreeBSD__
3030		    pf_socket_lookup(&uid, &gid, direction, pd, inp), 1)) &&
3031#else
3032		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
3033#endif
3034		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3035		    uid))
3036			r = TAILQ_NEXT(r, entries);
3037		else if (r->gid.op && (lookup != -1 || (lookup =
3038#ifdef __FreeBSD__
3039		    pf_socket_lookup(&uid, &gid, direction, pd, inp), 1)) &&
3040#else
3041		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
3042#endif
3043		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3044		    gid))
3045			r = TAILQ_NEXT(r, entries);
3046		else if (r->prob && r->prob <= arc4random())
3047			r = TAILQ_NEXT(r, entries);
3048		else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag))
3049			r = TAILQ_NEXT(r, entries);
3050		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
3051		    pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
3052			r = TAILQ_NEXT(r, entries);
3053		else {
3054			if (r->tag)
3055				tag = r->tag;
3056			if (r->anchor == NULL) {
3057				*rm = r;
3058				*am = a;
3059				*rsm = ruleset;
3060				if ((*rm)->quick)
3061					break;
3062				r = TAILQ_NEXT(r, entries);
3063			} else
3064				pf_step_into_anchor(&asd, &ruleset,
3065				    PF_RULESET_FILTER, &r, &a);
3066		}
3067		if (r == NULL)
3068			pf_step_out_of_anchor(&asd, &ruleset,
3069			    PF_RULESET_FILTER, &r, &a);
3070	}
3071	r = *rm;
3072	a = *am;
3073	ruleset = *rsm;
3074
3075	REASON_SET(&reason, PFRES_MATCH);
3076
3077	if (r->log) {
3078		if (rewrite)
3079			m_copyback(m, off, sizeof(*th), (caddr_t)th);
3080		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3081	}
3082
3083	if ((r->action == PF_DROP) &&
3084	    ((r->rule_flag & PFRULE_RETURNRST) ||
3085	    (r->rule_flag & PFRULE_RETURNICMP) ||
3086	    (r->rule_flag & PFRULE_RETURN))) {
3087		/* undo NAT changes, if they have taken place */
3088		if (nr != NULL) {
3089			if (direction == PF_OUT) {
3090				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3091				    &th->th_sum, &pd->baddr, bport, 0, af);
3092				rewrite++;
3093			} else {
3094				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3095				    &th->th_sum, &pd->baddr, bport, 0, af);
3096				rewrite++;
3097			}
3098		}
3099		if (((r->rule_flag & PFRULE_RETURNRST) ||
3100		    (r->rule_flag & PFRULE_RETURN)) &&
3101		    !(th->th_flags & TH_RST)) {
3102			u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
3103
3104			if (th->th_flags & TH_SYN)
3105				ack++;
3106			if (th->th_flags & TH_FIN)
3107				ack++;
3108			pf_send_tcp(r, af, pd->dst,
3109			    pd->src, th->th_dport, th->th_sport,
3110			    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3111			    r->return_ttl, 1, pd->eh, kif->pfik_ifp);
3112		} else if ((af == AF_INET) && r->return_icmp)
3113			pf_send_icmp(m, r->return_icmp >> 8,
3114			    r->return_icmp & 255, af, r);
3115		else if ((af == AF_INET6) && r->return_icmp6)
3116			pf_send_icmp(m, r->return_icmp6 >> 8,
3117			    r->return_icmp6 & 255, af, r);
3118	}
3119
3120	if (r->action == PF_DROP)
3121		return (PF_DROP);
3122
3123	if (pf_tag_packet(m, pftag, tag)) {
3124		REASON_SET(&reason, PFRES_MEMORY);
3125		return (PF_DROP);
3126	}
3127
3128	if (r->keep_state || nr != NULL ||
3129	    (pd->flags & PFDESC_TCP_NORM)) {
3130		/* create new state */
3131		u_int16_t	 len;
3132		struct pf_state	*s = NULL;
3133		struct pf_src_node *sn = NULL;
3134
3135		len = pd->tot_len - off - (th->th_off << 2);
3136
3137		/* check maximums */
3138		if (r->max_states && (r->states >= r->max_states)) {
3139			pf_status.lcounters[LCNT_STATES]++;
3140			REASON_SET(&reason, PFRES_MAXSTATES);
3141			goto cleanup;
3142		}
3143		/* src node for filter rule */
3144		if ((r->rule_flag & PFRULE_SRCTRACK ||
3145		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3146		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
3147			REASON_SET(&reason, PFRES_SRCLIMIT);
3148			goto cleanup;
3149		}
3150		/* src node for translation rule */
3151		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3152		    ((direction == PF_OUT &&
3153		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3154		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
3155			REASON_SET(&reason, PFRES_SRCLIMIT);
3156			goto cleanup;
3157		}
3158		s = pool_get(&pf_state_pl, PR_NOWAIT);
3159		if (s == NULL) {
3160			REASON_SET(&reason, PFRES_MEMORY);
3161cleanup:
3162			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3163				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3164				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3165				pf_status.src_nodes--;
3166				pool_put(&pf_src_tree_pl, sn);
3167			}
3168			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3169			    nsn->expire == 0) {
3170				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3171				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3172				pf_status.src_nodes--;
3173				pool_put(&pf_src_tree_pl, nsn);
3174			}
3175			return (PF_DROP);
3176		}
3177		bzero(s, sizeof(*s));
3178		s->rule.ptr = r;
3179		s->nat_rule.ptr = nr;
3180		s->anchor.ptr = a;
3181		STATE_INC_COUNTERS(s);
3182		s->allow_opts = r->allow_opts;
3183		s->log = r->log & 2;
3184		s->proto = IPPROTO_TCP;
3185		s->direction = direction;
3186		s->af = af;
3187		if (direction == PF_OUT) {
3188			PF_ACPY(&s->gwy.addr, saddr, af);
3189			s->gwy.port = th->th_sport;		/* sport */
3190			PF_ACPY(&s->ext.addr, daddr, af);
3191			s->ext.port = th->th_dport;
3192			if (nr != NULL) {
3193				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3194				s->lan.port = bport;
3195			} else {
3196				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3197				s->lan.port = s->gwy.port;
3198			}
3199		} else {
3200			PF_ACPY(&s->lan.addr, daddr, af);
3201			s->lan.port = th->th_dport;
3202			PF_ACPY(&s->ext.addr, saddr, af);
3203			s->ext.port = th->th_sport;
3204			if (nr != NULL) {
3205				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3206				s->gwy.port = bport;
3207			} else {
3208				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3209				s->gwy.port = s->lan.port;
3210			}
3211		}
3212
3213		s->src.seqlo = ntohl(th->th_seq);
3214		s->src.seqhi = s->src.seqlo + len + 1;
3215		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3216		    r->keep_state == PF_STATE_MODULATE) {
3217			/* Generate sequence number modulator */
3218			while ((s->src.seqdiff = htonl(arc4random())) == 0)
3219				;
3220			pf_change_a(&th->th_seq, &th->th_sum,
3221			    htonl(s->src.seqlo + s->src.seqdiff), 0);
3222			rewrite = 1;
3223		} else
3224			s->src.seqdiff = 0;
3225		if (th->th_flags & TH_SYN) {
3226			s->src.seqhi++;
3227			s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
3228		}
3229		s->src.max_win = MAX(ntohs(th->th_win), 1);
3230		if (s->src.wscale & PF_WSCALE_MASK) {
3231			/* Remove scale factor from initial window */
3232			int win = s->src.max_win;
3233			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3234			s->src.max_win = (win - 1) >>
3235			    (s->src.wscale & PF_WSCALE_MASK);
3236		}
3237		if (th->th_flags & TH_FIN)
3238			s->src.seqhi++;
3239		s->dst.seqhi = 1;
3240		s->dst.max_win = 1;
3241		s->src.state = TCPS_SYN_SENT;
3242		s->dst.state = TCPS_CLOSED;
3243		s->creation = time_second;
3244		s->expire = time_second;
3245		s->timeout = PFTM_TCP_FIRST_PACKET;
3246		pf_set_rt_ifp(s, saddr);
3247		if (sn != NULL) {
3248			s->src_node = sn;
3249			s->src_node->states++;
3250		}
3251		if (nsn != NULL) {
3252			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3253			s->nat_src_node = nsn;
3254			s->nat_src_node->states++;
3255		}
3256		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
3257		    off, pd, th, &s->src, &s->dst)) {
3258			REASON_SET(&reason, PFRES_MEMORY);
3259			pf_src_tree_remove_state(s);
3260			STATE_DEC_COUNTERS(s);
3261			pool_put(&pf_state_pl, s);
3262			return (PF_DROP);
3263		}
3264		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3265		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
3266		    &s->src, &s->dst, &rewrite)) {
3267			/* This really shouldn't happen!!! */
3268			DPFPRINTF(PF_DEBUG_URGENT,
3269			    ("pf_normalize_tcp_stateful failed on first pkt"));
3270			pf_normalize_tcp_cleanup(s);
3271			pf_src_tree_remove_state(s);
3272			STATE_DEC_COUNTERS(s);
3273			pool_put(&pf_state_pl, s);
3274			return (PF_DROP);
3275		}
3276		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3277			pf_normalize_tcp_cleanup(s);
3278			REASON_SET(&reason, PFRES_STATEINS);
3279			pf_src_tree_remove_state(s);
3280			STATE_DEC_COUNTERS(s);
3281			pool_put(&pf_state_pl, s);
3282			return (PF_DROP);
3283		} else
3284			*sm = s;
3285		if (tag > 0) {
3286			pf_tag_ref(tag);
3287			s->tag = tag;
3288		}
3289		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3290		    r->keep_state == PF_STATE_SYNPROXY) {
3291			s->src.state = PF_TCPS_PROXY_SRC;
3292			if (nr != NULL) {
3293				if (direction == PF_OUT) {
3294					pf_change_ap(saddr, &th->th_sport,
3295					    pd->ip_sum, &th->th_sum, &pd->baddr,
3296					    bport, 0, af);
3297				} else {
3298					pf_change_ap(daddr, &th->th_dport,
3299					    pd->ip_sum, &th->th_sum, &pd->baddr,
3300					    bport, 0, af);
3301				}
3302			}
3303			s->src.seqhi = htonl(arc4random());
3304			/* Find mss option */
3305			mss = pf_get_mss(m, off, th->th_off, af);
3306			mss = pf_calc_mss(saddr, af, mss);
3307			mss = pf_calc_mss(daddr, af, mss);
3308			s->src.mss = mss;
3309			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
3310			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3311			    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, NULL, NULL);
3312			REASON_SET(&reason, PFRES_SYNPROXY);
3313			return (PF_SYNPROXY_DROP);
3314		}
3315	}
3316
3317	/* copy back packet headers if we performed NAT operations */
3318	if (rewrite)
3319		m_copyback(m, off, sizeof(*th), (caddr_t)th);
3320
3321	return (PF_PASS);
3322}
3323
/*
 * Run a UDP packet through the translation lookup and the active filter
 * ruleset, and create a state entry when the matching rule keeps state or
 * a translation rule applied.
 *
 * rm, am, rsm	in/out: overwritten with the matching rule, its anchor and
 *		its ruleset each time a non-anchor rule matches.  They are
 *		read back unconditionally after the loop, so they presumably
 *		hold a valid default on entry (NOTE(review): confirm against
 *		the callers).
 * sm		out: set to the new state once it has been inserted.
 * direction	compared against PF_OUT; any other value takes the inbound
 *		path.
 * kif		interface used for rule matching and state binding.
 * m, off	mbuf and offset at which the UDP header copy is written back.
 * h		handed to PFLOG_PACKET unchanged.
 * pd		packet descriptor; pd->baddr, pd->naddr and pd->nat_rule are
 *		filled in when a translation rule applies.
 * ifq		queue handed to pf_check_congestion().
 * inp		(FreeBSD only) handed to pf_socket_lookup() for uid/gid rules.
 *
 * Returns PF_PASS or PF_DROP.
 */
int
pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
#ifdef __FreeBSD__
    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
    struct ifqueue *ifq, struct inpcb *inp)
#else
    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
    struct ifqueue *ifq)
#endif
{
	struct pf_rule		*nr = NULL;
	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
	struct udphdr		*uh = pd->hdr.udp;
	u_int16_t		 bport, nport = 0;
	sa_family_t		 af = pd->af;
	int			 lookup = -1;
	uid_t			 uid;
	gid_t			 gid;
	struct pf_rule		*r, *a = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_src_node	*nsn = NULL;
	u_short			 reason;
	int			 rewrite = 0;
	struct pf_tag		*pftag = NULL;
	int			 tag = -1;
	int			 asd = 0;

	/* drop right away if pf_check_congestion() reports congestion */
	if (pf_check_congestion(ifq)) {
		REASON_SET(&reason, PFRES_CONGEST);
		return (PF_DROP);
	}

	/* rule evaluation starts at the head of the active main filter rules */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);

	if (direction == PF_OUT) {
		/* bport keeps the pre-translation port for undo and state setup */
		bport = nport = uh->uh_sport;
		/* check outgoing packet for BINAT/NAT */
		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
		    saddr, uh->uh_sport, daddr, uh->uh_dport,
		    &pd->naddr, &nport)) != NULL) {
			/* save the original source address before rewriting it */
			PF_ACPY(&pd->baddr, saddr, af);
			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
			    &uh->uh_sum, &pd->naddr, nport, 1, af);
			rewrite++;
			/* natpass: r == NULL skips the filter rule loop below */
			if (nr->natpass)
				r = NULL;
			pd->nat_rule = nr;
		}
	} else {
		/* bport keeps the pre-translation port for undo and state setup */
		bport = nport = uh->uh_dport;
		/* check incoming packet for BINAT/RDR */
		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
		    saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
		    &nport)) != NULL) {
			/* save the original destination address before rewriting it */
			PF_ACPY(&pd->baddr, daddr, af);
			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
			    &uh->uh_sum, &pd->naddr, nport, 1, af);
			rewrite++;
			/* natpass: r == NULL skips the filter rule loop below */
			if (nr->natpass)
				r = NULL;
			pd->nat_rule = nr;
		}
	}

	/*
	 * Walk the rules.  A mismatch on interface, direction, address
	 * family, protocol, address or port follows the rule's skip[]
	 * pointer for that criterion; every other mismatch just advances to
	 * the next rule.
	 */
	while (r != NULL) {
		r->evaluations++;
		if (r->kif != NULL &&
		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != IPPROTO_UDP)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (r->src.port_op && !pf_match_port(r->src.port_op,
		    r->src.port[0], r->src.port[1], uh->uh_sport))
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
		    r->dst.port[0], r->dst.port[1], uh->uh_dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->tos && !(r->tos & pd->tos))
			r = TAILQ_NEXT(r, entries);
		else if (r->rule_flag & PFRULE_FRAGMENT)
			r = TAILQ_NEXT(r, entries);
		/*
		 * uid/gid rules: lookup receives pf_socket_lookup()'s result,
		 * and the lookup is only repeated while lookup is still -1.
		 */
		else if (r->uid.op && (lookup != -1 || (lookup =
#ifdef __FreeBSD__
		    pf_socket_lookup(&uid, &gid, direction, pd, inp), 1)) &&
#else
		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
#endif
		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
		    uid))
			r = TAILQ_NEXT(r, entries);
		else if (r->gid.op && (lookup != -1 || (lookup =
#ifdef __FreeBSD__
		    pf_socket_lookup(&uid, &gid, direction, pd, inp), 1)) &&
#else
		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
#endif
		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
		    gid))
			r = TAILQ_NEXT(r, entries);
		else if (r->prob && r->prob <= arc4random())
			r = TAILQ_NEXT(r, entries);
		else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag))
			r = TAILQ_NEXT(r, entries);
		/* rules carrying an OS fingerprint never match here */
		else if (r->os_fingerprint != PF_OSFP_ANY)
			r = TAILQ_NEXT(r, entries);
		else {
			/* rule matched */
			if (r->tag)
				tag = r->tag;
			if (r->anchor == NULL) {
				/* last match wins unless the rule is "quick" */
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick)
					break;
				r = TAILQ_NEXT(r, entries);
			} else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a);
		}
		if (r == NULL)
			pf_step_out_of_anchor(&asd, &ruleset,
			    PF_RULESET_FILTER, &r, &a);
	}
	/* continue with whatever rule/anchor/ruleset is recorded for the caller */
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log) {
		/* write the translated header into the mbuf before logging it */
		if (rewrite)
			m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
	}

	if ((r->action == PF_DROP) &&
	    ((r->rule_flag & PFRULE_RETURNICMP) ||
	    (r->rule_flag & PFRULE_RETURN))) {
		/* undo NAT changes, if they have taken place */
		if (nr != NULL) {
			if (direction == PF_OUT) {
				pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
				    &uh->uh_sum, &pd->baddr, bport, 1, af);
				rewrite++;
			} else {
				pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
				    &uh->uh_sum, &pd->baddr, bport, 1, af);
				rewrite++;
			}
		}
		/* type is in the high byte of return_icmp*, code in the low byte */
		if ((af == AF_INET) && r->return_icmp)
			pf_send_icmp(m, r->return_icmp >> 8,
			    r->return_icmp & 255, af, r);
		else if ((af == AF_INET6) && r->return_icmp6)
			pf_send_icmp(m, r->return_icmp6 >> 8,
			    r->return_icmp6 & 255, af, r);
	}

	if (r->action == PF_DROP)
		return (PF_DROP);

	if (pf_tag_packet(m, pftag, tag)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return (PF_DROP);
	}

	if (r->keep_state || nr != NULL) {
		/* create new state */
		struct pf_state	*s = NULL;
		struct pf_src_node *sn = NULL;

		/* check maximums */
		if (r->max_states && (r->states >= r->max_states)) {
			pf_status.lcounters[LCNT_STATES]++;
			REASON_SET(&reason, PFRES_MAXSTATES);
			goto cleanup;
		}
		/* src node for filter rule */
		if ((r->rule_flag & PFRULE_SRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR) &&
		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
			REASON_SET(&reason, PFRES_SRCLIMIT);
			goto cleanup;
		}
		/* src node for translation rule */
		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
		    ((direction == PF_OUT &&
		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
			REASON_SET(&reason, PFRES_SRCLIMIT);
			goto cleanup;
		}
		s = pool_get(&pf_state_pl, PR_NOWAIT);
		if (s == NULL) {
			REASON_SET(&reason, PFRES_MEMORY);
			/*
			 * Also reached by goto from the checks above (s is
			 * still NULL then).  Source nodes with no states and
			 * expire == 0 are removed from the tree and freed.
			 */
cleanup:
			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
				pf_status.src_nodes--;
				pool_put(&pf_src_tree_pl, sn);
			}
			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
			    nsn->expire == 0) {
				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
				pf_status.src_nodes--;
				pool_put(&pf_src_tree_pl, nsn);
			}
			return (PF_DROP);
		}
		bzero(s, sizeof(*s));
		s->rule.ptr = r;
		s->nat_rule.ptr = nr;
		s->anchor.ptr = a;
		STATE_INC_COUNTERS(s);
		s->allow_opts = r->allow_opts;
		s->log = r->log & 2;
		s->proto = IPPROTO_UDP;
		s->direction = direction;
		s->af = af;
		/*
		 * Outbound: gwy is the (possibly translated) source, ext the
		 * destination, lan the pre-translation source.  Inbound: lan
		 * is the (possibly translated) destination, ext the source,
		 * gwy the pre-translation destination.
		 */
		if (direction == PF_OUT) {
			PF_ACPY(&s->gwy.addr, saddr, af);
			s->gwy.port = uh->uh_sport;
			PF_ACPY(&s->ext.addr, daddr, af);
			s->ext.port = uh->uh_dport;
			if (nr != NULL) {
				PF_ACPY(&s->lan.addr, &pd->baddr, af);
				s->lan.port = bport;
			} else {
				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
				s->lan.port = s->gwy.port;
			}
		} else {
			PF_ACPY(&s->lan.addr, daddr, af);
			s->lan.port = uh->uh_dport;
			PF_ACPY(&s->ext.addr, saddr, af);
			s->ext.port = uh->uh_sport;
			if (nr != NULL) {
				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
				s->gwy.port = bport;
			} else {
				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
				s->gwy.port = s->lan.port;
			}
		}
		s->src.state = PFUDPS_SINGLE;
		s->dst.state = PFUDPS_NO_TRAFFIC;
		s->creation = time_second;
		s->expire = time_second;
		s->timeout = PFTM_UDP_FIRST_PACKET;
		pf_set_rt_ifp(s, saddr);
		if (sn != NULL) {
			s->src_node = sn;
			s->src_node->states++;
		}
		if (nsn != NULL) {
			PF_ACPY(&nsn->raddr, &pd->naddr, af);
			s->nat_src_node = nsn;
			s->nat_src_node->states++;
		}
		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
			/* insertion failed: release everything taken for s */
			REASON_SET(&reason, PFRES_STATEINS);
			pf_src_tree_remove_state(s);
			STATE_DEC_COUNTERS(s);
			pool_put(&pf_state_pl, s);
			return (PF_DROP);
		} else
			*sm = s;
		if (tag > 0) {
			pf_tag_ref(tag);
			s->tag = tag;
		}
	}

	/* copy back packet headers if we performed NAT operations */
	if (rewrite)
		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);

	return (PF_PASS);
}
3614
/*
 * Run an ICMP or ICMPv6 packet through the translation lookup and the
 * active filter ruleset, and create a state entry when the matching rule
 * keeps state or a translation rule applied.  No state is created for the
 * message types that set state_icmp below.
 *
 * rm, am, rsm	in/out: overwritten with the matching rule, its anchor and
 *		its ruleset each time a non-anchor rule matches.  They are
 *		read back unconditionally after the loop, so they presumably
 *		hold a valid default on entry (NOTE(review): confirm against
 *		the callers).
 * sm		out: set to the new state once it has been inserted.
 * direction	compared against PF_OUT; any other value takes the inbound
 *		path.
 * kif		interface used for rule matching and state binding.
 * m, off	mbuf and offset at which the ICMPv6 header copy is written
 *		back.
 * h		handed to PFLOG_PACKET unchanged.
 * pd		packet descriptor; pd->proto selects ICMP vs. ICMPv6, and
 *		pd->baddr, pd->naddr and pd->nat_rule are filled in when a
 *		translation rule applies.
 * ifq		queue handed to pf_check_congestion().
 *
 * Returns PF_PASS or PF_DROP.
 */
int
pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
    struct ifqueue *ifq)
{
	struct pf_rule		*nr = NULL;
	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
	struct pf_rule		*r, *a = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_src_node	*nsn = NULL;
	u_short			 reason;
	u_int16_t		 icmpid = 0;	/* make the compiler happy */
	sa_family_t		 af = pd->af;
	u_int8_t		 icmptype = 0;	/* make the compiler happy */
	u_int8_t		 icmpcode = 0;	/* make the compiler happy */
	int			 state_icmp = 0;
	struct pf_tag		*pftag = NULL;
	int			 tag = -1;
#ifdef INET6
	int			 rewrite = 0;
#endif /* INET6 */
	int			 asd = 0;

	/* drop right away if pf_check_congestion() reports congestion */
	if (pf_check_congestion(ifq)) {
		REASON_SET(&reason, PFRES_CONGEST);
		return (PF_DROP);
	}

	/*
	 * Pull type, code and id out of the header copy.  state_icmp is set
	 * for the listed message types; it suppresses state creation below.
	 */
	switch (pd->proto) {
#ifdef INET
	case IPPROTO_ICMP:
		icmptype = pd->hdr.icmp->icmp_type;
		icmpcode = pd->hdr.icmp->icmp_code;
		icmpid = pd->hdr.icmp->icmp_id;

		if (icmptype == ICMP_UNREACH ||
		    icmptype == ICMP_SOURCEQUENCH ||
		    icmptype == ICMP_REDIRECT ||
		    icmptype == ICMP_TIMXCEED ||
		    icmptype == ICMP_PARAMPROB)
			state_icmp++;
		break;
#endif /* INET */
#ifdef INET6
	case IPPROTO_ICMPV6:
		icmptype = pd->hdr.icmp6->icmp6_type;
		icmpcode = pd->hdr.icmp6->icmp6_code;
		icmpid = pd->hdr.icmp6->icmp6_id;

		if (icmptype == ICMP6_DST_UNREACH ||
		    icmptype == ICMP6_PACKET_TOO_BIG ||
		    icmptype == ICMP6_TIME_EXCEEDED ||
		    icmptype == ICMP6_PARAM_PROB)
			state_icmp++;
		break;
#endif /* INET6 */
	}

	/* rule evaluation starts at the head of the active main filter rules */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);

	/*
	 * Translation lookup; the ICMP id is passed where the other protocols
	 * pass ports.  Only the IPv6 path modifies the ICMP header copy (its
	 * checksum field), so only that path sets rewrite.
	 */
	if (direction == PF_OUT) {
		/* check outgoing packet for BINAT/NAT */
		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
		    saddr, icmpid, daddr, icmpid, &pd->naddr, NULL)) != NULL) {
			/* save the original source address before rewriting it */
			PF_ACPY(&pd->baddr, saddr, af);
			switch (af) {
#ifdef INET
			case AF_INET:
				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
				    pd->naddr.v4.s_addr, 0);
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
				    &pd->naddr, 0);
				rewrite++;
				break;
#endif /* INET6 */
			}
			/* natpass: r == NULL skips the filter rule loop below */
			if (nr->natpass)
				r = NULL;
			pd->nat_rule = nr;
		}
	} else {
		/* check incoming packet for BINAT/RDR */
		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
		    saddr, icmpid, daddr, icmpid, &pd->naddr, NULL)) != NULL) {
			/* save the original destination address before rewriting it */
			PF_ACPY(&pd->baddr, daddr, af);
			switch (af) {
#ifdef INET
			case AF_INET:
				pf_change_a(&daddr->v4.s_addr,
				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
				    &pd->naddr, 0);
				rewrite++;
				break;
#endif /* INET6 */
			}
			/* natpass: r == NULL skips the filter rule loop below */
			if (nr->natpass)
				r = NULL;
			pd->nat_rule = nr;
		}
	}

	/*
	 * Walk the rules.  A mismatch on interface, direction, address
	 * family, protocol or address follows the rule's skip[] pointer for
	 * that criterion; every other mismatch just advances to the next
	 * rule.
	 */
	while (r != NULL) {
		r->evaluations++;
		if (r->kif != NULL &&
		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		/* r->type and r->code hold value + 1; zero means "not restricted" */
		else if (r->type && r->type != icmptype + 1)
			r = TAILQ_NEXT(r, entries);
		else if (r->code && r->code != icmpcode + 1)
			r = TAILQ_NEXT(r, entries);
		else if (r->tos && !(r->tos & pd->tos))
			r = TAILQ_NEXT(r, entries);
		else if (r->rule_flag & PFRULE_FRAGMENT)
			r = TAILQ_NEXT(r, entries);
		else if (r->prob && r->prob <= arc4random())
			r = TAILQ_NEXT(r, entries);
		else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag))
			r = TAILQ_NEXT(r, entries);
		/* rules carrying an OS fingerprint never match here */
		else if (r->os_fingerprint != PF_OSFP_ANY)
			r = TAILQ_NEXT(r, entries);
		else {
			/* rule matched */
			if (r->tag)
				tag = r->tag;
			if (r->anchor == NULL) {
				/* last match wins unless the rule is "quick" */
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick)
					break;
				r = TAILQ_NEXT(r, entries);
			} else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a);
		}
		if (r == NULL)
			pf_step_out_of_anchor(&asd, &ruleset,
			    PF_RULESET_FILTER, &r, &a);
	}
	/* continue with whatever rule/anchor/ruleset is recorded for the caller */
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log) {
#ifdef INET6
		/* write the modified ICMPv6 header into the mbuf before logging */
		if (rewrite)
			m_copyback(m, off, sizeof(struct icmp6_hdr),
			    (caddr_t)pd->hdr.icmp6);
#endif /* INET6 */
		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
	}

	if (r->action != PF_PASS)
		return (PF_DROP);

	if (pf_tag_packet(m, pftag, tag)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return (PF_DROP);
	}

	if (!state_icmp && (r->keep_state || nr != NULL)) {
		/* create new state */
		struct pf_state	*s = NULL;
		struct pf_src_node *sn = NULL;

		/* check maximums */
		if (r->max_states && (r->states >= r->max_states)) {
			pf_status.lcounters[LCNT_STATES]++;
			REASON_SET(&reason, PFRES_MAXSTATES);
			goto cleanup;
		}
		/* src node for filter rule */
		if ((r->rule_flag & PFRULE_SRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR) &&
		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
			REASON_SET(&reason, PFRES_SRCLIMIT);
			goto cleanup;
		}
		/* src node for translation rule */
		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
		    ((direction == PF_OUT &&
		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
			REASON_SET(&reason, PFRES_SRCLIMIT);
			goto cleanup;
		}
		s = pool_get(&pf_state_pl, PR_NOWAIT);
		if (s == NULL) {
			REASON_SET(&reason, PFRES_MEMORY);
			/*
			 * Also reached by goto from the checks above (s is
			 * still NULL then).  Source nodes with no states and
			 * expire == 0 are removed from the tree and freed.
			 */
cleanup:
			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
				pf_status.src_nodes--;
				pool_put(&pf_src_tree_pl, sn);
			}
			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
			    nsn->expire == 0) {
				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
				pf_status.src_nodes--;
				pool_put(&pf_src_tree_pl, nsn);
			}
			return (PF_DROP);
		}
		bzero(s, sizeof(*s));
		s->rule.ptr = r;
		s->nat_rule.ptr = nr;
		s->anchor.ptr = a;
		STATE_INC_COUNTERS(s);
		s->allow_opts = r->allow_opts;
		s->log = r->log & 2;
		s->proto = pd->proto;
		s->direction = direction;
		s->af = af;
		/*
		 * The ICMP id is stored in every port field.  Outbound: gwy is
		 * the (possibly translated) source and lan the pre-translation
		 * source.  Inbound: lan is the (possibly translated)
		 * destination and gwy the pre-translation destination.
		 */
		if (direction == PF_OUT) {
			PF_ACPY(&s->gwy.addr, saddr, af);
			s->gwy.port = icmpid;
			PF_ACPY(&s->ext.addr, daddr, af);
			s->ext.port = icmpid;
			if (nr != NULL)
				PF_ACPY(&s->lan.addr, &pd->baddr, af);
			else
				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
			s->lan.port = icmpid;
		} else {
			PF_ACPY(&s->lan.addr, daddr, af);
			s->lan.port = icmpid;
			PF_ACPY(&s->ext.addr, saddr, af);
			s->ext.port = icmpid;
			if (nr != NULL)
				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
			else
				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
			s->gwy.port = icmpid;
		}
		s->creation = time_second;
		s->expire = time_second;
		s->timeout = PFTM_ICMP_FIRST_PACKET;
		pf_set_rt_ifp(s, saddr);
		if (sn != NULL) {
			s->src_node = sn;
			s->src_node->states++;
		}
		if (nsn != NULL) {
			PF_ACPY(&nsn->raddr, &pd->naddr, af);
			s->nat_src_node = nsn;
			s->nat_src_node->states++;
		}
		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
			/* insertion failed: release everything taken for s */
			REASON_SET(&reason, PFRES_STATEINS);
			pf_src_tree_remove_state(s);
			STATE_DEC_COUNTERS(s);
			pool_put(&pf_state_pl, s);
			return (PF_DROP);
		} else
			*sm = s;
		if (tag > 0) {
			pf_tag_ref(tag);
			s->tag = tag;
		}
	}

#ifdef INET6
	/* copy back packet headers if we performed IPv6 NAT operations */
	if (rewrite)
		m_copyback(m, off, sizeof(struct icmp6_hdr),
		    (caddr_t)pd->hdr.icmp6);
#endif /* INET6 */

	return (PF_PASS);
}
3908
3909int
3910pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
3911    struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3912    struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq)
3913{
3914	struct pf_rule		*nr = NULL;
3915	struct pf_rule		*r, *a = NULL;
3916	struct pf_ruleset	*ruleset = NULL;
3917	struct pf_src_node	*nsn = NULL;
3918	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3919	sa_family_t		 af = pd->af;
3920	u_short			 reason;
3921	struct pf_tag		*pftag = NULL;
3922	int			 tag = -1;
3923	int			 asd = 0;
3924
3925	if (pf_check_congestion(ifq)) {
3926		REASON_SET(&reason, PFRES_CONGEST);
3927		return (PF_DROP);
3928	}
3929
3930	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3931
3932	if (direction == PF_OUT) {
3933		/* check outgoing packet for BINAT/NAT */
3934		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3935		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3936			PF_ACPY(&pd->baddr, saddr, af);
3937			switch (af) {
3938#ifdef INET
3939			case AF_INET:
3940				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3941				    pd->naddr.v4.s_addr, 0);
3942				break;
3943#endif /* INET */
3944#ifdef INET6
3945			case AF_INET6:
3946				PF_ACPY(saddr, &pd->naddr, af);
3947				break;
3948#endif /* INET6 */
3949			}
3950			if (nr->natpass)
3951				r = NULL;
3952			pd->nat_rule = nr;
3953		}
3954	} else {
3955		/* check incoming packet for BINAT/RDR */
3956		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3957		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3958			PF_ACPY(&pd->baddr, daddr, af);
3959			switch (af) {
3960#ifdef INET
3961			case AF_INET:
3962				pf_change_a(&daddr->v4.s_addr,
3963				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3964				break;
3965#endif /* INET */
3966#ifdef INET6
3967			case AF_INET6:
3968				PF_ACPY(daddr, &pd->naddr, af);
3969				break;
3970#endif /* INET6 */
3971			}
3972			if (nr->natpass)
3973				r = NULL;
3974			pd->nat_rule = nr;
3975		}
3976	}
3977
3978	while (r != NULL) {
3979		r->evaluations++;
3980		if (r->kif != NULL &&
3981		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3982			r = r->skip[PF_SKIP_IFP].ptr;
3983		else if (r->direction && r->direction != direction)
3984			r = r->skip[PF_SKIP_DIR].ptr;
3985		else if (r->af && r->af != af)
3986			r = r->skip[PF_SKIP_AF].ptr;
3987		else if (r->proto && r->proto != pd->proto)
3988			r = r->skip[PF_SKIP_PROTO].ptr;
3989		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg))
3990			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3991		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg))
3992			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3993		else if (r->tos && !(r->tos & pd->tos))
3994			r = TAILQ_NEXT(r, entries);
3995		else if (r->rule_flag & PFRULE_FRAGMENT)
3996			r = TAILQ_NEXT(r, entries);
3997		else if (r->prob && r->prob <= arc4random())
3998			r = TAILQ_NEXT(r, entries);
3999		else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag))
4000			r = TAILQ_NEXT(r, entries);
4001		else if (r->os_fingerprint != PF_OSFP_ANY)
4002			r = TAILQ_NEXT(r, entries);
4003		else {
4004			if (r->tag)
4005				tag = r->tag;
4006			if (r->anchor == NULL) {
4007				*rm = r;
4008				*am = a;
4009				*rsm = ruleset;
4010				if ((*rm)->quick)
4011					break;
4012				r = TAILQ_NEXT(r, entries);
4013			} else
4014				pf_step_into_anchor(&asd, &ruleset,
4015				    PF_RULESET_FILTER, &r, &a);
4016		}
4017		if (r == NULL)
4018			pf_step_out_of_anchor(&asd, &ruleset,
4019			    PF_RULESET_FILTER, &r, &a);
4020	}
4021	r = *rm;
4022	a = *am;
4023	ruleset = *rsm;
4024
4025	REASON_SET(&reason, PFRES_MATCH);
4026
4027	if (r->log)
4028		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
4029
4030	if ((r->action == PF_DROP) &&
4031	    ((r->rule_flag & PFRULE_RETURNICMP) ||
4032	    (r->rule_flag & PFRULE_RETURN))) {
4033		struct pf_addr *a = NULL;
4034
4035		if (nr != NULL) {
4036			if (direction == PF_OUT)
4037				a = saddr;
4038			else
4039				a = daddr;
4040		}
4041		if (a != NULL) {
4042			switch (af) {
4043#ifdef INET
4044			case AF_INET:
4045				pf_change_a(&a->v4.s_addr, pd->ip_sum,
4046				    pd->baddr.v4.s_addr, 0);
4047				break;
4048#endif /* INET */
4049#ifdef INET6
4050			case AF_INET6:
4051				PF_ACPY(a, &pd->baddr, af);
4052				break;
4053#endif /* INET6 */
4054			}
4055		}
4056		if ((af == AF_INET) && r->return_icmp)
4057			pf_send_icmp(m, r->return_icmp >> 8,
4058			    r->return_icmp & 255, af, r);
4059		else if ((af == AF_INET6) && r->return_icmp6)
4060			pf_send_icmp(m, r->return_icmp6 >> 8,
4061			    r->return_icmp6 & 255, af, r);
4062	}
4063
4064	if (r->action != PF_PASS)
4065		return (PF_DROP);
4066
4067	if (pf_tag_packet(m, pftag, tag)) {
4068		REASON_SET(&reason, PFRES_MEMORY);
4069		return (PF_DROP);
4070	}
4071
4072	if (r->keep_state || nr != NULL) {
4073		/* create new state */
4074		struct pf_state	*s = NULL;
4075		struct pf_src_node *sn = NULL;
4076
4077		/* check maximums */
4078		if (r->max_states && (r->states >= r->max_states)) {
4079			pf_status.lcounters[LCNT_STATES]++;
4080			REASON_SET(&reason, PFRES_MAXSTATES);
4081			goto cleanup;
4082		}
4083		/* src node for flter rule */
4084		if ((r->rule_flag & PFRULE_SRCTRACK ||
4085		    r->rpool.opts & PF_POOL_STICKYADDR) &&
4086		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
4087			REASON_SET(&reason, PFRES_SRCLIMIT);
4088			goto cleanup;
4089		}
4090		/* src node for translation rule */
4091		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
4092		    ((direction == PF_OUT &&
4093		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
4094		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
4095			REASON_SET(&reason, PFRES_SRCLIMIT);
4096			goto cleanup;
4097		}
4098		s = pool_get(&pf_state_pl, PR_NOWAIT);
4099		if (s == NULL) {
4100			REASON_SET(&reason, PFRES_MEMORY);
4101cleanup:
4102			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
4103				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
4104				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4105				pf_status.src_nodes--;
4106				pool_put(&pf_src_tree_pl, sn);
4107			}
4108			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
4109			    nsn->expire == 0) {
4110				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
4111				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4112				pf_status.src_nodes--;
4113				pool_put(&pf_src_tree_pl, nsn);
4114			}
4115			return (PF_DROP);
4116		}
4117		bzero(s, sizeof(*s));
4118		s->rule.ptr = r;
4119		s->nat_rule.ptr = nr;
4120		s->anchor.ptr = a;
4121		STATE_INC_COUNTERS(s);
4122		s->allow_opts = r->allow_opts;
4123		s->log = r->log & 2;
4124		s->proto = pd->proto;
4125		s->direction = direction;
4126		s->af = af;
4127		if (direction == PF_OUT) {
4128			PF_ACPY(&s->gwy.addr, saddr, af);
4129			PF_ACPY(&s->ext.addr, daddr, af);
4130			if (nr != NULL)
4131				PF_ACPY(&s->lan.addr, &pd->baddr, af);
4132			else
4133				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
4134		} else {
4135			PF_ACPY(&s->lan.addr, daddr, af);
4136			PF_ACPY(&s->ext.addr, saddr, af);
4137			if (nr != NULL)
4138				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
4139			else
4140				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
4141		}
4142		s->src.state = PFOTHERS_SINGLE;
4143		s->dst.state = PFOTHERS_NO_TRAFFIC;
4144		s->creation = time_second;
4145		s->expire = time_second;
4146		s->timeout = PFTM_OTHER_FIRST_PACKET;
4147		pf_set_rt_ifp(s, saddr);
4148		if (sn != NULL) {
4149			s->src_node = sn;
4150			s->src_node->states++;
4151		}
4152		if (nsn != NULL) {
4153			PF_ACPY(&nsn->raddr, &pd->naddr, af);
4154			s->nat_src_node = nsn;
4155			s->nat_src_node->states++;
4156		}
4157		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
4158			REASON_SET(&reason, PFRES_STATEINS);
4159			pf_src_tree_remove_state(s);
4160			STATE_DEC_COUNTERS(s);
4161			pool_put(&pf_state_pl, s);
4162			return (PF_DROP);
4163		} else
4164			*sm = s;
4165		if (tag > 0) {
4166			pf_tag_ref(tag);
4167			s->tag = tag;
4168		}
4169	}
4170
4171	return (PF_PASS);
4172}
4173
4174int
4175pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
4176    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
4177    struct pf_ruleset **rsm)
4178{
4179	struct pf_rule		*r, *a = NULL;
4180	struct pf_ruleset	*ruleset = NULL;
4181	sa_family_t		 af = pd->af;
4182	u_short			 reason;
4183	struct pf_tag		*pftag = NULL;
4184	int			 tag = -1;
4185	int			 asd = 0;
4186
4187	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4188	while (r != NULL) {
4189		r->evaluations++;
4190		if (r->kif != NULL &&
4191		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
4192			r = r->skip[PF_SKIP_IFP].ptr;
4193		else if (r->direction && r->direction != direction)
4194			r = r->skip[PF_SKIP_DIR].ptr;
4195		else if (r->af && r->af != af)
4196			r = r->skip[PF_SKIP_AF].ptr;
4197		else if (r->proto && r->proto != pd->proto)
4198			r = r->skip[PF_SKIP_PROTO].ptr;
4199		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg))
4200			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4201		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg))
4202			r = r->skip[PF_SKIP_DST_ADDR].ptr;
4203		else if (r->tos && !(r->tos & pd->tos))
4204			r = TAILQ_NEXT(r, entries);
4205		else if (r->src.port_op || r->dst.port_op ||
4206		    r->flagset || r->type || r->code ||
4207		    r->os_fingerprint != PF_OSFP_ANY)
4208			r = TAILQ_NEXT(r, entries);
4209		else if (r->prob && r->prob <= arc4random())
4210			r = TAILQ_NEXT(r, entries);
4211		else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag))
4212			r = TAILQ_NEXT(r, entries);
4213		else {
4214			if (r->anchor == NULL) {
4215				*rm = r;
4216				*am = a;
4217				*rsm = ruleset;
4218				if ((*rm)->quick)
4219					break;
4220				r = TAILQ_NEXT(r, entries);
4221			} else
4222				pf_step_into_anchor(&asd, &ruleset,
4223				    PF_RULESET_FILTER, &r, &a);
4224		}
4225		if (r == NULL)
4226			pf_step_out_of_anchor(&asd, &ruleset,
4227			    PF_RULESET_FILTER, &r, &a);
4228	}
4229	r = *rm;
4230	a = *am;
4231	ruleset = *rsm;
4232
4233	REASON_SET(&reason, PFRES_MATCH);
4234
4235	if (r->log)
4236		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
4237
4238	if (r->action != PF_PASS)
4239		return (PF_DROP);
4240
4241	if (pf_tag_packet(m, pftag, tag)) {
4242		REASON_SET(&reason, PFRES_MEMORY);
4243		return (PF_DROP);
4244	}
4245
4246	return (PF_PASS);
4247}
4248
/*
 * pf_test_state_tcp: find the state entry belonging to a TCP packet and
 * check the packet against the sequence/ack windows tracked in that state.
 *
 * state	out: set by STATE_LOOKUP() to the matching state entry
 * direction	PF_IN or PF_OUT; selects which key fields are filled in
 * kif		interface; its pfik_ifp is handed to pf_send_tcp() when a
 *		RST is generated for a handshake mismatch
 * m, off	mbuf and offset of the TCP header inside it; used for
 *		pf_get_wscale(), the pf_normalize_tcp_*() calls and
 *		m_copyback()
 * h		not referenced directly in this function body
 * pd		packet descriptor (addresses, TCP header, payload length)
 * reason	out: drop reason, filled in through REASON_SET()
 *
 * Returns PF_PASS when the packet is accepted (after applying sequence
 * modulation and address translation to the header where needed),
 * PF_DROP when it is rejected, and PF_SYNPROXY_DROP on the paths where
 * the synproxy handshake code has consumed the packet.
 *
 * NOTE(review): STATE_LOOKUP() is a macro defined outside this chunk; it
 * presumably returns from this function by itself when no usable state is
 * found -- confirm against its definition.
 */
4249int
4250pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
4251    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4252    u_short *reason)
4253{
4254	struct pf_state		 key;
4255	struct tcphdr		*th = pd->hdr.tcp;
4256	u_int16_t		 win = ntohs(th->th_win);
4257	u_int32_t		 ack, end, seq, orig_seq;
4258	u_int8_t		 sws, dws;
4259	int			 ackskew;
4260	int			 copyback = 0;
4261	struct pf_state_peer	*src, *dst;
4262
	/*
	 * Build the lookup key: inbound packets are keyed on ext/gwy,
	 * everything else on lan/ext.  Ports are kept in network order.
	 */
4263	key.af = pd->af;
4264	key.proto = IPPROTO_TCP;
4265	if (direction == PF_IN)	{
4266		PF_ACPY(&key.ext.addr, pd->src, key.af);
4267		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4268		key.ext.port = th->th_sport;
4269		key.gwy.port = th->th_dport;
4270	} else {
4271		PF_ACPY(&key.lan.addr, pd->src, key.af);
4272		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4273		key.lan.port = th->th_sport;
4274		key.ext.port = th->th_dport;
4275	}
4276
4277	STATE_LOOKUP();
4278
	/*
	 * From here on "src" is the state peer for the end that sent this
	 * packet and "dst" is the opposite end: they map straight onto the
	 * state's src/dst when the packet travels in the state's own
	 * direction and are swapped otherwise.
	 */
4279	if (direction == (*state)->direction) {
4280		src = &(*state)->src;
4281		dst = &(*state)->dst;
4282	} else {
4283		src = &(*state)->dst;
4284		dst = &(*state)->src;
4285	}
4286
	/*
	 * Synproxy, first stage (PF_TCPS_PROXY_SRC).  Only packets in the
	 * state's own direction are considered.  A SYN whose sequence number
	 * equals src.seqlo is answered with a SYN|ACK built by pf_send_tcp();
	 * any other packet must be an ACK matching src.seqhi + 1 and
	 * src.seqlo + 1, and must pass pf_src_connlimit() when a source node
	 * is attached, before the state advances to PF_TCPS_PROXY_DST.
	 */
4287	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
4288		if (direction != (*state)->direction) {
4289			REASON_SET(reason, PFRES_SYNPROXY);
4290			return (PF_SYNPROXY_DROP);
4291		}
4292		if (th->th_flags & TH_SYN) {
4293			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
4294				REASON_SET(reason, PFRES_SYNPROXY);
4295				return (PF_DROP);
4296			}
4297			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4298			    pd->src, th->th_dport, th->th_sport,
4299			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
4300			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
4301			    NULL, NULL);
4302			REASON_SET(reason, PFRES_SYNPROXY);
4303			return (PF_SYNPROXY_DROP);
4304		} else if (!(th->th_flags & TH_ACK) ||
4305		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4306		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4307			REASON_SET(reason, PFRES_SYNPROXY);
4308			return (PF_DROP);
4309		} else if ((*state)->src_node != NULL &&
4310		    pf_src_connlimit(state)) {
4311			REASON_SET(reason, PFRES_SRCLIMIT);
4312			return (PF_DROP);
4313		} else
4314			(*state)->src.state = PF_TCPS_PROXY_DST;
4315	}
	/*
	 * Synproxy, second stage (PF_TCPS_PROXY_DST).  This is deliberately
	 * a separate "if", not an "else": a packet that just moved the state
	 * out of PF_TCPS_PROXY_SRC above is processed here as well.
	 *
	 * Forward direction: the ACK is re-validated and a SYN carrying
	 * dst.seqhi is sent with pf_send_tcp().  Reverse direction: a
	 * SYN|ACK acknowledging dst.seqhi + 1 is required; two ACKs are then
	 * sent, the seqdiff values and windows are derived from the two
	 * handshakes, and both peers are marked TCPS_ESTABLISHED.
	 * Every path through this block returns.
	 */
4316	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
		/*
		 * NOTE: these two locals shadow the function-level src/dst
		 * (struct pf_state_peer *) for the extent of this block; here
		 * they are host (address/port) pairs, not peer trackers.
		 */
4317		struct pf_state_host *src, *dst;
4318
4319		if (direction == PF_OUT) {
4320			src = &(*state)->gwy;
4321			dst = &(*state)->ext;
4322		} else {
4323			src = &(*state)->ext;
4324			dst = &(*state)->lan;
4325		}
4326		if (direction == (*state)->direction) {
4327			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
4328			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4329			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4330				REASON_SET(reason, PFRES_SYNPROXY);
4331				return (PF_DROP);
4332			}
4333			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
			/* dst.seqhi == 1 is treated as "not chosen yet" */
4334			if ((*state)->dst.seqhi == 1)
4335				(*state)->dst.seqhi = htonl(arc4random());
4336			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
4337			    &dst->addr, src->port, dst->port,
4338			    (*state)->dst.seqhi, 0, TH_SYN, 0,
4339			    (*state)->src.mss, 0, 0, NULL, NULL);
4340			REASON_SET(reason, PFRES_SYNPROXY);
4341			return (PF_SYNPROXY_DROP);
4342		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
4343		    (TH_SYN|TH_ACK)) ||
4344		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
4345			REASON_SET(reason, PFRES_SYNPROXY);
4346			return (PF_DROP);
4347		} else {
4348			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4349			(*state)->dst.seqlo = ntohl(th->th_seq);
			/* ACK back towards the sender of this SYN|ACK ... */
4350			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4351			    pd->src, th->th_dport, th->th_sport,
4352			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4353			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
4354			    NULL, NULL);
			/* ... and an ACK on the other leg of the connection */
4355			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
4356			    &dst->addr, src->port, dst->port,
4357			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4358			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
4359			    NULL, NULL);
			/*
			 * The seqdiff values must be computed before seqhi is
			 * overwritten just below: they use the old seqhi.
			 */
4360			(*state)->src.seqdiff = (*state)->dst.seqhi -
4361			    (*state)->src.seqlo;
4362			(*state)->dst.seqdiff = (*state)->src.seqhi -
4363			    (*state)->dst.seqlo;
4364			(*state)->src.seqhi = (*state)->src.seqlo +
4365			    (*state)->src.max_win;
4366			(*state)->dst.seqhi = (*state)->dst.seqlo +
4367			    (*state)->dst.max_win;
4368			(*state)->src.wscale = (*state)->dst.wscale = 0;
4369			(*state)->src.state = (*state)->dst.state =
4370			    TCPS_ESTABLISHED;
4371			REASON_SET(reason, PFRES_SYNPROXY);
4372			return (PF_SYNPROXY_DROP);
4373		}
4374	}
4375
	/*
	 * Window scale shifts (sws for the sender, dws for the other end)
	 * are only applied when both peers have a wscale recorded and the
	 * packet is not a SYN; otherwise both shifts are zero.
	 */
4376	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4377		sws = src->wscale & PF_WSCALE_MASK;
4378		dws = dst->wscale & PF_WSCALE_MASK;
4379	} else
4380		sws = dws = 0;
4381
4382	/*
4383	 * Sequence tracking algorithm from Guido van Rooij's paper:
4384	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
4385	 *	tcp_filtering.ps
4386	 */
4387
	/* orig_seq keeps the wire value; seq may be relaxed further down */
4388	orig_seq = seq = ntohl(th->th_seq);
4389	if (src->seqlo == 0) {
4390		/* First packet from this end. Set its state */
4391
4392		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4393		    src->scrub == NULL) {
4394			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4395				REASON_SET(reason, PFRES_MEMORY);
4396				return (PF_DROP);
4397			}
4398		}
4399
4400		/* Deferred generation of sequence number modulator */
4401		if (dst->seqdiff && !src->seqdiff) {
			/* loop until the random modulator is non-zero */
4402			while ((src->seqdiff = htonl(arc4random())) == 0)
4403				;
4404			ack = ntohl(th->th_ack) - dst->seqdiff;
4405			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4406			    src->seqdiff), 0);
4407			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4408			copyback = 1;
4409		} else {
4410			ack = ntohl(th->th_ack);
4411		}
4412
		/* end = sequence number just past this segment (SYN/FIN count) */
4413		end = seq + pd->p_len;
4414		if (th->th_flags & TH_SYN) {
4415			end++;
4416			if (dst->wscale & PF_WSCALE_FLAG) {
4417				src->wscale = pf_get_wscale(m, off, th->th_off,
4418				    pd->af);
4419				if (src->wscale & PF_WSCALE_FLAG) {
4420					/* Remove scale factor from initial
4421					 * window */
4422					sws = src->wscale & PF_WSCALE_MASK;
4423					win = ((u_int32_t)win + (1 << sws) - 1)
4424					    >> sws;
4425					dws = dst->wscale & PF_WSCALE_MASK;
4426				} else {
4427					/* fixup other window */
4428					dst->max_win <<= dst->wscale &
4429					    PF_WSCALE_MASK;
4430					/* in case of a retrans SYN|ACK */
4431					dst->wscale = 0;
4432				}
4433			}
4434		}
4435		if (th->th_flags & TH_FIN)
4436			end++;
4437
4438		src->seqlo = seq;
4439		if (src->state < TCPS_SYN_SENT)
4440			src->state = TCPS_SYN_SENT;
4441
4442		/*
4443		 * May need to slide the window (seqhi may have been set by
4444		 * the crappy stack check or if we picked up the connection
4445		 * after establishment)
4446		 */
4447		if (src->seqhi == 1 ||
4448		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4449			src->seqhi = end + MAX(1, dst->max_win << dws);
4450		if (win > src->max_win)
4451			src->max_win = win;
4452
4453	} else {
		/* Not the first packet from this end: undo/apply modulation */
4454		ack = ntohl(th->th_ack) - dst->seqdiff;
4455		if (src->seqdiff) {
4456			/* Modulate sequence numbers */
4457			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4458			    src->seqdiff), 0);
4459			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4460			copyback = 1;
4461		}
4462		end = seq + pd->p_len;
4463		if (th->th_flags & TH_SYN)
4464			end++;
4465		if (th->th_flags & TH_FIN)
4466			end++;
4467	}
4468
	/*
	 * Substitute dst->seqlo for the ack value when the packet carries no
	 * usable acknowledgement, so that ackskew below evaluates to zero.
	 */
4469	if ((th->th_flags & TH_ACK) == 0) {
4470		/* Let it pass through the ack skew check */
4471		ack = dst->seqlo;
4472	} else if ((ack == 0 &&
4473	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4474	    /* broken tcp stacks do not set ack */
4475	    (dst->state < TCPS_SYN_SENT)) {
4476		/*
4477		 * Many stacks (ours included) will set the ACK number in an
4478		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4479		 */
4480		ack = dst->seqlo;
4481	}
4482
4483	if (seq == end) {
4484		/* Ease sequencing restrictions on no data packets */
4485		seq = src->seqlo;
4486		end = seq;
4487	}
4488
	/* signed distance between what dst has sent and what is being acked */
4489	ackskew = dst->seqlo - ack;
4490
	/*
	 * Strict match.  NOTE: in the condition below each explanatory
	 * comment sits on the line *after* the test it appears to describe.
	 */
4491#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4492	if (SEQ_GEQ(src->seqhi, end) &&
4493	    /* Last octet inside other's window space */
4494	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4495	    /* Retrans: not more than one window back */
4496	    (ackskew >= -MAXACKWINDOW) &&
4497	    /* Acking not more than one reassembled fragment backwards */
4498	    (ackskew <= (MAXACKWINDOW << sws)) &&
4499	    /* Acking not more than one window forward */
4500	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4501	    (pd->flags & PFDESC_IP_REAS) == 0)) {
4502	    /* Require an exact sequence match on resets when possible */
4503
4504		if (dst->scrub || src->scrub) {
4505			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4506			    *state, src, dst, &copyback))
4507				return (PF_DROP);
4508		}
4509
4510		/* update max window */
4511		if (src->max_win < win)
4512			src->max_win = win;
4513		/* synchronize sequencing */
4514		if (SEQ_GT(end, src->seqlo))
4515			src->seqlo = end;
4516		/* slide the window of what the other end can send */
4517		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4518			dst->seqhi = ack + MAX((win << sws), 1);
4519
4520
4521		/* update states */
4522		if (th->th_flags & TH_SYN)
4523			if (src->state < TCPS_SYN_SENT)
4524				src->state = TCPS_SYN_SENT;
4525		if (th->th_flags & TH_FIN)
4526			if (src->state < TCPS_CLOSING)
4527				src->state = TCPS_CLOSING;
4528		if (th->th_flags & TH_ACK) {
4529			if (dst->state == TCPS_SYN_SENT) {
4530				dst->state = TCPS_ESTABLISHED;
				/*
				 * Both ends established: apply the source
				 * connection limit when a source node exists.
				 */
4531				if (src->state == TCPS_ESTABLISHED &&
4532				    (*state)->src_node != NULL &&
4533				    pf_src_connlimit(state)) {
4534					REASON_SET(reason, PFRES_SRCLIMIT);
4535					return (PF_DROP);
4536				}
4537			} else if (dst->state == TCPS_CLOSING)
4538				dst->state = TCPS_FIN_WAIT_2;
4539		}
4540		if (th->th_flags & TH_RST)
4541			src->state = dst->state = TCPS_TIME_WAIT;
4542
4543		/* update expire time */
4544		(*state)->expire = time_second;
		/* pick the timeout class from the pair of peer states */
4545		if (src->state >= TCPS_FIN_WAIT_2 &&
4546		    dst->state >= TCPS_FIN_WAIT_2)
4547			(*state)->timeout = PFTM_TCP_CLOSED;
4548		else if (src->state >= TCPS_FIN_WAIT_2 ||
4549		    dst->state >= TCPS_FIN_WAIT_2)
4550			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4551		else if (src->state < TCPS_ESTABLISHED ||
4552		    dst->state < TCPS_ESTABLISHED)
4553			(*state)->timeout = PFTM_TCP_OPENING;
4554		else if (src->state >= TCPS_CLOSING ||
4555		    dst->state >= TCPS_CLOSING)
4556			(*state)->timeout = PFTM_TCP_CLOSING;
4557		else
4558			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4559
4560		/* Fall through to PASS packet */
4561
4562	} else if ((dst->state < TCPS_SYN_SENT ||
4563		dst->state >= TCPS_FIN_WAIT_2 ||
4564		src->state >= TCPS_FIN_WAIT_2) &&
4565	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4566	    /* Within a window forward of the originating packet */
4567	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4568	    /* Within a window backward of the originating packet */
4569
4570		/*
4571		 * This currently handles three situations:
4572		 *  1) Stupid stacks will shotgun SYNs before their peer
4573		 *     replies.
4574		 *  2) When PF catches an already established stream (the
4575		 *     firewall rebooted, the state table was flushed, routes
4576		 *     changed...)
4577		 *  3) Packets get funky immediately after the connection
4578		 *     closes (this should catch Solaris spurious ACK|FINs
4579		 *     that web servers like to spew after a close)
4580		 *
4581		 * This must be a little more careful than the above code
4582		 * since packet floods will also be caught here. We don't
4583		 * update the TTL here to mitigate the damage of a packet
4584		 * flood and so the same code can handle awkward establishment
4585		 * and a loosened connection close.
4586		 * In the establishment case, a correct peer response will
4587		 * validate the connection, go through the normal state code
4588		 * and keep updating the state TTL.
4589		 */
4590
4591		if (pf_status.debug >= PF_DEBUG_MISC) {
4592			printf("pf: loose state match: ");
4593			pf_print_state(*state);
4594			pf_print_flags(th->th_flags);
4595			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n",
4596			    seq, ack, pd->p_len, ackskew,
4597			    (*state)->packets[0], (*state)->packets[1]);
4598		}
4599
4600		if (dst->scrub || src->scrub) {
4601			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4602			    *state, src, dst, &copyback))
4603				return (PF_DROP);
4604		}
4605
4606		/* update max window */
4607		if (src->max_win < win)
4608			src->max_win = win;
4609		/* synchronize sequencing */
4610		if (SEQ_GT(end, src->seqlo))
4611			src->seqlo = end;
4612		/* slide the window of what the other end can send */
4613		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4614			dst->seqhi = ack + MAX((win << sws), 1);
4615
		/*
		 * NOTE(review): the comment below says dst->seqhi cannot be
		 * set here, yet the statement just above does update it --
		 * the comment and the code look out of step; confirm intent.
		 */
4616		/*
4617		 * Cannot set dst->seqhi here since this could be a shotgunned
4618		 * SYN and not an already established connection.
4619		 */
4620
		/*
		 * Unlike the strict branch, neither expire nor timeout is
		 * touched on this path.
		 */
4621		if (th->th_flags & TH_FIN)
4622			if (src->state < TCPS_CLOSING)
4623				src->state = TCPS_CLOSING;
4624		if (th->th_flags & TH_RST)
4625			src->state = dst->state = TCPS_TIME_WAIT;
4626
4627		/* Fall through to PASS packet */
4628
4629	} else {
		/*
		 * Neither the strict nor the loose test matched: the packet
		 * is dropped.  While both sides are still in SYN_SENT the
		 * sender's tracking is reset so a fresh handshake can be
		 * learned; otherwise only debug output is produced.
		 */
4630		if ((*state)->dst.state == TCPS_SYN_SENT &&
4631		    (*state)->src.state == TCPS_SYN_SENT) {
4632			/* Send RST for state mismatches during handshake */
4633			if (!(th->th_flags & TH_RST))
4634				pf_send_tcp((*state)->rule.ptr, pd->af,
4635				    pd->dst, pd->src, th->th_dport,
4636				    th->th_sport, ntohl(th->th_ack), 0,
4637				    TH_RST, 0, 0,
4638				    (*state)->rule.ptr->return_ttl, 1,
4639				    pd->eh, kif->pfik_ifp);
4640			src->seqlo = 0;
4641			src->seqhi = 1;
4642			src->max_win = 1;
4643		} else if (pf_status.debug >= PF_DEBUG_MISC) {
4644			printf("pf: BAD state: ");
4645			pf_print_state(*state);
4646			pf_print_flags(th->th_flags);
4647			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d "
4648			    "dir=%s,%s\n", seq, ack, pd->p_len, ackskew,
4649			    (*state)->packets[0], (*state)->packets[1],
4650			    direction == PF_IN ? "in" : "out",
4651			    direction == (*state)->direction ? "fwd" : "rev");
			/*
			 * Digits 1-4 flag which strict-match tests failed,
			 * 5-6 which loose-match window tests failed.
			 */
4652			printf("pf: State failure on: %c %c %c %c | %c %c\n",
4653			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4654			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4655			    ' ': '2',
4656			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4657			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4658			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4659			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4660		}
4661		REASON_SET(reason, PFRES_BADSTATE);
4662		return (PF_DROP);
4663	}
4664
4665	/* Any packets which have gotten here are to be passed */
4666
4667	/* translate source/destination address, if necessary */
4668	if (STATE_TRANSLATE(*state)) {
		/* outbound: rewrite source to gwy; otherwise dest to lan */
4669		if (direction == PF_OUT)
4670			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4671			    &th->th_sum, &(*state)->gwy.addr,
4672			    (*state)->gwy.port, 0, pd->af);
4673		else
4674			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4675			    &th->th_sum, &(*state)->lan.addr,
4676			    (*state)->lan.port, 0, pd->af);
4677		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4678	} else if (copyback) {
4679		/* Copyback sequence modulation or stateful scrub changes */
4680		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4681	}
4682
4683	return (PF_PASS);
4684}
4685
4686int
4687pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4688    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4689{
4690	struct pf_state_peer	*src, *dst;
4691	struct pf_state		 key;
4692	struct udphdr		*uh = pd->hdr.udp;
4693
4694	key.af = pd->af;
4695	key.proto = IPPROTO_UDP;
4696	if (direction == PF_IN)	{
4697		PF_ACPY(&key.ext.addr, pd->src, key.af);
4698		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4699		key.ext.port = uh->uh_sport;
4700		key.gwy.port = uh->uh_dport;
4701	} else {
4702		PF_ACPY(&key.lan.addr, pd->src, key.af);
4703		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4704		key.lan.port = uh->uh_sport;
4705		key.ext.port = uh->uh_dport;
4706	}
4707
4708	STATE_LOOKUP();
4709
4710	if (direction == (*state)->direction) {
4711		src = &(*state)->src;
4712		dst = &(*state)->dst;
4713	} else {
4714		src = &(*state)->dst;
4715		dst = &(*state)->src;
4716	}
4717
4718	/* update states */
4719	if (src->state < PFUDPS_SINGLE)
4720		src->state = PFUDPS_SINGLE;
4721	if (dst->state == PFUDPS_SINGLE)
4722		dst->state = PFUDPS_MULTIPLE;
4723
4724	/* update expire time */
4725	(*state)->expire = time_second;
4726	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4727		(*state)->timeout = PFTM_UDP_MULTIPLE;
4728	else
4729		(*state)->timeout = PFTM_UDP_SINGLE;
4730
4731	/* translate source/destination address, if necessary */
4732	if (STATE_TRANSLATE(*state)) {
4733		if (direction == PF_OUT)
4734			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4735			    &uh->uh_sum, &(*state)->gwy.addr,
4736			    (*state)->gwy.port, 1, pd->af);
4737		else
4738			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4739			    &uh->uh_sum, &(*state)->lan.addr,
4740			    (*state)->lan.port, 1, pd->af);
4741		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4742	}
4743
4744	return (PF_PASS);
4745}
4746
4747int
4748pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4749    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
4750{
4751	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
4752	u_int16_t	 icmpid = 0;		/* make the compiler happy */
4753	u_int16_t	*icmpsum = NULL;	/* make the compiler happy */
4754	u_int8_t	 icmptype = 0;		/* make the compiler happy */
4755	int		 state_icmp = 0;
4756
4757	switch (pd->proto) {
4758#ifdef INET
4759	case IPPROTO_ICMP:
4760		icmptype = pd->hdr.icmp->icmp_type;
4761		icmpid = pd->hdr.icmp->icmp_id;
4762		icmpsum = &pd->hdr.icmp->icmp_cksum;
4763
4764		if (icmptype == ICMP_UNREACH ||
4765		    icmptype == ICMP_SOURCEQUENCH ||
4766		    icmptype == ICMP_REDIRECT ||
4767		    icmptype == ICMP_TIMXCEED ||
4768		    icmptype == ICMP_PARAMPROB)
4769			state_icmp++;
4770		break;
4771#endif /* INET */
4772#ifdef INET6
4773	case IPPROTO_ICMPV6:
4774		icmptype = pd->hdr.icmp6->icmp6_type;
4775		icmpid = pd->hdr.icmp6->icmp6_id;
4776		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4777
4778		if (icmptype == ICMP6_DST_UNREACH ||
4779		    icmptype == ICMP6_PACKET_TOO_BIG ||
4780		    icmptype == ICMP6_TIME_EXCEEDED ||
4781		    icmptype == ICMP6_PARAM_PROB)
4782			state_icmp++;
4783		break;
4784#endif /* INET6 */
4785	}
4786
4787	if (!state_icmp) {
4788
4789		/*
4790		 * ICMP query/reply message not related to a TCP/UDP packet.
4791		 * Search for an ICMP state.
4792		 */
4793		struct pf_state		key;
4794
4795		key.af = pd->af;
4796		key.proto = pd->proto;
4797		if (direction == PF_IN)	{
4798			PF_ACPY(&key.ext.addr, pd->src, key.af);
4799			PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4800			key.ext.port = icmpid;
4801			key.gwy.port = icmpid;
4802		} else {
4803			PF_ACPY(&key.lan.addr, pd->src, key.af);
4804			PF_ACPY(&key.ext.addr, pd->dst, key.af);
4805			key.lan.port = icmpid;
4806			key.ext.port = icmpid;
4807		}
4808
4809		STATE_LOOKUP();
4810
4811		(*state)->expire = time_second;
4812		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4813
4814		/* translate source/destination address, if necessary */
4815		if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) {
4816			if (direction == PF_OUT) {
4817				switch (pd->af) {
4818#ifdef INET
4819				case AF_INET:
4820					pf_change_a(&saddr->v4.s_addr,
4821					    pd->ip_sum,
4822					    (*state)->gwy.addr.v4.s_addr, 0);
4823					break;
4824#endif /* INET */
4825#ifdef INET6
4826				case AF_INET6:
4827					pf_change_a6(saddr,
4828					    &pd->hdr.icmp6->icmp6_cksum,
4829					    &(*state)->gwy.addr, 0);
4830					m_copyback(m, off,
4831					    sizeof(struct icmp6_hdr),
4832					    (caddr_t)pd->hdr.icmp6);
4833					break;
4834#endif /* INET6 */
4835				}
4836			} else {
4837				switch (pd->af) {
4838#ifdef INET
4839				case AF_INET:
4840					pf_change_a(&daddr->v4.s_addr,
4841					    pd->ip_sum,
4842					    (*state)->lan.addr.v4.s_addr, 0);
4843					break;
4844#endif /* INET */
4845#ifdef INET6
4846				case AF_INET6:
4847					pf_change_a6(daddr,
4848					    &pd->hdr.icmp6->icmp6_cksum,
4849					    &(*state)->lan.addr, 0);
4850					m_copyback(m, off,
4851					    sizeof(struct icmp6_hdr),
4852					    (caddr_t)pd->hdr.icmp6);
4853					break;
4854#endif /* INET6 */
4855				}
4856			}
4857		}
4858
4859		return (PF_PASS);
4860
4861	} else {
4862		/*
4863		 * ICMP error message in response to a TCP/UDP packet.
4864		 * Extract the inner TCP/UDP header and search for that state.
4865		 */
4866
4867		struct pf_pdesc	pd2;
4868#ifdef INET
4869		struct ip	h2;
4870#endif /* INET */
4871#ifdef INET6
4872		struct ip6_hdr	h2_6;
4873		int		terminal = 0;
4874#endif /* INET6 */
4875		int		ipoff2 = 0;	/* make the compiler happy */
4876		int		off2 = 0;	/* make the compiler happy */
4877
4878		pd2.af = pd->af;
4879		switch (pd->af) {
4880#ifdef INET
4881		case AF_INET:
4882			/* offset of h2 in mbuf chain */
4883			ipoff2 = off + ICMP_MINLEN;
4884
4885			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4886			    NULL, reason, pd2.af)) {
4887				DPFPRINTF(PF_DEBUG_MISC,
4888				    ("pf: ICMP error message too short "
4889				    "(ip)\n"));
4890				return (PF_DROP);
4891			}
4892			/*
4893			 * ICMP error messages don't refer to non-first
4894			 * fragments
4895			 */
4896			if (h2.ip_off & htons(IP_OFFMASK)) {
4897				REASON_SET(reason, PFRES_FRAG);
4898				return (PF_DROP);
4899			}
4900
4901			/* offset of protocol header that follows h2 */
4902			off2 = ipoff2 + (h2.ip_hl << 2);
4903
4904			pd2.proto = h2.ip_p;
4905			pd2.src = (struct pf_addr *)&h2.ip_src;
4906			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4907			pd2.ip_sum = &h2.ip_sum;
4908			break;
4909#endif /* INET */
4910#ifdef INET6
4911		case AF_INET6:
4912			ipoff2 = off + sizeof(struct icmp6_hdr);
4913
4914			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4915			    NULL, reason, pd2.af)) {
4916				DPFPRINTF(PF_DEBUG_MISC,
4917				    ("pf: ICMP error message too short "
4918				    "(ip6)\n"));
4919				return (PF_DROP);
4920			}
4921			pd2.proto = h2_6.ip6_nxt;
4922			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4923			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4924			pd2.ip_sum = NULL;
4925			off2 = ipoff2 + sizeof(h2_6);
4926			do {
4927				switch (pd2.proto) {
4928				case IPPROTO_FRAGMENT:
4929					/*
4930					 * ICMPv6 error messages for
4931					 * non-first fragments
4932					 */
4933					REASON_SET(reason, PFRES_FRAG);
4934					return (PF_DROP);
4935				case IPPROTO_AH:
4936				case IPPROTO_HOPOPTS:
4937				case IPPROTO_ROUTING:
4938				case IPPROTO_DSTOPTS: {
4939					/* get next header and header length */
4940					struct ip6_ext opt6;
4941
4942					if (!pf_pull_hdr(m, off2, &opt6,
4943					    sizeof(opt6), NULL, reason,
4944					    pd2.af)) {
4945						DPFPRINTF(PF_DEBUG_MISC,
4946						    ("pf: ICMPv6 short opt\n"));
4947						return (PF_DROP);
4948					}
4949					if (pd2.proto == IPPROTO_AH)
4950						off2 += (opt6.ip6e_len + 2) * 4;
4951					else
4952						off2 += (opt6.ip6e_len + 1) * 8;
4953					pd2.proto = opt6.ip6e_nxt;
4954					/* goto the next header */
4955					break;
4956				}
4957				default:
4958					terminal++;
4959					break;
4960				}
4961			} while (!terminal);
4962			break;
4963#endif /* INET6 */
4964		}
4965
4966		switch (pd2.proto) {
4967		case IPPROTO_TCP: {
4968			struct tcphdr		 th;
4969			u_int32_t		 seq;
4970			struct pf_state		 key;
4971			struct pf_state_peer	*src, *dst;
4972			u_int8_t		 dws;
4973			int			 copyback = 0;
4974
4975			/*
4976			 * Only the first 8 bytes of the TCP header can be
4977			 * expected. Don't access any TCP header fields after
4978			 * th_seq, an ackskew test is not possible.
4979			 */
4980			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
4981			    pd2.af)) {
4982				DPFPRINTF(PF_DEBUG_MISC,
4983				    ("pf: ICMP error message too short "
4984				    "(tcp)\n"));
4985				return (PF_DROP);
4986			}
4987
4988			key.af = pd2.af;
4989			key.proto = IPPROTO_TCP;
4990			if (direction == PF_IN)	{
4991				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4992				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4993				key.ext.port = th.th_dport;
4994				key.gwy.port = th.th_sport;
4995			} else {
4996				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4997				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4998				key.lan.port = th.th_dport;
4999				key.ext.port = th.th_sport;
5000			}
5001
5002			STATE_LOOKUP();
5003
5004			if (direction == (*state)->direction) {
5005				src = &(*state)->dst;
5006				dst = &(*state)->src;
5007			} else {
5008				src = &(*state)->src;
5009				dst = &(*state)->dst;
5010			}
5011
5012			if (src->wscale && dst->wscale &&
5013			    !(th.th_flags & TH_SYN))
5014				dws = dst->wscale & PF_WSCALE_MASK;
5015			else
5016				dws = 0;
5017
5018			/* Demodulate sequence number */
5019			seq = ntohl(th.th_seq) - src->seqdiff;
5020			if (src->seqdiff) {
5021				pf_change_a(&th.th_seq, icmpsum,
5022				    htonl(seq), 0);
5023				copyback = 1;
5024			}
5025
5026			if (!SEQ_GEQ(src->seqhi, seq) ||
5027			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
5028				if (pf_status.debug >= PF_DEBUG_MISC) {
5029					printf("pf: BAD ICMP %d:%d ",
5030					    icmptype, pd->hdr.icmp->icmp_code);
5031					pf_print_host(pd->src, 0, pd->af);
5032					printf(" -> ");
5033					pf_print_host(pd->dst, 0, pd->af);
5034					printf(" state: ");
5035					pf_print_state(*state);
5036					printf(" seq=%u\n", seq);
5037				}
5038				REASON_SET(reason, PFRES_BADSTATE);
5039				return (PF_DROP);
5040			}
5041
5042			if (STATE_TRANSLATE(*state)) {
5043				if (direction == PF_IN) {
5044					pf_change_icmp(pd2.src, &th.th_sport,
5045					    daddr, &(*state)->lan.addr,
5046					    (*state)->lan.port, NULL,
5047					    pd2.ip_sum, icmpsum,
5048					    pd->ip_sum, 0, pd2.af);
5049				} else {
5050					pf_change_icmp(pd2.dst, &th.th_dport,
5051					    saddr, &(*state)->gwy.addr,
5052					    (*state)->gwy.port, NULL,
5053					    pd2.ip_sum, icmpsum,
5054					    pd->ip_sum, 0, pd2.af);
5055				}
5056				copyback = 1;
5057			}
5058
5059			if (copyback) {
5060				switch (pd2.af) {
5061#ifdef INET
5062				case AF_INET:
5063					m_copyback(m, off, ICMP_MINLEN,
5064					    (caddr_t)pd->hdr.icmp);
5065					m_copyback(m, ipoff2, sizeof(h2),
5066					    (caddr_t)&h2);
5067					break;
5068#endif /* INET */
5069#ifdef INET6
5070				case AF_INET6:
5071					m_copyback(m, off,
5072					    sizeof(struct icmp6_hdr),
5073					    (caddr_t)pd->hdr.icmp6);
5074					m_copyback(m, ipoff2, sizeof(h2_6),
5075					    (caddr_t)&h2_6);
5076					break;
5077#endif /* INET6 */
5078				}
5079				m_copyback(m, off2, 8, (caddr_t)&th);
5080			}
5081
5082			return (PF_PASS);
5083			break;
5084		}
5085		case IPPROTO_UDP: {
5086			struct udphdr		uh;
5087			struct pf_state		key;
5088
5089			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
5090			    NULL, reason, pd2.af)) {
5091				DPFPRINTF(PF_DEBUG_MISC,
5092				    ("pf: ICMP error message too short "
5093				    "(udp)\n"));
5094				return (PF_DROP);
5095			}
5096
5097			key.af = pd2.af;
5098			key.proto = IPPROTO_UDP;
5099			if (direction == PF_IN)	{
5100				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5101				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5102				key.ext.port = uh.uh_dport;
5103				key.gwy.port = uh.uh_sport;
5104			} else {
5105				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5106				PF_ACPY(&key.ext.addr, pd2.src, key.af);
5107				key.lan.port = uh.uh_dport;
5108				key.ext.port = uh.uh_sport;
5109			}
5110
5111			STATE_LOOKUP();
5112
5113			if (STATE_TRANSLATE(*state)) {
5114				if (direction == PF_IN) {
5115					pf_change_icmp(pd2.src, &uh.uh_sport,
5116					    daddr, &(*state)->lan.addr,
5117					    (*state)->lan.port, &uh.uh_sum,
5118					    pd2.ip_sum, icmpsum,
5119					    pd->ip_sum, 1, pd2.af);
5120				} else {
5121					pf_change_icmp(pd2.dst, &uh.uh_dport,
5122					    saddr, &(*state)->gwy.addr,
5123					    (*state)->gwy.port, &uh.uh_sum,
5124					    pd2.ip_sum, icmpsum,
5125					    pd->ip_sum, 1, pd2.af);
5126				}
5127				switch (pd2.af) {
5128#ifdef INET
5129				case AF_INET:
5130					m_copyback(m, off, ICMP_MINLEN,
5131					    (caddr_t)pd->hdr.icmp);
5132					m_copyback(m, ipoff2, sizeof(h2),
5133					    (caddr_t)&h2);
5134					break;
5135#endif /* INET */
5136#ifdef INET6
5137				case AF_INET6:
5138					m_copyback(m, off,
5139					    sizeof(struct icmp6_hdr),
5140					    (caddr_t)pd->hdr.icmp6);
5141					m_copyback(m, ipoff2, sizeof(h2_6),
5142					    (caddr_t)&h2_6);
5143					break;
5144#endif /* INET6 */
5145				}
5146				m_copyback(m, off2, sizeof(uh),
5147				    (caddr_t)&uh);
5148			}
5149
5150			return (PF_PASS);
5151			break;
5152		}
5153#ifdef INET
5154		case IPPROTO_ICMP: {
5155			struct icmp		iih;
5156			struct pf_state		key;
5157
5158			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
5159			    NULL, reason, pd2.af)) {
5160				DPFPRINTF(PF_DEBUG_MISC,
5161				    ("pf: ICMP error message too short i"
5162				    "(icmp)\n"));
5163				return (PF_DROP);
5164			}
5165
5166			key.af = pd2.af;
5167			key.proto = IPPROTO_ICMP;
5168			if (direction == PF_IN)	{
5169				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5170				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5171				key.ext.port = iih.icmp_id;
5172				key.gwy.port = iih.icmp_id;
5173			} else {
5174				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5175				PF_ACPY(&key.ext.addr, pd2.src, key.af);
5176				key.lan.port = iih.icmp_id;
5177				key.ext.port = iih.icmp_id;
5178			}
5179
5180			STATE_LOOKUP();
5181
5182			if (STATE_TRANSLATE(*state)) {
5183				if (direction == PF_IN) {
5184					pf_change_icmp(pd2.src, &iih.icmp_id,
5185					    daddr, &(*state)->lan.addr,
5186					    (*state)->lan.port, NULL,
5187					    pd2.ip_sum, icmpsum,
5188					    pd->ip_sum, 0, AF_INET);
5189				} else {
5190					pf_change_icmp(pd2.dst, &iih.icmp_id,
5191					    saddr, &(*state)->gwy.addr,
5192					    (*state)->gwy.port, NULL,
5193					    pd2.ip_sum, icmpsum,
5194					    pd->ip_sum, 0, AF_INET);
5195				}
5196				m_copyback(m, off, ICMP_MINLEN,
5197				    (caddr_t)pd->hdr.icmp);
5198				m_copyback(m, ipoff2, sizeof(h2),
5199				    (caddr_t)&h2);
5200				m_copyback(m, off2, ICMP_MINLEN,
5201				    (caddr_t)&iih);
5202			}
5203
5204			return (PF_PASS);
5205			break;
5206		}
5207#endif /* INET */
5208#ifdef INET6
5209		case IPPROTO_ICMPV6: {
5210			struct icmp6_hdr	iih;
5211			struct pf_state		key;
5212
5213			if (!pf_pull_hdr(m, off2, &iih,
5214			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
5215				DPFPRINTF(PF_DEBUG_MISC,
5216				    ("pf: ICMP error message too short "
5217				    "(icmp6)\n"));
5218				return (PF_DROP);
5219			}
5220
5221			key.af = pd2.af;
5222			key.proto = IPPROTO_ICMPV6;
5223			if (direction == PF_IN)	{
5224				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5225				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5226				key.ext.port = iih.icmp6_id;
5227				key.gwy.port = iih.icmp6_id;
5228			} else {
5229				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5230				PF_ACPY(&key.ext.addr, pd2.src, key.af);
5231				key.lan.port = iih.icmp6_id;
5232				key.ext.port = iih.icmp6_id;
5233			}
5234
5235			STATE_LOOKUP();
5236
5237			if (STATE_TRANSLATE(*state)) {
5238				if (direction == PF_IN) {
5239					pf_change_icmp(pd2.src, &iih.icmp6_id,
5240					    daddr, &(*state)->lan.addr,
5241					    (*state)->lan.port, NULL,
5242					    pd2.ip_sum, icmpsum,
5243					    pd->ip_sum, 0, AF_INET6);
5244				} else {
5245					pf_change_icmp(pd2.dst, &iih.icmp6_id,
5246					    saddr, &(*state)->gwy.addr,
5247					    (*state)->gwy.port, NULL,
5248					    pd2.ip_sum, icmpsum,
5249					    pd->ip_sum, 0, AF_INET6);
5250				}
5251				m_copyback(m, off, sizeof(struct icmp6_hdr),
5252				    (caddr_t)pd->hdr.icmp6);
5253				m_copyback(m, ipoff2, sizeof(h2_6),
5254				    (caddr_t)&h2_6);
5255				m_copyback(m, off2, sizeof(struct icmp6_hdr),
5256				    (caddr_t)&iih);
5257			}
5258
5259			return (PF_PASS);
5260			break;
5261		}
5262#endif /* INET6 */
5263		default: {
5264			struct pf_state		key;
5265
5266			key.af = pd2.af;
5267			key.proto = pd2.proto;
5268			if (direction == PF_IN)	{
5269				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5270				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5271				key.ext.port = 0;
5272				key.gwy.port = 0;
5273			} else {
5274				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5275				PF_ACPY(&key.ext.addr, pd2.src, key.af);
5276				key.lan.port = 0;
5277				key.ext.port = 0;
5278			}
5279
5280			STATE_LOOKUP();
5281
5282			if (STATE_TRANSLATE(*state)) {
5283				if (direction == PF_IN) {
5284					pf_change_icmp(pd2.src, NULL,
5285					    daddr, &(*state)->lan.addr,
5286					    0, NULL,
5287					    pd2.ip_sum, icmpsum,
5288					    pd->ip_sum, 0, pd2.af);
5289				} else {
5290					pf_change_icmp(pd2.dst, NULL,
5291					    saddr, &(*state)->gwy.addr,
5292					    0, NULL,
5293					    pd2.ip_sum, icmpsum,
5294					    pd->ip_sum, 0, pd2.af);
5295				}
5296				switch (pd2.af) {
5297#ifdef INET
5298				case AF_INET:
5299					m_copyback(m, off, ICMP_MINLEN,
5300					    (caddr_t)pd->hdr.icmp);
5301					m_copyback(m, ipoff2, sizeof(h2),
5302					    (caddr_t)&h2);
5303					break;
5304#endif /* INET */
5305#ifdef INET6
5306				case AF_INET6:
5307					m_copyback(m, off,
5308					    sizeof(struct icmp6_hdr),
5309					    (caddr_t)pd->hdr.icmp6);
5310					m_copyback(m, ipoff2, sizeof(h2_6),
5311					    (caddr_t)&h2_6);
5312					break;
5313#endif /* INET6 */
5314				}
5315			}
5316
5317			return (PF_PASS);
5318			break;
5319		}
5320		}
5321	}
5322}
5323
5324int
5325pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
5326    struct pf_pdesc *pd)
5327{
5328	struct pf_state_peer	*src, *dst;
5329	struct pf_state		 key;
5330
5331	key.af = pd->af;
5332	key.proto = pd->proto;
5333	if (direction == PF_IN)	{
5334		PF_ACPY(&key.ext.addr, pd->src, key.af);
5335		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
5336		key.ext.port = 0;
5337		key.gwy.port = 0;
5338	} else {
5339		PF_ACPY(&key.lan.addr, pd->src, key.af);
5340		PF_ACPY(&key.ext.addr, pd->dst, key.af);
5341		key.lan.port = 0;
5342		key.ext.port = 0;
5343	}
5344
5345	STATE_LOOKUP();
5346
5347	if (direction == (*state)->direction) {
5348		src = &(*state)->src;
5349		dst = &(*state)->dst;
5350	} else {
5351		src = &(*state)->dst;
5352		dst = &(*state)->src;
5353	}
5354
5355	/* update states */
5356	if (src->state < PFOTHERS_SINGLE)
5357		src->state = PFOTHERS_SINGLE;
5358	if (dst->state == PFOTHERS_SINGLE)
5359		dst->state = PFOTHERS_MULTIPLE;
5360
5361	/* update expire time */
5362	(*state)->expire = time_second;
5363	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
5364		(*state)->timeout = PFTM_OTHER_MULTIPLE;
5365	else
5366		(*state)->timeout = PFTM_OTHER_SINGLE;
5367
5368	/* translate source/destination address, if necessary */
5369	if (STATE_TRANSLATE(*state)) {
5370		if (direction == PF_OUT)
5371			switch (pd->af) {
5372#ifdef INET
5373			case AF_INET:
5374				pf_change_a(&pd->src->v4.s_addr,
5375				    pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
5376				    0);
5377				break;
5378#endif /* INET */
5379#ifdef INET6
5380			case AF_INET6:
5381				PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
5382				break;
5383#endif /* INET6 */
5384			}
5385		else
5386			switch (pd->af) {
5387#ifdef INET
5388			case AF_INET:
5389				pf_change_a(&pd->dst->v4.s_addr,
5390				    pd->ip_sum, (*state)->lan.addr.v4.s_addr,
5391				    0);
5392				break;
5393#endif /* INET */
5394#ifdef INET6
5395			case AF_INET6:
5396				PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
5397				break;
5398#endif /* INET6 */
5399			}
5400	}
5401
5402	return (PF_PASS);
5403}
5404
5405/*
5406 * ipoff and off are measured from the start of the mbuf chain.
5407 * h must be at "ipoff" on the mbuf chain.
5408 */
5409void *
5410pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5411    u_short *actionp, u_short *reasonp, sa_family_t af)
5412{
5413	switch (af) {
5414#ifdef INET
5415	case AF_INET: {
5416		struct ip	*h = mtod(m, struct ip *);
5417		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5418
5419		if (fragoff) {
5420			if (fragoff >= len)
5421				ACTION_SET(actionp, PF_PASS);
5422			else {
5423				ACTION_SET(actionp, PF_DROP);
5424				REASON_SET(reasonp, PFRES_FRAG);
5425			}
5426			return (NULL);
5427		}
5428		if (m->m_pkthdr.len < off + len ||
5429		    ntohs(h->ip_len) < off + len) {
5430			ACTION_SET(actionp, PF_DROP);
5431			REASON_SET(reasonp, PFRES_SHORT);
5432			return (NULL);
5433		}
5434		break;
5435	}
5436#endif /* INET */
5437#ifdef INET6
5438	case AF_INET6: {
5439		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5440
5441		if (m->m_pkthdr.len < off + len ||
5442		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5443		    (unsigned)(off + len)) {
5444			ACTION_SET(actionp, PF_DROP);
5445			REASON_SET(reasonp, PFRES_SHORT);
5446			return (NULL);
5447		}
5448		break;
5449	}
5450#endif /* INET6 */
5451	}
5452	m_copydata(m, off, len, p);
5453	return (p);
5454}
5455
5456int
5457pf_routable(struct pf_addr *addr, sa_family_t af)
5458{
5459	struct sockaddr_in	*dst;
5460#ifdef INET6
5461	struct sockaddr_in6	*dst6;
5462	struct route_in6	 ro;
5463#else
5464	struct route		 ro;
5465#endif
5466
5467	bzero(&ro, sizeof(ro));
5468	switch (af) {
5469	case AF_INET:
5470		dst = satosin(&ro.ro_dst);
5471		dst->sin_family = AF_INET;
5472		dst->sin_len = sizeof(*dst);
5473		dst->sin_addr = addr->v4;
5474		break;
5475#ifdef INET6
5476	case AF_INET6:
5477		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5478		dst6->sin6_family = AF_INET6;
5479		dst6->sin6_len = sizeof(*dst6);
5480		dst6->sin6_addr = addr->v6;
5481		break;
5482#endif /* INET6 */
5483	default:
5484		return (0);
5485	}
5486
5487#ifdef __FreeBSD__
5488#ifdef RTF_PRCLONING
5489	rtalloc_ign((struct route *)&ro, (RTF_CLONING | RTF_PRCLONING));
5490#else /* !RTF_PRCLONING */
5491	rtalloc_ign((struct route *)&ro, RTF_CLONING);
5492#endif
5493#else /* ! __FreeBSD__ */
5494	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5495#endif
5496
5497	if (ro.ro_rt != NULL) {
5498		RTFREE(ro.ro_rt);
5499		return (1);
5500	}
5501
5502	return (0);
5503}
5504
5505int
5506pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
5507{
5508	struct sockaddr_in	*dst;
5509#ifdef INET6
5510	struct sockaddr_in6	*dst6;
5511	struct route_in6	 ro;
5512#else
5513	struct route		 ro;
5514#endif
5515	int			 ret = 0;
5516
5517	bzero(&ro, sizeof(ro));
5518	switch (af) {
5519	case AF_INET:
5520		dst = satosin(&ro.ro_dst);
5521		dst->sin_family = AF_INET;
5522		dst->sin_len = sizeof(*dst);
5523		dst->sin_addr = addr->v4;
5524		break;
5525#ifdef INET6
5526	case AF_INET6:
5527		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5528		dst6->sin6_family = AF_INET6;
5529		dst6->sin6_len = sizeof(*dst6);
5530		dst6->sin6_addr = addr->v6;
5531		break;
5532#endif /* INET6 */
5533	default:
5534		return (0);
5535	}
5536
5537#ifdef __FreeBSD__
5538# ifdef RTF_PRCLONING
5539	rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING));
5540# else /* !RTF_PRCLONING */
5541	rtalloc_ign((struct route *)&ro, RTF_CLONING);
5542# endif
5543#else /* ! __FreeBSD__ */
5544	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5545#endif
5546
5547	if (ro.ro_rt != NULL) {
5548#ifdef __FreeBSD__
5549		/* XXX_IMPORT: later */
5550#else
5551		if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
5552			ret = 1;
5553#endif
5554		RTFREE(ro.ro_rt);
5555	}
5556
5557	return (ret);
5558}
5559
5560#ifdef INET
5561
5562void
5563pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5564    struct pf_state *s)
5565{
5566	struct mbuf		*m0, *m1;
5567	struct m_tag		*mtag;
5568	struct route		 iproute;
5569	struct route		*ro = NULL;	/* XXX: was uninitialized */
5570	struct sockaddr_in	*dst;
5571	struct ip		*ip;
5572	struct ifnet		*ifp = NULL;
5573	struct pf_addr		 naddr;
5574	struct pf_src_node	*sn = NULL;
5575	int			 error = 0;
5576#ifdef __FreeBSD__
5577	int sw_csum;
5578#endif
5579
5580	if (m == NULL || *m == NULL || r == NULL ||
5581	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5582		panic("pf_route: invalid parameters");
5583
5584	if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) {
5585		if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) ==
5586		    NULL) {
5587			m0 = *m;
5588			*m = NULL;
5589			goto bad;
5590		}
5591		*(char *)(mtag + 1) = 1;
5592		m_tag_prepend(*m, mtag);
5593	} else {
5594		if (*(char *)(mtag + 1) > 3) {
5595			m0 = *m;
5596			*m = NULL;
5597			goto bad;
5598		}
5599		(*(char *)(mtag + 1))++;
5600	}
5601
5602	if (r->rt == PF_DUPTO) {
5603#ifdef __FreeBSD__
5604		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
5605#else
5606		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5607#endif
5608			return;
5609	} else {
5610		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5611			return;
5612		m0 = *m;
5613	}
5614
5615	if (m0->m_len < sizeof(struct ip)) {
5616		DPFPRINTF(PF_DEBUG_URGENT,
5617		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
5618		goto bad;
5619	}
5620
5621	ip = mtod(m0, struct ip *);
5622
5623	ro = &iproute;
5624	bzero((caddr_t)ro, sizeof(*ro));
5625	dst = satosin(&ro->ro_dst);
5626	dst->sin_family = AF_INET;
5627	dst->sin_len = sizeof(*dst);
5628	dst->sin_addr = ip->ip_dst;
5629
5630	if (r->rt == PF_FASTROUTE) {
5631		rtalloc(ro);
5632		if (ro->ro_rt == 0) {
5633			ipstat.ips_noroute++;
5634			goto bad;
5635		}
5636
5637		ifp = ro->ro_rt->rt_ifp;
5638		ro->ro_rt->rt_use++;
5639
5640		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
5641			dst = satosin(ro->ro_rt->rt_gateway);
5642	} else {
5643		if (TAILQ_EMPTY(&r->rpool.list)) {
5644			DPFPRINTF(PF_DEBUG_URGENT,
5645			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
5646			goto bad;
5647		}
5648		if (s == NULL) {
5649			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
5650			    &naddr, NULL, &sn);
5651			if (!PF_AZERO(&naddr, AF_INET))
5652				dst->sin_addr.s_addr = naddr.v4.s_addr;
5653			ifp = r->rpool.cur->kif ?
5654			    r->rpool.cur->kif->pfik_ifp : NULL;
5655		} else {
5656			if (!PF_AZERO(&s->rt_addr, AF_INET))
5657				dst->sin_addr.s_addr =
5658				    s->rt_addr.v4.s_addr;
5659			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5660		}
5661	}
5662	if (ifp == NULL)
5663		goto bad;
5664
5665	if (oifp != ifp) {
5666#ifdef __FreeBSD__
5667		PF_UNLOCK();
5668		if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
5669			PF_LOCK();
5670			goto bad;
5671		} else if (m0 == NULL) {
5672			PF_LOCK();
5673			goto done;
5674		}
5675		PF_LOCK();
5676#else
5677		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
5678			goto bad;
5679		else if (m0 == NULL)
5680			goto done;
5681#endif
5682		if (m0->m_len < sizeof(struct ip)) {
5683			DPFPRINTF(PF_DEBUG_URGENT,
5684			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
5685			goto bad;
5686		}
5687		ip = mtod(m0, struct ip *);
5688	}
5689
5690#ifdef __FreeBSD__
5691	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
5692	m0->m_pkthdr.csum_flags |= CSUM_IP;
5693	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
5694	if (sw_csum & CSUM_DELAY_DATA) {
5695		/*
5696		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
5697		 */
5698		NTOHS(ip->ip_len);
5699		NTOHS(ip->ip_off);	 /* XXX: needed? */
5700		in_delayed_cksum(m0);
5701		HTONS(ip->ip_len);
5702		HTONS(ip->ip_off);
5703		sw_csum &= ~CSUM_DELAY_DATA;
5704	}
5705	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
5706
5707	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
5708	    (ifp->if_hwassist & CSUM_FRAGMENT &&
5709		((ip->ip_off & htons(IP_DF)) == 0))) {
5710		/*
5711		 * ip->ip_len = htons(ip->ip_len);
5712		 * ip->ip_off = htons(ip->ip_off);
5713		 */
5714		ip->ip_sum = 0;
5715		if (sw_csum & CSUM_DELAY_IP) {
5716			/* From KAME */
5717			if (ip->ip_v == IPVERSION &&
5718			    (ip->ip_hl << 2) == sizeof(*ip)) {
5719				ip->ip_sum = in_cksum_hdr(ip);
5720			} else {
5721				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5722			}
5723		}
5724		PF_UNLOCK();
5725		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt);
5726		PF_LOCK();
5727		goto done;
5728	}
5729
5730#else
5731	/* Copied from ip_output. */
5732#ifdef IPSEC
5733	/*
5734	 * If deferred crypto processing is needed, check that the
5735	 * interface supports it.
5736	 */
5737	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
5738	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
5739		/* Notify IPsec to do its own crypto. */
5740		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
5741		goto bad;
5742	}
5743#endif /* IPSEC */
5744
5745	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
5746	if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) {
5747		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
5748		    ifp->if_bridge != NULL) {
5749			in_delayed_cksum(m0);
5750			m0->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */
5751		}
5752	} else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) {
5753		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
5754		    ifp->if_bridge != NULL) {
5755			in_delayed_cksum(m0);
5756			m0->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */
5757		}
5758	}
5759
5760	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
5761		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
5762		    ifp->if_bridge == NULL) {
5763			m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
5764			ipstat.ips_outhwcsum++;
5765		} else {
5766			ip->ip_sum = 0;
5767			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5768		}
5769		/* Update relevant hardware checksum stats for TCP/UDP */
5770		if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT)
5771			tcpstat.tcps_outhwcsum++;
5772		else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT)
5773			udpstat.udps_outhwcsum++;
5774		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
5775		goto done;
5776	}
5777#endif
5778	/*
5779	 * Too large for interface; fragment if possible.
5780	 * Must be able to put at least 8 bytes per fragment.
5781	 */
5782	if (ip->ip_off & htons(IP_DF)) {
5783		ipstat.ips_cantfrag++;
5784		if (r->rt != PF_DUPTO) {
5785#ifdef __FreeBSD__
5786			/* icmp_error() expects host byte ordering */
5787			NTOHS(ip->ip_len);
5788			NTOHS(ip->ip_off);
5789			PF_UNLOCK();
5790			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5791			    ifp);
5792			PF_LOCK();
5793#endif
5794			goto done;
5795		} else
5796			goto bad;
5797	}
5798
5799	m1 = m0;
5800#ifdef __FreeBSD__
5801	/*
5802	 * XXX: is cheaper + less error prone than own function
5803	 */
5804	NTOHS(ip->ip_len);
5805	NTOHS(ip->ip_off);
5806	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
5807#else
5808	error = ip_fragment(m0, ifp, ifp->if_mtu);
5809#endif
5810	if (error) {
5811#ifndef __FreeBSD__	/* ip_fragment does not do m_freem() on FreeBSD */
5812		m0 = NULL;
5813#endif
5814		goto bad;
5815	}
5816
5817	for (m0 = m1; m0; m0 = m1) {
5818		m1 = m0->m_nextpkt;
5819		m0->m_nextpkt = 0;
5820#ifdef __FreeBSD__
5821		if (error == 0) {
5822			PF_UNLOCK();
5823			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5824			    NULL);
5825			PF_LOCK();
5826		} else
5827#else
5828		if (error == 0)
5829			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5830			    NULL);
5831		else
5832#endif
5833			m_freem(m0);
5834	}
5835
5836	if (error == 0)
5837		ipstat.ips_fragmented++;
5838
5839done:
5840	if (r->rt != PF_DUPTO)
5841		*m = NULL;
5842	if (ro == &iproute && ro->ro_rt)
5843		RTFREE(ro->ro_rt);
5844	return;
5845
5846bad:
5847	m_freem(m0);
5848	goto done;
5849}
5850#endif /* INET */
5851
5852#ifdef INET6
5853void
5854pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5855    struct pf_state *s)
5856{
5857	struct mbuf		*m0;
5858	struct m_tag		*mtag;
5859	struct route_in6	 ip6route;
5860	struct route_in6	*ro;
5861	struct sockaddr_in6	*dst;
5862	struct ip6_hdr		*ip6;
5863	struct ifnet		*ifp = NULL;
5864	struct pf_addr		 naddr;
5865	struct pf_src_node	*sn = NULL;
5866	int			 error = 0;
5867
5868	if (m == NULL || *m == NULL || r == NULL ||
5869	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5870		panic("pf_route6: invalid parameters");
5871
5872	if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) {
5873		if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) ==
5874		    NULL) {
5875			m0 = *m;
5876			*m = NULL;
5877			goto bad;
5878		}
5879		*(char *)(mtag + 1) = 1;
5880		m_tag_prepend(*m, mtag);
5881	} else {
5882		if (*(char *)(mtag + 1) > 3) {
5883			m0 = *m;
5884			*m = NULL;
5885			goto bad;
5886		}
5887		(*(char *)(mtag + 1))++;
5888	}
5889
5890	if (r->rt == PF_DUPTO) {
5891#ifdef __FreeBSD__
5892		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
5893#else
5894		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5895#endif
5896			return;
5897	} else {
5898		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5899			return;
5900		m0 = *m;
5901	}
5902
5903	if (m0->m_len < sizeof(struct ip6_hdr)) {
5904		DPFPRINTF(PF_DEBUG_URGENT,
5905		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
5906		goto bad;
5907	}
5908	ip6 = mtod(m0, struct ip6_hdr *);
5909
5910	ro = &ip6route;
5911	bzero((caddr_t)ro, sizeof(*ro));
5912	dst = (struct sockaddr_in6 *)&ro->ro_dst;
5913	dst->sin6_family = AF_INET6;
5914	dst->sin6_len = sizeof(*dst);
5915	dst->sin6_addr = ip6->ip6_dst;
5916
5917	/* Cheat. */
5918	if (r->rt == PF_FASTROUTE) {
5919#ifdef __FreeBSD__
5920		m0->m_flags |= M_SKIP_FIREWALL;
5921		PF_UNLOCK();
5922		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5923		PF_LOCK();
5924#else
5925		mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
5926		if (mtag == NULL)
5927			goto bad;
5928		m_tag_prepend(m0, mtag);
5929		ip6_output(m0, NULL, NULL, 0, NULL, NULL);
5930#endif
5931		return;
5932	}
5933
5934	if (TAILQ_EMPTY(&r->rpool.list)) {
5935		DPFPRINTF(PF_DEBUG_URGENT,
5936		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
5937		goto bad;
5938	}
5939	if (s == NULL) {
5940		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
5941		    &naddr, NULL, &sn);
5942		if (!PF_AZERO(&naddr, AF_INET6))
5943			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5944			    &naddr, AF_INET6);
5945		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
5946	} else {
5947		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5948			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5949			    &s->rt_addr, AF_INET6);
5950		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5951	}
5952	if (ifp == NULL)
5953		goto bad;
5954
5955	if (oifp != ifp) {
5956#ifdef __FreeBSD__
5957		PF_UNLOCK();
5958		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
5959			PF_LOCK();
5960			goto bad;
5961		} else if (m0 == NULL) {
5962			PF_LOCK();
5963			goto done;
5964		}
5965		PF_LOCK();
5966#else
5967		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
5968			goto bad;
5969		else if (m0 == NULL)
5970			goto done;
5971#endif
5972		if (m0->m_len < sizeof(struct ip6_hdr)) {
5973			DPFPRINTF(PF_DEBUG_URGENT,
5974			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
5975			goto bad;
5976		}
5977		ip6 = mtod(m0, struct ip6_hdr *);
5978	}
5979
5980	/*
5981	 * If the packet is too large for the outgoing interface,
5982	 * send back an icmp6 error.
5983	 */
5984	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
5985		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5986	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5987#ifdef __FreeBSD__
5988		PF_UNLOCK();
5989#endif
5990		error = nd6_output(ifp, ifp, m0, dst, NULL);
5991#ifdef __FreeBSD__
5992		PF_LOCK();
5993#endif
5994	} else {
5995		in6_ifstat_inc(ifp, ifs6_in_toobig);
5996#ifdef __FreeBSD__
5997		if (r->rt != PF_DUPTO) {
5998			PF_UNLOCK();
5999			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
6000			PF_LOCK();
6001		 } else
6002#else
6003		if (r->rt != PF_DUPTO)
6004			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
6005		else
6006#endif
6007			goto bad;
6008	}
6009
6010done:
6011	if (r->rt != PF_DUPTO)
6012		*m = NULL;
6013	return;
6014
6015bad:
6016	m_freem(m0);
6017	goto done;
6018}
6019#endif /* INET6 */
6020
6021
6022#ifdef __FreeBSD__
6023/*
6024 * FreeBSD supports cksum offloads for the following drivers.
6025 *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
6026 *   ti(4), txp(4), xl(4)
6027 *
6028 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
6029 *  network driver performed cksum including pseudo header, need to verify
6030 *   csum_data
6031 * CSUM_DATA_VALID :
6032 *  network driver performed cksum, needs to additional pseudo header
6033 *  cksum computation with partial csum_data(i.e. lack of H/W support for
6034 *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
6035 *
6036 * After validating the cksum of packet, set both flag CSUM_DATA_VALID and
6037 * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
6038 * TCP/UDP layer.
6039 * Also, set csum_data to 0xffff to force cksum validation.
6040 */
6041int
6042pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
6043{
6044	u_int16_t sum = 0;
6045	int hw_assist = 0;
6046	struct ip *ip;
6047
6048	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6049		return (1);
6050	if (m->m_pkthdr.len < off + len)
6051		return (1);
6052
6053	switch (p) {
6054	case IPPROTO_TCP:
6055		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6056			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6057				sum = m->m_pkthdr.csum_data;
6058			} else {
6059				ip = mtod(m, struct ip *);
6060				sum = in_pseudo(ip->ip_src.s_addr,
6061					ip->ip_dst.s_addr, htonl((u_short)len +
6062					m->m_pkthdr.csum_data + IPPROTO_TCP));
6063			}
6064			sum ^= 0xffff;
6065			++hw_assist;
6066		}
6067		break;
6068	case IPPROTO_UDP:
6069		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6070			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6071				sum = m->m_pkthdr.csum_data;
6072			} else {
6073				ip = mtod(m, struct ip *);
6074				sum = in_pseudo(ip->ip_src.s_addr,
6075					ip->ip_dst.s_addr, htonl((u_short)len +
6076					m->m_pkthdr.csum_data + IPPROTO_UDP));
6077			}
6078			sum ^= 0xffff;
6079			++hw_assist;
6080                }
6081		break;
6082	case IPPROTO_ICMP:
6083#ifdef INET6
6084	case IPPROTO_ICMPV6:
6085#endif /* INET6 */
6086		break;
6087	default:
6088		return (1);
6089	}
6090
6091	if (!hw_assist) {
6092		switch (af) {
6093		case AF_INET:
6094			if (p == IPPROTO_ICMP) {
6095				if (m->m_len < off)
6096					return (1);
6097				m->m_data += off;
6098				m->m_len -= off;
6099				sum = in_cksum(m, len);
6100				m->m_data -= off;
6101				m->m_len += off;
6102			} else {
6103				if (m->m_len < sizeof(struct ip))
6104					return (1);
6105				sum = in4_cksum(m, p, off, len);
6106			}
6107			break;
6108#ifdef INET6
6109		case AF_INET6:
6110			if (m->m_len < sizeof(struct ip6_hdr))
6111				return (1);
6112			sum = in6_cksum(m, p, off, len);
6113			break;
6114#endif /* INET6 */
6115		default:
6116			return (1);
6117		}
6118	}
6119	if (sum) {
6120		switch (p) {
6121		case IPPROTO_TCP:
6122			tcpstat.tcps_rcvbadsum++;
6123			break;
6124		case IPPROTO_UDP:
6125			udpstat.udps_badsum++;
6126			break;
6127		case IPPROTO_ICMP:
6128			icmpstat.icps_checksum++;
6129			break;
6130#ifdef INET6
6131		case IPPROTO_ICMPV6:
6132			icmp6stat.icp6s_checksum++;
6133			break;
6134#endif /* INET6 */
6135		}
6136		return (1);
6137	} else {
6138		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
6139			m->m_pkthdr.csum_flags |=
6140			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
6141			m->m_pkthdr.csum_data = 0xffff;
6142		}
6143	}
6144	return (0);
6145}
6146#else
6147/*
6148 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
6149 *   off is the offset where the protocol header starts
6150 *   len is the total length of protocol header plus payload
6151 * returns 0 when the checksum is valid, otherwise returns 1.
6152 */
6153int
6154pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
6155    sa_family_t af)
6156{
6157	u_int16_t flag_ok, flag_bad;
6158	u_int16_t sum;
6159
6160	switch (p) {
6161	case IPPROTO_TCP:
6162		flag_ok = M_TCP_CSUM_IN_OK;
6163		flag_bad = M_TCP_CSUM_IN_BAD;
6164		break;
6165	case IPPROTO_UDP:
6166		flag_ok = M_UDP_CSUM_IN_OK;
6167		flag_bad = M_UDP_CSUM_IN_BAD;
6168		break;
6169	case IPPROTO_ICMP:
6170#ifdef INET6
6171	case IPPROTO_ICMPV6:
6172#endif /* INET6 */
6173		flag_ok = flag_bad = 0;
6174		break;
6175	default:
6176		return (1);
6177	}
6178	if (m->m_pkthdr.csum & flag_ok)
6179		return (0);
6180	if (m->m_pkthdr.csum & flag_bad)
6181		return (1);
6182	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6183		return (1);
6184	if (m->m_pkthdr.len < off + len)
6185		return (1);
6186	switch (af) {
6187#ifdef INET
6188	case AF_INET:
6189		if (p == IPPROTO_ICMP) {
6190			if (m->m_len < off)
6191				return (1);
6192			m->m_data += off;
6193			m->m_len -= off;
6194			sum = in_cksum(m, len);
6195			m->m_data -= off;
6196			m->m_len += off;
6197		} else {
6198			if (m->m_len < sizeof(struct ip))
6199				return (1);
6200			sum = in4_cksum(m, p, off, len);
6201		}
6202		break;
6203#endif /* INET */
6204#ifdef INET6
6205	case AF_INET6:
6206		if (m->m_len < sizeof(struct ip6_hdr))
6207			return (1);
6208		sum = in6_cksum(m, p, off, len);
6209		break;
6210#endif /* INET6 */
6211	default:
6212		return (1);
6213	}
6214	if (sum) {
6215		m->m_pkthdr.csum |= flag_bad;
6216		switch (p) {
6217		case IPPROTO_TCP:
6218			tcpstat.tcps_rcvbadsum++;
6219			break;
6220		case IPPROTO_UDP:
6221			udpstat.udps_badsum++;
6222			break;
6223		case IPPROTO_ICMP:
6224			icmpstat.icps_checksum++;
6225			break;
6226#ifdef INET6
6227		case IPPROTO_ICMPV6:
6228			icmp6stat.icp6s_checksum++;
6229			break;
6230#endif /* INET6 */
6231		}
6232		return (1);
6233	}
6234	m->m_pkthdr.csum |= flag_ok;
6235	return (0);
6236}
6237#endif
6238
6239static int
6240pf_add_mbuf_tag(struct mbuf *m, u_int tag)
6241{
6242	struct m_tag *mtag;
6243
6244	if (m_tag_find(m, tag, NULL) != NULL)
6245		return (0);
6246	mtag = m_tag_get(tag, 0, M_NOWAIT);
6247	if (mtag == NULL)
6248		return (1);
6249	m_tag_prepend(m, mtag);
6250	return (0);
6251}
6252
6253#ifdef INET
6254int
6255#ifdef __FreeBSD__
6256pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
6257    struct ether_header *eh, struct inpcb *inp)
6258#else
6259pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
6260    struct ether_header *eh)
6261#endif
6262{
6263	struct pfi_kif		*kif;
6264	u_short			 action, reason = 0, log = 0;
6265	struct mbuf		*m = *m0;
6266	struct ip		*h = NULL;	/* make the compiler happy */
6267	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
6268	struct pf_state		*s = NULL;
6269	struct pf_ruleset	*ruleset = NULL;
6270	struct pf_pdesc		 pd;
6271	int			 off, dirndx, pqid = 0;
6272
6273#ifdef __FreeBSD__
6274	PF_LOCK();
6275#endif
6276	if (!pf_status.running ||
6277#ifdef __FreeBSD__
6278	    (m->m_flags & M_SKIP_FIREWALL)) {
6279		PF_UNLOCK();
6280#else
6281	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
6282#endif
6283	    	return (PF_PASS);
6284	}
6285
6286#ifdef __FreeBSD__
6287	/* XXX_IMPORT: later */
6288#else
6289	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
6290		ifp = ifp->if_carpdev;
6291#endif
6292
6293	kif = pfi_index2kif[ifp->if_index];
6294	if (kif == NULL) {
6295#ifdef __FreeBSD__
6296		PF_UNLOCK();
6297#endif
6298		DPFPRINTF(PF_DEBUG_URGENT,
6299		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
6300		return (PF_DROP);
6301	}
6302	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
6303#ifdef __FreeBSD__
6304		PF_UNLOCK();
6305#endif
6306		return (PF_PASS);
6307	}
6308
6309#ifdef __FreeBSD__
6310	M_ASSERTPKTHDR(m);
6311#else
6312#ifdef DIAGNOSTIC
6313	if ((m->m_flags & M_PKTHDR) == 0)
6314		panic("non-M_PKTHDR is passed to pf_test");
6315#endif /* DIAGNOSTIC */
6316#endif /* __FreeBSD__ */
6317
6318	memset(&pd, 0, sizeof(pd));
6319	if (m->m_pkthdr.len < (int)sizeof(*h)) {
6320		action = PF_DROP;
6321		REASON_SET(&reason, PFRES_SHORT);
6322		log = 1;
6323		goto done;
6324	}
6325
6326	/* We do IP header normalization and packet reassembly here */
6327	if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
6328		action = PF_DROP;
6329		goto done;
6330	}
6331	m = *m0;
6332	h = mtod(m, struct ip *);
6333
6334	off = h->ip_hl << 2;
6335	if (off < (int)sizeof(*h)) {
6336		action = PF_DROP;
6337		REASON_SET(&reason, PFRES_SHORT);
6338		log = 1;
6339		goto done;
6340	}
6341
6342	pd.src = (struct pf_addr *)&h->ip_src;
6343	pd.dst = (struct pf_addr *)&h->ip_dst;
6344	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
6345	pd.ip_sum = &h->ip_sum;
6346	pd.proto = h->ip_p;
6347	pd.af = AF_INET;
6348	pd.tos = h->ip_tos;
6349	pd.tot_len = ntohs(h->ip_len);
6350	pd.eh = eh;
6351
6352	/* handle fragments that didn't get reassembled by normalization */
6353	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
6354		action = pf_test_fragment(&r, dir, kif, m, h,
6355		    &pd, &a, &ruleset);
6356		goto done;
6357	}
6358
6359	switch (h->ip_p) {
6360
6361	case IPPROTO_TCP: {
6362		struct tcphdr	th;
6363
6364		pd.hdr.tcp = &th;
6365		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6366		    &action, &reason, AF_INET)) {
6367			log = action != PF_PASS;
6368			goto done;
6369		}
6370		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6371		    ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
6372			action = PF_DROP;
6373			goto done;
6374		}
6375		pd.p_len = pd.tot_len - off - (th.th_off << 2);
6376		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
6377			pqid = 1;
6378		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6379		if (action == PF_DROP)
6380			goto done;
6381		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6382		    &reason);
6383		if (action == PF_PASS) {
6384#if NPFSYNC
6385			pfsync_update_state(s);
6386#endif /* NPFSYNC */
6387			r = s->rule.ptr;
6388			a = s->anchor.ptr;
6389			log = s->log;
6390		} else if (s == NULL)
6391#ifdef __FreeBSD__
6392			action = pf_test_tcp(&r, &s, dir, kif,
6393			    m, off, h, &pd, &a, &ruleset, NULL, inp);
6394#else
6395			action = pf_test_tcp(&r, &s, dir, kif,
6396			    m, off, h, &pd, &a, &ruleset, &ipintrq);
6397#endif
6398		break;
6399	}
6400
6401	case IPPROTO_UDP: {
6402		struct udphdr	uh;
6403
6404		pd.hdr.udp = &uh;
6405		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6406		    &action, &reason, AF_INET)) {
6407			log = action != PF_PASS;
6408			goto done;
6409		}
6410		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
6411		    off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
6412			action = PF_DROP;
6413			goto done;
6414		}
6415		if (uh.uh_dport == 0 ||
6416		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6417		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6418			action = PF_DROP;
6419			goto done;
6420		}
6421		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6422		if (action == PF_PASS) {
6423#if NPFSYNC
6424			pfsync_update_state(s);
6425#endif /* NPFSYNC */
6426			r = s->rule.ptr;
6427			a = s->anchor.ptr;
6428			log = s->log;
6429		} else if (s == NULL)
6430#ifdef __FreeBSD__
6431			action = pf_test_udp(&r, &s, dir, kif,
6432			    m, off, h, &pd, &a, &ruleset, NULL, inp);
6433#else
6434			action = pf_test_udp(&r, &s, dir, kif,
6435			    m, off, h, &pd, &a, &ruleset, &ipintrq);
6436#endif
6437		break;
6438	}
6439
6440	case IPPROTO_ICMP: {
6441		struct icmp	ih;
6442
6443		pd.hdr.icmp = &ih;
6444		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
6445		    &action, &reason, AF_INET)) {
6446			log = action != PF_PASS;
6447			goto done;
6448		}
6449		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6450		    ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
6451			action = PF_DROP;
6452			goto done;
6453		}
6454		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
6455		    &reason);
6456		if (action == PF_PASS) {
6457#if NPFSYNC
6458			pfsync_update_state(s);
6459#endif /* NPFSYNC */
6460			r = s->rule.ptr;
6461			a = s->anchor.ptr;
6462			log = s->log;
6463		} else if (s == NULL)
6464#ifdef __FreeBSD__
6465			action = pf_test_icmp(&r, &s, dir, kif,
6466			    m, off, h, &pd, &a, &ruleset, NULL);
6467#else
6468			action = pf_test_icmp(&r, &s, dir, kif,
6469			    m, off, h, &pd, &a, &ruleset, &ipintrq);
6470#endif
6471		break;
6472	}
6473
6474	default:
6475		action = pf_test_state_other(&s, dir, kif, &pd);
6476		if (action == PF_PASS) {
6477#if NPFSYNC
6478			pfsync_update_state(s);
6479#endif /* NPFSYNC */
6480			r = s->rule.ptr;
6481			a = s->anchor.ptr;
6482			log = s->log;
6483		} else if (s == NULL)
6484#ifdef __FreeBSD__
6485			action = pf_test_other(&r, &s, dir, kif, m, off, h,
6486			    &pd, &a, &ruleset, NULL);
6487#else
6488			action = pf_test_other(&r, &s, dir, kif, m, off, h,
6489			    &pd, &a, &ruleset, &ipintrq);
6490#endif
6491		break;
6492	}
6493
6494done:
6495	if (action == PF_PASS && h->ip_hl > 5 &&
6496	    !((s && s->allow_opts) || r->allow_opts)) {
6497		action = PF_DROP;
6498		REASON_SET(&reason, PFRES_IPOPTIONS);
6499		log = 1;
6500		DPFPRINTF(PF_DEBUG_MISC,
6501		    ("pf: dropping packet with ip options\n"));
6502	}
6503
6504	if (s && s->tag)
6505		pf_tag_packet(m, pf_get_tag(m), s->tag);
6506
6507#ifdef ALTQ
6508	if (action == PF_PASS && r->qid) {
6509		struct m_tag	*mtag;
6510		struct altq_tag	*atag;
6511
6512		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6513		if (mtag != NULL) {
6514			atag = (struct altq_tag *)(mtag + 1);
6515			if (pqid || pd.tos == IPTOS_LOWDELAY)
6516				atag->qid = r->pqid;
6517			else
6518				atag->qid = r->qid;
6519			/* add hints for ecn */
6520			atag->af = AF_INET;
6521			atag->hdr = h;
6522			m_tag_prepend(m, mtag);
6523		}
6524	}
6525#endif /* ALTQ */
6526
6527	/*
6528	 * connections redirected to loopback should not match sockets
6529	 * bound specifically to loopback due to security implications,
6530	 * see tcp_input() and in_pcblookup_listen().
6531	 */
6532	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6533	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6534	    (s->nat_rule.ptr->action == PF_RDR ||
6535	    s->nat_rule.ptr->action == PF_BINAT) &&
6536	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
6537	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
6538		action = PF_DROP;
6539		REASON_SET(&reason, PFRES_MEMORY);
6540	}
6541
6542	if (log)
6543		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, a, ruleset);
6544
6545	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6546	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
6547
6548	if (action == PF_PASS || r->action == PF_DROP) {
6549		r->packets++;
6550		r->bytes += pd.tot_len;
6551		if (a != NULL) {
6552			a->packets++;
6553			a->bytes += pd.tot_len;
6554		}
6555		if (s != NULL) {
6556			dirndx = (dir == s->direction) ? 0 : 1;
6557			s->packets[dirndx]++;
6558			s->bytes[dirndx] += pd.tot_len;
6559			if (s->nat_rule.ptr != NULL) {
6560				s->nat_rule.ptr->packets++;
6561				s->nat_rule.ptr->bytes += pd.tot_len;
6562			}
6563			if (s->src_node != NULL) {
6564				s->src_node->packets++;
6565				s->src_node->bytes += pd.tot_len;
6566			}
6567			if (s->nat_src_node != NULL) {
6568				s->nat_src_node->packets++;
6569				s->nat_src_node->bytes += pd.tot_len;
6570			}
6571		}
6572		tr = r;
6573		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6574		if (nr != NULL) {
6575			struct pf_addr *x;
6576			/*
6577			 * XXX: we need to make sure that the addresses
6578			 * passed to pfr_update_stats() are the same than
6579			 * the addresses used during matching (pfr_match)
6580			 */
6581			if (r == &pf_default_rule) {
6582				tr = nr;
6583				x = (s == NULL || s->direction == dir) ?
6584				    &pd.baddr : &pd.naddr;
6585			} else
6586				x = (s == NULL || s->direction == dir) ?
6587				    &pd.naddr : &pd.baddr;
6588			if (x == &pd.baddr || s == NULL) {
6589				/* we need to change the address */
6590				if (dir == PF_OUT)
6591					pd.src = x;
6592				else
6593					pd.dst = x;
6594			}
6595		}
6596		if (tr->src.addr.type == PF_ADDR_TABLE)
6597			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6598			    s->direction == dir) ? pd.src : pd.dst, pd.af,
6599			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6600			    tr->src.neg);
6601		if (tr->dst.addr.type == PF_ADDR_TABLE)
6602			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6603			    s->direction == dir) ? pd.dst : pd.src, pd.af,
6604			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6605			    tr->dst.neg);
6606	}
6607
6608
6609	if (action == PF_SYNPROXY_DROP) {
6610		m_freem(*m0);
6611		*m0 = NULL;
6612		action = PF_PASS;
6613	} else if (r->rt)
6614		/* pf_route can free the mbuf causing *m0 to become NULL */
6615		pf_route(m0, r, dir, ifp, s);
6616
6617#ifdef __FreeBSD__
6618	PF_UNLOCK();
6619#endif
6620
6621	return (action);
6622}
6623#endif /* INET */
6624
6625#ifdef INET6
/*
 * pf_test6: run one IPv6 packet through the filter.
 *
 * dir	- PF_IN or PF_OUT; the code below compares against both.
 * ifp	- interface the packet is seen on; used to look up the pfi_kif.
 * m0	- in/out mbuf pointer.  It is re-read after pf_normalize_ip6(),
 *	  set to NULL on the PF_SYNPROXY_DROP path, and handed to
 *	  pf_route6(), which may also free the mbuf.
 * eh	- stored in pd.eh for the protocol handlers.
 * inp	- (FreeBSD only) forwarded to pf_test_tcp()/pf_test_udp().
 *
 * Returns the resulting action.  PF_SYNPROXY_DROP is never returned: it
 * is converted to PF_PASS after the mbuf has been freed.
 * On FreeBSD every return path releases the lock taken by PF_LOCK().
 */
6626int
6627#ifdef __FreeBSD__
6628pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
6629    struct ether_header *eh, struct inpcb *inp)
6630#else
6631pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
6632    struct ether_header *eh)
6633#endif
6634{
6635	struct pfi_kif		*kif;
6636	u_short			 action, reason = 0, log = 0;
6637	struct mbuf		*m = *m0;
6638	struct ip6_hdr		*h = NULL;	/* make the compiler happy */
6639	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
6640	struct pf_state		*s = NULL;
6641	struct pf_ruleset	*ruleset = NULL;
6642	struct pf_pdesc		 pd;
6643	int			 off, terminal = 0, dirndx;
6644
6645#ifdef __FreeBSD__
6646	PF_LOCK();
6647#endif
6648
	/*
	 * Pass untouched when pf is not running or when the packet is
	 * marked to bypass the firewall (M_SKIP_FIREWALL flag on FreeBSD,
	 * PACKET_TAG_PF_GENERATED mbuf tag otherwise).
	 */
6649	if (!pf_status.running ||
6650#ifdef __FreeBSD__
6651	    (m->m_flags & M_SKIP_FIREWALL)) {
6652		PF_UNLOCK();
6653#else
6654	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
6655#endif
6656		return (PF_PASS);
6657	}
6658
6659#ifdef __FreeBSD__
6660	/* XXX_IMPORT: later */
6661#else
6662	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
6663		ifp = ifp->if_carpdev;
6664#endif
6665
	/*
	 * Look up pf's per-interface record by interface index.  A missing
	 * record drops the packet; an interface flagged PFI_IFLAG_SKIP
	 * passes it without any further inspection.
	 */
6666	kif = pfi_index2kif[ifp->if_index];
6667	if (kif == NULL) {
6668#ifdef __FreeBSD__
6669		PF_UNLOCK();
6670#endif
6671		DPFPRINTF(PF_DEBUG_URGENT,
6672		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
6673		return (PF_DROP);
6674	}
6675	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
6676#ifdef __FreeBSD__
6677		PF_UNLOCK();
6678#endif
6679		return (PF_PASS);
6680	}
6681
6682#ifdef __FreeBSD__
6683	M_ASSERTPKTHDR(m);
6684#else
6685#ifdef DIAGNOSTIC
6686	if ((m->m_flags & M_PKTHDR) == 0)
6687		panic("non-M_PKTHDR is passed to pf_test6");
6688#endif /* DIAGNOSTIC */
6689#endif
6690
	/*
	 * Start from a zeroed packet descriptor and reject packets whose
	 * total length is smaller than the fixed IPv6 header.
	 * NOTE(review): on this path and on the normalization failure
	 * below, h is still NULL when control reaches `done'; confirm
	 * that PFLOG_PACKET() tolerates a NULL header pointer.
	 */
6691	memset(&pd, 0, sizeof(pd));
6692	if (m->m_pkthdr.len < (int)sizeof(*h)) {
6693		action = PF_DROP;
6694		REASON_SET(&reason, PFRES_SHORT);
6695		log = 1;
6696		goto done;
6697	}
6698
6699	/* We do IP header normalization and packet reassembly here */
6700	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
6701		action = PF_DROP;
6702		goto done;
6703	}
	/* *m0 is re-read because normalization was given the mbuf pointer */
6704	m = *m0;
6705	h = mtod(m, struct ip6_hdr *);
6706
	/*
	 * Fill in the descriptor.  pd.src/pd.dst point into the packet;
	 * pd.baddr keeps a copy of the address on the "local" side of the
	 * direction (source when outbound, destination when inbound).
	 */
6707	pd.src = (struct pf_addr *)&h->ip6_src;
6708	pd.dst = (struct pf_addr *)&h->ip6_dst;
6709	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
6710	pd.ip_sum = NULL;
6711	pd.af = AF_INET6;
6712	pd.tos = 0;
6713	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6714	pd.eh = eh;
6715
	/* off: offset of the first header following the fixed IPv6 header */
6716	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
6717	pd.proto = h->ip6_nxt;
	/*
	 * Walk the extension header chain until pd.proto names something
	 * that is not one of the extension headers handled here.  A
	 * fragment header ends processing right away via
	 * pf_test_fragment().
	 */
6718	do {
6719		switch (pd.proto) {
6720		case IPPROTO_FRAGMENT:
6721			action = pf_test_fragment(&r, dir, kif, m, h,
6722			    &pd, &a, &ruleset);
6723			if (action == PF_DROP)
6724				REASON_SET(&reason, PFRES_FRAG);
6725			goto done;
6726		case IPPROTO_AH:
6727		case IPPROTO_HOPOPTS:
6728		case IPPROTO_ROUTING:
6729		case IPPROTO_DSTOPTS: {
6730			/* get next header and header length */
6731			struct ip6_ext	opt6;
6732
6733			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6734			    NULL, &reason, pd.af)) {
6735				DPFPRINTF(PF_DEBUG_MISC,
6736				    ("pf: IPv6 short opt\n"));
6737				action = PF_DROP;
6738				log = 1;
6739				goto done;
6740			}
			/*
			 * AH counts its length in 4-octet units minus 2;
			 * the other extension headers count 8-octet units
			 * excluding the first 8 octets.
			 */
6741			if (pd.proto == IPPROTO_AH)
6742				off += (opt6.ip6e_len + 2) * 4;
6743			else
6744				off += (opt6.ip6e_len + 1) * 8;
6745			pd.proto = opt6.ip6e_nxt;
6746			/* goto the next header */
6747			break;
6748		}
6749		default:
6750			terminal++;
6751			break;
6752		}
6753	} while (!terminal);
6754
	/*
	 * Per-protocol handling.  Each case pulls the transport header,
	 * verifies the checksum on inbound packets, and tries to match an
	 * existing state.  On PF_PASS from the state lookup the rule,
	 * anchor and log flag are taken from the state; otherwise, only
	 * if no state was found (s == NULL), the ruleset is evaluated.
	 */
6755	switch (pd.proto) {
6756
6757	case IPPROTO_TCP: {
6758		struct tcphdr	th;
6759
6760		pd.hdr.tcp = &th;
6761		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6762		    &action, &reason, AF_INET6)) {
6763			log = action != PF_PASS;
6764			goto done;
6765		}
6766		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6767		    ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
6768		    IPPROTO_TCP, AF_INET6)) {
6769			action = PF_DROP;
6770			REASON_SET(&reason, PFRES_PROTCKSUM);
6771			goto done;
6772		}
		/* payload length: total minus everything up to and incl. TCP header */
6773		pd.p_len = pd.tot_len - off - (th.th_off << 2);
6774		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6775		if (action == PF_DROP)
6776			goto done;
6777		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6778		    &reason);
6779		if (action == PF_PASS) {
6780#if NPFSYNC
6781			pfsync_update_state(s);
6782#endif /* NPFSYNC */
6783			r = s->rule.ptr;
6784			a = s->anchor.ptr;
6785			log = s->log;
6786		} else if (s == NULL)
6787#ifdef __FreeBSD__
6788			action = pf_test_tcp(&r, &s, dir, kif,
6789			    m, off, h, &pd, &a, &ruleset, NULL, inp);
6790#else
6791			action = pf_test_tcp(&r, &s, dir, kif,
6792			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
6793#endif
6794		break;
6795	}
6796
6797	case IPPROTO_UDP: {
6798		struct udphdr	uh;
6799
6800		pd.hdr.udp = &uh;
6801		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6802		    &action, &reason, AF_INET6)) {
6803			log = action != PF_PASS;
6804			goto done;
6805		}
		/* the checksum is only verified when the header carries one */
6806		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
6807		    off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
6808		    IPPROTO_UDP, AF_INET6)) {
6809			action = PF_DROP;
6810			REASON_SET(&reason, PFRES_PROTCKSUM);
6811			goto done;
6812		}
		/*
		 * Sanity checks: destination port 0, or a UDP length that
		 * exceeds the remaining packet or is smaller than the UDP
		 * header.  No REASON_SET() is done on this path, so reason
		 * keeps whatever value it had.
		 */
6813		if (uh.uh_dport == 0 ||
6814		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6815		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6816			action = PF_DROP;
6817			goto done;
6818		}
6819		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6820		if (action == PF_PASS) {
6821#if NPFSYNC
6822			pfsync_update_state(s);
6823#endif /* NPFSYNC */
6824			r = s->rule.ptr;
6825			a = s->anchor.ptr;
6826			log = s->log;
6827		} else if (s == NULL)
6828#ifdef __FreeBSD__
6829			action = pf_test_udp(&r, &s, dir, kif,
6830			    m, off, h, &pd, &a, &ruleset, NULL, inp);
6831#else
6832			action = pf_test_udp(&r, &s, dir, kif,
6833			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
6834#endif
6835		break;
6836	}
6837
6838	case IPPROTO_ICMPV6: {
6839		struct icmp6_hdr	ih;
6840
6841		pd.hdr.icmp6 = &ih;
6842		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6843		    &action, &reason, AF_INET6)) {
6844			log = action != PF_PASS;
6845			goto done;
6846		}
6847		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6848		    ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
6849		    IPPROTO_ICMPV6, AF_INET6)) {
6850			action = PF_DROP;
6851			REASON_SET(&reason, PFRES_PROTCKSUM);
6852			goto done;
6853		}
6854		action = pf_test_state_icmp(&s, dir, kif,
6855		    m, off, h, &pd, &reason);
6856		if (action == PF_PASS) {
6857#if NPFSYNC
6858			pfsync_update_state(s);
6859#endif /* NPFSYNC */
6860			r = s->rule.ptr;
6861			a = s->anchor.ptr;
6862			log = s->log;
6863		} else if (s == NULL)
6864#ifdef __FreeBSD__
6865			action = pf_test_icmp(&r, &s, dir, kif,
6866			    m, off, h, &pd, &a, &ruleset, NULL);
6867#else
6868			action = pf_test_icmp(&r, &s, dir, kif,
6869			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
6870#endif
6871		break;
6872	}
6873
	/* any other protocol: no transport header is pulled or checked here */
6874	default:
6875		action = pf_test_state_other(&s, dir, kif, &pd);
6876		if (action == PF_PASS) {
6877#if NPFSYNC
6878			pfsync_update_state(s);
6879#endif /* NPFSYNC */
6880			r = s->rule.ptr;
6881			a = s->anchor.ptr;
6882			log = s->log;
6883		} else if (s == NULL)
6884#ifdef __FreeBSD__
6885			action = pf_test_other(&r, &s, dir, kif, m, off, h,
6886			    &pd, &a, &ruleset, NULL);
6887#else
6888			action = pf_test_other(&r, &s, dir, kif, m, off, h,
6889			    &pd, &a, &ruleset, &ip6intrq);
6890#endif
6891		break;
6892	}
6893
	/*
	 * Common exit path: tagging, queue assignment, logging, counters
	 * and routing all happen below, for passed and dropped packets.
	 */
6894done:
6895	/* XXX handle IPv6 options, if not allowed. not implemented. */
6896
	/* apply the state's tag, if it has one, regardless of action */
6897	if (s && s->tag)
6898		pf_tag_packet(m, pf_get_tag(m), s->tag);
6899
6900#ifdef ALTQ
	/*
	 * Attach the ALTQ queue id of the matching rule to passed packets.
	 * A failed tag allocation is silently ignored.  pd.tos was set to
	 * 0 above and is not changed in this function.
	 */
6901	if (action == PF_PASS && r->qid) {
6902		struct m_tag	*mtag;
6903		struct altq_tag	*atag;
6904
6905		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6906		if (mtag != NULL) {
6907			atag = (struct altq_tag *)(mtag + 1);
6908			if (pd.tos == IPTOS_LOWDELAY)
6909				atag->qid = r->pqid;
6910			else
6911				atag->qid = r->qid;
6912			/* add hints for ecn */
6913			atag->af = AF_INET6;
6914			atag->hdr = h;
6915			m_tag_prepend(m, mtag);
6916		}
6917	}
6918#endif /* ALTQ */
6919
	/*
	 * Inbound TCP/UDP that an rdr or binat rule translated to the
	 * loopback address gets PACKET_TAG_PF_TRANSLATE_LOCALHOST.  A
	 * non-zero return from pf_add_mbuf_tag() turns the pass into a
	 * drop with reason PFRES_MEMORY.
	 */
6920	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6921	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6922	    (s->nat_rule.ptr->action == PF_RDR ||
6923	    s->nat_rule.ptr->action == PF_BINAT) &&
6924	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6) &&
6925	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
6926		action = PF_DROP;
6927		REASON_SET(&reason, PFRES_MEMORY);
6928	}
6929
6930	if (log)
6931		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, r, a, ruleset);
6932
	/*
	 * Per-interface counters, indexed [af][direction][pass/block];
	 * the first index is 1 here while pf_test() uses 0.
	 */
6933	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6934	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6935
	/*
	 * Rule, anchor, state, source-node and table statistics are
	 * updated when the packet passed or when the matching rule is
	 * itself a drop rule.
	 */
6936	if (action == PF_PASS || r->action == PF_DROP) {
6937		r->packets++;
6938		r->bytes += pd.tot_len;
6939		if (a != NULL) {
6940			a->packets++;
6941			a->bytes += pd.tot_len;
6942		}
6943		if (s != NULL) {
			/* slot 0: same direction as the state, slot 1: reverse */
6944			dirndx = (dir == s->direction) ? 0 : 1;
6945			s->packets[dirndx]++;
6946			s->bytes[dirndx] += pd.tot_len;
6947			if (s->nat_rule.ptr != NULL) {
6948				s->nat_rule.ptr->packets++;
6949				s->nat_rule.ptr->bytes += pd.tot_len;
6950			}
6951			if (s->src_node != NULL) {
6952				s->src_node->packets++;
6953				s->src_node->bytes += pd.tot_len;
6954			}
6955			if (s->nat_src_node != NULL) {
6956				s->nat_src_node->packets++;
6957				s->nat_src_node->bytes += pd.tot_len;
6958			}
6959		}
		/*
		 * tr is the rule whose tables get updated: the filter rule,
		 * or the translation rule when only the default rule matched.
		 */
6960		tr = r;
6961		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6962		if (nr != NULL) {
6963			struct pf_addr *x;
6964			/*
6965			 * XXX: we need to make sure that the addresses
6966			 * passed to pfr_update_stats() are the same as
6967			 * the addresses used during matching (pfr_match)
6968			 */
6969			if (r == &pf_default_rule) {
6970				tr = nr;
6971				x = (s == NULL || s->direction == dir) ?
6972				    &pd.baddr : &pd.naddr;
6973			} else {
6974				x = (s == NULL || s->direction == dir) ?
6975				    &pd.naddr : &pd.baddr;
6976			}
			/* redirect pd.src/pd.dst to the address to account against */
6977			if (x == &pd.baddr || s == NULL) {
6978				if (dir == PF_OUT)
6979					pd.src = x;
6980				else
6981					pd.dst = x;
6982			}
6983		}
6984		if (tr->src.addr.type == PF_ADDR_TABLE)
6985			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6986			    s->direction == dir) ? pd.src : pd.dst, pd.af,
6987			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6988			    tr->src.neg);
6989		if (tr->dst.addr.type == PF_ADDR_TABLE)
6990			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6991			    s->direction == dir) ? pd.dst : pd.src, pd.af,
6992			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6993			    tr->dst.neg);
6994	}
6995
6996
	/*
	 * PF_SYNPROXY_DROP: the mbuf is freed here and the caller is told
	 * PF_PASS with *m0 == NULL.  Otherwise, if the rule has a route
	 * option set, pf_route6() takes over the packet.
	 */
6997	if (action == PF_SYNPROXY_DROP) {
6998		m_freem(*m0);
6999		*m0 = NULL;
7000		action = PF_PASS;
7001	} else if (r->rt)
7002		/* pf_route6 can free the mbuf causing *m0 to become NULL */
7003		pf_route6(m0, r, dir, ifp, s);
7004
7005#ifdef __FreeBSD__
7006	PF_UNLOCK();
7007#endif
7008	return (action);
7009}
7010#endif /* INET6 */
7011
7012int
7013pf_check_congestion(struct ifqueue *ifq)
7014{
7015#ifdef __FreeBSD__
7016	/* XXX_IMPORT: later */
7017	return (0);
7018#else
7019	if (ifq->ifq_congestion)
7020		return (1);
7021	else
7022		return (0);
7023#endif
7024}
7025