pf.c revision 195699
1/*	$OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */
2/* add:	$OpenBSD: pf.c,v 1.559 2007/09/18 18:45:59 markus Exp $ */
3
4/*
5 * Copyright (c) 2001 Daniel Hartmeier
6 * Copyright (c) 2002,2003 Henning Brauer
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 *    - Redistributions of source code must retain the above copyright
14 *      notice, this list of conditions and the following disclaimer.
15 *    - Redistributions in binary form must reproduce the above
16 *      copyright notice, this list of conditions and the following
17 *      disclaimer in the documentation and/or other materials provided
18 *      with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Effort sponsored in part by the Defense Advanced Research Projects
34 * Agency (DARPA) and Air Force Research Laboratory, Air Force
35 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
36 *
37 */
38
39#ifdef __FreeBSD__
40#include "opt_inet.h"
41#include "opt_inet6.h"
42
43#include <sys/cdefs.h>
44__FBSDID("$FreeBSD: head/sys/contrib/pf/net/pf.c 195699 2009-07-14 22:48:30Z rwatson $");
45#endif
46
47#ifdef __FreeBSD__
48#include "opt_bpf.h"
49#include "opt_pf.h"
50
51#ifdef DEV_BPF
52#define	NBPFILTER	DEV_BPF
53#else
54#define	NBPFILTER	0
55#endif
56
57#ifdef DEV_PFLOG
58#define	NPFLOG		DEV_PFLOG
59#else
60#define	NPFLOG		0
61#endif
62
63#ifdef DEV_PFSYNC
64#define	NPFSYNC		DEV_PFSYNC
65#else
66#define	NPFSYNC		0
67#endif
68
69#else
70#include "bpfilter.h"
71#include "pflog.h"
72#include "pfsync.h"
73#endif
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/mbuf.h>
78#include <sys/filio.h>
79#include <sys/socket.h>
80#include <sys/socketvar.h>
81#include <sys/kernel.h>
82#include <sys/time.h>
83#ifdef __FreeBSD__
84#include <sys/sysctl.h>
85#include <sys/endian.h>
86#else
87#include <sys/pool.h>
88#endif
89#include <sys/proc.h>
90#ifdef __FreeBSD__
91#include <sys/kthread.h>
92#include <sys/lock.h>
93#include <sys/sx.h>
94#include <sys/vimage.h>
95#else
96#include <sys/rwlock.h>
97#endif
98
99#include <net/if.h>
100#include <net/if_types.h>
101#include <net/bpf.h>
102#include <net/route.h>
103#ifndef __FreeBSD__
104#include <net/radix_mpath.h>
105#endif
106
107#include <netinet/in.h>
108#include <netinet/in_var.h>
109#include <netinet/in_systm.h>
110#include <netinet/ip.h>
111#include <netinet/ip_var.h>
112#include <netinet/tcp.h>
113#include <netinet/tcp_seq.h>
114#include <netinet/udp.h>
115#include <netinet/ip_icmp.h>
116#include <netinet/in_pcb.h>
117#include <netinet/tcp_timer.h>
118#include <netinet/tcp_var.h>
119#include <netinet/udp_var.h>
120#include <netinet/icmp_var.h>
121#include <netinet/if_ether.h>
122
123#ifndef __FreeBSD__
124#include <dev/rndvar.h>
125#endif
126#include <net/pfvar.h>
127#include <net/if_pflog.h>
128
129#if NPFSYNC > 0
130#include <net/if_pfsync.h>
131#endif /* NPFSYNC > 0 */
132
133#ifdef INET6
134#include <netinet/ip6.h>
135#include <netinet/in_pcb.h>
136#include <netinet/icmp6.h>
137#include <netinet6/nd6.h>
138#ifdef __FreeBSD__
139#include <netinet6/ip6_var.h>
140#include <netinet6/in6_pcb.h>
141#endif
142#endif /* INET6 */
143
144#ifdef __FreeBSD__
145#include <machine/in_cksum.h>
146#include <sys/limits.h>
147#include <sys/ucred.h>
148#include <security/mac/mac_framework.h>
149
150extern int ip_optcopy(struct ip *, struct ip *);
151extern int debug_pfugidhack;
152#endif
153
/*
 * Conditional debug print.  `n' is the minimum pf_status.debug level at
 * which the message is emitted; `x' is a complete, parenthesised printf
 * argument list, e.g. DPFPRINTF(PF_DEBUG_MISC, ("pf: %d\n", val));
 *
 * The body is wrapped in do { } while (0) so that the expansion is a single
 * statement: a bare `if' here would silently capture an `else' that follows
 * the macro invocation.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
155
156/*
157 * Global variables
158 */
159
160struct pf_altqqueue	 pf_altqs[2];
161struct pf_palist	 pf_pabuf;
162struct pf_altqqueue	*pf_altqs_active;
163struct pf_altqqueue	*pf_altqs_inactive;
164struct pf_status	 pf_status;
165
166u_int32_t		 ticket_altqs_active;
167u_int32_t		 ticket_altqs_inactive;
168int			 altqs_inactive_open;
169u_int32_t		 ticket_pabuf;
170
/*
 * One frame of the anchor evaluation stack, plus the stack itself: a
 * fixed array of 64 frames named pf_anchor_stack.  Each frame holds a
 * ruleset pointer, a rule pointer, an anchor-node parent pointer and a
 * child anchor pointer.
 * NOTE(review): presumably consumed by the anchor step-in/step-out code
 * (pf_step_out_of_anchor is declared in this file) -- confirm the exact
 * meaning of each field against that code.
 */
171struct pf_anchor_stackframe {
172	struct pf_ruleset			*rs;
173	struct pf_rule				*r;
174	struct pf_anchor_node			*parent;
175	struct pf_anchor			*child;
176} pf_anchor_stack[64];
177
178#ifdef __FreeBSD__
179uma_zone_t		 pf_src_tree_pl, pf_rule_pl;
180uma_zone_t		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
181#else
182struct pool		 pf_src_tree_pl, pf_rule_pl;
183struct pool		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
184#endif
185
186void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
187
188void			 pf_init_threshold(struct pf_threshold *, u_int32_t,
189			    u_int32_t);
190void			 pf_add_threshold(struct pf_threshold *);
191int			 pf_check_threshold(struct pf_threshold *);
192
193void			 pf_change_ap(struct pf_addr *, u_int16_t *,
194			    u_int16_t *, u_int16_t *, struct pf_addr *,
195			    u_int16_t, u_int8_t, sa_family_t);
196int			 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
197			    struct tcphdr *, struct pf_state_peer *);
198#ifdef INET6
199void			 pf_change_a6(struct pf_addr *, u_int16_t *,
200			    struct pf_addr *, u_int8_t);
201#endif /* INET6 */
202void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
203			    struct pf_addr *, struct pf_addr *, u_int16_t,
204			    u_int16_t *, u_int16_t *, u_int16_t *,
205			    u_int16_t *, u_int8_t, sa_family_t);
206#ifdef __FreeBSD__
207void			 pf_send_tcp(struct mbuf *,
208			    const struct pf_rule *, sa_family_t,
209#else
210void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
211#endif
212			    const struct pf_addr *, const struct pf_addr *,
213			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
214			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
215			    u_int16_t, struct ether_header *, struct ifnet *);
216void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
217			    sa_family_t, struct pf_rule *);
218struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
219			    int, int, struct pfi_kif *,
220			    struct pf_addr *, u_int16_t, struct pf_addr *,
221			    u_int16_t, int);
222struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
223			    int, int, struct pfi_kif *, struct pf_src_node **,
224			    struct pf_addr *, u_int16_t,
225			    struct pf_addr *, u_int16_t,
226			    struct pf_addr *, u_int16_t *);
227int			 pf_test_tcp(struct pf_rule **, struct pf_state **,
228			    int, struct pfi_kif *, struct mbuf *, int,
229			    void *, struct pf_pdesc *, struct pf_rule **,
230#ifdef __FreeBSD__
231			    struct pf_ruleset **, struct ifqueue *,
232			    struct inpcb *);
233#else
234			    struct pf_ruleset **, struct ifqueue *);
235#endif
236int			 pf_test_udp(struct pf_rule **, struct pf_state **,
237			    int, struct pfi_kif *, struct mbuf *, int,
238			    void *, struct pf_pdesc *, struct pf_rule **,
239#ifdef __FreeBSD__
240			    struct pf_ruleset **, struct ifqueue *,
241			    struct inpcb *);
242#else
243			    struct pf_ruleset **, struct ifqueue *);
244#endif
245int			 pf_test_icmp(struct pf_rule **, struct pf_state **,
246			    int, struct pfi_kif *, struct mbuf *, int,
247			    void *, struct pf_pdesc *, struct pf_rule **,
248			    struct pf_ruleset **, struct ifqueue *);
249int			 pf_test_other(struct pf_rule **, struct pf_state **,
250			    int, struct pfi_kif *, struct mbuf *, int, void *,
251			    struct pf_pdesc *, struct pf_rule **,
252			    struct pf_ruleset **, struct ifqueue *);
253int			 pf_test_fragment(struct pf_rule **, int,
254			    struct pfi_kif *, struct mbuf *, void *,
255			    struct pf_pdesc *, struct pf_rule **,
256			    struct pf_ruleset **);
257int			 pf_test_state_tcp(struct pf_state **, int,
258			    struct pfi_kif *, struct mbuf *, int,
259			    void *, struct pf_pdesc *, u_short *);
260int			 pf_test_state_udp(struct pf_state **, int,
261			    struct pfi_kif *, struct mbuf *, int,
262			    void *, struct pf_pdesc *);
263int			 pf_test_state_icmp(struct pf_state **, int,
264			    struct pfi_kif *, struct mbuf *, int,
265			    void *, struct pf_pdesc *, u_short *);
266int			 pf_test_state_other(struct pf_state **, int,
267			    struct pfi_kif *, struct pf_pdesc *);
268int			 pf_match_tag(struct mbuf *, struct pf_rule *,
269			     struct pf_mtag *, int *);
270int			 pf_step_out_of_anchor(int *, struct pf_ruleset **,
271			     int, struct pf_rule **, struct pf_rule **,
272			     int *);
273void			 pf_hash(struct pf_addr *, struct pf_addr *,
274			    struct pf_poolhashkey *, sa_family_t);
275int			 pf_map_addr(u_int8_t, struct pf_rule *,
276			    struct pf_addr *, struct pf_addr *,
277			    struct pf_addr *, struct pf_src_node **);
278int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
279			    struct pf_addr *, struct pf_addr *, u_int16_t,
280			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
281			    struct pf_src_node **);
282void			 pf_route(struct mbuf **, struct pf_rule *, int,
283			    struct ifnet *, struct pf_state *,
284			    struct pf_pdesc *);
285void			 pf_route6(struct mbuf **, struct pf_rule *, int,
286			    struct ifnet *, struct pf_state *,
287			    struct pf_pdesc *);
288#ifdef __FreeBSD__
289/* XXX: import */
290#else
291int			 pf_socket_lookup(int, struct pf_pdesc *);
292#endif
293u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
294			    sa_family_t);
295u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
296			    sa_family_t);
297u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
298				u_int16_t);
299void			 pf_set_rt_ifp(struct pf_state *,
300			    struct pf_addr *);
301int			 pf_check_proto_cksum(struct mbuf *, int, int,
302			    u_int8_t, sa_family_t);
303int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
304			    struct pf_addr_wrap *);
305struct pf_state		*pf_find_state_recurse(struct pfi_kif *,
306			    struct pf_state_cmp *, u_int8_t);
307int			 pf_src_connlimit(struct pf_state **);
308int			 pf_check_congestion(struct ifqueue *);
309
310#ifdef __FreeBSD__
311int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
312
313extern int pf_end_threads;
314
315struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
316#else
317extern struct pool pfr_ktable_pl;
318extern struct pool pfr_kentry_pl;
319
320struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
321	{ &pf_state_pl, PFSTATE_HIWAT },
322	{ &pf_src_tree_pl, PFSNODE_HIWAT },
323	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT },
324	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT },
325	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT }
326};
327#endif
328
/*
 * Find the state entry for the packet being processed.
 *
 * Expects the enclosing function to provide `direction', `kif', `key' and
 * `state' (a struct pf_state **), and may return from that function:
 *   - PF_DROP when no state matches or the match is already marked
 *     PFTM_PURGE;
 *   - PF_PASS for an outbound packet whose state belongs to a route-to
 *     (out) or reply-to (in) rule and is bound to a different, non-NULL
 *     rt_kif than the current interface.
 * Inbound packets search the ext/gwy tree, everything else the lan/ext tree.
 */
#define STATE_LOOKUP()							\
	do {								\
		*state = pf_find_state_recurse(kif, &key,		\
		    (direction == PF_IN) ? PF_EXT_GWY : PF_LAN_EXT);	\
		if (*state == NULL || (*state)->timeout == PFTM_PURGE)	\
			return (PF_DROP);				\
		if (direction == PF_OUT &&				\
		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
		    (*state)->rule.ptr->direction == PF_OUT) ||		\
		    ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
		    (*state)->rule.ptr->direction == PF_IN)) &&		\
		    (*state)->rt_kif != NULL &&				\
		    (*state)->rt_kif != kif)				\
			return (PF_PASS);				\
	} while (0)
348
/*
 * Non-zero when the state `s' differs between its lan and gwy sides:
 * address word 0 differs, or (for AF_INET6 only) any of address words
 * 1..3 differ, or the ports differ.
 *
 * The whole expansion is parenthesised so the macro can be safely negated
 * or combined with other operators (e.g. !STATE_TRANSLATE(s)).
 */
#define	STATE_TRANSLATE(s)						\
	((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] ||		\
	((s)->af == AF_INET6 &&						\
	((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] ||		\
	(s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] ||		\
	(s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) ||		\
	(s)->lan.port != (s)->gwy.port)
356
/*
 * Select the interface a new state is attached to: `k' when rule `r' has
 * PFRULE_IFBOUND set in rule_flag, otherwise the global pfi_all.
 * Parenthesised as a whole so the result can be used inside a larger
 * expression without the conditional swallowing neighbouring operators.
 */
#define BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
359
/*
 * Increment the `states' counter of every rule state `s' points at:
 * always the main rule, and the anchor and NAT rules when those pointers
 * are non-NULL.  `s' is parenthesised so any pointer expression
 * (e.g. *state) may be passed.
 */
#define STATE_INC_COUNTERS(s)				\
	do {						\
		(s)->rule.ptr->states++;		\
		if ((s)->anchor.ptr != NULL)		\
			(s)->anchor.ptr->states++;	\
		if ((s)->nat_rule.ptr != NULL)		\
			(s)->nat_rule.ptr->states++;	\
	} while (0)
368
/*
 * Decrement the `states' counter of every rule state `s' points at:
 * the NAT and anchor rules when those pointers are non-NULL, then the
 * main rule.  `s' is parenthesised so any pointer expression
 * (e.g. *state) may be passed.
 */
#define STATE_DEC_COUNTERS(s)				\
	do {						\
		if ((s)->nat_rule.ptr != NULL)		\
			(s)->nat_rule.ptr->states--;	\
		if ((s)->anchor.ptr != NULL)		\
			(s)->anchor.ptr->states--;	\
		(s)->rule.ptr->states--;		\
	} while (0)
377
378struct pf_src_tree tree_src_tracking;
379
380struct pf_state_tree_id tree_id;
381struct pf_state_queue state_list;
382
383#ifdef __FreeBSD__
384static int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
385static int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *);
386static int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *);
387static int pf_state_compare_id(struct pf_state *, struct pf_state *);
388#endif
389
390RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
391RB_GENERATE(pf_state_tree_lan_ext, pf_state,
392    u.s.entry_lan_ext, pf_state_compare_lan_ext);
393RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
394    u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
395RB_GENERATE(pf_state_tree_id, pf_state,
396    u.s.entry_id, pf_state_compare_id);
397
398#ifdef __FreeBSD__
399static int
400#else
401static __inline int
402#endif
403pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
404{
405	int	diff;
406
407	if (a->rule.ptr > b->rule.ptr)
408		return (1);
409	if (a->rule.ptr < b->rule.ptr)
410		return (-1);
411	if ((diff = a->af - b->af) != 0)
412		return (diff);
413	switch (a->af) {
414#ifdef INET
415	case AF_INET:
416		if (a->addr.addr32[0] > b->addr.addr32[0])
417			return (1);
418		if (a->addr.addr32[0] < b->addr.addr32[0])
419			return (-1);
420		break;
421#endif /* INET */
422#ifdef INET6
423	case AF_INET6:
424		if (a->addr.addr32[3] > b->addr.addr32[3])
425			return (1);
426		if (a->addr.addr32[3] < b->addr.addr32[3])
427			return (-1);
428		if (a->addr.addr32[2] > b->addr.addr32[2])
429			return (1);
430		if (a->addr.addr32[2] < b->addr.addr32[2])
431			return (-1);
432		if (a->addr.addr32[1] > b->addr.addr32[1])
433			return (1);
434		if (a->addr.addr32[1] < b->addr.addr32[1])
435			return (-1);
436		if (a->addr.addr32[0] > b->addr.addr32[0])
437			return (1);
438		if (a->addr.addr32[0] < b->addr.addr32[0])
439			return (-1);
440		break;
441#endif /* INET6 */
442	}
443	return (0);
444}
445
446#ifdef __FreeBSD__
447static int
448#else
449static __inline int
450#endif
451pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
452{
453	int	diff;
454
455	if ((diff = a->proto - b->proto) != 0)
456		return (diff);
457	if ((diff = a->af - b->af) != 0)
458		return (diff);
459	switch (a->af) {
460#ifdef INET
461	case AF_INET:
462		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
463			return (1);
464		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
465			return (-1);
466		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
467			return (1);
468		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
469			return (-1);
470		break;
471#endif /* INET */
472#ifdef INET6
473	case AF_INET6:
474		if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
475			return (1);
476		if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
477			return (-1);
478		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
479			return (1);
480		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
481			return (-1);
482		if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
483			return (1);
484		if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
485			return (-1);
486		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
487			return (1);
488		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
489			return (-1);
490		if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
491			return (1);
492		if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
493			return (-1);
494		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
495			return (1);
496		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
497			return (-1);
498		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
499			return (1);
500		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
501			return (-1);
502		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
503			return (1);
504		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
505			return (-1);
506		break;
507#endif /* INET6 */
508	}
509
510	if ((diff = a->lan.port - b->lan.port) != 0)
511		return (diff);
512	if ((diff = a->ext.port - b->ext.port) != 0)
513		return (diff);
514
515	return (0);
516}
517
518#ifdef __FreeBSD__
519static int
520#else
521static __inline int
522#endif
523pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
524{
525	int	diff;
526
527	if ((diff = a->proto - b->proto) != 0)
528		return (diff);
529	if ((diff = a->af - b->af) != 0)
530		return (diff);
531	switch (a->af) {
532#ifdef INET
533	case AF_INET:
534		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
535			return (1);
536		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
537			return (-1);
538		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
539			return (1);
540		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
541			return (-1);
542		break;
543#endif /* INET */
544#ifdef INET6
545	case AF_INET6:
546		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
547			return (1);
548		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
549			return (-1);
550		if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
551			return (1);
552		if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
553			return (-1);
554		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
555			return (1);
556		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
557			return (-1);
558		if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
559			return (1);
560		if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
561			return (-1);
562		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
563			return (1);
564		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
565			return (-1);
566		if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
567			return (1);
568		if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
569			return (-1);
570		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
571			return (1);
572		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
573			return (-1);
574		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
575			return (1);
576		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
577			return (-1);
578		break;
579#endif /* INET6 */
580	}
581
582	if ((diff = a->ext.port - b->ext.port) != 0)
583		return (diff);
584	if ((diff = a->gwy.port - b->gwy.port) != 0)
585		return (diff);
586
587	return (0);
588}
589
590#ifdef __FreeBSD__
591static int
592#else
593static __inline int
594#endif
595pf_state_compare_id(struct pf_state *a, struct pf_state *b)
596{
597	if (a->id > b->id)
598		return (1);
599	if (a->id < b->id)
600		return (-1);
601	if (a->creatorid > b->creatorid)
602		return (1);
603	if (a->creatorid < b->creatorid)
604		return (-1);
605
606	return (0);
607}
608
#ifdef INET6
/*
 * Copy an address of family `af' from `src' to `dst'.
 * AF_INET copies only the first 32-bit word (and only when INET is
 * compiled in); AF_INET6 copies all four words.  Any other family leaves
 * `dst' untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	int	w;

#ifdef INET
	if (af == AF_INET) {
		dst->addr32[0] = src->addr32[0];
		return;
	}
#endif /* INET */
	if (af == AF_INET6) {
		for (w = 0; w < 4; w++)
			dst->addr32[w] = src->addr32[w];
	}
}
#endif /* INET6 */
628
629struct pf_state *
630pf_find_state_byid(struct pf_state_cmp *key)
631{
632	pf_status.fcounters[FCNT_STATE_SEARCH]++;
633	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
634}
635
636struct pf_state *
637pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree)
638{
639	struct pf_state *s;
640
641	pf_status.fcounters[FCNT_STATE_SEARCH]++;
642
643	switch (tree) {
644	case PF_LAN_EXT:
645		if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext,
646		    (struct pf_state *)key)) != NULL)
647			return (s);
648		if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext,
649		    (struct pf_state *)key)) != NULL)
650			return (s);
651		return (NULL);
652	case PF_EXT_GWY:
653		if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy,
654		    (struct pf_state *)key)) != NULL)
655			return (s);
656		if ((s = RB_FIND(pf_state_tree_ext_gwy, &pfi_all->pfik_ext_gwy,
657		    (struct pf_state *)key)) != NULL)
658			return (s);
659		return (NULL);
660	default:
661		panic("pf_find_state_recurse");
662	}
663}
664
665struct pf_state *
666pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more)
667{
668	struct pf_state *s, *ss = NULL;
669	struct pfi_kif	*kif;
670
671	pf_status.fcounters[FCNT_STATE_SEARCH]++;
672
673	switch (tree) {
674	case PF_LAN_EXT:
675		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
676			s = RB_FIND(pf_state_tree_lan_ext,
677			    &kif->pfik_lan_ext, (struct pf_state *)key);
678			if (s == NULL)
679				continue;
680			if (more == NULL)
681				return (s);
682			ss = s;
683			(*more)++;
684		}
685		return (ss);
686	case PF_EXT_GWY:
687		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
688			s = RB_FIND(pf_state_tree_ext_gwy,
689			    &kif->pfik_ext_gwy, (struct pf_state *)key);
690			if (s == NULL)
691				continue;
692			if (more == NULL)
693				return (s);
694			ss = s;
695			(*more)++;
696		}
697		return (ss);
698	default:
699		panic("pf_find_state_all");
700	}
701}
702
703void
704pf_init_threshold(struct pf_threshold *threshold,
705    u_int32_t limit, u_int32_t seconds)
706{
707	threshold->limit = limit * PF_THRESHOLD_MULT;
708	threshold->seconds = seconds;
709	threshold->count = 0;
710	threshold->last = time_second;
711}
712
713void
714pf_add_threshold(struct pf_threshold *threshold)
715{
716	u_int32_t t = time_second, diff = t - threshold->last;
717
718	if (diff >= threshold->seconds)
719		threshold->count = 0;
720	else
721		threshold->count -= threshold->count * diff /
722		    threshold->seconds;
723	threshold->count += PF_THRESHOLD_MULT;
724	threshold->last = t;
725}
726
727int
728pf_check_threshold(struct pf_threshold *threshold)
729{
730	return (threshold->count > threshold->limit);
731}
732
733int
734pf_src_connlimit(struct pf_state **state)
735{
736	struct pf_state	*s;
737	int bad = 0;
738
739	(*state)->src_node->conn++;
740	(*state)->src.tcp_est = 1;
741	pf_add_threshold(&(*state)->src_node->conn_rate);
742
743	if ((*state)->rule.ptr->max_src_conn &&
744	    (*state)->rule.ptr->max_src_conn <
745	    (*state)->src_node->conn) {
746		pf_status.lcounters[LCNT_SRCCONN]++;
747		bad++;
748	}
749
750	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
751	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
752		pf_status.lcounters[LCNT_SRCCONNRATE]++;
753		bad++;
754	}
755
756	if (!bad)
757		return (0);
758
759	if ((*state)->rule.ptr->overload_tbl) {
760		struct pfr_addr p;
761		u_int32_t	killed = 0;
762
763		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
764		if (pf_status.debug >= PF_DEBUG_MISC) {
765			printf("pf_src_connlimit: blocking address ");
766			pf_print_host(&(*state)->src_node->addr, 0,
767			    (*state)->af);
768		}
769
770		bzero(&p, sizeof(p));
771		p.pfra_af = (*state)->af;
772		switch ((*state)->af) {
773#ifdef INET
774		case AF_INET:
775			p.pfra_net = 32;
776			p.pfra_ip4addr = (*state)->src_node->addr.v4;
777			break;
778#endif /* INET */
779#ifdef INET6
780		case AF_INET6:
781			p.pfra_net = 128;
782			p.pfra_ip6addr = (*state)->src_node->addr.v6;
783			break;
784#endif /* INET6 */
785		}
786
787		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
788		    &p, time_second);
789
790		/* kill existing states if that's required. */
791		if ((*state)->rule.ptr->flush) {
792			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
793
794			RB_FOREACH(s, pf_state_tree_id, &tree_id) {
795				/*
796				 * Kill states from this source.  (Only those
797				 * from the same rule if PF_FLUSH_GLOBAL is not
798				 * set)
799				 */
800				if (s->af == (*state)->af &&
801				    (((*state)->direction == PF_OUT &&
802				    PF_AEQ(&(*state)->src_node->addr,
803				    &s->lan.addr, s->af)) ||
804				    ((*state)->direction == PF_IN &&
805				    PF_AEQ(&(*state)->src_node->addr,
806				    &s->ext.addr, s->af))) &&
807				    ((*state)->rule.ptr->flush &
808				    PF_FLUSH_GLOBAL ||
809				    (*state)->rule.ptr == s->rule.ptr)) {
810					s->timeout = PFTM_PURGE;
811					s->src.state = s->dst.state =
812					    TCPS_CLOSED;
813					killed++;
814				}
815			}
816			if (pf_status.debug >= PF_DEBUG_MISC)
817				printf(", %u states killed", killed);
818		}
819		if (pf_status.debug >= PF_DEBUG_MISC)
820			printf("\n");
821	}
822
823	/* kill this state */
824	(*state)->timeout = PFTM_PURGE;
825	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
826	return (1);
827}
828
829int
830pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
831    struct pf_addr *src, sa_family_t af)
832{
833	struct pf_src_node	k;
834
835	if (*sn == NULL) {
836		k.af = af;
837		PF_ACPY(&k.addr, src, af);
838		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
839		    rule->rpool.opts & PF_POOL_STICKYADDR)
840			k.rule.ptr = rule;
841		else
842			k.rule.ptr = NULL;
843		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
844		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
845	}
846	if (*sn == NULL) {
847		if (!rule->max_src_nodes ||
848		    rule->src_nodes < rule->max_src_nodes)
849			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
850		else
851			pf_status.lcounters[LCNT_SRCNODES]++;
852		if ((*sn) == NULL)
853			return (-1);
854		bzero(*sn, sizeof(struct pf_src_node));
855
856		pf_init_threshold(&(*sn)->conn_rate,
857		    rule->max_src_conn_rate.limit,
858		    rule->max_src_conn_rate.seconds);
859
860		(*sn)->af = af;
861		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
862		    rule->rpool.opts & PF_POOL_STICKYADDR)
863			(*sn)->rule.ptr = rule;
864		else
865			(*sn)->rule.ptr = NULL;
866		PF_ACPY(&(*sn)->addr, src, af);
867		if (RB_INSERT(pf_src_tree,
868		    &tree_src_tracking, *sn) != NULL) {
869			if (pf_status.debug >= PF_DEBUG_MISC) {
870				printf("pf: src_tree insert failed: ");
871				pf_print_host(&(*sn)->addr, 0, af);
872				printf("\n");
873			}
874			pool_put(&pf_src_tree_pl, *sn);
875			return (-1);
876		}
877		(*sn)->creation = time_second;
878		(*sn)->ruletype = rule->action;
879		if ((*sn)->rule.ptr != NULL)
880			(*sn)->rule.ptr->src_nodes++;
881		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
882		pf_status.src_nodes++;
883	} else {
884		if (rule->max_src_states &&
885		    (*sn)->states >= rule->max_src_states) {
886			pf_status.lcounters[LCNT_SRCSTATES]++;
887			return (-1);
888		}
889	}
890	return (0);
891}
892
893int
894pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
895{
896	/* Thou MUST NOT insert multiple duplicate keys */
897	state->u.s.kif = kif;
898	if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
899		if (pf_status.debug >= PF_DEBUG_MISC) {
900			printf("pf: state insert failed: tree_lan_ext");
901			printf(" lan: ");
902			pf_print_host(&state->lan.addr, state->lan.port,
903			    state->af);
904			printf(" gwy: ");
905			pf_print_host(&state->gwy.addr, state->gwy.port,
906			    state->af);
907			printf(" ext: ");
908			pf_print_host(&state->ext.addr, state->ext.port,
909			    state->af);
910			if (state->sync_flags & PFSTATE_FROMSYNC)
911				printf(" (from sync)");
912			printf("\n");
913		}
914		return (-1);
915	}
916
917	if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
918		if (pf_status.debug >= PF_DEBUG_MISC) {
919			printf("pf: state insert failed: tree_ext_gwy");
920			printf(" lan: ");
921			pf_print_host(&state->lan.addr, state->lan.port,
922			    state->af);
923			printf(" gwy: ");
924			pf_print_host(&state->gwy.addr, state->gwy.port,
925			    state->af);
926			printf(" ext: ");
927			pf_print_host(&state->ext.addr, state->ext.port,
928			    state->af);
929			if (state->sync_flags & PFSTATE_FROMSYNC)
930				printf(" (from sync)");
931			printf("\n");
932		}
933		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
934		return (-1);
935	}
936
937	if (state->id == 0 && state->creatorid == 0) {
938		state->id = htobe64(pf_status.stateid++);
939		state->creatorid = pf_status.hostid;
940	}
941	if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
942		if (pf_status.debug >= PF_DEBUG_MISC) {
943#ifdef __FreeBSD__
944			printf("pf: state insert failed: "
945			    "id: %016llx creatorid: %08x",
946			    (long long)be64toh(state->id),
947			    ntohl(state->creatorid));
948#else
949			printf("pf: state insert failed: "
950			    "id: %016llx creatorid: %08x",
951			    betoh64(state->id), ntohl(state->creatorid));
952#endif
953			if (state->sync_flags & PFSTATE_FROMSYNC)
954				printf(" (from sync)");
955			printf("\n");
956		}
957		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
958		RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
959		return (-1);
960	}
961	TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list);
962	pf_status.fcounters[FCNT_STATE_INSERT]++;
963	pf_status.states++;
964	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
965#if NPFSYNC
966	pfsync_insert_state(state);
967#endif
968	return (0);
969}
970
/*
 * Body of the pf purge kernel thread.  Loops forever; each pass sleeps
 * on tsleep() with a timeout of 1 * hz and then:
 *   - purges a slice of the state table sized
 *     1 + states / timeout[PFTM_INTERVAL];
 *   - once nloops reaches timeout[PFTM_INTERVAL] passes, also purges
 *     expired fragments and source nodes and restarts the count.
 * On FreeBSD the work runs with pf_consistency_lock held shared and
 * PF_LOCK held, and a non-zero pf_end_threads makes the thread purge
 * everything, bump pf_end_threads, wake sleepers on pf_purge_thread and
 * exit.  The argument `v' is not used.
 */
971void
972pf_purge_thread(void *v)
973{
974	int nloops = 0, s;
975
976	for (;;) {
977		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
978
979#ifdef __FreeBSD__
980		sx_slock(&pf_consistency_lock);
981		PF_LOCK();
982
		/* Shutdown requested: final full purge, then exit the thread. */
983		if (pf_end_threads) {
984			pf_purge_expired_states(pf_status.states);
985			pf_purge_expired_fragments();
986			pf_purge_expired_src_nodes(0);
			/* Incremented before the wakeup() below. */
987			pf_end_threads++;
988
989			sx_sunlock(&pf_consistency_lock);
990			PF_UNLOCK();
991			wakeup(pf_purge_thread);
992			kproc_exit(0);
993		}
994#endif
995		s = splsoftnet();
996
997		/* process a fraction of the state table every second */
998		pf_purge_expired_states(1 + (pf_status.states
999		    / pf_default_rule.timeout[PFTM_INTERVAL]));
1000
1001		/* purge other expired types every PFTM_INTERVAL seconds */
1002		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
1003			pf_purge_expired_fragments();
1004			pf_purge_expired_src_nodes(0);
1005			nloops = 0;
1006		}
1007
1008		splx(s);
1009#ifdef __FreeBSD__
1010		PF_UNLOCK();
1011		sx_sunlock(&pf_consistency_lock);
1012#endif
1013	}
1014}
1015
1016u_int32_t
1017pf_state_expires(const struct pf_state *state)
1018{
1019	u_int32_t	timeout;
1020	u_int32_t	start;
1021	u_int32_t	end;
1022	u_int32_t	states;
1023
1024	/* handle all PFTM_* > PFTM_MAX here */
1025	if (state->timeout == PFTM_PURGE)
1026		return (time_second);
1027	if (state->timeout == PFTM_UNTIL_PACKET)
1028		return (0);
1029#ifdef __FreeBSD__
1030	KASSERT(state->timeout != PFTM_UNLINKED,
1031	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
1032	KASSERT((state->timeout < PFTM_MAX),
1033	    ("pf_state_expires: timeout > PFTM_MAX"));
1034#else
1035	KASSERT(state->timeout != PFTM_UNLINKED);
1036	KASSERT(state->timeout < PFTM_MAX);
1037#endif
1038	timeout = state->rule.ptr->timeout[state->timeout];
1039	if (!timeout)
1040		timeout = pf_default_rule.timeout[state->timeout];
1041	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1042	if (start) {
1043		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1044		states = state->rule.ptr->states;
1045	} else {
1046		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1047		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1048		states = pf_status.states;
1049	}
1050	if (end && states > start && start < end) {
1051		if (states < end)
1052			return (state->expire + timeout * (end - states) /
1053			    (end - start));
1054		else
1055			return (time_second);
1056	}
1057	return (state->expire + timeout);
1058}
1059
1060void
1061pf_purge_expired_src_nodes(int waslocked)
1062{
1063	 struct pf_src_node		*cur, *next;
1064	 int				 locked = waslocked;
1065
1066	 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
1067		 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
1068
1069		 if (cur->states <= 0 && cur->expire <= time_second) {
1070			 if (! locked) {
1071#ifdef __FreeBSD__
1072				 if (!sx_try_upgrade(&pf_consistency_lock)) {
1073					 PF_UNLOCK();
1074					 sx_sunlock(&pf_consistency_lock);
1075					 sx_xlock(&pf_consistency_lock);
1076					 PF_LOCK();
1077				 }
1078#else
1079				 rw_enter_write(&pf_consistency_lock);
1080#endif
1081			 	 next = RB_NEXT(pf_src_tree,
1082				     &tree_src_tracking, cur);
1083				 locked = 1;
1084			 }
1085			 if (cur->rule.ptr != NULL) {
1086				 cur->rule.ptr->src_nodes--;
1087				 if (cur->rule.ptr->states <= 0 &&
1088				     cur->rule.ptr->max_src_nodes <= 0)
1089					 pf_rm_rule(NULL, cur->rule.ptr);
1090			 }
1091			 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
1092			 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1093			 pf_status.src_nodes--;
1094			 pool_put(&pf_src_tree_pl, cur);
1095		 }
1096	 }
1097
1098	 if (locked && !waslocked)
1099#ifdef __FreeBSD__
1100		sx_downgrade(&pf_consistency_lock);
1101#else
1102		rw_exit_write(&pf_consistency_lock);
1103#endif
1104}
1105
1106void
1107pf_src_tree_remove_state(struct pf_state *s)
1108{
1109	u_int32_t timeout;
1110
1111	if (s->src_node != NULL) {
1112		if (s->proto == IPPROTO_TCP) {
1113			if (s->src.tcp_est)
1114				--s->src_node->conn;
1115		}
1116		if (--s->src_node->states <= 0) {
1117			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1118			if (!timeout)
1119				timeout =
1120				    pf_default_rule.timeout[PFTM_SRC_NODE];
1121			s->src_node->expire = time_second + timeout;
1122		}
1123	}
1124	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1125		if (--s->nat_src_node->states <= 0) {
1126			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1127			if (!timeout)
1128				timeout =
1129				    pf_default_rule.timeout[PFTM_SRC_NODE];
1130			s->nat_src_node->expire = time_second + timeout;
1131		}
1132	}
1133	s->src_node = s->nat_src_node = NULL;
1134}
1135
1136/* callers should be at splsoftnet */
1137void
1138pf_unlink_state(struct pf_state *cur)
1139{
1140#ifdef __FreeBSD__
1141	if (cur->local_flags & PFSTATE_EXPIRING)
1142		return;
1143	cur->local_flags |= PFSTATE_EXPIRING;
1144#endif
1145	if (cur->src.state == PF_TCPS_PROXY_DST) {
1146#ifdef __FreeBSD__
1147		pf_send_tcp(NULL, cur->rule.ptr, cur->af,
1148#else
1149		pf_send_tcp(cur->rule.ptr, cur->af,
1150#endif
1151		    &cur->ext.addr, &cur->lan.addr,
1152		    cur->ext.port, cur->lan.port,
1153		    cur->src.seqhi, cur->src.seqlo + 1,
1154		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1155	}
1156	RB_REMOVE(pf_state_tree_ext_gwy,
1157	    &cur->u.s.kif->pfik_ext_gwy, cur);
1158	RB_REMOVE(pf_state_tree_lan_ext,
1159	    &cur->u.s.kif->pfik_lan_ext, cur);
1160	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
1161#if NPFSYNC
1162	if (cur->creatorid == pf_status.hostid)
1163		pfsync_delete_state(cur);
1164#endif
1165	cur->timeout = PFTM_UNLINKED;
1166	pf_src_tree_remove_state(cur);
1167}
1168
1169/* callers should be at splsoftnet and hold the
1170 * write_lock on pf_consistency_lock */
1171void
1172pf_free_state(struct pf_state *cur)
1173{
1174#if NPFSYNC
1175	if (pfsyncif != NULL &&
1176	    (pfsyncif->sc_bulk_send_next == cur ||
1177	    pfsyncif->sc_bulk_terminator == cur))
1178		return;
1179#endif
1180#ifdef __FreeBSD__
1181	KASSERT(cur->timeout == PFTM_UNLINKED,
1182	    ("pf_free_state: cur->timeout != PFTM_UNLINKED"));
1183#else
1184	KASSERT(cur->timeout == PFTM_UNLINKED);
1185#endif
1186	if (--cur->rule.ptr->states <= 0 &&
1187	    cur->rule.ptr->src_nodes <= 0)
1188		pf_rm_rule(NULL, cur->rule.ptr);
1189	if (cur->nat_rule.ptr != NULL)
1190		if (--cur->nat_rule.ptr->states <= 0 &&
1191			cur->nat_rule.ptr->src_nodes <= 0)
1192			pf_rm_rule(NULL, cur->nat_rule.ptr);
1193	if (cur->anchor.ptr != NULL)
1194		if (--cur->anchor.ptr->states <= 0)
1195			pf_rm_rule(NULL, cur->anchor.ptr);
1196	pf_normalize_tcp_cleanup(cur);
1197	pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE);
1198	TAILQ_REMOVE(&state_list, cur, u.s.entry_list);
1199	if (cur->tag)
1200		pf_tag_unref(cur->tag);
1201	pool_put(&pf_state_pl, cur);
1202	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1203	pf_status.states--;
1204}
1205
1206void
1207pf_purge_expired_states(u_int32_t maxcheck)
1208{
1209	static struct pf_state	*cur = NULL;
1210	struct pf_state		*next;
1211	int 			 locked = 0;
1212
1213	while (maxcheck--) {
1214		/* wrap to start of list when we hit the end */
1215		if (cur == NULL) {
1216			cur = TAILQ_FIRST(&state_list);
1217			if (cur == NULL)
1218				break;	/* list empty */
1219		}
1220
1221		/* get next state, as cur may get deleted */
1222		next = TAILQ_NEXT(cur, u.s.entry_list);
1223
1224		if (cur->timeout == PFTM_UNLINKED) {
1225			/* free unlinked state */
1226			if (! locked) {
1227#ifdef __FreeBSD__
1228				 if (!sx_try_upgrade(&pf_consistency_lock)) {
1229					 PF_UNLOCK();
1230					 sx_sunlock(&pf_consistency_lock);
1231					 sx_xlock(&pf_consistency_lock);
1232					 PF_LOCK();
1233				 }
1234#else
1235				rw_enter_write(&pf_consistency_lock);
1236#endif
1237				locked = 1;
1238			}
1239			pf_free_state(cur);
1240		} else if (pf_state_expires(cur) <= time_second) {
1241			/* unlink and free expired state */
1242			pf_unlink_state(cur);
1243			if (! locked) {
1244#ifdef __FreeBSD__
1245				 if (!sx_try_upgrade(&pf_consistency_lock)) {
1246					 PF_UNLOCK();
1247					 sx_sunlock(&pf_consistency_lock);
1248					 sx_xlock(&pf_consistency_lock);
1249					 PF_LOCK();
1250				 }
1251#else
1252				rw_enter_write(&pf_consistency_lock);
1253#endif
1254				locked = 1;
1255			}
1256			pf_free_state(cur);
1257		}
1258		cur = next;
1259	}
1260
1261	if (locked)
1262#ifdef __FreeBSD__
1263		sx_downgrade(&pf_consistency_lock);
1264#else
1265		rw_exit_write(&pf_consistency_lock);
1266#endif
1267}
1268
1269int
1270pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1271{
1272	if (aw->type != PF_ADDR_TABLE)
1273		return (0);
1274	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1275		return (1);
1276	return (0);
1277}
1278
1279void
1280pf_tbladdr_remove(struct pf_addr_wrap *aw)
1281{
1282	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1283		return;
1284	pfr_detach_table(aw->p.tbl);
1285	aw->p.tbl = NULL;
1286}
1287
1288void
1289pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1290{
1291	struct pfr_ktable *kt = aw->p.tbl;
1292
1293	if (aw->type != PF_ADDR_TABLE || kt == NULL)
1294		return;
1295	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1296		kt = kt->pfrkt_root;
1297	aw->p.tbl = NULL;
1298	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1299		kt->pfrkt_cnt : -1;
1300}
1301
/*
 * Print an address and, when p is non-zero, a port.
 *
 * addr: address to print
 * p:    port; converted with ntohs() before printing, 0 suppresses it
 * af:   address family; other families than those compiled in print nothing
 *
 * IPv4 is printed as a.b.c.d[:port].  IPv6 is printed as colon separated
 * hexadecimal groups followed by "[port]"; the longest run of two or more
 * zero groups is collapsed to "::".
 */
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			printf(":%u", p);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;

		/*
		 * 255 marks "no run".  With both ends at 255 the length
		 * difference is 0, so a run is recorded only when it is
		 * strictly longer than the best one so far, and a single
		 * zero group (difference 0) is never collapsed.
		 */
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		/* a run that reaches the last group is still pending here */
		if ((curend - curstart) > (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}

		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				/*
				 * Inside the collapsed run: the preceding
				 * group already printed one ':'; emit the
				 * second at the end of the run, plus a
				 * leading one when the run starts at 0.
				 */
				if (i == 0)
					printf(":");
				if (i == maxend)
					printf(":");
			} else {
				b = ntohs(addr->addr16[i]);
				printf("%x", b);
				if (i < 7)
					printf(":");
			}
		}
		if (p) {
			p = ntohs(p);
			printf("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
1365
1366void
1367pf_print_state(struct pf_state *s)
1368{
1369	switch (s->proto) {
1370	case IPPROTO_TCP:
1371		printf("TCP ");
1372		break;
1373	case IPPROTO_UDP:
1374		printf("UDP ");
1375		break;
1376	case IPPROTO_ICMP:
1377		printf("ICMP ");
1378		break;
1379	case IPPROTO_ICMPV6:
1380		printf("ICMPV6 ");
1381		break;
1382	default:
1383		printf("%u ", s->proto);
1384		break;
1385	}
1386	pf_print_host(&s->lan.addr, s->lan.port, s->af);
1387	printf(" ");
1388	pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
1389	printf(" ");
1390	pf_print_host(&s->ext.addr, s->ext.port, s->af);
1391	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
1392	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
1393	if (s->src.wscale && s->dst.wscale)
1394		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1395	printf("]");
1396	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
1397	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1398	if (s->src.wscale && s->dst.wscale)
1399		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1400	printf("]");
1401	printf(" %u:%u", s->src.state, s->dst.state);
1402}
1403
1404void
1405pf_print_flags(u_int8_t f)
1406{
1407	if (f)
1408		printf(" ");
1409	if (f & TH_FIN)
1410		printf("F");
1411	if (f & TH_SYN)
1412		printf("S");
1413	if (f & TH_RST)
1414		printf("R");
1415	if (f & TH_PUSH)
1416		printf("P");
1417	if (f & TH_ACK)
1418		printf("A");
1419	if (f & TH_URG)
1420		printf("U");
1421	if (f & TH_ECE)
1422		printf("E");
1423	if (f & TH_CWR)
1424		printf("W");
1425}
1426
/*
 * For skip-step index i, set the skip[i] pointer of every rule from
 * head[i] up to, but not including, cur to cur, leaving head[i] == cur.
 * Expands in terms of the local variables `head' and `cur' of the
 * function that uses it.
 */
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)
1434
1435void
1436pf_calc_skip_steps(struct pf_rulequeue *rules)
1437{
1438	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1439	int i;
1440
1441	cur = TAILQ_FIRST(rules);
1442	prev = cur;
1443	for (i = 0; i < PF_SKIP_COUNT; ++i)
1444		head[i] = cur;
1445	while (cur != NULL) {
1446
1447		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1448			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1449		if (cur->direction != prev->direction)
1450			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1451		if (cur->af != prev->af)
1452			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1453		if (cur->proto != prev->proto)
1454			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1455		if (cur->src.neg != prev->src.neg ||
1456		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1457			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1458		if (cur->src.port[0] != prev->src.port[0] ||
1459		    cur->src.port[1] != prev->src.port[1] ||
1460		    cur->src.port_op != prev->src.port_op)
1461			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1462		if (cur->dst.neg != prev->dst.neg ||
1463		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1464			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1465		if (cur->dst.port[0] != prev->dst.port[0] ||
1466		    cur->dst.port[1] != prev->dst.port[1] ||
1467		    cur->dst.port_op != prev->dst.port_op)
1468			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1469
1470		prev = cur;
1471		cur = TAILQ_NEXT(cur, entries);
1472	}
1473	for (i = 0; i < PF_SKIP_COUNT; ++i)
1474		PF_SET_SKIP_STEPS(i);
1475}
1476
1477int
1478pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1479{
1480	if (aw1->type != aw2->type)
1481		return (1);
1482	switch (aw1->type) {
1483	case PF_ADDR_ADDRMASK:
1484		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1485			return (1);
1486		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1487			return (1);
1488		return (0);
1489	case PF_ADDR_DYNIFTL:
1490		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1491	case PF_ADDR_NOROUTE:
1492	case PF_ADDR_URPFFAILED:
1493		return (0);
1494	case PF_ADDR_TABLE:
1495		return (aw1->p.tbl != aw2->p.tbl);
1496	case PF_ADDR_RTLABEL:
1497		return (aw1->v.rtlabel != aw2->v.rtlabel);
1498	default:
1499		printf("invalid address type: %d\n", aw1->type);
1500		return (1);
1501	}
1502}
1503
/*
 * Incrementally adjust a 16-bit checksum for one 16-bit word of the
 * covered data changing from `old' to `new'.
 *
 * cksum: current checksum value
 * old:   previous value of the changed word
 * new:   replacement value of the changed word
 * udp:   when non-zero, a checksum of 0 is returned unchanged and a
 *        computed result of 0 is returned as 0xFFFF instead
 *
 * Returns the adjusted checksum.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	/* add the difference, then fold the carry/borrow back in once */
	sum = (u_int32_t)cksum + old - new;
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return ((u_int16_t)sum);
}
1518
1519void
1520pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1521    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1522{
1523	struct pf_addr	ao;
1524	u_int16_t	po = *p;
1525
1526	PF_ACPY(&ao, a, af);
1527	PF_ACPY(a, an, af);
1528
1529	*p = pn;
1530
1531	switch (af) {
1532#ifdef INET
1533	case AF_INET:
1534		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1535		    ao.addr16[0], an->addr16[0], 0),
1536		    ao.addr16[1], an->addr16[1], 0);
1537		*p = pn;
1538		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1539		    ao.addr16[0], an->addr16[0], u),
1540		    ao.addr16[1], an->addr16[1], u),
1541		    po, pn, u);
1542		break;
1543#endif /* INET */
1544#ifdef INET6
1545	case AF_INET6:
1546		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1547		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1548		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1549		    ao.addr16[0], an->addr16[0], u),
1550		    ao.addr16[1], an->addr16[1], u),
1551		    ao.addr16[2], an->addr16[2], u),
1552		    ao.addr16[3], an->addr16[3], u),
1553		    ao.addr16[4], an->addr16[4], u),
1554		    ao.addr16[5], an->addr16[5], u),
1555		    ao.addr16[6], an->addr16[6], u),
1556		    ao.addr16[7], an->addr16[7], u),
1557		    po, pn, u);
1558		break;
1559#endif /* INET6 */
1560	}
1561}
1562
1563
/*
 * Changes a u_int32_t and adjusts checksum *c for it.  Uses a void * and
 * memcpy() so there are no alignment restrictions on a.  The value is
 * handed to pf_cksum_fixup() as its upper and lower 16 bits; u is that
 * function's udp flag.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	prev;
	u_int16_t	sum;

	memcpy(&prev, a, sizeof(prev));
	memcpy(a, &an, sizeof(an));

	sum = pf_cksum_fixup(*c, prev >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, prev & 0xffff, an & 0xffff, u);
}
1575
#ifdef INET6
/*
 * Overwrite IPv6 address *a with *an and adjust checksum *c for each of
 * the eight 16-bit words of the address; u is the udp flag handed to
 * pf_cksum_fixup().
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	prev;
	u_int16_t	sum;
	int		w;

	PF_ACPY(&prev, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (w = 0; w < 8; w++)
		sum = pf_cksum_fixup(sum, prev.addr16[w], an->addr16[w], u);
	*c = sum;
}
#endif /* INET6 */
1598
1599void
1600pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1601    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1602    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1603{
1604	struct pf_addr	oia, ooa;
1605
1606	PF_ACPY(&oia, ia, af);
1607	PF_ACPY(&ooa, oa, af);
1608
1609	/* Change inner protocol port, fix inner protocol checksum. */
1610	if (ip != NULL) {
1611		u_int16_t	oip = *ip;
1612		u_int32_t	opc = 0;	/* make the compiler happy */
1613
1614		if (pc != NULL)
1615			opc = *pc;
1616		*ip = np;
1617		if (pc != NULL)
1618			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1619		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1620		if (pc != NULL)
1621			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1622	}
1623	/* Change inner ip address, fix inner ip and icmp checksums. */
1624	PF_ACPY(ia, na, af);
1625	switch (af) {
1626#ifdef INET
1627	case AF_INET: {
1628		u_int32_t	 oh2c = *h2c;
1629
1630		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1631		    oia.addr16[0], ia->addr16[0], 0),
1632		    oia.addr16[1], ia->addr16[1], 0);
1633		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1634		    oia.addr16[0], ia->addr16[0], 0),
1635		    oia.addr16[1], ia->addr16[1], 0);
1636		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1637		break;
1638	}
1639#endif /* INET */
1640#ifdef INET6
1641	case AF_INET6:
1642		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1643		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1644		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1645		    oia.addr16[0], ia->addr16[0], u),
1646		    oia.addr16[1], ia->addr16[1], u),
1647		    oia.addr16[2], ia->addr16[2], u),
1648		    oia.addr16[3], ia->addr16[3], u),
1649		    oia.addr16[4], ia->addr16[4], u),
1650		    oia.addr16[5], ia->addr16[5], u),
1651		    oia.addr16[6], ia->addr16[6], u),
1652		    oia.addr16[7], ia->addr16[7], u);
1653		break;
1654#endif /* INET6 */
1655	}
1656	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
1657	PF_ACPY(oa, na, af);
1658	switch (af) {
1659#ifdef INET
1660	case AF_INET:
1661		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1662		    ooa.addr16[0], oa->addr16[0], 0),
1663		    ooa.addr16[1], oa->addr16[1], 0);
1664		break;
1665#endif /* INET */
1666#ifdef INET6
1667	case AF_INET6:
1668		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1669		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1670		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1671		    ooa.addr16[0], oa->addr16[0], u),
1672		    ooa.addr16[1], oa->addr16[1], u),
1673		    ooa.addr16[2], oa->addr16[2], u),
1674		    ooa.addr16[3], oa->addr16[3], u),
1675		    ooa.addr16[4], oa->addr16[4], u),
1676		    ooa.addr16[5], oa->addr16[5], u),
1677		    ooa.addr16[6], oa->addr16[6], u),
1678		    ooa.addr16[7], oa->addr16[7], u);
1679		break;
1680#endif /* INET6 */
1681	}
1682}
1683
1684
/*
 * Need to modulate the sequence numbers in the TCP SACK option
 * (credits to Krzysztof Pfaff for report and patch)
 *
 * The TCP options following the header at offset off in mbuf m are copied
 * into a local buffer and walked.  In every SACK option each block's start
 * and end have dst->seqdiff subtracted, with th->th_sum adjusted through
 * pf_change_a().  If a SACK option was processed, the whole option area is
 * written back into the mbuf.
 *
 * Returns 0 when the options are shorter than one SACK block or cannot be
 * pulled from the mbuf; otherwise returns whether the options were copied
 * back (1) or not (0).
 */
int
pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	/* hlen: option bytes still to parse; thoptlen: total option bytes */
	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
#ifdef __FreeBSD__
	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
#else
	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
#endif
	int copyback = 0, i, olen;
	struct sackblk sack;

	/* smallest SACK option: kind and length byte plus one block */
#define TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
		return 0;

	/* stop as soon as the remainder cannot hold a SACK option */
	while (hlen >= TCPOLEN_SACKLEN) {
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:	/* FALLTHROUGH */
		case TCPOPT_NOP:
			/* single byte options, no length field */
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			/* never trust a length beyond the remaining bytes */
			if (olen > hlen)
				olen = hlen;
			if (olen >= TCPOLEN_SACKLEN) {
				for (i = 2; i + TCPOLEN_SACK <= olen;
				    i += TCPOLEN_SACK) {
					/* copy out and back via memcpy */
					memcpy(&sack, &opt[i], sizeof(sack));
					pf_change_a(&sack.start, &th->th_sum,
					    htonl(ntohl(sack.start) -
					    dst->seqdiff), 0);
					pf_change_a(&sack.end, &th->th_sum,
					    htonl(ntohl(sack.end) -
					    dst->seqdiff), 0);
					memcpy(&opt[i], &sack, sizeof(sack));
				}
				copyback = 1;
			}
			/* FALLTHROUGH */
		default:
			/* advance by at least 2 so the loop terminates */
			if (olen < 2)
				olen = 2;
			hlen -= olen;
			opt += olen;
		}
	}

	if (copyback)
#ifdef __FreeBSD__
		m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
#else
		m_copyback(m, off + sizeof(*th), thoptlen, opts);
#endif
	return (copyback);
}
1749
/*
 * Build a TCP segment without payload and hand it to ip_output() or
 * ip6_output().
 *
 * replyto: (FreeBSD only) packet being answered; used for MAC labelling
 *          when MAC is compiled in, may be NULL
 * r:       rule supplying rtableid and (with ALTQ) qid; may be NULL
 * af:      address family of the segment
 * saddr, daddr: source and destination address
 * sport, dport: ports, stored into the header as given
 *          (presumably already in network byte order -- confirm with
 *          callers)
 * seq, ack: sequence and acknowledgement numbers, converted with htonl()
 * flags:   TCP flags
 * win:     window, converted with htons()
 * mss:     when non-zero, a maximum segment size option is appended
 * ttl:     IPv4 TTL; 0 selects V_ip_defttl
 * tag:     when non-zero the packet is marked M_SKIP_FIREWALL (FreeBSD)
 *          or PF_TAG_GENERATED (otherwise)
 * rtag:    value stored in the packet's pf tag
 * eh, ifp: when eh is not NULL (AF_INET only) the packet is sent through
 *          a route built from ifp with the Ethernet addresses of eh
 *          swapped; ifp must then not be NULL or the packet is dropped
 *
 * On FreeBSD the pf lock is released around the output call.
 */
void
#ifdef __FreeBSD__
pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
#else
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
#endif
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
    u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
{
	struct mbuf	*m;
	int		 len, tlen;
#ifdef INET
	struct ip	*h;
#endif /* INET */
#ifdef INET6
	struct ip6_hdr	*h6;
#endif /* INET6 */
	struct tcphdr	*th;
	char		*opt;
	struct pf_mtag	*pf_mtag;

#ifdef __FreeBSD__
	/* only the address families compiled in are accepted */
	KASSERT(
#ifdef INET
	    af == AF_INET
#else
	    0
#endif
	    ||
#ifdef INET6
	    af == AF_INET6
#else
	    0
#endif
	    , ("Unsupported AF %d", af));
	len = 0;
	th = NULL;
#ifdef INET
	h = NULL;
#endif
#ifdef INET6
	h6 = NULL;
#endif
#endif

	/* maximum segment size tcp option */
	tlen = sizeof(struct tcphdr);
	if (mss)
		tlen += 4;

	/* total length: network header plus tcp header and option */
	switch (af) {
#ifdef INET
	case AF_INET:
		len = sizeof(struct ip) + tlen;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		len = sizeof(struct ip6_hdr) + tlen;
		break;
#endif /* INET6 */
	}

	/* create outgoing mbuf */
	m = m_gethdr(M_DONTWAIT, MT_HEADER);
	if (m == NULL)
		return;
#ifdef __FreeBSD__
#ifdef MAC
	if (replyto)
		mac_netinet_firewall_reply(replyto, m);
	else
		mac_netinet_firewall_send(m);
#else
	(void)replyto;
#endif
#endif
	/* without a pf tag the packet is not sent at all */
	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
		m_freem(m);
		return;
	}
	if (tag)
#ifdef __FreeBSD__
		m->m_flags |= M_SKIP_FIREWALL;
#else
		pf_mtag->flags |= PF_TAG_GENERATED;
#endif

	pf_mtag->tag = rtag;

	if (r != NULL && r->rtableid >= 0)
#ifdef __FreeBSD__
	{
		M_SETFIB(m, r->rtableid);
#endif
		pf_mtag->rtableid = r->rtableid;
#ifdef __FreeBSD__
	}
#endif
#ifdef ALTQ
	if (r != NULL && r->qid) {
		pf_mtag->qid = r->qid;
		/* add hints for ecn */
		pf_mtag->af = af;
		pf_mtag->hdr = mtod(m, struct ip *);
	}
#endif /* ALTQ */
	/* leave room for a link header in front of the packet */
	m->m_data += max_linkhdr;
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	bzero(m->m_data, len);
	switch (af) {
#ifdef INET
	case AF_INET:
		h = mtod(m, struct ip *);

		/* IP header fields included in the TCP checksum */
		h->ip_p = IPPROTO_TCP;
		h->ip_len = htons(tlen);
		h->ip_src.s_addr = saddr->v4.s_addr;
		h->ip_dst.s_addr = daddr->v4.s_addr;

		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		h6 = mtod(m, struct ip6_hdr *);

		/* IP header fields included in the TCP checksum */
		h6->ip6_nxt = IPPROTO_TCP;
		h6->ip6_plen = htons(tlen);
		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));

		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
		break;
#endif /* INET6 */
	}

	/* TCP header */
	th->th_sport = sport;
	th->th_dport = dport;
	th->th_seq = htonl(seq);
	th->th_ack = htonl(ack);
	th->th_off = tlen >> 2;
	th->th_flags = flags;
	th->th_win = htons(win);

	if (mss) {
		/* the option sits directly behind the tcp header */
		opt = (char *)(th + 1);
		opt[0] = TCPOPT_MAXSEG;
		opt[1] = 4;
		HTONS(mss);
		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
	}

	switch (af) {
#ifdef INET
	case AF_INET:
		/* TCP checksum */
		th->th_sum = in_cksum(m, len);

		/* Finish the IP header */
		h->ip_v = 4;
		h->ip_hl = sizeof(*h) >> 2;
		h->ip_tos = IPTOS_LOWDELAY;
#ifdef __FreeBSD__
		/* ip_off and ip_len are stored without htons() here */
		h->ip_off = V_path_mtu_discovery ? IP_DF : 0;
		h->ip_len = len;
#else
		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
		h->ip_len = htons(len);
#endif
		h->ip_ttl = ttl ? ttl : V_ip_defttl;
		h->ip_sum = 0;
		if (eh == NULL) {
#ifdef __FreeBSD__
			PF_UNLOCK();
			ip_output(m, (void *)NULL, (void *)NULL, 0,
			    (void *)NULL, (void *)NULL);
			PF_LOCK();
#else /* ! __FreeBSD__ */
			ip_output(m, (void *)NULL, (void *)NULL, 0,
			    (void *)NULL, (void *)NULL);
#endif
		} else {
			/*
			 * NOTE(review): ro and rt live on the stack and only
			 * the fields assigned below are initialized --
			 * confirm ip_output() reads nothing else from them.
			 */
			struct route		 ro;
			struct rtentry		 rt;
			struct ether_header	*e = (void *)ro.ro_dst.sa_data;

			if (ifp == NULL) {
				m_freem(m);
				return;
			}
			rt.rt_ifp = ifp;
			ro.ro_rt = &rt;
			ro.ro_dst.sa_len = sizeof(ro.ro_dst);
			ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
			/* reply goes back: swap the ethernet addresses */
			bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
			bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
			e->ether_type = eh->ether_type;
#ifdef __FreeBSD__
			PF_UNLOCK();
			/* XXX_IMPORT: later */
			ip_output(m, (void *)NULL, &ro, 0,
			    (void *)NULL, (void *)NULL);
			PF_LOCK();
#else /* ! __FreeBSD__ */
			ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER,
			    (void *)NULL, (void *)NULL);
#endif
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/* TCP checksum */
		th->th_sum = in6_cksum(m, IPPROTO_TCP,
		    sizeof(struct ip6_hdr), tlen);

		h6->ip6_vfc |= IPV6_VERSION;
		h6->ip6_hlim = IPV6_DEFHLIM;

#ifdef __FreeBSD__
		PF_UNLOCK();
		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
		PF_LOCK();
#else
		ip6_output(m, NULL, NULL, 0, NULL, NULL);
#endif
		break;
#endif /* INET6 */
	}
}
1987
1988void
1989pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1990    struct pf_rule *r)
1991{
1992	struct pf_mtag	*pf_mtag;
1993	struct mbuf	*m0;
1994#ifdef __FreeBSD__
1995	struct ip *ip;
1996#endif
1997
1998#ifdef __FreeBSD__
1999	m0 = m_copypacket(m, M_DONTWAIT);
2000	if (m0 == NULL)
2001		return;
2002#else
2003	m0 = m_copy(m, 0, M_COPYALL);
2004#endif
2005	if ((pf_mtag = pf_get_mtag(m0)) == NULL)
2006		return;
2007#ifdef __FreeBSD__
2008	/* XXX: revisit */
2009	m0->m_flags |= M_SKIP_FIREWALL;
2010#else
2011	pf_mtag->flags |= PF_TAG_GENERATED;
2012#endif
2013
2014	if (r->rtableid >= 0)
2015#ifdef __FreeBSD__
2016	{
2017		M_SETFIB(m0, r->rtableid);
2018#endif
2019		pf_mtag->rtableid = r->rtableid;
2020#ifdef __FreeBSD__
2021	}
2022#endif
2023
2024#ifdef ALTQ
2025	if (r->qid) {
2026		pf_mtag->qid = r->qid;
2027		/* add hints for ecn */
2028		pf_mtag->af = af;
2029		pf_mtag->hdr = mtod(m0, struct ip *);
2030	}
2031#endif /* ALTQ */
2032
2033	switch (af) {
2034#ifdef INET
2035	case AF_INET:
2036#ifdef __FreeBSD__
2037		/* icmp_error() expects host byte ordering */
2038		ip = mtod(m0, struct ip *);
2039		NTOHS(ip->ip_len);
2040		NTOHS(ip->ip_off);
2041		PF_UNLOCK();
2042		icmp_error(m0, type, code, 0, 0);
2043		PF_LOCK();
2044#else
2045		icmp_error(m0, type, code, 0, 0);
2046#endif
2047		break;
2048#endif /* INET */
2049#ifdef INET6
2050	case AF_INET6:
2051#ifdef __FreeBSD__
2052		PF_UNLOCK();
2053#endif
2054		icmp6_error(m0, type, code, 0);
2055#ifdef __FreeBSD__
2056		PF_LOCK();
2057#endif
2058		break;
2059#endif /* INET6 */
2060	}
2061}
2062
/*
 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
 * If n is 0, they match if they are equal. If n is != 0, they match if they
 * are different.
 *
 * Only the bits selected by m take part in the comparison.  For an address
 * family that is not compiled in, the addresses count as different.
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		/* equal under the mask <=> no differing bit inside it */
		equal = (((a->addr32[0] ^ b->addr32[0]) &
		    m->addr32[0]) == 0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		equal = ((((a->addr32[0] ^ b->addr32[0]) & m->addr32[0]) |
		    ((a->addr32[1] ^ b->addr32[1]) & m->addr32[1]) |
		    ((a->addr32[2] ^ b->addr32[2]) & m->addr32[2]) |
		    ((a->addr32[3] ^ b->addr32[3]) & m->addr32[3])) == 0);
		break;
#endif /* INET6 */
	}

	/* n inverts the sense of the comparison */
	return (n ? !equal : equal);
}
2108
2109int
2110pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2111{
2112	switch (op) {
2113	case PF_OP_IRG:
2114		return ((p > a1) && (p < a2));
2115	case PF_OP_XRG:
2116		return ((p < a1) || (p > a2));
2117	case PF_OP_RRG:
2118		return ((p >= a1) && (p <= a2));
2119	case PF_OP_EQ:
2120		return (p == a1);
2121	case PF_OP_NE:
2122		return (p != a1);
2123	case PF_OP_LT:
2124		return (p < a1);
2125	case PF_OP_LE:
2126		return (p <= a1);
2127	case PF_OP_GT:
2128		return (p > a1);
2129	case PF_OP_GE:
2130		return (p >= a1);
2131	}
2132	return (0); /* never reached */
2133}
2134
/*
 * Port flavour of pf_match(): the operands and the port are converted
 * with ntohs() before being compared.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	const u_int16_t	lo = ntohs(a1);
	const u_int16_t	hi = ntohs(a2);
	const u_int16_t	port = ntohs(p);

	return (pf_match(op, lo, hi, port));
}
2143
2144int
2145pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2146{
2147	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2148		return (0);
2149	return (pf_match(op, a1, a2, u));
2150}
2151
2152int
2153pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2154{
2155	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2156		return (0);
2157	return (pf_match(op, a1, a2, g));
2158}
2159
2160#ifndef __FreeBSD__
2161struct pf_mtag *
2162pf_find_mtag(struct mbuf *m)
2163{
2164	struct m_tag	*mtag;
2165
2166	if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL)
2167		return (NULL);
2168
2169	return ((struct pf_mtag *)(mtag + 1));
2170}
2171
2172struct pf_mtag *
2173pf_get_mtag(struct mbuf *m)
2174{
2175	struct m_tag	*mtag;
2176
2177	if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) {
2178		mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag),
2179		    M_NOWAIT);
2180		if (mtag == NULL)
2181			return (NULL);
2182		bzero(mtag + 1, sizeof(struct pf_mtag));
2183		m_tag_prepend(m, mtag);
2184	}
2185
2186	return ((struct pf_mtag *)(mtag + 1));
2187}
2188#endif
2189
2190int
2191pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag,
2192    int *tag)
2193{
2194	if (*tag == -1)
2195		*tag = pf_mtag->tag;
2196
2197	return ((!r->match_tag_not && r->match_tag == *tag) ||
2198	    (r->match_tag_not && r->match_tag != *tag));
2199}
2200
2201int
2202pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid)
2203{
2204	if (tag <= 0 && rtableid < 0)
2205		return (0);
2206
2207	if (pf_mtag == NULL)
2208		if ((pf_mtag = pf_get_mtag(m)) == NULL)
2209			return (1);
2210	if (tag > 0)
2211		pf_mtag->tag = tag;
2212	if (rtableid >= 0)
2213#ifdef __FreeBSD__
2214	{
2215		M_SETFIB(m, rtableid);
2216#endif
2217		pf_mtag->rtableid = rtableid;
2218#ifdef __FreeBSD__
2219	}
2220#endif
2221
2222	return (0);
2223}
2224
2225static void
2226pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
2227    struct pf_rule **r, struct pf_rule **a,  int *match)
2228{
2229	struct pf_anchor_stackframe	*f;
2230
2231	(*r)->anchor->match = 0;
2232	if (match)
2233		*match = 0;
2234	if (*depth >= sizeof(pf_anchor_stack) /
2235	    sizeof(pf_anchor_stack[0])) {
2236		printf("pf_step_into_anchor: stack overflow\n");
2237		*r = TAILQ_NEXT(*r, entries);
2238		return;
2239	} else if (*depth == 0 && a != NULL)
2240		*a = *r;
2241	f = pf_anchor_stack + (*depth)++;
2242	f->rs = *rs;
2243	f->r = *r;
2244	if ((*r)->anchor_wildcard) {
2245		f->parent = &(*r)->anchor->children;
2246		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
2247		    NULL) {
2248			*r = NULL;
2249			return;
2250		}
2251		*rs = &f->child->ruleset;
2252	} else {
2253		f->parent = NULL;
2254		f->child = NULL;
2255		*rs = &(*r)->anchor->ruleset;
2256	}
2257	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2258}
2259
2260int
2261pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2262    struct pf_rule **r, struct pf_rule **a, int *match)
2263{
2264	struct pf_anchor_stackframe	*f;
2265	int quick = 0;
2266
2267	do {
2268		if (*depth <= 0)
2269			break;
2270		f = pf_anchor_stack + *depth - 1;
2271		if (f->parent != NULL && f->child != NULL) {
2272			if (f->child->match ||
2273			    (match != NULL && *match)) {
2274				f->r->anchor->match = 1;
2275				*match = 0;
2276			}
2277			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2278			if (f->child != NULL) {
2279				*rs = &f->child->ruleset;
2280				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2281				if (*r == NULL)
2282					continue;
2283				else
2284					break;
2285			}
2286		}
2287		(*depth)--;
2288		if (*depth == 0 && a != NULL)
2289			*a = NULL;
2290		*rs = f->rs;
2291		if (f->r->anchor->match || (match  != NULL && *match))
2292			quick = f->r->quick;
2293		*r = TAILQ_NEXT(f->r, entries);
2294	} while (*r == NULL);
2295
2296	return (quick);
2297}
2298
2299#ifdef INET6
2300void
2301pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2302    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2303{
2304	switch (af) {
2305#ifdef INET
2306	case AF_INET:
2307		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2308		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2309		break;
2310#endif /* INET */
2311	case AF_INET6:
2312		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2313		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2314		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2315		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2316		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2317		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2318		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2319		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2320		break;
2321	}
2322}
2323
2324void
2325pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2326{
2327	switch (af) {
2328#ifdef INET
2329	case AF_INET:
2330		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2331		break;
2332#endif /* INET */
2333	case AF_INET6:
2334		if (addr->addr32[3] == 0xffffffff) {
2335			addr->addr32[3] = 0;
2336			if (addr->addr32[2] == 0xffffffff) {
2337				addr->addr32[2] = 0;
2338				if (addr->addr32[1] == 0xffffffff) {
2339					addr->addr32[1] = 0;
2340					addr->addr32[0] =
2341					    htonl(ntohl(addr->addr32[0]) + 1);
2342				} else
2343					addr->addr32[1] =
2344					    htonl(ntohl(addr->addr32[1]) + 1);
2345			} else
2346				addr->addr32[2] =
2347				    htonl(ntohl(addr->addr32[2]) + 1);
2348		} else
2349			addr->addr32[3] =
2350			    htonl(ntohl(addr->addr32[3]) + 1);
2351		break;
2352	}
2353}
2354#endif /* INET6 */
2355
/*
 * Scramble three integer lvalues in place.  Each of the nine steps
 * subtracts the other two values from one of them and then xors in a
 * shifted copy of another.  Arguments are evaluated many times, so they
 * must be side-effect free.  Used by pf_hash().
 */
#define mix(a,b,c) \
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
2368
2369/*
2370 * hash function based on bridge_hash in if_bridge.c
2371 */
2372void
2373pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
2374    struct pf_poolhashkey *key, sa_family_t af)
2375{
2376	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2377
2378	switch (af) {
2379#ifdef INET
2380	case AF_INET:
2381		a += inaddr->addr32[0];
2382		b += key->key32[1];
2383		mix(a, b, c);
2384		hash->addr32[0] = c + key->key32[2];
2385		break;
2386#endif /* INET */
2387#ifdef INET6
2388	case AF_INET6:
2389		a += inaddr->addr32[0];
2390		b += inaddr->addr32[2];
2391		mix(a, b, c);
2392		hash->addr32[0] = c;
2393		a += inaddr->addr32[1];
2394		b += inaddr->addr32[3];
2395		c += key->key32[1];
2396		mix(a, b, c);
2397		hash->addr32[1] = c;
2398		a += inaddr->addr32[2];
2399		b += inaddr->addr32[1];
2400		c += key->key32[2];
2401		mix(a, b, c);
2402		hash->addr32[2] = c;
2403		a += inaddr->addr32[3];
2404		b += inaddr->addr32[0];
2405		c += key->key32[3];
2406		mix(a, b, c);
2407		hash->addr32[3] = c;
2408		break;
2409#endif /* INET6 */
2410	}
2411}
2412
2413int
2414pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
2415    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
2416{
2417	unsigned char		 hash[16];
2418	struct pf_pool		*rpool = &r->rpool;
2419	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
2420	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
2421	struct pf_pooladdr	*acur = rpool->cur;
2422	struct pf_src_node	 k;
2423
2424	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
2425	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2426		k.af = af;
2427		PF_ACPY(&k.addr, saddr, af);
2428		if (r->rule_flag & PFRULE_RULESRCTRACK ||
2429		    r->rpool.opts & PF_POOL_STICKYADDR)
2430			k.rule.ptr = r;
2431		else
2432			k.rule.ptr = NULL;
2433		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
2434		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
2435		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
2436			PF_ACPY(naddr, &(*sn)->raddr, af);
2437			if (pf_status.debug >= PF_DEBUG_MISC) {
2438				printf("pf_map_addr: src tracking maps ");
2439				pf_print_host(&k.addr, 0, af);
2440				printf(" to ");
2441				pf_print_host(naddr, 0, af);
2442				printf("\n");
2443			}
2444			return (0);
2445		}
2446	}
2447
2448	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
2449		return (1);
2450	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2451		switch (af) {
2452#ifdef INET
2453		case AF_INET:
2454			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
2455			    (rpool->opts & PF_POOL_TYPEMASK) !=
2456			    PF_POOL_ROUNDROBIN)
2457				return (1);
2458			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
2459			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
2460			break;
2461#endif /* INET */
2462#ifdef INET6
2463		case AF_INET6:
2464			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
2465			    (rpool->opts & PF_POOL_TYPEMASK) !=
2466			    PF_POOL_ROUNDROBIN)
2467				return (1);
2468			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
2469			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
2470			break;
2471#endif /* INET6 */
2472		}
2473	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2474		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
2475			return (1); /* unsupported */
2476	} else {
2477		raddr = &rpool->cur->addr.v.a.addr;
2478		rmask = &rpool->cur->addr.v.a.mask;
2479	}
2480
2481	switch (rpool->opts & PF_POOL_TYPEMASK) {
2482	case PF_POOL_NONE:
2483		PF_ACPY(naddr, raddr, af);
2484		break;
2485	case PF_POOL_BITMASK:
2486		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
2487		break;
2488	case PF_POOL_RANDOM:
2489		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
2490			switch (af) {
2491#ifdef INET
2492			case AF_INET:
2493				rpool->counter.addr32[0] = htonl(arc4random());
2494				break;
2495#endif /* INET */
2496#ifdef INET6
2497			case AF_INET6:
2498				if (rmask->addr32[3] != 0xffffffff)
2499					rpool->counter.addr32[3] =
2500					    htonl(arc4random());
2501				else
2502					break;
2503				if (rmask->addr32[2] != 0xffffffff)
2504					rpool->counter.addr32[2] =
2505					    htonl(arc4random());
2506				else
2507					break;
2508				if (rmask->addr32[1] != 0xffffffff)
2509					rpool->counter.addr32[1] =
2510					    htonl(arc4random());
2511				else
2512					break;
2513				if (rmask->addr32[0] != 0xffffffff)
2514					rpool->counter.addr32[0] =
2515					    htonl(arc4random());
2516				break;
2517#endif /* INET6 */
2518			}
2519			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2520			PF_ACPY(init_addr, naddr, af);
2521
2522		} else {
2523			PF_AINC(&rpool->counter, af);
2524			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2525		}
2526		break;
2527	case PF_POOL_SRCHASH:
2528		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2529		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2530		break;
2531	case PF_POOL_ROUNDROBIN:
2532		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2533			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
2534			    &rpool->tblidx, &rpool->counter,
2535			    &raddr, &rmask, af))
2536				goto get_addr;
2537		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2538			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2539			    &rpool->tblidx, &rpool->counter,
2540			    &raddr, &rmask, af))
2541				goto get_addr;
2542		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
2543			goto get_addr;
2544
2545	try_next:
2546		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
2547			rpool->cur = TAILQ_FIRST(&rpool->list);
2548		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2549			rpool->tblidx = -1;
2550			if (pfr_pool_get(rpool->cur->addr.p.tbl,
2551			    &rpool->tblidx, &rpool->counter,
2552			    &raddr, &rmask, af)) {
2553				/* table contains no address of type 'af' */
2554				if (rpool->cur != acur)
2555					goto try_next;
2556				return (1);
2557			}
2558		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2559			rpool->tblidx = -1;
2560			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2561			    &rpool->tblidx, &rpool->counter,
2562			    &raddr, &rmask, af)) {
2563				/* table contains no address of type 'af' */
2564				if (rpool->cur != acur)
2565					goto try_next;
2566				return (1);
2567			}
2568		} else {
2569			raddr = &rpool->cur->addr.v.a.addr;
2570			rmask = &rpool->cur->addr.v.a.mask;
2571			PF_ACPY(&rpool->counter, raddr, af);
2572		}
2573
2574	get_addr:
2575		PF_ACPY(naddr, &rpool->counter, af);
2576		if (init_addr != NULL && PF_AZERO(init_addr, af))
2577			PF_ACPY(init_addr, naddr, af);
2578		PF_AINC(&rpool->counter, af);
2579		break;
2580	}
2581	if (*sn != NULL)
2582		PF_ACPY(&(*sn)->raddr, naddr, af);
2583
2584	if (pf_status.debug >= PF_DEBUG_MISC &&
2585	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2586		printf("pf_map_addr: selected address ");
2587		pf_print_host(naddr, 0, af);
2588		printf("\n");
2589	}
2590
2591	return (0);
2592}
2593
2594int
2595pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2596    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2597    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2598    struct pf_src_node **sn)
2599{
2600	struct pf_state_cmp	key;
2601	struct pf_addr		init_addr;
2602	u_int16_t		cut;
2603
2604	bzero(&init_addr, sizeof(init_addr));
2605	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2606		return (1);
2607
2608	if (proto == IPPROTO_ICMP) {
2609		low = 1;
2610		high = 65535;
2611	}
2612
2613	do {
2614		key.af = af;
2615		key.proto = proto;
2616		PF_ACPY(&key.ext.addr, daddr, key.af);
2617		PF_ACPY(&key.gwy.addr, naddr, key.af);
2618		key.ext.port = dport;
2619
2620		/*
2621		 * port search; start random, step;
2622		 * similar 2 portloop in in_pcbbind
2623		 */
2624		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
2625		    proto == IPPROTO_ICMP)) {
2626			key.gwy.port = dport;
2627			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2628				return (0);
2629		} else if (low == 0 && high == 0) {
2630			key.gwy.port = *nport;
2631			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2632				return (0);
2633		} else if (low == high) {
2634			key.gwy.port = htons(low);
2635			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2636				*nport = htons(low);
2637				return (0);
2638			}
2639		} else {
2640			u_int16_t tmp;
2641
2642			if (low > high) {
2643				tmp = low;
2644				low = high;
2645				high = tmp;
2646			}
2647			/* low < high */
2648			cut = htonl(arc4random()) % (1 + high - low) + low;
2649			/* low <= cut <= high */
2650			for (tmp = cut; tmp <= high; ++(tmp)) {
2651				key.gwy.port = htons(tmp);
2652				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2653				    NULL) {
2654					*nport = htons(tmp);
2655					return (0);
2656				}
2657			}
2658			for (tmp = cut - 1; tmp >= low; --(tmp)) {
2659				key.gwy.port = htons(tmp);
2660				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2661				    NULL) {
2662					*nport = htons(tmp);
2663					return (0);
2664				}
2665			}
2666		}
2667
2668		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2669		case PF_POOL_RANDOM:
2670		case PF_POOL_ROUNDROBIN:
2671			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2672				return (1);
2673			break;
2674		case PF_POOL_NONE:
2675		case PF_POOL_SRCHASH:
2676		case PF_POOL_BITMASK:
2677		default:
2678			return (1);
2679		}
2680	} while (! PF_AEQ(&init_addr, naddr, af) );
2681
2682	return (1);					/* none available */
2683}
2684
/*
 * Find the translation rule that applies to a packet.
 *
 * Walks the active rules of list rs_num of the main ruleset, stepping
 * into and out of anchors, until the first matching non-anchor rule is
 * found.  For a binat rule evaluated on an inbound packet the roles are
 * reversed: the rule's dst is compared with the packet's source, and the
 * pool's current address (if any) with the packet's destination.
 *
 * The tag and rtableid picked up while matching (anchor rules included)
 * are applied to the packet with pf_tag_packet().
 *
 * Returns the matching rule, or NULL if no rule matched, the packet could
 * not be tagged, or the matching rule is a "no nat/rdr/binat" rule.
 */
2685struct pf_rule *
2686pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2687    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2688    struct pf_addr *daddr, u_int16_t dport, int rs_num)
2689{
2690	struct pf_rule		*r, *rm = NULL;
2691	struct pf_ruleset	*ruleset = NULL;
2692	int			 tag = -1;
2693	int			 rtableid = -1;
	/* current anchor nesting depth, maintained by the step helpers */
2694	int			 asd = 0;
2695
2696	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2697	while (r && rm == NULL) {
2698		struct pf_rule_addr	*src = NULL, *dst = NULL;
2699		struct pf_addr_wrap	*xdst = NULL;
2700
		/* inbound binat: match the reverse direction of the rule */
2701		if (r->action == PF_BINAT && direction == PF_IN) {
2702			src = &r->dst;
2703			if (r->rpool.cur != NULL)
2704				xdst = &r->rpool.cur->addr;
2705		} else {
2706			src = &r->src;
2707			dst = &r->dst;
2708		}
2709
		/*
		 * On a mismatch, either jump via the rule's skip[] pointer
		 * for that criterion or move on to the next rule.
		 */
2710		r->evaluations++;
2711		if (pfi_kif_match(r->kif, kif) == r->ifnot)
2712			r = r->skip[PF_SKIP_IFP].ptr;
2713		else if (r->direction && r->direction != direction)
2714			r = r->skip[PF_SKIP_DIR].ptr;
2715		else if (r->af && r->af != pd->af)
2716			r = r->skip[PF_SKIP_AF].ptr;
2717		else if (r->proto && r->proto != pd->proto)
2718			r = r->skip[PF_SKIP_PROTO].ptr;
2719		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
2720		    src->neg, kif))
2721			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2722			    PF_SKIP_DST_ADDR].ptr;
2723		else if (src->port_op && !pf_match_port(src->port_op,
2724		    src->port[0], src->port[1], sport))
2725			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2726			    PF_SKIP_DST_PORT].ptr;
2727		else if (dst != NULL &&
2728		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
2729			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2730		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
2731		    0, NULL))
2732			r = TAILQ_NEXT(r, entries);
2733		else if (dst != NULL && dst->port_op &&
2734		    !pf_match_port(dst->port_op, dst->port[0],
2735		    dst->port[1], dport))
2736			r = r->skip[PF_SKIP_DST_PORT].ptr;
2737		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
2738			r = TAILQ_NEXT(r, entries);
2739		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2740		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2741		    off, pd->hdr.tcp), r->os_fingerprint)))
2742			r = TAILQ_NEXT(r, entries);
2743		else {
			/* rule matches: remember its tag/rtableid */
2744			if (r->tag)
2745				tag = r->tag;
2746			if (r->rtableid >= 0)
2747				rtableid = r->rtableid;
2748			if (r->anchor == NULL) {
2749				rm = r;
2750			} else
2751				pf_step_into_anchor(&asd, &ruleset, rs_num,
2752				    &r, NULL, NULL);
2753		}
		/* end of a rule list: resume in the enclosing ruleset */
2754		if (r == NULL)
2755			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
2756			    NULL, NULL);
2757	}
2758	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid))
2759		return (NULL);
	/* "no nat" / "no rdr" / "no binat": matched, but do not translate */
2760	if (rm != NULL && (rm->action == PF_NONAT ||
2761	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2762		return (NULL);
2763	return (rm);
2764}
2765
/*
 * Look up the translation rule for a packet and compute the translated
 * address and port.
 *
 * Outbound packets are checked against the binat rules first and then
 * the nat rules; all other packets against the rdr rules first and then
 * the binat rules.  For the rule found, *naddr (and for nat/rdr possibly
 * *nport) is filled in according to the rule's action:
 *  - nat:   address and proxy port chosen by pf_get_sport();
 *  - binat: the pool address (outbound) or the rule's source address
 *           (inbound), combined under its mask with saddr resp. daddr;
 *  - rdr:   address chosen by pf_map_addr(); the port is derived from
 *           the rule's proxy_port settings, if any.
 *
 * Returns the rule, or NULL if no rule applies or the translation could
 * not be set up.
 */
2766struct pf_rule *
2767pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2768    struct pfi_kif *kif, struct pf_src_node **sn,
2769    struct pf_addr *saddr, u_int16_t sport,
2770    struct pf_addr *daddr, u_int16_t dport,
2771    struct pf_addr *naddr, u_int16_t *nport)
2772{
2773	struct pf_rule	*r = NULL;
2774
2775	if (direction == PF_OUT) {
2776		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2777		    sport, daddr, dport, PF_RULESET_BINAT);
2778		if (r == NULL)
2779			r = pf_match_translation(pd, m, off, direction, kif,
2780			    saddr, sport, daddr, dport, PF_RULESET_NAT);
2781	} else {
2782		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2783		    sport, daddr, dport, PF_RULESET_RDR);
2784		if (r == NULL)
2785			r = pf_match_translation(pd, m, off, direction, kif,
2786			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
2787	}
2788
2789	if (r != NULL) {
2790		switch (r->action) {
2791		case PF_NONAT:
2792		case PF_NOBINAT:
2793		case PF_NORDR:
2794			return (NULL);
2795		case PF_NAT:
2796			if (pf_get_sport(pd->af, pd->proto, r, saddr,
2797			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2798			    r->rpool.proxy_port[1], sn)) {
2799				DPFPRINTF(PF_DEBUG_MISC,
2800				    ("pf: NAT proxy port allocation "
2801				    "(%u-%u) failed\n",
2802				    r->rpool.proxy_port[0],
2803				    r->rpool.proxy_port[1]));
2804				return (NULL);
2805			}
2806			break;
2807		case PF_BINAT:
2808			switch (direction) {
2809			case PF_OUT:
				/*
				 * Dynamic interface address: fail if the
				 * interface has no address of this family.
				 */
2810				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2811					switch (pd->af) {
2812#ifdef INET
2813					case AF_INET:
2814						if (r->rpool.cur->addr.p.dyn->
2815						    pfid_acnt4 < 1)
2816							return (NULL);
2817						PF_POOLMASK(naddr,
2818						    &r->rpool.cur->addr.p.dyn->
2819						    pfid_addr4,
2820						    &r->rpool.cur->addr.p.dyn->
2821						    pfid_mask4,
2822						    saddr, AF_INET);
2823						break;
2824#endif /* INET */
2825#ifdef INET6
2826					case AF_INET6:
2827						if (r->rpool.cur->addr.p.dyn->
2828						    pfid_acnt6 < 1)
2829							return (NULL);
2830						PF_POOLMASK(naddr,
2831						    &r->rpool.cur->addr.p.dyn->
2832						    pfid_addr6,
2833						    &r->rpool.cur->addr.p.dyn->
2834						    pfid_mask6,
2835						    saddr, AF_INET6);
2836						break;
2837#endif /* INET6 */
2838					}
2839				} else
2840					PF_POOLMASK(naddr,
2841					    &r->rpool.cur->addr.v.a.addr,
2842					    &r->rpool.cur->addr.v.a.mask,
2843					    saddr, pd->af);
2844				break;
2845			case PF_IN:
				/* reverse mapping: rule source + packet daddr */
2846				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
2847					switch (pd->af) {
2848#ifdef INET
2849					case AF_INET:
2850						if (r->src.addr.p.dyn->
2851						    pfid_acnt4 < 1)
2852							return (NULL);
2853						PF_POOLMASK(naddr,
2854						    &r->src.addr.p.dyn->
2855						    pfid_addr4,
2856						    &r->src.addr.p.dyn->
2857						    pfid_mask4,
2858						    daddr, AF_INET);
2859						break;
2860#endif /* INET */
2861#ifdef INET6
2862					case AF_INET6:
2863						if (r->src.addr.p.dyn->
2864						    pfid_acnt6 < 1)
2865							return (NULL);
2866						PF_POOLMASK(naddr,
2867						    &r->src.addr.p.dyn->
2868						    pfid_addr6,
2869						    &r->src.addr.p.dyn->
2870						    pfid_mask6,
2871						    daddr, AF_INET6);
2872						break;
2873#endif /* INET6 */
2874					}
2875				} else
2876					PF_POOLMASK(naddr,
2877					    &r->src.addr.v.a.addr,
2878					    &r->src.addr.v.a.mask, daddr,
2879					    pd->af);
2880				break;
2881			}
2882			break;
2883		case PF_RDR: {
2884			if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
2885				return (NULL);
			/*
			 * Bitmask pools: redo the masking with the packet's
			 * destination address supplying the unmasked bits.
			 */
2886			if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
2887			    PF_POOL_BITMASK)
2888				PF_POOLMASK(naddr, naddr,
2889				    &r->rpool.cur->addr.v.a.mask, daddr,
2890				    pd->af);
2891
			/*
			 * Port range: map the offset of dport from the first
			 * port of the rule's destination onto the proxy port
			 * range.
			 */
2892			if (r->rpool.proxy_port[1]) {
2893				u_int32_t	tmp_nport;
2894
2895				tmp_nport = ((ntohs(dport) -
2896				    ntohs(r->dst.port[0])) %
2897				    (r->rpool.proxy_port[1] -
2898				    r->rpool.proxy_port[0] + 1)) +
2899				    r->rpool.proxy_port[0];
2900
				/*
				 * NOTE(review): subtracts 65535, not 65536 -
				 * confirm the intended wrap-around.
				 */
2901				/* wrap around if necessary */
2902				if (tmp_nport > 65535)
2903					tmp_nport -= 65535;
2904				*nport = htons((u_int16_t)tmp_nport);
2905			} else if (r->rpool.proxy_port[0])
2906				*nport = htons(r->rpool.proxy_port[0]);
2907			break;
2908		}
2909		default:
2910			return (NULL);
2911		}
2912	}
2913
2914	return (r);
2915}
2916
/*
 * Look up the credentials of the local socket a packet belongs to.
 *
 * pd->lookup.uid/gid/pid are first reset to UID_MAX/GID_MAX/NO_PID.  On
 * FreeBSD, if the caller supplies the inpcb (inp_arg), the uid and gid
 * are taken from its credentials directly.  Otherwise the TCP or UDP
 * pcb table is searched for the connection, first for an exact match and
 * then for a wildcard/listening socket.  Other protocols are not
 * supported.
 *
 * Returns 1 if the lookup fields were filled in, -1 otherwise.
 */
2917int
2918#ifdef __FreeBSD__
2919pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg)
2920#else
2921pf_socket_lookup(int direction, struct pf_pdesc *pd)
2922#endif
2923{
2924	struct pf_addr		*saddr, *daddr;
2925	u_int16_t		 sport, dport;
2926#ifdef __FreeBSD__
2927	struct inpcbinfo	*pi;
2928#else
2929	struct inpcbtable	*tb;
2930#endif
2931	struct inpcb		*inp;
2932
2933	if (pd == NULL)
2934		return (-1);
2935	pd->lookup.uid = UID_MAX;
2936	pd->lookup.gid = GID_MAX;
2937	pd->lookup.pid = NO_PID;		/* XXX: revisit */
2938#ifdef __FreeBSD__
	/* the caller already knows the pcb: no table lookup needed */
2939	if (inp_arg != NULL) {
2940		INP_LOCK_ASSERT(inp_arg);
2941		pd->lookup.uid = inp_arg->inp_cred->cr_uid;
2942		pd->lookup.gid = inp_arg->inp_cred->cr_groups[0];
2943		return (1);
2944	}
2945#endif
	/* pick the ports and the pcb table for the protocol */
2946	switch (pd->proto) {
2947	case IPPROTO_TCP:
2948		if (pd->hdr.tcp == NULL)
2949			return (-1);
2950		sport = pd->hdr.tcp->th_sport;
2951		dport = pd->hdr.tcp->th_dport;
2952#ifdef __FreeBSD__
2953		pi = &V_tcbinfo;
2954#else
2955		tb = &tcbtable;
2956#endif
2957		break;
2958	case IPPROTO_UDP:
2959		if (pd->hdr.udp == NULL)
2960			return (-1);
2961		sport = pd->hdr.udp->uh_sport;
2962		dport = pd->hdr.udp->uh_dport;
2963#ifdef __FreeBSD__
2964		pi = &V_udbinfo;
2965#else
2966		tb = &udbtable;
2967#endif
2968		break;
2969	default:
2970		return (-1);
2971	}
	/*
	 * For packets not travelling inbound, addresses and ports are
	 * swapped before the lookup (presumably so that the local end is
	 * always passed as the "destination" - confirm against the pcb
	 * lookup API).
	 */
2972	if (direction == PF_IN) {
2973		saddr = pd->src;
2974		daddr = pd->dst;
2975	} else {
2976		u_int16_t	p;
2977
2978		p = sport;
2979		sport = dport;
2980		dport = p;
2981		saddr = pd->dst;
2982		daddr = pd->src;
2983	}
2984	switch (pd->af) {
2985#ifdef INET
2986	case AF_INET:
2987#ifdef __FreeBSD__
		/*
		 * The pcbinfo read lock is taken here and dropped either on
		 * the failure path below or after the credentials are read.
		 */
2988		INP_INFO_RLOCK(pi);	/* XXX LOR */
2989		inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2990			dport, 0, NULL);
2991		if (inp == NULL) {
2992			inp = in_pcblookup_hash(pi, saddr->v4, sport,
2993			   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
2994			if(inp == NULL) {
2995				INP_INFO_RUNLOCK(pi);
2996				return (-1);
2997			}
2998		}
2999#else
3000		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
3001		if (inp == NULL) {
3002			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
3003			if (inp == NULL)
3004				return (-1);
3005		}
3006#endif
3007		break;
3008#endif /* INET */
3009#ifdef INET6
3010	case AF_INET6:
3011#ifdef __FreeBSD__
3012		INP_INFO_RLOCK(pi);
3013		inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
3014			&daddr->v6, dport, 0, NULL);
3015		if (inp == NULL) {
3016			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
3017			&daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
3018			if (inp == NULL) {
3019				INP_INFO_RUNLOCK(pi);
3020				return (-1);
3021			}
3022		}
3023#else
3024		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
3025		    dport);
3026		if (inp == NULL) {
3027			inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
3028			if (inp == NULL)
3029				return (-1);
3030		}
3031#endif
3032		break;
3033#endif /* INET6 */
3034
3035	default:
3036		return (-1);
3037	}
	/* a pcb was found: copy out the owner's credentials */
3038#ifdef __FreeBSD__
3039	pd->lookup.uid = inp->inp_cred->cr_uid;
3040	pd->lookup.gid = inp->inp_cred->cr_groups[0];
3041	INP_INFO_RUNLOCK(pi);
3042#else
3043	pd->lookup.uid = inp->inp_socket->so_euid;
3044	pd->lookup.gid = inp->inp_socket->so_egid;
3045	pd->lookup.pid = inp->inp_socket->so_cpid;
3046#endif
3047	return (1);
3048}
3049
3050u_int8_t
3051pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3052{
3053	int		 hlen;
3054	u_int8_t	 hdr[60];
3055	u_int8_t	*opt, optlen;
3056	u_int8_t	 wscale = 0;
3057
3058	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
3059	if (hlen <= sizeof(struct tcphdr))
3060		return (0);
3061	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3062		return (0);
3063	opt = hdr + sizeof(struct tcphdr);
3064	hlen -= sizeof(struct tcphdr);
3065	while (hlen >= 3) {
3066		switch (*opt) {
3067		case TCPOPT_EOL:
3068		case TCPOPT_NOP:
3069			++opt;
3070			--hlen;
3071			break;
3072		case TCPOPT_WINDOW:
3073			wscale = opt[2];
3074			if (wscale > TCP_MAX_WINSHIFT)
3075				wscale = TCP_MAX_WINSHIFT;
3076			wscale |= PF_WSCALE_FLAG;
3077			/* FALLTHROUGH */
3078		default:
3079			optlen = opt[1];
3080			if (optlen < 2)
3081				optlen = 2;
3082			hlen -= optlen;
3083			opt += optlen;
3084			break;
3085		}
3086	}
3087	return (wscale);
3088}
3089
3090u_int16_t
3091pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3092{
3093	int		 hlen;
3094	u_int8_t	 hdr[60];
3095	u_int8_t	*opt, optlen;
3096	u_int16_t	 mss = V_tcp_mssdflt;
3097
3098	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
3099	if (hlen <= sizeof(struct tcphdr))
3100		return (0);
3101	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3102		return (0);
3103	opt = hdr + sizeof(struct tcphdr);
3104	hlen -= sizeof(struct tcphdr);
3105	while (hlen >= TCPOLEN_MAXSEG) {
3106		switch (*opt) {
3107		case TCPOPT_EOL:
3108		case TCPOPT_NOP:
3109			++opt;
3110			--hlen;
3111			break;
3112		case TCPOPT_MAXSEG:
3113			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
3114			NTOHS(mss);
3115			/* FALLTHROUGH */
3116		default:
3117			optlen = opt[1];
3118			if (optlen < 2)
3119				optlen = 2;
3120			hlen -= optlen;
3121			opt += optlen;
3122			break;
3123		}
3124	}
3125	return (mss);
3126}
3127
/*
 * Compute the MSS to use towards addr.
 *
 * A route to addr (family af) is looked up.  If one with an interface is
 * found, the MSS is the interface MTU minus the IP or IPv6 header size
 * and the TCP header size, but not less than V_tcp_mssdflt; without such
 * a route V_tcp_mssdflt is used.  The result is then limited to offer and
 * raised to at least 64.
 */
3128u_int16_t
3129pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
3130{
3131#ifdef INET
3132	struct sockaddr_in	*dst;
3133	struct route		 ro;
3134#endif /* INET */
3135#ifdef INET6
3136	struct sockaddr_in6	*dst6;
3137	struct route_in6	 ro6;
3138#endif /* INET6 */
3139	struct rtentry		*rt = NULL;
3140	int			 hlen = 0;	/* make the compiler happy */
3141	u_int16_t		 mss = V_tcp_mssdflt;
3142
	/* Build a destination sockaddr and look up the route. */
3143	switch (af) {
3144#ifdef INET
3145	case AF_INET:
3146		hlen = sizeof(struct ip);
3147		bzero(&ro, sizeof(ro));
3148		dst = (struct sockaddr_in *)&ro.ro_dst;
3149		dst->sin_family = AF_INET;
3150		dst->sin_len = sizeof(*dst);
3151		dst->sin_addr = addr->v4;
3152#ifdef __FreeBSD__
3153#ifdef RTF_PRCLONING
3154		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
3155#else /* !RTF_PRCLONING */
3156		in_rtalloc_ign(&ro, 0, 0);
3157#endif
3158#else /* ! __FreeBSD__ */
3159		rtalloc_noclone(&ro, NO_CLONING);
3160#endif
3161		rt = ro.ro_rt;
3162		break;
3163#endif /* INET */
3164#ifdef INET6
3165	case AF_INET6:
3166		hlen = sizeof(struct ip6_hdr);
3167		bzero(&ro6, sizeof(ro6));
3168		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
3169		dst6->sin6_family = AF_INET6;
3170		dst6->sin6_len = sizeof(*dst6);
3171		dst6->sin6_addr = addr->v6;
3172#ifdef __FreeBSD__
3173#ifdef RTF_PRCLONING
3174		rtalloc_ign((struct route *)&ro6,
3175		    (RTF_CLONING | RTF_PRCLONING));
3176#else /* !RTF_PRCLONING */
3177		rtalloc_ign((struct route *)&ro6, 0);
3178#endif
3179#else /* ! __FreeBSD__ */
3180		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
3181#endif
3182		rt = ro6.ro_rt;
3183		break;
3184#endif /* INET6 */
3185	}
3186
	/*
	 * NOTE(review): the route is released only inside this branch, i.e.
	 * only when rt->rt_ifp is non-NULL - confirm a route without an
	 * interface cannot be returned here.
	 */
3187	if (rt && rt->rt_ifp) {
3188		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
3189		mss = max(V_tcp_mssdflt, mss);
3190		RTFREE(rt);
3191	}
3192	mss = min(mss, offer);
3193	mss = max(mss, 64);		/* sanity - at least max opt space */
3194	return (mss);
3195}
3196
3197void
3198pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
3199{
3200	struct pf_rule *r = s->rule.ptr;
3201
3202	s->rt_kif = NULL;
3203	if (!r->rt || r->rt == PF_FASTROUTE)
3204		return;
3205	switch (s->af) {
3206#ifdef INET
3207	case AF_INET:
3208		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
3209		    &s->nat_src_node);
3210		s->rt_kif = r->rpool.cur->kif;
3211		break;
3212#endif /* INET */
3213#ifdef INET6
3214	case AF_INET6:
3215		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
3216		    &s->nat_src_node);
3217		s->rt_kif = r->rpool.cur->kif;
3218		break;
3219#endif /* INET6 */
3220	}
3221}
3222
3223int
3224pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
3225    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3226#ifdef __FreeBSD__
3227    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3228    struct ifqueue *ifq, struct inpcb *inp)
3229#else
3230    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3231    struct ifqueue *ifq)
3232#endif
3233{
3234	struct pf_rule		*nr = NULL;
3235	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3236	struct tcphdr		*th = pd->hdr.tcp;
3237	u_int16_t		 bport, nport = 0;
3238	sa_family_t		 af = pd->af;
3239	struct pf_rule		*r, *a = NULL;
3240	struct pf_ruleset	*ruleset = NULL;
3241	struct pf_src_node	*nsn = NULL;
3242	u_short			 reason;
3243	int			 rewrite = 0;
3244	int			 tag = -1, rtableid = -1;
3245	u_int16_t		 mss = V_tcp_mssdflt;
3246	int			 asd = 0;
3247	int			 match = 0;
3248
3249	if (pf_check_congestion(ifq)) {
3250		REASON_SET(&reason, PFRES_CONGEST);
3251		return (PF_DROP);
3252	}
3253
3254#ifdef __FreeBSD__
3255	if (inp != NULL)
3256		pd->lookup.done = pf_socket_lookup(direction, pd, inp);
3257	else if (debug_pfugidhack) {
3258		PF_UNLOCK();
3259		DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
3260		pd->lookup.done = pf_socket_lookup(direction, pd, inp);
3261		PF_LOCK();
3262	}
3263#endif
3264
3265	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3266
3267	if (direction == PF_OUT) {
3268		bport = nport = th->th_sport;
3269		/* check outgoing packet for BINAT/NAT */
3270		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3271		    saddr, th->th_sport, daddr, th->th_dport,
3272		    &pd->naddr, &nport)) != NULL) {
3273			PF_ACPY(&pd->baddr, saddr, af);
3274			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3275			    &th->th_sum, &pd->naddr, nport, 0, af);
3276			rewrite++;
3277			if (nr->natpass)
3278				r = NULL;
3279			pd->nat_rule = nr;
3280		}
3281	} else {
3282		bport = nport = th->th_dport;
3283		/* check incoming packet for BINAT/RDR */
3284		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3285		    saddr, th->th_sport, daddr, th->th_dport,
3286		    &pd->naddr, &nport)) != NULL) {
3287			PF_ACPY(&pd->baddr, daddr, af);
3288			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3289			    &th->th_sum, &pd->naddr, nport, 0, af);
3290			rewrite++;
3291			if (nr->natpass)
3292				r = NULL;
3293			pd->nat_rule = nr;
3294		}
3295	}
3296
3297	while (r != NULL) {
3298		r->evaluations++;
3299		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3300			r = r->skip[PF_SKIP_IFP].ptr;
3301		else if (r->direction && r->direction != direction)
3302			r = r->skip[PF_SKIP_DIR].ptr;
3303		else if (r->af && r->af != af)
3304			r = r->skip[PF_SKIP_AF].ptr;
3305		else if (r->proto && r->proto != IPPROTO_TCP)
3306			r = r->skip[PF_SKIP_PROTO].ptr;
3307		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3308		    r->src.neg, kif))
3309			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3310		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3311		    r->src.port[0], r->src.port[1], th->th_sport))
3312			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3313		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3314		    r->dst.neg, NULL))
3315			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3316		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3317		    r->dst.port[0], r->dst.port[1], th->th_dport))
3318			r = r->skip[PF_SKIP_DST_PORT].ptr;
3319		else if (r->tos && !(r->tos == pd->tos))
3320			r = TAILQ_NEXT(r, entries);
3321		else if (r->rule_flag & PFRULE_FRAGMENT)
3322			r = TAILQ_NEXT(r, entries);
3323		else if ((r->flagset & th->th_flags) != r->flags)
3324			r = TAILQ_NEXT(r, entries);
3325		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3326#ifdef __FreeBSD__
3327		    pf_socket_lookup(direction, pd, inp), 1)) &&
3328#else
3329		    pf_socket_lookup(direction, pd), 1)) &&
3330#endif
3331		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3332		    pd->lookup.uid))
3333			r = TAILQ_NEXT(r, entries);
3334		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3335#ifdef __FreeBSD__
3336		    pf_socket_lookup(direction, pd, inp), 1)) &&
3337#else
3338		    pf_socket_lookup(direction, pd), 1)) &&
3339#endif
3340		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3341		    pd->lookup.gid))
3342			r = TAILQ_NEXT(r, entries);
3343		else if (r->prob && r->prob <= arc4random())
3344			r = TAILQ_NEXT(r, entries);
3345		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
3346			r = TAILQ_NEXT(r, entries);
3347		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
3348		    pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
3349			r = TAILQ_NEXT(r, entries);
3350		else {
3351			if (r->tag)
3352				tag = r->tag;
3353			if (r->rtableid >= 0)
3354				rtableid = r->rtableid;
3355			if (r->anchor == NULL) {
3356				match = 1;
3357				*rm = r;
3358				*am = a;
3359				*rsm = ruleset;
3360				if ((*rm)->quick)
3361					break;
3362				r = TAILQ_NEXT(r, entries);
3363			} else
3364				pf_step_into_anchor(&asd, &ruleset,
3365				    PF_RULESET_FILTER, &r, &a, &match);
3366		}
3367		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3368		    PF_RULESET_FILTER, &r, &a, &match))
3369			break;
3370	}
3371	r = *rm;
3372	a = *am;
3373	ruleset = *rsm;
3374
3375	REASON_SET(&reason, PFRES_MATCH);
3376
3377	if (r->log || (nr != NULL && nr->natpass && nr->log)) {
3378		if (rewrite)
3379#ifdef __FreeBSD__
3380			m_copyback(m, off, sizeof(*th), (caddr_t)th);
3381#else
3382			m_copyback(m, off, sizeof(*th), th);
3383#endif
3384		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3385		    a, ruleset, pd);
3386	}
3387
3388	if ((r->action == PF_DROP) &&
3389	    ((r->rule_flag & PFRULE_RETURNRST) ||
3390	    (r->rule_flag & PFRULE_RETURNICMP) ||
3391	    (r->rule_flag & PFRULE_RETURN))) {
3392		/* undo NAT changes, if they have taken place */
3393		if (nr != NULL) {
3394			if (direction == PF_OUT) {
3395				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3396				    &th->th_sum, &pd->baddr, bport, 0, af);
3397				rewrite++;
3398			} else {
3399				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3400				    &th->th_sum, &pd->baddr, bport, 0, af);
3401				rewrite++;
3402			}
3403		}
3404		if (((r->rule_flag & PFRULE_RETURNRST) ||
3405		    (r->rule_flag & PFRULE_RETURN)) &&
3406		    !(th->th_flags & TH_RST)) {
3407			u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
3408
3409			if (th->th_flags & TH_SYN)
3410				ack++;
3411			if (th->th_flags & TH_FIN)
3412				ack++;
3413#ifdef __FreeBSD__
3414			pf_send_tcp(m, r, af, pd->dst,
3415#else
3416			pf_send_tcp(r, af, pd->dst,
3417#endif
3418			    pd->src, th->th_dport, th->th_sport,
3419			    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3420			    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
3421		} else if ((af == AF_INET) && r->return_icmp)
3422			pf_send_icmp(m, r->return_icmp >> 8,
3423			    r->return_icmp & 255, af, r);
3424		else if ((af == AF_INET6) && r->return_icmp6)
3425			pf_send_icmp(m, r->return_icmp6 >> 8,
3426			    r->return_icmp6 & 255, af, r);
3427	}
3428
3429	if (r->action == PF_DROP)
3430		return (PF_DROP);
3431
3432	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
3433		REASON_SET(&reason, PFRES_MEMORY);
3434		return (PF_DROP);
3435	}
3436
3437	if (r->keep_state || nr != NULL ||
3438	    (pd->flags & PFDESC_TCP_NORM)) {
3439		/* create new state */
3440		u_int16_t	 len;
3441		struct pf_state	*s = NULL;
3442		struct pf_src_node *sn = NULL;
3443
3444		len = pd->tot_len - off - (th->th_off << 2);
3445
3446		/* check maximums */
3447		if (r->max_states && (r->states >= r->max_states)) {
3448			pf_status.lcounters[LCNT_STATES]++;
3449			REASON_SET(&reason, PFRES_MAXSTATES);
3450			goto cleanup;
3451		}
3452		/* src node for filter rule */
3453		if ((r->rule_flag & PFRULE_SRCTRACK ||
3454		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3455		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
3456			REASON_SET(&reason, PFRES_SRCLIMIT);
3457			goto cleanup;
3458		}
3459		/* src node for translation rule */
3460		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3461		    ((direction == PF_OUT &&
3462		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3463		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
3464			REASON_SET(&reason, PFRES_SRCLIMIT);
3465			goto cleanup;
3466		}
3467		s = pool_get(&pf_state_pl, PR_NOWAIT);
3468		if (s == NULL) {
3469			REASON_SET(&reason, PFRES_MEMORY);
3470cleanup:
3471			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3472				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3473				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3474				pf_status.src_nodes--;
3475				pool_put(&pf_src_tree_pl, sn);
3476			}
3477			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3478			    nsn->expire == 0) {
3479				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3480				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3481				pf_status.src_nodes--;
3482				pool_put(&pf_src_tree_pl, nsn);
3483			}
3484			return (PF_DROP);
3485		}
3486		bzero(s, sizeof(*s));
3487		s->rule.ptr = r;
3488		s->nat_rule.ptr = nr;
3489		s->anchor.ptr = a;
3490		STATE_INC_COUNTERS(s);
3491		s->allow_opts = r->allow_opts;
3492		s->log = r->log & PF_LOG_ALL;
3493		if (nr != NULL)
3494			s->log |= nr->log & PF_LOG_ALL;
3495		s->proto = IPPROTO_TCP;
3496		s->direction = direction;
3497		s->af = af;
3498		if (direction == PF_OUT) {
3499			PF_ACPY(&s->gwy.addr, saddr, af);
3500			s->gwy.port = th->th_sport;		/* sport */
3501			PF_ACPY(&s->ext.addr, daddr, af);
3502			s->ext.port = th->th_dport;
3503			if (nr != NULL) {
3504				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3505				s->lan.port = bport;
3506			} else {
3507				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3508				s->lan.port = s->gwy.port;
3509			}
3510		} else {
3511			PF_ACPY(&s->lan.addr, daddr, af);
3512			s->lan.port = th->th_dport;
3513			PF_ACPY(&s->ext.addr, saddr, af);
3514			s->ext.port = th->th_sport;
3515			if (nr != NULL) {
3516				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3517				s->gwy.port = bport;
3518			} else {
3519				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3520				s->gwy.port = s->lan.port;
3521			}
3522		}
3523
3524		s->src.seqlo = ntohl(th->th_seq);
3525		s->src.seqhi = s->src.seqlo + len + 1;
3526		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3527		    r->keep_state == PF_STATE_MODULATE) {
3528			/* Generate sequence number modulator */
3529#ifdef __FreeBSD__
3530			while ((s->src.seqdiff =
3531			    pf_new_isn(s) - s->src.seqlo) == 0)
3532				;
3533#else
3534			while ((s->src.seqdiff =
3535			    tcp_rndiss_next() - s->src.seqlo) == 0)
3536				;
3537#endif
3538			pf_change_a(&th->th_seq, &th->th_sum,
3539			    htonl(s->src.seqlo + s->src.seqdiff), 0);
3540			rewrite = 1;
3541		} else
3542			s->src.seqdiff = 0;
3543		if (th->th_flags & TH_SYN) {
3544			s->src.seqhi++;
3545			s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
3546		}
3547		s->src.max_win = MAX(ntohs(th->th_win), 1);
3548		if (s->src.wscale & PF_WSCALE_MASK) {
3549			/* Remove scale factor from initial window */
3550			int win = s->src.max_win;
3551			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3552			s->src.max_win = (win - 1) >>
3553			    (s->src.wscale & PF_WSCALE_MASK);
3554		}
3555		if (th->th_flags & TH_FIN)
3556			s->src.seqhi++;
3557		s->dst.seqhi = 1;
3558		s->dst.max_win = 1;
3559		s->src.state = TCPS_SYN_SENT;
3560		s->dst.state = TCPS_CLOSED;
3561		s->creation = time_second;
3562		s->expire = time_second;
3563		s->timeout = PFTM_TCP_FIRST_PACKET;
3564		pf_set_rt_ifp(s, saddr);
3565		if (sn != NULL) {
3566			s->src_node = sn;
3567			s->src_node->states++;
3568		}
3569		if (nsn != NULL) {
3570			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3571			s->nat_src_node = nsn;
3572			s->nat_src_node->states++;
3573		}
3574		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
3575		    off, pd, th, &s->src, &s->dst)) {
3576			REASON_SET(&reason, PFRES_MEMORY);
3577			pf_src_tree_remove_state(s);
3578			STATE_DEC_COUNTERS(s);
3579			pool_put(&pf_state_pl, s);
3580			return (PF_DROP);
3581		}
3582		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3583		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
3584		    &s->src, &s->dst, &rewrite)) {
3585			/* This really shouldn't happen!!! */
3586			DPFPRINTF(PF_DEBUG_URGENT,
3587			    ("pf_normalize_tcp_stateful failed on first pkt"));
3588			pf_normalize_tcp_cleanup(s);
3589			pf_src_tree_remove_state(s);
3590			STATE_DEC_COUNTERS(s);
3591			pool_put(&pf_state_pl, s);
3592			return (PF_DROP);
3593		}
3594		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3595			pf_normalize_tcp_cleanup(s);
3596			REASON_SET(&reason, PFRES_STATEINS);
3597			pf_src_tree_remove_state(s);
3598			STATE_DEC_COUNTERS(s);
3599			pool_put(&pf_state_pl, s);
3600			return (PF_DROP);
3601		} else
3602			*sm = s;
3603		if (tag > 0) {
3604			pf_tag_ref(tag);
3605			s->tag = tag;
3606		}
3607		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3608		    r->keep_state == PF_STATE_SYNPROXY) {
3609			s->src.state = PF_TCPS_PROXY_SRC;
3610			if (nr != NULL) {
3611				if (direction == PF_OUT) {
3612					pf_change_ap(saddr, &th->th_sport,
3613					    pd->ip_sum, &th->th_sum, &pd->baddr,
3614					    bport, 0, af);
3615				} else {
3616					pf_change_ap(daddr, &th->th_dport,
3617					    pd->ip_sum, &th->th_sum, &pd->baddr,
3618					    bport, 0, af);
3619				}
3620			}
3621			s->src.seqhi = htonl(arc4random());
3622			/* Find mss option */
3623			mss = pf_get_mss(m, off, th->th_off, af);
3624			mss = pf_calc_mss(saddr, af, mss);
3625			mss = pf_calc_mss(daddr, af, mss);
3626			s->src.mss = mss;
3627#ifdef __FreeBSD__
3628			pf_send_tcp(NULL, r, af, daddr, saddr, th->th_dport,
3629#else
3630			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
3631#endif
3632			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3633			    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
3634			REASON_SET(&reason, PFRES_SYNPROXY);
3635			return (PF_SYNPROXY_DROP);
3636		}
3637	}
3638
3639	/* copy back packet headers if we performed NAT operations */
3640	if (rewrite)
3641		m_copyback(m, off, sizeof(*th), (caddr_t)th);
3642
3643	return (PF_PASS);
3644}
3645
/*
 * pf_test_udp: evaluate the active filter ruleset for a UDP packet for
 * which this function is asked to find a rule, apply any NAT/RDR/BINAT
 * translation returned by pf_get_translation(), and create a state entry
 * when the matching rule has keep_state set or a translation rule applied.
 *
 *	rm, am, rsm	in/out: set to the matching rule, its anchor rule and
 *			its ruleset on every match.  They are read back after
 *			the rule loop even when nothing matched, so the caller
 *			must pass in valid initial values.
 *	sm		out: set to the new state once it has been inserted.
 *	direction	compared against PF_OUT; anything else is handled as
 *			the inbound case.
 *	kif		interface used for rule matching, logging and state
 *			binding.
 *	m, off		packet mbuf and the offset at which the UDP header is
 *			copied back with m_copyback().
 *	h		handed unchanged to PFLOG_PACKET().
 *	pd		packet descriptor (addresses, UDP header pointer,
 *			lookup results, NAT bookkeeping).
 *	ifq		queue handed to pf_check_congestion().
 *	inp		FreeBSD only: handed to pf_socket_lookup().
 *
 * Returns PF_PASS or PF_DROP.
 */
3646int
3647pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
3648    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3649#ifdef __FreeBSD__
3650    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3651    struct ifqueue *ifq, struct inpcb *inp)
3652#else
3653    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3654    struct ifqueue *ifq)
3655#endif
3656{
3657	struct pf_rule		*nr = NULL;
3658	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3659	struct udphdr		*uh = pd->hdr.udp;
	/* bport: port before translation; nport: port after translation */
3660	u_int16_t		 bport, nport = 0;
3661	sa_family_t		 af = pd->af;
3662	struct pf_rule		*r, *a = NULL;
3663	struct pf_ruleset	*ruleset = NULL;
3664	struct pf_src_node	*nsn = NULL;
3665	u_short			 reason;
	/* non-zero once the UDP header copy in *uh was modified */
3666	int			 rewrite = 0;
3667	int			 tag = -1, rtableid = -1;
3668	int			 asd = 0;
3669	int			 match = 0;
3670
	/* refuse to evaluate rules while pf_check_congestion() reports true */
3671	if (pf_check_congestion(ifq)) {
3672		REASON_SET(&reason, PFRES_CONGEST);
3673		return (PF_DROP);
3674	}
3675
3676#ifdef __FreeBSD__
	/*
	 * Do the socket lookup up front.  Without an inpcb the lookup is
	 * only attempted when debug_pfugidhack is set, and then with the
	 * pf lock released around the call.
	 */
3677	if (inp != NULL)
3678		pd->lookup.done = pf_socket_lookup(direction, pd, inp);
3679	else if (debug_pfugidhack) {
3680		PF_UNLOCK();
3681		DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
3682		pd->lookup.done = pf_socket_lookup(direction, pd, inp);
3683		PF_LOCK();
3684	}
3685#endif
3686
3687	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3688
	/*
	 * Translation: outbound packets have their source address/port
	 * rewritten, inbound packets their destination.  The original
	 * address is saved in pd->baddr (and the original port in bport)
	 * so the change can be undone or recorded in the state later.
	 */
3689	if (direction == PF_OUT) {
3690		bport = nport = uh->uh_sport;
3691		/* check outgoing packet for BINAT/NAT */
3692		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3693		    saddr, uh->uh_sport, daddr, uh->uh_dport,
3694		    &pd->naddr, &nport)) != NULL) {
3695			PF_ACPY(&pd->baddr, saddr, af);
3696			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3697			    &uh->uh_sum, &pd->naddr, nport, 1, af);
3698			rewrite++;
			/*
			 * natpass: skip the filter loop entirely; the rule
			 * already stored in *rm is used below.
			 */
3699			if (nr->natpass)
3700				r = NULL;
3701			pd->nat_rule = nr;
3702		}
3703	} else {
3704		bport = nport = uh->uh_dport;
3705		/* check incoming packet for BINAT/RDR */
3706		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3707		    saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
3708		    &nport)) != NULL) {
3709			PF_ACPY(&pd->baddr, daddr, af);
3710			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3711			    &uh->uh_sum, &pd->naddr, nport, 1, af);
3712			rewrite++;
			/* natpass: see the outbound case above */
3713			if (nr->natpass)
3714				r = NULL;
3715			pd->nat_rule = nr;
3716		}
3717	}
3718
	/*
	 * Rule evaluation.  A mismatch on interface, direction, address
	 * family, protocol, addresses or ports jumps ahead through the
	 * rule's skip[] pointer for that criterion; a mismatch on any later
	 * criterion only advances to the next rule.  Note that the address
	 * and port comparisons see the already translated values.
	 */
3719	while (r != NULL) {
3720		r->evaluations++;
3721		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3722			r = r->skip[PF_SKIP_IFP].ptr;
3723		else if (r->direction && r->direction != direction)
3724			r = r->skip[PF_SKIP_DIR].ptr;
3725		else if (r->af && r->af != af)
3726			r = r->skip[PF_SKIP_AF].ptr;
3727		else if (r->proto && r->proto != IPPROTO_UDP)
3728			r = r->skip[PF_SKIP_PROTO].ptr;
3729		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3730		    r->src.neg, kif))
3731			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3732		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3733		    r->src.port[0], r->src.port[1], uh->uh_sport))
3734			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3735		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3736		    r->dst.neg, NULL))
3737			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3738		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3739		    r->dst.port[0], r->dst.port[1], uh->uh_dport))
3740			r = r->skip[PF_SKIP_DST_PORT].ptr;
3741		else if (r->tos && !(r->tos == pd->tos))
3742			r = TAILQ_NEXT(r, entries);
3743		else if (r->rule_flag & PFRULE_FRAGMENT)
3744			r = TAILQ_NEXT(r, entries);
		/*
		 * uid/gid rules: the socket lookup is performed at most
		 * once per packet; the comma expression sets lookup.done
		 * to 1 regardless of what pf_socket_lookup() returned.
		 */
3745		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3746#ifdef __FreeBSD__
3747		    pf_socket_lookup(direction, pd, inp), 1)) &&
3748#else
3749		    pf_socket_lookup(direction, pd), 1)) &&
3750#endif
3751		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3752		    pd->lookup.uid))
3753			r = TAILQ_NEXT(r, entries);
3754		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3755#ifdef __FreeBSD__
3756		    pf_socket_lookup(direction, pd, inp), 1)) &&
3757#else
3758		    pf_socket_lookup(direction, pd), 1)) &&
3759#endif
3760		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3761		    pd->lookup.gid))
3762			r = TAILQ_NEXT(r, entries);
		/* probabilistic rule: skipped when prob <= a fresh random */
3763		else if (r->prob && r->prob <= arc4random())
3764			r = TAILQ_NEXT(r, entries);
3765		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
3766			r = TAILQ_NEXT(r, entries);
		/* a rule with an OS fingerprint never matches a UDP packet */
3767		else if (r->os_fingerprint != PF_OSFP_ANY)
3768			r = TAILQ_NEXT(r, entries);
3769		else {
			/* all criteria matched */
3770			if (r->tag)
3771				tag = r->tag;
3772			if (r->rtableid >= 0)
3773				rtableid = r->rtableid;
3774			if (r->anchor == NULL) {
				/*
				 * Record the match.  Evaluation goes on
				 * unless the rule is "quick", so a later
				 * matching rule overwrites this one.
				 */
3775				match = 1;
3776				*rm = r;
3777				*am = a;
3778				*rsm = ruleset;
3779				if ((*rm)->quick)
3780					break;
3781				r = TAILQ_NEXT(r, entries);
3782			} else
3783				pf_step_into_anchor(&asd, &ruleset,
3784				    PF_RULESET_FILTER, &r, &a, &match);
3785		}
		/* end of a ruleset: leave the anchor; non-zero return stops */
3786		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3787		    PF_RULESET_FILTER, &r, &a, &match))
3788			break;
3789	}
	/* continue with whatever rule was recorded last (or passed in) */
3790	r = *rm;
3791	a = *am;
3792	ruleset = *rsm;
3793
3794	REASON_SET(&reason, PFRES_MATCH);
3795
	/*
	 * Log through the filter rule, or through the translation rule
	 * when only that one asks for logging.  A rewritten header is
	 * copied into the mbuf first -- presumably so the logged packet
	 * shows the translated values.
	 */
3796	if (r->log || (nr != NULL && nr->natpass && nr->log)) {
3797		if (rewrite)
3798#ifdef __FreeBSD__
3799			m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3800#else
3801			m_copyback(m, off, sizeof(*uh), uh);
3802#endif
3803		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3804		    a, ruleset, pd);
3805	}
3806
	/* block rule with return-icmp / return: answer with an ICMP error */
3807	if ((r->action == PF_DROP) &&
3808	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3809	    (r->rule_flag & PFRULE_RETURN))) {
3810		/* undo NAT changes, if they have taken place */
3811		if (nr != NULL) {
3812			if (direction == PF_OUT) {
3813				pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3814				    &uh->uh_sum, &pd->baddr, bport, 1, af);
3815				rewrite++;
3816			} else {
3817				pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3818				    &uh->uh_sum, &pd->baddr, bport, 1, af);
3819				rewrite++;
3820			}
3821		}
		/* return_icmp{,6} packs the ICMP type in the high byte and
		 * the code in the low byte */
3822		if ((af == AF_INET) && r->return_icmp)
3823			pf_send_icmp(m, r->return_icmp >> 8,
3824			    r->return_icmp & 255, af, r);
3825		else if ((af == AF_INET6) && r->return_icmp6)
3826			pf_send_icmp(m, r->return_icmp6 >> 8,
3827			    r->return_icmp6 & 255, af, r);
3828	}
3829
3830	if (r->action == PF_DROP)
3831		return (PF_DROP);
3832
3833	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
3834		REASON_SET(&reason, PFRES_MEMORY);
3835		return (PF_DROP);
3836	}
3837
	/* a state is needed for keep_state rules and for any translation */
3838	if (r->keep_state || nr != NULL) {
3839		/* create new state */
3840		struct pf_state	*s = NULL;
3841		struct pf_src_node *sn = NULL;
3842
3843		/* check maximums */
3844		if (r->max_states && (r->states >= r->max_states)) {
3845			pf_status.lcounters[LCNT_STATES]++;
3846			REASON_SET(&reason, PFRES_MAXSTATES);
3847			goto cleanup;
3848		}
3849		/* src node for filter rule */
3850		if ((r->rule_flag & PFRULE_SRCTRACK ||
3851		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3852		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
3853			REASON_SET(&reason, PFRES_SRCLIMIT);
3854			goto cleanup;
3855		}
3856		/* src node for translation rule */
		/*
		 * NOTE(review): for PF_OUT, when the first
		 * pf_insert_src_node() call returns 0 the || falls through
		 * and pf_insert_src_node() is called a second time with
		 * saddr -- confirm that this is intended.
		 */
3857		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3858		    ((direction == PF_OUT &&
3859		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3860		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
3861			REASON_SET(&reason, PFRES_SRCLIMIT);
3862			goto cleanup;
3863		}
3864		s = pool_get(&pf_state_pl, PR_NOWAIT);
3865		if (s == NULL) {
3866			REASON_SET(&reason, PFRES_MEMORY);
			/*
			 * Common failure exit, also reached by the gotos
			 * above: free the source nodes that have no states
			 * and no expire time set, then drop the packet.
			 */
3867cleanup:
3868			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3869				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3870				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3871				pf_status.src_nodes--;
3872				pool_put(&pf_src_tree_pl, sn);
3873			}
			/* nsn != sn: do not free the same node twice */
3874			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3875			    nsn->expire == 0) {
3876				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3877				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3878				pf_status.src_nodes--;
3879				pool_put(&pf_src_tree_pl, nsn);
3880			}
3881			return (PF_DROP);
3882		}
3883		bzero(s, sizeof(*s));
3884		s->rule.ptr = r;
3885		s->nat_rule.ptr = nr;
3886		s->anchor.ptr = a;
3887		STATE_INC_COUNTERS(s);
3888		s->allow_opts = r->allow_opts;
3889		s->log = r->log & PF_LOG_ALL;
3890		if (nr != NULL)
3891			s->log |= nr->log & PF_LOG_ALL;
3892		s->proto = IPPROTO_UDP;
3893		s->direction = direction;
3894		s->af = af;
		/*
		 * Fill in the three state endpoints.  Outbound: gwy is the
		 * (possibly translated) source, ext the destination and lan
		 * the source before translation.  Inbound: lan is the
		 * (possibly translated) destination, ext the source and gwy
		 * the destination before translation.  Without translation
		 * lan and gwy are identical.
		 */
3895		if (direction == PF_OUT) {
3896			PF_ACPY(&s->gwy.addr, saddr, af);
3897			s->gwy.port = uh->uh_sport;
3898			PF_ACPY(&s->ext.addr, daddr, af);
3899			s->ext.port = uh->uh_dport;
3900			if (nr != NULL) {
3901				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3902				s->lan.port = bport;
3903			} else {
3904				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3905				s->lan.port = s->gwy.port;
3906			}
3907		} else {
3908			PF_ACPY(&s->lan.addr, daddr, af);
3909			s->lan.port = uh->uh_dport;
3910			PF_ACPY(&s->ext.addr, saddr, af);
3911			s->ext.port = uh->uh_sport;
3912			if (nr != NULL) {
3913				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3914				s->gwy.port = bport;
3915			} else {
3916				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3917				s->gwy.port = s->lan.port;
3918			}
3919		}
		/* only the initiating side has sent a datagram so far */
3920		s->src.state = PFUDPS_SINGLE;
3921		s->dst.state = PFUDPS_NO_TRAFFIC;
3922		s->creation = time_second;
3923		s->expire = time_second;
3924		s->timeout = PFTM_UDP_FIRST_PACKET;
3925		pf_set_rt_ifp(s, saddr);
		/* attach the source nodes and account for the new state */
3926		if (sn != NULL) {
3927			s->src_node = sn;
3928			s->src_node->states++;
3929		}
3930		if (nsn != NULL) {
3931			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3932			s->nat_src_node = nsn;
3933			s->nat_src_node->states++;
3934		}
		/* insertion failed: undo the accounting and free the state */
3935		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3936			REASON_SET(&reason, PFRES_STATEINS);
3937			pf_src_tree_remove_state(s);
3938			STATE_DEC_COUNTERS(s);
3939			pool_put(&pf_state_pl, s);
3940			return (PF_DROP);
3941		} else
3942			*sm = s;
		/* the state holds its own reference on the tag */
3943		if (tag > 0) {
3944			pf_tag_ref(tag);
3945			s->tag = tag;
3946		}
3947	}
3948
3949	/* copy back packet headers if we performed NAT operations */
3950	if (rewrite)
3951		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3952
3953	return (PF_PASS);
3954}
3955
/*
 * pf_test_icmp: evaluate the active filter ruleset for an ICMP or ICMPv6
 * packet, apply any NAT/RDR/BINAT translation returned by
 * pf_get_translation(), and create a state entry for non-error messages
 * when the matching rule has keep_state set or a translation rule applied.
 *
 *	rm, am, rsm	in/out: set to the matching rule, its anchor rule and
 *			its ruleset on every match.  They are read back after
 *			the rule loop even when nothing matched, so the caller
 *			must pass in valid initial values.
 *	sm		out: set to the new state once it has been inserted.
 *	direction	compared against PF_OUT; anything else is handled as
 *			the inbound case.
 *	kif		interface used for rule matching, logging and state
 *			binding.
 *	m, off		packet mbuf and the offset at which the ICMP header
 *			is copied back with m_copyback().
 *	h		handed unchanged to PFLOG_PACKET().
 *	pd		packet descriptor (addresses, protocol, ICMP header
 *			pointer, NAT bookkeeping).
 *	ifq		queue handed to pf_check_congestion().
 *
 * Returns PF_PASS or PF_DROP.
 */
3956int
3957pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
3958    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3959    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3960    struct ifqueue *ifq)
3961{
3962	struct pf_rule		*nr = NULL;
3963	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3964	struct pf_rule		*r, *a = NULL;
3965	struct pf_ruleset	*ruleset = NULL;
3966	struct pf_src_node	*nsn = NULL;
3967	u_short			 reason;
	/* the ICMP id takes the place of a port: bport is the id before
	 * translation, nport the id after translation */
3968	u_int16_t		 icmpid = 0, bport, nport = 0;
3969	sa_family_t		 af = pd->af;
3970	u_int8_t		 icmptype = 0;	/* make the compiler happy */
3971	u_int8_t		 icmpcode = 0;	/* make the compiler happy */
	/* non-zero for the error message types listed below; no state is
	 * created for those */
3972	int			 state_icmp = 0;
3973	int			 tag = -1, rtableid = -1;
	/* rewrite exists only with INET6: the IPv4 NAT path below copies the
	 * header back right away instead of deferring it */
3974#ifdef INET6
3975	int			 rewrite = 0;
3976#endif /* INET6 */
3977	int			 asd = 0;
3978	int			 match = 0;
3979
	/* refuse to evaluate rules while pf_check_congestion() reports true */
3980	if (pf_check_congestion(ifq)) {
3981		REASON_SET(&reason, PFRES_CONGEST);
3982		return (PF_DROP);
3983	}
3984
	/* pull type, code and id out of the protocol specific header */
3985	switch (pd->proto) {
3986#ifdef INET
3987	case IPPROTO_ICMP:
3988		icmptype = pd->hdr.icmp->icmp_type;
3989		icmpcode = pd->hdr.icmp->icmp_code;
3990		icmpid = pd->hdr.icmp->icmp_id;
3991
3992		if (icmptype == ICMP_UNREACH ||
3993		    icmptype == ICMP_SOURCEQUENCH ||
3994		    icmptype == ICMP_REDIRECT ||
3995		    icmptype == ICMP_TIMXCEED ||
3996		    icmptype == ICMP_PARAMPROB)
3997			state_icmp++;
3998		break;
3999#endif /* INET */
4000#ifdef INET6
4001	case IPPROTO_ICMPV6:
4002		icmptype = pd->hdr.icmp6->icmp6_type;
4003		icmpcode = pd->hdr.icmp6->icmp6_code;
4004		icmpid = pd->hdr.icmp6->icmp6_id;
4005
4006		if (icmptype == ICMP6_DST_UNREACH ||
4007		    icmptype == ICMP6_PACKET_TOO_BIG ||
4008		    icmptype == ICMP6_TIME_EXCEEDED ||
4009		    icmptype == ICMP6_PARAM_PROB)
4010			state_icmp++;
4011		break;
4012#endif /* INET6 */
4013	}
4014
4015	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4016
	/*
	 * Translation: outbound packets have their source address
	 * rewritten, inbound packets their destination.  The original
	 * address is saved in pd->baddr.  The ICMP id is passed to
	 * pf_get_translation() as both source and destination port.
	 */
4017	if (direction == PF_OUT) {
4018		bport = nport = icmpid;
4019		/* check outgoing packet for BINAT/NAT */
4020		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
4021		    saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
4022		    NULL) {
4023			PF_ACPY(&pd->baddr, saddr, af);
4024			switch (af) {
4025#ifdef INET
4026			case AF_INET:
				/*
				 * IPv4: change the address, then replace the
				 * ICMP id with nport, fix up the ICMP
				 * checksum for it and write the header back
				 * into the mbuf immediately.
				 */
4027				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
4028				    pd->naddr.v4.s_addr, 0);
4029				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
4030				    pd->hdr.icmp->icmp_cksum, icmpid, nport, 0);
4031				pd->hdr.icmp->icmp_id = nport;
4032				m_copyback(m, off, ICMP_MINLEN,
4033				    (caddr_t)pd->hdr.icmp);
4034				break;
4035#endif /* INET */
4036#ifdef INET6
4037			case AF_INET6:
				/*
				 * IPv6: the address change is applied against
				 * the ICMPv6 checksum; the copy back to the
				 * mbuf is deferred via rewrite.  The id is
				 * left as it is.
				 */
4038				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
4039				    &pd->naddr, 0);
4040				rewrite++;
4041				break;
4042#endif /* INET6 */
4043			}
			/*
			 * natpass: skip the filter loop entirely; the rule
			 * already stored in *rm is used below.
			 */
4044			if (nr->natpass)
4045				r = NULL;
4046			pd->nat_rule = nr;
4047		}
4048	} else {
4049		bport = nport = icmpid;
4050		/* check incoming packet for BINAT/RDR */
4051		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
4052		    saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
4053		    NULL) {
4054			PF_ACPY(&pd->baddr, daddr, af);
4055			switch (af) {
4056#ifdef INET
4057			case AF_INET:
				/* only daddr/ip_sum are changed here; the
				 * ICMP header itself is not touched */
4058				pf_change_a(&daddr->v4.s_addr,
4059				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
4060				break;
4061#endif /* INET */
4062#ifdef INET6
4063			case AF_INET6:
4064				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
4065				    &pd->naddr, 0);
4066				rewrite++;
4067				break;
4068#endif /* INET6 */
4069			}
			/* natpass: see the outbound case above */
4070			if (nr->natpass)
4071				r = NULL;
4072			pd->nat_rule = nr;
4073		}
4074	}
4075
	/*
	 * Rule evaluation.  A mismatch on interface, direction, address
	 * family, protocol or addresses jumps ahead through the rule's
	 * skip[] pointer for that criterion; a mismatch on any later
	 * criterion only advances to the next rule.  Note that the address
	 * comparisons see the already translated values.
	 */
4076	while (r != NULL) {
4077		r->evaluations++;
4078		if (pfi_kif_match(r->kif, kif) == r->ifnot)
4079			r = r->skip[PF_SKIP_IFP].ptr;
4080		else if (r->direction && r->direction != direction)
4081			r = r->skip[PF_SKIP_DIR].ptr;
4082		else if (r->af && r->af != af)
4083			r = r->skip[PF_SKIP_AF].ptr;
4084		else if (r->proto && r->proto != pd->proto)
4085			r = r->skip[PF_SKIP_PROTO].ptr;
4086		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
4087		    r->src.neg, kif))
4088			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4089		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
4090		    r->dst.neg, NULL))
4091			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		/*
		 * Rule type and code are compared against the packet value
		 * plus one; a rule value of 0 places no restriction.
		 */
4092		else if (r->type && r->type != icmptype + 1)
4093			r = TAILQ_NEXT(r, entries);
4094		else if (r->code && r->code != icmpcode + 1)
4095			r = TAILQ_NEXT(r, entries);
4096		else if (r->tos && !(r->tos == pd->tos))
4097			r = TAILQ_NEXT(r, entries);
4098		else if (r->rule_flag & PFRULE_FRAGMENT)
4099			r = TAILQ_NEXT(r, entries);
		/* probabilistic rule: skipped when prob <= a fresh random */
4100		else if (r->prob && r->prob <= arc4random())
4101			r = TAILQ_NEXT(r, entries);
4102		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
4103			r = TAILQ_NEXT(r, entries);
		/* a rule with an OS fingerprint never matches an ICMP packet */
4104		else if (r->os_fingerprint != PF_OSFP_ANY)
4105			r = TAILQ_NEXT(r, entries);
4106		else {
			/* all criteria matched */
4107			if (r->tag)
4108				tag = r->tag;
4109			if (r->rtableid >= 0)
4110				rtableid = r->rtableid;
4111			if (r->anchor == NULL) {
				/*
				 * Record the match.  Evaluation goes on
				 * unless the rule is "quick", so a later
				 * matching rule overwrites this one.
				 */
4112				match = 1;
4113				*rm = r;
4114				*am = a;
4115				*rsm = ruleset;
4116				if ((*rm)->quick)
4117					break;
4118				r = TAILQ_NEXT(r, entries);
4119			} else
4120				pf_step_into_anchor(&asd, &ruleset,
4121				    PF_RULESET_FILTER, &r, &a, &match);
4122		}
		/* end of a ruleset: leave the anchor; non-zero return stops */
4123		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
4124		    PF_RULESET_FILTER, &r, &a, &match))
4125			break;
4126	}
	/* continue with whatever rule was recorded last (or passed in) */
4127	r = *rm;
4128	a = *am;
4129	ruleset = *rsm;
4130
4131	REASON_SET(&reason, PFRES_MATCH);
4132
	/*
	 * Log through the filter rule, or through the translation rule
	 * when only that one asks for logging.  A pending IPv6 header
	 * rewrite is copied into the mbuf first -- presumably so the
	 * logged packet shows the translated values.
	 */
4133	if (r->log || (nr != NULL && nr->natpass && nr->log)) {
4134#ifdef INET6
4135		if (rewrite)
4136			m_copyback(m, off, sizeof(struct icmp6_hdr),
4137			    (caddr_t)pd->hdr.icmp6);
4138#endif /* INET6 */
4139		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
4140		    a, ruleset, pd);
4141	}
4142
	/* anything but an explicit pass drops the packet; no reply is sent */
4143	if (r->action != PF_PASS)
4144		return (PF_DROP);
4145
4146	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
4147		REASON_SET(&reason, PFRES_MEMORY);
4148		return (PF_DROP);
4149	}
4150
	/*
	 * A state is needed for keep_state rules and for any translation,
	 * but never for the error message types flagged in state_icmp.
	 */
4151	if (!state_icmp && (r->keep_state || nr != NULL)) {
4152		/* create new state */
4153		struct pf_state	*s = NULL;
4154		struct pf_src_node *sn = NULL;
4155
4156		/* check maximums */
4157		if (r->max_states && (r->states >= r->max_states)) {
4158			pf_status.lcounters[LCNT_STATES]++;
4159			REASON_SET(&reason, PFRES_MAXSTATES);
4160			goto cleanup;
4161		}
4162		/* src node for filter rule */
4163		if ((r->rule_flag & PFRULE_SRCTRACK ||
4164		    r->rpool.opts & PF_POOL_STICKYADDR) &&
4165		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
4166			REASON_SET(&reason, PFRES_SRCLIMIT);
4167			goto cleanup;
4168		}
4169		/* src node for translation rule */
		/*
		 * NOTE(review): for PF_OUT, when the first
		 * pf_insert_src_node() call returns 0 the || falls through
		 * and pf_insert_src_node() is called a second time with
		 * saddr -- confirm that this is intended.
		 */
4170		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
4171		    ((direction == PF_OUT &&
4172		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
4173		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
4174			REASON_SET(&reason, PFRES_SRCLIMIT);
4175			goto cleanup;
4176		}
4177		s = pool_get(&pf_state_pl, PR_NOWAIT);
4178		if (s == NULL) {
4179			REASON_SET(&reason, PFRES_MEMORY);
			/*
			 * Common failure exit, also reached by the gotos
			 * above: free the source nodes that have no states
			 * and no expire time set, then drop the packet.
			 */
4180cleanup:
4181			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
4182				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
4183				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4184				pf_status.src_nodes--;
4185				pool_put(&pf_src_tree_pl, sn);
4186			}
			/* nsn != sn: do not free the same node twice */
4187			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
4188			    nsn->expire == 0) {
4189				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
4190				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4191				pf_status.src_nodes--;
4192				pool_put(&pf_src_tree_pl, nsn);
4193			}
4194			return (PF_DROP);
4195		}
4196		bzero(s, sizeof(*s));
4197		s->rule.ptr = r;
4198		s->nat_rule.ptr = nr;
4199		s->anchor.ptr = a;
4200		STATE_INC_COUNTERS(s);
4201		s->allow_opts = r->allow_opts;
4202		s->log = r->log & PF_LOG_ALL;
4203		if (nr != NULL)
4204			s->log |= nr->log & PF_LOG_ALL;
4205		s->proto = pd->proto;
4206		s->direction = direction;
4207		s->af = af;
		/*
		 * Fill in the three state endpoints, with the ICMP id in
		 * place of a port.  Outbound: gwy is the (possibly
		 * translated) source with id nport, lan the source before
		 * translation with id bport.  Inbound: lan is the (possibly
		 * translated) destination with id nport, gwy the destination
		 * before translation with id bport.  ext always gets port 0.
		 * Without translation lan and gwy are identical.
		 */
4208		if (direction == PF_OUT) {
4209			PF_ACPY(&s->gwy.addr, saddr, af);
4210			s->gwy.port = nport;
4211			PF_ACPY(&s->ext.addr, daddr, af);
4212			s->ext.port = 0;
4213			if (nr != NULL) {
4214				PF_ACPY(&s->lan.addr, &pd->baddr, af);
4215				s->lan.port = bport;
4216			} else {
4217				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
4218				s->lan.port = s->gwy.port;
4219			}
4220		} else {
4221			PF_ACPY(&s->lan.addr, daddr, af);
4222			s->lan.port = nport;
4223			PF_ACPY(&s->ext.addr, saddr, af);
4224			s->ext.port = 0;
4225			if (nr != NULL) {
4226				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
4227				s->gwy.port = bport;
4228			} else {
4229				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
4230				s->gwy.port = s->lan.port;
4231			}
4232		}
4233		s->creation = time_second;
4234		s->expire = time_second;
4235		s->timeout = PFTM_ICMP_FIRST_PACKET;
4236		pf_set_rt_ifp(s, saddr);
		/* attach the source nodes and account for the new state */
4237		if (sn != NULL) {
4238			s->src_node = sn;
4239			s->src_node->states++;
4240		}
4241		if (nsn != NULL) {
4242			PF_ACPY(&nsn->raddr, &pd->naddr, af);
4243			s->nat_src_node = nsn;
4244			s->nat_src_node->states++;
4245		}
		/* insertion failed: undo the accounting and free the state */
4246		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
4247			REASON_SET(&reason, PFRES_STATEINS);
4248			pf_src_tree_remove_state(s);
4249			STATE_DEC_COUNTERS(s);
4250			pool_put(&pf_state_pl, s);
4251			return (PF_DROP);
4252		} else
4253			*sm = s;
		/* the state holds its own reference on the tag */
4254		if (tag > 0) {
4255			pf_tag_ref(tag);
4256			s->tag = tag;
4257		}
4258	}
4259
4260#ifdef INET6
4261	/* copy back packet headers if we performed IPv6 NAT operations */
4262	if (rewrite)
4263		m_copyback(m, off, sizeof(struct icmp6_hdr),
4264		    (caddr_t)pd->hdr.icmp6);
4265#endif /* INET6 */
4266
4267	return (PF_PASS);
4268}
4269
/*
 * pf_test_other: evaluate the translation and filter rules for a packet
 * and, when the matching rule asks for it, create a new state entry.
 * No port information is used: the arguments that follow saddr and
 * daddr in the pf_get_translation() calls are 0 (presumably the port
 * slots), and the state created here uses the PFOTHERS_* peer states.
 *
 * Steps, in order:
 *   1. Drop at once when pf_check_congestion(ifq) is true.
 *   2. Ask pf_get_translation() for a translation rule.  For PF_OUT the
 *      source address is rewritten, otherwise the destination address;
 *      the pre-translation address is saved in pd->baddr.  A
 *      translation rule with natpass set skips the filter rule walk.
 *   3. Walk the active filter rules.  Every matching non-anchor rule
 *      overwrites *rm, *am and *rsm, so the last match wins unless the
 *      rule is marked quick, which stops the walk.
 *   4. Log if requested; for a PF_DROP rule flagged PFRULE_RETURNICMP
 *      or PFRULE_RETURN restore the saved address and call
 *      pf_send_icmp(); return PF_DROP for any action but PF_PASS.
 *   5. Tag the packet and, if the rule keeps state or a translation
 *      rule matched, allocate and insert a state; *sm receives it.
 *
 * Parameters:
 *   rm, am, rsm  in/out: matching rule, its anchor rule and ruleset.
 *                They are read back after the walk even when nothing
 *                matched (*rm is dereferenced unconditionally), so the
 *                caller must have initialised them.
 *   sm           out: set to the new state when one is inserted.
 *   direction    PF_OUT selects source translation, anything else
 *                destination translation.
 *   kif          interface the rules are matched against.
 *   m, off       mbuf and offset, handed to pf_get_translation().
 *   h            only forwarded to PFLOG_PACKET().
 *   pd           packet descriptor; src/dst/baddr/naddr/nat_rule are
 *                updated in place.
 *   ifq          queue handed to pf_check_congestion().
 *
 * `reason' is a local: the values stored with REASON_SET() are only
 * consumed by PFLOG_PACKET() here and are not handed back to the caller.
 *
 * Returns PF_PASS or PF_DROP.
 */
4270int
4271pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
4272    struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4273    struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq)
4274{
4275	struct pf_rule		*nr = NULL;
4276	struct pf_rule		*r, *a = NULL;
4277	struct pf_ruleset	*ruleset = NULL;
4278	struct pf_src_node	*nsn = NULL;
4279	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
4280	sa_family_t		 af = pd->af;
4281	u_short			 reason;
4282	int			 tag = -1, rtableid = -1;
4283	int			 asd = 0;
4284	int			 match = 0;
4285
4286	if (pf_check_congestion(ifq)) {
4287		REASON_SET(&reason, PFRES_CONGEST);
4288		return (PF_DROP);
4289	}
4290
	/* start of the active filter rule list of the main ruleset */
4291	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4292
4293	if (direction == PF_OUT) {
4294		/* check outgoing packet for BINAT/NAT */
4295		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
4296		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
			/* remember the original source before rewriting it */
4297			PF_ACPY(&pd->baddr, saddr, af);
4298			switch (af) {
4299#ifdef INET
4300			case AF_INET:
4301				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
4302				    pd->naddr.v4.s_addr, 0);
4303				break;
4304#endif /* INET */
4305#ifdef INET6
4306			case AF_INET6:
4307				PF_ACPY(saddr, &pd->naddr, af);
4308				break;
4309#endif /* INET6 */
4310			}
			/* natpass: do not walk the filter rules at all */
4311			if (nr->natpass)
4312				r = NULL;
4313			pd->nat_rule = nr;
4314		}
4315	} else {
4316		/* check incoming packet for BINAT/RDR */
4317		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
4318		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
			/* remember the original destination before rewriting it */
4319			PF_ACPY(&pd->baddr, daddr, af);
4320			switch (af) {
4321#ifdef INET
4322			case AF_INET:
4323				pf_change_a(&daddr->v4.s_addr,
4324				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
4325				break;
4326#endif /* INET */
4327#ifdef INET6
4328			case AF_INET6:
4329				PF_ACPY(daddr, &pd->naddr, af);
4330				break;
4331#endif /* INET6 */
4332			}
			/* natpass: do not walk the filter rules at all */
4333			if (nr->natpass)
4334				r = NULL;
4335			pd->nat_rule = nr;
4336		}
4337	}
4338
	/*
	 * Filter rule walk.  A failed test either follows the rule's
	 * skip[] pointer for that criterion or steps to the next list
	 * entry; the order of the tests matters because some of them have
	 * side effects (arc4random(), pf_match_tag() writing `tag').
	 */
4339	while (r != NULL) {
4340		r->evaluations++;
4341		if (pfi_kif_match(r->kif, kif) == r->ifnot)
4342			r = r->skip[PF_SKIP_IFP].ptr;
4343		else if (r->direction && r->direction != direction)
4344			r = r->skip[PF_SKIP_DIR].ptr;
4345		else if (r->af && r->af != af)
4346			r = r->skip[PF_SKIP_AF].ptr;
4347		else if (r->proto && r->proto != pd->proto)
4348			r = r->skip[PF_SKIP_PROTO].ptr;
4349		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
4350		    r->src.neg, kif))
4351			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4352		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
4353		    r->dst.neg, NULL))
4354			r = r->skip[PF_SKIP_DST_ADDR].ptr;
4355		else if (r->tos && !(r->tos == pd->tos))
4356			r = TAILQ_NEXT(r, entries);
		/* rules flagged PFRULE_FRAGMENT never match in this function */
4357		else if (r->rule_flag & PFRULE_FRAGMENT)
4358			r = TAILQ_NEXT(r, entries);
4359		else if (r->prob && r->prob <= arc4random())
4360			r = TAILQ_NEXT(r, entries);
4361		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
4362			r = TAILQ_NEXT(r, entries);
		/* rules with a specific OS fingerprint never match here */
4363		else if (r->os_fingerprint != PF_OSFP_ANY)
4364			r = TAILQ_NEXT(r, entries);
4365		else {
			/* rule matches: pick up its tag and routing table id */
4366			if (r->tag)
4367				tag = r->tag;
4368			if (r->rtableid >= 0)
4369				rtableid = r->rtableid;
4370			if (r->anchor == NULL) {
4371				match = 1;
4372				*rm = r;
4373				*am = a;
4374				*rsm = ruleset;
4375				if ((*rm)->quick)
4376					break;
4377				r = TAILQ_NEXT(r, entries);
4378			} else
4379				pf_step_into_anchor(&asd, &ruleset,
4380				    PF_RULESET_FILTER, &r, &a, &match);
4381		}
		/*
		 * End of the current list: let pf_step_out_of_anchor()
		 * update r/a/ruleset; a non-zero return ends the walk.
		 */
4382		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
4383		    PF_RULESET_FILTER, &r, &a, &match))
4384			break;
4385	}
	/* continue with the recorded match (or the caller's initial values) */
4386	r = *rm;
4387	a = *am;
4388	ruleset = *rsm;
4389
4390	REASON_SET(&reason, PFRES_MATCH);
4391
	/* log against the filter rule if it logs, else the natpass rule */
4392	if (r->log || (nr != NULL && nr->natpass && nr->log))
4393		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
4394		    a, ruleset, pd);
4395
4396	if ((r->action == PF_DROP) &&
4397	    ((r->rule_flag & PFRULE_RETURNICMP) ||
4398	    (r->rule_flag & PFRULE_RETURN))) {
		/*
		 * NOTE: this `a' is a struct pf_addr pointer that shadows
		 * the anchor rule pointer `a' declared at function scope;
		 * the outer one is visible again after this block.
		 */
4399		struct pf_addr *a = NULL;
4400
4401		if (nr != NULL) {
4402			if (direction == PF_OUT)
4403				a = saddr;
4404			else
4405				a = daddr;
4406		}
		/*
		 * Put the saved pre-translation address (pd->baddr) back
		 * before the ICMP error is generated.
		 */
4407		if (a != NULL) {
4408			switch (af) {
4409#ifdef INET
4410			case AF_INET:
4411				pf_change_a(&a->v4.s_addr, pd->ip_sum,
4412				    pd->baddr.v4.s_addr, 0);
4413				break;
4414#endif /* INET */
4415#ifdef INET6
4416			case AF_INET6:
4417				PF_ACPY(a, &pd->baddr, af);
4418				break;
4419#endif /* INET6 */
4420			}
4421		}
		/*
		 * return_icmp/return_icmp6 are split into their upper and
		 * lower 8 bits (presumably ICMP type and code -- confirm
		 * against pf_send_icmp()).
		 */
4422		if ((af == AF_INET) && r->return_icmp)
4423			pf_send_icmp(m, r->return_icmp >> 8,
4424			    r->return_icmp & 255, af, r);
4425		else if ((af == AF_INET6) && r->return_icmp6)
4426			pf_send_icmp(m, r->return_icmp6 >> 8,
4427			    r->return_icmp6 & 255, af, r);
4428	}
4429
4430	if (r->action != PF_PASS)
4431		return (PF_DROP);
4432
4433	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
4434		REASON_SET(&reason, PFRES_MEMORY);
4435		return (PF_DROP);
4436	}
4437
	/* a state is created for keep_state rules and for any translation */
4438	if (r->keep_state || nr != NULL) {
4439		/* create new state */
4440		struct pf_state	*s = NULL;
4441		struct pf_src_node *sn = NULL;
4442
4443		/* check maximums */
4444		if (r->max_states && (r->states >= r->max_states)) {
4445			pf_status.lcounters[LCNT_STATES]++;
4446			REASON_SET(&reason, PFRES_MAXSTATES);
4447			goto cleanup;
4448		}
4449		/* src node for filter rule */
4450		if ((r->rule_flag & PFRULE_SRCTRACK ||
4451		    r->rpool.opts & PF_POOL_STICKYADDR) &&
4452		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
4453			REASON_SET(&reason, PFRES_SRCLIMIT);
4454			goto cleanup;
4455		}
4456		/* src node for translation rule */
		/*
		 * NOTE(review): for direction == PF_OUT, when the first
		 * pf_insert_src_node() call returns 0 the left side of the
		 * `||' is false and the second call (with saddr) is
		 * evaluated as well -- confirm this is intended.
		 */
4457		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
4458		    ((direction == PF_OUT &&
4459		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
4460		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
4461			REASON_SET(&reason, PFRES_SRCLIMIT);
4462			goto cleanup;
4463		}
4464		s = pool_get(&pf_state_pl, PR_NOWAIT);
4465		if (s == NULL) {
4466			REASON_SET(&reason, PFRES_MEMORY);
			/*
			 * Shared failure path: also reached by the gotos
			 * above.  Source nodes with no states and
			 * expire == 0 are removed from the tree and
			 * returned to their pool.
			 */
4467cleanup:
4468			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
4469				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
4470				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4471				pf_status.src_nodes--;
4472				pool_put(&pf_src_tree_pl, sn);
4473			}
			/* nsn != sn guards against releasing the same node twice */
4474			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
4475			    nsn->expire == 0) {
4476				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
4477				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4478				pf_status.src_nodes--;
4479				pool_put(&pf_src_tree_pl, nsn);
4480			}
4481			return (PF_DROP);
4482		}
4483		bzero(s, sizeof(*s));
4484		s->rule.ptr = r;
4485		s->nat_rule.ptr = nr;
		/* `a' is the anchor rule pointer again at this point */
4486		s->anchor.ptr = a;
4487		STATE_INC_COUNTERS(s);
4488		s->allow_opts = r->allow_opts;
4489		s->log = r->log & PF_LOG_ALL;
4490		if (nr != NULL)
4491			s->log |= nr->log & PF_LOG_ALL;
4492		s->proto = pd->proto;
4493		s->direction = direction;
4494		s->af = af;
		/*
		 * Address layout of the state:
		 *   PF_OUT: gwy = current (possibly translated) source,
		 *           ext = destination, lan = original source.
		 *   else:   lan = current (possibly translated) destination,
		 *           ext = source, gwy = original destination.
		 */
4495		if (direction == PF_OUT) {
4496			PF_ACPY(&s->gwy.addr, saddr, af);
4497			PF_ACPY(&s->ext.addr, daddr, af);
4498			if (nr != NULL)
4499				PF_ACPY(&s->lan.addr, &pd->baddr, af);
4500			else
4501				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
4502		} else {
4503			PF_ACPY(&s->lan.addr, daddr, af);
4504			PF_ACPY(&s->ext.addr, saddr, af);
4505			if (nr != NULL)
4506				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
4507			else
4508				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
4509		}
4510		s->src.state = PFOTHERS_SINGLE;
4511		s->dst.state = PFOTHERS_NO_TRAFFIC;
4512		s->creation = time_second;
4513		s->expire = time_second;
4514		s->timeout = PFTM_OTHER_FIRST_PACKET;
4515		pf_set_rt_ifp(s, saddr);
		/* each source node referenced by the state counts it */
4516		if (sn != NULL) {
4517			s->src_node = sn;
4518			s->src_node->states++;
4519		}
4520		if (nsn != NULL) {
4521			PF_ACPY(&nsn->raddr, &pd->naddr, af);
4522			s->nat_src_node = nsn;
4523			s->nat_src_node->states++;
4524		}
4525		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
			/* insertion failed: undo the bookkeeping done above */
4526			REASON_SET(&reason, PFRES_STATEINS);
4527			pf_src_tree_remove_state(s);
4528			STATE_DEC_COUNTERS(s);
4529			pool_put(&pf_state_pl, s);
4530			return (PF_DROP);
4531		} else
4532			*sm = s;
		/* the state keeps its own reference on the tag */
4533		if (tag > 0) {
4534			pf_tag_ref(tag);
4535			s->tag = tag;
4536		}
4537	}
4538
4539	return (PF_PASS);
4540}
4541
/*
 * pf_test_fragment: walk the active filter rules for a packet and decide
 * whether it passes.  No translation lookup is done and no state is
 * created in this function.
 *
 * Only criteria that do not need a transport header are evaluated:
 * a rule is stepped over when it has an OS fingerprint other than
 * PF_OSFP_ANY, when the packet is UDP and the rule has a port operator,
 * when the packet is TCP and the rule has a port operator or a flagset,
 * or when the packet is ICMP/ICMPv6 and the rule has a type or code
 * (presumably because a fragment may not carry that header -- confirm
 * with the caller).
 *
 * Parameters:
 *   rm, am, rsm  in/out: matching rule, its anchor rule and ruleset.
 *                They are read back after the walk even when nothing
 *                matched (*rm is dereferenced unconditionally), so the
 *                caller must have initialised them.
 *   direction    compared against each rule's direction.
 *   kif          interface the rules are matched against.
 *   m            mbuf, used for tag matching and tagging.
 *   h            only forwarded to PFLOG_PACKET().
 *   pd           packet descriptor (af, proto, src, dst, tos, pf_mtag).
 *
 * `reason' is a local: the values stored with REASON_SET() are only
 * consumed by PFLOG_PACKET() here and are not handed back to the caller.
 *
 * Returns PF_PASS or PF_DROP.
 */
4542int
4543pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
4544    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
4545    struct pf_ruleset **rsm)
4546{
4547	struct pf_rule		*r, *a = NULL;
4548	struct pf_ruleset	*ruleset = NULL;
4549	sa_family_t		 af = pd->af;
4550	u_short			 reason;
4551	int			 tag = -1;
4552	int			 asd = 0;
4553	int			 match = 0;
4554
4555	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
	/*
	 * A failed test either follows the rule's skip[] pointer for that
	 * criterion or steps to the next list entry.  The order of the
	 * tests matters: arc4random() and pf_match_tag() have side effects.
	 */
4556	while (r != NULL) {
4557		r->evaluations++;
4558		if (pfi_kif_match(r->kif, kif) == r->ifnot)
4559			r = r->skip[PF_SKIP_IFP].ptr;
4560		else if (r->direction && r->direction != direction)
4561			r = r->skip[PF_SKIP_DIR].ptr;
4562		else if (r->af && r->af != af)
4563			r = r->skip[PF_SKIP_AF].ptr;
4564		else if (r->proto && r->proto != pd->proto)
4565			r = r->skip[PF_SKIP_PROTO].ptr;
4566		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
4567		    r->src.neg, kif))
4568			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4569		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
4570		    r->dst.neg, NULL))
4571			r = r->skip[PF_SKIP_DST_ADDR].ptr;
4572		else if (r->tos && !(r->tos == pd->tos))
4573			r = TAILQ_NEXT(r, entries);
		/* rules with a specific OS fingerprint never match here */
4574		else if (r->os_fingerprint != PF_OSFP_ANY)
4575			r = TAILQ_NEXT(r, entries);
		/* rules that need UDP ports never match here */
4576		else if (pd->proto == IPPROTO_UDP &&
4577		    (r->src.port_op || r->dst.port_op))
4578			r = TAILQ_NEXT(r, entries);
		/* rules that need TCP ports or flags never match here */
4579		else if (pd->proto == IPPROTO_TCP &&
4580		    (r->src.port_op || r->dst.port_op || r->flagset))
4581			r = TAILQ_NEXT(r, entries);
		/* rules that need an ICMP type or code never match here */
4582		else if ((pd->proto == IPPROTO_ICMP ||
4583		    pd->proto == IPPROTO_ICMPV6) &&
4584		    (r->type || r->code))
4585			r = TAILQ_NEXT(r, entries);
4586		else if (r->prob && r->prob <= arc4random())
4587			r = TAILQ_NEXT(r, entries);
4588		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
4589			r = TAILQ_NEXT(r, entries);
4590		else {
			/*
			 * Match.  A non-anchor rule is recorded (last match
			 * wins unless it is quick); an anchor is descended.
			 */
4591			if (r->anchor == NULL) {
4592				match = 1;
4593				*rm = r;
4594				*am = a;
4595				*rsm = ruleset;
4596				if ((*rm)->quick)
4597					break;
4598				r = TAILQ_NEXT(r, entries);
4599			} else
4600				pf_step_into_anchor(&asd, &ruleset,
4601				    PF_RULESET_FILTER, &r, &a, &match);
4602		}
		/*
		 * End of the current list: let pf_step_out_of_anchor()
		 * update r/a/ruleset; a non-zero return ends the walk.
		 */
4603		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
4604		    PF_RULESET_FILTER, &r, &a, &match))
4605			break;
4606	}
	/* continue with the recorded match (or the caller's initial values) */
4607	r = *rm;
4608	a = *am;
4609	ruleset = *rsm;
4610
4611	REASON_SET(&reason, PFRES_MATCH);
4612
4613	if (r->log)
4614		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
4615		    pd);
4616
4617	if (r->action != PF_PASS)
4618		return (PF_DROP);
4619
	/* unlike pf_test_other(), the routing table id passed is always -1 */
4620	if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) {
4621		REASON_SET(&reason, PFRES_MEMORY);
4622		return (PF_DROP);
4623	}
4624
4625	return (PF_PASS);
4626}
4627
/*
 * pf_test_state_tcp: match a TCP packet against an existing state and
 * validate it against the sequence windows tracked in that state.
 *
 * Outline:
 *   - Build the lookup key from pd->af, IPPROTO_TCP and the packet's
 *     addresses and ports, then run STATE_LOOKUP().
 *   - While the state's src.state is PF_TCPS_PROXY_SRC or
 *     PF_TCPS_PROXY_DST the packet itself is never passed: it is
 *     answered through pf_send_tcp() and/or dropped.
 *   - A pure SYN arriving on a state whose two peers are both at or
 *     beyond TCPS_FIN_WAIT_2 unlinks the state, sets *state to NULL and
 *     drops the packet.
 *   - Otherwise the sequence/ack numbers are (de)modulated and checked.
 *     A packet inside the strict window updates windows, peer states
 *     and the state timeout; a packet inside the looser window updates
 *     windows and peer states only; anything else is PFRES_BADSTATE.
 *   - A passed packet gets its address/port translated when
 *     STATE_TRANSLATE(*state) is true, and the TCP header is copied
 *     back into the mbuf when it was modified.
 *
 * Parameters:
 *   state      in/out: receives the state found by the lookup; set to
 *              NULL on the state reuse path.
 *   direction  PF_IN or PF_OUT; selects which key fields are filled in
 *              and which state peer is `src'.
 *   kif        interface; its pfik_ifp is passed to pf_send_tcp() on
 *              the handshake-mismatch path.
 *   m, off     mbuf and offset of the TCP header within it.
 *   h          not referenced directly in this body.
 *   pd         packet descriptor; pd->hdr.tcp is edited in place.
 *   reason     out: set through REASON_SET() on the drop paths.
 *
 * Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP.
 */
4628int
4629pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
4630    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4631    u_short *reason)
4632{
4633	struct pf_state_cmp	 key;
4634	struct tcphdr		*th = pd->hdr.tcp;
4635	u_int16_t		 win = ntohs(th->th_win);
4636	u_int32_t		 ack, end, seq, orig_seq;
4637	u_int8_t		 sws, dws;
4638	int			 ackskew;
4639	int			 copyback = 0;
4640	struct pf_state_peer	*src, *dst;
4641
	/*
	 * Only the two endpoints relevant for this direction are filled
	 * in; the remaining members of `key' are left uninitialised.
	 */
4642	key.af = pd->af;
4643	key.proto = IPPROTO_TCP;
4644	if (direction == PF_IN)	{
4645		PF_ACPY(&key.ext.addr, pd->src, key.af);
4646		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4647		key.ext.port = th->th_sport;
4648		key.gwy.port = th->th_dport;
4649	} else {
4650		PF_ACPY(&key.lan.addr, pd->src, key.af);
4651		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4652		key.lan.port = th->th_sport;
4653		key.ext.port = th->th_dport;
4654	}
4655
	/*
	 * NOTE(review): STATE_LOOKUP() is defined outside this chunk and
	 * takes no arguments, so it presumably works on the locals above
	 * (key, state, kif, direction) and may return from this function;
	 * confirm at its definition before renaming or reordering them.
	 */
4656	STATE_LOOKUP();
4657
	/* src is the peer that sent this packet, dst the other one */
4658	if (direction == (*state)->direction) {
4659		src = &(*state)->src;
4660		dst = &(*state)->dst;
4661	} else {
4662		src = &(*state)->dst;
4663		dst = &(*state)->src;
4664	}
4665
	/*
	 * PF_TCPS_PROXY_SRC: only packets in the state's own direction
	 * are considered.  A SYN with the recorded sequence number is
	 * answered with a SYN|ACK; an ACK completing that handshake moves
	 * the state on to PF_TCPS_PROXY_DST (handled right below).
	 */
4666	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
4667		if (direction != (*state)->direction) {
4668			REASON_SET(reason, PFRES_SYNPROXY);
4669			return (PF_SYNPROXY_DROP);
4670		}
4671		if (th->th_flags & TH_SYN) {
4672			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
4673				REASON_SET(reason, PFRES_SYNPROXY);
4674				return (PF_DROP);
4675			}
4676#ifdef __FreeBSD__
4677			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
4678#else
4679			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4680#endif
4681			    pd->src, th->th_dport, th->th_sport,
4682			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
4683			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
4684			    0, NULL, NULL);
4685			REASON_SET(reason, PFRES_SYNPROXY);
4686			return (PF_SYNPROXY_DROP);
4687		} else if (!(th->th_flags & TH_ACK) ||
4688		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4689		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4690			REASON_SET(reason, PFRES_SYNPROXY);
4691			return (PF_DROP);
4692		} else if ((*state)->src_node != NULL &&
4693		    pf_src_connlimit(state)) {
4694			REASON_SET(reason, PFRES_SRCLIMIT);
4695			return (PF_DROP);
4696		} else
4697			(*state)->src.state = PF_TCPS_PROXY_DST;
4698	}
	/* PF_TCPS_PROXY_DST: every path through this block returns */
4699	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
		/*
		 * NOTE: these pf_state_host pointers shadow the
		 * pf_state_peer `src'/`dst' of the enclosing function for
		 * the scope of this block.
		 */
4700		struct pf_state_host *src, *dst;
4701
4702		if (direction == PF_OUT) {
4703			src = &(*state)->gwy;
4704			dst = &(*state)->ext;
4705		} else {
4706			src = &(*state)->ext;
4707			dst = &(*state)->lan;
4708		}
4709		if (direction == (*state)->direction) {
4710			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
4711			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4712			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4713				REASON_SET(reason, PFRES_SYNPROXY);
4714				return (PF_DROP);
4715			}
4716			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
			/* dst.seqhi == 1 means no sequence number was chosen yet */
4717			if ((*state)->dst.seqhi == 1)
4718				(*state)->dst.seqhi = htonl(arc4random());
4719#ifdef __FreeBSD__
4720			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4721			    &src->addr,
4722#else
4723			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
4724#endif
4725			    &dst->addr, src->port, dst->port,
4726			    (*state)->dst.seqhi, 0, TH_SYN, 0,
4727			    (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
4728			REASON_SET(reason, PFRES_SYNPROXY);
4729			return (PF_SYNPROXY_DROP);
4730		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
4731		    (TH_SYN|TH_ACK)) ||
4732		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
4733			REASON_SET(reason, PFRES_SYNPROXY);
4734			return (PF_DROP);
4735		} else {
			/*
			 * Valid SYN|ACK from the reverse direction: send
			 * an ACK to each side, record the sequence number
			 * offsets between the two handshakes and mark
			 * both peers established.
			 */
4736			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4737			(*state)->dst.seqlo = ntohl(th->th_seq);
4738#ifdef __FreeBSD__
4739			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
4740#else
4741			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4742#endif
4743			    pd->src, th->th_dport, th->th_sport,
4744			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4745			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
4746			    (*state)->tag, NULL, NULL);
4747#ifdef __FreeBSD__
4748			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4749			    &src->addr,
4750#else
4751			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
4752#endif
4753			    &dst->addr, src->port, dst->port,
4754			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4755			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
4756			    0, NULL, NULL);
4757			(*state)->src.seqdiff = (*state)->dst.seqhi -
4758			    (*state)->src.seqlo;
4759			(*state)->dst.seqdiff = (*state)->src.seqhi -
4760			    (*state)->dst.seqlo;
4761			(*state)->src.seqhi = (*state)->src.seqlo +
4762			    (*state)->dst.max_win;
4763			(*state)->dst.seqhi = (*state)->dst.seqlo +
4764			    (*state)->src.max_win;
4765			(*state)->src.wscale = (*state)->dst.wscale = 0;
4766			(*state)->src.state = (*state)->dst.state =
4767			    TCPS_ESTABLISHED;
4768			REASON_SET(reason, PFRES_SYNPROXY);
4769			return (PF_SYNPROXY_DROP);
4770		}
4771	}
4772
	/*
	 * State reuse: a pure SYN on a state whose peers are both at or
	 * beyond TCPS_FIN_WAIT_2 removes the state and drops this packet.
	 */
4773	if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
4774	    dst->state >= TCPS_FIN_WAIT_2 &&
4775	    src->state >= TCPS_FIN_WAIT_2) {
4776		if (pf_status.debug >= PF_DEBUG_MISC) {
4777			printf("pf: state reuse ");
4778			pf_print_state(*state);
4779			pf_print_flags(th->th_flags);
4780			printf("\n");
4781		}
4782		/* XXX make sure it's the same direction ?? */
4783		(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
4784		pf_unlink_state(*state);
4785		*state = NULL;
4786		return (PF_DROP);
4787	}
4788
	/*
	 * Window scale shifts are applied only when both peers have a
	 * wscale value recorded and the packet is not a SYN.
	 */
4789	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4790		sws = src->wscale & PF_WSCALE_MASK;
4791		dws = dst->wscale & PF_WSCALE_MASK;
4792	} else
4793		sws = dws = 0;
4794
4795	/*
4796	 * Sequence tracking algorithm from Guido van Rooij's paper:
4797	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
4798	 *	tcp_filtering.ps
4799	 */
4800
	/* orig_seq keeps the on-the-wire value; seq may be adjusted below */
4801	orig_seq = seq = ntohl(th->th_seq);
4802	if (src->seqlo == 0) {
4803		/* First packet from this end. Set its state */
4804
4805		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4806		    src->scrub == NULL) {
4807			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4808				REASON_SET(reason, PFRES_MEMORY);
4809				return (PF_DROP);
4810			}
4811		}
4812
4813		/* Deferred generation of sequence number modulator */
4814		if (dst->seqdiff && !src->seqdiff) {
			/* retry until the modulator is non-zero */
4815#ifdef __FreeBSD__
4816			while ((src->seqdiff = pf_new_isn(*state) - seq) == 0)
4817				;
4818#else
4819			while ((src->seqdiff = tcp_rndiss_next() - seq) == 0)
4820				;
4821#endif
4822			ack = ntohl(th->th_ack) - dst->seqdiff;
4823			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4824			    src->seqdiff), 0);
4825			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4826			copyback = 1;
4827		} else {
4828			ack = ntohl(th->th_ack);
4829		}
4830
		/* end = sequence number just past this segment (SYN/FIN count) */
4831		end = seq + pd->p_len;
4832		if (th->th_flags & TH_SYN) {
4833			end++;
4834			if (dst->wscale & PF_WSCALE_FLAG) {
4835				src->wscale = pf_get_wscale(m, off, th->th_off,
4836				    pd->af);
4837				if (src->wscale & PF_WSCALE_FLAG) {
4838					/* Remove scale factor from initial
4839					 * window */
4840					sws = src->wscale & PF_WSCALE_MASK;
4841					win = ((u_int32_t)win + (1 << sws) - 1)
4842					    >> sws;
4843					dws = dst->wscale & PF_WSCALE_MASK;
4844				} else {
4845					/* fixup other window */
4846					dst->max_win <<= dst->wscale &
4847					    PF_WSCALE_MASK;
4848					/* in case of a retrans SYN|ACK */
4849					dst->wscale = 0;
4850				}
4851			}
4852		}
4853		if (th->th_flags & TH_FIN)
4854			end++;
4855
4856		src->seqlo = seq;
4857		if (src->state < TCPS_SYN_SENT)
4858			src->state = TCPS_SYN_SENT;
4859
4860		/*
4861		 * May need to slide the window (seqhi may have been set by
4862		 * the crappy stack check or if we picked up the connection
4863		 * after establishment)
4864		 */
4865		if (src->seqhi == 1 ||
4866		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4867			src->seqhi = end + MAX(1, dst->max_win << dws);
4868		if (win > src->max_win)
4869			src->max_win = win;
4870
4871	} else {
		/* later packet from this end: undo the peer's modulation of ack */
4872		ack = ntohl(th->th_ack) - dst->seqdiff;
4873		if (src->seqdiff) {
4874			/* Modulate sequence numbers */
4875			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4876			    src->seqdiff), 0);
4877			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4878			copyback = 1;
4879		}
4880		end = seq + pd->p_len;
4881		if (th->th_flags & TH_SYN)
4882			end++;
4883		if (th->th_flags & TH_FIN)
4884			end++;
4885	}
4886
4887	if ((th->th_flags & TH_ACK) == 0) {
4888		/* Let it pass through the ack skew check */
4889		ack = dst->seqlo;
4890	} else if ((ack == 0 &&
4891	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4892	    /* broken tcp stacks do not set ack */
4893	    (dst->state < TCPS_SYN_SENT)) {
4894		/*
4895		 * Many stacks (ours included) will set the ACK number in an
4896		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4897		 */
4898		ack = dst->seqlo;
4899	}
4900
4901	if (seq == end) {
4902		/* Ease sequencing restrictions on no data packets */
4903		seq = src->seqlo;
4904		end = seq;
4905	}
4906
	/* how far this ack lags behind (positive) or runs ahead of dst->seqlo */
4907	ackskew = dst->seqlo - ack;
4908
4909
4910	/*
4911	 * Need to demodulate the sequence numbers in any TCP SACK options
4912	 * (Selective ACK). We could optionally validate the SACK values
4913	 * against the current ACK window, either forwards or backwards, but
4914	 * I'm not confident that SACK has been implemented properly
4915	 * everywhere. It wouldn't surprise me if several stacks accidentally
4916	 * SACK too far backwards of previously ACKed data. There really aren't
4917	 * any security implications of bad SACKing unless the target stack
4918	 * doesn't validate the option length correctly. Someone trying to
4919	 * spoof into a TCP connection won't bother blindly sending SACK
4920	 * options anyway.
4921	 */
	/* th_off << 2 is the header length in bytes; larger means options */
4922	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4923		if (pf_modulate_sack(m, off, pd, th, dst))
4924			copyback = 1;
4925	}
4926
4927
	/*
	 * Strict window check.  Reading aid: inside the condition below
	 * each comment describes the test on the line preceding it.
	 */
4928#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4929	if (SEQ_GEQ(src->seqhi, end) &&
4930	    /* Last octet inside other's window space */
4931	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4932	    /* Retrans: not more than one window back */
4933	    (ackskew >= -MAXACKWINDOW) &&
4934	    /* Acking not more than one reassembled fragment backwards */
4935	    (ackskew <= (MAXACKWINDOW << sws)) &&
4936	    /* Acking not more than one window forward */
4937	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4938	    (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) {
4939	    /* Require an exact/+1 sequence match on resets when possible */
4940
4941		if (dst->scrub || src->scrub) {
4942			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4943			    *state, src, dst, &copyback))
4944				return (PF_DROP);
4945		}
4946
4947		/* update max window */
4948		if (src->max_win < win)
4949			src->max_win = win;
4950		/* synchronize sequencing */
4951		if (SEQ_GT(end, src->seqlo))
4952			src->seqlo = end;
4953		/* slide the window of what the other end can send */
4954		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4955			dst->seqhi = ack + MAX((win << sws), 1);
4956
4957
4958		/* update states */
4959		if (th->th_flags & TH_SYN)
4960			if (src->state < TCPS_SYN_SENT)
4961				src->state = TCPS_SYN_SENT;
4962		if (th->th_flags & TH_FIN)
4963			if (src->state < TCPS_CLOSING)
4964				src->state = TCPS_CLOSING;
4965		if (th->th_flags & TH_ACK) {
4966			if (dst->state == TCPS_SYN_SENT) {
4967				dst->state = TCPS_ESTABLISHED;
				/* both ends established: enforce source limits */
4968				if (src->state == TCPS_ESTABLISHED &&
4969				    (*state)->src_node != NULL &&
4970				    pf_src_connlimit(state)) {
4971					REASON_SET(reason, PFRES_SRCLIMIT);
4972					return (PF_DROP);
4973				}
4974			} else if (dst->state == TCPS_CLOSING)
4975				dst->state = TCPS_FIN_WAIT_2;
4976		}
4977		if (th->th_flags & TH_RST)
4978			src->state = dst->state = TCPS_TIME_WAIT;
4979
4980		/* update expire time */
4981		(*state)->expire = time_second;
		/* pick the timeout class from the combined peer states */
4982		if (src->state >= TCPS_FIN_WAIT_2 &&
4983		    dst->state >= TCPS_FIN_WAIT_2)
4984			(*state)->timeout = PFTM_TCP_CLOSED;
4985		else if (src->state >= TCPS_CLOSING &&
4986		    dst->state >= TCPS_CLOSING)
4987			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4988		else if (src->state < TCPS_ESTABLISHED ||
4989		    dst->state < TCPS_ESTABLISHED)
4990			(*state)->timeout = PFTM_TCP_OPENING;
4991		else if (src->state >= TCPS_CLOSING ||
4992		    dst->state >= TCPS_CLOSING)
4993			(*state)->timeout = PFTM_TCP_CLOSING;
4994		else
4995			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4996
4997		/* Fall through to PASS packet */
4998
4999	} else if ((dst->state < TCPS_SYN_SENT ||
5000		dst->state >= TCPS_FIN_WAIT_2 ||
5001		src->state >= TCPS_FIN_WAIT_2) &&
5002	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
5003	    /* Within a window forward of the originating packet */
5004	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
5005	    /* Within a window backward of the originating packet */
5006
5007		/*
5008		 * This currently handles three situations:
5009		 *  1) Stupid stacks will shotgun SYNs before their peer
5010		 *     replies.
5011		 *  2) When PF catches an already established stream (the
5012		 *     firewall rebooted, the state table was flushed, routes
5013		 *     changed...)
5014		 *  3) Packets get funky immediately after the connection
5015		 *     closes (this should catch Solaris spurious ACK|FINs
5016		 *     that web servers like to spew after a close)
5017		 *
5018		 * This must be a little more careful than the above code
5019		 * since packet floods will also be caught here. We don't
5020		 * update the TTL here to mitigate the damage of a packet
5021		 * flood and so the same code can handle awkward establishment
5022		 * and a loosened connection close.
5023		 * In the establishment case, a correct peer response will
5024		 * validate the connection, go through the normal state code
5025		 * and keep updating the state TTL.
5026		 */
5027
5028		if (pf_status.debug >= PF_DEBUG_MISC) {
5029			printf("pf: loose state match: ");
5030			pf_print_state(*state);
5031			pf_print_flags(th->th_flags);
5032			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
5033			    "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len,
5034#ifdef __FreeBSD__
5035			    ackskew, (unsigned long long)(*state)->packets[0],
5036			    (unsigned long long)(*state)->packets[1]);
5037#else
5038			    ackskew, (*state)->packets[0],
5039			    (*state)->packets[1]);
5040#endif
5041		}
5042
5043		if (dst->scrub || src->scrub) {
5044			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
5045			    *state, src, dst, &copyback))
5046				return (PF_DROP);
5047		}
5048
5049		/* update max window */
5050		if (src->max_win < win)
5051			src->max_win = win;
5052		/* synchronize sequencing */
5053		if (SEQ_GT(end, src->seqlo))
5054			src->seqlo = end;
5055		/* slide the window of what the other end can send */
5056		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
5057			dst->seqhi = ack + MAX((win << sws), 1);
5058
5059		/*
5060		 * Cannot set dst->seqhi here since this could be a shotgunned
5061		 * SYN and not an already established connection.
5062		 */
5063
		/* note: expire and timeout are left untouched on this path */
5064		if (th->th_flags & TH_FIN)
5065			if (src->state < TCPS_CLOSING)
5066				src->state = TCPS_CLOSING;
5067		if (th->th_flags & TH_RST)
5068			src->state = dst->state = TCPS_TIME_WAIT;
5069
5070		/* Fall through to PASS packet */
5071
5072	} else {
		/* neither window matched: the packet is dropped below */
5073		if ((*state)->dst.state == TCPS_SYN_SENT &&
5074		    (*state)->src.state == TCPS_SYN_SENT) {
5075			/* Send RST for state mismatches during handshake */
5076			if (!(th->th_flags & TH_RST))
5077#ifdef __FreeBSD__
5078				pf_send_tcp(m, (*state)->rule.ptr, pd->af,
5079#else
5080				pf_send_tcp((*state)->rule.ptr, pd->af,
5081#endif
5082				    pd->dst, pd->src, th->th_dport,
5083				    th->th_sport, ntohl(th->th_ack), 0,
5084				    TH_RST, 0, 0,
5085				    (*state)->rule.ptr->return_ttl, 1, 0,
5086				    pd->eh, kif->pfik_ifp);
			/*
			 * Reset this peer's tracking; seqlo == 0 makes the
			 * next packet from it take the "first packet"
			 * branch above.
			 */
5087			src->seqlo = 0;
5088			src->seqhi = 1;
5089			src->max_win = 1;
5090		} else if (pf_status.debug >= PF_DEBUG_MISC) {
5091			printf("pf: BAD state: ");
5092			pf_print_state(*state);
5093			pf_print_flags(th->th_flags);
5094			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
5095			    "pkts=%llu:%llu dir=%s,%s\n",
5096			    seq, orig_seq, ack, pd->p_len, ackskew,
5097#ifdef __FreeBSD__
5098			    (unsigned long long)(*state)->packets[0],
5099			    (unsigned long long)(*state)->packets[1],
5100#else
5101			    (*state)->packets[0], (*state)->packets[1],
5102#endif
5103			    direction == PF_IN ? "in" : "out",
5104			    direction == (*state)->direction ? "fwd" : "rev");
			/*
			 * Digits 1-4 flag the failed tests of the strict
			 * check, 5-6 those of the loose check; a blank
			 * means that test held.
			 */
5105			printf("pf: State failure on: %c %c %c %c | %c %c\n",
5106			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
5107			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
5108			    ' ': '2',
5109			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
5110			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
5111			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
5112			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
5113		}
5114		REASON_SET(reason, PFRES_BADSTATE);
5115		return (PF_DROP);
5116	}
5117
5118	/* Any packets which have gotten here are to be passed */
5119
5120	/* translate source/destination address, if necessary */
5121	if (STATE_TRANSLATE(*state)) {
		/* outbound: source becomes gwy; inbound: destination becomes lan */
5122		if (direction == PF_OUT)
5123			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
5124			    &th->th_sum, &(*state)->gwy.addr,
5125			    (*state)->gwy.port, 0, pd->af);
5126		else
5127			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
5128			    &th->th_sum, &(*state)->lan.addr,
5129			    (*state)->lan.port, 0, pd->af);
5130		m_copyback(m, off, sizeof(*th), (caddr_t)th);
5131	} else if (copyback) {
5132		/* Copyback sequence modulation or stateful scrub changes */
5133		m_copyback(m, off, sizeof(*th), (caddr_t)th);
5134	}
5135
5136	return (PF_PASS);
5137}
5138
/*
 * pf_test_state_udp: match a UDP packet against an existing state,
 * advance the per-peer UDP state, refresh the state timeout and apply
 * address/port translation when the state requires it.
 *
 * Parameters:
 *   state      in/out: receives the state found by the lookup.
 *   direction  PF_IN or PF_OUT; selects which key fields are filled in
 *              and which state peer is `src'.
 *   kif, h     not referenced directly in this body.
 *   m, off     mbuf and offset of the UDP header within it.
 *   pd         packet descriptor; pd->hdr.udp is edited in place when
 *              the packet is translated.
 *
 * Every path visible in this body returns PF_PASS.
 */
5139int
5140pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
5141    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
5142{
5143	struct pf_state_peer	*src, *dst;
5144	struct pf_state_cmp	 key;
5145	struct udphdr		*uh = pd->hdr.udp;
5146
	/*
	 * Only the two endpoints relevant for this direction are filled
	 * in; the remaining members of `key' are left uninitialised.
	 */
5147	key.af = pd->af;
5148	key.proto = IPPROTO_UDP;
5149	if (direction == PF_IN)	{
5150		PF_ACPY(&key.ext.addr, pd->src, key.af);
5151		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
5152		key.ext.port = uh->uh_sport;
5153		key.gwy.port = uh->uh_dport;
5154	} else {
5155		PF_ACPY(&key.lan.addr, pd->src, key.af);
5156		PF_ACPY(&key.ext.addr, pd->dst, key.af);
5157		key.lan.port = uh->uh_sport;
5158		key.ext.port = uh->uh_dport;
5159	}
5160
	/*
	 * NOTE(review): STATE_LOOKUP() is defined outside this chunk and
	 * takes no arguments, so it presumably works on the locals above
	 * (key, state, kif, direction) and may return from this function;
	 * confirm at its definition before renaming or reordering them.
	 */
5161	STATE_LOOKUP();
5162
	/* src is the peer that sent this packet, dst the other one */
5163	if (direction == (*state)->direction) {
5164		src = &(*state)->src;
5165		dst = &(*state)->dst;
5166	} else {
5167		src = &(*state)->dst;
5168		dst = &(*state)->src;
5169	}
5170
5171	/* update states */
	/*
	 * The sender is raised to at least PFUDPS_SINGLE; a receiver
	 * already at PFUDPS_SINGLE is promoted to PFUDPS_MULTIPLE.
	 */
5172	if (src->state < PFUDPS_SINGLE)
5173		src->state = PFUDPS_SINGLE;
5174	if (dst->state == PFUDPS_SINGLE)
5175		dst->state = PFUDPS_MULTIPLE;
5176
5177	/* update expire time */
5178	(*state)->expire = time_second;
	/* the MULTIPLE timeout applies only once both peers are MULTIPLE */
5179	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
5180		(*state)->timeout = PFTM_UDP_MULTIPLE;
5181	else
5182		(*state)->timeout = PFTM_UDP_SINGLE;
5183
5184	/* translate source/destination address, if necessary */
5185	if (STATE_TRANSLATE(*state)) {
		/* outbound: source becomes gwy; inbound: destination becomes lan */
5186		if (direction == PF_OUT)
5187			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
5188			    &uh->uh_sum, &(*state)->gwy.addr,
5189			    (*state)->gwy.port, 1, pd->af);
5190		else
5191			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
5192			    &uh->uh_sum, &(*state)->lan.addr,
5193			    (*state)->lan.port, 1, pd->af);
		/* write the modified UDP header back into the mbuf */
5194		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
5195	}
5196
5197	return (PF_PASS);
5198}
5199
5200int
5201pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
5202    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
5203{
5204	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
5205	u_int16_t	 icmpid = 0;		/* make the compiler happy */
5206	u_int16_t	*icmpsum = NULL;	/* make the compiler happy */
5207	u_int8_t	 icmptype = 0;		/* make the compiler happy */
5208	int		 state_icmp = 0;
5209	struct pf_state_cmp key;
5210
5211	switch (pd->proto) {
5212#ifdef INET
5213	case IPPROTO_ICMP:
5214		icmptype = pd->hdr.icmp->icmp_type;
5215		icmpid = pd->hdr.icmp->icmp_id;
5216		icmpsum = &pd->hdr.icmp->icmp_cksum;
5217
5218		if (icmptype == ICMP_UNREACH ||
5219		    icmptype == ICMP_SOURCEQUENCH ||
5220		    icmptype == ICMP_REDIRECT ||
5221		    icmptype == ICMP_TIMXCEED ||
5222		    icmptype == ICMP_PARAMPROB)
5223			state_icmp++;
5224		break;
5225#endif /* INET */
5226#ifdef INET6
5227	case IPPROTO_ICMPV6:
5228		icmptype = pd->hdr.icmp6->icmp6_type;
5229		icmpid = pd->hdr.icmp6->icmp6_id;
5230		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
5231
5232		if (icmptype == ICMP6_DST_UNREACH ||
5233		    icmptype == ICMP6_PACKET_TOO_BIG ||
5234		    icmptype == ICMP6_TIME_EXCEEDED ||
5235		    icmptype == ICMP6_PARAM_PROB)
5236			state_icmp++;
5237		break;
5238#endif /* INET6 */
5239	}
5240
5241	if (!state_icmp) {
5242
5243		/*
5244		 * ICMP query/reply message not related to a TCP/UDP packet.
5245		 * Search for an ICMP state.
5246		 */
5247		key.af = pd->af;
5248		key.proto = pd->proto;
5249		if (direction == PF_IN)	{
5250			PF_ACPY(&key.ext.addr, pd->src, key.af);
5251			PF_ACPY(&key.gwy.addr, pd->dst, key.af);
5252			key.ext.port = 0;
5253			key.gwy.port = icmpid;
5254		} else {
5255			PF_ACPY(&key.lan.addr, pd->src, key.af);
5256			PF_ACPY(&key.ext.addr, pd->dst, key.af);
5257			key.lan.port = icmpid;
5258			key.ext.port = 0;
5259		}
5260
5261		STATE_LOOKUP();
5262
5263		(*state)->expire = time_second;
5264		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
5265
5266		/* translate source/destination address, if necessary */
5267		if (STATE_TRANSLATE(*state)) {
5268			if (direction == PF_OUT) {
5269				switch (pd->af) {
5270#ifdef INET
5271				case AF_INET:
5272					pf_change_a(&saddr->v4.s_addr,
5273					    pd->ip_sum,
5274					    (*state)->gwy.addr.v4.s_addr, 0);
5275					pd->hdr.icmp->icmp_cksum =
5276					    pf_cksum_fixup(
5277					    pd->hdr.icmp->icmp_cksum, icmpid,
5278					    (*state)->gwy.port, 0);
5279					pd->hdr.icmp->icmp_id =
5280					    (*state)->gwy.port;
5281					m_copyback(m, off, ICMP_MINLEN,
5282					    (caddr_t)pd->hdr.icmp);
5283					break;
5284#endif /* INET */
5285#ifdef INET6
5286				case AF_INET6:
5287					pf_change_a6(saddr,
5288					    &pd->hdr.icmp6->icmp6_cksum,
5289					    &(*state)->gwy.addr, 0);
5290					m_copyback(m, off,
5291					    sizeof(struct icmp6_hdr),
5292					    (caddr_t)pd->hdr.icmp6);
5293					break;
5294#endif /* INET6 */
5295				}
5296			} else {
5297				switch (pd->af) {
5298#ifdef INET
5299				case AF_INET:
5300					pf_change_a(&daddr->v4.s_addr,
5301					    pd->ip_sum,
5302					    (*state)->lan.addr.v4.s_addr, 0);
5303					pd->hdr.icmp->icmp_cksum =
5304					    pf_cksum_fixup(
5305					    pd->hdr.icmp->icmp_cksum, icmpid,
5306					    (*state)->lan.port, 0);
5307					pd->hdr.icmp->icmp_id =
5308					    (*state)->lan.port;
5309					m_copyback(m, off, ICMP_MINLEN,
5310					    (caddr_t)pd->hdr.icmp);
5311					break;
5312#endif /* INET */
5313#ifdef INET6
5314				case AF_INET6:
5315					pf_change_a6(daddr,
5316					    &pd->hdr.icmp6->icmp6_cksum,
5317					    &(*state)->lan.addr, 0);
5318					m_copyback(m, off,
5319					    sizeof(struct icmp6_hdr),
5320					    (caddr_t)pd->hdr.icmp6);
5321					break;
5322#endif /* INET6 */
5323				}
5324			}
5325		}
5326
5327		return (PF_PASS);
5328
5329	} else {
5330		/*
5331		 * ICMP error message in response to a TCP/UDP packet.
5332		 * Extract the inner TCP/UDP header and search for that state.
5333		 */
5334
5335		struct pf_pdesc	pd2;
5336#ifdef INET
5337		struct ip	h2;
5338#endif /* INET */
5339#ifdef INET6
5340		struct ip6_hdr	h2_6;
5341		int		terminal = 0;
5342#endif /* INET6 */
5343		int		ipoff2 = 0;	/* make the compiler happy */
5344		int		off2 = 0;	/* make the compiler happy */
5345
5346		pd2.af = pd->af;
5347		switch (pd->af) {
5348#ifdef INET
5349		case AF_INET:
5350			/* offset of h2 in mbuf chain */
5351			ipoff2 = off + ICMP_MINLEN;
5352
5353			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
5354			    NULL, reason, pd2.af)) {
5355				DPFPRINTF(PF_DEBUG_MISC,
5356				    ("pf: ICMP error message too short "
5357				    "(ip)\n"));
5358				return (PF_DROP);
5359			}
5360			/*
5361			 * ICMP error messages don't refer to non-first
5362			 * fragments
5363			 */
5364			if (h2.ip_off & htons(IP_OFFMASK)) {
5365				REASON_SET(reason, PFRES_FRAG);
5366				return (PF_DROP);
5367			}
5368
5369			/* offset of protocol header that follows h2 */
5370			off2 = ipoff2 + (h2.ip_hl << 2);
5371
5372			pd2.proto = h2.ip_p;
5373			pd2.src = (struct pf_addr *)&h2.ip_src;
5374			pd2.dst = (struct pf_addr *)&h2.ip_dst;
5375			pd2.ip_sum = &h2.ip_sum;
5376			break;
5377#endif /* INET */
5378#ifdef INET6
5379		case AF_INET6:
5380			ipoff2 = off + sizeof(struct icmp6_hdr);
5381
5382			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
5383			    NULL, reason, pd2.af)) {
5384				DPFPRINTF(PF_DEBUG_MISC,
5385				    ("pf: ICMP error message too short "
5386				    "(ip6)\n"));
5387				return (PF_DROP);
5388			}
5389			pd2.proto = h2_6.ip6_nxt;
5390			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
5391			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
5392			pd2.ip_sum = NULL;
5393			off2 = ipoff2 + sizeof(h2_6);
5394			do {
5395				switch (pd2.proto) {
5396				case IPPROTO_FRAGMENT:
5397					/*
5398					 * ICMPv6 error messages for
5399					 * non-first fragments
5400					 */
5401					REASON_SET(reason, PFRES_FRAG);
5402					return (PF_DROP);
5403				case IPPROTO_AH:
5404				case IPPROTO_HOPOPTS:
5405				case IPPROTO_ROUTING:
5406				case IPPROTO_DSTOPTS: {
5407					/* get next header and header length */
5408					struct ip6_ext opt6;
5409
5410					if (!pf_pull_hdr(m, off2, &opt6,
5411					    sizeof(opt6), NULL, reason,
5412					    pd2.af)) {
5413						DPFPRINTF(PF_DEBUG_MISC,
5414						    ("pf: ICMPv6 short opt\n"));
5415						return (PF_DROP);
5416					}
5417					if (pd2.proto == IPPROTO_AH)
5418						off2 += (opt6.ip6e_len + 2) * 4;
5419					else
5420						off2 += (opt6.ip6e_len + 1) * 8;
5421					pd2.proto = opt6.ip6e_nxt;
5422					/* goto the next header */
5423					break;
5424				}
5425				default:
5426					terminal++;
5427					break;
5428				}
5429			} while (!terminal);
5430			break;
5431#endif /* INET6 */
5432#ifdef __FreeBSD__
5433		default:
5434			panic("AF not supported: %d", pd->af);
5435#endif
5436		}
5437
5438		switch (pd2.proto) {
5439		case IPPROTO_TCP: {
5440			struct tcphdr		 th;
5441			u_int32_t		 seq;
5442			struct pf_state_peer	*src, *dst;
5443			u_int8_t		 dws;
5444			int			 copyback = 0;
5445
5446			/*
5447			 * Only the first 8 bytes of the TCP header can be
5448			 * expected. Don't access any TCP header fields after
5449			 * th_seq, an ackskew test is not possible.
5450			 */
5451			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
5452			    pd2.af)) {
5453				DPFPRINTF(PF_DEBUG_MISC,
5454				    ("pf: ICMP error message too short "
5455				    "(tcp)\n"));
5456				return (PF_DROP);
5457			}
5458
5459			key.af = pd2.af;
5460			key.proto = IPPROTO_TCP;
5461			if (direction == PF_IN)	{
5462				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5463				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5464				key.ext.port = th.th_dport;
5465				key.gwy.port = th.th_sport;
5466			} else {
5467				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5468				PF_ACPY(&key.ext.addr, pd2.src, key.af);
5469				key.lan.port = th.th_dport;
5470				key.ext.port = th.th_sport;
5471			}
5472
5473			STATE_LOOKUP();
5474
5475			if (direction == (*state)->direction) {
5476				src = &(*state)->dst;
5477				dst = &(*state)->src;
5478			} else {
5479				src = &(*state)->src;
5480				dst = &(*state)->dst;
5481			}
5482
5483			if (src->wscale && dst->wscale)
5484				dws = dst->wscale & PF_WSCALE_MASK;
5485			else
5486				dws = 0;
5487
5488			/* Demodulate sequence number */
5489			seq = ntohl(th.th_seq) - src->seqdiff;
5490			if (src->seqdiff) {
5491				pf_change_a(&th.th_seq, icmpsum,
5492				    htonl(seq), 0);
5493				copyback = 1;
5494			}
5495
5496			if (!SEQ_GEQ(src->seqhi, seq) ||
5497			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
5498				if (pf_status.debug >= PF_DEBUG_MISC) {
5499					printf("pf: BAD ICMP %d:%d ",
5500					    icmptype, pd->hdr.icmp->icmp_code);
5501					pf_print_host(pd->src, 0, pd->af);
5502					printf(" -> ");
5503					pf_print_host(pd->dst, 0, pd->af);
5504					printf(" state: ");
5505					pf_print_state(*state);
5506					printf(" seq=%u\n", seq);
5507				}
5508				REASON_SET(reason, PFRES_BADSTATE);
5509				return (PF_DROP);
5510			}
5511
5512			if (STATE_TRANSLATE(*state)) {
5513				if (direction == PF_IN) {
5514					pf_change_icmp(pd2.src, &th.th_sport,
5515					    daddr, &(*state)->lan.addr,
5516					    (*state)->lan.port, NULL,
5517					    pd2.ip_sum, icmpsum,
5518					    pd->ip_sum, 0, pd2.af);
5519				} else {
5520					pf_change_icmp(pd2.dst, &th.th_dport,
5521					    saddr, &(*state)->gwy.addr,
5522					    (*state)->gwy.port, NULL,
5523					    pd2.ip_sum, icmpsum,
5524					    pd->ip_sum, 0, pd2.af);
5525				}
5526				copyback = 1;
5527			}
5528
5529			if (copyback) {
5530				switch (pd2.af) {
5531#ifdef INET
5532				case AF_INET:
5533					m_copyback(m, off, ICMP_MINLEN,
5534					    (caddr_t)pd->hdr.icmp);
5535					m_copyback(m, ipoff2, sizeof(h2),
5536					    (caddr_t)&h2);
5537					break;
5538#endif /* INET */
5539#ifdef INET6
5540				case AF_INET6:
5541					m_copyback(m, off,
5542					    sizeof(struct icmp6_hdr),
5543					    (caddr_t)pd->hdr.icmp6);
5544					m_copyback(m, ipoff2, sizeof(h2_6),
5545					    (caddr_t)&h2_6);
5546					break;
5547#endif /* INET6 */
5548				}
5549				m_copyback(m, off2, 8, (caddr_t)&th);
5550			}
5551
5552			return (PF_PASS);
5553			break;
5554		}
5555		case IPPROTO_UDP: {
5556			struct udphdr		uh;
5557
5558			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
5559			    NULL, reason, pd2.af)) {
5560				DPFPRINTF(PF_DEBUG_MISC,
5561				    ("pf: ICMP error message too short "
5562				    "(udp)\n"));
5563				return (PF_DROP);
5564			}
5565
5566			key.af = pd2.af;
5567			key.proto = IPPROTO_UDP;
5568			if (direction == PF_IN)	{
5569				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5570				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5571				key.ext.port = uh.uh_dport;
5572				key.gwy.port = uh.uh_sport;
5573			} else {
5574				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5575				PF_ACPY(&key.ext.addr, pd2.src, key.af);
5576				key.lan.port = uh.uh_dport;
5577				key.ext.port = uh.uh_sport;
5578			}
5579
5580			STATE_LOOKUP();
5581
5582			if (STATE_TRANSLATE(*state)) {
5583				if (direction == PF_IN) {
5584					pf_change_icmp(pd2.src, &uh.uh_sport,
5585					    daddr, &(*state)->lan.addr,
5586					    (*state)->lan.port, &uh.uh_sum,
5587					    pd2.ip_sum, icmpsum,
5588					    pd->ip_sum, 1, pd2.af);
5589				} else {
5590					pf_change_icmp(pd2.dst, &uh.uh_dport,
5591					    saddr, &(*state)->gwy.addr,
5592					    (*state)->gwy.port, &uh.uh_sum,
5593					    pd2.ip_sum, icmpsum,
5594					    pd->ip_sum, 1, pd2.af);
5595				}
5596				switch (pd2.af) {
5597#ifdef INET
5598				case AF_INET:
5599					m_copyback(m, off, ICMP_MINLEN,
5600					    (caddr_t)pd->hdr.icmp);
5601					m_copyback(m, ipoff2, sizeof(h2),
5602					    (caddr_t)&h2);
5603					break;
5604#endif /* INET */
5605#ifdef INET6
5606				case AF_INET6:
5607					m_copyback(m, off,
5608					    sizeof(struct icmp6_hdr),
5609					    (caddr_t)pd->hdr.icmp6);
5610					m_copyback(m, ipoff2, sizeof(h2_6),
5611					    (caddr_t)&h2_6);
5612					break;
5613#endif /* INET6 */
5614				}
5615				m_copyback(m, off2, sizeof(uh),
5616				    (caddr_t)&uh);
5617			}
5618
5619			return (PF_PASS);
5620			break;
5621		}
5622#ifdef INET
5623		case IPPROTO_ICMP: {
5624			struct icmp		iih;
5625
5626			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
5627			    NULL, reason, pd2.af)) {
5628				DPFPRINTF(PF_DEBUG_MISC,
5629				    ("pf: ICMP error message too short i"
5630				    "(icmp)\n"));
5631				return (PF_DROP);
5632			}
5633
5634			key.af = pd2.af;
5635			key.proto = IPPROTO_ICMP;
5636			if (direction == PF_IN)	{
5637				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5638				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5639				key.ext.port = 0;
5640				key.gwy.port = iih.icmp_id;
5641			} else {
5642				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5643				PF_ACPY(&key.ext.addr, pd2.src, key.af);
5644				key.lan.port = iih.icmp_id;
5645				key.ext.port = 0;
5646			}
5647
5648			STATE_LOOKUP();
5649
5650			if (STATE_TRANSLATE(*state)) {
5651				if (direction == PF_IN) {
5652					pf_change_icmp(pd2.src, &iih.icmp_id,
5653					    daddr, &(*state)->lan.addr,
5654					    (*state)->lan.port, NULL,
5655					    pd2.ip_sum, icmpsum,
5656					    pd->ip_sum, 0, AF_INET);
5657				} else {
5658					pf_change_icmp(pd2.dst, &iih.icmp_id,
5659					    saddr, &(*state)->gwy.addr,
5660					    (*state)->gwy.port, NULL,
5661					    pd2.ip_sum, icmpsum,
5662					    pd->ip_sum, 0, AF_INET);
5663				}
5664				m_copyback(m, off, ICMP_MINLEN,
5665				    (caddr_t)pd->hdr.icmp);
5666				m_copyback(m, ipoff2, sizeof(h2),
5667				    (caddr_t)&h2);
5668				m_copyback(m, off2, ICMP_MINLEN,
5669				    (caddr_t)&iih);
5670			}
5671
5672			return (PF_PASS);
5673			break;
5674		}
5675#endif /* INET */
5676#ifdef INET6
5677		case IPPROTO_ICMPV6: {
5678			struct icmp6_hdr	iih;
5679
5680			if (!pf_pull_hdr(m, off2, &iih,
5681			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
5682				DPFPRINTF(PF_DEBUG_MISC,
5683				    ("pf: ICMP error message too short "
5684				    "(icmp6)\n"));
5685				return (PF_DROP);
5686			}
5687
5688			key.af = pd2.af;
5689			key.proto = IPPROTO_ICMPV6;
5690			if (direction == PF_IN)	{
5691				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5692				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5693				key.ext.port = 0;
5694				key.gwy.port = iih.icmp6_id;
5695			} else {
5696				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5697				PF_ACPY(&key.ext.addr, pd2.src, key.af);
5698				key.lan.port = iih.icmp6_id;
5699				key.ext.port = 0;
5700			}
5701
5702			STATE_LOOKUP();
5703
5704			if (STATE_TRANSLATE(*state)) {
5705				if (direction == PF_IN) {
5706					pf_change_icmp(pd2.src, &iih.icmp6_id,
5707					    daddr, &(*state)->lan.addr,
5708					    (*state)->lan.port, NULL,
5709					    pd2.ip_sum, icmpsum,
5710					    pd->ip_sum, 0, AF_INET6);
5711				} else {
5712					pf_change_icmp(pd2.dst, &iih.icmp6_id,
5713					    saddr, &(*state)->gwy.addr,
5714					    (*state)->gwy.port, NULL,
5715					    pd2.ip_sum, icmpsum,
5716					    pd->ip_sum, 0, AF_INET6);
5717				}
5718				m_copyback(m, off, sizeof(struct icmp6_hdr),
5719				    (caddr_t)pd->hdr.icmp6);
5720				m_copyback(m, ipoff2, sizeof(h2_6),
5721				    (caddr_t)&h2_6);
5722				m_copyback(m, off2, sizeof(struct icmp6_hdr),
5723				    (caddr_t)&iih);
5724			}
5725
5726			return (PF_PASS);
5727			break;
5728		}
5729#endif /* INET6 */
5730		default: {
5731			key.af = pd2.af;
5732			key.proto = pd2.proto;
5733			if (direction == PF_IN)	{
5734				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5735				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5736				key.ext.port = 0;
5737				key.gwy.port = 0;
5738			} else {
5739				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5740				PF_ACPY(&key.ext.addr, pd2.src, key.af);
5741				key.lan.port = 0;
5742				key.ext.port = 0;
5743			}
5744
5745			STATE_LOOKUP();
5746
5747			if (STATE_TRANSLATE(*state)) {
5748				if (direction == PF_IN) {
5749					pf_change_icmp(pd2.src, NULL,
5750					    daddr, &(*state)->lan.addr,
5751					    0, NULL,
5752					    pd2.ip_sum, icmpsum,
5753					    pd->ip_sum, 0, pd2.af);
5754				} else {
5755					pf_change_icmp(pd2.dst, NULL,
5756					    saddr, &(*state)->gwy.addr,
5757					    0, NULL,
5758					    pd2.ip_sum, icmpsum,
5759					    pd->ip_sum, 0, pd2.af);
5760				}
5761				switch (pd2.af) {
5762#ifdef INET
5763				case AF_INET:
5764					m_copyback(m, off, ICMP_MINLEN,
5765					    (caddr_t)pd->hdr.icmp);
5766					m_copyback(m, ipoff2, sizeof(h2),
5767					    (caddr_t)&h2);
5768					break;
5769#endif /* INET */
5770#ifdef INET6
5771				case AF_INET6:
5772					m_copyback(m, off,
5773					    sizeof(struct icmp6_hdr),
5774					    (caddr_t)pd->hdr.icmp6);
5775					m_copyback(m, ipoff2, sizeof(h2_6),
5776					    (caddr_t)&h2_6);
5777					break;
5778#endif /* INET6 */
5779				}
5780			}
5781
5782			return (PF_PASS);
5783			break;
5784		}
5785		}
5786	}
5787}
5788
5789int
5790pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
5791    struct pf_pdesc *pd)
5792{
5793	struct pf_state_peer	*src, *dst;
5794	struct pf_state_cmp	 key;
5795
5796	key.af = pd->af;
5797	key.proto = pd->proto;
5798	if (direction == PF_IN)	{
5799		PF_ACPY(&key.ext.addr, pd->src, key.af);
5800		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
5801		key.ext.port = 0;
5802		key.gwy.port = 0;
5803	} else {
5804		PF_ACPY(&key.lan.addr, pd->src, key.af);
5805		PF_ACPY(&key.ext.addr, pd->dst, key.af);
5806		key.lan.port = 0;
5807		key.ext.port = 0;
5808	}
5809
5810	STATE_LOOKUP();
5811
5812	if (direction == (*state)->direction) {
5813		src = &(*state)->src;
5814		dst = &(*state)->dst;
5815	} else {
5816		src = &(*state)->dst;
5817		dst = &(*state)->src;
5818	}
5819
5820	/* update states */
5821	if (src->state < PFOTHERS_SINGLE)
5822		src->state = PFOTHERS_SINGLE;
5823	if (dst->state == PFOTHERS_SINGLE)
5824		dst->state = PFOTHERS_MULTIPLE;
5825
5826	/* update expire time */
5827	(*state)->expire = time_second;
5828	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
5829		(*state)->timeout = PFTM_OTHER_MULTIPLE;
5830	else
5831		(*state)->timeout = PFTM_OTHER_SINGLE;
5832
5833	/* translate source/destination address, if necessary */
5834	if (STATE_TRANSLATE(*state)) {
5835		if (direction == PF_OUT)
5836			switch (pd->af) {
5837#ifdef INET
5838			case AF_INET:
5839				pf_change_a(&pd->src->v4.s_addr,
5840				    pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
5841				    0);
5842				break;
5843#endif /* INET */
5844#ifdef INET6
5845			case AF_INET6:
5846				PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
5847				break;
5848#endif /* INET6 */
5849			}
5850		else
5851			switch (pd->af) {
5852#ifdef INET
5853			case AF_INET:
5854				pf_change_a(&pd->dst->v4.s_addr,
5855				    pd->ip_sum, (*state)->lan.addr.v4.s_addr,
5856				    0);
5857				break;
5858#endif /* INET */
5859#ifdef INET6
5860			case AF_INET6:
5861				PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
5862				break;
5863#endif /* INET6 */
5864			}
5865	}
5866
5867	return (PF_PASS);
5868}
5869
5870/*
5871 * ipoff and off are measured from the start of the mbuf chain.
5872 * h must be at "ipoff" on the mbuf chain.
5873 */
5874void *
5875pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5876    u_short *actionp, u_short *reasonp, sa_family_t af)
5877{
5878	switch (af) {
5879#ifdef INET
5880	case AF_INET: {
5881		struct ip	*h = mtod(m, struct ip *);
5882		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5883
5884		if (fragoff) {
5885			if (fragoff >= len)
5886				ACTION_SET(actionp, PF_PASS);
5887			else {
5888				ACTION_SET(actionp, PF_DROP);
5889				REASON_SET(reasonp, PFRES_FRAG);
5890			}
5891			return (NULL);
5892		}
5893		if (m->m_pkthdr.len < off + len ||
5894		    ntohs(h->ip_len) < off + len) {
5895			ACTION_SET(actionp, PF_DROP);
5896			REASON_SET(reasonp, PFRES_SHORT);
5897			return (NULL);
5898		}
5899		break;
5900	}
5901#endif /* INET */
5902#ifdef INET6
5903	case AF_INET6: {
5904		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5905
5906		if (m->m_pkthdr.len < off + len ||
5907		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5908		    (unsigned)(off + len)) {
5909			ACTION_SET(actionp, PF_DROP);
5910			REASON_SET(reasonp, PFRES_SHORT);
5911			return (NULL);
5912		}
5913		break;
5914	}
5915#endif /* INET6 */
5916	}
5917	m_copydata(m, off, len, p);
5918	return (p);
5919}
5920
5921int
5922pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
5923{
5924	struct sockaddr_in	*dst;
5925	int			 ret = 1;
5926	int			 check_mpath;
5927#ifndef __FreeBSD__
5928	extern int		 ipmultipath;
5929#endif
5930#ifdef INET6
5931#ifndef __FreeBSD__
5932	extern int		 ip6_multipath;
5933#endif
5934	struct sockaddr_in6	*dst6;
5935	struct route_in6	 ro;
5936#else
5937	struct route		 ro;
5938#endif
5939	struct radix_node	*rn;
5940	struct rtentry		*rt;
5941	struct ifnet		*ifp;
5942
5943	check_mpath = 0;
5944	bzero(&ro, sizeof(ro));
5945	switch (af) {
5946	case AF_INET:
5947		dst = satosin(&ro.ro_dst);
5948		dst->sin_family = AF_INET;
5949		dst->sin_len = sizeof(*dst);
5950		dst->sin_addr = addr->v4;
5951#ifndef __FreeBSD__	/* MULTIPATH_ROUTING */
5952		if (ipmultipath)
5953			check_mpath = 1;
5954#endif
5955		break;
5956#ifdef INET6
5957	case AF_INET6:
5958		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5959		dst6->sin6_family = AF_INET6;
5960		dst6->sin6_len = sizeof(*dst6);
5961		dst6->sin6_addr = addr->v6;
5962#ifndef __FreeBSD__	/* MULTIPATH_ROUTING */
5963		if (ip6_multipath)
5964			check_mpath = 1;
5965#endif
5966		break;
5967#endif /* INET6 */
5968	default:
5969		return (0);
5970	}
5971
5972	/* Skip checks for ipsec interfaces */
5973	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
5974		goto out;
5975
5976#ifdef __FreeBSD__
5977/* XXX MRT not always INET */ /* stick with table 0 though */
5978	if (af == AF_INET)
5979		in_rtalloc_ign((struct route *)&ro, 0, 0);
5980	else
5981		rtalloc_ign((struct route *)&ro, 0);
5982#else /* ! __FreeBSD__ */
5983	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5984#endif
5985
5986	if (ro.ro_rt != NULL) {
5987		/* No interface given, this is a no-route check */
5988		if (kif == NULL)
5989			goto out;
5990
5991		if (kif->pfik_ifp == NULL) {
5992			ret = 0;
5993			goto out;
5994		}
5995
5996		/* Perform uRPF check if passed input interface */
5997		ret = 0;
5998		rn = (struct radix_node *)ro.ro_rt;
5999		do {
6000			rt = (struct rtentry *)rn;
6001#ifndef __FreeBSD__ /* CARPDEV */
6002			if (rt->rt_ifp->if_type == IFT_CARP)
6003				ifp = rt->rt_ifp->if_carpdev;
6004			else
6005#endif
6006				ifp = rt->rt_ifp;
6007
6008			if (kif->pfik_ifp == ifp)
6009				ret = 1;
6010#ifdef __FreeBSD__ /* MULTIPATH_ROUTING */
6011			rn = NULL;
6012#else
6013			rn = rn_mpath_next(rn);
6014#endif
6015		} while (check_mpath == 1 && rn != NULL && ret == 0);
6016	} else
6017		ret = 0;
6018out:
6019	if (ro.ro_rt != NULL)
6020		RTFREE(ro.ro_rt);
6021	return (ret);
6022}
6023
6024int
6025pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
6026{
6027	struct sockaddr_in	*dst;
6028#ifdef INET6
6029	struct sockaddr_in6	*dst6;
6030	struct route_in6	 ro;
6031#else
6032	struct route		 ro;
6033#endif
6034	int			 ret = 0;
6035
6036	bzero(&ro, sizeof(ro));
6037	switch (af) {
6038	case AF_INET:
6039		dst = satosin(&ro.ro_dst);
6040		dst->sin_family = AF_INET;
6041		dst->sin_len = sizeof(*dst);
6042		dst->sin_addr = addr->v4;
6043		break;
6044#ifdef INET6
6045	case AF_INET6:
6046		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
6047		dst6->sin6_family = AF_INET6;
6048		dst6->sin6_len = sizeof(*dst6);
6049		dst6->sin6_addr = addr->v6;
6050		break;
6051#endif /* INET6 */
6052	default:
6053		return (0);
6054	}
6055
6056#ifdef __FreeBSD__
6057# ifdef RTF_PRCLONING
6058	rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING));
6059# else /* !RTF_PRCLONING */
6060	if (af == AF_INET)
6061		in_rtalloc_ign((struct route *)&ro, 0, 0);
6062	else
6063		rtalloc_ign((struct route *)&ro, 0);
6064# endif
6065#else /* ! __FreeBSD__ */
6066	rtalloc_noclone((struct route *)&ro, NO_CLONING);
6067#endif
6068
6069	if (ro.ro_rt != NULL) {
6070#ifdef __FreeBSD__
6071		/* XXX_IMPORT: later */
6072#else
6073		if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
6074			ret = 1;
6075#endif
6076		RTFREE(ro.ro_rt);
6077	}
6078
6079	return (ret);
6080}
6081
6082#ifdef INET
6083
/*
 * Send an IPv4 packet out as directed by rule r's routing option (r->rt),
 * bypassing the normal forwarding path and handing it straight to the
 * chosen interface's if_output().
 *
 *  m    - in/out packet pointer.  On exit through "done" it is set to NULL
 *         unless r->rt is PF_DUPTO; the two early "return"s below leave it
 *         untouched.
 *  r    - matching rule; r->rt selects the behaviour.
 *  dir  - PF_IN or PF_OUT (anything else panics).
 *  oifp - compared with the chosen output interface; when they differ the
 *         packet is first run through pf_test(PF_OUT) on the new one.
 *  s    - state; when NULL the next hop and interface come from r->rpool,
 *         otherwise from s->rt_addr / s->rt_kif.
 *  pd   - packet descriptor; pd->pf_mtag->routed counts passes through
 *         this function.
 *
 * In the __FreeBSD__ build the PF lock is dropped around every call into
 * pf_test(), if_output() and icmp_error() and re-taken afterwards.
 */
6084void
6085pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
6086    struct pf_state *s, struct pf_pdesc *pd)
6087{
6088	struct mbuf		*m0, *m1;
6089	struct route		 iproute;
6090	struct route		*ro = NULL;
6091	struct sockaddr_in	*dst;
6092	struct ip		*ip;
6093	struct ifnet		*ifp = NULL;
6094	struct pf_addr		 naddr;
6095	struct pf_src_node	*sn = NULL;
6096	int			 error = 0;
6097#ifdef __FreeBSD__
6098	int sw_csum;
6099#endif
6100#ifdef IPSEC
6101	struct m_tag		*mtag;
6102#endif /* IPSEC */
6103
6104	if (m == NULL || *m == NULL || r == NULL ||
6105	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
6106		panic("pf_route: invalid parameters");
6107
	/*
	 * Loop guard: once the packet has been through here more than a few
	 * times, take it from the caller and free it.
	 */
6108	if (pd->pf_mtag->routed++ > 3) {
6109		m0 = *m;
6110		*m = NULL;
6111		goto bad;
6112	}
6113
	/*
	 * dup-to operates on a private copy (m0) and leaves *m to the caller.
	 * For the other modes m0 is the caller's packet itself: reply-to acts
	 * only when dir differs from the rule's direction, every other mode
	 * only when dir equals it; otherwise there is nothing to do.
	 */
6114	if (r->rt == PF_DUPTO) {
6115#ifdef __FreeBSD__
6116		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
6117#else
6118		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
6119#endif
6120			return;
6121	} else {
6122		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
6123			return;
6124		m0 = *m;
6125	}
6126
6127	if (m0->m_len < sizeof(struct ip)) {
6128		DPFPRINTF(PF_DEBUG_URGENT,
6129		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
6130		goto bad;
6131	}
6132
6133	ip = mtod(m0, struct ip *);
6134
	/* Default next hop: the packet's own destination address. */
6135	ro = &iproute;
6136	bzero((caddr_t)ro, sizeof(*ro));
6137	dst = satosin(&ro->ro_dst);
6138	dst->sin_family = AF_INET;
6139	dst->sin_len = sizeof(*dst);
6140	dst->sin_addr = ip->ip_dst;
6141
	/*
	 * fastroute: take interface (and gateway, for RTF_GATEWAY routes)
	 * from the routing table.  Otherwise the next hop and interface come
	 * from the rule's address pool (no state) or from the state.
	 */
6142	if (r->rt == PF_FASTROUTE) {
6143		in_rtalloc(ro, 0);
6144		if (ro->ro_rt == 0) {
6145			IPSTAT_INC(ips_noroute);
6146			goto bad;
6147		}
6148
6149		ifp = ro->ro_rt->rt_ifp;
6150		ro->ro_rt->rt_use++;
6151
6152		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
6153			dst = satosin(ro->ro_rt->rt_gateway);
6154	} else {
6155		if (TAILQ_EMPTY(&r->rpool.list)) {
6156			DPFPRINTF(PF_DEBUG_URGENT,
6157			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
6158			goto bad;
6159		}
6160		if (s == NULL) {
6161			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
6162			    &naddr, NULL, &sn);
6163			if (!PF_AZERO(&naddr, AF_INET))
6164				dst->sin_addr.s_addr = naddr.v4.s_addr;
6165			ifp = r->rpool.cur->kif ?
6166			    r->rpool.cur->kif->pfik_ifp : NULL;
6167		} else {
6168			if (!PF_AZERO(&s->rt_addr, AF_INET))
6169				dst->sin_addr.s_addr =
6170				    s->rt_addr.v4.s_addr;
6171			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
6172		}
6173	}
6174	if (ifp == NULL)
6175		goto bad;
6176
	/*
	 * Leaving through a different interface than oifp: filter the packet
	 * as outbound on that interface first.  pf_test() may replace or
	 * consume m0, so the header pointer is re-derived afterwards.
	 */
6177	if (oifp != ifp) {
6178#ifdef __FreeBSD__
6179		PF_UNLOCK();
6180		if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
6181			PF_LOCK();
6182			goto bad;
6183		} else if (m0 == NULL) {
6184			PF_LOCK();
6185			goto done;
6186		}
6187		PF_LOCK();
6188#else
6189		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
6190			goto bad;
6191		else if (m0 == NULL)
6192			goto done;
6193#endif
6194		if (m0->m_len < sizeof(struct ip)) {
6195			DPFPRINTF(PF_DEBUG_URGENT,
6196			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
6197			goto bad;
6198		}
6199		ip = mtod(m0, struct ip *);
6200	}
6201
6202#ifdef __FreeBSD__
6203	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
	/* sw_csum: checksum work the interface does not offload. */
6204	m0->m_pkthdr.csum_flags |= CSUM_IP;
6205	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
6206	if (sw_csum & CSUM_DELAY_DATA) {
6207		/*
6208		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
6209		 */
6210		NTOHS(ip->ip_len);
6211		NTOHS(ip->ip_off);	 /* XXX: needed? */
6212		in_delayed_cksum(m0);
6213		HTONS(ip->ip_len);
6214		HTONS(ip->ip_off);
6215		sw_csum &= ~CSUM_DELAY_DATA;
6216	}
6217	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
6218
	/*
	 * Send in one piece when the packet fits the MTU, or when the
	 * interface advertises CSUM_FRAGMENT and DF is not set.
	 */
6219	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
6220	    (ifp->if_hwassist & CSUM_FRAGMENT &&
6221		((ip->ip_off & htons(IP_DF)) == 0))) {
6222		/*
6223		 * ip->ip_len = htons(ip->ip_len);
6224		 * ip->ip_off = htons(ip->ip_off);
6225		 */
6226		ip->ip_sum = 0;
6227		if (sw_csum & CSUM_DELAY_IP) {
6228			/* From KAME */
6229			if (ip->ip_v == IPVERSION &&
6230			    (ip->ip_hl << 2) == sizeof(*ip)) {
6231				ip->ip_sum = in_cksum_hdr(ip);
6232			} else {
6233				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
6234			}
6235		}
6236		PF_UNLOCK();
6237		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro);
6238		PF_LOCK();
6239		goto done;
6240	}
6241
6242#else
6243	/* Copied from ip_output. */
6244#ifdef IPSEC
6245	/*
6246	 * If deferred crypto processing is needed, check that the
6247	 * interface supports it.
6248	 */
6249	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
6250	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
6251		/* Notify IPsec to do its own crypto. */
6252		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
6253		goto bad;
6254	}
6255#endif /* IPSEC */
6256
6257	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
6258	if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) {
6259		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
6260		    ifp->if_bridge != NULL) {
6261			in_delayed_cksum(m0);
6262			m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */
6263		}
6264	} else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) {
6265		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
6266		    ifp->if_bridge != NULL) {
6267			in_delayed_cksum(m0);
6268			m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */
6269		}
6270	}
6271
6272	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
6273		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
6274		    ifp->if_bridge == NULL) {
6275			m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
6276			IPSTAT_INC(ips_outhwcsum);
6277		} else {
6278			ip->ip_sum = 0;
6279			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
6280		}
6281		/* Update relevant hardware checksum stats for TCP/UDP */
6282		if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT)
6283			TCPSTAT_INC(tcpstat.tcps_outhwcsum);
6284		else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT)
6285			UDPSTAT_INC(udps_outhwcsum);
6286		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
6287		goto done;
6288	}
6289#endif
6290	/*
6291	 * Too large for interface; fragment if possible.
6292	 * Must be able to put at least 8 bytes per fragment.
6293	 */
6294	if (ip->ip_off & htons(IP_DF)) {
6295		IPSTAT_INC(ips_cantfrag);
		/*
		 * DF set: answer with "fragmentation needed" (m0 is handed
		 * to icmp_error()), except for a dup-to copy, which is just
		 * dropped.
		 */
6296		if (r->rt != PF_DUPTO) {
6297#ifdef __FreeBSD__
6298			/* icmp_error() expects host byte ordering */
6299			NTOHS(ip->ip_len);
6300			NTOHS(ip->ip_off);
6301			PF_UNLOCK();
6302			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
6303			    ifp->if_mtu);
6304			PF_LOCK();
6305#else
6306			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
6307			    ifp->if_mtu);
6308#endif
6309			goto done;
6310		} else
6311			goto bad;
6312	}
6313
	/* m1 remembers the head of the packet list across ip_fragment(). */
6314	m1 = m0;
6315#ifdef __FreeBSD__
6316	/*
6317	 * XXX: is cheaper + less error prone than own function
6318	 */
6319	NTOHS(ip->ip_len);
6320	NTOHS(ip->ip_off);
6321	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
6322#else
6323	error = ip_fragment(m0, ifp, ifp->if_mtu);
6324#endif
6325	if (error) {
6326#ifndef __FreeBSD__	/* ip_fragment does not do m_freem() on FreeBSD */
6327		m0 = NULL;
6328#endif
6329		goto bad;
6330	}
6331
	/*
	 * Walk the m_nextpkt list of fragments: send each one until the
	 * first if_output() error, then free the remaining ones.
	 */
6332	for (m0 = m1; m0; m0 = m1) {
6333		m1 = m0->m_nextpkt;
6334		m0->m_nextpkt = 0;
6335#ifdef __FreeBSD__
6336		if (error == 0) {
6337			PF_UNLOCK();
6338			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
6339			    NULL);
6340			PF_LOCK();
6341		} else
6342#else
6343		if (error == 0)
6344			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
6345			    NULL);
6346		else
6347#endif
6348			m_freem(m0);
6349	}
6350
6351	if (error == 0)
6352		IPSTAT_INC(ips_fragmented);
6353
	/*
	 * Common exit: the caller's packet has been taken over unless this
	 * was a dup-to copy; drop the route reference if one was obtained.
	 */
6354done:
6355	if (r->rt != PF_DUPTO)
6356		*m = NULL;
6357	if (ro == &iproute && ro->ro_rt)
6358		RTFREE(ro->ro_rt);
6359	return;
6360
	/* Error exit: free the working packet, then run the common exit. */
6361bad:
6362	m_freem(m0);
6363	goto done;
6364}
6365#endif /* INET */
6366
6367#ifdef INET6
6368void
6369pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
6370    struct pf_state *s, struct pf_pdesc *pd)
6371{
6372	struct mbuf		*m0;
6373	struct route_in6	 ip6route;
6374	struct route_in6	*ro;
6375	struct sockaddr_in6	*dst;
6376	struct ip6_hdr		*ip6;
6377	struct ifnet		*ifp = NULL;
6378	struct pf_addr		 naddr;
6379	struct pf_src_node	*sn = NULL;
6380	int			 error = 0;
6381
6382	if (m == NULL || *m == NULL || r == NULL ||
6383	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
6384		panic("pf_route6: invalid parameters");
6385
6386	if (pd->pf_mtag->routed++ > 3) {
6387		m0 = *m;
6388		*m = NULL;
6389		goto bad;
6390	}
6391
6392	if (r->rt == PF_DUPTO) {
6393#ifdef __FreeBSD__
6394		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
6395#else
6396		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
6397#endif
6398			return;
6399	} else {
6400		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
6401			return;
6402		m0 = *m;
6403	}
6404
6405	if (m0->m_len < sizeof(struct ip6_hdr)) {
6406		DPFPRINTF(PF_DEBUG_URGENT,
6407		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
6408		goto bad;
6409	}
6410	ip6 = mtod(m0, struct ip6_hdr *);
6411
6412	ro = &ip6route;
6413	bzero((caddr_t)ro, sizeof(*ro));
6414	dst = (struct sockaddr_in6 *)&ro->ro_dst;
6415	dst->sin6_family = AF_INET6;
6416	dst->sin6_len = sizeof(*dst);
6417	dst->sin6_addr = ip6->ip6_dst;
6418
6419	/* Cheat. XXX why only in the v6 case??? */
6420	if (r->rt == PF_FASTROUTE) {
6421#ifdef __FreeBSD__
6422		m0->m_flags |= M_SKIP_FIREWALL;
6423		PF_UNLOCK();
6424		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
6425		PF_LOCK();
6426#else
6427		mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
6428		if (mtag == NULL)
6429			goto bad;
6430		m_tag_prepend(m0, mtag);
6431		pd->pf_mtag->flags |= PF_TAG_GENERATED;
6432		ip6_output(m0, NULL, NULL, 0, NULL, NULL);
6433#endif
6434		return;
6435	}
6436
6437	if (TAILQ_EMPTY(&r->rpool.list)) {
6438		DPFPRINTF(PF_DEBUG_URGENT,
6439		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
6440		goto bad;
6441	}
6442	if (s == NULL) {
6443		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
6444		    &naddr, NULL, &sn);
6445		if (!PF_AZERO(&naddr, AF_INET6))
6446			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
6447			    &naddr, AF_INET6);
6448		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
6449	} else {
6450		if (!PF_AZERO(&s->rt_addr, AF_INET6))
6451			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
6452			    &s->rt_addr, AF_INET6);
6453		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
6454	}
6455	if (ifp == NULL)
6456		goto bad;
6457
6458	if (oifp != ifp) {
6459#ifdef __FreeBSD__
6460		PF_UNLOCK();
6461		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
6462			PF_LOCK();
6463			goto bad;
6464		} else if (m0 == NULL) {
6465			PF_LOCK();
6466			goto done;
6467		}
6468		PF_LOCK();
6469#else
6470		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
6471			goto bad;
6472		else if (m0 == NULL)
6473			goto done;
6474#endif
6475		if (m0->m_len < sizeof(struct ip6_hdr)) {
6476			DPFPRINTF(PF_DEBUG_URGENT,
6477			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
6478			goto bad;
6479		}
6480		ip6 = mtod(m0, struct ip6_hdr *);
6481	}
6482
6483	/*
6484	 * If the packet is too large for the outgoing interface,
6485	 * send back an icmp6 error.
6486	 */
6487	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
6488		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
6489	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
6490#ifdef __FreeBSD__
6491		PF_UNLOCK();
6492#endif
6493		error = nd6_output(ifp, ifp, m0, dst, NULL);
6494#ifdef __FreeBSD__
6495		PF_LOCK();
6496#endif
6497	} else {
6498		in6_ifstat_inc(ifp, ifs6_in_toobig);
6499#ifdef __FreeBSD__
6500		if (r->rt != PF_DUPTO) {
6501			PF_UNLOCK();
6502			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
6503			PF_LOCK();
6504		 } else
6505#else
6506		if (r->rt != PF_DUPTO)
6507			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
6508		else
6509#endif
6510			goto bad;
6511	}
6512
6513done:
6514	if (r->rt != PF_DUPTO)
6515		*m = NULL;
6516	return;
6517
6518bad:
6519	m_freem(m0);
6520	goto done;
6521}
6522#endif /* INET6 */
6523
6524
6525#ifdef __FreeBSD__
6526/*
6527 * FreeBSD supports cksum offloads for the following drivers.
6528 *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
6529 *   ti(4), txp(4), xl(4)
6530 *
6531 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
6532 *  network driver performed cksum including pseudo header, need to verify
6533 *   csum_data
6534 * CSUM_DATA_VALID :
6535 *  network driver performed cksum, needs to additional pseudo header
6536 *  cksum computation with partial csum_data(i.e. lack of H/W support for
6537 *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
6538 *
6539 * After validating the cksum of packet, set both flag CSUM_DATA_VALID and
6540 * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
6541 * TCP/UDP layer.
6542 * Also, set csum_data to 0xffff to force cksum validation.
6543 */
6544int
6545pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
6546{
6547	u_int16_t sum = 0;
6548	int hw_assist = 0;
6549	struct ip *ip;
6550
6551	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6552		return (1);
6553	if (m->m_pkthdr.len < off + len)
6554		return (1);
6555
6556	switch (p) {
6557	case IPPROTO_TCP:
6558		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6559			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6560				sum = m->m_pkthdr.csum_data;
6561			} else {
6562				ip = mtod(m, struct ip *);
6563				sum = in_pseudo(ip->ip_src.s_addr,
6564					ip->ip_dst.s_addr, htonl((u_short)len +
6565					m->m_pkthdr.csum_data + IPPROTO_TCP));
6566			}
6567			sum ^= 0xffff;
6568			++hw_assist;
6569		}
6570		break;
6571	case IPPROTO_UDP:
6572		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6573			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6574				sum = m->m_pkthdr.csum_data;
6575			} else {
6576				ip = mtod(m, struct ip *);
6577				sum = in_pseudo(ip->ip_src.s_addr,
6578					ip->ip_dst.s_addr, htonl((u_short)len +
6579					m->m_pkthdr.csum_data + IPPROTO_UDP));
6580			}
6581			sum ^= 0xffff;
6582			++hw_assist;
6583                }
6584		break;
6585	case IPPROTO_ICMP:
6586#ifdef INET6
6587	case IPPROTO_ICMPV6:
6588#endif /* INET6 */
6589		break;
6590	default:
6591		return (1);
6592	}
6593
6594	if (!hw_assist) {
6595		switch (af) {
6596		case AF_INET:
6597			if (p == IPPROTO_ICMP) {
6598				if (m->m_len < off)
6599					return (1);
6600				m->m_data += off;
6601				m->m_len -= off;
6602				sum = in_cksum(m, len);
6603				m->m_data -= off;
6604				m->m_len += off;
6605			} else {
6606				if (m->m_len < sizeof(struct ip))
6607					return (1);
6608				sum = in4_cksum(m, p, off, len);
6609			}
6610			break;
6611#ifdef INET6
6612		case AF_INET6:
6613			if (m->m_len < sizeof(struct ip6_hdr))
6614				return (1);
6615			sum = in6_cksum(m, p, off, len);
6616			break;
6617#endif /* INET6 */
6618		default:
6619			return (1);
6620		}
6621	}
6622	if (sum) {
6623		switch (p) {
6624		case IPPROTO_TCP:
6625		    {
6626			TCPSTAT_INC(tcps_rcvbadsum);
6627			break;
6628		    }
6629		case IPPROTO_UDP:
6630		    {
6631			UDPSTAT_INC(udps_badsum);
6632			break;
6633		    }
6634		case IPPROTO_ICMP:
6635		    {
6636			ICMPSTAT_INC(icps_checksum);
6637			break;
6638		    }
6639#ifdef INET6
6640		case IPPROTO_ICMPV6:
6641		    {
6642			ICMP6STAT_INC(icp6s_checksum);
6643			break;
6644		    }
6645#endif /* INET6 */
6646		}
6647		return (1);
6648	} else {
6649		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
6650			m->m_pkthdr.csum_flags |=
6651			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
6652			m->m_pkthdr.csum_data = 0xffff;
6653		}
6654	}
6655	return (0);
6656}
6657#else /* !__FreeBSD__ */
6658/*
6659 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
6660 *   off is the offset where the protocol header starts
6661 *   len is the total length of protocol header plus payload
6662 * returns 0 when the checksum is valid, otherwise returns 1.
6663 */
6664int
6665pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
6666    sa_family_t af)
6667{
6668	u_int16_t flag_ok, flag_bad;
6669	u_int16_t sum;
6670
6671	switch (p) {
6672	case IPPROTO_TCP:
6673		flag_ok = M_TCP_CSUM_IN_OK;
6674		flag_bad = M_TCP_CSUM_IN_BAD;
6675		break;
6676	case IPPROTO_UDP:
6677		flag_ok = M_UDP_CSUM_IN_OK;
6678		flag_bad = M_UDP_CSUM_IN_BAD;
6679		break;
6680	case IPPROTO_ICMP:
6681#ifdef INET6
6682	case IPPROTO_ICMPV6:
6683#endif /* INET6 */
6684		flag_ok = flag_bad = 0;
6685		break;
6686	default:
6687		return (1);
6688	}
6689	if (m->m_pkthdr.csum_flags & flag_ok)
6690		return (0);
6691	if (m->m_pkthdr.csum_flags & flag_bad)
6692		return (1);
6693	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6694		return (1);
6695	if (m->m_pkthdr.len < off + len)
6696		return (1);
6697	switch (af) {
6698#ifdef INET
6699	case AF_INET:
6700		if (p == IPPROTO_ICMP) {
6701			if (m->m_len < off)
6702				return (1);
6703			m->m_data += off;
6704			m->m_len -= off;
6705			sum = in_cksum(m, len);
6706			m->m_data -= off;
6707			m->m_len += off;
6708		} else {
6709			if (m->m_len < sizeof(struct ip))
6710				return (1);
6711			sum = in4_cksum(m, p, off, len);
6712		}
6713		break;
6714#endif /* INET */
6715#ifdef INET6
6716	case AF_INET6:
6717		if (m->m_len < sizeof(struct ip6_hdr))
6718			return (1);
6719		sum = in6_cksum(m, p, off, len);
6720		break;
6721#endif /* INET6 */
6722	default:
6723		return (1);
6724	}
6725	if (sum) {
6726		m->m_pkthdr.csum_flags |= flag_bad;
6727		switch (p) {
6728		case IPPROTO_TCP:
6729			TCPSTAT_INC(tcps_rcvbadsum);
6730			break;
6731		case IPPROTO_UDP:
6732			UDPSTAT_INC(udps_badsum);
6733			break;
6734		case IPPROTO_ICMP:
6735			ICMPSTAT_INC(icps_checksum);
6736			break;
6737#ifdef INET6
6738		case IPPROTO_ICMPV6:
6739			ICMP6STAT_INC(icp6s_checksum);
6740			break;
6741#endif /* INET6 */
6742		}
6743		return (1);
6744	}
6745	m->m_pkthdr.csum_flags |= flag_ok;
6746	return (0);
6747}
6748#endif /* __FreeBSD__ */
6749
6750#ifdef INET
/*
 * Filter one IPv4 packet travelling in direction dir on interface ifp.
 *
 * *m0 is the packet.  It may be replaced by normalization/reassembly and
 * is set to NULL when pf consumes the packet (synproxy drop below; pf_route()
 * may do the same).  eh is stored in the packet descriptor; inp (FreeBSD
 * only) is handed on to the TCP/UDP rule evaluation.
 *
 * Returns the resulting action (PF_PASS, PF_DROP, ...).  On FreeBSD the pf
 * lock is taken on entry and released on every return path.
 */
int
#ifdef __FreeBSD__
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh, struct inpcb *inp)
#else
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh)
#endif
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0, log = 0;
	struct mbuf		*m = *m0;
	struct ip		*h = NULL;	/* make the compiler happy */
	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state		*s = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	int			 off, dirndx, pqid = 0;

#ifdef __FreeBSD__
	PF_LOCK();
#endif
	/* pf is not running: let everything through */
	if (!pf_status.running)
#ifdef __FreeBSD__
	{
		PF_UNLOCK();
#endif
		return (PF_PASS);
#ifdef __FreeBSD__
	}
#endif

	memset(&pd, 0, sizeof(pd));
	if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
#ifdef __FreeBSD__
		PF_UNLOCK();
#endif
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: pf_get_mtag returned NULL\n"));
		return (PF_DROP);
	}
	/* packets marked to bypass the firewall are passed unfiltered */
#ifdef __FreeBSD__
	if (m->m_flags & M_SKIP_FIREWALL) {
		PF_UNLOCK();
		return (PF_PASS);
	}
#else
	if (pd.pf_mtag->flags & PF_TAG_GENERATED)
		return (PF_PASS);
#endif

#ifdef __FreeBSD__
	/* XXX_IMPORT: later */
#else
	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
		ifp = ifp->if_carpdev;
#endif

	kif = (struct pfi_kif *)ifp->if_pf_kif;
	if (kif == NULL) {
#ifdef __FreeBSD__
		PF_UNLOCK();
#endif
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
		return (PF_DROP);
	}
	/* interface is flagged to be skipped by pf */
	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
#ifdef __FreeBSD__
		PF_UNLOCK();
#endif
		return (PF_PASS);
	}

#ifdef __FreeBSD__
	M_ASSERTPKTHDR(m);
#else
#ifdef DIAGNOSTIC
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test");
#endif /* DIAGNOSTIC */
#endif /* __FreeBSD__ */

	/* too short to hold an IP header (h is still NULL at this point) */
	if (m->m_pkthdr.len < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* We do IP header normalization and packet reassembly here */
	if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
		action = PF_DROP;
		goto done;
	}
	/* normalization may have replaced the mbuf: reload it */
	m = *m0;
	h = mtod(m, struct ip *);

	/* IP header length in bytes; also the offset of the protocol header */
	off = h->ip_hl << 2;
	if (off < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* fill in the packet descriptor handed to the helpers below */
	pd.src = (struct pf_addr *)&h->ip_src;
	pd.dst = (struct pf_addr *)&h->ip_dst;
	/* keep a copy of the source (outbound) or destination (inbound) */
	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
	pd.ip_sum = &h->ip_sum;
	pd.proto = h->ip_p;
	pd.af = AF_INET;
	pd.tos = h->ip_tos;
	pd.tot_len = ntohs(h->ip_len);
	pd.eh = eh;

	/* handle fragments that didn't get reassembled by normalization */
	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
		action = pf_test_fragment(&r, dir, kif, m, h,
		    &pd, &a, &ruleset);
		goto done;
	}

	/*
	 * Per protocol: pull the header, verify the checksum of inbound
	 * packets, try the state lookup, and evaluate the rules only when
	 * the lookup did not pass and left s NULL.
	 */
	switch (h->ip_p) {

	case IPPROTO_TCP: {
		struct tcphdr	th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(m, off, &th, sizeof(th),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		if (dir == PF_IN && pf_check_proto_cksum(m, off,
		    ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
			REASON_SET(&reason, PFRES_PROTCKSUM);
			action = PF_DROP;
			goto done;
		}
		/* payload length: total minus IP and TCP headers */
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
		/* ACK without payload: selects r->pqid in the ALTQ code below */
		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
			pqid = 1;
		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
		if (action == PF_DROP)
			goto done;
		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_tcp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
			action = pf_test_tcp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
#endif
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr	uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		/* a zero uh_sum is not verified */
		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
		    off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_PROTCKSUM);
			goto done;
		}
		/* reject port 0 and UDP lengths that do not fit the packet */
		if (uh.uh_dport == 0 ||
		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_udp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
			action = pf_test_udp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
#endif
		break;
	}

	case IPPROTO_ICMP: {
		struct icmp	ih;

		pd.hdr.icmp = &ih;
		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		if (dir == PF_IN && pf_check_proto_cksum(m, off,
		    ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_PROTCKSUM);
			goto done;
		}
		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_icmp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL);
#else
			action = pf_test_icmp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
#endif
		break;
	}

	default:
		/* any other protocol: no header is pulled or checksummed */
		action = pf_test_state_other(&s, dir, kif, &pd);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_other(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, NULL);
#else
			action = pf_test_other(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, &ipintrq);
#endif
		break;
	}

done:
	/*
	 * A header longer than 5 words carries IP options; such packets are
	 * only passed when the state or the rule has allow_opts set.
	 */
	if (action == PF_PASS && h->ip_hl > 5 &&
	    !((s && s->allow_opts) || r->allow_opts)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_IPOPTIONS);
		log = 1;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping packet with ip options\n"));
	}

	if ((s && s->tag) || r->rtableid)
		pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid);

#ifdef ALTQ
	if (action == PF_PASS && r->qid) {
		/* empty ACKs and low-delay TOS use the rule's pqid queue */
		if (pqid || (pd.tos & IPTOS_LOWDELAY))
			pd.pf_mtag->qid = r->pqid;
		else
			pd.pf_mtag->qid = r->qid;
		/* add hints for ecn */
		pd.pf_mtag->af = AF_INET;
		pd.pf_mtag->hdr = h;
	}
#endif /* ALTQ */

	/*
	 * connections redirected to loopback should not match sockets
	 * bound specifically to loopback due to security implications,
	 * see tcp_input() and in_pcblookup_listen().
	 */
	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
	    (s->nat_rule.ptr->action == PF_RDR ||
	    s->nat_rule.ptr->action == PF_BINAT) &&
	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
		pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;

	if (log) {
		struct pf_rule *lr;

		/* log against the NAT rule when it has PF_LOG_ALL set */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL)
			lr = s->nat_rule.ptr;
		else
			lr = r;
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/*
	 * Interface counters; this IPv4 path uses first index 0, then
	 * direction (1 = out) and verdict (1 = not passed).
	 */
	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;

	/* rule, anchor, state and source node accounting */
	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			/* state counters: 0 = same direction as the state */
			dirndx = (dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL) {
			struct pf_addr *x;
			/*
			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same than
			 * the addresses used during matching (pfr_match)
			 */
			if (r == &pf_default_rule) {
				tr = nr;
				x = (s == NULL || s->direction == dir) ?
				    &pd.baddr : &pd.naddr;
			} else
				x = (s == NULL || s->direction == dir) ?
				    &pd.naddr : &pd.baddr;
			if (x == &pd.baddr || s == NULL) {
				/* we need to change the address */
				if (dir == PF_OUT)
					pd.src = x;
				else
					pd.dst = x;
			}
		}
		/* table statistics for table-typed rule addresses */
		if (tr->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
			    s->direction == dir) ? pd.src : pd.dst, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->src.neg);
		if (tr->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
			    s->direction == dir) ? pd.dst : pd.src, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->dst.neg);
	}


	/*
	 * PF_SYNPROXY_DROP: free the packet here and report PF_PASS with
	 * *m0 set to NULL.
	 */
	if (action == PF_SYNPROXY_DROP) {
		m_freem(*m0);
		*m0 = NULL;
		action = PF_PASS;
	} else if (r->rt)
		/* pf_route can free the mbuf causing *m0 to become NULL */
		pf_route(m0, r, dir, ifp, s, &pd);

#ifdef __FreeBSD__
	PF_UNLOCK();
#endif

	return (action);
}
7141#endif /* INET */
7142
7143#ifdef INET6
7144int
7145#ifdef __FreeBSD__
7146pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
7147    struct ether_header *eh, struct inpcb *inp)
7148#else
7149pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
7150    struct ether_header *eh)
7151#endif
7152{
7153	struct pfi_kif		*kif;
7154	u_short			 action, reason = 0, log = 0;
7155	struct mbuf		*m = *m0, *n = NULL;
7156	struct ip6_hdr		*h;
7157	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
7158	struct pf_state		*s = NULL;
7159	struct pf_ruleset	*ruleset = NULL;
7160	struct pf_pdesc		 pd;
7161	int			 off, terminal = 0, dirndx, rh_cnt = 0;
7162
7163#ifdef __FreeBSD__
7164	PF_LOCK();
7165#endif
7166
7167	if (!pf_status.running)
7168#ifdef __FreeBSD__
7169	{
7170		PF_UNLOCK();
7171#endif
7172		return (PF_PASS);
7173#ifdef __FreeBSD__
7174	}
7175#endif
7176
7177	memset(&pd, 0, sizeof(pd));
7178	if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
7179#ifdef __FreeBSD__
7180		PF_UNLOCK();
7181#endif
7182		DPFPRINTF(PF_DEBUG_URGENT,
7183		    ("pf_test6: pf_get_mtag returned NULL\n"));
7184		return (PF_DROP);
7185	}
7186	if (pd.pf_mtag->flags & PF_TAG_GENERATED)
7187		return (PF_PASS);
7188
7189#ifdef __FreeBSD__
7190	/* XXX_IMPORT: later */
7191#else
7192	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
7193		ifp = ifp->if_carpdev;
7194#endif
7195
7196	kif = (struct pfi_kif *)ifp->if_pf_kif;
7197	if (kif == NULL) {
7198#ifdef __FreeBSD__
7199		PF_UNLOCK();
7200#endif
7201		DPFPRINTF(PF_DEBUG_URGENT,
7202		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
7203		return (PF_DROP);
7204	}
7205	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
7206#ifdef __FreeBSD__
7207		PF_UNLOCK();
7208#endif
7209		return (PF_PASS);
7210	}
7211
7212#ifdef __FreeBSD__
7213	M_ASSERTPKTHDR(m);
7214#else
7215#ifdef DIAGNOSTIC
7216	if ((m->m_flags & M_PKTHDR) == 0)
7217		panic("non-M_PKTHDR is passed to pf_test6");
7218#endif /* DIAGNOSTIC */
7219#endif
7220
7221#ifdef __FreeBSD__
7222	h = NULL;	/* make the compiler happy */
7223#endif
7224
7225	if (m->m_pkthdr.len < (int)sizeof(*h)) {
7226		action = PF_DROP;
7227		REASON_SET(&reason, PFRES_SHORT);
7228		log = 1;
7229		goto done;
7230	}
7231
7232	/* We do IP header normalization and packet reassembly here */
7233	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
7234		action = PF_DROP;
7235		goto done;
7236	}
7237	m = *m0;
7238	h = mtod(m, struct ip6_hdr *);
7239
7240#if 1
7241	/*
7242	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
7243	 * will do something bad, so drop the packet for now.
7244	 */
7245	if (htons(h->ip6_plen) == 0) {
7246		action = PF_DROP;
7247		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
7248		goto done;
7249	}
7250#endif
7251
7252	pd.src = (struct pf_addr *)&h->ip6_src;
7253	pd.dst = (struct pf_addr *)&h->ip6_dst;
7254	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
7255	pd.ip_sum = NULL;
7256	pd.af = AF_INET6;
7257	pd.tos = 0;
7258	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
7259	pd.eh = eh;
7260
7261	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
7262	pd.proto = h->ip6_nxt;
7263	do {
7264		switch (pd.proto) {
7265		case IPPROTO_FRAGMENT:
7266			action = pf_test_fragment(&r, dir, kif, m, h,
7267			    &pd, &a, &ruleset);
7268			if (action == PF_DROP)
7269				REASON_SET(&reason, PFRES_FRAG);
7270			goto done;
7271		case IPPROTO_ROUTING: {
7272			struct ip6_rthdr rthdr;
7273
7274			if (rh_cnt++) {
7275				DPFPRINTF(PF_DEBUG_MISC,
7276				    ("pf: IPv6 more than one rthdr\n"));
7277				action = PF_DROP;
7278				REASON_SET(&reason, PFRES_IPOPTIONS);
7279				log = 1;
7280				goto done;
7281			}
7282			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
7283			    &reason, pd.af)) {
7284				DPFPRINTF(PF_DEBUG_MISC,
7285				    ("pf: IPv6 short rthdr\n"));
7286				action = PF_DROP;
7287				REASON_SET(&reason, PFRES_SHORT);
7288				log = 1;
7289				goto done;
7290			}
7291			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
7292				DPFPRINTF(PF_DEBUG_MISC,
7293				    ("pf: IPv6 rthdr0\n"));
7294				action = PF_DROP;
7295				REASON_SET(&reason, PFRES_IPOPTIONS);
7296				log = 1;
7297				goto done;
7298			}
7299			/* fallthrough */
7300		}
7301		case IPPROTO_AH:
7302		case IPPROTO_HOPOPTS:
7303		case IPPROTO_DSTOPTS: {
7304			/* get next header and header length */
7305			struct ip6_ext	opt6;
7306
7307			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
7308			    NULL, &reason, pd.af)) {
7309				DPFPRINTF(PF_DEBUG_MISC,
7310				    ("pf: IPv6 short opt\n"));
7311				action = PF_DROP;
7312				log = 1;
7313				goto done;
7314			}
7315			if (pd.proto == IPPROTO_AH)
7316				off += (opt6.ip6e_len + 2) * 4;
7317			else
7318				off += (opt6.ip6e_len + 1) * 8;
7319			pd.proto = opt6.ip6e_nxt;
7320			/* goto the next header */
7321			break;
7322		}
7323		default:
7324			terminal++;
7325			break;
7326		}
7327	} while (!terminal);
7328
7329	/* if there's no routing header, use unmodified mbuf for checksumming */
7330	if (!n)
7331		n = m;
7332
7333	switch (pd.proto) {
7334
7335	case IPPROTO_TCP: {
7336		struct tcphdr	th;
7337
7338		pd.hdr.tcp = &th;
7339		if (!pf_pull_hdr(m, off, &th, sizeof(th),
7340		    &action, &reason, AF_INET6)) {
7341			log = action != PF_PASS;
7342			goto done;
7343		}
7344		if (dir == PF_IN && pf_check_proto_cksum(n, off,
7345		    ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
7346		    IPPROTO_TCP, AF_INET6)) {
7347			action = PF_DROP;
7348			REASON_SET(&reason, PFRES_PROTCKSUM);
7349			goto done;
7350		}
7351		pd.p_len = pd.tot_len - off - (th.th_off << 2);
7352		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
7353		if (action == PF_DROP)
7354			goto done;
7355		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
7356		    &reason);
7357		if (action == PF_PASS) {
7358#if NPFSYNC
7359			pfsync_update_state(s);
7360#endif /* NPFSYNC */
7361			r = s->rule.ptr;
7362			a = s->anchor.ptr;
7363			log = s->log;
7364		} else if (s == NULL)
7365#ifdef __FreeBSD__
7366			action = pf_test_tcp(&r, &s, dir, kif,
7367			    m, off, h, &pd, &a, &ruleset, NULL, inp);
7368#else
7369			action = pf_test_tcp(&r, &s, dir, kif,
7370			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
7371#endif
7372		break;
7373	}
7374
7375	case IPPROTO_UDP: {
7376		struct udphdr	uh;
7377
7378		pd.hdr.udp = &uh;
7379		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
7380		    &action, &reason, AF_INET6)) {
7381			log = action != PF_PASS;
7382			goto done;
7383		}
7384		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n,
7385		    off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
7386		    IPPROTO_UDP, AF_INET6)) {
7387			action = PF_DROP;
7388			REASON_SET(&reason, PFRES_PROTCKSUM);
7389			goto done;
7390		}
7391		if (uh.uh_dport == 0 ||
7392		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
7393		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
7394			action = PF_DROP;
7395			REASON_SET(&reason, PFRES_SHORT);
7396			goto done;
7397		}
7398		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
7399		if (action == PF_PASS) {
7400#if NPFSYNC
7401			pfsync_update_state(s);
7402#endif /* NPFSYNC */
7403			r = s->rule.ptr;
7404			a = s->anchor.ptr;
7405			log = s->log;
7406		} else if (s == NULL)
7407#ifdef __FreeBSD__
7408			action = pf_test_udp(&r, &s, dir, kif,
7409			    m, off, h, &pd, &a, &ruleset, NULL, inp);
7410#else
7411			action = pf_test_udp(&r, &s, dir, kif,
7412			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
7413#endif
7414		break;
7415	}
7416
7417	case IPPROTO_ICMPV6: {
7418		struct icmp6_hdr	ih;
7419
7420		pd.hdr.icmp6 = &ih;
7421		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
7422		    &action, &reason, AF_INET6)) {
7423			log = action != PF_PASS;
7424			goto done;
7425		}
7426		if (dir == PF_IN && pf_check_proto_cksum(n, off,
7427		    ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
7428		    IPPROTO_ICMPV6, AF_INET6)) {
7429			action = PF_DROP;
7430			REASON_SET(&reason, PFRES_PROTCKSUM);
7431			goto done;
7432		}
7433		action = pf_test_state_icmp(&s, dir, kif,
7434		    m, off, h, &pd, &reason);
7435		if (action == PF_PASS) {
7436#if NPFSYNC
7437			pfsync_update_state(s);
7438#endif /* NPFSYNC */
7439			r = s->rule.ptr;
7440			a = s->anchor.ptr;
7441			log = s->log;
7442		} else if (s == NULL)
7443#ifdef __FreeBSD__
7444			action = pf_test_icmp(&r, &s, dir, kif,
7445			    m, off, h, &pd, &a, &ruleset, NULL);
7446#else
7447			action = pf_test_icmp(&r, &s, dir, kif,
7448			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
7449#endif
7450		break;
7451	}
7452
7453	default:
7454		action = pf_test_state_other(&s, dir, kif, &pd);
7455		if (action == PF_PASS) {
7456#if NPFSYNC
7457			pfsync_update_state(s);
7458#endif /* NPFSYNC */
7459			r = s->rule.ptr;
7460			a = s->anchor.ptr;
7461			log = s->log;
7462		} else if (s == NULL)
7463#ifdef __FreeBSD__
7464			action = pf_test_other(&r, &s, dir, kif, m, off, h,
7465			    &pd, &a, &ruleset, NULL);
7466#else
7467			action = pf_test_other(&r, &s, dir, kif, m, off, h,
7468			    &pd, &a, &ruleset, &ip6intrq);
7469#endif
7470		break;
7471	}
7472
7473done:
7474	/* handle dangerous IPv6 extension headers. */
7475	if (action == PF_PASS && rh_cnt &&
7476	    !((s && s->allow_opts) || r->allow_opts)) {
7477		action = PF_DROP;
7478		REASON_SET(&reason, PFRES_IPOPTIONS);
7479		log = 1;
7480		DPFPRINTF(PF_DEBUG_MISC,
7481		    ("pf: dropping packet with dangerous v6 headers\n"));
7482	}
7483
7484	if ((s && s->tag) || r->rtableid)
7485		pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid);
7486
7487#ifdef ALTQ
7488	if (action == PF_PASS && r->qid) {
7489		if (pd.tos & IPTOS_LOWDELAY)
7490			pd.pf_mtag->qid = r->pqid;
7491		else
7492			pd.pf_mtag->qid = r->qid;
7493		/* add hints for ecn */
7494		pd.pf_mtag->af = AF_INET6;
7495		pd.pf_mtag->hdr = h;
7496	}
7497#endif /* ALTQ */
7498
7499	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
7500	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
7501	    (s->nat_rule.ptr->action == PF_RDR ||
7502	    s->nat_rule.ptr->action == PF_BINAT) &&
7503	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
7504		pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
7505
7506	if (log) {
7507		struct pf_rule *lr;
7508
7509		if (s != NULL && s->nat_rule.ptr != NULL &&
7510		    s->nat_rule.ptr->log & PF_LOG_ALL)
7511			lr = s->nat_rule.ptr;
7512		else
7513			lr = r;
7514		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
7515		    &pd);
7516	}
7517
7518	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
7519	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
7520
7521	if (action == PF_PASS || r->action == PF_DROP) {
7522		dirndx = (dir == PF_OUT);
7523		r->packets[dirndx]++;
7524		r->bytes[dirndx] += pd.tot_len;
7525		if (a != NULL) {
7526			a->packets[dirndx]++;
7527			a->bytes[dirndx] += pd.tot_len;
7528		}
7529		if (s != NULL) {
7530			if (s->nat_rule.ptr != NULL) {
7531				s->nat_rule.ptr->packets[dirndx]++;
7532				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
7533			}
7534			if (s->src_node != NULL) {
7535				s->src_node->packets[dirndx]++;
7536				s->src_node->bytes[dirndx] += pd.tot_len;
7537			}
7538			if (s->nat_src_node != NULL) {
7539				s->nat_src_node->packets[dirndx]++;
7540				s->nat_src_node->bytes[dirndx] += pd.tot_len;
7541			}
7542			dirndx = (dir == s->direction) ? 0 : 1;
7543			s->packets[dirndx]++;
7544			s->bytes[dirndx] += pd.tot_len;
7545		}
7546		tr = r;
7547		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
7548		if (nr != NULL) {
7549			struct pf_addr *x;
7550			/*
7551			 * XXX: we need to make sure that the addresses
7552			 * passed to pfr_update_stats() are the same than
7553			 * the addresses used during matching (pfr_match)
7554			 */
7555			if (r == &pf_default_rule) {
7556				tr = nr;
7557				x = (s == NULL || s->direction == dir) ?
7558				    &pd.baddr : &pd.naddr;
7559			} else {
7560				x = (s == NULL || s->direction == dir) ?
7561				    &pd.naddr : &pd.baddr;
7562			}
7563			if (x == &pd.baddr || s == NULL) {
7564				if (dir == PF_OUT)
7565					pd.src = x;
7566				else
7567					pd.dst = x;
7568			}
7569		}
7570		if (tr->src.addr.type == PF_ADDR_TABLE)
7571			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
7572			    s->direction == dir) ? pd.src : pd.dst, pd.af,
7573			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
7574			    tr->src.neg);
7575		if (tr->dst.addr.type == PF_ADDR_TABLE)
7576			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
7577			    s->direction == dir) ? pd.dst : pd.src, pd.af,
7578			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
7579			    tr->dst.neg);
7580	}
7581
7582
7583	if (action == PF_SYNPROXY_DROP) {
7584		m_freem(*m0);
7585		*m0 = NULL;
7586		action = PF_PASS;
7587	} else if (r->rt)
7588		/* pf_route6 can free the mbuf causing *m0 to become NULL */
7589		pf_route6(m0, r, dir, ifp, s, &pd);
7590
7591#ifdef __FreeBSD__
7592	PF_UNLOCK();
7593#endif
7594	return (action);
7595}
7596#endif /* INET6 */
7597
7598int
7599pf_check_congestion(struct ifqueue *ifq)
7600{
7601#ifdef __FreeBSD__
7602	/* XXX_IMPORT: later */
7603	return (0);
7604#else
7605	if (ifq->ifq_congestion)
7606		return (1);
7607	else
7608		return (0);
7609#endif
7610}
7611