1/*	$NetBSD: pf.c,v 1.87 2022/11/04 09:01:53 ozaki-r Exp $	*/
2/*	$OpenBSD: pf.c,v 1.552.2.1 2007/11/27 16:37:57 henning Exp $ */
3
4/*
5 * Copyright (c) 2001 Daniel Hartmeier
6 * Copyright (c) 2002,2003 Henning Brauer
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 *    - Redistributions of source code must retain the above copyright
14 *      notice, this list of conditions and the following disclaimer.
15 *    - Redistributions in binary form must reproduce the above
16 *      copyright notice, this list of conditions and the following
17 *      disclaimer in the documentation and/or other materials provided
18 *      with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Effort sponsored in part by the Defense Advanced Research Projects
34 * Agency (DARPA) and Air Force Research Laboratory, Air Force
35 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
36 *
37 */
38
39#include <sys/cdefs.h>
40__KERNEL_RCSID(0, "$NetBSD: pf.c,v 1.87 2022/11/04 09:01:53 ozaki-r Exp $");
41
42#include "pflog.h"
43
44#include "pfsync.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/mbuf.h>
49#include <sys/filio.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52#include <sys/kernel.h>
53#include <sys/time.h>
54#include <sys/pool.h>
55#include <sys/proc.h>
56#include <sys/rwlock.h>
57#ifdef __NetBSD__
58#include <sys/kthread.h>
59#include <sys/kauth.h>
60#endif /* __NetBSD__ */
61
62#include <net/if.h>
63#include <net/if_types.h>
64#include <net/bpf.h>
65#include <net/route.h>
66#ifndef __NetBSD__
67#include <net/radix_mpath.h>
68#endif /* !__NetBSD__ */
69
70#include <netinet/in.h>
71#ifdef __NetBSD__
72#include <netinet/in_offload.h>
73#endif /* __NetBSD__ */
74#include <netinet/in_var.h>
75#include <netinet/in_systm.h>
76#include <netinet/ip.h>
77#include <netinet/ip_var.h>
78#include <netinet/tcp.h>
79#include <netinet/tcp_seq.h>
80#include <netinet/udp.h>
81#include <netinet/ip_icmp.h>
82#include <netinet/in_pcb.h>
83#include <netinet/tcp_timer.h>
84#include <netinet/tcp_var.h>
85#include <netinet/udp_var.h>
86#include <netinet/icmp_var.h>
87#ifndef __NetBSD__
88#include <netinet/if_ether.h>
89#else
90#include <net/if_ether.h>
91#endif /* __NetBSD__ */
92
93#ifndef __NetBSD__
94#include <dev/rndvar.h>
95#else
96#include <sys/cprng.h>
97#endif /* __NetBSD__ */
98
99#include <net/pfvar.h>
100#include <net/if_pflog.h>
101
102#if NPFSYNC > 0
103#include <net/if_pfsync.h>
104#endif /* NPFSYNC > 0 */
105
106#ifdef INET6
107#include <netinet/ip6.h>
108#include <netinet6/ip6_var.h>
109#ifdef __NetBSD__
110#include <netinet6/in6_pcb.h>
111#endif /* __NetBSD__ */
112#include <netinet/icmp6.h>
113#include <netinet6/nd6.h>
114#endif /* INET6 */
115
116#ifdef __NetBSD__
117#include <netinet/tcp_rndiss.h>
118#endif /* __NetBSD__ */
119
120
121#define DPFPRINTF(n, x)	if (pf_status.debug >= (n)) printf x
122
123/*
124 * Global variables
125 */
126
127/* state tables */
128struct pf_state_tree_lan_ext	 pf_statetbl_lan_ext;
129struct pf_state_tree_ext_gwy	 pf_statetbl_ext_gwy;
130
131struct pf_altqqueue	 pf_altqs[2];
132struct pf_palist	 pf_pabuf;
133struct pf_altqqueue	*pf_altqs_active;
134struct pf_altqqueue	*pf_altqs_inactive;
135struct pf_status	 pf_status;
136
137u_int32_t		 ticket_altqs_active;
138u_int32_t		 ticket_altqs_inactive;
139int			 altqs_inactive_open;
140u_int32_t		 ticket_pabuf;
141
142struct pf_anchor_stackframe {
143	struct pf_ruleset			*rs;
144	struct pf_rule				*r;
145	struct pf_anchor_node			*parent;
146	struct pf_anchor			*child;
147} pf_anchor_stack[64];
148
149struct pool		 pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
150struct pool		 pf_state_pl, pf_state_key_pl;
151struct pool		 pf_altq_pl;
152
153void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
154
155void			 pf_init_threshold(struct pf_threshold *, u_int32_t,
156			    u_int32_t);
157void			 pf_add_threshold(struct pf_threshold *);
158int			 pf_check_threshold(struct pf_threshold *);
159
160void			 pf_change_ap(struct pf_addr *, u_int16_t *,
161			    u_int16_t *, u_int16_t *, struct pf_addr *,
162			    u_int16_t, u_int8_t, sa_family_t);
163int			 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
164			    struct tcphdr *, struct pf_state_peer *);
165#ifdef INET6
166void			 pf_change_a6(struct pf_addr *, u_int16_t *,
167			    struct pf_addr *, u_int8_t);
168#endif /* INET6 */
169void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
170			    struct pf_addr *, struct pf_addr *, u_int16_t,
171			    u_int16_t *, u_int16_t *, u_int16_t *,
172			    u_int16_t *, u_int8_t, sa_family_t);
173void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
174			    const struct pf_addr *, const struct pf_addr *,
175			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
176			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
177			    u_int16_t, struct ether_header *, struct ifnet *);
178void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
179			    sa_family_t, struct pf_rule *);
180struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
181			    int, int, struct pfi_kif *,
182			    struct pf_addr *, u_int16_t, struct pf_addr *,
183			    u_int16_t, int);
184struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
185			    int, int, struct pfi_kif *, struct pf_src_node **,
186			    struct pf_addr *, u_int16_t,
187			    struct pf_addr *, u_int16_t,
188			    struct pf_addr *, u_int16_t *);
189void			 pf_attach_state(struct pf_state_key *,
190			    struct pf_state *, int);
191void			 pf_detach_state(struct pf_state *, int);
192int			 pf_test_rule(struct pf_rule **, struct pf_state **,
193			    int, struct pfi_kif *, struct mbuf *, int,
194			    void *, struct pf_pdesc *, struct pf_rule **,
195			    struct pf_ruleset **, struct ifqueue *);
196int			 pf_test_fragment(struct pf_rule **, int,
197			    struct pfi_kif *, struct mbuf *, void *,
198			    struct pf_pdesc *, struct pf_rule **,
199			    struct pf_ruleset **);
200int			 pf_test_state_tcp(struct pf_state **, int,
201			    struct pfi_kif *, struct mbuf *, int,
202			    void *, struct pf_pdesc *, u_short *);
203int			 pf_test_state_udp(struct pf_state **, int,
204			    struct pfi_kif *, struct mbuf *, int,
205			    void *, struct pf_pdesc *);
206int			 pf_test_state_icmp(struct pf_state **, int,
207			    struct pfi_kif *, struct mbuf *, int,
208			    void *, struct pf_pdesc *, u_short *);
209int			 pf_test_state_other(struct pf_state **, int,
210			    struct pfi_kif *, struct pf_pdesc *);
211int			 pf_match_tag(struct mbuf *, struct pf_rule *, int *);
212void			 pf_step_into_anchor(int *, struct pf_ruleset **, int,
213			    struct pf_rule **, struct pf_rule **,  int *);
214int			 pf_step_out_of_anchor(int *, struct pf_ruleset **,
215			     int, struct pf_rule **, struct pf_rule **,
216			     int *);
217void			 pf_hash(const struct pf_addr *, struct pf_addr *,
218			    struct pf_poolhashkey *, sa_family_t);
219int			 pf_map_addr(u_int8_t, struct pf_rule *,
220			    const struct pf_addr *, struct pf_addr *,
221			    struct pf_addr *, struct pf_src_node **);
222int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
223			    struct pf_addr *, struct pf_addr *, u_int16_t,
224			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
225			    struct pf_src_node **);
226void			 pf_route(struct mbuf **, struct pf_rule *, int,
227			    struct ifnet *, struct pf_state *,
228			    struct pf_pdesc *);
229void			 pf_route6(struct mbuf **, struct pf_rule *, int,
230			    struct ifnet *, struct pf_state *,
231			    struct pf_pdesc *);
232int			 pf_socket_lookup(int, struct pf_pdesc *);
233u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
234			    sa_family_t);
235u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
236			    sa_family_t);
237u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
238				u_int16_t);
239void			 pf_set_rt_ifp(struct pf_state *,
240			    struct pf_addr *);
241#ifdef __NetBSD__
242int			 pf_check_proto_cksum(struct mbuf *, int, int, int,
243			    u_int8_t, sa_family_t);
244#else
245int			 pf_check_proto_cksum(struct mbuf *, int, int,
246			    u_int8_t, sa_family_t);
247#endif /* !__NetBSD__ */
248int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
249			    struct pf_addr_wrap *);
250struct pf_state		*pf_find_state(struct pfi_kif *,
251			    struct pf_state_key_cmp *, u_int8_t);
252int			 pf_src_connlimit(struct pf_state **);
253void			 pf_stateins_err(const char *, struct pf_state *,
254			    struct pfi_kif *);
255int			 pf_check_congestion(struct ifqueue *);
256
257extern struct pool pfr_ktable_pl;
258extern struct pool pfr_kentry_pl;
259
260extern int pf_state_lock;
261
262struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
263	{ &pf_state_pl, PFSTATE_HIWAT },
264	{ &pf_src_tree_pl, PFSNODE_HIWAT },
265	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT },
266	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT },
267	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT }
268};
269
270#define STATE_LOOKUP()							\
271	do {								\
272		if (pf_state_lock) {		    \
273			*state = NULL;				\
274			return (PF_DROP);			\
275		}								\
276		if (direction == PF_IN)					\
277			*state = pf_find_state(kif, &key, PF_EXT_GWY);	\
278		else							\
279			*state = pf_find_state(kif, &key, PF_LAN_EXT);	\
280		if (*state == NULL || (*state)->timeout == PFTM_PURGE)	\
281			return (PF_DROP);				\
282		if (direction == PF_OUT &&				\
283		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
284		      (*state)->rule.ptr->direction == PF_OUT) ||	\
285		     ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
286		      (*state)->rule.ptr->direction == PF_IN)) &&	\
287		    (*state)->rt_kif != NULL &&				\
288		    (*state)->rt_kif != kif)				\
289			return (PF_PASS);				\
290	} while (0)
291
292#define	STATE_TRANSLATE(sk) \
293	(sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] || \
294	((sk)->af == AF_INET6 && \
295	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] || \
296	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] || \
297	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])) || \
298	(sk)->lan.port != (sk)->gwy.port
299
300#define BOUND_IFACE(r, k) \
301	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
302
303#define STATE_INC_COUNTERS(s)				\
304	do {						\
305		s->rule.ptr->states++;			\
306		if (s->anchor.ptr != NULL)		\
307			s->anchor.ptr->states++;	\
308		if (s->nat_rule.ptr != NULL)		\
309			s->nat_rule.ptr->states++;	\
310	} while (0)
311
312#define STATE_DEC_COUNTERS(s)				\
313	do {						\
314		if (s->nat_rule.ptr != NULL)		\
315			s->nat_rule.ptr->states--;	\
316		if (s->anchor.ptr != NULL)		\
317			s->anchor.ptr->states--;	\
318		s->rule.ptr->states--;			\
319	} while (0)
320
321static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
322static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
323	struct pf_state_key *);
324static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
325	struct pf_state_key *);
326static __inline int pf_state_compare_id(struct pf_state *,
327	struct pf_state *);
328
329struct pf_src_tree tree_src_tracking;
330
331struct pf_state_tree_id tree_id;
332struct pf_state_queue state_list;
333
334RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
335RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
336    entry_lan_ext, pf_state_compare_lan_ext);
337RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
338    entry_ext_gwy, pf_state_compare_ext_gwy);
339RB_GENERATE(pf_state_tree_id, pf_state,
340    entry_id, pf_state_compare_id);
341
342#define	PF_DT_SKIP_LANEXT	0x01
343#define	PF_DT_SKIP_EXTGWY	0x02
344
345#ifdef __NetBSD__
346static __inline struct pfi_kif *
347bound_iface(const struct pf_rule *r, const struct pf_rule *nr,
348    struct pfi_kif *k)
349{
350	uint32_t rule_flag;
351
352	rule_flag = r->rule_flag;
353	if (nr != NULL)
354		rule_flag |= nr->rule_flag;
355
356	return ((rule_flag & PFRULE_IFBOUND) != 0) ? k : pfi_all;
357}
358#endif /* __NetBSD__ */
359
360static __inline int
361pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
362{
363	int	diff;
364
365	if (a->rule.ptr > b->rule.ptr)
366		return (1);
367	if (a->rule.ptr < b->rule.ptr)
368		return (-1);
369	if ((diff = a->af - b->af) != 0)
370		return (diff);
371	switch (a->af) {
372#ifdef INET
373	case AF_INET:
374		if (a->addr.addr32[0] > b->addr.addr32[0])
375			return (1);
376		if (a->addr.addr32[0] < b->addr.addr32[0])
377			return (-1);
378		break;
379#endif /* INET */
380#ifdef INET6
381	case AF_INET6:
382		if (a->addr.addr32[3] > b->addr.addr32[3])
383			return (1);
384		if (a->addr.addr32[3] < b->addr.addr32[3])
385			return (-1);
386		if (a->addr.addr32[2] > b->addr.addr32[2])
387			return (1);
388		if (a->addr.addr32[2] < b->addr.addr32[2])
389			return (-1);
390		if (a->addr.addr32[1] > b->addr.addr32[1])
391			return (1);
392		if (a->addr.addr32[1] < b->addr.addr32[1])
393			return (-1);
394		if (a->addr.addr32[0] > b->addr.addr32[0])
395			return (1);
396		if (a->addr.addr32[0] < b->addr.addr32[0])
397			return (-1);
398		break;
399#endif /* INET6 */
400	}
401	return (0);
402}
403
404static __inline int
405pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
406{
407	int	diff;
408
409	if ((diff = a->proto - b->proto) != 0)
410		return (diff);
411	if ((diff = a->af - b->af) != 0)
412		return (diff);
413	switch (a->af) {
414#ifdef INET
415	case AF_INET:
416		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
417			return (1);
418		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
419			return (-1);
420		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
421			return (1);
422		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
423			return (-1);
424		break;
425#endif /* INET */
426#ifdef INET6
427	case AF_INET6:
428		if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
429			return (1);
430		if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
431			return (-1);
432		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
433			return (1);
434		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
435			return (-1);
436		if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
437			return (1);
438		if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
439			return (-1);
440		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
441			return (1);
442		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
443			return (-1);
444		if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
445			return (1);
446		if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
447			return (-1);
448		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
449			return (1);
450		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
451			return (-1);
452		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
453			return (1);
454		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
455			return (-1);
456		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
457			return (1);
458		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
459			return (-1);
460		break;
461#endif /* INET6 */
462	}
463
464	if ((diff = a->lan.port - b->lan.port) != 0)
465		return (diff);
466	if ((diff = a->ext.port - b->ext.port) != 0)
467		return (diff);
468
469	return (0);
470}
471
472static __inline int
473pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
474{
475	int	diff;
476
477	if ((diff = a->proto - b->proto) != 0)
478		return (diff);
479	if ((diff = a->af - b->af) != 0)
480		return (diff);
481	switch (a->af) {
482#ifdef INET
483	case AF_INET:
484		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
485			return (1);
486		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
487			return (-1);
488		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
489			return (1);
490		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
491			return (-1);
492		break;
493#endif /* INET */
494#ifdef INET6
495	case AF_INET6:
496		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
497			return (1);
498		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
499			return (-1);
500		if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
501			return (1);
502		if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
503			return (-1);
504		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
505			return (1);
506		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
507			return (-1);
508		if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
509			return (1);
510		if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
511			return (-1);
512		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
513			return (1);
514		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
515			return (-1);
516		if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
517			return (1);
518		if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
519			return (-1);
520		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
521			return (1);
522		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
523			return (-1);
524		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
525			return (1);
526		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
527			return (-1);
528		break;
529#endif /* INET6 */
530	}
531
532	if ((diff = a->ext.port - b->ext.port) != 0)
533		return (diff);
534	if ((diff = a->gwy.port - b->gwy.port) != 0)
535		return (diff);
536
537	return (0);
538}
539
540static __inline int
541pf_state_compare_id(struct pf_state *a, struct pf_state *b)
542{
543	if (a->id > b->id)
544		return (1);
545	if (a->id < b->id)
546		return (-1);
547	if (a->creatorid > b->creatorid)
548		return (1);
549	if (a->creatorid < b->creatorid)
550		return (-1);
551
552	return (0);
553}
554
555#ifdef INET6
556void
557pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af)
558{
559	switch (af) {
560#ifdef INET
561	case AF_INET:
562		dst->addr32[0] = src->addr32[0];
563		break;
564#endif /* INET */
565	case AF_INET6:
566		dst->addr32[0] = src->addr32[0];
567		dst->addr32[1] = src->addr32[1];
568		dst->addr32[2] = src->addr32[2];
569		dst->addr32[3] = src->addr32[3];
570		break;
571	}
572}
573#endif /* INET6 */
574
575struct pf_state *
576pf_find_state_byid(struct pf_state_cmp *key)
577{
578	pf_status.fcounters[FCNT_STATE_SEARCH]++;
579
580	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
581}
582
583struct pf_state *
584pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int8_t tree)
585{
586	struct pf_state_key	*sk;
587	struct pf_state		*s;
588
589	pf_status.fcounters[FCNT_STATE_SEARCH]++;
590
591	switch (tree) {
592	case PF_LAN_EXT:
593		sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
594		    (struct pf_state_key *)key);
595		break;
596	case PF_EXT_GWY:
597		sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
598		    (struct pf_state_key *)key);
599		break;
600	default:
601		panic("pf_find_state");
602	}
603
604	/* list is sorted, if-bound states before floating ones */
605	if (sk != NULL)
606		TAILQ_FOREACH(s, &sk->states, next)
607			if (s->kif == pfi_all || s->kif == kif)
608				return (s);
609
610	return (NULL);
611}
612
613struct pf_state *
614pf_find_state_all(struct pf_state_key_cmp *key, u_int8_t tree, int *more)
615{
616	struct pf_state_key	*sk;
617	struct pf_state		*s, *ret = NULL;
618
619	pf_status.fcounters[FCNT_STATE_SEARCH]++;
620
621	switch (tree) {
622	case PF_LAN_EXT:
623		sk = RB_FIND(pf_state_tree_lan_ext,
624		    &pf_statetbl_lan_ext, (struct pf_state_key *)key);
625		break;
626	case PF_EXT_GWY:
627		sk = RB_FIND(pf_state_tree_ext_gwy,
628		    &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
629		break;
630	default:
631		panic("pf_find_state_all");
632	}
633
634	if (sk != NULL) {
635		ret = TAILQ_FIRST(&sk->states);
636		if (more == NULL)
637			return (ret);
638
639		TAILQ_FOREACH(s, &sk->states, next)
640			(*more)++;
641	}
642
643	return (ret);
644}
645
646void
647pf_init_threshold(struct pf_threshold *threshold,
648    u_int32_t limit, u_int32_t seconds)
649{
650	threshold->limit = limit * PF_THRESHOLD_MULT;
651	threshold->seconds = seconds;
652	threshold->count = 0;
653	threshold->last = time_second;
654}
655
656void
657pf_add_threshold(struct pf_threshold *threshold)
658{
659	u_int32_t t = time_second, diff = t - threshold->last;
660
661	if (diff >= threshold->seconds)
662		threshold->count = 0;
663	else
664		threshold->count -= threshold->count * diff /
665		    threshold->seconds;
666	threshold->count += PF_THRESHOLD_MULT;
667	threshold->last = t;
668}
669
670int
671pf_check_threshold(struct pf_threshold *threshold)
672{
673	return (threshold->count > threshold->limit);
674}
675
676int
677pf_src_connlimit(struct pf_state **state)
678{
679	int bad = 0;
680
681	(*state)->src_node->conn++;
682	(*state)->src.tcp_est = 1;
683	pf_add_threshold(&(*state)->src_node->conn_rate);
684
685	if ((*state)->rule.ptr->max_src_conn &&
686	    (*state)->rule.ptr->max_src_conn <
687	    (*state)->src_node->conn) {
688		pf_status.lcounters[LCNT_SRCCONN]++;
689		bad++;
690	}
691
692	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
693	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
694		pf_status.lcounters[LCNT_SRCCONNRATE]++;
695		bad++;
696	}
697
698	if (!bad)
699		return (0);
700
701	if ((*state)->rule.ptr->overload_tbl) {
702		struct pfr_addr p;
703		u_int32_t	killed = 0;
704
705		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
706		if (pf_status.debug >= PF_DEBUG_MISC) {
707			printf("pf_src_connlimit: blocking address ");
708			pf_print_host(&(*state)->src_node->addr, 0,
709			    (*state)->state_key->af);
710		}
711
712		bzero(&p, sizeof(p));
713		p.pfra_af = (*state)->state_key->af;
714		switch ((*state)->state_key->af) {
715#ifdef INET
716		case AF_INET:
717			p.pfra_net = 32;
718			p.pfra_ip4addr = (*state)->src_node->addr.v4;
719			break;
720#endif /* INET */
721#ifdef INET6
722		case AF_INET6:
723			p.pfra_net = 128;
724			p.pfra_ip6addr = (*state)->src_node->addr.v6;
725			break;
726#endif /* INET6 */
727		}
728
729		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
730		    &p, time_second);
731
732		/* kill existing states if that's required. */
733		if ((*state)->rule.ptr->flush) {
734			struct pf_state_key *sk;
735			struct pf_state *st;
736
737			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
738			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
739				sk = st->state_key;
740				/*
741				 * Kill states from this source.  (Only those
742				 * from the same rule if PF_FLUSH_GLOBAL is not
743				 * set)
744				 */
745				if (sk->af ==
746				    (*state)->state_key->af &&
747				    (((*state)->state_key->direction ==
748				        PF_OUT &&
749				    PF_AEQ(&(*state)->src_node->addr,
750				        &sk->lan.addr, sk->af)) ||
751				    ((*state)->state_key->direction == PF_IN &&
752				    PF_AEQ(&(*state)->src_node->addr,
753				        &sk->ext.addr, sk->af))) &&
754				    ((*state)->rule.ptr->flush &
755				    PF_FLUSH_GLOBAL ||
756				    (*state)->rule.ptr == st->rule.ptr)) {
757					st->timeout = PFTM_PURGE;
758					st->src.state = st->dst.state =
759					    TCPS_CLOSED;
760					killed++;
761				}
762			}
763			if (pf_status.debug >= PF_DEBUG_MISC)
764				printf(", %u states killed", killed);
765		}
766		if (pf_status.debug >= PF_DEBUG_MISC)
767			printf("\n");
768	}
769
770	/* kill this state */
771	(*state)->timeout = PFTM_PURGE;
772	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
773	return (1);
774}
775
776int
777pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
778    struct pf_addr *src, sa_family_t af)
779{
780	struct pf_src_node	k;
781
782	if (*sn == NULL) {
783		k.af = af;
784		PF_ACPY(&k.addr, src, af);
785		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
786		    rule->rpool.opts & PF_POOL_STICKYADDR)
787			k.rule.ptr = rule;
788		else
789			k.rule.ptr = NULL;
790		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
791		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
792	}
793	if (*sn == NULL) {
794		if (!rule->max_src_nodes ||
795		    rule->src_nodes < rule->max_src_nodes)
796			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
797		else
798			pf_status.lcounters[LCNT_SRCNODES]++;
799		if ((*sn) == NULL)
800			return (-1);
801		bzero(*sn, sizeof(struct pf_src_node));
802
803		pf_init_threshold(&(*sn)->conn_rate,
804		    rule->max_src_conn_rate.limit,
805		    rule->max_src_conn_rate.seconds);
806
807		(*sn)->af = af;
808		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
809		    rule->rpool.opts & PF_POOL_STICKYADDR)
810			(*sn)->rule.ptr = rule;
811		else
812			(*sn)->rule.ptr = NULL;
813		PF_ACPY(&(*sn)->addr, src, af);
814		if (RB_INSERT(pf_src_tree,
815		    &tree_src_tracking, *sn) != NULL) {
816			if (pf_status.debug >= PF_DEBUG_MISC) {
817				printf("pf: src_tree insert failed: ");
818				pf_print_host(&(*sn)->addr, 0, af);
819				printf("\n");
820			}
821			pool_put(&pf_src_tree_pl, *sn);
822			return (-1);
823		}
824		(*sn)->creation = time_second;
825		(*sn)->ruletype = rule->action;
826		if ((*sn)->rule.ptr != NULL)
827			(*sn)->rule.ptr->src_nodes++;
828		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
829		pf_status.src_nodes++;
830	} else {
831		if (rule->max_src_states &&
832		    (*sn)->states >= rule->max_src_states) {
833			pf_status.lcounters[LCNT_SRCSTATES]++;
834			return (-1);
835		}
836	}
837	return (0);
838}
839
840void
841pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
842{
843	struct pf_state_key	*sk = s->state_key;
844
845	if (pf_status.debug >= PF_DEBUG_MISC) {
846		printf("pf: state insert failed: %s %s", tree, kif->pfik_name);
847		printf(" lan: ");
848		pf_print_host(&sk->lan.addr, sk->lan.port,
849		    sk->af);
850		printf(" gwy: ");
851		pf_print_host(&sk->gwy.addr, sk->gwy.port,
852		    sk->af);
853		printf(" ext: ");
854		pf_print_host(&sk->ext.addr, sk->ext.port,
855		    sk->af);
856		if (s->sync_flags & PFSTATE_FROMSYNC)
857			printf(" (from sync)");
858		printf("\n");
859	}
860}
861
862int
863pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
864{
865	struct pf_state_key	*cur;
866	struct pf_state		*sp;
867
868	KASSERT(s->state_key != NULL);
869	s->kif = kif;
870
871	if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
872	    s->state_key)) != NULL) {
873		/* key exists. check for same kif, if none, add to key */
874		TAILQ_FOREACH(sp, &cur->states, next)
875			if (sp->kif == kif) {	/* collision! */
876				pf_stateins_err("tree_lan_ext", s, kif);
877				pf_detach_state(s,
878				    PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY);
879				return (-1);
880			}
881		pf_detach_state(s, PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY);
882		pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
883	}
884
885	/* if cur != NULL, we already found a state key and attached to it */
886	if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy,
887	    &pf_statetbl_ext_gwy, s->state_key)) != NULL) {
888		/* must not happen. we must have found the sk above! */
889		pf_stateins_err("tree_ext_gwy", s, kif);
890		pf_detach_state(s, PF_DT_SKIP_EXTGWY);
891		return (-1);
892	}
893
894	if (s->id == 0 && s->creatorid == 0) {
895		s->id = htobe64(pf_status.stateid++);
896		s->creatorid = pf_status.hostid;
897	}
898	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
899		if (pf_status.debug >= PF_DEBUG_MISC) {
900#ifdef __NetBSD__
901			printf("pf: state insert failed: "
902			    "id: %016" PRIx64 " creatorid: %08x",
903			    be64toh(s->id), ntohl(s->creatorid));
904#else
905			printf("pf: state insert failed: "
906			    "id: %016llx creatorid: %08x",
907			    betoh64(s->id), ntohl(s->creatorid));
908#endif /* !__NetBSD__ */
909			if (s->sync_flags & PFSTATE_FROMSYNC)
910				printf(" (from sync)");
911			printf("\n");
912		}
913		pf_detach_state(s, 0);
914		return (-1);
915	}
916	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
917	pf_status.fcounters[FCNT_STATE_INSERT]++;
918	pf_status.states++;
919	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
920#if NPFSYNC
921	pfsync_insert_state(s);
922#endif
923	return (0);
924}
925
926#ifdef _MODULE
927volatile int pf_purge_thread_stop;
928volatile int pf_purge_thread_running;
929#endif
930
931void
932pf_purge_thread(void *v)
933{
934	int nloops = 0, s;
935
936#ifdef _MODULE
937	pf_purge_thread_running = 1;
938	pf_purge_thread_stop = 0;
939
940	while (!pf_purge_thread_stop) {
941#else
942	for (;;) {
943#endif /* !_MODULE */
944		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
945
946		s = splsoftnet();
947
948		/* process a fraction of the state table every second */
949		if (! pf_state_lock)
950			pf_purge_expired_states(1 + (pf_status.states
951						/ pf_default_rule.timeout[PFTM_INTERVAL]));
952
953		/* purge other expired types every PFTM_INTERVAL seconds */
954		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
955			pf_purge_expired_fragments();
956			pf_purge_expired_src_nodes(0);
957			nloops = 0;
958		}
959
960		splx(s);
961	}
962
963#ifdef _MODULE
964	pf_purge_thread_running = 0;
965	wakeup(&pf_purge_thread_running);
966	kthread_exit(0);
967#endif /* _MODULE */
968}
969
970u_int32_t
971pf_state_expires(const struct pf_state *state)
972{
973	u_int32_t	timeout;
974	u_int32_t	start;
975	u_int32_t	end;
976	u_int32_t	states;
977
978	/* handle all PFTM_* > PFTM_MAX here */
979	if (state->timeout == PFTM_PURGE)
980		return (time_second);
981	if (state->timeout == PFTM_UNTIL_PACKET)
982		return (0);
983	KASSERT(state->timeout != PFTM_UNLINKED);
984	KASSERT(state->timeout < PFTM_MAX);
985	timeout = state->rule.ptr->timeout[state->timeout];
986	if (!timeout)
987		timeout = pf_default_rule.timeout[state->timeout];
988	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
989	if (start) {
990		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
991		states = state->rule.ptr->states;
992	} else {
993		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
994		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
995		states = pf_status.states;
996	}
997	if (end && states > start && start < end) {
998		if (states < end)
999			return (state->expire + timeout * (end - states) /
1000			    (end - start));
1001		else
1002			return (time_second);
1003	}
1004	return (state->expire + timeout);
1005}
1006
1007void
1008pf_purge_expired_src_nodes(int waslocked)
1009{
1010	 struct pf_src_node		*cur, *next;
1011	 int				 locked = waslocked;
1012
1013	 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
1014		 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
1015
1016		 if (cur->states <= 0 && cur->expire <= time_second) {
1017			 if (! locked) {
1018				 rw_enter_write(&pf_consistency_lock);
1019			 	 next = RB_NEXT(pf_src_tree,
1020				     &tree_src_tracking, cur);
1021				 locked = 1;
1022			 }
1023			 if (cur->rule.ptr != NULL) {
1024				 cur->rule.ptr->src_nodes--;
1025				 if (cur->rule.ptr->states <= 0 &&
1026				     cur->rule.ptr->max_src_nodes <= 0)
1027					 pf_rm_rule(NULL, cur->rule.ptr);
1028			 }
1029			 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
1030			 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1031			 pf_status.src_nodes--;
1032			 pool_put(&pf_src_tree_pl, cur);
1033		 }
1034	 }
1035
1036	 if (locked && !waslocked)
1037		rw_exit_write(&pf_consistency_lock);
1038}
1039
1040void
1041pf_src_tree_remove_state(struct pf_state *s)
1042{
1043	u_int32_t timeout;
1044
1045	if (s->src_node != NULL) {
1046		if (s->src.tcp_est)
1047			--s->src_node->conn;
1048		if (--s->src_node->states <= 0) {
1049			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1050			if (!timeout)
1051				timeout =
1052				    pf_default_rule.timeout[PFTM_SRC_NODE];
1053			s->src_node->expire = time_second + timeout;
1054		}
1055	}
1056	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1057		if (--s->nat_src_node->states <= 0) {
1058			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1059			if (!timeout)
1060				timeout =
1061				    pf_default_rule.timeout[PFTM_SRC_NODE];
1062			s->nat_src_node->expire = time_second + timeout;
1063		}
1064	}
1065	s->src_node = s->nat_src_node = NULL;
1066}
1067
1068/* callers should be at splsoftnet */
1069void
1070pf_unlink_state(struct pf_state *cur)
1071{
1072	if (cur->src.state == PF_TCPS_PROXY_DST) {
1073		pf_send_tcp(cur->rule.ptr, cur->state_key->af,
1074		    &cur->state_key->ext.addr, &cur->state_key->lan.addr,
1075		    cur->state_key->ext.port, cur->state_key->lan.port,
1076		    cur->src.seqhi, cur->src.seqlo + 1,
1077		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1078	}
1079	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
1080#if NPFSYNC
1081	if (cur->creatorid == pf_status.hostid)
1082		pfsync_delete_state(cur);
1083#endif
1084	cur->timeout = PFTM_UNLINKED;
1085	pf_src_tree_remove_state(cur);
1086	pf_detach_state(cur, 0);
1087}
1088
1089/* callers should be at splsoftnet and hold the
1090 * write_lock on pf_consistency_lock */
1091void
1092pf_free_state(struct pf_state *cur)
1093{
1094#if NPFSYNC
1095	if (pfsyncif != NULL &&
1096	    (pfsyncif->sc_bulk_send_next == cur ||
1097	    pfsyncif->sc_bulk_terminator == cur))
1098		return;
1099#endif
1100	KASSERT(cur->timeout == PFTM_UNLINKED);
1101	if (--cur->rule.ptr->states <= 0 &&
1102	    cur->rule.ptr->src_nodes <= 0)
1103		pf_rm_rule(NULL, cur->rule.ptr);
1104	if (cur->nat_rule.ptr != NULL)
1105		if (--cur->nat_rule.ptr->states <= 0 &&
1106			cur->nat_rule.ptr->src_nodes <= 0)
1107			pf_rm_rule(NULL, cur->nat_rule.ptr);
1108	if (cur->anchor.ptr != NULL)
1109		if (--cur->anchor.ptr->states <= 0)
1110			pf_rm_rule(NULL, cur->anchor.ptr);
1111	pf_normalize_tcp_cleanup(cur);
1112	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
1113	TAILQ_REMOVE(&state_list, cur, entry_list);
1114	if (cur->tag)
1115		pf_tag_unref(cur->tag);
1116	pool_put(&pf_state_pl, cur);
1117	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1118	pf_status.states--;
1119}
1120
1121void
1122pf_purge_expired_states(u_int32_t maxcheck)
1123{
1124	static struct pf_state	*cur = NULL;
1125	struct pf_state		*next;
1126	int 			 locked = 0;
1127
1128	while (maxcheck--) {
1129		/* wrap to start of list when we hit the end */
1130		if (cur == NULL) {
1131			cur = TAILQ_FIRST(&state_list);
1132			if (cur == NULL)
1133				break;	/* list empty */
1134		}
1135
1136		/* get next state, as cur may get deleted */
1137		next = TAILQ_NEXT(cur, entry_list);
1138
1139		if (cur->timeout == PFTM_UNLINKED) {
1140			/* free unlinked state */
1141			if (! locked) {
1142				rw_enter_write(&pf_consistency_lock);
1143				locked = 1;
1144			}
1145			pf_free_state(cur);
1146		} else if (pf_state_expires(cur) <= time_second) {
1147			/* unlink and free expired state */
1148			pf_unlink_state(cur);
1149			if (! locked) {
1150				rw_enter_write(&pf_consistency_lock);
1151				locked = 1;
1152			}
1153			pf_free_state(cur);
1154		}
1155		cur = next;
1156	}
1157
1158	if (locked)
1159		rw_exit_write(&pf_consistency_lock);
1160}
1161
1162int
1163pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1164{
1165	if (aw->type != PF_ADDR_TABLE)
1166		return (0);
1167	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1168		return (1);
1169	return (0);
1170}
1171
1172void
1173pf_tbladdr_remove(struct pf_addr_wrap *aw)
1174{
1175	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1176		return;
1177	pfr_detach_table(aw->p.tbl);
1178	aw->p.tbl = NULL;
1179}
1180
1181void
1182pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1183{
1184	struct pfr_ktable *kt = aw->p.tbl;
1185
1186	if (aw->type != PF_ADDR_TABLE || kt == NULL)
1187		return;
1188	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1189		kt = kt->pfrkt_root;
1190	aw->p.tbl = NULL;
1191	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1192		kt->pfrkt_cnt : -1;
1193}
1194
1195void
1196pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1197{
1198	switch (af) {
1199#ifdef INET
1200	case AF_INET: {
1201		u_int32_t a = ntohl(addr->addr32[0]);
1202		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
1203		    (a>>8)&255, a&255);
1204		if (p) {
1205			p = ntohs(p);
1206			printf(":%u", p);
1207		}
1208		break;
1209	}
1210#endif /* INET */
1211#ifdef INET6
1212	case AF_INET6: {
1213		u_int16_t b;
1214		u_int8_t i, curstart = 255, curend = 0,
1215		    maxstart = 0, maxend = 0;
1216		for (i = 0; i < 8; i++) {
1217			if (!addr->addr16[i]) {
1218				if (curstart == 255)
1219					curstart = i;
1220				else
1221					curend = i;
1222			} else {
1223				if (curstart) {
1224					if ((curend - curstart) >
1225					    (maxend - maxstart)) {
1226						maxstart = curstart;
1227						maxend = curend;
1228						curstart = 255;
1229					}
1230				}
1231			}
1232		}
1233		for (i = 0; i < 8; i++) {
1234			if (i >= maxstart && i <= maxend) {
1235				if (maxend != 7) {
1236					if (i == maxstart)
1237						printf(":");
1238				} else {
1239					if (i == maxend)
1240						printf(":");
1241				}
1242			} else {
1243				b = ntohs(addr->addr16[i]);
1244				printf("%x", b);
1245				if (i < 7)
1246					printf(":");
1247			}
1248		}
1249		if (p) {
1250			p = ntohs(p);
1251			printf("[%u]", p);
1252		}
1253		break;
1254	}
1255#endif /* INET6 */
1256	}
1257}
1258
1259void
1260pf_print_state(struct pf_state *s)
1261{
1262	struct pf_state_key *sk = s->state_key;
1263	switch (sk->proto) {
1264	case IPPROTO_TCP:
1265		printf("TCP ");
1266		break;
1267	case IPPROTO_UDP:
1268		printf("UDP ");
1269		break;
1270	case IPPROTO_ICMP:
1271		printf("ICMP ");
1272		break;
1273	case IPPROTO_ICMPV6:
1274		printf("ICMPV6 ");
1275		break;
1276	default:
1277		printf("%u ", sk->proto);
1278		break;
1279	}
1280	pf_print_host(&sk->lan.addr, sk->lan.port, sk->af);
1281	printf(" ");
1282	pf_print_host(&sk->gwy.addr, sk->gwy.port, sk->af);
1283	printf(" ");
1284	pf_print_host(&sk->ext.addr, sk->ext.port, sk->af);
1285	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
1286	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
1287	if (s->src.wscale && s->dst.wscale)
1288		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1289	printf("]");
1290	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
1291	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1292	if (s->src.wscale && s->dst.wscale)
1293		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1294	printf("]");
1295	printf(" %u:%u", s->src.state, s->dst.state);
1296}
1297
1298void
1299pf_print_flags(u_int8_t f)
1300{
1301	if (f)
1302		printf(" ");
1303	if (f & TH_FIN)
1304		printf("F");
1305	if (f & TH_SYN)
1306		printf("S");
1307	if (f & TH_RST)
1308		printf("R");
1309	if (f & TH_PUSH)
1310		printf("P");
1311	if (f & TH_ACK)
1312		printf("A");
1313	if (f & TH_URG)
1314		printf("U");
1315	if (f & TH_ECE)
1316		printf("E");
1317	if (f & TH_CWR)
1318		printf("W");
1319}
1320
1321#define	PF_SET_SKIP_STEPS(i)					\
1322	do {							\
1323		while (head[i] != cur) {			\
1324			head[i]->skip[i].ptr = cur;		\
1325			head[i] = TAILQ_NEXT(head[i], entries);	\
1326		}						\
1327	} while (0)
1328
1329void
1330pf_calc_skip_steps(struct pf_rulequeue *rules)
1331{
1332	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1333	int i;
1334
1335	cur = TAILQ_FIRST(rules);
1336	prev = cur;
1337	for (i = 0; i < PF_SKIP_COUNT; ++i)
1338		head[i] = cur;
1339	while (cur != NULL) {
1340
1341		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1342			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1343		if (cur->direction != prev->direction)
1344			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1345		if (cur->af != prev->af)
1346			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1347		if (cur->proto != prev->proto)
1348			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1349		if (cur->src.neg != prev->src.neg ||
1350		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1351			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1352		if (cur->src.port[0] != prev->src.port[0] ||
1353		    cur->src.port[1] != prev->src.port[1] ||
1354		    cur->src.port_op != prev->src.port_op)
1355			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1356		if (cur->dst.neg != prev->dst.neg ||
1357		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1358			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1359		if (cur->dst.port[0] != prev->dst.port[0] ||
1360		    cur->dst.port[1] != prev->dst.port[1] ||
1361		    cur->dst.port_op != prev->dst.port_op)
1362			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1363
1364		prev = cur;
1365		cur = TAILQ_NEXT(cur, entries);
1366	}
1367	for (i = 0; i < PF_SKIP_COUNT; ++i)
1368		PF_SET_SKIP_STEPS(i);
1369}
1370
1371int
1372pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1373{
1374	if (aw1->type != aw2->type)
1375		return (1);
1376	switch (aw1->type) {
1377	case PF_ADDR_ADDRMASK:
1378		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1379			return (1);
1380		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1381			return (1);
1382		return (0);
1383	case PF_ADDR_DYNIFTL:
1384		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1385	case PF_ADDR_NOROUTE:
1386	case PF_ADDR_URPFFAILED:
1387		return (0);
1388	case PF_ADDR_TABLE:
1389		return (aw1->p.tbl != aw2->p.tbl);
1390	case PF_ADDR_RTLABEL:
1391		return (aw1->v.rtlabel != aw2->v.rtlabel);
1392	default:
1393		printf("invalid address type: %d\n", aw1->type);
1394		return (1);
1395	}
1396}
1397
1398u_int16_t
1399pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
1400{
1401	u_int32_t	l;
1402
1403	if (udp && !cksum)
1404		return (0x0000);
1405	l = cksum + old - new;
1406	l = (l >> 16) + (l & 65535);
1407	l = l & 65535;
1408	if (udp && !l)
1409		return (0xFFFF);
1410	return (l);
1411}
1412
1413void
1414pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1415    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1416{
1417	struct pf_addr	ao;
1418	u_int16_t	po = *p;
1419
1420	PF_ACPY(&ao, a, af);
1421	PF_ACPY(a, an, af);
1422
1423	*p = pn;
1424
1425	switch (af) {
1426#ifdef INET
1427	case AF_INET:
1428		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1429		    ao.addr16[0], an->addr16[0], 0),
1430		    ao.addr16[1], an->addr16[1], 0);
1431		*p = pn;
1432		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1433		    ao.addr16[0], an->addr16[0], u),
1434		    ao.addr16[1], an->addr16[1], u),
1435		    po, pn, u);
1436		break;
1437#endif /* INET */
1438#ifdef INET6
1439	case AF_INET6:
1440		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1441		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1442		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1443		    ao.addr16[0], an->addr16[0], u),
1444		    ao.addr16[1], an->addr16[1], u),
1445		    ao.addr16[2], an->addr16[2], u),
1446		    ao.addr16[3], an->addr16[3], u),
1447		    ao.addr16[4], an->addr16[4], u),
1448		    ao.addr16[5], an->addr16[5], u),
1449		    ao.addr16[6], an->addr16[6], u),
1450		    ao.addr16[7], an->addr16[7], u),
1451		    po, pn, u);
1452		break;
1453#endif /* INET6 */
1454	}
1455}
1456
1457
1458/* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
1459void
1460pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
1461{
1462	u_int32_t	ao;
1463
1464	memcpy(&ao, a, sizeof(ao));
1465	memcpy(a, &an, sizeof(u_int32_t));
1466	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
1467	    ao % 65536, an % 65536, u);
1468}
1469
1470#ifdef INET6
1471void
1472pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
1473{
1474	struct pf_addr	ao;
1475
1476	PF_ACPY(&ao, a, AF_INET6);
1477	PF_ACPY(a, an, AF_INET6);
1478
1479	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1480	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1481	    pf_cksum_fixup(pf_cksum_fixup(*c,
1482	    ao.addr16[0], an->addr16[0], u),
1483	    ao.addr16[1], an->addr16[1], u),
1484	    ao.addr16[2], an->addr16[2], u),
1485	    ao.addr16[3], an->addr16[3], u),
1486	    ao.addr16[4], an->addr16[4], u),
1487	    ao.addr16[5], an->addr16[5], u),
1488	    ao.addr16[6], an->addr16[6], u),
1489	    ao.addr16[7], an->addr16[7], u);
1490}
1491#endif /* INET6 */
1492
1493void
1494pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1495    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1496    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1497{
1498	struct pf_addr	oia, ooa;
1499
1500	PF_ACPY(&oia, ia, af);
1501	PF_ACPY(&ooa, oa, af);
1502
1503	/* Change inner protocol port, fix inner protocol checksum. */
1504	if (ip != NULL) {
1505		u_int16_t	oip = *ip;
1506		u_int32_t	opc = 0;
1507
1508		if (pc != NULL)
1509			opc = *pc;
1510		*ip = np;
1511		if (pc != NULL)
1512			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1513		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1514		if (pc != NULL)
1515			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1516	}
1517	/* Change inner ip address, fix inner ip and icmp checksums. */
1518	PF_ACPY(ia, na, af);
1519	switch (af) {
1520#ifdef INET
1521	case AF_INET: {
1522		u_int32_t	 oh2c = *h2c;
1523
1524		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1525		    oia.addr16[0], ia->addr16[0], 0),
1526		    oia.addr16[1], ia->addr16[1], 0);
1527		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1528		    oia.addr16[0], ia->addr16[0], 0),
1529		    oia.addr16[1], ia->addr16[1], 0);
1530		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1531		break;
1532	}
1533#endif /* INET */
1534#ifdef INET6
1535	case AF_INET6:
1536		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1537		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1538		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1539		    oia.addr16[0], ia->addr16[0], u),
1540		    oia.addr16[1], ia->addr16[1], u),
1541		    oia.addr16[2], ia->addr16[2], u),
1542		    oia.addr16[3], ia->addr16[3], u),
1543		    oia.addr16[4], ia->addr16[4], u),
1544		    oia.addr16[5], ia->addr16[5], u),
1545		    oia.addr16[6], ia->addr16[6], u),
1546		    oia.addr16[7], ia->addr16[7], u);
1547		break;
1548#endif /* INET6 */
1549	}
1550	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
1551	PF_ACPY(oa, na, af);
1552	switch (af) {
1553#ifdef INET
1554	case AF_INET:
1555		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1556		    ooa.addr16[0], oa->addr16[0], 0),
1557		    ooa.addr16[1], oa->addr16[1], 0);
1558		break;
1559#endif /* INET */
1560#ifdef INET6
1561	case AF_INET6:
1562		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1563		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1564		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1565		    ooa.addr16[0], oa->addr16[0], u),
1566		    ooa.addr16[1], oa->addr16[1], u),
1567		    ooa.addr16[2], oa->addr16[2], u),
1568		    ooa.addr16[3], oa->addr16[3], u),
1569		    ooa.addr16[4], oa->addr16[4], u),
1570		    ooa.addr16[5], oa->addr16[5], u),
1571		    ooa.addr16[6], oa->addr16[6], u),
1572		    ooa.addr16[7], oa->addr16[7], u);
1573		break;
1574#endif /* INET6 */
1575	}
1576}
1577
1578
1579/*
1580 * Need to modulate the sequence numbers in the TCP SACK option
1581 * (credits to Krzysztof Pfaff for report and patch)
1582 */
1583int
1584pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
1585    struct tcphdr *th, struct pf_state_peer *dst)
1586{
1587	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
1588	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
1589	int copyback = 0, i, olen;
1590	struct sackblk sack;
1591
1592#ifdef __NetBSD__
1593#define	TCPOLEN_SACK		8		/* 2*sizeof(tcp_seq) */
1594#endif
1595
1596#define TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
1597	if (hlen < TCPOLEN_SACKLEN ||
1598	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
1599		return 0;
1600
1601	while (hlen >= TCPOLEN_SACKLEN) {
1602		olen = opt[1];
1603		switch (*opt) {
1604		case TCPOPT_EOL:	/* FALLTHROUGH */
1605		case TCPOPT_NOP:
1606			opt++;
1607			hlen--;
1608			break;
1609		case TCPOPT_SACK:
1610			if (olen > hlen)
1611				olen = hlen;
1612			if (olen >= TCPOLEN_SACKLEN) {
1613				for (i = 2; i + TCPOLEN_SACK <= olen;
1614				    i += TCPOLEN_SACK) {
1615					memcpy(&sack, &opt[i], sizeof(sack));
1616#ifdef __NetBSD__
1617#define	SACK_START	sack.left
1618#define	SACK_END	sack.right
1619#else
1620#define	SACK_START	sack.start
1621#define	SACK_END	sack.end
1622#endif
1623					pf_change_a(&SACK_START, &th->th_sum,
1624					    htonl(ntohl(SACK_START) -
1625					    dst->seqdiff), 0);
1626					pf_change_a(&SACK_END, &th->th_sum,
1627					    htonl(ntohl(SACK_END) -
1628					    dst->seqdiff), 0);
1629#undef SACK_START
1630#undef SACK_END
1631					memcpy(&opt[i], &sack, sizeof(sack));
1632				}
1633				copyback = 1;
1634			}
1635			/* FALLTHROUGH */
1636		default:
1637			if (olen < 2)
1638				olen = 2;
1639			hlen -= olen;
1640			opt += olen;
1641		}
1642	}
1643
1644	if (copyback)
1645		m_copyback(m, off + sizeof(*th), thoptlen, opts);
1646	return (copyback);
1647}
1648
1649void
1650pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1651    const struct pf_addr *saddr, const struct pf_addr *daddr,
1652    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
1653    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
1654    u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
1655{
1656	struct mbuf	*m;
1657	int		 len, tlen;
1658#ifdef INET
1659	struct ip	*h = NULL;
1660#endif /* INET */
1661#ifdef INET6
1662	struct ip6_hdr	*h6 = NULL;
1663#endif /* INET6 */
1664	struct tcphdr	*th;
1665	char		*opt;
1666#ifdef __NetBSD__
1667	struct pf_mtag	*pf_mtag;
1668#endif /* __NetBSD__ */
1669
1670	/* maximum segment size tcp option */
1671	tlen = sizeof(struct tcphdr);
1672	if (mss)
1673		tlen += 4;
1674
1675	switch (af) {
1676#ifdef INET
1677	case AF_INET:
1678		len = sizeof(struct ip) + tlen;
1679		break;
1680#endif /* INET */
1681#ifdef INET6
1682	case AF_INET6:
1683		len = sizeof(struct ip6_hdr) + tlen;
1684		break;
1685#endif /* INET6 */
1686	default:
1687		return;
1688	}
1689
1690	/* create outgoing mbuf */
1691	m = m_gethdr(M_DONTWAIT, MT_HEADER);
1692	if (m == NULL)
1693		return;
1694#ifdef __NetBSD__
1695	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
1696		m_freem(m);
1697		return;
1698	}
1699	if (tag)
1700		pf_mtag->flags |= PF_TAG_GENERATED;
1701	pf_mtag->tag = rtag;
1702
1703	if (r != NULL && r->rtableid >= 0)
1704		pf_mtag->rtableid = r->rtableid;
1705#else
1706	if (tag)
1707		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1708	m->m_pkthdr.pf.tag = rtag;
1709
1710	if (r != NULL && r->rtableid >= 0)
1711		m->m_pkthdr.pf.rtableid = r->rtableid;
1712#endif /* !__NetBSD__ */
1713
1714#ifdef ALTQ
1715	if (r != NULL && r->qid) {
1716#ifdef __NetBSD__
1717		struct m_tag	*mtag;
1718		struct altq_tag	*atag;
1719
1720		mtag = m_tag_get(PACKET_TAG_ALTQ_QID, sizeof(*atag), M_NOWAIT);
1721		if (mtag != NULL) {
1722			atag = (struct altq_tag *)(mtag + 1);
1723			atag->qid = r->qid;
1724			/* add hints for ecn */
1725			atag->af = af;
1726			atag->hdr = mtod(m, struct ip *);
1727			m_tag_prepend(m, mtag);
1728		}
1729#else
1730		m->m_pkthdr.pf.qid = r->qid;
1731		/* add hints for ecn */
1732		m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
1733#endif /* !__NetBSD__ */
1734	}
1735#endif /* ALTQ */
1736	m->m_data += max_linkhdr;
1737	m->m_pkthdr.len = m->m_len = len;
1738	m_reset_rcvif(m);
1739	bzero(m->m_data, len);
1740	switch (af) {
1741#ifdef INET
1742	case AF_INET:
1743		h = mtod(m, struct ip *);
1744
1745		/* IP header fields included in the TCP checksum */
1746		h->ip_p = IPPROTO_TCP;
1747		h->ip_len = htons(tlen);
1748		h->ip_src.s_addr = saddr->v4.s_addr;
1749		h->ip_dst.s_addr = daddr->v4.s_addr;
1750
1751		th = (struct tcphdr *)((char *)h + sizeof(struct ip));
1752		break;
1753#endif /* INET */
1754#ifdef INET6
1755	case AF_INET6:
1756		h6 = mtod(m, struct ip6_hdr *);
1757
1758		/* IP header fields included in the TCP checksum */
1759		h6->ip6_nxt = IPPROTO_TCP;
1760		h6->ip6_plen = htons(tlen);
1761		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1762		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1763
1764		th = (struct tcphdr *)((char *)h6 + sizeof(struct ip6_hdr));
1765		break;
1766#endif /* INET6 */
1767	default:
1768		m_freem(m);
1769		return;
1770	}
1771
1772	/* TCP header */
1773	th->th_sport = sport;
1774	th->th_dport = dport;
1775	th->th_seq = htonl(seq);
1776	th->th_ack = htonl(ack);
1777	th->th_off = tlen >> 2;
1778	th->th_flags = flags;
1779	th->th_win = htons(win);
1780
1781	if (mss) {
1782		opt = (char *)(th + 1);
1783		opt[0] = TCPOPT_MAXSEG;
1784		opt[1] = 4;
1785		HTONS(mss);
1786		bcopy((void *)&mss, (void *)(opt + 2), 2);
1787	}
1788
1789	switch (af) {
1790#ifdef INET
1791	case AF_INET:
1792		/* TCP checksum */
1793		th->th_sum = in_cksum(m, len);
1794
1795		/* Finish the IP header */
1796		h->ip_v = 4;
1797		h->ip_hl = sizeof(*h) >> 2;
1798		h->ip_tos = IPTOS_LOWDELAY;
1799		h->ip_len = htons(len);
1800		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
1801		h->ip_ttl = ttl ? ttl : ip_defttl;
1802		h->ip_sum = 0;
1803		if (eh == NULL) {
1804			ip_output(m, (void *)NULL, (void *)NULL, 0,
1805			    (void *)NULL, (void *)NULL);
1806		} else {
1807#ifdef __NetBSD__
1808			/*
1809			 * On netbsd, pf_test and pf_test6 are always called
1810			 * with eh == NULL.
1811			 */
1812			panic("pf_send_tcp: eh != NULL");
1813#else
1814			struct route		 ro;
1815			struct rtentry		 rt;
1816			struct ether_header	*e = (void *)ro.ro_dst.sa_data;
1817
1818			if (ifp == NULL) {
1819				m_freem(m);
1820				return;
1821			}
1822			rt.rt_ifp = ifp;
1823			ro.ro_rt = &rt;
1824			ro.ro_dst.sa_len = sizeof(ro.ro_dst);
1825			ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
1826			bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
1827			bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
1828			e->ether_type = eh->ether_type;
1829			ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER,
1830			    (void *)NULL, (void *)NULL);
1831#endif /* !__NetBSD__ */
1832		}
1833		break;
1834#endif /* INET */
1835#ifdef INET6
1836	case AF_INET6:
1837		/* TCP checksum */
1838		th->th_sum = in6_cksum(m, IPPROTO_TCP,
1839		    sizeof(struct ip6_hdr), tlen);
1840
1841		h6->ip6_vfc |= IPV6_VERSION;
1842		h6->ip6_hlim = IPV6_DEFHLIM;
1843
1844		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1845		break;
1846#endif /* INET6 */
1847	}
1848}
1849
1850void
1851pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1852    struct pf_rule *r)
1853{
1854	struct mbuf	*m0;
1855#ifdef __NetBSD__
1856	struct pf_mtag	*pf_mtag;
1857#endif /* __NetBSD__ */
1858
1859#ifdef __NetBSD__
1860	m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1861#else
1862	m0 = m_copy(m, 0, M_COPYALL);
1863#endif
1864
1865#ifdef __NetBSD__
1866	if ((pf_mtag = pf_get_mtag(m0)) == NULL)
1867		return;
1868	pf_mtag->flags |= PF_TAG_GENERATED;
1869
1870	if (r->rtableid >= 0)
1871		pf_mtag->rtableid = r->rtableid;
1872#else
1873	m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1874
1875	if (r->rtableid >= 0)
1876		m0->m_pkthdr.pf.rtableid = r->rtableid;
1877#endif /* !__NetBSD__ */
1878
1879#ifdef ALTQ
1880	if (r->qid) {
1881#ifdef __NetBSD__
1882		struct m_tag	*mtag;
1883		struct altq_tag	*atag;
1884
1885		mtag = m_tag_get(PACKET_TAG_ALTQ_QID, sizeof(*atag), M_NOWAIT);
1886		if (mtag != NULL) {
1887			atag = (struct altq_tag *)(mtag + 1);
1888			atag->qid = r->qid;
1889			/* add hints for ecn */
1890			atag->af = af;
1891			atag->hdr = mtod(m0, struct ip *);
1892			m_tag_prepend(m0, mtag);
1893		}
1894#else
1895		m0->m_pkthdr.pf.qid = r->qid;
1896		/* add hints for ecn */
1897		m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
1898#endif /* !__NetBSD__ */
1899	}
1900#endif /* ALTQ */
1901
1902	switch (af) {
1903#ifdef INET
1904	case AF_INET:
1905		icmp_error(m0, type, code, 0, 0);
1906		break;
1907#endif /* INET */
1908#ifdef INET6
1909	case AF_INET6:
1910		icmp6_error(m0, type, code, 0);
1911		break;
1912#endif /* INET6 */
1913	}
1914}
1915
1916/*
1917 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
1918 * If n is 0, they match if they are equal. If n is != 0, they match if they
1919 * are different.
1920 */
1921int
1922pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
1923    struct pf_addr *b, sa_family_t af)
1924{
1925	int	match = 0;
1926
1927	switch (af) {
1928#ifdef INET
1929	case AF_INET:
1930		if ((a->addr32[0] & m->addr32[0]) ==
1931		    (b->addr32[0] & m->addr32[0]))
1932			match++;
1933		break;
1934#endif /* INET */
1935#ifdef INET6
1936	case AF_INET6:
1937		if (((a->addr32[0] & m->addr32[0]) ==
1938		     (b->addr32[0] & m->addr32[0])) &&
1939		    ((a->addr32[1] & m->addr32[1]) ==
1940		     (b->addr32[1] & m->addr32[1])) &&
1941		    ((a->addr32[2] & m->addr32[2]) ==
1942		     (b->addr32[2] & m->addr32[2])) &&
1943		    ((a->addr32[3] & m->addr32[3]) ==
1944		     (b->addr32[3] & m->addr32[3])))
1945			match++;
1946		break;
1947#endif /* INET6 */
1948	}
1949	if (match) {
1950		if (n)
1951			return (0);
1952		else
1953			return (1);
1954	} else {
1955		if (n)
1956			return (1);
1957		else
1958			return (0);
1959	}
1960}
1961
1962int
1963pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1964{
1965	switch (op) {
1966	case PF_OP_IRG:
1967		return ((p > a1) && (p < a2));
1968	case PF_OP_XRG:
1969		return ((p < a1) || (p > a2));
1970	case PF_OP_RRG:
1971		return ((p >= a1) && (p <= a2));
1972	case PF_OP_EQ:
1973		return (p == a1);
1974	case PF_OP_NE:
1975		return (p != a1);
1976	case PF_OP_LT:
1977		return (p < a1);
1978	case PF_OP_LE:
1979		return (p <= a1);
1980	case PF_OP_GT:
1981		return (p > a1);
1982	case PF_OP_GE:
1983		return (p >= a1);
1984	}
1985	return (0); /* never reached */
1986}
1987
1988int
1989pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
1990{
1991	NTOHS(a1);
1992	NTOHS(a2);
1993	NTOHS(p);
1994	return (pf_match(op, a1, a2, p));
1995}
1996
1997int
1998pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1999{
2000	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2001		return (0);
2002	return (pf_match(op, a1, a2, u));
2003}
2004
2005int
2006pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2007{
2008	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2009		return (0);
2010	return (pf_match(op, a1, a2, g));
2011}
2012
2013int
2014pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
2015{
2016#ifdef __NetBSD__
2017	if (*tag == -1) {
2018		struct pf_mtag *pf_mtag = pf_get_mtag(m);
2019		if (pf_mtag == NULL)
2020			return (0);
2021
2022		*tag = pf_mtag->tag;
2023	}
2024#else
2025	if (*tag == -1)
2026		*tag = m->m_pkthdr.pf.tag;
2027#endif /* !__NetBSD__ */
2028
2029	return ((!r->match_tag_not && r->match_tag == *tag) ||
2030	    (r->match_tag_not && r->match_tag != *tag));
2031}
2032
2033int
2034pf_tag_packet(struct mbuf *m, int tag, int rtableid)
2035{
2036	if (tag <= 0 && rtableid < 0)
2037		return (0);
2038
2039#ifdef __NetBSD__
2040	if (tag > 0 || rtableid > 0) {
2041		struct pf_mtag *pf_mtag = pf_get_mtag(m);
2042		if (pf_mtag == NULL)
2043			return (1);
2044
2045		if (tag > 0)
2046			pf_mtag->tag = tag;
2047		if (rtableid > 0)
2048			pf_mtag->rtableid = rtableid;
2049	}
2050#else
2051	if (tag > 0)
2052		m->m_pkthdr.pf.tag = tag;
2053	if (rtableid >= 0)
2054		m->m_pkthdr.pf.rtableid = rtableid;
2055#endif /* !__NetBSD__ */
2056
2057	return (0);
2058}
2059
2060void
2061pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
2062    struct pf_rule **r, struct pf_rule **a,  int *match)
2063{
2064	struct pf_anchor_stackframe	*f;
2065
2066	(*r)->anchor->match = 0;
2067	if (match)
2068		*match = 0;
2069	if (*depth >= sizeof(pf_anchor_stack) /
2070	    sizeof(pf_anchor_stack[0])) {
2071		printf("pf_step_into_anchor: stack overflow\n");
2072		*r = TAILQ_NEXT(*r, entries);
2073		return;
2074	} else if (*depth == 0 && a != NULL)
2075		*a = *r;
2076	f = pf_anchor_stack + (*depth)++;
2077	f->rs = *rs;
2078	f->r = *r;
2079	if ((*r)->anchor_wildcard) {
2080		f->parent = &(*r)->anchor->children;
2081		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
2082		    NULL) {
2083			*r = NULL;
2084			return;
2085		}
2086		*rs = &f->child->ruleset;
2087	} else {
2088		f->parent = NULL;
2089		f->child = NULL;
2090		*rs = &(*r)->anchor->ruleset;
2091	}
2092	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2093}
2094
2095int
2096pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2097    struct pf_rule **r, struct pf_rule **a, int *match)
2098{
2099	struct pf_anchor_stackframe	*f;
2100	int quick = 0;
2101
2102	do {
2103		if (*depth <= 0)
2104			break;
2105		f = pf_anchor_stack + *depth - 1;
2106		if (f->parent != NULL && f->child != NULL) {
2107			if (f->child->match ||
2108			    (match != NULL && *match)) {
2109				f->r->anchor->match = 1;
2110				*match = 0;
2111			}
2112			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2113			if (f->child != NULL) {
2114				*rs = &f->child->ruleset;
2115				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2116				if (*r == NULL)
2117					continue;
2118				else
2119					break;
2120			}
2121		}
2122		(*depth)--;
2123		if (*depth == 0 && a != NULL)
2124			*a = NULL;
2125		*rs = f->rs;
2126		if (f->r->anchor->match || (match  != NULL && *match))
2127			quick = f->r->quick;
2128		*r = TAILQ_NEXT(f->r, entries);
2129	} while (*r == NULL);
2130
2131	return (quick);
2132}
2133
2134#ifdef INET6
2135void
2136pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2137    struct pf_addr *rmask, const struct pf_addr *saddr, sa_family_t af)
2138{
2139	switch (af) {
2140#ifdef INET
2141	case AF_INET:
2142		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2143		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2144		break;
2145#endif /* INET */
2146	case AF_INET6:
2147		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2148		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2149		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2150		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2151		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2152		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2153		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2154		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2155		break;
2156	}
2157}
2158
2159void
2160pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2161{
2162	switch (af) {
2163#ifdef INET
2164	case AF_INET:
2165		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2166		break;
2167#endif /* INET */
2168	case AF_INET6:
2169		if (addr->addr32[3] == 0xffffffff) {
2170			addr->addr32[3] = 0;
2171			if (addr->addr32[2] == 0xffffffff) {
2172				addr->addr32[2] = 0;
2173				if (addr->addr32[1] == 0xffffffff) {
2174					addr->addr32[1] = 0;
2175					addr->addr32[0] =
2176					    htonl(ntohl(addr->addr32[0]) + 1);
2177				} else
2178					addr->addr32[1] =
2179					    htonl(ntohl(addr->addr32[1]) + 1);
2180			} else
2181				addr->addr32[2] =
2182				    htonl(ntohl(addr->addr32[2]) + 1);
2183		} else
2184			addr->addr32[3] =
2185			    htonl(ntohl(addr->addr32[3]) + 1);
2186		break;
2187	}
2188}
2189#endif /* INET6 */
2190
2191#define mix(a,b,c) \
2192	do {					\
2193		a -= b; a -= c; a ^= (c >> 13);	\
2194		b -= c; b -= a; b ^= (a << 8);	\
2195		c -= a; c -= b; c ^= (b >> 13);	\
2196		a -= b; a -= c; a ^= (c >> 12);	\
2197		b -= c; b -= a; b ^= (a << 16);	\
2198		c -= a; c -= b; c ^= (b >> 5);	\
2199		a -= b; a -= c; a ^= (c >> 3);	\
2200		b -= c; b -= a; b ^= (a << 10);	\
2201		c -= a; c -= b; c ^= (b >> 15);	\
2202	} while (0)
2203
2204/*
2205 * hash function based on bridge_hash in if_bridge.c
2206 */
2207void
2208pf_hash(const struct pf_addr *inaddr, struct pf_addr *hash,
2209    struct pf_poolhashkey *key, sa_family_t af)
2210{
2211	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2212
2213	switch (af) {
2214#ifdef INET
2215	case AF_INET:
2216		a += inaddr->addr32[0];
2217		b += key->key32[1];
2218		mix(a, b, c);
2219		hash->addr32[0] = c + key->key32[2];
2220		break;
2221#endif /* INET */
2222#ifdef INET6
2223	case AF_INET6:
2224		a += inaddr->addr32[0];
2225		b += inaddr->addr32[2];
2226		mix(a, b, c);
2227		hash->addr32[0] = c;
2228		a += inaddr->addr32[1];
2229		b += inaddr->addr32[3];
2230		c += key->key32[1];
2231		mix(a, b, c);
2232		hash->addr32[1] = c;
2233		a += inaddr->addr32[2];
2234		b += inaddr->addr32[1];
2235		c += key->key32[2];
2236		mix(a, b, c);
2237		hash->addr32[2] = c;
2238		a += inaddr->addr32[3];
2239		b += inaddr->addr32[0];
2240		c += key->key32[3];
2241		mix(a, b, c);
2242		hash->addr32[3] = c;
2243		break;
2244#endif /* INET6 */
2245	}
2246}
2247
2248int
2249pf_map_addr(sa_family_t af, struct pf_rule *r, const struct pf_addr *saddr,
2250    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
2251{
2252	unsigned char		 hash[16];
2253	struct pf_pool		*rpool = &r->rpool;
2254	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
2255	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
2256	struct pf_pooladdr	*acur = rpool->cur;
2257	struct pf_src_node	 k;
2258
2259	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
2260	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2261		k.af = af;
2262		PF_ACPY(&k.addr, saddr, af);
2263		if (r->rule_flag & PFRULE_RULESRCTRACK ||
2264		    r->rpool.opts & PF_POOL_STICKYADDR)
2265			k.rule.ptr = r;
2266		else
2267			k.rule.ptr = NULL;
2268		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
2269		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
2270		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
2271			PF_ACPY(naddr, &(*sn)->raddr, af);
2272			if (pf_status.debug >= PF_DEBUG_MISC) {
2273				printf("pf_map_addr: src tracking maps ");
2274				pf_print_host(&k.addr, 0, af);
2275				printf(" to ");
2276				pf_print_host(naddr, 0, af);
2277				printf("\n");
2278			}
2279			return (0);
2280		}
2281	}
2282
2283	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
2284		return (1);
2285	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2286		switch (af) {
2287#ifdef INET
2288		case AF_INET:
2289			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
2290			    (rpool->opts & PF_POOL_TYPEMASK) !=
2291			    PF_POOL_ROUNDROBIN)
2292				return (1);
2293			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
2294			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
2295			break;
2296#endif /* INET */
2297#ifdef INET6
2298		case AF_INET6:
2299			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
2300			    (rpool->opts & PF_POOL_TYPEMASK) !=
2301			    PF_POOL_ROUNDROBIN)
2302				return (1);
2303			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
2304			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
2305			break;
2306#endif /* INET6 */
2307		}
2308	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2309		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
2310			return (1); /* unsupported */
2311	} else {
2312		raddr = &rpool->cur->addr.v.a.addr;
2313		rmask = &rpool->cur->addr.v.a.mask;
2314	}
2315
2316	switch (rpool->opts & PF_POOL_TYPEMASK) {
2317	case PF_POOL_NONE:
2318		PF_ACPY(naddr, raddr, af);
2319		break;
2320	case PF_POOL_BITMASK:
2321		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
2322		break;
2323	case PF_POOL_RANDOM:
2324		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
2325			switch (af) {
2326#ifdef INET
2327			case AF_INET:
2328				rpool->counter.addr32[0] =
2329				    htonl(cprng_fast32());
2330				break;
2331#endif /* INET */
2332#ifdef INET6
2333			case AF_INET6:
2334				if (rmask->addr32[3] != 0xffffffff)
2335					rpool->counter.addr32[3] =
2336					    htonl(cprng_fast32());
2337				else
2338					break;
2339				if (rmask->addr32[2] != 0xffffffff)
2340					rpool->counter.addr32[2] =
2341					    htonl(cprng_fast32());
2342				else
2343					break;
2344				if (rmask->addr32[1] != 0xffffffff)
2345					rpool->counter.addr32[1] =
2346					    htonl(cprng_fast32());
2347				else
2348					break;
2349				if (rmask->addr32[0] != 0xffffffff)
2350					rpool->counter.addr32[0] =
2351					    htonl(cprng_fast32());
2352				break;
2353#endif /* INET6 */
2354			}
2355			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2356			PF_ACPY(init_addr, naddr, af);
2357
2358		} else {
2359			PF_AINC(&rpool->counter, af);
2360			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2361		}
2362		break;
2363	case PF_POOL_SRCHASH:
2364		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2365		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2366		break;
2367	case PF_POOL_ROUNDROBIN:
2368		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2369			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
2370			    &rpool->tblidx, &rpool->counter,
2371			    &raddr, &rmask, af))
2372				goto get_addr;
2373		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2374			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2375			    &rpool->tblidx, &rpool->counter,
2376			    &raddr, &rmask, af))
2377				goto get_addr;
2378		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
2379			goto get_addr;
2380
2381	try_next:
2382		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
2383			rpool->cur = TAILQ_FIRST(&rpool->list);
2384		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2385			rpool->tblidx = -1;
2386			if (pfr_pool_get(rpool->cur->addr.p.tbl,
2387			    &rpool->tblidx, &rpool->counter,
2388			    &raddr, &rmask, af)) {
2389				/* table contains no address of type 'af' */
2390				if (rpool->cur != acur)
2391					goto try_next;
2392				return (1);
2393			}
2394		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2395			rpool->tblidx = -1;
2396			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2397			    &rpool->tblidx, &rpool->counter,
2398			    &raddr, &rmask, af)) {
2399				/* table contains no address of type 'af' */
2400				if (rpool->cur != acur)
2401					goto try_next;
2402				return (1);
2403			}
2404		} else {
2405			raddr = &rpool->cur->addr.v.a.addr;
2406			rmask = &rpool->cur->addr.v.a.mask;
2407			PF_ACPY(&rpool->counter, raddr, af);
2408		}
2409
2410	get_addr:
2411		PF_ACPY(naddr, &rpool->counter, af);
2412		if (init_addr != NULL && PF_AZERO(init_addr, af))
2413			PF_ACPY(init_addr, naddr, af);
2414		PF_AINC(&rpool->counter, af);
2415		break;
2416	}
2417	if (*sn != NULL)
2418		PF_ACPY(&(*sn)->raddr, naddr, af);
2419
2420	if (pf_status.debug >= PF_DEBUG_MISC &&
2421	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2422		printf("pf_map_addr: selected address ");
2423		pf_print_host(naddr, 0, af);
2424		printf("\n");
2425	}
2426
2427	return (0);
2428}
2429
2430int
2431pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2432    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2433    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2434    struct pf_src_node **sn)
2435{
2436	struct pf_state_key_cmp	key;
2437	struct pf_addr		init_addr;
2438	u_int16_t		cut;
2439
2440	bzero(&init_addr, sizeof(init_addr));
2441	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2442		return (1);
2443
2444	if (proto == IPPROTO_ICMP) {
2445		low = 1;
2446		high = 65535;
2447	}
2448
2449	do {
2450		key.af = af;
2451		key.proto = proto;
2452		PF_ACPY(&key.ext.addr, daddr, key.af);
2453		PF_ACPY(&key.gwy.addr, naddr, key.af);
2454		key.ext.port = dport;
2455
2456		/*
2457		 * port search; start random, step;
2458		 * similar 2 portloop in inpcb_bind
2459		 */
2460		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
2461		    proto == IPPROTO_ICMP)) {
2462			key.gwy.port = dport;
2463			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2464				return (0);
2465		} else if (low == 0 && high == 0) {
2466			key.gwy.port = *nport;
2467			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2468				return (0);
2469		} else if (low == high) {
2470			key.gwy.port = htons(low);
2471			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2472				*nport = htons(low);
2473				return (0);
2474			}
2475		} else {
2476			u_int16_t tmp;
2477
2478			if (low > high) {
2479				tmp = low;
2480				low = high;
2481				high = tmp;
2482			}
2483			/* low < high */
2484			cut = htonl(cprng_fast32()) % (1 + high - low) + low;
2485			/* low <= cut <= high */
2486			for (tmp = cut; tmp <= high; ++(tmp)) {
2487				key.gwy.port = htons(tmp);
2488				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2489				    NULL) {
2490					*nport = htons(tmp);
2491					return (0);
2492				}
2493			}
2494			for (tmp = cut - 1; tmp >= low; --(tmp)) {
2495				key.gwy.port = htons(tmp);
2496				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2497				    NULL) {
2498					*nport = htons(tmp);
2499					return (0);
2500				}
2501			}
2502		}
2503
2504		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2505		case PF_POOL_RANDOM:
2506		case PF_POOL_ROUNDROBIN:
2507			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2508				return (1);
2509			break;
2510		case PF_POOL_NONE:
2511		case PF_POOL_SRCHASH:
2512		case PF_POOL_BITMASK:
2513		default:
2514			return (1);
2515		}
2516	} while (! PF_AEQ(&init_addr, naddr, af) );
2517
2518	return (1);					/* none available */
2519}
2520
2521struct pf_rule *
2522pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2523    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2524    struct pf_addr *daddr, u_int16_t dport, int rs_num)
2525{
2526	struct pf_rule		*r, *rm = NULL;
2527	struct pf_ruleset	*ruleset = NULL;
2528	int			 tag = -1;
2529	int			 rtableid = -1;
2530	int			 asd = 0;
2531
2532	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2533	while (r && rm == NULL) {
2534		struct pf_rule_addr	*src = NULL, *dst = NULL;
2535		struct pf_addr_wrap	*xdst = NULL;
2536
2537		if (r->action == PF_BINAT && direction == PF_IN) {
2538			src = &r->dst;
2539			if (r->rpool.cur != NULL)
2540				xdst = &r->rpool.cur->addr;
2541		} else {
2542			src = &r->src;
2543			dst = &r->dst;
2544		}
2545
2546		r->evaluations++;
2547		if (pfi_kif_match(r->kif, kif) == r->ifnot)
2548			r = r->skip[PF_SKIP_IFP].ptr;
2549		else if (r->direction && r->direction != direction)
2550			r = r->skip[PF_SKIP_DIR].ptr;
2551		else if (r->af && r->af != pd->af)
2552			r = r->skip[PF_SKIP_AF].ptr;
2553		else if (r->proto && r->proto != pd->proto)
2554			r = r->skip[PF_SKIP_PROTO].ptr;
2555		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
2556		    src->neg, kif))
2557			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2558			    PF_SKIP_DST_ADDR].ptr;
2559		else if (src->port_op && !pf_match_port(src->port_op,
2560		    src->port[0], src->port[1], sport))
2561			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2562			    PF_SKIP_DST_PORT].ptr;
2563		else if (dst != NULL &&
2564		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
2565			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2566		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
2567		    0, NULL))
2568			r = TAILQ_NEXT(r, entries);
2569		else if (dst != NULL && dst->port_op &&
2570		    !pf_match_port(dst->port_op, dst->port[0],
2571		    dst->port[1], dport))
2572			r = r->skip[PF_SKIP_DST_PORT].ptr;
2573		else if (r->match_tag && !pf_match_tag(m, r, &tag))
2574			r = TAILQ_NEXT(r, entries);
2575		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2576		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2577		    off, pd->hdr.tcp), r->os_fingerprint)))
2578			r = TAILQ_NEXT(r, entries);
2579		else {
2580			if (r->tag)
2581				tag = r->tag;
2582			if (r->rtableid >= 0)
2583				rtableid = r->rtableid;
2584			if (r->anchor == NULL) {
2585				rm = r;
2586			} else
2587				pf_step_into_anchor(&asd, &ruleset, rs_num,
2588				    &r, NULL, NULL);
2589		}
2590		if (r == NULL)
2591			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
2592			    NULL, NULL);
2593	}
2594	if (pf_tag_packet(m, tag, rtableid))
2595		return (NULL);
2596	if (rm != NULL && (rm->action == PF_NONAT ||
2597	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2598		return (NULL);
2599	return (rm);
2600}
2601
2602struct pf_rule *
2603pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2604    struct pfi_kif *kif, struct pf_src_node **sn,
2605    struct pf_addr *saddr, u_int16_t sport,
2606    struct pf_addr *daddr, u_int16_t dport,
2607    struct pf_addr *naddr, u_int16_t *nport)
2608{
2609	struct pf_rule	*r = NULL;
2610
2611	if (direction == PF_OUT) {
2612		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2613		    sport, daddr, dport, PF_RULESET_BINAT);
2614		if (r == NULL)
2615			r = pf_match_translation(pd, m, off, direction, kif,
2616			    saddr, sport, daddr, dport, PF_RULESET_NAT);
2617	} else {
2618		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2619		    sport, daddr, dport, PF_RULESET_RDR);
2620		if (r == NULL)
2621			r = pf_match_translation(pd, m, off, direction, kif,
2622			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
2623	}
2624
2625	if (r != NULL) {
2626		switch (r->action) {
2627		case PF_NONAT:
2628		case PF_NOBINAT:
2629		case PF_NORDR:
2630			return (NULL);
2631		case PF_NAT:
2632			if (pf_get_sport(pd->af, pd->proto, r, saddr,
2633			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2634			    r->rpool.proxy_port[1], sn)) {
2635				DPFPRINTF(PF_DEBUG_MISC,
2636				    ("pf: NAT proxy port allocation "
2637				    "(%u-%u) failed\n",
2638				    r->rpool.proxy_port[0],
2639				    r->rpool.proxy_port[1]));
2640				return (NULL);
2641			}
2642			break;
2643		case PF_BINAT:
2644			switch (direction) {
2645			case PF_OUT:
2646				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2647					switch (pd->af) {
2648#ifdef INET
2649					case AF_INET:
2650						if (r->rpool.cur->addr.p.dyn->
2651						    pfid_acnt4 < 1)
2652							return (NULL);
2653						PF_POOLMASK(naddr,
2654						    &r->rpool.cur->addr.p.dyn->
2655						    pfid_addr4,
2656						    &r->rpool.cur->addr.p.dyn->
2657						    pfid_mask4,
2658						    saddr, AF_INET);
2659						break;
2660#endif /* INET */
2661#ifdef INET6
2662					case AF_INET6:
2663						if (r->rpool.cur->addr.p.dyn->
2664						    pfid_acnt6 < 1)
2665							return (NULL);
2666						PF_POOLMASK(naddr,
2667						    &r->rpool.cur->addr.p.dyn->
2668						    pfid_addr6,
2669						    &r->rpool.cur->addr.p.dyn->
2670						    pfid_mask6,
2671						    saddr, AF_INET6);
2672						break;
2673#endif /* INET6 */
2674					}
2675				} else
2676					PF_POOLMASK(naddr,
2677					    &r->rpool.cur->addr.v.a.addr,
2678					    &r->rpool.cur->addr.v.a.mask,
2679					    saddr, pd->af);
2680				break;
2681			case PF_IN:
2682				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
2683					switch (pd->af) {
2684#ifdef INET
2685					case AF_INET:
2686						if (r->src.addr.p.dyn->
2687						    pfid_acnt4 < 1)
2688							return (NULL);
2689						PF_POOLMASK(naddr,
2690						    &r->src.addr.p.dyn->
2691						    pfid_addr4,
2692						    &r->src.addr.p.dyn->
2693						    pfid_mask4,
2694						    daddr, AF_INET);
2695						break;
2696#endif /* INET */
2697#ifdef INET6
2698					case AF_INET6:
2699						if (r->src.addr.p.dyn->
2700						    pfid_acnt6 < 1)
2701							return (NULL);
2702						PF_POOLMASK(naddr,
2703						    &r->src.addr.p.dyn->
2704						    pfid_addr6,
2705						    &r->src.addr.p.dyn->
2706						    pfid_mask6,
2707						    daddr, AF_INET6);
2708						break;
2709#endif /* INET6 */
2710					}
2711				} else
2712					PF_POOLMASK(naddr,
2713					    &r->src.addr.v.a.addr,
2714					    &r->src.addr.v.a.mask, daddr,
2715					    pd->af);
2716				break;
2717			}
2718			break;
2719		case PF_RDR: {
2720			if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
2721				return (NULL);
2722			if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
2723			    PF_POOL_BITMASK)
2724				PF_POOLMASK(naddr, naddr,
2725				    &r->rpool.cur->addr.v.a.mask, daddr,
2726				    pd->af);
2727
2728			if (r->rpool.proxy_port[1]) {
2729				u_int32_t	tmp_nport;
2730
2731				tmp_nport = ((ntohs(dport) -
2732				    ntohs(r->dst.port[0])) %
2733				    (r->rpool.proxy_port[1] -
2734				    r->rpool.proxy_port[0] + 1)) +
2735				    r->rpool.proxy_port[0];
2736
2737				/* wrap around if necessary */
2738				if (tmp_nport > 65535)
2739					tmp_nport -= 65535;
2740				*nport = htons((u_int16_t)tmp_nport);
2741			} else if (r->rpool.proxy_port[0])
2742				*nport = htons(r->rpool.proxy_port[0]);
2743			break;
2744		}
2745		default:
2746			return (NULL);
2747		}
2748	}
2749
2750	return (r);
2751}
2752
2753int
2754pf_socket_lookup(int direction, struct pf_pdesc *pd)
2755{
2756	struct pf_addr		*saddr, *daddr;
2757	u_int16_t		 sport, dport;
2758	struct inpcbtable	*tb;
2759	struct inpcb		*inp = NULL;
2760	struct socket		*so = NULL;
2761#define in6p inp
2762#define in6p_socket inp_socket
2763
2764	if (pd == NULL)
2765		return (-1);
2766	pd->lookup.uid = UID_MAX;
2767	pd->lookup.gid = GID_MAX;
2768	pd->lookup.pid = NO_PID;
2769	switch (pd->proto) {
2770	case IPPROTO_TCP:
2771		if (pd->hdr.tcp == NULL)
2772			return (-1);
2773		sport = pd->hdr.tcp->th_sport;
2774		dport = pd->hdr.tcp->th_dport;
2775		tb = &tcbtable;
2776		break;
2777	case IPPROTO_UDP:
2778		if (pd->hdr.udp == NULL)
2779			return (-1);
2780		sport = pd->hdr.udp->uh_sport;
2781		dport = pd->hdr.udp->uh_dport;
2782		tb = &udbtable;
2783		break;
2784	default:
2785		return (-1);
2786	}
2787	if (direction == PF_IN) {
2788		saddr = pd->src;
2789		daddr = pd->dst;
2790	} else {
2791		u_int16_t	p;
2792
2793		p = sport;
2794		sport = dport;
2795		dport = p;
2796		saddr = pd->dst;
2797		daddr = pd->src;
2798	}
2799	switch (pd->af) {
2800
2801#ifdef __NetBSD__
2802#define in_pcbhashlookup(tbl, saddr, sport, daddr, dport) \
2803    inpcb_lookup(tbl, saddr, sport, daddr, dport, NULL)
2804#define in6_pcbhashlookup(tbl, saddr, sport, daddr, dport) \
2805    in6pcb_lookup(tbl, saddr, sport, daddr, dport, 0, NULL)
2806#define in_pcblookup_listen(tbl, addr, port, zero) \
2807    inpcb_lookup_bound(tbl, addr, port)
2808#define in6_pcblookup_listen(tbl, addr, port, zero) \
2809    in6pcb_lookup_bound(tbl, addr, port, zero)
2810#endif
2811
2812#ifdef INET
2813	case AF_INET:
2814		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
2815		if (inp == NULL) {
2816			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
2817			if (inp == NULL)
2818				return (-1);
2819		}
2820		break;
2821#endif /* INET */
2822#ifdef INET6
2823	case AF_INET6:
2824		in6p = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
2825		    dport);
2826		if (inp == NULL) {
2827			in6p = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
2828			if (inp == NULL)
2829				return (-1);
2830		}
2831		break;
2832#endif /* INET6 */
2833
2834	default:
2835		return (-1);
2836	}
2837
2838#ifdef __NetBSD__
2839	switch (pd->af) {
2840#ifdef INET
2841	case AF_INET:
2842		so = inp->inp_socket;
2843		break;
2844#endif
2845#ifdef INET6
2846	case AF_INET6:
2847		so = in6p->in6p_socket;
2848		break;
2849#endif /* INET6 */
2850	}
2851	if (so == NULL || so->so_cred == NULL)
2852		return -1;
2853	pd->lookup.uid = kauth_cred_geteuid(so->so_cred);
2854	pd->lookup.gid = kauth_cred_getegid(so->so_cred);
2855#else
2856	so = inp->inp_socket;
2857	pd->lookup.uid = so->so_euid;
2858	pd->lookup.gid = so->so_egid;
2859#endif /* !__NetBSD__ */
2860	pd->lookup.pid = so->so_cpid;
2861	return (1);
2862}
2863
2864u_int8_t
2865pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2866{
2867	int		 hlen;
2868	u_int8_t	 hdr[60];
2869	u_int8_t	*opt, optlen;
2870	u_int8_t	 wscale = 0;
2871
2872	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2873	if (hlen <= sizeof(struct tcphdr))
2874		return (0);
2875	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2876		return (0);
2877	opt = hdr + sizeof(struct tcphdr);
2878	hlen -= sizeof(struct tcphdr);
2879	while (hlen >= 3) {
2880		switch (*opt) {
2881		case TCPOPT_EOL:
2882		case TCPOPT_NOP:
2883			++opt;
2884			--hlen;
2885			break;
2886		case TCPOPT_WINDOW:
2887			wscale = opt[2];
2888			if (wscale > TCP_MAX_WINSHIFT)
2889				wscale = TCP_MAX_WINSHIFT;
2890			wscale |= PF_WSCALE_FLAG;
2891			/* FALLTHROUGH */
2892		default:
2893			optlen = opt[1];
2894			if (optlen < 2)
2895				optlen = 2;
2896			hlen -= optlen;
2897			opt += optlen;
2898			break;
2899		}
2900	}
2901	return (wscale);
2902}
2903
2904u_int16_t
2905pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2906{
2907	int		 hlen;
2908	u_int8_t	 hdr[60];
2909	u_int8_t	*opt, optlen;
2910	u_int16_t	 mss = tcp_mssdflt;
2911
2912	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2913	if (hlen <= sizeof(struct tcphdr))
2914		return (0);
2915	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2916		return (0);
2917	opt = hdr + sizeof(struct tcphdr);
2918	hlen -= sizeof(struct tcphdr);
2919	while (hlen >= TCPOLEN_MAXSEG) {
2920		switch (*opt) {
2921		case TCPOPT_EOL:
2922		case TCPOPT_NOP:
2923			++opt;
2924			--hlen;
2925			break;
2926		case TCPOPT_MAXSEG:
2927			bcopy((void *)(opt + 2), (void *)&mss, 2);
2928			NTOHS(mss);
2929			/* FALLTHROUGH */
2930		default:
2931			optlen = opt[1];
2932			if (optlen < 2)
2933				optlen = 2;
2934			hlen -= optlen;
2935			opt += optlen;
2936			break;
2937		}
2938	}
2939	return (mss);
2940}
2941
2942u_int16_t
2943pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2944{
2945	union {
2946		struct sockaddr		dst;
2947		struct sockaddr_in	dst4;
2948		struct sockaddr_in6	dst6;
2949	} u;
2950	struct route		 ro;
2951	struct route		*rop = &ro;
2952	struct rtentry		*rt;
2953	int			 hlen;
2954	u_int16_t		 mss = tcp_mssdflt;
2955
2956	hlen = 0;	/* XXXGCC -Wuninitialized m68k */
2957
2958	memset(&ro, 0, sizeof(ro));
2959	switch (af) {
2960#ifdef INET
2961	case AF_INET:
2962		hlen = sizeof(struct ip);
2963		sockaddr_in_init(&u.dst4, &addr->v4, 0);
2964		rtcache_setdst(rop, &u.dst);
2965		break;
2966#endif /* INET */
2967#ifdef INET6
2968	case AF_INET6:
2969		hlen = sizeof(struct ip6_hdr);
2970		sockaddr_in6_init(&u.dst6, &addr->v6, 0, 0, 0);
2971		rtcache_setdst(rop, &u.dst);
2972		break;
2973#endif /* INET6 */
2974	}
2975
2976#ifndef __NetBSD__
2977	rtalloc_noclone(rop, NO_CLONING);
2978	if ((rt = ro->ro_rt) != NULL) {
2979		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2980		mss = uimax(tcp_mssdflt, mss);
2981	}
2982#else
2983	if ((rt = rtcache_init_noclone(rop)) != NULL) {
2984		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2985		mss = uimax(tcp_mssdflt, mss);
2986		rtcache_unref(rt, rop);
2987	}
2988	rtcache_free(rop);
2989#endif
2990	mss = uimin(mss, offer);
2991	mss = uimax(mss, 64);		/* sanity - at least max opt space */
2992	return (mss);
2993}
2994
2995void
2996pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2997{
2998	struct pf_rule *r = s->rule.ptr;
2999
3000	s->rt_kif = NULL;
3001	if (!r->rt || r->rt == PF_FASTROUTE)
3002		return;
3003	switch (s->state_key->af) {
3004#ifdef INET
3005	case AF_INET:
3006		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
3007		    &s->nat_src_node);
3008		s->rt_kif = r->rpool.cur->kif;
3009		break;
3010#endif /* INET */
3011#ifdef INET6
3012	case AF_INET6:
3013		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
3014		    &s->nat_src_node);
3015		s->rt_kif = r->rpool.cur->kif;
3016		break;
3017#endif /* INET6 */
3018	}
3019}
3020
3021void
3022pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
3023{
3024	s->state_key = sk;
3025	sk->refcnt++;
3026
3027	/* list is sorted, if-bound states before floating */
3028	if (tail)
3029		TAILQ_INSERT_TAIL(&sk->states, s, next);
3030	else
3031		TAILQ_INSERT_HEAD(&sk->states, s, next);
3032}
3033
3034void
3035pf_detach_state(struct pf_state *s, int flags)
3036{
3037	struct pf_state_key	*sk = s->state_key;
3038
3039	if (sk == NULL)
3040		return;
3041
3042	s->state_key = NULL;
3043	TAILQ_REMOVE(&sk->states, s, next);
3044	if (--sk->refcnt == 0) {
3045		if (!(flags & PF_DT_SKIP_EXTGWY))
3046			RB_REMOVE(pf_state_tree_ext_gwy,
3047			    &pf_statetbl_ext_gwy, sk);
3048		if (!(flags & PF_DT_SKIP_LANEXT))
3049			RB_REMOVE(pf_state_tree_lan_ext,
3050			    &pf_statetbl_lan_ext, sk);
3051		pool_put(&pf_state_key_pl, sk);
3052	}
3053}
3054
3055struct pf_state_key *
3056pf_alloc_state_key(struct pf_state *s)
3057{
3058	struct pf_state_key	*sk;
3059
3060	if ((sk = pool_get(&pf_state_key_pl, PR_NOWAIT)) == NULL)
3061		return (NULL);
3062	bzero(sk, sizeof(*sk));
3063	TAILQ_INIT(&sk->states);
3064	pf_attach_state(sk, s, 0);
3065
3066	return (sk);
3067}
3068
3069int
3070pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
3071    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3072    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3073    struct ifqueue *ifq)
3074{
3075	struct pf_rule		*nr = NULL;
3076	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3077	u_int16_t		 bport, nport = 0;
3078	sa_family_t		 af = pd->af;
3079	struct pf_rule		*r, *a = NULL;
3080	struct pf_ruleset	*ruleset = NULL;
3081	struct pf_src_node	*nsn = NULL;
3082	struct tcphdr		*th = pd->hdr.tcp;
3083	u_short			 reason;
3084	int			 rewrite = 0, hdrlen = 0;
3085	int			 tag = -1, rtableid = -1;
3086	int			 asd = 0;
3087	int			 match = 0;
3088	int			 state_icmp = 0;
3089	u_int16_t		 mss = tcp_mssdflt;
3090	u_int16_t		 sport, dport;
3091	u_int8_t		 icmptype = 0, icmpcode = 0;
3092
3093	if (direction == PF_IN && pf_check_congestion(ifq)) {
3094		REASON_SET_NOPTR(&reason, PFRES_CONGEST);
3095		return (PF_DROP);
3096	}
3097
3098	sport = dport = hdrlen = 0;
3099
3100	switch (pd->proto) {
3101	case IPPROTO_TCP:
3102		sport = th->th_sport;
3103		dport = th->th_dport;
3104		hdrlen = sizeof(*th);
3105		break;
3106	case IPPROTO_UDP:
3107		sport = pd->hdr.udp->uh_sport;
3108		dport = pd->hdr.udp->uh_dport;
3109		hdrlen = sizeof(*pd->hdr.udp);
3110		break;
3111#ifdef INET
3112	case IPPROTO_ICMP:
3113		if (pd->af != AF_INET)
3114			break;
3115		sport = dport = pd->hdr.icmp->icmp_id;
3116		icmptype = pd->hdr.icmp->icmp_type;
3117		icmpcode = pd->hdr.icmp->icmp_code;
3118
3119		if (icmptype == ICMP_UNREACH ||
3120		    icmptype == ICMP_SOURCEQUENCH ||
3121		    icmptype == ICMP_REDIRECT ||
3122		    icmptype == ICMP_TIMXCEED ||
3123		    icmptype == ICMP_PARAMPROB)
3124			state_icmp++;
3125		break;
3126#endif /* INET */
3127
3128#ifdef INET6
3129	case IPPROTO_ICMPV6:
3130		if (pd->af != AF_INET6)
3131			break;
3132		sport = dport = pd->hdr.icmp6->icmp6_id;
3133		hdrlen = sizeof(*pd->hdr.icmp6);
3134		icmptype = pd->hdr.icmp6->icmp6_type;
3135		icmpcode = pd->hdr.icmp6->icmp6_code;
3136
3137		if (icmptype == ICMP6_DST_UNREACH ||
3138		    icmptype == ICMP6_PACKET_TOO_BIG ||
3139		    icmptype == ICMP6_TIME_EXCEEDED ||
3140		    icmptype == ICMP6_PARAM_PROB)
3141			state_icmp++;
3142		break;
3143#endif /* INET6 */
3144	}
3145
3146	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3147
3148	if (direction == PF_OUT) {
3149		bport = nport = sport;
3150		/* check outgoing packet for BINAT/NAT */
3151		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3152		    saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) {
3153			PF_ACPY(&pd->baddr, saddr, af);
3154			switch (pd->proto) {
3155			case IPPROTO_TCP:
3156				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3157				    &th->th_sum, &pd->naddr, nport, 0, af);
3158				sport = th->th_sport;
3159				rewrite++;
3160				break;
3161			case IPPROTO_UDP:
3162				pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
3163				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3164				    &pd->naddr, nport, 1, af);
3165				sport = pd->hdr.udp->uh_sport;
3166				rewrite++;
3167				break;
3168#ifdef INET
3169			case IPPROTO_ICMP:
3170				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3171				    pd->naddr.v4.s_addr, 0);
3172				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
3173				    pd->hdr.icmp->icmp_cksum, sport, nport, 0);
3174				pd->hdr.icmp->icmp_id = nport;
3175				m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
3176				break;
3177#endif /* INET */
3178#ifdef INET6
3179			case IPPROTO_ICMPV6:
3180				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3181				    &pd->naddr, 0);
3182				rewrite++;
3183				break;
3184#endif /* INET */
3185			default:
3186				switch (af) {
3187#ifdef INET
3188				case AF_INET:
3189					pf_change_a(&saddr->v4.s_addr,
3190					    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3191					break;
3192#endif /* INET */
3193#ifdef INET6
3194				case AF_INET6:
3195					PF_ACPY(saddr, &pd->naddr, af);
3196					break;
3197#endif /* INET */
3198				}
3199				break;
3200			}
3201
3202			if (nr->natpass)
3203				r = NULL;
3204			pd->nat_rule = nr;
3205		}
3206	} else {
3207		bport = nport = dport;
3208		/* check incoming packet for BINAT/RDR */
3209		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3210		    saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) {
3211			PF_ACPY(&pd->baddr, daddr, af);
3212			switch (pd->proto) {
3213			case IPPROTO_TCP:
3214				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3215				    &th->th_sum, &pd->naddr, nport, 0, af);
3216				dport = th->th_dport;
3217				rewrite++;
3218				break;
3219			case IPPROTO_UDP:
3220				pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
3221				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3222				    &pd->naddr, nport, 1, af);
3223				dport = pd->hdr.udp->uh_dport;
3224				rewrite++;
3225				break;
3226#ifdef INET
3227			case IPPROTO_ICMP:
3228				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3229				    pd->naddr.v4.s_addr, 0);
3230				break;
3231#endif /* INET */
3232#ifdef INET6
3233			case IPPROTO_ICMPV6:
3234				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3235				    &pd->naddr, 0);
3236				rewrite++;
3237				break;
3238#endif /* INET6 */
3239			default:
3240				switch (af) {
3241#ifdef INET
3242				case AF_INET:
3243					pf_change_a(&daddr->v4.s_addr,
3244					    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3245					break;
3246#endif /* INET */
3247#ifdef INET6
3248				case AF_INET6:
3249					PF_ACPY(daddr, &pd->naddr, af);
3250					break;
3251#endif /* INET */
3252				}
3253				break;
3254			}
3255
3256			if (nr->natpass)
3257				r = NULL;
3258			pd->nat_rule = nr;
3259		}
3260	}
3261
3262	while (r != NULL) {
3263		r->evaluations++;
3264		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3265			r = r->skip[PF_SKIP_IFP].ptr;
3266		else if (r->direction && r->direction != direction)
3267			r = r->skip[PF_SKIP_DIR].ptr;
3268		else if (r->af && r->af != af)
3269			r = r->skip[PF_SKIP_AF].ptr;
3270		else if (r->proto && r->proto != pd->proto)
3271			r = r->skip[PF_SKIP_PROTO].ptr;
3272		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3273		    r->src.neg, kif))
3274			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3275		/* tcp/udp only. port_op always 0 in other cases */
3276		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3277		    r->src.port[0], r->src.port[1], sport))
3278			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3279		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3280		    r->dst.neg, NULL))
3281			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3282		/* tcp/udp only. port_op always 0 in other cases */
3283		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3284		    r->dst.port[0], r->dst.port[1], dport))
3285			r = r->skip[PF_SKIP_DST_PORT].ptr;
3286		/* icmp only. type always 0 in other cases */
3287		else if (r->type && r->type != icmptype + 1)
3288			r = TAILQ_NEXT(r, entries);
3289		/* icmp only. type always 0 in other cases */
3290		else if (r->code && r->code != icmpcode + 1)
3291			r = TAILQ_NEXT(r, entries);
3292		else if (r->tos && !(r->tos == pd->tos))
3293			r = TAILQ_NEXT(r, entries);
3294		else if (r->rule_flag & PFRULE_FRAGMENT)
3295			r = TAILQ_NEXT(r, entries);
3296		else if (pd->proto == IPPROTO_TCP &&
3297		    (r->flagset & th->th_flags) != r->flags)
3298			r = TAILQ_NEXT(r, entries);
3299		/* tcp/udp only. uid.op always 0 in other cases */
3300		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3301		    pf_socket_lookup(direction, pd), 1)) &&
3302		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3303		    pd->lookup.uid))
3304			r = TAILQ_NEXT(r, entries);
3305		/* tcp/udp only. gid.op always 0 in other cases */
3306		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3307		    pf_socket_lookup(direction, pd), 1)) &&
3308		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3309		    pd->lookup.gid))
3310			r = TAILQ_NEXT(r, entries);
3311		else if (r->prob && r->prob <= cprng_fast32())
3312			r = TAILQ_NEXT(r, entries);
3313		else if (r->match_tag && !pf_match_tag(m, r, &tag))
3314			r = TAILQ_NEXT(r, entries);
3315		else if (r->os_fingerprint != PF_OSFP_ANY &&
3316		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3317		    pf_osfp_fingerprint(pd, m, off, th),
3318		    r->os_fingerprint)))
3319			r = TAILQ_NEXT(r, entries);
3320		else {
3321			if (r->tag)
3322				tag = r->tag;
3323			if (r->rtableid >= 0)
3324				rtableid = r->rtableid;
3325			if (r->anchor == NULL) {
3326				match = 1;
3327				*rm = r;
3328				*am = a;
3329				*rsm = ruleset;
3330				if ((*rm)->quick)
3331					break;
3332				r = TAILQ_NEXT(r, entries);
3333			} else
3334				pf_step_into_anchor(&asd, &ruleset,
3335				    PF_RULESET_FILTER, &r, &a, &match);
3336		}
3337		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3338		    PF_RULESET_FILTER, &r, &a, &match))
3339			break;
3340	}
3341	r = *rm;
3342	a = *am;
3343	ruleset = *rsm;
3344
3345	REASON_SET_NOPTR(&reason, PFRES_MATCH);
3346
3347	if (r->log || (nr != NULL && nr->log)) {
3348		if (rewrite)
3349			m_copyback(m, off, hdrlen, pd->hdr.any);
3350		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3351		    a, ruleset, pd);
3352	}
3353
3354	if (r->keep_state && pf_state_lock) {
3355		REASON_SET_NOPTR(&reason, PFRES_STATELOCKED);
3356		return PF_DROP;
3357	}
3358
3359	if ((r->action == PF_DROP) &&
3360	    ((r->rule_flag & PFRULE_RETURNRST) ||
3361	    (r->rule_flag & PFRULE_RETURNICMP) ||
3362	    (r->rule_flag & PFRULE_RETURN))) {
3363		/* undo NAT changes, if they have taken place */
3364		if (nr != NULL) {
3365			if (direction == PF_OUT) {
3366				switch (pd->proto) {
3367				case IPPROTO_TCP:
3368					pf_change_ap(saddr, &th->th_sport,
3369					    pd->ip_sum, &th->th_sum,
3370					    &pd->baddr, bport, 0, af);
3371					sport = th->th_sport;
3372					rewrite++;
3373					break;
3374				case IPPROTO_UDP:
3375					pf_change_ap(saddr,
3376					    &pd->hdr.udp->uh_sport, pd->ip_sum,
3377					    &pd->hdr.udp->uh_sum, &pd->baddr,
3378					    bport, 1, af);
3379					sport = pd->hdr.udp->uh_sport;
3380					rewrite++;
3381					break;
3382				case IPPROTO_ICMP:
3383#ifdef INET6
3384				case IPPROTO_ICMPV6:
3385#endif
3386					/* nothing! */
3387					break;
3388				default:
3389					switch (af) {
3390					case AF_INET:
3391						pf_change_a(&saddr->v4.s_addr,
3392						    pd->ip_sum,
3393						    pd->baddr.v4.s_addr, 0);
3394						break;
3395					case AF_INET6:
3396						PF_ACPY(saddr, &pd->baddr, af);
3397						break;
3398					}
3399				}
3400			} else {
3401				switch (pd->proto) {
3402				case IPPROTO_TCP:
3403					pf_change_ap(daddr, &th->th_dport,
3404					    pd->ip_sum, &th->th_sum,
3405					    &pd->baddr, bport, 0, af);
3406					dport = th->th_dport;
3407					rewrite++;
3408					break;
3409				case IPPROTO_UDP:
3410					pf_change_ap(daddr,
3411					    &pd->hdr.udp->uh_dport, pd->ip_sum,
3412					    &pd->hdr.udp->uh_sum, &pd->baddr,
3413					    bport, 1, af);
3414					dport = pd->hdr.udp->uh_dport;
3415					rewrite++;
3416					break;
3417				case IPPROTO_ICMP:
3418#ifdef INET6
3419				case IPPROTO_ICMPV6:
3420#endif
3421					/* nothing! */
3422					break;
3423				default:
3424					switch (af) {
3425					case AF_INET:
3426						pf_change_a(&daddr->v4.s_addr,
3427						    pd->ip_sum,
3428						    pd->baddr.v4.s_addr, 0);
3429						break;
3430					case AF_INET6:
3431						PF_ACPY(daddr, &pd->baddr, af);
3432						break;
3433					}
3434				}
3435			}
3436		}
3437		if (pd->proto == IPPROTO_TCP &&
3438		    ((r->rule_flag & PFRULE_RETURNRST) ||
3439		    (r->rule_flag & PFRULE_RETURN)) &&
3440		    !(th->th_flags & TH_RST)) {
3441			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
3442			struct ip	*hip = mtod(m, struct ip *);
3443
3444#ifdef __NetBSD__
3445			if (pf_check_proto_cksum(m, direction, off,
3446			    ntohs(hip->ip_len) - off, IPPROTO_TCP, AF_INET))
3447#else
3448			if (pf_check_proto_cksum(m, off,
3449			    ntohs(hip->ip_len) - off, IPPROTO_TCP, AF_INET))
3450#endif /* !__NetBSD__ */
3451				REASON_SET_NOPTR(&reason, PFRES_PROTCKSUM);
3452			else {
3453				if (th->th_flags & TH_SYN)
3454					ack++;
3455				if (th->th_flags & TH_FIN)
3456					ack++;
3457				pf_send_tcp(r, af, pd->dst,
3458				    pd->src, th->th_dport, th->th_sport,
3459				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3460				    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
3461			}
3462		} else if ((af == AF_INET) && r->return_icmp)
3463			pf_send_icmp(m, r->return_icmp >> 8,
3464			    r->return_icmp & 255, af, r);
3465		else if ((af == AF_INET6) && r->return_icmp6)
3466			pf_send_icmp(m, r->return_icmp6 >> 8,
3467			    r->return_icmp6 & 255, af, r);
3468	}
3469
3470	if (r->action == PF_DROP)
3471		return (PF_DROP);
3472
3473	if (pf_tag_packet(m, tag, rtableid)) {
3474		REASON_SET_NOPTR(&reason, PFRES_MEMORY);
3475		return (PF_DROP);
3476	}
3477
3478	if (!state_icmp && (r->keep_state || nr != NULL ||
3479	    (pd->flags & PFDESC_TCP_NORM))) {
3480		/* create new state */
3481		u_int16_t	 len;
3482		struct pf_state	*s = NULL;
3483		struct pf_state_key *sk = NULL;
3484		struct pf_src_node *sn = NULL;
3485
3486		/* check maximums */
3487		if (r->max_states && (r->states >= r->max_states)) {
3488			pf_status.lcounters[LCNT_STATES]++;
3489			REASON_SET_NOPTR(&reason, PFRES_MAXSTATES);
3490			goto cleanup;
3491		}
3492		/* src node for filter rule */
3493		if ((r->rule_flag & PFRULE_SRCTRACK ||
3494		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3495		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
3496			REASON_SET_NOPTR(&reason, PFRES_SRCLIMIT);
3497			goto cleanup;
3498		}
3499		/* src node for translation rule */
3500		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3501		    ((direction == PF_OUT &&
3502		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3503		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
3504			REASON_SET_NOPTR(&reason, PFRES_SRCLIMIT);
3505			goto cleanup;
3506		}
3507		s = pool_get(&pf_state_pl, PR_NOWAIT);
3508		if (s == NULL) {
3509			REASON_SET_NOPTR(&reason, PFRES_MEMORY);
3510cleanup:
3511			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3512				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3513				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3514				pf_status.src_nodes--;
3515				pool_put(&pf_src_tree_pl, sn);
3516			}
3517			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3518			    nsn->expire == 0) {
3519				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3520				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3521				pf_status.src_nodes--;
3522				pool_put(&pf_src_tree_pl, nsn);
3523			}
3524			if (sk != NULL) {
3525				pool_put(&pf_state_key_pl, sk);
3526			}
3527			return (PF_DROP);
3528		}
3529		bzero(s, sizeof(*s));
3530		s->rule.ptr = r;
3531		s->nat_rule.ptr = nr;
3532		s->anchor.ptr = a;
3533		STATE_INC_COUNTERS(s);
3534		s->allow_opts = r->allow_opts;
3535		s->log = r->log & PF_LOG_ALL;
3536		if (nr != NULL)
3537			s->log |= nr->log & PF_LOG_ALL;
3538		switch (pd->proto) {
3539		case IPPROTO_TCP:
3540			len = pd->tot_len - off - (th->th_off << 2);
3541			s->src.seqlo = ntohl(th->th_seq);
3542			s->src.seqhi = s->src.seqlo + len + 1;
3543			if ((th->th_flags & (TH_SYN|TH_ACK)) ==
3544			TH_SYN && r->keep_state == PF_STATE_MODULATE) {
3545				/* Generate sequence number modulator */
3546				while ((s->src.seqdiff =
3547				    tcp_rndiss_next() - s->src.seqlo) == 0)
3548					;
3549				pf_change_a(&th->th_seq, &th->th_sum,
3550				    htonl(s->src.seqlo + s->src.seqdiff), 0);
3551				rewrite = 1;
3552			} else
3553				s->src.seqdiff = 0;
3554			if (th->th_flags & TH_SYN) {
3555				s->src.seqhi++;
3556				s->src.wscale = pf_get_wscale(m, off,
3557				    th->th_off, af);
3558			}
3559			s->src.max_win = MAX(ntohs(th->th_win), 1);
3560			if (s->src.wscale & PF_WSCALE_MASK) {
3561				/* Remove scale factor from initial window */
3562				int win = s->src.max_win;
3563				win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3564				s->src.max_win = (win - 1) >>
3565				    (s->src.wscale & PF_WSCALE_MASK);
3566			}
3567			if (th->th_flags & TH_FIN)
3568				s->src.seqhi++;
3569			s->dst.seqhi = 1;
3570			s->dst.max_win = 1;
3571			s->src.state = TCPS_SYN_SENT;
3572			s->dst.state = TCPS_CLOSED;
3573			s->timeout = PFTM_TCP_FIRST_PACKET;
3574			break;
3575		case IPPROTO_UDP:
3576			s->src.state = PFUDPS_SINGLE;
3577			s->dst.state = PFUDPS_NO_TRAFFIC;
3578			s->timeout = PFTM_UDP_FIRST_PACKET;
3579			break;
3580		case IPPROTO_ICMP:
3581#ifdef INET6
3582		case IPPROTO_ICMPV6:
3583#endif
3584			s->timeout = PFTM_ICMP_FIRST_PACKET;
3585			break;
3586		default:
3587			s->src.state = PFOTHERS_SINGLE;
3588			s->dst.state = PFOTHERS_NO_TRAFFIC;
3589			s->timeout = PFTM_OTHER_FIRST_PACKET;
3590		}
3591
3592		s->creation = time_second;
3593		s->expire = time_second;
3594
3595		if (sn != NULL) {
3596			s->src_node = sn;
3597			s->src_node->states++;
3598		}
3599		if (nsn != NULL) {
3600			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3601			s->nat_src_node = nsn;
3602			s->nat_src_node->states++;
3603		}
3604		if (pd->proto == IPPROTO_TCP) {
3605			if ((pd->flags & PFDESC_TCP_NORM) &&
3606			    pf_normalize_tcp_init(m, off, pd, th, &s->src,
3607			    &s->dst)) {
3608				REASON_SET_NOPTR(&reason, PFRES_MEMORY);
3609				pf_src_tree_remove_state(s);
3610				STATE_DEC_COUNTERS(s);
3611				pool_put(&pf_state_pl, s);
3612				return (PF_DROP);
3613			}
3614			if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3615			    pf_normalize_tcp_stateful(m, off, pd, &reason,
3616			    th, s, &s->src, &s->dst, &rewrite)) {
3617				/* This really shouldn't happen!!! */
3618				DPFPRINTF(PF_DEBUG_URGENT,
3619				    ("pf_normalize_tcp_stateful failed on "
3620				    "first pkt"));
3621				pf_normalize_tcp_cleanup(s);
3622				pf_src_tree_remove_state(s);
3623				STATE_DEC_COUNTERS(s);
3624				pool_put(&pf_state_pl, s);
3625				return (PF_DROP);
3626			}
3627		}
3628
3629		if ((sk = pf_alloc_state_key(s)) == NULL) {
3630			REASON_SET_NOPTR(&reason, PFRES_MEMORY);
3631			goto cleanup;
3632		}
3633
3634		sk->proto = pd->proto;
3635		sk->direction = direction;
3636		sk->af = af;
3637		if (direction == PF_OUT) {
3638			PF_ACPY(&sk->gwy.addr, saddr, af);
3639			PF_ACPY(&sk->ext.addr, daddr, af);
3640			switch (pd->proto) {
3641			case IPPROTO_ICMP:
3642#ifdef INET6
3643			case IPPROTO_ICMPV6:
3644#endif
3645				sk->gwy.port = nport;
3646				sk->ext.port = 0;
3647				break;
3648			default:
3649				sk->gwy.port = sport;
3650				sk->ext.port = dport;
3651			}
3652			if (nr != NULL) {
3653				PF_ACPY(&sk->lan.addr, &pd->baddr, af);
3654				sk->lan.port = bport;
3655			} else {
3656				PF_ACPY(&sk->lan.addr, &sk->gwy.addr, af);
3657				sk->lan.port = sk->gwy.port;
3658			}
3659		} else {
3660			PF_ACPY(&sk->lan.addr, daddr, af);
3661			PF_ACPY(&sk->ext.addr, saddr, af);
3662			switch (pd->proto) {
3663			case IPPROTO_ICMP:
3664#ifdef INET6
3665			case IPPROTO_ICMPV6:
3666#endif
3667				sk->lan.port = nport;
3668				sk->ext.port = 0;
3669				break;
3670			default:
3671				sk->lan.port = dport;
3672				sk->ext.port = sport;
3673			}
3674			if (nr != NULL) {
3675				PF_ACPY(&sk->gwy.addr, &pd->baddr, af);
3676				sk->gwy.port = bport;
3677			} else {
3678				PF_ACPY(&sk->gwy.addr, &sk->lan.addr, af);
3679				sk->gwy.port = sk->lan.port;
3680			}
3681		}
3682
3683		pf_set_rt_ifp(s, saddr);	/* needs s->state_key set */
3684
3685		if (pf_insert_state(bound_iface(r, nr, kif), s)) {
3686			if (pd->proto == IPPROTO_TCP)
3687				pf_normalize_tcp_cleanup(s);
3688			REASON_SET_NOPTR(&reason, PFRES_STATEINS);
3689			pf_src_tree_remove_state(s);
3690			STATE_DEC_COUNTERS(s);
3691			pool_put(&pf_state_pl, s);
3692			return (PF_DROP);
3693		} else
3694			*sm = s;
3695		if (tag > 0) {
3696			pf_tag_ref(tag);
3697			s->tag = tag;
3698		}
3699		if (pd->proto == IPPROTO_TCP &&
3700		    (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3701		    r->keep_state == PF_STATE_SYNPROXY) {
3702			s->src.state = PF_TCPS_PROXY_SRC;
3703			if (nr != NULL) {
3704				if (direction == PF_OUT) {
3705					pf_change_ap(saddr, &th->th_sport,
3706					    pd->ip_sum, &th->th_sum, &pd->baddr,
3707					    bport, 0, af);
3708					sport = th->th_sport;
3709				} else {
3710					pf_change_ap(daddr, &th->th_dport,
3711					    pd->ip_sum, &th->th_sum, &pd->baddr,
3712					    bport, 0, af);
3713					sport = th->th_dport;
3714				}
3715			}
3716			s->src.seqhi = htonl(cprng_fast32());
3717			/* Find mss option */
3718			mss = pf_get_mss(m, off, th->th_off, af);
3719			mss = pf_calc_mss(saddr, af, mss);
3720			mss = pf_calc_mss(daddr, af, mss);
3721			s->src.mss = mss;
3722			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
3723			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3724			    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
3725			REASON_SET_NOPTR(&reason, PFRES_SYNPROXY);
3726			return (PF_SYNPROXY_DROP);
3727		}
3728	}
3729
3730	/* copy back packet headers if we performed NAT operations */
3731	if (rewrite)
3732		m_copyback(m, off, hdrlen, pd->hdr.any);
3733
3734	return (PF_PASS);
3735}
3736
3737int
3738pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3739    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3740    struct pf_ruleset **rsm)
3741{
3742	struct pf_rule		*r, *a = NULL;
3743	struct pf_ruleset	*ruleset = NULL;
3744	sa_family_t		 af = pd->af;
3745	u_short			 reason;
3746	int			 tag = -1;
3747	int			 asd = 0;
3748	int			 match = 0;
3749
3750	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3751	while (r != NULL) {
3752		r->evaluations++;
3753		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3754			r = r->skip[PF_SKIP_IFP].ptr;
3755		else if (r->direction && r->direction != direction)
3756			r = r->skip[PF_SKIP_DIR].ptr;
3757		else if (r->af && r->af != af)
3758			r = r->skip[PF_SKIP_AF].ptr;
3759		else if (r->proto && r->proto != pd->proto)
3760			r = r->skip[PF_SKIP_PROTO].ptr;
3761		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
3762		    r->src.neg, kif))
3763			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3764		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
3765		    r->dst.neg, NULL))
3766			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3767		else if (r->tos && !(r->tos == pd->tos))
3768			r = TAILQ_NEXT(r, entries);
3769		else if (r->src.port_op || r->dst.port_op ||
3770		    r->flagset || r->type || r->code ||
3771		    r->os_fingerprint != PF_OSFP_ANY)
3772			r = TAILQ_NEXT(r, entries);
3773		else if (r->prob && r->prob <= cprng_fast32())
3774			r = TAILQ_NEXT(r, entries);
3775		else if (r->match_tag && !pf_match_tag(m, r, &tag))
3776			r = TAILQ_NEXT(r, entries);
3777		else {
3778			if (r->anchor == NULL) {
3779				match = 1;
3780				*rm = r;
3781				*am = a;
3782				*rsm = ruleset;
3783				if ((*rm)->quick)
3784					break;
3785				r = TAILQ_NEXT(r, entries);
3786			} else
3787				pf_step_into_anchor(&asd, &ruleset,
3788				    PF_RULESET_FILTER, &r, &a, &match);
3789		}
3790		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3791		    PF_RULESET_FILTER, &r, &a, &match))
3792			break;
3793	}
3794	r = *rm;
3795	a = *am;
3796	ruleset = *rsm;
3797
3798	REASON_SET_NOPTR(&reason, PFRES_MATCH);
3799
3800	if (r->log)
3801		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
3802		    pd);
3803
3804	if (r->action != PF_PASS)
3805		return (PF_DROP);
3806
3807	if (pf_tag_packet(m, tag, -1)) {
3808		REASON_SET_NOPTR(&reason, PFRES_MEMORY);
3809		return (PF_DROP);
3810	}
3811
3812	return (PF_PASS);
3813}
3814
3815int
3816pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
3817    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3818    u_short *reason)
3819{
3820	struct pf_state_key_cmp	 key;
3821	struct tcphdr		*th = pd->hdr.tcp;
3822	u_int16_t		 win = ntohs(th->th_win);
3823	u_int32_t		 ack, end, seq, orig_seq;
3824	u_int8_t		 sws, dws;
3825	int			 ackskew;
3826	int			 copyback = 0;
3827	struct pf_state_peer	*src, *dst;
3828
3829	key.af = pd->af;
3830	key.proto = IPPROTO_TCP;
3831	if (direction == PF_IN)	{
3832		PF_ACPY(&key.ext.addr, pd->src, key.af);
3833		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
3834		key.ext.port = th->th_sport;
3835		key.gwy.port = th->th_dport;
3836	} else {
3837		PF_ACPY(&key.lan.addr, pd->src, key.af);
3838		PF_ACPY(&key.ext.addr, pd->dst, key.af);
3839		key.lan.port = th->th_sport;
3840		key.ext.port = th->th_dport;
3841	}
3842
3843	STATE_LOOKUP();
3844
3845	if (direction == (*state)->state_key->direction) {
3846		src = &(*state)->src;
3847		dst = &(*state)->dst;
3848	} else {
3849		src = &(*state)->dst;
3850		dst = &(*state)->src;
3851	}
3852
3853	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
3854		if (direction != (*state)->state_key->direction) {
3855			REASON_SET(reason, PFRES_SYNPROXY);
3856			return (PF_SYNPROXY_DROP);
3857		}
3858		if (th->th_flags & TH_SYN) {
3859			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
3860				REASON_SET(reason, PFRES_SYNPROXY);
3861				return (PF_DROP);
3862			}
3863			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3864			    pd->src, th->th_dport, th->th_sport,
3865			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
3866			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
3867			    0, NULL, NULL);
3868			REASON_SET(reason, PFRES_SYNPROXY);
3869			return (PF_SYNPROXY_DROP);
3870		} else if (!(th->th_flags & TH_ACK) ||
3871		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3872		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
3873			REASON_SET(reason, PFRES_SYNPROXY);
3874			return (PF_DROP);
3875		} else if ((*state)->src_node != NULL &&
3876		    pf_src_connlimit(state)) {
3877			REASON_SET(reason, PFRES_SRCLIMIT);
3878			return (PF_DROP);
3879		} else
3880			(*state)->src.state = PF_TCPS_PROXY_DST;
3881	}
3882	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
3883		struct pf_state_host *psrc, *pdst;
3884
3885		if (direction == PF_OUT) {
3886			psrc = &(*state)->state_key->gwy;
3887			pdst = &(*state)->state_key->ext;
3888		} else {
3889			psrc = &(*state)->state_key->ext;
3890			pdst = &(*state)->state_key->lan;
3891		}
3892		if (direction == (*state)->state_key->direction) {
3893			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
3894			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3895			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
3896				REASON_SET(reason, PFRES_SYNPROXY);
3897				return (PF_DROP);
3898			}
3899			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
3900			if ((*state)->dst.seqhi == 1)
3901				(*state)->dst.seqhi = htonl(cprng_fast32());
3902			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
3903			    &pdst->addr, psrc->port, pdst->port,
3904			    (*state)->dst.seqhi, 0, TH_SYN, 0,
3905			    (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
3906			REASON_SET(reason, PFRES_SYNPROXY);
3907			return (PF_SYNPROXY_DROP);
3908		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
3909		    (TH_SYN|TH_ACK)) ||
3910		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
3911			REASON_SET(reason, PFRES_SYNPROXY);
3912			return (PF_DROP);
3913		} else {
3914			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
3915			(*state)->dst.seqlo = ntohl(th->th_seq);
3916			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3917			    pd->src, th->th_dport, th->th_sport,
3918			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
3919			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
3920			    (*state)->tag, NULL, NULL);
3921			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
3922			    &pdst->addr, psrc->port, pdst->port,
3923			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
3924			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
3925			    0, NULL, NULL);
3926			(*state)->src.seqdiff = (*state)->dst.seqhi -
3927			    (*state)->src.seqlo;
3928			(*state)->dst.seqdiff = (*state)->src.seqhi -
3929			    (*state)->dst.seqlo;
3930			(*state)->src.seqhi = (*state)->src.seqlo +
3931			    (*state)->dst.max_win;
3932			(*state)->dst.seqhi = (*state)->dst.seqlo +
3933			    (*state)->src.max_win;
3934			(*state)->src.wscale = (*state)->dst.wscale = 0;
3935			(*state)->src.state = (*state)->dst.state =
3936			    TCPS_ESTABLISHED;
3937			REASON_SET(reason, PFRES_SYNPROXY);
3938			return (PF_SYNPROXY_DROP);
3939		}
3940	}
3941
3942	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
3943		sws = src->wscale & PF_WSCALE_MASK;
3944		dws = dst->wscale & PF_WSCALE_MASK;
3945	} else
3946		sws = dws = 0;
3947
3948	/*
3949	 * Sequence tracking algorithm from Guido van Rooij's paper:
3950	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
3951	 *	tcp_filtering.ps
3952	 */
3953
3954	orig_seq = seq = ntohl(th->th_seq);
3955	if (src->seqlo == 0) {
3956		/* First packet from this end. Set its state */
3957
3958		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
3959		    src->scrub == NULL) {
3960			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
3961				REASON_SET(reason, PFRES_MEMORY);
3962				return (PF_DROP);
3963			}
3964		}
3965
3966		/* Deferred generation of sequence number modulator */
3967		if (dst->seqdiff && !src->seqdiff) {
3968			while ((src->seqdiff = tcp_rndiss_next() - seq) == 0)
3969				;
3970			ack = ntohl(th->th_ack) - dst->seqdiff;
3971			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
3972			    src->seqdiff), 0);
3973			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
3974			copyback = 1;
3975		} else {
3976			ack = ntohl(th->th_ack);
3977		}
3978
3979		end = seq + pd->p_len;
3980		if (th->th_flags & TH_SYN) {
3981			end++;
3982			if (dst->wscale & PF_WSCALE_FLAG) {
3983				src->wscale = pf_get_wscale(m, off, th->th_off,
3984				    pd->af);
3985				if (src->wscale & PF_WSCALE_FLAG) {
3986					/* Remove scale factor from initial
3987					 * window */
3988					sws = src->wscale & PF_WSCALE_MASK;
3989					win = ((u_int32_t)win + (1 << sws) - 1)
3990					    >> sws;
3991					dws = dst->wscale & PF_WSCALE_MASK;
3992				} else {
3993					/* fixup other window */
3994					dst->max_win <<= dst->wscale &
3995					    PF_WSCALE_MASK;
3996					/* in case of a retrans SYN|ACK */
3997					dst->wscale = 0;
3998				}
3999			}
4000		}
4001		if (th->th_flags & TH_FIN)
4002			end++;
4003
4004		src->seqlo = seq;
4005		if (src->state < TCPS_SYN_SENT)
4006			src->state = TCPS_SYN_SENT;
4007
4008		/*
4009		 * May need to slide the window (seqhi may have been set by
4010		 * the crappy stack check or if we picked up the connection
4011		 * after establishment)
4012		 */
4013		if (src->seqhi == 1 ||
4014		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4015			src->seqhi = end + MAX(1, dst->max_win << dws);
4016		if (win > src->max_win)
4017			src->max_win = win;
4018
4019	} else {
4020		ack = ntohl(th->th_ack) - dst->seqdiff;
4021		if (src->seqdiff) {
4022			/* Modulate sequence numbers */
4023			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4024			    src->seqdiff), 0);
4025			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4026			copyback = 1;
4027		}
4028		end = seq + pd->p_len;
4029		if (th->th_flags & TH_SYN)
4030			end++;
4031		if (th->th_flags & TH_FIN)
4032			end++;
4033	}
4034
4035	if ((th->th_flags & TH_ACK) == 0) {
4036		/* Let it pass through the ack skew check */
4037		ack = dst->seqlo;
4038	} else if ((ack == 0 &&
4039	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4040	    /* broken tcp stacks do not set ack */
4041	    (dst->state < TCPS_SYN_SENT)) {
4042		/*
4043		 * Many stacks (ours included) will set the ACK number in an
4044		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4045		 */
4046		ack = dst->seqlo;
4047	}
4048
4049	if (seq == end) {
4050		/* Ease sequencing restrictions on no data packets */
4051		seq = src->seqlo;
4052		end = seq;
4053	}
4054
4055	ackskew = dst->seqlo - ack;
4056
4057
4058	/*
4059	 * Need to demodulate the sequence numbers in any TCP SACK options
4060	 * (Selective ACK). We could optionally validate the SACK values
4061	 * against the current ACK window, either forwards or backwards, but
4062	 * I'm not confident that SACK has been implemented properly
4063	 * everywhere. It wouldn't surprise me if several stacks accidently
4064	 * SACK too far backwards of previously ACKed data. There really aren't
4065	 * any security implications of bad SACKing unless the target stack
4066	 * doesn't validate the option length correctly. Someone trying to
4067	 * spoof into a TCP connection won't bother blindly sending SACK
4068	 * options anyway.
4069	 */
4070	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4071		if (pf_modulate_sack(m, off, pd, th, dst))
4072			copyback = 1;
4073	}
4074
4075
4076#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4077	if (SEQ_GEQ(src->seqhi, end) &&
4078	    /* Last octet inside other's window space */
4079	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4080	    /* Retrans: not more than one window back */
4081	    (ackskew >= -MAXACKWINDOW) &&
4082	    /* Acking not more than one reassembled fragment backwards */
4083	    (ackskew <= (MAXACKWINDOW << sws)) &&
4084	    /* Acking not more than one window forward */
4085	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4086	    (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) {
4087	    /* Require an exact/+1 sequence match on resets when possible */
4088
4089		if (dst->scrub || src->scrub) {
4090			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4091			    *state, src, dst, &copyback))
4092				return (PF_DROP);
4093		}
4094
4095		/* update max window */
4096		if (src->max_win < win)
4097			src->max_win = win;
4098		/* synchronize sequencing */
4099		if (SEQ_GT(end, src->seqlo))
4100			src->seqlo = end;
4101		/* slide the window of what the other end can send */
4102		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4103			dst->seqhi = ack + MAX((win << sws), 1);
4104
4105
4106		/* update states */
4107		if (th->th_flags & TH_SYN)
4108			if (src->state < TCPS_SYN_SENT)
4109				src->state = TCPS_SYN_SENT;
4110		if (th->th_flags & TH_FIN)
4111			if (src->state < TCPS_CLOSING)
4112				src->state = TCPS_CLOSING;
4113		if (th->th_flags & TH_ACK) {
4114			if (dst->state == TCPS_SYN_SENT) {
4115				dst->state = TCPS_ESTABLISHED;
4116				if (src->state == TCPS_ESTABLISHED &&
4117				    (*state)->src_node != NULL &&
4118				    pf_src_connlimit(state)) {
4119					REASON_SET(reason, PFRES_SRCLIMIT);
4120					return (PF_DROP);
4121				}
4122			} else if (dst->state == TCPS_CLOSING)
4123				dst->state = TCPS_FIN_WAIT_2;
4124		}
4125		if (th->th_flags & TH_RST)
4126			src->state = dst->state = TCPS_TIME_WAIT;
4127
4128		/* update expire time */
4129		(*state)->expire = time_second;
4130		if (src->state >= TCPS_FIN_WAIT_2 &&
4131		    dst->state >= TCPS_FIN_WAIT_2)
4132			(*state)->timeout = PFTM_TCP_CLOSED;
4133		else if (src->state >= TCPS_CLOSING &&
4134		    dst->state >= TCPS_CLOSING)
4135			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4136		else if (src->state < TCPS_ESTABLISHED ||
4137		    dst->state < TCPS_ESTABLISHED)
4138			(*state)->timeout = PFTM_TCP_OPENING;
4139		else if (src->state >= TCPS_CLOSING ||
4140		    dst->state >= TCPS_CLOSING)
4141			(*state)->timeout = PFTM_TCP_CLOSING;
4142		else
4143			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4144
4145		/* Fall through to PASS packet */
4146
4147	} else if ((dst->state < TCPS_SYN_SENT ||
4148		dst->state >= TCPS_FIN_WAIT_2 ||
4149		src->state >= TCPS_FIN_WAIT_2) &&
4150	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4151	    /* Within a window forward of the originating packet */
4152	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4153	    /* Within a window backward of the originating packet */
4154
4155		/*
4156		 * This currently handles three situations:
4157		 *  1) Stupid stacks will shotgun SYNs before their peer
4158		 *     replies.
4159		 *  2) When PF catches an already established stream (the
4160		 *     firewall rebooted, the state table was flushed, routes
4161		 *     changed...)
4162		 *  3) Packets get funky immediately after the connection
4163		 *     closes (this should catch Solaris spurious ACK|FINs
4164		 *     that web servers like to spew after a close)
4165		 *
4166		 * This must be a little more careful than the above code
4167		 * since packet floods will also be caught here. We don't
4168		 * update the TTL here to mitigate the damage of a packet
4169		 * flood and so the same code can handle awkward establishment
4170		 * and a loosened connection close.
4171		 * In the establishment case, a correct peer response will
4172		 * validate the connection, go through the normal state code
4173		 * and keep updating the state TTL.
4174		 */
4175
4176		if (pf_status.debug >= PF_DEBUG_MISC) {
4177			printf("pf: loose state match: ");
4178			pf_print_state(*state);
4179			pf_print_flags(th->th_flags);
4180			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4181			    "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len,
4182			    ackskew,
4183			    (unsigned long long int)(*state)->packets[0],
4184			    (unsigned long long int)(*state)->packets[1]);
4185		}
4186
4187		if (dst->scrub || src->scrub) {
4188			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4189			    *state, src, dst, &copyback))
4190				return (PF_DROP);
4191		}
4192
4193		/* update max window */
4194		if (src->max_win < win)
4195			src->max_win = win;
4196		/* synchronize sequencing */
4197		if (SEQ_GT(end, src->seqlo))
4198			src->seqlo = end;
4199		/* slide the window of what the other end can send */
4200		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4201			dst->seqhi = ack + MAX((win << sws), 1);
4202
4203		/*
4204		 * Cannot set dst->seqhi here since this could be a shotgunned
4205		 * SYN and not an already established connection.
4206		 */
4207
4208		if (th->th_flags & TH_FIN)
4209			if (src->state < TCPS_CLOSING)
4210				src->state = TCPS_CLOSING;
4211		if (th->th_flags & TH_RST)
4212			src->state = dst->state = TCPS_TIME_WAIT;
4213
4214		/* Fall through to PASS packet */
4215
4216	} else {
4217		if ((*state)->dst.state == TCPS_SYN_SENT &&
4218		    (*state)->src.state == TCPS_SYN_SENT) {
4219			/* Send RST for state mismatches during handshake */
4220			if (!(th->th_flags & TH_RST))
4221				pf_send_tcp((*state)->rule.ptr, pd->af,
4222				    pd->dst, pd->src, th->th_dport,
4223				    th->th_sport, ntohl(th->th_ack), 0,
4224				    TH_RST, 0, 0,
4225				    (*state)->rule.ptr->return_ttl, 1, 0,
4226				    pd->eh, kif->pfik_ifp);
4227			src->seqlo = 0;
4228			src->seqhi = 1;
4229			src->max_win = 1;
4230		} else if (pf_status.debug >= PF_DEBUG_MISC) {
4231			printf("pf: BAD state: ");
4232			pf_print_state(*state);
4233			pf_print_flags(th->th_flags);
4234			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4235			    "pkts=%llu:%llu dir=%s,%s\n",
4236			    seq, orig_seq, ack, pd->p_len, ackskew,
4237			    (unsigned long long int)(*state)->packets[0],
4238			    (unsigned long long int)(*state)->packets[1],
4239			    direction == PF_IN ? "in" : "out",
4240			    direction == (*state)->state_key->direction ?
4241				"fwd" : "rev");
4242			printf("pf: State failure on: %c %c %c %c | %c %c\n",
4243			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4244			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4245			    ' ': '2',
4246			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4247			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4248			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4249			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4250		}
4251		REASON_SET(reason, PFRES_BADSTATE);
4252		return (PF_DROP);
4253	}
4254
4255	/* Any packets which have gotten here are to be passed */
4256
4257	/* translate source/destination address, if necessary */
4258	if (STATE_TRANSLATE((*state)->state_key)) {
4259		if (direction == PF_OUT)
4260			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4261			    &th->th_sum, &(*state)->state_key->gwy.addr,
4262			    (*state)->state_key->gwy.port, 0, pd->af);
4263		else
4264			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4265			    &th->th_sum, &(*state)->state_key->lan.addr,
4266			    (*state)->state_key->lan.port, 0, pd->af);
4267		m_copyback(m, off, sizeof(*th), th);
4268	} else if (copyback) {
4269		/* Copyback sequence modulation or stateful scrub changes */
4270		m_copyback(m, off, sizeof(*th), th);
4271	}
4272
4273	return (PF_PASS);
4274}
4275
4276int
4277pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4278    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4279{
4280	struct pf_state_peer	*src, *dst;
4281	struct pf_state_key_cmp	 key;
4282	struct udphdr		*uh = pd->hdr.udp;
4283
4284	key.af = pd->af;
4285	key.proto = IPPROTO_UDP;
4286	if (direction == PF_IN)	{
4287		PF_ACPY(&key.ext.addr, pd->src, key.af);
4288		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4289		key.ext.port = uh->uh_sport;
4290		key.gwy.port = uh->uh_dport;
4291	} else {
4292		PF_ACPY(&key.lan.addr, pd->src, key.af);
4293		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4294		key.lan.port = uh->uh_sport;
4295		key.ext.port = uh->uh_dport;
4296	}
4297
4298	STATE_LOOKUP();
4299
4300	if (direction == (*state)->state_key->direction) {
4301		src = &(*state)->src;
4302		dst = &(*state)->dst;
4303	} else {
4304		src = &(*state)->dst;
4305		dst = &(*state)->src;
4306	}
4307
4308	/* update states */
4309	if (src->state < PFUDPS_SINGLE)
4310		src->state = PFUDPS_SINGLE;
4311	if (dst->state == PFUDPS_SINGLE)
4312		dst->state = PFUDPS_MULTIPLE;
4313
4314	/* update expire time */
4315	(*state)->expire = time_second;
4316	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4317		(*state)->timeout = PFTM_UDP_MULTIPLE;
4318	else
4319		(*state)->timeout = PFTM_UDP_SINGLE;
4320
4321	/* translate source/destination address, if necessary */
4322	if (STATE_TRANSLATE((*state)->state_key)) {
4323		if (direction == PF_OUT)
4324			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4325			    &uh->uh_sum, &(*state)->state_key->gwy.addr,
4326			    (*state)->state_key->gwy.port, 1, pd->af);
4327		else
4328			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4329			    &uh->uh_sum, &(*state)->state_key->lan.addr,
4330			    (*state)->state_key->lan.port, 1, pd->af);
4331		m_copyback(m, off, sizeof(*uh), uh);
4332	}
4333
4334	return (PF_PASS);
4335}
4336
4337int
4338pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4339    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4340    u_short *reason)
4341{
4342	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
4343	u_int16_t	 icmpid = 0, *icmpsum;
4344	u_int8_t	 icmptype;
4345	int		 state_icmp = 0;
4346	struct pf_state_key_cmp key;
4347
4348	icmpsum = NULL;	/* XXXGCC -Wuninitialized m68k */
4349	icmptype = 0;	/* XXXGCC -Wuninitialized m68k */
4350
4351	switch (pd->proto) {
4352#ifdef INET
4353	case IPPROTO_ICMP:
4354		icmptype = pd->hdr.icmp->icmp_type;
4355		icmpid = pd->hdr.icmp->icmp_id;
4356		icmpsum = &pd->hdr.icmp->icmp_cksum;
4357
4358		if (icmptype == ICMP_UNREACH ||
4359		    icmptype == ICMP_SOURCEQUENCH ||
4360		    icmptype == ICMP_REDIRECT ||
4361		    icmptype == ICMP_TIMXCEED ||
4362		    icmptype == ICMP_PARAMPROB)
4363			state_icmp++;
4364		break;
4365#endif /* INET */
4366#ifdef INET6
4367	case IPPROTO_ICMPV6:
4368		icmptype = pd->hdr.icmp6->icmp6_type;
4369		icmpid = pd->hdr.icmp6->icmp6_id;
4370		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4371
4372		if (icmptype == ICMP6_DST_UNREACH ||
4373		    icmptype == ICMP6_PACKET_TOO_BIG ||
4374		    icmptype == ICMP6_TIME_EXCEEDED ||
4375		    icmptype == ICMP6_PARAM_PROB)
4376			state_icmp++;
4377		break;
4378#endif /* INET6 */
4379	}
4380
4381	if (!state_icmp) {
4382
4383		/*
4384		 * ICMP query/reply message not related to a TCP/UDP packet.
4385		 * Search for an ICMP state.
4386		 */
4387		key.af = pd->af;
4388		key.proto = pd->proto;
4389		if (direction == PF_IN)	{
4390			PF_ACPY(&key.ext.addr, pd->src, key.af);
4391			PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4392			key.ext.port = 0;
4393			key.gwy.port = icmpid;
4394		} else {
4395			PF_ACPY(&key.lan.addr, pd->src, key.af);
4396			PF_ACPY(&key.ext.addr, pd->dst, key.af);
4397			key.lan.port = icmpid;
4398			key.ext.port = 0;
4399		}
4400
4401		STATE_LOOKUP();
4402
4403		(*state)->expire = time_second;
4404		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4405
4406		/* translate source/destination address, if necessary */
4407		if (STATE_TRANSLATE((*state)->state_key)) {
4408			if (direction == PF_OUT) {
4409				switch (pd->af) {
4410#ifdef INET
4411				case AF_INET:
4412					pf_change_a(&saddr->v4.s_addr,
4413					    pd->ip_sum,
4414					    (*state)->state_key->gwy.addr.v4.s_addr, 0);
4415					pd->hdr.icmp->icmp_cksum =
4416					    pf_cksum_fixup(
4417					    pd->hdr.icmp->icmp_cksum, icmpid,
4418					    (*state)->state_key->gwy.port, 0);
4419					pd->hdr.icmp->icmp_id =
4420					    (*state)->state_key->gwy.port;
4421					m_copyback(m, off, ICMP_MINLEN,
4422					    pd->hdr.icmp);
4423					break;
4424#endif /* INET */
4425#ifdef INET6
4426				case AF_INET6:
4427					pf_change_a6(saddr,
4428					    &pd->hdr.icmp6->icmp6_cksum,
4429					    &(*state)->state_key->gwy.addr, 0);
4430					m_copyback(m, off,
4431					    sizeof(struct icmp6_hdr),
4432					    pd->hdr.icmp6);
4433					break;
4434#endif /* INET6 */
4435				}
4436			} else {
4437				switch (pd->af) {
4438#ifdef INET
4439				case AF_INET:
4440					pf_change_a(&daddr->v4.s_addr,
4441					    pd->ip_sum,
4442					    (*state)->state_key->lan.addr.v4.s_addr, 0);
4443					pd->hdr.icmp->icmp_cksum =
4444					    pf_cksum_fixup(
4445					    pd->hdr.icmp->icmp_cksum, icmpid,
4446					    (*state)->state_key->lan.port, 0);
4447					pd->hdr.icmp->icmp_id =
4448					    (*state)->state_key->lan.port;
4449					m_copyback(m, off, ICMP_MINLEN,
4450					    pd->hdr.icmp);
4451					break;
4452#endif /* INET */
4453#ifdef INET6
4454				case AF_INET6:
4455					pf_change_a6(daddr,
4456					    &pd->hdr.icmp6->icmp6_cksum,
4457					    &(*state)->state_key->lan.addr, 0);
4458					m_copyback(m, off,
4459					    sizeof(struct icmp6_hdr),
4460					    pd->hdr.icmp6);
4461					break;
4462#endif /* INET6 */
4463				}
4464			}
4465		}
4466
4467		return (PF_PASS);
4468
4469	} else {
4470		/*
4471		 * ICMP error message in response to a TCP/UDP packet.
4472		 * Extract the inner TCP/UDP header and search for that state.
4473		 */
4474
4475		struct pf_pdesc	pd2;
4476#ifdef INET
4477		struct ip	h2;
4478#endif /* INET */
4479#ifdef INET6
4480		struct ip6_hdr	h2_6;
4481		int		terminal = 0;
4482#endif /* INET6 */
4483		int		ipoff2 = 0;
4484		int		off2 = 0;
4485
4486		memset(&pd2, 0, sizeof pd2);	/* XXX gcc */
4487
4488		pd2.af = pd->af;
4489		switch (pd->af) {
4490#ifdef INET
4491		case AF_INET:
4492			/* offset of h2 in mbuf chain */
4493			ipoff2 = off + ICMP_MINLEN;
4494
4495			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4496			    NULL, reason, pd2.af)) {
4497				DPFPRINTF(PF_DEBUG_MISC,
4498				    ("pf: ICMP error message too short "
4499				    "(ip)\n"));
4500				return (PF_DROP);
4501			}
4502			/*
4503			 * ICMP error messages don't refer to non-first
4504			 * fragments
4505			 */
4506			if (h2.ip_off & htons(IP_OFFMASK)) {
4507				REASON_SET(reason, PFRES_FRAG);
4508				return (PF_DROP);
4509			}
4510
4511			/* offset of protocol header that follows h2 */
4512			off2 = ipoff2 + (h2.ip_hl << 2);
4513
4514			pd2.proto = h2.ip_p;
4515			pd2.src = (struct pf_addr *)&h2.ip_src;
4516			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4517			pd2.ip_sum = &h2.ip_sum;
4518			break;
4519#endif /* INET */
4520#ifdef INET6
4521		case AF_INET6:
4522			ipoff2 = off + sizeof(struct icmp6_hdr);
4523
4524			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4525			    NULL, reason, pd2.af)) {
4526				DPFPRINTF(PF_DEBUG_MISC,
4527				    ("pf: ICMP error message too short "
4528				    "(ip6)\n"));
4529				return (PF_DROP);
4530			}
4531			pd2.proto = h2_6.ip6_nxt;
4532			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4533			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4534			pd2.ip_sum = NULL;
4535			off2 = ipoff2 + sizeof(h2_6);
4536			do {
4537				switch (pd2.proto) {
4538				case IPPROTO_FRAGMENT:
4539					/*
4540					 * ICMPv6 error messages for
4541					 * non-first fragments
4542					 */
4543					REASON_SET(reason, PFRES_FRAG);
4544					return (PF_DROP);
4545				case IPPROTO_AH:
4546				case IPPROTO_HOPOPTS:
4547				case IPPROTO_ROUTING:
4548				case IPPROTO_DSTOPTS: {
4549					/* get next header and header length */
4550					struct ip6_ext opt6;
4551
4552					if (!pf_pull_hdr(m, off2, &opt6,
4553					    sizeof(opt6), NULL, reason,
4554					    pd2.af)) {
4555						DPFPRINTF(PF_DEBUG_MISC,
4556						    ("pf: ICMPv6 short opt\n"));
4557						return (PF_DROP);
4558					}
4559					if (pd2.proto == IPPROTO_AH)
4560						off2 += (opt6.ip6e_len + 2) * 4;
4561					else
4562						off2 += (opt6.ip6e_len + 1) * 8;
4563					pd2.proto = opt6.ip6e_nxt;
4564					/* goto the next header */
4565					break;
4566				}
4567				default:
4568					terminal++;
4569					break;
4570				}
4571			} while (!terminal);
4572			break;
4573#endif /* INET6 */
4574		}
4575
4576		switch (pd2.proto) {
4577		case IPPROTO_TCP: {
4578			struct tcphdr		 th;
4579			u_int32_t		 seq;
4580			struct pf_state_peer	*src, *dst;
4581			u_int8_t		 dws;
4582			int			 copyback = 0;
4583
4584			/*
4585			 * Only the first 8 bytes of the TCP header can be
4586			 * expected. Don't access any TCP header fields after
4587			 * th_seq, an ackskew test is not possible.
4588			 */
4589			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
4590			    pd2.af)) {
4591				DPFPRINTF(PF_DEBUG_MISC,
4592				    ("pf: ICMP error message too short "
4593				    "(tcp)\n"));
4594				return (PF_DROP);
4595			}
4596
4597			key.af = pd2.af;
4598			key.proto = IPPROTO_TCP;
4599			if (direction == PF_IN)	{
4600				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4601				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4602				key.ext.port = th.th_dport;
4603				key.gwy.port = th.th_sport;
4604			} else {
4605				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4606				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4607				key.lan.port = th.th_dport;
4608				key.ext.port = th.th_sport;
4609			}
4610
4611			STATE_LOOKUP();
4612
4613			if (direction == (*state)->state_key->direction) {
4614				src = &(*state)->dst;
4615				dst = &(*state)->src;
4616			} else {
4617				src = &(*state)->src;
4618				dst = &(*state)->dst;
4619			}
4620
4621			if (src->wscale && dst->wscale)
4622				dws = dst->wscale & PF_WSCALE_MASK;
4623			else
4624				dws = 0;
4625
4626			/* Demodulate sequence number */
4627			seq = ntohl(th.th_seq) - src->seqdiff;
4628			if (src->seqdiff) {
4629				pf_change_a(&th.th_seq, icmpsum,
4630				    htonl(seq), 0);
4631				copyback = 1;
4632			}
4633
4634			if (!SEQ_GEQ(src->seqhi, seq) ||
4635			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
4636				if (pf_status.debug >= PF_DEBUG_MISC) {
4637					printf("pf: BAD ICMP %d:%d ",
4638					    icmptype, pd->hdr.icmp->icmp_code);
4639					pf_print_host(pd->src, 0, pd->af);
4640					printf(" -> ");
4641					pf_print_host(pd->dst, 0, pd->af);
4642					printf(" state: ");
4643					pf_print_state(*state);
4644					printf(" seq=%u\n", seq);
4645				}
4646				REASON_SET(reason, PFRES_BADSTATE);
4647				return (PF_DROP);
4648			}
4649
4650			if (STATE_TRANSLATE((*state)->state_key)) {
4651				if (direction == PF_IN) {
4652					pf_change_icmp(pd2.src, &th.th_sport,
4653					    daddr, &(*state)->state_key->lan.addr,
4654					    (*state)->state_key->lan.port, NULL,
4655					    pd2.ip_sum, icmpsum,
4656					    pd->ip_sum, 0, pd2.af);
4657				} else {
4658					pf_change_icmp(pd2.dst, &th.th_dport,
4659					    saddr, &(*state)->state_key->gwy.addr,
4660					    (*state)->state_key->gwy.port, NULL,
4661					    pd2.ip_sum, icmpsum,
4662					    pd->ip_sum, 0, pd2.af);
4663				}
4664				copyback = 1;
4665			}
4666
4667			if (copyback) {
4668				switch (pd2.af) {
4669#ifdef INET
4670				case AF_INET:
4671					m_copyback(m, off, ICMP_MINLEN,
4672					    pd->hdr.icmp);
4673					m_copyback(m, ipoff2, sizeof(h2),
4674					    &h2);
4675					break;
4676#endif /* INET */
4677#ifdef INET6
4678				case AF_INET6:
4679					m_copyback(m, off,
4680					    sizeof(struct icmp6_hdr),
4681					    pd->hdr.icmp6);
4682					m_copyback(m, ipoff2, sizeof(h2_6),
4683					    &h2_6);
4684					break;
4685#endif /* INET6 */
4686				}
4687				m_copyback(m, off2, 8, &th);
4688			}
4689
4690			return (PF_PASS);
4691			break;
4692		}
4693		case IPPROTO_UDP: {
4694			struct udphdr		uh;
4695
4696			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4697			    NULL, reason, pd2.af)) {
4698				DPFPRINTF(PF_DEBUG_MISC,
4699				    ("pf: ICMP error message too short "
4700				    "(udp)\n"));
4701				return (PF_DROP);
4702			}
4703
4704			key.af = pd2.af;
4705			key.proto = IPPROTO_UDP;
4706			if (direction == PF_IN)	{
4707				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4708				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4709				key.ext.port = uh.uh_dport;
4710				key.gwy.port = uh.uh_sport;
4711			} else {
4712				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4713				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4714				key.lan.port = uh.uh_dport;
4715				key.ext.port = uh.uh_sport;
4716			}
4717
4718			STATE_LOOKUP();
4719
4720			if (STATE_TRANSLATE((*state)->state_key)) {
4721				if (direction == PF_IN) {
4722					pf_change_icmp(pd2.src, &uh.uh_sport,
4723					    daddr,
4724					    &(*state)->state_key->lan.addr,
4725					    (*state)->state_key->lan.port,
4726					    &uh.uh_sum,
4727					    pd2.ip_sum, icmpsum,
4728					    pd->ip_sum, 1, pd2.af);
4729				} else {
4730					pf_change_icmp(pd2.dst, &uh.uh_dport,
4731					    saddr,
4732					    &(*state)->state_key->gwy.addr,
4733					    (*state)->state_key->gwy.port, &uh.uh_sum,
4734					    pd2.ip_sum, icmpsum,
4735					    pd->ip_sum, 1, pd2.af);
4736				}
4737				switch (pd2.af) {
4738#ifdef INET
4739				case AF_INET:
4740					m_copyback(m, off, ICMP_MINLEN,
4741					    pd->hdr.icmp);
4742					m_copyback(m, ipoff2, sizeof(h2), &h2);
4743					break;
4744#endif /* INET */
4745#ifdef INET6
4746				case AF_INET6:
4747					m_copyback(m, off,
4748					    sizeof(struct icmp6_hdr),
4749					    pd->hdr.icmp6);
4750					m_copyback(m, ipoff2, sizeof(h2_6),
4751					    &h2_6);
4752					break;
4753#endif /* INET6 */
4754				}
4755				m_copyback(m, off2, sizeof(uh), &uh);
4756			}
4757
4758			return (PF_PASS);
4759			break;
4760		}
4761#ifdef INET
4762		case IPPROTO_ICMP: {
4763			struct icmp		iih;
4764
4765			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4766			    NULL, reason, pd2.af)) {
4767				DPFPRINTF(PF_DEBUG_MISC,
4768				    ("pf: ICMP error message too short i"
4769				    "(icmp)\n"));
4770				return (PF_DROP);
4771			}
4772
4773			key.af = pd2.af;
4774			key.proto = IPPROTO_ICMP;
4775			if (direction == PF_IN)	{
4776				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4777				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4778				key.ext.port = 0;
4779				key.gwy.port = iih.icmp_id;
4780			} else {
4781				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4782				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4783				key.lan.port = iih.icmp_id;
4784				key.ext.port = 0;
4785			}
4786
4787			STATE_LOOKUP();
4788
4789			if (STATE_TRANSLATE((*state)->state_key)) {
4790				if (direction == PF_IN) {
4791					pf_change_icmp(pd2.src, &iih.icmp_id,
4792					    daddr,
4793					    &(*state)->state_key->lan.addr,
4794					    (*state)->state_key->lan.port, NULL,
4795					    pd2.ip_sum, icmpsum,
4796					    pd->ip_sum, 0, AF_INET);
4797				} else {
4798					pf_change_icmp(pd2.dst, &iih.icmp_id,
4799					    saddr,
4800					    &(*state)->state_key->gwy.addr,
4801					    (*state)->state_key->gwy.port, NULL,
4802					    pd2.ip_sum, icmpsum,
4803					    pd->ip_sum, 0, AF_INET);
4804				}
4805				m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
4806				m_copyback(m, ipoff2, sizeof(h2), &h2);
4807				m_copyback(m, off2, ICMP_MINLEN, &iih);
4808			}
4809
4810			return (PF_PASS);
4811			break;
4812		}
4813#endif /* INET */
4814#ifdef INET6
4815		case IPPROTO_ICMPV6: {
4816			struct icmp6_hdr	iih;
4817
4818			if (!pf_pull_hdr(m, off2, &iih,
4819			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
4820				DPFPRINTF(PF_DEBUG_MISC,
4821				    ("pf: ICMP error message too short "
4822				    "(icmp6)\n"));
4823				return (PF_DROP);
4824			}
4825
4826			key.af = pd2.af;
4827			key.proto = IPPROTO_ICMPV6;
4828			if (direction == PF_IN)	{
4829				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4830				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4831				key.ext.port = 0;
4832				key.gwy.port = iih.icmp6_id;
4833			} else {
4834				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4835				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4836				key.lan.port = iih.icmp6_id;
4837				key.ext.port = 0;
4838			}
4839
4840			STATE_LOOKUP();
4841
4842			if (STATE_TRANSLATE((*state)->state_key)) {
4843				if (direction == PF_IN) {
4844					pf_change_icmp(pd2.src, &iih.icmp6_id,
4845					    daddr,
4846					    &(*state)->state_key->lan.addr,
4847					    (*state)->state_key->lan.port, NULL,
4848					    pd2.ip_sum, icmpsum,
4849					    pd->ip_sum, 0, AF_INET6);
4850				} else {
4851					pf_change_icmp(pd2.dst, &iih.icmp6_id,
4852					    saddr, &(*state)->state_key->gwy.addr,
4853					    (*state)->state_key->gwy.port, NULL,
4854					    pd2.ip_sum, icmpsum,
4855					    pd->ip_sum, 0, AF_INET6);
4856				}
4857				m_copyback(m, off, sizeof(struct icmp6_hdr),
4858				    pd->hdr.icmp6);
4859				m_copyback(m, ipoff2, sizeof(h2_6), &h2_6);
4860				m_copyback(m, off2, sizeof(struct icmp6_hdr),
4861				    &iih);
4862			}
4863
4864			return (PF_PASS);
4865			break;
4866		}
4867#endif /* INET6 */
4868		default: {
4869			key.af = pd2.af;
4870			key.proto = pd2.proto;
4871			if (direction == PF_IN)	{
4872				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4873				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4874				key.ext.port = 0;
4875				key.gwy.port = 0;
4876			} else {
4877				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4878				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4879				key.lan.port = 0;
4880				key.ext.port = 0;
4881			}
4882
4883			STATE_LOOKUP();
4884
4885			if (STATE_TRANSLATE((*state)->state_key)) {
4886				if (direction == PF_IN) {
4887					pf_change_icmp(pd2.src, NULL,
4888					    daddr,
4889					    &(*state)->state_key->lan.addr,
4890					    0, NULL,
4891					    pd2.ip_sum, icmpsum,
4892					    pd->ip_sum, 0, pd2.af);
4893				} else {
4894					pf_change_icmp(pd2.dst, NULL,
4895					    saddr,
4896					    &(*state)->state_key->gwy.addr,
4897					    0, NULL,
4898					    pd2.ip_sum, icmpsum,
4899					    pd->ip_sum, 0, pd2.af);
4900				}
4901				switch (pd2.af) {
4902#ifdef INET
4903				case AF_INET:
4904					m_copyback(m, off, ICMP_MINLEN,
4905					    pd->hdr.icmp);
4906					m_copyback(m, ipoff2, sizeof(h2), &h2);
4907					break;
4908#endif /* INET */
4909#ifdef INET6
4910				case AF_INET6:
4911					m_copyback(m, off,
4912					    sizeof(struct icmp6_hdr),
4913					    pd->hdr.icmp6);
4914					m_copyback(m, ipoff2, sizeof(h2_6),
4915					    &h2_6);
4916					break;
4917#endif /* INET6 */
4918				}
4919			}
4920
4921			return (PF_PASS);
4922			break;
4923		}
4924		}
4925	}
4926}
4927
4928int
4929pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
4930    struct pf_pdesc *pd)
4931{
4932	struct pf_state_peer	*src, *dst;
4933	struct pf_state_key_cmp	 key;
4934
4935	key.af = pd->af;
4936	key.proto = pd->proto;
4937	if (direction == PF_IN)	{
4938		PF_ACPY(&key.ext.addr, pd->src, key.af);
4939		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4940		key.ext.port = 0;
4941		key.gwy.port = 0;
4942	} else {
4943		PF_ACPY(&key.lan.addr, pd->src, key.af);
4944		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4945		key.lan.port = 0;
4946		key.ext.port = 0;
4947	}
4948
4949	STATE_LOOKUP();
4950
4951	if (direction == (*state)->state_key->direction) {
4952		src = &(*state)->src;
4953		dst = &(*state)->dst;
4954	} else {
4955		src = &(*state)->dst;
4956		dst = &(*state)->src;
4957	}
4958
4959	/* update states */
4960	if (src->state < PFOTHERS_SINGLE)
4961		src->state = PFOTHERS_SINGLE;
4962	if (dst->state == PFOTHERS_SINGLE)
4963		dst->state = PFOTHERS_MULTIPLE;
4964
4965	/* update expire time */
4966	(*state)->expire = time_second;
4967	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
4968		(*state)->timeout = PFTM_OTHER_MULTIPLE;
4969	else
4970		(*state)->timeout = PFTM_OTHER_SINGLE;
4971
4972	/* translate source/destination address, if necessary */
4973	if (STATE_TRANSLATE((*state)->state_key)) {
4974		if (direction == PF_OUT)
4975			switch (pd->af) {
4976#ifdef INET
4977			case AF_INET:
4978				pf_change_a(&pd->src->v4.s_addr,
4979				    pd->ip_sum,
4980				    (*state)->state_key->gwy.addr.v4.s_addr,
4981				    0);
4982				break;
4983#endif /* INET */
4984#ifdef INET6
4985			case AF_INET6:
4986				PF_ACPY(pd->src,
4987				    &(*state)->state_key->gwy.addr, pd->af);
4988				break;
4989#endif /* INET6 */
4990			}
4991		else
4992			switch (pd->af) {
4993#ifdef INET
4994			case AF_INET:
4995				pf_change_a(&pd->dst->v4.s_addr,
4996				    pd->ip_sum,
4997				    (*state)->state_key->lan.addr.v4.s_addr,
4998				    0);
4999				break;
5000#endif /* INET */
5001#ifdef INET6
5002			case AF_INET6:
5003				PF_ACPY(pd->dst,
5004				    &(*state)->state_key->lan.addr, pd->af);
5005				break;
5006#endif /* INET6 */
5007			}
5008	}
5009
5010	return (PF_PASS);
5011}
5012
5013/*
5014 * ipoff and off are measured from the start of the mbuf chain.
5015 * h must be at "ipoff" on the mbuf chain.
5016 */
5017void *
5018pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5019    u_short *actionp, u_short *reasonp, sa_family_t af)
5020{
5021	switch (af) {
5022#ifdef INET
5023	case AF_INET: {
5024		struct ip	*h = mtod(m, struct ip *);
5025		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5026
5027		if (fragoff) {
5028			if (fragoff >= len)
5029				ACTION_SET(actionp, PF_PASS);
5030			else {
5031				ACTION_SET(actionp, PF_DROP);
5032				REASON_SET(reasonp, PFRES_FRAG);
5033			}
5034			return (NULL);
5035		}
5036		if (m->m_pkthdr.len < off + len ||
5037		    ntohs(h->ip_len) < off + len) {
5038			ACTION_SET(actionp, PF_DROP);
5039			REASON_SET(reasonp, PFRES_SHORT);
5040			return (NULL);
5041		}
5042		break;
5043	}
5044#endif /* INET */
5045#ifdef INET6
5046	case AF_INET6: {
5047		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5048
5049		if (m->m_pkthdr.len < off + len ||
5050		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5051		    (unsigned)(off + len)) {
5052			ACTION_SET(actionp, PF_DROP);
5053			REASON_SET(reasonp, PFRES_SHORT);
5054			return (NULL);
5055		}
5056		break;
5057	}
5058#endif /* INET6 */
5059	}
5060	m_copydata(m, off, len, p);
5061	return (p);
5062}
5063
5064int
5065pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
5066{
5067#ifdef __NetBSD__
5068	union {
5069		struct sockaddr		dst;
5070		struct sockaddr_in	dst4;
5071		struct sockaddr_in6	dst6;
5072	} u;
5073	struct route		 ro;
5074	int			 ret = 1;
5075	struct rtentry		*rt;
5076
5077	bzero(&ro, sizeof(ro));
5078	switch (af) {
5079	case AF_INET:
5080		sockaddr_in_init(&u.dst4, &addr->v4, 0);
5081		break;
5082#ifdef INET6
5083	case AF_INET6:
5084		sockaddr_in6_init(&u.dst6, &addr->v6, 0, 0, 0);
5085		break;
5086#endif /* INET6 */
5087	default:
5088		return (0);
5089	}
5090	rtcache_setdst(&ro, &u.dst);
5091
5092	rt = rtcache_init(&ro);
5093	ret = rt != NULL ? 1 : 0;
5094	if (rt != NULL)
5095		rtcache_unref(rt, &ro);
5096	rtcache_free(&ro);
5097
5098	return (ret);
5099#else /* !__NetBSD__ */
5100	struct sockaddr_in	*dst;
5101	int			 ret = 1;
5102	int			 check_mpath;
5103	extern int		 ipmultipath;
5104#ifdef INET6
5105	extern int		 ip6_multipath;
5106	struct sockaddr_in6	*dst6;
5107	struct route_in6	 ro;
5108#else
5109	struct route		 ro;
5110#endif
5111	struct radix_node	*rn;
5112	struct rtentry		*rt;
5113	struct ifnet		*ifp;
5114
5115	check_mpath = 0;
5116	bzero(&ro, sizeof(ro));
5117	switch (af) {
5118	case AF_INET:
5119		dst = satosin(&ro.ro_dst);
5120		dst->sin_family = AF_INET;
5121		dst->sin_len = sizeof(*dst);
5122		dst->sin_addr = addr->v4;
5123		if (ipmultipath)
5124			check_mpath = 1;
5125		break;
5126#ifdef INET6
5127	case AF_INET6:
5128		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5129		dst6->sin6_family = AF_INET6;
5130		dst6->sin6_len = sizeof(*dst6);
5131		dst6->sin6_addr = addr->v6;
5132		if (ip6_multipath)
5133			check_mpath = 1;
5134		break;
5135#endif /* INET6 */
5136	default:
5137		return (0);
5138	}
5139
5140	/* Skip checks for ipsec interfaces */
5141	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
5142		goto out;
5143
5144	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5145
5146	if (ro.ro_rt != NULL) {
5147		/* No interface given, this is a no-route check */
5148		if (kif == NULL)
5149			goto out;
5150
5151		if (kif->pfik_ifp == NULL) {
5152			ret = 0;
5153			goto out;
5154		}
5155
5156		/* Perform uRPF check if passed input interface */
5157		ret = 0;
5158		rn = (struct radix_node *)ro.ro_rt;
5159		do {
5160			rt = (struct rtentry *)rn;
5161			if (rt->rt_ifp->if_type == IFT_CARP)
5162				ifp = rt->rt_ifp->if_carpdev;
5163			else
5164				ifp = rt->rt_ifp;
5165
5166			if (kif->pfik_ifp == ifp)
5167				ret = 1;
5168			rn = rn_mpath_next(rn);
5169		} while (check_mpath == 1 && rn != NULL && ret == 0);
5170	} else
5171		ret = 0;
5172out:
5173	if (ro.ro_rt != NULL)
5174		RTFREE(ro.ro_rt);
5175	return (ret);
5176#endif /* !__NetBSD__ */
5177}
5178
5179int
5180pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
5181{
5182#ifdef __NetBSD__
5183	/* NetBSD doesn't have route labels. */
5184
5185	return (0);
5186#else
5187	struct sockaddr_in	*dst;
5188#ifdef INET6
5189	struct sockaddr_in6	*dst6;
5190	struct route_in6	 ro;
5191#else
5192	struct route		 ro;
5193#endif
5194	int			 ret = 0;
5195
5196	bzero(&ro, sizeof(ro));
5197	switch (af) {
5198	case AF_INET:
5199		dst = satosin(&ro.ro_dst);
5200		dst->sin_family = AF_INET;
5201		dst->sin_len = sizeof(*dst);
5202		dst->sin_addr = addr->v4;
5203		break;
5204#ifdef INET6
5205	case AF_INET6:
5206		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5207		dst6->sin6_family = AF_INET6;
5208		dst6->sin6_len = sizeof(*dst6);
5209		dst6->sin6_addr = addr->v6;
5210		break;
5211#endif /* INET6 */
5212	default:
5213		return (0);
5214	}
5215
5216	rtalloc_noclone((struct route *)&ro, NO_CLONING);
5217
5218	if (ro.ro_rt != NULL) {
5219		if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
5220			ret = 1;
5221		RTFREE(ro.ro_rt);
5222	}
5223
5224	return (ret);
5225#endif /* !__NetBSD__ */
5226}
5227
5228#ifdef INET
5229void
5230pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5231    struct pf_state *s, struct pf_pdesc *pd)
5232{
5233	struct mbuf		*m0, *m1;
5234	struct route		 iproute;
5235	struct route		*ro = NULL;
5236	const struct sockaddr	*dst;
5237	union {
5238		struct sockaddr		dst;
5239		struct sockaddr_in	dst4;
5240	} u;
5241	struct ip		*ip;
5242	struct ifnet		*ifp = NULL;
5243	struct pf_addr		 naddr;
5244	struct pf_src_node	*sn = NULL;
5245	int			 error = 0;
5246#ifdef __NetBSD__
5247	struct pf_mtag		*pf_mtag;
5248#endif /* __NetBSD__ */
5249
5250	if (m == NULL || *m == NULL || r == NULL ||
5251	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5252		panic("pf_route: invalid parameters");
5253
5254#ifdef __NetBSD__
5255	if ((pf_mtag = pf_get_mtag(*m)) == NULL) {
5256		m0 = *m;
5257		*m = NULL;
5258		goto bad;
5259	}
5260	if (pf_mtag->routed++ > 3) {
5261		m0 = *m;
5262		*m = NULL;
5263		goto bad;
5264	}
5265#else
5266	if ((*m)->m_pkthdr.pf.routed++ > 3) {
5267		m0 = *m;
5268		*m = NULL;
5269		goto bad;
5270	}
5271#endif /* !__NetBSD__ */
5272
5273	if (r->rt == PF_DUPTO) {
5274		if ((m0 = m_dup(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5275			return;
5276	} else {
5277		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5278			return;
5279		m0 = *m;
5280	}
5281
5282	if (m0->m_len < sizeof(struct ip)) {
5283		DPFPRINTF(PF_DEBUG_URGENT,
5284		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
5285		goto bad;
5286	}
5287
5288	ip = mtod(m0, struct ip *);
5289
5290	ro = &iproute;
5291	memset(ro, 0, sizeof(*ro));
5292	sockaddr_in_init(&u.dst4, &ip->ip_dst, 0);
5293	dst = &u.dst;
5294	rtcache_setdst(ro, dst);
5295
5296	if (r->rt == PF_FASTROUTE) {
5297		struct rtentry *rt;
5298
5299		rt = rtcache_init(ro);
5300
5301		if (rt == NULL) {
5302			ip_statinc(IP_STAT_NOROUTE);
5303			goto bad;
5304		}
5305
5306		ifp = rt->rt_ifp;
5307		rt->rt_use++;
5308
5309		if (rt->rt_flags & RTF_GATEWAY)
5310			dst = rt->rt_gateway;
5311		rtcache_unref(rt, ro); /* FIXME dst is NOMPSAFE */
5312	} else {
5313		if (TAILQ_EMPTY(&r->rpool.list)) {
5314			DPFPRINTF(PF_DEBUG_URGENT,
5315			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
5316			goto bad;
5317		}
5318		if (s == NULL) {
5319			pf_map_addr(AF_INET, r,
5320			    (const struct pf_addr *)&ip->ip_src,
5321			    &naddr, NULL, &sn);
5322			if (!PF_AZERO(&naddr, AF_INET))
5323				u.dst4.sin_addr.s_addr = naddr.v4.s_addr;
5324			ifp = r->rpool.cur->kif ?
5325			    r->rpool.cur->kif->pfik_ifp : NULL;
5326		} else {
5327			if (!PF_AZERO(&s->rt_addr, AF_INET))
5328				u.dst4.sin_addr.s_addr = s->rt_addr.v4.s_addr;
5329			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5330		}
5331	}
5332	if (ifp == NULL)
5333		goto bad;
5334
5335	if (oifp != ifp) {
5336		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
5337			goto bad;
5338		else if (m0 == NULL)
5339			goto done;
5340		if (m0->m_len < sizeof(struct ip)) {
5341			DPFPRINTF(PF_DEBUG_URGENT,
5342			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
5343			goto bad;
5344		}
5345		ip = mtod(m0, struct ip *);
5346	}
5347
5348	/* Copied from ip_output. */
5349
5350	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
5351#ifdef __NetBSD__
5352	if (m0->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
5353		in_undefer_cksum_tcpudp(m0);
5354		m0->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
5355	}
5356#else
5357	if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) {
5358		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
5359		    ifp->if_bridge != NULL) {
5360			in_delayed_cksum(m0);
5361			m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */
5362		}
5363	} else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) {
5364		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
5365		    ifp->if_bridge != NULL) {
5366			in_delayed_cksum(m0);
5367			m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */
5368		}
5369	}
5370#endif /* !__NetBSD__ */
5371
5372	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
5373#ifdef __NetBSD__
5374		ip->ip_sum = 0;
5375		ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5376
5377		m0->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
5378#else
5379		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
5380		    ifp->if_bridge == NULL) {
5381			m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
5382			ipstat.ips_outhwcsum++;
5383		} else {
5384			ip->ip_sum = 0;
5385			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5386		}
5387		/* Update relevant hardware checksum stats for TCP/UDP */
5388		if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT)
5389			tcpstat.tcps_outhwcsum++;
5390		else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT)
5391			udpstat.udps_outhwcsum++;
5392#endif /* !__NetBSD__ */
5393		error = if_output_lock(ifp, ifp, m0, dst, NULL);
5394		goto done;
5395	}
5396
5397	/*
5398	 * Too large for interface; fragment if possible.
5399	 * Must be able to put at least 8 bytes per fragment.
5400	 */
5401	if (ip->ip_off & htons(IP_DF)) {
5402		ip_statinc(IP_STAT_CANTFRAG);
5403		if (r->rt != PF_DUPTO) {
5404			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5405			    ifp->if_mtu);
5406			goto done;
5407		} else
5408			goto bad;
5409	}
5410
5411#ifdef __NetBSD__
5412	/* Make ip_fragment re-compute checksums. */
5413	if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) {
5414		m0->m_pkthdr.csum_flags |= M_CSUM_IPv4;
5415	}
5416#endif /* __NetBSD__ */
5417	m1 = m0;
5418	error = ip_fragment(m0, ifp, ifp->if_mtu);
5419	if (error) {
5420		m0 = NULL;
5421		goto bad;
5422	}
5423
5424	for (m0 = m1; m0; m0 = m1) {
5425		m1 = m0->m_nextpkt;
5426		m0->m_nextpkt = 0;
5427		if (error == 0)
5428			error = (*ifp->if_output)(ifp, m0, dst, NULL);
5429		else
5430			m_freem(m0);
5431	}
5432
5433	if (error == 0)
5434		ip_statinc(IP_STAT_FRAGMENTED);
5435
5436done:
5437	if (r->rt != PF_DUPTO)
5438		*m = NULL;
5439	if (ro == &iproute)
5440		rtcache_free(ro);
5441	return;
5442
5443bad:
5444	m_freem(m0);
5445	goto done;
5446}
5447#endif /* INET */
5448
5449#ifdef INET6
5450void
5451pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5452    struct pf_state *s, struct pf_pdesc *pd)
5453{
5454	struct mbuf		*m0;
5455	struct sockaddr_in6	 dst;
5456	struct ip6_hdr		*ip6;
5457	struct ifnet		*ifp = NULL;
5458	struct pf_addr		 naddr;
5459	struct pf_src_node	*sn = NULL;
5460#ifdef __NetBSD__
5461	struct pf_mtag		*pf_mtag;
5462#endif /* __NetBSD__ */
5463
5464	if (m == NULL || *m == NULL || r == NULL ||
5465	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5466		panic("pf_route6: invalid parameters");
5467
5468#ifdef __NetBSD__
5469	if ((pf_mtag = pf_get_mtag(*m)) == NULL) {
5470		m0 = *m;
5471		*m = NULL;
5472		goto bad;
5473	}
5474	if (pf_mtag->routed++ > 3) {
5475		m0 = *m;
5476		*m = NULL;
5477		goto bad;
5478	}
5479#else
5480	if ((*m)->m_pkthdr.pf.routed++ > 3) {
5481		m0 = *m;
5482		*m = NULL;
5483		goto bad;
5484	}
5485#endif /* !__NetBSD__ */
5486
5487	if (r->rt == PF_DUPTO) {
5488		if ((m0 = m_dup(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5489			return;
5490	} else {
5491		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5492			return;
5493		m0 = *m;
5494	}
5495
5496	if (m0->m_len < sizeof(struct ip6_hdr)) {
5497		DPFPRINTF(PF_DEBUG_URGENT,
5498		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
5499		goto bad;
5500	}
5501	ip6 = mtod(m0, struct ip6_hdr *);
5502
5503	dst.sin6_family = AF_INET6;
5504	dst.sin6_len = sizeof(dst);
5505	dst.sin6_addr = ip6->ip6_dst;
5506
5507	/* Cheat. XXX why only in the v6 case??? */
5508	if (r->rt == PF_FASTROUTE) {
5509#ifdef __NetBSD__
5510		pf_mtag->flags |= PF_TAG_GENERATED;
5511#else
5512		m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
5513#endif /* !__NetBSD__ */
5514		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5515		return;
5516	}
5517
5518	if (TAILQ_EMPTY(&r->rpool.list)) {
5519		DPFPRINTF(PF_DEBUG_URGENT,
5520		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
5521		goto bad;
5522	}
5523	if (s == NULL) {
5524		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
5525		    &naddr, NULL, &sn);
5526		if (!PF_AZERO(&naddr, AF_INET6))
5527			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
5528			    &naddr, AF_INET6);
5529		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
5530	} else {
5531		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5532			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
5533			    &s->rt_addr, AF_INET6);
5534		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5535	}
5536	if (ifp == NULL)
5537		goto bad;
5538
5539	if (oifp != ifp) {
5540		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
5541			goto bad;
5542		else if (m0 == NULL)
5543			goto done;
5544		if (m0->m_len < sizeof(struct ip6_hdr)) {
5545			DPFPRINTF(PF_DEBUG_URGENT,
5546			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
5547			goto bad;
5548		}
5549		ip6 = mtod(m0, struct ip6_hdr *);
5550	}
5551
5552	/*
5553	 * If the packet is too large for the outgoing interface,
5554	 * send back an icmp6 error.
5555	 */
5556	if (IN6_IS_SCOPE_EMBEDDABLE(&dst.sin6_addr))
5557		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5558	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5559		(void)ip6_if_output(ifp, ifp, m0, &dst, NULL);
5560	} else {
5561		in6_ifstat_inc(ifp, ifs6_in_toobig);
5562		if (r->rt != PF_DUPTO)
5563			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5564		else
5565			goto bad;
5566	}
5567
5568done:
5569	if (r->rt != PF_DUPTO)
5570		*m = NULL;
5571	return;
5572
5573bad:
5574	m_freem(m0);
5575	goto done;
5576}
5577#endif /* INET6 */
5578
5579
5580/*
5581 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
5582 *   off is the offset where the protocol header starts
5583 *   len is the total length of protocol header plus payload
5584 * returns 0 when the checksum is valid, otherwise returns 1.
5585 */
5586#ifdef __NetBSD__
5587int
5588pf_check_proto_cksum(struct mbuf *m, int direction, int off, int len,
5589    u_int8_t p, sa_family_t af)
5590#else
5591int
5592pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
5593    sa_family_t af)
5594#endif /* !__NetBSD__ */
5595{
5596#ifndef __NetBSD__
5597	u_int16_t flag_ok, flag_bad;
5598#endif /* !__NetBSD__ */
5599	u_int16_t sum;
5600
5601#ifndef __NetBSD__
5602	switch (p) {
5603	case IPPROTO_TCP:
5604		flag_ok = M_TCP_CSUM_IN_OK;
5605		flag_bad = M_TCP_CSUM_IN_BAD;
5606		break;
5607	case IPPROTO_UDP:
5608		flag_ok = M_UDP_CSUM_IN_OK;
5609		flag_bad = M_UDP_CSUM_IN_BAD;
5610		break;
5611	case IPPROTO_ICMP:
5612#ifdef INET6
5613	case IPPROTO_ICMPV6:
5614#endif /* INET6 */
5615		flag_ok = flag_bad = 0;
5616		break;
5617	default:
5618		return (1);
5619	}
5620	if (m->m_pkthdr.csum_flags & flag_ok)
5621		return (0);
5622	if (m->m_pkthdr.csum_flags & flag_bad)
5623		return (1);
5624#endif /* !__NetBSD__ */
5625	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5626		return (1);
5627	if (m->m_pkthdr.len < off + len)
5628		return (1);
5629#ifdef __NetBSD__
5630	if (direction == PF_IN) {
5631		switch (p) {
5632		case IPPROTO_TCP: {
5633			struct tcphdr th; /* XXX */
5634			int thlen;
5635
5636			m_copydata(m, off, sizeof(th), &th); /* XXX */
5637			thlen = th.th_off << 2;
5638			return tcp_input_checksum(af, m, &th, off,
5639			    thlen, len - thlen) != 0;
5640		}
5641
5642		case IPPROTO_UDP: {
5643			struct udphdr uh; /* XXX */
5644
5645			m_copydata(m, off, sizeof(uh), &uh); /* XXX */
5646			return udp_input_checksum(af, m, &uh, off, len) != 0;
5647		}
5648		}
5649	}
5650#endif /* __NetBSD__ */
5651	switch (af) {
5652#ifdef INET
5653	case AF_INET:
5654		if (p == IPPROTO_ICMP) {
5655			if (m->m_len < off)
5656				return (1);
5657			m->m_data += off;
5658			m->m_len -= off;
5659			sum = in_cksum(m, len);
5660			m->m_data -= off;
5661			m->m_len += off;
5662		} else {
5663			if (m->m_len < sizeof(struct ip))
5664				return (1);
5665			sum = in4_cksum(m, p, off, len);
5666		}
5667		break;
5668#endif /* INET */
5669#ifdef INET6
5670	case AF_INET6:
5671		if (m->m_len < sizeof(struct ip6_hdr))
5672			return (1);
5673		sum = in6_cksum(m, p, off, len);
5674		break;
5675#endif /* INET6 */
5676	default:
5677		return (1);
5678	}
5679	if (sum) {
5680#ifndef __NetBSD__
5681		m->m_pkthdr.csum_flags |= flag_bad;
5682#endif /* !__NetBSD__ */
5683		switch (p) {
5684		case IPPROTO_TCP:
5685			tcp_statinc(TCP_STAT_RCVBADSUM);
5686			break;
5687		case IPPROTO_UDP:
5688			udp_statinc(UDP_STAT_BADSUM);
5689			break;
5690		case IPPROTO_ICMP:
5691			icmp_statinc(ICMP_STAT_CHECKSUM);
5692			break;
5693#ifdef INET6
5694		case IPPROTO_ICMPV6:
5695			icmp6_statinc(ICMP6_STAT_CHECKSUM);
5696			break;
5697#endif /* INET6 */
5698		}
5699		return (1);
5700	}
5701#ifndef __NetBSD__
5702	m->m_pkthdr.csum_flags |= flag_ok;
5703#endif /* !__NetBSD__ */
5704	return (0);
5705}
5706
5707#ifdef INET
5708int
5709pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
5710    struct ether_header *eh)
5711{
5712	struct pfi_kif		*kif;
5713	u_short			 action, reason = 0, log = 0;
5714	struct mbuf		*m = *m0;
5715	struct ip		*h = NULL;
5716	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
5717	struct pf_state		*s = NULL;
5718	struct pf_state_key	*sk = NULL;
5719	struct pf_ruleset	*ruleset = NULL;
5720	struct pf_pdesc		 pd;
5721	int			 off, dirndx;
5722#ifdef __NetBSD__
5723	struct pf_mtag		*pf_mtag = NULL; /* XXX gcc */
5724#if defined(ALTQ)
5725	int pqid = 0;
5726#endif
5727#endif /* __NetBSD__ */
5728
5729	if (!pf_status.running)
5730		return (PF_PASS);
5731
5732	memset(&pd, 0, sizeof(pd));
5733	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
5734		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
5735	else
5736		kif = (struct pfi_kif *)ifp->if_pf_kif;
5737
5738	if (kif == NULL) {
5739		DPFPRINTF(PF_DEBUG_URGENT,
5740		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
5741		return (PF_DROP);
5742	}
5743	if (kif->pfik_flags & PFI_IFLAG_SKIP)
5744		return (PF_PASS);
5745
5746#ifdef DIAGNOSTIC
5747	if ((m->m_flags & M_PKTHDR) == 0)
5748		panic("non-M_PKTHDR is passed to pf_test");
5749#endif /* DIAGNOSTIC */
5750
5751	if (m->m_pkthdr.len < (int)sizeof(*h)) {
5752		action = PF_DROP;
5753		REASON_SET_NOPTR(&reason, PFRES_SHORT);
5754		log = 1;
5755		goto done;
5756	}
5757
5758#ifdef __NetBSD__
5759	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
5760		DPFPRINTF(PF_DEBUG_URGENT,
5761		    ("pf_test: pf_get_mtag returned NULL\n"));
5762		return (PF_DROP);
5763	}
5764	if (pf_mtag->flags & PF_TAG_GENERATED)
5765		return (PF_PASS);
5766#else
5767	if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
5768		return (PF_PASS);
5769#endif /* !__NetBSD__ */
5770
5771	/* We do IP header normalization and packet reassembly here */
5772	if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
5773		action = PF_DROP;
5774		goto done;
5775	}
5776	m = *m0;	/* pf_normalize messes with m0 */
5777	h = mtod(m, struct ip *);
5778
5779	off = h->ip_hl << 2;
5780	if (off < (int)sizeof(*h)) {
5781		action = PF_DROP;
5782		REASON_SET_NOPTR(&reason, PFRES_SHORT);
5783		log = 1;
5784		goto done;
5785	}
5786
5787	pd.src = (struct pf_addr *)&h->ip_src;
5788	pd.dst = (struct pf_addr *)&h->ip_dst;
5789	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
5790	pd.ip_sum = &h->ip_sum;
5791	pd.proto = h->ip_p;
5792	pd.af = AF_INET;
5793	pd.tos = h->ip_tos;
5794	pd.tot_len = ntohs(h->ip_len);
5795	pd.eh = eh;
5796
5797	/* handle fragments that didn't get reassembled by normalization */
5798	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
5799		action = pf_test_fragment(&r, dir, kif, m, h,
5800		    &pd, &a, &ruleset);
5801		goto done;
5802	}
5803
5804	switch (h->ip_p) {
5805
5806	case IPPROTO_TCP: {
5807		struct tcphdr	th;
5808
5809		pd.hdr.tcp = &th;
5810		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5811		    &action, &reason, AF_INET)) {
5812			log = action != PF_PASS;
5813			goto done;
5814		}
5815		pd.p_len = pd.tot_len - off - (th.th_off << 2);
5816#if defined(ALTQ) && defined(__NetBSD__)
5817		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
5818			pqid = 1;
5819#endif
5820		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
5821		if (action == PF_DROP)
5822			goto done;
5823		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
5824		    &reason);
5825		if (action == PF_PASS) {
5826#if NPFSYNC
5827			pfsync_update_state(s);
5828#endif /* NPFSYNC */
5829			r = s->rule.ptr;
5830			a = s->anchor.ptr;
5831			log = s->log;
5832		} else if (s == NULL)
5833			action = pf_test_rule(&r, &s, dir, kif,
5834			    m, off, h, &pd, &a, &ruleset, NULL);
5835		break;
5836	}
5837
5838	case IPPROTO_UDP: {
5839		struct udphdr	uh;
5840
5841		pd.hdr.udp = &uh;
5842		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5843		    &action, &reason, AF_INET)) {
5844			log = action != PF_PASS;
5845			goto done;
5846		}
5847		if (uh.uh_dport == 0 ||
5848		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
5849		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
5850			action = PF_DROP;
5851			REASON_SET_NOPTR(&reason, PFRES_SHORT);
5852			goto done;
5853		}
5854		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
5855		if (action == PF_PASS) {
5856#if NPFSYNC
5857			pfsync_update_state(s);
5858#endif /* NPFSYNC */
5859			r = s->rule.ptr;
5860			a = s->anchor.ptr;
5861			log = s->log;
5862		} else if (s == NULL)
5863			action = pf_test_rule(&r, &s, dir, kif,
5864			    m, off, h, &pd, &a, &ruleset, NULL);
5865		break;
5866	}
5867
5868	case IPPROTO_ICMP: {
5869		struct icmp	ih;
5870
5871		pd.hdr.icmp = &ih;
5872		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
5873		    &action, &reason, AF_INET)) {
5874			log = action != PF_PASS;
5875			goto done;
5876		}
5877		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
5878		    &reason);
5879		if (action == PF_PASS) {
5880#if NPFSYNC
5881			pfsync_update_state(s);
5882#endif /* NPFSYNC */
5883			r = s->rule.ptr;
5884			a = s->anchor.ptr;
5885			log = s->log;
5886		} else if (s == NULL)
5887			action = pf_test_rule(&r, &s, dir, kif,
5888			    m, off, h, &pd, &a, &ruleset, NULL);
5889		break;
5890	}
5891
5892#ifdef INET6
5893	case IPPROTO_ICMPV6: {
5894		action = PF_DROP;
5895		DPFPRINTF(PF_DEBUG_MISC,
5896		    ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
5897		goto done;
5898	}
5899#endif
5900
5901	default:
5902		action = pf_test_state_other(&s, dir, kif, &pd);
5903		if (action == PF_PASS) {
5904#if NPFSYNC
5905			pfsync_update_state(s);
5906#endif /* NPFSYNC */
5907			r = s->rule.ptr;
5908			a = s->anchor.ptr;
5909			log = s->log;
5910		} else if (s == NULL)
5911			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
5912			    &pd, &a, &ruleset, NULL);
5913		break;
5914	}
5915
5916done:
5917	if (action == PF_PASS && h->ip_hl > 5 &&
5918	    !((s && s->allow_opts) || r->allow_opts)) {
5919		action = PF_DROP;
5920		REASON_SET_NOPTR(&reason, PFRES_IPOPTIONS);
5921		log = 1;
5922		DPFPRINTF(PF_DEBUG_MISC,
5923		    ("pf: dropping packet with ip options\n"));
5924	}
5925
5926	if ((s && s->tag) || r->rtableid)
5927		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
5928
5929#ifdef ALTQ
5930	if (action == PF_PASS && r->qid) {
5931#ifdef __NetBSD__
5932		struct m_tag	*mtag;
5933		struct altq_tag	*atag;
5934
5935		mtag = m_tag_get(PACKET_TAG_ALTQ_QID, sizeof(*atag), M_NOWAIT);
5936		if (mtag != NULL) {
5937			atag = (struct altq_tag *)(mtag + 1);
5938			if (pqid || (pd.tos & IPTOS_LOWDELAY))
5939				atag->qid = r->pqid;
5940			else
5941				atag->qid = r->qid;
5942			/* add hints for ecn */
5943			atag->af = AF_INET;
5944			atag->hdr = h;
5945			m_tag_prepend(m, mtag);
5946		}
5947#else
5948		if (pqid || (pd.tos & IPTOS_LOWDELAY))
5949			m->m_pkthdr.pf.qid = r->pqid;
5950		else
5951			m->m_pkthdr.pf.qid = r->qid;
5952		/* add hints for ecn */
5953		m->m_pkthdr.pf.hdr = h;
5954#endif /* !__NetBSD__ */
5955	}
5956#endif /* ALTQ */
5957
5958	/*
5959	 * connections redirected to loopback should not match sockets
5960	 * bound specifically to loopback due to security implications,
5961	 * see tcp_input() and in_pcblookup_listen().
5962	 */
5963	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
5964	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
5965	    (s->nat_rule.ptr->action == PF_RDR ||
5966	    s->nat_rule.ptr->action == PF_BINAT) &&
5967	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
5968#ifdef __NetBSD__
5969		pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
5970#else
5971		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
5972#endif /* !__NetBSD__ */
5973
5974	if (log) {
5975#if NPFLOG > 0
5976		struct pf_rule *lr;
5977
5978		if (s != NULL && s->nat_rule.ptr != NULL &&
5979		    s->nat_rule.ptr->log & PF_LOG_ALL)
5980			lr = s->nat_rule.ptr;
5981		else
5982			lr = r;
5983		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
5984		    &pd);
5985#endif
5986	}
5987
5988	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
5989	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
5990
5991	if (action == PF_PASS || r->action == PF_DROP) {
5992		dirndx = (dir == PF_OUT);
5993		r->packets[dirndx]++;
5994		r->bytes[dirndx] += pd.tot_len;
5995		if (a != NULL) {
5996			a->packets[dirndx]++;
5997			a->bytes[dirndx] += pd.tot_len;
5998		}
5999		if (s != NULL) {
6000			sk = s->state_key;
6001			if (s->nat_rule.ptr != NULL) {
6002				s->nat_rule.ptr->packets[dirndx]++;
6003				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6004			}
6005			if (s->src_node != NULL) {
6006				s->src_node->packets[dirndx]++;
6007				s->src_node->bytes[dirndx] += pd.tot_len;
6008			}
6009			if (s->nat_src_node != NULL) {
6010				s->nat_src_node->packets[dirndx]++;
6011				s->nat_src_node->bytes[dirndx] += pd.tot_len;
6012			}
6013			dirndx = (dir == sk->direction) ? 0 : 1;
6014			s->packets[dirndx]++;
6015			s->bytes[dirndx] += pd.tot_len;
6016		}
6017		tr = r;
6018		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6019		if (nr != NULL) {
6020			struct pf_addr *x;
6021			/*
6022			 * XXX: we need to make sure that the addresses
6023			 * passed to pfr_update_stats() are the same than
6024			 * the addresses used during matching (pfr_match)
6025			 */
6026			if (r == &pf_default_rule) {
6027				tr = nr;
6028				x = (sk == NULL || sk->direction == dir) ?
6029				    &pd.baddr : &pd.naddr;
6030			} else
6031				x = (sk == NULL || sk->direction == dir) ?
6032				    &pd.naddr : &pd.baddr;
6033			if (x == &pd.baddr || s == NULL) {
6034				/* we need to change the address */
6035				if (dir == PF_OUT)
6036					pd.src = x;
6037				else
6038					pd.dst = x;
6039			}
6040		}
6041		if (tr->src.addr.type == PF_ADDR_TABLE)
6042			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
6043			    sk->direction == dir) ?
6044			    pd.src : pd.dst, pd.af,
6045			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6046			    tr->src.neg);
6047		if (tr->dst.addr.type == PF_ADDR_TABLE)
6048			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
6049			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
6050			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6051			    tr->dst.neg);
6052	}
6053
6054
6055	if (action == PF_SYNPROXY_DROP) {
6056		m_freem(*m0);
6057		*m0 = NULL;
6058		action = PF_PASS;
6059	} else if (r->rt)
6060		/* pf_route can free the mbuf causing *m0 to become NULL */
6061		pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
6062
6063	return (action);
6064}
6065#endif /* INET */
6066
6067#ifdef INET6
6068int
6069pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
6070    struct ether_header *eh)
6071{
6072	struct pfi_kif		*kif;
6073	u_short			 action, reason = 0, log = 0;
6074	struct mbuf		*m = *m0, *n = NULL;
6075	struct ip6_hdr		*h = NULL; /* XXX gcc */
6076	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
6077	struct pf_state		*s = NULL;
6078	struct pf_state_key	*sk = NULL;
6079	struct pf_ruleset	*ruleset = NULL;
6080	struct pf_pdesc		 pd;
6081	int			 off, terminal = 0, dirndx, rh_cnt = 0;
6082#ifdef __NetBSD__
6083	struct pf_mtag		*pf_mtag = NULL; /* XXX gcc */
6084#endif /* __NetBSD__ */
6085
6086	if (!pf_status.running)
6087		return (PF_PASS);
6088
6089	memset(&pd, 0, sizeof(pd));
6090	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
6091		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
6092	else
6093		kif = (struct pfi_kif *)ifp->if_pf_kif;
6094
6095	if (kif == NULL) {
6096		DPFPRINTF(PF_DEBUG_URGENT,
6097		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
6098		return (PF_DROP);
6099	}
6100	if (kif->pfik_flags & PFI_IFLAG_SKIP)
6101		return (PF_PASS);
6102
6103#ifdef DIAGNOSTIC
6104	if ((m->m_flags & M_PKTHDR) == 0)
6105		panic("non-M_PKTHDR is passed to pf_test6");
6106#endif /* DIAGNOSTIC */
6107
6108	if (m->m_pkthdr.len < (int)sizeof(*h)) {
6109		action = PF_DROP;
6110		REASON_SET_NOPTR(&reason, PFRES_SHORT);
6111		log = 1;
6112		goto done;
6113	}
6114
6115#ifdef __NetBSD__
6116	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
6117		DPFPRINTF(PF_DEBUG_URGENT,
6118		    ("pf_test6: pf_get_mtag returned NULL\n"));
6119		return (PF_DROP);
6120	}
6121	if (pf_mtag->flags & PF_TAG_GENERATED)
6122		return (PF_PASS);
6123#else
6124	if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
6125		return (PF_PASS);
6126#endif /* !__NetBSD__ */
6127
6128	/* We do IP header normalization and packet reassembly here */
6129	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
6130		action = PF_DROP;
6131		goto done;
6132	}
6133	m = *m0;	/* pf_normalize messes with m0 */
6134	h = mtod(m, struct ip6_hdr *);
6135
6136#if 1
6137	/*
6138	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
6139	 * will do something bad, so drop the packet for now.
6140	 */
6141	if (htons(h->ip6_plen) == 0) {
6142		action = PF_DROP;
6143		REASON_SET_NOPTR(&reason, PFRES_NORM);	/*XXX*/
6144		goto done;
6145	}
6146#endif
6147
6148	pd.src = (struct pf_addr *)&h->ip6_src;
6149	pd.dst = (struct pf_addr *)&h->ip6_dst;
6150	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
6151	pd.ip_sum = NULL;
6152	pd.af = AF_INET6;
6153	pd.tos = 0;
6154	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6155	pd.eh = eh;
6156
6157	off = ((char *)h - m->m_data) + sizeof(struct ip6_hdr);
6158	pd.proto = h->ip6_nxt;
6159	do {
6160		switch (pd.proto) {
6161		case IPPROTO_FRAGMENT:
6162			action = pf_test_fragment(&r, dir, kif, m, h,
6163			    &pd, &a, &ruleset);
6164			if (action == PF_DROP)
6165				REASON_SET_NOPTR(&reason, PFRES_FRAG);
6166			goto done;
6167		case IPPROTO_ROUTING: {
6168			struct ip6_rthdr rthdr;
6169
6170			if (rh_cnt++) {
6171				DPFPRINTF(PF_DEBUG_MISC,
6172				    ("pf: IPv6 more than one rthdr\n"));
6173				action = PF_DROP;
6174				REASON_SET_NOPTR(&reason, PFRES_IPOPTIONS);
6175				log = 1;
6176				goto done;
6177			}
6178			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
6179			    &reason, pd.af)) {
6180				DPFPRINTF(PF_DEBUG_MISC,
6181				    ("pf: IPv6 short rthdr\n"));
6182				action = PF_DROP;
6183				REASON_SET_NOPTR(&reason, PFRES_SHORT);
6184				log = 1;
6185				goto done;
6186			}
6187			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
6188				DPFPRINTF(PF_DEBUG_MISC,
6189				    ("pf: IPv6 rthdr0\n"));
6190				action = PF_DROP;
6191				REASON_SET_NOPTR(&reason, PFRES_IPOPTIONS);
6192				log = 1;
6193				goto done;
6194			}
6195			/* FALLTHROUGH */
6196		}
6197		case IPPROTO_AH:
6198		case IPPROTO_HOPOPTS:
6199		case IPPROTO_DSTOPTS: {
6200			/* get next header and header length */
6201			struct ip6_ext	opt6;
6202
6203			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6204			    NULL, &reason, pd.af)) {
6205				DPFPRINTF(PF_DEBUG_MISC,
6206				    ("pf: IPv6 short opt\n"));
6207				action = PF_DROP;
6208				log = 1;
6209				goto done;
6210			}
6211			if (pd.proto == IPPROTO_AH)
6212				off += (opt6.ip6e_len + 2) * 4;
6213			else
6214				off += (opt6.ip6e_len + 1) * 8;
6215			pd.proto = opt6.ip6e_nxt;
6216			/* goto the next header */
6217			break;
6218		}
6219		default:
6220			terminal++;
6221			break;
6222		}
6223	} while (!terminal);
6224
6225	/* if there's no routing header, use unmodified mbuf for checksumming */
6226	if (!n)
6227		n = m;
6228
6229	switch (pd.proto) {
6230
6231	case IPPROTO_TCP: {
6232		struct tcphdr	th;
6233
6234		pd.hdr.tcp = &th;
6235		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6236		    &action, &reason, AF_INET6)) {
6237			log = action != PF_PASS;
6238			goto done;
6239		}
6240		pd.p_len = pd.tot_len - off - (th.th_off << 2);
6241		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6242		if (action == PF_DROP)
6243			goto done;
6244		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6245		    &reason);
6246		if (action == PF_PASS) {
6247#if NPFSYNC
6248			pfsync_update_state(s);
6249#endif /* NPFSYNC */
6250			r = s->rule.ptr;
6251			a = s->anchor.ptr;
6252			log = s->log;
6253		} else if (s == NULL)
6254			action = pf_test_rule(&r, &s, dir, kif,
6255			    m, off, h, &pd, &a, &ruleset, NULL);
6256		break;
6257	}
6258
6259	case IPPROTO_UDP: {
6260		struct udphdr	uh;
6261
6262		pd.hdr.udp = &uh;
6263		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6264		    &action, &reason, AF_INET6)) {
6265			log = action != PF_PASS;
6266			goto done;
6267		}
6268		if (uh.uh_dport == 0 ||
6269		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6270		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6271			action = PF_DROP;
6272			REASON_SET_NOPTR(&reason, PFRES_SHORT);
6273			goto done;
6274		}
6275		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6276		if (action == PF_PASS) {
6277#if NPFSYNC
6278			pfsync_update_state(s);
6279#endif /* NPFSYNC */
6280			r = s->rule.ptr;
6281			a = s->anchor.ptr;
6282			log = s->log;
6283		} else if (s == NULL)
6284			action = pf_test_rule(&r, &s, dir, kif,
6285			    m, off, h, &pd, &a, &ruleset, NULL);
6286		break;
6287	}
6288
6289#ifdef INET
6290	case IPPROTO_ICMP: {
6291		action = PF_DROP;
6292		DPFPRINTF(PF_DEBUG_MISC,
6293		    ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
6294		goto done;
6295	}
6296#endif
6297
6298	case IPPROTO_ICMPV6: {
6299		struct icmp6_hdr	ih;
6300
6301		pd.hdr.icmp6 = &ih;
6302		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6303		    &action, &reason, AF_INET6)) {
6304			log = action != PF_PASS;
6305			goto done;
6306		}
6307		action = pf_test_state_icmp(&s, dir, kif,
6308		    m, off, h, &pd, &reason);
6309		if (action == PF_PASS) {
6310#if NPFSYNC
6311			pfsync_update_state(s);
6312#endif /* NPFSYNC */
6313			r = s->rule.ptr;
6314			a = s->anchor.ptr;
6315			log = s->log;
6316		} else if (s == NULL)
6317			action = pf_test_rule(&r, &s, dir, kif,
6318			    m, off, h, &pd, &a, &ruleset, NULL);
6319		break;
6320	}
6321
6322	default:
6323		action = pf_test_state_other(&s, dir, kif, &pd);
6324		if (action == PF_PASS) {
6325#if NPFSYNC
6326			pfsync_update_state(s);
6327#endif /* NPFSYNC */
6328			r = s->rule.ptr;
6329			a = s->anchor.ptr;
6330			log = s->log;
6331		} else if (s == NULL)
6332			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
6333			    &pd, &a, &ruleset, NULL);
6334		break;
6335	}
6336
6337done:
6338	if (n != m) {
6339		m_freem(n);
6340		n = NULL;
6341	}
6342
6343	/* handle dangerous IPv6 extension headers. */
6344	if (action == PF_PASS && rh_cnt &&
6345	    !((s && s->allow_opts) || r->allow_opts)) {
6346		action = PF_DROP;
6347		REASON_SET_NOPTR(&reason, PFRES_IPOPTIONS);
6348		log = 1;
6349		DPFPRINTF(PF_DEBUG_MISC,
6350		    ("pf: dropping packet with dangerous v6 headers\n"));
6351	}
6352
6353	if ((s && s->tag) || r->rtableid)
6354		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
6355
6356#ifdef ALTQ
6357	if (action == PF_PASS && r->qid) {
6358#ifdef __NetBSD__
6359		struct m_tag	*mtag;
6360		struct altq_tag	*atag;
6361
6362		mtag = m_tag_get(PACKET_TAG_ALTQ_QID, sizeof(*atag), M_NOWAIT);
6363		if (mtag != NULL) {
6364			atag = (struct altq_tag *)(mtag + 1);
6365			if (pd.tos & IPTOS_LOWDELAY)
6366				atag->qid = r->pqid;
6367			else
6368				atag->qid = r->qid;
6369			/* add hints for ecn */
6370			atag->af = AF_INET6;
6371			atag->hdr = h;
6372			m_tag_prepend(m, mtag);
6373		}
6374#else
6375		if (pd.tos & IPTOS_LOWDELAY)
6376			m->m_pkthdr.pf.qid = r->pqid;
6377		else
6378			m->m_pkthdr.pf.qid = r->qid;
6379		/* add hints for ecn */
6380		m->m_pkthdr.pf.hdr = h;
6381#endif /* !__NetBSD__ */
6382	}
6383#endif /* ALTQ */
6384
6385	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6386	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6387	    (s->nat_rule.ptr->action == PF_RDR ||
6388	    s->nat_rule.ptr->action == PF_BINAT) &&
6389	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
6390#ifdef __NetBSD__
6391		pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
6392#else
6393		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
6394#endif /* !__NetBSD__ */
6395
6396	if (log) {
6397#if NPFLOG > 0
6398		struct pf_rule *lr;
6399
6400		if (s != NULL && s->nat_rule.ptr != NULL &&
6401		    s->nat_rule.ptr->log & PF_LOG_ALL)
6402			lr = s->nat_rule.ptr;
6403		else
6404			lr = r;
6405		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
6406		    &pd);
6407#endif
6408	}
6409
6410	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6411	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6412
6413	if (action == PF_PASS || r->action == PF_DROP) {
6414		dirndx = (dir == PF_OUT);
6415		r->packets[dirndx]++;
6416		r->bytes[dirndx] += pd.tot_len;
6417		if (a != NULL) {
6418			a->packets[dirndx]++;
6419			a->bytes[dirndx] += pd.tot_len;
6420		}
6421		if (s != NULL) {
6422			sk = s->state_key;
6423			if (s->nat_rule.ptr != NULL) {
6424				s->nat_rule.ptr->packets[dirndx]++;
6425				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6426			}
6427			if (s->src_node != NULL) {
6428				s->src_node->packets[dirndx]++;
6429				s->src_node->bytes[dirndx] += pd.tot_len;
6430			}
6431			if (s->nat_src_node != NULL) {
6432				s->nat_src_node->packets[dirndx]++;
6433				s->nat_src_node->bytes[dirndx] += pd.tot_len;
6434			}
6435			dirndx = (dir == sk->direction) ? 0 : 1;
6436			s->packets[dirndx]++;
6437			s->bytes[dirndx] += pd.tot_len;
6438		}
6439		tr = r;
6440		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6441		if (nr != NULL) {
6442			struct pf_addr *x;
6443			/*
6444			 * XXX: we need to make sure that the addresses
6445			 * passed to pfr_update_stats() are the same than
6446			 * the addresses used during matching (pfr_match)
6447			 */
6448			if (r == &pf_default_rule) {
6449				tr = nr;
6450				x = (s == NULL || sk->direction == dir) ?
6451				    &pd.baddr : &pd.naddr;
6452			} else {
6453				x = (s == NULL || sk->direction == dir) ?
6454				    &pd.naddr : &pd.baddr;
6455			}
6456			if (x == &pd.baddr || s == NULL) {
6457				if (dir == PF_OUT)
6458					pd.src = x;
6459				else
6460					pd.dst = x;
6461			}
6462		}
6463		if (tr->src.addr.type == PF_ADDR_TABLE)
6464			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
6465			    sk->direction == dir) ? pd.src : pd.dst, pd.af,
6466			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6467			    tr->src.neg);
6468		if (tr->dst.addr.type == PF_ADDR_TABLE)
6469			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
6470			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
6471			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6472			    tr->dst.neg);
6473	}
6474
6475
6476	if (action == PF_SYNPROXY_DROP) {
6477		m_freem(*m0);
6478		*m0 = NULL;
6479		action = PF_PASS;
6480	} else if (r->rt)
6481		/* pf_route6 can free the mbuf causing *m0 to become NULL */
6482		pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
6483
6484	return (action);
6485}
6486#endif /* INET6 */
6487
6488int
6489pf_check_congestion(struct ifqueue *ifq)
6490{
6491#ifdef __NetBSD__
6492	// XXX: not handled anyway
6493	KASSERT(ifq == NULL);
6494	return (0);
6495#else
6496	if (ifq->ifq_congestion)
6497		return (1);
6498	else
6499		return (0);
6500#endif /* !__NetBSD__ */
6501}
6502