pf.c revision 240233
1223637Sbz/*	$OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */
2126258Smlaier
3126258Smlaier/*
4126258Smlaier * Copyright (c) 2001 Daniel Hartmeier
5223637Sbz * Copyright (c) 2002 - 2008 Henning Brauer
6126258Smlaier * All rights reserved.
7126258Smlaier *
8126258Smlaier * Redistribution and use in source and binary forms, with or without
9126258Smlaier * modification, are permitted provided that the following conditions
10126258Smlaier * are met:
11126258Smlaier *
12126258Smlaier *    - Redistributions of source code must retain the above copyright
13126258Smlaier *      notice, this list of conditions and the following disclaimer.
14126258Smlaier *    - Redistributions in binary form must reproduce the above
15126258Smlaier *      copyright notice, this list of conditions and the following
16126258Smlaier *      disclaimer in the documentation and/or other materials provided
17126258Smlaier *      with the distribution.
18126258Smlaier *
19126258Smlaier * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20126258Smlaier * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21126258Smlaier * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22126258Smlaier * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23126258Smlaier * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24126258Smlaier * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25126258Smlaier * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26126258Smlaier * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27126258Smlaier * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28126258Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29126258Smlaier * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30126258Smlaier * POSSIBILITY OF SUCH DAMAGE.
31126258Smlaier *
32126258Smlaier * Effort sponsored in part by the Defense Advanced Research Projects
33126258Smlaier * Agency (DARPA) and Air Force Research Laboratory, Air Force
34126258Smlaier * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35126258Smlaier *
36126258Smlaier */
37126258Smlaier
38240233Sglebius#include <sys/cdefs.h>
39171168Smlaier
40171168Smlaier__FBSDID("$FreeBSD: head/sys/contrib/pf/net/pf.c 240233 2012-09-08 06:41:54Z glebius $");
41126261Smlaier
42240233Sglebius#include "opt_inet.h"
43240233Sglebius#include "opt_inet6.h"
44126261Smlaier#include "opt_bpf.h"
45126261Smlaier#include "opt_pf.h"
46153110Sru
47126258Smlaier#include <sys/param.h>
48240233Sglebius#include <sys/bus.h>
49240233Sglebius#include <sys/endian.h>
50240233Sglebius#include <sys/hash.h>
51240233Sglebius#include <sys/interrupt.h>
52240233Sglebius#include <sys/kernel.h>
53240233Sglebius#include <sys/kthread.h>
54240233Sglebius#include <sys/limits.h>
55126258Smlaier#include <sys/mbuf.h>
56240233Sglebius#include <sys/md5.h>
57240233Sglebius#include <sys/random.h>
58240233Sglebius#include <sys/refcount.h>
59126258Smlaier#include <sys/socket.h>
60126261Smlaier#include <sys/sysctl.h>
61240233Sglebius#include <sys/taskqueue.h>
62240233Sglebius#include <sys/ucred.h>
63126258Smlaier
64126258Smlaier#include <net/if.h>
65126258Smlaier#include <net/if_types.h>
66126258Smlaier#include <net/route.h>
67171168Smlaier#include <net/radix_mpath.h>
68240233Sglebius#include <net/vnet.h>
69126258Smlaier
70240233Sglebius#include <net/pfvar.h>
71240233Sglebius#include <net/pf_mtag.h>
72240233Sglebius#include <net/if_pflog.h>
73240233Sglebius#include <net/if_pfsync.h>
74240233Sglebius
75240233Sglebius#include <netinet/in_pcb.h>
76126258Smlaier#include <netinet/in_var.h>
77126258Smlaier#include <netinet/ip.h>
78240233Sglebius#include <netinet/ip_fw.h>
79240233Sglebius#include <netinet/ip_icmp.h>
80240233Sglebius#include <netinet/icmp_var.h>
81126258Smlaier#include <netinet/ip_var.h>
82240233Sglebius#include <netinet/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */
83126258Smlaier#include <netinet/tcp.h>
84240233Sglebius#include <netinet/tcp_fsm.h>
85126258Smlaier#include <netinet/tcp_seq.h>
86126258Smlaier#include <netinet/tcp_timer.h>
87126258Smlaier#include <netinet/tcp_var.h>
88240233Sglebius#include <netinet/udp.h>
89126258Smlaier#include <netinet/udp_var.h>
90126258Smlaier
91126258Smlaier#ifdef INET6
92126258Smlaier#include <netinet/ip6.h>
93126258Smlaier#include <netinet/icmp6.h>
94126258Smlaier#include <netinet6/nd6.h>
95126261Smlaier#include <netinet6/ip6_var.h>
96126261Smlaier#include <netinet6/in6_pcb.h>
97126258Smlaier#endif /* INET6 */
98126258Smlaier
99126261Smlaier#include <machine/in_cksum.h>
100163606Srwatson#include <security/mac/mac_framework.h>
101126258Smlaier
/*
 * Debug printf: pass the parenthesised printf() argument list `x` to
 * printf() when the configured debug level (V_pf_status.debug) is at
 * least `n`.
 *
 * Usage: DPFPRINTF(PF_DEBUG_MISC, ("fmt %d\n", value));
 *
 * The body is wrapped in do { } while (0) so the macro behaves as one
 * statement and cannot capture an `else` that follows the invocation.
 */
#define	DPFPRINTF(n, x)							\
	do {								\
		if (V_pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
103126258Smlaier
104126258Smlaier/*
105126258Smlaier * Global variables
106126258Smlaier */
107126258Smlaier
/*
 * Per-VNET pf globals, each defined through VNET_DEFINE.
 * pf_initialize() runs TAILQ_INIT on both pf_altqs lists and on pf_pabuf,
 * then points pf_altqs_active at pf_altqs[0] and pf_altqs_inactive at
 * pf_altqs[1].
 */
108223637Sbz/* state tables */
109223637SbzVNET_DEFINE(struct pf_altqqueue,	 pf_altqs[2]);
110223637SbzVNET_DEFINE(struct pf_palist,		 pf_pabuf);
111223637SbzVNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_active);
112223637SbzVNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_inactive);
113223637SbzVNET_DEFINE(struct pf_status,		 pf_status);
114223637Sbz
/*
 * NOTE(review): the ticket_* and altqs_inactive_open variables look like
 * transaction bookkeeping for the configuration interface - confirm
 * against the ioctl code, which is not visible here.
 */
115223637SbzVNET_DEFINE(u_int32_t,			 ticket_altqs_active);
116223637SbzVNET_DEFINE(u_int32_t,			 ticket_altqs_inactive);
117223637SbzVNET_DEFINE(int,			 altqs_inactive_open);
118223637SbzVNET_DEFINE(u_int32_t,			 ticket_pabuf);
119223637Sbz
/*
 * MD5 context, 16-byte secret, init flag and ISS offset; the V_* macros
 * are the per-VNET accessors.
 * NOTE(review): presumably consumed by pf_tcp_iss() - confirm there.
 */
120223637SbzVNET_DEFINE(MD5_CTX,			 pf_tcp_secret_ctx);
121223637Sbz#define	V_pf_tcp_secret_ctx		 VNET(pf_tcp_secret_ctx)
122223637SbzVNET_DEFINE(u_char,			 pf_tcp_secret[16]);
123223637Sbz#define	V_pf_tcp_secret			 VNET(pf_tcp_secret)
124223637SbzVNET_DEFINE(int,			 pf_tcp_secret_init);
125223637Sbz#define	V_pf_tcp_secret_init		 VNET(pf_tcp_secret_init)
126223637SbzVNET_DEFINE(int,			 pf_tcp_iss_off);
127223637Sbz#define	V_pf_tcp_iss_off		 VNET(pf_tcp_iss_off)
128223637Sbz
/*
 * One frame of the anchor stack: a ruleset pointer, a rule pointer and the
 * parent/child anchor pointers.  A fixed array of 64 frames is kept per
 * VNET and accessed through V_pf_anchor_stack.
 * NOTE(review): presumably pushed/popped while rule evaluation steps into
 * and out of anchors - confirm against the step-into/step-out helpers.
 */
129223637Sbzstruct pf_anchor_stackframe {
130223637Sbz	struct pf_ruleset		*rs;
131223637Sbz	struct pf_rule			*r;
132223637Sbz	struct pf_anchor_node		*parent;
133223637Sbz	struct pf_anchor		*child;
134223637Sbz};
135223637SbzVNET_DEFINE(struct pf_anchor_stackframe, pf_anchor_stack[64]);
136223637Sbz#define	V_pf_anchor_stack		 VNET(pf_anchor_stack)
137223637Sbz
138240233Sglebius/*
139240233Sglebius * Queue for pf_intr() sends.
140240233Sglebius */
/* Malloc type used for short-lived pf allocations (e.g. flush requests). */
141240233Sglebiusstatic MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
/*
 * One queued transmission: the mbuf, a type tag and a union holding either
 * a route or ICMP type/code/mtu options.  The pfse_* macros are shorthands
 * for the union members.
 * NOTE(review): presumably `ro` applies to PFSE_IP/PFSE_IP6 and `icmpopts`
 * to PFSE_ICMP/PFSE_ICMP6 - confirm against pf_intr().
 */
142240233Sglebiusstruct pf_send_entry {
143240233Sglebius	STAILQ_ENTRY(pf_send_entry)	pfse_next;
144240233Sglebius	struct mbuf			*pfse_m;
145240233Sglebius	enum {
146240233Sglebius		PFSE_IP,
147240233Sglebius		PFSE_IP6,
148240233Sglebius		PFSE_ICMP,
149240233Sglebius		PFSE_ICMP6,
150240233Sglebius	}				pfse_type;
151240233Sglebius	union {
152240233Sglebius		struct route		ro;
153240233Sglebius		struct {
154240233Sglebius			int		type;
155240233Sglebius			int		code;
156240233Sglebius			int		mtu;
157240233Sglebius		} icmpopts;
158240233Sglebius	} u;
159240233Sglebius#define	pfse_ro		u.ro
160240233Sglebius#define	pfse_icmp_type	u.icmpopts.type
161240233Sglebius#define	pfse_icmp_code	u.icmpopts.code
162240233Sglebius#define	pfse_icmp_mtu	u.icmpopts.mtu
163240233Sglebius};
164223637Sbz
/* Per-VNET send queue head; initialised with STAILQ_INIT in pf_initialize(). */
165240233SglebiusSTAILQ_HEAD(pf_send_head, pf_send_entry);
166240233Sglebiusstatic VNET_DEFINE(struct pf_send_head, pf_sendqueue);
167240233Sglebius#define	V_pf_sendqueue	VNET(pf_sendqueue)
168126258Smlaier
/*
 * A single file-scope mutex (not per-VNET) behind PF_SENDQ_LOCK/UNLOCK.
 * NOTE(review): presumably serialises access to V_pf_sendqueue - confirm
 * at the enqueue/dequeue sites.
 */
169240233Sglebiusstatic struct mtx pf_sendqueue_mtx;
170240233Sglebius#define	PF_SENDQ_LOCK()		mtx_lock(&pf_sendqueue_mtx)
171240233Sglebius#define	PF_SENDQ_UNLOCK()	mtx_unlock(&pf_sendqueue_mtx)
172126258Smlaier
173240233Sglebius/*
174240233Sglebius * Queue for pf_flush_task() tasks.
175240233Sglebius */
/*
 * One flush request, filled in by pf_src_connlimit(): the offending source
 * node address, the wire key's address family, the state's direction, and
 * either the state's rule or NULL when the rule asks for a global flush
 * (PF_FLUSH_GLOBAL).  pf_flush_task() only compares `rule` by pointer.
 */
176240233Sglebiusstruct pf_flush_entry {
177240233Sglebius	SLIST_ENTRY(pf_flush_entry)	next;
178240233Sglebius	struct pf_addr  		addr;
179240233Sglebius	sa_family_t			af;
180240233Sglebius	uint8_t				dir;
181240233Sglebius	struct pf_rule  		*rule;  /* never dereferenced */
182240233Sglebius};
183223637Sbz
/*
 * Per-VNET request list and the task that drains it; pf_initialize() runs
 * SLIST_INIT on the list and TASK_INIT with pf_flush_task and the list
 * head as its argument.
 */
184240233SglebiusSLIST_HEAD(pf_flush_head, pf_flush_entry);
185240233Sglebiusstatic VNET_DEFINE(struct pf_flush_head, pf_flushqueue);
186240233Sglebius#define V_pf_flushqueue	VNET(pf_flushqueue)
187240233Sglebiusstatic VNET_DEFINE(struct task, pf_flushtask);
188240233Sglebius#define	V_pf_flushtask	VNET(pf_flushtask)
189126261Smlaier
/*
 * File-scope mutex (not per-VNET) held around insertion into the flush
 * queue in pf_src_connlimit() and around detaching it in pf_flush_task().
 */
190240233Sglebiusstatic struct mtx pf_flushqueue_mtx;
191240233Sglebius#define	PF_FLUSHQ_LOCK()	mtx_lock(&pf_flushqueue_mtx)
192240233Sglebius#define	PF_FLUSHQ_UNLOCK()	mtx_unlock(&pf_flushqueue_mtx)
193126258Smlaier
/*
 * List of unlinked rules (per VNET) and its file-global mutex.
 * NOTE(review): presumably drained by pf_purge_unlinked_rules() - confirm.
 */
194240233SglebiusVNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules);
195240233Sglebiusstruct mtx pf_unlnkdrules_mtx;
196240233Sglebius
/*
 * UMA zones, all created in pf_initialize(): source nodes and mbuf tags are
 * file-local, states and state keys are visible outside this file.
 */
197240233Sglebiusstatic VNET_DEFINE(uma_zone_t,	pf_sources_z);
198240233Sglebius#define	V_pf_sources_z	VNET(pf_sources_z)
199240233Sglebiusstatic VNET_DEFINE(uma_zone_t,	pf_mtag_z);
200240233Sglebius#define	V_pf_mtag_z	VNET(pf_mtag_z)
201240233SglebiusVNET_DEFINE(uma_zone_t,	 pf_state_z);
202240233SglebiusVNET_DEFINE(uma_zone_t,	 pf_state_key_z);
203240233Sglebius
/*
 * One 64-bit counter per possible CPU (MAXCPU entries).
 * PFID_CPUSHIFT is the bit offset of the top PFID_CPUBITS bits of a
 * uint64_t, PFID_CPUMASK selects those bits and PFID_MAXID is the
 * complement of that mask.  The CTASSERT fails the build unless
 * 2^PFID_CPUBITS is greater than MAXCPU.
 * NOTE(review): presumably the high bits of a state ID carry the CPU
 * number - confirm where state IDs are generated.
 */
204240233SglebiusVNET_DEFINE(uint64_t, pf_stateid[MAXCPU]);
205240233Sglebius#define	PFID_CPUBITS	8
206240233Sglebius#define	PFID_CPUSHIFT	(sizeof(uint64_t) * NBBY - PFID_CPUBITS)
207240233Sglebius#define	PFID_CPUMASK	((uint64_t)((1 << PFID_CPUBITS) - 1) <<	PFID_CPUSHIFT)
208240233Sglebius#define	PFID_MAXID	(~PFID_CPUMASK)
209240233SglebiusCTASSERT((1 << PFID_CPUBITS) > MAXCPU);
210240233Sglebius
/*
 * Forward declarations of the file-local (static) helpers.
 * pf_route() is declared only when INET is defined; pf_change_a6() and
 * pf_route6() only when INET6 is defined.
 */
211240233Sglebiusstatic void		 pf_src_tree_remove_state(struct pf_state *);
212240233Sglebiusstatic void		 pf_init_threshold(struct pf_threshold *, u_int32_t,
213145836Smlaier			    u_int32_t);
214240233Sglebiusstatic void		 pf_add_threshold(struct pf_threshold *);
215240233Sglebiusstatic int		 pf_check_threshold(struct pf_threshold *);
216145836Smlaier
217240233Sglebiusstatic void		 pf_change_ap(struct pf_addr *, u_int16_t *,
218126258Smlaier			    u_int16_t *, u_int16_t *, struct pf_addr *,
219126258Smlaier			    u_int16_t, u_int8_t, sa_family_t);
220240233Sglebiusstatic int		 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
221171168Smlaier			    struct tcphdr *, struct pf_state_peer *);
222240233Sglebiusstatic void		 pf_change_icmp(struct pf_addr *, u_int16_t *,
223126258Smlaier			    struct pf_addr *, struct pf_addr *, u_int16_t,
224126258Smlaier			    u_int16_t *, u_int16_t *, u_int16_t *,
225126258Smlaier			    u_int16_t *, u_int8_t, sa_family_t);
226240233Sglebiusstatic void		 pf_send_tcp(struct mbuf *,
227162238Scsjp			    const struct pf_rule *, sa_family_t,
228126258Smlaier			    const struct pf_addr *, const struct pf_addr *,
229126258Smlaier			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
230145836Smlaier			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
231240233Sglebius			    u_int16_t, struct ifnet *);
232223637Sbzstatic void		 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
233126258Smlaier			    sa_family_t, struct pf_rule *);
234240233Sglebiusstatic void		 pf_detach_state(struct pf_state *);
235240233Sglebiusstatic int		 pf_state_key_attach(struct pf_state_key *,
236240233Sglebius			    struct pf_state_key *, struct pf_state *);
237240233Sglebiusstatic void		 pf_state_key_detach(struct pf_state *, int);
238240233Sglebiusstatic int		 pf_state_key_ctor(void *, int, void *, int);
239240233Sglebiusstatic u_int32_t	 pf_tcp_iss(struct pf_pdesc *);
240240233Sglebiusstatic int		 pf_test_rule(struct pf_rule **, struct pf_state **,
241130613Smlaier			    int, struct pfi_kif *, struct mbuf *, int,
242240233Sglebius			    struct pf_pdesc *, struct pf_rule **,
243240233Sglebius			    struct pf_ruleset **, struct inpcb *);
244240233Sglebiusstatic int		 pf_create_state(struct pf_rule *, struct pf_rule *,
245223637Sbz			    struct pf_rule *, struct pf_pdesc *,
246223637Sbz			    struct pf_src_node *, struct pf_state_key *,
247223637Sbz			    struct pf_state_key *, struct mbuf *, int,
248223637Sbz			    u_int16_t, u_int16_t, int *, struct pfi_kif *,
249223637Sbz			    struct pf_state **, int, u_int16_t, u_int16_t,
250223637Sbz			    int);
251240233Sglebiusstatic int		 pf_test_fragment(struct pf_rule **, int,
252130613Smlaier			    struct pfi_kif *, struct mbuf *, void *,
253126258Smlaier			    struct pf_pdesc *, struct pf_rule **,
254126258Smlaier			    struct pf_ruleset **);
255240233Sglebiusstatic int		 pf_tcp_track_full(struct pf_state_peer *,
256200930Sdelphij			    struct pf_state_peer *, struct pf_state **,
257200930Sdelphij			    struct pfi_kif *, struct mbuf *, int,
258200930Sdelphij			    struct pf_pdesc *, u_short *, int *);
259240233Sglebiusstatic int		 pf_tcp_track_sloppy(struct pf_state_peer *,
260200930Sdelphij			    struct pf_state_peer *, struct pf_state **,
261200930Sdelphij			    struct pf_pdesc *, u_short *);
262240233Sglebiusstatic int		 pf_test_state_tcp(struct pf_state **, int,
263130613Smlaier			    struct pfi_kif *, struct mbuf *, int,
264126258Smlaier			    void *, struct pf_pdesc *, u_short *);
265240233Sglebiusstatic int		 pf_test_state_udp(struct pf_state **, int,
266130613Smlaier			    struct pfi_kif *, struct mbuf *, int,
267126258Smlaier			    void *, struct pf_pdesc *);
268240233Sglebiusstatic int		 pf_test_state_icmp(struct pf_state **, int,
269130613Smlaier			    struct pfi_kif *, struct mbuf *, int,
270145836Smlaier			    void *, struct pf_pdesc *, u_short *);
271240233Sglebiusstatic int		 pf_test_state_other(struct pf_state **, int,
272223637Sbz			    struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
273240233Sglebiusstatic u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
274126258Smlaier			    sa_family_t);
275240233Sglebiusstatic u_int16_t	 pf_get_mss(struct mbuf *, int, u_int16_t,
276126258Smlaier			    sa_family_t);
277240233Sglebiusstatic u_int16_t	 pf_calc_mss(struct pf_addr *, sa_family_t,
278231852Sbz				int, u_int16_t);
279240233Sglebiusstatic void		 pf_set_rt_ifp(struct pf_state *,
280126258Smlaier			    struct pf_addr *);
281240233Sglebiusstatic int		 pf_check_proto_cksum(struct mbuf *, int, int,
282126258Smlaier			    u_int8_t, sa_family_t);
283240233Sglebiusstatic void		 pf_print_state_parts(struct pf_state *,
284223637Sbz			    struct pf_state_key *, struct pf_state_key *);
285240233Sglebiusstatic int		 pf_addr_wrap_neq(struct pf_addr_wrap *,
286126258Smlaier			    struct pf_addr_wrap *);
287240233Sglebiusstatic struct pf_state	*pf_find_state(struct pfi_kif *,
288240233Sglebius			    struct pf_state_key_cmp *, u_int);
289240233Sglebiusstatic int		 pf_src_connlimit(struct pf_state **);
290240233Sglebiusstatic void		 pf_flush_task(void *c, int pending);
291240233Sglebiusstatic int		 pf_insert_src_node(struct pf_src_node **,
292240233Sglebius			    struct pf_rule *, struct pf_addr *, sa_family_t);
293240233Sglebiusstatic int		 pf_purge_expired_states(int);
294240233Sglebiusstatic void		 pf_purge_unlinked_rules(void);
295240233Sglebiusstatic int		 pf_mtag_init(void *, int, int);
296240233Sglebiusstatic void		 pf_mtag_free(struct m_tag *);
297240233Sglebius#ifdef INET
298240233Sglebiusstatic void		 pf_route(struct mbuf **, struct pf_rule *, int,
299240233Sglebius			    struct ifnet *, struct pf_state *,
300240233Sglebius			    struct pf_pdesc *);
301240233Sglebius#endif /* INET */
302240233Sglebius#ifdef INET6
303240233Sglebiusstatic void		 pf_change_a6(struct pf_addr *, u_int16_t *,
304240233Sglebius			    struct pf_addr *, u_int8_t);
305240233Sglebiusstatic void		 pf_route6(struct mbuf **, struct pf_rule *, int,
306240233Sglebius			    struct ifnet *, struct pf_state *,
307240233Sglebius			    struct pf_pdesc *);
308240233Sglebius#endif /* INET6 */
309126258Smlaier
/* Prototype for in4_cksum(), which is defined outside this part of the file. */
310126261Smlaierint in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
311126258Smlaier
/* Declared here, defined elsewhere. */
312223637SbzVNET_DECLARE(int, pf_end_threads);
313171168Smlaier
/*
 * Per-VNET limit table with PF_LIMIT_MAX entries; pf_initialize() stores
 * the state zone in the PF_LIMIT_STATES slot and the source-node zone in
 * the PF_LIMIT_SRC_NODES slot.
 */
314240233SglebiusVNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
315145836Smlaier
/*
 * Evaluates to 1 when the packet descriptor `pd` carries a pf mbuf tag
 * whose flags include PF_PACKET_LOOPED, and to 0 otherwise (including when
 * there is no tag at all).  `pd` is evaluated twice.
 */
#define	PACKET_LOOPED(pd)						\
	(((pd)->pf_mtag != NULL) &&					\
	 (((pd)->pf_mtag->flags & PF_PACKET_LOOPED) != 0))
318126258Smlaier
/*
 * Look up a state with pf_find_state(i, k, d) and store it in `s`.
 *
 * This macro contains `return` statements, so it can only be used inside a
 * function that returns a PF_* verdict.  In order, it makes the enclosing
 * function:
 *   - return PF_DROP when no state is found or the state's timeout is
 *     PFTM_PURGE;
 *   - return PF_PASS when the packet is flagged as looped (PACKET_LOOPED);
 *   - return PF_PASS when `d` is PF_OUT, the state's rule is either
 *     PF_ROUTETO with direction PF_OUT or PF_REPLYTO with direction PF_IN,
 *     and the state has a non-NULL rt_kif different from `i`.
 * Otherwise execution continues after the macro with `s` set.
 */
319240233Sglebius#define	STATE_LOOKUP(i, k, d, s, pd)					\
320126258Smlaier	do {								\
321240233Sglebius		(s) = pf_find_state((i), (k), (d));			\
322240233Sglebius		if ((s) == NULL || (s)->timeout == PFTM_PURGE)		\
323126258Smlaier			return (PF_DROP);				\
324240233Sglebius		if (PACKET_LOOPED(pd))					\
325126258Smlaier			return (PF_PASS);				\
326240233Sglebius		if ((d) == PF_OUT &&					\
327223637Sbz		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
328223637Sbz		    (s)->rule.ptr->direction == PF_OUT) ||		\
329223637Sbz		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
330223637Sbz		    (s)->rule.ptr->direction == PF_IN)) &&		\
331223637Sbz		    (s)->rt_kif != NULL &&				\
332240233Sglebius		    (s)->rt_kif != (i))					\
333223637Sbz			return (PF_PASS);				\
334126258Smlaier	} while (0)
335126258Smlaier
/*
 * Evaluates to `k` when rule `r` has PFRULE_IFBOUND set in rule_flag, and
 * to V_pfi_all otherwise.
 *
 * The whole conditional is parenthesised so that the macro can be used as
 * an operand of a larger expression without the ?: operator re-binding.
 */
#define	BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all)
338126258Smlaier
/*
 * Bump the current and total state counters of the rule that state `s`
 * points to, and likewise for its anchor and NAT rule when those pointers
 * are non-NULL.
 *
 * The argument is parenthesised at every use so that any pointer
 * expression (e.g. `base + 1`) can be passed.  `s` is evaluated several
 * times, so it must not have side effects.
 */
#define	STATE_INC_COUNTERS(s)					\
	do {							\
		(s)->rule.ptr->states_cur++;			\
		(s)->rule.ptr->states_tot++;			\
		if ((s)->anchor.ptr != NULL) {			\
			(s)->anchor.ptr->states_cur++;		\
			(s)->anchor.ptr->states_tot++;		\
		}						\
		if ((s)->nat_rule.ptr != NULL) {		\
			(s)->nat_rule.ptr->states_cur++;	\
			(s)->nat_rule.ptr->states_tot++;	\
		}						\
	} while (0)
352145836Smlaier
/*
 * Decrement the current-state counter of the NAT rule and anchor of state
 * `s` (when those pointers are non-NULL) and of its rule.  The total
 * counters are left untouched.
 *
 * The argument is parenthesised at every use so that any pointer
 * expression can be passed.  `s` is evaluated several times, so it must
 * not have side effects.
 */
#define	STATE_DEC_COUNTERS(s)					\
	do {							\
		if ((s)->nat_rule.ptr != NULL)			\
			(s)->nat_rule.ptr->states_cur--;	\
		if ((s)->anchor.ptr != NULL)			\
			(s)->anchor.ptr->states_cur--;		\
		(s)->rule.ptr->states_cur--;			\
	} while (0)
361145836Smlaier
/*
 * Hash table storage.  pf_initialize() allocates pf_keyhash and pf_idhash
 * with pf_hashsize rows each and pf_srchash with pf_srchashsize rows (all
 * from M_PFHASH), and sets each mask to the corresponding size minus one.
 */
362240233Sglebiusstatic MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
363240233SglebiusVNET_DEFINE(struct pf_keyhash *, pf_keyhash);
364240233SglebiusVNET_DEFINE(struct pf_idhash *, pf_idhash);
365240233SglebiusVNET_DEFINE(u_long, pf_hashmask);
366240233SglebiusVNET_DEFINE(struct pf_srchash *, pf_srchash);
367240233SglebiusVNET_DEFINE(u_long, pf_srchashmask);
368223637Sbz
/* Root of the net.pf sysctl tree. */
369240233SglebiusSYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)");
370223637Sbz
/*
 * Hash sizes, exported read-only/tunable.  pf_initialize() fetches them
 * with TUNABLE_ULONG_FETCH and falls back to a default when the value is
 * zero or not a power of two.
 * NOTE(review): both variables are u_long but are exported through
 * SYSCTL_VNET_UINT - confirm the sysctl handler width matches on LP64.
 */
371240233SglebiusVNET_DEFINE(u_long, pf_hashsize);
372240233Sglebius#define	V_pf_hashsize	VNET(pf_hashsize)
373240233SglebiusSYSCTL_VNET_UINT(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN,
374240233Sglebius    &VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable");
375130613Smlaier
376240233SglebiusVNET_DEFINE(u_long, pf_srchashsize);
377240233Sglebius#define	V_pf_srchashsize	VNET(pf_srchashsize)
378240233SglebiusSYSCTL_VNET_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
379240233Sglebius    &VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable");
380171168Smlaier
/* NOTE(review): presumably the cookie of pf's software interrupt - confirm. */
381240233SglebiusVNET_DEFINE(void *, pf_swi_cookie);
382130613Smlaier
/* Seed for pf_hashkey(); set from arc4random() in pf_initialize(). */
383240233SglebiusVNET_DEFINE(uint32_t, pf_hashseed);
384240233Sglebius#define	V_pf_hashseed	VNET(pf_hashseed)
385240233Sglebius
386240233Sglebiusstatic __inline uint32_t
387240233Sglebiuspf_hashkey(struct pf_state_key *sk)
388126258Smlaier{
389240233Sglebius	uint32_t h;
390126258Smlaier
391240233Sglebius	h = jenkins_hash32((uint32_t *)sk,
392240233Sglebius	    sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
393240233Sglebius	    V_pf_hashseed);
394240233Sglebius
395240233Sglebius	return (h & V_pf_hashmask);
396130613Smlaier}
397130613Smlaier
#ifdef INET6
/*
 * Copy an address from src to dst according to the address family:
 * one 32-bit word for AF_INET (only when INET is compiled in), all four
 * words for AF_INET6.  Any other family leaves dst untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
#ifdef INET
	if (af == AF_INET) {
		dst->addr32[0] = src->addr32[0];
		return;
	}
#endif /* INET */
	if (af == AF_INET6) {
		for (int w = 0; w < 4; w++)
			dst->addr32[w] = src->addr32[w];
	}
}
#endif /* INET6 */
417126258Smlaier
418240233Sglebiusstatic void
419145836Smlaierpf_init_threshold(struct pf_threshold *threshold,
420145836Smlaier    u_int32_t limit, u_int32_t seconds)
421145836Smlaier{
422145836Smlaier	threshold->limit = limit * PF_THRESHOLD_MULT;
423145836Smlaier	threshold->seconds = seconds;
424145836Smlaier	threshold->count = 0;
425240233Sglebius	threshold->last = time_uptime;
426145836Smlaier}
427145836Smlaier
428240233Sglebiusstatic void
429145836Smlaierpf_add_threshold(struct pf_threshold *threshold)
430145836Smlaier{
431240233Sglebius	u_int32_t t = time_uptime, diff = t - threshold->last;
432145836Smlaier
433145836Smlaier	if (diff >= threshold->seconds)
434145836Smlaier		threshold->count = 0;
435145836Smlaier	else
436145836Smlaier		threshold->count -= threshold->count * diff /
437145836Smlaier		    threshold->seconds;
438145836Smlaier	threshold->count += PF_THRESHOLD_MULT;
439145836Smlaier	threshold->last = t;
440145836Smlaier}
441145836Smlaier
442240233Sglebiusstatic int
443145836Smlaierpf_check_threshold(struct pf_threshold *threshold)
444145836Smlaier{
445145836Smlaier	return (threshold->count > threshold->limit);
446145836Smlaier}
447145836Smlaier
448240233Sglebiusstatic int
449145836Smlaierpf_src_connlimit(struct pf_state **state)
450145836Smlaier{
451240233Sglebius	struct pfr_addr p;
452240233Sglebius	struct pf_flush_entry *pffe;
453145836Smlaier	int bad = 0;
454145836Smlaier
455240233Sglebius	PF_STATE_LOCK_ASSERT(*state);
456240233Sglebius
457145836Smlaier	(*state)->src_node->conn++;
458171168Smlaier	(*state)->src.tcp_est = 1;
459145836Smlaier	pf_add_threshold(&(*state)->src_node->conn_rate);
460145836Smlaier
461145836Smlaier	if ((*state)->rule.ptr->max_src_conn &&
462145836Smlaier	    (*state)->rule.ptr->max_src_conn <
463145836Smlaier	    (*state)->src_node->conn) {
464223637Sbz		V_pf_status.lcounters[LCNT_SRCCONN]++;
465145836Smlaier		bad++;
466145836Smlaier	}
467145836Smlaier
468145836Smlaier	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
469145836Smlaier	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
470223637Sbz		V_pf_status.lcounters[LCNT_SRCCONNRATE]++;
471145836Smlaier		bad++;
472145836Smlaier	}
473145836Smlaier
474145836Smlaier	if (!bad)
475145836Smlaier		return (0);
476145836Smlaier
477240233Sglebius	/* Kill this state. */
478240233Sglebius	(*state)->timeout = PFTM_PURGE;
479240233Sglebius	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
480145836Smlaier
481240233Sglebius	if ((*state)->rule.ptr->overload_tbl == NULL)
482240233Sglebius		return (1);
483145836Smlaier
484240233Sglebius	V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
485240233Sglebius	if (V_pf_status.debug >= PF_DEBUG_MISC) {
486240233Sglebius		printf("%s: blocking address ", __func__);
487240233Sglebius		pf_print_host(&(*state)->src_node->addr, 0,
488240233Sglebius		    (*state)->key[PF_SK_WIRE]->af);
489240233Sglebius		printf("\n");
490240233Sglebius	}
491240233Sglebius
492240233Sglebius	bzero(&p, sizeof(p));
493240233Sglebius	p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
494240233Sglebius	switch ((*state)->key[PF_SK_WIRE]->af) {
495145836Smlaier#ifdef INET
496240233Sglebius	case AF_INET:
497240233Sglebius		p.pfra_net = 32;
498240233Sglebius		p.pfra_ip4addr = (*state)->src_node->addr.v4;
499240233Sglebius		break;
500145836Smlaier#endif /* INET */
501145836Smlaier#ifdef INET6
502240233Sglebius	case AF_INET6:
503240233Sglebius		p.pfra_net = 128;
504240233Sglebius		p.pfra_ip6addr = (*state)->src_node->addr.v6;
505240233Sglebius		break;
506145836Smlaier#endif /* INET6 */
507240233Sglebius	}
508145836Smlaier
509240233Sglebius	pfr_insert_kentry((*state)->rule.ptr->overload_tbl, &p, time_second);
510145836Smlaier
511240233Sglebius	if ((*state)->rule.ptr->flush == 0)
512240233Sglebius		return (1);
513223637Sbz
514240233Sglebius	/* Schedule flushing task. */
515240233Sglebius	pffe = malloc(sizeof(*pffe), M_PFTEMP, M_NOWAIT);
516240233Sglebius	if (pffe == NULL)
517240233Sglebius		return (1);	/* too bad :( */
518240233Sglebius
519240233Sglebius	bcopy(&(*state)->src_node->addr, &pffe->addr, sizeof(pffe->addr));
520240233Sglebius	pffe->af = (*state)->key[PF_SK_WIRE]->af;
521240233Sglebius	pffe->dir = (*state)->direction;
522240233Sglebius	if ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL)
523240233Sglebius		pffe->rule = NULL;
524240233Sglebius	else
525240233Sglebius		pffe->rule = (*state)->rule.ptr;
526240233Sglebius	PF_FLUSHQ_LOCK();
527240233Sglebius	SLIST_INSERT_HEAD(&V_pf_flushqueue, pffe, next);
528240233Sglebius	PF_FLUSHQ_UNLOCK();
529240233Sglebius	taskqueue_enqueue(taskqueue_swi, &V_pf_flushtask);
530240233Sglebius
531240233Sglebius	return (1);
532240233Sglebius}
533240233Sglebius
534240233Sglebiusstatic void
535240233Sglebiuspf_flush_task(void *c, int pending)
536240233Sglebius{
537240233Sglebius	struct pf_flush_head queue;
538240233Sglebius	struct pf_flush_entry *pffe, *pffe1;
539240233Sglebius	uint32_t killed = 0;
540240233Sglebius
541240233Sglebius	PF_FLUSHQ_LOCK();
542240233Sglebius	queue = *(struct pf_flush_head *)c;
543240233Sglebius	SLIST_INIT((struct pf_flush_head *)c);
544240233Sglebius	PF_FLUSHQ_UNLOCK();
545240233Sglebius
546240233Sglebius	V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
547240233Sglebius
548240233Sglebius	for (int i = 0; i <= V_pf_hashmask; i++) {
549240233Sglebius		struct pf_idhash *ih = &V_pf_idhash[i];
550240233Sglebius		struct pf_state_key *sk;
551240233Sglebius		struct pf_state *s;
552240233Sglebius
553240233Sglebius		PF_HASHROW_LOCK(ih);
554240233Sglebius		LIST_FOREACH(s, &ih->states, entry) {
555240233Sglebius		    sk = s->key[PF_SK_WIRE];
556240233Sglebius		    SLIST_FOREACH(pffe, &queue, next)
557240233Sglebius			if (sk->af == pffe->af && (pffe->rule == NULL ||
558240233Sglebius			    pffe->rule == s->rule.ptr) &&
559240233Sglebius			    ((pffe->dir == PF_OUT &&
560240233Sglebius			    PF_AEQ(&pffe->addr, &sk->addr[1], sk->af)) ||
561240233Sglebius			    (pffe->dir == PF_IN &&
562240233Sglebius			    PF_AEQ(&pffe->addr, &sk->addr[0], sk->af)))) {
563240233Sglebius				s->timeout = PFTM_PURGE;
564240233Sglebius				s->src.state = s->dst.state = TCPS_CLOSED;
565240233Sglebius				killed++;
566145836Smlaier			}
567145836Smlaier		}
568240233Sglebius		PF_HASHROW_UNLOCK(ih);
569145836Smlaier	}
570240233Sglebius	SLIST_FOREACH_SAFE(pffe, &queue, next, pffe1)
571240233Sglebius		free(pffe, M_PFTEMP);
572240233Sglebius	if (V_pf_status.debug >= PF_DEBUG_MISC)
573240233Sglebius		printf("%s: %u states killed", __func__, killed);
574240233Sglebius}
575145836Smlaier
576240233Sglebius/*
577240233Sglebius * Can return locked on failure, so that we can consistently
578240233Sglebius * allocate and insert a new one.
579240233Sglebius */
580240233Sglebiusstruct pf_src_node *
581240233Sglebiuspf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af,
582240233Sglebius	int returnlocked)
583240233Sglebius{
584240233Sglebius	struct pf_srchash *sh;
585240233Sglebius	struct pf_src_node *n;
586240233Sglebius
587240233Sglebius	V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
588240233Sglebius
589240233Sglebius	sh = &V_pf_srchash[pf_hashsrc(src, af)];
590240233Sglebius	PF_HASHROW_LOCK(sh);
591240233Sglebius	LIST_FOREACH(n, &sh->nodes, entry)
592240233Sglebius		if (n->rule.ptr == rule && n->af == af &&
593240233Sglebius		    ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) ||
594240233Sglebius		    (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)))
595240233Sglebius			break;
596240233Sglebius	if (n != NULL || returnlocked == 0)
597240233Sglebius		PF_HASHROW_UNLOCK(sh);
598240233Sglebius
599240233Sglebius	return (n);
600145836Smlaier}
601145836Smlaier
602240233Sglebiusstatic int
603130613Smlaierpf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
604130613Smlaier    struct pf_addr *src, sa_family_t af)
605126258Smlaier{
606126258Smlaier
607240233Sglebius	KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK ||
608240233Sglebius	    rule->rpool.opts & PF_POOL_STICKYADDR),
609240233Sglebius	    ("%s for non-tracking rule %p", __func__, rule));
610240233Sglebius
611240233Sglebius	if (*sn == NULL)
612240233Sglebius		*sn = pf_find_src_node(src, rule, af, 1);
613240233Sglebius
614130613Smlaier	if (*sn == NULL) {
615240233Sglebius		struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)];
616240233Sglebius
617240233Sglebius		PF_HASHROW_ASSERT(sh);
618240233Sglebius
619130613Smlaier		if (!rule->max_src_nodes ||
620130613Smlaier		    rule->src_nodes < rule->max_src_nodes)
621240233Sglebius			(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
622145836Smlaier		else
623223637Sbz			V_pf_status.lcounters[LCNT_SRCNODES]++;
624240233Sglebius		if ((*sn) == NULL) {
625240233Sglebius			PF_HASHROW_UNLOCK(sh);
626130613Smlaier			return (-1);
627240233Sglebius		}
628145836Smlaier
629145836Smlaier		pf_init_threshold(&(*sn)->conn_rate,
630145836Smlaier		    rule->max_src_conn_rate.limit,
631145836Smlaier		    rule->max_src_conn_rate.seconds);
632145836Smlaier
633130613Smlaier		(*sn)->af = af;
634240233Sglebius		(*sn)->rule.ptr = rule;
635130613Smlaier		PF_ACPY(&(*sn)->addr, src, af);
636240233Sglebius		LIST_INSERT_HEAD(&sh->nodes, *sn, entry);
637240233Sglebius		(*sn)->creation = time_uptime;
638130613Smlaier		(*sn)->ruletype = rule->action;
639130613Smlaier		if ((*sn)->rule.ptr != NULL)
640130613Smlaier			(*sn)->rule.ptr->src_nodes++;
641240233Sglebius		PF_HASHROW_UNLOCK(sh);
642223637Sbz		V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
643223637Sbz		V_pf_status.src_nodes++;
644130613Smlaier	} else {
645130613Smlaier		if (rule->max_src_states &&
646145836Smlaier		    (*sn)->states >= rule->max_src_states) {
647223637Sbz			V_pf_status.lcounters[LCNT_SRCSTATES]++;
648130613Smlaier			return (-1);
649145836Smlaier		}
650130613Smlaier	}
651130613Smlaier	return (0);
652130613Smlaier}
653126258Smlaier
654240233Sglebiusstatic void
655240233Sglebiuspf_remove_src_node(struct pf_src_node *src)
656240233Sglebius{
657240233Sglebius	struct pf_srchash *sh;
658223637Sbz
659240233Sglebius	sh = &V_pf_srchash[pf_hashsrc(&src->addr, src->af)];
660240233Sglebius	PF_HASHROW_LOCK(sh);
661240233Sglebius	LIST_REMOVE(src, entry);
662240233Sglebius	PF_HASHROW_UNLOCK(sh);
663240233Sglebius}
664240233Sglebius
665240233Sglebius/* Data storage structures initialization. */
666240233Sglebiusvoid
667240233Sglebiuspf_initialize()
668223637Sbz{
669240233Sglebius	struct pf_keyhash	*kh;
670240233Sglebius	struct pf_idhash	*ih;
671240233Sglebius	struct pf_srchash	*sh;
672240233Sglebius	u_int i;
673223637Sbz
674240233Sglebius	TUNABLE_ULONG_FETCH("net.pf.states_hashsize", &V_pf_hashsize);
675240233Sglebius	if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize))
676240233Sglebius		V_pf_hashsize = PF_HASHSIZ;
677240233Sglebius	TUNABLE_ULONG_FETCH("net.pf.source_nodes_hashsize", &V_pf_srchashsize);
678240233Sglebius	if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize))
679240233Sglebius		V_pf_srchashsize = PF_HASHSIZ / 4;
680240233Sglebius
681240233Sglebius	V_pf_hashseed = arc4random();
682240233Sglebius
683240233Sglebius	/* States and state keys storage. */
684240233Sglebius	V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state),
685240233Sglebius	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
686240233Sglebius	V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z;
687240233Sglebius	uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT);
688240233Sglebius
689240233Sglebius	V_pf_state_key_z = uma_zcreate("pf state keys",
690240233Sglebius	    sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
691240233Sglebius	    UMA_ALIGN_PTR, 0);
692240233Sglebius	V_pf_keyhash = malloc(V_pf_hashsize * sizeof(struct pf_keyhash),
693240233Sglebius	    M_PFHASH, M_WAITOK | M_ZERO);
694240233Sglebius	V_pf_idhash = malloc(V_pf_hashsize * sizeof(struct pf_idhash),
695240233Sglebius	    M_PFHASH, M_WAITOK | M_ZERO);
696240233Sglebius	V_pf_hashmask = V_pf_hashsize - 1;
697240233Sglebius	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
698240233Sglebius	    i++, kh++, ih++) {
699240233Sglebius		mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF);
700240233Sglebius		mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
701223637Sbz	}
702223637Sbz
703240233Sglebius	/* Source nodes. */
704240233Sglebius	V_pf_sources_z = uma_zcreate("pf source nodes",
705240233Sglebius	    sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
706240233Sglebius	    0);
707240233Sglebius	V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
708240233Sglebius	uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
709240233Sglebius	V_pf_srchash = malloc(V_pf_srchashsize * sizeof(struct pf_srchash),
710240233Sglebius	  M_PFHASH, M_WAITOK|M_ZERO);
711240233Sglebius	V_pf_srchashmask = V_pf_srchashsize - 1;
712240233Sglebius	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
713240233Sglebius		mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
714223637Sbz
715240233Sglebius	/* ALTQ */
716240233Sglebius	TAILQ_INIT(&V_pf_altqs[0]);
717240233Sglebius	TAILQ_INIT(&V_pf_altqs[1]);
718240233Sglebius	TAILQ_INIT(&V_pf_pabuf);
719240233Sglebius	V_pf_altqs_active = &V_pf_altqs[0];
720240233Sglebius	V_pf_altqs_inactive = &V_pf_altqs[1];
721240233Sglebius
722240233Sglebius	/* Mbuf tags */
723240233Sglebius	V_pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) +
724240233Sglebius	    sizeof(struct pf_mtag), NULL, NULL, pf_mtag_init, NULL,
725240233Sglebius	    UMA_ALIGN_PTR, 0);
726240233Sglebius
727240233Sglebius	/* Send & flush queues. */
728240233Sglebius	STAILQ_INIT(&V_pf_sendqueue);
729240233Sglebius	SLIST_INIT(&V_pf_flushqueue);
730240233Sglebius	TASK_INIT(&V_pf_flushtask, 0, pf_flush_task, &V_pf_flushqueue);
731240233Sglebius	mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
732240233Sglebius	mtx_init(&pf_flushqueue_mtx, "pf flush queue", NULL, MTX_DEF);
733240233Sglebius
734240233Sglebius	/* Unlinked, but may be referenced rules. */
735240233Sglebius	TAILQ_INIT(&V_pf_unlinked_rules);
736240233Sglebius	mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF);
737223637Sbz}
738223637Sbz
739240233Sglebiusvoid
740240233Sglebiuspf_cleanup()
741223637Sbz{
742240233Sglebius	struct pf_keyhash	*kh;
743240233Sglebius	struct pf_idhash	*ih;
744240233Sglebius	struct pf_srchash	*sh;
745240233Sglebius	struct pf_send_entry	*pfse, *next;
746240233Sglebius	u_int i;
747223637Sbz
748240233Sglebius	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
749240233Sglebius	    i++, kh++, ih++) {
750240233Sglebius		KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
751240233Sglebius		    __func__));
752240233Sglebius		KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty",
753240233Sglebius		    __func__));
754240233Sglebius		mtx_destroy(&kh->lock);
755240233Sglebius		mtx_destroy(&ih->lock);
756240233Sglebius	}
757240233Sglebius	free(V_pf_keyhash, M_PFHASH);
758240233Sglebius	free(V_pf_idhash, M_PFHASH);
759240233Sglebius
760240233Sglebius	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
761240233Sglebius		KASSERT(LIST_EMPTY(&sh->nodes),
762240233Sglebius		    ("%s: source node hash not empty", __func__));
763240233Sglebius		mtx_destroy(&sh->lock);
764240233Sglebius	}
765240233Sglebius	free(V_pf_srchash, M_PFHASH);
766240233Sglebius
767240233Sglebius	STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
768240233Sglebius		m_freem(pfse->pfse_m);
769240233Sglebius		free(pfse, M_PFTEMP);
770240233Sglebius	}
771240233Sglebius
772240233Sglebius	mtx_destroy(&pf_sendqueue_mtx);
773240233Sglebius	mtx_destroy(&pf_flushqueue_mtx);
774240233Sglebius	mtx_destroy(&pf_unlnkdrules_mtx);
775240233Sglebius
776240233Sglebius	uma_zdestroy(V_pf_mtag_z);
777240233Sglebius	uma_zdestroy(V_pf_sources_z);
778240233Sglebius	uma_zdestroy(V_pf_state_z);
779240233Sglebius	uma_zdestroy(V_pf_state_key_z);
780240233Sglebius}
781240233Sglebius
782240233Sglebiusstatic int
783240233Sglebiuspf_mtag_init(void *mem, int size, int how)
784240233Sglebius{
785240233Sglebius	struct m_tag *t;
786240233Sglebius
787240233Sglebius	t = (struct m_tag *)mem;
788240233Sglebius	t->m_tag_cookie = MTAG_ABI_COMPAT;
789240233Sglebius	t->m_tag_id = PACKET_TAG_PF;
790240233Sglebius	t->m_tag_len = sizeof(struct pf_mtag);
791240233Sglebius	t->m_tag_free = pf_mtag_free;
792240233Sglebius
793223637Sbz	return (0);
794223637Sbz}
795223637Sbz
796240233Sglebiusstatic void
797240233Sglebiuspf_mtag_free(struct m_tag *t)
798130613Smlaier{
799223637Sbz
800240233Sglebius	uma_zfree(V_pf_mtag_z, t);
801240233Sglebius}
802223637Sbz
803240233Sglebiusstruct pf_mtag *
804240233Sglebiuspf_get_mtag(struct mbuf *m)
805240233Sglebius{
806240233Sglebius	struct m_tag *mtag;
807240233Sglebius
808240233Sglebius	if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL)
809240233Sglebius		return ((struct pf_mtag *)(mtag + 1));
810240233Sglebius
811240233Sglebius	mtag = uma_zalloc(V_pf_mtag_z, M_NOWAIT);
812240233Sglebius	if (mtag == NULL)
813240233Sglebius		return (NULL);
814240233Sglebius	bzero(mtag + 1, sizeof(struct pf_mtag));
815240233Sglebius	m_tag_prepend(m, mtag);
816240233Sglebius
817240233Sglebius	return ((struct pf_mtag *)(mtag + 1));
818240233Sglebius}
819240233Sglebius
/*
 * Attach state 's' to its wire key 'skw' and its stack key 'sks' in the
 * state key hash.
 *
 * For each key the matching hash row is locked and searched for an equal
 * key (bcmp over struct pf_state_key_cmp).  If one is found, the key
 * passed in is freed and the state is linked to the existing key;
 * otherwise the passed key is inserted into the row.  When 'sks' and
 * 'skw' are the same pointer, that single key serves both indexes.
 *
 * A state already linked to the existing key with the same kif and
 * direction is a collision.  If it is a TCP state with both peers at or
 * beyond TCPS_FIN_WAIT_2 it is marked closed, referenced, and unlinked
 * once the key row lock has been dropped.  Any other collision makes the
 * attach fail.
 *
 * Returns 0 on success and -1 on collision.  On failure the key being
 * attached is freed, and if the failure happened on the stack key the
 * state is detached again through pf_detach_state().
 *
 * NOTE(review): a failure on the wire key returns without touching 'sks';
 * presumably the caller or a later path disposes of it -- confirm.
 */
820240233Sglebiusstatic int
821240233Sglebiuspf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks,
822240233Sglebius    struct pf_state *s)
823240233Sglebius{
824240233Sglebius	struct pf_keyhash	*kh;
825240233Sglebius	struct pf_state_key	*sk, *cur;
826240233Sglebius	struct pf_state		*si, *olds = NULL;
827240233Sglebius	int idx;
828240233Sglebius
829240233Sglebius	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
830240233Sglebius	KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__));
831240233Sglebius	KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__));
832240233Sglebius
833240233Sglebius	/*
834240233Sglebius	 * First run: start with wire key.
835240233Sglebius	 */
836240233Sglebius	sk = skw;
837240233Sglebius	idx = PF_SK_WIRE;
838240233Sglebius
839240233Sglebiuskeyattach:
840240233Sglebius	kh = &V_pf_keyhash[pf_hashkey(sk)];
841240233Sglebius
842240233Sglebius	PF_HASHROW_LOCK(kh);
	/* Look for an identical key that is already present in this row. */
843240233Sglebius	LIST_FOREACH(cur, &kh->keys, entry)
844240233Sglebius		if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0)
845240233Sglebius			break;
846240233Sglebius
847240233Sglebius	if (cur != NULL) {
848240233Sglebius		/* Key exists. Check for same kif, if none, add to key. */
849240233Sglebius		TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) {
850240233Sglebius			struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)];
851240233Sglebius
			/* The ID hash row of 'si' is held while it is inspected. */
852240233Sglebius			PF_HASHROW_LOCK(ih);
853240233Sglebius			if (si->kif == s->kif &&
854240233Sglebius			    si->direction == s->direction) {
855223637Sbz				if (sk->proto == IPPROTO_TCP &&
856240233Sglebius				    si->src.state >= TCPS_FIN_WAIT_2 &&
857240233Sglebius				    si->dst.state >= TCPS_FIN_WAIT_2) {
858240233Sglebius					si->src.state = si->dst.state =
859223637Sbz					    TCPS_CLOSED;
860240233Sglebius					/* Unlink later or cur can go away. */
861240233Sglebius					pf_ref_state(si);
862240233Sglebius					olds = si;
863223637Sbz				} else {
864223637Sbz					if (V_pf_status.debug >= PF_DEBUG_MISC) {
865223637Sbz						printf("pf: %s key attach "
866223637Sbz						    "failed on %s: ",
867223637Sbz						    (idx == PF_SK_WIRE) ?
868223637Sbz						    "wire" : "stack",
869223637Sbz						    s->kif->pfik_name);
870223637Sbz						pf_print_state_parts(s,
871223637Sbz						    (idx == PF_SK_WIRE) ?
872223637Sbz						    sk : NULL,
873223637Sbz						    (idx == PF_SK_STACK) ?
874223637Sbz						    sk : NULL);
875223637Sbz						printf(", existing: ");
876240233Sglebius						pf_print_state_parts(si,
877223637Sbz						    (idx == PF_SK_WIRE) ?
878223637Sbz						    sk : NULL,
879223637Sbz						    (idx == PF_SK_STACK) ?
880223637Sbz						    sk : NULL);
881223637Sbz						printf("\n");
882223637Sbz					}
					/*
					 * NOTE(review): if 'olds' was referenced
					 * earlier in this loop, that reference does
					 * not appear to be released on this path --
					 * confirm.
					 */
883240233Sglebius					PF_HASHROW_UNLOCK(ih);
884240233Sglebius					PF_HASHROW_UNLOCK(kh);
885240233Sglebius					uma_zfree(V_pf_state_key_z, sk);
886240233Sglebius					if (idx == PF_SK_STACK)
887240233Sglebius						pf_detach_state(s);
888223637Sbz					return (-1);	/* collision! */
889223637Sbz				}
890223637Sbz			}
891240233Sglebius			PF_HASHROW_UNLOCK(ih);
892240233Sglebius		}
		/* No collision: drop our copy and share the existing key. */
893240233Sglebius		uma_zfree(V_pf_state_key_z, sk);
894223637Sbz		s->key[idx] = cur;
895240233Sglebius	} else {
896240233Sglebius		LIST_INSERT_HEAD(&kh->keys, sk, entry);
897223637Sbz		s->key[idx] = sk;
898126258Smlaier	}
899126258Smlaier
900240233Sglebiusstateattach:
901240233Sglebius	/* List is sorted, if-bound states before floating. */
902223637Sbz	if (s->kif == V_pfi_all)
903240233Sglebius		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]);
904223637Sbz	else
905240233Sglebius		TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]);
906223637Sbz
907240233Sglebius	/*
908240233Sglebius	 * Attach done. See how should we (or should not?)
909240233Sglebius	 * attach a second key.
910240233Sglebius	 */
911240233Sglebius	if (sks == skw) {
		/* One key for both indexes: link the stack list under the same row lock. */
912240233Sglebius		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
913240233Sglebius		idx = PF_SK_STACK;
914240233Sglebius		sks = NULL;
915240233Sglebius		goto stateattach;
916240233Sglebius	} else if (sks != NULL) {
917240233Sglebius		PF_HASHROW_UNLOCK(kh);
		/* The replaced state is unlinked only after the row lock is dropped. */
918240233Sglebius		if (olds) {
919240233Sglebius			pf_unlink_state(olds, 0);
920240233Sglebius			pf_release_state(olds);
921240233Sglebius			olds = NULL;
922240233Sglebius		}
923240233Sglebius		/*
924240233Sglebius		 * Continue attaching with stack key.
925240233Sglebius		 */
926240233Sglebius		sk = sks;
927240233Sglebius		idx = PF_SK_STACK;
928240233Sglebius		sks = NULL;
929240233Sglebius		goto keyattach;
930240233Sglebius	} else
931240233Sglebius		PF_HASHROW_UNLOCK(kh);
932223637Sbz
933240233Sglebius	if (olds) {
934240233Sglebius		pf_unlink_state(olds, 0);
935240233Sglebius		pf_release_state(olds);
936240233Sglebius	}
937240233Sglebius
938240233Sglebius	KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL,
939240233Sglebius	    ("%s failure", __func__));
940240233Sglebius
941223637Sbz	return (0);
942223637Sbz}
943223637Sbz
944240233Sglebiusstatic void
945223637Sbzpf_detach_state(struct pf_state *s)
946223637Sbz{
947240233Sglebius	struct pf_state_key *sks = s->key[PF_SK_STACK];
948240233Sglebius	struct pf_keyhash *kh;
949223637Sbz
950240233Sglebius	if (sks != NULL) {
951240233Sglebius		kh = &V_pf_keyhash[pf_hashkey(sks)];
952240233Sglebius		PF_HASHROW_LOCK(kh);
953240233Sglebius		if (s->key[PF_SK_STACK] != NULL)
954240233Sglebius			pf_state_key_detach(s, PF_SK_STACK);
955240233Sglebius		/*
956240233Sglebius		 * If both point to same key, then we are done.
957240233Sglebius		 */
958240233Sglebius		if (sks == s->key[PF_SK_WIRE]) {
959240233Sglebius			pf_state_key_detach(s, PF_SK_WIRE);
960240233Sglebius			PF_HASHROW_UNLOCK(kh);
961240233Sglebius			return;
962240233Sglebius		}
963240233Sglebius		PF_HASHROW_UNLOCK(kh);
964240233Sglebius	}
965223637Sbz
966240233Sglebius	if (s->key[PF_SK_WIRE] != NULL) {
967240233Sglebius		kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])];
968240233Sglebius		PF_HASHROW_LOCK(kh);
969240233Sglebius		if (s->key[PF_SK_WIRE] != NULL)
970240233Sglebius			pf_state_key_detach(s, PF_SK_WIRE);
971240233Sglebius		PF_HASHROW_UNLOCK(kh);
972240233Sglebius	}
973223637Sbz}
974223637Sbz
975240233Sglebiusstatic void
976223637Sbzpf_state_key_detach(struct pf_state *s, int idx)
977223637Sbz{
978240233Sglebius	struct pf_state_key *sk = s->key[idx];
979240233Sglebius#ifdef INVARIANTS
980240233Sglebius	struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)];
981223637Sbz
982240233Sglebius	PF_HASHROW_ASSERT(kh);
983223637Sbz#endif
984240233Sglebius	TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]);
985240233Sglebius	s->key[idx] = NULL;
986223637Sbz
987240233Sglebius	if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) {
988240233Sglebius		LIST_REMOVE(sk, entry);
989240233Sglebius		uma_zfree(V_pf_state_key_z, sk);
990223637Sbz	}
991223637Sbz}
992223637Sbz
993240233Sglebiusstatic int
994240233Sglebiuspf_state_key_ctor(void *mem, int size, void *arg, int flags)
995240233Sglebius{
996240233Sglebius	struct pf_state_key *sk = mem;
997240233Sglebius
998240233Sglebius	bzero(sk, sizeof(struct pf_state_key_cmp));
999240233Sglebius	TAILQ_INIT(&sk->states[PF_SK_WIRE]);
1000240233Sglebius	TAILQ_INIT(&sk->states[PF_SK_STACK]);
1001240233Sglebius
1002240233Sglebius	return (0);
1003240233Sglebius}
1004240233Sglebius
1005223637Sbzstruct pf_state_key *
1006240233Sglebiuspf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr,
1007240233Sglebius	struct pf_addr *daddr, u_int16_t sport, u_int16_t dport)
1008223637Sbz{
1009240233Sglebius	struct pf_state_key *sk;
1010223637Sbz
1011240233Sglebius	sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
1012240233Sglebius	if (sk == NULL)
1013223637Sbz		return (NULL);
1014223637Sbz
1015240233Sglebius	PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af);
1016240233Sglebius	PF_ACPY(&sk->addr[pd->didx], daddr, pd->af);
1017240233Sglebius	sk->port[pd->sidx] = sport;
1018240233Sglebius	sk->port[pd->didx] = dport;
1019240233Sglebius	sk->proto = pd->proto;
1020240233Sglebius	sk->af = pd->af;
1021240233Sglebius
1022223637Sbz	return (sk);
1023223637Sbz}
1024223637Sbz
1025240233Sglebiusstruct pf_state_key *
1026240233Sglebiuspf_state_key_clone(struct pf_state_key *orig)
1027223637Sbz{
1028240233Sglebius	struct pf_state_key *sk;
1029223637Sbz
1030240233Sglebius	sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
1031240233Sglebius	if (sk == NULL)
1032240233Sglebius		return (NULL);
1033223637Sbz
1034240233Sglebius	bcopy(orig, sk, sizeof(struct pf_state_key_cmp));
1035223637Sbz
1036240233Sglebius	return (sk);
1037223637Sbz}
1038223637Sbz
/*
 * Insert state 's', bound to interface 'kif', into the state key hash
 * (through pf_state_key_attach()) and into the state ID hash.
 *
 * A state that has neither an id nor a creatorid gets an id built from
 * the per-CPU counter V_pf_stateid[curcpu] combined with the CPU number
 * shifted by PFID_CPUSHIFT, stored in big-endian order, and the local
 * hostid as creatorid.
 *
 * Returns 0 on success; in that case the ID hash row of 's' is still
 * locked when the function returns, it is not unlocked here.  Returns -1
 * if the keys could not be attached or a state with the same id and
 * creatorid is already present; the row is not left locked in that case.
 */
1039223637Sbzint
1040223637Sbzpf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
1041223637Sbz    struct pf_state_key *sks, struct pf_state *s)
1042223637Sbz{
1043240233Sglebius	struct pf_idhash *ih;
1044240233Sglebius	struct pf_state *cur;
1045223637Sbz
1046240233Sglebius	KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]),
1047240233Sglebius	    ("%s: sks not pristine", __func__));
1048240233Sglebius	KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]),
1049240233Sglebius	    ("%s: skw not pristine", __func__));
1050240233Sglebius	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
1051240233Sglebius
1052223637Sbz	s->kif = kif;
1053223637Sbz
1054240233Sglebius	if (pf_state_key_attach(skw, sks, s))
1055240233Sglebius		return (-1);
1056126258Smlaier
	/* Locally created state: assign a fresh id and our hostid. */
1057223637Sbz	if (s->id == 0 && s->creatorid == 0) {
1058240233Sglebius		/* XXX: should be atomic, but probability of collision low */
1059240233Sglebius		if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID)
1060240233Sglebius			V_pf_stateid[curcpu] = 1;
1061240233Sglebius		s->id |= (uint64_t )curcpu << PFID_CPUSHIFT;
1062240233Sglebius		s->id = htobe64(s->id);
1063223637Sbz		s->creatorid = V_pf_status.hostid;
1064130613Smlaier	}
1065240233Sglebius
1066240233Sglebius	ih = &V_pf_idhash[PF_IDHASH(s)];
1067240233Sglebius	PF_HASHROW_LOCK(ih);
	/* Refuse to insert a duplicate id/creatorid pair. */
1068240233Sglebius	LIST_FOREACH(cur, &ih->states, entry)
1069240233Sglebius		if (cur->id == s->id && cur->creatorid == s->creatorid)
1070240233Sglebius			break;
1071240233Sglebius
1072240233Sglebius	if (cur != NULL) {
1073240233Sglebius		PF_HASHROW_UNLOCK(ih);
1074223637Sbz		if (V_pf_status.debug >= PF_DEBUG_MISC) {
1075130613Smlaier			printf("pf: state insert failed: "
1076130613Smlaier			    "id: %016llx creatorid: %08x",
1077240233Sglebius			    (unsigned long long)be64toh(s->id),
1078240233Sglebius			    ntohl(s->creatorid));
1079130613Smlaier			printf("\n");
1080130613Smlaier		}
		/* Undo the key attachment done above. */
1081223637Sbz		pf_detach_state(s);
1082130613Smlaier		return (-1);
1083130613Smlaier	}
1084240233Sglebius	LIST_INSERT_HEAD(&ih->states, s, entry);
1085240233Sglebius	/* One for keys, one for ID hash. */
1086240233Sglebius	refcount_init(&s->refs, 2);
1087240233Sglebius
1088223637Sbz	V_pf_status.fcounters[FCNT_STATE_INSERT]++;
	/* Notify pfsync if its hook is installed. */
1089223637Sbz	if (pfsync_insert_state_ptr != NULL)
1090223637Sbz		pfsync_insert_state_ptr(s);
1091240233Sglebius
1092240233Sglebius	/* Returns locked. */
1093126258Smlaier	return (0);
1094126258Smlaier}
1095126258Smlaier
1096240233Sglebius/*
1097240233Sglebius * Find state by ID: returns with locked row on success.
1098240233Sglebius */
1099223637Sbzstruct pf_state *
1100240233Sglebiuspf_find_state_byid(uint64_t id, uint32_t creatorid)
1101223637Sbz{
1102240233Sglebius	struct pf_idhash *ih;
1103240233Sglebius	struct pf_state *s;
1104240233Sglebius
1105223637Sbz	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
1106223637Sbz
1107240233Sglebius	ih = &V_pf_idhash[(be64toh(id) % (V_pf_hashmask + 1))];
1108223637Sbz
1109240233Sglebius	PF_HASHROW_LOCK(ih);
1110240233Sglebius	LIST_FOREACH(s, &ih->states, entry)
1111240233Sglebius		if (s->id == id && s->creatorid == creatorid)
1112240233Sglebius			break;
1113223637Sbz
1114240233Sglebius	if (s == NULL)
1115240233Sglebius		PF_HASHROW_UNLOCK(ih);
1116240233Sglebius
1117240233Sglebius	return (s);
1118223637Sbz}
1119223637Sbz
1120240233Sglebius/*
1121240233Sglebius * Find state by key.
1122240233Sglebius * Returns with ID hash slot locked on success.
1123240233Sglebius */
1124240233Sglebiusstatic struct pf_state *
1125240233Sglebiuspf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
1126223637Sbz{
1127240233Sglebius	struct pf_keyhash	*kh;
1128223637Sbz	struct pf_state_key	*sk;
1129240233Sglebius	struct pf_state		*s;
1130240233Sglebius	int idx;
1131223637Sbz
1132223637Sbz	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
1133223637Sbz
1134240233Sglebius	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];
1135240233Sglebius
1136240233Sglebius	PF_HASHROW_LOCK(kh);
1137240233Sglebius	LIST_FOREACH(sk, &kh->keys, entry)
1138240233Sglebius		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
1139240233Sglebius			break;
1140240233Sglebius	if (sk == NULL) {
1141240233Sglebius		PF_HASHROW_UNLOCK(kh);
1142240233Sglebius		return (NULL);
1143223637Sbz	}
1144223637Sbz
1145240233Sglebius	idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK);
1146223637Sbz
1147240233Sglebius	/* List is sorted, if-bound states before floating ones. */
1148240233Sglebius	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx])
1149240233Sglebius		if (s->kif == V_pfi_all || s->kif == kif) {
1150240233Sglebius			PF_STATE_LOCK(s);
1151240233Sglebius			PF_HASHROW_UNLOCK(kh);
1152240233Sglebius			if (s->timeout == PFTM_UNLINKED) {
1153240233Sglebius				/*
1154240233Sglebius				 * State is being processed
1155240233Sglebius				 * by pf_unlink_state() in
1156240233Sglebius				 * an other thread.
1157240233Sglebius				 */
1158240233Sglebius				PF_STATE_UNLOCK(s);
1159240233Sglebius				return (NULL);
1160240233Sglebius			}
1161240233Sglebius			return (s);
1162240233Sglebius		}
1163240233Sglebius	PF_HASHROW_UNLOCK(kh);
1164223637Sbz
1165223637Sbz	return (NULL);
1166223637Sbz}
1167223637Sbz
1168223637Sbzstruct pf_state *
1169223637Sbzpf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
1170223637Sbz{
1171240233Sglebius	struct pf_keyhash	*kh;
1172223637Sbz	struct pf_state_key	*sk;
1173240233Sglebius	struct pf_state		*s, *ret = NULL;
1174240233Sglebius	int			 idx, inout = 0;
1175223637Sbz
1176223637Sbz	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
1177223637Sbz
1178240233Sglebius	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];
1179223637Sbz
1180240233Sglebius	PF_HASHROW_LOCK(kh);
1181240233Sglebius	LIST_FOREACH(sk, &kh->keys, entry)
1182240233Sglebius		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
1183240233Sglebius			break;
1184240233Sglebius	if (sk == NULL) {
1185240233Sglebius		PF_HASHROW_UNLOCK(kh);
1186240233Sglebius		return (NULL);
1187223637Sbz	}
1188240233Sglebius	switch (dir) {
1189240233Sglebius	case PF_IN:
1190240233Sglebius		idx = PF_SK_WIRE;
1191240233Sglebius		break;
1192240233Sglebius	case PF_OUT:
1193240233Sglebius		idx = PF_SK_STACK;
1194240233Sglebius		break;
1195240233Sglebius	case PF_INOUT:
1196240233Sglebius		idx = PF_SK_WIRE;
1197240233Sglebius		inout = 1;
1198240233Sglebius		break;
1199240233Sglebius	default:
1200240233Sglebius		panic("%s: dir %u", __func__, dir);
1201240233Sglebius	}
1202240233Sglebiussecond_run:
1203240233Sglebius	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
1204240233Sglebius		if (more == NULL) {
1205240233Sglebius			PF_HASHROW_UNLOCK(kh);
1206240233Sglebius			return (s);
1207240233Sglebius		}
1208240233Sglebius
1209240233Sglebius		if (ret)
1210240233Sglebius			(*more)++;
1211240233Sglebius		else
1212240233Sglebius			ret = s;
1213240233Sglebius	}
1214240233Sglebius	if (inout == 1) {
1215240233Sglebius		inout = 0;
1216240233Sglebius		idx = PF_SK_STACK;
1217240233Sglebius		goto second_run;
1218240233Sglebius	}
1219240233Sglebius	PF_HASHROW_UNLOCK(kh);
1220240233Sglebius
1221240233Sglebius	return (ret);
1222223637Sbz}
1223223637Sbz
1224223637Sbz/* END state table stuff */
1225223637Sbz
1226240233Sglebiusstatic void
1227240233Sglebiuspf_send(struct pf_send_entry *pfse)
1228240233Sglebius{
1229223637Sbz
1230240233Sglebius	PF_SENDQ_LOCK();
1231240233Sglebius	STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
1232240233Sglebius	PF_SENDQ_UNLOCK();
1233240233Sglebius	swi_sched(V_pf_swi_cookie, 0);
1234240233Sglebius}
1235240233Sglebius
1236126258Smlaiervoid
1237240233Sglebiuspf_intr(void *v)
1238240233Sglebius{
1239240233Sglebius	struct pf_send_head queue;
1240240233Sglebius	struct pf_send_entry *pfse, *next;
1241240233Sglebius
1242240233Sglebius	CURVNET_SET((struct vnet *)v);
1243240233Sglebius
1244240233Sglebius	PF_SENDQ_LOCK();
1245240233Sglebius	queue = V_pf_sendqueue;
1246240233Sglebius	STAILQ_INIT(&V_pf_sendqueue);
1247240233Sglebius	PF_SENDQ_UNLOCK();
1248240233Sglebius
1249240233Sglebius	STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
1250240233Sglebius		switch (pfse->pfse_type) {
1251240233Sglebius#ifdef INET
1252240233Sglebius		case PFSE_IP:
1253240233Sglebius			ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL);
1254240233Sglebius			break;
1255240233Sglebius		case PFSE_ICMP:
1256240233Sglebius			icmp_error(pfse->pfse_m, pfse->pfse_icmp_type,
1257240233Sglebius			    pfse->pfse_icmp_code, 0, pfse->pfse_icmp_mtu);
1258240233Sglebius			break;
1259240233Sglebius#endif /* INET */
1260240233Sglebius#ifdef INET6
1261240233Sglebius		case PFSE_IP6:
1262240233Sglebius			ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL,
1263240233Sglebius			    NULL);
1264240233Sglebius			break;
1265240233Sglebius		case PFSE_ICMP6:
1266240233Sglebius			icmp6_error(pfse->pfse_m, pfse->pfse_icmp_type,
1267240233Sglebius			    pfse->pfse_icmp_code, pfse->pfse_icmp_mtu);
1268240233Sglebius			break;
1269240233Sglebius#endif /* INET6 */
1270240233Sglebius		default:
1271240233Sglebius			panic("%s: unknown type", __func__);
1272240233Sglebius		}
1273240233Sglebius		free(pfse, M_PFTEMP);
1274240233Sglebius	}
1275240233Sglebius	CURVNET_RESTORE();
1276240233Sglebius}
1277240233Sglebius
1278240233Sglebiusvoid
1279171168Smlaierpf_purge_thread(void *v)
1280126258Smlaier{
1281240233Sglebius	int fullrun;
1282171168Smlaier
1283223637Sbz	CURVNET_SET((struct vnet *)v);
1284223637Sbz
1285171168Smlaier	for (;;) {
1286240233Sglebius		PF_RULES_RLOCK();
1287240233Sglebius		rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10);
1288171168Smlaier
1289226527Sbz		if (V_pf_end_threads) {
1290240233Sglebius			/*
1291240233Sglebius			 * To cleanse up all kifs and rules we need
1292240233Sglebius			 * two runs: first one clears reference flags,
1293240233Sglebius			 * then pf_purge_expired_states() doesn't
1294240233Sglebius			 * raise them, and then second run frees.
1295240233Sglebius			 */
1296240233Sglebius			PF_RULES_RUNLOCK();
1297240233Sglebius			pf_purge_unlinked_rules();
1298240233Sglebius			pfi_kif_purge();
1299171168Smlaier
1300240233Sglebius			/*
1301240233Sglebius			 * Now purge everything.
1302240233Sglebius			 */
1303240233Sglebius			pf_purge_expired_states(V_pf_hashmask + 1);
1304226527Sbz			pf_purge_expired_fragments();
1305240233Sglebius			pf_purge_expired_src_nodes();
1306240233Sglebius
1307240233Sglebius			/*
1308240233Sglebius			 * Now all kifs & rules should be unreferenced,
1309240233Sglebius			 * thus should be successfully freed.
1310240233Sglebius			 */
1311240233Sglebius			pf_purge_unlinked_rules();
1312240233Sglebius			pfi_kif_purge();
1313240233Sglebius
1314240233Sglebius			/*
1315240233Sglebius			 * Announce success and exit.
1316240233Sglebius			 */
1317240233Sglebius			PF_RULES_RLOCK();
1318226527Sbz			V_pf_end_threads++;
1319240233Sglebius			PF_RULES_RUNLOCK();
1320226527Sbz			wakeup(pf_purge_thread);
1321226527Sbz			kproc_exit(0);
1322226527Sbz		}
1323240233Sglebius		PF_RULES_RUNLOCK();
1324126258Smlaier
1325240233Sglebius		/* Process 1/interval fraction of the state table every run. */
1326240233Sglebius		fullrun = pf_purge_expired_states(V_pf_hashmask /
1327240233Sglebius			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));
1328196372Smlaier
1329240233Sglebius		/* Purge other expired types every PFTM_INTERVAL seconds. */
1330240233Sglebius		if (fullrun) {
1331240233Sglebius			/*
1332240233Sglebius			 * Order is important:
1333240233Sglebius			 * - states and src nodes reference rules
1334240233Sglebius			 * - states and rules reference kifs
1335240233Sglebius			 */
1336171168Smlaier			pf_purge_expired_fragments();
1337240233Sglebius			pf_purge_expired_src_nodes();
1338240233Sglebius			pf_purge_unlinked_rules();
1339240233Sglebius			pfi_kif_purge();
1340171168Smlaier		}
1341171168Smlaier	}
1342240233Sglebius	/* not reached */
1343223637Sbz	CURVNET_RESTORE();
1344126258Smlaier}
1345126258Smlaier
1346126258Smlaieru_int32_t
1347126258Smlaierpf_state_expires(const struct pf_state *state)
1348126258Smlaier{
1349126258Smlaier	u_int32_t	timeout;
1350126258Smlaier	u_int32_t	start;
1351126258Smlaier	u_int32_t	end;
1352126258Smlaier	u_int32_t	states;
1353126258Smlaier
1354126258Smlaier	/* handle all PFTM_* > PFTM_MAX here */
1355126258Smlaier	if (state->timeout == PFTM_PURGE)
1356240233Sglebius		return (time_uptime);
1357126258Smlaier	if (state->timeout == PFTM_UNTIL_PACKET)
1358126258Smlaier		return (0);
1359171168Smlaier	KASSERT(state->timeout != PFTM_UNLINKED,
1360171168Smlaier	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
1361240233Sglebius	KASSERT((state->timeout < PFTM_MAX),
1362126261Smlaier	    ("pf_state_expires: timeout > PFTM_MAX"));
1363126258Smlaier	timeout = state->rule.ptr->timeout[state->timeout];
1364126258Smlaier	if (!timeout)
1365223637Sbz		timeout = V_pf_default_rule.timeout[state->timeout];
1366126258Smlaier	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1367126258Smlaier	if (start) {
1368126258Smlaier		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1369240233Sglebius		states = state->rule.ptr->states_cur;	/* XXXGL */
1370126258Smlaier	} else {
1371223637Sbz		start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1372223637Sbz		end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1373223637Sbz		states = V_pf_status.states;
1374126258Smlaier	}
1375126258Smlaier	if (end && states > start && start < end) {
1376126258Smlaier		if (states < end)
1377126258Smlaier			return (state->expire + timeout * (end - states) /
1378126258Smlaier			    (end - start));
1379126258Smlaier		else
1380240233Sglebius			return (time_uptime);
1381126258Smlaier	}
1382126258Smlaier	return (state->expire + timeout);
1383126258Smlaier}
1384126258Smlaier
1385126258Smlaiervoid
1386240233Sglebiuspf_purge_expired_src_nodes()
1387126258Smlaier{
1388240233Sglebius	struct pf_srchash	*sh;
1389240233Sglebius	struct pf_src_node	*cur, *next;
1390240233Sglebius	int i;
1391126258Smlaier
1392240233Sglebius	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
1393240233Sglebius	    PF_HASHROW_LOCK(sh);
1394240233Sglebius	    LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
1395240233Sglebius		if (cur->states <= 0 && cur->expire <= time_uptime) {
1396240233Sglebius			if (cur->rule.ptr != NULL)
1397223637Sbz				cur->rule.ptr->src_nodes--;
1398240233Sglebius			LIST_REMOVE(cur, entry);
1399223637Sbz			V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1400223637Sbz			V_pf_status.src_nodes--;
1401240233Sglebius			uma_zfree(V_pf_sources_z, cur);
1402240233Sglebius		} else if (cur->rule.ptr != NULL)
1403240233Sglebius			cur->rule.ptr->rule_flag |= PFRULE_REFS;
1404240233Sglebius	    PF_HASHROW_UNLOCK(sh);
1405223637Sbz	}
1406130613Smlaier}
1407126258Smlaier
1408240233Sglebiusstatic void
1409130613Smlaierpf_src_tree_remove_state(struct pf_state *s)
1410130613Smlaier{
1411130613Smlaier	u_int32_t timeout;
1412126258Smlaier
1413130613Smlaier	if (s->src_node != NULL) {
1414223637Sbz		if (s->src.tcp_est)
1415223637Sbz			--s->src_node->conn;
1416130613Smlaier		if (--s->src_node->states <= 0) {
1417130613Smlaier			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1418130613Smlaier			if (!timeout)
1419130613Smlaier				timeout =
1420223637Sbz				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
1421240233Sglebius			s->src_node->expire = time_uptime + timeout;
1422130613Smlaier		}
1423130613Smlaier	}
1424130613Smlaier	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1425130613Smlaier		if (--s->nat_src_node->states <= 0) {
1426130613Smlaier			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1427130613Smlaier			if (!timeout)
1428130613Smlaier				timeout =
1429223637Sbz				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
1430240233Sglebius			s->nat_src_node->expire = time_uptime + timeout;
1431130613Smlaier		}
1432130613Smlaier	}
1433130613Smlaier	s->src_node = s->nat_src_node = NULL;
1434130613Smlaier}
1435126258Smlaier
/*
 * pf_unlink_state(s, flags)
 *
 * flags:   with PF_ENTER_LOCKED the caller already holds the ID hash row
 *          of 's' (this is asserted); without it the row is locked here.
 * returns: 0 if the state was already marked PFTM_UNLINKED, otherwise
 *          the result of the final pf_release_state() call.
 *
 * Steps, in order: mark the state PFTM_UNLINKED, send a TCP RST|ACK if
 * the state is in PF_TCPS_PROXY_DST, remove it from the ID hash row,
 * drop its source node references, unlock the row, notify pfsync if its
 * hook is set, detach the state keys, and drop two references: one with
 * refcount_release() and one through pf_release_state().
 */
1436240233Sglebius/*
1437240233Sglebius * Unlink and potentially free a state. Function may be
1438240233Sglebius * called with ID hash row locked, but always returns
1439240233Sglebius * unlocked, since it needs to go through key hash locking.
1440240233Sglebius */
1441240233Sglebiusint
1442240233Sglebiuspf_unlink_state(struct pf_state *s, u_int flags)
1443145836Smlaier{
1444240233Sglebius	struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];
1445223637Sbz
1446240233Sglebius	if ((flags & PF_ENTER_LOCKED) == 0)
1447240233Sglebius		PF_HASHROW_LOCK(ih);
1448240233Sglebius	else
1449240233Sglebius		PF_HASHROW_ASSERT(ih);
1450240233Sglebius
1451240233Sglebius	if (s->timeout == PFTM_UNLINKED) {
1452240233Sglebius		/*
1453240233Sglebius		 * State is being processed
1454240233Sglebius		 * by pf_unlink_state() in
1455240233Sglebius		 * another thread.
1456240233Sglebius		 */
1457240233Sglebius		PF_HASHROW_UNLOCK(ih);
1458240233Sglebius		return (0);	/* XXXGL: undefined actually */
1459240233Sglebius	}
1460240233Sglebius
	/* From here on other threads see the state as being unlinked. */
1461240233Sglebius	s->timeout = PFTM_UNLINKED;
1462240233Sglebius
1463240233Sglebius	if (s->src.state == PF_TCPS_PROXY_DST) {
1464223637Sbz		/* XXX wire key the right one? */
1465240233Sglebius		pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af,
1466240233Sglebius		    &s->key[PF_SK_WIRE]->addr[1],
1467240233Sglebius		    &s->key[PF_SK_WIRE]->addr[0],
1468240233Sglebius		    s->key[PF_SK_WIRE]->port[1],
1469240233Sglebius		    s->key[PF_SK_WIRE]->port[0],
1470240233Sglebius		    s->src.seqhi, s->src.seqlo + 1,
1471240233Sglebius		    TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL);
1472171168Smlaier	}
1473240233Sglebius
1474240233Sglebius	LIST_REMOVE(s, entry);
1475240233Sglebius	pf_src_tree_remove_state(s);
1476240233Sglebius	PF_HASHROW_UNLOCK(ih);
1477240233Sglebius
1478223637Sbz	if (pfsync_delete_state_ptr != NULL)
1479240233Sglebius		pfsync_delete_state_ptr(s);
1480240233Sglebius
	/* Key detachment takes the key hash row locks itself. */
1481240233Sglebius	pf_detach_state(s);
1482240233Sglebius	refcount_release(&s->refs);
1483240233Sglebius
1484240233Sglebius	return (pf_release_state(s));
1485171168Smlaier}
1486171168Smlaier
1487171168Smlaiervoid
1488171168Smlaierpf_free_state(struct pf_state *cur)
1489171168Smlaier{
1490223637Sbz
1491240233Sglebius	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
1492240233Sglebius	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
1493240233Sglebius	    cur->timeout));
1494240233Sglebius	--cur->rule.ptr->states_cur;
1495145836Smlaier	if (cur->nat_rule.ptr != NULL)
1496240233Sglebius		--cur->nat_rule.ptr->states_cur;
1497145836Smlaier	if (cur->anchor.ptr != NULL)
1498240233Sglebius		--cur->anchor.ptr->states_cur;
1499145836Smlaier	pf_normalize_tcp_cleanup(cur);
1500240233Sglebius	uma_zfree(V_pf_state_z, cur);
1501223637Sbz	V_pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1502145836Smlaier}
1503145836Smlaier
1504240233Sglebius/*
1505240233Sglebius * Called only from pf_purge_thread(), thus serialized.
1506240233Sglebius */
1507240233Sglebiusstatic int
1508240233Sglebiuspf_purge_expired_states(int maxcheck)
1509130613Smlaier{
1510240233Sglebius	static u_int i = 0;
1511130613Smlaier
1512240233Sglebius	struct pf_idhash *ih;
1513240233Sglebius	struct pf_state *s;
1514240233Sglebius	int rv = 0;
1515240233Sglebius
1516240233Sglebius	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
1517240233Sglebius
1518240233Sglebius	/*
1519240233Sglebius	 * Go through hash and unlink states that expire now.
1520240233Sglebius	 */
1521240233Sglebius	while (maxcheck > 0) {
1522240233Sglebius
1523240233Sglebius		/* Wrap to start of hash when we hit the end. */
1524240233Sglebius		if (i > V_pf_hashmask) {
1525240233Sglebius			i = 0;
1526240233Sglebius			rv = 1;
1527171168Smlaier		}
1528171168Smlaier
1529240233Sglebius		ih = &V_pf_idhash[i];
1530240233Sglebiusrelock:
1531240233Sglebius		PF_HASHROW_LOCK(ih);
1532240233Sglebius		LIST_FOREACH(s, &ih->states, entry) {
1533240233Sglebius			if (pf_state_expires(s) <= time_uptime) {
1534240233Sglebius				V_pf_status.states -=
1535240233Sglebius				    pf_unlink_state(s, PF_ENTER_LOCKED);
1536240233Sglebius				goto relock;
1537171168Smlaier			}
1538240233Sglebius			s->rule.ptr->rule_flag |= PFRULE_REFS;
1539240233Sglebius			if (s->nat_rule.ptr != NULL)
1540240233Sglebius				s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
1541240233Sglebius			if (s->anchor.ptr != NULL)
1542240233Sglebius				s->anchor.ptr->rule_flag |= PFRULE_REFS;
1543240233Sglebius			s->kif->pfik_flags |= PFI_IFLAG_REFS;
1544240233Sglebius			if (s->rt_kif)
1545240233Sglebius				s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
1546171168Smlaier		}
1547240233Sglebius		PF_HASHROW_UNLOCK(ih);
1548240233Sglebius		i++;
1549240233Sglebius		maxcheck--;
1550126258Smlaier	}
1551171168Smlaier
1552240233Sglebius	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
1553196372Smlaier
1554240233Sglebius	return (rv);
1555126258Smlaier}
1556126258Smlaier
1557240233Sglebiusstatic void
1558240233Sglebiuspf_purge_unlinked_rules()
1559126258Smlaier{
1560240233Sglebius	struct pf_rulequeue tmpq;
1561240233Sglebius	struct pf_rule *r, *r1;
1562126258Smlaier
1563240233Sglebius	/*
1564240233Sglebius	 * Do naive mark-and-sweep garbage collecting of old rules.
1565240233Sglebius	 * Reference flag is raised by pf_purge_expired_states()
1566240233Sglebius	 * and pf_purge_expired_src_nodes().
1567240233Sglebius	 *
1568240233Sglebius	 * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK,
1569240233Sglebius	 * use a temporary queue.
1570240233Sglebius	 */
1571240233Sglebius	TAILQ_INIT(&tmpq);
1572240233Sglebius	PF_UNLNKDRULES_LOCK();
1573240233Sglebius	TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
1574240233Sglebius		if (!(r->rule_flag & PFRULE_REFS)) {
1575240233Sglebius			TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
1576240233Sglebius			TAILQ_INSERT_TAIL(&tmpq, r, entries);
1577240233Sglebius		} else
1578240233Sglebius			r->rule_flag &= ~PFRULE_REFS;
1579240233Sglebius	}
1580240233Sglebius	PF_UNLNKDRULES_UNLOCK();
1581126258Smlaier
1582240233Sglebius	if (!TAILQ_EMPTY(&tmpq)) {
1583240233Sglebius		PF_RULES_WLOCK();
1584240233Sglebius		TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
1585240233Sglebius			TAILQ_REMOVE(&tmpq, r, entries);
1586240233Sglebius			pf_free_rule(r);
1587240233Sglebius		}
1588240233Sglebius		PF_RULES_WUNLOCK();
1589240233Sglebius	}
1590126258Smlaier}
1591126258Smlaier
1592126258Smlaiervoid
1593126258Smlaierpf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1594126258Smlaier{
1595126258Smlaier	switch (af) {
1596126258Smlaier#ifdef INET
1597126258Smlaier	case AF_INET: {
1598126258Smlaier		u_int32_t a = ntohl(addr->addr32[0]);
1599126258Smlaier		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
1600126258Smlaier		    (a>>8)&255, a&255);
1601126258Smlaier		if (p) {
1602126258Smlaier			p = ntohs(p);
1603126258Smlaier			printf(":%u", p);
1604126258Smlaier		}
1605126258Smlaier		break;
1606126258Smlaier	}
1607126258Smlaier#endif /* INET */
1608126258Smlaier#ifdef INET6
1609126258Smlaier	case AF_INET6: {
1610126258Smlaier		u_int16_t b;
1611223637Sbz		u_int8_t i, curstart, curend, maxstart, maxend;
1612223637Sbz		curstart = curend = maxstart = maxend = 255;
1613126258Smlaier		for (i = 0; i < 8; i++) {
1614126258Smlaier			if (!addr->addr16[i]) {
1615126258Smlaier				if (curstart == 255)
1616126258Smlaier					curstart = i;
1617223637Sbz				curend = i;
1618126258Smlaier			} else {
1619223637Sbz				if ((curend - curstart) >
1620223637Sbz				    (maxend - maxstart)) {
1621223637Sbz					maxstart = curstart;
1622223637Sbz					maxend = curend;
1623126258Smlaier				}
1624223637Sbz				curstart = curend = 255;
1625126258Smlaier			}
1626126258Smlaier		}
1627223637Sbz		if ((curend - curstart) >
1628223637Sbz		    (maxend - maxstart)) {
1629223637Sbz			maxstart = curstart;
1630223637Sbz			maxend = curend;
1631223637Sbz		}
1632126258Smlaier		for (i = 0; i < 8; i++) {
1633126258Smlaier			if (i >= maxstart && i <= maxend) {
1634223637Sbz				if (i == 0)
1635223637Sbz					printf(":");
1636223637Sbz				if (i == maxend)
1637223637Sbz					printf(":");
1638126258Smlaier			} else {
1639126258Smlaier				b = ntohs(addr->addr16[i]);
1640126258Smlaier				printf("%x", b);
1641126258Smlaier				if (i < 7)
1642126258Smlaier					printf(":");
1643126258Smlaier			}
1644126258Smlaier		}
1645126258Smlaier		if (p) {
1646126258Smlaier			p = ntohs(p);
1647126258Smlaier			printf("[%u]", p);
1648126258Smlaier		}
1649126258Smlaier		break;
1650126258Smlaier	}
1651126258Smlaier#endif /* INET6 */
1652126258Smlaier	}
1653126258Smlaier}
1654126258Smlaier
1655126258Smlaiervoid
1656126258Smlaierpf_print_state(struct pf_state *s)
1657126258Smlaier{
1658223637Sbz	pf_print_state_parts(s, NULL, NULL);
1659223637Sbz}
1660223637Sbz
1661240233Sglebiusstatic void
1662223637Sbzpf_print_state_parts(struct pf_state *s,
1663223637Sbz    struct pf_state_key *skwp, struct pf_state_key *sksp)
1664223637Sbz{
1665223637Sbz	struct pf_state_key *skw, *sks;
1666223637Sbz	u_int8_t proto, dir;
1667223637Sbz
1668223637Sbz	/* Do our best to fill these, but they're skipped if NULL */
1669223637Sbz	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
1670223637Sbz	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
1671223637Sbz	proto = skw ? skw->proto : (sks ? sks->proto : 0);
1672223637Sbz	dir = s ? s->direction : 0;
1673223637Sbz
1674223637Sbz	switch (proto) {
1675223637Sbz	case IPPROTO_IPV4:
1676223637Sbz		printf("IPv4");
1677223637Sbz		break;
1678223637Sbz	case IPPROTO_IPV6:
1679223637Sbz		printf("IPv6");
1680223637Sbz		break;
1681126258Smlaier	case IPPROTO_TCP:
1682223637Sbz		printf("TCP");
1683126258Smlaier		break;
1684126258Smlaier	case IPPROTO_UDP:
1685223637Sbz		printf("UDP");
1686126258Smlaier		break;
1687126258Smlaier	case IPPROTO_ICMP:
1688223637Sbz		printf("ICMP");
1689126258Smlaier		break;
1690126258Smlaier	case IPPROTO_ICMPV6:
1691223637Sbz		printf("ICMPv6");
1692126258Smlaier		break;
1693126258Smlaier	default:
1694223637Sbz		printf("%u", skw->proto);
1695126258Smlaier		break;
1696126258Smlaier	}
1697223637Sbz	switch (dir) {
1698223637Sbz	case PF_IN:
1699223637Sbz		printf(" in");
1700223637Sbz		break;
1701223637Sbz	case PF_OUT:
1702223637Sbz		printf(" out");
1703223637Sbz		break;
1704223637Sbz	}
1705223637Sbz	if (skw) {
1706223637Sbz		printf(" wire: ");
1707223637Sbz		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
1708223637Sbz		printf(" ");
1709223637Sbz		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
1710223637Sbz	}
1711223637Sbz	if (sks) {
1712223637Sbz		printf(" stack: ");
1713223637Sbz		if (sks != skw) {
1714223637Sbz			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
1715223637Sbz			printf(" ");
1716223637Sbz			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
1717223637Sbz		} else
1718223637Sbz			printf("-");
1719223637Sbz	}
1720223637Sbz	if (s) {
1721223637Sbz		if (proto == IPPROTO_TCP) {
1722223637Sbz			printf(" [lo=%u high=%u win=%u modulator=%u",
1723223637Sbz			    s->src.seqlo, s->src.seqhi,
1724223637Sbz			    s->src.max_win, s->src.seqdiff);
1725223637Sbz			if (s->src.wscale && s->dst.wscale)
1726223637Sbz				printf(" wscale=%u",
1727223637Sbz				    s->src.wscale & PF_WSCALE_MASK);
1728223637Sbz			printf("]");
1729223637Sbz			printf(" [lo=%u high=%u win=%u modulator=%u",
1730223637Sbz			    s->dst.seqlo, s->dst.seqhi,
1731223637Sbz			    s->dst.max_win, s->dst.seqdiff);
1732223637Sbz			if (s->src.wscale && s->dst.wscale)
1733223637Sbz				printf(" wscale=%u",
1734223637Sbz				s->dst.wscale & PF_WSCALE_MASK);
1735223637Sbz			printf("]");
1736223637Sbz		}
1737223637Sbz		printf(" %u:%u", s->src.state, s->dst.state);
1738223637Sbz	}
1739126258Smlaier}
1740126258Smlaier
1741126258Smlaiervoid
1742126258Smlaierpf_print_flags(u_int8_t f)
1743126258Smlaier{
1744126258Smlaier	if (f)
1745126258Smlaier		printf(" ");
1746126258Smlaier	if (f & TH_FIN)
1747126258Smlaier		printf("F");
1748126258Smlaier	if (f & TH_SYN)
1749126258Smlaier		printf("S");
1750126258Smlaier	if (f & TH_RST)
1751126258Smlaier		printf("R");
1752126258Smlaier	if (f & TH_PUSH)
1753126258Smlaier		printf("P");
1754126258Smlaier	if (f & TH_ACK)
1755126258Smlaier		printf("A");
1756126258Smlaier	if (f & TH_URG)
1757126258Smlaier		printf("U");
1758126258Smlaier	if (f & TH_ECE)
1759126258Smlaier		printf("E");
1760126258Smlaier	if (f & TH_CWR)
1761126258Smlaier		printf("W");
1762126258Smlaier}
1763126258Smlaier
1764126258Smlaier#define	PF_SET_SKIP_STEPS(i)					\
1765126258Smlaier	do {							\
1766126258Smlaier		while (head[i] != cur) {			\
1767126258Smlaier			head[i]->skip[i].ptr = cur;		\
1768126258Smlaier			head[i] = TAILQ_NEXT(head[i], entries);	\
1769126258Smlaier		}						\
1770126258Smlaier	} while (0)
1771126258Smlaier
1772126258Smlaiervoid
1773126258Smlaierpf_calc_skip_steps(struct pf_rulequeue *rules)
1774126258Smlaier{
1775126258Smlaier	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1776126258Smlaier	int i;
1777126258Smlaier
1778126258Smlaier	cur = TAILQ_FIRST(rules);
1779126258Smlaier	prev = cur;
1780126258Smlaier	for (i = 0; i < PF_SKIP_COUNT; ++i)
1781126258Smlaier		head[i] = cur;
1782126258Smlaier	while (cur != NULL) {
1783126258Smlaier
1784130613Smlaier		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1785126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1786126258Smlaier		if (cur->direction != prev->direction)
1787126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1788126258Smlaier		if (cur->af != prev->af)
1789126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1790126258Smlaier		if (cur->proto != prev->proto)
1791126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1792145836Smlaier		if (cur->src.neg != prev->src.neg ||
1793126258Smlaier		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1794126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1795126258Smlaier		if (cur->src.port[0] != prev->src.port[0] ||
1796126258Smlaier		    cur->src.port[1] != prev->src.port[1] ||
1797126258Smlaier		    cur->src.port_op != prev->src.port_op)
1798126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1799145836Smlaier		if (cur->dst.neg != prev->dst.neg ||
1800126258Smlaier		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1801126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1802126258Smlaier		if (cur->dst.port[0] != prev->dst.port[0] ||
1803126258Smlaier		    cur->dst.port[1] != prev->dst.port[1] ||
1804126258Smlaier		    cur->dst.port_op != prev->dst.port_op)
1805126258Smlaier			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1806126258Smlaier
1807126258Smlaier		prev = cur;
1808126258Smlaier		cur = TAILQ_NEXT(cur, entries);
1809126258Smlaier	}
1810126258Smlaier	for (i = 0; i < PF_SKIP_COUNT; ++i)
1811126258Smlaier		PF_SET_SKIP_STEPS(i);
1812126258Smlaier}
1813126258Smlaier
1814240233Sglebiusstatic int
1815126258Smlaierpf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1816126258Smlaier{
1817126258Smlaier	if (aw1->type != aw2->type)
1818126258Smlaier		return (1);
1819126258Smlaier	switch (aw1->type) {
1820126258Smlaier	case PF_ADDR_ADDRMASK:
1821223637Sbz	case PF_ADDR_RANGE:
1822126258Smlaier		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1823126258Smlaier			return (1);
1824126258Smlaier		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1825126258Smlaier			return (1);
1826126258Smlaier		return (0);
1827126258Smlaier	case PF_ADDR_DYNIFTL:
1828130613Smlaier		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1829126258Smlaier	case PF_ADDR_NOROUTE:
1830171168Smlaier	case PF_ADDR_URPFFAILED:
1831126258Smlaier		return (0);
1832126258Smlaier	case PF_ADDR_TABLE:
1833126258Smlaier		return (aw1->p.tbl != aw2->p.tbl);
1834126258Smlaier	default:
1835126258Smlaier		printf("invalid address type: %d\n", aw1->type);
1836126258Smlaier		return (1);
1837126258Smlaier	}
1838126258Smlaier}
1839126258Smlaier
/*
 * Incrementally update a 16-bit Internet checksum after one 16-bit word of
 * the covered data changed from old to new.
 *
 * cksum - current checksum field value
 * old   - previous value of the changed word
 * new   - replacement value of the changed word
 * udp   - non-zero to apply the UDP special cases: a checksum of 0 is
 *         returned unchanged, and a computed result of 0 is returned as
 *         0xFFFF
 *
 * Returns the updated checksum.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	acc;

	if (udp && cksum == 0)
		return (0x0000);

	/* Fold the 32-bit intermediate back into 16 bits. */
	acc = cksum + old - new;
	acc = ((acc >> 16) + (acc & 0xffff)) & 0xffff;

	if (udp && acc == 0)
		return (0xFFFF);
	return (acc);
}
1854126258Smlaier
1855240233Sglebiusstatic void
1856126258Smlaierpf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1857126258Smlaier    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1858126258Smlaier{
1859126258Smlaier	struct pf_addr	ao;
1860126258Smlaier	u_int16_t	po = *p;
1861126258Smlaier
1862126258Smlaier	PF_ACPY(&ao, a, af);
1863126258Smlaier	PF_ACPY(a, an, af);
1864126258Smlaier
1865126258Smlaier	*p = pn;
1866126258Smlaier
1867126258Smlaier	switch (af) {
1868126258Smlaier#ifdef INET
1869126258Smlaier	case AF_INET:
1870126258Smlaier		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1871126258Smlaier		    ao.addr16[0], an->addr16[0], 0),
1872126258Smlaier		    ao.addr16[1], an->addr16[1], 0);
1873126258Smlaier		*p = pn;
1874126258Smlaier		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1875126258Smlaier		    ao.addr16[0], an->addr16[0], u),
1876126258Smlaier		    ao.addr16[1], an->addr16[1], u),
1877126258Smlaier		    po, pn, u);
1878126258Smlaier		break;
1879126258Smlaier#endif /* INET */
1880126258Smlaier#ifdef INET6
1881126258Smlaier	case AF_INET6:
1882126258Smlaier		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1883126258Smlaier		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1884126258Smlaier		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1885126258Smlaier		    ao.addr16[0], an->addr16[0], u),
1886126258Smlaier		    ao.addr16[1], an->addr16[1], u),
1887126258Smlaier		    ao.addr16[2], an->addr16[2], u),
1888126258Smlaier		    ao.addr16[3], an->addr16[3], u),
1889126258Smlaier		    ao.addr16[4], an->addr16[4], u),
1890126258Smlaier		    ao.addr16[5], an->addr16[5], u),
1891126258Smlaier		    ao.addr16[6], an->addr16[6], u),
1892126258Smlaier		    ao.addr16[7], an->addr16[7], u),
1893126258Smlaier		    po, pn, u);
1894126258Smlaier		break;
1895126258Smlaier#endif /* INET6 */
1896126258Smlaier	}
1897126258Smlaier}
1898126258Smlaier
1899126258Smlaier
1900126258Smlaier/* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
1901126258Smlaiervoid
1902126258Smlaierpf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
1903126258Smlaier{
1904126258Smlaier	u_int32_t	ao;
1905126258Smlaier
1906126258Smlaier	memcpy(&ao, a, sizeof(ao));
1907126258Smlaier	memcpy(a, &an, sizeof(u_int32_t));
1908126258Smlaier	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
1909126258Smlaier	    ao % 65536, an % 65536, u);
1910126258Smlaier}
1911126258Smlaier
#ifdef INET6
/*
 * Replace an IPv6 address in a packet and fix a checksum covering it.
 *
 * a  - address to overwrite
 * c  - checksum to update for all eight 16-bit address words
 * an - new address
 * u  - passed to pf_cksum_fixup() as its udp flag
 */
static void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	prev;
	u_int16_t	sum;
	int		w;

	PF_ACPY(&prev, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (w = 0; w < 8; w++)
		sum = pf_cksum_fixup(sum, prev.addr16[w], an->addr16[w], u);
	*c = sum;
}
#endif /* INET6 */
1934126258Smlaier
1935240233Sglebiusstatic void
1936126258Smlaierpf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1937126258Smlaier    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1938126258Smlaier    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1939126258Smlaier{
1940126258Smlaier	struct pf_addr	oia, ooa;
1941126258Smlaier
1942126258Smlaier	PF_ACPY(&oia, ia, af);
1943223637Sbz	if (oa)
1944223637Sbz		PF_ACPY(&ooa, oa, af);
1945126258Smlaier
1946126258Smlaier	/* Change inner protocol port, fix inner protocol checksum. */
1947126258Smlaier	if (ip != NULL) {
1948126258Smlaier		u_int16_t	oip = *ip;
1949223637Sbz		u_int32_t	opc;
1950126258Smlaier
1951126258Smlaier		if (pc != NULL)
1952126258Smlaier			opc = *pc;
1953126258Smlaier		*ip = np;
1954126258Smlaier		if (pc != NULL)
1955126258Smlaier			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1956126258Smlaier		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1957126258Smlaier		if (pc != NULL)
1958126258Smlaier			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1959126258Smlaier	}
1960126258Smlaier	/* Change inner ip address, fix inner ip and icmp checksums. */
1961126258Smlaier	PF_ACPY(ia, na, af);
1962126258Smlaier	switch (af) {
1963126258Smlaier#ifdef INET
1964126258Smlaier	case AF_INET: {
1965126258Smlaier		u_int32_t	 oh2c = *h2c;
1966126258Smlaier
1967126258Smlaier		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1968126258Smlaier		    oia.addr16[0], ia->addr16[0], 0),
1969126258Smlaier		    oia.addr16[1], ia->addr16[1], 0);
1970126258Smlaier		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1971126258Smlaier		    oia.addr16[0], ia->addr16[0], 0),
1972126258Smlaier		    oia.addr16[1], ia->addr16[1], 0);
1973126258Smlaier		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1974126258Smlaier		break;
1975126258Smlaier	}
1976126258Smlaier#endif /* INET */
1977126258Smlaier#ifdef INET6
1978126258Smlaier	case AF_INET6:
1979126258Smlaier		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1980126258Smlaier		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1981126258Smlaier		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1982126258Smlaier		    oia.addr16[0], ia->addr16[0], u),
1983126258Smlaier		    oia.addr16[1], ia->addr16[1], u),
1984126258Smlaier		    oia.addr16[2], ia->addr16[2], u),
1985126258Smlaier		    oia.addr16[3], ia->addr16[3], u),
1986126258Smlaier		    oia.addr16[4], ia->addr16[4], u),
1987126258Smlaier		    oia.addr16[5], ia->addr16[5], u),
1988126258Smlaier		    oia.addr16[6], ia->addr16[6], u),
1989126258Smlaier		    oia.addr16[7], ia->addr16[7], u);
1990126258Smlaier		break;
1991126258Smlaier#endif /* INET6 */
1992126258Smlaier	}
1993223637Sbz	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
1994223637Sbz	if (oa) {
1995223637Sbz		PF_ACPY(oa, na, af);
1996223637Sbz		switch (af) {
1997126258Smlaier#ifdef INET
1998223637Sbz		case AF_INET:
1999223637Sbz			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
2000223637Sbz			    ooa.addr16[0], oa->addr16[0], 0),
2001223637Sbz			    ooa.addr16[1], oa->addr16[1], 0);
2002223637Sbz			break;
2003126258Smlaier#endif /* INET */
2004126258Smlaier#ifdef INET6
2005223637Sbz		case AF_INET6:
2006223637Sbz			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2007223637Sbz			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2008223637Sbz			    pf_cksum_fixup(pf_cksum_fixup(*ic,
2009223637Sbz			    ooa.addr16[0], oa->addr16[0], u),
2010223637Sbz			    ooa.addr16[1], oa->addr16[1], u),
2011223637Sbz			    ooa.addr16[2], oa->addr16[2], u),
2012223637Sbz			    ooa.addr16[3], oa->addr16[3], u),
2013223637Sbz			    ooa.addr16[4], oa->addr16[4], u),
2014223637Sbz			    ooa.addr16[5], oa->addr16[5], u),
2015223637Sbz			    ooa.addr16[6], oa->addr16[6], u),
2016223637Sbz			    ooa.addr16[7], oa->addr16[7], u);
2017223637Sbz			break;
2018126258Smlaier#endif /* INET6 */
2019223637Sbz		}
2020126258Smlaier	}
2021126258Smlaier}
2022126258Smlaier
2023171168Smlaier
2024171168Smlaier/*
2025171168Smlaier * Need to modulate the sequence numbers in the TCP SACK option
2026171168Smlaier * (credits to Krzysztof Pfaff for report and patch)
2027171168Smlaier */
2028240233Sglebiusstatic int
2029171168Smlaierpf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
2030171168Smlaier    struct tcphdr *th, struct pf_state_peer *dst)
2031171168Smlaier{
2032171168Smlaier	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
2033171168Smlaier	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
2034171168Smlaier	int copyback = 0, i, olen;
2035171168Smlaier	struct sackblk sack;
2036171168Smlaier
2037223637Sbz#define	TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
2038171168Smlaier	if (hlen < TCPOLEN_SACKLEN ||
2039171168Smlaier	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
2040171168Smlaier		return 0;
2041171168Smlaier
2042171168Smlaier	while (hlen >= TCPOLEN_SACKLEN) {
2043171168Smlaier		olen = opt[1];
2044171168Smlaier		switch (*opt) {
2045171168Smlaier		case TCPOPT_EOL:	/* FALLTHROUGH */
2046171168Smlaier		case TCPOPT_NOP:
2047171168Smlaier			opt++;
2048171168Smlaier			hlen--;
2049171168Smlaier			break;
2050171168Smlaier		case TCPOPT_SACK:
2051171168Smlaier			if (olen > hlen)
2052171168Smlaier				olen = hlen;
2053171168Smlaier			if (olen >= TCPOLEN_SACKLEN) {
2054171168Smlaier				for (i = 2; i + TCPOLEN_SACK <= olen;
2055171168Smlaier				    i += TCPOLEN_SACK) {
2056171168Smlaier					memcpy(&sack, &opt[i], sizeof(sack));
2057171168Smlaier					pf_change_a(&sack.start, &th->th_sum,
2058171168Smlaier					    htonl(ntohl(sack.start) -
2059171168Smlaier					    dst->seqdiff), 0);
2060171168Smlaier					pf_change_a(&sack.end, &th->th_sum,
2061171168Smlaier					    htonl(ntohl(sack.end) -
2062171168Smlaier					    dst->seqdiff), 0);
2063171168Smlaier					memcpy(&opt[i], &sack, sizeof(sack));
2064171168Smlaier				}
2065171168Smlaier				copyback = 1;
2066171168Smlaier			}
2067171168Smlaier			/* FALLTHROUGH */
2068171168Smlaier		default:
2069171168Smlaier			if (olen < 2)
2070171168Smlaier				olen = 2;
2071171168Smlaier			hlen -= olen;
2072171168Smlaier			opt += olen;
2073171168Smlaier		}
2074171168Smlaier	}
2075171168Smlaier
2076171168Smlaier	if (copyback)
2077171168Smlaier		m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
2078171168Smlaier	return (copyback);
2079171168Smlaier}
2080171168Smlaier
2081240233Sglebiusstatic void
2082162238Scsjppf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
2083126258Smlaier    const struct pf_addr *saddr, const struct pf_addr *daddr,
2084126258Smlaier    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2085145836Smlaier    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2086240233Sglebius    u_int16_t rtag, struct ifnet *ifp)
2087126258Smlaier{
2088240233Sglebius	struct pf_send_entry *pfse;
2089126258Smlaier	struct mbuf	*m;
2090171168Smlaier	int		 len, tlen;
2091126258Smlaier#ifdef INET
2092240233Sglebius	struct ip	*h = NULL;
2093126258Smlaier#endif /* INET */
2094126258Smlaier#ifdef INET6
2095240233Sglebius	struct ip6_hdr	*h6 = NULL;
2096126258Smlaier#endif /* INET6 */
2097171168Smlaier	struct tcphdr	*th;
2098171168Smlaier	char		*opt;
2099223637Sbz	struct pf_mtag  *pf_mtag;
2100126258Smlaier
2101171168Smlaier	len = 0;
2102171168Smlaier	th = NULL;
2103171168Smlaier
2104126258Smlaier	/* maximum segment size tcp option */
2105126258Smlaier	tlen = sizeof(struct tcphdr);
2106126258Smlaier	if (mss)
2107126258Smlaier		tlen += 4;
2108126258Smlaier
2109126258Smlaier	switch (af) {
2110126258Smlaier#ifdef INET
2111126258Smlaier	case AF_INET:
2112126258Smlaier		len = sizeof(struct ip) + tlen;
2113126258Smlaier		break;
2114126258Smlaier#endif /* INET */
2115126258Smlaier#ifdef INET6
2116126258Smlaier	case AF_INET6:
2117126258Smlaier		len = sizeof(struct ip6_hdr) + tlen;
2118126258Smlaier		break;
2119126258Smlaier#endif /* INET6 */
2120240233Sglebius	default:
2121240233Sglebius		panic("%s: unsupported af %d", __func__, af);
2122126258Smlaier	}
2123126258Smlaier
2124240233Sglebius	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
2125240233Sglebius	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
2126240233Sglebius	if (pfse == NULL)
2127132280Smlaier		return;
2128240233Sglebius	m = m_gethdr(M_NOWAIT, MT_HEADER);
2129240233Sglebius	if (m == NULL) {
2130240233Sglebius		free(pfse, M_PFTEMP);
2131240233Sglebius		return;
2132240233Sglebius	}
2133162238Scsjp#ifdef MAC
2134223637Sbz	mac_netinet_firewall_send(m);
2135162238Scsjp#endif
2136171168Smlaier	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
2137240233Sglebius		free(pfse, M_PFTEMP);
2138171168Smlaier		m_freem(m);
2139171168Smlaier		return;
2140171168Smlaier	}
2141171168Smlaier	if (tag)
2142145836Smlaier		m->m_flags |= M_SKIP_FIREWALL;
2143223637Sbz	pf_mtag->tag = rtag;
2144145836Smlaier
2145171168Smlaier	if (r != NULL && r->rtableid >= 0)
2146178888Sjulian		M_SETFIB(m, r->rtableid);
2147223637Sbz
2148126258Smlaier#ifdef ALTQ
2149126258Smlaier	if (r != NULL && r->qid) {
2150171168Smlaier		pf_mtag->qid = r->qid;
2151223637Sbz
2152171168Smlaier		/* add hints for ecn */
2153171168Smlaier		pf_mtag->hdr = mtod(m, struct ip *);
2154126258Smlaier	}
2155145836Smlaier#endif /* ALTQ */
2156126258Smlaier	m->m_data += max_linkhdr;
2157126258Smlaier	m->m_pkthdr.len = m->m_len = len;
2158126258Smlaier	m->m_pkthdr.rcvif = NULL;
2159126258Smlaier	bzero(m->m_data, len);
2160126258Smlaier	switch (af) {
2161126258Smlaier#ifdef INET
2162126258Smlaier	case AF_INET:
2163126258Smlaier		h = mtod(m, struct ip *);
2164126258Smlaier
2165126258Smlaier		/* IP header fields included in the TCP checksum */
2166126258Smlaier		h->ip_p = IPPROTO_TCP;
2167126258Smlaier		h->ip_len = htons(tlen);
2168126258Smlaier		h->ip_src.s_addr = saddr->v4.s_addr;
2169126258Smlaier		h->ip_dst.s_addr = daddr->v4.s_addr;
2170126258Smlaier
2171126258Smlaier		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
2172126258Smlaier		break;
2173126258Smlaier#endif /* INET */
2174126258Smlaier#ifdef INET6
2175126258Smlaier	case AF_INET6:
2176126258Smlaier		h6 = mtod(m, struct ip6_hdr *);
2177126258Smlaier
2178126258Smlaier		/* IP header fields included in the TCP checksum */
2179126258Smlaier		h6->ip6_nxt = IPPROTO_TCP;
2180126258Smlaier		h6->ip6_plen = htons(tlen);
2181126258Smlaier		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
2182126258Smlaier		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
2183126258Smlaier
2184126258Smlaier		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
2185126258Smlaier		break;
2186126258Smlaier#endif /* INET6 */
2187126258Smlaier	}
2188126258Smlaier
2189126258Smlaier	/* TCP header */
2190126258Smlaier	th->th_sport = sport;
2191126258Smlaier	th->th_dport = dport;
2192126258Smlaier	th->th_seq = htonl(seq);
2193126258Smlaier	th->th_ack = htonl(ack);
2194126258Smlaier	th->th_off = tlen >> 2;
2195126258Smlaier	th->th_flags = flags;
2196126258Smlaier	th->th_win = htons(win);
2197126258Smlaier
2198126258Smlaier	if (mss) {
2199126258Smlaier		opt = (char *)(th + 1);
2200126258Smlaier		opt[0] = TCPOPT_MAXSEG;
2201126258Smlaier		opt[1] = 4;
2202126258Smlaier		HTONS(mss);
2203126258Smlaier		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
2204126258Smlaier	}
2205126258Smlaier
2206126258Smlaier	switch (af) {
2207126258Smlaier#ifdef INET
2208126258Smlaier	case AF_INET:
2209126258Smlaier		/* TCP checksum */
2210126258Smlaier		th->th_sum = in_cksum(m, len);
2211126258Smlaier
2212126258Smlaier		/* Finish the IP header */
2213126258Smlaier		h->ip_v = 4;
2214126258Smlaier		h->ip_hl = sizeof(*h) >> 2;
2215126258Smlaier		h->ip_tos = IPTOS_LOWDELAY;
2216181803Sbz		h->ip_off = V_path_mtu_discovery ? IP_DF : 0;
2217130613Smlaier		h->ip_len = len;
2218223637Sbz		h->ip_ttl = ttl ? ttl : V_ip_defttl;
2219126258Smlaier		h->ip_sum = 0;
2220145836Smlaier
2221240233Sglebius		pfse->pfse_type = PFSE_IP;
2222126258Smlaier		break;
2223126258Smlaier#endif /* INET */
2224126258Smlaier#ifdef INET6
2225126258Smlaier	case AF_INET6:
2226126258Smlaier		/* TCP checksum */
2227126258Smlaier		th->th_sum = in6_cksum(m, IPPROTO_TCP,
2228126258Smlaier		    sizeof(struct ip6_hdr), tlen);
2229126258Smlaier
2230126258Smlaier		h6->ip6_vfc |= IPV6_VERSION;
2231126258Smlaier		h6->ip6_hlim = IPV6_DEFHLIM;
2232126258Smlaier
2233240233Sglebius		pfse->pfse_type = PFSE_IP6;
2234126258Smlaier		break;
2235126258Smlaier#endif /* INET6 */
2236126258Smlaier	}
2237240233Sglebius	pfse->pfse_m = m;
2238240233Sglebius	pf_send(pfse);
2239126258Smlaier}
2240126258Smlaier
2241223637Sbzstatic void
2242126258Smlaierpf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
2243126258Smlaier    struct pf_rule *r)
2244126258Smlaier{
2245240233Sglebius	struct pf_send_entry *pfse;
2246240233Sglebius	struct mbuf *m0;
2247223637Sbz	struct pf_mtag *pf_mtag;
2248126258Smlaier
2249240233Sglebius	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
2250240233Sglebius	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
2251240233Sglebius	if (pfse == NULL)
2252132280Smlaier		return;
2253240233Sglebius
2254240233Sglebius	if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) {
2255240233Sglebius		free(pfse, M_PFTEMP);
2256223637Sbz		return;
2257240233Sglebius	}
2258223637Sbz
2259240233Sglebius	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
2260240233Sglebius		free(pfse, M_PFTEMP);
2261126258Smlaier		return;
2262240233Sglebius	}
2263171168Smlaier	/* XXX: revisit */
2264171168Smlaier	m0->m_flags |= M_SKIP_FIREWALL;
2265126258Smlaier
2266171168Smlaier	if (r->rtableid >= 0)
2267178888Sjulian		M_SETFIB(m0, r->rtableid);
2268171168Smlaier
2269126258Smlaier#ifdef ALTQ
2270126258Smlaier	if (r->qid) {
2271171168Smlaier		pf_mtag->qid = r->qid;
2272171168Smlaier		/* add hints for ecn */
2273171168Smlaier		pf_mtag->hdr = mtod(m0, struct ip *);
2274126258Smlaier	}
2275145836Smlaier#endif /* ALTQ */
2276126258Smlaier
2277126258Smlaier	switch (af) {
2278126258Smlaier#ifdef INET
2279126258Smlaier	case AF_INET:
2280240233Sglebius	    {
2281240233Sglebius		struct ip *ip;
2282240233Sglebius
2283126261Smlaier		/* icmp_error() expects host byte ordering */
2284126261Smlaier		ip = mtod(m0, struct ip *);
2285126261Smlaier		NTOHS(ip->ip_len);
2286126261Smlaier		NTOHS(ip->ip_off);
2287240233Sglebius
2288240233Sglebius		pfse->pfse_type = PFSE_ICMP;
2289126258Smlaier		break;
2290240233Sglebius	    }
2291126258Smlaier#endif /* INET */
2292126258Smlaier#ifdef INET6
2293126258Smlaier	case AF_INET6:
2294240233Sglebius		pfse->pfse_type = PFSE_ICMP6;
2295126258Smlaier		break;
2296126258Smlaier#endif /* INET6 */
2297126258Smlaier	}
2298240233Sglebius	pfse->pfse_m = m0;
2299240233Sglebius	pfse->pfse_icmp_type = type;
2300240233Sglebius	pfse->pfse_icmp_code = code;
2301240233Sglebius	pf_send(pfse);
2302126258Smlaier}
2303126258Smlaier
/*
 * Compare addresses a and b under mask m for address family af.
 *
 * With n == 0 the result is 1 when the masked addresses are equal and 0
 * otherwise.  With n != 0 the sense is inverted: 1 when they differ, 0
 * when they are equal.  An address family that is not handled here counts
 * as "not equal".
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		equal = ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* All four masked words have to agree. */
		equal = 1;
		for (i = 0; i < 4; i++) {
			if ((a->addr32[i] & m->addr32[i]) !=
			    (b->addr32[i] & m->addr32[i])) {
				equal = 0;
				break;
			}
		}
		break;
	}
#endif /* INET6 */
	}

	if (equal)
		return (n ? 0 : 1);
	return (n ? 1 : 0);
}
2349126258Smlaier
/*
 * Return 1 if b <= a <= e, otherwise return 0.
 *
 * The addr32[] words are kept in network byte order (pf_addr_inc() in this
 * file converts with ntohl()/htonl() around its arithmetic), so every word
 * is converted with ntohl() before an ordered comparison.  Comparing the
 * raw words would order addresses incorrectly on little-endian hosts.
 *
 * For AF_INET6 the four words are compared from index 0 upwards, i.e. most
 * significant word first.  An address family that is not handled here
 * yields 1.
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
			return (0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* check a >= b */
		for (i = 0; i < 4; ++i) {
			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
				return (0);
		}
		/* check a <= e */
		for (i = 0; i < 4; ++i) {
			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
				return (0);
		}
		break;
	}
#endif /* INET6 */
	}
	return (1);
}
2387223637Sbz
2388240233Sglebiusstatic int
2389126258Smlaierpf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2390126258Smlaier{
2391126258Smlaier	switch (op) {
2392126258Smlaier	case PF_OP_IRG:
2393126258Smlaier		return ((p > a1) && (p < a2));
2394126258Smlaier	case PF_OP_XRG:
2395126258Smlaier		return ((p < a1) || (p > a2));
2396126258Smlaier	case PF_OP_RRG:
2397126258Smlaier		return ((p >= a1) && (p <= a2));
2398126258Smlaier	case PF_OP_EQ:
2399126258Smlaier		return (p == a1);
2400126258Smlaier	case PF_OP_NE:
2401126258Smlaier		return (p != a1);
2402126258Smlaier	case PF_OP_LT:
2403126258Smlaier		return (p < a1);
2404126258Smlaier	case PF_OP_LE:
2405126258Smlaier		return (p <= a1);
2406126258Smlaier	case PF_OP_GT:
2407126258Smlaier		return (p > a1);
2408126258Smlaier	case PF_OP_GE:
2409126258Smlaier		return (p >= a1);
2410126258Smlaier	}
2411126258Smlaier	return (0); /* never reached */
2412126258Smlaier}
2413126258Smlaier
/*
 * Port flavour of pf_match(): a1, a2 and p are converted from network to
 * host byte order before the operator op is evaluated.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	const u_int16_t	h_a1 = ntohs(a1);
	const u_int16_t	h_a2 = ntohs(a2);
	const u_int16_t	h_p = ntohs(p);

	return (pf_match(op, h_a1, h_a2, h_p));
}
2422126258Smlaier
2423240233Sglebiusstatic int
2424126258Smlaierpf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2425126258Smlaier{
2426126258Smlaier	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2427126258Smlaier		return (0);
2428126258Smlaier	return (pf_match(op, a1, a2, u));
2429126258Smlaier}
2430126258Smlaier
2431240233Sglebiusstatic int
2432126258Smlaierpf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2433126258Smlaier{
2434126258Smlaier	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2435126258Smlaier		return (0);
2436126258Smlaier	return (pf_match(op, a1, a2, g));
2437126258Smlaier}
2438126258Smlaier
2439223637Sbzint
2440240233Sglebiuspf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag)
2441171168Smlaier{
2442171168Smlaier	if (*tag == -1)
2443240233Sglebius		*tag = mtag;
2444171168Smlaier
2445126258Smlaier	return ((!r->match_tag_not && r->match_tag == *tag) ||
2446126258Smlaier	    (r->match_tag_not && r->match_tag != *tag));
2447126258Smlaier}
2448126258Smlaier
2449126258Smlaierint
2450240233Sglebiuspf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag)
2451126258Smlaier{
2452126258Smlaier
2453240233Sglebius	KASSERT(tag > 0, ("%s: tag %d", __func__, tag));
2454126258Smlaier
2455240233Sglebius	if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(m)) == NULL))
2456240233Sglebius		return (ENOMEM);
2457240233Sglebius
2458240233Sglebius	pd->pf_mtag->tag = tag;
2459240233Sglebius
2460126258Smlaier	return (0);
2461126258Smlaier}
2462126258Smlaier
2463223637Sbzvoid
2464145836Smlaierpf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
2465223637Sbz    struct pf_rule **r, struct pf_rule **a, int *match)
2466145836Smlaier{
2467145836Smlaier	struct pf_anchor_stackframe	*f;
2468126258Smlaier
2469240233Sglebius	PF_RULES_RASSERT();
2470240233Sglebius
2471171168Smlaier	(*r)->anchor->match = 0;
2472171168Smlaier	if (match)
2473171168Smlaier		*match = 0;
2474223637Sbz	if (*depth >= sizeof(V_pf_anchor_stack) /
2475223637Sbz	    sizeof(V_pf_anchor_stack[0])) {
2476145836Smlaier		printf("pf_step_into_anchor: stack overflow\n");
2477145836Smlaier		*r = TAILQ_NEXT(*r, entries);
2478145836Smlaier		return;
2479145836Smlaier	} else if (*depth == 0 && a != NULL)
2480145836Smlaier		*a = *r;
2481223637Sbz	f = V_pf_anchor_stack + (*depth)++;
2482145836Smlaier	f->rs = *rs;
2483145836Smlaier	f->r = *r;
2484145836Smlaier	if ((*r)->anchor_wildcard) {
2485145836Smlaier		f->parent = &(*r)->anchor->children;
2486145836Smlaier		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
2487145836Smlaier		    NULL) {
2488145836Smlaier			*r = NULL;
2489145836Smlaier			return;
2490145836Smlaier		}
2491145836Smlaier		*rs = &f->child->ruleset;
2492145836Smlaier	} else {
2493145836Smlaier		f->parent = NULL;
2494145836Smlaier		f->child = NULL;
2495145836Smlaier		*rs = &(*r)->anchor->ruleset;
2496145836Smlaier	}
2497145836Smlaier	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2498145836Smlaier}
2499126258Smlaier
2500171168Smlaierint
2501145836Smlaierpf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2502171168Smlaier    struct pf_rule **r, struct pf_rule **a, int *match)
2503145836Smlaier{
2504145836Smlaier	struct pf_anchor_stackframe	*f;
2505171168Smlaier	int quick = 0;
2506145836Smlaier
2507240233Sglebius	PF_RULES_RASSERT();
2508240233Sglebius
2509145836Smlaier	do {
2510145836Smlaier		if (*depth <= 0)
2511145836Smlaier			break;
2512223637Sbz		f = V_pf_anchor_stack + *depth - 1;
2513145836Smlaier		if (f->parent != NULL && f->child != NULL) {
2514171168Smlaier			if (f->child->match ||
2515171168Smlaier			    (match != NULL && *match)) {
2516171168Smlaier				f->r->anchor->match = 1;
2517171168Smlaier				*match = 0;
2518171168Smlaier			}
2519145836Smlaier			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2520145836Smlaier			if (f->child != NULL) {
2521145836Smlaier				*rs = &f->child->ruleset;
2522145836Smlaier				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2523145836Smlaier				if (*r == NULL)
2524145836Smlaier					continue;
2525145836Smlaier				else
2526145836Smlaier					break;
2527145836Smlaier			}
2528145836Smlaier		}
2529145836Smlaier		(*depth)--;
2530145836Smlaier		if (*depth == 0 && a != NULL)
2531145836Smlaier			*a = NULL;
2532145836Smlaier		*rs = f->rs;
2533223637Sbz		if (f->r->anchor->match || (match != NULL && *match))
2534171168Smlaier			quick = f->r->quick;
2535145836Smlaier		*r = TAILQ_NEXT(f->r, entries);
2536145836Smlaier	} while (*r == NULL);
2537171168Smlaier
2538171168Smlaier	return (quick);
2539145836Smlaier}
2540145836Smlaier
2541126258Smlaier#ifdef INET6
2542126258Smlaiervoid
2543126258Smlaierpf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2544126258Smlaier    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2545126258Smlaier{
2546126258Smlaier	switch (af) {
2547126258Smlaier#ifdef INET
2548126258Smlaier	case AF_INET:
2549126258Smlaier		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2550126258Smlaier		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2551126258Smlaier		break;
2552126258Smlaier#endif /* INET */
2553126258Smlaier	case AF_INET6:
2554126258Smlaier		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2555126258Smlaier		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2556126258Smlaier		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2557126258Smlaier		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2558126258Smlaier		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2559126258Smlaier		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2560126258Smlaier		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2561126258Smlaier		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2562126258Smlaier		break;
2563126258Smlaier	}
2564126258Smlaier}
2565126258Smlaier
2566126258Smlaiervoid
2567130613Smlaierpf_addr_inc(struct pf_addr *addr, sa_family_t af)
2568126258Smlaier{
2569126258Smlaier	switch (af) {
2570126258Smlaier#ifdef INET
2571126258Smlaier	case AF_INET:
2572126258Smlaier		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2573126258Smlaier		break;
2574126258Smlaier#endif /* INET */
2575126258Smlaier	case AF_INET6:
2576126258Smlaier		if (addr->addr32[3] == 0xffffffff) {
2577126258Smlaier			addr->addr32[3] = 0;
2578126258Smlaier			if (addr->addr32[2] == 0xffffffff) {
2579126258Smlaier				addr->addr32[2] = 0;
2580126258Smlaier				if (addr->addr32[1] == 0xffffffff) {
2581126258Smlaier					addr->addr32[1] = 0;
2582126258Smlaier					addr->addr32[0] =
2583126258Smlaier					    htonl(ntohl(addr->addr32[0]) + 1);
2584126258Smlaier				} else
2585126258Smlaier					addr->addr32[1] =
2586126258Smlaier					    htonl(ntohl(addr->addr32[1]) + 1);
2587126258Smlaier			} else
2588126258Smlaier				addr->addr32[2] =
2589126258Smlaier				    htonl(ntohl(addr->addr32[2]) + 1);
2590126258Smlaier		} else
2591126258Smlaier			addr->addr32[3] =
2592126258Smlaier			    htonl(ntohl(addr->addr32[3]) + 1);
2593126258Smlaier		break;
2594126258Smlaier	}
2595126258Smlaier}
2596126258Smlaier#endif /* INET6 */
2597126258Smlaier
2598126258Smlaierint
2599240233Sglebiuspf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m)
2600126258Smlaier{
2601126258Smlaier	struct pf_addr		*saddr, *daddr;
2602126258Smlaier	u_int16_t		 sport, dport;
2603126261Smlaier	struct inpcbinfo	*pi;
2604126258Smlaier	struct inpcb		*inp;
2605126258Smlaier
2606171168Smlaier	pd->lookup.uid = UID_MAX;
2607171168Smlaier	pd->lookup.gid = GID_MAX;
2608223637Sbz
2609130613Smlaier	switch (pd->proto) {
2610126258Smlaier	case IPPROTO_TCP:
2611171168Smlaier		if (pd->hdr.tcp == NULL)
2612171168Smlaier			return (-1);
2613126258Smlaier		sport = pd->hdr.tcp->th_sport;
2614126258Smlaier		dport = pd->hdr.tcp->th_dport;
2615181803Sbz		pi = &V_tcbinfo;
2616126258Smlaier		break;
2617126258Smlaier	case IPPROTO_UDP:
2618171168Smlaier		if (pd->hdr.udp == NULL)
2619171168Smlaier			return (-1);
2620126258Smlaier		sport = pd->hdr.udp->uh_sport;
2621126258Smlaier		dport = pd->hdr.udp->uh_dport;
2622181803Sbz		pi = &V_udbinfo;
2623126258Smlaier		break;
2624126258Smlaier	default:
2625171168Smlaier		return (-1);
2626126258Smlaier	}
2627126258Smlaier	if (direction == PF_IN) {
2628126258Smlaier		saddr = pd->src;
2629126258Smlaier		daddr = pd->dst;
2630126258Smlaier	} else {
2631126258Smlaier		u_int16_t	p;
2632126258Smlaier
2633126258Smlaier		p = sport;
2634126258Smlaier		sport = dport;
2635126258Smlaier		dport = p;
2636126258Smlaier		saddr = pd->dst;
2637126258Smlaier		daddr = pd->src;
2638126258Smlaier	}
2639130613Smlaier	switch (pd->af) {
2640145836Smlaier#ifdef INET
2641126258Smlaier	case AF_INET:
2642240233Sglebius		inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
2643240233Sglebius		    dport, INPLOOKUP_RLOCKPCB, NULL, m);
2644126261Smlaier		if (inp == NULL) {
2645240233Sglebius			inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
2646222488Srwatson			   daddr->v4, dport, INPLOOKUP_WILDCARD |
2647240233Sglebius			   INPLOOKUP_RLOCKPCB, NULL, m);
2648222488Srwatson			if (inp == NULL)
2649171168Smlaier				return (-1);
2650126261Smlaier		}
2651126258Smlaier		break;
2652145836Smlaier#endif /* INET */
2653126258Smlaier#ifdef INET6
2654126258Smlaier	case AF_INET6:
2655240233Sglebius		inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
2656240233Sglebius		    dport, INPLOOKUP_RLOCKPCB, NULL, m);
2657126261Smlaier		if (inp == NULL) {
2658240233Sglebius			inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
2659222488Srwatson			    &daddr->v6, dport, INPLOOKUP_WILDCARD |
2660240233Sglebius			    INPLOOKUP_RLOCKPCB, NULL, m);
2661222488Srwatson			if (inp == NULL)
2662171168Smlaier				return (-1);
2663126261Smlaier		}
2664126258Smlaier		break;
2665126258Smlaier#endif /* INET6 */
2666126258Smlaier
2667126258Smlaier	default:
2668171168Smlaier		return (-1);
2669126258Smlaier	}
2670222488Srwatson	INP_RLOCK_ASSERT(inp);
2671183606Sbz	pd->lookup.uid = inp->inp_cred->cr_uid;
2672183606Sbz	pd->lookup.gid = inp->inp_cred->cr_groups[0];
2673222488Srwatson	INP_RUNLOCK(inp);
2674240233Sglebius
2675126258Smlaier	return (1);
2676126258Smlaier}
2677126258Smlaier
2678240233Sglebiusstatic u_int8_t
2679126258Smlaierpf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2680126258Smlaier{
2681126258Smlaier	int		 hlen;
2682126258Smlaier	u_int8_t	 hdr[60];
2683126258Smlaier	u_int8_t	*opt, optlen;
2684126258Smlaier	u_int8_t	 wscale = 0;
2685126258Smlaier
2686126258Smlaier	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2687126258Smlaier	if (hlen <= sizeof(struct tcphdr))
2688126258Smlaier		return (0);
2689126258Smlaier	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2690126258Smlaier		return (0);
2691126258Smlaier	opt = hdr + sizeof(struct tcphdr);
2692126258Smlaier	hlen -= sizeof(struct tcphdr);
2693126258Smlaier	while (hlen >= 3) {
2694126258Smlaier		switch (*opt) {
2695126258Smlaier		case TCPOPT_EOL:
2696126258Smlaier		case TCPOPT_NOP:
2697126258Smlaier			++opt;
2698126258Smlaier			--hlen;
2699126258Smlaier			break;
2700126258Smlaier		case TCPOPT_WINDOW:
2701126258Smlaier			wscale = opt[2];
2702126258Smlaier			if (wscale > TCP_MAX_WINSHIFT)
2703126258Smlaier				wscale = TCP_MAX_WINSHIFT;
2704126258Smlaier			wscale |= PF_WSCALE_FLAG;
2705130613Smlaier			/* FALLTHROUGH */
2706126258Smlaier		default:
2707126258Smlaier			optlen = opt[1];
2708126258Smlaier			if (optlen < 2)
2709126258Smlaier				optlen = 2;
2710126258Smlaier			hlen -= optlen;
2711126258Smlaier			opt += optlen;
2712130613Smlaier			break;
2713126258Smlaier		}
2714126258Smlaier	}
2715126258Smlaier	return (wscale);
2716126258Smlaier}
2717126258Smlaier
2718240233Sglebiusstatic u_int16_t
2719126258Smlaierpf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2720126258Smlaier{
2721126258Smlaier	int		 hlen;
2722126258Smlaier	u_int8_t	 hdr[60];
2723126258Smlaier	u_int8_t	*opt, optlen;
2724181803Sbz	u_int16_t	 mss = V_tcp_mssdflt;
2725126258Smlaier
2726126258Smlaier	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2727126258Smlaier	if (hlen <= sizeof(struct tcphdr))
2728126258Smlaier		return (0);
2729126258Smlaier	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2730126258Smlaier		return (0);
2731126258Smlaier	opt = hdr + sizeof(struct tcphdr);
2732126258Smlaier	hlen -= sizeof(struct tcphdr);
2733126258Smlaier	while (hlen >= TCPOLEN_MAXSEG) {
2734126258Smlaier		switch (*opt) {
2735126258Smlaier		case TCPOPT_EOL:
2736126258Smlaier		case TCPOPT_NOP:
2737126258Smlaier			++opt;
2738126258Smlaier			--hlen;
2739126258Smlaier			break;
2740126258Smlaier		case TCPOPT_MAXSEG:
2741126258Smlaier			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2742145030Sglebius			NTOHS(mss);
2743130613Smlaier			/* FALLTHROUGH */
2744126258Smlaier		default:
2745126258Smlaier			optlen = opt[1];
2746126258Smlaier			if (optlen < 2)
2747126258Smlaier				optlen = 2;
2748126258Smlaier			hlen -= optlen;
2749126258Smlaier			opt += optlen;
2750130613Smlaier			break;
2751126258Smlaier		}
2752126258Smlaier	}
2753126258Smlaier	return (mss);
2754126258Smlaier}
2755126258Smlaier
2756240233Sglebiusstatic u_int16_t
2757231852Sbzpf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
2758126258Smlaier{
2759126258Smlaier#ifdef INET
2760126258Smlaier	struct sockaddr_in	*dst;
2761126258Smlaier	struct route		 ro;
2762126258Smlaier#endif /* INET */
2763126258Smlaier#ifdef INET6
2764126258Smlaier	struct sockaddr_in6	*dst6;
2765126258Smlaier	struct route_in6	 ro6;
2766126258Smlaier#endif /* INET6 */
2767126258Smlaier	struct rtentry		*rt = NULL;
2768223637Sbz	int			 hlen = 0;
2769181803Sbz	u_int16_t		 mss = V_tcp_mssdflt;
2770126258Smlaier
2771126258Smlaier	switch (af) {
2772126258Smlaier#ifdef INET
2773126258Smlaier	case AF_INET:
2774126258Smlaier		hlen = sizeof(struct ip);
2775126258Smlaier		bzero(&ro, sizeof(ro));
2776126258Smlaier		dst = (struct sockaddr_in *)&ro.ro_dst;
2777126258Smlaier		dst->sin_family = AF_INET;
2778126258Smlaier		dst->sin_len = sizeof(*dst);
2779126258Smlaier		dst->sin_addr = addr->v4;
2780231852Sbz		in_rtalloc_ign(&ro, 0, rtableid);
2781126258Smlaier		rt = ro.ro_rt;
2782126258Smlaier		break;
2783126258Smlaier#endif /* INET */
2784126258Smlaier#ifdef INET6
2785126258Smlaier	case AF_INET6:
2786126258Smlaier		hlen = sizeof(struct ip6_hdr);
2787126258Smlaier		bzero(&ro6, sizeof(ro6));
2788126258Smlaier		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2789126258Smlaier		dst6->sin6_family = AF_INET6;
2790126258Smlaier		dst6->sin6_len = sizeof(*dst6);
2791126258Smlaier		dst6->sin6_addr = addr->v6;
2792231852Sbz		in6_rtalloc_ign(&ro6, 0, rtableid);
2793126258Smlaier		rt = ro6.ro_rt;
2794126258Smlaier		break;
2795126258Smlaier#endif /* INET6 */
2796126258Smlaier	}
2797126258Smlaier
2798126258Smlaier	if (rt && rt->rt_ifp) {
2799126258Smlaier		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2800181803Sbz		mss = max(V_tcp_mssdflt, mss);
2801126258Smlaier		RTFREE(rt);
2802126258Smlaier	}
2803126258Smlaier	mss = min(mss, offer);
2804126258Smlaier	mss = max(mss, 64);		/* sanity - at least max opt space */
2805126258Smlaier	return (mss);
2806126258Smlaier}
2807126258Smlaier
2808240233Sglebiusstatic void
2809126258Smlaierpf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2810126258Smlaier{
2811126258Smlaier	struct pf_rule *r = s->rule.ptr;
2812223637Sbz	struct pf_src_node *sn = NULL;
2813126258Smlaier
2814130613Smlaier	s->rt_kif = NULL;
2815126258Smlaier	if (!r->rt || r->rt == PF_FASTROUTE)
2816126258Smlaier		return;
2817223637Sbz	switch (s->key[PF_SK_WIRE]->af) {
2818126258Smlaier#ifdef INET
2819126258Smlaier	case AF_INET:
2820223637Sbz		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn);
2821130613Smlaier		s->rt_kif = r->rpool.cur->kif;
2822126258Smlaier		break;
2823126258Smlaier#endif /* INET */
2824126258Smlaier#ifdef INET6
2825126258Smlaier	case AF_INET6:
2826223637Sbz		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn);
2827130613Smlaier		s->rt_kif = r->rpool.cur->kif;
2828126258Smlaier		break;
2829126258Smlaier#endif /* INET6 */
2830126258Smlaier	}
2831126258Smlaier}
2832126258Smlaier
2833240233Sglebiusstatic u_int32_t
2834223637Sbzpf_tcp_iss(struct pf_pdesc *pd)
2835223637Sbz{
2836223637Sbz	MD5_CTX ctx;
2837223637Sbz	u_int32_t digest[4];
2838223637Sbz
2839223637Sbz	if (V_pf_tcp_secret_init == 0) {
2840223637Sbz		read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
2841223637Sbz		MD5Init(&V_pf_tcp_secret_ctx);
2842223637Sbz		MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
2843223637Sbz		    sizeof(V_pf_tcp_secret));
2844223637Sbz		V_pf_tcp_secret_init = 1;
2845223637Sbz	}
2846223637Sbz
2847223637Sbz	ctx = V_pf_tcp_secret_ctx;
2848223637Sbz
2849223637Sbz	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
2850223637Sbz	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
2851223637Sbz	if (pd->af == AF_INET6) {
2852223637Sbz		MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
2853223637Sbz		MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
2854223637Sbz	} else {
2855223637Sbz		MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
2856223637Sbz		MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
2857223637Sbz	}
2858223637Sbz	MD5Final((u_char *)digest, &ctx);
2859223637Sbz	V_pf_tcp_iss_off += 4096;
2860223637Sbz#define	ISN_RANDOM_INCREMENT (4096 - 1)
2861223637Sbz	return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
2862223637Sbz	    V_pf_tcp_iss_off);
2863223637Sbz#undef	ISN_RANDOM_INCREMENT
2864223637Sbz}
2865223637Sbz
2866240233Sglebiusstatic int
2867223637Sbzpf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
2868240233Sglebius    struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd,
2869240233Sglebius    struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp)
2870126258Smlaier{
2871130613Smlaier	struct pf_rule		*nr = NULL;
2872240233Sglebius	struct pf_addr		* const saddr = pd->src;
2873240233Sglebius	struct pf_addr		* const daddr = pd->dst;
2874126258Smlaier	sa_family_t		 af = pd->af;
2875126258Smlaier	struct pf_rule		*r, *a = NULL;
2876126258Smlaier	struct pf_ruleset	*ruleset = NULL;
2877130613Smlaier	struct pf_src_node	*nsn = NULL;
2878223637Sbz	struct tcphdr		*th = pd->hdr.tcp;
2879223637Sbz	struct pf_state_key	*sk = NULL, *nk = NULL;
2880126258Smlaier	u_short			 reason;
2881223637Sbz	int			 rewrite = 0, hdrlen = 0;
2882171168Smlaier	int			 tag = -1, rtableid = -1;
2883145836Smlaier	int			 asd = 0;
2884171168Smlaier	int			 match = 0;
2885223637Sbz	int			 state_icmp = 0;
2886223637Sbz	u_int16_t		 sport = 0, dport = 0;
2887223637Sbz	u_int16_t		 bproto_sum = 0, bip_sum = 0;
2888223637Sbz	u_int8_t		 icmptype = 0, icmpcode = 0;
2889126258Smlaier
2890240233Sglebius	PF_RULES_RASSERT();
2891223637Sbz
2892240233Sglebius	if (inp != NULL) {
2893240233Sglebius		INP_LOCK_ASSERT(inp);
2894240233Sglebius		pd->lookup.uid = inp->inp_cred->cr_uid;
2895240233Sglebius		pd->lookup.gid = inp->inp_cred->cr_groups[0];
2896240233Sglebius		pd->lookup.done = 1;
2897145836Smlaier	}
2898145836Smlaier
2899223637Sbz	switch (pd->proto) {
2900223637Sbz	case IPPROTO_TCP:
2901223637Sbz		sport = th->th_sport;
2902223637Sbz		dport = th->th_dport;
2903223637Sbz		hdrlen = sizeof(*th);
2904223637Sbz		break;
2905223637Sbz	case IPPROTO_UDP:
2906223637Sbz		sport = pd->hdr.udp->uh_sport;
2907223637Sbz		dport = pd->hdr.udp->uh_dport;
2908223637Sbz		hdrlen = sizeof(*pd->hdr.udp);
2909223637Sbz		break;
2910223637Sbz#ifdef INET
2911223637Sbz	case IPPROTO_ICMP:
2912223637Sbz		if (pd->af != AF_INET)
2913223637Sbz			break;
2914223637Sbz		sport = dport = pd->hdr.icmp->icmp_id;
2915223637Sbz		hdrlen = sizeof(*pd->hdr.icmp);
2916223637Sbz		icmptype = pd->hdr.icmp->icmp_type;
2917223637Sbz		icmpcode = pd->hdr.icmp->icmp_code;
2918223637Sbz
2919223637Sbz		if (icmptype == ICMP_UNREACH ||
2920223637Sbz		    icmptype == ICMP_SOURCEQUENCH ||
2921223637Sbz		    icmptype == ICMP_REDIRECT ||
2922223637Sbz		    icmptype == ICMP_TIMXCEED ||
2923223637Sbz		    icmptype == ICMP_PARAMPROB)
2924223637Sbz			state_icmp++;
2925223637Sbz		break;
2926223637Sbz#endif /* INET */
2927223637Sbz#ifdef INET6
2928223637Sbz	case IPPROTO_ICMPV6:
2929223637Sbz		if (af != AF_INET6)
2930223637Sbz			break;
2931223637Sbz		sport = dport = pd->hdr.icmp6->icmp6_id;
2932223637Sbz		hdrlen = sizeof(*pd->hdr.icmp6);
2933223637Sbz		icmptype = pd->hdr.icmp6->icmp6_type;
2934223637Sbz		icmpcode = pd->hdr.icmp6->icmp6_code;
2935223637Sbz
2936223637Sbz		if (icmptype == ICMP6_DST_UNREACH ||
2937223637Sbz		    icmptype == ICMP6_PACKET_TOO_BIG ||
2938223637Sbz		    icmptype == ICMP6_TIME_EXCEEDED ||
2939223637Sbz		    icmptype == ICMP6_PARAM_PROB)
2940223637Sbz			state_icmp++;
2941223637Sbz		break;
2942223637Sbz#endif /* INET6 */
2943223637Sbz	default:
2944223637Sbz		sport = dport = hdrlen = 0;
2945223637Sbz		break;
2946223637Sbz	}
2947223637Sbz
2948126258Smlaier	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2949126258Smlaier
2950223637Sbz	/* check packet for BINAT/NAT/RDR */
2951240233Sglebius	if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk,
2952240233Sglebius	    &nk, saddr, daddr, sport, dport)) != NULL) {
2953240233Sglebius		KASSERT(sk != NULL, ("%s: null sk", __func__));
2954240233Sglebius		KASSERT(nk != NULL, ("%s: null nk", __func__));
2955223637Sbz
2956223637Sbz		if (pd->ip_sum)
2957223637Sbz			bip_sum = *pd->ip_sum;
2958223637Sbz
2959223637Sbz		switch (pd->proto) {
2960223637Sbz		case IPPROTO_TCP:
2961223637Sbz			bproto_sum = th->th_sum;
2962223637Sbz			pd->proto_sum = &th->th_sum;
2963223637Sbz
2964223637Sbz			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
2965223637Sbz			    nk->port[pd->sidx] != sport) {
2966223637Sbz				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2967223637Sbz				    &th->th_sum, &nk->addr[pd->sidx],
2968223637Sbz				    nk->port[pd->sidx], 0, af);
2969223637Sbz				pd->sport = &th->th_sport;
2970223637Sbz				sport = th->th_sport;
2971223637Sbz			}
2972223637Sbz
2973223637Sbz			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
2974223637Sbz			    nk->port[pd->didx] != dport) {
2975223637Sbz				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2976223637Sbz				    &th->th_sum, &nk->addr[pd->didx],
2977223637Sbz				    nk->port[pd->didx], 0, af);
2978223637Sbz				dport = th->th_dport;
2979223637Sbz				pd->dport = &th->th_dport;
2980223637Sbz			}
2981126258Smlaier			rewrite++;
2982223637Sbz			break;
2983223637Sbz		case IPPROTO_UDP:
2984223637Sbz			bproto_sum = pd->hdr.udp->uh_sum;
2985223637Sbz			pd->proto_sum = &pd->hdr.udp->uh_sum;
2986223637Sbz
2987223637Sbz			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
2988223637Sbz			    nk->port[pd->sidx] != sport) {
2989223637Sbz				pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
2990223637Sbz				    pd->ip_sum, &pd->hdr.udp->uh_sum,
2991223637Sbz				    &nk->addr[pd->sidx],
2992223637Sbz				    nk->port[pd->sidx], 1, af);
2993223637Sbz				sport = pd->hdr.udp->uh_sport;
2994223637Sbz				pd->sport = &pd->hdr.udp->uh_sport;
2995223637Sbz			}
2996223637Sbz
2997223637Sbz			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
2998223637Sbz			    nk->port[pd->didx] != dport) {
2999223637Sbz				pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
3000223637Sbz				    pd->ip_sum, &pd->hdr.udp->uh_sum,
3001223637Sbz				    &nk->addr[pd->didx],
3002223637Sbz				    nk->port[pd->didx], 1, af);
3003223637Sbz				dport = pd->hdr.udp->uh_dport;
3004223637Sbz				pd->dport = &pd->hdr.udp->uh_dport;
3005223637Sbz			}
3006223637Sbz			rewrite++;
3007223637Sbz			break;
3008223637Sbz#ifdef INET
3009223637Sbz		case IPPROTO_ICMP:
3010223637Sbz			nk->port[0] = nk->port[1];
3011223637Sbz			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
3012223637Sbz				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3013223637Sbz				    nk->addr[pd->sidx].v4.s_addr, 0);
3014223637Sbz
3015223637Sbz			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
3016223637Sbz				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3017223637Sbz				    nk->addr[pd->didx].v4.s_addr, 0);
3018223637Sbz
3019223637Sbz			if (nk->port[1] != pd->hdr.icmp->icmp_id) {
3020223637Sbz				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
3021223637Sbz				    pd->hdr.icmp->icmp_cksum, sport,
3022223637Sbz				    nk->port[1], 0);
3023223637Sbz				pd->hdr.icmp->icmp_id = nk->port[1];
3024223637Sbz				pd->sport = &pd->hdr.icmp->icmp_id;
3025223637Sbz			}
3026223637Sbz			m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
3027223637Sbz			break;
3028223637Sbz#endif /* INET */
3029223637Sbz#ifdef INET6
3030223637Sbz		case IPPROTO_ICMPV6:
3031223637Sbz			nk->port[0] = nk->port[1];
3032223637Sbz			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
3033223637Sbz				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3034223637Sbz				    &nk->addr[pd->sidx], 0);
3035223637Sbz
3036223637Sbz			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
3037223637Sbz				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3038223637Sbz				    &nk->addr[pd->didx], 0);
3039223637Sbz			rewrite++;
3040223637Sbz			break;
3041223637Sbz#endif /* INET */
3042223637Sbz		default:
3043223637Sbz			switch (af) {
3044223637Sbz#ifdef INET
3045223637Sbz			case AF_INET:
3046223637Sbz				if (PF_ANEQ(saddr,
3047223637Sbz				    &nk->addr[pd->sidx], AF_INET))
3048223637Sbz					pf_change_a(&saddr->v4.s_addr,
3049223637Sbz					    pd->ip_sum,
3050223637Sbz					    nk->addr[pd->sidx].v4.s_addr, 0);
3051223637Sbz
3052223637Sbz				if (PF_ANEQ(daddr,
3053223637Sbz				    &nk->addr[pd->didx], AF_INET))
3054223637Sbz					pf_change_a(&daddr->v4.s_addr,
3055223637Sbz					    pd->ip_sum,
3056223637Sbz					    nk->addr[pd->didx].v4.s_addr, 0);
3057223637Sbz				break;
3058223637Sbz#endif /* INET */
3059223637Sbz#ifdef INET6
3060223637Sbz			case AF_INET6:
3061223637Sbz				if (PF_ANEQ(saddr,
3062223637Sbz				    &nk->addr[pd->sidx], AF_INET6))
3063223637Sbz					PF_ACPY(saddr, &nk->addr[pd->sidx], af);
3064223637Sbz
3065223637Sbz				if (PF_ANEQ(daddr,
3066223637Sbz				    &nk->addr[pd->didx], AF_INET6))
3067223637Sbz					PF_ACPY(saddr, &nk->addr[pd->didx], af);
3068223637Sbz				break;
3069223637Sbz#endif /* INET */
3070223637Sbz			}
3071223637Sbz			break;
3072126258Smlaier		}
3073223637Sbz		if (nr->natpass)
3074223637Sbz			r = NULL;
3075223637Sbz		pd->nat_rule = nr;
3076126258Smlaier	}
3077126258Smlaier
3078126258Smlaier	while (r != NULL) {
3079126258Smlaier		r->evaluations++;
3080171168Smlaier		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3081126258Smlaier			r = r->skip[PF_SKIP_IFP].ptr;
3082126258Smlaier		else if (r->direction && r->direction != direction)
3083126258Smlaier			r = r->skip[PF_SKIP_DIR].ptr;
3084126258Smlaier		else if (r->af && r->af != af)
3085126258Smlaier			r = r->skip[PF_SKIP_AF].ptr;
3086223637Sbz		else if (r->proto && r->proto != pd->proto)
3087126258Smlaier			r = r->skip[PF_SKIP_PROTO].ptr;
3088171168Smlaier		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3089231852Sbz		    r->src.neg, kif, M_GETFIB(m)))
3090126258Smlaier			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3091223637Sbz		/* tcp/udp only. port_op always 0 in other cases */
3092126258Smlaier		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3093223637Sbz		    r->src.port[0], r->src.port[1], sport))
3094126258Smlaier			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3095171168Smlaier		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3096231852Sbz		    r->dst.neg, NULL, M_GETFIB(m)))
3097126258Smlaier			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3098223637Sbz		/* tcp/udp only. port_op always 0 in other cases */
3099126258Smlaier		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3100223637Sbz		    r->dst.port[0], r->dst.port[1], dport))
3101126258Smlaier			r = r->skip[PF_SKIP_DST_PORT].ptr;
3102223637Sbz		/* icmp only. type always 0 in other cases */
3103223637Sbz		else if (r->type && r->type != icmptype + 1)
3104223637Sbz			r = TAILQ_NEXT(r, entries);
3105223637Sbz		/* icmp only. type always 0 in other cases */
3106223637Sbz		else if (r->code && r->code != icmpcode + 1)
3107223637Sbz			r = TAILQ_NEXT(r, entries);
3108171168Smlaier		else if (r->tos && !(r->tos == pd->tos))
3109126258Smlaier			r = TAILQ_NEXT(r, entries);
3110126258Smlaier		else if (r->rule_flag & PFRULE_FRAGMENT)
3111126258Smlaier			r = TAILQ_NEXT(r, entries);
3112223637Sbz		else if (pd->proto == IPPROTO_TCP &&
3113223637Sbz		    (r->flagset & th->th_flags) != r->flags)
3114126258Smlaier			r = TAILQ_NEXT(r, entries);
3115223637Sbz		/* tcp/udp only. uid.op always 0 in other cases */
3116171168Smlaier		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3117240233Sglebius		    pf_socket_lookup(direction, pd, m), 1)) &&
3118126258Smlaier		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3119171168Smlaier		    pd->lookup.uid))
3120126258Smlaier			r = TAILQ_NEXT(r, entries);
3121223637Sbz		/* tcp/udp only. gid.op always 0 in other cases */
3122171168Smlaier		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3123240233Sglebius		    pf_socket_lookup(direction, pd, m), 1)) &&
3124126258Smlaier		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3125171168Smlaier		    pd->lookup.gid))
3126126258Smlaier			r = TAILQ_NEXT(r, entries);
3127223637Sbz		else if (r->prob &&
3128223637Sbz		    r->prob <= arc4random())
3129126258Smlaier			r = TAILQ_NEXT(r, entries);
3130240233Sglebius		else if (r->match_tag && !pf_match_tag(m, r, &tag,
3131240233Sglebius		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
3132126258Smlaier			r = TAILQ_NEXT(r, entries);
3133223637Sbz		else if (r->os_fingerprint != PF_OSFP_ANY &&
3134223637Sbz		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3135223637Sbz		    pf_osfp_fingerprint(pd, m, off, th),
3136223637Sbz		    r->os_fingerprint)))
3137126258Smlaier			r = TAILQ_NEXT(r, entries);
3138126258Smlaier		else {
3139126258Smlaier			if (r->tag)
3140126258Smlaier				tag = r->tag;
3141171168Smlaier			if (r->rtableid >= 0)
3142171168Smlaier				rtableid = r->rtableid;
3143126258Smlaier			if (r->anchor == NULL) {
3144171168Smlaier				match = 1;
3145126258Smlaier				*rm = r;
3146126258Smlaier				*am = a;
3147126258Smlaier				*rsm = ruleset;
3148126258Smlaier				if ((*rm)->quick)
3149126258Smlaier					break;
3150126258Smlaier				r = TAILQ_NEXT(r, entries);
3151126258Smlaier			} else
3152145836Smlaier				pf_step_into_anchor(&asd, &ruleset,
3153171168Smlaier				    PF_RULESET_FILTER, &r, &a, &match);
3154126258Smlaier		}
3155171168Smlaier		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3156171168Smlaier		    PF_RULESET_FILTER, &r, &a, &match))
3157171168Smlaier			break;
3158126258Smlaier	}
3159126258Smlaier	r = *rm;
3160126258Smlaier	a = *am;
3161126258Smlaier	ruleset = *rsm;
3162126258Smlaier
3163126258Smlaier	REASON_SET(&reason, PFRES_MATCH);
3164126258Smlaier
3165223637Sbz	if (r->log || (nr != NULL && nr->log)) {
3166126258Smlaier		if (rewrite)
3167223637Sbz			m_copyback(m, off, hdrlen, pd->hdr.any);
3168240233Sglebius		PFLOG_PACKET(kif, m, af, direction, reason, r->log ? r : nr, a,
3169240233Sglebius		    ruleset, pd, 1);
3170126258Smlaier	}
3171126258Smlaier
3172126258Smlaier	if ((r->action == PF_DROP) &&
3173126258Smlaier	    ((r->rule_flag & PFRULE_RETURNRST) ||
3174126258Smlaier	    (r->rule_flag & PFRULE_RETURNICMP) ||
3175126258Smlaier	    (r->rule_flag & PFRULE_RETURN))) {
3176126258Smlaier		/* undo NAT changes, if they have taken place */
3177130613Smlaier		if (nr != NULL) {
3178223637Sbz			PF_ACPY(saddr, &sk->addr[pd->sidx], af);
3179223637Sbz			PF_ACPY(daddr, &sk->addr[pd->didx], af);
3180223637Sbz			if (pd->sport)
3181223637Sbz				*pd->sport = sk->port[pd->sidx];
3182223637Sbz			if (pd->dport)
3183223637Sbz				*pd->dport = sk->port[pd->didx];
3184223637Sbz			if (pd->proto_sum)
3185223637Sbz				*pd->proto_sum = bproto_sum;
3186223637Sbz			if (pd->ip_sum)
3187223637Sbz				*pd->ip_sum = bip_sum;
3188223637Sbz			m_copyback(m, off, hdrlen, pd->hdr.any);
3189126258Smlaier		}
3190223637Sbz		if (pd->proto == IPPROTO_TCP &&
3191223637Sbz		    ((r->rule_flag & PFRULE_RETURNRST) ||
3192126258Smlaier		    (r->rule_flag & PFRULE_RETURN)) &&
3193126258Smlaier		    !(th->th_flags & TH_RST)) {
3194223637Sbz			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
3195223637Sbz			int		 len = 0;
3196223637Sbz#ifdef INET
3197223637Sbz			struct ip	*h4;
3198223637Sbz#endif
3199223637Sbz#ifdef INET6
3200223637Sbz			struct ip6_hdr	*h6;
3201223637Sbz#endif
3202126258Smlaier
3203223637Sbz			switch (af) {
3204223637Sbz#ifdef INET
3205223637Sbz			case AF_INET:
3206223637Sbz				h4 = mtod(m, struct ip *);
3207223637Sbz				len = ntohs(h4->ip_len) - off;
3208223637Sbz				break;
3209223637Sbz#endif
3210223637Sbz#ifdef INET6
3211223637Sbz			case AF_INET6:
3212223637Sbz				h6 = mtod(m, struct ip6_hdr *);
3213223637Sbz				len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
3214223637Sbz				break;
3215223637Sbz#endif
3216223637Sbz			}
3217223637Sbz
3218223637Sbz			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
3219223637Sbz				REASON_SET(&reason, PFRES_PROTCKSUM);
3220223637Sbz			else {
3221223637Sbz				if (th->th_flags & TH_SYN)
3222223637Sbz					ack++;
3223223637Sbz				if (th->th_flags & TH_FIN)
3224223637Sbz					ack++;
3225223637Sbz				pf_send_tcp(m, r, af, pd->dst,
3226223637Sbz				    pd->src, th->th_dport, th->th_sport,
3227223637Sbz				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3228240233Sglebius				    r->return_ttl, 1, 0, kif->pfik_ifp);
3229223637Sbz			}
3230223637Sbz		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
3231223637Sbz		    r->return_icmp)
3232126258Smlaier			pf_send_icmp(m, r->return_icmp >> 8,
3233126258Smlaier			    r->return_icmp & 255, af, r);
3234223637Sbz		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
3235223637Sbz		    r->return_icmp6)
3236126258Smlaier			pf_send_icmp(m, r->return_icmp6 >> 8,
3237126258Smlaier			    r->return_icmp6 & 255, af, r);
3238126258Smlaier	}
3239126258Smlaier
3240126258Smlaier	if (r->action == PF_DROP)
3241223637Sbz		goto cleanup;
3242126258Smlaier
3243240233Sglebius	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
3244126258Smlaier		REASON_SET(&reason, PFRES_MEMORY);
3245223637Sbz		goto cleanup;
3246126258Smlaier	}
3247240233Sglebius	if (rtableid >= 0)
3248240233Sglebius		M_SETFIB(m, rtableid);
3249126258Smlaier
3250223637Sbz	if (!state_icmp && (r->keep_state || nr != NULL ||
3251223637Sbz	    (pd->flags & PFDESC_TCP_NORM))) {
3252223637Sbz		int action;
3253240233Sglebius		action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
3254240233Sglebius		    sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
3255240233Sglebius		    hdrlen);
3256223637Sbz		if (action != PF_PASS)
3257223637Sbz			return (action);
3258223637Sbz	} else {
3259223637Sbz		if (sk != NULL)
3260240233Sglebius			uma_zfree(V_pf_state_key_z, sk);
3261223637Sbz		if (nk != NULL)
3262240233Sglebius			uma_zfree(V_pf_state_key_z, nk);
3263223637Sbz	}
3264126258Smlaier
3265223637Sbz	/* copy back packet headers if we performed NAT operations */
3266223637Sbz	if (rewrite)
3267223637Sbz		m_copyback(m, off, hdrlen, pd->hdr.any);
3268130613Smlaier
3269240233Sglebius	if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
3270240233Sglebius	    direction == PF_OUT &&
3271240233Sglebius	    pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m))
3272223637Sbz		/*
3273223637Sbz		 * We want the state created, but we dont
3274223637Sbz		 * want to send this in case a partner
3275223637Sbz		 * firewall has to know about it to allow
3276223637Sbz		 * replies through it.
3277223637Sbz		 */
3278240233Sglebius		return (PF_DEFER);
3279223637Sbz
3280223637Sbz	return (PF_PASS);
3281223637Sbz
3282130613Smlaiercleanup:
3283223637Sbz	if (sk != NULL)
3284240233Sglebius		uma_zfree(V_pf_state_key_z, sk);
3285223637Sbz	if (nk != NULL)
3286240233Sglebius		uma_zfree(V_pf_state_key_z, nk);
3287223637Sbz	return (PF_DROP);
3288223637Sbz}
3289126258Smlaier
3290240233Sglebiusstatic int
3291223637Sbzpf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
3292240233Sglebius    struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk,
3293240233Sglebius    struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport,
3294240233Sglebius    u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm,
3295240233Sglebius    int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen)
3296223637Sbz{
3297223637Sbz	struct pf_state		*s = NULL;
3298223637Sbz	struct pf_src_node	*sn = NULL;
3299223637Sbz	struct tcphdr		*th = pd->hdr.tcp;
3300223637Sbz	u_int16_t		 mss = V_tcp_mssdflt;
3301223637Sbz	u_short			 reason;
3302223637Sbz
3303223637Sbz	/* check maximums */
3304223637Sbz	if (r->max_states && (r->states_cur >= r->max_states)) {
3305223637Sbz		V_pf_status.lcounters[LCNT_STATES]++;
3306223637Sbz		REASON_SET(&reason, PFRES_MAXSTATES);
3307223637Sbz		return (PF_DROP);
3308223637Sbz	}
3309223637Sbz	/* src node for filter rule */
3310223637Sbz	if ((r->rule_flag & PFRULE_SRCTRACK ||
3311223637Sbz	    r->rpool.opts & PF_POOL_STICKYADDR) &&
3312223637Sbz	    pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
3313223637Sbz		REASON_SET(&reason, PFRES_SRCLIMIT);
3314223637Sbz		goto csfailed;
3315223637Sbz	}
3316223637Sbz	/* src node for translation rule */
3317223637Sbz	if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3318223637Sbz	    pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
3319223637Sbz		REASON_SET(&reason, PFRES_SRCLIMIT);
3320223637Sbz		goto csfailed;
3321223637Sbz	}
3322240233Sglebius	s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO);
3323223637Sbz	if (s == NULL) {
3324223637Sbz		REASON_SET(&reason, PFRES_MEMORY);
3325223637Sbz		goto csfailed;
3326223637Sbz	}
3327223637Sbz	s->rule.ptr = r;
3328223637Sbz	s->nat_rule.ptr = nr;
3329223637Sbz	s->anchor.ptr = a;
3330223637Sbz	STATE_INC_COUNTERS(s);
3331223637Sbz	if (r->allow_opts)
3332223637Sbz		s->state_flags |= PFSTATE_ALLOWOPTS;
3333223637Sbz	if (r->rule_flag & PFRULE_STATESLOPPY)
3334223637Sbz		s->state_flags |= PFSTATE_SLOPPY;
3335223637Sbz	s->log = r->log & PF_LOG_ALL;
3336223637Sbz	s->sync_state = PFSYNC_S_NONE;
3337223637Sbz	if (nr != NULL)
3338223637Sbz		s->log |= nr->log & PF_LOG_ALL;
3339223637Sbz	switch (pd->proto) {
3340223637Sbz	case IPPROTO_TCP:
3341126258Smlaier		s->src.seqlo = ntohl(th->th_seq);
3342223637Sbz		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
3343126258Smlaier		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3344126258Smlaier		    r->keep_state == PF_STATE_MODULATE) {
3345126258Smlaier			/* Generate sequence number modulator */
3346223637Sbz			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
3347223637Sbz			    0)
3348223637Sbz				s->src.seqdiff = 1;
3349126258Smlaier			pf_change_a(&th->th_seq, &th->th_sum,
3350126258Smlaier			    htonl(s->src.seqlo + s->src.seqdiff), 0);
3351223637Sbz			*rewrite = 1;
3352126258Smlaier		} else
3353126258Smlaier			s->src.seqdiff = 0;
3354126258Smlaier		if (th->th_flags & TH_SYN) {
3355126258Smlaier			s->src.seqhi++;
3356223637Sbz			s->src.wscale = pf_get_wscale(m, off,
3357223637Sbz			    th->th_off, pd->af);
3358126258Smlaier		}
3359126258Smlaier		s->src.max_win = MAX(ntohs(th->th_win), 1);
3360126258Smlaier		if (s->src.wscale & PF_WSCALE_MASK) {
3361126258Smlaier			/* Remove scale factor from initial window */
3362126258Smlaier			int win = s->src.max_win;
3363126258Smlaier			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3364126258Smlaier			s->src.max_win = (win - 1) >>
3365126258Smlaier			    (s->src.wscale & PF_WSCALE_MASK);
3366126258Smlaier		}
3367126258Smlaier		if (th->th_flags & TH_FIN)
3368126258Smlaier			s->src.seqhi++;
3369126258Smlaier		s->dst.seqhi = 1;
3370126258Smlaier		s->dst.max_win = 1;
3371126258Smlaier		s->src.state = TCPS_SYN_SENT;
3372126258Smlaier		s->dst.state = TCPS_CLOSED;
3373126258Smlaier		s->timeout = PFTM_TCP_FIRST_PACKET;
3374223637Sbz		break;
3375223637Sbz	case IPPROTO_UDP:
3376223637Sbz		s->src.state = PFUDPS_SINGLE;
3377223637Sbz		s->dst.state = PFUDPS_NO_TRAFFIC;
3378223637Sbz		s->timeout = PFTM_UDP_FIRST_PACKET;
3379223637Sbz		break;
3380223637Sbz	case IPPROTO_ICMP:
3381223637Sbz#ifdef INET6
3382223637Sbz	case IPPROTO_ICMPV6:
3383223637Sbz#endif
3384223637Sbz		s->timeout = PFTM_ICMP_FIRST_PACKET;
3385223637Sbz		break;
3386223637Sbz	default:
3387223637Sbz		s->src.state = PFOTHERS_SINGLE;
3388223637Sbz		s->dst.state = PFOTHERS_NO_TRAFFIC;
3389223637Sbz		s->timeout = PFTM_OTHER_FIRST_PACKET;
3390223637Sbz	}
3391223637Sbz
3392240233Sglebius	s->creation = time_uptime;
3393240233Sglebius	s->expire = time_uptime;
3394223637Sbz
3395223637Sbz	if (sn != NULL) {
3396223637Sbz		s->src_node = sn;
3397223637Sbz		s->src_node->states++;
3398223637Sbz	}
3399223637Sbz	if (nsn != NULL) {
3400223637Sbz		/* XXX We only modify one side for now. */
3401223637Sbz		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
3402223637Sbz		s->nat_src_node = nsn;
3403223637Sbz		s->nat_src_node->states++;
3404223637Sbz	}
3405223637Sbz	if (pd->proto == IPPROTO_TCP) {
3406126258Smlaier		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
3407126258Smlaier		    off, pd, th, &s->src, &s->dst)) {
3408126258Smlaier			REASON_SET(&reason, PFRES_MEMORY);
3409130613Smlaier			pf_src_tree_remove_state(s);
3410145836Smlaier			STATE_DEC_COUNTERS(s);
3411240233Sglebius			uma_zfree(V_pf_state_z, s);
3412126258Smlaier			return (PF_DROP);
3413126258Smlaier		}
3414126258Smlaier		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3415145836Smlaier		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
3416223637Sbz		    &s->src, &s->dst, rewrite)) {
3417145836Smlaier			/* This really shouldn't happen!!! */
3418145836Smlaier			DPFPRINTF(PF_DEBUG_URGENT,
3419145836Smlaier			    ("pf_normalize_tcp_stateful failed on first pkt"));
3420126258Smlaier			pf_normalize_tcp_cleanup(s);
3421130613Smlaier			pf_src_tree_remove_state(s);
3422145836Smlaier			STATE_DEC_COUNTERS(s);
3423240233Sglebius			uma_zfree(V_pf_state_z, s);
3424223637Sbz			return (PF_DROP);
3425126258Smlaier		}
3426126258Smlaier	}
3427223637Sbz	s->direction = pd->dir;
3428126258Smlaier
3429240233Sglebius	/*
3430240233Sglebius	 * sk/nk could already been setup by pf_get_translation().
3431240233Sglebius	 */
3432240233Sglebius	if (nr == NULL) {
3433240233Sglebius		KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p",
3434240233Sglebius		    __func__, nr, sk, nk));
3435240233Sglebius		sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport);
3436240233Sglebius		if (sk == NULL)
3437240233Sglebius			goto csfailed;
3438240233Sglebius		nk = sk;
3439240233Sglebius	} else
3440240233Sglebius		KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p",
3441240233Sglebius		    __func__, nr, sk, nk));
3442126258Smlaier
3443240233Sglebius	/* Swap sk/nk for PF_OUT. */
3444240233Sglebius	if (pf_state_insert(BOUND_IFACE(r, kif),
3445240233Sglebius	    (pd->dir == PF_IN) ? sk : nk,
3446240233Sglebius	    (pd->dir == PF_IN) ? nk : sk, s)) {
3447223637Sbz		if (pd->proto == IPPROTO_TCP)
3448223637Sbz			pf_normalize_tcp_cleanup(s);
3449223637Sbz		REASON_SET(&reason, PFRES_STATEINS);
3450223637Sbz		pf_src_tree_remove_state(s);
3451223637Sbz		STATE_DEC_COUNTERS(s);
3452240233Sglebius		uma_zfree(V_pf_state_z, s);
3453223637Sbz		return (PF_DROP);
3454223637Sbz	} else
3455223637Sbz		*sm = s;
3456126258Smlaier
3457223637Sbz	pf_set_rt_ifp(s, pd->src);	/* needs s->state_key set */
3458240233Sglebius	if (tag > 0)
3459223637Sbz		s->tag = tag;
3460223637Sbz	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
3461223637Sbz	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
3462223637Sbz		s->src.state = PF_TCPS_PROXY_SRC;
3463223637Sbz		/* undo NAT changes, if they have taken place */
3464223637Sbz		if (nr != NULL) {
3465223637Sbz			struct pf_state_key *skt = s->key[PF_SK_WIRE];
3466223637Sbz			if (pd->dir == PF_OUT)
3467223637Sbz				skt = s->key[PF_SK_STACK];
3468223637Sbz			PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
3469223637Sbz			PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
3470223637Sbz			if (pd->sport)
3471223637Sbz				*pd->sport = skt->port[pd->sidx];
3472223637Sbz			if (pd->dport)
3473223637Sbz				*pd->dport = skt->port[pd->didx];
3474223637Sbz			if (pd->proto_sum)
3475223637Sbz				*pd->proto_sum = bproto_sum;
3476223637Sbz			if (pd->ip_sum)
3477223637Sbz				*pd->ip_sum = bip_sum;
3478223637Sbz			m_copyback(m, off, hdrlen, pd->hdr.any);
3479223637Sbz		}
3480223637Sbz		s->src.seqhi = htonl(arc4random());
3481223637Sbz		/* Find mss option */
3482231852Sbz		int rtid = M_GETFIB(m);
3483223637Sbz		mss = pf_get_mss(m, off, th->th_off, pd->af);
3484231852Sbz		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
3485231852Sbz		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
3486223637Sbz		s->src.mss = mss;
3487223637Sbz		pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
3488223637Sbz		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3489240233Sglebius		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL);
3490223637Sbz		REASON_SET(&reason, PFRES_SYNPROXY);
3491223637Sbz		return (PF_SYNPROXY_DROP);
3492171168Smlaier	}
3493165631Smlaier
3494223637Sbz	return (PF_PASS);
3495126258Smlaier
3496223637Sbzcsfailed:
3497223637Sbz	if (sk != NULL)
3498240233Sglebius		uma_zfree(V_pf_state_key_z, sk);
3499223637Sbz	if (nk != NULL)
3500240233Sglebius		uma_zfree(V_pf_state_key_z, nk);
3501223637Sbz
3502223637Sbz	if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3503240233Sglebius		pf_remove_src_node(sn);
3504223637Sbz		V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3505223637Sbz		V_pf_status.src_nodes--;
3506240233Sglebius		uma_zfree(V_pf_sources_z, sn);
3507126258Smlaier	}
3508223637Sbz	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
3509240233Sglebius		pf_remove_src_node(nsn);
3510223637Sbz		V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3511223637Sbz		V_pf_status.src_nodes--;
3512240233Sglebius		uma_zfree(V_pf_sources_z, nsn);
3513126258Smlaier	}
3514223637Sbz	return (PF_DROP);
3515126258Smlaier}
3516126258Smlaier
3517240233Sglebiusstatic int
3518130613Smlaierpf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3519126258Smlaier    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3520126258Smlaier    struct pf_ruleset **rsm)
3521126258Smlaier{
3522126258Smlaier	struct pf_rule		*r, *a = NULL;
3523126258Smlaier	struct pf_ruleset	*ruleset = NULL;
3524126258Smlaier	sa_family_t		 af = pd->af;
3525126258Smlaier	u_short			 reason;
3526126258Smlaier	int			 tag = -1;
3527145836Smlaier	int			 asd = 0;
3528171168Smlaier	int			 match = 0;
3529126258Smlaier
3530240233Sglebius	PF_RULES_RASSERT();
3531240233Sglebius
3532126258Smlaier	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3533126258Smlaier	while (r != NULL) {
3534126258Smlaier		r->evaluations++;
3535171168Smlaier		if (pfi_kif_match(r->kif, kif) == r->ifnot)
3536126258Smlaier			r = r->skip[PF_SKIP_IFP].ptr;
3537126258Smlaier		else if (r->direction && r->direction != direction)
3538126258Smlaier			r = r->skip[PF_SKIP_DIR].ptr;
3539126258Smlaier		else if (r->af && r->af != af)
3540126258Smlaier			r = r->skip[PF_SKIP_AF].ptr;
3541126258Smlaier		else if (r->proto && r->proto != pd->proto)
3542126258Smlaier			r = r->skip[PF_SKIP_PROTO].ptr;
3543171168Smlaier		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
3544231852Sbz		    r->src.neg, kif, M_GETFIB(m)))
3545126258Smlaier			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3546171168Smlaier		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
3547231852Sbz		    r->dst.neg, NULL, M_GETFIB(m)))
3548126258Smlaier			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3549171168Smlaier		else if (r->tos && !(r->tos == pd->tos))
3550126258Smlaier			r = TAILQ_NEXT(r, entries);
3551173815Smlaier		else if (r->os_fingerprint != PF_OSFP_ANY)
3552126258Smlaier			r = TAILQ_NEXT(r, entries);
3553173815Smlaier		else if (pd->proto == IPPROTO_UDP &&
3554173815Smlaier		    (r->src.port_op || r->dst.port_op))
3555173815Smlaier			r = TAILQ_NEXT(r, entries);
3556173815Smlaier		else if (pd->proto == IPPROTO_TCP &&
3557173815Smlaier		    (r->src.port_op || r->dst.port_op || r->flagset))
3558173815Smlaier			r = TAILQ_NEXT(r, entries);
3559173815Smlaier		else if ((pd->proto == IPPROTO_ICMP ||
3560173815Smlaier		    pd->proto == IPPROTO_ICMPV6) &&
3561173815Smlaier		    (r->type || r->code))
3562173815Smlaier			r = TAILQ_NEXT(r, entries);
3563223637Sbz		else if (r->prob && r->prob <=
3564223637Sbz		    (arc4random() % (UINT_MAX - 1) + 1))
3565126258Smlaier			r = TAILQ_NEXT(r, entries);
3566240233Sglebius		else if (r->match_tag && !pf_match_tag(m, r, &tag,
3567240233Sglebius		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
3568126258Smlaier			r = TAILQ_NEXT(r, entries);
3569126258Smlaier		else {
3570126258Smlaier			if (r->anchor == NULL) {
3571171168Smlaier				match = 1;
3572126258Smlaier				*rm = r;
3573126258Smlaier				*am = a;
3574126258Smlaier				*rsm = ruleset;
3575126258Smlaier				if ((*rm)->quick)
3576126258Smlaier					break;
3577126258Smlaier				r = TAILQ_NEXT(r, entries);
3578126258Smlaier			} else
3579145836Smlaier				pf_step_into_anchor(&asd, &ruleset,
3580171168Smlaier				    PF_RULESET_FILTER, &r, &a, &match);
3581126258Smlaier		}
3582171168Smlaier		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3583171168Smlaier		    PF_RULESET_FILTER, &r, &a, &match))
3584171168Smlaier			break;
3585126258Smlaier	}
3586126258Smlaier	r = *rm;
3587126258Smlaier	a = *am;
3588126258Smlaier	ruleset = *rsm;
3589126258Smlaier
3590126258Smlaier	REASON_SET(&reason, PFRES_MATCH);
3591130613Smlaier
3592126258Smlaier	if (r->log)
3593240233Sglebius		PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd,
3594240233Sglebius		    1);
3595126258Smlaier
3596126258Smlaier	if (r->action != PF_PASS)
3597126258Smlaier		return (PF_DROP);
3598126258Smlaier
3599240233Sglebius	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
3600126258Smlaier		REASON_SET(&reason, PFRES_MEMORY);
3601126258Smlaier		return (PF_DROP);
3602126258Smlaier	}
3603126258Smlaier
3604126258Smlaier	return (PF_PASS);
3605126258Smlaier}
3606126258Smlaier
3607240233Sglebiusstatic int
3608200930Sdelphijpf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
3609200930Sdelphij	struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
3610200930Sdelphij	struct pf_pdesc *pd, u_short *reason, int *copyback)
3611126258Smlaier{
3612223637Sbz	struct tcphdr		*th = pd->hdr.tcp;
3613223637Sbz	u_int16_t		 win = ntohs(th->th_win);
3614223637Sbz	u_int32_t		 ack, end, seq, orig_seq;
3615223637Sbz	u_int8_t		 sws, dws;
3616223637Sbz	int			 ackskew;
3617126258Smlaier
3618126258Smlaier	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
3619126258Smlaier		sws = src->wscale & PF_WSCALE_MASK;
3620126258Smlaier		dws = dst->wscale & PF_WSCALE_MASK;
3621126258Smlaier	} else
3622126258Smlaier		sws = dws = 0;
3623126258Smlaier
3624126258Smlaier	/*
3625126258Smlaier	 * Sequence tracking algorithm from Guido van Rooij's paper:
3626126258Smlaier	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
3627126258Smlaier	 *	tcp_filtering.ps
3628126258Smlaier	 */
3629126258Smlaier
3630145836Smlaier	orig_seq = seq = ntohl(th->th_seq);
3631126258Smlaier	if (src->seqlo == 0) {
3632126258Smlaier		/* First packet from this end. Set its state */
3633126258Smlaier
3634126258Smlaier		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
3635126258Smlaier		    src->scrub == NULL) {
3636126258Smlaier			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
3637126258Smlaier				REASON_SET(reason, PFRES_MEMORY);
3638126258Smlaier				return (PF_DROP);
3639126258Smlaier			}
3640126258Smlaier		}
3641126258Smlaier
3642126258Smlaier		/* Deferred generation of sequence number modulator */
3643126258Smlaier		if (dst->seqdiff && !src->seqdiff) {
3644223637Sbz			/* use random iss for the TCP server */
3645223637Sbz			while ((src->seqdiff = arc4random() - seq) == 0)
3646126258Smlaier				;
3647126258Smlaier			ack = ntohl(th->th_ack) - dst->seqdiff;
3648126258Smlaier			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
3649126258Smlaier			    src->seqdiff), 0);
3650126258Smlaier			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
3651200930Sdelphij			*copyback = 1;
3652126258Smlaier		} else {
3653126258Smlaier			ack = ntohl(th->th_ack);
3654126258Smlaier		}
3655126258Smlaier
3656126258Smlaier		end = seq + pd->p_len;
3657126258Smlaier		if (th->th_flags & TH_SYN) {
3658126258Smlaier			end++;
3659126258Smlaier			if (dst->wscale & PF_WSCALE_FLAG) {
3660126258Smlaier				src->wscale = pf_get_wscale(m, off, th->th_off,
3661126258Smlaier				    pd->af);
3662126258Smlaier				if (src->wscale & PF_WSCALE_FLAG) {
3663126258Smlaier					/* Remove scale factor from initial
3664126258Smlaier					 * window */
3665126258Smlaier					sws = src->wscale & PF_WSCALE_MASK;
3666126258Smlaier					win = ((u_int32_t)win + (1 << sws) - 1)
3667126258Smlaier					    >> sws;
3668126258Smlaier					dws = dst->wscale & PF_WSCALE_MASK;
3669126258Smlaier				} else {
3670126258Smlaier					/* fixup other window */
3671126258Smlaier					dst->max_win <<= dst->wscale &
3672126258Smlaier					    PF_WSCALE_MASK;
3673126258Smlaier					/* in case of a retrans SYN|ACK */
3674126258Smlaier					dst->wscale = 0;
3675126258Smlaier				}
3676126258Smlaier			}
3677126258Smlaier		}
3678126258Smlaier		if (th->th_flags & TH_FIN)
3679126258Smlaier			end++;
3680126258Smlaier
3681126258Smlaier		src->seqlo = seq;
3682126258Smlaier		if (src->state < TCPS_SYN_SENT)
3683126258Smlaier			src->state = TCPS_SYN_SENT;
3684126258Smlaier
3685126258Smlaier		/*
3686126258Smlaier		 * May need to slide the window (seqhi may have been set by
3687126258Smlaier		 * the crappy stack check or if we picked up the connection
3688126258Smlaier		 * after establishment)
3689126258Smlaier		 */
3690126258Smlaier		if (src->seqhi == 1 ||
3691126258Smlaier		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
3692126258Smlaier			src->seqhi = end + MAX(1, dst->max_win << dws);
3693126258Smlaier		if (win > src->max_win)
3694126258Smlaier			src->max_win = win;
3695126258Smlaier
3696126258Smlaier	} else {
3697126258Smlaier		ack = ntohl(th->th_ack) - dst->seqdiff;
3698126258Smlaier		if (src->seqdiff) {
3699126258Smlaier			/* Modulate sequence numbers */
3700126258Smlaier			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
3701126258Smlaier			    src->seqdiff), 0);
3702126258Smlaier			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
3703200930Sdelphij			*copyback = 1;
3704126258Smlaier		}
3705126258Smlaier		end = seq + pd->p_len;
3706126258Smlaier		if (th->th_flags & TH_SYN)
3707126258Smlaier			end++;
3708126258Smlaier		if (th->th_flags & TH_FIN)
3709126258Smlaier			end++;
3710126258Smlaier	}
3711126258Smlaier
3712126258Smlaier	if ((th->th_flags & TH_ACK) == 0) {
3713126258Smlaier		/* Let it pass through the ack skew check */
3714126258Smlaier		ack = dst->seqlo;
3715126258Smlaier	} else if ((ack == 0 &&
3716126258Smlaier	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
3717126258Smlaier	    /* broken tcp stacks do not set ack */
3718126258Smlaier	    (dst->state < TCPS_SYN_SENT)) {
3719126258Smlaier		/*
3720126258Smlaier		 * Many stacks (ours included) will set the ACK number in an
3721126258Smlaier		 * FIN|ACK if the SYN times out -- no sequence to ACK.
3722126258Smlaier		 */
3723126258Smlaier		ack = dst->seqlo;
3724126258Smlaier	}
3725126258Smlaier
3726126258Smlaier	if (seq == end) {
3727126258Smlaier		/* Ease sequencing restrictions on no data packets */
3728126258Smlaier		seq = src->seqlo;
3729126258Smlaier		end = seq;
3730126258Smlaier	}
3731126258Smlaier
3732126258Smlaier	ackskew = dst->seqlo - ack;
3733126258Smlaier
3734171168Smlaier
3735171168Smlaier	/*
3736171168Smlaier	 * Need to demodulate the sequence numbers in any TCP SACK options
3737171168Smlaier	 * (Selective ACK). We could optionally validate the SACK values
3738171168Smlaier	 * against the current ACK window, either forwards or backwards, but
3739171168Smlaier	 * I'm not confident that SACK has been implemented properly
3740171168Smlaier	 * everywhere. It wouldn't surprise me if several stacks accidently
3741171168Smlaier	 * SACK too far backwards of previously ACKed data. There really aren't
3742171168Smlaier	 * any security implications of bad SACKing unless the target stack
3743171168Smlaier	 * doesn't validate the option length correctly. Someone trying to
3744171168Smlaier	 * spoof into a TCP connection won't bother blindly sending SACK
3745171168Smlaier	 * options anyway.
3746171168Smlaier	 */
3747171168Smlaier	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
3748171168Smlaier		if (pf_modulate_sack(m, off, pd, th, dst))
3749200930Sdelphij			*copyback = 1;
3750171168Smlaier	}
3751171168Smlaier
3752171168Smlaier
3753223637Sbz#define	MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
3754126258Smlaier	if (SEQ_GEQ(src->seqhi, end) &&
3755126258Smlaier	    /* Last octet inside other's window space */
3756126258Smlaier	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
3757126258Smlaier	    /* Retrans: not more than one window back */
3758126258Smlaier	    (ackskew >= -MAXACKWINDOW) &&
3759126258Smlaier	    /* Acking not more than one reassembled fragment backwards */
3760145836Smlaier	    (ackskew <= (MAXACKWINDOW << sws)) &&
3761126258Smlaier	    /* Acking not more than one window forward */
3762145836Smlaier	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
3763223637Sbz	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
3764223637Sbz	    (pd->flags & PFDESC_IP_REAS) == 0)) {
3765171168Smlaier	    /* Require an exact/+1 sequence match on resets when possible */
3766126258Smlaier
3767145836Smlaier		if (dst->scrub || src->scrub) {
3768145836Smlaier			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
3769200930Sdelphij			    *state, src, dst, copyback))
3770145836Smlaier				return (PF_DROP);
3771145836Smlaier		}
3772145836Smlaier
3773126258Smlaier		/* update max window */
3774126258Smlaier		if (src->max_win < win)
3775126258Smlaier			src->max_win = win;
3776126258Smlaier		/* synchronize sequencing */
3777126258Smlaier		if (SEQ_GT(end, src->seqlo))
3778126258Smlaier			src->seqlo = end;
3779126258Smlaier		/* slide the window of what the other end can send */
3780126258Smlaier		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
3781126258Smlaier			dst->seqhi = ack + MAX((win << sws), 1);
3782126258Smlaier
3783126258Smlaier
3784126258Smlaier		/* update states */
3785126258Smlaier		if (th->th_flags & TH_SYN)
3786126258Smlaier			if (src->state < TCPS_SYN_SENT)
3787126258Smlaier				src->state = TCPS_SYN_SENT;
3788126258Smlaier		if (th->th_flags & TH_FIN)
3789126258Smlaier			if (src->state < TCPS_CLOSING)
3790126258Smlaier				src->state = TCPS_CLOSING;
3791126258Smlaier		if (th->th_flags & TH_ACK) {
3792145836Smlaier			if (dst->state == TCPS_SYN_SENT) {
3793126258Smlaier				dst->state = TCPS_ESTABLISHED;
3794145836Smlaier				if (src->state == TCPS_ESTABLISHED &&
3795145836Smlaier				    (*state)->src_node != NULL &&
3796145836Smlaier				    pf_src_connlimit(state)) {
3797145836Smlaier					REASON_SET(reason, PFRES_SRCLIMIT);
3798145836Smlaier					return (PF_DROP);
3799145836Smlaier				}
3800145836Smlaier			} else if (dst->state == TCPS_CLOSING)
3801126258Smlaier				dst->state = TCPS_FIN_WAIT_2;
3802126258Smlaier		}
3803126258Smlaier		if (th->th_flags & TH_RST)
3804126258Smlaier			src->state = dst->state = TCPS_TIME_WAIT;
3805126258Smlaier
3806126258Smlaier		/* update expire time */
3807240233Sglebius		(*state)->expire = time_uptime;
3808126258Smlaier		if (src->state >= TCPS_FIN_WAIT_2 &&
3809126258Smlaier		    dst->state >= TCPS_FIN_WAIT_2)
3810126258Smlaier			(*state)->timeout = PFTM_TCP_CLOSED;
3811171168Smlaier		else if (src->state >= TCPS_CLOSING &&
3812171168Smlaier		    dst->state >= TCPS_CLOSING)
3813126258Smlaier			(*state)->timeout = PFTM_TCP_FIN_WAIT;
3814126258Smlaier		else if (src->state < TCPS_ESTABLISHED ||
3815126258Smlaier		    dst->state < TCPS_ESTABLISHED)
3816126258Smlaier			(*state)->timeout = PFTM_TCP_OPENING;
3817126258Smlaier		else if (src->state >= TCPS_CLOSING ||
3818126258Smlaier		    dst->state >= TCPS_CLOSING)
3819126258Smlaier			(*state)->timeout = PFTM_TCP_CLOSING;
3820126258Smlaier		else
3821126258Smlaier			(*state)->timeout = PFTM_TCP_ESTABLISHED;
3822126258Smlaier
3823126258Smlaier		/* Fall through to PASS packet */
3824126258Smlaier
3825126258Smlaier	} else if ((dst->state < TCPS_SYN_SENT ||
3826126258Smlaier		dst->state >= TCPS_FIN_WAIT_2 ||
3827126258Smlaier		src->state >= TCPS_FIN_WAIT_2) &&
3828126258Smlaier	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
3829126258Smlaier	    /* Within a window forward of the originating packet */
3830126258Smlaier	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
3831126258Smlaier	    /* Within a window backward of the originating packet */
3832126258Smlaier
3833126258Smlaier		/*
3834126258Smlaier		 * This currently handles three situations:
3835126258Smlaier		 *  1) Stupid stacks will shotgun SYNs before their peer
3836126258Smlaier		 *     replies.
3837126258Smlaier		 *  2) When PF catches an already established stream (the
3838126258Smlaier		 *     firewall rebooted, the state table was flushed, routes
3839126258Smlaier		 *     changed...)
3840126258Smlaier		 *  3) Packets get funky immediately after the connection
3841126258Smlaier		 *     closes (this should catch Solaris spurious ACK|FINs
3842126258Smlaier		 *     that web servers like to spew after a close)
3843126258Smlaier		 *
3844126258Smlaier		 * This must be a little more careful than the above code
3845126258Smlaier		 * since packet floods will also be caught here. We don't
3846126258Smlaier		 * update the TTL here to mitigate the damage of a packet
3847126258Smlaier		 * flood and so the same code can handle awkward establishment
3848126258Smlaier		 * and a loosened connection close.
3849126258Smlaier		 * In the establishment case, a correct peer response will
3850126258Smlaier		 * validate the connection, go through the normal state code
3851126258Smlaier		 * and keep updating the state TTL.
3852126258Smlaier		 */
3853126258Smlaier
3854223637Sbz		if (V_pf_status.debug >= PF_DEBUG_MISC) {
3855126258Smlaier			printf("pf: loose state match: ");
3856126258Smlaier			pf_print_state(*state);
3857126258Smlaier			pf_print_flags(th->th_flags);
3858171168Smlaier			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
3859223637Sbz			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
3860223637Sbz			    pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
3861223637Sbz			    (unsigned long long)(*state)->packets[1],
3862223637Sbz			    pd->dir == PF_IN ? "in" : "out",
3863223637Sbz			    pd->dir == (*state)->direction ? "fwd" : "rev");
3864126258Smlaier		}
3865126258Smlaier
3866145836Smlaier		if (dst->scrub || src->scrub) {
3867145836Smlaier			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
3868200930Sdelphij			    *state, src, dst, copyback))
3869145836Smlaier				return (PF_DROP);
3870145836Smlaier		}
3871145836Smlaier
3872126258Smlaier		/* update max window */
3873126258Smlaier		if (src->max_win < win)
3874126258Smlaier			src->max_win = win;
3875126258Smlaier		/* synchronize sequencing */
3876126258Smlaier		if (SEQ_GT(end, src->seqlo))
3877126258Smlaier			src->seqlo = end;
3878126258Smlaier		/* slide the window of what the other end can send */
3879126258Smlaier		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
3880126258Smlaier			dst->seqhi = ack + MAX((win << sws), 1);
3881126258Smlaier
3882126258Smlaier		/*
3883126258Smlaier		 * Cannot set dst->seqhi here since this could be a shotgunned
3884126258Smlaier		 * SYN and not an already established connection.
3885126258Smlaier		 */
3886126258Smlaier
3887126258Smlaier		if (th->th_flags & TH_FIN)
3888126258Smlaier			if (src->state < TCPS_CLOSING)
3889126258Smlaier				src->state = TCPS_CLOSING;
3890126258Smlaier		if (th->th_flags & TH_RST)
3891126258Smlaier			src->state = dst->state = TCPS_TIME_WAIT;
3892126258Smlaier
3893126258Smlaier		/* Fall through to PASS packet */
3894126258Smlaier
3895126258Smlaier	} else {
3896126258Smlaier		if ((*state)->dst.state == TCPS_SYN_SENT &&
3897126258Smlaier		    (*state)->src.state == TCPS_SYN_SENT) {
3898126258Smlaier			/* Send RST for state mismatches during handshake */
3899145836Smlaier			if (!(th->th_flags & TH_RST))
3900223637Sbz				pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
3901126258Smlaier				    pd->dst, pd->src, th->th_dport,
3902145836Smlaier				    th->th_sport, ntohl(th->th_ack), 0,
3903145836Smlaier				    TH_RST, 0, 0,
3904171168Smlaier				    (*state)->rule.ptr->return_ttl, 1, 0,
3905240233Sglebius				    kif->pfik_ifp);
3906126258Smlaier			src->seqlo = 0;
3907126258Smlaier			src->seqhi = 1;
3908126258Smlaier			src->max_win = 1;
3909223637Sbz		} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
3910126258Smlaier			printf("pf: BAD state: ");
3911126258Smlaier			pf_print_state(*state);
3912126258Smlaier			pf_print_flags(th->th_flags);
3913171168Smlaier			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
3914171168Smlaier			    "pkts=%llu:%llu dir=%s,%s\n",
3915171168Smlaier			    seq, orig_seq, ack, pd->p_len, ackskew,
3916171168Smlaier			    (unsigned long long)(*state)->packets[0],
3917171168Smlaier			    (unsigned long long)(*state)->packets[1],
3918223637Sbz			    pd->dir == PF_IN ? "in" : "out",
3919223637Sbz			    pd->dir == (*state)->direction ? "fwd" : "rev");
3920126258Smlaier			printf("pf: State failure on: %c %c %c %c | %c %c\n",
3921126258Smlaier			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
3922126258Smlaier			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
3923126258Smlaier			    ' ': '2',
3924126258Smlaier			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
3925126258Smlaier			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
3926126258Smlaier			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
3927126258Smlaier			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
3928126258Smlaier		}
3929145836Smlaier		REASON_SET(reason, PFRES_BADSTATE);
3930126258Smlaier		return (PF_DROP);
3931126258Smlaier	}
3932126258Smlaier
3933200930Sdelphij	return (PF_PASS);
3934200930Sdelphij}
3935126258Smlaier
/*
 * Sloppy TCP state tracking: advance the per-peer TCP states using only the
 * header flags (th_flags).  No sequence, acknowledgement or window value is
 * examined in this function.
 *
 *  src, dst - the two peers of the state, as selected by the caller
 *  state    - state entry; its expire and timeout fields are updated, and it
 *             is handed to pf_src_connlimit() when a source node is attached
 *  pd       - packet descriptor; only pd->hdr.tcp is read
 *  reason   - set to PFRES_SRCLIMIT (via REASON_SET) when the packet is dropped
 *
 * Returns PF_DROP when pf_src_connlimit() reports non-zero at the moment the
 * connection becomes established, PF_PASS otherwise.
 */
3936240233Sglebiusstatic int
3937200930Sdelphijpf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
3938200930Sdelphij	struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
3939200930Sdelphij{
3940200930Sdelphij	struct tcphdr		*th = pd->hdr.tcp;
3941200930Sdelphij
	/* SYN and FIN only ever raise src's own state, never lower it. */
3942200930Sdelphij	if (th->th_flags & TH_SYN)
3943200930Sdelphij		if (src->state < TCPS_SYN_SENT)
3944200930Sdelphij			src->state = TCPS_SYN_SENT;
3945200930Sdelphij	if (th->th_flags & TH_FIN)
3946200930Sdelphij		if (src->state < TCPS_CLOSING)
3947200930Sdelphij			src->state = TCPS_CLOSING;
	/* An ACK advances dst's state (both peers in the half-seen open case). */
3948200930Sdelphij	if (th->th_flags & TH_ACK) {
3949200930Sdelphij		if (dst->state == TCPS_SYN_SENT) {
			/*
			 * dst is marked established before the limit check,
			 * so it stays that way even when the packet is dropped.
			 */
3950200930Sdelphij			dst->state = TCPS_ESTABLISHED;
3951200930Sdelphij			if (src->state == TCPS_ESTABLISHED &&
3952200930Sdelphij			    (*state)->src_node != NULL &&
3953200930Sdelphij			    pf_src_connlimit(state)) {
3954200930Sdelphij				REASON_SET(reason, PFRES_SRCLIMIT);
3955200930Sdelphij				return (PF_DROP);
3956200930Sdelphij			}
3957200930Sdelphij		} else if (dst->state == TCPS_CLOSING) {
3958200930Sdelphij			dst->state = TCPS_FIN_WAIT_2;
3959200930Sdelphij		} else if (src->state == TCPS_SYN_SENT &&
3960200930Sdelphij		    dst->state < TCPS_SYN_SENT) {
3961200930Sdelphij			/*
3962200930Sdelphij			 * Handle a special sloppy case where we only see one
3963200930Sdelphij			 * half of the connection. If there is an ACK after
3964200930Sdelphij			 * the initial SYN without ever seeing a packet from
3965200930Sdelphij			 * the destination, set the connection to established.
3966200930Sdelphij			 */
3967200930Sdelphij			dst->state = src->state = TCPS_ESTABLISHED;
3968200930Sdelphij			if ((*state)->src_node != NULL &&
3969200930Sdelphij			    pf_src_connlimit(state)) {
3970200930Sdelphij				REASON_SET(reason, PFRES_SRCLIMIT);
3971200930Sdelphij				return (PF_DROP);
3972200930Sdelphij			}
3973200930Sdelphij		} else if (src->state == TCPS_CLOSING &&
3974200930Sdelphij		    dst->state == TCPS_ESTABLISHED &&
3975200930Sdelphij		    dst->seqlo == 0) {
3976200930Sdelphij			/*
3977200930Sdelphij			 * Handle the closing of half connections where we
3978200930Sdelphij			 * don't see the full bidirectional FIN/ACK+ACK
3979200930Sdelphij			 * handshake.
3980200930Sdelphij			 */
3981200930Sdelphij			dst->state = TCPS_CLOSING;
3982200930Sdelphij		}
3983200930Sdelphij	}
	/* A RST unconditionally puts both peers into TIME_WAIT. */
3984200930Sdelphij	if (th->th_flags & TH_RST)
3985200930Sdelphij		src->state = dst->state = TCPS_TIME_WAIT;
3986200930Sdelphij
3987200930Sdelphij	/* update expire time */
	/*
	 * The timeout class is chosen by the first matching test, in this
	 * order: both peers >= FIN_WAIT_2 (CLOSED), both >= CLOSING
	 * (FIN_WAIT), either < ESTABLISHED (OPENING), either >= CLOSING
	 * (CLOSING), otherwise ESTABLISHED.
	 */
3988240233Sglebius	(*state)->expire = time_uptime;
3989200930Sdelphij	if (src->state >= TCPS_FIN_WAIT_2 &&
3990200930Sdelphij	    dst->state >= TCPS_FIN_WAIT_2)
3991200930Sdelphij		(*state)->timeout = PFTM_TCP_CLOSED;
3992200930Sdelphij	else if (src->state >= TCPS_CLOSING &&
3993200930Sdelphij	    dst->state >= TCPS_CLOSING)
3994200930Sdelphij		(*state)->timeout = PFTM_TCP_FIN_WAIT;
3995200930Sdelphij	else if (src->state < TCPS_ESTABLISHED ||
3996200930Sdelphij	    dst->state < TCPS_ESTABLISHED)
3997200930Sdelphij		(*state)->timeout = PFTM_TCP_OPENING;
3998200930Sdelphij	else if (src->state >= TCPS_CLOSING ||
3999200930Sdelphij	    dst->state >= TCPS_CLOSING)
4000200930Sdelphij		(*state)->timeout = PFTM_TCP_CLOSING;
4001200930Sdelphij	else
4002200930Sdelphij		(*state)->timeout = PFTM_TCP_ESTABLISHED;
4003200930Sdelphij
4004200930Sdelphij	return (PF_PASS);
4005200930Sdelphij}
4006200930Sdelphij
/*
 * Look up the state for a TCP packet and run it through TCP state tracking.
 *
 * Steps, in order:
 *  1. Build a lookup key from the packet's addresses and ports and resolve
 *     *state with STATE_LOOKUP.
 *  2. While the state is in one of the synproxy stages (PF_TCPS_PROXY_SRC,
 *     PF_TCPS_PROXY_DST), validate the handshake packet, emit the matching
 *     segment(s) with pf_send_tcp() and return PF_SYNPROXY_DROP or PF_DROP.
 *  3. A SYN without ACK arriving while both peers are at or past
 *     TCPS_FIN_WAIT_2 unlinks the state and returns PF_DROP.
 *  4. Hand the packet to pf_tcp_track_sloppy() or pf_tcp_track_full(),
 *     depending on PFSTATE_SLOPPY.
 *  5. Rewrite addresses/ports when the wire and stack keys differ, and copy
 *     the TCP header back into the mbuf if anything requested it.
 *
 * The parameter h is not referenced in this function.
 *
 * Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP from the code below (plus
 * whatever STATE_LOOKUP may return; that macro is defined outside this
 * view).  reason is filled in through REASON_SET on the drop paths that
 * set one.
 */
4007240233Sglebiusstatic int
4008200930Sdelphijpf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
4009200930Sdelphij    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4010200930Sdelphij    u_short *reason)
4011200930Sdelphij{
4012223637Sbz	struct pf_state_key_cmp	 key;
4013200930Sdelphij	struct tcphdr		*th = pd->hdr.tcp;
4014200930Sdelphij	int			 copyback = 0;
4015200930Sdelphij	struct pf_state_peer	*src, *dst;
4016223637Sbz	struct pf_state_key	*sk;
4017200930Sdelphij
	/*
	 * Fill the lookup key.  Index 0/1 of addr[] and port[] are swapped
	 * when direction is not PF_IN.
	 */
4018240233Sglebius	bzero(&key, sizeof(key));
4019200930Sdelphij	key.af = pd->af;
4020200930Sdelphij	key.proto = IPPROTO_TCP;
4021223637Sbz	if (direction == PF_IN)	{	/* wire side, straight */
4022223637Sbz		PF_ACPY(&key.addr[0], pd->src, key.af);
4023223637Sbz		PF_ACPY(&key.addr[1], pd->dst, key.af);
4024223637Sbz		key.port[0] = th->th_sport;
4025223637Sbz		key.port[1] = th->th_dport;
4026223637Sbz	} else {			/* stack side, reverse */
4027223637Sbz		PF_ACPY(&key.addr[1], pd->src, key.af);
4028223637Sbz		PF_ACPY(&key.addr[0], pd->dst, key.af);
4029223637Sbz		key.port[1] = th->th_sport;
4030223637Sbz		key.port[0] = th->th_dport;
4031200930Sdelphij	}
4032200930Sdelphij
	/*
	 * NOTE(review): STATE_LOOKUP is a macro defined outside this view.
	 * *state is dereferenced immediately afterwards, so it presumably
	 * returns from this function when no usable state is found - confirm.
	 */
4033240233Sglebius	STATE_LOOKUP(kif, &key, direction, *state, pd);
4034200930Sdelphij
	/*
	 * src/dst are the state's two peers relative to this packet: kept as
	 * stored when the packet travels in the state's direction, swapped
	 * otherwise.
	 */
4035200930Sdelphij	if (direction == (*state)->direction) {
4036200930Sdelphij		src = &(*state)->src;
4037200930Sdelphij		dst = &(*state)->dst;
4038200930Sdelphij	} else {
4039200930Sdelphij		src = &(*state)->dst;
4040200930Sdelphij		dst = &(*state)->src;
4041200930Sdelphij	}
4042200930Sdelphij
	/* sk is only used by the PF_TCPS_PROXY_DST stage below. */
4043223637Sbz	sk = (*state)->key[pd->didx];
4044223637Sbz
	/*
	 * Synproxy, first stage.  Packets against the state's direction are
	 * rejected.  A SYN must carry the recorded seqlo and is answered with
	 * a SYN|ACK; any other packet must be an ACK with ack == seqhi + 1 and
	 * seq == seqlo + 1.  Once that ACK passes the connection limit check
	 * the state moves to PF_TCPS_PROXY_DST and the same packet falls
	 * through into the second stage below.
	 */
4045200930Sdelphij	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
4046200930Sdelphij		if (direction != (*state)->direction) {
4047200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4048200930Sdelphij			return (PF_SYNPROXY_DROP);
4049200930Sdelphij		}
4050200930Sdelphij		if (th->th_flags & TH_SYN) {
4051200930Sdelphij			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
4052200930Sdelphij				REASON_SET(reason, PFRES_SYNPROXY);
4053200930Sdelphij				return (PF_DROP);
4054200930Sdelphij			}
4055200930Sdelphij			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
4056200930Sdelphij			    pd->src, th->th_dport, th->th_sport,
4057200930Sdelphij			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
4058240233Sglebius			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL);
4059200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4060200930Sdelphij			return (PF_SYNPROXY_DROP);
4061200930Sdelphij		} else if (!(th->th_flags & TH_ACK) ||
4062200930Sdelphij		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4063200930Sdelphij		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4064200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4065200930Sdelphij			return (PF_DROP);
4066200930Sdelphij		} else if ((*state)->src_node != NULL &&
4067200930Sdelphij		    pf_src_connlimit(state)) {
4068200930Sdelphij			REASON_SET(reason, PFRES_SRCLIMIT);
4069200930Sdelphij			return (PF_DROP);
4070200930Sdelphij		} else
4071200930Sdelphij			(*state)->src.state = PF_TCPS_PROXY_DST;
4072200930Sdelphij	}
	/*
	 * Synproxy, second stage.  Every branch returns, so a packet that
	 * reaches this block never continues to the tracking code further
	 * down.
	 */
4073200930Sdelphij	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
4074200930Sdelphij		if (direction == (*state)->direction) {
			/*
			 * Same direction as the state: require a plain ACK
			 * (no SYN) with the expected ack/seq, then send a SYN
			 * built from the state key sk.
			 */
4075200930Sdelphij			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
4076200930Sdelphij			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4077200930Sdelphij			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4078200930Sdelphij				REASON_SET(reason, PFRES_SYNPROXY);
4079200930Sdelphij				return (PF_DROP);
4080200930Sdelphij			}
4081200930Sdelphij			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
			/*
			 * NOTE(review): dst.seqhi == 1 looks like a "not yet
			 * chosen" marker that is replaced by a random value
			 * here - confirm where it is initialised.
			 */
4082200930Sdelphij			if ((*state)->dst.seqhi == 1)
4083200930Sdelphij				(*state)->dst.seqhi = htonl(arc4random());
4084200930Sdelphij			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4085223637Sbz			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
4086223637Sbz			    sk->port[pd->sidx], sk->port[pd->didx],
4087200930Sdelphij			    (*state)->dst.seqhi, 0, TH_SYN, 0,
4088240233Sglebius			    (*state)->src.mss, 0, 0, (*state)->tag, NULL);
4089200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4090200930Sdelphij			return (PF_SYNPROXY_DROP);
4091200930Sdelphij		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
4092200930Sdelphij		    (TH_SYN|TH_ACK)) ||
4093200930Sdelphij		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
4094200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4095200930Sdelphij			return (PF_DROP);
4096200930Sdelphij		} else {
			/*
			 * Opposite direction, valid SYN|ACK: record the
			 * peer's window and sequence number, send an ACK for
			 * each side, derive the per-peer sequence offsets
			 * (seqdiff) and window edges (seqhi), clear window
			 * scaling and mark both peers established.
			 */
4097200930Sdelphij			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4098200930Sdelphij			(*state)->dst.seqlo = ntohl(th->th_seq);
4099200930Sdelphij			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
4100200930Sdelphij			    pd->src, th->th_dport, th->th_sport,
4101200930Sdelphij			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4102200930Sdelphij			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
4103240233Sglebius			    (*state)->tag, NULL);
4104200930Sdelphij			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
4105223637Sbz			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
4106223637Sbz			    sk->port[pd->sidx], sk->port[pd->didx],
4107200930Sdelphij			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4108240233Sglebius			    TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL);
4109200930Sdelphij			(*state)->src.seqdiff = (*state)->dst.seqhi -
4110200930Sdelphij			    (*state)->src.seqlo;
4111200930Sdelphij			(*state)->dst.seqdiff = (*state)->src.seqhi -
4112200930Sdelphij			    (*state)->dst.seqlo;
4113200930Sdelphij			(*state)->src.seqhi = (*state)->src.seqlo +
4114200930Sdelphij			    (*state)->dst.max_win;
4115200930Sdelphij			(*state)->dst.seqhi = (*state)->dst.seqlo +
4116200930Sdelphij			    (*state)->src.max_win;
4117200930Sdelphij			(*state)->src.wscale = (*state)->dst.wscale = 0;
4118200930Sdelphij			(*state)->src.state = (*state)->dst.state =
4119200930Sdelphij			    TCPS_ESTABLISHED;
4120200930Sdelphij			REASON_SET(reason, PFRES_SYNPROXY);
4121200930Sdelphij			return (PF_SYNPROXY_DROP);
4122200930Sdelphij		}
4123200930Sdelphij	}
4124200930Sdelphij
	/*
	 * State reuse: a SYN without ACK while both peers are at or past
	 * FIN_WAIT_2.  The old state is marked closed and unlinked, *state is
	 * cleared for the caller, and this packet is dropped.
	 */
4125200930Sdelphij	if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
4126200930Sdelphij	    dst->state >= TCPS_FIN_WAIT_2 &&
4127200930Sdelphij	    src->state >= TCPS_FIN_WAIT_2) {
4128223637Sbz		if (V_pf_status.debug >= PF_DEBUG_MISC) {
4129200930Sdelphij			printf("pf: state reuse ");
4130200930Sdelphij			pf_print_state(*state);
4131200930Sdelphij			pf_print_flags(th->th_flags);
4132200930Sdelphij			printf("\n");
4133200930Sdelphij		}
4134200930Sdelphij		/* XXX make sure it's the same direction ?? */
4135200930Sdelphij		(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
4136240233Sglebius		pf_unlink_state(*state, PF_ENTER_LOCKED);
4137200930Sdelphij		*state = NULL;
4138200930Sdelphij		return (PF_DROP);
4139200930Sdelphij	}
4140200930Sdelphij
	/*
	 * Regular tracking.  Only the full tracker receives &copyback; the
	 * sloppy tracker is not given the mbuf at all.
	 */
4141200930Sdelphij	if ((*state)->state_flags & PFSTATE_SLOPPY) {
4142200930Sdelphij		if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
4143200930Sdelphij			return (PF_DROP);
4144200930Sdelphij	} else {
4145200930Sdelphij		if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
4146200930Sdelphij		    &copyback) == PF_DROP)
4147200930Sdelphij			return (PF_DROP);
4148200930Sdelphij	}
4149200930Sdelphij
4150126258Smlaier	/* translate source/destination address, if necessary */
	/*
	 * copyback is set whenever the wire and stack keys differ, even if
	 * neither pf_change_ap() call below ends up running.
	 */
4151223637Sbz	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4152223637Sbz		struct pf_state_key *nk = (*state)->key[pd->didx];
4153223637Sbz
4154223637Sbz		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
4155223637Sbz		    nk->port[pd->sidx] != th->th_sport)
4156126258Smlaier			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4157223637Sbz			    &th->th_sum, &nk->addr[pd->sidx],
4158223637Sbz			    nk->port[pd->sidx], 0, pd->af);
4159223637Sbz
4160223637Sbz		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
4161223637Sbz		    nk->port[pd->didx] != th->th_dport)
4162126258Smlaier			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4163223637Sbz			    &th->th_sum, &nk->addr[pd->didx],
4164223637Sbz			    nk->port[pd->didx], 0, pd->af);
4165223637Sbz		copyback = 1;
4166126258Smlaier	}
4167126258Smlaier
4168223637Sbz	/* Copyback sequence modulation or stateful scrub changes if needed */
4169223637Sbz	if (copyback)
4170223637Sbz		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4171223637Sbz
4172126258Smlaier	return (PF_PASS);
4173126258Smlaier}
4174126258Smlaier
/*
 * Look up the state for a UDP packet, advance the pseudo-state of both
 * peers, refresh the state's expiry and apply address/port translation.
 *
 * The lookup key is built from the packet's addresses and UDP ports, with
 * index 0/1 swapped when direction is not PF_IN.  The parameter h is not
 * referenced in this function.
 *
 * Returns PF_PASS from the code below (plus whatever STATE_LOOKUP may
 * return; that macro is defined outside this view).
 */
4175240233Sglebiusstatic int
4176130613Smlaierpf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4177130613Smlaier    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4178126258Smlaier{
4179126258Smlaier	struct pf_state_peer	*src, *dst;
4180223637Sbz	struct pf_state_key_cmp	 key;
4181126258Smlaier	struct udphdr		*uh = pd->hdr.udp;
4182126258Smlaier
4183240233Sglebius	bzero(&key, sizeof(key));
4184126258Smlaier	key.af = pd->af;
4185126258Smlaier	key.proto = IPPROTO_UDP;
4186223637Sbz	if (direction == PF_IN)	{	/* wire side, straight */
4187223637Sbz		PF_ACPY(&key.addr[0], pd->src, key.af);
4188223637Sbz		PF_ACPY(&key.addr[1], pd->dst, key.af);
4189223637Sbz		key.port[0] = uh->uh_sport;
4190223637Sbz		key.port[1] = uh->uh_dport;
4191223637Sbz	} else {			/* stack side, reverse */
4192223637Sbz		PF_ACPY(&key.addr[1], pd->src, key.af);
4193223637Sbz		PF_ACPY(&key.addr[0], pd->dst, key.af);
4194223637Sbz		key.port[1] = uh->uh_sport;
4195223637Sbz		key.port[0] = uh->uh_dport;
4196130613Smlaier	}
4197126258Smlaier
	/*
	 * NOTE(review): STATE_LOOKUP is a macro defined outside this view.
	 * *state is dereferenced immediately afterwards, so it presumably
	 * returns from this function when no usable state is found - confirm.
	 */
4198240233Sglebius	STATE_LOOKUP(kif, &key, direction, *state, pd);
4199126258Smlaier
	/*
	 * src/dst are the state's two peers relative to this packet: kept as
	 * stored when the packet travels in the state's direction, swapped
	 * otherwise.
	 */
4200126258Smlaier	if (direction == (*state)->direction) {
4201126258Smlaier		src = &(*state)->src;
4202126258Smlaier		dst = &(*state)->dst;
4203126258Smlaier	} else {
4204126258Smlaier		src = &(*state)->dst;
4205126258Smlaier		dst = &(*state)->src;
4206126258Smlaier	}
4207126258Smlaier
4208126258Smlaier	/* update states */
	/*
	 * src is raised to at least SINGLE; a dst that was SINGLE becomes
	 * MULTIPLE.
	 */
4209126258Smlaier	if (src->state < PFUDPS_SINGLE)
4210126258Smlaier		src->state = PFUDPS_SINGLE;
4211126258Smlaier	if (dst->state == PFUDPS_SINGLE)
4212126258Smlaier		dst->state = PFUDPS_MULTIPLE;
4213126258Smlaier
4214126258Smlaier	/* update expire time */
	/* The MULTIPLE timeout applies only once both peers are MULTIPLE. */
4215240233Sglebius	(*state)->expire = time_uptime;
4216126258Smlaier	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4217126258Smlaier		(*state)->timeout = PFTM_UDP_MULTIPLE;
4218126258Smlaier	else
4219126258Smlaier		(*state)->timeout = PFTM_UDP_SINGLE;
4220126258Smlaier
4221126258Smlaier	/* translate source/destination address, if necessary */
	/*
	 * The header is copied back into the mbuf whenever the wire and stack
	 * keys differ, even if neither pf_change_ap() call below runs.
	 */
4222223637Sbz	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4223223637Sbz		struct pf_state_key *nk = (*state)->key[pd->didx];
4224223637Sbz
4225223637Sbz		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
4226223637Sbz		    nk->port[pd->sidx] != uh->uh_sport)
4227126258Smlaier			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4228223637Sbz			    &uh->uh_sum, &nk->addr[pd->sidx],
4229223637Sbz			    nk->port[pd->sidx], 1, pd->af);
4230223637Sbz
4231223637Sbz		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
4232223637Sbz		    nk->port[pd->didx] != uh->uh_dport)
4233126258Smlaier			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4234223637Sbz			    &uh->uh_sum, &nk->addr[pd->didx],
4235223637Sbz			    nk->port[pd->didx], 1, pd->af);
4236126261Smlaier		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4237126258Smlaier	}
4238126258Smlaier
4239126258Smlaier	return (PF_PASS);
4240126258Smlaier}
4241126258Smlaier
4242240233Sglebiusstatic int
4243130613Smlaierpf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4244145836Smlaier    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
4245126258Smlaier{
4246223637Sbz	struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
4247223637Sbz	u_int16_t	 icmpid = 0, *icmpsum;
4248223637Sbz	u_int8_t	 icmptype;
4249130613Smlaier	int		 state_icmp = 0;
4250223637Sbz	struct pf_state_key_cmp key;
4251126258Smlaier
4252240233Sglebius	bzero(&key, sizeof(key));
4253126258Smlaier	switch (pd->proto) {
4254126258Smlaier#ifdef INET
4255126258Smlaier	case IPPROTO_ICMP:
4256126258Smlaier		icmptype = pd->hdr.icmp->icmp_type;
4257126258Smlaier		icmpid = pd->hdr.icmp->icmp_id;
4258126258Smlaier		icmpsum = &pd->hdr.icmp->icmp_cksum;
4259126258Smlaier
4260126258Smlaier		if (icmptype == ICMP_UNREACH ||
4261126258Smlaier		    icmptype == ICMP_SOURCEQUENCH ||
4262126258Smlaier		    icmptype == ICMP_REDIRECT ||
4263126258Smlaier		    icmptype == ICMP_TIMXCEED ||
4264126258Smlaier		    icmptype == ICMP_PARAMPROB)
4265126258Smlaier			state_icmp++;
4266126258Smlaier		break;
4267126258Smlaier#endif /* INET */
4268126258Smlaier#ifdef INET6
4269126258Smlaier	case IPPROTO_ICMPV6:
4270126258Smlaier		icmptype = pd->hdr.icmp6->icmp6_type;
4271126258Smlaier		icmpid = pd->hdr.icmp6->icmp6_id;
4272126258Smlaier		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4273126258Smlaier
4274126258Smlaier		if (icmptype == ICMP6_DST_UNREACH ||
4275126258Smlaier		    icmptype == ICMP6_PACKET_TOO_BIG ||
4276126258Smlaier		    icmptype == ICMP6_TIME_EXCEEDED ||
4277126258Smlaier		    icmptype == ICMP6_PARAM_PROB)
4278126258Smlaier			state_icmp++;
4279126258Smlaier		break;
4280126258Smlaier#endif /* INET6 */
4281126258Smlaier	}
4282126258Smlaier
4283126258Smlaier	if (!state_icmp) {
4284126258Smlaier
4285126258Smlaier		/*
4286126258Smlaier		 * ICMP query/reply message not related to a TCP/UDP packet.
4287126258Smlaier		 * Search for an ICMP state.
4288126258Smlaier		 */
4289126258Smlaier		key.af = pd->af;
4290126258Smlaier		key.proto = pd->proto;
4291223637Sbz		key.port[0] = key.port[1] = icmpid;
4292223637Sbz		if (direction == PF_IN)	{	/* wire side, straight */
4293223637Sbz			PF_ACPY(&key.addr[0], pd->src, key.af);
4294223637Sbz			PF_ACPY(&key.addr[1], pd->dst, key.af);
4295223637Sbz		} else {			/* stack side, reverse */
4296223637Sbz			PF_ACPY(&key.addr[1], pd->src, key.af);
4297223637Sbz			PF_ACPY(&key.addr[0], pd->dst, key.af);
4298130613Smlaier		}
4299126258Smlaier
4300240233Sglebius		STATE_LOOKUP(kif, &key, direction, *state, pd);
4301126258Smlaier
4302240233Sglebius		(*state)->expire = time_uptime;
4303126258Smlaier		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4304126258Smlaier
4305126258Smlaier		/* translate source/destination address, if necessary */
4306223637Sbz		if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4307223637Sbz			struct pf_state_key *nk = (*state)->key[pd->didx];
4308223637Sbz
4309223637Sbz			switch (pd->af) {
4310126258Smlaier#ifdef INET
4311223637Sbz			case AF_INET:
4312223637Sbz				if (PF_ANEQ(pd->src,
4313223637Sbz				    &nk->addr[pd->sidx], AF_INET))
4314126258Smlaier					pf_change_a(&saddr->v4.s_addr,
4315126258Smlaier					    pd->ip_sum,
4316223637Sbz					    nk->addr[pd->sidx].v4.s_addr, 0);
4317223637Sbz
4318223637Sbz				if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
4319223637Sbz				    AF_INET))
4320223637Sbz					pf_change_a(&daddr->v4.s_addr,
4321223637Sbz					    pd->ip_sum,
4322223637Sbz					    nk->addr[pd->didx].v4.s_addr, 0);
4323223637Sbz
4324223637Sbz				if (nk->port[0] !=
4325223637Sbz				    pd->hdr.icmp->icmp_id) {
4326149884Smlaier					pd->hdr.icmp->icmp_cksum =
4327149884Smlaier					    pf_cksum_fixup(
4328149884Smlaier					    pd->hdr.icmp->icmp_cksum, icmpid,
4329223637Sbz					    nk->port[pd->sidx], 0);
4330149884Smlaier					pd->hdr.icmp->icmp_id =
4331223637Sbz					    nk->port[pd->sidx];
4332223637Sbz				}
4333223637Sbz
4334223637Sbz				m_copyback(m, off, ICMP_MINLEN,
4335240233Sglebius				    (caddr_t )pd->hdr.icmp);
4336223637Sbz				break;
4337126258Smlaier#endif /* INET */
4338126258Smlaier#ifdef INET6
4339223637Sbz			case AF_INET6:
4340223637Sbz				if (PF_ANEQ(pd->src,
4341223637Sbz				    &nk->addr[pd->sidx], AF_INET6))
4342126258Smlaier					pf_change_a6(saddr,
4343126258Smlaier					    &pd->hdr.icmp6->icmp6_cksum,
4344223637Sbz					    &nk->addr[pd->sidx], 0);
4345223637Sbz
4346223637Sbz				if (PF_ANEQ(pd->dst,
4347223637Sbz				    &nk->addr[pd->didx], AF_INET6))
4348126258Smlaier					pf_change_a6(daddr,
4349126258Smlaier					    &pd->hdr.icmp6->icmp6_cksum,
4350223637Sbz					    &nk->addr[pd->didx], 0);
4351223637Sbz
4352240233Sglebius				m_copyback(m, off, sizeof(struct icmp6_hdr),
4353240233Sglebius				    (caddr_t )pd->hdr.icmp6);
4354223637Sbz				break;
4355126258Smlaier#endif /* INET6 */
4356126258Smlaier			}
4357126258Smlaier		}
4358126258Smlaier		return (PF_PASS);
4359126258Smlaier
4360126258Smlaier	} else {
4361126258Smlaier		/*
4362126258Smlaier		 * ICMP error message in response to a TCP/UDP packet.
4363126258Smlaier		 * Extract the inner TCP/UDP header and search for that state.
4364126258Smlaier		 */
4365126258Smlaier
4366126258Smlaier		struct pf_pdesc	pd2;
4367223637Sbz		bzero(&pd2, sizeof pd2);
4368126258Smlaier#ifdef INET
4369126258Smlaier		struct ip	h2;
4370126258Smlaier#endif /* INET */
4371126258Smlaier#ifdef INET6
4372126258Smlaier		struct ip6_hdr	h2_6;
4373126258Smlaier		int		terminal = 0;
4374126258Smlaier#endif /* INET6 */
4375223637Sbz		int		ipoff2 = 0;
4376223637Sbz		int		off2 = 0;
4377126258Smlaier
4378126258Smlaier		pd2.af = pd->af;
4379223637Sbz		/* Payload packet is from the opposite direction. */
4380223637Sbz		pd2.sidx = (direction == PF_IN) ? 1 : 0;
4381223637Sbz		pd2.didx = (direction == PF_IN) ? 0 : 1;
4382126258Smlaier		switch (pd->af) {
4383126258Smlaier#ifdef INET
4384126258Smlaier		case AF_INET:
4385126258Smlaier			/* offset of h2 in mbuf chain */
4386126258Smlaier			ipoff2 = off + ICMP_MINLEN;
4387126258Smlaier
4388126258Smlaier			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4389145836Smlaier			    NULL, reason, pd2.af)) {
4390126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4391126258Smlaier				    ("pf: ICMP error message too short "
4392126258Smlaier				    "(ip)\n"));
4393126258Smlaier				return (PF_DROP);
4394126258Smlaier			}
4395126258Smlaier			/*
4396126258Smlaier			 * ICMP error messages don't refer to non-first
4397126258Smlaier			 * fragments
4398126258Smlaier			 */
4399145836Smlaier			if (h2.ip_off & htons(IP_OFFMASK)) {
4400145836Smlaier				REASON_SET(reason, PFRES_FRAG);
4401126258Smlaier				return (PF_DROP);
4402145836Smlaier			}
4403126258Smlaier
4404126258Smlaier			/* offset of protocol header that follows h2 */
4405126258Smlaier			off2 = ipoff2 + (h2.ip_hl << 2);
4406126258Smlaier
4407126258Smlaier			pd2.proto = h2.ip_p;
4408126258Smlaier			pd2.src = (struct pf_addr *)&h2.ip_src;
4409126258Smlaier			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4410126258Smlaier			pd2.ip_sum = &h2.ip_sum;
4411126258Smlaier			break;
4412126258Smlaier#endif /* INET */
4413126258Smlaier#ifdef INET6
4414126258Smlaier		case AF_INET6:
4415126258Smlaier			ipoff2 = off + sizeof(struct icmp6_hdr);
4416126258Smlaier
4417126258Smlaier			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4418145836Smlaier			    NULL, reason, pd2.af)) {
4419126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4420126258Smlaier				    ("pf: ICMP error message too short "
4421126258Smlaier				    "(ip6)\n"));
4422126258Smlaier				return (PF_DROP);
4423126258Smlaier			}
4424126258Smlaier			pd2.proto = h2_6.ip6_nxt;
4425126258Smlaier			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4426126258Smlaier			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4427126258Smlaier			pd2.ip_sum = NULL;
4428126258Smlaier			off2 = ipoff2 + sizeof(h2_6);
4429126258Smlaier			do {
4430126258Smlaier				switch (pd2.proto) {
4431126258Smlaier				case IPPROTO_FRAGMENT:
4432126258Smlaier					/*
4433126258Smlaier					 * ICMPv6 error messages for
4434126258Smlaier					 * non-first fragments
4435126258Smlaier					 */
4436145836Smlaier					REASON_SET(reason, PFRES_FRAG);
4437126258Smlaier					return (PF_DROP);
4438126258Smlaier				case IPPROTO_AH:
4439126258Smlaier				case IPPROTO_HOPOPTS:
4440126258Smlaier				case IPPROTO_ROUTING:
4441126258Smlaier				case IPPROTO_DSTOPTS: {
4442126258Smlaier					/* get next header and header length */
4443126258Smlaier					struct ip6_ext opt6;
4444126258Smlaier
4445126258Smlaier					if (!pf_pull_hdr(m, off2, &opt6,
4446145836Smlaier					    sizeof(opt6), NULL, reason,
4447145836Smlaier					    pd2.af)) {
4448126258Smlaier						DPFPRINTF(PF_DEBUG_MISC,
4449126258Smlaier						    ("pf: ICMPv6 short opt\n"));
4450126258Smlaier						return (PF_DROP);
4451126258Smlaier					}
4452126258Smlaier					if (pd2.proto == IPPROTO_AH)
4453126258Smlaier						off2 += (opt6.ip6e_len + 2) * 4;
4454126258Smlaier					else
4455126258Smlaier						off2 += (opt6.ip6e_len + 1) * 8;
4456126258Smlaier					pd2.proto = opt6.ip6e_nxt;
4457126258Smlaier					/* goto the next header */
4458126258Smlaier					break;
4459126258Smlaier				}
4460126258Smlaier				default:
4461126258Smlaier					terminal++;
4462126258Smlaier					break;
4463126258Smlaier				}
4464126258Smlaier			} while (!terminal);
4465126258Smlaier			break;
4466126258Smlaier#endif /* INET6 */
4467126258Smlaier		}
4468126258Smlaier
4469126258Smlaier		switch (pd2.proto) {
4470126258Smlaier		case IPPROTO_TCP: {
4471126258Smlaier			struct tcphdr		 th;
4472126258Smlaier			u_int32_t		 seq;
4473126258Smlaier			struct pf_state_peer	*src, *dst;
4474126258Smlaier			u_int8_t		 dws;
4475128129Smlaier			int			 copyback = 0;
4476126258Smlaier
4477126258Smlaier			/*
4478126258Smlaier			 * Only the first 8 bytes of the TCP header can be
4479126258Smlaier			 * expected. Don't access any TCP header fields after
4480126258Smlaier			 * th_seq, an ackskew test is not possible.
4481126258Smlaier			 */
4482145836Smlaier			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
4483145836Smlaier			    pd2.af)) {
4484126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4485126258Smlaier				    ("pf: ICMP error message too short "
4486126258Smlaier				    "(tcp)\n"));
4487126258Smlaier				return (PF_DROP);
4488126258Smlaier			}
4489126258Smlaier
4490126258Smlaier			key.af = pd2.af;
4491126258Smlaier			key.proto = IPPROTO_TCP;
4492223637Sbz			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
4493223637Sbz			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
4494223637Sbz			key.port[pd2.sidx] = th.th_sport;
4495223637Sbz			key.port[pd2.didx] = th.th_dport;
4496126258Smlaier
4497240233Sglebius			STATE_LOOKUP(kif, &key, direction, *state, pd);
4498126258Smlaier
4499126258Smlaier			if (direction == (*state)->direction) {
4500126258Smlaier				src = &(*state)->dst;
4501126258Smlaier				dst = &(*state)->src;
4502126258Smlaier			} else {
4503126258Smlaier				src = &(*state)->src;
4504126258Smlaier				dst = &(*state)->dst;
4505126258Smlaier			}
4506126258Smlaier
4507171929Sdhartmei			if (src->wscale && dst->wscale)
4508126258Smlaier				dws = dst->wscale & PF_WSCALE_MASK;
4509126258Smlaier			else
4510126258Smlaier				dws = 0;
4511126258Smlaier
4512126258Smlaier			/* Demodulate sequence number */
4513126258Smlaier			seq = ntohl(th.th_seq) - src->seqdiff;
4514128129Smlaier			if (src->seqdiff) {
4515128129Smlaier				pf_change_a(&th.th_seq, icmpsum,
4516126258Smlaier				    htonl(seq), 0);
4517128129Smlaier				copyback = 1;
4518128129Smlaier			}
4519126258Smlaier
4520200930Sdelphij			if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
4521200930Sdelphij			    (!SEQ_GEQ(src->seqhi, seq) ||
4522200930Sdelphij			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
4523223637Sbz				if (V_pf_status.debug >= PF_DEBUG_MISC) {
4524126258Smlaier					printf("pf: BAD ICMP %d:%d ",
4525126258Smlaier					    icmptype, pd->hdr.icmp->icmp_code);
4526126258Smlaier					pf_print_host(pd->src, 0, pd->af);
4527126258Smlaier					printf(" -> ");
4528126258Smlaier					pf_print_host(pd->dst, 0, pd->af);
4529126258Smlaier					printf(" state: ");
4530126258Smlaier					pf_print_state(*state);
4531126258Smlaier					printf(" seq=%u\n", seq);
4532126258Smlaier				}
4533145836Smlaier				REASON_SET(reason, PFRES_BADSTATE);
4534126258Smlaier				return (PF_DROP);
4535223637Sbz			} else {
4536223637Sbz				if (V_pf_status.debug >= PF_DEBUG_MISC) {
4537223637Sbz					printf("pf: OK ICMP %d:%d ",
4538223637Sbz					    icmptype, pd->hdr.icmp->icmp_code);
4539223637Sbz					pf_print_host(pd->src, 0, pd->af);
4540223637Sbz					printf(" -> ");
4541223637Sbz					pf_print_host(pd->dst, 0, pd->af);
4542223637Sbz					printf(" state: ");
4543223637Sbz					pf_print_state(*state);
4544223637Sbz					printf(" seq=%u\n", seq);
4545223637Sbz				}
4546126258Smlaier			}
4547126258Smlaier
4548223637Sbz			/* translate source/destination address, if necessary */
4549223637Sbz			if ((*state)->key[PF_SK_WIRE] !=
4550223637Sbz			    (*state)->key[PF_SK_STACK]) {
4551223637Sbz				struct pf_state_key *nk =
4552223637Sbz				    (*state)->key[pd->didx];
4553223637Sbz
4554223637Sbz				if (PF_ANEQ(pd2.src,
4555223637Sbz				    &nk->addr[pd2.sidx], pd2.af) ||
4556223637Sbz				    nk->port[pd2.sidx] != th.th_sport)
4557126258Smlaier					pf_change_icmp(pd2.src, &th.th_sport,
4558223637Sbz					    daddr, &nk->addr[pd2.sidx],
4559223637Sbz					    nk->port[pd2.sidx], NULL,
4560126258Smlaier					    pd2.ip_sum, icmpsum,
4561126258Smlaier					    pd->ip_sum, 0, pd2.af);
4562223637Sbz
4563223637Sbz				if (PF_ANEQ(pd2.dst,
4564223637Sbz				    &nk->addr[pd2.didx], pd2.af) ||
4565223637Sbz				    nk->port[pd2.didx] != th.th_dport)
4566126258Smlaier					pf_change_icmp(pd2.dst, &th.th_dport,
4567223637Sbz					    NULL, /* XXX Inbound NAT? */
4568223637Sbz					    &nk->addr[pd2.didx],
4569223637Sbz					    nk->port[pd2.didx], NULL,
4570126258Smlaier					    pd2.ip_sum, icmpsum,
4571126258Smlaier					    pd->ip_sum, 0, pd2.af);
4572128129Smlaier				copyback = 1;
4573128129Smlaier			}
4574128129Smlaier
4575128129Smlaier			if (copyback) {
4576126258Smlaier				switch (pd2.af) {
4577126258Smlaier#ifdef INET
4578126258Smlaier				case AF_INET:
4579126258Smlaier					m_copyback(m, off, ICMP_MINLEN,
4580240233Sglebius					    (caddr_t )pd->hdr.icmp);
4581126258Smlaier					m_copyback(m, ipoff2, sizeof(h2),
4582240233Sglebius					    (caddr_t )&h2);
4583126258Smlaier					break;
4584126258Smlaier#endif /* INET */
4585126258Smlaier#ifdef INET6
4586126258Smlaier				case AF_INET6:
4587126258Smlaier					m_copyback(m, off,
4588126258Smlaier					    sizeof(struct icmp6_hdr),
4589240233Sglebius					    (caddr_t )pd->hdr.icmp6);
4590126258Smlaier					m_copyback(m, ipoff2, sizeof(h2_6),
4591240233Sglebius					    (caddr_t )&h2_6);
4592126258Smlaier					break;
4593126258Smlaier#endif /* INET6 */
4594126258Smlaier				}
4595126261Smlaier				m_copyback(m, off2, 8, (caddr_t)&th);
4596126258Smlaier			}
4597126258Smlaier
4598126258Smlaier			return (PF_PASS);
4599126258Smlaier			break;
4600126258Smlaier		}
4601126258Smlaier		case IPPROTO_UDP: {
4602126258Smlaier			struct udphdr		uh;
4603126258Smlaier
4604126258Smlaier			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4605145836Smlaier			    NULL, reason, pd2.af)) {
4606126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4607126258Smlaier				    ("pf: ICMP error message too short "
4608126258Smlaier				    "(udp)\n"));
4609126258Smlaier				return (PF_DROP);
4610126258Smlaier			}
4611126258Smlaier
4612126258Smlaier			key.af = pd2.af;
4613126258Smlaier			key.proto = IPPROTO_UDP;
4614223637Sbz			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
4615223637Sbz			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
4616223637Sbz			key.port[pd2.sidx] = uh.uh_sport;
4617223637Sbz			key.port[pd2.didx] = uh.uh_dport;
4618126258Smlaier
4619240233Sglebius			STATE_LOOKUP(kif, &key, direction, *state, pd);
4620126258Smlaier
4621223637Sbz			/* translate source/destination address, if necessary */
4622223637Sbz			if ((*state)->key[PF_SK_WIRE] !=
4623223637Sbz			    (*state)->key[PF_SK_STACK]) {
4624223637Sbz				struct pf_state_key *nk =
4625223637Sbz				    (*state)->key[pd->didx];
4626223637Sbz
4627223637Sbz				if (PF_ANEQ(pd2.src,
4628223637Sbz				    &nk->addr[pd2.sidx], pd2.af) ||
4629223637Sbz				    nk->port[pd2.sidx] != uh.uh_sport)
4630126258Smlaier					pf_change_icmp(pd2.src, &uh.uh_sport,
4631223637Sbz					    daddr, &nk->addr[pd2.sidx],
4632223637Sbz					    nk->port[pd2.sidx], &uh.uh_sum,
4633126258Smlaier					    pd2.ip_sum, icmpsum,
4634126258Smlaier					    pd->ip_sum, 1, pd2.af);
4635223637Sbz
4636223637Sbz				if (PF_ANEQ(pd2.dst,
4637223637Sbz				    &nk->addr[pd2.didx], pd2.af) ||
4638223637Sbz				    nk->port[pd2.didx] != uh.uh_dport)
4639126258Smlaier					pf_change_icmp(pd2.dst, &uh.uh_dport,
4640223637Sbz					    NULL, /* XXX Inbound NAT? */
4641223637Sbz					    &nk->addr[pd2.didx],
4642223637Sbz					    nk->port[pd2.didx], &uh.uh_sum,
4643126258Smlaier					    pd2.ip_sum, icmpsum,
4644126258Smlaier					    pd->ip_sum, 1, pd2.af);
4645223637Sbz
4646126258Smlaier				switch (pd2.af) {
4647126258Smlaier#ifdef INET
4648126258Smlaier				case AF_INET:
4649126258Smlaier					m_copyback(m, off, ICMP_MINLEN,
4650240233Sglebius					    (caddr_t )pd->hdr.icmp);
4651223637Sbz					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
4652126258Smlaier					break;
4653126258Smlaier#endif /* INET */
4654126258Smlaier#ifdef INET6
4655126258Smlaier				case AF_INET6:
4656126258Smlaier					m_copyback(m, off,
4657126258Smlaier					    sizeof(struct icmp6_hdr),
4658240233Sglebius					    (caddr_t )pd->hdr.icmp6);
4659126258Smlaier					m_copyback(m, ipoff2, sizeof(h2_6),
4660240233Sglebius					    (caddr_t )&h2_6);
4661126258Smlaier					break;
4662126258Smlaier#endif /* INET6 */
4663126258Smlaier				}
4664223637Sbz				m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
4665126258Smlaier			}
4666126258Smlaier			return (PF_PASS);
4667126258Smlaier			break;
4668126258Smlaier		}
4669126258Smlaier#ifdef INET
4670126258Smlaier		case IPPROTO_ICMP: {
4671126258Smlaier			struct icmp		iih;
4672126258Smlaier
4673126258Smlaier			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4674145836Smlaier			    NULL, reason, pd2.af)) {
4675126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4676126258Smlaier				    ("pf: ICMP error message too short i"
4677126258Smlaier				    "(icmp)\n"));
4678126258Smlaier				return (PF_DROP);
4679126258Smlaier			}
4680126258Smlaier
4681126258Smlaier			key.af = pd2.af;
4682126258Smlaier			key.proto = IPPROTO_ICMP;
4683223637Sbz			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
4684223637Sbz			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
4685223637Sbz			key.port[0] = key.port[1] = iih.icmp_id;
4686126258Smlaier
4687240233Sglebius			STATE_LOOKUP(kif, &key, direction, *state, pd);
4688126258Smlaier
4689223637Sbz			/* translate source/destination address, if necessary */
4690223637Sbz			if ((*state)->key[PF_SK_WIRE] !=
4691223637Sbz			    (*state)->key[PF_SK_STACK]) {
4692223637Sbz				struct pf_state_key *nk =
4693223637Sbz				    (*state)->key[pd->didx];
4694223637Sbz
4695223637Sbz				if (PF_ANEQ(pd2.src,
4696223637Sbz				    &nk->addr[pd2.sidx], pd2.af) ||
4697223637Sbz				    nk->port[pd2.sidx] != iih.icmp_id)
4698126258Smlaier					pf_change_icmp(pd2.src, &iih.icmp_id,
4699223637Sbz					    daddr, &nk->addr[pd2.sidx],
4700223637Sbz					    nk->port[pd2.sidx], NULL,
4701126258Smlaier					    pd2.ip_sum, icmpsum,
4702126258Smlaier					    pd->ip_sum, 0, AF_INET);
4703223637Sbz
4704223637Sbz				if (PF_ANEQ(pd2.dst,
4705223637Sbz				    &nk->addr[pd2.didx], pd2.af) ||
4706223637Sbz				    nk->port[pd2.didx] != iih.icmp_id)
4707126258Smlaier					pf_change_icmp(pd2.dst, &iih.icmp_id,
4708223637Sbz					    NULL, /* XXX Inbound NAT? */
4709223637Sbz					    &nk->addr[pd2.didx],
4710223637Sbz					    nk->port[pd2.didx], NULL,
4711126258Smlaier					    pd2.ip_sum, icmpsum,
4712126258Smlaier					    pd->ip_sum, 0, AF_INET);
4713223637Sbz
4714223637Sbz				m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
4715223637Sbz				m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
4716223637Sbz				m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
4717126258Smlaier			}
4718126258Smlaier			return (PF_PASS);
4719126258Smlaier			break;
4720126258Smlaier		}
4721126258Smlaier#endif /* INET */
4722126258Smlaier#ifdef INET6
4723126258Smlaier		case IPPROTO_ICMPV6: {
4724126258Smlaier			struct icmp6_hdr	iih;
4725126258Smlaier
4726126258Smlaier			if (!pf_pull_hdr(m, off2, &iih,
4727145836Smlaier			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
4728126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
4729126258Smlaier				    ("pf: ICMP error message too short "
4730126258Smlaier				    "(icmp6)\n"));
4731126258Smlaier				return (PF_DROP);
4732126258Smlaier			}
4733126258Smlaier
4734126258Smlaier			key.af = pd2.af;
4735126258Smlaier			key.proto = IPPROTO_ICMPV6;
4736223637Sbz			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
4737223637Sbz			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
4738223637Sbz			key.port[0] = key.port[1] = iih.icmp6_id;
4739126258Smlaier
4740240233Sglebius			STATE_LOOKUP(kif, &key, direction, *state, pd);
4741126258Smlaier
4742223637Sbz			/* translate source/destination address, if necessary */
4743223637Sbz			if ((*state)->key[PF_SK_WIRE] !=
4744223637Sbz			    (*state)->key[PF_SK_STACK]) {
4745223637Sbz				struct pf_state_key *nk =
4746223637Sbz				    (*state)->key[pd->didx];
4747223637Sbz
4748223637Sbz				if (PF_ANEQ(pd2.src,
4749223637Sbz				    &nk->addr[pd2.sidx], pd2.af) ||
4750223637Sbz				    nk->port[pd2.sidx] != iih.icmp6_id)
4751126258Smlaier					pf_change_icmp(pd2.src, &iih.icmp6_id,
4752223637Sbz					    daddr, &nk->addr[pd2.sidx],
4753223637Sbz					    nk->port[pd2.sidx], NULL,
4754126258Smlaier					    pd2.ip_sum, icmpsum,
4755126258Smlaier					    pd->ip_sum, 0, AF_INET6);
4756223637Sbz
4757223637Sbz				if (PF_ANEQ(pd2.dst,
4758223637Sbz				    &nk->addr[pd2.didx], pd2.af) ||
4759223637Sbz				    nk->port[pd2.didx] != iih.icmp6_id)
4760126258Smlaier					pf_change_icmp(pd2.dst, &iih.icmp6_id,
4761223637Sbz					    NULL, /* XXX Inbound NAT? */
4762223637Sbz					    &nk->addr[pd2.didx],
4763223637Sbz					    nk->port[pd2.didx], NULL,
4764126258Smlaier					    pd2.ip_sum, icmpsum,
4765126258Smlaier					    pd->ip_sum, 0, AF_INET6);
4766223637Sbz
4767126258Smlaier				m_copyback(m, off, sizeof(struct icmp6_hdr),
4768126261Smlaier				    (caddr_t)pd->hdr.icmp6);
4769223637Sbz				m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
4770126258Smlaier				m_copyback(m, off2, sizeof(struct icmp6_hdr),
4771126261Smlaier				    (caddr_t)&iih);
4772126258Smlaier			}
4773126258Smlaier			return (PF_PASS);
4774126258Smlaier			break;
4775126258Smlaier		}
4776126258Smlaier#endif /* INET6 */
4777126258Smlaier		default: {
4778126258Smlaier			key.af = pd2.af;
4779126258Smlaier			key.proto = pd2.proto;
4780223637Sbz			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
4781223637Sbz			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
4782223637Sbz			key.port[0] = key.port[1] = 0;
4783126258Smlaier
4784240233Sglebius			STATE_LOOKUP(kif, &key, direction, *state, pd);
4785126258Smlaier
4786223637Sbz			/* translate source/destination address, if necessary */
4787223637Sbz			if ((*state)->key[PF_SK_WIRE] !=
4788223637Sbz			    (*state)->key[PF_SK_STACK]) {
4789223637Sbz				struct pf_state_key *nk =
4790223637Sbz				    (*state)->key[pd->didx];
4791223637Sbz
4792223637Sbz				if (PF_ANEQ(pd2.src,
4793223637Sbz				    &nk->addr[pd2.sidx], pd2.af))
4794223637Sbz					pf_change_icmp(pd2.src, NULL, daddr,
4795223637Sbz					    &nk->addr[pd2.sidx], 0, NULL,
4796126258Smlaier					    pd2.ip_sum, icmpsum,
4797126258Smlaier					    pd->ip_sum, 0, pd2.af);
4798223637Sbz
4799223637Sbz				if (PF_ANEQ(pd2.dst,
4800223637Sbz				    &nk->addr[pd2.didx], pd2.af))
4801223637Sbz					pf_change_icmp(pd2.src, NULL,
4802223637Sbz					    NULL, /* XXX Inbound NAT? */
4803223637Sbz					    &nk->addr[pd2.didx], 0, NULL,
4804126258Smlaier					    pd2.ip_sum, icmpsum,
4805126258Smlaier					    pd->ip_sum, 0, pd2.af);
4806223637Sbz
4807126258Smlaier				switch (pd2.af) {
4808126258Smlaier#ifdef INET
4809126258Smlaier				case AF_INET:
4810126258Smlaier					m_copyback(m, off, ICMP_MINLEN,
4811126261Smlaier					    (caddr_t)pd->hdr.icmp);
4812223637Sbz					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
4813126258Smlaier					break;
4814126258Smlaier#endif /* INET */
4815126258Smlaier#ifdef INET6
4816126258Smlaier				case AF_INET6:
4817126258Smlaier					m_copyback(m, off,
4818126258Smlaier					    sizeof(struct icmp6_hdr),
4819240233Sglebius					    (caddr_t )pd->hdr.icmp6);
4820126258Smlaier					m_copyback(m, ipoff2, sizeof(h2_6),
4821240233Sglebius					    (caddr_t )&h2_6);
4822126258Smlaier					break;
4823126258Smlaier#endif /* INET6 */
4824126258Smlaier				}
4825126258Smlaier			}
4826126258Smlaier			return (PF_PASS);
4827126258Smlaier			break;
4828126258Smlaier		}
4829126258Smlaier		}
4830126258Smlaier	}
4831126258Smlaier}
4832126258Smlaier
4833240233Sglebiusstatic int
4834130613Smlaierpf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
4835223637Sbz    struct mbuf *m, struct pf_pdesc *pd)
4836126258Smlaier{
4837126258Smlaier	struct pf_state_peer	*src, *dst;
4838223637Sbz	struct pf_state_key_cmp	 key;
4839126258Smlaier
4840240233Sglebius	bzero(&key, sizeof(key));
4841126258Smlaier	key.af = pd->af;
4842126258Smlaier	key.proto = pd->proto;
4843130613Smlaier	if (direction == PF_IN)	{
4844223637Sbz		PF_ACPY(&key.addr[0], pd->src, key.af);
4845223637Sbz		PF_ACPY(&key.addr[1], pd->dst, key.af);
4846223637Sbz		key.port[0] = key.port[1] = 0;
4847130613Smlaier	} else {
4848223637Sbz		PF_ACPY(&key.addr[1], pd->src, key.af);
4849223637Sbz		PF_ACPY(&key.addr[0], pd->dst, key.af);
4850223637Sbz		key.port[1] = key.port[0] = 0;
4851130613Smlaier	}
4852126258Smlaier
4853240233Sglebius	STATE_LOOKUP(kif, &key, direction, *state, pd);
4854126258Smlaier
4855126258Smlaier	if (direction == (*state)->direction) {
4856126258Smlaier		src = &(*state)->src;
4857126258Smlaier		dst = &(*state)->dst;
4858126258Smlaier	} else {
4859126258Smlaier		src = &(*state)->dst;
4860126258Smlaier		dst = &(*state)->src;
4861126258Smlaier	}
4862126258Smlaier
4863126258Smlaier	/* update states */
4864126258Smlaier	if (src->state < PFOTHERS_SINGLE)
4865126258Smlaier		src->state = PFOTHERS_SINGLE;
4866126258Smlaier	if (dst->state == PFOTHERS_SINGLE)
4867126258Smlaier		dst->state = PFOTHERS_MULTIPLE;
4868126258Smlaier
4869126258Smlaier	/* update expire time */
4870240233Sglebius	(*state)->expire = time_uptime;
4871126258Smlaier	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
4872126258Smlaier		(*state)->timeout = PFTM_OTHER_MULTIPLE;
4873126258Smlaier	else
4874126258Smlaier		(*state)->timeout = PFTM_OTHER_SINGLE;
4875126258Smlaier
4876126258Smlaier	/* translate source/destination address, if necessary */
4877223637Sbz	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4878223637Sbz		struct pf_state_key *nk = (*state)->key[pd->didx];
4879223637Sbz
4880240233Sglebius		KASSERT(nk, ("%s: nk is null", __func__));
4881240233Sglebius		KASSERT(pd, ("%s: pd is null", __func__));
4882240233Sglebius		KASSERT(pd->src, ("%s: pd->src is null", __func__));
4883240233Sglebius		KASSERT(pd->dst, ("%s: pd->dst is null", __func__));
4884223637Sbz		switch (pd->af) {
4885126258Smlaier#ifdef INET
4886223637Sbz		case AF_INET:
4887223637Sbz			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
4888126258Smlaier				pf_change_a(&pd->src->v4.s_addr,
4889223637Sbz				    pd->ip_sum,
4890223637Sbz				    nk->addr[pd->sidx].v4.s_addr,
4891126258Smlaier				    0);
4892223637Sbz
4893223637Sbz
4894223637Sbz			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
4895126258Smlaier				pf_change_a(&pd->dst->v4.s_addr,
4896223637Sbz				    pd->ip_sum,
4897223637Sbz				    nk->addr[pd->didx].v4.s_addr,
4898126258Smlaier				    0);
4899223637Sbz
4900126258Smlaier				break;
4901126258Smlaier#endif /* INET */
4902126258Smlaier#ifdef INET6
4903223637Sbz		case AF_INET6:
4904223637Sbz			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
4905223637Sbz				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
4906223637Sbz
4907223637Sbz			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
4908223637Sbz				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
4909126258Smlaier#endif /* INET6 */
4910223637Sbz		}
4911126258Smlaier	}
4912126258Smlaier	return (PF_PASS);
4913126258Smlaier}
4914126258Smlaier
4915126258Smlaier/*
4916126258Smlaier * ipoff and off are measured from the start of the mbuf chain.
4917126258Smlaier * h must be at "ipoff" on the mbuf chain.
4918126258Smlaier */
4919126258Smlaiervoid *
4920126258Smlaierpf_pull_hdr(struct mbuf *m, int off, void *p, int len,
4921126258Smlaier    u_short *actionp, u_short *reasonp, sa_family_t af)
4922126258Smlaier{
4923126258Smlaier	switch (af) {
4924126258Smlaier#ifdef INET
4925126258Smlaier	case AF_INET: {
4926126258Smlaier		struct ip	*h = mtod(m, struct ip *);
4927126258Smlaier		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
4928126258Smlaier
4929126258Smlaier		if (fragoff) {
4930126258Smlaier			if (fragoff >= len)
4931126258Smlaier				ACTION_SET(actionp, PF_PASS);
4932126258Smlaier			else {
4933126258Smlaier				ACTION_SET(actionp, PF_DROP);
4934126258Smlaier				REASON_SET(reasonp, PFRES_FRAG);
4935126258Smlaier			}
4936126258Smlaier			return (NULL);
4937126258Smlaier		}
4938130613Smlaier		if (m->m_pkthdr.len < off + len ||
4939130613Smlaier		    ntohs(h->ip_len) < off + len) {
4940126258Smlaier			ACTION_SET(actionp, PF_DROP);
4941126258Smlaier			REASON_SET(reasonp, PFRES_SHORT);
4942126258Smlaier			return (NULL);
4943126258Smlaier		}
4944126258Smlaier		break;
4945126258Smlaier	}
4946126258Smlaier#endif /* INET */
4947126258Smlaier#ifdef INET6
4948126258Smlaier	case AF_INET6: {
4949126258Smlaier		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
4950126258Smlaier
4951126258Smlaier		if (m->m_pkthdr.len < off + len ||
4952126258Smlaier		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
4953126258Smlaier		    (unsigned)(off + len)) {
4954126258Smlaier			ACTION_SET(actionp, PF_DROP);
4955126258Smlaier			REASON_SET(reasonp, PFRES_SHORT);
4956126258Smlaier			return (NULL);
4957126258Smlaier		}
4958126258Smlaier		break;
4959126258Smlaier	}
4960126258Smlaier#endif /* INET6 */
4961126258Smlaier	}
4962126258Smlaier	m_copydata(m, off, len, p);
4963126258Smlaier	return (p);
4964126258Smlaier}
4965126258Smlaier
4966126258Smlaierint
4967231852Sbzpf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
4968231852Sbz    int rtableid)
4969126258Smlaier{
4970223637Sbz#ifdef RADIX_MPATH
4971223637Sbz	struct radix_node_head	*rnh;
4972223637Sbz#endif
4973126258Smlaier	struct sockaddr_in	*dst;
4974171168Smlaier	int			 ret = 1;
4975171168Smlaier	int			 check_mpath;
4976145836Smlaier#ifdef INET6
4977145836Smlaier	struct sockaddr_in6	*dst6;
4978145836Smlaier	struct route_in6	 ro;
4979145836Smlaier#else
4980126258Smlaier	struct route		 ro;
4981145836Smlaier#endif
4982171168Smlaier	struct radix_node	*rn;
4983171168Smlaier	struct rtentry		*rt;
4984171168Smlaier	struct ifnet		*ifp;
4985126258Smlaier
4986171168Smlaier	check_mpath = 0;
4987223637Sbz#ifdef RADIX_MPATH
4988223637Sbz	/* XXX: stick to table 0 for now */
4989223637Sbz	rnh = rt_tables_get_rnh(0, af);
4990223637Sbz	if (rnh != NULL && rn_mpath_capable(rnh))
4991223637Sbz		check_mpath = 1;
4992223637Sbz#endif
4993126258Smlaier	bzero(&ro, sizeof(ro));
4994145836Smlaier	switch (af) {
4995145836Smlaier	case AF_INET:
4996145836Smlaier		dst = satosin(&ro.ro_dst);
4997145836Smlaier		dst->sin_family = AF_INET;
4998145836Smlaier		dst->sin_len = sizeof(*dst);
4999145836Smlaier		dst->sin_addr = addr->v4;
5000145836Smlaier		break;
5001145836Smlaier#ifdef INET6
5002145836Smlaier	case AF_INET6:
5003223637Sbz		/*
5004223637Sbz		 * Skip check for addresses with embedded interface scope,
5005223637Sbz		 * as they would always match anyway.
5006223637Sbz		 */
5007223637Sbz		if (IN6_IS_SCOPE_EMBED(&addr->v6))
5008223637Sbz			goto out;
5009145836Smlaier		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5010145836Smlaier		dst6->sin6_family = AF_INET6;
5011145836Smlaier		dst6->sin6_len = sizeof(*dst6);
5012145836Smlaier		dst6->sin6_addr = addr->v6;
5013145836Smlaier		break;
5014145836Smlaier#endif /* INET6 */
5015145836Smlaier	default:
5016145836Smlaier		return (0);
5017145836Smlaier	}
5018145836Smlaier
5019171168Smlaier	/* Skip checks for ipsec interfaces */
5020171168Smlaier	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
5021171168Smlaier		goto out;
5022171168Smlaier
5023231852Sbz	switch (af) {
5024231852Sbz#ifdef INET6
5025231852Sbz	case AF_INET6:
5026231852Sbz		in6_rtalloc_ign(&ro, 0, rtableid);
5027231852Sbz		break;
5028231852Sbz#endif
5029222529Sbz#ifdef INET
5030231852Sbz	case AF_INET:
5031231852Sbz		in_rtalloc_ign((struct route *)&ro, 0, rtableid);
5032231852Sbz		break;
5033222529Sbz#endif
5034231852Sbz	default:
5035231852Sbz		rtalloc_ign((struct route *)&ro, 0);	/* No/default FIB. */
5036231852Sbz		break;
5037231852Sbz	}
5038126258Smlaier
5039126258Smlaier	if (ro.ro_rt != NULL) {
5040171168Smlaier		/* No interface given, this is a no-route check */
5041171168Smlaier		if (kif == NULL)
5042171168Smlaier			goto out;
5043171168Smlaier
5044171168Smlaier		if (kif->pfik_ifp == NULL) {
5045171168Smlaier			ret = 0;
5046171168Smlaier			goto out;
5047171168Smlaier		}
5048171168Smlaier
5049171168Smlaier		/* Perform uRPF check if passed input interface */
5050171168Smlaier		ret = 0;
5051171168Smlaier		rn = (struct radix_node *)ro.ro_rt;
5052171168Smlaier		do {
5053171168Smlaier			rt = (struct rtentry *)rn;
5054240233Sglebius			ifp = rt->rt_ifp;
5055171168Smlaier
5056171168Smlaier			if (kif->pfik_ifp == ifp)
5057171168Smlaier				ret = 1;
5058223637Sbz#ifdef RADIX_MPATH
5059171168Smlaier			rn = rn_mpath_next(rn);
5060171168Smlaier#endif
5061171168Smlaier		} while (check_mpath == 1 && rn != NULL && ret == 0);
5062171168Smlaier	} else
5063171168Smlaier		ret = 0;
5064171168Smlaierout:
5065171168Smlaier	if (ro.ro_rt != NULL)
5066126258Smlaier		RTFREE(ro.ro_rt);
5067171168Smlaier	return (ret);
5068145836Smlaier}
5069145836Smlaier
#ifdef INET
/*
 * Send an IPv4 packet out according to the routing option of rule "r"
 * (r->rt: PF_FASTROUTE, PF_DUPTO, PF_REPLYTO or a plain route-to pool).
 *
 *   m    - in/out: packet.  Set to NULL when the packet has been
 *          consumed here (every path through "done" unless r->rt is
 *          PF_DUPTO, in which case only a duplicate is sent).
 *   r    - rule carrying the routing option and address pool
 *   dir  - PF_IN or PF_OUT
 *   oifp - interface the packet was being processed on; if the chosen
 *          output interface differs, pf_test(PF_OUT, ...) is run on it
 *   s    - state, or NULL.  When non-NULL, PF_STATE_UNLOCK(s) is called
 *          exactly once on every path through this function.
 *          NOTE(review): presumably the caller enters with the state
 *          locked - confirm against the call sites.
 *   pd   - packet descriptor; pd->pf_mtag is filled in from the mbuf if
 *          unset and its "routed" counter limits re-routing loops.
 *
 * The packet is transmitted directly through ifp->if_output(); packets
 * larger than the interface MTU are fragmented with ip_fragment() unless
 * DF/TSO forbids it, in which case an ICMP "need fragmentation" error is
 * generated (not for PF_DUPTO duplicates, which are simply freed).
 */
static void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0, *m1;
	struct sockaddr_in	dst;
	struct ip		*ip;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;
	int sw_csum;

	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
	    __func__));

	/* Drop the packet if no tag is available or it was routed too often. */
	if ((pd->pf_mtag == NULL &&
	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
	    pd->pf_mtag->routed++ > 3) {
		m0 = *m;
		*m = NULL;
		goto bad_locked;
	}

	if (r->rt == PF_DUPTO) {
		/* Work on a copy; the original continues on its normal path. */
		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
			if (s)
				PF_STATE_UNLOCK(s);
			return;
		}
	} else {
		/* Leave the packet alone when option and direction disagree. */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
			if (s)
				PF_STATE_UNLOCK(s);
			return;
		}
		m0 = *m;
	}

	ip = mtod(m0, struct ip *);

	bzero(&dst, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_len = sizeof(dst);
	dst.sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		struct rtentry *rt;

		if (s)
			PF_STATE_UNLOCK(s);
		rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0));
		if (rt == NULL) {
			/* No route was returned, so there is nothing to free. */
			KMOD_IPSTAT_INC(ips_noroute);
			error = EHOSTUNREACH;
			goto bad;
		}

		ifp = rt->rt_ifp;
		rt->rt_rmx.rmx_pksent++;

		/* Hand the frame to the gateway rather than the final host. */
		if (rt->rt_flags & RTF_GATEWAY)
			bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst));
		RTFREE_LOCKED(rt);
	} else {
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
			goto bad_locked;
		}
		if (s == NULL) {
			/* Stateless: pick next hop and interface from the pool. */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET))
				dst.sin_addr.s_addr = naddr.v4.s_addr;
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			/* Stateful: reuse the decision recorded in the state. */
			if (!PF_AZERO(&s->rt_addr, AF_INET))
				dst.sin_addr.s_addr =
				    s->rt_addr.v4.s_addr;
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
			PF_STATE_UNLOCK(s);
		}
	}
	if (ifp == NULL)
		goto bad;

	if (oifp != ifp) {
		/* Filter the packet again as outbound on the new interface. */
		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
			goto bad;
		else if (m0 == NULL)
			goto done;
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	if (ifp->if_flags & IFF_LOOPBACK)
		m0->m_flags |= M_SKIP_FIREWALL;

	/* Back to host byte order. */
	ip->ip_len = ntohs(ip->ip_len);
	ip->ip_off = ntohs(ip->ip_off);

	/* Copied from FreeBSD 10.0-CURRENT ip_output. */
	m0->m_pkthdr.csum_flags |= CSUM_IP;
	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
	if (sw_csum & CSUM_DELAY_DATA) {
		in_delayed_cksum(m0);
		sw_csum &= ~CSUM_DELAY_DATA;
	}
#ifdef SCTP
	if (sw_csum & CSUM_SCTP) {
		/* Checksum the packet being sent (m0), not the caller's slot. */
		sctp_delayed_cksum(m0, (uint32_t)(ip->ip_hl << 2));
		sw_csum &= ~CSUM_SCTP;
	}
#endif
	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;

	/*
	 * If small enough for interface, or the interface will take
	 * care of the fragmentation for us, we can just send directly.
	 */
	if (ip->ip_len <= ifp->if_mtu ||
	    (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
	    ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP)
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
		m0->m_flags &= ~(M_PROTOFLAGS);
		error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
		goto done;
	}

	/* Balk when DF bit is set or the interface didn't support TSO. */
	if ((ip->ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
		error = EMSGSIZE;
		KMOD_IPSTAT_INC(ips_cantfrag);
		if (r->rt != PF_DUPTO) {
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    ifp->if_mtu);
			goto done;
		} else
			goto bad;
	}

	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
	if (error)
		goto bad;

	/* Send fragments until one fails; free whatever is left after that. */
	for (; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = NULL;
		if (error == 0) {
			m0->m_flags &= ~(M_PROTOFLAGS);
			error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
		} else
			m_freem(m0);
	}

	if (error == 0)
		KMOD_IPSTAT_INC(ips_fragmented);

done:
	if (r->rt != PF_DUPTO)
		*m = NULL;
	return;

bad_locked:
	if (s)
		PF_STATE_UNLOCK(s);
bad:
	m_freem(m0);
	goto done;
}
#endif /* INET */
5255126258Smlaier
5256126258Smlaier#ifdef INET6
5257240233Sglebiusstatic void
5258126258Smlaierpf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5259171168Smlaier    struct pf_state *s, struct pf_pdesc *pd)
5260126258Smlaier{
5261126258Smlaier	struct mbuf		*m0;
5262240233Sglebius	struct sockaddr_in6	dst;
5263126258Smlaier	struct ip6_hdr		*ip6;
5264126258Smlaier	struct ifnet		*ifp = NULL;
5265126258Smlaier	struct pf_addr		 naddr;
5266130613Smlaier	struct pf_src_node	*sn = NULL;
5267126258Smlaier
5268240233Sglebius	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
5269240233Sglebius	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
5270240233Sglebius	    __func__));
5271126258Smlaier
5272240233Sglebius	if ((pd->pf_mtag == NULL &&
5273240233Sglebius	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
5274240233Sglebius	    pd->pf_mtag->routed++ > 3) {
5275171168Smlaier		m0 = *m;
5276171168Smlaier		*m = NULL;
5277240233Sglebius		goto bad_locked;
5278132303Smlaier	}
5279132303Smlaier
5280126258Smlaier	if (r->rt == PF_DUPTO) {
5281240233Sglebius		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
5282240233Sglebius			if (s)
5283240233Sglebius				PF_STATE_UNLOCK(s);
5284126258Smlaier			return;
5285240233Sglebius		}
5286126258Smlaier	} else {
5287240233Sglebius		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
5288240233Sglebius			if (s)
5289240233Sglebius				PF_STATE_UNLOCK(s);
5290126258Smlaier			return;
5291240233Sglebius		}
5292126258Smlaier		m0 = *m;
5293126258Smlaier	}
5294126258Smlaier
5295126258Smlaier	ip6 = mtod(m0, struct ip6_hdr *);
5296126258Smlaier
5297240233Sglebius	bzero(&dst, sizeof(dst));
5298240233Sglebius	dst.sin6_family = AF_INET6;
5299240233Sglebius	dst.sin6_len = sizeof(dst);
5300240233Sglebius	dst.sin6_addr = ip6->ip6_dst;
5301126258Smlaier
5302171168Smlaier	/* Cheat. XXX why only in the v6 case??? */
5303126258Smlaier	if (r->rt == PF_FASTROUTE) {
5304240233Sglebius		if (s)
5305240233Sglebius			PF_STATE_UNLOCK(s);
5306132280Smlaier		m0->m_flags |= M_SKIP_FIREWALL;
5307126261Smlaier		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5308126258Smlaier		return;
5309126258Smlaier	}
5310126258Smlaier
5311145836Smlaier	if (TAILQ_EMPTY(&r->rpool.list)) {
5312145836Smlaier		DPFPRINTF(PF_DEBUG_URGENT,
5313240233Sglebius		    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
5314240233Sglebius		goto bad_locked;
5315145836Smlaier	}
5316126258Smlaier	if (s == NULL) {
5317130613Smlaier		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
5318130613Smlaier		    &naddr, NULL, &sn);
5319126258Smlaier		if (!PF_AZERO(&naddr, AF_INET6))
5320240233Sglebius			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
5321126258Smlaier			    &naddr, AF_INET6);
5322130613Smlaier		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
5323126258Smlaier	} else {
5324126258Smlaier		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5325240233Sglebius			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
5326126258Smlaier			    &s->rt_addr, AF_INET6);
5327130613Smlaier		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5328126258Smlaier	}
5329240233Sglebius
5330240233Sglebius	if (s)
5331240233Sglebius		PF_STATE_UNLOCK(s);
5332240233Sglebius
5333126258Smlaier	if (ifp == NULL)
5334126258Smlaier		goto bad;
5335126258Smlaier
5336126258Smlaier	if (oifp != ifp) {
5337145836Smlaier		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
5338132303Smlaier			goto bad;
5339132303Smlaier		else if (m0 == NULL)
5340132303Smlaier			goto done;
5341145836Smlaier		if (m0->m_len < sizeof(struct ip6_hdr)) {
5342145836Smlaier			DPFPRINTF(PF_DEBUG_URGENT,
5343240233Sglebius			    ("%s: m0->m_len < sizeof(struct ip6_hdr)\n",
5344240233Sglebius			    __func__));
5345145836Smlaier			goto bad;
5346145836Smlaier		}
5347132303Smlaier		ip6 = mtod(m0, struct ip6_hdr *);
5348126258Smlaier	}
5349126258Smlaier
5350240233Sglebius	if (ifp->if_flags & IFF_LOOPBACK)
5351240233Sglebius		m0->m_flags |= M_SKIP_FIREWALL;
5352240233Sglebius
5353126258Smlaier	/*
5354126258Smlaier	 * If the packet is too large for the outgoing interface,
5355126258Smlaier	 * send back an icmp6 error.
5356126258Smlaier	 */
5357240233Sglebius	if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
5358240233Sglebius		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5359240233Sglebius	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu)
5360240233Sglebius		nd6_output(ifp, ifp, m0, &dst, NULL);
5361240233Sglebius	else {
5362126258Smlaier		in6_ifstat_inc(ifp, ifs6_in_toobig);
5363126258Smlaier		if (r->rt != PF_DUPTO)
5364126258Smlaier			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5365126258Smlaier		else
5366126258Smlaier			goto bad;
5367126258Smlaier	}
5368126258Smlaier
5369126258Smlaierdone:
5370126258Smlaier	if (r->rt != PF_DUPTO)
5371126258Smlaier		*m = NULL;
5372126258Smlaier	return;
5373126258Smlaier
5374240233Sglebiusbad_locked:
5375240233Sglebius	if (s)
5376240233Sglebius		PF_STATE_UNLOCK(s);
5377126258Smlaierbad:
5378126258Smlaier	m_freem(m0);
5379126258Smlaier	goto done;
5380126258Smlaier}
5381126258Smlaier#endif /* INET6 */
5382126258Smlaier
5383126258Smlaier/*
5384132566Smlaier * FreeBSD supports cksum offloads for the following drivers.
5385137413Sru *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
5386132566Smlaier *   ti(4), txp(4), xl(4)
5387132566Smlaier *
5388132566Smlaier * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
5389132566Smlaier *  network driver performed cksum including pseudo header, need to verify
5390132566Smlaier *   csum_data
5391132566Smlaier * CSUM_DATA_VALID :
5392132566Smlaier *  network driver performed cksum, needs to additional pseudo header
5393132566Smlaier *  cksum computation with partial csum_data(i.e. lack of H/W support for
5394132566Smlaier *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
5395132566Smlaier *
5396132566Smlaier * After validating the cksum of packet, set both flag CSUM_DATA_VALID and
5397132566Smlaier * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
5398132566Smlaier * TCP/UDP layer.
5399132566Smlaier * Also, set csum_data to 0xffff to force cksum validation.
5400126261Smlaier */
5401240233Sglebiusstatic int
5402126261Smlaierpf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
5403126261Smlaier{
5404126261Smlaier	u_int16_t sum = 0;
5405126261Smlaier	int hw_assist = 0;
5406126261Smlaier	struct ip *ip;
5407126261Smlaier
5408126261Smlaier	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5409126261Smlaier		return (1);
5410126261Smlaier	if (m->m_pkthdr.len < off + len)
5411126261Smlaier		return (1);
5412126261Smlaier
5413126261Smlaier	switch (p) {
5414126261Smlaier	case IPPROTO_TCP:
5415126261Smlaier		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5416126261Smlaier			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5417126261Smlaier				sum = m->m_pkthdr.csum_data;
5418126261Smlaier			} else {
5419223637Sbz				ip = mtod(m, struct ip *);
5420126261Smlaier				sum = in_pseudo(ip->ip_src.s_addr,
5421240233Sglebius				ip->ip_dst.s_addr, htonl((u_short)len +
5422223637Sbz				m->m_pkthdr.csum_data + IPPROTO_TCP));
5423126261Smlaier			}
5424126261Smlaier			sum ^= 0xffff;
5425126261Smlaier			++hw_assist;
5426126261Smlaier		}
5427126261Smlaier		break;
5428126261Smlaier	case IPPROTO_UDP:
5429126261Smlaier		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5430126261Smlaier			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5431126261Smlaier				sum = m->m_pkthdr.csum_data;
5432126261Smlaier			} else {
5433223637Sbz				ip = mtod(m, struct ip *);
5434126261Smlaier				sum = in_pseudo(ip->ip_src.s_addr,
5435223637Sbz				ip->ip_dst.s_addr, htonl((u_short)len +
5436223637Sbz				m->m_pkthdr.csum_data + IPPROTO_UDP));
5437126261Smlaier			}
5438126261Smlaier			sum ^= 0xffff;
5439126261Smlaier			++hw_assist;
5440223637Sbz		}
5441126261Smlaier		break;
5442126261Smlaier	case IPPROTO_ICMP:
5443126261Smlaier#ifdef INET6
5444126261Smlaier	case IPPROTO_ICMPV6:
5445126261Smlaier#endif /* INET6 */
5446126261Smlaier		break;
5447126261Smlaier	default:
5448126261Smlaier		return (1);
5449126261Smlaier	}
5450126261Smlaier
5451126261Smlaier	if (!hw_assist) {
5452126261Smlaier		switch (af) {
5453126261Smlaier		case AF_INET:
5454126261Smlaier			if (p == IPPROTO_ICMP) {
5455126261Smlaier				if (m->m_len < off)
5456126261Smlaier					return (1);
5457126261Smlaier				m->m_data += off;
5458126261Smlaier				m->m_len -= off;
5459126261Smlaier				sum = in_cksum(m, len);
5460126261Smlaier				m->m_data -= off;
5461126261Smlaier				m->m_len += off;
5462126261Smlaier			} else {
5463126261Smlaier				if (m->m_len < sizeof(struct ip))
5464126261Smlaier					return (1);
5465126261Smlaier				sum = in4_cksum(m, p, off, len);
5466126261Smlaier			}
5467126261Smlaier			break;
5468126261Smlaier#ifdef INET6
5469126261Smlaier		case AF_INET6:
5470126261Smlaier			if (m->m_len < sizeof(struct ip6_hdr))
5471126261Smlaier				return (1);
5472126261Smlaier			sum = in6_cksum(m, p, off, len);
5473126261Smlaier			break;
5474126261Smlaier#endif /* INET6 */
5475126261Smlaier		default:
5476126261Smlaier			return (1);
5477126261Smlaier		}
5478126261Smlaier	}
5479126261Smlaier	if (sum) {
5480126261Smlaier		switch (p) {
5481126261Smlaier		case IPPROTO_TCP:
5482183550Szec		    {
5483196039Srwatson			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
5484126261Smlaier			break;
5485183550Szec		    }
5486126261Smlaier		case IPPROTO_UDP:
5487183550Szec		    {
5488196039Srwatson			KMOD_UDPSTAT_INC(udps_badsum);
5489126261Smlaier			break;
5490183550Szec		    }
5491222529Sbz#ifdef INET
5492126261Smlaier		case IPPROTO_ICMP:
5493183550Szec		    {
5494196039Srwatson			KMOD_ICMPSTAT_INC(icps_checksum);
5495126261Smlaier			break;
5496183550Szec		    }
5497222529Sbz#endif
5498126261Smlaier#ifdef INET6
5499126261Smlaier		case IPPROTO_ICMPV6:
5500183550Szec		    {
5501196039Srwatson			KMOD_ICMP6STAT_INC(icp6s_checksum);
5502126261Smlaier			break;
5503183550Szec		    }
5504126261Smlaier#endif /* INET6 */
5505126261Smlaier		}
5506126261Smlaier		return (1);
5507132566Smlaier	} else {
5508132566Smlaier		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
5509132566Smlaier			m->m_pkthdr.csum_flags |=
5510132566Smlaier			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5511132566Smlaier			m->m_pkthdr.csum_data = 0xffff;
5512132566Smlaier		}
5513126261Smlaier	}
5514126261Smlaier	return (0);
5515126261Smlaier}
5516223637Sbz
5517126258Smlaier
5518145836Smlaier#ifdef INET
5519126258Smlaierint
5520240233Sglebiuspf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
5521126258Smlaier{
5522130613Smlaier	struct pfi_kif		*kif;
5523130613Smlaier	u_short			 action, reason = 0, log = 0;
5524130613Smlaier	struct mbuf		*m = *m0;
5525223637Sbz	struct ip		*h = NULL;
5526223637Sbz	struct m_tag		*ipfwtag;
5527223637Sbz	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
5528130613Smlaier	struct pf_state		*s = NULL;
5529130613Smlaier	struct pf_ruleset	*ruleset = NULL;
5530130613Smlaier	struct pf_pdesc		 pd;
5531130613Smlaier	int			 off, dirndx, pqid = 0;
5532126258Smlaier
5533240233Sglebius	M_ASSERTPKTHDR(m);
5534240233Sglebius
5535223637Sbz	if (!V_pf_status.running)
5536171168Smlaier		return (PF_PASS);
5537126258Smlaier
5538171168Smlaier	memset(&pd, 0, sizeof(pd));
5539145836Smlaier
5540240233Sglebius	kif = (struct pfi_kif *)ifp->if_pf_kif;
5541240233Sglebius
5542130613Smlaier	if (kif == NULL) {
5543145836Smlaier		DPFPRINTF(PF_DEBUG_URGENT,
5544145836Smlaier		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
5545130613Smlaier		return (PF_DROP);
5546130613Smlaier	}
5547223637Sbz	if (kif->pfik_flags & PFI_IFLAG_SKIP)
5548145836Smlaier		return (PF_PASS);
5549130613Smlaier
5550240233Sglebius	if (m->m_flags & M_SKIP_FIREWALL)
5551240233Sglebius		return (PF_PASS);
5552126258Smlaier
5553240233Sglebius	if (m->m_pkthdr.len < (int)sizeof(struct ip)) {
5554126258Smlaier		action = PF_DROP;
5555126258Smlaier		REASON_SET(&reason, PFRES_SHORT);
5556126258Smlaier		log = 1;
5557126258Smlaier		goto done;
5558126258Smlaier	}
5559126258Smlaier
5560240233Sglebius	pd.pf_mtag = pf_find_mtag(m);
5561240233Sglebius
5562240233Sglebius	PF_RULES_RLOCK();
5563240233Sglebius
5564223637Sbz	if (ip_divert_ptr != NULL &&
5565223637Sbz	    ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
5566223637Sbz		struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
5567223637Sbz		if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
5568240233Sglebius			if (pd.pf_mtag == NULL &&
5569240233Sglebius			    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
5570240233Sglebius				action = PF_DROP;
5571240233Sglebius				goto done;
5572240233Sglebius			}
5573223637Sbz			pd.pf_mtag->flags |= PF_PACKET_LOOPED;
5574223637Sbz			m_tag_delete(m, ipfwtag);
5575223637Sbz		}
5576240233Sglebius		if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
5577223637Sbz			m->m_flags |= M_FASTFWD_OURS;
5578223637Sbz			pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT;
5579223637Sbz		}
5580240233Sglebius	} else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
5581240233Sglebius		/* We do IP header normalization and packet reassembly here */
5582126258Smlaier		action = PF_DROP;
5583126258Smlaier		goto done;
5584126258Smlaier	}
5585223637Sbz	m = *m0;	/* pf_normalize messes with m0 */
5586126258Smlaier	h = mtod(m, struct ip *);
5587126258Smlaier
5588126258Smlaier	off = h->ip_hl << 2;
5589240233Sglebius	if (off < (int)sizeof(struct ip)) {
5590126258Smlaier		action = PF_DROP;
5591126258Smlaier		REASON_SET(&reason, PFRES_SHORT);
5592126258Smlaier		log = 1;
5593126258Smlaier		goto done;
5594126258Smlaier	}
5595126258Smlaier
5596126258Smlaier	pd.src = (struct pf_addr *)&h->ip_src;
5597126258Smlaier	pd.dst = (struct pf_addr *)&h->ip_dst;
5598223637Sbz	pd.sport = pd.dport = NULL;
5599126258Smlaier	pd.ip_sum = &h->ip_sum;
5600223637Sbz	pd.proto_sum = NULL;
5601126258Smlaier	pd.proto = h->ip_p;
5602223637Sbz	pd.dir = dir;
5603223637Sbz	pd.sidx = (dir == PF_IN) ? 0 : 1;
5604223637Sbz	pd.didx = (dir == PF_IN) ? 1 : 0;
5605126258Smlaier	pd.af = AF_INET;
5606126258Smlaier	pd.tos = h->ip_tos;
5607126258Smlaier	pd.tot_len = ntohs(h->ip_len);
5608126258Smlaier
5609126258Smlaier	/* handle fragments that didn't get reassembled by normalization */
5610126258Smlaier	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
5611130613Smlaier		action = pf_test_fragment(&r, dir, kif, m, h,
5612126258Smlaier		    &pd, &a, &ruleset);
5613126258Smlaier		goto done;
5614126258Smlaier	}
5615126258Smlaier
5616126258Smlaier	switch (h->ip_p) {
5617126258Smlaier
5618126258Smlaier	case IPPROTO_TCP: {
5619126258Smlaier		struct tcphdr	th;
5620126258Smlaier
5621126258Smlaier		pd.hdr.tcp = &th;
5622126258Smlaier		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5623126258Smlaier		    &action, &reason, AF_INET)) {
5624126258Smlaier			log = action != PF_PASS;
5625126258Smlaier			goto done;
5626126258Smlaier		}
5627126258Smlaier		pd.p_len = pd.tot_len - off - (th.th_off << 2);
5628126258Smlaier		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
5629126258Smlaier			pqid = 1;
5630130613Smlaier		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
5631126258Smlaier		if (action == PF_DROP)
5632130613Smlaier			goto done;
5633130613Smlaier		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
5634126258Smlaier		    &reason);
5635126258Smlaier		if (action == PF_PASS) {
5636223637Sbz			if (pfsync_update_state_ptr != NULL)
5637223637Sbz				pfsync_update_state_ptr(s);
5638126258Smlaier			r = s->rule.ptr;
5639130613Smlaier			a = s->anchor.ptr;
5640126258Smlaier			log = s->log;
5641126258Smlaier		} else if (s == NULL)
5642240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
5643240233Sglebius			    &a, &ruleset, inp);
5644126258Smlaier		break;
5645126258Smlaier	}
5646126258Smlaier
5647126258Smlaier	case IPPROTO_UDP: {
5648126258Smlaier		struct udphdr	uh;
5649126258Smlaier
5650126258Smlaier		pd.hdr.udp = &uh;
5651126258Smlaier		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5652126258Smlaier		    &action, &reason, AF_INET)) {
5653126258Smlaier			log = action != PF_PASS;
5654126258Smlaier			goto done;
5655126258Smlaier		}
5656130613Smlaier		if (uh.uh_dport == 0 ||
5657130613Smlaier		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
5658130613Smlaier		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
5659130613Smlaier			action = PF_DROP;
5660171168Smlaier			REASON_SET(&reason, PFRES_SHORT);
5661130613Smlaier			goto done;
5662130613Smlaier		}
5663130613Smlaier		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
5664126258Smlaier		if (action == PF_PASS) {
5665223637Sbz			if (pfsync_update_state_ptr != NULL)
5666223637Sbz				pfsync_update_state_ptr(s);
5667126258Smlaier			r = s->rule.ptr;
5668126258Smlaier			a = s->anchor.ptr;
5669126258Smlaier			log = s->log;
5670126258Smlaier		} else if (s == NULL)
5671240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
5672240233Sglebius			    &a, &ruleset, inp);
5673126258Smlaier		break;
5674126258Smlaier	}
5675126258Smlaier
5676126258Smlaier	case IPPROTO_ICMP: {
5677126258Smlaier		struct icmp	ih;
5678126258Smlaier
5679126258Smlaier		pd.hdr.icmp = &ih;
5680126258Smlaier		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
5681126258Smlaier		    &action, &reason, AF_INET)) {
5682126258Smlaier			log = action != PF_PASS;
5683126258Smlaier			goto done;
5684126258Smlaier		}
5685145836Smlaier		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
5686145836Smlaier		    &reason);
5687126258Smlaier		if (action == PF_PASS) {
5688223637Sbz			if (pfsync_update_state_ptr != NULL)
5689223637Sbz				pfsync_update_state_ptr(s);
5690126258Smlaier			r = s->rule.ptr;
5691126258Smlaier			a = s->anchor.ptr;
5692126258Smlaier			log = s->log;
5693126258Smlaier		} else if (s == NULL)
5694240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
5695240233Sglebius			    &a, &ruleset, inp);
5696126258Smlaier		break;
5697126258Smlaier	}
5698126258Smlaier
5699223637Sbz#ifdef INET6
5700223637Sbz	case IPPROTO_ICMPV6: {
5701223637Sbz		action = PF_DROP;
5702223637Sbz		DPFPRINTF(PF_DEBUG_MISC,
5703223637Sbz		    ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
5704223637Sbz		goto done;
5705223637Sbz	}
5706223637Sbz#endif
5707223637Sbz
5708126258Smlaier	default:
5709223637Sbz		action = pf_test_state_other(&s, dir, kif, m, &pd);
5710126258Smlaier		if (action == PF_PASS) {
5711223637Sbz			if (pfsync_update_state_ptr != NULL)
5712223637Sbz				pfsync_update_state_ptr(s);
5713126258Smlaier			r = s->rule.ptr;
5714126258Smlaier			a = s->anchor.ptr;
5715126258Smlaier			log = s->log;
5716126258Smlaier		} else if (s == NULL)
5717240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
5718240233Sglebius			    &a, &ruleset, inp);
5719126258Smlaier		break;
5720126258Smlaier	}
5721126258Smlaier
5722126258Smlaierdone:
5723240233Sglebius	PF_RULES_RUNLOCK();
5724126258Smlaier	if (action == PF_PASS && h->ip_hl > 5 &&
5725200930Sdelphij	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
5726126258Smlaier		action = PF_DROP;
5727145836Smlaier		REASON_SET(&reason, PFRES_IPOPTIONS);
5728126258Smlaier		log = 1;
5729126258Smlaier		DPFPRINTF(PF_DEBUG_MISC,
5730126258Smlaier		    ("pf: dropping packet with ip options\n"));
5731126258Smlaier	}
5732126258Smlaier
5733240233Sglebius	if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
5734240233Sglebius		action = PF_DROP;
5735240233Sglebius		REASON_SET(&reason, PFRES_MEMORY);
5736240233Sglebius	}
5737240233Sglebius	if (r->rtableid >= 0)
5738240233Sglebius		M_SETFIB(m, r->rtableid);
5739145836Smlaier
5740126258Smlaier#ifdef ALTQ
5741126258Smlaier	if (action == PF_PASS && r->qid) {
5742240233Sglebius		if (pd.pf_mtag == NULL &&
5743240233Sglebius		    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
5744240233Sglebius			action = PF_DROP;
5745240233Sglebius			REASON_SET(&reason, PFRES_MEMORY);
5746240233Sglebius		}
5747171168Smlaier		if (pqid || (pd.tos & IPTOS_LOWDELAY))
5748171168Smlaier			pd.pf_mtag->qid = r->pqid;
5749171168Smlaier		else
5750171168Smlaier			pd.pf_mtag->qid = r->qid;
5751171168Smlaier		/* add hints for ecn */
5752171168Smlaier		pd.pf_mtag->hdr = h;
5753223637Sbz
5754126258Smlaier	}
5755145836Smlaier#endif /* ALTQ */
5756126258Smlaier
5757130613Smlaier	/*
5758130613Smlaier	 * connections redirected to loopback should not match sockets
5759130613Smlaier	 * bound specifically to loopback due to security implications,
5760130613Smlaier	 * see tcp_input() and in_pcblookup_listen().
5761130613Smlaier	 */
5762130613Smlaier	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
5763130613Smlaier	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
5764130613Smlaier	    (s->nat_rule.ptr->action == PF_RDR ||
5765130613Smlaier	    s->nat_rule.ptr->action == PF_BINAT) &&
5766171168Smlaier	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
5767223637Sbz		m->m_flags |= M_SKIP_FIREWALL;
5768171168Smlaier
5769240233Sglebius	if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL &&
5770240233Sglebius	    !PACKET_LOOPED(&pd)) {
5771223637Sbz
5772223637Sbz		ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
5773240233Sglebius		    sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
5774223637Sbz		if (ipfwtag != NULL) {
5775225171Sbz			((struct ipfw_rule_ref *)(ipfwtag+1))->info =
5776225171Sbz			    ntohs(r->divert.port);
5777223637Sbz			((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir;
5778223637Sbz
5779240233Sglebius			if (s)
5780240233Sglebius				PF_STATE_UNLOCK(s);
5781240233Sglebius
5782223637Sbz			m_tag_prepend(m, ipfwtag);
5783223637Sbz			if (m->m_flags & M_FASTFWD_OURS) {
5784240233Sglebius				if (pd.pf_mtag == NULL &&
5785240233Sglebius				    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
5786240233Sglebius					action = PF_DROP;
5787240233Sglebius					REASON_SET(&reason, PFRES_MEMORY);
5788240233Sglebius					log = 1;
5789240233Sglebius					DPFPRINTF(PF_DEBUG_MISC,
5790240233Sglebius					    ("pf: failed to allocate tag\n"));
5791240233Sglebius				}
5792223637Sbz				pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT;
5793223637Sbz				m->m_flags &= ~M_FASTFWD_OURS;
5794223637Sbz			}
5795240233Sglebius			ip_divert_ptr(*m0, dir ==  PF_IN ? DIR_IN : DIR_OUT);
5796240233Sglebius			*m0 = NULL;
5797223637Sbz
5798223637Sbz			return (action);
5799223637Sbz		} else {
5800223637Sbz			/* XXX: ipfw has the same behaviour! */
5801223637Sbz			action = PF_DROP;
5802223637Sbz			REASON_SET(&reason, PFRES_MEMORY);
5803223637Sbz			log = 1;
5804223637Sbz			DPFPRINTF(PF_DEBUG_MISC,
5805223637Sbz			    ("pf: failed to allocate divert tag\n"));
5806223637Sbz		}
5807223637Sbz	}
5808223637Sbz
5809171168Smlaier	if (log) {
5810171168Smlaier		struct pf_rule *lr;
5811171168Smlaier
5812171168Smlaier		if (s != NULL && s->nat_rule.ptr != NULL &&
5813171168Smlaier		    s->nat_rule.ptr->log & PF_LOG_ALL)
5814171168Smlaier			lr = s->nat_rule.ptr;
5815171168Smlaier		else
5816171168Smlaier			lr = r;
5817240233Sglebius		PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd,
5818240233Sglebius		    (s == NULL));
5819130613Smlaier	}
5820130613Smlaier
5821130613Smlaier	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
5822130613Smlaier	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
5823130613Smlaier
5824130613Smlaier	if (action == PF_PASS || r->action == PF_DROP) {
5825171168Smlaier		dirndx = (dir == PF_OUT);
5826171168Smlaier		r->packets[dirndx]++;
5827171168Smlaier		r->bytes[dirndx] += pd.tot_len;
5828130613Smlaier		if (a != NULL) {
5829171168Smlaier			a->packets[dirndx]++;
5830171168Smlaier			a->bytes[dirndx] += pd.tot_len;
5831130613Smlaier		}
5832130613Smlaier		if (s != NULL) {
5833130613Smlaier			if (s->nat_rule.ptr != NULL) {
5834171168Smlaier				s->nat_rule.ptr->packets[dirndx]++;
5835171168Smlaier				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
5836130613Smlaier			}
5837130613Smlaier			if (s->src_node != NULL) {
5838171168Smlaier				s->src_node->packets[dirndx]++;
5839171168Smlaier				s->src_node->bytes[dirndx] += pd.tot_len;
5840130613Smlaier			}
5841130613Smlaier			if (s->nat_src_node != NULL) {
5842171168Smlaier				s->nat_src_node->packets[dirndx]++;
5843171168Smlaier				s->nat_src_node->bytes[dirndx] += pd.tot_len;
5844130613Smlaier			}
5845171168Smlaier			dirndx = (dir == s->direction) ? 0 : 1;
5846171168Smlaier			s->packets[dirndx]++;
5847171168Smlaier			s->bytes[dirndx] += pd.tot_len;
5848130613Smlaier		}
5849130613Smlaier		tr = r;
5850130613Smlaier		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
5851223637Sbz		if (nr != NULL && r == &V_pf_default_rule)
5852223637Sbz			tr = nr;
5853130613Smlaier		if (tr->src.addr.type == PF_ADDR_TABLE)
5854223637Sbz			pfr_update_stats(tr->src.addr.p.tbl,
5855223637Sbz			    (s == NULL) ? pd.src :
5856223637Sbz			    &s->key[(s->direction == PF_IN)]->
5857223637Sbz				addr[(s->direction == PF_OUT)],
5858223637Sbz			    pd.af, pd.tot_len, dir == PF_OUT,
5859223637Sbz			    r->action == PF_PASS, tr->src.neg);
5860130613Smlaier		if (tr->dst.addr.type == PF_ADDR_TABLE)
5861223637Sbz			pfr_update_stats(tr->dst.addr.p.tbl,
5862223637Sbz			    (s == NULL) ? pd.dst :
5863223637Sbz			    &s->key[(s->direction == PF_IN)]->
5864223637Sbz				addr[(s->direction == PF_IN)],
5865223637Sbz			    pd.af, pd.tot_len, dir == PF_OUT,
5866223637Sbz			    r->action == PF_PASS, tr->dst.neg);
5867130613Smlaier	}
5868130613Smlaier
5869223637Sbz	switch (action) {
5870223637Sbz	case PF_SYNPROXY_DROP:
5871126258Smlaier		m_freem(*m0);
5872223637Sbz	case PF_DEFER:
5873126258Smlaier		*m0 = NULL;
5874126258Smlaier		action = PF_PASS;
5875223637Sbz		break;
5876223637Sbz	default:
5877240233Sglebius		/* pf_route() returns unlocked. */
5878240233Sglebius		if (r->rt) {
5879223637Sbz			pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
5880240233Sglebius			return (action);
5881240233Sglebius		}
5882223637Sbz		break;
5883223637Sbz	}
5884240233Sglebius	if (s)
5885240233Sglebius		PF_STATE_UNLOCK(s);
5886240233Sglebius
5887126258Smlaier	return (action);
5888126258Smlaier}
5889126258Smlaier#endif /* INET */
5890126258Smlaier
5891126258Smlaier#ifdef INET6
5892126258Smlaierint
5893240233Sglebiuspf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
5894126258Smlaier{
5895130613Smlaier	struct pfi_kif		*kif;
5896130613Smlaier	u_short			 action, reason = 0, log = 0;
5897171168Smlaier	struct mbuf		*m = *m0, *n = NULL;
5898223637Sbz	struct ip6_hdr		*h = NULL;
5899223637Sbz	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
5900130613Smlaier	struct pf_state		*s = NULL;
5901130613Smlaier	struct pf_ruleset	*ruleset = NULL;
5902130613Smlaier	struct pf_pdesc		 pd;
5903169843Sdhartmei	int			 off, terminal = 0, dirndx, rh_cnt = 0;
5904126258Smlaier
5905240233Sglebius	M_ASSERTPKTHDR(m);
5906240233Sglebius
5907240233Sglebius	if (!V_pf_status.running)
5908126258Smlaier		return (PF_PASS);
5909126258Smlaier
5910171168Smlaier	memset(&pd, 0, sizeof(pd));
5911240233Sglebius	pd.pf_mtag = pf_find_mtag(m);
5912145836Smlaier
5913240233Sglebius	if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED)
5914240233Sglebius		return (PF_PASS);
5915240233Sglebius
5916240233Sglebius	kif = (struct pfi_kif *)ifp->if_pf_kif;
5917130613Smlaier	if (kif == NULL) {
5918145836Smlaier		DPFPRINTF(PF_DEBUG_URGENT,
5919145836Smlaier		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
5920130613Smlaier		return (PF_DROP);
5921130613Smlaier	}
5922223637Sbz	if (kif->pfik_flags & PFI_IFLAG_SKIP)
5923145836Smlaier		return (PF_PASS);
5924130613Smlaier
5925126258Smlaier	if (m->m_pkthdr.len < (int)sizeof(*h)) {
5926126258Smlaier		action = PF_DROP;
5927126258Smlaier		REASON_SET(&reason, PFRES_SHORT);
5928126258Smlaier		log = 1;
5929126258Smlaier		goto done;
5930126258Smlaier	}
5931126258Smlaier
5932240233Sglebius	PF_RULES_RLOCK();
5933223637Sbz
5934126258Smlaier	/* We do IP header normalization and packet reassembly here */
5935145836Smlaier	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
5936126258Smlaier		action = PF_DROP;
5937126258Smlaier		goto done;
5938126258Smlaier	}
5939223637Sbz	m = *m0;	/* pf_normalize messes with m0 */
5940126258Smlaier	h = mtod(m, struct ip6_hdr *);
5941126258Smlaier
5942169843Sdhartmei#if 1
5943169843Sdhartmei	/*
5944169843Sdhartmei	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
5945169843Sdhartmei	 * will do something bad, so drop the packet for now.
5946169843Sdhartmei	 */
5947169843Sdhartmei	if (htons(h->ip6_plen) == 0) {
5948169843Sdhartmei		action = PF_DROP;
5949169843Sdhartmei		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
5950169843Sdhartmei		goto done;
5951169843Sdhartmei	}
5952169843Sdhartmei#endif
5953169843Sdhartmei
5954126258Smlaier	pd.src = (struct pf_addr *)&h->ip6_src;
5955126258Smlaier	pd.dst = (struct pf_addr *)&h->ip6_dst;
5956223637Sbz	pd.sport = pd.dport = NULL;
5957126258Smlaier	pd.ip_sum = NULL;
5958223637Sbz	pd.proto_sum = NULL;
5959223637Sbz	pd.dir = dir;
5960223637Sbz	pd.sidx = (dir == PF_IN) ? 0 : 1;
5961223637Sbz	pd.didx = (dir == PF_IN) ? 1 : 0;
5962126258Smlaier	pd.af = AF_INET6;
5963126258Smlaier	pd.tos = 0;
5964126258Smlaier	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
5965126258Smlaier
5966126258Smlaier	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
5967126258Smlaier	pd.proto = h->ip6_nxt;
5968126258Smlaier	do {
5969126258Smlaier		switch (pd.proto) {
5970126258Smlaier		case IPPROTO_FRAGMENT:
5971130613Smlaier			action = pf_test_fragment(&r, dir, kif, m, h,
5972126258Smlaier			    &pd, &a, &ruleset);
5973126258Smlaier			if (action == PF_DROP)
5974126258Smlaier				REASON_SET(&reason, PFRES_FRAG);
5975126258Smlaier			goto done;
5976169843Sdhartmei		case IPPROTO_ROUTING: {
5977169843Sdhartmei			struct ip6_rthdr rthdr;
5978169843Sdhartmei
5979169843Sdhartmei			if (rh_cnt++) {
5980169843Sdhartmei				DPFPRINTF(PF_DEBUG_MISC,
5981169843Sdhartmei				    ("pf: IPv6 more than one rthdr\n"));
5982169843Sdhartmei				action = PF_DROP;
5983169843Sdhartmei				REASON_SET(&reason, PFRES_IPOPTIONS);
5984169843Sdhartmei				log = 1;
5985169843Sdhartmei				goto done;
5986169843Sdhartmei			}
5987169843Sdhartmei			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
5988169843Sdhartmei			    &reason, pd.af)) {
5989169843Sdhartmei				DPFPRINTF(PF_DEBUG_MISC,
5990169843Sdhartmei				    ("pf: IPv6 short rthdr\n"));
5991169843Sdhartmei				action = PF_DROP;
5992169843Sdhartmei				REASON_SET(&reason, PFRES_SHORT);
5993169843Sdhartmei				log = 1;
5994169843Sdhartmei				goto done;
5995169843Sdhartmei			}
5996169843Sdhartmei			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
5997169843Sdhartmei				DPFPRINTF(PF_DEBUG_MISC,
5998169843Sdhartmei				    ("pf: IPv6 rthdr0\n"));
5999169843Sdhartmei				action = PF_DROP;
6000169843Sdhartmei				REASON_SET(&reason, PFRES_IPOPTIONS);
6001169843Sdhartmei				log = 1;
6002169843Sdhartmei				goto done;
6003169843Sdhartmei			}
6004223637Sbz			/* FALLTHROUGH */
6005169843Sdhartmei		}
6006126258Smlaier		case IPPROTO_AH:
6007126258Smlaier		case IPPROTO_HOPOPTS:
6008126258Smlaier		case IPPROTO_DSTOPTS: {
6009126258Smlaier			/* get next header and header length */
6010126258Smlaier			struct ip6_ext	opt6;
6011126258Smlaier
6012126258Smlaier			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6013145836Smlaier			    NULL, &reason, pd.af)) {
6014126258Smlaier				DPFPRINTF(PF_DEBUG_MISC,
6015126258Smlaier				    ("pf: IPv6 short opt\n"));
6016126258Smlaier				action = PF_DROP;
6017126258Smlaier				log = 1;
6018126258Smlaier				goto done;
6019126258Smlaier			}
6020126258Smlaier			if (pd.proto == IPPROTO_AH)
6021126258Smlaier				off += (opt6.ip6e_len + 2) * 4;
6022126258Smlaier			else
6023126258Smlaier				off += (opt6.ip6e_len + 1) * 8;
6024126258Smlaier			pd.proto = opt6.ip6e_nxt;
6025126258Smlaier			/* goto the next header */
6026126258Smlaier			break;
6027126258Smlaier		}
6028126258Smlaier		default:
6029126258Smlaier			terminal++;
6030126258Smlaier			break;
6031126258Smlaier		}
6032126258Smlaier	} while (!terminal);
6033126258Smlaier
6034171168Smlaier	/* if there's no routing header, use unmodified mbuf for checksumming */
6035171168Smlaier	if (!n)
6036171168Smlaier		n = m;
6037171168Smlaier
6038126258Smlaier	switch (pd.proto) {
6039126258Smlaier
6040126258Smlaier	case IPPROTO_TCP: {
6041126258Smlaier		struct tcphdr	th;
6042126258Smlaier
6043126258Smlaier		pd.hdr.tcp = &th;
6044126258Smlaier		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6045126258Smlaier		    &action, &reason, AF_INET6)) {
6046126258Smlaier			log = action != PF_PASS;
6047126258Smlaier			goto done;
6048126258Smlaier		}
6049126258Smlaier		pd.p_len = pd.tot_len - off - (th.th_off << 2);
6050130613Smlaier		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6051126258Smlaier		if (action == PF_DROP)
6052130613Smlaier			goto done;
6053130613Smlaier		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6054126258Smlaier		    &reason);
6055126258Smlaier		if (action == PF_PASS) {
6056223637Sbz			if (pfsync_update_state_ptr != NULL)
6057223637Sbz				pfsync_update_state_ptr(s);
6058126258Smlaier			r = s->rule.ptr;
6059130613Smlaier			a = s->anchor.ptr;
6060126258Smlaier			log = s->log;
6061126258Smlaier		} else if (s == NULL)
6062240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
6063240233Sglebius			    &a, &ruleset, inp);
6064126258Smlaier		break;
6065126258Smlaier	}
6066126258Smlaier
6067126258Smlaier	case IPPROTO_UDP: {
6068126258Smlaier		struct udphdr	uh;
6069126258Smlaier
6070126258Smlaier		pd.hdr.udp = &uh;
6071126258Smlaier		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6072126258Smlaier		    &action, &reason, AF_INET6)) {
6073126258Smlaier			log = action != PF_PASS;
6074126258Smlaier			goto done;
6075126258Smlaier		}
6076130613Smlaier		if (uh.uh_dport == 0 ||
6077130613Smlaier		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6078130613Smlaier		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6079130613Smlaier			action = PF_DROP;
6080171168Smlaier			REASON_SET(&reason, PFRES_SHORT);
6081130613Smlaier			goto done;
6082130613Smlaier		}
6083130613Smlaier		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6084126258Smlaier		if (action == PF_PASS) {
6085223637Sbz			if (pfsync_update_state_ptr != NULL)
6086223637Sbz				pfsync_update_state_ptr(s);
6087126258Smlaier			r = s->rule.ptr;
6088130613Smlaier			a = s->anchor.ptr;
6089126258Smlaier			log = s->log;
6090126258Smlaier		} else if (s == NULL)
6091240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
6092240233Sglebius			    &a, &ruleset, inp);
6093126258Smlaier		break;
6094126258Smlaier	}
6095126258Smlaier
6096223637Sbz	case IPPROTO_ICMP: {
6097223637Sbz		action = PF_DROP;
6098223637Sbz		DPFPRINTF(PF_DEBUG_MISC,
6099223637Sbz		    ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
6100223637Sbz		goto done;
6101223637Sbz	}
6102223637Sbz
6103126258Smlaier	case IPPROTO_ICMPV6: {
6104126258Smlaier		struct icmp6_hdr	ih;
6105126258Smlaier
6106126258Smlaier		pd.hdr.icmp6 = &ih;
6107126258Smlaier		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6108126258Smlaier		    &action, &reason, AF_INET6)) {
6109126258Smlaier			log = action != PF_PASS;
6110126258Smlaier			goto done;
6111126258Smlaier		}
6112130613Smlaier		action = pf_test_state_icmp(&s, dir, kif,
6113145836Smlaier		    m, off, h, &pd, &reason);
6114126258Smlaier		if (action == PF_PASS) {
6115223637Sbz			if (pfsync_update_state_ptr != NULL)
6116223637Sbz				pfsync_update_state_ptr(s);
6117126258Smlaier			r = s->rule.ptr;
6118130613Smlaier			a = s->anchor.ptr;
6119126258Smlaier			log = s->log;
6120126258Smlaier		} else if (s == NULL)
6121240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
6122240233Sglebius			    &a, &ruleset, inp);
6123126258Smlaier		break;
6124126258Smlaier	}
6125126258Smlaier
6126126258Smlaier	default:
6127223637Sbz		action = pf_test_state_other(&s, dir, kif, m, &pd);
6128130613Smlaier		if (action == PF_PASS) {
6129223637Sbz			if (pfsync_update_state_ptr != NULL)
6130223637Sbz				pfsync_update_state_ptr(s);
6131130613Smlaier			r = s->rule.ptr;
6132130613Smlaier			a = s->anchor.ptr;
6133130613Smlaier			log = s->log;
6134130613Smlaier		} else if (s == NULL)
6135240233Sglebius			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
6136240233Sglebius			    &a, &ruleset, inp);
6137126258Smlaier		break;
6138126258Smlaier	}
6139126258Smlaier
6140126258Smlaierdone:
6141240233Sglebius	PF_RULES_RUNLOCK();
6142223637Sbz	if (n != m) {
6143223637Sbz		m_freem(n);
6144223637Sbz		n = NULL;
6145223637Sbz	}
6146223637Sbz
6147169843Sdhartmei	/* handle dangerous IPv6 extension headers. */
6148169843Sdhartmei	if (action == PF_PASS && rh_cnt &&
6149200930Sdelphij	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
6150169843Sdhartmei		action = PF_DROP;
6151169843Sdhartmei		REASON_SET(&reason, PFRES_IPOPTIONS);
6152169843Sdhartmei		log = 1;
6153169843Sdhartmei		DPFPRINTF(PF_DEBUG_MISC,
6154169843Sdhartmei		    ("pf: dropping packet with dangerous v6 headers\n"));
6155169843Sdhartmei	}
6156126258Smlaier
6157240233Sglebius	if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
6158240233Sglebius		action = PF_DROP;
6159240233Sglebius		REASON_SET(&reason, PFRES_MEMORY);
6160240233Sglebius	}
6161240233Sglebius	if (r->rtableid >= 0)
6162240233Sglebius		M_SETFIB(m, r->rtableid);
6163145836Smlaier
6164126258Smlaier#ifdef ALTQ
6165126258Smlaier	if (action == PF_PASS && r->qid) {
6166240233Sglebius		if (pd.pf_mtag == NULL &&
6167240233Sglebius		    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
6168240233Sglebius			action = PF_DROP;
6169240233Sglebius			REASON_SET(&reason, PFRES_MEMORY);
6170240233Sglebius		}
6171171168Smlaier		if (pd.tos & IPTOS_LOWDELAY)
6172171168Smlaier			pd.pf_mtag->qid = r->pqid;
6173171168Smlaier		else
6174171168Smlaier			pd.pf_mtag->qid = r->qid;
6175171168Smlaier		/* add hints for ecn */
6176171168Smlaier		pd.pf_mtag->hdr = h;
6177126258Smlaier	}
6178145836Smlaier#endif /* ALTQ */
6179126258Smlaier
6180130613Smlaier	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6181130613Smlaier	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6182130613Smlaier	    (s->nat_rule.ptr->action == PF_RDR ||
6183130613Smlaier	    s->nat_rule.ptr->action == PF_BINAT) &&
6184171168Smlaier	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
6185223637Sbz		m->m_flags |= M_SKIP_FIREWALL;
6186171168Smlaier
6187223637Sbz	/* XXX: Anybody working on it?! */
6188223637Sbz	if (r->divert.port)
6189223637Sbz		printf("pf: divert(9) is not supported for IPv6\n");
6190223637Sbz
6191171168Smlaier	if (log) {
6192171168Smlaier		struct pf_rule *lr;
6193171168Smlaier
6194171168Smlaier		if (s != NULL && s->nat_rule.ptr != NULL &&
6195171168Smlaier		    s->nat_rule.ptr->log & PF_LOG_ALL)
6196171168Smlaier			lr = s->nat_rule.ptr;
6197171168Smlaier		else
6198171168Smlaier			lr = r;
6199240233Sglebius		PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset,
6200240233Sglebius		    &pd, (s == NULL));
6201130613Smlaier	}
6202130613Smlaier
6203130613Smlaier	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6204130613Smlaier	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6205130613Smlaier
6206130613Smlaier	if (action == PF_PASS || r->action == PF_DROP) {
6207171168Smlaier		dirndx = (dir == PF_OUT);
6208171168Smlaier		r->packets[dirndx]++;
6209171168Smlaier		r->bytes[dirndx] += pd.tot_len;
6210130613Smlaier		if (a != NULL) {
6211171168Smlaier			a->packets[dirndx]++;
6212171168Smlaier			a->bytes[dirndx] += pd.tot_len;
6213130613Smlaier		}
6214130613Smlaier		if (s != NULL) {
6215130613Smlaier			if (s->nat_rule.ptr != NULL) {
6216171168Smlaier				s->nat_rule.ptr->packets[dirndx]++;
6217171168Smlaier				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6218130613Smlaier			}
6219130613Smlaier			if (s->src_node != NULL) {
6220171168Smlaier				s->src_node->packets[dirndx]++;
6221171168Smlaier				s->src_node->bytes[dirndx] += pd.tot_len;
6222130613Smlaier			}
6223130613Smlaier			if (s->nat_src_node != NULL) {
6224171168Smlaier				s->nat_src_node->packets[dirndx]++;
6225171168Smlaier				s->nat_src_node->bytes[dirndx] += pd.tot_len;
6226130613Smlaier			}
6227171168Smlaier			dirndx = (dir == s->direction) ? 0 : 1;
6228171168Smlaier			s->packets[dirndx]++;
6229171168Smlaier			s->bytes[dirndx] += pd.tot_len;
6230130613Smlaier		}
6231130613Smlaier		tr = r;
6232130613Smlaier		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6233223637Sbz		if (nr != NULL && r == &V_pf_default_rule)
6234223637Sbz			tr = nr;
6235130613Smlaier		if (tr->src.addr.type == PF_ADDR_TABLE)
6236223637Sbz			pfr_update_stats(tr->src.addr.p.tbl,
6237223637Sbz			    (s == NULL) ? pd.src :
6238223637Sbz			    &s->key[(s->direction == PF_IN)]->addr[0],
6239223637Sbz			    pd.af, pd.tot_len, dir == PF_OUT,
6240223637Sbz			    r->action == PF_PASS, tr->src.neg);
6241130613Smlaier		if (tr->dst.addr.type == PF_ADDR_TABLE)
6242223637Sbz			pfr_update_stats(tr->dst.addr.p.tbl,
6243223637Sbz			    (s == NULL) ? pd.dst :
6244223637Sbz			    &s->key[(s->direction == PF_IN)]->addr[1],
6245223637Sbz			    pd.af, pd.tot_len, dir == PF_OUT,
6246223637Sbz			    r->action == PF_PASS, tr->dst.neg);
6247130613Smlaier	}
6248130613Smlaier
6249223637Sbz	switch (action) {
6250223637Sbz	case PF_SYNPROXY_DROP:
6251126258Smlaier		m_freem(*m0);
6252223637Sbz	case PF_DEFER:
6253126258Smlaier		*m0 = NULL;
6254126258Smlaier		action = PF_PASS;
6255223637Sbz		break;
6256223637Sbz	default:
6257240233Sglebius		/* pf_route6() returns unlocked. */
6258240233Sglebius		if (r->rt) {
6259223637Sbz			pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
6260240233Sglebius			return (action);
6261240233Sglebius		}
6262223637Sbz		break;
6263223637Sbz	}
6264126258Smlaier
6265240233Sglebius	if (s)
6266240233Sglebius		PF_STATE_UNLOCK(s);
6267240233Sglebius
6268126258Smlaier	return (action);
6269126258Smlaier}
6270126258Smlaier#endif /* INET6 */
6271