pf.c revision 126409
1/*	$FreeBSD: head/sys/contrib/pf/net/pf.c 126409 2004-02-29 16:34:43Z mlaier $	*/
2/*	$OpenBSD: pf.c,v 1.390 2003/09/24 17:18:03 mcbride Exp $ */
3
4/*
5 * Copyright (c) 2001 Daniel Hartmeier
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#if defined(__FreeBSD__)
39#include "opt_inet.h"
40#include "opt_inet6.h"
41#endif
42
43#if defined(__FreeBSD__) && __FreeBSD__ >= 5
44#include "opt_bpf.h"
45#define NBPFILTER DEV_BPF
46#include "opt_pf.h"
47#define NPFLOG DEV_PFLOG
48#define NPFSYNC DEV_PFSYNC
49#else
50#include "bpfilter.h"
51#include "pflog.h"
52#include "pfsync.h"
53#endif
54
55#include <sys/param.h>
56#include <sys/systm.h>
57#include <sys/mbuf.h>
58#include <sys/filio.h>
59#include <sys/socket.h>
60#include <sys/socketvar.h>
61#include <sys/kernel.h>
62#include <sys/time.h>
63#if defined(__FreeBSD__)
64#include <sys/sysctl.h>
65#else
66#include <sys/pool.h>
67#endif
68
69#include <net/if.h>
70#include <net/if_types.h>
71#include <net/bpf.h>
72#include <net/route.h>
73
74#include <netinet/in.h>
75#include <netinet/in_var.h>
76#include <netinet/in_systm.h>
77#include <netinet/ip.h>
78#include <netinet/ip_var.h>
79#include <netinet/tcp.h>
80#include <netinet/tcp_seq.h>
81#include <netinet/udp.h>
82#include <netinet/ip_icmp.h>
83#include <netinet/in_pcb.h>
84#include <netinet/tcp_timer.h>
85#include <netinet/tcp_var.h>
86#include <netinet/udp_var.h>
87#include <netinet/icmp_var.h>
88
89#if !defined(__FreeBSD__)
90#include <dev/rndvar.h>
91#endif
92#include <net/pfvar.h>
93#include <net/if_pflog.h>
94#include <net/if_pfsync.h>
95
96#ifdef INET6
97#include <netinet/ip6.h>
98#include <netinet/in_pcb.h>
99#include <netinet/icmp6.h>
100#include <netinet6/nd6.h>
101#if defined(__FreeBSD__)
102#include <netinet6/ip6_var.h>
103#include <netinet6/in6_pcb.h>
104#endif
105#endif /* INET6 */
106
107#ifdef ALTQ
108#include <altq/if_altq.h>
109#endif
110
111#if defined(__FreeBSD__)
112#include <machine/in_cksum.h>
113#if (__FreeBSD_version >= 500112)
114#include <sys/limits.h>
115#else
116#include <machine/limits.h>
117#endif
118#include <sys/ucred.h>
119#endif
120
121#if defined(__FreeBSD__)
122extern int ip_optcopy(struct ip *, struct ip *);
123#if (__FreeBSD_version < 501105)
124int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
125	u_long if_hwassist_flags, int sw_csum);
126#endif
127#endif
128
/*
 * Debug printf: when the pf debug level (pf_status.debug) is at least `n',
 * call printf with the parenthesised argument list `x', e.g.
 *	DPFPRINTF(PF_DEBUG_MISC, ("pf: %d\n", v));
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and cannot capture an `else' that follows it.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
130struct pf_state_tree;
131
132/*
133 * Global variables
134 */
135
136struct pf_anchorqueue	 pf_anchors;
137struct pf_ruleset	 pf_main_ruleset;
138struct pf_altqqueue	 pf_altqs[2];
139struct pf_palist	 pf_pabuf;
140struct pf_altqqueue	*pf_altqs_active;
141struct pf_altqqueue	*pf_altqs_inactive;
142struct pf_status	 pf_status;
143struct ifnet		*status_ifp;
144
145u_int32_t		 ticket_altqs_active;
146u_int32_t		 ticket_altqs_inactive;
147u_int32_t		 ticket_pabuf;
148
149#if defined(__FreeBSD__)
150struct callout	 	 pf_expire_to;			/* expire timeout */
151#else
152struct timeout		 pf_expire_to;			/* expire timeout */
153#endif
154
155
156#if defined(__FreeBSD__)
157uma_zone_t		 pf_tree_pl, pf_rule_pl, pf_addr_pl;
158uma_zone_t		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
159#else
160struct pool		 pf_tree_pl, pf_rule_pl, pf_addr_pl;
161struct pool		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
162#endif
163
164void			 pf_dynaddr_update(void *);
165#if defined(__FreeBSD__) && defined(HOOK_HACK)
166void			pf_dynaddr_update_event(void *arg, struct ifnet *ifp);
167#endif
168void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
169void			 pf_print_state(struct pf_state *);
170void			 pf_print_flags(u_int8_t);
171
172u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
173			    u_int8_t);
174void			 pf_change_ap(struct pf_addr *, u_int16_t *,
175			    u_int16_t *, u_int16_t *, struct pf_addr *,
176			    u_int16_t, u_int8_t, sa_family_t);
177#ifdef INET6
178void			 pf_change_a6(struct pf_addr *, u_int16_t *,
179			    struct pf_addr *, u_int8_t);
180#endif /* INET6 */
181void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
182			    struct pf_addr *, struct pf_addr *, u_int16_t,
183			    u_int16_t *, u_int16_t *, u_int16_t *,
184			    u_int16_t *, u_int8_t, sa_family_t);
185void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
186			    const struct pf_addr *, const struct pf_addr *,
187			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
188			    u_int8_t, u_int16_t, u_int16_t, u_int8_t);
189void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
190			    sa_family_t, struct pf_rule *);
191struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
192			    int, int, struct ifnet *,
193			    struct pf_addr *, u_int16_t, struct pf_addr *,
194			    u_int16_t, int);
195struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
196			    int, int, struct ifnet *,
197			    struct pf_addr *, u_int16_t,
198			    struct pf_addr *, u_int16_t,
199			    struct pf_addr *, u_int16_t *);
200int			 pf_test_tcp(struct pf_rule **, struct pf_state **,
201			    int, struct ifnet *, struct mbuf *, int, int,
202			    void *, struct pf_pdesc *, struct pf_rule **,
203			    struct pf_ruleset **);
204int			 pf_test_udp(struct pf_rule **, struct pf_state **,
205			    int, struct ifnet *, struct mbuf *, int, int,
206			    void *, struct pf_pdesc *, struct pf_rule **,
207			    struct pf_ruleset **);
208int			 pf_test_icmp(struct pf_rule **, struct pf_state **,
209			    int, struct ifnet *, struct mbuf *, int, int,
210			    void *, struct pf_pdesc *, struct pf_rule **,
211			    struct pf_ruleset **);
212int			 pf_test_other(struct pf_rule **, struct pf_state **,
213			    int, struct ifnet *, struct mbuf *, int, void *,
214			    struct pf_pdesc *, struct pf_rule **,
215			    struct pf_ruleset **);
216int			 pf_test_fragment(struct pf_rule **, int,
217			    struct ifnet *, struct mbuf *, void *,
218			    struct pf_pdesc *, struct pf_rule **,
219			    struct pf_ruleset **);
220int			 pf_test_state_tcp(struct pf_state **, int,
221			    struct ifnet *, struct mbuf *, int, int,
222			    void *, struct pf_pdesc *, u_short *);
223int			 pf_test_state_udp(struct pf_state **, int,
224			    struct ifnet *, struct mbuf *, int, int,
225			    void *, struct pf_pdesc *);
226int			 pf_test_state_icmp(struct pf_state **, int,
227			    struct ifnet *, struct mbuf *, int, int,
228			    void *, struct pf_pdesc *);
229int			 pf_test_state_other(struct pf_state **, int,
230			    struct ifnet *, struct pf_pdesc *);
231struct pf_tag		*pf_get_tag(struct mbuf *);
232int			 pf_match_tag(struct mbuf *, struct pf_rule *,
233			     struct pf_rule *, struct pf_rule *,
234			     struct pf_tag *, int *);
235void			 pf_hash(struct pf_addr *, struct pf_addr *,
236			    struct pf_poolhashkey *, sa_family_t);
237int			 pf_map_addr(u_int8_t, struct pf_pool *,
238			    struct pf_addr *, struct pf_addr *,
239			    struct pf_addr *);
240int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_pool *,
241			    struct pf_addr *, struct pf_addr *, u_int16_t,
242			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t);
243void			 pf_route(struct mbuf **, struct pf_rule *, int,
244			    struct ifnet *, struct pf_state *);
245void			 pf_route6(struct mbuf **, struct pf_rule *, int,
246			    struct ifnet *, struct pf_state *);
247int			 pf_socket_lookup(uid_t *, gid_t *, int, sa_family_t,
248			    int, struct pf_pdesc *);
249u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
250			    sa_family_t);
251u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
252			    sa_family_t);
253u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
254				u_int16_t);
255void			 pf_set_rt_ifp(struct pf_state *,
256			    struct pf_addr *);
257int			 pf_check_proto_cksum(struct mbuf *, int, int,
258			    u_int8_t, sa_family_t);
259int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
260			    struct pf_addr_wrap *);
261
262#if defined(__FreeBSD__)
263int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
264#endif
265
266#if defined(__FreeBSD__)
267struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
268#else
269struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] =
270    { { &pf_state_pl, PFSTATE_HIWAT }, { &pf_frent_pl, PFFRAG_FRENT_HIWAT } };
271#endif
272
/*
 * Find the state for `key' and store it in *state.  Expects `direction',
 * `state', `key' and `ifp' to be in scope at the point of use, inside a
 * function returning a PF_* verdict.
 *
 * Inbound packets are looked up in tree_ext_gwy, everything else in
 * tree_lan_ext.  Returns PF_DROP from the enclosing function when no state
 * is found, and PF_PASS when an outbound packet belongs to a route-to (out)
 * or reply-to (in) state whose rt_ifp is set and differs from `ifp'.
 */
#define STATE_LOOKUP()							\
	do {								\
		*state = pf_find_state(direction == PF_IN ?		\
		    &tree_ext_gwy : &tree_lan_ext, &key);		\
		if (*state == NULL)					\
			return (PF_DROP);				\
		if (direction == PF_OUT &&				\
		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
		    (*state)->rule.ptr->direction == PF_OUT) ||		\
		    ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
		    (*state)->rule.ptr->direction == PF_IN)) &&		\
		    (*state)->rt_ifp != NULL &&				\
		    (*state)->rt_ifp != ifp)				\
			return (PF_PASS);				\
	} while (0)
290
/*
 * Non-zero when the lan and gwy endpoints of state `s' differ: the first
 * address word always, the remaining three words only for AF_INET6, or the
 * port.  The whole expansion is parenthesised so that the macro can be
 * negated or combined with other operators safely.
 */
#define	STATE_TRANSLATE(s) \
	((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
	((s)->af == AF_INET6 && \
	((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
	(s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
	(s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
	(s)->lan.port != (s)->gwy.port)
298
299static __inline int pf_state_compare(struct pf_tree_node *,
300			struct pf_tree_node *);
301
302struct pf_state_tree tree_lan_ext, tree_ext_gwy;
303RB_GENERATE(pf_state_tree, pf_tree_node, entry, pf_state_compare);
304
305#if defined(__FreeBSD__)
306static int
307#else
308static __inline int
309#endif
310pf_state_compare(struct pf_tree_node *a, struct pf_tree_node *b)
311{
312	int	diff;
313
314	if ((diff = a->proto - b->proto) != 0)
315		return (diff);
316	if ((diff = a->af - b->af) != 0)
317		return (diff);
318	switch (a->af) {
319#ifdef INET
320	case AF_INET:
321		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
322			return (1);
323		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
324			return (-1);
325		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
326			return (1);
327		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
328			return (-1);
329		break;
330#endif /* INET */
331#ifdef INET6
332	case AF_INET6:
333		if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
334			return (1);
335		if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
336			return (-1);
337		if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
338			return (1);
339		if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
340			return (-1);
341		if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
342			return (1);
343		if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
344			return (-1);
345		if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
346			return (1);
347		if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
348			return (-1);
349		if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
350			return (1);
351		if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
352			return (-1);
353		if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
354			return (1);
355		if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
356			return (-1);
357		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
358			return (1);
359		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
360			return (-1);
361		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
362			return (1);
363		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
364			return (-1);
365		break;
366#endif /* INET6 */
367	}
368
369	if ((diff = a->port[0] - b->port[0]) != 0)
370		return (diff);
371	if ((diff = a->port[1] - b->port[1]) != 0)
372		return (diff);
373
374	return (0);
375}
376
#ifdef INET6
/*
 * Copy an address from `src' to `dst': one 32-bit word for AF_INET (when
 * INET is configured), four words for AF_INET6.  Any other family copies
 * nothing.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	int	i, nwords;

	switch (af) {
#ifdef INET
	case AF_INET:
		nwords = 1;
		break;
#endif /* INET */
	case AF_INET6:
		nwords = 4;
		break;
	default:
		nwords = 0;
		break;
	}
	for (i = 0; i < nwords; i++)
		dst->addr32[i] = src->addr32[i];
}
#endif
396
397struct pf_state *
398pf_find_state(struct pf_state_tree *tree, struct pf_tree_node *key)
399{
400	struct pf_tree_node	*k;
401
402	pf_status.fcounters[FCNT_STATE_SEARCH]++;
403	k = RB_FIND(pf_state_tree, tree, key);
404	if (k)
405		return (k->state);
406	else
407		return (NULL);
408}
409
410int
411pf_insert_state(struct pf_state *state)
412{
413	struct pf_tree_node	*keya, *keyb;
414
415	keya = pool_get(&pf_tree_pl, PR_NOWAIT);
416	if (keya == NULL)
417		return (-1);
418	keya->state = state;
419	keya->proto = state->proto;
420	keya->af = state->af;
421	PF_ACPY(&keya->addr[0], &state->lan.addr, state->af);
422	keya->port[0] = state->lan.port;
423	PF_ACPY(&keya->addr[1], &state->ext.addr, state->af);
424	keya->port[1] = state->ext.port;
425
426	/* Thou MUST NOT insert multiple duplicate keys */
427	if (RB_INSERT(pf_state_tree, &tree_lan_ext, keya) != NULL) {
428		if (pf_status.debug >= PF_DEBUG_MISC) {
429			printf("pf: state insert failed: tree_lan_ext");
430			printf(" lan: ");
431			pf_print_host(&state->lan.addr, state->lan.port,
432			    state->af);
433			printf(" gwy: ");
434			pf_print_host(&state->gwy.addr, state->gwy.port,
435			    state->af);
436			printf(" ext: ");
437			pf_print_host(&state->ext.addr, state->ext.port,
438			    state->af);
439			printf("\n");
440		}
441		pool_put(&pf_tree_pl, keya);
442		return (-1);
443	}
444
445	keyb = pool_get(&pf_tree_pl, PR_NOWAIT);
446	if (keyb == NULL) {
447		/* Need to pull out the other state */
448		RB_REMOVE(pf_state_tree, &tree_lan_ext, keya);
449		pool_put(&pf_tree_pl, keya);
450		return (-1);
451	}
452	keyb->state = state;
453	keyb->proto = state->proto;
454	keyb->af = state->af;
455	PF_ACPY(&keyb->addr[0], &state->ext.addr, state->af);
456	keyb->port[0] = state->ext.port;
457	PF_ACPY(&keyb->addr[1], &state->gwy.addr, state->af);
458	keyb->port[1] = state->gwy.port;
459
460	if (RB_INSERT(pf_state_tree, &tree_ext_gwy, keyb) != NULL) {
461		if (pf_status.debug >= PF_DEBUG_MISC) {
462			printf("pf: state insert failed: tree_ext_gwy");
463			printf(" lan: ");
464			pf_print_host(&state->lan.addr, state->lan.port,
465			    state->af);
466			printf(" gwy: ");
467			pf_print_host(&state->gwy.addr, state->gwy.port,
468			    state->af);
469			printf(" ext: ");
470			pf_print_host(&state->ext.addr, state->ext.port,
471			    state->af);
472			printf("\n");
473		}
474		RB_REMOVE(pf_state_tree, &tree_lan_ext, keya);
475		pool_put(&pf_tree_pl, keya);
476		pool_put(&pf_tree_pl, keyb);
477		return (-1);
478	}
479
480	pf_status.fcounters[FCNT_STATE_INSERT]++;
481	pf_status.states++;
482#if NPFSYNC
483	pfsync_insert_state(state);
484#endif
485	return (0);
486}
487
488void
489pf_purge_timeout(void *arg)
490{
491#if defined(__FreeBSD__)
492	struct callout  *to = arg;
493#else
494	struct timeout	*to = arg;
495#endif
496	int		 s;
497
498#if defined(__FreeBSD__)
499	PF_LOCK();
500#endif
501	s = splsoftnet();
502	pf_purge_expired_states();
503	pf_purge_expired_fragments();
504	splx(s);
505#if defined(__FreeBSD__)
506	PF_UNLOCK();
507#endif
508
509#if defined(__FreeBSD__)
510	callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz,
511	    pf_purge_timeout, to);
512#else
513	timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz);
514#endif
515}
516
517u_int32_t
518pf_state_expires(const struct pf_state *state)
519{
520	u_int32_t	timeout;
521	u_int32_t	start;
522	u_int32_t	end;
523	u_int32_t	states;
524
525	/* handle all PFTM_* > PFTM_MAX here */
526	if (state->timeout == PFTM_PURGE)
527#if defined(__FreeBSD__)
528		return (time_second);
529#else
530		return (time.tv_sec);
531#endif
532	if (state->timeout == PFTM_UNTIL_PACKET)
533		return (0);
534#if defined(__FreeBSD__)
535	KASSERT((state->timeout < PFTM_MAX),
536	    ("pf_state_expires: timeout > PFTM_MAX"));
537#else
538	KASSERT(state->timeout < PFTM_MAX);
539#endif
540	timeout = state->rule.ptr->timeout[state->timeout];
541	if (!timeout)
542		timeout = pf_default_rule.timeout[state->timeout];
543	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
544	if (start) {
545		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
546		states = state->rule.ptr->states;
547	} else {
548		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
549		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
550		states = pf_status.states;
551	}
552	if (end && states > start && start < end) {
553		if (states < end)
554			return (state->expire + timeout * (end - states) /
555			    (end - start));
556		else
557#if defined(__FreeBSD__)
558			return (time_second);
559#else
560			return (time.tv_sec);
561#endif
562	}
563	return (state->expire + timeout);
564}
565
566void
567pf_purge_expired_states(void)
568{
569	struct pf_tree_node	*cur, *peer, *next;
570	struct pf_tree_node	 key;
571
572	for (cur = RB_MIN(pf_state_tree, &tree_ext_gwy); cur; cur = next) {
573		next = RB_NEXT(pf_state_tree, &tree_ext_gwy, cur);
574
575#if defined(__FreeBSD__)
576		if (pf_state_expires(cur->state) <= (u_int32_t)time_second) {
577#else
578		if (pf_state_expires(cur->state) <= time.tv_sec) {
579#endif
580			if (cur->state->src.state == PF_TCPS_PROXY_DST)
581				pf_send_tcp(cur->state->rule.ptr,
582				    cur->state->af,
583				    &cur->state->ext.addr,
584				    &cur->state->lan.addr,
585				    cur->state->ext.port,
586				    cur->state->lan.port,
587				    cur->state->src.seqhi,
588				    cur->state->src.seqlo + 1,
589					0,
590				    TH_RST|TH_ACK, 0, 0);
591			RB_REMOVE(pf_state_tree, &tree_ext_gwy, cur);
592
593			/* Need this key's peer (in the other tree) */
594			key.state = cur->state;
595			key.proto = cur->state->proto;
596			key.af = cur->state->af;
597			PF_ACPY(&key.addr[0], &cur->state->lan.addr,
598			    cur->state->af);
599			key.port[0] = cur->state->lan.port;
600			PF_ACPY(&key.addr[1], &cur->state->ext.addr,
601			    cur->state->af);
602			key.port[1] = cur->state->ext.port;
603
604			peer = RB_FIND(pf_state_tree, &tree_lan_ext, &key);
605#if defined(__FreeBSD__)
606			KASSERT((peer), ("peer null :%s", __FUNCTION__));
607			KASSERT((peer->state == cur->state),
608			   ("peer->state != cur->state: %s", __FUNCTION__));
609#else
610			KASSERT(peer);
611			KASSERT(peer->state == cur->state);
612#endif
613			RB_REMOVE(pf_state_tree, &tree_lan_ext, peer);
614
615#if NPFSYNC
616			pfsync_delete_state(cur->state);
617#endif
618			if (--cur->state->rule.ptr->states <= 0)
619				pf_rm_rule(NULL, cur->state->rule.ptr);
620			if (cur->state->nat_rule.ptr != NULL)
621				if (--cur->state->nat_rule.ptr->states <= 0)
622					pf_rm_rule(NULL,
623					    cur->state->nat_rule.ptr);
624			if (cur->state->anchor.ptr != NULL)
625				if (--cur->state->anchor.ptr->states <= 0)
626					pf_rm_rule(NULL,
627					    cur->state->anchor.ptr);
628			pf_normalize_tcp_cleanup(cur->state);
629			pool_put(&pf_state_pl, cur->state);
630			pool_put(&pf_tree_pl, cur);
631			pool_put(&pf_tree_pl, peer);
632			pf_status.fcounters[FCNT_STATE_REMOVALS]++;
633			pf_status.states--;
634		}
635	}
636}
637
638int
639pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
640{
641	if (aw->type != PF_ADDR_TABLE)
642		return (0);
643	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
644		return (1);
645	return (0);
646}
647
648void
649pf_tbladdr_remove(struct pf_addr_wrap *aw)
650{
651	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
652		return;
653	pfr_detach_table(aw->p.tbl);
654	aw->p.tbl = NULL;
655}
656
657void
658pf_tbladdr_copyout(struct pf_addr_wrap *aw)
659{
660	struct pfr_ktable *kt = aw->p.tbl;
661
662	if (aw->type != PF_ADDR_TABLE || kt == NULL)
663		return;
664	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
665		kt = kt->pfrkt_root;
666	aw->p.tbl = NULL;
667	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
668		kt->pfrkt_cnt : -1;
669}
670
671int
672pf_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af)
673{
674	if (aw->type != PF_ADDR_DYNIFTL)
675		return (0);
676	aw->p.dyn = pool_get(&pf_addr_pl, PR_NOWAIT);
677	if (aw->p.dyn == NULL)
678		return (1);
679	bcopy(aw->v.ifname, aw->p.dyn->ifname, sizeof(aw->p.dyn->ifname));
680	aw->p.dyn->ifp = ifunit(aw->p.dyn->ifname);
681	if (aw->p.dyn->ifp == NULL) {
682		pool_put(&pf_addr_pl, aw->p.dyn);
683		aw->p.dyn = NULL;
684		return (1);
685	}
686	aw->p.dyn->addr = &aw->v.a.addr;
687	aw->p.dyn->af = af;
688	aw->p.dyn->undefined = 1;
689#if !defined(__FreeBSD__)
690	aw->p.dyn->hook_cookie = hook_establish(
691	    aw->p.dyn->ifp->if_addrhooks, 1,
692	    pf_dynaddr_update, aw->p.dyn);
693	if (aw->p.dyn->hook_cookie == NULL) {
694		pool_put(&pf_addr_pl, aw->p.dyn);
695		aw->p.dyn = NULL;
696		return (1);
697	}
698#elif defined(__FreeBSD__) && defined(HOOK_HACK)
699	PF_UNLOCK();
700	aw->p.dyn->hook_cookie = EVENTHANDLER_REGISTER(ifaddr_event,
701	    pf_dynaddr_update_event, aw->p.dyn, EVENTHANDLER_PRI_ANY);
702	PF_LOCK();
703	if (aw->p.dyn->hook_cookie == NULL) {
704		pool_put(&pf_addr_pl, aw->p.dyn);
705		aw->p.dyn = NULL;
706		return (1);
707	}
708#else
709	/*
710	 * XXX
711	 * We have no hook_establish(9)/dohooks(9) kernel interfaces.
712	 * This means that we do not aware of interface address changes(add,
713	 * remove, etc). User should update pf rule manually after interface
714	 * address changed. This may not be possible solution if you use xDSL.
715	 * ipfw/ipfw2's approach with this situation(with me keyword) is not
716	 * very efficient due to analyzing interface address during runtime.
717	 * Another solution is to use a user-land daemon watching address
718	 * changes with socket interface. Neither one is good.
719	 * Supporting hook_establish(9) requries modification of in_control()
720	 * located in netinet/in.c.
721	 */
722#endif
723	pf_dynaddr_update(aw->p.dyn);
724	return (0);
725}
726
#if defined(__FreeBSD__) && defined(HOOK_HACK)
/*
 * Handler registered for ifaddr_event by pf_dynaddr_setup(): refresh the
 * dynamic address passed as `arg' while holding the pf lock.  The `ifp'
 * argument is not used.
 */
void
pf_dynaddr_update_event(void *arg, struct ifnet *ifp)
{
	void	*dynaddr = arg;

	PF_LOCK();
	pf_dynaddr_update(dynaddr);
	PF_UNLOCK();
}
#endif
735#endif
736
737void
738pf_dynaddr_update(void *p)
739{
740	struct pf_addr_dyn	*ad = (struct pf_addr_dyn *)p;
741	struct ifaddr		*ia;
742	int			 s, changed = 0;
743
744	if (ad == NULL || ad->ifp == NULL)
745		panic("pf_dynaddr_update");
746	s = splsoftnet();
747	TAILQ_FOREACH(ia, &ad->ifp->if_addrlist, ifa_list)
748		if (ia->ifa_addr != NULL &&
749		    ia->ifa_addr->sa_family == ad->af) {
750			if (ad->af == AF_INET) {
751				struct in_addr *a, *b;
752
753				a = &ad->addr->v4;
754				b = &((struct sockaddr_in *)ia->ifa_addr)
755				    ->sin_addr;
756				if (ad->undefined ||
757				    memcmp(a, b, sizeof(*a))) {
758					bcopy(b, a, sizeof(*a));
759					changed = 1;
760				}
761			} else if (ad->af == AF_INET6) {
762				struct in6_addr *a, *b;
763
764				a = &ad->addr->v6;
765				b = &((struct sockaddr_in6 *)ia->ifa_addr)
766				    ->sin6_addr;
767				if (ad->undefined ||
768				    memcmp(a, b, sizeof(*a))) {
769					bcopy(b, a, sizeof(*a));
770					changed = 1;
771				}
772			}
773			if (changed)
774				ad->undefined = 0;
775			break;
776		}
777	if (ia == NULL)
778		ad->undefined = 1;
779	splx(s);
780}
781
782void
783pf_dynaddr_remove(struct pf_addr_wrap *aw)
784{
785	if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL)
786		return;
787#if !defined(__FreeBSD__)
788	hook_disestablish(aw->p.dyn->ifp->if_addrhooks,
789	    aw->p.dyn->hook_cookie);
790#elif defined(__FreeBSD__) && defined(HOOK_HACK)
791	PF_UNLOCK();
792	EVENTHANDLER_DEREGISTER(ifaddr_event, aw->p.dyn->hook_cookie);
793	PF_LOCK();
794#else
795	/*
796	 * XXX
797	 * We have no hook_establish(9)/dohooks(9) kernel interfaces.
798	 * See comments above function, pf_dynaddr_setup().
799	 */
800#endif
801	pool_put(&pf_addr_pl, aw->p.dyn);
802	aw->p.dyn = NULL;
803}
804
805void
806pf_dynaddr_copyout(struct pf_addr_wrap *aw)
807{
808	if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL)
809		return;
810	bcopy(aw->p.dyn->ifname, aw->v.ifname, sizeof(aw->v.ifname));
811	aw->p.dyn = (struct pf_addr_dyn *)1;
812}
813
814void
815pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
816{
817	switch (af) {
818#ifdef INET
819	case AF_INET: {
820		u_int32_t a = ntohl(addr->addr32[0]);
821		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
822		    (a>>8)&255, a&255);
823		if (p) {
824			p = ntohs(p);
825			printf(":%u", p);
826		}
827		break;
828	}
829#endif /* INET */
830#ifdef INET6
831	case AF_INET6: {
832		u_int16_t b;
833		u_int8_t i, curstart = 255, curend = 0,
834		    maxstart = 0, maxend = 0;
835		for (i = 0; i < 8; i++) {
836			if (!addr->addr16[i]) {
837				if (curstart == 255)
838					curstart = i;
839				else
840					curend = i;
841			} else {
842				if (curstart) {
843					if ((curend - curstart) >
844					    (maxend - maxstart)) {
845						maxstart = curstart;
846						maxend = curend;
847						curstart = 255;
848					}
849				}
850			}
851		}
852		for (i = 0; i < 8; i++) {
853			if (i >= maxstart && i <= maxend) {
854				if (maxend != 7) {
855					if (i == maxstart)
856						printf(":");
857				} else {
858					if (i == maxend)
859						printf(":");
860				}
861			} else {
862				b = ntohs(addr->addr16[i]);
863				printf("%x", b);
864				if (i < 7)
865					printf(":");
866			}
867		}
868		if (p) {
869			p = ntohs(p);
870			printf("[%u]", p);
871		}
872		break;
873	}
874#endif /* INET6 */
875	}
876}
877
878void
879pf_print_state(struct pf_state *s)
880{
881	switch (s->proto) {
882	case IPPROTO_TCP:
883		printf("TCP ");
884		break;
885	case IPPROTO_UDP:
886		printf("UDP ");
887		break;
888	case IPPROTO_ICMP:
889		printf("ICMP ");
890		break;
891	case IPPROTO_ICMPV6:
892		printf("ICMPV6 ");
893		break;
894	default:
895		printf("%u ", s->proto);
896		break;
897	}
898	pf_print_host(&s->lan.addr, s->lan.port, s->af);
899	printf(" ");
900	pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
901	printf(" ");
902	pf_print_host(&s->ext.addr, s->ext.port, s->af);
903	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
904	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
905	if (s->src.wscale && s->dst.wscale)
906		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
907	printf("]");
908	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
909	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
910	if (s->src.wscale && s->dst.wscale)
911		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
912	printf("]");
913	printf(" %u:%u", s->src.state, s->dst.state);
914}
915
916void
917pf_print_flags(u_int8_t f)
918{
919	if (f)
920		printf(" ");
921	if (f & TH_FIN)
922		printf("F");
923	if (f & TH_SYN)
924		printf("S");
925	if (f & TH_RST)
926		printf("R");
927	if (f & TH_PUSH)
928		printf("P");
929	if (f & TH_ACK)
930		printf("A");
931	if (f & TH_URG)
932		printf("U");
933	if (f & TH_ECE)
934		printf("E");
935	if (f & TH_CWR)
936		printf("W");
937}
938
939#define	PF_SET_SKIP_STEPS(i)					\
940	do {							\
941		while (head[i] != cur) {			\
942			head[i]->skip[i].ptr = cur;		\
943			head[i] = TAILQ_NEXT(head[i], entries);	\
944		}						\
945	} while (0)
946
947void
948pf_calc_skip_steps(struct pf_rulequeue *rules)
949{
950	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
951	int i;
952
953	cur = TAILQ_FIRST(rules);
954	prev = cur;
955	for (i = 0; i < PF_SKIP_COUNT; ++i)
956		head[i] = cur;
957	while (cur != NULL) {
958
959		if (cur->ifp != prev->ifp || cur->ifnot != prev->ifnot)
960			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
961		if (cur->direction != prev->direction)
962			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
963		if (cur->af != prev->af)
964			PF_SET_SKIP_STEPS(PF_SKIP_AF);
965		if (cur->proto != prev->proto)
966			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
967		if (cur->src.not != prev->src.not ||
968		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
969			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
970		if (cur->src.port[0] != prev->src.port[0] ||
971		    cur->src.port[1] != prev->src.port[1] ||
972		    cur->src.port_op != prev->src.port_op)
973			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
974		if (cur->dst.not != prev->dst.not ||
975		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
976			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
977		if (cur->dst.port[0] != prev->dst.port[0] ||
978		    cur->dst.port[1] != prev->dst.port[1] ||
979		    cur->dst.port_op != prev->dst.port_op)
980			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
981
982		prev = cur;
983		cur = TAILQ_NEXT(cur, entries);
984	}
985	for (i = 0; i < PF_SKIP_COUNT; ++i)
986		PF_SET_SKIP_STEPS(i);
987}
988
989int
990pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
991{
992	if (aw1->type != aw2->type)
993		return (1);
994	switch (aw1->type) {
995	case PF_ADDR_ADDRMASK:
996		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
997			return (1);
998		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
999			return (1);
1000		return (0);
1001	case PF_ADDR_DYNIFTL:
1002		if (aw1->p.dyn->ifp != aw2->p.dyn->ifp)
1003			return (1);
1004		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1005			return (1);
1006		return (0);
1007	case PF_ADDR_NOROUTE:
1008		return (0);
1009	case PF_ADDR_TABLE:
1010		return (aw1->p.tbl != aw2->p.tbl);
1011	default:
1012		printf("invalid address type: %d\n", aw1->type);
1013		return (1);
1014	}
1015}
1016
1017void
1018pf_rule_set_qid(struct pf_rulequeue *rules)
1019{
1020	struct pf_rule *rule;
1021
1022	TAILQ_FOREACH(rule, rules, entries)
1023		if (rule->qname[0] != 0) {
1024			rule->qid = pf_qname_to_qid(rule->qname);
1025			if (rule->pqname[0] != 0)
1026				rule->pqid = pf_qname_to_qid(rule->pqname);
1027			else
1028				rule->pqid = rule->qid;
1029		}
1030}
1031
1032u_int32_t
1033pf_qname_to_qid(char *qname)
1034{
1035	struct pf_altq		*altq;
1036
1037	TAILQ_FOREACH(altq, pf_altqs_active, entries)
1038		if (!strcmp(altq->qname, qname))
1039			return (altq->qid);
1040
1041	return (0);
1042}
1043
1044void
1045pf_update_anchor_rules()
1046{
1047	struct pf_rule	*rule;
1048	int		 i;
1049
1050	for (i = 0; i < PF_RULESET_MAX; ++i)
1051		TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr,
1052		    entries)
1053			if (rule->anchorname[0])
1054				rule->anchor = pf_find_anchor(rule->anchorname);
1055			else
1056				rule->anchor = NULL;
1057}
1058
/*
 * Return `cksum' adjusted for one 16-bit word of the covered data changing
 * from `old' to `new': cksum + old - new with the carry out of the low 16
 * bits folded back in once (this looks like the usual incremental Internet
 * checksum update).
 *
 * When `udp' is non-zero, an input checksum of 0 is returned unchanged and
 * a result of 0 is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	sum = (u_int32_t)cksum + old - new;
	sum = (sum >> 16) + (sum & 0xffff);
	sum &= 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
1073
1074void
1075pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1076    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1077{
1078	struct pf_addr	ao;
1079	u_int16_t	po = *p;
1080
1081	PF_ACPY(&ao, a, af);
1082	PF_ACPY(a, an, af);
1083
1084	*p = pn;
1085
1086	switch (af) {
1087#ifdef INET
1088	case AF_INET:
1089		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1090		    ao.addr16[0], an->addr16[0], 0),
1091		    ao.addr16[1], an->addr16[1], 0);
1092		*p = pn;
1093		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1094		    ao.addr16[0], an->addr16[0], u),
1095		    ao.addr16[1], an->addr16[1], u),
1096		    po, pn, u);
1097		break;
1098#endif /* INET */
1099#ifdef INET6
1100	case AF_INET6:
1101		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1102		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1103		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1104		    ao.addr16[0], an->addr16[0], u),
1105		    ao.addr16[1], an->addr16[1], u),
1106		    ao.addr16[2], an->addr16[2], u),
1107		    ao.addr16[3], an->addr16[3], u),
1108		    ao.addr16[4], an->addr16[4], u),
1109		    ao.addr16[5], an->addr16[5], u),
1110		    ao.addr16[6], an->addr16[6], u),
1111		    ao.addr16[7], an->addr16[7], u),
1112		    po, pn, u);
1113		break;
1114#endif /* INET6 */
1115	}
1116}
1117
1118
/*
 * Replace the u_int32_t stored at 'a' with 'an' and fix up checksum *c for
 * both 16-bit halves of the value. 'a' is a void * accessed only through
 * memcpy(), so there are no alignment restrictions on it.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	ao;
	u_int16_t	partial;

	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));

	/* upper halves first, then lower halves */
	partial = pf_cksum_fixup(*c, ao >> 16, an >> 16, u);
	*c = pf_cksum_fixup(partial, ao & 0xffff, an & 0xffff, u);
}
1130
#ifdef INET6
/*
 * Replace the IPv6 address *a with *an and fix up checksum *c for each of
 * the eight 16-bit address words, passing 'u' on to pf_cksum_fixup().
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	old_addr;
	int		w;

	PF_ACPY(&old_addr, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	for (w = 0; w < 8; w++)
		*c = pf_cksum_fixup(*c, old_addr.addr16[w],
		    an->addr16[w], u);
}
#endif /* INET6 */
1153
1154void
1155pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1156    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1157    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1158{
1159	struct pf_addr	oia, ooa;
1160
1161	PF_ACPY(&oia, ia, af);
1162	PF_ACPY(&ooa, oa, af);
1163
1164	/* Change inner protocol port, fix inner protocol checksum. */
1165	if (ip != NULL) {
1166		u_int16_t	oip = *ip;
1167		u_int32_t	opc;
1168
1169		if (pc != NULL)
1170			opc = *pc;
1171		*ip = np;
1172		if (pc != NULL)
1173			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1174		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1175		if (pc != NULL)
1176			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1177	}
1178	/* Change inner ip address, fix inner ip and icmp checksums. */
1179	PF_ACPY(ia, na, af);
1180	switch (af) {
1181#ifdef INET
1182	case AF_INET: {
1183		u_int32_t	 oh2c = *h2c;
1184
1185		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1186		    oia.addr16[0], ia->addr16[0], 0),
1187		    oia.addr16[1], ia->addr16[1], 0);
1188		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1189		    oia.addr16[0], ia->addr16[0], 0),
1190		    oia.addr16[1], ia->addr16[1], 0);
1191		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1192		break;
1193	}
1194#endif /* INET */
1195#ifdef INET6
1196	case AF_INET6:
1197		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1198		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1199		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1200		    oia.addr16[0], ia->addr16[0], u),
1201		    oia.addr16[1], ia->addr16[1], u),
1202		    oia.addr16[2], ia->addr16[2], u),
1203		    oia.addr16[3], ia->addr16[3], u),
1204		    oia.addr16[4], ia->addr16[4], u),
1205		    oia.addr16[5], ia->addr16[5], u),
1206		    oia.addr16[6], ia->addr16[6], u),
1207		    oia.addr16[7], ia->addr16[7], u);
1208		break;
1209#endif /* INET6 */
1210	}
1211	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
1212	PF_ACPY(oa, na, af);
1213	switch (af) {
1214#ifdef INET
1215	case AF_INET:
1216		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1217		    ooa.addr16[0], oa->addr16[0], 0),
1218		    ooa.addr16[1], oa->addr16[1], 0);
1219		break;
1220#endif /* INET */
1221#ifdef INET6
1222	case AF_INET6:
1223		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1224		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1225		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1226		    ooa.addr16[0], oa->addr16[0], u),
1227		    ooa.addr16[1], oa->addr16[1], u),
1228		    ooa.addr16[2], oa->addr16[2], u),
1229		    ooa.addr16[3], oa->addr16[3], u),
1230		    ooa.addr16[4], oa->addr16[4], u),
1231		    ooa.addr16[5], oa->addr16[5], u),
1232		    ooa.addr16[6], oa->addr16[6], u),
1233		    ooa.addr16[7], oa->addr16[7], u);
1234		break;
1235#endif /* INET6 */
1236	}
1237}
1238
1239void
1240pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1241    const struct pf_addr *saddr, const struct pf_addr *daddr,
1242    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
1243    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl)
1244{
1245	struct mbuf	*m;
1246	struct m_tag	*mtag;
1247	int		 len, tlen;
1248#ifdef INET
1249	struct ip	*h;
1250#endif /* INET */
1251#ifdef INET6
1252	struct ip6_hdr	*h6;
1253#endif /* INET6 */
1254	struct tcphdr	*th;
1255#if defined(__FreeBSD__)
1256	struct ip 	*ip;
1257#if (__FreeBSD_version < 501114)
1258	struct route 	 ro;
1259#endif
1260#endif
1261	char *opt;
1262
1263	/* maximum segment size tcp option */
1264	tlen = sizeof(struct tcphdr);
1265	if (mss)
1266		tlen += 4;
1267
1268	switch (af) {
1269#ifdef INET
1270	case AF_INET:
1271		len = sizeof(struct ip) + tlen;
1272		break;
1273#endif /* INET */
1274#ifdef INET6
1275	case AF_INET6:
1276		len = sizeof(struct ip6_hdr) + tlen;
1277		break;
1278#endif /* INET6 */
1279	}
1280
1281	/* create outgoing mbuf */
1282	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1283	if (mtag == NULL)
1284		return;
1285	m = m_gethdr(M_DONTWAIT, MT_HEADER);
1286	if (m == NULL) {
1287		m_tag_free(mtag);
1288		return;
1289	}
1290	m_tag_prepend(m, mtag);
1291#ifdef ALTQ
1292	if (r != NULL && r->qid) {
1293		struct altq_tag *atag;
1294
1295		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
1296		if (mtag != NULL) {
1297			atag = (struct altq_tag *)(mtag + 1);
1298			atag->qid = r->qid;
1299			/* add hints for ecn */
1300			atag->af = af;
1301			atag->hdr = mtod(m, struct ip *);
1302			m_tag_prepend(m, mtag);
1303		}
1304	}
1305#endif
1306	m->m_data += max_linkhdr;
1307	m->m_pkthdr.len = m->m_len = len;
1308	m->m_pkthdr.rcvif = NULL;
1309	bzero(m->m_data, len);
1310	switch (af) {
1311#ifdef INET
1312	case AF_INET:
1313		h = mtod(m, struct ip *);
1314
1315		/* IP header fields included in the TCP checksum */
1316		h->ip_p = IPPROTO_TCP;
1317		h->ip_len = htons(tlen);
1318		h->ip_src.s_addr = saddr->v4.s_addr;
1319		h->ip_dst.s_addr = daddr->v4.s_addr;
1320
1321		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1322		break;
1323#endif /* INET */
1324#ifdef INET6
1325	case AF_INET6:
1326		h6 = mtod(m, struct ip6_hdr *);
1327
1328		/* IP header fields included in the TCP checksum */
1329		h6->ip6_nxt = IPPROTO_TCP;
1330		h6->ip6_plen = htons(tlen);
1331		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1332		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1333
1334		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1335		break;
1336#endif /* INET6 */
1337	}
1338
1339	/* TCP header */
1340	th->th_sport = sport;
1341	th->th_dport = dport;
1342	th->th_seq = htonl(seq);
1343	th->th_ack = htonl(ack);
1344	th->th_off = tlen >> 2;
1345	th->th_flags = flags;
1346	th->th_win = htons(win);
1347
1348	if (mss) {
1349		opt = (char *)(th + 1);
1350		opt[0] = TCPOPT_MAXSEG;
1351		opt[1] = 4;
1352		HTONS(mss);
1353		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1354	}
1355
1356	switch (af) {
1357#ifdef INET
1358	case AF_INET:
1359		/* TCP checksum */
1360		th->th_sum = in_cksum(m, len);
1361
1362		/* Finish the IP header */
1363		h->ip_v = 4;
1364		h->ip_hl = sizeof(*h) >> 2;
1365		h->ip_tos = IPTOS_LOWDELAY;
1366#if defined(__FreeBSD__)
1367                h->ip_off = htons(path_mtu_discovery ? IP_DF : 0);
1368#else
1369		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
1370#endif
1371		h->ip_len = htons(len);
1372		h->ip_ttl = ttl ? ttl : ip_defttl;
1373		h->ip_sum = 0;
1374#if defined(__FreeBSD__)
1375		ip = mtod(m, struct ip *);
1376		/*
1377		 * XXX
1378		 * OpenBSD changed ip_len/ip_off byte ordering!
1379		 * Because FreeBSD assumes host byte ordering we need to
1380		 * change here.
1381		 */
1382		NTOHS(ip->ip_len);
1383		NTOHS(ip->ip_off);
1384#if (__FreeBSD_version < 501114)
1385		bzero(&ro, sizeof(ro));
1386		ip_rtaddr(ip->ip_dst, &ro);
1387		PF_UNLOCK();
1388		ip_output(m, (void *)NULL, &ro, 0, (void *)NULL,
1389			(void *)NULL);
1390		PF_LOCK();
1391		if(ro.ro_rt) {
1392			RTFREE(ro.ro_rt);
1393		}
1394#else /* __FreeBSD_version >= 501114 */
1395		PF_UNLOCK();
1396		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
1397			(void *)NULL);
1398		PF_LOCK();
1399#endif
1400#else /* ! __FreeBSD__ */
1401		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
1402		    (void *)NULL);
1403#endif
1404		break;
1405#endif /* INET */
1406#ifdef INET6
1407	case AF_INET6:
1408		/* TCP checksum */
1409		th->th_sum = in6_cksum(m, IPPROTO_TCP,
1410		    sizeof(struct ip6_hdr), tlen);
1411
1412		h6->ip6_vfc |= IPV6_VERSION;
1413		h6->ip6_hlim = IPV6_DEFHLIM;
1414
1415#if defined(__FreeBSD__)
1416		PF_UNLOCK();
1417		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1418		PF_LOCK();
1419#else
1420		ip6_output(m, NULL, NULL, 0, NULL, NULL);
1421#endif
1422		break;
1423#endif /* INET6 */
1424	}
1425}
1426
1427void
1428pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1429    struct pf_rule *r)
1430{
1431	struct m_tag	*mtag;
1432	struct mbuf	*m0;
1433#if defined(__FreeBSD__)
1434	struct ip *ip;
1435#endif
1436
1437	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1438	if (mtag == NULL)
1439		return;
1440#if defined(__FreeBSD__)
1441	m0 = m_copypacket(m, M_DONTWAIT);
1442#else
1443	m0 = m_copy(m, 0, M_COPYALL);
1444#endif
1445	if (m0 == NULL) {
1446		m_tag_free(mtag);
1447		return;
1448	}
1449	m_tag_prepend(m0, mtag);
1450
1451#ifdef ALTQ
1452	if (r->qid) {
1453		struct altq_tag *atag;
1454
1455		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
1456		if (mtag != NULL) {
1457			atag = (struct altq_tag *)(mtag + 1);
1458			atag->qid = r->qid;
1459			/* add hints for ecn */
1460			atag->af = af;
1461			atag->hdr = mtod(m0, struct ip *);
1462			m_tag_prepend(m0, mtag);
1463		}
1464	}
1465#endif
1466
1467	switch (af) {
1468#ifdef INET
1469	case AF_INET:
1470#if defined(__FreeBSD__)
1471		/* icmp_error() expects host byte ordering */
1472		ip = mtod(m0, struct ip *);
1473		NTOHS(ip->ip_len);
1474		NTOHS(ip->ip_off);
1475		PF_UNLOCK();
1476#endif
1477		icmp_error(m0, type, code, 0, NULL);
1478#if defined(__FreeBSD__)
1479		PF_LOCK();
1480#endif
1481		break;
1482#endif /* INET */
1483#ifdef INET6
1484	case AF_INET6:
1485#if defined(__FreeBSD__)
1486		PF_UNLOCK();
1487#endif
1488		icmp6_error(m0, type, code, 0);
1489#if defined(__FreeBSD__)
1490		PF_LOCK();
1491#endif
1492		break;
1493#endif /* INET6 */
1494	}
1495}
1496
/*
 * Compare addresses a and b under mask m for address family af.
 *
 * With n == 0 the result is 1 when the masked addresses are equal and 0
 * otherwise; with n != 0 the result is inverted. An address family that is
 * not compiled in never counts as equal.
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;
	int	w;

	switch (af) {
#ifdef INET
	case AF_INET:
		/* bits that differ and are selected by the mask */
		equal = ((a->addr32[0] ^ b->addr32[0]) & m->addr32[0]) == 0;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		equal = 1;
		for (w = 0; w < 4; w++) {
			if (((a->addr32[w] ^ b->addr32[w]) &
			    m->addr32[w]) != 0) {
				equal = 0;
				break;
			}
		}
		break;
#endif /* INET6 */
	}

	return (n ? !equal : equal);
}
1542
1543int
1544pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1545{
1546	switch (op) {
1547	case PF_OP_IRG:
1548		return ((p > a1) && (p < a2));
1549	case PF_OP_XRG:
1550		return ((p < a1) || (p > a2));
1551	case PF_OP_RRG:
1552		return ((p >= a1) && (p <= a2));
1553	case PF_OP_EQ:
1554		return (p == a1);
1555	case PF_OP_NE:
1556		return (p != a1);
1557	case PF_OP_LT:
1558		return (p < a1);
1559	case PF_OP_LE:
1560		return (p <= a1);
1561	case PF_OP_GT:
1562		return (p > a1);
1563	case PF_OP_GE:
1564		return (p >= a1);
1565	}
1566	return (0); /* never reached */
1567}
1568
/*
 * Port flavour of pf_match(): all three port values are converted with
 * ntohs() before the comparison.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
}
1577
1578int
1579pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1580{
1581	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1582		return (0);
1583	return (pf_match(op, a1, a2, u));
1584}
1585
1586int
1587pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1588{
1589	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1590		return (0);
1591	return (pf_match(op, a1, a2, g));
1592}
1593
1594struct pf_tag *
1595pf_get_tag(struct mbuf *m)
1596{
1597	struct m_tag	*mtag;
1598
1599	if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL)
1600		return ((struct pf_tag *)(mtag + 1));
1601	else
1602		return (NULL);
1603}
1604
1605int
1606pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat,
1607    struct pf_rule *rdr, struct pf_tag *pftag, int *tag)
1608{
1609	if (*tag == -1) {	/* find mbuf tag */
1610		pftag = pf_get_tag(m);
1611		if (pftag != NULL)
1612			*tag = pftag->tag;
1613		else
1614			*tag = 0;
1615		if (nat != NULL && nat->tag)
1616			*tag = nat->tag;
1617		if (rdr != NULL && rdr->tag)
1618			*tag = rdr->tag;
1619	}
1620
1621	return ((!r->match_tag_not && r->match_tag == *tag) ||
1622	    (r->match_tag_not && r->match_tag != *tag));
1623}
1624
1625int
1626pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag)
1627{
1628	struct m_tag	*mtag;
1629
1630	if (tag <= 0)
1631		return (0);
1632
1633	if (pftag == NULL) {
1634		mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT);
1635		if (mtag == NULL)
1636			return (1);
1637		((struct pf_tag *)(mtag + 1))->tag = tag;
1638		m_tag_prepend(m, mtag);
1639	} else
1640		pftag->tag = tag;
1641
1642	return (0);
1643}
1644
/*
 * Step from anchor rule (r) into its anchor. The anchor rule is remembered
 * in (a); (s) walks the anchor's rulesets until one has a first rule in
 * active queue n, which becomes the new (r). When no ruleset has a rule,
 * (r) continues with the rule after the anchor rule and (a) is reset.
 * Panics unless (r) has an anchor and (a) and (s) are both NULL on entry.
 * The arguments are evaluated repeatedly and must be plain variables.
 */
#define PF_STEP_INTO_ANCHOR(r, a, s, n)					\
	do {								\
		if ((r) == NULL || (r)->anchor == NULL ||		\
		    (s) != NULL || (a) != NULL)				\
			panic("PF_STEP_INTO_ANCHOR");			\
		(a) = (r);						\
		(r) = NULL;						\
		for ((s) = TAILQ_FIRST(&(a)->anchor->rulesets);	\
		    (s) != NULL; (s) = TAILQ_NEXT((s), entries)) {	\
			(r) = TAILQ_FIRST((s)->rules[n].active.ptr);	\
			if ((r) != NULL)				\
				break;					\
		}							\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1661
/*
 * Called when the rules of ruleset (s) inside anchor rule (a) are
 * exhausted, i.e. (r) is NULL. Moves (s) to the next ruleset of the anchor
 * that has a first rule in active queue n and points (r) at that rule.
 * When none is left, (r) continues with the rule after the anchor rule and
 * (a) is reset. Panics unless (r) is NULL and (a) and (s) are non-NULL on
 * entry. The arguments are evaluated repeatedly and must be plain
 * variables.
 */
#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n)				\
	do {								\
		if ((r) != NULL || (a) == NULL || (s) == NULL)		\
			panic("PF_STEP_OUT_OF_ANCHOR");			\
		for ((s) = TAILQ_NEXT((s), entries); (s) != NULL;	\
		    (s) = TAILQ_NEXT((s), entries)) {			\
			(r) = TAILQ_FIRST((s)->rules[n].active.ptr);	\
			if ((r) != NULL)				\
				break;					\
		}							\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1675
1676#ifdef INET6
1677void
1678pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
1679    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
1680{
1681	switch (af) {
1682#ifdef INET
1683	case AF_INET:
1684		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1685		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1686		break;
1687#endif /* INET */
1688	case AF_INET6:
1689		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1690		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1691		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
1692		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
1693		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
1694		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
1695		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
1696		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
1697		break;
1698	}
1699}
1700
1701void
1702pf_addr_inc(struct pf_addr *addr, u_int8_t af)
1703{
1704	switch (af) {
1705#ifdef INET
1706	case AF_INET:
1707		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
1708		break;
1709#endif /* INET */
1710	case AF_INET6:
1711		if (addr->addr32[3] == 0xffffffff) {
1712			addr->addr32[3] = 0;
1713			if (addr->addr32[2] == 0xffffffff) {
1714				addr->addr32[2] = 0;
1715				if (addr->addr32[1] == 0xffffffff) {
1716					addr->addr32[1] = 0;
1717					addr->addr32[0] =
1718					    htonl(ntohl(addr->addr32[0]) + 1);
1719				} else
1720					addr->addr32[1] =
1721					    htonl(ntohl(addr->addr32[1]) + 1);
1722			} else
1723				addr->addr32[2] =
1724				    htonl(ntohl(addr->addr32[2]) + 1);
1725		} else
1726			addr->addr32[3] =
1727			    htonl(ntohl(addr->addr32[3]) + 1);
1728		break;
1729	}
1730}
1731#endif /* INET6 */
1732
/*
 * Scramble the three words a, b and c in place through nine
 * subtract-and-xor-shift steps. The arguments are evaluated many times and
 * must be plain unsigned variables.
 */
#define mix(a,b,c) \
	do {						\
		(a) = ((a) - (b) - (c)) ^ ((c) >> 13);	\
		(b) = ((b) - (c) - (a)) ^ ((a) << 8);	\
		(c) = ((c) - (a) - (b)) ^ ((b) >> 13);	\
		(a) = ((a) - (b) - (c)) ^ ((c) >> 12);	\
		(b) = ((b) - (c) - (a)) ^ ((a) << 16);	\
		(c) = ((c) - (a) - (b)) ^ ((b) >> 5);	\
		(a) = ((a) - (b) - (c)) ^ ((c) >> 3);	\
		(b) = ((b) - (c) - (a)) ^ ((a) << 10);	\
		(c) = ((c) - (a) - (b)) ^ ((b) >> 15);	\
	} while (0)
1745
1746/*
1747 * hash function based on bridge_hash in if_bridge.c
1748 */
1749void
1750pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
1751    struct pf_poolhashkey *key, sa_family_t af)
1752{
1753	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
1754
1755	switch (af) {
1756#ifdef INET
1757	case AF_INET:
1758		a += inaddr->addr32[0];
1759		b += key->key32[1];
1760		mix(a, b, c);
1761		hash->addr32[0] = c + key->key32[2];
1762		break;
1763#endif /* INET */
1764#ifdef INET6
1765	case AF_INET6:
1766		a += inaddr->addr32[0];
1767		b += inaddr->addr32[2];
1768		mix(a, b, c);
1769		hash->addr32[0] = c;
1770		a += inaddr->addr32[1];
1771		b += inaddr->addr32[3];
1772		c += key->key32[1];
1773		mix(a, b, c);
1774		hash->addr32[1] = c;
1775		a += inaddr->addr32[2];
1776		b += inaddr->addr32[1];
1777		c += key->key32[2];
1778		mix(a, b, c);
1779		hash->addr32[2] = c;
1780		a += inaddr->addr32[3];
1781		b += inaddr->addr32[0];
1782		c += key->key32[3];
1783		mix(a, b, c);
1784		hash->addr32[3] = c;
1785		break;
1786#endif /* INET6 */
1787	}
1788}
1789
1790int
1791pf_map_addr(u_int8_t af, struct pf_pool *rpool, struct pf_addr *saddr,
1792    struct pf_addr *naddr, struct pf_addr *init_addr)
1793{
1794	unsigned char		 hash[16];
1795	struct pf_addr		*raddr;
1796	struct pf_addr		*rmask;
1797	struct pf_pooladdr	*acur = rpool->cur;
1798
1799	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
1800		return (1);
1801	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL &&
1802	    rpool->cur->addr.p.dyn->undefined)
1803		return (1);
1804	if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1805		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
1806			return (1); /* unsupported */
1807	} else {
1808		raddr = &rpool->cur->addr.v.a.addr;
1809		rmask = &rpool->cur->addr.v.a.mask;
1810	}
1811
1812	switch (rpool->opts & PF_POOL_TYPEMASK) {
1813	case PF_POOL_NONE:
1814		PF_ACPY(naddr, raddr, af);
1815		break;
1816	case PF_POOL_BITMASK:
1817		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
1818		break;
1819	case PF_POOL_RANDOM:
1820		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
1821			switch (af) {
1822#ifdef INET
1823			case AF_INET:
1824				rpool->counter.addr32[0] = arc4random();
1825				break;
1826#endif /* INET */
1827#ifdef INET6
1828			case AF_INET6:
1829				if (rmask->addr32[3] != 0xffffffff)
1830					rpool->counter.addr32[3] = arc4random();
1831				else
1832					break;
1833				if (rmask->addr32[2] != 0xffffffff)
1834					rpool->counter.addr32[2] = arc4random();
1835				else
1836					break;
1837				if (rmask->addr32[1] != 0xffffffff)
1838					rpool->counter.addr32[1] = arc4random();
1839				else
1840					break;
1841				if (rmask->addr32[0] != 0xffffffff)
1842					rpool->counter.addr32[0] = arc4random();
1843				break;
1844#endif /* INET6 */
1845			}
1846			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
1847			PF_ACPY(init_addr, naddr, af);
1848
1849		} else {
1850			PF_AINC(&rpool->counter, af);
1851			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
1852		}
1853		break;
1854	case PF_POOL_SRCHASH:
1855		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
1856		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
1857		break;
1858	case PF_POOL_ROUNDROBIN:
1859		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1860			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
1861			    &rpool->tblidx, &rpool->counter,
1862			    &raddr, &rmask, af))
1863				goto get_addr;
1864		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
1865			goto get_addr;
1866
1867	try_next:
1868		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
1869			rpool->cur = TAILQ_FIRST(&rpool->list);
1870		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1871			rpool->tblidx = -1;
1872			if (pfr_pool_get(rpool->cur->addr.p.tbl,
1873			    &rpool->tblidx, &rpool->counter,
1874			    &raddr, &rmask, af)) {
1875				/* table contain no address of type 'af' */
1876				if (rpool->cur != acur)
1877					goto try_next;
1878				return (1);
1879			}
1880		} else {
1881			raddr = &rpool->cur->addr.v.a.addr;
1882			rmask = &rpool->cur->addr.v.a.mask;
1883			PF_ACPY(&rpool->counter, raddr, af);
1884		}
1885
1886	get_addr:
1887		PF_ACPY(naddr, &rpool->counter, af);
1888		PF_AINC(&rpool->counter, af);
1889		break;
1890	}
1891
1892	if (pf_status.debug >= PF_DEBUG_MISC &&
1893	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
1894		printf("pf_map_addr: selected address: ");
1895		pf_print_host(naddr, 0, af);
1896		printf("\n");
1897	}
1898
1899	return (0);
1900}
1901
1902int
1903pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_pool *rpool,
1904    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
1905    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high)
1906{
1907	struct pf_tree_node	key;
1908	struct pf_addr		init_addr;
1909	u_int16_t		cut;
1910
1911	bzero(&init_addr, sizeof(init_addr));
1912	if (pf_map_addr(af, rpool, saddr, naddr, &init_addr))
1913		return (1);
1914
1915	do {
1916		key.af = af;
1917		key.proto = proto;
1918		PF_ACPY(&key.addr[0], daddr, key.af);
1919		PF_ACPY(&key.addr[1], naddr, key.af);
1920		key.port[0] = dport;
1921
1922		/*
1923		 * port search; start random, step;
1924		 * similar 2 portloop in in_pcbbind
1925		 */
1926		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
1927			key.port[1] = 0;
1928			if (pf_find_state(&tree_ext_gwy, &key) == NULL)
1929				return (0);
1930		} else if (low == 0 && high == 0) {
1931			key.port[1] = *nport;
1932			if (pf_find_state(&tree_ext_gwy, &key) == NULL) {
1933				return (0);
1934			}
1935		} else if (low == high) {
1936			key.port[1] = htons(low);
1937			if (pf_find_state(&tree_ext_gwy, &key) == NULL) {
1938				*nport = htons(low);
1939				return (0);
1940			}
1941		} else {
1942			u_int16_t tmp;
1943
1944			if (low > high) {
1945				tmp = low;
1946				low = high;
1947				high = tmp;
1948			}
1949			/* low < high */
1950			cut = arc4random() % (1 + high - low) + low;
1951			/* low <= cut <= high */
1952			for (tmp = cut; tmp <= high; ++(tmp)) {
1953				key.port[1] = htons(tmp);
1954				if (pf_find_state(&tree_ext_gwy, &key) ==
1955				    NULL) {
1956					*nport = htons(tmp);
1957					return (0);
1958				}
1959			}
1960			for (tmp = cut - 1; tmp >= low; --(tmp)) {
1961				key.port[1] = htons(tmp);
1962				if (pf_find_state(&tree_ext_gwy, &key) ==
1963				    NULL) {
1964					*nport = htons(tmp);
1965					return (0);
1966				}
1967			}
1968		}
1969
1970		switch (rpool->opts & PF_POOL_TYPEMASK) {
1971		case PF_POOL_RANDOM:
1972		case PF_POOL_ROUNDROBIN:
1973			if (pf_map_addr(af, rpool, saddr, naddr, &init_addr))
1974				return (1);
1975			break;
1976		case PF_POOL_NONE:
1977		case PF_POOL_SRCHASH:
1978		case PF_POOL_BITMASK:
1979		default:
1980			return (1);
1981			break;
1982		}
1983	} while (! PF_AEQ(&init_addr, naddr, af) );
1984
1985	return (1);					/* none available */
1986}
1987
1988struct pf_rule *
1989pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
1990    int direction, struct ifnet *ifp, struct pf_addr *saddr, u_int16_t sport,
1991    struct pf_addr *daddr, u_int16_t dport, int rs_num)
1992{
1993	struct pf_rule		*r, *rm = NULL, *anchorrule = NULL;
1994	struct pf_ruleset	*ruleset = NULL;
1995
1996	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
1997	while (r && rm == NULL) {
1998		struct pf_rule_addr	*src = NULL, *dst = NULL;
1999		struct pf_addr_wrap	*xdst = NULL;
2000
2001		if (r->action == PF_BINAT && direction == PF_IN) {
2002			src = &r->dst;
2003			if (r->rpool.cur != NULL)
2004				xdst = &r->rpool.cur->addr;
2005		} else {
2006			src = &r->src;
2007			dst = &r->dst;
2008		}
2009
2010		r->evaluations++;
2011		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
2012		    (r->ifp == ifp && r->ifnot)))
2013			r = r->skip[PF_SKIP_IFP].ptr;
2014		else if (r->direction && r->direction != direction)
2015			r = r->skip[PF_SKIP_DIR].ptr;
2016		else if (r->af && r->af != pd->af)
2017			r = r->skip[PF_SKIP_AF].ptr;
2018		else if (r->proto && r->proto != pd->proto)
2019			r = r->skip[PF_SKIP_PROTO].ptr;
2020		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not))
2021			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2022			    PF_SKIP_DST_ADDR].ptr;
2023		else if (src->port_op && !pf_match_port(src->port_op,
2024		    src->port[0], src->port[1], sport))
2025			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2026			    PF_SKIP_DST_PORT].ptr;
2027		else if (dst != NULL &&
2028		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not))
2029			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2030		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0))
2031			r = TAILQ_NEXT(r, entries);
2032		else if (dst != NULL && dst->port_op &&
2033		    !pf_match_port(dst->port_op, dst->port[0],
2034		    dst->port[1], dport))
2035			r = r->skip[PF_SKIP_DST_PORT].ptr;
2036		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2037		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2038		    off, pd->hdr.tcp), r->os_fingerprint)))
2039			r = TAILQ_NEXT(r, entries);
2040		else if (r->anchorname[0] && r->anchor == NULL)
2041			r = TAILQ_NEXT(r, entries);
2042		else if (r->anchor == NULL)
2043				rm = r;
2044		else
2045			PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num);
2046		if (r == NULL && anchorrule != NULL)
2047			PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset,
2048			    rs_num);
2049	}
2050	if (rm != NULL && (rm->action == PF_NONAT ||
2051	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2052		return (NULL);
2053	return (rm);
2054}
2055
2056struct pf_rule *
2057pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2058    struct ifnet *ifp,
2059    struct pf_addr *saddr, u_int16_t sport,
2060    struct pf_addr *daddr, u_int16_t dport,
2061    struct pf_addr *naddr, u_int16_t *nport)
2062{
2063	struct pf_rule	*r = NULL;
2064
2065	if (direction == PF_OUT) {
2066		r = pf_match_translation(pd, m, off, direction, ifp, saddr,
2067		    sport, daddr, dport, PF_RULESET_BINAT);
2068		if (r == NULL)
2069			r = pf_match_translation(pd, m, off, direction, ifp,
2070			    saddr, sport, daddr, dport, PF_RULESET_NAT);
2071	} else {
2072		r = pf_match_translation(pd, m, off, direction, ifp, saddr,
2073		    sport, daddr, dport, PF_RULESET_RDR);
2074		if (r == NULL)
2075			r = pf_match_translation(pd, m, off, direction, ifp,
2076			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
2077	}
2078
2079	if (r != NULL) {
2080		switch (r->action) {
2081		case PF_NONAT:
2082		case PF_NOBINAT:
2083		case PF_NORDR:
2084			return (NULL);
2085			break;
2086		case PF_NAT:
2087			if (pf_get_sport(pd->af, pd->proto, &r->rpool, saddr,
2088			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2089			    r->rpool.proxy_port[1])) {
2090				DPFPRINTF(PF_DEBUG_MISC,
2091				    ("pf: NAT proxy port allocation "
2092				    "(%u-%u) failed\n",
2093				    r->rpool.proxy_port[0],
2094				    r->rpool.proxy_port[1]));
2095				return (NULL);
2096			}
2097			break;
2098		case PF_BINAT:
2099			switch (direction) {
2100			case PF_OUT:
2101				if (r->rpool.cur->addr.type ==
2102				    PF_ADDR_DYNIFTL &&
2103				    r->rpool.cur->addr.p.dyn->undefined)
2104					return (NULL);
2105				else
2106					PF_POOLMASK(naddr,
2107					    &r->rpool.cur->addr.v.a.addr,
2108					    &r->rpool.cur->addr.v.a.mask,
2109					    saddr, pd->af);
2110				break;
2111			case PF_IN:
2112				if (r->src.addr.type == PF_ADDR_DYNIFTL &&
2113				    r->src.addr.p.dyn->undefined)
2114					return (NULL);
2115				else
2116					PF_POOLMASK(naddr,
2117					    &r->src.addr.v.a.addr,
2118					    &r->src.addr.v.a.mask, daddr,
2119					    pd->af);
2120				break;
2121			}
2122			break;
2123		case PF_RDR: {
2124			if (pf_map_addr(r->af, &r->rpool, saddr, naddr, NULL))
2125				return (NULL);
2126
2127			if (r->rpool.proxy_port[1]) {
2128				u_int32_t	tmp_nport;
2129
2130				tmp_nport = ((ntohs(dport) -
2131				    ntohs(r->dst.port[0])) %
2132				    (r->rpool.proxy_port[1] -
2133				    r->rpool.proxy_port[0] + 1)) +
2134				    r->rpool.proxy_port[0];
2135
2136				/* wrap around if necessary */
2137				if (tmp_nport > 65535)
2138					tmp_nport -= 65535;
2139				*nport = htons((u_int16_t)tmp_nport);
2140			} else if (r->rpool.proxy_port[0])
2141				*nport = htons(r->rpool.proxy_port[0]);
2142			break;
2143		}
2144		default:
2145			return (NULL);
2146			break;
2147		}
2148	}
2149
2150	return (r);
2151}
2152
2153int
2154pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, sa_family_t af,
2155    int proto, struct pf_pdesc *pd)
2156{
2157	struct pf_addr		*saddr, *daddr;
2158	u_int16_t		 sport, dport;
2159#if defined(__FreeBSD__)
2160	struct inpcbinfo	*pi;
2161#else
2162	struct inpcbtable	*tb;
2163#endif
2164	struct inpcb		*inp;
2165
2166	*uid = UID_MAX;
2167	*gid = GID_MAX;
2168	switch (proto) {
2169	case IPPROTO_TCP:
2170		sport = pd->hdr.tcp->th_sport;
2171		dport = pd->hdr.tcp->th_dport;
2172#if defined(__FreeBSD__)
2173		pi = &tcbinfo;
2174#else
2175		tb = &tcbtable;
2176#endif
2177		break;
2178	case IPPROTO_UDP:
2179		sport = pd->hdr.udp->uh_sport;
2180		dport = pd->hdr.udp->uh_dport;
2181#if defined(__FreeBSD__)
2182		pi = &udbinfo;
2183#else
2184		tb = &udbtable;
2185#endif
2186		break;
2187	default:
2188		return (0);
2189	}
2190	if (direction == PF_IN) {
2191		saddr = pd->src;
2192		daddr = pd->dst;
2193	} else {
2194		u_int16_t	p;
2195
2196		p = sport;
2197		sport = dport;
2198		dport = p;
2199		saddr = pd->dst;
2200		daddr = pd->src;
2201	}
2202	switch(af) {
2203	case AF_INET:
2204#if defined(__FreeBSD__)
2205#if (__FreeBSD_version >= 500043)
2206		INP_INFO_RLOCK(pi);	/* XXX LOR */
2207#endif
2208		inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2209			dport, 0, NULL);
2210		if (inp == NULL) {
2211			inp = in_pcblookup_hash(pi, saddr->v4, sport,
2212			   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
2213			if(inp == NULL) {
2214#if (__FreeBSD_version >= 500043)
2215				INP_INFO_RUNLOCK(pi);
2216#endif
2217				return (0);
2218			}
2219		}
2220#else
2221		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
2222		if (inp == NULL) {
2223			inp = in_pcblookup(tb, &saddr->v4, sport, &daddr->v4,
2224			    dport, INPLOOKUP_WILDCARD);
2225			if (inp == NULL)
2226				return (0);
2227		}
2228#endif
2229		break;
2230#ifdef INET6
2231	case AF_INET6:
2232#if defined(__FreeBSD__)
2233#if (__FreeBSD_version >= 500043)
2234		INP_INFO_RLOCK(pi);
2235#endif
2236		inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2237			&daddr->v6, dport, 0, NULL);
2238		if (inp == NULL) {
2239			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2240			&daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2241			if (inp == NULL) {
2242#if (__FreeBSD_version >= 500043)
2243				INP_INFO_RUNLOCK(pi);
2244#endif
2245				return (0);
2246			}
2247		}
2248#else
2249		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
2250		    dport);
2251		if (inp == NULL) {
2252			inp = in_pcblookup(tb, &saddr->v6, sport, &daddr->v6,
2253			    dport, INPLOOKUP_WILDCARD | INPLOOKUP_IPV6);
2254			if (inp == NULL)
2255				return (0);
2256		}
2257#endif
2258		break;
2259#endif /* INET6 */
2260
2261	default:
2262		return (0);
2263	}
2264#if defined(__FreeBSD__)
2265#if (__FreeBSD_version >= 500043)
2266	INP_LOCK(inp);
2267#endif
2268	*uid = inp->inp_socket->so_cred->cr_uid;
2269	*gid = inp->inp_socket->so_cred->cr_groups[0];
2270#if (__FreeBSD_version >= 500043)
2271	INP_UNLOCK(inp);
2272	INP_INFO_RUNLOCK(pi);
2273#endif
2274#else
2275	*uid = inp->inp_socket->so_euid;
2276	*gid = inp->inp_socket->so_egid;
2277#endif
2278	return (1);
2279}
2280
2281u_int8_t
2282pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2283{
2284	int		 hlen;
2285	u_int8_t	 hdr[60];
2286	u_int8_t	*opt, optlen;
2287	u_int8_t	 wscale = 0;
2288
2289	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2290	if (hlen <= sizeof(struct tcphdr))
2291		return (0);
2292	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2293		return (0);
2294	opt = hdr + sizeof(struct tcphdr);
2295	hlen -= sizeof(struct tcphdr);
2296	while (hlen >= 3) {
2297		switch (*opt) {
2298		case TCPOPT_EOL:
2299		case TCPOPT_NOP:
2300			++opt;
2301			--hlen;
2302			break;
2303		case TCPOPT_WINDOW:
2304			wscale = opt[2];
2305			if (wscale > TCP_MAX_WINSHIFT)
2306				wscale = TCP_MAX_WINSHIFT;
2307			wscale |= PF_WSCALE_FLAG;
2308			/* fallthrough */
2309		default:
2310			optlen = opt[1];
2311			if (optlen < 2)
2312				optlen = 2;
2313			hlen -= optlen;
2314			opt += optlen;
2315		}
2316	}
2317	return (wscale);
2318}
2319
2320u_int16_t
2321pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2322{
2323	int		 hlen;
2324	u_int8_t	 hdr[60];
2325	u_int8_t	*opt, optlen;
2326	u_int16_t	 mss = tcp_mssdflt;
2327
2328	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2329	if (hlen <= sizeof(struct tcphdr))
2330		return (0);
2331	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2332		return (0);
2333	opt = hdr + sizeof(struct tcphdr);
2334	hlen -= sizeof(struct tcphdr);
2335	while (hlen >= TCPOLEN_MAXSEG) {
2336		switch (*opt) {
2337		case TCPOPT_EOL:
2338		case TCPOPT_NOP:
2339			++opt;
2340			--hlen;
2341			break;
2342		case TCPOPT_MAXSEG:
2343			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2344			/* fallthrough */
2345		default:
2346			optlen = opt[1];
2347			if (optlen < 2)
2348				optlen = 2;
2349			hlen -= optlen;
2350			opt += optlen;
2351		}
2352	}
2353	return (mss);
2354}
2355
2356u_int16_t
2357pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2358{
2359#ifdef INET
2360	struct sockaddr_in	*dst;
2361	struct route		 ro;
2362#endif /* INET */
2363#ifdef INET6
2364	struct sockaddr_in6	*dst6;
2365	struct route_in6	 ro6;
2366#endif /* INET6 */
2367	struct rtentry		*rt = NULL;
2368	int			 hlen;
2369	u_int16_t		 mss = tcp_mssdflt;
2370
2371	switch (af) {
2372#ifdef INET
2373	case AF_INET:
2374		hlen = sizeof(struct ip);
2375		bzero(&ro, sizeof(ro));
2376		dst = (struct sockaddr_in *)&ro.ro_dst;
2377		dst->sin_family = AF_INET;
2378		dst->sin_len = sizeof(*dst);
2379		dst->sin_addr = addr->v4;
2380#if defined(__FreeBSD__)
2381#ifdef RTF_PRCLONING
2382		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2383#else /* !RTF_PRCLONING */
2384		rtalloc_ign(&ro, RTF_CLONING);
2385#endif
2386#else /* ! __FreeBSD__ */
2387		rtalloc_noclone(&ro, NO_CLONING);
2388#endif
2389		rt = ro.ro_rt;
2390		break;
2391#endif /* INET */
2392#ifdef INET6
2393	case AF_INET6:
2394		hlen = sizeof(struct ip6_hdr);
2395		bzero(&ro6, sizeof(ro6));
2396		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2397		dst6->sin6_family = AF_INET6;
2398		dst6->sin6_len = sizeof(*dst6);
2399		dst6->sin6_addr = addr->v6;
2400#if defined(__FreeBSD__)
2401#ifdef RTF_PRCLONING
2402		rtalloc_ign((struct route *)&ro6,
2403		    (RTF_CLONING | RTF_PRCLONING));
2404#else /* !RTF_PRCLONING */
2405		rtalloc_ign((struct route *)&ro6, RTF_CLONING);
2406#endif
2407#else /* ! __FreeBSD__ */
2408		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
2409#endif
2410		rt = ro6.ro_rt;
2411		break;
2412#endif /* INET6 */
2413	}
2414
2415	if (rt && rt->rt_ifp) {
2416		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2417		mss = max(tcp_mssdflt, mss);
2418		RTFREE(rt);
2419	}
2420	mss = min(mss, offer);
2421	mss = max(mss, 64);		/* sanity - at least max opt space */
2422	return (mss);
2423}
2424
2425void
2426pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2427{
2428	struct pf_rule *r = s->rule.ptr;
2429
2430	s->rt_ifp = NULL;
2431	if (!r->rt || r->rt == PF_FASTROUTE)
2432		return;
2433	switch (s->af) {
2434#ifdef INET
2435	case AF_INET:
2436		pf_map_addr(AF_INET, &r->rpool, saddr,
2437		    &s->rt_addr, NULL);
2438		s->rt_ifp = r->rpool.cur->ifp;
2439		break;
2440#endif /* INET */
2441#ifdef INET6
2442	case AF_INET6:
2443		pf_map_addr(AF_INET6, &r->rpool, saddr,
2444		    &s->rt_addr, NULL);
2445		s->rt_ifp = r->rpool.cur->ifp;
2446		break;
2447#endif /* INET6 */
2448	}
2449}
2450
2451int
2452pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
2453    struct ifnet *ifp, struct mbuf *m, int ipoff, int off, void *h,
2454    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
2455{
2456	struct pf_rule		*nat = NULL, *rdr = NULL;
2457	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
2458	struct pf_addr		 baddr, naddr;
2459	struct tcphdr		*th = pd->hdr.tcp;
2460	u_int16_t		 bport, nport = 0;
2461	sa_family_t		 af = pd->af;
2462	int			 lookup = -1;
2463	uid_t			 uid;
2464	gid_t			 gid;
2465	struct pf_rule		*r, *a = NULL;
2466	struct pf_ruleset	*ruleset = NULL;
2467	u_short			 reason;
2468	int			 rewrite = 0;
2469	struct pf_tag		*pftag = NULL;
2470	int			 tag = -1;
2471	u_int16_t		 mss = tcp_mssdflt;
2472
2473	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2474
2475	if (direction == PF_OUT) {
2476		bport = nport = th->th_sport;
2477		/* check outgoing packet for BINAT/NAT */
2478		if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp,
2479		    saddr, th->th_sport, daddr, th->th_dport,
2480		    &naddr, &nport)) != NULL) {
2481			PF_ACPY(&baddr, saddr, af);
2482			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2483			    &th->th_sum, &naddr, nport, 0, af);
2484			rewrite++;
2485			if (nat->natpass)
2486				r = NULL;
2487		}
2488	} else {
2489		bport = nport = th->th_dport;
2490		/* check incoming packet for BINAT/RDR */
2491		if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr,
2492		    th->th_sport, daddr, th->th_dport,
2493		    &naddr, &nport)) != NULL) {
2494			PF_ACPY(&baddr, daddr, af);
2495			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2496			    &th->th_sum, &naddr, nport, 0, af);
2497			rewrite++;
2498			if (rdr->natpass)
2499				r = NULL;
2500		}
2501	}
2502
2503	while (r != NULL) {
2504		r->evaluations++;
2505		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
2506		    (r->ifp == ifp && r->ifnot)))
2507			r = r->skip[PF_SKIP_IFP].ptr;
2508		else if (r->direction && r->direction != direction)
2509			r = r->skip[PF_SKIP_DIR].ptr;
2510		else if (r->af && r->af != af)
2511			r = r->skip[PF_SKIP_AF].ptr;
2512		else if (r->proto && r->proto != IPPROTO_TCP)
2513			r = r->skip[PF_SKIP_PROTO].ptr;
2514		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
2515			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2516		else if (r->src.port_op && !pf_match_port(r->src.port_op,
2517		    r->src.port[0], r->src.port[1], th->th_sport))
2518			r = r->skip[PF_SKIP_SRC_PORT].ptr;
2519		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
2520			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2521		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
2522		    r->dst.port[0], r->dst.port[1], th->th_dport))
2523			r = r->skip[PF_SKIP_DST_PORT].ptr;
2524		else if (r->tos && !(r->tos & pd->tos))
2525			r = TAILQ_NEXT(r, entries);
2526		else if (r->rule_flag & PFRULE_FRAGMENT)
2527			r = TAILQ_NEXT(r, entries);
2528		else if ((r->flagset & th->th_flags) != r->flags)
2529			r = TAILQ_NEXT(r, entries);
2530		else if (r->uid.op && (lookup != -1 || (lookup =
2531		    pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_TCP,
2532		    pd), 1)) &&
2533		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
2534		    uid))
2535			r = TAILQ_NEXT(r, entries);
2536		else if (r->gid.op && (lookup != -1 || (lookup =
2537		    pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_TCP,
2538		    pd), 1)) &&
2539		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
2540		    gid))
2541			r = TAILQ_NEXT(r, entries);
2542		else if (r->match_tag &&
2543		    !pf_match_tag(m, r, nat, rdr, pftag, &tag))
2544			r = TAILQ_NEXT(r, entries);
2545		else if (r->anchorname[0] && r->anchor == NULL)
2546			r = TAILQ_NEXT(r, entries);
2547		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
2548		    pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
2549			r = TAILQ_NEXT(r, entries);
2550		else {
2551			if (r->tag)
2552				tag = r->tag;
2553			if (r->anchor == NULL) {
2554				*rm = r;
2555				*am = a;
2556				*rsm = ruleset;
2557				if ((*rm)->quick)
2558					break;
2559				r = TAILQ_NEXT(r, entries);
2560			} else
2561				PF_STEP_INTO_ANCHOR(r, a, ruleset,
2562				    PF_RULESET_FILTER);
2563		}
2564		if (r == NULL && a != NULL)
2565			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
2566			    PF_RULESET_FILTER);
2567	}
2568	r = *rm;
2569	a = *am;
2570	ruleset = *rsm;
2571
2572	r->packets++;
2573	r->bytes += pd->tot_len;
2574	if (a != NULL) {
2575		a->packets++;
2576		a->bytes += pd->tot_len;
2577	}
2578	REASON_SET(&reason, PFRES_MATCH);
2579
2580	if (r->log) {
2581		if (rewrite)
2582			m_copyback(m, off, sizeof(*th), (caddr_t)th);
2583		PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset);
2584	}
2585
2586	if ((r->action == PF_DROP) &&
2587	    ((r->rule_flag & PFRULE_RETURNRST) ||
2588	    (r->rule_flag & PFRULE_RETURNICMP) ||
2589	    (r->rule_flag & PFRULE_RETURN))) {
2590		/* undo NAT changes, if they have taken place */
2591		if (nat != NULL) {
2592			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2593			    &th->th_sum, &baddr, bport, 0, af);
2594			rewrite++;
2595		} else if (rdr != NULL) {
2596			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2597			    &th->th_sum, &baddr, bport, 0, af);
2598			rewrite++;
2599		}
2600		if (((r->rule_flag & PFRULE_RETURNRST) ||
2601		    (r->rule_flag & PFRULE_RETURN)) &&
2602		    !(th->th_flags & TH_RST)) {
2603			u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
2604
2605			if (th->th_flags & TH_SYN)
2606				ack++;
2607			if (th->th_flags & TH_FIN)
2608				ack++;
2609			pf_send_tcp(r, af, pd->dst,
2610			    pd->src, th->th_dport, th->th_sport,
2611			    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
2612			    r->return_ttl);
2613		} else if ((af == AF_INET) && r->return_icmp)
2614			pf_send_icmp(m, r->return_icmp >> 8,
2615			    r->return_icmp & 255, af, r);
2616		else if ((af == AF_INET6) && r->return_icmp6)
2617			pf_send_icmp(m, r->return_icmp6 >> 8,
2618			    r->return_icmp6 & 255, af, r);
2619	}
2620
2621	if (r->action == PF_DROP)
2622		return (PF_DROP);
2623
2624	if (pf_tag_packet(m, pftag, tag)) {
2625		REASON_SET(&reason, PFRES_MEMORY);
2626		return (PF_DROP);
2627	}
2628
2629	if (r->keep_state || nat != NULL || rdr != NULL ||
2630	    (pd->flags & PFDESC_TCP_NORM)) {
2631		/* create new state */
2632		u_int16_t	 len;
2633		struct pf_state	*s = NULL;
2634
2635		len = pd->tot_len - off - (th->th_off << 2);
2636		if (!r->max_states || r->states < r->max_states)
2637			s = pool_get(&pf_state_pl, PR_NOWAIT);
2638		if (s == NULL) {
2639			REASON_SET(&reason, PFRES_MEMORY);
2640			return (PF_DROP);
2641		}
2642		bzero(s, sizeof(*s));
2643		r->states++;
2644		if (a != NULL)
2645			a->states++;
2646		s->rule.ptr = r;
2647		if (nat != NULL)
2648			s->nat_rule.ptr = nat;
2649		else
2650			s->nat_rule.ptr = rdr;
2651		if (s->nat_rule.ptr != NULL)
2652			s->nat_rule.ptr->states++;
2653		s->anchor.ptr = a;
2654		s->allow_opts = r->allow_opts;
2655		s->log = r->log & 2;
2656		s->proto = IPPROTO_TCP;
2657		s->direction = direction;
2658		s->af = af;
2659		if (direction == PF_OUT) {
2660			PF_ACPY(&s->gwy.addr, saddr, af);
2661			s->gwy.port = th->th_sport;		/* sport */
2662			PF_ACPY(&s->ext.addr, daddr, af);
2663			s->ext.port = th->th_dport;
2664			if (nat != NULL) {
2665				PF_ACPY(&s->lan.addr, &baddr, af);
2666				s->lan.port = bport;
2667			} else {
2668				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
2669				s->lan.port = s->gwy.port;
2670			}
2671		} else {
2672			PF_ACPY(&s->lan.addr, daddr, af);
2673			s->lan.port = th->th_dport;
2674			PF_ACPY(&s->ext.addr, saddr, af);
2675			s->ext.port = th->th_sport;
2676			if (rdr != NULL) {
2677				PF_ACPY(&s->gwy.addr, &baddr, af);
2678				s->gwy.port = bport;
2679			} else {
2680				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
2681				s->gwy.port = s->lan.port;
2682			}
2683		}
2684
2685		s->src.seqlo = ntohl(th->th_seq);
2686		s->src.seqhi = s->src.seqlo + len + 1;
2687		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2688		    r->keep_state == PF_STATE_MODULATE) {
2689			/* Generate sequence number modulator */
2690			while ((s->src.seqdiff = arc4random()) == 0)
2691				;
2692			pf_change_a(&th->th_seq, &th->th_sum,
2693			    htonl(s->src.seqlo + s->src.seqdiff), 0);
2694			rewrite = 1;
2695		} else
2696			s->src.seqdiff = 0;
2697		if (th->th_flags & TH_SYN) {
2698			s->src.seqhi++;
2699			s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
2700		}
2701		s->src.max_win = MAX(ntohs(th->th_win), 1);
2702		if (s->src.wscale & PF_WSCALE_MASK) {
2703			/* Remove scale factor from initial window */
2704			int win = s->src.max_win;
2705			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
2706			s->src.max_win = (win - 1) >>
2707			    (s->src.wscale & PF_WSCALE_MASK);
2708		}
2709		if (th->th_flags & TH_FIN)
2710			s->src.seqhi++;
2711		s->dst.seqhi = 1;
2712		s->dst.max_win = 1;
2713		s->src.state = TCPS_SYN_SENT;
2714		s->dst.state = TCPS_CLOSED;
2715#if defined(__FreeBSD__)
2716		s->creation = time_second;
2717		s->expire = time_second;
2718#else
2719		s->creation = time.tv_sec;
2720		s->expire = time.tv_sec;
2721#endif
2722		s->timeout = PFTM_TCP_FIRST_PACKET;
2723		s->packets[0] = 1;
2724		s->bytes[0] = pd->tot_len;
2725		pf_set_rt_ifp(s, saddr);
2726
2727		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
2728		    off, pd, th, &s->src, &s->dst)) {
2729			REASON_SET(&reason, PFRES_MEMORY);
2730			pool_put(&pf_state_pl, s);
2731			return (PF_DROP);
2732		}
2733		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
2734		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src,
2735		    &s->dst, &rewrite)) {
2736			pf_normalize_tcp_cleanup(s);
2737			pool_put(&pf_state_pl, s);
2738			return (PF_DROP);
2739		}
2740		if (pf_insert_state(s)) {
2741			pf_normalize_tcp_cleanup(s);
2742			REASON_SET(&reason, PFRES_MEMORY);
2743			pool_put(&pf_state_pl, s);
2744			return (PF_DROP);
2745		} else
2746			*sm = s;
2747		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2748		    r->keep_state == PF_STATE_SYNPROXY) {
2749			s->src.state = PF_TCPS_PROXY_SRC;
2750			if (nat != NULL)
2751				pf_change_ap(saddr, &th->th_sport,
2752				    pd->ip_sum, &th->th_sum, &baddr,
2753				    bport, 0, af);
2754			else if (rdr != NULL)
2755				pf_change_ap(daddr, &th->th_dport,
2756				    pd->ip_sum, &th->th_sum, &baddr,
2757				    bport, 0, af);
2758			s->src.seqhi = arc4random();
2759			/* Find mss option */
2760			mss = pf_get_mss(m, off, th->th_off, af);
2761			mss = pf_calc_mss(saddr, af, mss);
2762			mss = pf_calc_mss(daddr, af, mss);
2763			s->src.mss = mss;
2764			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
2765			    th->th_sport, s->src.seqhi,
2766			    ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, s->src.mss, 0);
2767			return (PF_SYNPROXY_DROP);
2768		}
2769	}
2770
2771	/* copy back packet headers if we performed NAT operations */
2772	if (rewrite)
2773		m_copyback(m, off, sizeof(*th), (caddr_t)th);
2774
2775	return (PF_PASS);
2776}
2777
2778int
2779pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
2780    struct ifnet *ifp, struct mbuf *m, int ipoff, int off, void *h,
2781    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
2782{
2783	struct pf_rule		*nat = NULL, *rdr = NULL;
2784	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
2785	struct pf_addr		 baddr, naddr;
2786	struct udphdr		*uh = pd->hdr.udp;
2787	u_int16_t		 bport, nport = 0;
2788	sa_family_t		 af = pd->af;
2789	int			 lookup = -1;
2790	uid_t			 uid;
2791	gid_t			 gid;
2792	struct pf_rule		*r, *a = NULL;
2793	struct pf_ruleset	*ruleset = NULL;
2794	u_short			 reason;
2795	int			 rewrite = 0;
2796	struct pf_tag		*pftag = NULL;
2797	int			 tag = -1;
2798
2799	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2800
2801	if (direction == PF_OUT) {
2802		bport = nport = uh->uh_sport;
2803		/* check outgoing packet for BINAT/NAT */
2804		if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp,
2805		    saddr, uh->uh_sport, daddr, uh->uh_dport,
2806		    &naddr, &nport)) != NULL) {
2807			PF_ACPY(&baddr, saddr, af);
2808			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
2809			    &uh->uh_sum, &naddr, nport, 1, af);
2810			rewrite++;
2811			if (nat->natpass)
2812				r = NULL;
2813		}
2814	} else {
2815		bport = nport = uh->uh_dport;
2816		/* check incoming packet for BINAT/RDR */
2817		if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr,
2818		    uh->uh_sport, daddr, uh->uh_dport, &naddr, &nport))
2819		    != NULL) {
2820			PF_ACPY(&baddr, daddr, af);
2821			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
2822			    &uh->uh_sum, &naddr, nport, 1, af);
2823			rewrite++;
2824			if (rdr->natpass)
2825				r = NULL;
2826		}
2827	}
2828
2829	while (r != NULL) {
2830		r->evaluations++;
2831		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
2832		    (r->ifp == ifp && r->ifnot)))
2833			r = r->skip[PF_SKIP_IFP].ptr;
2834		else if (r->direction && r->direction != direction)
2835			r = r->skip[PF_SKIP_DIR].ptr;
2836		else if (r->af && r->af != af)
2837			r = r->skip[PF_SKIP_AF].ptr;
2838		else if (r->proto && r->proto != IPPROTO_UDP)
2839			r = r->skip[PF_SKIP_PROTO].ptr;
2840		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
2841			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2842		else if (r->src.port_op && !pf_match_port(r->src.port_op,
2843		    r->src.port[0], r->src.port[1], uh->uh_sport))
2844			r = r->skip[PF_SKIP_SRC_PORT].ptr;
2845		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
2846			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2847		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
2848		    r->dst.port[0], r->dst.port[1], uh->uh_dport))
2849			r = r->skip[PF_SKIP_DST_PORT].ptr;
2850		else if (r->tos && !(r->tos & pd->tos))
2851			r = TAILQ_NEXT(r, entries);
2852		else if (r->rule_flag & PFRULE_FRAGMENT)
2853			r = TAILQ_NEXT(r, entries);
2854		else if (r->uid.op && (lookup != -1 || (lookup =
2855		    pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_UDP,
2856		    pd), 1)) &&
2857		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
2858		    uid))
2859			r = TAILQ_NEXT(r, entries);
2860		else if (r->gid.op && (lookup != -1 || (lookup =
2861		    pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_UDP,
2862		    pd), 1)) &&
2863		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
2864		    gid))
2865			r = TAILQ_NEXT(r, entries);
2866		else if (r->match_tag &&
2867		    !pf_match_tag(m, r, nat, rdr, pftag, &tag))
2868			r = TAILQ_NEXT(r, entries);
2869		else if (r->anchorname[0] && r->anchor == NULL)
2870			r = TAILQ_NEXT(r, entries);
2871		else if (r->os_fingerprint != PF_OSFP_ANY)
2872			r = TAILQ_NEXT(r, entries);
2873		else {
2874			if (r->tag)
2875				tag = r->tag;
2876			if (r->anchor == NULL) {
2877				*rm = r;
2878				*am = a;
2879				*rsm = ruleset;
2880				if ((*rm)->quick)
2881					break;
2882				r = TAILQ_NEXT(r, entries);
2883			} else
2884				PF_STEP_INTO_ANCHOR(r, a, ruleset,
2885				    PF_RULESET_FILTER);
2886		}
2887		if (r == NULL && a != NULL)
2888			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
2889			    PF_RULESET_FILTER);
2890	}
2891	r = *rm;
2892	a = *am;
2893	ruleset = *rsm;
2894
2895	r->packets++;
2896	r->bytes += pd->tot_len;
2897	if (a != NULL) {
2898		a->packets++;
2899		a->bytes += pd->tot_len;
2900	}
2901	REASON_SET(&reason, PFRES_MATCH);
2902
2903	if (r->log) {
2904		if (rewrite)
2905			m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
2906		PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset);
2907	}
2908
2909	if ((r->action == PF_DROP) &&
2910	    ((r->rule_flag & PFRULE_RETURNICMP) ||
2911	    (r->rule_flag & PFRULE_RETURN))) {
2912		/* undo NAT changes, if they have taken place */
2913		if (nat != NULL) {
2914			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
2915			    &uh->uh_sum, &baddr, bport, 1, af);
2916			rewrite++;
2917		} else if (rdr != NULL) {
2918			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
2919			    &uh->uh_sum, &baddr, bport, 1, af);
2920			rewrite++;
2921		}
2922		if ((af == AF_INET) && r->return_icmp)
2923			pf_send_icmp(m, r->return_icmp >> 8,
2924			    r->return_icmp & 255, af, r);
2925		else if ((af == AF_INET6) && r->return_icmp6)
2926			pf_send_icmp(m, r->return_icmp6 >> 8,
2927			    r->return_icmp6 & 255, af, r);
2928	}
2929
2930	if (r->action == PF_DROP)
2931		return (PF_DROP);
2932
2933	if (pf_tag_packet(m, pftag, tag)) {
2934		REASON_SET(&reason, PFRES_MEMORY);
2935		return (PF_DROP);
2936	}
2937
2938	if (r->keep_state || nat != NULL || rdr != NULL) {
2939		/* create new state */
2940		struct pf_state	*s = NULL;
2941
2942		if (!r->max_states || r->states < r->max_states)
2943			s = pool_get(&pf_state_pl, PR_NOWAIT);
2944		if (s == NULL) {
2945			REASON_SET(&reason, PFRES_MEMORY);
2946			return (PF_DROP);
2947		}
2948		bzero(s, sizeof(*s));
2949		r->states++;
2950		if (a != NULL)
2951			a->states++;
2952		s->rule.ptr = r;
2953		if (nat != NULL)
2954			s->nat_rule.ptr = nat;
2955		else
2956			s->nat_rule.ptr = rdr;
2957		if (s->nat_rule.ptr != NULL)
2958			s->nat_rule.ptr->states++;
2959		s->anchor.ptr = a;
2960		s->allow_opts = r->allow_opts;
2961		s->log = r->log & 2;
2962		s->proto = IPPROTO_UDP;
2963		s->direction = direction;
2964		s->af = af;
2965		if (direction == PF_OUT) {
2966			PF_ACPY(&s->gwy.addr, saddr, af);
2967			s->gwy.port = uh->uh_sport;
2968			PF_ACPY(&s->ext.addr, daddr, af);
2969			s->ext.port = uh->uh_dport;
2970			if (nat != NULL) {
2971				PF_ACPY(&s->lan.addr, &baddr, af);
2972				s->lan.port = bport;
2973			} else {
2974				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
2975				s->lan.port = s->gwy.port;
2976			}
2977		} else {
2978			PF_ACPY(&s->lan.addr, daddr, af);
2979			s->lan.port = uh->uh_dport;
2980			PF_ACPY(&s->ext.addr, saddr, af);
2981			s->ext.port = uh->uh_sport;
2982			if (rdr != NULL) {
2983				PF_ACPY(&s->gwy.addr, &baddr, af);
2984				s->gwy.port = bport;
2985			} else {
2986				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
2987				s->gwy.port = s->lan.port;
2988			}
2989		}
2990		s->src.state = PFUDPS_SINGLE;
2991		s->dst.state = PFUDPS_NO_TRAFFIC;
2992#if defined(__FreeBSD__)
2993		s->creation = time_second;
2994		s->expire = time_second;
2995#else
2996		s->creation = time.tv_sec;
2997		s->expire = time.tv_sec;
2998#endif
2999		s->timeout = PFTM_UDP_FIRST_PACKET;
3000		s->packets[0] = 1;
3001		s->bytes[0] = pd->tot_len;
3002		pf_set_rt_ifp(s, saddr);
3003		if (pf_insert_state(s)) {
3004			REASON_SET(&reason, PFRES_MEMORY);
3005			pool_put(&pf_state_pl, s);
3006			return (PF_DROP);
3007		} else
3008			*sm = s;
3009	}
3010
3011	/* copy back packet headers if we performed NAT operations */
3012	if (rewrite)
3013		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3014
3015	return (PF_PASS);
3016}
3017
3018int
3019pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
3020    struct ifnet *ifp, struct mbuf *m, int ipoff, int off, void *h,
3021    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3022{
3023	struct pf_rule		*nat = NULL, *rdr = NULL;
3024	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3025	struct pf_addr		 baddr, naddr;
3026	struct pf_rule		*r, *a = NULL;
3027	struct pf_ruleset	*ruleset = NULL;
3028	u_short			 reason;
3029	u_int16_t		 icmpid;
3030	sa_family_t		 af = pd->af;
3031	u_int8_t		 icmptype, icmpcode;
3032	int			 state_icmp = 0;
3033	struct pf_tag		*pftag = NULL;
3034	int			 tag = -1;
3035#ifdef INET6
3036	int			 rewrite = 0;
3037#endif /* INET6 */
3038
3039	switch (pd->proto) {
3040#ifdef INET
3041	case IPPROTO_ICMP:
3042		icmptype = pd->hdr.icmp->icmp_type;
3043		icmpcode = pd->hdr.icmp->icmp_code;
3044		icmpid = pd->hdr.icmp->icmp_id;
3045
3046		if (icmptype == ICMP_UNREACH ||
3047		    icmptype == ICMP_SOURCEQUENCH ||
3048		    icmptype == ICMP_REDIRECT ||
3049		    icmptype == ICMP_TIMXCEED ||
3050		    icmptype == ICMP_PARAMPROB)
3051			state_icmp++;
3052		break;
3053#endif /* INET */
3054#ifdef INET6
3055	case IPPROTO_ICMPV6:
3056		icmptype = pd->hdr.icmp6->icmp6_type;
3057		icmpcode = pd->hdr.icmp6->icmp6_code;
3058		icmpid = pd->hdr.icmp6->icmp6_id;
3059
3060		if (icmptype == ICMP6_DST_UNREACH ||
3061		    icmptype == ICMP6_PACKET_TOO_BIG ||
3062		    icmptype == ICMP6_TIME_EXCEEDED ||
3063		    icmptype == ICMP6_PARAM_PROB)
3064			state_icmp++;
3065		break;
3066#endif /* INET6 */
3067	}
3068
3069	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3070
3071	if (direction == PF_OUT) {
3072		/* check outgoing packet for BINAT/NAT */
3073		if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp, saddr, 0,
3074		    daddr, 0, &naddr, NULL)) != NULL) {
3075			PF_ACPY(&baddr, saddr, af);
3076			switch (af) {
3077#ifdef INET
3078			case AF_INET:
3079				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3080				    naddr.v4.s_addr, 0);
3081				break;
3082#endif /* INET */
3083#ifdef INET6
3084			case AF_INET6:
3085				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3086				    &naddr, 0);
3087				rewrite++;
3088				break;
3089#endif /* INET6 */
3090			}
3091			if (nat->natpass)
3092				r = NULL;
3093		}
3094	} else {
3095		/* check incoming packet for BINAT/RDR */
3096		if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr, 0,
3097		    daddr, 0, &naddr, NULL)) != NULL) {
3098			PF_ACPY(&baddr, daddr, af);
3099			switch (af) {
3100#ifdef INET
3101			case AF_INET:
3102				pf_change_a(&daddr->v4.s_addr,
3103				    pd->ip_sum, naddr.v4.s_addr, 0);
3104				break;
3105#endif /* INET */
3106#ifdef INET6
3107			case AF_INET6:
3108				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3109				    &naddr, 0);
3110				rewrite++;
3111				break;
3112#endif /* INET6 */
3113			}
3114			if (rdr->natpass)
3115				r = NULL;
3116		}
3117	}
3118
3119	while (r != NULL) {
3120		r->evaluations++;
3121		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
3122		    (r->ifp == ifp && r->ifnot)))
3123			r = r->skip[PF_SKIP_IFP].ptr;
3124		else if (r->direction && r->direction != direction)
3125			r = r->skip[PF_SKIP_DIR].ptr;
3126		else if (r->af && r->af != af)
3127			r = r->skip[PF_SKIP_AF].ptr;
3128		else if (r->proto && r->proto != pd->proto)
3129			r = r->skip[PF_SKIP_PROTO].ptr;
3130		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3131			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3132		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3133			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3134		else if (r->type && r->type != icmptype + 1)
3135			r = TAILQ_NEXT(r, entries);
3136		else if (r->code && r->code != icmpcode + 1)
3137			r = TAILQ_NEXT(r, entries);
3138		else if (r->tos && !(r->tos & pd->tos))
3139			r = TAILQ_NEXT(r, entries);
3140		else if (r->rule_flag & PFRULE_FRAGMENT)
3141			r = TAILQ_NEXT(r, entries);
3142		else if (r->match_tag &&
3143		    !pf_match_tag(m, r, nat, rdr, pftag, &tag))
3144			r = TAILQ_NEXT(r, entries);
3145		else if (r->anchorname[0] && r->anchor == NULL)
3146			r = TAILQ_NEXT(r, entries);
3147		else if (r->os_fingerprint != PF_OSFP_ANY)
3148			r = TAILQ_NEXT(r, entries);
3149		else {
3150			if (r->tag)
3151				tag = r->tag;
3152			if (r->anchor == NULL) {
3153				*rm = r;
3154				*am = a;
3155				*rsm = ruleset;
3156				if ((*rm)->quick)
3157					break;
3158				r = TAILQ_NEXT(r, entries);
3159			} else
3160				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3161				    PF_RULESET_FILTER);
3162		}
3163		if (r == NULL && a != NULL)
3164			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3165			    PF_RULESET_FILTER);
3166	}
3167	r = *rm;
3168	a = *am;
3169	ruleset = *rsm;
3170
3171	r->packets++;
3172	r->bytes += pd->tot_len;
3173	if (a != NULL) {
3174		a->packets++;
3175		a->bytes += pd->tot_len;
3176	}
3177	REASON_SET(&reason, PFRES_MATCH);
3178
3179	if (r->log) {
3180#ifdef INET6
3181		if (rewrite)
3182			m_copyback(m, off, sizeof(struct icmp6_hdr),
3183			    (caddr_t)pd->hdr.icmp6);
3184#endif /* INET6 */
3185		PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset);
3186	}
3187
3188	if (r->action != PF_PASS)
3189		return (PF_DROP);
3190
3191	if (pf_tag_packet(m, pftag, tag)) {
3192		REASON_SET(&reason, PFRES_MEMORY);
3193		return (PF_DROP);
3194	}
3195
3196	if (!state_icmp && (r->keep_state ||
3197	    nat != NULL || rdr != NULL)) {
3198		/* create new state */
3199		struct pf_state	*s = NULL;
3200
3201		if (!r->max_states || r->states < r->max_states)
3202			s = pool_get(&pf_state_pl, PR_NOWAIT);
3203		if (s == NULL) {
3204			REASON_SET(&reason, PFRES_MEMORY);
3205			return (PF_DROP);
3206		}
3207		bzero(s, sizeof(*s));
3208		r->states++;
3209		if (a != NULL)
3210			a->states++;
3211		s->rule.ptr = r;
3212		if (nat != NULL)
3213			s->nat_rule.ptr = nat;
3214		else
3215			s->nat_rule.ptr = rdr;
3216		if (s->nat_rule.ptr != NULL)
3217			s->nat_rule.ptr->states++;
3218		s->anchor.ptr = a;
3219		s->allow_opts = r->allow_opts;
3220		s->log = r->log & 2;
3221		s->proto = pd->proto;
3222		s->direction = direction;
3223		s->af = af;
3224		if (direction == PF_OUT) {
3225			PF_ACPY(&s->gwy.addr, saddr, af);
3226			s->gwy.port = icmpid;
3227			PF_ACPY(&s->ext.addr, daddr, af);
3228			s->ext.port = icmpid;
3229			if (nat != NULL)
3230				PF_ACPY(&s->lan.addr, &baddr, af);
3231			else
3232				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3233			s->lan.port = icmpid;
3234		} else {
3235			PF_ACPY(&s->lan.addr, daddr, af);
3236			s->lan.port = icmpid;
3237			PF_ACPY(&s->ext.addr, saddr, af);
3238			s->ext.port = icmpid;
3239			if (rdr != NULL)
3240				PF_ACPY(&s->gwy.addr, &baddr, af);
3241			else
3242				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3243			s->gwy.port = icmpid;
3244		}
3245
3246#if defined(__FreeBSD__)
3247		s->creation = time_second;
3248		s->expire = time_second;
3249#else
3250		s->creation = time.tv_sec;
3251		s->expire = time.tv_sec;
3252#endif
3253		s->timeout = PFTM_ICMP_FIRST_PACKET;
3254		s->packets[0] = 1;
3255		s->bytes[0] = pd->tot_len;
3256		pf_set_rt_ifp(s, saddr);
3257		if (pf_insert_state(s)) {
3258			REASON_SET(&reason, PFRES_MEMORY);
3259			pool_put(&pf_state_pl, s);
3260			return (PF_DROP);
3261		} else
3262			*sm = s;
3263	}
3264
3265#ifdef INET6
3266	/* copy back packet headers if we performed IPv6 NAT operations */
3267	if (rewrite)
3268		m_copyback(m, off, sizeof(struct icmp6_hdr),
3269		    (caddr_t)pd->hdr.icmp6);
3270#endif /* INET6 */
3271
3272	return (PF_PASS);
3273}
3274
3275int
3276pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
3277    struct ifnet *ifp, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3278    struct pf_rule **am, struct pf_ruleset **rsm)
3279{
3280	struct pf_rule		*nat = NULL, *rdr = NULL;
3281	struct pf_rule		*r, *a = NULL;
3282	struct pf_ruleset	*ruleset = NULL;
3283	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3284	struct pf_addr		 baddr, naddr;
3285	sa_family_t		 af = pd->af;
3286	u_short			 reason;
3287	struct pf_tag		*pftag = NULL;
3288	int			 tag = -1;
3289
3290	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3291
3292	if (direction == PF_OUT) {
3293		/* check outgoing packet for BINAT/NAT */
3294		if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp, saddr, 0,
3295		    daddr, 0, &naddr, NULL)) != NULL) {
3296			PF_ACPY(&baddr, saddr, af);
3297			switch (af) {
3298#ifdef INET
3299			case AF_INET:
3300				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3301				    naddr.v4.s_addr, 0);
3302				break;
3303#endif /* INET */
3304#ifdef INET6
3305			case AF_INET6:
3306				PF_ACPY(saddr, &naddr, af);
3307				break;
3308#endif /* INET6 */
3309			}
3310			if (nat->natpass)
3311				r = NULL;
3312		}
3313	} else {
3314		/* check incoming packet for BINAT/RDR */
3315		if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr, 0,
3316		    daddr, 0, &naddr, NULL)) != NULL) {
3317			PF_ACPY(&baddr, daddr, af);
3318			switch (af) {
3319#ifdef INET
3320			case AF_INET:
3321				pf_change_a(&daddr->v4.s_addr,
3322				    pd->ip_sum, naddr.v4.s_addr, 0);
3323				break;
3324#endif /* INET */
3325#ifdef INET6
3326			case AF_INET6:
3327				PF_ACPY(daddr, &naddr, af);
3328				break;
3329#endif /* INET6 */
3330			}
3331			if (rdr->natpass)
3332				r = NULL;
3333		}
3334	}
3335
3336	while (r != NULL) {
3337		r->evaluations++;
3338		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
3339		    (r->ifp == ifp && r->ifnot)))
3340			r = r->skip[PF_SKIP_IFP].ptr;
3341		else if (r->direction && r->direction != direction)
3342			r = r->skip[PF_SKIP_DIR].ptr;
3343		else if (r->af && r->af != af)
3344			r = r->skip[PF_SKIP_AF].ptr;
3345		else if (r->proto && r->proto != pd->proto)
3346			r = r->skip[PF_SKIP_PROTO].ptr;
3347		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3348			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3349		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3350			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3351		else if (r->tos && !(r->tos & pd->tos))
3352			r = TAILQ_NEXT(r, entries);
3353		else if (r->rule_flag & PFRULE_FRAGMENT)
3354			r = TAILQ_NEXT(r, entries);
3355		else if (r->match_tag &&
3356		    !pf_match_tag(m, r, nat, rdr, pftag, &tag))
3357			r = TAILQ_NEXT(r, entries);
3358		else if (r->anchorname[0] && r->anchor == NULL)
3359			r = TAILQ_NEXT(r, entries);
3360		else if (r->os_fingerprint != PF_OSFP_ANY)
3361			r = TAILQ_NEXT(r, entries);
3362		else {
3363			if (r->tag)
3364				tag = r->tag;
3365			if (r->anchor == NULL) {
3366				*rm = r;
3367				*am = a;
3368				*rsm = ruleset;
3369				if ((*rm)->quick)
3370					break;
3371				r = TAILQ_NEXT(r, entries);
3372			} else
3373				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3374				    PF_RULESET_FILTER);
3375		}
3376		if (r == NULL && a != NULL)
3377			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3378			    PF_RULESET_FILTER);
3379	}
3380	r = *rm;
3381	a = *am;
3382	ruleset = *rsm;
3383
3384	r->packets++;
3385	r->bytes += pd->tot_len;
3386	if (a != NULL) {
3387		a->packets++;
3388		a->bytes += pd->tot_len;
3389	}
3390	REASON_SET(&reason, PFRES_MATCH);
3391	if (r->log)
3392		PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset);
3393
3394	if ((r->action == PF_DROP) &&
3395	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3396	    (r->rule_flag & PFRULE_RETURN))) {
3397		struct pf_addr *a = NULL;
3398
3399		if (nat != NULL)
3400			a = saddr;
3401		else if (rdr != NULL)
3402			a = daddr;
3403		if (a != NULL) {
3404			switch (af) {
3405#ifdef INET
3406			case AF_INET:
3407				pf_change_a(&a->v4.s_addr, pd->ip_sum,
3408				    baddr.v4.s_addr, 0);
3409				break;
3410#endif /* INET */
3411#ifdef INET6
3412			case AF_INET6:
3413				PF_ACPY(a, &baddr, af);
3414				break;
3415#endif /* INET6 */
3416			}
3417		}
3418		if ((af == AF_INET) && r->return_icmp)
3419			pf_send_icmp(m, r->return_icmp >> 8,
3420			    r->return_icmp & 255, af, r);
3421		else if ((af == AF_INET6) && r->return_icmp6)
3422			pf_send_icmp(m, r->return_icmp6 >> 8,
3423			    r->return_icmp6 & 255, af, r);
3424	}
3425
3426	if (r->action != PF_PASS)
3427		return (PF_DROP);
3428
3429	if (pf_tag_packet(m, pftag, tag)) {
3430		REASON_SET(&reason, PFRES_MEMORY);
3431		return (PF_DROP);
3432	}
3433
3434	if (r->keep_state || nat != NULL || rdr != NULL) {
3435		/* create new state */
3436		struct pf_state	*s = NULL;
3437
3438		if (!r->max_states || r->states < r->max_states)
3439			s = pool_get(&pf_state_pl, PR_NOWAIT);
3440		if (s == NULL) {
3441			REASON_SET(&reason, PFRES_MEMORY);
3442			return (PF_DROP);
3443		}
3444		bzero(s, sizeof(*s));
3445		r->states++;
3446		if (a != NULL)
3447			a->states++;
3448		s->rule.ptr = r;
3449		if (nat != NULL)
3450			s->nat_rule.ptr = nat;
3451		else
3452			s->nat_rule.ptr = rdr;
3453		if (s->nat_rule.ptr != NULL)
3454			s->nat_rule.ptr->states++;
3455		s->anchor.ptr = a;
3456		s->allow_opts = r->allow_opts;
3457		s->log = r->log & 2;
3458		s->proto = pd->proto;
3459		s->direction = direction;
3460		s->af = af;
3461		if (direction == PF_OUT) {
3462			PF_ACPY(&s->gwy.addr, saddr, af);
3463			PF_ACPY(&s->ext.addr, daddr, af);
3464			if (nat != NULL)
3465				PF_ACPY(&s->lan.addr, &baddr, af);
3466			else
3467				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3468		} else {
3469			PF_ACPY(&s->lan.addr, daddr, af);
3470			PF_ACPY(&s->ext.addr, saddr, af);
3471			if (rdr != NULL)
3472				PF_ACPY(&s->gwy.addr, &baddr, af);
3473			else
3474				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3475		}
3476		s->src.state = PFOTHERS_SINGLE;
3477		s->dst.state = PFOTHERS_NO_TRAFFIC;
3478#if defined(__FreeBSD__)
3479		s->creation = time_second;
3480		s->expire = time_second;
3481#else
3482		s->creation = time.tv_sec;
3483		s->expire = time.tv_sec;
3484#endif
3485		s->timeout = PFTM_OTHER_FIRST_PACKET;
3486		s->packets[0] = 1;
3487		s->bytes[0] = pd->tot_len;
3488		pf_set_rt_ifp(s, saddr);
3489		if (pf_insert_state(s)) {
3490			REASON_SET(&reason, PFRES_MEMORY);
3491			if (r->log)
3492				PFLOG_PACKET(ifp, h, m, af, direction, reason,
3493				    r, a, ruleset);
3494			pool_put(&pf_state_pl, s);
3495			return (PF_DROP);
3496		} else
3497			*sm = s;
3498	}
3499
3500	return (PF_PASS);
3501}
3502
3503int
3504pf_test_fragment(struct pf_rule **rm, int direction, struct ifnet *ifp,
3505    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3506    struct pf_ruleset **rsm)
3507{
3508	struct pf_rule		*r, *a = NULL;
3509	struct pf_ruleset	*ruleset = NULL;
3510	sa_family_t		 af = pd->af;
3511	u_short			 reason;
3512	struct pf_tag		*pftag = NULL;
3513	int			 tag = -1;
3514
3515	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3516	while (r != NULL) {
3517		r->evaluations++;
3518		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
3519		    (r->ifp == ifp && r->ifnot)))
3520			r = r->skip[PF_SKIP_IFP].ptr;
3521		else if (r->direction && r->direction != direction)
3522			r = r->skip[PF_SKIP_DIR].ptr;
3523		else if (r->af && r->af != af)
3524			r = r->skip[PF_SKIP_AF].ptr;
3525		else if (r->proto && r->proto != pd->proto)
3526			r = r->skip[PF_SKIP_PROTO].ptr;
3527		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3528			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3529		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3530			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3531		else if (r->tos && !(r->tos & pd->tos))
3532			r = TAILQ_NEXT(r, entries);
3533		else if (r->src.port_op || r->dst.port_op ||
3534		    r->flagset || r->type || r->code ||
3535		    r->os_fingerprint != PF_OSFP_ANY)
3536			r = TAILQ_NEXT(r, entries);
3537		else if (r->match_tag &&
3538		    !pf_match_tag(m, r, NULL, NULL, pftag, &tag))
3539			r = TAILQ_NEXT(r, entries);
3540		else if (r->anchorname[0] && r->anchor == NULL)
3541			r = TAILQ_NEXT(r, entries);
3542		else {
3543			if (r->anchor == NULL) {
3544				*rm = r;
3545				*am = a;
3546				*rsm = ruleset;
3547				if ((*rm)->quick)
3548					break;
3549				r = TAILQ_NEXT(r, entries);
3550			} else
3551				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3552				    PF_RULESET_FILTER);
3553		}
3554		if (r == NULL && a != NULL)
3555			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3556			    PF_RULESET_FILTER);
3557	}
3558	r = *rm;
3559	a = *am;
3560	ruleset = *rsm;
3561
3562	r->packets++;
3563	r->bytes += pd->tot_len;
3564	if (a != NULL) {
3565		a->packets++;
3566		a->bytes += pd->tot_len;
3567	}
3568	REASON_SET(&reason, PFRES_MATCH);
3569	if (r->log)
3570		PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset);
3571
3572	if (r->action != PF_PASS)
3573		return (PF_DROP);
3574
3575	if (pf_tag_packet(m, pftag, tag)) {
3576		REASON_SET(&reason, PFRES_MEMORY);
3577		return (PF_DROP);
3578	}
3579
3580	return (PF_PASS);
3581}
3582
/*
 * pf_test_state_tcp: match a TCP segment against an existing state entry
 * and validate it against the sequence windows tracked for both peers.
 *
 * A lookup key is built from the packet's addresses and ports and
 * STATE_LOOKUP() resolves *state.  NOTE(review): STATE_LOOKUP is a macro
 * defined outside this chunk; it presumably returns from this function
 * when no usable state exists -- confirm.
 *
 * Return values produced by the visible body:
 *   PF_PASS          - segment accepted; counters, peer states and the
 *                      timeout class were updated and any address, port
 *                      or sequence rewrite was copied back into the mbuf.
 *   PF_DROP          - segment failed a handshake or window check, or a
 *                      normalization helper reported failure.
 *   PF_SYNPROXY_DROP - segment was consumed by the PF_TCPS_PROXY_*
 *                      handshake stages; no segment is passed on while
 *                      the state is in one of those stages.
 *
 * *reason is written here only when pf_normalize_tcp_init() fails
 * (PFRES_MEMORY); it is also handed to pf_normalize_tcp_stateful().
 * ifp, ipoff and h are not referenced directly in the body below.
 */
3583int
3584pf_test_state_tcp(struct pf_state **state, int direction, struct ifnet *ifp,
3585    struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd,
3586    u_short *reason)
3587{
3588	struct pf_tree_node	 key;
3589	struct tcphdr		*th = pd->hdr.tcp;
3590	u_int16_t		 win = ntohs(th->th_win);
3591	u_int32_t		 ack, end, seq;
3592	u_int8_t		 sws, dws;
3593	int			 ackskew, dirndx;
3594	int			 copyback = 0;
3595	struct pf_state_peer	*src, *dst;
3596
3597	key.af = pd->af;
3598	key.proto = IPPROTO_TCP;
3599	PF_ACPY(&key.addr[0], pd->src, key.af);
3600	PF_ACPY(&key.addr[1], pd->dst, key.af);
3601	key.port[0] = th->th_sport;
3602	key.port[1] = th->th_dport;
3603
3604	STATE_LOOKUP();
3605
	/*
	 * Orient the two peers relative to the direction the state was
	 * created in: "src" is always the sender of this segment.  dirndx
	 * picks the matching slot of the state's packets[]/bytes[] counters.
	 */
3606	if (direction == (*state)->direction) {
3607		src = &(*state)->src;
3608		dst = &(*state)->dst;
3609		dirndx = 0;
3610	} else {
3611		src = &(*state)->dst;
3612		dst = &(*state)->src;
3613		dirndx = 1;
3614	}
3615
	/*
	 * Synproxy, first stage: only segments travelling in the state's
	 * own direction are examined.  A SYN whose sequence number matches
	 * src.seqlo is answered with a SYN|ACK; a matching ACK advances the
	 * state to PF_TCPS_PROXY_DST and falls into the next block.
	 */
3616	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
3617		if (direction != (*state)->direction)
3618			return (PF_SYNPROXY_DROP);
3619		if (th->th_flags & TH_SYN) {
3620			if (ntohl(th->th_seq) != (*state)->src.seqlo)
3621				return (PF_DROP);
3622			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3623			    pd->src, th->th_dport, th->th_sport,
3624			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
3625			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0);
3626			return (PF_SYNPROXY_DROP);
3627		} else if (!(th->th_flags & TH_ACK) ||
3628		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3629		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
3630			return (PF_DROP);
3631		else
3632			(*state)->src.state = PF_TCPS_PROXY_DST;
3633	}
	/*
	 * Synproxy, second stage: a valid ACK in the state's direction
	 * triggers a SYN towards the other host; the SYN|ACK coming back is
	 * acknowledged on both sides, the sequence offsets (seqdiff) are
	 * derived and both peers become TCPS_ESTABLISHED.  Every path in
	 * this block returns.
	 */
3634	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
		/*
		 * NOTE: these locals shadow the function-level src/dst
		 * (struct pf_state_peer *) with host address/port pairs.
		 */
3635		struct pf_state_host *src, *dst;
3636
3637		if (direction == PF_OUT) {
3638			src = &(*state)->gwy;
3639			dst = &(*state)->ext;
3640		} else {
3641			src = &(*state)->ext;
3642			dst = &(*state)->lan;
3643		}
3644		if (direction == (*state)->direction) {
3645			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
3646			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3647			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
3648				return (PF_DROP);
3649			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
3650			if ((*state)->dst.seqhi == 1)
3651				(*state)->dst.seqhi = arc4random();
3652			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3653			    &dst->addr, src->port, dst->port,
3654			    (*state)->dst.seqhi, 0, TH_SYN, 0, (*state)->src.mss, 0);
3655			return (PF_SYNPROXY_DROP);
3656		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
3657		    (TH_SYN|TH_ACK)) ||
3658		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1))
3659			return (PF_DROP);
3660		else {
3661			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
3662			(*state)->dst.seqlo = ntohl(th->th_seq);
3663			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3664			    pd->src, th->th_dport, th->th_sport,
3665			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
3666			    TH_ACK, (*state)->src.max_win, 0, 0);
3667			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3668			    &dst->addr, src->port, dst->port,
3669			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
3670			    TH_ACK, (*state)->dst.max_win, 0, 0);
3671			(*state)->src.seqdiff = (*state)->dst.seqhi -
3672			    (*state)->src.seqlo;
3673			(*state)->dst.seqdiff = (*state)->src.seqhi -
3674			    (*state)->dst.seqlo;
3675			(*state)->src.seqhi = (*state)->src.seqlo +
3676			    (*state)->src.max_win;
3677			(*state)->dst.seqhi = (*state)->dst.seqlo +
3678			    (*state)->dst.max_win;
3679			(*state)->src.wscale = (*state)->dst.wscale = 0;
3680			(*state)->src.state = (*state)->dst.state =
3681			    TCPS_ESTABLISHED;
3682			return (PF_SYNPROXY_DROP);
3683		}
3684	}
3685
	/*
	 * Window-scale shifts are applied only when both peers have a
	 * wscale recorded and the segment is not a SYN; otherwise both
	 * shifts are 0.
	 */
3686	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
3687		sws = src->wscale & PF_WSCALE_MASK;
3688		dws = dst->wscale & PF_WSCALE_MASK;
3689	} else
3690		sws = dws = 0;
3691
3692	/*
3693	 * Sequence tracking algorithm from Guido van Rooij's paper:
3694	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
3695	 *	tcp_filtering.ps
3696	 */
3697
3698	seq = ntohl(th->th_seq);
3699	if (src->seqlo == 0) {
3700		/* First packet from this end. Set its state */
3701
3702		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
3703		    src->scrub == NULL) {
3704			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
3705				REASON_SET(reason, PFRES_MEMORY);
3706				return (PF_DROP);
3707			}
3708		}
3709
3710		/* Deferred generation of sequence number modulator */
3711		if (dst->seqdiff && !src->seqdiff) {
3712			while ((src->seqdiff = arc4random()) == 0)
3713				;
3714			ack = ntohl(th->th_ack) - dst->seqdiff;
3715			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
3716			    src->seqdiff), 0);
3717			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
3718			copyback = 1;
3719		} else {
3720			ack = ntohl(th->th_ack);
3721		}
3722
		/* "end" is the sequence number just past this segment; SYN and
		 * FIN each take up one sequence number */
3723		end = seq + pd->p_len;
3724		if (th->th_flags & TH_SYN) {
3725			end++;
3726			if (dst->wscale & PF_WSCALE_FLAG) {
3727				src->wscale = pf_get_wscale(m, off, th->th_off,
3728				    pd->af);
3729				if (src->wscale & PF_WSCALE_FLAG) {
3730					/* Remove scale factor from initial
3731					 * window */
3732					sws = src->wscale & PF_WSCALE_MASK;
3733					win = ((u_int32_t)win + (1 << sws) - 1)
3734					    >> sws;
3735					dws = dst->wscale & PF_WSCALE_MASK;
3736				} else {
3737					/* fixup other window */
3738					dst->max_win <<= dst->wscale &
3739					    PF_WSCALE_MASK;
3740					/* in case of a retrans SYN|ACK */
3741					dst->wscale = 0;
3742				}
3743			}
3744		}
3745		if (th->th_flags & TH_FIN)
3746			end++;
3747
3748		src->seqlo = seq;
3749		if (src->state < TCPS_SYN_SENT)
3750			src->state = TCPS_SYN_SENT;
3751
3752		/*
3753		 * May need to slide the window (seqhi may have been set by
3754		 * the crappy stack check or if we picked up the connection
3755		 * after establishment)
3756		 */
3757		if (src->seqhi == 1 ||
3758		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
3759			src->seqhi = end + MAX(1, dst->max_win << dws);
3760		if (win > src->max_win)
3761			src->max_win = win;
3762
3763	} else {
		/* sender already seen: undo the peer's modulation on the ack
		 * number and apply this sender's offset, if it has one */
3764		ack = ntohl(th->th_ack) - dst->seqdiff;
3765		if (src->seqdiff) {
3766			/* Modulate sequence numbers */
3767			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
3768			    src->seqdiff), 0);
3769			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
3770			copyback = 1;
3771		}
3772		end = seq + pd->p_len;
3773		if (th->th_flags & TH_SYN)
3774			end++;
3775		if (th->th_flags & TH_FIN)
3776			end++;
3777	}
3778
3779	if ((th->th_flags & TH_ACK) == 0) {
3780		/* Let it pass through the ack skew check */
3781		ack = dst->seqlo;
3782	} else if ((ack == 0 &&
3783	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
3784	    /* broken tcp stacks do not set ack */
3785	    (dst->state < TCPS_SYN_SENT)) {
3786		/*
3787		 * Many stacks (ours included) will set the ACK number in an
3788		 * FIN|ACK if the SYN times out -- no sequence to ACK.
3789		 */
3790		ack = dst->seqlo;
3791	}
3792
3793	if (seq == end) {
3794		/* Ease sequencing restrictions on no data packets */
3795		seq = src->seqlo;
3796		end = seq;
3797	}
3798
	/* distance between what the peer has sent and what is being acked */
3799	ackskew = dst->seqlo - ack;
3800
	/*
	 * Strict window check.  Each comment embedded in the condition
	 * below describes the clause on the line directly above it.
	 */
3801#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
3802	if (SEQ_GEQ(src->seqhi, end) &&
3803	    /* Last octet inside other's window space */
3804	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
3805	    /* Retrans: not more than one window back */
3806	    (ackskew >= -MAXACKWINDOW) &&
3807	    /* Acking not more than one reassembled fragment backwards */
3808	    (ackskew <= (MAXACKWINDOW << sws))) {
3809	    /* Acking not more than one window forward */
3810
3811		(*state)->packets[dirndx]++;
3812		(*state)->bytes[dirndx] += pd->tot_len;
3813
3814		/* update max window */
3815		if (src->max_win < win)
3816			src->max_win = win;
3817		/* synchronize sequencing */
3818		if (SEQ_GT(end, src->seqlo))
3819			src->seqlo = end;
3820		/* slide the window of what the other end can send */
3821		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
3822			dst->seqhi = ack + MAX((win << sws), 1);
3823
3824
3825		/* update states */
3826		if (th->th_flags & TH_SYN)
3827			if (src->state < TCPS_SYN_SENT)
3828				src->state = TCPS_SYN_SENT;
3829		if (th->th_flags & TH_FIN)
3830			if (src->state < TCPS_CLOSING)
3831				src->state = TCPS_CLOSING;
3832		if (th->th_flags & TH_ACK) {
3833			if (dst->state == TCPS_SYN_SENT)
3834				dst->state = TCPS_ESTABLISHED;
3835			else if (dst->state == TCPS_CLOSING)
3836				dst->state = TCPS_FIN_WAIT_2;
3837		}
3838		if (th->th_flags & TH_RST)
3839			src->state = dst->state = TCPS_TIME_WAIT;
3840
3841		/* update expire time */
3842#if defined(__FreeBSD__)
3843		(*state)->expire = time_second;
3844#else
3845		(*state)->expire = time.tv_sec;
3846#endif
		/* pick the timeout class from the combined peer states; the
		 * order of these tests matters */
3847		if (src->state >= TCPS_FIN_WAIT_2 &&
3848		    dst->state >= TCPS_FIN_WAIT_2)
3849			(*state)->timeout = PFTM_TCP_CLOSED;
3850		else if (src->state >= TCPS_FIN_WAIT_2 ||
3851		    dst->state >= TCPS_FIN_WAIT_2)
3852			(*state)->timeout = PFTM_TCP_FIN_WAIT;
3853		else if (src->state < TCPS_ESTABLISHED ||
3854		    dst->state < TCPS_ESTABLISHED)
3855			(*state)->timeout = PFTM_TCP_OPENING;
3856		else if (src->state >= TCPS_CLOSING ||
3857		    dst->state >= TCPS_CLOSING)
3858			(*state)->timeout = PFTM_TCP_CLOSING;
3859		else
3860			(*state)->timeout = PFTM_TCP_ESTABLISHED;
3861
3862		/* Fall through to PASS packet */
3863
3864	} else if ((dst->state < TCPS_SYN_SENT ||
3865		dst->state >= TCPS_FIN_WAIT_2 ||
3866		src->state >= TCPS_FIN_WAIT_2) &&
3867	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
3868	    /* Within a window forward of the originating packet */
3869	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
3870	    /* Within a window backward of the originating packet */
3871
3872		/*
3873		 * This currently handles three situations:
3874		 *  1) Stupid stacks will shotgun SYNs before their peer
3875		 *     replies.
3876		 *  2) When PF catches an already established stream (the
3877		 *     firewall rebooted, the state table was flushed, routes
3878		 *     changed...)
3879		 *  3) Packets get funky immediately after the connection
3880		 *     closes (this should catch Solaris spurious ACK|FINs
3881		 *     that web servers like to spew after a close)
3882		 *
3883		 * This must be a little more careful than the above code
3884		 * since packet floods will also be caught here. We don't
3885		 * update the TTL here to mitigate the damage of a packet
3886		 * flood and so the same code can handle awkward establishment
3887		 * and a loosened connection close.
3888		 * In the establishment case, a correct peer response will
3889		 * validate the connection, go through the normal state code
3890		 * and keep updating the state TTL.
3891		 */
3892
3893		if (pf_status.debug >= PF_DEBUG_MISC) {
3894			printf("pf: loose state match: ");
3895			pf_print_state(*state);
3896			pf_print_flags(th->th_flags);
3897			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n",
3898			    seq, ack, pd->p_len, ackskew,
3899			    (*state)->packets[0], (*state)->packets[1]);
3900		}
3901
3902		(*state)->packets[dirndx]++;
3903		(*state)->bytes[dirndx] += pd->tot_len;
3904
3905		/* update max window */
3906		if (src->max_win < win)
3907			src->max_win = win;
3908		/* synchronize sequencing */
3909		if (SEQ_GT(end, src->seqlo))
3910			src->seqlo = end;
3911		/* slide the window of what the other end can send */
3912		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
3913			dst->seqhi = ack + MAX((win << sws), 1);
3914
		/*
		 * NOTE(review): the comment below says dst->seqhi cannot be
		 * set here, yet the statement just above does assign
		 * dst->seqhi -- confirm which one reflects the intent.
		 */
3915		/*
3916		 * Cannot set dst->seqhi here since this could be a shotgunned
3917		 * SYN and not an already established connection.
3918		 */
3919
3920		if (th->th_flags & TH_FIN)
3921			if (src->state < TCPS_CLOSING)
3922				src->state = TCPS_CLOSING;
3923		if (th->th_flags & TH_RST)
3924			src->state = dst->state = TCPS_TIME_WAIT;
3925
3926		/* Fall through to PASS packet */
3927
3928	} else {
		/* neither the strict nor the loose check accepted the segment */
3929		if ((*state)->dst.state == TCPS_SYN_SENT &&
3930		    (*state)->src.state == TCPS_SYN_SENT) {
3931			/* Send RST for state mismatches during handshake */
3932			if (!(th->th_flags & TH_RST)) {
				/* NOTE: shadows the function-level "ack" */
3933				u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
3934
3935				if (th->th_flags & TH_SYN)
3936					ack++;
3937				if (th->th_flags & TH_FIN)
3938					ack++;
3939				pf_send_tcp((*state)->rule.ptr, pd->af,
3940				    pd->dst, pd->src, th->th_dport,
3941				    th->th_sport, ntohl(th->th_ack), ack,
3942				    TH_RST|TH_ACK, 0, 0,
3943				    (*state)->rule.ptr->return_ttl);
3944			}
			/* reset this sender's tracking so a fresh first packet
			 * is handled by the "src->seqlo == 0" branch above */
3945			src->seqlo = 0;
3946			src->seqhi = 1;
3947			src->max_win = 1;
3948		} else if (pf_status.debug >= PF_DEBUG_MISC) {
3949			printf("pf: BAD state: ");
3950			pf_print_state(*state);
3951			pf_print_flags(th->th_flags);
3952			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d "
3953			    "dir=%s,%s\n", seq, ack, pd->p_len, ackskew,
3954			    (*state)->packets[0], (*state)->packets[1],
3955			    direction == PF_IN ? "in" : "out",
3956			    direction == (*state)->direction ? "fwd" : "rev");
			/* digits 1-4 flag the failed clauses of the strict
			 * check, 5-6 those of the loose check */
3957			printf("pf: State failure on: %c %c %c %c | %c %c\n",
3958			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
3959			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
3960			    ' ': '2',
3961			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
3962			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
3963			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
3964			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
3965		}
3966		return (PF_DROP);
3967	}
3968
3969	if (dst->scrub || src->scrub) {
3970		if (pf_normalize_tcp_stateful(m, off, pd, reason, th, src, dst,
3971		    &copyback))
3972			return (PF_DROP);
3973	}
3974
3975	/* Any packets which have gotten here are to be passed */
3976
3977	/* translate source/destination address, if necessary */
3978	if (STATE_TRANSLATE(*state)) {
3979		if (direction == PF_OUT)
3980			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
3981			    &th->th_sum, &(*state)->gwy.addr,
3982			    (*state)->gwy.port, 0, pd->af);
3983		else
3984			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
3985			    &th->th_sum, &(*state)->lan.addr,
3986			    (*state)->lan.port, 0, pd->af);
3987		m_copyback(m, off, sizeof(*th), (caddr_t)th);
3988	} else if (copyback) {
3989		/* Copyback sequence modulation or stateful scrub changes */
3990		m_copyback(m, off, sizeof(*th), (caddr_t)th);
3991	}
3992
	/* charge the packet to the state's rule, NAT rule and anchor */
3993	(*state)->rule.ptr->packets++;
3994	(*state)->rule.ptr->bytes += pd->tot_len;
3995	if ((*state)->nat_rule.ptr != NULL) {
3996		(*state)->nat_rule.ptr->packets++;
3997		(*state)->nat_rule.ptr->bytes += pd->tot_len;
3998	}
3999	if ((*state)->anchor.ptr != NULL) {
4000		(*state)->anchor.ptr->packets++;
4001		(*state)->anchor.ptr->bytes += pd->tot_len;
4002	}
4003	return (PF_PASS);
4004}
4005
4006int
4007pf_test_state_udp(struct pf_state **state, int direction, struct ifnet *ifp,
4008    struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd)
4009{
4010	struct pf_state_peer	*src, *dst;
4011	struct pf_tree_node	 key;
4012	struct udphdr		*uh = pd->hdr.udp;
4013	int			dirndx;
4014
4015	key.af = pd->af;
4016	key.proto = IPPROTO_UDP;
4017	PF_ACPY(&key.addr[0], pd->src, key.af);
4018	PF_ACPY(&key.addr[1], pd->dst, key.af);
4019	key.port[0] = uh->uh_sport;
4020	key.port[1] = uh->uh_dport;
4021
4022	STATE_LOOKUP();
4023
4024	if (direction == (*state)->direction) {
4025		src = &(*state)->src;
4026		dst = &(*state)->dst;
4027		dirndx = 0;
4028	} else {
4029		src = &(*state)->dst;
4030		dst = &(*state)->src;
4031		dirndx = 1;
4032	}
4033
4034	(*state)->packets[dirndx]++;
4035	(*state)->bytes[dirndx] += pd->tot_len;
4036
4037	/* update states */
4038	if (src->state < PFUDPS_SINGLE)
4039		src->state = PFUDPS_SINGLE;
4040	if (dst->state == PFUDPS_SINGLE)
4041		dst->state = PFUDPS_MULTIPLE;
4042
4043	/* update expire time */
4044#if defined(__FreeBSD__)
4045	(*state)->expire = time_second;
4046#else
4047	(*state)->expire = time.tv_sec;
4048#endif
4049	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4050		(*state)->timeout = PFTM_UDP_MULTIPLE;
4051	else
4052		(*state)->timeout = PFTM_UDP_SINGLE;
4053
4054	/* translate source/destination address, if necessary */
4055	if (STATE_TRANSLATE(*state)) {
4056		if (direction == PF_OUT)
4057			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4058			    &uh->uh_sum, &(*state)->gwy.addr,
4059			    (*state)->gwy.port, 1, pd->af);
4060		else
4061			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4062			    &uh->uh_sum, &(*state)->lan.addr,
4063			    (*state)->lan.port, 1, pd->af);
4064		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4065	}
4066
4067	(*state)->rule.ptr->packets++;
4068	(*state)->rule.ptr->bytes += pd->tot_len;
4069	if ((*state)->nat_rule.ptr != NULL) {
4070		(*state)->nat_rule.ptr->packets++;
4071		(*state)->nat_rule.ptr->bytes += pd->tot_len;
4072	}
4073	if ((*state)->anchor.ptr != NULL) {
4074		(*state)->anchor.ptr->packets++;
4075		(*state)->anchor.ptr->bytes += pd->tot_len;
4076	}
4077	return (PF_PASS);
4078}
4079
4080int
4081pf_test_state_icmp(struct pf_state **state, int direction, struct ifnet *ifp,
4082    struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd)
4083{
4084	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
4085	u_int16_t	 icmpid, *icmpsum;
4086	u_int8_t	 icmptype;
4087	int		 state_icmp = 0, dirndx;
4088
4089	switch (pd->proto) {
4090#ifdef INET
4091	case IPPROTO_ICMP:
4092		icmptype = pd->hdr.icmp->icmp_type;
4093		icmpid = pd->hdr.icmp->icmp_id;
4094		icmpsum = &pd->hdr.icmp->icmp_cksum;
4095
4096		if (icmptype == ICMP_UNREACH ||
4097		    icmptype == ICMP_SOURCEQUENCH ||
4098		    icmptype == ICMP_REDIRECT ||
4099		    icmptype == ICMP_TIMXCEED ||
4100		    icmptype == ICMP_PARAMPROB)
4101			state_icmp++;
4102		break;
4103#endif /* INET */
4104#ifdef INET6
4105	case IPPROTO_ICMPV6:
4106		icmptype = pd->hdr.icmp6->icmp6_type;
4107		icmpid = pd->hdr.icmp6->icmp6_id;
4108		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4109
4110		if (icmptype == ICMP6_DST_UNREACH ||
4111		    icmptype == ICMP6_PACKET_TOO_BIG ||
4112		    icmptype == ICMP6_TIME_EXCEEDED ||
4113		    icmptype == ICMP6_PARAM_PROB)
4114			state_icmp++;
4115		break;
4116#endif /* INET6 */
4117	}
4118
4119	if (!state_icmp) {
4120
4121		/*
4122		 * ICMP query/reply message not related to a TCP/UDP packet.
4123		 * Search for an ICMP state.
4124		 */
4125		struct pf_tree_node	key;
4126
4127		key.af = pd->af;
4128		key.proto = pd->proto;
4129		PF_ACPY(&key.addr[0], saddr, key.af);
4130		PF_ACPY(&key.addr[1], daddr, key.af);
4131		key.port[0] = icmpid;
4132		key.port[1] = icmpid;
4133
4134		STATE_LOOKUP();
4135
4136		dirndx = (direction == (*state)->direction) ? 0 : 1;
4137		(*state)->packets[dirndx]++;
4138		(*state)->bytes[dirndx] += pd->tot_len;
4139#if defined(__FreeBSD__)
4140		(*state)->expire = time_second;
4141#else
4142		(*state)->expire = time.tv_sec;
4143#endif
4144		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4145
4146		/* translate source/destination address, if necessary */
4147		if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) {
4148			if (direction == PF_OUT) {
4149				switch (pd->af) {
4150#ifdef INET
4151				case AF_INET:
4152					pf_change_a(&saddr->v4.s_addr,
4153					    pd->ip_sum,
4154					    (*state)->gwy.addr.v4.s_addr, 0);
4155					break;
4156#endif /* INET */
4157#ifdef INET6
4158				case AF_INET6:
4159					pf_change_a6(saddr,
4160					    &pd->hdr.icmp6->icmp6_cksum,
4161					    &(*state)->gwy.addr, 0);
4162					m_copyback(m, off,
4163					    sizeof(struct icmp6_hdr),
4164					    (caddr_t)pd->hdr.icmp6);
4165					break;
4166#endif /* INET6 */
4167				}
4168			} else {
4169				switch (pd->af) {
4170#ifdef INET
4171				case AF_INET:
4172					pf_change_a(&daddr->v4.s_addr,
4173					    pd->ip_sum,
4174					    (*state)->lan.addr.v4.s_addr, 0);
4175					break;
4176#endif /* INET */
4177#ifdef INET6
4178				case AF_INET6:
4179					pf_change_a6(daddr,
4180					    &pd->hdr.icmp6->icmp6_cksum,
4181					    &(*state)->lan.addr, 0);
4182					m_copyback(m, off,
4183					    sizeof(struct icmp6_hdr),
4184					    (caddr_t)pd->hdr.icmp6);
4185					break;
4186#endif /* INET6 */
4187				}
4188			}
4189		}
4190
4191		return (PF_PASS);
4192
4193	} else {
4194		/*
4195		 * ICMP error message in response to a TCP/UDP packet.
4196		 * Extract the inner TCP/UDP header and search for that state.
4197		 */
4198
4199		struct pf_pdesc	pd2;
4200#ifdef INET
4201		struct ip	h2;
4202#endif /* INET */
4203#ifdef INET6
4204		struct ip6_hdr	h2_6;
4205		int		terminal = 0;
4206#endif /* INET6 */
4207		int		ipoff2;
4208		int		off2;
4209
4210		pd2.af = pd->af;
4211		switch (pd->af) {
4212#ifdef INET
4213		case AF_INET:
4214			/* offset of h2 in mbuf chain */
4215			ipoff2 = off + ICMP_MINLEN;
4216
4217			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4218			    NULL, NULL, pd2.af)) {
4219				DPFPRINTF(PF_DEBUG_MISC,
4220				    ("pf: ICMP error message too short "
4221				    "(ip)\n"));
4222				return (PF_DROP);
4223			}
4224			/*
4225			 * ICMP error messages don't refer to non-first
4226			 * fragments
4227			 */
4228			if (h2.ip_off & htons(IP_OFFMASK))
4229				return (PF_DROP);
4230
4231			/* offset of protocol header that follows h2 */
4232			off2 = ipoff2 + (h2.ip_hl << 2);
4233
4234			pd2.proto = h2.ip_p;
4235			pd2.src = (struct pf_addr *)&h2.ip_src;
4236			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4237			pd2.ip_sum = &h2.ip_sum;
4238			break;
4239#endif /* INET */
4240#ifdef INET6
4241		case AF_INET6:
4242			ipoff2 = off + sizeof(struct icmp6_hdr);
4243
4244			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4245			    NULL, NULL, pd2.af)) {
4246				DPFPRINTF(PF_DEBUG_MISC,
4247				    ("pf: ICMP error message too short "
4248				    "(ip6)\n"));
4249				return (PF_DROP);
4250			}
4251			pd2.proto = h2_6.ip6_nxt;
4252			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4253			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4254			pd2.ip_sum = NULL;
4255			off2 = ipoff2 + sizeof(h2_6);
4256			do {
4257				switch (pd2.proto) {
4258				case IPPROTO_FRAGMENT:
4259					/*
4260					 * ICMPv6 error messages for
4261					 * non-first fragments
4262					 */
4263					return (PF_DROP);
4264				case IPPROTO_AH:
4265				case IPPROTO_HOPOPTS:
4266				case IPPROTO_ROUTING:
4267				case IPPROTO_DSTOPTS: {
4268					/* get next header and header length */
4269					struct ip6_ext opt6;
4270
4271					if (!pf_pull_hdr(m, off2, &opt6,
4272					    sizeof(opt6), NULL, NULL, pd2.af)) {
4273						DPFPRINTF(PF_DEBUG_MISC,
4274						    ("pf: ICMPv6 short opt\n"));
4275						return (PF_DROP);
4276					}
4277					if (pd2.proto == IPPROTO_AH)
4278						off2 += (opt6.ip6e_len + 2) * 4;
4279					else
4280						off2 += (opt6.ip6e_len + 1) * 8;
4281					pd2.proto = opt6.ip6e_nxt;
4282					/* goto the next header */
4283					break;
4284				}
4285				default:
4286					terminal++;
4287					break;
4288				}
4289			} while (!terminal);
4290			break;
4291#endif /* INET6 */
4292		}
4293
4294		switch (pd2.proto) {
4295		case IPPROTO_TCP: {
4296			struct tcphdr		 th;
4297			u_int32_t		 seq;
4298			struct pf_tree_node	 key;
4299			struct pf_state_peer	*src, *dst;
4300			u_int8_t		 dws;
4301
4302			/*
4303			 * Only the first 8 bytes of the TCP header can be
4304			 * expected. Don't access any TCP header fields after
4305			 * th_seq, an ackskew test is not possible.
4306			 */
4307			if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL, pd2.af)) {
4308				DPFPRINTF(PF_DEBUG_MISC,
4309				    ("pf: ICMP error message too short "
4310				    "(tcp)\n"));
4311				return (PF_DROP);
4312			}
4313
4314			key.af = pd2.af;
4315			key.proto = IPPROTO_TCP;
4316			PF_ACPY(&key.addr[0], pd2.dst, pd2.af);
4317			key.port[0] = th.th_dport;
4318			PF_ACPY(&key.addr[1], pd2.src, pd2.af);
4319			key.port[1] = th.th_sport;
4320
4321			STATE_LOOKUP();
4322
4323			if (direction == (*state)->direction) {
4324				src = &(*state)->dst;
4325				dst = &(*state)->src;
4326			} else {
4327				src = &(*state)->src;
4328				dst = &(*state)->dst;
4329			}
4330
4331			if (src->wscale && dst->wscale && !(th.th_flags & TH_SYN))
4332				dws = dst->wscale & PF_WSCALE_MASK;
4333			else
4334				dws = 0;
4335
4336			/* Demodulate sequence number */
4337			seq = ntohl(th.th_seq) - src->seqdiff;
4338			if (src->seqdiff)
4339				pf_change_a(&th.th_seq, &th.th_sum,
4340				    htonl(seq), 0);
4341
4342			if (!SEQ_GEQ(src->seqhi, seq) ||
4343			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
4344				if (pf_status.debug >= PF_DEBUG_MISC) {
4345					printf("pf: BAD ICMP %d:%d ",
4346					    icmptype, pd->hdr.icmp->icmp_code);
4347					pf_print_host(pd->src, 0, pd->af);
4348					printf(" -> ");
4349					pf_print_host(pd->dst, 0, pd->af);
4350					printf(" state: ");
4351					pf_print_state(*state);
4352					printf(" seq=%u\n", seq);
4353				}
4354				return (PF_DROP);
4355			}
4356
4357			if (STATE_TRANSLATE(*state)) {
4358				if (direction == PF_IN) {
4359					pf_change_icmp(pd2.src, &th.th_sport,
4360					    saddr, &(*state)->lan.addr,
4361					    (*state)->lan.port, NULL,
4362					    pd2.ip_sum, icmpsum,
4363					    pd->ip_sum, 0, pd2.af);
4364				} else {
4365					pf_change_icmp(pd2.dst, &th.th_dport,
4366					    saddr, &(*state)->gwy.addr,
4367					    (*state)->gwy.port, NULL,
4368					    pd2.ip_sum, icmpsum,
4369					    pd->ip_sum, 0, pd2.af);
4370				}
4371				switch (pd2.af) {
4372#ifdef INET
4373				case AF_INET:
4374					m_copyback(m, off, ICMP_MINLEN,
4375					    (caddr_t)pd->hdr.icmp);
4376					m_copyback(m, ipoff2, sizeof(h2),
4377					    (caddr_t)&h2);
4378					break;
4379#endif /* INET */
4380#ifdef INET6
4381				case AF_INET6:
4382					m_copyback(m, off,
4383					    sizeof(struct icmp6_hdr),
4384					    (caddr_t)pd->hdr.icmp6);
4385					m_copyback(m, ipoff2, sizeof(h2_6),
4386					    (caddr_t)&h2_6);
4387					break;
4388#endif /* INET6 */
4389				}
4390				m_copyback(m, off2, 8, (caddr_t)&th);
4391			} else if (src->seqdiff) {
4392				m_copyback(m, off2, 8, (caddr_t)&th);
4393			}
4394
4395			return (PF_PASS);
4396			break;
4397		}
4398		case IPPROTO_UDP: {
4399			struct udphdr		uh;
4400			struct pf_tree_node	key;
4401
4402			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4403			    NULL, NULL, pd2.af)) {
4404				DPFPRINTF(PF_DEBUG_MISC,
4405				    ("pf: ICMP error message too short "
4406				    "(udp)\n"));
4407				return (PF_DROP);
4408			}
4409
4410			key.af = pd2.af;
4411			key.proto = IPPROTO_UDP;
4412			PF_ACPY(&key.addr[0], pd2.dst, pd2.af);
4413			key.port[0] = uh.uh_dport;
4414			PF_ACPY(&key.addr[1], pd2.src, pd2.af);
4415			key.port[1] = uh.uh_sport;
4416
4417			STATE_LOOKUP();
4418
4419			if (STATE_TRANSLATE(*state)) {
4420				if (direction == PF_IN) {
4421					pf_change_icmp(pd2.src, &uh.uh_sport,
4422					    daddr, &(*state)->lan.addr,
4423					    (*state)->lan.port, &uh.uh_sum,
4424					    pd2.ip_sum, icmpsum,
4425					    pd->ip_sum, 1, pd2.af);
4426				} else {
4427					pf_change_icmp(pd2.dst, &uh.uh_dport,
4428					    saddr, &(*state)->gwy.addr,
4429					    (*state)->gwy.port, &uh.uh_sum,
4430					    pd2.ip_sum, icmpsum,
4431					    pd->ip_sum, 1, pd2.af);
4432				}
4433				switch (pd2.af) {
4434#ifdef INET
4435				case AF_INET:
4436					m_copyback(m, off, ICMP_MINLEN,
4437					    (caddr_t)pd->hdr.icmp);
4438					m_copyback(m, ipoff2, sizeof(h2),
4439					    (caddr_t)&h2);
4440					break;
4441#endif /* INET */
4442#ifdef INET6
4443				case AF_INET6:
4444					m_copyback(m, off,
4445					    sizeof(struct icmp6_hdr),
4446					    (caddr_t)pd->hdr.icmp6);
4447					m_copyback(m, ipoff2, sizeof(h2_6),
4448					    (caddr_t)&h2_6);
4449					break;
4450#endif /* INET6 */
4451				}
4452				m_copyback(m, off2, sizeof(uh),
4453				    (caddr_t)&uh);
4454			}
4455
4456			return (PF_PASS);
4457			break;
4458		}
4459#ifdef INET
4460		case IPPROTO_ICMP: {
4461			struct icmp		iih;
4462			struct pf_tree_node	key;
4463
4464			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4465			    NULL, NULL, pd2.af)) {
4466				DPFPRINTF(PF_DEBUG_MISC,
4467				    ("pf: ICMP error message too short i"
4468				    "(icmp)\n"));
4469				return (PF_DROP);
4470			}
4471
4472			key.af = pd2.af;
4473			key.proto = IPPROTO_ICMP;
4474			PF_ACPY(&key.addr[0], pd2.dst, pd2.af);
4475			key.port[0] = iih.icmp_id;
4476			PF_ACPY(&key.addr[1], pd2.src, pd2.af);
4477			key.port[1] = iih.icmp_id;
4478
4479			STATE_LOOKUP();
4480
4481			if (STATE_TRANSLATE(*state)) {
4482				if (direction == PF_IN) {
4483					pf_change_icmp(pd2.src, &iih.icmp_id,
4484					    daddr, &(*state)->lan.addr,
4485					    (*state)->lan.port, NULL,
4486					    pd2.ip_sum, icmpsum,
4487					    pd->ip_sum, 0, AF_INET);
4488				} else {
4489					pf_change_icmp(pd2.dst, &iih.icmp_id,
4490					    saddr, &(*state)->gwy.addr,
4491					    (*state)->gwy.port, NULL,
4492					    pd2.ip_sum, icmpsum,
4493					    pd->ip_sum, 0, AF_INET);
4494				}
4495				m_copyback(m, off, ICMP_MINLEN,
4496				    (caddr_t)pd->hdr.icmp);
4497				m_copyback(m, ipoff2, sizeof(h2),
4498				    (caddr_t)&h2);
4499				m_copyback(m, off2, ICMP_MINLEN,
4500				    (caddr_t)&iih);
4501			}
4502
4503			return (PF_PASS);
4504			break;
4505		}
4506#endif /* INET */
4507#ifdef INET6
4508		case IPPROTO_ICMPV6: {
4509			struct icmp6_hdr	iih;
4510			struct pf_tree_node	key;
4511
4512			if (!pf_pull_hdr(m, off2, &iih,
4513			    sizeof(struct icmp6_hdr), NULL, NULL, pd2.af)) {
4514				DPFPRINTF(PF_DEBUG_MISC,
4515				    ("pf: ICMP error message too short "
4516				    "(icmp6)\n"));
4517				return (PF_DROP);
4518			}
4519
4520			key.af = pd2.af;
4521			key.proto = IPPROTO_ICMPV6;
4522			PF_ACPY(&key.addr[0], pd2.dst, pd2.af);
4523			key.port[0] = iih.icmp6_id;
4524			PF_ACPY(&key.addr[1], pd2.src, pd2.af);
4525			key.port[1] = iih.icmp6_id;
4526
4527			STATE_LOOKUP();
4528
4529			if (STATE_TRANSLATE(*state)) {
4530				if (direction == PF_IN) {
4531					pf_change_icmp(pd2.src, &iih.icmp6_id,
4532					    daddr, &(*state)->lan.addr,
4533					    (*state)->lan.port, NULL,
4534					    pd2.ip_sum, icmpsum,
4535					    pd->ip_sum, 0, AF_INET6);
4536				} else {
4537					pf_change_icmp(pd2.dst, &iih.icmp6_id,
4538					    saddr, &(*state)->gwy.addr,
4539					    (*state)->gwy.port, NULL,
4540					    pd2.ip_sum, icmpsum,
4541					    pd->ip_sum, 0, AF_INET6);
4542				}
4543				m_copyback(m, off, sizeof(struct icmp6_hdr),
4544				    (caddr_t)pd->hdr.icmp6);
4545				m_copyback(m, ipoff2, sizeof(h2_6),
4546				    (caddr_t)&h2_6);
4547				m_copyback(m, off2, sizeof(struct icmp6_hdr),
4548				    (caddr_t)&iih);
4549			}
4550
4551			return (PF_PASS);
4552			break;
4553		}
4554#endif /* INET6 */
4555		default: {
4556			struct pf_tree_node	key;
4557
4558			key.af = pd2.af;
4559			key.proto = pd2.proto;
4560			PF_ACPY(&key.addr[0], pd2.dst, pd2.af);
4561			key.port[0] = 0;
4562			PF_ACPY(&key.addr[1], pd2.src, pd2.af);
4563			key.port[1] = 0;
4564
4565			STATE_LOOKUP();
4566
4567			if (STATE_TRANSLATE(*state)) {
4568				if (direction == PF_IN) {
4569					pf_change_icmp(pd2.src, NULL,
4570					    daddr, &(*state)->lan.addr,
4571					    0, NULL,
4572					    pd2.ip_sum, icmpsum,
4573					    pd->ip_sum, 0, pd2.af);
4574				} else {
4575					pf_change_icmp(pd2.dst, NULL,
4576					    saddr, &(*state)->gwy.addr,
4577					    0, NULL,
4578					    pd2.ip_sum, icmpsum,
4579					    pd->ip_sum, 0, pd2.af);
4580				}
4581				switch (pd2.af) {
4582#ifdef INET
4583				case AF_INET:
4584					m_copyback(m, off, ICMP_MINLEN,
4585					    (caddr_t)pd->hdr.icmp);
4586					m_copyback(m, ipoff2, sizeof(h2),
4587					    (caddr_t)&h2);
4588					break;
4589#endif /* INET */
4590#ifdef INET6
4591				case AF_INET6:
4592					m_copyback(m, off,
4593					    sizeof(struct icmp6_hdr),
4594					    (caddr_t)pd->hdr.icmp6);
4595					m_copyback(m, ipoff2, sizeof(h2_6),
4596					    (caddr_t)&h2_6);
4597					break;
4598#endif /* INET6 */
4599				}
4600			}
4601
4602			return (PF_PASS);
4603			break;
4604		}
4605		}
4606	}
4607}
4608
4609int
4610pf_test_state_other(struct pf_state **state, int direction, struct ifnet *ifp,
4611    struct pf_pdesc *pd)
4612{
4613	struct pf_state_peer	*src, *dst;
4614	struct pf_tree_node	 key;
4615	int			dirndx;
4616
4617	key.af = pd->af;
4618	key.proto = pd->proto;
4619	PF_ACPY(&key.addr[0], pd->src, key.af);
4620	PF_ACPY(&key.addr[1], pd->dst, key.af);
4621	key.port[0] = 0;
4622	key.port[1] = 0;
4623
4624	STATE_LOOKUP();
4625
4626	if (direction == (*state)->direction) {
4627		src = &(*state)->src;
4628		dst = &(*state)->dst;
4629		dirndx = 0;
4630	} else {
4631		src = &(*state)->dst;
4632		dst = &(*state)->src;
4633		dirndx = 1;
4634	}
4635
4636	(*state)->packets[dirndx]++;
4637	(*state)->bytes[dirndx] += pd->tot_len;
4638
4639	/* update states */
4640	if (src->state < PFOTHERS_SINGLE)
4641		src->state = PFOTHERS_SINGLE;
4642	if (dst->state == PFOTHERS_SINGLE)
4643		dst->state = PFOTHERS_MULTIPLE;
4644
4645	/* update expire time */
4646#if defined(__FreeBSD__)
4647	(*state)->expire = time_second;
4648#else
4649	(*state)->expire = time.tv_sec;
4650#endif
4651	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
4652		(*state)->timeout = PFTM_OTHER_MULTIPLE;
4653	else
4654		(*state)->timeout = PFTM_OTHER_SINGLE;
4655
4656	/* translate source/destination address, if necessary */
4657	if (STATE_TRANSLATE(*state)) {
4658		if (direction == PF_OUT)
4659			switch (pd->af) {
4660#ifdef INET
4661			case AF_INET:
4662				pf_change_a(&pd->src->v4.s_addr,
4663				    pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
4664				    0);
4665				break;
4666#endif /* INET */
4667#ifdef INET6
4668			case AF_INET6:
4669				PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
4670				break;
4671#endif /* INET6 */
4672			}
4673		else
4674			switch (pd->af) {
4675#ifdef INET
4676			case AF_INET:
4677				pf_change_a(&pd->dst->v4.s_addr,
4678				    pd->ip_sum, (*state)->lan.addr.v4.s_addr,
4679				    0);
4680				break;
4681#endif /* INET */
4682#ifdef INET6
4683			case AF_INET6:
4684				PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
4685				break;
4686#endif /* INET6 */
4687			}
4688	}
4689
4690	(*state)->rule.ptr->packets++;
4691	(*state)->rule.ptr->bytes += pd->tot_len;
4692	if ((*state)->nat_rule.ptr != NULL) {
4693		(*state)->nat_rule.ptr->packets++;
4694		(*state)->nat_rule.ptr->bytes += pd->tot_len;
4695	}
4696	if ((*state)->anchor.ptr != NULL) {
4697		(*state)->anchor.ptr->packets++;
4698		(*state)->anchor.ptr->bytes += pd->tot_len;
4699	}
4700	return (PF_PASS);
4701}
4702
4703/*
4704 * ipoff and off are measured from the start of the mbuf chain.
4705 * h must be at "ipoff" on the mbuf chain.
4706 */
4707void *
4708pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
4709    u_short *actionp, u_short *reasonp, sa_family_t af)
4710{
4711	switch (af) {
4712#ifdef INET
4713	case AF_INET: {
4714		struct ip	*h = mtod(m, struct ip *);
4715		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
4716
4717		if (fragoff) {
4718			if (fragoff >= len)
4719				ACTION_SET(actionp, PF_PASS);
4720			else {
4721				ACTION_SET(actionp, PF_DROP);
4722				REASON_SET(reasonp, PFRES_FRAG);
4723			}
4724			return (NULL);
4725		}
4726		if (m->m_pkthdr.len < off + len || ntohs(h->ip_len) < off + len) {
4727			ACTION_SET(actionp, PF_DROP);
4728			REASON_SET(reasonp, PFRES_SHORT);
4729			return (NULL);
4730		}
4731		break;
4732	}
4733#endif /* INET */
4734#ifdef INET6
4735	case AF_INET6: {
4736		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
4737
4738		if (m->m_pkthdr.len < off + len ||
4739		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
4740		    (unsigned)(off + len)) {
4741			ACTION_SET(actionp, PF_DROP);
4742			REASON_SET(reasonp, PFRES_SHORT);
4743			return (NULL);
4744		}
4745		break;
4746	}
4747#endif /* INET6 */
4748	}
4749	m_copydata(m, off, len, p);
4750	return (p);
4751}
4752
4753int
4754pf_routable(struct pf_addr *addr, sa_family_t af)
4755{
4756	struct sockaddr_in	*dst;
4757	struct route		 ro;
4758	int			 ret = 0;
4759
4760	bzero(&ro, sizeof(ro));
4761	dst = satosin(&ro.ro_dst);
4762	dst->sin_family = af;
4763	dst->sin_len = sizeof(*dst);
4764	dst->sin_addr = addr->v4;
4765#if defined(__FreeBSD__)
4766#ifdef RTF_PRCLONING
4767	rtalloc_ign(&ro, (RTF_CLONING|RTF_PRCLONING));
4768#else /* !RTF_PRCLONING */
4769	rtalloc_ign(&ro, RTF_CLONING);
4770#endif
4771#else /* ! __FreeBSD__ */
4772	rtalloc_noclone(&ro, NO_CLONING);
4773#endif
4774
4775	if (ro.ro_rt != NULL) {
4776		ret = 1;
4777		RTFREE(ro.ro_rt);
4778	}
4779
4780	return (ret);
4781}
4782
4783#ifdef INET
4784
#if defined(__FreeBSD__) && (__FreeBSD_version < 501105)
/*
 * Compatibility copy of ip_fragment() for FreeBSD versions older than
 * 501105.  Splits the IPv4 packet *m_frag so that every piece fits into
 * "mtu" bytes.
 *
 *   ip                 IP header of the packet; ip_len and ip_off are
 *                      used as host-order values here and are converted
 *                      with htons() before a successful return
 *   m_frag             in: the packet; out: the first fragment, with the
 *                      remaining fragments linked through m_nextpkt
 *   mtu                maximum size of one fragment including IP header
 *   if_hwassist_flags  checksum capabilities of the outgoing interface
 *   sw_csum            checksums that must be computed in software
 *
 * Returns 0 on success, EMSGSIZE if the packet may not be fragmented (DF
 * set) or fewer than 8 payload bytes fit into a fragment, and ENOBUFS if
 * an mbuf could not be obtained.
 *
 * On ENOBUFS the fragments that were already built are freed here and
 * m0->m_nextpkt is cleared, so a caller that reacts to the error with a
 * plain m_freem() on the head does not leak them.
 */
int
ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
	    u_long if_hwassist_flags, int sw_csum)
{
	int error = 0;
	int hlen = ip->ip_hl << 2;
	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
	int off;
	struct mbuf *m0 = *m_frag;	/* the original packet		*/
	int firstlen;
	struct mbuf **mnext;
	int nfrags;

	if (ip->ip_off & IP_DF) {	/* Fragmentation not allowed */
		ipstat.ips_cantfrag++;
		return EMSGSIZE;
	}

	/*
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (len < 8)
		return EMSGSIZE;

	/*
	 * If the interface will not calculate checksums on
	 * fragmented packets, then do it here.
	 */
	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
	    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
		in_delayed_cksum(m0);
		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	}

	if (len > PAGE_SIZE) {
		/*
		 * Fragment large datagrams such that each segment
		 * contains a multiple of PAGE_SIZE amount of data,
		 * plus headers. This enables a receiver to perform
		 * page-flipping zero-copy optimizations.
		 *
		 * XXX When does this help given that sender and receiver
		 * could have different page sizes, and also mtu could
		 * be less than the receiver's page size ?
		 */
		int newlen;
		struct mbuf *m;

		/* sum up the leading mbufs that fit into one mtu */
		for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
			off += m->m_len;

		/*
		 * firstlen (off - hlen) must be aligned on an
		 * 8-byte boundary
		 */
		if (off < hlen)
			goto smart_frag_failure;
		off = ((off - hlen) & ~7) + hlen;
		newlen = (~PAGE_MASK) & mtu;
		if ((newlen + sizeof (struct ip)) > mtu) {
			/* we failed, go back the default */
smart_frag_failure:
			newlen = len;
			off = hlen + len;
		}
		len = newlen;

	} else {
		off = hlen + len;
	}

	firstlen = off - hlen;
	mnext = &m0->m_nextpkt;		/* pointer to next packet */

	/*
	 * Loop through length of segment after first fragment,
	 * make new header and copy data of each part and link onto chain.
	 * Here, m0 is the original packet, m is the fragment being created.
	 * The fragments are linked off the m_nextpkt of the original
	 * packet, which after processing serves as the first fragment.
	 *
	 * nfrags is 1 (for m0) plus the number of fragments linked so far.
	 */
	for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
		struct ip *mhip;	/* ip header on the fragment */
		struct mbuf *m;
		int mhlen = sizeof (struct ip);

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == 0) {
			error = ENOBUFS;
			ipstat.ips_odropped++;
			goto done;
		}
		m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
		/*
		 * In the first mbuf, leave room for the link header, then
		 * copy the original IP header including options. The payload
		 * goes into an additional mbuf chain returned by m_copy().
		 */
		m->m_data += max_linkhdr;
		mhip = mtod(m, struct ip *);
		*mhip = *ip;
		if (hlen > sizeof (struct ip)) {
			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
			mhip->ip_v = IPVERSION;
			mhip->ip_hl = mhlen >> 2;
		}
		m->m_len = mhlen;
		/* XXX do we need to add ip->ip_off below ? */
		mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
		if (off + len >= ip->ip_len) {	/* last fragment */
			len = ip->ip_len - off;
			m->m_flags |= M_LASTFRAG;
		} else
			mhip->ip_off |= IP_MF;
		mhip->ip_len = htons((u_short)(len + mhlen));
		m->m_next = m_copy(m0, off, len);
		if (m->m_next == 0) {		/* copy failed */
			m_free(m);
			error = ENOBUFS;	/* ??? */
			ipstat.ips_odropped++;
			goto done;
		}
		m->m_pkthdr.len = mhlen + len;
		m->m_pkthdr.rcvif = (struct ifnet *)0;
#ifdef MAC
		mac_create_fragment(m0, m);
#endif
		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
		mhip->ip_off = htons(mhip->ip_off);
		mhip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP)
			mhip->ip_sum = in_cksum(m, mhlen);
		*mnext = m;
		mnext = &m->m_nextpkt;
	}
	ipstat.ips_ofragments += nfrags;

	/* set first marker for fragment chain */
	m0->m_flags |= M_FIRSTFRAG | M_FRAG;
	m0->m_pkthdr.csum_data = nfrags;

	/*
	 * Update first fragment by trimming what's been copied out
	 * and updating header.
	 */
	m_adj(m0, hlen + firstlen - ip->ip_len);
	m0->m_pkthdr.len = hlen + firstlen;
	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
	ip->ip_off |= IP_MF;
	ip->ip_off = htons(ip->ip_off);
	ip->ip_sum = 0;
	if (sw_csum & CSUM_DELAY_IP)
		ip->ip_sum = in_cksum(m0, hlen);

done:
	/*
	 * Failure part-way through the loop: release the fragments that
	 * were already linked behind m0.  m_freem() only walks m_next, so
	 * a caller that just frees the head would lose them.  nfrags > 1
	 * means at least one fragment has been linked by this call.
	 */
	if (error != 0 && nfrags > 1) {
		struct mbuf *frag, *nextfrag;

		for (frag = m0->m_nextpkt; frag != NULL; frag = nextfrag) {
			nextfrag = frag->m_nextpkt;
			frag->m_nextpkt = NULL;
			m_freem(frag);
		}
		m0->m_nextpkt = NULL;
	}
	*m_frag = m0;
	return error;
}
#endif /* __FreeBSD__ && __FreeBSD_version < 501105 */
4945
4946void
4947pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
4948    struct pf_state *s)
4949{
4950	struct mbuf		*m0, *m1;
4951	struct route		 iproute;
4952	struct route		*ro;
4953	struct sockaddr_in	*dst;
4954	struct ip		*ip;
4955	struct ifnet		*ifp = NULL;
4956	struct m_tag		*mtag;
4957	struct pf_addr		 naddr;
4958	int			 error = 0;
4959#if defined(__FreeBSD__)
4960	int sw_csum;
4961#endif
4962
4963	if (m == NULL || *m == NULL || r == NULL ||
4964	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
4965		panic("pf_route: invalid parameters");
4966
4967	if (r->rt == PF_DUPTO) {
4968		m0 = *m;
4969		mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
4970		if (mtag == NULL) {
4971			mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
4972			if (mtag == NULL)
4973				goto bad;
4974			m_tag_prepend(m0, mtag);
4975		}
4976#if defined(__FreeBSD__)
4977		m0 = m_dup(*m, M_DONTWAIT);
4978#else
4979		m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT);
4980#endif
4981		if (m0 == NULL)
4982			return;
4983	} else {
4984		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
4985			return;
4986		m0 = *m;
4987	}
4988
4989	if (m0->m_len < sizeof(struct ip))
4990		panic("pf_route: m0->m_len < sizeof(struct ip)");
4991	ip = mtod(m0, struct ip *);
4992
4993	ro = &iproute;
4994	bzero((caddr_t)ro, sizeof(*ro));
4995	dst = satosin(&ro->ro_dst);
4996	dst->sin_family = AF_INET;
4997	dst->sin_len = sizeof(*dst);
4998	dst->sin_addr = ip->ip_dst;
4999
5000	if (r->rt == PF_FASTROUTE) {
5001		rtalloc(ro);
5002		if (ro->ro_rt == 0) {
5003			ipstat.ips_noroute++;
5004			goto bad;
5005		}
5006
5007		ifp = ro->ro_rt->rt_ifp;
5008		ro->ro_rt->rt_use++;
5009
5010		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
5011			dst = satosin(ro->ro_rt->rt_gateway);
5012	} else {
5013		if (TAILQ_EMPTY(&r->rpool.list))
5014			panic("pf_route: TAILQ_EMPTY(&r->rpool.list)");
5015		if (s == NULL) {
5016			pf_map_addr(AF_INET, &r->rpool,
5017			    (struct pf_addr *)&ip->ip_src,
5018			    &naddr, NULL);
5019			if (!PF_AZERO(&naddr, AF_INET))
5020				dst->sin_addr.s_addr = naddr.v4.s_addr;
5021			ifp = r->rpool.cur->ifp;
5022		} else {
5023			if (!PF_AZERO(&s->rt_addr, AF_INET))
5024				dst->sin_addr.s_addr =
5025				    s->rt_addr.v4.s_addr;
5026			ifp = s->rt_ifp;
5027		}
5028	}
5029
5030	if (ifp == NULL)
5031		goto bad;
5032
5033	mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
5034	if (mtag == NULL) {
5035		struct m_tag *mtag;
5036
5037		mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
5038		if (mtag == NULL)
5039			goto bad;
5040		m_tag_prepend(m0, mtag);
5041	}
5042
5043	if (oifp != ifp && mtag == NULL) {
5044#if defined(__FreeBSD__)
5045		PF_UNLOCK();
5046		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) {
5047			PF_LOCK();
5048			goto bad;
5049		} else if (m0 == NULL) {
5050			PF_LOCK();
5051			goto done;
5052		}
5053		PF_LOCK();
5054#else
5055		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS)
5056			goto bad;
5057		else if (m0 == NULL)
5058			goto done;
5059#endif
5060		if (m0->m_len < sizeof(struct ip))
5061			panic("pf_route: m0->m_len < sizeof(struct ip)");
5062		ip = mtod(m0, struct ip *);
5063	}
5064
5065#if defined(__FreeBSD__)
5066	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
5067	m0->m_pkthdr.csum_flags |= CSUM_IP;
5068	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
5069	if (sw_csum & CSUM_DELAY_DATA) {
5070		/*
5071		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
5072		 */
5073		NTOHS(ip->ip_len);
5074		NTOHS(ip->ip_off);	 /* XXX: needed? */
5075		in_delayed_cksum(m0);
5076		HTONS(ip->ip_len);
5077		HTONS(ip->ip_off);
5078		sw_csum &= ~CSUM_DELAY_DATA;
5079	}
5080	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
5081
5082	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
5083	    (ifp->if_hwassist & CSUM_FRAGMENT &&
5084		((ip->ip_off & htons(IP_DF)) == 0))) {
5085		/*
5086		 * ip->ip_len = htons(ip->ip_len);
5087		 * ip->ip_off = htons(ip->ip_off);
5088		 */
5089		ip->ip_sum = 0;
5090		if (sw_csum & CSUM_DELAY_IP) {
5091			/* From KAME */
5092			if (ip->ip_v == IPVERSION &&
5093			    (ip->ip_hl << 2) == sizeof(*ip)) {
5094				ip->ip_sum = in_cksum_hdr(ip);
5095			} else {
5096				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5097			}
5098		}
5099		PF_UNLOCK();
5100		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt);
5101		PF_LOCK();
5102		goto done;
5103	}
5104
5105#else
5106	/* Copied from ip_output. */
5107	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
5108		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
5109		    ifp->if_bridge == NULL) {
5110			m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
5111			ipstat.ips_outhwcsum++;
5112		} else {
5113			ip->ip_sum = 0;
5114			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5115		}
5116		/* Update relevant hardware checksum stats for TCP/UDP */
5117		if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT)
5118			tcpstat.tcps_outhwcsum++;
5119		else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT)
5120			udpstat.udps_outhwcsum++;
5121		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
5122		goto done;
5123	}
5124#endif
5125	/*
5126	 * Too large for interface; fragment if possible.
5127	 * Must be able to put at least 8 bytes per fragment.
5128	 */
5129	if (ip->ip_off & htons(IP_DF)) {
5130		ipstat.ips_cantfrag++;
5131		if (r->rt != PF_DUPTO) {
5132#if defined(__FreeBSD__)
5133			/* icmp_error() expects host byte ordering */
5134			NTOHS(ip->ip_len);
5135			NTOHS(ip->ip_off);
5136			PF_UNLOCK();
5137#endif
5138			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5139			    ifp);
5140#if defined(__FreeBSD__)
5141			PF_LOCK();
5142#endif
5143			goto done;
5144		} else
5145			goto bad;
5146	}
5147
5148	m1 = m0;
5149#if defined(__FreeBSD__)
5150	/*
5151	 * XXX: is cheaper + less error prone than own function
5152	 */
5153	NTOHS(ip->ip_len);
5154	NTOHS(ip->ip_off);
5155	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
5156#else
5157	error = ip_fragment(m0, ifp, ifp->if_mtu);
5158#endif
5159#if defined(__FreeBSD__)
5160	if (error)
5161#else
5162	if (error == EMSGSIZE)
5163#endif
5164		goto bad;
5165
5166	for (m0 = m1; m0; m0 = m1) {
5167		m1 = m0->m_nextpkt;
5168		m0->m_nextpkt = 0;
5169#if defined(__FreeBSD__)
5170		if (error == 0) {
5171			PF_UNLOCK();
5172			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5173			    NULL);
5174			PF_LOCK();
5175		} else
5176#else
5177		if (error == 0)
5178			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5179			    NULL);
5180		else
5181#endif
5182			m_freem(m0);
5183	}
5184
5185	if (error == 0)
5186		ipstat.ips_fragmented++;
5187
5188done:
5189	if (r->rt != PF_DUPTO)
5190		*m = NULL;
5191	if (ro == &iproute && ro->ro_rt)
5192		RTFREE(ro->ro_rt);
5193	return;
5194
5195bad:
5196	m_freem(m0);
5197	goto done;
5198}
5199#endif /* INET */
5200
5201#ifdef INET6
5202void
5203pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5204    struct pf_state *s)
5205{
5206	struct mbuf		*m0;
5207	struct m_tag		*mtag;
5208	struct route_in6	 ip6route;
5209	struct route_in6	*ro;
5210	struct sockaddr_in6	*dst;
5211	struct ip6_hdr		*ip6;
5212	struct ifnet		*ifp = NULL;
5213	struct pf_addr		 naddr;
5214	int			 error = 0;
5215
5216	if (m == NULL || *m == NULL || r == NULL ||
5217	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5218		panic("pf_route6: invalid parameters");
5219
5220	if (r->rt == PF_DUPTO) {
5221		m0 = *m;
5222		mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
5223		if (mtag == NULL) {
5224			mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
5225			if (mtag == NULL)
5226				goto bad;
5227			m_tag_prepend(m0, mtag);
5228		}
5229#if defined(__FreeBSD__)
5230		m0 = m_dup(*m, M_DONTWAIT);
5231#else
5232		m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT);
5233#endif
5234		if (m0 == NULL)
5235			return;
5236	} else {
5237		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5238			return;
5239		m0 = *m;
5240	}
5241
5242	if (m0->m_len < sizeof(struct ip6_hdr))
5243		panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
5244	ip6 = mtod(m0, struct ip6_hdr *);
5245
5246	ro = &ip6route;
5247	bzero((caddr_t)ro, sizeof(*ro));
5248	dst = (struct sockaddr_in6 *)&ro->ro_dst;
5249	dst->sin6_family = AF_INET6;
5250	dst->sin6_len = sizeof(*dst);
5251	dst->sin6_addr = ip6->ip6_dst;
5252
5253	/* Cheat. */
5254	if (r->rt == PF_FASTROUTE) {
5255		mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
5256		if (mtag == NULL)
5257			goto bad;
5258		m_tag_prepend(m0, mtag);
5259#if defined(__FreeBSD__)
5260		PF_UNLOCK();
5261		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5262		PF_LOCK();
5263#else
5264		ip6_output(m0, NULL, NULL, 0, NULL, NULL);
5265#endif
5266		return;
5267	}
5268
5269	if (TAILQ_EMPTY(&r->rpool.list))
5270		panic("pf_route6: TAILQ_EMPTY(&r->rpool.list)");
5271	if (s == NULL) {
5272		pf_map_addr(AF_INET6, &r->rpool,
5273		    (struct pf_addr *)&ip6->ip6_src, &naddr, NULL);
5274		if (!PF_AZERO(&naddr, AF_INET6))
5275			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5276			    &naddr, AF_INET6);
5277		ifp = r->rpool.cur->ifp;
5278	} else {
5279		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5280			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5281			    &s->rt_addr, AF_INET6);
5282		ifp = s->rt_ifp;
5283	}
5284
5285	if (ifp == NULL)
5286		goto bad;
5287
5288	if (oifp != ifp) {
5289		mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
5290		if (mtag == NULL) {
5291			mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
5292			if (mtag == NULL)
5293				goto bad;
5294			m_tag_prepend(m0, mtag);
5295#if defined(__FreeBSD__)
5296			PF_UNLOCK();
5297			if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) {
5298				PF_LOCK();
5299				goto bad;
5300			} else if (m0 == NULL) {
5301				PF_LOCK();
5302				goto done;
5303			}
5304			PF_LOCK();
5305#else
5306			if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS)
5307				goto bad;
5308			else if (m0 == NULL)
5309				goto done;
5310#endif
5311		}
5312	}
5313
5314	/*
5315	 * If the packet is too large for the outgoing interface,
5316	 * send back an icmp6 error.
5317	 */
5318	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
5319		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5320	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5321#if defined(__FreeBSD__)
5322		PF_UNLOCK();
5323#endif
5324		error = nd6_output(ifp, ifp, m0, dst, NULL);
5325#if defined(__FreeBSD__)
5326		PF_LOCK();
5327#endif
5328	} else {
5329		in6_ifstat_inc(ifp, ifs6_in_toobig);
5330#if defined(__FreeBSD__)
5331		if (r->rt != PF_DUPTO) {
5332			PF_UNLOCK();
5333			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5334			PF_LOCK();
5335		 } else
5336#else
5337		if (r->rt != PF_DUPTO)
5338			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5339		else
5340#endif
5341			goto bad;
5342	}
5343
5344done:
5345	if (r->rt != PF_DUPTO)
5346		*m = NULL;
5347	return;
5348
5349bad:
5350	m_freem(m0);
5351	goto done;
5352}
5353#endif /* INET6 */
5354
5355
5356#if defined(__FreeBSD__)
5357/*
5358 * XXX
5359 * FreeBSD supports cksum offload for the following drivers.
5360 * em(4), gx(4), lge(4), nge(4), ti(4), xl(4)
5361 * If we can make full use of it we would outperform ipfw/ipfilter in
5362 * very heavy traffic.
5363 * I have not tested 'cause I don't have NICs that supports cksum offload.
5364 * (There might be problems. Typical phenomena would be
5365 *   1. No route message for UDP packet.
5366 *   2. No connection acceptance from external hosts regardless of rule set.)
5367 */
5368int
5369pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
5370{
5371	u_int16_t sum = 0;
5372	int hw_assist = 0;
5373	struct ip *ip;
5374
5375	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5376		return (1);
5377	if (m->m_pkthdr.len < off + len)
5378		return (1);
5379
5380	switch (p) {
5381	case IPPROTO_TCP:
5382		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5383			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5384				sum = m->m_pkthdr.csum_data;
5385			} else {
5386				ip = mtod(m, struct ip *);
5387				sum = in_pseudo(ip->ip_src.s_addr,
5388					ip->ip_dst.s_addr,
5389					htonl(m->m_pkthdr.csum_data +
5390					    IPPROTO_TCP) + ip->ip_len);
5391			}
5392			sum ^= 0xffff;
5393			++hw_assist;
5394		}
5395		break;
5396	case IPPROTO_UDP:
5397		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5398			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5399				sum = m->m_pkthdr.csum_data;
5400			} else {
5401				ip = mtod(m, struct ip *);
5402				sum = in_pseudo(ip->ip_src.s_addr,
5403					ip->ip_dst.s_addr, htonl((u_short)len +
5404					m->m_pkthdr.csum_data + IPPROTO_UDP));
5405			}
5406			sum ^= 0xffff;
5407			++hw_assist;
5408                }
5409		break;
5410	case IPPROTO_ICMP:
5411#ifdef INET6
5412	case IPPROTO_ICMPV6:
5413#endif /* INET6 */
5414		break;
5415	default:
5416		return (1);
5417	}
5418
5419	if (!hw_assist) {
5420		switch (af) {
5421		case AF_INET:
5422			if (p == IPPROTO_ICMP) {
5423				if (m->m_len < off)
5424					return (1);
5425				m->m_data += off;
5426				m->m_len -= off;
5427				sum = in_cksum(m, len);
5428				m->m_data -= off;
5429				m->m_len += off;
5430			} else {
5431				if (m->m_len < sizeof(struct ip))
5432					return (1);
5433				sum = in4_cksum(m, p, off, len);
5434				if (sum == 0) {
5435					m->m_pkthdr.csum_flags |=
5436					    (CSUM_DATA_VALID |
5437					     CSUM_PSEUDO_HDR);
5438					m->m_pkthdr.csum_data = 0xffff;
5439				}
5440			}
5441			break;
5442#ifdef INET6
5443		case AF_INET6:
5444			if (m->m_len < sizeof(struct ip6_hdr))
5445				return (1);
5446			sum = in6_cksum(m, p, off, len);
5447			/*
5448			 * XXX
5449			 * IPv6 H/W cksum off-load not supported yet!
5450			 *
5451			 * if (sum == 0) {
5452			 *	m->m_pkthdr.csum_flags |=
5453			 *	    (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
5454			 *	m->m_pkthdr.csum_data = 0xffff;
5455			 *}
5456			 */
5457			break;
5458#endif /* INET6 */
5459		default:
5460			return (1);
5461		}
5462	}
5463	if (sum) {
5464		switch (p) {
5465		case IPPROTO_TCP:
5466			tcpstat.tcps_rcvbadsum++;
5467			break;
5468		case IPPROTO_UDP:
5469			udpstat.udps_badsum++;
5470			break;
5471		case IPPROTO_ICMP:
5472			icmpstat.icps_checksum++;
5473			break;
5474#ifdef INET6
5475		case IPPROTO_ICMPV6:
5476			icmp6stat.icp6s_checksum++;
5477			break;
5478#endif /* INET6 */
5479		}
5480		return (1);
5481	}
5482	return (0);
5483}
5484#else
5485/*
5486 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
5487 *   off is the offset where the protocol header starts
5488 *   len is the total length of protocol header plus payload
5489 * returns 0 when the checksum is valid, otherwise returns 1.
5490 */
5491int
5492pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
5493{
5494	u_int16_t flag_ok, flag_bad;
5495	u_int16_t sum;
5496
5497	switch (p) {
5498	case IPPROTO_TCP:
5499		flag_ok = M_TCP_CSUM_IN_OK;
5500		flag_bad = M_TCP_CSUM_IN_BAD;
5501		break;
5502	case IPPROTO_UDP:
5503		flag_ok = M_UDP_CSUM_IN_OK;
5504		flag_bad = M_UDP_CSUM_IN_BAD;
5505		break;
5506	case IPPROTO_ICMP:
5507#ifdef INET6
5508	case IPPROTO_ICMPV6:
5509#endif /* INET6 */
5510		flag_ok = flag_bad = 0;
5511		break;
5512	default:
5513		return (1);
5514	}
5515	if (m->m_pkthdr.csum & flag_ok)
5516		return (0);
5517	if (m->m_pkthdr.csum & flag_bad)
5518		return (1);
5519	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5520		return (1);
5521	if (m->m_pkthdr.len < off + len)
5522		return (1);
5523		switch (af) {
5524	case AF_INET:
5525		if (p == IPPROTO_ICMP) {
5526			if (m->m_len < off)
5527				return (1);
5528			m->m_data += off;
5529			m->m_len -= off;
5530			sum = in_cksum(m, len);
5531			m->m_data -= off;
5532			m->m_len += off;
5533		} else {
5534			if (m->m_len < sizeof(struct ip))
5535				return (1);
5536			sum = in4_cksum(m, p, off, len);
5537		}
5538		break;
5539#ifdef INET6
5540	case AF_INET6:
5541		if (m->m_len < sizeof(struct ip6_hdr))
5542			return (1);
5543		sum = in6_cksum(m, p, off, len);
5544		break;
5545#endif /* INET6 */
5546	default:
5547		return (1);
5548	}
5549	if (sum) {
5550		m->m_pkthdr.csum |= flag_bad;
5551		switch (p) {
5552		case IPPROTO_TCP:
5553			tcpstat.tcps_rcvbadsum++;
5554			break;
5555		case IPPROTO_UDP:
5556			udpstat.udps_badsum++;
5557			break;
5558		case IPPROTO_ICMP:
5559			icmpstat.icps_checksum++;
5560			break;
5561#ifdef INET6
5562		case IPPROTO_ICMPV6:
5563			icmp6stat.icp6s_checksum++;
5564			break;
5565#endif /* INET6 */
5566		}
5567		return (1);
5568	}
5569	m->m_pkthdr.csum |= flag_ok;
5570	return (0);
5571}
5572#endif
5573
5574#ifdef INET
5575int
5576pf_test(int dir, struct ifnet *ifp, struct mbuf **m0)
5577{
5578	u_short		   action, reason = 0, log = 0;
5579	struct mbuf	  *m = *m0;
5580	struct ip	  *h;
5581	struct pf_rule	  *a = NULL, *r = &pf_default_rule, *tr;
5582	struct pf_state	  *s = NULL;
5583	struct pf_ruleset *ruleset = NULL;
5584	struct pf_pdesc	   pd;
5585	int		   off;
5586	int		   pqid = 0;
5587
5588#if defined(__FreeBSD__)
5589	PF_LOCK();
5590#endif
5591	if (!pf_status.running ||
5592	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
5593#if defined(__FreeBSD__)
5594		PF_UNLOCK();
5595#endif
5596	    	return (PF_PASS);
5597	}
5598
5599#if defined(__FreeBSD__) && (__FreeBSD_version >= 501000)
5600	M_ASSERTPKTHDR(m);
5601#else
5602#ifdef DIAGNOSTIC
5603	if ((m->m_flags & M_PKTHDR) == 0)
5604		panic("non-M_PKTHDR is passed to pf_test");
5605#endif
5606#endif
5607
5608	if (m->m_pkthdr.len < (int)sizeof(*h)) {
5609		action = PF_DROP;
5610		REASON_SET(&reason, PFRES_SHORT);
5611		log = 1;
5612		goto done;
5613	}
5614
5615	/* We do IP header normalization and packet reassembly here */
5616	if (pf_normalize_ip(m0, dir, ifp, &reason) != PF_PASS) {
5617		action = PF_DROP;
5618		goto done;
5619	}
5620	m = *m0;
5621	h = mtod(m, struct ip *);
5622
5623	off = h->ip_hl << 2;
5624	if (off < (int)sizeof(*h)) {
5625		action = PF_DROP;
5626		REASON_SET(&reason, PFRES_SHORT);
5627		log = 1;
5628		goto done;
5629	}
5630
5631	memset(&pd, 0, sizeof(pd));
5632	pd.src = (struct pf_addr *)&h->ip_src;
5633	pd.dst = (struct pf_addr *)&h->ip_dst;
5634	pd.ip_sum = &h->ip_sum;
5635	pd.proto = h->ip_p;
5636	pd.af = AF_INET;
5637	pd.tos = h->ip_tos;
5638	pd.tot_len = ntohs(h->ip_len);
5639
5640	/* handle fragments that didn't get reassembled by normalization */
5641	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
5642		action = pf_test_fragment(&r, dir, ifp, m, h,
5643		    &pd, &a, &ruleset);
5644		goto done;
5645	}
5646
5647	switch (h->ip_p) {
5648
5649	case IPPROTO_TCP: {
5650		struct tcphdr	th;
5651
5652		pd.hdr.tcp = &th;
5653		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5654		    &action, &reason, AF_INET)) {
5655			log = action != PF_PASS;
5656			goto done;
5657		}
5658		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5659		    ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
5660			action = PF_DROP;
5661			goto done;
5662		}
5663		pd.p_len = pd.tot_len - off - (th.th_off << 2);
5664		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
5665			pqid = 1;
5666		action = pf_normalize_tcp(dir, ifp, m, 0, off, h, &pd);
5667		if (action == PF_DROP)
5668			break;
5669		action = pf_test_state_tcp(&s, dir, ifp, m, 0, off, h, &pd,
5670		    &reason);
5671		if (action == PF_PASS) {
5672			r = s->rule.ptr;
5673			log = s->log;
5674		} else if (s == NULL)
5675			action = pf_test_tcp(&r, &s, dir, ifp,
5676			    m, 0, off, h, &pd, &a, &ruleset);
5677		break;
5678	}
5679
5680	case IPPROTO_UDP: {
5681		struct udphdr	uh;
5682
5683		pd.hdr.udp = &uh;
5684		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5685		    &action, &reason, AF_INET)) {
5686			log = action != PF_PASS;
5687			goto done;
5688		}
5689		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
5690		    off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
5691			action = PF_DROP;
5692			goto done;
5693		}
5694		action = pf_test_state_udp(&s, dir, ifp, m, 0, off, h, &pd);
5695		if (action == PF_PASS) {
5696			r = s->rule.ptr;
5697			a = s->anchor.ptr;
5698			log = s->log;
5699		} else if (s == NULL)
5700			action = pf_test_udp(&r, &s, dir, ifp,
5701			    m, 0, off, h, &pd, &a, &ruleset);
5702		break;
5703	}
5704
5705	case IPPROTO_ICMP: {
5706		struct icmp	ih;
5707
5708		pd.hdr.icmp = &ih;
5709		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
5710		    &action, &reason, AF_INET)) {
5711			log = action != PF_PASS;
5712			goto done;
5713		}
5714		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5715		    ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
5716			action = PF_DROP;
5717			goto done;
5718		}
5719		action = pf_test_state_icmp(&s, dir, ifp, m, 0, off, h, &pd);
5720		if (action == PF_PASS) {
5721			r = s->rule.ptr;
5722			r->packets++;
5723			r->bytes += ntohs(h->ip_len);
5724			a = s->anchor.ptr;
5725			if (a != NULL) {
5726				a->packets++;
5727				a->bytes += ntohs(h->ip_len);
5728			}
5729			log = s->log;
5730		} else if (s == NULL)
5731			action = pf_test_icmp(&r, &s, dir, ifp,
5732			    m, 0, off, h, &pd, &a, &ruleset);
5733		break;
5734	}
5735
5736	default:
5737		action = pf_test_state_other(&s, dir, ifp, &pd);
5738		if (action == PF_PASS) {
5739			r = s->rule.ptr;
5740			a = s->anchor.ptr;
5741			log = s->log;
5742		} else if (s == NULL)
5743			action = pf_test_other(&r, &s, dir, ifp, m, off, h,
5744			    &pd, &a, &ruleset);
5745		break;
5746	}
5747
5748	if (ifp == status_ifp) {
5749		pf_status.bcounters[0][dir == PF_OUT] += pd.tot_len;
5750		pf_status.pcounters[0][dir == PF_OUT][action != PF_PASS]++;
5751	}
5752
5753done:
5754	tr = r;
5755	if (r == &pf_default_rule && s != NULL && s->nat_rule.ptr != NULL)
5756		tr = s->nat_rule.ptr;
5757	if (tr->src.addr.type == PF_ADDR_TABLE)
5758		pfr_update_stats(tr->src.addr.p.tbl,
5759		    (s == NULL || s->direction == dir) ? pd.src : pd.dst, pd.af,
5760		    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
5761		    tr->src.not);
5762	if (tr->dst.addr.type == PF_ADDR_TABLE)
5763		pfr_update_stats(tr->dst.addr.p.tbl,
5764		    (s == NULL || s->direction == dir) ? pd.dst : pd.src, pd.af,
5765		    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
5766		    tr->dst.not);
5767
5768	if (action == PF_PASS && h->ip_hl > 5 &&
5769	    !((s && s->allow_opts) || r->allow_opts)) {
5770		action = PF_DROP;
5771		REASON_SET(&reason, PFRES_SHORT);
5772		log = 1;
5773		DPFPRINTF(PF_DEBUG_MISC,
5774		    ("pf: dropping packet with ip options\n"));
5775	}
5776
5777#ifdef ALTQ
5778	if (action == PF_PASS && r->qid) {
5779		struct m_tag	*mtag;
5780		struct altq_tag	*atag;
5781
5782		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
5783		if (mtag != NULL) {
5784			atag = (struct altq_tag *)(mtag + 1);
5785			if (pqid || pd.tos == IPTOS_LOWDELAY)
5786				atag->qid = r->pqid;
5787			else
5788				atag->qid = r->qid;
5789			/* add hints for ecn */
5790			atag->af = AF_INET;
5791			atag->hdr = h;
5792			m_tag_prepend(m, mtag);
5793		}
5794	}
5795#endif
5796
5797	if (log)
5798		PFLOG_PACKET(ifp, h, m, AF_INET, dir, reason, r, a, ruleset);
5799
5800	if (action == PF_SYNPROXY_DROP) {
5801		m_freem(*m0);
5802		*m0 = NULL;
5803		action = PF_PASS;
5804	} else if (r->rt)
5805		/* pf_route can free the mbuf causing *m0 to become NULL */
5806		pf_route(m0, r, dir, ifp, s);
5807
5808#if defined(__FreeBSD__)
5809	PF_UNLOCK();
5810#endif
5811
5812	return (action);
5813}
5814#endif /* INET */
5815
5816#ifdef INET6
5817int
5818pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0)
5819{
5820	u_short		   action, reason = 0, log = 0;
5821	struct mbuf	  *m = *m0;
5822	struct ip6_hdr	  *h;
5823	struct pf_rule	  *a = NULL, *r = &pf_default_rule, *tr;
5824	struct pf_state	  *s = NULL;
5825	struct pf_ruleset *ruleset = NULL;
5826	struct pf_pdesc    pd;
5827	int		   off, terminal = 0;
5828
5829#if defined(__FreeBSD__)
5830	PF_LOCK();
5831#endif
5832
5833	if (!pf_status.running ||
5834	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
5835#if defined(__FreeBSD__)
5836		PF_UNLOCK();
5837#endif
5838		return (PF_PASS);
5839	}
5840
5841#if defined(__FreeBSD__) && (__FreeBSD_version >= 501000)
5842	M_ASSERTPKTHDR(m);
5843#else
5844#ifdef DIAGNOSTIC
5845	if ((m->m_flags & M_PKTHDR) == 0)
5846		panic("non-M_PKTHDR is passed to pf_test");
5847#endif
5848#endif
5849
5850	if (m->m_pkthdr.len < (int)sizeof(*h)) {
5851		action = PF_DROP;
5852		REASON_SET(&reason, PFRES_SHORT);
5853		log = 1;
5854		goto done;
5855	}
5856
5857	/* We do IP header normalization and packet reassembly here */
5858	if (pf_normalize_ip6(m0, dir, ifp, &reason) != PF_PASS) {
5859		action = PF_DROP;
5860		goto done;
5861	}
5862	m = *m0;
5863	h = mtod(m, struct ip6_hdr *);
5864
5865	memset(&pd, 0, sizeof(pd));
5866	pd.src = (struct pf_addr *)&h->ip6_src;
5867	pd.dst = (struct pf_addr *)&h->ip6_dst;
5868	pd.ip_sum = NULL;
5869	pd.af = AF_INET6;
5870	pd.tos = 0;
5871	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
5872
5873	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
5874	pd.proto = h->ip6_nxt;
5875	do {
5876		switch (pd.proto) {
5877		case IPPROTO_FRAGMENT:
5878			action = pf_test_fragment(&r, dir, ifp, m, h,
5879			    &pd, &a, &ruleset);
5880			if (action == PF_DROP)
5881				REASON_SET(&reason, PFRES_FRAG);
5882			goto done;
5883		case IPPROTO_AH:
5884		case IPPROTO_HOPOPTS:
5885		case IPPROTO_ROUTING:
5886		case IPPROTO_DSTOPTS: {
5887			/* get next header and header length */
5888			struct ip6_ext	opt6;
5889
5890			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
5891			    NULL, NULL, pd.af)) {
5892				DPFPRINTF(PF_DEBUG_MISC,
5893				    ("pf: IPv6 short opt\n"));
5894				action = PF_DROP;
5895				REASON_SET(&reason, PFRES_SHORT);
5896				log = 1;
5897				goto done;
5898			}
5899			if (pd.proto == IPPROTO_AH)
5900				off += (opt6.ip6e_len + 2) * 4;
5901			else
5902				off += (opt6.ip6e_len + 1) * 8;
5903			pd.proto = opt6.ip6e_nxt;
5904			/* goto the next header */
5905			break;
5906		}
5907		default:
5908			terminal++;
5909			break;
5910		}
5911	} while (!terminal);
5912
5913	switch (pd.proto) {
5914
5915	case IPPROTO_TCP: {
5916		struct tcphdr	th;
5917
5918		pd.hdr.tcp = &th;
5919		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5920		    &action, &reason, AF_INET6)) {
5921			log = action != PF_PASS;
5922			goto done;
5923		}
5924		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5925		    ntohs(h->ip6_plen), IPPROTO_TCP, AF_INET6)) {
5926			action = PF_DROP;
5927			goto done;
5928		}
5929		pd.p_len = pd.tot_len - off - (th.th_off << 2);
5930		action = pf_normalize_tcp(dir, ifp, m, 0, off, h, &pd);
5931		if (action == PF_DROP)
5932			break;
5933		action = pf_test_state_tcp(&s, dir, ifp, m, 0, off, h, &pd,
5934		    &reason);
5935		if (action == PF_PASS) {
5936			r = s->rule.ptr;
5937			log = s->log;
5938		} else if (s == NULL)
5939			action = pf_test_tcp(&r, &s, dir, ifp,
5940			    m, 0, off, h, &pd, &a, &ruleset);
5941		break;
5942	}
5943
5944	case IPPROTO_UDP: {
5945		struct udphdr	uh;
5946
5947		pd.hdr.udp = &uh;
5948		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5949		    &action, &reason, AF_INET6)) {
5950			log = action != PF_PASS;
5951			goto done;
5952		}
5953		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
5954		    off, ntohs(h->ip6_plen), IPPROTO_UDP, AF_INET6)) {
5955			action = PF_DROP;
5956			goto done;
5957		}
5958		action = pf_test_state_udp(&s, dir, ifp, m, 0, off, h, &pd);
5959		if (action == PF_PASS) {
5960			r = s->rule.ptr;
5961			log = s->log;
5962		} else if (s == NULL)
5963			action = pf_test_udp(&r, &s, dir, ifp,
5964			    m, 0, off, h, &pd, &a, &ruleset);
5965		break;
5966	}
5967
5968	case IPPROTO_ICMPV6: {
5969		struct icmp6_hdr	ih;
5970
5971		pd.hdr.icmp6 = &ih;
5972		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
5973		    &action, &reason, AF_INET6)) {
5974			log = action != PF_PASS;
5975			goto done;
5976		}
5977		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5978		    ntohs(h->ip6_plen), IPPROTO_ICMPV6, AF_INET6)) {
5979			action = PF_DROP;
5980			goto done;
5981		}
5982		action = pf_test_state_icmp(&s, dir, ifp,
5983		    m, 0, off, h, &pd);
5984		if (action == PF_PASS) {
5985			r = s->rule.ptr;
5986			r->packets++;
5987			r->bytes += h->ip6_plen;
5988			log = s->log;
5989		} else if (s == NULL)
5990			action = pf_test_icmp(&r, &s, dir, ifp,
5991			    m, 0, off, h, &pd, &a, &ruleset);
5992		break;
5993	}
5994
5995	default:
5996		action = pf_test_other(&r, &s, dir, ifp, m, off, h,
5997		    &pd, &a, &ruleset);
5998		break;
5999	}
6000
6001	if (ifp == status_ifp) {
6002		pf_status.bcounters[1][dir == PF_OUT] += pd.tot_len;
6003		pf_status.pcounters[1][dir == PF_OUT][action != PF_PASS]++;
6004	}
6005
6006done:
6007	tr = r;
6008	if (r == &pf_default_rule && s != NULL && s->nat_rule.ptr != NULL)
6009		tr = s->nat_rule.ptr;
6010	if (tr->src.addr.type == PF_ADDR_TABLE)
6011		pfr_update_stats(tr->src.addr.p.tbl,
6012		    (s == NULL || s->direction == dir) ? pd.src : pd.dst, pd.af,
6013		    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6014		    tr->src.not);
6015	if (tr->dst.addr.type == PF_ADDR_TABLE)
6016		pfr_update_stats(tr->dst.addr.p.tbl,
6017		    (s == NULL || s->direction == dir) ? pd.dst : pd.src, pd.af,
6018		    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6019		    tr->dst.not);
6020
6021	/* XXX handle IPv6 options, if not allowed. not implemented. */
6022
6023#ifdef ALTQ
6024	if (action == PF_PASS && r->qid) {
6025		struct m_tag	*mtag;
6026		struct altq_tag	*atag;
6027
6028		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6029		if (mtag != NULL) {
6030			atag = (struct altq_tag *)(mtag + 1);
6031			if (pd.tos == IPTOS_LOWDELAY)
6032				atag->qid = r->pqid;
6033			else
6034				atag->qid = r->qid;
6035			/* add hints for ecn */
6036			atag->af = AF_INET6;
6037			atag->hdr = h;
6038			m_tag_prepend(m, mtag);
6039		}
6040	}
6041#endif
6042
6043	if (log)
6044		PFLOG_PACKET(ifp, h, m, AF_INET6, dir, reason, r, a, ruleset);
6045
6046	if (action == PF_SYNPROXY_DROP) {
6047		m_freem(*m0);
6048		*m0 = NULL;
6049		action = PF_PASS;
6050	} else if (r->rt)
6051		/* pf_route6 can free the mbuf causing *m0 to become NULL */
6052		pf_route6(m0, r, dir, ifp, s);
6053
6054#if defined(__FreeBSD__)
6055	PF_UNLOCK();
6056#endif
6057	return (action);
6058}
6059#endif /* INET6 */
6060