pf.c revision 133574
1/*	$FreeBSD: head/sys/contrib/pf/net/pf.c 133574 2004-08-12 13:59:44Z mlaier $	*/
2/*	$OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */
3/* add	$OpenBSD: pf.c,v 1.448 2004/05/11 07:34:11 dhartmei Exp $ */
4
5/*
6 * Copyright (c) 2001 Daniel Hartmeier
7 * Copyright (c) 2002,2003 Henning Brauer
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 *    - Redistributions of source code must retain the above copyright
15 *      notice, this list of conditions and the following disclaimer.
16 *    - Redistributions in binary form must reproduce the above
17 *      copyright notice, this list of conditions and the following
18 *      disclaimer in the documentation and/or other materials provided
19 *      with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * Effort sponsored in part by the Defense Advanced Research Projects
35 * Agency (DARPA) and Air Force Research Laboratory, Air Force
36 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
37 *
38 */
39
40#ifdef __FreeBSD__
41#include "opt_inet.h"
42#include "opt_inet6.h"
43#endif
44
45#ifdef __FreeBSD__
46#include "opt_bpf.h"
47#include "opt_pf.h"
48#define	NBPFILTER	DEV_BPF
49#define	NPFLOG		DEV_PFLOG
50#define	NPFSYNC		DEV_PFSYNC
51#else
52#include "bpfilter.h"
53#include "pflog.h"
54#include "pfsync.h"
55#endif
56
57#include <sys/param.h>
58#include <sys/systm.h>
59#include <sys/mbuf.h>
60#include <sys/filio.h>
61#include <sys/socket.h>
62#include <sys/socketvar.h>
63#include <sys/kernel.h>
64#include <sys/time.h>
65#ifdef __FreeBSD__
66#include <sys/sysctl.h>
67#include <sys/endian.h>
68#else
69#include <sys/pool.h>
70#endif
71
72#include <net/if.h>
73#include <net/if_types.h>
74#include <net/bpf.h>
75#include <net/route.h>
76
77#include <netinet/in.h>
78#include <netinet/in_var.h>
79#include <netinet/in_systm.h>
80#include <netinet/ip.h>
81#include <netinet/ip_var.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_seq.h>
84#include <netinet/udp.h>
85#include <netinet/ip_icmp.h>
86#include <netinet/in_pcb.h>
87#include <netinet/tcp_timer.h>
88#include <netinet/tcp_var.h>
89#include <netinet/udp_var.h>
90#include <netinet/icmp_var.h>
91
92#ifndef __FreeBSD__
93#include <dev/rndvar.h>
94#endif
95#include <net/pfvar.h>
96#include <net/if_pflog.h>
97
98#if NPFSYNC > 0
99#include <net/if_pfsync.h>
100#endif /* NPFSYNC > 0 */
101
102#ifdef INET6
103#include <netinet/ip6.h>
104#include <netinet/in_pcb.h>
105#include <netinet/icmp6.h>
106#include <netinet6/nd6.h>
107#ifdef __FreeBSD__
108#include <netinet6/ip6_var.h>
109#include <netinet6/in6_pcb.h>
110#endif
111#endif /* INET6 */
112
113#ifdef __FreeBSD__
114#include <machine/in_cksum.h>
115#include <sys/limits.h>
116#include <sys/ucred.h>
117
118extern int ip_optcopy(struct ip *, struct ip *);
119#endif
120
/*
 * Debug printf: print the parenthesised printf argument list `x` when the
 * configured debug level (pf_status.debug) is at least `n`, e.g.
 *
 *	DPFPRINTF(PF_DEBUG_MISC, ("pf: value %d\n", v));
 *
 * The expansion is wrapped in do { } while (0) so that it is a single
 * statement: an `else` written after the invocation binds to the caller's
 * `if`, not to the `if` hidden inside the macro.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
122
123/*
124 * Global variables
125 */
126
127struct pf_anchorqueue	 pf_anchors;
128struct pf_ruleset	 pf_main_ruleset;
129struct pf_altqqueue	 pf_altqs[2];
130struct pf_palist	 pf_pabuf;
131struct pf_altqqueue	*pf_altqs_active;
132struct pf_altqqueue	*pf_altqs_inactive;
133struct pf_status	 pf_status;
134
135u_int32_t		 ticket_altqs_active;
136u_int32_t		 ticket_altqs_inactive;
137int			 altqs_inactive_open;
138u_int32_t		 ticket_pabuf;
139
140#ifdef __FreeBSD__
141struct callout	 	 pf_expire_to;			/* expire timeout */
142#else
143struct timeout		 pf_expire_to;			/* expire timeout */
144#endif
145
146
147#ifdef __FreeBSD__
148uma_zone_t		 pf_src_tree_pl, pf_rule_pl;
149uma_zone_t		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
150#else
151struct pool		 pf_src_tree_pl, pf_rule_pl;
152struct pool		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
153#endif
154
155void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
156void			 pf_print_state(struct pf_state *);
157void			 pf_print_flags(u_int8_t);
158
159void			 pf_change_ap(struct pf_addr *, u_int16_t *,
160			    u_int16_t *, u_int16_t *, struct pf_addr *,
161			    u_int16_t, u_int8_t, sa_family_t);
162#ifdef INET6
163void			 pf_change_a6(struct pf_addr *, u_int16_t *,
164			    struct pf_addr *, u_int8_t);
165#endif /* INET6 */
166void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
167			    struct pf_addr *, struct pf_addr *, u_int16_t,
168			    u_int16_t *, u_int16_t *, u_int16_t *,
169			    u_int16_t *, u_int8_t, sa_family_t);
170void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
171			    const struct pf_addr *, const struct pf_addr *,
172			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
173			    u_int8_t, u_int16_t, u_int16_t, u_int8_t);
174void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
175			    sa_family_t, struct pf_rule *);
176struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
177			    int, int, struct pfi_kif *,
178			    struct pf_addr *, u_int16_t, struct pf_addr *,
179			    u_int16_t, int);
180struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
181			    int, int, struct pfi_kif *, struct pf_src_node **,
182			    struct pf_addr *, u_int16_t,
183			    struct pf_addr *, u_int16_t,
184			    struct pf_addr *, u_int16_t *);
185int			 pf_test_tcp(struct pf_rule **, struct pf_state **,
186			    int, struct pfi_kif *, struct mbuf *, int,
187			    void *, struct pf_pdesc *, struct pf_rule **,
188			    struct pf_ruleset **);
189int			 pf_test_udp(struct pf_rule **, struct pf_state **,
190			    int, struct pfi_kif *, struct mbuf *, int,
191			    void *, struct pf_pdesc *, struct pf_rule **,
192			    struct pf_ruleset **);
193int			 pf_test_icmp(struct pf_rule **, struct pf_state **,
194			    int, struct pfi_kif *, struct mbuf *, int,
195			    void *, struct pf_pdesc *, struct pf_rule **,
196			    struct pf_ruleset **);
197int			 pf_test_other(struct pf_rule **, struct pf_state **,
198			    int, struct pfi_kif *, struct mbuf *, int, void *,
199			    struct pf_pdesc *, struct pf_rule **,
200			    struct pf_ruleset **);
201int			 pf_test_fragment(struct pf_rule **, int,
202			    struct pfi_kif *, struct mbuf *, void *,
203			    struct pf_pdesc *, struct pf_rule **,
204			    struct pf_ruleset **);
205int			 pf_test_state_tcp(struct pf_state **, int,
206			    struct pfi_kif *, struct mbuf *, int,
207			    void *, struct pf_pdesc *, u_short *);
208int			 pf_test_state_udp(struct pf_state **, int,
209			    struct pfi_kif *, struct mbuf *, int,
210			    void *, struct pf_pdesc *);
211int			 pf_test_state_icmp(struct pf_state **, int,
212			    struct pfi_kif *, struct mbuf *, int,
213			    void *, struct pf_pdesc *);
214int			 pf_test_state_other(struct pf_state **, int,
215			    struct pfi_kif *, struct pf_pdesc *);
216struct pf_tag		*pf_get_tag(struct mbuf *);
217int			 pf_match_tag(struct mbuf *, struct pf_rule *,
218			     struct pf_rule *, struct pf_tag *, int *);
219void			 pf_hash(struct pf_addr *, struct pf_addr *,
220			    struct pf_poolhashkey *, sa_family_t);
221int			 pf_map_addr(u_int8_t, struct pf_rule *,
222			    struct pf_addr *, struct pf_addr *,
223			    struct pf_addr *, struct pf_src_node **);
224int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
225			    struct pf_addr *, struct pf_addr *, u_int16_t,
226			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
227			    struct pf_src_node **);
228void			 pf_route(struct mbuf **, struct pf_rule *, int,
229			    struct ifnet *, struct pf_state *);
230void			 pf_route6(struct mbuf **, struct pf_rule *, int,
231			    struct ifnet *, struct pf_state *);
232int			 pf_socket_lookup(uid_t *, gid_t *,
233			    int, struct pf_pdesc *);
234u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
235			    sa_family_t);
236u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
237			    sa_family_t);
238u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
239				u_int16_t);
240void			 pf_set_rt_ifp(struct pf_state *,
241			    struct pf_addr *);
242int			 pf_check_proto_cksum(struct mbuf *, int, int,
243			    u_int8_t, sa_family_t);
244int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
245			    struct pf_addr_wrap *);
246static int		 pf_add_mbuf_tag(struct mbuf *, u_int);
247struct pf_state		*pf_find_state_recurse(struct pfi_kif *,
248			    struct pf_state *, u_int8_t);
249
250#ifdef __FreeBSD__
251int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
252
253struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
254#else
255struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
256	{ &pf_state_pl, PFSTATE_HIWAT },
257	{ &pf_src_tree_pl, PFSNODE_HIWAT },
258	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT }
259};
260#endif
261
/*
 * Look up the state matching `key` and store it in *state.
 *
 * Expects `direction`, `state`, `kif` and `key` to be in scope at the point
 * of use.  Inbound packets are searched in the ext/gwy tree, all others in
 * the lan/ext tree.
 *
 * NOTE: this macro returns from the enclosing function:
 *   - PF_DROP when no state is found;
 *   - PF_PASS for an outbound packet whose state was created by an outbound
 *     route-to rule or an inbound reply-to rule and whose rt_kif is set to
 *     an interface other than `kif`.
 */
#define STATE_LOOKUP()							\
	do {								\
		*state = pf_find_state_recurse(kif, &key,		\
		    (direction == PF_IN) ? PF_EXT_GWY : PF_LAN_EXT);	\
		if (*state == NULL)					\
			return (PF_DROP);				\
		if (direction == PF_OUT &&				\
		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
		    (*state)->rule.ptr->direction == PF_OUT) ||		\
		    ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
		    (*state)->rule.ptr->direction == PF_IN)) &&		\
		    (*state)->rt_kif != NULL &&				\
		    (*state)->rt_kif != kif)				\
			return (PF_PASS);				\
	} while (0)
281
/*
 * Non-zero when the lan and gwy endpoints of state `s` differ, i.e. when
 * the first address word differs, or (for AF_INET6 states) any of the
 * remaining three address words differs, or the ports differ.
 *
 * The whole expansion is parenthesised so the macro behaves as a single
 * operand: `!STATE_TRANSLATE(s)` and `STATE_TRANSLATE(s) == 0` negate or
 * compare the complete test rather than just its first or last term.
 */
#define	STATE_TRANSLATE(s) \
	((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
	((s)->af == AF_INET6 && \
	((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
	(s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
	(s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
	(s)->lan.port != (s)->gwy.port)
289
/*
 * Select the interface object a rule `r` binds to, starting from `k`:
 * `k` itself when PFRULE_IFBOUND is set, k's parent when PFRULE_GRBOUND is
 * set, and k's grandparent otherwise.
 */
#define BOUND_IFACE(r, k)						\
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) :			\
	    ((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent :	\
	    (k)->pfik_parent->pfik_parent)
293
/*
 * Tree comparison functions.  They are plain `static` under __FreeBSD__ and
 * `static __inline` otherwise; the definitions below use the same split.
 */
#ifdef __FreeBSD__
static int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *);
static int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *);
static int pf_state_compare_id(struct pf_state *, struct pf_state *);
#else
static __inline int pf_src_compare(struct pf_src_node *,
	struct pf_src_node *);
static __inline int pf_state_compare_lan_ext(struct pf_state *,
	struct pf_state *);
static __inline int pf_state_compare_ext_gwy(struct pf_state *,
	struct pf_state *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);
#endif
311
312struct pf_src_tree tree_src_tracking;
313
314struct pf_state_tree_id tree_id;
315struct pf_state_queue state_updates;
316
317RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
318RB_GENERATE(pf_state_tree_lan_ext, pf_state,
319    u.s.entry_lan_ext, pf_state_compare_lan_ext);
320RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
321    u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
322RB_GENERATE(pf_state_tree_id, pf_state,
323    u.s.entry_id, pf_state_compare_id);
324
325#ifdef __FreeBSD__
326static int
327#else
328static __inline int
329#endif
330pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
331{
332	int	diff;
333
334	if (a->rule.ptr > b->rule.ptr)
335		return (1);
336	if (a->rule.ptr < b->rule.ptr)
337		return (-1);
338	if ((diff = a->af - b->af) != 0)
339		return (diff);
340	switch (a->af) {
341#ifdef INET
342	case AF_INET:
343		if (a->addr.addr32[0] > b->addr.addr32[0])
344			return (1);
345		if (a->addr.addr32[0] < b->addr.addr32[0])
346			return (-1);
347		break;
348#endif /* INET */
349#ifdef INET6
350	case AF_INET6:
351		if (a->addr.addr32[3] > b->addr.addr32[3])
352			return (1);
353		if (a->addr.addr32[3] < b->addr.addr32[3])
354			return (-1);
355		if (a->addr.addr32[2] > b->addr.addr32[2])
356			return (1);
357		if (a->addr.addr32[2] < b->addr.addr32[2])
358			return (-1);
359		if (a->addr.addr32[1] > b->addr.addr32[1])
360			return (1);
361		if (a->addr.addr32[1] < b->addr.addr32[1])
362			return (-1);
363		if (a->addr.addr32[0] > b->addr.addr32[0])
364			return (1);
365		if (a->addr.addr32[0] < b->addr.addr32[0])
366			return (-1);
367		break;
368#endif /* INET6 */
369	}
370	return (0);
371}
372
373#ifdef __FreeBSD__
374static int
375#else
376static __inline int
377#endif
378pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
379{
380	int	diff;
381
382	if ((diff = a->proto - b->proto) != 0)
383		return (diff);
384	if ((diff = a->af - b->af) != 0)
385		return (diff);
386	switch (a->af) {
387#ifdef INET
388	case AF_INET:
389		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
390			return (1);
391		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
392			return (-1);
393		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
394			return (1);
395		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
396			return (-1);
397		break;
398#endif /* INET */
399#ifdef INET6
400	case AF_INET6:
401		if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
402			return (1);
403		if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
404			return (-1);
405		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
406			return (1);
407		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
408			return (-1);
409		if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
410			return (1);
411		if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
412			return (-1);
413		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
414			return (1);
415		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
416			return (-1);
417		if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
418			return (1);
419		if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
420			return (-1);
421		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
422			return (1);
423		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
424			return (-1);
425		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
426			return (1);
427		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
428			return (-1);
429		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
430			return (1);
431		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
432			return (-1);
433		break;
434#endif /* INET6 */
435	}
436
437	if ((diff = a->lan.port - b->lan.port) != 0)
438		return (diff);
439	if ((diff = a->ext.port - b->ext.port) != 0)
440		return (diff);
441
442	return (0);
443}
444
445#ifdef __FreeBSD__
446static int
447#else
448static __inline int
449#endif
450pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
451{
452	int	diff;
453
454	if ((diff = a->proto - b->proto) != 0)
455		return (diff);
456	if ((diff = a->af - b->af) != 0)
457		return (diff);
458	switch (a->af) {
459#ifdef INET
460	case AF_INET:
461		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
462			return (1);
463		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
464			return (-1);
465		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
466			return (1);
467		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
468			return (-1);
469		break;
470#endif /* INET */
471#ifdef INET6
472	case AF_INET6:
473		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
474			return (1);
475		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
476			return (-1);
477		if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
478			return (1);
479		if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
480			return (-1);
481		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
482			return (1);
483		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
484			return (-1);
485		if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
486			return (1);
487		if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
488			return (-1);
489		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
490			return (1);
491		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
492			return (-1);
493		if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
494			return (1);
495		if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
496			return (-1);
497		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
498			return (1);
499		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
500			return (-1);
501		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
502			return (1);
503		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
504			return (-1);
505		break;
506#endif /* INET6 */
507	}
508
509	if ((diff = a->ext.port - b->ext.port) != 0)
510		return (diff);
511	if ((diff = a->gwy.port - b->gwy.port) != 0)
512		return (diff);
513
514	return (0);
515}
516
517#ifdef __FreeBSD__
518static int
519#else
520static __inline int
521#endif
522pf_state_compare_id(struct pf_state *a, struct pf_state *b)
523{
524	if (a->id > b->id)
525		return (1);
526	if (a->id < b->id)
527		return (-1);
528	if (a->creatorid > b->creatorid)
529		return (1);
530	if (a->creatorid < b->creatorid)
531		return (-1);
532
533	return (0);
534}
535
#ifdef INET6
/*
 * Copy an address from `src` to `dst`: one 32-bit word for AF_INET, all
 * four words for AF_INET6.  Any other family leaves `dst` untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	int	w;

	switch (af) {
#ifdef INET
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#endif /* INET */
	case AF_INET6:
		for (w = 0; w < 4; w++)
			dst->addr32[w] = src->addr32[w];
		break;
	}
}
#endif
555
556struct pf_state *
557pf_find_state_byid(struct pf_state *key)
558{
559	pf_status.fcounters[FCNT_STATE_SEARCH]++;
560	return (RB_FIND(pf_state_tree_id, &tree_id, key));
561}
562
563struct pf_state *
564pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree)
565{
566	struct pf_state *s;
567
568	pf_status.fcounters[FCNT_STATE_SEARCH]++;
569
570	switch (tree) {
571	case PF_LAN_EXT:
572		for (; kif != NULL; kif = kif->pfik_parent) {
573			s = RB_FIND(pf_state_tree_lan_ext,
574			    &kif->pfik_lan_ext, key);
575			if (s != NULL)
576				return (s);
577		}
578		return (NULL);
579	case PF_EXT_GWY:
580		for (; kif != NULL; kif = kif->pfik_parent) {
581			s = RB_FIND(pf_state_tree_ext_gwy,
582			    &kif->pfik_ext_gwy, key);
583			if (s != NULL)
584				return (s);
585		}
586		return (NULL);
587	default:
588		panic("pf_find_state_recurse");
589	}
590}
591
592struct pf_state *
593pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more)
594{
595	struct pf_state *s, *ss = NULL;
596	struct pfi_kif	*kif;
597
598	pf_status.fcounters[FCNT_STATE_SEARCH]++;
599
600	switch (tree) {
601	case PF_LAN_EXT:
602		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
603			s = RB_FIND(pf_state_tree_lan_ext,
604			    &kif->pfik_lan_ext, key);
605			if (s == NULL)
606				continue;
607			if (more == NULL)
608				return (s);
609			ss = s;
610			(*more)++;
611		}
612		return (ss);
613	case PF_EXT_GWY:
614		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
615			s = RB_FIND(pf_state_tree_ext_gwy,
616			    &kif->pfik_ext_gwy, key);
617			if (s == NULL)
618				continue;
619			if (more == NULL)
620				return (s);
621			ss = s;
622			(*more)++;
623		}
624		return (ss);
625	default:
626		panic("pf_find_state_all");
627	}
628}
629
630int
631pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
632    struct pf_addr *src, sa_family_t af)
633{
634	struct pf_src_node	k;
635
636	if (*sn == NULL) {
637		k.af = af;
638		PF_ACPY(&k.addr, src, af);
639		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
640		    rule->rpool.opts & PF_POOL_STICKYADDR)
641			k.rule.ptr = rule;
642		else
643			k.rule.ptr = NULL;
644		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
645		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
646	}
647	if (*sn == NULL) {
648		if (!rule->max_src_nodes ||
649		    rule->src_nodes < rule->max_src_nodes)
650			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
651		if ((*sn) == NULL)
652			return (-1);
653		bzero(*sn, sizeof(struct pf_src_node));
654		(*sn)->af = af;
655		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
656		    rule->rpool.opts & PF_POOL_STICKYADDR)
657			(*sn)->rule.ptr = rule;
658		else
659			(*sn)->rule.ptr = NULL;
660		PF_ACPY(&(*sn)->addr, src, af);
661		if (RB_INSERT(pf_src_tree,
662		    &tree_src_tracking, *sn) != NULL) {
663			if (pf_status.debug >= PF_DEBUG_MISC) {
664				printf("pf: src_tree insert failed: ");
665				pf_print_host(&(*sn)->addr, 0, af);
666				printf("\n");
667			}
668			pool_put(&pf_src_tree_pl, *sn);
669			return (-1);
670		}
671#ifdef __FreeBSD__
672		(*sn)->creation = time_second;
673#else
674		(*sn)->creation = time.tv_sec;
675#endif
676		(*sn)->ruletype = rule->action;
677		if ((*sn)->rule.ptr != NULL)
678			(*sn)->rule.ptr->src_nodes++;
679		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
680		pf_status.src_nodes++;
681	} else {
682		if (rule->max_src_states &&
683		    (*sn)->states >= rule->max_src_states)
684			return (-1);
685	}
686	return (0);
687}
688
689int
690pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
691{
692	/* Thou MUST NOT insert multiple duplicate keys */
693	state->u.s.kif = kif;
694	if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
695		if (pf_status.debug >= PF_DEBUG_MISC) {
696			printf("pf: state insert failed: tree_lan_ext");
697			printf(" lan: ");
698			pf_print_host(&state->lan.addr, state->lan.port,
699			    state->af);
700			printf(" gwy: ");
701			pf_print_host(&state->gwy.addr, state->gwy.port,
702			    state->af);
703			printf(" ext: ");
704			pf_print_host(&state->ext.addr, state->ext.port,
705			    state->af);
706			if (state->sync_flags & PFSTATE_FROMSYNC)
707				printf(" (from sync)");
708			printf("\n");
709		}
710		return (-1);
711	}
712
713	if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
714		if (pf_status.debug >= PF_DEBUG_MISC) {
715			printf("pf: state insert failed: tree_ext_gwy");
716			printf(" lan: ");
717			pf_print_host(&state->lan.addr, state->lan.port,
718			    state->af);
719			printf(" gwy: ");
720			pf_print_host(&state->gwy.addr, state->gwy.port,
721			    state->af);
722			printf(" ext: ");
723			pf_print_host(&state->ext.addr, state->ext.port,
724			    state->af);
725			if (state->sync_flags & PFSTATE_FROMSYNC)
726				printf(" (from sync)");
727			printf("\n");
728		}
729		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
730		return (-1);
731	}
732
733	if (state->id == 0 && state->creatorid == 0) {
734		state->id = htobe64(pf_status.stateid++);
735		state->creatorid = pf_status.hostid;
736	}
737	if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
738		if (pf_status.debug >= PF_DEBUG_MISC) {
739#ifdef __FreeBSD__
740			printf("pf: state insert failed: "
741			    "id: %016llx creatorid: %08x",
742			    (long long)be64toh(state->id),
743			    ntohl(state->creatorid));
744#else
745			printf("pf: state insert failed: "
746			    "id: %016llx creatorid: %08x",
747			    betoh64(state->id), ntohl(state->creatorid));
748#endif
749			if (state->sync_flags & PFSTATE_FROMSYNC)
750				printf(" (from sync)");
751			printf("\n");
752		}
753		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
754		RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
755		return (-1);
756	}
757	TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates);
758
759	pf_status.fcounters[FCNT_STATE_INSERT]++;
760	pf_status.states++;
761	pfi_attach_state(kif);
762#if NPFSYNC
763	pfsync_insert_state(state);
764#endif
765	return (0);
766}
767
768void
769pf_purge_timeout(void *arg)
770{
771#ifdef __FreeBSD__
772	struct callout  *to = arg;
773#else
774	struct timeout	*to = arg;
775#endif
776	int		 s;
777
778#ifdef __FreeBSD__
779	PF_LOCK();
780#endif
781	s = splsoftnet();
782	pf_purge_expired_states();
783	pf_purge_expired_fragments();
784	pf_purge_expired_src_nodes();
785	splx(s);
786#ifdef __FreeBSD__
787	PF_UNLOCK();
788#endif
789
790#ifdef __FreeBSD__
791	callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz,
792	    pf_purge_timeout, to);
793#else
794	timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz);
795#endif
796}
797
798u_int32_t
799pf_state_expires(const struct pf_state *state)
800{
801	u_int32_t	timeout;
802	u_int32_t	start;
803	u_int32_t	end;
804	u_int32_t	states;
805
806	/* handle all PFTM_* > PFTM_MAX here */
807	if (state->timeout == PFTM_PURGE)
808#ifdef __FreeBSD__
809		return (time_second);
810#else
811		return (time.tv_sec);
812#endif
813	if (state->timeout == PFTM_UNTIL_PACKET)
814		return (0);
815#ifdef __FreeBSD__
816	KASSERT((state->timeout < PFTM_MAX),
817	    ("pf_state_expires: timeout > PFTM_MAX"));
818#else
819	KASSERT(state->timeout < PFTM_MAX);
820#endif
821	timeout = state->rule.ptr->timeout[state->timeout];
822	if (!timeout)
823		timeout = pf_default_rule.timeout[state->timeout];
824	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
825	if (start) {
826		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
827		states = state->rule.ptr->states;
828	} else {
829		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
830		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
831		states = pf_status.states;
832	}
833	if (end && states > start && start < end) {
834		if (states < end)
835			return (state->expire + timeout * (end - states) /
836			    (end - start));
837		else
838#ifdef __FreeBSD__
839			return (time_second);
840#else
841			return (time.tv_sec);
842#endif
843	}
844	return (state->expire + timeout);
845}
846
847void
848pf_purge_expired_src_nodes(void)
849{
850	 struct pf_src_node		*cur, *next;
851
852	 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
853		 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
854
855#ifdef __FreeBSD__
856		 if (cur->states <= 0 && cur->expire <= time_second) {
857#else
858		 if (cur->states <= 0 && cur->expire <= time.tv_sec) {
859#endif
860			 if (cur->rule.ptr != NULL) {
861				 cur->rule.ptr->src_nodes--;
862				 if (cur->rule.ptr->states <= 0 &&
863				     cur->rule.ptr->max_src_nodes <= 0)
864					 pf_rm_rule(NULL, cur->rule.ptr);
865			 }
866			 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
867			 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
868			 pf_status.src_nodes--;
869			 pool_put(&pf_src_tree_pl, cur);
870		 }
871	 }
872}
873
874void
875pf_src_tree_remove_state(struct pf_state *s)
876{
877	u_int32_t timeout;
878
879	if (s->src_node != NULL) {
880		if (--s->src_node->states <= 0) {
881			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
882			if (!timeout)
883				timeout =
884				    pf_default_rule.timeout[PFTM_SRC_NODE];
885#ifdef __FreeBSD__
886			s->src_node->expire = time_second + timeout;
887#else
888			s->src_node->expire = time.tv_sec + timeout;
889#endif
890		}
891	}
892	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
893		if (--s->nat_src_node->states <= 0) {
894			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
895			if (!timeout)
896				timeout =
897				    pf_default_rule.timeout[PFTM_SRC_NODE];
898#ifdef __FreeBSD__
899			s->nat_src_node->expire = time_second + timeout;
900#else
901			s->nat_src_node->expire = time.tv_sec + timeout;
902#endif
903		}
904	}
905	s->src_node = s->nat_src_node = NULL;
906}
907
908void
909pf_purge_expired_states(void)
910{
911	struct pf_state		*cur, *next;
912
913	for (cur = RB_MIN(pf_state_tree_id, &tree_id);
914	    cur; cur = next) {
915		next = RB_NEXT(pf_state_tree_id, &tree_id, cur);
916
917#ifdef __FreeBSD__
918		if (pf_state_expires(cur) <= time_second) {
919#else
920		if (pf_state_expires(cur) <= time.tv_sec) {
921#endif
922			if (cur->src.state == PF_TCPS_PROXY_DST)
923				pf_send_tcp(cur->rule.ptr, cur->af,
924				    &cur->ext.addr, &cur->lan.addr,
925				    cur->ext.port, cur->lan.port,
926				    cur->src.seqhi, cur->src.seqlo + 1, 0,
927				    TH_RST|TH_ACK, 0, 0);
928			RB_REMOVE(pf_state_tree_ext_gwy,
929			    &cur->u.s.kif->pfik_ext_gwy, cur);
930			RB_REMOVE(pf_state_tree_lan_ext,
931			    &cur->u.s.kif->pfik_lan_ext, cur);
932			RB_REMOVE(pf_state_tree_id, &tree_id, cur);
933#if NPFSYNC
934			pfsync_delete_state(cur);
935#endif
936			pf_src_tree_remove_state(cur);
937			if (--cur->rule.ptr->states <= 0 &&
938			    cur->rule.ptr->src_nodes <= 0)
939				pf_rm_rule(NULL, cur->rule.ptr);
940			if (cur->nat_rule.ptr != NULL)
941				if (--cur->nat_rule.ptr->states <= 0 &&
942					cur->nat_rule.ptr->src_nodes <= 0)
943					pf_rm_rule(NULL, cur->nat_rule.ptr);
944			if (cur->anchor.ptr != NULL)
945				if (--cur->anchor.ptr->states <= 0)
946					pf_rm_rule(NULL, cur->anchor.ptr);
947			pf_normalize_tcp_cleanup(cur);
948			pfi_detach_state(cur->u.s.kif);
949			TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
950			pool_put(&pf_state_pl, cur);
951			pf_status.fcounters[FCNT_STATE_REMOVALS]++;
952			pf_status.states--;
953		}
954	}
955}
956
957int
958pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
959{
960	if (aw->type != PF_ADDR_TABLE)
961		return (0);
962	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
963		return (1);
964	return (0);
965}
966
967void
968pf_tbladdr_remove(struct pf_addr_wrap *aw)
969{
970	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
971		return;
972	pfr_detach_table(aw->p.tbl);
973	aw->p.tbl = NULL;
974}
975
976void
977pf_tbladdr_copyout(struct pf_addr_wrap *aw)
978{
979	struct pfr_ktable *kt = aw->p.tbl;
980
981	if (aw->type != PF_ADDR_TABLE || kt == NULL)
982		return;
983	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
984		kt = kt->pfrkt_root;
985	aw->p.tbl = NULL;
986	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
987		kt->pfrkt_cnt : -1;
988}
989
990void
991pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
992{
993	switch (af) {
994#ifdef INET
995	case AF_INET: {
996		u_int32_t a = ntohl(addr->addr32[0]);
997		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
998		    (a>>8)&255, a&255);
999		if (p) {
1000			p = ntohs(p);
1001			printf(":%u", p);
1002		}
1003		break;
1004	}
1005#endif /* INET */
1006#ifdef INET6
1007	case AF_INET6: {
1008		u_int16_t b;
1009		u_int8_t i, curstart = 255, curend = 0,
1010		    maxstart = 0, maxend = 0;
1011		for (i = 0; i < 8; i++) {
1012			if (!addr->addr16[i]) {
1013				if (curstart == 255)
1014					curstart = i;
1015				else
1016					curend = i;
1017			} else {
1018				if (curstart) {
1019					if ((curend - curstart) >
1020					    (maxend - maxstart)) {
1021						maxstart = curstart;
1022						maxend = curend;
1023						curstart = 255;
1024					}
1025				}
1026			}
1027		}
1028		for (i = 0; i < 8; i++) {
1029			if (i >= maxstart && i <= maxend) {
1030				if (maxend != 7) {
1031					if (i == maxstart)
1032						printf(":");
1033				} else {
1034					if (i == maxend)
1035						printf(":");
1036				}
1037			} else {
1038				b = ntohs(addr->addr16[i]);
1039				printf("%x", b);
1040				if (i < 7)
1041					printf(":");
1042			}
1043		}
1044		if (p) {
1045			p = ntohs(p);
1046			printf("[%u]", p);
1047		}
1048		break;
1049	}
1050#endif /* INET6 */
1051	}
1052}
1053
1054void
1055pf_print_state(struct pf_state *s)
1056{
1057	switch (s->proto) {
1058	case IPPROTO_TCP:
1059		printf("TCP ");
1060		break;
1061	case IPPROTO_UDP:
1062		printf("UDP ");
1063		break;
1064	case IPPROTO_ICMP:
1065		printf("ICMP ");
1066		break;
1067	case IPPROTO_ICMPV6:
1068		printf("ICMPV6 ");
1069		break;
1070	default:
1071		printf("%u ", s->proto);
1072		break;
1073	}
1074	pf_print_host(&s->lan.addr, s->lan.port, s->af);
1075	printf(" ");
1076	pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
1077	printf(" ");
1078	pf_print_host(&s->ext.addr, s->ext.port, s->af);
1079	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
1080	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
1081	if (s->src.wscale && s->dst.wscale)
1082		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1083	printf("]");
1084	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
1085	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1086	if (s->src.wscale && s->dst.wscale)
1087		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1088	printf("]");
1089	printf(" %u:%u", s->src.state, s->dst.state);
1090}
1091
1092void
1093pf_print_flags(u_int8_t f)
1094{
1095	if (f)
1096		printf(" ");
1097	if (f & TH_FIN)
1098		printf("F");
1099	if (f & TH_SYN)
1100		printf("S");
1101	if (f & TH_RST)
1102		printf("R");
1103	if (f & TH_PUSH)
1104		printf("P");
1105	if (f & TH_ACK)
1106		printf("A");
1107	if (f & TH_URG)
1108		printf("U");
1109	if (f & TH_ECE)
1110		printf("E");
1111	if (f & TH_CWR)
1112		printf("W");
1113}
1114
/*
 * Helper for pf_calc_skip_steps(); relies on the caller's local variables
 * "head" and "cur".  Every rule from head[i] up to, but not including,
 * cur gets cur as its skip target for criterion i, and head[i] ends up
 * equal to cur.
 */
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		for (; head[i] != cur;				\
		    head[i] = TAILQ_NEXT(head[i], entries))	\
			head[i]->skip[i].ptr = cur;		\
	} while (0)
1122
1123void
1124pf_calc_skip_steps(struct pf_rulequeue *rules)
1125{
1126	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1127	int i;
1128
1129	cur = TAILQ_FIRST(rules);
1130	prev = cur;
1131	for (i = 0; i < PF_SKIP_COUNT; ++i)
1132		head[i] = cur;
1133	while (cur != NULL) {
1134
1135		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1136			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1137		if (cur->direction != prev->direction)
1138			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1139		if (cur->af != prev->af)
1140			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1141		if (cur->proto != prev->proto)
1142			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1143		if (cur->src.not != prev->src.not ||
1144		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1145			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1146		if (cur->src.port[0] != prev->src.port[0] ||
1147		    cur->src.port[1] != prev->src.port[1] ||
1148		    cur->src.port_op != prev->src.port_op)
1149			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1150		if (cur->dst.not != prev->dst.not ||
1151		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1152			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1153		if (cur->dst.port[0] != prev->dst.port[0] ||
1154		    cur->dst.port[1] != prev->dst.port[1] ||
1155		    cur->dst.port_op != prev->dst.port_op)
1156			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1157
1158		prev = cur;
1159		cur = TAILQ_NEXT(cur, entries);
1160	}
1161	for (i = 0; i < PF_SKIP_COUNT; ++i)
1162		PF_SET_SKIP_STEPS(i);
1163}
1164
1165int
1166pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1167{
1168	if (aw1->type != aw2->type)
1169		return (1);
1170	switch (aw1->type) {
1171	case PF_ADDR_ADDRMASK:
1172		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1173			return (1);
1174		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1175			return (1);
1176		return (0);
1177	case PF_ADDR_DYNIFTL:
1178		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1179	case PF_ADDR_NOROUTE:
1180		return (0);
1181	case PF_ADDR_TABLE:
1182		return (aw1->p.tbl != aw2->p.tbl);
1183	default:
1184		printf("invalid address type: %d\n", aw1->type);
1185		return (1);
1186	}
1187}
1188
1189void
1190pf_update_anchor_rules()
1191{
1192	struct pf_rule	*rule;
1193	int		 i;
1194
1195	for (i = 0; i < PF_RULESET_MAX; ++i)
1196		TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr,
1197		    entries)
1198			if (rule->anchorname[0])
1199				rule->anchor = pf_find_anchor(rule->anchorname);
1200			else
1201				rule->anchor = NULL;
1202}
1203
/*
 * Incrementally adjust a 16 bit checksum after one 16 bit word covered by
 * it changed from "old" to "new", and return the adjusted checksum.
 *
 * When udp is non-zero a checksum of 0 is returned unchanged, and a
 * result of 0 is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	sum = cksum + old - new;
	/* fold the upper half into the lower 16 bits */
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
1218
1219void
1220pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1221    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1222{
1223	struct pf_addr	ao;
1224	u_int16_t	po = *p;
1225
1226	PF_ACPY(&ao, a, af);
1227	PF_ACPY(a, an, af);
1228
1229	*p = pn;
1230
1231	switch (af) {
1232#ifdef INET
1233	case AF_INET:
1234		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1235		    ao.addr16[0], an->addr16[0], 0),
1236		    ao.addr16[1], an->addr16[1], 0);
1237		*p = pn;
1238		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1239		    ao.addr16[0], an->addr16[0], u),
1240		    ao.addr16[1], an->addr16[1], u),
1241		    po, pn, u);
1242		break;
1243#endif /* INET */
1244#ifdef INET6
1245	case AF_INET6:
1246		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1247		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1248		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1249		    ao.addr16[0], an->addr16[0], u),
1250		    ao.addr16[1], an->addr16[1], u),
1251		    ao.addr16[2], an->addr16[2], u),
1252		    ao.addr16[3], an->addr16[3], u),
1253		    ao.addr16[4], an->addr16[4], u),
1254		    ao.addr16[5], an->addr16[5], u),
1255		    ao.addr16[6], an->addr16[6], u),
1256		    ao.addr16[7], an->addr16[7], u),
1257		    po, pn, u);
1258		break;
1259#endif /* INET6 */
1260	}
1261}
1262
1263
/*
 * Changes a u_int32_t.  Uses a void * so there are no align restrictions.
 *
 * The 32 bit value at a is replaced by an and the checksum *c is adjusted
 * for the upper and then the lower 16 bits of the old and new value; u is
 * the udp flag handed to pf_cksum_fixup().
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	ao;
	u_int16_t	sum;

	/* memcpy instead of a typed access, a may be unaligned */
	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));

	sum = pf_cksum_fixup(*c, ao >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, ao & 0xffff, an & 0xffff, u);
}
1275
#ifdef INET6
/*
 * Replace the IPv6 address at a by an and adjust the checksum *c for all
 * eight 16 bit words of the address; u is the udp flag handed to
 * pf_cksum_fixup().
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	ao;
	u_int16_t	sum;
	int		w;

	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (w = 0; w < 8; w++)
		sum = pf_cksum_fixup(sum, ao.addr16[w], an->addr16[w], u);
	*c = sum;
}
#endif /* INET6 */
1298
1299void
1300pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1301    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1302    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1303{
1304	struct pf_addr	oia, ooa;
1305
1306	PF_ACPY(&oia, ia, af);
1307	PF_ACPY(&ooa, oa, af);
1308
1309	/* Change inner protocol port, fix inner protocol checksum. */
1310	if (ip != NULL) {
1311		u_int16_t	oip = *ip;
1312		u_int32_t	opc = 0;	/* make the compiler happy */
1313
1314		if (pc != NULL)
1315			opc = *pc;
1316		*ip = np;
1317		if (pc != NULL)
1318			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1319		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1320		if (pc != NULL)
1321			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1322	}
1323	/* Change inner ip address, fix inner ip and icmp checksums. */
1324	PF_ACPY(ia, na, af);
1325	switch (af) {
1326#ifdef INET
1327	case AF_INET: {
1328		u_int32_t	 oh2c = *h2c;
1329
1330		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1331		    oia.addr16[0], ia->addr16[0], 0),
1332		    oia.addr16[1], ia->addr16[1], 0);
1333		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1334		    oia.addr16[0], ia->addr16[0], 0),
1335		    oia.addr16[1], ia->addr16[1], 0);
1336		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1337		break;
1338	}
1339#endif /* INET */
1340#ifdef INET6
1341	case AF_INET6:
1342		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1343		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1344		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1345		    oia.addr16[0], ia->addr16[0], u),
1346		    oia.addr16[1], ia->addr16[1], u),
1347		    oia.addr16[2], ia->addr16[2], u),
1348		    oia.addr16[3], ia->addr16[3], u),
1349		    oia.addr16[4], ia->addr16[4], u),
1350		    oia.addr16[5], ia->addr16[5], u),
1351		    oia.addr16[6], ia->addr16[6], u),
1352		    oia.addr16[7], ia->addr16[7], u);
1353		break;
1354#endif /* INET6 */
1355	}
1356	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
1357	PF_ACPY(oa, na, af);
1358	switch (af) {
1359#ifdef INET
1360	case AF_INET:
1361		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1362		    ooa.addr16[0], oa->addr16[0], 0),
1363		    ooa.addr16[1], oa->addr16[1], 0);
1364		break;
1365#endif /* INET */
1366#ifdef INET6
1367	case AF_INET6:
1368		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1369		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1370		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1371		    ooa.addr16[0], oa->addr16[0], u),
1372		    ooa.addr16[1], oa->addr16[1], u),
1373		    ooa.addr16[2], oa->addr16[2], u),
1374		    ooa.addr16[3], oa->addr16[3], u),
1375		    ooa.addr16[4], oa->addr16[4], u),
1376		    ooa.addr16[5], oa->addr16[5], u),
1377		    ooa.addr16[6], oa->addr16[6], u),
1378		    ooa.addr16[7], oa->addr16[7], u);
1379		break;
1380#endif /* INET6 */
1381	}
1382}
1383
1384void
1385pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1386    const struct pf_addr *saddr, const struct pf_addr *daddr,
1387    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
1388    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl)
1389{
1390	struct mbuf	*m;
1391#ifdef ALTQ
1392	struct m_tag	*mtag;
1393#endif
1394	int		 len = 0, tlen;		/* make the compiler happy */
1395#ifdef INET
1396	struct ip	*h = NULL;		/* make the compiler happy */
1397#endif /* INET */
1398#ifdef INET6
1399	struct ip6_hdr	*h6 = NULL;		/* make the compiler happy */
1400#endif /* INET6 */
1401	struct tcphdr	*th = NULL;		/* make the compiler happy */
1402#ifdef __FreeBSD__
1403	struct ip 	*ip;
1404#endif
1405	char *opt;
1406
1407	/* maximum segment size tcp option */
1408	tlen = sizeof(struct tcphdr);
1409	if (mss)
1410		tlen += 4;
1411
1412	switch (af) {
1413#ifdef INET
1414	case AF_INET:
1415		len = sizeof(struct ip) + tlen;
1416		break;
1417#endif /* INET */
1418#ifdef INET6
1419	case AF_INET6:
1420		len = sizeof(struct ip6_hdr) + tlen;
1421		break;
1422#endif /* INET6 */
1423	}
1424
1425	/* create outgoing mbuf */
1426#ifdef __FreeBSD__
1427	m = m_gethdr(M_DONTWAIT, MT_HEADER);
1428	if (m == NULL)
1429		return;
1430	m->m_flags |= M_SKIP_FIREWALL;
1431#else
1432	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1433	if (mtag == NULL)
1434		return;
1435	m = m_gethdr(M_DONTWAIT, MT_HEADER);
1436	if (m == NULL) {
1437		m_tag_free(mtag);
1438		return;
1439	}
1440	m_tag_prepend(m, mtag);
1441#endif
1442#ifdef ALTQ
1443	if (r != NULL && r->qid) {
1444		struct altq_tag *atag;
1445
1446		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
1447		if (mtag != NULL) {
1448			atag = (struct altq_tag *)(mtag + 1);
1449			atag->qid = r->qid;
1450			/* add hints for ecn */
1451			atag->af = af;
1452			atag->hdr = mtod(m, struct ip *);
1453			m_tag_prepend(m, mtag);
1454		}
1455	}
1456#endif
1457	m->m_data += max_linkhdr;
1458	m->m_pkthdr.len = m->m_len = len;
1459	m->m_pkthdr.rcvif = NULL;
1460	bzero(m->m_data, len);
1461	switch (af) {
1462#ifdef INET
1463	case AF_INET:
1464		h = mtod(m, struct ip *);
1465
1466		/* IP header fields included in the TCP checksum */
1467		h->ip_p = IPPROTO_TCP;
1468		h->ip_len = htons(tlen);
1469		h->ip_src.s_addr = saddr->v4.s_addr;
1470		h->ip_dst.s_addr = daddr->v4.s_addr;
1471
1472		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1473		break;
1474#endif /* INET */
1475#ifdef INET6
1476	case AF_INET6:
1477		h6 = mtod(m, struct ip6_hdr *);
1478
1479		/* IP header fields included in the TCP checksum */
1480		h6->ip6_nxt = IPPROTO_TCP;
1481		h6->ip6_plen = htons(tlen);
1482		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1483		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1484
1485		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1486		break;
1487#endif /* INET6 */
1488	}
1489
1490	/* TCP header */
1491	th->th_sport = sport;
1492	th->th_dport = dport;
1493	th->th_seq = htonl(seq);
1494	th->th_ack = htonl(ack);
1495	th->th_off = tlen >> 2;
1496	th->th_flags = flags;
1497	th->th_win = htons(win);
1498
1499	if (mss) {
1500		opt = (char *)(th + 1);
1501		opt[0] = TCPOPT_MAXSEG;
1502		opt[1] = 4;
1503		HTONS(mss);
1504		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1505	}
1506
1507	switch (af) {
1508#ifdef INET
1509	case AF_INET:
1510		/* TCP checksum */
1511		th->th_sum = in_cksum(m, len);
1512
1513		/* Finish the IP header */
1514		h->ip_v = 4;
1515		h->ip_hl = sizeof(*h) >> 2;
1516		h->ip_tos = IPTOS_LOWDELAY;
1517#ifdef __FreeBSD__
1518		h->ip_off = path_mtu_discovery ? IP_DF : 0;
1519		h->ip_len = len;
1520#else
1521		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
1522		h->ip_len = htons(len);
1523#endif
1524		h->ip_ttl = ttl ? ttl : ip_defttl;
1525		h->ip_sum = 0;
1526#ifdef __FreeBSD__
1527		ip = mtod(m, struct ip *);
1528		PF_UNLOCK();
1529		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
1530			(void *)NULL);
1531		PF_LOCK();
1532#else /* ! __FreeBSD__ */
1533		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
1534		    (void *)NULL);
1535#endif
1536		break;
1537#endif /* INET */
1538#ifdef INET6
1539	case AF_INET6:
1540		/* TCP checksum */
1541		th->th_sum = in6_cksum(m, IPPROTO_TCP,
1542		    sizeof(struct ip6_hdr), tlen);
1543
1544		h6->ip6_vfc |= IPV6_VERSION;
1545		h6->ip6_hlim = IPV6_DEFHLIM;
1546
1547#ifdef __FreeBSD__
1548		PF_UNLOCK();
1549		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1550		PF_LOCK();
1551#else
1552		ip6_output(m, NULL, NULL, 0, NULL, NULL);
1553#endif
1554		break;
1555#endif /* INET6 */
1556	}
1557}
1558
1559void
1560pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1561    struct pf_rule *r)
1562{
1563#ifdef ALTQ
1564	struct m_tag	*mtag;
1565#endif
1566	struct mbuf	*m0;
1567#ifdef __FreeBSD__
1568	struct ip *ip;
1569#endif
1570
1571#ifdef __FreeBSD__
1572	m0 = m_copypacket(m, M_DONTWAIT);
1573	if (m0 == NULL)
1574		return;
1575	m0->m_flags |= M_SKIP_FIREWALL;
1576#else
1577	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1578	if (mtag == NULL)
1579		return;
1580	m0 = m_copy(m, 0, M_COPYALL);
1581	if (m0 == NULL) {
1582		m_tag_free(mtag);
1583		return;
1584	}
1585	m_tag_prepend(m0, mtag);
1586#endif
1587
1588#ifdef ALTQ
1589	if (r->qid) {
1590		struct altq_tag *atag;
1591
1592		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
1593		if (mtag != NULL) {
1594			atag = (struct altq_tag *)(mtag + 1);
1595			atag->qid = r->qid;
1596			/* add hints for ecn */
1597			atag->af = af;
1598			atag->hdr = mtod(m0, struct ip *);
1599			m_tag_prepend(m0, mtag);
1600		}
1601	}
1602#endif
1603
1604	switch (af) {
1605#ifdef INET
1606	case AF_INET:
1607#ifdef __FreeBSD__
1608		/* icmp_error() expects host byte ordering */
1609		ip = mtod(m0, struct ip *);
1610		NTOHS(ip->ip_len);
1611		NTOHS(ip->ip_off);
1612		PF_UNLOCK();
1613#endif
1614		icmp_error(m0, type, code, 0, (void *)NULL);
1615#ifdef __FreeBSD__
1616		PF_LOCK();
1617#endif
1618		break;
1619#endif /* INET */
1620#ifdef INET6
1621	case AF_INET6:
1622#ifdef __FreeBSD__
1623		PF_UNLOCK();
1624#endif
1625		icmp6_error(m0, type, code, 0);
1626#ifdef __FreeBSD__
1627		PF_LOCK();
1628#endif
1629		break;
1630#endif /* INET6 */
1631	}
1632}
1633
1634/*
1635 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
1636 * If n is 0, they match if they are equal. If n is != 0, they match if they
1637 * are different.
1638 */
1639int
1640pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
1641    struct pf_addr *b, sa_family_t af)
1642{
1643	int	match = 0;
1644
1645	switch (af) {
1646#ifdef INET
1647	case AF_INET:
1648		if ((a->addr32[0] & m->addr32[0]) ==
1649		    (b->addr32[0] & m->addr32[0]))
1650			match++;
1651		break;
1652#endif /* INET */
1653#ifdef INET6
1654	case AF_INET6:
1655		if (((a->addr32[0] & m->addr32[0]) ==
1656		     (b->addr32[0] & m->addr32[0])) &&
1657		    ((a->addr32[1] & m->addr32[1]) ==
1658		     (b->addr32[1] & m->addr32[1])) &&
1659		    ((a->addr32[2] & m->addr32[2]) ==
1660		     (b->addr32[2] & m->addr32[2])) &&
1661		    ((a->addr32[3] & m->addr32[3]) ==
1662		     (b->addr32[3] & m->addr32[3])))
1663			match++;
1664		break;
1665#endif /* INET6 */
1666	}
1667	if (match) {
1668		if (n)
1669			return (0);
1670		else
1671			return (1);
1672	} else {
1673		if (n)
1674			return (1);
1675		else
1676			return (0);
1677	}
1678}
1679
1680int
1681pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1682{
1683	switch (op) {
1684	case PF_OP_IRG:
1685		return ((p > a1) && (p < a2));
1686	case PF_OP_XRG:
1687		return ((p < a1) || (p > a2));
1688	case PF_OP_RRG:
1689		return ((p >= a1) && (p <= a2));
1690	case PF_OP_EQ:
1691		return (p == a1);
1692	case PF_OP_NE:
1693		return (p != a1);
1694	case PF_OP_LT:
1695		return (p < a1);
1696	case PF_OP_LE:
1697		return (p <= a1);
1698	case PF_OP_GT:
1699		return (p > a1);
1700	case PF_OP_GE:
1701		return (p >= a1);
1702	}
1703	return (0); /* never reached */
1704}
1705
/*
 * Port flavour of pf_match(): the two bounds and the port are converted
 * with ntohs() before they are compared.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	u_int16_t	lo = ntohs(a1);
	u_int16_t	hi = ntohs(a2);
	u_int16_t	port = ntohs(p);

	return (pf_match(op, lo, hi, port));
}
1714
1715int
1716pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1717{
1718	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1719		return (0);
1720	return (pf_match(op, a1, a2, u));
1721}
1722
1723int
1724pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1725{
1726	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1727		return (0);
1728	return (pf_match(op, a1, a2, g));
1729}
1730
1731struct pf_tag *
1732pf_get_tag(struct mbuf *m)
1733{
1734	struct m_tag	*mtag;
1735
1736	if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL)
1737		return ((struct pf_tag *)(mtag + 1));
1738	else
1739		return (NULL);
1740}
1741
1742int
1743pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat_rule,
1744    struct pf_tag *pftag, int *tag)
1745{
1746	if (*tag == -1) {	/* find mbuf tag */
1747		pftag = pf_get_tag(m);
1748		if (pftag != NULL)
1749			*tag = pftag->tag;
1750		else
1751			*tag = 0;
1752		if (nat_rule != NULL && nat_rule->tag)
1753			*tag = nat_rule->tag;
1754	}
1755
1756	return ((!r->match_tag_not && r->match_tag == *tag) ||
1757	    (r->match_tag_not && r->match_tag != *tag));
1758}
1759
1760int
1761pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag)
1762{
1763	struct m_tag	*mtag;
1764
1765	if (tag <= 0)
1766		return (0);
1767
1768	if (pftag == NULL) {
1769		mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT);
1770		if (mtag == NULL)
1771			return (1);
1772		((struct pf_tag *)(mtag + 1))->tag = tag;
1773		m_tag_prepend(m, mtag);
1774	} else
1775		pftag->tag = tag;
1776
1777	return (0);
1778}
1779
/*
 * Descend from anchor rule r into its anchor.  a remembers the anchor
 * rule, s becomes the first ruleset of the anchor whose active queue n is
 * not empty and r the first rule of that queue.  If every ruleset is
 * empty, r continues with the rule following the anchor rule and a is
 * reset to NULL.  Panics unless r has an anchor and a and s are NULL.
 * The arguments are evaluated more than once.
 */
#define PF_STEP_INTO_ANCHOR(r, a, s, n)					\
	do {								\
		if ((r) == NULL || (r)->anchor == NULL ||		\
		    (s) != NULL || (a) != NULL)				\
			panic("PF_STEP_INTO_ANCHOR");			\
		(a) = (r);						\
		(s) = TAILQ_FIRST(&(r)->anchor->rulesets);		\
		for ((r) = NULL; (s) != NULL;				\
		    (s) = TAILQ_NEXT((s), entries)) {			\
			(r) = TAILQ_FIRST((s)->rules[n].active.ptr);	\
			if ((r) != NULL)				\
				break;					\
		}							\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1796
/*
 * Continue after the end of ruleset s inside anchor rule a: advance s to
 * the next ruleset of the anchor whose active queue n is not empty and
 * set r to its first rule.  If there is none, r continues with the rule
 * following the anchor rule and a is reset to NULL.  Panics unless r is
 * NULL and both a and s are set.  The arguments are evaluated more than
 * once.
 */
#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n)				\
	do {								\
		if ((r) != NULL || (a) == NULL || (s) == NULL)		\
			panic("PF_STEP_OUT_OF_ANCHOR");			\
		for ((s) = TAILQ_NEXT((s), entries); (s) != NULL;	\
		    (s) = TAILQ_NEXT((s), entries)) {			\
			(r) = TAILQ_FIRST((s)->rules[n].active.ptr);	\
			if ((r) != NULL)				\
				break;					\
		}							\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1810
1811#ifdef INET6
1812void
1813pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
1814    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
1815{
1816	switch (af) {
1817#ifdef INET
1818	case AF_INET:
1819		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1820		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1821		break;
1822#endif /* INET */
1823	case AF_INET6:
1824		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1825		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1826		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
1827		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
1828		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
1829		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
1830		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
1831		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
1832		break;
1833	}
1834}
1835
1836void
1837pf_addr_inc(struct pf_addr *addr, sa_family_t af)
1838{
1839	switch (af) {
1840#ifdef INET
1841	case AF_INET:
1842		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
1843		break;
1844#endif /* INET */
1845	case AF_INET6:
1846		if (addr->addr32[3] == 0xffffffff) {
1847			addr->addr32[3] = 0;
1848			if (addr->addr32[2] == 0xffffffff) {
1849				addr->addr32[2] = 0;
1850				if (addr->addr32[1] == 0xffffffff) {
1851					addr->addr32[1] = 0;
1852					addr->addr32[0] =
1853					    htonl(ntohl(addr->addr32[0]) + 1);
1854				} else
1855					addr->addr32[1] =
1856					    htonl(ntohl(addr->addr32[1]) + 1);
1857			} else
1858				addr->addr32[2] =
1859				    htonl(ntohl(addr->addr32[2]) + 1);
1860		} else
1861			addr->addr32[3] =
1862			    htonl(ntohl(addr->addr32[3]) + 1);
1863		break;
1864	}
1865}
1866#endif /* INET6 */
1867
/*
 * Mixing step used by pf_hash().  All three arguments are modified in
 * place and evaluated several times, so they have to be plain lvalues.
 */
#define mix(a,b,c) \
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
1880
1881/*
1882 * hash function based on bridge_hash in if_bridge.c
1883 */
1884void
1885pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
1886    struct pf_poolhashkey *key, sa_family_t af)
1887{
1888	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
1889
1890	switch (af) {
1891#ifdef INET
1892	case AF_INET:
1893		a += inaddr->addr32[0];
1894		b += key->key32[1];
1895		mix(a, b, c);
1896		hash->addr32[0] = c + key->key32[2];
1897		break;
1898#endif /* INET */
1899#ifdef INET6
1900	case AF_INET6:
1901		a += inaddr->addr32[0];
1902		b += inaddr->addr32[2];
1903		mix(a, b, c);
1904		hash->addr32[0] = c;
1905		a += inaddr->addr32[1];
1906		b += inaddr->addr32[3];
1907		c += key->key32[1];
1908		mix(a, b, c);
1909		hash->addr32[1] = c;
1910		a += inaddr->addr32[2];
1911		b += inaddr->addr32[1];
1912		c += key->key32[2];
1913		mix(a, b, c);
1914		hash->addr32[2] = c;
1915		a += inaddr->addr32[3];
1916		b += inaddr->addr32[0];
1917		c += key->key32[3];
1918		mix(a, b, c);
1919		hash->addr32[3] = c;
1920		break;
1921#endif /* INET6 */
1922	}
1923}
1924
1925int
1926pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
1927    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
1928{
1929	unsigned char		 hash[16];
1930	struct pf_pool		*rpool = &r->rpool;
1931	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
1932	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
1933	struct pf_pooladdr	*acur = rpool->cur;
1934	struct pf_src_node	 k;
1935
1936	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
1937	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
1938		k.af = af;
1939		PF_ACPY(&k.addr, saddr, af);
1940		if (r->rule_flag & PFRULE_RULESRCTRACK ||
1941		    r->rpool.opts & PF_POOL_STICKYADDR)
1942			k.rule.ptr = r;
1943		else
1944			k.rule.ptr = NULL;
1945		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
1946		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
1947		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
1948			PF_ACPY(naddr, &(*sn)->raddr, af);
1949			if (pf_status.debug >= PF_DEBUG_MISC) {
1950				printf("pf_map_addr: src tracking maps ");
1951				pf_print_host(&k.addr, 0, af);
1952				printf(" to ");
1953				pf_print_host(naddr, 0, af);
1954				printf("\n");
1955			}
1956			return (0);
1957		}
1958	}
1959
1960	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
1961		return (1);
1962	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
1963		if (af == AF_INET) {
1964			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
1965			    (rpool->opts & PF_POOL_TYPEMASK) !=
1966			    PF_POOL_ROUNDROBIN)
1967				return (1);
1968			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
1969			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
1970		} else {
1971			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
1972			    (rpool->opts & PF_POOL_TYPEMASK) !=
1973			    PF_POOL_ROUNDROBIN)
1974				return (1);
1975			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
1976			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
1977		}
1978	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1979		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
1980			return (1); /* unsupported */
1981	} else {
1982		raddr = &rpool->cur->addr.v.a.addr;
1983		rmask = &rpool->cur->addr.v.a.mask;
1984	}
1985
1986	switch (rpool->opts & PF_POOL_TYPEMASK) {
1987	case PF_POOL_NONE:
1988		PF_ACPY(naddr, raddr, af);
1989		break;
1990	case PF_POOL_BITMASK:
1991		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
1992		break;
1993	case PF_POOL_RANDOM:
1994		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
1995			switch (af) {
1996#ifdef INET
1997			case AF_INET:
1998				rpool->counter.addr32[0] = arc4random();
1999				break;
2000#endif /* INET */
2001#ifdef INET6
2002			case AF_INET6:
2003				if (rmask->addr32[3] != 0xffffffff)
2004					rpool->counter.addr32[3] = arc4random();
2005				else
2006					break;
2007				if (rmask->addr32[2] != 0xffffffff)
2008					rpool->counter.addr32[2] = arc4random();
2009				else
2010					break;
2011				if (rmask->addr32[1] != 0xffffffff)
2012					rpool->counter.addr32[1] = arc4random();
2013				else
2014					break;
2015				if (rmask->addr32[0] != 0xffffffff)
2016					rpool->counter.addr32[0] = arc4random();
2017				break;
2018#endif /* INET6 */
2019			}
2020			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2021			PF_ACPY(init_addr, naddr, af);
2022
2023		} else {
2024			PF_AINC(&rpool->counter, af);
2025			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2026		}
2027		break;
2028	case PF_POOL_SRCHASH:
2029		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2030		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2031		break;
2032	case PF_POOL_ROUNDROBIN:
2033		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2034			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
2035			    &rpool->tblidx, &rpool->counter,
2036			    &raddr, &rmask, af))
2037				goto get_addr;
2038		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2039			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2040			    &rpool->tblidx, &rpool->counter,
2041			    &raddr, &rmask, af))
2042				goto get_addr;
2043		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
2044			goto get_addr;
2045
2046	try_next:
2047		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
2048			rpool->cur = TAILQ_FIRST(&rpool->list);
2049		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2050			rpool->tblidx = -1;
2051			if (pfr_pool_get(rpool->cur->addr.p.tbl,
2052			    &rpool->tblidx, &rpool->counter,
2053			    &raddr, &rmask, af)) {
2054				/* table contains no address of type 'af' */
2055				if (rpool->cur != acur)
2056					goto try_next;
2057				return (1);
2058			}
2059		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2060			rpool->tblidx = -1;
2061			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2062			    &rpool->tblidx, &rpool->counter,
2063			    &raddr, &rmask, af)) {
2064				/* table contains no address of type 'af' */
2065				if (rpool->cur != acur)
2066					goto try_next;
2067				return (1);
2068			}
2069		} else {
2070			raddr = &rpool->cur->addr.v.a.addr;
2071			rmask = &rpool->cur->addr.v.a.mask;
2072			PF_ACPY(&rpool->counter, raddr, af);
2073		}
2074
2075	get_addr:
2076		PF_ACPY(naddr, &rpool->counter, af);
2077		PF_AINC(&rpool->counter, af);
2078		break;
2079	}
2080	if (*sn != NULL)
2081		PF_ACPY(&(*sn)->raddr, naddr, af);
2082
2083	if (pf_status.debug >= PF_DEBUG_MISC &&
2084	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2085		printf("pf_map_addr: selected address ");
2086		pf_print_host(naddr, 0, af);
2087		printf("\n");
2088	}
2089
2090	return (0);
2091}
2092
2093int
2094pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2095    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2096    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2097    struct pf_src_node **sn)
2098{
2099	struct pf_state		key;
2100	struct pf_addr		init_addr;
2101	u_int16_t		cut;
2102
2103	bzero(&init_addr, sizeof(init_addr));
2104	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2105		return (1);
2106
2107	do {
2108		key.af = af;
2109		key.proto = proto;
2110		PF_ACPY(&key.ext.addr, daddr, key.af);
2111		PF_ACPY(&key.gwy.addr, naddr, key.af);
2112		key.ext.port = dport;
2113
2114		/*
2115		 * port search; start random, step;
2116		 * similar 2 portloop in in_pcbbind
2117		 */
2118		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
2119			key.gwy.port = 0;
2120			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2121				return (0);
2122		} else if (low == 0 && high == 0) {
2123			key.gwy.port = *nport;
2124			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2125				return (0);
2126		} else if (low == high) {
2127			key.gwy.port = htons(low);
2128			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2129				*nport = htons(low);
2130				return (0);
2131			}
2132		} else {
2133			u_int16_t tmp;
2134
2135			if (low > high) {
2136				tmp = low;
2137				low = high;
2138				high = tmp;
2139			}
2140			/* low < high */
2141			cut = arc4random() % (1 + high - low) + low;
2142			/* low <= cut <= high */
2143			for (tmp = cut; tmp <= high; ++(tmp)) {
2144				key.gwy.port = htons(tmp);
2145				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2146				    NULL) {
2147					*nport = htons(tmp);
2148					return (0);
2149				}
2150			}
2151			for (tmp = cut - 1; tmp >= low; --(tmp)) {
2152				key.gwy.port = htons(tmp);
2153				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2154				    NULL) {
2155					*nport = htons(tmp);
2156					return (0);
2157				}
2158			}
2159		}
2160
2161		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2162		case PF_POOL_RANDOM:
2163		case PF_POOL_ROUNDROBIN:
2164			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2165				return (1);
2166			break;
2167		case PF_POOL_NONE:
2168		case PF_POOL_SRCHASH:
2169		case PF_POOL_BITMASK:
2170		default:
2171			return (1);
2172		}
2173	} while (! PF_AEQ(&init_addr, naddr, af) );
2174
2175	return (1);					/* none available */
2176}
2177
2178struct pf_rule *
2179pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2180    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2181    struct pf_addr *daddr, u_int16_t dport, int rs_num)
2182{
2183	struct pf_rule		*r, *rm = NULL, *anchorrule = NULL;
2184	struct pf_ruleset	*ruleset = NULL;
2185
2186	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2187	while (r && rm == NULL) {
2188		struct pf_rule_addr	*src = NULL, *dst = NULL;
2189		struct pf_addr_wrap	*xdst = NULL;
2190
2191		if (r->action == PF_BINAT && direction == PF_IN) {
2192			src = &r->dst;
2193			if (r->rpool.cur != NULL)
2194				xdst = &r->rpool.cur->addr;
2195		} else {
2196			src = &r->src;
2197			dst = &r->dst;
2198		}
2199
2200		r->evaluations++;
2201		if (r->kif != NULL &&
2202		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
2203			r = r->skip[PF_SKIP_IFP].ptr;
2204		else if (r->direction && r->direction != direction)
2205			r = r->skip[PF_SKIP_DIR].ptr;
2206		else if (r->af && r->af != pd->af)
2207			r = r->skip[PF_SKIP_AF].ptr;
2208		else if (r->proto && r->proto != pd->proto)
2209			r = r->skip[PF_SKIP_PROTO].ptr;
2210		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not))
2211			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2212			    PF_SKIP_DST_ADDR].ptr;
2213		else if (src->port_op && !pf_match_port(src->port_op,
2214		    src->port[0], src->port[1], sport))
2215			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2216			    PF_SKIP_DST_PORT].ptr;
2217		else if (dst != NULL &&
2218		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not))
2219			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2220		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0))
2221			r = TAILQ_NEXT(r, entries);
2222		else if (dst != NULL && dst->port_op &&
2223		    !pf_match_port(dst->port_op, dst->port[0],
2224		    dst->port[1], dport))
2225			r = r->skip[PF_SKIP_DST_PORT].ptr;
2226		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2227		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2228		    off, pd->hdr.tcp), r->os_fingerprint)))
2229			r = TAILQ_NEXT(r, entries);
2230		else if (r->anchorname[0] && r->anchor == NULL)
2231			r = TAILQ_NEXT(r, entries);
2232		else if (r->anchor == NULL)
2233				rm = r;
2234		else
2235			PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num);
2236		if (r == NULL && anchorrule != NULL)
2237			PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset,
2238			    rs_num);
2239	}
2240	if (rm != NULL && (rm->action == PF_NONAT ||
2241	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2242		return (NULL);
2243	return (rm);
2244}
2245
2246struct pf_rule *
2247pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2248    struct pfi_kif *kif, struct pf_src_node **sn,
2249    struct pf_addr *saddr, u_int16_t sport,
2250    struct pf_addr *daddr, u_int16_t dport,
2251    struct pf_addr *naddr, u_int16_t *nport)
2252{
2253	struct pf_rule	*r = NULL;
2254
2255	if (direction == PF_OUT) {
2256		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2257		    sport, daddr, dport, PF_RULESET_BINAT);
2258		if (r == NULL)
2259			r = pf_match_translation(pd, m, off, direction, kif,
2260			    saddr, sport, daddr, dport, PF_RULESET_NAT);
2261	} else {
2262		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2263		    sport, daddr, dport, PF_RULESET_RDR);
2264		if (r == NULL)
2265			r = pf_match_translation(pd, m, off, direction, kif,
2266			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
2267	}
2268
2269	if (r != NULL) {
2270		switch (r->action) {
2271		case PF_NONAT:
2272		case PF_NOBINAT:
2273		case PF_NORDR:
2274			return (NULL);
2275		case PF_NAT:
2276			if (pf_get_sport(pd->af, pd->proto, r, saddr,
2277			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2278			    r->rpool.proxy_port[1], sn)) {
2279				DPFPRINTF(PF_DEBUG_MISC,
2280				    ("pf: NAT proxy port allocation "
2281				    "(%u-%u) failed\n",
2282				    r->rpool.proxy_port[0],
2283				    r->rpool.proxy_port[1]));
2284				return (NULL);
2285			}
2286			break;
2287		case PF_BINAT:
2288			switch (direction) {
2289			case PF_OUT:
2290				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2291					if (pd->af == AF_INET) {
2292						if (r->rpool.cur->addr.p.dyn->
2293						    pfid_acnt4 < 1)
2294							return (NULL);
2295						PF_POOLMASK(naddr,
2296						    &r->rpool.cur->addr.p.dyn->
2297						    pfid_addr4,
2298						    &r->rpool.cur->addr.p.dyn->
2299						    pfid_mask4,
2300						    saddr, AF_INET);
2301					} else {
2302						if (r->rpool.cur->addr.p.dyn->
2303						    pfid_acnt6 < 1)
2304							return (NULL);
2305						PF_POOLMASK(naddr,
2306						    &r->rpool.cur->addr.p.dyn->
2307						    pfid_addr6,
2308						    &r->rpool.cur->addr.p.dyn->
2309						    pfid_mask6,
2310						    saddr, AF_INET6);
2311					}
2312				} else
2313					PF_POOLMASK(naddr,
2314					    &r->rpool.cur->addr.v.a.addr,
2315					    &r->rpool.cur->addr.v.a.mask,
2316					    saddr, pd->af);
2317				break;
2318			case PF_IN:
2319				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2320					if (pd->af == AF_INET) {
2321						if (r->src.addr.p.dyn->
2322						    pfid_acnt4 < 1)
2323							return (NULL);
2324						PF_POOLMASK(naddr,
2325						    &r->src.addr.p.dyn->
2326						    pfid_addr4,
2327						    &r->src.addr.p.dyn->
2328						    pfid_mask4,
2329						    daddr, AF_INET);
2330					} else {
2331						if (r->src.addr.p.dyn->
2332						    pfid_acnt6 < 1)
2333							return (NULL);
2334						PF_POOLMASK(naddr,
2335						    &r->src.addr.p.dyn->
2336						    pfid_addr6,
2337						    &r->src.addr.p.dyn->
2338						    pfid_mask6,
2339						    daddr, AF_INET6);
2340					}
2341				} else
2342					PF_POOLMASK(naddr,
2343					    &r->src.addr.v.a.addr,
2344					    &r->src.addr.v.a.mask, daddr,
2345					    pd->af);
2346				break;
2347			}
2348			break;
2349		case PF_RDR: {
2350			if (pf_map_addr(r->af, r, saddr, naddr, NULL, sn))
2351				return (NULL);
2352
2353			if (r->rpool.proxy_port[1]) {
2354				u_int32_t	tmp_nport;
2355
2356				tmp_nport = ((ntohs(dport) -
2357				    ntohs(r->dst.port[0])) %
2358				    (r->rpool.proxy_port[1] -
2359				    r->rpool.proxy_port[0] + 1)) +
2360				    r->rpool.proxy_port[0];
2361
2362				/* wrap around if necessary */
2363				if (tmp_nport > 65535)
2364					tmp_nport -= 65535;
2365				*nport = htons((u_int16_t)tmp_nport);
2366			} else if (r->rpool.proxy_port[0])
2367				*nport = htons(r->rpool.proxy_port[0]);
2368			break;
2369		}
2370		default:
2371			return (NULL);
2372		}
2373	}
2374
2375	return (r);
2376}
2377
2378int
2379pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd)
2380{
2381	struct pf_addr		*saddr, *daddr;
2382	u_int16_t		 sport, dport;
2383#ifdef __FreeBSD__
2384	struct inpcbinfo	*pi;
2385#else
2386	struct inpcbtable	*tb;
2387#endif
2388	struct inpcb		*inp;
2389
2390	*uid = UID_MAX;
2391	*gid = GID_MAX;
2392	switch (pd->proto) {
2393	case IPPROTO_TCP:
2394		sport = pd->hdr.tcp->th_sport;
2395		dport = pd->hdr.tcp->th_dport;
2396#ifdef __FreeBSD__
2397		pi = &tcbinfo;
2398#else
2399		tb = &tcbtable;
2400#endif
2401		break;
2402	case IPPROTO_UDP:
2403		sport = pd->hdr.udp->uh_sport;
2404		dport = pd->hdr.udp->uh_dport;
2405#ifdef __FreeBSD__
2406		pi = &udbinfo;
2407#else
2408		tb = &udbtable;
2409#endif
2410		break;
2411	default:
2412		return (0);
2413	}
2414	if (direction == PF_IN) {
2415		saddr = pd->src;
2416		daddr = pd->dst;
2417	} else {
2418		u_int16_t	p;
2419
2420		p = sport;
2421		sport = dport;
2422		dport = p;
2423		saddr = pd->dst;
2424		daddr = pd->src;
2425	}
2426	switch (pd->af) {
2427	case AF_INET:
2428#ifdef __FreeBSD__
2429		INP_INFO_RLOCK(pi);	/* XXX LOR */
2430		inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2431			dport, 0, NULL);
2432		if (inp == NULL) {
2433			inp = in_pcblookup_hash(pi, saddr->v4, sport,
2434			   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
2435			if(inp == NULL) {
2436				INP_INFO_RUNLOCK(pi);
2437				return (0);
2438			}
2439		}
2440#else
2441		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
2442		if (inp == NULL) {
2443			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
2444			if (inp == NULL)
2445				return (0);
2446		}
2447#endif
2448		break;
2449#ifdef INET6
2450	case AF_INET6:
2451#ifdef __FreeBSD__
2452		INP_INFO_RLOCK(pi);
2453		inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2454			&daddr->v6, dport, 0, NULL);
2455		if (inp == NULL) {
2456			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2457			&daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2458			if (inp == NULL) {
2459				INP_INFO_RUNLOCK(pi);
2460				return (0);
2461			}
2462		}
2463#else
2464		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
2465		    dport);
2466		if (inp == NULL) {
2467			inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
2468			if (inp == NULL)
2469				return (0);
2470		}
2471#endif
2472		break;
2473#endif /* INET6 */
2474
2475	default:
2476		return (0);
2477	}
2478#ifdef __FreeBSD__
2479	INP_LOCK(inp);
2480	*uid = inp->inp_socket->so_cred->cr_uid;
2481	*gid = inp->inp_socket->so_cred->cr_groups[0];
2482	INP_UNLOCK(inp);
2483	INP_INFO_RUNLOCK(pi);
2484#else
2485	*uid = inp->inp_socket->so_euid;
2486	*gid = inp->inp_socket->so_egid;
2487#endif
2488	return (1);
2489}
2490
2491u_int8_t
2492pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2493{
2494	int		 hlen;
2495	u_int8_t	 hdr[60];
2496	u_int8_t	*opt, optlen;
2497	u_int8_t	 wscale = 0;
2498
2499	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2500	if (hlen <= sizeof(struct tcphdr))
2501		return (0);
2502	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2503		return (0);
2504	opt = hdr + sizeof(struct tcphdr);
2505	hlen -= sizeof(struct tcphdr);
2506	while (hlen >= 3) {
2507		switch (*opt) {
2508		case TCPOPT_EOL:
2509		case TCPOPT_NOP:
2510			++opt;
2511			--hlen;
2512			break;
2513		case TCPOPT_WINDOW:
2514			wscale = opt[2];
2515			if (wscale > TCP_MAX_WINSHIFT)
2516				wscale = TCP_MAX_WINSHIFT;
2517			wscale |= PF_WSCALE_FLAG;
2518			/* FALLTHROUGH */
2519		default:
2520			optlen = opt[1];
2521			if (optlen < 2)
2522				optlen = 2;
2523			hlen -= optlen;
2524			opt += optlen;
2525			break;
2526		}
2527	}
2528	return (wscale);
2529}
2530
2531u_int16_t
2532pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2533{
2534	int		 hlen;
2535	u_int8_t	 hdr[60];
2536	u_int8_t	*opt, optlen;
2537	u_int16_t	 mss = tcp_mssdflt;
2538
2539	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2540	if (hlen <= sizeof(struct tcphdr))
2541		return (0);
2542	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2543		return (0);
2544	opt = hdr + sizeof(struct tcphdr);
2545	hlen -= sizeof(struct tcphdr);
2546	while (hlen >= TCPOLEN_MAXSEG) {
2547		switch (*opt) {
2548		case TCPOPT_EOL:
2549		case TCPOPT_NOP:
2550			++opt;
2551			--hlen;
2552			break;
2553		case TCPOPT_MAXSEG:
2554			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2555			/* FALLTHROUGH */
2556		default:
2557			optlen = opt[1];
2558			if (optlen < 2)
2559				optlen = 2;
2560			hlen -= optlen;
2561			opt += optlen;
2562			break;
2563		}
2564	}
2565	return (mss);
2566}
2567
2568u_int16_t
2569pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2570{
2571#ifdef INET
2572	struct sockaddr_in	*dst;
2573	struct route		 ro;
2574#endif /* INET */
2575#ifdef INET6
2576	struct sockaddr_in6	*dst6;
2577	struct route_in6	 ro6;
2578#endif /* INET6 */
2579	struct rtentry		*rt = NULL;
2580	int			 hlen = 0;	/* make the compiler happy */
2581	u_int16_t		 mss = tcp_mssdflt;
2582
2583	switch (af) {
2584#ifdef INET
2585	case AF_INET:
2586		hlen = sizeof(struct ip);
2587		bzero(&ro, sizeof(ro));
2588		dst = (struct sockaddr_in *)&ro.ro_dst;
2589		dst->sin_family = AF_INET;
2590		dst->sin_len = sizeof(*dst);
2591		dst->sin_addr = addr->v4;
2592#ifdef __FreeBSD__
2593#ifdef RTF_PRCLONING
2594		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2595#else /* !RTF_PRCLONING */
2596		rtalloc_ign(&ro, RTF_CLONING);
2597#endif
2598#else /* ! __FreeBSD__ */
2599		rtalloc_noclone(&ro, NO_CLONING);
2600#endif
2601		rt = ro.ro_rt;
2602		break;
2603#endif /* INET */
2604#ifdef INET6
2605	case AF_INET6:
2606		hlen = sizeof(struct ip6_hdr);
2607		bzero(&ro6, sizeof(ro6));
2608		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2609		dst6->sin6_family = AF_INET6;
2610		dst6->sin6_len = sizeof(*dst6);
2611		dst6->sin6_addr = addr->v6;
2612#ifdef __FreeBSD__
2613#ifdef RTF_PRCLONING
2614		rtalloc_ign((struct route *)&ro6,
2615		    (RTF_CLONING | RTF_PRCLONING));
2616#else /* !RTF_PRCLONING */
2617		rtalloc_ign((struct route *)&ro6, RTF_CLONING);
2618#endif
2619#else /* ! __FreeBSD__ */
2620		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
2621#endif
2622		rt = ro6.ro_rt;
2623		break;
2624#endif /* INET6 */
2625	}
2626
2627	if (rt && rt->rt_ifp) {
2628		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2629		mss = max(tcp_mssdflt, mss);
2630		RTFREE(rt);
2631	}
2632	mss = min(mss, offer);
2633	mss = max(mss, 64);		/* sanity - at least max opt space */
2634	return (mss);
2635}
2636
2637void
2638pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2639{
2640	struct pf_rule *r = s->rule.ptr;
2641
2642	s->rt_kif = NULL;
2643	if (!r->rt || r->rt == PF_FASTROUTE)
2644		return;
2645	switch (s->af) {
2646#ifdef INET
2647	case AF_INET:
2648		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
2649		    &s->nat_src_node);
2650		s->rt_kif = r->rpool.cur->kif;
2651		break;
2652#endif /* INET */
2653#ifdef INET6
2654	case AF_INET6:
2655		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
2656		    &s->nat_src_node);
2657		s->rt_kif = r->rpool.cur->kif;
2658		break;
2659#endif /* INET6 */
2660	}
2661}
2662
2663int
2664pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
2665    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
2666    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
2667{
2668	struct pf_rule		*nr = NULL;
2669	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
2670	struct tcphdr		*th = pd->hdr.tcp;
2671	u_int16_t		 bport, nport = 0;
2672	sa_family_t		 af = pd->af;
2673	int			 lookup = -1;
2674	uid_t			 uid;
2675	gid_t			 gid;
2676	struct pf_rule		*r, *a = NULL;
2677	struct pf_ruleset	*ruleset = NULL;
2678	struct pf_src_node	*nsn = NULL;
2679	u_short			 reason;
2680	int			 rewrite = 0;
2681	struct pf_tag		*pftag = NULL;
2682	int			 tag = -1;
2683	u_int16_t		 mss = tcp_mssdflt;
2684
2685	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2686
2687	if (direction == PF_OUT) {
2688		bport = nport = th->th_sport;
2689		/* check outgoing packet for BINAT/NAT */
2690		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
2691		    saddr, th->th_sport, daddr, th->th_dport,
2692		    &pd->naddr, &nport)) != NULL) {
2693			PF_ACPY(&pd->baddr, saddr, af);
2694			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2695			    &th->th_sum, &pd->naddr, nport, 0, af);
2696			rewrite++;
2697			if (nr->natpass)
2698				r = NULL;
2699			pd->nat_rule = nr;
2700		}
2701	} else {
2702		bport = nport = th->th_dport;
2703		/* check incoming packet for BINAT/RDR */
2704		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
2705		    saddr, th->th_sport, daddr, th->th_dport,
2706		    &pd->naddr, &nport)) != NULL) {
2707			PF_ACPY(&pd->baddr, daddr, af);
2708			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2709			    &th->th_sum, &pd->naddr, nport, 0, af);
2710			rewrite++;
2711			if (nr->natpass)
2712				r = NULL;
2713			pd->nat_rule = nr;
2714		}
2715	}
2716
2717	while (r != NULL) {
2718		r->evaluations++;
2719		if (r->kif != NULL &&
2720		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
2721			r = r->skip[PF_SKIP_IFP].ptr;
2722		else if (r->direction && r->direction != direction)
2723			r = r->skip[PF_SKIP_DIR].ptr;
2724		else if (r->af && r->af != af)
2725			r = r->skip[PF_SKIP_AF].ptr;
2726		else if (r->proto && r->proto != IPPROTO_TCP)
2727			r = r->skip[PF_SKIP_PROTO].ptr;
2728		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
2729			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2730		else if (r->src.port_op && !pf_match_port(r->src.port_op,
2731		    r->src.port[0], r->src.port[1], th->th_sport))
2732			r = r->skip[PF_SKIP_SRC_PORT].ptr;
2733		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
2734			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2735		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
2736		    r->dst.port[0], r->dst.port[1], th->th_dport))
2737			r = r->skip[PF_SKIP_DST_PORT].ptr;
2738		else if (r->tos && !(r->tos & pd->tos))
2739			r = TAILQ_NEXT(r, entries);
2740		else if (r->rule_flag & PFRULE_FRAGMENT)
2741			r = TAILQ_NEXT(r, entries);
2742		else if ((r->flagset & th->th_flags) != r->flags)
2743			r = TAILQ_NEXT(r, entries);
2744		else if (r->uid.op && (lookup != -1 || (lookup =
2745		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
2746		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
2747		    uid))
2748			r = TAILQ_NEXT(r, entries);
2749		else if (r->gid.op && (lookup != -1 || (lookup =
2750		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
2751		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
2752		    gid))
2753			r = TAILQ_NEXT(r, entries);
2754		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
2755			r = TAILQ_NEXT(r, entries);
2756		else if (r->anchorname[0] && r->anchor == NULL)
2757			r = TAILQ_NEXT(r, entries);
2758		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
2759		    pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
2760			r = TAILQ_NEXT(r, entries);
2761		else {
2762			if (r->tag)
2763				tag = r->tag;
2764			if (r->anchor == NULL) {
2765				*rm = r;
2766				*am = a;
2767				*rsm = ruleset;
2768				if ((*rm)->quick)
2769					break;
2770				r = TAILQ_NEXT(r, entries);
2771			} else
2772				PF_STEP_INTO_ANCHOR(r, a, ruleset,
2773				    PF_RULESET_FILTER);
2774		}
2775		if (r == NULL && a != NULL)
2776			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
2777			    PF_RULESET_FILTER);
2778	}
2779	r = *rm;
2780	a = *am;
2781	ruleset = *rsm;
2782
2783	REASON_SET(&reason, PFRES_MATCH);
2784
2785	if (r->log) {
2786		if (rewrite)
2787			m_copyback(m, off, sizeof(*th), (caddr_t)th);
2788		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
2789	}
2790
2791	if ((r->action == PF_DROP) &&
2792	    ((r->rule_flag & PFRULE_RETURNRST) ||
2793	    (r->rule_flag & PFRULE_RETURNICMP) ||
2794	    (r->rule_flag & PFRULE_RETURN))) {
2795		/* undo NAT changes, if they have taken place */
2796		if (nr != NULL) {
2797			if (direction == PF_OUT) {
2798				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2799				    &th->th_sum, &pd->baddr, bport, 0, af);
2800				rewrite++;
2801			} else {
2802				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2803				    &th->th_sum, &pd->baddr, bport, 0, af);
2804				rewrite++;
2805			}
2806		}
2807		if (((r->rule_flag & PFRULE_RETURNRST) ||
2808		    (r->rule_flag & PFRULE_RETURN)) &&
2809		    !(th->th_flags & TH_RST)) {
2810			u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
2811
2812			if (th->th_flags & TH_SYN)
2813				ack++;
2814			if (th->th_flags & TH_FIN)
2815				ack++;
2816			pf_send_tcp(r, af, pd->dst,
2817			    pd->src, th->th_dport, th->th_sport,
2818			    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
2819			    r->return_ttl);
2820		} else if ((af == AF_INET) && r->return_icmp)
2821			pf_send_icmp(m, r->return_icmp >> 8,
2822			    r->return_icmp & 255, af, r);
2823		else if ((af == AF_INET6) && r->return_icmp6)
2824			pf_send_icmp(m, r->return_icmp6 >> 8,
2825			    r->return_icmp6 & 255, af, r);
2826	}
2827
2828	if (r->action == PF_DROP)
2829		return (PF_DROP);
2830
2831	if (pf_tag_packet(m, pftag, tag)) {
2832		REASON_SET(&reason, PFRES_MEMORY);
2833		return (PF_DROP);
2834	}
2835
2836	if (r->keep_state || nr != NULL ||
2837	    (pd->flags & PFDESC_TCP_NORM)) {
2838		/* create new state */
2839		u_int16_t	 len;
2840		struct pf_state	*s = NULL;
2841		struct pf_src_node *sn = NULL;
2842
2843		len = pd->tot_len - off - (th->th_off << 2);
2844
2845		/* check maximums */
2846		if (r->max_states && (r->states >= r->max_states))
2847			goto cleanup;
2848		/* src node for flter rule */
2849		if ((r->rule_flag & PFRULE_SRCTRACK ||
2850		    r->rpool.opts & PF_POOL_STICKYADDR) &&
2851		    pf_insert_src_node(&sn, r, saddr, af) != 0)
2852			goto cleanup;
2853		/* src node for translation rule */
2854		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
2855		    ((direction == PF_OUT &&
2856		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
2857		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
2858			goto cleanup;
2859		s = pool_get(&pf_state_pl, PR_NOWAIT);
2860		if (s == NULL) {
2861cleanup:
2862			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
2863				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
2864				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
2865				pf_status.src_nodes--;
2866				pool_put(&pf_src_tree_pl, sn);
2867			}
2868			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
2869			    nsn->expire == 0) {
2870				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
2871				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
2872				pf_status.src_nodes--;
2873				pool_put(&pf_src_tree_pl, nsn);
2874			}
2875			REASON_SET(&reason, PFRES_MEMORY);
2876			return (PF_DROP);
2877		}
2878		bzero(s, sizeof(*s));
2879		r->states++;
2880		if (a != NULL)
2881			a->states++;
2882		s->rule.ptr = r;
2883		s->nat_rule.ptr = nr;
2884		if (s->nat_rule.ptr != NULL)
2885			s->nat_rule.ptr->states++;
2886		s->anchor.ptr = a;
2887		s->allow_opts = r->allow_opts;
2888		s->log = r->log & 2;
2889		s->proto = IPPROTO_TCP;
2890		s->direction = direction;
2891		s->af = af;
2892		if (direction == PF_OUT) {
2893			PF_ACPY(&s->gwy.addr, saddr, af);
2894			s->gwy.port = th->th_sport;		/* sport */
2895			PF_ACPY(&s->ext.addr, daddr, af);
2896			s->ext.port = th->th_dport;
2897			if (nr != NULL) {
2898				PF_ACPY(&s->lan.addr, &pd->baddr, af);
2899				s->lan.port = bport;
2900			} else {
2901				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
2902				s->lan.port = s->gwy.port;
2903			}
2904		} else {
2905			PF_ACPY(&s->lan.addr, daddr, af);
2906			s->lan.port = th->th_dport;
2907			PF_ACPY(&s->ext.addr, saddr, af);
2908			s->ext.port = th->th_sport;
2909			if (nr != NULL) {
2910				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
2911				s->gwy.port = bport;
2912			} else {
2913				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
2914				s->gwy.port = s->lan.port;
2915			}
2916		}
2917
2918		s->src.seqlo = ntohl(th->th_seq);
2919		s->src.seqhi = s->src.seqlo + len + 1;
2920		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2921		    r->keep_state == PF_STATE_MODULATE) {
2922			/* Generate sequence number modulator */
2923			while ((s->src.seqdiff = arc4random()) == 0)
2924				;
2925			pf_change_a(&th->th_seq, &th->th_sum,
2926			    htonl(s->src.seqlo + s->src.seqdiff), 0);
2927			rewrite = 1;
2928		} else
2929			s->src.seqdiff = 0;
2930		if (th->th_flags & TH_SYN) {
2931			s->src.seqhi++;
2932			s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
2933		}
2934		s->src.max_win = MAX(ntohs(th->th_win), 1);
2935		if (s->src.wscale & PF_WSCALE_MASK) {
2936			/* Remove scale factor from initial window */
2937			int win = s->src.max_win;
2938			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
2939			s->src.max_win = (win - 1) >>
2940			    (s->src.wscale & PF_WSCALE_MASK);
2941		}
2942		if (th->th_flags & TH_FIN)
2943			s->src.seqhi++;
2944		s->dst.seqhi = 1;
2945		s->dst.max_win = 1;
2946		s->src.state = TCPS_SYN_SENT;
2947		s->dst.state = TCPS_CLOSED;
2948#ifdef __FreeBSD__
2949		s->creation = time_second;
2950		s->expire = time_second;
2951#else
2952		s->creation = time.tv_sec;
2953		s->expire = time.tv_sec;
2954#endif
2955		s->timeout = PFTM_TCP_FIRST_PACKET;
2956		pf_set_rt_ifp(s, saddr);
2957		if (sn != NULL) {
2958			s->src_node = sn;
2959			s->src_node->states++;
2960		}
2961		if (nsn != NULL) {
2962			PF_ACPY(&nsn->raddr, &pd->naddr, af);
2963			s->nat_src_node = nsn;
2964			s->nat_src_node->states++;
2965		}
2966		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
2967		    off, pd, th, &s->src, &s->dst)) {
2968			REASON_SET(&reason, PFRES_MEMORY);
2969			pf_src_tree_remove_state(s);
2970			pool_put(&pf_state_pl, s);
2971			return (PF_DROP);
2972		}
2973		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
2974		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src,
2975		    &s->dst, &rewrite)) {
2976			pf_normalize_tcp_cleanup(s);
2977			pf_src_tree_remove_state(s);
2978			pool_put(&pf_state_pl, s);
2979			return (PF_DROP);
2980		}
2981		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
2982			pf_normalize_tcp_cleanup(s);
2983			REASON_SET(&reason, PFRES_MEMORY);
2984			pf_src_tree_remove_state(s);
2985			pool_put(&pf_state_pl, s);
2986			return (PF_DROP);
2987		} else
2988			*sm = s;
2989		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2990		    r->keep_state == PF_STATE_SYNPROXY) {
2991			s->src.state = PF_TCPS_PROXY_SRC;
2992			if (nr != NULL) {
2993				if (direction == PF_OUT) {
2994					pf_change_ap(saddr, &th->th_sport,
2995					    pd->ip_sum, &th->th_sum, &pd->baddr,
2996					    bport, 0, af);
2997				} else {
2998					pf_change_ap(daddr, &th->th_dport,
2999					    pd->ip_sum, &th->th_sum, &pd->baddr,
3000					    bport, 0, af);
3001				}
3002			}
3003			s->src.seqhi = arc4random();
3004			/* Find mss option */
3005			mss = pf_get_mss(m, off, th->th_off, af);
3006			mss = pf_calc_mss(saddr, af, mss);
3007			mss = pf_calc_mss(daddr, af, mss);
3008			s->src.mss = mss;
3009			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
3010			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3011			    TH_SYN|TH_ACK, 0, s->src.mss, 0);
3012			return (PF_SYNPROXY_DROP);
3013		}
3014	}
3015
3016	/* copy back packet headers if we performed NAT operations */
3017	if (rewrite)
3018		m_copyback(m, off, sizeof(*th), (caddr_t)th);
3019
3020	return (PF_PASS);
3021}
3022
3023int
3024pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
3025    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3026    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3027{
3028	struct pf_rule		*nr = NULL;
3029	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3030	struct udphdr		*uh = pd->hdr.udp;
3031	u_int16_t		 bport, nport = 0;
3032	sa_family_t		 af = pd->af;
3033	int			 lookup = -1;
3034	uid_t			 uid;
3035	gid_t			 gid;
3036	struct pf_rule		*r, *a = NULL;
3037	struct pf_ruleset	*ruleset = NULL;
3038	struct pf_src_node	*nsn = NULL;
3039	u_short			 reason;
3040	int			 rewrite = 0;
3041	struct pf_tag		*pftag = NULL;
3042	int			 tag = -1;
3043
3044	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3045
3046	if (direction == PF_OUT) {
3047		bport = nport = uh->uh_sport;
3048		/* check outgoing packet for BINAT/NAT */
3049		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3050		    saddr, uh->uh_sport, daddr, uh->uh_dport,
3051		    &pd->naddr, &nport)) != NULL) {
3052			PF_ACPY(&pd->baddr, saddr, af);
3053			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3054			    &uh->uh_sum, &pd->naddr, nport, 1, af);
3055			rewrite++;
3056			if (nr->natpass)
3057				r = NULL;
3058			pd->nat_rule = nr;
3059		}
3060	} else {
3061		bport = nport = uh->uh_dport;
3062		/* check incoming packet for BINAT/RDR */
3063		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3064		    saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
3065		    &nport)) != NULL) {
3066			PF_ACPY(&pd->baddr, daddr, af);
3067			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3068			    &uh->uh_sum, &pd->naddr, nport, 1, af);
3069			rewrite++;
3070			if (nr->natpass)
3071				r = NULL;
3072			pd->nat_rule = nr;
3073		}
3074	}
3075
3076	while (r != NULL) {
3077		r->evaluations++;
3078		if (r->kif != NULL &&
3079		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3080			r = r->skip[PF_SKIP_IFP].ptr;
3081		else if (r->direction && r->direction != direction)
3082			r = r->skip[PF_SKIP_DIR].ptr;
3083		else if (r->af && r->af != af)
3084			r = r->skip[PF_SKIP_AF].ptr;
3085		else if (r->proto && r->proto != IPPROTO_UDP)
3086			r = r->skip[PF_SKIP_PROTO].ptr;
3087		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3088			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3089		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3090		    r->src.port[0], r->src.port[1], uh->uh_sport))
3091			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3092		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3093			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3094		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3095		    r->dst.port[0], r->dst.port[1], uh->uh_dport))
3096			r = r->skip[PF_SKIP_DST_PORT].ptr;
3097		else if (r->tos && !(r->tos & pd->tos))
3098			r = TAILQ_NEXT(r, entries);
3099		else if (r->rule_flag & PFRULE_FRAGMENT)
3100			r = TAILQ_NEXT(r, entries);
3101		else if (r->uid.op && (lookup != -1 || (lookup =
3102		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
3103		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3104		    uid))
3105			r = TAILQ_NEXT(r, entries);
3106		else if (r->gid.op && (lookup != -1 || (lookup =
3107		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
3108		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3109		    gid))
3110			r = TAILQ_NEXT(r, entries);
3111		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3112			r = TAILQ_NEXT(r, entries);
3113		else if (r->anchorname[0] && r->anchor == NULL)
3114			r = TAILQ_NEXT(r, entries);
3115		else if (r->os_fingerprint != PF_OSFP_ANY)
3116			r = TAILQ_NEXT(r, entries);
3117		else {
3118			if (r->tag)
3119				tag = r->tag;
3120			if (r->anchor == NULL) {
3121				*rm = r;
3122				*am = a;
3123				*rsm = ruleset;
3124				if ((*rm)->quick)
3125					break;
3126				r = TAILQ_NEXT(r, entries);
3127			} else
3128				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3129				    PF_RULESET_FILTER);
3130		}
3131		if (r == NULL && a != NULL)
3132			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3133			    PF_RULESET_FILTER);
3134	}
3135	r = *rm;
3136	a = *am;
3137	ruleset = *rsm;
3138
3139	REASON_SET(&reason, PFRES_MATCH);
3140
3141	if (r->log) {
3142		if (rewrite)
3143			m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3144		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3145	}
3146
3147	if ((r->action == PF_DROP) &&
3148	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3149	    (r->rule_flag & PFRULE_RETURN))) {
3150		/* undo NAT changes, if they have taken place */
3151		if (nr != NULL) {
3152			if (direction == PF_OUT) {
3153				pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3154				    &uh->uh_sum, &pd->baddr, bport, 1, af);
3155				rewrite++;
3156			} else {
3157				pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3158				    &uh->uh_sum, &pd->baddr, bport, 1, af);
3159				rewrite++;
3160			}
3161		}
3162		if ((af == AF_INET) && r->return_icmp)
3163			pf_send_icmp(m, r->return_icmp >> 8,
3164			    r->return_icmp & 255, af, r);
3165		else if ((af == AF_INET6) && r->return_icmp6)
3166			pf_send_icmp(m, r->return_icmp6 >> 8,
3167			    r->return_icmp6 & 255, af, r);
3168	}
3169
3170	if (r->action == PF_DROP)
3171		return (PF_DROP);
3172
3173	if (pf_tag_packet(m, pftag, tag)) {
3174		REASON_SET(&reason, PFRES_MEMORY);
3175		return (PF_DROP);
3176	}
3177
3178	if (r->keep_state || nr != NULL) {
3179		/* create new state */
3180		struct pf_state	*s = NULL;
3181		struct pf_src_node *sn = NULL;
3182
3183		/* check maximums */
3184		if (r->max_states && (r->states >= r->max_states))
3185			goto cleanup;
3186		/* src node for flter rule */
3187		if ((r->rule_flag & PFRULE_SRCTRACK ||
3188		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3189		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3190			goto cleanup;
3191		/* src node for translation rule */
3192		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3193		    ((direction == PF_OUT &&
3194		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3195		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3196			goto cleanup;
3197		s = pool_get(&pf_state_pl, PR_NOWAIT);
3198		if (s == NULL) {
3199cleanup:
3200			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3201				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3202				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3203				pf_status.src_nodes--;
3204				pool_put(&pf_src_tree_pl, sn);
3205			}
3206			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3207			    nsn->expire == 0) {
3208				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3209				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3210				pf_status.src_nodes--;
3211				pool_put(&pf_src_tree_pl, nsn);
3212			}
3213			REASON_SET(&reason, PFRES_MEMORY);
3214			return (PF_DROP);
3215		}
3216		bzero(s, sizeof(*s));
3217		r->states++;
3218		if (a != NULL)
3219			a->states++;
3220		s->rule.ptr = r;
3221		s->nat_rule.ptr = nr;
3222		if (s->nat_rule.ptr != NULL)
3223			s->nat_rule.ptr->states++;
3224		s->anchor.ptr = a;
3225		s->allow_opts = r->allow_opts;
3226		s->log = r->log & 2;
3227		s->proto = IPPROTO_UDP;
3228		s->direction = direction;
3229		s->af = af;
3230		if (direction == PF_OUT) {
3231			PF_ACPY(&s->gwy.addr, saddr, af);
3232			s->gwy.port = uh->uh_sport;
3233			PF_ACPY(&s->ext.addr, daddr, af);
3234			s->ext.port = uh->uh_dport;
3235			if (nr != NULL) {
3236				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3237				s->lan.port = bport;
3238			} else {
3239				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3240				s->lan.port = s->gwy.port;
3241			}
3242		} else {
3243			PF_ACPY(&s->lan.addr, daddr, af);
3244			s->lan.port = uh->uh_dport;
3245			PF_ACPY(&s->ext.addr, saddr, af);
3246			s->ext.port = uh->uh_sport;
3247			if (nr != NULL) {
3248				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3249				s->gwy.port = bport;
3250			} else {
3251				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3252				s->gwy.port = s->lan.port;
3253			}
3254		}
3255		s->src.state = PFUDPS_SINGLE;
3256		s->dst.state = PFUDPS_NO_TRAFFIC;
3257#ifdef __FreeBSD__
3258		s->creation = time_second;
3259		s->expire = time_second;
3260#else
3261		s->creation = time.tv_sec;
3262		s->expire = time.tv_sec;
3263#endif
3264		s->timeout = PFTM_UDP_FIRST_PACKET;
3265		pf_set_rt_ifp(s, saddr);
3266		if (sn != NULL) {
3267			s->src_node = sn;
3268			s->src_node->states++;
3269		}
3270		if (nsn != NULL) {
3271			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3272			s->nat_src_node = nsn;
3273			s->nat_src_node->states++;
3274		}
3275		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3276			REASON_SET(&reason, PFRES_MEMORY);
3277			pf_src_tree_remove_state(s);
3278			pool_put(&pf_state_pl, s);
3279			return (PF_DROP);
3280		} else
3281			*sm = s;
3282	}
3283
3284	/* copy back packet headers if we performed NAT operations */
3285	if (rewrite)
3286		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3287
3288	return (PF_PASS);
3289}
3290
3291int
3292pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
3293    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3294    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3295{
3296	struct pf_rule		*nr = NULL;
3297	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3298	struct pf_rule		*r, *a = NULL;
3299	struct pf_ruleset	*ruleset = NULL;
3300	struct pf_src_node	*nsn = NULL;
3301	u_short			 reason;
3302	u_int16_t		 icmpid = 0;	/* make the compiler happy */
3303	sa_family_t		 af = pd->af;
3304	u_int8_t		 icmptype = 0;	/* make the compiler happy */
3305	u_int8_t		 icmpcode = 0;	/* make the compiler happy */
3306	int			 state_icmp = 0;
3307	struct pf_tag		*pftag = NULL;
3308	int			 tag = -1;
3309#ifdef INET6
3310	int			 rewrite = 0;
3311#endif /* INET6 */
3312
3313	switch (pd->proto) {
3314#ifdef INET
3315	case IPPROTO_ICMP:
3316		icmptype = pd->hdr.icmp->icmp_type;
3317		icmpcode = pd->hdr.icmp->icmp_code;
3318		icmpid = pd->hdr.icmp->icmp_id;
3319
3320		if (icmptype == ICMP_UNREACH ||
3321		    icmptype == ICMP_SOURCEQUENCH ||
3322		    icmptype == ICMP_REDIRECT ||
3323		    icmptype == ICMP_TIMXCEED ||
3324		    icmptype == ICMP_PARAMPROB)
3325			state_icmp++;
3326		break;
3327#endif /* INET */
3328#ifdef INET6
3329	case IPPROTO_ICMPV6:
3330		icmptype = pd->hdr.icmp6->icmp6_type;
3331		icmpcode = pd->hdr.icmp6->icmp6_code;
3332		icmpid = pd->hdr.icmp6->icmp6_id;
3333
3334		if (icmptype == ICMP6_DST_UNREACH ||
3335		    icmptype == ICMP6_PACKET_TOO_BIG ||
3336		    icmptype == ICMP6_TIME_EXCEEDED ||
3337		    icmptype == ICMP6_PARAM_PROB)
3338			state_icmp++;
3339		break;
3340#endif /* INET6 */
3341	}
3342
3343	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3344
3345	if (direction == PF_OUT) {
3346		/* check outgoing packet for BINAT/NAT */
3347		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3348		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3349			PF_ACPY(&pd->baddr, saddr, af);
3350			switch (af) {
3351#ifdef INET
3352			case AF_INET:
3353				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3354				    pd->naddr.v4.s_addr, 0);
3355				break;
3356#endif /* INET */
3357#ifdef INET6
3358			case AF_INET6:
3359				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3360				    &pd->naddr, 0);
3361				rewrite++;
3362				break;
3363#endif /* INET6 */
3364			}
3365			if (nr->natpass)
3366				r = NULL;
3367			pd->nat_rule = nr;
3368		}
3369	} else {
3370		/* check incoming packet for BINAT/RDR */
3371		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3372		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3373			PF_ACPY(&pd->baddr, daddr, af);
3374			switch (af) {
3375#ifdef INET
3376			case AF_INET:
3377				pf_change_a(&daddr->v4.s_addr,
3378				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3379				break;
3380#endif /* INET */
3381#ifdef INET6
3382			case AF_INET6:
3383				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3384				    &pd->naddr, 0);
3385				rewrite++;
3386				break;
3387#endif /* INET6 */
3388			}
3389			if (nr->natpass)
3390				r = NULL;
3391			pd->nat_rule = nr;
3392		}
3393	}
3394
3395	while (r != NULL) {
3396		r->evaluations++;
3397		if (r->kif != NULL &&
3398		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3399			r = r->skip[PF_SKIP_IFP].ptr;
3400		else if (r->direction && r->direction != direction)
3401			r = r->skip[PF_SKIP_DIR].ptr;
3402		else if (r->af && r->af != af)
3403			r = r->skip[PF_SKIP_AF].ptr;
3404		else if (r->proto && r->proto != pd->proto)
3405			r = r->skip[PF_SKIP_PROTO].ptr;
3406		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3407			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3408		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3409			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3410		else if (r->type && r->type != icmptype + 1)
3411			r = TAILQ_NEXT(r, entries);
3412		else if (r->code && r->code != icmpcode + 1)
3413			r = TAILQ_NEXT(r, entries);
3414		else if (r->tos && !(r->tos & pd->tos))
3415			r = TAILQ_NEXT(r, entries);
3416		else if (r->rule_flag & PFRULE_FRAGMENT)
3417			r = TAILQ_NEXT(r, entries);
3418		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3419			r = TAILQ_NEXT(r, entries);
3420		else if (r->anchorname[0] && r->anchor == NULL)
3421			r = TAILQ_NEXT(r, entries);
3422		else if (r->os_fingerprint != PF_OSFP_ANY)
3423			r = TAILQ_NEXT(r, entries);
3424		else {
3425			if (r->tag)
3426				tag = r->tag;
3427			if (r->anchor == NULL) {
3428				*rm = r;
3429				*am = a;
3430				*rsm = ruleset;
3431				if ((*rm)->quick)
3432					break;
3433				r = TAILQ_NEXT(r, entries);
3434			} else
3435				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3436				    PF_RULESET_FILTER);
3437		}
3438		if (r == NULL && a != NULL)
3439			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3440			    PF_RULESET_FILTER);
3441	}
3442	r = *rm;
3443	a = *am;
3444	ruleset = *rsm;
3445
3446	REASON_SET(&reason, PFRES_MATCH);
3447
3448	if (r->log) {
3449#ifdef INET6
3450		if (rewrite)
3451			m_copyback(m, off, sizeof(struct icmp6_hdr),
3452			    (caddr_t)pd->hdr.icmp6);
3453#endif /* INET6 */
3454		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3455	}
3456
3457	if (r->action != PF_PASS)
3458		return (PF_DROP);
3459
3460	if (pf_tag_packet(m, pftag, tag)) {
3461		REASON_SET(&reason, PFRES_MEMORY);
3462		return (PF_DROP);
3463	}
3464
3465	if (!state_icmp && (r->keep_state || nr != NULL)) {
3466		/* create new state */
3467		struct pf_state	*s = NULL;
3468		struct pf_src_node *sn = NULL;
3469
3470		/* check maximums */
3471		if (r->max_states && (r->states >= r->max_states))
3472			goto cleanup;
3473		/* src node for flter rule */
3474		if ((r->rule_flag & PFRULE_SRCTRACK ||
3475		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3476		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3477			goto cleanup;
3478		/* src node for translation rule */
3479		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3480		    ((direction == PF_OUT &&
3481		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3482		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3483			goto cleanup;
3484		s = pool_get(&pf_state_pl, PR_NOWAIT);
3485		if (s == NULL) {
3486cleanup:
3487			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3488				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3489				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3490				pf_status.src_nodes--;
3491				pool_put(&pf_src_tree_pl, sn);
3492			}
3493			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3494			    nsn->expire == 0) {
3495				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3496				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3497				pf_status.src_nodes--;
3498				pool_put(&pf_src_tree_pl, nsn);
3499			}
3500			REASON_SET(&reason, PFRES_MEMORY);
3501			return (PF_DROP);
3502		}
3503		bzero(s, sizeof(*s));
3504		r->states++;
3505		if (a != NULL)
3506			a->states++;
3507		s->rule.ptr = r;
3508		s->nat_rule.ptr = nr;
3509		if (s->nat_rule.ptr != NULL)
3510			s->nat_rule.ptr->states++;
3511		s->anchor.ptr = a;
3512		s->allow_opts = r->allow_opts;
3513		s->log = r->log & 2;
3514		s->proto = pd->proto;
3515		s->direction = direction;
3516		s->af = af;
3517		if (direction == PF_OUT) {
3518			PF_ACPY(&s->gwy.addr, saddr, af);
3519			s->gwy.port = icmpid;
3520			PF_ACPY(&s->ext.addr, daddr, af);
3521			s->ext.port = icmpid;
3522			if (nr != NULL)
3523				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3524			else
3525				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3526			s->lan.port = icmpid;
3527		} else {
3528			PF_ACPY(&s->lan.addr, daddr, af);
3529			s->lan.port = icmpid;
3530			PF_ACPY(&s->ext.addr, saddr, af);
3531			s->ext.port = icmpid;
3532			if (nr != NULL)
3533				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3534			else
3535				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3536			s->gwy.port = icmpid;
3537		}
3538#ifdef __FreeBSD__
3539		s->creation = time_second;
3540		s->expire = time_second;
3541#else
3542		s->creation = time.tv_sec;
3543		s->expire = time.tv_sec;
3544#endif
3545		s->timeout = PFTM_ICMP_FIRST_PACKET;
3546		pf_set_rt_ifp(s, saddr);
3547		if (sn != NULL) {
3548			s->src_node = sn;
3549			s->src_node->states++;
3550		}
3551		if (nsn != NULL) {
3552			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3553			s->nat_src_node = nsn;
3554			s->nat_src_node->states++;
3555		}
3556		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3557			REASON_SET(&reason, PFRES_MEMORY);
3558			pf_src_tree_remove_state(s);
3559			pool_put(&pf_state_pl, s);
3560			return (PF_DROP);
3561		} else
3562			*sm = s;
3563	}
3564
3565#ifdef INET6
3566	/* copy back packet headers if we performed IPv6 NAT operations */
3567	if (rewrite)
3568		m_copyback(m, off, sizeof(struct icmp6_hdr),
3569		    (caddr_t)pd->hdr.icmp6);
3570#endif /* INET6 */
3571
3572	return (PF_PASS);
3573}
3574
3575int
3576pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
3577    struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3578    struct pf_rule **am, struct pf_ruleset **rsm)
3579{
3580	struct pf_rule		*nr = NULL;
3581	struct pf_rule		*r, *a = NULL;
3582	struct pf_ruleset	*ruleset = NULL;
3583	struct pf_src_node	*nsn = NULL;
3584	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3585	sa_family_t		 af = pd->af;
3586	u_short			 reason;
3587	struct pf_tag		*pftag = NULL;
3588	int			 tag = -1;
3589
3590	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3591
3592	if (direction == PF_OUT) {
3593		/* check outgoing packet for BINAT/NAT */
3594		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3595		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3596			PF_ACPY(&pd->baddr, saddr, af);
3597			switch (af) {
3598#ifdef INET
3599			case AF_INET:
3600				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3601				    pd->naddr.v4.s_addr, 0);
3602				break;
3603#endif /* INET */
3604#ifdef INET6
3605			case AF_INET6:
3606				PF_ACPY(saddr, &pd->naddr, af);
3607				break;
3608#endif /* INET6 */
3609			}
3610			if (nr->natpass)
3611				r = NULL;
3612			pd->nat_rule = nr;
3613		}
3614	} else {
3615		/* check incoming packet for BINAT/RDR */
3616		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3617		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3618			PF_ACPY(&pd->baddr, daddr, af);
3619			switch (af) {
3620#ifdef INET
3621			case AF_INET:
3622				pf_change_a(&daddr->v4.s_addr,
3623				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3624				break;
3625#endif /* INET */
3626#ifdef INET6
3627			case AF_INET6:
3628				PF_ACPY(daddr, &pd->naddr, af);
3629				break;
3630#endif /* INET6 */
3631			}
3632			if (nr->natpass)
3633				r = NULL;
3634			pd->nat_rule = nr;
3635		}
3636	}
3637
3638	while (r != NULL) {
3639		r->evaluations++;
3640		if (r->kif != NULL &&
3641		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3642			r = r->skip[PF_SKIP_IFP].ptr;
3643		else if (r->direction && r->direction != direction)
3644			r = r->skip[PF_SKIP_DIR].ptr;
3645		else if (r->af && r->af != af)
3646			r = r->skip[PF_SKIP_AF].ptr;
3647		else if (r->proto && r->proto != pd->proto)
3648			r = r->skip[PF_SKIP_PROTO].ptr;
3649		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3650			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3651		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3652			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3653		else if (r->tos && !(r->tos & pd->tos))
3654			r = TAILQ_NEXT(r, entries);
3655		else if (r->rule_flag & PFRULE_FRAGMENT)
3656			r = TAILQ_NEXT(r, entries);
3657		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3658			r = TAILQ_NEXT(r, entries);
3659		else if (r->anchorname[0] && r->anchor == NULL)
3660			r = TAILQ_NEXT(r, entries);
3661		else if (r->os_fingerprint != PF_OSFP_ANY)
3662			r = TAILQ_NEXT(r, entries);
3663		else {
3664			if (r->tag)
3665				tag = r->tag;
3666			if (r->anchor == NULL) {
3667				*rm = r;
3668				*am = a;
3669				*rsm = ruleset;
3670				if ((*rm)->quick)
3671					break;
3672				r = TAILQ_NEXT(r, entries);
3673			} else
3674				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3675				    PF_RULESET_FILTER);
3676		}
3677		if (r == NULL && a != NULL)
3678			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3679			    PF_RULESET_FILTER);
3680	}
3681	r = *rm;
3682	a = *am;
3683	ruleset = *rsm;
3684
3685	REASON_SET(&reason, PFRES_MATCH);
3686
3687	if (r->log)
3688		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3689
3690	if ((r->action == PF_DROP) &&
3691	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3692	    (r->rule_flag & PFRULE_RETURN))) {
3693		struct pf_addr *a = NULL;
3694
3695		if (nr != NULL) {
3696			if (direction == PF_OUT)
3697				a = saddr;
3698			else
3699				a = daddr;
3700		}
3701		if (a != NULL) {
3702			switch (af) {
3703#ifdef INET
3704			case AF_INET:
3705				pf_change_a(&a->v4.s_addr, pd->ip_sum,
3706				    pd->baddr.v4.s_addr, 0);
3707				break;
3708#endif /* INET */
3709#ifdef INET6
3710			case AF_INET6:
3711				PF_ACPY(a, &pd->baddr, af);
3712				break;
3713#endif /* INET6 */
3714			}
3715		}
3716		if ((af == AF_INET) && r->return_icmp)
3717			pf_send_icmp(m, r->return_icmp >> 8,
3718			    r->return_icmp & 255, af, r);
3719		else if ((af == AF_INET6) && r->return_icmp6)
3720			pf_send_icmp(m, r->return_icmp6 >> 8,
3721			    r->return_icmp6 & 255, af, r);
3722	}
3723
3724	if (r->action != PF_PASS)
3725		return (PF_DROP);
3726
3727	if (pf_tag_packet(m, pftag, tag)) {
3728		REASON_SET(&reason, PFRES_MEMORY);
3729		return (PF_DROP);
3730	}
3731
3732	if (r->keep_state || nr != NULL) {
3733		/* create new state */
3734		struct pf_state	*s = NULL;
3735		struct pf_src_node *sn = NULL;
3736
3737		/* check maximums */
3738		if (r->max_states && (r->states >= r->max_states))
3739			goto cleanup;
3740		/* src node for flter rule */
3741		if ((r->rule_flag & PFRULE_SRCTRACK ||
3742		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3743		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3744			goto cleanup;
3745		/* src node for translation rule */
3746		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3747		    ((direction == PF_OUT &&
3748		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3749		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3750			goto cleanup;
3751		s = pool_get(&pf_state_pl, PR_NOWAIT);
3752		if (s == NULL) {
3753cleanup:
3754			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3755				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3756				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3757				pf_status.src_nodes--;
3758				pool_put(&pf_src_tree_pl, sn);
3759			}
3760			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3761			    nsn->expire == 0) {
3762				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3763				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3764				pf_status.src_nodes--;
3765				pool_put(&pf_src_tree_pl, nsn);
3766			}
3767			REASON_SET(&reason, PFRES_MEMORY);
3768			return (PF_DROP);
3769		}
3770		bzero(s, sizeof(*s));
3771		r->states++;
3772		if (a != NULL)
3773			a->states++;
3774		s->rule.ptr = r;
3775		s->nat_rule.ptr = nr;
3776		if (s->nat_rule.ptr != NULL)
3777			s->nat_rule.ptr->states++;
3778		s->anchor.ptr = a;
3779		s->allow_opts = r->allow_opts;
3780		s->log = r->log & 2;
3781		s->proto = pd->proto;
3782		s->direction = direction;
3783		s->af = af;
3784		if (direction == PF_OUT) {
3785			PF_ACPY(&s->gwy.addr, saddr, af);
3786			PF_ACPY(&s->ext.addr, daddr, af);
3787			if (nr != NULL)
3788				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3789			else
3790				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3791		} else {
3792			PF_ACPY(&s->lan.addr, daddr, af);
3793			PF_ACPY(&s->ext.addr, saddr, af);
3794			if (nr != NULL)
3795				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3796			else
3797				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3798		}
3799		s->src.state = PFOTHERS_SINGLE;
3800		s->dst.state = PFOTHERS_NO_TRAFFIC;
3801#ifdef __FreeBSD__
3802		s->creation = time_second;
3803		s->expire = time_second;
3804#else
3805		s->creation = time.tv_sec;
3806		s->expire = time.tv_sec;
3807#endif
3808		s->timeout = PFTM_OTHER_FIRST_PACKET;
3809		pf_set_rt_ifp(s, saddr);
3810		if (sn != NULL) {
3811			s->src_node = sn;
3812			s->src_node->states++;
3813		}
3814		if (nsn != NULL) {
3815			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3816			s->nat_src_node = nsn;
3817			s->nat_src_node->states++;
3818		}
3819		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3820			REASON_SET(&reason, PFRES_MEMORY);
3821			pf_src_tree_remove_state(s);
3822			pool_put(&pf_state_pl, s);
3823			return (PF_DROP);
3824		} else
3825			*sm = s;
3826	}
3827
3828	return (PF_PASS);
3829}
3830
3831int
3832pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3833    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3834    struct pf_ruleset **rsm)
3835{
3836	struct pf_rule		*r, *a = NULL;
3837	struct pf_ruleset	*ruleset = NULL;
3838	sa_family_t		 af = pd->af;
3839	u_short			 reason;
3840	struct pf_tag		*pftag = NULL;
3841	int			 tag = -1;
3842
3843	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3844	while (r != NULL) {
3845		r->evaluations++;
3846		if (r->kif != NULL &&
3847		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3848			r = r->skip[PF_SKIP_IFP].ptr;
3849		else if (r->direction && r->direction != direction)
3850			r = r->skip[PF_SKIP_DIR].ptr;
3851		else if (r->af && r->af != af)
3852			r = r->skip[PF_SKIP_AF].ptr;
3853		else if (r->proto && r->proto != pd->proto)
3854			r = r->skip[PF_SKIP_PROTO].ptr;
3855		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3856			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3857		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3858			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3859		else if (r->tos && !(r->tos & pd->tos))
3860			r = TAILQ_NEXT(r, entries);
3861		else if (r->src.port_op || r->dst.port_op ||
3862		    r->flagset || r->type || r->code ||
3863		    r->os_fingerprint != PF_OSFP_ANY)
3864			r = TAILQ_NEXT(r, entries);
3865		else if (r->match_tag && !pf_match_tag(m, r, NULL, pftag, &tag))
3866			r = TAILQ_NEXT(r, entries);
3867		else if (r->anchorname[0] && r->anchor == NULL)
3868			r = TAILQ_NEXT(r, entries);
3869		else {
3870			if (r->anchor == NULL) {
3871				*rm = r;
3872				*am = a;
3873				*rsm = ruleset;
3874				if ((*rm)->quick)
3875					break;
3876				r = TAILQ_NEXT(r, entries);
3877			} else
3878				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3879				    PF_RULESET_FILTER);
3880		}
3881		if (r == NULL && a != NULL)
3882			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3883			    PF_RULESET_FILTER);
3884	}
3885	r = *rm;
3886	a = *am;
3887	ruleset = *rsm;
3888
3889	REASON_SET(&reason, PFRES_MATCH);
3890
3891	if (r->log)
3892		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3893
3894	if (r->action != PF_PASS)
3895		return (PF_DROP);
3896
3897	if (pf_tag_packet(m, pftag, tag)) {
3898		REASON_SET(&reason, PFRES_MEMORY);
3899		return (PF_DROP);
3900	}
3901
3902	return (PF_PASS);
3903}
3904
3905int
3906pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
3907    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3908    u_short *reason)
3909{
3910	struct pf_state		 key;
3911	struct tcphdr		*th = pd->hdr.tcp;
3912	u_int16_t		 win = ntohs(th->th_win);
3913	u_int32_t		 ack, end, seq;
3914	u_int8_t		 sws, dws;
3915	int			 ackskew;
3916	int			 copyback = 0;
3917	struct pf_state_peer	*src, *dst;
3918
3919	key.af = pd->af;
3920	key.proto = IPPROTO_TCP;
3921	if (direction == PF_IN)	{
3922		PF_ACPY(&key.ext.addr, pd->src, key.af);
3923		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
3924		key.ext.port = th->th_sport;
3925		key.gwy.port = th->th_dport;
3926	} else {
3927		PF_ACPY(&key.lan.addr, pd->src, key.af);
3928		PF_ACPY(&key.ext.addr, pd->dst, key.af);
3929		key.lan.port = th->th_sport;
3930		key.ext.port = th->th_dport;
3931	}
3932
3933	STATE_LOOKUP();
3934
3935	if (direction == (*state)->direction) {
3936		src = &(*state)->src;
3937		dst = &(*state)->dst;
3938	} else {
3939		src = &(*state)->dst;
3940		dst = &(*state)->src;
3941	}
3942
3943	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
3944		if (direction != (*state)->direction)
3945			return (PF_SYNPROXY_DROP);
3946		if (th->th_flags & TH_SYN) {
3947			if (ntohl(th->th_seq) != (*state)->src.seqlo)
3948				return (PF_DROP);
3949			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3950			    pd->src, th->th_dport, th->th_sport,
3951			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
3952			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0);
3953			return (PF_SYNPROXY_DROP);
3954		} else if (!(th->th_flags & TH_ACK) ||
3955		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3956		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
3957			return (PF_DROP);
3958		else
3959			(*state)->src.state = PF_TCPS_PROXY_DST;
3960	}
3961	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
3962		struct pf_state_host *src, *dst;
3963
3964		if (direction == PF_OUT) {
3965			src = &(*state)->gwy;
3966			dst = &(*state)->ext;
3967		} else {
3968			src = &(*state)->ext;
3969			dst = &(*state)->lan;
3970		}
3971		if (direction == (*state)->direction) {
3972			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
3973			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3974			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
3975				return (PF_DROP);
3976			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
3977			if ((*state)->dst.seqhi == 1)
3978				(*state)->dst.seqhi = arc4random();
3979			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3980			    &dst->addr, src->port, dst->port,
3981			    (*state)->dst.seqhi, 0, TH_SYN, 0,
3982			    (*state)->src.mss, 0);
3983			return (PF_SYNPROXY_DROP);
3984		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
3985		    (TH_SYN|TH_ACK)) ||
3986		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1))
3987			return (PF_DROP);
3988		else {
3989			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
3990			(*state)->dst.seqlo = ntohl(th->th_seq);
3991			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3992			    pd->src, th->th_dport, th->th_sport,
3993			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
3994			    TH_ACK, (*state)->src.max_win, 0, 0);
3995			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3996			    &dst->addr, src->port, dst->port,
3997			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
3998			    TH_ACK, (*state)->dst.max_win, 0, 0);
3999			(*state)->src.seqdiff = (*state)->dst.seqhi -
4000			    (*state)->src.seqlo;
4001			(*state)->dst.seqdiff = (*state)->src.seqhi -
4002			    (*state)->dst.seqlo;
4003			(*state)->src.seqhi = (*state)->src.seqlo +
4004			    (*state)->src.max_win;
4005			(*state)->dst.seqhi = (*state)->dst.seqlo +
4006			    (*state)->dst.max_win;
4007			(*state)->src.wscale = (*state)->dst.wscale = 0;
4008			(*state)->src.state = (*state)->dst.state =
4009			    TCPS_ESTABLISHED;
4010			return (PF_SYNPROXY_DROP);
4011		}
4012	}
4013
4014	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4015		sws = src->wscale & PF_WSCALE_MASK;
4016		dws = dst->wscale & PF_WSCALE_MASK;
4017	} else
4018		sws = dws = 0;
4019
4020	/*
4021	 * Sequence tracking algorithm from Guido van Rooij's paper:
4022	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
4023	 *	tcp_filtering.ps
4024	 */
4025
4026	seq = ntohl(th->th_seq);
4027	if (src->seqlo == 0) {
4028		/* First packet from this end. Set its state */
4029
4030		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4031		    src->scrub == NULL) {
4032			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4033				REASON_SET(reason, PFRES_MEMORY);
4034				return (PF_DROP);
4035			}
4036		}
4037
4038		/* Deferred generation of sequence number modulator */
4039		if (dst->seqdiff && !src->seqdiff) {
4040			while ((src->seqdiff = arc4random()) == 0)
4041				;
4042			ack = ntohl(th->th_ack) - dst->seqdiff;
4043			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4044			    src->seqdiff), 0);
4045			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4046			copyback = 1;
4047		} else {
4048			ack = ntohl(th->th_ack);
4049		}
4050
4051		end = seq + pd->p_len;
4052		if (th->th_flags & TH_SYN) {
4053			end++;
4054			if (dst->wscale & PF_WSCALE_FLAG) {
4055				src->wscale = pf_get_wscale(m, off, th->th_off,
4056				    pd->af);
4057				if (src->wscale & PF_WSCALE_FLAG) {
4058					/* Remove scale factor from initial
4059					 * window */
4060					sws = src->wscale & PF_WSCALE_MASK;
4061					win = ((u_int32_t)win + (1 << sws) - 1)
4062					    >> sws;
4063					dws = dst->wscale & PF_WSCALE_MASK;
4064				} else {
4065					/* fixup other window */
4066					dst->max_win <<= dst->wscale &
4067					    PF_WSCALE_MASK;
4068					/* in case of a retrans SYN|ACK */
4069					dst->wscale = 0;
4070				}
4071			}
4072		}
4073		if (th->th_flags & TH_FIN)
4074			end++;
4075
4076		src->seqlo = seq;
4077		if (src->state < TCPS_SYN_SENT)
4078			src->state = TCPS_SYN_SENT;
4079
4080		/*
4081		 * May need to slide the window (seqhi may have been set by
4082		 * the crappy stack check or if we picked up the connection
4083		 * after establishment)
4084		 */
4085		if (src->seqhi == 1 ||
4086		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4087			src->seqhi = end + MAX(1, dst->max_win << dws);
4088		if (win > src->max_win)
4089			src->max_win = win;
4090
4091	} else {
4092		ack = ntohl(th->th_ack) - dst->seqdiff;
4093		if (src->seqdiff) {
4094			/* Modulate sequence numbers */
4095			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4096			    src->seqdiff), 0);
4097			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4098			copyback = 1;
4099		}
4100		end = seq + pd->p_len;
4101		if (th->th_flags & TH_SYN)
4102			end++;
4103		if (th->th_flags & TH_FIN)
4104			end++;
4105	}
4106
4107	if ((th->th_flags & TH_ACK) == 0) {
4108		/* Let it pass through the ack skew check */
4109		ack = dst->seqlo;
4110	} else if ((ack == 0 &&
4111	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4112	    /* broken tcp stacks do not set ack */
4113	    (dst->state < TCPS_SYN_SENT)) {
4114		/*
4115		 * Many stacks (ours included) will set the ACK number in an
4116		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4117		 */
4118		ack = dst->seqlo;
4119	}
4120
4121	if (seq == end) {
4122		/* Ease sequencing restrictions on no data packets */
4123		seq = src->seqlo;
4124		end = seq;
4125	}
4126
4127	ackskew = dst->seqlo - ack;
4128
4129#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4130	if (SEQ_GEQ(src->seqhi, end) &&
4131	    /* Last octet inside other's window space */
4132	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4133	    /* Retrans: not more than one window back */
4134	    (ackskew >= -MAXACKWINDOW) &&
4135	    /* Acking not more than one reassembled fragment backwards */
4136	    (ackskew <= (MAXACKWINDOW << sws))) {
4137	    /* Acking not more than one window forward */
4138
4139		/* update max window */
4140		if (src->max_win < win)
4141			src->max_win = win;
4142		/* synchronize sequencing */
4143		if (SEQ_GT(end, src->seqlo))
4144			src->seqlo = end;
4145		/* slide the window of what the other end can send */
4146		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4147			dst->seqhi = ack + MAX((win << sws), 1);
4148
4149
4150		/* update states */
4151		if (th->th_flags & TH_SYN)
4152			if (src->state < TCPS_SYN_SENT)
4153				src->state = TCPS_SYN_SENT;
4154		if (th->th_flags & TH_FIN)
4155			if (src->state < TCPS_CLOSING)
4156				src->state = TCPS_CLOSING;
4157		if (th->th_flags & TH_ACK) {
4158			if (dst->state == TCPS_SYN_SENT)
4159				dst->state = TCPS_ESTABLISHED;
4160			else if (dst->state == TCPS_CLOSING)
4161				dst->state = TCPS_FIN_WAIT_2;
4162		}
4163		if (th->th_flags & TH_RST)
4164			src->state = dst->state = TCPS_TIME_WAIT;
4165
4166		/* update expire time */
4167#ifdef __FreeBSD__
4168		(*state)->expire = time_second;
4169#else
4170		(*state)->expire = time.tv_sec;
4171#endif
4172		if (src->state >= TCPS_FIN_WAIT_2 &&
4173		    dst->state >= TCPS_FIN_WAIT_2)
4174			(*state)->timeout = PFTM_TCP_CLOSED;
4175		else if (src->state >= TCPS_FIN_WAIT_2 ||
4176		    dst->state >= TCPS_FIN_WAIT_2)
4177			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4178		else if (src->state < TCPS_ESTABLISHED ||
4179		    dst->state < TCPS_ESTABLISHED)
4180			(*state)->timeout = PFTM_TCP_OPENING;
4181		else if (src->state >= TCPS_CLOSING ||
4182		    dst->state >= TCPS_CLOSING)
4183			(*state)->timeout = PFTM_TCP_CLOSING;
4184		else
4185			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4186
4187		/* Fall through to PASS packet */
4188
4189	} else if ((dst->state < TCPS_SYN_SENT ||
4190		dst->state >= TCPS_FIN_WAIT_2 ||
4191		src->state >= TCPS_FIN_WAIT_2) &&
4192	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4193	    /* Within a window forward of the originating packet */
4194	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4195	    /* Within a window backward of the originating packet */
4196
4197		/*
4198		 * This currently handles three situations:
4199		 *  1) Stupid stacks will shotgun SYNs before their peer
4200		 *     replies.
4201		 *  2) When PF catches an already established stream (the
4202		 *     firewall rebooted, the state table was flushed, routes
4203		 *     changed...)
4204		 *  3) Packets get funky immediately after the connection
4205		 *     closes (this should catch Solaris spurious ACK|FINs
4206		 *     that web servers like to spew after a close)
4207		 *
4208		 * This must be a little more careful than the above code
4209		 * since packet floods will also be caught here. We don't
4210		 * update the TTL here to mitigate the damage of a packet
4211		 * flood and so the same code can handle awkward establishment
4212		 * and a loosened connection close.
4213		 * In the establishment case, a correct peer response will
4214		 * validate the connection, go through the normal state code
4215		 * and keep updating the state TTL.
4216		 */
4217
4218		if (pf_status.debug >= PF_DEBUG_MISC) {
4219			printf("pf: loose state match: ");
4220			pf_print_state(*state);
4221			pf_print_flags(th->th_flags);
4222			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n",
4223			    seq, ack, pd->p_len, ackskew,
4224			    (*state)->packets[0], (*state)->packets[1]);
4225		}
4226
4227		/* update max window */
4228		if (src->max_win < win)
4229			src->max_win = win;
4230		/* synchronize sequencing */
4231		if (SEQ_GT(end, src->seqlo))
4232			src->seqlo = end;
4233		/* slide the window of what the other end can send */
4234		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4235			dst->seqhi = ack + MAX((win << sws), 1);
4236
4237		/*
4238		 * Cannot set dst->seqhi here since this could be a shotgunned
4239		 * SYN and not an already established connection.
4240		 */
4241
4242		if (th->th_flags & TH_FIN)
4243			if (src->state < TCPS_CLOSING)
4244				src->state = TCPS_CLOSING;
4245		if (th->th_flags & TH_RST)
4246			src->state = dst->state = TCPS_TIME_WAIT;
4247
4248		/* Fall through to PASS packet */
4249
4250	} else {
4251		if ((*state)->dst.state == TCPS_SYN_SENT &&
4252		    (*state)->src.state == TCPS_SYN_SENT) {
4253			/* Send RST for state mismatches during handshake */
4254			if (!(th->th_flags & TH_RST)) {
4255				u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
4256
4257				if (th->th_flags & TH_SYN)
4258					ack++;
4259				if (th->th_flags & TH_FIN)
4260					ack++;
4261				pf_send_tcp((*state)->rule.ptr, pd->af,
4262				    pd->dst, pd->src, th->th_dport,
4263				    th->th_sport, ntohl(th->th_ack), ack,
4264				    TH_RST|TH_ACK, 0, 0,
4265				    (*state)->rule.ptr->return_ttl);
4266			}
4267			src->seqlo = 0;
4268			src->seqhi = 1;
4269			src->max_win = 1;
4270		} else if (pf_status.debug >= PF_DEBUG_MISC) {
4271			printf("pf: BAD state: ");
4272			pf_print_state(*state);
4273			pf_print_flags(th->th_flags);
4274			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d "
4275			    "dir=%s,%s\n", seq, ack, pd->p_len, ackskew,
4276			    (*state)->packets[0], (*state)->packets[1],
4277			    direction == PF_IN ? "in" : "out",
4278			    direction == (*state)->direction ? "fwd" : "rev");
4279			printf("pf: State failure on: %c %c %c %c | %c %c\n",
4280			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4281			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4282			    ' ': '2',
4283			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4284			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4285			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4286			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4287		}
4288		return (PF_DROP);
4289	}
4290
4291	if (dst->scrub || src->scrub) {
4292		if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4293		    src, dst, &copyback))
4294			return (PF_DROP);
4295	}
4296
4297	/* Any packets which have gotten here are to be passed */
4298
4299	/* translate source/destination address, if necessary */
4300	if (STATE_TRANSLATE(*state)) {
4301		if (direction == PF_OUT)
4302			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4303			    &th->th_sum, &(*state)->gwy.addr,
4304			    (*state)->gwy.port, 0, pd->af);
4305		else
4306			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4307			    &th->th_sum, &(*state)->lan.addr,
4308			    (*state)->lan.port, 0, pd->af);
4309		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4310	} else if (copyback) {
4311		/* Copyback sequence modulation or stateful scrub changes */
4312		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4313	}
4314
4315	return (PF_PASS);
4316}
4317
4318int
4319pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4320    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4321{
4322	struct pf_state_peer	*src, *dst;
4323	struct pf_state		 key;
4324	struct udphdr		*uh = pd->hdr.udp;
4325
4326	key.af = pd->af;
4327	key.proto = IPPROTO_UDP;
4328	if (direction == PF_IN)	{
4329		PF_ACPY(&key.ext.addr, pd->src, key.af);
4330		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4331		key.ext.port = uh->uh_sport;
4332		key.gwy.port = uh->uh_dport;
4333	} else {
4334		PF_ACPY(&key.lan.addr, pd->src, key.af);
4335		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4336		key.lan.port = uh->uh_sport;
4337		key.ext.port = uh->uh_dport;
4338	}
4339
4340	STATE_LOOKUP();
4341
4342	if (direction == (*state)->direction) {
4343		src = &(*state)->src;
4344		dst = &(*state)->dst;
4345	} else {
4346		src = &(*state)->dst;
4347		dst = &(*state)->src;
4348	}
4349
4350	/* update states */
4351	if (src->state < PFUDPS_SINGLE)
4352		src->state = PFUDPS_SINGLE;
4353	if (dst->state == PFUDPS_SINGLE)
4354		dst->state = PFUDPS_MULTIPLE;
4355
4356	/* update expire time */
4357#ifdef __FreeBSD__
4358	(*state)->expire = time_second;
4359#else
4360	(*state)->expire = time.tv_sec;
4361#endif
4362	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4363		(*state)->timeout = PFTM_UDP_MULTIPLE;
4364	else
4365		(*state)->timeout = PFTM_UDP_SINGLE;
4366
4367	/* translate source/destination address, if necessary */
4368	if (STATE_TRANSLATE(*state)) {
4369		if (direction == PF_OUT)
4370			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4371			    &uh->uh_sum, &(*state)->gwy.addr,
4372			    (*state)->gwy.port, 1, pd->af);
4373		else
4374			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4375			    &uh->uh_sum, &(*state)->lan.addr,
4376			    (*state)->lan.port, 1, pd->af);
4377		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4378	}
4379
4380	return (PF_PASS);
4381}
4382
4383int
4384pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4385    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4386{
4387	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
4388	u_int16_t	 icmpid = 0;		/* make the compiler happy */
4389	u_int16_t	*icmpsum = NULL;	/* make the compiler happy */
4390	u_int8_t	 icmptype = 0;		/* make the compiler happy */
4391	int		 state_icmp = 0;
4392
4393	switch (pd->proto) {
4394#ifdef INET
4395	case IPPROTO_ICMP:
4396		icmptype = pd->hdr.icmp->icmp_type;
4397		icmpid = pd->hdr.icmp->icmp_id;
4398		icmpsum = &pd->hdr.icmp->icmp_cksum;
4399
4400		if (icmptype == ICMP_UNREACH ||
4401		    icmptype == ICMP_SOURCEQUENCH ||
4402		    icmptype == ICMP_REDIRECT ||
4403		    icmptype == ICMP_TIMXCEED ||
4404		    icmptype == ICMP_PARAMPROB)
4405			state_icmp++;
4406		break;
4407#endif /* INET */
4408#ifdef INET6
4409	case IPPROTO_ICMPV6:
4410		icmptype = pd->hdr.icmp6->icmp6_type;
4411		icmpid = pd->hdr.icmp6->icmp6_id;
4412		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4413
4414		if (icmptype == ICMP6_DST_UNREACH ||
4415		    icmptype == ICMP6_PACKET_TOO_BIG ||
4416		    icmptype == ICMP6_TIME_EXCEEDED ||
4417		    icmptype == ICMP6_PARAM_PROB)
4418			state_icmp++;
4419		break;
4420#endif /* INET6 */
4421	}
4422
4423	if (!state_icmp) {
4424
4425		/*
4426		 * ICMP query/reply message not related to a TCP/UDP packet.
4427		 * Search for an ICMP state.
4428		 */
4429		struct pf_state		key;
4430
4431		key.af = pd->af;
4432		key.proto = pd->proto;
4433		if (direction == PF_IN)	{
4434			PF_ACPY(&key.ext.addr, pd->src, key.af);
4435			PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4436			key.ext.port = icmpid;
4437			key.gwy.port = icmpid;
4438		} else {
4439			PF_ACPY(&key.lan.addr, pd->src, key.af);
4440			PF_ACPY(&key.ext.addr, pd->dst, key.af);
4441			key.lan.port = icmpid;
4442			key.ext.port = icmpid;
4443		}
4444
4445		STATE_LOOKUP();
4446
4447#ifdef __FreeBSD__
4448		(*state)->expire = time_second;
4449#else
4450		(*state)->expire = time.tv_sec;
4451#endif
4452		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4453
4454		/* translate source/destination address, if necessary */
4455		if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) {
4456			if (direction == PF_OUT) {
4457				switch (pd->af) {
4458#ifdef INET
4459				case AF_INET:
4460					pf_change_a(&saddr->v4.s_addr,
4461					    pd->ip_sum,
4462					    (*state)->gwy.addr.v4.s_addr, 0);
4463					break;
4464#endif /* INET */
4465#ifdef INET6
4466				case AF_INET6:
4467					pf_change_a6(saddr,
4468					    &pd->hdr.icmp6->icmp6_cksum,
4469					    &(*state)->gwy.addr, 0);
4470					m_copyback(m, off,
4471					    sizeof(struct icmp6_hdr),
4472					    (caddr_t)pd->hdr.icmp6);
4473					break;
4474#endif /* INET6 */
4475				}
4476			} else {
4477				switch (pd->af) {
4478#ifdef INET
4479				case AF_INET:
4480					pf_change_a(&daddr->v4.s_addr,
4481					    pd->ip_sum,
4482					    (*state)->lan.addr.v4.s_addr, 0);
4483					break;
4484#endif /* INET */
4485#ifdef INET6
4486				case AF_INET6:
4487					pf_change_a6(daddr,
4488					    &pd->hdr.icmp6->icmp6_cksum,
4489					    &(*state)->lan.addr, 0);
4490					m_copyback(m, off,
4491					    sizeof(struct icmp6_hdr),
4492					    (caddr_t)pd->hdr.icmp6);
4493					break;
4494#endif /* INET6 */
4495				}
4496			}
4497		}
4498
4499		return (PF_PASS);
4500
4501	} else {
4502		/*
4503		 * ICMP error message in response to a TCP/UDP packet.
4504		 * Extract the inner TCP/UDP header and search for that state.
4505		 */
4506
4507		struct pf_pdesc	pd2;
4508#ifdef INET
4509		struct ip	h2;
4510#endif /* INET */
4511#ifdef INET6
4512		struct ip6_hdr	h2_6;
4513		int		terminal = 0;
4514#endif /* INET6 */
4515		int		ipoff2 = 0;	/* make the compiler happy */
4516		int		off2 = 0;	/* make the compiler happy */
4517
4518		pd2.af = pd->af;
4519		switch (pd->af) {
4520#ifdef INET
4521		case AF_INET:
4522			/* offset of h2 in mbuf chain */
4523			ipoff2 = off + ICMP_MINLEN;
4524
4525			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4526			    NULL, NULL, pd2.af)) {
4527				DPFPRINTF(PF_DEBUG_MISC,
4528				    ("pf: ICMP error message too short "
4529				    "(ip)\n"));
4530				return (PF_DROP);
4531			}
4532			/*
4533			 * ICMP error messages don't refer to non-first
4534			 * fragments
4535			 */
4536			if (h2.ip_off & htons(IP_OFFMASK))
4537				return (PF_DROP);
4538
4539			/* offset of protocol header that follows h2 */
4540			off2 = ipoff2 + (h2.ip_hl << 2);
4541
4542			pd2.proto = h2.ip_p;
4543			pd2.src = (struct pf_addr *)&h2.ip_src;
4544			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4545			pd2.ip_sum = &h2.ip_sum;
4546			break;
4547#endif /* INET */
4548#ifdef INET6
4549		case AF_INET6:
4550			ipoff2 = off + sizeof(struct icmp6_hdr);
4551
4552			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4553			    NULL, NULL, pd2.af)) {
4554				DPFPRINTF(PF_DEBUG_MISC,
4555				    ("pf: ICMP error message too short "
4556				    "(ip6)\n"));
4557				return (PF_DROP);
4558			}
4559			pd2.proto = h2_6.ip6_nxt;
4560			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4561			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4562			pd2.ip_sum = NULL;
4563			off2 = ipoff2 + sizeof(h2_6);
4564			do {
4565				switch (pd2.proto) {
4566				case IPPROTO_FRAGMENT:
4567					/*
4568					 * ICMPv6 error messages for
4569					 * non-first fragments
4570					 */
4571					return (PF_DROP);
4572				case IPPROTO_AH:
4573				case IPPROTO_HOPOPTS:
4574				case IPPROTO_ROUTING:
4575				case IPPROTO_DSTOPTS: {
4576					/* get next header and header length */
4577					struct ip6_ext opt6;
4578
4579					if (!pf_pull_hdr(m, off2, &opt6,
4580					    sizeof(opt6), NULL, NULL, pd2.af)) {
4581						DPFPRINTF(PF_DEBUG_MISC,
4582						    ("pf: ICMPv6 short opt\n"));
4583						return (PF_DROP);
4584					}
4585					if (pd2.proto == IPPROTO_AH)
4586						off2 += (opt6.ip6e_len + 2) * 4;
4587					else
4588						off2 += (opt6.ip6e_len + 1) * 8;
4589					pd2.proto = opt6.ip6e_nxt;
4590					/* goto the next header */
4591					break;
4592				}
4593				default:
4594					terminal++;
4595					break;
4596				}
4597			} while (!terminal);
4598			break;
4599#endif /* INET6 */
4600		}
4601
4602		switch (pd2.proto) {
4603		case IPPROTO_TCP: {
4604			struct tcphdr		 th;
4605			u_int32_t		 seq;
4606			struct pf_state		 key;
4607			struct pf_state_peer	*src, *dst;
4608			u_int8_t		 dws;
4609			int			 copyback = 0;
4610
4611			/*
4612			 * Only the first 8 bytes of the TCP header can be
4613			 * expected. Don't access any TCP header fields after
4614			 * th_seq, an ackskew test is not possible.
4615			 */
4616			if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL, pd2.af)) {
4617				DPFPRINTF(PF_DEBUG_MISC,
4618				    ("pf: ICMP error message too short "
4619				    "(tcp)\n"));
4620				return (PF_DROP);
4621			}
4622
4623			key.af = pd2.af;
4624			key.proto = IPPROTO_TCP;
4625			if (direction == PF_IN)	{
4626				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4627				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4628				key.ext.port = th.th_dport;
4629				key.gwy.port = th.th_sport;
4630			} else {
4631				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4632				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4633				key.lan.port = th.th_dport;
4634				key.ext.port = th.th_sport;
4635			}
4636
4637			STATE_LOOKUP();
4638
4639			if (direction == (*state)->direction) {
4640				src = &(*state)->dst;
4641				dst = &(*state)->src;
4642			} else {
4643				src = &(*state)->src;
4644				dst = &(*state)->dst;
4645			}
4646
4647			if (src->wscale && dst->wscale &&
4648			    !(th.th_flags & TH_SYN))
4649				dws = dst->wscale & PF_WSCALE_MASK;
4650			else
4651				dws = 0;
4652
4653			/* Demodulate sequence number */
4654			seq = ntohl(th.th_seq) - src->seqdiff;
4655			if (src->seqdiff) {
4656				pf_change_a(&th.th_seq, icmpsum,
4657				    htonl(seq), 0);
4658				copyback = 1;
4659			}
4660
4661			if (!SEQ_GEQ(src->seqhi, seq) ||
4662			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
4663				if (pf_status.debug >= PF_DEBUG_MISC) {
4664					printf("pf: BAD ICMP %d:%d ",
4665					    icmptype, pd->hdr.icmp->icmp_code);
4666					pf_print_host(pd->src, 0, pd->af);
4667					printf(" -> ");
4668					pf_print_host(pd->dst, 0, pd->af);
4669					printf(" state: ");
4670					pf_print_state(*state);
4671					printf(" seq=%u\n", seq);
4672				}
4673				return (PF_DROP);
4674			}
4675
4676			if (STATE_TRANSLATE(*state)) {
4677				if (direction == PF_IN) {
4678					pf_change_icmp(pd2.src, &th.th_sport,
4679					    daddr, &(*state)->lan.addr,
4680					    (*state)->lan.port, NULL,
4681					    pd2.ip_sum, icmpsum,
4682					    pd->ip_sum, 0, pd2.af);
4683				} else {
4684					pf_change_icmp(pd2.dst, &th.th_dport,
4685					    saddr, &(*state)->gwy.addr,
4686					    (*state)->gwy.port, NULL,
4687					    pd2.ip_sum, icmpsum,
4688					    pd->ip_sum, 0, pd2.af);
4689				}
4690				copyback = 1;
4691			}
4692
4693			if (copyback) {
4694				switch (pd2.af) {
4695#ifdef INET
4696				case AF_INET:
4697					m_copyback(m, off, ICMP_MINLEN,
4698					    (caddr_t)pd->hdr.icmp);
4699					m_copyback(m, ipoff2, sizeof(h2),
4700					    (caddr_t)&h2);
4701					break;
4702#endif /* INET */
4703#ifdef INET6
4704				case AF_INET6:
4705					m_copyback(m, off,
4706					    sizeof(struct icmp6_hdr),
4707					    (caddr_t)pd->hdr.icmp6);
4708					m_copyback(m, ipoff2, sizeof(h2_6),
4709					    (caddr_t)&h2_6);
4710					break;
4711#endif /* INET6 */
4712				}
4713				m_copyback(m, off2, 8, (caddr_t)&th);
4714			}
4715
4716			return (PF_PASS);
4717			break;
4718		}
4719		case IPPROTO_UDP: {
4720			struct udphdr		uh;
4721			struct pf_state		key;
4722
4723			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4724			    NULL, NULL, pd2.af)) {
4725				DPFPRINTF(PF_DEBUG_MISC,
4726				    ("pf: ICMP error message too short "
4727				    "(udp)\n"));
4728				return (PF_DROP);
4729			}
4730
4731			key.af = pd2.af;
4732			key.proto = IPPROTO_UDP;
4733			if (direction == PF_IN)	{
4734				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4735				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4736				key.ext.port = uh.uh_dport;
4737				key.gwy.port = uh.uh_sport;
4738			} else {
4739				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4740				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4741				key.lan.port = uh.uh_dport;
4742				key.ext.port = uh.uh_sport;
4743			}
4744
4745			STATE_LOOKUP();
4746
4747			if (STATE_TRANSLATE(*state)) {
4748				if (direction == PF_IN) {
4749					pf_change_icmp(pd2.src, &uh.uh_sport,
4750					    daddr, &(*state)->lan.addr,
4751					    (*state)->lan.port, &uh.uh_sum,
4752					    pd2.ip_sum, icmpsum,
4753					    pd->ip_sum, 1, pd2.af);
4754				} else {
4755					pf_change_icmp(pd2.dst, &uh.uh_dport,
4756					    saddr, &(*state)->gwy.addr,
4757					    (*state)->gwy.port, &uh.uh_sum,
4758					    pd2.ip_sum, icmpsum,
4759					    pd->ip_sum, 1, pd2.af);
4760				}
4761				switch (pd2.af) {
4762#ifdef INET
4763				case AF_INET:
4764					m_copyback(m, off, ICMP_MINLEN,
4765					    (caddr_t)pd->hdr.icmp);
4766					m_copyback(m, ipoff2, sizeof(h2),
4767					    (caddr_t)&h2);
4768					break;
4769#endif /* INET */
4770#ifdef INET6
4771				case AF_INET6:
4772					m_copyback(m, off,
4773					    sizeof(struct icmp6_hdr),
4774					    (caddr_t)pd->hdr.icmp6);
4775					m_copyback(m, ipoff2, sizeof(h2_6),
4776					    (caddr_t)&h2_6);
4777					break;
4778#endif /* INET6 */
4779				}
4780				m_copyback(m, off2, sizeof(uh),
4781				    (caddr_t)&uh);
4782			}
4783
4784			return (PF_PASS);
4785			break;
4786		}
4787#ifdef INET
4788		case IPPROTO_ICMP: {
4789			struct icmp		iih;
4790			struct pf_state		key;
4791
4792			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4793			    NULL, NULL, pd2.af)) {
4794				DPFPRINTF(PF_DEBUG_MISC,
4795				    ("pf: ICMP error message too short i"
4796				    "(icmp)\n"));
4797				return (PF_DROP);
4798			}
4799
4800			key.af = pd2.af;
4801			key.proto = IPPROTO_ICMP;
4802			if (direction == PF_IN)	{
4803				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4804				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4805				key.ext.port = iih.icmp_id;
4806				key.gwy.port = iih.icmp_id;
4807			} else {
4808				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4809				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4810				key.lan.port = iih.icmp_id;
4811				key.ext.port = iih.icmp_id;
4812			}
4813
4814			STATE_LOOKUP();
4815
4816			if (STATE_TRANSLATE(*state)) {
4817				if (direction == PF_IN) {
4818					pf_change_icmp(pd2.src, &iih.icmp_id,
4819					    daddr, &(*state)->lan.addr,
4820					    (*state)->lan.port, NULL,
4821					    pd2.ip_sum, icmpsum,
4822					    pd->ip_sum, 0, AF_INET);
4823				} else {
4824					pf_change_icmp(pd2.dst, &iih.icmp_id,
4825					    saddr, &(*state)->gwy.addr,
4826					    (*state)->gwy.port, NULL,
4827					    pd2.ip_sum, icmpsum,
4828					    pd->ip_sum, 0, AF_INET);
4829				}
4830				m_copyback(m, off, ICMP_MINLEN,
4831				    (caddr_t)pd->hdr.icmp);
4832				m_copyback(m, ipoff2, sizeof(h2),
4833				    (caddr_t)&h2);
4834				m_copyback(m, off2, ICMP_MINLEN,
4835				    (caddr_t)&iih);
4836			}
4837
4838			return (PF_PASS);
4839			break;
4840		}
4841#endif /* INET */
4842#ifdef INET6
4843		case IPPROTO_ICMPV6: {
4844			struct icmp6_hdr	iih;
4845			struct pf_state		key;
4846
4847			if (!pf_pull_hdr(m, off2, &iih,
4848			    sizeof(struct icmp6_hdr), NULL, NULL, pd2.af)) {
4849				DPFPRINTF(PF_DEBUG_MISC,
4850				    ("pf: ICMP error message too short "
4851				    "(icmp6)\n"));
4852				return (PF_DROP);
4853			}
4854
4855			key.af = pd2.af;
4856			key.proto = IPPROTO_ICMPV6;
4857			if (direction == PF_IN)	{
4858				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4859				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4860				key.ext.port = iih.icmp6_id;
4861				key.gwy.port = iih.icmp6_id;
4862			} else {
4863				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4864				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4865				key.lan.port = iih.icmp6_id;
4866				key.ext.port = iih.icmp6_id;
4867			}
4868
4869			STATE_LOOKUP();
4870
4871			if (STATE_TRANSLATE(*state)) {
4872				if (direction == PF_IN) {
4873					pf_change_icmp(pd2.src, &iih.icmp6_id,
4874					    daddr, &(*state)->lan.addr,
4875					    (*state)->lan.port, NULL,
4876					    pd2.ip_sum, icmpsum,
4877					    pd->ip_sum, 0, AF_INET6);
4878				} else {
4879					pf_change_icmp(pd2.dst, &iih.icmp6_id,
4880					    saddr, &(*state)->gwy.addr,
4881					    (*state)->gwy.port, NULL,
4882					    pd2.ip_sum, icmpsum,
4883					    pd->ip_sum, 0, AF_INET6);
4884				}
4885				m_copyback(m, off, sizeof(struct icmp6_hdr),
4886				    (caddr_t)pd->hdr.icmp6);
4887				m_copyback(m, ipoff2, sizeof(h2_6),
4888				    (caddr_t)&h2_6);
4889				m_copyback(m, off2, sizeof(struct icmp6_hdr),
4890				    (caddr_t)&iih);
4891			}
4892
4893			return (PF_PASS);
4894			break;
4895		}
4896#endif /* INET6 */
4897		default: {
4898			struct pf_state		key;
4899
4900			key.af = pd2.af;
4901			key.proto = pd2.proto;
4902			if (direction == PF_IN)	{
4903				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4904				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4905				key.ext.port = 0;
4906				key.gwy.port = 0;
4907			} else {
4908				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4909				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4910				key.lan.port = 0;
4911				key.ext.port = 0;
4912			}
4913
4914			STATE_LOOKUP();
4915
4916			if (STATE_TRANSLATE(*state)) {
4917				if (direction == PF_IN) {
4918					pf_change_icmp(pd2.src, NULL,
4919					    daddr, &(*state)->lan.addr,
4920					    0, NULL,
4921					    pd2.ip_sum, icmpsum,
4922					    pd->ip_sum, 0, pd2.af);
4923				} else {
4924					pf_change_icmp(pd2.dst, NULL,
4925					    saddr, &(*state)->gwy.addr,
4926					    0, NULL,
4927					    pd2.ip_sum, icmpsum,
4928					    pd->ip_sum, 0, pd2.af);
4929				}
4930				switch (pd2.af) {
4931#ifdef INET
4932				case AF_INET:
4933					m_copyback(m, off, ICMP_MINLEN,
4934					    (caddr_t)pd->hdr.icmp);
4935					m_copyback(m, ipoff2, sizeof(h2),
4936					    (caddr_t)&h2);
4937					break;
4938#endif /* INET */
4939#ifdef INET6
4940				case AF_INET6:
4941					m_copyback(m, off,
4942					    sizeof(struct icmp6_hdr),
4943					    (caddr_t)pd->hdr.icmp6);
4944					m_copyback(m, ipoff2, sizeof(h2_6),
4945					    (caddr_t)&h2_6);
4946					break;
4947#endif /* INET6 */
4948				}
4949			}
4950
4951			return (PF_PASS);
4952			break;
4953		}
4954		}
4955	}
4956}
4957
4958int
4959pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
4960    struct pf_pdesc *pd)
4961{
4962	struct pf_state_peer	*src, *dst;
4963	struct pf_state		 key;
4964
4965	key.af = pd->af;
4966	key.proto = pd->proto;
4967	if (direction == PF_IN)	{
4968		PF_ACPY(&key.ext.addr, pd->src, key.af);
4969		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4970		key.ext.port = 0;
4971		key.gwy.port = 0;
4972	} else {
4973		PF_ACPY(&key.lan.addr, pd->src, key.af);
4974		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4975		key.lan.port = 0;
4976		key.ext.port = 0;
4977	}
4978
4979	STATE_LOOKUP();
4980
4981	if (direction == (*state)->direction) {
4982		src = &(*state)->src;
4983		dst = &(*state)->dst;
4984	} else {
4985		src = &(*state)->dst;
4986		dst = &(*state)->src;
4987	}
4988
4989	/* update states */
4990	if (src->state < PFOTHERS_SINGLE)
4991		src->state = PFOTHERS_SINGLE;
4992	if (dst->state == PFOTHERS_SINGLE)
4993		dst->state = PFOTHERS_MULTIPLE;
4994
4995	/* update expire time */
4996#ifdef __FreeBSD__
4997	(*state)->expire = time_second;
4998#else
4999	(*state)->expire = time.tv_sec;
5000#endif
5001	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
5002		(*state)->timeout = PFTM_OTHER_MULTIPLE;
5003	else
5004		(*state)->timeout = PFTM_OTHER_SINGLE;
5005
5006	/* translate source/destination address, if necessary */
5007	if (STATE_TRANSLATE(*state)) {
5008		if (direction == PF_OUT)
5009			switch (pd->af) {
5010#ifdef INET
5011			case AF_INET:
5012				pf_change_a(&pd->src->v4.s_addr,
5013				    pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
5014				    0);
5015				break;
5016#endif /* INET */
5017#ifdef INET6
5018			case AF_INET6:
5019				PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
5020				break;
5021#endif /* INET6 */
5022			}
5023		else
5024			switch (pd->af) {
5025#ifdef INET
5026			case AF_INET:
5027				pf_change_a(&pd->dst->v4.s_addr,
5028				    pd->ip_sum, (*state)->lan.addr.v4.s_addr,
5029				    0);
5030				break;
5031#endif /* INET */
5032#ifdef INET6
5033			case AF_INET6:
5034				PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
5035				break;
5036#endif /* INET6 */
5037			}
5038	}
5039
5040	return (PF_PASS);
5041}
5042
5043/*
5044 * ipoff and off are measured from the start of the mbuf chain.
5045 * h must be at "ipoff" on the mbuf chain.
5046 */
5047void *
5048pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5049    u_short *actionp, u_short *reasonp, sa_family_t af)
5050{
5051	switch (af) {
5052#ifdef INET
5053	case AF_INET: {
5054		struct ip	*h = mtod(m, struct ip *);
5055		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5056
5057		if (fragoff) {
5058			if (fragoff >= len)
5059				ACTION_SET(actionp, PF_PASS);
5060			else {
5061				ACTION_SET(actionp, PF_DROP);
5062				REASON_SET(reasonp, PFRES_FRAG);
5063			}
5064			return (NULL);
5065		}
5066		if (m->m_pkthdr.len < off + len ||
5067		    ntohs(h->ip_len) < off + len) {
5068			ACTION_SET(actionp, PF_DROP);
5069			REASON_SET(reasonp, PFRES_SHORT);
5070			return (NULL);
5071		}
5072		break;
5073	}
5074#endif /* INET */
5075#ifdef INET6
5076	case AF_INET6: {
5077		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5078
5079		if (m->m_pkthdr.len < off + len ||
5080		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5081		    (unsigned)(off + len)) {
5082			ACTION_SET(actionp, PF_DROP);
5083			REASON_SET(reasonp, PFRES_SHORT);
5084			return (NULL);
5085		}
5086		break;
5087	}
5088#endif /* INET6 */
5089	}
5090	m_copydata(m, off, len, p);
5091	return (p);
5092}
5093
5094int
5095pf_routable(struct pf_addr *addr, sa_family_t af)
5096{
5097	struct sockaddr_in	*dst;
5098	struct route		 ro;
5099	int			 ret = 0;
5100
5101	bzero(&ro, sizeof(ro));
5102	dst = satosin(&ro.ro_dst);
5103	dst->sin_family = af;
5104	dst->sin_len = sizeof(*dst);
5105	dst->sin_addr = addr->v4;
5106#ifdef __FreeBSD__
5107#ifdef RTF_PRCLONING
5108	rtalloc_ign(&ro, (RTF_CLONING|RTF_PRCLONING));
5109#else /* !RTF_PRCLONING */
5110	rtalloc_ign(&ro, RTF_CLONING);
5111#endif
5112#else /* ! __FreeBSD__ */
5113	rtalloc_noclone(&ro, NO_CLONING);
5114#endif
5115
5116	if (ro.ro_rt != NULL) {
5117		ret = 1;
5118		RTFREE(ro.ro_rt);
5119	}
5120
5121	return (ret);
5122}
5123
5124#ifdef INET
5125
5126void
5127pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5128    struct pf_state *s)
5129{
5130	struct mbuf		*m0, *m1;
5131	struct m_tag		*mtag;
5132	struct route		 iproute;
5133	struct route		*ro = NULL;	/* XXX: was uninitialized */
5134	struct sockaddr_in	*dst;
5135	struct ip		*ip;
5136	struct ifnet		*ifp = NULL;
5137	struct pf_addr		 naddr;
5138	struct pf_src_node	*sn = NULL;
5139	int			 error = 0;
5140#ifdef __FreeBSD__
5141	int sw_csum;
5142#endif
5143
5144	if (m == NULL || *m == NULL || r == NULL ||
5145	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5146		panic("pf_route: invalid parameters");
5147
5148	if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) {
5149		if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) ==
5150		    NULL) {
5151			m0 = *m;
5152			*m = NULL;
5153			goto bad;
5154		}
5155		*(char *)(mtag + 1) = 1;
5156		m_tag_prepend(*m, mtag);
5157	} else {
5158		if (*(char *)(mtag + 1) > 3) {
5159			m0 = *m;
5160			*m = NULL;
5161			goto bad;
5162		}
5163		(*(char *)(mtag + 1))++;
5164	}
5165
5166	if (r->rt == PF_DUPTO) {
5167#ifdef __FreeBSD__
5168		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
5169#else
5170		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5171#endif
5172			return;
5173#ifdef __FreeBSD__
5174		if ((mtag = m_tag_copy(mtag, M_DONTWAIT)) == NULL)
5175#else
5176		if ((mtag = m_tag_copy(mtag)) == NULL)
5177#endif
5178			goto bad;
5179		m_tag_prepend(m0, mtag);
5180	} else {
5181		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5182			return;
5183		m0 = *m;
5184	}
5185
5186	if (m0->m_len < sizeof(struct ip))
5187		panic("pf_route: m0->m_len < sizeof(struct ip)");
5188	ip = mtod(m0, struct ip *);
5189
5190	ro = &iproute;
5191	bzero((caddr_t)ro, sizeof(*ro));
5192	dst = satosin(&ro->ro_dst);
5193	dst->sin_family = AF_INET;
5194	dst->sin_len = sizeof(*dst);
5195	dst->sin_addr = ip->ip_dst;
5196
5197	if (r->rt == PF_FASTROUTE) {
5198		rtalloc(ro);
5199		if (ro->ro_rt == 0) {
5200			ipstat.ips_noroute++;
5201			goto bad;
5202		}
5203
5204		ifp = ro->ro_rt->rt_ifp;
5205		ro->ro_rt->rt_use++;
5206
5207		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
5208			dst = satosin(ro->ro_rt->rt_gateway);
5209	} else {
5210		if (TAILQ_EMPTY(&r->rpool.list))
5211			panic("pf_route: TAILQ_EMPTY(&r->rpool.list)");
5212		if (s == NULL) {
5213			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
5214			    &naddr, NULL, &sn);
5215			if (!PF_AZERO(&naddr, AF_INET))
5216				dst->sin_addr.s_addr = naddr.v4.s_addr;
5217			ifp = r->rpool.cur->kif ?
5218			    r->rpool.cur->kif->pfik_ifp : NULL;
5219		} else {
5220			if (!PF_AZERO(&s->rt_addr, AF_INET))
5221				dst->sin_addr.s_addr =
5222				    s->rt_addr.v4.s_addr;
5223			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5224		}
5225	}
5226	if (ifp == NULL)
5227		goto bad;
5228
5229	if (oifp != ifp) {
5230#ifdef __FreeBSD__
5231		PF_UNLOCK();
5232		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) {
5233			PF_LOCK();
5234			goto bad;
5235		} else if (m0 == NULL) {
5236			PF_LOCK();
5237			goto done;
5238		}
5239		PF_LOCK();
5240#else
5241		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS)
5242			goto bad;
5243		else if (m0 == NULL)
5244			goto done;
5245#endif
5246		if (m0->m_len < sizeof(struct ip))
5247			panic("pf_route: m0->m_len < sizeof(struct ip)");
5248		ip = mtod(m0, struct ip *);
5249	}
5250
5251#ifdef __FreeBSD__
5252	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
5253	m0->m_pkthdr.csum_flags |= CSUM_IP;
5254	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
5255	if (sw_csum & CSUM_DELAY_DATA) {
5256		/*
5257		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
5258		 */
5259		NTOHS(ip->ip_len);
5260		NTOHS(ip->ip_off);	 /* XXX: needed? */
5261		in_delayed_cksum(m0);
5262		HTONS(ip->ip_len);
5263		HTONS(ip->ip_off);
5264		sw_csum &= ~CSUM_DELAY_DATA;
5265	}
5266	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
5267
5268	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
5269	    (ifp->if_hwassist & CSUM_FRAGMENT &&
5270		((ip->ip_off & htons(IP_DF)) == 0))) {
5271		/*
5272		 * ip->ip_len = htons(ip->ip_len);
5273		 * ip->ip_off = htons(ip->ip_off);
5274		 */
5275		ip->ip_sum = 0;
5276		if (sw_csum & CSUM_DELAY_IP) {
5277			/* From KAME */
5278			if (ip->ip_v == IPVERSION &&
5279			    (ip->ip_hl << 2) == sizeof(*ip)) {
5280				ip->ip_sum = in_cksum_hdr(ip);
5281			} else {
5282				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5283			}
5284		}
5285		PF_UNLOCK();
5286		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt);
5287		PF_LOCK();
5288		goto done;
5289	}
5290
5291#else
5292	/* Copied from ip_output. */
5293#ifdef IPSEC
5294	/*
5295	 * If deferred crypto processing is needed, check that the
5296	 * interface supports it.
5297	 */
5298	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
5299	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
5300		/* Notify IPsec to do its own crypto. */
5301		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
5302		goto bad;
5303	}
5304#endif /* IPSEC */
5305
5306	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
5307	if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) {
5308		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
5309		    ifp->if_bridge != NULL) {
5310			in_delayed_cksum(m0);
5311			m0->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */
5312		}
5313	} else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) {
5314		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
5315		    ifp->if_bridge != NULL) {
5316			in_delayed_cksum(m0);
5317			m0->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */
5318		}
5319	}
5320
5321	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
5322		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
5323		    ifp->if_bridge == NULL) {
5324			m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
5325			ipstat.ips_outhwcsum++;
5326		} else {
5327			ip->ip_sum = 0;
5328			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5329		}
5330		/* Update relevant hardware checksum stats for TCP/UDP */
5331		if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT)
5332			tcpstat.tcps_outhwcsum++;
5333		else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT)
5334			udpstat.udps_outhwcsum++;
5335		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
5336		goto done;
5337	}
5338#endif
5339	/*
5340	 * Too large for interface; fragment if possible.
5341	 * Must be able to put at least 8 bytes per fragment.
5342	 */
5343	if (ip->ip_off & htons(IP_DF)) {
5344		ipstat.ips_cantfrag++;
5345		if (r->rt != PF_DUPTO) {
5346#ifdef __FreeBSD__
5347			/* icmp_error() expects host byte ordering */
5348			NTOHS(ip->ip_len);
5349			NTOHS(ip->ip_off);
5350			PF_UNLOCK();
5351#endif
5352			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5353			    ifp);
5354#ifdef __FreeBSD__
5355			PF_LOCK();
5356#endif
5357			goto done;
5358		} else
5359			goto bad;
5360	}
5361
5362	m1 = m0;
5363#ifdef __FreeBSD__
5364	/*
5365	 * XXX: is cheaper + less error prone than own function
5366	 */
5367	NTOHS(ip->ip_len);
5368	NTOHS(ip->ip_off);
5369	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
5370#else
5371	error = ip_fragment(m0, ifp, ifp->if_mtu);
5372#endif
5373	if (error) {
5374#ifndef __FreeBSD__	/* ip_fragment does not do m_freem() on FreeBSD */
5375		m0 = NULL;
5376#endif
5377		goto bad;
5378	}
5379
5380	for (m0 = m1; m0; m0 = m1) {
5381		m1 = m0->m_nextpkt;
5382		m0->m_nextpkt = 0;
5383#ifdef __FreeBSD__
5384		if (error == 0) {
5385			PF_UNLOCK();
5386			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5387			    NULL);
5388			PF_LOCK();
5389		} else
5390#else
5391		if (error == 0)
5392			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5393			    NULL);
5394		else
5395#endif
5396			m_freem(m0);
5397	}
5398
5399	if (error == 0)
5400		ipstat.ips_fragmented++;
5401
5402done:
5403	if (r->rt != PF_DUPTO)
5404		*m = NULL;
5405	if (ro == &iproute && ro->ro_rt)
5406		RTFREE(ro->ro_rt);
5407	return;
5408
5409bad:
5410	m_freem(m0);
5411	goto done;
5412}
5413#endif /* INET */
5414
5415#ifdef INET6
5416void
5417pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5418    struct pf_state *s)
5419{
5420	struct mbuf		*m0;
5421	struct m_tag		*mtag;
5422	struct route_in6	 ip6route;
5423	struct route_in6	*ro;
5424	struct sockaddr_in6	*dst;
5425	struct ip6_hdr		*ip6;
5426	struct ifnet		*ifp = NULL;
5427	struct pf_addr		 naddr;
5428	struct pf_src_node	*sn = NULL;
5429	int			 error = 0;
5430
5431	if (m == NULL || *m == NULL || r == NULL ||
5432	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5433		panic("pf_route6: invalid parameters");
5434
5435	if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) {
5436		if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) ==
5437		    NULL) {
5438			m0 = *m;
5439			*m = NULL;
5440			goto bad;
5441		}
5442		*(char *)(mtag + 1) = 1;
5443		m_tag_prepend(*m, mtag);
5444	} else {
5445		if (*(char *)(mtag + 1) > 3) {
5446			m0 = *m;
5447			*m = NULL;
5448			goto bad;
5449		}
5450		(*(char *)(mtag + 1))++;
5451	}
5452
5453	if (r->rt == PF_DUPTO) {
5454#ifdef __FreeBSD__
5455		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
5456#else
5457		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5458#endif
5459			return;
5460#ifdef __FreeBSD__
5461		if ((mtag = m_tag_copy(mtag, M_DONTWAIT)) == NULL)
5462#else
5463		if ((mtag = m_tag_copy(mtag)) == NULL)
5464#endif
5465			goto bad;
5466		m_tag_prepend(m0, mtag);
5467	} else {
5468		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5469			return;
5470		m0 = *m;
5471	}
5472
5473	if (m0->m_len < sizeof(struct ip6_hdr))
5474		panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
5475	ip6 = mtod(m0, struct ip6_hdr *);
5476
5477	ro = &ip6route;
5478	bzero((caddr_t)ro, sizeof(*ro));
5479	dst = (struct sockaddr_in6 *)&ro->ro_dst;
5480	dst->sin6_family = AF_INET6;
5481	dst->sin6_len = sizeof(*dst);
5482	dst->sin6_addr = ip6->ip6_dst;
5483
5484	/* Cheat. */
5485	if (r->rt == PF_FASTROUTE) {
5486#ifdef __FreeBSD__
5487		m0->m_flags |= M_SKIP_FIREWALL;
5488		PF_UNLOCK();
5489		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5490		PF_LOCK();
5491#else
5492		mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
5493		if (mtag == NULL)
5494			goto bad;
5495		m_tag_prepend(m0, mtag);
5496		ip6_output(m0, NULL, NULL, 0, NULL, NULL);
5497#endif
5498		return;
5499	}
5500
5501	if (TAILQ_EMPTY(&r->rpool.list))
5502		panic("pf_route6: TAILQ_EMPTY(&r->rpool.list)");
5503	if (s == NULL) {
5504		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
5505		    &naddr, NULL, &sn);
5506		if (!PF_AZERO(&naddr, AF_INET6))
5507			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5508			    &naddr, AF_INET6);
5509		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
5510	} else {
5511		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5512			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5513			    &s->rt_addr, AF_INET6);
5514		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5515	}
5516	if (ifp == NULL)
5517		goto bad;
5518
5519	if (oifp != ifp) {
5520#ifdef __FreeBSD__
5521		PF_UNLOCK();
5522		if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) {
5523			PF_LOCK();
5524			goto bad;
5525		} else if (m0 == NULL) {
5526			PF_LOCK();
5527			goto done;
5528		}
5529		PF_LOCK();
5530#else
5531		if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS)
5532			goto bad;
5533		else if (m0 == NULL)
5534			goto done;
5535#endif
5536		if (m0->m_len < sizeof(struct ip6_hdr))
5537			panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
5538		ip6 = mtod(m0, struct ip6_hdr *);
5539	}
5540
5541	/*
5542	 * If the packet is too large for the outgoing interface,
5543	 * send back an icmp6 error.
5544	 */
5545	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
5546		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5547	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5548#ifdef __FreeBSD__
5549		PF_UNLOCK();
5550#endif
5551		error = nd6_output(ifp, ifp, m0, dst, NULL);
5552#ifdef __FreeBSD__
5553		PF_LOCK();
5554#endif
5555	} else {
5556		in6_ifstat_inc(ifp, ifs6_in_toobig);
5557#ifdef __FreeBSD__
5558		if (r->rt != PF_DUPTO) {
5559			PF_UNLOCK();
5560			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5561			PF_LOCK();
5562		 } else
5563#else
5564		if (r->rt != PF_DUPTO)
5565			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5566		else
5567#endif
5568			goto bad;
5569	}
5570
5571done:
5572	if (r->rt != PF_DUPTO)
5573		*m = NULL;
5574	return;
5575
5576bad:
5577	m_freem(m0);
5578	goto done;
5579}
5580#endif /* INET6 */
5581
5582
5583#ifdef __FreeBSD__
5584/*
5585 * FreeBSD supports cksum offloads for the following drivers.
5586 *  em(4), fxp(4), gx(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
5587 *   ti(4), txp(4), xl(4)
5588 *
5589 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
5590 *  network driver performed cksum including pseudo header, need to verify
5591 *   csum_data
5592 * CSUM_DATA_VALID :
5593 *  network driver performed cksum, needs to additional pseudo header
5594 *  cksum computation with partial csum_data(i.e. lack of H/W support for
5595 *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
5596 *
5597 * After validating the cksum of packet, set both flag CSUM_DATA_VALID and
5598 * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
5599 * TCP/UDP layer.
5600 * Also, set csum_data to 0xffff to force cksum validation.
5601 */
5602int
5603pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
5604{
5605	u_int16_t sum = 0;
5606	int hw_assist = 0;
5607	struct ip *ip;
5608
5609	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5610		return (1);
5611	if (m->m_pkthdr.len < off + len)
5612		return (1);
5613
5614	switch (p) {
5615	case IPPROTO_TCP:
5616		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5617			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5618				sum = m->m_pkthdr.csum_data;
5619			} else {
5620				ip = mtod(m, struct ip *);
5621				sum = in_pseudo(ip->ip_src.s_addr,
5622					ip->ip_dst.s_addr,
5623					htonl(m->m_pkthdr.csum_data +
5624					    IPPROTO_TCP + ntohs(ip->ip_len)));
5625			}
5626			sum ^= 0xffff;
5627			++hw_assist;
5628		}
5629		break;
5630	case IPPROTO_UDP:
5631		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5632			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5633				sum = m->m_pkthdr.csum_data;
5634			} else {
5635				ip = mtod(m, struct ip *);
5636				sum = in_pseudo(ip->ip_src.s_addr,
5637					ip->ip_dst.s_addr, htonl((u_short)len +
5638					m->m_pkthdr.csum_data + IPPROTO_UDP));
5639			}
5640			sum ^= 0xffff;
5641			++hw_assist;
5642                }
5643		break;
5644	case IPPROTO_ICMP:
5645#ifdef INET6
5646	case IPPROTO_ICMPV6:
5647#endif /* INET6 */
5648		break;
5649	default:
5650		return (1);
5651	}
5652
5653	if (!hw_assist) {
5654		switch (af) {
5655		case AF_INET:
5656			if (p == IPPROTO_ICMP) {
5657				if (m->m_len < off)
5658					return (1);
5659				m->m_data += off;
5660				m->m_len -= off;
5661				sum = in_cksum(m, len);
5662				m->m_data -= off;
5663				m->m_len += off;
5664			} else {
5665				if (m->m_len < sizeof(struct ip))
5666					return (1);
5667				sum = in4_cksum(m, p, off, len);
5668			}
5669			break;
5670#ifdef INET6
5671		case AF_INET6:
5672			if (m->m_len < sizeof(struct ip6_hdr))
5673				return (1);
5674			sum = in6_cksum(m, p, off, len);
5675			break;
5676#endif /* INET6 */
5677		default:
5678			return (1);
5679		}
5680	}
5681	if (sum) {
5682		switch (p) {
5683		case IPPROTO_TCP:
5684			tcpstat.tcps_rcvbadsum++;
5685			break;
5686		case IPPROTO_UDP:
5687			udpstat.udps_badsum++;
5688			break;
5689		case IPPROTO_ICMP:
5690			icmpstat.icps_checksum++;
5691			break;
5692#ifdef INET6
5693		case IPPROTO_ICMPV6:
5694			icmp6stat.icp6s_checksum++;
5695			break;
5696#endif /* INET6 */
5697		}
5698		return (1);
5699	} else {
5700		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
5701			m->m_pkthdr.csum_flags |=
5702			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5703			m->m_pkthdr.csum_data = 0xffff;
5704		}
5705	}
5706	return (0);
5707}
5708#else
5709/*
5710 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
5711 *   off is the offset where the protocol header starts
5712 *   len is the total length of protocol header plus payload
5713 * returns 0 when the checksum is valid, otherwise returns 1.
5714 */
5715int
5716pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
5717    sa_family_t af)
5718{
5719	u_int16_t flag_ok, flag_bad;
5720	u_int16_t sum;
5721
5722	switch (p) {
5723	case IPPROTO_TCP:
5724		flag_ok = M_TCP_CSUM_IN_OK;
5725		flag_bad = M_TCP_CSUM_IN_BAD;
5726		break;
5727	case IPPROTO_UDP:
5728		flag_ok = M_UDP_CSUM_IN_OK;
5729		flag_bad = M_UDP_CSUM_IN_BAD;
5730		break;
5731	case IPPROTO_ICMP:
5732#ifdef INET6
5733	case IPPROTO_ICMPV6:
5734#endif /* INET6 */
5735		flag_ok = flag_bad = 0;
5736		break;
5737	default:
5738		return (1);
5739	}
5740	if (m->m_pkthdr.csum & flag_ok)
5741		return (0);
5742	if (m->m_pkthdr.csum & flag_bad)
5743		return (1);
5744	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5745		return (1);
5746	if (m->m_pkthdr.len < off + len)
5747		return (1);
5748		switch (af) {
5749	case AF_INET:
5750		if (p == IPPROTO_ICMP) {
5751			if (m->m_len < off)
5752				return (1);
5753			m->m_data += off;
5754			m->m_len -= off;
5755			sum = in_cksum(m, len);
5756			m->m_data -= off;
5757			m->m_len += off;
5758		} else {
5759			if (m->m_len < sizeof(struct ip))
5760				return (1);
5761			sum = in4_cksum(m, p, off, len);
5762		}
5763		break;
5764#ifdef INET6
5765	case AF_INET6:
5766		if (m->m_len < sizeof(struct ip6_hdr))
5767			return (1);
5768		sum = in6_cksum(m, p, off, len);
5769		break;
5770#endif /* INET6 */
5771	default:
5772		return (1);
5773	}
5774	if (sum) {
5775		m->m_pkthdr.csum |= flag_bad;
5776		switch (p) {
5777		case IPPROTO_TCP:
5778			tcpstat.tcps_rcvbadsum++;
5779			break;
5780		case IPPROTO_UDP:
5781			udpstat.udps_badsum++;
5782			break;
5783		case IPPROTO_ICMP:
5784			icmpstat.icps_checksum++;
5785			break;
5786#ifdef INET6
5787		case IPPROTO_ICMPV6:
5788			icmp6stat.icp6s_checksum++;
5789			break;
5790#endif /* INET6 */
5791		}
5792		return (1);
5793	}
5794	m->m_pkthdr.csum |= flag_ok;
5795	return (0);
5796}
5797#endif
5798
5799static int
5800pf_add_mbuf_tag(struct mbuf *m, u_int tag)
5801{
5802	struct m_tag *mtag;
5803
5804	if (m_tag_find(m, tag, NULL) != NULL)
5805		return (0);
5806	mtag = m_tag_get(tag, 0, M_NOWAIT);
5807	if (mtag == NULL)
5808		return (1);
5809	m_tag_prepend(m, mtag);
5810	return (0);
5811}
5812
5813#ifdef INET
5814int
5815pf_test(int dir, struct ifnet *ifp, struct mbuf **m0)
5816{
5817	struct pfi_kif		*kif;
5818	u_short			 action, reason = 0, log = 0;
5819	struct mbuf		*m = *m0;
5820	struct ip		*h = NULL;	/* make the compiler happy */
5821	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
5822	struct pf_state		*s = NULL;
5823	struct pf_ruleset	*ruleset = NULL;
5824	struct pf_pdesc		 pd;
5825	int			 off, dirndx, pqid = 0;
5826
5827#ifdef __FreeBSD__
5828	PF_LOCK();
5829#endif
5830	if (!pf_status.running ||
5831#ifdef __FreeBSD__
5832	    (m->m_flags & M_SKIP_FIREWALL)) {
5833		PF_UNLOCK();
5834#else
5835	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
5836#endif
5837	    	return (PF_PASS);
5838	}
5839
5840	kif = pfi_index2kif[ifp->if_index];
5841	if (kif == NULL) {
5842#ifdef __FreeBSD__
5843		PF_UNLOCK();
5844#endif
5845		return (PF_DROP);
5846	}
5847
5848#ifdef __FreeBSD__
5849	M_ASSERTPKTHDR(m);
5850#else
5851#ifdef DIAGNOSTIC
5852	if ((m->m_flags & M_PKTHDR) == 0)
5853		panic("non-M_PKTHDR is passed to pf_test");
5854#endif
5855#endif
5856
5857	memset(&pd, 0, sizeof(pd));
5858	if (m->m_pkthdr.len < (int)sizeof(*h)) {
5859		action = PF_DROP;
5860		REASON_SET(&reason, PFRES_SHORT);
5861		log = 1;
5862		goto done;
5863	}
5864
5865	/* We do IP header normalization and packet reassembly here */
5866	if (pf_normalize_ip(m0, dir, kif, &reason) != PF_PASS) {
5867		action = PF_DROP;
5868		goto done;
5869	}
5870	m = *m0;
5871	h = mtod(m, struct ip *);
5872
5873	off = h->ip_hl << 2;
5874	if (off < (int)sizeof(*h)) {
5875		action = PF_DROP;
5876		REASON_SET(&reason, PFRES_SHORT);
5877		log = 1;
5878		goto done;
5879	}
5880
5881	pd.src = (struct pf_addr *)&h->ip_src;
5882	pd.dst = (struct pf_addr *)&h->ip_dst;
5883	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
5884	pd.ip_sum = &h->ip_sum;
5885	pd.proto = h->ip_p;
5886	pd.af = AF_INET;
5887	pd.tos = h->ip_tos;
5888	pd.tot_len = ntohs(h->ip_len);
5889
5890	/* handle fragments that didn't get reassembled by normalization */
5891	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
5892		action = pf_test_fragment(&r, dir, kif, m, h,
5893		    &pd, &a, &ruleset);
5894		goto done;
5895	}
5896
5897	switch (h->ip_p) {
5898
5899	case IPPROTO_TCP: {
5900		struct tcphdr	th;
5901
5902		pd.hdr.tcp = &th;
5903		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5904		    &action, &reason, AF_INET)) {
5905			log = action != PF_PASS;
5906			goto done;
5907		}
5908		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5909		    ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
5910			action = PF_DROP;
5911			goto done;
5912		}
5913		pd.p_len = pd.tot_len - off - (th.th_off << 2);
5914		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
5915			pqid = 1;
5916		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
5917		if (action == PF_DROP)
5918			goto done;
5919		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
5920		    &reason);
5921		if (action == PF_PASS) {
5922#if NPFSYNC
5923			pfsync_update_state(s);
5924#endif
5925			r = s->rule.ptr;
5926			a = s->anchor.ptr;
5927			log = s->log;
5928		} else if (s == NULL)
5929			action = pf_test_tcp(&r, &s, dir, kif,
5930			    m, off, h, &pd, &a, &ruleset);
5931		break;
5932	}
5933
5934	case IPPROTO_UDP: {
5935		struct udphdr	uh;
5936
5937		pd.hdr.udp = &uh;
5938		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5939		    &action, &reason, AF_INET)) {
5940			log = action != PF_PASS;
5941			goto done;
5942		}
5943		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
5944		    off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
5945			action = PF_DROP;
5946			goto done;
5947		}
5948		if (uh.uh_dport == 0 ||
5949		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
5950		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
5951			action = PF_DROP;
5952			goto done;
5953		}
5954		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
5955		if (action == PF_PASS) {
5956#if NPFSYNC
5957			pfsync_update_state(s);
5958#endif
5959			r = s->rule.ptr;
5960			a = s->anchor.ptr;
5961			log = s->log;
5962		} else if (s == NULL)
5963			action = pf_test_udp(&r, &s, dir, kif,
5964			    m, off, h, &pd, &a, &ruleset);
5965		break;
5966	}
5967
5968	case IPPROTO_ICMP: {
5969		struct icmp	ih;
5970
5971		pd.hdr.icmp = &ih;
5972		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
5973		    &action, &reason, AF_INET)) {
5974			log = action != PF_PASS;
5975			goto done;
5976		}
5977		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5978		    ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
5979			action = PF_DROP;
5980			goto done;
5981		}
5982		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd);
5983		if (action == PF_PASS) {
5984#if NPFSYNC
5985			pfsync_update_state(s);
5986#endif
5987			r = s->rule.ptr;
5988			a = s->anchor.ptr;
5989			log = s->log;
5990		} else if (s == NULL)
5991			action = pf_test_icmp(&r, &s, dir, kif,
5992			    m, off, h, &pd, &a, &ruleset);
5993		break;
5994	}
5995
5996	default:
5997		action = pf_test_state_other(&s, dir, kif, &pd);
5998		if (action == PF_PASS) {
5999#if NPFSYNC
6000			pfsync_update_state(s);
6001#endif
6002			r = s->rule.ptr;
6003			a = s->anchor.ptr;
6004			log = s->log;
6005		} else if (s == NULL)
6006			action = pf_test_other(&r, &s, dir, kif, m, off, h,
6007			    &pd, &a, &ruleset);
6008		break;
6009	}
6010
6011done:
6012	if (action == PF_PASS && h->ip_hl > 5 &&
6013	    !((s && s->allow_opts) || r->allow_opts)) {
6014		action = PF_DROP;
6015		REASON_SET(&reason, PFRES_SHORT);
6016		log = 1;
6017		DPFPRINTF(PF_DEBUG_MISC,
6018		    ("pf: dropping packet with ip options\n"));
6019	}
6020
6021#ifdef ALTQ
6022	if (action == PF_PASS && r->qid) {
6023		struct m_tag	*mtag;
6024		struct altq_tag	*atag;
6025
6026		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6027		if (mtag != NULL) {
6028			atag = (struct altq_tag *)(mtag + 1);
6029			if (pqid || pd.tos == IPTOS_LOWDELAY)
6030				atag->qid = r->pqid;
6031			else
6032				atag->qid = r->qid;
6033			/* add hints for ecn */
6034			atag->af = AF_INET;
6035			atag->hdr = h;
6036			m_tag_prepend(m, mtag);
6037		}
6038	}
6039#endif
6040
6041	/*
6042	 * connections redirected to loopback should not match sockets
6043	 * bound specifically to loopback due to security implications,
6044	 * see tcp_input() and in_pcblookup_listen().
6045	 */
6046	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6047	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6048	    (s->nat_rule.ptr->action == PF_RDR ||
6049	    s->nat_rule.ptr->action == PF_BINAT) &&
6050	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
6051	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
6052		action = PF_DROP;
6053		REASON_SET(&reason, PFRES_MEMORY);
6054	}
6055
6056	if (log)
6057		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, a, ruleset);
6058
6059	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6060	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
6061
6062	if (action == PF_PASS || r->action == PF_DROP) {
6063		r->packets++;
6064		r->bytes += pd.tot_len;
6065		if (a != NULL) {
6066			a->packets++;
6067			a->bytes += pd.tot_len;
6068		}
6069		if (s != NULL) {
6070			dirndx = (dir == s->direction) ? 0 : 1;
6071			s->packets[dirndx]++;
6072			s->bytes[dirndx] += pd.tot_len;
6073			if (s->nat_rule.ptr != NULL) {
6074				s->nat_rule.ptr->packets++;
6075				s->nat_rule.ptr->bytes += pd.tot_len;
6076			}
6077			if (s->src_node != NULL) {
6078				s->src_node->packets++;
6079				s->src_node->bytes += pd.tot_len;
6080			}
6081			if (s->nat_src_node != NULL) {
6082				s->nat_src_node->packets++;
6083				s->nat_src_node->bytes += pd.tot_len;
6084			}
6085		}
6086		tr = r;
6087		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6088		if (nr != NULL) {
6089			struct pf_addr *x;
6090			/*
6091			 * XXX: we need to make sure that the addresses
6092			 * passed to pfr_update_stats() are the same than
6093			 * the addresses used during matching (pfr_match)
6094			 */
6095			if (r == &pf_default_rule) {
6096				tr = nr;
6097				x = (s == NULL || s->direction == dir) ?
6098				    &pd.baddr : &pd.naddr;
6099			} else
6100				x = (s == NULL || s->direction == dir) ?
6101				    &pd.naddr : &pd.baddr;
6102			if (x == &pd.baddr || s == NULL) {
6103				/* we need to change the address */
6104				if (dir == PF_OUT)
6105					pd.src = x;
6106				else
6107					pd.dst = x;
6108			}
6109		}
6110		if (tr->src.addr.type == PF_ADDR_TABLE)
6111			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6112			    s->direction == dir) ? pd.src : pd.dst, pd.af,
6113			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6114			    tr->src.not);
6115		if (tr->dst.addr.type == PF_ADDR_TABLE)
6116			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6117			    s->direction == dir) ? pd.dst : pd.src, pd.af,
6118			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6119			    tr->dst.not);
6120	}
6121
6122
6123	if (action == PF_SYNPROXY_DROP) {
6124		m_freem(*m0);
6125		*m0 = NULL;
6126		action = PF_PASS;
6127	} else if (r->rt)
6128		/* pf_route can free the mbuf causing *m0 to become NULL */
6129		pf_route(m0, r, dir, ifp, s);
6130
6131#ifdef __FreeBSD__
6132	PF_UNLOCK();
6133#endif
6134
6135	return (action);
6136}
6137#endif /* INET */
6138
6139#ifdef INET6
6140int
6141pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0)
6142{
6143	struct pfi_kif		*kif;
6144	u_short			 action, reason = 0, log = 0;
6145	struct mbuf		*m = *m0;
6146	struct ip6_hdr		*h = NULL;	/* make the compiler happy */
6147	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
6148	struct pf_state		*s = NULL;
6149	struct pf_ruleset	*ruleset = NULL;
6150	struct pf_pdesc		 pd;
6151	int			 off, terminal = 0, dirndx;
6152
6153#ifdef __FreeBSD__
6154	PF_LOCK();
6155#endif
6156
6157	if (!pf_status.running ||
6158#ifdef __FreeBSD__
6159	    (m->m_flags & M_SKIP_FIREWALL)) {
6160		PF_UNLOCK();
6161#else
6162	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
6163#endif
6164		return (PF_PASS);
6165	}
6166
6167	kif = pfi_index2kif[ifp->if_index];
6168	if (kif == NULL) {
6169#ifdef __FreeBSD__
6170		PF_UNLOCK();
6171#endif
6172		return (PF_DROP);
6173	}
6174
6175#ifdef __FreeBSD__
6176	M_ASSERTPKTHDR(m);
6177#else
6178#ifdef DIAGNOSTIC
6179	if ((m->m_flags & M_PKTHDR) == 0)
6180		panic("non-M_PKTHDR is passed to pf_test");
6181#endif
6182#endif
6183
6184	memset(&pd, 0, sizeof(pd));
6185	if (m->m_pkthdr.len < (int)sizeof(*h)) {
6186		action = PF_DROP;
6187		REASON_SET(&reason, PFRES_SHORT);
6188		log = 1;
6189		goto done;
6190	}
6191
6192	/* We do IP header normalization and packet reassembly here */
6193	if (pf_normalize_ip6(m0, dir, kif, &reason) != PF_PASS) {
6194		action = PF_DROP;
6195		goto done;
6196	}
6197	m = *m0;
6198	h = mtod(m, struct ip6_hdr *);
6199
6200	pd.src = (struct pf_addr *)&h->ip6_src;
6201	pd.dst = (struct pf_addr *)&h->ip6_dst;
6202	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
6203	pd.ip_sum = NULL;
6204	pd.af = AF_INET6;
6205	pd.tos = 0;
6206	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6207
6208	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
6209	pd.proto = h->ip6_nxt;
6210	do {
6211		switch (pd.proto) {
6212		case IPPROTO_FRAGMENT:
6213			action = pf_test_fragment(&r, dir, kif, m, h,
6214			    &pd, &a, &ruleset);
6215			if (action == PF_DROP)
6216				REASON_SET(&reason, PFRES_FRAG);
6217			goto done;
6218		case IPPROTO_AH:
6219		case IPPROTO_HOPOPTS:
6220		case IPPROTO_ROUTING:
6221		case IPPROTO_DSTOPTS: {
6222			/* get next header and header length */
6223			struct ip6_ext	opt6;
6224
6225			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6226			    NULL, NULL, pd.af)) {
6227				DPFPRINTF(PF_DEBUG_MISC,
6228				    ("pf: IPv6 short opt\n"));
6229				action = PF_DROP;
6230				REASON_SET(&reason, PFRES_SHORT);
6231				log = 1;
6232				goto done;
6233			}
6234			if (pd.proto == IPPROTO_AH)
6235				off += (opt6.ip6e_len + 2) * 4;
6236			else
6237				off += (opt6.ip6e_len + 1) * 8;
6238			pd.proto = opt6.ip6e_nxt;
6239			/* goto the next header */
6240			break;
6241		}
6242		default:
6243			terminal++;
6244			break;
6245		}
6246	} while (!terminal);
6247
6248	switch (pd.proto) {
6249
6250	case IPPROTO_TCP: {
6251		struct tcphdr	th;
6252
6253		pd.hdr.tcp = &th;
6254		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6255		    &action, &reason, AF_INET6)) {
6256			log = action != PF_PASS;
6257			goto done;
6258		}
6259		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6260		    ntohs(h->ip6_plen), IPPROTO_TCP, AF_INET6)) {
6261			action = PF_DROP;
6262			goto done;
6263		}
6264		pd.p_len = pd.tot_len - off - (th.th_off << 2);
6265		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6266		if (action == PF_DROP)
6267			goto done;
6268		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6269		    &reason);
6270		if (action == PF_PASS) {
6271#if NPFSYNC
6272			pfsync_update_state(s);
6273#endif
6274			r = s->rule.ptr;
6275			a = s->anchor.ptr;
6276			log = s->log;
6277		} else if (s == NULL)
6278			action = pf_test_tcp(&r, &s, dir, kif,
6279			    m, off, h, &pd, &a, &ruleset);
6280		break;
6281	}
6282
6283	case IPPROTO_UDP: {
6284		struct udphdr	uh;
6285
6286		pd.hdr.udp = &uh;
6287		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6288		    &action, &reason, AF_INET6)) {
6289			log = action != PF_PASS;
6290			goto done;
6291		}
6292		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
6293		    off, ntohs(h->ip6_plen), IPPROTO_UDP, AF_INET6)) {
6294			action = PF_DROP;
6295			goto done;
6296		}
6297		if (uh.uh_dport == 0 ||
6298		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6299		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6300			action = PF_DROP;
6301			goto done;
6302		}
6303		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6304		if (action == PF_PASS) {
6305#if NPFSYNC
6306			pfsync_update_state(s);
6307#endif
6308			r = s->rule.ptr;
6309			a = s->anchor.ptr;
6310			log = s->log;
6311		} else if (s == NULL)
6312			action = pf_test_udp(&r, &s, dir, kif,
6313			    m, off, h, &pd, &a, &ruleset);
6314		break;
6315	}
6316
6317	case IPPROTO_ICMPV6: {
6318		struct icmp6_hdr	ih;
6319
6320		pd.hdr.icmp6 = &ih;
6321		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6322		    &action, &reason, AF_INET6)) {
6323			log = action != PF_PASS;
6324			goto done;
6325		}
6326		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6327		    ntohs(h->ip6_plen), IPPROTO_ICMPV6, AF_INET6)) {
6328			action = PF_DROP;
6329			goto done;
6330		}
6331		action = pf_test_state_icmp(&s, dir, kif,
6332		    m, off, h, &pd);
6333		if (action == PF_PASS) {
6334#if NPFSYNC
6335			pfsync_update_state(s);
6336#endif
6337			r = s->rule.ptr;
6338			a = s->anchor.ptr;
6339			log = s->log;
6340		} else if (s == NULL)
6341			action = pf_test_icmp(&r, &s, dir, kif,
6342			    m, off, h, &pd, &a, &ruleset);
6343		break;
6344	}
6345
6346	default:
6347		action = pf_test_state_other(&s, dir, kif, &pd);
6348		if (action == PF_PASS) {
6349			r = s->rule.ptr;
6350			a = s->anchor.ptr;
6351			log = s->log;
6352		} else if (s == NULL)
6353			action = pf_test_other(&r, &s, dir, kif, m, off, h,
6354			    &pd, &a, &ruleset);
6355		break;
6356	}
6357
6358done:
6359	/* XXX handle IPv6 options, if not allowed. not implemented. */
6360
6361#ifdef ALTQ
6362	if (action == PF_PASS && r->qid) {
6363		struct m_tag	*mtag;
6364		struct altq_tag	*atag;
6365
6366		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6367		if (mtag != NULL) {
6368			atag = (struct altq_tag *)(mtag + 1);
6369			if (pd.tos == IPTOS_LOWDELAY)
6370				atag->qid = r->pqid;
6371			else
6372				atag->qid = r->qid;
6373			/* add hints for ecn */
6374			atag->af = AF_INET6;
6375			atag->hdr = h;
6376			m_tag_prepend(m, mtag);
6377		}
6378	}
6379#endif
6380
6381	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6382	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6383	    (s->nat_rule.ptr->action == PF_RDR ||
6384	    s->nat_rule.ptr->action == PF_BINAT) &&
6385	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6) &&
6386	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
6387		action = PF_DROP;
6388		REASON_SET(&reason, PFRES_MEMORY);
6389	}
6390
6391	if (log)
6392		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, r, a, ruleset);
6393
6394	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6395	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6396
6397	if (action == PF_PASS || r->action == PF_DROP) {
6398		r->packets++;
6399		r->bytes += pd.tot_len;
6400		if (a != NULL) {
6401			a->packets++;
6402			a->bytes += pd.tot_len;
6403		}
6404		if (s != NULL) {
6405			dirndx = (dir == s->direction) ? 0 : 1;
6406			s->packets[dirndx]++;
6407			s->bytes[dirndx] += pd.tot_len;
6408			if (s->nat_rule.ptr != NULL) {
6409				s->nat_rule.ptr->packets++;
6410				s->nat_rule.ptr->bytes += pd.tot_len;
6411			}
6412			if (s->src_node != NULL) {
6413				s->src_node->packets++;
6414				s->src_node->bytes += pd.tot_len;
6415			}
6416			if (s->nat_src_node != NULL) {
6417				s->nat_src_node->packets++;
6418				s->nat_src_node->bytes += pd.tot_len;
6419			}
6420		}
6421		tr = r;
6422		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6423		if (nr != NULL) {
6424			struct pf_addr *x;
6425			/*
6426			 * XXX: we need to make sure that the addresses
6427			 * passed to pfr_update_stats() are the same than
6428			 * the addresses used during matching (pfr_match)
6429			 */
6430			if (r == &pf_default_rule) {
6431				tr = nr;
6432				x = (s == NULL || s->direction == dir) ?
6433				    &pd.baddr : &pd.naddr;
6434			} else {
6435				x = (s == NULL || s->direction == dir) ?
6436				    &pd.naddr : &pd.baddr;
6437			}
6438			if (x == &pd.baddr || s == NULL) {
6439				if (dir == PF_OUT)
6440					pd.src = x;
6441				else
6442					pd.dst = x;
6443			}
6444		}
6445		if (tr->src.addr.type == PF_ADDR_TABLE)
6446			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6447			    s->direction == dir) ? pd.src : pd.dst, pd.af,
6448			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6449			    tr->src.not);
6450		if (tr->dst.addr.type == PF_ADDR_TABLE)
6451			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6452			    s->direction == dir) ? pd.dst : pd.src, pd.af,
6453			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6454			    tr->dst.not);
6455	}
6456
6457
6458	if (action == PF_SYNPROXY_DROP) {
6459		m_freem(*m0);
6460		*m0 = NULL;
6461		action = PF_PASS;
6462	} else if (r->rt)
6463		/* pf_route6 can free the mbuf causing *m0 to become NULL */
6464		pf_route6(m0, r, dir, ifp, s);
6465
6466#ifdef __FreeBSD__
6467	PF_UNLOCK();
6468#endif
6469	return (action);
6470}
6471#endif /* INET6 */
6472