pf.c revision 132280
1/*	$FreeBSD: head/sys/contrib/pf/net/pf.c 132280 2004-07-17 05:10:06Z mlaier $	*/
2/*	$OpenBSD: pf.c,v 1.433.2.1 2004/04/30 21:46:33 brad Exp $ */
3
4/*
5 * Copyright (c) 2001 Daniel Hartmeier
6 * Copyright (c) 2002,2003 Henning Brauer
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 *    - Redistributions of source code must retain the above copyright
14 *      notice, this list of conditions and the following disclaimer.
15 *    - Redistributions in binary form must reproduce the above
16 *      copyright notice, this list of conditions and the following
17 *      disclaimer in the documentation and/or other materials provided
18 *      with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Effort sponsored in part by the Defense Advanced Research Projects
34 * Agency (DARPA) and Air Force Research Laboratory, Air Force
35 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
36 *
37 */
38
39#ifdef __FreeBSD__
40#include "opt_inet.h"
41#include "opt_inet6.h"
42#endif
43
44#ifdef __FreeBSD__
45#include "opt_bpf.h"
46#include "opt_pf.h"
47#define	NBPFILTER	DEV_BPF
48#define	NPFLOG		DEV_PFLOG
49#define	NPFSYNC		DEV_PFSYNC
50#else
51#include "bpfilter.h"
52#include "pflog.h"
53#include "pfsync.h"
54#endif
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/mbuf.h>
59#include <sys/filio.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/kernel.h>
63#include <sys/time.h>
64#ifdef __FreeBSD__
65#include <sys/sysctl.h>
66#include <sys/endian.h>
67#else
68#include <sys/pool.h>
69#endif
70
71#include <net/if.h>
72#include <net/if_types.h>
73#include <net/bpf.h>
74#include <net/route.h>
75
76#include <netinet/in.h>
77#include <netinet/in_var.h>
78#include <netinet/in_systm.h>
79#include <netinet/ip.h>
80#include <netinet/ip_var.h>
81#include <netinet/tcp.h>
82#include <netinet/tcp_seq.h>
83#include <netinet/udp.h>
84#include <netinet/ip_icmp.h>
85#include <netinet/in_pcb.h>
86#include <netinet/tcp_timer.h>
87#include <netinet/tcp_var.h>
88#include <netinet/udp_var.h>
89#include <netinet/icmp_var.h>
90
91#ifndef __FreeBSD__
92#include <dev/rndvar.h>
93#endif
94#include <net/pfvar.h>
95#include <net/if_pflog.h>
96
97#if NPFSYNC > 0
98#include <net/if_pfsync.h>
99#endif /* NPFSYNC > 0 */
100
101#ifdef INET6
102#include <netinet/ip6.h>
103#include <netinet/in_pcb.h>
104#include <netinet/icmp6.h>
105#include <netinet6/nd6.h>
106#ifdef __FreeBSD__
107#include <netinet6/ip6_var.h>
108#include <netinet6/in6_pcb.h>
109#endif
110#endif /* INET6 */
111
112#ifdef __FreeBSD__
113#include <machine/in_cksum.h>
114#include <sys/limits.h>
115#include <sys/ucred.h>
116
117extern int ip_optcopy(struct ip *, struct ip *);
118#endif
119
/*
 * Debug printf: passes the parenthesized argument list `x` to printf when
 * the global debug level pf_status.debug is at least `n`.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement; with a bare `if`, an `else` following the macro call would bind
 * to the hidden `if` instead of the caller's.  Callers must terminate the
 * invocation with a semicolon.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
121
122/*
123 * Global variables
124 */
125
126struct pf_anchorqueue	 pf_anchors;
127struct pf_ruleset	 pf_main_ruleset;
128struct pf_altqqueue	 pf_altqs[2];
129struct pf_palist	 pf_pabuf;
130struct pf_altqqueue	*pf_altqs_active;
131struct pf_altqqueue	*pf_altqs_inactive;
132struct pf_status	 pf_status;
133
134u_int32_t		 ticket_altqs_active;
135u_int32_t		 ticket_altqs_inactive;
136int			 altqs_inactive_open;
137u_int32_t		 ticket_pabuf;
138
139#ifdef __FreeBSD__
140struct callout	 	 pf_expire_to;			/* expire timeout */
141#else
142struct timeout		 pf_expire_to;			/* expire timeout */
143#endif
144
145
146#ifdef __FreeBSD__
147uma_zone_t		 pf_src_tree_pl, pf_rule_pl;
148uma_zone_t		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
149#else
150struct pool		 pf_src_tree_pl, pf_rule_pl;
151struct pool		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
152#endif
153
154void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
155void			 pf_print_state(struct pf_state *);
156void			 pf_print_flags(u_int8_t);
157
158u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
159			    u_int8_t);
160void			 pf_change_ap(struct pf_addr *, u_int16_t *,
161			    u_int16_t *, u_int16_t *, struct pf_addr *,
162			    u_int16_t, u_int8_t, sa_family_t);
163#ifdef INET6
164void			 pf_change_a6(struct pf_addr *, u_int16_t *,
165			    struct pf_addr *, u_int8_t);
166#endif /* INET6 */
167void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
168			    struct pf_addr *, struct pf_addr *, u_int16_t,
169			    u_int16_t *, u_int16_t *, u_int16_t *,
170			    u_int16_t *, u_int8_t, sa_family_t);
171void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
172			    const struct pf_addr *, const struct pf_addr *,
173			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
174			    u_int8_t, u_int16_t, u_int16_t, u_int8_t);
175void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
176			    sa_family_t, struct pf_rule *);
177struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
178			    int, int, struct pfi_kif *,
179			    struct pf_addr *, u_int16_t, struct pf_addr *,
180			    u_int16_t, int);
181struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
182			    int, int, struct pfi_kif *, struct pf_src_node **,
183			    struct pf_addr *, u_int16_t,
184			    struct pf_addr *, u_int16_t,
185			    struct pf_addr *, u_int16_t *);
186int			 pf_test_tcp(struct pf_rule **, struct pf_state **,
187			    int, struct pfi_kif *, struct mbuf *, int,
188			    void *, struct pf_pdesc *, struct pf_rule **,
189			    struct pf_ruleset **);
190int			 pf_test_udp(struct pf_rule **, struct pf_state **,
191			    int, struct pfi_kif *, struct mbuf *, int,
192			    void *, struct pf_pdesc *, struct pf_rule **,
193			    struct pf_ruleset **);
194int			 pf_test_icmp(struct pf_rule **, struct pf_state **,
195			    int, struct pfi_kif *, struct mbuf *, int,
196			    void *, struct pf_pdesc *, struct pf_rule **,
197			    struct pf_ruleset **);
198int			 pf_test_other(struct pf_rule **, struct pf_state **,
199			    int, struct pfi_kif *, struct mbuf *, int, void *,
200			    struct pf_pdesc *, struct pf_rule **,
201			    struct pf_ruleset **);
202int			 pf_test_fragment(struct pf_rule **, int,
203			    struct pfi_kif *, struct mbuf *, void *,
204			    struct pf_pdesc *, struct pf_rule **,
205			    struct pf_ruleset **);
206int			 pf_test_state_tcp(struct pf_state **, int,
207			    struct pfi_kif *, struct mbuf *, int,
208			    void *, struct pf_pdesc *, u_short *);
209int			 pf_test_state_udp(struct pf_state **, int,
210			    struct pfi_kif *, struct mbuf *, int,
211			    void *, struct pf_pdesc *);
212int			 pf_test_state_icmp(struct pf_state **, int,
213			    struct pfi_kif *, struct mbuf *, int,
214			    void *, struct pf_pdesc *);
215int			 pf_test_state_other(struct pf_state **, int,
216			    struct pfi_kif *, struct pf_pdesc *);
217struct pf_tag		*pf_get_tag(struct mbuf *);
218int			 pf_match_tag(struct mbuf *, struct pf_rule *,
219			     struct pf_rule *, struct pf_tag *, int *);
220void			 pf_hash(struct pf_addr *, struct pf_addr *,
221			    struct pf_poolhashkey *, sa_family_t);
222int			 pf_map_addr(u_int8_t, struct pf_rule *,
223			    struct pf_addr *, struct pf_addr *,
224			    struct pf_addr *, struct pf_src_node **);
225int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
226			    struct pf_addr *, struct pf_addr *, u_int16_t,
227			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
228			    struct pf_src_node **);
229void			 pf_route(struct mbuf **, struct pf_rule *, int,
230			    struct ifnet *, struct pf_state *);
231void			 pf_route6(struct mbuf **, struct pf_rule *, int,
232			    struct ifnet *, struct pf_state *);
233int			 pf_socket_lookup(uid_t *, gid_t *,
234			    int, struct pf_pdesc *);
235u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
236			    sa_family_t);
237u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
238			    sa_family_t);
239u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
240				u_int16_t);
241void			 pf_set_rt_ifp(struct pf_state *,
242			    struct pf_addr *);
243int			 pf_check_proto_cksum(struct mbuf *, int, int,
244			    u_int8_t, sa_family_t);
245int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
246			    struct pf_addr_wrap *);
247static int		 pf_add_mbuf_tag(struct mbuf *, u_int);
248struct pf_state		*pf_find_state_recurse(struct pfi_kif *,
249			    struct pf_state *, u_int8_t);
250
251#ifdef __FreeBSD__
252int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
253
254struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
255#else
256struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
257	{ &pf_state_pl, PFSTATE_HIWAT },
258	{ &pf_src_tree_pl, PFSNODE_HIWAT },
259	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT }
260};
261#endif
262
/*
 * Look up the state matching `key` for the current packet.  Expects `state`,
 * `key`, `kif` and `direction` to be in scope at the point of use.
 *
 * Inbound packets are searched in the ext/gwy tree, all others in the
 * lan/ext tree.  Returns PF_DROP from the enclosing function when nothing is
 * found.  Returns PF_PASS from the enclosing function for an outbound packet
 * whose state rule is route-to (out) or reply-to (in) and whose rt_kif is set
 * to an interface other than `kif`.
 */
#define STATE_LOOKUP()							\
	do {								\
		*state = pf_find_state_recurse(kif, &key,		\
		    (direction == PF_IN) ? PF_EXT_GWY : PF_LAN_EXT);	\
		if (*state == NULL)					\
			return (PF_DROP);				\
		if (direction == PF_OUT &&				\
		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
		    (*state)->rule.ptr->direction == PF_OUT) ||		\
		    ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
		    (*state)->rule.ptr->direction == PF_IN)) &&		\
		    (*state)->rt_kif != NULL &&				\
		    (*state)->rt_kif != kif)				\
			return (PF_PASS);				\
	} while (0)
282
/*
 * True when the state's lan and gwy endpoints differ, i.e. the first address
 * word differs, or (for AF_INET6 states) any of the remaining three address
 * words differs, or the ports differ.
 *
 * The whole expansion is parenthesized so the macro can be negated or
 * combined with other operators without changing how it parses.
 */
#define	STATE_TRANSLATE(s) \
	((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
	((s)->af == AF_INET6 && \
	((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
	(s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
	(s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
	(s)->lan.port != (s)->gwy.port)
290
/*
 * Pick the kif for rule `r` given the packet's kif `k`: `k` itself when the
 * rule has PFRULE_IFBOUND, its parent when it has PFRULE_GRBOUND, and the
 * parent's parent otherwise.
 */
#define BOUND_IFACE(r, k)						\
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) :			\
	(((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent :		\
	(k)->pfik_parent->pfik_parent))
294
295static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
296static __inline int pf_state_compare_lan_ext(struct pf_state *,
297	struct pf_state *);
298static __inline int pf_state_compare_ext_gwy(struct pf_state *,
299	struct pf_state *);
300static __inline int pf_state_compare_id(struct pf_state *,
301	struct pf_state *);
302
303struct pf_src_tree tree_src_tracking;
304
305struct pf_state_tree_id tree_id;
306struct pf_state_queue state_updates;
307
308RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
309RB_GENERATE(pf_state_tree_lan_ext, pf_state,
310    u.s.entry_lan_ext, pf_state_compare_lan_ext);
311RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
312    u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
313RB_GENERATE(pf_state_tree_id, pf_state,
314    u.s.entry_id, pf_state_compare_id);
315
316#ifdef __FreeBSD__
317static int
318#else
319static __inline int
320#endif
321pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
322{
323	int	diff;
324
325	if (a->rule.ptr > b->rule.ptr)
326		return (1);
327	if (a->rule.ptr < b->rule.ptr)
328		return (-1);
329	if ((diff = a->af - b->af) != 0)
330		return (diff);
331	switch (a->af) {
332#ifdef INET
333	case AF_INET:
334		if (a->addr.addr32[0] > b->addr.addr32[0])
335			return (1);
336		if (a->addr.addr32[0] < b->addr.addr32[0])
337			return (-1);
338		break;
339#endif /* INET */
340#ifdef INET6
341	case AF_INET6:
342		if (a->addr.addr32[3] > b->addr.addr32[3])
343			return (1);
344		if (a->addr.addr32[3] < b->addr.addr32[3])
345			return (-1);
346		if (a->addr.addr32[2] > b->addr.addr32[2])
347			return (1);
348		if (a->addr.addr32[2] < b->addr.addr32[2])
349			return (-1);
350		if (a->addr.addr32[1] > b->addr.addr32[1])
351			return (1);
352		if (a->addr.addr32[1] < b->addr.addr32[1])
353			return (-1);
354		if (a->addr.addr32[0] > b->addr.addr32[0])
355			return (1);
356		if (a->addr.addr32[0] < b->addr.addr32[0])
357			return (-1);
358		break;
359#endif /* INET6 */
360	}
361	return (0);
362}
363
364#ifdef __FreeBSD__
365static int
366#else
367static __inline int
368#endif
369pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
370{
371	int	diff;
372
373	if ((diff = a->proto - b->proto) != 0)
374		return (diff);
375	if ((diff = a->af - b->af) != 0)
376		return (diff);
377	switch (a->af) {
378#ifdef INET
379	case AF_INET:
380		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
381			return (1);
382		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
383			return (-1);
384		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
385			return (1);
386		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
387			return (-1);
388		break;
389#endif /* INET */
390#ifdef INET6
391	case AF_INET6:
392		if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
393			return (1);
394		if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
395			return (-1);
396		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
397			return (1);
398		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
399			return (-1);
400		if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
401			return (1);
402		if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
403			return (-1);
404		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
405			return (1);
406		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
407			return (-1);
408		if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
409			return (1);
410		if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
411			return (-1);
412		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
413			return (1);
414		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
415			return (-1);
416		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
417			return (1);
418		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
419			return (-1);
420		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
421			return (1);
422		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
423			return (-1);
424		break;
425#endif /* INET6 */
426	}
427
428	if ((diff = a->lan.port - b->lan.port) != 0)
429		return (diff);
430	if ((diff = a->ext.port - b->ext.port) != 0)
431		return (diff);
432
433	return (0);
434}
435
436#ifdef __FreeBSD__
437static int
438#else
439static __inline int
440#endif
441pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
442{
443	int	diff;
444
445	if ((diff = a->proto - b->proto) != 0)
446		return (diff);
447	if ((diff = a->af - b->af) != 0)
448		return (diff);
449	switch (a->af) {
450#ifdef INET
451	case AF_INET:
452		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
453			return (1);
454		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
455			return (-1);
456		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
457			return (1);
458		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
459			return (-1);
460		break;
461#endif /* INET */
462#ifdef INET6
463	case AF_INET6:
464		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
465			return (1);
466		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
467			return (-1);
468		if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
469			return (1);
470		if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
471			return (-1);
472		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
473			return (1);
474		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
475			return (-1);
476		if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
477			return (1);
478		if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
479			return (-1);
480		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
481			return (1);
482		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
483			return (-1);
484		if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
485			return (1);
486		if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
487			return (-1);
488		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
489			return (1);
490		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
491			return (-1);
492		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
493			return (1);
494		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
495			return (-1);
496		break;
497#endif /* INET6 */
498	}
499
500	if ((diff = a->ext.port - b->ext.port) != 0)
501		return (diff);
502	if ((diff = a->gwy.port - b->gwy.port) != 0)
503		return (diff);
504
505	return (0);
506}
507
508#ifdef __FreeBSD__
509static int
510#else
511static __inline int
512#endif
513pf_state_compare_id(struct pf_state *a, struct pf_state *b)
514{
515	if (a->id > b->id)
516		return (1);
517	if (a->id < b->id)
518		return (-1);
519	if (a->creatorid > b->creatorid)
520		return (1);
521	if (a->creatorid < b->creatorid)
522		return (-1);
523
524	return (0);
525}
526
#ifdef INET6
/*
 * Copy an address from src to dst: one 32-bit word for AF_INET (when INET is
 * configured), four for AF_INET6.  Any other family copies nothing.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	int	w, nwords = 0;

	if (af == AF_INET6)
		nwords = 4;
#ifdef INET
	else if (af == AF_INET)
		nwords = 1;
#endif /* INET */

	for (w = 0; w < nwords; w++)
		dst->addr32[w] = src->addr32[w];
}
#endif
546
547struct pf_state *
548pf_find_state_byid(struct pf_state *key)
549{
550	pf_status.fcounters[FCNT_STATE_SEARCH]++;
551	return (RB_FIND(pf_state_tree_id, &tree_id, key));
552}
553
554struct pf_state *
555pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree)
556{
557	struct pf_state *s;
558
559	pf_status.fcounters[FCNT_STATE_SEARCH]++;
560
561	switch (tree) {
562	case PF_LAN_EXT:
563		for (; kif != NULL; kif = kif->pfik_parent) {
564			s = RB_FIND(pf_state_tree_lan_ext,
565			    &kif->pfik_lan_ext, key);
566			if (s != NULL)
567				return (s);
568		}
569		return (NULL);
570	case PF_EXT_GWY:
571		for (; kif != NULL; kif = kif->pfik_parent) {
572			s = RB_FIND(pf_state_tree_ext_gwy,
573			    &kif->pfik_ext_gwy, key);
574			if (s != NULL)
575				return (s);
576		}
577		return (NULL);
578	default:
579		panic("pf_find_state_recurse");
580	}
581}
582
583struct pf_state *
584pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more)
585{
586	struct pf_state *s, *ss = NULL;
587	struct pfi_kif	*kif;
588
589	pf_status.fcounters[FCNT_STATE_SEARCH]++;
590
591	switch (tree) {
592	case PF_LAN_EXT:
593		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
594			s = RB_FIND(pf_state_tree_lan_ext,
595			    &kif->pfik_lan_ext, key);
596			if (s == NULL)
597				continue;
598			if (more == NULL)
599				return (s);
600			ss = s;
601			(*more)++;
602		}
603		return (ss);
604	case PF_EXT_GWY:
605		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
606			s = RB_FIND(pf_state_tree_ext_gwy,
607			    &kif->pfik_ext_gwy, key);
608			if (s == NULL)
609				continue;
610			if (more == NULL)
611				return (s);
612			ss = s;
613			(*more)++;
614		}
615		return (ss);
616	default:
617		panic("pf_find_state_all");
618	}
619}
620
621int
622pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
623    struct pf_addr *src, sa_family_t af)
624{
625	struct pf_src_node	k;
626
627	if (*sn == NULL) {
628		k.af = af;
629		PF_ACPY(&k.addr, src, af);
630		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
631		    rule->rpool.opts & PF_POOL_STICKYADDR)
632			k.rule.ptr = rule;
633		else
634			k.rule.ptr = NULL;
635		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
636		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
637	}
638	if (*sn == NULL) {
639		if (!rule->max_src_nodes ||
640		    rule->src_nodes < rule->max_src_nodes)
641			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
642		if ((*sn) == NULL)
643			return (-1);
644		bzero(*sn, sizeof(struct pf_src_node));
645		(*sn)->af = af;
646		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
647		    rule->rpool.opts & PF_POOL_STICKYADDR)
648			(*sn)->rule.ptr = rule;
649		else
650			(*sn)->rule.ptr = NULL;
651		PF_ACPY(&(*sn)->addr, src, af);
652		if (RB_INSERT(pf_src_tree,
653		    &tree_src_tracking, *sn) != NULL) {
654			if (pf_status.debug >= PF_DEBUG_MISC) {
655				printf("pf: src_tree insert failed: ");
656				pf_print_host(&(*sn)->addr, 0, af);
657				printf("\n");
658			}
659			pool_put(&pf_src_tree_pl, *sn);
660			return (-1);
661		}
662#ifdef __FreeBSD__
663		(*sn)->creation = time_second;
664#else
665		(*sn)->creation = time.tv_sec;
666#endif
667		(*sn)->ruletype = rule->action;
668		if ((*sn)->rule.ptr != NULL)
669			(*sn)->rule.ptr->src_nodes++;
670		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
671		pf_status.src_nodes++;
672	} else {
673		if (rule->max_src_states &&
674		    (*sn)->states >= rule->max_src_states)
675			return (-1);
676	}
677	return (0);
678}
679
680int
681pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
682{
683	/* Thou MUST NOT insert multiple duplicate keys */
684	state->u.s.kif = kif;
685	if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
686		if (pf_status.debug >= PF_DEBUG_MISC) {
687			printf("pf: state insert failed: tree_lan_ext");
688			printf(" lan: ");
689			pf_print_host(&state->lan.addr, state->lan.port,
690			    state->af);
691			printf(" gwy: ");
692			pf_print_host(&state->gwy.addr, state->gwy.port,
693			    state->af);
694			printf(" ext: ");
695			pf_print_host(&state->ext.addr, state->ext.port,
696			    state->af);
697			if (state->sync_flags & PFSTATE_FROMSYNC)
698				printf(" (from sync)");
699			printf("\n");
700		}
701		return (-1);
702	}
703
704	if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
705		if (pf_status.debug >= PF_DEBUG_MISC) {
706			printf("pf: state insert failed: tree_ext_gwy");
707			printf(" lan: ");
708			pf_print_host(&state->lan.addr, state->lan.port,
709			    state->af);
710			printf(" gwy: ");
711			pf_print_host(&state->gwy.addr, state->gwy.port,
712			    state->af);
713			printf(" ext: ");
714			pf_print_host(&state->ext.addr, state->ext.port,
715			    state->af);
716			if (state->sync_flags & PFSTATE_FROMSYNC)
717				printf(" (from sync)");
718			printf("\n");
719		}
720		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
721		return (-1);
722	}
723
724	if (state->id == 0 && state->creatorid == 0) {
725		state->id = htobe64(pf_status.stateid++);
726		state->creatorid = pf_status.hostid;
727	}
728	if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
729		if (pf_status.debug >= PF_DEBUG_MISC) {
730#ifdef __FreeBSD__
731			printf("pf: state insert failed: "
732			    "id: %016llx creatorid: %08x",
733			    (long long)be64toh(state->id),
734			    ntohl(state->creatorid));
735#else
736			printf("pf: state insert failed: "
737			    "id: %016llx creatorid: %08x",
738			    betoh64(state->id), ntohl(state->creatorid));
739#endif
740			if (state->sync_flags & PFSTATE_FROMSYNC)
741				printf(" (from sync)");
742			printf("\n");
743		}
744		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
745		RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
746		return (-1);
747	}
748	TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates);
749
750	pf_status.fcounters[FCNT_STATE_INSERT]++;
751	pf_status.states++;
752	pfi_attach_state(kif);
753#if NPFSYNC
754	pfsync_insert_state(state);
755#endif
756	return (0);
757}
758
759void
760pf_purge_timeout(void *arg)
761{
762#ifdef __FreeBSD__
763	struct callout  *to = arg;
764#else
765	struct timeout	*to = arg;
766#endif
767	int		 s;
768
769#ifdef __FreeBSD__
770	PF_LOCK();
771#endif
772	s = splsoftnet();
773	pf_purge_expired_states();
774	pf_purge_expired_fragments();
775	pf_purge_expired_src_nodes();
776	splx(s);
777#ifdef __FreeBSD__
778	PF_UNLOCK();
779#endif
780
781#ifdef __FreeBSD__
782	callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz,
783	    pf_purge_timeout, to);
784#else
785	timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz);
786#endif
787}
788
789u_int32_t
790pf_state_expires(const struct pf_state *state)
791{
792	u_int32_t	timeout;
793	u_int32_t	start;
794	u_int32_t	end;
795	u_int32_t	states;
796
797	/* handle all PFTM_* > PFTM_MAX here */
798	if (state->timeout == PFTM_PURGE)
799#ifdef __FreeBSD__
800		return (time_second);
801#else
802		return (time.tv_sec);
803#endif
804	if (state->timeout == PFTM_UNTIL_PACKET)
805		return (0);
806#ifdef __FreeBSD__
807	KASSERT((state->timeout < PFTM_MAX),
808	    ("pf_state_expires: timeout > PFTM_MAX"));
809#else
810	KASSERT(state->timeout < PFTM_MAX);
811#endif
812	timeout = state->rule.ptr->timeout[state->timeout];
813	if (!timeout)
814		timeout = pf_default_rule.timeout[state->timeout];
815	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
816	if (start) {
817		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
818		states = state->rule.ptr->states;
819	} else {
820		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
821		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
822		states = pf_status.states;
823	}
824	if (end && states > start && start < end) {
825		if (states < end)
826			return (state->expire + timeout * (end - states) /
827			    (end - start));
828		else
829#ifdef __FreeBSD__
830			return (time_second);
831#else
832			return (time.tv_sec);
833#endif
834	}
835	return (state->expire + timeout);
836}
837
838void
839pf_purge_expired_src_nodes(void)
840{
841	 struct pf_src_node		*cur, *next;
842
843	 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
844		 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
845
846#ifdef __FreeBSD__
847		 if (cur->states <= 0 && cur->expire <= time_second) {
848#else
849		 if (cur->states <= 0 && cur->expire <= time.tv_sec) {
850#endif
851			 if (cur->rule.ptr != NULL) {
852				 cur->rule.ptr->src_nodes--;
853				 if (cur->rule.ptr->states <= 0 &&
854				     cur->rule.ptr->max_src_nodes <= 0)
855					 pf_rm_rule(NULL, cur->rule.ptr);
856			 }
857			 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
858			 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
859			 pf_status.src_nodes--;
860			 pool_put(&pf_src_tree_pl, cur);
861		 }
862	 }
863}
864
865void
866pf_src_tree_remove_state(struct pf_state *s)
867{
868	u_int32_t timeout;
869
870	if (s->src_node != NULL) {
871		if (--s->src_node->states <= 0) {
872			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
873			if (!timeout)
874				timeout =
875				    pf_default_rule.timeout[PFTM_SRC_NODE];
876#ifdef __FreeBSD__
877			s->src_node->expire = time_second + timeout;
878#else
879			s->src_node->expire = time.tv_sec + timeout;
880#endif
881		}
882	}
883	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
884		if (--s->nat_src_node->states <= 0) {
885			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
886			if (!timeout)
887				timeout =
888				    pf_default_rule.timeout[PFTM_SRC_NODE];
889#ifdef __FreeBSD__
890			s->nat_src_node->expire = time_second + timeout;
891#else
892			s->nat_src_node->expire = time.tv_sec + timeout;
893#endif
894		}
895	}
896	s->src_node = s->nat_src_node = NULL;
897}
898
899void
900pf_purge_expired_states(void)
901{
902	struct pf_state		*cur, *next;
903
904	for (cur = RB_MIN(pf_state_tree_id, &tree_id);
905	    cur; cur = next) {
906		next = RB_NEXT(pf_state_tree_id, &tree_id, cur);
907
908#ifdef __FreeBSD__
909		if (pf_state_expires(cur) <= time_second) {
910#else
911		if (pf_state_expires(cur) <= time.tv_sec) {
912#endif
913			if (cur->src.state == PF_TCPS_PROXY_DST)
914				pf_send_tcp(cur->rule.ptr, cur->af,
915				    &cur->ext.addr, &cur->lan.addr,
916				    cur->ext.port, cur->lan.port,
917				    cur->src.seqhi, cur->src.seqlo + 1, 0,
918				    TH_RST|TH_ACK, 0, 0);
919			RB_REMOVE(pf_state_tree_ext_gwy,
920			    &cur->u.s.kif->pfik_ext_gwy, cur);
921			RB_REMOVE(pf_state_tree_lan_ext,
922			    &cur->u.s.kif->pfik_lan_ext, cur);
923			RB_REMOVE(pf_state_tree_id, &tree_id, cur);
924#if NPFSYNC
925			pfsync_delete_state(cur);
926#endif
927			pf_src_tree_remove_state(cur);
928			if (--cur->rule.ptr->states <= 0 &&
929			    cur->rule.ptr->src_nodes <= 0)
930				pf_rm_rule(NULL, cur->rule.ptr);
931			if (cur->nat_rule.ptr != NULL)
932				if (--cur->nat_rule.ptr->states <= 0 &&
933					cur->nat_rule.ptr->src_nodes <= 0)
934					pf_rm_rule(NULL, cur->nat_rule.ptr);
935			if (cur->anchor.ptr != NULL)
936				if (--cur->anchor.ptr->states <= 0)
937					pf_rm_rule(NULL, cur->anchor.ptr);
938			pf_normalize_tcp_cleanup(cur);
939			pfi_detach_state(cur->u.s.kif);
940			TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
941			pool_put(&pf_state_pl, cur);
942			pf_status.fcounters[FCNT_STATE_REMOVALS]++;
943			pf_status.states--;
944		}
945	}
946}
947
948int
949pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
950{
951	if (aw->type != PF_ADDR_TABLE)
952		return (0);
953	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
954		return (1);
955	return (0);
956}
957
958void
959pf_tbladdr_remove(struct pf_addr_wrap *aw)
960{
961	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
962		return;
963	pfr_detach_table(aw->p.tbl);
964	aw->p.tbl = NULL;
965}
966
967void
968pf_tbladdr_copyout(struct pf_addr_wrap *aw)
969{
970	struct pfr_ktable *kt = aw->p.tbl;
971
972	if (aw->type != PF_ADDR_TABLE || kt == NULL)
973		return;
974	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
975		kt = kt->pfrkt_root;
976	aw->p.tbl = NULL;
977	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
978		kt->pfrkt_cnt : -1;
979}
980
981void
982pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
983{
984	switch (af) {
985#ifdef INET
986	case AF_INET: {
987		u_int32_t a = ntohl(addr->addr32[0]);
988		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
989		    (a>>8)&255, a&255);
990		if (p) {
991			p = ntohs(p);
992			printf(":%u", p);
993		}
994		break;
995	}
996#endif /* INET */
997#ifdef INET6
998	case AF_INET6: {
999		u_int16_t b;
1000		u_int8_t i, curstart = 255, curend = 0,
1001		    maxstart = 0, maxend = 0;
1002		for (i = 0; i < 8; i++) {
1003			if (!addr->addr16[i]) {
1004				if (curstart == 255)
1005					curstart = i;
1006				else
1007					curend = i;
1008			} else {
1009				if (curstart) {
1010					if ((curend - curstart) >
1011					    (maxend - maxstart)) {
1012						maxstart = curstart;
1013						maxend = curend;
1014						curstart = 255;
1015					}
1016				}
1017			}
1018		}
1019		for (i = 0; i < 8; i++) {
1020			if (i >= maxstart && i <= maxend) {
1021				if (maxend != 7) {
1022					if (i == maxstart)
1023						printf(":");
1024				} else {
1025					if (i == maxend)
1026						printf(":");
1027				}
1028			} else {
1029				b = ntohs(addr->addr16[i]);
1030				printf("%x", b);
1031				if (i < 7)
1032					printf(":");
1033			}
1034		}
1035		if (p) {
1036			p = ntohs(p);
1037			printf("[%u]", p);
1038		}
1039		break;
1040	}
1041#endif /* INET6 */
1042	}
1043}
1044
1045void
1046pf_print_state(struct pf_state *s)
1047{
1048	switch (s->proto) {
1049	case IPPROTO_TCP:
1050		printf("TCP ");
1051		break;
1052	case IPPROTO_UDP:
1053		printf("UDP ");
1054		break;
1055	case IPPROTO_ICMP:
1056		printf("ICMP ");
1057		break;
1058	case IPPROTO_ICMPV6:
1059		printf("ICMPV6 ");
1060		break;
1061	default:
1062		printf("%u ", s->proto);
1063		break;
1064	}
1065	pf_print_host(&s->lan.addr, s->lan.port, s->af);
1066	printf(" ");
1067	pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
1068	printf(" ");
1069	pf_print_host(&s->ext.addr, s->ext.port, s->af);
1070	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
1071	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
1072	if (s->src.wscale && s->dst.wscale)
1073		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1074	printf("]");
1075	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
1076	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1077	if (s->src.wscale && s->dst.wscale)
1078		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1079	printf("]");
1080	printf(" %u:%u", s->src.state, s->dst.state);
1081}
1082
1083void
1084pf_print_flags(u_int8_t f)
1085{
1086	if (f)
1087		printf(" ");
1088	if (f & TH_FIN)
1089		printf("F");
1090	if (f & TH_SYN)
1091		printf("S");
1092	if (f & TH_RST)
1093		printf("R");
1094	if (f & TH_PUSH)
1095		printf("P");
1096	if (f & TH_ACK)
1097		printf("A");
1098	if (f & TH_URG)
1099		printf("U");
1100	if (f & TH_ECE)
1101		printf("E");
1102	if (f & TH_CWR)
1103		printf("W");
1104}
1105
/*
 * Helper for pf_calc_skip_steps().  Expands in a scope that provides the
 * variables `head' (array of rule pointers indexed by skip step) and `cur'
 * (the rule currently being examined).  Every rule from head[i] up to, but
 * not including, cur gets its skip[i] pointer set to cur; head[i] ends up
 * equal to cur.
 */
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)
1113
1114void
1115pf_calc_skip_steps(struct pf_rulequeue *rules)
1116{
1117	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1118	int i;
1119
1120	cur = TAILQ_FIRST(rules);
1121	prev = cur;
1122	for (i = 0; i < PF_SKIP_COUNT; ++i)
1123		head[i] = cur;
1124	while (cur != NULL) {
1125
1126		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1127			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1128		if (cur->direction != prev->direction)
1129			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1130		if (cur->af != prev->af)
1131			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1132		if (cur->proto != prev->proto)
1133			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1134		if (cur->src.not != prev->src.not ||
1135		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1136			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1137		if (cur->src.port[0] != prev->src.port[0] ||
1138		    cur->src.port[1] != prev->src.port[1] ||
1139		    cur->src.port_op != prev->src.port_op)
1140			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1141		if (cur->dst.not != prev->dst.not ||
1142		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1143			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1144		if (cur->dst.port[0] != prev->dst.port[0] ||
1145		    cur->dst.port[1] != prev->dst.port[1] ||
1146		    cur->dst.port_op != prev->dst.port_op)
1147			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1148
1149		prev = cur;
1150		cur = TAILQ_NEXT(cur, entries);
1151	}
1152	for (i = 0; i < PF_SKIP_COUNT; ++i)
1153		PF_SET_SKIP_STEPS(i);
1154}
1155
1156int
1157pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1158{
1159	if (aw1->type != aw2->type)
1160		return (1);
1161	switch (aw1->type) {
1162	case PF_ADDR_ADDRMASK:
1163		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1164			return (1);
1165		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1166			return (1);
1167		return (0);
1168	case PF_ADDR_DYNIFTL:
1169		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1170	case PF_ADDR_NOROUTE:
1171		return (0);
1172	case PF_ADDR_TABLE:
1173		return (aw1->p.tbl != aw2->p.tbl);
1174	default:
1175		printf("invalid address type: %d\n", aw1->type);
1176		return (1);
1177	}
1178}
1179
1180void
1181pf_update_anchor_rules()
1182{
1183	struct pf_rule	*rule;
1184	int		 i;
1185
1186	for (i = 0; i < PF_RULESET_MAX; ++i)
1187		TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr,
1188		    entries)
1189			if (rule->anchorname[0])
1190				rule->anchor = pf_find_anchor(rule->anchorname);
1191			else
1192				rule->anchor = NULL;
1193}
1194
/*
 * Incrementally adjust the 16-bit checksum cksum for a 16-bit word that
 * changed from old to new, and return the adjusted checksum.
 *
 * When udp is non-zero, a checksum of 0 is passed through unchanged and
 * a computed result of 0 is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	/* may go "negative"; the fold below brings the carry back in */
	sum = cksum + old - new;
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	return ((udp && sum == 0) ? 0xFFFF : sum);
}
1209
1210void
1211pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1212    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1213{
1214	struct pf_addr	ao;
1215	u_int16_t	po = *p;
1216
1217	PF_ACPY(&ao, a, af);
1218	PF_ACPY(a, an, af);
1219
1220	*p = pn;
1221
1222	switch (af) {
1223#ifdef INET
1224	case AF_INET:
1225		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1226		    ao.addr16[0], an->addr16[0], 0),
1227		    ao.addr16[1], an->addr16[1], 0);
1228		*p = pn;
1229		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1230		    ao.addr16[0], an->addr16[0], u),
1231		    ao.addr16[1], an->addr16[1], u),
1232		    po, pn, u);
1233		break;
1234#endif /* INET */
1235#ifdef INET6
1236	case AF_INET6:
1237		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1238		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1239		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1240		    ao.addr16[0], an->addr16[0], u),
1241		    ao.addr16[1], an->addr16[1], u),
1242		    ao.addr16[2], an->addr16[2], u),
1243		    ao.addr16[3], an->addr16[3], u),
1244		    ao.addr16[4], an->addr16[4], u),
1245		    ao.addr16[5], an->addr16[5], u),
1246		    ao.addr16[6], an->addr16[6], u),
1247		    ao.addr16[7], an->addr16[7], u),
1248		    po, pn, u);
1249		break;
1250#endif /* INET6 */
1251	}
1252}
1253
1254
/*
 * Replace the u_int32_t stored at a with an and patch checksum *c for
 * both 16-bit halves of the value.  The location is accessed through
 * memcpy() on a void pointer, so it carries no alignment requirement.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	prev;
	u_int16_t	sum;

	memcpy(&prev, a, sizeof(prev));
	memcpy(a, &an, sizeof(u_int32_t));

	/* upper half first, then lower half */
	sum = pf_cksum_fixup(*c, prev >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, prev & 0xffff, an & 0xffff, u);
}
1266
1267#ifdef INET6
1268void
1269pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
1270{
1271	struct pf_addr	ao;
1272
1273	PF_ACPY(&ao, a, AF_INET6);
1274	PF_ACPY(a, an, AF_INET6);
1275
1276	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1277	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1278	    pf_cksum_fixup(pf_cksum_fixup(*c,
1279	    ao.addr16[0], an->addr16[0], u),
1280	    ao.addr16[1], an->addr16[1], u),
1281	    ao.addr16[2], an->addr16[2], u),
1282	    ao.addr16[3], an->addr16[3], u),
1283	    ao.addr16[4], an->addr16[4], u),
1284	    ao.addr16[5], an->addr16[5], u),
1285	    ao.addr16[6], an->addr16[6], u),
1286	    ao.addr16[7], an->addr16[7], u);
1287}
1288#endif /* INET6 */
1289
1290void
1291pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1292    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1293    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1294{
1295	struct pf_addr	oia, ooa;
1296
1297	PF_ACPY(&oia, ia, af);
1298	PF_ACPY(&ooa, oa, af);
1299
1300	/* Change inner protocol port, fix inner protocol checksum. */
1301	if (ip != NULL) {
1302		u_int16_t	oip = *ip;
1303		u_int32_t	opc = 0;	/* make the compiler happy */
1304
1305		if (pc != NULL)
1306			opc = *pc;
1307		*ip = np;
1308		if (pc != NULL)
1309			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1310		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1311		if (pc != NULL)
1312			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1313	}
1314	/* Change inner ip address, fix inner ip and icmp checksums. */
1315	PF_ACPY(ia, na, af);
1316	switch (af) {
1317#ifdef INET
1318	case AF_INET: {
1319		u_int32_t	 oh2c = *h2c;
1320
1321		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1322		    oia.addr16[0], ia->addr16[0], 0),
1323		    oia.addr16[1], ia->addr16[1], 0);
1324		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1325		    oia.addr16[0], ia->addr16[0], 0),
1326		    oia.addr16[1], ia->addr16[1], 0);
1327		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1328		break;
1329	}
1330#endif /* INET */
1331#ifdef INET6
1332	case AF_INET6:
1333		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1334		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1335		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1336		    oia.addr16[0], ia->addr16[0], u),
1337		    oia.addr16[1], ia->addr16[1], u),
1338		    oia.addr16[2], ia->addr16[2], u),
1339		    oia.addr16[3], ia->addr16[3], u),
1340		    oia.addr16[4], ia->addr16[4], u),
1341		    oia.addr16[5], ia->addr16[5], u),
1342		    oia.addr16[6], ia->addr16[6], u),
1343		    oia.addr16[7], ia->addr16[7], u);
1344		break;
1345#endif /* INET6 */
1346	}
1347	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
1348	PF_ACPY(oa, na, af);
1349	switch (af) {
1350#ifdef INET
1351	case AF_INET:
1352		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1353		    ooa.addr16[0], oa->addr16[0], 0),
1354		    ooa.addr16[1], oa->addr16[1], 0);
1355		break;
1356#endif /* INET */
1357#ifdef INET6
1358	case AF_INET6:
1359		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1360		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1361		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1362		    ooa.addr16[0], oa->addr16[0], u),
1363		    ooa.addr16[1], oa->addr16[1], u),
1364		    ooa.addr16[2], oa->addr16[2], u),
1365		    ooa.addr16[3], oa->addr16[3], u),
1366		    ooa.addr16[4], oa->addr16[4], u),
1367		    ooa.addr16[5], oa->addr16[5], u),
1368		    ooa.addr16[6], oa->addr16[6], u),
1369		    ooa.addr16[7], oa->addr16[7], u);
1370		break;
1371#endif /* INET6 */
1372	}
1373}
1374
/*
 * Build a TCP segment without payload in a fresh mbuf and pass it to
 * ip_output() or ip6_output().
 *
 *   r             rule whose qid (if non-zero) is attached as an ALTQ tag;
 *                 may be NULL
 *   af            AF_INET or AF_INET6, selects the IP header that is built
 *   saddr, daddr  copied into the IP header
 *   sport, dport  stored in the TCP header as given (no byte swapping here)
 *   seq, ack      stored after htonl()
 *   flags         stored in th_flags
 *   win           stored after htons()
 *   mss           if non-zero, a 4-byte maximum segment size option is added
 *   ttl           IPv4 TTL, 0 selects ip_defttl; the IPv6 branch always
 *                 uses IPV6_DEFHLIM
 *
 * Returns without sending anything if the mbuf (or, in the non-FreeBSD
 * branch, the PF_GENERATED tag) cannot be allocated.  In the FreeBSD
 * branches PF_UNLOCK()/PF_LOCK() bracket the output call.
 *
 * NOTE(review): if af matches none of the compiled-in cases, len stays 0
 * and th stays NULL, and th is dereferenced below -- callers presumably
 * never pass another family; confirm.
 */
void
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl)
{
	struct mbuf	*m;
#ifdef ALTQ
	struct m_tag	*mtag;
#endif
	int		 len = 0, tlen;		/* make the compiler happy */
#ifdef INET
	struct ip	*h = NULL;		/* make the compiler happy */
#endif /* INET */
#ifdef INET6
	struct ip6_hdr	*h6 = NULL;		/* make the compiler happy */
#endif /* INET6 */
	struct tcphdr	*th = NULL;		/* make the compiler happy */
#ifdef __FreeBSD__
	struct ip 	*ip;
#endif
	char *opt;

	/* maximum segment size tcp option */
	tlen = sizeof(struct tcphdr);
	if (mss)
		tlen += 4;

	/* total packet length: IP header plus TCP header and option */
	switch (af) {
#ifdef INET
	case AF_INET:
		len = sizeof(struct ip) + tlen;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		len = sizeof(struct ip6_hdr) + tlen;
		break;
#endif /* INET6 */
	}

	/* create outgoing mbuf */
#ifdef __FreeBSD__
	m = m_gethdr(M_DONTWAIT, MT_HEADER);
	if (m == NULL)
		return;
	/* FreeBSD marks the packet with an mbuf flag instead of a tag */
	m->m_flags |= M_SKIP_FIREWALL;
#else
	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
	if (mtag == NULL)
		return;
	m = m_gethdr(M_DONTWAIT, MT_HEADER);
	if (m == NULL) {
		m_tag_free(mtag);
		return;
	}
	m_tag_prepend(m, mtag);
#endif
#ifdef ALTQ
	/* queue tagging is best effort: a failed tag allocation is ignored */
	if (r != NULL && r->qid) {
		struct altq_tag *atag;

		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
		if (mtag != NULL) {
			atag = (struct altq_tag *)(mtag + 1);
			atag->qid = r->qid;
			/* add hints for ecn */
			atag->af = af;
			atag->hdr = mtod(m, struct ip *);
			m_tag_prepend(m, mtag);
		}
	}
#endif
	/* leave room for a link header in front, then zero the packet */
	m->m_data += max_linkhdr;
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	bzero(m->m_data, len);
	switch (af) {
#ifdef INET
	case AF_INET:
		h = mtod(m, struct ip *);

		/* IP header fields included in the TCP checksum */
		h->ip_p = IPPROTO_TCP;
		h->ip_len = htons(tlen);
		h->ip_src.s_addr = saddr->v4.s_addr;
		h->ip_dst.s_addr = daddr->v4.s_addr;

		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		h6 = mtod(m, struct ip6_hdr *);

		/* IP header fields included in the TCP checksum */
		h6->ip6_nxt = IPPROTO_TCP;
		h6->ip6_plen = htons(tlen);
		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));

		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
		break;
#endif /* INET6 */
	}

	/* TCP header */
	th->th_sport = sport;
	th->th_dport = dport;
	th->th_seq = htonl(seq);
	th->th_ack = htonl(ack);
	th->th_off = tlen >> 2;
	th->th_flags = flags;
	th->th_win = htons(win);

	if (mss) {
		/* option bytes directly follow the fixed TCP header */
		opt = (char *)(th + 1);
		opt[0] = TCPOPT_MAXSEG;
		opt[1] = 4;
		HTONS(mss);
		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
	}

	switch (af) {
#ifdef INET
	case AF_INET:
		/* TCP checksum */
		th->th_sum = in_cksum(m, len);

		/* Finish the IP header */
		h->ip_v = 4;
		h->ip_hl = sizeof(*h) >> 2;
		h->ip_tos = IPTOS_LOWDELAY;
#ifdef __FreeBSD__
		/* this branch stores ip_off and ip_len without htons() */
		h->ip_off = path_mtu_discovery ? IP_DF : 0;
		h->ip_len = len;
#else
		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
		h->ip_len = htons(len);
#endif
		h->ip_ttl = ttl ? ttl : ip_defttl;
		h->ip_sum = 0;
#ifdef __FreeBSD__
		ip = mtod(m, struct ip *);
		PF_UNLOCK();
		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
			(void *)NULL);
		PF_LOCK();
#else /* ! __FreeBSD__ */
		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
		    (void *)NULL);
#endif
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/* TCP checksum */
		th->th_sum = in6_cksum(m, IPPROTO_TCP,
		    sizeof(struct ip6_hdr), tlen);

		h6->ip6_vfc |= IPV6_VERSION;
		h6->ip6_hlim = IPV6_DEFHLIM;

#ifdef __FreeBSD__
		PF_UNLOCK();
		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
		PF_LOCK();
#else
		ip6_output(m, NULL, NULL, 0, NULL, NULL);
#endif
		break;
#endif /* INET6 */
	}
}
1549
/*
 * Send an ICMP or ICMPv6 error of the given type and code in response to
 * packet m.  The error is generated from a copy of m (m0), which is handed
 * to icmp_error() or icmp6_error(); m itself is not passed on.
 *
 *   af  AF_INET or AF_INET6, selects which error routine is called
 *   r   rule whose qid (if non-zero) is attached as an ALTQ tag; it is
 *       dereferenced without a NULL check when ALTQ is compiled in
 *
 * Returns without sending anything if the copy (or, in the non-FreeBSD
 * branch, the PF_GENERATED tag) cannot be allocated.  In the FreeBSD
 * branches PF_UNLOCK()/PF_LOCK() bracket the error routine.
 */
void
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
    struct pf_rule *r)
{
#ifdef ALTQ
	struct m_tag	*mtag;
#endif
	struct mbuf	*m0;
#ifdef __FreeBSD__
	struct ip *ip;
#endif

#ifdef __FreeBSD__
	m0 = m_copypacket(m, M_DONTWAIT);
	if (m0 == NULL)
		return;
	/* FreeBSD marks the copy with an mbuf flag instead of a tag */
	m0->m_flags |= M_SKIP_FIREWALL;
#else
	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
	if (mtag == NULL)
		return;
	m0 = m_copy(m, 0, M_COPYALL);
	if (m0 == NULL) {
		m_tag_free(mtag);
		return;
	}
	m_tag_prepend(m0, mtag);
#endif

#ifdef ALTQ
	/* queue tagging is best effort: a failed tag allocation is ignored */
	if (r->qid) {
		struct altq_tag *atag;

		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
		if (mtag != NULL) {
			atag = (struct altq_tag *)(mtag + 1);
			atag->qid = r->qid;
			/* add hints for ecn */
			atag->af = af;
			atag->hdr = mtod(m0, struct ip *);
			m_tag_prepend(m0, mtag);
		}
	}
#endif

	switch (af) {
#ifdef INET
	case AF_INET:
#ifdef __FreeBSD__
		/* icmp_error() expects host byte ordering */
		ip = mtod(m0, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);
		PF_UNLOCK();
#endif
		icmp_error(m0, type, code, 0, (void *)NULL);
#ifdef __FreeBSD__
		PF_LOCK();
#endif
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
#ifdef __FreeBSD__
		PF_UNLOCK();
#endif
		icmp6_error(m0, type, code, 0);
#ifdef __FreeBSD__
		PF_LOCK();
#endif
		break;
#endif /* INET6 */
	}
}
1624
/*
 * Compare addresses a and b under mask m for address family af.
 * With n == 0 the result is 1 when the masked addresses are equal and 0
 * otherwise; with n != 0 the result is inverted.  For an address family
 * that is not compiled in, the addresses count as not equal.
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	same = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		same = ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	w;

		/* all four 32-bit words have to agree under the mask */
		same = 1;
		for (w = 0; w < 4; w++) {
			if ((a->addr32[w] & m->addr32[w]) !=
			    (b->addr32[w] & m->addr32[w])) {
				same = 0;
				break;
			}
		}
		break;
	}
#endif /* INET6 */
	}

	return (n ? !same : same);
}
1670
1671int
1672pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1673{
1674	switch (op) {
1675	case PF_OP_IRG:
1676		return ((p > a1) && (p < a2));
1677	case PF_OP_XRG:
1678		return ((p < a1) || (p > a2));
1679	case PF_OP_RRG:
1680		return ((p >= a1) && (p <= a2));
1681	case PF_OP_EQ:
1682		return (p == a1);
1683	case PF_OP_NE:
1684		return (p != a1);
1685	case PF_OP_LT:
1686		return (p < a1);
1687	case PF_OP_LE:
1688		return (p <= a1);
1689	case PF_OP_GT:
1690		return (p > a1);
1691	case PF_OP_GE:
1692		return (p >= a1);
1693	}
1694	return (0); /* never reached */
1695}
1696
/*
 * Port variant of pf_match(): a1, a2 and p are converted from network to
 * host byte order before the comparison is made.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	u_int16_t	lo = ntohs(a1);
	u_int16_t	hi = ntohs(a2);
	u_int16_t	port = ntohs(p);

	return (pf_match(op, lo, hi, port));
}
1705
1706int
1707pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1708{
1709	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1710		return (0);
1711	return (pf_match(op, a1, a2, u));
1712}
1713
1714int
1715pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1716{
1717	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1718		return (0);
1719	return (pf_match(op, a1, a2, g));
1720}
1721
1722struct pf_tag *
1723pf_get_tag(struct mbuf *m)
1724{
1725	struct m_tag	*mtag;
1726
1727	if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL)
1728		return ((struct pf_tag *)(mtag + 1));
1729	else
1730		return (NULL);
1731}
1732
1733int
1734pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat_rule,
1735    struct pf_tag *pftag, int *tag)
1736{
1737	if (*tag == -1) {	/* find mbuf tag */
1738		pftag = pf_get_tag(m);
1739		if (pftag != NULL)
1740			*tag = pftag->tag;
1741		else
1742			*tag = 0;
1743		if (nat_rule != NULL && nat_rule->tag)
1744			*tag = nat_rule->tag;
1745	}
1746
1747	return ((!r->match_tag_not && r->match_tag == *tag) ||
1748	    (r->match_tag_not && r->match_tag != *tag));
1749}
1750
1751int
1752pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag)
1753{
1754	struct m_tag	*mtag;
1755
1756	if (tag <= 0)
1757		return (0);
1758
1759	if (pftag == NULL) {
1760		mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT);
1761		if (mtag == NULL)
1762			return (1);
1763		((struct pf_tag *)(mtag + 1))->tag = tag;
1764		m_tag_prepend(m, mtag);
1765	} else
1766		pftag->tag = tag;
1767
1768	return (0);
1769}
1770
/*
 * Descend from anchor rule r into the rulesets of its anchor.
 *
 *   r  in: the anchor rule; out: the next rule to evaluate
 *   a  in: must be NULL; out: the anchor rule while inside the anchor
 *   s  in: must be NULL; out: the ruleset currently being walked
 *   n  index into the rules[] array of each ruleset
 *
 * Panics if r is NULL, r has no anchor, or a/s are not NULL.  The first
 * ruleset of the anchor whose active rule list n is non-empty is selected
 * and r is set to its first rule.  If every ruleset is empty, r becomes
 * the rule following the anchor rule and a is reset to NULL.
 * The arguments are evaluated more than once.
 */
#define PF_STEP_INTO_ANCHOR(r, a, s, n)					\
	do {								\
		if ((r) == NULL || (r)->anchor == NULL ||		\
		    (s) != NULL || (a) != NULL)				\
			panic("PF_STEP_INTO_ANCHOR");			\
		(a) = (r);						\
		(s) = TAILQ_FIRST(&(r)->anchor->rulesets);		\
		(r) = NULL;						\
		while ((s) != NULL && ((r) =				\
		    TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL)	\
			(s) = TAILQ_NEXT((s), entries);			\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1787
/*
 * Continue after the rules of ruleset s inside an anchor are exhausted.
 *
 *   r  in: must be NULL; out: the next rule to evaluate
 *   a  in: the anchor rule that was stepped into
 *   s  in: the ruleset just finished; out: the ruleset now being walked
 *   n  index into the rules[] array of each ruleset
 *
 * Panics if r is not NULL or a/s are NULL.  Advances to the next ruleset
 * of the anchor whose active rule list n is non-empty and sets r to its
 * first rule.  If there is none, r becomes the rule following the anchor
 * rule and a is reset to NULL.
 * The arguments are evaluated more than once.
 */
#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n)				\
	do {								\
		if ((r) != NULL || (a) == NULL || (s) == NULL)		\
			panic("PF_STEP_OUT_OF_ANCHOR");			\
		(s) = TAILQ_NEXT((s), entries);				\
		while ((s) != NULL && ((r) =				\
		    TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL)	\
			(s) = TAILQ_NEXT((s), entries);			\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1801
1802#ifdef INET6
1803void
1804pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
1805    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
1806{
1807	switch (af) {
1808#ifdef INET
1809	case AF_INET:
1810		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1811		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1812		break;
1813#endif /* INET */
1814	case AF_INET6:
1815		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1816		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1817		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
1818		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
1819		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
1820		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
1821		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
1822		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
1823		break;
1824	}
1825}
1826
1827void
1828pf_addr_inc(struct pf_addr *addr, sa_family_t af)
1829{
1830	switch (af) {
1831#ifdef INET
1832	case AF_INET:
1833		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
1834		break;
1835#endif /* INET */
1836	case AF_INET6:
1837		if (addr->addr32[3] == 0xffffffff) {
1838			addr->addr32[3] = 0;
1839			if (addr->addr32[2] == 0xffffffff) {
1840				addr->addr32[2] = 0;
1841				if (addr->addr32[1] == 0xffffffff) {
1842					addr->addr32[1] = 0;
1843					addr->addr32[0] =
1844					    htonl(ntohl(addr->addr32[0]) + 1);
1845				} else
1846					addr->addr32[1] =
1847					    htonl(ntohl(addr->addr32[1]) + 1);
1848			} else
1849				addr->addr32[2] =
1850				    htonl(ntohl(addr->addr32[2]) + 1);
1851		} else
1852			addr->addr32[3] =
1853			    htonl(ntohl(addr->addr32[3]) + 1);
1854		break;
1855	}
1856}
1857#endif /* INET6 */
1858
/*
 * Mixing step used by pf_hash(): scrambles the three values a, b and c in
 * place through nine rounds of subtract/subtract/xor-with-shift.  The
 * arguments are used unparenthesized and modified, so they must be plain
 * lvalues.
 */
#define mix(a,b,c) \
	do {					\
		a -= b; a -= c; a ^= (c >> 13);	\
		b -= c; b -= a; b ^= (a << 8);	\
		c -= a; c -= b; c ^= (b >> 13);	\
		a -= b; a -= c; a ^= (c >> 12);	\
		b -= c; b -= a; b ^= (a << 16);	\
		c -= a; c -= b; c ^= (b >> 5);	\
		a -= b; a -= c; a ^= (c >> 3);	\
		b -= c; b -= a; b ^= (a << 10);	\
		c -= a; c -= b; c ^= (b >> 15);	\
	} while (0)
1871
/*
 * hash function based on bridge_hash in if_bridge.c
 *
 * Hashes address inaddr together with the pool key and stores the result
 * in hash.  For AF_INET only hash->addr32[0] is written; for AF_INET6 all
 * four 32-bit words are written.  For any other address family hash is
 * left untouched.
 */
void
pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
    struct pf_poolhashkey *key, sa_family_t af)
{
	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];

	switch (af) {
#ifdef INET
	case AF_INET:
		/* single round: address word in a, key words in b and c */
		a += inaddr->addr32[0];
		b += key->key32[1];
		mix(a, b, c);
		hash->addr32[0] = c + key->key32[2];
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/*
		 * Four chained rounds, one per output word; a, b and c
		 * carry over from round to round, and each later round
		 * folds a further key word into c.
		 */
		a += inaddr->addr32[0];
		b += inaddr->addr32[2];
		mix(a, b, c);
		hash->addr32[0] = c;
		a += inaddr->addr32[1];
		b += inaddr->addr32[3];
		c += key->key32[1];
		mix(a, b, c);
		hash->addr32[1] = c;
		a += inaddr->addr32[2];
		b += inaddr->addr32[1];
		c += key->key32[2];
		mix(a, b, c);
		hash->addr32[2] = c;
		a += inaddr->addr32[3];
		b += inaddr->addr32[0];
		c += key->key32[3];
		mix(a, b, c);
		hash->addr32[3] = c;
		break;
#endif /* INET6 */
	}
}
1915
/*
 * Select an address from the address pool of rule r and store it in
 * naddr.
 *
 *   af         address family of saddr/naddr
 *   r          rule whose rpool is used; its pool state (cur, counter,
 *              tblidx) is advanced by this call
 *   saddr      address the selection is based on (bitmask and source-hash
 *              pools, sticky-address lookup)
 *   naddr      out: the selected address
 *   init_addr  used by random pools: when non-NULL and all zero, a fresh
 *              random counter is drawn and the selected address is also
 *              copied to *init_addr; otherwise the counter is incremented
 *   sn         in/out: source node; looked up here when *sn is NULL and
 *              the pool has PF_POOL_STICKYADDR set
 *
 * Returns 0 when an address was selected and 1 when none could be (pool
 * entry of type PF_ADDR_NOROUTE, dynamic interface without addresses,
 * table used with a non-round-robin pool, or no table entry of family af).
 */
int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char		 hash[16];
	struct pf_pool		*rpool = &r->rpool;
	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
	struct pf_pooladdr	*acur = rpool->cur;
	struct pf_src_node	 k;

	/*
	 * Sticky address: if a source node for saddr already records a
	 * non-zero translation address, reuse it and skip the pool logic.
	 */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR)
			k.rule.ptr = r;
		else
			k.rule.ptr = NULL;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
			PF_ACPY(naddr, &(*sn)->raddr, af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, af);
				printf("\n");
			}
			return (0);
		}
	}

	/* pick raddr/rmask according to the type of the current pool entry */
	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
		return (1);
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		if (af == AF_INET) {
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
		} else {
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
			return (1); /* unsupported */
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		PF_ACPY(naddr, raddr, af);
		break;
	case PF_POOL_BITMASK:
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
			switch (af) {
#ifdef INET
			case AF_INET:
				rpool->counter.addr32[0] = arc4random();
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				/*
				 * Randomize counter words from word 3 down,
				 * stopping at the first word whose mask is
				 * all ones.
				 */
				if (rmask->addr32[3] != 0xffffffff)
					rpool->counter.addr32[3] = arc4random();
				else
					break;
				if (rmask->addr32[2] != 0xffffffff)
					rpool->counter.addr32[2] = arc4random();
				else
					break;
				if (rmask->addr32[1] != 0xffffffff)
					rpool->counter.addr32[1] = arc4random();
				else
					break;
				if (rmask->addr32[0] != 0xffffffff)
					rpool->counter.addr32[0] = arc4random();
				break;
#endif /* INET6 */
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
			PF_ACPY(init_addr, naddr, af);

		} else {
			PF_AINC(&rpool->counter, af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
		}
		break;
	case PF_POOL_SRCHASH:
		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		/*
		 * First try to continue within the current pool entry:
		 * pfr_pool_get() returning 0, or the counter still matching
		 * raddr/rmask, means the counter holds a usable address.
		 */
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af))
				goto get_addr;
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af))
				goto get_addr;
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
			goto get_addr;

	try_next:
		/* advance to the next pool entry, wrapping to the first */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
			rpool->cur = TAILQ_FIRST(&rpool->list);
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				/* give up once we are back at the entry we started from */
				if (rpool->cur != acur)
					goto try_next;
				return (1);
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (rpool->cur != acur)
					goto try_next;
				return (1);
			}
		} else {
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, af);
		}

	get_addr:
		/* hand out the counter value, then advance it for next time */
		PF_ACPY(naddr, &rpool->counter, af);
		PF_AINC(&rpool->counter, af);
		break;
	}
	/* remember the selection in the source node, if there is one */
	if (*sn != NULL)
		PF_ACPY(&(*sn)->raddr, naddr, af);

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, af);
		printf("\n");
	}

	return (0);
}
2083
2084int
2085pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2086    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2087    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2088    struct pf_src_node **sn)
2089{
2090	struct pf_state		key;
2091	struct pf_addr		init_addr;
2092	u_int16_t		cut;
2093
2094	bzero(&init_addr, sizeof(init_addr));
2095	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2096		return (1);
2097
2098	do {
2099		key.af = af;
2100		key.proto = proto;
2101		PF_ACPY(&key.ext.addr, daddr, key.af);
2102		PF_ACPY(&key.gwy.addr, naddr, key.af);
2103		key.ext.port = dport;
2104
2105		/*
2106		 * port search; start random, step;
2107		 * similar 2 portloop in in_pcbbind
2108		 */
2109		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
2110			key.gwy.port = 0;
2111			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2112				return (0);
2113		} else if (low == 0 && high == 0) {
2114			key.gwy.port = *nport;
2115			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2116				return (0);
2117		} else if (low == high) {
2118			key.gwy.port = htons(low);
2119			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2120				*nport = htons(low);
2121				return (0);
2122			}
2123		} else {
2124			u_int16_t tmp;
2125
2126			if (low > high) {
2127				tmp = low;
2128				low = high;
2129				high = tmp;
2130			}
2131			/* low < high */
2132			cut = arc4random() % (1 + high - low) + low;
2133			/* low <= cut <= high */
2134			for (tmp = cut; tmp <= high; ++(tmp)) {
2135				key.gwy.port = htons(tmp);
2136				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2137				    NULL) {
2138					*nport = htons(tmp);
2139					return (0);
2140				}
2141			}
2142			for (tmp = cut - 1; tmp >= low; --(tmp)) {
2143				key.gwy.port = htons(tmp);
2144				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2145				    NULL) {
2146					*nport = htons(tmp);
2147					return (0);
2148				}
2149			}
2150		}
2151
2152		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2153		case PF_POOL_RANDOM:
2154		case PF_POOL_ROUNDROBIN:
2155			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2156				return (1);
2157			break;
2158		case PF_POOL_NONE:
2159		case PF_POOL_SRCHASH:
2160		case PF_POOL_BITMASK:
2161		default:
2162			return (1);
2163		}
2164	} while (! PF_AEQ(&init_addr, naddr, af) );
2165
2166	return (1);					/* none available */
2167}
2168
/*
 * Find the translation rule that applies to a packet.
 *
 * Walks the active rule list rs_num of the main ruleset, descending into
 * anchors, and stops at the first rule for which every criterion matches:
 * interface, direction, address family, protocol, source address/port,
 * destination address/port and OS fingerprint.  Each rule visited has its
 * evaluations counter incremented.
 *
 * For a PF_BINAT rule evaluated with direction PF_IN the rule's dst
 * address/port is compared against the packet's source (saddr/sport), and
 * the current pool address (if any) against the packet's destination.
 *
 * Returns the matching rule, or NULL if no rule matches or the matching
 * rule's action is PF_NONAT, PF_NORDR or PF_NOBINAT.
 */
struct pf_rule *
pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
    struct pf_addr *daddr, u_int16_t dport, int rs_num)
{
	struct pf_rule		*r, *rm = NULL, *anchorrule = NULL;
	struct pf_ruleset	*ruleset = NULL;

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r && rm == NULL) {
		struct pf_rule_addr	*src = NULL, *dst = NULL;
		struct pf_addr_wrap	*xdst = NULL;

		if (r->action == PF_BINAT && direction == PF_IN) {
			src = &r->dst;
			if (r->rpool.cur != NULL)
				xdst = &r->rpool.cur->addr;
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		r->evaluations++;
		/*
		 * On a mismatch, jump via the precomputed skip pointer for
		 * that criterion where one exists; criteria without a skip
		 * step just advance to the next rule.
		 */
		if (r->kif != NULL &&
		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != pd->af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not))
			/* src may alias r->dst (binat in), pick the matching skip step */
			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
			    PF_SKIP_DST_ADDR].ptr;
		else if (src->port_op && !pf_match_port(src->port_op,
		    src->port[0], src->port[1], sport))
			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT].ptr;
		else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0))
			r = TAILQ_NEXT(r, entries);
		else if (dst != NULL && dst->port_op &&
		    !pf_match_port(dst->port_op, dst->port[0],
		    dst->port[1], dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
		    off, pd->hdr.tcp), r->os_fingerprint)))
			r = TAILQ_NEXT(r, entries);
		else if (r->anchorname[0] && r->anchor == NULL)
			/* anchor rule whose anchor is not resolved: skip it */
			r = TAILQ_NEXT(r, entries);
		else if (r->anchor == NULL)
				rm = r;
		else
			PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num);
		/* end of a ruleset inside an anchor: move on or leave the anchor */
		if (r == NULL && anchorrule != NULL)
			PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset,
			    rs_num);
	}
	/* a matching "no nat/rdr/binat" rule means: do not translate */
	if (rm != NULL && (rm->action == PF_NONAT ||
	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
		return (NULL);
	return (rm);
}
2236
2237struct pf_rule *
2238pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2239    struct pfi_kif *kif, struct pf_src_node **sn,
2240    struct pf_addr *saddr, u_int16_t sport,
2241    struct pf_addr *daddr, u_int16_t dport,
2242    struct pf_addr *naddr, u_int16_t *nport)
2243{
2244	struct pf_rule	*r = NULL;
2245
2246	if (direction == PF_OUT) {
2247		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2248		    sport, daddr, dport, PF_RULESET_BINAT);
2249		if (r == NULL)
2250			r = pf_match_translation(pd, m, off, direction, kif,
2251			    saddr, sport, daddr, dport, PF_RULESET_NAT);
2252	} else {
2253		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2254		    sport, daddr, dport, PF_RULESET_RDR);
2255		if (r == NULL)
2256			r = pf_match_translation(pd, m, off, direction, kif,
2257			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
2258	}
2259
2260	if (r != NULL) {
2261		switch (r->action) {
2262		case PF_NONAT:
2263		case PF_NOBINAT:
2264		case PF_NORDR:
2265			return (NULL);
2266		case PF_NAT:
2267			if (pf_get_sport(pd->af, pd->proto, r, saddr,
2268			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2269			    r->rpool.proxy_port[1], sn)) {
2270				DPFPRINTF(PF_DEBUG_MISC,
2271				    ("pf: NAT proxy port allocation "
2272				    "(%u-%u) failed\n",
2273				    r->rpool.proxy_port[0],
2274				    r->rpool.proxy_port[1]));
2275				return (NULL);
2276			}
2277			break;
2278		case PF_BINAT:
2279			switch (direction) {
2280			case PF_OUT:
2281				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2282					if (pd->af == AF_INET) {
2283						if (r->rpool.cur->addr.p.dyn->
2284						    pfid_acnt4 < 1)
2285							return (NULL);
2286						PF_POOLMASK(naddr,
2287						    &r->rpool.cur->addr.p.dyn->
2288						    pfid_addr4,
2289						    &r->rpool.cur->addr.p.dyn->
2290						    pfid_mask4,
2291						    saddr, AF_INET);
2292					} else {
2293						if (r->rpool.cur->addr.p.dyn->
2294						    pfid_acnt6 < 1)
2295							return (NULL);
2296						PF_POOLMASK(naddr,
2297						    &r->rpool.cur->addr.p.dyn->
2298						    pfid_addr6,
2299						    &r->rpool.cur->addr.p.dyn->
2300						    pfid_mask6,
2301						    saddr, AF_INET6);
2302					}
2303				} else
2304					PF_POOLMASK(naddr,
2305					    &r->rpool.cur->addr.v.a.addr,
2306					    &r->rpool.cur->addr.v.a.mask,
2307					    saddr, pd->af);
2308				break;
2309			case PF_IN:
2310				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2311					if (pd->af == AF_INET) {
2312						if (r->src.addr.p.dyn->
2313						    pfid_acnt4 < 1)
2314							return (NULL);
2315						PF_POOLMASK(naddr,
2316						    &r->src.addr.p.dyn->
2317						    pfid_addr4,
2318						    &r->src.addr.p.dyn->
2319						    pfid_mask4,
2320						    daddr, AF_INET);
2321					} else {
2322						if (r->src.addr.p.dyn->
2323						    pfid_acnt6 < 1)
2324							return (NULL);
2325						PF_POOLMASK(naddr,
2326						    &r->src.addr.p.dyn->
2327						    pfid_addr6,
2328						    &r->src.addr.p.dyn->
2329						    pfid_mask6,
2330						    daddr, AF_INET6);
2331					}
2332				} else
2333					PF_POOLMASK(naddr,
2334					    &r->src.addr.v.a.addr,
2335					    &r->src.addr.v.a.mask, daddr,
2336					    pd->af);
2337				break;
2338			}
2339			break;
2340		case PF_RDR: {
2341			if (pf_map_addr(r->af, r, saddr, naddr, NULL, sn))
2342				return (NULL);
2343
2344			if (r->rpool.proxy_port[1]) {
2345				u_int32_t	tmp_nport;
2346
2347				tmp_nport = ((ntohs(dport) -
2348				    ntohs(r->dst.port[0])) %
2349				    (r->rpool.proxy_port[1] -
2350				    r->rpool.proxy_port[0] + 1)) +
2351				    r->rpool.proxy_port[0];
2352
2353				/* wrap around if necessary */
2354				if (tmp_nport > 65535)
2355					tmp_nport -= 65535;
2356				*nport = htons((u_int16_t)tmp_nport);
2357			} else if (r->rpool.proxy_port[0])
2358				*nport = htons(r->rpool.proxy_port[0]);
2359			break;
2360		}
2361		default:
2362			return (NULL);
2363		}
2364	}
2365
2366	return (r);
2367}
2368
2369int
2370pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd)
2371{
2372	struct pf_addr		*saddr, *daddr;
2373	u_int16_t		 sport, dport;
2374#ifdef __FreeBSD__
2375	struct inpcbinfo	*pi;
2376#else
2377	struct inpcbtable	*tb;
2378#endif
2379	struct inpcb		*inp;
2380
2381	*uid = UID_MAX;
2382	*gid = GID_MAX;
2383	switch (pd->proto) {
2384	case IPPROTO_TCP:
2385		sport = pd->hdr.tcp->th_sport;
2386		dport = pd->hdr.tcp->th_dport;
2387#ifdef __FreeBSD__
2388		pi = &tcbinfo;
2389#else
2390		tb = &tcbtable;
2391#endif
2392		break;
2393	case IPPROTO_UDP:
2394		sport = pd->hdr.udp->uh_sport;
2395		dport = pd->hdr.udp->uh_dport;
2396#ifdef __FreeBSD__
2397		pi = &udbinfo;
2398#else
2399		tb = &udbtable;
2400#endif
2401		break;
2402	default:
2403		return (0);
2404	}
2405	if (direction == PF_IN) {
2406		saddr = pd->src;
2407		daddr = pd->dst;
2408	} else {
2409		u_int16_t	p;
2410
2411		p = sport;
2412		sport = dport;
2413		dport = p;
2414		saddr = pd->dst;
2415		daddr = pd->src;
2416	}
2417	switch (pd->af) {
2418	case AF_INET:
2419#ifdef __FreeBSD__
2420		INP_INFO_RLOCK(pi);	/* XXX LOR */
2421		inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2422			dport, 0, NULL);
2423		if (inp == NULL) {
2424			inp = in_pcblookup_hash(pi, saddr->v4, sport,
2425			   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
2426			if(inp == NULL) {
2427				INP_INFO_RUNLOCK(pi);
2428				return (0);
2429			}
2430		}
2431#else
2432		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
2433		if (inp == NULL) {
2434			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
2435			if (inp == NULL)
2436				return (0);
2437		}
2438#endif
2439		break;
2440#ifdef INET6
2441	case AF_INET6:
2442#ifdef __FreeBSD__
2443		INP_INFO_RLOCK(pi);
2444		inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2445			&daddr->v6, dport, 0, NULL);
2446		if (inp == NULL) {
2447			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2448			&daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2449			if (inp == NULL) {
2450				INP_INFO_RUNLOCK(pi);
2451				return (0);
2452			}
2453		}
2454#else
2455		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
2456		    dport);
2457		if (inp == NULL) {
2458			inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
2459			if (inp == NULL)
2460				return (0);
2461		}
2462#endif
2463		break;
2464#endif /* INET6 */
2465
2466	default:
2467		return (0);
2468	}
2469#ifdef __FreeBSD__
2470	INP_LOCK(inp);
2471	*uid = inp->inp_socket->so_cred->cr_uid;
2472	*gid = inp->inp_socket->so_cred->cr_groups[0];
2473	INP_UNLOCK(inp);
2474	INP_INFO_RUNLOCK(pi);
2475#else
2476	*uid = inp->inp_socket->so_euid;
2477	*gid = inp->inp_socket->so_egid;
2478#endif
2479	return (1);
2480}
2481
2482u_int8_t
2483pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2484{
2485	int		 hlen;
2486	u_int8_t	 hdr[60];
2487	u_int8_t	*opt, optlen;
2488	u_int8_t	 wscale = 0;
2489
2490	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2491	if (hlen <= sizeof(struct tcphdr))
2492		return (0);
2493	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2494		return (0);
2495	opt = hdr + sizeof(struct tcphdr);
2496	hlen -= sizeof(struct tcphdr);
2497	while (hlen >= 3) {
2498		switch (*opt) {
2499		case TCPOPT_EOL:
2500		case TCPOPT_NOP:
2501			++opt;
2502			--hlen;
2503			break;
2504		case TCPOPT_WINDOW:
2505			wscale = opt[2];
2506			if (wscale > TCP_MAX_WINSHIFT)
2507				wscale = TCP_MAX_WINSHIFT;
2508			wscale |= PF_WSCALE_FLAG;
2509			/* FALLTHROUGH */
2510		default:
2511			optlen = opt[1];
2512			if (optlen < 2)
2513				optlen = 2;
2514			hlen -= optlen;
2515			opt += optlen;
2516			break;
2517		}
2518	}
2519	return (wscale);
2520}
2521
2522u_int16_t
2523pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2524{
2525	int		 hlen;
2526	u_int8_t	 hdr[60];
2527	u_int8_t	*opt, optlen;
2528	u_int16_t	 mss = tcp_mssdflt;
2529
2530	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2531	if (hlen <= sizeof(struct tcphdr))
2532		return (0);
2533	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2534		return (0);
2535	opt = hdr + sizeof(struct tcphdr);
2536	hlen -= sizeof(struct tcphdr);
2537	while (hlen >= TCPOLEN_MAXSEG) {
2538		switch (*opt) {
2539		case TCPOPT_EOL:
2540		case TCPOPT_NOP:
2541			++opt;
2542			--hlen;
2543			break;
2544		case TCPOPT_MAXSEG:
2545			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2546			/* FALLTHROUGH */
2547		default:
2548			optlen = opt[1];
2549			if (optlen < 2)
2550				optlen = 2;
2551			hlen -= optlen;
2552			opt += optlen;
2553			break;
2554		}
2555	}
2556	return (mss);
2557}
2558
2559u_int16_t
2560pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2561{
2562#ifdef INET
2563	struct sockaddr_in	*dst;
2564	struct route		 ro;
2565#endif /* INET */
2566#ifdef INET6
2567	struct sockaddr_in6	*dst6;
2568	struct route_in6	 ro6;
2569#endif /* INET6 */
2570	struct rtentry		*rt = NULL;
2571	int			 hlen = 0;	/* make the compiler happy */
2572	u_int16_t		 mss = tcp_mssdflt;
2573
2574	switch (af) {
2575#ifdef INET
2576	case AF_INET:
2577		hlen = sizeof(struct ip);
2578		bzero(&ro, sizeof(ro));
2579		dst = (struct sockaddr_in *)&ro.ro_dst;
2580		dst->sin_family = AF_INET;
2581		dst->sin_len = sizeof(*dst);
2582		dst->sin_addr = addr->v4;
2583#ifdef __FreeBSD__
2584#ifdef RTF_PRCLONING
2585		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2586#else /* !RTF_PRCLONING */
2587		rtalloc_ign(&ro, RTF_CLONING);
2588#endif
2589#else /* ! __FreeBSD__ */
2590		rtalloc_noclone(&ro, NO_CLONING);
2591#endif
2592		rt = ro.ro_rt;
2593		break;
2594#endif /* INET */
2595#ifdef INET6
2596	case AF_INET6:
2597		hlen = sizeof(struct ip6_hdr);
2598		bzero(&ro6, sizeof(ro6));
2599		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2600		dst6->sin6_family = AF_INET6;
2601		dst6->sin6_len = sizeof(*dst6);
2602		dst6->sin6_addr = addr->v6;
2603#ifdef __FreeBSD__
2604#ifdef RTF_PRCLONING
2605		rtalloc_ign((struct route *)&ro6,
2606		    (RTF_CLONING | RTF_PRCLONING));
2607#else /* !RTF_PRCLONING */
2608		rtalloc_ign((struct route *)&ro6, RTF_CLONING);
2609#endif
2610#else /* ! __FreeBSD__ */
2611		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
2612#endif
2613		rt = ro6.ro_rt;
2614		break;
2615#endif /* INET6 */
2616	}
2617
2618	if (rt && rt->rt_ifp) {
2619		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2620		mss = max(tcp_mssdflt, mss);
2621		RTFREE(rt);
2622	}
2623	mss = min(mss, offer);
2624	mss = max(mss, 64);		/* sanity - at least max opt space */
2625	return (mss);
2626}
2627
2628void
2629pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2630{
2631	struct pf_rule *r = s->rule.ptr;
2632
2633	s->rt_kif = NULL;
2634	if (!r->rt || r->rt == PF_FASTROUTE)
2635		return;
2636	switch (s->af) {
2637#ifdef INET
2638	case AF_INET:
2639		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
2640		    &s->nat_src_node);
2641		s->rt_kif = r->rpool.cur->kif;
2642		break;
2643#endif /* INET */
2644#ifdef INET6
2645	case AF_INET6:
2646		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
2647		    &s->nat_src_node);
2648		s->rt_kif = r->rpool.cur->kif;
2649		break;
2650#endif /* INET6 */
2651	}
2652}
2653
2654int
2655pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
2656    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
2657    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
2658{
2659	struct pf_rule		*nr = NULL;
2660	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
2661	struct tcphdr		*th = pd->hdr.tcp;
2662	u_int16_t		 bport, nport = 0;
2663	sa_family_t		 af = pd->af;
2664	int			 lookup = -1;
2665	uid_t			 uid;
2666	gid_t			 gid;
2667	struct pf_rule		*r, *a = NULL;
2668	struct pf_ruleset	*ruleset = NULL;
2669	struct pf_src_node	*nsn = NULL;
2670	u_short			 reason;
2671	int			 rewrite = 0;
2672	struct pf_tag		*pftag = NULL;
2673	int			 tag = -1;
2674	u_int16_t		 mss = tcp_mssdflt;
2675
2676	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2677
2678	if (direction == PF_OUT) {
2679		bport = nport = th->th_sport;
2680		/* check outgoing packet for BINAT/NAT */
2681		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
2682		    saddr, th->th_sport, daddr, th->th_dport,
2683		    &pd->naddr, &nport)) != NULL) {
2684			PF_ACPY(&pd->baddr, saddr, af);
2685			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2686			    &th->th_sum, &pd->naddr, nport, 0, af);
2687			rewrite++;
2688			if (nr->natpass)
2689				r = NULL;
2690			pd->nat_rule = nr;
2691		}
2692	} else {
2693		bport = nport = th->th_dport;
2694		/* check incoming packet for BINAT/RDR */
2695		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
2696		    saddr, th->th_sport, daddr, th->th_dport,
2697		    &pd->naddr, &nport)) != NULL) {
2698			PF_ACPY(&pd->baddr, daddr, af);
2699			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2700			    &th->th_sum, &pd->naddr, nport, 0, af);
2701			rewrite++;
2702			if (nr->natpass)
2703				r = NULL;
2704			pd->nat_rule = nr;
2705		}
2706	}
2707
2708	while (r != NULL) {
2709		r->evaluations++;
2710		if (r->kif != NULL &&
2711		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
2712			r = r->skip[PF_SKIP_IFP].ptr;
2713		else if (r->direction && r->direction != direction)
2714			r = r->skip[PF_SKIP_DIR].ptr;
2715		else if (r->af && r->af != af)
2716			r = r->skip[PF_SKIP_AF].ptr;
2717		else if (r->proto && r->proto != IPPROTO_TCP)
2718			r = r->skip[PF_SKIP_PROTO].ptr;
2719		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
2720			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2721		else if (r->src.port_op && !pf_match_port(r->src.port_op,
2722		    r->src.port[0], r->src.port[1], th->th_sport))
2723			r = r->skip[PF_SKIP_SRC_PORT].ptr;
2724		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
2725			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2726		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
2727		    r->dst.port[0], r->dst.port[1], th->th_dport))
2728			r = r->skip[PF_SKIP_DST_PORT].ptr;
2729		else if (r->tos && !(r->tos & pd->tos))
2730			r = TAILQ_NEXT(r, entries);
2731		else if (r->rule_flag & PFRULE_FRAGMENT)
2732			r = TAILQ_NEXT(r, entries);
2733		else if ((r->flagset & th->th_flags) != r->flags)
2734			r = TAILQ_NEXT(r, entries);
2735		else if (r->uid.op && (lookup != -1 || (lookup =
2736		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
2737		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
2738		    uid))
2739			r = TAILQ_NEXT(r, entries);
2740		else if (r->gid.op && (lookup != -1 || (lookup =
2741		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
2742		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
2743		    gid))
2744			r = TAILQ_NEXT(r, entries);
2745		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
2746			r = TAILQ_NEXT(r, entries);
2747		else if (r->anchorname[0] && r->anchor == NULL)
2748			r = TAILQ_NEXT(r, entries);
2749		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
2750		    pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
2751			r = TAILQ_NEXT(r, entries);
2752		else {
2753			if (r->tag)
2754				tag = r->tag;
2755			if (r->anchor == NULL) {
2756				*rm = r;
2757				*am = a;
2758				*rsm = ruleset;
2759				if ((*rm)->quick)
2760					break;
2761				r = TAILQ_NEXT(r, entries);
2762			} else
2763				PF_STEP_INTO_ANCHOR(r, a, ruleset,
2764				    PF_RULESET_FILTER);
2765		}
2766		if (r == NULL && a != NULL)
2767			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
2768			    PF_RULESET_FILTER);
2769	}
2770	r = *rm;
2771	a = *am;
2772	ruleset = *rsm;
2773
2774	REASON_SET(&reason, PFRES_MATCH);
2775
2776	if (r->log) {
2777		if (rewrite)
2778			m_copyback(m, off, sizeof(*th), (caddr_t)th);
2779		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
2780	}
2781
2782	if ((r->action == PF_DROP) &&
2783	    ((r->rule_flag & PFRULE_RETURNRST) ||
2784	    (r->rule_flag & PFRULE_RETURNICMP) ||
2785	    (r->rule_flag & PFRULE_RETURN))) {
2786		/* undo NAT changes, if they have taken place */
2787		if (nr != NULL) {
2788			if (direction == PF_OUT) {
2789				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2790				    &th->th_sum, &pd->baddr, bport, 0, af);
2791				rewrite++;
2792			} else {
2793				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2794				    &th->th_sum, &pd->baddr, bport, 0, af);
2795				rewrite++;
2796			}
2797		}
2798		if (((r->rule_flag & PFRULE_RETURNRST) ||
2799		    (r->rule_flag & PFRULE_RETURN)) &&
2800		    !(th->th_flags & TH_RST)) {
2801			u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
2802
2803			if (th->th_flags & TH_SYN)
2804				ack++;
2805			if (th->th_flags & TH_FIN)
2806				ack++;
2807			pf_send_tcp(r, af, pd->dst,
2808			    pd->src, th->th_dport, th->th_sport,
2809			    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
2810			    r->return_ttl);
2811		} else if ((af == AF_INET) && r->return_icmp)
2812			pf_send_icmp(m, r->return_icmp >> 8,
2813			    r->return_icmp & 255, af, r);
2814		else if ((af == AF_INET6) && r->return_icmp6)
2815			pf_send_icmp(m, r->return_icmp6 >> 8,
2816			    r->return_icmp6 & 255, af, r);
2817	}
2818
2819	if (r->action == PF_DROP)
2820		return (PF_DROP);
2821
2822	if (pf_tag_packet(m, pftag, tag)) {
2823		REASON_SET(&reason, PFRES_MEMORY);
2824		return (PF_DROP);
2825	}
2826
2827	if (r->keep_state || nr != NULL ||
2828	    (pd->flags & PFDESC_TCP_NORM)) {
2829		/* create new state */
2830		u_int16_t	 len;
2831		struct pf_state	*s = NULL;
2832		struct pf_src_node *sn = NULL;
2833
2834		len = pd->tot_len - off - (th->th_off << 2);
2835
2836		/* check maximums */
2837		if (r->max_states && (r->states >= r->max_states))
2838			goto cleanup;
2839		/* src node for flter rule */
2840		if ((r->rule_flag & PFRULE_SRCTRACK ||
2841		    r->rpool.opts & PF_POOL_STICKYADDR) &&
2842		    pf_insert_src_node(&sn, r, saddr, af) != 0)
2843			goto cleanup;
2844		/* src node for translation rule */
2845		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
2846		    ((direction == PF_OUT &&
2847		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
2848		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
2849			goto cleanup;
2850		s = pool_get(&pf_state_pl, PR_NOWAIT);
2851		if (s == NULL) {
2852cleanup:
2853			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
2854				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
2855				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
2856				pf_status.src_nodes--;
2857				pool_put(&pf_src_tree_pl, sn);
2858			}
2859			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
2860			    nsn->expire == 0) {
2861				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
2862				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
2863				pf_status.src_nodes--;
2864				pool_put(&pf_src_tree_pl, nsn);
2865			}
2866			REASON_SET(&reason, PFRES_MEMORY);
2867			return (PF_DROP);
2868		}
2869		bzero(s, sizeof(*s));
2870		r->states++;
2871		if (a != NULL)
2872			a->states++;
2873		s->rule.ptr = r;
2874		s->nat_rule.ptr = nr;
2875		if (s->nat_rule.ptr != NULL)
2876			s->nat_rule.ptr->states++;
2877		s->anchor.ptr = a;
2878		s->allow_opts = r->allow_opts;
2879		s->log = r->log & 2;
2880		s->proto = IPPROTO_TCP;
2881		s->direction = direction;
2882		s->af = af;
2883		if (direction == PF_OUT) {
2884			PF_ACPY(&s->gwy.addr, saddr, af);
2885			s->gwy.port = th->th_sport;		/* sport */
2886			PF_ACPY(&s->ext.addr, daddr, af);
2887			s->ext.port = th->th_dport;
2888			if (nr != NULL) {
2889				PF_ACPY(&s->lan.addr, &pd->baddr, af);
2890				s->lan.port = bport;
2891			} else {
2892				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
2893				s->lan.port = s->gwy.port;
2894			}
2895		} else {
2896			PF_ACPY(&s->lan.addr, daddr, af);
2897			s->lan.port = th->th_dport;
2898			PF_ACPY(&s->ext.addr, saddr, af);
2899			s->ext.port = th->th_sport;
2900			if (nr != NULL) {
2901				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
2902				s->gwy.port = bport;
2903			} else {
2904				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
2905				s->gwy.port = s->lan.port;
2906			}
2907		}
2908
2909		s->src.seqlo = ntohl(th->th_seq);
2910		s->src.seqhi = s->src.seqlo + len + 1;
2911		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2912		    r->keep_state == PF_STATE_MODULATE) {
2913			/* Generate sequence number modulator */
2914			while ((s->src.seqdiff = arc4random()) == 0)
2915				;
2916			pf_change_a(&th->th_seq, &th->th_sum,
2917			    htonl(s->src.seqlo + s->src.seqdiff), 0);
2918			rewrite = 1;
2919		} else
2920			s->src.seqdiff = 0;
2921		if (th->th_flags & TH_SYN) {
2922			s->src.seqhi++;
2923			s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
2924		}
2925		s->src.max_win = MAX(ntohs(th->th_win), 1);
2926		if (s->src.wscale & PF_WSCALE_MASK) {
2927			/* Remove scale factor from initial window */
2928			int win = s->src.max_win;
2929			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
2930			s->src.max_win = (win - 1) >>
2931			    (s->src.wscale & PF_WSCALE_MASK);
2932		}
2933		if (th->th_flags & TH_FIN)
2934			s->src.seqhi++;
2935		s->dst.seqhi = 1;
2936		s->dst.max_win = 1;
2937		s->src.state = TCPS_SYN_SENT;
2938		s->dst.state = TCPS_CLOSED;
2939#ifdef __FreeBSD__
2940		s->creation = time_second;
2941		s->expire = time_second;
2942#else
2943		s->creation = time.tv_sec;
2944		s->expire = time.tv_sec;
2945#endif
2946		s->timeout = PFTM_TCP_FIRST_PACKET;
2947		pf_set_rt_ifp(s, saddr);
2948		if (sn != NULL) {
2949			s->src_node = sn;
2950			s->src_node->states++;
2951		}
2952		if (nsn != NULL) {
2953			PF_ACPY(&nsn->raddr, &pd->naddr, af);
2954			s->nat_src_node = nsn;
2955			s->nat_src_node->states++;
2956		}
2957		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
2958		    off, pd, th, &s->src, &s->dst)) {
2959			REASON_SET(&reason, PFRES_MEMORY);
2960			pf_src_tree_remove_state(s);
2961			pool_put(&pf_state_pl, s);
2962			return (PF_DROP);
2963		}
2964		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
2965		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src,
2966		    &s->dst, &rewrite)) {
2967			pf_normalize_tcp_cleanup(s);
2968			pf_src_tree_remove_state(s);
2969			pool_put(&pf_state_pl, s);
2970			return (PF_DROP);
2971		}
2972		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
2973			pf_normalize_tcp_cleanup(s);
2974			REASON_SET(&reason, PFRES_MEMORY);
2975			pf_src_tree_remove_state(s);
2976			pool_put(&pf_state_pl, s);
2977			return (PF_DROP);
2978		} else
2979			*sm = s;
2980		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2981		    r->keep_state == PF_STATE_SYNPROXY) {
2982			s->src.state = PF_TCPS_PROXY_SRC;
2983			if (nr != NULL) {
2984				if (direction == PF_OUT) {
2985					pf_change_ap(saddr, &th->th_sport,
2986					    pd->ip_sum, &th->th_sum, &pd->baddr,
2987					    bport, 0, af);
2988				} else {
2989					pf_change_ap(daddr, &th->th_dport,
2990					    pd->ip_sum, &th->th_sum, &pd->baddr,
2991					    bport, 0, af);
2992				}
2993			}
2994			s->src.seqhi = arc4random();
2995			/* Find mss option */
2996			mss = pf_get_mss(m, off, th->th_off, af);
2997			mss = pf_calc_mss(saddr, af, mss);
2998			mss = pf_calc_mss(daddr, af, mss);
2999			s->src.mss = mss;
3000			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
3001			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3002			    TH_SYN|TH_ACK, 0, s->src.mss, 0);
3003			return (PF_SYNPROXY_DROP);
3004		}
3005	}
3006
3007	/* copy back packet headers if we performed NAT operations */
3008	if (rewrite)
3009		m_copyback(m, off, sizeof(*th), (caddr_t)th);
3010
3011	return (PF_PASS);
3012}
3013
3014int
3015pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
3016    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3017    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3018{
3019	struct pf_rule		*nr = NULL;
3020	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3021	struct udphdr		*uh = pd->hdr.udp;
3022	u_int16_t		 bport, nport = 0;
3023	sa_family_t		 af = pd->af;
3024	int			 lookup = -1;
3025	uid_t			 uid;
3026	gid_t			 gid;
3027	struct pf_rule		*r, *a = NULL;
3028	struct pf_ruleset	*ruleset = NULL;
3029	struct pf_src_node	*nsn = NULL;
3030	u_short			 reason;
3031	int			 rewrite = 0;
3032	struct pf_tag		*pftag = NULL;
3033	int			 tag = -1;
3034
3035	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3036
3037	if (direction == PF_OUT) {
3038		bport = nport = uh->uh_sport;
3039		/* check outgoing packet for BINAT/NAT */
3040		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3041		    saddr, uh->uh_sport, daddr, uh->uh_dport,
3042		    &pd->naddr, &nport)) != NULL) {
3043			PF_ACPY(&pd->baddr, saddr, af);
3044			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3045			    &uh->uh_sum, &pd->naddr, nport, 1, af);
3046			rewrite++;
3047			if (nr->natpass)
3048				r = NULL;
3049			pd->nat_rule = nr;
3050		}
3051	} else {
3052		bport = nport = uh->uh_dport;
3053		/* check incoming packet for BINAT/RDR */
3054		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3055		    saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
3056		    &nport)) != NULL) {
3057			PF_ACPY(&pd->baddr, daddr, af);
3058			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3059			    &uh->uh_sum, &pd->naddr, nport, 1, af);
3060			rewrite++;
3061			if (nr->natpass)
3062				r = NULL;
3063			pd->nat_rule = nr;
3064		}
3065	}
3066
3067	while (r != NULL) {
3068		r->evaluations++;
3069		if (r->kif != NULL &&
3070		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3071			r = r->skip[PF_SKIP_IFP].ptr;
3072		else if (r->direction && r->direction != direction)
3073			r = r->skip[PF_SKIP_DIR].ptr;
3074		else if (r->af && r->af != af)
3075			r = r->skip[PF_SKIP_AF].ptr;
3076		else if (r->proto && r->proto != IPPROTO_UDP)
3077			r = r->skip[PF_SKIP_PROTO].ptr;
3078		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3079			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3080		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3081		    r->src.port[0], r->src.port[1], uh->uh_sport))
3082			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3083		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3084			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3085		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3086		    r->dst.port[0], r->dst.port[1], uh->uh_dport))
3087			r = r->skip[PF_SKIP_DST_PORT].ptr;
3088		else if (r->tos && !(r->tos & pd->tos))
3089			r = TAILQ_NEXT(r, entries);
3090		else if (r->rule_flag & PFRULE_FRAGMENT)
3091			r = TAILQ_NEXT(r, entries);
3092		else if (r->uid.op && (lookup != -1 || (lookup =
3093		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
3094		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3095		    uid))
3096			r = TAILQ_NEXT(r, entries);
3097		else if (r->gid.op && (lookup != -1 || (lookup =
3098		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
3099		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3100		    gid))
3101			r = TAILQ_NEXT(r, entries);
3102		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3103			r = TAILQ_NEXT(r, entries);
3104		else if (r->anchorname[0] && r->anchor == NULL)
3105			r = TAILQ_NEXT(r, entries);
3106		else if (r->os_fingerprint != PF_OSFP_ANY)
3107			r = TAILQ_NEXT(r, entries);
3108		else {
3109			if (r->tag)
3110				tag = r->tag;
3111			if (r->anchor == NULL) {
3112				*rm = r;
3113				*am = a;
3114				*rsm = ruleset;
3115				if ((*rm)->quick)
3116					break;
3117				r = TAILQ_NEXT(r, entries);
3118			} else
3119				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3120				    PF_RULESET_FILTER);
3121		}
3122		if (r == NULL && a != NULL)
3123			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3124			    PF_RULESET_FILTER);
3125	}
3126	r = *rm;
3127	a = *am;
3128	ruleset = *rsm;
3129
3130	REASON_SET(&reason, PFRES_MATCH);
3131
3132	if (r->log) {
3133		if (rewrite)
3134			m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3135		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3136	}
3137
3138	if ((r->action == PF_DROP) &&
3139	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3140	    (r->rule_flag & PFRULE_RETURN))) {
3141		/* undo NAT changes, if they have taken place */
3142		if (nr != NULL) {
3143			if (direction == PF_OUT) {
3144				pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3145				    &uh->uh_sum, &pd->baddr, bport, 1, af);
3146				rewrite++;
3147			} else {
3148				pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3149				    &uh->uh_sum, &pd->baddr, bport, 1, af);
3150				rewrite++;
3151			}
3152		}
3153		if ((af == AF_INET) && r->return_icmp)
3154			pf_send_icmp(m, r->return_icmp >> 8,
3155			    r->return_icmp & 255, af, r);
3156		else if ((af == AF_INET6) && r->return_icmp6)
3157			pf_send_icmp(m, r->return_icmp6 >> 8,
3158			    r->return_icmp6 & 255, af, r);
3159	}
3160
3161	if (r->action == PF_DROP)
3162		return (PF_DROP);
3163
3164	if (pf_tag_packet(m, pftag, tag)) {
3165		REASON_SET(&reason, PFRES_MEMORY);
3166		return (PF_DROP);
3167	}
3168
3169	if (r->keep_state || nr != NULL) {
3170		/* create new state */
3171		struct pf_state	*s = NULL;
3172		struct pf_src_node *sn = NULL;
3173
3174		/* check maximums */
3175		if (r->max_states && (r->states >= r->max_states))
3176			goto cleanup;
3177		/* src node for flter rule */
3178		if ((r->rule_flag & PFRULE_SRCTRACK ||
3179		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3180		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3181			goto cleanup;
3182		/* src node for translation rule */
3183		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3184		    ((direction == PF_OUT &&
3185		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3186		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3187			goto cleanup;
3188		s = pool_get(&pf_state_pl, PR_NOWAIT);
3189		if (s == NULL) {
3190cleanup:
3191			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3192				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3193				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3194				pf_status.src_nodes--;
3195				pool_put(&pf_src_tree_pl, sn);
3196			}
3197			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3198			    nsn->expire == 0) {
3199				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3200				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3201				pf_status.src_nodes--;
3202				pool_put(&pf_src_tree_pl, nsn);
3203			}
3204			REASON_SET(&reason, PFRES_MEMORY);
3205			return (PF_DROP);
3206		}
3207		bzero(s, sizeof(*s));
3208		r->states++;
3209		if (a != NULL)
3210			a->states++;
3211		s->rule.ptr = r;
3212		s->nat_rule.ptr = nr;
3213		if (s->nat_rule.ptr != NULL)
3214			s->nat_rule.ptr->states++;
3215		s->anchor.ptr = a;
3216		s->allow_opts = r->allow_opts;
3217		s->log = r->log & 2;
3218		s->proto = IPPROTO_UDP;
3219		s->direction = direction;
3220		s->af = af;
3221		if (direction == PF_OUT) {
3222			PF_ACPY(&s->gwy.addr, saddr, af);
3223			s->gwy.port = uh->uh_sport;
3224			PF_ACPY(&s->ext.addr, daddr, af);
3225			s->ext.port = uh->uh_dport;
3226			if (nr != NULL) {
3227				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3228				s->lan.port = bport;
3229			} else {
3230				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3231				s->lan.port = s->gwy.port;
3232			}
3233		} else {
3234			PF_ACPY(&s->lan.addr, daddr, af);
3235			s->lan.port = uh->uh_dport;
3236			PF_ACPY(&s->ext.addr, saddr, af);
3237			s->ext.port = uh->uh_sport;
3238			if (nr != NULL) {
3239				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3240				s->gwy.port = bport;
3241			} else {
3242				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3243				s->gwy.port = s->lan.port;
3244			}
3245		}
3246		s->src.state = PFUDPS_SINGLE;
3247		s->dst.state = PFUDPS_NO_TRAFFIC;
3248#ifdef __FreeBSD__
3249		s->creation = time_second;
3250		s->expire = time_second;
3251#else
3252		s->creation = time.tv_sec;
3253		s->expire = time.tv_sec;
3254#endif
3255		s->timeout = PFTM_UDP_FIRST_PACKET;
3256		pf_set_rt_ifp(s, saddr);
3257		if (sn != NULL) {
3258			s->src_node = sn;
3259			s->src_node->states++;
3260		}
3261		if (nsn != NULL) {
3262			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3263			s->nat_src_node = nsn;
3264			s->nat_src_node->states++;
3265		}
3266		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3267			REASON_SET(&reason, PFRES_MEMORY);
3268			pf_src_tree_remove_state(s);
3269			pool_put(&pf_state_pl, s);
3270			return (PF_DROP);
3271		} else
3272			*sm = s;
3273	}
3274
3275	/* copy back packet headers if we performed NAT operations */
3276	if (rewrite)
3277		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3278
3279	return (PF_PASS);
3280}
3281
3282int
3283pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
3284    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3285    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3286{
3287	struct pf_rule		*nr = NULL;
3288	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3289	struct pf_rule		*r, *a = NULL;
3290	struct pf_ruleset	*ruleset = NULL;
3291	struct pf_src_node	*nsn = NULL;
3292	u_short			 reason;
3293	u_int16_t		 icmpid = 0;	/* make the compiler happy */
3294	sa_family_t		 af = pd->af;
3295	u_int8_t		 icmptype = 0;	/* make the compiler happy */
3296	u_int8_t		 icmpcode = 0;	/* make the compiler happy */
3297	int			 state_icmp = 0;
3298	struct pf_tag		*pftag = NULL;
3299	int			 tag = -1;
3300#ifdef INET6
3301	int			 rewrite = 0;
3302#endif /* INET6 */
3303
3304	switch (pd->proto) {
3305#ifdef INET
3306	case IPPROTO_ICMP:
3307		icmptype = pd->hdr.icmp->icmp_type;
3308		icmpcode = pd->hdr.icmp->icmp_code;
3309		icmpid = pd->hdr.icmp->icmp_id;
3310
3311		if (icmptype == ICMP_UNREACH ||
3312		    icmptype == ICMP_SOURCEQUENCH ||
3313		    icmptype == ICMP_REDIRECT ||
3314		    icmptype == ICMP_TIMXCEED ||
3315		    icmptype == ICMP_PARAMPROB)
3316			state_icmp++;
3317		break;
3318#endif /* INET */
3319#ifdef INET6
3320	case IPPROTO_ICMPV6:
3321		icmptype = pd->hdr.icmp6->icmp6_type;
3322		icmpcode = pd->hdr.icmp6->icmp6_code;
3323		icmpid = pd->hdr.icmp6->icmp6_id;
3324
3325		if (icmptype == ICMP6_DST_UNREACH ||
3326		    icmptype == ICMP6_PACKET_TOO_BIG ||
3327		    icmptype == ICMP6_TIME_EXCEEDED ||
3328		    icmptype == ICMP6_PARAM_PROB)
3329			state_icmp++;
3330		break;
3331#endif /* INET6 */
3332	}
3333
3334	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3335
3336	if (direction == PF_OUT) {
3337		/* check outgoing packet for BINAT/NAT */
3338		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3339		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3340			PF_ACPY(&pd->baddr, saddr, af);
3341			switch (af) {
3342#ifdef INET
3343			case AF_INET:
3344				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3345				    pd->naddr.v4.s_addr, 0);
3346				break;
3347#endif /* INET */
3348#ifdef INET6
3349			case AF_INET6:
3350				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3351				    &pd->naddr, 0);
3352				rewrite++;
3353				break;
3354#endif /* INET6 */
3355			}
3356			if (nr->natpass)
3357				r = NULL;
3358			pd->nat_rule = nr;
3359		}
3360	} else {
3361		/* check incoming packet for BINAT/RDR */
3362		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3363		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3364			PF_ACPY(&pd->baddr, daddr, af);
3365			switch (af) {
3366#ifdef INET
3367			case AF_INET:
3368				pf_change_a(&daddr->v4.s_addr,
3369				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3370				break;
3371#endif /* INET */
3372#ifdef INET6
3373			case AF_INET6:
3374				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3375				    &pd->naddr, 0);
3376				rewrite++;
3377				break;
3378#endif /* INET6 */
3379			}
3380			if (nr->natpass)
3381				r = NULL;
3382			pd->nat_rule = nr;
3383		}
3384	}
3385
3386	while (r != NULL) {
3387		r->evaluations++;
3388		if (r->kif != NULL &&
3389		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3390			r = r->skip[PF_SKIP_IFP].ptr;
3391		else if (r->direction && r->direction != direction)
3392			r = r->skip[PF_SKIP_DIR].ptr;
3393		else if (r->af && r->af != af)
3394			r = r->skip[PF_SKIP_AF].ptr;
3395		else if (r->proto && r->proto != pd->proto)
3396			r = r->skip[PF_SKIP_PROTO].ptr;
3397		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3398			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3399		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3400			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3401		else if (r->type && r->type != icmptype + 1)
3402			r = TAILQ_NEXT(r, entries);
3403		else if (r->code && r->code != icmpcode + 1)
3404			r = TAILQ_NEXT(r, entries);
3405		else if (r->tos && !(r->tos & pd->tos))
3406			r = TAILQ_NEXT(r, entries);
3407		else if (r->rule_flag & PFRULE_FRAGMENT)
3408			r = TAILQ_NEXT(r, entries);
3409		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3410			r = TAILQ_NEXT(r, entries);
3411		else if (r->anchorname[0] && r->anchor == NULL)
3412			r = TAILQ_NEXT(r, entries);
3413		else if (r->os_fingerprint != PF_OSFP_ANY)
3414			r = TAILQ_NEXT(r, entries);
3415		else {
3416			if (r->tag)
3417				tag = r->tag;
3418			if (r->anchor == NULL) {
3419				*rm = r;
3420				*am = a;
3421				*rsm = ruleset;
3422				if ((*rm)->quick)
3423					break;
3424				r = TAILQ_NEXT(r, entries);
3425			} else
3426				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3427				    PF_RULESET_FILTER);
3428		}
3429		if (r == NULL && a != NULL)
3430			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3431			    PF_RULESET_FILTER);
3432	}
3433	r = *rm;
3434	a = *am;
3435	ruleset = *rsm;
3436
3437	REASON_SET(&reason, PFRES_MATCH);
3438
3439	if (r->log) {
3440#ifdef INET6
3441		if (rewrite)
3442			m_copyback(m, off, sizeof(struct icmp6_hdr),
3443			    (caddr_t)pd->hdr.icmp6);
3444#endif /* INET6 */
3445		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3446	}
3447
3448	if (r->action != PF_PASS)
3449		return (PF_DROP);
3450
3451	if (pf_tag_packet(m, pftag, tag)) {
3452		REASON_SET(&reason, PFRES_MEMORY);
3453		return (PF_DROP);
3454	}
3455
3456	if (!state_icmp && (r->keep_state || nr != NULL)) {
3457		/* create new state */
3458		struct pf_state	*s = NULL;
3459		struct pf_src_node *sn = NULL;
3460
3461		/* check maximums */
3462		if (r->max_states && (r->states >= r->max_states))
3463			goto cleanup;
3464		/* src node for flter rule */
3465		if ((r->rule_flag & PFRULE_SRCTRACK ||
3466		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3467		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3468			goto cleanup;
3469		/* src node for translation rule */
3470		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3471		    ((direction == PF_OUT &&
3472		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3473		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3474			goto cleanup;
3475		s = pool_get(&pf_state_pl, PR_NOWAIT);
3476		if (s == NULL) {
3477cleanup:
3478			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3479				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3480				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3481				pf_status.src_nodes--;
3482				pool_put(&pf_src_tree_pl, sn);
3483			}
3484			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3485			    nsn->expire == 0) {
3486				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3487				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3488				pf_status.src_nodes--;
3489				pool_put(&pf_src_tree_pl, nsn);
3490			}
3491			REASON_SET(&reason, PFRES_MEMORY);
3492			return (PF_DROP);
3493		}
3494		bzero(s, sizeof(*s));
3495		r->states++;
3496		if (a != NULL)
3497			a->states++;
3498		s->rule.ptr = r;
3499		s->nat_rule.ptr = nr;
3500		if (s->nat_rule.ptr != NULL)
3501			s->nat_rule.ptr->states++;
3502		s->anchor.ptr = a;
3503		s->allow_opts = r->allow_opts;
3504		s->log = r->log & 2;
3505		s->proto = pd->proto;
3506		s->direction = direction;
3507		s->af = af;
3508		if (direction == PF_OUT) {
3509			PF_ACPY(&s->gwy.addr, saddr, af);
3510			s->gwy.port = icmpid;
3511			PF_ACPY(&s->ext.addr, daddr, af);
3512			s->ext.port = icmpid;
3513			if (nr != NULL)
3514				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3515			else
3516				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3517			s->lan.port = icmpid;
3518		} else {
3519			PF_ACPY(&s->lan.addr, daddr, af);
3520			s->lan.port = icmpid;
3521			PF_ACPY(&s->ext.addr, saddr, af);
3522			s->ext.port = icmpid;
3523			if (nr != NULL)
3524				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3525			else
3526				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3527			s->gwy.port = icmpid;
3528		}
3529#ifdef __FreeBSD__
3530		s->creation = time_second;
3531		s->expire = time_second;
3532#else
3533		s->creation = time.tv_sec;
3534		s->expire = time.tv_sec;
3535#endif
3536		s->timeout = PFTM_ICMP_FIRST_PACKET;
3537		pf_set_rt_ifp(s, saddr);
3538		if (sn != NULL) {
3539			s->src_node = sn;
3540			s->src_node->states++;
3541		}
3542		if (nsn != NULL) {
3543			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3544			s->nat_src_node = nsn;
3545			s->nat_src_node->states++;
3546		}
3547		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3548			REASON_SET(&reason, PFRES_MEMORY);
3549			pf_src_tree_remove_state(s);
3550			pool_put(&pf_state_pl, s);
3551			return (PF_DROP);
3552		} else
3553			*sm = s;
3554	}
3555
3556#ifdef INET6
3557	/* copy back packet headers if we performed IPv6 NAT operations */
3558	if (rewrite)
3559		m_copyback(m, off, sizeof(struct icmp6_hdr),
3560		    (caddr_t)pd->hdr.icmp6);
3561#endif /* INET6 */
3562
3563	return (PF_PASS);
3564}
3565
3566int
3567pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
3568    struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3569    struct pf_rule **am, struct pf_ruleset **rsm)
3570{
3571	struct pf_rule		*nr = NULL;
3572	struct pf_rule		*r, *a = NULL;
3573	struct pf_ruleset	*ruleset = NULL;
3574	struct pf_src_node	*nsn = NULL;
3575	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3576	sa_family_t		 af = pd->af;
3577	u_short			 reason;
3578	struct pf_tag		*pftag = NULL;
3579	int			 tag = -1;
3580
3581	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3582
3583	if (direction == PF_OUT) {
3584		/* check outgoing packet for BINAT/NAT */
3585		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3586		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3587			PF_ACPY(&pd->baddr, saddr, af);
3588			switch (af) {
3589#ifdef INET
3590			case AF_INET:
3591				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3592				    pd->naddr.v4.s_addr, 0);
3593				break;
3594#endif /* INET */
3595#ifdef INET6
3596			case AF_INET6:
3597				PF_ACPY(saddr, &pd->naddr, af);
3598				break;
3599#endif /* INET6 */
3600			}
3601			if (nr->natpass)
3602				r = NULL;
3603			pd->nat_rule = nr;
3604		}
3605	} else {
3606		/* check incoming packet for BINAT/RDR */
3607		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3608		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3609			PF_ACPY(&pd->baddr, daddr, af);
3610			switch (af) {
3611#ifdef INET
3612			case AF_INET:
3613				pf_change_a(&daddr->v4.s_addr,
3614				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3615				break;
3616#endif /* INET */
3617#ifdef INET6
3618			case AF_INET6:
3619				PF_ACPY(daddr, &pd->naddr, af);
3620				break;
3621#endif /* INET6 */
3622			}
3623			if (nr->natpass)
3624				r = NULL;
3625			pd->nat_rule = nr;
3626		}
3627	}
3628
3629	while (r != NULL) {
3630		r->evaluations++;
3631		if (r->kif != NULL &&
3632		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3633			r = r->skip[PF_SKIP_IFP].ptr;
3634		else if (r->direction && r->direction != direction)
3635			r = r->skip[PF_SKIP_DIR].ptr;
3636		else if (r->af && r->af != af)
3637			r = r->skip[PF_SKIP_AF].ptr;
3638		else if (r->proto && r->proto != pd->proto)
3639			r = r->skip[PF_SKIP_PROTO].ptr;
3640		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3641			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3642		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3643			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3644		else if (r->tos && !(r->tos & pd->tos))
3645			r = TAILQ_NEXT(r, entries);
3646		else if (r->rule_flag & PFRULE_FRAGMENT)
3647			r = TAILQ_NEXT(r, entries);
3648		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3649			r = TAILQ_NEXT(r, entries);
3650		else if (r->anchorname[0] && r->anchor == NULL)
3651			r = TAILQ_NEXT(r, entries);
3652		else if (r->os_fingerprint != PF_OSFP_ANY)
3653			r = TAILQ_NEXT(r, entries);
3654		else {
3655			if (r->tag)
3656				tag = r->tag;
3657			if (r->anchor == NULL) {
3658				*rm = r;
3659				*am = a;
3660				*rsm = ruleset;
3661				if ((*rm)->quick)
3662					break;
3663				r = TAILQ_NEXT(r, entries);
3664			} else
3665				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3666				    PF_RULESET_FILTER);
3667		}
3668		if (r == NULL && a != NULL)
3669			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3670			    PF_RULESET_FILTER);
3671	}
3672	r = *rm;
3673	a = *am;
3674	ruleset = *rsm;
3675
3676	REASON_SET(&reason, PFRES_MATCH);
3677
3678	if (r->log)
3679		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3680
3681	if ((r->action == PF_DROP) &&
3682	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3683	    (r->rule_flag & PFRULE_RETURN))) {
3684		struct pf_addr *a = NULL;
3685
3686		if (nr != NULL) {
3687			if (direction == PF_OUT)
3688				a = saddr;
3689			else
3690				a = daddr;
3691		}
3692		if (a != NULL) {
3693			switch (af) {
3694#ifdef INET
3695			case AF_INET:
3696				pf_change_a(&a->v4.s_addr, pd->ip_sum,
3697				    pd->baddr.v4.s_addr, 0);
3698				break;
3699#endif /* INET */
3700#ifdef INET6
3701			case AF_INET6:
3702				PF_ACPY(a, &pd->baddr, af);
3703				break;
3704#endif /* INET6 */
3705			}
3706		}
3707		if ((af == AF_INET) && r->return_icmp)
3708			pf_send_icmp(m, r->return_icmp >> 8,
3709			    r->return_icmp & 255, af, r);
3710		else if ((af == AF_INET6) && r->return_icmp6)
3711			pf_send_icmp(m, r->return_icmp6 >> 8,
3712			    r->return_icmp6 & 255, af, r);
3713	}
3714
3715	if (r->action != PF_PASS)
3716		return (PF_DROP);
3717
3718	if (pf_tag_packet(m, pftag, tag)) {
3719		REASON_SET(&reason, PFRES_MEMORY);
3720		return (PF_DROP);
3721	}
3722
3723	if (r->keep_state || nr != NULL) {
3724		/* create new state */
3725		struct pf_state	*s = NULL;
3726		struct pf_src_node *sn = NULL;
3727
3728		/* check maximums */
3729		if (r->max_states && (r->states >= r->max_states))
3730			goto cleanup;
3731		/* src node for flter rule */
3732		if ((r->rule_flag & PFRULE_SRCTRACK ||
3733		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3734		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3735			goto cleanup;
3736		/* src node for translation rule */
3737		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3738		    ((direction == PF_OUT &&
3739		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3740		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3741			goto cleanup;
3742		s = pool_get(&pf_state_pl, PR_NOWAIT);
3743		if (s == NULL) {
3744cleanup:
3745			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3746				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3747				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3748				pf_status.src_nodes--;
3749				pool_put(&pf_src_tree_pl, sn);
3750			}
3751			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3752			    nsn->expire == 0) {
3753				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3754				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3755				pf_status.src_nodes--;
3756				pool_put(&pf_src_tree_pl, nsn);
3757			}
3758			REASON_SET(&reason, PFRES_MEMORY);
3759			return (PF_DROP);
3760		}
3761		bzero(s, sizeof(*s));
3762		r->states++;
3763		if (a != NULL)
3764			a->states++;
3765		s->rule.ptr = r;
3766		s->nat_rule.ptr = nr;
3767		if (s->nat_rule.ptr != NULL)
3768			s->nat_rule.ptr->states++;
3769		s->anchor.ptr = a;
3770		s->allow_opts = r->allow_opts;
3771		s->log = r->log & 2;
3772		s->proto = pd->proto;
3773		s->direction = direction;
3774		s->af = af;
3775		if (direction == PF_OUT) {
3776			PF_ACPY(&s->gwy.addr, saddr, af);
3777			PF_ACPY(&s->ext.addr, daddr, af);
3778			if (nr != NULL)
3779				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3780			else
3781				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3782		} else {
3783			PF_ACPY(&s->lan.addr, daddr, af);
3784			PF_ACPY(&s->ext.addr, saddr, af);
3785			if (nr != NULL)
3786				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3787			else
3788				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3789		}
3790		s->src.state = PFOTHERS_SINGLE;
3791		s->dst.state = PFOTHERS_NO_TRAFFIC;
3792#ifdef __FreeBSD__
3793		s->creation = time_second;
3794		s->expire = time_second;
3795#else
3796		s->creation = time.tv_sec;
3797		s->expire = time.tv_sec;
3798#endif
3799		s->timeout = PFTM_OTHER_FIRST_PACKET;
3800		pf_set_rt_ifp(s, saddr);
3801		if (sn != NULL) {
3802			s->src_node = sn;
3803			s->src_node->states++;
3804		}
3805		if (nsn != NULL) {
3806			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3807			s->nat_src_node = nsn;
3808			s->nat_src_node->states++;
3809		}
3810		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3811			REASON_SET(&reason, PFRES_MEMORY);
3812			pf_src_tree_remove_state(s);
3813			pool_put(&pf_state_pl, s);
3814			return (PF_DROP);
3815		} else
3816			*sm = s;
3817	}
3818
3819	return (PF_PASS);
3820}
3821
3822int
3823pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3824    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3825    struct pf_ruleset **rsm)
3826{
3827	struct pf_rule		*r, *a = NULL;
3828	struct pf_ruleset	*ruleset = NULL;
3829	sa_family_t		 af = pd->af;
3830	u_short			 reason;
3831	struct pf_tag		*pftag = NULL;
3832	int			 tag = -1;
3833
3834	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3835	while (r != NULL) {
3836		r->evaluations++;
3837		if (r->kif != NULL &&
3838		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3839			r = r->skip[PF_SKIP_IFP].ptr;
3840		else if (r->direction && r->direction != direction)
3841			r = r->skip[PF_SKIP_DIR].ptr;
3842		else if (r->af && r->af != af)
3843			r = r->skip[PF_SKIP_AF].ptr;
3844		else if (r->proto && r->proto != pd->proto)
3845			r = r->skip[PF_SKIP_PROTO].ptr;
3846		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3847			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3848		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3849			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3850		else if (r->tos && !(r->tos & pd->tos))
3851			r = TAILQ_NEXT(r, entries);
3852		else if (r->src.port_op || r->dst.port_op ||
3853		    r->flagset || r->type || r->code ||
3854		    r->os_fingerprint != PF_OSFP_ANY)
3855			r = TAILQ_NEXT(r, entries);
3856		else if (r->match_tag && !pf_match_tag(m, r, NULL, pftag, &tag))
3857			r = TAILQ_NEXT(r, entries);
3858		else if (r->anchorname[0] && r->anchor == NULL)
3859			r = TAILQ_NEXT(r, entries);
3860		else {
3861			if (r->anchor == NULL) {
3862				*rm = r;
3863				*am = a;
3864				*rsm = ruleset;
3865				if ((*rm)->quick)
3866					break;
3867				r = TAILQ_NEXT(r, entries);
3868			} else
3869				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3870				    PF_RULESET_FILTER);
3871		}
3872		if (r == NULL && a != NULL)
3873			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3874			    PF_RULESET_FILTER);
3875	}
3876	r = *rm;
3877	a = *am;
3878	ruleset = *rsm;
3879
3880	REASON_SET(&reason, PFRES_MATCH);
3881
3882	if (r->log)
3883		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3884
3885	if (r->action != PF_PASS)
3886		return (PF_DROP);
3887
3888	if (pf_tag_packet(m, pftag, tag)) {
3889		REASON_SET(&reason, PFRES_MEMORY);
3890		return (PF_DROP);
3891	}
3892
3893	return (PF_PASS);
3894}
3895
3896int
3897pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
3898    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3899    u_short *reason)
3900{
3901	struct pf_state		 key;
3902	struct tcphdr		*th = pd->hdr.tcp;
3903	u_int16_t		 win = ntohs(th->th_win);
3904	u_int32_t		 ack, end, seq;
3905	u_int8_t		 sws, dws;
3906	int			 ackskew;
3907	int			 copyback = 0;
3908	struct pf_state_peer	*src, *dst;
3909
3910	key.af = pd->af;
3911	key.proto = IPPROTO_TCP;
3912	if (direction == PF_IN)	{
3913		PF_ACPY(&key.ext.addr, pd->src, key.af);
3914		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
3915		key.ext.port = th->th_sport;
3916		key.gwy.port = th->th_dport;
3917	} else {
3918		PF_ACPY(&key.lan.addr, pd->src, key.af);
3919		PF_ACPY(&key.ext.addr, pd->dst, key.af);
3920		key.lan.port = th->th_sport;
3921		key.ext.port = th->th_dport;
3922	}
3923
3924	STATE_LOOKUP();
3925
3926	if (direction == (*state)->direction) {
3927		src = &(*state)->src;
3928		dst = &(*state)->dst;
3929	} else {
3930		src = &(*state)->dst;
3931		dst = &(*state)->src;
3932	}
3933
3934	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
3935		if (direction != (*state)->direction)
3936			return (PF_SYNPROXY_DROP);
3937		if (th->th_flags & TH_SYN) {
3938			if (ntohl(th->th_seq) != (*state)->src.seqlo)
3939				return (PF_DROP);
3940			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3941			    pd->src, th->th_dport, th->th_sport,
3942			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
3943			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0);
3944			return (PF_SYNPROXY_DROP);
3945		} else if (!(th->th_flags & TH_ACK) ||
3946		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3947		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
3948			return (PF_DROP);
3949		else
3950			(*state)->src.state = PF_TCPS_PROXY_DST;
3951	}
3952	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
3953		struct pf_state_host *src, *dst;
3954
3955		if (direction == PF_OUT) {
3956			src = &(*state)->gwy;
3957			dst = &(*state)->ext;
3958		} else {
3959			src = &(*state)->ext;
3960			dst = &(*state)->lan;
3961		}
3962		if (direction == (*state)->direction) {
3963			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
3964			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3965			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
3966				return (PF_DROP);
3967			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
3968			if ((*state)->dst.seqhi == 1)
3969				(*state)->dst.seqhi = arc4random();
3970			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3971			    &dst->addr, src->port, dst->port,
3972			    (*state)->dst.seqhi, 0, TH_SYN, 0,
3973			    (*state)->src.mss, 0);
3974			return (PF_SYNPROXY_DROP);
3975		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
3976		    (TH_SYN|TH_ACK)) ||
3977		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1))
3978			return (PF_DROP);
3979		else {
3980			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
3981			(*state)->dst.seqlo = ntohl(th->th_seq);
3982			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3983			    pd->src, th->th_dport, th->th_sport,
3984			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
3985			    TH_ACK, (*state)->src.max_win, 0, 0);
3986			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3987			    &dst->addr, src->port, dst->port,
3988			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
3989			    TH_ACK, (*state)->dst.max_win, 0, 0);
3990			(*state)->src.seqdiff = (*state)->dst.seqhi -
3991			    (*state)->src.seqlo;
3992			(*state)->dst.seqdiff = (*state)->src.seqhi -
3993			    (*state)->dst.seqlo;
3994			(*state)->src.seqhi = (*state)->src.seqlo +
3995			    (*state)->src.max_win;
3996			(*state)->dst.seqhi = (*state)->dst.seqlo +
3997			    (*state)->dst.max_win;
3998			(*state)->src.wscale = (*state)->dst.wscale = 0;
3999			(*state)->src.state = (*state)->dst.state =
4000			    TCPS_ESTABLISHED;
4001			return (PF_SYNPROXY_DROP);
4002		}
4003	}
4004
4005	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4006		sws = src->wscale & PF_WSCALE_MASK;
4007		dws = dst->wscale & PF_WSCALE_MASK;
4008	} else
4009		sws = dws = 0;
4010
4011	/*
4012	 * Sequence tracking algorithm from Guido van Rooij's paper:
4013	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
4014	 *	tcp_filtering.ps
4015	 */
4016
4017	seq = ntohl(th->th_seq);
4018	if (src->seqlo == 0) {
4019		/* First packet from this end. Set its state */
4020
4021		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4022		    src->scrub == NULL) {
4023			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4024				REASON_SET(reason, PFRES_MEMORY);
4025				return (PF_DROP);
4026			}
4027		}
4028
4029		/* Deferred generation of sequence number modulator */
4030		if (dst->seqdiff && !src->seqdiff) {
4031			while ((src->seqdiff = arc4random()) == 0)
4032				;
4033			ack = ntohl(th->th_ack) - dst->seqdiff;
4034			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4035			    src->seqdiff), 0);
4036			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4037			copyback = 1;
4038		} else {
4039			ack = ntohl(th->th_ack);
4040		}
4041
4042		end = seq + pd->p_len;
4043		if (th->th_flags & TH_SYN) {
4044			end++;
4045			if (dst->wscale & PF_WSCALE_FLAG) {
4046				src->wscale = pf_get_wscale(m, off, th->th_off,
4047				    pd->af);
4048				if (src->wscale & PF_WSCALE_FLAG) {
4049					/* Remove scale factor from initial
4050					 * window */
4051					sws = src->wscale & PF_WSCALE_MASK;
4052					win = ((u_int32_t)win + (1 << sws) - 1)
4053					    >> sws;
4054					dws = dst->wscale & PF_WSCALE_MASK;
4055				} else {
4056					/* fixup other window */
4057					dst->max_win <<= dst->wscale &
4058					    PF_WSCALE_MASK;
4059					/* in case of a retrans SYN|ACK */
4060					dst->wscale = 0;
4061				}
4062			}
4063		}
4064		if (th->th_flags & TH_FIN)
4065			end++;
4066
4067		src->seqlo = seq;
4068		if (src->state < TCPS_SYN_SENT)
4069			src->state = TCPS_SYN_SENT;
4070
4071		/*
4072		 * May need to slide the window (seqhi may have been set by
4073		 * the crappy stack check or if we picked up the connection
4074		 * after establishment)
4075		 */
4076		if (src->seqhi == 1 ||
4077		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4078			src->seqhi = end + MAX(1, dst->max_win << dws);
4079		if (win > src->max_win)
4080			src->max_win = win;
4081
4082	} else {
4083		ack = ntohl(th->th_ack) - dst->seqdiff;
4084		if (src->seqdiff) {
4085			/* Modulate sequence numbers */
4086			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4087			    src->seqdiff), 0);
4088			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4089			copyback = 1;
4090		}
4091		end = seq + pd->p_len;
4092		if (th->th_flags & TH_SYN)
4093			end++;
4094		if (th->th_flags & TH_FIN)
4095			end++;
4096	}
4097
4098	if ((th->th_flags & TH_ACK) == 0) {
4099		/* Let it pass through the ack skew check */
4100		ack = dst->seqlo;
4101	} else if ((ack == 0 &&
4102	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4103	    /* broken tcp stacks do not set ack */
4104	    (dst->state < TCPS_SYN_SENT)) {
4105		/*
4106		 * Many stacks (ours included) will set the ACK number in an
4107		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4108		 */
4109		ack = dst->seqlo;
4110	}
4111
4112	if (seq == end) {
4113		/* Ease sequencing restrictions on no data packets */
4114		seq = src->seqlo;
4115		end = seq;
4116	}
4117
4118	ackskew = dst->seqlo - ack;
4119
4120#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4121	if (SEQ_GEQ(src->seqhi, end) &&
4122	    /* Last octet inside other's window space */
4123	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4124	    /* Retrans: not more than one window back */
4125	    (ackskew >= -MAXACKWINDOW) &&
4126	    /* Acking not more than one reassembled fragment backwards */
4127	    (ackskew <= (MAXACKWINDOW << sws))) {
4128	    /* Acking not more than one window forward */
4129
4130		/* update max window */
4131		if (src->max_win < win)
4132			src->max_win = win;
4133		/* synchronize sequencing */
4134		if (SEQ_GT(end, src->seqlo))
4135			src->seqlo = end;
4136		/* slide the window of what the other end can send */
4137		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4138			dst->seqhi = ack + MAX((win << sws), 1);
4139
4140
4141		/* update states */
4142		if (th->th_flags & TH_SYN)
4143			if (src->state < TCPS_SYN_SENT)
4144				src->state = TCPS_SYN_SENT;
4145		if (th->th_flags & TH_FIN)
4146			if (src->state < TCPS_CLOSING)
4147				src->state = TCPS_CLOSING;
4148		if (th->th_flags & TH_ACK) {
4149			if (dst->state == TCPS_SYN_SENT)
4150				dst->state = TCPS_ESTABLISHED;
4151			else if (dst->state == TCPS_CLOSING)
4152				dst->state = TCPS_FIN_WAIT_2;
4153		}
4154		if (th->th_flags & TH_RST)
4155			src->state = dst->state = TCPS_TIME_WAIT;
4156
4157		/* update expire time */
4158#ifdef __FreeBSD__
4159		(*state)->expire = time_second;
4160#else
4161		(*state)->expire = time.tv_sec;
4162#endif
4163		if (src->state >= TCPS_FIN_WAIT_2 &&
4164		    dst->state >= TCPS_FIN_WAIT_2)
4165			(*state)->timeout = PFTM_TCP_CLOSED;
4166		else if (src->state >= TCPS_FIN_WAIT_2 ||
4167		    dst->state >= TCPS_FIN_WAIT_2)
4168			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4169		else if (src->state < TCPS_ESTABLISHED ||
4170		    dst->state < TCPS_ESTABLISHED)
4171			(*state)->timeout = PFTM_TCP_OPENING;
4172		else if (src->state >= TCPS_CLOSING ||
4173		    dst->state >= TCPS_CLOSING)
4174			(*state)->timeout = PFTM_TCP_CLOSING;
4175		else
4176			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4177
4178		/* Fall through to PASS packet */
4179
4180	} else if ((dst->state < TCPS_SYN_SENT ||
4181		dst->state >= TCPS_FIN_WAIT_2 ||
4182		src->state >= TCPS_FIN_WAIT_2) &&
4183	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4184	    /* Within a window forward of the originating packet */
4185	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4186	    /* Within a window backward of the originating packet */
4187
4188		/*
4189		 * This currently handles three situations:
4190		 *  1) Stupid stacks will shotgun SYNs before their peer
4191		 *     replies.
4192		 *  2) When PF catches an already established stream (the
4193		 *     firewall rebooted, the state table was flushed, routes
4194		 *     changed...)
4195		 *  3) Packets get funky immediately after the connection
4196		 *     closes (this should catch Solaris spurious ACK|FINs
4197		 *     that web servers like to spew after a close)
4198		 *
4199		 * This must be a little more careful than the above code
4200		 * since packet floods will also be caught here. We don't
4201		 * update the TTL here to mitigate the damage of a packet
4202		 * flood and so the same code can handle awkward establishment
4203		 * and a loosened connection close.
4204		 * In the establishment case, a correct peer response will
4205		 * validate the connection, go through the normal state code
4206		 * and keep updating the state TTL.
4207		 */
4208
4209		if (pf_status.debug >= PF_DEBUG_MISC) {
4210			printf("pf: loose state match: ");
4211			pf_print_state(*state);
4212			pf_print_flags(th->th_flags);
4213			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n",
4214			    seq, ack, pd->p_len, ackskew,
4215			    (*state)->packets[0], (*state)->packets[1]);
4216		}
4217
4218		/* update max window */
4219		if (src->max_win < win)
4220			src->max_win = win;
4221		/* synchronize sequencing */
4222		if (SEQ_GT(end, src->seqlo))
4223			src->seqlo = end;
4224		/* slide the window of what the other end can send */
4225		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4226			dst->seqhi = ack + MAX((win << sws), 1);
4227
4228		/*
4229		 * Cannot set dst->seqhi here since this could be a shotgunned
4230		 * SYN and not an already established connection.
4231		 */
4232
4233		if (th->th_flags & TH_FIN)
4234			if (src->state < TCPS_CLOSING)
4235				src->state = TCPS_CLOSING;
4236		if (th->th_flags & TH_RST)
4237			src->state = dst->state = TCPS_TIME_WAIT;
4238
4239		/* Fall through to PASS packet */
4240
4241	} else {
4242		if ((*state)->dst.state == TCPS_SYN_SENT &&
4243		    (*state)->src.state == TCPS_SYN_SENT) {
4244			/* Send RST for state mismatches during handshake */
4245			if (!(th->th_flags & TH_RST)) {
4246				u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
4247
4248				if (th->th_flags & TH_SYN)
4249					ack++;
4250				if (th->th_flags & TH_FIN)
4251					ack++;
4252				pf_send_tcp((*state)->rule.ptr, pd->af,
4253				    pd->dst, pd->src, th->th_dport,
4254				    th->th_sport, ntohl(th->th_ack), ack,
4255				    TH_RST|TH_ACK, 0, 0,
4256				    (*state)->rule.ptr->return_ttl);
4257			}
4258			src->seqlo = 0;
4259			src->seqhi = 1;
4260			src->max_win = 1;
4261		} else if (pf_status.debug >= PF_DEBUG_MISC) {
4262			printf("pf: BAD state: ");
4263			pf_print_state(*state);
4264			pf_print_flags(th->th_flags);
4265			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d "
4266			    "dir=%s,%s\n", seq, ack, pd->p_len, ackskew,
4267			    (*state)->packets[0], (*state)->packets[1],
4268			    direction == PF_IN ? "in" : "out",
4269			    direction == (*state)->direction ? "fwd" : "rev");
4270			printf("pf: State failure on: %c %c %c %c | %c %c\n",
4271			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4272			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4273			    ' ': '2',
4274			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4275			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4276			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4277			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4278		}
4279		return (PF_DROP);
4280	}
4281
4282	if (dst->scrub || src->scrub) {
4283		if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4284		    src, dst, &copyback))
4285			return (PF_DROP);
4286	}
4287
4288	/* Any packets which have gotten here are to be passed */
4289
4290	/* translate source/destination address, if necessary */
4291	if (STATE_TRANSLATE(*state)) {
4292		if (direction == PF_OUT)
4293			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4294			    &th->th_sum, &(*state)->gwy.addr,
4295			    (*state)->gwy.port, 0, pd->af);
4296		else
4297			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4298			    &th->th_sum, &(*state)->lan.addr,
4299			    (*state)->lan.port, 0, pd->af);
4300		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4301	} else if (copyback) {
4302		/* Copyback sequence modulation or stateful scrub changes */
4303		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4304	}
4305
4306	return (PF_PASS);
4307}
4308
4309int
4310pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4311    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4312{
4313	struct pf_state_peer	*src, *dst;
4314	struct pf_state		 key;
4315	struct udphdr		*uh = pd->hdr.udp;
4316
4317	key.af = pd->af;
4318	key.proto = IPPROTO_UDP;
4319	if (direction == PF_IN)	{
4320		PF_ACPY(&key.ext.addr, pd->src, key.af);
4321		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4322		key.ext.port = uh->uh_sport;
4323		key.gwy.port = uh->uh_dport;
4324	} else {
4325		PF_ACPY(&key.lan.addr, pd->src, key.af);
4326		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4327		key.lan.port = uh->uh_sport;
4328		key.ext.port = uh->uh_dport;
4329	}
4330
4331	STATE_LOOKUP();
4332
4333	if (direction == (*state)->direction) {
4334		src = &(*state)->src;
4335		dst = &(*state)->dst;
4336	} else {
4337		src = &(*state)->dst;
4338		dst = &(*state)->src;
4339	}
4340
4341	/* update states */
4342	if (src->state < PFUDPS_SINGLE)
4343		src->state = PFUDPS_SINGLE;
4344	if (dst->state == PFUDPS_SINGLE)
4345		dst->state = PFUDPS_MULTIPLE;
4346
4347	/* update expire time */
4348#ifdef __FreeBSD__
4349	(*state)->expire = time_second;
4350#else
4351	(*state)->expire = time.tv_sec;
4352#endif
4353	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4354		(*state)->timeout = PFTM_UDP_MULTIPLE;
4355	else
4356		(*state)->timeout = PFTM_UDP_SINGLE;
4357
4358	/* translate source/destination address, if necessary */
4359	if (STATE_TRANSLATE(*state)) {
4360		if (direction == PF_OUT)
4361			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4362			    &uh->uh_sum, &(*state)->gwy.addr,
4363			    (*state)->gwy.port, 1, pd->af);
4364		else
4365			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4366			    &uh->uh_sum, &(*state)->lan.addr,
4367			    (*state)->lan.port, 1, pd->af);
4368		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4369	}
4370
4371	return (PF_PASS);
4372}
4373
4374int
4375pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4376    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4377{
4378	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
4379	u_int16_t	 icmpid = 0;		/* make the compiler happy */
4380	u_int16_t	*icmpsum = NULL;	/* make the compiler happy */
4381	u_int8_t	 icmptype = 0;		/* make the compiler happy */
4382	int		 state_icmp = 0;
4383
4384	switch (pd->proto) {
4385#ifdef INET
4386	case IPPROTO_ICMP:
4387		icmptype = pd->hdr.icmp->icmp_type;
4388		icmpid = pd->hdr.icmp->icmp_id;
4389		icmpsum = &pd->hdr.icmp->icmp_cksum;
4390
4391		if (icmptype == ICMP_UNREACH ||
4392		    icmptype == ICMP_SOURCEQUENCH ||
4393		    icmptype == ICMP_REDIRECT ||
4394		    icmptype == ICMP_TIMXCEED ||
4395		    icmptype == ICMP_PARAMPROB)
4396			state_icmp++;
4397		break;
4398#endif /* INET */
4399#ifdef INET6
4400	case IPPROTO_ICMPV6:
4401		icmptype = pd->hdr.icmp6->icmp6_type;
4402		icmpid = pd->hdr.icmp6->icmp6_id;
4403		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4404
4405		if (icmptype == ICMP6_DST_UNREACH ||
4406		    icmptype == ICMP6_PACKET_TOO_BIG ||
4407		    icmptype == ICMP6_TIME_EXCEEDED ||
4408		    icmptype == ICMP6_PARAM_PROB)
4409			state_icmp++;
4410		break;
4411#endif /* INET6 */
4412	}
4413
4414	if (!state_icmp) {
4415
4416		/*
4417		 * ICMP query/reply message not related to a TCP/UDP packet.
4418		 * Search for an ICMP state.
4419		 */
4420		struct pf_state		key;
4421
4422		key.af = pd->af;
4423		key.proto = pd->proto;
4424		if (direction == PF_IN)	{
4425			PF_ACPY(&key.ext.addr, pd->src, key.af);
4426			PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4427			key.ext.port = icmpid;
4428			key.gwy.port = icmpid;
4429		} else {
4430			PF_ACPY(&key.lan.addr, pd->src, key.af);
4431			PF_ACPY(&key.ext.addr, pd->dst, key.af);
4432			key.lan.port = icmpid;
4433			key.ext.port = icmpid;
4434		}
4435
4436		STATE_LOOKUP();
4437
4438#ifdef __FreeBSD__
4439		(*state)->expire = time_second;
4440#else
4441		(*state)->expire = time.tv_sec;
4442#endif
4443		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4444
4445		/* translate source/destination address, if necessary */
4446		if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) {
4447			if (direction == PF_OUT) {
4448				switch (pd->af) {
4449#ifdef INET
4450				case AF_INET:
4451					pf_change_a(&saddr->v4.s_addr,
4452					    pd->ip_sum,
4453					    (*state)->gwy.addr.v4.s_addr, 0);
4454					break;
4455#endif /* INET */
4456#ifdef INET6
4457				case AF_INET6:
4458					pf_change_a6(saddr,
4459					    &pd->hdr.icmp6->icmp6_cksum,
4460					    &(*state)->gwy.addr, 0);
4461					m_copyback(m, off,
4462					    sizeof(struct icmp6_hdr),
4463					    (caddr_t)pd->hdr.icmp6);
4464					break;
4465#endif /* INET6 */
4466				}
4467			} else {
4468				switch (pd->af) {
4469#ifdef INET
4470				case AF_INET:
4471					pf_change_a(&daddr->v4.s_addr,
4472					    pd->ip_sum,
4473					    (*state)->lan.addr.v4.s_addr, 0);
4474					break;
4475#endif /* INET */
4476#ifdef INET6
4477				case AF_INET6:
4478					pf_change_a6(daddr,
4479					    &pd->hdr.icmp6->icmp6_cksum,
4480					    &(*state)->lan.addr, 0);
4481					m_copyback(m, off,
4482					    sizeof(struct icmp6_hdr),
4483					    (caddr_t)pd->hdr.icmp6);
4484					break;
4485#endif /* INET6 */
4486				}
4487			}
4488		}
4489
4490		return (PF_PASS);
4491
4492	} else {
4493		/*
4494		 * ICMP error message in response to a TCP/UDP packet.
4495		 * Extract the inner TCP/UDP header and search for that state.
4496		 */
4497
4498		struct pf_pdesc	pd2;
4499#ifdef INET
4500		struct ip	h2;
4501#endif /* INET */
4502#ifdef INET6
4503		struct ip6_hdr	h2_6;
4504		int		terminal = 0;
4505#endif /* INET6 */
4506		int		ipoff2 = 0;	/* make the compiler happy */
4507		int		off2 = 0;	/* make the compiler happy */
4508
4509		pd2.af = pd->af;
4510		switch (pd->af) {
4511#ifdef INET
4512		case AF_INET:
4513			/* offset of h2 in mbuf chain */
4514			ipoff2 = off + ICMP_MINLEN;
4515
4516			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4517			    NULL, NULL, pd2.af)) {
4518				DPFPRINTF(PF_DEBUG_MISC,
4519				    ("pf: ICMP error message too short "
4520				    "(ip)\n"));
4521				return (PF_DROP);
4522			}
4523			/*
4524			 * ICMP error messages don't refer to non-first
4525			 * fragments
4526			 */
4527			if (h2.ip_off & htons(IP_OFFMASK))
4528				return (PF_DROP);
4529
4530			/* offset of protocol header that follows h2 */
4531			off2 = ipoff2 + (h2.ip_hl << 2);
4532
4533			pd2.proto = h2.ip_p;
4534			pd2.src = (struct pf_addr *)&h2.ip_src;
4535			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4536			pd2.ip_sum = &h2.ip_sum;
4537			break;
4538#endif /* INET */
4539#ifdef INET6
4540		case AF_INET6:
4541			ipoff2 = off + sizeof(struct icmp6_hdr);
4542
4543			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4544			    NULL, NULL, pd2.af)) {
4545				DPFPRINTF(PF_DEBUG_MISC,
4546				    ("pf: ICMP error message too short "
4547				    "(ip6)\n"));
4548				return (PF_DROP);
4549			}
4550			pd2.proto = h2_6.ip6_nxt;
4551			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4552			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4553			pd2.ip_sum = NULL;
4554			off2 = ipoff2 + sizeof(h2_6);
4555			do {
4556				switch (pd2.proto) {
4557				case IPPROTO_FRAGMENT:
4558					/*
4559					 * ICMPv6 error messages for
4560					 * non-first fragments
4561					 */
4562					return (PF_DROP);
4563				case IPPROTO_AH:
4564				case IPPROTO_HOPOPTS:
4565				case IPPROTO_ROUTING:
4566				case IPPROTO_DSTOPTS: {
4567					/* get next header and header length */
4568					struct ip6_ext opt6;
4569
4570					if (!pf_pull_hdr(m, off2, &opt6,
4571					    sizeof(opt6), NULL, NULL, pd2.af)) {
4572						DPFPRINTF(PF_DEBUG_MISC,
4573						    ("pf: ICMPv6 short opt\n"));
4574						return (PF_DROP);
4575					}
4576					if (pd2.proto == IPPROTO_AH)
4577						off2 += (opt6.ip6e_len + 2) * 4;
4578					else
4579						off2 += (opt6.ip6e_len + 1) * 8;
4580					pd2.proto = opt6.ip6e_nxt;
4581					/* goto the next header */
4582					break;
4583				}
4584				default:
4585					terminal++;
4586					break;
4587				}
4588			} while (!terminal);
4589			break;
4590#endif /* INET6 */
4591		}
4592
4593		switch (pd2.proto) {
4594		case IPPROTO_TCP: {
4595			struct tcphdr		 th;
4596			u_int32_t		 seq;
4597			struct pf_state		 key;
4598			struct pf_state_peer	*src, *dst;
4599			u_int8_t		 dws;
4600			int			 copyback = 0;
4601
4602			/*
4603			 * Only the first 8 bytes of the TCP header can be
4604			 * expected. Don't access any TCP header fields after
4605			 * th_seq, an ackskew test is not possible.
4606			 */
4607			if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL, pd2.af)) {
4608				DPFPRINTF(PF_DEBUG_MISC,
4609				    ("pf: ICMP error message too short "
4610				    "(tcp)\n"));
4611				return (PF_DROP);
4612			}
4613
4614			key.af = pd2.af;
4615			key.proto = IPPROTO_TCP;
4616			if (direction == PF_IN)	{
4617				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4618				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4619				key.ext.port = th.th_dport;
4620				key.gwy.port = th.th_sport;
4621			} else {
4622				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4623				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4624				key.lan.port = th.th_dport;
4625				key.ext.port = th.th_sport;
4626			}
4627
4628			STATE_LOOKUP();
4629
4630			if (direction == (*state)->direction) {
4631				src = &(*state)->dst;
4632				dst = &(*state)->src;
4633			} else {
4634				src = &(*state)->src;
4635				dst = &(*state)->dst;
4636			}
4637
4638			if (src->wscale && dst->wscale &&
4639			    !(th.th_flags & TH_SYN))
4640				dws = dst->wscale & PF_WSCALE_MASK;
4641			else
4642				dws = 0;
4643
4644			/* Demodulate sequence number */
4645			seq = ntohl(th.th_seq) - src->seqdiff;
4646			if (src->seqdiff) {
4647				pf_change_a(&th.th_seq, icmpsum,
4648				    htonl(seq), 0);
4649				copyback = 1;
4650			}
4651
4652			if (!SEQ_GEQ(src->seqhi, seq) ||
4653			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
4654				if (pf_status.debug >= PF_DEBUG_MISC) {
4655					printf("pf: BAD ICMP %d:%d ",
4656					    icmptype, pd->hdr.icmp->icmp_code);
4657					pf_print_host(pd->src, 0, pd->af);
4658					printf(" -> ");
4659					pf_print_host(pd->dst, 0, pd->af);
4660					printf(" state: ");
4661					pf_print_state(*state);
4662					printf(" seq=%u\n", seq);
4663				}
4664				return (PF_DROP);
4665			}
4666
4667			if (STATE_TRANSLATE(*state)) {
4668				if (direction == PF_IN) {
4669					pf_change_icmp(pd2.src, &th.th_sport,
4670					    daddr, &(*state)->lan.addr,
4671					    (*state)->lan.port, NULL,
4672					    pd2.ip_sum, icmpsum,
4673					    pd->ip_sum, 0, pd2.af);
4674				} else {
4675					pf_change_icmp(pd2.dst, &th.th_dport,
4676					    saddr, &(*state)->gwy.addr,
4677					    (*state)->gwy.port, NULL,
4678					    pd2.ip_sum, icmpsum,
4679					    pd->ip_sum, 0, pd2.af);
4680				}
4681				copyback = 1;
4682			}
4683
4684			if (copyback) {
4685				switch (pd2.af) {
4686#ifdef INET
4687				case AF_INET:
4688					m_copyback(m, off, ICMP_MINLEN,
4689					    (caddr_t)pd->hdr.icmp);
4690					m_copyback(m, ipoff2, sizeof(h2),
4691					    (caddr_t)&h2);
4692					break;
4693#endif /* INET */
4694#ifdef INET6
4695				case AF_INET6:
4696					m_copyback(m, off,
4697					    sizeof(struct icmp6_hdr),
4698					    (caddr_t)pd->hdr.icmp6);
4699					m_copyback(m, ipoff2, sizeof(h2_6),
4700					    (caddr_t)&h2_6);
4701					break;
4702#endif /* INET6 */
4703				}
4704				m_copyback(m, off2, 8, (caddr_t)&th);
4705			}
4706
4707			return (PF_PASS);
4708			break;
4709		}
4710		case IPPROTO_UDP: {
4711			struct udphdr		uh;
4712			struct pf_state		key;
4713
4714			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4715			    NULL, NULL, pd2.af)) {
4716				DPFPRINTF(PF_DEBUG_MISC,
4717				    ("pf: ICMP error message too short "
4718				    "(udp)\n"));
4719				return (PF_DROP);
4720			}
4721
4722			key.af = pd2.af;
4723			key.proto = IPPROTO_UDP;
4724			if (direction == PF_IN)	{
4725				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4726				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4727				key.ext.port = uh.uh_dport;
4728				key.gwy.port = uh.uh_sport;
4729			} else {
4730				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4731				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4732				key.lan.port = uh.uh_dport;
4733				key.ext.port = uh.uh_sport;
4734			}
4735
4736			STATE_LOOKUP();
4737
4738			if (STATE_TRANSLATE(*state)) {
4739				if (direction == PF_IN) {
4740					pf_change_icmp(pd2.src, &uh.uh_sport,
4741					    daddr, &(*state)->lan.addr,
4742					    (*state)->lan.port, &uh.uh_sum,
4743					    pd2.ip_sum, icmpsum,
4744					    pd->ip_sum, 1, pd2.af);
4745				} else {
4746					pf_change_icmp(pd2.dst, &uh.uh_dport,
4747					    saddr, &(*state)->gwy.addr,
4748					    (*state)->gwy.port, &uh.uh_sum,
4749					    pd2.ip_sum, icmpsum,
4750					    pd->ip_sum, 1, pd2.af);
4751				}
4752				switch (pd2.af) {
4753#ifdef INET
4754				case AF_INET:
4755					m_copyback(m, off, ICMP_MINLEN,
4756					    (caddr_t)pd->hdr.icmp);
4757					m_copyback(m, ipoff2, sizeof(h2),
4758					    (caddr_t)&h2);
4759					break;
4760#endif /* INET */
4761#ifdef INET6
4762				case AF_INET6:
4763					m_copyback(m, off,
4764					    sizeof(struct icmp6_hdr),
4765					    (caddr_t)pd->hdr.icmp6);
4766					m_copyback(m, ipoff2, sizeof(h2_6),
4767					    (caddr_t)&h2_6);
4768					break;
4769#endif /* INET6 */
4770				}
4771				m_copyback(m, off2, sizeof(uh),
4772				    (caddr_t)&uh);
4773			}
4774
4775			return (PF_PASS);
4776			break;
4777		}
4778#ifdef INET
4779		case IPPROTO_ICMP: {
4780			struct icmp		iih;
4781			struct pf_state		key;
4782
4783			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4784			    NULL, NULL, pd2.af)) {
4785				DPFPRINTF(PF_DEBUG_MISC,
4786				    ("pf: ICMP error message too short i"
4787				    "(icmp)\n"));
4788				return (PF_DROP);
4789			}
4790
4791			key.af = pd2.af;
4792			key.proto = IPPROTO_ICMP;
4793			if (direction == PF_IN)	{
4794				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4795				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4796				key.ext.port = iih.icmp_id;
4797				key.gwy.port = iih.icmp_id;
4798			} else {
4799				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4800				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4801				key.lan.port = iih.icmp_id;
4802				key.ext.port = iih.icmp_id;
4803			}
4804
4805			STATE_LOOKUP();
4806
4807			if (STATE_TRANSLATE(*state)) {
4808				if (direction == PF_IN) {
4809					pf_change_icmp(pd2.src, &iih.icmp_id,
4810					    daddr, &(*state)->lan.addr,
4811					    (*state)->lan.port, NULL,
4812					    pd2.ip_sum, icmpsum,
4813					    pd->ip_sum, 0, AF_INET);
4814				} else {
4815					pf_change_icmp(pd2.dst, &iih.icmp_id,
4816					    saddr, &(*state)->gwy.addr,
4817					    (*state)->gwy.port, NULL,
4818					    pd2.ip_sum, icmpsum,
4819					    pd->ip_sum, 0, AF_INET);
4820				}
4821				m_copyback(m, off, ICMP_MINLEN,
4822				    (caddr_t)pd->hdr.icmp);
4823				m_copyback(m, ipoff2, sizeof(h2),
4824				    (caddr_t)&h2);
4825				m_copyback(m, off2, ICMP_MINLEN,
4826				    (caddr_t)&iih);
4827			}
4828
4829			return (PF_PASS);
4830			break;
4831		}
4832#endif /* INET */
4833#ifdef INET6
4834		case IPPROTO_ICMPV6: {
4835			struct icmp6_hdr	iih;
4836			struct pf_state		key;
4837
4838			if (!pf_pull_hdr(m, off2, &iih,
4839			    sizeof(struct icmp6_hdr), NULL, NULL, pd2.af)) {
4840				DPFPRINTF(PF_DEBUG_MISC,
4841				    ("pf: ICMP error message too short "
4842				    "(icmp6)\n"));
4843				return (PF_DROP);
4844			}
4845
4846			key.af = pd2.af;
4847			key.proto = IPPROTO_ICMPV6;
4848			if (direction == PF_IN)	{
4849				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4850				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4851				key.ext.port = iih.icmp6_id;
4852				key.gwy.port = iih.icmp6_id;
4853			} else {
4854				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4855				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4856				key.lan.port = iih.icmp6_id;
4857				key.ext.port = iih.icmp6_id;
4858			}
4859
4860			STATE_LOOKUP();
4861
4862			if (STATE_TRANSLATE(*state)) {
4863				if (direction == PF_IN) {
4864					pf_change_icmp(pd2.src, &iih.icmp6_id,
4865					    daddr, &(*state)->lan.addr,
4866					    (*state)->lan.port, NULL,
4867					    pd2.ip_sum, icmpsum,
4868					    pd->ip_sum, 0, AF_INET6);
4869				} else {
4870					pf_change_icmp(pd2.dst, &iih.icmp6_id,
4871					    saddr, &(*state)->gwy.addr,
4872					    (*state)->gwy.port, NULL,
4873					    pd2.ip_sum, icmpsum,
4874					    pd->ip_sum, 0, AF_INET6);
4875				}
4876				m_copyback(m, off, sizeof(struct icmp6_hdr),
4877				    (caddr_t)pd->hdr.icmp6);
4878				m_copyback(m, ipoff2, sizeof(h2_6),
4879				    (caddr_t)&h2_6);
4880				m_copyback(m, off2, sizeof(struct icmp6_hdr),
4881				    (caddr_t)&iih);
4882			}
4883
4884			return (PF_PASS);
4885			break;
4886		}
4887#endif /* INET6 */
4888		default: {
4889			struct pf_state		key;
4890
4891			key.af = pd2.af;
4892			key.proto = pd2.proto;
4893			if (direction == PF_IN)	{
4894				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4895				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4896				key.ext.port = 0;
4897				key.gwy.port = 0;
4898			} else {
4899				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4900				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4901				key.lan.port = 0;
4902				key.ext.port = 0;
4903			}
4904
4905			STATE_LOOKUP();
4906
4907			if (STATE_TRANSLATE(*state)) {
4908				if (direction == PF_IN) {
4909					pf_change_icmp(pd2.src, NULL,
4910					    daddr, &(*state)->lan.addr,
4911					    0, NULL,
4912					    pd2.ip_sum, icmpsum,
4913					    pd->ip_sum, 0, pd2.af);
4914				} else {
4915					pf_change_icmp(pd2.dst, NULL,
4916					    saddr, &(*state)->gwy.addr,
4917					    0, NULL,
4918					    pd2.ip_sum, icmpsum,
4919					    pd->ip_sum, 0, pd2.af);
4920				}
4921				switch (pd2.af) {
4922#ifdef INET
4923				case AF_INET:
4924					m_copyback(m, off, ICMP_MINLEN,
4925					    (caddr_t)pd->hdr.icmp);
4926					m_copyback(m, ipoff2, sizeof(h2),
4927					    (caddr_t)&h2);
4928					break;
4929#endif /* INET */
4930#ifdef INET6
4931				case AF_INET6:
4932					m_copyback(m, off,
4933					    sizeof(struct icmp6_hdr),
4934					    (caddr_t)pd->hdr.icmp6);
4935					m_copyback(m, ipoff2, sizeof(h2_6),
4936					    (caddr_t)&h2_6);
4937					break;
4938#endif /* INET6 */
4939				}
4940			}
4941
4942			return (PF_PASS);
4943			break;
4944		}
4945		}
4946	}
4947}
4948
4949int
4950pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
4951    struct pf_pdesc *pd)
4952{
4953	struct pf_state_peer	*src, *dst;
4954	struct pf_state		 key;
4955
4956	key.af = pd->af;
4957	key.proto = pd->proto;
4958	if (direction == PF_IN)	{
4959		PF_ACPY(&key.ext.addr, pd->src, key.af);
4960		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4961		key.ext.port = 0;
4962		key.gwy.port = 0;
4963	} else {
4964		PF_ACPY(&key.lan.addr, pd->src, key.af);
4965		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4966		key.lan.port = 0;
4967		key.ext.port = 0;
4968	}
4969
4970	STATE_LOOKUP();
4971
4972	if (direction == (*state)->direction) {
4973		src = &(*state)->src;
4974		dst = &(*state)->dst;
4975	} else {
4976		src = &(*state)->dst;
4977		dst = &(*state)->src;
4978	}
4979
4980	/* update states */
4981	if (src->state < PFOTHERS_SINGLE)
4982		src->state = PFOTHERS_SINGLE;
4983	if (dst->state == PFOTHERS_SINGLE)
4984		dst->state = PFOTHERS_MULTIPLE;
4985
4986	/* update expire time */
4987#ifdef __FreeBSD__
4988	(*state)->expire = time_second;
4989#else
4990	(*state)->expire = time.tv_sec;
4991#endif
4992	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
4993		(*state)->timeout = PFTM_OTHER_MULTIPLE;
4994	else
4995		(*state)->timeout = PFTM_OTHER_SINGLE;
4996
4997	/* translate source/destination address, if necessary */
4998	if (STATE_TRANSLATE(*state)) {
4999		if (direction == PF_OUT)
5000			switch (pd->af) {
5001#ifdef INET
5002			case AF_INET:
5003				pf_change_a(&pd->src->v4.s_addr,
5004				    pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
5005				    0);
5006				break;
5007#endif /* INET */
5008#ifdef INET6
5009			case AF_INET6:
5010				PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
5011				break;
5012#endif /* INET6 */
5013			}
5014		else
5015			switch (pd->af) {
5016#ifdef INET
5017			case AF_INET:
5018				pf_change_a(&pd->dst->v4.s_addr,
5019				    pd->ip_sum, (*state)->lan.addr.v4.s_addr,
5020				    0);
5021				break;
5022#endif /* INET */
5023#ifdef INET6
5024			case AF_INET6:
5025				PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
5026				break;
5027#endif /* INET6 */
5028			}
5029	}
5030
5031	return (PF_PASS);
5032}
5033
5034/*
5035 * ipoff and off are measured from the start of the mbuf chain.
5036 * h must be at "ipoff" on the mbuf chain.
5037 */
5038void *
5039pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5040    u_short *actionp, u_short *reasonp, sa_family_t af)
5041{
5042	switch (af) {
5043#ifdef INET
5044	case AF_INET: {
5045		struct ip	*h = mtod(m, struct ip *);
5046		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5047
5048		if (fragoff) {
5049			if (fragoff >= len)
5050				ACTION_SET(actionp, PF_PASS);
5051			else {
5052				ACTION_SET(actionp, PF_DROP);
5053				REASON_SET(reasonp, PFRES_FRAG);
5054			}
5055			return (NULL);
5056		}
5057		if (m->m_pkthdr.len < off + len ||
5058		    ntohs(h->ip_len) < off + len) {
5059			ACTION_SET(actionp, PF_DROP);
5060			REASON_SET(reasonp, PFRES_SHORT);
5061			return (NULL);
5062		}
5063		break;
5064	}
5065#endif /* INET */
5066#ifdef INET6
5067	case AF_INET6: {
5068		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5069
5070		if (m->m_pkthdr.len < off + len ||
5071		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5072		    (unsigned)(off + len)) {
5073			ACTION_SET(actionp, PF_DROP);
5074			REASON_SET(reasonp, PFRES_SHORT);
5075			return (NULL);
5076		}
5077		break;
5078	}
5079#endif /* INET6 */
5080	}
5081	m_copydata(m, off, len, p);
5082	return (p);
5083}
5084
5085int
5086pf_routable(struct pf_addr *addr, sa_family_t af)
5087{
5088	struct sockaddr_in	*dst;
5089	struct route		 ro;
5090	int			 ret = 0;
5091
5092	bzero(&ro, sizeof(ro));
5093	dst = satosin(&ro.ro_dst);
5094	dst->sin_family = af;
5095	dst->sin_len = sizeof(*dst);
5096	dst->sin_addr = addr->v4;
5097#ifdef __FreeBSD__
5098#ifdef RTF_PRCLONING
5099	rtalloc_ign(&ro, (RTF_CLONING|RTF_PRCLONING));
5100#else /* !RTF_PRCLONING */
5101	rtalloc_ign(&ro, RTF_CLONING);
5102#endif
5103#else /* ! __FreeBSD__ */
5104	rtalloc_noclone(&ro, NO_CLONING);
5105#endif
5106
5107	if (ro.ro_rt != NULL) {
5108		ret = 1;
5109		RTFREE(ro.ro_rt);
5110	}
5111
5112	return (ret);
5113}
5114
5115#ifdef INET
5116
5117void
5118pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5119    struct pf_state *s)
5120{
5121	struct mbuf		*m0, *m1;
5122	struct route		 iproute;
5123	struct route		*ro = NULL;	/* XXX: was uninitialized */
5124	struct sockaddr_in	*dst;
5125	struct ip		*ip;
5126	struct ifnet		*ifp = NULL;
5127	struct m_tag		*mtag;
5128	struct pf_addr		 naddr;
5129	struct pf_src_node	*sn = NULL;
5130	int			 error = 0;
5131#ifdef __FreeBSD__
5132	int sw_csum;
5133#endif
5134
5135	if (m == NULL || *m == NULL || r == NULL ||
5136	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5137		panic("pf_route: invalid parameters");
5138
5139	if (r->rt == PF_DUPTO) {
5140		m0 = *m;
5141		mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
5142		if (mtag == NULL) {
5143			mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
5144			if (mtag == NULL)
5145				goto bad;
5146			m_tag_prepend(m0, mtag);
5147		}
5148#ifdef __FreeBSD__
5149		m0 = m_dup(*m, M_DONTWAIT);
5150#else
5151		m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT);
5152#endif
5153		if (m0 == NULL)
5154			return;
5155	} else {
5156		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5157			return;
5158		m0 = *m;
5159	}
5160
5161	if (m0->m_len < sizeof(struct ip))
5162		panic("pf_route: m0->m_len < sizeof(struct ip)");
5163	ip = mtod(m0, struct ip *);
5164
5165	ro = &iproute;
5166	bzero((caddr_t)ro, sizeof(*ro));
5167	dst = satosin(&ro->ro_dst);
5168	dst->sin_family = AF_INET;
5169	dst->sin_len = sizeof(*dst);
5170	dst->sin_addr = ip->ip_dst;
5171
5172	if (r->rt == PF_FASTROUTE) {
5173		rtalloc(ro);
5174		if (ro->ro_rt == 0) {
5175			ipstat.ips_noroute++;
5176			goto bad;
5177		}
5178
5179		ifp = ro->ro_rt->rt_ifp;
5180		ro->ro_rt->rt_use++;
5181
5182		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
5183			dst = satosin(ro->ro_rt->rt_gateway);
5184	} else {
5185		if (TAILQ_EMPTY(&r->rpool.list))
5186			panic("pf_route: TAILQ_EMPTY(&r->rpool.list)");
5187		if (s == NULL) {
5188			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
5189			    &naddr, NULL, &sn);
5190			if (!PF_AZERO(&naddr, AF_INET))
5191				dst->sin_addr.s_addr = naddr.v4.s_addr;
5192			ifp = r->rpool.cur->kif ?
5193			    r->rpool.cur->kif->pfik_ifp : NULL;
5194		} else {
5195			if (!PF_AZERO(&s->rt_addr, AF_INET))
5196				dst->sin_addr.s_addr =
5197				    s->rt_addr.v4.s_addr;
5198			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5199		}
5200	}
5201
5202	if (ifp == NULL)
5203		goto bad;
5204
5205	if (m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL) != NULL)
5206		goto bad;
5207	mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
5208	if (mtag == NULL)
5209		goto bad;
5210	m_tag_prepend(m0, mtag);
5211
5212	if (oifp != ifp) {
5213#ifdef __FreeBSD__
5214		PF_UNLOCK();
5215		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) {
5216			PF_LOCK();
5217			goto bad;
5218		} else if (m0 == NULL) {
5219			PF_LOCK();
5220			goto done;
5221		}
5222		PF_LOCK();
5223#else
5224		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS)
5225			goto bad;
5226		else if (m0 == NULL)
5227			goto done;
5228#endif
5229		if (m0->m_len < sizeof(struct ip))
5230			panic("pf_route: m0->m_len < sizeof(struct ip)");
5231		ip = mtod(m0, struct ip *);
5232	}
5233
5234#ifdef __FreeBSD__
5235	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
5236	m0->m_pkthdr.csum_flags |= CSUM_IP;
5237	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
5238	if (sw_csum & CSUM_DELAY_DATA) {
5239		/*
5240		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
5241		 */
5242		NTOHS(ip->ip_len);
5243		NTOHS(ip->ip_off);	 /* XXX: needed? */
5244		in_delayed_cksum(m0);
5245		HTONS(ip->ip_len);
5246		HTONS(ip->ip_off);
5247		sw_csum &= ~CSUM_DELAY_DATA;
5248	}
5249	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
5250
5251	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
5252	    (ifp->if_hwassist & CSUM_FRAGMENT &&
5253		((ip->ip_off & htons(IP_DF)) == 0))) {
5254		/*
5255		 * ip->ip_len = htons(ip->ip_len);
5256		 * ip->ip_off = htons(ip->ip_off);
5257		 */
5258		ip->ip_sum = 0;
5259		if (sw_csum & CSUM_DELAY_IP) {
5260			/* From KAME */
5261			if (ip->ip_v == IPVERSION &&
5262			    (ip->ip_hl << 2) == sizeof(*ip)) {
5263				ip->ip_sum = in_cksum_hdr(ip);
5264			} else {
5265				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5266			}
5267		}
5268		PF_UNLOCK();
5269		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt);
5270		PF_LOCK();
5271		goto done;
5272	}
5273
5274#else
5275	/* Copied from ip_output. */
5276#ifdef IPSEC
5277	/*
5278	 * If deferred crypto processing is needed, check that the
5279	 * interface supports it.
5280	 */
5281	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
5282	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
5283		/* Notify IPsec to do its own crypto. */
5284		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
5285		goto bad;
5286	}
5287#endif /* IPSEC */
5288
5289	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
5290	if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) {
5291		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
5292		    ifp->if_bridge != NULL) {
5293			in_delayed_cksum(m0);
5294			m0->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */
5295		}
5296	} else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) {
5297		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
5298		    ifp->if_bridge != NULL) {
5299			in_delayed_cksum(m0);
5300			m0->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */
5301		}
5302	}
5303
5304	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
5305		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
5306		    ifp->if_bridge == NULL) {
5307			m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
5308			ipstat.ips_outhwcsum++;
5309		} else {
5310			ip->ip_sum = 0;
5311			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5312		}
5313		/* Update relevant hardware checksum stats for TCP/UDP */
5314		if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT)
5315			tcpstat.tcps_outhwcsum++;
5316		else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT)
5317			udpstat.udps_outhwcsum++;
5318		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
5319		goto done;
5320	}
5321#endif
5322	/*
5323	 * Too large for interface; fragment if possible.
5324	 * Must be able to put at least 8 bytes per fragment.
5325	 */
5326	if (ip->ip_off & htons(IP_DF)) {
5327		ipstat.ips_cantfrag++;
5328		if (r->rt != PF_DUPTO) {
5329#ifdef __FreeBSD__
5330			/* icmp_error() expects host byte ordering */
5331			NTOHS(ip->ip_len);
5332			NTOHS(ip->ip_off);
5333			PF_UNLOCK();
5334#endif
5335			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5336			    ifp);
5337#ifdef __FreeBSD__
5338			PF_LOCK();
5339#endif
5340			goto done;
5341		} else
5342			goto bad;
5343	}
5344
5345	m1 = m0;
5346#ifdef __FreeBSD__
5347	/*
5348	 * XXX: is cheaper + less error prone than own function
5349	 */
5350	NTOHS(ip->ip_len);
5351	NTOHS(ip->ip_off);
5352	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
5353#else
5354	error = ip_fragment(m0, ifp, ifp->if_mtu);
5355#endif
5356	if (error) {
5357#ifndef __FreeBSD__	/* ip_fragment does not do m_freem() on FreeBSD */
5358		m0 = NULL;
5359#endif
5360		goto bad;
5361	}
5362
5363	for (m0 = m1; m0; m0 = m1) {
5364		m1 = m0->m_nextpkt;
5365		m0->m_nextpkt = 0;
5366#ifdef __FreeBSD__
5367		if (error == 0) {
5368			PF_UNLOCK();
5369			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5370			    NULL);
5371			PF_LOCK();
5372		} else
5373#else
5374		if (error == 0)
5375			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5376			    NULL);
5377		else
5378#endif
5379			m_freem(m0);
5380	}
5381
5382	if (error == 0)
5383		ipstat.ips_fragmented++;
5384
5385done:
5386	if (r->rt != PF_DUPTO)
5387		*m = NULL;
5388	if (ro == &iproute && ro->ro_rt)
5389		RTFREE(ro->ro_rt);
5390	return;
5391
5392bad:
5393	m_freem(m0);
5394	goto done;
5395}
5396#endif /* INET */
5397
5398#ifdef INET6
5399void
5400pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5401    struct pf_state *s)
5402{
5403	struct mbuf		*m0;
5404	struct m_tag		*mtag;
5405	struct route_in6	 ip6route;
5406	struct route_in6	*ro;
5407	struct sockaddr_in6	*dst;
5408	struct ip6_hdr		*ip6;
5409	struct ifnet		*ifp = NULL;
5410	struct pf_addr		 naddr;
5411	struct pf_src_node	*sn = NULL;
5412	int			 error = 0;
5413
5414	if (m == NULL || *m == NULL || r == NULL ||
5415	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5416		panic("pf_route6: invalid parameters");
5417
5418	if (r->rt == PF_DUPTO) {
5419		m0 = *m;
5420		mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
5421		if (mtag == NULL) {
5422			mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
5423			if (mtag == NULL)
5424				goto bad;
5425			m_tag_prepend(m0, mtag);
5426		}
5427#ifdef __FreeBSD__
5428		m0 = m_dup(*m, M_DONTWAIT);
5429#else
5430		m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT);
5431#endif
5432		if (m0 == NULL)
5433			return;
5434	} else {
5435		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5436			return;
5437		m0 = *m;
5438	}
5439
5440	if (m0->m_len < sizeof(struct ip6_hdr))
5441		panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
5442	ip6 = mtod(m0, struct ip6_hdr *);
5443
5444	ro = &ip6route;
5445	bzero((caddr_t)ro, sizeof(*ro));
5446	dst = (struct sockaddr_in6 *)&ro->ro_dst;
5447	dst->sin6_family = AF_INET6;
5448	dst->sin6_len = sizeof(*dst);
5449	dst->sin6_addr = ip6->ip6_dst;
5450
5451	/* Cheat. */
5452	if (r->rt == PF_FASTROUTE) {
5453#ifdef __FreeBSD__
5454		m0->m_flags |= M_SKIP_FIREWALL;
5455		PF_UNLOCK();
5456		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5457		PF_LOCK();
5458#else
5459		mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
5460		if (mtag == NULL)
5461			goto bad;
5462		m_tag_prepend(m0, mtag);
5463		ip6_output(m0, NULL, NULL, 0, NULL, NULL);
5464#endif
5465		return;
5466	}
5467
5468	if (TAILQ_EMPTY(&r->rpool.list))
5469		panic("pf_route6: TAILQ_EMPTY(&r->rpool.list)");
5470	if (s == NULL) {
5471		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
5472		    &naddr, NULL, &sn);
5473		if (!PF_AZERO(&naddr, AF_INET6))
5474			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5475			    &naddr, AF_INET6);
5476		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
5477	} else {
5478		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5479			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5480			    &s->rt_addr, AF_INET6);
5481		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5482	}
5483
5484	if (ifp == NULL)
5485		goto bad;
5486
5487	if (oifp != ifp) {
5488		mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
5489		if (mtag == NULL) {
5490			mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
5491			if (mtag == NULL)
5492				goto bad;
5493			m_tag_prepend(m0, mtag);
5494#ifdef __FreeBSD__
5495			PF_UNLOCK();
5496			if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) {
5497				PF_LOCK();
5498				goto bad;
5499			} else if (m0 == NULL) {
5500				PF_LOCK();
5501				goto done;
5502			}
5503			PF_LOCK();
5504#else
5505			if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS)
5506				goto bad;
5507			else if (m0 == NULL)
5508				goto done;
5509#endif
5510		}
5511	}
5512
5513	/*
5514	 * If the packet is too large for the outgoing interface,
5515	 * send back an icmp6 error.
5516	 */
5517	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
5518		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5519	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5520#ifdef __FreeBSD__
5521		PF_UNLOCK();
5522#endif
5523		error = nd6_output(ifp, ifp, m0, dst, NULL);
5524#ifdef __FreeBSD__
5525		PF_LOCK();
5526#endif
5527	} else {
5528		in6_ifstat_inc(ifp, ifs6_in_toobig);
5529#ifdef __FreeBSD__
5530		if (r->rt != PF_DUPTO) {
5531			PF_UNLOCK();
5532			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5533			PF_LOCK();
5534		 } else
5535#else
5536		if (r->rt != PF_DUPTO)
5537			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5538		else
5539#endif
5540			goto bad;
5541	}
5542
5543done:
5544	if (r->rt != PF_DUPTO)
5545		*m = NULL;
5546	return;
5547
5548bad:
5549	m_freem(m0);
5550	goto done;
5551}
5552#endif /* INET6 */
5553
5554
5555#ifdef __FreeBSD__
5556/*
5557 * XXX
5558 * FreeBSD supports cksum offload for the following drivers.
5559 * em(4), gx(4), lge(4), nge(4), ti(4), xl(4)
5560 * If we can make full use of it we would outperform ipfw/ipfilter in
5561 * very heavy traffic.
5562 * I have not tested 'cause I don't have NICs that supports cksum offload.
5563 * (There might be problems. Typical phenomena would be
5564 *   1. No route message for UDP packet.
5565 *   2. No connection acceptance from external hosts regardless of rule set.)
5566 */
5567int
5568pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
5569{
5570	u_int16_t sum = 0;
5571	int hw_assist = 0;
5572	struct ip *ip;
5573
5574	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5575		return (1);
5576	if (m->m_pkthdr.len < off + len)
5577		return (1);
5578
5579	switch (p) {
5580	case IPPROTO_TCP:
5581		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5582			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5583				sum = m->m_pkthdr.csum_data;
5584			} else {
5585				ip = mtod(m, struct ip *);
5586				sum = in_pseudo(ip->ip_src.s_addr,
5587					ip->ip_dst.s_addr,
5588					htonl(m->m_pkthdr.csum_data +
5589					    IPPROTO_TCP) + ip->ip_len);
5590			}
5591			sum ^= 0xffff;
5592			++hw_assist;
5593		}
5594		break;
5595	case IPPROTO_UDP:
5596		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5597			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5598				sum = m->m_pkthdr.csum_data;
5599			} else {
5600				ip = mtod(m, struct ip *);
5601				sum = in_pseudo(ip->ip_src.s_addr,
5602					ip->ip_dst.s_addr, htonl((u_short)len +
5603					m->m_pkthdr.csum_data + IPPROTO_UDP));
5604			}
5605			sum ^= 0xffff;
5606			++hw_assist;
5607                }
5608		break;
5609	case IPPROTO_ICMP:
5610#ifdef INET6
5611	case IPPROTO_ICMPV6:
5612#endif /* INET6 */
5613		break;
5614	default:
5615		return (1);
5616	}
5617
5618	if (!hw_assist) {
5619		switch (af) {
5620		case AF_INET:
5621			if (p == IPPROTO_ICMP) {
5622				if (m->m_len < off)
5623					return (1);
5624				m->m_data += off;
5625				m->m_len -= off;
5626				sum = in_cksum(m, len);
5627				m->m_data -= off;
5628				m->m_len += off;
5629			} else {
5630				if (m->m_len < sizeof(struct ip))
5631					return (1);
5632				sum = in4_cksum(m, p, off, len);
5633				if (sum == 0) {
5634					m->m_pkthdr.csum_flags |=
5635					    (CSUM_DATA_VALID |
5636					     CSUM_PSEUDO_HDR);
5637					m->m_pkthdr.csum_data = 0xffff;
5638				}
5639			}
5640			break;
5641#ifdef INET6
5642		case AF_INET6:
5643			if (m->m_len < sizeof(struct ip6_hdr))
5644				return (1);
5645			sum = in6_cksum(m, p, off, len);
5646			/*
5647			 * XXX
5648			 * IPv6 H/W cksum off-load not supported yet!
5649			 *
5650			 * if (sum == 0) {
5651			 *	m->m_pkthdr.csum_flags |=
5652			 *	    (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
5653			 *	m->m_pkthdr.csum_data = 0xffff;
5654			 *}
5655			 */
5656			break;
5657#endif /* INET6 */
5658		default:
5659			return (1);
5660		}
5661	}
5662	if (sum) {
5663		switch (p) {
5664		case IPPROTO_TCP:
5665			tcpstat.tcps_rcvbadsum++;
5666			break;
5667		case IPPROTO_UDP:
5668			udpstat.udps_badsum++;
5669			break;
5670		case IPPROTO_ICMP:
5671			icmpstat.icps_checksum++;
5672			break;
5673#ifdef INET6
5674		case IPPROTO_ICMPV6:
5675			icmp6stat.icp6s_checksum++;
5676			break;
5677#endif /* INET6 */
5678		}
5679		return (1);
5680	}
5681	return (0);
5682}
5683#else
5684/*
5685 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
5686 *   off is the offset where the protocol header starts
5687 *   len is the total length of protocol header plus payload
5688 * returns 0 when the checksum is valid, otherwise returns 1.
5689 */
5690int
5691pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
5692    sa_family_t af)
5693{
5694	u_int16_t flag_ok, flag_bad;
5695	u_int16_t sum;
5696
5697	switch (p) {
5698	case IPPROTO_TCP:
5699		flag_ok = M_TCP_CSUM_IN_OK;
5700		flag_bad = M_TCP_CSUM_IN_BAD;
5701		break;
5702	case IPPROTO_UDP:
5703		flag_ok = M_UDP_CSUM_IN_OK;
5704		flag_bad = M_UDP_CSUM_IN_BAD;
5705		break;
5706	case IPPROTO_ICMP:
5707#ifdef INET6
5708	case IPPROTO_ICMPV6:
5709#endif /* INET6 */
5710		flag_ok = flag_bad = 0;
5711		break;
5712	default:
5713		return (1);
5714	}
5715	if (m->m_pkthdr.csum & flag_ok)
5716		return (0);
5717	if (m->m_pkthdr.csum & flag_bad)
5718		return (1);
5719	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5720		return (1);
5721	if (m->m_pkthdr.len < off + len)
5722		return (1);
5723		switch (af) {
5724	case AF_INET:
5725		if (p == IPPROTO_ICMP) {
5726			if (m->m_len < off)
5727				return (1);
5728			m->m_data += off;
5729			m->m_len -= off;
5730			sum = in_cksum(m, len);
5731			m->m_data -= off;
5732			m->m_len += off;
5733		} else {
5734			if (m->m_len < sizeof(struct ip))
5735				return (1);
5736			sum = in4_cksum(m, p, off, len);
5737		}
5738		break;
5739#ifdef INET6
5740	case AF_INET6:
5741		if (m->m_len < sizeof(struct ip6_hdr))
5742			return (1);
5743		sum = in6_cksum(m, p, off, len);
5744		break;
5745#endif /* INET6 */
5746	default:
5747		return (1);
5748	}
5749	if (sum) {
5750		m->m_pkthdr.csum |= flag_bad;
5751		switch (p) {
5752		case IPPROTO_TCP:
5753			tcpstat.tcps_rcvbadsum++;
5754			break;
5755		case IPPROTO_UDP:
5756			udpstat.udps_badsum++;
5757			break;
5758		case IPPROTO_ICMP:
5759			icmpstat.icps_checksum++;
5760			break;
5761#ifdef INET6
5762		case IPPROTO_ICMPV6:
5763			icmp6stat.icp6s_checksum++;
5764			break;
5765#endif /* INET6 */
5766		}
5767		return (1);
5768	}
5769	m->m_pkthdr.csum |= flag_ok;
5770	return (0);
5771}
5772#endif
5773
5774static int
5775pf_add_mbuf_tag(struct mbuf *m, u_int tag)
5776{
5777	struct m_tag *mtag;
5778
5779	if (m_tag_find(m, tag, NULL) != NULL)
5780		return (0);
5781	mtag = m_tag_get(tag, 0, M_NOWAIT);
5782	if (mtag == NULL)
5783		return (1);
5784	m_tag_prepend(m, mtag);
5785	return (0);
5786}
5787
5788#ifdef INET
5789int
5790pf_test(int dir, struct ifnet *ifp, struct mbuf **m0)
5791{
5792	struct pfi_kif		*kif;
5793	u_short			 action, reason = 0, log = 0;
5794	struct mbuf		*m = *m0;
5795	struct ip		*h = NULL;	/* make the compiler happy */
5796	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
5797	struct pf_state		*s = NULL;
5798	struct pf_ruleset	*ruleset = NULL;
5799	struct pf_pdesc		 pd;
5800	int			 off, dirndx, pqid = 0;
5801
5802#ifdef __FreeBSD__
5803	PF_LOCK();
5804#endif
5805	if (!pf_status.running ||
5806#ifdef __FreeBSD__
5807	    (m->m_flags & M_SKIP_FIREWALL)) {
5808		PF_UNLOCK();
5809#else
5810	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
5811#endif
5812	    	return (PF_PASS);
5813	}
5814
5815	kif = pfi_index2kif[ifp->if_index];
5816	if (kif == NULL) {
5817#ifdef __FreeBSD__
5818		PF_UNLOCK();
5819#endif
5820		return (PF_DROP);
5821	}
5822
5823#ifdef __FreeBSD__
5824	M_ASSERTPKTHDR(m);
5825#else
5826#ifdef DIAGNOSTIC
5827	if ((m->m_flags & M_PKTHDR) == 0)
5828		panic("non-M_PKTHDR is passed to pf_test");
5829#endif
5830#endif
5831
5832	memset(&pd, 0, sizeof(pd));
5833	if (m->m_pkthdr.len < (int)sizeof(*h)) {
5834		action = PF_DROP;
5835		REASON_SET(&reason, PFRES_SHORT);
5836		log = 1;
5837		goto done;
5838	}
5839
5840	/* We do IP header normalization and packet reassembly here */
5841	if (pf_normalize_ip(m0, dir, kif, &reason) != PF_PASS) {
5842		action = PF_DROP;
5843		goto done;
5844	}
5845	m = *m0;
5846	h = mtod(m, struct ip *);
5847
5848	off = h->ip_hl << 2;
5849	if (off < (int)sizeof(*h)) {
5850		action = PF_DROP;
5851		REASON_SET(&reason, PFRES_SHORT);
5852		log = 1;
5853		goto done;
5854	}
5855
5856	pd.src = (struct pf_addr *)&h->ip_src;
5857	pd.dst = (struct pf_addr *)&h->ip_dst;
5858	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
5859	pd.ip_sum = &h->ip_sum;
5860	pd.proto = h->ip_p;
5861	pd.af = AF_INET;
5862	pd.tos = h->ip_tos;
5863	pd.tot_len = ntohs(h->ip_len);
5864
5865	/* handle fragments that didn't get reassembled by normalization */
5866	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
5867		action = pf_test_fragment(&r, dir, kif, m, h,
5868		    &pd, &a, &ruleset);
5869		goto done;
5870	}
5871
5872	switch (h->ip_p) {
5873
5874	case IPPROTO_TCP: {
5875		struct tcphdr	th;
5876
5877		pd.hdr.tcp = &th;
5878		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5879		    &action, &reason, AF_INET)) {
5880			log = action != PF_PASS;
5881			goto done;
5882		}
5883		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5884		    ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
5885			action = PF_DROP;
5886			goto done;
5887		}
5888		pd.p_len = pd.tot_len - off - (th.th_off << 2);
5889		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
5890			pqid = 1;
5891		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
5892		if (action == PF_DROP)
5893			goto done;
5894		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
5895		    &reason);
5896		if (action == PF_PASS) {
5897#if NPFSYNC
5898			pfsync_update_state(s);
5899#endif
5900			r = s->rule.ptr;
5901			a = s->anchor.ptr;
5902			log = s->log;
5903		} else if (s == NULL)
5904			action = pf_test_tcp(&r, &s, dir, kif,
5905			    m, off, h, &pd, &a, &ruleset);
5906		break;
5907	}
5908
5909	case IPPROTO_UDP: {
5910		struct udphdr	uh;
5911
5912		pd.hdr.udp = &uh;
5913		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5914		    &action, &reason, AF_INET)) {
5915			log = action != PF_PASS;
5916			goto done;
5917		}
5918		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
5919		    off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
5920			action = PF_DROP;
5921			goto done;
5922		}
5923		if (uh.uh_dport == 0 ||
5924		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
5925		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
5926			action = PF_DROP;
5927			goto done;
5928		}
5929		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
5930		if (action == PF_PASS) {
5931#if NPFSYNC
5932			pfsync_update_state(s);
5933#endif
5934			r = s->rule.ptr;
5935			a = s->anchor.ptr;
5936			log = s->log;
5937		} else if (s == NULL)
5938			action = pf_test_udp(&r, &s, dir, kif,
5939			    m, off, h, &pd, &a, &ruleset);
5940		break;
5941	}
5942
5943	case IPPROTO_ICMP: {
5944		struct icmp	ih;
5945
5946		pd.hdr.icmp = &ih;
5947		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
5948		    &action, &reason, AF_INET)) {
5949			log = action != PF_PASS;
5950			goto done;
5951		}
5952		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5953		    ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
5954			action = PF_DROP;
5955			goto done;
5956		}
5957		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd);
5958		if (action == PF_PASS) {
5959#if NPFSYNC
5960			pfsync_update_state(s);
5961#endif
5962			r = s->rule.ptr;
5963			a = s->anchor.ptr;
5964			log = s->log;
5965		} else if (s == NULL)
5966			action = pf_test_icmp(&r, &s, dir, kif,
5967			    m, off, h, &pd, &a, &ruleset);
5968		break;
5969	}
5970
5971	default:
5972		action = pf_test_state_other(&s, dir, kif, &pd);
5973		if (action == PF_PASS) {
5974#if NPFSYNC
5975			pfsync_update_state(s);
5976#endif
5977			r = s->rule.ptr;
5978			a = s->anchor.ptr;
5979			log = s->log;
5980		} else if (s == NULL)
5981			action = pf_test_other(&r, &s, dir, kif, m, off, h,
5982			    &pd, &a, &ruleset);
5983		break;
5984	}
5985
5986done:
5987	if (action == PF_PASS && h->ip_hl > 5 &&
5988	    !((s && s->allow_opts) || r->allow_opts)) {
5989		action = PF_DROP;
5990		REASON_SET(&reason, PFRES_SHORT);
5991		log = 1;
5992		DPFPRINTF(PF_DEBUG_MISC,
5993		    ("pf: dropping packet with ip options\n"));
5994	}
5995
5996#ifdef ALTQ
5997	if (action == PF_PASS && r->qid) {
5998		struct m_tag	*mtag;
5999		struct altq_tag	*atag;
6000
6001		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6002		if (mtag != NULL) {
6003			atag = (struct altq_tag *)(mtag + 1);
6004			if (pqid || pd.tos == IPTOS_LOWDELAY)
6005				atag->qid = r->pqid;
6006			else
6007				atag->qid = r->qid;
6008			/* add hints for ecn */
6009			atag->af = AF_INET;
6010			atag->hdr = h;
6011			m_tag_prepend(m, mtag);
6012		}
6013	}
6014#endif
6015
6016	/*
6017	 * connections redirected to loopback should not match sockets
6018	 * bound specifically to loopback due to security implications,
6019	 * see tcp_input() and in_pcblookup_listen().
6020	 */
6021	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6022	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6023	    (s->nat_rule.ptr->action == PF_RDR ||
6024	    s->nat_rule.ptr->action == PF_BINAT) &&
6025	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
6026	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
6027		action = PF_DROP;
6028		REASON_SET(&reason, PFRES_MEMORY);
6029	}
6030
6031	if (log)
6032		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, a, ruleset);
6033
6034	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6035	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
6036
6037	if (action == PF_PASS || r->action == PF_DROP) {
6038		r->packets++;
6039		r->bytes += pd.tot_len;
6040		if (a != NULL) {
6041			a->packets++;
6042			a->bytes += pd.tot_len;
6043		}
6044		if (s != NULL) {
6045			dirndx = (dir == s->direction) ? 0 : 1;
6046			s->packets[dirndx]++;
6047			s->bytes[dirndx] += pd.tot_len;
6048			if (s->nat_rule.ptr != NULL) {
6049				s->nat_rule.ptr->packets++;
6050				s->nat_rule.ptr->bytes += pd.tot_len;
6051			}
6052			if (s->src_node != NULL) {
6053				s->src_node->packets++;
6054				s->src_node->bytes += pd.tot_len;
6055			}
6056			if (s->nat_src_node != NULL) {
6057				s->nat_src_node->packets++;
6058				s->nat_src_node->bytes += pd.tot_len;
6059			}
6060		}
6061		tr = r;
6062		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6063		if (nr != NULL) {
6064			struct pf_addr *x;
6065			/*
6066			 * XXX: we need to make sure that the addresses
6067			 * passed to pfr_update_stats() are the same than
6068			 * the addresses used during matching (pfr_match)
6069			 */
6070			if (r == &pf_default_rule) {
6071				tr = nr;
6072				x = (s == NULL || s->direction == dir) ?
6073				    &pd.baddr : &pd.naddr;
6074			} else
6075				x = (s == NULL || s->direction == dir) ?
6076				    &pd.naddr : &pd.baddr;
6077			if (x == &pd.baddr || s == NULL) {
6078				/* we need to change the address */
6079				if (dir == PF_OUT)
6080					pd.src = x;
6081				else
6082					pd.dst = x;
6083			}
6084		}
6085		if (tr->src.addr.type == PF_ADDR_TABLE)
6086			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6087			    s->direction == dir) ? pd.src : pd.dst, pd.af,
6088			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6089			    tr->src.not);
6090		if (tr->dst.addr.type == PF_ADDR_TABLE)
6091			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6092			    s->direction == dir) ? pd.dst : pd.src, pd.af,
6093			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6094			    tr->dst.not);
6095	}
6096
6097
6098	if (action == PF_SYNPROXY_DROP) {
6099		m_freem(*m0);
6100		*m0 = NULL;
6101		action = PF_PASS;
6102	} else if (r->rt)
6103		/* pf_route can free the mbuf causing *m0 to become NULL */
6104		pf_route(m0, r, dir, ifp, s);
6105
6106#ifdef __FreeBSD__
6107	PF_UNLOCK();
6108#endif
6109
6110	return (action);
6111}
6112#endif /* INET */
6113
6114#ifdef INET6
6115int
6116pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0)
6117{
6118	struct pfi_kif		*kif;
6119	u_short			 action, reason = 0, log = 0;
6120	struct mbuf		*m = *m0;
6121	struct ip6_hdr		*h = NULL;	/* make the compiler happy */
6122	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
6123	struct pf_state		*s = NULL;
6124	struct pf_ruleset	*ruleset = NULL;
6125	struct pf_pdesc		 pd;
6126	int			 off, terminal = 0, dirndx;
6127
6128#ifdef __FreeBSD__
6129	PF_LOCK();
6130#endif
6131
6132	if (!pf_status.running ||
6133#ifdef __FreeBSD__
6134	    (m->m_flags & M_SKIP_FIREWALL)) {
6135		PF_UNLOCK();
6136#else
6137	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
6138#endif
6139		return (PF_PASS);
6140	}
6141
6142	kif = pfi_index2kif[ifp->if_index];
6143	if (kif == NULL) {
6144#ifdef __FreeBSD__
6145		PF_UNLOCK();
6146#endif
6147		return (PF_DROP);
6148	}
6149
6150#ifdef __FreeBSD__
6151	M_ASSERTPKTHDR(m);
6152#else
6153#ifdef DIAGNOSTIC
6154	if ((m->m_flags & M_PKTHDR) == 0)
6155		panic("non-M_PKTHDR is passed to pf_test");
6156#endif
6157#endif
6158
6159	memset(&pd, 0, sizeof(pd));
6160	if (m->m_pkthdr.len < (int)sizeof(*h)) {
6161		action = PF_DROP;
6162		REASON_SET(&reason, PFRES_SHORT);
6163		log = 1;
6164		goto done;
6165	}
6166
6167	/* We do IP header normalization and packet reassembly here */
6168	if (pf_normalize_ip6(m0, dir, kif, &reason) != PF_PASS) {
6169		action = PF_DROP;
6170		goto done;
6171	}
6172	m = *m0;
6173	h = mtod(m, struct ip6_hdr *);
6174
6175	pd.src = (struct pf_addr *)&h->ip6_src;
6176	pd.dst = (struct pf_addr *)&h->ip6_dst;
6177	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
6178	pd.ip_sum = NULL;
6179	pd.af = AF_INET6;
6180	pd.tos = 0;
6181	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6182
6183	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
6184	pd.proto = h->ip6_nxt;
6185	do {
6186		switch (pd.proto) {
6187		case IPPROTO_FRAGMENT:
6188			action = pf_test_fragment(&r, dir, kif, m, h,
6189			    &pd, &a, &ruleset);
6190			if (action == PF_DROP)
6191				REASON_SET(&reason, PFRES_FRAG);
6192			goto done;
6193		case IPPROTO_AH:
6194		case IPPROTO_HOPOPTS:
6195		case IPPROTO_ROUTING:
6196		case IPPROTO_DSTOPTS: {
6197			/* get next header and header length */
6198			struct ip6_ext	opt6;
6199
6200			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6201			    NULL, NULL, pd.af)) {
6202				DPFPRINTF(PF_DEBUG_MISC,
6203				    ("pf: IPv6 short opt\n"));
6204				action = PF_DROP;
6205				REASON_SET(&reason, PFRES_SHORT);
6206				log = 1;
6207				goto done;
6208			}
6209			if (pd.proto == IPPROTO_AH)
6210				off += (opt6.ip6e_len + 2) * 4;
6211			else
6212				off += (opt6.ip6e_len + 1) * 8;
6213			pd.proto = opt6.ip6e_nxt;
6214			/* goto the next header */
6215			break;
6216		}
6217		default:
6218			terminal++;
6219			break;
6220		}
6221	} while (!terminal);
6222
6223	switch (pd.proto) {
6224
6225	case IPPROTO_TCP: {
6226		struct tcphdr	th;
6227
6228		pd.hdr.tcp = &th;
6229		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6230		    &action, &reason, AF_INET6)) {
6231			log = action != PF_PASS;
6232			goto done;
6233		}
6234		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6235		    ntohs(h->ip6_plen), IPPROTO_TCP, AF_INET6)) {
6236			action = PF_DROP;
6237			goto done;
6238		}
6239		pd.p_len = pd.tot_len - off - (th.th_off << 2);
6240		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6241		if (action == PF_DROP)
6242			goto done;
6243		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6244		    &reason);
6245		if (action == PF_PASS) {
6246#if NPFSYNC
6247			pfsync_update_state(s);
6248#endif
6249			r = s->rule.ptr;
6250			a = s->anchor.ptr;
6251			log = s->log;
6252		} else if (s == NULL)
6253			action = pf_test_tcp(&r, &s, dir, kif,
6254			    m, off, h, &pd, &a, &ruleset);
6255		break;
6256	}
6257
6258	case IPPROTO_UDP: {
6259		struct udphdr	uh;
6260
6261		pd.hdr.udp = &uh;
6262		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6263		    &action, &reason, AF_INET6)) {
6264			log = action != PF_PASS;
6265			goto done;
6266		}
6267		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
6268		    off, ntohs(h->ip6_plen), IPPROTO_UDP, AF_INET6)) {
6269			action = PF_DROP;
6270			goto done;
6271		}
6272		if (uh.uh_dport == 0 ||
6273		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6274		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6275			action = PF_DROP;
6276			goto done;
6277		}
6278		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6279		if (action == PF_PASS) {
6280#if NPFSYNC
6281			pfsync_update_state(s);
6282#endif
6283			r = s->rule.ptr;
6284			a = s->anchor.ptr;
6285			log = s->log;
6286		} else if (s == NULL)
6287			action = pf_test_udp(&r, &s, dir, kif,
6288			    m, off, h, &pd, &a, &ruleset);
6289		break;
6290	}
6291
6292	case IPPROTO_ICMPV6: {
6293		struct icmp6_hdr	ih;
6294
6295		pd.hdr.icmp6 = &ih;
6296		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6297		    &action, &reason, AF_INET6)) {
6298			log = action != PF_PASS;
6299			goto done;
6300		}
6301		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6302		    ntohs(h->ip6_plen), IPPROTO_ICMPV6, AF_INET6)) {
6303			action = PF_DROP;
6304			goto done;
6305		}
6306		action = pf_test_state_icmp(&s, dir, kif,
6307		    m, off, h, &pd);
6308		if (action == PF_PASS) {
6309#if NPFSYNC
6310			pfsync_update_state(s);
6311#endif
6312			r = s->rule.ptr;
6313			a = s->anchor.ptr;
6314			log = s->log;
6315		} else if (s == NULL)
6316			action = pf_test_icmp(&r, &s, dir, kif,
6317			    m, off, h, &pd, &a, &ruleset);
6318		break;
6319	}
6320
6321	default:
6322		action = pf_test_state_other(&s, dir, kif, &pd);
6323		if (action == PF_PASS) {
6324			r = s->rule.ptr;
6325			a = s->anchor.ptr;
6326			log = s->log;
6327		} else if (s == NULL)
6328			action = pf_test_other(&r, &s, dir, kif, m, off, h,
6329			    &pd, &a, &ruleset);
6330		break;
6331	}
6332
6333done:
6334	/* XXX handle IPv6 options, if not allowed. not implemented. */
6335
6336#ifdef ALTQ
6337	if (action == PF_PASS && r->qid) {
6338		struct m_tag	*mtag;
6339		struct altq_tag	*atag;
6340
6341		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6342		if (mtag != NULL) {
6343			atag = (struct altq_tag *)(mtag + 1);
6344			if (pd.tos == IPTOS_LOWDELAY)
6345				atag->qid = r->pqid;
6346			else
6347				atag->qid = r->qid;
6348			/* add hints for ecn */
6349			atag->af = AF_INET6;
6350			atag->hdr = h;
6351			m_tag_prepend(m, mtag);
6352		}
6353	}
6354#endif
6355
6356	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6357	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6358	    (s->nat_rule.ptr->action == PF_RDR ||
6359	    s->nat_rule.ptr->action == PF_BINAT) &&
6360	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6) &&
6361	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
6362		action = PF_DROP;
6363		REASON_SET(&reason, PFRES_MEMORY);
6364	}
6365
6366	if (log)
6367		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, r, a, ruleset);
6368
6369	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6370	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6371
6372	if (action == PF_PASS || r->action == PF_DROP) {
6373		r->packets++;
6374		r->bytes += pd.tot_len;
6375		if (a != NULL) {
6376			a->packets++;
6377			a->bytes += pd.tot_len;
6378		}
6379		if (s != NULL) {
6380			dirndx = (dir == s->direction) ? 0 : 1;
6381			s->packets[dirndx]++;
6382			s->bytes[dirndx] += pd.tot_len;
6383			if (s->nat_rule.ptr != NULL) {
6384				s->nat_rule.ptr->packets++;
6385				s->nat_rule.ptr->bytes += pd.tot_len;
6386			}
6387			if (s->src_node != NULL) {
6388				s->src_node->packets++;
6389				s->src_node->bytes += pd.tot_len;
6390			}
6391			if (s->nat_src_node != NULL) {
6392				s->nat_src_node->packets++;
6393				s->nat_src_node->bytes += pd.tot_len;
6394			}
6395		}
6396		tr = r;
6397		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6398		if (nr != NULL) {
6399			struct pf_addr *x;
6400			/*
6401			 * XXX: we need to make sure that the addresses
6402			 * passed to pfr_update_stats() are the same than
6403			 * the addresses used during matching (pfr_match)
6404			 */
6405			if (r == &pf_default_rule) {
6406				tr = nr;
6407				x = (s == NULL || s->direction == dir) ?
6408				    &pd.baddr : &pd.naddr;
6409			} else {
6410				x = (s == NULL || s->direction == dir) ?
6411				    &pd.naddr : &pd.baddr;
6412			}
6413			if (x == &pd.baddr || s == NULL) {
6414				if (dir == PF_OUT)
6415					pd.src = x;
6416				else
6417					pd.dst = x;
6418			}
6419		}
6420		if (tr->src.addr.type == PF_ADDR_TABLE)
6421			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6422			    s->direction == dir) ? pd.src : pd.dst, pd.af,
6423			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6424			    tr->src.not);
6425		if (tr->dst.addr.type == PF_ADDR_TABLE)
6426			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6427			    s->direction == dir) ? pd.dst : pd.src, pd.af,
6428			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6429			    tr->dst.not);
6430	}
6431
6432
6433	if (action == PF_SYNPROXY_DROP) {
6434		m_freem(*m0);
6435		*m0 = NULL;
6436		action = PF_PASS;
6437	} else if (r->rt)
6438		/* pf_route6 can free the mbuf causing *m0 to become NULL */
6439		pf_route6(m0, r, dir, ifp, s);
6440
6441#ifdef __FreeBSD__
6442	PF_UNLOCK();
6443#endif
6444	return (action);
6445}
6446#endif /* INET6 */
6447