pf.c revision 132303
1/*	$FreeBSD: head/sys/contrib/pf/net/pf.c 132303 2004-07-17 17:15:15Z mlaier $	*/
2/*	$OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */
3
4/*
5 * Copyright (c) 2001 Daniel Hartmeier
6 * Copyright (c) 2002,2003 Henning Brauer
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 *    - Redistributions of source code must retain the above copyright
14 *      notice, this list of conditions and the following disclaimer.
15 *    - Redistributions in binary form must reproduce the above
16 *      copyright notice, this list of conditions and the following
17 *      disclaimer in the documentation and/or other materials provided
18 *      with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Effort sponsored in part by the Defense Advanced Research Projects
34 * Agency (DARPA) and Air Force Research Laboratory, Air Force
35 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
36 *
37 */
38
39#ifdef __FreeBSD__
40#include "opt_inet.h"
41#include "opt_inet6.h"
42#endif
43
44#ifdef __FreeBSD__
45#include "opt_bpf.h"
46#include "opt_pf.h"
47#define	NBPFILTER	DEV_BPF
48#define	NPFLOG		DEV_PFLOG
49#define	NPFSYNC		DEV_PFSYNC
50#else
51#include "bpfilter.h"
52#include "pflog.h"
53#include "pfsync.h"
54#endif
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/mbuf.h>
59#include <sys/filio.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/kernel.h>
63#include <sys/time.h>
64#ifdef __FreeBSD__
65#include <sys/sysctl.h>
66#include <sys/endian.h>
67#else
68#include <sys/pool.h>
69#endif
70
71#include <net/if.h>
72#include <net/if_types.h>
73#include <net/bpf.h>
74#include <net/route.h>
75
76#include <netinet/in.h>
77#include <netinet/in_var.h>
78#include <netinet/in_systm.h>
79#include <netinet/ip.h>
80#include <netinet/ip_var.h>
81#include <netinet/tcp.h>
82#include <netinet/tcp_seq.h>
83#include <netinet/udp.h>
84#include <netinet/ip_icmp.h>
85#include <netinet/in_pcb.h>
86#include <netinet/tcp_timer.h>
87#include <netinet/tcp_var.h>
88#include <netinet/udp_var.h>
89#include <netinet/icmp_var.h>
90
91#ifndef __FreeBSD__
92#include <dev/rndvar.h>
93#endif
94#include <net/pfvar.h>
95#include <net/if_pflog.h>
96
97#if NPFSYNC > 0
98#include <net/if_pfsync.h>
99#endif /* NPFSYNC > 0 */
100
101#ifdef INET6
102#include <netinet/ip6.h>
103#include <netinet/in_pcb.h>
104#include <netinet/icmp6.h>
105#include <netinet6/nd6.h>
106#ifdef __FreeBSD__
107#include <netinet6/ip6_var.h>
108#include <netinet6/in6_pcb.h>
109#endif
110#endif /* INET6 */
111
112#ifdef __FreeBSD__
113#include <machine/in_cksum.h>
114#include <sys/limits.h>
115#include <sys/ucred.h>
116
117extern int ip_optcopy(struct ip *, struct ip *);
118#endif
119
/*
 * Print a debug message when pf_status.debug is at least `n'.
 * `x' is a complete, parenthesized printf argument list, e.g.
 *	DPFPRINTF(PF_DEBUG_MISC, ("pf: value %d\n", v));
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and cannot capture an `else' that follows it.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
121
122/*
123 * Global variables
124 */
125
126struct pf_anchorqueue	 pf_anchors;
127struct pf_ruleset	 pf_main_ruleset;
128struct pf_altqqueue	 pf_altqs[2];
129struct pf_palist	 pf_pabuf;
130struct pf_altqqueue	*pf_altqs_active;
131struct pf_altqqueue	*pf_altqs_inactive;
132struct pf_status	 pf_status;
133
134u_int32_t		 ticket_altqs_active;
135u_int32_t		 ticket_altqs_inactive;
136int			 altqs_inactive_open;
137u_int32_t		 ticket_pabuf;
138
139#ifdef __FreeBSD__
140struct callout	 	 pf_expire_to;			/* expire timeout */
141#else
142struct timeout		 pf_expire_to;			/* expire timeout */
143#endif
144
145
146#ifdef __FreeBSD__
147uma_zone_t		 pf_src_tree_pl, pf_rule_pl;
148uma_zone_t		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
149#else
150struct pool		 pf_src_tree_pl, pf_rule_pl;
151struct pool		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
152#endif
153
154void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
155void			 pf_print_state(struct pf_state *);
156void			 pf_print_flags(u_int8_t);
157
158u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
159			    u_int8_t);
160void			 pf_change_ap(struct pf_addr *, u_int16_t *,
161			    u_int16_t *, u_int16_t *, struct pf_addr *,
162			    u_int16_t, u_int8_t, sa_family_t);
163#ifdef INET6
164void			 pf_change_a6(struct pf_addr *, u_int16_t *,
165			    struct pf_addr *, u_int8_t);
166#endif /* INET6 */
167void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
168			    struct pf_addr *, struct pf_addr *, u_int16_t,
169			    u_int16_t *, u_int16_t *, u_int16_t *,
170			    u_int16_t *, u_int8_t, sa_family_t);
171void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
172			    const struct pf_addr *, const struct pf_addr *,
173			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
174			    u_int8_t, u_int16_t, u_int16_t, u_int8_t);
175void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
176			    sa_family_t, struct pf_rule *);
177struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
178			    int, int, struct pfi_kif *,
179			    struct pf_addr *, u_int16_t, struct pf_addr *,
180			    u_int16_t, int);
181struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
182			    int, int, struct pfi_kif *, struct pf_src_node **,
183			    struct pf_addr *, u_int16_t,
184			    struct pf_addr *, u_int16_t,
185			    struct pf_addr *, u_int16_t *);
186int			 pf_test_tcp(struct pf_rule **, struct pf_state **,
187			    int, struct pfi_kif *, struct mbuf *, int,
188			    void *, struct pf_pdesc *, struct pf_rule **,
189			    struct pf_ruleset **);
190int			 pf_test_udp(struct pf_rule **, struct pf_state **,
191			    int, struct pfi_kif *, struct mbuf *, int,
192			    void *, struct pf_pdesc *, struct pf_rule **,
193			    struct pf_ruleset **);
194int			 pf_test_icmp(struct pf_rule **, struct pf_state **,
195			    int, struct pfi_kif *, struct mbuf *, int,
196			    void *, struct pf_pdesc *, struct pf_rule **,
197			    struct pf_ruleset **);
198int			 pf_test_other(struct pf_rule **, struct pf_state **,
199			    int, struct pfi_kif *, struct mbuf *, int, void *,
200			    struct pf_pdesc *, struct pf_rule **,
201			    struct pf_ruleset **);
202int			 pf_test_fragment(struct pf_rule **, int,
203			    struct pfi_kif *, struct mbuf *, void *,
204			    struct pf_pdesc *, struct pf_rule **,
205			    struct pf_ruleset **);
206int			 pf_test_state_tcp(struct pf_state **, int,
207			    struct pfi_kif *, struct mbuf *, int,
208			    void *, struct pf_pdesc *, u_short *);
209int			 pf_test_state_udp(struct pf_state **, int,
210			    struct pfi_kif *, struct mbuf *, int,
211			    void *, struct pf_pdesc *);
212int			 pf_test_state_icmp(struct pf_state **, int,
213			    struct pfi_kif *, struct mbuf *, int,
214			    void *, struct pf_pdesc *);
215int			 pf_test_state_other(struct pf_state **, int,
216			    struct pfi_kif *, struct pf_pdesc *);
217struct pf_tag		*pf_get_tag(struct mbuf *);
218int			 pf_match_tag(struct mbuf *, struct pf_rule *,
219			     struct pf_rule *, struct pf_tag *, int *);
220void			 pf_hash(struct pf_addr *, struct pf_addr *,
221			    struct pf_poolhashkey *, sa_family_t);
222int			 pf_map_addr(u_int8_t, struct pf_rule *,
223			    struct pf_addr *, struct pf_addr *,
224			    struct pf_addr *, struct pf_src_node **);
225int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
226			    struct pf_addr *, struct pf_addr *, u_int16_t,
227			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
228			    struct pf_src_node **);
229void			 pf_route(struct mbuf **, struct pf_rule *, int,
230			    struct ifnet *, struct pf_state *);
231void			 pf_route6(struct mbuf **, struct pf_rule *, int,
232			    struct ifnet *, struct pf_state *);
233int			 pf_socket_lookup(uid_t *, gid_t *,
234			    int, struct pf_pdesc *);
235u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
236			    sa_family_t);
237u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
238			    sa_family_t);
239u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
240				u_int16_t);
241void			 pf_set_rt_ifp(struct pf_state *,
242			    struct pf_addr *);
243int			 pf_check_proto_cksum(struct mbuf *, int, int,
244			    u_int8_t, sa_family_t);
245int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
246			    struct pf_addr_wrap *);
247static int		 pf_add_mbuf_tag(struct mbuf *, u_int);
248struct pf_state		*pf_find_state_recurse(struct pfi_kif *,
249			    struct pf_state *, u_int8_t);
250
251#ifdef __FreeBSD__
252int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
253
254struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
255#else
256struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
257	{ &pf_state_pl, PFSTATE_HIWAT },
258	{ &pf_src_tree_pl, PFSNODE_HIWAT },
259	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT }
260};
261#endif
262
/*
 * Find the state matching `key' and store it in *state.  The macro relies
 * on `direction', `kif', `key' and `state' being in scope where it is
 * expanded, and it returns from the enclosing function:
 *   - PF_DROP when no state is found;
 *   - PF_PASS for an outbound packet whose state comes from an outbound
 *     PF_ROUTETO rule or an inbound PF_REPLYTO rule, when the state's
 *     rt_kif is set and is not `kif'.
 * Inbound packets are searched in the PF_EXT_GWY tree, all others in
 * the PF_LAN_EXT tree.
 */
#define STATE_LOOKUP()							\
	do {								\
		*state = pf_find_state_recurse(kif, &key,		\
		    (direction == PF_IN) ? PF_EXT_GWY : PF_LAN_EXT);	\
		if (*state == NULL)					\
			return (PF_DROP);				\
		if (direction == PF_OUT &&				\
		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
		      (*state)->rule.ptr->direction == PF_OUT) ||	\
		     ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
		      (*state)->rule.ptr->direction == PF_IN)) &&	\
		    (*state)->rt_kif != NULL &&				\
		    (*state)->rt_kif != kif)				\
			return (PF_PASS);				\
	} while (0)
282
/*
 * True when the lan and gwy endpoints of state `s' differ: the first
 * address word, the remaining three words when the state is AF_INET6,
 * or the port.  The whole expansion is parenthesized so the macro can be
 * negated or combined with && without operator-precedence surprises.
 */
#define	STATE_TRANSLATE(s) \
	((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
	((s)->af == AF_INET6 && \
	((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
	(s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
	(s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
	(s)->lan.port != (s)->gwy.port)
290
/*
 * Pick a kif from the flags of rule `r': `k' itself when PFRULE_IFBOUND
 * is set, k's parent when PFRULE_GRBOUND is set, otherwise the parent of
 * k's parent.
 */
#define BOUND_IFACE(r, k)						\
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) :			\
	 ((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent :		\
	 (k)->pfik_parent->pfik_parent)
294
295static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
296static __inline int pf_state_compare_lan_ext(struct pf_state *,
297	struct pf_state *);
298static __inline int pf_state_compare_ext_gwy(struct pf_state *,
299	struct pf_state *);
300static __inline int pf_state_compare_id(struct pf_state *,
301	struct pf_state *);
302
303struct pf_src_tree tree_src_tracking;
304
305struct pf_state_tree_id tree_id;
306struct pf_state_queue state_updates;
307
308RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
309RB_GENERATE(pf_state_tree_lan_ext, pf_state,
310    u.s.entry_lan_ext, pf_state_compare_lan_ext);
311RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
312    u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
313RB_GENERATE(pf_state_tree_id, pf_state,
314    u.s.entry_id, pf_state_compare_id);
315
316#ifdef __FreeBSD__
317static int
318#else
319static __inline int
320#endif
321pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
322{
323	int	diff;
324
325	if (a->rule.ptr > b->rule.ptr)
326		return (1);
327	if (a->rule.ptr < b->rule.ptr)
328		return (-1);
329	if ((diff = a->af - b->af) != 0)
330		return (diff);
331	switch (a->af) {
332#ifdef INET
333	case AF_INET:
334		if (a->addr.addr32[0] > b->addr.addr32[0])
335			return (1);
336		if (a->addr.addr32[0] < b->addr.addr32[0])
337			return (-1);
338		break;
339#endif /* INET */
340#ifdef INET6
341	case AF_INET6:
342		if (a->addr.addr32[3] > b->addr.addr32[3])
343			return (1);
344		if (a->addr.addr32[3] < b->addr.addr32[3])
345			return (-1);
346		if (a->addr.addr32[2] > b->addr.addr32[2])
347			return (1);
348		if (a->addr.addr32[2] < b->addr.addr32[2])
349			return (-1);
350		if (a->addr.addr32[1] > b->addr.addr32[1])
351			return (1);
352		if (a->addr.addr32[1] < b->addr.addr32[1])
353			return (-1);
354		if (a->addr.addr32[0] > b->addr.addr32[0])
355			return (1);
356		if (a->addr.addr32[0] < b->addr.addr32[0])
357			return (-1);
358		break;
359#endif /* INET6 */
360	}
361	return (0);
362}
363
364#ifdef __FreeBSD__
365static int
366#else
367static __inline int
368#endif
369pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
370{
371	int	diff;
372
373	if ((diff = a->proto - b->proto) != 0)
374		return (diff);
375	if ((diff = a->af - b->af) != 0)
376		return (diff);
377	switch (a->af) {
378#ifdef INET
379	case AF_INET:
380		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
381			return (1);
382		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
383			return (-1);
384		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
385			return (1);
386		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
387			return (-1);
388		break;
389#endif /* INET */
390#ifdef INET6
391	case AF_INET6:
392		if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
393			return (1);
394		if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
395			return (-1);
396		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
397			return (1);
398		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
399			return (-1);
400		if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
401			return (1);
402		if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
403			return (-1);
404		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
405			return (1);
406		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
407			return (-1);
408		if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
409			return (1);
410		if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
411			return (-1);
412		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
413			return (1);
414		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
415			return (-1);
416		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
417			return (1);
418		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
419			return (-1);
420		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
421			return (1);
422		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
423			return (-1);
424		break;
425#endif /* INET6 */
426	}
427
428	if ((diff = a->lan.port - b->lan.port) != 0)
429		return (diff);
430	if ((diff = a->ext.port - b->ext.port) != 0)
431		return (diff);
432
433	return (0);
434}
435
436#ifdef __FreeBSD__
437static int
438#else
439static __inline int
440#endif
441pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
442{
443	int	diff;
444
445	if ((diff = a->proto - b->proto) != 0)
446		return (diff);
447	if ((diff = a->af - b->af) != 0)
448		return (diff);
449	switch (a->af) {
450#ifdef INET
451	case AF_INET:
452		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
453			return (1);
454		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
455			return (-1);
456		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
457			return (1);
458		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
459			return (-1);
460		break;
461#endif /* INET */
462#ifdef INET6
463	case AF_INET6:
464		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
465			return (1);
466		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
467			return (-1);
468		if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
469			return (1);
470		if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
471			return (-1);
472		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
473			return (1);
474		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
475			return (-1);
476		if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
477			return (1);
478		if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
479			return (-1);
480		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
481			return (1);
482		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
483			return (-1);
484		if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
485			return (1);
486		if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
487			return (-1);
488		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
489			return (1);
490		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
491			return (-1);
492		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
493			return (1);
494		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
495			return (-1);
496		break;
497#endif /* INET6 */
498	}
499
500	if ((diff = a->ext.port - b->ext.port) != 0)
501		return (diff);
502	if ((diff = a->gwy.port - b->gwy.port) != 0)
503		return (diff);
504
505	return (0);
506}
507
508#ifdef __FreeBSD__
509static int
510#else
511static __inline int
512#endif
513pf_state_compare_id(struct pf_state *a, struct pf_state *b)
514{
515	if (a->id > b->id)
516		return (1);
517	if (a->id < b->id)
518		return (-1);
519	if (a->creatorid > b->creatorid)
520		return (1);
521	if (a->creatorid < b->creatorid)
522		return (-1);
523
524	return (0);
525}
526
#ifdef INET6
/*
 * Copy an address of family `af' from `src' to `dst': the first 32-bit
 * word for AF_INET (when INET is compiled in), all four words for
 * AF_INET6.  Any other family leaves `dst' untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	int	 w, nwords = 0;

#ifdef INET
	if (af == AF_INET)
		nwords = 1;
#endif /* INET */
	if (af == AF_INET6)
		nwords = 4;

	for (w = 0; w < nwords; w++)
		dst->addr32[w] = src->addr32[w];
}
#endif
546
547struct pf_state *
548pf_find_state_byid(struct pf_state *key)
549{
550	pf_status.fcounters[FCNT_STATE_SEARCH]++;
551	return (RB_FIND(pf_state_tree_id, &tree_id, key));
552}
553
554struct pf_state *
555pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree)
556{
557	struct pf_state *s;
558
559	pf_status.fcounters[FCNT_STATE_SEARCH]++;
560
561	switch (tree) {
562	case PF_LAN_EXT:
563		for (; kif != NULL; kif = kif->pfik_parent) {
564			s = RB_FIND(pf_state_tree_lan_ext,
565			    &kif->pfik_lan_ext, key);
566			if (s != NULL)
567				return (s);
568		}
569		return (NULL);
570	case PF_EXT_GWY:
571		for (; kif != NULL; kif = kif->pfik_parent) {
572			s = RB_FIND(pf_state_tree_ext_gwy,
573			    &kif->pfik_ext_gwy, key);
574			if (s != NULL)
575				return (s);
576		}
577		return (NULL);
578	default:
579		panic("pf_find_state_recurse");
580	}
581}
582
583struct pf_state *
584pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more)
585{
586	struct pf_state *s, *ss = NULL;
587	struct pfi_kif	*kif;
588
589	pf_status.fcounters[FCNT_STATE_SEARCH]++;
590
591	switch (tree) {
592	case PF_LAN_EXT:
593		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
594			s = RB_FIND(pf_state_tree_lan_ext,
595			    &kif->pfik_lan_ext, key);
596			if (s == NULL)
597				continue;
598			if (more == NULL)
599				return (s);
600			ss = s;
601			(*more)++;
602		}
603		return (ss);
604	case PF_EXT_GWY:
605		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
606			s = RB_FIND(pf_state_tree_ext_gwy,
607			    &kif->pfik_ext_gwy, key);
608			if (s == NULL)
609				continue;
610			if (more == NULL)
611				return (s);
612			ss = s;
613			(*more)++;
614		}
615		return (ss);
616	default:
617		panic("pf_find_state_all");
618	}
619}
620
621int
622pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
623    struct pf_addr *src, sa_family_t af)
624{
625	struct pf_src_node	k;
626
627	if (*sn == NULL) {
628		k.af = af;
629		PF_ACPY(&k.addr, src, af);
630		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
631		    rule->rpool.opts & PF_POOL_STICKYADDR)
632			k.rule.ptr = rule;
633		else
634			k.rule.ptr = NULL;
635		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
636		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
637	}
638	if (*sn == NULL) {
639		if (!rule->max_src_nodes ||
640		    rule->src_nodes < rule->max_src_nodes)
641			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
642		if ((*sn) == NULL)
643			return (-1);
644		bzero(*sn, sizeof(struct pf_src_node));
645		(*sn)->af = af;
646		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
647		    rule->rpool.opts & PF_POOL_STICKYADDR)
648			(*sn)->rule.ptr = rule;
649		else
650			(*sn)->rule.ptr = NULL;
651		PF_ACPY(&(*sn)->addr, src, af);
652		if (RB_INSERT(pf_src_tree,
653		    &tree_src_tracking, *sn) != NULL) {
654			if (pf_status.debug >= PF_DEBUG_MISC) {
655				printf("pf: src_tree insert failed: ");
656				pf_print_host(&(*sn)->addr, 0, af);
657				printf("\n");
658			}
659			pool_put(&pf_src_tree_pl, *sn);
660			return (-1);
661		}
662#ifdef __FreeBSD__
663		(*sn)->creation = time_second;
664#else
665		(*sn)->creation = time.tv_sec;
666#endif
667		(*sn)->ruletype = rule->action;
668		if ((*sn)->rule.ptr != NULL)
669			(*sn)->rule.ptr->src_nodes++;
670		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
671		pf_status.src_nodes++;
672	} else {
673		if (rule->max_src_states &&
674		    (*sn)->states >= rule->max_src_states)
675			return (-1);
676	}
677	return (0);
678}
679
680int
681pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
682{
683	/* Thou MUST NOT insert multiple duplicate keys */
684	state->u.s.kif = kif;
685	if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
686		if (pf_status.debug >= PF_DEBUG_MISC) {
687			printf("pf: state insert failed: tree_lan_ext");
688			printf(" lan: ");
689			pf_print_host(&state->lan.addr, state->lan.port,
690			    state->af);
691			printf(" gwy: ");
692			pf_print_host(&state->gwy.addr, state->gwy.port,
693			    state->af);
694			printf(" ext: ");
695			pf_print_host(&state->ext.addr, state->ext.port,
696			    state->af);
697			if (state->sync_flags & PFSTATE_FROMSYNC)
698				printf(" (from sync)");
699			printf("\n");
700		}
701		return (-1);
702	}
703
704	if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
705		if (pf_status.debug >= PF_DEBUG_MISC) {
706			printf("pf: state insert failed: tree_ext_gwy");
707			printf(" lan: ");
708			pf_print_host(&state->lan.addr, state->lan.port,
709			    state->af);
710			printf(" gwy: ");
711			pf_print_host(&state->gwy.addr, state->gwy.port,
712			    state->af);
713			printf(" ext: ");
714			pf_print_host(&state->ext.addr, state->ext.port,
715			    state->af);
716			if (state->sync_flags & PFSTATE_FROMSYNC)
717				printf(" (from sync)");
718			printf("\n");
719		}
720		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
721		return (-1);
722	}
723
724	if (state->id == 0 && state->creatorid == 0) {
725		state->id = htobe64(pf_status.stateid++);
726		state->creatorid = pf_status.hostid;
727	}
728	if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
729		if (pf_status.debug >= PF_DEBUG_MISC) {
730#ifdef __FreeBSD__
731			printf("pf: state insert failed: "
732			    "id: %016llx creatorid: %08x",
733			    (long long)be64toh(state->id),
734			    ntohl(state->creatorid));
735#else
736			printf("pf: state insert failed: "
737			    "id: %016llx creatorid: %08x",
738			    betoh64(state->id), ntohl(state->creatorid));
739#endif
740			if (state->sync_flags & PFSTATE_FROMSYNC)
741				printf(" (from sync)");
742			printf("\n");
743		}
744		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
745		RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
746		return (-1);
747	}
748	TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates);
749
750	pf_status.fcounters[FCNT_STATE_INSERT]++;
751	pf_status.states++;
752	pfi_attach_state(kif);
753#if NPFSYNC
754	pfsync_insert_state(state);
755#endif
756	return (0);
757}
758
759void
760pf_purge_timeout(void *arg)
761{
762#ifdef __FreeBSD__
763	struct callout  *to = arg;
764#else
765	struct timeout	*to = arg;
766#endif
767	int		 s;
768
769#ifdef __FreeBSD__
770	PF_LOCK();
771#endif
772	s = splsoftnet();
773	pf_purge_expired_states();
774	pf_purge_expired_fragments();
775	pf_purge_expired_src_nodes();
776	splx(s);
777#ifdef __FreeBSD__
778	PF_UNLOCK();
779#endif
780
781#ifdef __FreeBSD__
782	callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz,
783	    pf_purge_timeout, to);
784#else
785	timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz);
786#endif
787}
788
789u_int32_t
790pf_state_expires(const struct pf_state *state)
791{
792	u_int32_t	timeout;
793	u_int32_t	start;
794	u_int32_t	end;
795	u_int32_t	states;
796
797	/* handle all PFTM_* > PFTM_MAX here */
798	if (state->timeout == PFTM_PURGE)
799#ifdef __FreeBSD__
800		return (time_second);
801#else
802		return (time.tv_sec);
803#endif
804	if (state->timeout == PFTM_UNTIL_PACKET)
805		return (0);
806#ifdef __FreeBSD__
807	KASSERT((state->timeout < PFTM_MAX),
808	    ("pf_state_expires: timeout > PFTM_MAX"));
809#else
810	KASSERT(state->timeout < PFTM_MAX);
811#endif
812	timeout = state->rule.ptr->timeout[state->timeout];
813	if (!timeout)
814		timeout = pf_default_rule.timeout[state->timeout];
815	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
816	if (start) {
817		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
818		states = state->rule.ptr->states;
819	} else {
820		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
821		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
822		states = pf_status.states;
823	}
824	if (end && states > start && start < end) {
825		if (states < end)
826			return (state->expire + timeout * (end - states) /
827			    (end - start));
828		else
829#ifdef __FreeBSD__
830			return (time_second);
831#else
832			return (time.tv_sec);
833#endif
834	}
835	return (state->expire + timeout);
836}
837
838void
839pf_purge_expired_src_nodes(void)
840{
841	 struct pf_src_node		*cur, *next;
842
843	 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
844		 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
845
846#ifdef __FreeBSD__
847		 if (cur->states <= 0 && cur->expire <= time_second) {
848#else
849		 if (cur->states <= 0 && cur->expire <= time.tv_sec) {
850#endif
851			 if (cur->rule.ptr != NULL) {
852				 cur->rule.ptr->src_nodes--;
853				 if (cur->rule.ptr->states <= 0 &&
854				     cur->rule.ptr->max_src_nodes <= 0)
855					 pf_rm_rule(NULL, cur->rule.ptr);
856			 }
857			 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
858			 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
859			 pf_status.src_nodes--;
860			 pool_put(&pf_src_tree_pl, cur);
861		 }
862	 }
863}
864
865void
866pf_src_tree_remove_state(struct pf_state *s)
867{
868	u_int32_t timeout;
869
870	if (s->src_node != NULL) {
871		if (--s->src_node->states <= 0) {
872			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
873			if (!timeout)
874				timeout =
875				    pf_default_rule.timeout[PFTM_SRC_NODE];
876#ifdef __FreeBSD__
877			s->src_node->expire = time_second + timeout;
878#else
879			s->src_node->expire = time.tv_sec + timeout;
880#endif
881		}
882	}
883	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
884		if (--s->nat_src_node->states <= 0) {
885			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
886			if (!timeout)
887				timeout =
888				    pf_default_rule.timeout[PFTM_SRC_NODE];
889#ifdef __FreeBSD__
890			s->nat_src_node->expire = time_second + timeout;
891#else
892			s->nat_src_node->expire = time.tv_sec + timeout;
893#endif
894		}
895	}
896	s->src_node = s->nat_src_node = NULL;
897}
898
899void
900pf_purge_expired_states(void)
901{
902	struct pf_state		*cur, *next;
903
904	for (cur = RB_MIN(pf_state_tree_id, &tree_id);
905	    cur; cur = next) {
906		next = RB_NEXT(pf_state_tree_id, &tree_id, cur);
907
908#ifdef __FreeBSD__
909		if (pf_state_expires(cur) <= time_second) {
910#else
911		if (pf_state_expires(cur) <= time.tv_sec) {
912#endif
913			if (cur->src.state == PF_TCPS_PROXY_DST)
914				pf_send_tcp(cur->rule.ptr, cur->af,
915				    &cur->ext.addr, &cur->lan.addr,
916				    cur->ext.port, cur->lan.port,
917				    cur->src.seqhi, cur->src.seqlo + 1, 0,
918				    TH_RST|TH_ACK, 0, 0);
919			RB_REMOVE(pf_state_tree_ext_gwy,
920			    &cur->u.s.kif->pfik_ext_gwy, cur);
921			RB_REMOVE(pf_state_tree_lan_ext,
922			    &cur->u.s.kif->pfik_lan_ext, cur);
923			RB_REMOVE(pf_state_tree_id, &tree_id, cur);
924#if NPFSYNC
925			pfsync_delete_state(cur);
926#endif
927			pf_src_tree_remove_state(cur);
928			if (--cur->rule.ptr->states <= 0 &&
929			    cur->rule.ptr->src_nodes <= 0)
930				pf_rm_rule(NULL, cur->rule.ptr);
931			if (cur->nat_rule.ptr != NULL)
932				if (--cur->nat_rule.ptr->states <= 0 &&
933					cur->nat_rule.ptr->src_nodes <= 0)
934					pf_rm_rule(NULL, cur->nat_rule.ptr);
935			if (cur->anchor.ptr != NULL)
936				if (--cur->anchor.ptr->states <= 0)
937					pf_rm_rule(NULL, cur->anchor.ptr);
938			pf_normalize_tcp_cleanup(cur);
939			pfi_detach_state(cur->u.s.kif);
940			TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
941			pool_put(&pf_state_pl, cur);
942			pf_status.fcounters[FCNT_STATE_REMOVALS]++;
943			pf_status.states--;
944		}
945	}
946}
947
948int
949pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
950{
951	if (aw->type != PF_ADDR_TABLE)
952		return (0);
953	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
954		return (1);
955	return (0);
956}
957
958void
959pf_tbladdr_remove(struct pf_addr_wrap *aw)
960{
961	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
962		return;
963	pfr_detach_table(aw->p.tbl);
964	aw->p.tbl = NULL;
965}
966
967void
968pf_tbladdr_copyout(struct pf_addr_wrap *aw)
969{
970	struct pfr_ktable *kt = aw->p.tbl;
971
972	if (aw->type != PF_ADDR_TABLE || kt == NULL)
973		return;
974	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
975		kt = kt->pfrkt_root;
976	aw->p.tbl = NULL;
977	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
978		kt->pfrkt_cnt : -1;
979}
980
981void
982pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
983{
984	switch (af) {
985#ifdef INET
986	case AF_INET: {
987		u_int32_t a = ntohl(addr->addr32[0]);
988		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
989		    (a>>8)&255, a&255);
990		if (p) {
991			p = ntohs(p);
992			printf(":%u", p);
993		}
994		break;
995	}
996#endif /* INET */
997#ifdef INET6
998	case AF_INET6: {
999		u_int16_t b;
1000		u_int8_t i, curstart = 255, curend = 0,
1001		    maxstart = 0, maxend = 0;
1002		for (i = 0; i < 8; i++) {
1003			if (!addr->addr16[i]) {
1004				if (curstart == 255)
1005					curstart = i;
1006				else
1007					curend = i;
1008			} else {
1009				if (curstart) {
1010					if ((curend - curstart) >
1011					    (maxend - maxstart)) {
1012						maxstart = curstart;
1013						maxend = curend;
1014						curstart = 255;
1015					}
1016				}
1017			}
1018		}
1019		for (i = 0; i < 8; i++) {
1020			if (i >= maxstart && i <= maxend) {
1021				if (maxend != 7) {
1022					if (i == maxstart)
1023						printf(":");
1024				} else {
1025					if (i == maxend)
1026						printf(":");
1027				}
1028			} else {
1029				b = ntohs(addr->addr16[i]);
1030				printf("%x", b);
1031				if (i < 7)
1032					printf(":");
1033			}
1034		}
1035		if (p) {
1036			p = ntohs(p);
1037			printf("[%u]", p);
1038		}
1039		break;
1040	}
1041#endif /* INET6 */
1042	}
1043}
1044
1045void
1046pf_print_state(struct pf_state *s)
1047{
1048	switch (s->proto) {
1049	case IPPROTO_TCP:
1050		printf("TCP ");
1051		break;
1052	case IPPROTO_UDP:
1053		printf("UDP ");
1054		break;
1055	case IPPROTO_ICMP:
1056		printf("ICMP ");
1057		break;
1058	case IPPROTO_ICMPV6:
1059		printf("ICMPV6 ");
1060		break;
1061	default:
1062		printf("%u ", s->proto);
1063		break;
1064	}
1065	pf_print_host(&s->lan.addr, s->lan.port, s->af);
1066	printf(" ");
1067	pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
1068	printf(" ");
1069	pf_print_host(&s->ext.addr, s->ext.port, s->af);
1070	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
1071	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
1072	if (s->src.wscale && s->dst.wscale)
1073		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1074	printf("]");
1075	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
1076	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1077	if (s->src.wscale && s->dst.wscale)
1078		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1079	printf("]");
1080	printf(" %u:%u", s->src.state, s->dst.state);
1081}
1082
1083void
1084pf_print_flags(u_int8_t f)
1085{
1086	if (f)
1087		printf(" ");
1088	if (f & TH_FIN)
1089		printf("F");
1090	if (f & TH_SYN)
1091		printf("S");
1092	if (f & TH_RST)
1093		printf("R");
1094	if (f & TH_PUSH)
1095		printf("P");
1096	if (f & TH_ACK)
1097		printf("A");
1098	if (f & TH_URG)
1099		printf("U");
1100	if (f & TH_ECE)
1101		printf("E");
1102	if (f & TH_CWR)
1103		printf("W");
1104}
1105
/*
 * Helper for pf_calc_skip_steps().  Uses the locals `head' and `cur' of
 * the function it is expanded in: every rule from head[i] up to, but not
 * including, cur gets skip[i].ptr set to cur, and head[i] is advanced
 * until it equals cur.
 */
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)
1113
1114void
1115pf_calc_skip_steps(struct pf_rulequeue *rules)
1116{
1117	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1118	int i;
1119
1120	cur = TAILQ_FIRST(rules);
1121	prev = cur;
1122	for (i = 0; i < PF_SKIP_COUNT; ++i)
1123		head[i] = cur;
1124	while (cur != NULL) {
1125
1126		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1127			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1128		if (cur->direction != prev->direction)
1129			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1130		if (cur->af != prev->af)
1131			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1132		if (cur->proto != prev->proto)
1133			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1134		if (cur->src.not != prev->src.not ||
1135		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1136			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1137		if (cur->src.port[0] != prev->src.port[0] ||
1138		    cur->src.port[1] != prev->src.port[1] ||
1139		    cur->src.port_op != prev->src.port_op)
1140			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1141		if (cur->dst.not != prev->dst.not ||
1142		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1143			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1144		if (cur->dst.port[0] != prev->dst.port[0] ||
1145		    cur->dst.port[1] != prev->dst.port[1] ||
1146		    cur->dst.port_op != prev->dst.port_op)
1147			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1148
1149		prev = cur;
1150		cur = TAILQ_NEXT(cur, entries);
1151	}
1152	for (i = 0; i < PF_SKIP_COUNT; ++i)
1153		PF_SET_SKIP_STEPS(i);
1154}
1155
1156int
1157pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1158{
1159	if (aw1->type != aw2->type)
1160		return (1);
1161	switch (aw1->type) {
1162	case PF_ADDR_ADDRMASK:
1163		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1164			return (1);
1165		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1166			return (1);
1167		return (0);
1168	case PF_ADDR_DYNIFTL:
1169		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1170	case PF_ADDR_NOROUTE:
1171		return (0);
1172	case PF_ADDR_TABLE:
1173		return (aw1->p.tbl != aw2->p.tbl);
1174	default:
1175		printf("invalid address type: %d\n", aw1->type);
1176		return (1);
1177	}
1178}
1179
1180void
1181pf_update_anchor_rules()
1182{
1183	struct pf_rule	*rule;
1184	int		 i;
1185
1186	for (i = 0; i < PF_RULESET_MAX; ++i)
1187		TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr,
1188		    entries)
1189			if (rule->anchorname[0])
1190				rule->anchor = pf_find_anchor(rule->anchorname);
1191			else
1192				rule->anchor = NULL;
1193}
1194
/*
 * Incrementally adjust the 16-bit checksum cksum for one 16-bit word of
 * the covered data changing from old to new, and return the result.
 *
 * When udp is non-zero the UDP conventions apply: an incoming checksum of
 * zero is returned unchanged, and a computed result of zero is returned
 * as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	/* may wrap below zero; the fold brings the borrow/carry back in */
	sum = cksum + old - new;
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
1209
1210void
1211pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1212    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1213{
1214	struct pf_addr	ao;
1215	u_int16_t	po = *p;
1216
1217	PF_ACPY(&ao, a, af);
1218	PF_ACPY(a, an, af);
1219
1220	*p = pn;
1221
1222	switch (af) {
1223#ifdef INET
1224	case AF_INET:
1225		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1226		    ao.addr16[0], an->addr16[0], 0),
1227		    ao.addr16[1], an->addr16[1], 0);
1228		*p = pn;
1229		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1230		    ao.addr16[0], an->addr16[0], u),
1231		    ao.addr16[1], an->addr16[1], u),
1232		    po, pn, u);
1233		break;
1234#endif /* INET */
1235#ifdef INET6
1236	case AF_INET6:
1237		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1238		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1239		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1240		    ao.addr16[0], an->addr16[0], u),
1241		    ao.addr16[1], an->addr16[1], u),
1242		    ao.addr16[2], an->addr16[2], u),
1243		    ao.addr16[3], an->addr16[3], u),
1244		    ao.addr16[4], an->addr16[4], u),
1245		    ao.addr16[5], an->addr16[5], u),
1246		    ao.addr16[6], an->addr16[6], u),
1247		    ao.addr16[7], an->addr16[7], u),
1248		    po, pn, u);
1249		break;
1250#endif /* INET6 */
1251	}
1252}
1253
1254
/*
 * Overwrite the u_int32_t stored at a with an and adjust checksum *c for
 * the change (upper 16 bits first, then lower 16 bits).  a is only
 * accessed through memcpy(), so it carries no alignment restrictions.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	old;
	u_int16_t	sum;

	memcpy(&old, a, sizeof(old));
	memcpy(a, &an, sizeof(u_int32_t));

	sum = pf_cksum_fixup(*c, old >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, old & 0xffff, an & 0xffff, u);
}
1266
#ifdef INET6
/*
 * Replace the IPv6 address *a by *an and adjust checksum *c for each of
 * the eight 16-bit address words.  u is handed to pf_cksum_fixup() as its
 * udp argument.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	ao;
	u_int16_t	sum;
	int		i;

	/* keep the old address for the fixups, then install the new one */
	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (i = 0; i < 8; i++)
		sum = pf_cksum_fixup(sum, ao.addr16[i], an->addr16[i], u);
	*c = sum;
}
#endif /* INET6 */
1289
1290void
1291pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1292    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1293    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1294{
1295	struct pf_addr	oia, ooa;
1296
1297	PF_ACPY(&oia, ia, af);
1298	PF_ACPY(&ooa, oa, af);
1299
1300	/* Change inner protocol port, fix inner protocol checksum. */
1301	if (ip != NULL) {
1302		u_int16_t	oip = *ip;
1303		u_int32_t	opc = 0;	/* make the compiler happy */
1304
1305		if (pc != NULL)
1306			opc = *pc;
1307		*ip = np;
1308		if (pc != NULL)
1309			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1310		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1311		if (pc != NULL)
1312			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1313	}
1314	/* Change inner ip address, fix inner ip and icmp checksums. */
1315	PF_ACPY(ia, na, af);
1316	switch (af) {
1317#ifdef INET
1318	case AF_INET: {
1319		u_int32_t	 oh2c = *h2c;
1320
1321		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1322		    oia.addr16[0], ia->addr16[0], 0),
1323		    oia.addr16[1], ia->addr16[1], 0);
1324		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1325		    oia.addr16[0], ia->addr16[0], 0),
1326		    oia.addr16[1], ia->addr16[1], 0);
1327		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1328		break;
1329	}
1330#endif /* INET */
1331#ifdef INET6
1332	case AF_INET6:
1333		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1334		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1335		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1336		    oia.addr16[0], ia->addr16[0], u),
1337		    oia.addr16[1], ia->addr16[1], u),
1338		    oia.addr16[2], ia->addr16[2], u),
1339		    oia.addr16[3], ia->addr16[3], u),
1340		    oia.addr16[4], ia->addr16[4], u),
1341		    oia.addr16[5], ia->addr16[5], u),
1342		    oia.addr16[6], ia->addr16[6], u),
1343		    oia.addr16[7], ia->addr16[7], u);
1344		break;
1345#endif /* INET6 */
1346	}
1347	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
1348	PF_ACPY(oa, na, af);
1349	switch (af) {
1350#ifdef INET
1351	case AF_INET:
1352		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1353		    ooa.addr16[0], oa->addr16[0], 0),
1354		    ooa.addr16[1], oa->addr16[1], 0);
1355		break;
1356#endif /* INET */
1357#ifdef INET6
1358	case AF_INET6:
1359		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1360		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1361		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1362		    ooa.addr16[0], oa->addr16[0], u),
1363		    ooa.addr16[1], oa->addr16[1], u),
1364		    ooa.addr16[2], oa->addr16[2], u),
1365		    ooa.addr16[3], oa->addr16[3], u),
1366		    ooa.addr16[4], oa->addr16[4], u),
1367		    ooa.addr16[5], oa->addr16[5], u),
1368		    ooa.addr16[6], oa->addr16[6], u),
1369		    ooa.addr16[7], oa->addr16[7], u);
1370		break;
1371#endif /* INET6 */
1372	}
1373}
1374
/*
 * Build a TCP segment without payload in a freshly allocated mbuf and
 * pass it to ip_output() (AF_INET) or ip6_output() (AF_INET6).
 *
 * r            rule whose qid is used for ALTQ tagging; may be NULL
 * saddr/daddr  copied into the IP header as source/destination
 * sport/dport  stored into the TCP header as passed (no byte swapping)
 * seq/ack/win  converted with htonl()/htons() before being stored
 * flags        TCP flags, stored as passed
 * mss          if non-zero, a 4 byte maximum segment size option is added
 * ttl          IPv4 TTL; 0 selects ip_defttl (IPv6 always uses
 *              IPV6_DEFHLIM)
 *
 * Returns silently if the mbuf (or, on non-FreeBSD, the generated-packet
 * tag) cannot be allocated.
 *
 * NOTE(review): if af matches none of the compiled-in cases, th stays
 * NULL and is dereferenced below; callers presumably only ever pass
 * AF_INET or AF_INET6 -- confirm.
 */
void
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl)
{
	struct mbuf	*m;
#ifdef ALTQ
	struct m_tag	*mtag;
#endif
	int		 len = 0, tlen;		/* make the compiler happy */
#ifdef INET
	struct ip	*h = NULL;		/* make the compiler happy */
#endif /* INET */
#ifdef INET6
	struct ip6_hdr	*h6 = NULL;		/* make the compiler happy */
#endif /* INET6 */
	struct tcphdr	*th = NULL;		/* make the compiler happy */
#ifdef __FreeBSD__
	struct ip 	*ip;
#endif
	char *opt;

	/* maximum segment size tcp option */
	tlen = sizeof(struct tcphdr);
	if (mss)
		tlen += 4;

	/* len = network header + TCP header (incl. option) */
	switch (af) {
#ifdef INET
	case AF_INET:
		len = sizeof(struct ip) + tlen;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		len = sizeof(struct ip6_hdr) + tlen;
		break;
#endif /* INET6 */
	}

	/* create outgoing mbuf */
#ifdef __FreeBSD__
	m = m_gethdr(M_DONTWAIT, MT_HEADER);
	if (m == NULL)
		return;
	/* FreeBSD marks the packet with an mbuf flag instead of a tag */
	m->m_flags |= M_SKIP_FIREWALL;
#else
	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
	if (mtag == NULL)
		return;
	m = m_gethdr(M_DONTWAIT, MT_HEADER);
	if (m == NULL) {
		m_tag_free(mtag);
		return;
	}
	m_tag_prepend(m, mtag);
#endif
#ifdef ALTQ
	/* a failed tag allocation is not fatal; the packet goes out untagged */
	if (r != NULL && r->qid) {
		struct altq_tag *atag;

		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
		if (mtag != NULL) {
			atag = (struct altq_tag *)(mtag + 1);
			atag->qid = r->qid;
			/* add hints for ecn */
			atag->af = af;
			/*
			 * NOTE(review): hdr is taken before m_data is
			 * advanced by max_linkhdr below, so it does not
			 * point at the IP header built later -- confirm
			 * this is what the ALTQ consumer expects.
			 */
			atag->hdr = mtod(m, struct ip *);
			m_tag_prepend(m, mtag);
		}
	}
#endif
	/* leave max_linkhdr bytes in front of the packet, zero the packet */
	m->m_data += max_linkhdr;
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	bzero(m->m_data, len);
	switch (af) {
#ifdef INET
	case AF_INET:
		h = mtod(m, struct ip *);

		/* IP header fields included in the TCP checksum */
		h->ip_p = IPPROTO_TCP;
		h->ip_len = htons(tlen);
		h->ip_src.s_addr = saddr->v4.s_addr;
		h->ip_dst.s_addr = daddr->v4.s_addr;

		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		h6 = mtod(m, struct ip6_hdr *);

		/* IP header fields included in the TCP checksum */
		h6->ip6_nxt = IPPROTO_TCP;
		h6->ip6_plen = htons(tlen);
		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));

		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
		break;
#endif /* INET6 */
	}

	/* TCP header */
	th->th_sport = sport;
	th->th_dport = dport;
	th->th_seq = htonl(seq);
	th->th_ack = htonl(ack);
	th->th_off = tlen >> 2;
	th->th_flags = flags;
	th->th_win = htons(win);

	/* the option directly follows the fixed TCP header */
	if (mss) {
		opt = (char *)(th + 1);
		opt[0] = TCPOPT_MAXSEG;
		opt[1] = 4;
		HTONS(mss);
		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
	}

	switch (af) {
#ifdef INET
	case AF_INET:
		/*
		 * TCP checksum: summed over the whole packet while the IP
		 * header holds only the fields set above (rest is zero).
		 */
		th->th_sum = in_cksum(m, len);

		/* Finish the IP header */
		h->ip_v = 4;
		h->ip_hl = sizeof(*h) >> 2;
		h->ip_tos = IPTOS_LOWDELAY;
#ifdef __FreeBSD__
		/* the FreeBSD branch stores ip_off and ip_len without htons() */
		h->ip_off = path_mtu_discovery ? IP_DF : 0;
		h->ip_len = len;
#else
		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
		h->ip_len = htons(len);
#endif
		h->ip_ttl = ttl ? ttl : ip_defttl;
		h->ip_sum = 0;
#ifdef __FreeBSD__
		ip = mtod(m, struct ip *);
		/* PF_UNLOCK()/PF_LOCK() bracket the call into the IP stack */
		PF_UNLOCK();
		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
			(void *)NULL);
		PF_LOCK();
#else /* ! __FreeBSD__ */
		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
		    (void *)NULL);
#endif
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/* TCP checksum */
		th->th_sum = in6_cksum(m, IPPROTO_TCP,
		    sizeof(struct ip6_hdr), tlen);

		h6->ip6_vfc |= IPV6_VERSION;
		h6->ip6_hlim = IPV6_DEFHLIM;

#ifdef __FreeBSD__
		/* PF_UNLOCK()/PF_LOCK() bracket the call into the IP stack */
		PF_UNLOCK();
		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
		PF_LOCK();
#else
		ip6_output(m, NULL, NULL, 0, NULL, NULL);
#endif
		break;
#endif /* INET6 */
	}
}
1549
/*
 * Send an ICMP (AF_INET) or ICMPv6 (AF_INET6) error with the given type
 * and code in response to packet m.  A copy of m is made and handed to
 * icmp_error()/icmp6_error(); m itself is not passed on.
 *
 * Returns silently if the copy (or, on non-FreeBSD, the generated-packet
 * tag) cannot be allocated.  With ALTQ compiled in, r is dereferenced
 * unconditionally and therefore must not be NULL.
 *
 * NOTE(review): if af matches none of the compiled-in cases the copy m0
 * is neither sent nor freed here -- callers presumably only pass AF_INET
 * or AF_INET6; confirm.
 */
void
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
    struct pf_rule *r)
{
#ifdef ALTQ
	struct m_tag	*mtag;
#endif
	struct mbuf	*m0;
#ifdef __FreeBSD__
	struct ip *ip;
#endif

#ifdef __FreeBSD__
	m0 = m_copypacket(m, M_DONTWAIT);
	if (m0 == NULL)
		return;
	/* FreeBSD marks the packet with an mbuf flag instead of a tag */
	m0->m_flags |= M_SKIP_FIREWALL;
#else
	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
	if (mtag == NULL)
		return;
	m0 = m_copy(m, 0, M_COPYALL);
	if (m0 == NULL) {
		m_tag_free(mtag);
		return;
	}
	m_tag_prepend(m0, mtag);
#endif

#ifdef ALTQ
	/* a failed tag allocation is not fatal; the packet goes out untagged */
	if (r->qid) {
		struct altq_tag *atag;

		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
		if (mtag != NULL) {
			atag = (struct altq_tag *)(mtag + 1);
			atag->qid = r->qid;
			/* add hints for ecn */
			atag->af = af;
			atag->hdr = mtod(m0, struct ip *);
			m_tag_prepend(m0, mtag);
		}
	}
#endif

	switch (af) {
#ifdef INET
	case AF_INET:
#ifdef __FreeBSD__
		/* icmp_error() expects host byte ordering */
		ip = mtod(m0, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);
		/* PF_UNLOCK()/PF_LOCK() bracket the call into the IP stack */
		PF_UNLOCK();
#endif
		icmp_error(m0, type, code, 0, (void *)NULL);
#ifdef __FreeBSD__
		PF_LOCK();
#endif
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
#ifdef __FreeBSD__
		/* PF_UNLOCK()/PF_LOCK() bracket the call into the IP stack */
		PF_UNLOCK();
#endif
		icmp6_error(m0, type, code, 0);
#ifdef __FreeBSD__
		PF_LOCK();
#endif
		break;
#endif /* INET6 */
	}
}
1624
/*
 * Compare addresses a and b under mask m for address family af.
 *
 * With n == 0 the result is 1 when the masked addresses are equal and 0
 * otherwise; with n != 0 the result is inverted.  An address family that
 * is not compiled in is treated as "not equal".
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		equal = ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* all four masked words have to agree */
		equal = 1;
		for (i = 0; i < 4 && equal; i++) {
			if ((a->addr32[i] & m->addr32[i]) !=
			    (b->addr32[i] & m->addr32[i]))
				equal = 0;
		}
		break;
	}
#endif /* INET6 */
	}

	return (n ? !equal : equal);
}
1670
1671int
1672pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1673{
1674	switch (op) {
1675	case PF_OP_IRG:
1676		return ((p > a1) && (p < a2));
1677	case PF_OP_XRG:
1678		return ((p < a1) || (p > a2));
1679	case PF_OP_RRG:
1680		return ((p >= a1) && (p <= a2));
1681	case PF_OP_EQ:
1682		return (p == a1);
1683	case PF_OP_NE:
1684		return (p != a1);
1685	case PF_OP_LT:
1686		return (p < a1);
1687	case PF_OP_LE:
1688		return (p <= a1);
1689	case PF_OP_GT:
1690		return (p > a1);
1691	case PF_OP_GE:
1692		return (p >= a1);
1693	}
1694	return (0); /* never reached */
1695}
1696
/*
 * Port flavour of pf_match(): a1, a2 and p are converted from network to
 * host byte order before being compared with operator op.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	u_int16_t	lo = ntohs(a1);
	u_int16_t	hi = ntohs(a2);
	u_int16_t	port = ntohs(p);

	return (pf_match(op, lo, hi, port));
}
1705
1706int
1707pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1708{
1709	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1710		return (0);
1711	return (pf_match(op, a1, a2, u));
1712}
1713
1714int
1715pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1716{
1717	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1718		return (0);
1719	return (pf_match(op, a1, a2, g));
1720}
1721
1722struct pf_tag *
1723pf_get_tag(struct mbuf *m)
1724{
1725	struct m_tag	*mtag;
1726
1727	if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL)
1728		return ((struct pf_tag *)(mtag + 1));
1729	else
1730		return (NULL);
1731}
1732
1733int
1734pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat_rule,
1735    struct pf_tag *pftag, int *tag)
1736{
1737	if (*tag == -1) {	/* find mbuf tag */
1738		pftag = pf_get_tag(m);
1739		if (pftag != NULL)
1740			*tag = pftag->tag;
1741		else
1742			*tag = 0;
1743		if (nat_rule != NULL && nat_rule->tag)
1744			*tag = nat_rule->tag;
1745	}
1746
1747	return ((!r->match_tag_not && r->match_tag == *tag) ||
1748	    (r->match_tag_not && r->match_tag != *tag));
1749}
1750
1751int
1752pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag)
1753{
1754	struct m_tag	*mtag;
1755
1756	if (tag <= 0)
1757		return (0);
1758
1759	if (pftag == NULL) {
1760		mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT);
1761		if (mtag == NULL)
1762			return (1);
1763		((struct pf_tag *)(mtag + 1))->tag = tag;
1764		m_tag_prepend(m, mtag);
1765	} else
1766		pftag->tag = tag;
1767
1768	return (0);
1769}
1770
/*
 * Descend from anchor rule r into the rulesets of its anchor.
 *
 * r  current rule; must be non-NULL and have an anchor
 * a  variable remembering the anchor rule; must be NULL on entry
 * s  variable holding the current ruleset inside the anchor; must be
 *    NULL on entry
 * n  index into rules[] selecting the rule list to walk
 *
 * Panics if the preconditions above are violated.  On exit r is the
 * first active rule of the first ruleset in the anchor that has one, with
 * a and s set accordingly.  If no ruleset has an active rule, r becomes
 * the rule following the anchor rule and a is reset to NULL.
 */
#define PF_STEP_INTO_ANCHOR(r, a, s, n)					\
	do {								\
		if ((r) == NULL || (r)->anchor == NULL ||		\
		    (s) != NULL || (a) != NULL)				\
			panic("PF_STEP_INTO_ANCHOR");			\
		(a) = (r);						\
		(s) = TAILQ_FIRST(&(r)->anchor->rulesets);		\
		(r) = NULL;						\
		while ((s) != NULL && ((r) =				\
		    TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL)	\
			(s) = TAILQ_NEXT((s), entries);			\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1787
/*
 * Continue after the rules of ruleset s inside an anchor are exhausted.
 *
 * r  current rule; must be NULL (end of the ruleset's rule list)
 * a  the anchor rule that was stepped into; must be non-NULL
 * s  the ruleset just finished; must be non-NULL
 * n  index into rules[] selecting the rule list to walk
 *
 * Panics if the preconditions above are violated.  On exit r is the
 * first active rule of the next ruleset in the anchor that has one.  If
 * there is none, r becomes the rule following the anchor rule and a is
 * reset to NULL; s has then run to the end of the list (NULL).
 */
#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n)				\
	do {								\
		if ((r) != NULL || (a) == NULL || (s) == NULL)		\
			panic("PF_STEP_OUT_OF_ANCHOR");			\
		(s) = TAILQ_NEXT((s), entries);				\
		while ((s) != NULL && ((r) =				\
		    TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL)	\
			(s) = TAILQ_NEXT((s), entries);			\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1801
1802#ifdef INET6
1803void
1804pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
1805    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
1806{
1807	switch (af) {
1808#ifdef INET
1809	case AF_INET:
1810		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1811		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1812		break;
1813#endif /* INET */
1814	case AF_INET6:
1815		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1816		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1817		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
1818		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
1819		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
1820		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
1821		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
1822		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
1823		break;
1824	}
1825}
1826
1827void
1828pf_addr_inc(struct pf_addr *addr, sa_family_t af)
1829{
1830	switch (af) {
1831#ifdef INET
1832	case AF_INET:
1833		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
1834		break;
1835#endif /* INET */
1836	case AF_INET6:
1837		if (addr->addr32[3] == 0xffffffff) {
1838			addr->addr32[3] = 0;
1839			if (addr->addr32[2] == 0xffffffff) {
1840				addr->addr32[2] = 0;
1841				if (addr->addr32[1] == 0xffffffff) {
1842					addr->addr32[1] = 0;
1843					addr->addr32[0] =
1844					    htonl(ntohl(addr->addr32[0]) + 1);
1845				} else
1846					addr->addr32[1] =
1847					    htonl(ntohl(addr->addr32[1]) + 1);
1848			} else
1849				addr->addr32[2] =
1850				    htonl(ntohl(addr->addr32[2]) + 1);
1851		} else
1852			addr->addr32[3] =
1853			    htonl(ntohl(addr->addr32[3]) + 1);
1854		break;
1855	}
1856}
1857#endif /* INET6 */
1858
/*
 * Mixing step used by pf_hash(): scrambles the three variables a, b and
 * c in place through nine rounds of subtract, xor and shift.  The
 * arguments are used unparenthesized and evaluated many times, so they
 * must be plain variables without side effects.
 */
#define mix(a,b,c) \
	do {					\
		a -= b; a -= c; a ^= (c >> 13);	\
		b -= c; b -= a; b ^= (a << 8);	\
		c -= a; c -= b; c ^= (b >> 13);	\
		a -= b; a -= c; a ^= (c >> 12);	\
		b -= c; b -= a; b ^= (a << 16);	\
		c -= a; c -= b; c ^= (b >> 5);	\
		a -= b; a -= c; a ^= (c >> 3);	\
		b -= c; b -= a; b ^= (a << 10);	\
		c -= a; c -= b; c ^= (b >> 15);	\
	} while (0)
1871
1872/*
1873 * hash function based on bridge_hash in if_bridge.c
1874 */
1875void
1876pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
1877    struct pf_poolhashkey *key, sa_family_t af)
1878{
1879	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
1880
1881	switch (af) {
1882#ifdef INET
1883	case AF_INET:
1884		a += inaddr->addr32[0];
1885		b += key->key32[1];
1886		mix(a, b, c);
1887		hash->addr32[0] = c + key->key32[2];
1888		break;
1889#endif /* INET */
1890#ifdef INET6
1891	case AF_INET6:
1892		a += inaddr->addr32[0];
1893		b += inaddr->addr32[2];
1894		mix(a, b, c);
1895		hash->addr32[0] = c;
1896		a += inaddr->addr32[1];
1897		b += inaddr->addr32[3];
1898		c += key->key32[1];
1899		mix(a, b, c);
1900		hash->addr32[1] = c;
1901		a += inaddr->addr32[2];
1902		b += inaddr->addr32[1];
1903		c += key->key32[2];
1904		mix(a, b, c);
1905		hash->addr32[2] = c;
1906		a += inaddr->addr32[3];
1907		b += inaddr->addr32[0];
1908		c += key->key32[3];
1909		mix(a, b, c);
1910		hash->addr32[3] = c;
1911		break;
1912#endif /* INET6 */
1913	}
1914}
1915
/*
 * Select a translation address from the address pool of rule r and store
 * it in naddr.
 *
 * af         address family of the addresses involved
 * saddr      source address of the connection; input to the bitmask and
 *            source-hash pool types and key of the source tracking lookup
 * init_addr  for PF_POOL_RANDOM: if non-NULL and all-zero, a fresh random
 *            starting point is chosen and the selected address is stored
 *            back into it; otherwise the pool counter is incremented
 * sn         in/out source node; when *sn is NULL and the pool is
 *            sticky, the source tracking tree is searched and a node with
 *            a non-zero raddr short-cuts the selection.  A non-NULL *sn
 *            gets the selected address recorded in its raddr.
 *
 * Returns 0 on success and 1 if no address could be selected.
 */
int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char		 hash[16];
	struct pf_pool		*rpool = &r->rpool;
	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
	struct pf_pooladdr	*acur = rpool->cur;
	struct pf_src_node	 k;

	/* sticky-address: reuse the mapping recorded for this source */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR)
			k.rule.ptr = r;
		else
			k.rule.ptr = NULL;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
			PF_ACPY(naddr, &(*sn)->raddr, af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, af);
				printf("\n");
			}
			return (0);
		}
	}

	/* pick raddr/rmask according to the type of the current pool entry */
	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
		return (1);
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		/* anything other than AF_INET is handled as IPv6 here */
		if (af == AF_INET) {
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
		} else {
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
			return (1); /* unsupported */
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		PF_ACPY(naddr, raddr, af);
		break;
	case PF_POOL_BITMASK:
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
			switch (af) {
#ifdef INET
			case AF_INET:
				rpool->counter.addr32[0] = arc4random();
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				/*
				 * Randomize counter words from the last one
				 * towards the first, stopping at the first
				 * word whose mask is all ones.
				 */
				if (rmask->addr32[3] != 0xffffffff)
					rpool->counter.addr32[3] = arc4random();
				else
					break;
				if (rmask->addr32[2] != 0xffffffff)
					rpool->counter.addr32[2] = arc4random();
				else
					break;
				if (rmask->addr32[1] != 0xffffffff)
					rpool->counter.addr32[1] = arc4random();
				else
					break;
				if (rmask->addr32[0] != 0xffffffff)
					rpool->counter.addr32[0] = arc4random();
				break;
#endif /* INET6 */
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
			/* hand the starting point back to the caller */
			PF_ACPY(init_addr, naddr, af);

		} else {
			PF_AINC(&rpool->counter, af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
		}
		break;
	case PF_POOL_SRCHASH:
		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		/*
		 * First try to continue inside the current pool entry: a
		 * pfr_pool_get() result of 0 (tables, dynamic interfaces)
		 * or a counter still matching raddr/rmask means the
		 * counter holds the address to use.
		 */
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af))
				goto get_addr;
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af))
				goto get_addr;
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
			goto get_addr;

	try_next:
		/* advance to the next pool entry, wrapping at the end */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
			rpool->cur = TAILQ_FIRST(&rpool->list);
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				/* give up once we are back at the start entry */
				if (rpool->cur != acur)
					goto try_next;
				return (1);
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				/* give up once we are back at the start entry */
				if (rpool->cur != acur)
					goto try_next;
				return (1);
			}
		} else {
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, af);
		}

	get_addr:
		/* use the counter, then advance it for the next caller */
		PF_ACPY(naddr, &rpool->counter, af);
		PF_AINC(&rpool->counter, af);
		break;
	}
	/* record the choice in the source node, if there is one */
	if (*sn != NULL)
		PF_ACPY(&(*sn)->raddr, naddr, af);

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, af);
		printf("\n");
	}

	return (0);
}
2083
2084int
2085pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2086    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2087    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2088    struct pf_src_node **sn)
2089{
2090	struct pf_state		key;
2091	struct pf_addr		init_addr;
2092	u_int16_t		cut;
2093
2094	bzero(&init_addr, sizeof(init_addr));
2095	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2096		return (1);
2097
2098	do {
2099		key.af = af;
2100		key.proto = proto;
2101		PF_ACPY(&key.ext.addr, daddr, key.af);
2102		PF_ACPY(&key.gwy.addr, naddr, key.af);
2103		key.ext.port = dport;
2104
2105		/*
2106		 * port search; start random, step;
2107		 * similar 2 portloop in in_pcbbind
2108		 */
2109		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
2110			key.gwy.port = 0;
2111			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2112				return (0);
2113		} else if (low == 0 && high == 0) {
2114			key.gwy.port = *nport;
2115			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2116				return (0);
2117		} else if (low == high) {
2118			key.gwy.port = htons(low);
2119			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2120				*nport = htons(low);
2121				return (0);
2122			}
2123		} else {
2124			u_int16_t tmp;
2125
2126			if (low > high) {
2127				tmp = low;
2128				low = high;
2129				high = tmp;
2130			}
2131			/* low < high */
2132			cut = arc4random() % (1 + high - low) + low;
2133			/* low <= cut <= high */
2134			for (tmp = cut; tmp <= high; ++(tmp)) {
2135				key.gwy.port = htons(tmp);
2136				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2137				    NULL) {
2138					*nport = htons(tmp);
2139					return (0);
2140				}
2141			}
2142			for (tmp = cut - 1; tmp >= low; --(tmp)) {
2143				key.gwy.port = htons(tmp);
2144				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2145				    NULL) {
2146					*nport = htons(tmp);
2147					return (0);
2148				}
2149			}
2150		}
2151
2152		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2153		case PF_POOL_RANDOM:
2154		case PF_POOL_ROUNDROBIN:
2155			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2156				return (1);
2157			break;
2158		case PF_POOL_NONE:
2159		case PF_POOL_SRCHASH:
2160		case PF_POOL_BITMASK:
2161		default:
2162			return (1);
2163		}
2164	} while (! PF_AEQ(&init_addr, naddr, af) );
2165
2166	return (1);					/* none available */
2167}
2168
/*
 * Find the translation rule that applies to a packet.
 *
 * Walks the active rules of list rs_num in the main ruleset, descending
 * into and returning from anchors on the way, and stops at the first rule
 * for which all criteria (interface, direction, address family, protocol,
 * addresses, ports, OS fingerprint) match.  Mismatches on the criteria
 * covered by skip steps jump ahead via r->skip[]; the others move on to
 * the next rule.
 *
 * Returns the matching rule, or NULL if no rule matches or the matching
 * rule's action is PF_NONAT, PF_NORDR or PF_NOBINAT.
 */
struct pf_rule *
pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
    struct pf_addr *daddr, u_int16_t dport, int rs_num)
{
	struct pf_rule		*r, *rm = NULL, *anchorrule = NULL;
	struct pf_ruleset	*ruleset = NULL;

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r && rm == NULL) {
		struct pf_rule_addr	*src = NULL, *dst = NULL;
		struct pf_addr_wrap	*xdst = NULL;

		/*
		 * Inbound binat: the packet's source is compared against
		 * the rule's dst, and its destination against the current
		 * pool address (xdst) instead of a rule address.
		 */
		if (r->action == PF_BINAT && direction == PF_IN) {
			src = &r->dst;
			if (r->rpool.cur != NULL)
				xdst = &r->rpool.cur->addr;
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		r->evaluations++;
		/*
		 * Interface test: "kif is neither the rule's interface nor
		 * a child of it" compared against !ifnot, so ifnot inverts
		 * the sense of the match.
		 */
		if (r->kif != NULL &&
		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != pd->af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		/* src may alias r->dst (binat), so pick the matching skip step */
		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not))
			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
			    PF_SKIP_DST_ADDR].ptr;
		else if (src->port_op && !pf_match_port(src->port_op,
		    src->port[0], src->port[1], sport))
			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT].ptr;
		else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0))
			r = TAILQ_NEXT(r, entries);
		else if (dst != NULL && dst->port_op &&
		    !pf_match_port(dst->port_op, dst->port[0],
		    dst->port[1], dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		/* OS fingerprints are only evaluated for TCP */
		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
		    off, pd->hdr.tcp), r->os_fingerprint)))
			r = TAILQ_NEXT(r, entries);
		/* anchor rule whose anchor pointer is unset: skip it */
		else if (r->anchorname[0] && r->anchor == NULL)
			r = TAILQ_NEXT(r, entries);
		else if (r->anchor == NULL)
				rm = r;
		else
			PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num);
		/* ran off the end of a ruleset inside an anchor */
		if (r == NULL && anchorrule != NULL)
			PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset,
			    rs_num);
	}
	/* "no nat/rdr/binat" rules end the search without a translation */
	if (rm != NULL && (rm->action == PF_NONAT ||
	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
		return (NULL);
	return (rm);
}
2236
2237struct pf_rule *
2238pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2239    struct pfi_kif *kif, struct pf_src_node **sn,
2240    struct pf_addr *saddr, u_int16_t sport,
2241    struct pf_addr *daddr, u_int16_t dport,
2242    struct pf_addr *naddr, u_int16_t *nport)
2243{
2244	struct pf_rule	*r = NULL;
2245
2246	if (direction == PF_OUT) {
2247		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2248		    sport, daddr, dport, PF_RULESET_BINAT);
2249		if (r == NULL)
2250			r = pf_match_translation(pd, m, off, direction, kif,
2251			    saddr, sport, daddr, dport, PF_RULESET_NAT);
2252	} else {
2253		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2254		    sport, daddr, dport, PF_RULESET_RDR);
2255		if (r == NULL)
2256			r = pf_match_translation(pd, m, off, direction, kif,
2257			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
2258	}
2259
2260	if (r != NULL) {
2261		switch (r->action) {
2262		case PF_NONAT:
2263		case PF_NOBINAT:
2264		case PF_NORDR:
2265			return (NULL);
2266		case PF_NAT:
2267			if (pf_get_sport(pd->af, pd->proto, r, saddr,
2268			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2269			    r->rpool.proxy_port[1], sn)) {
2270				DPFPRINTF(PF_DEBUG_MISC,
2271				    ("pf: NAT proxy port allocation "
2272				    "(%u-%u) failed\n",
2273				    r->rpool.proxy_port[0],
2274				    r->rpool.proxy_port[1]));
2275				return (NULL);
2276			}
2277			break;
2278		case PF_BINAT:
2279			switch (direction) {
2280			case PF_OUT:
2281				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2282					if (pd->af == AF_INET) {
2283						if (r->rpool.cur->addr.p.dyn->
2284						    pfid_acnt4 < 1)
2285							return (NULL);
2286						PF_POOLMASK(naddr,
2287						    &r->rpool.cur->addr.p.dyn->
2288						    pfid_addr4,
2289						    &r->rpool.cur->addr.p.dyn->
2290						    pfid_mask4,
2291						    saddr, AF_INET);
2292					} else {
2293						if (r->rpool.cur->addr.p.dyn->
2294						    pfid_acnt6 < 1)
2295							return (NULL);
2296						PF_POOLMASK(naddr,
2297						    &r->rpool.cur->addr.p.dyn->
2298						    pfid_addr6,
2299						    &r->rpool.cur->addr.p.dyn->
2300						    pfid_mask6,
2301						    saddr, AF_INET6);
2302					}
2303				} else
2304					PF_POOLMASK(naddr,
2305					    &r->rpool.cur->addr.v.a.addr,
2306					    &r->rpool.cur->addr.v.a.mask,
2307					    saddr, pd->af);
2308				break;
2309			case PF_IN:
2310				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2311					if (pd->af == AF_INET) {
2312						if (r->src.addr.p.dyn->
2313						    pfid_acnt4 < 1)
2314							return (NULL);
2315						PF_POOLMASK(naddr,
2316						    &r->src.addr.p.dyn->
2317						    pfid_addr4,
2318						    &r->src.addr.p.dyn->
2319						    pfid_mask4,
2320						    daddr, AF_INET);
2321					} else {
2322						if (r->src.addr.p.dyn->
2323						    pfid_acnt6 < 1)
2324							return (NULL);
2325						PF_POOLMASK(naddr,
2326						    &r->src.addr.p.dyn->
2327						    pfid_addr6,
2328						    &r->src.addr.p.dyn->
2329						    pfid_mask6,
2330						    daddr, AF_INET6);
2331					}
2332				} else
2333					PF_POOLMASK(naddr,
2334					    &r->src.addr.v.a.addr,
2335					    &r->src.addr.v.a.mask, daddr,
2336					    pd->af);
2337				break;
2338			}
2339			break;
2340		case PF_RDR: {
2341			if (pf_map_addr(r->af, r, saddr, naddr, NULL, sn))
2342				return (NULL);
2343
2344			if (r->rpool.proxy_port[1]) {
2345				u_int32_t	tmp_nport;
2346
2347				tmp_nport = ((ntohs(dport) -
2348				    ntohs(r->dst.port[0])) %
2349				    (r->rpool.proxy_port[1] -
2350				    r->rpool.proxy_port[0] + 1)) +
2351				    r->rpool.proxy_port[0];
2352
2353				/* wrap around if necessary */
2354				if (tmp_nport > 65535)
2355					tmp_nport -= 65535;
2356				*nport = htons((u_int16_t)tmp_nport);
2357			} else if (r->rpool.proxy_port[0])
2358				*nport = htons(r->rpool.proxy_port[0]);
2359			break;
2360		}
2361		default:
2362			return (NULL);
2363		}
2364	}
2365
2366	return (r);
2367}
2368
2369int
2370pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd)
2371{
2372	struct pf_addr		*saddr, *daddr;
2373	u_int16_t		 sport, dport;
2374#ifdef __FreeBSD__
2375	struct inpcbinfo	*pi;
2376#else
2377	struct inpcbtable	*tb;
2378#endif
2379	struct inpcb		*inp;
2380
2381	*uid = UID_MAX;
2382	*gid = GID_MAX;
2383	switch (pd->proto) {
2384	case IPPROTO_TCP:
2385		sport = pd->hdr.tcp->th_sport;
2386		dport = pd->hdr.tcp->th_dport;
2387#ifdef __FreeBSD__
2388		pi = &tcbinfo;
2389#else
2390		tb = &tcbtable;
2391#endif
2392		break;
2393	case IPPROTO_UDP:
2394		sport = pd->hdr.udp->uh_sport;
2395		dport = pd->hdr.udp->uh_dport;
2396#ifdef __FreeBSD__
2397		pi = &udbinfo;
2398#else
2399		tb = &udbtable;
2400#endif
2401		break;
2402	default:
2403		return (0);
2404	}
2405	if (direction == PF_IN) {
2406		saddr = pd->src;
2407		daddr = pd->dst;
2408	} else {
2409		u_int16_t	p;
2410
2411		p = sport;
2412		sport = dport;
2413		dport = p;
2414		saddr = pd->dst;
2415		daddr = pd->src;
2416	}
2417	switch (pd->af) {
2418	case AF_INET:
2419#ifdef __FreeBSD__
2420		INP_INFO_RLOCK(pi);	/* XXX LOR */
2421		inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2422			dport, 0, NULL);
2423		if (inp == NULL) {
2424			inp = in_pcblookup_hash(pi, saddr->v4, sport,
2425			   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
2426			if(inp == NULL) {
2427				INP_INFO_RUNLOCK(pi);
2428				return (0);
2429			}
2430		}
2431#else
2432		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
2433		if (inp == NULL) {
2434			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
2435			if (inp == NULL)
2436				return (0);
2437		}
2438#endif
2439		break;
2440#ifdef INET6
2441	case AF_INET6:
2442#ifdef __FreeBSD__
2443		INP_INFO_RLOCK(pi);
2444		inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2445			&daddr->v6, dport, 0, NULL);
2446		if (inp == NULL) {
2447			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2448			&daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2449			if (inp == NULL) {
2450				INP_INFO_RUNLOCK(pi);
2451				return (0);
2452			}
2453		}
2454#else
2455		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
2456		    dport);
2457		if (inp == NULL) {
2458			inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
2459			if (inp == NULL)
2460				return (0);
2461		}
2462#endif
2463		break;
2464#endif /* INET6 */
2465
2466	default:
2467		return (0);
2468	}
2469#ifdef __FreeBSD__
2470	INP_LOCK(inp);
2471	*uid = inp->inp_socket->so_cred->cr_uid;
2472	*gid = inp->inp_socket->so_cred->cr_groups[0];
2473	INP_UNLOCK(inp);
2474	INP_INFO_RUNLOCK(pi);
2475#else
2476	*uid = inp->inp_socket->so_euid;
2477	*gid = inp->inp_socket->so_egid;
2478#endif
2479	return (1);
2480}
2481
2482u_int8_t
2483pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2484{
2485	int		 hlen;
2486	u_int8_t	 hdr[60];
2487	u_int8_t	*opt, optlen;
2488	u_int8_t	 wscale = 0;
2489
2490	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2491	if (hlen <= sizeof(struct tcphdr))
2492		return (0);
2493	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2494		return (0);
2495	opt = hdr + sizeof(struct tcphdr);
2496	hlen -= sizeof(struct tcphdr);
2497	while (hlen >= 3) {
2498		switch (*opt) {
2499		case TCPOPT_EOL:
2500		case TCPOPT_NOP:
2501			++opt;
2502			--hlen;
2503			break;
2504		case TCPOPT_WINDOW:
2505			wscale = opt[2];
2506			if (wscale > TCP_MAX_WINSHIFT)
2507				wscale = TCP_MAX_WINSHIFT;
2508			wscale |= PF_WSCALE_FLAG;
2509			/* FALLTHROUGH */
2510		default:
2511			optlen = opt[1];
2512			if (optlen < 2)
2513				optlen = 2;
2514			hlen -= optlen;
2515			opt += optlen;
2516			break;
2517		}
2518	}
2519	return (wscale);
2520}
2521
2522u_int16_t
2523pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2524{
2525	int		 hlen;
2526	u_int8_t	 hdr[60];
2527	u_int8_t	*opt, optlen;
2528	u_int16_t	 mss = tcp_mssdflt;
2529
2530	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2531	if (hlen <= sizeof(struct tcphdr))
2532		return (0);
2533	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2534		return (0);
2535	opt = hdr + sizeof(struct tcphdr);
2536	hlen -= sizeof(struct tcphdr);
2537	while (hlen >= TCPOLEN_MAXSEG) {
2538		switch (*opt) {
2539		case TCPOPT_EOL:
2540		case TCPOPT_NOP:
2541			++opt;
2542			--hlen;
2543			break;
2544		case TCPOPT_MAXSEG:
2545			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2546			/* FALLTHROUGH */
2547		default:
2548			optlen = opt[1];
2549			if (optlen < 2)
2550				optlen = 2;
2551			hlen -= optlen;
2552			opt += optlen;
2553			break;
2554		}
2555	}
2556	return (mss);
2557}
2558
2559u_int16_t
2560pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2561{
2562#ifdef INET
2563	struct sockaddr_in	*dst;
2564	struct route		 ro;
2565#endif /* INET */
2566#ifdef INET6
2567	struct sockaddr_in6	*dst6;
2568	struct route_in6	 ro6;
2569#endif /* INET6 */
2570	struct rtentry		*rt = NULL;
2571	int			 hlen = 0;	/* make the compiler happy */
2572	u_int16_t		 mss = tcp_mssdflt;
2573
2574	switch (af) {
2575#ifdef INET
2576	case AF_INET:
2577		hlen = sizeof(struct ip);
2578		bzero(&ro, sizeof(ro));
2579		dst = (struct sockaddr_in *)&ro.ro_dst;
2580		dst->sin_family = AF_INET;
2581		dst->sin_len = sizeof(*dst);
2582		dst->sin_addr = addr->v4;
2583#ifdef __FreeBSD__
2584#ifdef RTF_PRCLONING
2585		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2586#else /* !RTF_PRCLONING */
2587		rtalloc_ign(&ro, RTF_CLONING);
2588#endif
2589#else /* ! __FreeBSD__ */
2590		rtalloc_noclone(&ro, NO_CLONING);
2591#endif
2592		rt = ro.ro_rt;
2593		break;
2594#endif /* INET */
2595#ifdef INET6
2596	case AF_INET6:
2597		hlen = sizeof(struct ip6_hdr);
2598		bzero(&ro6, sizeof(ro6));
2599		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2600		dst6->sin6_family = AF_INET6;
2601		dst6->sin6_len = sizeof(*dst6);
2602		dst6->sin6_addr = addr->v6;
2603#ifdef __FreeBSD__
2604#ifdef RTF_PRCLONING
2605		rtalloc_ign((struct route *)&ro6,
2606		    (RTF_CLONING | RTF_PRCLONING));
2607#else /* !RTF_PRCLONING */
2608		rtalloc_ign((struct route *)&ro6, RTF_CLONING);
2609#endif
2610#else /* ! __FreeBSD__ */
2611		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
2612#endif
2613		rt = ro6.ro_rt;
2614		break;
2615#endif /* INET6 */
2616	}
2617
2618	if (rt && rt->rt_ifp) {
2619		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2620		mss = max(tcp_mssdflt, mss);
2621		RTFREE(rt);
2622	}
2623	mss = min(mss, offer);
2624	mss = max(mss, 64);		/* sanity - at least max opt space */
2625	return (mss);
2626}
2627
2628void
2629pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2630{
2631	struct pf_rule *r = s->rule.ptr;
2632
2633	s->rt_kif = NULL;
2634	if (!r->rt || r->rt == PF_FASTROUTE)
2635		return;
2636	switch (s->af) {
2637#ifdef INET
2638	case AF_INET:
2639		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
2640		    &s->nat_src_node);
2641		s->rt_kif = r->rpool.cur->kif;
2642		break;
2643#endif /* INET */
2644#ifdef INET6
2645	case AF_INET6:
2646		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
2647		    &s->nat_src_node);
2648		s->rt_kif = r->rpool.cur->kif;
2649		break;
2650#endif /* INET6 */
2651	}
2652}
2653
/*
 * Evaluate the filter ruleset for a TCP packet that matched no existing
 * state, and create a state for it if the rules ask for one.
 *
 * Steps, in order:
 *  1. Apply address/port translation (nat/binat outbound, rdr/binat
 *     inbound) to the header copy `th' via pf_get_translation().
 *  2. Walk the active filter rules; the last matching rule (or the first
 *     matching "quick" rule) is stored in *rm, with its anchor rule in
 *     *am and ruleset in *rsm.  If the translation rule has natpass set
 *     the walk is skipped and the caller's *rm is used unchanged.
 *  3. Log the packet if the rule asks for it.
 *  4. For drop rules with a return option, undo the translation and send
 *     a TCP RST or an ICMP error.
 *  5. For pass rules that keep state, for translated packets and for
 *     packets flagged PFDESC_TCP_NORM, allocate, fill in and insert a
 *     new state (*sm).  For synproxy rules answer a pure SYN with a
 *     SYN|ACK.
 *
 * Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP.
 */
int
pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
{
	struct pf_rule		*nr = NULL;
	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
	struct tcphdr		*th = pd->hdr.tcp;
	u_int16_t		 bport, nport = 0;
	sa_family_t		 af = pd->af;
	int			 lookup = -1;	/* -1: socket lookup not done */
	uid_t			 uid;
	gid_t			 gid;
	struct pf_rule		*r, *a = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_src_node	*nsn = NULL;
	u_short			 reason;
	int			 rewrite = 0;	/* header copy was modified */
	struct pf_tag		*pftag = NULL;
	int			 tag = -1;
	u_int16_t		 mss = tcp_mssdflt;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);

	/*
	 * bport keeps the original port so the rewrite can be undone and
	 * recorded in the state; pd->baddr keeps the original address.
	 */
	if (direction == PF_OUT) {
		bport = nport = th->th_sport;
		/* check outgoing packet for BINAT/NAT */
		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
		    saddr, th->th_sport, daddr, th->th_dport,
		    &pd->naddr, &nport)) != NULL) {
			PF_ACPY(&pd->baddr, saddr, af);
			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
			    &th->th_sum, &pd->naddr, nport, 0, af);
			rewrite++;
			/* natpass: bypass the filter rules entirely */
			if (nr->natpass)
				r = NULL;
			pd->nat_rule = nr;
		}
	} else {
		bport = nport = th->th_dport;
		/* check incoming packet for BINAT/RDR */
		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
		    saddr, th->th_sport, daddr, th->th_dport,
		    &pd->naddr, &nport)) != NULL) {
			PF_ACPY(&pd->baddr, daddr, af);
			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
			    &th->th_sum, &pd->naddr, nport, 0, af);
			rewrite++;
			/* natpass: bypass the filter rules entirely */
			if (nr->natpass)
				r = NULL;
			pd->nat_rule = nr;
		}
	}

	/*
	 * Filter rule walk.  Each failed criterion advances r, either via
	 * the rule's skip pointer for that criterion or to the next entry.
	 * The addresses and ports tested here are the translated ones.
	 */
	while (r != NULL) {
		r->evaluations++;
		if (r->kif != NULL &&
		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != IPPROTO_TCP)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (r->src.port_op && !pf_match_port(r->src.port_op,
		    r->src.port[0], r->src.port[1], th->th_sport))
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
		    r->dst.port[0], r->dst.port[1], th->th_dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->tos && !(r->tos & pd->tos))
			r = TAILQ_NEXT(r, entries);
		else if (r->rule_flag & PFRULE_FRAGMENT)
			r = TAILQ_NEXT(r, entries);
		else if ((r->flagset & th->th_flags) != r->flags)
			r = TAILQ_NEXT(r, entries);
		/*
		 * uid/gid criteria: the socket lookup is done at most once
		 * per packet.  The comma expression stores the result in
		 * lookup and evaluates to 1, so the match test always runs.
		 */
		else if (r->uid.op && (lookup != -1 || (lookup =
		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
		    uid))
			r = TAILQ_NEXT(r, entries);
		else if (r->gid.op && (lookup != -1 || (lookup =
		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
		    gid))
			r = TAILQ_NEXT(r, entries);
		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
			r = TAILQ_NEXT(r, entries);
		/* rule names an anchor that is not attached: skip it */
		else if (r->anchorname[0] && r->anchor == NULL)
			r = TAILQ_NEXT(r, entries);
		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
		    pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
			r = TAILQ_NEXT(r, entries);
		else {
			/* rule matched */
			if (r->tag)
				tag = r->tag;
			if (r->anchor == NULL) {
				/* remember it; a later match overrides it
				 * unless this rule is "quick" */
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick)
					break;
				r = TAILQ_NEXT(r, entries);
			} else
				PF_STEP_INTO_ANCHOR(r, a, ruleset,
				    PF_RULESET_FILTER);
		}
		/* end of an anchor's rules: resume in the enclosing list */
		if (r == NULL && a != NULL)
			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
			    PF_RULESET_FILTER);
	}
	/* continue with the rule that finally matched */
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log) {
		/* write the rewritten header back so the log shows it */
		if (rewrite)
			m_copyback(m, off, sizeof(*th), (caddr_t)th);
		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
	}

	if ((r->action == PF_DROP) &&
	    ((r->rule_flag & PFRULE_RETURNRST) ||
	    (r->rule_flag & PFRULE_RETURNICMP) ||
	    (r->rule_flag & PFRULE_RETURN))) {
		/* undo NAT changes, if they have taken place */
		if (nr != NULL) {
			if (direction == PF_OUT) {
				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
				    &th->th_sum, &pd->baddr, bport, 0, af);
				rewrite++;
			} else {
				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
				    &th->th_sum, &pd->baddr, bport, 0, af);
				rewrite++;
			}
		}
		/* never answer a RST with a RST */
		if (((r->rule_flag & PFRULE_RETURNRST) ||
		    (r->rule_flag & PFRULE_RETURN)) &&
		    !(th->th_flags & TH_RST)) {
			/* acknowledge the payload plus SYN and FIN, which
			 * each take up one sequence number */
			u_int32_t ack = ntohl(th->th_seq) + pd->p_len;

			if (th->th_flags & TH_SYN)
				ack++;
			if (th->th_flags & TH_FIN)
				ack++;
			pf_send_tcp(r, af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
			    r->return_ttl);
		} else if ((af == AF_INET) && r->return_icmp)
			/* return_icmp holds the type in the high byte and
			 * the code in the low byte */
			pf_send_icmp(m, r->return_icmp >> 8,
			    r->return_icmp & 255, af, r);
		else if ((af == AF_INET6) && r->return_icmp6)
			pf_send_icmp(m, r->return_icmp6 >> 8,
			    r->return_icmp6 & 255, af, r);
	}

	if (r->action == PF_DROP)
		return (PF_DROP);

	if (pf_tag_packet(m, pftag, tag)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return (PF_DROP);
	}

	if (r->keep_state || nr != NULL ||
	    (pd->flags & PFDESC_TCP_NORM)) {
		/* create new state */
		u_int16_t	 len;
		struct pf_state	*s = NULL;
		struct pf_src_node *sn = NULL;

		/* TCP payload length of this packet */
		len = pd->tot_len - off - (th->th_off << 2);

		/* check maximums */
		if (r->max_states && (r->states >= r->max_states))
			goto cleanup;
		/* src node for filter rule */
		if ((r->rule_flag & PFRULE_SRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR) &&
		    pf_insert_src_node(&sn, r, saddr, af) != 0)
			goto cleanup;
		/* src node for translation rule */
		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
		    ((direction == PF_OUT &&
		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
			goto cleanup;
		s = pool_get(&pf_state_pl, PR_NOWAIT);
		if (s == NULL) {
cleanup:
			/*
			 * Shared failure path, also entered by the gotos
			 * above: release source nodes that no state refers
			 * to and that have no expiry set, then drop.
			 */
			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
				pf_status.src_nodes--;
				pool_put(&pf_src_tree_pl, sn);
			}
			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
			    nsn->expire == 0) {
				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
				pf_status.src_nodes--;
				pool_put(&pf_src_tree_pl, nsn);
			}
			REASON_SET(&reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		bzero(s, sizeof(*s));
		/* account the new state on every rule it refers to */
		r->states++;
		if (a != NULL)
			a->states++;
		s->rule.ptr = r;
		s->nat_rule.ptr = nr;
		if (s->nat_rule.ptr != NULL)
			s->nat_rule.ptr->states++;
		s->anchor.ptr = a;
		s->allow_opts = r->allow_opts;
		s->log = r->log & 2;
		s->proto = IPPROTO_TCP;
		s->direction = direction;
		s->af = af;
		/*
		 * Fill in the three endpoints.  Without translation lan
		 * and gwy are identical; with translation the side that was
		 * rewritten gets the current header values and the other
		 * one the saved originals (pd->baddr / bport).
		 */
		if (direction == PF_OUT) {
			PF_ACPY(&s->gwy.addr, saddr, af);
			s->gwy.port = th->th_sport;		/* sport */
			PF_ACPY(&s->ext.addr, daddr, af);
			s->ext.port = th->th_dport;
			if (nr != NULL) {
				PF_ACPY(&s->lan.addr, &pd->baddr, af);
				s->lan.port = bport;
			} else {
				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
				s->lan.port = s->gwy.port;
			}
		} else {
			PF_ACPY(&s->lan.addr, daddr, af);
			s->lan.port = th->th_dport;
			PF_ACPY(&s->ext.addr, saddr, af);
			s->ext.port = th->th_sport;
			if (nr != NULL) {
				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
				s->gwy.port = bport;
			} else {
				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
				s->gwy.port = s->lan.port;
			}
		}

		/* initial sequence tracking for the sending side */
		s->src.seqlo = ntohl(th->th_seq);
		s->src.seqhi = s->src.seqlo + len + 1;
		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
		    r->keep_state == PF_STATE_MODULATE) {
			/* Generate sequence number modulator */
			while ((s->src.seqdiff = arc4random()) == 0)
				;
			pf_change_a(&th->th_seq, &th->th_sum,
			    htonl(s->src.seqlo + s->src.seqdiff), 0);
			rewrite = 1;
		} else
			s->src.seqdiff = 0;
		if (th->th_flags & TH_SYN) {
			s->src.seqhi++;
			s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
		}
		s->src.max_win = MAX(ntohs(th->th_win), 1);
		if (s->src.wscale & PF_WSCALE_MASK) {
			/* Remove scale factor from initial window */
			int win = s->src.max_win;
			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
			s->src.max_win = (win - 1) >>
			    (s->src.wscale & PF_WSCALE_MASK);
		}
		if (th->th_flags & TH_FIN)
			s->src.seqhi++;
		/* nothing has been seen from the other side yet */
		s->dst.seqhi = 1;
		s->dst.max_win = 1;
		s->src.state = TCPS_SYN_SENT;
		s->dst.state = TCPS_CLOSED;
#ifdef __FreeBSD__
		s->creation = time_second;
		s->expire = time_second;
#else
		s->creation = time.tv_sec;
		s->expire = time.tv_sec;
#endif
		s->timeout = PFTM_TCP_FIRST_PACKET;
		pf_set_rt_ifp(s, saddr);
		if (sn != NULL) {
			s->src_node = sn;
			s->src_node->states++;
		}
		if (nsn != NULL) {
			/* record the translated address in the source node */
			PF_ACPY(&nsn->raddr, &pd->naddr, af);
			s->nat_src_node = nsn;
			s->nat_src_node->states++;
		}
		/*
		 * From here on a failure has to release the state and
		 * detach it from its source nodes before dropping.
		 */
		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
		    off, pd, th, &s->src, &s->dst)) {
			REASON_SET(&reason, PFRES_MEMORY);
			pf_src_tree_remove_state(s);
			pool_put(&pf_state_pl, s);
			return (PF_DROP);
		}
		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src,
		    &s->dst, &rewrite)) {
			pf_normalize_tcp_cleanup(s);
			pf_src_tree_remove_state(s);
			pool_put(&pf_state_pl, s);
			return (PF_DROP);
		}
		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
			pf_normalize_tcp_cleanup(s);
			REASON_SET(&reason, PFRES_MEMORY);
			pf_src_tree_remove_state(s);
			pool_put(&pf_state_pl, s);
			return (PF_DROP);
		} else
			*sm = s;
		/*
		 * synproxy: do not forward a pure SYN.  Restore the
		 * untranslated address/port in the header copy, pick a
		 * random initial sequence number and answer the sender with
		 * a SYN|ACK.
		 */
		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
		    r->keep_state == PF_STATE_SYNPROXY) {
			s->src.state = PF_TCPS_PROXY_SRC;
			if (nr != NULL) {
				if (direction == PF_OUT) {
					pf_change_ap(saddr, &th->th_sport,
					    pd->ip_sum, &th->th_sum, &pd->baddr,
					    bport, 0, af);
				} else {
					pf_change_ap(daddr, &th->th_dport,
					    pd->ip_sum, &th->th_sum, &pd->baddr,
					    bport, 0, af);
				}
			}
			s->src.seqhi = arc4random();
			/* Find mss option */
			mss = pf_get_mss(m, off, th->th_off, af);
			/* limit it by what pf_calc_mss() yields for each
			 * of the two addresses */
			mss = pf_calc_mss(saddr, af, mss);
			mss = pf_calc_mss(daddr, af, mss);
			s->src.mss = mss;
			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
			    TH_SYN|TH_ACK, 0, s->src.mss, 0);
			return (PF_SYNPROXY_DROP);
		}
	}

	/* copy back packet headers if we performed NAT operations */
	if (rewrite)
		m_copyback(m, off, sizeof(*th), (caddr_t)th);

	return (PF_PASS);
}
3013
3014int
3015pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
3016    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3017    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3018{
3019	struct pf_rule		*nr = NULL;
3020	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3021	struct udphdr		*uh = pd->hdr.udp;
3022	u_int16_t		 bport, nport = 0;
3023	sa_family_t		 af = pd->af;
3024	int			 lookup = -1;
3025	uid_t			 uid;
3026	gid_t			 gid;
3027	struct pf_rule		*r, *a = NULL;
3028	struct pf_ruleset	*ruleset = NULL;
3029	struct pf_src_node	*nsn = NULL;
3030	u_short			 reason;
3031	int			 rewrite = 0;
3032	struct pf_tag		*pftag = NULL;
3033	int			 tag = -1;
3034
3035	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3036
3037	if (direction == PF_OUT) {
3038		bport = nport = uh->uh_sport;
3039		/* check outgoing packet for BINAT/NAT */
3040		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3041		    saddr, uh->uh_sport, daddr, uh->uh_dport,
3042		    &pd->naddr, &nport)) != NULL) {
3043			PF_ACPY(&pd->baddr, saddr, af);
3044			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3045			    &uh->uh_sum, &pd->naddr, nport, 1, af);
3046			rewrite++;
3047			if (nr->natpass)
3048				r = NULL;
3049			pd->nat_rule = nr;
3050		}
3051	} else {
3052		bport = nport = uh->uh_dport;
3053		/* check incoming packet for BINAT/RDR */
3054		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3055		    saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
3056		    &nport)) != NULL) {
3057			PF_ACPY(&pd->baddr, daddr, af);
3058			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3059			    &uh->uh_sum, &pd->naddr, nport, 1, af);
3060			rewrite++;
3061			if (nr->natpass)
3062				r = NULL;
3063			pd->nat_rule = nr;
3064		}
3065	}
3066
3067	while (r != NULL) {
3068		r->evaluations++;
3069		if (r->kif != NULL &&
3070		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3071			r = r->skip[PF_SKIP_IFP].ptr;
3072		else if (r->direction && r->direction != direction)
3073			r = r->skip[PF_SKIP_DIR].ptr;
3074		else if (r->af && r->af != af)
3075			r = r->skip[PF_SKIP_AF].ptr;
3076		else if (r->proto && r->proto != IPPROTO_UDP)
3077			r = r->skip[PF_SKIP_PROTO].ptr;
3078		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3079			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3080		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3081		    r->src.port[0], r->src.port[1], uh->uh_sport))
3082			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3083		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3084			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3085		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3086		    r->dst.port[0], r->dst.port[1], uh->uh_dport))
3087			r = r->skip[PF_SKIP_DST_PORT].ptr;
3088		else if (r->tos && !(r->tos & pd->tos))
3089			r = TAILQ_NEXT(r, entries);
3090		else if (r->rule_flag & PFRULE_FRAGMENT)
3091			r = TAILQ_NEXT(r, entries);
3092		else if (r->uid.op && (lookup != -1 || (lookup =
3093		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
3094		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3095		    uid))
3096			r = TAILQ_NEXT(r, entries);
3097		else if (r->gid.op && (lookup != -1 || (lookup =
3098		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
3099		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3100		    gid))
3101			r = TAILQ_NEXT(r, entries);
3102		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3103			r = TAILQ_NEXT(r, entries);
3104		else if (r->anchorname[0] && r->anchor == NULL)
3105			r = TAILQ_NEXT(r, entries);
3106		else if (r->os_fingerprint != PF_OSFP_ANY)
3107			r = TAILQ_NEXT(r, entries);
3108		else {
3109			if (r->tag)
3110				tag = r->tag;
3111			if (r->anchor == NULL) {
3112				*rm = r;
3113				*am = a;
3114				*rsm = ruleset;
3115				if ((*rm)->quick)
3116					break;
3117				r = TAILQ_NEXT(r, entries);
3118			} else
3119				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3120				    PF_RULESET_FILTER);
3121		}
3122		if (r == NULL && a != NULL)
3123			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3124			    PF_RULESET_FILTER);
3125	}
3126	r = *rm;
3127	a = *am;
3128	ruleset = *rsm;
3129
3130	REASON_SET(&reason, PFRES_MATCH);
3131
3132	if (r->log) {
3133		if (rewrite)
3134			m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3135		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3136	}
3137
3138	if ((r->action == PF_DROP) &&
3139	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3140	    (r->rule_flag & PFRULE_RETURN))) {
3141		/* undo NAT changes, if they have taken place */
3142		if (nr != NULL) {
3143			if (direction == PF_OUT) {
3144				pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3145				    &uh->uh_sum, &pd->baddr, bport, 1, af);
3146				rewrite++;
3147			} else {
3148				pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3149				    &uh->uh_sum, &pd->baddr, bport, 1, af);
3150				rewrite++;
3151			}
3152		}
3153		if ((af == AF_INET) && r->return_icmp)
3154			pf_send_icmp(m, r->return_icmp >> 8,
3155			    r->return_icmp & 255, af, r);
3156		else if ((af == AF_INET6) && r->return_icmp6)
3157			pf_send_icmp(m, r->return_icmp6 >> 8,
3158			    r->return_icmp6 & 255, af, r);
3159	}
3160
3161	if (r->action == PF_DROP)
3162		return (PF_DROP);
3163
3164	if (pf_tag_packet(m, pftag, tag)) {
3165		REASON_SET(&reason, PFRES_MEMORY);
3166		return (PF_DROP);
3167	}
3168
3169	if (r->keep_state || nr != NULL) {
3170		/* create new state */
3171		struct pf_state	*s = NULL;
3172		struct pf_src_node *sn = NULL;
3173
3174		/* check maximums */
3175		if (r->max_states && (r->states >= r->max_states))
3176			goto cleanup;
3177		/* src node for flter rule */
3178		if ((r->rule_flag & PFRULE_SRCTRACK ||
3179		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3180		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3181			goto cleanup;
3182		/* src node for translation rule */
3183		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3184		    ((direction == PF_OUT &&
3185		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3186		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3187			goto cleanup;
3188		s = pool_get(&pf_state_pl, PR_NOWAIT);
3189		if (s == NULL) {
3190cleanup:
3191			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3192				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3193				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3194				pf_status.src_nodes--;
3195				pool_put(&pf_src_tree_pl, sn);
3196			}
3197			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3198			    nsn->expire == 0) {
3199				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3200				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3201				pf_status.src_nodes--;
3202				pool_put(&pf_src_tree_pl, nsn);
3203			}
3204			REASON_SET(&reason, PFRES_MEMORY);
3205			return (PF_DROP);
3206		}
3207		bzero(s, sizeof(*s));
3208		r->states++;
3209		if (a != NULL)
3210			a->states++;
3211		s->rule.ptr = r;
3212		s->nat_rule.ptr = nr;
3213		if (s->nat_rule.ptr != NULL)
3214			s->nat_rule.ptr->states++;
3215		s->anchor.ptr = a;
3216		s->allow_opts = r->allow_opts;
3217		s->log = r->log & 2;
3218		s->proto = IPPROTO_UDP;
3219		s->direction = direction;
3220		s->af = af;
3221		if (direction == PF_OUT) {
3222			PF_ACPY(&s->gwy.addr, saddr, af);
3223			s->gwy.port = uh->uh_sport;
3224			PF_ACPY(&s->ext.addr, daddr, af);
3225			s->ext.port = uh->uh_dport;
3226			if (nr != NULL) {
3227				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3228				s->lan.port = bport;
3229			} else {
3230				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3231				s->lan.port = s->gwy.port;
3232			}
3233		} else {
3234			PF_ACPY(&s->lan.addr, daddr, af);
3235			s->lan.port = uh->uh_dport;
3236			PF_ACPY(&s->ext.addr, saddr, af);
3237			s->ext.port = uh->uh_sport;
3238			if (nr != NULL) {
3239				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3240				s->gwy.port = bport;
3241			} else {
3242				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3243				s->gwy.port = s->lan.port;
3244			}
3245		}
3246		s->src.state = PFUDPS_SINGLE;
3247		s->dst.state = PFUDPS_NO_TRAFFIC;
3248#ifdef __FreeBSD__
3249		s->creation = time_second;
3250		s->expire = time_second;
3251#else
3252		s->creation = time.tv_sec;
3253		s->expire = time.tv_sec;
3254#endif
3255		s->timeout = PFTM_UDP_FIRST_PACKET;
3256		pf_set_rt_ifp(s, saddr);
3257		if (sn != NULL) {
3258			s->src_node = sn;
3259			s->src_node->states++;
3260		}
3261		if (nsn != NULL) {
3262			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3263			s->nat_src_node = nsn;
3264			s->nat_src_node->states++;
3265		}
3266		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3267			REASON_SET(&reason, PFRES_MEMORY);
3268			pf_src_tree_remove_state(s);
3269			pool_put(&pf_state_pl, s);
3270			return (PF_DROP);
3271		} else
3272			*sm = s;
3273	}
3274
3275	/* copy back packet headers if we performed NAT operations */
3276	if (rewrite)
3277		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3278
3279	return (PF_PASS);
3280}
3281
3282int
3283pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
3284    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3285    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3286{
3287	struct pf_rule		*nr = NULL;
3288	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3289	struct pf_rule		*r, *a = NULL;
3290	struct pf_ruleset	*ruleset = NULL;
3291	struct pf_src_node	*nsn = NULL;
3292	u_short			 reason;
3293	u_int16_t		 icmpid = 0;	/* make the compiler happy */
3294	sa_family_t		 af = pd->af;
3295	u_int8_t		 icmptype = 0;	/* make the compiler happy */
3296	u_int8_t		 icmpcode = 0;	/* make the compiler happy */
3297	int			 state_icmp = 0;
3298	struct pf_tag		*pftag = NULL;
3299	int			 tag = -1;
3300#ifdef INET6
3301	int			 rewrite = 0;
3302#endif /* INET6 */
3303
3304	switch (pd->proto) {
3305#ifdef INET
3306	case IPPROTO_ICMP:
3307		icmptype = pd->hdr.icmp->icmp_type;
3308		icmpcode = pd->hdr.icmp->icmp_code;
3309		icmpid = pd->hdr.icmp->icmp_id;
3310
3311		if (icmptype == ICMP_UNREACH ||
3312		    icmptype == ICMP_SOURCEQUENCH ||
3313		    icmptype == ICMP_REDIRECT ||
3314		    icmptype == ICMP_TIMXCEED ||
3315		    icmptype == ICMP_PARAMPROB)
3316			state_icmp++;
3317		break;
3318#endif /* INET */
3319#ifdef INET6
3320	case IPPROTO_ICMPV6:
3321		icmptype = pd->hdr.icmp6->icmp6_type;
3322		icmpcode = pd->hdr.icmp6->icmp6_code;
3323		icmpid = pd->hdr.icmp6->icmp6_id;
3324
3325		if (icmptype == ICMP6_DST_UNREACH ||
3326		    icmptype == ICMP6_PACKET_TOO_BIG ||
3327		    icmptype == ICMP6_TIME_EXCEEDED ||
3328		    icmptype == ICMP6_PARAM_PROB)
3329			state_icmp++;
3330		break;
3331#endif /* INET6 */
3332	}
3333
3334	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3335
3336	if (direction == PF_OUT) {
3337		/* check outgoing packet for BINAT/NAT */
3338		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3339		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3340			PF_ACPY(&pd->baddr, saddr, af);
3341			switch (af) {
3342#ifdef INET
3343			case AF_INET:
3344				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3345				    pd->naddr.v4.s_addr, 0);
3346				break;
3347#endif /* INET */
3348#ifdef INET6
3349			case AF_INET6:
3350				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3351				    &pd->naddr, 0);
3352				rewrite++;
3353				break;
3354#endif /* INET6 */
3355			}
3356			if (nr->natpass)
3357				r = NULL;
3358			pd->nat_rule = nr;
3359		}
3360	} else {
3361		/* check incoming packet for BINAT/RDR */
3362		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3363		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3364			PF_ACPY(&pd->baddr, daddr, af);
3365			switch (af) {
3366#ifdef INET
3367			case AF_INET:
3368				pf_change_a(&daddr->v4.s_addr,
3369				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3370				break;
3371#endif /* INET */
3372#ifdef INET6
3373			case AF_INET6:
3374				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3375				    &pd->naddr, 0);
3376				rewrite++;
3377				break;
3378#endif /* INET6 */
3379			}
3380			if (nr->natpass)
3381				r = NULL;
3382			pd->nat_rule = nr;
3383		}
3384	}
3385
3386	while (r != NULL) {
3387		r->evaluations++;
3388		if (r->kif != NULL &&
3389		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3390			r = r->skip[PF_SKIP_IFP].ptr;
3391		else if (r->direction && r->direction != direction)
3392			r = r->skip[PF_SKIP_DIR].ptr;
3393		else if (r->af && r->af != af)
3394			r = r->skip[PF_SKIP_AF].ptr;
3395		else if (r->proto && r->proto != pd->proto)
3396			r = r->skip[PF_SKIP_PROTO].ptr;
3397		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3398			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3399		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3400			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3401		else if (r->type && r->type != icmptype + 1)
3402			r = TAILQ_NEXT(r, entries);
3403		else if (r->code && r->code != icmpcode + 1)
3404			r = TAILQ_NEXT(r, entries);
3405		else if (r->tos && !(r->tos & pd->tos))
3406			r = TAILQ_NEXT(r, entries);
3407		else if (r->rule_flag & PFRULE_FRAGMENT)
3408			r = TAILQ_NEXT(r, entries);
3409		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3410			r = TAILQ_NEXT(r, entries);
3411		else if (r->anchorname[0] && r->anchor == NULL)
3412			r = TAILQ_NEXT(r, entries);
3413		else if (r->os_fingerprint != PF_OSFP_ANY)
3414			r = TAILQ_NEXT(r, entries);
3415		else {
3416			if (r->tag)
3417				tag = r->tag;
3418			if (r->anchor == NULL) {
3419				*rm = r;
3420				*am = a;
3421				*rsm = ruleset;
3422				if ((*rm)->quick)
3423					break;
3424				r = TAILQ_NEXT(r, entries);
3425			} else
3426				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3427				    PF_RULESET_FILTER);
3428		}
3429		if (r == NULL && a != NULL)
3430			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3431			    PF_RULESET_FILTER);
3432	}
3433	r = *rm;
3434	a = *am;
3435	ruleset = *rsm;
3436
3437	REASON_SET(&reason, PFRES_MATCH);
3438
3439	if (r->log) {
3440#ifdef INET6
3441		if (rewrite)
3442			m_copyback(m, off, sizeof(struct icmp6_hdr),
3443			    (caddr_t)pd->hdr.icmp6);
3444#endif /* INET6 */
3445		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3446	}
3447
3448	if (r->action != PF_PASS)
3449		return (PF_DROP);
3450
3451	if (pf_tag_packet(m, pftag, tag)) {
3452		REASON_SET(&reason, PFRES_MEMORY);
3453		return (PF_DROP);
3454	}
3455
3456	if (!state_icmp && (r->keep_state || nr != NULL)) {
3457		/* create new state */
3458		struct pf_state	*s = NULL;
3459		struct pf_src_node *sn = NULL;
3460
3461		/* check maximums */
3462		if (r->max_states && (r->states >= r->max_states))
3463			goto cleanup;
3464		/* src node for flter rule */
3465		if ((r->rule_flag & PFRULE_SRCTRACK ||
3466		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3467		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3468			goto cleanup;
3469		/* src node for translation rule */
3470		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3471		    ((direction == PF_OUT &&
3472		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3473		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3474			goto cleanup;
3475		s = pool_get(&pf_state_pl, PR_NOWAIT);
3476		if (s == NULL) {
3477cleanup:
3478			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3479				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3480				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3481				pf_status.src_nodes--;
3482				pool_put(&pf_src_tree_pl, sn);
3483			}
3484			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3485			    nsn->expire == 0) {
3486				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3487				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3488				pf_status.src_nodes--;
3489				pool_put(&pf_src_tree_pl, nsn);
3490			}
3491			REASON_SET(&reason, PFRES_MEMORY);
3492			return (PF_DROP);
3493		}
3494		bzero(s, sizeof(*s));
3495		r->states++;
3496		if (a != NULL)
3497			a->states++;
3498		s->rule.ptr = r;
3499		s->nat_rule.ptr = nr;
3500		if (s->nat_rule.ptr != NULL)
3501			s->nat_rule.ptr->states++;
3502		s->anchor.ptr = a;
3503		s->allow_opts = r->allow_opts;
3504		s->log = r->log & 2;
3505		s->proto = pd->proto;
3506		s->direction = direction;
3507		s->af = af;
3508		if (direction == PF_OUT) {
3509			PF_ACPY(&s->gwy.addr, saddr, af);
3510			s->gwy.port = icmpid;
3511			PF_ACPY(&s->ext.addr, daddr, af);
3512			s->ext.port = icmpid;
3513			if (nr != NULL)
3514				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3515			else
3516				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3517			s->lan.port = icmpid;
3518		} else {
3519			PF_ACPY(&s->lan.addr, daddr, af);
3520			s->lan.port = icmpid;
3521			PF_ACPY(&s->ext.addr, saddr, af);
3522			s->ext.port = icmpid;
3523			if (nr != NULL)
3524				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3525			else
3526				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3527			s->gwy.port = icmpid;
3528		}
3529#ifdef __FreeBSD__
3530		s->creation = time_second;
3531		s->expire = time_second;
3532#else
3533		s->creation = time.tv_sec;
3534		s->expire = time.tv_sec;
3535#endif
3536		s->timeout = PFTM_ICMP_FIRST_PACKET;
3537		pf_set_rt_ifp(s, saddr);
3538		if (sn != NULL) {
3539			s->src_node = sn;
3540			s->src_node->states++;
3541		}
3542		if (nsn != NULL) {
3543			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3544			s->nat_src_node = nsn;
3545			s->nat_src_node->states++;
3546		}
3547		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3548			REASON_SET(&reason, PFRES_MEMORY);
3549			pf_src_tree_remove_state(s);
3550			pool_put(&pf_state_pl, s);
3551			return (PF_DROP);
3552		} else
3553			*sm = s;
3554	}
3555
3556#ifdef INET6
3557	/* copy back packet headers if we performed IPv6 NAT operations */
3558	if (rewrite)
3559		m_copyback(m, off, sizeof(struct icmp6_hdr),
3560		    (caddr_t)pd->hdr.icmp6);
3561#endif /* INET6 */
3562
3563	return (PF_PASS);
3564}
3565
3566int
3567pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
3568    struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3569    struct pf_rule **am, struct pf_ruleset **rsm)
3570{
3571	struct pf_rule		*nr = NULL;
3572	struct pf_rule		*r, *a = NULL;
3573	struct pf_ruleset	*ruleset = NULL;
3574	struct pf_src_node	*nsn = NULL;
3575	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3576	sa_family_t		 af = pd->af;
3577	u_short			 reason;
3578	struct pf_tag		*pftag = NULL;
3579	int			 tag = -1;
3580
3581	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3582
3583	if (direction == PF_OUT) {
3584		/* check outgoing packet for BINAT/NAT */
3585		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3586		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3587			PF_ACPY(&pd->baddr, saddr, af);
3588			switch (af) {
3589#ifdef INET
3590			case AF_INET:
3591				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3592				    pd->naddr.v4.s_addr, 0);
3593				break;
3594#endif /* INET */
3595#ifdef INET6
3596			case AF_INET6:
3597				PF_ACPY(saddr, &pd->naddr, af);
3598				break;
3599#endif /* INET6 */
3600			}
3601			if (nr->natpass)
3602				r = NULL;
3603			pd->nat_rule = nr;
3604		}
3605	} else {
3606		/* check incoming packet for BINAT/RDR */
3607		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3608		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3609			PF_ACPY(&pd->baddr, daddr, af);
3610			switch (af) {
3611#ifdef INET
3612			case AF_INET:
3613				pf_change_a(&daddr->v4.s_addr,
3614				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3615				break;
3616#endif /* INET */
3617#ifdef INET6
3618			case AF_INET6:
3619				PF_ACPY(daddr, &pd->naddr, af);
3620				break;
3621#endif /* INET6 */
3622			}
3623			if (nr->natpass)
3624				r = NULL;
3625			pd->nat_rule = nr;
3626		}
3627	}
3628
3629	while (r != NULL) {
3630		r->evaluations++;
3631		if (r->kif != NULL &&
3632		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3633			r = r->skip[PF_SKIP_IFP].ptr;
3634		else if (r->direction && r->direction != direction)
3635			r = r->skip[PF_SKIP_DIR].ptr;
3636		else if (r->af && r->af != af)
3637			r = r->skip[PF_SKIP_AF].ptr;
3638		else if (r->proto && r->proto != pd->proto)
3639			r = r->skip[PF_SKIP_PROTO].ptr;
3640		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3641			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3642		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3643			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3644		else if (r->tos && !(r->tos & pd->tos))
3645			r = TAILQ_NEXT(r, entries);
3646		else if (r->rule_flag & PFRULE_FRAGMENT)
3647			r = TAILQ_NEXT(r, entries);
3648		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3649			r = TAILQ_NEXT(r, entries);
3650		else if (r->anchorname[0] && r->anchor == NULL)
3651			r = TAILQ_NEXT(r, entries);
3652		else if (r->os_fingerprint != PF_OSFP_ANY)
3653			r = TAILQ_NEXT(r, entries);
3654		else {
3655			if (r->tag)
3656				tag = r->tag;
3657			if (r->anchor == NULL) {
3658				*rm = r;
3659				*am = a;
3660				*rsm = ruleset;
3661				if ((*rm)->quick)
3662					break;
3663				r = TAILQ_NEXT(r, entries);
3664			} else
3665				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3666				    PF_RULESET_FILTER);
3667		}
3668		if (r == NULL && a != NULL)
3669			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3670			    PF_RULESET_FILTER);
3671	}
3672	r = *rm;
3673	a = *am;
3674	ruleset = *rsm;
3675
3676	REASON_SET(&reason, PFRES_MATCH);
3677
3678	if (r->log)
3679		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3680
3681	if ((r->action == PF_DROP) &&
3682	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3683	    (r->rule_flag & PFRULE_RETURN))) {
3684		struct pf_addr *a = NULL;
3685
3686		if (nr != NULL) {
3687			if (direction == PF_OUT)
3688				a = saddr;
3689			else
3690				a = daddr;
3691		}
3692		if (a != NULL) {
3693			switch (af) {
3694#ifdef INET
3695			case AF_INET:
3696				pf_change_a(&a->v4.s_addr, pd->ip_sum,
3697				    pd->baddr.v4.s_addr, 0);
3698				break;
3699#endif /* INET */
3700#ifdef INET6
3701			case AF_INET6:
3702				PF_ACPY(a, &pd->baddr, af);
3703				break;
3704#endif /* INET6 */
3705			}
3706		}
3707		if ((af == AF_INET) && r->return_icmp)
3708			pf_send_icmp(m, r->return_icmp >> 8,
3709			    r->return_icmp & 255, af, r);
3710		else if ((af == AF_INET6) && r->return_icmp6)
3711			pf_send_icmp(m, r->return_icmp6 >> 8,
3712			    r->return_icmp6 & 255, af, r);
3713	}
3714
3715	if (r->action != PF_PASS)
3716		return (PF_DROP);
3717
3718	if (pf_tag_packet(m, pftag, tag)) {
3719		REASON_SET(&reason, PFRES_MEMORY);
3720		return (PF_DROP);
3721	}
3722
3723	if (r->keep_state || nr != NULL) {
3724		/* create new state */
3725		struct pf_state	*s = NULL;
3726		struct pf_src_node *sn = NULL;
3727
3728		/* check maximums */
3729		if (r->max_states && (r->states >= r->max_states))
3730			goto cleanup;
3731		/* src node for flter rule */
3732		if ((r->rule_flag & PFRULE_SRCTRACK ||
3733		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3734		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3735			goto cleanup;
3736		/* src node for translation rule */
3737		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3738		    ((direction == PF_OUT &&
3739		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3740		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3741			goto cleanup;
3742		s = pool_get(&pf_state_pl, PR_NOWAIT);
3743		if (s == NULL) {
3744cleanup:
3745			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3746				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3747				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3748				pf_status.src_nodes--;
3749				pool_put(&pf_src_tree_pl, sn);
3750			}
3751			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3752			    nsn->expire == 0) {
3753				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3754				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3755				pf_status.src_nodes--;
3756				pool_put(&pf_src_tree_pl, nsn);
3757			}
3758			REASON_SET(&reason, PFRES_MEMORY);
3759			return (PF_DROP);
3760		}
3761		bzero(s, sizeof(*s));
3762		r->states++;
3763		if (a != NULL)
3764			a->states++;
3765		s->rule.ptr = r;
3766		s->nat_rule.ptr = nr;
3767		if (s->nat_rule.ptr != NULL)
3768			s->nat_rule.ptr->states++;
3769		s->anchor.ptr = a;
3770		s->allow_opts = r->allow_opts;
3771		s->log = r->log & 2;
3772		s->proto = pd->proto;
3773		s->direction = direction;
3774		s->af = af;
3775		if (direction == PF_OUT) {
3776			PF_ACPY(&s->gwy.addr, saddr, af);
3777			PF_ACPY(&s->ext.addr, daddr, af);
3778			if (nr != NULL)
3779				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3780			else
3781				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3782		} else {
3783			PF_ACPY(&s->lan.addr, daddr, af);
3784			PF_ACPY(&s->ext.addr, saddr, af);
3785			if (nr != NULL)
3786				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3787			else
3788				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3789		}
3790		s->src.state = PFOTHERS_SINGLE;
3791		s->dst.state = PFOTHERS_NO_TRAFFIC;
3792#ifdef __FreeBSD__
3793		s->creation = time_second;
3794		s->expire = time_second;
3795#else
3796		s->creation = time.tv_sec;
3797		s->expire = time.tv_sec;
3798#endif
3799		s->timeout = PFTM_OTHER_FIRST_PACKET;
3800		pf_set_rt_ifp(s, saddr);
3801		if (sn != NULL) {
3802			s->src_node = sn;
3803			s->src_node->states++;
3804		}
3805		if (nsn != NULL) {
3806			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3807			s->nat_src_node = nsn;
3808			s->nat_src_node->states++;
3809		}
3810		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3811			REASON_SET(&reason, PFRES_MEMORY);
3812			pf_src_tree_remove_state(s);
3813			pool_put(&pf_state_pl, s);
3814			return (PF_DROP);
3815		} else
3816			*sm = s;
3817	}
3818
3819	return (PF_PASS);
3820}
3821
3822int
3823pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3824    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3825    struct pf_ruleset **rsm)
3826{
3827	struct pf_rule		*r, *a = NULL;
3828	struct pf_ruleset	*ruleset = NULL;
3829	sa_family_t		 af = pd->af;
3830	u_short			 reason;
3831	struct pf_tag		*pftag = NULL;
3832	int			 tag = -1;
3833
3834	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3835	while (r != NULL) {
3836		r->evaluations++;
3837		if (r->kif != NULL &&
3838		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3839			r = r->skip[PF_SKIP_IFP].ptr;
3840		else if (r->direction && r->direction != direction)
3841			r = r->skip[PF_SKIP_DIR].ptr;
3842		else if (r->af && r->af != af)
3843			r = r->skip[PF_SKIP_AF].ptr;
3844		else if (r->proto && r->proto != pd->proto)
3845			r = r->skip[PF_SKIP_PROTO].ptr;
3846		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3847			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3848		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3849			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3850		else if (r->tos && !(r->tos & pd->tos))
3851			r = TAILQ_NEXT(r, entries);
3852		else if (r->src.port_op || r->dst.port_op ||
3853		    r->flagset || r->type || r->code ||
3854		    r->os_fingerprint != PF_OSFP_ANY)
3855			r = TAILQ_NEXT(r, entries);
3856		else if (r->match_tag && !pf_match_tag(m, r, NULL, pftag, &tag))
3857			r = TAILQ_NEXT(r, entries);
3858		else if (r->anchorname[0] && r->anchor == NULL)
3859			r = TAILQ_NEXT(r, entries);
3860		else {
3861			if (r->anchor == NULL) {
3862				*rm = r;
3863				*am = a;
3864				*rsm = ruleset;
3865				if ((*rm)->quick)
3866					break;
3867				r = TAILQ_NEXT(r, entries);
3868			} else
3869				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3870				    PF_RULESET_FILTER);
3871		}
3872		if (r == NULL && a != NULL)
3873			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3874			    PF_RULESET_FILTER);
3875	}
3876	r = *rm;
3877	a = *am;
3878	ruleset = *rsm;
3879
3880	REASON_SET(&reason, PFRES_MATCH);
3881
3882	if (r->log)
3883		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3884
3885	if (r->action != PF_PASS)
3886		return (PF_DROP);
3887
3888	if (pf_tag_packet(m, pftag, tag)) {
3889		REASON_SET(&reason, PFRES_MEMORY);
3890		return (PF_DROP);
3891	}
3892
3893	return (PF_PASS);
3894}
3895
3896int
3897pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
3898    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3899    u_short *reason)
3900{
3901	struct pf_state		 key;
3902	struct tcphdr		*th = pd->hdr.tcp;
3903	u_int16_t		 win = ntohs(th->th_win);
3904	u_int32_t		 ack, end, seq;
3905	u_int8_t		 sws, dws;
3906	int			 ackskew;
3907	int			 copyback = 0;
3908	struct pf_state_peer	*src, *dst;
3909
3910	key.af = pd->af;
3911	key.proto = IPPROTO_TCP;
3912	if (direction == PF_IN)	{
3913		PF_ACPY(&key.ext.addr, pd->src, key.af);
3914		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
3915		key.ext.port = th->th_sport;
3916		key.gwy.port = th->th_dport;
3917	} else {
3918		PF_ACPY(&key.lan.addr, pd->src, key.af);
3919		PF_ACPY(&key.ext.addr, pd->dst, key.af);
3920		key.lan.port = th->th_sport;
3921		key.ext.port = th->th_dport;
3922	}
3923
3924	STATE_LOOKUP();
3925
3926	if (direction == (*state)->direction) {
3927		src = &(*state)->src;
3928		dst = &(*state)->dst;
3929	} else {
3930		src = &(*state)->dst;
3931		dst = &(*state)->src;
3932	}
3933
3934	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
3935		if (direction != (*state)->direction)
3936			return (PF_SYNPROXY_DROP);
3937		if (th->th_flags & TH_SYN) {
3938			if (ntohl(th->th_seq) != (*state)->src.seqlo)
3939				return (PF_DROP);
3940			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3941			    pd->src, th->th_dport, th->th_sport,
3942			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
3943			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0);
3944			return (PF_SYNPROXY_DROP);
3945		} else if (!(th->th_flags & TH_ACK) ||
3946		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3947		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
3948			return (PF_DROP);
3949		else
3950			(*state)->src.state = PF_TCPS_PROXY_DST;
3951	}
3952	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
3953		struct pf_state_host *src, *dst;
3954
3955		if (direction == PF_OUT) {
3956			src = &(*state)->gwy;
3957			dst = &(*state)->ext;
3958		} else {
3959			src = &(*state)->ext;
3960			dst = &(*state)->lan;
3961		}
3962		if (direction == (*state)->direction) {
3963			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
3964			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3965			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
3966				return (PF_DROP);
3967			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
3968			if ((*state)->dst.seqhi == 1)
3969				(*state)->dst.seqhi = arc4random();
3970			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3971			    &dst->addr, src->port, dst->port,
3972			    (*state)->dst.seqhi, 0, TH_SYN, 0,
3973			    (*state)->src.mss, 0);
3974			return (PF_SYNPROXY_DROP);
3975		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
3976		    (TH_SYN|TH_ACK)) ||
3977		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1))
3978			return (PF_DROP);
3979		else {
3980			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
3981			(*state)->dst.seqlo = ntohl(th->th_seq);
3982			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3983			    pd->src, th->th_dport, th->th_sport,
3984			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
3985			    TH_ACK, (*state)->src.max_win, 0, 0);
3986			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3987			    &dst->addr, src->port, dst->port,
3988			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
3989			    TH_ACK, (*state)->dst.max_win, 0, 0);
3990			(*state)->src.seqdiff = (*state)->dst.seqhi -
3991			    (*state)->src.seqlo;
3992			(*state)->dst.seqdiff = (*state)->src.seqhi -
3993			    (*state)->dst.seqlo;
3994			(*state)->src.seqhi = (*state)->src.seqlo +
3995			    (*state)->src.max_win;
3996			(*state)->dst.seqhi = (*state)->dst.seqlo +
3997			    (*state)->dst.max_win;
3998			(*state)->src.wscale = (*state)->dst.wscale = 0;
3999			(*state)->src.state = (*state)->dst.state =
4000			    TCPS_ESTABLISHED;
4001			return (PF_SYNPROXY_DROP);
4002		}
4003	}
4004
4005	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4006		sws = src->wscale & PF_WSCALE_MASK;
4007		dws = dst->wscale & PF_WSCALE_MASK;
4008	} else
4009		sws = dws = 0;
4010
4011	/*
4012	 * Sequence tracking algorithm from Guido van Rooij's paper:
4013	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
4014	 *	tcp_filtering.ps
4015	 */
4016
4017	seq = ntohl(th->th_seq);
4018	if (src->seqlo == 0) {
4019		/* First packet from this end. Set its state */
4020
4021		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4022		    src->scrub == NULL) {
4023			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4024				REASON_SET(reason, PFRES_MEMORY);
4025				return (PF_DROP);
4026			}
4027		}
4028
4029		/* Deferred generation of sequence number modulator */
4030		if (dst->seqdiff && !src->seqdiff) {
4031			while ((src->seqdiff = arc4random()) == 0)
4032				;
4033			ack = ntohl(th->th_ack) - dst->seqdiff;
4034			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4035			    src->seqdiff), 0);
4036			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4037			copyback = 1;
4038		} else {
4039			ack = ntohl(th->th_ack);
4040		}
4041
4042		end = seq + pd->p_len;
4043		if (th->th_flags & TH_SYN) {
4044			end++;
4045			if (dst->wscale & PF_WSCALE_FLAG) {
4046				src->wscale = pf_get_wscale(m, off, th->th_off,
4047				    pd->af);
4048				if (src->wscale & PF_WSCALE_FLAG) {
4049					/* Remove scale factor from initial
4050					 * window */
4051					sws = src->wscale & PF_WSCALE_MASK;
4052					win = ((u_int32_t)win + (1 << sws) - 1)
4053					    >> sws;
4054					dws = dst->wscale & PF_WSCALE_MASK;
4055				} else {
4056					/* fixup other window */
4057					dst->max_win <<= dst->wscale &
4058					    PF_WSCALE_MASK;
4059					/* in case of a retrans SYN|ACK */
4060					dst->wscale = 0;
4061				}
4062			}
4063		}
4064		if (th->th_flags & TH_FIN)
4065			end++;
4066
4067		src->seqlo = seq;
4068		if (src->state < TCPS_SYN_SENT)
4069			src->state = TCPS_SYN_SENT;
4070
4071		/*
4072		 * May need to slide the window (seqhi may have been set by
4073		 * the crappy stack check or if we picked up the connection
4074		 * after establishment)
4075		 */
4076		if (src->seqhi == 1 ||
4077		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4078			src->seqhi = end + MAX(1, dst->max_win << dws);
4079		if (win > src->max_win)
4080			src->max_win = win;
4081
4082	} else {
4083		ack = ntohl(th->th_ack) - dst->seqdiff;
4084		if (src->seqdiff) {
4085			/* Modulate sequence numbers */
4086			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4087			    src->seqdiff), 0);
4088			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4089			copyback = 1;
4090		}
4091		end = seq + pd->p_len;
4092		if (th->th_flags & TH_SYN)
4093			end++;
4094		if (th->th_flags & TH_FIN)
4095			end++;
4096	}
4097
4098	if ((th->th_flags & TH_ACK) == 0) {
4099		/* Let it pass through the ack skew check */
4100		ack = dst->seqlo;
4101	} else if ((ack == 0 &&
4102	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4103	    /* broken tcp stacks do not set ack */
4104	    (dst->state < TCPS_SYN_SENT)) {
4105		/*
4106		 * Many stacks (ours included) will set the ACK number in an
4107		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4108		 */
4109		ack = dst->seqlo;
4110	}
4111
4112	if (seq == end) {
4113		/* Ease sequencing restrictions on no data packets */
4114		seq = src->seqlo;
4115		end = seq;
4116	}
4117
4118	ackskew = dst->seqlo - ack;
4119
4120#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4121	if (SEQ_GEQ(src->seqhi, end) &&
4122	    /* Last octet inside other's window space */
4123	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4124	    /* Retrans: not more than one window back */
4125	    (ackskew >= -MAXACKWINDOW) &&
4126	    /* Acking not more than one reassembled fragment backwards */
4127	    (ackskew <= (MAXACKWINDOW << sws))) {
4128	    /* Acking not more than one window forward */
4129
4130		/* update max window */
4131		if (src->max_win < win)
4132			src->max_win = win;
4133		/* synchronize sequencing */
4134		if (SEQ_GT(end, src->seqlo))
4135			src->seqlo = end;
4136		/* slide the window of what the other end can send */
4137		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4138			dst->seqhi = ack + MAX((win << sws), 1);
4139
4140
4141		/* update states */
4142		if (th->th_flags & TH_SYN)
4143			if (src->state < TCPS_SYN_SENT)
4144				src->state = TCPS_SYN_SENT;
4145		if (th->th_flags & TH_FIN)
4146			if (src->state < TCPS_CLOSING)
4147				src->state = TCPS_CLOSING;
4148		if (th->th_flags & TH_ACK) {
4149			if (dst->state == TCPS_SYN_SENT)
4150				dst->state = TCPS_ESTABLISHED;
4151			else if (dst->state == TCPS_CLOSING)
4152				dst->state = TCPS_FIN_WAIT_2;
4153		}
4154		if (th->th_flags & TH_RST)
4155			src->state = dst->state = TCPS_TIME_WAIT;
4156
4157		/* update expire time */
4158#ifdef __FreeBSD__
4159		(*state)->expire = time_second;
4160#else
4161		(*state)->expire = time.tv_sec;
4162#endif
4163		if (src->state >= TCPS_FIN_WAIT_2 &&
4164		    dst->state >= TCPS_FIN_WAIT_2)
4165			(*state)->timeout = PFTM_TCP_CLOSED;
4166		else if (src->state >= TCPS_FIN_WAIT_2 ||
4167		    dst->state >= TCPS_FIN_WAIT_2)
4168			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4169		else if (src->state < TCPS_ESTABLISHED ||
4170		    dst->state < TCPS_ESTABLISHED)
4171			(*state)->timeout = PFTM_TCP_OPENING;
4172		else if (src->state >= TCPS_CLOSING ||
4173		    dst->state >= TCPS_CLOSING)
4174			(*state)->timeout = PFTM_TCP_CLOSING;
4175		else
4176			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4177
4178		/* Fall through to PASS packet */
4179
4180	} else if ((dst->state < TCPS_SYN_SENT ||
4181		dst->state >= TCPS_FIN_WAIT_2 ||
4182		src->state >= TCPS_FIN_WAIT_2) &&
4183	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4184	    /* Within a window forward of the originating packet */
4185	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4186	    /* Within a window backward of the originating packet */
4187
4188		/*
4189		 * This currently handles three situations:
4190		 *  1) Stupid stacks will shotgun SYNs before their peer
4191		 *     replies.
4192		 *  2) When PF catches an already established stream (the
4193		 *     firewall rebooted, the state table was flushed, routes
4194		 *     changed...)
4195		 *  3) Packets get funky immediately after the connection
4196		 *     closes (this should catch Solaris spurious ACK|FINs
4197		 *     that web servers like to spew after a close)
4198		 *
4199		 * This must be a little more careful than the above code
4200		 * since packet floods will also be caught here. We don't
4201		 * update the TTL here to mitigate the damage of a packet
4202		 * flood and so the same code can handle awkward establishment
4203		 * and a loosened connection close.
4204		 * In the establishment case, a correct peer response will
4205		 * validate the connection, go through the normal state code
4206		 * and keep updating the state TTL.
4207		 */
4208
4209		if (pf_status.debug >= PF_DEBUG_MISC) {
4210			printf("pf: loose state match: ");
4211			pf_print_state(*state);
4212			pf_print_flags(th->th_flags);
4213			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n",
4214			    seq, ack, pd->p_len, ackskew,
4215			    (*state)->packets[0], (*state)->packets[1]);
4216		}
4217
4218		/* update max window */
4219		if (src->max_win < win)
4220			src->max_win = win;
4221		/* synchronize sequencing */
4222		if (SEQ_GT(end, src->seqlo))
4223			src->seqlo = end;
4224		/* slide the window of what the other end can send */
4225		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4226			dst->seqhi = ack + MAX((win << sws), 1);
4227
4228		/*
4229		 * Cannot set dst->seqhi here since this could be a shotgunned
4230		 * SYN and not an already established connection.
4231		 */
4232
4233		if (th->th_flags & TH_FIN)
4234			if (src->state < TCPS_CLOSING)
4235				src->state = TCPS_CLOSING;
4236		if (th->th_flags & TH_RST)
4237			src->state = dst->state = TCPS_TIME_WAIT;
4238
4239		/* Fall through to PASS packet */
4240
4241	} else {
4242		if ((*state)->dst.state == TCPS_SYN_SENT &&
4243		    (*state)->src.state == TCPS_SYN_SENT) {
4244			/* Send RST for state mismatches during handshake */
4245			if (!(th->th_flags & TH_RST)) {
4246				u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
4247
4248				if (th->th_flags & TH_SYN)
4249					ack++;
4250				if (th->th_flags & TH_FIN)
4251					ack++;
4252				pf_send_tcp((*state)->rule.ptr, pd->af,
4253				    pd->dst, pd->src, th->th_dport,
4254				    th->th_sport, ntohl(th->th_ack), ack,
4255				    TH_RST|TH_ACK, 0, 0,
4256				    (*state)->rule.ptr->return_ttl);
4257			}
4258			src->seqlo = 0;
4259			src->seqhi = 1;
4260			src->max_win = 1;
4261		} else if (pf_status.debug >= PF_DEBUG_MISC) {
4262			printf("pf: BAD state: ");
4263			pf_print_state(*state);
4264			pf_print_flags(th->th_flags);
4265			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d "
4266			    "dir=%s,%s\n", seq, ack, pd->p_len, ackskew,
4267			    (*state)->packets[0], (*state)->packets[1],
4268			    direction == PF_IN ? "in" : "out",
4269			    direction == (*state)->direction ? "fwd" : "rev");
4270			printf("pf: State failure on: %c %c %c %c | %c %c\n",
4271			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4272			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4273			    ' ': '2',
4274			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4275			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4276			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4277			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4278		}
4279		return (PF_DROP);
4280	}
4281
4282	if (dst->scrub || src->scrub) {
4283		if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4284		    src, dst, &copyback))
4285			return (PF_DROP);
4286	}
4287
4288	/* Any packets which have gotten here are to be passed */
4289
4290	/* translate source/destination address, if necessary */
4291	if (STATE_TRANSLATE(*state)) {
4292		if (direction == PF_OUT)
4293			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4294			    &th->th_sum, &(*state)->gwy.addr,
4295			    (*state)->gwy.port, 0, pd->af);
4296		else
4297			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4298			    &th->th_sum, &(*state)->lan.addr,
4299			    (*state)->lan.port, 0, pd->af);
4300		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4301	} else if (copyback) {
4302		/* Copyback sequence modulation or stateful scrub changes */
4303		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4304	}
4305
4306	return (PF_PASS);
4307}
4308
4309int
4310pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4311    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4312{
4313	struct pf_state_peer	*src, *dst;
4314	struct pf_state		 key;
4315	struct udphdr		*uh = pd->hdr.udp;
4316
4317	key.af = pd->af;
4318	key.proto = IPPROTO_UDP;
4319	if (direction == PF_IN)	{
4320		PF_ACPY(&key.ext.addr, pd->src, key.af);
4321		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4322		key.ext.port = uh->uh_sport;
4323		key.gwy.port = uh->uh_dport;
4324	} else {
4325		PF_ACPY(&key.lan.addr, pd->src, key.af);
4326		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4327		key.lan.port = uh->uh_sport;
4328		key.ext.port = uh->uh_dport;
4329	}
4330
4331	STATE_LOOKUP();
4332
4333	if (direction == (*state)->direction) {
4334		src = &(*state)->src;
4335		dst = &(*state)->dst;
4336	} else {
4337		src = &(*state)->dst;
4338		dst = &(*state)->src;
4339	}
4340
4341	/* update states */
4342	if (src->state < PFUDPS_SINGLE)
4343		src->state = PFUDPS_SINGLE;
4344	if (dst->state == PFUDPS_SINGLE)
4345		dst->state = PFUDPS_MULTIPLE;
4346
4347	/* update expire time */
4348#ifdef __FreeBSD__
4349	(*state)->expire = time_second;
4350#else
4351	(*state)->expire = time.tv_sec;
4352#endif
4353	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4354		(*state)->timeout = PFTM_UDP_MULTIPLE;
4355	else
4356		(*state)->timeout = PFTM_UDP_SINGLE;
4357
4358	/* translate source/destination address, if necessary */
4359	if (STATE_TRANSLATE(*state)) {
4360		if (direction == PF_OUT)
4361			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4362			    &uh->uh_sum, &(*state)->gwy.addr,
4363			    (*state)->gwy.port, 1, pd->af);
4364		else
4365			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4366			    &uh->uh_sum, &(*state)->lan.addr,
4367			    (*state)->lan.port, 1, pd->af);
4368		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4369	}
4370
4371	return (PF_PASS);
4372}
4373
4374int
4375pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4376    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4377{
4378	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
4379	u_int16_t	 icmpid = 0;		/* make the compiler happy */
4380	u_int16_t	*icmpsum = NULL;	/* make the compiler happy */
4381	u_int8_t	 icmptype = 0;		/* make the compiler happy */
4382	int		 state_icmp = 0;
4383
4384	switch (pd->proto) {
4385#ifdef INET
4386	case IPPROTO_ICMP:
4387		icmptype = pd->hdr.icmp->icmp_type;
4388		icmpid = pd->hdr.icmp->icmp_id;
4389		icmpsum = &pd->hdr.icmp->icmp_cksum;
4390
4391		if (icmptype == ICMP_UNREACH ||
4392		    icmptype == ICMP_SOURCEQUENCH ||
4393		    icmptype == ICMP_REDIRECT ||
4394		    icmptype == ICMP_TIMXCEED ||
4395		    icmptype == ICMP_PARAMPROB)
4396			state_icmp++;
4397		break;
4398#endif /* INET */
4399#ifdef INET6
4400	case IPPROTO_ICMPV6:
4401		icmptype = pd->hdr.icmp6->icmp6_type;
4402		icmpid = pd->hdr.icmp6->icmp6_id;
4403		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4404
4405		if (icmptype == ICMP6_DST_UNREACH ||
4406		    icmptype == ICMP6_PACKET_TOO_BIG ||
4407		    icmptype == ICMP6_TIME_EXCEEDED ||
4408		    icmptype == ICMP6_PARAM_PROB)
4409			state_icmp++;
4410		break;
4411#endif /* INET6 */
4412	}
4413
4414	if (!state_icmp) {
4415
4416		/*
4417		 * ICMP query/reply message not related to a TCP/UDP packet.
4418		 * Search for an ICMP state.
4419		 */
4420		struct pf_state		key;
4421
4422		key.af = pd->af;
4423		key.proto = pd->proto;
4424		if (direction == PF_IN)	{
4425			PF_ACPY(&key.ext.addr, pd->src, key.af);
4426			PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4427			key.ext.port = icmpid;
4428			key.gwy.port = icmpid;
4429		} else {
4430			PF_ACPY(&key.lan.addr, pd->src, key.af);
4431			PF_ACPY(&key.ext.addr, pd->dst, key.af);
4432			key.lan.port = icmpid;
4433			key.ext.port = icmpid;
4434		}
4435
4436		STATE_LOOKUP();
4437
4438#ifdef __FreeBSD__
4439		(*state)->expire = time_second;
4440#else
4441		(*state)->expire = time.tv_sec;
4442#endif
4443		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4444
4445		/* translate source/destination address, if necessary */
4446		if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) {
4447			if (direction == PF_OUT) {
4448				switch (pd->af) {
4449#ifdef INET
4450				case AF_INET:
4451					pf_change_a(&saddr->v4.s_addr,
4452					    pd->ip_sum,
4453					    (*state)->gwy.addr.v4.s_addr, 0);
4454					break;
4455#endif /* INET */
4456#ifdef INET6
4457				case AF_INET6:
4458					pf_change_a6(saddr,
4459					    &pd->hdr.icmp6->icmp6_cksum,
4460					    &(*state)->gwy.addr, 0);
4461					m_copyback(m, off,
4462					    sizeof(struct icmp6_hdr),
4463					    (caddr_t)pd->hdr.icmp6);
4464					break;
4465#endif /* INET6 */
4466				}
4467			} else {
4468				switch (pd->af) {
4469#ifdef INET
4470				case AF_INET:
4471					pf_change_a(&daddr->v4.s_addr,
4472					    pd->ip_sum,
4473					    (*state)->lan.addr.v4.s_addr, 0);
4474					break;
4475#endif /* INET */
4476#ifdef INET6
4477				case AF_INET6:
4478					pf_change_a6(daddr,
4479					    &pd->hdr.icmp6->icmp6_cksum,
4480					    &(*state)->lan.addr, 0);
4481					m_copyback(m, off,
4482					    sizeof(struct icmp6_hdr),
4483					    (caddr_t)pd->hdr.icmp6);
4484					break;
4485#endif /* INET6 */
4486				}
4487			}
4488		}
4489
4490		return (PF_PASS);
4491
4492	} else {
4493		/*
4494		 * ICMP error message in response to a TCP/UDP packet.
4495		 * Extract the inner TCP/UDP header and search for that state.
4496		 */
4497
4498		struct pf_pdesc	pd2;
4499#ifdef INET
4500		struct ip	h2;
4501#endif /* INET */
4502#ifdef INET6
4503		struct ip6_hdr	h2_6;
4504		int		terminal = 0;
4505#endif /* INET6 */
4506		int		ipoff2 = 0;	/* make the compiler happy */
4507		int		off2 = 0;	/* make the compiler happy */
4508
4509		pd2.af = pd->af;
4510		switch (pd->af) {
4511#ifdef INET
4512		case AF_INET:
4513			/* offset of h2 in mbuf chain */
4514			ipoff2 = off + ICMP_MINLEN;
4515
4516			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4517			    NULL, NULL, pd2.af)) {
4518				DPFPRINTF(PF_DEBUG_MISC,
4519				    ("pf: ICMP error message too short "
4520				    "(ip)\n"));
4521				return (PF_DROP);
4522			}
4523			/*
4524			 * ICMP error messages don't refer to non-first
4525			 * fragments
4526			 */
4527			if (h2.ip_off & htons(IP_OFFMASK))
4528				return (PF_DROP);
4529
4530			/* offset of protocol header that follows h2 */
4531			off2 = ipoff2 + (h2.ip_hl << 2);
4532
4533			pd2.proto = h2.ip_p;
4534			pd2.src = (struct pf_addr *)&h2.ip_src;
4535			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4536			pd2.ip_sum = &h2.ip_sum;
4537			break;
4538#endif /* INET */
4539#ifdef INET6
4540		case AF_INET6:
4541			ipoff2 = off + sizeof(struct icmp6_hdr);
4542
4543			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4544			    NULL, NULL, pd2.af)) {
4545				DPFPRINTF(PF_DEBUG_MISC,
4546				    ("pf: ICMP error message too short "
4547				    "(ip6)\n"));
4548				return (PF_DROP);
4549			}
4550			pd2.proto = h2_6.ip6_nxt;
4551			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4552			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4553			pd2.ip_sum = NULL;
4554			off2 = ipoff2 + sizeof(h2_6);
4555			do {
4556				switch (pd2.proto) {
4557				case IPPROTO_FRAGMENT:
4558					/*
4559					 * ICMPv6 error messages for
4560					 * non-first fragments
4561					 */
4562					return (PF_DROP);
4563				case IPPROTO_AH:
4564				case IPPROTO_HOPOPTS:
4565				case IPPROTO_ROUTING:
4566				case IPPROTO_DSTOPTS: {
4567					/* get next header and header length */
4568					struct ip6_ext opt6;
4569
4570					if (!pf_pull_hdr(m, off2, &opt6,
4571					    sizeof(opt6), NULL, NULL, pd2.af)) {
4572						DPFPRINTF(PF_DEBUG_MISC,
4573						    ("pf: ICMPv6 short opt\n"));
4574						return (PF_DROP);
4575					}
4576					if (pd2.proto == IPPROTO_AH)
4577						off2 += (opt6.ip6e_len + 2) * 4;
4578					else
4579						off2 += (opt6.ip6e_len + 1) * 8;
4580					pd2.proto = opt6.ip6e_nxt;
4581					/* goto the next header */
4582					break;
4583				}
4584				default:
4585					terminal++;
4586					break;
4587				}
4588			} while (!terminal);
4589			break;
4590#endif /* INET6 */
4591		}
4592
4593		switch (pd2.proto) {
4594		case IPPROTO_TCP: {
4595			struct tcphdr		 th;
4596			u_int32_t		 seq;
4597			struct pf_state		 key;
4598			struct pf_state_peer	*src, *dst;
4599			u_int8_t		 dws;
4600			int			 copyback = 0;
4601
4602			/*
4603			 * Only the first 8 bytes of the TCP header can be
4604			 * expected. Don't access any TCP header fields after
4605			 * th_seq, an ackskew test is not possible.
4606			 */
4607			if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL, pd2.af)) {
4608				DPFPRINTF(PF_DEBUG_MISC,
4609				    ("pf: ICMP error message too short "
4610				    "(tcp)\n"));
4611				return (PF_DROP);
4612			}
4613
4614			key.af = pd2.af;
4615			key.proto = IPPROTO_TCP;
4616			if (direction == PF_IN)	{
4617				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4618				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4619				key.ext.port = th.th_dport;
4620				key.gwy.port = th.th_sport;
4621			} else {
4622				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4623				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4624				key.lan.port = th.th_dport;
4625				key.ext.port = th.th_sport;
4626			}
4627
4628			STATE_LOOKUP();
4629
4630			if (direction == (*state)->direction) {
4631				src = &(*state)->dst;
4632				dst = &(*state)->src;
4633			} else {
4634				src = &(*state)->src;
4635				dst = &(*state)->dst;
4636			}
4637
4638			if (src->wscale && dst->wscale &&
4639			    !(th.th_flags & TH_SYN))
4640				dws = dst->wscale & PF_WSCALE_MASK;
4641			else
4642				dws = 0;
4643
4644			/* Demodulate sequence number */
4645			seq = ntohl(th.th_seq) - src->seqdiff;
4646			if (src->seqdiff) {
4647				pf_change_a(&th.th_seq, icmpsum,
4648				    htonl(seq), 0);
4649				copyback = 1;
4650			}
4651
4652			if (!SEQ_GEQ(src->seqhi, seq) ||
4653			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
4654				if (pf_status.debug >= PF_DEBUG_MISC) {
4655					printf("pf: BAD ICMP %d:%d ",
4656					    icmptype, pd->hdr.icmp->icmp_code);
4657					pf_print_host(pd->src, 0, pd->af);
4658					printf(" -> ");
4659					pf_print_host(pd->dst, 0, pd->af);
4660					printf(" state: ");
4661					pf_print_state(*state);
4662					printf(" seq=%u\n", seq);
4663				}
4664				return (PF_DROP);
4665			}
4666
4667			if (STATE_TRANSLATE(*state)) {
4668				if (direction == PF_IN) {
4669					pf_change_icmp(pd2.src, &th.th_sport,
4670					    daddr, &(*state)->lan.addr,
4671					    (*state)->lan.port, NULL,
4672					    pd2.ip_sum, icmpsum,
4673					    pd->ip_sum, 0, pd2.af);
4674				} else {
4675					pf_change_icmp(pd2.dst, &th.th_dport,
4676					    saddr, &(*state)->gwy.addr,
4677					    (*state)->gwy.port, NULL,
4678					    pd2.ip_sum, icmpsum,
4679					    pd->ip_sum, 0, pd2.af);
4680				}
4681				copyback = 1;
4682			}
4683
4684			if (copyback) {
4685				switch (pd2.af) {
4686#ifdef INET
4687				case AF_INET:
4688					m_copyback(m, off, ICMP_MINLEN,
4689					    (caddr_t)pd->hdr.icmp);
4690					m_copyback(m, ipoff2, sizeof(h2),
4691					    (caddr_t)&h2);
4692					break;
4693#endif /* INET */
4694#ifdef INET6
4695				case AF_INET6:
4696					m_copyback(m, off,
4697					    sizeof(struct icmp6_hdr),
4698					    (caddr_t)pd->hdr.icmp6);
4699					m_copyback(m, ipoff2, sizeof(h2_6),
4700					    (caddr_t)&h2_6);
4701					break;
4702#endif /* INET6 */
4703				}
4704				m_copyback(m, off2, 8, (caddr_t)&th);
4705			}
4706
4707			return (PF_PASS);
4708			break;
4709		}
4710		case IPPROTO_UDP: {
4711			struct udphdr		uh;
4712			struct pf_state		key;
4713
4714			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4715			    NULL, NULL, pd2.af)) {
4716				DPFPRINTF(PF_DEBUG_MISC,
4717				    ("pf: ICMP error message too short "
4718				    "(udp)\n"));
4719				return (PF_DROP);
4720			}
4721
4722			key.af = pd2.af;
4723			key.proto = IPPROTO_UDP;
4724			if (direction == PF_IN)	{
4725				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4726				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4727				key.ext.port = uh.uh_dport;
4728				key.gwy.port = uh.uh_sport;
4729			} else {
4730				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4731				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4732				key.lan.port = uh.uh_dport;
4733				key.ext.port = uh.uh_sport;
4734			}
4735
4736			STATE_LOOKUP();
4737
4738			if (STATE_TRANSLATE(*state)) {
4739				if (direction == PF_IN) {
4740					pf_change_icmp(pd2.src, &uh.uh_sport,
4741					    daddr, &(*state)->lan.addr,
4742					    (*state)->lan.port, &uh.uh_sum,
4743					    pd2.ip_sum, icmpsum,
4744					    pd->ip_sum, 1, pd2.af);
4745				} else {
4746					pf_change_icmp(pd2.dst, &uh.uh_dport,
4747					    saddr, &(*state)->gwy.addr,
4748					    (*state)->gwy.port, &uh.uh_sum,
4749					    pd2.ip_sum, icmpsum,
4750					    pd->ip_sum, 1, pd2.af);
4751				}
4752				switch (pd2.af) {
4753#ifdef INET
4754				case AF_INET:
4755					m_copyback(m, off, ICMP_MINLEN,
4756					    (caddr_t)pd->hdr.icmp);
4757					m_copyback(m, ipoff2, sizeof(h2),
4758					    (caddr_t)&h2);
4759					break;
4760#endif /* INET */
4761#ifdef INET6
4762				case AF_INET6:
4763					m_copyback(m, off,
4764					    sizeof(struct icmp6_hdr),
4765					    (caddr_t)pd->hdr.icmp6);
4766					m_copyback(m, ipoff2, sizeof(h2_6),
4767					    (caddr_t)&h2_6);
4768					break;
4769#endif /* INET6 */
4770				}
4771				m_copyback(m, off2, sizeof(uh),
4772				    (caddr_t)&uh);
4773			}
4774
4775			return (PF_PASS);
4776			break;
4777		}
4778#ifdef INET
4779		case IPPROTO_ICMP: {
4780			struct icmp		iih;
4781			struct pf_state		key;
4782
4783			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4784			    NULL, NULL, pd2.af)) {
4785				DPFPRINTF(PF_DEBUG_MISC,
4786				    ("pf: ICMP error message too short i"
4787				    "(icmp)\n"));
4788				return (PF_DROP);
4789			}
4790
4791			key.af = pd2.af;
4792			key.proto = IPPROTO_ICMP;
4793			if (direction == PF_IN)	{
4794				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4795				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4796				key.ext.port = iih.icmp_id;
4797				key.gwy.port = iih.icmp_id;
4798			} else {
4799				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4800				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4801				key.lan.port = iih.icmp_id;
4802				key.ext.port = iih.icmp_id;
4803			}
4804
4805			STATE_LOOKUP();
4806
4807			if (STATE_TRANSLATE(*state)) {
4808				if (direction == PF_IN) {
4809					pf_change_icmp(pd2.src, &iih.icmp_id,
4810					    daddr, &(*state)->lan.addr,
4811					    (*state)->lan.port, NULL,
4812					    pd2.ip_sum, icmpsum,
4813					    pd->ip_sum, 0, AF_INET);
4814				} else {
4815					pf_change_icmp(pd2.dst, &iih.icmp_id,
4816					    saddr, &(*state)->gwy.addr,
4817					    (*state)->gwy.port, NULL,
4818					    pd2.ip_sum, icmpsum,
4819					    pd->ip_sum, 0, AF_INET);
4820				}
4821				m_copyback(m, off, ICMP_MINLEN,
4822				    (caddr_t)pd->hdr.icmp);
4823				m_copyback(m, ipoff2, sizeof(h2),
4824				    (caddr_t)&h2);
4825				m_copyback(m, off2, ICMP_MINLEN,
4826				    (caddr_t)&iih);
4827			}
4828
4829			return (PF_PASS);
4830			break;
4831		}
4832#endif /* INET */
4833#ifdef INET6
4834		case IPPROTO_ICMPV6: {
4835			struct icmp6_hdr	iih;
4836			struct pf_state		key;
4837
4838			if (!pf_pull_hdr(m, off2, &iih,
4839			    sizeof(struct icmp6_hdr), NULL, NULL, pd2.af)) {
4840				DPFPRINTF(PF_DEBUG_MISC,
4841				    ("pf: ICMP error message too short "
4842				    "(icmp6)\n"));
4843				return (PF_DROP);
4844			}
4845
4846			key.af = pd2.af;
4847			key.proto = IPPROTO_ICMPV6;
4848			if (direction == PF_IN)	{
4849				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4850				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4851				key.ext.port = iih.icmp6_id;
4852				key.gwy.port = iih.icmp6_id;
4853			} else {
4854				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4855				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4856				key.lan.port = iih.icmp6_id;
4857				key.ext.port = iih.icmp6_id;
4858			}
4859
4860			STATE_LOOKUP();
4861
4862			if (STATE_TRANSLATE(*state)) {
4863				if (direction == PF_IN) {
4864					pf_change_icmp(pd2.src, &iih.icmp6_id,
4865					    daddr, &(*state)->lan.addr,
4866					    (*state)->lan.port, NULL,
4867					    pd2.ip_sum, icmpsum,
4868					    pd->ip_sum, 0, AF_INET6);
4869				} else {
4870					pf_change_icmp(pd2.dst, &iih.icmp6_id,
4871					    saddr, &(*state)->gwy.addr,
4872					    (*state)->gwy.port, NULL,
4873					    pd2.ip_sum, icmpsum,
4874					    pd->ip_sum, 0, AF_INET6);
4875				}
4876				m_copyback(m, off, sizeof(struct icmp6_hdr),
4877				    (caddr_t)pd->hdr.icmp6);
4878				m_copyback(m, ipoff2, sizeof(h2_6),
4879				    (caddr_t)&h2_6);
4880				m_copyback(m, off2, sizeof(struct icmp6_hdr),
4881				    (caddr_t)&iih);
4882			}
4883
4884			return (PF_PASS);
4885			break;
4886		}
4887#endif /* INET6 */
4888		default: {
4889			struct pf_state		key;
4890
4891			key.af = pd2.af;
4892			key.proto = pd2.proto;
4893			if (direction == PF_IN)	{
4894				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4895				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4896				key.ext.port = 0;
4897				key.gwy.port = 0;
4898			} else {
4899				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4900				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4901				key.lan.port = 0;
4902				key.ext.port = 0;
4903			}
4904
4905			STATE_LOOKUP();
4906
4907			if (STATE_TRANSLATE(*state)) {
4908				if (direction == PF_IN) {
4909					pf_change_icmp(pd2.src, NULL,
4910					    daddr, &(*state)->lan.addr,
4911					    0, NULL,
4912					    pd2.ip_sum, icmpsum,
4913					    pd->ip_sum, 0, pd2.af);
4914				} else {
4915					pf_change_icmp(pd2.dst, NULL,
4916					    saddr, &(*state)->gwy.addr,
4917					    0, NULL,
4918					    pd2.ip_sum, icmpsum,
4919					    pd->ip_sum, 0, pd2.af);
4920				}
4921				switch (pd2.af) {
4922#ifdef INET
4923				case AF_INET:
4924					m_copyback(m, off, ICMP_MINLEN,
4925					    (caddr_t)pd->hdr.icmp);
4926					m_copyback(m, ipoff2, sizeof(h2),
4927					    (caddr_t)&h2);
4928					break;
4929#endif /* INET */
4930#ifdef INET6
4931				case AF_INET6:
4932					m_copyback(m, off,
4933					    sizeof(struct icmp6_hdr),
4934					    (caddr_t)pd->hdr.icmp6);
4935					m_copyback(m, ipoff2, sizeof(h2_6),
4936					    (caddr_t)&h2_6);
4937					break;
4938#endif /* INET6 */
4939				}
4940			}
4941
4942			return (PF_PASS);
4943			break;
4944		}
4945		}
4946	}
4947}
4948
4949int
4950pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
4951    struct pf_pdesc *pd)
4952{
4953	struct pf_state_peer	*src, *dst;
4954	struct pf_state		 key;
4955
4956	key.af = pd->af;
4957	key.proto = pd->proto;
4958	if (direction == PF_IN)	{
4959		PF_ACPY(&key.ext.addr, pd->src, key.af);
4960		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4961		key.ext.port = 0;
4962		key.gwy.port = 0;
4963	} else {
4964		PF_ACPY(&key.lan.addr, pd->src, key.af);
4965		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4966		key.lan.port = 0;
4967		key.ext.port = 0;
4968	}
4969
4970	STATE_LOOKUP();
4971
4972	if (direction == (*state)->direction) {
4973		src = &(*state)->src;
4974		dst = &(*state)->dst;
4975	} else {
4976		src = &(*state)->dst;
4977		dst = &(*state)->src;
4978	}
4979
4980	/* update states */
4981	if (src->state < PFOTHERS_SINGLE)
4982		src->state = PFOTHERS_SINGLE;
4983	if (dst->state == PFOTHERS_SINGLE)
4984		dst->state = PFOTHERS_MULTIPLE;
4985
4986	/* update expire time */
4987#ifdef __FreeBSD__
4988	(*state)->expire = time_second;
4989#else
4990	(*state)->expire = time.tv_sec;
4991#endif
4992	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
4993		(*state)->timeout = PFTM_OTHER_MULTIPLE;
4994	else
4995		(*state)->timeout = PFTM_OTHER_SINGLE;
4996
4997	/* translate source/destination address, if necessary */
4998	if (STATE_TRANSLATE(*state)) {
4999		if (direction == PF_OUT)
5000			switch (pd->af) {
5001#ifdef INET
5002			case AF_INET:
5003				pf_change_a(&pd->src->v4.s_addr,
5004				    pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
5005				    0);
5006				break;
5007#endif /* INET */
5008#ifdef INET6
5009			case AF_INET6:
5010				PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
5011				break;
5012#endif /* INET6 */
5013			}
5014		else
5015			switch (pd->af) {
5016#ifdef INET
5017			case AF_INET:
5018				pf_change_a(&pd->dst->v4.s_addr,
5019				    pd->ip_sum, (*state)->lan.addr.v4.s_addr,
5020				    0);
5021				break;
5022#endif /* INET */
5023#ifdef INET6
5024			case AF_INET6:
5025				PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
5026				break;
5027#endif /* INET6 */
5028			}
5029	}
5030
5031	return (PF_PASS);
5032}
5033
5034/*
5035 * ipoff and off are measured from the start of the mbuf chain.
5036 * h must be at "ipoff" on the mbuf chain.
5037 */
5038void *
5039pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5040    u_short *actionp, u_short *reasonp, sa_family_t af)
5041{
5042	switch (af) {
5043#ifdef INET
5044	case AF_INET: {
5045		struct ip	*h = mtod(m, struct ip *);
5046		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5047
5048		if (fragoff) {
5049			if (fragoff >= len)
5050				ACTION_SET(actionp, PF_PASS);
5051			else {
5052				ACTION_SET(actionp, PF_DROP);
5053				REASON_SET(reasonp, PFRES_FRAG);
5054			}
5055			return (NULL);
5056		}
5057		if (m->m_pkthdr.len < off + len ||
5058		    ntohs(h->ip_len) < off + len) {
5059			ACTION_SET(actionp, PF_DROP);
5060			REASON_SET(reasonp, PFRES_SHORT);
5061			return (NULL);
5062		}
5063		break;
5064	}
5065#endif /* INET */
5066#ifdef INET6
5067	case AF_INET6: {
5068		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5069
5070		if (m->m_pkthdr.len < off + len ||
5071		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5072		    (unsigned)(off + len)) {
5073			ACTION_SET(actionp, PF_DROP);
5074			REASON_SET(reasonp, PFRES_SHORT);
5075			return (NULL);
5076		}
5077		break;
5078	}
5079#endif /* INET6 */
5080	}
5081	m_copydata(m, off, len, p);
5082	return (p);
5083}
5084
5085int
5086pf_routable(struct pf_addr *addr, sa_family_t af)
5087{
5088	struct sockaddr_in	*dst;
5089	struct route		 ro;
5090	int			 ret = 0;
5091
5092	bzero(&ro, sizeof(ro));
5093	dst = satosin(&ro.ro_dst);
5094	dst->sin_family = af;
5095	dst->sin_len = sizeof(*dst);
5096	dst->sin_addr = addr->v4;
5097#ifdef __FreeBSD__
5098#ifdef RTF_PRCLONING
5099	rtalloc_ign(&ro, (RTF_CLONING|RTF_PRCLONING));
5100#else /* !RTF_PRCLONING */
5101	rtalloc_ign(&ro, RTF_CLONING);
5102#endif
5103#else /* ! __FreeBSD__ */
5104	rtalloc_noclone(&ro, NO_CLONING);
5105#endif
5106
5107	if (ro.ro_rt != NULL) {
5108		ret = 1;
5109		RTFREE(ro.ro_rt);
5110	}
5111
5112	return (ret);
5113}
5114
5115#ifdef INET
5116
5117void
5118pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5119    struct pf_state *s)
5120{
5121	struct mbuf		*m0, *m1;
5122	struct m_tag		*mtag;
5123	struct route		 iproute;
5124	struct route		*ro = NULL;	/* XXX: was uninitialized */
5125	struct sockaddr_in	*dst;
5126	struct ip		*ip;
5127	struct ifnet		*ifp = NULL;
5128	struct pf_addr		 naddr;
5129	struct pf_src_node	*sn = NULL;
5130	int			 error = 0;
5131#ifdef __FreeBSD__
5132	int sw_csum;
5133#endif
5134
5135	if (m == NULL || *m == NULL || r == NULL ||
5136	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5137		panic("pf_route: invalid parameters");
5138
5139	if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) {
5140		if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) ==
5141		    NULL) {
5142			m0 = *m;
5143			*m = NULL;
5144			goto bad;
5145		}
5146		*(char *)(mtag + 1) = 1;
5147		m_tag_prepend(*m, mtag);
5148	} else {
5149		if (*(char *)(mtag + 1) > 3) {
5150			m0 = *m;
5151			*m = NULL;
5152			goto bad;
5153		}
5154		(*(char *)(mtag + 1))++;
5155	}
5156
5157	if (r->rt == PF_DUPTO) {
5158#ifdef __FreeBSD__
5159		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
5160#else
5161		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5162#endif
5163			return;
5164		if ((mtag = m_tag_copy(mtag)) == NULL)
5165			goto bad;
5166		m_tag_prepend(m0, mtag);
5167	} else {
5168		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5169			return;
5170		m0 = *m;
5171	}
5172
5173	if (m0->m_len < sizeof(struct ip))
5174		panic("pf_route: m0->m_len < sizeof(struct ip)");
5175	ip = mtod(m0, struct ip *);
5176
5177	ro = &iproute;
5178	bzero((caddr_t)ro, sizeof(*ro));
5179	dst = satosin(&ro->ro_dst);
5180	dst->sin_family = AF_INET;
5181	dst->sin_len = sizeof(*dst);
5182	dst->sin_addr = ip->ip_dst;
5183
5184	if (r->rt == PF_FASTROUTE) {
5185		rtalloc(ro);
5186		if (ro->ro_rt == 0) {
5187			ipstat.ips_noroute++;
5188			goto bad;
5189		}
5190
5191		ifp = ro->ro_rt->rt_ifp;
5192		ro->ro_rt->rt_use++;
5193
5194		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
5195			dst = satosin(ro->ro_rt->rt_gateway);
5196	} else {
5197		if (TAILQ_EMPTY(&r->rpool.list))
5198			panic("pf_route: TAILQ_EMPTY(&r->rpool.list)");
5199		if (s == NULL) {
5200			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
5201			    &naddr, NULL, &sn);
5202			if (!PF_AZERO(&naddr, AF_INET))
5203				dst->sin_addr.s_addr = naddr.v4.s_addr;
5204			ifp = r->rpool.cur->kif ?
5205			    r->rpool.cur->kif->pfik_ifp : NULL;
5206		} else {
5207			if (!PF_AZERO(&s->rt_addr, AF_INET))
5208				dst->sin_addr.s_addr =
5209				    s->rt_addr.v4.s_addr;
5210			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5211		}
5212	}
5213	if (ifp == NULL)
5214		goto bad;
5215
5216	if (oifp != ifp) {
5217#ifdef __FreeBSD__
5218		PF_UNLOCK();
5219		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) {
5220			PF_LOCK();
5221			goto bad;
5222		} else if (m0 == NULL) {
5223			PF_LOCK();
5224			goto done;
5225		}
5226		PF_LOCK();
5227#else
5228		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS)
5229			goto bad;
5230		else if (m0 == NULL)
5231			goto done;
5232#endif
5233		if (m0->m_len < sizeof(struct ip))
5234			panic("pf_route: m0->m_len < sizeof(struct ip)");
5235		ip = mtod(m0, struct ip *);
5236	}
5237
5238#ifdef __FreeBSD__
5239	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
5240	m0->m_pkthdr.csum_flags |= CSUM_IP;
5241	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
5242	if (sw_csum & CSUM_DELAY_DATA) {
5243		/*
5244		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
5245		 */
5246		NTOHS(ip->ip_len);
5247		NTOHS(ip->ip_off);	 /* XXX: needed? */
5248		in_delayed_cksum(m0);
5249		HTONS(ip->ip_len);
5250		HTONS(ip->ip_off);
5251		sw_csum &= ~CSUM_DELAY_DATA;
5252	}
5253	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
5254
5255	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
5256	    (ifp->if_hwassist & CSUM_FRAGMENT &&
5257		((ip->ip_off & htons(IP_DF)) == 0))) {
5258		/*
5259		 * ip->ip_len = htons(ip->ip_len);
5260		 * ip->ip_off = htons(ip->ip_off);
5261		 */
5262		ip->ip_sum = 0;
5263		if (sw_csum & CSUM_DELAY_IP) {
5264			/* From KAME */
5265			if (ip->ip_v == IPVERSION &&
5266			    (ip->ip_hl << 2) == sizeof(*ip)) {
5267				ip->ip_sum = in_cksum_hdr(ip);
5268			} else {
5269				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5270			}
5271		}
5272		PF_UNLOCK();
5273		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt);
5274		PF_LOCK();
5275		goto done;
5276	}
5277
5278#else
5279	/* Copied from ip_output. */
5280#ifdef IPSEC
5281	/*
5282	 * If deferred crypto processing is needed, check that the
5283	 * interface supports it.
5284	 */
5285	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
5286	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
5287		/* Notify IPsec to do its own crypto. */
5288		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
5289		goto bad;
5290	}
5291#endif /* IPSEC */
5292
5293	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
5294	if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) {
5295		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
5296		    ifp->if_bridge != NULL) {
5297			in_delayed_cksum(m0);
5298			m0->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */
5299		}
5300	} else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) {
5301		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
5302		    ifp->if_bridge != NULL) {
5303			in_delayed_cksum(m0);
5304			m0->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */
5305		}
5306	}
5307
5308	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
5309		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
5310		    ifp->if_bridge == NULL) {
5311			m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
5312			ipstat.ips_outhwcsum++;
5313		} else {
5314			ip->ip_sum = 0;
5315			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5316		}
5317		/* Update relevant hardware checksum stats for TCP/UDP */
5318		if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT)
5319			tcpstat.tcps_outhwcsum++;
5320		else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT)
5321			udpstat.udps_outhwcsum++;
5322		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
5323		goto done;
5324	}
5325#endif
5326	/*
5327	 * Too large for interface; fragment if possible.
5328	 * Must be able to put at least 8 bytes per fragment.
5329	 */
5330	if (ip->ip_off & htons(IP_DF)) {
5331		ipstat.ips_cantfrag++;
5332		if (r->rt != PF_DUPTO) {
5333#ifdef __FreeBSD__
5334			/* icmp_error() expects host byte ordering */
5335			NTOHS(ip->ip_len);
5336			NTOHS(ip->ip_off);
5337			PF_UNLOCK();
5338#endif
5339			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5340			    ifp);
5341#ifdef __FreeBSD__
5342			PF_LOCK();
5343#endif
5344			goto done;
5345		} else
5346			goto bad;
5347	}
5348
5349	m1 = m0;
5350#ifdef __FreeBSD__
5351	/*
5352	 * XXX: is cheaper + less error prone than own function
5353	 */
5354	NTOHS(ip->ip_len);
5355	NTOHS(ip->ip_off);
5356	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
5357#else
5358	error = ip_fragment(m0, ifp, ifp->if_mtu);
5359#endif
5360	if (error) {
5361#ifndef __FreeBSD__	/* ip_fragment does not do m_freem() on FreeBSD */
5362		m0 = NULL;
5363#endif
5364		goto bad;
5365	}
5366
5367	for (m0 = m1; m0; m0 = m1) {
5368		m1 = m0->m_nextpkt;
5369		m0->m_nextpkt = 0;
5370#ifdef __FreeBSD__
5371		if (error == 0) {
5372			PF_UNLOCK();
5373			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5374			    NULL);
5375			PF_LOCK();
5376		} else
5377#else
5378		if (error == 0)
5379			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5380			    NULL);
5381		else
5382#endif
5383			m_freem(m0);
5384	}
5385
5386	if (error == 0)
5387		ipstat.ips_fragmented++;
5388
5389done:
5390	if (r->rt != PF_DUPTO)
5391		*m = NULL;
5392	if (ro == &iproute && ro->ro_rt)
5393		RTFREE(ro->ro_rt);
5394	return;
5395
5396bad:
5397	m_freem(m0);
5398	goto done;
5399}
5400#endif /* INET */
5401
5402#ifdef INET6
5403void
5404pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5405    struct pf_state *s)
5406{
5407	struct mbuf		*m0;
5408	struct m_tag		*mtag;
5409	struct route_in6	 ip6route;
5410	struct route_in6	*ro;
5411	struct sockaddr_in6	*dst;
5412	struct ip6_hdr		*ip6;
5413	struct ifnet		*ifp = NULL;
5414	struct pf_addr		 naddr;
5415	struct pf_src_node	*sn = NULL;
5416	int			 error = 0;
5417
5418	if (m == NULL || *m == NULL || r == NULL ||
5419	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5420		panic("pf_route6: invalid parameters");
5421
5422	if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) {
5423		if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) ==
5424		    NULL) {
5425			m0 = *m;
5426			*m = NULL;
5427			goto bad;
5428		}
5429		*(char *)(mtag + 1) = 1;
5430		m_tag_prepend(*m, mtag);
5431	} else {
5432		if (*(char *)(mtag + 1) > 3) {
5433			m0 = *m;
5434			*m = NULL;
5435			goto bad;
5436		}
5437		(*(char *)(mtag + 1))++;
5438	}
5439
5440	if (r->rt == PF_DUPTO) {
5441#ifdef __FreeBSD__
5442		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
5443#else
5444		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5445#endif
5446			return;
5447		if ((mtag = m_tag_copy(mtag)) == NULL)
5448			goto bad;
5449		m_tag_prepend(m0, mtag);
5450	} else {
5451		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5452			return;
5453		m0 = *m;
5454	}
5455
5456	if (m0->m_len < sizeof(struct ip6_hdr))
5457		panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
5458	ip6 = mtod(m0, struct ip6_hdr *);
5459
5460	ro = &ip6route;
5461	bzero((caddr_t)ro, sizeof(*ro));
5462	dst = (struct sockaddr_in6 *)&ro->ro_dst;
5463	dst->sin6_family = AF_INET6;
5464	dst->sin6_len = sizeof(*dst);
5465	dst->sin6_addr = ip6->ip6_dst;
5466
5467	/* Cheat. */
5468	if (r->rt == PF_FASTROUTE) {
5469#ifdef __FreeBSD__
5470		m0->m_flags |= M_SKIP_FIREWALL;
5471		PF_UNLOCK();
5472		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5473		PF_LOCK();
5474#else
5475		mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
5476		if (mtag == NULL)
5477			goto bad;
5478		m_tag_prepend(m0, mtag);
5479		ip6_output(m0, NULL, NULL, 0, NULL, NULL);
5480#endif
5481		return;
5482	}
5483
5484	if (TAILQ_EMPTY(&r->rpool.list))
5485		panic("pf_route6: TAILQ_EMPTY(&r->rpool.list)");
5486	if (s == NULL) {
5487		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
5488		    &naddr, NULL, &sn);
5489		if (!PF_AZERO(&naddr, AF_INET6))
5490			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5491			    &naddr, AF_INET6);
5492		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
5493	} else {
5494		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5495			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5496			    &s->rt_addr, AF_INET6);
5497		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5498	}
5499	if (ifp == NULL)
5500		goto bad;
5501
5502	if (oifp != ifp) {
5503#ifdef __FreeBSD__
5504		PF_UNLOCK();
5505		if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) {
5506			PF_LOCK();
5507			goto bad;
5508		} else if (m0 == NULL) {
5509			PF_LOCK();
5510			goto done;
5511		}
5512		PF_LOCK();
5513#else
5514		if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS)
5515			goto bad;
5516		else if (m0 == NULL)
5517			goto done;
5518#endif
5519		if (m0->m_len < sizeof(struct ip6_hdr))
5520			panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
5521		ip6 = mtod(m0, struct ip6_hdr *);
5522	}
5523
5524	/*
5525	 * If the packet is too large for the outgoing interface,
5526	 * send back an icmp6 error.
5527	 */
5528	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
5529		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5530	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5531#ifdef __FreeBSD__
5532		PF_UNLOCK();
5533#endif
5534		error = nd6_output(ifp, ifp, m0, dst, NULL);
5535#ifdef __FreeBSD__
5536		PF_LOCK();
5537#endif
5538	} else {
5539		in6_ifstat_inc(ifp, ifs6_in_toobig);
5540#ifdef __FreeBSD__
5541		if (r->rt != PF_DUPTO) {
5542			PF_UNLOCK();
5543			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5544			PF_LOCK();
5545		 } else
5546#else
5547		if (r->rt != PF_DUPTO)
5548			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5549		else
5550#endif
5551			goto bad;
5552	}
5553
5554done:
5555	if (r->rt != PF_DUPTO)
5556		*m = NULL;
5557	return;
5558
5559bad:
5560	m_freem(m0);
5561	goto done;
5562}
5563#endif /* INET6 */
5564
5565
5566#ifdef __FreeBSD__
5567/*
5568 * XXX
5569 * FreeBSD supports cksum offload for the following drivers.
5570 * em(4), gx(4), lge(4), nge(4), ti(4), xl(4)
5571 * If we can make full use of it we would outperform ipfw/ipfilter in
5572 * very heavy traffic.
5573 * I have not tested 'cause I don't have NICs that supports cksum offload.
5574 * (There might be problems. Typical phenomena would be
5575 *   1. No route message for UDP packet.
5576 *   2. No connection acceptance from external hosts regardless of rule set.)
5577 */
5578int
5579pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
5580{
5581	u_int16_t sum = 0;
5582	int hw_assist = 0;
5583	struct ip *ip;
5584
5585	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5586		return (1);
5587	if (m->m_pkthdr.len < off + len)
5588		return (1);
5589
5590	switch (p) {
5591	case IPPROTO_TCP:
5592		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5593			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5594				sum = m->m_pkthdr.csum_data;
5595			} else {
5596				ip = mtod(m, struct ip *);
5597				sum = in_pseudo(ip->ip_src.s_addr,
5598					ip->ip_dst.s_addr,
5599					htonl(m->m_pkthdr.csum_data +
5600					    IPPROTO_TCP) + ip->ip_len);
5601			}
5602			sum ^= 0xffff;
5603			++hw_assist;
5604		}
5605		break;
5606	case IPPROTO_UDP:
5607		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5608			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5609				sum = m->m_pkthdr.csum_data;
5610			} else {
5611				ip = mtod(m, struct ip *);
5612				sum = in_pseudo(ip->ip_src.s_addr,
5613					ip->ip_dst.s_addr, htonl((u_short)len +
5614					m->m_pkthdr.csum_data + IPPROTO_UDP));
5615			}
5616			sum ^= 0xffff;
5617			++hw_assist;
5618                }
5619		break;
5620	case IPPROTO_ICMP:
5621#ifdef INET6
5622	case IPPROTO_ICMPV6:
5623#endif /* INET6 */
5624		break;
5625	default:
5626		return (1);
5627	}
5628
5629	if (!hw_assist) {
5630		switch (af) {
5631		case AF_INET:
5632			if (p == IPPROTO_ICMP) {
5633				if (m->m_len < off)
5634					return (1);
5635				m->m_data += off;
5636				m->m_len -= off;
5637				sum = in_cksum(m, len);
5638				m->m_data -= off;
5639				m->m_len += off;
5640			} else {
5641				if (m->m_len < sizeof(struct ip))
5642					return (1);
5643				sum = in4_cksum(m, p, off, len);
5644				if (sum == 0) {
5645					m->m_pkthdr.csum_flags |=
5646					    (CSUM_DATA_VALID |
5647					     CSUM_PSEUDO_HDR);
5648					m->m_pkthdr.csum_data = 0xffff;
5649				}
5650			}
5651			break;
5652#ifdef INET6
5653		case AF_INET6:
5654			if (m->m_len < sizeof(struct ip6_hdr))
5655				return (1);
5656			sum = in6_cksum(m, p, off, len);
5657			/*
5658			 * XXX
5659			 * IPv6 H/W cksum off-load not supported yet!
5660			 *
5661			 * if (sum == 0) {
5662			 *	m->m_pkthdr.csum_flags |=
5663			 *	    (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
5664			 *	m->m_pkthdr.csum_data = 0xffff;
5665			 *}
5666			 */
5667			break;
5668#endif /* INET6 */
5669		default:
5670			return (1);
5671		}
5672	}
5673	if (sum) {
5674		switch (p) {
5675		case IPPROTO_TCP:
5676			tcpstat.tcps_rcvbadsum++;
5677			break;
5678		case IPPROTO_UDP:
5679			udpstat.udps_badsum++;
5680			break;
5681		case IPPROTO_ICMP:
5682			icmpstat.icps_checksum++;
5683			break;
5684#ifdef INET6
5685		case IPPROTO_ICMPV6:
5686			icmp6stat.icp6s_checksum++;
5687			break;
5688#endif /* INET6 */
5689		}
5690		return (1);
5691	}
5692	return (0);
5693}
5694#else
5695/*
5696 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
5697 *   off is the offset where the protocol header starts
5698 *   len is the total length of protocol header plus payload
5699 * returns 0 when the checksum is valid, otherwise returns 1.
5700 */
5701int
5702pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
5703    sa_family_t af)
5704{
5705	u_int16_t flag_ok, flag_bad;
5706	u_int16_t sum;
5707
5708	switch (p) {
5709	case IPPROTO_TCP:
5710		flag_ok = M_TCP_CSUM_IN_OK;
5711		flag_bad = M_TCP_CSUM_IN_BAD;
5712		break;
5713	case IPPROTO_UDP:
5714		flag_ok = M_UDP_CSUM_IN_OK;
5715		flag_bad = M_UDP_CSUM_IN_BAD;
5716		break;
5717	case IPPROTO_ICMP:
5718#ifdef INET6
5719	case IPPROTO_ICMPV6:
5720#endif /* INET6 */
5721		flag_ok = flag_bad = 0;
5722		break;
5723	default:
5724		return (1);
5725	}
5726	if (m->m_pkthdr.csum & flag_ok)
5727		return (0);
5728	if (m->m_pkthdr.csum & flag_bad)
5729		return (1);
5730	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5731		return (1);
5732	if (m->m_pkthdr.len < off + len)
5733		return (1);
5734		switch (af) {
5735	case AF_INET:
5736		if (p == IPPROTO_ICMP) {
5737			if (m->m_len < off)
5738				return (1);
5739			m->m_data += off;
5740			m->m_len -= off;
5741			sum = in_cksum(m, len);
5742			m->m_data -= off;
5743			m->m_len += off;
5744		} else {
5745			if (m->m_len < sizeof(struct ip))
5746				return (1);
5747			sum = in4_cksum(m, p, off, len);
5748		}
5749		break;
5750#ifdef INET6
5751	case AF_INET6:
5752		if (m->m_len < sizeof(struct ip6_hdr))
5753			return (1);
5754		sum = in6_cksum(m, p, off, len);
5755		break;
5756#endif /* INET6 */
5757	default:
5758		return (1);
5759	}
5760	if (sum) {
5761		m->m_pkthdr.csum |= flag_bad;
5762		switch (p) {
5763		case IPPROTO_TCP:
5764			tcpstat.tcps_rcvbadsum++;
5765			break;
5766		case IPPROTO_UDP:
5767			udpstat.udps_badsum++;
5768			break;
5769		case IPPROTO_ICMP:
5770			icmpstat.icps_checksum++;
5771			break;
5772#ifdef INET6
5773		case IPPROTO_ICMPV6:
5774			icmp6stat.icp6s_checksum++;
5775			break;
5776#endif /* INET6 */
5777		}
5778		return (1);
5779	}
5780	m->m_pkthdr.csum |= flag_ok;
5781	return (0);
5782}
5783#endif
5784
5785static int
5786pf_add_mbuf_tag(struct mbuf *m, u_int tag)
5787{
5788	struct m_tag *mtag;
5789
5790	if (m_tag_find(m, tag, NULL) != NULL)
5791		return (0);
5792	mtag = m_tag_get(tag, 0, M_NOWAIT);
5793	if (mtag == NULL)
5794		return (1);
5795	m_tag_prepend(m, mtag);
5796	return (0);
5797}
5798
5799#ifdef INET
5800int
5801pf_test(int dir, struct ifnet *ifp, struct mbuf **m0)
5802{
5803	struct pfi_kif		*kif;
5804	u_short			 action, reason = 0, log = 0;
5805	struct mbuf		*m = *m0;
5806	struct ip		*h = NULL;	/* make the compiler happy */
5807	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
5808	struct pf_state		*s = NULL;
5809	struct pf_ruleset	*ruleset = NULL;
5810	struct pf_pdesc		 pd;
5811	int			 off, dirndx, pqid = 0;
5812
5813#ifdef __FreeBSD__
5814	PF_LOCK();
5815#endif
5816	if (!pf_status.running ||
5817#ifdef __FreeBSD__
5818	    (m->m_flags & M_SKIP_FIREWALL)) {
5819		PF_UNLOCK();
5820#else
5821	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
5822#endif
5823	    	return (PF_PASS);
5824	}
5825
5826	kif = pfi_index2kif[ifp->if_index];
5827	if (kif == NULL) {
5828#ifdef __FreeBSD__
5829		PF_UNLOCK();
5830#endif
5831		return (PF_DROP);
5832	}
5833
5834#ifdef __FreeBSD__
5835	M_ASSERTPKTHDR(m);
5836#else
5837#ifdef DIAGNOSTIC
5838	if ((m->m_flags & M_PKTHDR) == 0)
5839		panic("non-M_PKTHDR is passed to pf_test");
5840#endif
5841#endif
5842
5843	memset(&pd, 0, sizeof(pd));
5844	if (m->m_pkthdr.len < (int)sizeof(*h)) {
5845		action = PF_DROP;
5846		REASON_SET(&reason, PFRES_SHORT);
5847		log = 1;
5848		goto done;
5849	}
5850
5851	/* We do IP header normalization and packet reassembly here */
5852	if (pf_normalize_ip(m0, dir, kif, &reason) != PF_PASS) {
5853		action = PF_DROP;
5854		goto done;
5855	}
5856	m = *m0;
5857	h = mtod(m, struct ip *);
5858
5859	off = h->ip_hl << 2;
5860	if (off < (int)sizeof(*h)) {
5861		action = PF_DROP;
5862		REASON_SET(&reason, PFRES_SHORT);
5863		log = 1;
5864		goto done;
5865	}
5866
5867	pd.src = (struct pf_addr *)&h->ip_src;
5868	pd.dst = (struct pf_addr *)&h->ip_dst;
5869	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
5870	pd.ip_sum = &h->ip_sum;
5871	pd.proto = h->ip_p;
5872	pd.af = AF_INET;
5873	pd.tos = h->ip_tos;
5874	pd.tot_len = ntohs(h->ip_len);
5875
5876	/* handle fragments that didn't get reassembled by normalization */
5877	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
5878		action = pf_test_fragment(&r, dir, kif, m, h,
5879		    &pd, &a, &ruleset);
5880		goto done;
5881	}
5882
5883	switch (h->ip_p) {
5884
5885	case IPPROTO_TCP: {
5886		struct tcphdr	th;
5887
5888		pd.hdr.tcp = &th;
5889		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5890		    &action, &reason, AF_INET)) {
5891			log = action != PF_PASS;
5892			goto done;
5893		}
5894		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5895		    ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
5896			action = PF_DROP;
5897			goto done;
5898		}
5899		pd.p_len = pd.tot_len - off - (th.th_off << 2);
5900		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
5901			pqid = 1;
5902		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
5903		if (action == PF_DROP)
5904			goto done;
5905		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
5906		    &reason);
5907		if (action == PF_PASS) {
5908#if NPFSYNC
5909			pfsync_update_state(s);
5910#endif
5911			r = s->rule.ptr;
5912			a = s->anchor.ptr;
5913			log = s->log;
5914		} else if (s == NULL)
5915			action = pf_test_tcp(&r, &s, dir, kif,
5916			    m, off, h, &pd, &a, &ruleset);
5917		break;
5918	}
5919
5920	case IPPROTO_UDP: {
5921		struct udphdr	uh;
5922
5923		pd.hdr.udp = &uh;
5924		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5925		    &action, &reason, AF_INET)) {
5926			log = action != PF_PASS;
5927			goto done;
5928		}
5929		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
5930		    off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
5931			action = PF_DROP;
5932			goto done;
5933		}
5934		if (uh.uh_dport == 0 ||
5935		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
5936		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
5937			action = PF_DROP;
5938			goto done;
5939		}
5940		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
5941		if (action == PF_PASS) {
5942#if NPFSYNC
5943			pfsync_update_state(s);
5944#endif
5945			r = s->rule.ptr;
5946			a = s->anchor.ptr;
5947			log = s->log;
5948		} else if (s == NULL)
5949			action = pf_test_udp(&r, &s, dir, kif,
5950			    m, off, h, &pd, &a, &ruleset);
5951		break;
5952	}
5953
5954	case IPPROTO_ICMP: {
5955		struct icmp	ih;
5956
5957		pd.hdr.icmp = &ih;
5958		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
5959		    &action, &reason, AF_INET)) {
5960			log = action != PF_PASS;
5961			goto done;
5962		}
5963		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5964		    ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
5965			action = PF_DROP;
5966			goto done;
5967		}
5968		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd);
5969		if (action == PF_PASS) {
5970#if NPFSYNC
5971			pfsync_update_state(s);
5972#endif
5973			r = s->rule.ptr;
5974			a = s->anchor.ptr;
5975			log = s->log;
5976		} else if (s == NULL)
5977			action = pf_test_icmp(&r, &s, dir, kif,
5978			    m, off, h, &pd, &a, &ruleset);
5979		break;
5980	}
5981
5982	default:
5983		action = pf_test_state_other(&s, dir, kif, &pd);
5984		if (action == PF_PASS) {
5985#if NPFSYNC
5986			pfsync_update_state(s);
5987#endif
5988			r = s->rule.ptr;
5989			a = s->anchor.ptr;
5990			log = s->log;
5991		} else if (s == NULL)
5992			action = pf_test_other(&r, &s, dir, kif, m, off, h,
5993			    &pd, &a, &ruleset);
5994		break;
5995	}
5996
5997done:
5998	if (action == PF_PASS && h->ip_hl > 5 &&
5999	    !((s && s->allow_opts) || r->allow_opts)) {
6000		action = PF_DROP;
6001		REASON_SET(&reason, PFRES_SHORT);
6002		log = 1;
6003		DPFPRINTF(PF_DEBUG_MISC,
6004		    ("pf: dropping packet with ip options\n"));
6005	}
6006
6007#ifdef ALTQ
6008	if (action == PF_PASS && r->qid) {
6009		struct m_tag	*mtag;
6010		struct altq_tag	*atag;
6011
6012		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6013		if (mtag != NULL) {
6014			atag = (struct altq_tag *)(mtag + 1);
6015			if (pqid || pd.tos == IPTOS_LOWDELAY)
6016				atag->qid = r->pqid;
6017			else
6018				atag->qid = r->qid;
6019			/* add hints for ecn */
6020			atag->af = AF_INET;
6021			atag->hdr = h;
6022			m_tag_prepend(m, mtag);
6023		}
6024	}
6025#endif
6026
6027	/*
6028	 * connections redirected to loopback should not match sockets
6029	 * bound specifically to loopback due to security implications,
6030	 * see tcp_input() and in_pcblookup_listen().
6031	 */
6032	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6033	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6034	    (s->nat_rule.ptr->action == PF_RDR ||
6035	    s->nat_rule.ptr->action == PF_BINAT) &&
6036	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
6037	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
6038		action = PF_DROP;
6039		REASON_SET(&reason, PFRES_MEMORY);
6040	}
6041
6042	if (log)
6043		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, a, ruleset);
6044
6045	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6046	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
6047
6048	if (action == PF_PASS || r->action == PF_DROP) {
6049		r->packets++;
6050		r->bytes += pd.tot_len;
6051		if (a != NULL) {
6052			a->packets++;
6053			a->bytes += pd.tot_len;
6054		}
6055		if (s != NULL) {
6056			dirndx = (dir == s->direction) ? 0 : 1;
6057			s->packets[dirndx]++;
6058			s->bytes[dirndx] += pd.tot_len;
6059			if (s->nat_rule.ptr != NULL) {
6060				s->nat_rule.ptr->packets++;
6061				s->nat_rule.ptr->bytes += pd.tot_len;
6062			}
6063			if (s->src_node != NULL) {
6064				s->src_node->packets++;
6065				s->src_node->bytes += pd.tot_len;
6066			}
6067			if (s->nat_src_node != NULL) {
6068				s->nat_src_node->packets++;
6069				s->nat_src_node->bytes += pd.tot_len;
6070			}
6071		}
6072		tr = r;
6073		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6074		if (nr != NULL) {
6075			struct pf_addr *x;
6076			/*
6077			 * XXX: we need to make sure that the addresses
6078			 * passed to pfr_update_stats() are the same than
6079			 * the addresses used during matching (pfr_match)
6080			 */
6081			if (r == &pf_default_rule) {
6082				tr = nr;
6083				x = (s == NULL || s->direction == dir) ?
6084				    &pd.baddr : &pd.naddr;
6085			} else
6086				x = (s == NULL || s->direction == dir) ?
6087				    &pd.naddr : &pd.baddr;
6088			if (x == &pd.baddr || s == NULL) {
6089				/* we need to change the address */
6090				if (dir == PF_OUT)
6091					pd.src = x;
6092				else
6093					pd.dst = x;
6094			}
6095		}
6096		if (tr->src.addr.type == PF_ADDR_TABLE)
6097			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6098			    s->direction == dir) ? pd.src : pd.dst, pd.af,
6099			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6100			    tr->src.not);
6101		if (tr->dst.addr.type == PF_ADDR_TABLE)
6102			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6103			    s->direction == dir) ? pd.dst : pd.src, pd.af,
6104			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6105			    tr->dst.not);
6106	}
6107
6108
6109	if (action == PF_SYNPROXY_DROP) {
6110		m_freem(*m0);
6111		*m0 = NULL;
6112		action = PF_PASS;
6113	} else if (r->rt)
6114		/* pf_route can free the mbuf causing *m0 to become NULL */
6115		pf_route(m0, r, dir, ifp, s);
6116
6117#ifdef __FreeBSD__
6118	PF_UNLOCK();
6119#endif
6120
6121	return (action);
6122}
6123#endif /* INET */
6124
6125#ifdef INET6
6126int
6127pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0)
6128{
6129	struct pfi_kif		*kif;
6130	u_short			 action, reason = 0, log = 0;
6131	struct mbuf		*m = *m0;
6132	struct ip6_hdr		*h = NULL;	/* make the compiler happy */
6133	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
6134	struct pf_state		*s = NULL;
6135	struct pf_ruleset	*ruleset = NULL;
6136	struct pf_pdesc		 pd;
6137	int			 off, terminal = 0, dirndx;
6138
6139#ifdef __FreeBSD__
6140	PF_LOCK();
6141#endif
6142
6143	if (!pf_status.running ||
6144#ifdef __FreeBSD__
6145	    (m->m_flags & M_SKIP_FIREWALL)) {
6146		PF_UNLOCK();
6147#else
6148	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
6149#endif
6150		return (PF_PASS);
6151	}
6152
6153	kif = pfi_index2kif[ifp->if_index];
6154	if (kif == NULL) {
6155#ifdef __FreeBSD__
6156		PF_UNLOCK();
6157#endif
6158		return (PF_DROP);
6159	}
6160
6161#ifdef __FreeBSD__
6162	M_ASSERTPKTHDR(m);
6163#else
6164#ifdef DIAGNOSTIC
6165	if ((m->m_flags & M_PKTHDR) == 0)
6166		panic("non-M_PKTHDR is passed to pf_test");
6167#endif
6168#endif
6169
6170	memset(&pd, 0, sizeof(pd));
6171	if (m->m_pkthdr.len < (int)sizeof(*h)) {
6172		action = PF_DROP;
6173		REASON_SET(&reason, PFRES_SHORT);
6174		log = 1;
6175		goto done;
6176	}
6177
6178	/* We do IP header normalization and packet reassembly here */
6179	if (pf_normalize_ip6(m0, dir, kif, &reason) != PF_PASS) {
6180		action = PF_DROP;
6181		goto done;
6182	}
6183	m = *m0;
6184	h = mtod(m, struct ip6_hdr *);
6185
6186	pd.src = (struct pf_addr *)&h->ip6_src;
6187	pd.dst = (struct pf_addr *)&h->ip6_dst;
6188	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
6189	pd.ip_sum = NULL;
6190	pd.af = AF_INET6;
6191	pd.tos = 0;
6192	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6193
6194	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
6195	pd.proto = h->ip6_nxt;
6196	do {
6197		switch (pd.proto) {
6198		case IPPROTO_FRAGMENT:
6199			action = pf_test_fragment(&r, dir, kif, m, h,
6200			    &pd, &a, &ruleset);
6201			if (action == PF_DROP)
6202				REASON_SET(&reason, PFRES_FRAG);
6203			goto done;
6204		case IPPROTO_AH:
6205		case IPPROTO_HOPOPTS:
6206		case IPPROTO_ROUTING:
6207		case IPPROTO_DSTOPTS: {
6208			/* get next header and header length */
6209			struct ip6_ext	opt6;
6210
6211			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6212			    NULL, NULL, pd.af)) {
6213				DPFPRINTF(PF_DEBUG_MISC,
6214				    ("pf: IPv6 short opt\n"));
6215				action = PF_DROP;
6216				REASON_SET(&reason, PFRES_SHORT);
6217				log = 1;
6218				goto done;
6219			}
6220			if (pd.proto == IPPROTO_AH)
6221				off += (opt6.ip6e_len + 2) * 4;
6222			else
6223				off += (opt6.ip6e_len + 1) * 8;
6224			pd.proto = opt6.ip6e_nxt;
6225			/* goto the next header */
6226			break;
6227		}
6228		default:
6229			terminal++;
6230			break;
6231		}
6232	} while (!terminal);
6233
6234	switch (pd.proto) {
6235
6236	case IPPROTO_TCP: {
6237		struct tcphdr	th;
6238
6239		pd.hdr.tcp = &th;
6240		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6241		    &action, &reason, AF_INET6)) {
6242			log = action != PF_PASS;
6243			goto done;
6244		}
6245		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6246		    ntohs(h->ip6_plen), IPPROTO_TCP, AF_INET6)) {
6247			action = PF_DROP;
6248			goto done;
6249		}
6250		pd.p_len = pd.tot_len - off - (th.th_off << 2);
6251		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6252		if (action == PF_DROP)
6253			goto done;
6254		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6255		    &reason);
6256		if (action == PF_PASS) {
6257#if NPFSYNC
6258			pfsync_update_state(s);
6259#endif
6260			r = s->rule.ptr;
6261			a = s->anchor.ptr;
6262			log = s->log;
6263		} else if (s == NULL)
6264			action = pf_test_tcp(&r, &s, dir, kif,
6265			    m, off, h, &pd, &a, &ruleset);
6266		break;
6267	}
6268
6269	case IPPROTO_UDP: {
6270		struct udphdr	uh;
6271
6272		pd.hdr.udp = &uh;
6273		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6274		    &action, &reason, AF_INET6)) {
6275			log = action != PF_PASS;
6276			goto done;
6277		}
6278		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
6279		    off, ntohs(h->ip6_plen), IPPROTO_UDP, AF_INET6)) {
6280			action = PF_DROP;
6281			goto done;
6282		}
6283		if (uh.uh_dport == 0 ||
6284		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6285		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6286			action = PF_DROP;
6287			goto done;
6288		}
6289		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6290		if (action == PF_PASS) {
6291#if NPFSYNC
6292			pfsync_update_state(s);
6293#endif
6294			r = s->rule.ptr;
6295			a = s->anchor.ptr;
6296			log = s->log;
6297		} else if (s == NULL)
6298			action = pf_test_udp(&r, &s, dir, kif,
6299			    m, off, h, &pd, &a, &ruleset);
6300		break;
6301	}
6302
6303	case IPPROTO_ICMPV6: {
6304		struct icmp6_hdr	ih;
6305
6306		pd.hdr.icmp6 = &ih;
6307		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6308		    &action, &reason, AF_INET6)) {
6309			log = action != PF_PASS;
6310			goto done;
6311		}
6312		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6313		    ntohs(h->ip6_plen), IPPROTO_ICMPV6, AF_INET6)) {
6314			action = PF_DROP;
6315			goto done;
6316		}
6317		action = pf_test_state_icmp(&s, dir, kif,
6318		    m, off, h, &pd);
6319		if (action == PF_PASS) {
6320#if NPFSYNC
6321			pfsync_update_state(s);
6322#endif
6323			r = s->rule.ptr;
6324			a = s->anchor.ptr;
6325			log = s->log;
6326		} else if (s == NULL)
6327			action = pf_test_icmp(&r, &s, dir, kif,
6328			    m, off, h, &pd, &a, &ruleset);
6329		break;
6330	}
6331
6332	default:
6333		action = pf_test_state_other(&s, dir, kif, &pd);
6334		if (action == PF_PASS) {
6335			r = s->rule.ptr;
6336			a = s->anchor.ptr;
6337			log = s->log;
6338		} else if (s == NULL)
6339			action = pf_test_other(&r, &s, dir, kif, m, off, h,
6340			    &pd, &a, &ruleset);
6341		break;
6342	}
6343
6344done:
6345	/* XXX handle IPv6 options, if not allowed. not implemented. */
6346
6347#ifdef ALTQ
6348	if (action == PF_PASS && r->qid) {
6349		struct m_tag	*mtag;
6350		struct altq_tag	*atag;
6351
6352		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6353		if (mtag != NULL) {
6354			atag = (struct altq_tag *)(mtag + 1);
6355			if (pd.tos == IPTOS_LOWDELAY)
6356				atag->qid = r->pqid;
6357			else
6358				atag->qid = r->qid;
6359			/* add hints for ecn */
6360			atag->af = AF_INET6;
6361			atag->hdr = h;
6362			m_tag_prepend(m, mtag);
6363		}
6364	}
6365#endif
6366
6367	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6368	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6369	    (s->nat_rule.ptr->action == PF_RDR ||
6370	    s->nat_rule.ptr->action == PF_BINAT) &&
6371	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6) &&
6372	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
6373		action = PF_DROP;
6374		REASON_SET(&reason, PFRES_MEMORY);
6375	}
6376
6377	if (log)
6378		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, r, a, ruleset);
6379
6380	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6381	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6382
6383	if (action == PF_PASS || r->action == PF_DROP) {
6384		r->packets++;
6385		r->bytes += pd.tot_len;
6386		if (a != NULL) {
6387			a->packets++;
6388			a->bytes += pd.tot_len;
6389		}
6390		if (s != NULL) {
6391			dirndx = (dir == s->direction) ? 0 : 1;
6392			s->packets[dirndx]++;
6393			s->bytes[dirndx] += pd.tot_len;
6394			if (s->nat_rule.ptr != NULL) {
6395				s->nat_rule.ptr->packets++;
6396				s->nat_rule.ptr->bytes += pd.tot_len;
6397			}
6398			if (s->src_node != NULL) {
6399				s->src_node->packets++;
6400				s->src_node->bytes += pd.tot_len;
6401			}
6402			if (s->nat_src_node != NULL) {
6403				s->nat_src_node->packets++;
6404				s->nat_src_node->bytes += pd.tot_len;
6405			}
6406		}
6407		tr = r;
6408		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6409		if (nr != NULL) {
6410			struct pf_addr *x;
6411			/*
6412			 * XXX: we need to make sure that the addresses
6413			 * passed to pfr_update_stats() are the same than
6414			 * the addresses used during matching (pfr_match)
6415			 */
6416			if (r == &pf_default_rule) {
6417				tr = nr;
6418				x = (s == NULL || s->direction == dir) ?
6419				    &pd.baddr : &pd.naddr;
6420			} else {
6421				x = (s == NULL || s->direction == dir) ?
6422				    &pd.naddr : &pd.baddr;
6423			}
6424			if (x == &pd.baddr || s == NULL) {
6425				if (dir == PF_OUT)
6426					pd.src = x;
6427				else
6428					pd.dst = x;
6429			}
6430		}
6431		if (tr->src.addr.type == PF_ADDR_TABLE)
6432			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6433			    s->direction == dir) ? pd.src : pd.dst, pd.af,
6434			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6435			    tr->src.not);
6436		if (tr->dst.addr.type == PF_ADDR_TABLE)
6437			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6438			    s->direction == dir) ? pd.dst : pd.src, pd.af,
6439			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6440			    tr->dst.not);
6441	}
6442
6443
6444	if (action == PF_SYNPROXY_DROP) {
6445		m_freem(*m0);
6446		*m0 = NULL;
6447		action = PF_PASS;
6448	} else if (r->rt)
6449		/* pf_route6 can free the mbuf causing *m0 to become NULL */
6450		pf_route6(m0, r, dir, ifp, s);
6451
6452#ifdef __FreeBSD__
6453	PF_UNLOCK();
6454#endif
6455	return (action);
6456}
6457#endif /* INET6 */
6458